Python File seek + write output weird "NUL" in the file -


i'm writting downloader split url parts , download threading, not use "join" because join = unable stream (cannot write file if thread not finish)

but problem f.seek , write output weird file, content of file have "nul" character (in notepad++) , text in file 1/3 of whole file.

he everybody, thank helping me, here version 2.0 of code, thank padraic cunningham suggestion , exlaination, fix code you've suggested: please me check code, , think need guy convert http.server file streamming method:

import os, requests import threading import urllib3 import urllib.request, urllib.error, urllib.parse import time import re  pool = urllib3.poolmanager(maxsize=10) url = "https://raw.githubusercontent.com/langpavel/tampermonkey/master/src/emulation.js" filename = "1.js" countsize = 0 #if os.path.exists(filename):  #   os.remove(filename)  def defwrite(filename,data,offset):   f = open(filename,'wb')   f.seek(offset)   f.write(data)   f.close()  def buildrange(url, numsplits):     global pool     value = int(requests.head(url, headers={'accept-encoding': 'identity'}).headers.get('content-length', none))     print("fullsize: ", value)     print("try devide 3 :", value / 3)     lst = []     in range(numsplits):         if == range(numsplits):             lst.append('%s-%s' % (i * value//numsplits + 1, * value//numsplits + 1 + (value - (i * value//numsplits + 1))))         if == 0:             lst.append('%s-%s' % (0, value//numsplits))         else:             lst.append('%s-%s' % (i * value//numsplits + 1, (i + 1) * value//numsplits))     return lst  def main(url=none, splitby=3):     global filename, pool, countsize     start_time = time.time()     if not url:         print("please enter url begin download.")         return      #filename = "1.jpg"      #print("%s bytes download." % sizeinbytes)    # if not sizeinbytes:     #    print("size cannot determined.")      #   return     #sinzeinbytes = buildrange(url,      datadict = {}     f = open(filename,'wb')      # split total num bytes ranges     #ranges = buildrange(url,int(sizeinbytes), splitby)     ranges = buildrange(url, splitby)     print(ranges)     def downloadchunk(idx, irange):         print(idx)         #time.sleep(1*idx)         #req = urllib.request.request(url)         #req.headers['range'] = 'bytes={}'.format(irange)         headers = urllib3._collections.httpheaderdict()         headers.add('range', 'bytes=' + str(irange))         data = pool.urlopen('get', url, headers=headers).data         #print(data)         #print("finish: " + str(irange))         offset = int(re.sub("(^.*?)-(.*?)$", "\\1", irange))         print(offset)        # print(irange)         f.seek(offset, 0)         #f.truncate(0)         #print(f.tell())         f.write(data)         #f.read()         #f.close()         countsize = countsize + offset           #defwrite("1.txt", req, re.sub("(^.*?)-", "\\1", str(irange)))      # create 1 downloading thread per chunk     downloaders = [         threading.thread(             target=downloadchunk,             args=(idx, irange),         )         idx,irange in enumerate(ranges)         ]       # start threads, let run in parallel, wait finish     th in downloaders:         th.start()         #th.isalive()     #for th in downloaders:         #th.join()         #print(th.join)     print(countsize)     #print('done: got {} chunks, total {} bytes'.format(     #    len(datadict), sum( (     ##        len(chunk) chunk in list(datadict.values())      #   ) )     #))      #print("--- %s seconds ---" % str(time.time() - start_time))  #    if os.path.exists(filename):  #       os.remove(filename)      #reassemble file in correct order     #with open(filename, 'wb') fh:     #    _idx,chunk in sorted(datadict.items()):     #        fh.write(chunk)     #stream_chunk = 16 * 1024     #with open(filename, 'wb') fp:     #  while true:     #      _idx,chunk in sorted(datadict.items()):             #fh.write(chunk)      #       chunking = chunk.read(stream_chunk)       #      if not chunk:        #         break         #    fp.write(chunking)      # print("finished writing file %s" % filename)     #print('file size {} bytes'.format(os.path.getsize(filename)))  if __name__ == '__main__':    if os.path.exists(filename):      os.remove(filename)    main(url, splitby=16) 

here code, please me fix it: version 1.0, ignore it, version 2.0 above:

import os, requests import threading import urllib3 import urllib.request, urllib.error, urllib.parse import time import re  pool = urllib3.poolmanager(maxsize=10) url = "https://raw.githubusercontent.com/langpavel/tampermonkey/master/src/emulation.js" filename = "1.js" #if os.path.exists(filename):  #   os.remove(filename)  def defwrite(filename,data,offset):   f = open(filename,'wb')   f.seek(offset)   f.write(data)   f.close()  def buildrange(value, numsplits):     lst = []     in range(numsplits):         if == range(numsplits):             lst.append('%s-%s' % (int(round(1 + * value/(numsplits*1.0),0)), int(value - round(1 + * value/(numsplits*1.0) + value/(numsplits*1.0)-1, 0))))         if == 0:             lst.append('%s-%s' % (i, int(round(1 + * value/(numsplits*1.0) + value/(numsplits*1.0)-1, 0))))         else:             lst.append('%s-%s' % (int(round(1 + * value/(numsplits*1.0),0)), int(round(1 + * value/(numsplits*1.0) + value/(numsplits*1.0)-1, 0))))     return lst  def main(url=none, splitby=3):     global filename, pool     start_time = time.time()     if not url:         print("please enter url begin download.")         return      #filename = "1.jpg"     sizeinbytes = requests.head(url, headers={'accept-encoding': 'identity'}).headers.get('content-length', none)     print("%s bytes download." % sizeinbytes)     if not sizeinbytes:         print("size cannot determined.")         return      datadict = {}      # split total num bytes ranges     ranges = buildrange(int(sizeinbytes), splitby)      def downloadchunk(idx, irange):         print(idx)         #req = urllib.request.request(url)         #req.headers['range'] = 'bytes={}'.format(irange)         headers = urllib3._collections.httpheaderdict()         headers.add('range', 'bytes=' + str(irange))         data = pool.urlopen('get', url, headers=headers).data         print(data)         print("finish: " + str(irange))         offset = int(re.sub("(^.*?)-(.*?)$", "\\1", irange))         #print(offset)        # print(irange)         f = open(filename,'wb')         f.seek(offset)         #f.truncate(0)         #print(f.tell())         f.write(data)         #f.read()         #f.close()            #defwrite("1.txt", req, re.sub("(^.*?)-", "\\1", str(irange)))      # create 1 downloading thread per chunk     downloaders = [         threading.thread(             target=downloadchunk,             args=(idx, irange),         )         idx,irange in enumerate(ranges)         ]      # start threads, let run in parallel, wait finish     th in downloaders:         th.start()         #th.isalive()     #for th in downloaders:         #th.join()         #print(th.join)      #print('done: got {} chunks, total {} bytes'.format(     #    len(datadict), sum( (     ##        len(chunk) chunk in list(datadict.values())      #   ) )     #))      #print("--- %s seconds ---" % str(time.time() - start_time))  #    if os.path.exists(filename):  #       os.remove(filename)      #reassemble file in correct order     #with open(filename, 'wb') fh:     #    _idx,chunk in sorted(datadict.items()):     #        fh.write(chunk)     #stream_chunk = 16 * 1024     #with open(filename, 'wb') fp:     #  while true:     #      _idx,chunk in sorted(datadict.items()):             #fh.write(chunk)      #       chunking = chunk.read(stream_chunk)       #      if not chunk:        #         break         #    fp.write(chunking)      # print("finished writing file %s" % filename)     #print('file size {} bytes'.format(os.path.getsize(filename)))  if __name__ == '__main__':     main(url, splitby=3) 

you use 3 threads target function downloadchunk, open file 3 times using wb overwrites 1/3 of content. call seek no apparent reason. if wanted append file open using a each time or open file once outside functions. trying seek using empty file , write null bytes come from.

if want open file reading , writing can seek line buffering:

 open("whatever.file", "r+b",buffering=1) f 

then use file write to, don't keep opening in function , overwriting, file must exist.


Comments

Popular posts from this blog

angularjs - ADAL JS Angular- WebAPI add a new role claim to the token -

php - CakePHP HttpSockets send array of paramms -

node.js - Using Node without global install -