1.文件的大小:可以从response header中提取,例如“Content-Length: 911”表示文件大小是911字节。
2.任务拆分:为每个线程指定下载文件的哪一块,可以通过在request header中添加“Range: bytes=300-400”来实现(表示下载第300~400字节的内容,两端都包含在内);注意可请求的range范围是[0, size-1]字节。
#!/usr/bin/python
# -*- coding: utf-8 -*-
# filename: paxel.py
# FROM: http://fayaa.com/code/view/58/full/
# Jay modified it a little and save for further potential usage.

'''It is a multi-thread downloading tool

    It was developed following axel.
        Author: volans
        E-mail: volansw [at] gmail.com
'''

import sys
import os
import time
import urllib
from threading import Thread

try:  # Python 3
    from urllib.request import ProxyHandler, Request, build_opener
except ImportError:  # Python 2
    from urllib2 import ProxyHandler, Request, build_opener

# in case you want to use http_proxy
local_proxies = {'http': ''}


def _make_opener(proxies=None):
    '''Return a URL opener that uses only the non-empty entries of proxies.

    Empty proxy addresses (such as the placeholder in local_proxies) are
    dropped, so an empty or all-blank mapping means "connect directly".
    '''
    active = dict((scheme, address)
                  for scheme, address in (proxies or {}).items()
                  if address)
    return build_opener(ProxyHandler(active))


class AxelPython(Thread):
    '''Worker thread that downloads one byte range of a URL into a part file.

    Arguments:
        threadname -- name given to the thread (also used in log lines)
        url        -- URL to download from
        filename   -- part file the bytes are appended to
        ranges     -- (first, last) byte offsets of this part, both inclusive
        proxies    -- optional mapping of scheme -> proxy URL

    Attributes set while running:
        downloaded -- number of bytes of this part currently on disk
        error      -- exception that aborted the download, or None
    '''

    def __init__(self, threadname, url, filename, ranges=0, proxies=None):
        Thread.__init__(self, name=threadname)
        self.name = threadname
        self.url = url
        self.filename = filename
        self.ranges = ranges
        self.proxies = proxies
        self.downloaded = 0
        self.error = None

    def run(self):
        '''Thread entry point: download the part and record any failure.'''
        try:
            self._download()
        except Exception as exc:
            # Keep the failure so the coordinating thread can report it
            # instead of silently merging an incomplete part.
            self.error = exc

    def _download(self):
        '''Fetch the bytes of this part that are not yet in the part file.'''
        first, last = self.ranges
        try:
            # Resume support: whatever is already on disk counts as done.
            self.downloaded = os.path.getsize(self.filename)
        except OSError:
            self.downloaded = 0

        # rebuild start point
        self.startpoint = first + self.downloaded

        # The range is inclusive, so the part is complete only once the
        # start point has moved past the last byte.
        if self.startpoint > last:
            print('Part %s has been downloaded over.' % self.filename)
            return

        self.oneTimeSize = 16384  # 16kByte/time
        print('task %s will download from %d to %d'
              % (self.name, self.startpoint, last))

        request = Request(self.url)
        request.add_header('Range', 'bytes=%d-%d' % (self.startpoint, last))
        self.urlhandle = _make_opener(self.proxies).open(request)
        try:
            # Without a 206 reply the body starts at offset 0, which is
            # only usable when that is where this part starts as well.
            if self.urlhandle.getcode() != 206 and self.startpoint != 0:
                raise IOError('server ignored the Range header for %s'
                              % self.url)
            remaining = last - self.startpoint + 1
            with open(self.filename, 'ab') as part:
                while remaining > 0:
                    data = self.urlhandle.read(
                        min(self.oneTimeSize, remaining))
                    if not data:
                        break
                    part.write(data)
                    self.downloaded += len(data)
                    remaining -= len(data)
        finally:
            self.urlhandle.close()


def GetUrlFileSize(url, proxies=None):
    '''Return the Content-Length reported for url, or 0 when it is unknown.'''
    response = _make_opener(proxies).open(url)
    try:
        length = response.info().get('Content-Length')
    finally:
        response.close()
    try:
        return int(length)
    except (TypeError, ValueError):
        return 0


def SpliteBlocks(totalsize, blocknumber):
    '''Split [0, totalsize - 1] into blocknumber inclusive (first, last) ranges.

    Every range but the last covers totalsize // blocknumber bytes; the last
    one also takes the remainder.  Raises ValueError if blocknumber < 1.
    '''
    if blocknumber < 1:
        raise ValueError('blocknumber must be at least 1')
    blocksize = totalsize // blocknumber
    ranges = [(i * blocksize, (i + 1) * blocksize - 1)
              for i in range(blocknumber - 1)]
    ranges.append((blocksize * (blocknumber - 1), totalsize - 1))
    return ranges


def islive(tasks):
    '''Return True while at least one of the given threads is still running.'''
    return any(task.is_alive() for task in tasks)


def _show_progress(size, tasks):
    '''Rewrite the one-line progress report on stdout.'''
    downloaded = sum(task.downloaded for task in tasks)
    process = downloaded / float(size) * 100
    show = u'\rFilesize:%d Downloaded:%d Completed:%.2f%%' % (
        size, downloaded, process)
    sys.stdout.write(show)
    sys.stdout.flush()


def paxel(url, output, blocks=6, proxies=local_proxies):
    '''Download url into output using up to blocks parallel range requests.

    Each worker appends to its own tmpfile_<n> in the current directory; the
    parts are concatenated into output and removed once all of them are
    complete.  On failure the part files are kept so a later call resumes.

    Raises ValueError if the server reports no usable Content-Length and
    IOError if any part could not be downloaded completely.
    '''
    size = GetUrlFileSize(url, proxies)
    if size <= 0:
        raise ValueError('cannot determine the size of %s' % url)

    # Never use more parts than there are bytes to download.
    blocks = max(1, min(blocks, size))
    ranges = SpliteBlocks(size, blocks)
    threadname = ["thread_%d" % i for i in range(blocks)]
    filename = ["tmpfile_%d" % i for i in range(blocks)]

    tasks = []
    for i in range(blocks):
        task = AxelPython(threadname[i], url, filename[i], ranges[i], proxies)
        task.daemon = True
        task.start()
        tasks.append(task)

    # Give the workers a moment to print their start-up lines.
    time.sleep(2)
    while islive(tasks):
        _show_progress(size, tasks)
        time.sleep(0.5)
    _show_progress(size, tasks)
    sys.stdout.write('\n')

    failed = [task.name for task in tasks if task.error is not None]
    downloaded = sum(task.downloaded for task in tasks)
    if failed or downloaded != size:
        raise IOError('download incomplete (%d of %d bytes, failed: %s)'
                      % (downloaded, size, ', '.join(failed) or 'none'))

    with open(output, 'wb') as merged:
        for partname in filename:
            with open(partname, 'rb') as part:
                # Copy in chunks so a large part is never held in memory.
                for chunk in iter(lambda: part.read(65536), b''):
                    merged.write(chunk)
            try:
                os.remove(partname)
            except OSError:
                # Best effort: a leftover part file does not harm the result.
                pass


if __name__ == '__main__':
    url = 'http://dldir1.qq.com/qqfile/QQforMac/QQ_V3.1.1.dmg'
    output = 'download.file'
    paxel(url, output, blocks=4, proxies={})
Original article: Python多线程下载文件
©2015 笑遍世界. All Rights Reserved.