Edited at

python で 分割ダウンロード

More than 1 year has passed since last update.


分割ダウンロード

単純にダウンロードするととっても遅い(#^ω^)

そこでHTTPリクエストヘッダーにRangeを付与し、「データの何バイト目から何バイト目までを頂戴」とサーバーに送る

このような感じ(0バイト目から999バイト目まで、両端を含む先頭1000バイトを要求する例)

Range: bytes=0-999

詳しくはRFC 7233 日本語訳


サンプルソースコード

書きなぐりのコードなので、各自でリファクタリングしてください

#!/usr/bin/env python

# -*- coding: utf-8 -*-
import requests
import urllib2
from glob import glob
import os

# Join the split files back together.
def combine(file_type, file_name="out"):
    """Concatenate every ``*.tmp`` piece in the current directory into one file.

    The pieces are appended to ``<file_name>.<file_type>`` in ascending order
    of their numeric stem (``0.tmp``, ``1.tmp``, ..., ``10.tmp``), and each
    piece is deleted once it has been appended.

    Args:
        file_type: Extension of the output file, without the leading dot.
        file_name: Base name of the output file.
    """
    def _piece_order(path):
        # glob() returns names in arbitrary directory order, and a plain
        # string sort would put "10.tmp" before "2.tmp", so order by the
        # integer value of the stem. Non-numeric stems go last, by name.
        stem = os.path.splitext(os.path.basename(path))[0]
        try:
            return (0, int(stem), stem)
        except ValueError:
            return (1, 0, stem)

    with open("%s.%s" % (file_name, file_type), "wb") as merged:
        for tmp_file in sorted(glob("*.tmp"), key=_piece_order):
            # "with" closes the piece even if the write fails.
            with open(tmp_file, "rb") as piece:
                merged.write(piece.read())
            os.remove(tmp_file)

# Download the bytes from start to end.
def download(url, num, start, end):
    """Fetch one byte range of *url* and save it as ``<num>.tmp``.

    Args:
        url: Address of the resource to fetch.
        num: Piece index; used as the stem of the temporary file name.
        start: First byte position sent in the Range header.
        end: Last byte position sent in the Range header.
    """
    req = urllib2.Request(url)
    req.add_header('Range', 'bytes=%s-%s' % (start, end))
    res = urllib2.urlopen(req)
    try:
        with open("%s.tmp" % num, "wb") as out:
            out.write(res.read())
    finally:
        # Release the connection even if reading or writing fails.
        res.close()

# Split download.
def _byte_ranges(total_length, split_num):
    """Return inclusive ``(start, end)`` byte ranges covering ``total_length`` bytes."""
    ranges = []
    start = 0
    for i in range(split_num):
        # Piece sizes differ by at most one byte and add up to total_length.
        size = (total_length + i) // split_num
        if size == 0:
            # Fewer bytes than pieces: an empty piece would produce an
            # invalid "start > end" range, so skip it.
            continue
        ranges.append((start, start + size - 1))
        start += size
    return ranges


def split_download(url, split_num=10):
    """Download *url* piece by piece with Range requests, then join the pieces.

    The size and type of the resource are read from a HEAD request. Each piece
    is fetched with ``download`` and the result is assembled by ``combine``.

    Args:
        url: Address of the resource to fetch.
        split_num: Number of pieces to split the download into.

    Raises:
        ValueError: If ``split_num`` is less than 1 or the server does not
            report a Content-Length.
    """
    if split_num < 1:
        raise ValueError("split_num must be at least 1")

    # One HEAD request supplies both headers.
    headers = requests.head(url).headers
    content_length = headers.get("content-length")
    if content_length is None:
        raise ValueError("no Content-Length reported for %s" % url)
    total_length = int(content_length)

    # "video/mp4; codecs=..." -> "mp4": drop media-type parameters before
    # taking the subtype as the output file extension.
    content_type = headers.get("content-type") or "application/octet-stream"
    file_type = content_type.split(";")[0].strip().split("/")[-1]

    for num, (start, end) in enumerate(_byte_ranges(total_length, split_num)):
        download(url, num, start, end)
    # Join the pieces.
    combine(file_type)

def main(url):
    """Run the split download for the given URL."""
    split_download(url)

# When run as a script, read the target URL from standard input.
if __name__ == "__main__":
    target_url = raw_input()
    main(target_url)


ダウンロード時間の比較

実験としてYouTubeの動画をダウンロードしてみる

URL:【MMD艦これ】プリンツ・オイゲンでLamb.【Prinz Eugen】「4K」



Size:61,531,848 バイト

普通にダウンロード

import time

import requests
import urllib2

def download(url):
    """Fetch *url* with a single request and save the body as ``out.mp4``."""
    res = urllib2.urlopen(urllib2.Request(url))
    try:
        with open("out.mp4", "wb") as out:
            out.write(res.read())
    finally:
        # Release the connection even if reading or writing fails.
        res.close()

def main():
    """Time a plain, single-request download and print the elapsed seconds."""
    url = "http://r3---sn-p5qlsnz6.googlevideo.com/videoplayback?itag=22&ratebypass=yes&ip=159.253.144.86&upn=_QruIEPRuNQ&ipbits=0&pl=24&ei=F9JsWOr2M8Kg1gLPhLnIDQ&signature=928BE55DE4B8FCC2D0F92335978AE782BF18CE23.65FB2598D99464A91224B9F6F573EA0C18DDD420&source=youtube&mv=m&mt=1483526543&ms=au&mn=sn-p5qlsnz6&mm=31&id=o-AL7N7DYCdMwVgG-yDRHArP9OQXNWr-tGtHGfLUWujYuD&initcwndbps=4970000&key=yt6&mime=video%2Fmp4&sparams=dur%2Cei%2Cid%2Cinitcwndbps%2Cip%2Cipbits%2Citag%2Clmt%2Cmime%2Cmm%2Cmn%2Cms%2Cmv%2Cnh%2Cpl%2Cratebypass%2Csource%2Cupn%2Cexpire&expire=1483548279&nh=IgpwcjAzLmlhZDA3KgkxMjcuMC4wLjE&lmt=1471302962440452&dur=240.814&title=%E3%80%90MMD%E8%89%A6%E3%81%93%E3%82%8C%E3%80%91%E3%83%97%E3%83%AA%E3%83%B3%E3%83%84%E3%83%BB%E3%82%AA%E3%82%A4%E3%82%B2%E3%83%B3%E3%81%A7Lamb.%E3%80%90Prinz+Eugen%E3%80%91%E3%80%8C%EF%BC%94%EF%BC%AB%E3%80%8D"
    start = time.time()
    download(url)
    elapsed_time = time.time() - start
    # The "[sec]" suffix lives inside the format string so this line prints
    # the same text whether print is a statement or a function.
    print("elapsed_time:{0}[sec]".format(elapsed_time))

# Run the benchmark only when executed as a script, not on import.
if __name__ == "__main__":
    main()

分割ダウンロード


import requests
import urllib2
from glob import glob
import os
import time

def download(url, num, start, end):
    """Fetch one byte range of *url* and save it as ``<num>.tmp``.

    Args:
        url: Address of the resource to fetch.
        num: Piece index; used as the stem of the temporary file name.
        start: First byte position sent in the Range header.
        end: Last byte position sent in the Range header.
    """
    req = urllib2.Request(url)
    req.add_header('Range', 'bytes=%s-%s' % (start, end))
    res = urllib2.urlopen(req)
    try:
        with open("%s.tmp" % num, "wb") as out:
            out.write(res.read())
    finally:
        # Release the connection even if reading or writing fails.
        res.close()

def _byte_ranges(total_length, split_num):
    """Return inclusive ``(start, end)`` byte ranges covering ``total_length`` bytes."""
    ranges = []
    start = 0
    for i in range(split_num):
        # Piece sizes differ by at most one byte and add up to total_length.
        size = (total_length + i) // split_num
        if size == 0:
            # Fewer bytes than pieces: an empty piece would produce an
            # invalid "start > end" range, so skip it.
            continue
        ranges.append((start, start + size - 1))
        start += size
    return ranges


def split_download(url, split_num=10):
    """Download *url* piece by piece with Range requests, then join the pieces.

    The size and type of the resource are read from a HEAD request. Each piece
    is fetched with ``download`` and the result is assembled by ``combine``.

    Args:
        url: Address of the resource to fetch.
        split_num: Number of pieces to split the download into.

    Raises:
        ValueError: If ``split_num`` is less than 1 or the server does not
            report a Content-Length.
    """
    if split_num < 1:
        raise ValueError("split_num must be at least 1")

    # One HEAD request supplies both headers.
    headers = requests.head(url).headers
    content_length = headers.get("content-length")
    if content_length is None:
        raise ValueError("no Content-Length reported for %s" % url)
    total_length = int(content_length)

    # "video/mp4; codecs=..." -> "mp4": drop media-type parameters before
    # taking the subtype as the output file extension.
    content_type = headers.get("content-type") or "application/octet-stream"
    file_type = content_type.split(";")[0].strip().split("/")[-1]

    for num, (start, end) in enumerate(_byte_ranges(total_length, split_num)):
        download(url, num, start, end)

    combine(file_type)

def combine(file_type, file_name="out"):
    """Concatenate every ``*.tmp`` piece in the current directory into one file.

    The pieces are appended to ``<file_name>.<file_type>`` in ascending order
    of their numeric stem (``0.tmp``, ``1.tmp``, ..., ``10.tmp``), and each
    piece is deleted once it has been appended.

    Args:
        file_type: Extension of the output file, without the leading dot.
        file_name: Base name of the output file.
    """
    def _piece_order(path):
        # glob() returns names in arbitrary directory order, and a plain
        # string sort would put "10.tmp" before "2.tmp", so order by the
        # integer value of the stem. Non-numeric stems go last, by name.
        stem = os.path.splitext(os.path.basename(path))[0]
        try:
            return (0, int(stem), stem)
        except ValueError:
            return (1, 0, stem)

    with open("%s.%s" % (file_name, file_type), "wb") as merged:
        for tmp_file in sorted(glob("*.tmp"), key=_piece_order):
            # "with" closes the piece even if the write fails.
            with open(tmp_file, "rb") as piece:
                merged.write(piece.read())
            os.remove(tmp_file)

def main():
    """Time the split download and print the elapsed seconds."""
    url = "http://r3---sn-p5qlsnz6.googlevideo.com/videoplayback?itag=22&ratebypass=yes&ip=159.253.144.86&upn=_QruIEPRuNQ&ipbits=0&pl=24&ei=F9JsWOr2M8Kg1gLPhLnIDQ&signature=928BE55DE4B8FCC2D0F92335978AE782BF18CE23.65FB2598D99464A91224B9F6F573EA0C18DDD420&source=youtube&mv=m&mt=1483526543&ms=au&mn=sn-p5qlsnz6&mm=31&id=o-AL7N7DYCdMwVgG-yDRHArP9OQXNWr-tGtHGfLUWujYuD&initcwndbps=4970000&key=yt6&mime=video%2Fmp4&sparams=dur%2Cei%2Cid%2Cinitcwndbps%2Cip%2Cipbits%2Citag%2Clmt%2Cmime%2Cmm%2Cmn%2Cms%2Cmv%2Cnh%2Cpl%2Cratebypass%2Csource%2Cupn%2Cexpire&expire=1483548279&nh=IgpwcjAzLmlhZDA3KgkxMjcuMC4wLjE&lmt=1471302962440452&dur=240.814&title=%E3%80%90MMD%E8%89%A6%E3%81%93%E3%82%8C%E3%80%91%E3%83%97%E3%83%AA%E3%83%B3%E3%83%84%E3%83%BB%E3%82%AA%E3%82%A4%E3%82%B2%E3%83%B3%E3%81%A7Lamb.%E3%80%90Prinz+Eugen%E3%80%91%E3%80%8C%EF%BC%94%EF%BC%AB%E3%80%8D"
    start = time.time()
    split_download(url)
    elapsed_time = time.time() - start
    # The "[sec]" suffix lives inside the format string so this line prints
    # the same text whether print is a statement or a function.
    print("elapsed_time:{0}[sec]".format(elapsed_time))

# Run the benchmark only when executed as a script, not on import.
if __name__ == "__main__":
    main()

結果

普通にダウンロード
分割ダウンロード
スレッド分割ダウンロード

11.1779999733[sec]
6.3789999485[sec]
5.75699996948[sec]

追記

スレッド分割ダウンロードは単純にthreadingを用いるだけ

# Hand one piece to a thread that calls download(url, i, 0, last).
thread = threading.Thread(target=download, args=(url, i, 0, last))

thread.start()
# NOTE(review): `threads` is not defined in this fragment — presumably a list
# created beforehand so each thread can be join()ed before the pieces are
# combined; confirm against the full script.
threads.append(thread)

一度しか測定していません。環境によって測定時間は異なります。

BNRスピードテストの結果です

------ BNRスピードテスト (ダウンロード速度) ------

測定サイト: http://www.musen-lan.com/speed/ Ver5.6001
測定日時: 2017/01/05 11:14:20
回線/ISP/地域:
--------------------------------------------------
1.NTTPC(WebARENA)1: 56.64Mbps (7.08MB/sec)
2.NTTPC(WebARENA)2: 90.39Mbps (11.30MB/sec)
推定転送速度: 90.39Mbps (11.30MB/sec)


参考