Python
python2.7

python で 分割ダウンロード

More than 1 year has passed since last update.

分割ダウンロード

単純にダウンロードするととっても遅い(#^ω^)
そこでHTTPリクエストヘッダーにRangeを付与し、データの何バイト目から何バイト目までが欲しいかをサーバーに伝える

このような感じ(開始位置と終了位置はどちらも範囲に含まれるので、下の例は先頭の1000バイトを要求している)
Range: bytes=0-999

詳しくはRFC 7233 日本語訳

サンプルソースコード

書きなぐりのコードなので各位リファクタリング

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import requests
import urllib2
from glob import glob
import os

# Join the downloaded chunk files into one output file.
def combine(file_type, file_name="out"):
    """Concatenate every ``*.tmp`` file in the current directory.

    The result is written to ``<file_name>.<file_type>`` and each chunk file
    is deleted once it has been copied.

    Args:
        file_type: Extension of the output file (without the dot).
        file_name: Base name of the output file.
    """
    def chunk_order(path):
        # glob() gives no ordering guarantee, and a plain string sort would
        # put "10.tmp" before "2.tmp". Numbered chunks therefore go first in
        # numeric order; any other *.tmp file follows, ordered by name.
        stem = os.path.splitext(os.path.basename(path))[0]
        try:
            return (0, int(stem), path)
        except ValueError:
            return (1, 0, path)

    with open("%s.%s" % (file_name, file_type), "wb") as out:
        for tmp_file in sorted(glob("*.tmp"), key=chunk_order):
            with open(tmp_file, "rb") as ftmp:
                # Copy in bounded pieces so a large chunk is never held in
                # memory all at once.
                while True:
                    data = ftmp.read(1024 * 1024)
                    if not data:
                        break
                    out.write(data)
            os.remove(tmp_file)

# Download the bytes from start to end (both inclusive).
def download(url, num, start, end):
    """Fetch one byte range of ``url`` and save it as ``<num>.tmp``.

    Args:
        url: Address of the resource.
        num: Chunk number; used as the name of the temporary file.
        start: Position of the first byte to request.
        end: Position of the last byte to request (inclusive).

    Raises:
        IOError: If the server does not answer with 206 Partial Content.
    """
    req = urllib2.Request(url)
    req.add_header('Range', 'bytes=%s-%s' % (start, end))
    res = urllib2.urlopen(req)
    try:
        # A server that ignores the Range header answers 200 with the whole
        # body. Saving that as a chunk would silently corrupt the joined file.
        if res.getcode() != 206:
            raise IOError(
                "expected 206 Partial Content for bytes=%s-%s, got %s"
                % (start, end, res.getcode()))
        with open("%s.tmp" % num, "wb") as out:
            # Stream to disk instead of holding the whole range in memory.
            while True:
                data = res.read(64 * 1024)
                if not data:
                    break
                out.write(data)
    finally:
        res.close()

# Split download: fetch the resource in several byte ranges, then join them.
def split_download(url):
    """Download ``url`` in up to 10 byte ranges and combine the pieces.

    The size and the output extension are taken from the Content-Length and
    Content-Type headers of a HEAD request.

    Args:
        url: Address of the resource.

    Raises:
        ValueError: If the server does not report a Content-Length.
    """
    # One HEAD request supplies both headers.
    headers = requests.head(url).headers
    content_length = headers.get("content-length", None)
    if content_length is None:
        raise ValueError("no Content-Length reported for %s" % url)
    total_length = int(content_length)

    # Drop parameters such as "; charset=..." before taking the subtype.
    content_type = headers.get("content-type", None) or "application/octet-stream"
    file_type = content_type.split(";")[0].strip().split("/")[-1]

    split_num = 10
    start = 0
    for i in range(split_num):
        # These sizes differ by at most one byte and add up to total_length.
        size = (total_length + i) // split_num
        if size == 0:
            # Fewer bytes than chunks: an empty range such as "bytes=1-0"
            # is invalid, so nothing is requested for this chunk.
            continue
        # Both ends of a byte range are inclusive.
        end = start + size - 1
        download(url, i, start, end)
        start = end + 1
    # Join the pieces.
    combine(file_type)

def main(url):
    """Run the split download for ``url``."""
    split_download(url)

# Script entry point: the URL is read as one line from standard input.
if __name__ == "__main__":
    target_url = raw_input()
    main(target_url)

ダウンロード時間の比較

実験としてYouTubeの動画をダウンロードしてみる

URL:【MMD艦これ】プリンツ・オイゲンでLamb.【Prinz Eugen】「4K」

Size:61,531,848 バイト

普通にダウンロード

import time
import requests
import urllib2

def download(url):
    """Fetch ``url`` with a single request and save it as ``out.mp4``.

    Args:
        url: Address of the resource.
    """
    req = urllib2.Request(url)
    res = urllib2.urlopen(req)
    try:
        with open("out.mp4", "wb") as out:
            # Stream to disk instead of holding the whole body in memory.
            while True:
                data = res.read(64 * 1024)
                if not data:
                    break
                out.write(data)
    finally:
        # Release the connection even if writing fails.
        res.close()

def main():
    """Time one plain download of the sample URL and print the seconds."""
    url = "http://r3---sn-p5qlsnz6.googlevideo.com/videoplayback?itag=22&ratebypass=yes&ip=159.253.144.86&upn=_QruIEPRuNQ&ipbits=0&pl=24&ei=F9JsWOr2M8Kg1gLPhLnIDQ&signature=928BE55DE4B8FCC2D0F92335978AE782BF18CE23.65FB2598D99464A91224B9F6F573EA0C18DDD420&source=youtube&mv=m&mt=1483526543&ms=au&mn=sn-p5qlsnz6&mm=31&id=o-AL7N7DYCdMwVgG-yDRHArP9OQXNWr-tGtHGfLUWujYuD&initcwndbps=4970000&key=yt6&mime=video%2Fmp4&sparams=dur%2Cei%2Cid%2Cinitcwndbps%2Cip%2Cipbits%2Citag%2Clmt%2Cmime%2Cmm%2Cmn%2Cms%2Cmv%2Cnh%2Cpl%2Cratebypass%2Csource%2Cupn%2Cexpire&expire=1483548279&nh=IgpwcjAzLmlhZDA3KgkxMjcuMC4wLjE&lmt=1471302962440452&dur=240.814&title=%E3%80%90MMD%E8%89%A6%E3%81%93%E3%82%8C%E3%80%91%E3%83%97%E3%83%AA%E3%83%B3%E3%83%84%E3%83%BB%E3%82%AA%E3%82%A4%E3%82%B2%E3%83%B3%E3%81%A7Lamb.%E3%80%90Prinz+Eugen%E3%80%91%E3%80%8C%EF%BC%94%EF%BC%AB%E3%80%8D"
    start = time.time()
    download(url)
    elapsed_time = time.time() - start
    # The "[sec]" suffix is part of the format string, so the line prints the
    # same text whether print is a statement or a function.
    print("elapsed_time:{0}[sec]".format(elapsed_time))

# Run the timing only when the file is executed as a script.
if __name__ == "__main__":
    main()

分割ダウンロード

import requests
import urllib2
from glob import glob
import os
import time

def download(url, num, start, end):
    """Fetch one byte range of ``url`` and save it as ``<num>.tmp``.

    Args:
        url: Address of the resource.
        num: Chunk number; used as the name of the temporary file.
        start: Position of the first byte to request.
        end: Position of the last byte to request (inclusive).

    Raises:
        IOError: If the server does not answer with 206 Partial Content.
    """
    req = urllib2.Request(url)
    req.add_header('Range', 'bytes=%s-%s' % (start, end))
    res = urllib2.urlopen(req)
    try:
        # A server that ignores the Range header answers 200 with the whole
        # body. Saving that as a chunk would silently corrupt the joined file.
        if res.getcode() != 206:
            raise IOError(
                "expected 206 Partial Content for bytes=%s-%s, got %s"
                % (start, end, res.getcode()))
        with open("%s.tmp" % num, "wb") as out:
            # Stream to disk instead of holding the whole range in memory.
            while True:
                data = res.read(64 * 1024)
                if not data:
                    break
                out.write(data)
    finally:
        res.close()

def split_download(url):
    """Download ``url`` in up to 10 byte ranges and combine the pieces.

    The size and the output extension are taken from the Content-Length and
    Content-Type headers of a HEAD request.

    Args:
        url: Address of the resource.

    Raises:
        ValueError: If the server does not report a Content-Length.
    """
    # One HEAD request supplies both headers.
    headers = requests.head(url).headers
    content_length = headers.get("content-length", None)
    if content_length is None:
        raise ValueError("no Content-Length reported for %s" % url)
    total_length = int(content_length)

    # Drop parameters such as "; charset=..." before taking the subtype.
    content_type = headers.get("content-type", None) or "application/octet-stream"
    file_type = content_type.split(";")[0].strip().split("/")[-1]

    split_num = 10
    start = 0
    for i in range(split_num):
        # These sizes differ by at most one byte and add up to total_length.
        size = (total_length + i) // split_num
        if size == 0:
            # Fewer bytes than chunks: an empty range such as "bytes=1-0"
            # is invalid, so nothing is requested for this chunk.
            continue
        # Both ends of a byte range are inclusive.
        end = start + size - 1
        download(url, i, start, end)
        start = end + 1

    combine(file_type)

def combine(file_type, file_name="out"):
    """Concatenate every ``*.tmp`` file in the current directory.

    The result is written to ``<file_name>.<file_type>`` and each chunk file
    is deleted once it has been copied.

    Args:
        file_type: Extension of the output file (without the dot).
        file_name: Base name of the output file.
    """
    def chunk_order(path):
        # glob() gives no ordering guarantee, and a plain string sort would
        # put "10.tmp" before "2.tmp". Numbered chunks therefore go first in
        # numeric order; any other *.tmp file follows, ordered by name.
        stem = os.path.splitext(os.path.basename(path))[0]
        try:
            return (0, int(stem), path)
        except ValueError:
            return (1, 0, path)

    with open("%s.%s" % (file_name, file_type), "wb") as out:
        for tmp_file in sorted(glob("*.tmp"), key=chunk_order):
            with open(tmp_file, "rb") as ftmp:
                # Copy in bounded pieces so a large chunk is never held in
                # memory all at once.
                while True:
                    data = ftmp.read(1024 * 1024)
                    if not data:
                        break
                    out.write(data)
            os.remove(tmp_file)

def main():
    """Time one split download of the sample URL and print the seconds."""
    url = "http://r3---sn-p5qlsnz6.googlevideo.com/videoplayback?itag=22&ratebypass=yes&ip=159.253.144.86&upn=_QruIEPRuNQ&ipbits=0&pl=24&ei=F9JsWOr2M8Kg1gLPhLnIDQ&signature=928BE55DE4B8FCC2D0F92335978AE782BF18CE23.65FB2598D99464A91224B9F6F573EA0C18DDD420&source=youtube&mv=m&mt=1483526543&ms=au&mn=sn-p5qlsnz6&mm=31&id=o-AL7N7DYCdMwVgG-yDRHArP9OQXNWr-tGtHGfLUWujYuD&initcwndbps=4970000&key=yt6&mime=video%2Fmp4&sparams=dur%2Cei%2Cid%2Cinitcwndbps%2Cip%2Cipbits%2Citag%2Clmt%2Cmime%2Cmm%2Cmn%2Cms%2Cmv%2Cnh%2Cpl%2Cratebypass%2Csource%2Cupn%2Cexpire&expire=1483548279&nh=IgpwcjAzLmlhZDA3KgkxMjcuMC4wLjE&lmt=1471302962440452&dur=240.814&title=%E3%80%90MMD%E8%89%A6%E3%81%93%E3%82%8C%E3%80%91%E3%83%97%E3%83%AA%E3%83%B3%E3%83%84%E3%83%BB%E3%82%AA%E3%82%A4%E3%82%B2%E3%83%B3%E3%81%A7Lamb.%E3%80%90Prinz+Eugen%E3%80%91%E3%80%8C%EF%BC%94%EF%BC%AB%E3%80%8D"
    start = time.time()
    split_download(url)
    elapsed_time = time.time() - start
    # The "[sec]" suffix is part of the format string, so the line prints the
    # same text whether print is a statement or a function.
    print("elapsed_time:{0}[sec]".format(elapsed_time))

# Run the timing only when the file is executed as a script.
if __name__ == "__main__":
    main()

結果

普通にダウンロード 分割ダウンロード スレッド分割ダウンロード
11.1779999733[sec] 6.3789999485[sec] 5.75699996948[sec]

追記
スレッド分割ダウンロードは単純にthreadingを用いるだけ

# Excerpt: run one chunk download in its own thread instead of calling
# download() directly.
# NOTE(review): `threads`, `i` and `last` come from code that is not shown
# here; presumably this sits inside the loop of split_download - confirm.
thread = threading.Thread(target=download, args=(url, i, 0, last))
thread.start()
# Keep the handle so that each thread can be join()ed before combine() reads
# the .tmp files; the join itself is not part of this excerpt.
threads.append(thread)

一度しか測定してません、環境によって測定時間が異なります。
BNRスピードテストの結果です

------ BNRスピードテスト (ダウンロード速度) ------
測定サイト: http://www.musen-lan.com/speed/ Ver5.6001
測定日時: 2017/01/05 11:14:20
回線/ISP/地域: 
--------------------------------------------------
1.NTTPC(WebARENA)1: 56.64Mbps (7.08MB/sec)
2.NTTPC(WebARENA)2: 90.39Mbps (11.30MB/sec)
推定転送速度: 90.39Mbps (11.30MB/sec)

参考