[更新] こちらのほうがいいです
はじめに
以下のサイトを見つけて、いろいろなデータをダウンロードするコードを書いたので共有します。飲酒しながら書いたコードなので悪しからず。futures の um の daily の BTCUSDT でしか動作確認していません。それ以外の組み合わせは各自でなんとかしてください。
コード
メイン
main.py
from get_data.binance.download import Download
def main():
    """Print the downloader's options, then fetch BTCUSDT data in bulk.

    One download pass is run for each method index 0-4, always with
    mode 0, futures_mode 1, period 0 and duration 0, for the date range
    2019-01-01 .. 2022-03-15. The result of every pass is printed.
    """
    downloader = Download()
    downloader.print_info()

    # Interactive alternative kept for reference: ask for a symbol and
    # fetch method index 5 for it instead of looping.
    #
    #     symbol = input("symbol = ")
    #     params = {
    #         'base_dir': "data/binance_f/",
    #         'mode': 0,
    #         'futures_mode': 1,
    #         'period': 0,
    #         'method': 5,
    #         'symbol': symbol,
    #         'str_start_date': '2019-01-01',
    #         'str_end_date': '2022-03-15',
    #     }
    #     r = downloader.download_with_params(params)

    # Settings shared by every pass; only the method index varies.
    shared_params = {
        'base_dir': "data/binance_f/",
        'mode': 0,
        'futures_mode': 1,
        'period': 0,
        'duration': 0,
        'symbol': 'BTCUSDT',
        'str_start_date': '2019-01-01',
        'str_end_date': '2022-03-15',
    }
    for method_index in range(5):
        outcome = downloader.download_with_params(
            {**shared_params, 'method': method_index}
        )
        print(outcome)
本命
download.py
from datetime import timedelta
from datetime import timezone
import dateutil.parser
import urllib.request
import os
from rich import print as rprint
from util.file_util import prepare_output_dir
from util.str_util import granularity_from_duration
class Download():
    """Bulk downloader for the zip archives published on data.binance.vision.

    Every option is selected by its integer index into one of the
    ``*_list`` attributes; ``print_info`` shows the available choices.
    """

    def __init__(self):
        # Direct object URL. The "?prefix=data/" form is only the browsable
        # listing page and does not serve the archives themselves.
        self.base_url = "https://data.binance.vision/data/"
        self.mode_list = ['futures', 'spot']
        self.futures_mode_list = ['cm', 'um']
        self.period_list = ['daily', 'monthly']
        self.duration_list = ['1m', '3m', '5m', '15m', '30m', '1h', '2h', '4h', '6h', '8h', '12h', '1d', '3d', '1w', '1mo']
        self.method_list = ['indexPriceKlines', 'klines', 'markPriceKlines', 'premiumIndexKlines', 'aggTrades', 'trades']

    def print_info(self):
        """Pretty-print the base URL and every option list, label first."""
        for label in (
            "base_url",
            "mode_list",
            "futures_mode_list",
            "period_list",
            "duration_list",
            "method_list",
        ):
            rprint(label)
            rprint(getattr(self, label))

    def download(self, url, filepath):
        """Fetch ``url`` and store it at ``filepath``.

        The payload is first written to ``<filepath>.part`` and only renamed
        to ``filepath`` once the transfer has finished, so an interrupted
        download never leaves a truncated file that a later run would
        mistake for a finished one.

        Args:
            url: Address of the file to fetch.
            filepath: Destination path of the downloaded file.

        Returns:
            ``url`` on success, ``-1`` on any failure (the URL and the error
            are printed in that case).
        """
        partial_path = f"{filepath}.part"
        try:
            urllib.request.urlretrieve(url, partial_path)
            os.replace(partial_path, filepath)
            return url
        except Exception as e:
            # Deliberately best-effort: one missing archive must not abort
            # a whole date range, so report the failure and carry on.
            try:
                os.remove(partial_path)
            except OSError:
                pass
            rprint(url)
            rprint(e)
            return -1

    @staticmethod
    def _iter_file_stamps(end_date, start_date, period):
        """Yield the date part of each archive name, newest first.

        ``'monthly'`` yields ``YYYY-MM`` once per month; any other period
        yields ``YYYY-MM-DD`` once per day. The stamp of ``end_date`` is
        always yielded, even when ``end_date`` is earlier than ``start_date``.
        """
        if period == 'monthly':
            year, month = end_date.year, end_date.month
            while True:
                yield f"{year:04d}-{month:02d}"
                if (year, month) <= (start_date.year, start_date.month):
                    return
                year, month = (year - 1, 12) if month == 1 else (year, month - 1)
        else:
            day = end_date
            while True:
                yield day.strftime('%Y-%m-%d')
                if day <= start_date:
                    return
                day -= timedelta(days=1)

    def download_with_params(self, params):
        """Download every archive between two dates, newest first.

        Files that already exist locally are skipped with a warning. A
        failed download is reported and does not stop the run.

        Args:
            params: dict with the keys
                base_dir (str): root directory for the saved files.
                mode (int): index into ``mode_list``.
                futures_mode (int): index into ``futures_mode_list``;
                    only read when the mode is ``'futures'``.
                period (int): index into ``period_list``.
                method (int): index into ``method_list``.
                duration (int): index into ``duration_list``; only read
                    for methods other than ``'aggTrades'`` / ``'trades'``.
                symbol (str): symbol name, e.g. ``'BTCUSDT'``.
                str_start_date (str): oldest date, e.g. ``'2020-01-01'``.
                str_end_date (str): newest date, e.g. ``'2020-01-01'``.

        Returns:
            The string ``"done"``.
        """
        base_dir = params['base_dir']
        mode = self.mode_list[params['mode']]
        period = self.period_list[params['period']]
        method = self.method_list[params['method']]
        symbol = params['symbol']
        end_date = dateutil.parser.parse(params['str_end_date']).replace(tzinfo=timezone.utc)
        start_date = dateutil.parser.parse(params['str_start_date']).replace(tzinfo=timezone.utc)

        if mode == 'futures':
            # Looked up here so that spot requests need no 'futures_mode' key.
            futures_mode = self.futures_mode_list[params['futures_mode']]
            url_suffix = f"futures/{futures_mode}/"
        else:
            url_suffix = "spot/"

        if method in ('aggTrades', 'trades'):
            # Trade archives are not split by candle duration.
            url_suffix += f"{period}/{method}/{symbol}/"
            save_dir = os.path.join(base_dir, method, symbol)
            filename_prefix = f"{symbol}-{method}"
        else:
            duration = self.duration_list[params['duration']]
            granularity = granularity_from_duration(duration)
            url_suffix += f"{period}/{method}/{symbol}/{duration}/"
            save_dir = os.path.join(base_dir, method, symbol, granularity)
            filename_prefix = f"{symbol}-{duration}"
        prepare_output_dir(save_dir)

        for stamp in self._iter_file_stamps(end_date, start_date, period):
            filename = f"{filename_prefix}-{stamp}.zip"
            filepath = os.path.join(save_dir, filename)
            if os.path.exists(filepath):
                rprint(f'action=download warning={filepath} is already exists')
                continue
            # download() reports its own errors and returns -1 on failure.
            rprint(self.download(url=self.base_url + url_suffix + filename, filepath=filepath))
        return "done"
その他
prepare_output_dir
import os
def prepare_output_dir(fdir, verbose=False):
    """Make sure the directory ``fdir`` exists, creating parents as needed.

    Args:
        fdir: Path of the directory to prepare.
        verbose: When true, print whether the directory already existed
            or is being created.

    Returns:
        Always ``True``.
    """
    already_exists = os.path.exists(fdir)
    if verbose:
        # Imported locally: this helper's module only imports ``os``, so a
        # bare ``rprint`` would raise NameError as soon as verbose is set.
        from rich import print as rprint
        if already_exists:
            rprint(f'dir exists: {fdir}')
        else:
            rprint(f'make fdir: {fdir}')
    if not already_exists:
        # exist_ok tolerates another process creating the directory between
        # the existence check above and this call.
        os.makedirs(fdir, exist_ok=True)
    return True
granularity_from_duration
import re
def granularity_from_duration(duration):
    """Move the number of a duration string behind its unit, lower-cased.

    The first run of digits in ``duration`` is taken as the number, every
    occurrence of that run is stripped to obtain the unit, and the result
    is ``unit + number`` in lower case (``'15m'`` -> ``'m15'``).
    """
    digits = re.search(r'\d+', duration).group()
    # Splitting on the digit run and re-joining drops every occurrence of it.
    unit = ''.join(duration.split(digits))
    return f"{unit}{digits}".lower()