# 全ソース
# Meigaraget.py
import pandas as pd
import sqlite3
import time
import argparse
from bs4 import BeautifulSoup
from datetime import datetime, date, timedelta
from selenium import webdriver
# Fetch all stored rows for one brand code from the DB.
def Get_data_from_DB(conn, codes):
    """Return every TMP_BrandsData_R row for *codes*, indexed by DataDate."""
    sql = "SELECT * FROM TMP_BrandsData_R WHERE `BrandCode` = ?"
    return pd.read_sql_query(sql, conn, params=[codes], index_col='DataDate')
# Compute moving averages and upsert them into HoldBrand_MovingPrice.
def avarage_get(conn, code):
    """Compute 5/14/25-day moving averages of the adjusted close for *code*
    and INSERT OR REPLACE one row per trading day, then commit.

    Fixes: the original kept a manual index counter and issued one
    ``execute`` per row; this version zips the rolling series with the
    DataFrame rows and uses a single ``executemany``.
    """
    df = Get_data_from_DB(conn, code)
    closes = df['ClosePriceAdjust']
    ma5 = closes.rolling(5).mean()
    ma14 = closes.rolling(14).mean()
    ma25 = closes.rolling(25).mean()
    # Leading windows are NaN; round(NaN) stays NaN and sqlite3 binds it as
    # NULL, matching the original behavior.
    rows = (
        (row.BrandCode, idx, round(m5, 2), round(m14, 2), round(m25, 2))
        for (idx, row), m5, m14, m25 in zip(df.iterrows(), ma5, ma14, ma25)
    )
    conn.executemany(
        "INSERT OR REPLACE INTO HoldBrand_MovingPrice"
        "(BrandCode, DataDate, MovingAverage5, MovingAverage14, MovingAverage25, RegTime, UpdTime)"
        "VALUES(?, ?, ?, ?, ?, datetime('now', 'localtime'),datetime('now', 'localtime'))",
        rows,
    )
    conn.commit()
# Scrape daily price rows for one brand/year out of the fetched HTML.
def Screiping(code, date):
    """Yield one tuple per data row of the kabuoji3 price table:
    (code, date, open, high, low, close, volume, adjusted close) —
    all cell values as strings.

    Fix: the original called ``data.find_all("td")`` seven times per row;
    the cell list is now looked up once per row.
    """
    soup = BeautifulSoup(get_html(code, date), 'html.parser')
    # No price table on the page means the brand/year has no data.
    # NOTE(review): exit() terminates the whole process mid-run; a plain
    # `return` would let the caller continue with other years — confirm
    # the hard stop is intentional before changing it.
    if len(soup.find_all("table", {"class": "stock_table stock_data_table"})) == 0:
        exit()
    # Remove the header row so the <tr> scan below sees only data rows.
    soup.find_all("thead")[0].find("tr").extract()
    for data in soup.find_all("tr"):
        tds = data.find_all("td")
        yield (code, tds[0].string, tds[1].string, tds[2].string,
               tds[3].string, tds[4].string, tds[5].string, tds[6].string)
def year_exec(year, code, generate_func, year_lis):
    """Yield rows from *generate_func* for every year from 1983 through
    *year* inclusive, skipping years already listed in *year_lis*
    (those are assumed to be in the DB already).

    Fix: removed the dead ``if yn == year+1: break`` branch — the range
    never produces ``year+1``, so it could never fire.
    """
    for yr in range(1983, year + 1):
        # Daily data for this year is already stored — don't hit the site.
        if yr in year_lis:
            continue
        yield from generate_func(code, yr)
# Open the DB, scrape any missing years for *code*, then refresh the moving averages.
def peparation_connection(db_file_name, code):
    """Top-level pipeline for one brand: skip if yesterday's quote is already
    stored, otherwise scrape the missing years into TMP_BrandsData_R and
    recompute moving averages.

    Fix: the original never closed the connection on any exit path
    (``with conn:`` only commits/rolls back); it is now closed in finally.
    """
    now_year = datetime.now().year
    yesterday = (datetime.today() - timedelta(days=1)).strftime("%Y-%m-%d")
    conn = sqlite3.connect(db_file_name)
    try:
        trgt_lis = exist_year(
            pd.read_sql_query('SELECT DataDate FROM TMP_BrandsData_R Where BrandCode = ?',
                              conn, params=[code]).values.tolist(),
            now_year)
        # Yesterday's row already present -> nothing new to scrape.
        if yesterday in trgt_lis[1]:
            print('最新{}のデータは取得済み'.format(yesterday))
            return None
        # `with conn:` commits on success and rolls back on error.
        with conn:
            sql = "INSERT OR REPLACE INTO TMP_BrandsData_R(BrandCode, DataDate, OpenPrice, HighPrice, LowPrice, ClosePrice, Yield, ClosePriceAdjust, RegTime, UpdTime)" \
            "VALUES(?,?,?,?,?,?,?,?,datetime('now', 'localtime'),datetime('now', 'localtime'))"
            conn.executemany(sql, year_exec(now_year, code, Screiping, trgt_lis[0]))
        avarage_get(conn, code)
    finally:
        conn.close()
def get_html(code, date):
    """Load the kabuoji3 page for *code*/*date* with the module-level
    Selenium driver and return the rendered page source."""
    url = 'https://kabuoji3.com/stock/{}/{}/'.format(code, date)
    driver.get(url)
    # Give the page a moment to render before grabbing the source.
    time.sleep(2.5)
    return driver.page_source
def exist_year(year_list, trgt_yaer):
    """Split stored DataDate rows (lists like ``['YYYY-MM-DD']``) into:

    - a deduplicated list of years other than *trgt_yaer* (already scraped), and
    - the list of date strings that fall within *trgt_yaer*.

    Fixes: ``not x in`` / ``not x ==`` rewritten as ``not in`` / ``==``, and
    the overlapping conditions restructured into a single if/elif chain.
    (The ``trgt_yaer`` typo is kept — renaming the parameter would break
    keyword callers.)
    """
    past_years = []
    current_dates = []
    for row in year_list:
        date_str = row[0]
        yr = datetime.strptime(date_str, '%Y-%m-%d').year
        if yr == trgt_yaer:
            # Daily rows of the year currently being processed.
            current_dates.append(date_str)
        elif yr not in past_years:
            past_years.append(yr)
    return past_years, current_dates
def main():
    """Parse ``--meigara_code`` and run the scrape/update pipeline for it."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--meigara_code', type=int)
    g_arg = parser.parse_args()
    # Fix: hasattr() was always True — argparse sets the attribute to None
    # when the flag is omitted, so the else branch was unreachable and None
    # would have been passed downstream. Check the value instead.
    if g_arg.meigara_code is not None:
        peparation_connection('stock_col.db', g_arg.meigara_code)
    else:
        print('err')
if __name__ == '__main__':
    driver = webdriver.Chrome()
    # Fix: quit the browser even if main() raises, so no orphaned
    # Chrome process is left behind.
    try:
        main()
    finally:
        driver.quit()
#以下、適当すぎる解説
Yahoo!ファイナンスはスクレイピングが禁止なので、以下のサイトからHTMLソースを直にぶっこ抜いてBeautifulSoupで丁寧に分解していく。(お世話になっております。)
株式投資メモ
https://kabuoji3.com/
コマンドで--meigara_codeに銘柄コードを指定すれば走るのでお試しあれ。
…分割に対応したコード忘れてた^^;今度書きます^^;