競馬が好きなので予想アプリを作ってみました!!
pythonでスクレイピングをして作りました。
keiba.py
import streamlit as st
import pandas as pd
import requests
from bs4 import BeautifulSoup
import re
import time
def get_url(kai, day, l, get_num):
    """Scrape the runners of the `get_num` most recent netkeiba race days
    held before the target day, and return them as one DataFrame.

    Parameters
    ----------
    kai : str or int
        Meeting number (回) of the target race day.
    day : str or int
        Day number (日) within the meeting.
    l : str
        Two-digit netkeiba racecourse code (e.g. "05" for Tokyo).
    get_num : int
        How many past race days to collect.

    Returns
    -------
    pandas.DataFrame
        One row per runner with race metadata columns; empty DataFrame
        when no past race pages could be scraped.
    """
    tg_year = 2022  # target season; the previous year is scanned as well
    df_race_li = []
    url_ids = []
    # Enumerate candidate race-day ids: year + course + meeting(回) + day(日).
    # NOTE(review): the original comments claim 開催 up to 6 (保険で7) and
    # 日 up to 12 (保険で14), but these ranges only cover 開催 1-5 and
    # 日 1-2 — confirm the intended coverage before widening them.
    for year in range(tg_year - 1, tg_year + 1):
        for z in range(1, 6):
            for y in range(1, 3):
                url_ids.append(f"{year}{l}{z:0>2}{y:0>2}")
    tg_id = f"{tg_year}{l}{kai:0>2}{day:0>2}"
    # Lexicographic comparison is valid because every id has the same width.
    tg_ids = [u for u in url_ids if u < tg_id]
    for race_id in reversed(tg_ids[-get_num:]):
        for x in range(1, 13):  # up to 12 races per day
            url = f"https://race.netkeiba.com/race/shutuba.html?race_id={race_id}{x:0>2}"
            print(url)
            r = requests.get(url)
            time.sleep(0.2)  # wait 0.2s between requests to reduce server load
            soup = BeautifulSoup(r.content, "html.parser")
            race = soup.find_all("div", class_="RaceList_Item02")
            if len(race) == 0:
                # No such race number on this day -> stop scanning this day.
                print("break")
                break
            racename = race[0].find_all("div", class_="RaceName")[0].text.strip()
            racedata = race[0].find_all("div", class_="RaceData01")[0].text.strip()
            racedata2 = race[0].find_all("div", class_="RaceData02")[0].text.strip()
            # Renamed from `race` (was shadowed): one <tr> per runner.
            horse_rows = soup.find_all("tr", class_="HorseList")
            uma_li = []
            hito_li = []
            for row in horse_rows:
                uma_li.append(row.find_all("td", class_="HorseInfo")[0].text.strip())
                hito_li.append(row.find_all("td", class_="Jockey")[0].text.strip())
            df_race = pd.DataFrame({"uma": uma_li, "hito": hito_li})
            df_race["racename"] = racename
            df_race["racedata1"] = racedata
            df_race["racedata2"] = racedata2
            df_race["racecourse"] = race_id[4:6]
            df_race["racenumber"] = race_id[6:8]
            df_race["racedate"] = race_id[8:10]
            df_race["racenumber2"] = x
            df_race = df_race.reset_index()
            df_race_li.append(df_race)
    # Bug fix: pd.concat([]) raises ValueError when nothing was scraped;
    # return an empty frame with the expected columns instead.
    if not df_race_li:
        return pd.DataFrame(
            columns=["index", "uma", "hito", "racename", "racedata1",
                     "racedata2", "racecourse", "racenumber", "racedate",
                     "racenumber2"]
        )
    df_race_all = pd.concat(df_race_li)
    return df_race_all
# Fetch the JRA top page and collect this week's featured-race links
# into link_dict: {race name -> absolute URL}.
url = "https://www.jra.go.jp"
top_page = requests.get(url)
top_soup = BeautifulSoup(top_page.content, "html.parser")
link_dict = {}
for slot in range(1, 10):
    # Each 開催 block on the top page exposes one race anchor.
    anchors = top_soup.select(
        f"#kaisai_area > div.kaisai_block > div:nth-child({slot}) "
        "> div.main > div > div.race > p > a"
    )
    if not anchors:
        break
    race_name = anchors[0].text
    link = f"https://www.jra.go.jp/{anchors[0].get('href')}"
    link_dict[race_name] = link
st.title("競馬")
st.markdown("### 今週の注目レース")
# Course name -> netkeiba two-digit course code.
# Hoisted out of the loop: it is constant and was rebuilt per iteration.
race_jyou_dict = {
    "札幌": "01",
    "函館": "02",
    "福島": "03",
    "新潟": "04",
    "東京": "05",
    "中山": "06",
    "中京": "07",
    "京都": "08",
    "阪神": "09",
    "小倉": "10"
}
# One button per featured race; Streamlit reruns the script on each click.
for name, link in link_dict.items():
    if st.button(f'{name}予想'):
        url_race = link
        if "syutsuba.html" in url_race:
            syutsuba = requests.get(url_race)
            syutsuba_soup = BeautifulSoup(syutsuba.content, "html.parser")
            syutsuba_result = syutsuba_soup.select("#syutsuba")
            # Robustness: guard against a missing #syutsuba element
            # (previously an unhandled IndexError).
            if not syutsuba_result:
                st.markdown("出馬情報がありません。")
                continue
            # e.g. "3回東京2日" — meeting/course/day descriptor.
            racejyou = syutsuba_result[0].find_all("div", class_="cell date")[0].text.strip().split(" ")[1]
            cap = syutsuba_result[0].find_all("div", class_="cell course")[0].text.strip()
            uma_td = syutsuba_result[0].find_all("td", class_="horse")
            hito_td = syutsuba_result[0].find_all("td", class_="jockey")
            uma_li = []
            hito_li = []
            for i in range(0, len(uma_td)):
                uma_li.append(uma_td[i].text.strip().split()[0])
                # The jockey cell mixes weight and name; keep the last two tokens.
                hito_li.append(" ".join(re.split(r"\n|\.| ", hito_td[i].text.strip())[-2:]))
            df_race = pd.DataFrame({"uma": uma_li, "hito": hito_li})
            # Raw strings fix invalid "\d" escapes (DeprecationWarning);
            # the pattern bytes are unchanged.
            kai = re.findall(r"\d*回", racejyou)[0].replace("回", "")
            day = re.findall(r"\d*日", racejyou)[0].replace("日", "")
            race_jyou = re.sub(r"\d*日|\d*回", "", racejyou)
            l = race_jyou_dict[race_jyou]
            st.table(df_race)
            st.markdown(racejyou)
            st.markdown(cap)
            st.markdown("### 過去の成績検索")
            s = cap.replace("コース:", "").replace(",", "")
            mm = int(re.search(r"\d*", s).group())  # race distance in metres
            # NOTE(review): this takes the 2nd character of the whole string;
            # it looks like it was meant to read the surface (芝/ダート) inside
            # parentheses — confirm against the original pattern before changing.
            shiba_dato = re.search(r"(.*)", s).group()[1]
            shiba_dato = "ダ" if shiba_dato == "ダート" else shiba_dato
            df_race_all = get_url(kai, day, l, 3)
            # Keep past races on the same surface and distance.
            tg_race = df_race_all[df_race_all["racedata1"].str.contains(f".*{shiba_dato}.*{mm}")]
            # "index" <= 2 keeps the top-3 finish rows per scraped race table.
            d = tg_race[tg_race["index"] <= 2][["index", "uma", "hito", "racename", "racenumber", "racedate", "racenumber2"]]
            st.subheader('過去の成績')
            st.table(d)
        else:
            st.markdown("出馬情報がありません。")
実行方法
streamlit run keiba.py
参考
https://introduction1.com/2021/09/30/python-scrp1/
https://www.jra.go.jp
https://www.netkeiba.com