LoginSignup
2
1

More than 1 year has passed since last update.

競馬予想アプリ

Posted at

競馬が好きなので予想アプリを作ってみました!!

Pythonでスクレイピングをして作りました。

keiba.py
import re
import time
from urllib.parse import urljoin

import pandas as pd
import requests
import streamlit as st
from bs4 import BeautifulSoup


def get_url(kai, day, l, get_num, tg_year=2022, max_kai=5, max_day=2):
    """Scrape netkeiba entry tables for meeting days preceding a target day.

    Candidate meeting-day ids of the form ``{year}{l}{kai:02}{day:02}`` are
    generated for ``tg_year - 1`` and ``tg_year``.  The ``get_num`` ids that
    sort immediately before the target id are then fetched, newest first,
    requesting race numbers 1-12 for each until a page has no race header.

    Args:
        kai: Meeting number of the target day (int or digit string).
        day: Day number of the target day within the meeting (int or digit
            string).
        l: Two-digit racecourse code embedded in the race id.
        get_num: How many preceding meeting days to fetch.  Values <= 0
            fetch nothing.
        tg_year: Year of the target day; the previous year is also searched.
        max_kai: Highest meeting number to generate candidate ids for.
        max_day: Highest day number to generate candidate ids for.
            NOTE(review): the original comments suggested larger bounds
            (7 and 14) than the code used (5 and 2); the defaults keep the
            code's behaviour -- confirm which was intended.

    Returns:
        A DataFrame with one row per listed horse and the columns ``index``
        (row position within its race table), ``uma``, ``hito``,
        ``racename``, ``racedata1``, ``racedata2``, ``racecourse``,
        ``racenumber``, ``racedate`` and ``racenumber2``.  The frame is
        empty (but has these columns) when nothing was scraped.
    """
    columns = [
        "index", "uma", "hito", "racename", "racedata1", "racedata2",
        "racecourse", "racenumber", "racedate", "racenumber2",
    ]

    # Generated in ascending order, so list order equals string sort order.
    url_ids = [
        f"{year}{l}{z:0>2}{y:0>2}"
        for year in range(tg_year - 1, tg_year + 1)
        for z in range(1, max_kai + 1)
        for y in range(1, max_day + 1)
    ]

    tg_id = f"{tg_year}{l}{kai:0>2}{day:0>2}"
    tg_ids = [u for u in url_ids if u < tg_id]
    # A bare ``tg_ids[-get_num:]`` would return the whole list for
    # get_num == 0 and an arbitrary tail for negative values.
    recent_ids = tg_ids[-get_num:] if get_num > 0 else []

    df_race_li = []
    for race_id in reversed(recent_ids):
        for x in range(1, 13):
            url = f"https://race.netkeiba.com/race/shutuba.html?race_id={race_id}{x:0>2}"
            print(url)
            # A timeout keeps a stalled connection from hanging the app.
            r = requests.get(url, timeout=10)
            time.sleep(0.2)  # wait 0.2 s between requests to reduce server load
            soup = BeautifulSoup(r.content, "html.parser")
            header = soup.find_all("div", class_="RaceList_Item02")
            if not header:
                # No race header: stop asking for further race numbers of
                # this meeting day.
                print("break")
                break
            racename = header[0].find_all("div", class_="RaceName")[0].text.strip()
            racedata = header[0].find_all("div", class_="RaceData01")[0].text.strip()
            racedata2 = header[0].find_all("div", class_="RaceData02")[0].text.strip()

            rows = soup.find_all("tr", class_="HorseList")
            uma_li = [
                row.find_all("td", class_="HorseInfo")[0].text.strip()
                for row in rows
            ]
            hito_li = [
                row.find_all("td", class_="Jockey")[0].text.strip()
                for row in rows
            ]

            df_race = pd.DataFrame({"uma": uma_li, "hito": hito_li})
            df_race["racename"] = racename
            df_race["racedata1"] = racedata
            df_race["racedata2"] = racedata2
            df_race["racecourse"] = race_id[4:6]
            df_race["racenumber"] = race_id[6:8]
            df_race["racedate"] = race_id[8:10]
            df_race["racenumber2"] = x
            # reset_index() exposes the row position as an "index" column.
            df_race_li.append(df_race.reset_index())

    if not df_race_li:
        # pd.concat raises ValueError on an empty list; return an empty
        # frame that still has the columns callers select.
        return pd.DataFrame(columns=columns)
    return pd.concat(df_race_li)

# Fetch the JRA top page and collect the featured-race links.
url = "https://www.jra.go.jp"
# A timeout keeps a stalled connection from hanging the app at start-up.
top_r = requests.get(url, timeout=10)
top_soup = BeautifulSoup(top_r.content, "html.parser")

# Maps the link text of each featured race to its absolute URL.
link_dict = {}
for i in range(1, 10):
    top_select = top_soup.select(f"#kaisai_area > div.kaisai_block > div:nth-child({i}) > div.main > div > div.race > p > a")
    if not top_select:
        # Stop at the first child position that has no race link.
        break
    race_name = top_select[0].text
    link = top_select[0].get('href')
    if not link:
        # An anchor without href would otherwise become ".../None".
        continue
    # urljoin handles root-relative, relative and absolute hrefs, and
    # avoids the "//" that plain concatenation gives for "/path" hrefs.
    link_dict[race_name] = urljoin(url, link)

# Streamlit page: one button per featured race.  Clicking a button shows the
# race's entry table and then the first three rows of earlier race tables,
# fetched through get_url, whose description matches the same surface and
# distance.

# Venue name -> two-digit racecourse code passed to get_url for the race id.
race_jyou_dict = {
    "札幌": "01",
    "函館": "02",
    "福島": "03",
    "新潟": "04",
    "東京": "05",
    "中山": "06",
    "中京": "07",
    "京都": "08",
    "阪神": "09",
    "小倉": "10",
}

st.title("競馬")
st.markdown("### 今週の注目レース")
for name, link in link_dict.items():
    if not st.button(f'{name}予想'):
        continue

    url_race = link
    if "syutsuba.html" not in url_race:
        st.markdown("出馬情報がありません。")
        continue

    # A timeout keeps a stalled connection from hanging the app.
    syutsuba = requests.get(url_race, timeout=10)
    syutsuba_soup = BeautifulSoup(syutsuba.content, "html.parser")
    syutsuba_result = syutsuba_soup.select("#syutsuba")
    if not syutsuba_result:
        # The page has no entry table to read.
        st.markdown("出馬情報がありません。")
        continue
    card = syutsuba_result[0]

    # The second space-separated token of the date cell is expected to carry
    # the meeting number, venue and day -- TODO confirm against the page.
    racejyou = card.find_all("div", class_="cell date")[0].text.strip().split(" ")[1]
    cap = card.find_all("div", class_="cell course")[0].text.strip()
    uma_td = card.find_all("td", class_="horse")
    hito_td = card.find_all("td", class_="jockey")

    uma_li = []
    hito_li = []
    for uma_cell, hito_cell in zip(uma_td, hito_td):
        # First whitespace-separated token of the horse cell.
        uma_li.append(uma_cell.text.strip().split()[0])
        # Last two tokens of the jockey cell after splitting on newline,
        # dot or space.
        hito_li.append(" ".join(re.split(r"\n|\.| ", hito_cell.text.strip())[-2:]))
    df_race = pd.DataFrame({"uma": uma_li, "hito": hito_li})

    st.table(df_race)
    st.markdown(racejyou)
    st.markdown(cap)

    # Capture only the digits.  The original ``.replace("", "")`` was a
    # no-op, so the "回"/"日" suffix leaked into the race id that get_url
    # compares against.
    kai_match = re.search(r"(\d+)回", racejyou)
    day_match = re.search(r"(\d+)日", racejyou)
    race_jyou = re.sub(r"\d*日|\d*回", "", racejyou)
    l = race_jyou_dict.get(race_jyou)
    if kai_match is None or day_match is None or l is None:
        st.markdown("開催情報を解析できませんでした。")
        continue
    kai = kai_match.group(1)
    day = day_match.group(1)

    st.markdown("### 過去の成績検索")
    # Drop thousands separators (ASCII or full-width), then take the first
    # digit run as the distance.  ``\d+`` avoids an empty match and
    # int("") when the text does not start with a digit.
    s = re.sub(r"[,,]", "", cap)
    mm_match = re.search(r"\d+", s)
    # First character inside the parentheses (ASCII or full-width).  The
    # original pattern "(.*)" was a capture group over the whole string, so
    # indexing [1] returned the string's second character instead.
    # NOTE(review): presumably this single character is how the scraped
    # racedata1 text marks the surface -- verify against the target page.
    surface_match = re.search(r"[((]\s*([^\s))])", s)
    if mm_match is None or surface_match is None:
        st.markdown("コース情報を解析できませんでした。")
        continue
    mm = int(mm_match.group())
    shiba_dato = surface_match.group(1)

    df_race_all = get_url(kai, day, l, 3)
    tg_race = df_race_all[
        df_race_all["racedata1"].str.contains(f".*{re.escape(shiba_dato)}.*{mm}")
    ]
    # Keep the first three rows of each race table.
    d = tg_race[tg_race["index"] <= 2][["index", "uma", "hito", "racename", "racenumber", "racedate", "racenumber2"]]
    st.subheader('過去の成績')
    st.table(d)

実行方法

 streamlit run keiba.py

参考

https://introduction1.com/2021/09/30/python-scrp1/
https://www.jra.go.jp
https://www.netkeiba.com

2
1
1

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
2
1