0
2

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?

More than 5 years have passed since last update.

JFLの試合結果から得点ランキングを作成

0
Posted at

スクレイピング

import time
import unicodedata
from urllib.parse import urljoin
import re

import requests
from bs4 import BeautifulSoup

def cleaning(info, team, html):
    """Convert goal-table rows into flat records.

    Args:
        info: List of match-level values prepended to every record.
        team: Team name placed right after ``info`` in every record.
        html: Iterable of row elements. Each row must support
            ``select("th, td")`` and return cells that support
            ``get_text(strip=True)``.

    Returns:
        A list of lists shaped ``info + [team] + cells``. The first cell is
        converted to an integer minute and the third cell is NFKC-normalized
        with any parenthesized part removed.

    Raises:
        ValueError: If the minute cell is not a "+"-separated list of
            integers once the trailing "分" has been removed.
        IndexError: If a row has fewer than three cells.
    """
    result = []

    for trs in html:

        data = [i.get_text(strip=True) for i in trs.select("th, td")]

        # Drop the trailing minute marker, then add stoppage time
        # ("45+2分" -> 47). Parsed with int() instead of eval() so that
        # scraped text is never executed as code.
        minute = unicodedata.normalize("NFKC", data[0]).rstrip("分")
        data[0] = sum(int(part) for part in minute.split("+"))

        # Remove a parenthesized annotation such as "(PK)" from the player name
        data[2] = re.sub(r"\(.+\)", "", unicodedata.normalize("NFKC", data[2])).strip()

        result.append(info + [team] + data)

    return result

def scraping(n, url, timeout=10):
    """Fetch one match-detail page and extract its goal records.

    Args:
        n: Match index stored as the first field of every record.
        url: URL of the match-detail page.
        timeout: Seconds to wait for the server before giving up, so that an
            unresponsive host cannot block the crawl forever.

    Returns:
        A list of records (home-team goals followed by away-team goals) as
        produced by ``cleaning``, or ``None`` when the page contains no
        "得 点" heading.

    Raises:
        requests.HTTPError: If the response status indicates an error.
        requests.Timeout: If the server does not answer within ``timeout``.
        AttributeError: If an expected header or table element is missing.
    """
    r = requests.get(url, timeout=timeout)
    r.raise_for_status()

    soup = BeautifulSoup(r.content, "html5lib")

    # Matchday: strip the surrounding "第" / "節" characters and keep the number
    score_season = soup.select_one(
        "div.score-header > h2.score-meta > span.score-season"
    ).get_text(strip=True)
    score_season = int(score_season.strip("第節"))

    # Date and time: the header text split on whitespace
    score_date = (
        soup.select_one("div.score-header > h2.score-meta > span.score-date")
        .get_text(strip=True)
        .split()
    )

    # Team names
    score_table = soup.select_one("table.score-table")
    home_team = score_table.select_one("th.score-team1").get_text(strip=True)
    away_team = score_table.select_one("th.score-team2").get_text(strip=True)

    # Match-level fields shared by every goal record of this match
    game_info = [n, score_season] + score_date + [home_team, away_team]

    # Locate the goals section by its heading; `string=` is the current name
    # of the argument formerly called `text=`.
    tag = soup.find("h3", string="得 点")

    # No goals heading on this page
    if not tag:
        return None

    table_home = tag.parent.select(
        "div.score-frame > div.score-left > table > tbody > tr"
    )
    table_away = tag.parent.select(
        "div.score-frame > div.score-right > table > tbody > tr"
    )

    home_data = cleaning(game_info, home_team, table_home)
    away_data = cleaning(game_info, away_team, table_away)

    return home_data + away_data

url = "http://www.jfl.or.jp/jfl-pc/view/s.php?a=1542&f=2020A001_spc.html"

# Fetch the index page; the timeout keeps an unresponsive server from
# blocking the script indefinitely.
r = requests.get(url, timeout=10)
r.raise_for_status()

soup = BeautifulSoup(r.content, "html5lib")

# Absolute URLs of every "詳細" (details) link on the index page
links = [
    urljoin(url, link.get("href"))
    for link in soup.select("td.detail-link > a")
    if link.text == "詳細"
]

# Goal records collected from all match pages
result = []

for i, link in enumerate(links):

    score_data = scraping(i, link)

    # scraping() returns None or an empty list when a page has no goals
    if score_data:
        result.extend(score_data)

    # Pause one second between requests
    time.sleep(1)

データラングリング

import pandas as pd

# One row per goal. Column order follows the scraped records: match index,
# matchday, date, time, home team, away team, scoring team, minute,
# shirt number, player name.
df = pd.DataFrame(
    result,
    columns=["試合", "節", "日付", "時刻", "ホーム", "アウェイ", "チーム名", "時間", "背番号", "選手名"],
)

df

# Each row counts as one goal
df["得点"] = 1

# Goals per player. Own goals are filtered out before aggregating so the
# script also works when the data contains no own goal at all.
scorers = df[df["選手名"] != "オウンゴール"]

pv_goal = scorers.pivot_table(
    values="得点", index=["選手名", "チーム名", "背番号"], aggfunc="sum", fill_value=0
).reset_index()

pv_goal["背番号"] = pv_goal["背番号"].astype(int)

# Rank by goals; tied players share the smallest rank of their group
pv_goal["順位"] = pv_goal["得点"].rank(ascending=False, method="min").astype(int)

# Teams, in the order used to break ties in the ranking
jfl_2020 = [
    "Honda FC",
    "ソニー仙台FC",
    "東京武蔵野シティFC",
    "テゲバジャーロ宮崎",
    "ホンダロックSC",
    "ヴェルスパ大分",
    "FC大阪",
    "MIOびわこ滋賀",
    "ヴィアティン三重",
    "FCマルヤス岡崎",
    "鈴鹿ポイントゲッターズ",
    "ラインメール青森",
    "奈良クラブ",
    "松江シティFC",
    "いわきFC",
    "高知ユナイテッドSC",
]

# Team name -> 1-based position in the list above
team = {name: i for i, name in enumerate(jfl_2020, 1)}

pv_goal["チームID"] = pv_goal["チーム名"].map(team)

# Sort ascending by rank, then team ID, then shirt number
pv_goal = pv_goal.sort_values(["順位", "チームID", "背番号"])

# The helper sort keys are not part of the output
pv_goal = pv_goal.drop(["チームID", "背番号"], axis=1)

pv_goal = pv_goal.set_index("順位")

pv_goal.to_csv("goal.csv")

ランキング

# Read every HTML table on the ranking page and keep the first one, using
# the first row as the header and the first column as the index.
ranking_tables = pd.read_html(
    "http://www.jfl.or.jp/jfl-pc/view/s.php?a=1544", index_col=0, header=0
)
df_rank = ranking_tables[0]

# Normalize player names to NFKC
normalized_names = df_rank["選手名"].str.normalize("NFKC")
df_rank["選手名"] = normalized_names

df_rank.to_csv("ranking.csv")
0
2
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
2

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?