1
0

Delete article

Deleted articles cannot be recovered.

Any draft of this article will also be deleted.

Are you sure you want to delete this article?

NFLのデータを解析してみたら楽しかった話

Posted at

Pythonを使って、NFLのデータを解析してみたらとても楽しかったので、その内容をシェアします。NFLの試合データを使って選手ごとのパフォーマンスを可視化する方法を学び、さらに分析結果をグラフ化してみました。この記事では、使用したコードとその結果について詳しく解説します。

なぜNFLのデータ?

NFLはアメリカンフットボールのプロリーグで、選手の個人パフォーマンスやチーム戦略が非常に細かくデータ化されています。そんなデータを使って、選手の貢献度を可視化することで、普段の試合観戦がさらに楽しくなるのではないかと思い、取り組んでみました。

使用したライブラリ

pip install nfl-data-py pandas matplotlib seaborn

処理内容

1. データの取得

まずは、nfl_data_pyを使って指定シーズンのデータを取得します。今回は2023年シーズンのデータを使いました。

def load_season_data(season):
    """Fetch play-by-play data for a single season.

    Args:
        season: Season identifier; it is wrapped in a one-element list and
            passed to ``nfl.import_pbp_data`` as ``years``.

    Returns:
        Whatever ``nfl.import_pbp_data`` returns for that season.
    """
    years = [season]
    season_pbp = nfl.import_pbp_data(years=years)
    print(f"Data loaded for season: {season}")
    return season_pbp

2. オフェンス選手の貢献度を計算

次に、チームごとにオフェンス選手の貢献度を計算します。パス、ラン、レシーブの3つの役割に分けて分析しました。

def calculate_offense_contributions(pbp_data, team):
    """Compute each offensive player's share of a team's performance score.

    Plays whose ``posteam`` equals ``team`` are grouped three times (by
    passer, rusher and receiver name) and the numeric columns are summed per
    player. Each resulting row is scored as
    ``0.1 * passing_yards + 0.2 * rushing_yards + 0.3 * receiving_yards
    + 6 * touchdown`` and divided by the sum of all scores.

    NOTE(review): every role group sums *all* numeric columns, so a passer
    row also carries the receiving yards recorded on those plays (and vice
    versa) -- confirm this weighting is intended.

    Args:
        pbp_data: DataFrame containing ``posteam``, the three
            ``*_player_name`` columns, the three yardage columns and
            ``touchdown``.
        team: Value matched against ``posteam``.

    Returns:
        DataFrame with one row per (player, role) whose ``contribution`` is
        greater than zero. The player name lands in the column created by
        ``reset_index()``; the plotting code reads it as ``'index'``.
    """
    stat_columns = [
        'passer_player_name', 'rusher_player_name', 'receiver_player_name',
        'passing_yards', 'rushing_yards', 'receiving_yards', 'touchdown',
    ]
    on_offense = pbp_data['posteam'] == team
    plays = pbp_data[on_offense][stat_columns]

    # One aggregated frame per role, in the order Passer, Rusher, Receiver.
    role_by_name_column = (
        ('passer_player_name', 'Passer'),
        ('rusher_player_name', 'Rusher'),
        ('receiver_player_name', 'Receiver'),
    )
    per_role = []
    for name_column, role in role_by_name_column:
        totals = plays.groupby(name_column).sum(numeric_only=True)
        totals['role'] = role
        per_role.append(totals)

    combined = pd.concat(per_role).reset_index()

    # Weighted score; missing stats count as zero.
    passing_part = combined['passing_yards'].fillna(0) * 0.1
    rushing_part = combined['rushing_yards'].fillna(0) * 0.2
    receiving_part = combined['receiving_yards'].fillna(0) * 0.3
    touchdown_part = combined['touchdown'].fillna(0) * 6
    combined['performance_score'] = (
        passing_part + rushing_part + receiving_part + touchdown_part
    )

    # Normalise by the sum of all scores.
    score_sum = combined['performance_score'].sum()
    combined['contribution'] = combined['performance_score'] / score_sum

    has_contribution = combined['contribution'] > 0
    return combined[has_contribution]

3. ディフェンス選手の貢献度を計算

ディフェンスデータから、各選手がどれだけチームに貢献しているかを分析します。

def calculate_defense_contributions(pbp_data, team):
    """Count how often each named player appears on a team's defensive plays.

    Rows whose ``defteam`` equals ``team`` are selected, the non-null
    ``fantasy_player_name`` values are counted, and each count is divided by
    the total to give a ``contribution`` share.

    NOTE(review): confirm that ``fantasy_player_name`` actually identifies a
    defender on these rows.

    Args:
        pbp_data: DataFrame with ``defteam`` and ``fantasy_player_name``.
        team: Value matched against ``defteam``.

    Returns:
        DataFrame with columns ``player_name``, ``play_count`` and
        ``contribution``, sorted by ``contribution`` in descending order.
    """
    defending = pbp_data['defteam'] == team
    names = pbp_data[defending]['fantasy_player_name'].dropna()

    # One row per player with the number of plays they appear on.
    tally = names.value_counts().rename_axis('player_name').reset_index(name='play_count')

    plays_total = tally['play_count'].sum()
    tally['contribution'] = tally['play_count'] / plays_total

    ordered_columns = ['player_name', 'play_count', 'contribution']
    return tally[ordered_columns].sort_values('contribution', ascending=False)

4. 可視化

オフェンスのグラフ

def plot_offense_contributions(player_stats, team, title):
    """Plot offensive contributions as a bar chart coloured by role.

    Args:
        player_stats: DataFrame with ``contribution``, ``index`` (player
            name) and ``role`` columns.
        team: Team label used in the chart title.
        title: Text appended to the team label in the chart title.
    """
    sns.set(style="whitegrid")
    plt.figure(figsize=(10, 6))

    # Every row is plotted, largest contribution first.
    ranked = player_stats.sort_values('contribution', ascending=False)

    bar_options = {
        'x': 'contribution',
        'y': 'index',
        'hue': 'role',
        'data': ranked,
        'palette': 'coolwarm',
    }
    sns.barplot(**bar_options)

    heading = f"{team} - {title}"
    plt.title(heading, fontsize=16)
    plt.tight_layout()
    plt.show()

ディフェンスのグラフ

def plot_defense_contributions(player_stats, team, title):
    """Plot the 20 largest defensive contributions as a bar chart.

    Args:
        player_stats: DataFrame with ``contribution`` and ``player_name``
            columns.
        team: Team label used in the chart title.
        title: Text appended to the team label in the chart title.
    """
    sns.set(style="whitegrid")
    plt.figure(figsize=(10, 6))

    # Keep only the 20 rows with the highest contribution.
    ranked = player_stats.sort_values('contribution', ascending=False)
    leaders = ranked.head(20)

    bar_options = {
        'x': 'contribution',
        'y': 'player_name',
        'hue': 'player_name',
        'data': leaders,
        'palette': 'coolwarm',
    }
    sns.barplot(**bar_options)

    heading = f"{team} - {title}"
    plt.title(heading, fontsize=16)
    plt.tight_layout()
    plt.show()

実行結果

最終的に、オフェンスとディフェンスでそれぞれ貢献度を可視化しました。

オフェンス選手の貢献度グラフ

image.png

ディフェンス選手の貢献度グラフ

image.png

まとめ

NFLのデータを使った解析を通して、選手のパフォーマンスや貢献度を可視化することで、チームの中で選手がどのような地位にいるのかがわかります。トレードの対象とかもわかるかもしれませんね。

スクリプト全文

import nfl_data_py as nfl
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Team and season to analyse; both are read by main().
TEAM = "NE"  # e.g. New England Patriots
SEASON = 2023

def load_season_data(season):
    """Fetch play-by-play data for a single season.

    Args:
        season: Season identifier; it is wrapped in a one-element list and
            passed to ``nfl.import_pbp_data`` as ``years``.

    Returns:
        Whatever ``nfl.import_pbp_data`` returns for that season.
    """
    years = [season]
    season_pbp = nfl.import_pbp_data(years=years)
    print(f"Data loaded for season: {season}")
    return season_pbp

def calculate_offense_contributions(pbp_data, team):
    """Compute each offensive player's share of a team's performance score.

    Plays whose ``posteam`` equals ``team`` are grouped three times (by
    passer, rusher and receiver name) and the numeric columns are summed per
    player. Each resulting row is scored as
    ``0.1 * passing_yards + 0.2 * rushing_yards + 0.3 * receiving_yards
    + 6 * touchdown`` and divided by the sum of all scores.

    NOTE(review): every role group sums *all* numeric columns, so a passer
    row also carries the receiving yards recorded on those plays (and vice
    versa) -- confirm this weighting is intended.

    Args:
        pbp_data: DataFrame containing ``posteam``, the three
            ``*_player_name`` columns, the three yardage columns and
            ``touchdown``.
        team: Value matched against ``posteam``.

    Returns:
        DataFrame with one row per (player, role) whose ``contribution`` is
        greater than zero. The player name lands in the column created by
        ``reset_index()``; the plotting code reads it as ``'index'``.
    """
    stat_columns = [
        'passer_player_name', 'rusher_player_name', 'receiver_player_name',
        'passing_yards', 'rushing_yards', 'receiving_yards', 'touchdown',
    ]
    on_offense = pbp_data['posteam'] == team
    plays = pbp_data[on_offense][stat_columns]

    # One aggregated frame per role, in the order Passer, Rusher, Receiver.
    role_by_name_column = (
        ('passer_player_name', 'Passer'),
        ('rusher_player_name', 'Rusher'),
        ('receiver_player_name', 'Receiver'),
    )
    per_role = []
    for name_column, role in role_by_name_column:
        totals = plays.groupby(name_column).sum(numeric_only=True)
        totals['role'] = role
        per_role.append(totals)

    # Stack the three role tables into one frame.
    combined = pd.concat(per_role).reset_index()

    # Weighted score; missing stats count as zero.
    passing_part = combined['passing_yards'].fillna(0) * 0.1
    rushing_part = combined['rushing_yards'].fillna(0) * 0.2
    receiving_part = combined['receiving_yards'].fillna(0) * 0.3
    touchdown_part = combined['touchdown'].fillna(0) * 6
    combined['performance_score'] = (
        passing_part + rushing_part + receiving_part + touchdown_part
    )

    # Normalise by the sum of all scores.
    score_sum = combined['performance_score'].sum()
    combined['contribution'] = combined['performance_score'] / score_sum

    has_contribution = combined['contribution'] > 0
    return combined[has_contribution]

def calculate_defense_contributions(pbp_data, team):
    """Count how often each named player appears on a team's defensive plays.

    Rows whose ``defteam`` equals ``team`` are selected, the non-null
    ``fantasy_player_name`` values are counted, and each count is divided by
    the total to give a ``contribution`` share.

    NOTE(review): confirm that ``fantasy_player_name`` actually identifies a
    defender on these rows.

    Args:
        pbp_data: DataFrame with ``defteam`` and ``fantasy_player_name``.
        team: Value matched against ``defteam``.

    Returns:
        DataFrame with columns ``player_name``, ``play_count`` and
        ``contribution``, sorted by ``contribution`` in descending order.

    Raises:
        KeyError: If ``fantasy_player_name`` is not a column of the data.
    """
    defending = pbp_data['defteam'] == team
    defensive_plays = pbp_data[defending]

    # Fail with an explicit message when the name column is absent.
    if 'fantasy_player_name' not in defensive_plays.columns:
        raise KeyError("'fantasy_player_name' column is missing in the dataset.")

    names = defensive_plays['fantasy_player_name'].dropna()

    # One row per player with the number of plays they appear on.
    tally = names.value_counts().rename_axis('player_name').reset_index(name='play_count')

    # Share of all counted plays.
    plays_total = tally['play_count'].sum()
    tally['contribution'] = tally['play_count'] / plays_total

    ordered_columns = ['player_name', 'play_count', 'contribution']
    return tally[ordered_columns].sort_values('contribution', ascending=False)

def plot_contributions(player_stats, team, title):
    """Plot the ten largest contributions as a bar chart.

    Args:
        player_stats: DataFrame with ``player_name`` and ``contribution``
            columns; it does not need to be pre-sorted.
        team: Team label used in the chart title.
        title: Text appended to the team label in the chart title.

    Raises:
        ValueError: If ``player_stats`` has no ``player_name`` column.
    """
    sns.set(style="whitegrid")
    plt.figure(figsize=(10, 6))

    # Echo the available columns to help diagnose schema mismatches.
    print(f"Dataframe columns: {player_stats.columns}")

    if 'player_name' not in player_stats.columns:
        raise ValueError("`player_name` column is missing in the dataframe.")

    # Sort explicitly so "top 10" holds even if the caller did not pre-sort.
    top_stats = player_stats.sort_values('contribution', ascending=False).head(10)

    # `hue` mirrors `y` (as plot_defense_contributions does): passing
    # `palette` without `hue` is deprecated in seaborn 0.13.
    sns.barplot(
        x='contribution',
        y='player_name',
        hue='player_name',
        data=top_stats,
        palette='coolwarm'
    )
    plt.title(f"{team} - {title}", fontsize=16)
    plt.xlabel("Contribution (Percentage)", fontsize=12)
    plt.ylabel("Player Name", fontsize=12)
    plt.tight_layout()
    plt.show()

def calculate_defense_contributions_without_roster(pbp_data, team):
    """Count defensive play participation per player id, without roster data.

    Rows whose ``defteam`` equals ``team`` are selected and their
    ``defense_players`` strings are split on ``';'`` into individual ids.
    Empty ids (from an empty string or a stray ``';'``) are discarded so they
    are not counted as a player.

    Args:
        pbp_data: DataFrame with ``defteam`` and ``defense_players`` columns.
        team: Value matched against ``defteam``.

    Returns:
        DataFrame with columns ``player_id``, ``play_count`` and
        ``contribution`` (share of all counted appearances), sorted by
        ``contribution`` in descending order.

    Raises:
        KeyError: If ``defense_players`` is not a column of the data.
    """
    team_data = pbp_data[pbp_data['defteam'] == team]

    if 'defense_players' not in team_data.columns:
        raise KeyError("'defense_players' column is missing in the dataset.")

    # One id per row; astype(str) keeps the .str accessor usable even when
    # no rows remain after dropping missing values.
    player_ids = (
        team_data['defense_players']
        .dropna()
        .astype(str)
        .str.split(';')
        .explode(ignore_index=True)
        .str.strip()
    )
    # Drop the empty tokens that splitting "" or "a;" produces.
    player_ids = player_ids[player_ids != '']

    # Number of plays each id appears on.
    defender_stats = (
        player_ids.value_counts()
        .rename_axis('player_id')
        .reset_index(name='play_count')
    )

    total_play_count = defender_stats['play_count'].sum()
    defender_stats['contribution'] = defender_stats['play_count'] / total_play_count

    return defender_stats[['player_id', 'play_count', 'contribution']].sort_values('contribution', ascending=False)

def plot_offense_contributions(player_stats, team, title):
    """Plot offensive contributions as a bar chart coloured by role.

    Args:
        player_stats: DataFrame with ``contribution``, ``index`` (player
            name) and ``role`` columns.
        team: Team label used in the chart title.
        title: Text appended to the team label in the chart title.

    Raises:
        ValueError: If ``player_stats`` has no ``index`` column.
    """
    sns.set(style="whitegrid")
    plt.figure(figsize=(10, 6))

    # Every row is plotted (no top-N cut), largest contribution first.
    ranked = player_stats.sort_values('contribution', ascending=False)

    # The player name is expected in the column named 'index'.
    if 'index' not in ranked.columns:
        raise ValueError("`index` column is missing in the dataframe for offense data.")

    bar_options = {
        'x': 'contribution',
        'y': 'index',
        'hue': 'role',
        'data': ranked,
        'palette': 'coolwarm',
    }
    sns.barplot(**bar_options)

    heading = f"{team} - {title}"
    plt.title(heading, fontsize=16)
    plt.xlabel("Contribution (Percentage)", fontsize=12)
    plt.ylabel("Player Name", fontsize=12)
    # Place the role legend outside the axes, to the upper right.
    plt.legend(title="Role", bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.tight_layout()
    plt.show()

def plot_defense_contributions(player_stats, team, title):
    """Plot the 20 largest defensive contributions as a bar chart.

    Args:
        player_stats: DataFrame with ``contribution`` and ``player_name``
            columns.
        team: Team label used in the chart title.
        title: Text appended to the team label in the chart title.

    Raises:
        ValueError: If ``player_stats`` has no ``player_name`` column.
    """
    sns.set(style="whitegrid")
    plt.figure(figsize=(10, 6))

    # Keep only the 20 rows with the highest contribution.
    ranked = player_stats.sort_values('contribution', ascending=False)
    leaders = ranked.head(20)

    if 'player_name' not in leaders.columns:
        raise ValueError("`player_name` column is missing in the dataframe for defense data.")

    bar_options = {
        'x': 'contribution',
        'y': 'player_name',
        'hue': 'player_name',
        'data': leaders,
        'palette': 'coolwarm',
    }
    sns.barplot(**bar_options)

    heading = f"{team} - {title}"
    plt.title(heading, fontsize=16)
    plt.xlabel("Contribution (Percentage)", fontsize=12)
    plt.ylabel("Player Name", fontsize=12)
    plt.tight_layout()
    plt.show()

def main():
    """Load the configured season and plot TEAM's offense and defense charts."""
    season_pbp = load_season_data(SEASON)

    # Compute both contribution tables before drawing anything.
    offense = calculate_offense_contributions(season_pbp, TEAM)
    defense = calculate_defense_contributions(season_pbp, TEAM)

    # Offense chart first, then defense.
    plot_offense_contributions(offense, TEAM, "Offense Contributions")
    plot_defense_contributions(defense, TEAM, "Defense Contributions")

# Run the analysis only when this file is executed as a script.
if __name__ == "__main__":
    main()
1
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
1
0

Delete article

Deleted articles cannot be recovered.

Any draft of this article will also be deleted.

Are you sure you want to delete this article?