Python in Excel を軽く試した
使ったデータ
まずは、グーグルコラボで以下の藤浪投手のデータを準備
!pip install pybaseball
import pandas as pd
from pybaseball import statcast
dates = [
'2023-04-01', '2023-04-08', '2023-04-15', '2023-04-22',
'2023-04-26', '2023-04-28', '2023-05-02', '2023-05-05',
'2023-05-07', '2023-05-10', '2023-05-12', '2023-05-16',
'2023-05-17', '2023-05-22', '2023-05-27', '2023-05-29',
'2023-05-30', '2023-06-02', '2023-06-05', '2023-06-10',
'2023-06-11', '2023-06-13', '2023-06-18', '2023-06-20',
'2023-06-24', '2023-06-28', '2023-06-30', '2023-07-01',
'2023-07-04', '2023-07-08', '2023-07-09', '2023-07-14',
'2023-07-16', '2023-07-18', '2023-07-21', '2023-07-22',
'2023-07-25', '2023-07-28', '2023-07-30', '2023-08-02',
'2023-08-04', '2023-08-06', '2023-08-09', '2023-08-13',
'2023-08-15', '2023-08-16', '2023-08-19', '2023-08-20',
'2023-08-23', '2023-08-25', '2023-08-28', '2023-08-29',
'2023-09-01', '2023-09-05', '2023-09-06'
]
# Create an empty DataFrame to store the data
df_660261_all_dates = pd.DataFrame()
# Fetch data for each date and concatenate
for date in dates:
df_single_day = statcast(start_dt=date, end_dt=date)
df_660261_single_day = df_single_day[df_single_day['pitcher'] == 660261]
df_660261_all_dates = pd.concat([df_660261_all_dates, df_660261_single_day])
# Reset the index of the final DataFrame
df_660261_all_dates.reset_index(drop=True, inplace=True)
グーグルドライブへ保存
from google.colab import drive
drive.mount('/content/drive')
df_660261_all_dates.to_excel("/content/drive/My Drive/Fujinami_230909.xlsx", index=False)
こんなデータ
4/1 投球カウント
このデータをdfとして、python in Excel でグラフ表示をしてみた
import pandas as pd
import matplotlib.pyplot as plt
def pitch_counts(df):
df_L = df[df['stand'] == 'L']
df_R = df[df['stand'] == 'R']
total_counts = df['pitch_type'].value_counts()
left_counts = df_L['pitch_type'].value_counts()
right_counts = df_R['pitch_type'].value_counts()
pitch_counts_table = pd.DataFrame({'Total': total_counts, 'Left Batter': left_counts, 'Right Batter': right_counts})
pitch_counts_table.fillna(0, inplace=True)
pitch_counts_table = pitch_counts_table.astype(int)
return pitch_counts_table
# 仮のデータフレームdf。実際にはpybaseballから取得
# df = ...
# 指定の日付を選択。'2023/4/1 0:00:00'が指定の日付だと仮定
specified_date = '2023-04-01 0:00:00'
# 指定された日付のデータだけ抽出
df_date = df[df['game_date'] == specified_date]
# ピッチカウントを取得
pitch_counts_date = pitch_counts(df_date)
# 時間部分を削除
date_str = pd.Timestamp(specified_date).strftime('%Y-%m-%d')
# 円グラフを描画
plt.figure(figsize=(10, 6))
plt.pie(pitch_counts_date['Total'], labels = pitch_counts_date.index, autopct='%1.1f%%')
plt.title(f'Pitch Type Counts for {date_str}')
plt.show()
複数日のグラフ化
最新のグラフしか出ない。複数日のグラフが表示されず。
import pandas as pd
import matplotlib.pyplot as plt
# ピッチカウントの関数
def pitch_counts(df):
total_counts = df['pitch_type'].value_counts()
pitch_counts_table = pd.DataFrame({'Total': total_counts})
pitch_counts_table.fillna(0, inplace=True)
pitch_counts_table = pitch_counts_table.astype(int)
return pitch_counts_table
# 仮のデータフレームdf。実際にはpybaseballから取得
# df = ...
# 一意な日付を取得
unique_dates = df['game_date'].unique()
# 各日付に対して円グラフを作成
for specified_date in unique_dates:
df_date = df[df['game_date'] == specified_date]
pitch_counts_date = pitch_counts(df_date)
# 時間部分を削除
date_str = pd.Timestamp(specified_date).strftime('%Y-%m-%d')
# 円グラフを描画
plt.figure(figsize=(10, 6))
plt.pie(pitch_counts_date['Total'], labels=pitch_counts_date.index, autopct='%1.1f%%')
plt.title(f'Pitch Type Counts for {date_str}')
plt.show()
各日付 vs 左右
こういう複数グラフは表示できるんだけど、他の日の分は出てこない
import pandas as pd
import matplotlib.pyplot as plt
def plot_pitch_distribution(df, date):
# 時間部分を削除
date_str = pd.Timestamp(date).strftime('%Y-%m-%d')
df_L = df[df['stand'] == 'L']
df_R = df[df['stand'] == 'R']
fig, axs = plt.subplots(1, 3, figsize=(18, 6))
plt.suptitle(f'Pitch Distribution on {date_str}')
# Total
if not df['pitch_type'].value_counts().empty:
df['pitch_type'].value_counts().plot(kind='pie', ax=axs[0], autopct='%.1f%%')
axs[0].set_title('Total')
axs[0].set_ylabel('')
# vs Left batter
if not df_L['pitch_type'].value_counts().empty:
df_L['pitch_type'].value_counts().plot(kind='pie', ax=axs[1], autopct='%.1f%%')
axs[1].set_title('vs Left batter')
axs[1].set_ylabel('')
# vs Right batter
if not df_R['pitch_type'].value_counts().empty:
df_R['pitch_type'].value_counts().plot(kind='pie', ax=axs[2], autopct='%.1f%%')
axs[2].set_title('vs Right batter')
axs[2].set_ylabel('')
plt.show()
# 仮のデータフレームdf。実際にはpybaseballから取得
# df = ...
unique_dates = df['game_date'].unique()
for specified_date in unique_dates:
df_date = df[df['game_date'] == specified_date]
if df_date.empty:
print(f"No data for {specified_date}")
continue
plot_pitch_distribution(df_date, specified_date)
また触ってみよ