# Program Name: pokemon_battle_usage_analysis.py
# Overview: ピカチュウの使用頻度に関するデータを国・地域別に可視化し、中央値を計算する
import pandas as pd
import matplotlib.pyplot as plt
# Hypothetical usage-rate data (%): one row per battle-count bracket, one column per region.
data = {"Battle Range": ["1-3", "4-6", "7-13", "14-20", "21-27", "28-90", "91+"]}
data.update(
    {
        "Japan": [32.14, 61.30, 5.89, 0.43, 0.10, 0.08, 0.02],
        "USA": [12.53, 54.68, 42.18, 2.04, 0.04, 0.20, 0.32],
        "France": [2.82, 34.65, 49.54, 4.06, 0.04, 0.70, 0.19],
        "Galar": [0.93, 5.26, 38.27, 35.37, 13.78, 0.23, 0.20],
        "Paldea": [7.48, 19.07, 18.02, 45.00, 2.41, 7.02, 0.00],
    }
)
# Tabulate; column order follows the dictionary's insertion order.
df = pd.DataFrame(data)
# Estimate the median bracket of a region from its cumulative distribution.
def estimated_median(row, labels=None, threshold=50):
    """Return the label of the bracket in which the cumulative share reaches *threshold*.

    Args:
        row: Iterable of per-bracket shares (percent), ordered like *labels*.
        labels: Bracket labels, one per entry of *row*. When omitted, the
            module-level ``df["Battle Range"]`` column is read at call time
            (the original behaviour).
        threshold: Cumulative share that marks the median; 50 by default.

    Returns:
        The first label whose running total is >= *threshold*. If the total
        never reaches the threshold, the last label is returned ("91+" when
        there are no labels at all).
    """
    if labels is None:
        labels = df["Battle Range"]
    labels = list(labels)
    cumulative = 0
    # Pair labels with shares positionally so the lookup does not depend on
    # the DataFrame's index labels.
    for label, value in zip(labels, row):
        cumulative += value
        if cumulative >= threshold:
            return label
    return labels[-1] if labels else "91+"
# Estimated median bracket per region (every column except the bracket labels).
median_values = {
    region: estimated_median(df[region])
    for region in df.columns
    if region != "Battle Range"
}
# Print median results
print("Estimated median battle range for Pikachu usage:")
for region, median in median_values.items():
    print(f"{region}: {median}")
# Grouped bar chart: one cluster per battle-count bracket, one bar per region.
ax = df.set_index("Battle Range").plot(kind="bar", figsize=(10, 6))
ax.set_title("Pikachu Usage Distribution by Battle Frequency")
ax.set_xlabel("Battle Frequency")
ax.set_ylabel("Usage Percentage (%)")
ax.grid(True, axis='y')  # horizontal guide lines only
ax.figure.tight_layout()
plt.show()
# --- 必要なライブラリのインポート / Import necessary libraries ---
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import statsmodels.api as sm
# --- Dummy data: purchase quantities and average prices of three items, 2015 and 2016 ---
data = {
    'Item': ['モンスターボール', 'キズぐすり', 'わざマシン'],
    'Quantity_2015': [6200, 19865, 500],   # Quantity purchased in 2015
    'Price_2015': [200, 300, 4000],        # Average price in 2015
    'Quantity_2016': [6400, 20418, 550],   # Quantity purchased in 2016
    'Price_2016': [210, 280, 4200],        # Average price in 2016
}
df = pd.DataFrame(data)
# --- Laspeyres price index (base year 2015) ---
# Both baskets are weighted by the base-year (2015) quantities.
numerator = df['Quantity_2015'].mul(df['Price_2016']).sum()
denominator = df['Quantity_2015'].mul(df['Price_2015']).sum()
laspeyres_index = numerator / denominator * 100
print(f"Laspeyres価格指数(2015年基準): {laspeyres_index:.2f}")
# --- Simple linear regression: temperature vs. days to bloom ---
temperature = np.array([13, 14, 15, 16, 17, 18, 19])      # Temperature [°C]
bloom_day = np.array([105, 102, 100, 97, 94, 90, 87])     # Days to bloom
# Prepend a column of ones so the fitted model has an intercept.
X = sm.add_constant(temperature)
model = sm.OLS(endog=bloom_day, exog=X).fit()
print(model.summary())
# --- Prediction at 17.5 °C ---
# The row is laid out as [1, temperature] to match the design matrix above.
X_pred = np.array([[1, 17.5]])
predicted_day = model.predict(X_pred)[0]
print(f"2017年予測開花日(日数): {predicted_day:.1f}日")
# --- Pearson correlation between temperature and days to bloom ---
correlation = np.corrcoef(temperature, bloom_day)[0][1]
print(f"相関係数(気温と開花日数): {correlation:.2f}")
# --- Residuals against fitted values ---
residuals = model.resid
fig, ax = plt.subplots()
ax.scatter(model.fittedvalues, residuals)
ax.axhline(0, color='gray', linestyle='--')  # zero-residual reference line
ax.set(xlabel="Fitted Values", ylabel="Residuals", title="Residual Plot")
ax.grid(True)
plt.show()
# --- Observed points with the fitted regression line ---
fig, ax = plt.subplots()
ax.scatter(temperature, bloom_day, label='Observed')
ax.plot(temperature, model.predict(X), color='red', label='Regression Line')
ax.set(
    xlabel="Average Temperature (°C)",
    ylabel="Days to Bloom",
    title="Pokémon Bloom Prediction",
)
ax.legend()
ax.grid(True)
plt.show()
# --- Overlaid histograms of the 2015 and 2016 item prices ---
fig, ax = plt.subplots()
ax.hist(df['Price_2015'], alpha=0.7, bins=5, label='2015')
ax.hist(df['Price_2016'], alpha=0.7, bins=5, label='2016')
ax.set(title="Item Price Distribution", xlabel="Price (yen)", ylabel="Frequency")
ax.legend()
ax.grid(True)
plt.show()
# --- ライブラリのインポート / Import necessary libraries ---
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sympy as sp
# ------------------------------
# [Q4] Laspeyres price index (item prices)
# ------------------------------
print("▶ Laspeyres価格指数の計算")
item_data = {
    'Item': ['モンスターボール', 'げんきのかけら'],
    'Quantity_2015': [6200, 19865],
    'Price_2015': [200, 1500],
    'Quantity_2016': [6422, 20418],
    'Price_2016': [210, 1440],
}
df = pd.DataFrame(item_data)
# Base-year (2015) quantities weight both the 2016 and the 2015 price baskets.
numerator = df['Quantity_2015'].mul(df['Price_2016']).sum()
denominator = df['Quantity_2015'].mul(df['Price_2015']).sum()
laspeyres_index = numerator / denominator * 100
print(f"Laspeyres価格指数(2015年基準): {laspeyres_index:.2f}\n")
# ------------------------------
# [Q11] Year-over-year change of a price index (example: Poké Ball)
# ------------------------------
print("▶ モンスターボールの価格指数変化率を描画")
years = np.arange(1970, 2017)
# Synthetic index: 20 rising years, 10 flat years, 17 rising years (47 values).
index = np.hstack((
    np.linspace(30, 85, 20),    # rise
    np.full(10, 85.0),          # plateau
    np.linspace(85, 105, 17),   # second rise
))
# Percentage change relative to the previous year.
diff_rate = (index[1:] - index[:-1]) / index[:-1] * 100
# Line chart of the yearly change rate; the first year has no predecessor, hence years[1:].
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(years[1:], diff_rate, marker='o')
ax.set(
    title="Yearly Change in Poké Ball Price Index",
    xlabel="Year",
    ylabel="Change Rate (%)",
)
ax.grid(True)
fig.tight_layout()
plt.show()
# ------------------------------
# [Q14] Bayes' theorem (which factory produced a defective item)
# ------------------------------
print("▶ ベイズの定理:不良品がA工場産である確率")
# Priors: share of production per factory.
P_A, P_B = 0.6, 0.4
# Likelihoods: defect rate per factory.
P_N_given_A, P_N_given_B = 0.01, 0.05
# Posterior probability that a defective item came from factory A.
P_A_given_N = (P_N_given_A * P_A) / (P_N_given_A * P_A + P_N_given_B * P_B)
print(f"不良アイテムがA工場製である確率: {P_A_given_N:.2%}\n")
# ------------------------------
# [Q15/16] Probability density function of the appearance time X
# ------------------------------
print("▶ ポケモン出現時間の密度関数解析")
x, c = sp.symbols('x c')
fx = c * (2 - x)
# Normalisation: choose c so that the density integrates to 1 over [0, 2].
integral = sp.integrate(fx, (x, 0, 2))
c_val = sp.solve(integral - 1, c)[0]
# Mean: integral of x * f(x) over [0, 2].
mu = sp.integrate(x * fx.subs(c, c_val), (x, 0, 2))
# Variance: integral of (x - mu)^2 * f(x) over [0, 2].
variance = sp.integrate((x - mu)**2 * fx.subs(c, c_val), (x, 0, 2))
print(f"正規化定数 c = {c_val}")
print(f"平均(μ)= {mu}")
print(f"分散(σ²)= {variance}")
# --- Import necessary libraries / ライブラリのインポート ---
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sympy as sp
# =============================================================
# [Q4] Laspeyres price index: comparing Pokémon item prices
# =============================================================
print("【問4】Laspeyres Price Index (Pokémon Items)")
# Two goods (Poké Ball and Revive) observed in 2015 and 2016.
item_data = {
    'Item': ['モンスターボール', 'げんきのかけら'],
    'Quantity_2015': [6200, 19865],   # Quantity in 2015
    'Price_2015': [200, 1500],        # Price in 2015 (yen/unit)
    'Quantity_2016': [6422, 20418],   # Quantity in 2016
    'Price_2016': [210, 1440],        # Price in 2016 (yen/unit)
}
df = pd.DataFrame(item_data)
# Laspeyres index = sum(Q_2015 * P_2016) / sum(Q_2015 * P_2015) * 100
numerator = df['Quantity_2015'] @ df['Price_2016']
denominator = df['Quantity_2015'] @ df['Price_2015']
laspeyres_index = numerator / denominator * 100
print(f"Laspeyres価格指数(2015年基準): {laspeyres_index:.2f}\n")
# =============================================================
# [Q11] Price change-rate chart: Poké Ball price index over time
# =============================================================
print("【問11】Plotting the Yearly Change Rate of Poké Ball Price Index")
# Synthetic Poké Ball price index for 1970-2016 (47 yearly values).
years = np.arange(1970, 2017)
index = np.concatenate((
    np.linspace(30, 85, 20),    # 1970-1989: increasing trend
    np.repeat(85.0, 10),        # 1990-1999: flat trend
    np.linspace(85, 105, 17),   # 2000-2016: rising trend again
))
# Year-over-year change (%) = (current - previous) / previous * 100
diff_rate = np.divide(np.diff(index), index[:-1]) * 100
# Yearly change rate as a line chart; years[1:] because the first year has no previous value.
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(years[1:], diff_rate, marker='o')
ax.set_title("Yearly Change in Poké Ball Price Index")
ax.set_xlabel("Year")
ax.set_ylabel("Change Rate (%)")
ax.grid(True)
fig.tight_layout()
plt.show()
# =============================================================
# [Q14] Bayes' theorem: predicting the source of a defective item (example: Potion)
# =============================================================
print("【問14】Bayes' Theorem: Predicting the Source of Defective Items")
# Potions are manufactured in factory A and factory B.
P_A = 0.6            # Proportion from factory A
P_B = 0.4            # Proportion from factory B
P_N_given_A = 0.01   # Defect rate for A
P_N_given_B = 0.05   # Defect rate for B
# Joint probabilities P(defect and factory); their sum is P(defect).
_joint_defect_A = P_N_given_A * P_A
_joint_defect_B = P_N_given_B * P_B
# Bayes: P(A | defect) = P(defect | A) P(A) / P(defect)
P_A_given_N = _joint_defect_A / (_joint_defect_A + _joint_defect_B)
print(f"不良品がA工場製である確率: {P_A_given_N:.2%}\n")
# =============================================================
# [Q15/16] Density analysis: time X until a Pokémon appears
# =============================================================
print("【問15・16】Probability Density Function Analysis (Time until a Legendary Pokémon appears)")
# Density on 0 <= x < 2: f(x) = c (2 - x)
x, c = sp.symbols('x c')
fx = c * (2 - x)
# Normalisation condition: the integral of f over [0, 2] equals 1; solve it for c.
integral = sp.integrate(fx, (x, 0, 2))
c_val = sp.solve(integral - 1, c)[0]
# Mean: integral of x f(x) over [0, 2]
mu = sp.integrate(x * fx.subs(c, c_val), (x, 0, 2))
# Variance: integral of (x - mu)^2 f(x) over [0, 2]
variance = sp.integrate((x - mu)**2 * fx.subs(c, c_val), (x, 0, 2))
print(f"正規化定数 c = {c_val}")
print(f"平均 (μ) = {mu}")
print(f"分散 (σ²) = {variance}")
# --- Import required libraries ---
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sympy as sp
from scipy.stats import norm, binom
# -----------------------------
# 1. Bayes' theorem: Legendary Pokémon location prediction
# -----------------------------
P_A = 0.6               # Prior probability of region A
P_L_given_A = 0.9       # P(legendary appears | region A)
P_L_given_not_A = 0.2   # P(legendary appears | outside region A)
# Total probability of an appearance, summed over "in A" and "not in A".
P_L = P_L_given_A * P_A + P_L_given_not_A * (1 - P_A)
# Posterior probability of region A given that a legendary appeared.
P_A_given_L = (P_L_given_A * P_A) / P_L
# -----------------------------
# 2. Binomial distribution: Poké Ball hit probability
# -----------------------------
p_hit = 0.159
x_vals = np.arange(6)                       # possible hit counts 0..5
probs = binom.pmf(k=x_vals, n=5, p=p_hit)   # P(X = k) for 5 throws
# At least one hit is the complement of zero hits.
prob_at_least_1 = 1 - probs[0]
# Bar chart of the hit-count distribution for 5 throws.
fig, ax = plt.subplots()
ax.bar(x_vals, probs, color='skyblue')
ax.set(
    title="Number of Hits in 5 Throws (Poké Ball)",
    xlabel="Hits",
    ylabel="Probability",
)
ax.grid(True)
plt.show()
# -----------------------------
# 3. Normal distribution: P(Z > 1)
# -----------------------------
z_vals = np.linspace(-3, 3, 300)
pdf_vals = norm.pdf(z_vals)
# Standard normal density with the upper tail beyond z = 1 shaded.
fig, ax = plt.subplots()
ax.plot(z_vals, pdf_vals, label='Standard Normal PDF')
ax.fill_between(z_vals, pdf_vals, where=(z_vals > 1), color='orange', alpha=0.5, label='P(Z > 1)')
ax.set(title="Standard Normal Distribution", xlabel="Z", ylabel="Density")
ax.legend()
ax.grid(True)
plt.show()
# Upper-tail probability as the complement of the CDF at 1.
prob_z_greater_than_1 = 1 - norm.cdf(1)
# -----------------------------
# 4. Expected value and variance of Pokémon appearance time
# -----------------------------
x, c = sp.symbols('x c')
fx = c * (2 - x)
# Solve the normalisation condition (density integrates to 1 over [0, 2]) for c.
integral = sp.integrate(fx, (x, 0, 2))
c_val = sp.solve(integral - 1, c)[0]
# First moment and central second moment of the normalised density.
mu = sp.integrate(x * fx.subs(c, c_val), (x, 0, 2))
variance = sp.integrate((x - mu)**2 * fx.subs(c, c_val), (x, 0, 2))
# Plot: density evaluated numerically on a grid over [0, 2].
x_vals_pdf = np.linspace(0, 2, 100)
f_vals_pdf = [float(c_val * (2 - val)) for val in x_vals_pdf]
fig, ax = plt.subplots()
ax.plot(x_vals_pdf, f_vals_pdf, color='green')
ax.set(
    title="PDF of Pokémon Appearance Time",
    xlabel="Time (hours)",
    ylabel="Density",
)
ax.grid(True)
plt.show()
# -----------------------------
# 5. Regression prediction: temperature -> appearance day
# -----------------------------
from datetime import date, timedelta

b = -2.7608  # Slope
a = 20.0209  # Intercept
temp_2017 = 6.10
# Predicted appearance day, counted in days after March 31 (Mar 31 = Day 0).
predicted_day = a + b * temp_2017
# Convert the day offset to a calendar date. Date arithmetic keeps the label
# correct for offsets <= 0 (still March) or > 30 (May); the previous
# "April {3 + day}" formula shifted every date by three days.
_appearance_date = date(2017, 3, 31) + timedelta(days=round(predicted_day))
predicted_day_date = f"{_appearance_date:%B} {_appearance_date.day}"
# Plot: regression line over 3-10 °C with the 2017 prediction highlighted.
temps = np.linspace(3, 10, 50)
days = a + b * temps
fig, ax = plt.subplots()
ax.plot(temps, days, label='Regression Line', color='red')
ax.scatter([6.10], [predicted_day], color='blue', label='Prediction (2017)')
ax.set(
    title="Temperature vs. Predicted Appearance Day",
    xlabel="Temperature (°C)",
    ylabel="Predicted Day (days after Mar 31)",
)
ax.legend()
ax.grid(True)
plt.show()
# -----------------------------
# 6. Autocorrelation (lag-1)
# -----------------------------
prices = [200, 210, 220, 230, 240, 250, 240, 230, 220, 210, 200]
# Correlate the series with itself shifted by one step.
_earlier, _later = prices[:-1], prices[1:]
lag1_corr = np.corrcoef(_earlier, _later)[0][1]
# Line chart of the example price series (x axis is the observation number).
fig, ax = plt.subplots()
ax.plot(prices, marker='o', label='Item Price')
ax.set(title="Pokémon Item Price Trend (Example)", xlabel="Time", ylabel="Price")
ax.grid(True)
ax.legend()
plt.show()
# -----------------------------
# Display results in a table
# -----------------------------
results = {
    "P(A|Legendary)": round(P_A_given_L, 4),
    "P(Hit ≥ 1 out of 5)": round(prob_at_least_1, 4),
    "P(Z > 1)": round(prob_z_greater_than_1, 4),
    "E[Appearance Time]": float(mu),
    "Var[Appearance Time]": float(variance),
    "Predicted Day (Temp 6.1°C)": predicted_day_date,
    "Lag-1 Autocorrelation": round(lag1_corr, 4),
}
df = pd.DataFrame(results.items(), columns=["Metric", "Value"])
print("▼ Pokémon Statistics Summary")
# `display` is only defined inside IPython/Jupyter; fall back to plain text
# so the file also runs as an ordinary script.
try:
    display(df)
except NameError:
    print(df.to_string(index=False))
# --- Import libraries ---
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import binom
# --- Fix the global NumPy seed so the simulated samples are reproducible ---
np.random.seed(42)
# -------------------------------
# 1. F-test simulation (Legendary success rates)
# -------------------------------
# Region A: 20 draws from N(80, 5); region B: 10 draws from N(75, 10).
region_A_success = np.random.normal(loc=80, scale=5, size=20)
region_B_success = np.random.normal(loc=75, scale=10, size=10)
mean_A, mean_B = np.mean(region_A_success), np.mean(region_B_success)
# Unbiased sample variances (ddof=1).
var_A = np.var(region_A_success, ddof=1)
var_B = np.var(region_B_success, ddof=1)
# Variance ratio A / B.
F_statistic = var_A / var_B
# -------------------------------
# 2. Confidence interval for a difference of two proportions (League interest)
# -------------------------------
p_2020, n_2020 = 0.483, 1897
p_2017, n_2017 = 0.416, 1925
diff = p_2020 - p_2017
# Standard error of the difference of two independent sample proportions.
se_diff = np.sqrt(p_2020*(1-p_2020)/n_2020 + p_2017*(1-p_2017)/n_2017)
# Normal-approximation 95% interval (z = 1.96).
ci_lower, ci_upper = diff - 1.96 * se_diff, diff + 1.96 * se_diff
# -------------------------------
# 3. Binomial hit distribution (Poké Ball)
# -------------------------------
n_trials, p_hit = 5, 0.159
x_vals = np.arange(n_trials + 1)   # hit counts 0..n_trials
hit_probs = binom.pmf(k=x_vals, n=n_trials, p=p_hit)
# Bar chart of the probability of each hit count.
fig, ax = plt.subplots()
ax.bar(x_vals, hit_probs, color='skyblue')
ax.set(
    title="Poké Ball Hit Distribution (5 Throws)",
    xlabel="Number of Hits",
    ylabel="Probability",
)
ax.grid(True)
fig.tight_layout()
plt.show()
# -------------------------------
# 4. Linear regression (temperature -> Pokémon appearance)
# -------------------------------
temps = np.array([5, 6, 7, 8, 9])
# Synthetic observations: a straight line plus unit-variance Gaussian noise.
_noise = np.random.normal(0, 1, size=len(temps))
days = 20 - 2.5 * temps + _noise
# Degree-1 least-squares fit; coef is [slope, intercept].
coef = np.polyfit(temps, days, deg=1)
pred_day = np.polyval(coef, 6.1)
# Observations, fitted line, and the prediction at 6.1 °C.
fig, ax = plt.subplots()
ax.scatter(temps, days, color='orange', label='Observed')
ax.plot(temps, np.polyval(coef, temps), color='red', label='Regression Line')
ax.scatter([6.1], [pred_day], color='green', label='Prediction (6.1°C)')
ax.set(
    title="Temperature vs. Pokémon Appearance Day",
    xlabel="Temperature (°C)",
    ylabel="Appearance Day (Days After Mar 31)",
)
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()
# -------------------------------
# Summary table
# -------------------------------
summary = {
    "Mean (Region A)": round(mean_A, 2),
    "Variance (Region A)": round(var_A, 2),
    "Mean (Region B)": round(mean_B, 2),
    "Variance (Region B)": round(var_B, 2),
    "F-statistic (A/B)": round(F_statistic, 3),
    "CI of Interest Difference": f"[{ci_lower:.3f}, {ci_upper:.3f}]",
    "Predicted Appearance Day at 6.1°C": round(pred_day, 2),
}
# Display as DataFrame
df_summary = pd.DataFrame(summary.items(), columns=["Metric", "Value"])
print("▼ Pokémon Themed Statistical Summary")
# `display` is only defined inside IPython/Jupyter; fall back to plain text
# so the file also runs as an ordinary script.
try:
    display(df_summary)
except NameError:
    print(df_summary.to_string(index=False))
# --- Import necessary libraries ---
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import chi2, chisquare, f_oneway
# --- 1. Goodness-of-fit test: Pokémon lottery results ---
# Categories: 1st prize, 2nd prize, fail
observed = np.array([5, 12, 33])
expected = np.array([10, 15, 25])
# Pearson chi-squared statistic: sum over categories of (O - E)^2 / E.
chi_stat = ((observed - expected) ** 2 / expected).sum()
df_chi = len(observed) - 1
# Upper 5% critical value of the chi-squared distribution.
critical_value = chi2.ppf(0.95, df=df_chi)
print("[Goodness-of-fit Test]")
print(f"Chi-squared statistic: {chi_stat:.2f}")
print(f"Critical value (df={df_chi}): {critical_value:.2f}")
if chi_stat > critical_value:
    print("→ Reject H₀: Observed Pokémon lottery results significantly differ from expected.")
else:
    print("→ Fail to reject H₀: No significant difference in Pokémon lottery results.\n")
# --- 2. ANOVA: CP across Pokémon regions ---
np.random.seed(42)  # reproducibility
# One simulated CP sample per region: normal(mean, sd, sample size).
regions = {
    'Kanto': np.random.normal(210, 20, 27),
    'Johto': np.random.normal(89, 15, 13),
    'Hoenn': np.random.normal(712, 30, 3),
    'Sinnoh': np.random.normal(515, 25, 31),
    'Unova': np.random.normal(192, 10, 7),
    'Kalos': np.random.normal(559, 40, 3),
}
# One-way ANOVA across the six samples.
anova_result = f_oneway(*regions.values())
print("[ANOVA]")
print(f"F-statistic: {anova_result.statistic:.2f}")
print(f"P-value: {anova_result.pvalue:.4e}")
if anova_result.pvalue < 0.05:
    print("→ Reject H₀: There are significant differences in CP among regions.")
else:
    print("→ Fail to reject H₀: No significant CP difference among regions.\n")
# --- 3. Visualization: histogram of CPs by region ---
plt.figure(figsize=(10, 6))
# One semi-transparent histogram per region, drawn on the same axes.
for name, cp in regions.items():
    plt.hist(cp, bins=10, alpha=0.6, label=name)
plt.title("Combat Power (CP) Distribution by Pokémon Region")
plt.xlabel("CP")
plt.ylabel("Frequency")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
# --- Import libraries ---
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.stats import norm, binomtest, ttest_ind
np.random.seed(42)  # seed the global NumPy RNG used by the simulations below
# -------------------------------
# 1. Confidence interval (proportion)
# -------------------------------
p_hat, n = 0.483, 1897   # sample proportion and sample size
# Standard error of a sample proportion.
se = np.sqrt(p_hat * (1 - p_hat) / n)
# Normal-approximation 95% interval (z = 1.96).
ci_lower = p_hat - 1.96 * se
ci_upper = p_hat + 1.96 * se
# Plot: the interval as a horizontal bar with the point estimate on top.
fig, ax = plt.subplots(figsize=(6, 1.5))
ax.hlines(1, ci_lower, ci_upper, colors='blue', linewidth=6, label='95% CI')
ax.plot(p_hat, 1, 'ro', label='Sample Proportion')
ax.set_title('League Interest Proportion (95% CI)')
ax.set_yticks([])  # the y axis carries no information here
ax.set_xlabel("Proportion")
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()
# -------------------------------
# 2. Regression analysis: temperature vs. appearance day
# -------------------------------
temps = np.array([5, 6, 7, 8, 9])
# Synthetic observations: a straight line plus unit-variance Gaussian noise.
_noise = np.random.normal(0, 1, len(temps))
days = 20 - 2.5 * temps + _noise
# Degree-1 least-squares fit returns (slope, intercept).
slope, intercept = np.polyfit(temps, days, deg=1)
pred_day = slope * 6.1 + intercept
# Plot: observations, fitted line, and the prediction at 6.1 °C.
fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(temps, days, color='orange', label='Observed')
ax.plot(temps, slope * temps + intercept, color='red', label='Regression Line')
ax.scatter([6.1], [pred_day], color='green', label='Prediction (6.1°C)')
ax.set(
    title="Pokémon Appearance Day vs. Temperature",
    xlabel="Temperature (°C)",
    ylabel="Days After Mar 31",
)
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()
# -------------------------------
# 3. Binomial test: Pikachu's Thunderbolt accuracy
# -------------------------------
successes, trials = 18, 30
# Exact two-sided test of H0: success probability = 0.5.
binom_result = binomtest(successes, trials, p=0.5, alternative='two-sided')
p_value_binom = binom_result.pvalue
# Plot: normal approximation to the null distribution, with the observed count marked.
x_vals = np.arange(trials + 1)
# Normal density with the binomial's mean n*p and standard deviation sqrt(n*p*(1-p)) at p = 0.5.
approx_probs = norm.pdf(x_vals, loc=trials * 0.5, scale=np.sqrt(trials * 0.5 * 0.5))
fig, ax = plt.subplots(figsize=(6, 4))
ax.bar(x_vals, approx_probs, color='skyblue', label='Expected under H₀')
ax.axvline(successes, color='red', linestyle='--', label='Observed Success')
ax.set(
    title="Pikachu's Thunderbolt Success (Binomial Test)",
    xlabel="Number of Successes",
    ylabel="Probability (Normal Approx.)",
)
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()
# -------------------------------
# 4. t-test: Charmander vs. Squirtle speed
# -------------------------------
# Two simulated samples of 30 speeds each, both with standard deviation 10.
speeds_charmander = np.random.normal(65, 10, 30)
speeds_squirtle = np.random.normal(58, 10, 30)
# Independent two-sample t-test with scipy's default settings.
_speed_test = ttest_ind(speeds_charmander, speeds_squirtle)
t_stat, t_pval = _speed_test.statistic, _speed_test.pvalue
# Plot: side-by-side boxplots of the two speed samples.
plt.figure(figsize=(6, 4))
plt.boxplot([speeds_charmander, speeds_squirtle])
# The `labels=` keyword of boxplot is deprecated since Matplotlib 3.9 (renamed
# `tick_labels`). Setting the tick labels separately works on every version;
# boxes sit at the default positions 1..N.
plt.xticks([1, 2], ['Charmander', 'Squirtle'])
plt.title("Speed Comparison: Charmander vs. Squirtle (t-test)")
plt.ylabel("Speed")
plt.grid(True)
plt.tight_layout()
plt.show()
# -------------------------------
# Summary table
# -------------------------------
summary = {
    "95% CI for League Interest": f"[{ci_lower:.3f}, {ci_upper:.3f}]",
    "Predicted Day (6.1°C)": round(pred_day, 2),
    "Binomial Test p-value (Thunderbolt)": round(p_value_binom, 4),
    "T-test Statistic (Speed)": round(t_stat, 2),
    "T-test p-value (Speed)": round(t_pval, 4),
}
df_summary = pd.DataFrame(summary.items(), columns=["Analysis", "Result"])
print("▼ Pokémon Statistical Summary")
# `display` is only defined inside IPython/Jupyter; fall back to plain text
# so the file also runs as an ordinary script.
try:
    display(df_summary)
except NameError:
    print(df_summary.to_string(index=False))
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# --- Dummy data: retention rates of five items across nine Pokémon regions ---
regions = ['Kanto', 'Johto', 'Hoenn', 'Sinnoh', 'Unova', 'Kalos', 'Alola', 'Galar', 'Paldea']
np.random.seed(42)
# (mean, standard deviation) of the simulated retention percentage per item.
# Insertion order matters: it fixes the order of the random draws.
_retention_params = {
    'Pokégear (PG)': (60, 10),
    'Pokétch (PT)': (80, 5),
    'PokéNav (PN)': (50, 8),
    'TV': (95, 2),
    'PokéDisk (PD)': (70, 6),
}
data = {
    'Region': regions,
    **{
        item: np.random.normal(center, spread, len(regions))
        for item, (center, spread) in _retention_params.items()
    },
}
df = pd.DataFrame(data)
# --- Boxplot of every item column (the Region label column is excluded) ---
fig, ax = plt.subplots(figsize=(10, 6))
df.drop(columns='Region').boxplot(ax=ax)
ax.set_title('Pokémon Item Retention Rate by Region')
ax.set_ylabel('Retention Rate (%)')
ax.grid(True)
fig.tight_layout()
plt.show()
# --- Histogram comparison: Pokégear (PG) vs Pokétch (PT) ---
def _retention_hist(ax, column, color):
    """Draw a 5-bin histogram of df[column] on *ax* with the shared title/x-label format."""
    ax.hist(df[column], bins=5, color=color, edgecolor='black')
    ax.set_title(f"{column} Retention Rate")
    ax.set_xlabel("Retention (%)")
    ax.grid(True)


# Two panels side by side sharing the y axis, so only the left one is labelled.
fig, axes = plt.subplots(1, 2, figsize=(12, 4), sharey=True)
_retention_hist(axes[0], 'Pokégear (PG)', 'skyblue')
axes[0].set_ylabel("Frequency")
_retention_hist(axes[1], 'Pokétch (PT)', 'salmon')
plt.tight_layout()
plt.show()
from scipy.stats import ttest_ind, pearsonr, linregress
# --- Two-sample t-test: Pokétch (PT) vs Pokégear (PG) ---
t_stat, t_pval = ttest_ind(df['Pokétch (PT)'], df['Pokégear (PG)'])
# --- Pearson correlation: PokéNav vs PokéDisk ---
corr_coef, corr_pval = pearsonr(df['PokéNav (PN)'], df['PokéDisk (PD)'])
# --- Simple regression: PokéNav as predictor, PokéDisk as response ---
_pn_pd_fit = linregress(df['PokéNav (PN)'], df['PokéDisk (PD)'])
slope, intercept = _pn_pd_fit.slope, _pn_pd_fit.intercept
r_value, p_value, std_err = _pn_pd_fit.rvalue, _pn_pd_fit.pvalue, _pn_pd_fit.stderr
# Fitted PokéDisk values at the observed PokéNav values.
predicted_pd = intercept + slope * df['PokéNav (PN)']
# --- Regression visualisation: data points with the fitted line ---
fig, ax = plt.subplots(figsize=(7, 5))
ax.scatter(df['PokéNav (PN)'], df['PokéDisk (PD)'], label='Data', color='purple')
ax.plot(df['PokéNav (PN)'], predicted_pd, color='black', linestyle='--', label='Regression Line')
ax.set(
    xlabel="PokéNav Retention (%)",
    ylabel="PokéDisk Retention (%)",
    title="Regression: PokéNav vs PokéDisk",
)
ax.grid(True)
ax.legend()
fig.tight_layout()
plt.show()
# --- Output the results ---
results = {
    't-test (PT vs PG)': f"t = {t_stat:.3f}, p = {t_pval:.4f}",
    'Correlation (PN vs PD)': f"r = {corr_coef:.3f}, p = {corr_pval:.4f}",
    'Regression (PN → PD)': f"y = {intercept:.2f} + {slope:.2f}x, R² = {r_value**2:.3f}",
}
results_df = pd.DataFrame(results.items(), columns=['Analysis', 'Result'])
# A bare `results_df` expression shows nothing when the file runs as a script
# (or anywhere but the last line of a notebook cell), so print it explicitly.
print(results_df.to_string(index=False))
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
# --- Data preparation: numeric columns only (Region dropped) ---
X = df.drop(columns=['Region'])
# --- Multivariate linear regression ---
# Response: PokéDisk (PD); predictors: every other item column.
y_reg = X['PokéDisk (PD)']
X_reg = X.drop(columns='PokéDisk (PD)')
reg_model = LinearRegression().fit(X_reg, y_reg)
reg_coeffs, reg_intercept = reg_model.coef_, reg_model.intercept_
# Coefficient of determination on the training data.
reg_r2 = reg_model.score(X_reg, y_reg)
# --- Principal component analysis (PCA) ---
# Standardise first so every item contributes on the same scale.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)
explained_variance = pca.explained_variance_ratio_
# --- Clustering (KMeans) ---
# n_init is pinned: scikit-learn changed its default from 10 to 'auto' in 1.4
# (with a FutureWarning in 1.2/1.3), so leaving it implicit makes the result
# depend on the installed version.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
cluster_labels = kmeans.fit_predict(X_scaled)
# --- PCA scatter plot coloured by cluster ---
plt.figure(figsize=(8, 6))
scatter = plt.scatter(X_pca[:, 0], X_pca[:, 1], c=cluster_labels, cmap='viridis', s=100)
# Label each point with its region name, offset slightly so the text does not cover the marker.
for i, txt in enumerate(df['Region']):
    plt.annotate(txt, (X_pca[i, 0] + 0.1, X_pca[i, 1] + 0.1), fontsize=8)
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.title('PCA of Pokémon Item Retention with Clustering')
plt.grid(True)
plt.colorbar(scatter, label='Cluster')
plt.tight_layout()
plt.show()
# --- Output the results ---
results_multi = {
    'Multivariate Regression R²': reg_r2,
    'Regression Coefficients': dict(zip(X_reg.columns, reg_coeffs)),
    'Regression Intercept': reg_intercept,
    'PCA Explained Variance': explained_variance.tolist(),
}
# One row per predictor coefficient, followed by the intercept and R².
results_df = pd.DataFrame.from_dict(results_multi['Regression Coefficients'], orient='index', columns=['Coefficient'])
results_df.loc['(Intercept)'] = reg_intercept
results_df.loc['R²'] = reg_r2
# A bare `results_df` expression shows nothing when the file runs as a script,
# so print the table (and the PCA variance ratios, which were collected but
# never reported).
print(results_df)
print("PCA explained variance ratio:", results_multi['PCA Explained Variance'])
# --- Import necessary libraries ---
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
# --- Dummy data for Pokémon analysis: capture count vs market price ---
np.random.seed(42)
capture = np.array([100, 110, 120, 130, 140, 150])
price = np.array([16.5, 15.8, 15.2, 14.1, 13.4, 12.9])
# --- Two-column table of the observations ---
df = pd.DataFrame({'CaptureCount': capture, 'MarketPrice': price})
# --- Design matrix (constant column added for the intercept) and response ---
X = sm.add_constant(df['CaptureCount'])
y = df['MarketPrice']
# --- Ordinary least squares fit ---
model = sm.OLS(endog=y, exog=X).fit()
# --- Fitted values, stored alongside the data for plotting the regression line ---
df['PredictedPrice'] = model.predict(X)
# --- Plot the observations with the fitted regression line ---
plt.figure(figsize=(8, 5))
plt.scatter(df['CaptureCount'], df['MarketPrice'], label='Actual Data', color='blue')
plt.plot(df['CaptureCount'], df['PredictedPrice'], label='Regression Line', color='red')
plt.xlabel('Number of Captures')
plt.ylabel('Market Price (Pokémon Coins)')
plt.title('Pokémon Regression: Capture Count vs Market Price')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
# --- Regression summary ---
# A bare `model.summary()` expression produces no output in a script, so print
# it, as the earlier bloom-day regression section does.
print(model.summary())
# --- 必要なライブラリ ---
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from sklearn.linear_model import LinearRegression
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
# --- Generate dummy Pokémon data: 50 rows of normally distributed attributes ---
np.random.seed(42)
n = 50
# (mean, standard deviation) per attribute; insertion order fixes the order of the draws.
_attribute_params = {
    'Attack': (75, 15),
    'Defense': (70, 20),
    'Speed': (80, 10),
    'Popularity': (50, 15),   # popularity score
    'Price': (1000, 200),     # price (hypothetical)
}
df = pd.DataFrame({
    'Name': [f'Pokemon_{i}' for i in range(n)],
    **{
        attribute: np.random.normal(center, spread, n)
        for attribute, (center, spread) in _attribute_params.items()
    },
})
# --- 1. Pairwise scatter plots and correlation matrix of the numeric attributes ---
_numeric_columns = ['Attack', 'Defense', 'Speed', 'Popularity', 'Price']
sns.pairplot(df[_numeric_columns])
plt.suptitle("Pokémon Attribute Pair Plots", y=1.02)  # y > 1 lifts the title above the grid
plt.show()
corr = df[_numeric_columns].corr()
print("Correlation Matrix:\n", corr)
# --- 2. t-test: popularity of fast vs. slow Pokémon ---
# Split at the median speed; rows exactly at the median count as "slow".
median_speed = df['Speed'].median()
fast = df.loc[df['Speed'] > median_speed, 'Popularity']
slow = df.loc[df['Speed'] <= median_speed, 'Popularity']
# Welch's t-test (equal variances are not assumed).
t_stat, p_value = stats.ttest_ind(fast, slow, equal_var=False)
print(f"\nT-test result (Popularity fast vs slow): t = {t_stat:.3f}, p = {p_value:.3f}")
# --- 3. Simple regression: Popularity -> Price ---
X = df[['Popularity']]   # 2-D predictor frame, as scikit-learn expects
y = df['Price']
reg = LinearRegression()
reg.fit(X, y)
# Show the fitted equation.
print(f"\nRegression: Price = {reg.intercept_:.2f} + {reg.coef_[0]:.2f} × Popularity")
# Visualise the data with the fitted line.
fig, ax = plt.subplots()
ax.scatter(df['Popularity'], df['Price'], label="Data")
ax.plot(df['Popularity'], reg.predict(X), color='red', label="Regression Line")
ax.set(
    xlabel="Popularity",
    ylabel="Price",
    title="Simple Linear Regression: Price ~ Popularity",
)
ax.legend()
plt.show()
# --- 4. Multiple regression: (Attack, Defense, Speed) -> Price ---
X_multi = df[['Attack', 'Defense', 'Speed']]
y = df['Price']
reg_multi = LinearRegression().fit(X_multi, y)
print("\nMultiple Regression Coefficients:")
# One line per predictor, followed by the intercept.
for feature, coef in zip(X_multi.columns, reg_multi.coef_):
    print(f"{feature}: {coef:.2f}")
print(f"Intercept: {reg_multi.intercept_:.2f}")
# --- 5. KMeans clustering (2 clusters) on Attack / Defense / Speed ---
# n_init is pinned: scikit-learn changed its default from 10 to 'auto' in 1.4
# (with a FutureWarning in 1.2/1.3), so leaving it implicit makes the result
# depend on the installed version.
kmeans = KMeans(n_clusters=2, n_init=10, random_state=0)
df['Cluster'] = kmeans.fit_predict(df[['Attack', 'Defense', 'Speed']])
# Visualise the clusters in the Attack-Defense plane.
sns.scatterplot(data=df, x='Attack', y='Defense', hue='Cluster', palette='Set1')
plt.title("Pokémon Clustering (Attack vs Defense)")
plt.show()
# --- 6. Principal component analysis (PCA) of the same three stats ---
pca = PCA(n_components=2)
X_pca = pca.fit_transform(df[['Attack', 'Defense', 'Speed']])
df['PC1'], df['PC2'] = X_pca[:, 0], X_pca[:, 1]
# Visualise the first two components, coloured by cluster.
sns.scatterplot(data=df, x='PC1', y='PC2', hue='Cluster', palette='Set2')
plt.title("PCA of Pokémon Stats")
plt.show()
# --- Import ---
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import seaborn as sns
# --- Dummy data: hypothetical battle-related stats for 100 Pokémon ---
np.random.seed(42)
n = 100
# (mean, standard deviation) per column; insertion order fixes the order of the draws.
_column_params = {
    'Attack': (70, 15),    # attack
    'Defense': (60, 10),   # defense
    'Speed': (65, 12),     # speed
    'WinRate': (50, 10),   # win rate (%)
}
df = pd.DataFrame({
    column: np.random.normal(center, spread, n)
    for column, (center, spread) in _column_params.items()
})
# --- Simple regression: effect of Attack on WinRate ---
_attack_fit = stats.linregress(df['Attack'], df['WinRate'])
slope, intercept = _attack_fit.slope, _attack_fit.intercept
r_value, p_value, std_err = _attack_fit.rvalue, _attack_fit.pvalue, _attack_fit.stderr
print("Regression Coefficients")
print(f"WinRate = {intercept:.2f} + {slope:.2f} * Attack")
print(f"R-squared: {r_value**2:.3f}, p-value: {p_value:.4f}")
# --- Scatter plot with seaborn's fitted regression line drawn in red ---
fig, ax = plt.subplots(figsize=(8, 6))
sns.regplot(x='Attack', y='WinRate', data=df, line_kws={"color": "red"}, ax=ax)
ax.set_title('Attack vs Win Rate (Linear Regression)')
ax.set_xlabel('Attack')
ax.set_ylabel('Win Rate (%)')
ax.grid(True)
plt.show()
# --- One-sample t-test: is the mean Speed equal to 65? (H0: mu = 65) ---
_speed_test = stats.ttest_1samp(df['Speed'], popmean=65)
t_stat, p_val = _speed_test.statistic, _speed_test.pvalue
print("\nT-test for Speed mean = 65")
print(f"t-statistic = {t_stat:.3f}, p-value = {p_val:.4f}")
# --- Pearson correlation between Defense and WinRate ---
corr, p_corr = stats.pearsonr(df['Defense'], df['WinRate'])
print("\nCorrelation between Defense and WinRate")
print(f"Pearson correlation = {corr:.3f}, p-value = {p_corr:.4f}")
# --- Scatter-plot matrix of every column ---
sns.pairplot(data=df)
plt.suptitle("Scatter Matrix for Pokémon Stats", y=1.02)  # y > 1 lifts the title above the grid
plt.show()
# --- Import required libraries ---
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
# --- Dummy data: base stats and a sales score for ten named Pokémon ---
data = dict(
    Name=['Pikachu', 'Charizard', 'Blastoise', 'Venusaur', 'Gengar', 'Dragonite', 'Snorlax', 'Alakazam', 'Machamp', 'Gyarados'],
    Attack=[55, 84, 83, 82, 65, 134, 110, 50, 130, 125],
    Defense=[40, 78, 100, 83, 60, 95, 65, 45, 80, 79],
    Speed=[90, 100, 78, 80, 110, 80, 30, 120, 55, 81],
    SalesScore=[300, 500, 400, 420, 380, 600, 450, 390, 410, 580],
)
df = pd.DataFrame(data)
# --- Standardize the features for clustering and PCA ---
features = ['Attack', 'Defense', 'Speed']
X = df[features]
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# --- Multivariate linear regression on the raw (unscaled) features ---
reg_model = LinearRegression()
reg_model.fit(X, df['SalesScore'])
# --- Print regression coefficients, intercept and in-sample R² ---
print("Regression Coefficients:")
for feature, coef in zip(features, reg_model.coef_):
    print(f"{feature}: {coef:.2f}")
print(f"Intercept: {reg_model.intercept_:.2f}")
print(f"R^2 Score: {reg_model.score(X, df['SalesScore']):.3f}")
# --- KMeans clustering on the standardised stats ---
# n_init is pinned: scikit-learn changed its default from 10 to 'auto' in 1.4
# (with a FutureWarning in 1.2/1.3), so leaving it implicit makes the result
# depend on the installed version.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
kmeans.fit(X_scaled)
df['Cluster'] = kmeans.labels_
# --- PCA for visualisation: project the standardised stats onto two components ---
pca = PCA(n_components=2)
principal_components = pca.fit_transform(X_scaled)
df[['PC1', 'PC2']] = principal_components
# --- PCA scatter: colour encodes the cluster, marker style encodes the Pokémon ---
fig, ax = plt.subplots(figsize=(8,6))
sns.scatterplot(data=df, x='PC1', y='PC2', hue='Cluster', style='Name', palette='viridis', s=100, ax=ax)
ax.set_title("PCA of Pokémon Stats with Clusters")
ax.set_xlabel("Principal Component 1")
ax.set_ylabel("Principal Component 2")
# Anchor the legend outside the axes, to the right of the plot.
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
ax.grid(True)
fig.tight_layout()
plt.show()
# --- ライブラリのインポート ---
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import ttest_ind, binomtest, pearsonr
from sklearn.linear_model import LinearRegression
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
# --- Dummy data: Pokémon item prices and simulated sales in two regions ---
np.random.seed(42)
_item_names = ["モンスターボール", "スーパーボール", "ハイパーボール", "キズぐすり", "すごいキズぐすり", "わざマシン"]
_item_prices = [200, 600, 1200, 300, 700, 4000]
# Sales are random integers in [100, 300); region A is drawn before region B.
data = {
    "Item": _item_names,
    "Price": _item_prices,
    "Sales_Region_A": np.random.randint(100, 300, 6),
    "Sales_Region_B": np.random.randint(100, 300, 6),
}
df = pd.DataFrame(data)
# --- Two-sample t-test for a difference in mean sales between the regions ---
_region_test = ttest_ind(df["Sales_Region_A"], df["Sales_Region_B"])
stat, pval = _region_test.statistic, _region_test.pvalue
print(f"t検定: p値 = {pval:.4f}")
# --- Pearson correlation between price and region-A sales, plus a scatter plot ---
corr, _ = pearsonr(df["Price"], df["Sales_Region_A"])
print(f"Price と Sales_Region_A の相関係数: {corr:.2f}")
ax = sns.regplot(x="Price", y="Sales_Region_A", data=df)
ax.set_title("Price vs Sales (Region A)")
plt.show()
# --- Simple regression: Price -> region-A sales ---
X = df[["Price"]]   # 2-D predictor frame, as scikit-learn expects
y = df["Sales_Region_A"]
model = LinearRegression()
model.fit(X, y)
print(f"回帰係数: {model.coef_[0]:.2f}, 切片: {model.intercept_:.2f}")
# --- Multiple regression: Price and region-B sales -> region-A sales ---
X_multi = df[["Price", "Sales_Region_B"]]
model_multi = LinearRegression()
model_multi.fit(X_multi, y)
print("多変量回帰係数:", model_multi.coef_)
# --- Clustering ---
X_cluster = df[["Price", "Sales_Region_A", "Sales_Region_B"]]
# Standardise each column (zero mean, unit population variance). Price spans
# 200-4000 while sales span 100-300, so on raw values both the KMeans
# distances and the PCA directions would be driven almost entirely by Price.
X_cluster_scaled = (X_cluster - X_cluster.mean()) / X_cluster.std(ddof=0)
# random_state makes the cluster assignment reproducible, as in the other sections.
kmeans = KMeans(n_clusters=2, n_init=10, random_state=42).fit(X_cluster_scaled)
df["Cluster"] = kmeans.labels_
# --- PCA visualisation of the standardised features, coloured by cluster ---
pca = PCA(n_components=2)
components = pca.fit_transform(X_cluster_scaled)
df_pca = pd.DataFrame(components, columns=["PC1", "PC2"])
df_pca["Cluster"] = df["Cluster"]
sns.scatterplot(x="PC1", y="PC2", hue="Cluster", data=df_pca)
plt.title("PCA of Pokémon Item Stats")
plt.show()