0
0

Delete article

Deleted articles cannot be recovered.

Any draft of this article will also be deleted.

Are you sure you want to delete this article?

経済学のための回帰分析(日記)

Posted at

# -*- coding: utf-8 -*-
# モジュールのインポート / Import necessary modules
import numpy as np
import pandas as pd
from scipy import stats
import statsmodels.api as sm
import matplotlib.pyplot as plt

# Fix the random seed so the simulated dataset is reproducible.
np.random.seed(42)

n_samples = 300  # number of simulated shoppers

# Simulate marketing features. The np.random call order is kept exactly
# as before so the seeded sequence (and therefore the data) is unchanged.
disc = np.random.choice([0, 10, 20, 30], size=n_samples)   # discount rate (%)
scarce = np.random.choice([0, 1], size=n_samples)          # scarcity ("limited time") tag shown?
ship_free = np.random.choice([0, 1], size=n_samples)       # free shipping offered?
n_reviews = np.random.randint(0, 100, size=n_samples)      # review count
inc = np.random.normal(500, 100, size=n_samples)           # income
ages = np.random.randint(18, 65, size=n_samples)           # age
hist = np.random.randint(0, 50, size=n_samples)            # past purchase count

# Linear score -> sigmoid -> Bernoulli draw for the purchase decision
# (1 = purchased, 0 = not purchased).
score = (0.05 * disc + 0.1 * scarce + 0.08 * ship_free
         + 0.001 * n_reviews + 0.0005 * inc)
prob = 1 / (1 + np.exp(-score))
bought = np.random.binomial(1, prob)

# Assemble everything into one DataFrame for the analysis below.
df = pd.DataFrame({
    'discount': disc,
    'scarcity': scarce,
    'free_shipping': ship_free,
    'reviews': n_reviews,
    'income': inc,
    'age': ages,
    'purchase_history': hist,
    'purchase': bought,
})

# -------------------- Analysis Section --------------------

# 2.6 Standard error of the discount coefficient.
# Logistic regression of purchase on discount alone; read the SE off bse.
X_disc = sm.add_constant(df[['discount']])
logit_disc = sm.Logit(df['purchase'], X_disc).fit(disp=False)
coef_se = logit_disc.bse['discount']

# 2.7 t-test for the scarcity effect: purchase rates with vs. without
# the scarcity tag.
with_tag = df.loc[df['scarcity'] == 1, 'purchase']
without_tag = df.loc[df['scarcity'] == 0, 'purchase']
t_stat, p_value_ttest = stats.ttest_ind(with_tag, without_tag)

# 2.8 Joint significance of the marketing factors: likelihood-ratio
# test p-value of the full logit model.
factor_cols = ['discount', 'scarcity', 'free_shipping', 'reviews']
X_all = sm.add_constant(df[factor_cols])
logit_all = sm.Logit(df['purchase'], X_all).fit(disp=False)
f_p_value = logit_all.llr_pvalue

# 2.9 t-test for the discount effect: any discount vs. no discount.
discounted = df.loc[df['discount'] > 0, 'purchase']
undiscounted = df.loc[df['discount'] == 0, 'purchase']
t_stat_diff, p_value_diff = stats.ttest_ind(discounted, undiscounted)

# 2.10 "Consumption function": purchase modeled on income, age and
# purchase history.
X_demo = sm.add_constant(df[['income', 'age', 'purchase_history']])
model_consump = sm.Logit(df['purchase'], X_demo).fit(disp=False)

# -------------------- Display Results --------------------
# Echo the simulated data and each test statistic to stdout; output text
# is kept identical to the original prints.
print("Sample Data:")
print(df.head())

print(f"\n2.6 Coefficient Standard Error: {coef_se}")
print(f"2.7 Scarcity Effect p-value: {p_value_ttest}")
print(f"2.8 Multiple Factors F-test p-value: {f_p_value}")
print(f"2.9 Discount Effect p-value: {p_value_diff}")
print("\n2.10 Consumption Function Summary:")
print(model_consump.summary())

# -------------------- Visualization --------------------
# Three bar charts of the mean purchase rate under different groupings,
# shown one after another (same figures, same order as before).
grouped_rates = [
    (df.groupby('discount')['purchase'].mean(),
     'Purchase Rate by Discount Level', 'Discount (%)'),
    (df.groupby('scarcity')['purchase'].mean(),
     'Purchase Rate by Scarcity (0=No, 1=Yes)', 'Scarcity Shown'),
    (df.groupby(df['discount'] > 0)['purchase'].mean(),
     'Purchase Rate by Discount Presence', 'Discount Offered (False/True)'),
]
for rates, title, xlabel in grouped_rates:
    rates.plot(kind='bar')
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel('Purchase Rate')
    plt.show()


# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import accuracy_score, roc_auc_score

# Fix the random seed so the simulation is reproducible.
np.random.seed(42)

# Dummy ice-cream data: temperatures in [10, 35) and sales that follow
# 20 + 5*temperature plus N(0, 5) noise. The np.random call order
# (uniform, then normal) matches the original seeded sequence.
n = 100
temps = np.random.uniform(10, 35, n)
noise = np.random.normal(0, 5, n)

df = pd.DataFrame({'temperature': temps, 'sales': 20 + 5 * temps + noise})

# ------------------ Linear Regression ------------------
# Fit sales as a linear function of temperature, then invert the fitted
# line to estimate the temperature needed to reach a sales target.
ols = LinearRegression().fit(df[['temperature']], df['sales'])
fitted_sales = ols.predict(df[['temperature']])

sales_target = 150  # target sales level (also used by the logistic section)
# Solve target = intercept + coef * temp for temp.
required_temp = (sales_target - ols.intercept_) / ols.coef_[0]
print(f"[Linear Regression] Estimated temperature to achieve sales of {sales_target}: {required_temp:.2f}°C")

# Scatter the observations, overlay the fitted line, and mark both the
# target sales level and the temperature that achieves it.
plt.scatter(df['temperature'], df['sales'], label='Actual Sales')
plt.plot(df['temperature'], fitted_sales, color='red', label='Linear Regression')
plt.axhline(y=sales_target, color='green', linestyle='--',
            label=f'Sales Target = {sales_target}')
plt.axvline(x=required_temp, color='blue', linestyle='--',
            label=f'Estimated Temp = {required_temp:.2f}°C')
plt.title('Ice Cream Sales vs Temperature (Linear Regression)')
plt.xlabel('Temperature (C)')
plt.ylabel('Ice Cream Sales')
plt.legend()
plt.show()

# ------------------ Logistic Regression ------------------
# Binarize sales into a high/low flag and model P(high sales) as a
# function of temperature.
df['high_sales'] = (df['sales'] >= sales_target).astype(int)

log_reg = LogisticRegression()
log_reg.fit(df[['temperature']], df['high_sales'])
prob = log_reg.predict_proba(df[['temperature']])[:, 1]

# For a one-feature logistic model, P = 0.5 where intercept + coef*temp = 0.
decision_boundary = -log_reg.intercept_[0] / log_reg.coef_[0][0]
print(f"[Logistic Regression] Estimated temperature for 50% high sales probability: {decision_boundary:.2f}°C")

# FIX: the probability curve was plotted against unsorted temperatures,
# drawing chords back and forth across the sigmoid; sort by temperature
# so the curve renders as a clean line.
order = np.argsort(df['temperature'].values)
plt.scatter(df['temperature'], df['high_sales'], label='Actual High Sales (0/1)')
plt.plot(df['temperature'].values[order], prob[order], color='red', label='Predicted Probability')
plt.axhline(y=0.5, color='green', linestyle='--', label='Probability Threshold = 0.5')
plt.axvline(x=decision_boundary, color='blue', linestyle='--', label=f'Estimated Temp = {decision_boundary:.2f}°C')
plt.title('High Sales Probability vs Temperature (Logistic Regression)')
plt.xlabel('Temperature (C)')
plt.ylabel('Probability of High Sales')
plt.legend()
plt.show()

# ライブラリのインポート / Import required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression, LogisticRegression
import statsmodels.api as sm

# Set the random seed for reproducibility.
np.random.seed(42)

# Simulated data: 100 observations of temperature and linearly related
# sales (20 + 5*temperature with N(0, 5) noise). The np.random call
# order (uniform, then normal) matches the original seeded sequence.
n = 100
temps = np.random.uniform(10, 35, n)
obs_noise = np.random.normal(0, 5, n)
df = pd.DataFrame({'temperature': temps, 'sales': 20 + 5 * temps + obs_noise})

# 3.4 Quadratic regression ("smile curve"): sales on temperature and
# temperature squared via statsmodels OLS.
df['temperature_squared'] = df['temperature'] ** 2
quad_design = sm.add_constant(df[['temperature', 'temperature_squared']])
quad_model = sm.OLS(df['sales'], quad_design).fit()

# Linear regression: invert the fitted line to find the temperature
# that reaches the sales target.
lin_reg = LinearRegression().fit(df[['temperature']], df['sales'])
sales_pred = lin_reg.predict(df[['temperature']])
sales_target = 150
required_temp = (sales_target - lin_reg.intercept_) / lin_reg.coef_[0]

# Logistic regression: probability that sales reach the target; the
# decision boundary is where the linear score crosses zero.
df['high_sales'] = (df['sales'] >= sales_target).astype(int)
log_reg = LogisticRegression().fit(df[['temperature']], df['high_sales'])
prob = log_reg.predict_proba(df[['temperature']])[:, 1]
decision_boundary = -log_reg.intercept_[0] / log_reg.coef_[0][0]

# Plot: quadratic fit ("smile curve"). Temperatures are sorted so the
# fitted curve is drawn left-to-right rather than in data order.
plt.scatter(df['temperature'], df['sales'], label='Actual Sales')
temps_sorted = np.sort(df['temperature'])
design_sorted = sm.add_constant(pd.DataFrame({
    'temperature': temps_sorted,
    'temperature_squared': temps_sorted ** 2
}))
plt.plot(temps_sorted, quad_model.predict(design_sorted),
         color='purple', label='Quadratic Fit (Smile Curve)')
plt.title('Quadratic Fit (Smile Curve)')
plt.xlabel('Temperature (C)')
plt.ylabel('Sales')
plt.legend()
plt.show()

# Plot: linear regression with the sales-target lines.
plt.scatter(df['temperature'], df['sales'], label='Actual Sales')
# Fitted values are collinear, so plotting in data order still renders a
# straight line here.
plt.plot(df['temperature'], sales_pred, color='red', label='Linear Regression')
# Horizontal line at the target sales level; vertical line at the
# temperature estimated to achieve it.
plt.axhline(y=sales_target, color='green', linestyle='--', label=f'Sales Target = {sales_target}')
plt.axvline(x=required_temp, color='blue', linestyle='--', label=f'Estimated Temp = {required_temp:.2f}C')
plt.title('Linear Regression with Sales Target')
plt.xlabel('Temperature (C)')
plt.ylabel('Sales')
plt.legend()
plt.show()

# Plot: logistic regression with the probability threshold.
# FIX: the probability curve was plotted against unsorted temperatures,
# drawing chords back and forth across the sigmoid; sort by temperature
# so the curve renders as a clean line.
plt.scatter(df['temperature'], df['high_sales'], label='Actual High Sales (0/1)')
order = np.argsort(df['temperature'].values)
plt.plot(df['temperature'].values[order], prob[order], color='red', label='Predicted Probability')
plt.axhline(y=0.5, color='green', linestyle='--', label='Probability Threshold = 0.5')
plt.axvline(x=decision_boundary, color='blue', linestyle='--', label=f'Estimated Temp = {decision_boundary:.2f}C')
plt.title('Logistic Regression with Probability Threshold')
plt.xlabel('Temperature (C)')
plt.ylabel('Probability of High Sales')
plt.legend()
plt.show()

# Summary of results.
# FIX: the block originally ended with a bare `result_summary` expression,
# which only displays in a notebook/REPL and is a no-op when run as a
# script; print the key figures explicitly instead.
result_summary = {
    "Quadratic Model Summary": quad_model.summary().as_text(),
    "Linear Regression Estimated Temperature": float(required_temp),
    "Logistic Regression Decision Boundary Temperature": float(decision_boundary)
}
print("Linear Regression Estimated Temperature:",
      result_summary["Linear Regression Estimated Temperature"])
print("Logistic Regression Decision Boundary Temperature:",
      result_summary["Logistic Regression Decision Boundary Temperature"])

# ライブラリのインポート / Import required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller, acf, pacf, grangercausalitytests
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.stats.stattools import durbin_watson
import warnings

# Suppress library warnings globally (e.g. model-fitting messages).
warnings.filterwarnings("ignore")

# Set the random seed for reproducibility.
np.random.seed(42)

# --- Sample time series: 10 years of monthly data ---
# value = linear trend + annual sine seasonality + Gaussian noise.
n = 120
# NOTE(review): freq='M' (month-end) is deprecated in pandas >= 2.2 in
# favour of 'ME' — confirm the target pandas version before changing.
time_index = pd.date_range(start='2010-01', periods=n, freq='M')
trend = np.linspace(50, 100, n)
seasonal = 10 * np.sin(2 * np.pi * time_index.month / 12)
noise = np.random.normal(0, 2, n)

# Date-indexed DataFrame with the composite series.
df = pd.DataFrame({'date': time_index,
                   'value': trend + seasonal + noise}).set_index('date')

# --- 5.4 Seasonal adjustment via a 12-month difference ---
# First 12 entries are NaN by construction of diff(12).
df['diff'] = df['value'].diff(12)

# --- 5.6 AR(1) model on the raw series ---
model_ar = ARIMA(df['value'], order=(1, 0, 0)).fit()
# Durbin-Watson on the residuals: values near 2 indicate little
# remaining first-order autocorrelation.
dw_stat = durbin_watson(model_ar.resid)

# --- 5.7 ARIMA(1,1,1) model and a 12-step forecast ---
model_arima = ARIMA(df['value'], order=(1, 1, 1)).fit()
forecast = model_arima.forecast(12)

# --- 5.12 Granger causality test ---
# Tests whether the second column (the 1-step lagged value) helps predict
# the first; with a self-lag this is nearly tautological, but it is kept
# as in the original exercise.
df['lagged_value'] = df['value'].shift(1)
lag_pairs = df[['value', 'lagged_value']].dropna()
# NOTE(review): `verbose` is deprecated in newer statsmodels — confirm version.
gc_test_result = grangercausalitytests(lag_pairs, maxlag=2, verbose=False)
gc_pvalues = {lag: res[0]['ssr_ftest'][1] for lag, res in gc_test_result.items()}

# --- Plot: original vs. seasonally differenced series ---
df[['value', 'diff']].plot(title="Original and Seasonal Differenced Series")
plt.show()

# --- Plot: ARIMA forecast ---
# FIX: pd.date_range(df.index[-1], periods=12, freq='M') starts AT the
# last observed month, so the first forecast point was drawn on top of
# the final observation; generate 13 periods and drop the first so the
# forecast begins one month after the sample ends.
forecast_index = pd.date_range(df.index[-1], periods=13, freq='M')[1:]
plt.plot(df['value'], label='Observed')
plt.plot(forecast_index, forecast, label='ARIMA Forecast', color='red')
plt.title('ARIMA(1,1,1) Forecast')
plt.legend()
plt.show()

# --- Summary of results ---
# FIX: the block originally ended with a bare `summary` expression, which
# only displays in a notebook/REPL and is a no-op when run as a script;
# print the key figures explicitly instead.
summary = {
    "Durbin-Watson Statistic": dw_stat,
    "AR(1) Model Summary": model_ar.summary().as_text(),
    "ARIMA(1,1,1) Model Summary": model_arima.summary().as_text(),
    "Granger Causality Test p-values (lag 1 and 2)": gc_pvalues
}
print("Durbin-Watson Statistic:", summary["Durbin-Watson Statistic"])
print("Granger Causality Test p-values (lag 1 and 2):",
      summary["Granger Causality Test p-values (lag 1 and 2)"])

0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do by signing up
0
0

Delete article

Deleted articles cannot be recovered.

Any draft of this article will also be deleted.

Are you sure you want to delete this article?