Validation Mean Squared Error (casual): 327.34
Validation R^2 Score (casual): 0.87
Validation Mean Absolute Error (casual): 10.70
Validation Mean Squared Error (registered): 5745.50
Validation R^2 Score (registered): 0.73
Validation Mean Absolute Error (registered): 49.35
24/24 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step
24/24 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step
Test Mean Squared Error (combined): 13773.48
Test R^2 Score (combined): 0.49
Test Mean Absolute Error (combined): 86.38
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import matplotlib.pyplot as plt
データの読み込み
train_data = pd.read_csv('hour_train.csv')
test_data = pd.read_csv('hour_test.csv')
データのカラム名を確認
print(train_data.columns)
正しいカラム名に修正
features = ['yr', 'mnth', 'hr', 'holiday', 'workingday', 'temp', 'hum', 'windspeed', 'weathersit']
target_casual = 'casual'
target_registered = 'registered'
X_train = train_data[features]
y_train_casual = train_data[target_casual]
y_train_registered = train_data[target_registered]
X_test = test_data[features]
y_test_actual = test_data['cnt']
データの標準化
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
モデルの定義
def build_model():
model = Sequential([
Input(shape=(X_train_scaled.shape[1],)),
Dense(64, activation='relu'),
Dense(32, activation='relu'),
Dense(1)
])
model.compile(optimizer='adam', loss='mse', metrics=['mae'])
return model
casual モデルの訓練
model_casual = build_model()
X_train_split, X_val_split, y_train_split_casual, y_val_split_casual = train_test_split(X_train_scaled, y_train_casual, test_size=0.2, random_state=42)
model_casual.fit(X_train_split, y_train_split_casual, validation_data=(X_val_split, y_val_split_casual), epochs=50, batch_size=32, verbose=1)
registered モデルの訓練
model_registered = build_model()
X_train_split, X_val_split, y_train_split_registered, y_val_split_registered = train_test_split(X_train_scaled, y_train_registered, test_size=0.2, random_state=42)
model_registered.fit(X_train_split, y_train_split_registered, validation_data=(X_val_split, y_val_split_registered), epochs=50, batch_size=32, verbose=1)
バリデーションデータでの予測
y_val_pred_casual = model_casual.predict(X_val_split)
y_val_pred_registered = model_registered.predict(X_val_split)
バリデーションデータでの評価
print(f'Validation Mean Squared Error (casual): {mean_squared_error(y_val_split_casual, y_val_pred_casual):.2f}')
print(f'Validation R^2 Score (casual): {r2_score(y_val_split_casual, y_val_pred_casual):.2f}')
print(f'Validation Mean Absolute Error (casual): {mean_absolute_error(y_val_split_casual, y_val_pred_casual):.2f}')
print(f'Validation Mean Squared Error (registered): {mean_squared_error(y_val_split_registered, y_val_pred_registered):.2f}')
print(f'Validation R^2 Score (registered): {r2_score(y_val_split_registered, y_val_pred_registered):.2f}')
print(f'Validation Mean Absolute Error (registered): {mean_absolute_error(y_val_split_registered, y_val_pred_registered):.2f}')
テストデータでの予測
y_test_pred_casual = model_casual.predict(X_test_scaled)
y_test_pred_registered = model_registered.predict(X_test_scaled)
予測の合算
y_test_pred_combined = y_test_pred_casual + y_test_pred_registered
テストデータでの評価
test_mse_combined = mean_squared_error(y_test_actual, y_test_pred_combined)
test_r2_combined = r2_score(y_test_actual, y_test_pred_combined)
test_mae_combined = mean_absolute_error(y_test_actual, y_test_pred_combined)
print(f'Test Mean Squared Error (combined): {test_mse_combined:.2f}')
print(f'Test R^2 Score (combined): {test_r2_combined:.2f}')
print(f'Test Mean Absolute Error (combined): {test_mae_combined:.2f}')
実測値と予測値の差を日付と時間ごとに可視化
test_data['dteday'] = pd.to_datetime(test_data['dteday']) # dteday列を日付に変換
test_data['datetime'] = test_data.apply(lambda row: pd.Timestamp(year=row['dteday'].year, month=row['dteday'].month, day=row['dteday'].day, hour=row['hr']), axis=1)
test_data['actual'] = y_test_actual
test_data['predicted'] = y_test_pred_combined
test_data['difference'] = test_data['actual'] - test_data['predicted']
日付ごとの平均差分をプロット
daily_diff = test_data.groupby(test_data['datetime'].dt.date)['difference'].mean()
plt.figure(figsize=(12, 6))
daily_diff.plot(kind='bar', color='skyblue')
plt.xlabel('Date')
plt.ylabel('Average Difference')
plt.title('Average Difference between Actual and Predicted Counts by Date')
plt.xticks(rotation=45)
plt.grid(True)
plt.show()
時間ごとの平均差分をプロット
hourly_diff = test_data.groupby(test_data['datetime'].dt.hour)['difference'].mean()
plt.figure(figsize=(12, 6))
hourly_diff.plot(kind='bar', color='royalblue')
plt.xlabel('Hour')
plt.ylabel('Average Difference')
plt.title('Average Difference between Actual and Predicted Counts by Hour')
plt.xticks(rotation=45)
plt.grid(True)
plt.show()
実測値と予測値をプロットする折れ線グラフ
日付ごとの実測値と予測値のプロット
daily_actual = test_data.groupby(test_data['datetime'].dt.date)['actual'].sum()
daily_predicted = test_data.groupby(test_data['datetime'].dt.date)['predicted'].sum()
plt.figure(figsize=(12, 6))
plt.plot(daily_actual.index, daily_actual.values, label='Actual', marker='o', color='skyblue')
plt.plot(daily_predicted.index, daily_predicted.values, label='Predicted', marker='o', color='royalblue')
plt.xlabel('Date')
plt.ylabel('Count')
plt.title('Actual vs Predicted Counts by Date')
plt.xticks(rotation=45)
plt.legend()
plt.grid(True)
plt.show()
時間ごとの実測値と予測値のプロット
hourly_actual = test_data.groupby(test_data['datetime'].dt.hour)['actual'].sum()
hourly_predicted = test_data.groupby(test_data['datetime'].dt.hour)['predicted'].sum()
plt.figure(figsize=(12, 6))
plt.plot(hourly_actual.index, hourly_actual.values, label='Actual', marker='o', color='skyblue')
plt.plot(hourly_predicted.index, hourly_predicted.values, label='Predicted', marker='o', color='royalblue')
plt.xlabel('Hour')
plt.ylabel('Count')
plt.title('Actual vs Predicted Counts by Hour')
plt.xticks(rotation=45)
plt.legend()
plt.grid(True)
plt.show()