0
0

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?

★ ニューラル 別々

Posted at

Validation Mean Squared Error (casual): 327.34
Validation R^2 Score (casual): 0.87
Validation Mean Absolute Error (casual): 10.70
Validation Mean Squared Error (registered): 5745.50
Validation R^2 Score (registered): 0.73
Validation Mean Absolute Error (registered): 49.35
24/24 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step
24/24 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step
Test Mean Squared Error (combined): 13773.48
Test R^2 Score (combined): 0.49
Test Mean Absolute Error (combined): 86.38

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import matplotlib.pyplot as plt

# Load the training and test CSV files.
train_data = pd.read_csv('hour_train.csv')
test_data = pd.read_csv('hour_test.csv')

# Print the column names so the feature list used below can be checked
# against what the file actually contains.
print(train_data.columns)

# Feature and target columns, spelled the way they appear in the CSV header.
features = ['yr', 'mnth', 'hr', 'holiday', 'workingday', 'temp', 'hum', 'windspeed', 'weathersit']
# The two targets are modelled separately (one network each).
target_casual = 'casual'
target_registered = 'registered'

X_train = train_data[features]
y_train_casual = train_data[target_casual]
y_train_registered = train_data[target_registered]
X_test = test_data[features]
# The test set is scored against 'cnt'; the casual and registered predictions
# are summed before comparison.
# NOTE(review): presumably cnt == casual + registered -- confirm against the data.
y_test_actual = test_data['cnt']

# Standardize the features. The scaler is fitted on the training features only
# and the same transform is then applied to the test features.
# NOTE(review): the scaler is fitted on all training rows before the
# train/validation split below, so validation rows contribute to the scaling
# statistics.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Model definition
def build_model(input_dim=None):
    """Build and compile a small fully connected regression network.

    The network has two hidden ReLU layers (64 and 32 units) and a single
    linear output unit. It is compiled with the Adam optimizer, mean squared
    error as the loss, and mean absolute error as an extra metric.

    Args:
        input_dim: Number of input features. When omitted, the column count
            of the module-level ``X_train_scaled`` array is used, which is
            what the original zero-argument call relied on.

    Returns:
        The compiled Keras ``Sequential`` model (not yet trained).
    """
    if input_dim is None:
        input_dim = X_train_scaled.shape[1]

    model = Sequential([
        Input(shape=(input_dim,)),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(1),
    ])
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model

# Train the casual model
model_casual = build_model()

# Hold out 20% of the training rows for validation. Both targets go through a
# single train_test_split call so that X_val_split, y_val_split_casual and
# y_val_split_registered are guaranteed to refer to the same rows (previously
# this relied on two separate calls sharing the same random_state).
(
    X_train_split,
    X_val_split,
    y_train_split_casual,
    y_val_split_casual,
    y_train_split_registered,
    y_val_split_registered,
) = train_test_split(
    X_train_scaled,
    y_train_casual,
    y_train_registered,
    test_size=0.2,
    random_state=42,
)

model_casual.fit(X_train_split, y_train_split_casual, validation_data=(X_val_split, y_val_split_casual), epochs=50, batch_size=32, verbose=1)

# Train the registered model
model_registered = build_model()
model_registered.fit(X_train_split, y_train_split_registered, validation_data=(X_val_split, y_val_split_registered), epochs=50, batch_size=32, verbose=1)

# Predict on the validation rows with each model.
y_val_pred_casual = model_casual.predict(X_val_split)
y_val_pred_registered = model_registered.predict(X_val_split)

# Report MSE, R^2 and MAE on the validation rows, first for the casual model
# and then for the registered model.
for label, y_true, y_pred in (
    ('casual', y_val_split_casual, y_val_pred_casual),
    ('registered', y_val_split_registered, y_val_pred_registered),
):
    print(f'Validation Mean Squared Error ({label}): {mean_squared_error(y_true, y_pred):.2f}')
    print(f'Validation R^2 Score ({label}): {r2_score(y_true, y_pred):.2f}')
    print(f'Validation Mean Absolute Error ({label}): {mean_absolute_error(y_true, y_pred):.2f}')

# Predict on the test set with each model. The networks end in Dense(1), so
# predict() is expected to return one column per row; ravel() flattens that to
# a plain 1-D vector that lines up with y_test_actual.
y_test_pred_casual = model_casual.predict(X_test_scaled).ravel()
y_test_pred_registered = model_registered.predict(X_test_scaled).ravel()

# Sum the two predictions to obtain the predicted total count.
y_test_pred_combined = y_test_pred_casual + y_test_pred_registered

# Score the combined prediction against the actual totals.
test_mse_combined = mean_squared_error(y_test_actual, y_test_pred_combined)
test_r2_combined = r2_score(y_test_actual, y_test_pred_combined)
test_mae_combined = mean_absolute_error(y_test_actual, y_test_pred_combined)

print(f'Test Mean Squared Error (combined): {test_mse_combined:.2f}')
print(f'Test R^2 Score (combined): {test_r2_combined:.2f}')
print(f'Test Mean Absolute Error (combined): {test_mae_combined:.2f}')

# Add datetime / actual / predicted / difference columns to test_data so the
# prediction error can be visualised by date and by hour.
test_data['dteday'] = pd.to_datetime(test_data['dteday'])  # parse the dteday column as dates
# Calendar date (time of day dropped) plus the 'hr' column as an hour offset.
# This is a vectorized replacement for building one pd.Timestamp per row.
test_data['datetime'] = test_data['dteday'].dt.normalize() + pd.to_timedelta(test_data['hr'], unit='h')
test_data['actual'] = y_test_actual
# Flatten in case the combined prediction is still a single-column 2-D array.
test_data['predicted'] = np.ravel(y_test_pred_combined)
test_data['difference'] = test_data['actual'] - test_data['predicted']

# Bar chart of the mean (actual - predicted) difference per calendar date.
daily_diff = test_data.groupby(test_data['datetime'].dt.date)['difference'].mean()
plt.figure(figsize=(12, 6))
daily_diff.plot(kind='bar', color='skyblue')
plt.xlabel('Date')
plt.ylabel('Average Difference')
plt.title('Average Difference between Actual and Predicted Counts by Date')
plt.xticks(rotation=45)
plt.grid(True)
plt.show()

# Bar chart of the mean (actual - predicted) difference per hour of day.
hourly_diff = test_data.groupby(test_data['datetime'].dt.hour)['difference'].mean()
plt.figure(figsize=(12, 6))
hourly_diff.plot(kind='bar', color='royalblue')
plt.xlabel('Hour')
plt.ylabel('Average Difference')
plt.title('Average Difference between Actual and Predicted Counts by Hour')
plt.xticks(rotation=45)
plt.grid(True)
plt.show()

# Line charts comparing actual and predicted counts.

# Per-date totals of the actual and predicted counts (one groupby for both).
daily_totals = test_data.groupby(test_data['datetime'].dt.date)[['actual', 'predicted']].sum()
daily_actual = daily_totals['actual']
daily_predicted = daily_totals['predicted']

plt.figure(figsize=(12, 6))
plt.plot(daily_actual.index, daily_actual.values, label='Actual', marker='o', color='skyblue')
plt.plot(daily_predicted.index, daily_predicted.values, label='Predicted', marker='o', color='royalblue')
plt.xlabel('Date')
plt.ylabel('Count')
plt.title('Actual vs Predicted Counts by Date')
plt.xticks(rotation=45)
plt.legend()
plt.grid(True)
plt.show()

# Per-hour-of-day totals of the actual and predicted counts (one groupby for both).
hourly_totals = test_data.groupby(test_data['datetime'].dt.hour)[['actual', 'predicted']].sum()
hourly_actual = hourly_totals['actual']
hourly_predicted = hourly_totals['predicted']

plt.figure(figsize=(12, 6))
plt.plot(hourly_actual.index, hourly_actual.values, label='Actual', marker='o', color='skyblue')
plt.plot(hourly_predicted.index, hourly_predicted.values, label='Predicted', marker='o', color='royalblue')
plt.xlabel('Hour')
plt.ylabel('Count')
plt.title('Actual vs Predicted Counts by Hour')
plt.xticks(rotation=45)
plt.legend()
plt.grid(True)
plt.show()

0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do by signing up
0
0

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?