0
0

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?

完成2

Posted at

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Load the training and test datasets
train_data, test_data = (
    pd.read_csv(csv_path) for csv_path in ('hour_train.csv', 'hour_test.csv')
)

# Compute moving averages (window size 3) of the weather-related columns.
# With min_periods=1 the first rows average over however many values are
# available so far instead of producing NaN.
moving_avg_columns = {
    'temp': 'temp_move_avg',
    'hum': 'hum_move_avg',
    'windspeed': 'windspeed_move_avg',
    'weathersit': 'weathersit_move_avg',
}
for frame in (train_data, test_data):
    for source_col, smoothed_col in moving_avg_columns.items():
        rolling_window = frame[source_col].rolling(window=3, min_periods=1)
        frame[smoothed_col] = rolling_window.mean()

# Create new hour-based indicator features (1 when the condition holds, else 0)
for frame in (train_data, test_data):
    hour = frame['hr']
    # 1 for the specific hours 8, 17 and 18
    frame['hr_8_17_18'] = hour.isin([8, 17, 18]).astype('int64')
    # 1 for hours from 7 to 20, both ends included
    frame['hr_7_20'] = hour.between(7, 20).astype('int64')

# Make the training and test frames share the same set of columns
# (the original note describes this as aligning dummy-variable columns).
train_columns = set(train_data.columns)
test_columns = set(test_data.columns)
missing_in_test = train_columns - test_columns
missing_in_train = test_columns - train_columns

# A column that exists on only one side is added to the other side filled with 0.
# NOTE(review): this applies to every column, targets included -- e.g. a 'cnt'
# column absent from the test file would be created as all zeros. Confirm that
# this is intended.
for col in missing_in_test:
    test_data[col] = 0
for col in missing_in_train:
    train_data[col] = 0

# Sort the columns by label so both frames end up with the same column order
train_data = train_data.sort_index(axis=1)
test_data = test_data.sort_index(axis=1)

# Select the input features and the two target variables
features = [
    'yr', 'mnth', 'hr', 'holiday', 'workingday',
    'hr_8_17_18', 'hr_7_20',
    'temp_move_avg', 'hum_move_avg', 'windspeed_move_avg', 'weathersit_move_avg',
]
target_casual, target_registered = 'casual', 'registered'

X_train = train_data.loc[:, features]
y_train_casual = train_data.loc[:, target_casual]
y_train_registered = train_data.loc[:, target_registered]

# Standardize the inputs; the scaler statistics come from the training features
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)

# Definition of the neural network model
def build_nn_model(input_dim=None):
    """Build and compile a small fully connected regression network.

    The network stacks Dense(64, relu) -> Dense(32, relu) -> Dense(1) and is
    compiled with the Adam optimizer, mean squared error as the loss and mean
    absolute error as an additional metric.

    Args:
        input_dim: Number of input features. When omitted, the column count
            of the module-level ``X_train_scaled`` array is used, so calling
            the function with no arguments behaves as before.

    Returns:
        The compiled, untrained ``Sequential`` model.
    """
    if input_dim is None:
        input_dim = X_train_scaled.shape[1]

    model = Sequential()
    model.add(Dense(64, input_dim=input_dim, activation='relu'))
    model.add(Dense(32, activation='relu'))
    # Single linear output unit: the model predicts one count per row
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model

# Train one model per target with identical fit settings. The casual model is
# built and fitted first, then the registered model.
# Per the Keras docs, validation_split holds out the last 20% of the rows.
fit_options = {'epochs': 100, 'validation_split': 0.2, 'verbose': 0}

# Training of the casual model
model_casual = build_nn_model()
history_casual = model_casual.fit(X_train_scaled, y_train_casual, **fit_options)

# Training of the registered model
model_registered = build_nn_model()
history_registered = model_registered.fit(
    X_train_scaled, y_train_registered, **fit_options
)

# Prepare the test data: same feature columns, transformed with the fitted scaler
X_test = test_data.loc[:, features]
X_test_scaled = scaler.transform(X_test)
y_test_actual = test_data.loc[:, 'cnt']

# Predict on the test data; the total is the sum of the two model outputs
y_test_pred_casual = model_casual.predict(X_test_scaled)
y_test_pred_registered = model_registered.predict(X_test_scaled)
y_test_pred_total = y_test_pred_casual + y_test_pred_registered

# Evaluate the summed prediction against the actual 'cnt' column
(
    test_mse_casual_registered,
    test_r2_casual_registered,
    test_mae_casual_registered,
) = (
    score(y_test_actual, y_test_pred_total)
    for score in (mean_squared_error, r2_score, mean_absolute_error)
)

print(f'Test Mean Squared Error (casual + registered): {test_mse_casual_registered:.2f}')
print(f'Test R^2 Score (casual + registered): {test_r2_casual_registered:.2f}')
print(f'Test Mean Absolute Error (casual + registered): {test_mae_casual_registered:.2f}')

# Obtaining and visualizing feature importance is difficult for a neural
# network, so the mean absolute value of the first-layer weights per input
# feature is used instead.
def _rank_and_plot_importance(model, title):
    """Return (importance, ranked frame) for *model* and show a bar chart."""
    first_layer_kernel = model.layers[0].get_weights()[0]
    # One row of the kernel per input feature; average |weight| across the units
    importance = np.abs(first_layer_kernel).mean(axis=1)
    ranked = pd.DataFrame({'feature': features, 'importance': importance})
    ranked = ranked.sort_values(by='importance', ascending=False)

    plt.figure(figsize=(12, 6))
    plt.barh(ranked['feature'], ranked['importance'], color='royalblue')
    plt.xlabel('Importance')
    plt.ylabel('Feature')
    plt.title(title)
    # Flip the y axis so the largest importance is drawn at the top
    plt.gca().invert_yaxis()
    plt.show()
    return importance, ranked


importance_casual, importance_df_casual = _rank_and_plot_importance(
    model_casual, 'Feature Importance (casual)'
)
importance_registered, importance_df_registered = _rank_and_plot_importance(
    model_registered, 'Feature Importance (registered)'
)

# Attach the predictions to the test frame so actual and predicted values can
# be grouped and plotted as line charts.
for column_name, predictions in (
    ('y_test_pred_casual', y_test_pred_casual),
    ('y_test_pred_registered', y_test_pred_registered),
    ('y_test_pred_total', y_test_pred_total),
):
    test_data[column_name] = predictions

by_hour = test_data.groupby('hr')

# Mean actual and predicted values per hour of the day (casual)
hourly_avg_actual_casual = by_hour['casual'].mean()
hourly_avg_pred_casual = by_hour['y_test_pred_casual'].mean()

# Mean actual and predicted values per hour of the day (registered)
hourly_avg_actual_registered = by_hour['registered'].mean()
hourly_avg_pred_registered = by_hour['y_test_pred_registered'].mean()

# Mean actual and predicted values per hour of the day (total)
hourly_avg_actual_total = by_hour['cnt'].mean()
hourly_avg_pred_total = by_hour['y_test_pred_total'].mean()

# Line charts of the hourly mean actual vs. predicted values
# (casual, registered, total -- one figure each)
def _plot_hourly_comparison(actual, predicted, actual_label, predicted_label, title):
    """Draw one actual-vs-predicted line chart indexed by hour of the day."""
    plt.figure(figsize=(12, 6))
    plt.plot(actual.index, actual, label=actual_label, marker='o', color='royalblue')
    plt.plot(predicted.index, predicted, label=predicted_label, marker='o', color='skyblue')
    plt.xlabel('Hour of the Day')
    plt.ylabel('Average Count')
    plt.title(title)
    plt.legend()
    plt.grid(True)
    plt.show()


# casual
_plot_hourly_comparison(
    hourly_avg_actual_casual, hourly_avg_pred_casual,
    'Actual (casual)', 'Predicted (casual)',
    'Average Actual and Predicted Count by Hour of the Day (casual)',
)

# registered
_plot_hourly_comparison(
    hourly_avg_actual_registered, hourly_avg_pred_registered,
    'Actual (registered)', 'Predicted (registered)',
    'Average Actual and Predicted Count by Hour of the Day (registered)',
)

# total
_plot_hourly_comparison(
    hourly_avg_actual_total, hourly_avg_pred_total,
    'Actual (total)', 'Predicted (total)',
    'Average Actual and Predicted Count by Hour of the Day (total)',
)

# Line charts of the per-date mean actual vs. predicted values
# (casual, registered, total -- one figure each)
def _plot_daily_comparison(actual, predicted, actual_label, predicted_label, title):
    """Draw one actual-vs-predicted line chart indexed by date."""
    plt.figure(figsize=(12, 6))
    plt.plot(actual.index, actual, label=actual_label, marker='o', color='royalblue')
    plt.plot(predicted.index, predicted, label=predicted_label, marker='o', color='skyblue')
    plt.xlabel('Date')
    plt.ylabel('Average Count')
    plt.title(title)
    plt.legend()
    plt.grid(True)
    plt.show()


# Convert dteday to datetime before grouping by it
test_data['dteday'] = pd.to_datetime(test_data['dteday'])
by_date = test_data.groupby('dteday')

# casual
daily_avg_actual_casual = by_date['casual'].mean()
daily_avg_pred_casual = by_date['y_test_pred_casual'].mean()
_plot_daily_comparison(
    daily_avg_actual_casual, daily_avg_pred_casual,
    'Actual (casual)', 'Predicted (casual)',
    'Average Actual and Predicted Count by Date (casual)',
)

# registered
daily_avg_actual_registered = by_date['registered'].mean()
daily_avg_pred_registered = by_date['y_test_pred_registered'].mean()
_plot_daily_comparison(
    daily_avg_actual_registered, daily_avg_pred_registered,
    'Actual (registered)', 'Predicted (registered)',
    'Average Actual and Predicted Count by Date (registered)',
)

# total
daily_avg_actual_total = by_date['cnt'].mean()
daily_avg_pred_total = by_date['y_test_pred_total'].mean()
_plot_daily_comparison(
    daily_avg_actual_total, daily_avg_pred_total,
    'Actual (total)', 'Predicted (total)',
    'Average Actual and Predicted Count by Date (total)',
)

# (1) Histogram showing the casual and registered distributions in one figure
plt.figure(figsize=(12, 6))
for user_type, shade in (('casual', 'skyblue'), ('registered', 'royalblue')):
    # Semi-transparent bars so the two overlapping distributions stay visible
    plt.hist(train_data[user_type], bins=30, alpha=0.5, label=user_type, color=shade)
plt.xlabel('Number of Users')
plt.ylabel('Frequency')
plt.title('Distribution of casual and registered users')
plt.legend()
plt.show()

# (2) Line charts of the mean casual and registered counts for each listed variable
variables = ['yr', 'mnth', 'hr', 'holiday', 'workingday', 'weekday', 'temp', 'hum', 'windspeed', 'weathersit']
for var in variables:
    plt.figure(figsize=(12, 6))
    # Average only the two plotted columns. Calling .mean() on the whole frame
    # raises TypeError on pandas >= 2.0 when a non-numeric column is present,
    # and it also computes means that are never used.
    grouped_data = train_data.groupby(var)[['casual', 'registered']].mean()
    plt.plot(grouped_data.index, grouped_data['casual'], label='casual', marker='o', color='skyblue')
    plt.plot(grouped_data.index, grouped_data['registered'], label='registered', marker='o', color='royalblue')
    plt.xlabel(var)
    plt.ylabel('Average Count')
    plt.title(f'Average casual and registered users by {var}')
    plt.legend()
    plt.grid(True)
    plt.show()

0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do by signing up
0
0

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?