0
0

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?

★ニューラルネットーワーク cnt 

Posted at

104/104 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
Validation Mean Squared Error: 2460.65
Validation R^2 Score: 0.92
Validation Mean Absolute Error: 33.47
24/24 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step
Test Mean Squared Error: 9220.11
Test R^2 Score: 0.66
Test Mean Absolute Error: 63.23

import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import matplotlib.pyplot as plt

データの読み込み

train_data = pd.read_csv('hour_train.csv')
test_data = pd.read_csv('hour_test.csv')

欠損の処理

def fill_missing_hours(df):
mis_indexs = df[df.isnull().any(axis=1)].index # 欠損のある行番号を抜き出す
for mis_index in mis_indexs:
if mis_index > 0:
hour = df.iloc[mis_index - 1]['hr'] + 1
else:
hour = df.iloc[mis_index + 1]['hr'] - 1
if hour == 24:
hour = 0
elif hour == -1:
hour = 23
df.at[mis_index, 'hr'] = hour

fill_missing_hours(train_data)
fill_missing_hours(test_data)

① casualとregisteredの分布の違いを一つの図で表すヒストグラム

plt.figure(figsize=(12, 6))
plt.hist(train_data['casual'], bins=30, alpha=0.5, label='casual', color='skyblue')
plt.hist(train_data['registered'], bins=30, alpha=0.5, label='registered', color='royalblue')
plt.xlabel('Number of Users')
plt.ylabel('Frequency')
plt.title('Distribution of casual and registered users')
plt.legend()
plt.show()

② 指定された変数ごとにcasualとregisteredの平均値を示す折れ線グラフ

variables = ['yr', 'mnth', 'hr', 'holiday', 'workingday', 'weekday', 'temp', 'hum', 'windspeed', 'weathersit']
for var in variables:
plt.figure(figsize=(12, 6))
grouped_data = train_data.groupby(var).mean()
plt.plot(grouped_data.index, grouped_data['casual'], label='casual', marker='o', color='skyblue')
plt.plot(grouped_data.index, grouped_data['registered'], label='registered', marker='o', color='royalblue')
plt.xlabel(var)
plt.ylabel('Average Count')
plt.title(f'Average casual and registered users by {var}')
plt.legend()
plt.grid(True)
plt.show()

移動平均の計算(windowサイズは3に設定していますが、適宜調整してください)

train_data['temp_move_avg'] = train_data['temp'].rolling(window=3, min_periods=1).mean()
train_data['hum_move_avg'] = train_data['hum'].rolling(window=3, min_periods=1).mean()
train_data['windspeed_move_avg'] = train_data['windspeed'].rolling(window=3, min_periods=1).mean()
train_data['weathersit_move_avg'] = train_data['weathersit'].rolling(window=3, min_periods=1).mean()

test_data['temp_move_avg'] = test_data['temp'].rolling(window=3, min_periods=1).mean()
test_data['hum_move_avg'] = test_data['hum'].rolling(window=3, min_periods=1).mean()
test_data['windspeed_move_avg'] = test_data['windspeed'].rolling(window=3, min_periods=1).mean()
test_data['weathersit_move_avg'] = test_data['weathersit'].rolling(window=3, min_periods=1).mean()

新しい特徴量の作成

train_data['hr_8_17_18'] = train_data['hr'].apply(lambda x: 1 if x in [8, 17, 18] else 0)
test_data['hr_8_17_18'] = test_data['hr'].apply(lambda x: 1 if x in [8, 17, 18] else 0)

train_data['hr_7_20'] = train_data['hr'].apply(lambda x: 1 if 7 <= x <= 20 else 0)
test_data['hr_7_20'] = test_data['hr'].apply(lambda x: 1 if 7 <= x <= 20 else 0)

トレーニングデータとテストデータでのダミー変数の列を一致させる

train_columns = set(train_data.columns)
test_columns = set(test_data.columns)
missing_in_test = train_columns - test_columns
missing_in_train = test_columns - train_columns

for col in missing_in_test:
test_data[col] = 0
for col in missing_in_train:
train_data[col] = 0

列を揃えるために並べ替え

train_data = train_data.sort_index(axis=1)
test_data = test_data.sort_index(axis=1)

特徴量とターゲット変数の設定

features = ['yr', 'mnth', 'hr', 'holiday', 'workingday', 'hr_8_17_18', 'hr_7_20',
'temp_move_avg', 'hum_move_avg', 'windspeed_move_avg', 'weathersit_move_avg']
target = 'cnt'

X_train = train_data[features]
y_train = train_data[target]
X_test = test_data[features]
y_test = test_data[target]

データの標準化

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

ニューラルネットワークモデルの作成

model = Sequential([
Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)),
Dense(32, activation='relu'),
Dense(1)
])

model.compile(optimizer='adam', loss='mse', metrics=['mae'])

EarlyStoppingの設定

early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

データの分割(トレーニングとバリデーション用)

X_train_split, X_val_split, y_train_split, y_val_split = train_test_split(X_train_scaled, y_train, test_size=0.2, random_state=42)

モデルのトレーニング

history = model.fit(X_train_split, y_train_split,
validation_data=(X_val_split, y_val_split),
epochs=100,
batch_size=32,
callbacks=[early_stopping],
verbose=0)

バリデーションデータでの予測

y_val_pred = model.predict(X_val_split).flatten()

バリデーションデータでの評価

print(f'Validation Mean Squared Error: {mean_squared_error(y_val_split, y_val_pred):.2f}')
print(f'Validation R^2 Score: {r2_score(y_val_split, y_val_pred):.2f}')
print(f'Validation Mean Absolute Error: {mean_absolute_error(y_val_split, y_val_pred):.2f}')

テストデータでの予測

y_test_pred = model.predict(X_test_scaled).flatten()

テストデータの評価

test_mse = mean_squared_error(y_test, y_test_pred)
test_r2 = r2_score(y_test, y_test_pred)
test_mae = mean_absolute_error(y_test, y_test_pred)

print(f'Test Mean Squared Error: {test_mse:.2f}')
print(f'Test R^2 Score: {test_r2:.2f}')
print(f'Test Mean Absolute Error: {test_mae:.2f}')

実測値と予測値の差を日付と時間ごとに可視化

test_data['dteday'] = pd.to_datetime(test_data['dteday']) # dteday列を日付に変換
test_data['datetime'] = test_data.apply(lambda row: pd.Timestamp(year=row['dteday'].year, month=row['dteday'].month, day=row['dteday'].day, hour=row['hr']), axis=1)
test_data['actual'] = y_test
test_data['predicted'] = y_test_pred
test_data['difference'] = test_data['actual'] - test_data['predicted']

日付ごとの平均差分をプロット

daily_diff = test_data.groupby(test_data['datetime'].dt.date)['difference'].mean()
plt.figure(figsize=(12, 6))
daily_diff.plot(kind='bar', color='skyblue')
plt.xlabel('Date')
plt.ylabel('Average Difference')
plt.title('Average Difference between Actual and Predicted Counts by Date')
plt.xticks(rotation=45)
plt.grid(True)
plt.show()

時間ごとの平均差分をプロット

hourly_diff = test_data.groupby(test_data['datetime'].dt.hour)['difference'].mean()
plt.figure(figsize=(12, 6))
hourly_diff.plot(kind='bar', color='royalblue')
plt.xlabel('Hour')
plt.ylabel('Average Difference')
plt.title('Average Difference between Actual and Predicted Counts by Hour')
plt.xticks(rotation=45)
plt.grid(True)
plt.show()

実測値と予測値をプロットする折れ線グラフ

日付ごとの実測値と予測値のプロット

daily_actual = test_data.groupby(test_data['datetime'].dt.date)['actual'].sum()
daily_predicted = test_data.groupby(test_data['datetime'].dt.date)['predicted'].sum()

plt.figure(figsize=(12, 6))
plt.plot(daily_actual.index, daily_actual.values, label='Actual', marker='o', color='skyblue')
plt.plot(daily_predicted.index, daily_predicted.values, label='Predicted', marker='o', color='royalblue')
plt.xlabel('Date')
plt.ylabel('Count')
plt.title('Actual vs Predicted Counts by Date')
plt.xticks(rotation=45)
plt.legend()
plt.grid(True)
plt.show()

時間ごとの実測値と予測値のプロット

hourly_actual = test_data.groupby(test_data['datetime'].dt.hour)['actual'].sum()
hourly_predicted = test_data.groupby(test_data['datetime'].dt.hour)['predicted'].sum()

plt.figure(figsize=(12, 6))
plt.plot(hourly_actual.index, hourly_actual.values, label='Actual', marker='o', color='skyblue')
plt.plot(hourly_predicted.index, hourly_predicted.values, label='Predicted', marker='o', color='royalblue')
plt.xlabel('Hour')
plt.ylabel('Count')
plt.title('Actual vs Predicted Counts by Hour')
plt.xticks(rotation=45)
plt.legend()
plt.grid(True)
plt.show()

0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?