Keras regression

Posted at 2024-04-17

Performing regression analysis with Keras.

Loading the libraries and the data

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

df = pd.read_csv("https://storage.googleapis.com/download.tensorflow.org/data/abalone_train.csv",
    names=["Length", "Diameter", "Height", "Whole weight", "Shucked weight",
           "Viscera weight", "Shell weight", "Age"])
df.head()

Separate the features (explanatory variables) and the target variable from df

df_features = df.copy()
df_labels = df_features.pop('Age')
df_features = np.array(df_features)

Split into training and test data, then standardize

X_train, X_test, y_train, y_test = train_test_split(df_features, df_labels, test_size=0.2)

# Reset the index so that positional slicing in the k-fold loops below behaves as expected
y_train = y_train.reset_index(drop=True)

# Fit the scaler on the training data only, then apply the same transformation to both sets
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
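
As a quick optional sanity check, the standardized training features should now have a mean close to 0 and a standard deviation close to 1:

# Optional check: each standardized training feature should have roughly zero mean and unit variance
print(X_train.mean(axis=0).round(3))
print(X_train.std(axis=0).round(3))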

Create a simple Keras model

def build_model():
    # Two hidden layers with ReLU activations and a single linear output unit for regression
    model = keras.Sequential([layers.Dense(64, activation='relu'),
                              layers.Dense(64, activation='relu'),
                              layers.Dense(1)])
    # MSE loss for regression; track MAE as a more interpretable metric
    model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
    return model
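
Since the model has no explicit Input layer, it is only built once it sees data. As an optional sketch, the layer shapes can be inspected by building it manually for the abalone feature dimension:

# Optional: build the network for the feature dimension and inspect its structure
model = build_model()
model.build(input_shape=(None, X_train.shape[1]))
model.summary()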

Run k-fold cross-validation and compute the MAE

k = 4
num_val_samples = len(X_train) // k
num_epochs = 100
all_scores = []
best_mae_per_fold = []  
for i in range(k):
    print(f'Processing fold #{i}')
    
    # Fold i serves as the validation set
    val_data = X_train[i*num_val_samples: (i+1)*num_val_samples]
    val_targets = y_train[i*num_val_samples: (i+1)*num_val_samples]
    
    # The remaining folds form the training set
    partial_train_data = np.concatenate([X_train[:i*num_val_samples], X_train[(i+1)*num_val_samples:]], axis=0)
    partial_train_targets = np.concatenate([y_train[:i*num_val_samples], y_train[(i+1)*num_val_samples:]], axis=0)
    
    model = build_model()
    
    history = model.fit(partial_train_data, partial_train_targets, validation_data=(val_data, val_targets), epochs=num_epochs, batch_size=16, verbose=0)
    mae_history = history.history['val_mae']
    
    # Record the best (lowest) validation MAE and the epoch at which it occurred
    min_mae = min(mae_history)
    min_mae_epoch = mae_history.index(min_mae) + 1
    best_mae_per_fold.append((min_mae, min_mae_epoch))
    
    # evaluate() returns [loss (MSE), MAE] for the validation fold
    val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=0)
    all_scores.append(val_mae)

print("Best MAE per fold:")
for fold, (mae, epoch) in enumerate(best_mae_per_fold, 1):
    print(f"Fold {fold}: Minimum MAE = {mae} at epoch {epoch}")

print('All fold scores:', all_scores)
print('Mean score:', np.mean(all_scores))

(Screenshot: printed output of the best MAE per fold and the mean validation MAE)

Run k-fold cross-validation and plot the validation MAE curve

num_epochs = 500
all_mae_histories = []
for i in range(k):
    print(f'Processing fold #{i}')
    
    val_data = X_train[i*num_val_samples: (i+1)*num_val_samples]
    val_targets = y_train[i*num_val_samples: (i+1)*num_val_samples]
    
    partial_train_data = np.concatenate([X_train[:i*num_val_samples], X_train[(i+1)*num_val_samples:]], axis=0)
    partial_train_targets = np.concatenate([y_train[:i*num_val_samples], y_train[(i+1)*num_val_samples:]], axis=0)
    
    model = build_model()
    
    history = model.fit(partial_train_data, partial_train_targets, validation_data=(val_data, val_targets), epochs=num_epochs, batch_size=16, verbose=0)
    mae_history = history.history['val_mae']
    all_mae_histories.append(mae_history)
    
# Average the validation MAE across the k folds at each epoch
average_mae_history = [np.mean([x[i] for x in all_mae_histories]) for i in range(num_epochs)]
plt.plot(range(1, len(average_mae_history)+1), average_mae_history)
plt.xlabel('Epochs')
plt.ylabel('Validation MAE')
plt.show()

(Screenshot: average validation MAE per epoch across the folds)

Look at the curve after the first 10 epochs

# Drop the first 10 epochs, whose large MAE values dominate the scale of the plot
truncated_mae_history = average_mae_history[10:]
plt.plot(range(11, len(average_mae_history) + 1), truncated_mae_history, linewidth=1)
plt.xlabel('Epochs')
plt.ylabel('Validation MAE')
plt.show()

(Screenshot: average validation MAE from epoch 11 onward)
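
Once a reasonable number of epochs has been read off the curve above, a final model can be trained on all of the training data and evaluated on the held-out test set. The sketch below assumes roughly 100 epochs; the value should be taken from your own k-fold curves.

# Train a final model on the full training set; the epoch count (100) is an assumption
# read roughly off the k-fold curves above, not a tuned value
final_model = build_model()
final_model.fit(X_train, y_train, epochs=100, batch_size=16, verbose=0)

# Evaluate on the test set that was held out at the start
test_mse, test_mae = final_model.evaluate(X_test, y_test, verbose=0)
print('Test MAE:', test_mae)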
