Introduction
When designing a deep neural network, a number of hyperparameter questions come up:
- How many hidden layers?
- How many neurons per layer?
- Which activation function?
- What batch size and how many epochs?
etc.
Tuning these hyperparameters is unavoidable. Grid search increases the computational cost, but it automates work that would otherwise have to be done by hand.
This article walks through hyperparameter tuning using TensorFlow (Keras) together with scikit-learn's GridSearchCV.
About the dataset
This article deals with hyperparameter tuning for a regression DNN, so it reuses the dataset from the previous article (scikit-learn's California Housing data).
Sample code
# Import libraries
import pandas as pd
import numpy as np
from sklearn.datasets import fetch_california_housing
cali_housing = fetch_california_housing(as_frame=True)
x=cali_housing.data
y=cali_housing.target
cali_housing.frame.describe()
df=cali_housing.frame
ar_std=df['AveRooms'].std()
ab_std=df['AveBedrms'].std()
pop_std=df['Population'].std()
ao_std=df['AveOccup'].std()
mhv_std=df['MedHouseVal'].std()
ar_mean=df['AveRooms'].mean()
ab_mean=df['AveBedrms'].mean()
pop_mean=df['Population'].mean()
ao_mean=df['AveOccup'].mean()
mhv_mean=df['MedHouseVal'].mean()
# Keep rows within +/-1 standard deviation of each column's mean (simple outlier removal)
limit_low=ar_mean - 1*ar_std
limit_high=ar_mean + 1*ar_std
limit_low1=ab_mean - 1*ab_std
limit_high1=ab_mean + 1*ab_std
limit_low2=pop_mean - 1*pop_std
limit_high2=pop_mean + 1*pop_std
limit_low3=ao_mean - 1*ao_std
limit_high3=ao_mean + 1*ao_std
limit_low4=mhv_mean -1*mhv_std
limit_high4=mhv_mean +1*mhv_std
newdf=df.query('@limit_low < AveRooms < @limit_high')
newdf1=newdf.query('@limit_low1 < AveBedrms < @limit_high1')
newdf2=newdf1.query('@limit_low2 < Population < @limit_high2')
newdf3=newdf2.query('@limit_low3 < AveOccup < @limit_high3')
newdf4=newdf3.query('@limit_low4 < MedHouseVal < @limit_high4')
newdf4.describe()
# Normalization (min-max scaling)
def minmax_norm(df):
    return (df - df.min()) / (df.max() - df.min())
df_minmax_norm = minmax_norm(newdf4)
df_minmax_norm.describe()
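# (Illustrative note, not in the original: the target MedHouseVal is min-max scaled
# along with the features, so model predictions will land in [0, 1]. To map a
# prediction back to the original units, invert the transform, e.g.:)
# y_min, y_max = newdf4['MedHouseVal'].min(), newdf4['MedHouseVal'].max()
# y_pred_orig = y_pred_norm * (y_max - y_min) + y_min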
# Import the plotting library
import seaborn as sns
# Pairwise scatter-plot matrix
sns.pairplot(df_minmax_norm, height=1.0)
# Split into features and target
x=df_minmax_norm.iloc[:,0:8]
y=df_minmax_norm.iloc[:,8:9]
from sklearn.model_selection import train_test_split
# Hold out 20% of the full data as "unknown" data for final validation
x_know,x_unknown,y_know,y_unknown=train_test_split(x,y,test_size=0.2)
# Split the remaining known data into training and test sets
x_train,x_test,y_train,y_test=train_test_split(x_know,y_know,test_size=0.2)
The test data is shown below: 637 samples.
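As a quick sanity check, you can print the shapes of the splits; with test_size=0.2 at both stages, roughly 64% / 16% / 20% of the rows end up in train / test / unknown. A minimal sketch, not part of the original code:

print(x_train.shape, x_test.shape, x_unknown.shape)
print(y_train.shape, y_test.shape, y_unknown.shape)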
Hyperparameter tuning
Now for the main topic. First, import the libraries:
# Importing the necessary packages
from sklearn.model_selection import GridSearchCV, KFold
from keras.models import Sequential
from keras.layers import Dense
# Wrapper for classification problems
#from keras.wrappers.scikit_learn import KerasClassifier
# Wrapper for regression problems
from keras.wrappers.scikit_learn import KerasRegressor
from keras.optimizers import Adam
from keras.layers import Dropout
Define the DNN so it can be handed to the grid search. As an example, we tune the following factors:

Factor | Level 1 | Level 2 |
---|---|---|
Learning rate | 0.1 | 0.5 |
epochs | 100 | 300 |
Batch size | 50 | 100 |
# Define the DNN model
def create_model(learning_rate):
    model = Sequential()
    model.add(Dense(64,input_dim = 8,kernel_initializer = 'normal',activation = 'relu'))
    model.add(Dense(32,activation = 'relu'))
    model.add(Dense(16,activation = 'relu'))
    model.add(Dense(8,activation = 'relu'))
    model.add(Dense(1,activation = 'linear'))
    adam = Adam(lr = learning_rate)
    # For classification you would use e.g. binary cross-entropy:
    #model.compile(loss = 'binary_crossentropy',optimizer = adam,metrics = ['acc'])
    model.compile(loss = 'mse',optimizer = adam,metrics = ['mae'])
    return model
# Create the model
#model = KerasClassifier(build_fn = create_model,verbose = 0,batch_size = 40,epochs = 10)
# Define the grid search parameters
epochs_grid=[100,300]
batch_size_grid=[50,100]
learning_rate = [0.1,0.5]
# Define the grid search parameters as a dictionary
param_grids = dict(learning_rate = learning_rate,batch_size=batch_size_grid,epochs=epochs_grid)
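# (Illustrative, not in the original: this grid has 2*2*2 = 8 combinations, and with
# the default 5-fold KFold used below that amounts to 40 model fits.)
# from sklearn.model_selection import ParameterGrid
# print(len(ParameterGrid(param_grids)))  # -> 8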
# For reference: the classifier setup
#model = KerasClassifier(build_fn = create_model,verbose = 0)
# Regressor setup (batch_size and epochs are supplied via param_grids)
model=KerasRegressor(build_fn=create_model,verbose=0)
# Build and fit the GridSearchCV
grid = GridSearchCV(estimator = model,param_grid = param_grids,cv = KFold(),verbose = 5)
Run the grid search:
grid_result = grid.fit(x_train,y_train)
# Print a summary of the results
print('Best : {}, using {}'.format(grid_result.best_score_,grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print('{},{} with: {}'.format(mean, stdev, param))
Conclusion: a batch size of 50, 100 epochs, and a learning rate of 0.1 performed best.
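It is also worth scoring the winning settings on the held-out test data. A minimal sketch, not in the original (it assumes grid_result from above is still in scope):

from sklearn.metrics import mean_squared_error
best_model = grid_result.best_estimator_
y_pred = best_model.predict(x_test)
print('Test MSE:', mean_squared_error(y_test, y_pred))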
Adding the activation function to the parameter study

Factor | Level 1 | Level 2 | Level 3 |
---|---|---|---|
Activation function | ReLU | Swish | Mish |
Learning rate | 0.1 | 0.5 | |
epochs | 100 | 300 | |
Batch size | 50 | 100 | |
# Importing the necessary packages
from sklearn.model_selection import GridSearchCV, KFold
from keras.models import Sequential
from keras.layers import Dense
from keras.losses import mean_squared_error
import tensorflow as tf
from tensorflow.keras.layers import Activation
from tensorflow.keras.utils import get_custom_objects
# Wrapper for classification problems
#from keras.wrappers.scikit_learn import KerasClassifier
# Wrapper for regression problems
from keras.wrappers.scikit_learn import KerasRegressor
from keras.optimizers import Adam
from keras.layers import Dropout
#============================================================================
class Mish(Activation):
    '''
    Mish Activation Function.
    .. math::
        mish(x) = x * tanh(softplus(x)) = x * tanh(ln(1 + e^{x}))
    Shape:
        - Input: Arbitrary. Use the keyword argument `input_shape`
          (tuple of integers, does not include the samples axis)
          when using this layer as the first layer in a model.
        - Output: Same shape as the input.
    Examples:
        >>> X = Activation('Mish', name="conv1_act")(X_input)
    '''
    def __init__(self, activation, **kwargs):
        super(Mish, self).__init__(activation, **kwargs)
        self.__name__ = 'Mish'

def mish(inputs):
    return inputs * tf.math.tanh(tf.math.softplus(inputs))

get_custom_objects().update({'Mish': Mish(mish)})
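# (Illustrative check, not in the original: once registered, 'Mish' can be used like
# any built-in activation string, and the function itself can be probed directly:)
# print(mish(tf.constant([-1.0, 0.0, 1.0])).numpy())  # approx. [-0.3034, 0.0, 0.8651]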
#============================================================================
# Define the DNN model
def create_model(learning_rate,active):
    model = Sequential()
    model.add(Dense(64,input_dim = 8,kernel_initializer = 'normal',activation = active))
    model.add(Dense(32,activation = active))
    model.add(Dense(16,activation = active))
    model.add(Dense(8,activation = active))
    model.add(Dense(1,activation = 'linear'))
    adam = Adam(lr = learning_rate)
    #model.compile(loss = 'binary_crossentropy',optimizer = adam,metrics = ['acc'])
    model.compile(loss = 'mse',optimizer = adam,metrics = ['mae'])
    return model
# Create the model
#model = KerasClassifier(build_fn = create_model,verbose = 0,batch_size = 40,epochs = 10)
# Define the grid search parameters
active=['relu','swish','Mish']
epochs_grid=[100,300]
batch_size_grid=[50,100]
learning_rate = [0.1,0.5]
# Define the grid search parameters as a dictionary
param_grids = dict(active=active,learning_rate = learning_rate,batch_size=batch_size_grid,epochs=epochs_grid)
# For reference: the classifier setup
#model = KerasClassifier(build_fn = create_model,verbose = 0)
# Regressor setup (batch_size and epochs are supplied via param_grids)
model=KerasRegressor(build_fn=create_model,verbose=0)
# Build and fit the GridSearchCV
grid = GridSearchCV(estimator = model,param_grid = param_grids,cv = KFold(),verbose = 1)
grid_result = grid.fit(x_train,y_train)
# Print a summary of the results
print('Best : {}, using {}'.format(grid_result.best_score_,grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print('{},{} with: {}'.format(mean, stdev, param))
Conclusion: ReLU activation, a batch size of 100, 100 epochs, and a learning rate of 0.1 performed best.
Adding the number of neurons to the parameter study

Factor | Level 1 | Level 2 | Level 3 |
---|---|---|---|
Activation function | ReLU | | |
Learning rate | 0.1 | | |
epochs | 100 | | |
Batch size | 100 | | |
Neurons in layer 1 | 64 | 128 | 256 |

Note: layer 2 gets 50% of layer 1's neuron count, layer 3 gets 25%, and so on (e.g., 64 → 32 → 16 → 8).
# Importing the necessary packages
from sklearn.model_selection import GridSearchCV, KFold
from keras.models import Sequential
from keras.layers import Dense
from keras.losses import mean_squared_error
import tensorflow as tf
from tensorflow.keras.layers import Activation
from tensorflow.keras.utils import get_custom_objects
# Wrapper for classification problems
#from keras.wrappers.scikit_learn import KerasClassifier
# Wrapper for regression problems
from keras.wrappers.scikit_learn import KerasRegressor
from keras.optimizers import Adam
from keras.layers import Dropout
#============================================================================
class Mish(Activation):
    '''
    Mish Activation Function.
    .. math::
        mish(x) = x * tanh(softplus(x)) = x * tanh(ln(1 + e^{x}))
    Shape:
        - Input: Arbitrary. Use the keyword argument `input_shape`
          (tuple of integers, does not include the samples axis)
          when using this layer as the first layer in a model.
        - Output: Same shape as the input.
    Examples:
        >>> X = Activation('Mish', name="conv1_act")(X_input)
    '''
    def __init__(self, activation, **kwargs):
        super(Mish, self).__init__(activation, **kwargs)
        self.__name__ = 'Mish'

def mish(inputs):
    return inputs * tf.math.tanh(tf.math.softplus(inputs))

get_custom_objects().update({'Mish': Mish(mish)})
#============================================================================
# Define the DNN model
def create_model(learning_rate,active,neuron):
    model = Sequential()
    # Point: the neuron counts arrive from the grid as strings, so convert to float first
    neuron=float(neuron)
    neuron2=int(neuron/2)
    neuron3=int(neuron/4)
    neuron4=int(neuron/8)
    # Convert back to int at the end
    neuron=int(neuron)
    model.add(Dense(neuron,input_dim = 8,kernel_initializer = 'normal',activation = active))
    model.add(Dense(neuron2,activation = active))
    model.add(Dense(neuron3,activation = active))
    model.add(Dense(neuron4,activation = active))
    model.add(Dense(1,activation = 'linear'))
    adam = Adam(lr = learning_rate)
    #model.compile(loss = 'binary_crossentropy',optimizer = adam,metrics = ['acc'])
    model.compile(loss = 'mse',optimizer = adam,metrics = ['mae'])
    return model
# Create the model
#model = KerasClassifier(build_fn = create_model,verbose = 0,batch_size = 40,epochs = 10)
# Define the grid search parameters
active=['relu']
epochs_grid=[100]
batch_size_grid=[100]
learning_rate = [0.1]
neuron=['64','128','256']
# Define the grid search parameters as a dictionary
param_grids = dict(active=active,neuron=neuron,learning_rate = learning_rate,batch_size=batch_size_grid,epochs=epochs_grid)
# Regressor setup (batch_size and epochs are supplied via param_grids)
model=KerasRegressor(build_fn=create_model,verbose=0)
# Build and fit the GridSearchCV
grid = GridSearchCV(estimator = model,param_grid = param_grids,cv = KFold(),verbose = 1)
grid_result = grid.fit(x_train,y_train)
# Print a summary of the results
print('Best : {}, using {}'.format(grid_result.best_score_,grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print('{},{} with: {}'.format(mean, stdev, param))
Conclusion: the best architecture was input 8 → 64 → 32 → 16 → 8 → output 1.
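To wrap up, you could retrain a model with the winning settings on all of the known data and score it on the untouched unknown split. A minimal sketch, not in the original, reusing create_model and the splits defined earlier:

final_model = create_model(learning_rate=0.1, active='relu', neuron='64')
final_model.fit(x_know, y_know, epochs=100, batch_size=100, verbose=0)
loss, mae = final_model.evaluate(x_unknown, y_unknown, verbose=0)
print('Unknown-data MSE: {:.4f}, MAE: {:.4f}'.format(loss, mae))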
Summary
Grid search is an exhaustive search, so the computational cost grows quickly. In exchange, it removes the manual effort of tuning by hand, which makes it well worth using.
If an orthogonal experimental design could be folded in, the same space could be explored with far fewer trials, so I plan to look into that next.
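Until then, a lighter-weight compromise is scikit-learn's RandomizedSearchCV, which samples a fixed number of parameter combinations instead of enumerating them all. A minimal sketch, not in the original, using the 24-combination grid from the activation-function experiment (n_iter is an illustrative choice):

from sklearn.model_selection import RandomizedSearchCV, KFold
random_search = RandomizedSearchCV(estimator=model, param_distributions=param_grids,
                                   n_iter=8, cv=KFold(), verbose=1)
random_result = random_search.fit(x_train, y_train)
print('Best : {}, using {}'.format(random_result.best_score_, random_result.best_params_))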
That's all. Thank you for reading to the end.