A personal memo on hyperparameter tuning with Optuna (random forest and LightGBM).
Uses the Boston housing price data.
model.py
# !pip install optuna lightgbm
from functools import partial
import optuna
import lightgbm as lgb
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_validate
from sklearn.metrics import mean_absolute_error
from sklearn import datasets
def objective(X, y, trial, clf='RandomForestRegressor'):
    """Objective function to minimize (cross-validated mean absolute error)."""
    if clf == 'RandomForestRegressor':
        # Random forest: search n_estimators and max_depth on a log scale.
        params = {
            'n_estimators': int(trial.suggest_loguniform('n_estimators', 1e+2, 1e+3)),
            'max_depth': int(trial.suggest_loguniform('max_depth', 2, 32)),
        }
        model = RandomForestRegressor(**params)
        kf = KFold(n_splits=5, shuffle=True, random_state=42)
        scores = cross_validate(model, X=X, y=y, cv=kf, n_jobs=-1,
                                scoring='neg_mean_absolute_error')
        # cross_validate reports negated MAE, so flip the sign back.
        score = -1 * scores['test_score'].mean()
    elif clf == 'LGB':
        # LightGBM: search max_bin and num_leaves with a fixed learning rate.
        params = {
            'objective': 'regression',
            'max_bin': trial.suggest_int('max_bin', 255, 500),
            'learning_rate': 0.05,
            'num_leaves': trial.suggest_int('num_leaves', 32, 128),
            'metric': 'mae',
        }
        lgb_train = lgb.Dataset(X, y)
        res = lgb.cv(params, lgb_train, num_boost_round=1000, early_stopping_rounds=10,
                     nfold=5, shuffle=True, stratified=False, seed=42)
        # Cross-validated MAE at the last (early-stopped) boosting round.
        score = res['l1-mean'][-1]
    return score
def main():
    # Load the Boston housing price data as the example dataset.
    dataset = datasets.load_boston()
    X, y = dataset.data, dataset.target
    # Bind X and y so Optuna only has to pass in the trial object.
    f = partial(objective, X, y)
    study = optuna.create_study()
    study.optimize(f, n_trials=30)
    print('params:', study.best_params)


if __name__ == '__main__':
    main()
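As written, main() only exercises the random-forest branch; the LightGBM branch is selected by the clf argument but never invoked. Below is a minimal sketch of a variant that tunes the LightGBM branch instead (main_lgb is a hypothetical name, not part of the original script), assuming the same objective signature as above.

def main_lgb():
    # Hypothetical variant of main(): bind clf='LGB' so each Optuna trial
    # runs the LightGBM cross-validation path of objective().
    dataset = datasets.load_boston()
    X, y = dataset.data, dataset.target
    f = partial(objective, X, y, clf='LGB')
    study = optuna.create_study()
    study.optimize(f, n_trials=30)
    print('params:', study.best_params)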