# 自動車の走行距離予測

SIGNATEの練習問題をやったのでログを残す。

```python
import numpy as np
import optuna
import pandas as pd
from sklearn.model_selection import cross_validate
import xgboost as xgb

def objective(trial):
    """Optuna objective: cross-validated score of an XGBoost regressor.

    Samples the number of CV folds and the XGBoost hyperparameters from
    ``trial``, then evaluates the resulting model on the module-level
    ``x`` / ``y`` with ``cross_validate``.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Mean of the per-fold ``test_score`` values. No ``scoring`` is passed
        to ``cross_validate``, so the estimator's default score is used
        (R^2 for a regressor). Higher is better, so the study should be
        created with ``direction="maximize"``.
    """
    # Hyperparameters
    cv = trial.suggest_int("cv", 3, 5)
    # NOTE(review): XGBoost documents `eta` as an alias of `learning_rate`,
    # so these two samples target the same setting; consider keeping one.
    eta = trial.suggest_float("eta", 0.01, 0.2)
    # The parameter name must match the XGBRegressor keyword exactly (no
    # surrounding whitespace) so that study.best_params can be reused.
    learning_rate = trial.suggest_float("learning_rate", 0.1, 1.0)
    max_depth = trial.suggest_int("max_depth", 2, 100)
    subsample = trial.suggest_float("subsample", 0.5, 1.0)
    colsample_bytree = trial.suggest_float("colsample_bytree", 0.5, 1.0)

    # Model
    model = xgb.XGBRegressor(
        eta=eta,
        learning_rate=learning_rate,
        max_depth=max_depth,
        subsample=subsample,
        colsample_bytree=colsample_bytree,
    )

    # Cross-validation
    scores = cross_validate(model, x, y, cv=cv)

    return scores['test_score'].mean()

def _preprocess(df):
    """Drop the non-feature columns and make `horsepower` numeric.

    '?' entries are replaced by NaN so that the column can be cast to float.
    """
    df = df.drop(columns=['id', 'car name'])
    df = df.replace('?', np.nan)
    df['horsepower'] = df['horsepower'].astype(float)
    return df


# Data loading
# NOTE(review): the original snippet used `train` / `test` without loading
# them. The file names below are an assumption (tab-separated files in the
# working directory); adjust them to wherever the data actually lives.
train = pd.read_csv('train.tsv', sep='\t')
test = pd.read_csv('test.tsv', sep='\t')

train = _preprocess(train)

# Keep the ids before they are dropped; they are needed for the output frame.
test_id = test['id']
test = _preprocess(test)

# Training
x = train.drop('mpg', axis=1)
y = train['mpg']
# The objective returns a cross-validated score where higher is better, so
# the study has to maximize it (the default direction is "minimize").
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=10000)
print(study.best_params)
print(study.best_value)
```
```python
# 予測
# study.best_params holds every value the study sampled, not only model
# hyperparameters: drop the fold count ("cv") and strip stray whitespace
# from the names so they are valid XGBRegressor keyword arguments.
model_params = {
    name.strip(): value
    for name, value in study.best_params.items()
    if name != 'cv'
}
model = xgb.XGBRegressor(**model_params)
model.fit(x, y)
y_pred = model.predict(test)
# Index the predictions like `test` so the column-wise concat lines up rows.
df_pred = pd.DataFrame({'mpg': y_pred}, index=test.index)
df = pd.concat([test_id, test, df_pred], axis=1)
```
