# %%
import numpy as np
import pandas as pd
from functools import partial
from sklearn import ensemble, metrics, model_selection
from skopt import gp_minimize, space
def optimize(params, param_names, x, y):
    """Return the negative mean 5-fold accuracy for a given set of parameters."""
    # skopt passes the parameter values as a plain list, in the order of the
    # search space, so rebuild a name -> value dict first.
    params = dict(zip(param_names, params))
    model = ensemble.RandomForestClassifier(**params)
    kf = model_selection.StratifiedKFold(n_splits=5)
    accuracies = []
    for train_idx, test_idx in kf.split(X=x, y=y):
        x_train = x.iloc[train_idx].values
        y_train = y.iloc[train_idx].values
        x_test = x.iloc[test_idx].values
        y_test = y.iloc[test_idx].values
        model.fit(x_train, y_train)
        preds = model.predict(x_test)
        fold_accuracy = metrics.accuracy_score(y_test, preds)
        accuracies.append(fold_accuracy)
    # gp_minimize minimizes, so return the negated accuracy.
    return -1 * np.mean(accuracies)
if __name__ == "__main__":
    df = pd.read_csv("../input/train.csv")
    # Features: drop the target column
    X = df.drop("price_range", axis=1)
    # Target
    y = df.price_range
    param_space = [
        # max_depth: integer in [3, 15]
        space.Integer(3, 15, name="max_depth"),
        # n_estimators: integer in [100, 1500]
        space.Integer(100, 1500, name="n_estimators"),
        # criterion: categorical choice
        space.Categorical(["gini", "entropy"], name="criterion"),
        # real-valued parameters with an explicit prior are also supported
        space.Real(0.01, 1, prior="uniform", name="max_features")
    ]
    param_names = ["max_depth", "n_estimators", "criterion", "max_features"]
    optimization_function = partial(optimize, param_names=param_names, x=X, y=y)
    result = gp_minimize(
        optimization_function,
        dimensions=param_space,
        n_calls=15,
        n_random_starts=10,
        verbose=10
    )
    best_params = dict(zip(param_names, result.x))
    print(best_params)
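    # Hedged sketch (not part of the original script): refit a single model on the
    # full training data with the parameters found by gp_minimize, assuming the
    # goal is to reuse the tuned model downstream.
    best_model = ensemble.RandomForestClassifier(**best_params)
    best_model.fit(X, y)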
# %%
import matplotlib.pyplot as plt
# %%
from skopt.plots import plot_convergence
plot_convergence(result)
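# Hedged addition: outside a notebook, matplotlib needs an explicit call to
# render the figure produced by plot_convergence (plt is imported above).
plt.show()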
# %%
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn import model_selection
if __name__ == "__main__":
    df = pd.read_csv("../input/train.csv")
    X = df.drop("price_range", axis=1)
    y = df.price_range.values
    classifier = RandomForestClassifier(n_jobs=-1)
    # Exhaustive grid: every combination of these values is evaluated.
    param_grid = {
        "n_estimators": [100, 200, 300, 400, 500],
        "max_depth": [1, 2, 5, 7, 11, 15],
        "criterion": ["gini", "entropy"]
    }
    model = model_selection.GridSearchCV(
        estimator=classifier,
        param_grid=param_grid,
        scoring="accuracy",
        verbose=10,
        cv=5,
        n_jobs=-1
    )
    model.fit(X, y)
    print(f"Best score: {model.best_score_}")
    print("Best parameter set:")
    best_parameters = model.best_estimator_.get_params()
    for param_name in sorted(best_parameters.keys()):
        print(f"{param_name}: {best_parameters[param_name]}")
    print("")
# %%
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn import model_selection
if __name__ == "__main__":
    df = pd.read_csv("../input/train.csv")
    X = df.drop("price_range", axis=1)
    y = df.price_range.values
    classifier = RandomForestClassifier(n_jobs=-1)
    param_grid = {
        "n_estimators": [100, 200, 300, 400, 500],
        "max_depth": [1, 2, 5, 7, 11, 15],
        "criterion": ["gini", "entropy"]
    }
    # Randomized search: only n_iter parameter combinations are sampled,
    # instead of the full grid.
    model = model_selection.RandomizedSearchCV(
        estimator=classifier,
        param_distributions=param_grid,
        scoring="accuracy",
        verbose=10,
        cv=5,
        n_jobs=-1,
        n_iter=20
    )
    model.fit(X, y)
    print(f"Best score: {model.best_score_}")
    print("Best parameter set:")
    best_parameters = model.best_estimator_.get_params()
    for param_name in sorted(best_parameters.keys()):
        print(f"{param_name}: {best_parameters[param_name]}")
    print("")
# %%
import numpy as np
import pandas as pd
from functools import partial
from sklearn import ensemble, metrics, model_selection
from hyperopt import hp, fmin, tpe, Trials
from hyperopt.pyll.base import scope
def optimize(params, x, y):
    """Return the negative mean 5-fold accuracy for a given set of parameters."""
    # hyperopt passes the parameters as a dict, so no name mapping is needed here.
    model = ensemble.RandomForestClassifier(**params)
    kf = model_selection.StratifiedKFold(n_splits=5)
    accuracies = []
    for train_idx, test_idx in kf.split(X=x, y=y):
        x_train = x.iloc[train_idx].values
        y_train = y.iloc[train_idx].values
        x_test = x.iloc[test_idx].values
        y_test = y.iloc[test_idx].values
        model.fit(x_train, y_train)
        preds = model.predict(x_test)
        fold_accuracy = metrics.accuracy_score(y_test, preds)
        accuracies.append(fold_accuracy)
    # fmin minimizes, so return the negated accuracy.
    return -1 * np.mean(accuracies)
if __name__ == "__main__":
    df = pd.read_csv("../input/train.csv")
    # Features: drop the target column
    X = df.drop("price_range", axis=1)
    # Target
    y = df.price_range
    param_space = {
        # quniform returns floats, so wrap integer parameters in scope.int
        "max_depth": scope.int(hp.quniform("max_depth", 3, 15, 1)),
        "n_estimators": scope.int(hp.quniform("n_estimators", 100, 1500, 1)),
        "criterion": hp.choice("criterion", ["gini", "entropy"]),
        "max_features": hp.uniform("max_features", 0.01, 1)
    }
    optimization_function = partial(optimize, x=X, y=y)
    # Trials keeps a record of every evaluation made during the search.
    trials = Trials()
    hopt = fmin(
        fn=optimization_function,
        space=param_space,
        algo=tpe.suggest,
        max_evals=15,
        trials=trials
    )
    print(hopt)
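    # Hedged addition: fmin returns the *index* for hp.choice parameters (e.g.
    # criterion comes back as 0 or 1). hyperopt.space_eval maps the raw result
    # back to the actual parameter values.
    from hyperopt import space_eval
    print(space_eval(param_space, hopt))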