import numpy as np
import pandas as pd
# Load the raw data set from CSV. The path literal looks like a placeholder
# to be replaced with the real file name - TODO confirm.
dataset = pd.read_csv(filepath_or_buffer="CSVファイル名")
def ping(data: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned, one-hot encoded copy of ``data``.

    Rows that contain any missing value are dropped, then the remaining
    frame is passed through ``pd.get_dummies`` with ``drop_first=True`` so
    that each encoded categorical column yields one fewer indicator column
    than it has categories.

    Args:
        data: The raw input frame. It is not modified.

    Returns:
        A new DataFrame without missing values and with categorical columns
        replaced by indicator columns. The original row index labels of the
        surviving rows are kept.
    """
    df = data.dropna()
    df = pd.get_dummies(df, drop_first=True)
    return df
# Clean and one-hot encode the raw data before splitting it.
train = ping(dataset)
from sklearn.model_selection import train_test_split

# Features are every column except the target, as a NumPy array; the target
# stays a pandas Series.
# NOTE(review): ping() runs get_dummies over the whole frame, target column
# included. A non-numeric target would be encoded under different column
# names and the lookups below would raise KeyError - confirm the target is
# numeric.
x = train.drop(columns="答えの名").values
t = train["答えの名"]

# Hold out 20% of the rows as the test set.
x_train_val, x_test, t_train_val, t_test = train_test_split(
    x,
    t,
    test_size=0.2,
    random_state=1,
)
# Split the remaining rows 70/30 into training and validation sets.
x_train, x_val, t_train, t_val = train_test_split(
    x_train_val,
    t_train_val,
    test_size=0.3,
    random_state=1,
)
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import RandomizedSearchCV

# Base model whose hyperparameters are searched.
estimator = DecisionTreeClassifier(random_state=0)

# Candidate values to sample from; the range arguments are
# start, stop (exclusive), step.
param_distributions = dict(
    max_depth=list(range(5, 100, 2)),
    min_samples_split=list(range(2, 50)),
)

# Number of parameter settings to sample.
n_iter = 100
# Number of cross-validation folds.
cv = 5

tuned_model = RandomizedSearchCV(
    estimator=estimator,
    param_distributions=param_distributions,
    n_iter=n_iter,
    cv=cv,
    random_state=0,
    return_train_score=False,
)
# The search cross-validates internally, so it is fitted on the combined
# train+validation portion rather than on a single split.
tuned_model.fit(x_train_val, t_train_val)
# Tabulate the search results with the best-ranked candidate first, then
# transpose so that each candidate becomes a column, and save the table.
dd = pd.DataFrame(tuned_model.cv_results_)
dd = dd.sort_values(by='rank_test_score').transpose()
dd.to_csv("me.csv")

# Score the best estimator found by the search: first on the data used for
# the search, then on the held-out test set.
best_model = tuned_model.best_estimator_
print(best_model.score(x_train_val, t_train_val))
print(best_model.score(x_test, t_test))