Login / Signup
3
2

More than 1 year has passed since last update.

Pythonで機械学習するときの自分用テンプレート

Posted at

基本テンプレート

import numpy as np
import pandas as pd

# Load the raw data into a DataFrame. The path literal reads "CSV file name"
# in Japanese — presumably a placeholder to replace with the real file path.
dataset = pd.read_csv("CSVファイル名")
def ping(data):
    """Return a cleaned, dummy-encoded version of ``data``.

    Rows that contain any missing value are removed first. The remaining
    frame is then passed through ``pd.get_dummies`` with ``drop_first=True``,
    so each encoded column loses its first level. The input frame is not
    modified.
    """
    complete_rows = data.dropna()
    return pd.get_dummies(complete_rows, drop_first=True)

# Clean and dummy-encode the loaded dataset with ping().
train = ping(dataset)

from sklearn.model_selection import train_test_split
# Features: every column except "答えの名", converted to a NumPy array.
# NOTE(review): "答えの名" reads "name of the answer" — presumably a placeholder
# for the real target column name; confirm before running.
x = train.drop("答えの名", axis=1).values
# Target: the "答えの名" column, kept as a pandas Series.
t = train["答えの名"]

# First split: hold out 20% of the rows as the test set.
x_train_val, x_test, t_train_val, t_test = train_test_split(x, t, test_size=0.2, random_state=1)
# Second split: 30% of the remaining rows become a validation set.
# NOTE(review): x_train / x_val / t_train / t_val are not used by the code
# visible in this file, which fits on x_train_val with cross-validation.
x_train, x_val, t_train, t_val = train_test_split(x_train_val, t_train_val, test_size=0.3, random_state=1)

from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import RandomizedSearchCV

# Base model whose hyperparameters are tuned below; seeded for reproducibility.
estimator = DecisionTreeClassifier(random_state=0)

# Candidate values the randomized search samples from.
param_distributions = {
    'max_depth': list(range(5, 100, 2)),  # range(start, stop, step)
    'min_samples_split': list(range(2, 50, 1))
}

# Number of trials (parameter settings sampled by the search)
n_iter = 100

# Number of cross-validation folds used to score each sampled setting.
cv = 5

# Randomized hyperparameter search; train scores are not recorded.
tuned_model = RandomizedSearchCV(
    estimator=estimator, 
    param_distributions=param_distributions, 
    n_iter=n_iter, cv=cv, 
    random_state=0, return_train_score=False
)

# Fit on the combined train+validation rows; the search performs its own
# cross-validation splits internally.
tuned_model.fit(x_train_val, t_train_val)

# Save the per-setting CV results, best rank first, transposed so that each
# column is one sampled setting.
dd = pd.DataFrame(tuned_model.cv_results_).sort_values('rank_test_score').T
dd.to_csv("me.csv")

# Estimator for the best-ranked setting found by the search.
best_model = tuned_model.best_estimator_

# Print the score on the data used for the search, then on the held-out test set.
print(best_model.score(x_train_val, t_train_val))
print(best_model.score(x_test, t_test))

予測

# Test data: sample rows to run through the tuned model.
# NOTE(review): each row has 2 features; presumably this must match the number
# of feature columns in x — confirm and replace with real inputs.
test_data = [[0,0],[1,1]]
test_label = best_model.predict(test_data)
# Print a header line (Japanese for "prediction result") followed by the labels.
print("予測結果→")
print(test_label)
3
2
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
3
2