0
0

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?

Kaggle Masterに学ぶ機械学習実践アプローチ 写経 08

Posted at
# %%
import numpy as np
import pandas as pd

from functools import partial

from sklearn import ensemble, metrics, model_selection

from skopt import gp_minimize, space

def optimize(params, param_names, x, y):
    """Objective for ``gp_minimize``: negated mean accuracy over 5 stratified folds.

    Args:
        params: Hyperparameter values, positionally aligned with ``param_names``.
        param_names: Keyword names passed to ``RandomForestClassifier``.
        x: Feature table; rows are selected through ``.iloc``.
        y: Target labels; rows are selected through ``.iloc``.

    Returns:
        ``-1`` times the mean of the per-fold accuracy scores, so that a
        minimizer effectively maximizes accuracy.
    """
    forest = ensemble.RandomForestClassifier(**dict(zip(param_names, params)))
    splitter = model_selection.StratifiedKFold(n_splits=5)

    def score_fold(fit_rows, eval_rows):
        # Refit the same estimator on this fold's training rows, then score
        # its predictions on the held-out rows.
        forest.fit(x.iloc[fit_rows].values, y.iloc[fit_rows].values)
        predicted = forest.predict(x.iloc[eval_rows].values)
        return metrics.accuracy_score(y.iloc[eval_rows].values, predicted)

    fold_scores = [
        score_fold(fit_rows, eval_rows)
        for fit_rows, eval_rows in splitter.split(X=x, y=y)
    ]
    return -1 * np.mean(fold_scores)

if __name__ == "__main__":
    # Bayesian (Gaussian-process) search over random-forest hyperparameters
    # using scikit-optimize.
    df = pd.read_csv("../input/train.csv")
    # Features are every column except the target, `price_range`.
    X = df.drop("price_range", axis=1)
    y = df.price_range

    param_space = [
        # max_depth: integer between 3 and 15
        space.Integer(3, 15, name="max_depth"),
        # n_estimators: integer between 100 and 1500
        space.Integer(100, 1500, name="n_estimators"),
        # criterion: categorical choice from a list
        space.Categorical(["gini", "entropy"], name="criterion"),
        # real-valued dimensions can also specify a prior distribution
        space.Real(0.01, 1, prior="uniform", name="max_features"),
    ]

    # Must list the names in the same order as the dimensions in param_space,
    # because gp_minimize hands the objective a plain list of values.
    param_names = ["max_depth", "n_estimators", "criterion", "max_features"]
    optimization_function = partial(optimize, param_names=param_names, x=X, y=y)
    result = gp_minimize(
        optimization_function,
        dimensions=param_space,
        n_calls=15,
        # `n_random_starts` is deprecated in scikit-optimize;
        # `n_initial_points` is its replacement with the same meaning.
        n_initial_points=10,
        verbose=10,
    )
    best_params = dict(zip(param_names, result.x))

    print(best_params)


# %%
import matplotlib.pyplot as plt

# %%
from skopt.plots import plot_convergence

# Draw skopt's convergence plot for `result`, the value returned by
# gp_minimize above.
# NOTE: `result` is only assigned inside the `__main__` guard of that script.
plot_convergence(result)
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn import model_selection


if __name__ == "__main__":
    # Grid search: the candidates in `param_grid` are scored on accuracy
    # with cv=5, then the best score and the best estimator's full
    # parameter set are printed.
    df = pd.read_csv("../input/train.csv")

    y = df.price_range.values
    X = df.drop("price_range", axis=1)

    param_grid = dict(
        n_estimators=[100, 200, 300, 400, 500],
        max_depth=[1, 2, 5, 7, 11, 15],
        criterion=["gini", "entropy"],
    )

    classifier = RandomForestClassifier(n_jobs=-1)
    model = model_selection.GridSearchCV(
        classifier,
        param_grid,
        scoring="accuracy",
        n_jobs=-1,
        cv=5,
        verbose=10,
    )
    model.fit(X, y)

    print(f"Best score: {model.best_score_}")
    print("best parameters set")

    # Report every parameter of the winning estimator, sorted by name.
    best_parameters = model.best_estimator_.get_params()
    for param_name, param_value in sorted(best_parameters.items()):
        print(f"{param_name}: {param_value}")

    print("")
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn import model_selection


if __name__ == "__main__":
    # Randomized search: n_iter=20 candidates drawn from `param_grid` are
    # scored on accuracy with cv=5, then the best score and the best
    # estimator's full parameter set are printed.
    df = pd.read_csv("../input/train.csv")

    y = df.price_range.values
    X = df.drop("price_range", axis=1)

    param_grid = dict(
        n_estimators=[100, 200, 300, 400, 500],
        max_depth=[1, 2, 5, 7, 11, 15],
        criterion=["gini", "entropy"],
    )

    classifier = RandomForestClassifier(n_jobs=-1)
    model = model_selection.RandomizedSearchCV(
        classifier,
        param_grid,
        n_iter=20,
        scoring="accuracy",
        n_jobs=-1,
        cv=5,
        verbose=10,
    )
    model.fit(X, y)

    print(f"Best score: {model.best_score_}")
    print("best parameters set")

    # Report every parameter of the winning estimator, sorted by name.
    best_parameters = model.best_estimator_.get_params()
    for param_name, param_value in sorted(best_parameters.items()):
        print(f"{param_name}: {param_value}")

    print("")
# %%
import numpy as np
import pandas as pd

from functools import partial

from sklearn import ensemble, metrics, model_selection

from hyperopt import hp, fmin, tpe, Trials
from hyperopt.pyll.base import scope

def optimize(params, x, y):
    """Objective for hyperopt's ``fmin``: negated mean accuracy over 5 stratified folds.

    Args:
        params: Mapping of keyword arguments for ``RandomForestClassifier``.
        x: Feature table; rows are selected through ``.iloc``.
        y: Target labels; rows are selected through ``.iloc``.

    Returns:
        ``-1`` times the mean of the per-fold accuracy scores, so that a
        minimizer effectively maximizes accuracy.
    """
    forest = ensemble.RandomForestClassifier(**params)
    splitter = model_selection.StratifiedKFold(n_splits=5)

    def score_fold(fit_rows, eval_rows):
        # Refit the same estimator on this fold's training rows, then score
        # its predictions on the held-out rows.
        forest.fit(x.iloc[fit_rows].values, y.iloc[fit_rows].values)
        predicted = forest.predict(x.iloc[eval_rows].values)
        return metrics.accuracy_score(y.iloc[eval_rows].values, predicted)

    fold_scores = [
        score_fold(fit_rows, eval_rows)
        for fit_rows, eval_rows in splitter.split(X=x, y=y)
    ]
    return -1 * np.mean(fold_scores)

if __name__ == "__main__":
    # Imported here so this script section stays self-contained; hyperopt is
    # already a dependency of this script.
    from hyperopt import space_eval

    # TPE search over random-forest hyperparameters using hyperopt.
    df = pd.read_csv("../input/train.csv")
    # Features are every column except the target, `price_range`.
    X = df.drop("price_range", axis=1)
    y = df.price_range

    param_space = {
        # quniform yields floats; scope.int casts them to the integers
        # that the classifier expects.
        "max_depth": scope.int(hp.quniform('max_depth', 3, 15, 1)),
        "n_estimators": scope.int(hp.quniform('n_estimators', 100, 1500, 1)),
        "criterion": hp.choice("criterion", ["gini", "entropy"]),
        "max_features": hp.uniform("max_features", 0.01, 1)
    }

    optimization_function = partial(optimize, x=X, y=y)
    trials = Trials()
    hopt = fmin(
        fn=optimization_function,
        space=param_space,
        algo=tpe.suggest,
        max_evals=15,
        trials=trials,
    )

    # fmin returns raw label values: an hp.choice pick comes back as a list
    # index (0/1 rather than "gini"/"entropy") and quniform values as floats.
    # space_eval maps them back through param_space to the actual parameters.
    best_params = space_eval(param_space, hopt)
    print(best_params)
0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do by signing up
0
0

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?