LoginSignup
0
0

個人用scikit-learn

Last updated at Posted at 2023-07-19

回帰

線形回帰

from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(X, y)
y_pred = lin_reg.predict(X)
lin_reg.score(X, y)  #R2スコア
lin_reg.get_params()
lin_reg.set_params(**params)

Ridge回帰(L2正則化)

from sklearn.linear_model import Ridge
ridge = Ridge(alpha=1)
ridge.fit(X, y)
y_pred = ridge.predict(X)
ridge.score(X, y)

グリッドサーチによるモデル選択

from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
param_grid = {'alpha': [0.1, 0.25, 0.5, 0.75, 0.9]}
grid_search = GridSearchCV(Ridge(), param_grid=param_grid, cv=5)
grid_search.fit(X, y)
best_model = grid_search.best_estimator_
print(grid_search.best_params_)
print(grid_search.best_score_)

Lasso回帰(L1正則化)

from sklearn.linear_model import Lasso
lasso = Lasso(alpha=0.1)
lasso.fit(X, y)
y_pred = lasso.predict(X)
lasso.score(X, y)

グリッドサーチによるモデル選択

from sklearn.linear_model import Lasso
from sklearn.model_selection import GridSearchCV
param_grid = {'alpha': [0.1, 0.25, 0.5, 0.75, 0.9]}
grid_search = GridSearchCV(Lasso(), param_grid=param_grid, cv=5)
grid_search.fit(X, y)
best_model = grid_search.best_estimator_
print(grid_search.best_params_)
print(grid_search.best_score_)

ElasticNet

グリッドサーチによるモデル選択

from sklearn.linear_model import ElasticNet
from sklearn.model_selection import GridSearchCV
param_grid = {'alpha': [0.1, 0.5, 0.9], 'l1_ratio': [0.01, 0.1, 0.5, 0.9, 0.99]}
grid_search = GridSearchCV(ElasticNet(), param_grid=param_grid, cv=5)
grid_search.fit(X, y)
best_model = grid_search.best_estimator_
print(grid_search.best_params_)
print(grid_search.best_score_)

決定木

インポート

import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier, plot_tree

学習と予測

iris = load_iris()
X = iris.data
y = iris.target
tree_clf = DecisionTreeClassifier(max_depth=2)
tree_clf.fit(X, y)
tree_clf.predict_proba(new_data)
tree_clf.predict(new_data)

決定木の可視化

plot_tree(tree_clf, feature_names=iris.feature_names, filled=True)
plt.show()

ランダムフォレスト

from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
rnd_clf = RandomForestClassifier()
rnd_reg = RandomForestRegressor()
ハイパーパラメータ 選択肢 default
n_estimators int型 100
criterion gini、entropy gini
max_depth int型 or None None
min_samples_split int、float型 2
min_samples_leaf int、float型 1
min_weight_fraction_leaf float型 0
max_features int、float型、None、sqrt、log2(autoはv1.3で削除) sqrt
max_leaf_nodes int型 or None None
min_impurity_decrease float型 0
min_impurity_split float型 1e-7(v1.0で削除。代わりにmin_impurity_decreaseを使う)
bootstrap bool型 True
oob_score bool型 False
n_jobs int型 or None None
random_state int型、RandomState instance or None None
verbose int型 0
warm_start bool型 False
class_weight 辞書型、balanced、balanced_subsample or None None

詳しくは
https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html

データ変換

最大最小スケーリング(正規化)

パラメータfeature_rangeは省略することもできる。その場合は0から1になる。

from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1))
X_scaled = scaler.fit_transform(X)

標準化

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

モデル評価、選択

訓練データと検証データに分割

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

K分割交差検証

from sklearn.model_selection import cross_val_score
scores = cross_val_score(model, X, y, cv=5)
print("Scores:", scores)
print("Mean:", scores.mean())
print("Standard deviation:", scores.std())

cross_val_score(reg, X, y, scoring="neg_mean_squared_error", cv=10)
のように、引数scoringで評価指標を指定することもできる。これはk個のモデルの比較に使うスコアである。

グリッドサーチ

from sklearn.model_selection import GridSearchCV
param_grid = {'param_name': [value1, value2]}  # 探索するパラメータ名と候補値のリスト
model = ...  # 任意の推定器(例: Ridge())
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5)
grid_search.fit(X, y)
best_model = grid_search.best_estimator_
best_model.predict(X)
print(grid_search.best_params_)
print(grid_search.best_score_)
0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0