機械学習Pythonコードまとめ
データの分割
# Hold-out method: keep 20% of the samples aside for testing.
# random_state=0 fixes the seed so the split is reproducible.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
# k-fold cross-validation with cv=5 folds.
# NOTE: `svc` must already be an estimator instance (e.g. the SVC() created in
# the support vector machine section of these notes) before this line runs.
from sklearn.model_selection import cross_val_score
scores = cross_val_score(svc, X, y, cv=5)
Scikit-learnを用いたモデル(分類)
Random Forest
from sklearn.ensemble import RandomForestClassifier
# Create the forest (max_depth=100) and train it in one expression;
# fit() hands back the fitted estimator itself.
rfc = RandomForestClassifier(max_depth=100).fit(X_train, y_train)
# Predictions for the held-out samples.
predicted = rfc.predict(X_test)
ロジスティック回帰
from sklearn.linear_model import LogisticRegression
# Create the classifier with a fixed seed (random_state=42) and train it in
# one expression; fit() hands back the fitted estimator itself.
lr = LogisticRegression(random_state=42).fit(X_train, y_train)
# Predictions for the held-out samples.
predicted = lr.predict(X_test)
サポートベクターマシン
from sklearn.svm import SVC
# Create an SVC with its default settings and train it in one expression;
# fit() hands back the fitted estimator itself.
svc = SVC().fit(X_train, y_train)
# Predictions for the held-out samples.
predicted = svc.predict(X_test)
モデルの評価
# Score of the estimator on the training split and on the test split
# (`model` stands for whichever fitted estimator is being evaluated).
model.score(X_train, y_train)
model.score(X_test, y_test)
# Everything needed from sklearn.metrics for the evaluations below.
from sklearn import metrics
from sklearn.metrics import confusion_matrix, f1_score, precision_score, recall_score
# Confusion matrix
confmat = confusion_matrix(y_true=y_test, y_pred=predicted)
# Accuracy
accuracy = metrics.accuracy_score(y_true=y_test, y_pred=predicted)
# Precision
precision = precision_score(y_true=y_test, y_pred=predicted)
# Recall
recall = recall_score(y_true=y_test, y_pred=predicted)
# F1 score
f1 = f1_score(y_true=y_test, y_pred=predicted)
多クラス分類を評価する際は、引数 average にマイクロ平均かマクロ平均を指定する場合がある。
# F1 score with the averaging mode explicitly set to 'micro'.
f1 = metrics.f1_score(y_test, predicted, average='micro')
モデルの保存
# Save the model
import pickle
name = 'test.sav'
# A context manager guarantees the file is flushed and closed as soon as the
# dump finishes, instead of leaving the handle to the garbage collector.
with open(name, 'wb') as model_file:
    pickle.dump(model, model_file)
# Load the model
# NOTE: unpickling can execute arbitrary code -- only load files that come
# from a trusted source.
with open(name, 'rb') as model_file:
    loaded_model = pickle.load(model_file)
# Evaluate the restored model on the test split.
result = loaded_model.score(X_test, y_test)