#機械学習Pythonコードまとめ
##データの分割
#ホールドアウト法
from sklearn.model_selection import train_test_split

# Hold-out split: 20% of the samples go to the test set; the fixed
# random_state makes the shuffle reproducible between runs.
# X (features) and y (labels) must be defined before this snippet runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)
#k-分割交差検証(クロスバリデーション)
from sklearn.model_selection import cross_val_score

# 5-fold cross-validation of the `svc` estimator on the full data set.
# NOTE: `svc` must already exist; in this file it is created in the
# support vector machine snippet further down.
# The result holds one score per fold.
scores = cross_val_score(svc, X, y, cv=5)
##Scikit-learnを用いたモデル(分類)
###Random Forest
from sklearn.ensemble import RandomForestClassifier

# Build a random forest with max_depth=100 and train it in one
# expression; fit() returns the estimator itself.
rfc = RandomForestClassifier(max_depth=100).fit(X_train, y_train)

# Predicted class labels for the held-out samples.
predicted = rfc.predict(X_test)
###ロジスティック回帰
from sklearn.linear_model import LogisticRegression

# Logistic regression with a fixed random_state; fit() returns the
# estimator itself, so construction and training are chained.
lr = LogisticRegression(random_state=42).fit(X_train, y_train)

# Predicted class labels for the held-out samples.
predicted = lr.predict(X_test)
###サポートベクターマシン
from sklearn.svm import SVC

# Support vector classifier with default hyper-parameters; fit() returns
# the estimator itself, so construction and training are chained.
svc = SVC().fit(X_train, y_train)

# Predicted class labels for the held-out samples.
predicted = svc.predict(X_test)
##モデルの評価
# Score the fitted estimator on the training data and on the test data.
# NOTE(review): `model` is not defined in this file; presumably it stands
# for one of the fitted classifiers (rfc, lr or svc) -- confirm before running.
# The return values are not stored; assign or print them when running this
# as a script rather than interactively.
model.score(X_train, y_train)
model.score(X_test, y_test)
#混同行列
from sklearn.metrics import confusion_matrix

# Rows correspond to the true labels, columns to the predicted labels.
confmat = confusion_matrix(y_true=y_test, y_pred=predicted)
#正解率
from sklearn import metrics

# Fraction of test samples whose predicted label equals the true label.
accuracy = metrics.accuracy_score(y_true=y_test, y_pred=predicted)
#精度(適合率)
from sklearn.metrics import precision_score

# Precision = TP / (TP + FP).
# Call the imported function directly so this snippet does not depend on a
# separate `from sklearn import metrics` elsewhere in the file.
# With the default average='binary' this only works for binary targets.
precision = precision_score(y_test, predicted)
#再現率
from sklearn.metrics import recall_score

# Recall = TP / (TP + FN).
# Call the imported function directly so this snippet does not depend on a
# separate `from sklearn import metrics` elsewhere in the file.
# With the default average='binary' this only works for binary targets.
recall = recall_score(y_test, predicted)
#F値
from sklearn.metrics import f1_score

# F1 = harmonic mean of precision and recall.
# Call the imported function directly so this snippet does not depend on a
# separate `from sklearn import metrics` elsewhere in the file.
# With the default average='binary' this only works for binary targets.
f1 = f1_score(y_test, predicted)
多クラス分類の評価をする際は、引数averageにマイクロ平均('micro')かマクロ平均('macro')を指定する場合がある。
# Micro-averaged F1: counts true/false positives and false negatives
# globally over all classes. Requires `from sklearn import metrics`.
f1 = metrics.f1_score(
    y_true=y_test,
    y_pred=predicted,
    average="micro",
)
##モデルの保存
# Save the model
import pickle

name = 'test.sav'
# Use a context manager so the file handle is flushed and closed even if
# pickling fails.
# NOTE(review): `model` is not defined in this file; presumably a fitted
# estimator such as rfc, lr or svc -- confirm before running.
with open(name, 'wb') as model_file:
    pickle.dump(model, model_file)

# Load the model
# SECURITY: unpickling can execute arbitrary code; only load files that
# come from a trusted source.
with open(name, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# Score the restored model on the test data.
result = loaded_model.score(X_test, y_test)