scikit-learn を使った単回帰分析・重回帰分析の実装方法を紹介する。
# sklearn.linear_model.LinearRegression クラスを読み込み
from sklearn import linear_model
import pandas as pd
import numpy as npy
import matplotlib.pyplot as plt
import numpy as np
import requests
import io
# Shared linear-regression estimator; it is fitted for the simple regression
# first and then refitted for the multiple regression further down.
clf = linear_model.LinearRegression()

# Download the red-wine quality dataset (semicolon-delimited CSV).
url = 'http://pythondatascience.plavox.info/wp-content/uploads/2016/07/winequality-red.csv'
# A timeout keeps the script from hanging forever on an unresponsive server,
# and raise_for_status() stops an HTTP error page from being parsed as CSV.
res = requests.get(url, timeout=30)
res.raise_for_status()
df = pd.read_csv(io.BytesIO(res.content), sep=";")
# Re-save locally as a regular comma-separated file, without the index column.
df.to_csv('winequality-red.csv', index=False)

# Reload from the local copy and show the first rows as a sanity check.
print('データ確認')
wine = pd.read_csv("winequality-red.csv")
print(wine.head())
print('\n')
print('単回帰')
# Explanatory variable: the "density" column, selected with a list so the
# result stays two-dimensional (n_samples x 1) as the estimator expects.
X = wine[['density']].to_numpy()
print(X)
# Target variable: the "alcohol" column.
Y = wine['alcohol'].to_numpy()
print(Y)

# Fit the model on the single feature.
clf.fit(X, Y)
slope = clf.coef_[0]
intercept = clf.intercept_
r_squared = clf.score(X, Y)

# Regression coefficient, intercept and coefficient of determination (R^2).
print('回帰係数\n', clf.coef_)
print('切片\n', intercept)
print('決定係数\n', r_squared)
# "!s" formats via str(), exactly like the "%s" placeholder would.
print('回帰式\n', f'[alcohol] = {slope!s} × [density] + {intercept!s}')

# Scatter plot of the observations with the fitted regression line on top.
fitted = clf.predict(X)
plt.scatter(X, Y)
plt.plot(X, fitted)
plt.show()
print('\n')
print('重回帰\n')
# Explanatory variables: every column except the target "quality".
wine_except_quality = wine.drop("quality", axis=1)
X = wine_except_quality.values
print(X)
# Target variable: the "quality" column.
Y = wine['quality'].values
print(Y)

# Refit the shared estimator on all explanatory variables.
clf.fit(X, Y)

# Partial regression coefficients, one row per feature, sorted ascending by
# coefficient value. Built once and reused for both printing and the equation.
parameters = pd.DataFrame({"Name": wine_except_quality.columns,
                           "Coefficients": clf.coef_}).sort_values(by='Coefficients')
print('回帰係数\n', parameters)
pval = parameters['Coefficients'].values
print(pval)

# Intercept of the fitted model.
print('切片\n', clf.intercept_)

# Pair every coefficient with its own feature name taken from the sorted table.
# A hard-coded list of names would only be correct while the sort order (which
# depends on the fitted values) and the number of columns stay the same.
terms = [f'{coef!s} × [{name}]' for name, coef in zip(parameters['Name'], pval)]
print('回帰式\n', '[quality] = ' + ' + '.join(terms + [str(clf.intercept_)]))
print('\n')
まとめ
今回は、scikit-learn を使った単回帰分析・重回帰分析の実装方法を紹介した。