- Support page: https://www.sbcr.jp/product/4797393965/
- 1st edition errata: https://s3-ap-northeast-1.amazonaws.com/sbcr-dl-tec/2018-12-12-93965-%E6%A9%9F%E6%A2%B0%E5%AD%A6%E7%BF%92%E3%81%AE%E3%82%A8%E3%83%83%E3%82%BB%E3%83%B3%E3%82%B9/errata.pdf
from IPython.display import HTML
HTML('''
<script>
code_show = true;
function code_toggle() {
    if (code_show) {
        $('div.input').hide();
    } else {
        $('div.input').show();
    }
    code_show = !code_show;
}
$(document).ready(code_toggle);
</script>
<form action="javascript:code_toggle()">
<input type="submit" value="Toggle code">
</form>
''')
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
plt.rcParams['font.size']=15
def plt_legend_out():
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0)
Linear Regression

Two-dimensional case

In $y=ax+b$, the $x$ and $y$ values are known; we seek the $a$ and $b$ that minimize the squared error $E=\sum_{i=1}^n(ax_i+b-y_i)^2$. Solving $\partial E/\partial a=0$ and $\partial E/\partial b=0$ gives:
\begin{eqnarray}
a &=&
\left (\sum_{i=1}^nx_iy_i-\frac{1}{n}\sum_{i=1}^nx_i\sum_{i=1}^ny_i \right ) \Big/
\left (\sum_{i=1}^nx_i^2-\frac{1}{n}\left (\sum_{i=1}^nx_i\right )^2 \right )\\
b &=& \frac{1}{n}\left (\sum_{i=1}^ny_i-a\sum_{i=1}^nx_i \right )
\end{eqnarray}
| Formula ($\sum$ notation) | Formula (vector notation) | Code |
|---|---|---|
| $\sum_{i=1}^nx_iy_i$ | $x^Ty$ | np.dot(x, y) |
| $\frac{1}{n}\sum_{i=1}^nx_i\sum_{i=1}^ny_i$ | $\frac{1}{n}(\mathbf{1}^Tx)(\mathbf{1}^Ty)$ | x.sum() * y.sum() / n |
| $\sum_{i=1}^nx_i^2$ | $x^Tx$ | (x**2).sum() |
| $\frac{1}{n}\left(\sum_{i=1}^nx_i\right)^2$ | $\frac{1}{n}(\mathbf{1}^Tx)^2$ | x.sum()**2 / n |
def reg1dim2(x, y):
    # closed-form least-squares fit of y = a*x + b
    n = len(x)
    a = ((np.dot(x, y) - y.sum() * x.sum() / n) /
         ((x**2).sum() - x.sum()**2 / n))
    b = (y.sum() - a * x.sum()) / n
    return a, b
x = np.array([1, 2, 4, 6, 7])
y = np.array([1, 3, 3, 5, 4])
a, b = reg1dim2(x, y)
plt.scatter(x, y, color="k")
xmax = x.max()
plt.plot([0, xmax], [b, a * xmax + b], color="k")
plt.xlabel('x')
plt.ylabel('y')
plt.show()
Multidimensional case

In the multidimensional case,
$y=w^T\tilde{x}$
where $\tilde{x}$ is the feature vector with a constant 1 prepended. Stacking all samples into the matrix $\tilde{X}$, this is rewritten as
$\hat{y}(w)=\tilde{X}w$
Compute the squared error:
\begin{eqnarray}
E(\color{red}{w}) &=& ||y-\tilde{X}\color{red}{w}||^2\\
&=& (y-\tilde{X}\color{red}{w})^T(y-\tilde{X}\color{red}{w})\\
&=& y^Ty-\color{red}{w^T}\tilde{X}^Ty-y^T\tilde{X}\color{red}{w}+\color{red}{w^T}\tilde{X}^T\tilde{X}\color{red}{w}
\end{eqnarray}
Solve for $\color{red}{w}$:
\begin{eqnarray}
\nabla E = -2\tilde{X}^Ty+2\tilde{X}^T\tilde{X}\color{red}{w} &=& 0\\
\tilde{X}^Ty &=& \tilde{X}^T\tilde{X}\color{red}{w}\\
\color{red}{w} &=& (\tilde{X}^T\tilde{X})^{-1}\tilde{X}^Ty
\end{eqnarray}
from scipy import linalg

class LinearRegression:
    def __init__(self):
        self.w_ = None

    def fit(self, X, t):
        # augment X with a column of ones and solve the normal equations
        Xtil = np.c_[np.ones(X.shape[0]), X]
        A = np.dot(Xtil.T, Xtil)
        b = np.dot(Xtil.T, t)
        self.w_ = linalg.solve(A, b)

    def predict(self, X):
        if X.ndim == 1:
            X = X.reshape(1, -1)
        Xtil = np.c_[np.ones(X.shape[0]), X]
        return np.dot(Xtil, self.w_)
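As a sanity check, the normal-equation solution can be compared with NumPy's built-in least-squares solver on some made-up data; the two should agree up to numerical precision.

# compare the closed-form solution with np.linalg.lstsq (check on synthetic data)
X_chk = np.random.RandomState(0).random((30, 3))
y_chk = X_chk @ np.array([1.0, 2.0, 3.0]) + 0.5
model_chk = LinearRegression()
model_chk.fit(X_chk, y_chk)
Xtil_chk = np.c_[np.ones(X_chk.shape[0]), X_chk]
w_lstsq, *_ = np.linalg.lstsq(Xtil_chk, y_chk, rcond=None)
print(np.allclose(model_chk.w_, w_lstsq))  # expect True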
from mpl_toolkits.mplot3d import axes3d
n = 100
scale = 10
np.random.seed(0)
X = np.random.random((n, 2)) * scale
w0 = 1
w1 = 2
w2 = 3
y = w0 + w1 * X[:, 0] + w2 * X[:, 1] + np.random.randn(n)
model = LinearRegression()
model.fit(X, y)
# print("係数:", model.w_)
# print("(1, 1)に対する予測値:", model.predict(np.array([1, 1])))
xmesh, ymesh = np.meshgrid(np.linspace(0, scale, 20),
np.linspace(0, scale, 20))
zmesh = (model.w_[0] + model.w_[1] * xmesh.ravel() +
model.w_[2] * ymesh.ravel()).reshape(xmesh.shape)
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(X[:, 0], X[:, 1], y, color="k")
ax.plot_wireframe(xmesh, ymesh, zmesh, color="r")
plt.xlabel('x')
plt.ylabel('y')
ax.set_zlabel('z')
plt.show()
A practical example
import numpy as np
import csv
# load the data
Xy = []
with open("winequality-red.csv") as fp:
    for row in csv.reader(fp, delimiter=";"):
        Xy.append(row)
Xy = np.array(Xy[1:], dtype=np.float64)
# split into training and test sets
np.random.seed(0)
np.random.shuffle(Xy)
train_X = Xy[:-1000, :-1]
train_y = Xy[:-1000, -1]
test_X = Xy[-1000:, :-1]
test_y = Xy[-1000:, -1]
# fit the model
model = LinearRegression()
model.fit(train_X, train_y)
# apply the model to the test data
y = model.predict(test_X)
print("最初の5つの正解と予測値:")
for i in range(5):
print("{:1.0f} {:5.3f}".format(test_y[i], y[i]))
print()
print("RMSE:", np.sqrt(((test_y - y)**2).mean()))
First 5 ground truths and predictions:
7 6.012
6 5.734
5 5.285
8 6.352
5 5.414
RMSE: 0.6724248548465226
plt.figure(figsize=(4,4))
plt.scatter(test_y, y)
plt.xticks(np.arange(2,10,1))
plt.yticks(np.arange(2,10,1))
plt.xlim(2,9)
plt.ylim(2,9)
plt.xlabel('observed')
plt.ylabel('predicted')
plt.plot(np.arange(2,10,1),np.arange(2,10,1),color='gray',lw=0.5)
plt.grid()
plt.show()
Ridge Regression

Introduce the penalty term $\lambda ||\color{red}{w}||^2$, so that solutions with a smaller $||\color{red}{w}||^2$ are preferred; here $||\cdot||$ is the L2 norm.
$E(\color{red}{w})=||y-\tilde{X}\color{red}{w}||^2+\lambda ||\color{red}{w}||^2$
Solve for $\color{red}{w}$:
\begin{eqnarray}
\nabla E=2\tilde{X}^T\tilde{X}\color{red}{w}-2\tilde{X}^Ty+2\lambda \color{red}{w} &=&0\\
2[(\tilde{X}^T\tilde{X}+\lambda I)\color{red}{w}-\tilde{X}^Ty] &=& 0\\
\color{red}{w} &=& (\tilde{X}^T\tilde{X}+\lambda I)^{-1}\tilde{X}^T y
\end{eqnarray}
from scipy import linalg

class RidgeRegression:
    def __init__(self, lambda_=1.):
        self.lambda_ = lambda_
        self.w_ = None

    def fit(self, X, t):
        Xtil = np.c_[np.ones(X.shape[0]), X]
        c = np.eye(Xtil.shape[1])
        A = np.dot(Xtil.T, Xtil) + self.lambda_ * c
        b = np.dot(Xtil.T, t)
        self.w_ = linalg.solve(A, b)

    def predict(self, X):
        Xtil = np.c_[np.ones(X.shape[0]), X]
        return np.dot(Xtil, self.w_)
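With $\lambda = 0$ the penalty vanishes and ridge regression should reduce to ordinary least squares. A minimal sketch checking this against the LinearRegression class defined above, on made-up data:

# with lambda = 0, RidgeRegression should reproduce the OLS coefficients
X_chk = np.random.RandomState(1).random((20, 3))
y_chk = X_chk @ np.array([1.0, 2.0, 3.0]) + 0.5
ols = LinearRegression()
ols.fit(X_chk, y_chk)
ridge0 = RidgeRegression(0.)
ridge0.fit(X_chk, y_chk)
print(np.allclose(ols.w_, ridge0.w_))  # expect True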
x = np.array([1, 2, 4, 6, 7])
y = np.array([1, 3, 3, 5, 4])
plt.scatter(x, y, color="k")
# fit and plot for several values of lambda
for par in [0, 1, 10, 50]:
    model = RidgeRegression(par)
    model.fit(x, y)
    b, a = model.w_
    xmax = x.max()
    plt.plot([0, xmax], [b, b + a * xmax], label='$\lambda$ = ' + str(par))
plt.xlabel('x')
plt.ylabel('y')
plt_legend_out()
plt.show()
When outliers are present
x = np.arange(12)
y = 1 + 2 * x
y[2] = 20
y[4] = 0
plt.scatter(x,y,color='k')
# fit and plot for several values of lambda (including a negative one)
for par in [0, 1, 10, -10]:
    model = RidgeRegression(par)
    model.fit(x, y)
    b, a = model.w_
    xmax = x.max()
    plt.plot([0, xmax], [b, b + a * xmax], label='$\lambda$ = ' + str(par))
plt.xlabel('x')
plt.ylabel('y')
plt_legend_out()
plt.show()
x = np.arange(12)
y = 1 + 2 * x
y[2] = 20
y[4] = 0
xmin = 0
xmax = 12
ymin = -1
ymax = 25
fig, axes = plt.subplots(nrows=2, ncols=5)
for i in range(5):
    axes[0, i].set_xlim([xmin, xmax])
    axes[0, i].set_ylim([ymin, ymax])
    axes[1, i].set_xlim([xmin, xmax])
    axes[1, i].set_ylim([ymin, ymax])
    axes[0, i].tick_params(bottom=False, labelbottom=False)
    if i > 0:
        axes[0, i].tick_params(left=False, labelleft=False)
        axes[1, i].tick_params(left=False, labelleft=False)
    # use only the first 2 + 2*i points
    xx = x[: 2 + i * 2]
    yy = y[: 2 + i * 2]
    axes[0, i].scatter(xx, yy, color="k")
    axes[1, i].scatter(xx, yy, color="k")
    # top row: ordinary least squares
    model = LinearRegression()
    model.fit(xx, yy)
    xs = [xmin, xmax]
    ys = [model.w_[0] + model.w_[1] * xmin, model.w_[0] + model.w_[1] * xmax]
    axes[0, i].plot(xs, ys, color="red")
    # bottom row: ridge regression
    model = RidgeRegression(10.)
    model.fit(xx, yy)
    ys = [model.w_[0] + model.w_[1] * xmin, model.w_[0] + model.w_[1] * xmax]
    axes[1, i].plot(xs, ys, color="red")
    axes[1, i].set_xlabel('x')
axes[0, 0].set_ylabel('ols')
axes[1, 0].set_ylabel('ridge\n($\lambda$ = 10)')
plt.show()
Polynomial regression and generalization performance
class PolynomialRegression:
    def __init__(self, degree):
        self.degree = degree

    def fit(self, x, y):
        # build the design matrix [x, x^2, ..., x^degree]
        x_pow = []
        xx = x.reshape(len(x), 1)
        for i in range(1, self.degree + 1):
            x_pow.append(xx**i)
        mat = np.concatenate(x_pow, axis=1)
        # delegate to linear regression on the powers of x
        linreg = LinearRegression()
        linreg.fit(mat, y)
        self.w_ = linreg.w_

    def predict(self, x):
        r = 0
        for i in range(self.degree + 1):
            r += x**i * self.w_[i]
        return r
# generate data
np.random.seed(0)
def f(x):
    return 1 + 2 * x
x = np.random.random(10) * 10
y = f(x) + np.random.randn(10)
# polynomial regression (degree 10 is high enough to overfit 10 points)
model = PolynomialRegression(10)
model.fit(x, y)
plt.scatter(x, y, color="k")
plt.ylim([y.min() - 1, y.max() + 1])
xx = np.linspace(x.min(), x.max(), 300)
yy = np.array([model.predict(u) for u in xx])
plt.plot(xx, yy, color="k")
# linear regression for comparison
model = LinearRegression()
model.fit(x, y)
b, a = model.w_
x1 = x.min() - 1
x2 = x.max() + 1
plt.plot([x1, x2], [a * x1 + b, a * x2 + b], color="k", linestyle="dashed")
plt.xlabel('x')
plt.ylabel('y')
plt.show()
/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:11: LinAlgWarning: Ill-conditioned matrix (rcond=2.60099e-30): result may not be accurate.
The following is the expected squared error, where $\color{red}{\hat{f}_D(x)}$ is the prediction of a model trained on dataset $D$. The first term is the squared bias, the squared difference between the true value and the expected prediction; the second is the variance, the spread of the predictions as $D$ varies.
\begin{eqnarray}
E_D[(f(x)-\color{red}{\hat{f}_D(x)})^2]=(f(x)-E_D[\color{red}{\hat{f}_D(x)}])^2+E_D[(\color{red}{\hat{f}_D(x)}-E_D[\color{red}{\hat{f}_D(x)}])^2]
\end{eqnarray}
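This identity is easy to check numerically. A minimal sketch: at a fixed point $x$, take $\hat{f}_D(x)$ to be the mean of 5 noisy observations of $f(x)$ (an illustrative choice of estimator, not from the book), and compare the two sides over many datasets $D$.

rng = np.random.RandomState(0)
f_x = 2.0                                     # true value f(x)
fhat = np.array([(f_x + rng.randn(5)).mean()  # one prediction per dataset D
                 for _ in range(100000)])
mse = ((f_x - fhat)**2).mean()                # E_D[(f(x) - fhat_D(x))^2]
bias2 = (f_x - fhat.mean())**2                # squared bias
var = fhat.var()                              # variance
print(mse, bias2 + var)                       # the two sides agree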
Algorithm

- Repeat the following $n$ times:
  - Draw 5 points at random in the range $0\leq x \leq 5$ and evaluate $f(x)=1/(1+x)$.
  - Fit a linear or polynomial regression to them.
  - Compute the prediction at each $x$.
- Average the predictions over the $n$ trials.

In the figure below, $n=1$ looks somewhat overfit, but generalization appears to improve as trials are averaged, and the linear model generalizes better than the polynomial one.
import warnings
def f(x):
    return 1 / (1 + x)

def sample(n):
    x = np.random.random(n) * 5
    y = f(x)
    return x, y
xx = np.arange(0, 5, 0.1)
np.random.seed(0)
y_true = f(xx)
warnings.filterwarnings("ignore")
bvmax = 0.5
bvmin = -0.05
ymax = 1.5
ymin = 0
##########
plt.figure(figsize=(10,7))
plt.subplots_adjust(hspace=0.1,wspace=0.1)
##########
plt.subplot(321)
plt.scatter(xx, f(xx), label="truth",color="k", s=2)
ns = [1,10,20,50]
c = sns.color_palette('husl',len(ns))
lb = [] ; lv = []
for i in range(len(ns)):
    n = ns[i]
    # reset the accumulators for each n so runs do not leak between curves
    y_lin_sum = np.zeros(len(xx))
    y_lin_sum_sq = np.zeros(len(xx))
    for _ in range(n):
        x, y = sample(5)
        lin = LinearRegression()
        lin.fit(x, y)
        y_lin = lin.predict(xx.reshape(-1, 1))
        y_lin_sum += y_lin
        y_lin_sum_sq += y_lin**2
    plt.plot(xx, y_lin_sum / n, label="n = " + str(n), color=c[i], alpha=0.8)
    lb.append((y_lin_sum / n - y_true)**2)            # squared bias estimate
    lv.append(y_lin_sum_sq / n - (y_lin_sum / n)**2)  # variance estimate
plt.tick_params(bottom=False,labelbottom=False)
plt.title('linear reg.')
plt.ylabel('y')
plt.ylim(ymin,ymax)
##########
plt.subplot(323)
for i in range(len(ns)):
    plt.plot(xx, lb[i], color=c[i])
plt.ylabel('bias')
plt.ylim(bvmin,bvmax)
plt.tick_params(bottom=False,labelbottom=False)
##########
plt.subplot(325)
for i in range(len(ns)):
    plt.plot(xx, lv[i], color=c[i])
plt.ylabel('variance')
plt.xlabel('x')
plt.ylim(bvmin,bvmax)
##########
plt.subplot(322)
plt.scatter(xx, f(xx), label="truth",color="k", s=2)
ns = [1,10,20,50]
c = sns.color_palette('husl',len(ns))
lb = [] ; lv = []
for i in range(len(ns)):
    n = ns[i]
    # reset the accumulators for each n so runs do not leak between curves
    y_poly_sum = np.zeros(len(xx))
    y_poly_sum_sq = np.zeros(len(xx))
    for _ in range(n):
        x, y = sample(5)
        poly = PolynomialRegression(4)
        poly.fit(x, y)
        y_poly = poly.predict(xx)
        y_poly_sum += y_poly
        y_poly_sum_sq += y_poly**2
    plt.plot(xx, y_poly_sum / n, label="n = " + str(n), color=c[i], alpha=0.8)
    lb.append((y_poly_sum / n - y_true)**2)             # squared bias estimate
    lv.append(y_poly_sum_sq / n - (y_poly_sum / n)**2)  # variance estimate
plt_legend_out()
plt.title('polynomial reg.')
plt.tick_params(bottom=False,labelbottom=False,left=False,labelleft=False)
plt.ylim(ymin,ymax)
##########
plt.subplot(324)
for i in range(len(ns)):
    plt.plot(xx, lb[i], color=c[i])
plt.ylim(bvmin,bvmax)
plt.tick_params(bottom=False,labelbottom=False,left=False,labelleft=False)
##########
plt.subplot(326)
for i in range(len(ns)):
    plt.plot(xx, lv[i], color=c[i])
plt.xlabel('x')
plt.ylim(bvmin,bvmax)
plt.tick_params(left=False,labelleft=False)
plt.show()
Lasso Regression

$\phi=\frac{1}{2}||y-\tilde{X}w||^2+\lambda |w|_1$
where $|w|_1$ is the L1 norm:
$|w|_1=\sum_{i=1}^d|w_i|$
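The Lasso class below minimizes this objective by coordinate descent. For reference, the update it applies to coordinate $k$ (holding the other coordinates fixed) has the form
\begin{eqnarray}
q_k &=& x_k^T\left(t-w_0\mathbf{1}-\sum_{j\neq k}w_jx_j\right),\qquad r_k=x_k^Tx_k\\
w_k &\leftarrow& S(q_k/r_k,\ \lambda)
\end{eqnarray}
where $x_k$ is the $k$-th column of $X$, $t$ is the target vector, and $S$ is the soft-thresholding operator implemented below; this summarizes what the _update method computes rather than giving a full derivation.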
import numpy as np

def soft_thresholding(x, y):
    # shrink |x| by y, clamping to 0 when |x| <= y
    return np.sign(x) * max(abs(x) - y, 0)
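# Quick illustration (added for clarity): with threshold 1.0, inputs inside
# [-1, 1] are zeroed and inputs outside shrink toward 0 by the threshold.
for v in [-2.0, -0.5, 0.0, 0.5, 2.0]:
    print(v, soft_thresholding(v, 1.0))   # -> -1.0, -0.0, 0.0, 0.0, 1.0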
class Lasso:
    def __init__(self, lambda_, tol=0.0001, max_iter=1000):
        self.lambda_ = lambda_
        self.tol = tol
        self.max_iter = max_iter
        self.w_ = None

    def fit(self, X, t):
        n, d = X.shape
        self.w_ = np.zeros(d + 1)
        avgl1 = 0.
        for _ in range(self.max_iter):
            avgl1_prev = avgl1
            self._update(n, d, X, t)
            # stop when the mean absolute weight stops changing
            avgl1 = np.abs(self.w_).sum() / self.w_.shape[0]
            if abs(avgl1 - avgl1_prev) <= self.tol:
                break

    def _update(self, n, d, X, t):
        self.w_[0] = (t - np.dot(X, self.w_[1:])).sum() / n
        w0vec = np.ones(n) * self.w_[0]
        for k in range(d):
            ww = self.w_[1:]   # view into self.w_
            ww[k] = 0          # exclude coordinate k from the residual
            q = np.dot(t - w0vec - np.dot(X, ww), X[:, k])
            r = np.dot(X[:, k], X[:, k])
            self.w_[k + 1] = soft_thresholding(q / r, self.lambda_)

    def predict(self, X):
        if X.ndim == 1:
            X = X.reshape(X.shape[0], 1)
        Xtil = np.c_[np.ones(X.shape[0]), X]
        return np.dot(Xtil, self.w_)
# load the data, keeping the header row for the coefficient plot below
rows = []
with open("winequality-red.csv") as fp:
    for row in csv.reader(fp, delimiter=";"):
        rows.append(row)
varname = ['intercept'] + rows[0][:-1]   # names for the entries of w_
Xy = np.array(rows[1:], dtype=np.float64)
# split into training and test sets
np.random.seed(0)
np.random.shuffle(Xy)
train_X = Xy[:-1000, :-1]
train_y = Xy[:-1000, -1]
test_X = Xy[-1000:, :-1]
test_y = Xy[-1000:, -1]
ll = [] ; lc = [] ; lm = []
# fit with several hyperparameter values and collect the results
for lambda_ in [1., 0.1, 0.01]:
    model = Lasso(lambda_)
    model.fit(train_X, train_y)
    y = model.predict(test_X)
    mse = ((y - test_y)**2).mean()
    ll.append(lambda_)
    lm.append(mse)
    lc.append(model.w_)
df_tmp1 = pd.DataFrame({'lambda':ll,'mse':lm})
df_tmp2 = pd.DataFrame(lc)
df_tmp2.columns = varname
df_tmp = pd.concat([df_tmp1,df_tmp2],axis=1)
df_plot = df_tmp.melt(id_vars=['lambda','mse'])
sns.barplot(data=df_tmp1,x='lambda',y='mse')
plt.show()
sns.barplot(data=df_plot,x='value',y='variable',hue='lambda')
plt.axvline(x=0,lw=0.5,color='k')
plt_legend_out()
Logistic Regression

The implementation below fits the weights by iteratively reweighted least squares (Newton's method), thresholding the predicted probability at 0.5.
import numpy as np
from scipy import linalg

THRESHMIN = 1e-10

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

class LogisticRegression:
    def __init__(self, tol=0.001, max_iter=3, random_seed=0):
        self.tol = tol
        self.max_iter = max_iter
        self.random_state = np.random.RandomState(random_seed)
        self.w_ = None

    def fit(self, X, y):
        self.w_ = self.random_state.randn(X.shape[1] + 1)
        Xtil = np.c_[np.ones(X.shape[0]), X]
        diff = np.inf
        w_prev = self.w_
        n_iter = 0
        while diff > self.tol and n_iter < self.max_iter:
            yhat = sigmoid(np.dot(Xtil, self.w_))
            # clip the weights r away from 0 to keep 1/r finite
            r = np.clip(yhat * (1 - yhat),
                        THRESHMIN, np.inf)
            XR = Xtil.T * r
            XRX = np.dot(Xtil.T * r, Xtil)
            w_prev = self.w_
            b = np.dot(XR, np.dot(Xtil, self.w_) -
                       1 / r * (yhat - y))
            self.w_ = linalg.solve(XRX, b)
            diff = abs(w_prev - self.w_).mean()
            n_iter += 1

    def predict(self, X):
        Xtil = np.c_[np.ones(X.shape[0]), X]
        yhat = sigmoid(np.dot(Xtil, self.w_))
        return np.where(yhat > .5, 1, 0)
import csv
import numpy as np
n_test = 100
X = []
y = []
with open("wdbc.data") as fp:
    for row in csv.reader(fp):
        # column 1 is the diagnosis: B = benign (0), otherwise malignant (1)
        if row[1] == "B":
            y.append(0)
        else:
            y.append(1)
        X.append(row[2:])
y = np.array(y, dtype=np.float64)
X = np.array(X, dtype=np.float64)
y_train = y[:-n_test]
X_train = X[:-n_test]
y_test = y[-n_test:]
X_test = X[-n_test:]
model = LogisticRegression(tol=0.01)
model.fit(X_train, y_train)
y_predict = model.predict(X_test)
n_hits = (y_test == y_predict).sum()
print("Accuracy: {}/{} = {}".format(n_hits, n_test, n_hits / n_test))
Accuracy: 97/100 = 0.97