"By the way, take a look at my multilayer perceptron."
"What do you think of it?"
"It's... so beautiful..."
Main topic
Yes, the whole joke is in the opening.
I recently had a chance to write a multilayer perceptron from scratch, so this is a sort of write-up of that.
This time I tried to make it reasonably general-purpose while still letting it train through a scikit-learn-like interface.
The number of hidden layers, the number of nodes per layer, the number of training iterations, the learning rate, and pretty much every other parameter can be tuned by passing arguments. There are traces of an attempt at batch normalization, but I never managed to normalize properly using the batch variance and so on.
A less general but more concise version is posted in this article, so please have a look at that as well.
Making something usable for just about anything really does make it complicated; I'd like to clean it up further. Below is the multilayer perceptron being trained on XOR (the classic example), with the labels one-hot encoded into two classes.
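To make the shapes concrete before the full listing: with the settings used below (featureNum=2, hidden=[4, 3], classNum=2), each weight matrix gets one extra row for the bias input. The little snippet here is just an illustration of the constructor logic, not part of the class itself:

import numpy as np
rc = [2] + [4, 3] + [2]   # featureNum, hidden, classNum
W = [np.random.randn(rc[i] + 1, rc[i + 1]) for i in range(len(rc) - 1)]
print([w.shape for w in W])   # [(3, 4), (5, 3), (4, 2)]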
mlp.py
import numpy as np


class MultilayerPerceptron:

    def __init__(self, featureNum, rho, classNum, hidden, normalization=True):
        self.featureNum = featureNum
        self.rho = rho
        self.classNum = classNum
        self.hidden = hidden
        self.normalization = normalization
        rc = [self.featureNum] + self.hidden + [self.classNum]
        # Each weight matrix gets one extra row for the bias input
        self.W = [np.random.randn(rc[i]+1, rc[i+1]) for i in range(len(hidden)+1)]
        self.h = [[] for _ in range(len(self.hidden)+1)]
        self.g = [[] for _ in range(len(self.hidden)+1)]

    def fit(self, X, y, learn_times=1000, batch_size="full"):
        self.X = np.array(X)
        one = np.ones((len(self.X), 1))
        self.learn_times = learn_times
        self.X_train_mean = np.sum(self.X, axis=0)/len(self.X)
        self.X_train_sigma = np.var(self.X, axis=0)**0.5
        if (self.normalization):
            self.X = (self.X - self.X_train_mean)/self.X_train_sigma
        self.X = np.hstack((self.X, one))
        self.y = np.tile(y, (1, 1))
        self.eps = [[] for _ in range(len(self.hidden)+1)]
        self.sigmoid = np.vectorize(lambda x: 1.0 / (1.0 + np.exp(-x)))
        self.batch_size = len(self.X) if (batch_size == "full") else int(batch_size)

        for _ in range(self.learn_times):
            self.shuffled_index = np.random.permutation(len(self.X))
            self.X_shuffled = self.X[self.shuffled_index]
            self.y_shuffled = self.y[self.shuffled_index]
            self.X_batch_list = np.array_split(self.X_shuffled, len(self.X)//self.batch_size, 0)
            self.y_batch_list = np.array_split(self.y_shuffled, len(self.y)//self.batch_size, 0)

            for p in range(len(self.X_batch_list)):
                self.X_batchp = self.X_batch_list[p]
                self.y_batchp = self.y_batch_list[p]
                # Leftover from the batch-normalization attempt; currently unused
                self.X_batch_mean = np.sum(self.X_batchp, axis=0) / self.batch_size
                one_batchp = np.ones((len(self.X_batchp), 1))

                # No activation is applied at the input layer; compute the outputs of every later layer
                self.h[0] = self.X_batchp @ self.W[0]
                self.g[0] = np.hstack((self.sigmoid(self.h[0]), one_batchp))
                for j in range(1, len(self.hidden)):
                    # The weights are created with the right shapes up front,
                    # so the forward pass is just repeated matrix products
                    self.h[j] = self.g[j-1] @ self.W[j]
                    # For every layer except the last, append a column of 1s
                    # to match the augmented (bias-extended) weight matrix
                    self.g[j] = np.hstack((self.sigmoid(self.h[j]), one_batchp))
                self.h[-1] = self.g[-2] @ self.W[-1]
                self.g[-1] = self.sigmoid(self.h[-1])

                # The "output" of the input layer is what an identity activation would give,
                # so only the first-layer update needs to branch and use the raw inputs
                self.eps[-1] = np.array((self.g[-1] - self.y_batchp) * self.g[-1]*(1-self.g[-1]))
                for j in range(1, len(self.eps)):
                    # Elementwise product of the sigmoid derivative with the error
                    # propagated back through the weights.
                    # The last column corresponds to this layer's bias input, which the
                    # previous layer cannot see, so that column is dropped.
                    self.eps[-(j+1)] = self.eps[-j] @ self.W[-j].T * self.g[-(j+1)]*(1-self.g[-(j+1)])
                    self.eps[-(j+1)] = np.delete(self.eps[-(j+1)], -1, axis=1)
                self.W[0] -= self.rho * self.X_batchp.T @ self.eps[0] / len(self.X_batchp)
                for j in range(1, len(self.hidden)+1):
                    self.W[j] -= self.rho * self.g[j-1].T @ self.eps[j] / len(self.X_batchp)

    def pred(self, X_test):
        self.X_test = np.array(X_test)
        one = np.ones((len(self.X_test), 1))
        if (self.normalization):
            self.X_test = (self.X_test - self.X_train_mean)/self.X_train_sigma
        self.X_test = np.hstack((self.X_test, one))
        self.h[0] = self.X_test @ self.W[0]
        self.g[0] = np.hstack((self.sigmoid(self.h[0]), one))
        for j in range(1, len(self.hidden)):
            self.h[j] = self.g[j-1] @ self.W[j]
            self.g[j] = np.hstack((self.sigmoid(self.h[j]), one))
        self.h[-1] = self.g[-2] @ self.W[-1]
        self.g[-1] = self.sigmoid(self.h[-1])
        return np.argmax(self.g[-1], axis=1)

    def score(self, X_test, y_test):
        self.X_test = np.array(X_test)
        self.y_test = np.array(y_test)
        self.loss_vector = (np.argmax(np.array(self.y_test), axis=1) == self.pred(self.X_test))
        return np.count_nonzero(self.loss_vector)/len(self.X_test)


# A bias unit (constant input of 1) is automatically appended to the input and to each hidden layer
mlp = MultilayerPerceptron(featureNum=2, rho=1, classNum=2, hidden=[4, 3])

x = [[0, 0], [0, 1], [1, 0], [1, 1]]
y = [[1, 0], [0, 1], [0, 1], [1, 0]]

mlp.fit(x, y, 1000, 2)

print(mlp.pred(x))
print(mlp.score(x, y))
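For the record, the weight update in the code above is the standard delta rule for sigmoid units with a squared-error loss. In my own notation (not taken from the code), with g^(l) the output of layer l, ρ the learning rate rho, and |B| the mini-batch size, it amounts to

\varepsilon^{(L)} = (g^{(L)} - y) \odot g^{(L)} \odot (1 - g^{(L)})

\varepsilon^{(l)} = \bigl(\varepsilon^{(l+1)} W^{(l+1)\top}\bigr) \odot g^{(l)} \odot (1 - g^{(l)}) \quad (\text{bias column dropped})

W^{(l)} \leftarrow W^{(l)} - \tfrac{\rho}{|B|}\, g^{(l-1)\top} \varepsilon^{(l)}

where g^(0) is the normalized, bias-extended input batch. The np.delete call inside the error loop is exactly the "drop the bias column" step.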
Here is the version without the comments.
mlp.py
import numpy as np


class MultilayerPerceptron:

    def __init__(self, featureNum, rho, classNum, hidden, normalization=True):
        self.featureNum = featureNum
        self.rho = rho
        self.classNum = classNum
        self.hidden = hidden
        self.normalization = normalization
        rc = [self.featureNum] + self.hidden + [self.classNum]
        self.W = [np.random.randn(rc[i]+1, rc[i+1]) for i in range(len(hidden)+1)]
        self.h = [[] for _ in range(len(self.hidden)+1)]
        self.g = [[] for _ in range(len(self.hidden)+1)]

    def fit(self, X, y, learn_times=1000, batch_size="full"):
        self.X = np.array(X)
        one = np.ones((len(self.X), 1))
        self.learn_times = learn_times
        self.X_train_mean = np.sum(self.X, axis=0)/len(self.X)
        self.X_train_sigma = np.var(self.X, axis=0)**0.5
        if (self.normalization):
            self.X = (self.X - self.X_train_mean)/self.X_train_sigma
        self.X = np.hstack((self.X, one))
        self.y = np.tile(y, (1, 1))
        self.eps = [[] for _ in range(len(self.hidden)+1)]
        self.sigmoid = np.vectorize(lambda x: 1.0 / (1.0 + np.exp(-x)))
        self.batch_size = len(self.X) if (batch_size == "full") else int(batch_size)
        for _ in range(self.learn_times):
            self.shuffled_index = np.random.permutation(len(self.X))
            self.X_shuffled = self.X[self.shuffled_index]
            self.y_shuffled = self.y[self.shuffled_index]
            self.X_batch_list = np.array_split(self.X_shuffled, len(self.X)//self.batch_size, 0)
            self.y_batch_list = np.array_split(self.y_shuffled, len(self.y)//self.batch_size, 0)
            for p in range(len(self.X_batch_list)):
                self.X_batchp = self.X_batch_list[p]
                self.y_batchp = self.y_batch_list[p]
                self.X_batch_mean = np.sum(self.X_batchp, axis=0) / self.batch_size
                one_batchp = np.ones((len(self.X_batchp), 1))
                self.h[0] = self.X_batchp @ self.W[0]
                self.g[0] = np.hstack((self.sigmoid(self.h[0]), one_batchp))
                for j in range(1, len(self.hidden)):
                    self.h[j] = self.g[j-1] @ self.W[j]
                    self.g[j] = np.hstack((self.sigmoid(self.h[j]), one_batchp))
                self.h[-1] = self.g[-2] @ self.W[-1]
                self.g[-1] = self.sigmoid(self.h[-1])
                self.eps[-1] = np.array((self.g[-1] - self.y_batchp) * self.g[-1]*(1-self.g[-1]))
                for j in range(1, len(self.eps)):
                    self.eps[-(j+1)] = self.eps[-j] @ self.W[-j].T * self.g[-(j+1)]*(1-self.g[-(j+1)])
                    self.eps[-(j+1)] = np.delete(self.eps[-(j+1)], -1, axis=1)
                self.W[0] -= self.rho * self.X_batchp.T @ self.eps[0] / len(self.X_batchp)
                for j in range(1, len(self.hidden)+1):
                    self.W[j] -= self.rho * self.g[j-1].T @ self.eps[j] / len(self.X_batchp)

    def pred(self, X_test):
        self.X_test = np.array(X_test)
        one = np.ones((len(self.X_test), 1))
        if (self.normalization):
            self.X_test = (self.X_test - self.X_train_mean)/self.X_train_sigma
        self.X_test = np.hstack((self.X_test, one))
        self.h[0] = self.X_test @ self.W[0]
        self.g[0] = np.hstack((self.sigmoid(self.h[0]), one))
        for j in range(1, len(self.hidden)):
            self.h[j] = self.g[j-1] @ self.W[j]
            self.g[j] = np.hstack((self.sigmoid(self.h[j]), one))
        self.h[-1] = self.g[-2] @ self.W[-1]
        self.g[-1] = self.sigmoid(self.h[-1])
        return np.argmax(self.g[-1], axis=1)

    def score(self, X_test, y_test):
        self.X_test = np.array(X_test)
        self.y_test = np.array(y_test)
        self.loss_vector = (np.argmax(np.array(self.y_test), axis=1) == self.pred(self.X_test))
        return np.count_nonzero(self.loss_vector)/len(self.X_test)


mlp = MultilayerPerceptron(featureNum=2, rho=1, classNum=2, hidden=[4, 3])
x = [[0, 0], [0, 1], [1, 0], [1, 1]]
y = [[1, 0], [0, 1], [0, 1], [1, 0]]
mlp.fit(x, y, 1000, 2)
print(mlp.pred(x))
print(mlp.score(x, y))
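As mentioned at the top, the batch-normalization attempt never got further than computing X_batch_mean, which is currently unused. If I were to finish the per-batch standardization, a minimal sketch might look like the following. This is only my guess at it, not something the class above actually does, and real batch normalization would also add learnable scale/shift parameters and be applied to the hidden activations as well:

import numpy as np

def standardize_batch(X_batch, eps=1e-8):
    # X_batch is a mini-batch whose last column is the constant bias input of 1
    feat, bias = X_batch[:, :-1], X_batch[:, -1:]
    mean = feat.mean(axis=0)
    std = feat.std(axis=0) + eps   # epsilon so zero-variance features don't divide by zero
    return np.hstack(((feat - mean) / std, bias))

Inside the mini-batch loop this could replace the line that computes self.X_batch_mean, e.g. self.X_batchp = standardize_batch(self.X_batchp).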
Summary
Bad jokes, momentum, and a bit of escapism before exams.