# NN Basic Trials

Hands-on experiments with neural networks (NNs).

## Principles

The output of a unit through an activation function $f$ commonly used in NNs:
$$out = f \left( \sum_i w_i x_i +b \right)$$

Writing the weighted sum as
$$sum = \sum_i w_i x_i +b$$

and taking $f$ to be the sigmoid function:

$$out = f(sum) = \frac {1}{1+\exp(-sum)}$$
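
As a minimal sketch of this forward computation (the function and variable names here are illustrative only, not part of the implementation later in this article):

```python
import math

def forward(x, w, b):
    # out = sigmoid(sum_i(w_i * x_i) + b)
    s = sum(wi * xi for wi, xi in zip(w, x)) + b
    return 1.0 / (1.0 + math.exp(-s))

print(forward([1, 0], [0.5, -0.3], 0.1))  # sigmoid(0.6) ~= 0.646
```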

・The sum-of-squares error function $L$ used for evaluation
$$L = \frac{1}{2} \sum_i (out_i - t_i)^2$$
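
For example, with a single output $out_1 = 0.9$ and target $t_1 = 1$, this gives $L = \frac{1}{2}(0.9-1)^2 = 0.005$.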

・Gradient-based correction (update); a small sketch follows the equations

\begin{eqnarray}
w_{new_i} &=& w_{old_i} - \eta \frac{\partial L}{\partial w_i}\\
b_{new} &=& b_{old} - \eta \frac{\partial L}{\partial b}
\end{eqnarray}
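
A minimal sketch of this update step (assuming gradient lists `dfw` and `db` have already been computed; the names are illustrative):

```python
def update(w, b, dfw, db, eta=0.5):
    # w_new_i = w_old_i - eta * dL/dw_i, and likewise for b
    new_w = [wi - eta * gi for wi, gi in zip(w, dfw)]
    new_b = b - eta * db
    return new_w, new_b
```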


・Gradient propagation for backpropagation (three kinds)

\begin{eqnarray}
\frac{\partial L}{\partial w_i} &=& \frac{\partial out}{\partial w_i} \frac{\partial L}{\partial out}\\
\frac{\partial L}{\partial x_i} &=& \frac{\partial out}{\partial x_i} \frac{\partial L}{\partial out}\\
\frac{\partial L}{\partial b} &=& \frac{\partial out}{\partial b} \frac{\partial L}{\partial out}
\end{eqnarray}


・Derivative of $L$ with respect to the output $out_i$ (only the $j = i$ term of the sum survives differentiation)

\begin{eqnarray}
\frac{\partial L}{\partial out_i} &=& \frac{\partial }{\partial out_i} \left( \frac{1}{2} \sum_j (out_j - t_j)^2 \right) \\
&=& (out_i - t_i)
\end{eqnarray}


・Derivative with respect to the weight $w_i$ (sanity-checked numerically after the derivation)

\begin{eqnarray}
\frac{\partial out}{\partial w_i} &=& \frac{\partial sum}{\partial w_i} \frac{\partial out}{\partial sum} \\
&=& x_i \frac{\partial out}{\partial sum} \\
&=& x_i \frac{\exp(-sum)}{(1+\exp(-sum))^2} \\
&=& x_i\frac{1}{1+\exp(-sum)} \left( 1-\frac{1}{1+\exp(-sum)} \right) \\
&=& x_i \cdot out \cdot \left( 1-out \right)
\end{eqnarray}
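
The key identity $\partial out/\partial sum = out(1-out)$ used above can be checked with a finite difference; a small sketch with arbitrary test values:

```python
import math

def sigmoid(s):
    return 1.0 / (1.0 + math.exp(-s))

x, w, b = [1.0, 0.5], [0.3, -0.2], 0.1
i, eps = 0, 1e-6

s = sum(wi * xi for wi, xi in zip(w, x)) + b
out = sigmoid(s)
analytic = x[i] * out * (1 - out)  # x_i * out * (1 - out)

w2 = list(w)
w2[i] += eps  # perturb w_i slightly
s2 = sum(wi * xi for wi, xi in zip(w2, x)) + b
numeric = (sigmoid(s2) - out) / eps  # finite-difference slope

print(analytic, numeric)  # the two should agree to several digits
```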


・Derivative with respect to the input $x_i$

\begin{eqnarray}
\frac{\partial out}{\partial x_i} &=& \frac{\partial sum}{\partial x_i} \frac{\partial out}{\partial sum} \\
&=& w_i \frac{\partial out}{\partial sum} \\
&=& w_i \frac{\exp(-sum)}{(1+\exp(-sum))^2} \\
&=& w_i\frac{1}{1+\exp(-sum)} \left( 1-\frac{1}{1+\exp(-sum)} \right) \\
&=& w_i \cdot out \cdot \left( 1-out \right)
\end{eqnarray}


・Derivative with respect to the bias term $b$

\begin{eqnarray}
\frac{\partial out}{\partial b} &=& \frac{\partial sum}{\partial b} \frac{\partial out}{\partial sum} \\
&=& 1 \cdot \frac{\partial out}{\partial sum} \\
&=& \frac{\exp(-sum)}{(1+\exp(-sum))^2} \\
&=& \frac{1}{1+\exp(-sum)} \left( 1-\frac{1}{1+\exp(-sum)} \right) \\
&=& out \cdot \left( 1-out \right)
\end{eqnarray}


・Gradient computation for backpropagation with multiple outputs
When a single perceptron fans out to many destinations and distributes the same value to all of them, $out_1$ through $out_N$ below all take the same value, so the expressions simplify (common factors can be pulled out, etc.); see the sketch after the equations.

\begin{eqnarray}
\frac{\partial L}{\partial x_i} &=& \frac{\partial out_1}{\partial x_i}\frac{\partial L}{\partial out_1}
+\frac{\partial out_2}{\partial x_i}\frac{\partial L}{\partial out_2}+ \cdots +\frac{\partial out_N}{\partial x_i}\frac{\partial L}{\partial out_N} \\
\frac{\partial L}{\partial w_i} &=& \frac{\partial out_1}{\partial w_i}\frac{\partial L}{\partial out_1}
+\frac{\partial out_2}{\partial w_i}\frac{\partial L}{\partial out_2}+ \cdots +\frac{\partial out_N}{\partial w_i}\frac{\partial L}{\partial out_N} \\
\frac{\partial L}{\partial b} &=& \frac{\partial out_1}{\partial b}\frac{\partial L}{\partial out_1}
+\frac{\partial out_2}{\partial b}\frac{\partial L}{\partial out_2}+ \cdots +\frac{\partial out_N}{\partial b}\frac{\partial L}{\partial out_N}
\end{eqnarray}
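
In code this is just a sum of per-output contributions; a sketch for the $\partial L/\partial w_i$ case, where `douts[k]` stands for $\partial out_k/\partial w_i$ and `dLs[k]` for $\partial L/\partial out_k$ (names illustrative):

```python
def accumulate_grad(douts, dLs):
    # dL/dw_i = sum_k (dout_k/dw_i) * (dL/dout_k)
    return sum(do * dl for do, dl in zip(douts, dLs))
```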


Having laid the equations out in this straightforward way, let's try implementing them.

## Implementation

neural.py:

```python
import math
import random

class Lossfunction():
    # Sum-of-squares error: L = 0.5 * sum_i (out_i - t_i)^2
    def L(self, in_list, t_list):
        s = 0
        for ei, et in zip(in_list, t_list):
            s += (ei - et)**2
        return 0.5*s

    # dL/dout_i = out_i - t_i
    def diffL(self, out_list, t_list):
        ret = []
        for eo, et in zip(out_list, t_list):
            ret.append(eo - et)
        return ret

class neural():
    def __init__(self, in_num, out_num):
        self.w = [random.random() for _ in range(in_num)]
        self.b = 0
        self.cin = [0 for _ in range(in_num)]    # inputs cached by forward
        self.cout = [0 for _ in range(out_num)]  # outputs cached by forward
        self.in_num = in_num
        self.out_num = out_num
        self.dfx = []
        self.dfw = []
        self.db = []
        self.eta = 0.5

    def forward(self, in_list):
        # out = sigmoid(sum_i(w_i * x_i) + b), fanned out to out_num destinations
        self.cin = []
        s = 0
        for ein, ew in zip(in_list, self.w):
            s += ein*ew
            self.cin.append(ein)
        s += self.b
        s = 1.0/(1.0 + math.exp(-s))
        out = []
        for index in range(self.out_num):
            self.cout[index] = s
            out.append(s)
        return out

    def backward(self, back_list):
        # Accumulate gradients over all fan-out destinations,
        # using dout/dsum = out * (1 - out)
        out_back_list = []
        self.dfx = []
        self.dfw = []
        self.db = []
        for index in range(self.in_num):
            x = self.cin[index]
            w = self.w[index]
            dfx = 0
            dfw = 0
            for oindex in range(self.out_num):
                y = self.cout[oindex]
                dfwe = x*y*(1 - y)*back_list[oindex]  # contribution to dL/dw_i
                dfxe = w*y*(1 - y)*back_list[oindex]  # contribution to dL/dx_i
                dfw += dfwe
                dfx += dfxe
            self.dfw.append(dfw)
            self.dfx.append(dfx)
            out_back_list.append(dfx)
        db = 0
        for oindex in range(self.out_num):
            y = self.cout[oindex]
            dbe = y*(1 - y)*back_list[oindex]  # contribution to dL/db
            db += dbe
        self.db.append(db)
        return out_back_list

    def learning(self):
        # Gradient-descent update with learning rate eta
        for index in range(self.in_num):
            self.w[index] -= self.eta * self.dfw[index]
        self.b -= self.eta * self.db[0]
```
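
As a quick sanity check of `backward` before the gate experiments, the analytic gradients can be compared against finite differences of `Lossfunction.L` (a sketch assuming the classes above are in scope; the helper name and test values are my own):

```python
import copy

def grad_check():
    n = neural(2, 1)
    Lf = Lossfunction()
    x, t = [1.0, 0.0], [1.0]

    out = n.forward(x)            # forward pass caches inputs/outputs
    n.backward(Lf.diffL(out, t))  # fills n.dfw with analytic gradients

    eps = 1e-6
    base = Lf.L(n.forward(x), t)
    for i in range(n.in_num):
        n2 = copy.deepcopy(n)
        n2.w[i] += eps            # perturb one weight
        numeric = (Lf.L(n2.forward(x), t) - base) / eps
        print(i, n.dfw[i], numeric)  # analytic vs numerical, should agree

grad_check()
```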


## AND Learning (single layer)

```python
def And_test():
    # 2 inputs, 1 output
    n1 = neural(2, 1)
    Losslist = [[], [], [], []]
    epoch = 50000
    for _ in range(epoch):
        in_data = [
            [1, 1],
            [0, 0],
            [0, 1],
            [1, 0]
        ]

        t_data = [
            [1],
            [0],
            [0],
            [0]
        ]

        for index, (input_list, t_list) in enumerate(zip(in_data, t_data)):
            in11 = input_list[0]
            in12 = input_list[1]
            out1 = n1.forward([in11, in12])
            in_list = [out1[0]]
            L = Lossfunction()
            loss = L.L(in_list, t_list)
            Losslist[index].append(loss)
            dL = L.diffL(in_list, t_list)
            back1 = n1.backward([dL[0]])
            n1.learning()

    # Verification
    in_data = [
        [1, 1],
        [0, 0],
        [0, 1],
        [1, 0]
    ]

    for index, (input_list, t_list) in enumerate(zip(in_data, t_data)):
        in11 = input_list[0]
        in12 = input_list[1]
        out1 = n1.forward([in11, in12])
        print(out1)
```



・AND training results

```
(1,1) 0.9879004772680621
(0,0) 1.3044040523080225e-06
(0,1) 0.010214239050947687
(1,0) 0.010214891563649306
```

## OR Learning (single layer)

```python
def Or_test():
    # 2 inputs, 1 output
    n1 = neural(2, 1)
    Losslist = [[], [], [], []]
    epoch = 50000
    for _ in range(epoch):
        in_data = [
            [1, 1],
            [0, 0],
            [0, 1],
            [1, 0]
        ]

        t_data = [
            [1],
            [0],
            [1],
            [1]
        ]

        for index, (input_list, t_list) in enumerate(zip(in_data, t_data)):
            in11 = input_list[0]
            in12 = input_list[1]
            out1 = n1.forward([in11, in12])
            in_list = [out1[0]]
            L = Lossfunction()
            loss = L.L(in_list, t_list)
            Losslist[index].append(loss)
            dL = L.diffL(in_list, t_list)
            back1 = n1.backward([dL[0]])
            n1.learning()

    # Verification
    in_data = [
        [1, 1],
        [0, 0],
        [0, 1],
        [1, 0]
    ]

    for index, (input_list, t_list) in enumerate(zip(in_data, t_data)):
        in11 = input_list[0]
        in12 = input_list[1]
        out1 = n1.forward([in11, in12])
        print(out1)
```


・OR training results

```
(1,1) 0.9999995696141024
(0,0) 0.01018356040654119
(0,1) 0.9935738931578483
(1,0) 0.9935736329753098
```

These behave about as expected.

## XOR (effectively two layers)

The outcome is sensitive to the random initial weights. (In successful runs, the two first-layer units converge to mutually symmetric circuits.)

(Three units arranged as an effectively two-layer network)

```python
def Xor_test(initial_weight_bias):
    # 2 inputs, 1 output, 2-layer structure
    n1 = neural(2, 1)
    n2 = neural(2, 1)
    n3 = neural(2, 1)
    Losslist = [[], [], [], []]
    epoch = 50000
    for _ in range(epoch):
        in_data = [
            [1, 1],
            [0, 0],
            [0, 1],
            [1, 0]
        ]

        t_data = [
            [0],
            [0],
            [1],
            [1]
        ]

        for index, (input_list, t_list) in enumerate(zip(in_data, t_data)):
            in11 = input_list[0]
            in12 = input_list[1]
            out1 = n1.forward([in11, in12])
            out2 = n2.forward([in11, in12])
            out3 = n3.forward([out1[0], out2[0]])
            in_list = [out3[0]]
            L = Lossfunction()
            loss = L.L(in_list, t_list)
            Losslist[index].append(loss)
            dL = L.diffL(in_list, t_list)
            back3 = n3.backward([dL[0]])
            # back3[0] is the gradient w.r.t. n3's first input (out1) and
            # back3[1] w.r.t. its second input (out2); route them accordingly
            back1 = n1.backward([back3[0]])
            back2 = n2.backward([back3[1]])
            n3.learning()
            n2.learning()
            n1.learning()

    # Verification
    in_data = [
        [1, 1],
        [0, 0],
        [0, 1],
        [1, 0]
    ]

    for index, (input_list, t_list) in enumerate(zip(in_data, t_data)):
        in11 = input_list[0]
        in12 = input_list[1]
        out1 = n1.forward([in11, in12])
        out2 = n2.forward([in11, in12])
        out3 = n3.forward([out1[0], out2[0]])
        print(out3)
```


・XOR training results (successful run)

```
(1,1) 0.007093208409184943
(0,0) 0.007881163130751396
(0,1) 0.9932175074572005
(1,0) 0.9932153258315902
```

neural.py (constructor extended so the initial weights and bias can be specified; the defaults keep the earlier two-argument calls working):

```python
class neural():
    def __init__(self, in_num, out_num, initial_weight=None, initial_bias=None):
        if initial_weight and initial_bias:
            self.w = [weight for weight in initial_weight]
            self.b = initial_bias
        else:
            self.w = [random.random() for _ in range(in_num)]
            self.b = 0
```


The NAND initialization inside Xor_test is the following:

```python
n1 = neural(2, 1, [-8.975974502718518, -8.975909953514202], 13.549542239166849)
```
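
Seeding the first layer this way mirrors the classical decomposition $XOR(a,b) = AND(NAND(a,b),\,OR(a,b))$: with n1 started near a NAND gate, the remaining units only have to discover the OR and AND parts. A hedged sketch of the full wiring (that n2 and n3 stay randomly initialized is my assumption):

```python
# n1 starts as an (approximate) NAND gate; n2 and n3 start random and are
# free to settle toward OR-like and AND-like behavior during training
n1 = neural(2, 1, [-8.975974502718518, -8.975909953514202], 13.549542239166849)
n2 = neural(2, 1)  # works thanks to the None defaults in the new constructor
n3 = neural(2, 1)
```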