(Update 4/25) I have since written a more detailed explanatory article.
For now it works, so I am sharing it here.
Implementation
Old version
import torch
import torch.nn as nn
import numpy as np

class Layer:
    def __init__(self, in_, out_, alpha=1., ppn=None):
        self.in_ = in_
        self.out_ = out_
        self.alpha = alpha
        # pn marks every other output cell (True = excitatory, False = inhibitory)
        self.pn = torch.arange(out_) % 2 == 0
        if ppn is None:
            ppn = torch.arange(in_) % 2 == 0
        self.ppn = ppn
        # +1 where the input and output cells have the same type, -1 otherwise
        self.sign = (self.ppn[:, None] == self.pn).float() * 2 - 1
        self.weight = torch.rand((in_, out_)) * self.sign
        self.u0 = .4

    def __call__(self, x):
        return self.forward(x)

    def forward(self, x):
        self.input = x
        self.output = x @ self.weight
        return torch.sigmoid(2/self.u0*self.output)

    def update(self, d):
        # local update: activation derivative times input, with the sign pattern preserved
        a = self.alpha * self.df(abs(2/self.u0*self.output))[:, None] * self.input * self.sign.T
        a = a.T
        # rows fed by excitatory cells move with the positive error d[0],
        # rows fed by inhibitory cells with the negative error d[1]
        self.weight[self.ppn] += d[0] * a[self.ppn]
        self.weight[~self.ppn] += d[1] * a[~self.ppn]

    def df(self, x):
        sig = torch.sigmoid(x)
        return sig * (1 - sig)

class ED:
    def __init__(self, in_, out_, hidden, hidden_width=8, alpha=.8):
        self.layers = [Layer(2*in_, hidden_width)]
        for i in range(hidden):
            self.layers.append(Layer(hidden_width, hidden_width, alpha, self.layers[-1].pn))
        self.layers.append(Layer(hidden_width, out_, self.layers[-1].pn))

    def __call__(self, x):
        return self.forward(x)

    def forward(self, x):
        # duplicate each input value so it feeds one excitatory and one inhibitory input cell
        x = x.repeat_interleave(2)
        for layer in self.layers:
            x = layer(x)
        return x

    def update(self, d):
        for layer in self.layers:
            layer.update(d)
XOR training
ed = ED(3, 1, 3, 32)
data = [[0, 0], [0, 1], [1, 0], [1, 1]]
beta = 0.8
for _ in range(1000):
    i = np.random.randint(0, 4)
    a = data[i]
    input_ = torch.tensor([beta, *a])
    y = a[0] ^ a[1]
    output = ed(input_)
    # positive / negative parts of the error drive the excitatory / inhibitory updates
    d = torch.tensor([max(0., float(y - output)), max(0., float(output - y))])
    ed.update(d)
for i, j in [[0, 0], [0, 1], [1, 0], [1, 1]]:
    print(i, j, i^j, ed(torch.tensor([beta, i, j])))
out
0 0 0 tensor([1.3011e-20])
0 1 1 tensor([1.])
1 0 1 tensor([1.])
1 1 0 tensor([1.2731e-19])
Batch training
import torch
import torch.nn as nn
import numpy as np
import itertools
from tqdm import tqdm
import torch.nn.functional as F

class Layer:
    def __init__(self, in_, out_, d, alpha=1., ppn=None, activ=torch.sigmoid):
        self.in_ = in_
        self.out_ = out_
        self.alpha = alpha
        self.pn = torch.arange(out_) % 2 == 0
        if ppn is None:
            ppn = torch.arange(in_) % 2 == 0
        self.ppn = ppn
        self.sign = (self.ppn[:, None] == self.pn).float() * 2 - 1
        # one weight matrix per final output unit (d of them), so each output gets its own sub-network
        self.weight = torch.rand((d, in_, out_)) * self.sign
        self.u0 = .4
        self.activ = activ

    def __call__(self, x):
        return self.forward(x)

    def forward(self, x):
        self.input = x
        self.output = x @ self.weight
        return self.activate(2/self.u0*self.output)

    def update(self, d):
        a = self.alpha * self.grad[:, :, None] * self.input[..., None] * self.sign
        self.weight[:, self.ppn] += (d[0].T[..., None, None] * a[:, :, self.ppn]).mean(1)
        self.weight[:, ~self.ppn] += (d[1].T[..., None, None] * a[:, :, ~self.ppn]).mean(1)

    def activate(self, x):
        # run the activation once under autograd to obtain its elementwise derivative,
        # which update() reuses as self.grad
        x = x.clone().detach().requires_grad_()
        y = self.activ(x)
        y_ = y.sum()
        y_.backward()
        self.grad = x.grad
        return y.detach()

class ED:
    def __init__(self, in_, out_, hidden, hidden_width=8, alpha=.8):
        layers = [Layer(2*in_, hidden_width, out_)]
        for i in range(hidden):
            layers.append(Layer(hidden_width, hidden_width, out_, alpha, layers[-1].pn))
        layers.append(Layer(hidden_width, 1, out_, ppn=layers[-1].pn))
        self.layers = layers

    def __call__(self, x):
        return self.forward(x)

    def forward(self, x):
        x = x.repeat_interleave(2, -1)
        for layer in self.layers:
            x = layer(x)
        # (d, batch, 1) -> (batch, d): one column per output unit
        return x.squeeze(-1).T

    def update(self, d):
        for layer in self.layers:
            layer.update(d)
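The batched classes above are not exercised in this post, so here is a minimal usage sketch for XOR. The hyperparameters (beta = 0.8, 200 iterations) and the (batch, out)-shaped targets are my own assumptions, not taken from the original.

# minimal, assumed usage of the batched ED above (hyperparameters are illustrative)
ed = ED(3, 1, 3, 32)
data = torch.tensor([[0., 0.], [0., 1.], [1., 0.], [1., 1.]])
beta = 0.8
x = torch.cat([torch.full((4, 1), beta), data], dim=1)  # inputs, shape (4, 3)
y = (data[:, :1] != data[:, 1:]).float()                # XOR targets, shape (4, 1)
for _ in range(200):
    output = ed(x)                                      # predictions, shape (4, 1)
    dp = torch.clamp(y - output, min=0)                 # positive part of the error
    dn = torch.clamp(output - y, min=0)                 # negative part of the error
    ed.update((dp, dn))
print(y.squeeze(1), ed(x).squeeze(1))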
In the implementation below, a single ED_Layer handles one layer by itself, and the layers are fully connected.
To express a unit that receives input from excitatory/inhibitory cells and itself produces excitatory/inhibitory output, four Linear modules are prepared, one for each combination (excitatory input to excitatory output, excitatory input to inhibitory output, inhibitory input to excitatory output, inhibitory input to inhibitory output), and each output is taken as the difference between its two contributions.
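Restating ED_Layer.forward below as formulas, with W_pp, W_np, W_pn, W_nn for the four weight matrices and sigmoid(x) = 1 / (1 + exp(-2x/u0)), u0 = 0.4:

p_out = sigmoid(p @ W_pp - n @ W_np)
n_out = sigmoid(n @ W_nn - p @ W_pn)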
import numpy as np

def sigmoid(x, u0=.4):
    return 1 / (1 + np.exp(-2*x/u0))

class Linear:
    def __init__(self, in_, out_):
        self.weight = np.random.rand(in_, out_)

    def __call__(self, x):
        return self.forward(x)

    def forward(self, x):
        self.input = x.copy()
        self.output = x @ self.weight
        return self.output

class ED_Layer:
    def __init__(self, in_, out_, alpha=.8):
        self.in_ = in_
        self.out_ = out_
        self.alpha = alpha
        # four weight blocks: excitatory/inhibitory input x excitatory/inhibitory output
        self.pp = Linear(in_, out_)
        self.np = Linear(in_, out_)
        self.pn = Linear(in_, out_)
        self.nn = Linear(in_, out_)

    def __call__(self, p, n):
        return self.forward(p, n)

    def forward(self, p, n):
        # each output stream is the sigmoid of the difference of its two input streams
        self.op, self.on = (sigmoid(self.pp(p) - self.np(n)),
                            sigmoid(self.nn(n) - self.pn(p)))
        return (self.op, self.on)

    def update(self, dp, dn):
        for d, l, o in ((dp, self.pp, self.op), (dn, self.np, self.op),
                        (dp, self.pn, self.on), (dn, self.nn, self.on)):
            # per-sample outer product of input and sigmoid derivative, scaled by the error
            # and averaged over the batch
            dw = np.einsum("b,bo,bi->bio", d, o*(1-o), l.input)
            l.weight += self.alpha * dw.mean(0)

class ED:
    def __init__(self, in_, out_, hidden_width, hidden_depth=1, alpha=.8):
        self.layers = [
            ED_Layer(in_, hidden_width, alpha),
            *[ED_Layer(hidden_width, hidden_width, alpha) for _ in range(hidden_depth)],
            ED_Layer(hidden_width, out_)
        ]

    def __call__(self, x):
        return self.forward(x)

    def forward(self, x):
        # the same input feeds both the excitatory and the inhibitory stream
        p, n = x.copy(), x.copy()
        for layer in self.layers:
            p, n = layer(p, n)
        return p[:, 0]

    def update(self, dp, dn):
        for layer in self.layers:
            layer.update(dp, dn)
XOR
np.random.seed(seed=1)
ed = ED(3, 1, 16, 15)
data = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
beta = 0.1
input_ = np.array([[beta, *a] for a in data])
y = data[:, 0] ^ data[:, 1]
for i in range(10):
    output = ed(input_)
    dp, dn = np.clip(y-output, 0, None), np.clip(output-y, 0, None)
    ed.update(dp, dn)
for i, j in [[0, 0], [0, 1], [1, 0], [1, 1]]:
    print(i^j, ed(np.array([[beta, i, j]])))
out
0 [2.79953956e-15]
1 [1.]
1 [1.]
0 [2.79953956e-15]
Thoughts
I plan to try out various things based on the ideas behind the ED method.