LoginSignup
11
6

ED法実装例(バッチ学習対応)

Last updated at Posted at 2024-04-21

(4/25追記)改めて詳しい解説記事を書きました。

一応動いたので共有しておきます。

実装


import torch
import torch.nn as nn
import numpy as np

class Layer:
    """One fully connected ED layer (online, single-sample version).

    Units alternate excitatory/inhibitory by index parity; the fixed
    `sign` mask keeps every weight's sign consistent with that wiring.
    """

    def __init__(self, in_, out_, alpha=1., ppn=None):
        self.in_ = in_
        self.out_ = out_
        self.alpha = alpha
        self.u0 = .4
        # Even-indexed output units are the "positive" (excitatory) ones.
        self.pn = torch.arange(out_) % 2 == 0
        # Input parity defaults to the same alternating scheme.
        self.ppn = torch.arange(in_) % 2 == 0 if ppn is None else ppn
        # +1 where input and output parity agree, -1 otherwise.
        self.sign = torch.where(self.ppn[:, None] == self.pn, 1., -1.)
        self.weight = torch.rand((in_, out_)) * self.sign

    def __call__(self, x):
        return self.forward(x)

    def forward(self, x):
        # Cache input and pre-activation for the subsequent update step.
        self.input = x
        self.output = x @ self.weight
        return torch.sigmoid(2 / self.u0 * self.output)

    def update(self, d):
        # Candidate step per weight: alpha * f'(|net|) * input, signed by
        # the fixed wiring; d[0]/d[1] scale positive/negative-parity rows.
        grad = self.df((2 / self.u0 * self.output).abs())
        step = (self.alpha * grad[:, None] * self.input * self.sign.T).T
        self.weight[self.ppn] += d[0] * step[self.ppn]
        self.weight[~self.ppn] += d[1] * step[~self.ppn]

    def df(self, x):
        # Derivative of the logistic sigmoid.
        s = torch.sigmoid(x)
        return s * (1 - s)
    

class ED:
    """Error-diffusion network: a stack of `Layer`s trained layer-locally.

    Args:
        in_: number of raw input features; each is duplicated into an
            excitatory/inhibitory pair, so the first layer sees 2*in_.
        out_: number of output units.
        hidden: number of hidden layers.
        hidden_width: units per hidden layer.
        alpha: learning-rate factor for the hidden layers.
    """

    def __init__(self, in_, out_, hidden, hidden_width=8, alpha=.8):
        self.layers = [Layer(2*in_, hidden_width)]
        for _ in range(hidden):
            self.layers.append(Layer(hidden_width, hidden_width, alpha, self.layers[-1].pn))
        # BUG FIX: `self.layers[-1].pn` was previously passed positionally,
        # so it landed in `alpha` (a boolean tensor used as learning rate)
        # instead of `ppn`. Pass it by keyword, as the batch implementation
        # elsewhere in this file already does.
        self.layers.append(Layer(hidden_width, out_, ppn=self.layers[-1].pn))

    def __call__(self, x):
        return self.forward(x)

    def forward(self, x):
        # Duplicate each input feature into an excitatory/inhibitory pair.
        x = x.repeat_interleave(2)
        for layer in self.layers:
            x = layer(x)
        return x

    def update(self, d):
        # Broadcast the same (positive, negative) error pair to every layer.
        for layer in self.layers:
            layer.update(d)

XOR学習

# Online XOR demo: one randomly chosen sample per step.
# NOTE(review): the printed result depends on the exact order of RNG draws.
ed = ED(3, 1, 3, 32)

# Two binary inputs; a constant bias term `beta` is prepended to each sample.
data = [[0, 0], [0, 1], [1, 0], [1, 1]]
beta = 0.8

for _ in range(1000):
    # Pick one training pair at random.
    i = np.random.randint(0, 4)
    a = data[i]
    input_ = torch.tensor([beta, *a])
    y = a[0] ^ a[1]
    output = ed(input_)
    # Split the signed error into its positive and negative parts; the
    # layers apply them to excitatory / inhibitory weights respectively.
    d = torch.tensor([max(0, y-output), max(0, output-y)])
    ed.update(d)
    
# Print target vs. network output for all four XOR cases.
for i, j in [[0, 0], [0, 1], [1, 0], [1, 1]]:
    print(i^j, ed(torch.tensor([beta, i, j])))

out
0 0 0 tensor([1.3011e-20])
0 1 1 tensor([1.])
1 0 1 tensor([1.])
1 1 0 tensor([1.2731e-19])

バッチ学習

import torch
import torch.nn as nn
import numpy as np
import itertools
from tqdm import tqdm
import torch.nn.functional as F

class Layer:
    """Batched ED layer holding `d` independent weight slices.

    Given the caller (`ED` below passes `out_` as `d`), each slice
    appears to drive one scalar-output sub-network trained in parallel.

    Args (as used by the visible code):
        in_, out_: fan-in / fan-out of each weight slice.
        d: number of parallel weight slices.
        alpha: learning-rate factor used in `update`.
        ppn: boolean parity mask of the previous layer's outputs
            (True = excitatory); defaults to alternating parity.
        activ: activation callable; its derivative is recovered via
            autograd in `activate`.
    """

    def __init__(self, in_, out_, d, alpha=1., ppn=None, activ=torch.sigmoid):
        self.in_ = in_
        self.out_ = out_
        self.alpha = alpha
        # Even-indexed output units are the "positive" (excitatory) ones.
        self.pn = torch.arange(out_) % 2 == 0
        if ppn is None:
            ppn = torch.arange(in_) % 2==0
        self.ppn = ppn
        # +1 where input/output parity agree, -1 otherwise; fixes each
        # weight's sign for the lifetime of the layer.
        self.sign = (self.ppn[:, None]==self.pn).float() * 2 - 1
        self.weight = torch.rand((d, in_, out_)) * self.sign
        self.u0 = .4  # activation steepness: forward applies activ(2*x/u0)
        self.activ = activ
    
    def __call__(self, x):
        return self.forward(x)
    
    def forward(self, x):
        # Cache input and pre-activation for the subsequent `update` call.
        self.input = x
        self.output = x @ self.weight
        return self.activate(2/self.u0*self.output)
    
    def update(self, d):
        # Candidate step per weight: alpha * activ'(net) * input, signed by
        # the fixed excitatory/inhibitory wiring. Relies on `self.grad`
        # stored by the most recent forward pass.
        # NOTE(review): `d` is presumably a (positive, negative) pair of
        # per-output error tensors — confirm against the caller.
        a = self.alpha * self.grad[:, :, None] * self.input[..., None] * self.sign
        # Positive-parity rows move with d[0], negative-parity rows with
        # d[1]; `.mean(1)` averages the step over the batch dimension.
        self.weight[:, self.ppn] += (d[0].T[..., None, None]* a[:, :, self.ppn]).mean(1)
        self.weight[:, ~self.ppn] += (d[1].T[..., None, None] * a[:, : , ~self.ppn]).mean(1)

    def activate(self, x):
        # Apply the activation and capture its elementwise derivative via
        # autograd, so any differentiable `activ` works, not just sigmoid.
        x = x.clone().detach().requires_grad_()
        y = self.activ(x)
        y_ = y.sum()
        y_.backward()
        self.grad = x.grad
        return y.detach()
    

class ED:
    """Batch ED network: `out_` parallel scalar-output layer stacks."""

    def __init__(self, in_, out_, hidden, hidden_width=8, alpha=.8):
        # Each Layer carries `out_` weight slices: one per network output.
        stack = [Layer(2*in_, hidden_width, out_)]
        for _ in range(hidden):
            stack.append(Layer(hidden_width, hidden_width, out_, alpha, stack[-1].pn))
        # Final layer maps to a single unit per slice.
        stack.append(Layer(hidden_width, 1, out_, ppn=stack[-1].pn))
        self.layers = stack

    def __call__(self, x):
        return self.forward(x)

    def forward(self, x):
        # Duplicate each feature into an excitatory/inhibitory pair.
        h = x.repeat_interleave(2, -1)
        for layer in self.layers:
            h = layer(h)
        # (out_, batch, 1) -> (batch, out_)
        return h.squeeze(-1).T

    def update(self, d):
        # Every layer receives the same (positive, negative) error pair.
        for layer in self.layers:
            layer.update(d)

ED_Layerが単体で一層を担っていて、層間は全結合しています。
興奮性細胞/抑制性細胞からの入力を受け取って、興奮性細胞/抑制性細胞として出力する動作の表現として、興奮入力興奮出力・興奮入力抑制出力・抑制入力興奮出力・抑制入力抑制出力の4つにそれぞれLinearを用意して、差分を出力するようにしています。

import numpy as np

def sigmoid(x, u0=.4):
    """Steep logistic activation 1 / (1 + exp(-2*x/u0)).

    Implemented through tanh — algebraically identical
    (sigmoid(z) == 0.5*(1 + tanh(z/2)) with z = 2*x/u0) but free of the
    overflow RuntimeWarning that np.exp(-2*x/u0) emits once 2*|x|/u0
    exceeds ~709.

    Args:
        x: scalar or ndarray input.
        u0: steepness constant; smaller values give a sharper transition.
    """
    return 0.5 * (1.0 + np.tanh(x / u0))

class Linear:
    """Minimal dense map y = x @ W, with W drawn uniformly from [0, 1)."""

    def __init__(self, in_, out_):
        self.weight = np.random.rand(in_, out_)

    def __call__(self, x):
        return self.forward(x)

    def forward(self, x):
        # Keep a private copy of the input for the later weight update.
        self.input = x.copy()
        self.output = np.matmul(x, self.weight)
        return self.output
    
class ED_Layer:
    """One excitatory/inhibitory layer pair of the ED method.

    Four dense maps connect the (p)ositive and (n)egative streams:
    pp: p->p, np: n->p, pn: p->n, nn: n->n.  The positive output is
    sigmoid(pp(p) - np(n)); the negative output is sigmoid(nn(n) - pn(p)).
    """

    def __init__(self, in_, out_, alpha=.8):
        self.in_ = in_
        self.out_ = out_
        self.alpha = alpha
        self.pp = Linear(in_, out_)
        self.np = Linear(in_, out_)
        self.pn = Linear(in_, out_)
        self.nn = Linear(in_, out_)

    def __call__(self, p, n):
        return self.forward(p, n)

    def forward(self, p, n):
        # Cache both stream outputs; `update` reads them back.
        self.op = sigmoid(self.pp(p) - self.np(n))
        self.on = sigmoid(self.nn(n) - self.pn(p))
        return (self.op, self.on)

    def update(self, dp, dn):
        # Hebbian-style step: error * sigmoid'(output) * input, averaged
        # over the batch.  dp scales the two maps fed by the p-stream
        # inputs' error direction, dn the other two; each map uses the
        # derivative of the output it feeds (op for pp/np, on for pn/nn).
        pairs = (
            (dp, self.pp, self.op),
            (dn, self.np, self.op),
            (dp, self.pn, self.on),
            (dn, self.nn, self.on),
        )
        for err, lin, out in pairs:
            dw = np.einsum("b,bo,bi->bio", err, out * (1 - out), lin.input)
            lin.weight += self.alpha * dw.mean(0)
            
        
class ED:
    """Full ED network: a chain of ED_Layers over paired (p, n) streams."""

    def __init__(self, in_, out_, hidden_width, hidden_depth=1, alpha=.8):
        # Build layers in order — each ED_Layer consumes RNG draws, so
        # construction order determines the initial weights.
        self.layers = [ED_Layer(in_, hidden_width, alpha)]
        self.layers.extend(
            ED_Layer(hidden_width, hidden_width, alpha) for _ in range(hidden_depth)
        )
        self.layers.append(ED_Layer(hidden_width, out_))

    def __call__(self, x):
        return self.forward(x)

    def forward(self, x):
        # Both streams start as independent copies of the input.
        p = x.copy()
        n = x.copy()
        for layer in self.layers:
            p, n = layer(p, n)
        # The first positive-stream unit is the network's prediction.
        return p[:, 0]

    def update(self, dp, dn):
        # Every layer receives the same positive/negative error pair.
        for layer in self.layers:
            layer.update(dp, dn)

XOR

# Batch XOR demo: fixed seed keeps the run (and printed output) reproducible.
np.random.seed(seed=1)
ed = ED(3, 1, 16, 15)

data = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
beta = 0.1
# Prepend the constant bias term `beta` to every sample.
input_ = np.array([[beta, *a] for a in data])
y = data[:, 0] ^ data[:, 1]
for i in range(10):
    output = ed(input_)
    # Positive / negative parts of the error drive the two update streams.
    dp, dn = np.clip(y-output, 0, None), np.clip(output-y, 0, None)
    ed.update(dp, dn)
    
# Print target vs. network output for all four XOR cases.
for i, j in [[0, 0], [0, 1], [1, 0], [1, 1]]:
    print(i^j, ed(np.array([[beta, i, j]])))
out
0 [2.79953956e-15]
1 [1.]
1 [1.]
0 [2.79953956e-15]

所感

ED法のアイデアを元にいろいろ試してみようと思います。

11
6
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
11
6