LoginSignup
11
6

ED法実装例(バッチ学習対応)

Last updated at Posted at 2024-04-21

(4/25追記)改めて詳しい解説記事を書きました。

一応動いたので共有しておきます。

実装


import torch
import torch.nn as nn
import numpy as np

class Layer:
    """One fully connected ED layer (online, single-sample version).

    Units alternate excitatory/inhibitory by index parity; the fixed
    `sign` mask keeps every weight's sign consistent with that wiring.
    """

    def __init__(self, in_, out_, alpha=1., ppn=None):
        self.in_ = in_
        self.out_ = out_
        self.alpha = alpha
        self.u0 = .4
        # Even-indexed output units are the "positive" (excitatory) ones.
        self.pn = torch.arange(out_) % 2 == 0
        # Input parity defaults to the same alternating scheme.
        self.ppn = torch.arange(in_) % 2 == 0 if ppn is None else ppn
        # +1 where input and output parity agree, -1 otherwise.
        self.sign = torch.where(self.ppn[:, None] == self.pn, 1., -1.)
        self.weight = torch.rand((in_, out_)) * self.sign

    def __call__(self, x):
        return self.forward(x)

    def forward(self, x):
        # Cache input and pre-activation for the subsequent update step.
        self.input = x
        self.output = x @ self.weight
        return torch.sigmoid(2 / self.u0 * self.output)

    def update(self, d):
        # Candidate step per weight: alpha * f'(|net|) * input, signed by
        # the fixed wiring; d[0]/d[1] scale positive/negative-parity rows.
        grad = self.df((2 / self.u0 * self.output).abs())
        step = (self.alpha * grad[:, None] * self.input * self.sign.T).T
        self.weight[self.ppn] += d[0] * step[self.ppn]
        self.weight[~self.ppn] += d[1] * step[~self.ppn]

    def df(self, x):
        # Derivative of the logistic sigmoid.
        s = torch.sigmoid(x)
        return s * (1 - s)
    

class ED:
    """Error-diffusion network: a stack of `Layer`s trained layer-locally.

    Args:
        in_: number of raw input features; each is duplicated into an
            excitatory/inhibitory pair, so the first layer sees 2*in_.
        out_: number of output units.
        hidden: number of hidden layers.
        hidden_width: units per hidden layer.
        alpha: learning-rate factor for the hidden layers.
    """

    def __init__(self, in_, out_, hidden, hidden_width=8, alpha=.8):
        self.layers = [Layer(2*in_, hidden_width)]
        for _ in range(hidden):
            self.layers.append(Layer(hidden_width, hidden_width, alpha, self.layers[-1].pn))
        # BUG FIX: `self.layers[-1].pn` was previously passed positionally,
        # so it landed in `alpha` (a boolean tensor used as learning rate)
        # instead of `ppn`. Pass it by keyword, as the batch implementation
        # elsewhere in this file already does.
        self.layers.append(Layer(hidden_width, out_, ppn=self.layers[-1].pn))

    def __call__(self, x):
        return self.forward(x)

    def forward(self, x):
        # Duplicate each input feature into an excitatory/inhibitory pair.
        x = x.repeat_interleave(2)
        for layer in self.layers:
            x = layer(x)
        return x

    def update(self, d):
        # Broadcast the same (positive, negative) error pair to every layer.
        for layer in self.layers:
            layer.update(d)

XOR学習

# Online XOR demo: one randomly chosen sample per step.
# NOTE(review): the printed result depends on the exact order of RNG draws.
ed = ED(3, 1, 3, 32)

# Two binary inputs; a constant bias term `beta` is prepended to each sample.
data = [[0, 0], [0, 1], [1, 0], [1, 1]]
beta = 0.8

for _ in range(1000):
    # Pick one training pair at random.
    i = np.random.randint(0, 4)
    a = data[i]
    input_ = torch.tensor([beta, *a])
    y = a[0] ^ a[1]
    output = ed(input_)
    # Split the signed error into its positive and negative parts; the
    # layers apply them to excitatory / inhibitory weights respectively.
    d = torch.tensor([max(0, y-output), max(0, output-y)])
    ed.update(d)
    
# Print target vs. network output for all four XOR cases.
for i, j in [[0, 0], [0, 1], [1, 0], [1, 1]]:
    print(i^j, ed(torch.tensor([beta, i, j])))

out
0 0 0 tensor([1.3011e-20])
0 1 1 tensor([1.])
1 0 1 tensor([1.])
1 1 0 tensor([1.2731e-19])

バッチ学習

import torch
import torch.nn as nn
import numpy as np
import itertools
from tqdm import tqdm
import torch.nn.functional as F

class Layer:
    """Batched ED layer holding `d` independent weight slices.

    Given the caller (`ED` below passes `out_` as `d`), each slice
    appears to drive one scalar-output sub-network trained in parallel.

    Args (as used by the visible code):
        in_, out_: fan-in / fan-out of each weight slice.
        d: number of parallel weight slices.
        alpha: learning-rate factor used in `update`.
        ppn: boolean parity mask of the previous layer's outputs
            (True = excitatory); defaults to alternating parity.
        activ: activation callable; its derivative is recovered via
            autograd in `activate`.
    """

    def __init__(self, in_, out_, d, alpha=1., ppn=None, activ=torch.sigmoid):
        self.in_ = in_
        self.out_ = out_
        self.alpha = alpha
        # Even-indexed output units are the "positive" (excitatory) ones.
        self.pn = torch.arange(out_) % 2 == 0
        if ppn is None:
            ppn = torch.arange(in_) % 2==0
        self.ppn = ppn
        # +1 where input/output parity agree, -1 otherwise; fixes each
        # weight's sign for the lifetime of the layer.
        self.sign = (self.ppn[:, None]==self.pn).float() * 2 - 1
        self.weight = torch.rand((d, in_, out_)) * self.sign
        self.u0 = .4  # activation steepness: forward applies activ(2*x/u0)
        self.activ = activ
    
    def __call__(self, x):
        return self.forward(x)
    
    def forward(self, x):
        # Cache input and pre-activation for the subsequent `update` call.
        self.input = x
        self.output = x @ self.weight
        return self.activate(2/self.u0*self.output)
    
    def update(self, d):
        # Candidate step per weight: alpha * activ'(net) * input, signed by
        # the fixed excitatory/inhibitory wiring. Relies on `self.grad`
        # stored by the most recent forward pass.
        # NOTE(review): `d` is presumably a (positive, negative) pair of
        # per-output error tensors — confirm against the caller.
        a = self.alpha * self.grad[:, :, None] * self.input[..., None] * self.sign
        # Positive-parity rows move with d[0], negative-parity rows with
        # d[1]; `.mean(1)` averages the step over the batch dimension.
        self.weight[:, self.ppn] += (d[0].T[..., None, None]* a[:, :, self.ppn]).mean(1)
        self.weight[:, ~self.ppn] += (d[1].T[..., None, None] * a[:, : , ~self.ppn]).mean(1)

    def activate(self, x):
        # Apply the activation and capture its elementwise derivative via
        # autograd, so any differentiable `activ` works, not just sigmoid.
        x = x.clone().detach().requires_grad_()
        y = self.activ(x)
        y_ = y.sum()
        y_.backward()
        self.grad = x.grad
        return y.detach()
    

class ED:
    """Batch ED network: `out_` parallel scalar-output layer stacks."""

    def __init__(self, in_, out_, hidden, hidden_width=8, alpha=.8):
        # Each Layer carries `out_` weight slices: one per network output.
        stack = [Layer(2*in_, hidden_width, out_)]
        for _ in range(hidden):
            stack.append(Layer(hidden_width, hidden_width, out_, alpha, stack[-1].pn))
        # Final layer maps to a single unit per slice.
        stack.append(Layer(hidden_width, 1, out_, ppn=stack[-1].pn))
        self.layers = stack

    def __call__(self, x):
        return self.forward(x)

    def forward(self, x):
        # Duplicate each feature into an excitatory/inhibitory pair.
        h = x.repeat_interleave(2, -1)
        for layer in self.layers:
            h = layer(h)
        # (out_, batch, 1) -> (batch, out_)
        return h.squeeze(-1).T

    def update(self, d):
        # Every layer receives the same (positive, negative) error pair.
        for layer in self.layers:
            layer.update(d)

ED_Layerが単体で一層を担っていて、層間は全結合しています。
興奮性細胞/抑制性細胞からの入力を受け取って、興奮性細胞/抑制性細胞として出力する動作の表現として、興奮入力興奮出力・興奮入力抑制出力・抑制入力興奮出力・抑制入力抑制出力の4つにそれぞれLinearを用意して、差分を出力するようにしています。

import numpy as np

def sigmoid(x, u0=.4):
    """Steep logistic activation 1 / (1 + exp(-2*x/u0)).

    Implemented through tanh — algebraically identical
    (sigmoid(z) == 0.5*(1 + tanh(z/2)) with z = 2*x/u0) but free of the
    overflow RuntimeWarning that np.exp(-2*x/u0) emits once 2*|x|/u0
    exceeds ~709.

    Args:
        x: scalar or ndarray input.
        u0: steepness constant; smaller values give a sharper transition.
    """
    return 0.5 * (1.0 + np.tanh(x / u0))

class Linear:
    """Minimal dense map y = x @ W, with W drawn uniformly from [0, 1)."""

    def __init__(self, in_, out_):
        self.weight = np.random.rand(in_, out_)

    def __call__(self, x):
        return self.forward(x)

    def forward(self, x):
        # Keep a private copy of the input for the later weight update.
        self.input = x.copy()
        self.output = np.matmul(x, self.weight)
        return self.output
    
class ED_Layer:
    """One excitatory/inhibitory layer pair of the ED method.

    Four dense maps connect the (p)ositive and (n)egative streams:
    pp: p->p, np: n->p, pn: p->n, nn: n->n.  The positive output is
    sigmoid(pp(p) - np(n)); the negative output is sigmoid(nn(n) - pn(p)).
    """

    def __init__(self, in_, out_, alpha=.8):
        self.in_ = in_
        self.out_ = out_
        self.alpha = alpha
        self.pp = Linear(in_, out_)
        self.np = Linear(in_, out_)
        self.pn = Linear(in_, out_)
        self.nn = Linear(in_, out_)

    def __call__(self, p, n):
        return self.forward(p, n)

    def forward(self, p, n):
        # Cache both stream outputs; `update` reads them back.
        self.op = sigmoid(self.pp(p) - self.np(n))
        self.on = sigmoid(self.nn(n) - self.pn(p))
        return (self.op, self.on)

    def update(self, dp, dn):
        # Hebbian-style step: error * sigmoid'(output) * input, averaged
        # over the batch.  dp scales the two maps fed by the p-stream
        # inputs' error direction, dn the other two; each map uses the
        # derivative of the output it feeds (op for pp/np, on for pn/nn).
        pairs = (
            (dp, self.pp, self.op),
            (dn, self.np, self.op),
            (dp, self.pn, self.on),
            (dn, self.nn, self.on),
        )
        for err, lin, out in pairs:
            dw = np.einsum("b,bo,bi->bio", err, out * (1 - out), lin.input)
            lin.weight += self.alpha * dw.mean(0)
            
        
class ED:
    """Full ED network: a chain of ED_Layers over paired (p, n) streams."""

    def __init__(self, in_, out_, hidden_width, hidden_depth=1, alpha=.8):
        # Build layers in order — each ED_Layer consumes RNG draws, so
        # construction order determines the initial weights.
        self.layers = [ED_Layer(in_, hidden_width, alpha)]
        self.layers.extend(
            ED_Layer(hidden_width, hidden_width, alpha) for _ in range(hidden_depth)
        )
        self.layers.append(ED_Layer(hidden_width, out_))

    def __call__(self, x):
        return self.forward(x)

    def forward(self, x):
        # Both streams start as independent copies of the input.
        p = x.copy()
        n = x.copy()
        for layer in self.layers:
            p, n = layer(p, n)
        # The first positive-stream unit is the network's prediction.
        return p[:, 0]

    def update(self, dp, dn):
        # Every layer receives the same positive/negative error pair.
        for layer in self.layers:
            layer.update(dp, dn)

XOR

# Batch XOR demo: fixed seed keeps the run (and printed output) reproducible.
np.random.seed(seed=1)
ed = ED(3, 1, 16, 15)

data = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
beta = 0.1
# Prepend the constant bias term `beta` to every sample.
input_ = np.array([[beta, *a] for a in data])
y = data[:, 0] ^ data[:, 1]
for i in range(10):
    output = ed(input_)
    # Positive / negative parts of the error drive the two update streams.
    dp, dn = np.clip(y-output, 0, None), np.clip(output-y, 0, None)
    ed.update(dp, dn)
    
# Print target vs. network output for all four XOR cases.
for i, j in [[0, 0], [0, 1], [1, 0], [1, 1]]:
    print(i^j, ed(np.array([[beta, i, j]])))
out
0 [2.79953956e-15]
1 [1.]
1 [1.]
0 [2.79953956e-15]

所感

ED法のアイデアを元にいろいろ試してみようと思います。

11
6
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
11
6