This time I worked on speeding up the ED method, a neural-network learning algorithm devised by Isamu Kaneko, the developer of Winny.
I asked a question on the article below, and an improved version has since been published there, so this post may no longer be necessary, but I am posting it anyway.
(Since I had already written it up... this is my first post... please go easy on me...)
I used the following article as a reference.
https://qiita.com/pocokhc/items/f7ab56051bb936740b8f#comment-324d0ec655a40e439ac8
When I asked about speed in the comments there, the reply was that there was still room for improvement.
So I tried improving the matrix computation and checked how it affects accuracy and speed; a quick sketch of the idea follows, and the full code is below.
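To make "improving the matrix computation" concrete, here is a minimal sketch of the kind of change involved. The loop-based baseline is my own illustration of the sort of code being replaced (an assumption, not an excerpt from the referenced article); the vectorized inner product is what Neuron.forward in the full code below uses.

import numpy as np

weights = np.random.random(8)  # hypothetical: weight vector of one neuron
x = np.random.random(8)        # hypothetical: matching input vector

# baseline: accumulate the weighted sum with a plain Python loop
y_loop = 0.0
for w, xi in zip(weights, x):
    y_loop += w * xi

# "matrix computation" version: one vectorized inner product
y_dot = np.dot(x, weights)

assert np.isclose(y_loop, y_dot)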
The code I used is shown below.
Full code
import math
import random
import numpy as np
import pandas as pd
import tensorflow as tf
from matplotlib import pyplot as plt
from tqdm import tqdm
random.seed(10)
def sigmoid(x, u0=0.4):
    return 1 / (1 + np.exp(-2 * x / u0))

def sigmoid_derivative(x):
    return sigmoid(x) * (1 - sigmoid(x))

def linear(x):
    return x

def linear_derivative(x):
    return 1
class Neuron:
    def __init__(
        self,
        in_neurons: list["Neuron"],
        ntype: str,  # "p": positive, "n": negative
        alpha: float = 0.8,  # probably the learning rate
        activation=sigmoid,
        activation_derivative=sigmoid_derivative,
    ) -> None:
        self.ntype = ntype
        self.alpha = alpha
        self.activation = activation
        self.activation_derivative = activation_derivative
        # --- init weights
        # weight: pp+ pn- np- nn+
        self.weights = []
        for n in in_neurons:
            if ntype == "p":
                if n.ntype == "p":
                    ope = 1
                else:
                    ope = -1
            else:
                if n.ntype == "p":
                    ope = -1
                else:
                    ope = 1
            self.weights.append(random.random() * ope)
        # --- operator
        self.operator = 1 if ntype == "p" else -1
        self.weights_operator = [n.operator for n in in_neurons]
        # --- update index
        # if the source neuron is "+", its weight is updated on "upper"
        # if the source neuron is "-", its weight is updated on "lower"
        self.upper_idx_list = []
        self.lower_idx_list = []
        for i, n in enumerate(in_neurons):
            if n.ntype == "p":
                self.upper_idx_list.append(i)
            else:
                self.lower_idx_list.append(i)

    def forward(self, x):
        assert len(self.weights) == len(x)
        y = np.dot(x, self.weights)
        self.prev_in = x
        self.prev_out = y
        y = self.activation(y)
        return y

    def update_weight(self, delta_out, direct: str):
        grad = self.activation_derivative(abs(self.prev_out))
        if direct == "upper":
            indices = self.upper_idx_list
        else:
            indices = self.lower_idx_list
        for idx in indices:
            _old_w = self.weights[idx]
            delta = self.alpha * self.prev_in[idx]
            delta *= grad
            delta *= delta_out * self.operator * self.weights_operator[idx]
            self.weights[idx] += delta

    def __str__(self):
        s = f"{self.ntype} {self.operator:2d}"
        arr = []
        for i in range(len(self.weights)):
            o = "+" if i in self.upper_idx_list else "-"
            arr.append(f"{self.weights[i]:6.3f}({self.weights_operator[i]:2d},{o})")
        s += " [" + ", ".join(arr) + "]"
        return s
class MultiLayerModel:
    def __init__(
        self,
        input_num: int,
        hidden_sizes,
        alpha: float = 0.8,
        beta: float = 0.8,
    ) -> None:
        self.beta = beta
        # [hd+, hd-] (bias?)
        hd_p = Neuron([], "p")
        hd_n = Neuron([], "n")
        # input
        inputs: list[Neuron] = []
        for i in range(input_num):
            inputs.append(Neuron([], "p"))
            inputs.append(Neuron([], "n"))
        # hidden
        self.hidden_neurons_list: list[list[Neuron]] = []
        idx = 0
        prev_neurons = inputs
        for size in hidden_sizes:
            hidden_neurons = []
            for i in range(size):
                hidden_neurons.append(
                    Neuron(
                        [hd_p, hd_n] + prev_neurons,
                        ntype=("p" if idx % 2 == 0 else "n"),
                        alpha=alpha,
                        activation=sigmoid,
                        activation_derivative=sigmoid_derivative,
                    )
                )
                idx += 1
            prev_neurons = hidden_neurons
            self.hidden_neurons_list.append(hidden_neurons)
        # output
        self.out_neuron = Neuron(
            [hd_p, hd_n] + self.hidden_neurons_list[-1],
            "p",
            alpha=alpha,
            activation=sigmoid,
            activation_derivative=sigmoid_derivative,
        )

    def forward(self, inputs):
        x = np.concatenate((inputs, inputs))
        for neurons in self.hidden_neurons_list:
            x = np.array([h.forward(np.concatenate(([self.beta, self.beta], x))) for h in neurons])
        x = self.out_neuron.forward(np.concatenate(([self.beta, self.beta], x)))
        return x

    def train(self, inputs, target):
        x = self.forward(inputs)
        diff = target - x
        direct = "upper" if diff > 0 else "lower"
        diff = np.abs(diff)
        for neurons in self.hidden_neurons_list:
            for n in neurons:
                n.update_weight(diff, direct)
        self.out_neuron.update_weight(diff, direct)
        return diff
def _create_dataset():
    mnist = tf.keras.datasets.mnist
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train, x_test = x_train / 255.0, x_test / 255.0
    x_train = x_train.reshape(x_train.shape[0], -1)
    x_test = x_test.reshape(x_test.shape[0], -1)
    train_indices = np.where((y_train == 0) | (y_train == 1))[0]
    test_indices = np.where((y_test == 0) | (y_test == 1))[0]
    x_train = x_train[train_indices]
    y_train = y_train[train_indices]
    x_test = x_test[test_indices]
    y_test = y_test[test_indices]
    # reduce the number of training samples, since the dataset is large
    x_train = x_train[:1000]
    y_train = y_train[:1000]
    return (x_train, y_train), (x_test, y_test)
def main_tf():
    (x_train, y_train), (x_test, y_test) = _create_dataset()
    model = tf.keras.models.Sequential(
        [
            tf.keras.layers.Input(shape=(28 * 28,)),
            tf.keras.layers.Dense(16, activation="sigmoid"),
            tf.keras.layers.Dense(16, activation="sigmoid"),
            tf.keras.layers.Dense(16, activation="sigmoid"),
            tf.keras.layers.Dense(16, activation="sigmoid"),
            tf.keras.layers.Dense(16, activation="sigmoid"),
            tf.keras.layers.Dense(16, activation="sigmoid"),
            tf.keras.layers.Dense(16, activation="sigmoid"),
            tf.keras.layers.Dense(16, activation="sigmoid"),
            tf.keras.layers.Dense(16, activation="sigmoid"),
            tf.keras.layers.Dense(16, activation="sigmoid"),
            tf.keras.layers.Dense(1, activation="sigmoid"),
        ]
    )
    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    model.fit(x_train, y_train, epochs=1, batch_size=1)
    model.evaluate(x_test, y_test)
def main_ed():
    (x_train, y_train), (x_test, y_test) = _create_dataset()
    model = MultiLayerModel(28 * 28, (16, 16, 16, 16, 16, 16, 16, 16, 16, 16), alpha=0.1)
    # --- train loop
    metrics = []
    for i in range(1):
        for j in tqdm(range(len(x_train))):
            x = x_train[j]
            target = y_train[j]
            metric = model.train(x, target)
            metrics.append(metric)
    # --- evaluate
    correct = 0
    total = 0
    for i in tqdm(range(len(x_test))):
        y = model.forward(x_test[i])
        y = 1 if y > 0.5 else 0
        if y_test[i] == y:
            correct += 1
        total += 1
    print(f"{100 * correct / total:.2f}%")
    plt.plot(pd.DataFrame(metrics).rolling(20).mean())
    plt.plot(metrics, alpha=0.2)
    plt.grid()
    plt.xlabel("step")
    plt.ylabel("diff")
    plt.show()
if __name__ == "__main__":
    main_ed()
I ran each version on Google Colab and checked the results.
The results were as follows.
Before the change
100%|██████████| 1000/1000 [02:08<00:00, 7.78it/s]
100%|██████████| 2115/2115 [00:16<00:00, 131.66it/s]
98.35%
After the change
100%|██████████| 1000/1000 [00:18<00:00, 55.14it/s]
100%|██████████| 2115/2115 [00:08<00:00, 240.49it/s]
91.11%
Judging from the it/s figures, training became roughly 7x faster (7.78 it/s → 55.14 it/s) and inference roughly 2x faster (131.66 it/s → 240.49 it/s). On the other hand, test accuracy dropped from 98.35% to 91.11%.
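For reference, one possible further step in the same direction (this is only my own sketch, not something the code above implements) would be to stack all the weight vectors of a layer into a single matrix, so that a layer's forward pass becomes one matrix-vector product instead of one np.dot call per Neuron:

import numpy as np

def layer_forward(W, x, activation):
    # W: (n_neurons, n_inputs) weight matrix for one layer
    # x: (n_inputs,) input vector (bias terms already included)
    return activation(W @ x)

# tiny usage example with hypothetical shapes
rng = np.random.default_rng(0)
W = rng.random((16, 4))  # hypothetical 16-neuron layer with 4 inputs
x = rng.random(4)
out = layer_forward(W, x, lambda z: 1 / (1 + np.exp(-2 * z / 0.4)))
print(out.shape)  # (16,)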
That said, various other articles have been posted over the past week.
Those are far more polished than this one.
(I only had Claude write mine for me...)
Since I had already written this much, I decided to upload it anyway.