This section describes fundamental AI technologies. An RNN (recurrent neural network) is a neural network extended with a mechanism for handling temporal dependencies: the hidden state computed at one time step is fed back in at the next step.
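For intuition, here is a minimal NumPy sketch of a single time step of that recurrence (the vocabulary size 30 and hidden size 64 are illustrative values I chose here; the weight names Wxh, Whh, bh match the sample program below):

import numpy as np

# One recurrence step of a vanilla RNN: the new hidden state h_t mixes the
# current input x_t with the previous hidden state h_prev.
V, H = 30, 64                                  # illustrative vocabulary / hidden sizes
rng = np.random.default_rng(0)
Wxh = rng.normal(0, np.sqrt(1.0 / V), (H, V))  # input-to-hidden weights
Whh = rng.normal(0, np.sqrt(1.0 / H), (H, H))  # hidden-to-hidden weights
bh = np.zeros((H, 1))                          # hidden bias
x_t = np.zeros((V, 1)); x_t[5, 0] = 1.0        # one-hot vector for one input character
h_prev = np.zeros((H, 1))                      # hidden state from the previous step
h_t = np.tanh(Wxh @ x_t + Whh @ h_prev + bh)   # (H, 1); carries information forward in time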
Sample program
rnn_numpy.py
Training
text = (
"hello rnn! this is a tiny demo.\n"
"rnn learns to predict the next character.\n"
"numPy only, vanilla tanh rnn.\n"
)
The entire training text is converted to integer indices.
Settings
Hidden state vector: 64 dimensions
Learning rate (lr): 0.2 (weights are updated by a step of 0.2 times the gradient)
Random seed: 42
Gradient clipping: 5.0 (gradient values are clamped to a fixed range)
Number of training epochs: 50
Time-unrolling length (truncated BPTT): 40
Output
1. The first character is fixed to 'h'.
2. The RNN then generates 200 characters of text.
(The next character is chosen at random according to the probability distribution p computed by the softmax; see the sketch after this list.)
Perhaps because the training text is short, the output contains many 'n' characters and newlines; the loss log below also rises after the first epoch, which suggests training becomes unstable at this learning rate.
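As a minimal illustration of the sampling rule in step 2 above (the three-character vocabulary and its probabilities are made up; in the sample program, p comes from the softmax over the RNN output):

import numpy as np

p = np.array([0.1, 0.7, 0.2])                  # hypothetical softmax output over 3 characters
next_idx = int(np.random.choice(len(p), p=p))  # draw the next character index according to p
print(next_idx)                                # usually 1, but 0 and 2 also occur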
060_rnn_numpy.py
import numpy as np

# ===== Utilities =====
def one_hot(idx, vocab_size):
    v = np.zeros((vocab_size,), dtype=float)
    v[idx] = 1.0
    return v

def softmax(z):
    z = z - z.max(axis=1, keepdims=True)
    e = np.exp(z)
    return e / e.sum(axis=1, keepdims=True)
# ===== RNN model (vanilla) =====
class RNN:
    """
    Simple RNN: h_t = tanh(Wxh x_t + Whh h_{t-1} + bh)
                y_t = softmax(Why h_t + by)
    - character-level language model (next-character prediction)
    - truncated BPTT, gradient clipping, SGD
    """
    def __init__(self, input_dim, hidden_dim=64, lr=0.1, seed=0, clip=5.0):
        rng = np.random.default_rng(seed)
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.lr = lr
        self.clip = clip
        # Xavier-like initialization (for tanh)
        self.Wxh = rng.normal(0, np.sqrt(1.0 / input_dim), (hidden_dim, input_dim))
        self.Whh = rng.normal(0, np.sqrt(1.0 / hidden_dim), (hidden_dim, hidden_dim))
        self.bh = np.zeros((hidden_dim, 1))
        self.Why = rng.normal(0, np.sqrt(1.0 / hidden_dim), (input_dim, hidden_dim))
        self.by = np.zeros((input_dim, 1))
        # cached hidden state carried across calls
        self.last_h = np.zeros((hidden_dim, 1))
    def forward(self, inputs, h_prev=None):
        """
        inputs: sequence of integer IDs (length T+1)
        returns: (loss, cache)
        """
        if h_prev is None:
            h_prev = np.copy(self.last_h)
        xs, hs, ys, ps = {}, {}, {}, {}
        hs[-1] = h_prev
        loss = 0.0
        T = len(inputs) - 1  # next-character prediction: the input at t predicts t+1
        for t in range(T):
            x_t = one_hot(inputs[t], self.input_dim).reshape(-1, 1)
            xs[t] = x_t
            hs[t] = np.tanh(self.Wxh @ x_t + self.Whh @ hs[t-1] + self.bh)
            ys[t] = self.Why @ hs[t] + self.by
            ps[t] = softmax(ys[t].T).T  # shape (V, 1)
            # the target is inputs[t+1]
            target_idx = inputs[t+1]
            loss -= np.log(ps[t][target_idx, 0] + 1e-12)
        cache = (xs, hs, ps, inputs, T)
        return loss / T, cache
    def backward(self, cache):
        xs, hs, ps, inputs, T = cache
        dWxh = np.zeros_like(self.Wxh)
        dWhh = np.zeros_like(self.Whh)
        dWhy = np.zeros_like(self.Why)
        dbh = np.zeros_like(self.bh)
        dby = np.zeros_like(self.by)
        dh_next = np.zeros_like(hs[0])
        for t in reversed(range(T)):
            # dy = p - one-hot(target)
            dy = np.copy(ps[t])
            dy[inputs[t+1]] -= 1.0  # (V, 1)
            dWhy += dy @ hs[t].T
            dby += dy
            dh = self.Why.T @ dy + dh_next  # (H, 1)
            dh_raw = (1 - hs[t] * hs[t]) * dh  # tanh'
            dbh += dh_raw
            dWxh += dh_raw @ xs[t].T
            dWhh += dh_raw @ hs[t-1].T
            dh_next = self.Whh.T @ dh_raw
        # gradient clipping
        for g in [dWxh, dWhh, dWhy, dbh, dby]:
            np.clip(g, -self.clip, self.clip, out=g)
        grads = (dWxh, dWhh, dWhy, dbh, dby)
        return grads
    def step(self, grads, batch_size=1):
        dWxh, dWhh, dWhy, dbh, dby = grads
        # mini-batch averaging (sequential updates with batch_size=1 are also fine here)
        self.Wxh -= self.lr * (dWxh / batch_size)
        self.Whh -= self.lr * (dWhh / batch_size)
        self.Why -= self.lr * (dWhy / batch_size)
        self.bh -= self.lr * (dbh / batch_size)
        self.by -= self.lr * (dby / batch_size)
    def fit(self, seq, epochs=50, seq_len=32, verbose=True):
        """
        seq: array of integer IDs (the whole text)
        seq_len: unrolling length for truncated BPTT
        """
        p = 0
        N = len(seq)
        for ep in range(1, epochs + 1):
            # one pass over the whole sequence per epoch
            total_loss, steps = 0.0, 0
            self.last_h = np.zeros_like(self.last_h)
            while p + seq_len + 1 < N:
                chunk = seq[p : p + seq_len + 1]
                loss, cache = self.forward(chunk, self.last_h)
                grads = self.backward(cache)
                self.step(grads, batch_size=1)
                # carry the last hidden state from the cache over to the next chunk
                self.last_h = cache[1][seq_len - 1]
                total_loss += loss
                steps += 1
                p += seq_len
            p = 0  # rewind to the start of the sequence
            avg_loss = total_loss / max(1, steps)
            if verbose and (ep % max(1, epochs // 10) == 0 or ep == 1):
                print(f"epoch {ep:4d} loss={avg_loss:.3f}")
    def sample(self, start_idx, length=100):
        """Sample `length` characters starting from the character ID start_idx."""
        x = one_hot(start_idx, self.input_dim).reshape(-1, 1)
        h = np.copy(self.last_h)
        ids = [start_idx]
        for _ in range(length):
            h = np.tanh(self.Wxh @ x + self.Whh @ h + self.bh)
            y = self.Why @ h + self.by
            p = softmax(y.T).ravel()
            idx = int(np.random.choice(len(p), p=p))
            # idx = int(p.argmax())  # greedy alternative
            ids.append(idx)
            x = one_hot(idx, self.input_dim).reshape(-1, 1)
        return ids
# ===== Demo: next-character prediction on a tiny text =====
if __name__ == "__main__":
    # small corpus (feel free to replace it with any text)
    text = (
        "hello rnn! this is a tiny demo.\n"
        "rnn learns to predict the next character.\n"
        "numPy only, vanilla tanh rnn.\n"
    )
    # vocabulary
    chars = sorted(list(set(text)))
    stoi = {ch: i for i, ch in enumerate(chars)}
    itos = {i: ch for ch, i in stoi.items()}
    data = np.array([stoi[ch] for ch in text], dtype=int)
    rnn = RNN(input_dim=len(chars), hidden_dim=64, lr=0.2, seed=42, clip=5.0)
    rnn.fit(data, epochs=50, seq_len=40, verbose=True)
    # sampling (first character fixed to 'h')
    start_idx = stoi['h'] if 'h' in stoi else 0
    out_ids = rnn.sample(start_idx, length=200)
    generated = ''.join(itos[i] for i in out_ids)
    print("\n--- generated ---")
    # print(generated)
    print(repr(generated))
Results
epoch 1 loss=3.821
epoch 5 loss=17.263
epoch 10 loss=21.838
epoch 15 loss=20.386
epoch 20 loss=21.132
epoch 25 loss=20.376
epoch 30 loss=20.974
epoch 35 loss=20.507
epoch 40 loss=18.881
epoch 45 loss=21.927
epoch 50 loss=21.831
--- generated ---
'h\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nntn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn\nn'