1
0

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?

AI要素② 多層パーセプトロン

Posted at

AIの要素技術について記述します。多層パーセプトロン(Multi-Layer Perceptron, MLP)は、訓練データを用いて学習し、学習後は新たな入力データに対して出力を予測します。入力層と出力層の間に1つ以上の隠れ層があり、各層の間をつなぐ「重み(パラメータ)」を学習します。
 

サンプルプログラム

mlp_numpy.py

① XOR 線形分離できないが、MLPなら解ける

訓練データ(2入力、1出力のデータセットを4個)

[[0, 0], [0, 1], [1, 0], [1, 1]]

[0, 1, 1, 0]

活性化関数:[隠れ層]ReLU 関数、[出力層]シグモイド関数

Xavier 初期化 / He 初期化

設定
隠れ層 8, 8(隠れ層が2層あり、それぞれ 8 ユニットずつ)
出力層で分類したいクラスの数 2
  (本プログラムではクラス数が 2 のとき、出力ノード1個 + Sigmoid を使用します。一般には、出力ノード2個 + Softmax でも表現できます)
lr 0.1(勾配に対して 0.1倍 のステップで重みを更新する)(学習率 learning rate)
エポック 2000(学習を繰り返す回数)
バッチサイズ 4(データ全体を使って1回更新)(フルバッチ学習)
乱数シード 42 (random seed)
活性化関数 ReLU

予測値(入力は上記と同じ4個)
[0, 1, 1, 0]

正解率
1.0(「予測 == 正解」を1、「予測 != 正解」を0、として、その平均)(4つとも正解)

 

020_mlp_numpy.py
import numpy as np
from dataclasses import dataclass

# ------- ユーティリティ -------
def one_hot(y, num_classes):
    """Return a float one-hot matrix for the integer class labels in ``y``.

    ``y`` is cast to int and flattened, so any array shape is accepted; the
    result has one row per label and ``num_classes`` columns.
    """
    labels = y.astype(int).ravel()
    # Row k of the identity matrix is exactly the one-hot vector for class k.
    identity = np.eye(num_classes, dtype=float)
    return identity[labels]

def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)) element-wise.

    The naive formula overflows in ``exp(-z)`` for large negative ``z``
    (RuntimeWarning, or FloatingPointError under strict ``np.errstate``).
    Here only ``exp(-|z|)`` is evaluated, which always lies in (0, 1], and the
    algebraically equivalent branch is selected by the sign of ``z``.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))  # never overflows: the exponent is <= 0
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z)
    return np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

def dsigmoid(a):
    """Return the sigmoid derivative given the activation ``a = sigmoid(z)``."""
    complement = 1.0 - a
    return a * complement

def relu(z):
    """Return the rectified linear unit of ``z``: element-wise max(0, z)."""
    floor = 0.0
    return np.maximum(floor, z)

def drelu(z):
    """Return the ReLU derivative at ``z``: 1.0 where z > 0, otherwise 0.0."""
    positive = z > 0
    return positive.astype(float)

def softmax(z):
    """Return the row-wise softmax of the 2-D array ``z``.

    Each row's maximum is subtracted before exponentiating so that large
    logits do not overflow; this shift does not change the result.
    """
    row_max = z.max(axis=1, keepdims=True)
    exps = np.exp(z - row_max)
    totals = exps.sum(axis=1, keepdims=True)
    return exps / totals

# ------- モデル定義 -------
@dataclass
class MLPConfig:
    """Hyperparameters consumed by the MLP class defined in this file."""

    # Number of input features (size of the first layer's fan-in).
    input_dim: int
    # Number of units in each hidden layer, one entry per layer.
    hidden_dims: tuple = (16, 16)   # hidden layer units
    # Number of target classes; 2 selects a single sigmoid output unit,
    # any other value selects a softmax output with that many units.
    num_classes: int = 2            # 2 means binary classification (sigmoid)
    # Learning rate: step size multiplied with the gradients on each update.
    lr: float = 0.05
    # Number of passes over the training data.
    epochs: int = 2000
    # Mini-batch size; MLP.fit caps it at the number of training samples.
    batch_size: int = 32
    l2: float = 0.0                 # L2 regularization coefficient
    # Seed handed to np.random.default_rng when the MLP is constructed.
    seed: int | None = 0
    # Hidden-layer activation; MLP treats any value other than "relu" as tanh.
    activation: str = "relu"        # "relu" or "tanh"

class MLP:
    """Fully connected multi-layer perceptron trained with mini-batch SGD.

    Hidden layers use ReLU (He initialization) or tanh (Xavier
    initialization), selected by ``cfg.activation``. With
    ``cfg.num_classes == 2`` the output layer is a single sigmoid unit trained
    with binary cross-entropy; otherwise it is a softmax layer trained with
    categorical cross-entropy. Optional L2 weight decay is controlled by
    ``cfg.l2``.

    All randomness (weight initialization and per-epoch shuffling) is drawn
    from ``self.rng``, which is seeded with ``cfg.seed``, so a fixed seed
    gives a reproducible training run.
    """

    def __init__(self, cfg: MLPConfig):
        """Store the configuration, seed the generator and create the weights."""
        self.cfg = cfg
        self.rng = np.random.default_rng(cfg.seed)
        self.params = self._init_params()

    # Parameter initialization (Xavier / He)
    def _init_params(self):
        """Return a dict ``{"W1", "b1", ..., "WL", "bL"}`` of fresh parameters.

        Weights are drawn from a zero-mean normal distribution whose standard
        deviation is sqrt(2 / fan_in) for ReLU hidden layers (He) and
        sqrt(1 / fan_in) for tanh hidden layers and the output layer (Xavier).
        Biases start at zero.
        """
        out_dim = 1 if self.cfg.num_classes == 2 else self.cfg.num_classes
        sizes = [self.cfg.input_dim, *self.cfg.hidden_dims, out_dim]
        last = len(sizes) - 2  # index of the output layer's weight matrix
        params = {}
        for i, (fan_in, fan_out) in enumerate(zip(sizes[:-1], sizes[1:])):
            is_hidden = i < last
            if is_hidden and self.cfg.activation == "relu":
                scale = np.sqrt(2.0 / fan_in)  # He init
            else:
                scale = np.sqrt(1.0 / fan_in)  # Xavier (tanh hidden / output)
            params[f"W{i+1}"] = self.rng.normal(0.0, scale, size=(fan_in, fan_out))
            params[f"b{i+1}"] = np.zeros((1, fan_out))
        return params

    # Forward pass
    def _forward(self, X):
        """Run the network on ``X`` and return ``(AL, caches)``.

        ``AL`` is the output activation, shape (N, 1) for the sigmoid head or
        (N, C) for the softmax head. ``caches`` maps ``"Z{i}"`` / ``"A{i}"``
        to each layer's pre-activation / activation, with ``"A0"`` the input.
        """
        caches = {"A0": X}
        A = X
        L = len(self.params) // 2  # each layer contributes one W and one b
        for i in range(1, L):  # hidden layers
            Z = A @ self.params[f"W{i}"] + self.params[f"b{i}"]
            A = relu(Z) if self.cfg.activation == "relu" else np.tanh(Z)
            caches[f"Z{i}"], caches[f"A{i}"] = Z, A

        # Output layer
        ZL = A @ self.params[f"W{L}"] + self.params[f"b{L}"]
        if self.cfg.num_classes == 2:
            AL = sigmoid(ZL)  # shape: (N,1)
        else:
            AL = softmax(ZL)  # shape: (N,C)
        caches[f"Z{L}"], caches[f"A{L}"] = ZL, AL
        return AL, caches

    # Loss (cross-entropy + L2)
    def _loss(self, AL, y):
        """Return the mean cross-entropy of ``AL`` against labels ``y``.

        When ``cfg.l2 > 0`` the penalty ``0.5 * l2 * sum(W**2) / N`` over all
        weight matrices is added. ``y`` may be any array-like of labels.
        """
        y = np.asarray(y)  # accept plain lists as well as arrays
        N = y.shape[0]
        eps = 1e-12  # keeps log() finite when a probability is exactly 0 or 1
        if self.cfg.num_classes == 2:
            # y: (N,) or (N,1) in {0,1}; AL: (N,1)
            y = y.reshape(-1, 1)
            loss = -(y * np.log(AL + eps) + (1 - y) * np.log(1 - AL + eps)).mean()
        else:
            # y: (N,) in {0..C-1}; AL: (N,C)
            Y = one_hot(y, self.cfg.num_classes)
            loss = -(Y * np.log(AL + eps)).sum(axis=1).mean()

        if self.cfg.l2 > 0:
            L = len(self.params) // 2
            l2sum = sum((self.params[f"W{i}"] ** 2).sum() for i in range(1, L + 1))
            loss += 0.5 * self.cfg.l2 * l2sum / N
        return loss

    # Backward pass
    def _backward(self, caches, y):
        """Return gradients ``{"dW{i}", "db{i}"}`` for the batch in ``caches``.

        ``caches`` must come from ``_forward`` on the same batch whose labels
        are ``y``. Gradients are averaged over the batch and include the L2
        term ``l2 * W / N``, matching the penalty used in ``_loss``.
        """
        y = np.asarray(y)  # accept plain lists as well as arrays
        grads = {}
        L = len(self.params) // 2
        A_prev = caches[f"A{L-1}"] if L > 1 else caches["A0"]
        AL = caches[f"A{L}"]
        N = A_prev.shape[0]

        # Output-layer delta: for both heads, d(loss)/dZ reduces to (prob - target).
        if self.cfg.num_classes == 2:
            dZL = AL - y.reshape(-1, 1)  # BCE with sigmoid
        else:
            dZL = AL - one_hot(y, self.cfg.num_classes)  # CE with softmax

        grads[f"dW{L}"] = (A_prev.T @ dZL) / N + self.cfg.l2 * self.params[f"W{L}"] / N
        grads[f"db{L}"] = dZL.mean(axis=0, keepdims=True)

        dA_prev = dZL @ self.params[f"W{L}"].T

        # Walk the hidden layers from last to first
        for i in range(L - 1, 0, -1):
            A_prev = caches[f"A{i-1}"] if i > 1 else caches["A0"]
            if self.cfg.activation == "relu":
                dZ = dA_prev * drelu(caches[f"Z{i}"])
            else:
                # The cached activation is tanh(Z), so reuse it for dtanh.
                dZ = dA_prev * (1 - caches[f"A{i}"] ** 2)
            grads[f"dW{i}"] = (A_prev.T @ dZ) / N + self.cfg.l2 * self.params[f"W{i}"] / N
            grads[f"db{i}"] = dZ.mean(axis=0, keepdims=True)
            dA_prev = dZ @ self.params[f"W{i}"].T

        return grads

    # Parameter update (SGD)
    def _step(self, grads):
        """Apply one in-place gradient-descent step with learning rate ``cfg.lr``."""
        L = len(self.params) // 2
        for i in range(1, L + 1):
            self.params[f"W{i}"] -= self.cfg.lr * grads[f"dW{i}"]
            self.params[f"b{i}"] -= self.cfg.lr * grads[f"db{i}"]

    # Training
    def fit(self, X, y, X_val=None, y_val=None, verbose=True):
        """Train on ``(X, y)`` for ``cfg.epochs`` epochs and return the history.

        Each epoch reshuffles the data with the model's own seeded generator
        and performs one SGD step per mini-batch. The full-training-set loss
        (and the validation loss, when both ``X_val`` and ``y_val`` are given)
        is recorded after every epoch.

        Returns:
            dict with keys ``"loss"`` and ``"val_loss"``, each a list holding
            one value per epoch (``"val_loss"`` is empty without validation
            data).
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        has_val = X_val is not None and y_val is not None
        if has_val:
            # Convert once here rather than on every epoch.
            X_val = np.asarray(X_val, dtype=float)
            y_val = np.asarray(y_val)

        N = X.shape[0]
        bs = min(self.cfg.batch_size, N)
        log_every = max(1, self.cfg.epochs // 10)
        history = {"loss": [], "val_loss": []}

        for ep in range(1, self.cfg.epochs + 1):
            # Shuffle with self.rng (not the global NumPy RNG) so that
            # cfg.seed makes the whole run reproducible.
            idx = self.rng.permutation(N)
            Xs, ys = X[idx], y[idx]

            # Mini-batches
            for st in range(0, N, bs):
                ed = st + bs
                _, caches = self._forward(Xs[st:ed])
                grads = self._backward(caches, ys[st:ed])
                self._step(grads)

            # Logging
            AL_train, _ = self._forward(X)
            train_loss = self._loss(AL_train, y)
            history["loss"].append(train_loss)

            if has_val:
                AL_val, _ = self._forward(X_val)
                val_loss = self._loss(AL_val, y_val)
                history["val_loss"].append(val_loss)

            if verbose and (ep % log_every == 0 or ep == 1):
                if has_val:
                    print(f"epoch {ep:4d}  loss={train_loss:.4f}  val_loss={val_loss:.4f}")
                else:
                    print(f"epoch {ep:4d}  loss={train_loss:.4f}")

        return history

    # Prediction
    def predict(self, X):
        """Return the predicted class index for every row of ``X`` as a 1-D int array."""
        AL = self.predict_proba(X)
        if self.cfg.num_classes == 2:
            # Single sigmoid output: threshold the probability at 0.5.
            return (AL.ravel() >= 0.5).astype(int)
        return AL.argmax(axis=1)

    def predict_proba(self, X):
        """Return the output-layer probabilities, shape (N, 1) or (N, C)."""
        X = np.asarray(X, dtype=float)
        AL, _ = self._forward(X)
        return AL

    def score(self, X, y):
        """Return the accuracy (fraction of correct predictions) on ``(X, y)``."""
        y_pred = self.predict(X)
        # Flatten so that a column-vector y is compared element-wise instead
        # of being broadcast against y_pred into an (N, N) matrix.
        y_true = np.asarray(y).ravel()
        return (y_pred == y_true).mean()

# ------- デモ: XOR(二値)と3クラス玩具データ -------
def _demo_xor():
    """Train a small binary MLP on XOR and print predictions and accuracy."""
    # XOR is not linearly separable, but an MLP can solve it.
    inputs = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=float)
    targets = np.array([0, 1, 1, 0], dtype=int)

    config = MLPConfig(
        input_dim=2,
        hidden_dims=(8, 8),
        num_classes=2,
        lr=0.1,
        epochs=2000,
        batch_size=4,
        seed=42,
        activation="relu",
    )
    model = MLP(config)
    model.fit(inputs, targets, verbose=False)
    print("XOR pred:", model.predict(inputs), "true:", targets)
    print("XOR acc :", model.score(inputs, targets))


def _demo_three_class():
    """Train a 3-class MLP on points lying on three concentric circles."""
    rng = np.random.default_rng(0)
    n_samples = 300
    # Draw order (angles first, then radii) matters for reproducibility.
    theta = rng.uniform(0, 2 * np.pi, n_samples)
    radius = rng.choice([0.6, 1.2, 1.8], size=n_samples, replace=True)
    points = np.c_[radius * np.cos(theta), radius * np.sin(theta)]
    # Label by ring: one point for each threshold the radius exceeds -> 0, 1, 2.
    beyond_inner = (radius > 0.9).astype(int)
    beyond_middle = (radius > 1.5).astype(int)
    labels = beyond_inner + beyond_middle

    config = MLPConfig(
        input_dim=2,
        hidden_dims=(32, 32),
        num_classes=3,
        lr=0.05,
        epochs=1000,
        batch_size=32,
        seed=1,
        activation="relu",
    )
    model = MLP(config)
    model.fit(points, labels, verbose=False)
    print("3-class acc:", model.score(points, labels))


if __name__ == "__main__":
    _demo_xor()
    _demo_three_class()

結果
XOR pred: [0 1 1 0] true: [0 1 1 0]
XOR acc : 1.0
3-class acc: 1.0

1
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
1
0

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?