2入力の三次元プロットで可視化する活性化関数のシミュレーション

Posted at 2025-04-03

はじめに

本記事では、ニューラルネットワークに用いられる様々な活性化関数をPythonで可視化し、それぞれの性質を比較します。さらに、シンプルな2入力ニューラルネットワークを用いて、勾配降下法による学習の様子と最終的な予測性能を3Dプロットで確認していきます。

Pythonコード

コード前半：各種活性化関数を3Dプロット

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# --- 活性化関数の定義 / Define activation functions ---
def linear(x):
    """Identity activation: hand the input back unchanged."""
    return x
def sigmoid(x):
    """Logistic sigmoid 1 / (1 + exp(-x)), evaluated without overflow.

    The naive form computes exp(-x), which overflows (RuntimeWarning, inf)
    for large negative x.  Using the identity
    sigmoid(x) = exp(-log(1 + exp(-x))) = exp(-logaddexp(0, -x))
    keeps every intermediate value finite.

    Args:
        x: Scalar or NumPy array.

    Returns:
        Value(s) in [0, 1] with the same shape as ``x``.
    """
    # logaddexp(0, -x) is log(1 + exp(-x)) computed in an overflow-safe way.
    return np.exp(-np.logaddexp(0, -x))
def relu(x):
    """Rectified linear unit: elementwise maximum of 0 and ``x``."""
    rectified = np.maximum(0, x)
    return rectified
def tanh(x):
    """Hyperbolic tangent activation (thin wrapper around ``np.tanh``)."""
    return np.tanh(x)
def leaky_relu(x, alpha=0.01):
    """Leaky ReLU: ``x`` where x > 0, otherwise ``alpha * x``.

    Args:
        x: Scalar or NumPy array.
        alpha: Slope applied to non-positive inputs.
    """
    is_positive = x > 0
    leaked = alpha * x
    return np.where(is_positive, x, leaked)
def softmax(x):
    """Softmax along axis 0.

    For a 1-D input this normalises the whole vector; for an N-D input
    each slice along axis 0 (each column of a matrix) is normalised
    independently.

    The maximum is subtracted *per slice* before exponentiating.  Shifting
    by the global maximum instead lets a whole column underflow to zero
    when another column holds much larger values, which turns the
    normalisation into 0 / 0 = NaN.

    Args:
        x: Array-like with at least one dimension.

    Returns:
        Array of the same shape whose entries sum to 1 along axis 0.
    """
    # keepdims keeps the reduced axis so the shift broadcasts column-wise.
    shifted = x - np.max(x, axis=0, keepdims=True)
    e_x = np.exp(shifted)
    return e_x / e_x.sum(axis=0, keepdims=True)
def elu(x, alpha=1.0):
    """Exponential linear unit: ``x`` for x > 0, else ``alpha * (exp(x) - 1)``.

    ``np.where`` evaluates both branches for every element, so the
    exponential is also computed for large positive inputs where it is
    discarded.  Clamping the exponent argument to <= 0 avoids the overflow
    warning this would cause, and ``expm1`` keeps precision for x near 0.

    Args:
        x: Scalar or NumPy array.
        alpha: Scale of the negative saturation branch.
    """
    negative_branch = alpha * np.expm1(np.minimum(x, 0))
    return np.where(x > 0, x, negative_branch)
def swish(x):
    """Swish activation ``x * sigmoid(x)``, evaluated without overflow.

    The direct form ``x / (1 + exp(-x))`` overflows in ``exp`` for large
    negative x.  Here the sigmoid factor is computed as
    ``exp(-logaddexp(0, -x))``, which keeps intermediates finite.

    Args:
        x: Scalar or NumPy array.
    """
    gate = np.exp(-np.logaddexp(0, -x))  # overflow-safe sigmoid(x)
    return x * gate
def hard_sigmoid(x):
    """Piecewise-linear sigmoid stand-in: ``(x + 1) / 2`` clipped to [0, 1].

    The output is 0 for x <= -1, 1 for x >= 1 and linear in between.
    """
    ramp = (x + 1) / 2
    return np.clip(ramp, 0, 1)
def gelu(x):
    """GELU activation using the tanh-based approximation.

    Computes ``0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x**3)))``.
    """
    cubic_term = x + 0.044715 * x**3
    tanh_arg = np.sqrt(2 / np.pi) * cubic_term
    return 0.5 * x * (1 + np.tanh(tanh_arg))
def softplus(x):
    """Softplus activation ``log(1 + exp(x))``, evaluated without overflow.

    The literal formula returns inf (with an overflow warning) once
    exp(x) exceeds the float range, although the true value is ~x there.
    ``np.logaddexp(0, x)`` computes the same quantity, log(exp(0) + exp(x)),
    in an overflow-safe way.

    Args:
        x: Scalar or NumPy array.
    """
    return np.logaddexp(0, x)
def leaky_gelu(x):
    """Return ``gelu(x)``.

    NOTE(review): despite the name, no leak/negative-slope term is
    applied here; the result is identical to ``gelu``. Confirm whether a
    distinct formula was intended.
    """
    result = gelu(x)
    return result
def hard_swish(x):
    """Hard-swish activation: ``x`` times ``(x + 3) / 6`` clipped to [0, 1]."""
    gate = np.clip((x + 3) / 6, 0, 1)
    return x * gate
def mish(x):
    """Mish activation ``x * tanh(softplus(x))``, evaluated without overflow.

    The inner softplus, log(1 + exp(x)), overflows in ``exp`` for large x
    when written literally; ``np.logaddexp(0, x)`` computes the same value
    while keeping intermediates finite.

    Args:
        x: Scalar or NumPy array.
    """
    soft = np.logaddexp(0, x)  # overflow-safe log(1 + exp(x))
    return x * np.tanh(soft)
def arctan(x):
    """Inverse-tangent activation (thin wrapper around ``np.arctan``)."""
    return np.arctan(x)
def sigmoid_linear(x):
    """Clamp ``x`` to the interval [0, 1] (identity inside, flat outside)."""
    clamped = np.clip(x, 0, 1)
    return clamped

# --- Build the 2-D input grid and the pre-activation surface ---
# Each input sweeps [0, 10] in 100 evenly spaced steps; Z is the affine
# combination 2 * input1 + 3 * input2 + 1 evaluated on the full grid.
x1 = np.linspace(0, 10, 100)
x2 = np.linspace(0, 10, 100)
X1, X2 = np.meshgrid(x1, x2)
Z = 2 * X1 + 3 * X2 + 1

# --- Apply every activation function to the pre-activation grid Z ---
# Each entry is (display name, output array). Softmax is the only entry
# that is not applied elementwise: it is evaluated slice by slice along
# axis 0 through np.apply_along_axis.
activations = [
    (label, fn(Z))
    for label, fn in (
        ("Linear", linear),
        ("Sigmoid", sigmoid),
        ("ReLU", relu),
        ("Tanh", tanh),
        ("Leaky ReLU", leaky_relu),
        ("Softmax", lambda grid: np.apply_along_axis(softmax, 0, grid)),
        ("ELU", elu),
        ("Swish", swish),
        ("Hard Sigmoid", hard_sigmoid),
        ("GELU", gelu),
        ("Softplus", softplus),
        ("Leaky GELU", leaky_gelu),
        ("Hard Swish", hard_swish),
        ("Mish", mish),
        ("Arctan", arctan),
        ("Sigmoid-Linear", sigmoid_linear),
    )
]

# --- Plot every activation surface in a grid of 3D subplots ---
# The row count is derived from the number of entries: a hard-coded 4x4
# grid makes add_subplot fail as soon as a 17th activation is added.
# With the current 16 entries this still yields a 4x4 grid on an 18x18
# inch figure.
n_cols = 4
n_rows = max(1, -(-len(activations) // n_cols))  # ceiling division
fig = plt.figure(figsize=(18, 4.5 * n_rows))
for i, (name, act) in enumerate(activations, 1):
    # Subplot indices are 1-based, hence enumerate(..., 1).
    ax = fig.add_subplot(n_rows, n_cols, i, projection='3d')
    ax.plot_surface(X1, X2, act, cmap='viridis')
    ax.set_title(f'{name} Activation')
    ax.set_xlabel('Input1')
    ax.set_ylabel('Input2')
    ax.set_zlabel('Output')
plt.tight_layout()
plt.show()

### コード後半：勾配降下法で学習する簡単な2入力モデル（シグモイド）

```python
# --- Dataset and parameter initialisation ---
# X enumerates the four binary input pairs; Y_true is their XOR.
# NOTE(review): XOR is not linearly separable, so a model consisting of a
# single sigmoid unit (one weight vector plus bias) is not expected to
# reproduce these targets; a hidden layer would be needed for that.
X = np.array([[first, second] for first in (0, 1) for second in (0, 1)])
Y_true = np.array([0, 1, 1, 0]).reshape(-1, 1)

# Weights (2x1) and scalar bias drawn from a standard normal distribution.
# No seed is set, so every run starts from different values.
w = np.random.randn(2, 1)
b = np.random.randn()

learning_rate = 0.1

# --- シグモイドとその導関数 / Sigmoid and its derivative ---
def sigmoid(x):
    """Logistic sigmoid 1 / (1 + exp(-x)), evaluated without overflow.

    The naive form computes exp(-x), which overflows (RuntimeWarning, inf)
    for large negative x.  Using the identity
    sigmoid(x) = exp(-log(1 + exp(-x))) = exp(-logaddexp(0, -x))
    keeps every intermediate value finite.

    Args:
        x: Scalar or NumPy array.

    Returns:
        Value(s) in [0, 1] with the same shape as ``x``.
    """
    # logaddexp(0, -x) is log(1 + exp(-x)) computed in an overflow-safe way.
    return np.exp(-np.logaddexp(0, -x))
def sigmoid_derivative(x):
    """Derivative of the sigmoid at ``x``: ``sigmoid(x) * (1 - sigmoid(x))``."""
    s = sigmoid(x)
    return s * (1 - s)

# --- Training loop: full-batch gradient descent on the mean squared error ---
losses = []
for epoch in range(10000):
    # Forward pass: affine map followed by the sigmoid.
    Z = X @ w + b
    Y_pred = sigmoid(Z)
    loss = np.mean((Y_true - Y_pred) ** 2)
    losses.append(loss)

    # Backward pass via the chain rule:
    #   d_loss = dL/dY_pred, dZ = dL/dZ, then gradients w.r.t. w and b.
    d_loss = 2 * (Y_pred - Y_true) / len(X)
    dZ = d_loss * sigmoid_derivative(Z)
    dw = X.T @ dZ
    db = dZ.sum()

    # Gradient-descent step.
    w -= learning_rate * dw
    b -= learning_rate * db

    # Report progress every 1000 epochs.
    if not epoch % 1000:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")

# --- Plot the model output over a grid using the learned w and b ---
# The grid covers [-1, 1] x [-1, 1]; X1 and X2 are rebound here.
X1, X2 = np.meshgrid(np.linspace(-1, 1, 100), np.linspace(-1, 1, 100))
Z_new = w[0] * X1 + w[1] * X2 + b
Y_new = sigmoid(Z_new)

fig, ax = plt.subplots(figsize=(8, 6), subplot_kw={"projection": "3d"})
ax.plot_surface(X1, X2, Y_new, cmap='plasma')
ax.set_title("Learned Activation Output Surface")
ax.set_xlabel("Input 1")
ax.set_ylabel("Input 2")
ax.set_zlabel("Output")
plt.tight_layout()
plt.show()

# --- Final check: model output for each training input next to its target ---
print("\n== Prediction after learning ==")
for x, target in zip(X, Y_true):
    pred = sigmoid(np.dot(x, w) + b)
    print(f"Input: {x}, Predicted: {pred[0]:.4f}, Target: {target[0]}")

```

おわりに

本記事では、代表的な活性化関数の可視化と、シンプルなニューラルネットワークの学習を通じて、活性化関数の特徴とその効果を視覚的に確認しました。今後は多層ネットワークや他の学習アルゴリズムへの拡張も検討していきます。

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up