サポートベクターマシーンを可視化する

Posted at 2024-10-30

サポートベクターマシーンは「マージンを最大化する直線を引く」と説明されます。
しかしソフトマージンの説明以降, 個人的に混乱したので
可視化して説明できるようにしました。

データセットの用意

ライブラリの import

import matplotlib.pyplot as plt
from matplotlib import cm
import numpy as np
from sklearn.datasets import make_blobs
from sklearn.svm import SVC

データセットの用意

今回はランダムなデータに直線を引いた data1 を用意しました。

data1 はランダムな$N(0, 1)$のノイズデータを生成し, ボーダー(border: $x_2 = f(x_1)$)の直線で2クラスに分けます。
マージン(merging)を指定すると, 2クラスを分ける直線から距離 merging 未満にあるデータを削除します。
これは境界線付近のデータの有無でサポートベクターマシーンの結果がどう変わるか, 調査するためです。
ノイズを加えてボーダーを越境するデータを生成します(noise は加える正規ノイズの標準偏差)。

n = 200  # number of points drawn before the band around the border is removed
np.random.seed(0)  # fixed seed so the generated data is reproducible


def x2(x1):
    """Default class border: the line x2 = 0.3 * x1 + 0.2."""
    return 0.3*x1 + 0.2


def data1(border=x2, merging=0.3, noise=0):
    """Generate a 2-D, two-class toy dataset split by the line ``border``.

    Draws ``n`` points (module-level ``n``) from a standard normal
    distribution, labels each point, removes the points that lie closer
    than ``merging`` to the border, and finally adds Gaussian noise.

    Args:
        border: Callable mapping x1 to the x2 value of the border. It is
            treated as a straight line; its slope is taken as
            ``border(1) - border(0)``.
        merging: Perpendicular distance from the border within which points
            are removed (0 keeps every point).
        noise: Standard deviation of the Gaussian noise added to the
            remaining points *after* labelling, so noisy points may end up
            on the wrong side of the border.

    Returns:
        Tuple ``(X, y)``: ``X`` has shape (m, 2) and ``y`` has shape (m,)
        with float labels 0.0 (``border(x1) > x2``) or 1.0 (otherwise),
        where m <= n.
    """
    X = np.random.normal(size=(n, 2))
    y = np.where(border(X[:, 0]) > X[:, 1], 0.0, 1.0)

    # A line y = a*x + b shifted by perpendicular distance d is
    # y = a*x + b +/- d*sqrt(a^2 + 1).
    slope = border(1) - border(0)
    offset = merging*np.sqrt(slope**2 + 1)
    center = border(X[:, 0])
    in_band = np.logical_and(center - offset < X[:, 1], X[:, 1] < center + offset)

    # Select with a boolean mask instead of np.delete(X, mask): NumPy releases
    # before 1.19 interpreted a boolean array passed to np.delete as the
    # integer indices 0/1 and therefore removed the wrong rows.
    keep = ~in_band
    X = X[keep]
    y = y[keep]

    # Drawn even when noise == 0 so the random stream stays the same.
    X += np.random.normal(scale=noise, size=X.shape)

    return X, y


X, y = data1()

# Setting 1: border x2 = x1 + 0.2, no removed band, no noise.
def border(x1):
    return x1 + 0.2

merging, noise = 0, 0

$x_2=x_1+0.2$ でクラスが分けられます。
# Setting 2: border x2 = x1 + 0.2, points within 0.3 of the border removed, no noise.
def border(x1):
    return x1 + 0.2

merging, noise = 0.3, 0

$x_2=x_1+0.2$ でクラスが分けられ, 直線前後に 0.3 の幅のマージンが取られます。
# Setting 3: border x2 = x1 + 0.2, no removed band, noise scale 0.2.
def border(x1):
    return x1 + 0.2

merging, noise = 0.0, 0.2

$x_2=x_1+0.2$ でクラスが分けられますが, そのデータに標準偏差 noise の正規ノイズ $N(0, \mathrm{noise}^2)$ を加えてボーダーの越境を許します。

この3データでSVMの様子を確認します。

マージンを算出する式

直線 $y = ax + b$ と距離 d 離れた直線は, $y = ax + b \pm d\sqrt{a^2 + 1}$ で得られます。
直線は $f(x)$ で与えられているので, $a = f(1) - f(0)$ で算出しています。

画像を表示するコード

# Scatter the two classes on one square figure; each scatter call takes the
# next colour of the default cycle, so class 0 and class 1 get different colours.
fig, ax = plt.subplots(figsize=(5, 5))
for label in (0, 1):
    members = X[y == label]
    ax.scatter(members[:, 0], members[:, 1])
ax.set(xlabel='X1', ylabel='X2')

モデル作成

# Fit a linear-kernel SVC with C=0.1 on the generated data.
clf = SVC(C=0.1, kernel='linear').fit(X, y)

# Try the model on one arbitrary point.
X_eval = np.random.rand(1, 2)
print(X_eval)
# [[0.20063767 0.92947519]]

# Value reported by the fitted model.
model_score = clf.decision_function(X_eval)
print(model_score)
# [0.61825635]

# The same value recomputed by hand as coef_ @ x + intercept_.
weights, bias = clf.coef_, clf.intercept_
manual_score = (weights @ X_eval.T)[0] + bias
print(manual_score)
# [0.61825635]

SVC には coef_, intercept_ が属性として用意されている。
この値は decision_function(X) で取得できる予測値の計算に用いられる。

decision_function の描画

coef_, intercept_ で描かれる平面を描画してみる。

2つのクラスのラベルを 1, -1 として各データを $z = \pm 1$ の高さに置くと, それらの点の近くを通る平面が描ける。

この decision_function で描かれる面の高さは, 境界線からの符号付き距離に比例する(距離を $\|w\|$ 倍した値になる)。

描画用コード

# Evaluate coef_ . x + intercept_ on a 1201 x 1201 grid over [-3, 3] x [-3, 3].
test_x = np.linspace(-3, 3, 1201)
test_x1, test_x2 = np.meshgrid(test_x, test_x)

# Computed with broadcasting: no per-row Python loop, and no loop variable
# named `x2` that would rebind the module-level border function `x2`.
(w1, w2), b = clf.coef_[0], clf.intercept_[0]
test_y = w1*test_x1 + w2*test_x2 + b

fig = plt.figure()
ax = fig.add_subplot(projection='3d')

# The plane of decision values.
ax.plot_surface(test_x1, test_x2, test_y, color='orange', alpha=0.2)

# Training points are placed at z = -1 (label 0) and z = +1 (label 1) so they
# can be compared with the height of the plane.
items_0 = X[y==0]
items_1 = X[y==1]
ax.scatter(items_0[:, 0], items_0[:, 1], -1, marker='o', c='blue')
ax.scatter(items_1[:, 0], items_1[:, 1], 1, marker='o', c='red')

ax.set_xlabel('x1')
ax.set_ylabel('x2')
ax.set_zlabel('y')
ax.set_zlim(-3, 3)

plt.show()

境界線の描画

SVMの境界線を描画した。
また, decision_function が 1 より大きい部分は 1 に, -1 より小さい部分は -1 に丸めて, クラスの値とした。

# 各点の値を算出
test_x = np.linspace(-3, 3, 1201)
test_x1, test_x2 = np.meshgrid(test_x, test_x)
test_y = np.empty_like(test_x1)

for i, (x1, x2) in enumerate(zip(test_x1, test_x2)):
    test_y[i] = (clf.coef_ @ np.array([x1, x2]))[0] + clf.intercept_

fig = plt.figure()
ax = fig.add_subplot(projection='3d')

test_y = np.where(test_y > 1, 1, test_y)
test_y = np.where(test_y < -1, -1, test_y)
ax.plot_surface(test_x1, test_x2, test_y, cmap=cm.jet, alpha=0.5)

items_0 = X[y==0]
items_1 = X[y==1]
ax.scatter(items_0[:, 0], items_0[:, 1], -1, marker='o', c='blue')
ax.scatter(items_1[:, 0], items_1[:, 1], 1, marker='o', c='red')

ax.set_xlabel('x1')
ax.set_ylabel('x2')
ax.set_zlabel('y')
ax.set_zlim(-1.1, 1.1)

border_line = (np.isclose(test_y, 0, rtol=0, atol=0.05))
ax.plot(test_x1[border_line], test_x2[border_line], 0, c='g', alpha=0.5)

plt.show()

サポートベクターとマージンの描画

サポートベクターは, ラベルを $\pm 1$ としたとき $y \cdot f(x) \le 1$ ($f$ は decision_function)となる点, つまりマージン上またはマージンの内側(誤分類された点を含む)にある点が該当する。

# Row indices of X that were selected as support vectors.
support_idx = clf.support_
print(support_idx)

# The two prints below show the same values (the first five support vectors).
head = slice(0, 5)
print(clf.support_vectors_[head])
print(X[support_idx[head]])

# Evaluate coef_ . x + intercept_ on a 1201 x 1201 grid over [-3, 3] x [-3, 3].
test_x = np.linspace(-3, 3, 1201)
test_x1, test_x2 = np.meshgrid(test_x, test_x)

# Computed with broadcasting: no per-row Python loop, and no loop variable
# named `x2` that would rebind the module-level border function `x2`.
(w1, w2), b = clf.coef_[0], clf.intercept_[0]
test_y = w1*test_x1 + w2*test_x2 + b

fig = plt.figure()
ax = fig.add_subplot(projection='3d')

# Samples that are NOT support vectors, drawn as circles at z = -1 / +1.
unsupport_X = np.delete(X, clf.support_, axis=0)
unsupport_y = np.delete(y, clf.support_)
items_0 = unsupport_X[unsupport_y==0]
items_1 = unsupport_X[unsupport_y==1]
ax.scatter(items_0[:, 0], items_0[:, 1], -1, marker='o', c='blue')
ax.scatter(items_1[:, 0], items_1[:, 1], 1, marker='o', c='red')

# Support vectors, split by label and drawn as stars in darker colours.
support_0 = clf.support_vectors_[y[clf.support_]==0]
support_1 = clf.support_vectors_[y[clf.support_]==1]
ax.scatter(support_0[:, 0], support_0[:, 1], -1, marker='*', c='midnightblue')
ax.scatter(support_1[:, 0], support_1[:, 1], 1, marker='*', c='darkred')

# Saturate the surface at the class values -1 and 1.
test_y = np.clip(test_y, -1, 1)
ax.plot_surface(test_x1, test_x2, test_y, cmap=cm.jet, alpha=0.2)

ax.set_xlabel('x1')
ax.set_ylabel('x2')
ax.set_zlabel('y')
ax.set_zlim(-1.1, 1.1)

# Decision boundary: grid points whose value is within 0.01 of zero.
border_line = (np.isclose(test_y, 0, rtol=0, atol=0.01))
ax.plot(test_x1[border_line], test_x2[border_line], 0, c='g', alpha=0.5)

# Margin lines. test_y is already clipped to [-1, 1], so matching +/-1 itself
# would select the whole saturated region; +/-0.98 with atol=0.01 instead picks
# a thin band just inside each margin.
merging_0 = (np.isclose(test_y, -0.98, rtol=0, atol=0.01))
ax.plot(test_x1[merging_0], test_x2[merging_0], -1, c='b', alpha=0.5)

merging_1 = (np.isclose(test_y, 0.98, rtol=0, atol=0.01))
ax.plot(test_x1[merging_1], test_x2[merging_1], 1, c='r', alpha=0.5)

plt.show()

2次元で描画

領域の色が decision_function の値($\pm 1$ の範囲に丸めたもの)を表す。

# 2-D view; reuses test_x1, test_x2 and test_y as they are at this point in the script.
fig2 = plt.figure(figsize=(5,5))
ax2 = fig2.add_subplot()

# Split the samples into non-support points and support vectors, per label.
unsupport_X = np.delete(X, clf.support_, axis=0)
unsupport_y = np.delete(y, clf.support_)
items_0 = unsupport_X[unsupport_y==0]
items_1 = unsupport_X[unsupport_y==1]
support_labels = y[clf.support_]
support_0 = clf.support_vectors_[support_labels==0]
support_1 = clf.support_vectors_[support_labels==1]

# Circles for ordinary points, stars (darker colours) for support vectors.
for pts, marker, colour in (
    (items_0, 'o', 'blue'),
    (items_1, 'o', 'red'),
    (support_0, '*', 'midnightblue'),
    (support_1, '*', 'darkred'),
):
    ax2.scatter(pts[:, 0], pts[:, 1], marker=marker, c=colour)

# Filled contours of the grid values as the background.
ax2.contourf(test_x1, test_x2, test_y, cmap=cm.jet, alpha=0.2)
ax2.set(xlabel='x1', ylabel='x2', xlim=(-3, 3), ylim=(-3, 3))

# Grid points near 0 give the boundary; points near -0.98 / 0.98 give the two
# margin lines.
border_line = np.isclose(test_y, 0, rtol=0, atol=0.01)
merging_0 = np.isclose(test_y, -0.98, rtol=0, atol=0.01)
merging_1 = np.isclose(test_y, 0.98, rtol=0, atol=0.01)
for mask, colour in ((border_line, 'g'), (merging_0, 'b'), (merging_1, 'r')):
    ax2.plot(test_x1[mask], test_x2[mask], c=colour, alpha=0.5)

plt.show()

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up