Hello.
If you follow the procedure (using cvxopt) from the "ソフトマージンSVM" (soft-margin SVM) post on the blog 人工知能に関する断創録, computing a support vector machine (SVM) leaves you feeling as though you had worked out the solution yourself. The script below also plots the converged Lagrange multipliers alpha, a tabplot, and so on[1] (data points where class × prediction == 1 are highlighted; prediction == 0 traces the decision boundary).
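For reference, the quadratic program that QP() below hands to cvxopt.solvers.qp is the standard soft-margin SVM dual. In the code's notation (alpha_i the Lagrange multipliers, y_i the class label, K the kernel, C the per-class coefficient from Ccoeff):

$$\max_{\alpha}\; \sum_{i=1}^{N}\alpha_i \;-\; \frac{1}{2}\sum_{i=1}^{N}\sum_{j=1}^{N}\alpha_i\alpha_j\, y_i y_j\, K(x_i, x_j)
\qquad \text{s.t.}\quad 0 \le \alpha_i \le C_{y_i},\quad \sum_{i=1}^{N}\alpha_i y_i = 0$$

The resulting decision function is $f(x) = \sum_{i \in SV} \alpha_i y_i K(x, x_i) + b$, where the bias $b$ is recovered by averaging $y_i - (f(x_i) - b)$ over the on-margin ("edge") support vectors, exactly as SV() does below.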
$ ./svm.py
file = classification.txt (len=200)
cvxopt result status: optimal
delta = 5.684342e-14
class = ('-1', '+1')
confusion matrix:
[[96  7]
 [34 63]]
Rows of the confusion matrix are the true classes (-1, +1) and columns the predicted classes, so 96 + 63 = 159 of the 200 points are classified correctly.
svm.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Support vector machine (SVM) computation,
# using cvxopt.solvers.qp (quadratic programming).
from __future__ import print_function
import numpy as np
import cvxopt

Ccoeff = [30, 30]  # soft-margin coefficient C for each class
SIGMA = 1.054      # bandwidth of the non-linear kernel
kname = 'gaussian'

# Gaussian RBF kernel
def gaussian_kernel(x, y):
    gamma = 1 / (2 * (SIGMA ** 2))
    return np.exp(-norm2(x - y) * gamma)

# dispatch to the kernel selected by kname
def kernel(x, y):
    return globals()[kname + '_kernel'](x, y)

def norm2(x):
    return np.dot(x, x)
# Solve for the Lagrange multipliers alpha by quadratic programming.
# cvxopt.solvers.qp minimizes (1/2) x'Qx + p'x subject to Gx <= h and
# Ax = b; here x = alpha, with 0 <= alpha_i <= C_i and sum_i alpha_i*y_i = 0.
def QP(dat, clas, Ccoeff):
    coeff = np.array([Ccoeff[int(x > 0)] for x in clas])
    N, Z = len(dat), list(zip(clas, dat))  # list(): Z is iterated twice below
    Q = cvxopt.matrix(np.array([[c * c_ * kernel(d, d_) for c, d in Z] for c_, d_ in Z]))
    p = cvxopt.matrix(-np.ones(N))
    G = cvxopt.matrix(np.vstack((np.diag([-1.0] * N), np.identity(N))))
    h = cvxopt.matrix(np.hstack((np.zeros(N), coeff)))
    A = cvxopt.matrix(clas, (1, N))
    b = cvxopt.matrix(0.0)
    cvxopt.solvers.options['abstol'] = 1e-5
    cvxopt.solvers.options['reltol'] = 1e-10
    cvxopt.solvers.options['show_progress'] = False
    sol = cvxopt.solvers.qp(Q, p, G, h, A, b)
    alpha = np.array(sol['x']).reshape(N)
    print('cvxopt result status: %s' % sol['status'])
    print('delta = %e' % np.dot(alpha, clas))  # residual of the equality constraint
    return alpha
# Find the indices of the support vectors and compute the bias term.
def SV(dat, clas, Ccoeff, alpha):
    supportVectors = []  # all vectors with non-negligible alpha
    edgeVectors = []     # vectors exactly on the margin (0 < alpha_i < C_i)
    c = [Ccoeff[int(x > 0)] for x in clas]
    infinitesimal = 1e-3 * min(Ccoeff)
    for i in range(len(alpha)):
        if alpha[i] > infinitesimal:
            supportVectors.append(i)
            if alpha[i] < c[i] - infinitesimal:
                edgeVectors.append(i)
    # the bias makes y_i - f(x_i) vanish on average over the margin vectors
    bias = average([clas[i] - prediction(dat[i], alpha, clas, dat, supportVectors)
                    for i in edgeVectors])
    return supportVectors, edgeVectors, bias

def average(x):
    return sum(x) / len(x)
# decision function value
def prediction(x, alpha, clas, dat, supportVectors, bias=0):
    p = [alpha[i] * clas[i] * kernel(x, dat[i]) for i in supportVectors]
    return sum(p) + bias

# confusion matrix: rows = true class, columns = predicted class
def confusion_matrix(clas, predict):
    mat = np.zeros([2, 2], dtype=int)
    for c, p in zip(clas, predict):
        mat[int(c > 0), int(p > 0)] += 1
    return mat
# Labelled training data classification.txt (N = 200)
# http://research.microsoft.com/en-us/um/people/cmbishop/prml/webdatasets/classification.txt
def make_data():
    filename = 'classification.txt'
    dat_clas = np.genfromtxt(filename)
    dat, clas = dat_clas[:, 0:2], dat_clas[:, 2]
    clas = clas * 2 - 1.0  # map the 0/1 labels to -1/+1
    classname = ('-1', '+1')
    print('file = %s (len=%d)' % (filename, len(dat)))
    return dat, clas, classname
def main():
    dat, clas, classname = make_data()
    # solve the SVM dual problem by quadratic programming
    alpha = QP(dat, clas, Ccoeff)
    supportVectors, edgeVectors, bias = SV(dat, clas, Ccoeff, alpha)
    # predictions and confusion matrix
    predict = [prediction(x, alpha, clas, dat, supportVectors, bias) for x in dat]
    print('class =', classname, '\n', confusion_matrix(clas, predict))

if __name__ == '__main__':
    main()
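The plots mentioned at the top (the highlighted margin points and the prediction == 0 boundary) are not part of the listing above. Here is a minimal sketch of how such a figure could be drawn with matplotlib; the grid resolution, the colours, and the helper name plot_result are my own assumptions, not from the original script.

import matplotlib.pyplot as plt

# Sketch: contours of the decision function (prediction == 0 is the
# boundary, prediction == +-1 the margins), the data coloured by class,
# and the support vectors circled.
def plot_result(dat, clas, alpha, supportVectors, bias):
    xs = np.linspace(dat[:, 0].min() - 0.5, dat[:, 0].max() + 0.5, 100)
    ys = np.linspace(dat[:, 1].min() - 0.5, dat[:, 1].max() + 0.5, 100)
    Z = np.array([[prediction(np.array([x, y]), alpha, clas, dat,
                              supportVectors, bias)
                   for x in xs] for y in ys])
    plt.contour(xs, ys, Z, levels=[-1.0, 0.0, 1.0],
                colors=['gray', 'black', 'gray'])
    plt.scatter(dat[:, 0], dat[:, 1], c=clas, cmap='bwr', s=15)
    sv = dat[supportVectors]
    plt.scatter(sv[:, 0], sv[:, 1], s=60,
                facecolors='none', edgecolors='green')
    plt.show()

Calling plot_result(dat, clas, alpha, supportVectors, bias) at the end of main() would produce the kind of figure described above; evaluating prediction() on the full 100 × 100 grid is slow but simple.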
-
[1] Incidentally, this is the same computation as in PRML Chapter 7, but the plotted figures in that book give the impression that either the convergence of the solution or the contour computation is a little loose.