Kerasによる続編はこちら.
はじめに
初めての投稿です.暑い日が続いておりますが,アニメ顔画像データを深層学習を使って分類しようと思います.あんまりアニメ詳しくないです.初音ミクとかならわかります.一体誰が誰なんだ.
データセット
データセットはanimeface-character-datasetから入手することができます.
参考:DenoisingAutoEncoderでアニメ顔の特徴を抽出してみた
コード
今回はChainerをつかって畳み込みニューラルネットワークを実装します.まずモデルを定義します.
Convolution → Max Pooling → Convolution → Max Pooling → Fully-Connected → Softmaxみたいな感じです.
コードが汚いのはご容赦ください.
参考:https://github.com/mitmul/chainer-cifar10/blob/master/models/Cifar10.py
import time
import six.moves.cPickle as pickle
import numpy as np
from sklearn.datasets import fetch_mldata
from sklearn.cross_validation import train_test_split
from chainer import cuda, Variable, FunctionSet, optimizers
import chainer.functions as F
class ImageNet(FunctionSet):
    """Small CNN: conv -> max-pool -> conv -> max-pool -> linear -> linear.

    Args:
        n_outputs: Number of output units of the last linear layer
            (i.e. the number of classes).

    NOTE(review): ``l3`` expects 512 flattened input features, which
    presumably corresponds to 3x32x32 input images -- confirm before
    changing the input size or the convolution/pooling parameters.
    """

    def __init__(self, n_outputs):
        super(ImageNet, self).__init__(
            conv1=F.Convolution2D(3, 32, 5),
            conv2=F.Convolution2D(32, 32, 5),
            l3=F.Linear(512, 512),
            l4=F.Linear(512, n_outputs)
        )

    def _logits(self, x_data, train, gpu):
        """Run the layer stack on ``x_data`` and return the output of ``l4``."""
        if gpu >= 0:
            x_data = cuda.to_gpu(x_data)
        x = Variable(x_data)
        h = F.max_pooling_2d(F.relu(self.conv1(x)), ksize=2, stride=2)
        h = F.max_pooling_2d(F.relu(self.conv2(h)), ksize=3, stride=3)
        h = F.dropout(F.relu(self.l3(h)), train=train)
        return self.l4(h)

    def forward(self, x_data, y_data, train=True, gpu=-1):
        """Compute loss and accuracy for one labelled mini-batch.

        Args:
            x_data: Input array for the batch.
            y_data: Label array for the batch.
            train: Passed to dropout as its ``train`` flag.
            gpu: If >= 0, the inputs are transferred to the GPU first.

        Returns:
            Tuple ``(softmax_cross_entropy, accuracy)`` of Variables.
        """
        y = self._logits(x_data, train, gpu)
        if gpu >= 0:
            y_data = cuda.to_gpu(y_data)
        t = Variable(y_data)
        return F.softmax_cross_entropy(y, t), F.accuracy(y, t)

    def predict(self, x_data, gpu=-1):
        """Return softmax outputs for ``x_data`` as a CPU array.

        Dropout is run with ``train=False``.

        Args:
            x_data: Input array for the batch.
            gpu: If >= 0, the input is transferred to the GPU first.
        """
        y = self._logits(x_data, False, gpu)
        return cuda.to_cpu(F.softmax(y).data)
とりあえず定義したモデルを使って学習,評価できるようにします.
class CNN:
    """Train and evaluate an ``ImageNet`` model on a random 90/10 split.

    Args:
        data: Input samples; indexed with integer index arrays and slices.
        target: Labels aligned with ``data``.
        n_outputs: Number of classes, forwarded to ``ImageNet``.
        gpu: If >= 0, the model and every mini-batch are moved to the GPU.
    """

    def __init__(self, data, target, n_outputs, gpu=-1):
        self.model = ImageNet(n_outputs)
        self.model_name = 'cnn_model'

        if gpu >= 0:
            self.model.to_gpu()
        self.gpu = gpu

        # Hold out 10% of the samples for evaluation.
        (self.x_train, self.x_test,
         self.y_train, self.y_test) = train_test_split(
            data, target, test_size=0.1)

        self.n_train = len(self.y_train)
        self.n_test = len(self.y_test)

        self.optimizer = optimizers.Adam()
        self.optimizer.setup(self.model.collect_parameters())

    def predict(self, x_data, gpu=-1):
        """Delegate to ``self.model.predict(x_data, gpu)``.

        The wrapped model must provide a ``predict`` method.
        """
        return self.model.predict(x_data, gpu)

    def train_and_test(self, n_epoch=100, batchsize=100):
        """Train for ``n_epoch`` epochs, evaluating on the test split each epoch.

        Mean loss and accuracy for both splits are printed every epoch.

        Args:
            n_epoch: Number of passes over the training split.
            batchsize: Mini-batch size; the last batch may be smaller.
        """
        for epoch in range(1, n_epoch + 1):
            print('epoch {}'.format(epoch))

            # training: visit the samples in a fresh random order each epoch
            perm = np.random.permutation(self.n_train)
            sum_train_loss = 0
            sum_train_accuracy = 0
            for start in range(0, self.n_train, batchsize):
                batch_idx = perm[start:start + batchsize]
                x_batch = self.x_train[batch_idx]
                y_batch = self.y_train[batch_idx]
                real_batchsize = len(x_batch)

                self.optimizer.zero_grads()
                loss, acc = self.model.forward(
                    x_batch, y_batch, train=True, gpu=self.gpu)
                loss.backward()
                self.optimizer.update()

                # Weight by the actual batch size so the epoch mean stays
                # exact when the last batch is short.
                sum_train_loss += float(cuda.to_cpu(loss.data)) * real_batchsize
                sum_train_accuracy += float(cuda.to_cpu(acc.data)) * real_batchsize

            print('train mean loss={}, accuracy={}'.format(
                sum_train_loss / self.n_train,
                sum_train_accuracy / self.n_train))

            # evaluation: no parameter update, dropout disabled via train=False
            sum_test_loss = 0
            sum_test_accuracy = 0
            for start in range(0, self.n_test, batchsize):
                x_batch = self.x_test[start:start + batchsize]
                y_batch = self.y_test[start:start + batchsize]
                real_batchsize = len(x_batch)

                loss, acc = self.model.forward(
                    x_batch, y_batch, train=False, gpu=self.gpu)

                sum_test_loss += float(cuda.to_cpu(loss.data)) * real_batchsize
                sum_test_accuracy += float(cuda.to_cpu(acc.data)) * real_batchsize

            print('test mean loss={}, accuracy={}'.format(
                sum_test_loss / self.n_test,
                sum_test_accuracy / self.n_test))

    def dump_model(self):
        """Move the model to the CPU and pickle it to ``self.model_name``.

        NOTE(review): the model is left on the CPU afterwards; callers that
        keep training on a GPU presumably need to move it back -- confirm.
        """
        self.model.to_cpu()
        with open(self.model_name, 'wb') as f:
            pickle.dump(self.model, f, -1)

    def load_model(self):
        """Replace ``self.model`` with the one pickled at ``self.model_name``.

        Only load files you created yourself: unpickling untrusted data can
        execute arbitrary code.
        """
        with open(self.model_name, 'rb') as f:
            self.model = pickle.load(f)
        if self.gpu >= 0:
            self.model.to_gpu()
        # Re-bind the optimizer to the parameters of the newly loaded model.
        self.optimizer.setup(self.model.collect_parameters())
データセット前処理
処理を軽減するために画像サイズを全て32×32にします.また,画像の入っていないディレクトリは予め削除しておいてください(クラスIDはディレクトリ一覧の順番から決めているため,空のディレクトリがあるとIDに欠番ができてしまいます).
#! -*- coding: utf-8 -*-
import os
import six.moves.cPickle as pickle
import numpy as np
import cv2 as cv
class AnimeFaceDataset:
    """Loader for the animeface-character-dataset thumbnails.

    Every sub-directory of ``data_dir_path`` is one class; its class id is
    its position in the sorted list of sub-directories. Loaded arrays are
    cached in a pickle file named ``dump_name``.

    Directories that contain no ``.png`` images should be removed before
    loading: they still occupy a class id, so the ids found in ``target``
    would no longer be contiguous.
    """

    def __init__(self):
        self.data_dir_path = u"./animeface-character-dataset/thumb/"
        self.data = None
        self.target = None
        self.index2name = {}
        self.n_types_target = -1
        self.dump_name = u'dataset'
        self.image_size = 32

    def get_dir_list(self):
        """Return the sorted names of the sub-directories of ``data_dir_path``."""
        return sorted(
            name for name in os.listdir(self.data_dir_path)
            if os.path.isdir(os.path.join(self.data_dir_path, name))
        )

    def get_class_id(self, fname):
        """Return the class id (index into ``get_dir_list()``) for a file path.

        The name of the file's parent directory is matched exactly first, so
        that a class whose name is a substring of another class name cannot
        shadow it. If that fails, the first directory name contained anywhere
        in ``fname`` is used.

        Raises:
            IndexError: If no class directory matches ``fname``.
        """
        dir_list = self.get_dir_list()

        parent = os.path.basename(os.path.dirname(fname))
        if parent in dir_list:
            return dir_list.index(parent)

        # Legacy behaviour: substring match against the whole path.
        for class_id, dir_name in enumerate(dir_list):
            if dir_name in fname:
                return class_id
        raise IndexError('no class directory matches {!r}'.format(fname))

    def load_data_target(self):
        """Populate ``data``, ``target`` and ``index2name``.

        The cached dump is used when it exists. Otherwise every ``.png``
        below ``data_dir_path`` is read, resized to
        ``image_size`` x ``image_size``, scaled by 1/255, and the result is
        written to the dump file.

        Raises:
            IOError: If an image file cannot be decoded.
        """
        if os.path.exists(self.dump_name):
            self.load_dataset()
        if self.target is not None:
            return

        images = []
        labels = []
        index2name = {}
        size = (self.image_size, self.image_size)

        # The class id is the directory's position in the sorted listing, so
        # the listing is read once here instead of once per image.
        for class_id, dir_name in enumerate(self.get_dir_list()):
            class_dir = os.path.join(self.data_dir_path, dir_name)
            for file_name in os.listdir(class_dir):
                ext = os.path.splitext(file_name)[1]
                if ext != u'.png':
                    continue
                abs_name = os.path.join(class_dir, file_name)

                image = cv.imread(abs_name)
                if image is None:
                    # imread signals failure by returning None, not raising.
                    raise IOError('cannot read image: {!r}'.format(abs_name))
                image = cv.resize(image, size)
                # Move the last axis first (presumably HWC -> CHW).
                image = image.transpose(2, 0, 1)
                image = image / 255.

                images.append(image)
                labels.append(class_id)
                index2name[class_id] = str(dir_name)

        self.index2name = index2name
        self.data = np.array(images, np.float32)
        self.target = np.array(labels, np.int32)
        self.dump_dataset()

    def get_n_types_target(self):
        """Return the number of distinct class ids in ``target``.

        Loads the dataset first if needed. If ``n_types_target`` has been set
        to something other than -1, that value is returned instead.
        """
        if self.target is None:
            self.load_data_target()
        if self.n_types_target != -1:
            return self.n_types_target
        return len(np.unique(self.target))

    def dump_dataset(self):
        """Pickle ``(data, target, index2name)`` to ``dump_name``."""
        with open(self.dump_name, 'wb') as f:
            pickle.dump((self.data, self.target, self.index2name), f, -1)

    def load_dataset(self):
        """Restore ``data``, ``target`` and ``index2name`` from ``dump_name``.

        Only load dumps you created yourself: unpickling untrusted data can
        execute arbitrary code.
        """
        with open(self.dump_name, 'rb') as f:
            self.data, self.target, self.index2name = pickle.load(f)
実際にデータを読み込んでみます.
In [1]: from animeface import AnimeFaceDataset
In [2]: dataset = AnimeFaceDataset()
In [3]: dataset.load_data_target()
In [5]: dataset.get_n_types_target()
Out[5]: 176
In [6]: len(dataset.target)
Out[6]: 14490
なので,データ数14490,クラス数(キャラクター数)176の分類問題となります.176人もいるのかうへー.
実際に以下のコードで学習させてみます.
from CNN import CNN
from animeface import AnimeFaceDataset
from chainer import cuda
# Use the GPU: initialise CUDA on the device that is also passed to CNN.
GPU_ID = 0
cuda.init(GPU_ID)

print('load AnimeFace dataset')
dataset = AnimeFaceDataset()
# AnimeFaceDataset defines load_data_target(); there is no read_data_target().
dataset.load_data_target()
data = dataset.data
target = dataset.target
n_outputs = dataset.get_n_types_target()

cnn = CNN(data=data,
          target=target,
          gpu=GPU_ID,
          n_outputs=n_outputs)

cnn.train_and_test(n_epoch=100)
実行結果は以下のとおりです.
C:\Python27\lib\site-packages\skcuda\cublas.py:273: UserWarning: creating CUBLAS
context to get version number
warnings.warn('creating CUBLAS context to get version number')
load AnimeFace dataset
epoch 1
train mean loss=4.77383880182, accuracy=0.0361935423276
test mean loss=3.88453409868, accuracy=0.116632157313
epoch 2
train mean loss=3.52874370272, accuracy=0.158193386024
test mean loss=3.00467933286, accuracy=0.247066933423
epoch 3
train mean loss=2.95961939461, accuracy=0.254735058687
test mean loss=2.6362867278, accuracy=0.327122144303
epoch 4
train mean loss=2.634737659, accuracy=0.319607384265
test mean loss=2.38959699009, accuracy=0.395445127233
----
中略
----
epoch 96
train mean loss=0.227027994983, accuracy=0.925159092696
test mean loss=2.70711887911, accuracy=0.589371965415
epoch 97
train mean loss=0.216873285405, accuracy=0.927382851637
test mean loss=2.6218228118, accuracy=0.594893018034
epoch 98
train mean loss=0.209225204521, accuracy=0.930220058136
test mean loss=2.68379376295, accuracy=0.5935127585
epoch 99
train mean loss=0.209071503231, accuracy=0.928072985573
test mean loss=2.62009712151, accuracy=0.593512752658
epoch 100
train mean loss=0.210750763214, accuracy=0.92999001446
test mean loss=2.75891605618, accuracy=0.589371977427
Early Stopping was not executed.
おわりに
結果は正解率60%ほどとなりました.訓練データでの正解率は93%ほどなので,過学習していそうです.多分CNNのアーキテクチャを変えればもう少し行ける気がするのと,画像を32×32にしたのは小さすぎたかな...と思います.
コード,変数名とか関数名とか綺麗にしたものを今度GitHubにあげようかと思います.
あと変なところとかありましたらご指摘いただけるとありがたいです.