Python
機械学習
DeepLearning
Chainer
深層学習

深層学習でアニメ顔を分類する with Chainer

More than 1 year has passed since last update.

Kerasによる続編はこちら

はじめに

初めての投稿です.暑い日が続いておりますが,アニメ顔画像データを深層学習を使って分類しようと思います.あんまりアニメ詳しくないです.初音ミクとかならわかります.一体誰が誰なんだ.

データセット

データセットはanimeface-character-datasetから入手することができます.
参考:DenoisingAutoEncoderでアニメ顔の特徴を抽出してみた

コード

今回はChainerをつかって畳み込みニューラルネットワークを実装します.まずモデルを定義します.
Convolution → Max Pooling → Convolution → Max Pooling → Fully-Connected → Fully-Connected → Softmaxみたいな感じです.
コードが汚いのはご容赦ください.

参考:https://github.com/mitmul/chainer-cifar10/blob/master/models/Cifar10.py

CNN.py
import time
import six.moves.cPickle as pickle
import numpy as np
from sklearn.datasets import fetch_mldata
from sklearn.cross_validation import train_test_split
from chainer import cuda, Variable, FunctionSet, optimizers
import chainer.functions as F

class ImageNet(FunctionSet):
    """Small convolutional classifier built from Chainer functions.

    Layer stack: conv(5x5) -> ReLU -> max-pool(2, stride 2) ->
    conv(5x5) -> ReLU -> max-pool(3, stride 3) -> linear -> ReLU ->
    dropout -> linear.

    NOTE(review): ``l3`` expects 512 input features, which presumably
    corresponds to 32 channels x 4 x 4 for 32x32 RGB input -- confirm
    before feeding images of another size.
    """

    def __init__(self, n_outputs):
        """Create the layers; ``n_outputs`` is the size of the final linear layer."""
        super(ImageNet, self).__init__(
            conv1=F.Convolution2D(3, 32, 5),
            conv2=F.Convolution2D(32, 32, 5),
            l3=F.Linear(512, 512),
            l4=F.Linear(512, n_outputs)
        )

    def _logits(self, x_data, train, gpu):
        """Run the layer stack on ``x_data`` and return the output of ``l4``.

        ``x_data`` is copied to the GPU first when ``gpu >= 0``. ``train``
        is forwarded to ``F.dropout``.
        """
        if gpu >= 0:
            x_data = cuda.to_gpu(x_data)

        x = Variable(x_data)
        h = F.max_pooling_2d(F.relu(self.conv1(x)), ksize=2, stride=2)
        h = F.max_pooling_2d(F.relu(self.conv2(h)), ksize=3, stride=3)
        h = F.dropout(F.relu(self.l3(h)), train=train)
        return self.l4(h)

    def forward(self, x_data, y_data, train=True, gpu=-1):
        """Compute loss and accuracy for one labelled mini-batch.

        Args:
            x_data: input array for the batch.
            y_data: label array for the batch.
            train: forwarded to ``F.dropout``.
            gpu: when ``>= 0`` both arrays are copied to the GPU first.

        Returns:
            A tuple ``(F.softmax_cross_entropy(y, t), F.accuracy(y, t))``.
        """
        y = self._logits(x_data, train, gpu)

        if gpu >= 0:
            y_data = cuda.to_gpu(y_data)
        t = Variable(y_data)

        return F.softmax_cross_entropy(y, t), F.accuracy(y, t)

    def predict(self, x_data, gpu=-1):
        """Return the softmax output for ``x_data`` as a CPU array.

        The layer stack is run with ``train=False``. No labels are needed.

        Args:
            x_data: input array for the batch.
            gpu: when ``>= 0`` the input is copied to the GPU first.
        """
        y = self._logits(x_data, False, gpu)
        return cuda.to_cpu(F.softmax(y).data)

とりあえず定義したモデルを使って学習,評価できるようにします.

CNN.py
class CNN:
    """Training and evaluation harness around an ``ImageNet`` model.

    The constructor splits the given data into training and test parts
    (``test_size=0.1``) and sets up an Adam optimizer for the model.
    """

    def __init__(self, data, target, n_outputs, gpu=-1):
        """Build the model, split the data and prepare the optimizer.

        Args:
            data: input samples, passed to ``train_test_split``.
            target: labels, passed to ``train_test_split``.
            n_outputs: forwarded to ``ImageNet``.
            gpu: when ``>= 0`` the model is moved to the GPU and every
                batch is sent there as well.
        """
        self.model = ImageNet(n_outputs)
        self.model_name = 'cnn_model'
        self.gpu = gpu

        if gpu >= 0:
            self.model.to_gpu()

        (self.x_train,
         self.x_test,
         self.y_train,
         self.y_test) = train_test_split(data, target, test_size=0.1)

        self.n_train = len(self.y_train)
        self.n_test = len(self.y_test)

        self.optimizer = optimizers.Adam()
        self.optimizer.setup(self.model.collect_parameters())

    def predict(self, x_data, gpu=-1):
        """Delegate to ``self.model.predict(x_data, gpu)``.

        The model object must provide a ``predict`` method. Note that the
        ``gpu`` argument is passed through as given; it does not default
        to ``self.gpu``.
        """
        return self.model.predict(x_data, gpu)

    def train_and_test(self, n_epoch=100, batchsize=100):
        """Train for ``n_epoch`` epochs, evaluating on the test split after each.

        Every epoch visits the training samples in a fresh random order in
        mini-batches of ``batchsize`` and prints the sample-weighted mean
        loss and accuracy for the training and the test split.
        """
        for epoch in range(1, n_epoch + 1):
            print('epoch {}'.format(epoch))

            # training
            perm = np.random.permutation(self.n_train)
            sum_train_accuracy = 0
            sum_train_loss = 0
            for i in range(0, self.n_train, batchsize):
                batch_index = perm[i:i + batchsize]
                x_batch = self.x_train[batch_index]
                y_batch = self.y_train[batch_index]

                # The last batch may be shorter than batchsize, so weight
                # each batch by its real length.
                real_batchsize = len(x_batch)

                self.optimizer.zero_grads()
                loss, acc = self.model.forward(x_batch, y_batch, train=True, gpu=self.gpu)
                loss.backward()
                self.optimizer.update()

                sum_train_loss += float(cuda.to_cpu(loss.data)) * real_batchsize
                sum_train_accuracy += float(cuda.to_cpu(acc.data)) * real_batchsize

            print('train mean loss={}, accuracy={}'.format(
                sum_train_loss / self.n_train, sum_train_accuracy / self.n_train))

            # evaluation
            sum_test_accuracy = 0
            sum_test_loss = 0
            for i in range(0, self.n_test, batchsize):
                x_batch = self.x_test[i:i + batchsize]
                y_batch = self.y_test[i:i + batchsize]

                real_batchsize = len(x_batch)

                loss, acc = self.model.forward(x_batch, y_batch, train=False, gpu=self.gpu)

                sum_test_loss += float(cuda.to_cpu(loss.data)) * real_batchsize
                sum_test_accuracy += float(cuda.to_cpu(acc.data)) * real_batchsize

            print('test mean loss={}, accuracy={}'.format(
                sum_test_loss / self.n_test, sum_test_accuracy / self.n_test))

    def dump_model(self):
        """Pickle the model to the file named ``self.model_name``.

        The model is moved to the CPU for pickling. When this instance is
        configured for the GPU, the model is moved back afterwards and its
        parameters are registered with the optimizer again, the same steps
        ``load_model`` performs, so training can continue after a dump.
        NOTE(review): re-running ``optimizer.setup`` presumably resets the
        optimizer's internal state -- confirm this is acceptable when
        dumping in the middle of training.
        """
        self.model.to_cpu()
        with open(self.model_name, 'wb') as model_file:
            pickle.dump(self.model, model_file, -1)

        if self.gpu >= 0:
            self.model.to_gpu()
            self.optimizer.setup(self.model.collect_parameters())

    def load_model(self):
        """Replace ``self.model`` with the one pickled in ``self.model_name``.

        The loaded model is moved to the GPU when ``self.gpu >= 0`` and the
        optimizer is set up for its parameters.
        """
        # SECURITY: unpickling executes arbitrary code; only load model
        # files that come from a trusted source.
        with open(self.model_name, 'rb') as model_file:
            self.model = pickle.load(model_file)
        if self.gpu >= 0:
            self.model.to_gpu()
        self.optimizer.setup(self.model.collect_parameters())

データセット前処理

処理を軽減するために画像サイズを全て32*32にします.また,画像の入っていないディレクトリは予め削除しておいてください.

animeface.py
#! -*- coding: utf-8 -*-

import os
import six.moves.cPickle as pickle
import numpy as np
import cv2 as cv

class AnimeFaceDataset:
    """Loader for a directory-per-class image dataset.

    Every sub-directory of ``data_dir_path`` is one class; its position in
    the sorted directory list is the class id. The ``.png`` files inside
    are resized to ``image_size`` x ``image_size``, moved to
    channel-first layout and scaled by 1/255. The result is cached as a
    pickle in ``dump_name``.
    """

    def __init__(self):
        self.data_dir_path = u"./animeface-character-dataset/thumb/"
        self.data = None
        self.target = None
        self.n_types_target = -1
        self.dump_name = u'dataset'
        self.image_size = 32
        # class id -> directory name; filled by load_data_target/load_dataset
        self.index2name = {}

    def get_dir_list(self):
        """Return the sorted names of the sub-directories of ``data_dir_path``."""
        return sorted(
            name for name in os.listdir(self.data_dir_path)
            if os.path.isdir(os.path.join(self.data_dir_path, name))
        )

    def get_class_id(self, fname):
        """Return the class id (index in ``get_dir_list()``) for an image path.

        The name of the directory that directly contains ``fname`` is
        matched exactly first, so a class whose name is a prefix of another
        class name cannot be picked by mistake. If that name is not a known
        class, the first class name that occurs anywhere in ``fname`` is
        used instead.

        Raises:
            IndexError: if no class name matches ``fname``.
        """
        dir_list = self.get_dir_list()

        parent_name = os.path.basename(os.path.dirname(fname))
        if parent_name in dir_list:
            return dir_list.index(parent_name)

        # Fallback for paths whose immediate parent is not a class directory.
        candidates = [name for name in dir_list if name in fname]
        return dir_list.index(candidates[0])

    def load_data_target(self):
        """Populate ``data``, ``target`` and ``index2name``.

        If the cache file ``dump_name`` exists it is loaded. Otherwise (or
        if the cache held no targets) the image directories are scanned and
        the result is written to the cache. Afterwards ``data`` is a
        float32 array and ``target`` an int32 array.

        Raises:
            IOError: if an image file cannot be read.
        """
        if os.path.exists(self.dump_name):
            self.load_dataset()

        if self.target is not None:
            # Cache hit: nothing new to compute, so the cache file is not rewritten.
            self.data = np.asarray(self.data, np.float32)
            self.target = np.asarray(self.target, np.int32)
            return

        images = []
        class_ids = []
        index2name = {}
        # The directory list is read once; the enumerate index is the class id.
        for class_id, dir_name in enumerate(self.get_dir_list()):
            dir_path = os.path.join(self.data_dir_path, dir_name)
            for file_name in os.listdir(dir_path):
                if os.path.splitext(file_name)[1] != u'.png':
                    continue

                image_path = os.path.join(dir_path, file_name)
                image = cv.imread(image_path)
                if image is None:
                    raise IOError('could not read image: %r' % (image_path,))

                image = cv.resize(image, (self.image_size, self.image_size))
                image = image.transpose(2, 0, 1)  # channel axis first
                image = image / 255.

                images.append(image)
                class_ids.append(class_id)
                index2name[class_id] = str(dir_name)

        self.index2name = index2name
        self.data = np.array(images, np.float32)
        self.target = np.array(class_ids, np.int32)

        self.dump_dataset()

    def get_n_types_target(self):
        """Return the number of classes.

        Loads the dataset first if ``target`` is not set. If
        ``n_types_target`` has been set to something other than -1, that
        value is returned; otherwise the distinct values in ``target`` are
        counted.
        """
        if self.target is None:
            self.load_data_target()

        if self.n_types_target != -1:
            return self.n_types_target

        return len(np.unique(self.target))

    def dump_dataset(self):
        """Pickle ``(data, target, index2name)`` to the file ``dump_name``."""
        with open(self.dump_name, 'wb') as dump_file:
            pickle.dump((self.data, self.target, self.index2name), dump_file, -1)

    def load_dataset(self):
        """Restore ``data``, ``target`` and ``index2name`` from ``dump_name``."""
        # SECURITY: unpickling executes arbitrary code; only load cache
        # files that were written by this class.
        with open(self.dump_name, 'rb') as dump_file:
            self.data, self.target, self.index2name = pickle.load(dump_file)

実際にデータを読み込んでみます.

In [1]: from animeface import AnimeFaceDataset

In [2]: dataset = AnimeFaceDataset()

In [3]: dataset.load_data_target()       

In [5]: dataset.get_n_types_target()
Out[5]: 176

In [6]: len(dataset.target)
Out[6]: 14490

なので,データ数14490,クラス数(キャラクター数)176の分類問題となります.176人もいるのかうへー.
実際に以下のコードで学習させてみます.

from CNN import CNN
from animeface import AnimeFaceDataset
from chainer import cuda

# Use the GPU (device 0).
cuda.init(0)

print('load AnimeFace dataset')
dataset = AnimeFaceDataset()
# AnimeFaceDataset exposes load_data_target(); there is no read_data_target().
dataset.load_data_target()
data = dataset.data
target = dataset.target
n_outputs = dataset.get_n_types_target()

cnn = CNN(data=data,
          target=target,
          gpu=0,
          n_outputs=n_outputs)

cnn.train_and_test(n_epoch=100)

実行結果は以下のとおりです.

C:\Python27\lib\site-packages\skcuda\cublas.py:273: UserWarning: creating CUBLAS
 context to get version number
  warnings.warn('creating CUBLAS context to get version number')
load AnimeFace dataset
epoch 1
train mean loss=4.77383880182, accuracy=0.0361935423276
test mean loss=3.88453409868, accuracy=0.116632157313
epoch 2
train mean loss=3.52874370272, accuracy=0.158193386024
test mean loss=3.00467933286, accuracy=0.247066933423
epoch 3
train mean loss=2.95961939461, accuracy=0.254735058687
test mean loss=2.6362867278, accuracy=0.327122144303
epoch 4
train mean loss=2.634737659, accuracy=0.319607384265
test mean loss=2.38959699009, accuracy=0.395445127233

----
中略
----

epoch 96
train mean loss=0.227027994983, accuracy=0.925159092696
test mean loss=2.70711887911, accuracy=0.589371965415
epoch 97
train mean loss=0.216873285405, accuracy=0.927382851637
test mean loss=2.6218228118, accuracy=0.594893018034
epoch 98
train mean loss=0.209225204521, accuracy=0.930220058136
test mean loss=2.68379376295, accuracy=0.5935127585
epoch 99
train mean loss=0.209071503231, accuracy=0.928072985573
test mean loss=2.62009712151, accuracy=0.593512752658
epoch 100
train mean loss=0.210750763214, accuracy=0.92999001446
test mean loss=2.75891605618, accuracy=0.589371977427
Early Stopping was not executed.

おわりに

結果は正解率60%ほどとなりました.多分CNNのアーキテクチャを変えれば行ける気がするのと,画像を32*32にしたのは小さすぎたかな...と
コード,変数名とか関数名とか綺麗にしたものを今度GitHubにあげようかと思います.
あと変なところとかありましたらご指摘いただけるとありがたいです.