本記事ではLight CNN(LCNN)という深層学習モデルを実装したのでまとめさせていただきました。
最初にLCNNとその特徴であるMax Feature Mapping (MFM)という技術について説明した後、実装と評価をしていきます。
コードは全てPythonで書き、LCNNの実装にはTensorFlowとKerasを使います。
Github URL : https://github.com/ozora-ogino/LCNN
# Light CNN
LCNNは畳み込み層を積み重ねたモデル(本記事の実装では9層)で、各層の活性化関数としてMax Feature Mapping (MFM) と呼ばれる手法を使っているのが大きな特徴です。
# Max Feature Mapping
["A Light CNN for Deep Face Representation with Noisy Labels"](https://arxiv.org/pdf/1511.02683.pdf)
import tensorflow as tf
from keras.layers import Activation, Dense, BatchNormalization, MaxPool2D, Lambda, Input, Flatten, Dropout
from keras.layers.convolutional import Conv2D
from keras.models import Model
from keras.initializers import he_normal
#Custom layer
from .layers import Maxout
def MaxOutConv2D(x, dim, kernel_size, strides, padding='same'):
    """Return the stack of a Conv2D layer followed by a Maxout (MFM) layer.

    Args:
        x: Input tensor fed to the convolution.
        dim: Number of filters of the Conv2D layer. The Maxout layer that
            follows is constructed with ``int(dim / 2)``.
        kernel_size: Forwarded to ``Conv2D``.
        strides: Forwarded to ``Conv2D``.
        padding: Forwarded to ``Conv2D``. Defaults to ``'same'``.

    Returns:
        The tensor produced by the Maxout layer.
    """
    conv_layer = Conv2D(
        dim,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
    )
    conv_features = conv_layer(x)
    # Maxout is the project-local custom layer; presumably its argument is
    # the number of output channels kept after MFM -- confirm in layers.py.
    mfm_layer = Maxout(int(dim / 2))
    return mfm_layer(conv_features)
def MaxOutDense(x, dim):
    """Return the stack of a fully connected layer followed by Maxout (MFM).

    Args:
        x: Input tensor fed to the Dense layer.
        dim: Number of units of the Dense layer. The Maxout layer that
            follows is constructed with ``int(dim / 2)``.

    Returns:
        The tensor produced by the Maxout layer.
    """
    fc_layer = Dense(dim)
    fc_features = fc_layer(x)
    # Maxout is the project-local custom layer; presumably its argument is
    # the number of output units kept after MFM -- confirm in layers.py.
    mfm_layer = Maxout(int(dim / 2))
    return mfm_layer(fc_features)
def build_lcnn(shape, n_label=2):
    """Build the Light CNN (LCNN) classifier as a Keras functional model.

    The network chains Conv2D+Maxout stages (``MaxOutConv2D``) interleaved
    with 2x2 max-pooling and batch normalization, then one Dense+Maxout
    stage (``MaxOutDense``), batch normalization, dropout and a softmax
    output layer.

    Args:
        shape (list): Input shape for LCNN. (Example: [128, 128, 1])
        n_label (int): Number of labels that LCNN should predict.

    Returns:
        keras.models.Model: The (uncompiled) model whose output is a
        softmax over ``n_label`` classes.
    """
    # Named ``inputs`` rather than ``input`` so the builtin is not shadowed.
    inputs = Input(shape=shape)

    # Stage 1: 5x5 convolution, then spatial down-sampling.
    x = MaxOutConv2D(inputs, 64, kernel_size=5, strides=1, padding='same')
    x = MaxPool2D(pool_size=(2, 2), strides=(2, 2))(x)

    # Stage 2: 1x1 convolution.
    x = MaxOutConv2D(x, 64, kernel_size=1, strides=1, padding='same')
    x = BatchNormalization()(x)

    # Stage 3: 3x3 convolution, then spatial down-sampling.
    x = MaxOutConv2D(x, 96, kernel_size=3, strides=1, padding='same')
    x = MaxPool2D(pool_size=(2, 2), strides=(2, 2))(x)
    x = BatchNormalization()(x)

    # Stage 4: 1x1 convolution.
    x = MaxOutConv2D(x, 96, kernel_size=1, strides=1, padding='same')
    x = BatchNormalization()(x)

    # Stage 5: 3x3 convolution, then spatial down-sampling.
    x = MaxOutConv2D(x, 128, kernel_size=3, strides=1, padding='same')
    x = MaxPool2D(pool_size=(2, 2), strides=(2, 2))(x)

    # Stage 6: 1x1 convolution.
    x = MaxOutConv2D(x, 128, kernel_size=1, strides=1, padding='same')
    x = BatchNormalization()(x)

    # Stage 7: 3x3 convolution.
    x = MaxOutConv2D(x, 64, kernel_size=3, strides=1, padding='same')
    x = BatchNormalization()(x)

    # Stage 8: 1x1 convolution.
    x = MaxOutConv2D(x, 64, kernel_size=1, strides=1, padding='same')
    x = BatchNormalization()(x)

    # Stage 9: 3x3 convolution, then the final spatial down-sampling.
    x = MaxOutConv2D(x, 64, kernel_size=3, strides=1, padding='same')
    x = MaxPool2D(pool_size=(2, 2), strides=(2, 2))(x)

    # Classifier head: fully connected layer with Maxout, then softmax.
    x = Flatten()(x)
    x = MaxOutDense(x, 160)
    x = BatchNormalization()(x)
    # NOTE(review): Keras Dropout takes the fraction of units to DROP, so
    # 0.75 discards 75% of activations during training -- confirm intended.
    x = Dropout(0.75)(x)
    outputs = Dense(n_label, activation='softmax')(x)

    return Model(inputs=inputs, outputs=outputs)
import numpy as np
from keras.callbacks import EarlyStopping
from keras.utils import to_categorical
from keras.datasets import mnist
# Adam is used by compile() below; without this import the script fails
# with NameError.
from keras.optimizers import Adam

# Training hyperparameters.
lr = 0.001
epochs = 10
batch_size = 256

# Load MNIST as (train images, train labels), (test images, test labels).
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Scale the training images by 1/255 and append a trailing channel axis
# so each sample has the 3-D shape that Input/Conv2D are given.
x_train = x_train / 255
x_train = np.expand_dims(x_train, axis=-1)
y_train = to_categorical(y_train)

# Build the model from the per-sample shape (everything but the batch axis).
input_shape = x_train.shape[1:]
lcnn = build_lcnn(input_shape, n_label=10)
lcnn.compile(optimizer=Adam(learning_rate=lr), loss='categorical_crossentropy', metrics=['accuracy'])

# Stop training once val_loss has not improved for 3 consecutive epochs.
es = EarlyStopping(monitor='val_loss', patience=3, verbose=1)
history = lcnn.fit(x_train, y_train, epochs=epochs, batch_size=batch_size, validation_split=0.2, callbacks=[es])

# Apply the same preprocessing to the test split before evaluating.
x_test = x_test / 255
x_test = np.expand_dims(x_test, axis=-1)
y_test = to_categorical(y_test)

loss, acc = lcnn.evaluate(x_test, y_test)
print(f'Accuracy : {acc*100}') # Result --> Accuracy : 99.90999794006348
print(f'Loss : {loss}')# Result --> Loss : 0.04250425341885457
import numpy as np
from keras.callbacks import EarlyStopping
from keras.utils import to_categorical
from keras.datasets import cifar10
# Adam is used by compile() below; without this import the script fails
# with NameError.
from keras.optimizers import Adam

# Training hyperparameters.
lr = 0.001
epochs = 100
batch_size = 64

# Load CIFAR-10 as (train images, train labels), (test images, test labels).
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Scale the training images by 1/255 and one-hot encode the labels.
x_train = x_train / 255
y_train = to_categorical(y_train)

# Build the model from the per-sample shape (everything but the batch axis).
input_shape = x_train.shape[1:]
lcnn = build_lcnn(input_shape, n_label=10)
lcnn.compile(optimizer=Adam(learning_rate=lr), loss='categorical_crossentropy', metrics=['accuracy'])

# Stop training once val_loss has not improved for 5 consecutive epochs.
es = EarlyStopping(monitor='val_loss', patience=5, verbose=1)
history = lcnn.fit(x_train, y_train, epochs=epochs, batch_size=batch_size, validation_split=0.2, callbacks=[es])

# Apply the same preprocessing to the test split before evaluating.
x_test = x_test / 255
y_test = to_categorical(y_test)

loss, acc = lcnn.evaluate(x_test, y_test)
print(f'Accuracy : {acc*100}') # Result --> Accuracy : 75.1200020313263
print(f'Loss : {loss}')# Result --> Loss : 1.2616282165050507
GitHub URL : https://github.com/ozora-ogino/LCNN
["A Light CNN for Deep Face Representation with Noisy Labels"](https://arxiv.org/pdf/1511.02683.pdf)
"STC Antispoofing Systems for the ASVspoof2019 Challenge"
"Audio replay attack detection with deep learning frameworks"