Help us understand the problem. What is going on with this article?

顔画像から人種を判定

More than 1 year has passed since last update.

概要

Keras学習済みモデルのXceptionをUTKFaceデータセットでFine-tuningさせ、人種分類モデルを構築する
白人、黒人、アジア、インド、その他(ヒスパニック、ラテン、中東など)の5つに分類

UTKFace
https://susanqq.github.io/UTKFace/
- 20,000以上の顔画像データセット
- 性別、年齢、人種のラベリング

実行環境
Google Colaboratory(GPU)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%config InlineBackend.figure_formats = {'png', 'retina'}
import os, zipfile, io, re
from PIL import Image
from sklearn.model_selection import train_test_split
from keras.applications.xception import Xception
from keras.models import Model, load_model
from keras.layers.core import Dense
from keras.layers.pooling import GlobalAveragePooling2D
from keras.optimizers import Adam, RMSprop, SGD
from keras.utils.np_utils import to_categorical
from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard, ReduceLROnPlateau
from keras.preprocessing.image import ImageDataGenerator

データ取得

画像は100にリサイズ
ZIPファイルからデータセットを取得し、配列に変換
クラスはWhite、Black、Asian、Indian、Othersの5クラス(各画像ファイル名にラベリングされている)
取得したデータセットはtrain、valid、testに分割

classes = ["White", "Black", "Asian", "Indian", "Others"]
num_classes = len(classes)
image_size = 100
%%time
# ZIP読み込み
z = zipfile.ZipFile('../dataset/UTKFace.zip')
# 画像ファイルパスのみ取得
imgfiles = [ x for x in z.namelist() if re.search(r"^UTKFace.*jpg$", x)]

X = []
Y = []
for imgfile in imgfiles:
    # ZIPから画像読み込み
    image = Image.open(io.BytesIO(z.read(imgfile)))
    # RGB変換
    image = image.convert('RGB')
    # リサイズ
    image = image.resize((image_size, image_size))
    # 画像から配列に変換
    data = np.asarray(image)
    file = os.path.basename(imgfile)
    file_split = [i for i in file.split('_')]
    X.append(data)
    Y.append(file_split[2])
z.close()
del z, imgfiles

X = np.array(X)
Y = np.array(Y)
print(X.shape, Y.shape)

(23708, 100, 100, 3) (23708,)
CPU times: user 23.9 s, sys: 387 ms, total: 24.3 s
Wall time: 24.3 s

人種がラベリングされていない3つのデータを削除

# df_Y=pd.DataFrame(Y)
# pd.DataFrame(df_Y[0].value_counts()).sort_index().rename(columns={0 :'num'}).T

index = np.where(
    (Y == "20170109142408075.jpg.chip.jpg") | 
    (Y == "20170109150557335.jpg.chip.jpg") | 
    (Y == "20170116174525125.jpg.chip.jpg")
)
X=np.delete(X, index, axis=0)
Y=np.delete(Y, index, axis=0)
print(X.shape, Y.shape)

(23705, 100, 100, 3) (23705,)

# trainデータとtestデータに分割
X_train, X_test, y_train, y_test = train_test_split(
    X, Y,
    random_state = 0,
    stratify = Y,
    test_size = 0.2
)
del X,Y
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(18964, 100, 100, 3) (18964,) (4741, 100, 100, 3) (4741,)

# データ型の変換&正規化
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255
# one-hot変換
y_train = to_categorical(y_train, num_classes = num_classes)
y_test = to_categorical(y_test, num_classes = num_classes)
# trainデータからvalidデータを分割
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train,
    y_train,
    random_state = 0,
    stratify = y_train,
    test_size = 0.2
)
print(X_train.shape, y_train.shape, X_valid.shape, y_valid.shape) 

(15171, 100, 100, 3) (15171, 5) (3793, 100, 100, 3) (3793, 5)

モデル構築

Xception読み込み
Keras学習済みモデルXceptionを読み込む
その際、ネットワーク出力層側にある全結合層を除去

base_model = Xception(
    include_top = False,
    weights = "imagenet",
    input_shape = None
)

全結合層の新規構築

x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation = 'relu')(x)
predictions = Dense(num_classes, activation = 'softmax')(x)

Data Augmentation

datagen = ImageDataGenerator(
    featurewise_center = False,
    samplewise_center = False,
    featurewise_std_normalization = False,
    samplewise_std_normalization = False,
    zca_whitening = False,
    rotation_range = 0,
    width_shift_range = 0.1,
    height_shift_range = 0.1,
    horizontal_flip = True,
    vertical_flip = False
)

Callback

# EarlyStopping
early_stopping = EarlyStopping(
    monitor = 'val_loss',
    patience = 10,
    verbose = 1
)

# ModelCheckpoint
weights_dir = './weights/'
if os.path.exists(weights_dir) == False:os.mkdir(weights_dir)
model_checkpoint = ModelCheckpoint(
    weights_dir + "val_loss{val_loss:.3f}.hdf5",
    monitor = 'val_loss',
    verbose = 1,
    save_best_only = True,
    save_weights_only = True,
    period = 3
)

# reduce learning rate
reduce_lr = ReduceLROnPlateau(
    monitor = 'val_loss',
    factor = 0.1,
    patience = 3,
    verbose = 1
)

# log for TensorBoard
logging = TensorBoard(log_dir = "log/")

モデル学習

XceptionをFine-tuning

# ネットワーク定義
model = Model(inputs = base_model.input, outputs = predictions)

#108層までfreeze
for layer in model.layers[:108]:
    layer.trainable = False

    # Batch Normalizationのfreeze解除
    if layer.name.startswith('batch_normalization'):
        layer.trainable = True
    if layer.name.endswith('bn'):
        layer.trainable = True

#109層以降、学習させる
for layer in model.layers[108:]:
    layer.trainable = True

# layer.trainableの設定後にcompile
model.compile(
    optimizer = Adam(),
    loss = 'categorical_crossentropy',
    metrics = ["accuracy"]
)
%%time
hist = model.fit_generator(
    datagen.flow(X_train, y_train, batch_size = 32),
    steps_per_epoch = X_train.shape[0] // 32,
    epochs = 50,
    validation_data = (X_valid, y_valid),
    callbacks = [early_stopping, reduce_lr],
    shuffle = True,
    verbose = 1
)
Epoch 1/50
474/474 [==============================] - 121s 256ms/step - loss: 1.3566 - acc: 0.4722 - val_loss: 1.4610 - val_acc: 0.6011
Epoch 2/50
474/474 [==============================] - 112s 237ms/step - loss: 0.8982 - acc: 0.6872 - val_loss: 0.8991 - val_acc: 0.7037
Epoch 3/50
474/474 [==============================] - 112s 236ms/step - loss: 0.7573 - acc: 0.7388 - val_loss: 0.7338 - val_acc: 0.7611

〜省略〜

Epoch 00018: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.
Epoch 19/50
474/474 [==============================] - 111s 235ms/step - loss: 0.2864 - acc: 0.9008 - val_loss: 0.6332 - val_acc: 0.8070
Epoch 20/50
474/474 [==============================] - 111s 235ms/step - loss: 0.2869 - acc: 0.9000 - val_loss: 0.6365 - val_acc: 0.8089
Epoch 21/50
474/474 [==============================] - 111s 235ms/step - loss: 0.2845 - acc: 0.9029 - val_loss: 0.6342 - val_acc: 0.8081

Epoch 00021: ReduceLROnPlateau reducing learning rate to 1.0000001111620805e-07.
Epoch 22/50
474/474 [==============================] - 111s 235ms/step - loss: 0.2831 - acc: 0.9007 - val_loss: 0.6333 - val_acc: 0.8073
Epoch 00022: early stopping
CPU times: user 52min 7s, sys: 9min 12s, total: 1h 1min 19s
Wall time: 41min 7s

学習曲線をプロット

plt.figure(figsize = (18,6))

# accuracy
plt.subplot(1, 2, 1)
plt.plot(hist.history["acc"], label = "acc", marker = "o")
plt.plot(hist.history["val_acc"], label = "val_acc", marker = "o")
#plt.xticks(np.arange())
#plt.yticks(np.arange())
plt.xlabel("epoch")
plt.ylabel("accuracy")
#plt.title("")
plt.legend(loc = "best")
plt.grid(color = 'gray', alpha = 0.2)

# loss
plt.subplot(1, 2, 2)
plt.plot(hist.history["loss"], label = "loss", marker = "o")
plt.plot(hist.history["val_loss"], label = "val_loss", marker = "o")
#plt.xticks(np.arange())
#plt.yticks(np.arange())
plt.xlabel("epoch")
plt.ylabel("loss")
#plt.title("")
plt.legend(loc = "best")
plt.grid(color = 'gray', alpha = 0.2)

plt.show()

モデル評価

score = model.evaluate(X_test, y_test, verbose = 1)
print("evaluate loss: {[0]:.4f}".format(score))
print("evaluate acc: {[1]:.1%}".format(score))

4741/4741 [==============================] - 13s 3ms/step
evaluate loss: 0.6409054181666798
evaluate acc: 0.8089010757475658

モデル保存

model_dir = './model/'
if os.path.exists(model_dir) == False:os.mkdir(model_dir)

model.save(model_dir + 'model.hdf5')

# optimizerのない軽量モデルを保存(学習や評価は不可だが、予測は可能)
model.save(model_dir + 'model-opt.hdf5', include_optimizer = False)

モデル予測

testデータ30件の画像と正解ラベルを出力

# testデータ30件の正解ラベル
true_classes = np.argmax(y_test[0:30], axis = 1)

# testデータ30件の画像と正解ラベルを出力
plt.figure(figsize = (16, 6))
for i in range(30):
    plt.subplot(3, 10, i + 1)
    plt.axis("off")
    plt.title(classes[true_classes[i]])
    plt.imshow(X_test[i])
plt.show()

testデータ30件の画像と予測ラベル・予測確率を出力

# testデータ30件の予測ラベル
pred_classes = np.argmax(model.predict(X_test[0:30]), axis = 1)

# testデータ30件の予測確率
pred_probs = np.max(model.predict(X_test[0:30]), axis = 1)
pred_probs = ['{:.4f}'.format(i) for i in pred_probs]

# testデータ30件の画像と予測ラベル・予測確率を出力
plt.figure(figsize = (16, 6))
for i in range(30):
    plt.subplot(3, 10, i + 1)
    plt.axis("off")
    if pred_classes[i] == true_classes[i]:
        plt.title(classes[pred_classes[i]]+'\n'+pred_probs[i])
    else:
        plt.title(classes[pred_classes[i]]+'\n'+pred_probs[i], color = "red")
    plt.imshow(X_test[i])
plt.show()

モデル評価でaccuracy 80.9%、loss 0.6409を計測
決して高い精度とは言えず、いくつか誤りが見られる

予測検証

testデータ1000件を抽出して、低い確率で正解した画像高い確率で間違えた画像を目視で確認

testデータ1000件抽出

# testデータ1000件の正解ラベル
true_classes = np.argmax(y_test[0:1000], axis = 1)

# testデータ1000件の予測ラベル
pred_classes = np.argmax(model.predict(X_test[0:1000]), axis = 1)

# testデータ1000件の予測確率
pred_probs = np.max(model.predict(X_test[0:1000]), axis = 1)
pred_probs = np.round(pred_probs, 2)
pred_probs = ['{:.4f}'.format(i) for i in pred_probs]

正解・不正解リスト作成
(画像データ, 予測ラベル, 予測確率)の正解・不正解それぞれのリストを作成

correct=[]
incorrect=[]

for i in range(1000):
    if pred_classes[i] == true_classes[i]:
        correct.append((X_test[i], classes[pred_classes[i]], pred_probs[i]))
    else:
        incorrect.append((X_test[i], classes[pred_classes[i]], pred_probs[i]))

print("number of correct:",len(correct))
print("number of incorrect:",len(incorrect))

number of correct : 803
number of incorrect : 197

低確率の正解画像
確率の低い順に正解画像を出力

# 正解画像を確率の低い順に並び替え
correct.sort(key = lambda x:x[2])

# ワースト30件の画像と予測ラベル・予測確率を出力
plt.figure(figsize = (16, 6))
for i in range(30):
    plt.subplot(3, 10, i + 1)
    plt.axis("off")
    plt.title(correct[i][1]+'\n'+correct[i][2])
    plt.imshow(correct[i][0])
plt.show()

高確率の不正解画像
確率の高い順に不正解画像を出力

# 不正解画像を確率の高い順に並び替え
incorrect.sort(key = lambda x:x[2], reverse = True)

# ベスト30件の画像と予測ラベル・予測確率を出力
plt.figure(figsize = (16, 6))
for i in range(30):
    plt.subplot(3, 10, i + 1)
    plt.axis("off")
    plt.title(incorrect[i][1]+'\n'+incorrect[i][2], color = 'red')
    plt.imshow(incorrect[i][0])
plt.show()

今回は目視での検証だが、今後は正誤の人種比や年齢・性別による正誤の違いを検証したい。

ha9kberry
人工知能と遊ぶ機械学習エンジニア
Why not register and get more from Qiita?
  1. We will deliver articles that match you
    By following users and tags, you can catch up information on technical fields that you are interested in as a whole
  2. you can read useful information later efficiently
    By "stocking" the articles you like, you can search right away
Comments
No comments
Sign up for free and join this conversation.
If you already have a Qiita account
Why do not you register as a user and use Qiita more conveniently?
You need to log in to use this function. Qiita can be used more conveniently after logging in.
You seem to be reading articles frequently this month. Qiita can be used more conveniently after logging in.
  1. We will deliver articles that match you
    By following users and tags, you can catch up information on technical fields that you are interested in as a whole
  2. you can read useful information later efficiently
    By "stocking" the articles you like, you can search right away
ユーザーは見つかりませんでした