LoginSignup
0
2

More than 3 years have passed since last update.

KerasによるMNISTの手書き文字の分類

Posted at

KerasによるMNISTの手書き文字の分類

概要

今回はKerasを用いて、手書き数字の画像が多数収録されたデータセットであるMNISTから0~9の数字を分類します

必要なライブラリのimport

import os,re
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
import keras
from keras.datasets import mnist

mnistダウンロード

データを学習用とテスト用に分割
次に学習用のデータを学習用と検証用に8:2の割合で分割

from keras.datasets import mnist
from sklearn.model_selection import train_test_split

# Download MNIST (cached after the first call) and unpack the train/test pairs.
train_set, test_set = mnist.load_data()
train_data, train_labels = train_set
test_data, test_labels = test_set

# Hold out 20% of the training samples as a validation split.
x_train, x_valid, y_train, y_valid = train_test_split(
    train_data,
    train_labels,
    test_size=0.2,
)

画像データと正解ラベルのリサイズ

①x_train,x_validは現在(48000,28,28),(12000,28,28)になっているので、Kerasのモデルの学習に使える形にするために(48000,28,28,1),(12000,28,28,1)の形に変換
②画像データのデータ型をfloat型に変換
③画像の画素値は0~255までで数値が大きいので0~1の間に変換
④正解ラベルをone-hot-encodingにする

from keras.utils import to_categorical

# Steps 1-3 for the images, applied in a single expression per split:
#   1. add a trailing channel axis -> (num_samples, 28, 28, 1)
#   2. convert to float32
#   3. rescale pixel values from [0, 255] to [0, 1]
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1).astype('float32') / 255
x_valid = x_valid.reshape(x_valid.shape[0], 28, 28, 1).astype('float32') / 255

# Step 4: one-hot encode the integer labels into 10 classes.
y_train = to_categorical(y_train, 10)
y_valid = to_categorical(y_valid, 10)

モデルの作成

from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D

# Small CNN: two 3x3 convolutions, one 2x2 max-pool, then a dense classifier
# head with dropout, ending in a 10-way softmax.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)),
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax'),
])

# Print the layer-by-layer output shapes and parameter counts.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d_2 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 24, 24, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 12, 12, 64)        0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 12, 12, 64)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 9216)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 128)               1179776   
_________________________________________________________________
dropout_3 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 10)                1290      
=================================================================
Total params: 1,199,882
Trainable params: 1,199,882
Non-trainable params: 0
_________________________________________________________________

モデルのコンパイル

from keras.optimizers import RMSprop

# Configure training: RMSprop with its default settings, categorical
# cross-entropy loss (labels are one-hot encoded), and accuracy as the metric.
optimizer = RMSprop()
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

画像の水増しと学習

# Augment the training images on the fly and train the model on the augmented
# batches, validating each epoch on the un-augmented validation split.
import math

# ImageDataGenerator was used without being imported, which raised NameError.
from keras.preprocessing.image import ImageDataGenerator

batch_size = 32

datagen = ImageDataGenerator(
    featurewise_center=False,             # do not zero-center with the dataset-wide mean
    samplewise_center=False,              # do not zero-center each sample
    featurewise_std_normalization=False,  # do not divide by the dataset-wide std
    samplewise_std_normalization=False,   # do not divide each sample by its own std
    zca_whitening=False,                  # do not apply ZCA whitening
    rotation_range=50,                    # random rotation range, in degrees
    width_shift_range=0.3,                # random horizontal shift (fraction of width)
    height_shift_range=0.2,               # random vertical shift (fraction of height)
    zoom_range=[1.0, 1.5],                # [lower, upper] range for random zoom
    horizontal_flip=False,                # no horizontal flipping
    vertical_flip=False,                  # no vertical flipping
)

# steps_per_epoch must be a whole number of batches; round up so the final
# partial batch is still drawn once per epoch.
steps_per_epoch = math.ceil(len(x_train) / batch_size)

# NOTE(review): fit_generator is deprecated in newer Keras releases in favour
# of model.fit; kept here for the Keras version this script targets - confirm.
hist = model.fit_generator(
    datagen.flow(x_train, y_train, batch_size=batch_size),
    steps_per_epoch=steps_per_epoch,
    epochs=10,
    validation_data=(x_valid, y_valid),
).history

精度の表示

# Draw one figure per metric (accuracy first, then loss), each showing the
# training curve and the validation curve over the epochs.
# Each spec: (train key, validation key, train label, validation label,
#             figure title, y-axis label).
plot_specs = (
    ('accuracy', 'val_accuracy', 'acc', 'val_acc', 'model accuracy', 'accuracy'),
    ('loss', 'val_loss', 'loss', 'val_loss', 'model loss', 'loss'),
)

for train_key, valid_key, train_label, valid_label, title, y_label in plot_specs:
    plt.plot(hist[train_key], marker='.', label=train_label)
    plt.plot(hist[valid_key], marker='.', label=valid_label)
    plt.title(title)
    plt.grid()
    plt.xlabel('epoch')
    plt.ylabel(y_label)
    plt.legend(loc='best')
    plt.show()

テストデータで精度の評価

# Apply the same preprocessing as the training data to the held-out test set:
# add the channel axis, cast to float32, and rescale pixels to [0, 1].
test_data = test_data.reshape(test_data.shape[0], 28, 28, 1).astype('float32') / 255
# One-hot encode the test labels into 10 classes.
test_labels = keras.utils.to_categorical(test_labels, 10)

# Evaluate silently; the first two entries of the result are printed as the
# test loss and the test accuracy.
score = model.evaluate(test_data, test_labels, verbose=0)
for caption, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(caption, value)
0
2
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
2