More than 5 years have passed since last update.

「ゼロから作るDeep Learning」自習メモ（その６）Fashion MNIST

Last updated at 2020-09-16Posted at 2020-08-12

「ゼロから作るDeep Learning」(斎藤康毅　著　オライリー・ジャパン刊)を読んでいる時に、参照したサイト等をメモしていきます。その５ ←　→その６の２　→その６の３　→ その７

今回は番外編。

P86
ニューラルネットワークの利点は、すべての問題を同じ流れで解くことができる点にあります。たとえば、解くべき問題が「5」を認識する問題なのか、「犬」を認識する問題なのか、それとも、「人の顔」を認識する問題なのかといった詳細とは関係なしに、ニューラルネットワークは与えられたデータをただひたすら学習し、与えられた問題のパターンを発見しようと試みます。

この本ではMNISTの手書き数字データを使っていますが、じゃあ、他のデータもこの本のプログラムで処理できるのかやってみよう、という記事です。

TensorFlow のサイト　https://www.tensorflow.org/?hl=ja の初心者向けチュートリアルに
「スニーカーやシャツなど、身に着けるものの写真」のデータFashion MNISTがあります。

それをダウンロードして、学習させ、テストデータを認識させてみようというわけです。

データの置き場所は　https://storage.googleapis.com/tensorflow/tf-keras-datasets/
ファイルの名前はMNISTに合わせてあるようです。

# Download the Fashion-MNIST image/label archives from the TensorFlow dataset bucket.
import urllib.request
import os.path

url_base = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/'
# Archive name for each split; the names follow the original MNIST naming scheme.
key_file = {
    'train_img':'train-images-idx3-ubyte.gz',
    'train_label':'train-labels-idx1-ubyte.gz',
    'test_img':'t10k-images-idx3-ubyte.gz',
    'test_label':'t10k-labels-idx1-ubyte.gz'
}

# NOTE: '__file__' is a quoted string here, so abspath() resolves it against the
# current working directory and dirname() strips it again; dataset_dir is
# therefore "<current working directory>/fashion".
dataset_dir = os.path.dirname(os.path.abspath('__file__'))+'/fashion'
# urlretrieve() does not create missing directories, so make sure the target
# directory exists before the first download.
os.makedirs(dataset_dir, exist_ok=True)
for file_name in key_file.values():
    file_path = dataset_dir + '/' + file_name
    urllib.request.urlretrieve(url_base + file_name, file_path)

# 画像データtrain_img　の頭１６バイトには、ファイルについての情報がのっている。これを除去してファイルを解凍する
# 同様にラベルデータの頭８バイトを除去してファイルを解凍する

import os.path
import gzip
import numpy as np

# Archive file name for each dataset split.
key_file = dict(
    train_img='train-images-idx3-ubyte.gz',
    train_label='train-labels-idx1-ubyte.gz',
    test_img='t10k-images-idx3-ubyte.gz',
    test_label='t10k-labels-idx1-ubyte.gz',
)

# '__file__' is quoted, so this evaluates to "<current working directory>/fashion".
dataset_dir = os.path.dirname(os.path.abspath('__file__')) + '/fashion'

def load_img(file_name):
    """Read a gzip-compressed image archive from ``dataset_dir``.

    The first 16 bytes of the decompressed stream are skipped (file header)
    and the remaining bytes are returned as a uint8 array with one
    784-element row per image.
    """
    with gzip.open(dataset_dir + '/' + file_name, 'rb') as archive:
        raw = archive.read()
    pixels = np.frombuffer(raw, np.uint8, offset=16)
    return pixels.reshape(-1, 784)

def load_label(file_name):
    """Read a gzip-compressed label archive from ``dataset_dir``.

    The first 8 bytes of the decompressed stream are skipped (file header);
    every remaining byte becomes one element of the returned uint8 array.
    """
    with gzip.open(dataset_dir + '/' + file_name, 'rb') as archive:
        raw = archive.read()
    return np.frombuffer(raw, np.uint8, offset=8)

# Decode the four archives into a single dict of NumPy arrays.
dataset = {
    'train_img': load_img(key_file['train_img']),
    'train_label': load_label(key_file['train_label']),
    'test_img': load_img(key_file['test_img']),
    'test_label': load_label(key_file['test_label']),
}

# Save the converted dataset with pickle, using the highest available protocol.
import pickle
save_file = dataset_dir + '/mnist.pkl'
with open(save_file, 'wb') as pkl_out:
    pickle.dump(dataset, pkl_out, protocol=pickle.HIGHEST_PROTOCOL)

# Read the saved file back and store the result in `dataset`.
import pickle
mnist_file = dataset_dir + '/mnist.pkl'
with open(mnist_file, 'rb') as pkl_in:
    dataset = pickle.load(pkl_in)

# Display the first training sample and print its class name.
import matplotlib.pyplot as plt
class_names = [
    'T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
    'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot',
]
c = dataset['train_label'][0]
example = dataset['train_img'][0].reshape(28, 28)  # flat 784 values -> 28x28 image

print(class_names[c])
plt.figure()
# Hide axis ticks and the grid so that only the picture itself is drawn.
plt.xticks([])
plt.yticks([])
plt.grid(False)
plt.imshow(example, cmap=plt.cm.binary)
plt.show()

ここまでは、自習メモ（その３）でやったことと同じです。
次に、（その５）でやった学習処理を行います。

# 2層ニューラルネットワークのクラス
import numpy as np
from common.functions import *

class TwoLayerNet:
    """Two-layer fully connected network: sigmoid hidden layer, softmax output.

    All parameters are stored in ``self.params`` under the keys ``'W1'``,
    ``'b1'``, ``'W2'`` and ``'b2'``.  The helpers ``sigmoid``,
    ``sigmoid_grad``, ``softmax`` and ``cross_entropy_error`` are not defined
    here; presumably they come from ``common.functions``.
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        """Initialise the parameters.

        Weights are standard-normal samples scaled by ``weight_init_std``;
        biases start at zero.
        """
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

    def predict(self, x):
        """Run the forward pass on ``x`` and return the softmax output."""
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)
        return y

    def loss(self, x, t):
        """Return ``cross_entropy_error`` between the prediction for ``x`` and ``t``.

        x: input data, t: teacher (target) data.
        """
        y = self.predict(x)
        return cross_entropy_error(y, t)

    def accuracy(self, x, t):
        """Return the fraction of rows of ``x`` whose arg-max prediction matches ``t``.

        ``t`` may be one-hot encoded (2-D, one row per sample) or a 1-D vector
        of class indices.
        """
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.asarray(t)
        # One-hot targets are reduced to class indices; a 1-D label vector is
        # already in that form and must not go through argmax(axis=1).
        if t.ndim != 1:
            t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    def gradient(self, x, t):
        """Compute parameter gradients for the batch ``(x, t)`` by backpropagation.

        ``t`` must have the same shape as the network output (it is subtracted
        from it element-wise).  Returns a dict with the keys ``'W1'``,
        ``'b1'``, ``'W2'`` and ``'b2'``.
        """
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        grads = {}
        batch_num = x.shape[0]

        # forward
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)

        # backward: output error divided by the batch size
        dy = (y - t) / batch_num
        grads['W2'] = np.dot(z1.T, dy)
        grads['b2'] = np.sum(dy, axis=0)

        dz1 = np.dot(dy, W2.T)
        da1 = sigmoid_grad(a1) * dz1
        grads['W1'] = np.dot(x.T, da1)
        grads['b1'] = np.sum(da1, axis=0)

        return grads

# 4.5.2 ミニバッチ学習の実装
import sys, os
import pickle
import numpy as np
from common.functions import *

def normalize(key):
    """Replace ``dataset[key]`` with a float32 copy divided by 255 and return it.

    Operates on the module-level ``dataset`` dict; the returned array is the
    same object that is stored back under ``key``.
    """
    scaled = dataset[key].astype(np.float32)
    scaled /= 255
    dataset[key] = scaled
    return scaled

def to_one_hot(label, num_classes=10):
    """Convert a vector of class indices into a one-hot matrix.

    Args:
        label: 1-D array-like of integer class indices.
        num_classes: Number of columns of the result (defaults to 10).

    Returns:
        Float array of shape ``(label.size, num_classes)`` that holds a 1 in
        column ``label[i]`` of row ``i`` and 0 everywhere else.
    """
    label = np.asarray(label)
    T = np.zeros((label.size, num_classes))
    if label.size:
        # One fancy-indexed assignment sets every row at once instead of
        # looping over the samples in Python.
        T[np.arange(label.size), label.ravel()] = 1
    return T

# Reload the pickled dataset, then divide the training images by 255 and
# one-hot encode the training labels.
dataset_dir = os.path.dirname(os.path.abspath('__file__')) + '/fashion'
mnist_file = dataset_dir + '/mnist.pkl'
with open(mnist_file, 'rb') as pkl_in:
    dataset = pickle.load(pkl_in)
x_train, t_train = normalize('train_img'), to_one_hot(dataset['train_label'])

# Hyperparameters
iters_num = 1000
batch_size = 100
learning_rate = 0.1
train_size = x_train.shape[0]

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)
train_loss_list = []
for _ in range(iters_num):
    # Draw the sample indices of this mini-batch.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch, t_batch = x_train[batch_mask], t_train[batch_mask]

    # Gradients via backpropagation (fast).
    grad = network.gradient(x_batch, t_batch)

    # Gradient-descent step on every parameter, updated in place.
    for name in ('W1', 'b1', 'W2', 'b2'):
        network.params[name] -= learning_rate * grad[name]

    # Record the loss on the same mini-batch after the update.
    train_loss_list.append(network.loss(x_batch, t_batch))

# Save the whole trained network object with pickle (highest protocol).
import pickle
save_file = dataset_dir + '/gakusyuukekka_weight.pkl'
with open(save_file, 'wb') as pkl_out:
    pickle.dump(network, pkl_out, protocol=pickle.HIGHEST_PROTOCOL)

ここまでは、自習メモ（その５）でやったことと同じです。
次に、テストデータの推論処理を行います。

import numpy as np
from common.functions import *

class TwoLayerNet:
    """Inference-only network definition: forward pass only.

    ``__init__`` leaves ``params`` empty, so ``'W1'``, ``'b1'``, ``'W2'`` and
    ``'b2'`` have to be supplied before ``predict`` is called (presumably by
    restoring a pickled, trained instance).
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        # The arguments are accepted but not used; no weights are created here.
        self.params = {}

    def predict(self, x):
        """Return the softmax output of the two-layer forward pass for ``x``."""
        params = self.params
        hidden = sigmoid(np.dot(x, params['W1']) + params['b1'])
        scores = np.dot(hidden, params['W2']) + params['b2']
        return softmax(scores)

# 4.5.3 テストデータで評価
import pickle
import sys, os

def normalize(key):
    """Convert ``dataset[key]`` to float32, divide it by 255 and return it.

    The converted array is also stored back into the module-level ``dataset``
    dict under ``key``.
    """
    converted = dataset[key].astype(np.float32)
    converted /= 255
    dataset[key] = converted
    return converted

def to_one_hot(label, num_classes=10):
    """Convert a vector of class indices into a one-hot matrix.

    Args:
        label: 1-D array-like of integer class indices.
        num_classes: Number of columns of the result (defaults to 10).

    Returns:
        Float array of shape ``(label.size, num_classes)`` that holds a 1 in
        column ``label[i]`` of row ``i`` and 0 everywhere else.
    """
    label = np.asarray(label)
    T = np.zeros((label.size, num_classes))
    if label.size:
        # One fancy-indexed assignment sets every row at once instead of
        # looping over the samples in Python.
        T[np.arange(label.size), label.ravel()] = 1
    return T

# Reload the pickled dataset; test images are divided by 255, labels stay as
# plain class indices.
dataset_dir = os.path.dirname(os.path.abspath('__file__')) + '/fashion'
mnist_file = dataset_dir + '/mnist.pkl'
with open(mnist_file, 'rb') as pkl_in:
    dataset = pickle.load(pkl_in)
x, t = normalize('test_img'), dataset['test_label']

# The trained network is restored from its pickle file rather than being
# constructed with TwoLayerNet(...).
weight_file = dataset_dir + '/gakusyuukekka_weight.pkl'
with open(weight_file, 'rb') as pkl_in:
    network = pickle.load(pkl_in)

# Count the test samples whose arg-max prediction equals the label.
accuracy_cnt = 0
for i, sample in enumerate(x):
    if np.argmax(network.predict(sample)) == t[i]:
        accuracy_cnt += 1

print("Accuracy:" + str(float(accuracy_cnt) / len(x)))

結果は　

Accuracy:0.7427　

と、精度は低いですが、それなりに推測しているようです。

推測結果を表示してみます。

import matplotlib.pyplot as plt

# Display name for each label value; the list is indexed by the label.
class_names = [
    'T-shirt/top',
    'Trouser',
    'Pullover',
    'Dress',
    'Coat',
    'Sandal',
    'Shirt',
    'Sneaker',
    'Bag',
    'Ankle boot',
]
def showImg(x):
    """Draw ``x`` as a 28x28 image in a new figure, without ticks or grid."""
    image = x.reshape(28, 28)
    plt.figure()
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(image, cmap=plt.cm.binary)
    plt.show()

# For the first ten test samples, print the true label, the predicted label
# with its softmax score, and show the image.
for i in range(10):
    y = network.predict(x[i])
    c = t[i]
    print(f"正解 {c} {class_names[c]}")
    p = np.argmax(y)
    v = y[p]
    print(f"[ {p} {class_names[p]} ] {v:.2%}")
    showImg(x[i])

確かに、画像の内容が違っても、同じニューラルネットで学習して、そこそこの推論ができているようです。

（追記）
どうも精度が低いと思い、プログラムを見直したら、ハイパーパラメータが
　iters_num = 1000
となっていました。
これを iters_num = 10000 に修正して学習させたら、
精度が85%に上がりました。

T-shirt、Pullover、Coat、Shirt を間違えるケースが多いようです。

さらに、犬猫の写真を加工して、ニューラルネットで学習させてみました。→その６の２

その５ ←　→その６の２　→その６の３　→ その７

読めない用語集

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up