More than 1 year has passed since last update.

[未完]初学者がCNN(畳み込みニューラルネットワーク)の実装について考える

Posted at 2023-02-20

概要

WIP　未完
大学の課題用に学習したことをまとめていたもの

背景

TensorflowやKerasを使わずにCNNを実装してみたい。
CNNの中身を意識しながら実装してみたい。

CNNの構成

Layer	入出力の形状

Convolution	↓

ReLU	↓

Pooling	↓

Affine	↓

ReLU	↓

Affine (推論フェーズではここで終了)	↓

Softmax(学習フェーズのみ)	↓

1. import

import

import os
import sys
import urllib.request
import gzip
import pickle
import numpy as np #N次元配列を扱うため
import matplotlib.pylab as plt #学習状況を図示化するため

2. 出力層の設計

分類問題のため、交差エントロピーとソフトマックスの２つの関数を実装する.

cross_entropy_error(y, t)

y (numpy.ndarray) : NNの出力
t (numpy.ndarray) : 教師データ
return (float) : 交差エントロピー誤差

$$ E=-\frac{1}{N}\sum_{n}\sum_{k}t_{nk}\log{y_{nk}} \tag{1} $$

$N$ : データ数 (バッチサイズ)

$y_{nk}$ : $n$個目のデータの$k$次元目のNNの出力値

$t_{nk}$ : $n$個目のデータの$k$次元目の教師データ

$E$ : 交差エントロピー誤差

cross_entropy_error(y,t)

def cross_entropy_error(y, t):
    """Return the mean cross-entropy error of predictions against targets.

    Args:
        y (numpy.ndarray): Network output, either a single sample (1-D) or
            a batch (2-D, one row per sample).
        t (numpy.ndarray): Targets, either one-hot (same number of elements
            as ``y``) or integer class labels.

    Returns:
        float: Cross-entropy error averaged over the batch.
    """
    # Promote a single sample to a batch of one so the indexing below
    # works uniformly.
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    # Same element count means t is one-hot: collapse it to label indices.
    if t.size == y.size:
        t = t.argmax(axis=1)

    n_samples = y.shape[0]
    rows = np.arange(n_samples)
    eps = 1e-7  # keeps log() away from log(0) = -inf

    # Pick, for every sample, the predicted probability of its true class.
    picked = y[rows, t]
    return -np.sum(np.log(picked + eps)) / n_samples

softmax(x)

x (numpy.ndarray) : 入力信号
return (numpy.ndarray) : 出力信号

$$y_k =\frac{\exp(a_k)}{\sum_{i=1}^n \exp(a_i)}=\frac{\exp(a_k-C)}{\sum_{i=1}^n \exp(a_i-C)}\tag{2}$$

$a_k$ : 入力信号

$y_k$ : 出力層の$k$番目のニューロンの出力

$n$ : 出力層のニューロンの数(=分類の種類の総数)

$C$ : 定数(overflow対策)

softmax(x)

def softmax(x):
    """Apply a numerically stable softmax along the last axis.

    Args:
        x (numpy.ndarray): Input signal.

    Returns:
        numpy.ndarray: Array of the same shape as ``x``.
    """
    # Subtracting the per-row maximum leaves the result unchanged but keeps
    # exp() from overflowing.
    shifted = x - np.max(x, axis=-1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=-1, keepdims=True)

出力層(SoftmaxWithLoss)

SoftmaxWithLoss

class SoftmaxWithLoss:
    """Softmax followed by cross-entropy loss, used as the final layer.

    ``forward`` caches the softmax output and the targets so that
    ``backward`` can compute the gradient with respect to the layer input.
    """

    def __init__(self):
        self.loss = None
        self.y = None  # softmax output
        self.t = None  # target data

    def forward(self, x, t):
        """Compute the loss for scores ``x`` against targets ``t``.

        Args:
            x (numpy.ndarray): Input scores passed to ``softmax``.
            t (numpy.ndarray): Targets, one-hot or integer class labels.

        Returns:
            The value returned by ``cross_entropy_error``.
        """
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)
        return self.loss

    def backward(self, dout=1):
        """Return the gradient of the loss with respect to the input.

        Args:
            dout: Upstream gradient of the loss value. Defaults to 1.

        Returns:
            numpy.ndarray: Gradient with the same shape as the cached
            softmax output, scaled by ``dout``.
        """
        batch_size = self.t.shape[0]
        if self.t.size == self.y.size:  # targets are one-hot vectors
            dx = (self.y - self.t) / batch_size
        else:
            # Integer labels: subtract 1 only at each sample's true class.
            dx = self.y.copy()
            dx[np.arange(batch_size), self.t] -= 1
            dx = dx / batch_size
        # Honor the upstream gradient; with the default dout=1 this is a
        # no-op, so existing callers see the same values.
        return dx * dout

3. 中間層の設計

各層では以下の関数を実装する.

① init 初期化

② forward 順伝播

③ backward 逆伝播

全結合層(Affine)

forward(self,x)

x (np.ndarray) : 入力
return (np.ndarray) : 出力

$$ Y=X \cdot W+b \tag{3}$$

$X$ : 入力

$Y$ : 出力

$W$ : 重み

$b$ : バイアス

backward(self,dout)

dout (np.ndarray) : 出力側から逆伝播してきた勾配 ($\partial L / \partial Y$)
dx(return) (np.ndarray) : 入力に関する勾配 ($\partial L / \partial X$)

$$ \frac{\partial L}{\partial X}=\frac{\partial L}{\partial Y} \cdot W^T \tag{4}$$

$$ \frac{\partial L}{\partial W}=X^T \cdot \frac{\partial L}{\partial Y} \tag{5}$$

$W^T$ : $W$の転置(行・列の入替)

$X^T$ : $X$の転置(行・列の入替)

$X$ : 入力

$Y$ : 順伝播でのこの層の出力

$L$ : 損失関数の値 (逆伝播では出力側から $\partial L / \partial Y$ が伝わってくる)

Affine

class Affine:
    """Fully connected layer computing ``x . W + b``.

    Inputs with more than two dimensions are flattened to
    ``(x.shape[0], -1)`` for the matrix product, and the input gradient is
    reshaped back to the original input shape.
    """

    def __init__(self, W, b):
        self.W = W  # weights
        self.b = b  # bias

        # State kept for the backward pass.
        self.x = None
        self.original_x_shape = None
        self.dW = None
        self.db = None

    def forward(self, x):
        """Return ``x . W + b`` for the (flattened) input.

        Args:
            x (numpy.ndarray): Input; the first axis is kept, the rest is
                flattened.

        Returns:
            numpy.ndarray: Layer output.
        """
        self.original_x_shape = x.shape  # remembered to restore in backward
        flat = x.reshape(x.shape[0], -1)
        self.x = flat
        return np.dot(flat, self.W) + self.b

    def backward(self, dout):
        """Store the parameter gradients and return the input gradient.

        Args:
            dout (numpy.ndarray): Gradient with respect to the layer output.

        Returns:
            numpy.ndarray: Gradient with respect to the input, in the shape
            the input had when ``forward`` was called.
        """
        self.dW = np.dot(self.x.T, dout)
        self.db = np.sum(dout, axis=0)

        grad_flat = np.dot(dout, self.W.T)
        return grad_flat.reshape(*self.original_x_shape)

畳み込み層(Convolution)

forward(self,x)

x (np.ndarray)
return (np.ndarray)

backward(self,dout)

dout (np.ndarray)
dx(return) (np.ndarray)

Convolution

class Convolution:
    """Convolution layer implemented as a matrix product.

    Relies on ``im2col`` and ``col2im``, which are expected to be defined
    elsewhere in this file.
    """

    def __init__(self, W, b, stride=1, pad=0):
        self.W = W  # filters, unpacked as (FN, C, FH, FW) in forward/backward
        self.b = b  # bias
        self.stride = stride
        self.pad = pad

        # State kept for the backward pass.
        self.x = None
        self.col = None
        self.col_W = None
        self.dW = None
        self.db = None

    def forward(self, x):
        """Apply the filters to a batch of inputs.

        Args:
            x (numpy.ndarray): 4-D input unpacked as (N, C, H, W).

        Returns:
            numpy.ndarray: Output arranged as (N, FN, out_h, out_w).
        """
        n_filters, _, filt_h, filt_w = self.W.shape
        batch, _, in_h, in_w = x.shape

        # Spatial size of the output feature map.
        out_h = 1 + int((in_h + 2*self.pad - filt_h) / self.stride)
        out_w = 1 + int((in_w + 2*self.pad - filt_w) / self.stride)

        # Unroll input patches and filters into 2-D matrices.
        # NOTE(review): presumably im2col yields one row per output
        # position -- confirm against its definition.
        patches = im2col(x, filt_h, filt_w, self.stride, self.pad)
        kernel_mat = self.W.reshape(n_filters, -1).T

        response = np.dot(patches, kernel_mat) + self.b
        response = response.reshape(batch, out_h, out_w, -1)
        response = response.transpose(0, 3, 1, 2)

        # Cache what backward() needs.
        self.x = x
        self.col = patches
        self.col_W = kernel_mat

        return response

    def backward(self, dout):
        """Store the filter/bias gradients and return the input gradient.

        Args:
            dout (numpy.ndarray): 4-D gradient with respect to the output.

        Returns:
            The value returned by ``col2im`` for the input gradient.
        """
        n_filters, channels, filt_h, filt_w = self.W.shape

        # Move the filter axis last and flatten to one row per position.
        grad_out = dout.transpose(0, 2, 3, 1).reshape(-1, n_filters)

        self.db = np.sum(grad_out, axis=0)

        # Gradient in matrix form, then back to the filter's 4-D shape.
        grad_kernel = np.dot(self.col.T, grad_out)
        self.dW = grad_kernel.transpose(1, 0).reshape(
            n_filters, channels, filt_h, filt_w
        )

        grad_patches = np.dot(grad_out, self.col_W.T)

        # Fold the 2-D patch gradients back into the input's shape.
        return col2im(
            grad_patches, self.x.shape, filt_h, filt_w, self.stride, self.pad
        )

プーリング層(Pooling)

max pooling

forward(self,x)

x (np.ndarray)
return (np.ndarray)

backward(self,dout)

dout (np.ndarray)
dx(return) (np.ndarray)

Pooling

class Pooling:
    """Max-pooling layer.

    Relies on ``im2col`` and ``col2im``, which are expected to be defined
    elsewhere in this file.
    """

    def __init__(self, pool_h, pool_w, stride=2, pad=0):
        self.pool_h = pool_h  # window height
        self.pool_w = pool_w  # window width
        self.stride = stride
        self.pad = pad

        # State kept for the backward pass.
        self.x = None
        self.arg_max = None

    def forward(self, x):
        """Take the maximum over each pooling window.

        Args:
            x (numpy.ndarray): 4-D input unpacked as (N, C, H, W).

        Returns:
            numpy.ndarray: Output arranged as (N, C, out_h, out_w).
        """
        N, C, H, W = x.shape

        # Output size. The padding is included because im2col below is
        # called with self.pad; leaving it out made the reshape fail for
        # any pad > 0. With pad == 0 the result is unchanged.
        # NOTE(review): assumes im2col pads both sides by `pad`, as the
        # Convolution layer's size formula does -- confirm.
        out_h = int(1 + (H + 2*self.pad - self.pool_h) / self.stride)
        out_w = int(1 + (W + 2*self.pad - self.pool_w) / self.stride)

        # Unroll the input, then regroup so each row holds one window.
        col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
        col = col.reshape(-1, self.pool_h*self.pool_w)

        # Remember where each maximum came from for the backward pass.
        arg_max = np.argmax(col, axis=1)
        out = np.max(col, axis=1)

        # Restore the 4-D layout and move the channel axis to position 1.
        out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)

        self.x = x
        self.arg_max = arg_max
        return out

    def backward(self, dout):
        """Route each output gradient to the position of its maximum.

        Args:
            dout (numpy.ndarray): 4-D gradient with respect to the output.

        Returns:
            The value returned by ``col2im`` for the input gradient.
        """
        dout = dout.transpose(0, 2, 3, 1)  # channel axis last, as in forward

        pool_size = self.pool_h * self.pool_w
        # One row per output element; only the arg-max column gets the
        # gradient, every other entry of the window stays zero.
        dmax = np.zeros((dout.size, pool_size))
        dmax[np.arange(self.arg_max.size), self.arg_max.flatten()] = dout.flatten()
        dmax = dmax.reshape(dout.shape + (pool_size,))

        # Collapse the first three axes into the row dimension for col2im.
        dcol = dmax.reshape(dmax.shape[0] * dmax.shape[1] * dmax.shape[2], -1)
        dx = col2im(dcol, self.x.shape, self.pool_h, self.pool_w, self.stride, self.pad)
        return dx

ReLU層

forward(self,x)

x (np.ndarray)
return (np.ndarray)

backward(self,dout)

dout (np.ndarray)
dx(return) (np.ndarray)

Relu

class Relu:
    """ReLU activation layer: passes positive values, zeroes the rest."""

    def __init__(self):
        # Boolean mask of the inputs that were <= 0 in the last forward().
        self.mask = None

    def forward(self, x):
        """Return a copy of ``x`` with all non-positive entries set to 0.

        Args:
            x (numpy.ndarray): Input.

        Returns:
            numpy.ndarray: Activated output; ``x`` itself is not modified.
        """
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0
        return out

    def backward(self, dout):
        """Return ``dout`` with the gradient blocked where the input was <= 0.

        Args:
            dout (numpy.ndarray): Gradient with respect to the output.

        Returns:
            numpy.ndarray: Gradient with respect to the input.
        """
        # Work on a copy so the caller's gradient array is not overwritten
        # (the previous version zeroed entries of `dout` in place).
        dx = dout.copy()
        dx[self.mask] = 0
        return dx

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up