[Implementation] CNN in Python with Numpy #DeepLearning

Preface

This is a supplemental article in which I focus on implementing a CNN in Python.
PYTHON=3.6

Inspirations

http://cs231n.github.io/convolutional-networks/
http://cs231n.stanford.edu/slides/2017/cs231n_2017_lecture5.pdf

Thank you, Dr. Fei-Fei Li.

Warming up

I followed the tutorial at the link below to refresh my memory of the basic image-processing methods.
http://www.turbare.net/transl/scipy-lecture-notes/advanced/image_processing/index.html

import numpy as np
from scipy import misc, ndimage
import matplotlib.pyplot as plt

# Load the sample image returned by misc.face(), display it and print its
# array shape.
f = misc.face()
plt.imshow(f)
plt.show()
print(f.shape)

# Reload the sample image in grayscale and compare three renderings of it
# side by side in a single 1x3 figure.
f = misc.face(gray=True)

plt.figure(figsize=(10, 3.6))

# Panel 1: gray colormap, no explicit vmin/vmax (axes left visible).
plt.subplot(131)
plt.imshow(f, cmap=plt.cm.gray)

# Panel 2: same image with the color scale pinned to vmin=30, vmax=200.
plt.subplot(132)
plt.imshow(f, cmap=plt.cm.gray, vmin=30, vmax=200)
plt.axis('off')

# Panel 3: gray image with contour lines drawn at the levels 50 and 200.
plt.subplot(133)
plt.imshow(f, cmap=plt.cm.gray)
plt.contour(f, [50, 200])
plt.axis('off')

# Remove the gaps between the panels and shrink the outer margins.
plt.subplots_adjust(wspace=0, hspace=0., top=0.99, bottom=0.01, left=0.05, right=0.99)
plt.show()

# Zoom into a 20x20 pixel patch of the grayscale image to compare imshow's
# default interpolation (left) with interpolation='nearest' (right).
f = misc.face(gray=True)
plt.figure(figsize=(8, 4))

# Left panel: default interpolation.
plt.subplot(1, 2, 1)
plt.imshow(f[320:340, 510:530], cmap=plt.cm.gray)
plt.axis('off')

# Right panel: nearest-neighbour interpolation of the same patch.
plt.subplot(1, 2, 2)
plt.imshow(f[320:340, 510:530], cmap=plt.cm.gray, interpolation='nearest')
plt.axis('off')

# Let the two panels fill the whole figure with only a thin gap between them.
plt.subplots_adjust(wspace=0.02, hspace=0.02, top=1, bottom=0, left=0, right=1)
plt.show()

# Show the grayscale image after smoothing it with a Gaussian filter (sigma=3).
plt.imshow(ndimage.gaussian_filter(f, sigma=3))
plt.show()

# Add noise to every pixel: a uniform random value in [0, 1) scaled by
# 0.4 times the standard deviation of the image, then show the result.
noisy = f + 0.4 * f.std() * np.random.random(f.shape)
plt.imshow(noisy)
plt.show()

Actual Implementation

From here on, I will show the implementation of the individual components of a CNN.

Convolution Layer

# download some free image from google!
# NOTE(review): misc.imread may be missing from the installed SciPy (recent
# releases no longer ship it) — confirm the version, or load the file with
# another image reader.
image_file = 'nine.png'
# Read the image file into an array, display it and print its shape.
im = misc.imread(image_file)
plt.imshow(im)
plt.show()
print(im.shape)

def convolution(mat, fil, stride=1, verbose=False):
    """Slide a square filter over a zero-padded 2-D matrix.

    ``mat`` is surrounded by one ring of zeros, then a
    ``(stride + 1) x (stride + 1)`` window is moved over the padded matrix
    one element at a time.  At every position the window is multiplied
    element-wise with ``fil`` and the products are summed.

    Note that, despite its name, ``stride`` controls the *window size*
    (``stride + 1``); the step between two windows is always 1.

    Args:
        mat: 2-D array-like input (it may be rectangular).
        fil: filter of shape ``(stride + 1, stride + 1)``.
        stride: window size minus one.
        verbose: when true, print every window, the element-wise product
            and the resulting value.

    Returns:
        A float array of shape ``(rows - stride, cols - stride)``.  Element
        ``[r, c]`` is the filter response of the window whose top-left
        corner is element ``[r, c]`` of the padded matrix.

    Raises:
        ValueError: if ``mat`` is not 2-D, if ``fil`` does not have shape
            ``(stride + 1, stride + 1)``, or if ``mat`` is smaller than
            ``stride`` along an axis.
    """
    mat = np.asarray(mat)
    fil = np.asarray(fil)
    if mat.ndim != 2:
        raise ValueError("mat must be a 2-D array, got %d dimension(s)" % mat.ndim)

    size = stride + 1
    if fil.shape != (size, size):
        raise ValueError(
            "fil must have shape (%d, %d) for stride=%d, got %s"
            % (size, size, stride, fil.shape)
        )

    # Use both dimensions so that rectangular inputs work as well.
    out_rows = mat.shape[0] - stride
    out_cols = mat.shape[1] - stride
    if out_rows < 0 or out_cols < 0:
        raise ValueError("mat is smaller than stride along at least one axis")

    mat_pad = np.pad(mat, 1, 'constant')
    results = np.zeros((out_rows, out_cols))
    for row in range(out_rows):
        for col in range(out_cols):
            if verbose:
                print("indices:", row, row+size, "and", col, col+size)
            spot = mat_pad[row:row+size, col:col+size]
            if verbose:
                print("spotted matrix: ", "\n", spot)
            # Element-wise product: a matrix product (``dot``) would mix
            # rows and columns and is not a convolution.
            filtered = spot * fil
            if verbose:
                print("After filtering", "\n", filtered)
            result = filtered.sum()
            if verbose:
                print("convoluted value: ", result)
            results[row, col] = result
    return results

# test 1: a 2x2 filter (window size = stride + 1 = 2) over a random 4x4 matrix
fil_22 = np.random.rand(2, 2)
# The function defined above is named `convolution`; calling
# `convolution_original` raised a NameError.
results = convolution(np.random.rand(4,4), fil=fil_22)
print(results.shape)
plt.imshow(results)
plt.show()

# test 2
# Despite the name `fil_33`, this filter is 4x4: with stride=3 the window
# taken by `convolution` is (stride + 1) x (stride + 1) = 4x4.
fil_33 = np.random.rand(4, 4)
# Filter the channel at index 1 of the top-left crop (at most 100x100) of `im`.
results = convolution(im[:100,:100,1], fil=fil_33, stride=3)
print(results.shape)
# Show the input crop first, then the filtered result.
plt.imshow(im[:100,:100,1])
plt.show()
plt.imshow(results)
plt.show()

# test 3
# 3x3 filter with stride=2 (window size = stride + 1 = 3) over a random 7x7
# matrix; `convolution` returns a (7 - 2) x (7 - 2) = 5x5 result.
fil_33 = np.random.rand(3, 3)
results = convolution(np.random.rand(7,7), fil=fil_33, stride=2)
print(results.shape)
plt.imshow(results)
plt.show()

pool-families

In this section, I will show my implementation of max pooling in Python.
I have also implemented ReLU and combined both with the convolution layer we made previously, so that we can see the whole sequence of CNN layers at once.

def ReLU(x):
    """Return the rectified linear unit of ``x``: ``max(x, 0)`` element-wise.

    ``np.maximum`` is used instead of ``x * (x > 0)`` because the product
    turns ``-inf`` into NaN (``-inf * 0``) and negative floats into ``-0.0``.

    Args:
        x: scalar or array-like of numbers.

    Returns:
        ``x`` with every negative entry replaced by 0; arrays keep their
        shape.
    """
    return np.maximum(x, 0)

def max_pooling(mat, verbose=False):
    """Down-sample a 2-D matrix with non-overlapping 2x2 max pooling.

    Every 2x2 window is replaced by its largest element.  When a dimension
    is odd, the trailing row/column that does not fill a whole window is
    ignored.

    Args:
        mat: 2-D array-like input (it may be rectangular).
        verbose: when true, print every window and its pooled value.

    Returns:
        A float array of shape ``(rows // 2, cols // 2)``; element
        ``[i, j]`` is the maximum of ``mat[2*i:2*i+2, 2*j:2*j+2]``.

    Raises:
        ValueError: if ``mat`` is not 2-D.
    """
    mat = np.asarray(mat)
    if mat.ndim != 2:
        raise ValueError("mat must be a 2-D array, got %d dimension(s)" % mat.ndim)

    # Only whole windows are pooled, so odd sizes no longer overrun `results`.
    out_rows = mat.shape[0] // 2
    out_cols = mat.shape[1] // 2
    results = np.zeros((out_rows, out_cols))
    for i in range(out_rows):
        for j in range(out_cols):
            row, col = 2 * i, 2 * j
            if verbose:
                print("indices:", row, row+2, "and", col, col+2)
            spot = mat[row:row+2, col:col+2]
            if verbose:
                print("spotted matrix: ", "\n", spot)
            # Maximum over the whole window; looping over its rows and
            # keeping the last `np.max(vec)` only looked at the final row.
            result = np.max(spot)
            if verbose:
                print("convoluted value: ", result)
            results[i, j] = result
    return results

# Chain the layers on a random 5x5 input: convolution -> ReLU -> max pooling.
fil_22 = np.random.rand(2, 2)
results = ReLU(convolution(np.random.rand(5,5), fil=fil_22, stride=1))
print(results)
# Bare expression: a notebook/REPL displays the pooled array, a plain script
# discards it.
max_pooling(results)

# output
[[0.71590445 1.84275891 1.22907016 0.32425841]
 [1.06931596 2.4216673  1.63294066 1.41982642]
 [0.89558694 1.90399044 1.16696177 1.5738113 ]
 [0.87497548 2.07498316 1.67574505 1.7039923 ]]
array([[2.4216673 , 1.63294066],
       [2.07498316, 1.7039923 ]])

Another way to implement max pooling

import numpy as np

# Random 4x4 integer matrix with entries in [0, 10).
mat = np.random.randint(10, size=(4,4))

M, N = mat.shape
# Pooling window: K rows by L columns.
K = 2
L = 2

# Number of whole windows that fit along each axis.
MK = M // K
NL = N // L

print(mat)
# Crop to a multiple of the window size, split each axis into
# (window index, offset inside the window) and take the maximum over the two
# offset axes (1 and 3): one value per K x L window.
print(mat[:MK*K, :NL*L].reshape(MK, K, NL, L).max(axis=(1, 3)))

#output
[[3 0 0 0]
 [0 2 2 7]
 [0 5 0 1]
 [4 1 2 2]]
[[3 7]
 [5 2]]

Discovery in Numpy

numpy.pad

# Print the shape of a throw-away random 3x3 array (not used afterwards).
print(np.random.rand(3,3).shape)
# Surround the existing `mat` with a border of width 1 on every side;
# 'constant' mode fills the border with a constant value (0 unless specified).
np.pad(mat,1, 'constant')

#output
(3, 3)
array([[0, 0, 0, 0, 0, 0],
       [0, 8, 9, 3, 8, 0],
       [0, 8, 0, 5, 3, 0],
       [0, 9, 9, 5, 7, 0],
       [0, 6, 0, 4, 7, 0],
       [0, 0, 0, 0, 0, 0]])

numpy.reshape for multi-dimensional usage

By changing the target shape passed to numpy.reshape, we can convert an array into a matrix with any number of dimensions, as long as the total number of elements stays the same.

In this case, I reshape a vector of 32 elements into 1x4x8x1x1, 2x2x2x2x2 and 1x1x1x1x32 arrays.

# Fix the seed so that the 32 random values are reproducible.
np.random.seed(3)
a = np.random.rand(32)
print(a)
# Every target shape multiplies out to 32 elements:
# 1*4*8*1*1, 2*2*2*2*2 and 1*1*1*1*32.
print(a.reshape(1,4,8,1,1))
print(a.reshape(2,2,2,2,2))
print(a.reshape(1,1,1,1,32))