#keras
Semantic segmentation with Keras, a high-level wrapper around backends such as TensorFlow. Note that the script below targets an older Keras API (Convolution2D, border_mode, nb_epoch) with Theano-style channels-first ordering.
#Training environment
Item | Description |
---|---|
OS | Ubuntu 16.04 LTS |
GPU | ELSA GeForce GTX 1070 8GB S.A.C |
#Data and preprocessing
Data | Size | Count |
---|---|---|
Training images | 384×480 | 10 |
Training labels (3 classes) | 384×480 | 10 |
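
prep_data() in the script below reads the image/label pairs from a list file, train_10.txt, one "image_path label_path" pair per line separated by a single space. A hypothetical example (the paths are illustrative, not from the original):

```
/path/to/data_dir/images/0001.png /path/to/data_dir/annot/0001.png
/path/to/data_dir/images/0002.png /path/to/data_dir/annot/0002.png
```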
#Network architecture
Layer | Configuration |
---|---|
Input | 384×480 (10 images) |
**Encoder** | |
(Convolution2D + BatchNormalization + Activation) ×2, MaxPooling2D | 64 filters, 3×3, ReLU; pool 2×2 |
(Convolution2D + BatchNormalization + Activation) ×2, MaxPooling2D | 128 filters, 3×3, ReLU; pool 2×2 |
(Convolution2D + BatchNormalization + Activation) ×3, MaxPooling2D | 256 filters, 3×3, ReLU; pool 2×2 |
(Convolution2D + BatchNormalization + Activation) ×3, MaxPooling2D | 512 filters, 3×3, ReLU; pool 2×2 |
(Convolution2D + BatchNormalization + Activation) ×3, MaxPooling2D | 512 filters, 3×3, ReLU; pool 2×2 |
**Decoder** | |
UpSampling2D, (Convolution2D + BatchNormalization + Activation) ×3 | up 2×2; 512, 512, 512 filters, 3×3, ReLU |
UpSampling2D, (Convolution2D + BatchNormalization + Activation) ×3 | up 2×2; 512, 512, 256 filters, 3×3, ReLU |
UpSampling2D, (Convolution2D + BatchNormalization + Activation) ×3 | up 2×2; 256, 256, 128 filters, 3×3, ReLU |
UpSampling2D, (Convolution2D + BatchNormalization + Activation) ×2 | up 2×2; 128, 64 filters, 3×3, ReLU |
UpSampling2D, Convolution2D + BatchNormalization + Activation, Convolution2D + BatchNormalization | up 2×2; 64 filters, 3×3, ReLU; 3 filters, 1×1 |
Classification layer | softmax |
Output layer | per-pixel probabilities over 3 classes |
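
Since both 384 and 480 are divisible by 2^5 = 32, the five 2×2 poolings reduce the feature maps from 384×480 down to 12×15 (384/32 = 12, 480/32 = 15), and the five 2×2 upsamplings restore them exactly to 384×480; an input size not divisible by 32 would break this encoder/decoder symmetry.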
#Training settings
Item | Setting |
---|---|
Loss function | Categorical cross-entropy |
Optimizer | Adadelta |
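
The script also passes per-class weights ([0.1826, 0.1417, 6.2478]) to fit() to counter class imbalance. Their derivation isn't stated; a minimal sketch of one common recipe, median frequency balancing (my assumption, not confirmed by the source):

```python
import numpy as np

def median_frequency_weights(label_images, n_labels=3):
    """Median frequency balancing: weight_c = median(freq) / freq_c."""
    freq = np.zeros(n_labels)
    for lab in label_images:  # each lab is a 2-D array of class indices
        freq += np.bincount(lab.ravel(), minlength=n_labels)
    freq /= freq.sum()             # overall per-class pixel frequency
    return np.median(freq) / freq  # rare classes receive larger weights
```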
#Implementation
train.py

```python
from __future__ import absolute_import
from __future__ import print_function
import os
import pylab as pl
import matplotlib.cm as cm
import itertools
import numpy as np
import theano.tensor as T
np.random.seed(1337) # for reproducibility
from keras.datasets import mnist
from keras.layers.noise import GaussianNoise
import keras.models as models
from keras.layers.core import Layer, Dense, Dropout, Activation, Flatten, Reshape, Merge, Permute
# from keras.layers import Merge
from keras.layers.convolutional import Convolution2D, MaxPooling2D, UpSampling2D, ZeroPadding2D
from keras.layers.normalization import BatchNormalization
from keras.utils import np_utils
from keras.regularizers import ActivityRegularizer
from keras.utils.visualize_util import plot
from keras import backend as K
K.set_image_dim_ordering('th')  # channels-first (Theano-style) ordering
import cv2
import matplotlib.pyplot as plt
path = '/path/to/data_dir/'  # directory containing the images and train_10.txt
img_h = 384
img_w = 480
data_shape = 384 * 480  # pixels per image
pool_size = 2
kernel = 3
n_labels = 3  # number of segmentation classes
def normalized(rgb):
    # Histogram-equalize each channel (instead of a plain rgb/255.0 scaling).
    norm = np.zeros((rgb.shape[0], rgb.shape[1], 3), np.float32)
    b = rgb[:, :, 0]
    g = rgb[:, :, 1]
    r = rgb[:, :, 2]
    norm[:, :, 0] = cv2.equalizeHist(b)
    norm[:, :, 1] = cv2.equalizeHist(g)
    norm[:, :, 2] = cv2.equalizeHist(r)
    return norm
def binarylab(labels):
    # One-hot encode an (img_h, img_w) label map into (img_h, img_w, n_labels).
    x = np.zeros([img_h, img_w, n_labels])
    for i in range(img_h):
        for j in range(img_w):
            x[i, j, labels[i][j]] = 1
    return x
def prep_data():
    # train_10.txt lists one "image_path label_path" pair per line.
    train_data = []
    train_label = []
    with open(path + 'train_10.txt') as f:
        txt = f.readlines()
    txt = [line.split(' ') for line in txt]
    for i in range(len(txt)):
        train_data.append(np.rollaxis(normalized(cv2.imread(txt[i][0])), 2))
        # [:-1] strips the trailing newline from the label path.
        train_label.append(binarylab(cv2.imread(txt[i][1][:-1])[:, :, 0]))
        print('.', end='')
    return np.array(train_data), np.array(train_label)
train_data, train_label = prep_data()
train_label = np.reshape(train_label, (10, data_shape, n_labels))
class_weighting = [0.1826, 0.1417, 6.2478]  # per-class weights to offset class imbalance
class UnPooling2D(Layer):
    """A 2D repeat layer (defined here but not used in the model below)."""
    def __init__(self, poolsize=(2, 2)):
        super(UnPooling2D, self).__init__()
        self.poolsize = poolsize

    @property
    def output_shape(self):
        input_shape = self.input_shape
        return (input_shape[0], input_shape[1],
                self.poolsize[0] * input_shape[2],
                self.poolsize[1] * input_shape[3])

    def get_output(self, train):
        X = self.get_input(train)
        s1 = self.poolsize[0]
        s2 = self.poolsize[1]
        output = X.repeat(s1, axis=2).repeat(s2, axis=3)
        return output

    def get_config(self):
        return {"name": self.__class__.__name__,
                "poolsize": self.poolsize}
encoding_layers = [
Convolution2D(64, kernel, kernel, border_mode='same'),
BatchNormalization(),
Activation('relu'),
Convolution2D(64, kernel, kernel, border_mode='same'),
BatchNormalization(),
Activation('relu'),
MaxPooling2D(pool_size=(pool_size, pool_size)),
Convolution2D(128, kernel, kernel, border_mode='same'),
BatchNormalization(),
Activation('relu'),
Convolution2D(128, kernel, kernel, border_mode='same'),
BatchNormalization(),
Activation('relu'),
MaxPooling2D(pool_size=(pool_size, pool_size)),
Convolution2D(256, kernel, kernel, border_mode='same'),
BatchNormalization(),
Activation('relu'),
Convolution2D(256, kernel, kernel, border_mode='same'),
BatchNormalization(),
Activation('relu'),
Convolution2D(256, kernel, kernel, border_mode='same'),
BatchNormalization(),
Activation('relu'),
MaxPooling2D(pool_size=(pool_size, pool_size)),
Convolution2D(512, kernel, kernel, border_mode='same'),
BatchNormalization(),
Activation('relu'),
Convolution2D(512, kernel, kernel, border_mode='same'),
BatchNormalization(),
Activation('relu'),
Convolution2D(512, kernel, kernel, border_mode='same'),
BatchNormalization(),
Activation('relu'),
MaxPooling2D(pool_size=(pool_size, pool_size)),
Convolution2D(512, kernel, kernel, border_mode='same'),
BatchNormalization(),
Activation('relu'),
Convolution2D(512, kernel, kernel, border_mode='same'),
BatchNormalization(),
Activation('relu'),
Convolution2D(512, kernel, kernel, border_mode='same'),
BatchNormalization(),
Activation('relu'),
MaxPooling2D(pool_size=(pool_size, pool_size)),
]
decoding_layers = [
UpSampling2D(),
Convolution2D(512, kernel, kernel, border_mode='same'),
BatchNormalization(),
Activation('relu'),
Convolution2D(512, kernel, kernel, border_mode='same'),
BatchNormalization(),
Activation('relu'),
Convolution2D(512, kernel, kernel, border_mode='same'),
BatchNormalization(),
Activation('relu'),
UpSampling2D(),
Convolution2D(512, kernel, kernel, border_mode='same'),
BatchNormalization(),
Activation('relu'),
Convolution2D(512, kernel, kernel, border_mode='same'),
BatchNormalization(),
Activation('relu'),
Convolution2D(256, kernel, kernel, border_mode='same'),
BatchNormalization(),
Activation('relu'),
UpSampling2D(),
Convolution2D(256, kernel, kernel, border_mode='same'),
BatchNormalization(),
Activation('relu'),
Convolution2D(256, kernel, kernel, border_mode='same'),
BatchNormalization(),
Activation('relu'),
Convolution2D(128, kernel, kernel, border_mode='same'),
BatchNormalization(),
Activation('relu'),
UpSampling2D(),
Convolution2D(128, kernel, kernel, border_mode='same'),
BatchNormalization(),
Activation('relu'),
Convolution2D(64, kernel, kernel, border_mode='same'),
BatchNormalization(),
Activation('relu'),
UpSampling2D(),
Convolution2D(64, kernel, kernel, border_mode='same'),
BatchNormalization(),
Activation('relu'),
Convolution2D(n_labels, 1, 1, border_mode='valid'),
BatchNormalization(),
]
autoencoder = models.Sequential()
autoencoder.add(Layer(input_shape=(3, img_h, img_w)))  # 3-channel RGB input, channels first
autoencoder.encoding_layers = encoding_layers
autoencoder.decoding_layers = decoding_layers
for l in autoencoder.encoding_layers:
autoencoder.add(l)
for l in autoencoder.decoding_layers:
autoencoder.add(l)
# Flatten the spatial dimensions and move classes last so softmax applies per pixel.
autoencoder.add(Reshape((n_labels, data_shape), input_shape=(n_labels, img_h, img_w)))
autoencoder.add(Permute((2, 1)))
autoencoder.add(Activation('softmax'))
autoencoder.compile(loss="categorical_crossentropy", optimizer='adadelta')
plot(autoencoder, to_file="autoencoder.png", show_shapes=True)
nb_epoch = 1
batch_size = 2
history = autoencoder.fit(train_data, train_label, batch_size=batch_size, nb_epoch=nb_epoch,
                          show_accuracy=True, verbose=1, class_weight=class_weighting)
autoencoder.save_weights('/path/to/weight_file')
plt.plot(history.history['loss'], linewidth=3, label='train')
```
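
After training, the saved weights can be reloaded for inference. A minimal sketch continuing from the variables in train.py (the color palette and display code are my own illustrative choices, not from the original):

```python
# Predict per-pixel class probabilities, then take argmax to get label maps.
autoencoder.load_weights('/path/to/weight_file')
prob = autoencoder.predict(train_data)                  # shape: (10, 384*480, 3)
pred = prob.argmax(axis=2).reshape((-1, img_h, img_w))  # shape: (10, 384, 480)

# Color each class for display (arbitrary palette, one RGB triple per class).
palette = np.array([[0, 0, 0], [0, 255, 0], [255, 0, 0]], dtype=np.uint8)
plt.imshow(palette[pred[0]])
plt.show()
```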