
SegNet with Keras (TensorFlow)


SegNet is a deep convolutional encoder-decoder architecture developed at the University of Cambridge that performs pixel-wise labeling (semantic segmentation).

I implemented it with Keras 2 and TensorFlow.

For now, I'll just paste the source code that I got running.

A proper explanation will have to wait for another time...


Environment

ubuntu 17.04

python 3.6

keras 2.0.8

tensorflow 1.3.0
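
For reference, the Python packages used below can be installed with pip (the version pins are just to match the environment above; a GPU build of TensorFlow additionally needs the matching CUDA/cuDNN):

$ pip install tensorflow-gpu==1.3.0 keras==2.0.8 opencv-python pillow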


Loading the data

We use a publicly available image-segmentation dataset:

https://github.com/alexgkendall/SegNet-Tutorial

Specifically, the data under ./CamVid in that repository is used.

There are 12 classes, and each pixel is labeled with one of the following (in this order):

[Sky, Building, Pole, Road_marking, Road, Pavement, Tree, SignSymbol, Fence, Car, Pedestrian, Bicyclist, Unlabelled]
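
Each annotation PNG in ./CamVid/trainannot (and testannot/valannot) stores the class index of a pixel directly as its value, which is what one_hot_it in dataset.py below relies on. A quick illustrative check (assuming the repository has been cloned so that ./CamVid exists):

import cv2
import numpy as np

CLASS_NAMES = ['Sky', 'Building', 'Pole', 'Road_marking', 'Road', 'Pavement',
               'Tree', 'SignSymbol', 'Fence', 'Car', 'Pedestrian', 'Bicyclist',
               'Unlabelled']

# the annotation PNG is grayscale, so cv2.imread repeats the class index across the 3 channels
label = cv2.imread('./CamVid/trainannot/0001TP_006690.png')[:, :, 0]
print(np.unique(label))              # class indices that occur in this image
print(CLASS_NAMES[label[180, 240]])  # class name of the center pixel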


dataset.py

import cv2

import numpy as np

from keras.applications import imagenet_utils

import os

DataPath = './CamVid/'
data_shape = 360*480

class Dataset:
    def __init__(self, classes=12, train_file='train.txt', test_file='test.txt'):
        self.train_file = train_file
        self.test_file = test_file
        self.data_shape = 360*480
        self.classes = classes

    def normalized(self, rgb):
        # histogram-equalize each channel instead of a plain rgb/255.0 scaling
        #return rgb/255.0
        norm = np.zeros((rgb.shape[0], rgb.shape[1], 3), np.float32)

        b = rgb[:,:,0]
        g = rgb[:,:,1]
        r = rgb[:,:,2]

        norm[:,:,0] = cv2.equalizeHist(b)
        norm[:,:,1] = cv2.equalizeHist(g)
        norm[:,:,2] = cv2.equalizeHist(r)

        return norm

    def one_hot_it(self, labels):
        # turn a (360, 480) map of class indices into a (360, 480, 12) one-hot tensor
        x = np.zeros([360, 480, 12])
        for i in range(360):
            for j in range(480):
                x[i, j, labels[i][j]] = 1
        return x

    def load_data(self, mode='train'):
        data = []
        label = []
        if (mode == 'train'):
            filename = self.train_file
        else:
            filename = self.test_file

        with open(DataPath + filename) as f:
            txt = f.readlines()
            txt = [line.split(' ') for line in txt]

        for i in range(len(txt)):
            # paths in train.txt/test.txt start with '/SegNet'; [7:] strips that prefix
            # so the files are resolved relative to the current directory
            data.append(self.normalized(cv2.imread(os.getcwd() + txt[i][0][7:])))
            # [:-1] drops the trailing newline from the label path
            label.append(self.one_hot_it(cv2.imread(os.getcwd() + txt[i][1][7:][:-1])[:,:,0]))
            print('.', end='')
            #print("train data file", os.getcwd() + txt[i][0][7:])
            #print("label data raw", cv2.imread(os.getcwd() + '/CamVid/trainannot/0001TP_006690.png'))
        return np.array(data), np.array(label)

    def preprocess_inputs(self, X):
        ### @ https://github.com/fchollet/keras/blob/master/keras/applications/imagenet_utils.py
        """Preprocesses a tensor encoding a batch of images.
        # Arguments
            x: input Numpy tensor, 4D.
            data_format: data format of the image tensor.
            mode: One of "caffe", "tf".
                - caffe: will convert the images from RGB to BGR,
                  then will zero-center each color channel with
                  respect to the ImageNet dataset,
                  without scaling.
                - tf: will scale pixels between -1 and 1,
                  sample-wise.
        # Returns
            Preprocessed tensor.
        """
        return imagenet_utils.preprocess_input(X)

    def reshape_labels(self, y):
        # flatten the spatial dims: (N, 360, 480, classes) -> (N, 360*480, classes)
        return np.reshape(y, (len(y), self.data_shape, self.classes))
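
As a quick sanity check of the loader (an illustrative snippet; it assumes you run it from the directory that contains ./CamVid, just like train.py below):

from dataset import Dataset

ds = Dataset(classes=12)
X, y = ds.load_data('train')   # X: (N, 360, 480, 3), y: (N, 360, 480, 12)
X = ds.preprocess_inputs(X)
Y = ds.reshape_labels(y)       # (N, 172800, 12), matching the model output below
print(X.shape, Y.shape)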



Model


model.py

from keras.layers import Input

from keras.layers.core import Activation, Flatten, Reshape
from keras.layers.convolutional import Convolution2D, Conv2D, MaxPooling2D, UpSampling2D
from keras.layers.normalization import BatchNormalization
from keras.models import Model
from keras.utils import np_utils

def SegNet(input_shape=(360, 480, 3), classes=12):
    ### @ https://github.com/alexgkendall/SegNet-Tutorial/blob/master/Example_Models/bayesian_segnet_camvid.prototxt
    img_input = Input(shape=input_shape)
    x = img_input
    # Encoder
    x = Conv2D(64, (3, 3), padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

    x = Conv2D(128, (3, 3), padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

    x = Conv2D(256, (3, 3), padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

    x = Conv2D(512, (3, 3), padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)

    # Decoder
    x = Conv2D(512, (3, 3), padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)

    x = UpSampling2D(size=(2, 2))(x)
    x = Conv2D(256, (3, 3), padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)

    x = UpSampling2D(size=(2, 2))(x)
    x = Conv2D(128, (3, 3), padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)

    x = UpSampling2D(size=(2, 2))(x)
    x = Conv2D(64, (3, 3), padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)

    x = Conv2D(classes, (1, 1), padding="valid")(x)
    x = Reshape((input_shape[0] * input_shape[1], classes))(x)
    x = Activation("softmax")(x)
    model = Model(img_input, x)
    return model
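
To confirm the tensor shapes line up, it is enough to build the model and print a summary; the final softmax layer should have shape (None, 172800, 12), i.e. 360*480 pixels by 12 classes:

from model import SegNet

model = SegNet(input_shape=(360, 480, 3), classes=12)
model.summary()   # last layers: Reshape -> (None, 172800, 12), then softmax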



Training

Saves the trained model and writes the training progress out to TensorBoard.


train.py

import os

import glob
import numpy as np
import keras

from model import SegNet

import dataset

input_shape = (360, 480, 3)
classes = 12
epochs = 100
batch_size = 1
log_filepath='./logs/'

data_shape = 360*480

class_weighting = [0.2595, 0.1826, 4.5640, 0.1417, 0.5051, 0.3826, 9.6446, 1.8418, 6.6823, 6.2478, 3.0, 7.3614]  # per-class weights to compensate for class imbalance in CamVid

## set gpu usage
import tensorflow as tf
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True, per_process_gpu_memory_fraction = 0.8))
session = tf.Session(config=config)
keras.backend.tensorflow_backend.set_session(session)

def main():
    print("loading data...")
    ds = dataset.Dataset(classes=classes)
    train_X, train_y = ds.load_data('train')  # train_y shape is (None, 360, 480, classes)

    train_X = ds.preprocess_inputs(train_X)
    train_Y = ds.reshape_labels(train_y)
    print("input data shape...", train_X.shape)
    print("input label shape...", train_Y.shape)

    test_X, test_y = ds.load_data('test')  # test_y shape is (None, 360, 480, classes)
    test_X = ds.preprocess_inputs(test_X)
    test_Y = ds.reshape_labels(test_y)

    tb_cb = keras.callbacks.TensorBoard(log_dir=log_filepath, histogram_freq=1, write_graph=True, write_images=True)
    print("creating model...")
    model = SegNet(input_shape=input_shape, classes=classes)
    model.compile(loss="categorical_crossentropy", optimizer='adadelta', metrics=["accuracy"])

    model.fit(train_X, train_Y, batch_size=batch_size, epochs=epochs,
              verbose=1, class_weight=class_weighting, validation_data=(test_X, test_Y),
              shuffle=True, callbacks=[tb_cb])

    model.save('seg.h5')


if __name__ == '__main__':
    main()
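
Since a full run takes hours, it can be handy to also keep intermediate weights. A minimal sketch using keras.callbacks.ModelCheckpoint (the filename pattern and the 10-epoch interval are just examples):

cp_cb = keras.callbacks.ModelCheckpoint('seg_{epoch:02d}.h5', period=10)
# then pass callbacks=[tb_cb, cp_cb] to model.fit above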


Run

$ python train.py

...
367/367 [==============================] - 139s - loss: 0.0492 - acc: 0.9806 - val_loss: 1.5358 - val_acc: 0.7349

Training was run on a GeForce GTX 1050; 100 epochs took several hours.


Training results

$ tensorboard --logdir logs

TensorBoard 0.1.5 at http://localhost:6006 (Press CTRL+C to quit)

It doesn't look like it has fully converged?


Prediction results

Load the saved model, label the input image pixel by pixel, color the segmented regions, and write out the result as an image.


predict.py

import numpy as np

import keras
from PIL import Image

from model import SegNet

import dataset

height = 360
width = 480
classes = 12
epochs = 100
batch_size = 1
log_filepath='./logs_100/'

data_shape = 360*480

def writeImage(image, filename):
    """ label data to colored image """
    Sky = [128,128,128]
    Building = [128,0,0]
    Pole = [192,192,128]
    Road_marking = [255,69,0]
    Road = [128,64,128]
    Pavement = [60,40,222]
    Tree = [128,128,0]
    SignSymbol = [192,128,128]
    Fence = [64,64,128]
    Car = [64,0,128]
    Pedestrian = [64,64,0]
    Bicyclist = [0,128,192]
    Unlabelled = [0,0,0]
    r = image.copy()
    g = image.copy()
    b = image.copy()
    label_colours = np.array([Sky, Building, Pole, Road_marking, Road, Pavement, Tree, SignSymbol, Fence, Car, Pedestrian, Bicyclist, Unlabelled])
    # replace each class index with its RGB color
    for l in range(0, 12):
        r[image == l] = label_colours[l, 0]
        g[image == l] = label_colours[l, 1]
        b[image == l] = label_colours[l, 2]
    rgb = np.zeros((image.shape[0], image.shape[1], 3))
    rgb[:,:,0] = r/1.0
    rgb[:,:,1] = g/1.0
    rgb[:,:,2] = b/1.0
    im = Image.fromarray(np.uint8(rgb))
    im.save(filename)


def predict(test):
    model = keras.models.load_model('seg_100.h5')
    probs = model.predict(test, batch_size=1)

    # per-pixel argmax of the first image in the batch -> (360, 480) map of class indices
    prob = probs[0].reshape((height, width, classes)).argmax(axis=2)
    return prob


def main():
    print("loading data...")
    ds = dataset.Dataset(test_file='val.txt', classes=classes)
    test_X, test_y = ds.load_data('test')  # test_y shape is (None, 360, 480, classes)
    test_X = ds.preprocess_inputs(test_X)
    test_Y = ds.reshape_labels(test_y)

    prob = predict(test_X)
    writeImage(prob, 'val.png')


if __name__ == '__main__':
    main()
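
predict() above only returns the label map for the first image in the batch. To write a colored result for every image in val.txt, a loop along these lines works (illustrative, reusing writeImage and the variables from main):

model = keras.models.load_model('seg_100.h5')
probs = model.predict(test_X, batch_size=1)
for i, prob in enumerate(probs):
    labels = prob.reshape((height, width, classes)).argmax(axis=2)
    writeImage(labels, 'val_{}.png'.format(i))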



input

./CamVid/val/0016E5_07959.png


output

The segmentation looks roughly right.


References