1. harmegiddo

    No comment

    harmegiddo
Changes in body
Source | HTML | Preview
@@ -1,248 +1,693 @@
# 0.0. 概要
最強のSemantic SegmentationのDeep lab v3 plusを試してみる。
https://github.com/tensorflow/models/tree/master/research/deeplab
# 0.1. Installation
これを読めばよい
https://github.com/tensorflow/models/blob/master/research/deeplab/g3doc/installation.md
取りあえずCudaは9.0以上じゃないと動かないらしいので
Tensorflow 1.8, Cuda 9.0, CUDNN 7.0の環境で動かす。
```
git clone https://github.com/tensorflow/models.git
cd models/research/
export PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/slim
cd deeplab/
python model_test.py
sh local_test.sh
```
私は異なるGPUを積んでいるので
```
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 384.130 Driver Version: 384.130 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
|===============================+======================+======================|
| 0 Quadro 4000 Off | 00000000:03:00.0 On | N/A |
| 40% 53C P12 N/A / N/A | 237MiB / 1977MiB | 0% Default |
+-------------------------------+----------------------+----------------------+
| 1 Quadro 4000 Off | 00000000:04:00.0 Off | N/A |
| 40% 51C P12 N/A / N/A | 137MiB / 1984MiB | 0% Default |
+-------------------------------+----------------------+----------------------+
| 2 GeForce GTX 108... Off | 00000000:22:00.0 Off | N/A |
| 44% 38C P8 11W / 250W | 2MiB / 11172MiB | 0% Default |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: GPU Memory |
| GPU PID Type Process name Usage |
|=============================================================================|
| No running processes found |
+-----------------------------------------------------------------------------+
```
こんな感じにプログラムを書き換えないと動かなかったです。
```
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for DeepLab model and some helper functions."""
import tensorflow as tf
from deeplab import common
from deeplab import model
config = tf.ConfigProto(
gpu_options=tf.GPUOptions(
visible_device_list="1, 2"
)
)
class DeeplabModelTest(tf.test.TestCase):
def testScaleDimensionOutput(self):
self.assertEqual(161, model.scale_dimension(321, 0.5))
self.assertEqual(193, model.scale_dimension(321, 0.6))
self.assertEqual(241, model.scale_dimension(321, 0.75))
def testWrongDeepLabVariant(self):
model_options = common.ModelOptions([])._replace(
model_variant='no_such_variant')
with self.assertRaises(ValueError):
model._get_logits(images=[], model_options=model_options)
def testBuildDeepLabv2(self):
batch_size = 2
crop_size = [41, 41]
# Test with two image_pyramids.
image_pyramids = [[1], [0.5, 1]]
# Test two model variants.
model_variants = ['xception_65', 'mobilenet_v2']
# Test with two output_types.
outputs_to_num_classes = {'semantic': 3,
'direction': 2}
expected_endpoints = [['merged_logits'],
['merged_logits',
'logits_0.50',
'logits_1.00']]
expected_num_logits = [1, 3]
for model_variant in model_variants:
model_options = common.ModelOptions(outputs_to_num_classes)._replace(
add_image_level_feature=False,
aspp_with_batch_norm=False,
aspp_with_separable_conv=False,
model_variant=model_variant)
for i, image_pyramid in enumerate(image_pyramids):
g = tf.Graph()
with g.as_default():
with self.test_session(graph=g, config=config):
inputs = tf.random_uniform(
(batch_size, crop_size[0], crop_size[1], 3))
outputs_to_scales_to_logits = model.multi_scale_logits(
inputs, model_options, image_pyramid=image_pyramid)
# Check computed results for each output type.
for output in outputs_to_num_classes:
scales_to_logits = outputs_to_scales_to_logits[output]
self.assertListEqual(sorted(scales_to_logits.keys()),
sorted(expected_endpoints[i]))
# Expected number of logits = len(image_pyramid) + 1, since the
# last logits is merged from all the scales.
self.assertEqual(len(scales_to_logits), expected_num_logits[i])
def testForwardpassDeepLabv3plus(self):
crop_size = [33, 33]
outputs_to_num_classes = {'semantic': 3}
model_options = common.ModelOptions(
outputs_to_num_classes,
crop_size,
output_stride=16
)._replace(
add_image_level_feature=True,
aspp_with_batch_norm=True,
logits_kernel_size=1,
model_variant='mobilenet_v2') # Employ MobileNetv2 for fast test.
g = tf.Graph()
with g.as_default():
with self.test_session(graph=g, config=config) as sess:
inputs = tf.random_uniform(
(1, crop_size[0], crop_size[1], 3))
outputs_to_scales_to_logits = model.multi_scale_logits(
inputs,
model_options,
image_pyramid=[1.0])
sess.run(tf.global_variables_initializer())
outputs_to_scales_to_logits = sess.run(outputs_to_scales_to_logits)
# Check computed results for each output type.
for output in outputs_to_num_classes:
scales_to_logits = outputs_to_scales_to_logits[output]
# Expect only one output.
self.assertEquals(len(scales_to_logits), 1)
for logits in scales_to_logits.values():
self.assertTrue(logits.any())
if __name__ == '__main__':
tf.test.main()
```
# 0.2. Training
これを読めばよい
https://github.com/tensorflow/models/blob/master/research/deeplab/g3doc/pascal.md
取りあえずPASCAL VOC 2012で動かす。
これでPASCALをダウンロードできる。
```
cd models/research/deeplab/datasets
sh download_and_convert_voc2012.sh
```
こちらのURLに
https://github.com/rishizek/tensorflow-deeplab-v3
以下のように書かれている。
```
Training
For training model, you first need to convert original data to the TensorFlow TFRecord format. This enables to accelerate training speed.
python create_pascal_tf_record.py --data_dir DATA_DIR \
--image_data_dir IMAGE_DATA_DIR \
--label_data_dir LABEL_DATA_DIR
```
多分、shell scriptでtf_recordに変換してくれているのだろう。
フォルダ構成
```
+ datasets
+ pascal_voc_seg
+ VOCdevkit
+ VOC2012
+ JPEGImages
+ SegmentationClass
+ tfrecord
+ exp
+ train_on_train_set
+ train
+ eval
+ vis
```
trainingを実行
+以下、フォーマット
+
```
cd models/research/
python deeplab/train.py \
--logtostderr \
--training_number_of_steps=30000 \
--train_split="train" \
--model_variant="xception_65" \
--atrous_rates=6 \
--atrous_rates=12 \
--atrous_rates=18 \
--output_stride=16 \
--decoder_output_stride=4 \
--train_crop_size=513 \
--train_crop_size=513 \
--train_batch_size=1 \
--dataset="pascal_voc_seg" \
--tf_initial_checkpoint=${PATH_TO_INITIAL_CHECKPOINT} \
--train_logdir=${PATH_TO_TRAIN_DIR} \
--dataset_dir=${PATH_TO_DATASET}
```
+
+なお、ゼロベースから学習させるには
+
+```
+ # Start the training.
+ slim.learning.train(
+ train_tensor,
+ logdir=FLAGS.train_logdir,
+ log_every_n_steps=FLAGS.log_steps,
+ master=FLAGS.master,
+ number_of_steps=FLAGS.training_number_of_steps,
+ is_chief=(FLAGS.task == 0),
+ session_config=session_config,
+ startup_delay_steps=startup_delay_steps,
+ # init_fn=train_utils.get_model_init_fn(
+ # FLAGS.train_logdir,
+ # FLAGS.tf_initial_checkpoint,
+ # FLAGS.initialize_last_layer,
+ # last_layers,
+ # ignore_missing_vars=True),
+ summary_op=summary_op,
+ save_summaries_secs=FLAGS.save_summaries_secs,
+ save_interval_secs=FLAGS.save_interval_secs)
+```
+
+train部のCheckpointを読み込んでいる部分をコメントアウトすればよい。
+
+# 0.3. Visualization
+以下を実行する。
+
+```
+python "${WORK_DIR}"/vis.py \
+ --logtostderr \
+ --vis_split="val" \
+ --model_variant="xception_65" \
+ --atrous_rates=6 \
+ --atrous_rates=12 \
+ --atrous_rates=18 \
+ --output_stride=16 \
+ --decoder_output_stride=4 \
+ --vis_crop_size=513 \
+ --vis_crop_size=513 \
+ --checkpoint_dir="${TRAIN_LOGDIR}" \
+ --vis_logdir="${VIS_LOGDIR}" \
+ --dataset_dir="${PASCAL_DATASET}" \
+ --max_number_of_iterations=1
+```
+
+次に以下を実行して表示
+```
+tensorboard --logdir ${VIS_LOGDIR}
+```
+
+
+# 1.0. オリジナルデータによる学習
+# 1.1. 【事前知識】データ生成部
+まずはlocal_test.shを見てみる。こんな表記がある。
+
+```
+# Go to datasets folder and download PASCAL VOC 2012 segmentation dataset.
+DATASET_DIR="datasets"
+cd "${WORK_DIR}/${DATASET_DIR}"
+sh download_and_convert_voc2012.sh
+```
+
+`sh download_and_convert_voc2012.sh`こいつでデータを変換していることがわかる。
+次にこいつを見てみる。
+
+```
+BASE_URL="http://host.robots.ox.ac.uk/pascal/VOC/voc2012/"
+FILENAME="VOCtrainval_11-May-2012.tar"
+
+download_and_uncompress "${BASE_URL}" "${FILENAME}"
+
+cd "${CURRENT_DIR}"
+
+# Root path for PASCAL VOC 2012 dataset.
+PASCAL_ROOT="${WORK_DIR}/VOCdevkit/VOC2012"
+
+# Remove the colormap in the ground truth annotations.
+SEG_FOLDER="${PASCAL_ROOT}/SegmentationClass"
+SEMANTIC_SEG_FOLDER="${PASCAL_ROOT}/SegmentationClassRaw"
+
+echo "Removing the color map in ground truth annotations..."
+python ./remove_gt_colormap.py \
+ --original_gt_folder="${SEG_FOLDER}" \
+ --output_dir="${SEMANTIC_SEG_FOLDER}"
+
+# Build TFRecords of the dataset.
+# First, create output directory for storing TFRecords.
+OUTPUT_DIR="${WORK_DIR}/tfrecord"
+mkdir -p "${OUTPUT_DIR}"
+
+IMAGE_FOLDER="${PASCAL_ROOT}/JPEGImages"
+LIST_FOLDER="${PASCAL_ROOT}/ImageSets/Segmentation"
+
+echo "Converting PASCAL VOC 2012 dataset..."
+python ./build_voc2012_data.py \
+ --image_folder="${IMAGE_FOLDER}" \
+ --semantic_segmentation_folder="${SEMANTIC_SEG_FOLDER}" \
+ --list_folder="${LIST_FOLDER}" \
+ --image_format="jpg" \
+ --output_dir="${OUTPUT_DIR}"
+```
+
+データをダウンロードした後に、`build_voc2012_data.py`でtf.recordの形式に変換していることがわかる。`build_voc2012_data.py`はこんな感じのソースコード。
+
+```
+def _convert_dataset(dataset_split):
+ """Converts the specified dataset split to TFRecord format.
+
+ Args:
+ dataset_split: The dataset split (e.g., train, test).
+
+ Raises:
+ RuntimeError: If loaded image and label have different shape.
+ """
+ dataset = os.path.basename(dataset_split)[:-4]
+ sys.stdout.write('Processing ' + dataset)
+ filenames = [x.strip('\n') for x in open(dataset_split, 'r')]
+ num_images = len(filenames)
+ num_per_shard = int(math.ceil(num_images / float(_NUM_SHARDS)))
+
+ image_reader = build_data.ImageReader('jpeg', channels=3)
+ label_reader = build_data.ImageReader('png', channels=1)
+
+ for shard_id in range(_NUM_SHARDS):
+ output_filename = os.path.join(
+ FLAGS.output_dir,
+ '%s-%05d-of-%05d.tfrecord' % (dataset, shard_id, _NUM_SHARDS))
+ with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
+ start_idx = shard_id * num_per_shard
+ end_idx = min((shard_id + 1) * num_per_shard, num_images)
+ for i in range(start_idx, end_idx):
+ sys.stdout.write('\r>> Converting image %d/%d shard %d' % (
+ i + 1, len(filenames), shard_id))
+ sys.stdout.flush()
+ # Read the image.
+ image_filename = os.path.join(
+ FLAGS.image_folder, filenames[i] + '.' + FLAGS.image_format)
+ image_data = tf.gfile.FastGFile(image_filename, 'rb').read()
+ height, width = image_reader.read_image_dims(image_data)
+ # Read the semantic segmentation annotation.
+ seg_filename = os.path.join(
+ FLAGS.semantic_segmentation_folder,
+ filenames[i] + '.' + FLAGS.label_format)
+ seg_data = tf.gfile.FastGFile(seg_filename, 'rb').read()
+ seg_height, seg_width = label_reader.read_image_dims(seg_data)
+ if height != seg_height or width != seg_width:
+ raise RuntimeError('Shape mismatched between image and label.')
+ # Convert to tf example.
+ example = build_data.image_seg_to_tfexample(
+ image_data, filenames[i], height, width, seg_data)
+ tfrecord_writer.write(example.SerializeToString())
+ sys.stdout.write('\n')
+ sys.stdout.flush()
+
+
+def main(unused_argv):
+ dataset_splits = tf.gfile.Glob(os.path.join(FLAGS.list_folder, '*.txt'))
+ for dataset_split in dataset_splits:
+ _convert_dataset(dataset_split)
+
+
+if __name__ == '__main__':
+ tf.app.run()
+```
+
+まずは、`FLAGS.list_folder`から学習データを見ているみたい。`datasets/pascal_voc_seg/VOCdevkit/VOC2012/ImageSets/Segmentation$`を見てみると、以下のファイルがある。
+
+```
+train.txt
+trainval.txt
+val.txt
+```
+
+`train.txt`にはこんな感じの内容が書いてある。
+
+```
+2007_000032
+2007_000039
+2007_000063
+2007_000068
+2007_000121
+2007_000170
+2007_000241
+2007_000243
+2007_000250
+2007_000256
+2007_000333
+2007_000363
+2007_000364
+2007_000392
+2007_000480
+2007_000504
+2007_000515
+2007_000528
+2007_000549
+2007_000584
+```
+
+ラベルデータの生成は、`SegmentationClass`フォルダの画像の色を全部消して、エッジ検出のみをした画像を生成する。それが`SegmentationClassRaw`。これがラベルデータとなる。
+
+実際`tf.record`に変換しているプログラムを動作させるにはこんな感じ。
+
+```
+python ./build_voc2012_data.py \
+ --image_folder="./pascal_voc_seg/VOCdevkit/VOC2012/JPEGImages" \
+ --semantic_segmentation_folder="./pascal_voc_seg/VOCdevkit/VOC2012/SegmentationClassRaw" \
+ --list_folder="./pascal_voc_seg/VOCdevkit/VOC2012/ImageSets/Segmentation" \
+ --image_format="jpg" \
+ --output_dir="./pascal_voc_seg/tfrecord"
+
+```
+
+
+次に`train.py`の中身を見ていると、こんな表記が。
+
+```
+ # Get dataset-dependent information.
+ dataset = segmentation_dataset.get_dataset(
+ FLAGS.dataset, FLAGS.train_split, dataset_dir=FLAGS.dataset_dir)
+```
+
+
+`segmentation_dataset.py`を見てみると、`tf.record`をデコードしているみたい。
+
+このコンフィグファイルを使っていることがわかる。
+
+```
+_PASCAL_VOC_SEG_INFORMATION = DatasetDescriptor(
+ splits_to_sizes={
+ 'train': 1464,
+ 'trainval': 2913,
+ 'val': 1449,
+ },
+ num_classes=21,
+ ignore_label=255,
+)
+```
+データ生成部を見るに、`num_classes`が識別する物体の種類
+`ignore_label`が物体を識別する線。これはクラスではなく境界なのでのぞく。
+255は白色という意味。Labelデータは1channelで読み込んでいるので、グレースケール値であることがわかる。
+
+次に`train.py`の中身を見ていると、こんな表記が。
+
+```
+ samples = input_generator.get(
+ dataset,
+ FLAGS.train_crop_size,
+ clone_batch_size,
+ min_resize_value=FLAGS.min_resize_value,
+ max_resize_value=FLAGS.max_resize_value,
+ resize_factor=FLAGS.resize_factor,
+ min_scale_factor=FLAGS.min_scale_factor,
+ max_scale_factor=FLAGS.max_scale_factor,
+ scale_factor_step_size=FLAGS.scale_factor_step_size,
+ dataset_split=FLAGS.train_split,
+ is_training=True,
+ model_variant=FLAGS.model_variant)
+ inputs_queue = prefetch_queue.prefetch_queue(
+ samples, capacity=128 * config.num_clones)
+```
+
+`input_generator.py`を見てみるとこんな表記が。
+ここで最終的なデータを作成しているっぽい
+
+```
+ original_image, image, label = input_preprocess.preprocess_image_and_label(
+ image,
+ label,
+ crop_height=crop_size[0],
+ crop_width=crop_size[1],
+ min_resize_value=min_resize_value,
+ max_resize_value=max_resize_value,
+ resize_factor=resize_factor,
+ min_scale_factor=min_scale_factor,
+ max_scale_factor=max_scale_factor,
+ scale_factor_step_size=scale_factor_step_size,
+ ignore_label=dataset.ignore_label,
+ is_training=is_training,
+ model_variant=model_variant)
+ sample = {
+ common.IMAGE: image,
+ common.IMAGE_NAME: image_name,
+ common.HEIGHT: height,
+ common.WIDTH: width
+ }
+```
+
+ここまでわかれば、オリジナルデータを用いて学習ができる。
+
+
+# 1.2. オリジナルデータの作成
+
+```genData.py
+from PIL import Image, ImageDraw
+import random
+
+im = Image.new('RGB', (512, 256), (128, 128, 128))
+draw = ImageDraw.Draw(im)
+
+draw.rectangle((200, 100, 300, 200), fill=(0, 192, 192), outline=(255, 255, 255))
+draw.line((350, 200, 450, 100), fill=(255, 255, 0), width=10)
+
+im.save('./illow_imagedraw.png', quality=100)
+
+
+gen_num = 800
+img_dir_gen = "./img/"
+lbl_dir_gen = "./lbl/"
+
+img_x_size =512
+img_y_size = 256
+
+rect_x_size = 50
+rect_y_size = 50
+
+def get_rand_color():
+ return (random.randrange(255), random.randrange(255), random.randrange(255))
+
+def get_rand_color2():
+ x = random.randrange(255)
+ return (x, x, x)
+
+for i in range (gen_num):
+ im = Image.new('RGB', (img_x_size, img_y_size), get_rand_color())
+ draw = ImageDraw.Draw(im)
+
+ # Image
+ px = random.randrange(img_x_size - rect_x_size)
+ py = random.randrange(img_y_size - rect_y_size)
+ draw.rectangle((px, py, px + rect_x_size, py + rect_y_size), fill=get_rand_color(), outline=(255, 255, 255))
+
+ px2 = random.randrange(img_x_size - rect_x_size)
+ py2 = random.randrange(img_y_size - rect_y_size)
+ draw.ellipse((px2, py2, px2 + rect_x_size, py2 + rect_y_size), fill=get_rand_color(), outline=(255, 255, 255))
+
+ im.save(img_dir_gen + str(i) + ".png", quality = 100)
+
+ # Label
+ im = Image.new('RGB', (img_x_size, img_y_size), (0, 0, 0))
+ draw = ImageDraw.Draw(im)
+ draw.rectangle((px, py, px + rect_x_size, py + rect_y_size), fill=(1, 1, 1), outline=(255, 255, 255))
+ draw.ellipse((px2, py2, px2 + rect_x_size, py2 + rect_y_size), fill=(2, 2, 2), outline=(255, 255, 255))
+
+ im.save(lbl_dir_gen + str(i) + ".png", quality = 100)
+
+```
+
+まずはこんな感じで、以下のようなデータを作る。
+
+![image.png](https://qiita-image-store.s3.amazonaws.com/0/95636/dc6d58dd-4d0e-ec50-3330-bc252145935f.png)
+
+矩形が1で、丸が2となっている。背景が0である。
+このため、クラスは3つ。255は白い線で除外対象。
+
+次にファイル名を書いたテキストファイルを作成し、
+
+```train.txt
+0
+1
+2
+3
+4
+...
+```
+
+以下のようなフォルダ構成で配置する。
+
+```
+*data
+ - img
+ - lbl
+ - lst
+```
+
+# 1.3. TFレコードの作成
+` build_voc2012_data.py`を以下のように変更する
+
+``` build_voc2012_data.py
+tf.app.flags.DEFINE_string(
+ 'semantic_segmentation_folder',
+ './pascal_voc_seg/VOCdevkit/VOC2012/SegmentationClassRaw',
+ 'Folder containing semantic segmentation annotations.')
+
+tf.app.flags.DEFINE_string(
+ 'list_folder',
+ './pascal_voc_seg/VOCdevkit/VOC2012/ImageSets/Segmentation',
+ 'Folder containing lists for training and validation')
+
+tf.app.flags.DEFINE_string(
+ 'output_dir',
+ './pascal_voc_seg/tfrecord',
+ 'Path to save converted SSTable of TensorFlow examples.')
+
+_NUM_SHARDS = 4
+
+FLAGS.image_folder = "/workspace/01_semantic_segmentation/datagen/data/img"
+FLAGS.semantic_segmentation_folder = "/workspace/01_semantic_segmentation/datagen/data/lbl"
+FLAGS.list_folder = "/workspace/01_semantic_segmentation/datagen/data/lst"
+FLAGS.image_format = "png"
+
+def _convert_dataset(dataset_split):
+ """Converts the specified dataset split to TFRecord format.
+
+ Args:
+ dataset_split: The dataset split (e.g., train, test).
+
+ Raises:
+ RuntimeError: If loaded image and label have different shape.
+ """
+```
+
+FLAGSに作成したデータのディレクトリを入れるだけ。
+あとは実行。そうするとTFレコードができあがる。
+
+# 1.4. データセットの追加
+`segmentation_dataset.py`に以下を追加する
+
+```segmentation_dataset.py
+_ORIGINAL_INFORMATION = DatasetDescriptor(
+ splits_to_sizes={
+ 'train': 300,
+ 'trainval': 300,
+ 'val': 300,
+ },
+ num_classes=3,
+ ignore_label=255,
+)
+
+...
+
+_DATASETS_INFORMATION = {
+ 'cityscapes': _CITYSCAPES_INFORMATION,
+ 'pascal_voc_seg': _PASCAL_VOC_SEG_INFORMATION,
+ 'ade20k': _ADE20K_INFORMATION,
+ 'original': _ORIGINAL_INFORMATION
+}
+```
+
+# 1.5. 学習
+あとは、`dataset`フラグに`original`を入れて、学習させるだけ!