LoginSignup
5
3

More than 3 years have passed since last update.

GoogleのDepth from Video in the Wild、動かしたかったメモ

Last updated at Posted at 2019-09-06

更新

  • 2019/09/09
    • KITTI Datasetの画像を入力するとそれらしい結果画像がでた!

概要

  1. CUDA10.0のDocker imageで環境構築
  2. tensorflowのインストール
  3. Depth from Video in the Wildで結果出力させた

結論

  • tensorflowとdeep全然わからん
  • KITTI Datasetの画像を入力にするとそれらしい結果画像がでた?

テスト

環境構築

  • 入力ディレクトリ
    • /mnt/HDD/workspace/google-research/depth_from_video_in_the_wild/input
  • 出力ディレクトリ
    • /mnt/HDD/workspace/google-research/depth_from_video_in_the_wild/output
docker pull nvidia/cuda:10.0-cudnn7-devel-ubuntu16.04
docker run --runtime=nvidia -it --rm -e DISPLAY=$DISPLAY -v /tmp/.X11-unix/:/tmp/.X11-unix -e QT_X11_NO_MITSHM=1 -v /mnt/HDD/workspace/:/work -w=/work nvidia/cuda:10.0-cudnn7-devel-ubuntu16.04
cd /mnt/HDD/workspace/ && git clone https://github.com/google-research/google-research.git
mkdir google-research/depth_from_video_in_the_wild/output
mkdir google-research/depth_from_video_in_the_wild/input

関連インストール

apt update
apt-get install -yq --no-install-recommends --no-upgrade software-properties-common curl  python3-dev python3-tk  locales  libatlas-base-dev libprotobuf-dev libleveldb-dev  libsnappy-dev libhdf5-serial-dev protobuf-compiler libboost-all-dev libgflags-dev libgoogle-glog-dev liblmdb-dev opencl-headers ocl-icd-opencl-dev libviennacl-dev libopenexr-dev libsm6 libxext6 libxrender-dev
curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
python3 get-pip.py --force-reinstall
pip install --no-cache-dir Cython numpy protobuf openexr tensorflow-gpu tensorflow-graphics-gpu matplotlib opencv-python
cd /work/google-research/
mkdir depth_from_video_in_the_wild/tmp

サンプル

test.py
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import io
import math
import os
import random
import time
from absl import app
from absl import flags
from absl import logging
import numpy as np
import tensorflow as tf
import cv2
import fnmatch
import matplotlib.pyplot as plt

from depth_from_video_in_the_wild import model

# Short alias for the TF1 file API used by the helpers below.
gfile = tf.gfile
MAX_TO_KEEP = 1000000  # Maximum number of checkpoints to keep.

# Command-line flags. Most of them are forwarded unchanged to model.Model()
# in main().

flags.DEFINE_string('data_dir', None, 'Preprocessed data.')

flags.DEFINE_string('file_extension', 'png', 'Image data file extension.')

flags.DEFINE_float('learning_rate', 1e-4, 'Adam learning rate.')

flags.DEFINE_float('reconstr_weight', 0.85, 'Frame reconstruction loss weight.')

flags.DEFINE_float('ssim_weight', 3.0, 'SSIM loss weight.')

flags.DEFINE_float('smooth_weight', 1e-2, 'Smoothness loss weight.')

flags.DEFINE_float('depth_consistency_loss_weight', 0.01,
                   'Depth consistency loss weight')

flags.DEFINE_integer('batch_size', 1, 'The size of a sample batch')

flags.DEFINE_integer('img_height', 128, 'Input frame height.')

flags.DEFINE_integer('img_width', 416, 'Input frame width.')

flags.DEFINE_integer('queue_size', 2000,
                     'Items in queue. Use smaller number for local debugging.')

flags.DEFINE_integer('seed', 8964, 'Seed for random number generators.')

flags.DEFINE_float('weight_reg', 1e-2, 'The amount of weight regularization to '
                   'apply. This has no effect on the ResNet-based encoder '
                   'architecture.')

flags.DEFINE_string('checkpoint_dir', None, 'Directory to save model '
                    'checkpoints.')

flags.DEFINE_integer('train_steps', int(1e6), 'Number of training steps.')

flags.DEFINE_integer('summary_freq', 100, 'Save summaries every N steps.')

flags.DEFINE_bool('debug', False, 'If true, one training step is performed and '
                  'the results are dumped to a folder for debugging.')

flags.DEFINE_string('input_file', 'train', 'Input file name')

flags.DEFINE_float('rotation_consistency_weight', 1e-3, 'Weight of rotation '
                   'cycle consistency loss.')

flags.DEFINE_float('translation_consistency_weight', 1e-2, 'Weight of '
                   'translation consistency loss.')

flags.DEFINE_integer('foreground_dilation', 8, 'Dilation of the foreground '
                     'mask (in pixels).')

flags.DEFINE_boolean('learn_intrinsics', True, 'Whether to learn camera '
                     'intrinsics.')

flags.DEFINE_boolean('boxify', True, 'Whether to convert segmentation masks to '
                     'bounding boxes.')

flags.DEFINE_string('imagenet_ckpt', None, 'Path to an imagenet checkpoint to '
                    'initialize from.')


FLAGS = flags.FLAGS
# Both flags must be given on the command line; they have no usable default.
flags.mark_flag_as_required('data_dir')
flags.mark_flag_as_required('checkpoint_dir')


def load(filename):
  """Loads a NumPy array from a file through gfile.

  Args:
    filename: Path of the file to read.

  Returns:
    Whatever `np.load` decodes from the file contents.
  """
  # Read in binary mode: io.BytesIO requires bytes, whereas a text-mode read
  # yields a decoded str under Python 3.
  with gfile.Open(filename, 'rb') as f:
    return np.load(io.BytesIO(f.read()))


def _print_losses(dir1):
  """Prints every array stored in `dir1` whose file name contains 'loss'."""
  loss_files = [name for name in gfile.ListDirectory(dir1) if 'loss' in name]
  for name in loss_files:
    print ('----------', name, end=' ')
    values = load(os.path.join(dir1, name)).astype(float)
    print (values)


def main(_):
  """Builds the model in inference mode from FLAGS and hands it to `_test`."""
  print("\ntest\n")
  # Everything except `is_training` comes straight from the command line.
  model_kwargs = dict(
      boxify=FLAGS.boxify,
      data_dir=FLAGS.data_dir,
      file_extension=FLAGS.file_extension,
      is_training=False,
      foreground_dilation=FLAGS.foreground_dilation,
      learn_intrinsics=FLAGS.learn_intrinsics,
      learning_rate=FLAGS.learning_rate,
      reconstr_weight=FLAGS.reconstr_weight,
      smooth_weight=FLAGS.smooth_weight,
      ssim_weight=FLAGS.ssim_weight,
      translation_consistency_weight=FLAGS.translation_consistency_weight,
      rotation_consistency_weight=FLAGS.rotation_consistency_weight,
      batch_size=FLAGS.batch_size,
      img_height=FLAGS.img_height,
      img_width=FLAGS.img_width,
      weight_reg=FLAGS.weight_reg,
      depth_consistency_loss_weight=FLAGS.depth_consistency_loss_weight,
      queue_size=FLAGS.queue_size,
      input_file=FLAGS.input_file,
  )
  inference_model = model.Model(**model_kwargs)
  print("\ntest1\n")

  _test(
      inference_model,
      FLAGS.checkpoint_dir,
      FLAGS.train_steps,
      FLAGS.summary_freq)

#  if FLAGS.debug:
#    _print_losses(os.path.join(FLAGS.checkpoint_dir, 'debug'))


def _test(inference_model, checkpoint_dir, train_steps, summary_freq):
  """Runs depth inference on every input image and saves the results.

  Weights are restored from the latest checkpoint in `checkpoint_dir` or,
  when there is none, from `FLAGS.imagenet_ckpt`. Every `*.png` found under
  the hard-coded input directory is then resized to
  (`FLAGS.img_width`, `FLAGS.img_height`), passed through the model one image
  at a time, and two files are written to the mirrored folder under the
  hard-coded output directory: the raw prediction (`.npy`) and a visualization
  (`.png`) with the input image stacked on top of the rendered depth.

  Args:
    inference_model: Object exposing `inference_depth(images, sess)`.
    checkpoint_dir: Directory searched for the latest checkpoint; also passed
      to the Supervisor as its logdir.
    train_steps: Unused; kept so existing callers keep working.
    summary_freq: Unused; kept so existing callers keep working.
  """
  input_dir = "/work/google-research/depth_from_video_in_the_wild/input"
  output_dir = "/work/google-research/depth_from_video_in_the_wild/output"

  saver = tf.train.Saver()

  sv = tf.train.Supervisor(logdir=checkpoint_dir, save_summaries_secs=0,
                           saver=None)
  config = tf.ConfigProto()
  # config.gpu_options.allow_growth = True
  config.gpu_options.per_process_gpu_memory_fraction=0.8
  with sv.managed_session(config=config) as sess:
    logging.info('Attempting to resume training from %s...', checkpoint_dir)
    checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
    logging.info('Last checkpoint found: %s', checkpoint)
    if checkpoint:
      print("checkpoint?")
      saver.restore(sess, checkpoint)
    elif FLAGS.imagenet_ckpt:
      logging.info('Restoring pretrained weights from %s', FLAGS.imagenet_ckpt)
      print("imagenet_ckpt?")
      saver.restore(sess, FLAGS.imagenet_ckpt)
    else:
      # Make it visible that the predictions will not come from trained weights.
      logging.warning('No checkpoint found in %s and --imagenet_ckpt is not '
                      'set; no weights were restored.', checkpoint_dir)

    print("\ntest2\n")

    im_files, basepath_in = collect_input_images(input_dir, None, "png")
    print(im_files)
    print("\ntest3\n")
    output_dirs = create_output_dirs(im_files, basepath_in, output_dir)
    # The model was built with FLAGS.img_width/img_height, so feed it images
    # of exactly that size. cv2-style sizes are (width, height).
    target_size = (FLAGS.img_width, FLAGS.img_height)
    for i, (im_file, out_dir) in enumerate(zip(im_files, output_dirs)):
      logging.info('%s of %s files processed.', i, len(im_files))

      # Read image and run inference on a batch holding just this image.
      print(im_file)
      im = load_image(im_file, resize=target_size)
      print("\ntest4\n")
      est_depth = inference_model.inference_depth([im], sess)
      print("\ntest5\n")
      color_map = normalize_depth_for_display(np.squeeze(est_depth[0]))
      print("\ntest6\n")
      visualization = np.concatenate((im, color_map), axis=0)
      # Save raw prediction and color visualization. Extract filename
      # without extension from full path: e.g. path/to/input_dir/folder1/
      # file1.png -> file1
      filename_root = os.path.splitext(os.path.basename(im_file))[0]
      output_raw = os.path.join(out_dir, filename_root + '.npy')
      output_vis = os.path.join(out_dir, filename_root + '.png')
      with gfile.Open(output_raw, 'wb') as f:
        np.save(f, est_depth[0])
      save_image(output_vis, visualization, "png")


def load_image(img_file, resize=None, interpolation='linear'):
  """Loads an image from disk as float32 RGB. Output value range: [0,1].

  Args:
    img_file: Path of the image file to decode.
    resize: Optional (width, height) target size, in the order `cv2.resize`
      expects. The image is resized only when its current size differs.
    interpolation: 'linear' selects `cv2.INTER_LINEAR`; any other value
      selects `cv2.INTER_NEAREST`.

  Returns:
    The decoded image as a float32 array, channels in RGB order, scaled by
    1/255.

  Raises:
    ValueError: If the file contents cannot be decoded as an image.
  """
  # Use a context manager so the file handle is always closed.
  with tf.io.gfile.GFile(img_file, 'rb') as f:
    im_data = np.frombuffer(f.read(), np.uint8)
  im = cv2.imdecode(im_data, cv2.IMREAD_COLOR)
  if im is None:
    raise ValueError('Could not decode image file: %s' % img_file)
  im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
  # im.shape is (height, width, channels) while `resize` is (width, height),
  # so compare against the swapped pair.
  if resize and tuple(resize) != (im.shape[1], im.shape[0]):
    ip = cv2.INTER_LINEAR if interpolation == 'linear' else cv2.INTER_NEAREST
    im = cv2.resize(im, tuple(resize), interpolation=ip)
  return np.array(im, dtype=np.float32) / 255.0

def save_image(img_file, im, file_extension):
  """Saves an RGB image to disk. Expected input value range: [0,1].

  Args:
    img_file: Destination path.
    im: RGB image array; it is multiplied by 255 and cast to uint8.
    file_extension: Encoder selector without the leading dot, e.g. 'png'.

  Raises:
    ValueError: If OpenCV reports that encoding failed.
  """
  im = (im * 255.0).astype(np.uint8)
  im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
  # Encode before touching the destination so a failed encode does not leave
  # an empty file behind.
  ok, im_data = cv2.imencode('.%s' % file_extension, im)
  if not ok:
    raise ValueError('Could not encode image as .%s' % file_extension)
  with gfile.Open(img_file, 'wb') as f:
    f.write(im_data.tobytes())

def collect_input_images(input_dir, input_list_file, file_extension):
  """Collects all input images that are to be processed.

  Args:
    input_dir: Directory searched recursively for `*.<file_extension>` files.
      Takes precedence over `input_list_file` when not None.
    input_list_file: Text file listing one image path per line, each relative
      to the directory containing the list file. Used only when `input_dir`
      is None.
    file_extension: Extension (without the dot) matched under `input_dir`.

  Returns:
    A tuple `(im_files, basepath_in)`: the sorted image paths, excluding any
    path containing 'disp', '-seg', '-fseg' or '-flip', and the base directory
    the paths were collected from.

  Raises:
    ValueError: If both `input_dir` and `input_list_file` are None.
  """
  if input_dir is not None:
    im_files = _recursive_glob(input_dir, '*.' + file_extension)
    basepath_in = os.path.normpath(input_dir)
  elif input_list_file is not None:
    im_files = read_text_lines(input_list_file)
    basepath_in = os.path.dirname(input_list_file)
    im_files = [os.path.join(basepath_in, f) for f in im_files]
  else:
    raise ValueError('Either input_dir or input_list_file must be provided.')
  # Drop derived files; the markers are matched anywhere in the path.
  excluded_markers = ('disp', '-seg', '-fseg', '-flip')
  im_files = [f for f in im_files
              if not any(marker in f for marker in excluded_markers)]
  return sorted(im_files), basepath_in

def read_text_lines(filepath):
  """Returns the lines of a text file with trailing whitespace stripped."""
  with tf.gfile.Open(filepath, 'r') as text_file:
    raw_lines = text_file.readlines()
  return [raw.rstrip() for raw in raw_lines]

def _recursive_glob(treeroot, pattern):
  """Returns paths of all files under `treeroot` whose name matches `pattern`.

  The tree is walked with `os.walk` and names are matched with
  `fnmatch.filter`; each result is the containing directory joined with the
  file name.
  """
  return [
      os.path.join(dirpath, name)
      for dirpath, _, names in os.walk(treeroot)
      for name in fnmatch.filter(names, pattern)
  ]

def normalize_depth_for_display(depth, pc=95, crop_percent=0, normalizer=None,
                                cmap='plasma'):
  """Renders a depth map as an RGB image of its normalized disparity.

  The depth is inverted into disparity, divided by `normalizer` (or, when that
  is None, by the `pc`-th percentile of the disparity), clipped to [0, 1],
  mapped through the 'gray' colormap and cropped to the top
  `1 - crop_percent` fraction of rows.

  NOTE: the `cmap` argument is accepted but ignored; the 'gray' colormap is
  always used.
  """
  inv_depth = 1.0 / (depth + 1e-6)
  if normalizer is None:
    scale = np.percentile(inv_depth, pc) + 1e-6
  else:
    scale = normalizer
  inv_depth /= scale
  inv_depth = np.clip(inv_depth, 0, 1)
  rgb = gray2rgb(inv_depth, cmap='gray')
  rows_kept = int(rgb.shape[0] * (1 - crop_percent))
  return rgb[:rows_kept]

def gray2rgb(im, cmap='plasma'):
  """Maps an image through a matplotlib colormap and drops the 4th channel."""
  colormap = plt.get_cmap(cmap)
  colored = colormap(im.astype(np.float32))
  has_extra_channel = colored.shape[2] > 3
  # Remove channel index 3 along the last axis when the colormap produced one.
  return np.delete(colored, 3, 2) if has_extra_channel else colored

def create_output_dirs(im_files, basepath_in, output_dir):
  """Mirrors each image's folder under `output_dir`, creating it if needed.

  Returns a list with one output directory per entry of `im_files`, in the
  same order.
  """
  output_dirs = []
  for im_file in im_files:
    # Folder of the image relative to the input base path.
    rel_folder = os.path.relpath(os.path.dirname(im_file), basepath_in)
    target_folder = os.path.join(output_dir, rel_folder)
    if not gfile.IsDirectory(target_folder):
      gfile.MakeDirs(target_folder)
    output_dirs.append(target_folder)
  return output_dirs

# Run only when executed as a script: hand `main` to absl's app runner.
if __name__ == '__main__':
  app.run(main)

実行

python3 -m depth_from_video_in_the_wild.test  --data_dir=depth_from_video_in_the_wild/input --imagenet_ckpt=depth_from_video_in_the_wild/cityscapes_kitti_learned_intrinsics/model-1000977 --checkpoint_dir=depth_from_video_in_the_wild/tmp

結果

0000000037.png
0000000038.png
0000000039.png

まとめ

  • 画像1枚のdepth推定だと、精度は微妙...Monodepth2のほうが良さそう?
  • 動画できちんと動かす方法、だれか教えて...
5
3
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
5
3