A TensorFlow Implementation of Deep-Learning Super-Resolution (Deeply-Recursive Convolutional Network)

Posted at 2017-01-22

For super-resolution, the task of upscaling the resolution of images, I implemented a paper that was accepted at last year's CVPR 2016.

Original paper: "Deeply-Recursive Convolutional Network for Image Super-Resolution", CVPR 2016

The idea is to apply the same CNN over and over, cleaning up the image a little at a time, and finally blend all of the intermediate results into a single high-resolution output. The paper reports state-of-the-art results.
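
In symbols: writing $H_d$ for the feature map after the $d$-th recursion and $\mathrm{Rec}(\cdot)$ for the reconstruction layers, the final output is a blend of the per-recursion predictions with learned weights $w_d$, normalized by their sum (this is how the code below combines them):

```math
\hat{y} = \sum_{d=0}^{D} \frac{w_d}{\sum_{d'=0}^{D} w_{d'}} \,\mathrm{Rec}(H_d)
```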

The model diagram is shown below. It may look like a lot of blocks, but the diagonal chain of blocks shares a single set of weights, so the actual number of parameters is not that large.

(Figures: drcn result.001.png, drcn result.002.png — the DRCN model diagram)

Unfortunately, although my results are generally higher than the other algorithms, I could not reach the PSNR reported in the paper. I did tweak the initial weight values slightly because training would not converge otherwise, but apart from that it should be almost identical to the paper's setup.

The code is available on GitHub.

PSNR (dB) for ×2 upscaling ("DRCN (paper)" is the result reported in the paper):

| DataSet | Bicubic | SRCNN | SelfEx | My Result | DRCN (paper) |
|:---|---:|---:|---:|---:|---:|
| Set5 ×2 | 33.66 | 36.66 | 36.49 | 36.92 | 37.63 |
| Set14 ×2 | 30.24 | 32.42 | 32.22 | 32.47 | 33.04 |
| BSD100 ×2 | 29.56 | 31.36 | 31.18 | 31.47 | 31.85 |
| Urban100 ×2 | 26.88 | 29.50 | 29.54 | 29.31 | 30.75 |
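
For reference, PSNR is the usual log-scale reconstruction metric used in these benchmarks. A minimal sketch of how such values are computed, assuming 8-bit images as NumPy arrays (actual SR evaluation protocols typically convert to the luminance channel and crop the borders first):

```python
import numpy as np

def psnr(reference, reconstructed, max_value=255.0):
    """Peak signal-to-noise ratio in dB (higher is better)."""
    diff = reference.astype(np.float64) - reconstructed.astype(np.float64)
    mse = np.mean(diff ** 2)
    if mse == 0:
        return float("inf")  # identical images
    return 10.0 * np.log10(max_value ** 2 / mse)
```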

For reference, here is just the model-definition part of the code. It is written against the TensorFlow API of early 2017, hence `tf.mul`, `tf.sub`, and `tf.div` (renamed `tf.multiply`, `tf.subtract`, and `tf.divide` in TensorFlow 1.0), and it assumes `import tensorflow as tf`, `import numpy as np`, and the `util` helper module from the repository.

```python
  def build_embedding_graph(self):
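    # Embedding network: two conv + ReLU layers that map the (interpolated)
    # low-resolution input x into feature space; y is the ground-truth
    # high-resolution image.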

    self.x = tf.placeholder(tf.float32, shape=[None, None, None, self.channels], name="X")
    self.y = tf.placeholder(tf.float32, shape=[None, None, None, self.channels], name="Y")

    # H-1 conv
    self.Wm1_conv = util.weight([self.cnn_size, self.cnn_size, self.channels, self.feature_num],
                                               stddev=self.weight_dev, name="W-1_conv", initializer=self.initializer)
    self.Bm1_conv = util.bias([self.feature_num], name="B-1")
    Hm1_conv = util.conv2d_with_bias_and_relu(self.x, self.Wm1_conv, self.cnn_stride, self.Bm1_conv, name="H-1")

    # H0 conv
    self.W0_conv = util.weight([self.cnn_size, self.cnn_size, self.feature_num, self.feature_num],
                                             stddev=self.weight_dev, name="W0_conv", initializer=self.initializer)
    self.B0_conv = util.bias([self.feature_num], name="B0")
    self.H_conv[0] = util.conv2d_with_bias_and_relu(Hm1_conv, self.W0_conv, self.cnn_stride, self.B0_conv, name="H0")

    if self.summary:
      # convert to tf.summary.image format [batch_num, height, width, channels]
      Wm1_transposed = tf.transpose(self.Wm1_conv, [3, 0, 1, 2])
      tf.summary.image("W-1" + self.model_name, Wm1_transposed, max_outputs=self.log_weight_image_num)
      util.add_summaries("B-1:" + self.model_name, self.Bm1_conv, mean=True, max=True, min=True)
      util.add_summaries("W-1:" + self.model_name, self.Wm1_conv, mean=True, max=True, min=True)

      util.add_summaries("B0:" + self.model_name, self.B0_conv, mean=True, max=True, min=True)
      util.add_summaries("W0:" + self.model_name, self.W0_conv, mean=True, max=True, min=True)

  def build_inference_graph(self):
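    # Inference network: a single conv layer whose weights (W_conv, B_conv)
    # are shared by every recursion; applying it inference_depth times
    # produces H_conv[1] .. H_conv[inference_depth].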

    if self.inference_depth <= 0:
      return

    self.W_conv = util.diagonal_weight([self.cnn_size, self.cnn_size, self.feature_num, self.feature_num], name="W_conv")
    self.B_conv = util.bias([self.feature_num], name="B")

    for i in range(0, self.inference_depth):
      self.H_conv[i+1] = util.conv2d_with_bias_and_relu(self.H_conv[i], self.W_conv, 1, self.B_conv, name="H%d"%(i+1))

    if self.summary:
      util.add_summaries("W:" + self.model_name, self.W_conv, mean=True, max=True, min=True)
      util.add_summaries("B:" + self.model_name, self.B_conv, mean=True, max=True, min=True)

  def build_reconstruction_graph(self):
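    # Reconstruction network: decode each recursion's features H_conv[i]
    # into an image prediction Y2_conv[i], then output their learned,
    # normalized weighted average as the final estimate y_.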

    # HD+1 conv
    self.WD1_conv = util.weight([self.cnn_size, self.cnn_size, self.feature_num, self.feature_num],
                                             stddev=self.weight_dev, name="WD1_conv", initializer=self.initializer)
    self.BD1_conv = util.bias([self.feature_num], name="BD1")

    # HD+2 conv
    self.WD2_conv = util.weight([self.cnn_size, self.cnn_size, self.feature_num, self.channels],
                                              stddev=self.weight_dev, name="WD2_conv", initializer=self.initializer)
    self.BD2_conv = util.bias([self.channels], name="BD2")  # one bias per output channel

    self.Y1_conv = (self.inference_depth + 1) * [None]
    self.Y2_conv = (self.inference_depth + 1) * [None]
    self.W = tf.Variable( np.full(fill_value=1.0 / (self.inference_depth + 1), shape=[self.inference_depth + 1], dtype=np.float32), name="layer_weight")
    W_sum = tf.reduce_sum(self.W)

    for i in range(0, self.inference_depth+1):
      self.Y1_conv[i] = util.conv2d_with_bias_and_relu(self.H_conv[i], self.WD1_conv, self.cnn_stride, self.BD1_conv, name="Y%d_1"%i)
      self.Y2_conv[i] = util.conv2d_with_bias_and_relu(self.Y1_conv[i], self.WD2_conv, self.cnn_stride, self.BD2_conv, name="Y%d_2"%i)
      y_ = tf.mul(self.W[i], self.Y2_conv[i], name="Y%d_mul" % i)
      y_ = tf.div(y_, W_sum, name="Y%d_div" % i)
      if i == 0:
        self.y_ = y_
      else:
        self.y_ = self.y_ + y_

    if self.summary:
      util.add_summaries("BD1:" + self.model_name, self.BD1_conv)
      util.add_summaries("WD1:" + self.model_name, self.WD1_conv, mean=True, max=True, min=True)
      util.add_summaries("WD2:" + self.model_name, self.WD2_conv, mean=True, max=True, min=True)


  def build_optimizer(self):
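    # Composite loss: loss1 averages the MSE of the intermediate predictions,
    # loss2 is the MSE of the final ensemble output; the two are blended by
    # alpha, with optional L2 weight decay (loss3) scaled by beta.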

    self.lr_input = tf.placeholder(tf.float32, shape=[], name="LearningRate")
    self.loss_alpha_input = tf.placeholder(tf.float32, shape=[], name="Alpha")

    mse = tf.reduce_mean(tf.square(self.y_ - self.y), name="Loss1")
    if self.debug:
      mse = tf.Print(mse, [mse], message="MSE: ")

    if self.loss_alpha == 0.0 or self.inference_depth == 0:
      loss = mse
    else:
      loss1_mse = self.inference_depth * [None]

      for i in range(0, self.inference_depth):
        inference_sub = tf.sub(self.y, self.Y2_conv[i], name="Loss1_%d_sub" % i)
        inference_square = tf.square(inference_sub, name="Loss1_%d_squ" % i)
        loss1_mse[i] = tf.reduce_mean(inference_square, name="Loss1_%d" % i)

      loss1 = loss1_mse[0]
      for i in range(1, self.inference_depth):
        if i == self.inference_depth - 1:  # name the last addition "Loss1"
          loss1 = tf.add(loss1, loss1_mse[i], name="Loss1")
        else:
          loss1 = tf.add(loss1, loss1_mse[i], name="Loss1_%d_add" % i)

      loss1 = tf.mul(1.0 / self.inference_depth, loss1, name="Loss1_weight")
      loss2 = mse
      if self.visualize:
        tf.summary.scalar("L1:" + self.model_name, loss1)
        tf.summary.scalar("L2:" + self.model_name, loss2)
      loss1 = tf.mul(self.loss_alpha_input, loss1, name="Loss1_alpha")
      loss2 = tf.mul(1 - self.loss_alpha_input, loss2, name="Loss2_alpha")

      if self.loss_beta > 0.0:
        with tf.name_scope('Loss3') as scope:
          loss3 = tf.nn.l2_loss(self.Wm1_conv) + tf.nn.l2_loss(self.W0_conv) \
                  + tf.nn.l2_loss(self.W_conv) + tf.nn.l2_loss(self.WD1_conv) \
                  + tf.nn.l2_loss(self.WD2_conv)
          loss3 *= self.loss_beta

        if self.visualize:
          tf.summary.scalar("L3:" + self.model_name, loss3)
        loss = loss1 + loss2 + loss3
      else:
        loss = loss1 + loss2

    if self.visualize:
      tf.summary.scalar("Loss:" + self.model_name, loss)

    self.loss = loss
    self.mse = mse
    self.train_step = self.add_optimizer_op(loss, self.lr_input)
```
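
As a sanity check on the structure, here is a tiny self-contained NumPy toy of the same dataflow: one shared "layer" applied D times, with the final output a normalized weighted blend of each depth's reconstruction. All names, shapes, and values are illustrative, not part of the repository:

```python
import numpy as np

rng = np.random.default_rng(0)
D, f = 5, 8                                      # recursions, feature width (toy sizes)
x = rng.normal(size=f)                           # "embedded" input features
W_shared = rng.normal(scale=0.1, size=(f, f))    # one weight set, reused D times
W_rec = rng.normal(scale=0.1, size=f)            # toy "reconstruction" layer
w = np.full(D + 1, 1.0 / (D + 1))                # blend weights (uniform init)

H = [x]
for _ in range(D):                               # same weights at every recursion
    H.append(np.maximum(W_shared @ H[-1], 0.0))  # conv + ReLU stand-in

preds = np.array([W_rec @ h for h in H])         # per-depth reconstructions
y_hat = np.sum(w * preds) / np.sum(w)            # normalized weighted ensemble
print(y_hat)
```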