A TensorFlow Implementation of Deep-Learning Super-Resolution (Deeply-Recursive Convolutional Network)

This is an implementation of a super-resolution method (raising the resolution of images) from a paper accepted at last year's CVPR 2016.

Original paper: "Deeply-Recursive Convolutional Network for Image Super-Resolution", CVPR 2016

The idea is to apply the same CNN over and over, cleaning up the image a little at a time, and finally blend all of the intermediate results into a single high-resolution image. The paper reports state-of-the-art results.

The model diagram is shown below. It looks like a lot of blocks, but the diagonal chain of blocks shares its weights, so the actual number of parameters is not that large.

[Figure: drcn result.001.png (model diagram)]

[Figure: drcn result.002.png]
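
To make the recursion concrete before the full code, here is a minimal sketch of the forward pass. The `embed`, `apply_shared_cnn`, and `reconstruct` helpers are hypothetical stand-ins for the actual conv layers defined later:

```python
import numpy as np

def drcn_forward(x, embed, apply_shared_cnn, reconstruct, layer_weights):
    """Sketch of DRCN inference: one shared CNN applied recursively,
    then a learned weighted blend of every recursion's reconstruction.
    embed / apply_shared_cnn / reconstruct are hypothetical stand-ins."""
    h = embed(x)                          # embedding net
    candidates = [reconstruct(h)]         # output at recursion depth 0
    for _ in range(len(layer_weights) - 1):
        h = apply_shared_cnn(h)           # the same weights at every recursion
        candidates.append(reconstruct(h))
    w = np.asarray(layer_weights, dtype=np.float64)
    w /= w.sum()                          # normalize the learned blend weights
    return sum(wi * ci for wi, ci in zip(w, candidates))
```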

Unfortunately, while my results are mostly higher than the other algorithms, I could not reach the paper's PSNR. I tweaked the weight initialization slightly because training would not converge otherwise, but apart from that the implementation should be almost identical to the paper.

The code is available on GitHub.

PSNR (dB):

| DataSet | Bicubic | SRCNN | SelfEx | My Result | DRCN (paper) |
|:--|:--|:--|:--|:--|:--|
| Set5 x2 | 33.66 | 36.66 | 36.49 | 36.92 | 37.63 |
| Set14 x2 | 30.24 | 32.42 | 32.22 | 32.47 | 33.04 |
| BSD100 x2 | 29.56 | 31.36 | 31.18 | 31.47 | 31.85 |
| Urban100 x2 | 26.88 | 29.50 | 29.54 | 29.31 | 30.75 |
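
For reference, the PSNR figures are the usual peak signal-to-noise ratio computed from the MSE against the ground truth (benchmarks like these are usually evaluated on the luminance channel only). A minimal sketch, assuming 8-bit images:

```python
import numpy as np

def psnr(ground_truth, estimate, max_value=255.0):
    """Peak signal-to-noise ratio in dB; higher means closer to the original."""
    diff = np.asarray(ground_truth, np.float64) - np.asarray(estimate, np.float64)
    mse = np.mean(diff ** 2)
    return 10.0 * np.log10(max_value ** 2 / mse)
```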

For reference, here is just the model-definition part of the code (it relies on `tensorflow` as `tf`, `numpy` as `np`, and the repo's `util` helpers).

```python
def build_embedding_graph(self):
    # embedding net: two convs mapping the interpolated input into feature space

    self.x = tf.placeholder(tf.float32, shape=[None, None, None, self.channels], name="X")
    self.y = tf.placeholder(tf.float32, shape=[None, None, None, self.channels], name="Y")

    # one slot per recursion output: H_conv[0] .. H_conv[inference_depth]
    self.H_conv = (self.inference_depth + 1) * [None]

    # H-1 conv
    self.Wm1_conv = util.weight([self.cnn_size, self.cnn_size, self.channels, self.feature_num],
                                stddev=self.weight_dev, name="W-1_conv", initializer=self.initializer)
    self.Bm1_conv = util.bias([self.feature_num], name="B-1")
    Hm1_conv = util.conv2d_with_bias_and_relu(self.x, self.Wm1_conv, self.cnn_stride, self.Bm1_conv, name="H-1")

    # H0 conv
    self.W0_conv = util.weight([self.cnn_size, self.cnn_size, self.feature_num, self.feature_num],
                               stddev=self.weight_dev, name="W0_conv", initializer=self.initializer)
    self.B0_conv = util.bias([self.feature_num], name="B0")
    self.H_conv[0] = util.conv2d_with_bias_and_relu(Hm1_conv, self.W0_conv, self.cnn_stride, self.B0_conv, name="H0")

    if self.summary:
        # convert to tf.summary.image format [batch_num, height, width, channels]
        Wm1_transposed = tf.transpose(self.Wm1_conv, [3, 0, 1, 2])
        tf.summary.image("W-1" + self.model_name, Wm1_transposed, max_outputs=self.log_weight_image_num)
        util.add_summaries("B-1:" + self.model_name, self.Bm1_conv, mean=True, max=True, min=True)
        util.add_summaries("W-1:" + self.model_name, self.Wm1_conv, mean=True, max=True, min=True)

        util.add_summaries("B0:" + self.model_name, self.B0_conv, mean=True, max=True, min=True)
        util.add_summaries("W0:" + self.model_name, self.W0_conv, mean=True, max=True, min=True)
```

```python
def build_inference_graph(self):
    # inference net: a single conv applied recursively with shared weights

    if self.inference_depth <= 0:
        return

    self.W_conv = util.diagonal_weight([self.cnn_size, self.cnn_size, self.feature_num, self.feature_num],
                                       name="W_conv")
    self.B_conv = util.bias([self.feature_num], name="B")

    for i in range(0, self.inference_depth):
        # the same W_conv / B_conv are reused at every recursion depth
        self.H_conv[i + 1] = util.conv2d_with_bias_and_relu(self.H_conv[i], self.W_conv, 1, self.B_conv,
                                                            name="H%d" % (i + 1))

    if self.summary:
        util.add_summaries("W:" + self.model_name, self.W_conv, mean=True, max=True, min=True)
        util.add_summaries("B:" + self.model_name, self.B_conv, mean=True, max=True, min=True)
```

```python
def build_reconstruction_graph(self):
    # reconstruction net: two convs shared by all recursions, then a learned
    # weighted average over the per-recursion outputs

    # HD+1 conv
    self.WD1_conv = util.weight([self.cnn_size, self.cnn_size, self.feature_num, self.feature_num],
                                stddev=self.weight_dev, name="WD1_conv", initializer=self.initializer)
    self.BD1_conv = util.bias([self.feature_num], name="BD1")

    # HD+2 conv
    self.WD2_conv = util.weight([self.cnn_size, self.cnn_size, self.feature_num, self.channels],
                                stddev=self.weight_dev, name="WD2_conv", initializer=self.initializer)
    self.BD2_conv = util.bias([self.channels], name="BD2")  # one bias per output channel

    self.Y1_conv = (self.inference_depth + 1) * [None]
    self.Y2_conv = (self.inference_depth + 1) * [None]
    # blend weights, initialized uniformly over the D+1 recursion outputs
    self.W = tf.Variable(np.full(shape=[self.inference_depth + 1],
                                 fill_value=1.0 / (self.inference_depth + 1),
                                 dtype=np.float32), name="layer_weight")
    W_sum = tf.reduce_sum(self.W)

    for i in range(0, self.inference_depth + 1):
        self.Y1_conv[i] = util.conv2d_with_bias_and_relu(self.H_conv[i], self.WD1_conv, self.cnn_stride,
                                                         self.BD1_conv, name="Y%d_1" % i)
        self.Y2_conv[i] = util.conv2d_with_bias_and_relu(self.Y1_conv[i], self.WD2_conv, self.cnn_stride,
                                                         self.BD2_conv, name="Y%d_2" % i)
        # accumulate the normalized weighted sum of every recursion's output
        y_ = tf.multiply(self.W[i], self.Y2_conv[i], name="Y%d_mul" % i)
        y_ = tf.divide(y_, W_sum, name="Y%d_div" % i)
        if i == 0:
            self.y_ = y_
        else:
            self.y_ = self.y_ + y_

    if self.summary:
        util.add_summaries("BD1:" + self.model_name, self.BD1_conv)
        util.add_summaries("WD1:" + self.model_name, self.WD1_conv, mean=True, max=True, min=True)
        util.add_summaries("WD2:" + self.model_name, self.WD2_conv, mean=True, max=True, min=True)
```

```python
def build_optimizer(self):

    self.lr_input = tf.placeholder(tf.float32, shape=[], name="LearningRate")
    self.loss_alpha_input = tf.placeholder(tf.float32, shape=[], name="Alpha")

    # loss2: MSE of the final (blended) output against the ground truth
    mse = tf.reduce_mean(tf.square(self.y_ - self.y), name="MSE")
    if self.debug:
        mse = tf.Print(mse, [mse], message="MSE: ")

    if self.loss_alpha == 0.0 or self.inference_depth == 0:
        loss = mse
    else:
        # loss1: mean MSE of the intermediate per-recursion outputs
        loss1_mse = self.inference_depth * [None]

        for i in range(0, self.inference_depth):
            inference_sub = tf.subtract(self.y, self.Y2_conv[i], name="Loss1_%d_sub" % i)
            inference_square = tf.square(inference_sub, name="Loss1_%d_squ" % i)
            loss1_mse[i] = tf.reduce_mean(inference_square, name="Loss1_%d" % i)

        loss1 = loss1_mse[0]
        for i in range(1, self.inference_depth):
            if i == self.inference_depth - 1:  # give the last addition the final name
                loss1 = tf.add(loss1, loss1_mse[i], name="Loss1")
            else:
                loss1 = tf.add(loss1, loss1_mse[i], name="Loss1_%d_add" % i)

        loss1 = tf.multiply(1.0 / self.inference_depth, loss1, name="Loss1_weight")
        loss2 = mse
        if self.visualize:
            tf.summary.scalar("L1:" + self.model_name, loss1)
            tf.summary.scalar("L2:" + self.model_name, loss2)
        # alpha shifts emphasis between the intermediate and final losses
        loss1 = tf.multiply(self.loss_alpha_input, loss1, name="Loss1_alpha")
        loss2 = tf.multiply(1 - self.loss_alpha_input, loss2, name="Loss2_alpha")

        if self.loss_beta > 0.0:
            # loss3: L2 weight decay over all convolution kernels
            with tf.name_scope('Loss3'):
                loss3 = tf.nn.l2_loss(self.Wm1_conv) + tf.nn.l2_loss(self.W0_conv) \
                        + tf.nn.l2_loss(self.W_conv) + tf.nn.l2_loss(self.WD1_conv) \
                        + tf.nn.l2_loss(self.WD2_conv)
                loss3 *= self.loss_beta

            if self.visualize:
                tf.summary.scalar("L3:" + self.model_name, loss3)
            loss = loss1 + loss2 + loss3
        else:
            loss = loss1 + loss2

    if self.visualize:
        tf.summary.scalar("Loss:" + self.model_name, loss)

    self.loss = loss
    self.mse = mse
    self.train_step = self.add_optimizer_op(loss, self.lr_input)
```
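
Finally, the four builders are meant to be called in sequence. A minimal training-step sketch; the `DRCN()` constructor, batch shapes, and hyperparameter values are assumptions for illustration, not the repo's actual driver:

```python
import numpy as np
import tensorflow as tf

model = DRCN()  # assumed constructor that sets depth, kernel sizes, etc.
model.build_embedding_graph()
model.build_inference_graph()
model.build_reconstruction_graph()
model.build_optimizer()

# dummy batch: bicubic-interpolated inputs and ground-truth patches
x_batch = np.zeros([8, 32, 32, 1], dtype=np.float32)
y_batch = np.zeros([8, 32, 32, 1], dtype=np.float32)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    _, loss = sess.run([model.train_step, model.loss],
                       feed_dict={model.x: x_batch, model.y: y_batch,
                                  model.lr_input: 1e-4,        # illustrative values
                                  model.loss_alpha_input: 0.5})
```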