
Notes on Implementing Normalization (Batch, Weight, Layer)

Posted at 2018-05-27

Key Points

  • Implemented Weight Normalization on top of a fully connected (FC) layer and compared it with the other methods (the reparameterization is summarized just after this list).
  • Confirmed that applying or skipping the data-dependent weight initialization affects performance.
  • Plan to run additional experiments based on Convolution and LSTM layers.
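
For reference, the reparameterization from reference 1, together with the data-dependent initialization that the sample code below performs with tf.assign, can be summarized as follows (my notation, not a quote from the paper). A weight vector w is expressed through a direction v and a scalar gain g, and on the first batch g and the bias b are set from the statistics of the pre-activation t:

```math
w = \frac{g}{\lVert v \rVert}\, v, \qquad
t = \frac{v^{\top} x}{\lVert v \rVert}, \qquad
g \leftarrow \frac{1}{\sqrt{\mathrm{Var}[t] + \epsilon}}, \qquad
b \leftarrow \frac{-\,\mathrm{E}[t]}{\sqrt{\mathrm{Var}[t] + \epsilon}}
```

With this choice the first layer's pre-activation starts out with zero mean and unit variance on the initial batch.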

References

1. Weight Normalization: A Simple Reparameterization to Accelerate Training of Deep Neural Networks

2. Layer Normalization

Experimental Setup

  • Uses the MNIST handwritten digit dataset.
  • The network has two fully connected layers; normalization is applied to the input of the first layer's activation function (the batch vs. layer statistics are summarized just below).
  • Checks the effect of weight initialization.
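
For orientation (my summary, not part of the original article): the two normalizations used in the sample code differ only in the axis over which the statistics are taken. Batch Normalization computes the mean and variance of each unit over the batch, while Layer Normalization computes them for each sample over the units:

```math
\text{BN:}\quad \mu_j = \frac{1}{m}\sum_{i=1}^{m} x_{ij}, \qquad
\text{LN:}\quad \mu_i = \frac{1}{H}\sum_{j=1}^{H} x_{ij}
```

Here m is the batch size and H the number of units; in the code this is the difference between tf.nn.moments(x, [0]) and tf.nn.moments(x, [1]).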

Data

MNIST handwritten digits

# import mnist data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('***/mnist', one_hot = True)
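
Note that the tensorflow.examples.tutorials module has since been removed from TensorFlow. If you want to reproduce this on a recent installation, a roughly equivalent loading step is sketched below (my code, not the article's); it produces the flat, one-hot format the rest of the code expects:

```python
import numpy as np
import tensorflow as tf

# Load MNIST via tf.keras and convert to flattened float images and one-hot labels,
# matching what input_data.read_data_sets(..., one_hot=True) used to return.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
images_train = x_train.reshape(-1, 784).astype(np.float32) / 255.0
images_test  = x_test.reshape(-1, 784).astype(np.float32) / 255.0
labels_train = np.eye(10, dtype=np.float32)[y_train]
labels_test  = np.eye(10, dtype=np.float32)[y_test]
```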

Results

n_units = 128
learning_rate = 0.1
batch_size = 128

  1. No normalization
    (figure omitted)

  2. Weight Normalization (without weight initialization)
    (figure omitted)

  3. Weight Normalization (with weight initialization)
    (figure omitted)

  4. Batch Normalization
    (figure omitted)

  5. Layer Normalization
    (figure omitted)

Sample Code
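
The methods below belong to a model class and call several helpers (weight_variable, bias_variable, loss, training, accuracy) that the article does not show. A minimal sketch of what they might look like in TensorFlow 1.x is given here; the class name, initializers, and optimizer are my assumptions, not the author's code:

```python
import tensorflow as tf

class Model:
  def weight_variable(self, name, shape):
    # Trainable weight; small random-normal initializer (assumption).
    init = tf.random_normal_initializer(stddev=0.05)
    return tf.get_variable(name, shape=shape, initializer=init)

  def bias_variable(self, name, shape):
    # Trainable bias initialized to zero (assumption).
    init = tf.constant_initializer(0.0)
    return tf.get_variable(name, shape=shape, initializer=init)

  def loss(self, y, t):
    # Cross-entropy between the softmax output y and one-hot targets t.
    return -tf.reduce_mean(tf.reduce_sum(t * tf.log(y + 1e-10), axis=1))

  def training(self, loss, learning_rate):
    # Plain gradient descent (assumption; any optimizer would work).
    return tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

  def accuracy(self, y, t):
    correct = tf.equal(tf.argmax(y, axis=1), tf.argmax(t, axis=1))
    return tf.reduce_mean(tf.cast(correct, tf.float32))
```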

  def inference_wn_first(self, x, n_in, n_units, n_out):

    with tf.variable_scope('layer1'):
      v = self.weight_variable('v', [n_in, n_units])
      g = self.weight_variable('g', [n_units])
      b = self.bias_variable('b', [n_units])

      # Reparameterize the weights: w = g * v / ||v||
      v_norm = tf.nn.l2_normalize(v, axis = 0)
      t = tf.matmul(x, v_norm)
      mean, var = tf.nn.moments(t, axes = [0])
      # Data-dependent initialization of g and b from the first batch's statistics
      g = tf.assign(g, 1.0 / tf.sqrt(var + 1e-10))
      b = tf.assign(b, - mean / tf.sqrt(var + 1e-10))
      w = g * v_norm
      y = tf.add(tf.matmul(x, w), b)
      y = tf.nn.relu(y)

    with tf.variable_scope('layer2'):
      v = self.weight_variable('v', [n_units, n_out])
      g = self.weight_variable('g', [n_out])
      b = self.bias_variable('b', [n_out])

      v_norm = tf.nn.l2_normalize(v, axis = 0)
      t = tf.matmul(y, v_norm)
      mean, var = tf.nn.moments(t, axes = [0])
      g = tf.assign(g, 1.0 / tf.sqrt(var + 1e-10))
      b = tf.assign(b, - mean / tf.sqrt(var + 1e-10))
      w = g * v_norm      
      y = tf.add(tf.matmul(y, w), b)
      y = tf.nn.softmax(y, axis = 1)

    return y

  def inference_wn_after(self, x, n_in, n_units, n_out):

    with tf.variable_scope('layer1', reuse = True):
      # Reuse the variables that inference_wn_first has already initialized
      v = tf.get_variable('v')
      g = tf.get_variable('g')
      b = tf.get_variable('b')

      v_norm = tf.nn.l2_normalize(v, axis = 0)
      w = g * v_norm
      y = tf.add(tf.matmul(x, w), b)
      y = tf.nn.relu(y)

    with tf.variable_scope('layer2', reuse = True):
      v = tf.get_variable('v')
      g = tf.get_variable('g')
      b = tf.get_variable('b')

      v_norm = tf.nn.l2_normalize(v, axis = 0)
      w = g * v_norm
      y = tf.add(tf.matmul(y, w), b)
      y = tf.nn.softmax(y, axis = 1)

    return y

  def fit_wn(self, images_train, labels_train, \
          images_test, labels_test, n_in, n_units, \
          n_out, learning_rate, n_iter, batch_size, \
          show_step, is_saving, model_path):

    tf.reset_default_graph()

    x = tf.placeholder(shape = [None, n_in], dtype = \
                       tf.float32)
    t = tf.placeholder(shape = [None, n_out], dtype = \
                       tf.float32)

    # WN
    y_first = self.inference_wn_first(x, n_in, n_units, \
                                      n_out)
    y_after = self.inference_wn_after(x, n_in, n_units, \
                                      n_out)

    loss_first = self.loss(y_first, t)
    loss_after = self.loss(y_after, t)

    train_step_first = self.training(loss_first, \
                                      learning_rate)
    train_step_after = self.training(loss_after, \
                                      learning_rate)

    acc_first =  self.accuracy(y_first, t)
    acc_after =  self.accuracy(y_after, t)

    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    with tf.Session() as sess:

      sess.run(init)

      history_loss_train = []
      history_acc_train = []
      history_loss_test = []
      history_acc_test = []

      for i in range(n_iter):
        # Train
        rand_index = np.random.choice(len(images_train), \
                                       size = batch_size)
        x_batch = images_train[rand_index]
        y_batch = labels_train[rand_index]

        feed_dict = {x: x_batch, t: y_batch}

        if i == 0:
          # First iteration: run the graph that performs the
          # data-dependent initialization of g and b
          sess.run(train_step_first, feed_dict = feed_dict)

          temp_loss = sess.run(loss_first, feed_dict = \
                                feed_dict)
          temp_acc = sess.run(acc_first, feed_dict = \
                               feed_dict)
        else:
          # Subsequent iterations: reuse the already-initialized variables
          sess.run(train_step_after, feed_dict = feed_dict)

          temp_loss = sess.run(loss_after, feed_dict = \
                                feed_dict)
          temp_acc = sess.run(acc_after, feed_dict = \
                               feed_dict)

        history_loss_train.append(temp_loss)
        history_acc_train.append(temp_acc)

        if (i + 1) % show_step == 0:
          print ('--------------------')
          print ('Iteration: ' + str(i + 1) + '  Loss: ' + \
                   str(temp_loss) + '  Accuracy: ' + \
                   str(temp_acc))

  def batch_norm(self, x, n_units):
    with tf.variable_scope('bn'):
      init_const1 = tf.constant_initializer(value = 0.0, \
                      dtype = tf.float32)
      init_const2 = tf.constant_initializer(value = 1.0, \
                      dtype = tf.float32)
      beta = tf.get_variable('beta', shape = [n_units], \
                            initializer =init_const1)
      gamma = tf.get_variable('gamma', shape = [n_units], \
                            initializer =init_const2)
      # Statistics over the batch axis (one mean/variance per unit)
      mean, var = tf.nn.moments(x, [0])
      x = gamma * (x - mean) / tf.sqrt(var + 1e-5) + beta

      return x

  def layer_norm(self, x, batch_size):
    with tf.variable_scope('ln'):
      init_const1 = tf.constant_initializer(value = 0.0, \
                        dtype = tf.float32)
      init_const2 = tf.constant_initializer(value = 1.0, \
                        dtype = tf.float32)
      # Note: gamma and beta here have shape [batch_size] (per example);
      # the Layer Normalization paper uses per-feature gain and bias.
      beta = tf.get_variable('beta', shape = [batch_size], \
                            initializer = init_const1)
      gamma = tf.get_variable('gamma', shape = [batch_size], \
                            initializer = init_const2)
      # Statistics over the feature axis (one mean/variance per example)
      mean, var = tf.nn.moments(x, [1])

      mean = tf.expand_dims(mean, axis = 1)
      var = tf.expand_dims(var, axis = 1)
      beta = tf.expand_dims(beta, axis = 1)
      gamma = tf.expand_dims(gamma, axis = 1)

      x = gamma * (x - mean) / tf.sqrt(var + 1e-5) + beta

      return x
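
The article does not show how batch_norm and layer_norm are wired into the network. Based on the setup described above (normalization applied to the first layer's pre-activation), a plausible inference function would look roughly like the sketch below; it reuses the assumed helpers from earlier and is my reconstruction, not the author's code:

```python
  def inference_bn(self, x, n_in, n_units, n_out, batch_size):
    with tf.variable_scope('layer1'):
      w = self.weight_variable('w', [n_in, n_units])
      b = self.bias_variable('b', [n_units])
      # Normalize the first layer's pre-activation, then apply ReLU.
      y = tf.add(tf.matmul(x, w), b)
      y = self.batch_norm(y, n_units)        # or: self.layer_norm(y, batch_size)
      y = tf.nn.relu(y)

    with tf.variable_scope('layer2'):
      w = self.weight_variable('w', [n_units, n_out])
      b = self.bias_variable('b', [n_out])
      y = tf.nn.softmax(tf.add(tf.matmul(y, w), b), axis = 1)

    return y
```

The training loop for these variants would mirror fit_wn, except that a single inference graph is used for every iteration, since no data-dependent initialization step is needed.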
