Points
- Implement Weight Normalization on top of an FC (fully connected) layer and compare it with other methods (the reparameterization is recalled right after this list).
- Confirm that using or skipping Weight Initialization affects performance.
- Further verification based on Convolution and LSTM layers is planned.
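As a reminder, Weight Normalization [1] reparameterizes each weight vector w by a direction v and a scalar scale g, and trains v and g directly:

    w = \frac{g}{\lVert v \rVert} \, v

so that \lVert w \rVert = g regardless of the direction of v, decoupling the length of the weight vector from its direction.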
Reference
1. Salimans, T. and Kingma, D. P. (2016). Weight Normalization: A Simple Reparameterization to Accelerate Training of Deep Neural Networks. NeurIPS 2016.
Verification Method
- Use the MNIST handwritten digit data.
- Of the two fully connected layers, apply Normalization to the input of the activation function of the first layer.
- Check the effect of Weight Initialization (the data-dependent scheme from [1], recalled below, is used).
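Following [1], the data-dependent initialization sets the scale and bias from the minibatch statistics of the normalized pre-activation t = x \cdot v / \lVert v \rVert:

    g \leftarrow \frac{1}{\sqrt{\sigma^2[t] + \epsilon}}, \qquad b \leftarrow -\frac{\mu[t]}{\sqrt{\sigma^2[t] + \epsilon}}

where \mu[t] and \sigma^2[t] are the minibatch mean and variance of t, and \epsilon corresponds to the 1e-10 in the code below; the initial outputs g \cdot t + b then have zero mean and unit variance.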
Data
MNIST handwritten digits
# import mnist data
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets('***/mnist', one_hot=True)
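The DataSet objects returned by read_data_sets expose plain NumPy arrays; for orientation, the arrays passed to the training routine below look like this (shapes are those of the standard MNIST split):

images_train = mnist.train.images  # (55000, 784), float32 in [0, 1]
labels_train = mnist.train.labels  # (55000, 10), one-hot
images_test = mnist.test.images    # (10000, 784)
labels_test = mnist.test.labels    # (10000, 10)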
Verification Results
n_units = 128
learning_rate = 0.1
batch_size = 128
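With these settings, a call to the fit_wn method defined below might look like the following sketch; the class name, n_iter, show_step, is_saving, and model_path are illustrative assumptions, not values from the original:

model = WeightNormModel()  # hypothetical class holding the methods below
model.fit_wn(images_train, labels_train, images_test, labels_test,
             n_in=784, n_units=128, n_out=10, learning_rate=0.1,
             n_iter=1000, batch_size=128, show_step=100,  # n_iter, show_step assumed
             is_saving=False, model_path='***/model')     # path placeholder as above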
Sample Code
import numpy as np
import tensorflow as tf

def inference_wn_first(self, x, n_in, n_units, n_out):
    # First forward pass: data-dependent initialization of g and b.
    with tf.variable_scope('layer1'):
        v = self.weight_variable('v', [n_in, n_units])
        g = self.weight_variable('g', [n_units])
        b = self.bias_variable('b', [n_units])
        # w = g * v / ||v||, with the norm taken over the input axis.
        v_norm = tf.nn.l2_normalize(v, axis=0)
        t = tf.matmul(x, v_norm)
        mean, var = tf.nn.moments(t, axes=[0])
        # Set g and b from the minibatch statistics of t.
        g = tf.assign(g, 1.0 / tf.sqrt(var + 1e-10))
        b = tf.assign(b, -mean / tf.sqrt(var + 1e-10))
        w = g * v_norm
        y = tf.add(tf.matmul(x, w), b)
        y = tf.nn.relu(y)
    with tf.variable_scope('layer2'):
        v = self.weight_variable('v', [n_units, n_out])
        g = self.weight_variable('g', [n_out])
        b = self.bias_variable('b', [n_out])
        v_norm = tf.nn.l2_normalize(v, axis=0)
        t = tf.matmul(y, v_norm)
        mean, var = tf.nn.moments(t, axes=[0])
        g = tf.assign(g, 1.0 / tf.sqrt(var + 1e-10))
        b = tf.assign(b, -mean / tf.sqrt(var + 1e-10))
        w = g * v_norm
        y = tf.add(tf.matmul(y, w), b)
        y = tf.nn.softmax(y, axis=1)
    return y
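# The helpers weight_variable and bias_variable are referenced above but
# not shown in the original; the definitions below are a minimal sketch
# (the choice of initializers is an assumption, not from the source).
def weight_variable(self, name, shape):
    init = tf.random_normal_initializer(stddev=0.05)  # assumed initializer
    return tf.get_variable(name, shape=shape, initializer=init)

def bias_variable(self, name, shape):
    init = tf.constant_initializer(value=0.0)  # assumed initializer
    return tf.get_variable(name, shape=shape, initializer=init)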
def inference_wn_after(self, x, n_in, n_units, n_out):
    # Subsequent forward passes: reuse the variables initialized above.
    with tf.variable_scope('layer1', reuse=True):
        v = tf.get_variable('v')
        g = tf.get_variable('g')
        b = tf.get_variable('b')
        v_norm = tf.nn.l2_normalize(v, axis=0)
        w = g * v_norm
        y = tf.add(tf.matmul(x, w), b)
        y = tf.nn.relu(y)
    with tf.variable_scope('layer2', reuse=True):
        v = tf.get_variable('v')
        g = tf.get_variable('g')
        b = tf.get_variable('b')
        v_norm = tf.nn.l2_normalize(v, axis=0)
        w = g * v_norm
        y = tf.add(tf.matmul(y, w), b)
        y = tf.nn.softmax(y, axis=1)
    return y
def fit_wn(self, images_train, labels_train, images_test, labels_test,
           n_in, n_units, n_out, learning_rate, n_iter, batch_size,
           show_step, is_saving, model_path):
    tf.reset_default_graph()

    x = tf.placeholder(shape=[None, n_in], dtype=tf.float32)
    t = tf.placeholder(shape=[None, n_out], dtype=tf.float32)

    # WN: one graph for the initializing pass, one for the reusing passes.
    y_first = self.inference_wn_first(x, n_in, n_units, n_out)
    y_after = self.inference_wn_after(x, n_in, n_units, n_out)

    loss_first = self.loss(y_first, t)
    loss_after = self.loss(y_after, t)
    train_step_first = self.training(loss_first, learning_rate)
    train_step_after = self.training(loss_after, learning_rate)
    acc_first = self.accuracy(y_first, t)
    acc_after = self.accuracy(y_after, t)

    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(init)

        history_loss_train = []
        history_acc_train = []
        history_loss_test = []
        history_acc_test = []

        for i in range(n_iter):
            # Train on a random minibatch.
            rand_index = np.random.choice(len(images_train), size=batch_size)
            x_batch = images_train[rand_index]
            y_batch = labels_train[rand_index]
            feed_dict = {x: x_batch, t: y_batch}

            if i == 0:
                # First step: run the data-dependent initialization graph.
                sess.run(train_step_first, feed_dict=feed_dict)
                temp_loss = sess.run(loss_first, feed_dict=feed_dict)
                temp_acc = sess.run(acc_first, feed_dict=feed_dict)
            else:
                sess.run(train_step_after, feed_dict=feed_dict)
                temp_loss = sess.run(loss_after, feed_dict=feed_dict)
                temp_acc = sess.run(acc_after, feed_dict=feed_dict)

            history_loss_train.append(temp_loss)
            history_acc_train.append(temp_acc)

            if (i + 1) % show_step == 0:
                print('--------------------')
                print('Iteration: ' + str(i + 1) + '  Loss: ' +
                      str(temp_loss) + '  Accuracy: ' + str(temp_acc))
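# The loss, training, and accuracy helpers used by fit_wn are also not
# shown in the original; the sketch below is one plausible definition
# (cross-entropy on the softmax outputs plus plain SGD are assumptions).
def loss(self, y, t):
    # y is already a softmax output, so take the log directly,
    # clipping to avoid log(0).
    return tf.reduce_mean(-tf.reduce_sum(
        t * tf.log(tf.clip_by_value(y, 1e-10, 1.0)), axis=1))

def training(self, loss, learning_rate):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    return optimizer.minimize(loss)

def accuracy(self, y, t):
    correct = tf.equal(tf.argmax(y, axis=1), tf.argmax(t, axis=1))
    return tf.reduce_mean(tf.cast(correct, tf.float32))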
def batch_norm(self, x, n_units):
    # Normalize each unit over the batch axis, then scale and shift.
    with tf.variable_scope('bn'):
        init_const1 = tf.constant_initializer(value=0.0, dtype=tf.float32)
        init_const2 = tf.constant_initializer(value=1.0, dtype=tf.float32)
        beta = tf.get_variable('beta', shape=[n_units],
                               initializer=init_const1)
        gamma = tf.get_variable('gamma', shape=[n_units],
                                initializer=init_const2)
        mean, var = tf.nn.moments(x, [0])
        x = gamma * (x - mean) / tf.sqrt(var + 1e-5) + beta
    return x
def layer_norm(self, x, batch_size):
    # Normalize each sample over the feature axis, then scale and shift.
    # Note: beta and gamma are per-sample here, hence the batch_size shape.
    with tf.variable_scope('ln'):
        init_const1 = tf.constant_initializer(value=0.0, dtype=tf.float32)
        init_const2 = tf.constant_initializer(value=1.0, dtype=tf.float32)
        beta = tf.get_variable('beta', shape=[batch_size],
                               initializer=init_const1)
        gamma = tf.get_variable('gamma', shape=[batch_size],
                                initializer=init_const2)
        mean, var = tf.nn.moments(x, [1])
        mean = tf.expand_dims(mean, axis=1)
        var = tf.expand_dims(var, axis=1)
        beta = tf.expand_dims(beta, axis=1)
        gamma = tf.expand_dims(gamma, axis=1)
        x = gamma * (x - mean) / tf.sqrt(var + 1e-5) + beta
    return x
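For comparison with the Weight Normalization path, the two helpers above would be applied to the pre-activation of the first layer, as described in the Verification Method section. A minimal sketch follows; the function name and the surrounding layer code are assumptions, not from the original:

def inference_bn(self, x, n_in, n_units, n_out):
    # Batch Normalization applied to the layer-1 pre-activation.
    with tf.variable_scope('layer1'):
        w = self.weight_variable('w', [n_in, n_units])
        b = self.bias_variable('b', [n_units])
        y = tf.add(tf.matmul(x, w), b)
        y = self.batch_norm(y, n_units)  # or: self.layer_norm(y, batch_size)
        y = tf.nn.relu(y)
    with tf.variable_scope('layer2'):
        w = self.weight_variable('w', [n_units, n_out])
        b = self.bias_variable('b', [n_out])
        y = tf.add(tf.matmul(y, w), b)
    return tf.nn.softmax(y, axis=1)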