Notes on Implementing (C)RNN Domain Adaptation

Posted 2018-09-03

Implementation notes for adversarial domain adaptation on a toy time-series prediction task, written with the TensorFlow 1.x low-level API: a shared feature extractor, a domain classifier, and a predictor are trained so that source-domain and target-domain features become hard to tell apart. The first version uses an LSTM feature extractor (RNN); the second adds a convolutional front end before the LSTM (CRNN).

Data

import numpy as np
import matplotlib.pyplot as plt

time_steps = 1010
a_s = 0.3
b_s = 0.5
a_t = 0.3
b_t = 0.3
c_s = 0.1
c_t = 0.1
T_0 = 100
T_s = 10
T_t = 50

data_s = []
data_t = []
for t in range(time_steps):
  data_s.append(a_s * np.sin(2 * np.pi * t / T_0) + b_s * np.sin(2 * np.pi * t / T_s) \
             + c_s * np.random.normal())
  data_t.append(a_t * np.sin(2 * np.pi * t / T_0) + b_t * np.sin(2 * np.pi * t / T_t) \
            + c_t * np.random.normal())

show_steps = 200   
plt.plot(range(show_steps), data_s[:show_steps], label = 'data_s')
plt.plot(range(show_steps), data_t[:show_steps], label = 'data_t')
plt.legend(loc = 'upper right')
plt.show()

length = 10
n_predictions = 3

x_s = []
y_s = []
for i in range(len(data_s) - length - n_predictions):
  x_s.append(data_s[i : i + length])
  y_s.append(data_s[i + length : i + length + n_predictions])

x_t = []
y_t = []
for i in range(len(data_t) - length - n_predictions):
  x_t.append(data_t[i : i + length])
  y_t.append(data_t[i + length : i + length + n_predictions])

n_s = len(x_s)
n_t = len(x_t)

n_domains = 2
d_s = np.ones(shape = [n_s], dtype = np.int32) * 0
d_s_one_hot = np.identity(n_domains)[d_s].astype(np.int32)
d_t = np.ones(shape = [n_t], dtype = np.int32) * 1
d_t_one_hot = np.identity(n_domains)[d_t].astype(np.int32)  

print (np.shape(x_s))
print (np.shape(y_s))
print (np.shape(d_s_one_hot))

n_s_train = int(n_s * 0.8)
n_t_train = int(n_t * 0.1)

x_s_train = np.array(x_s[:n_s_train])
x_s_test = np.array(x_s[n_s_train:])
y_s_train = np.array(y_s[:n_s_train])
y_s_test = np.array(y_s[n_s_train:])
d_s_train = d_s_one_hot[:n_s_train]
d_s_test = d_s_one_hot[n_s_train:]

x_t_train = np.array(x_t[:n_t_train])
x_t_test = np.array(x_t[n_t_train:])
y_t_train = np.array(y_t[:n_t_train])
y_t_test = np.array(y_t[n_t_train:])
d_t_train = d_t_one_hot[:n_t_train]
d_t_test = d_t_one_hot[n_t_train:]

(Figure: plot of data_s and data_t produced by the code above.)
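As a quick sanity check, here is a minimal sketch (not in the original post) of the shapes the later code expects, assuming the windowing loop above pairs each input window with the steps that immediately follow it:

# Hypothetical shape checks for the windowed data (illustration only).
assert np.shape(x_s) == (n_s, length)
assert np.shape(y_s) == (n_s, n_predictions)
assert np.shape(d_s_one_hot) == (n_s, n_domains)
assert np.shape(x_t) == (n_t, length)
assert np.shape(y_t) == (n_t, n_predictions)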

Sample Code (RNN)

# RNN & Domain Adaptation

import tensorflow as tf

class RNN_DA():
  def __init__(self):
    pass

  def weight_variable(self, name, shape):
    initializer = tf.truncated_normal_initializer(mean = 0.0, stddev = 0.01, dtype = tf.float32)
    return tf.get_variable(name, shape, initializer = initializer)

  def bias_variable(self, name, shape):
    initializer = tf.constant_initializer(value = 0.0, dtype = tf.float32)
    return tf.get_variable(name, shape, initializer = initializer)

  def get_zoneout_mask(self, zoneout_prob, shape):
    # Each entry is 1 with probability zoneout_prob (the previous state is
    # kept, i.e. "zoned out") and 0 otherwise (the state is updated normally).
    zoneout_prob = tf.convert_to_tensor(zoneout_prob)
    random_tensor = zoneout_prob + tf.random_uniform(shape)
    zoneout_mask = tf.floor(random_tensor)

    return zoneout_mask

  def f_extractor(self, x, length, n_in, n_units_f, batch_size, forget_bias, \
                  zoneout_prob, reuse = False):
    # LSTM
    x = tf.reshape(x, [-1, length, n_in])
    h = tf.zeros(shape = [batch_size, n_units_f], dtype = tf.float32)
    c = tf.zeros(shape = [batch_size, n_units_f], dtype = tf.float32)

    with tf.variable_scope('f_extractor', reuse = reuse):
      w_x = self.weight_variable('w_x', [n_in, n_units_f * 4])
      w_h = self.weight_variable('w_h', [n_units_f, n_units_f * 4])
      b = self.bias_variable('b', [n_units_f * 4])

      zoneout_mask_c = self.get_zoneout_mask(zoneout_prob, [n_units_f])
      zoneout_mask_complement_c = tf.ones(shape = [n_units_f], dtype = tf.float32) - zoneout_mask_c
      zoneout_mask_h = self.get_zoneout_mask(zoneout_prob, [n_units_f])
      zoneout_mask_complement_h = tf.ones(shape = [n_units_f], dtype = tf.float32) - zoneout_mask_h

      for t in range(length):

        t_x = tf.matmul(x[:, t, :], w_x)
        t_h = tf.matmul(h, w_h)

        i, f, o, g = tf.split(tf.add(tf.add(t_x, t_h), b), 4, axis = 1)

        i = tf.nn.sigmoid(i)
        f = tf.nn.sigmoid(f + forget_bias)
        o = tf.nn.sigmoid(o)
        g = tf.nn.tanh(g)

        # zoneout
        c_temp = tf.add(tf.multiply(f, c), tf.multiply(i, g))
        h_temp = tf.multiply(o, tf.nn.tanh(c_temp))

        c = zoneout_mask_c * c + \
                        zoneout_mask_complement_c * c_temp
        h = zoneout_mask_h * h + \
                        zoneout_mask_complement_h * h_temp

      return h

  def classifier_d(self, x, n_units_f, n_units_d, n_domains, keep_prob, reuse = False):

    with tf.variable_scope('classifier_d', reuse = reuse):
      w_1 = self.weight_variable('w_1', [n_units_f, n_units_d])
      b_1 = self.bias_variable('b_1', [n_units_d])

      d = tf.matmul(x, w_1) + b_1

      # batch norm
      #batch_mean, batch_var = tf.nn.moments(d, [0])
      #d = (d - batch_mean) / (tf.sqrt(batch_var) + 1e-10)

      # relu
      d = tf.nn.relu(d)

      # dropout
      #d = tf.nn.dropout(d, keep_prob)

      w_2 = self.weight_variable('w_2', [n_units_d, n_domains])
      b_2 = self.bias_variable('b_2', [n_domains])

      d = tf.matmul(d, w_2) + b_2
      logits = d

    return logits

  def predictor(self, x, n_units_f, n_units_p, n_predictions, keep_prob, reuse = False):

    with tf.variable_scope('predictor', reuse = reuse):
      w_1 = self.weight_variable('w_1', [n_units_f, n_units_p])
      b_1 = self.bias_variable('b_1', [n_units_p])

      y = tf.matmul(x, w_1) + b_1

      # batch norm
      #batch_mean, batch_var = tf.nn.moments(y, [0])
      #y = (y - batch_mean) / (tf.sqrt(batch_var) + 1e-10)

      # relu
      y = tf.nn.relu(y)

      # dropout
      #y = tf.nn.dropout(y, keep_prob)

      w_2 = self.weight_variable('w_2', [n_units_p, n_predictions])
      b_2 = self.bias_variable('b_2', [n_predictions])

      y = tf.matmul(y, w_2) + b_2

    return y

  def gradient_reversal(self, f, lam, n_units_f, batch_size):
    # Identity in the forward pass; scales the gradient by -lam in the
    # backward pass. Not used by fit() below, which instead minimizes
    # -lam * loss_d over the feature-extractor variables.
    return - lam * f + tf.stop_gradient((1.0 + lam) * f)

  def loss_mse(self, y, t):
    mse = tf.reduce_mean(tf.reduce_sum(tf.square(t - y), axis = 1))
    return mse

  def loss_cross_entropy(self, y, t):
    cross_entropy = - tf.reduce_mean(tf.reduce_sum(t * tf.log(tf.clip_by_value(y, 1e-10, 1.0)), axis = 1))
    return cross_entropy

  def loss_entropy(self, p):
    entropy = tf.reduce_mean(tf.reduce_sum(p * tf.log(tf.clip_by_value(p, 1e-10, 1.0)), axis = 1))
    return entropy

  def accuracy(self, y, t):
    correct_preds = tf.equal(tf.argmax(y, axis = 1), tf.argmax(t, axis = 1))
    accuracy = tf.reduce_mean(tf.cast(correct_preds, tf.float32))
    return accuracy

  def accuracy_mae(self, y, t):
    accuracy = tf.reduce_mean(tf.reduce_sum(tf.abs(y - t), axis = 1))
    return accuracy

  def training(self, loss, learning_rate, var_list):
    optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate)
    train_step = optimizer.minimize(loss, var_list = var_list)
    return train_step

  def training_gd(self, loss, learning_rate, var_list):
    #optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate)
    train_step = optimizer.minimize(loss, var_list = var_list)
    return train_step

  def training_clipped(self, loss, learning_rate, clip_norm, var_list):
    optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate)

    grads_and_vars = optimizer.compute_gradients(loss, var_list = var_list)
    clipped_grads_and_vars = [(tf.clip_by_norm(grad, clip_norm = clip_norm), \
                             var) for grad, var in grads_and_vars]
    train_step = optimizer.apply_gradients(clipped_grads_and_vars)

    return train_step

  def fit(self, x_s_train, x_s_test, y_s_train, y_s_test, \
          x_t_train, x_t_test, y_t_train, y_t_test, \
          length, n_in, n_units_f, n_units_d, n_domains, \
          n_units_p, n_predictions, lam, \
          learning_rate, n_iter, batch_size, show_step, is_saving, model_path):

    tf.reset_default_graph()

    x_s = tf.placeholder(shape = [None, length], dtype = tf.float32)
    y_s = tf.placeholder(shape = [None, n_predictions], dtype = tf.float32)
    d_s = tf.placeholder(shape = [None, n_domains], dtype = tf.float32) 
    x_t = tf.placeholder(shape = [None, length], dtype = tf.float32)
    y_t = tf.placeholder(shape = [None, n_predictions], dtype = tf.float32) 
    d_t = tf.placeholder(shape = [None, n_domains], dtype = tf.float32) 
    keep_prob = tf.placeholder(shape = [], dtype = tf.float32)

    feat_s = self.f_extractor(x_s, length, n_in, n_units_f, batch_size, 1.0, 0.0, reuse = False)
    feat_t = self.f_extractor(x_t, length, n_in, n_units_f, batch_size, 1.0, 0.0, reuse = True)

    feat = tf.concat([feat_s, feat_t], axis = 0)
    d = tf.concat([d_s, d_t], axis = 0)

    logits_d = self.classifier_d(feat, n_units_f, n_units_d, n_domains, keep_prob, reuse = False)
    probs_d = tf.nn.softmax(logits_d)
    loss_d = self.loss_cross_entropy(probs_d, d)

    preds_s = self.predictor(feat_s, n_units_f, n_units_p, n_predictions, keep_prob, reuse = False)
    loss_s = self.loss_mse(preds_s, y_s)

    preds_t = self.predictor(feat_t, n_units_f, n_units_p, n_predictions, keep_prob, reuse = True)
    loss_t = self.loss_mse(preds_t, y_t)

    loss_f = - lam * loss_d

    var_list_f = tf.trainable_variables('f_extractor')
    #var_list_f = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="f_extractor")
    var_list_d = tf.trainable_variables('classifier_d')
    var_list_p = tf.trainable_variables('predictor')

    var_list_f_p = var_list_f + var_list_p

    train_step_f = self.training(loss_f, learning_rate, var_list_f)
    train_step_d = self.training(loss_d, learning_rate, var_list_d)
    train_step_f_p = self.training(loss_s, learning_rate, var_list_f_p)
    train_step_p = self.training(loss_t, learning_rate, var_list_p)

    acc_d =  self.accuracy(probs_d, d)
    acc_s =  self.accuracy_mae(preds_s, y_s)
    acc_t =  self.accuracy_mae(preds_t, y_t)

    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    with tf.Session() as sess:

      sess.run(init)

      history_loss_s_train = []
      history_loss_s_test = []
      history_acc_s_train = []
      history_acc_s_test = []

      history_loss_t_train = []
      history_loss_t_test = []
      history_acc_t_train = []
      history_acc_t_test = []

      history_loss_d_train = []
      history_loss_d_test = []
      history_acc_d_train = []
      history_acc_d_test = []

      history_loss_f_train = []
      history_loss_f_test = []

      for i in range(n_iter):
        # Training
        # prediction for s
        rand_index = np.random.choice(len(x_s_train), size = batch_size)
        x_batch = x_s_train[rand_index]
        y_batch = y_s_train[rand_index]

        feed_dict = {x_s: x_batch, y_s: y_batch, keep_prob: 1.0}
        sess.run(train_step_f_p, feed_dict = feed_dict)

        temp_loss_s = sess.run(loss_s, feed_dict = feed_dict)
        temp_acc_s = sess.run(acc_s, feed_dict = feed_dict)

        history_loss_s_train.append(temp_loss_s)
        history_acc_s_train.append(temp_acc_s)

        if (i + 1) % show_step == 0:
          print ('-' * 100)
          print ('Iteration: ' + str(i + 1) + '  Loss_s: ' + str(temp_loss_s) \
                + '  Accuracy_s: ' + str(temp_acc_s))

        # prediction for t
        #rand_index = np.random.choice(len(x_t_train), size = batch_size)
        #x_batch = x_t_train[rand_index]

        #feed_dict = {x_t: x_batch, keep_prob: 1.0}
        #sess.run(train_step_p, feed_dict = feed_dict)

        #temp_loss_t = sess.run(loss_t, feed_dict = feed_dict)
        #temp_acc_t = sess.run(acc_t, feed_dict = feed_dict)

        #history_loss_t_train.append(temp_loss_t)
        #history_acc_t_train.append(temp_acc_t)

        #if (i + 1) % show_step == 0:
        #  print ('-' * 100)
        #  print ('Iteration: ' + str(i + 1) + '  Loss_t: ' + str(temp_loss_t) \
        #        + '  Accuracy_t: ' + str(temp_acc_t))

        # domain classification for d
        rand_index = np.random.choice(len(x_s_train), size = batch_size)
        x_batch_s = x_s_train[rand_index]
        d_batch_s = d_s_train[rand_index]

        rand_index = np.random.choice(len(x_t_train), size = batch_size)
        x_batch_t = x_t_train[rand_index]
        d_batch_t = d_t_train[rand_index]

        feed_dict = {x_s: x_batch_s, d_s: d_batch_s, x_t: x_batch_t, d_t: d_batch_t, keep_prob: 1.0}
        sess.run(train_step_f, feed_dict = feed_dict)
        sess.run(train_step_d, feed_dict = feed_dict)

        temp_loss_f = sess.run(loss_f, feed_dict = feed_dict)
        temp_loss_d = sess.run(loss_d, feed_dict = feed_dict)
        temp_acc_d = sess.run(acc_d, feed_dict = feed_dict)

        history_loss_f_train.append(temp_loss_f)
        history_loss_d_train.append(temp_loss_d)
        history_acc_d_train.append(temp_acc_d)

        if (i + 1) % show_step == 0:
          print ('-' * 100)
          print ('Iteration: ' + str(i + 1) + \
                 '  Loss_d: ' + str(temp_loss_d) + '  Accuracy_d: ' + str(temp_acc_d))

        # Test
        # prediction for s
        rand_index = np.random.choice(len(x_s_test), size = batch_size)
        x_batch = x_s_test[rand_index]
        y_batch = y_s_test[rand_index]

        feed_dict = {x_s: x_batch, y_s: y_batch, keep_prob: 1.0}
        temp_loss_s = sess.run(loss_s, feed_dict = feed_dict)
        temp_acc_s = sess.run(acc_s, feed_dict = feed_dict)

        history_loss_s_test.append(temp_loss_s)
        history_acc_s_test.append(temp_acc_s)

        # prediction for t
        rand_index = np.random.choice(len(x_t_test), size = batch_size)
        x_batch = x_t_test[rand_index]
        y_batch = y_t_test[rand_index]

        feed_dict = {x_t: x_batch, y_t: y_batch, keep_prob: 1.0}
        temp_loss_t = sess.run(loss_t, feed_dict = feed_dict)
        temp_acc_t = sess.run(acc_t, feed_dict = feed_dict) 

        history_loss_t_test.append(temp_loss_t)
        history_acc_t_test.append(temp_acc_t)

        # domain classification for f and d
        rand_index = np.random.choice(len(x_s_test), size = batch_size)
        x_batch_s = x_s_test[rand_index]
        d_batch_s = d_s_test[rand_index]

        rand_index = np.random.choice(len(x_t_test), size = batch_size)
        x_batch_t = x_t_test[rand_index]
        d_batch_t = d_t_test[rand_index]

        feed_dict = {x_s: x_batch_s, d_s: d_batch_s, x_t: x_batch_t, d_t: d_batch_t, keep_prob: 1.0}
        temp_loss_f = sess.run(loss_f, feed_dict = feed_dict)
        temp_loss_d = sess.run(loss_d, feed_dict = feed_dict)
        temp_acc_d = sess.run(acc_d, feed_dict = feed_dict)

        history_loss_f_test.append(temp_loss_f)
        history_loss_d_test.append(temp_loss_d)
        history_acc_d_test.append(temp_acc_d)

      print ('-' * 100)    
      fig = plt.figure(figsize = (10, 3))
      ax1 = fig.add_subplot(1, 2, 1)
      ax1.plot(range(n_iter), history_loss_s_train, 'b-', label = 'Training')
      ax1.plot(range(n_iter), history_loss_s_test, 'r-', label = 'Test')
      ax1.set_title('Loss_s')
      ax1.legend(loc = 'upper right')

      ax2 = fig.add_subplot(1, 2, 2)
      ax2.plot(range(n_iter), history_acc_s_train, 'b-', label = 'Training')
      ax2.plot(range(n_iter), history_acc_s_test, 'r-', label = 'Test')
      ax2.set_title('Accuracy_s')
      ax2.legend(loc = 'upper right')

      fig = plt.figure(figsize = (10, 3))
      ax1 = fig.add_subplot(1, 2, 1)
      ax1.plot(range(n_iter), history_loss_d_train, 'b-', label = 'Training')
      ax1.plot(range(n_iter), history_loss_d_test, 'r-', label = 'Test')
      ax1.set_title('Loss_d')
      ax1.legend(loc = 'upper right')

      ax2 = fig.add_subplot(1, 2, 2)
      ax2.plot(range(n_iter), history_acc_d_train, 'b-', label = 'Training')
      ax2.plot(range(n_iter), history_acc_d_test, 'r-', label = 'Test')
      ax2.set_ylim(0.0, 1.0)
      ax2.set_title('Accuracy_d')
      ax2.legend(loc = 'lower right')

      fig = plt.figure(figsize = (10, 3))
      ax1 = fig.add_subplot(1, 2, 1)
      #ax1.plot(range(n_iter), history_loss_t_train, 'b-', label = 'Training')
      ax1.plot(range(n_iter), history_loss_t_test, 'r-', label = 'Test')
      ax1.set_title('Loss_t')
      ax1.legend(loc = 'upper right')

      ax2 = fig.add_subplot(1, 2, 2)
      #ax2.plot(range(n_iter), history_acc_t_train, 'b-', label = 'Training')
      ax2.plot(range(n_iter), history_acc_t_test, 'r-', label = 'Test')
      ax2.set_title('Accuracy_t')
      ax2.legend(loc = 'upper right')

      plt.show()
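In fit() above, the adversarial part is implemented with two separate updates: train_step_d minimizes loss_d over the domain classifier, and train_step_f minimizes loss_f = -lam * loss_d over the feature extractor. The gradient_reversal helper expresses the same idea as a single layer; below is a minimal standalone check of its forward/backward behaviour (a sketch, not part of the original code):

# Sketch: the gradient reversal trick is the identity in the forward pass
# and multiplies the incoming gradient by -lam in the backward pass.
import tensorflow as tf

lam = 1.0
x = tf.constant([[1.0, 2.0]])
y = - lam * x + tf.stop_gradient((1.0 + lam) * x)   # same form as gradient_reversal()
grad = tf.gradients(tf.reduce_sum(y), x)[0]

with tf.Session() as sess:
  y_val, grad_val = sess.run([y, grad])
  print(y_val)     # [[ 1.  2.]]  -> identical to x
  print(grad_val)  # [[-1. -1.]]  -> gradient scaled by -lam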

Sample Code (CRNN)

# CRNN & Domain Adaptation

class CRNN_DA():
  def __init__(self):
    pass

  def weight_variable(self, name, shape):
    initializer = tf.truncated_normal_initializer(mean = 0.0, stddev = 0.01, dtype = tf.float32)
    return tf.get_variable(name, shape, initializer = initializer)

  def bias_variable(self, name, shape):
    initializer = tf.constant_initializer(value = 0.0, dtype = tf.float32)
    return tf.get_variable(name, shape, initializer = initializer)

  def get_zoneout_mask(self, zoneout_prob, shape):
    # Each entry is 1 with probability zoneout_prob (the previous state is
    # kept, i.e. "zoned out") and 0 otherwise (the state is updated normally).
    zoneout_prob = tf.convert_to_tensor(zoneout_prob)
    random_tensor = zoneout_prob + tf.random_uniform(shape)
    zoneout_mask = tf.floor(random_tensor)

    return zoneout_mask

  def f_extractor(self, x, length, n_in, filter_size, n_filters, keep_prob, reuse = False):
    x_reshaped = tf.reshape(x, [-1, 1, length, n_in])

    with tf.variable_scope('f_extractor', reuse = reuse):
      w_1 = self.weight_variable('w_1', [1, filter_size, n_in, n_filters])
      b_1 = self.bias_variable('b_1', [n_filters])

      # conv
      conv = tf.nn.conv2d(x_reshaped, w_1, strides = [1, 1, 1, 1], padding = 'SAME') + b_1

      # batch norm
      #batch_mean, batch_var = tf.nn.moments(conv, [0, 1, 2])
      #conv = (conv - batch_mean) / (tf.sqrt(batch_var) + 1e-10)

      # relu
      conv = tf.nn.relu(conv)

      # max_pool
      #conv = tf.nn.max_pool(conv, ksize = [1, 2, 2, 1], strides = [1, 2, 2, 1], padding = 'SAME')

      feature = tf.reshape(conv, [-1, length, n_filters])

    return feature

  def lstm(self, x, length, n_filters, n_units_l, batch_size, forget_bias, \
                  zoneout_prob, reuse = False):
    # LSTM
    h = tf.zeros(shape = [batch_size, n_units_l], dtype = tf.float32)
    c = tf.zeros(shape = [batch_size, n_units_l], dtype = tf.float32)

    with tf.variable_scope('lstm', reuse = reuse):
      w_x = self.weight_variable('w_x', [n_filters, n_units_l * 4])
      w_h = self.weight_variable('w_h', [n_units_l, n_units_l * 4])
      b = self.bias_variable('b', [n_units_l * 4])

      zoneout_mask_c = self.get_zoneout_mask(zoneout_prob, [n_units_l])
      zoneout_mask_complement_c = tf.ones(shape = [n_units_l], dtype = tf.float32) - zoneout_mask_c
      zoneout_mask_h = self.get_zoneout_mask(zoneout_prob, [n_units_l])
      zoneout_mask_complement_h = tf.ones(shape = [n_units_l], dtype = tf.float32) - zoneout_mask_h

      for t in range(length):

        t_x = tf.matmul(x[:, t, :], w_x)
        t_h = tf.matmul(h, w_h)

        i, f, o, g = tf.split(tf.add(tf.add(t_x, t_h), b), 4, axis = 1)

        i = tf.nn.sigmoid(i)
        f = tf.nn.sigmoid(f + forget_bias)
        o = tf.nn.sigmoid(o)
        g = tf.nn.tanh(g)

        # zoneout
        c_temp = tf.add(tf.multiply(f, c), tf.multiply(i, g))
        h_temp = tf.multiply(o, tf.nn.tanh(c_temp))

        c = zoneout_mask_c * c + \
                        zoneout_mask_complement_c * c_temp
        h = zoneout_mask_h * h + \
                        zoneout_mask_complement_h * h_temp

      return h

  def classifier_d(self, x, length, n_filters, n_units_d, n_domains, keep_prob, reuse = False):

    x = tf.reshape(x, [-1, length * n_filters])
    with tf.variable_scope('classifier_d', reuse = reuse):
      w_1 = self.weight_variable('w_1', [length * n_filters, n_units_d])
      b_1 = self.bias_variable('b_1', [n_units_d])

      d = tf.matmul(x, w_1) + b_1

      # batch norm
      #batch_mean, batch_var = tf.nn.moments(d, [0])
      #d = (d - batch_mean) / (tf.sqrt(batch_var) + 1e-10)

      # relu
      d = tf.nn.relu(d)

      # dropout
      #d = tf.nn.dropout(d, keep_prob)

      w_2 = self.weight_variable('w_2', [n_units_d, n_domains])
      b_2 = self.bias_variable('b_2', [n_domains])

      d = tf.matmul(d, w_2) + b_2
      logits = d

    return logits

  def predictor(self, x, n_units_l, n_units_p, n_predictions, keep_prob, reuse = False):

    with tf.variable_scope('predictor', reuse = reuse):
      w_1 = self.weight_variable('w_1', [n_units_l, n_units_p])
      b_1 = self.bias_variable('b_1', [n_units_p])

      y = tf.matmul(x, w_1) + b_1

      # batch norm
      #batch_mean, batch_var = tf.nn.moments(y, [0])
      #y = (y - batch_mean) / (tf.sqrt(batch_var) + 1e-10)

      # relu
      y = tf.nn.relu(y)

      # dropout
      #y = tf.nn.dropout(y, keep_prob)

      w_2 = self.weight_variable('w_2', [n_units_p, n_predictions])
      b_2 = self.bias_variable('b_2', [n_predictions])

      y = tf.matmul(y, w_2) + b_2

    return y

  def gradient_reversal(self, f, lam, n_units_f, batch_size):
    # Identity in the forward pass; scales the gradient by -lam in the
    # backward pass. Not used by fit() below.
    return - lam * f + tf.stop_gradient((1.0 + lam) * f)

  def loss_mse(self, y, t):
    mse = tf.reduce_mean(tf.reduce_sum(tf.square(t - y), axis = 1))
    return mse

  def loss_cross_entropy(self, y, t):
    cross_entropy = - tf.reduce_mean(tf.reduce_sum(t * tf.log(tf.clip_by_value(y, 1e-10, 1.0)), axis = 1))
    return cross_entropy

  def loss_entropy(self, p):
    entropy = tf.reduce_mean(tf.reduce_sum(p * tf.log(tf.clip_by_value(p, 1e-10, 1.0)), axis = 1))
    return entropy

  def accuracy(self, y, t):
    correct_preds = tf.equal(tf.argmax(y, axis = 1), tf.argmax(t, axis = 1))
    accuracy = tf.reduce_mean(tf.cast(correct_preds, tf.float32))
    return accuracy

  def accuracy_mae(self, y, t):
    accuracy = tf.reduce_mean(tf.reduce_sum(tf.abs(y - t), axis = 1))
    return accuracy

  def training(self, loss, learning_rate, var_list):
    optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate)
    train_step = optimizer.minimize(loss, var_list = var_list)
    return train_step

  def training_gd(self, loss, learning_rate, var_list):
    #optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate)
    train_step = optimizer.minimize(loss, var_list = var_list)
    return train_step

  def training_clipped(self, loss, learning_rate, clip_norm, var_list):
    optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate)

    grads_and_vars = optimizer.compute_gradients(loss, var_list = var_list)
    clipped_grads_and_vars = [(tf.clip_by_norm(grad, clip_norm = clip_norm), \
                             var) for grad, var in grads_and_vars]
    train_step = optimizer.apply_gradients(clipped_grads_and_vars)

    return train_step

  def fit(self, x_s_train, x_s_test, y_s_train, y_s_test, \
          x_t_train, x_t_test, y_t_train, y_t_test, \
          length, n_in, filter_size, n_filters, n_units_l, n_units_d, n_domains, \
          n_units_p, n_predictions, lam, \
          learning_rate, n_iter, batch_size, show_step, is_saving, model_path):

    tf.reset_default_graph()

    x_s = tf.placeholder(shape = [None, length], dtype = tf.float32)
    y_s = tf.placeholder(shape = [None, n_predictions], dtype = tf.float32)
    d_s = tf.placeholder(shape = [None, n_domains], dtype = tf.float32) 
    x_t = tf.placeholder(shape = [None, length], dtype = tf.float32)
    y_t = tf.placeholder(shape = [None, n_predictions], dtype = tf.float32) 
    d_t = tf.placeholder(shape = [None, n_domains], dtype = tf.float32) 
    keep_prob = tf.placeholder(shape = [], dtype = tf.float32)

    feat_s = self.f_extractor(x_s, length, n_in, filter_size, n_filters, keep_prob, reuse = False)
    feat_t = self.f_extractor(x_t, length, n_in, filter_size, n_filters, keep_prob, reuse = True)

    lstm_s = self.lstm(feat_s, length, n_filters, n_units_l, batch_size, 1.0, 0.0, reuse = False)
    lstm_t = self.lstm(feat_t, length, n_filters, n_units_l, batch_size, 1.0, 0.0, reuse = True)

    feat = tf.concat([feat_s, feat_t], axis = 0)
    d = tf.concat([d_s, d_t], axis = 0)

    logits_d = self.classifier_d(feat, length, n_filters, n_units_d, n_domains, keep_prob, reuse = False)
    probs_d = tf.nn.softmax(logits_d)
    loss_d = self.loss_cross_entropy(probs_d, d)

    preds_s = self.predictor(lstm_s, n_units_l, n_units_p, n_predictions, keep_prob, reuse = False)
    loss_s = self.loss_mse(preds_s, y_s)

    preds_t = self.predictor(lstm_t, n_units_l, n_units_p, n_predictions, keep_prob, reuse = True)
    loss_t = self.loss_mse(preds_t, y_t)

    loss_f = - lam * loss_d

    var_list_f = tf.trainable_variables('f_extractor')
    #var_list_f = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="f_extractor")
    var_list_l = tf.trainable_variables('lstm')
    var_list_d = tf.trainable_variables('classifier_d')
    var_list_p = tf.trainable_variables('predictor')

    var_list_f_l_p = var_list_f + var_list_l + var_list_p

    train_step_f = self.training(loss_f, learning_rate, var_list_f)
    train_step_d = self.training(loss_d, learning_rate, var_list_d)
    train_step_f_l_p = self.training(loss_s, learning_rate, var_list_f_l_p)
    train_step_p = self.training(loss_t, learning_rate, var_list_p)

    acc_d =  self.accuracy(probs_d, d)
    acc_s =  self.accuracy_mae(preds_s, y_s)
    acc_t =  self.accuracy_mae(preds_t, y_t)

    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    with tf.Session() as sess:

      sess.run(init)

      history_loss_s_train = []
      history_loss_s_test = []
      history_acc_s_train = []
      history_acc_s_test = []

      history_loss_t_train = []
      history_loss_t_test = []
      history_acc_t_train = []
      history_acc_t_test = []

      history_loss_d_train = []
      history_loss_d_test = []
      history_acc_d_train = []
      history_acc_d_test = []

      history_loss_f_train = []
      history_loss_f_test = []

      for i in range(n_iter):
        # Training
        # prediction for s
        rand_index = np.random.choice(len(x_s_train), size = batch_size)
        x_batch = x_s_train[rand_index]
        y_batch = y_s_train[rand_index]

        feed_dict = {x_s: x_batch, y_s: y_batch, keep_prob: 1.0}
        sess.run(train_step_f_l_p, feed_dict = feed_dict)

        temp_loss_s = sess.run(loss_s, feed_dict = feed_dict)
        temp_acc_s = sess.run(acc_s, feed_dict = feed_dict)

        history_loss_s_train.append(temp_loss_s)
        history_acc_s_train.append(temp_acc_s)

        if (i + 1) % show_step == 0:
          print ('-' * 100)
          print ('Iteration: ' + str(i + 1) + '  Loss_s: ' + str(temp_loss_s) \
                + '  Accuracy_s: ' + str(temp_acc_s))

        # prediction for t
        #rand_index = np.random.choice(len(x_t_train), size = batch_size)
        #x_batch = x_t_train[rand_index]

        #feed_dict = {x_t: x_batch, keep_prob: 1.0}
        #sess.run(train_step_p, feed_dict = feed_dict)

        #temp_loss_t = sess.run(loss_t, feed_dict = feed_dict)
        #temp_acc_t = sess.run(acc_t, feed_dict = feed_dict)

        #history_loss_t_train.append(temp_loss_t)
        #history_acc_t_train.append(temp_acc_t)

        #if (i + 1) % show_step == 0:
        #  print ('-' * 100)
        #  print ('Iteration: ' + str(i + 1) + '  Loss_t: ' + str(temp_loss_t) \
        #        + '  Accuracy_t: ' + str(temp_acc_t))

        # domain classification for d
        rand_index = np.random.choice(len(x_s_train), size = batch_size)
        x_batch_s = x_s_train[rand_index]
        d_batch_s = d_s_train[rand_index]

        rand_index = np.random.choice(len(x_t_train), size = batch_size)
        x_batch_t = x_t_train[rand_index]
        d_batch_t = d_t_train[rand_index]

        feed_dict = {x_s: x_batch_s, d_s: d_batch_s, x_t: x_batch_t, d_t: d_batch_t, keep_prob: 1.0}
        # Note: unlike the RNN version, the adversarial updates are left
        # commented out here, so this run only monitors loss_d / loss_f.
        #sess.run(train_step_f, feed_dict = feed_dict)
        #sess.run(train_step_d, feed_dict = feed_dict)

        temp_loss_f = sess.run(loss_f, feed_dict = feed_dict)
        temp_loss_d = sess.run(loss_d, feed_dict = feed_dict)
        temp_acc_d = sess.run(acc_d, feed_dict = feed_dict)

        history_loss_f_train.append(temp_loss_f)
        history_loss_d_train.append(temp_loss_d)
        history_acc_d_train.append(temp_acc_d)

        if (i + 1) % show_step == 0:
          print ('-' * 100)
          print ('Iteration: ' + str(i + 1) + \
                 '  Loss_d: ' + str(temp_loss_d) + '  Accuracy_d: ' + str(temp_acc_d))

        # Test
        # prediction for s
        rand_index = np.random.choice(len(x_s_test), size = batch_size)
        x_batch = x_s_test[rand_index]
        y_batch = y_s_test[rand_index]

        feed_dict = {x_s: x_batch, y_s: y_batch, keep_prob: 1.0}
        temp_loss_s = sess.run(loss_s, feed_dict = feed_dict)
        temp_acc_s = sess.run(acc_s, feed_dict = feed_dict)

        history_loss_s_test.append(temp_loss_s)
        history_acc_s_test.append(temp_acc_s)

        # prediction for t
        rand_index = np.random.choice(len(x_t_test), size = batch_size)
        x_batch = x_t_test[rand_index]
        y_batch = y_t_test[rand_index]

        feed_dict = {x_t: x_batch, y_t: y_batch, keep_prob: 1.0}
        temp_loss_t = sess.run(loss_t, feed_dict = feed_dict)
        temp_acc_t = sess.run(acc_t, feed_dict = feed_dict) 

        history_loss_t_test.append(temp_loss_t)
        history_acc_t_test.append(temp_acc_t)

        # domain classification for f and d
        rand_index = np.random.choice(len(x_s_test), size = batch_size)
        x_batch_s = x_s_test[rand_index]
        d_batch_s = d_s_test[rand_index]

        rand_index = np.random.choice(len(x_t_test), size = batch_size)
        x_batch_t = x_t_test[rand_index]
        d_batch_t = d_t_test[rand_index]

        feed_dict = {x_s: x_batch_s, d_s: d_batch_s, x_t: x_batch_t, d_t: d_batch_t, keep_prob: 1.0}
        temp_loss_f = sess.run(loss_f, feed_dict = feed_dict)
        temp_loss_d = sess.run(loss_d, feed_dict = feed_dict)
        temp_acc_d = sess.run(acc_d, feed_dict = feed_dict)

        history_loss_f_test.append(temp_loss_f)
        history_loss_d_test.append(temp_loss_d)
        history_acc_d_test.append(temp_acc_d)

      print ('-' * 100)    
      fig = plt.figure(figsize = (10, 3))
      ax1 = fig.add_subplot(1, 2, 1)
      ax1.plot(range(n_iter), history_loss_s_train, 'b-', label = 'Training')
      ax1.plot(range(n_iter), history_loss_s_test, 'r-', label = 'Test')
      ax1.set_title('Loss_s')
      ax1.legend(loc = 'upper right')

      ax2 = fig.add_subplot(1, 2, 2)
      ax2.plot(range(n_iter), history_acc_s_train, 'b-', label = 'Training')
      ax2.plot(range(n_iter), history_acc_s_test, 'r-', label = 'Test')
      ax2.set_title('Accuracy_s')
      ax2.legend(loc = 'upper right')

      fig = plt.figure(figsize = (10, 3))
      ax1 = fig.add_subplot(1, 2, 1)
      ax1.plot(range(n_iter), history_loss_d_train, 'b-', label = 'Training')
      ax1.plot(range(n_iter), history_loss_d_test, 'r-', label = 'Test')
      ax1.set_title('Loss_d')
      ax1.legend(loc = 'upper right')

      ax2 = fig.add_subplot(1, 2, 2)
      ax2.plot(range(n_iter), history_acc_d_train, 'b-', label = 'Training')
      ax2.plot(range(n_iter), history_acc_d_test, 'r-', label = 'Test')
      ax2.set_ylim(0.0, 1.0)
      ax2.set_title('Accuracy_d')
      ax2.legend(loc = 'lower right')

      fig = plt.figure(figsize = (10, 3))
      ax1 = fig.add_subplot(1, 2, 1)
      #ax1.plot(range(n_iter), history_loss_t_train, 'b-', label = 'Training')
      ax1.plot(range(n_iter), history_loss_t_test, 'r-', label = 'Test')
      ax1.set_title('Loss_t')
      ax1.legend(loc = 'upper right')

      ax2 = fig.add_subplot(1, 2, 2)
      #ax2.plot(range(n_iter), history_acc_t_train, 'b-', label = 'Training')
      ax2.plot(range(n_iter), history_acc_t_test, 'r-', label = 'Test')
      ax2.set_title('Accuracy_t')
      ax2.legend(loc = 'upper right')

      plt.show()
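The hand-rolled loop in lstm() exists mainly so that zoneout can be applied; fit() passes zoneout_prob = 0.0 anyway. Under that assumption, a sketch of a drop-in alternative using the TF 1.x built-in cell (not the author's code) could look like this:

# Hypothetical replacement for the manual LSTM loop when zoneout is not used.
# x has shape [batch_size, length, n_filters]; returns the final hidden state,
# matching what lstm() returns.
def lstm_builtin(x, n_units_l, forget_bias = 1.0, reuse = False):
  with tf.variable_scope('lstm', reuse = reuse):
    cell = tf.nn.rnn_cell.LSTMCell(n_units_l, forget_bias = forget_bias)
    outputs, state = tf.nn.dynamic_rnn(cell, x, dtype = tf.float32)
  return state.h   # shape [batch_size, n_units_l]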

Parameters

n_in = 1
filter_size = 2
n_filters = 3
n_units_l = 10
n_units_d = 10
n_units_p = 10
n_domains = 2
lam = 1.0
learning_rate = 0.01
n_iter = 100
batch_size = 32
show_step = 50
model_path = 'datalab/model'

Output

crnn_da = CRNN_DA()
is_saving = False

crnn_da.fit(x_s_train, x_s_test, y_s_train, y_s_test, \
            x_t_train, x_t_test, y_t_train, y_t_test, \
            length, n_in, filter_size, n_filters, n_units_l, n_units_d, n_domains, \
            n_units_p, n_predictions, lam, \
            learning_rate, n_iter, batch_size, show_step, is_saving, model_path)

(Figure: training log and the loss/accuracy curves produced by fit(): Loss_s / Accuracy_s, Loss_d / Accuracy_d, and Loss_t / Accuracy_t.)
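For comparison, the RNN variant can be run the same way; a sketch (n_units_f is an assumed value, the remaining parameters are those defined above):

rnn_da = RNN_DA()
n_units_f = 10   # assumed hidden size for the LSTM feature extractor

rnn_da.fit(x_s_train, x_s_test, y_s_train, y_s_test, \
           x_t_train, x_t_test, y_t_train, y_t_test, \
           length, n_in, n_units_f, n_units_d, n_domains, \
           n_units_p, n_predictions, lam, \
           learning_rate, n_iter, batch_size, show_step, is_saving, model_path)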
