Notes on an LSTM implementation for Kaggle (Predict Future Sales)


Reference

Predict Future Sales (Kaggle competition): https://www.kaggle.com/c/competitive-data-science-predict-future-sales

Data

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf  # TensorFlow 1.x API is used throughout

def eda(data):
  print('----------Top-5 Record----------')
  print(data.head(5))
  print('-----------Information-----------')
  print(data.info())
  print('-----------Data Types-----------')
  print(data.dtypes)
  print('----------Missing value-----------')
  print(data.isnull().sum())
  print('----------Null value-----------')
  print(data.isna().sum())   # isna() is an alias of isnull() in pandas
  print('----------Shape of Data----------')
  print(data.shape)

def graph_insight(data):
  df_num = data.select_dtypes(include = ['float64', 'int64'])
  df_num.hist(figsize = (10, 10), bins=50)
  plt.show()

train = pd.read_csv('/content/sales_train_v2.csv')  # raw training table
eda(train)
graph_insight(train)

(Figure: histograms of the numeric columns of train)

# outlier check: boxplots of daily sales counts and item prices
plt.figure(figsize = (5, 3))
plt.xlim(-100, 3000)
sns.boxplot(x = train.item_cnt_day)
plt.show()

plt.figure(figsize = (5, 3))
plt.xlim(train.item_price.min(), train.item_price.max()*1.1)
sns.boxplot(x = train.item_price)
plt.show()

(Figure: boxplots of item_cnt_day and item_price)

train_org = pd.read_csv('/content/sales_train_v2.csv')
test_org = pd.read_csv('/content/test.csv')
submission = pd.read_csv('/content/sample_submission.csv')
items = pd.read_csv('/content/items.csv')
item_cats = pd.read_csv('/content/item_categories.csv')
shops = pd.read_csv('/content/shops.csv')

train_copy = train_org.copy()

# drop duplicates
print(train_copy.duplicated().value_counts())
print()

subset = ['date','date_block_num','shop_id','item_id','item_cnt_day']
print(train_copy.duplicated(subset = subset).value_counts())
print()

train_copy2 = train_copy.drop_duplicates(subset = subset)
print(len(train_copy.item_id))
print(len(train_copy2.item_id))

#train_copy2 = train_copy2[train_copy2.item_price<100000]
#train_copy2 = train_copy2[train_copy2.item_cnt_day<1001]

plt.figure(figsize = (5, 3))
plt.xlim(-100, 3000)
sns.boxplot(x = train_copy2.item_cnt_day)
plt.show()

plt.figure(figsize = (5, 3))
plt.xlim(train_copy2.item_price.min(), train_copy2.item_price.max()*1.1)
sns.boxplot(x = train_copy2.item_price)
plt.show()
train_copy3 = train_copy2.pivot_table(index = ['shop_id', 'item_id'], \
                                      values = ['item_cnt_day'], \
                                      columns = 'date_block_num', fill_value = 0, \
                                      aggfunc = 'sum')
train_copy3 = train_copy3.reset_index()

# align the monthly series with the (shop_id, item_id) pairs in the test set
test_copy = test_org.copy()
dataset = pd.merge(test_copy, train_copy3, on=['shop_id', 'item_id'], how='left')
dataset = dataset.fillna(0)

dataset = dataset.drop(['ID', 'shop_id', 'item_id'], axis = 1)

# months 0..32 form the input sequence, month 33 is the training target;
# the test input shifts the window one month forward (months 1..33)
x_data = dataset.values[:, :-1]
y_data = dataset.values[:, -1:]
test_data = dataset.values[:, 1:]
print(x_data.shape, y_data.shape, test_data.shape)
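
As a sanity check on the slicing above, here is a small toy sketch (mine, not from the original post), assuming the pivot yields 34 monthly columns for date_block_num 0..33:

import numpy as np

toy = np.arange(2 * 34).reshape(2, 34).astype(float)  # 2 rows, 34 months

x_toy = toy[:, :-1]   # months 0..32 -> training input
y_toy = toy[:, -1:]   # month 33     -> training target
t_toy = toy[:, 1:]    # months 1..33 -> input for predicting month 34
print(x_toy.shape, y_toy.shape, t_toy.shape)  # (2, 33) (2, 1) (2, 33)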

Sample Code

class LSTM():
  def __init__(self):
    pass

  def weight_variable(self, name, shape):
    initializer = tf.truncated_normal_initializer(mean = 0.0, stddev = 0.01, dtype = tf.float32)
    return tf.get_variable(name, shape, initializer = initializer)

  def bias_variable(self, name, shape):
    initializer = tf.constant_initializer(value = 0.0, dtype = tf.float32)
    return tf.get_variable(name, shape, initializer = initializer)

  def get_zoneout_mask(self, zoneout_prob, shape):
    # floor(zoneout_prob + U[0,1)) is 1 with probability zoneout_prob,
    # i.e. each unit keeps its previous state with that probability
    prob = tf.convert_to_tensor(zoneout_prob)
    random_tensor = prob + tf.random_uniform(shape)
    zoneout_mask = tf.floor(random_tensor)

    return zoneout_mask

  def lstm(self, x, length, n_in, n_units_l, n_units_f, n_predictions, batch_size, \
           forget_bias, zoneout_prob, keep_prob, reuse = False):

    x = tf.reshape(x, [-1, length, n_in])
    h = tf.zeros(shape = [batch_size, n_units_l], dtype = tf.float32)
    c = tf.zeros(shape = [batch_size, n_units_l], dtype = tf.float32)

    with tf.variable_scope('lstm', reuse = reuse):
      w_x = self.weight_variable('w_x', [n_in, n_units_l * 4])
      w_h = self.weight_variable('w_h', [n_units_l, n_units_l * 4])
      b = self.bias_variable('b', [n_units_l * 4])

      # zoneout masks are sampled once at graph construction and shared
      # across all time steps
      zoneout_mask_c = self.get_zoneout_mask(zoneout_prob, [n_units_l])
      zoneout_mask_complement_c = tf.ones(shape = [n_units_l], dtype = tf.float32) - zoneout_mask_c
      zoneout_mask_h = self.get_zoneout_mask(zoneout_prob, [n_units_l])
      zoneout_mask_complement_h = tf.ones(shape = [n_units_l], dtype = tf.float32) - zoneout_mask_h

      for t in range(length):

        t_x = tf.matmul(x[:, t, :], w_x)
        t_h = tf.matmul(h, w_h)

        # split into input gate, forget gate, output gate and candidate
        i, f, o, g = tf.split(tf.add(tf.add(t_x, t_h), b), 4, axis = 1)

        i = tf.nn.sigmoid(i)
        f = tf.nn.sigmoid(f + forget_bias)
        o = tf.nn.sigmoid(o)
        g = tf.nn.tanh(g)

        # zoneout: each unit keeps its previous state with probability
        # zoneout_prob instead of taking the new candidate
        c_temp = tf.add(tf.multiply(f, c), tf.multiply(i, g))
        h_temp = tf.multiply(o, tf.nn.tanh(c_temp))

        c = zoneout_mask_c * c + \
                        zoneout_mask_complement_c * c_temp
        h = zoneout_mask_h * h + \
                        zoneout_mask_complement_h * h_temp

      # fully connected head: ReLU + dropout, then a linear output layer
      w_2 = self.weight_variable('w_2', [n_units_l, n_units_f])
      b_2 = self.bias_variable('b_2', [n_units_f])

      y = tf.matmul(h, w_2) + b_2
      y = tf.nn.relu(y)
      y = tf.nn.dropout(y, keep_prob)

      w_3 = self.weight_variable('w_3', [n_units_f, n_predictions])
      b_3 = self.bias_variable('b_3', [n_predictions])

      y = tf.matmul(y, w_3) + b_3

      return y

  def loss_mse(self, y, t):
    mse = tf.reduce_mean(tf.reduce_sum(tf.square(t - y), axis = 1))
    return mse

  def loss_cross_entropy(self, y, t):
    cross_entropy = - tf.reduce_mean(tf.reduce_sum(t * tf.log(tf.clip_by_value(y, 1e-10, 1.0)), axis = 1))
    return cross_entropy

  def loss_entropy(self, p):
    entropy = tf.reduce_mean(tf.reduce_sum(p * tf.log(tf.clip_by_value(p, 1e-10, 1.0)), axis = 1))
    return entropy

  def accuracy(self, y, t):
    correct_preds = tf.equal(tf.argmax(y, axis = 1), tf.argmax(t, axis = 1))
    accuracy = tf.reduce_mean(tf.cast(correct_preds, tf.float32))
    return accuracy

  def accuracy_rmse(self, y, t):
    mse = tf.reduce_mean(tf.reduce_sum(tf.square(t - y), axis = 1))
    return tf.sqrt(mse)

  def accuracy_mae(self, y, t, n_predictions):
    accuracy = tf.reduce_mean(tf.reduce_sum(tf.abs(y - t), axis = 1)) / n_predictions
    return accuracy

  def training(self, loss, learning_rate, var_list):
    optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate)
    train_step = optimizer.minimize(loss, var_list = var_list)
    return train_step

  def fit(self, x_data, y_data, max_length, length, n_in, n_units_l, \
          n_units_f, n_predictions, learning_rate, n_epoch, n_iter, \
          batch_size, show_step, is_saving, model_path):

    tf.reset_default_graph()

    x = tf.placeholder(shape = [None, length], dtype = tf.float32)
    y = tf.placeholder(shape = [None, n_predictions], dtype = tf.float32)
    keep_prob = tf.placeholder(shape = [], dtype = tf.float32)

    preds = self.lstm(x, length, n_in, n_units_l, n_units_f, n_predictions, \
                      batch_size, 1.0, 0.0, keep_prob, reuse = False)
    loss = self.loss_mse(preds, y)

    var_list = tf.trainable_variables('lstm')
    train_step = self.training(loss, learning_rate, var_list)

    acc = self.accuracy_rmse(preds, y)  # RMSE serves as the evaluation metric

    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    with tf.Session() as sess:

      sess.run(init)

      for e in range(n_epoch):
        train_indices = np.random.choice(len(x_data), round((len(x_data)*0.8)), \
                                replace = False)
        test_indices = np.array(list(set(range(len(x_data))) - set(train_indices)))

        x_train = x_data[train_indices][:, max_length - length:]
        x_test = x_data[test_indices][:, max_length - length:]
        y_train = y_data[train_indices]
        y_test = y_data[test_indices]

        history_loss_train = []
        history_loss_test = []
        history_acc_train = []
        history_acc_test = []

        for i in range(n_iter):
          # Training
          rand_index = np.random.choice(len(x_train), size = batch_size)
          x_batch = x_train[rand_index]
          y_batch = y_train[rand_index]

          feed_dict = {x: x_batch, y: y_batch, keep_prob: 0.7}
          sess.run(train_step, feed_dict = feed_dict)

          temp_loss = sess.run(loss, feed_dict = feed_dict)
          temp_acc = sess.run(acc, feed_dict = feed_dict)

          history_loss_train.append(temp_loss)
          history_acc_train.append(temp_acc)

          if (i + 1) % show_step == 0:
            print('-' * 100)
            print('epoch: ' + str(e + 1) + '  Iteration: ' + str(i + 1) + '  Loss: ' + str(temp_loss) \
                  + '  RMSE: ' + str(temp_acc))

          # Test
          rand_index = np.random.choice(len(x_test), size = batch_size)
          x_batch = x_test[rand_index]
          y_batch = y_test[rand_index]

          feed_dict = {x: x_batch, y: y_batch, keep_prob: 1.0}
          temp_loss = sess.run(loss, feed_dict = feed_dict)
          temp_acc = sess.run(acc, feed_dict = feed_dict)

          history_loss_test.append(temp_loss)
          history_acc_test.append(temp_acc)

        if is_saving:
          model_path = saver.save(sess, model_path)
          print('-' * 100)
          print('done saving at ', model_path)

        print('-' * 100)
        fig = plt.figure(figsize = (10, 3))
        ax1 = fig.add_subplot(1, 2, 1)
        ax1.plot(range(n_iter), history_loss_train, 'b-', label = 'Training')
        ax1.plot(range(n_iter), history_loss_test, 'r-', label = 'Test')
        ax1.set_ylim(0.0, 3.0)
        ax1.set_title('Loss')
        ax1.legend(loc = 'upper right')

        ax2 = fig.add_subplot(1, 2, 2)
        ax2.plot(range(n_iter), history_acc_train, 'b-', label = 'Training')
        ax2.plot(range(n_iter), history_acc_test, 'r-', label = 'Test')
        ax2.set_ylim(0.0, 3.0)
        ax2.set_title('RMSE')
        ax2.legend(loc = 'upper right')

        plt.show()

  def predict(self, x_input, length, n_in, n_units_l, n_units_f, n_predictions, \
          batch_size, model_path):

    x = tf.placeholder(shape = [None, length], dtype = tf.float32)
    keep_prob = tf.placeholder(shape = [], dtype = tf.float32)

    preds = self.lstm(x, length, n_in, n_units_l, n_units_f, n_predictions, \
                      batch_size, 1.0, 0.0, keep_prob, reuse = True)

    saver = tf.train.Saver()

    with tf.Session() as sess:

      saver.restore(sess, model_path)

      feed_dict = {x: x_input, keep_prob: 1.0}
      prediction = sess.run(preds, feed_dict = feed_dict)

    return prediction
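
The zoneout update inside lstm() can be illustrated with a small numpy sketch (mine, not from the original post): with probability zoneout_prob each unit keeps its previous value, otherwise it takes the freshly computed candidate.

import numpy as np

rng = np.random.default_rng(0)
zoneout_prob = 0.1

c_prev = rng.normal(size = 4)   # previous cell state
c_cand = rng.normal(size = 4)   # newly computed candidate state

# floor(p + U[0,1)) is 1 with probability p -> the unit keeps its old value
mask = np.floor(zoneout_prob + rng.uniform(size = 4))
c_new = mask * c_prev + (1.0 - mask) * c_cand
print(mask, c_new)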

Parameters

lstm = LSTM()

max_length = 33
length = 15
n_in = 1
n_units_l = 32
n_units_f = 32
n_predictions = 1
learning_rate = 0.01
n_epoch = 3
n_iter = 300
batch_size = 500
show_step = 100
model_path = 'datalab/model'

is_saving = False

lstm.fit(x_data, y_data, max_length, length, n_in, n_units_l, n_units_f, n_predictions, \
         learning_rate, n_epoch, n_iter, batch_size, show_step, is_saving, model_path)
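
The post does not show an inference call, so here is a hedged usage sketch of predict (mine, not from the original). Because the initial LSTM states are created with a fixed batch dimension, inference has to run in chunks of exactly batch_size rows, and this assumes fit() was run with is_saving = True so that model_path holds a checkpoint.

x_input = test_data[:, -length:]   # last `length` months of the shifted window

preds = []
for start in range(0, len(x_input) - batch_size + 1, batch_size):
  chunk = x_input[start:start + batch_size]
  preds.append(lstm.predict(chunk, length, n_in, n_units_l, n_units_f, \
                            n_predictions, batch_size, model_path))
# rows in a final partial batch would need padding up to batch_size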

Output


(Figures: per-epoch loss and RMSE curves for training and test, as produced by fit())