Help us understand the problem. What is going on with this article?

LSTM for Kaggle (PredictFutureSales) の実装に関するメモ

More than 1 year has passed since last update.

Reference

Predict Future Sales

Data

def eda(data):
  """Print a quick exploratory summary of a pandas DataFrame.

  The report contains, in order: the first five rows, the `info()` report,
  the column dtypes, per-column missing-value counts (printed twice, once via
  `isnull()` and once via `isna()`), and the shape.

  Args:
    data: the DataFrame to summarise.

  Returns:
    None. Everything is written to standard output.
  """
  print('----------Top-5- Record----------')
  print(data.head(5))
  print('-----------Information-----------')
  # info() writes its report itself and returns None; wrapping it in print()
  # would append a stray "None" line to the output.
  data.info()
  print('-----------Data Types-----------')
  print(data.dtypes)
  print('----------Missing value-----------')
  print(data.isnull().sum())
  print('----------Null value-----------')
  print(data.isna().sum())
  print('----------Shape of Data----------')
  print(data.shape)

def graph_insight(data):
  """Draw one 50-bin histogram per float64/int64 column of `data`."""
  numeric_dtypes = ['float64', 'int64']
  numeric_columns = data.select_dtypes(include = numeric_dtypes)
  numeric_columns.hist(bins = 50, figsize = (10, 10))
# Print the text summary and draw the per-column histograms for the training frame.
# NOTE(review): `train` is not defined in any visible snippet (the sales CSV is
# loaded further down as `train_org`) — confirm which frame is meant here.
eda(train)
graph_insight(train)

image.png

# Box plot of the daily item counts, with the x axis clipped to [-100, 3000].
plt.figure(figsize = (5, 3))
plt.xlim(-100, 3000)
sns.boxplot(x = train['item_cnt_day'])
plt.show()

# Box plot of the item prices; the x axis runs from the minimum price to 110%
# of the maximum so the largest point is not drawn on the border.
plt.figure(figsize = (5, 3))
plt.xlim(train['item_price'].min(), train['item_price'].max() * 1.1)
sns.boxplot(x = train['item_price'])
plt.show()

image.png

# Load every competition CSV from /content.
train_org = pd.read_csv('/content/sales_train_v2.csv')
test_org = pd.read_csv('/content/test.csv')
submission = pd.read_csv('/content/sample_submission.csv')
items = pd.read_csv('/content/items.csv')
item_cats = pd.read_csv('/content/item_categories.csv')
shops = pd.read_csv('/content/shops.csv')

# Work on a copy so the frame read from disk stays untouched.
train_copy = train_org.copy()

# How many rows are exact duplicates across all columns?
print(train_copy.duplicated().value_counts(), end = '\n\n')

# How many rows are duplicates when only these columns are compared?
subset = ['date', 'date_block_num', 'shop_id', 'item_id', 'item_cnt_day']
print(train_copy.duplicated(subset = subset).value_counts(), end = '\n\n')

# Keep the first row of each duplicate group, then show the row count
# before and after.
train_copy2 = train_copy.drop_duplicates(subset = subset)
print(len(train_copy['item_id']))
print(len(train_copy2['item_id']))

# Outlier filters, currently disabled:
#train_copy2 = train_copy2[train_copy2.item_price<100000]
#train_copy2 = train_copy2[train_copy2.item_cnt_day<1001]

# Daily item counts after duplicate removal, x axis clipped to [-100, 3000].
plt.figure(figsize = (5, 3))
plt.xlim(-100, 3000)
sns.boxplot(x = train_copy2['item_cnt_day'])
plt.show()

# Item prices after duplicate removal, x axis from the minimum price to 110%
# of the maximum.
plt.figure(figsize = (5, 3))
plt.xlim(train_copy2['item_price'].min(), train_copy2['item_price'].max() * 1.1)
sns.boxplot(x = train_copy2['item_price'])
plt.show()
# Build the sales matrix: one row per (shop_id, item_id) pair, one column per
# date_block_num, each cell holding the summed item_cnt_day (0 where the pair
# has no sales in that block).
# `values` is passed as a scalar rather than a one-element list so the result
# has plain single-level columns; a list would add an extra 'item_cnt_day'
# column level that does not line up with the flat columns of the test frame
# in the merge below.
train_copy3 = train_copy2.pivot_table(index = ['shop_id', 'item_id'],
                                      values = 'item_cnt_day',
                                      columns = 'date_block_num', fill_value = 0,
                                      aggfunc = 'sum')
train_copy3 = train_copy3.reset_index()

# Attach the sales history to every test row. The left join keeps test pairs
# that never appear in the training data; their history is filled with 0.
test_copy = test_org.copy()
dataset = pd.merge(test_copy, train_copy3, on=['shop_id', 'item_id'], how='left')
dataset = dataset.fillna(0)

# Only the per-block sales columns remain after dropping the identifiers.
dataset = dataset.drop(['ID', 'shop_id', 'item_id'], axis = 1)

# x_data: every column except the last; y_data: the last column (the target);
# test_data: every column except the first, i.e. the window shifted by one.
x_data = dataset.values[:, :-1]
y_data = dataset.values[:, -1:]
test_data = dataset.values[:, 1:]
print(x_data.shape, y_data.shape, test_data.shape)

Sample Code

class LSTM():
  """Hand-written single-layer LSTM regressor on the TensorFlow 1.x graph API.

  The network is an unrolled LSTM over `length` time steps followed by a
  ReLU + dropout dense layer and a linear output layer. All variables live in
  the 'lstm' variable scope. The instance itself holds no state; every method
  receives what it needs as arguments.
  """

  def __init__(self):
    pass

  def weight_variable(self, name, shape):
    """Create/fetch a float32 variable initialised from a truncated normal (std 0.01)."""
    initializer = tf.truncated_normal_initializer(mean = 0.0, stddev = 0.01, dtype = tf.float32)
    return tf.get_variable(name, shape, initializer = initializer)

  def bias_variable(self, name, shape):
    """Create/fetch a float32 variable initialised to zeros."""
    initializer = tf.constant_initializer(value = 0.0, dtype = tf.float32)
    return tf.get_variable(name, shape, initializer = initializer)

  def get_zoneout_mask(self, zoneout_prob, shape):
    """Sample a 0/1 mask whose entries are 1 with probability `zoneout_prob`.

    In `lstm`, a 1 marks a unit that keeps its previous state.
    """
    prob = tf.convert_to_tensor(zoneout_prob)
    # uniform[0, 1) + p falls in [1, 1 + p) with probability p, so floor()
    # yields 1 with probability p and 0 otherwise.
    return tf.floor(prob + tf.random_uniform(shape))

  def lstm(self, x, length, n_in, n_units_l, n_units_f, n_predictions, batch_size,
           forget_bias, zoneout_prob, keep_prob, reuse = False):
    """Build the forward graph and return the prediction tensor.

    Args:
      x: input tensor; reshaped to [-1, length, n_in].
      length: number of time steps to unroll.
      n_in: number of input features per time step.
      n_units_l: number of LSTM units.
      n_units_f: width of the dense layer that follows the LSTM.
      n_predictions: number of output values per row.
      batch_size: kept for interface compatibility; the recurrent state is now
        sized from the actual input, so this value is not used here.
      forget_bias: constant added to the forget-gate pre-activation.
      zoneout_prob: probability that a unit keeps its previous state.
      keep_prob: dropout keep probability for the dense layer.
      reuse: passed to `tf.variable_scope('lstm', ...)`.

    Returns:
      Tensor of shape [rows, n_predictions].
    """
    x = tf.reshape(x, [-1, length, n_in])

    # Size the initial state from the input itself so the same graph accepts
    # any number of rows (a fixed batch_size breaks as soon as the fed batch
    # has a different row count).
    n_rows = tf.shape(x)[0]
    h = tf.zeros(shape = [n_rows, n_units_l], dtype = tf.float32)
    c = tf.zeros(shape = [n_rows, n_units_l], dtype = tf.float32)

    with tf.variable_scope('lstm', reuse = reuse):
      # The four gates share one input matrix and one recurrent matrix; the
      # result is split into four equal parts inside the loop.
      w_x = self.weight_variable('w_x', [n_in, n_units_l * 4])
      w_h = self.weight_variable('w_h', [n_units_l, n_units_l * 4])
      b = self.bias_variable('b', [n_units_l * 4])

      # NOTE: the masks have shape [n_units_l] and are created once, outside
      # the time loop, so the same mask tensor is applied to every row and
      # every time step of the unrolled graph.
      zoneout_mask_c = self.get_zoneout_mask(zoneout_prob, [n_units_l])
      zoneout_mask_complement_c = tf.ones(shape = [n_units_l], dtype = tf.float32) - zoneout_mask_c
      zoneout_mask_h = self.get_zoneout_mask(zoneout_prob, [n_units_l])
      zoneout_mask_complement_h = tf.ones(shape = [n_units_l], dtype = tf.float32) - zoneout_mask_h

      for t in range(length):
        t_x = tf.matmul(x[:, t, :], w_x)
        t_h = tf.matmul(h, w_h)

        i, f, o, g = tf.split(tf.add(tf.add(t_x, t_h), b), 4, axis = 1)

        i = tf.nn.sigmoid(i)
        f = tf.nn.sigmoid(f + forget_bias)
        o = tf.nn.sigmoid(o)
        g = tf.nn.tanh(g)

        # Candidate states for this step. The hidden candidate must be derived
        # from the *updated* cell state, not from the previous one.
        c_temp = tf.add(tf.multiply(f, c), tf.multiply(i, g))
        h_temp = tf.multiply(o, tf.nn.tanh(c_temp))

        # zoneout: masked units keep the previous state, the rest take the
        # candidate.
        c = zoneout_mask_c * c + zoneout_mask_complement_c * c_temp
        h = zoneout_mask_h * h + zoneout_mask_complement_h * h_temp

      # Dense head on the final hidden state.
      w_2 = self.weight_variable('w_2', [n_units_l, n_units_f])
      b_2 = self.bias_variable('b_2', [n_units_f])

      y = tf.matmul(h, w_2) + b_2
      y = tf.nn.relu(y)
      y = tf.nn.dropout(y, keep_prob)

      w_3 = self.weight_variable('w_3', [n_units_f, n_predictions])
      b_3 = self.bias_variable('b_3', [n_predictions])

      y = tf.matmul(y, w_3) + b_3

      return y

  def loss_mse(self, y, t):
    """Batch mean of the per-row sum of squared errors."""
    mse = tf.reduce_mean(tf.reduce_sum(tf.square(t - y), axis = 1))
    return mse

  def loss_cross_entropy(self, y, t):
    """Batch mean of -sum(t * log(y)), with y clipped to [1e-10, 1.0]."""
    cross_entropy = - tf.reduce_mean(tf.reduce_sum(t * tf.log(tf.clip_by_value(y, 1e-10, 1.0)), axis = 1))
    return cross_entropy

  def loss_entropy(self, p):
    """Batch mean of sum(p * log(p)), with p clipped to [1e-10, 1.0] inside the log.

    Note the sign: no minus is applied, so this is the negative of the
    Shannon entropy.
    """
    entropy = tf.reduce_mean(tf.reduce_sum(p * tf.log(tf.clip_by_value(p, 1e-10, 1.0)), axis = 1))
    return entropy

  def accuracy(self, y, t):
    """Fraction of rows where argmax(y) equals argmax(t)."""
    correct_preds = tf.equal(tf.argmax(y, axis = 1), tf.argmax(t, axis = 1))
    accuracy = tf.reduce_mean(tf.cast(correct_preds, tf.float32))
    return accuracy

  def accuracy_rmse(self, y, t):
    """Square root of `loss_mse(y, t)`."""
    return tf.sqrt(self.loss_mse(y, t))

  def accuracy_mae(self, y, t, n_predictions):
    """Batch mean of the per-row summed absolute error, divided by `n_predictions`."""
    accuracy = tf.reduce_mean(tf.reduce_sum(tf.abs(y - t), axis = 1)) / n_predictions
    return accuracy

  def training(self, loss, learning_rate, var_list):
    """Return an Adam update op that minimises `loss` over `var_list`."""
    optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate)
    train_step = optimizer.minimize(loss, var_list = var_list)
    return train_step

  def fit(self, x_data, y_data, max_length, length, n_in, n_units_l,
          n_units_f, n_predictions, learning_rate, n_epoch, n_iter,
          batch_size, show_step, is_saving, model_path):
    """Train the network and plot loss/RMSE curves after every epoch.

    The default graph is reset first. Every epoch draws a fresh random 80/20
    split of the rows, runs `n_iter` mini-batch updates (dropout keep
    probability 0.7) and evaluates one random held-out batch per iteration
    (keep probability 1.0).

    Args:
      x_data: 2-D array of input series; the last `length` of `max_length`
        columns are used.
      y_data: 2-D array of targets, row-aligned with `x_data`.
      max_length: offset base for the column slice `[:, max_length - length:]`.
      length: number of time steps fed to the LSTM.
      n_in, n_units_l, n_units_f, n_predictions: forwarded to `lstm`.
      learning_rate: Adam learning rate.
      n_epoch: number of epochs (re-split + plot per epoch).
      n_iter: mini-batch updates per epoch.
      batch_size: rows per mini-batch (sampled with replacement).
      show_step: print the training loss/RMSE every `show_step` iterations.
      is_saving: if true, save a checkpoint to `model_path` after each epoch.
      model_path: checkpoint path.
    """
    tf.reset_default_graph()

    x = tf.placeholder(shape = [None, length], dtype = tf.float32)
    y = tf.placeholder(shape = [None, n_predictions], dtype = tf.float32)
    keep_prob = tf.placeholder(shape = [], dtype = tf.float32)

    # forget_bias = 1.0, zoneout_prob = 0.0 (zoneout disabled).
    preds = self.lstm(x, length, n_in, n_units_l, n_units_f, n_predictions,
                      batch_size, 1.0, 0.0, keep_prob, reuse = False)
    loss = self.loss_mse(preds, y)

    var_list = tf.trainable_variables('lstm')
    train_step = self.training(loss, learning_rate, var_list)

    acc = self.accuracy_rmse(preds, y)

    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    with tf.Session() as sess:

      sess.run(init)

      for e in range(n_epoch):
        # Fresh 80/20 split of the row indices for this epoch.
        train_indices = np.random.choice(len(x_data), round(len(x_data) * 0.8),
                                         replace = False)
        test_indices = np.array(list(set(range(len(x_data))) - set(train_indices)))

        x_train = x_data[train_indices][:, max_length - length:]
        x_test = x_data[test_indices][:, max_length - length:]
        y_train = y_data[train_indices]
        y_test = y_data[test_indices]

        history_loss_train = []
        history_loss_test = []
        history_acc_train = []
        history_acc_test = []

        for i in range(n_iter):
          # Training
          rand_index = np.random.choice(len(x_train), size = batch_size)
          x_batch = x_train[rand_index]
          y_batch = y_train[rand_index]

          feed_dict = {x: x_batch, y: y_batch, keep_prob: 0.7}
          sess.run(train_step, feed_dict = feed_dict)

          # Loss and RMSE are read after the update, in a single graph run.
          temp_loss, temp_acc = sess.run([loss, acc], feed_dict = feed_dict)

          history_loss_train.append(temp_loss)
          history_acc_train.append(temp_acc)

          if (i + 1) % show_step == 0:
            print ('-' * 100)
            print ('epoch: ' + str(e + 1) + ' Iteration: ' + str(i + 1) + '  Loss: ' + str(temp_loss) \
                  + '  Accuracy: ' + str(temp_acc))

          # Test
          rand_index = np.random.choice(len(x_test), size = batch_size)
          x_batch = x_test[rand_index]
          y_batch = y_test[rand_index]

          feed_dict = {x: x_batch, y: y_batch, keep_prob: 1.0}
          temp_loss, temp_acc = sess.run([loss, acc], feed_dict = feed_dict)

          history_loss_test.append(temp_loss)
          history_acc_test.append(temp_acc)

        if is_saving:
          model_path = saver.save(sess, model_path)
          print ('-' * 100)
          print ('done saving at ', model_path)

        print ('-' * 100)
        fig = plt.figure(figsize = (10, 3))
        ax1 = fig.add_subplot(1, 2, 1)
        ax1.plot(range(n_iter), history_loss_train, 'b-', label = 'Training')
        ax1.plot(range(n_iter), history_loss_test, 'r-', label = 'Test')
        ax1.set_ylim(0.0, 3.0)
        ax1.set_title('Loss')
        ax1.legend(loc = 'upper right')

        ax2 = fig.add_subplot(1, 2, 2)
        ax2.plot(range(n_iter), history_acc_train, 'b-', label = 'Training')
        ax2.plot(range(n_iter), history_acc_test, 'r-', label = 'Test')
        ax2.set_ylim(0.0, 3.0)
        ax2.set_title('Accuracy')
        ax2.legend(loc = 'upper right')

        plt.show()

  def predict(self, x_input, length, n_in, n_units_l, n_predictions,
              batch_size, model_path, n_units_f = None):
    """Restore a checkpoint and return predictions for `x_input`.

    The graph is built with `reuse = True`, so the 'lstm' variables must
    already exist in the current default graph (as they do after `fit`).

    Args:
      x_input: 2-D array with `length` columns.
      length, n_in, n_units_l, n_predictions: forwarded to `lstm`.
      batch_size: forwarded to `lstm` (kept for interface compatibility).
      model_path: checkpoint path to restore from.
      n_units_f: width of the dense layer; falls back to `n_units_l` when
        omitted. It has to equal the value used when the variables were
        created, otherwise the variable shapes will not match.

    Returns:
      Array of predictions with one row per input row.
    """
    if n_units_f is None:
      n_units_f = n_units_l

    x = tf.placeholder(shape = [None, length], dtype = tf.float32)
    keep_prob = tf.placeholder(shape = [], dtype = tf.float32)

    # forget_bias = 1.0 and zoneout_prob = 0.0, the same values `fit` uses.
    preds = self.lstm(x, length, n_in, n_units_l, n_units_f, n_predictions,
                      batch_size, 1.0, 0.0, keep_prob, reuse = True)

    saver = tf.train.Saver()

    with tf.Session() as sess:

      saver.restore(sess, model_path)

      # keep_prob 1.0 disables dropout for inference.
      feed_dict = {x: x_input, keep_prob: 1.0}
      prediction = sess.run(preds, feed_dict = feed_dict)

    return prediction

Parameters

lstm = LSTM()

# Data window: the series has max_length columns, the last `length` are used.
max_length = 33
length = 15

# Network size.
n_in = 1
n_units_l = 32
n_units_f = 32
n_predictions = 1

# Optimisation schedule.
learning_rate = 0.01
n_epoch = 3
n_iter = 300
batch_size = 500
show_step = 100

# Checkpointing (disabled for this run).
model_path = 'datalab/model'
is_saving = False

lstm.fit(
  x_data = x_data,
  y_data = y_data,
  max_length = max_length,
  length = length,
  n_in = n_in,
  n_units_l = n_units_l,
  n_units_f = n_units_f,
  n_predictions = n_predictions,
  learning_rate = learning_rate,
  n_epoch = n_epoch,
  n_iter = n_iter,
  batch_size = batch_size,
  show_step = show_step,
  is_saving = is_saving,
  model_path = model_path,
)

Output

Why not register and get more from Qiita?
  1. We will deliver articles that match you
    By following users and tags, you can keep up with information across the technical fields you are interested in
  2. You can efficiently read useful information later
    By "stocking" the articles you like, you can search right away