[Demo] Making a Predictive Keyboard using Recurrent Neural Networks - TensorFlow for Hackers (Part V)

Link
http://curiousily.com/data-science/2017/05/23/tensorflow-for-hackers-part-5.html

Comment
This article gave me good insight into the word-generation task. I could add more spice to it, though, such as Beam Search, to predict words more accurately, word by word (see the sketch after the link below).

Beam Search
https://qiita.com/Rowing0914/items/1efcd09288114809da8c
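
As a starting point, here is a minimal beam-search sketch built on top of the script below. It assumes the script's own pieces: model.predict(prepare_input(window), verbose=0)[0] gives the next-character distribution, indices_char maps indices back to characters, and a space ends a word (as in predict_completion). The names beam_search_completion, predict_fn, beam_width and max_len are mine, for illustration only, not from the article.

import numpy as np

def beam_search_completion(predict_fn, indices_char, seed_text,
                           beam_width=3, max_len=20):
    # Hypothetical helper. predict_fn(window) is assumed to return the
    # next-character distribution, e.g.
    #   lambda w: model.predict(prepare_input(w), verbose=0)[0]
    # Each hypothesis is (cumulative negative log-prob, window, completion).
    beams = [(0.0, seed_text, '')]
    finished = []
    for _ in range(max_len):
        candidates = []
        for score, window, completion in beams:
            preds = predict_fn(window)
            # expand each hypothesis with its beam_width most probable chars
            for idx in np.argsort(preds)[-beam_width:]:
                ch = indices_char[int(idx)]
                new_score = score - np.log(preds[idx] + 1e-10)
                if ch == ' ':  # a space ends the word, as in predict_completion
                    finished.append((new_score, completion + ch))
                else:
                    candidates.append((new_score, window[1:] + ch, completion + ch))
        if not candidates:
            break
        # prune: keep only the beam_width best partial hypotheses
        beams = sorted(candidates, key=lambda b: b[0])[:beam_width]
    if not finished:  # no hypothesis produced a space within max_len steps
        finished = [(score, completion) for score, _, completion in beams]
    return min(finished, key=lambda f: f[0])[1]

Usage would look like beam_search_completion(lambda w: model.predict(prepare_input(w), verbose=0)[0], indices_char, seq). A fuller version would length-normalise the scores so that longer completions are not unfairly penalised against shorter ones.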

import numpy as np
np.random.seed(42)
from keras.models import Sequential, load_model
from keras.layers import Dense, Activation, LSTM
from keras.optimizers import RMSprop
import matplotlib.pyplot as plt
import pickle
import heapq
import seaborn as sns
from pylab import rcParams
# %matplotlib inline

def load():
    # load the trained model and its training history from disk
    model = load_model('keras_model.h5')
    history = pickle.load(open("history.p", "rb"))
    return model, history

def save(model, history):
    # save the trained model and its training history to disk
    model.save('keras_model.h5')
    pickle.dump(history, open("history.p", "wb"))

def train(X, y, chars):
    # build the model
    model = Sequential()
    model.add(LSTM(128, input_shape=(SEQUENCE_LENGTH, len(chars))))
    model.add(Dense(len(chars)))
    model.add(Activation('softmax'))

    # training
    optimizer = RMSprop(lr=0.01)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    history = model.fit(X, y, validation_split=0.05, batch_size=128, epochs=20, shuffle=True).history
    save(model, history)
    return history

def visualisation(history):
    # evaluation and visualisation: plot accuracy and loss curves
    plt.plot(history['acc'])
    plt.plot(history['val_acc'])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()

    plt.plot(history['loss'])
    plt.plot(history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()

# prediction!!
def prepare_input(text):
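    # one-hot encode the text window into shape (1, SEQUENCE_LENGTH, len(chars))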
    x = np.zeros((1, SEQUENCE_LENGTH, len(chars)))
    for t, char in enumerate(text):
        x[0, t, char_indices[char]] = 1.
    return x

def sample(preds, top_n=3):
    # renormalise the predicted distribution and return the indices
    # of the top_n most probable next characters
    preds = np.asarray(preds).astype('float64')
    preds = preds / np.sum(preds)
    return heapq.nlargest(top_n, range(len(preds)), preds.take)

def predict_completion(model, text):
    # greedily extend the text one character at a time; return the generated
    # characters up to and including the first predicted space (end of word)
    completion = ''
    while True:
        x = prepare_input(text)
        preds = model.predict(x, verbose=0)[0]
        next_index = sample(preds, top_n=1)[0]
        next_char = indices_char[next_index]

        text = text[1:] + next_char
        completion += next_char

        if next_char == ' ':
            return completion

def predict_completions(model, text, n=3):
    # take the n most probable next characters and greedily complete
    # a word starting from each of them
    x = prepare_input(text)
    preds = model.predict(x, verbose=0)[0]
    next_indices = sample(preds, n)
    return [indices_char[idx] + predict_completion(model, text[1:] + indices_char[idx]) for idx in next_indices]

if __name__ == '__main__':
    # basic preparation
    sns.set(style='whitegrid', palette='muted', font_scale=1.5)
    rcParams['figure.figsize'] = 12, 5

    # loading text data
    path = 'short.csv'
    text = open(path).read().lower()
    print(f'corpus length: {len(text)}')

    # Preprocessing
    # find out all unique chars in that text data
    chars = sorted(list(set(text)))
    char_indices = dict((c, i) for i, c in enumerate(chars))
    indices_char = dict((i, c) for i, c in enumerate(chars))

    print(f'unique chars: {len(chars)}')

    # limit the length of sentence
    SEQUENCE_LENGTH = 40
    step = 3
    sentences = []
    next_chars = []

    # store the next character for every sequence
    for i in range(0, len(text) - SEQUENCE_LENGTH, step):
        sentences.append(text[i: i+SEQUENCE_LENGTH])
        next_chars.append(text[i+SEQUENCE_LENGTH])
    print(f'num training examples: {len(sentences)}')

    # make labels and features for training
    X = np.zeros((len(sentences), SEQUENCE_LENGTH, len(chars)), dtype=bool)
    y = np.zeros((len(sentences), len(chars)), dtype=bool)
    for i, sentence in enumerate(sentences):
        for t, char in enumerate(sentence):
            X[i, t, char_indices[char]] = 1
        y[i, char_indices[next_chars[i]]] = 1
    print(X.shape)

    # build the model
    history = train(X, y, chars)
    visualisation(history)

    quotes = [
    "Thus much let me avow",
    "I stand amid the roar",
    "Them with a tighter clasp?"
    ]

    # reload the saved model (and history) to check that persistence works
    model, history = load()

    for q in quotes:
        seq = q[:10].lower()
        print(seq)
        print(predict_completions(model, seq, 3))
        print()

