How to visualize word2vec vectors trained by fastText on TensorBoard

Posted at 2018-08-15

import os
import tensorflow as tf
import numpy as np
import fastText
from tensorflow.contrib.tensorboard.plugins import projector

# load the trained fastText model
word2vec = fastText.load_model('./browse_history.bin')

# build a (num_words, dimension) matrix of word vectors
words = word2vec.get_words()
embedding = np.empty((len(words), word2vec.get_dimension()), dtype=np.float32)
for i, word in enumerate(words):
    embedding[i] = word2vec.get_word_vector(word)

# set up a TensorFlow session and load the matrix into a variable
tf.reset_default_graph()
sess = tf.InteractiveSession()
X = tf.Variable([0.0], name='browse_history_embedding')

place = tf.placeholder(tf.float32, shape=embedding.shape)
set_x = tf.assign(X, place, validate_shape=False)
sess.run(tf.global_variables_initializer())
sess.run(set_x, feed_dict={place: embedding})

# write one label per line; rows correspond to rows of the embedding matrix
os.makedirs('log', exist_ok=True)
with open(os.path.join('log', 'metadata.tsv'), 'w') as f:
    for word in words:
        f.write(word + '\n')

# create a TensorFlow summary writer and register the embedding with the projector
summary_writer = tf.summary.FileWriter('log', sess.graph)
config = projector.ProjectorConfig()
embedding_conf = config.embeddings.add()
embedding_conf.tensor_name = 'browse_history_embedding:0'
embedding_conf.metadata_path = os.path.join('log', 'metadata.tsv')
projector.visualize_embeddings(summary_writer, config)

# save the variable to a checkpoint so TensorBoard can read it
saver = tf.train.Saver()
saver.save(sess, os.path.join('log', "model.ckpt"))
  • The code above can be run in a Jupyter notebook; see the sketch after this list for one way to launch TensorBoard from the same notebook.

  • The Python binding for fastText used here is the one included in the official fastText Git repository, not the one installed by the command "pip install fasttext".
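
To actually see the projection, TensorBoard has to be pointed at the log directory. Below is a minimal sketch for launching it from the notebook, assuming a tensorboard version that ships the programmatic launcher in tensorboard.program; otherwise, starting TensorBoard from the command line with the same log directory works just as well.

from tensorboard import program

# point TensorBoard at the directory that holds the checkpoint,
# metadata.tsv and projector_config.pbtxt written above
tb = program.TensorBoard()
tb.configure(argv=[None, '--logdir', 'log'])
url = tb.launch()
print('TensorBoard is listening on', url)

Once it is running, the embedding appears under the "Projector" tab, where PCA or t-SNE can be applied interactively.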

How to create a sprite image file


from PIL import Image

# the sprite is a 124 x 124 grid of 90 x 90 px thumbnails
master_width = 124 * 90   # 124 thumbnails per row, 90 px each
master_height = 124 * 90  # 124 rows of thumbnails, 90 px each

master = Image.new(
    mode='RGB',
    size=(master_width, master_height),
    color=(0, 0, 0))  # black background


for index, label in enumerate(word2vec.get_words()):
    img = jpgs.get(label)  # jpgs: a dict mapping each word to its 90 x 90 px thumbnail, prepared beforehand
    if img:
        i = (index % 124) * 90   # column offset in pixels
        j = (index // 124) * 90  # row offset in pixels
        master.paste(img, (i, j))  # paste the thumbnail into the sprite

master.save('master.png')
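
The projector only picks up the sprite if it is registered in the projector config together with the thumbnail size. A minimal sketch, assuming master.png has been copied into the log directory and that embedding_conf, config, and summary_writer from the first snippet are still in scope (the attribute names come from the ProjectorConfig proto used by tensorflow.contrib.tensorboard.plugins.projector):

# register the sprite with the embedding configured above
embedding_conf.sprite.image_path = 'master.png'           # path relative to the log directory
embedding_conf.sprite.single_image_dim.extend([90, 90])   # width and height of one thumbnail in px

# re-write projector_config.pbtxt so TensorBoard sees the sprite
projector.visualize_embeddings(summary_writer, config)

With this in place, each point in the projector is drawn as its thumbnail instead of a plain dot.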