LoginSignup
40
42

More than 5 years have passed since last update.

TensorFlowを使ってテキストをクラス分類してみた

Last updated at Posted at 2016-02-29

2016年2月28日開催されたGDG神戸の勉強会で『TensorFlowを使ってテキストをクラス分類してみた』と題して発表してきました。

その発表に関した資料を以下に公開しています。

スライド

Keynote 6.6.1(2560) で作成したスライドをPDF形式で書き出してSlideShareにアップロードしましたが、ASCII文字以外の文字(日本語など)が正しく表示されていないようです。
文字の影だけが表示され、肝心の文字が表示されないという謎現象。
もし解決方法があれば教えて頂きたいです・・・。

ソースコード・評価データ

GitHub: nayutaya/20160228-gdg-kobe

イベント

GDG神戸 機械学習勉強会 [AlphaGoの論文を読む会] - GDG神戸 | Doorkeeper

モデルのソースコード

すべてのコードは上記リポジトリに含まれていますが、参照が多いであろうモデル部分のコードだけ、ここに貼っておきます。

Softmax回帰

sr.py

import tensorflow as tf
import numpy as np

class SoftmaxRegressions:
    """Softmax (multinomial logistic) regression over multi-hot term vectors.

    Builds a TensorFlow (0.x API) graph with four named scopes —
    inference, loss, training, accuracy — and registers scalar summaries
    for TensorBoard.
    """

    def __init__(self, optimizer, categories, num_of_terms):
        """
        Args:
            optimizer:    a tf.train.Optimizer; its minimize() op becomes
                          self.training_op.
            categories:   sequence of category labels; their order fixes the
                          one-hot encoding produced by make_category_vector().
            num_of_terms: vocabulary size, i.e. the length of each input vector.
        """
        self.optimizer         = optimizer
        self.categories        = categories
        self.num_of_categories = len(self.categories)
        self.num_of_terms      = num_of_terms

        # Mini-batch placeholders: one row per example.
        self.input_ph      = tf.placeholder(tf.float32, [None, self.num_of_terms], name="input")
        self.supervisor_ph = tf.placeholder(tf.float32, [None, self.num_of_categories], name="supervisor")

        with tf.name_scope("inference") as scope:
            # Zero init is fine for a single linear layer (no symmetry problem).
            weight_var     = tf.Variable(tf.zeros([self.num_of_terms, self.num_of_categories]), name="weight")
            bias_var       = tf.Variable(tf.zeros([self.num_of_categories]), name="bias")
            self.output_op = tf.nn.softmax(tf.matmul(self.input_ph, weight_var) + bias_var)

        with tf.name_scope("loss") as scope:
            # Clip probabilities away from 0 so tf.log() can never yield
            # -inf/NaN once the model becomes confident (the classic
            # log(0) instability of hand-rolled cross-entropy).
            clipped_output = tf.clip_by_value(self.output_op, 1e-10, 1.0)
            cross_entropy  = -tf.reduce_sum(self.supervisor_ph * tf.log(clipped_output))
            self.loss_op   = cross_entropy
            tf.scalar_summary("loss", self.loss_op)

        with tf.name_scope("training") as scope:
            self.training_op = self.optimizer.minimize(self.loss_op)

        with tf.name_scope("accuracy") as scope:
            # Fraction of examples whose argmax prediction matches the label.
            correct_prediction = tf.equal(tf.argmax(self.output_op, 1), tf.argmax(self.supervisor_ph, 1))
            self.accuracy_op   = tf.reduce_mean(tf.cast(correct_prediction, "float"))
            tf.scalar_summary("accuracy", self.accuracy_op)

        self.summary_op = tf.merge_all_summaries()

    def make_term_vector(self, term_ids):
        """Return a multi-hot list of length num_of_terms with 1 at each term id."""
        array = [0] * self.num_of_terms
        for term_id in term_ids:
            array[term_id] = 1
        return array

    def make_category_vector(self, category_id):
        """Return a one-hot list of length num_of_categories."""
        array = [0] * self.num_of_categories
        array[category_id] = 1
        return array

    def make_feed_dict(self, records):
        """Build a feed_dict from records of the form (category, _, _, term_ids).

        The two middle fields of each record are ignored here.
        """
        c2i = {category: index for index, category in enumerate(self.categories)}
        term_tensor     = [self.make_term_vector(term_ids)          for (category, _, _, term_ids) in records]
        category_tensor = [self.make_category_vector(c2i[category]) for (category, _, _, term_ids) in records]
        return {
            self.input_ph:      np.array(term_tensor),
            self.supervisor_ph: np.array(category_tensor),
        }

多層パーセプトロン

mlp.py

import tensorflow as tf
import numpy as np

# REF:
# * 落ちこぼれないためのTensorFlow Tutorialコード - Qiita
#   http://qiita.com/TomokIshii/items/92a266b805d7eee02b1d
# * TensorFlowのMLPコードで"Wine"を分類 - Qiita
#   http://qiita.com/TomokIshii/items/2cab778a3192d561a1ef

class MultiLayerPerceptron:
    """Single-hidden-layer (ReLU) perceptron over multi-hot term vectors.

    Builds a TensorFlow (0.x API) graph with inference, loss (cross-entropy
    plus L2 weight decay), training and accuracy scopes, and registers
    scalar summaries for TensorBoard.
    """

    def __init__(self, optimizer, categories, num_of_terms, num_of_hidden_nodes):
        """
        Args:
            optimizer:           a tf.train.Optimizer; its minimize() op
                                 becomes self.training_op.
            categories:          sequence of category labels; their order fixes
                                 the one-hot encoding of make_category_vector().
            num_of_terms:        vocabulary size (input vector length).
            num_of_hidden_nodes: width of the hidden ReLU layer.
        """
        self.optimizer           = optimizer
        self.categories          = categories
        self.num_of_categories   = len(self.categories)
        self.num_of_terms        = num_of_terms
        self.num_of_hidden_nodes = num_of_hidden_nodes

        # Mini-batch placeholders: one row per example.
        self.input_ph      = tf.placeholder(tf.float32, [None, self.num_of_terms], name="input")
        self.supervisor_ph = tf.placeholder(tf.float32, [None, self.num_of_categories], name="supervisor")

        with tf.name_scope("inference") as scope:
            # Small random init breaks hidden-unit symmetry; biases start at 0.
            weight1_var    = tf.Variable(tf.truncated_normal([self.num_of_terms, self.num_of_hidden_nodes], stddev=0.1), name="weight1")
            weight2_var    = tf.Variable(tf.truncated_normal([self.num_of_hidden_nodes, self.num_of_categories], stddev=0.1), name="weight2")
            bias1_var      = tf.Variable(tf.zeros([self.num_of_hidden_nodes]), name="bias1")
            bias2_var      = tf.Variable(tf.zeros([self.num_of_categories]), name="bias2")
            hidden_op      = tf.nn.relu(tf.matmul(self.input_ph, weight1_var) + bias1_var)
            self.output_op = tf.nn.softmax(tf.matmul(hidden_op, weight2_var) + bias2_var)

        with tf.name_scope("loss") as scope:
            # Clip probabilities away from 0 so tf.log() can never yield
            # -inf/NaN once the model becomes confident (log(0) instability).
            clipped_output = tf.clip_by_value(self.output_op, 1e-10, 1.0)
            cross_entropy  = -tf.reduce_sum(self.supervisor_ph * tf.log(clipped_output))
            # L2 weight decay on both weight matrices, scaled by lambda_2.
            l2_sqr         = tf.nn.l2_loss(weight1_var) + tf.nn.l2_loss(weight2_var)
            lambda_2       = 0.01
            self.loss_op   = cross_entropy + lambda_2 * l2_sqr
            tf.scalar_summary("loss", self.loss_op)

        with tf.name_scope("training") as scope:
            self.training_op = self.optimizer.minimize(self.loss_op)

        with tf.name_scope("accuracy") as scope:
            # Fraction of examples whose argmax prediction matches the label.
            correct_prediction = tf.equal(tf.argmax(self.output_op, 1), tf.argmax(self.supervisor_ph, 1))
            self.accuracy_op   = tf.reduce_mean(tf.cast(correct_prediction, "float"))
            tf.scalar_summary("accuracy", self.accuracy_op)

        self.summary_op = tf.merge_all_summaries()

    def make_term_vector(self, term_ids):
        """Return a multi-hot list of length num_of_terms with 1 at each term id."""
        array = [0] * self.num_of_terms
        for term_id in term_ids:
            array[term_id] = 1
        return array

    def make_category_vector(self, category_id):
        """Return a one-hot list of length num_of_categories."""
        array = [0] * self.num_of_categories
        array[category_id] = 1
        return array

    def make_feed_dict(self, records):
        """Build a feed_dict from records of the form (category, _, _, term_ids).

        The two middle fields of each record are ignored here.
        """
        c2i = {category: index for index, category in enumerate(self.categories)}
        term_tensor     = [self.make_term_vector(term_ids)          for (category, _, _, term_ids) in records]
        category_tensor = [self.make_category_vector(c2i[category]) for (category, _, _, term_ids) in records]
        return {
            self.input_ph:      np.array(term_tensor),
            self.supervisor_ph: np.array(category_tensor),
        }
40
42
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
40
42