More than 1 year has passed since last update.

文章、ジェンダー（性別）、年齢に対するクラス分類を学習するニューラルネットワーク

Last updated at 2022-11-24Posted at 2022-11-24

　文章、およびジェンダーと年齢の三つの特徴量に対するクラス分類を学習するニューラルネットワークシステムを構築した。「文章　性別　年齢　機械学習」でググると、文章を入力して性別や年齢を推測するシステムについて述べられているページが多く、三つの特徴量を入力とするマルチモーダルなシステムについての報告が少ないので、この報告を書くこととした。本コードは、模擬データに対して学習が行われることを確認してある。

　マルチモーダルは、ページ

を参考にした。文章識別結果と年齢およびジェンダーの特徴量を concatenate させ、全結合層に入力することで、クラス分類を行った。

　文章の識別器には、tensorflow のページ

の transformer の encoder を用いた。この際、

のページを参考にし、文章の一番最初に<CLS>を入れて、encoder 出力の <CLS> に対応した部分を文章識別結果とした。

　このページでは、ニューラルネットワークの tensorflow を用いた python のソースコードと2つの学習実験について述べる。1 つ目の実験は、文章、年齢、ジェンダーの三つの特徴量を一様乱数で発生させた値とし、三つの特徴量のうちの年齢とクラス分類の教師データが関連したデータを作り実験を行った。5択の test-accuracy で 0.9 を得た。

　2 つ目の実験では、文章データとクラス分類の教師データを github からダウンロードして読み込んだ値とし、年齢とジェンダーは一様乱数を発生させて実験を行った。結果、5択の問題で、test-accuracy で 0.35 という結果を得た。文章と5択のクラス分類データのみの学習実験では、test-accuracy が 0.4 程度のデータである。

　ここで、技術的な問題として、1つ目の実験の文章データとクラス分類の教師データの入れ物として、github からダウンロードしたデータで作った配列を用い、文章の数値化データを一様乱数に設定しなおして使った。

　最初に、python の import と、文章の前処理を行うルーチン。

import os
import re
import shutil
import string
import tensorflow as tf
import pandas as pd
import numpy as np
import csv
import pickle
import requests
import unicodedata
import nltk
# After `pip install nltk`, run nltk.download('all'); running the download once seems to be enough.
import random

from bs4 import BeautifulSoup
from sudachipy import tokenizer
from sudachipy import dictionary
from nltk.corpus import wordnet

import tensorflow as tf
import keras
from tensorflow.keras import layers
from tensorflow.keras import losses
from keras import optimizers
from tensorflow.keras import layers
from keras import losses
from keras.layers.core import Activation
from keras.layers.core import Dense
from keras.layers.core import Dropout
from keras import backend as K
from keras import Model, Input
from keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from keras.losses import categorical_crossentropy, sparse_categorical_crossentropy
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from keras.models import save_model, load_model
from keras.layers import Activation, Dropout, AlphaDropout, Reshape, Lambda
from keras.layers import Embedding, Concatenate

from sklearn.model_selection import train_test_split

# Create the Sudachi tokenizer object shared by the wakati helpers below.
tokenizer_obj = dictionary.Dictionary().create()

# Split a sentence into words and normalize every token.
def wakati_normalized( sentence ):
  """Return *sentence* as space-separated, normalized tokens.

  The sentence is tokenized by the module-level ``tokenizer_obj`` using
  split mode A, and every morpheme is replaced by its ``normalized_form()``.
  """
  split_mode = tokenizer.Tokenizer.SplitMode.A
  morphemes = tokenizer_obj.tokenize(sentence, split_mode)
  return " ".join( morpheme.normalized_form() for morpheme in morphemes )

# Split a sentence into words without normalizing them.
def wakati( sentence ):
  """Return *sentence* as space-separated surface tokens.

  The sentence is tokenized by the module-level ``tokenizer_obj`` using
  split mode A; each morpheme is rendered with ``str()``.
  """
  split_mode = tokenizer.Tokenizer.SplitMode.A
  morphemes = tokenizer_obj.tokenize(sentence, split_mode)
  return " ".join( map(str, morphemes) )

# Convert text to lowercase.
def lower_text(text):
    """Return a lowercase copy of *text*."""
    lowered = text.lower()
    return lowered

# Apply Unicode normalization.
def normalize_unicode(text, form='NFKC'):
    """Return *text* normalized with ``unicodedata.normalize``.

    Args:
        text: String to normalize.
        form: Normalization form passed to ``unicodedata.normalize``
            (``'NFKC'`` unless overridden).
    """
    return unicodedata.normalize(form, text)

# Reduce a term to its lemma with the WordNet lemmatizer.
def lemmatize_term(term, pos=None):
    """Lemmatize *term* with ``nltk.WordNetLemmatizer``.

    Args:
        term: The word to lemmatize.
        pos: WordNet part-of-speech tag. When ``None``, the tag of the
            first WordNet synset found for *term* is used.

    Returns:
        The lemmatized term, or *term* unchanged when *pos* is ``None``
        and WordNet has no synset for it.
    """
    if pos is not None:
        return nltk.WordNetLemmatizer().lemmatize(term, pos=pos)

    candidates = wordnet.synsets(term)
    if not candidates:
        return term

    inferred_pos = candidates[0].pos()
    # Satellite adjectives are looked up as plain adjectives.
    if inferred_pos == wordnet.ADJ_SAT:
        inferred_pos = wordnet.ADJ
    return nltk.WordNetLemmatizer().lemmatize(term, pos=inferred_pos)

# Normalize numbers.
def normalize_number(text):
    """Replace every run of consecutive digits in *text* with a single '0'."""
    digit_run = re.compile(r'\d+')
    return digit_run.sub('0', text)

# Remove symbols, mentions and URLs from text.
def clean_text(text):
    """Lowercase *text* and strip brackets, mentions, URLs and wide spaces.

    The substitutions are applied in the order listed below. The URL rule
    only matches a URL that is followed by a space, CR or LF.
    """
    substitutions = (
        (r'[【】]', ' '),                      # lenticular brackets
        (r'[（）()]', ' '),                    # full- and half-width parentheses
        (r'[［］\[\]]', ' '),                  # full- and half-width square brackets
        (r'[@＠]\w+', ''),                     # mentions
        (r'https?:\/\/.*?[\r\n ]', ''),        # URLs
        (r'　', ' '),                          # full-width space
    )
    cleaned = text.lower()
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned

# Remove HTML tags from text.
def clean_html_tags(html_text):
    """Return the text content of *html_text* with line breaks removed.

    The input is parsed with BeautifulSoup's ``html.parser``; the lines of
    the extracted text are concatenated without a separator.
    """
    parsed = BeautifulSoup(html_text, 'html.parser')
    text_lines = parsed.get_text().splitlines()
    return ''.join(text_lines)

# Remove HTML tags together with <script> and <style> elements.
def clean_html_and_js_tags(html_text):
    """Return the text of *html_text* without script/style content.

    The input is parsed with BeautifulSoup's ``html.parser``. Every
    ``<script>`` and ``<style>`` element is detached from the tree before
    the text is extracted, and the lines of the extracted text are
    concatenated without a separator.
    """
    soup = BeautifulSoup(html_text, 'html.parser')
    # A plain loop replaces the side-effect-only list comprehension, and
    # find_all is used in place of the legacy findAll alias.
    for element in soup.find_all(['script', 'style']):
        element.extract()
    cleaned_text = soup.get_text()
    cleaned_text = ''.join(cleaned_text.splitlines())
    return cleaned_text

# Remove URLs from text.
def clean_url(html_text):
    """Delete every 'http' prefix and the non-space characters after it."""
    url_pattern = re.compile(r'http\S+')
    return url_pattern.sub('', html_text)

# Normalize text: Unicode normalization followed by lowercasing.
def normalize(text):
    """Return *text* after ``normalize_unicode`` and then ``lower_text``.

    Digit normalization (``normalize_number``) is not applied here.
    """
    return lower_text(normalize_unicode(text))

# Run the whole cleaning pipeline on one text.
def text_cleaning(text):
    """Apply all cleaning and normalization helpers to *text*.

    Steps, in order: symbol/mention cleanup, removal of script/style
    elements, HTML tag stripping, URL removal, Unicode normalization and
    lowercasing, then per-token lemmatization.

    Returns:
        The cleaned text. Whitespace-separated tokens are lemmatized and
        then joined back together without a separator.
    """
    text = clean_text(text)
    # Script/style elements must be removed while the tags still exist.
    # Stripping the tags first would leave their contents behind as text.
    text = clean_html_and_js_tags(text)
    text = clean_html_tags(text)
    text = clean_url(text)
    text = normalize(text)
    text = lower_text(text)
    text = normalize_unicode(text)
    # NOTE(review): tokens are re-joined with an empty string, so the
    # whitespace between words is dropped — confirm this is intended for
    # texts that contain space-delimited (e.g. English) words.
    text = "".join(lemmatize_term(e) for e in text.split())
    return text

def data_cleaning(data):
    """Return a list with ``text_cleaning`` applied to every item of *data*."""
    return list(map(text_cleaning, data))

# Preprocess one Japanese input sentence.
def input_japanese_preprocessing( input_text ):
    """Clean, lemmatize and tokenize *input_text*, then prefix '<CLS>'.

    Returns:
        A string of the form ``'<CLS> tok1 tok2 ...'`` where the tokens
        come from ``wakati_normalized``.
    """
    cleaned = text_cleaning( input_text )
    cleaned = lemmatize_term( cleaned, pos=None )
    tokens = wakati_normalized( cleaned )
    # The leading <CLS> marker is the position whose encoder output is
    # later used as the sentence representation.
    return ' '.join( ('<CLS>', tokens) )

　次に、github から文章とその評価のデータをダウンロードするルーチン

# Notebook shell command: clone the ids-cv/wrime repository, which provides
# the wrime/wrime-ver2.tsv file read by the data creation routine.
!git clone https://github.com/ids-cv/wrime/

　ダウンロードしたデータを読み込み、文章、ジェンダーと年齢と教師データの模擬データを作るルーチン。

　クラス分類の教師データは、github のデータのクラス分類 -2, -1, 0, 1, 2 に習い5択です。一般的な教師データの形にするために、0, 1, 2, 3, 4 に変換しています。一旦、文章データと教師データを読み込んでいますが、模擬データを作るため、文章データも教師データも設定しなおしています。ここで、文章データと教師データを読み込むのは、データの入れ物として使うためだけです。

　年齢データと教師データの設定しなおしについて説明します。年齢データは、0～100 の一様乱数の 1/100 です。この値を data_category[i,0] に入力し、これが 0～0.2 でクラス分類の教師データが 0、0.2～0.4 で 1、0.4～0.6 で 2、0.6～0.8 で 3、0.8～1.0 で 4 としました。ここで、i は、データ数に相当する添え字です。データ数 num_data は1万としました。ジェンダーは、0～3 の一様乱数を発生させ、0～1 がジェンダー 0、1～2 がジェンダー 1、2 以上がジェンダー 2 です。ジェンダーはクラス分類結果には影響しません。

#1.データ作成ルーチン
# Data creation routine for the first experiment: the class label depends
# only on the (random) age feature; gender is random and unrelated.

# Load the WRIME table; missing cells become a single space.
df0 = pd.read_table("wrime/wrime-ver2.tsv", sep="\t" )
df0 = df0.fillna( " " )

num_data = 10000
num_category = 4  # one column for age, three one-hot columns for gender
nenrei_max = 100
# Gender is drawn uniformly from [0, 3] so that all three gender classes
# can occur. With an upper bound of 2 the third class was never selected.
gender_max = 3

# Lower bounds of age classes 1..4 (class 0 is everything below 0.2).
age_class_bounds = (0.2, 0.4, 0.6, 0.8)

data_text = []
data_category = np.zeros( (num_data, num_category ), dtype=np.float32 )
print( "shape of data_category:{}".format( data_category.shape ))
target = []

for i in range( num_data ):
    words = input_japanese_preprocessing( df0['Sentence'][i] )
    data_text.append( words )

    # Column 0: age scaled to [0, 1].
    data_category[i,0] = random.uniform(0, nenrei_max ) / nenrei_max

    # The WRIME label (-2..2 shifted to 0..4) only serves as a placeholder;
    # it is immediately replaced by the class derived from the age.
    target.append( df0['Writer_Sentiment'][i] + 2 )
    age = data_category[i,0]
    target[i] = sum( 1 for bound in age_class_bounds if age >= bound )

    # Columns 1..3: one-hot gender, [0,1) -> 0, [1,2) -> 1, [2,3] -> 2.
    gender = random.uniform( 0, gender_max )
    if gender < 1:
        data_category[i,1] = 1
    elif gender < 2:
        data_category[i,2] = 1
    else:
        data_category[i,3] = 1

print( "data_text[:3]:{}".format(data_text[:3]) )
print( "data_category[:3]:{}".format(data_category[:3]) )
print( "target[:3]:{}".format( target[:3]))
print( "np.unique( target ):{}".format(np.unique(target )))

　次に、日本語の文章を tokenize するルーチンです。一つ目の実験では、後で掲載するルーチンで、文章データを一様乱数に置き換えます。ここでは、文章データを数値化した tensor 配列と単語と数値の対応を収めた辞書型の配列 lang を作っています。


def tokenize(lang):
    """Fit a Keras ``Tokenizer`` on *lang* and encode the texts as ids.

    Args:
        lang: Iterable of texts passed to ``fit_on_texts``.

    Returns:
        A pair ``(tensor, lang_tokenizer)``: the id sequences padded at the
        end (``padding='post'``) and the fitted tokenizer. The tokenizer is
        created with ``filters=''``, i.e. no characters are filtered out.
    """
    fitted_tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='')
    fitted_tokenizer.fit_on_texts(lang)

    id_sequences = fitted_tokenizer.texts_to_sequences(lang)
    padded = tf.keras.preprocessing.sequence.pad_sequences(id_sequences, padding='post')

    return padded, fitted_tokenizer


# Encode all sentences; `lang` is the fitted tokenizer returned by tokenize().
tensor, lang = tokenize( data_text )

# Largest token id that occurs in the encoded data.
tensor_flatten = np.ravel( tensor )
tensor_max = np.max( tensor_flatten )

n_rows, row_length = tensor.shape[0], tensor.shape[1]
print( f"データの数は:{n_rows}" )
print( f"一行の長さは:{row_length}" )
print( f"vocabrary の最大値 は:{tensor_max}" )

　次に、この学習実験は、文章、ジェンダー、年齢のデータのうち、文章とジェンダーを一様乱数にして、年齢により結果が規定されるようなデータで学習を行うので、一旦、文章データを一様乱数にします。

# Replace the encoded text `tensor` with uniform random token ids; the
# first position of every row (the CLS slot) is set to cls_id = 0.
#2.文章を一様乱数にするルーチン

print( f"shape of tensor:{tensor.shape}" )

tensor_random = np.zeros_like( tensor )

print( f"tensor[0,:5]:{tensor[0,:5]}" )
print( f"tensor[1,:5]:{tensor[1,:5]}" )
print( f"shape of tensor_random:{tensor_random.shape}" )
print( f"dtype of tensor_random:{tensor_random.dtype}" )

tensor_flatten = np.ravel( tensor )
vocab_max = np.max( tensor_flatten )

# Fill row by row, left to right, with ids drawn from 1..vocab_max inclusive.
n_rows, n_cols = tensor_random.shape
for row in range( n_rows ):
    for col in range( n_cols ):
        tensor_random[row, col] = random.randint( 1, vocab_max )

# NOTE(review): id 0 is also the value create_padding_mask treats as
# padding, so the CLS position is masked as an attention key — confirm
# this is intended.
cls_id = 0
tensor_random[:, 0] = cls_id

print( f"cls_id:{cls_id}" )
print( f"vocabr_max:{vocab_max}" )
print( f"shape of tensor_random:{tensor_random.shape}" )
print( f"dtype of tensor_random:{tensor_random.dtype}" )
print( f"tensor_random[0,:140]:{tensor_random[0,:140]}" )

# From here on the random ids stand in for the real text data.
tensor = tensor_random

　次に、train, validation, test データを作ります。num_plus = num_category は、年齢とジェンダーの項目数4 です。


# Number of non-text features (age + one-hot gender columns).
num_plus = num_category

# First split: hold out the test set. Second split: carve the validation
# set out of the remaining training data. Both use train_test_split's
# default proportions and an unseeded shuffle.
tensor_train, tensor_test, plus_vector_train, plus_vector_test, target_train, target_test = train_test_split(tensor,data_category,target)
tensor_train1, tensor_val, plus_vector_train1, plus_vector_val, target_train1, target_val = train_test_split(tensor_train,plus_vector_train,target_train)

BUFFER_SIZE = len(tensor_train1)
BATCH_SIZE = 64
steps_per_epoch = BUFFER_SIZE//BATCH_SIZE

# These three prints are labelled *_train1, so they now show the final
# training split rather than the arrays from before the validation split.
print( "tensor_train1[:1]:{}".format(tensor_train1[:1]) )
print( "plus_vector_train1[:1]:{}".format(plus_vector_train1[:1]) )
print( "target_train1[:1]:{}".format( target_train1[:1]))
print( "np.unique( target_train):{}".format( np.unique( target_train )))
print( "shape of tensor_train1:{}".format( tensor_train1.shape))
print( "shape of plus_vecotr_train1:{}".format( plus_vector_train1.shape))
print( "len of target_train1:{}".format( len( target_train1 )))
print( "shape of tensor_val:{}".format( tensor_val.shape))
print( "shape of plus_vecotr_val:{}".format( plus_vector_val.shape))
print( "len of target_val:{}".format( len( target_val)))
print( "shape of tensor_test:{}".format( tensor_test.shape))
print( "shape of plus_vecotr_test:{}".format( plus_vector_test.shape))
print( "len of target_test:{}".format( len( target_test)))

　クラス分類の教師データ target を確認します。

# Show which class labels occur in the training targets.
print( f"np.unique( target_train):{np.unique( target_train )}" )

　次に、前出の tensorflow の transformer のページから、encoder だけをとってきて掲載します。

#ここからエンコーダーの定義

def get_angles(pos, i, d_model):
  """Return the positional-encoding angles ``pos / 10000**(2*(i//2)/d_model)``.

  Args:
    pos: Position index (or array of positions).
    i: Embedding dimension index (or array of indices).
    d_model: Embedding width used to scale the exponent.
  """
  exponent = (2 * (i//2)) / np.float32(d_model)
  inverse_frequency = 1 / np.power(10000, exponent)
  return pos * inverse_frequency

  
def positional_encoding(position, d_model):
  """Build the sinusoidal positional-encoding table.

  Args:
    position: Number of positions to encode.
    d_model: Embedding width.

  Returns:
    A float32 tensor with a leading axis of size 1 added in front of the
    (position, d_model) table.
  """
  positions = np.arange(position)[:, np.newaxis]
  dimensions = np.arange(d_model)[np.newaxis, :]
  table = get_angles(positions, dimensions, d_model)

  # Even columns (2i) carry the sine, odd columns (2i+1) the cosine.
  table[:, 0::2] = np.sin(table[:, 0::2])
  table[:, 1::2] = np.cos(table[:, 1::2])

  return tf.cast(table[np.newaxis, ...], dtype=tf.float32)

def scaled_dot_product_attention(q, k, v, mask):
  """Compute scaled dot-product attention.

  q, k, v must have matching leading dimensions, and k, v must have a
  matching penultimate dimension (seq_len_k == seq_len_v). The mask is
  added to the logits, so it must be broadcastable to their shape.

  Args:
    q: query, shape (..., seq_len_q, depth)
    k: key, shape (..., seq_len_k, depth)
    v: value, shape (..., seq_len_v, depth_v)
    mask: float tensor broadcastable to (..., seq_len_q, seq_len_k), or
          None to apply no masking.

  Returns:
    A pair (output, attention_weights).
  """
  # Raw similarity scores, shape (..., seq_len_q, seq_len_k).
  logits = tf.matmul(q, k, transpose_b=True)

  # Scale by the square root of the key depth.
  key_depth = tf.cast(tf.shape(k)[-1], tf.float32)
  logits = logits / tf.math.sqrt(key_depth)

  # Masked positions (mask == 1) receive a large negative logit.
  if mask is not None:
    logits += (mask * -1e9)

  # Softmax over the key axis so the weights of each query sum to 1.
  weights = tf.nn.softmax(logits, axis=-1)  # (..., seq_len_q, seq_len_k)

  attended = tf.matmul(weights, v)  # (..., seq_len_q, depth_v)
  return attended, weights

class MultiHeadAttention(tf.keras.layers.Layer):
  """Multi-head attention layer built on scaled_dot_product_attention.

  Args:
    d_model: Total width of the projections; must be divisible by
      num_heads.
    num_heads: Number of attention heads.
  """

  def __init__(self, d_model, num_heads):
    super().__init__()
    self.num_heads = num_heads
    self.d_model = d_model

    assert d_model % self.num_heads == 0

    # Width handled by each individual head.
    self.depth = d_model // self.num_heads

    # Linear projections for query, key and value.
    self.wq = tf.keras.layers.Dense(d_model)
    self.wk = tf.keras.layers.Dense(d_model)
    self.wv = tf.keras.layers.Dense(d_model)

    # Output projection applied after the heads are merged again.
    self.dense = tf.keras.layers.Dense(d_model)

  def split_heads(self, x, batch_size):
    """Reshape x to (batch_size, num_heads, seq_len, depth)."""
    per_head = tf.reshape(x, (batch_size, -1, self.num_heads, self.depth))
    return tf.transpose(per_head, perm=[0, 2, 1, 3])

  def call(self, v, k, q, mask):
    """Return (output, attention_weights) for the given value/key/query."""
    batch_size = tf.shape(q)[0]

    # Project, then split into heads: (batch_size, num_heads, seq_len, depth).
    q = self.split_heads(self.wq(q), batch_size)
    k = self.split_heads(self.wk(k), batch_size)
    v = self.split_heads(self.wv(v), batch_size)

    # attended: (batch_size, num_heads, seq_len_q, depth)
    # attention_weights: (batch_size, num_heads, seq_len_q, seq_len_k)
    attended, attention_weights = scaled_dot_product_attention(q, k, v, mask)

    # Bring the head axis next to depth and merge both back into d_model.
    attended = tf.transpose(attended, perm=[0, 2, 1, 3])
    merged = tf.reshape(attended, (batch_size, -1, self.d_model))

    output = self.dense(merged)  # (batch_size, seq_len_q, d_model)
    return output, attention_weights

def point_wise_feed_forward_network(d_model, dff):
  """Return a two-layer feed-forward block: Dense(dff, relu) -> Dense(d_model)."""
  hidden = tf.keras.layers.Dense(dff, activation='relu')  # (batch_size, seq_len, dff)
  projection = tf.keras.layers.Dense(d_model)  # (batch_size, seq_len, d_model)
  return tf.keras.Sequential([hidden, projection])

class EncoderLayer(tf.keras.layers.Layer):
  """One encoder block: self-attention and feed-forward sub-layers.

  Each sub-layer is followed by dropout, a residual connection and layer
  normalization.

  Args:
    d_model: Width of the layer inputs and outputs.
    num_heads: Number of attention heads.
    dff: Hidden width of the feed-forward network.
    rate: Dropout rate used after both sub-layers.
  """

  def __init__(self, d_model, num_heads, dff, rate=0.1):
    super().__init__()

    self.mha = MultiHeadAttention(d_model, num_heads)
    self.ffn = point_wise_feed_forward_network(d_model, dff)

    self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
    self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)

    self.dropout1 = tf.keras.layers.Dropout(rate)
    self.dropout2 = tf.keras.layers.Dropout(rate)

  def call(self, x, training, mask):
    """Apply the block to x; every tensor is (batch_size, input_seq_len, d_model)."""
    # Self-attention sub-layer: value, key and query are all x.
    attended, _ = self.mha(x, x, x, mask)
    attended = self.dropout1(attended, training=training)
    after_attention = self.layernorm1(x + attended)

    # Feed-forward sub-layer.
    transformed = self.ffn(after_attention)
    transformed = self.dropout2(transformed, training=training)
    return self.layernorm2(after_attention + transformed)

class Encoder(tf.keras.layers.Layer):
  """Transformer encoder: embedding, positional encoding, EncoderLayers.

  Args:
    num_layers: Number of stacked EncoderLayer blocks.
    d_model: Embedding / model width.
    num_heads: Number of attention heads per block.
    dff: Hidden width of each block's feed-forward network.
    input_vocab_size: Size of the embedding table.
    maximum_position_encoding: Number of positions precomputed for the
      positional encoding.
    rate: Dropout rate.
  """

  def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size,
               maximum_position_encoding, rate=0.1):
    super().__init__()

    self.d_model = d_model
    self.num_layers = num_layers

    self.embedding = tf.keras.layers.Embedding(input_vocab_size, d_model)
    self.pos_encoding = positional_encoding(maximum_position_encoding,
                                            self.d_model)

    self.enc_layers = [EncoderLayer(d_model, num_heads, dff, rate)
                       for _ in range(num_layers)]

    self.dropout = tf.keras.layers.Dropout(rate)

  def call(self, x, training, mask):
    """Encode token ids x into a (batch_size, input_seq_len, d_model) tensor."""
    seq_len = tf.shape(x)[1]

    # Embed, scale by sqrt(d_model), then add the positional encoding
    # for the first seq_len positions.
    hidden = self.embedding(x)  # (batch_size, input_seq_len, d_model)
    hidden *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
    hidden += self.pos_encoding[:, :seq_len, :]

    hidden = self.dropout(hidden, training=training)

    # Run the stacked encoder blocks in order.
    for layer_index in range(self.num_layers):
      block = self.enc_layers[layer_index]
      hidden = block(hidden, training, mask)

    return hidden  # (batch_size, input_seq_len, d_model)

　Encoder のインスタンス encoder を作成。

dropout_rate = 0.1

# dff is the output width of the first dense layer inside
# point_wise_feed_forward_network. The vocabulary size is the number of
# words counted by the tokenizer plus one.
encoder = Encoder(
    num_layers=6,
    d_model=128,
    num_heads=8,
    dff=512,
    input_vocab_size=len(lang.word_counts) + 1,
    maximum_position_encoding=10000,
    rate=dropout_rate,
)

# Smoke test: push a random (64, 62) batch of token ids through the encoder.
temp_input = tf.random.uniform((64, 62), dtype=tf.int64, minval=0, maxval=200)
sample_en_output = encoder(temp_input, training=False, mask=None)

print( sample_en_output.shape )

　次から、マルチモーダルの設定です。文章以外の特徴量は、年齢 1 入力とジェンダー 3 入力の合計 4 入力とし、文章は、encoder の出力 ( batch_size, max_sequence_length, d_model ) のうち先頭（<CLS>）位置の ( batch_size, d_model ) を全結合層で ( batch_size, 7 ) とした。文章以外の特徴量の ( batch_size, 4 ) 入力と、文章の識別結果 ( batch_size, 7 ) を concatenate して、softmax の全結合層で5択に対応させました。

# Preparation for the multimodal model.

# The text branch is reduced to 7 values by a dense layer, next to the
# 4 age/gender inputs.
dense_size = 7

# Age and gender input. The shape is passed as a one-element tuple:
# `(n)` without the trailing comma is just the integer n.
input_category = tf.keras.Input( shape=(plus_vector_train1.shape[1],), name='input_category')
def ml_category( input_category ):
    """Build the age/gender branch: Dense(num_plus, relu) then Dropout(0.1).

    Prints the shape of *input_category* and returns the branch output.
    """
    dense_layer = tf.keras.layers.Dense( num_plus, activation='relu', name="clx_dense_in_ml_category")
    branch = dense_layer(input_category)
    branch = tf.keras.layers.Dropout(0.1)(branch)
    print( f"shape of input_category:{input_category.shape}" )
    return branch

def create_padding_mask(seq):
    """Return a float mask that is 1.0 where *seq* equals 0, else 0.0.

    Two singleton axes are inserted so the mask can be added to the
    attention logits; the result has shape (batch_size, 1, 1, seq_len).
    """
    is_padding = tf.math.equal(seq, 0)
    mask = tf.cast(is_padding, tf.float32)
    return mask[:, tf.newaxis, tf.newaxis, :]


# Text input branch
input_text = tf.keras.Input(shape=(tensor_train1.shape[1],), name='input_text')
def ml_encoder(input_text):
    """Build the text branch and return its (batch, dense_size) output.

    The token ids are encoded by the module-level ``encoder`` with a
    padding mask. The output at sequence position 0 (the <CLS> slot) is
    then passed through Dense(dense_size, relu) and Dropout(0.1).
    """
    padding_mask = create_padding_mask( input_text )
    print( f"shape of input clx:{input_text.shape}" )

    encoded = encoder( input_text, mask=padding_mask )
    # Keep only the first sequence position as the sentence representation.
    cls_vector = encoded[:,0,:]

    reduce_layer = tf.keras.layers.Dense( dense_size, activation='relu', name="clx_dense_after_reduced_encoder_output")
    branch = reduce_layer(cls_vector)
    branch = tf.keras.layers.Dropout(0.1)(branch)
    print( f"after clx_encoder, shape of clx:{branch.shape}" )
    return branch

# Age and gender branch
ml_cat = ml_category( input_category )
# Text branch
ml_text = ml_encoder(input_text)

# Merge the text branch with the age/gender branch.
num_text_plus = dense_size + num_plus
num_categorical_crossentropy = 5  # number of target classes
clxcpsnd = tf.keras.layers.Concatenate()([ml_text, ml_cat])
clxcpsnd = tf.keras.layers.Dense(num_text_plus, activation='relu', name="last_dense")(clxcpsnd)

# Dropout, then a softmax Dense layer sized to the number of target classes.
# tf.keras.layers is used here as well, so every layer of the model comes
# from the same Keras namespace.
clxcpsnd = tf.keras.layers.Dropout(0.2)(clxcpsnd)
clxcpsnd = tf.keras.layers.Dense(num_categorical_crossentropy, activation='softmax', name="softmax")(clxcpsnd)

# Model definition
model = tf.keras.Model([input_text, input_category], clxcpsnd)

# Compile the model. `learning_rate` replaces the deprecated `lr` alias.
# NOTE(review): `decay` is kept from the original; newer Keras optimizer
# implementations may reject it — confirm against the installed version.
model.compile(optimizer=Adam(learning_rate=1e-4, decay=1e-6, amsgrad=True),
             loss=sparse_categorical_crossentropy,
             metrics=['accuracy'])

model.summary()

# The targets are converted to np.array because passing them as lists
# caused an error in model.fit's validation_data.
target_train_ar, target_val_ar, target_test_ar = (
    np.array( labels ) for labels in ( target_train1, target_val, target_test )
)

# Definition of MultiModalIterator
class MultiModalIterator():
    """Endless batch generator over (text, category, target) data.

    Calling an instance returns a generator that yields
    ``([text_batch, category_batch], target_batch)`` forever, taking
    ``steps_per_epoch`` batches from the dataset on each pass. Batching
    uses the module-level ``BUFFER_SIZE`` and ``BATCH_SIZE``.
    """

    def __init__(self,
                 tensor_train1,
                 plus_vector_train1,
                 target_train1,
                 steps_per_epoch=8):
        self.steps_per_epoch = steps_per_epoch
        self.target_train1 = target_train1
        self.plus_vector_train1 = plus_vector_train1
        self.tensor_train1 = tensor_train1

    def __call__(self):
        batches = self.__data_gen()

        # Loop forever; each pass takes steps_per_epoch batches.
        while True:
            for text_batch, category_batch, target_batch in batches.take(self.steps_per_epoch):
                yield [text_batch, category_batch], target_batch

    def __data_gen(self):
        """Return the shuffled dataset split into full batches only."""
        arrays = (self.tensor_train1, self.plus_vector_train1, self.target_train1)
        shuffled = tf.data.Dataset.from_tensor_slices(arrays).shuffle(BUFFER_SIZE)
        return shuffled.batch(BATCH_SIZE, drop_remainder=True)

# Create the training generator: build a MultiModalIterator and call it
# right away to obtain the generator object.
multimodal_source = MultiModalIterator(
    tensor_train1,
    plus_vector_train1,
    target_train_ar,
    steps_per_epoch=steps_per_epoch,
)
training_iterator = multimodal_source()

　tensorflow の fit 関数で学習。

epochs = 200

# Multiply the learning rate by 0.2 when val_loss has not improved for
# 2 epochs, down to a minimum of 1e-8.
lr_on_plateau = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, verbose=1, min_lr=1e-8)

model.fit(
    training_iterator,
    steps_per_epoch=steps_per_epoch,
    epochs=epochs,
    verbose=1,
    validation_data=([tensor_val, plus_vector_val], target_val_ar),
    callbacks=[lr_on_plateau],
)

　test データによる evaluate

# Convert the held-out test split to arrays for model.evaluate.
tensor_test_ar = np.array( tensor_test )
plus_vector_test_ar = np.array( plus_vector_test )
target_test_ar = np.array( target_test )

loss, acc = model.evaluate([tensor_test_ar, plus_vector_test_ar], target_test_ar, verbose=2)
# The loss is not a percentage, so it is printed without a "%" suffix;
# the label typo "los" is fixed as well.
print("test_data, loss: {:5.2f}".format( loss ))
print("test_data, accuracy: {:5.2f}%".format(100 * acc))

#y_preds = model.predict([tensor_test_ar, plus_vector_test_ar])
#for i in range( y_preds.shape[0] ):
#    print( "{},{}".format( np.argmax( y_preds[i]), target_test[i] ))

　学習を行った結果、epoch 200 で、loss = 0.8070, accuracy = 0.8818, val-loss = 0.8031, val-accuracy = 0.9120、test-loss = 0.8023、 test-accuracy = 0.9088 であった。model.predict も可能です。

　次に、文章データをメインに、年齢とジェンダーを一様乱数にしたデータを作り学習を行う。今、学習を行った ipynb をコピーして、「#1.データ作成ルーチン」を次に置き換える。

# Replacement for section #1 (data creation routine): real text and
# labels from WRIME, with random age and gender features.

# Load the WRIME table; missing cells become a single space.
df0 = pd.read_table("wrime/wrime-ver2.tsv", sep="\t" )
df0 = df0.fillna( " " )

num_data = 10000
num_category = 4  # one column for age, three one-hot columns for gender
nenrei_max = 100
gender_max = 3

data_text = []
data_category = np.zeros( (num_data, num_category ), dtype=np.float32 )
print( f"shape of data_category:{data_category.shape}" )
target = []

for i in range( num_data ):
    data_text.append( input_japanese_preprocessing( df0['Sentence'][i] ) )

    # Column 0: age scaled to [0, 1].
    data_category[i,0] = random.uniform(0, nenrei_max ) / nenrei_max

    # Columns 1..3: one-hot gender; [0,1) -> column 1, [1,2) -> column 2,
    # 2 and above -> column 3.
    gender = random.uniform( 0, gender_max )
    gender_column = 1 + min( int( gender ), 2 )
    data_category[i, gender_column] = 1

    # Shift the WRIME label from -2..2 to 0..4.
    target.append( df0['Writer_Sentiment'][i] + 2 )

print( f"data_text[:3]:{data_text[:3]}" )
print( f"data_category[:3]:{data_category[:3]}" )
print( f"target[:3]:{target[:3]}" )
print( f"np.unique( target ):{np.unique(target )}" )

　加えて、「#2.文章を一様乱数にするルーチン」を削除して実行しない。データは、文章とクラス分類の教師データ（github のデータ）、年齢（一様乱数）、ジェンダー（一様乱数）です。

　この時の学習結果が、loss = 1.2498, accuracy = 0.4461、 val-loss = 1.5421, val-accuracy = 0.3583, test-loss =1.5784, test-accuracy = 0.3416 という結果です。文章だけのデータでマルチモーダルにせず、encoder の識別器のみの学習で、test-accuracy が0.4 程度の5択のデータです。

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up