
TensorFlow Eager Execution and Focal Loss

Posted at 2019-03-29

Purpose

  • Get familiar with TensorFlow's eager execution.
  • Try focal loss on imbalanced data.

References

TensorFlow Eager Execution

Focal Loss for Dense Object Detection

Code (Jupyter Notebook)

GitHub

Libraries

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from tqdm import tqdm

from sklearn.model_selection import train_test_split
from sklearn import metrics

import tensorflow as tf
import tensorflow.contrib.eager as tfe
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

np.random.seed(0)

Eager execution (in TF 1.x)

tf.enable_eager_execution()  # must be called once, at program startup, before any ops run

print ("TensorFlow version: {}".format(tf.VERSION))
print ("Eager execution: {}".format(tf.executing_eagerly()))

(output: TensorFlow version and eager execution status)
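
With eager execution enabled, operations run as soon as they are called and return concrete values, so there is no Session or feed_dict. A minimal check:

x = tf.constant([[1., 2.], [3., 4.]])
print (tf.matmul(x, x))          # evaluated immediately
print (tf.matmul(x, x).numpy())  # eager tensors convert directly to NumPy arrays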

Data

Fraud detection dataset on Kaggle

# path and file point to the downloaded Kaggle CSV (set them for your environment)
df_data = pd.read_csv(os.path.join(path, file))

# drop ID-like and categorical columns, keeping only numeric features
df_data = df_data.drop('nameDest', axis=1)
df_data = df_data.drop('nameOrig', axis=1)
df_data = df_data.drop('type', axis=1)

print ('shape: ', df_data.shape)
df_data.head()

(output: dataframe shape and first rows)

print (df_data['isFraud'].value_counts())
print ()
print ('% of 0: ', np.sum(df_data['isFraud']==0)/len(df_data))

plt.figure(figsize=(5, 3))
sns.countplot(df_data['isFraud'])
plt.show()

(figure: class counts of isFraud)

def feature_normalize(dataset):
    # z-score normalization: zero mean, unit variance per feature
    mu = np.mean(dataset, axis=0)
    sigma = np.std(dataset, axis=0)
    return (dataset - mu) / sigma

# features = all columns except the last two (isFraud, isFlaggedFraud); target = isFraud
X, y = df_data.iloc[:, :-2], df_data.iloc[:, -2]
y = tf.keras.utils.to_categorical(y, num_classes=2)

X = feature_normalize(X.values)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

X_train_tf = tf.convert_to_tensor(X_train, dtype=tf.float32)
y_train_tf = tf.convert_to_tensor(y_train, dtype=tf.float32)
X_test_tf = tf.convert_to_tensor(X_test, dtype=tf.float32)
y_test_tf = tf.convert_to_tensor(y_test, dtype=tf.float32)

Examples of how to define the model

The same architecture is implemented in three different ways.

Pattern 1

class Pattern1():        
    def __init__(self):        
        input_size = X_train.shape[1]
        hidden_size1 = 10
        hidden_size2 = 20
        hidden_size3 = 10
        output_size = y_train.shape[1]

        self.fc1_w = tfe.Variable(tf.truncated_normal([input_size, hidden_size1],
                                                       stddev=0.1), dtype=tf.float32)
        self.fc1_b = tfe.Variable(tf.constant(0.1, shape=[hidden_size1]),
                                  dtype=tf.float32)
        self.fc2_w = tfe.Variable(tf.truncated_normal([hidden_size1, hidden_size2],
                                                      stddev=0.1), dtype=tf.float32)
        self.fc2_b = tfe.Variable(tf.constant(0.1, shape=[hidden_size2]),
                                  dtype=tf.float32)
        self.fc3_w = tfe.Variable(tf.truncated_normal([hidden_size2, hidden_size3], 
                                                      stddev=0.1), dtype=tf.float32)
        self.fc3_b = tfe.Variable(tf.constant(0.1, shape=[hidden_size3]), 
                                  dtype=tf.float32)
        self.fc4_w = tfe.Variable(tf.truncated_normal([hidden_size3, output_size], 
                                                      stddev=0.1), dtype=tf.float32)
        self.fc4_b = tfe.Variable(tf.constant(0.1, shape=[output_size]), 
                                  dtype=tf.float32)        
        self.variables = [
            self.fc1_w, self.fc1_b,
            self.fc2_w, self.fc2_b,
            self.fc3_w, self.fc3_b,
            self.fc4_w, self.fc4_b,
        ]

    def __call__(self, x):
        h = tf.nn.relu(tf.matmul(x, self.fc1_w) + self.fc1_b)
        h = tf.nn.relu(tf.matmul(h, self.fc2_w) + self.fc2_b)
        h = tf.nn.relu(tf.matmul(h, self.fc3_w) + self.fc3_b)
        y_pred = tf.matmul(h, self.fc4_w) + self.fc4_b
        return y_pred
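
One point to note: Pattern1 returns raw logits, whereas Patterns 2 and 3 below end with a softmax layer. A small sketch of getting probabilities out of Pattern1 (model1 is just an illustrative name):

model1 = Pattern1()
logits = model1(X_train_tf[:5])
probs = tf.nn.softmax(logits)  # softmax applied explicitly, since the model outputs logits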

Pattern 2

class Pattern2(tf.keras.Model):
    def __init__(self):
        super(Pattern2, self).__init__()

        input_shape = X_train.shape[1:]
        hidden_size1 = 10
        hidden_size2 = 20
        hidden_size3 = 10
        output_size = y_train.shape[1]

        self.dense1 = tf.keras.layers.Dense(hidden_size1, activation='relu',
                                           input_shape=input_shape)
        self.dense2 = tf.keras.layers.Dense(hidden_size2, activation='relu')
        self.dense3 = tf.keras.layers.Dense(hidden_size3, activation='relu')
        self.dense4 = tf.keras.layers.Dense(output_size, activation='softmax')

    def call(self, x):  # define call() so tf.keras.Model's __call__ machinery stays intact
        h = self.dense1(x)
        h = self.dense2(h)
        h = self.dense3(h)
        h = self.dense4(h)
        return h
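
Because Pattern2 subclasses tf.keras.Model, its weights are only created on the first forward pass. A quick check (model2 is an illustrative name):

model2 = Pattern2()
_ = model2(X_train_tf[:1])    # the layers are built on this first call
print (len(model2.variables))  # 8 variables: a kernel and a bias per Dense layer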

Pattern 3

input_shape = X_train.shape[1:]
hidden_size1 = 10
hidden_size2 = 20
hidden_size3 = 10
output_size = y_train.shape[1]

Pattern3 = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=input_shape),
    tf.keras.layers.Dense(hidden_size1, activation='relu'),
    tf.keras.layers.Dense(hidden_size2, activation='relu'),
    tf.keras.layers.Dense(hidden_size3, activation='relu'),
    tf.keras.layers.Dense(output_size, activation='softmax'),    
])
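
Pattern3 starts with an InputLayer, so it is built immediately and the layer shapes can be inspected right away:

Pattern3.summary()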

Examples of how to train

Patterns 4 and 5 use focal loss.
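
For reference, the loss from the referenced paper down-weights examples the model already classifies well, so training concentrates on the hard, rare class:

FL(p_t) = -α (1 - p_t)^γ log(p_t)

Here p_t is the predicted probability of the true class, γ ≥ 0 controls how strongly easy examples are suppressed, and α balances the classes. With γ = 0 and α = 1 this reduces to ordinary cross entropy.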

Pattern 1

model = Pattern3  # train the Sequential model (Pattern 3) defined above

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

early_stopping = EarlyStopping(monitor='val_loss', mode='min', 
                               patience=5, verbose=1)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', mode='min', 
                              factor=0.1, patience=5, min_lr=0.00001, verbose=1)

epochs = 3
batch_size = 1000

history = model.fit(X_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    validation_data=(X_test, y_test),
                    callbacks=[early_stopping, reduce_lr],
                    verbose=2,                    
                    )
plt.figure(figsize=(5, 3))
plt.plot(history.epoch, history.history["loss"], label="Train loss")
plt.plot(history.epoch, history.history["val_loss"], label="Validation loss")
plt.title('Loss')
plt.xlabel('epoch')
plt.legend(loc='best')
plt.show()

plt.figure(figsize=(5, 3))
plt.plot(history.epoch, history.history["acc"], label="Train accuracy")
plt.plot(history.epoch, history.history["val_acc"], label="Validation accuracy")
plt.title('Accuracy')
plt.xlabel('epoch')
plt.legend(loc='best')
plt.show()
preds = model.predict(X_test, batch_size=1000)

LABELS = ['Normal','Fraud'] 

truth = np.argmax(y_test, axis=1)
predictions = np.argmax(preds, axis=1)
confusion_matrix = metrics.confusion_matrix(truth, predictions)

plt.figure(figsize=(3, 3))
sns.heatmap(confusion_matrix, xticklabels=LABELS, yticklabels=LABELS,
            annot=True, fmt="d", linewidth=0.1, linecolor='white', 
            annot_kws={"size": 10});
plt.title("Confusion matrix", fontsize=10)
plt.ylabel('True label', fontsize=10)
plt.xlabel('Predicted label', fontsize=10)
plt.show()

(figure: confusion matrix)

error_count = confusion_matrix.sum() - np.trace(confusion_matrix)

f1 = metrics.f1_score(truth, predictions)
precision = metrics.precision_score(truth, predictions)
recall = metrics.recall_score(truth, predictions)

print ('error count: ', error_count)
print ('f1: {:.3f}'.format(f1))
print ('precision: {:.3f}'.format(precision))
print ('recall: {:.3f}'.format(recall))

(output: error count, f1, precision, recall)
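
Because the vast majority of rows are non-fraud, accuracy alone says little here, which is why f1, precision, and recall are reported. ROC AUC is another common check for imbalanced data (a sketch, not in the original notebook):

print ('roc auc: {:.3f}'.format(metrics.roc_auc_score(truth, preds[:, 1])))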

Pattern 2

model = Pattern3  # note: this reuses the same Sequential instance (weights carry over unless it is re-created)

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

class_weight = {0: 1., 1: 20.}  # up-weight the rare fraud class in the loss

epochs = 3
batch_size = 1000

history = model.fit(X_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    validation_data=(X_test, y_test),
                    class_weight=class_weight,
                    verbose=2,                    
                    )

(output: training log)
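
Rather than hardcoding the weight 20, one option is to derive it from the class frequencies of the training labels (a sketch; the names are illustrative):

n_fraud = int(y_train[:, 1].sum())  # y_train is one-hot; column 1 is the fraud class
class_weight = {0: 1., 1: (len(y_train) - n_fraud) / n_fraud}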

Pattern 3

def loss(model, x, y):
    y_pred = model(x)
    # clip predictions away from zero to avoid log(0)
    cross_entropy = - tf.reduce_mean(tf.reduce_sum(y*tf.log(tf.clip_by_value(y_pred, 1e-10, 1.0)),
                                                   axis=1))
    return cross_entropy

def accuracy(y_true, y_pred):
    correct_preds = tf.equal(tf.argmax(y_true, axis = 1),
                             tf.argmax(y_pred, axis = 1))
    accuracy = tf.reduce_mean(tf.cast(correct_preds, tf.float32))
    return accuracy

def grad(model, x, y):
    with tf.GradientTape() as tape:
        loss_value = loss(model, x, y)
    return tape.gradient(loss_value, model.variables)

model = Pattern2()

epochs = 3
batch_size = 1000

optimizer = tf.train.AdamOptimizer()

for epoch in tqdm(range(epochs)):

    n = tf.shape(X_train_tf).numpy()[0]
    perm = np.random.permutation(n)

    for i in range(0, n, batch_size): 
        batch_x = tf.gather(X_train_tf, perm[i:i+batch_size])
        batch_y = tf.gather(y_train_tf, perm[i:i+batch_size])

        grads = grad(model, batch_x, batch_y)
        optimizer.apply_gradients(zip(grads, model.variables), 
                                  global_step=tf.train.get_or_create_global_step())

    train_loss = loss(model, X_train_tf, y_train_tf)
    train_acc = accuracy(y_train_tf, model(X_train_tf))

    valid_loss = loss(model, X_test_tf, y_test_tf)
    valid_acc = accuracy(y_test_tf, model(X_test_tf))

    if (epoch+1)%1 == 0:
        tqdm.write('epoch:\t{}\tloss:\t{:.5f}\tacc:\t{:.5f}\tval_loss:\t{:.5f}\tval_acc:\t{:.5f}'.format(
            epoch+1, train_loss, train_acc, valid_loss, valid_acc)
                  )

(output: per-epoch loss and accuracy)
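
The permutation-and-gather batching above can also be written with tf.data, whose datasets are directly iterable under eager execution (a sketch under the same TF 1.x setup, not what the notebook does):

dataset = (tf.data.Dataset.from_tensor_slices((X_train_tf, y_train_tf))
           .shuffle(buffer_size=10000)
           .batch(batch_size))
for batch_x, batch_y in dataset:
    grads = grad(model, batch_x, batch_y)
    optimizer.apply_gradients(zip(grads, model.variables),
                              global_step=tf.train.get_or_create_global_step())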

Pattern 4

def focal_loss(gamma=2., alpha=4.):

    gamma = float(gamma)
    alpha = float(alpha)

    def focal_loss_fixed(y_true, y_pred):
        """Focal loss for multi-classification
        FL(p_t)=-alpha(1-p_t)^{gamma}ln(p_t)
        Notice: y_pred is probability after softmax
        gradient is d(Fl)/d(p_t) not d(Fl)/d(x) as described in paper
        d(Fl)/d(p_t) * [p_t(1-p_t)] = d(Fl)/d(x)
        Focal Loss for Dense Object Detection
        https://arxiv.org/abs/1708.02002

        Arguments:
            y_true {tensor} -- ground truth labels, shape of [batch_size, num_cls]
            y_pred {tensor} -- model's output, shape of [batch_size, num_cls]

        Keyword Arguments:
            gamma {float} -- (default: {2.0})
            alpha {float} -- (default: {4.0})

        Returns:
            [tensor] -- loss.
        """
        epsilon = 1.e-9
        y_true = tf.convert_to_tensor(y_true, tf.float32)
        y_pred = tf.convert_to_tensor(y_pred, tf.float32)

        model_out = tf.add(y_pred, epsilon)
        ce = tf.multiply(y_true, -tf.log(model_out))
        weight = tf.multiply(y_true, tf.pow(tf.subtract(1., model_out), gamma))
        fl = tf.multiply(alpha, tf.multiply(weight, ce))
        # y_true is one-hot, so reduce_max keeps exactly the true-class term (reduce_sum would be equivalent)
        reduced_fl = tf.reduce_max(fl, axis=1)
        return tf.reduce_mean(reduced_fl)
    return focal_loss_fixed
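
A quick numeric check of the down-weighting (with eager execution enabled): at gamma=2, an easy example (p_t = 0.9) is weighted by (1 - 0.9)^2 = 0.01, i.e. about 100 times less than plain cross entropy, while a hard example (p_t = 0.1) keeps a weight of 0.81.

fl = focal_loss(gamma=2., alpha=1.)
y_true = tf.constant([[0., 1.], [0., 1.]])
y_pred = tf.constant([[0.1, 0.9], [0.9, 0.1]])  # one easy example, one hard example
print (fl(y_true, y_pred).numpy())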

model = Pattern3  # the Sequential model again (re-create it if fresh weights are wanted)

model.compile(loss=focal_loss(alpha=1),
              optimizer='adam',
              metrics=['accuracy'])

early_stopping = EarlyStopping(monitor='val_loss', mode='min', 
                               patience=5, verbose=1)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', mode='min', 
                              factor=0.1, patience=5, min_lr=0.00001, verbose=1)

epochs = 3
batch_size = 1000

history = model.fit(X_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    validation_data=(X_test, y_test),
                    callbacks=[early_stopping, reduce_lr],
                    verbose=2,                    
                    )

(output: training log)

Pattern 5

def ce_loss(model, x, y):
    y_pred = model(x)
    cross_entropy = - tf.reduce_mean(tf.reduce_sum(y*tf.log(tf.clip_by_value(y_pred, 1e-10, 1.0)),
                                                   axis=1))
    return cross_entropy

def focal_loss(model, x, y, gamma=2., alpha=4.):
    epsilon = 1.e-9
    y_true = tf.convert_to_tensor(y, tf.float32)
    y_pred = tf.convert_to_tensor(model(x), tf.float32)

    model_out = tf.add(y_pred, epsilon)
    ce = tf.multiply(y_true, -tf.log(model_out))
    weight = tf.multiply(y_true, tf.pow(tf.subtract(1., model_out), gamma))
    fl = tf.multiply(alpha, tf.multiply(weight, ce))
    # y_true is one-hot, so reduce_max keeps exactly the true-class term
    reduced_fl = tf.reduce_max(fl, axis=1)
    return tf.reduce_mean(reduced_fl)

def accuracy(y_true, y_pred):
    correct_preds = tf.equal(tf.argmax(y_true, axis = 1),
                             tf.argmax(y_pred, axis = 1))
    accuracy = tf.reduce_mean(tf.cast(correct_preds, tf.float32))
    return accuracy

def grad(model, loss, x, y):
    with tf.GradientTape() as tape:
        loss_value = loss(model, x, y)
    return tape.gradient(loss_value, model.variables)

model = Pattern2()
loss = focal_loss
epochs = 3
batch_size = 1000

optimizer = tf.train.AdamOptimizer()

for epoch in tqdm(range(epochs)):

    n = tf.shape(X_train_tf).numpy()[0]
    perm = np.random.permutation(n)

    for i in range(0, n, batch_size): 
        batch_x = tf.gather(X_train_tf, perm[i:i+batch_size])
        batch_y = tf.gather(y_train_tf, perm[i:i+batch_size])

        grads = grad(model, loss, batch_x, batch_y)
        optimizer.apply_gradients(zip(grads, model.variables), 
                                  global_step=tf.train.get_or_create_global_step())

    train_loss = loss(model, X_train_tf, y_train_tf)
    train_acc = accuracy(y_train_tf, model(X_train_tf))

    valid_loss = loss(model, X_test_tf, y_test_tf)
    valid_acc = accuracy(y_test_tf, model(X_test_tf))

    if (epoch+1)%1 == 0:
        tqdm.write('epoch:\t{}\tloss:\t{:.5f}\tacc:\t{:.5f}\tval_loss:\t{:.5f}\tval_acc:\t{:.5f}'.format(
            epoch+1, train_loss, train_acc, valid_loss, valid_acc)
                  )

preds = model(X_test_tf)

LABELS = ['Normal','Fraud'] 

truth = np.argmax(y_test_tf, axis=1)
predictions = np.argmax(preds, axis=1)
confusion_matrix = metrics.confusion_matrix(truth, predictions)

plt.figure(figsize=(3, 3))
sns.heatmap(confusion_matrix, xticklabels=LABELS, yticklabels=LABELS,
            annot=True, fmt="d", linewidth=0.1, linecolor='white', 
            annot_kws={"size": 10});
plt.title("Confusion matrix", fontsize=10)
plt.ylabel('True label', fontsize=10)
plt.xlabel('Predicted label', fontsize=10)
plt.show()

(figure: confusion matrix)
