Help us understand the problem. What is going on with this article?

TensorFlow Eager Execution と Focal Loss について

More than 1 year has passed since last update.

目的

  • TensorFlow の eager execution に慣れる。
  • 不均衡データ (imbalanced data) に対し、focal loss を試す。

参照

TensorFlow Eager Execution

Focal Loss for Dense Object Detection

コード (Jupyter Notebook)

Github

ライブラリー

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from tqdm import tqdm

from sklearn.model_selection  import train_test_split
from sklearn import metrics

import tensorflow as tf
import tensorflow.contrib.eager as tfe
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

np.random.seed(0)

Eager execution (TF 1.x の時)

# Turn on eager execution (TF 1.x API), then report the runtime state so the
# notebook output records which setup produced the results below.
tf.enable_eager_execution()

tf_version = tf.VERSION
eager_enabled = tf.executing_eagerly()

print ("TensorFlow version: {}".format(tf_version))
print ("Eager execution: {}".format(eager_enabled))

image.png

データ

Fraud detection dataset on Kaggle

# Read the raw CSV; `path` and `file` are not defined in this section and are
# expected to exist already when this cell runs.
df_data = pd.read_csv(os.path.join(path, file))

# Remove the three columns that are not used further, in a single call.
df_data = df_data.drop(['nameDest', 'nameOrig', 'type'], axis=1)

print ('shape: ', df_data.shape)
df_data.head()

image.png

# Class balance of the target column: absolute counts per class, followed by
# the fraction of rows whose isFraud value is 0.
print (df_data['isFraud'].value_counts())
print ()
print ('% of 0: ', np.sum(df_data['isFraud']==0)/len(df_data))

plt.figure(figsize=(5, 3))
# Select the column through x=/data= keywords instead of passing the Series
# positionally: the meaning of countplot's first positional argument differs
# between seaborn releases, whereas the keyword form is stable.
sns.countplot(x='isFraud', data=df_data)
plt.show()

image.png

def feature_normalize(dataset):
    """Standardise `dataset` along axis 0 to zero mean and unit variance.

    Args:
        dataset: array-like whose first axis indexes samples.

    Returns:
        `(dataset - mean) / std`, with mean and (population) standard
        deviation taken over axis 0. Entries whose standard deviation is
        exactly zero are divided by 1 instead, so a constant column maps to
        all zeros rather than NaN/inf.
    """
    mu = np.mean(dataset, axis=0)
    sigma = np.std(dataset, axis=0)
    # A constant column has sigma == 0; dividing by it would inject NaN/inf
    # into every downstream computation.
    safe_sigma = np.where(sigma == 0, 1.0, sigma)
    return (dataset - mu) / safe_sigma

# Every column except the last two is a feature; the second-to-last column is
# the label (presumably isFraud -- confirm against the CSV layout). The label
# is one-hot encoded into two classes.
X = df_data.iloc[:, :-2]
y = tf.keras.utils.to_categorical(df_data.iloc[:, -2], num_classes=2)

# NOTE(review): the normalisation statistics are computed over all rows,
# i.e. before the train/test split, so test rows contribute to them.
X = feature_normalize(X.values)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1, test_size=0.2)

# float32 tensor copies of each split for the eager-mode training loops.
X_train_tf, y_train_tf, X_test_tf, y_test_tf = (
    tf.convert_to_tensor(split, dtype=tf.float32)
    for split in (X_train, y_train, X_test, y_test)
)

Model の書き方の例

同一のアーキテクチャを3パターンのやり方で実装。

パターン1

class Pattern1():
    """Fully connected network written directly with `tfe.Variable`s.

    The layer widths are input -> 10 -> 20 -> 10 -> output, where the input
    and output widths are read from the module-level `X_train` / `y_train`.
    Calling the instance returns the output of the last affine layer; no
    activation is applied to it.
    """

    def __init__(self):
        widths = [X_train.shape[1], 10, 20, 10, y_train.shape[1]]

        # Parameters are created layer by layer, weight first and bias second.
        self.fc1_w, self.fc1_b = self._make_layer(widths[0], widths[1])
        self.fc2_w, self.fc2_b = self._make_layer(widths[1], widths[2])
        self.fc3_w, self.fc3_b = self._make_layer(widths[2], widths[3])
        self.fc4_w, self.fc4_b = self._make_layer(widths[3], widths[4])

        # Flat parameter list, in the order expected by the gradient helpers.
        self.variables = [
            self.fc1_w, self.fc1_b,
            self.fc2_w, self.fc2_b,
            self.fc3_w, self.fc3_b,
            self.fc4_w, self.fc4_b,
        ]

    @staticmethod
    def _make_layer(n_in, n_out):
        """Return (weight, bias) for one dense layer of shape n_in x n_out."""
        weight = tfe.Variable(tf.truncated_normal([n_in, n_out], stddev=0.1),
                              dtype=tf.float32)
        bias = tfe.Variable(tf.constant(0.1, shape=[n_out]),
                            dtype=tf.float32)
        return weight, bias

    def __call__(self, x):
        """Run the forward pass on `x` and return the final layer's output."""
        hidden_params = (
            (self.fc1_w, self.fc1_b),
            (self.fc2_w, self.fc2_b),
            (self.fc3_w, self.fc3_b),
        )
        h = x
        for weight, bias in hidden_params:
            h = tf.nn.relu(tf.matmul(h, weight) + bias)
        return tf.matmul(h, self.fc4_w) + self.fc4_b

パターン2

class Pattern2(tf.keras.Model):
    """Subclassed Keras model: three ReLU dense layers and a softmax output.

    The layer widths are 10 -> 20 -> 10 -> output, where the output width is
    read from the module-level `y_train`. Instances are used as `model(x)`.
    """

    def __init__(self):
        super(Pattern2, self).__init__()

        input_shape = X_train.shape[1:]
        hidden_size1 = 10
        hidden_size2 = 20
        hidden_size3 = 10
        output_size = y_train.shape[1]

        self.dense1 = tf.keras.layers.Dense(hidden_size1, activation='relu',
                                           input_shape=input_shape)
        self.dense2 = tf.keras.layers.Dense(hidden_size2, activation='relu')
        self.dense3 = tf.keras.layers.Dense(hidden_size3, activation='relu')
        self.dense4 = tf.keras.layers.Dense(output_size, activation='softmax')

    # The forward pass belongs in `call`, not `__call__`: Keras' own
    # `__call__` wraps `call` with its build/bookkeeping logic, and overriding
    # `__call__` directly bypasses that wrapper. `model(x)` keeps working.
    def call(self, x):
        """Return the softmax output of the network for input `x`."""
        h = self.dense1(x)
        h = self.dense2(h)
        h = self.dense3(h)
        h = self.dense4(h)
        return h

パターン3

# Same architecture as the other patterns, expressed as a Sequential stack.
# Input and output widths come from the training arrays.
input_shape = X_train.shape[1:]
hidden_size1 = 10
hidden_size2 = 20
hidden_size3 = 10
output_size = y_train.shape[1]

Pattern3 = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=input_shape),
    # Three hidden ReLU layers, created in order.
    *[tf.keras.layers.Dense(width, activation='relu')
      for width in (hidden_size1, hidden_size2, hidden_size3)],
    tf.keras.layers.Dense(output_size, activation='softmax'),
])

Training のやり方の例

パターン4と5が focal loss を使用。

パターン1

# Training pattern 1: Keras compile/fit with categorical cross-entropy,
# followed by loss/accuracy curves and a confusion matrix on the test split.
model = Pattern3
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Both callbacks monitor the validation loss with a patience of 5 epochs.
early_stopping = EarlyStopping(monitor='val_loss', mode='min',
                               patience=5, verbose=1)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', mode='min',
                              factor=0.1, patience=5, min_lr=0.00001, verbose=1)

epochs = 3
batch_size = 1000

history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=epochs,
    batch_size=batch_size,
    callbacks=[early_stopping, reduce_lr],
    verbose=2,
)


def _plot_curves(hist, title, train_key, train_label, val_key, val_label):
    """Plot one training curve and its validation counterpart per epoch."""
    plt.figure(figsize=(5, 3))
    plt.plot(hist.epoch, hist.history[train_key], label=train_label)
    plt.plot(hist.epoch, hist.history[val_key], label=val_label)
    plt.title(title)
    plt.xlabel('epoch')
    plt.legend(loc='best')
    plt.show()


_plot_curves(history, 'Loss', "loss", "Train loss",
             "val_loss", "Validation loss")
_plot_curves(history, 'Accuracy', "acc", "Train accuracy",
             "val_acc", "Validation accuracy")

preds = model.predict(X_test, batch_size=1000)

LABELS = ['Normal','Fraud']

# Convert one-hot targets and predicted class scores to class indices.
truth = np.argmax(y_test, axis=1)
predictions = np.argmax(preds, axis=1)
confusion_matrix = metrics.confusion_matrix(truth, predictions)

heatmap_style = dict(annot=True, fmt="d", linewidth=0.1, linecolor='white',
                     annot_kws={"size": 10})

plt.figure(figsize=(3, 3))
sns.heatmap(confusion_matrix, xticklabels=LABELS, yticklabels=LABELS,
            **heatmap_style);
plt.title("Confusion matrix", fontsize=10)
plt.ylabel('True label', fontsize=10)
plt.xlabel('Predicted label', fontsize=10)
plt.show()

image.png

# Misclassified samples are everything off the confusion-matrix diagonal.
values = confusion_matrix.view()
correct_count = np.trace(values)
error_count = values.sum() - correct_count

# Scores computed from the class-index vectors with the metric defaults.
f1 = metrics.f1_score(y_true=truth, y_pred=predictions)
precision = metrics.precision_score(y_true=truth, y_pred=predictions)
recall = metrics.recall_score(y_true=truth, y_pred=predictions)

print ('error count: ', error_count)
print ('f1: {:.3f}'.format(f1))
print ('precision: {:.3f}'.format(precision))
print ('recall: {:.3f}'.format(recall))

image.png

パターン2

# Training pattern 2: cross-entropy with per-class weights.
#
# `Pattern3` is a single model instance, so `model = Pattern3` would continue
# training whatever weights an earlier experiment left in it. Clone it to get
# the same architecture with freshly initialised weights, so this run does
# not start from another experiment's result.
model = tf.keras.models.clone_model(Pattern3)

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# Weight class 1 twenty times as heavily as class 0 in the loss.
class_weight = {0 : 1., 1: 20.}

epochs = 3
batch_size = 1000

history = model.fit(X_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    validation_data=(X_test, y_test),
                    class_weight=class_weight,
                    verbose=2,
                    )

image.png

パターン3

def loss(model, x, y):
    """Mean cross-entropy between targets `y` and the output of `model(x)`.

    The model output is clipped to [1e-10, 1.0] before the logarithm, then
    the per-row sum of `y * log(output)` is averaged and negated.
    """
    clipped = tf.clip_by_value(model(x), 1e-10, 1.0)
    per_sample = tf.reduce_sum(y * tf.log(clipped), axis=1)
    return - tf.reduce_mean(per_sample)

def accuracy(y_true, y_pred):
    """Fraction of rows whose argmax along axis 1 matches in both inputs."""
    true_idx = tf.argmax(y_true, axis=1)
    pred_idx = tf.argmax(y_pred, axis=1)
    hits = tf.cast(tf.equal(true_idx, pred_idx), tf.float32)
    return tf.reduce_mean(hits)

def grad(model, x, y):
    """Return the gradients of `loss(model, x, y)` w.r.t. `model.variables`."""
    with tf.GradientTape() as recorder:
        batch_loss = loss(model, x, y)
    # `model.variables` is deliberately read after the forward pass, exactly
    # as before (a model may only create its variables on first call).
    return recorder.gradient(batch_loss, model.variables)

# Training pattern 3: hand-written mini-batch loop with the cross-entropy
# `loss` and gradients from `grad`, optimised with Adam.
model = Pattern2()

epochs = 3
batch_size = 1000

optimizer = tf.train.AdamOptimizer()

# The number of training rows does not change between epochs.
n = tf.shape(X_train_tf).numpy()[0]

for epoch in tqdm(range(epochs)):

    # Fresh random sample order for every epoch.
    perm = np.random.permutation(n)

    for i in range(0, n, batch_size):
        batch_idx = perm[i:i+batch_size]
        batch_x = tf.gather(X_train_tf, batch_idx)
        batch_y = tf.gather(y_train_tf, batch_idx)

        grads = grad(model, batch_x, batch_y)
        optimizer.apply_gradients(
            zip(grads, model.variables),
            global_step=tf.train.get_or_create_global_step())

    # End-of-epoch evaluation on the complete train and test tensors.
    train_loss = loss(model, X_train_tf, y_train_tf)
    train_acc = accuracy(y_train_tf, model(X_train_tf))

    valid_loss = loss(model, X_test_tf, y_test_tf)
    valid_acc = accuracy(y_test_tf, model(X_test_tf))

    # Modulus 1 logs every epoch; raise it to log less often.
    if (epoch + 1) % 1 == 0:
        tqdm.write('epoch:\t{}\tloss:\t{:.5f}\tacc:\t{:.5f}\tval_loss:\t{:.5f}\tval_acc:\t{:.5f}'.format(
            epoch+1, train_loss, train_acc, valid_loss, valid_acc))

image.png

パターン4

def focal_loss(gamma=2., alpha=4.):
    """Build a `(y_true, y_pred)` focal-loss function for Keras `compile`.

    Keyword Arguments:
        gamma {float} -- exponent applied to (1 - p_t) (default: {2.0})
        alpha {float} -- scalar multiplier of the loss (default: {4.0})

    Returns:
        [callable] -- `focal_loss_fixed(y_true, y_pred)` closing over the
        float-converted `gamma` and `alpha`.
    """
    gamma, alpha = float(gamma), float(alpha)

    def focal_loss_fixed(y_true, y_pred):
        """Focal loss for multi-class classification.

        FL(p_t) = -alpha * (1 - p_t)^gamma * ln(p_t)
        Reference: Focal Loss for Dense Object Detection,
        https://arxiv.org/abs/1708.02002

        Note: y_pred is expected to already be a probability (after
        softmax), so the gradient is taken w.r.t. p_t rather than w.r.t. the
        logits as described in the paper.

        Arguments:
            y_true {tensor} -- ground truth labels, shape of [batch_size, num_cls]
            y_pred {tensor} -- model's output, shape of [batch_size, num_cls]

        Returns:
            [tensor] -- loss.
        """
        eps = 1.e-9
        targets = tf.convert_to_tensor(y_true, tf.float32)
        probs = tf.convert_to_tensor(y_pred, tf.float32)

        # Offset the probabilities by eps before taking the logarithm.
        shifted = probs + eps
        cross_entropy = targets * (-tf.log(shifted))
        modulator = targets * ((1. - shifted) ** gamma)
        per_class = alpha * (modulator * cross_entropy)
        # Collapse the class axis with a max, then average over the batch.
        per_sample = tf.reduce_max(per_class, axis=1)
        return tf.reduce_mean(per_sample)
    return focal_loss_fixed

# Training pattern 4: Keras compile/fit with the focal loss (alpha=1).
#
# `Pattern3` is a single model instance, so `model = Pattern3` would continue
# training whatever weights an earlier experiment left in it. Clone it to get
# the same architecture with freshly initialised weights, so the focal-loss
# run is not warm-started from another experiment.
model = tf.keras.models.clone_model(Pattern3)

model.compile(loss=focal_loss(alpha=1),
              optimizer='adam',
              metrics=['accuracy'])

# Both callbacks monitor the validation loss with a patience of 5 epochs.
early_stopping = EarlyStopping(monitor='val_loss', mode='min',
                               patience=5, verbose=1)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', mode='min',
                              factor=0.1, patience=5, min_lr=0.00001, verbose=1)

epochs = 3
batch_size = 1000

history = model.fit(X_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    validation_data=(X_test, y_test),
                    callbacks=[early_stopping, reduce_lr],
                    verbose=2,
                    )

image.png

パターン5

def ce_loss(model, x, y):
    """Mean cross-entropy between targets `y` and the output of `model(x)`.

    The model output is clipped to [1e-10, 1.0] before the logarithm, then
    the per-row sum of `y * log(output)` is averaged and negated.
    """
    clipped = tf.clip_by_value(model(x), 1e-10, 1.0)
    per_sample = tf.reduce_sum(y * tf.log(clipped), axis=1)
    return - tf.reduce_mean(per_sample)

def focal_loss(model, x, y, gamma=2., alpha=4.):
    """Focal loss of `model(x)` against the targets `y`.

    Computes alpha * (1 - p)^gamma * (-ln p) per class on the target-masked
    outputs, takes the maximum over axis 1 and averages over the batch.

    Args:
        model: callable returning per-class outputs for `x` (assumed to be
            probabilities, since a logarithm is applied -- confirm).
        x: model input.
        y: targets with the same shape as the model output.
        gamma: exponent applied to (1 - p).
        alpha: scalar multiplier of the loss.

    Returns:
        Scalar loss tensor.
    """
    eps = 1.e-9
    targets = tf.convert_to_tensor(y, tf.float32)
    probs = tf.convert_to_tensor(model(x), tf.float32)

    # Offset the outputs by eps before taking the logarithm.
    shifted = probs + eps
    cross_entropy = targets * (-tf.log(shifted))
    modulator = targets * tf.pow(1. - shifted, gamma)
    per_class = alpha * (modulator * cross_entropy)
    per_sample = tf.reduce_max(per_class, axis=1)
    return tf.reduce_mean(per_sample)

def accuracy(y_true, y_pred):
    """Fraction of rows whose argmax along axis 1 matches in both inputs."""
    true_idx = tf.argmax(y_true, axis=1)
    pred_idx = tf.argmax(y_pred, axis=1)
    hits = tf.cast(tf.equal(true_idx, pred_idx), tf.float32)
    return tf.reduce_mean(hits)

def grad(model, loss, x, y):
    """Return the gradients of `loss(model, x, y)` w.r.t. `model.variables`.

    `loss` is any callable with the signature `loss(model, x, y)`.
    """
    with tf.GradientTape() as recorder:
        batch_loss = loss(model, x, y)
    # `model.variables` is deliberately read after the forward pass, exactly
    # as before (a model may only create its variables on first call).
    return recorder.gradient(batch_loss, model.variables)

# Training pattern 5: hand-written mini-batch loop using the focal loss.
model = Pattern2()
loss = focal_loss  # loss callable used for both training and evaluation
epochs = 3
batch_size = 1000

optimizer = tf.train.AdamOptimizer()

# The number of training rows does not change between epochs.
n = tf.shape(X_train_tf).numpy()[0]

for epoch in tqdm(range(epochs)):

    # Fresh random sample order for every epoch.
    perm = np.random.permutation(n)

    for i in range(0, n, batch_size):
        batch_idx = perm[i:i+batch_size]
        batch_x = tf.gather(X_train_tf, batch_idx)
        batch_y = tf.gather(y_train_tf, batch_idx)

        grads = grad(model, loss, batch_x, batch_y)
        optimizer.apply_gradients(
            zip(grads, model.variables),
            global_step=tf.train.get_or_create_global_step())

    # End-of-epoch evaluation on the complete train and test tensors.
    train_loss = loss(model, X_train_tf, y_train_tf)
    train_acc = accuracy(y_train_tf, model(X_train_tf))

    valid_loss = loss(model, X_test_tf, y_test_tf)
    valid_acc = accuracy(y_test_tf, model(X_test_tf))

    # Modulus 1 logs every epoch; raise it to log less often.
    if (epoch + 1) % 1 == 0:
        tqdm.write('epoch:\t{}\tloss:\t{:.5f}\tacc:\t{:.5f}\tval_loss:\t{:.5f}\tval_acc:\t{:.5f}'.format(
            epoch+1, train_loss, train_acc, valid_loss, valid_acc))

# Confusion matrix of the pattern-5 model on the test tensors.
preds = model(X_test_tf)

LABELS = ['Normal','Fraud']

# Convert one-hot targets and model outputs to class indices.
truth = np.argmax(y_test_tf, axis=1)
predictions = np.argmax(preds, axis=1)
confusion_matrix = metrics.confusion_matrix(truth, predictions)

heatmap_style = dict(annot=True, fmt="d", linewidth=0.1, linecolor='white',
                     annot_kws={"size": 10})

plt.figure(figsize=(3, 3))
sns.heatmap(confusion_matrix, xticklabels=LABELS, yticklabels=LABELS,
            **heatmap_style);
plt.title("Confusion matrix", fontsize=10)
plt.ylabel('True label', fontsize=10)
plt.xlabel('Predicted label', fontsize=10)
plt.show()

image.png

Why not register and get more from Qiita?
  1. We will deliver articles that match you
    By following users and tags, you can catch up information on technical fields that you are interested in as a whole
  2. you can read useful information later efficiently
    By "stocking" the articles you like, you can search right away
Comments
No comments
Sign up for free and join this conversation.
If you already have a Qiita account
Why do not you register as a user and use Qiita more conveniently?
You need to log in to use this function. Qiita can be used more conveniently after logging in.
You seem to be reading articles frequently this month. Qiita can be used more conveniently after logging in.
  1. We will deliver articles that match you
    By following users and tags, you can catch up information on technical fields that you are interested in as a whole
  2. you can read useful information later efficiently
    By "stocking" the articles you like, you can search right away
ユーザーは見つかりませんでした