
Graphing optimizers

Posted at 2018-01-09

Overview

I plotted the training cost of seven gradient descent optimizers (SGD, Momentum, NAG, Adagrad, Adadelta, RMSprop, Adam) on a single graph.

Plot

(Figure: nn-sgt1.png — training cost per epoch for each optimizer)

Sample code

import numpy as np
import matplotlib.pylab as plt
from matplotlib.patches import Rectangle

plt_color_array = ['blue', 'cyan', 'green', 'black', 'magenta', 'red', 'yellow']
plt_dict = dict()
sigmoid = lambda x: 1.0 / (1.0 + np.exp(-x))
np.random.seed(1024)  # seed numpy's RNG; the script only uses np.random (weights, shuffle)
num_input = 2
num_hidden = 64
num_output = 2
opt_algo_set = ['SGD', 'Momentum', 'NAG', 'Adagrad', 'Adadelta', 'RMSprop', 'Adam']

class NeuralNet:
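    # A 2-layer sigmoid network; during training all weights live in a single flat column vector w.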
    def __init__(self, num_input, num_hidden, num_output):
        self.num_input = num_input
        self.num_hidden = num_hidden
        self.num_output = num_output
    def train(self, x, y, opt_algo, num_epoch = 30, mini_batch = 100, lambda_ = 0.01):
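        # Mini-batch training with the chosen optimizer; lambda_ is the L2 regularization weight.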
        if opt_algo not in opt_algo_set:
            print('opt_algo not in %s' % opt_algo_set)
            return
        num_params = self.num_hidden * (self.num_input + 1) + self.num_output * (self.num_hidden + 1)
        w = np.matrix(0.005 * np.random.random([num_params, 1]))
        data = np.column_stack([x, y])
        gamma = 0.9
        epsilon = 1e-8
        if opt_algo in ('RMSprop', 'Adam'):
            eta = 0.001  # the adaptive-moment methods need a much smaller base step size
        else:
            eta = 0.05
        v = np.matrix(np.zeros(w.shape))
        m = np.matrix(np.zeros(w.shape))
        beta1 = 0.9
        beta2 = 0.999
        beta1_exp = 1.0
        beta2_exp = 1.0
        grad_sum_square = np.matrix(np.zeros(w.shape))
        grad_expect = np.matrix(np.zeros(w.shape))
        delta_expect = np.matrix(np.zeros(w.shape))
        first_run = True
        for epoch in range(num_epoch):
            np.random.shuffle(data)
            k = 0
            cost_array = list()
            while k < len(data):
                x = data[k : k + mini_batch, 0 : -1]
                y = np.matrix(data[k : k + mini_batch, -1], dtype = 'int32')
                if opt_algo == 'SGD':
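                    # plain mini-batch gradient descent: w <- w - eta * grad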
                    cost, grad = self.gradient(x, y, lambda_, w)
                    w -= eta * grad
                elif opt_algo == 'Momentum':
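                    # Momentum: a velocity v accumulates a decaying sum of past gradients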
                    cost, grad = self.gradient(x, y, lambda_, w)
                    v = gamma * v + eta * grad
                    w -= v
                elif opt_algo == 'NAG':
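                    # Nesterov: take the gradient at the look-ahead point w - gamma * v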
                    cost, grad = self.gradient(x, y, lambda_, w - gamma * v)
                    v = gamma * v + eta * grad
                    w -= v
                elif opt_algo == 'Adagrad':
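                    # Adagrad: per-parameter step; eta shrinks with the running sum of squared gradients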
                    cost, grad = self.gradient(x, y, lambda_, w)
                    grad_sum_square += np.square(grad)
                    delta = eta * grad / np.sqrt(grad_sum_square + epsilon)
                    w -= delta
                elif opt_algo == 'Adadelta':
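                    # Adadelta: an RMS of past updates replaces eta (the first step falls back to eta * grad)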
                    cost, grad = self.gradient(x, y, lambda_, w)
                    grad_expect = gamma * grad_expect + (1.0 - gamma) * np.square(grad)
                    if first_run:
                        delta = eta * grad
                    else:
                        delta = np.multiply(np.sqrt(delta_expect + epsilon) / np.sqrt(grad_expect + epsilon),  grad)
                    w -= delta
                    delta_expect = gamma * delta_expect + (1.0 - gamma) * np.square(delta)
                elif opt_algo == 'RMSprop':
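                    # RMSprop: divide by a decaying average of squared gradients instead of Adagrad's full sum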
                    cost, grad = self.gradient(x, y, lambda_, w)
                    grad_expect = gamma * grad_expect + (1.0 - gamma) * np.square(grad)
                    w -= eta * grad / np.sqrt(grad_expect + epsilon)
                elif opt_algo == 'Adam':
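                    # Adam: bias-corrected first moment m and second moment v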
                    cost, grad = self.gradient(x, y, lambda_, w)
                    m = beta1 * m + (1.0 - beta1) * grad
                    v = beta2 * v + (1.0 - beta2) * np.square(grad)
                    beta1_exp *= beta1
                    beta2_exp *= beta2
                    w -= eta * (m / (1.0 - beta1_exp)) / (np.sqrt(v / (1.0 - beta2_exp)) + epsilon)
                k += mini_batch
                cost_array.append(cost)
                first_run = False
            if opt_algo not in plt_dict:
                plt_dict[opt_algo] = list()
            plt_dict[opt_algo].extend(cost_array)
        self.w1 = w[0 : self.num_hidden * (self.num_input + 1)].reshape(self.num_hidden, self.num_input + 1)
        self.w2 = w[self.num_hidden * (self.num_input + 1) : ].reshape(self.num_output, self.num_hidden + 1)
    def gradient(self, x, y, lambda_, w):
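        # Forward pass, cross-entropy cost with L2 regularization, then backprop; returns (cost, flattened gradient).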
        num_sample = len(x)
        w1 = w[0 : self.num_hidden * (self.num_input + 1)].reshape(self.num_hidden, self.num_input + 1)
        w2 = w[self.num_hidden * (self.num_input + 1) : ].reshape(self.num_output, self.num_hidden + 1)
        b = np.matrix(np.ones([num_sample, 1]))
        a1 = np.column_stack([x, b])
        s2 = sigmoid(a1 * w1.T)
        a2 = np.column_stack([s2, b])
        a3 = sigmoid(a2 * w2.T)
        y_one_hot = np.matrix(np.zeros([num_sample, self.num_output]))
        y_one_hot[np.arange(num_sample), np.asarray(y).ravel()] = 1  # one (sample, true class) entry per row
        cost = (1.0 / num_sample) * (- np.multiply(y_one_hot, np.log(a3)) - np.multiply(1.0 - y_one_hot, np.log(1.0 - a3))).sum()
        cost += (lambda_ / (2.0 * num_sample)) * (np.square(w1[ : , 0 : -1]).sum() + np.square(w2[ : , 0 : -1]).sum())
        delta3 = a3 - y_one_hot
        delta2 = np.multiply(delta3 * w2[ : , 0 : -1], np.multiply(s2, 1.0 - s2))
        l1_grad = delta2.T * a1
        l2_grad = delta3.T * a2
        r1_grad = np.column_stack([w1[ : , 0 : -1], np.matrix(np.zeros([self.num_hidden, 1]))])
        r2_grad = np.column_stack([w2[ : , 0 : -1], np.matrix(np.zeros([self.num_output, 1]))])
        w1_grad = (1.0 / num_sample) * l1_grad + (1.0 * lambda_ / num_sample) * r1_grad
        w2_grad = (1.0 / num_sample) * l2_grad + (1.0 * lambda_ / num_sample) * r2_grad
        w_grad = np.row_stack([w1_grad.reshape(-1, 1), w2_grad.reshape(-1, 1)])
        return cost, w_grad

if __name__ == '__main__':
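    # Toy data: the four binary input pairs, labeled 1 when the two inputs are equal (XNOR).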
    x_train = np.array([[1, 0], [0, 1], [1, 1], [0, 0]])
    y_train = np.array([[0], [0], [1], [1]])
    x_test = np.array([[1, 0], [0, 1], [1, 1], [0, 0]])
    y_test = np.array([[0], [0], [1], [1]])
    clf = NeuralNet(num_input, num_hidden, num_output)
    for opt_algo in opt_algo_set:
        clf.train(x_train, y_train, opt_algo, num_epoch = 30, lambda_ = 0.1)
    plt.subplot(111)
    plt.title('Performance of different gradient descent optimizers')
    plt.xlabel('epoch')
    plt.ylabel('cost')
    proxy = list()
    legend_array = list()
    for index, (opt_algo, epoch_cost) in enumerate(plt_dict.items()):
        selected_color = plt_color_array[index % len(plt_color_array)]
        plt.plot(range(len(epoch_cost)), epoch_cost, '-', color = selected_color)  # full color name; 'black'[0] is 'b', which matplotlib reads as blue
        proxy.append(Rectangle((0, 0), 0, 0, facecolor = selected_color))
        legend_array.append(opt_algo)
    plt.legend(proxy, legend_array)
    plt.savefig("nn-sgt1.png")
    plt.show()
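
The class stores the learned weights in self.w1 and self.w2 but never uses them. As a quick sanity check, a predict method along these lines could be added to NeuralNet (a minimal sketch, not part of the original script; it simply repeats the forward pass from gradient()):

    def predict(self, x):
        # forward pass with the weights learned by train()
        num_sample = len(x)
        b = np.matrix(np.ones([num_sample, 1]))
        a1 = np.column_stack([x, b])
        a2 = np.column_stack([sigmoid(a1 * self.w1.T), b])
        a3 = sigmoid(a2 * self.w2.T)
        return np.asarray(a3.argmax(axis = 1)).ravel()

With only four samples and one update per epoch, 30 epochs may not be enough for clf.predict(x_test) to match y_test, so treat it as a plumbing check rather than a convergence test.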




That's all.
