
Graphing optimizers

Posted at 2018-01-09

Overview

I plotted how the training cost decreases under several gradient descent optimizers, using a small two-layer neural network implemented with NumPy.
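
For reference, these are the standard update rules that the sample code below implements (here $g$ is the mini-batch gradient, $\eta$ the learning rate, $\gamma$, $\beta_1$, $\beta_2$ decay rates, $\epsilon$ a small stabilizer, and $t$ the step count):

\begin{aligned}
\text{SGD:}\quad & w \leftarrow w - \eta g \\
\text{Momentum:}\quad & v \leftarrow \gamma v + \eta g, \quad w \leftarrow w - v \\
\text{NAG:}\quad & v \leftarrow \gamma v + \eta \nabla J(w - \gamma v), \quad w \leftarrow w - v \\
\text{Adagrad:}\quad & G \leftarrow G + g^2, \quad w \leftarrow w - \eta g \big/ \sqrt{G + \epsilon} \\
\text{Adadelta:}\quad & \Delta w = -\frac{\sqrt{E[\Delta w^2] + \epsilon}}{\sqrt{E[g^2] + \epsilon}}\, g \\
\text{RMSprop:}\quad & E[g^2] \leftarrow \gamma E[g^2] + (1 - \gamma) g^2, \quad w \leftarrow w - \eta g \big/ \sqrt{E[g^2] + \epsilon} \\
\text{Adam:}\quad & \hat{m} = \frac{m}{1 - \beta_1^t}, \quad \hat{v} = \frac{v}{1 - \beta_2^t}, \quad w \leftarrow w - \eta \hat{m} \big/ (\sqrt{\hat{v}} + \epsilon)
\end{aligned}

For Adadelta and RMSprop, $E[\cdot]$ is an exponential moving average with decay $\gamma$; for Adam, $m$ and $v$ are moving averages of $g$ and $g^2$ with decays $\beta_1$ and $\beta_2$. The code bootstraps Adadelta's very first update with a plain $\eta g$ step, since $E[\Delta w^2]$ starts at zero.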

Figure

nn-sgt1.png (cost curves produced by the sample code below)

Sample code

import numpy as np
import matplotlib.pylab as plt
from matplotlib.patches import Rectangle

plt_color_array = ['blue', 'cyan', 'green', 'black', 'magenta', 'red', 'yellow']
plt_dict = dict()	# opt_algo -> list of per-mini-batch costs, filled in during training
sigmoid = lambda x: 1.0 / (1.0 + np.exp(-x))
np.random.seed(1024)	# weight init and shuffling use np.random, so seed that RNG
num_input = 2
num_hidden = 64
num_output = 2
opt_algo_set = ['SGD', 'Momentum', 'NAG', 'Adagrad', 'Adadelta', 'RMSprop', 'Adam']

class NeuralNet:
	def __init__(self, num_input, num_hidden, num_output):
		self.num_input = num_input
		self.num_hidden = num_hidden
		self.num_output = num_output
	def train(self, x, y, opt_algo, num_epoch = 30, mini_batch = 100, lambda_ = 0.01):
		if opt_algo not in opt_algo_set:
			print('opt_algo not in %s' % opt_algo_set)
			return
		num_params = self.num_hidden * (self.num_input + 1) + self.num_output * (self.num_hidden + 1)
		w = np.matrix(0.005 * np.random.random([num_params, 1]))	# all weights as one flat column vector
		data = np.column_stack([x, y])	# keep features and labels together for shuffling
		gamma = 0.9	# decay rate (Momentum/NAG velocity, Adadelta/RMSprop running averages)
		epsilon = 1e-8	# small constant for numerical stability
		if opt_algo == 'RMSprop' or opt_algo == 'Adam':
			eta = 0.001	# these adaptive methods want a smaller base learning rate
		else:
			eta = 0.05
		v = np.matrix(np.zeros(w.shape))	# velocity (Momentum/NAG) or second moment (Adam)
		m = np.matrix(np.zeros(w.shape))	# first moment (Adam)
		beta1 = 0.9
		beta2 = 0.999
		beta1_exp = 1.0	# running beta1**t and beta2**t for Adam's bias correction
		beta2_exp = 1.0
		grad_sum_square = np.matrix(np.zeros(w.shape))	# Adagrad accumulator
		grad_expect = np.matrix(np.zeros(w.shape))	# E[g^2] (Adadelta/RMSprop)
		delta_expect = np.matrix(np.zeros(w.shape))	# E[dw^2] (Adadelta)
		first_run = True
		for epoch in range(num_epoch):
			np.random.shuffle(data)	# shuffle rows; each label stays attached in the last column
			k = 0
			cost_array = list()
			while k < len(data):	# iterate over mini-batches
				x = data[k : k + mini_batch, 0 : -1]
				y = np.matrix(data[k : k + mini_batch, -1], dtype = 'int32')
				if opt_algo == 'SGD':
					# plain SGD: step against the gradient
					cost, grad = self.gradient(x, y, lambda_, w)
					w -= eta * grad
				elif opt_algo == 'Momentum':
					# accumulate a velocity and move along it
					cost, grad = self.gradient(x, y, lambda_, w)
					v = gamma * v + eta * grad
					w -= v
				elif opt_algo == 'NAG':
					# Nesterov: evaluate the gradient at the look-ahead point w - gamma * v
					cost, grad = self.gradient(x, y, lambda_, w - gamma * v)
					v = gamma * v + eta * grad
					w -= v
				elif opt_algo == 'Adagrad':
					# per-parameter step size shrinks with the accumulated squared gradients
					cost, grad = self.gradient(x, y, lambda_, w)
					grad_sum_square += np.square(grad)
					delta = eta * grad / np.sqrt(grad_sum_square + epsilon)
					w -= delta
				elif opt_algo == 'Adadelta':
					# keep running averages of g^2 and of the squared updates
					cost, grad = self.gradient(x, y, lambda_, w)
					grad_expect = gamma * grad_expect + (1.0 - gamma) * np.square(grad)
					if first_run:
						# bootstrap the very first update, since delta_expect is still all zeros
						delta = eta * grad
					else:
						delta = np.multiply(np.sqrt(delta_expect + epsilon) / np.sqrt(grad_expect + epsilon), grad)
					w -= delta
					delta_expect = gamma * delta_expect + (1.0 - gamma) * np.square(delta)
				elif opt_algo == 'RMSprop':
					# like Adagrad, but with an exponential moving average of g^2
					cost, grad = self.gradient(x, y, lambda_, w)
					grad_expect = gamma * grad_expect + (1.0 - gamma) * np.square(grad)
					w -= eta * grad / np.sqrt(grad_expect + epsilon)
				elif opt_algo == 'Adam':
					# moving averages of the gradient and its square, with bias correction
					cost, grad = self.gradient(x, y, lambda_, w)
					m = beta1 * m + (1.0 - beta1) * grad
					v = beta2 * v + (1.0 - beta2) * np.square(grad)
					beta1_exp *= beta1
					beta2_exp *= beta2
					w -= eta * (m / (1.0 - beta1_exp)) / (np.sqrt(v / (1.0 - beta2_exp)) + epsilon)
				k += mini_batch
				cost_array.append(cost)
				first_run = False
			if opt_algo not in plt_dict:
				plt_dict[opt_algo] = list()
			plt_dict[opt_algo].extend(cost_array)	# one cost value per mini-batch
		# unpack the flat parameter vector into the two layer weight matrices
		self.w1 = w[0 : self.num_hidden * (self.num_input + 1)].reshape(self.num_hidden, self.num_input + 1)
		self.w2 = w[self.num_hidden * (self.num_input + 1) : ].reshape(self.num_output, self.num_hidden + 1)
	def gradient(self, x, y, lambda_, w):
		# forward pass + backprop for the 2-layer sigmoid network; returns the
		# regularized cross-entropy cost and the gradient as one flat column vector
		num_sample = len(x)
		w1 = w[0 : self.num_hidden * (self.num_input + 1)].reshape(self.num_hidden, self.num_input + 1)
		w2 = w[self.num_hidden * (self.num_input + 1) : ].reshape(self.num_output, self.num_hidden + 1)
		b = np.matrix(np.ones([num_sample, 1]))
		a1 = np.column_stack([x, b])	# inputs with bias column appended
		s2 = sigmoid(a1 * w1.T)
		a2 = np.column_stack([s2, b])	# hidden activations with bias column
		a3 = sigmoid(a2 * w2.T)	# output activations, one per class
		y_one_hot = np.matrix(np.zeros([num_sample, self.num_output]))
		y_one_hot[(np.matrix(range(num_sample)), y.T)] = 1
		# cross-entropy cost plus L2 regularization (bias weights excluded)
		cost = (1.0 / num_sample) * (- np.multiply(y_one_hot, np.log(a3)) - np.multiply(1.0 - y_one_hot, np.log(1.0 - a3))).sum()
		cost += (lambda_ / (2.0 * num_sample)) * (np.square(w1[ : , 0 : -1]).sum() + np.square(w2[ : , 0 : -1]).sum())
		# backpropagation
		delta3 = a3 - y_one_hot
		delta2 = np.multiply(delta3 * w2[ : , 0 : -1], np.multiply(s2, 1.0 - s2))
		l1_grad = delta2.T * a1
		l2_grad = delta3.T * a2
		# regularization gradients, zeroed for the bias column
		r1_grad = np.column_stack([w1[ : , 0 : -1], np.matrix(np.zeros([self.num_hidden, 1]))])
		r2_grad = np.column_stack([w2[ : , 0 : -1], np.matrix(np.zeros([self.num_output, 1]))])
		w1_grad = (1.0 / num_sample) * l1_grad + (1.0 * lambda_ / num_sample) * r1_grad
		w2_grad = (1.0 / num_sample) * l2_grad + (1.0 * lambda_ / num_sample) * r2_grad
		w_grad = np.vstack([w1_grad.reshape(-1, 1), w2_grad.reshape(-1, 1)])	# flatten back into one column
		return cost, w_grad

if __name__ == '__main__':
	# toy two-class problem: the label is 1 when the two inputs are equal (XNOR)
	x_train = np.array([[1, 0], [0, 1], [1, 1], [0, 0]])
	y_train = np.array([[0], [0], [1], [1]])
	x_test = np.array([[1, 0], [0, 1], [1, 1], [0, 0]])
	y_test = np.array([[0], [0], [1], [1]])
	clf = NeuralNet(num_input, num_hidden, num_output)
	for opt_algo in opt_algo_set:
		clf.train(x_train, y_train, opt_algo, num_epoch = 30, lambda_ = 0.1)
	plt.subplot(111)
	plt.title('Performance of different Gradient Descent Optimization')
	plt.xlabel('epoch')
	plt.ylabel('cost')
	proxy = list()
	legend_array = list()
	for index, (opt_algo, epoch_cost) in enumerate(plt_dict.items()):
		selected_color = plt_color_array[index % len(plt_color_array)]
		# pass the full color name: the single-letter shorthand would map 'black' to 'b' (blue)
		plt.plot(range(len(epoch_cost)), epoch_cost, '-', color = selected_color)
		proxy.append(Rectangle((0, 0), 0, 0, facecolor = selected_color))	# proxy patch for the legend
		legend_array.append(opt_algo)
	plt.legend(proxy, legend_array)
	plt.savefig("nn-sgt1.png")
	plt.show()
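
NeuralNet stores the trained weights in self.w1 and self.w2 but defines no prediction method. A minimal sketch of one, reusing the module's np and sigmoid and mirroring the forward pass in gradient() (the predict helper below is hypothetical, not part of the original code):

def predict(clf, x):
	# hypothetical helper: forward pass through the trained 2-layer network
	b = np.matrix(np.ones([len(x), 1]))
	a1 = np.column_stack([x, b])	# append the bias column, as in gradient()
	a2 = np.column_stack([sigmoid(a1 * clf.w1.T), b])
	a3 = sigmoid(a2 * clf.w2.T)	# one score per class
	return np.asarray(a3.argmax(axis = 1)).ravel()

For example, predict(clf, x_test) can be checked against y_test; note that after the training loop above, clf.w1 and clf.w2 hold the weights from the last optimizer trained ('Adam').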




That's all.
