More than 5 years have passed since last update.
optimizerをグラフ化

Last updated at 2018-01-09 / Posted at 2018-01-09
概要

optimizerをグラフにしてみた。
写真

サンプルコード

import sys
import numpy as np
import math
import random
import matplotlib
import matplotlib.pylab as plt
from matplotlib.patches import Rectangle

# Colours cycled through when drawing one cost curve per optimizer.
plt_color_array = ['blue', 'cyan', 'green', 'black', 'magenta', 'red', 'yellow']
# Maps optimizer name -> list of recorded mini-batch costs (filled by NeuralNet.train).
plt_dict = {}


def sigmoid(x):
    """Return the element-wise logistic function 1 / (1 + exp(-x))."""
    return 1.0 / (1.0 + np.exp(-x))


# All randomness in this script (weight initialisation, data shuffling) goes
# through np.random, so NumPy's generator must be seeded as well; seeding only
# the stdlib `random` module leaves the runs non-reproducible.
random.seed(1024)
np.random.seed(1024)
# Network layout: input features, hidden units, output classes.
num_input = 2
num_hidden = 64
num_output = 2
# Optimizer names accepted by NeuralNet.train.
opt_algo_set = ['SGD', 'Momentum', 'NAG', 'Adagrad', 'Adadelta', 'RMSprop', 'Adam']

class NeuralNet:
    """One-hidden-layer sigmoid network trained with a selectable optimizer.

    All weights are kept in a single flat column vector. Its first
    ``num_hidden * (num_input + 1)`` entries reshape to the hidden-layer
    weights and the remainder to the output-layer weights; in both matrices
    the last column multiplies the constant bias input.
    """

    def __init__(self, num_input, num_hidden, num_output):
        """Store the layer sizes; weights are created by ``train``."""
        self.num_input = num_input
        self.num_hidden = num_hidden
        self.num_output = num_output

    @staticmethod
    def _sigmoid(z):
        """Return the element-wise logistic function of ``z``."""
        # Kept inside the class so it does not rely on a module-level helper.
        return 1.0 / (1.0 + np.exp(-z))

    def train(self, x, y, opt_algo, num_epoch = 30, mini_batch = 100, lambda_ = 0.01):
        """Fit the network with mini-batch gradient descent.

        Args:
            x: sample matrix, one row per sample.
            y: integer class labels, one row per sample.
            opt_algo: optimizer name; must be listed in ``opt_algo_set``.
            num_epoch: number of passes over the (reshuffled) data.
            mini_batch: number of rows per gradient step.
            lambda_: L2 regularisation strength passed to ``gradient``.

        Side effects:
            Appends every mini-batch cost to ``plt_dict[opt_algo]`` and stores
            the trained weights in ``self.w1`` / ``self.w2``. For an unknown
            ``opt_algo`` a message is printed and nothing else happens.
        """
        if opt_algo not in opt_algo_set:
            print ('opt_algo not in %s' % opt_algo_set)
            return
        hidden_size = self.num_hidden * (self.num_input + 1)
        num_params = hidden_size + self.num_output * (self.num_hidden + 1)
        w = np.matrix(0.005 * np.random.random([num_params, 1]))
        # Labels ride along as the last column so shuffling keeps rows paired.
        data = np.column_stack([x, y])
        gamma = 0.9
        epsilon = 1e-8
        # The two optimizers below use a smaller step size than the others.
        eta = 0.001 if opt_algo in ('RMSprop', 'Adam') else 0.05
        v = np.matrix(np.zeros(w.shape))
        m = np.matrix(np.zeros(w.shape))
        beta1 = 0.9
        beta2 = 0.999
        # Running powers beta1**t and beta2**t for Adam's bias correction.
        beta1_exp = 1.0
        beta2_exp = 1.0
        grad_sum_square = np.matrix(np.zeros(w.shape))
        grad_expect = np.matrix(np.zeros(w.shape))
        delta_expect = np.matrix(np.zeros(w.shape))
        # True only for the very first mini-batch of the whole run.
        first_run = True
        for _ in range(num_epoch):
            np.random.shuffle(data)
            cost_array = []
            for start in range(0, len(data), mini_batch):
                x_batch = data[start : start + mini_batch, 0 : -1]
                y_batch = np.matrix(data[start : start + mini_batch, -1], dtype = 'int32')
                # NAG evaluates the gradient at the look-ahead position;
                # every other optimizer uses the current weights.
                point = w - gamma * v if opt_algo == 'NAG' else w
                cost, grad = self.gradient(x_batch, y_batch, lambda_, point)
                if opt_algo == 'SGD':
                    w -= eta * grad
                elif opt_algo in ('Momentum', 'NAG'):
                    v = gamma * v + eta * grad
                    w -= v
                elif opt_algo == 'Adagrad':
                    grad_sum_square += np.square(grad)
                    w -= eta * grad / np.sqrt(grad_sum_square + epsilon)
                elif opt_algo == 'Adadelta':
                    grad_expect = gamma * grad_expect + (1.0 - gamma) * np.square(grad)
                    if first_run:
                        # No update history yet: take a plain gradient step.
                        delta = eta * grad
                    else:
                        delta = np.multiply(np.sqrt(delta_expect + epsilon) / np.sqrt(grad_expect + epsilon), grad)
                    w -= delta
                    delta_expect = gamma * delta_expect + (1.0 - gamma) * np.square(delta)
                elif opt_algo == 'RMSprop':
                    grad_expect = gamma * grad_expect + (1.0 - gamma) * np.square(grad)
                    w -= eta * grad / np.sqrt(grad_expect + epsilon)
                elif opt_algo == 'Adam':
                    m = beta1 * m + (1.0 - beta1) * grad
                    v = beta2 * v + (1.0 - beta2) * np.square(grad)
                    beta1_exp *= beta1
                    beta2_exp *= beta2
                    w -= eta * (m / (1.0 - beta1_exp)) / (np.sqrt(v / (1.0 - beta2_exp)) + epsilon)
                cost_array.append(cost)
                first_run = False
            plt_dict.setdefault(opt_algo, []).extend(cost_array)
        self.w1 = w[0 : hidden_size].reshape(self.num_hidden, self.num_input + 1)
        self.w2 = w[hidden_size : ].reshape(self.num_output, self.num_hidden + 1)

    def gradient(self, x, y, lambda_, w):
        """Return ``(cost, w_grad)`` for one batch.

        Args:
            x: batch samples, one row per sample.
            y: integer class labels for the batch; any shape holding exactly
                one label per sample (row or column vector) is accepted.
            lambda_: L2 regularisation strength (bias weights are excluded).
            w: flat ``(num_params, 1)`` weight matrix.

        Returns:
            The regularised cross-entropy cost averaged over the batch and
            the gradient as a column with the same layout as ``w``.
        """
        num_sample = len(x)
        hidden_size = self.num_hidden * (self.num_input + 1)
        w1 = w[0 : hidden_size].reshape(self.num_hidden, self.num_input + 1)
        w2 = w[hidden_size : ].reshape(self.num_output, self.num_hidden + 1)
        # Forward pass; a column of ones is appended as the bias input.
        b = np.matrix(np.ones([num_sample, 1]))
        a1 = np.column_stack([x, b])
        s2 = self._sigmoid(a1 * w1.T)
        a2 = np.column_stack([s2, b])
        a3 = self._sigmoid(a2 * w2.T)
        # Index with two flat, equal-length arrays so sample i is paired with
        # label i. A (1, n) row index against an (n, 1) label column would
        # broadcast to an n x n grid and mark every class present in the batch
        # on every row.
        labels = np.asarray(y, dtype = int).reshape(-1)
        one_hot = np.zeros([num_sample, self.num_output])
        one_hot[np.arange(num_sample), labels] = 1
        y_one_hot = np.matrix(one_hot)
        cost = (1.0 / num_sample) * (- np.multiply(y_one_hot, np.log(a3)) - np.multiply(1.0 - y_one_hot, np.log(1.0 - a3))).sum()
        cost += (lambda_ / (2.0 * num_sample)) * (np.square(w1[ : , 0 : -1]).sum() + np.square(w2[ : , 0 : -1]).sum())
        # Backward pass.
        delta3 = a3 - y_one_hot
        delta2 = np.multiply(delta3 * w2[ : , 0 : -1], np.multiply(s2, 1.0 - s2))
        l1_grad = delta2.T * a1
        l2_grad = delta3.T * a2
        # Regularisation gradient: the weights themselves, with a zero column
        # in the bias position so biases are not penalised.
        r1_grad = np.column_stack([w1[ : , 0 : -1], np.matrix(np.zeros([self.num_hidden, 1]))])
        r2_grad = np.column_stack([w2[ : , 0 : -1], np.matrix(np.zeros([self.num_output, 1]))])
        w1_grad = (1.0 / num_sample) * l1_grad + (1.0 * lambda_ / num_sample) * r1_grad
        w2_grad = (1.0 / num_sample) * l2_grad + (1.0 * lambda_ / num_sample) * r2_grad
        w_grad = np.vstack([w1_grad.reshape(-1, 1), w2_grad.reshape(-1, 1)])
        return cost, w_grad

if __name__ == '__main__':
    # Train the same network once per optimizer on a four-sample truth table
    # (label 1 when both inputs are equal), then plot every recorded cost curve.
    x_train = np.array([[1, 0], [0, 1], [1, 1], [0, 0]])
    y_train = np.array([[0], [0], [1], [1]])
    clf = NeuralNet(num_input, num_hidden, num_output)
    for opt_algo in opt_algo_set:
        clf.train(x_train, y_train, opt_algo, num_epoch = 30, lambda_ = 0.1)
    plt.subplot(111)
    plt.title('Performance of different Gradient Descent Optimization')
    plt.xlabel('epoch')
    plt.ylabel('cost')
    proxy = []
    legend_array = []
    for index, (opt_algo, epoch_cost) in enumerate(plt_dict.items()):
        selected_color = plt_color_array[index % len(plt_color_array)]
        # Pass the full colour name: a format string built from its first
        # letter would turn 'black' into 'b', which matplotlib reads as blue,
        # so the curve would not match its legend patch.
        plt.plot(range(len(epoch_cost)), epoch_cost, '-', color = selected_color)
        # Zero-size patch used only as a coloured legend handle.
        proxy.append(Rectangle((0, 0), 0, 0, facecolor = selected_color))
        legend_array.append(opt_algo)
    plt.legend(proxy, legend_array)
    plt.savefig("nn-sgt1.png")
    plt.show()
以上。
You get articles that match your needs
You can efficiently read back useful information
You can use dark theme
What you can do with signing up