KerasDay 13

# 概要

ラズパイでKerasやってみた。
GymのCartPoleやってみた。
Kerasで強化学習やってみた。

# サンプルコード

```python
import gym
import numpy as np
import random
from tensorflow.contrib.keras.python.keras.models import Model
from tensorflow.contrib.keras.python.keras.layers import *
from tensorflow.contrib.keras.python.keras import backend as K
from collections import deque
import matplotlib.pyplot as plt

def randf(s, e):
    """Return a pseudo-random float on a 1/10000 grid starting at ``s``.

    An integer is drawn with ``random.randrange(0, (e - s) * 9999)`` and
    scaled by 1/10000, so the result lies in ``[s, s + (e - s) * 0.9999)``
    and never reaches ``e``.

    Args:
        s: Lower bound (inclusive).
        e: Upper bound (never reached). ``e - s`` must be a positive
            integer because it is passed on to ``random.randrange``.

    Returns:
        float: ``s`` plus the scaled random offset.
    """
    steps = random.randrange(0, (e - s) * 9999)
    return float(steps) / 10000 + s

def discount_rewards(r, gamma=0.99):
    """Return the discounted cumulative reward for every time step.

    Element ``t`` of the result is
    ``r[t] + gamma * r[t + 1] + gamma**2 * r[t + 2] + ...``.

    Args:
        r: 1-D sequence of per-step rewards.
        gamma: Discount factor applied once per step into the future.

    Returns:
        numpy.ndarray: Float array of the same length as ``r``; empty when
        ``r`` is empty.
    """
    # Work in float so integer rewards are not truncated when discounted.
    rewards = np.asarray(r, dtype=float)
    discounted_r = np.zeros_like(rewards)
    running_add = 0.0
    # Walk backwards so each step reuses the already-discounted future sum.
    for t in reversed(range(len(rewards))):
        running_add = running_add * gamma + rewards[t]
        discounted_r[t] = running_add
    return discounted_r

def one_hot(index, categories):
    """Return a one-hot vector of length ``categories`` with a 1 at ``index``.

    Args:
        index: Position to set to 1; must be a valid index into an array of
            length ``categories``.
        categories: Length of the returned vector.

    Returns:
        numpy.ndarray: 1-D float array of zeros with ``x[index] == 1``.

    Raises:
        IndexError: If ``index`` is out of bounds for ``categories``.
    """
    x = np.zeros((categories,))
    x[index] = 1
    return x

# Presumably a Keras loss function, judging by the (y_true, y_pred) signature.
# NOTE(review): this listing looks truncated -- `policy_loss` is never assigned
# before it is returned, and the trailing `return f` only makes sense inside an
# enclosing factory function whose `def` line is not present here. Confirm
# against the original source before running.
def f(y_true, y_pred):
# Sum of the element-wise product y_true * y_pred along axis 1.
respons = K.sum(y_true * y_pred, axis = 1)
# NOTE(review): `policy_loss` is undefined at this point and `respons` is unused.
return policy_loss
# NOTE(review): presumably the return of an outer closure factory -- TODO confirm.
return f

# Create the CartPole-v0 environment.
env = gym.make('CartPole-v0')
# Policy network: 4 inputs -> Dense(24) -> Dense(24) -> Dense(2) -> softmax.
# No activation argument is passed to any of the Dense layers.
state = Input(shape = (4, ))
x = Dense(24)(state)
x = Dense(24)(x)
x = Dense(2)(x)
x = Activation('softmax')(x)
model_a = Model(state, x)
# Rebind `state` to a fresh 4-element input, declare a 1-element input, and
# apply model_a to the new `state`.
# NOTE(review): `adv_input` and this `x` are not used anywhere in the visible
# code; presumably a second (training) model was built and compiled from them
# in lines that are missing from this listing -- TODO confirm.
state = Input(shape = (4, ))
adv_input = Input(shape = (1, ))
x = model_a(state)
# Window holding at most the 100 most recently appended episode scores.
all_rewards = deque(maxlen = 100)
# Exploration rate compared against randf(0, 1) in the episode loop, and the
# threshold below which it is no longer decayed.
epsilon = 1
epsilonMinimumValue = 0.001
# Total reward of each finished episode, in order.
reward_trend = []
# Run 200 episodes, each with an inner loop of 200 steps.
# NOTE(review): indentation is missing in this listing, so the nesting implied
# by the comments below is inferred -- confirm against the original source.
for i_episode in range(200):
observation = env.reset()
# Per-episode buffers: observations, one-hot encoded actions, and rewards.
state_history, action_history, reward_history = [], [], []
for t in range(200):
env.render()
state_history.append(observation)
# Network output for the current observation, fed as a batch of one.
pred = model_a.predict(np.expand_dims(observation, axis = 0))[0]
# When the random draw is below epsilon, sample an action using `pred` as the
# probabilities; otherwise take the action with the largest value in `pred`.
if (randf(0, 1) < epsilon):
action = np.random.choice(len(pred), 1, p = pred)[0]
else:
action = np.argmax(pred)
# Multiply epsilon by 0.999 while it is still above epsilonMinimumValue.
if (epsilon > epsilonMinimumValue):
epsilon = epsilon * 0.999
observation, reward, done, info = env.step(action)
reward_history.append(reward)
action_history.append(one_hot(action, 2))
if done:
reward_sum = sum(reward_history)
reward_trend.append(reward_sum)
# NOTE(review): np.mean(all_rewards) is evaluated before this episode's score
# is appended, so on the first finished episode the deque is still empty.
print (i_episode, "Episode finished with reward {} {:.2f}".format(reward_sum, np.mean(all_rewards)))
# Scores above 198 get 100 added before being stored in all_rewards;
# reward_trend already holds the unmodified sum.
if reward_sum > 198:
reward_sum += 100
all_rewards.append(reward_sum)
# NOTE(review): no training step or `break` is visible after `done`; the
# listing may continue past this point -- TODO confirm.