result = state['board'][:]
The line above raises an error when `board` is missing from the state, so use the code below as a reference. Apparently you have to switch between `state[0]` and `state[1]` (one per player) instead.
def play_game(env, TrainNet, TargetNet, epsilon, copy_step):
    """Play one full episode and train the DQN online.

    Parameters
    ----------
    env : a Kaggle-style two-player environment; ``env.state`` holds one
        sub-state per player, and exactly one should be ``'ACTIVE'``.
    TrainNet : the network being trained (provides ``get_action``,
        ``add_experience``, ``train`` and a ``train_loss`` accumulator).
    TargetNet : the target network, periodically synced from ``TrainNet``.
    epsilon : exploration rate for epsilon-greedy action selection.
    copy_step : sync the target network every ``copy_step`` training steps.

    Returns
    -------
    The total shaped reward accumulated over the episode.
    """
    rewards = 0
    step_count = 0  # renamed from `iter`, which shadowed the builtin
    done = False
    observations = env.reset()
    while not done:
        # The environment keeps one sub-state per player; act as whichever
        # player is currently ACTIVE.
        if env.state[0].status == 'ACTIVE':
            player_id = 0
        elif env.state[1].status == 'ACTIVE':
            player_id = 1
        # NOTE(review): if neither player is ACTIVE, player_id is unbound
        # here (UnboundLocalError) — presumably that never happens mid-game;
        # confirm against the environment's contract.
        observations = env.state[player_id].observation
        # Using epsilon-greedy to get an action
        action = TrainNet.get_action(observations, epsilon)
        # Caching the information of current state
        prev_observations = observations
        # Take action
        observations, reward, done, _ = env.step(action)
        # Apply new rules
        if done:
            # Guard against ZeroDivisionError when the game ends on the
            # very first move (step_count is still 0 at that point).
            if step_count > 0:
                TrainNet.train_loss /= step_count
            if reward == 1:  # Won
                reward = 20
            elif reward == 0:  # Lost
                reward = -20
            else:  # Draw
                reward = 10
        else:
            # reward = -0.05 # Try to prevent the agent from taking a long move
            # Try to promote the agent to "struggle" when playing against
            # negamax agent, as Magolor's (@magolor) idea
            reward = 0.5
        rewards += reward
        # Adding experience into buffer
        exp = {'s': prev_observations, 'a': action, 'r': reward, 's2': observations, 'done': done}
        TrainNet.add_experience(exp)
        # Train the training model by using experiences in buffer and the target model
        TrainNet.train(TargetNet)
        step_count += 1
        if step_count % copy_step == 0:
            # Update the weights of the target model when reaching enough "copy step"
            TargetNet.copy_weights(TrainNet)
    return rewards