Help us understand the problem. What is going on with this article?

# TensorFlowでオセロに挑戦

More than 3 years have passed since last update.

# 概要

TensorFlowでオセロをやるために、OpenAI Gymのオセロ環境を作ってみた。

# 環境

windows 7 sp1 64bit
anaconda3
tensorflow 1.0
OpenAI Gym 0.5

# オセロ環境の概要

## observe:

8*8の盤面を0から63までの配列にして返す

```
 0  1  2  3  4  5  6  7
 8  9 10 11 12 13 14 15
16 17 18 19 20 21 22 23
24 25 26 27 28 29 30 31
32 33 34 35 36 37 38 39
40 41 42 43 44 45 46 47
48 49 50 51 52 53 54 55
56 57 58 59 60 61 62 63
```

0 なし
1　白
-1　黒

action（打つ手）は、石を置くマスを0から63で指定。
-1を指定すると相手（AI、黒）が打つ。

# 確認用のサンプルコード

```python
from __future__ import print_function
import math
import sys
import gym
import gym.spaces
import numpy as np
from gym import core, spaces
from gym.utils import seeding
import time
import random
import os.path

class oseroEnv(gym.Env):
    """Othello (Reversi) environment on an 8x8 board for OpenAI Gym.

    The board is a flat array of 64 cells stored row by row
    (index = row * 8 + column).  Cell values are 0 (empty), 1 (white) and
    -1 (black).  An action in 0..63 places a white stone on that cell; an
    action of -1 makes the built-in opponent place a black stone.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 10
    }

    # Index offsets to the eight neighbouring cells of the flat board.
    _DIRECTIONS = (-9, -8, -7, -1, 1, 7, 8, 9)

    # Order in which the built-in opponent tries cells: the four corners
    # first, the cells next to the corners last.  The four starting cells
    # (27, 28, 35, 36) are never empty and are therefore not listed.
    _OPPONENT_PRIORITY = (
        0, 7, 56, 63, 18, 21, 42, 45, 2, 16, 5, 23, 40, 58, 47, 61,
        3, 4, 11, 12, 19, 20, 24, 25, 26, 32, 33, 34, 29, 30, 31, 37,
        38, 39, 43, 44, 51, 52, 59, 60, 1, 8, 9, 10, 17, 6, 13, 14,
        15, 22, 41, 48, 49, 50, 57, 46, 53, 54, 55, 62,
    )

    def __init__(self):
        self.viewer = None
        self.gridSize = 8
        self.nbStates = self.gridSize * self.gridSize
        # Start from an all-empty board so that the state is well defined
        # even if a method is called before the first reset.
        self.state = np.zeros(self.nbStates, dtype=np.int8)
        # Number of stones flipped by the most recent move (the reward).
        self.count = 0

    def check(self, put, d):
        """Return 1 if stepping from cell ``put`` by offset ``d`` leaves the board, else 0."""
        x = put % 8
        y = put // 8
        off_left = x == 0 and d in (-9, -1, 7)
        off_right = x == 7 and d in (-7, 1, 9)
        off_top = y == 0 and d in (-9, -8, -7)
        off_bottom = y == 7 and d in (7, 8, 9)
        target = put + d
        out_of_range = target < 0 or target > 63
        if off_left or off_right or off_top or off_bottom or out_of_range:
            return 1
        return 0

    def oku(self, put, iro):
        """Place a stone of colour ``iro`` on cell ``put`` and flip captured stones.

        Nothing happens when the cell is occupied or when no opposing stone
        would be captured.  ``self.count`` is increased by one for every
        flipped stone.

        Returns -1 if the stone was placed, 0 otherwise.
        """
        res = 0
        turn = -1 if iro == 1 else 1  # colour of the opposing stones
        if self.state[put] != 0:
            return res
        for d in self._DIRECTIONS:
            # Walk over the run of opposing stones that starts next to `put`.
            count = 0
            tugi = put
            while self.check(tugi, d) == 0:
                count += 1
                tugi += d
                if self.state[tugi] != turn:
                    break
            # count > 1 means at least one opposing stone lies in between,
            # and the run must be closed by a stone of our own colour.
            if count > 1 and self.state[tugi] == iro:
                res = -1
                tugi = put
                while True:
                    self.state[tugi] = iro
                    tugi += d
                    if self.state[tugi] != turn:
                        break
                    self.count += 1
        return res

    def sasu(self):
        """Choose a cell for the built-in opponent (black).

        Cells are tried in ``_OPPONENT_PRIORITY`` order and the first empty
        cell that would capture at least one white stone is returned.

        Returns 0 when no such cell exists; note that this is
        indistinguishable from choosing cell 0.
        """
        iro = -1
        turn = 1
        for put in self._OPPONENT_PRIORITY:
            if self.state[put] != 0:
                continue
            captured = 0
            for d in self._DIRECTIONS:
                if self.check(put, d) == 1:
                    continue
                count = 0
                tugi = put + d
                while self.state[tugi] == turn:
                    count += 1
                    if self.check(tugi, d) == 1:
                        break
                    tugi += d
                if count > 0 and self.state[tugi] == iro:
                    captured += count
            if captured > 0:
                return put
        return 0

    def getState(self):
        """Return the board array (the live array, not a copy)."""
        return self.state

    def getReward(self):
        """Return the number of stones flipped by the most recent move."""
        return self.count

    def isGameOver(self):
        """Return True when the board is full or one colour has no stones left."""
        board = self.state
        has_empty = bool(np.any(board == 0))
        has_white = bool(np.any(board == 1))
        has_black = bool(np.any(board == -1))
        return (not has_empty) or (not has_white) or (not has_black)

    def updateState(self, action):
        """Apply ``action`` to the board.

        -1 lets the built-in opponent move as black, 0..63 places a white
        stone on that cell, and any other value is ignored.
        """
        if action < -1 or action > 63:
            return
        self.count = 0
        if action == -1:
            self.oku(self.sasu(), -1)
        else:
            self.oku(action, 1)

    def observe(self):
        """Return the current observation, i.e. the board array."""
        return self.state

    def _reset(self):
        """Set up the four starting stones and return the initial observation."""
        self.state = np.zeros(self.nbStates, dtype=np.int8)
        self.state[27] = 1
        self.state[28] = -1
        self.state[35] = -1
        self.state[36] = 1
        self.count = 0
        return self.observe()

    def _step(self, action):
        """Apply ``action`` and return ``(observation, reward, done, info)``."""
        # The action has to be applied before the reward and the game-over
        # flag are read, otherwise the board would never change.
        self.updateState(action)
        reward = self.getReward()
        gameOver = self.isGameOver()
        return self.observe(), reward, gameOver, {}

    def _render(self, mode='human', close=False):
        """Draw the board and the stones; with ``close=True`` close the viewer instead."""
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        # Kept as a function-level import, as in the original code.
        from gym.envs.classic_control import rendering
        if self.viewer is None:
            self.viewer = rendering.Viewer(500, 500)
            self.viewer.set_bounds(-2.2, 2.2, -2.2, 2.2)
        line_color = (0, 0, 0)
        # Green board with a black outline.
        self.viewer.draw_polygon([(-2.0, -2.0), (2.0, -2.0), (2.0, 2.0), (-2.0, 2.0)], color=(0, 1, 0))
        self.viewer.draw_line((-2.0, 2.0), (2.0, 2.0), color=line_color)
        self.viewer.draw_line((-2.0, 2.0), (-2.0, -2.0), color=line_color)
        self.viewer.draw_line((2.0, 2.0), (2.0, -2.0), color=line_color)
        self.viewer.draw_line((-2.0, -2.0), (2.0, -2.0), color=line_color)
        # Seven inner grid lines in each direction, 0.5 apart.
        for i in range(7):
            offset = i * 0.5 - 1.5
            self.viewer.draw_line((offset, -2.0), (offset, 2.0), color=line_color)
            self.viewer.draw_line((-2.0, offset), (2.0, offset), color=line_color)
        stone_colors = {1: (1, 1, 1), -1: (0, 0, 0)}
        for i in range(64):
            color = stone_colors.get(int(self.state[i]))
            if color is None:
                continue
            # Centre of cell i; row 0 is drawn at the top of the window.
            x = (i % 8) * 0.5 - 1.75
            y = 1.75 - (i // 8) * 0.5
            transform0 = rendering.Transform(translation=(x, y))
            self.viewer.draw_circle(0.2, 20, color=color).add_attr(transform0)
        return self.viewer.render(return_rgb_array=mode == 'rgb_array')

env = oseroEnv()


def test(env):
    """Play interactively: read one action per line from stdin until the game ends.

    Each entered integer is passed to ``env.step``; the returned observation
    and reward are printed and the board is rendered after every step.
    """
    env.reset()
    env.render()
    while True:
        try:
            i = int(input())
        except ValueError:
            # Ask again instead of crashing on a typo.
            print("please enter an integer from -1 to 63")
            continue
        observe, reward, gameOver, info = env.step(i)
        print(observe, reward)
        env.render()
        if gameOver:
            print("game over!")
            break


test(env)

```
Why not register and get more from Qiita?
1. We will deliver articles that match you
By following users and tags, you can catch up information on technical fields that you are interested in as a whole
2. you can read useful information later efficiently
By "stocking" the articles you like, you can search right away