LoginSignup
5
2

More than 5 years have passed since last update.

TensorFlowでオセロに挑戦

Last updated at Posted at 2017-05-24

概要

TensorFlowでオセロをやるために、OpenAI Gymのオセロ環境を作ってみた。
手入力で動作を確認するためのサンプルコードを載せる。
誰かが、TensorFlowで負かせてくれる事を望む。

写真

ose.jpg

環境

windows 7 sp1 64bit
anaconda3
tensorflow 1.0
OpenAI Gym 0.5

オセロ環境の概要

observe:

8*8の盤面を0から63までの配列にして返す

 0  1  2  3  4  5  6  7 
 8  9 10 11 12 13 14 15
16 17 18 19 20 21 22 23
24 25 26 27 28 29 30 31
32 33 34 35 36 37 38 39
40 41 42 43 44 45 46 47
48 49 50 51 52 53 54 55
56 57 58 59 60 61 62 63

0 なし
1 白
-1 黒

reward:

直前の一手で裏返した(取った)相手の石の数

gameOver:

どちらかの色の石が無くなったか、盤が全て埋まった。

action:

0から63で指定。
-1なら相手(黒、組み込みのAI)が打つ。

確認用のサンプルコード

from __future__ import print_function
import math
import sys
import gym
import gym.spaces
import numpy as np
from gym import core, spaces
from gym.utils import seeding
import time
import random
import os.path

class oseroEnv(gym.Env):
    """Othello (Reversi) board exposed through the legacy Gym ``Env`` hooks.

    The board is a flat array of 64 ``int8`` cells in row-major order
    (index ``row * 8 + column``).  Cell values are ``0`` for empty, ``1`` for
    white and ``-1`` for black.

    An action in ``0..63`` places a white stone on that square.  The action
    ``-1`` lets the built-in black player choose and play a square.  The
    reward is the number of stones flipped by the most recent action.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second' : 10
    }

    # Index offsets of the eight neighbouring squares on the flat board.
    _DIRECTIONS = (-9, -8, -7, -1, 1, 7, 8, 9)

    # Squares in the order the built-in black player tries them; the four
    # centre squares are absent because they are occupied from the start.
    _AI_ORDER = (
        0, 7, 56, 63, 18, 21, 42, 45, 2, 16, 5, 23, 40, 58, 47, 61,
        3, 4, 11, 12, 19, 20, 24, 25, 26, 32, 33, 34, 29, 30, 31, 37,
        38, 39, 43, 44, 51, 52, 59, 60, 1, 8, 9, 10, 17, 6, 13, 14,
        15, 22, 41, 48, 49, 50, 57, 46, 53, 54, 55, 62,
    )

    def __init__(self):
        self.viewer = None
        self.gridSize = 8
        self.nbStates = self.gridSize * self.gridSize
        # Start from an all-empty board rather than uninitialised memory so
        # that using the environment before reset() reads defined values.
        self.state = np.zeros(self.nbStates, dtype = np.int8)
        self.count = 0

    def check(self, put, d):
        """Return 1 if stepping from square ``put`` by offset ``d`` leaves the board, else 0."""
        x = put % 8
        y = put // 8
        if x == 0 and d in (-9, -1, 7):
            return 1
        if x == 7 and d in (-7, 1, 9):
            return 1
        if y == 0 and d in (-9, -8, -7):
            return 1
        if y == 7 and d in (7, 8, 9):
            return 1
        target = put + d
        if target < 0 or target > 63:
            return 1
        return 0

    def _captures(self, put, step, iro):
        """Count the opposing stones bracketed from ``put`` along ``step``.

        Walks away from ``put`` over consecutive stones of the opposite
        colour.  Returns how many were passed if the run is closed by a stone
        of colour ``iro``; returns 0 if the run ends on an empty square or at
        the board edge.  The content of ``put`` itself is not inspected.
        """
        enemy = -iro
        passed = 0
        pos = put
        while self.check(pos, step) == 0:
            pos += step
            if self.state[pos] != enemy:
                return passed if self.state[pos] == iro else 0
            passed += 1
        return 0

    def oku(self, put, iro):
        """Place a stone of colour ``iro`` on ``put`` and flip what it captures.

        Nothing happens when ``put`` is occupied or the stone would capture
        nothing.  Each flipped stone increments ``self.count``.

        Returns -1 if at least one stone was flipped, otherwise 0.
        """
        if self.state[put] != 0:
            return 0
        res = 0
        for step in self._DIRECTIONS:
            flipped = self._captures(put, step, iro)
            if flipped == 0:
                continue
            res = -1
            self.state[put] = iro
            for k in range(1, flipped + 1):
                self.state[put + k * step] = iro
            self.count += flipped
        return res

    def sasu(self):
        """Choose a square for black.

        Returns the first empty square in ``_AI_ORDER`` on which a black
        stone would capture at least one white stone, or 0 when no such
        square exists.
        """
        for put in self._AI_ORDER:
            if self.state[put] != 0:
                continue
            for step in self._DIRECTIONS:
                if self._captures(put, step, -1) > 0:
                    return put
        return 0

    def getState(self):
        """Return the board array."""
        return self.state

    def getReward(self):
        """Return the number of stones flipped by the most recent action."""
        return self.count

    def isGameOver(self):
        """Return True when the board is full or one colour has no stones left."""
        board_full = not np.any(self.state == 0)
        no_white = not np.any(self.state == 1)
        no_black = not np.any(self.state == -1)
        return board_full or no_white or no_black

    def updateState(self, action):
        """Apply ``action``; values outside ``-1..63`` are ignored.

        ``-1`` lets the built-in black player move; any other value places a
        white stone on that square.  The flip counter is reset first so it
        reflects only this action.
        """
        if action < -1 or action > 63:
            return
        self.count = 0
        if action == -1:
            self.oku(self.sasu(), -1)
        else:
            self.oku(action, 1)

    def observe(self):
        """Return the board array."""
        return self.state

    def _reset(self):
        """Set up the four starting stones and return the board."""
        self.state = np.zeros(self.nbStates, dtype = np.int8)
        self.state[27] = 1
        self.state[28] = -1
        self.state[35] = -1
        self.state[36] = 1
        self.count = 0
        return self.observe()

    def _step(self, action):
        """Apply ``action`` and return ``(board, reward, game_over, info)``."""
        self.updateState(action)
        reward = self.getReward()
        gameOver = self.isGameOver()
        return self.observe(), reward, gameOver, {}

    def _render(self, mode = 'human', close = False):
        """Draw the board and stones; with ``close`` set, close the viewer instead."""
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        # Imported here, after the close check, so closing never needs it.
        from gym.envs.classic_control import rendering
        if self.viewer is None:
            self.viewer = rendering.Viewer(500, 500)
            self.viewer.set_bounds(-2.2, 2.2, -2.2, 2.2)
        black = (0, 0, 0)
        # Green playing surface with a black outline.
        self.viewer.draw_polygon([(-2.0, -2.0), (2.0, -2.0), (2.0, 2.0), (-2.0, 2.0)], color = (0, 1, 0))
        self.viewer.draw_line((-2.0, 2.0), (2.0, 2.0), color = black)
        self.viewer.draw_line((-2.0, 2.0), (-2.0, -2.0), color = black)
        self.viewer.draw_line((2.0, 2.0), (2.0, -2.0), color = black)
        self.viewer.draw_line((-2.0, -2.0), (2.0, -2.0), color = black)
        # Seven inner grid lines in each direction, spaced 0.5 apart.
        for i in range(7):
            offset = i * 0.5 - 1.5
            self.viewer.draw_line((offset, -2.0), (offset, 2.0), color = black)
            self.viewer.draw_line((-2.0, offset), (2.0, offset), color = black)
        for i in range(64):
            stone = self.state[i]
            if stone == 1:
                color = (1, 1, 1)
            elif stone == -1:
                color = black
            else:
                continue
            # Cell centre: column grows to the right, row 0 is drawn at the top.
            x = (i % 8) * 0.5 - 1.75
            y = 1.75 - (i // 8) * 0.5
            transform0 = rendering.Transform(translation = (x, y))
            self.viewer.draw_circle(0.2, 20, color = color).add_attr(transform0)
        return self.viewer.render(return_rgb_array = mode == 'rgb_array')

env = oseroEnv()


def test(env):
    """Drive ``env`` interactively from standard input.

    Reads one integer action per line, steps the environment, prints the
    board and reward, and re-renders.  The loop ends when the environment
    reports game over or when standard input is exhausted.
    """
    env.reset()
    env.render()
    while True:
        try:
            action = int(input())
        except ValueError:
            # A non-numeric line should not abort the game; ask again.
            print ("enter an integer from -1 to 63")
            continue
        except EOFError:
            break
        observe, reward, gameOver, info = env.step(action)
        print (observe, reward)
        env.render()
        if gameOver:
            print ("game over!")
            break


test(env)





5
2
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
5
2