Help us understand the problem. What is going on with this article?

TensorFlowでオセロに挑戦

More than 3 years have passed since last update.

概要

TensorFlowでオセロをやるために、OpenAI Gymのオセロ環境を作ってみた。
手動確認用のサンプルコードを載せる。
誰かがTensorFlowで負かしてくれることを望む。

写真

ose.jpg

環境

windows 7 sp1 64bit
anaconda3
tensorflow 1.0
OpenAI Gym 0.5

オセロ環境の概要

observe:

8*8の盤面を0から63までの配列にして返す

 0  1  2  3  4  5  6  7 
 8  9 10 11 12 13 14 15
16 17 18 19 20 21 22 23
24 25 26 27 28 29 30 31
32 33 34 35 36 37 38 39
40 41 42 43 44 45 46 47
48 49 50 51 52 53 54 55
56 57 58 59 60 61 62 63

0 なし
1 白
-1 黒

reward:

獲った石の数

gameOver:

どちらかの色の石が無くなったか、盤が埋まったら終了。

action:

0から63で指定。
-1なら相手(黒)が打つ。

確認用のサンプルコード

from __future__ import print_function
import math
import sys
import gym
import gym.spaces
import numpy as np
from gym import core, spaces
from gym.utils import seeding
import time
import random
import os.path

class oseroEnv(gym.Env):
    """Othello (Reversi) environment on an 8x8 board for OpenAI Gym.

    The board is a flat array of 64 ``int8`` cells stored row by row
    (``index = row * 8 + column``).  A cell holds 0 (empty), 1 (white) or
    -1 (black).  Passing a square index 0..63 to ``step`` places a white
    stone there; passing -1 lets the built-in opponent place a black stone.
    The reward of a step is the number of stones flipped by that step.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 10
    }

    # Index offsets of the eight neighbouring squares on the flat board.
    _DIRECTIONS = (-9, -8, -7, -1, 1, 7, 8, 9)

    # Order in which the built-in opponent tries squares (corners first).
    # The four starting squares 27, 28, 35 and 36 are never empty, so they
    # are not listed.
    _MOVE_PRIORITY = (
        0, 7, 56, 63, 18, 21, 42, 45, 2, 16, 5, 23, 40, 58, 47, 61,
        3, 4, 11, 12, 19, 20, 24, 25, 26, 32, 33, 34, 29, 30, 31, 37,
        38, 39, 43, 44, 51, 52, 59, 60, 1, 8, 9, 10, 17, 6, 13, 14,
        15, 22, 41, 48, 49, 50, 57, 46, 53, 54, 55, 62,
    )

    def __init__(self):
        self.viewer = None
        self.gridSize = 8
        self.nbStates = self.gridSize * self.gridSize
        # Zero-filled (not np.empty) so the board is well defined even if
        # the environment is used before reset() is called.
        self.state = np.zeros(self.nbStates, dtype=np.int8)
        # Number of stones flipped by the most recent move.
        self.count = 0

    def check(self, put, d):
        """Return 1 if moving from square ``put`` by offset ``d`` leaves
        the board (including wrapping across a row edge), otherwise 0."""
        x = put % 8
        y = put // 8
        if x == 0 and d in (-9, -1, 7):
            return 1
        if x == 7 and d in (-7, 1, 9):
            return 1
        if y == 0 and d in (-9, -8, -7):
            return 1
        if y == 7 and d in (7, 8, 9):
            return 1
        target = put + d
        if target < 0 or target > 63:
            return 1
        return 0

    def oku(self, put, iro):
        """Place a stone of colour ``iro`` on square ``put`` and flip the
        captured stones.

        The board is left untouched when the square is occupied or when
        the move would not flip any opposing stone.  Every flipped stone
        increments ``self.count``.

        Returns -1 if at least one stone was flipped, 0 otherwise.
        """
        res = 0
        if self.state[put] != 0:
            return res
        opponent = -1 if iro == 1 else 1
        for step in self._DIRECTIONS:
            # Walk over the run of opposing stones in this direction.
            walked = 0
            pos = put
            while True:
                if self.check(pos, step) == 1:
                    break
                walked += 1
                pos += step
                if self.state[pos] != opponent:
                    break
            # walked > 1 means at least one opposing stone lies between
            # ``put`` and ``pos``; the run must be closed by our own stone.
            if walked > 1 and self.state[pos] == iro:
                res = -1
                pos = put
                while True:
                    self.state[pos] = iro
                    pos += step
                    if self.state[pos] != opponent:
                        break
                    self.count += 1
        return res

    def sasu(self):
        """Choose a square for the built-in opponent (black).

        Squares are tried in the fixed ``_MOVE_PRIORITY`` order; the first
        empty square from which at least one white stone would be captured
        is returned.  Returns 0 when no such square exists, which is
        indistinguishable from choosing square 0 (``oku`` then simply
        rejects the move if it is not legal).
        """
        own = -1
        opponent = 1
        for put in self._MOVE_PRIORITY:
            if self.state[put] != 0:
                continue
            captured = 0
            for step in self._DIRECTIONS:
                if self.check(put, step) != 0:
                    continue
                run = 0
                pos = put + step
                while self.state[pos] == opponent:
                    run += 1
                    if self.check(pos, step) == 1:
                        break
                    pos += step
                # The run only counts when it is closed by a black stone.
                if run > 0 and self.state[pos] == own:
                    captured += run
            if captured > 0:
                return put
        return 0

    def getState(self):
        """Return the board array (the live array, not a copy)."""
        return self.state

    def getReward(self):
        """Return the number of stones flipped by the most recent move."""
        return self.count

    def isGameOver(self):
        """Return True when the board is full or either colour has no
        stones left on the board."""
        board_full = not np.any(self.state == 0)
        no_white = not np.any(self.state == 1)
        no_black = not np.any(self.state == -1)
        return bool(board_full or no_white or no_black)

    def updateState(self, action):
        """Apply ``action``: 0..63 places a white stone on that square,
        -1 lets the built-in opponent place a black stone.  Any other
        value is ignored."""
        # Reset first so that a rejected action yields a reward of 0
        # instead of repeating the previous move's flip count.
        self.count = 0
        if action < -1 or action > 63:
            return
        if action == -1:
            self.oku(self.sasu(), -1)
        else:
            self.oku(action, 1)

    def observe(self):
        """Return the current board array."""
        return self.state

    def _reset(self):
        """Set up the four starting stones and return the board."""
        self.state = np.zeros(self.nbStates, dtype=np.int8)
        self.state[27] = 1
        self.state[28] = -1
        self.state[35] = -1
        self.state[36] = 1
        self.count = 0
        return self.observe()

    def _step(self, action):
        """Apply ``action`` and return ``(observation, reward, done, info)``."""
        self.updateState(action)
        reward = self.getReward()
        gameOver = self.isGameOver()
        return self.observe(), reward, gameOver, {}

    def _render(self, mode='human', close=False):
        """Draw the board and stones with the classic-control viewer.

        With ``close=True`` the viewer (if any) is closed and nothing is
        drawn.  With ``mode='rgb_array'`` the viewer is asked to return
        the frame as an RGB array.
        """
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        # Imported lazily so the environment can be used without a display.
        from gym.envs.classic_control import rendering
        if self.viewer is None:
            self.viewer = rendering.Viewer(500, 500)
            self.viewer.set_bounds(-2.2, 2.2, -2.2, 2.2)
        black = (0, 0, 0)
        # Green board with a black border.
        self.viewer.draw_polygon(
            [(-2.0, -2.0), (2.0, -2.0), (2.0, 2.0), (-2.0, 2.0)],
            color=(0, 1, 0))
        self.viewer.draw_line((-2.0, 2.0), (2.0, 2.0), color=black)
        self.viewer.draw_line((-2.0, 2.0), (-2.0, -2.0), color=black)
        self.viewer.draw_line((2.0, 2.0), (2.0, -2.0), color=black)
        self.viewer.draw_line((-2.0, -2.0), (2.0, -2.0), color=black)
        # Seven inner grid lines in each direction.
        for i in range(7):
            offset = i * 0.5 - 1.5
            self.viewer.draw_line((offset, -2.0), (offset, 2.0), color=black)
            self.viewer.draw_line((-2.0, offset), (2.0, offset), color=black)
        stone_colors = {1: (1, 1, 1), -1: (0, 0, 0)}
        for i in range(64):
            color = stone_colors.get(int(self.state[i]))
            if color is None:
                continue
            # Centre of cell i; row 0 is drawn at the top of the window.
            x = (i % 8) * 0.5 - 1.75
            y = 1.75 - (i // 8) * 0.5
            stone = self.viewer.draw_circle(0.2, 20, color=color)
            stone.add_attr(rendering.Transform(translation=(x, y)))
        return self.viewer.render(return_rgb_array=mode == 'rgb_array')

# Create the Othello environment instance used by the interactive session.
env = oseroEnv()

def test(env):
    """Play an interactive game on ``env`` using moves typed on stdin.

    Each input line is passed to ``env.step`` as an integer action.  The
    resulting observation and reward are printed and the board is
    re-rendered after every step.  The loop ends when the environment
    reports that the game is over.
    """
    env.reset()
    env.render()
    while True:
        try:
            action = int(input())
        except ValueError:
            # A typo should not kill the session; ask for the move again.
            print("please enter an integer")
            continue
        observe, reward, gameOver, _ = env.step(action)
        print(observe, reward)
        env.render()
        if gameOver:
            print("game over!")
            break

# Start the interactive game loop; it reads moves from stdin until the game ends.
test(env)





Why not register and get more from Qiita?
  1. We will deliver articles that match you
    By following users and tags, you can catch up information on technical fields that you are interested in as a whole
  2. you can read useful information later efficiently
    By "stocking" the articles you like, you can search right away