More than 5 years have passed since last update.

4x4リバーシのPythonプログラム

Last updated at 2019-11-02Posted at 2018-08-06

概要

機械学習用に4x4リバーシのPythonプログラムを作成します。

AlphaZero 深層学習・強化学習・探索人工知能プログラミング実践入門にAlphaZeroの強化学習の分かりやすい解説と実装例が記載されています。6x6リバーシのサンプルが記載されていますが、それを4x4リバーシの強化学習プログラムをゼロから作ろうと思っています。

まず、4x4リバーシのゲームロジックの実装ですが、せっかくなので、ビットボードを使って実装してみます。こちらのサイト（ビットボードを用いた 4x4 オセロ完全解析）を参考にしました。

AlphaZero 深層学習・強化学習・探索人工知能プログラミング実践入門で紹介されている強化学習のプログラムに組み込むため、インタフェースはそこに記載のものに合わせます。

プログラム

game.py

ゲームロジックを記述します。

State

ゲームの状態を表すクラスです。

メソッド	引数	概要
init	pieces, enemy_pieces, depth	コンストラクタ
is_lose		負けかどうか
is_draw		引き分けかどうか
is_done		ゲーム終了かどうか
next	action	次の状態の取得
is_first_player		先手かどうか
__str__		文字列表示

全体

game.py

import random
import math
import numpy as np

BOARD_SIZE = 4

MASK_EDGE = np.uint16(0x0660)
MASK_VERTICAL = np.uint16(0x6666)
MASK_HORIZONTAL	= np.uint16(0x0FF0)

DIR_LEFT_OBLIQUE = 5
DIR_RIGHT_OBLIQUE = 3
DIR_HORIZONTAL = 1
DIR_VERTICAL = 4

class State:
    # コンストラクタ
    def __init__(self, pieces=None, enemy_pieces=None, depth=0):
        ...

    # 石の数の取得
    def piece_count(self, pieces_bit):
        ...

    # 負けかどうか
    def is_lose(self):
        ...

    # 引き分けかどうか
    def is_draw(self):
        ...

    # ゲーム終了かどうか
    def is_done(self):
        ...

    # 次の状態の取得
    def next(self, action):
        ...

    # 合法手の取得
    def _legal_actions(self):
        ...

    def _generate_some_legal(self, pieces_bit, enemy_pieces_bit, direction):
        ...

    # 任意のマスが合法手かどうか
    def is_legal_action(self, action_bit):
        ...

    # 石を置く
    def _move(self, action):
        ...

    def _generate_some_flipped(self, pieces_bit, enemy_pieces_bit, move, direction):
        ...

    # 先手かどうか
    def is_first_player(self):
        ...

    # 文字列表示
    def __str__(self):
        ...

    # 石の配置を配列に変換
    def _bit_to_array(self, bit):
        ...

    # 石の配列表現をビットに変換
    def _array_to_bit(self, bit):
        ...

init

インタフェースを合わせるため、引数のpiecesとenemy_piecesは配列ですが、ここでビット表現（pieces_bit、enemy_pieces_bit）に変換し、内部ではビット演算で処理していきます。

game.py


    def __init__(self, pieces=None, enemy_pieces=None, depth=0):
        # 連続パスによる終了
        self.pass_end = False

        # 石の配置
        self.pieces = pieces
        self.enemy_pieces = enemy_pieces

        # 石の初期配置
        if pieces == None or enemy_pieces == None:
            self.pieces = [0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0]
            self.enemy_pieces = [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]

        self.pieces_bit = self._array_to_bit(self.pieces)
        self.enemy_pieces_bit = self._array_to_bit(self.enemy_pieces)

        self.depth = depth

        # 合法手の設定
        self.legal_actions_bit = self._legal_actions()
        self.legal_actions = (16,) if self.legal_actions_bit == 0 else tuple(np.ravel(np.where(np.array(self._bit_to_array(self.legal_actions_bit))==1)))

piece_count

1が立っているビットを数えます。bin関数で2進文字列に変換し、'1'を数えます。

game.py


    # 石の数の取得
    def piece_count(self, pieces_bit):
        return bin(pieces_bit).count('1')

is_lose

game.py

    # 負けかどうか
    def is_lose(self):
        return self.is_done() and self.piece_count(self.pieces_bit) < self.piece_count(self.enemy_pieces_bit)

is_draw

game.py

    # 引き分けかどうか
    def is_draw(self):
        return self.is_done() and self.piece_count(self.pieces_bit) == self.piece_count(self.enemy_pieces_bit)

is_done

game.py

    # ゲーム終了かどうか
    def is_done(self):
        return self.piece_count(self.pieces_bit | self.enemy_pieces_bit) == 16 or self.pass_end

self.deptに1を追加したStateオブジェクトを作成します。合法手かどうかチェックの上、石を置きます。先手の石と後手の石を入れ替えたStateオブジェクトを作成し、それを返します。先手の石と後手の石を入れ替えるのは機械学習の都合で、自分から見た状態で学習させます。

game.py

    # 次の状態の取得
    def next(self, action):
        action_bit = np.uint16(0x8000) >> action
        state = State(self.pieces.copy(), self.enemy_pieces.copy(), self.depth+1)
        # print('action_bit: {:016b}, legal_actions_bit:{:016b}'.format(action_bit, state.legal_actions_bit))
        if action_bit != 0 and state.is_legal_action(action_bit):
            state._move(action_bit)
        w = state.pieces
        wb = state.pieces_bit
        state.pieces = state.enemy_pieces
        state.pieces_bit = state.enemy_pieces_bit
        state.enemy_pieces = w
        state.enemy_pieces_bit = wb

        # 合法手の設定
        state.legal_actions_bit = state._legal_actions()
        state.legal_actions = (16,) if state.legal_actions_bit == 0 else tuple(np.ravel(np.where(np.array(state._bit_to_array(state.legal_actions_bit))==1)))

        # 2回連続パス判定
        if action_bit == 0 and state.legal_actions_bit == 0:
            state.pass_end = True

        return state

_legal_actions

合法手の取得を取得します。

game.py

    # 合法手の取得
    def _legal_actions(self):
        return (~(self.pieces_bit | self.enemy_pieces_bit) & np.uint16(0xFFFF)) \
            & (self._generate_some_legal(self.pieces_bit, self.enemy_pieces_bit & MASK_EDGE, DIR_LEFT_OBLIQUE) \
             | self._generate_some_legal(self.pieces_bit, self.enemy_pieces_bit & MASK_EDGE, DIR_RIGHT_OBLIQUE) \
             | self._generate_some_legal(self.pieces_bit, self.enemy_pieces_bit & MASK_VERTICAL, DIR_HORIZONTAL) \
             | self._generate_some_legal(self.pieces_bit, self.enemy_pieces_bit & MASK_HORIZONTAL, DIR_VERTICAL))

    def _generate_some_legal(self, pieces_bit, enemy_pieces_bit, direction):
        flipped = ((pieces_bit << direction) | (pieces_bit >> direction)) & enemy_pieces_bit
        for _ in range(BOARD_SIZE - 2):
            flipped |= ((flipped << direction) | (flipped >> direction)) & enemy_pieces_bit
        return (flipped << direction) | (flipped >> direction)

is_legal_action

任意のマスが合法手かどうか確認します。

game.py

    # 任意のマスが合法手かどうか
    def is_legal_action(self, action_bit):
        return action_bit & self.legal_actions_bit != 0

_move

石を置く処理を記述します。

game.py

    # 石を置く
    def _move(self, action):
        flipped = self._generate_some_flipped(self.pieces_bit, self.enemy_pieces_bit & MASK_EDGE, action, DIR_LEFT_OBLIQUE)
        flipped |= self._generate_some_flipped(self.pieces_bit, self.enemy_pieces_bit & MASK_EDGE, action, DIR_RIGHT_OBLIQUE)
        flipped |= self._generate_some_flipped(self.pieces_bit, self.enemy_pieces_bit & MASK_VERTICAL, action, DIR_HORIZONTAL)
        flipped |= self._generate_some_flipped(self.pieces_bit, self.enemy_pieces_bit & MASK_HORIZONTAL, action, DIR_VERTICAL)

        self.pieces_bit = self.pieces_bit | action | flipped
        self.enemy_pieces_bit = self.enemy_pieces_bit ^ flipped
        self.pieces = self._bit_to_array(self.pieces_bit)
        self.enemy_pieces = self._bit_to_array(self.enemy_pieces_bit)

    def _generate_some_flipped(self, pieces_bit, enemy_pieces_bit, move, direction):
        left_enemy = (move << direction) & enemy_pieces_bit
        right_enemy = (move >> direction) & enemy_pieces_bit
        left_own = (pieces_bit << direction) & enemy_pieces_bit
        right_own = (pieces_bit >> direction) & enemy_pieces_bit

        return ((left_enemy & (right_own | (right_own >> direction))) | \
            ((left_enemy << direction) & right_own) | (right_enemy & (left_own | (left_own << direction))) | \
            ((right_enemy >> direction) & left_own))

is_first_player

先手かどうか確認します。

game.py

    # 先手かどうか
    def is_first_player(self):
        return self.depth % 2 == 0

str

盤面を文字列にして返します。

game.py

    # 文字列表示
    def __str__(self):
        ox = ('o', 'x') if self.is_first_player() else ('x', 'o')
        str = ''
        mask = np.uint16(0x8000)
        for i in range(BOARD_SIZE ** 2):
            if self.pieces_bit & mask != 0:
                str += ox[0]
            elif self.enemy_pieces_bit & mask != 0:
                str += ox[1]
            else:
                str += '-'
            if i % 4 == 3:
                str += '\n'
            mask = mask >> 1

        return str

_bit_to_array

石の配置を配列に変換します。

game.py

    # 石の配置を配列に変換
    def _bit_to_array(self, bit):
        return list(np.unpackbits(np.array([[bit >> 8], [bit & 0x00FF]], dtype=np.uint8)))

_array_to_bit

石の配列表現をビットに変換します。

game.py

    # 石の配列表現をビットに変換
    def _array_to_bit(self, bit):
        b = np.packbits([bit[:8], bit[8:]])
        return b[0] << 8 | b[1]

関数

random_action

合法手群から一手ランダムに選択します。

game.py

# ランダムで行動選択
def random_action(state):
    return state.legal_actions[random.randint(0, len(state.legal_actions)-1)]

動作確認用スクリプト

game.py

# 動作確認
if __name__ == '__main__':
    # 状態の生成
    state = State()

    # ゲーム終了までのループ
    while True:
        # ゲーム終了時
        if state.is_done():
            break

        # 次の状態の取得
        action = random_action(state)
        print('action: {}'.format(action))
        # print('{:016b}, {:016b}'.format(state.legal_actions, action))
        state = state.next(action)

        # 文字列表示
        print(state)
        print()

スクリプト実行結果

実行結果

$ python game.py                   
action: 4
----
ooo-
-ox-
----


action: 8
----
ooo-
xxx-
----


action: 12
----
ooo-
oox-
o---


action: 2
--x-
oox-
oox-
o---


action: 3
--xo
ooo-
oox-
o---


action: 0
x-xo
oxo-
oox-
o---


action: 1
xooo
ooo-
oox-
o---


action: 16
xooo
ooo-
oox-
o---


action: 15
xooo
ooo-
ooo-
o--o


action: 16
xooo
ooo-
ooo-
o--o

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up