Introduction
- Using the trained model built in the previous post, I actually played tic-tac-toe against the AI.
- The AI always moves first, and its opening move seems to be placed at random.
- In tic-tac-toe the second player can (if I remember right) at least hold a draw by taking the center, so this does not look like an AI that always wins.
Source Code
Saving the Model
ticktacktoo.py
# Append the following at the bottom of ticktacktoo.py.
# Note: this snippet uses os, shutil, and tensorflow (tf), which must already be
# imported earlier in the script, and it assumes sess, squares_placeholder, and
# logits are defined above.

# Remove old model
if os.path.exists("model"):
    shutil.rmtree("model")

# Save model for deployment on ML Engine
input_key = tf.placeholder(tf.int64, [None, ], name="key")
output_key = tf.identity(input_key)
input_signatures = {
    "key": tf.saved_model.utils.build_tensor_info(input_key),
    "squares": tf.saved_model.utils.build_tensor_info(squares_placeholder)
}
output_signatures = {
    "key": tf.saved_model.utils.build_tensor_info(output_key),
    "labels": tf.saved_model.utils.build_tensor_info(logits)
}
predict_signature_def = tf.saved_model.signature_def_utils.build_signature_def(
    input_signatures,
    output_signatures,
    tf.saved_model.signature_constants.PREDICT_METHOD_NAME
)
builder = tf.saved_model.builder.SavedModelBuilder(os.path.join("model"))
builder.add_meta_graph_and_variables(
    sess,
    [tf.saved_model.tag_constants.SERVING],
    signature_def_map={
        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: predict_signature_def
    },
    assets_collection=tf.get_collection(tf.GraphKeys.ASSET_FILEPATHS)
)
builder.save()
Creating the Game Board
game.py
#!/usr/local/bin/python
# -*- coding: utf-8 -*-
import numpy as np


class Setting:
    def __init__(self):
        self.board = None
        self.current_player = None
        self.result = None
        self.reset()

    def reset(self):
        # 0 = empty, 1 = "x" (the AI), -1 = "o" (the player); "x" always starts
        self.board = np.zeros(9, dtype=np.int32)
        self.current_player = "x"
        return np.array(self.board)  # initial observation

    def step(self, index):
        if self.board[index] != 0:
            print("Invalid move!!")
            return None, None, None, {"valid": False}
        elif self.current_player == "x":
            self.board[index] = 1
            self.current_player = "o"
        else:
            self.board[index] = -1
            self.current_player = "x"
        observation = np.array(self.board)
        done, info = self.check_game_result()
        reward = 0
        return observation, reward, done, info

    def render(self):
        markers = []
        for i in self.board:
            if i == 0:
                markers.append("_")
            elif i == 1:
                markers.append("x")
            else:
                markers.append("o")
        print("{} is thinking...".format(self.current_player))
        print("{0}\t{1}\t{2}".format(markers[0], markers[1], markers[2]))
        print("{0}\t{1}\t{2}".format(markers[3], markers[4], markers[5]))
        print("{0}\t{1}\t{2}\n".format(markers[6], markers[7], markers[8]))

    def check_game_result(self):
        x_win, o_win, is_full = False, False, False
        # Check rows and cols
        for i in range(3):
            row = self.board[(i * 3):(i * 3 + 3)]
            col = self.board[i::3]
            if np.sum(row) == 3 or np.sum(col) == 3:
                x_win = True
            if np.sum(row) == -3 or np.sum(col) == -3:
                o_win = True
        # Check diag
        if np.sum(self.board[[0, 4, 8]]) == 3 or np.sum(self.board[[2, 4, 6]]) == 3:
            x_win = True
        if np.sum(self.board[[0, 4, 8]]) == -3 or np.sum(self.board[[2, 4, 6]]) == -3:
            o_win = True
        if 0 not in self.board:
            is_full = True
        done = x_win or o_win or is_full
        info = {"x": x_win, "o": o_win, "full": is_full, "valid": True}
        return done, info
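Before wiring the model in, game.Setting can be tried out on its own. The snippet below is a minimal sketch (not part of the original scripts) that feeds a fixed sequence of moves and reports how the game ended:

#!/usr/local/bin/python
# -*- coding: utf-8 -*-
# Minimal, standalone sketch for exercising game.Setting by hand.
import game

env = game.Setting()
env.reset()

# Moves alternate automatically between "x" and "o"; here x takes the top row.
for move in [0, 4, 1, 8, 2]:
    observation, reward, done, info = env.step(move)
    env.render()
    if done:
        draw = info["full"] and not info["x"] and not info["o"]
        print("x win: {}, o win: {}, draw: {}".format(info["x"], info["o"], draw))
        break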
Creating the Game Launcher
play.py
#!/usr/local/bin/python
# -*- coding: utf-8 -*-
import sys

import numpy as np
import tensorflow as tf

import game

if __name__ == '__main__':
    with tf.Graph().as_default() as graph:
        sess = tf.Session()
        # Load the SavedModel exported by ticktacktoo.py
        meta_graph = tf.saved_model.loader.load(
            sess=sess,
            tags=[tf.saved_model.tag_constants.SERVING],
            export_dir='model'
        )
        model_signature = meta_graph.signature_def[
            tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
        input_signature = model_signature.inputs
        output_signature = model_signature.outputs

        # Get names of input and output tensors
        input_tensor_name = input_signature['squares'].name
        output_tensor_name = output_signature['labels'].name

        # Get input and output tensors
        squares = sess.graph.get_tensor_by_name(input_tensor_name)
        labels = sess.graph.get_tensor_by_name(output_tensor_name)

        env = game.Setting()
        observation = env.reset()
        done = False
        info = None

        rule = """
Input your move!
[0] top-left-square
[1] top-middle-square
[2] top-right-square
[3] middle-left-square
[4] middle-middle-square
[5] middle-right-square
[6] bottom-left-square
[7] bottom-middle-square
[8] bottom-right-square
"""
        print(rule)

        for _ in range(9):
            env.render()
            if done:
                if info["x"]:
                    print("x win!")
                elif info["o"]:
                    print("o win!")
                else:
                    print("Draw!")
                break

            # Compute scores: for each empty square, pretend x plays there and
            # ask the model how likely each outcome becomes.
            prob_x_win = -np.ones(9)
            prob_o_win = np.ones(9)
            # prob_draw = np.zeros(9)
            for i in range(9):
                if env.board[i] == 0:
                    board_copy = np.array([env.board])
                    board_copy[0][i] = 1
                    prob = sess.run(labels, feed_dict={squares: board_copy})
                    # print(i, prob)
                    prob_x_win[i] = prob[0][0]
                    prob_o_win[i] = prob[0][1]
                    # prob_draw = prob[0][2]

            # Decide CPU's move
            if max(prob_x_win) >= 0.05:
                cpu_move = prob_x_win.argmax()
            else:
                cpu_move = prob_o_win.argmin()
            _, _, done, info = env.step(cpu_move)
            env.render()
            if done:
                if info["x"]:
                    print("x win!")
                elif info["o"]:
                    print("o win!")
                else:
                    print("Draw!")
                break

            # Player's turn; cast to int so the index works on both Python 2 and 3
            while True:
                sys.stdout.write("Input your move: ")
                player_move = int(input())
                _, _, done, info = env.step(player_move)
                if info["valid"]:
                    break
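One detail of play.py worth spelling out is the move-selection heuristic: for every empty square the script pretends x plays there, asks the model for [P(x wins), P(o wins), P(draw)], and then picks the square with the highest x-win probability if any of them reaches 0.05, otherwise it falls back to the square with the lowest o-win probability (a blocking move). The sketch below isolates that logic so it can be read without TensorFlow; predict is a hypothetical stand-in for the sess.run(labels, ...) call, not the real model.

# Standalone sketch of the move-selection heuristic used in play.py.
# `predict` is a hypothetical stand-in for sess.run(labels, ...): it returns
# [P(x wins), P(o wins), P(draw)] for the board it is given.
import numpy as np


def choose_cpu_move(board, predict):
    prob_x_win = -np.ones(9)  # occupied squares keep -1, so argmax skips them
    prob_o_win = np.ones(9)   # occupied squares keep 1, so argmin skips them
    for i in range(9):
        if board[i] == 0:
            board_copy = np.array(board)
            board_copy[i] = 1              # pretend x plays square i
            p = predict(board_copy)
            prob_x_win[i] = p[0]
            prob_o_win[i] = p[1]
    if prob_x_win.max() >= 0.05:           # some square looks winnable for x
        return int(prob_x_win.argmax())
    return int(prob_o_win.argmin())        # otherwise block o's strongest square


# Example with a dummy predictor that strongly favours the centre square.
dummy = lambda b: [0.9, 0.05, 0.05] if b[4] == 1 else [0.01, 0.5, 0.49]
print(choose_cpu_move(np.zeros(9, dtype=np.int32), dummy))  # -> 4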
Re-saving the Model
- Re-save the model in SavedModel format.
- In short, all you have to do is re-run the training script.
Re-run
python ticktacktoo.py
After Re-running
The following files are created under the model directory:
saved_model.pb variables
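As an optional sanity check (play.py performs the same loading step), the exported SavedModel can be loaded back and its signature printed to confirm that the squares and labels tensors were saved:

# Optional check: load the exported SavedModel and list its signature tensors.
import tensorflow as tf

with tf.Graph().as_default():
    with tf.Session() as sess:
        meta_graph = tf.saved_model.loader.load(
            sess=sess,
            tags=[tf.saved_model.tag_constants.SERVING],
            export_dir="model"
        )
        signature = meta_graph.signature_def[
            tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
        # Expect "key" and "squares" as inputs, "key" and "labels" as outputs.
        print("inputs : {}".format({k: v.name for k, v in signature.inputs.items()}))
        print("outputs: {}".format({k: v.name for k, v in signature.outputs.items()}))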
Launching the Game
- A game board like the one below is displayed when the game starts.
- As described above, the AI makes the first move, choosing its square at random.
- The player moves by typing the number of a square on the keyboard, following the "Input your move!" legend.
2017-07-05 14:03:59.615507: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE4.2 instructions, but these are available on your machine and could speed up CPU computations.
2017-07-05 14:03:59.615531: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX instructions, but these are available on your machine and could speed up CPU computations.
2017-07-05 14:03:59.615536: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX2 instructions, but these are available on your machine and could speed up CPU computations.
2017-07-05 14:03:59.615540: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use FMA instructions, but these are available on your machine and could speed up CPU computations.
Input your move!
[0] top-left-square
[1] top-middle-square
[2] top-right-square
[3] middle-left-square
[4] middle-middle-square
[5] middle-right-square
[6] bottom-left-square
[7] bottom-middle-square
[8] bottom-right-square
x is thinking...
_ _ _
_ _ _
_ _ _
o is thinking...
_ _ x
_ _ _
_ _ _
Trying It Out
Draw
x is thinking...
_ _ _
_ _ _
_ _ _
o is thinking...
_ _ x
_ _ _
_ _ _
Input your move: 4
x is thinking...
_ _ x
_ o _
_ _ _
o is thinking...
x _ x
_ o _
_ _ _
Input your move: 1
x is thinking...
x o x
_ o _
_ _ _
o is thinking...
x o x
_ o _
_ x _
Input your move: 5
x is thinking...
x o x
_ o o
_ x _
o is thinking...
x o x
x o o
_ x _
Input your move: 6
x is thinking...
x o x
x o o
o x _
o is thinking...
x o x
x o o
o x x
Draw!
Win
x is thinking...
_ _ _
_ _ _
_ _ _
o is thinking...
x _ _
_ _ _
_ _ _
Input your move: 4
x is thinking...
x _ _
_ o _
_ _ _
o is thinking...
x _ x
_ o _
_ _ _
Input your move: 1
x is thinking...
x o x
_ o _
_ _ _
o is thinking...
x o x
_ o _
x _ _
Input your move: 3
x is thinking...
x o x
o o _
x _ _
o is thinking...
x o x
o o x
x _ _
Input your move: 7
x is thinking...
x o x
o o x
x o _
o win!
Summary
- This was the algorithm that has clicked with me the most so far (possibly just because it is a simple one).
- Just imagining settings like AlphaGo or shogi, where the board and the candidate techniques multiply, makes it clear the effort required would be far from trivial.
- What else haven't I tried yet? Sentiment analysis, perhaps?
- It may be about time to buy a book and work through the material again carefully.