Introduction
- Using the trained model built in the previous post, I actually played tic-tac-toe against the AI.
- The AI always moves first, and its opening move seems to be placed at random.
- In tic-tac-toe the second player can (if I remember right) at least hold a draw by taking the center, so this does not look like an AI that always wins.
Source Code
Saving the Model
ticktacktoo.py
# Append the following at the bottom of ticktacktoo.py.
# Note: this snippet uses os, shutil, and tensorflow (tf), which must already be
# imported earlier in the script, and it assumes sess, squares_placeholder, and
# logits are defined above.

# Remove old model
if os.path.exists("model"):
    shutil.rmtree("model")

# Save model for deployment on ML Engine
input_key = tf.placeholder(tf.int64, [None, ], name="key")
output_key = tf.identity(input_key)
input_signatures = {
    "key": tf.saved_model.utils.build_tensor_info(input_key),
    "squares": tf.saved_model.utils.build_tensor_info(squares_placeholder)
}
output_signatures = {
    "key": tf.saved_model.utils.build_tensor_info(output_key),
    "labels": tf.saved_model.utils.build_tensor_info(logits)
}
predict_signature_def = tf.saved_model.signature_def_utils.build_signature_def(
    input_signatures,
    output_signatures,
    tf.saved_model.signature_constants.PREDICT_METHOD_NAME
)
builder = tf.saved_model.builder.SavedModelBuilder(os.path.join("model"))
builder.add_meta_graph_and_variables(
    sess,
    [tf.saved_model.tag_constants.SERVING],
    signature_def_map={
        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: predict_signature_def
    },
    assets_collection=tf.get_collection(tf.GraphKeys.ASSET_FILEPATHS)
)
builder.save()
Creating the Game Board
game.py
#!/usr/local/bin/python
# -*- coding: utf-8 -*-
import numpy as np


class Setting:
    def __init__(self):
        self.board = None
        self.current_player = None
        self.result = None
        self.reset()

    def reset(self):
        # 0 = empty, 1 = "x" (the AI), -1 = "o" (the player); "x" always starts
        self.board = np.zeros(9, dtype=np.int32)
        self.current_player = "x"
        return np.array(self.board)  # initial observation

    def step(self, index):
        if self.board[index] != 0:
            print("Invalid move!!")
            return None, None, None, {"valid": False}
        elif self.current_player == "x":
            self.board[index] = 1
            self.current_player = "o"
        else:
            self.board[index] = -1
            self.current_player = "x"
        observation = np.array(self.board)
        done, info = self.check_game_result()
        reward = 0
        return observation, reward, done, info

    def render(self):
        markers = []
        for i in self.board:
            if i == 0:
                markers.append("_")
            elif i == 1:
                markers.append("x")
            else:
                markers.append("o")
        print("{} is thinking...".format(self.current_player))
        print("{0}\t{1}\t{2}".format(markers[0], markers[1], markers[2]))
        print("{0}\t{1}\t{2}".format(markers[3], markers[4], markers[5]))
        print("{0}\t{1}\t{2}\n".format(markers[6], markers[7], markers[8]))

    def check_game_result(self):
        x_win, o_win, is_full = False, False, False
        # Check rows and cols
        for i in range(3):
            row = self.board[(i * 3):(i * 3 + 3)]
            col = self.board[i::3]
            if np.sum(row) == 3 or np.sum(col) == 3:
                x_win = True
            if np.sum(row) == -3 or np.sum(col) == -3:
                o_win = True
        # Check diag
        if np.sum(self.board[[0, 4, 8]]) == 3 or np.sum(self.board[[2, 4, 6]]) == 3:
            x_win = True
        if np.sum(self.board[[0, 4, 8]]) == -3 or np.sum(self.board[[2, 4, 6]]) == -3:
            o_win = True
        if 0 not in self.board:
            is_full = True
        done = x_win or o_win or is_full
        info = {"x": x_win, "o": o_win, "full": is_full, "valid": True}
        return done, info
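Before wiring the model in, game.Setting can be tried out on its own. The snippet below is a minimal sketch (not part of the original scripts) that feeds a fixed sequence of moves and reports how the game ended:

#!/usr/local/bin/python
# -*- coding: utf-8 -*-
# Minimal, standalone sketch for exercising game.Setting by hand.
import game

env = game.Setting()
env.reset()

# Moves alternate automatically between "x" and "o"; here x takes the top row.
for move in [0, 4, 1, 8, 2]:
    observation, reward, done, info = env.step(move)
    env.render()
    if done:
        draw = info["full"] and not info["x"] and not info["o"]
        print("x win: {}, o win: {}, draw: {}".format(info["x"], info["o"], draw))
        break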
Creating the Game Launcher
play.py
#!/usr/local/bin/python
# -*- coding: utf-8 -*-
import sys

import numpy as np
import tensorflow as tf

import game

if __name__ == '__main__':
    with tf.Graph().as_default() as graph:
        sess = tf.Session()
        # Load the SavedModel exported by ticktacktoo.py
        meta_graph = tf.saved_model.loader.load(
            sess=sess,
            tags=[tf.saved_model.tag_constants.SERVING],
            export_dir='model'
        )
        model_signature = meta_graph.signature_def[
            tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
        input_signature = model_signature.inputs
        output_signature = model_signature.outputs

        # Get names of input and output tensors
        input_tensor_name = input_signature['squares'].name
        output_tensor_name = output_signature['labels'].name

        # Get input and output tensors
        squares = sess.graph.get_tensor_by_name(input_tensor_name)
        labels = sess.graph.get_tensor_by_name(output_tensor_name)

        env = game.Setting()
        observation = env.reset()
        done = False
        info = None

        rule = """
Input your move!
[0] top-left-square
[1] top-middle-square
[2] top-right-square
[3] middle-left-square
[4] middle-middle-square
[5] middle-right-square
[6] bottom-left-square
[7] bottom-middle-square
[8] bottom-right-square
"""
        print(rule)

        for _ in range(9):
            env.render()
            if done:
                if info["x"]:
                    print("x win!")
                elif info["o"]:
                    print("o win!")
                else:
                    print("Draw!")
                break

            # Compute scores: for each empty square, pretend x plays there and
            # ask the model how likely each outcome becomes.
            prob_x_win = -np.ones(9)
            prob_o_win = np.ones(9)
            # prob_draw = np.zeros(9)
            for i in range(9):
                if env.board[i] == 0:
                    board_copy = np.array([env.board])
                    board_copy[0][i] = 1
                    prob = sess.run(labels, feed_dict={squares: board_copy})
                    # print(i, prob)
                    prob_x_win[i] = prob[0][0]
                    prob_o_win[i] = prob[0][1]
                    # prob_draw = prob[0][2]

            # Decide CPU's move
            if max(prob_x_win) >= 0.05:
                cpu_move = prob_x_win.argmax()
            else:
                cpu_move = prob_o_win.argmin()
            _, _, done, info = env.step(cpu_move)
            env.render()
            if done:
                if info["x"]:
                    print("x win!")
                elif info["o"]:
                    print("o win!")
                else:
                    print("Draw!")
                break

            # Player's turn; cast to int so the index works on both Python 2 and 3
            while True:
                sys.stdout.write("Input your move: ")
                player_move = int(input())
                _, _, done, info = env.step(player_move)
                if info["valid"]:
                    break
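One detail of play.py worth spelling out is the move-selection heuristic: for every empty square the script pretends x plays there, asks the model for [P(x wins), P(o wins), P(draw)], and then picks the square with the highest x-win probability if any of them reaches 0.05, otherwise it falls back to the square with the lowest o-win probability (a blocking move). The sketch below isolates that logic so it can be read without TensorFlow; predict is a hypothetical stand-in for the sess.run(labels, ...) call, not the real model.

# Standalone sketch of the move-selection heuristic used in play.py.
# `predict` is a hypothetical stand-in for sess.run(labels, ...): it returns
# [P(x wins), P(o wins), P(draw)] for the board it is given.
import numpy as np


def choose_cpu_move(board, predict):
    prob_x_win = -np.ones(9)  # occupied squares keep -1, so argmax skips them
    prob_o_win = np.ones(9)   # occupied squares keep 1, so argmin skips them
    for i in range(9):
        if board[i] == 0:
            board_copy = np.array(board)
            board_copy[i] = 1              # pretend x plays square i
            p = predict(board_copy)
            prob_x_win[i] = p[0]
            prob_o_win[i] = p[1]
    if prob_x_win.max() >= 0.05:           # some square looks winnable for x
        return int(prob_x_win.argmax())
    return int(prob_o_win.argmin())        # otherwise block o's strongest square


# Example with a dummy predictor that strongly favours the centre square.
dummy = lambda b: [0.9, 0.05, 0.05] if b[4] == 1 else [0.01, 0.5, 0.49]
print(choose_cpu_move(np.zeros(9, dtype=np.int32), dummy))  # -> 4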
Re-saving the Model
- Re-save the model in SavedModel format.
- In short, all you have to do is re-run the training script.
Re-run
python ticktacktoo.py
After Re-running
The following files are created under the model directory:
saved_model.pb variables
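As an optional sanity check (play.py performs the same loading step), the exported SavedModel can be loaded back and its signature printed to confirm that the squares and labels tensors were saved:

# Optional check: load the exported SavedModel and list its signature tensors.
import tensorflow as tf

with tf.Graph().as_default():
    with tf.Session() as sess:
        meta_graph = tf.saved_model.loader.load(
            sess=sess,
            tags=[tf.saved_model.tag_constants.SERVING],
            export_dir="model"
        )
        signature = meta_graph.signature_def[
            tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
        # Expect "key" and "squares" as inputs, "key" and "labels" as outputs.
        print("inputs : {}".format({k: v.name for k, v in signature.inputs.items()}))
        print("outputs: {}".format({k: v.name for k, v in signature.outputs.items()}))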
Launching the Game
- A game board like the one below is displayed when the game starts.
- As described above, the AI makes the first move, choosing its square at random.
- The player moves by typing the number of a square on the keyboard, following the "Input your move!" legend.
2017-07-05 14:03:59.615507: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE4.2 instructions, but these are available on your machine and could speed up CPU computations.
2017-07-05 14:03:59.615531: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX instructions, but these are available on your machine and could speed up CPU computations.
2017-07-05 14:03:59.615536: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX2 instructions, but these are available on your machine and could speed up CPU computations.
2017-07-05 14:03:59.615540: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use FMA instructions, but these are available on your machine and could speed up CPU computations.
Input your move!
[0] top-left-square
[1] top-middle-square
[2] top-right-square
[3] middle-left-square
[4] middle-middle-square
[5] middle-right-square
[6] bottom-left-square
[7] bottom-middle-square
[8] bottom-right-square
x is thinking...
_ _ _
_ _ _
_ _ _
o is thinking...
_ _ x
_ _ _
_ _ _
Trying It Out
Draw
x is thinking...
_ _ _
_ _ _
_ _ _
o is thinking...
_ _ x
_ _ _
_ _ _
Input your move: 4
x is thinking...
_ _ x
_ o _
_ _ _
o is thinking...
x _ x
_ o _
_ _ _
Input your move: 1
x is thinking...
x o x
_ o _
_ _ _
o is thinking...
x o x
_ o _
_ x _
Input your move: 5
x is thinking...
x o x
_ o o
_ x _
o is thinking...
x o x
x o o
_ x _
Input your move: 6
x is thinking...
x o x
x o o
o x _
o is thinking...
x o x
x o o
o x x
Draw!
Win
x is thinking...
_ _ _
_ _ _
_ _ _
o is thinking...
x _ _
_ _ _
_ _ _
Input your move: 4
x is thinking...
x _ _
_ o _
_ _ _
o is thinking...
x _ x
_ o _
_ _ _
Input your move: 1
x is thinking...
x o x
_ o _
_ _ _
o is thinking...
x o x
_ o _
x _ _
Input your move: 3
x is thinking...
x o x
o o _
x _ _
o is thinking...
x o x
o o x
x _ _
Input your move: 7
x is thinking...
x o x
o o x
x o _
o win!
Summary
- This was the algorithm that has clicked with me the most so far (possibly just because it is a simple one).
- Just imagining settings like AlphaGo or shogi, where the board and the candidate techniques multiply, makes it clear the effort required would be far from trivial.
- What else haven't I tried yet? Sentiment analysis, perhaps?
- It may be about time to buy a book and work through the material again carefully.