ぷよぷよプログラミングAI学習システムは、中高生の自己調整学習に向けて開発されました。
どうやって、Chromebookで、10分で、素敵な人工知能を作れるのでしょうか?
秘密は、3つ。
1 JupyterLiteで、WebクライアントがPythonコードを実行
2 超軽量な機械学習フレームワークDeZeroを採用
3 ぷよぷよのstageとactionがコンパクト
Github&X
ぷよぷよプログラミングAI学習用まとめ
チュートリアルまとめ
Step4 next board
4 ブロックを落下させて、ブロックを消して、次の画面を作る。
step3のcheck_collisionを使えば、ぷよがどこまで落下するかを判定できます。ぷよをボードに配置すれば、同じ色のぷよのつながりが分かります。そして、消去判定と得点計算を行います。
4.1 auto fix
まずは、どこまで2つのぷよが落ちるかを計算します。そして、その場所にセットします。
from puyo_utils import *
import numpy as np
def auto_fix_puyo(board, puyo):
    """Drop ``puyo`` straight down and return a new board with it fixed in place.

    The pair is lowered one row at a time from row 0 until
    ``utils.check_collision`` reports a collision, then placed on the last
    free row with ``utils.set_puyo_to_board``.

    Args:
        board: 2-D board array; it is not modified.
        puyo: the falling pair. Its ``y`` attribute is updated to the row
            where the pair came to rest.

    Returns:
        A new board containing the pair. If the pair already collides on
        row 0 it cannot be placed at all, and an unchanged copy of ``board``
        is returned (``puyo.y`` is left at 0).
    """
    for row in range(CFG.Height):
        puyo.y = row
        if utils.check_collision(board, puyo):
            if row == 0:
                # Stepping back to row -1 would make the placement index
                # wrap around and overwrite a cell on the bottom row.
                return np.copy(board)
            puyo.y = row - 1
            break
    return utils.set_puyo_to_board(board, puyo)
試してみましょう。
# Build a random sample board and a fresh puyo pair.
board = utils.create_sample_board()
puyo, done = utils.create_new_puyo(board)
# Rotation 0 puts the movable puyo to the right of the centre one (dx=+1, dy=0).
puyo.rotation = 0
# Drop the pair and print the board that results.
board = auto_fix_puyo(board, puyo)
print(board)
[[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 4 2 2 0 0]
[0 2 3 0 0 0]
[4 3 2 0 0 0]
[2 2 3 2 0 0]
[2 2 4 3 3 0]]
4.2 fall
公式JavaScript版では、stage関数にあたります。JavaScript版では、連結ぷよの動きはplayer関数で制御し、ぷよ全体の処理はstage関数で行っています。
機械学習においては、auto_fix_puyoはほとんど使われないでしょう。
def fall(board):
    """Apply gravity to ``board`` in place.

    Each column is compacted independently so that all non-empty cells
    (values > 0) end up stacked on the bottom row with their relative order
    preserved.

    The board size is read from ``board.shape`` so the function works for any
    2-D board, not only the default 12x6 one.

    Args:
        board: 2-D NumPy array indexed as ``board[row, column]``; modified
            in place.

    Returns:
        None.
    """
    height, width = board.shape
    for col in range(width):
        # Lowest row of this column that is still free.
        target_row = height - 1
        # Scan from the bottom row upwards.
        for row in range(height - 1, -1, -1):
            if board[row, col] > 0:
                if target_row > row:
                    # There is a gap below this puyo: move it down.
                    board[target_row, col] = board[row, col]
                    board[row, col] = 0
                target_row -= 1
- 列ごとに逐次処理します。
- range(CFG.Height-1,-1,-1) : 各列の、下から順にみていきます。
- target_row : 空いている行の一番下。初期値は一番下の行として、ぷよが存在するならば一つ上にあげます。
試してみます
# Build a random sample board and a fresh puyo pair.
board = utils.create_sample_board()
puyo, done = utils.create_new_puyo(board)
# Rotation 0 puts the movable puyo to the right of the centre one.
puyo.rotation = 0
# Write the pair onto a copy of the board at its current position ...
board = utils.set_puyo_to_board(board, puyo)
# ... then let every column settle (fall mutates the board in place).
fall(board)
print(board)
[[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 3 0 4 0 0]
[0 2 1 2 0 0]
[0 4 1 3 0 0]
[0 4 2 4 1 0]
[0 2 2 4 1 4]]
4.3 check erase
Union-Findを使って、連結性を求めます。
Union-Find
class UnionFind:
    """Disjoint-set structure over the integers ``0 .. n-1``."""

    def __init__(self, n):
        self.n = n
        # parents[k] == k marks k as the representative of its own set.
        self.parents = np.arange(n)

    def find(self, x):
        """Return the representative of the set containing ``x``.

        Every node visited on the way is re-pointed directly at the
        representative (path compression).
        """
        # First pass: walk up to the representative.
        root = x
        while self.parents[root] != root:
            root = self.parents[root]
        # Second pass: point every node on the path straight at it.
        node = x
        while node != root:
            above = self.parents[node]
            self.parents[node] = root
            node = above
        return root

    def merge(self, x, y):
        """Join the sets of ``x`` and ``y``; the smaller representative wins."""
        root_x, root_y = self.find(x), self.find(y)
        if root_x == root_y:
            return
        low, high = (root_x, root_y) if root_x < root_y else (root_y, root_x)
        self.parents[high] = low

    def clear_groups(self, limit):
        """Return every set with at least ``limit`` members.

        Each set is a list of its members in ascending order; the sets are
        ordered by the index of their representative.
        """
        members = [[] for _ in range(self.n)]
        for node in range(self.n):
            members[self.find(node)].append(node)
        return [group for group in members if len(group) >= limit]
check_erase
def check_erase(board, height=CFG.Height, width=CFG.Width):
    """Find the groups of same-coloured puyos that are large enough to erase.

    Each column is scanned from the bottom row upwards and the scan of a
    column stops at its first empty cell. Every visited puyo is joined with
    the equal-valued cell directly above it and directly to its right.

    Args:
        board: 2-D board array indexed as ``board[row, column]``.
        height: number of rows to scan.
        width: number of columns to scan.

    Returns:
        A list of groups with four or more cells. Each group is a list of
        flat cell indices ``width * row + column``.
    """
    uf = UnionFind(height * width)
    for col in range(width):
        has_right_neighbour = col < width - 1
        for row in reversed(range(height)):
            colour = board[row, col]
            if colour == 0:
                # Nothing above an empty cell is examined for this column.
                break
            cell = width * row + col
            if row > 0 and colour == board[row - 1, col]:
                uf.merge(cell, cell - width)
            if has_right_neighbour and colour == board[row, col + 1]:
                uf.merge(cell, cell + 1)
    return uf.clear_groups(4)
試してみます。数回やらないと何も出ないかも。
# Build a random sample board and a fresh puyo pair.
board = utils.create_sample_board()
puyo, done = utils.create_new_puyo(board)
# Rotation 0 puts the movable puyo to the right of the centre one.
puyo.rotation = 0
# Place the pair, then let every column settle in place.
board = utils.set_puyo_to_board(board, puyo)
fall(board)
print(board)
# Groups of four or more connected cells; an empty list means nothing to erase.
print(check_erase(board))
[[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 0 1 0 0 3]
[0 0 3 0 0 2]
[4 0 2 0 0 3]
[4 0 4 3 3 4]
[4 0 2 1 4 4]]
[]
erasing
check_eraseに入れてもいいのですが、公式は区別しているので、分けます。スコアのために、色の種類と消すぷよの数を出します。boardの中のぷよは消えます。
def erasing(board, clear_groups, height=CFG.Height, width=CFG.Width):
    """Erase the given groups from ``board`` and report what was removed.

    Distinct colours are collected in a set rather than a fixed-size table,
    so any colour id is accepted (the previous 5-slot table raised
    ``IndexError`` for colour id 5).

    Args:
        board: 2-D board array indexed as ``board[row, column]``; the erased
            cells are set to 0 in place.
        clear_groups: list of groups, each a non-empty list of flat cell
            indices ``width * row + column``.
        height: unused; kept so the signature stays unchanged.
        width: number of columns, used to decode the flat indices.

    Returns:
        ``(piece, color_num)``: the number of erased cells and the number of
        distinct colours among the groups. ``(0, 0)`` when there is nothing
        to erase.
    """
    if not clear_groups:
        return 0, 0
    colors = set()
    piece = 0
    for group in clear_groups:
        # All cells of a group share one colour; read it from the first
        # cell before the group is wiped.
        row, col = divmod(group[0], width)
        colors.add(int(board[row, col]))
        for cell in group:
            row, col = divmod(cell, width)
            board[row, col] = 0
        piece += len(group)
    return piece, len(colors)
試してみましょう。
# Build a random sample board and a fresh puyo pair.
board = utils.create_sample_board()
puyo, done = utils.create_new_puyo(board)
# Rotation 0 puts the movable puyo to the right of the centre one.
puyo.rotation = 0
# Place the pair, then let every column settle in place.
board = utils.set_puyo_to_board(board, puyo)
fall(board)
print(board)
# Find the erasable groups (four or more connected cells).
clear_groups = check_erase(board)
print(clear_groups)
# Erase them in place; get the erased cell count and the number of colours.
piece, color = erasing(board, clear_groups)
print(board)
print(piece, color)
[[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 0 0 3 0 0]
[0 0 0 4 0 0]
[0 0 2 4 0 2]
[0 0 3 3 0 4]
[2 0 3 1 0 4]
[3 2 4 1 0 4]]
[]
[[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 0 0 3 0 0]
[0 0 0 4 0 0]
[0 0 2 4 0 2]
[0 0 3 3 0 4]
[2 0 3 1 0 4]
[3 2 4 1 0 4]]
0 0
4.4 Scoring
ぷよが消えるたびに、
消したぷよの数(piece)、色の数(color)、連鎖数(rensa)に応じたボーナスを合計し、それを倍率として得点に掛けます。
score constant
# Bonus tables looked up by calc_score; the three looked-up values are summed
# to form the score multiplier.
# rensaBonus is indexed by the chain (rensa) count.
rensaBonus = [
0, 8, 16, 32, 64, 96, 128, 160, 192, 224,
256, 288, 320, 352, 384, 416, 448, 480, 512,
544, 576, 608, 640, 672]
# pieceBonus is indexed by the number of erased puyos.
pieceBonus = [0, 0, 0, 0, 2, 3, 4, 5, 6, 7, 10, 10]
# colorBonus is indexed by the number of distinct colours erased.
colorBonus = [0, 0, 3, 6, 12, 24]
calc_score
def calc_score(rensa, piece, color):
    """Return the score for one erase step.

    Each argument is capped at the last index of its bonus table, the three
    bonuses are summed into a multiplier (a sum of 0 counts as 1), and the
    result is ``multiplier * piece * 10``. Note that ``piece`` in that
    product is the capped value.

    Args:
        rensa: chain count of this erase step.
        piece: number of erased puyos.
        color: number of distinct colours erased.
    """
    rensa_idx = min(rensa, len(rensaBonus) - 1)
    piece = min(piece, len(pieceBonus) - 1)
    color_idx = min(color, len(colorBonus) - 1)
    multiplier = rensaBonus[rensa_idx] + pieceBonus[piece] + colorBonus[color_idx]
    return (multiplier or 1) * piece * 10
試してみます。
# Build a random sample board and a fresh puyo pair.
board = utils.create_sample_board()
puyo, done = utils.create_new_puyo(board)
# Rotation 0 puts the movable puyo to the right of the centre one.
puyo.rotation = 0
# Place the pair, then let every column settle in place.
board = utils.set_puyo_to_board(board, puyo)
fall(board)
print(board)
# Find the erasable groups (four or more connected cells).
clear_groups = check_erase(board)
print(clear_groups)
# Erase them in place; get the erased cell count and the number of colours.
piece, color = erasing(board, clear_groups)
print(board)
print(piece, color)
# Score this erase as the first step of a chain (rensa = 1).
print(calc_score(1, piece, color))
[[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 0 1 0 0 3]
[0 0 4 0 0 4]
[0 0 3 3 2 2]
[0 0 3 2 4 3]
[0 2 1 4 1 2]]
[]
[[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 0 0 0 0 0]
[0 0 1 0 0 3]
[0 0 4 0 0 4]
[0 0 3 3 2 2]
[0 0 3 2 4 3]
[0 2 1 4 1 2]]
0 0
0
4.5 next_board
def next_board(board, puyo, action):
    """Play ``action`` with ``puyo`` and resolve the board until it is stable.

    Args:
        board: current 2-D board array; it is not modified.
        puyo: the pair to place. Its ``x`` and ``rotation`` are overwritten
            from ``action``.
        action: ``[x, rotation]``.

    Returns:
        ``(new_board, reward, done)``. If the pair collides at its current
        position the original ``board`` is returned with reward 0 and
        ``done=True``. Otherwise ``new_board`` is the settled board after all
        chained erasures, ``reward`` is the summed score of every chain step
        and ``done`` is False.
    """
    puyo.x = action[0]
    puyo.rotation = action[1]
    if utils.check_collision(board, puyo):
        return board, 0, True

    # With the movable puyo pointing up (dy = -1) from row 0 it would sit on
    # row -1, where set_puyo_to_board drops it and half of the pair is lost.
    # When the cell below is free, place the pair one row lower instead so
    # both puyos are on the board; gravity settles them identically.
    spawn_y = puyo.y
    movable_y = spawn_y + puyo.dy[puyo.rotation]
    if (movable_y < 0 and spawn_y + 1 < board.shape[0]
            and board[spawn_y + 1, puyo.x] == 0):
        puyo.y = spawn_y + 1
    new_board = utils.set_puyo_to_board(board, puyo)
    puyo.y = spawn_y  # leave the caller's puyo where it was

    reward = 0
    rensa = 0
    while True:
        utils.fall(new_board)
        clear_groups = utils.check_erase(new_board)
        if not len(clear_groups):
            break
        piece, color = utils.erasing(new_board, clear_groups)
        # Every further erase after a fall is the next step of the chain.
        rensa += 1
        reward += utils.calc_score(rensa, piece, color)
    return new_board, reward, False
これらの関数も使いたいので、puyo_utilsに保存しておきます。
%%writefile puyo_utils.py
import numpy as np
import random
class CFG:
    """Board dimensions and score bonus tables shared by the helpers."""

    # Board size in cells (rows, columns).
    Height, Width = 12, 6

    # Bonus indexed by the chain (rensa) count.
    rensaBonus = [
        0, 8, 16, 32, 64, 96, 128, 160,
        192, 224, 256, 288, 320, 352, 384, 416,
        448, 480, 512, 544, 576, 608, 640, 672,
    ]
    # Bonus indexed by the number of erased puyos.
    pieceBonus = [0, 0, 0, 0, 2, 3, 4, 5, 6, 7, 10, 10]
    # Bonus indexed by the number of distinct colours erased.
    colorBonus = [0, 0, 3, 6, 12, 24]
class Puyopuyo:
    """A falling pair: a centre puyo plus a movable puyo attached to it."""

    def __init__(self):
        # Position of the centre puyo (column x, row y).
        self.x, self.y = 2, 0
        # Offset of the movable puyo from the centre, indexed by rotation:
        # 0 = right, 1 = one row up, 2 = left, 3 = one row down.
        self.dx = [1, 0, -1, 0]
        self.dy = [0, -1, 0, 1]
        # Colours are drawn uniformly from 1..4, centre first.
        self.centerPuyo, self.movablePuyo = (
            random.randint(1, 4),
            random.randint(1, 4),
        )
        self.rotation = 1
class UnionFind:
    """Disjoint-set structure over the integers ``0 .. n-1``."""

    def __init__(self, n):
        self.n = n
        # A node whose parent is itself is the representative of its set.
        self.parents = np.arange(n)

    def find(self, x):
        """Return the representative of ``x``'s set, compressing the path."""
        # Locate the representative first ...
        top = x
        while self.parents[top] != top:
            top = self.parents[top]
        # ... then re-point every node on the walked path directly at it.
        current = x
        while current != top:
            following = self.parents[current]
            self.parents[current] = top
            current = following
        return top

    def merge(self, x, y):
        """Union the sets of ``x`` and ``y``, keeping the smaller representative."""
        a, b = self.find(x), self.find(y)
        if a == b:
            return
        if a > b:
            a, b = b, a
        self.parents[b] = a

    def clear_groups(self, limit):
        """Return all sets with at least ``limit`` members.

        Members are listed in ascending order and the sets are ordered by
        the index of their representative.
        """
        buckets = [[] for _ in range(self.n)]
        for member in range(self.n):
            buckets[self.find(member)].append(member)
        return [bucket for bucket in buckets if len(bucket) >= limit]
class utils:
    """Stateless helpers for the simplified Puyo Puyo simulation.

    Every helper is a static method and is called as ``utils.name(...)``.
    Boards are 2-D NumPy integer arrays indexed as ``board[row, column]``
    with row 0 at the top; 0 is an empty cell and a positive value is a
    puyo colour.
    """

    @staticmethod
    def create_sample_board(height=CFG.Height, width=CFG.Width):
        """Return a random board whose columns are stacks of random puyos.

        The stack heights are a random permutation of ``0 .. width-1``, one
        per column, and every puyo colour is drawn from 1..4.
        """
        sample_list = np.arange(width)
        random.shuffle(sample_list)
        board = np.zeros((height, width), dtype=np.int32)
        for j in range(width):
            # Never stack higher than the board; a negative row index would
            # wrap around to the bottom rows.
            stack = min(int(sample_list[j]), height)
            for i in range(stack):
                board[height - 1 - i, j] = random.randint(1, 4)
        return board

    @staticmethod
    def create_new_puyo(board):
        """Create a new pair and report whether its spawn cell is occupied.

        Returns:
            ``(puyo, done)`` where ``done`` is True when the cell the centre
            puyo spawns on (row ``puyo.y``, column ``puyo.x``) already holds
            a puyo. The previous check read ``board[2, 0]``, i.e. the row and
            column indices were swapped.
        """
        new_puyo = Puyopuyo()
        done = bool(board[new_puyo.y, new_puyo.x] > 0)
        return new_puyo, done

    @staticmethod
    def set_puyo_to_board(board, puyo):
        """Return a copy of ``board`` with ``puyo`` written at its position.

        The movable puyo is skipped when its row is above the board
        (negative row index).
        """
        new_board = np.copy(board)
        new_board[puyo.y, puyo.x] = puyo.centerPuyo
        puyo_dy = puyo.y + puyo.dy[puyo.rotation]
        puyo_dx = puyo.x + puyo.dx[puyo.rotation]
        if puyo_dy >= 0:
            new_board[puyo_dy, puyo_dx] = puyo.movablePuyo
        return new_board

    @staticmethod
    def check_collision(board, puyo):
        """Return True when ``puyo`` cannot occupy its current position.

        A position collides when either puyo would leave the board sideways
        or through the bottom, or would overlap an occupied cell. The cell of
        a movable puyo pointing up (rotation 1) is not checked. Board limits
        are taken from ``board.shape``.
        """
        height, width = board.shape
        rot = puyo.rotation
        # A centre column outside the board is never valid (a negative
        # column would otherwise wrap around to the other side).
        if not 0 <= puyo.x < width:
            return True
        if rot == 0 and puyo.x == width - 1:
            return True
        if rot == 2 and puyo.x == 0:
            return True
        if puyo.y >= height:
            return True
        if puyo.y == height - 1 and rot == 3:
            return True
        if board[puyo.y, puyo.x] > 0:
            return True
        if rot != 1 and board[puyo.y + puyo.dy[rot], puyo.x + puyo.dx[rot]] > 0:
            return True
        return False

    @staticmethod
    def create_action_list(board):
        """Return every ``[x, rotation]`` a fresh pair can take without colliding.

        The list is ordered by rotation first, then by column.
        """
        puyo2 = Puyopuyo()
        res = []
        for rot in range(4):
            for pos1 in range(board.shape[1]):
                puyo2.x = pos1
                puyo2.rotation = rot
                if not utils.check_collision(board, puyo2):
                    res.append([pos1, rot])
        return res

    @staticmethod
    def auto_fix_puyo(board, puyo):
        """Drop ``puyo`` straight down and return a new board with it placed.

        ``puyo.y`` is updated to the resting row. If the pair already
        collides on row 0 it cannot be placed, and an unchanged copy of
        ``board`` is returned (``puyo.y`` is left at 0).
        """
        for i in range(board.shape[0]):
            puyo.y = i
            if utils.check_collision(board, puyo):
                if i == 0:
                    # Stepping back to row -1 would make the placement index
                    # wrap around and overwrite a cell on the bottom row.
                    return np.copy(board)
                puyo.y = i - 1
                break
        return utils.set_puyo_to_board(board, puyo)

    @staticmethod
    def fall(board):
        """Apply gravity to ``board`` in place, column by column."""
        height, width = board.shape
        for j in range(width):
            # Lowest row of this column that is still free.
            target_row = height - 1
            for i in range(height - 1, -1, -1):
                if board[i, j] > 0:
                    if target_row > i:
                        board[target_row, j] = board[i, j]
                        board[i, j] = 0
                    target_row -= 1

    @staticmethod
    def check_erase(board, height=CFG.Height, width=CFG.Width):
        """Return the groups of four or more connected same-colour puyos.

        Each column is scanned from the bottom up and the scan stops at the
        first empty cell of that column. Each group is a list of flat cell
        indices ``width * row + column``.
        """
        uf = UnionFind(height * width)
        for j in range(width):
            for i in range(height - 1, -1, -1):
                if board[i, j] == 0:
                    break
                if i > 0 and board[i, j] == board[i - 1, j]:
                    uf.merge(width * i + j, width * (i - 1) + j)
                if j < width - 1 and board[i, j] == board[i, j + 1]:
                    uf.merge(width * i + j, width * i + j + 1)
        return uf.clear_groups(4)

    @staticmethod
    def erasing(board, clear_groups, height=CFG.Height, width=CFG.Width):
        """Erase ``clear_groups`` from ``board`` in place.

        Returns:
            ``(piece, color_num)``: the number of erased cells and the number
            of distinct colours among the groups; ``(0, 0)`` when there is
            nothing to erase. ``height`` is unused and kept only so the
            signature stays unchanged.
        """
        if not clear_groups:
            return 0, 0
        # A set accepts any colour id, unlike a fixed-size lookup table.
        colors = set()
        piece = 0
        for group in clear_groups:
            # Read the group's colour from its first cell before wiping it.
            row, col = divmod(group[0], width)
            colors.add(int(board[row, col]))
            for cell in group:
                row, col = divmod(cell, width)
                board[row, col] = 0
            piece += len(group)
        return piece, len(colors)

    @staticmethod
    def calc_score(rensa, piece, color):
        """Return the score for one erase step.

        Each argument is capped at the last index of its bonus table in
        ``CFG``; the three bonuses are summed into a multiplier (0 counts
        as 1) and the result is ``multiplier * piece * 10``, where ``piece``
        is the capped value.
        """
        rensa = min(rensa, len(CFG.rensaBonus) - 1)
        piece = min(piece, len(CFG.pieceBonus) - 1)
        color = min(color, len(CFG.colorBonus) - 1)
        scale = CFG.rensaBonus[rensa] + CFG.pieceBonus[piece] + CFG.colorBonus[color]
        if scale == 0:
            scale = 1
        return scale * piece * 10

    @staticmethod
    def next_board(board, puyo, action):
        """Play ``action`` (``[x, rotation]``) and resolve the board.

        Returns:
            ``(new_board, reward, done)``. If the pair collides at its
            current position the original ``board`` is returned with reward 0
            and ``done=True``. Otherwise ``new_board`` is the settled board
            after all chained erasures, ``reward`` is the summed score of
            every chain step and ``done`` is False.
        """
        puyo.x = action[0]
        puyo.rotation = action[1]
        if utils.check_collision(board, puyo):
            return board, 0, True

        # With the movable puyo pointing up (dy = -1) from row 0 it would sit
        # on row -1, where set_puyo_to_board drops it and half of the pair is
        # lost. When the cell below is free, place the pair one row lower so
        # both puyos are on the board; gravity settles them identically.
        spawn_y = puyo.y
        movable_y = spawn_y + puyo.dy[puyo.rotation]
        if (movable_y < 0 and spawn_y + 1 < board.shape[0]
                and board[spawn_y + 1, puyo.x] == 0):
            puyo.y = spawn_y + 1
        new_board = utils.set_puyo_to_board(board, puyo)
        puyo.y = spawn_y  # leave the caller's puyo where it was

        height, width = new_board.shape
        reward = 0
        rensa = 0
        while True:
            utils.fall(new_board)
            clear_groups = utils.check_erase(new_board, height, width)
            if not clear_groups:
                break
            piece, color = utils.erasing(new_board, clear_groups, height, width)
            # Every further erase after a fall is the next step of the chain.
            rensa += 1
            reward += utils.calc_score(rensa, piece, color)
        return new_board, reward, False
Overwriting puyo_utils.py