チュートリアルStep5

Last updated at 2024-10-11 / Posted at 2024-10-04

　ぷよぷよプログラミングAI学習システムは、中高生の自己調整学習に向けて開発されました。
　どうやって、Chromebookで、１０分で、素敵な人工知能を作れるのでしょうか？
　秘密は、３つ。
　１　jupyter liteで、webクライアントがpythonコードを実行
　２　超軽量な機械学習フレームワークdezeroを採用
　３　ぷよぷよのstageとactionがコンパクト

Github&X

ぷよぷよプログラミングAI学習用まとめ

チュートリアルまとめ

Step5 Env

5 Envとrandom_agentを作ります。

いよいよ、強化学習に必要なEnvを作ります。agentとEnvを作れば、機械学習ができます。

envの基本は、reset()とstep()です。この2つの関数を作ります。

import numpy as np
from puyo_utils import *
import random

class EnvPuyopuyo:
    """Puyo Puyo environment built around ``reset()`` and ``step()``.

    The board is a ``height`` x ``width`` int32 array that starts out all
    zeros. Game rules (collision, gravity, erasing, scoring) are delegated
    to the ``utils`` helpers.
    """

    def __init__(self, height=CFG.Height, width=CFG.Width ):
        self.height = height
        self.width = width
        self.reset()

    def reset(self):
        """Start a new game and return ``(board, puyo)``."""
        shape = (self.height, self.width)
        self.board = np.zeros(shape, dtype=np.int32)
        self.puyo, self.done = utils.create_new_puyo(self.board)
        return self.board, self.puyo

    def step(self, action):
        """Place the current puyo as ``action`` says and resolve all chains.

        ``action`` is ``[x, rotation]``. Returns
        ``(board, puyo, reward, done)`` where ``puyo`` is the next pair.
        """
        self.puyo.x = action[0]
        self.puyo.rotation = action[1]

        # An illegal placement ends the episode: the environment is reset
        # and the fresh state is returned with zero reward and done=True.
        if utils.check_collision(self.board, self.puyo):
            self.reset()
            return self.board, self.puyo, 0, True

        # NOTE(review): the puyo is written at its current y and then dropped
        # by fall(); if set_puyo_to_board skips cells above row 0, a rotation
        # pointing upward would lose its second puyo - verify in puyo_utils.
        self.board = utils.set_puyo_to_board(self.board, self.puyo)

        total_score = 0
        chain = 0
        # Alternate gravity and erasing until nothing more can be cleared.
        utils.fall(self.board)
        groups = utils.check_erase(self.board)
        while len(groups):
            erased, n_colors = utils.erasing(self.board, groups)
            chain += 1
            total_score += utils.calc_score(chain, erased, n_colors)
            utils.fall(self.board)
            groups = utils.check_erase(self.board)

        self.puyo, self.done = utils.create_new_puyo(self.board)
        return self.board, self.puyo, total_score, self.done

試すのは、少しだけ面倒です。まずは、random_agentを作ります。

def random_agent(board, puyo):
    """Return a randomly chosen action from ``utils.create_action_list``.

    ``puyo`` is accepted but not used. When the action list is empty the
    fixed action ``[2, 1]`` is returned instead.
    """
    candidates = utils.create_action_list(board)
    if len(candidates) > 0:
        return candidates[random.randrange(len(candidates))]
    return [2,1]

とりあえず、１００回プレイして、各ゲームの得点と置いたぷよの数を表示し、最高点を見ます。

# Play 100 games with the random agent and record, per game, the total
# reward and the number of puyos placed.
env = EnvPuyopuyo()

reward_list = []
total_blocks_list = []
for _ in range(100):
    board, puyo = env.reset()
    total_reward = 0
    total_piece = 0
    done = False
    while not done:
        action = random_agent(board, puyo)
        board, puyo, reward, done = env.step(action)
        total_reward += reward
        total_piece += 2  # every step is counted as one pair of puyos
    # The loop above only exits once the episode has finished.
    reward_list.append(total_reward)
    total_blocks_list.append(total_piece)

reward_list = np.array(reward_list)
print(reward_list)
print(total_blocks_list)

[11970   950  9880  2150   950  2720  5510  1600  4220  7400  1270  6860
  1600  2000 14360  1670   800   800  1600  2950   400  8290  5040  5820
  3950  4030  3570   550  8710  3120  2000   400  9900   400  5030  2000
  2000  3710  2470  2800   800  1750  2720  9950  1500  8000 10370  2950
  5610     0  2150  5090  3670  3840  4160 14160  4780  4500  1600  1520
  6690  9120  5990  2150  7270   550   720  3040  1920  1750  7720     0
  6750  2150  2400  1600  1200  1600  3000  1520  5260  1350  2150  1600
  5840  2950  1200  7410  4470 27640   400  3430   550  2300  4140  7270
   720  6330   550  3420]
[160, 60, 134, 86, 74, 106, 130, 98, 130, 128, 40, 116, 96, 104, 150, 76, 82, 90, 102, 102, 62, 166, 114, 112, 84, 102, 90, 78, 120, 104, 98, 54, 170, 78, 118, 98, 98, 108, 86, 76, 74, 50, 94, 174, 52, 84, 146, 104, 106, 58, 70, 114, 118, 114, 96, 160, 132, 114, 66, 52, 144, 180, 132, 104, 78, 44, 30, 80, 94, 98, 140, 34, 80, 90, 90, 78, 92, 72, 104, 92, 130, 72, 64, 72, 146, 108, 102, 126, 126, 186, 50, 116, 74, 114, 84, 148, 50, 132, 72, 110]

いい加減にやっても、それなりに点数が入るんだな。

これらのクラスや関数も使いたいので、puyo_utilsに追加して保存しておきます。ファイル名もpuyopuyo.pyに替えます。

%%writefile puyopuyo.py
import numpy as np
import random

class CFG:
    """Game constants: board size, colour count and scoring tables."""

    # Board dimensions in cells (rows, columns).
    Height = 12
    Width = 6
    # Puyo colours are drawn from 1..n_color.
    n_color = 4

    # Bonus tables used by utils.calc_score, indexed by chain count,
    # number of erased puyos and number of erased colours respectively.
    # rensaBonus is 0, 8, 16 followed by multiples of 32 up to 672.
    rensaBonus = [0, 8, 16] + list(range(32, 673, 32))
    pieceBonus = [0, 0, 0, 0] + [2, 3, 4, 5, 6, 7] + [10, 10]
    colorBonus = [0, 0, 3, 6, 12, 24]


class Puyopuyo:
    """A puyo pair: a centre puyo plus a movable puyo attached to it."""

    def __init__(self):
        # Starting cell: column 2, row 0.
        self.x, self.y = 2, 0
        # Offset of the movable puyo from the centre, indexed by rotation 0..3.
        self.dx = [1,  0, -1, 0]
        self.dy = [0, -1,  0, 1]
        # Two random colours in 1..n_color; the centre colour is drawn first.
        self.centerPuyo, self.movablePuyo = (
            random.randint(1, CFG.n_color) for _ in range(2)
        )
        # Initial rotation index (dy[1] == -1, i.e. movable puyo one row up).
        self.rotation = 1

class EnvPuyopuyo:
    """Puyo Puyo environment built around ``reset()`` and ``step()``.

    The board is a ``height`` x ``width`` int32 array that starts out all
    zeros. Game rules (collision, gravity, erasing, scoring) are delegated
    to the ``utils`` helpers.
    """

    def __init__(self, height=CFG.Height, width=CFG.Width ):
        self.height = height
        self.width = width
        self.reset()

    def reset(self):
        """Start a new game and return ``(board, puyo)``."""
        shape = (self.height, self.width)
        self.board = np.zeros(shape, dtype=np.int32)
        self.puyo, self.done = utils.create_new_puyo(self.board)
        return self.board, self.puyo

    def step(self, action):
        """Place the current puyo as ``action`` says and resolve all chains.

        ``action`` is ``[x, rotation]``. Returns
        ``(board, puyo, reward, done)`` where ``puyo`` is the next pair.
        """
        self.puyo.x = action[0]
        self.puyo.rotation = action[1]

        # An illegal placement ends the episode: the environment is reset
        # and the fresh state is returned with zero reward and done=True.
        if utils.check_collision(self.board, self.puyo):
            self.reset()
            return self.board, self.puyo, 0, True

        # NOTE(review): with the puyo still at row 0 and rotation 1, the
        # movable puyo's row is -1 (dy[1] == -1) and set_puyo_to_board only
        # writes it when the row is >= 0, so only the centre puyo is placed.
        # Confirm this is intended.
        self.board = utils.set_puyo_to_board(self.board, self.puyo)

        total_score = 0
        chain = 0
        # Alternate gravity and erasing until nothing more can be cleared.
        utils.fall(self.board)
        groups = utils.check_erase(self.board)
        while len(groups):
            erased, n_colors = utils.erasing(self.board, groups)
            chain += 1
            total_score += utils.calc_score(chain, erased, n_colors)
            utils.fall(self.board)
            groups = utils.check_erase(self.board)

        self.puyo, self.done = utils.create_new_puyo(self.board)
        return self.board, self.puyo, total_score, self.done


class Agents:
    """Namespace for agent policies, called as ``Agents.<name>(board, puyo)``."""

    @staticmethod
    def random_agent(board, puyo):
        """Return a randomly chosen action from ``utils.create_action_list``.

        Declared as a static method so it behaves the same whether it is
        called on the class or on an instance.

        Args:
            board: current board, passed to ``utils.create_action_list``.
            puyo: current puyo pair; accepted but not used by this policy.

        Returns:
            One ``[x, rotation]`` entry of the action list, or ``[2, 1]``
            when the list is empty.
        """
        action_list = utils.create_action_list(board)
        if len(action_list) == 0:
            return [2, 1]
        return random.choice(action_list)


class UnionFind:
    """Disjoint-set forest over the integers ``0 .. n-1``.

    ``merge`` always keeps the smaller root index as the new root, so each
    group is represented by its smallest member.
    """

    def __init__(self, n):
        self.n = n
        # parents[i] == i means i is the root of its group.
        self.parents = np.arange(n)

    def find(self, x):
        """Return the root of the group containing ``x``.

        Implemented iteratively (with path compression) so that long parent
        chains cannot exceed Python's recursion limit.
        """
        root = x
        while self.parents[root] != root:
            root = self.parents[root]
        # Second pass: point every node on the walked path at the root.
        while x != root:
            parent = self.parents[x]
            self.parents[x] = root
            x = parent
        return int(root)

    def merge(self, x, y):
        """Join the groups of ``x`` and ``y``; the smaller root wins."""
        x = self.find(x)
        y = self.find(y)
        if x == y:
            return
        if x > y:
            x, y = y, x
        self.parents[y] = x

    def clear_groups(self, limit):
        """Return every group with at least ``limit`` members.

        Each group is a list of member indices in ascending order; groups
        are ordered by their root index.
        """
        members = [[] for _ in range(self.n)]
        for i in range(self.n):
            members[self.find(i)].append(i)
        return [group for group in members if len(group) >= limit]


class utils:
    """Stateless board helpers, called as ``utils.<name>(...)``.

    Boards are 2-D integer arrays indexed ``board[row, column]``; 0 is an
    empty cell and positive values are puyo colours. All helpers are static
    methods so they work from the class or from an instance.
    """

    @staticmethod
    def create_sample_board(height=CFG.Height, width=CFG.Width):
        """Return a random board whose column heights are a shuffle of 0..width-1."""
        sample_list = np.arange(width)
        random.shuffle(sample_list)
        board = np.zeros(height * width, dtype = np.int32).reshape(height, width)

        for j in range(width):
            # Fill column j from the bottom row upward.
            for i in range(sample_list[j]):
                board[height - 1 - i, j] = random.randint(1, CFG.n_color)

        return board

    @staticmethod
    def create_new_puyo(board):
        """Create the next puyo pair and report whether the game is over.

        Returns:
            ``(puyo, done)`` where ``done`` is True when ``board[2, 0]`` is
            occupied.
        """
        new_puyo = Puyopuyo()
        # NOTE(review): the index is (row 2, column 0), whereas a new Puyopuyo
        # starts at x=2, y=0 (i.e. board[0, 2]). Confirm which cell is meant
        # before changing it.
        done = bool(board[2, 0] > 0)
        return new_puyo, done

    @staticmethod
    def set_puyo_to_board(board, puyo):
        """Return a copy of ``board`` with ``puyo`` written at its position.

        The movable puyo is only written when its row is >= 0; a movable
        puyo above the top row is dropped.
        """
        new_board = np.copy(board)
        new_board[puyo.y, puyo.x ] = puyo.centerPuyo
        puyo_dy = puyo.y + puyo.dy[puyo.rotation]
        puyo_dx = puyo.x + puyo.dx[puyo.rotation]
        if puyo_dy >= 0:
            new_board[puyo_dy, puyo_dx ] = puyo.movablePuyo
        return new_board

    @staticmethod
    def check_collision(board, puyo):
        """Return True when ``puyo`` cannot occupy its current position.

        Limits come from ``board.shape``. For rotation 1 only the centre
        cell is tested; the movable puyo's cell is not.
        """
        height, width = board.shape
        rot = puyo.rotation
        # Movable puyo would leave the board on the right / left.
        if rot == 0 and puyo.x == width - 1:
            return True
        if rot == 2 and puyo.x == 0:
            return True
        # Centre below the board, or movable puyo below the bottom row.
        if puyo.y >= height:
            return True
        if puyo.y == height - 1 and rot == 3:
            return True
        if board[puyo.y, puyo.x] > 0:
            return True
        if rot != 1 and board[puyo.y + puyo.dy[rot], puyo.x + puyo.dx[rot]] > 0:
            return True
        return False

    @staticmethod
    def create_action_list(board):
        """Return every ``[x, rotation]`` that does not collide at row 0.

        Entries are ordered by rotation first, then by column.
        """
        puyo2 = Puyopuyo()
        res = []
        for rot in range(4):
            for pos1 in range(board.shape[1]):
                puyo2.x = pos1
                puyo2.rotation = rot
                if not utils.check_collision(board, puyo2):
                    res.append([pos1, rot])
        return res

    @staticmethod
    def auto_fix_puyo(board, puyo):
        """Lower ``puyo`` until it would collide, then write it to a board copy.

        ``puyo.y`` is modified in place. Returns the new board.
        """
        for i in range(board.shape[0]):
            puyo.y = i
            if utils.check_collision(board, puyo):
                # NOTE(review): if row 0 already collides this leaves y == -1,
                # which set_puyo_to_board would index from the bottom row.
                # Callers presumably check row 0 first - confirm.
                puyo.y -= 1
                break
        new_board = utils.set_puyo_to_board(board, puyo)
        return new_board

    @staticmethod
    def fall(board):
        """Apply gravity in place: pack every column's puyos onto the bottom."""
        height, width = board.shape
        for j in range(width):
            # Lowest row not yet claimed by a settled puyo in this column.
            target_row = height - 1
            for i in range(height - 1, -1, -1):
                if board[i,j] > 0:
                    if target_row > i:
                        board[target_row,j] = board[i, j]
                        board[i, j] = 0
                    target_row -= 1

    @staticmethod
    def check_erase(board, height=CFG.Height, width=CFG.Width):
        """Return groups of 4 or more connected same-colour puyos.

        Each group is a list of flat cell indices ``width * row + column``.
        Columns are scanned bottom-up and the scan stops at the first empty
        cell, so gravity must already have been applied (see ``fall``).
        """
        uf = UnionFind(height * width)

        for j in range(width):
            for i in range(height-1, -1, -1):
                if board[i, j] == 0:
                    break

                # Link with the cell above and the cell to the right.
                if i > 0 and board[i, j] == board[i - 1, j]:
                    uf.merge(width * i + j, width * (i - 1) + j )
                if j < width - 1 and board[i, j]==board[i, j + 1]:
                    uf.merge(width * i + j, width * i + j + 1)

        return uf.clear_groups(4)

    @staticmethod
    def erasing(board, clear_groups, height=CFG.Height, width=CFG.Width):
        """Clear the cells of ``clear_groups`` from ``board`` in place.

        Returns:
            ``(piece, color_num)``: the number of cleared cells and the
            number of distinct colours among the groups (``(0, 0)`` when
            there is nothing to clear).
        """
        if len(clear_groups) == 0:
            return 0, 0
        colors = set()
        piece = 0
        for group in clear_groups:
            # A group has a single colour, so its first cell identifies it.
            x, y = group[0] % width, group[0] // width
            colors.add(int(board[y, x]))
            for cell in group:
                x, y = cell % width, cell // width
                board[y, x] = 0
                piece += 1
        return piece, len(colors)

    @staticmethod
    def calc_score(rensa, piece, color):
        """Return the score for one erase step.

        Each argument is clamped to the last index of its bonus table in
        ``CFG``; the score is ``scale * piece * 10`` using the clamped
        ``piece``, where ``scale`` is the sum of the three bonuses (at
        least 1).
        """
        rensa = min(rensa, len(CFG.rensaBonus) - 1)
        piece = min(piece, len(CFG.pieceBonus) - 1)
        color = min(color, len(CFG.colorBonus) - 1)

        scale = CFG.rensaBonus[rensa] + CFG.pieceBonus[piece] + CFG.colorBonus[color]
        if scale == 0:
            scale = 1
        return scale * piece * 10

    @staticmethod
    def next_board(board, puyo, action):
        """Simulate ``action`` on a copy of ``board`` without touching the original.

        ``puyo.x`` and ``puyo.rotation`` are overwritten from ``action``.

        Returns:
            ``(new_board, reward, done)``. On collision the original board
            object is returned with reward 0 and ``done`` True.
        """
        puyo.x = action[0]
        puyo.rotation = action[1]
        if utils.check_collision(board, puyo):
            return board, 0, True
        new_board = utils.set_puyo_to_board(board, puyo)

        reward = 0
        rensa = 0
        # Alternate gravity and erasing until nothing more can be cleared.
        while True:
            utils.fall(new_board)
            clear_groups = utils.check_erase(new_board)
            if not len(clear_groups):
                break
            piece, color = utils.erasing(new_board, clear_groups)
            rensa += 1
            reward += utils.calc_score(rensa, piece, color)

        return new_board, reward, False

Writing puyopuyo.py

random_agentをアルゴリズムのひな型として、残しておきます。webサイトで使うことができます。

%%writefile random_agent.py

import numpy as np
import random
from puyopuyo import *

class RND_Agent:
    """Callable random agent that returns ``[x, rotation_index * 90]``."""

    def __init__(self):
        self.action_size = 2

    def __call__(self, board_list, puyo_c):
        """Pick a random legal action for the given board.

        Args:
            board_list: object with a ``to_py()`` method yielding a
                Height x Width nested sequence whose cells are ``None`` or
                a mapping with a ``'puyo'`` entry (presumably a Pyodide
                proxy of the web client's board - confirm against caller).
            puyo_c: pair ``(centre colour, movable colour)`` of the current
                puyo.

        Returns:
            ``[x, rotation]`` with the rotation index multiplied by 90.
        """
        board_list = board_list.to_py()
        board = np.zeros((CFG.Height, CFG.Width), dtype=np.int32)
        for i in range(CFG.Height):
            for j in range(CFG.Width):
                cell = board_list[i][j]
                if cell is not None:
                    board[i][j] = int(cell['puyo'])

        # Not needed by the random policy itself; kept so the current pair
        # is at hand when this class is reused as a starting point.
        puyo = Puyopuyo()
        puyo.centerPuyo = puyo_c[0]
        puyo.movablePuyo = puyo_c[1]

        action_list = utils.create_action_list(board)
        if len(action_list) == 0:
            # Fallback: column 2, rotation index 1, scaled by 90 like every
            # other action this method returns.
            return [2, 90]
        random_id = random.randint(0, len(action_list)-1)
        action = action_list[random_id]
        action[1] = action[1] * 90
        return action

# Module-level agent instance.
agent=RND_Agent()
# NOTE(review): bare expression with no effect when imported as a module;
# presumably the host that evaluates this file reads the value of the last
# expression - confirm before removing.
agent

Writing random_agent.py

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up