0
0

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?

More than 1 year has passed since the last update.

ε-greedy法を使って◯✕ゲーム作ってみた

Last updated at Posted at 2023-09-22

すみません、自分にはまだε-greedy法を説明できないので、実装したコードだけ載せます。
試行回数は10000回にしました。
///追記///
試行回数100回でも「勝ち確の場所には置く」「相手のリーチは防ぐ」という最低限のことはしてくれていました。

↓ソースコード

◯✕ゲーム
import math
import random
import numpy as np
import copy

# Square root of two, kept as a module-level constant.
root2 = math.sqrt(2)
# Initial exploration probability used by the epsilon-greedy search.
epsilon = 0.5

# Board as a flat list of nine cells: empty -> 0, O -> 3, X -> 4
board = [0] * 9

class player:
    """One side of the game, identified by the value it writes on the board.

    Attributes:
        disc: board value this player places.
        opp_disc: board value the opposing player places.
    """

    def __init__(self, disc=0, opp_disc=0):
        """Create a player.

        Both arguments default to 0, so ``player()`` still works and the
        attributes can be assigned afterwards as before.
        """
        self.disc = disc
        self.opp_disc = opp_disc
class board_place:
    """Playout statistics collected for one board cell.

    Attributes:
        coor: index of the cell on the flat board.
        win: number of recorded wins.
        tie: number of recorded ties.
        lose: number of recorded losses.
        reward: current reward estimate for the cell.
    """

    def __init__(self, coor=0):
        """Start with all counters at zero.

        ``coor`` defaults to 0, so ``board_place()`` still works and the
        coordinate can be assigned afterwards as before.
        """
        self.coor = coor  # cell index
        self.win = 0
        self.tie = 0
        self.lose = 0
        self.reward = 0  # expected reward
        
# First player writes 3 on the board and faces 4.
turn1 = player()
turn1.disc, turn1.opp_disc = 3, 4
# Second player writes 4 on the board and faces 3.
turn2 = player()
turn2.disc, turn2.opp_disc = 4, 3
# Indices of the cells that are still empty; all nine at the start.
can_place = list(range(9))

# Win check (decided -> 1, not yet -> 0)
def win_defeat(board, disc):
    """Return 1 if any line of three cells sums to ``disc*3``, otherwise 0.

    The test is sum-based: a line counts as won when the three cell values
    add up to three times ``disc``.

    Args:
        board: indexable sequence of cell values (indices 0-8 are read).
        disc: the cell value to test for.
    """
    # Checked in this order: top row, left column, both diagonals,
    # right column, bottom row, middle column, middle row.
    lines = (
        (0, 1, 2),
        (0, 3, 6),
        (0, 4, 8),
        (2, 4, 6),
        (2, 5, 8),
        (6, 7, 8),
        (1, 4, 7),
        (3, 4, 5),
    )
    if any(board[a] + board[b] + board[c] == disc * 3 for a, b, c in lines):
        return 1
    return 0
# Draw check (draw -> 1, not yet -> 0)
def drow(board):
    """Return 1 when all nine cells are non-zero, otherwise 0.

    Only board fullness is examined; whether somebody has already won is
    not considered here.
    """
    # Every index 0-8 is visited, so nothing is skipped by short-circuiting.
    occupied = sum(1 for idx in range(9) if board[idx] != 0)
    return 1 if occupied == 9 else 0
# Print the board
def show_board(board):
    """Print cells 0-8 as three rows of three, followed by one blank line.

    Cells within a row are separated by a single space.
    """
    for idx in range(9):
        if idx == 8:
            trailer = "\n\n"  # end the last row and add an empty line
        elif idx % 3 == 2:
            trailer = "\n"  # end of the first or second row
        else:
            trailer = " "
        print(board[idx], end=trailer)
# Play a move for real
def run(board,player,place):
    """Place ``player.disc`` on cell ``place`` and print the updated board.

    Args:
        board: the live board list; modified in place.
        player: object whose ``disc`` value is written to the board.
        place: cell index, as an int or anything ``int()`` accepts (the
            human move arrives as a string).

    Raises:
        ValueError: if ``place`` is not an integer, or is not currently
            listed in the module-level ``can_place``. In that case neither
            the board nor ``can_place`` is modified.
    """
    global can_place
    cell = int(place)
    # Claim the cell before touching the board: list.remove raises
    # ValueError for a cell that is not free, so an illegal move can no
    # longer overwrite an existing mark before the error surfaces.
    can_place.remove(cell)
    board[cell] = player.disc
    show_board(board)

# Simulated playout
def trial_run(board,player,place,reward_place):# place is the cell played first
    """Play one random game on copies of the board and record its outcome.

    The first move is ``reward_place[place].coor`` for ``player``. After
    that the opponent and the player alternate, each picking uniformly at
    random from the remaining empty cells, until someone wins or the board
    is full. Exactly one of ``win``/``tie``/``lose`` on
    ``reward_place[place]`` is incremented. The live ``board`` and the
    module-level ``can_place`` are left untouched.
    """
    stats = reward_place[place]
    open_cells = copy.deepcopy(can_place)  # working copy of the empty cells
    sim_board = copy.deepcopy(board)  # working copy of the board
    move = stats.coor

    # Forced opening move for the player being evaluated.
    sim_board[int(move)] = player.disc
    open_cells.remove(move)
    won = win_defeat(sim_board, player.disc)
    full = drow(sim_board)
    if won == 1:
        stats.win += 1
        return
    if full == 1:
        stats.tie += 1
        return

    # Still undecided: alternate random moves, opponent first.
    opponent_to_move = True
    while True:
        disc = player.opp_disc if opponent_to_move else player.disc
        move = random.choice(open_cells)
        sim_board[int(move)] = disc
        open_cells.remove(move)
        won = win_defeat(sim_board, disc)
        full = drow(sim_board)
        if won == 1:
            if opponent_to_move:
                stats.lose += 1
            else:
                stats.win += 1
            return
        if full == 1:
            stats.tie += 1
            return
        opponent_to_move = not opponent_to_move
    
def epsilon_run(board,player,warmup_trials=5,greedy_trials=9995):
    """Pick a move for ``player`` with an epsilon-greedy playout search.

    Every empty cell first gets ``warmup_trials`` random playouts. Then
    ``greedy_trials`` further playouts are spent one at a time: with the
    current exploration probability a random empty cell is tried,
    otherwise the cell with the best current estimate. The exploration
    probability starts at the module-level ``epsilon`` and is multiplied
    by 0.99 after each of those playouts; the decay is local to this call,
    so every call starts from the same exploration rate.

    A cell's reward estimate is the mean over its own playouts, scoring a
    win as +1, a tie as +0.5 and a loss as -1.

    Args:
        board: current board; passed to ``trial_run``.
        player: the side to move; passed to ``trial_run``.
        warmup_trials: playouts given to each empty cell up front.
        greedy_trials: number of epsilon-greedy playouts after the warm-up.

    Returns:
        Tuple ``(win, tie, lose, place)``: the playout counts recorded for
        the best-rated cell, and that cell's index.

    Raises:
        ValueError: if no empty cell is left in ``can_place``.
    """
    if not can_place:
        raise ValueError("epsilon_run: no empty cell left to evaluate")

    def refresh_reward(stats):
        # Normalise by the playouts actually recorded for this cell so that
        # estimates of often- and rarely-sampled cells stay comparable.
        played = stats.win + stats.tie + stats.lose
        if played:
            stats.reward = (stats.win + stats.tie / 2 - stats.lose) / played

    def best_place():
        # Re-derive the leader from the current estimates: the leader's own
        # estimate can drop, so a remembered maximum would go stale.
        return max(can_place, key=lambda cell: reward_place[cell].reward)

    explore_rate = epsilon  # local copy; the module-level value is not modified

    # One entry per board index so trial_run can index by cell; occupied
    # cells only hold their index as a placeholder.
    reward_place = []
    for i in range(9):
        if i in can_place:
            stats = board_place()
            stats.coor = i
            reward_place.append(stats)
        else:
            reward_place.append(i)

    # Warm-up: sample every empty cell the same number of times.
    for place in can_place:
        for _ in range(warmup_trials):
            trial_run(board, player, place, reward_place)
        refresh_reward(reward_place[place])
    max_coor = best_place()

    for _ in range(greedy_trials):
        if random.random() < explore_rate:
            place = random.choice(can_place)  # explore
        else:
            place = max_coor  # exploit
        trial_run(board, player, place, reward_place)
        refresh_reward(reward_place[place])
        max_coor = best_place()
        explore_rate *= 0.99

    # Report the best-rated cell, not whichever cell was sampled last.
    chosen = reward_place[max_coor]
    return chosen.win, chosen.tie, chosen.lose, max_coor

def _ask_move():
    """Prompt until the human enters the index of a cell that is still empty."""
    while True:
        answer = input("どこに置きますか?")
        try:
            cell = int(answer)
        except ValueError:
            print("0〜8の数字を入力してください。")
            continue
        if cell in can_place:
            return cell
        print("そこには置けません。空いているマスを選んでください。")


# Game loop: the search plays O (turn1) first, the human plays X (turn2).
show_board(board)
for i in range(5):
    win,tie,lose,place=epsilon_run(board,turn1)
    # Percentages are relative to the playouts recorded for the chosen cell,
    # not to a fixed total.
    total = (win + tie + lose) or 1
    win = round(100 * win / total, 2)
    tie = round(100 * tie / total, 2)
    lose = round(100 * lose / total, 2)
    print(f"勝ち:{win}%、引き分け:{tie}%、負け:{lose}%です。\n")
    run(board,turn1,place)
    result = win_defeat(board, turn1.disc)
    if result == 1:
        print("◯の勝ちです")
        break
    if i == 4:
        # O has just made the fifth first-player move without winning.
        print("引き分けです")
        break
    # Only accept a legal cell so the board cannot be corrupted or the
    # program crashed by a typo.
    place = _ask_move()
    run(board,turn2,place)
    result = win_defeat(board, turn2.disc)
    if result == 1:
        print("×の勝ちです")
        break
0
0
3

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?