python初心者でもMinesweeperは作れる
tkinterの勉強の題材にMinesweeperを用いてみた
下記に貼るリンクを参考にフレーム周りは作成できた
main.py
メイン画面の生成
from tkinter import *
# Build the fixed-size top-level window.
self.root = Tk()
self.root.title("マインスイーパ")
self.root.resizable(0, 0)

# --- Menu bar ---------------------------------------------------------
self.menu_ROOT = Menu(self.root)
self.root.configure(menu=self.menu_ROOT)
self.menu_GAME = Menu(self.menu_ROOT, tearoff=False)
self.menu_MODE = Menu(self.menu_ROOT, tearoff=False)

# Top-level cascades: difficulty and mode.
for caption, submenu in (
    ('難易度(G)', self.menu_GAME),
    ('モード(R)', self.menu_MODE),
):
    self.menu_ROOT.add_cascade(label=caption, under=4, menu=submenu)

# Difficulty entries: (label, index of the underlined character, handler).
for caption, underlined, handler in (
    ("初級(B)", 3, self.game_0level_set),
    ("中級(I)", 3, self.game_1level_set),
    ("上級(E)", 3, self.game_2level_set),
    ("エキスパート(L)", 7, self.game_3level_set),
):
    self.menu_GAME.add_command(label=caption, under=underlined, command=handler)

# Mode entries: normal play and learning.
for caption, handler in (
    ("通常(K)", self.common_mode),
    ("学習(J)", self.learning_mode),
):
    self.menu_MODE.add_command(label=caption, under=3, command=handler)

# Quit entry sits directly on the menu bar.
self.menu_ROOT.add_command(label="終了(X)", under=3, command=self.game_close)

# --- Container frames -------------------------------------------------
panel_bg = 'LightGray'
self.root_frame = Frame(
    self.root, relief='groove', borderwidth=5, bg=panel_bg
)
self.status_frame = Frame(
    self.root_frame, height=50, relief='sunken', borderwidth=3, bg=panel_bg
)
self.game_frame = Frame(
    self.root_frame, relief='sunken', borderwidth=3, bg=panel_bg
)
self.root_frame.pack()

# --- Status bar -------------------------------------------------------
self.status_frame.pack(pady=5, padx=5, fill='x')

# Initial game state shown in / driven from the status bar.
self.font_size = 16
self.button_font_size = 13
self.stop_timer = False
self.timer = 0
self.bomb_num = 50
self.bomb_flag_num = 0
self.clear_math_num = 0
self.start_flag = True

# Both fonts are derived from the sizes above; the size is passed as a string.
label_font = ('Helvetica', str(self.font_size), 'bold')
button_font = ('Helvetica', str(self.button_font_size), 'bold')

# Remaining-bomb counter.
remaining_text = '残り' + str(self.bomb_num)
self.exist_bomb_count_label = Label(
    self.status_frame, text=remaining_text, bg=panel_bg, fg='Blue',
    font=label_font
)
self.exist_bomb_count_label.place(relx=0.02, rely=0.1)

# Reset button.
self.reset_button = Button(
    self.status_frame, text='リセット', bg=panel_bg, fg='Black',
    font=button_font, command=self.reset_button_onclick
)
self.reset_button.place(relx=0.25, rely=0.1)

# Auto-solve button.
self.auto_button = Button(
    self.status_frame, text='オート', bg=panel_bg, fg='Black',
    font=button_font, command=self.auto_button_onclick
)
self.auto_button.place(relx=0.55, rely=0.1)

# Elapsed-time label.
self.timer_label = Label(
    self.status_frame, text='00:00', bg=panel_bg, fg='Red',
    font=label_font
)
self.timer_label.place(relx=0.83, rely=0.1)

# Label for the "cleared" message; starts out empty.
self.clear_label = Label(
    self.status_frame, text="", bg=panel_bg, fg='Yellow',
    font=label_font
)
self.clear_label.place(relx=0.50, rely=0.1)

# --- Game board -------------------------------------------------------
self.game_frame.pack(pady=5, padx=5)

# One raised Frame per cell, stored row-major in self.frame_list.
self.frame_list = []
self.width = 20
self.height = 15
self.frame_width = 20
self.frame_height = 20
for cell_index in range(self.height * self.width):
    grid_row, grid_column = divmod(cell_index, self.width)
    cell = Frame(
        self.game_frame, width=self.frame_width, height=self.frame_height,
        bd=3, relief='raised', bg=panel_bg
    )
    cell.bind("<1>", self.left_click)
    cell.bind("<3>", self.right_click)
    # Per-cell game data is attached directly to the widget.
    cell.num = cell_index
    cell.bomb_count = 0
    cell.is_bomb = False
    self.frame_list.append(cell)
    cell.grid(row=grid_row, column=grid_column)

# NOTE(review): `resolve` is not defined in this listing; presumably the
# auto-solver module imported elsewhere in main.py -- confirm.
self.resol = resolve.resoleve(
    self.game_frame, self.height, self.frame_height, self.frame_width,
    self.width, self.frame_list, self.bomb_num
)
参考にしたサイト
PythonのTkinterを使ってみる
次に、パネルを開いたりする機能面だが自動でMinesweeperを解かせるようにしてみた
機械学習に興味を持ち強化学習を触ってみた
Minesweeperを強化学習で解かせてみようと思い実践したが、初心者の私には厳しかった。
とりあえず、またソースと参考にしたサイトを貼っておく
learning.py
import sys
import gym
import gym.spaces
from tkinter import *
import numpy as np
import pandas as pd
import time
import random
import main
class MyEnv(gym.Env):
    """Gym-style environment plus tabular Q-learning helpers for the board.

    The board is passed in as ``frame_list``: a flat sequence of Tk frames
    carrying ``is_bomb`` and ``bomb_count`` attributes. The Q-table is a
    pandas DataFrame with the columns ``action``, ``observation`` and
    ``score``.
    """

    MAX_STEPS = 200
    TYPE = [
        0,  # 0: not opened
        1,  # 1: opened
        2,  # 2: wall
    ]

    def __init__(self):
        """Declare the board size and the action/observation spaces."""
        super().__init__()
        self.height = 20
        self.width = 15
        cell_count = self.height * self.width
        # One discrete action per cell.
        self.action_space = gym.spaces.Discrete(cell_count)
        # NOTE(review): _observe() returns a comma-separated string that can
        # contain the token "10", which does not fit this Box (high=9).
        self.observation_space = gym.spaces.Box(
            low=0,
            high=9,
            shape=(cell_count,)
        )

    def _reset(self, frame_list):
        """Reset the episode bookkeeping and return the first observation."""
        self.done = False
        self.steps = 0
        return self._observe(frame_list)

    def _step(self, frame_list, action, moved):
        """Account for one step that was already applied to the board.

        Args:
            frame_list: flat sequence of cell frames.
            action: index into ``frame_list`` of the chosen cell.
            moved: truthy when the chosen cell could actually be opened.

        Returns:
            ``(observation, reward, done, info)`` where ``info`` is an empty
            dict.
        """
        self.steps += 1
        observation = self._observe(frame_list)
        reward = self._get_reward(frame_list[action], moved)
        self.done = self._is_done(frame_list[action])
        return observation, reward, self.done, {}

    def _close(self):
        """No resources to release."""
        pass

    def _seed(self, seed=None):
        """Seeding is not implemented; the argument is ignored."""
        pass

    def _get_reward(self, frame, moved):
        """Return the reward for having chosen ``frame``.

        * -10000 when the cell could not be opened (``moved`` is falsy),
        * -10 when a bomb was opened,
        * -1 otherwise, so that shorter episodes score better.
        """
        if not moved:
            return -10000
        if frame.is_bomb:
            return -10
        return -1

    def _is_pushable(self, frame_list, action):
        """Return True when ``action`` is a valid index of a still-raised cell."""
        return (
            0 <= action < len(frame_list)
            and frame_list[action].cget('relief') == 'raised'
        )

    def _is_done(self, frame):
        """Return True when a bomb was hit or MAX_STEPS has been exceeded."""
        return bool(frame.is_bomb) or self.steps > self.MAX_STEPS

    def _observe(self, frame_list):
        """Encode the board as a comma-separated string, one token per cell.

        Tokens:
            * ``"9"``  - relief is ``ridge`` and background is ``yellow``
            * ``"0"``..``"8"`` - relief is ``ridge``; the cell's ``bomb_count``
            * ``"10"`` - any other relief

        NOTE(review): the published listing lost its indentation. The final
        ``else`` is taken to belong to the relief check, which gives every
        non-ridge cell the token "10" and keeps the observation fixed-length.
        """
        tokens = []
        for frame in frame_list:
            if frame.cget('relief') == "ridge":
                if frame.cget('bg') == "yellow":
                    tokens.append("9")
                elif frame.bomb_count in range(9):
                    tokens.append(str(int(frame.bomb_count)))
                # A ridge cell whose count is outside 0..8 emits no token,
                # exactly as in the original if/elif ladder.
            else:
                tokens.append("10")
        return ",".join(tokens)

    # Action-value function
    def update_q_table(self, _q_table, _action, _observation, _next_observation, _reward, _episode):
        """Apply one Q-learning update and return the (possibly new) table.

        For a known ``(observation, action)`` pair the stored score is updated
        in place with ``Q += alpha * (reward + gamma * max Q(s', .) - Q)``.
        For an unknown pair a new row with score ``alpha * reward`` is
        appended; in that case a new DataFrame is returned.

        Args:
            _q_table: DataFrame with ``action``, ``observation``, ``score``.
            _action: index of the action that was taken.
            _observation: observation string before the action.
            _next_observation: observation string after the action.
            _reward: reward received for the action.
            _episode: unused; kept for interface compatibility.

        Returns:
            The Q-table to use from now on.
        """
        alpha = 0.2   # learning rate
        gamma = 0.99  # discount factor

        # Look up the exact (state, action) pair. Checking the observation
        # alone would silently discard the first visit of a new action in an
        # already known state.
        pair_mask = (
            (_q_table['observation'] == _observation)
            & (_q_table['action'] == _action)
        )

        if pair_mask.any():
            # max Q(s', a'): the best *score* of the next state, as a scalar.
            # Keeping it a Series would index-align against q_value and
            # produce NaN.
            next_scores = _q_table.loc[
                _q_table['observation'] == _next_observation, 'score'
            ]
            next_max_q_value = next_scores.max() if len(next_scores) > 0 else 0
            # Q(s, a) before the update.
            q_value = _q_table.loc[pair_mask, 'score']
            _q_table.loc[pair_mask, 'score'] = (
                q_value + alpha * (_reward + gamma * next_max_q_value - q_value)
            )
        else:
            # DataFrame.append was removed in pandas 2.0, so build a one-row
            # frame and concatenate. The row label is the current row count.
            new_row = pd.DataFrame(
                {
                    'action': [_action],
                    'observation': [_observation],
                    'score': [alpha * _reward],
                },
                index=[len(_q_table)],
            ).reindex(columns=_q_table.columns)
            if _q_table.empty:
                _q_table = new_row
            else:
                _q_table = pd.concat([_q_table, new_row])
        return _q_table

    # Epsilon-greedy action selection
    def get_action(self, _env, _q_table, _observation, _episode):
        """Choose the next cell index.

        With probability ``epsilon``, or when the observation has no rows in
        the table, a uniformly random cell is returned. Otherwise an action
        not yet recorded for this observation is picked at random; once every
        action has a row, the highest-scoring one is returned (ties resolved
        towards the largest action index).

        Args:
            _env: unused; kept for interface compatibility.
            _q_table: DataFrame with ``action``, ``observation``, ``score``.
            _observation: current observation string.
            _episode: unused; kept for interface compatibility.

        Returns:
            The chosen action as a built-in ``int``.
        """
        epsilon = 0.002
        action_count = self.width * self.height
        known_rows = _q_table[_q_table['observation'] == _observation]

        if np.random.uniform(0, 1) > epsilon and len(known_rows) > 0:
            # A set lookup tolerates duplicate or out-of-range stored actions,
            # which made list.remove() raise ValueError.
            tried = set(known_rows['action'].values)
            untried = [a for a in range(action_count) if a not in tried]
            if untried:
                _action = np.random.choice(untried)
            else:
                best_score = known_rows['score'].max()
                _action = known_rows.loc[
                    known_rows['score'] == best_score, 'action'
                ].max()
        else:
            _action = np.random.choice(action_count)
        return int(_action)
OpenAI Gym で自前の環境をつくる
OpenAI Gym 入門
一応自前の環境は作成して、Minesweeperを自動で解かせることには成功した
しかしデータの与え方がまずく、このプログラムで最適解を得るには地球が消滅しているくらいには時間が掛かりそうだ
GitHubにコードを載せておきます
https://github.com/yamauchi5748/Minesweeper/tree/master/Minesweeper