LoginSignup
0
0

More than 5 years have passed since last update.

windowsでTensorFlow その10

Posted at

概要

WindowsのTensorFlow環境で、OpenAI Gymをやってみた。
新しい環境を作ってみた。

写真
ss.jpg

環境
Windows 7 SP1 64bit
Anaconda3
TensorFlow 1.0
PyQt5
OpenAI Gym 0.5

サンプルコード

落ちてくる果物を籠で受け取る。
フルーツバスケット

class FBEnvironment(core.Env):
    """Fruit-basket catching game exposed as an OpenAI Gym environment.

    A fruit falls one row per step while the agent slides a three-cell-wide
    basket along the bottom row of a 10x10 board. When the fruit reaches
    row 9 it is either caught (reward +1.0, a new fruit is spawned at the
    top) or missed (reward -1.0, episode ends).

    Actions: 0 = stay, 1 = move the basket one column toward lower indices,
    2 = move the basket one column toward higher indices.

    State:
        x: column of the falling fruit (0..8).
        y: row of the falling fruit (0 at the top, 9 at the basket row).
        z: centre column of the basket (clamped to 1..8 so that all three
           basket cells stay on the board).
    """

    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second' : 1
    }

    def __init__(self):
        self.viewer = None
        self.state = None
        self.action_space = spaces.Discrete(3)
        # Must describe what _get_ob() really returns: a (1, 100) array whose
        # cells are 0 or 1.
        self.observation_space = spaces.Box(low=0.0, high=1.0, shape=(1, 100))
        self._seed()
        self.x = 0
        self.y = 0
        self.z = 2

    def _spawn_column(self):
        """Draw a fruit column in 0..8 from the environment's seeded RNG."""
        # Using self.np_random (not the global `random` module) is what makes
        # _seed() actually control the episode.
        return int(self.np_random.randint(0, 9))

    def _get_ob(self):
        """Return the board as a (1, 100) array with 1s at fruit and basket."""
        ban = np.zeros((10, 10))
        # NOTE(review): the fruit is written at [x, y] while the basket is
        # written at [row 9, column z], so the fruit cell is transposed
        # relative to the basket. Left unchanged because fixing it would
        # alter the observation encoding existing agents were trained on.
        ban[self.x, self.y] = 1
        ban[9, self.z - 1:self.z + 2] = 1
        return np.reshape(ban, (-1, 100))

    def _seed(self, seed = None):
        """Create the environment RNG and return the seed used, in a list."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _terminal(self):
        """Return True when the fruit is on the basket row but not in the basket."""
        return self.y == 9 and abs(self.x - self.z) > 1

    def _reset(self):
        """Start a new episode and return the initial observation."""
        self.x = self._spawn_column()
        self.y = 0
        self.z = 4
        return self._get_ob()

    def _step(self, action):
        """Advance one tick.

        Moves the basket according to ``action``, drops the fruit one row,
        and returns ``(observation, reward, terminal, info)``.
        """
        assert self.action_space.contains(action), "%r (%s) invalid" % (action, type(action))
        if action == 1:
            delta = -1
        elif action == 2:
            delta = 1
        else:
            delta = 0
        # Clamp so the basket cells z-1..z+1 remain inside columns 0..9.
        self.z = min(max(1, self.z + delta), 8)
        self.y += 1
        # Evaluate termination before a caught fruit is respawned below.
        terminal = self._terminal()
        reward = 0.0
        if self.y == 9:
            if abs(self.x - self.z) <= 1:
                reward = 1.0
                self.x = self._spawn_column()
                self.y = 0
            else:
                reward = -1.0
        return (self._get_ob(), reward, terminal, {})

    def _render(self, mode = 'human', close = False):
        """Draw the current frame, or close the viewer when ``close`` is True.

        Returns whatever the viewer's ``render`` returns; an RGB array is
        requested from it when ``mode == 'rgb_array'``.
        """
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        # Slow the animation down; done only when a frame is actually drawn
        # so that closing the viewer is not delayed.
        time.sleep(0.2)
        # Imported lazily so the environment can be used without a display.
        from gym.envs.classic_control import rendering
        if self.viewer is None:
            self.viewer = rendering.Viewer(500, 500)
            self.viewer.set_bounds(-2.5, 2.5, -2.5, 2.5)
        # Map board cells (0.4 units each) into viewer coordinates.
        fruit_x = -1.8 + self.x * 0.4
        fruit_y = 2.0 - self.y * 0.4
        basket_x = -1.8 + self.z * 0.4
        fruit_pos = rendering.Transform(translation = (fruit_x, fruit_y))
        basket_pos = rendering.Transform(translation = (basket_x, -1.8))
        self.viewer.draw_circle(0.2, 20, color = (1, 1, 0)).add_attr(fruit_pos)
        # Board outline.
        self.viewer.draw_line((-2.0, 2.0), (2.0, 2.0), color = (0, 0, 0))
        self.viewer.draw_line((-2.0, 2.0), (-2.0, -2.0), color = (0, 0, 0))
        self.viewer.draw_line((2.0, 2.0), (2.0, -2.0), color = (0, 0, 0))
        self.viewer.draw_line((-2.0, -2.0), (2.0, -2.0), color = (0, 0, 0))
        self.viewer.draw_polygon([(-0.6, -0.2), (0.6, -0.2), (0.6, 0.2), (-0.6, 0.2)], color = (0, 1, 0)).add_attr(basket_pos)
        return self.viewer.render(return_rgb_array = mode == 'rgb_array')
0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0