Source code for mushroom_rl.environments.grid_world

import numpy as np

from mushroom_rl.core import Environment, MDPInfo
from mushroom_rl.utils import spaces
from mushroom_rl.utils.viewer import Viewer


class AbstractGridWorld(Environment):
    """
    Abstract class to build a grid world.

    """
    def __init__(self, mdp_info, height, width, start, goal):
        """
        Constructor.

        Args:
            height (int): height of the grid;
            width (int): width of the grid;
            start (tuple): x-y coordinates of the start;
            goal (tuple): x-y coordinates of the goal.

        """
        assert not np.array_equal(start, goal)

        assert goal[0] < height and goal[1] < width,\
            'Goal position not suitable for the grid world dimension.'

        self._state = None
        self._height = height
        self._width = width
        self._start = start
        self._goal = goal

        # Visualization
        self._viewer = Viewer(self._width, self._height, 500,
                              self._height * 500 // self._width)

        super().__init__(mdp_info)

    def reset(self, state=None):
        if state is None:
            state = self.convert_to_int(self._start, self._width)

        self._state = state

        return self._state

    def step(self, action):
        state = self.convert_to_grid(self._state, self._width)

        new_state, reward, absorbing, info = self._step(state, action)
        self._state = self.convert_to_int(new_state, self._width)

        return self._state, reward, absorbing, info

    def render(self, record=False):
        # Draw the grid lines
        for row in range(1, self._height):
            for col in range(1, self._width):
                self._viewer.line(np.array([col, 0]),
                                  np.array([col, self._height]))
                self._viewer.line(np.array([0, row]),
                                  np.array([self._width, row]))

        # Goal cell (green square)
        goal_center = np.array([.5 + self._goal[1],
                                self._height - (.5 + self._goal[0])])
        self._viewer.square(goal_center, 0, 1, (0, 255, 0))

        # Start cell (red square); _start is already stored as (row, col)
        # grid coordinates, so no conversion is needed here
        start_center = np.array([.5 + self._start[1],
                                 self._height - (.5 + self._start[0])])
        self._viewer.square(start_center, 0, 1, (255, 0, 0))

        # Current state (blue circle)
        state_grid = self.convert_to_grid(self._state, self._width)
        state_center = np.array([.5 + state_grid[1],
                                 self._height - (.5 + state_grid[0])])
        self._viewer.circle(state_center, .4, (0, 0, 255))

        frame = self._viewer.get_frame() if record else None

        self._viewer.display(.1)

        return frame

    def stop(self):
        self._viewer.close()

    def _step(self, state, action):
        raise NotImplementedError('AbstractGridWorld is an abstract class.')

    def _grid_step(self, state, action):
        # Actions: 0 = up, 1 = down, 2 = left, 3 = right. Moves that would
        # leave the grid leave the state unchanged.
        action = action[0]
        if action == 0:
            if state[0] > 0:
                state[0] -= 1
        elif action == 1:
            if state[0] + 1 < self._height:
                state[0] += 1
        elif action == 2:
            if state[1] > 0:
                state[1] -= 1
        elif action == 3:
            if state[1] + 1 < self._width:
                state[1] += 1

    @staticmethod
    def convert_to_grid(state, width):
        # Flat integer index -> (row, col) grid coordinates
        return np.array([state[0] // width, state[0] % width])

    @staticmethod
    def convert_to_int(state, width):
        # (row, col) grid coordinates -> flat integer index
        return np.array([state[0] * width + state[1]])
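
The two static helpers define the mapping between (row, col) grid coordinates and the flat integer state the rest of the class works with. A minimal round-trip sketch (the width and cell below are arbitrary illustration values):

import numpy as np

from mushroom_rl.environments.grid_world import AbstractGridWorld

width = 4
cell = np.array([2, 3])                               # (row, col) on a grid 4 cells wide
idx = AbstractGridWorld.convert_to_int(cell, width)   # array([11]), i.e. 2 * 4 + 3
back = AbstractGridWorld.convert_to_grid(idx, width)  # array([2, 3])
assert np.array_equal(back, cell)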

class GridWorld(AbstractGridWorld):
    """
    Standard grid world.

    """
    def __init__(self, height, width, goal, start=(0, 0), dt=0.1):
        """
        Constructor.

        Args:
            height (int): height of the grid;
            width (int): width of the grid;
            goal (tuple): 2D coordinates of the goal state;
            start (tuple, (0, 0)): 2D coordinates of the starting state;
            dt (float, 0.1): the control timestep of the environment.

        """
        # MDP properties
        observation_space = spaces.Discrete(height * width)
        action_space = spaces.Discrete(4)
        horizon = 100
        gamma = .9
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon, dt)

        super().__init__(mdp_info, height, width, start, goal)

    def _step(self, state, action):
        self._grid_step(state, action)

        if np.array_equal(state, self._goal):
            reward = 10
            absorbing = True
        else:
            reward = 0
            absorbing = False

        return state, reward, absorbing, {}
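
A minimal usage sketch for GridWorld, assuming the module is importable as mushroom_rl.environments.grid_world (the grid size and goal below are arbitrary illustration values); note that actions are single-element arrays, since _grid_step reads action[0]:

import numpy as np

from mushroom_rl.environments.grid_world import GridWorld

env = GridWorld(height=5, width=5, goal=(4, 4))
state = env.reset()                        # start (0, 0) encoded as array([0])
state, reward, absorbing, info = env.step(np.array([1]))  # action 1: move down
# state is now array([5]), i.e. cell (1, 0); reward stays 0 until the goal is reached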

class GridWorldVanHasselt(AbstractGridWorld):
    """
    A variant of the grid world as presented in:
    "Double Q-Learning". Hasselt H. V.. 2010.

    """
    def __init__(self, height=3, width=3, goal=(0, 2), start=(2, 0), dt=0.1):
        """
        Constructor.

        Args:
            height (int, 3): height of the grid;
            width (int, 3): width of the grid;
            goal (tuple, (0, 2)): 2D coordinates of the goal state;
            start (tuple, (2, 0)): 2D coordinates of the starting state;
            dt (float, 0.1): the control timestep of the environment.

        """
        # MDP properties
        observation_space = spaces.Discrete(height * width)
        action_space = spaces.Discrete(4)
        horizon = np.inf
        gamma = .95
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon, dt)

        super().__init__(mdp_info, height, width, start, goal)

    def _step(self, state, action):
        if np.array_equal(state, self._goal):
            reward = 5
            absorbing = True
        else:
            self._grid_step(state, action)
            reward = np.random.choice([-12, 10])
            absorbing = False

        return state, reward, absorbing, {}
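
Off the goal, the reward is drawn uniformly from {-12, +10}, so its expectation is -1 per step but its variance is large; this is the setting used in the Double Q-Learning paper to expose the overestimation bias of standard Q-Learning. A quick sketch with the default layout:

import numpy as np

from mushroom_rl.environments.grid_world import GridWorldVanHasselt

env = GridWorldVanHasselt()      # 3x3 grid, start (2, 0), goal (0, 2)
state = env.reset()              # array([6]), i.e. 2 * 3 + 0
state, reward, absorbing, _ = env.step(np.array([0]))     # action 0: move up
assert reward in (-12, 10) and not absorbing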