import warnings
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    from gym_minigrid.wrappers import RGBImgPartialObsWrapper, ImgObsWrapper

import gym

from copy import deepcopy
from collections import deque

from mushroom_rl.core import Environment, MDPInfo
from mushroom_rl.environments import Gym
from mushroom_rl.rl_utils.spaces import Discrete, Box
from mushroom_rl.utils.frames import LazyFrames, preprocess_frame


class MiniGrid(Gym):
    """
    Interface for gym_minigrid environments. It makes it possible to use all
    MiniGrid environments that do not use text instructions, such as
    MultiRoom, KeyCorridor, BlockedUnlockPickup, and ObstructedMaze.
    This environment uses either MiniGrid's default 7x7x3 observations or
    their pixel 56x56x3 version. In both cases, the state is partially
    observable. To compensate for the partial observability, LazyFrames are
    used.

    """
    def __init__(self, name, horizon=None, gamma=0.99, history_length=4,
                 fixed_seed=None, use_pixels=False):
        """
        Constructor.

        Args:
            name (str): name of the environment;
            horizon (int, None): the horizon;
            gamma (float, 0.99): the discount factor;
            history_length (int, 4): number of frames to form a state;
            fixed_seed (int, None): if passed, it fixes the seed of the
                environment at every reset. This way, the environment is
                fixed rather than procedurally generated;
            use_pixels (bool, False): if True, MiniGrid's default 7x7x3
                observations are converted to images of resolution 56x56x3.

        """
        # MDP creation
        self._not_pybullet = True
        self._first = True

        env = gym.make(name)
        obs_high = 10.
        if use_pixels:
            env = RGBImgPartialObsWrapper(env)  # Get pixel observations
            obs_high = 255.
        env = ImgObsWrapper(env)  # Get rid of the 'mission' field
        self.env = env

        self._fixed_seed = fixed_seed

        self._img_size = env.observation_space.shape[0:2]
        self._history_length = history_length

        # Get the default horizon
        if horizon is None:
            horizon = self.env.max_steps

        # MDP properties
        action_space = Discrete(self.env.action_space.n)
        observation_space = Box(
            low=0., high=obs_high,
            shape=(history_length, self._img_size[1], self._img_size[0]))
        # Hack to ignore the gym time limit (do not use np.inf, since
        # MiniGrid returns r(t) = 1 - 0.9 * t / max_steps)
        self.env.max_steps = horizon + 1
        dt = 1 / self.env.unwrapped.metadata["render_fps"]
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon, dt)

        Environment.__init__(self, mdp_info)

        self._state = None
    def reset(self, state=None):
        if self._fixed_seed is not None:
            # Fix the seed at every reset, so that the grid layout is fixed
            # rather than procedurally generated
            self.env.seed(self._fixed_seed)
        self._state = preprocess_frame(self.env.reset(), self._img_size)
        # Fill the frame buffer with copies of the initial observation
        self._state = deque(
            [deepcopy(self._state) for _ in range(self._history_length)],
            maxlen=self._history_length
        )

        return LazyFrames(list(self._state), self._history_length), {}
    def step(self, action):
        obs, reward, absorbing, info = self.env.step(action)
        reward = float(reward)
        if reward > 0:
            # MiniGrid discounts rewards based on timesteps,
            # but we need raw rewards
            reward = 1.

        self._state.append(preprocess_frame(obs, self._img_size))

        return LazyFrames(list(self._state),
                          self._history_length), reward, absorbing, info
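

if __name__ == "__main__":
    # Minimal usage sketch (not part of the original module), assuming
    # gym_minigrid is installed and 'MiniGrid-Empty-8x8-v0' is a registered
    # environment: build the MDP and run a short random rollout.
    import numpy as np

    mdp = MiniGrid('MiniGrid-Empty-8x8-v0', history_length=4)
    state, info = mdp.reset()
    for _ in range(10):
        action = np.random.randint(mdp.info.action_space.n)
        state, reward, absorbing, info = mdp.step(action)
        # LazyFrames stacks the last history_length frames; it can be
        # converted to an array of shape (history_length, H, W) on demand
        frames = np.asarray(state)
        if absorbing:
            break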