import numpy as np
class MDPInfo:
    """
    This class is used to store the information of the environment.

    It is a plain container: the four constructor arguments are stored
    unchanged as public attributes with the same names.

    """

    def __init__(self, observation_space, action_space, gamma, horizon):
        """
        Constructor.

        Args:
            observation_space ([Box, Discrete]): the state space;
            action_space ([Box, Discrete]): the action space;
            gamma (float): the discount factor;
            horizon (int): the horizon.

        """
        self.observation_space = observation_space
        self.action_space = action_space
        self.gamma = gamma
        self.horizon = horizon

    @property
    def size(self):
        """
        Returns:
            The result of ``observation_space.size + action_space.size``,
            i.e. the sizes of the state and action spaces combined with
            ``+``. Intended for discrete spaces only.

        """
        # NOTE(review): whether ``+`` adds numbers or concatenates tuples
        # depends on the type of the spaces' ``size`` attribute, which is
        # defined outside this class -- confirm against the space classes.
        return self.observation_space.size + self.action_space.size

    @property
    def shape(self):
        """
        Returns:
            The concatenation of the shape tuple of the state and action
            spaces.

        """
        return self.observation_space.shape + self.action_space.shape
class Environment(object):
    """
    Basic interface used by any mushroom environment.

    Subclasses are expected to override ``reset``, ``step`` and, if
    rendering is supported, ``render``; the versions defined here raise
    ``NotImplementedError``.

    """

    def __init__(self, mdp_info):
        """
        Constructor.

        Args:
            mdp_info (MDPInfo): an object containing the info of the
                environment.

        """
        self._mdp_info = mdp_info

    def seed(self, seed):
        """
        Set the seed of the environment.

        The seed is forwarded unchanged to ``self.env.seed`` when the
        instance has an ``env`` attribute.

        Args:
            seed: the value of the seed.

        Raises:
            NotImplementedError: if the instance has no ``env`` attribute
                to delegate to.

        """
        # Only environments wrapping another object (stored as ``env``) can
        # be seeded by this default implementation.
        if hasattr(self, 'env'):
            self.env.seed(seed)
        else:
            raise NotImplementedError

    def reset(self, state=None):
        """
        Reset the current state.

        Args:
            state (np.ndarray, None): the state to set to the current state.

        Returns:
            The current state.

        Raises:
            NotImplementedError: always, in this base class.

        """
        raise NotImplementedError

    def step(self, action):
        """
        Move the agent from its current state according to the action.

        Args:
            action (np.ndarray): the action to execute.

        Returns:
            The state reached by the agent executing ``action`` in its current
            state, the reward obtained in the transition and a flag to signal
            if the next state is absorbing. Also an additional dictionary is
            returned (possibly empty).

        Raises:
            NotImplementedError: always, in this base class.

        """
        raise NotImplementedError

    def render(self):
        """
        Render the environment.

        Raises:
            NotImplementedError: always, in this base class.

        """
        raise NotImplementedError

    def stop(self):
        """
        Method used to stop an mdp. Useful when dealing with real world
        environments, simulators, or when using openai-gym rendering.

        The base implementation does nothing.

        """
        pass

    @property
    def info(self):
        """
        Returns:
            An object containing the info of the environment.

        """
        return self._mdp_info

    @staticmethod
    def _bound(x, min_value, max_value):
        """
        Method used to bound state and action variables.

        Args:
            x: the variable to bound;
            min_value: the minimum value;
            max_value: the maximum value;

        Returns:
            The bounded variable.

        """
        # The upper bound is applied first and the lower bound last, so the
        # lower bound wins if ``min_value`` exceeds ``max_value``.
        return np.maximum(min_value, np.minimum(x, max_value))