Source code for mushroom_rl.environments.puddle_world

import numpy as np
from scipy.stats import norm

from mushroom_rl.environments import Environment, MDPInfo
from mushroom_rl.utils.spaces import Discrete, Box
from mushroom_rl.utils.viewer import Viewer


class PuddleWorld(Environment):
    """
    Puddle world as presented in:
    "Off-Policy Actor-Critic". Degris T. et al., 2012.

    """
    def __init__(self, start=None, goal=None, goal_threshold=.1,
                 noise_step=.025, noise_reward=0, reward_goal=0., thrust=.05,
                 puddle_center=None, puddle_width=None, gamma=.99,
                 horizon=5000):
        """
        Constructor.

        Args:
            start (np.array, None): starting position of the agent;
            goal (np.array, None): goal position;
            goal_threshold (float, .1): distance threshold of the agent from
                the goal to consider it reached;
            noise_step (float, .025): noise in actions;
            noise_reward (float, 0): standard deviation of the Gaussian noise
                added to the reward;
            reward_goal (float, 0): reward obtained when reaching the goal
                state;
            thrust (float, .05): distance walked during each action;
            puddle_center (list, None): list of centers of the puddles;
            puddle_width (list, None): list of widths of the puddles;
            gamma (float, .99): discount factor;
            horizon (int, 5000): horizon of the problem.

        """
        # MDP parameters
        self._start = np.array([.2, .4]) if start is None else start
        self._goal = np.array([1., 1.]) if goal is None else goal
        self._goal_threshold = goal_threshold
        self._noise_step = noise_step
        self._noise_reward = noise_reward
        self._reward_goal = reward_goal
        self._thrust = thrust

        puddle_center = [[.3, .6], [.4, .5], [.8, .9]] \
            if puddle_center is None else puddle_center
        self._puddle_center = [np.array(center) for center in puddle_center]
        puddle_width = [[.1, .03], [.03, .1], [.03, .1]] \
            if puddle_width is None else puddle_width
        self._puddle_width = [np.array(width) for width in puddle_width]

        # Actions 0-3 move the agent by `thrust` along one axis
        # (0: left, 1: right, 2: down, 3: up); action 4 is a no-op.
        self._actions = [np.zeros(2) for _ in range(5)]
        for i in range(4):
            self._actions[i][i // 2] = thrust * (i % 2 * 2 - 1)

        # MDP properties
        action_space = Discrete(5)
        observation_space = Box(0., 1., shape=(2,))
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # Visualization
        self._pixels = None
        self._viewer = Viewer(1.0, 1.0)

        super().__init__(mdp_info)
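
    # Illustrative construction examples (an editor's sketch, not part of the
    # original source): the defaults reproduce the three-puddle layout of
    # Degris et al. (2012); start, goal and puddle geometry can be overridden.
    #
    #     mdp = PuddleWorld()
    #     mdp = PuddleWorld(start=np.array([.1, .1]),
    #                       puddle_center=[[.5, .5]],
    #                       puddle_width=[[.1, .1]])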

    def reset(self, state=None):
        if state is None:
            self._state = self._start.copy()
        else:
            self._state = state

        return self._state

    def step(self, action):
        idx = action[0]
        # Apply the selected action plus uniform noise, then keep the agent
        # inside the unit square.
        self._state += self._actions[idx] + np.random.uniform(
            low=-self._noise_step, high=self._noise_step, size=(2,))
        self._state = np.clip(self._state, 0., 1.)

        # The episode ends when the L1 distance to the goal drops below the
        # threshold.
        absorbing = np.linalg.norm((self._state - self._goal),
                                   ord=1) < self._goal_threshold

        if not absorbing:
            reward = np.random.randn() * self._noise_reward + self._get_reward(
                self._state)
        else:
            reward = self._reward_goal

        return self._state, reward, absorbing, {}
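
    # Note (editor's addition): since the action space is Discrete(5), `step`
    # expects the chosen action index wrapped in a 1-element array, e.g.
    #
    #     state, reward, absorbing, _ = mdp.step(np.array([1]))  # move right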

    def render(self):
        if self._pixels is None:
            # Pre-render the reward landscape once as a background image.
            img_size = 100
            pixels = np.zeros((img_size, img_size, 3))
            for i in range(img_size):
                for j in range(img_size):
                    x = i / img_size
                    y = j / img_size
                    pixels[i, img_size - 1 - j] = self._get_reward(
                        np.array([x, y]))

            pixels -= pixels.min()
            pixels *= 255. / pixels.max()
            self._pixels = np.floor(255 - pixels)

        self._viewer.background_image(self._pixels)
        self._viewer.circle(self._state, 0.01, color=(0, 255, 0))

        goal_area = [
            [-self._goal_threshold, 0],
            [0, self._goal_threshold],
            [self._goal_threshold, 0],
            [0, -self._goal_threshold]
        ]
        self._viewer.polygon(self._goal, 0, goal_area,
                             color=(255, 0, 0), width=1)

        self._viewer.display(0.1)

    def stop(self):
        if self._viewer is not None:
            self._viewer.close()

    def _get_reward(self, state):
        # Base step cost of -1, plus a penalty given by the product of
        # Gaussian densities centered on each puddle.
        reward = -1.
        for cen, wid in zip(self._puddle_center, self._puddle_width):
            reward -= 2. * norm.pdf(state[0], cen[0], wid[0]) * norm.pdf(
                state[1], cen[1], wid[1])

        return reward
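

# Minimal usage sketch (an illustrative addition, not part of the original
# MushroomRL module): run a short random-action rollout and print the rewards.
# It assumes the standard MushroomRL interfaces, i.e. `mdp.info` exposing the
# MDPInfo and `action_space.n` giving the number of discrete actions.
if __name__ == '__main__':
    mdp = PuddleWorld()

    state = mdp.reset()
    for t in range(20):
        action = np.array([np.random.randint(mdp.info.action_space.n)])
        state, reward, absorbing, _ = mdp.step(action)
        print(t, state, reward)
        if absorbing:
            break
    mdp.stop()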