Source code for mushroom_rl.environments.generators.simple_chain

import numpy as np

from mushroom_rl.environments.finite_mdp import FiniteMDP


def generate_simple_chain(state_n, goal_states, prob, rew, mu=None, gamma=.9,
                          horizon=100):
    """
    Simple chain generator.

    Args:
        state_n (int): number of states;
        goal_states (list): list of goal states;
        prob (float): probability of success of an action;
        rew (float): reward obtained in goal states;
        mu (np.ndarray): initial state probability distribution;
        gamma (float, .9): discount factor;
        horizon (int, 100): the horizon.

    Returns:
        A FiniteMDP object built with the provided parameters.

    """
    p = compute_probabilities(state_n, prob)
    r = compute_reward(state_n, goal_states, rew)

    assert mu is None or len(mu) == state_n

    return FiniteMDP(p, r, mu, gamma, horizon)
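

# Illustrative addition, not part of the original module: a minimal sketch of
# how this generator might be used. reset() and step() follow the usual
# mushroom_rl Environment interface; the exact step() return signature shown
# below is an assumption and may differ across versions.
def _demo_simple_chain():
    # Five-state chain with the goal in the middle; an action succeeds with
    # probability 0.8 and entering the goal state yields a reward of 1.
    mdp = generate_simple_chain(state_n=5, goal_states=[2], prob=.8, rew=1.)

    state = mdp.reset()
    for _ in range(10):
        # Pick a random action (0 or 1) and advance the chain by one step.
        action = np.array([np.random.randint(2)])
        state, reward, absorbing, _ = mdp.step(action)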


def compute_probabilities(state_n, prob):
    """
    Compute the transition probability matrix.

    Args:
        state_n (int): number of states;
        prob (float): probability of success of an action.

    Returns:
        The transition probability matrix.

    """
    # p[state, action, next_state]: action 0 moves towards higher-index
    # states, action 1 towards lower-index states.
    p = np.zeros((state_n, 2, state_n))

    for i in range(state_n):
        if i == 0:
            # Moving left from the leftmost state keeps the agent in place.
            p[i, 1, i] = 1.
        else:
            p[i, 1, i] = 1. - prob
            p[i, 1, i - 1] = prob

        if i == state_n - 1:
            # Moving right from the rightmost state keeps the agent in place.
            p[i, 0, i] = 1.
        else:
            p[i, 0, i] = 1. - prob
            p[i, 0, i + 1] = prob

    return p
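

# Illustrative addition, not part of the original module: a small check of the
# transition structure produced above for a three-state chain with prob=0.8.
def _demo_probabilities():
    p = compute_probabilities(3, .8)

    assert np.allclose(p[0, 0], [.2, .8, 0.])  # right from state 0 succeeds with 0.8
    assert np.allclose(p[0, 1], [1., 0., 0.])  # left at the border stays in state 0
    assert np.allclose(p[1, 1], [.8, .2, 0.])  # left from state 1
    assert np.allclose(p.sum(axis=2), 1.)      # each (state, action) row is a distribution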


def compute_reward(state_n, goal_states, rew):
    """
    Compute the reward matrix.

    Args:
        state_n (int): number of states;
        goal_states (list): list of goal states;
        rew (float): reward obtained in goal states.

    Returns:
        The reward matrix.

    """
    r = np.zeros((state_n, 2, state_n))

    for g in goal_states:
        if g != 0:
            # Reward for entering the goal from the left neighbour (action 0).
            r[g - 1, 0, g] = rew

        if g != state_n - 1:
            # Reward for entering the goal from the right neighbour (action 1).
            r[g + 1, 1, g] = rew

    return r
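

# Illustrative addition, not part of the original module: rewards are attached
# only to transitions that enter a goal state, indexed as
# r[state, action, next_state]. Example for a five-state chain with goal 2:
def _demo_reward():
    r = compute_reward(5, [2], 1.)

    assert r[1, 0, 2] == 1.           # moving right from state 1 into the goal
    assert r[3, 1, 2] == 1.           # moving left from state 3 into the goal
    assert np.count_nonzero(r) == 2   # no reward anywhere else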