Source code for mushroom_rl.solvers.car_on_hill

def step(mdp, state, action):
    """
    Perform a step in the tree.

    Args:
        mdp (CarOnHill): the Car-On-Hill environment;
        state (np.array): the state;
        action (np.array): the action.

    Returns:
        The transition obtained by executing ``action`` in ``state``.

    """
    mdp.reset(state)

    return mdp.step(action)
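A minimal usage sketch for ``step``, not part of the module source: it assumes ``CarOnHill`` is importable from ``mushroom_rl.environments``, that a state is a ``(position, velocity)`` array, and that action ``[1]`` pushes the car to the right.

import numpy as np
from mushroom_rl.environments import CarOnHill
from mushroom_rl.solvers.car_on_hill import step

mdp = CarOnHill()
state = np.array([-0.5, 0.])  # (position, velocity)
next_state, reward, absorbing, _ = step(mdp, state, [1])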
def bfs(mdp, frontier, k, max_k):
    """
    Perform Breadth-First tree search.

    Args:
        mdp (CarOnHill): the Car-On-Hill environment;
        frontier (list): the states at the frontier of the BFS;
        k (int): the current depth of the tree;
        max_k (int): the maximum depth to consider.

    Returns:
        A tuple containing a flag indicating whether the goal has been
        reached, and the depth of the tree at which the search stopped.

    """
    if len(frontier) == 0 or k == max_k:
        return False, k

    new_frontier = list()
    for f in frontier:
        # Expand each frontier state with both actions. A reward of 1 means
        # the goal is reached at depth k; a reward of 0 means the state is
        # still valid and is added to the new frontier; a reward of -1 marks
        # a failure state, which is discarded.
        s, r, _, _ = step(mdp, f, [0])
        if r == 1:
            return True, k
        elif r == 0:
            new_frontier.append(s)
        s, r, _, _ = step(mdp, f, [1])
        if r == 1:
            return True, k
        elif r == 0:
            new_frontier.append(s)

    return bfs(mdp, new_frontier, k + 1, max_k)
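An illustrative call of ``bfs``, continuing the sketch above; the starting state and depth are hypothetical values chosen for illustration:

success, k = bfs(mdp, [np.array([-0.5, 0.])], 1, 50)
# success: True if some action sequence reaches the goal within 50
# expansions; k: the depth at which the search stopped.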
def solve_car_on_hill(mdp, states, actions, gamma, max_k=50):
    """
    Solver of the Car-On-Hill environment.

    Args:
        mdp (CarOnHill): the Car-On-Hill environment;
        states (np.ndarray): the states;
        actions (np.ndarray): the actions;
        gamma (float): the discount factor;
        max_k (int, 50): the maximum depth to consider.

    Returns:
        The Q-value for each ``state``-``action`` tuple.

    """
    q = list()
    for s, a in zip(states, actions):
        mdp.reset(s)
        state, reward, _, _ = mdp.step(a)

        if reward == 1:
            # Goal reached in one step.
            k = 1
            success = True
        elif reward == -1:
            # Failure state reached in one step.
            k = 1
            success = False
        else:
            # Search for the shortest path to the goal from the new state.
            success, k = bfs(mdp, [state], 2, max_k)

        # The Q-value is the reward discounted by the depth at which the
        # search terminated, negated in case of failure.
        if success:
            q.append(gamma ** (k - 1))
        else:
            q.append(-gamma ** (k - 1))

    return q
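A usage sketch for the solver, again not part of the module source. It assumes the documented Car-On-Hill state bounds (position in [-1, 1], velocity in [-3, 3]), the two discrete actions 0 and 1, and that ``mdp.info.gamma`` holds the environment's discount factor.

import numpy as np
from mushroom_rl.environments import CarOnHill
from mushroom_rl.solvers.car_on_hill import solve_car_on_hill

mdp = CarOnHill()

# Build a small grid of (position, velocity) states and pair each state
# with both actions.
positions = np.linspace(-1., 1., 5)
velocities = np.linspace(-3., 3., 5)
grid = np.array([[p, v] for p in positions for v in velocities])

states = np.repeat(grid, 2, axis=0)
actions = np.array([[0], [1]] * len(grid))

q = solve_car_on_hill(mdp, states, actions, mdp.info.gamma)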