Source code for mushroom_rl.algorithms.value.td.r_learning

import numpy as np

from mushroom_rl.algorithms.value.td import TD
from mushroom_rl.utils.table import Table


class RLearning(TD):
    """
    R-Learning algorithm.
    "A Reinforcement Learning Method for Maximizing Undiscounted Rewards".
    Schwartz A.. 1993.

    Besides the tabular Q-function, the agent keeps a scalar ``_rho`` that
    is subtracted from the reward in every temporal-difference error and is
    itself adjusted whenever the updated action is greedy in its state.

    """
    def __init__(self, mdp_info, policy, learning_rate, beta):
        """
        Constructor.

        Args:
            mdp_info: description of the environment; its ``size`` attribute
                is used to dimension the Q-table;
            policy: policy forwarded unchanged to the ``TD`` constructor;
            learning_rate: learning rate forwarded unchanged to the ``TD``
                constructor (presumably what ``TD`` exposes as
                ``self.alpha`` -- confirm against ``TD``);
            beta (Parameter): beta coefficient; it is evaluated as
                ``beta(state, action)`` and scales each update of ``_rho``.

        """
        q_table = Table(mdp_info.size)

        # Scalar offset removed from every reward; starts from zero.
        self._rho = 0.
        self.beta = beta

        # Register the attributes owned by this class for serialization
        # before handing over to the parent constructor.
        self._add_save_attr(_rho='primitive', beta='pickle')

        super().__init__(mdp_info, policy, q_table, learning_rate)

    def _update(self, state, action, reward, next_state, absorbing):
        """
        Apply one R-Learning step to the Q-table and to ``_rho``.

        Args:
            state: state in which the action was taken;
            action: action that was taken;
            reward: reward observed for the transition;
            next_state: state reached after the transition;
            absorbing: whether ``next_state`` is absorbing; if so, the
                bootstrap value of the next state is taken as zero.

        """
        old_value = self.Q[state, action]

        if absorbing:
            bootstrap = 0.
        else:
            bootstrap = np.max(self.Q[next_state, :])

        # TD error with ``_rho`` taken off the reward.
        td_error = reward - self._rho + bootstrap - old_value
        updated_value = old_value + self.alpha(state, action) * td_error
        self.Q[state, action] = updated_value

        # ``_rho`` only moves when the freshly updated entry is the maximum
        # of its row, i.e. the action taken is greedy after the update.
        best_value = np.max(self.Q[state, :])
        if updated_value == best_value:
            rho_error = reward + bootstrap - best_value - self._rho
            self._rho += self.beta(state, action) * rho_error