# Source code for mushroom_rl.rl_utils.parameters

import numpy as np

from mushroom_rl.core.serialization import Serializable
from mushroom_rl.approximators.table import Table


def to_parameter(x):
    """
    Make sure that the given object is a ``Parameter``.

    Args:
        x: either a ``Parameter`` instance, or a value to be used as the
            initial value of a newly created ``Parameter``.

    Returns:
        ``x`` itself when it is already a ``Parameter``, otherwise
        ``Parameter(x)``.

    """
    return x if isinstance(x, Parameter) else Parameter(x)


class Parameter(Serializable):
    """
    Base class to manage parameters such as the learning rate. A separate
    update counter can be kept for each state or state-action tuple by
    choosing the ``size`` of the underlying table accordingly.

    The base implementation always evaluates to the initial value; subclasses
    override ``_compute`` to make the value depend on the update counter.

    """
    def __init__(self, value, min_value=None, max_value=None, size=(1,)):
        """
        Constructor.

        Args:
            value (float): initial value of the parameter;
            min_value (float, None): lower bound applied to the value
                returned by ``get_value``;
            max_value (float, None): upper bound applied to the value
                returned by ``get_value``;
            size (tuple, (1,)): shape of the table of update counters; it can
                be used to have a single parameter for each state or
                state-action tuple.

        """
        self._initial_value = value
        self._min_value = min_value
        self._max_value = max_value
        self._n_updates = Table(size)

        # Attributes handled by the Serializable machinery, with the
        # serialization method to be used for each of them.
        save_attributes = {
            '_initial_value': 'primitive',
            '_min_value': 'primitive',
            '_max_value': 'primitive',
            '_n_updates': 'mushroom',
        }
        self._add_save_attr(**save_attributes)

    def __call__(self, *idx, **kwargs):
        """
        Update the parameter in the provided index, then return its value.

        Args:
            *idx (list): index of the parameter to update and return; it is
                ignored when the table of counters has a single element.

        Returns:
            The updated parameter in the provided index.

        """
        has_single_entry = self._n_updates.table.size == 1
        index = () if has_single_entry else idx

        self.update(*index, **kwargs)

        return self.get_value(*index, **kwargs)

    def get_value(self, *idx, **kwargs):
        """
        Return the current value of the parameter in the provided index,
        without updating it.

        Args:
            *idx (list): index of the parameter to return.

        Returns:
            The value given by ``_compute``, clipped to the configured bounds
            when at least one of them is set.

        """
        value = self._compute(*idx, **kwargs)

        unbounded = self._min_value is None and self._max_value is None
        if unbounded:
            return value

        return np.clip(value, self._min_value, self._max_value)

    def _compute(self, *idx, **kwargs):
        """
        Compute the unclipped value of the parameter. The base class ignores
        the index and returns the initial value.

        Returns:
            The value of the parameter in the provided index.

        """
        return self._initial_value

    def update(self, *idx, **kwargs):
        """
        Increment by one the number of visits of the parameter in the
        provided index.

        Args:
            *idx (list): index of the parameter whose number of visits has to
                be updated.

        """
        self._n_updates[idx] += 1

    @property
    def shape(self):
        """
        Returns:
            The shape of the table of parameters.

        """
        counters = self._n_updates.table
        return counters.shape

    @property
    def initial_value(self):
        """
        Returns:
            The initial value of the parameters.

        """
        return self._initial_value
class LinearParameter(Parameter):
    r"""
    Parameter that changes linearly with the number of times it has been
    updated, until it reaches a threshold. The value follows the formula:

    .. math::
        v_n = \textrm{clip}(v_0 + \dfrac{v_{th} - v_0}{N} n, v_{th})

    where :math:`v_0` is the initial value of the parameter, :math:`n` is the
    number of updates performed so far, :math:`N` is the number of steps
    needed to reach the threshold and :math:`v_{th}` is the upper or lower
    threshold for the parameter.

    """
    def __init__(self, value, threshold_value, n, size=(1,)):
        """
        Constructor.

        Args:
            value (float): initial value of the parameter;
            threshold_value (float): minimum or maximum value that the
                parameter can reach;
            n (int): number of time steps needed to reach the threshold
                value;
            size (tuple, (1,)): shape of the table of update counters; it can
                be used to have a single parameter for each state or
                state-action tuple.

        """
        # Per-update increment; its sign tells whether the threshold acts as
        # an upper bound (non-negative slope) or a lower bound.
        self._coeff = (threshold_value - value) / n

        increasing = self._coeff >= 0
        min_value = None if increasing else threshold_value
        max_value = threshold_value if increasing else None

        super().__init__(value, min_value, max_value, size)

        self._add_save_attr(_coeff='primitive')

    def _compute(self, *idx, **kwargs):
        """
        Returns:
            The unclipped value: initial value plus the per-update increment
            times the number of updates in the provided index.

        """
        n_updates = self._n_updates[idx]

        return self._coeff * n_updates + self._initial_value
class DecayParameter(Parameter):
    r"""
    Parameter that decays with the number of times it has been updated. The
    decay follows the formula:

    .. math::
        v_n = \dfrac{v_0}{\max(n, 1)^p}

    where :math:`v_0` is the initial value of the parameter, :math:`n` is the
    number of updates and :math:`p` is an arbitrary exponent.

    """
    def __init__(self, value, exp=1., min_value=None, max_value=None,
                 size=(1,)):
        """
        Constructor.

        Args:
            value (float): initial value of the parameter;
            exp (float, 1.): exponent for the step decay;
            min_value (float, None): lower bound applied to the value
                returned by ``get_value``;
            max_value (float, None): upper bound applied to the value
                returned by ``get_value``;
            size (tuple, (1,)): shape of the table of update counters; it can
                be used to have a single parameter for each state or
                state-action tuple.

        """
        self._exp = exp

        super().__init__(value, min_value=min_value, max_value=max_value,
                         size=size)

        self._add_save_attr(_exp='primitive')

    def _compute(self, *idx, **kwargs):
        """
        Returns:
            The unclipped value: initial value divided by the number of
            updates in the provided index raised to the exponent.

        """
        # The counter is clamped to at least 1, so the initial value is
        # returned unchanged while the counter is below 1.
        steps = np.maximum(self._n_updates[idx], 1)
        denominator = steps ** self._exp

        return self._initial_value / denominator