Source code for mushroom_rl.utils.parameters

import numpy as np

from mushroom_rl.utils.table import Table


class Parameter(object):
    """
    This class implements functions to manage parameters, such as the
    learning rate. It also allows having a single parameter for each state
    or state-action tuple.

    """
    def __init__(self, value, min_value=None, max_value=None, size=(1,)):
        """
        Constructor.

        Args:
            value (float): initial value of the parameter;
            min_value (float, None): minimum value that the parameter can
                reach when decreasing;
            max_value (float, None): maximum value that the parameter can
                reach when increasing;
            size (tuple, (1,)): shape of the matrix of parameters; this
                shape can be used to have a single parameter for each state
                or state-action tuple.

        """
        self._initial_value = value
        self._min_value = min_value
        self._max_value = max_value
        self._n_updates = Table(size)
    def __call__(self, *idx, **kwargs):
        """
        Update and return the parameter at the provided index.

        Args:
            *idx (list): index of the parameter to return.

        Returns:
            The updated parameter at the provided index.

        """
        if self._n_updates.table.size == 1:
            idx = list()

        self.update(*idx, **kwargs)

        return self.get_value(*idx, **kwargs)

    def get_value(self, *idx, **kwargs):
        """
        Return the current value of the parameter at the provided index,
        without updating it.

        Args:
            *idx (list): index of the parameter to return.

        Returns:
            The current value of the parameter at the provided index.

        """
        new_value = self._compute(*idx, **kwargs)

        if self._min_value is None and self._max_value is None:
            return new_value
        else:
            return np.clip(new_value, self._min_value, self._max_value)

    def _compute(self, *idx, **kwargs):
        """
        Returns:
            The value of the parameter at the provided index.

        """
        return self._initial_value

    def update(self, *idx, **kwargs):
        """
        Update the number of visits of the parameter at the provided index.

        Args:
            *idx (list): index of the parameter whose number of visits has
                to be updated.

        """
        self._n_updates[idx] += 1
    @property
    def shape(self):
        """
        Returns:
            The shape of the table of parameters.

        """
        return self._n_updates.table.shape
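
# A minimal usage sketch (illustrative, not part of the library source).
# Parameter by itself behaves as a constant value, optionally clipped to
# [min_value, max_value], while counting how many times each index is used:
#
#     eps = Parameter(value=.1, min_value=.01, max_value=1.)
#     eps()             # updates the visit count and returns 0.1
#     eps.get_value()   # returns 0.1 without updating the count
#
#     # one parameter per state-action pair of a 3x2 table
#     alpha = Parameter(value=.5, size=(3, 2))
#     alpha(2, 1)       # value for state 2, action 1
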
class LinearParameter(Parameter):
    """
    This class implements a linearly changing parameter according to the
    number of times it has been used.

    """
    def __init__(self, value, threshold_value, n, size=(1,)):
        self._coeff = (threshold_value - value) / n

        if self._coeff >= 0:
            super().__init__(value, None, threshold_value, size)
        else:
            super().__init__(value, threshold_value, None, size)

    def _compute(self, *idx, **kwargs):
        return self._coeff * self._n_updates[idx] + self._initial_value
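
# A minimal usage sketch (illustrative, not part of the library source).
# LinearParameter moves from ``value`` to ``threshold_value`` in ``n``
# updates, then stays clipped at the threshold, e.g. an exploration rate
# annealed from 1.0 to 0.1 over 100 steps:
#
#     eps = LinearParameter(value=1., threshold_value=.1, n=100)
#     eps()             # 1.0 - (1.0 - 0.1) / 100 = 0.991 after one update
#     for _ in range(200):
#         eps()
#     eps.get_value()   # 0.1, clipped at threshold_value
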
class ExponentialParameter(Parameter):
    """
    This class implements an exponentially changing parameter according to
    the number of times it has been used.

    """
    def __init__(self, value, exp=1., min_value=None, max_value=None,
                 size=(1,)):
        self._exp = exp

        super().__init__(value, min_value, max_value, size)

    def _compute(self, *idx, **kwargs):
        n = np.maximum(self._n_updates[idx], 1)

        return self._initial_value / n ** self._exp
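
# A minimal usage sketch (illustrative, not part of the library source).
# ExponentialParameter decays as value / n ** exp, with n the number of
# updates. exp=1. gives the 1/n schedule; exp=.51 is a common choice for
# learning rates, as it satisfies the Robbins-Monro conditions:
#
#     alpha = ExponentialParameter(value=1., exp=.51)
#     alpha()           # 1.0 (n is forced to be at least 1)
#     alpha()           # 1.0 / 2 ** 0.51, roughly 0.70
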
class AdaptiveParameter(object):
    """
    This class implements a basic adaptive gradient step. Instead of moving
    by a step proportional to the gradient, it takes a step limited by a
    given metric. To specify the metric, the natural gradient has to be
    provided. If the natural gradient is not provided, the identity matrix
    is used.

    The step rule is:

    .. math::
        \\Delta\\theta=\\underset{\\Delta\\vartheta}{argmax}\\,\\Delta\\vartheta^{T}\\nabla_{\\theta}J

        s.t.:\\Delta\\vartheta^{T}M\\Delta\\vartheta\\leq\\varepsilon

    Lecture notes, Neumann G.
    http://www.ias.informatik.tu-darmstadt.de/uploads/Geri/lecture-notes-constraint.pdf

    """
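    # Sketch of the derivation behind ``get_value`` below, assuming
    # nat_gradient = M^-1 * gradient, so that gradient.dot(nat_gradient)
    # equals gradient^T M^-1 gradient. Solving the constrained program
    # above with a Lagrange multiplier lambda gives:
    #
    #     lambda = sqrt(gradient^T M^-1 gradient / (4 * eps))
    #     delta_theta = 1 / (2 * lambda) * nat_gradient
    #
    # ``get_value`` returns the scalar step length 1 / (2 * lambda); the
    # caller multiplies it by the (natural) gradient.
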
    def __init__(self, value):
        # value is the epsilon bounding the squared metric norm of the step
        self._eps = value

    def __call__(self, *args, **kwargs):
        return self.get_value(*args, **kwargs)
    def get_value(self, *args, **kwargs):
        if len(args) == 2:
            gradient = args[0]
            nat_gradient = args[1]
            tmp = (gradient.dot(nat_gradient)).item()
            lambda_v = np.sqrt(tmp / (4. * self._eps))

            # For numerical stability
            lambda_v = max(lambda_v, 1e-8)
            step_length = 1. / (2. * lambda_v)

            return step_length
        elif len(args) == 1:
            # only the gradient is provided: use the identity metric
            return self.get_value(args[0], args[0], **kwargs)
        else:
            raise ValueError('Adaptive parameters need the gradient, or the '
                             'gradient and the natural gradient')

    @property
    def shape(self):
        return None
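
# A minimal usage sketch (illustrative, not part of the library source).
# Given a gradient, and optionally a natural gradient, AdaptiveParameter
# returns a scalar step length keeping the step within the metric
# constraint. ``M`` below is a placeholder for a metric matrix, e.g. the
# Fisher information:
#
#     lr = AdaptiveParameter(value=1e-2)      # value is the epsilon bound
#     grad = np.array([.5, -1.5])
#     step = lr(grad) * grad                  # identity metric
#
#     nat_grad = np.linalg.solve(M, grad)
#     step = lr(grad, nat_grad) * nat_grad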