# Source code for mushroom_rl.policy.noise_policy

import numpy as np

from .policy import ParametricPolicy

class OrnsteinUhlenbeckPolicy(ParametricPolicy):
    """
    Ornstein-Uhlenbeck process noise policy: actions are drawn as a
    deterministic mean (predicted by a regressor) plus temporally
    correlated, mean-reverting Gaussian noise. This policy is commonly
    used in the Deep Deterministic Policy Gradient algorithm.

    NOTE(review): the original docstring's "as implemented in:" citation
    was lost during extraction — presumably an external DDPG noise
    implementation; confirm against the upstream repository.

    """
    def __init__(self, mu, sigma, theta, dt, x0=None):
        """
        Constructor.

        Args:
            mu (Regressor): the regressor representing the mean w.r.t. the
                state;
            sigma (np.ndarray): average magnitude of the random fluctuations
                per square-root time;
            theta (float): rate of mean reversion;
            dt (float): time interval;
            x0 (np.ndarray, None): initial values of noise.

        """
        self._approximator = mu
        self._sigma = sigma
        self._theta = theta
        self._dt = dt
        self._x0 = x0
        self._x_prev = None

        # Initialize the noise state before the first draw.
        self.reset()

        self._add_save_attr(
            _approximator='mushroom',
            _sigma='numpy',
            _theta='primitive',
            _dt='primitive',
            _x0='numpy',
            _x_prev='numpy'
        )

    def __call__(self, state, action):
        # The probability of an action under this stochastic process is
        # not computed; only sampling via ``draw_action`` is supported.
        raise NotImplementedError

    def draw_action(self, state):
        # Deterministic mean for the current state.
        mean = self._approximator.predict(state)

        # Euler-Maruyama step of the OU process: drift pulls the noise
        # back towards zero, diffusion adds scaled Gaussian randomness.
        drift = -self._theta * self._x_prev * self._dt
        diffusion = self._sigma * np.sqrt(self._dt) * np.random.normal(
            size=self._approximator.output_shape
        )
        noise = self._x_prev + drift + diffusion
        self._x_prev = noise

        return mean + noise

    def set_weights(self, weights):
        self._approximator.set_weights(weights)

    def get_weights(self):
        return self._approximator.get_weights()

    @property
    def weights_size(self):
        return self._approximator.weights_size

    def reset(self):
        # Restart the noise process from ``x0`` if provided, otherwise
        # from zeros matching the approximator's output shape.
        if self._x0 is not None:
            self._x_prev = self._x0
        else:
            self._x_prev = np.zeros(self._approximator.output_shape)