Source code for mushroom_rl.utils.numerical_gradient

import numpy as np


def numerical_diff_policy(policy, state, action, eps=1e-6):
    """
    Compute the gradient of a policy in (``state``, ``action``) numerically.

    The gradient is estimated with central finite differences: every weight
    is perturbed by ``-eps`` and ``+eps`` in turn and the policy is evaluated
    at both points. The starting weights are set back on the policy before
    returning, also when an evaluation of the policy raises.

    Args:
        policy (Policy): the policy whose gradient has to be returned;
        state (np.ndarray): the state;
        action (np.ndarray): the action;
        eps (float, 1e-6): the value of the perturbation.

    Returns:
        The gradient of the provided policy in (``state``, ``action``)
        computed numerically.

    """
    # Work on a private copy of the baseline so that it cannot change under
    # us if the policy hands out a reference to its internal weight storage.
    w_start = np.array(policy.get_weights())
    g = np.zeros(policy.weights_size)

    try:
        for i in range(len(w_start)):
            perturb = np.zeros(policy.weights_size)
            perturb[i] = eps

            policy.set_weights(w_start - perturb)
            v1 = policy(state, action)

            policy.set_weights(w_start + perturb)
            v2 = policy(state, action)

            g[i] = (v2 - v1) / (2 * eps)
    finally:
        # Never leave the caller's policy with perturbed weights.
        policy.set_weights(w_start)

    return g
def numerical_diff_dist(dist, theta, eps=1e-6):
    """
    Compute the gradient of a distribution in ``theta`` numerically.

    The gradient is taken w.r.t. the parameters of the distribution and is
    estimated with central finite differences: every parameter is perturbed
    by ``-eps`` and ``+eps`` in turn and the distribution is evaluated in
    ``theta`` at both points. The starting parameters are set back on the
    distribution before returning, also when an evaluation raises.

    Args:
        dist (Distribution): the distribution whose gradient has to be
            returned;
        theta (np.ndarray): the parametrization where to compute the
            gradient;
        eps (float, 1e-6): the value of the perturbation.

    Returns:
        The gradient of the provided distribution in ``theta`` computed
        numerically.

    """
    # Work on a private copy of the baseline so that it cannot change under
    # us if the distribution hands out a reference to its internal storage.
    rho_start = np.array(dist.get_parameters())
    g = np.zeros(dist.parameters_size)

    try:
        for i in range(len(rho_start)):
            perturb = np.zeros(dist.parameters_size)
            perturb[i] = eps

            dist.set_parameters(rho_start - perturb)
            v1 = dist(theta)

            dist.set_parameters(rho_start + perturb)
            v2 = dist(theta)

            g[i] = (v2 - v1) / (2 * eps)
    finally:
        # Never leave the caller's distribution with perturbed parameters.
        dist.set_parameters(rho_start)

    return g
def numerical_diff_function(function, params, eps=1e-6):
    """
    Compute the gradient of a function in ``params`` numerically.

    The gradient is estimated with central finite differences: every entry of
    ``params`` is perturbed by ``-eps`` and ``+eps`` in turn and the function
    is evaluated at both points.

    Args:
        function: a function whose gradient has to be returned;
        params: parameter vector w.r.t. we need to compute the gradient;
        eps (float, 1e-6): the value of the perturbation.

    Returns:
        The numerical gradient of the function computed w.r.t. parameters
        ``params``.

    """
    params = np.asarray(params)
    # An integer (or boolean) parameter vector would make ``zeros_like``
    # allocate a non-floating gradient, silently truncating every entry on
    # assignment. Promote to float; floating inputs keep their dtype.
    if not np.issubdtype(params.dtype, np.inexact):
        params = params.astype(float)

    g = np.zeros_like(params)
    n_params = len(params)

    for i in range(n_params):
        perturb = np.zeros(n_params)
        perturb[i] = eps

        v1 = function(params - perturb)
        v2 = function(params + perturb)

        g[i] = (v2 - v1) / (2 * eps)

    return g