# Source code for mushroom_rl.utils.numerical_gradient

import numpy as np


def numerical_diff_policy(policy, state, action, eps=1e-6):
    """
    Compute the gradient of a policy in (``state``, ``action``) numerically.

    The gradient is taken with respect to the policy weights using central
    finite differences: each weight is perturbed in turn by ``-eps`` and
    ``+eps`` and the policy is evaluated in (``state``, ``action``) for both
    perturbations. The starting weights are set back on the policy before
    returning, also when evaluating the policy raises an exception.

    Args:
        policy (Policy): the policy whose gradient has to be returned;
        state (np.ndarray): the state;
        action (np.ndarray): the action;
        eps (float, 1e-6): the value of the perturbation.

    Returns:
        The gradient of the provided policy in (``state``, ``action``)
        computed numerically, as an array of ``policy.weights_size`` elements.

    """
    # Work on a private copy: if ``get_weights`` hands out a reference to the
    # policy's internal buffer and ``set_weights`` writes in place, the
    # perturbations would otherwise accumulate into ``w_start``.
    w_start = np.array(policy.get_weights())

    g = np.zeros(policy.weights_size)
    # Single perturbation buffer, reused for every weight.
    perturb = np.zeros(policy.weights_size)
    try:
        for i in range(len(w_start)):
            perturb[i] = eps

            policy.set_weights(w_start - perturb)
            v1 = policy(state, action)

            policy.set_weights(w_start + perturb)
            v2 = policy(state, action)

            # Central difference along the i-th weight.
            g[i] = (v2 - v1) / (2 * eps)

            perturb[i] = 0.
    finally:
        # Never leave the policy with perturbed weights.
        policy.set_weights(w_start)

    return g


def numerical_diff_dist(dist, theta, eps=1e-6):
    """
    Compute the gradient of a distribution in ``theta`` numerically.

    The gradient is taken with respect to the distribution parameters using
    central finite differences: each parameter is perturbed in turn by
    ``-eps`` and ``+eps`` and the distribution is evaluated in ``theta`` for
    both perturbations. The starting parameters are set back on the
    distribution before returning, also when evaluating the distribution
    raises an exception.

    Args:
        dist (Distribution): the distribution whose gradient has to be returned;
        theta (np.ndarray): the parametrization where to compute the gradient;
        eps (float, 1e-6): the value of the perturbation.

    Returns:
        The gradient of the provided distribution in ``theta`` computed
        numerically, as an array of ``dist.parameters_size`` elements.

    """
    # Work on a private copy: if ``get_parameters`` hands out a reference to
    # the distribution's internal buffer and ``set_parameters`` writes in
    # place, the perturbations would otherwise accumulate into ``rho_start``.
    rho_start = np.array(dist.get_parameters())

    g = np.zeros(dist.parameters_size)
    # Single perturbation buffer, reused for every parameter.
    perturb = np.zeros(dist.parameters_size)
    try:
        for i in range(len(rho_start)):
            perturb[i] = eps

            dist.set_parameters(rho_start - perturb)
            v1 = dist(theta)

            dist.set_parameters(rho_start + perturb)
            v2 = dist(theta)

            # Central difference along the i-th parameter.
            g[i] = (v2 - v1) / (2 * eps)

            perturb[i] = 0.
    finally:
        # Never leave the distribution with perturbed parameters.
        dist.set_parameters(rho_start)

    return g