Source code for mushroom_rl.utils.torch

import torch
import numpy as np


def set_weights(parameters, weights, use_cuda):
    """
    Function used to set the value of a set of torch parameters given a
    vector of values.

    Args:
        parameters (list): list of parameters to be considered;
        weights (numpy.ndarray): array of the new values for the parameters;
        use_cuda (bool): whether the parameters are cuda tensors or not.

    """
    idx = 0
    for p in parameters:
        shape = p.data.shape

        c = 1
        for s in shape:
            c *= s

        w = np.reshape(weights[idx:idx + c], shape)

        if not use_cuda:
            w_tensor = torch.from_numpy(w).type(p.data.dtype)
        else:
            w_tensor = torch.from_numpy(w).type(p.data.dtype).cuda()

        p.data = w_tensor
        idx += c

    assert idx == weights.size
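For illustration, a minimal sketch (not part of the module) showing how set_weights fills the parameters of a hypothetical small model from a flat numpy vector, consuming values in parameter order:

    import numpy as np
    import torch

    # Hypothetical example: overwrite all parameters of a small model
    # with values taken from a single flat numpy vector.
    model = torch.nn.Linear(3, 2)
    n = sum(p.numel() for p in model.parameters())  # 3*2 weights + 2 biases = 8
    flat = np.arange(n, dtype=np.float32)

    set_weights(model.parameters(), flat, use_cuda=False)
    # The weight matrix now holds the first 6 values, the bias the last 2.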
def get_weights(parameters):
    """
    Function used to get the value of a set of torch parameters as a single
    vector of values.

    Args:
        parameters (list): list of parameters to be considered.

    Returns:
        A numpy vector consisting of all the values of the parameters.

    """
    weights = list()

    for p in parameters:
        w = p.data.detach().cpu().numpy()
        weights.append(w.flatten())

    weights = np.concatenate(weights, 0)

    return weights
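A quick sketch of the inverse operation, again with a hypothetical model: get_weights returns the flattened concatenation of all parameter values:

    import torch

    # Hypothetical example: flatten all parameters of a model into one vector.
    model = torch.nn.Linear(3, 2)
    flat = get_weights(list(model.parameters()))
    assert flat.shape == (3 * 2 + 2,)  # weights flattened first, then biases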
def zero_grad(parameters):
    """
    Function used to set to zero the value of the gradient of a set of torch
    parameters.

    Args:
        parameters (list): list of parameters to be considered.

    """
    for p in parameters:
        if p.grad is not None:
            p.grad.detach_()
            p.grad.zero_()
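A short usage sketch, assuming a hypothetical model with gradients accumulated by a backward pass:

    import torch

    # Hypothetical example: clear gradients after a backward pass.
    model = torch.nn.Linear(3, 1)
    loss = model(torch.ones(1, 3)).sum()
    loss.backward()

    zero_grad(model.parameters())
    assert all(torch.all(p.grad == 0) for p in model.parameters())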
def get_gradient(params):
    """
    Function used to get the value of the gradient of a set of torch
    parameters.

    Args:
        params (list): list of parameters to be considered.

    Returns:
        A single tensor containing the gradients of all the parameters,
        flattened and concatenated; parameters without a gradient contribute
        zeros.

    """
    views = []
    for p in params:
        if p.grad is None:
            view = p.new(p.numel()).zero_()
        else:
            view = p.grad.view(-1)
        views.append(view)

    return torch.cat(views, 0)
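A sketch of typical use, with a hypothetical model: after a backward pass, get_gradient concatenates all parameter gradients into one flat tensor:

    import torch

    # Hypothetical example: collect all gradients into a single flat tensor.
    model = torch.nn.Linear(3, 1)
    loss = model(torch.ones(1, 3)).sum()
    loss.backward()

    g = get_gradient(list(model.parameters()))
    assert g.shape == (3 + 1,)  # weight gradients followed by the bias gradient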
def to_float_tensor(x, use_cuda=False):
    """
    Function used to convert a numpy array to a float torch tensor.

    Args:
        x (np.ndarray): numpy array to be converted as torch tensor;
        use_cuda (bool): whether to build a cuda tensor or not.

    Returns:
        A float tensor built from the values contained in the input array.

    """
    x = torch.tensor(x, dtype=torch.float)
    return x.cuda() if use_cuda else x
def to_int_tensor(x, use_cuda=False):
    """
    Function used to convert a numpy array to an int torch tensor.

    Args:
        x (np.ndarray): numpy array to be converted as torch tensor;
        use_cuda (bool): whether to build a cuda tensor or not.

    Returns:
        An int tensor built from the values contained in the input array.

    """
    x = torch.tensor(x, dtype=torch.int)
    return x.cuda() if use_cuda else x
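Both converters can be sketched together (plain CPU tensors are assumed; passing use_cuda=True would move the result to the GPU):

    import numpy as np

    # Hypothetical example: convert numpy arrays to torch tensors.
    x = np.array([[0.5, 1.5], [2.5, 3.5]])
    t = to_float_tensor(x)                  # torch.float32 tensor on CPU
    i = to_int_tensor(np.array([1, 2, 3]))  # torch.int32 tensor on CPU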
def update_optimizer_parameters(optimizer, new_parameters):
    """
    Function used to update the parameters referenced by a torch optimizer,
    moving any per-parameter state (e.g. momentum buffers) from the old
    parameters to the new ones.

    Args:
        optimizer: the torch optimizer to be updated;
        new_parameters (list): the new list of parameters.

    """
    if len(optimizer.state) > 0:
        for p_old, p_new in zip(optimizer.param_groups[0]['params'],
                                new_parameters):
            data = optimizer.state[p_old]
            del optimizer.state[p_old]
            optimizer.state[p_new] = data

    optimizer.param_groups[0]['params'] = new_parameters
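A hedged sketch of when this helper is useful: if a network is rebuilt (here two hypothetical, identically shaped linear models), the state the optimizer collected for the old parameters can be carried over to the new ones:

    import torch

    # Hypothetical example: replace the parameters tracked by an optimizer
    # while preserving the Adam state (moment estimates).
    old_model = torch.nn.Linear(3, 1)
    optimizer = torch.optim.Adam(old_model.parameters(), lr=1e-3)

    # One step so that the optimizer accumulates per-parameter state.
    old_model(torch.ones(1, 3)).sum().backward()
    optimizer.step()

    new_model = torch.nn.Linear(3, 1)
    update_optimizer_parameters(optimizer, list(new_model.parameters()))
    # The moment estimates of old_model's parameters are now attached to
    # new_model's parameters; shapes must match for the state to stay valid.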
class CategoricalWrapper(torch.distributions.Categorical):
    """
    Wrapper for the torch Categorical distribution.

    Needed to convert a vector of MushroomRL discrete actions into an input
    with the proper shape for the original distribution implemented in torch.

    """
    def __init__(self, logits):
        super().__init__(logits=logits)

    def log_prob(self, value):
        return super().log_prob(value.squeeze())
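A usage sketch (assuming a hypothetical batch of two MushroomRL-style actions of shape (N, 1)); the wrapper squeezes the extra dimension before delegating to the torch Categorical:

    import torch

    # Hypothetical example: log-probabilities of a batch of discrete actions.
    logits = torch.log(torch.tensor([[0.25, 0.75], [0.5, 0.5]]))
    dist = CategoricalWrapper(logits)

    actions = torch.tensor([[1], [0]])  # MushroomRL-style (N, 1) action vector
    log_p = dist.log_prob(actions)      # squeezed to shape (N,) internally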
class DiagonalMultivariateGaussian(torch.distributions.Normal):
    """
    Wrapper for the torch Normal distribution, implementing a diagonal
    multivariate distribution. It behaves as the MultivariateNormal
    distribution, but avoids the computation of the full covariance matrix,
    reducing the computation time, particularly when a high-dimensional
    vector is sampled.

    """
    def __init__(self, loc, scale):
        super().__init__(loc=loc, scale=scale)

    def log_prob(self, value):
        return torch.sum(super().log_prob(value), -1)
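A final sketch, with hypothetical loc and scale vectors: the per-dimension log-densities are summed over the last axis, matching what a MultivariateNormal with diagonal covariance would return, without ever building the covariance matrix:

    import torch

    # Hypothetical example: a 3-dimensional diagonal Gaussian.
    loc = torch.zeros(3)
    scale = torch.ones(3)
    dist = DiagonalMultivariateGaussian(loc, scale)

    x = dist.sample()         # shape (3,), sampled independently per dimension
    log_p = dist.log_prob(x)  # scalar: per-dimension log-probs summed over -1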