Source code for mushroom_rl.approximators.parametric.cmac

import numpy as np

from mushroom_rl.approximators.parametric import LinearApproximator
from mushroom_rl.features import Features


class CMAC(LinearApproximator):
    """
    Cerebellar Model Arithmetic Computer (CMAC).

    A linear approximator whose inputs are first mapped through tiling
    features built from the tilings given to the constructor.

    """

    def __init__(self, tilings, weights=None, output_shape=(1,), **kwargs):
        """
        Constructor.

        The input shape handed to the parent approximator is not a
        parameter: it is derived from the size of the tiling features.

        Args:
            tilings (list): list of tilings to discretize the input space;
            weights (np.ndarray, None): array of weights to initialize the
                weights of the approximator;
            output_shape (np.ndarray, (1,)): the shape of the output of the
                model;
            **kwargs: other params of the approximator; accepted but not
                forwarded to the parent constructor.

        """
        self._phi = Features(tilings=tilings)
        self._n = len(tilings)

        feature_shape = (self._phi.size,)
        super().__init__(
            weights=weights,
            input_shape=feature_shape,
            output_shape=output_shape,
        )

        # Register the attributes added by this class for serialization.
        self._add_save_attr(
            _phi='pickle',
            _n='primitive',
        )

    def fit(self, x, y, alpha=1.0, **kwargs):
        """
        Fit the model with a single additive update of the weights.

        The prediction error is projected on the features of each sample,
        normalized by the per-sample feature sum, divided by the per-feature
        sum over the batch, and added to the weights scaled by ``alpha``.

        Args:
            x (np.ndarray): input;
            y (np.ndarray): target;
            alpha (float, 1.0): learning rate;
            **kwargs: other parameters used by the fit method of the
                regressor; unused here.

        """
        residual = np.atleast_2d(y - self.predict(x))
        # With more than one weight row the error is transposed so that
        # its leading axis lines up with the rows of the weight matrix.
        residual = residual.T if self._w.shape[0] > 1 else residual

        features = np.atleast_2d(self._phi(x))
        per_feature_total = features.sum(axis=0)
        per_sample_total = features.sum(axis=1, keepdims=True)
        normalized = features / per_sample_total

        # Features with a zero batch sum would cause a division by zero:
        # divide by one instead.
        per_feature_total[per_feature_total == 0] = 1.

        step = (residual @ normalized) / per_feature_total
        self._w += alpha * step

    def predict(self, x, **predict_params):
        """
        Predict.

        For each sample, the weights selected by the indexes computed by
        the features are summed, separately for each weight row.

        Args:
            x (np.ndarray): input;
            **predict_params: other parameters used by the predict method
                of the regressor; unused here.

        Returns:
            The predictions of the model, with singleton dimensions
            squeezed out.

        """
        n_samples = x.shape[0]
        out = np.ones((n_samples, self._w.shape[0]))
        active = self._phi.compute_indexes(x)

        # A single sample is wrapped in a list so that the loop below
        # handles it as a batch of one.
        if n_samples == 1:
            active = [active]

        for row, cols in enumerate(active):
            out[row] = self._w[:, cols].sum(axis=-1)

        return out.squeeze()

    def diff(self, state, action=None):
        """
        Compute the derivative of the output w.r.t. ``state``, and
        ``action`` if provided.

        The state is mapped through the features and the computation is
        delegated to the parent class.

        Args:
            state (np.ndarray): the state;
            action (np.ndarray, None): the action.

        Returns:
            The derivative of the output w.r.t. ``state``, and ``action``
            if provided.

        """
        return super().diff(self._phi(state), action)