Source code for mushroom_rl.environments.mujoco_envs.ball_in_a_cup

from mushroom_rl.environments.mujoco import MuJoCo, ObservationType
import numpy as np
from pathlib import Path
from copy import deepcopy

import mujoco


[docs]class BallInACup(MuJoCo): """ Mujoco simulation of Ball In A Cup task, using Barret WAM robot. """
[docs] def __init__(self): """ Constructor. """ xml_path = (Path(__file__).resolve().parent / "data" / "ball_in_a_cup" / "model.xml").as_posix() action_spec = ["act/wam/base_yaw_joint", "act/wam/shoulder_pitch_joint", "act/wam/shoulder_yaw_joint", "act/wam/elbow_pitch_joint", "act/wam/wrist_yaw_joint", "act/wam/wrist_pitch_joint", "act/wam/palm_yaw_joint"] observation_spec = [("base_yaw_pos", "wam/base_yaw_joint", ObservationType.JOINT_POS), ("base_yaw_vel", "wam/base_yaw_joint", ObservationType.JOINT_VEL), ("shoulder_pitch_pos", "wam/shoulder_pitch_joint", ObservationType.JOINT_POS), ("shoulder_pitch_vel", "wam/shoulder_pitch_joint", ObservationType.JOINT_VEL), ("shoulder_yaw_pos", "wam/shoulder_yaw_joint", ObservationType.JOINT_POS), ("shoulder_yaw_vel", "wam/shoulder_yaw_joint", ObservationType.JOINT_VEL), ("elbow_pitch_pos", "wam/elbow_pitch_joint", ObservationType.JOINT_POS), ("elbow_pitch_vel", "wam/elbow_pitch_joint", ObservationType.JOINT_VEL), ("wrist_yaw_pos", "wam/wrist_yaw_joint", ObservationType.JOINT_POS), ("wrist_yaw_vel", "wam/wrist_yaw_joint", ObservationType.JOINT_VEL), ("wrist_pitch_pos", "wam/wrist_pitch_joint", ObservationType.JOINT_POS), ("wrist_pitch_vel", "wam/wrist_pitch_joint", ObservationType.JOINT_VEL), ("palm_yaw_pos", "wam/palm_yaw_joint", ObservationType.JOINT_POS), ("palm_yaw_vel", "wam/palm_yaw_joint", ObservationType.JOINT_VEL), ("ball_pos", "ball", ObservationType.BODY_POS), ("ball_vel", "ball", ObservationType.BODY_VEL)] additional_data_spec = [("ball_pos", "ball", ObservationType.BODY_POS), ("goal_pos", "cup_goal_final", ObservationType.SITE_POS)] collision_groups = [("ball", ["ball_geom"]), ("robot", ["cup_geom1", "cup_geom2", "wrist_palm_link_convex_geom", "wrist_pitch_link_convex_decomposition_p1_geom", "wrist_pitch_link_convex_decomposition_p2_geom", "wrist_pitch_link_convex_decomposition_p3_geom", "wrist_yaw_link_convex_decomposition_p1_geom", "wrist_yaw_link_convex_decomposition_p2_geom", "forearm_link_convex_decomposition_p1_geom", "forearm_link_convex_decomposition_p2_geom"])] super().__init__(xml_path, action_spec, observation_spec, 0.9999, 2000, n_intermediate_steps=4, additional_data_spec=additional_data_spec, collision_groups=collision_groups) self.init_robot_pos = np.array([0.0, 0.58760536, 0.0, 1.36004913, 0.0, -0.32072943, -1.57]) self.p_gains = np.array([200, 400, 100, 100, 10, 10, 5]) self.d_gains = np.array([7, 15, 5, 2.5, 0.3, 0.3, 0.05]) self._reset_state = None
[docs] def reward(self, cur_obs, action, obs, absorbing): dist = self._read_data("goal_pos") - self._read_data("ball_pos") return 1. if np.linalg.norm(dist) < 0.05 else 0.
[docs] def is_absorbing(self, state): dist = self._read_data("goal_pos") - self._read_data("ball_pos") return np.linalg.norm(dist) < 0.05 or self._check_collision("ball", "robot")
[docs] def setup(self, obs): if self._reset_state is None: # Copy the initial position after the reset init_pos = self._data.qpos[0:7].copy() # Move the system towards the initial position using a PD-Controller for i in range(0, 5000): cur_pos = self._data.qpos[0:7].copy() cur_vel = self._data.qvel[0:7].copy() target = self.linear_movement(init_pos, self.init_robot_pos, 250, i) trq = self.p_gains * (target - cur_pos) + self.d_gains * ( np.zeros_like(self.init_robot_pos) - cur_vel) self._data.ctrl[self._action_indices] = self._data.qfrc_bias[:7] + trq mujoco.mj_step(self._model, self._data) self._reset_state = deepcopy(self._data) else: self._data = deepcopy(self._reset_state)
@staticmethod def linear_movement(start, end, n_steps, i): t = np.minimum(1., float(i) / float(n_steps)) return start + (end - start) * t