From a30b5e904cf2c12cc4eb263a59c5e971cd3a2764 Mon Sep 17 00:00:00 2001 From: Alexis DUBURCQ Date: Thu, 19 Dec 2024 20:58:59 +0100 Subject: [PATCH] [gym_jiminy/common] Various minor improvements. (#849) * [core] Properly handle undefined motor velocity bounds. * [core] Rename 'positionLimit(Min|Max)' in 'positionLimit(Lower|Upper)' for consistency with Pinocchio. * [gym_jiminy/common] More robust random seed initialization. * [gym_jiminy/common] Add option to avoid clipping target motor accelerations of PD controller. * [gym_jiminy/common] Do not overwrite 'BaseJiminyEnv.(observation|action)' if already defined. * [gym_jiminy/common] Support additional 'special' string when loading pipeline from toml. * [gym_jiminy/common] 'ConvexHull2D.get_distance_to_point' now supports single query. * [gym_jiminy/common] Try initializing quantities even if no simulation is running. * [gym_jiminy/common] Clear exception message if nested key is missing. * [gym_jiminy/common] Move leaky twist from mahony filter to body observer. * [gym_jiminy/common] Add target joint velocity deadband to PD controller. * [gym_jiminy/common] Fix 'ControlledJiminyEnv.action' not reset during setup. * [gym_jiminy/common] Increase simulation timeout delay. --- core/include/jiminy/core/robot/model.h | 12 +- core/src/engine/engine.cc | 26 ++-- core/src/hardware/basic_motors.cc | 19 ++- core/src/robot/model.cc | 54 +++---- core/unit/engine_sanity_check.cc | 4 +- .../gym_jiminy/common/bases/pipeline.py | 21 ++- .../gym_jiminy/common/bases/quantities.py | 9 +- .../blocks/body_orientation_observer.py | 109 ++++++++++++-- .../gym_jiminy/common/blocks/mahony_filter.py | 133 +++--------------- .../proportional_derivative_controller.py | 116 ++++++++------- .../common/gym_jiminy/common/envs/generic.py | 24 ++-- .../gym_jiminy/common/quantities/generic.py | 2 +- .../common/gym_jiminy/common/utils/misc.py | 2 +- .../gym_jiminy/common/utils/pipeline.py | 28 ++-- .../common/wrappers/observation_layout.py | 5 +- python/gym_jiminy/examples/tutorial_rl.ipynb | 4 +- .../toolbox/gym_jiminy/toolbox/math/qhull.py | 22 +-- .../unit_py/test_deformation_estimator.py | 6 +- .../unit_py/test_pipeline_control.py | 41 ++++-- .../unit_py/test_training_toys_models.py | 2 +- python/jiminy_py/unit_py/test_dense_pole.py | 4 +- 21 files changed, 362 insertions(+), 281 deletions(-) diff --git a/core/include/jiminy/core/robot/model.h b/core/include/jiminy/core/robot/model.h index 97cea4cfed..bed17ae80c 100644 --- a/core/include/jiminy/core/robot/model.h +++ b/core/include/jiminy/core/robot/model.h @@ -137,8 +137,8 @@ namespace jiminy { GenericConfig config; config["positionLimitFromUrdf"] = true; - config["positionLimitMin"] = Eigen::VectorXd{}; - config["positionLimitMax"] = Eigen::VectorXd{}; + config["positionLimitLower"] = Eigen::VectorXd{}; + config["positionLimitUpper"] = Eigen::VectorXd{}; return config; }; @@ -181,13 +181,13 @@ namespace jiminy { const bool positionLimitFromUrdf; /// \brief Min position limit of all the mechanical joints of the theoretical model. - const Eigen::VectorXd positionLimitMin; - const Eigen::VectorXd positionLimitMax; + const Eigen::VectorXd positionLimitLower; + const Eigen::VectorXd positionLimitUpper; JointOptions(const GenericConfig & options) : positionLimitFromUrdf{boost::get(options.at("positionLimitFromUrdf"))}, - positionLimitMin{boost::get(options.at("positionLimitMin"))}, - positionLimitMax{boost::get(options.at("positionLimitMax"))} + positionLimitLower{boost::get(options.at("positionLimitLower"))}, + positionLimitUpper{boost::get(options.at("positionLimitUpper"))} { } }; diff --git a/core/src/engine/engine.cc b/core/src/engine/engine.cc index 22a454e3f5..17352e03f7 100644 --- a/core/src/engine/engine.cc +++ b/core/src/engine/engine.cc @@ -3238,8 +3238,8 @@ namespace jiminy { typedef boost::fusion::vector< const Eigen::VectorXd & /* q */, - const Eigen::VectorXd & /* positionLimitMin */, - const Eigen::VectorXd & /* positionLimitMax */, + const Eigen::VectorXd & /* positionLimitLower */, + const Eigen::VectorXd & /* positionLimitUpper */, const std::unique_ptr & /* engineOptions */, const std::shared_ptr & /* constraint */> ArgsType; @@ -3252,16 +3252,16 @@ namespace jiminy void> algo(const pinocchio::JointModelBase & joint, const Eigen::VectorXd & q, - const Eigen::VectorXd & positionLimitMin, - const Eigen::VectorXd & positionLimitMax, + const Eigen::VectorXd & positionLimitLower, + const Eigen::VectorXd & positionLimitUpper, const std::unique_ptr & engineOptions, const std::shared_ptr & constraint) { // Define some proxies for convenience const Eigen::Index positionIndex = joint.idx_q(); const double qJoint = q[positionIndex]; - const double qJointMin = positionLimitMin[positionIndex]; - const double qJointMax = positionLimitMax[positionIndex]; + const double qJointMin = positionLimitLower[positionIndex]; + const double qJointMax = positionLimitUpper[positionIndex]; const double transitionEps = engineOptions->contacts.transitionEps; // Check if out-of-bounds @@ -3287,8 +3287,8 @@ namespace jiminy void> algo(const pinocchio::JointModelBase & /* joint */, const Eigen::VectorXd & /* q */, - const Eigen::VectorXd & /* positionLimitMin */, - const Eigen::VectorXd & /* positionLimitMax */, + const Eigen::VectorXd & /* positionLimitLower */, + const Eigen::VectorXd & /* positionLimitUpper */, const std::unique_ptr & /* engineOptions */, const std::shared_ptr & constraint) { @@ -3307,8 +3307,8 @@ namespace jiminy void> algo(const pinocchio::JointModelBase & /* joint */, const Eigen::VectorXd & /* q */, - const Eigen::VectorXd & /* positionLimitMin */, - const Eigen::VectorXd & /* positionLimitMax */, + const Eigen::VectorXd & /* positionLimitLower */, + const Eigen::VectorXd & /* positionLimitUpper */, const std::unique_ptr & /* engineOptions */, const std::shared_ptr & constraint) { @@ -3332,8 +3332,8 @@ namespace jiminy /* Enforce position limits for all joints having bounds constraints, ie mechanical and backlash joints. */ - const Eigen::VectorXd & positionLimitMin = robot->pinocchioModel_.lowerPositionLimit; - const Eigen::VectorXd & positionLimitMax = robot->pinocchioModel_.upperPositionLimit; + const Eigen::VectorXd & positionLimitLower = robot->pinocchioModel_.lowerPositionLimit; + const Eigen::VectorXd & positionLimitUpper = robot->pinocchioModel_.upperPositionLimit; for (auto & constraintItem : constraints.boundJoints) { auto & constraint = constraintItem.second; @@ -3342,7 +3342,7 @@ namespace jiminy computePositionLimitsForcesAlgo::run( model.joints[jointIndex], typename computePositionLimitsForcesAlgo::ArgsType( - q, positionLimitMin, positionLimitMax, engineOptions_, constraint)); + q, positionLimitLower, positionLimitUpper, engineOptions_, constraint)); } // Compute the flexibilities (only support `JointModelType::SPHERICAL` so far) diff --git a/core/src/hardware/basic_motors.cc b/core/src/hardware/basic_motors.cc index b96d937ef0..637e460e0b 100644 --- a/core/src/hardware/basic_motors.cc +++ b/core/src/hardware/basic_motors.cc @@ -40,6 +40,10 @@ namespace jiminy { // Check if the friction parameters make sense // Make sure the user-defined position limit has the right dimension + if (boost::get(motorOptions.at("velocityEffortInvSlope")) < 0.0) + { + JIMINY_THROW(std::invalid_argument, "'velocityEffortInvSlope' must be positive."); + } if (boost::get(motorOptions.at("frictionViscousPositive")) > 0.0) { JIMINY_THROW(std::invalid_argument, "'frictionViscousPositive' must be negative."); @@ -104,12 +108,15 @@ namespace jiminy effortMax = effortLimit_; if (motorOptions_->enableVelocityLimit) { - const double velocityThr = std::max( - velocityLimit_ - effortLimit_ * motorOptions_->velocityEffortInvSlope, 0.0); - effortMin *= std::clamp( - (velocityLimit_ + vMotor) / (velocityLimit_ - velocityThr), 0.0, 1.0); - effortMax *= std::clamp( - (velocityLimit_ - vMotor) / (velocityLimit_ - velocityThr), 0.0, 1.0); + const double velocityDelta = effortLimit_ * motorOptions_->velocityEffortInvSlope; + if (velocityDelta > 0.0) + { + const double velocityThr = std::max(velocityLimit_ - velocityDelta, 0.0); + effortMin *= std::clamp( + (velocityLimit_ + vMotor) / (velocityLimit_ - velocityThr), 0.0, 1.0); + effortMax *= std::clamp( + (velocityLimit_ - vMotor) / (velocityLimit_ - velocityThr), 0.0, 1.0); + } } } uMotor = std::clamp(command, effortMin, effortMax); diff --git a/core/src/robot/model.cc b/core/src/robot/model.cc index 694d995406..586adb5bfb 100644 --- a/core/src/robot/model.cc +++ b/core/src/robot/model.cc @@ -1368,8 +1368,8 @@ namespace jiminy } // Re-initialize position limits - Eigen::VectorXd positionLimitMin = Eigen::VectorXd::Constant(pinocchioModel_.nq, -INF); - Eigen::VectorXd positionLimitMax = Eigen::VectorXd::Constant(pinocchioModel_.nq, +INF); + Eigen::VectorXd positionLimitLower = Eigen::VectorXd::Constant(pinocchioModel_.nq, -INF); + Eigen::VectorXd positionLimitUpper = Eigen::VectorXd::Constant(pinocchioModel_.nq, +INF); Eigen::Index idx_q, nq; for (const auto & joint : pinocchioModel_.joints) { @@ -1392,16 +1392,16 @@ namespace jiminy default: continue; } - positionLimitMin.segment(idx_q, nq).setConstant(-1.0 - EPS); - positionLimitMax.segment(idx_q, nq).setConstant(+1.0 + EPS); + positionLimitLower.segment(idx_q, nq).setConstant(-1.0 - EPS); + positionLimitUpper.segment(idx_q, nq).setConstant(+1.0 + EPS); } // Copy backlash joint position limits as-is from extended model for (pinocchio::JointIndex jointIndex : backlashJointIndices_) { const Eigen::Index positionIndex = pinocchioModel_.idx_qs[jointIndex]; - positionLimitMin[positionIndex] = pinocchioModel_.lowerPositionLimit[positionIndex]; - positionLimitMax[positionIndex] = pinocchioModel_.upperPositionLimit[positionIndex]; + positionLimitLower[positionIndex] = pinocchioModel_.lowerPositionLimit[positionIndex]; + positionLimitUpper[positionIndex] = pinocchioModel_.upperPositionLimit[positionIndex]; } // Set mechanical joint position limits from theoretical model or user options @@ -1418,9 +1418,9 @@ namespace jiminy } const auto & joint = pinocchioModel_.joints[jointIndex]; const auto & jointTh = pinocchioModelTh_.joints[jointIndexTh]; - positionLimitMin.segment(joint.idx_q(), joint.nq()) = + positionLimitLower.segment(joint.idx_q(), joint.nq()) = pinocchioModelTh_.lowerPositionLimit.segment(jointTh.idx_q(), jointTh.nq()); - positionLimitMax.segment(joint.idx_q(), joint.nq()) = + positionLimitUpper.segment(joint.idx_q(), joint.nq()) = pinocchioModelTh_.upperPositionLimit.segment(jointTh.idx_q(), jointTh.nq()); } } @@ -1429,14 +1429,14 @@ namespace jiminy for (std::size_t i = 0; i < mechanicalJointPositionIndices_.size(); ++i) { Eigen::Index positionIndex = mechanicalJointPositionIndices_[i]; - positionLimitMin[positionIndex] = modelOptions_->joints.positionLimitMin[i]; - positionLimitMax[positionIndex] = modelOptions_->joints.positionLimitMax[i]; + positionLimitLower[positionIndex] = modelOptions_->joints.positionLimitLower[i]; + positionLimitUpper[positionIndex] = modelOptions_->joints.positionLimitUpper[i]; } } // Overwrite extended model position limits - pinocchioModel_.lowerPositionLimit = positionLimitMin; - pinocchioModel_.upperPositionLimit = positionLimitMax; + pinocchioModel_.lowerPositionLimit = positionLimitLower; + pinocchioModel_.upperPositionLimit = positionLimitUpper; // Initialize backup joint space acceleration jointSpatialAccelerations_ = @@ -1557,33 +1557,33 @@ namespace jiminy boost::get(jointOptionsHolder.at("positionLimitFromUrdf")); if (!positionLimitFromUrdf) { - const Eigen::VectorXd & jointsPositionLimitMin = - boost::get(jointOptionsHolder.at("positionLimitMin")); + const Eigen::VectorXd & jointspositionLimitLower = + boost::get(jointOptionsHolder.at("positionLimitLower")); if (mechanicalJointPositionIndices_.size() != - static_cast(jointsPositionLimitMin.size())) + static_cast(jointspositionLimitLower.size())) { JIMINY_THROW(std::invalid_argument, - "Wrong vector size for 'positionLimitMin'."); + "Wrong vector size for 'positionLimitLower'."); } - const Eigen::VectorXd & jointsPositionLimitMax = - boost::get(jointOptionsHolder.at("positionLimitMax")); + const Eigen::VectorXd & jointspositionLimitUpper = + boost::get(jointOptionsHolder.at("positionLimitUpper")); if (mechanicalJointPositionIndices_.size() != - static_cast(jointsPositionLimitMax.size())) + static_cast(jointspositionLimitUpper.size())) { JIMINY_THROW(std::invalid_argument, - "Wrong vector size for 'positionLimitMax'."); + "Wrong vector size for 'positionLimitUpper'."); } if (mechanicalJointPositionIndices_.size() == - static_cast(modelOptions_->joints.positionLimitMin.size())) + static_cast(modelOptions_->joints.positionLimitLower.size())) { - auto jointsPositionLimitMinDiff = - jointsPositionLimitMin - modelOptions_->joints.positionLimitMin; + auto jointspositionLimitLowerDiff = + jointspositionLimitLower - modelOptions_->joints.positionLimitLower; internalBuffersMustBeUpdated |= - (jointsPositionLimitMinDiff.array().abs() >= EPS).all(); - auto jointsPositionLimitMaxDiff = - jointsPositionLimitMax - modelOptions_->joints.positionLimitMax; + (jointspositionLimitLowerDiff.array().abs() >= EPS).all(); + auto jointspositionLimitUpperDiff = + jointspositionLimitUpper - modelOptions_->joints.positionLimitUpper; internalBuffersMustBeUpdated |= - (jointsPositionLimitMaxDiff.array().abs() >= EPS).all(); + (jointspositionLimitUpperDiff.array().abs() >= EPS).all(); } else { diff --git a/core/unit/engine_sanity_check.cc b/core/unit/engine_sanity_check.cc index 429f46d757..90a2ce6b90 100755 --- a/core/unit/engine_sanity_check.cc +++ b/core/unit/engine_sanity_check.cc @@ -68,9 +68,9 @@ TEST(EngineSanity, EnergyConservation) GenericConfig & modelOptions = boost::get(robotOptions.at("model")); GenericConfig & jointsOptions = boost::get(modelOptions.at("joints")); boost::get(jointsOptions.at("positionLimitFromUrdf")) = false; - boost::get(jointsOptions.at("positionLimitMin")) = + boost::get(jointsOptions.at("positionLimitLower")) = Eigen::Vector2d::Constant(-INF); - boost::get(jointsOptions.at("positionLimitMax")) = + boost::get(jointsOptions.at("positionLimitUpper")) = Eigen::Vector2d::Constant(+INF); // Disable velocity and torque limits diff --git a/python/gym_jiminy/common/gym_jiminy/common/bases/pipeline.py b/python/gym_jiminy/common/gym_jiminy/common/bases/pipeline.py index 8e544730a2..cedad3e22d 100644 --- a/python/gym_jiminy/common/gym_jiminy/common/bases/pipeline.py +++ b/python/gym_jiminy/common/gym_jiminy/common/bases/pipeline.py @@ -42,6 +42,7 @@ from .blocks import BaseControllerBlock, BaseObserverBlock from ..utils import (DataNested, + fill, zeros, build_copyto, copy, @@ -368,13 +369,22 @@ def reset_hook() -> Optional[InterfaceJiminyEnv]: return env_derived + # Reset base pipeline. + # Note that it is important to reset the base environment first, + # otherwise adding extra layers on top of an existing pipeline would + # affect random number sampling, and therefore mess up with + # repeatability. + obs, info = self.env.reset( + seed=seed, options={"reset_hook": reset_hook}) + # Reset the seed of the action and observation spaces if seed is not None: - self.observation_space.seed(seed) - self.action_space.seed(seed) + obs_seed, act_seed = map(int, self.np_random.integers( + np.iinfo(int).max, size=(2,), dtype=int)) + self.observation_space.seed(obs_seed) + self.action_space.seed(act_seed) - # Reset base pipeline - return self.env.reset(seed=seed, options={"reset_hook": reset_hook}) + return obs, info def step(self, # type: ignore[override] action: Act @@ -1108,6 +1118,9 @@ def _setup(self) -> None: # Call base implementation super()._setup() + # Reset action + fill(self.action, 0) + # Compute the observe and control update periods self.observe_dt = self.env.observe_dt self.control_dt = self.controller.control_dt diff --git a/python/gym_jiminy/common/gym_jiminy/common/bases/quantities.py b/python/gym_jiminy/common/gym_jiminy/common/bases/quantities.py index 8bc14ab330..2c0996546d 100644 --- a/python/gym_jiminy/common/gym_jiminy/common/bases/quantities.py +++ b/python/gym_jiminy/common/gym_jiminy/common/bases/quantities.py @@ -782,8 +782,13 @@ def initialize(self) -> None: # Call base implementation super().initialize() - # Force initializing state quantity - self.state.initialize() + # Force initializing state quantity if possible + try: + self.state.initialize() + except RuntimeError: + # No simulation running. This may be problematic but it is not + # blocking at this point. + pass # Refresh robot proxy assert isinstance(self.state, StateQuantity) diff --git a/python/gym_jiminy/common/gym_jiminy/common/blocks/body_orientation_observer.py b/python/gym_jiminy/common/gym_jiminy/common/blocks/body_orientation_observer.py index ee9785710e..2046e5a546 100644 --- a/python/gym_jiminy/common/gym_jiminy/common/blocks/body_orientation_observer.py +++ b/python/gym_jiminy/common/gym_jiminy/common/blocks/body_orientation_observer.py @@ -3,9 +3,10 @@ """ from collections import OrderedDict from collections.abc import Mapping -from typing import List, Dict, Sequence, Union +from typing import List, Dict, Sequence, Union, Optional import numpy as np +import numba as nb import gymnasium as gym from jiminy_py.core import ImuSensor # pylint: disable=no-name-in-module @@ -13,6 +14,7 @@ from ..bases import BaseAct, BaseObs, BaseObserverBlock, InterfaceJiminyEnv from ..wrappers.observation_layout import NestedData from ..utils import (DataNested, + fill, quat_to_rpy, matrices_to_quat, quat_multiply, @@ -20,10 +22,64 @@ remove_twist_from_quat) +@nb.jit(nopython=True, cache=True) +def update_twist(q: np.ndarray, + twist: np.ndarray, + omega: np.ndarray, + time_constant_inv: float, + dt: float) -> None: + """Update the twist estimate of the Twist-after-Swing decomposition of + given orientations in quaternion representation using leaky integrator. + + :param q: Current swing estimate as a quaternion. It will be updated + in-place to add the estimated twist. + :param twist: Current twist estimate to update in-place. + :param omega: Current angular velocity estimate in local frame. + :param time_constant_inv: Inverse of the time constant of the leaky + integrator used to update the twist estimate. + :param dt: Time step, in seconds, between consecutive Quaternions. + """ + # Compute the derivative of the twist angle: + # The element-wise time-derivative of a quaternion is given by: + # dq = 0.5 * q * Quaternion(axis=gyro, w=0.0) [1] + # The twist around a given axis can be computed as follows: + # p = q_axis.dot(twist_axis) * twist_axis [2] + # twist = pin.Quaternion(np.array((*p, q_w))).normalized() + # The twist angle can be inferred from this expression: + # twist = 2 * atan2(q_axis.dot(twist_axis), q_w) [3] + # The derivative of twist angle can be derived: + # dtwist = 2 * ( + # (dq_axis * q_w - q_axis * dq_w).dot(twist_axis) [4] + # ) / (q_axis.dot(twist_axis) ** 2 + q_w ** 2) + # One can show that if q_s is the swing part of the orientation, then: + # q_s_axis.dot(twist_axis) = 0 + # It yields: + # dtwist = 2 * dq_s_axis.dot(twist_axis) / q_s_w [5] + q_x, q_y, _, q_w = q + dtwist = (- q_y * omega[0] + q_x * omega[1]) / q_w + omega[2] + + # Update twist angle using Leaky Integrator scheme to avoid long-term drift + twist *= max(0.0, 1.0 - time_constant_inv * dt) + twist += dtwist * dt + + # Update quaternion to add estimated twist + p_z, p_w = np.sin(0.5 * twist), np.cos(0.5 * twist) + q[0], q[1], q[2], q[3] = ( + p_w * q_x - p_z * q_y, + p_z * q_x + p_w * q_y, + p_z * q_w, + p_w * q_w) + + class BodyObserver(BaseObserverBlock[ Dict[str, np.ndarray], np.ndarray, BaseObs, BaseAct]): """Compute the orientation and angular velocity in local frame of the parent body associated with all the IMU sensors of the robot. + + .. note:: + The twist angle is not part of the internal state, even if being + integrated over time, because it is uniquely determined from the + orientation estimate. """ def __init__(self, name: str, @@ -33,7 +89,7 @@ def __init__(self, "features", "mahony_filter", "quat"), nested_imu_omega_key: NestedData = ( "features", "mahony_filter", "omega",), - ignore_twist: bool = False, + twist_time_constant: Optional[float] = None, compute_rpy: bool = True, update_ratio: int = 1) -> None: """ @@ -48,8 +104,15 @@ def __init__(self, local frame estimates. Their ordering must be consistent with the true IMU sensors of the robot. - :param ignore_twist: Whether to ignore the twist of the IMU quaternion - estimate. + :param twist_time_constant: + If specified, it corresponds to the time constant of the leaky + integrator (Exponential Moving Average) used to estimate the twist + part of twist-after-swing decomposition of the estimated + orientation in place of the Mahony Filter. If `0.0`, then its is + kept constant equal to zero. `None` to kept the original estimate + provided by Mahony Filter. See `remove_twist_from_quat` and + `update_twist` documentations for details. + Optional: `0.0` by default. :param compute_rpy: Whether to compute the Yaw-Pitch-Roll Euler angles representation for the 3D orientation of the IMU, in addition to the quaternion representation. @@ -60,9 +123,22 @@ def __init__(self, Optional: `1` by default. """ # Backup some of the user-argument(s) - self.ignore_twist = ignore_twist self.compute_rpy = compute_rpy + # Keep track of how the twist must be computed + self.twist_time_constant_inv: Optional[float] + if twist_time_constant is None: + self.twist_time_constant_inv = None + else: + if twist_time_constant > 0.0: + self.twist_time_constant_inv = 1.0 / twist_time_constant + else: + self.twist_time_constant_inv = float("inf") + self._remove_twist = self.twist_time_constant_inv is not None + self._update_twist = ( + self.twist_time_constant_inv is not None and + np.isfinite(self.twist_time_constant_inv)) + # Define observed / estimated IMU data proxies for fast access obs_imu_data_list: List[np.ndarray] = [] for nested_imu_key in (nested_imu_quat_key, nested_imu_omega_key): @@ -96,6 +172,10 @@ def __init__(self, imu_rel_rot_mats.append(frame.placement.rotation) self._imu_rel_quats = matrices_to_quat(tuple(imu_rel_rot_mats)) + # Allocate twist angle estimate around z-axis in world frame. + num_imu_sensors = len(env.robot.sensors[ImuSensor.type]) + self._twist = np.zeros((1, num_imu_sensors)) + # Initialize the observer super().__init__(name, env, update_ratio) @@ -131,6 +211,9 @@ def _setup(self) -> None: # Call base implementation super()._setup() + # Reset the twist estimate + fill(self._twist, 0) + # Fix initialization of the observation to be valid quaternions self._quat[-1] = 1.0 @@ -157,10 +240,6 @@ def refresh_observation(self, measurement: BaseObs) -> None: out=self._quat, is_right_conjugate=True) - # Remove twist if requested - if self.ignore_twist: - remove_twist_from_quat(self._quat) - # Compute the parent body angular velocities in local frame. # Note that batched "quaternion apply" is faster than sequential # "rotation matrix apply" in practice, both when using `np.einsum` or @@ -169,6 +248,18 @@ def refresh_observation(self, measurement: BaseObs) -> None: self._obs_imu_omegas, out=self._omega) + # Remove twist if requested + if self._remove_twist: + remove_twist_from_quat(self._quat) + + # Update twist if requested + if self._update_twist: + update_twist(self._quat, + self._twist, + self._omega, + self.twist_time_constant_inv, + self.observe_dt) + # Compute the RPY representation if requested if self.compute_rpy: quat_to_rpy(self._quat, self._rpy) diff --git a/python/gym_jiminy/common/gym_jiminy/common/blocks/mahony_filter.py b/python/gym_jiminy/common/gym_jiminy/common/blocks/mahony_filter.py index 040d8e6058..92434bd4d9 100644 --- a/python/gym_jiminy/common/gym_jiminy/common/blocks/mahony_filter.py +++ b/python/gym_jiminy/common/gym_jiminy/common/blocks/mahony_filter.py @@ -3,7 +3,7 @@ """ import logging from collections import OrderedDict -from typing import List, Union, Dict, Optional +from typing import List, Sequence, Union, Dict import numpy as np import numba as nb @@ -95,56 +95,6 @@ def mahony_filter(q: np.ndarray, bias_hat -= ki * dt * omega_mes # eq. 32b -@nb.jit(nopython=True, cache=True) -def update_twist(q: np.ndarray, - twist: np.ndarray, - omega: np.ndarray, - time_constant_inv: float, - dt: float) -> None: - """Update the twist estimate of the Twist-after-Swing decomposition of - given orientations in quaternion representation using leaky integrator. - - :param q: Current swing estimate as a quaternion. It will be updated - in-place to add the estimated twist. - :param twist: Current twist estimate to update in-place. - :param omega: Current angular velocity estimate in local frame. - :param time_constant_inv: Inverse of the time constant of the leaky - integrator used to update the twist estimate. - :param dt: Time step, in seconds, between consecutive Quaternions. - """ - # Compute the derivative of the twist angle: - # The element-wise time-derivative of a quaternion is given by: - # dq = 0.5 * q * Quaternion(axis=gyro, w=0.0) [1] - # The twist around a given axis can be computed as follows: - # p = q_axis.dot(twist_axis) * twist_axis [2] - # twist = pin.Quaternion(np.array((*p, q_w))).normalized() - # The twist angle can be inferred from this expression: - # twist = 2 * atan2(q_axis.dot(twist_axis), q_w) [3] - # The derivative of twist angle can be derived: - # dtwist = 2 * ( - # (dq_axis * q_w - q_axis * dq_w).dot(twist_axis) [4] - # ) / (q_axis.dot(twist_axis) ** 2 + q_w ** 2) - # One can show that if q_s is the swing part of the orientation, then: - # q_s_axis.dot(twist_axis) = 0 - # It yields: - # dtwist = 2 * dq_s_axis.dot(twist_axis) / q_s_w [5] - q_x, q_y, _, q_w = q - dtwist = (- q_y * omega[0] + q_x * omega[1]) / q_w + omega[2] - - # Update twist angle using Leaky Integrator scheme to avoid long-term drift - twist *= max(0.0, 1.0 - time_constant_inv * dt) - twist += dtwist * dt - - # Update quaternion to add estimated twist - p_z, p_w = np.sin(0.5 * twist), np.cos(0.5 * twist) - q[0], q[1], q[2], q[3] = ( - p_w * q_x - p_z * q_y, - p_z * q_x + p_w * q_y, - p_z * q_w, - p_w * q_w, - ) - - class MahonyFilter(BaseObserverBlock[ Dict[str, np.ndarray], Dict[str, np.ndarray], BaseObs, BaseAct]): """Mahony's Nonlinear Complementary Filter on SO(3). @@ -183,24 +133,17 @@ def __init__(self, name: str, env: InterfaceJiminyEnv[BaseObs, BaseAct], *, - twist_time_constant: Optional[float] = 0.0, + ignore_twist: bool = False, exact_init: bool = True, - kp: Union[np.ndarray, float] = 1.0, - ki: Union[np.ndarray, float] = 0.1, + kp: Union[np.ndarray, Sequence[float], float] = 1.0, + ki: Union[np.ndarray, Sequence[float], float] = 0.1, compute_rpy: bool = False, update_ratio: int = 1) -> None: """ :param name: Name of the block. :param env: Environment to connect with. - :param twist_time_constant: - If specified, it corresponds to the time constant of the leaky - integrator (Exponential Moving Average) used to estimate the twist - part of twist-after-swing decomposition of the estimated - orientation in place of the Mahony Filter. If `0.0`, then its is - kept constant equal to zero. `None` to kept the original estimate - provided by Mahony Filter. See `remove_twist_from_quat` and - `update_twist` documentations for details. - Optional: `0.0` by default. + :param ignore_twist: Whether to ignore the twist of the IMU quaternion + estimate. :param exact_init: Whether to initialize orientation estimate using accelerometer measurements or ground truth. `False` is not recommended because the robot is often @@ -226,35 +169,22 @@ def __init__(self, # Handling of default argument(s) num_imu_sensors = len(env.robot.sensors[ImuSensor.type]) if isinstance(kp, float): - kp = np.full((num_imu_sensors,), kp) + kp = (kp,) * num_imu_sensors if isinstance(ki, float): - ki = np.full((num_imu_sensors,), ki) + ki = (ki,) * num_imu_sensors # Backup some of the user arguments - self.compute_rpy = compute_rpy + self.ignore_twist = ignore_twist self.exact_init = exact_init - self.kp = kp - self.ki = ki - - # Keep track of how the twist must be computed - self.twist_time_constant_inv: Optional[float] - if twist_time_constant is None: - self.twist_time_constant_inv = None - else: - if twist_time_constant > 0.0: - self.twist_time_constant_inv = 1.0 / twist_time_constant - else: - self.twist_time_constant_inv = float("inf") - self._remove_twist = self.twist_time_constant_inv is not None - self._update_twist = ( - self.twist_time_constant_inv is not None and - np.isfinite(self.twist_time_constant_inv)) + self.kp = np.asarray(kp) + self.ki = np.asarray(ki) + self.compute_rpy = compute_rpy # Whether the observer has been initialized. # This flag must be managed internally because relying on # `self.env.is_simulation_running` is not possible for observer blocks. # Unlike `compute_command`, the simulation is already running when - # `refresh_observation`` is called for the first time of an episode. + # `refresh_observation` is called for the first time of an episode. self._is_initialized = False # Whether the observer has been compiled already. @@ -272,13 +202,8 @@ def __init__(self, # Allocate gyroscope bias estimate self._bias = np.zeros((3, num_imu_sensors)) - # Allocate twist angle estimate around z-axis in world frame - self._twist = np.zeros((1, num_imu_sensors)) - # Define the state of the filter self._state = {"bias": self._bias} - if self._update_twist: - self._state["twist"] = self._twist # Initialize the observer super().__init__(name, env, update_ratio) @@ -294,9 +219,7 @@ def __init__(self, def _initialize_state_space(self) -> None: """Configure the internal state space of the observer. - It corresponds to the current gyroscope bias estimate. The twist angle - is not part of the internal state although being integrated over time - because it is uniquely determined from the orientation estimate. + It corresponds to the current gyroscope bias estimate. """ # Strictly speaking, 'q_prev' is part of the internal state of the # observer since it is involved in its computations. Yet, it is not an @@ -309,11 +232,6 @@ def _initialize_state_space(self) -> None: low=np.full((3, num_imu_sensors), -np.inf), high=np.full((3, num_imu_sensors), np.inf), dtype=np.float64) - if self._update_twist: - state_space["twist"] = gym.spaces.Box( - low=np.full((num_imu_sensors,), -np.inf), - high=np.full((num_imu_sensors,), np.inf), - dtype=np.float64) self.state_space = gym.spaces.Dict(state_space) def _initialize_observation_space(self) -> None: @@ -350,6 +268,9 @@ def _setup(self) -> None: # Call base implementation super()._setup() + # Initialize observation + fill(self.observation, 0) + # Fix initialization of the observation to be valid quaternions self._quat[-1] = 1.0 @@ -361,9 +282,6 @@ def _setup(self) -> None: # Reset the sensor bias estimate fill(self._bias, 0) - # Reset the twist estimate - fill(self._twist, 0) - # Warn if 'observe_dt' is too large to provide a meaningful if self.observe_dt > 0.01 + 1e-6: LOGGER.warning( @@ -373,11 +291,15 @@ def _setup(self) -> None: # Call `refresh_observation` manually to make sure that all the jitted # method of it control flow has been compiled. + # Note that setup must be called once again because compilation will + # mess up with the internal state of the filter. if not self._is_compiled: self._is_initialized = False for _ in range(2): self.refresh_observation(self.env.observation) self._is_compiled = True + self._setup() + return # Consider that the observer is not initialized anymore self._is_initialized = False @@ -432,10 +354,6 @@ def refresh_observation(self, measurement: BaseObs) -> None: # Convert the rotation matrices of the IMUs to quaternions matrices_to_quat(tuple(imu_rots), self._quat) - # Keep track of tilt if necessary - if self._update_twist: - self._twist[:] = np.arctan2(self._quat[2], self._quat[3]) - self._is_initialized = True # Compute the RPY representation if requested @@ -455,16 +373,9 @@ def refresh_observation(self, measurement: BaseObs) -> None: self.observe_dt) # Remove twist if requested - if self._remove_twist: + if self.ignore_twist: remove_twist_from_quat(self._quat) - if self._update_twist: - update_twist(self._quat, - self._twist, - self._omega, - self.twist_time_constant_inv, - self.observe_dt) - # Compute the RPY representation if requested if self.compute_rpy: quat_to_rpy(self._quat, self._rpy) diff --git a/python/gym_jiminy/common/gym_jiminy/common/blocks/proportional_derivative_controller.py b/python/gym_jiminy/common/gym_jiminy/common/blocks/proportional_derivative_controller.py index ae2f90e422..106a555e29 100644 --- a/python/gym_jiminy/common/gym_jiminy/common/blocks/proportional_derivative_controller.py +++ b/python/gym_jiminy/common/gym_jiminy/common/blocks/proportional_derivative_controller.py @@ -2,7 +2,7 @@ with gym_jiminy reinforcement learning pipeline environment design. """ import warnings -from typing import List, Union +from typing import List, Union, Optional import numpy as np import numba as nb @@ -101,14 +101,14 @@ def integrate_zoh(state: np.ndarray, @nb.jit(nopython=True, cache=True, fastmath=True) -def pd_controller(q_measured: np.ndarray, - v_measured: np.ndarray, +def pd_controller(encoder_data: np.ndarray, command_state: np.ndarray, command_state_lower: np.ndarray, command_state_upper: np.ndarray, kp: np.ndarray, kd: np.ndarray, motors_effort_limit: np.ndarray, + motors_velocity_deadband: Optional[np.ndarray], control_dt: float, out: np.ndarray) -> None: """Compute command torques under discrete-time proportional-derivative @@ -133,8 +133,7 @@ def pd_controller(q_measured: np.ndarray, See `PDController` documentation to get more information, and `integrate_zoh` documentation for details about the state integration. - :param q_measured: Current position of the actuators. - :param v_measured: Current velocity of the actuators. + :param encoder_data: Current position and velocity of the actuators. :param command_state: Current command state, namely, all the derivatives of the target motors positions up to acceleration order. :param command_state_lower: Lower bound of the command state that must be @@ -144,6 +143,8 @@ def pd_controller(q_measured: np.ndarray, :param kp: PD controller position-proportional gain in motor order. :param kd: PD controller velocity-proportional gain in motor order. :param motors_effort_limit: Maximum effort that the actuators can output. + :param velocity_deadband: Target velocity deadband for which the target + motor velocity will be cancelled out completely. :param control_dt: Controller update period. It will be involved in the integration of the command state. :param out: Pre-allocated memory to store the command motor torques. @@ -154,11 +155,13 @@ def pd_controller(q_measured: np.ndarray, command_state_upper, control_dt) - # Extract targets motors positions and velocities from command state - q_target, v_target, _ = command_state + # Velocity/Acceleration dead-band to avoid slow drift of target position + if motors_velocity_deadband is not None: + is_zero_velocity = np.abs(command_state[1]) < motors_velocity_deadband + command_state[1, is_zero_velocity] = 0.0 # Compute the joint tracking error - q_error, v_error = q_target - q_measured, v_target - v_measured + q_error, v_error = command_state[:2] - encoder_data # Compute PD command out[:] = kp * (q_error + kd * v_error) @@ -174,8 +177,8 @@ def pd_adapter(action: np.ndarray, command_state: np.ndarray, command_state_lower: np.ndarray, command_state_upper: np.ndarray, - dt: float, is_instantaneous: bool, + step_dt: float, out: np.ndarray) -> None: """Compute the target motor accelerations that must be held constant for a given time interval in order to reach the desired value of some derivative @@ -198,41 +201,45 @@ def pd_adapter(action: np.ndarray, satisfied at all cost. :param command_state_upper: Upper bound of the command state that must be satisfied at all cost. - :param dt: Time interval during which the target motor accelerations will - be held constant. + :param step_dt: Time interval during which the target motor accelerations + will be held constant. :param out: Pre-allocated memory to store the target motor accelerations. """ + # Early return if timestep is too small + if abs(step_dt) < 1e-9: + return + # Update command accelerations based on the action and its derivative order if is_instantaneous: # Update the command state directly if order == 0: # Compute command velocity - velocity = (action - command_state[0]) / dt + velocity = (action - command_state[0]) / step_dt # Clip command velocity velocity = np.minimum(np.maximum( velocity, command_state_lower[1]), command_state_upper[1]) # Update command position instantaneously - command_state[0] = action + command_state[0] += velocity * step_dt command_state[1] = 0.0 else: # Compute command acceleration - acceleration = (action - command_state[1]) / dt + acceleration = (action - command_state[1]) / step_dt # Clip command acceleration acceleration = np.minimum(np.maximum( acceleration, command_state_lower[2]), command_state_upper[2]) # Update command velocity instantaneously - command_state[1] += acceleration * dt + command_state[1] += acceleration * step_dt # Hold the acceleration constant equal to zero out[:] = 0.0 else: if order == 0: # Compute command velocity - velocity = (action - command_state[0]) / dt + velocity = (action - command_state[0]) / step_dt else: # The action corresponds to the command motor velocities velocity = action @@ -242,7 +249,7 @@ def pd_adapter(action: np.ndarray, velocity, command_state_lower[1]), command_state_upper[1]) # Compute command acceleration - out[:] = (velocity - command_state[1]) / dt + out[:] = (velocity - command_state[1]) / step_dt def get_encoder_to_motor_map(robot: jiminy.Robot) -> Union[slice, List[int]]: @@ -315,7 +322,8 @@ def __init__(self, kd: Union[float, List[float], np.ndarray], joint_position_margin: float = 0.0, joint_velocity_limit: float = float("inf"), - joint_acceleration_limit: float = float("inf")) -> None: + joint_velocity_deadband: float = 0.0, + joint_acceleration_limit: Optional[float] = None) -> None: """ :param name: Name of the block. :param env: Environment to connect with. @@ -328,11 +336,24 @@ def __init__(self, :param joint_position_margin: Minimum distance of the joint target positions from their respective bounds. Optional: 0.0 by default. - :param joint_velocity_limit: Restrict maximum joint target velocities - wrt their hardware specifications. - Optional: "inf" by default. - :param joint_acceleration_limit: Maximum joint target acceleration. - Optional: "inf" by default. + :param joint_velocity_limit: + Further restrict maximum joint target velocities wrt their + hardware specifications. + Optional: 'inf' by default. + :param joint_velocity_deadband: + Target velocity deadband to avoid high-frequency vibrations and + drifting of the target motor positions that would result internal + efforts in the mechanical structure for hyperstatic postures. + Note that it is only enabled during evaluation, not training. + Optional: 0.0 by default. + :param joint_acceleration_limit: + Maximum joint target acceleration. `None` to infer acceleration + bounds (from prescribed PD gains plus maximum motor velocities and + efforts) that would be as restrictive as possible while allowing + bang-band control (i.e. unrestricted jump in target velocity or + command torque in a single environment timestep, depending on which + of these two criteria is the most limiting). + Optional: None by default. """ # Make sure the action space of the environment has not been altered if env.action_space is not env.unwrapped.action_space: @@ -391,16 +412,20 @@ def __init__(self, motors_velocity_limit = np.array([ min(motor.velocity_limit, ratio * joint_velocity_limit) for motor, ratio in zip(env.robot.motors, encoder_to_joint_ratio)]) + self._motors_velocity_deadband = np.array([ + ratio * joint_velocity_deadband + for motor, ratio in zip(env.robot.motors, encoder_to_joint_ratio)]) # Define acceleration bounds allowing unrestricted bang-bang control - range_limit = 2 * motors_velocity_limit / env.step_dt - effort_limit = self.motors_effort_limit / ( - self.kp * env.step_dt * np.maximum(env.step_dt / 2, self.kd)) - target_acceleration_limit = np.array([ - ratio * joint_acceleration_limit - for ratio in encoder_to_joint_ratio]) - acceleration_limit = np.minimum( - np.minimum(range_limit, effort_limit), target_acceleration_limit) + if joint_acceleration_limit is None: + range_limit = 2 * motors_velocity_limit / env.step_dt + effort_limit = self.motors_effort_limit / ( + self.kp * env.step_dt * np.maximum(env.step_dt, self.kd)) + acceleration_limit = np.minimum(range_limit, effort_limit) + else: + acceleration_limit = np.array([ + ratio * joint_acceleration_limit + for ratio in encoder_to_joint_ratio]) # Compute command state bounds self._command_state_lower = np.stack([motors_position_lower, @@ -411,8 +436,7 @@ def __init__(self, acceleration_limit], axis=0) # Extract measured motor positions and velocities for fast access - self.q_measured, self.v_measured = ( - env.measurement["measurements"][EncoderSensor.type]) + self.encoder_data = env.measurement["measurements"][EncoderSensor.type] # Allocate memory for the command state self._command_state = np.zeros((3, env.robot.nmotors)) @@ -480,23 +504,21 @@ def compute_command(self, action: np.ndarray, command: np.ndarray) -> None: :param action: Desired target motor acceleration. :param command: Current motor torques that will be updated in-place. """ + # Extract motor positions and velocity from encoder data + encoder_data = self.encoder_data + if not self._is_same_order: + encoder_data = encoder_data[:, self.encoder_to_motor_map] + # Re-initialize the command state to the current motor state if the # simulation is not running. This must be done here because the # command state must be valid prior to calling `refresh_observation` # for the first time, which happens at `reset`. is_simulation_running = self.env.is_simulation_running if not is_simulation_running: - for i, value in enumerate((self.q_measured, self.v_measured)): - np.clip(value, - self._command_state_lower[i], - self._command_state_upper[i], - out=self._command_state[i]) - - # Extract motor positions and velocity from encoder data - q_measured, v_measured = self.q_measured, self.v_measured - if not self._is_same_order: - q_measured = q_measured[self.encoder_to_motor_map] - v_measured = v_measured[self.encoder_to_motor_map] + np.clip(encoder_data, + self._command_state_lower[:2], + self._command_state_upper[:2], + out=self._command_state[:2]) # Update target motor accelerations array_copyto(self._command_acceleration, action) @@ -504,14 +526,14 @@ def compute_command(self, action: np.ndarray, command: np.ndarray) -> None: # Compute the motor efforts using PD control. # The command state must not be updated if no simulation is running. pd_controller( - q_measured, - v_measured, + encoder_data, self._command_state, self._command_state_lower, self._command_state_upper, self.kp, self.kd, self.motors_effort_limit, + None if self.env.is_training else self._motors_velocity_deadband, self.control_dt if is_simulation_running else 0.0, command) @@ -611,6 +633,6 @@ def compute_command(self, action: np.ndarray, command: np.ndarray) -> None: self._pd_controller._command_state, self._pd_controller._command_state_lower, self._pd_controller._command_state_upper, - self.control_dt, self.is_instantaneous, + self.control_dt if self.env.is_simulation_running else 0.0, command) diff --git a/python/gym_jiminy/common/gym_jiminy/common/envs/generic.py b/python/gym_jiminy/common/gym_jiminy/common/envs/generic.py index 405cff35d1..a4dd1a9b18 100644 --- a/python/gym_jiminy/common/gym_jiminy/common/envs/generic.py +++ b/python/gym_jiminy/common/gym_jiminy/common/envs/generic.py @@ -58,9 +58,12 @@ # Maximum realtime slowdown of simulation steps before triggering timeout error -TIMEOUT_RATIO = 15 +TIMEOUT_RATIO = 25 -# Absolute tolerance when checking that observations are valid +# Absolute tolerance when checking that observations are valid. +# Note that the joint positions are out-of-bounds when hitting the mechanical +# stops. Because of this, some tolerance must be added to avoid trigeering +# termination too easily. OBS_CONTAINS_TOL = 0.01 @@ -246,10 +249,6 @@ def __init__(self, # Initialize the seed of the environment self._initialize_seed() - # Initialize the observation and action buffers - self.observation: Obs = zeros(self.observation_space) - self.action: Act = zeros(self.action_space) - # Check that the action and observation spaces are consistent with # 'compute_command' and 'refresh_observation' respectively. cls = type(self) @@ -270,9 +269,14 @@ def __init__(self, "`BaseJiminyEnv.refresh_observation` must be overloaded when " "defining a custom observation space.") - # Bind the observation to the engine measurements by default + # Initialize the observation and action buffers if necessary if not is_observation_space_custom: + # Bind the observation to the engine measurements by default self.observation = cast(Obs, self.measurement) + else: + self.observation: Obs = zeros(self.observation_space) + if not hasattr(self, "action"): + self.action: Act = zeros(self.action_space) # Define specialized operators for efficiency. # Note that a partial view of observation corresponding to measurement @@ -402,8 +406,10 @@ def _initialize_seed(self, seed: Optional[int] = None) -> None: self.np_random.bit_generator.state = np.random.SFC64(np_seed).state # Reset the seed of the action and observation spaces - self.observation_space.seed(seed) - self.action_space.seed(seed) + obs_seed, act_seed = map(int, self.np_random.integers( + np.iinfo(int).max, size=(2,), dtype=int)) + self.observation_space.seed(obs_seed) + self.action_space.seed(act_seed) # Reset the seed of Jiminy Engine self.simulator.seed(engine_seed) diff --git a/python/gym_jiminy/common/gym_jiminy/common/quantities/generic.py b/python/gym_jiminy/common/gym_jiminy/common/quantities/generic.py index 7ee9af2e11..9190ad0212 100644 --- a/python/gym_jiminy/common/gym_jiminy/common/quantities/generic.py +++ b/python/gym_jiminy/common/gym_jiminy/common/quantities/generic.py @@ -1077,7 +1077,7 @@ class MultiFrameCollisionDetection(InterfaceQuantity[bool]): .. note:: Jiminy enforces all collision geometries to be either primitive shapes - or convex polyhedra for efficiency. In practice, tf meshes where + or convex polyhedra for efficiency. In practice, if meshes has been specified in the original URDF file, then they will be converted into their respective convex hull. """ diff --git a/python/gym_jiminy/common/gym_jiminy/common/utils/misc.py b/python/gym_jiminy/common/gym_jiminy/common/utils/misc.py index 585c6d583a..d6a9a551dd 100644 --- a/python/gym_jiminy/common/gym_jiminy/common/utils/misc.py +++ b/python/gym_jiminy/common/gym_jiminy/common/utils/misc.py @@ -167,7 +167,7 @@ def sample(low: Union[float, np.ndarray] = -1.0, Optional: 'uniform' by default. :param scale: Shrink the standard deviation of the distribution around the mean by this factor. - Optional: No scaling by default? + Optional: No scaling by default :param enable_log_scale: The sampled values are power of 10. :param shape: Enforce of the sampling shape. Only available if 'low', 'high' and 'scale' are floats. `None` to disable. diff --git a/python/gym_jiminy/common/gym_jiminy/common/utils/pipeline.py b/python/gym_jiminy/common/gym_jiminy/common/utils/pipeline.py index 5b36dede0e..7bae689a33 100644 --- a/python/gym_jiminy/common/gym_jiminy/common/utils/pipeline.py +++ b/python/gym_jiminy/common/gym_jiminy/common/utils/pipeline.py @@ -26,7 +26,7 @@ import pinocchio as pin from jiminy_py.dynamics import State, Trajectory -from ..quantities import EnergyGenerationMode +from ..quantities import EnergyGenerationMode, OrientationType from ..bases import (QuantityEvalMode, InterfaceJiminyEnv, InterfaceBlock, @@ -46,6 +46,7 @@ ENUM_TYPES = (EnergyGenerationMode, QuantityEvalMode, + OrientationType, pin.KinematicLevel) ENUM_NAME_TO_MODULE_MAP = {enum_type.__name__: enum_type.__module__.split(".") for enum_type in ENUM_TYPES} @@ -247,15 +248,17 @@ def build_pipeline(env_config: EnvConfig, Optional: `None` by default. """ # Define helper to replace enums string by its corresponding object value - def sanitize_enum_string(kwargs: Dict[str, Any]) -> None: - """Replace in-place enum string representation with their object - counterpart. + def sanitize_special_string(kwargs: Dict[str, Any]) -> None: + """Replace in-place some special strings with their object counterpart. + + This method deals with enums, None ("none") and special floats ("nan", + "+/-inf"). :param kwargs: Nested dictionary of options. """ for key, value in kwargs.items(): if isinstance(value, dict): - sanitize_enum_string(value) + sanitize_special_string(value) continue if not isinstance(value, str): @@ -264,6 +267,9 @@ def sanitize_enum_string(kwargs: Dict[str, Any]) -> None: if value == "none": kwargs[key] = None continue + if value == "nan" or value.endswith("inf"): + kwargs[key] = float(value) + continue value_path = value.split(".") enum_type = value_path[-2] if len(value_path) > 1 else None @@ -297,8 +303,8 @@ def sanitize_composition_config(composition_config: CompositionConfig, # Get its constructor keyword-arguments kwargs = composition_config.get("kwargs", {}) - # Special treatment for "none" and enum string - sanitize_enum_string(kwargs) + # Special treatment for "none", "nan", "+/-inf" and enum string + sanitize_special_string(kwargs) # Special handling for `MixtureReward` if is_reward and issubclass(cls, MixtureReward): @@ -341,8 +347,8 @@ def build_composition( # Get its constructor keyword-arguments kwargs = composition_config.get("kwargs", {}).copy() - # Special treatment for "none" and enum string - sanitize_enum_string(kwargs) + # Special treatment for "none", "nan", "+/-inf" and enum string + sanitize_special_string(kwargs) # Special handling for `MixtureReward` if is_reward and issubclass(cls, MixtureReward): @@ -560,9 +566,9 @@ def build_controller_observer_layer( block_kwargs = block_config.get("kwargs", {}) wrapper_kwargs = wrapper_config.get("kwargs", {}) - # Special treatment for "none" and enum string + # Special treatment for "none", "nan", "+/-inf" and enum string for kwargs in (block_kwargs, wrapper_kwargs): - sanitize_enum_string(kwargs) + sanitize_special_string(kwargs) # Special treatment for "quantity" arg of `QuantityObserver` blocks if block_cls_ is not None and issubclass(block_cls_, QuantityObserver): diff --git a/python/gym_jiminy/common/gym_jiminy/common/wrappers/observation_layout.py b/python/gym_jiminy/common/gym_jiminy/common/wrappers/observation_layout.py index 97f174e407..dd55485f6c 100644 --- a/python/gym_jiminy/common/gym_jiminy/common/wrappers/observation_layout.py +++ b/python/gym_jiminy/common/gym_jiminy/common/wrappers/observation_layout.py @@ -145,7 +145,10 @@ def _adapt_layout( nested_key_in = cast(NestedKey, nested_spec_in) # Extract the input chunk recursively - value_in = reduce(getitem, nested_key_in, data) + try: + value_in = reduce(getitem, nested_key_in, data) + except KeyError as e: + raise KeyError(f"Nested key '{nested_key_in}' is missing.") from e if block_spec_in is not None: # Convert array block specification to slices slices = [] diff --git a/python/gym_jiminy/examples/tutorial_rl.ipynb b/python/gym_jiminy/examples/tutorial_rl.ipynb index 6dad16b4ba..ed00e458bd 100644 --- a/python/gym_jiminy/examples/tutorial_rl.ipynb +++ b/python/gym_jiminy/examples/tutorial_rl.ipynb @@ -81,8 +81,8 @@ "\n", " options = robot.get_model_options()\n", " options[\"joints\"][\"positionLimitFromUrdf\"] = False\n", - " options[\"joints\"][\"positionLimitMin\"] = np.full((2,), -1e5)\n", - " options[\"joints\"][\"positionLimitMax\"] = np.full((2,), +1e5)\n", + " options[\"joints\"][\"positionLimitLower\"] = np.full((2,), -1e5)\n", + " options[\"joints\"][\"positionLimitUpper\"] = np.full((2,), +1e5)\n", " robot.set_model_options(options)\n", "\n", " # motor\n", diff --git a/python/gym_jiminy/toolbox/gym_jiminy/toolbox/math/qhull.py b/python/gym_jiminy/toolbox/gym_jiminy/toolbox/math/qhull.py index 6572e4df6d..78fbd1a71b 100644 --- a/python/gym_jiminy/toolbox/gym_jiminy/toolbox/math/qhull.py +++ b/python/gym_jiminy/toolbox/gym_jiminy/toolbox/math/qhull.py @@ -431,23 +431,29 @@ def get_distance_to_point(self, points: np.ndarray) -> np.ndarray: a single query point. """ # Make sure that the input is at least 2D - if points.ndim < 2: - points = np.atleast_2d(points) + assert points.ndim in (1, 2) and points.shape[-1] == 2 + points2d = np.atleast_2d(points) # Compute the signed distance between query points and convex hull if self.npoints > 2: - return compute_distance_convex_to_point( - self.vertices, self.vectors, points) + dist2d = compute_distance_convex_to_point( + self.vertices, self.vectors, points2d) # Compute the distance between query points and segment - if self.npoints == 2: + elif self.npoints == 2: vec = self.vertices[1] - self.vertices[0] - ratio = (points - self.vertices[0]) @ vec / np.dot(vec, vec) + ratio = (points2d - self.vertices[0]) @ vec / np.dot(vec, vec) proj = self.vertices[0] + np.outer(np.clip(ratio, 0.0, 1.0), vec) - return np.linalg.norm(points - proj, axis=1) + dist2d = np.linalg.norm(points2d - proj, axis=1) # Compute the distance between query points and point - return np.linalg.norm(points - self.vertices, axis=1) + else: + dist2d = np.linalg.norm(points2d - self.vertices, axis=1) + + # Ravel extra dimension before returning if not present initially + if points.ndim < 2: + return dist2d[0] + return dist2d def get_distance_to_ray(self, vector: np.ndarray, diff --git a/python/gym_jiminy/unit_py/test_deformation_estimator.py b/python/gym_jiminy/unit_py/test_deformation_estimator.py index f649e9c428..afa0c4f3a4 100644 --- a/python/gym_jiminy/unit_py/test_deformation_estimator.py +++ b/python/gym_jiminy/unit_py/test_deformation_estimator.py @@ -131,7 +131,7 @@ def test_arm(self): env, kp=0.0, ki=0.0, - twist_time_constant=None, + ignore_twist=False, exact_init=True, update_ratio=1) env = ObservedJiminyEnv(env, mahony_filter) @@ -253,7 +253,7 @@ def _setup(self) -> None: kwargs=dict( kp=0.0, ki=0.0, - twist_time_constant=None, + ignore_twist=False, exact_init=True, update_ratio=1, ) @@ -281,4 +281,4 @@ def _setup(self) -> None: env.step(env.action) # Check that deformation estimates matches ground truth - self._test_deformation_estimate(env, imu_atol=1e-4, flex_atol=5e-3) + self._test_deformation_estimate(env, imu_atol=1e-4, flex_atol=1e-2) diff --git a/python/gym_jiminy/unit_py/test_pipeline_control.py b/python/gym_jiminy/unit_py/test_pipeline_control.py index 0f466aae1e..ad0eb83475 100644 --- a/python/gym_jiminy/unit_py/test_pipeline_control.py +++ b/python/gym_jiminy/unit_py/test_pipeline_control.py @@ -17,9 +17,9 @@ from jiminy_py.core import ImuSensor from jiminy_py.viewer import Viewer -from gym_jiminy.envs import ( - AtlasPDControlJiminyEnv, CassiePDControlJiminyEnv, DigitPDControlJiminyEnv) -from gym_jiminy.common.blocks import PDController, PDAdapter, MahonyFilter +from gym_jiminy.common.bases import ObservedJiminyEnv +from gym_jiminy.common.blocks import ( + PDController, PDAdapter, MahonyFilter, BodyObserver) from gym_jiminy.common.blocks.proportional_derivative_controller import ( integrate_zoh) from gym_jiminy.common.wrappers import ( @@ -27,6 +27,8 @@ FlattenObservation) from gym_jiminy.common.utils import ( quat_to_rpy, matrix_to_rpy, matrix_to_quat, remove_twist_from_quat) +from gym_jiminy.envs import ( + AtlasPDControlJiminyEnv, CassiePDControlJiminyEnv, DigitPDControlJiminyEnv) IMAGE_DIFF_THRESHOLD = 5.0 @@ -128,14 +130,25 @@ def test_pid_standing(self): self._test_pid_standing() Viewer.close() - def test_mahony_filter(self): + def test_mahony_filter_plus_body_observer(self): """Check consistency between IMU orientation estimation provided by Mahony filter and ground truth for moderately slow motions. """ # Instantiate and reset the environment env = AtlasPDControlJiminyEnv() - observer = env.observer - assert isinstance(observer, MahonyFilter) + + # Overwrite Mahony filter to specify custom constructor arguments + assert isinstance(env, ObservedJiminyEnv) + assert isinstance(env.observer, MahonyFilter) + mahony_filter = MahonyFilter( + "mahony_filter", + env.env, + kp=0.0, + ki=0.0, + ignore_twist=False, + exact_init=True, + compute_rpy=False) + env = ObservedJiminyEnv(env.env, mahony_filter) # Define a constant action that move the upper-body in all directions robot = env.robot @@ -150,23 +163,21 @@ def test_mahony_filter(self): # Check that the estimate IMU orientation is accurate over the episode for twist_time_constant in (None, float("inf"), 0.0): - # Reinitialize the observer - env.observer = observer = MahonyFilter( - observer.name, - observer.env, - kp=0.0, - ki=0.0, + # Add body observer + observer = BodyObserver( + "body_observer", + env, twist_time_constant=twist_time_constant, - exact_init=True, compute_rpy=True) + env_derived = ObservedJiminyEnv(env, observer) # Reset the environment - env.reset(seed=0) + env_derived.reset(seed=0) rpy_est = observer.observation["rpy"][:, 0] # Run of few simulation steps for i in range(200): - env.step(action * (1 - 2 * ((i // 50) % 2))) + env_derived.step(action * (1 - 2 * ((i // 50) % 2))) if twist_time_constant == 0.0: # The twist must be ignored as it is not observable diff --git a/python/gym_jiminy/unit_py/test_training_toys_models.py b/python/gym_jiminy/unit_py/test_training_toys_models.py index f2a49c3d60..78d1fa5c41 100644 --- a/python/gym_jiminy/unit_py/test_training_toys_models.py +++ b/python/gym_jiminy/unit_py/test_training_toys_models.py @@ -16,7 +16,7 @@ # Fix the seed and number of threads for CI stability -SEED = 0 +SEED = 42 N_THREADS = 5 # Skip some tests if Jiminy has been compiled in debug diff --git a/python/jiminy_py/unit_py/test_dense_pole.py b/python/jiminy_py/unit_py/test_dense_pole.py index e16273169d..d390394a65 100644 --- a/python/jiminy_py/unit_py/test_dense_pole.py +++ b/python/jiminy_py/unit_py/test_dense_pole.py @@ -38,8 +38,8 @@ def setUp(self): # Configure joint bounds model_options = self.robot.get_model_options() - model_options['joints']['positionLimitMin'] = [-self.joint_limit] - model_options['joints']['positionLimitMax'] = [self.joint_limit] + model_options['joints']['positionLimitLower'] = [-self.joint_limit] + model_options['joints']['positionLimitUpper'] = [self.joint_limit] model_options['joints']['positionLimitFromUrdf'] = False self.robot.set_model_options(model_options)