From a30b5e904cf2c12cc4eb263a59c5e971cd3a2764 Mon Sep 17 00:00:00 2001
From: Alexis DUBURCQ <alexis.duburcq@gmail.com>
Date: Thu, 19 Dec 2024 20:58:59 +0100
Subject: [PATCH] [gym_jiminy/common] Various minor improvements. (#849)

* [core] Properly handle undefined motor velocity bounds.
* [core] Rename 'positionLimit(Min|Max)' in 'positionLimit(Lower|Upper)' for consistency with Pinocchio.
* [gym_jiminy/common] More robust random seed initialization.
* [gym_jiminy/common] Add option to avoid clipping target motor accelerations of PD controller.
* [gym_jiminy/common] Do not overwrite 'BaseJiminyEnv.(observation|action)' if already defined.
* [gym_jiminy/common] Support additional 'special' string when loading pipeline from toml.
* [gym_jiminy/common] 'ConvexHull2D.get_distance_to_point' now supports single query.
* [gym_jiminy/common] Try initializing quantities even if no simulation is running.
* [gym_jiminy/common] Clear exception message if nested key is missing.
* [gym_jiminy/common] Move leaky twist from mahony filter to body observer.
* [gym_jiminy/common] Add target joint velocity deadband to PD controller.
* [gym_jiminy/common] Fix 'ControlledJiminyEnv.action' not reset during setup.
* [gym_jiminy/common] Increase simulation timeout delay.
---
 core/include/jiminy/core/robot/model.h        |  12 +-
 core/src/engine/engine.cc                     |  26 ++--
 core/src/hardware/basic_motors.cc             |  19 ++-
 core/src/robot/model.cc                       |  54 +++----
 core/unit/engine_sanity_check.cc              |   4 +-
 .../gym_jiminy/common/bases/pipeline.py       |  21 ++-
 .../gym_jiminy/common/bases/quantities.py     |   9 +-
 .../blocks/body_orientation_observer.py       | 109 ++++++++++++--
 .../gym_jiminy/common/blocks/mahony_filter.py | 133 +++---------------
 .../proportional_derivative_controller.py     | 116 ++++++++-------
 .../common/gym_jiminy/common/envs/generic.py  |  24 ++--
 .../gym_jiminy/common/quantities/generic.py   |   2 +-
 .../common/gym_jiminy/common/utils/misc.py    |   2 +-
 .../gym_jiminy/common/utils/pipeline.py       |  28 ++--
 .../common/wrappers/observation_layout.py     |   5 +-
 python/gym_jiminy/examples/tutorial_rl.ipynb  |   4 +-
 .../toolbox/gym_jiminy/toolbox/math/qhull.py  |  22 +--
 .../unit_py/test_deformation_estimator.py     |   6 +-
 .../unit_py/test_pipeline_control.py          |  41 ++++--
 .../unit_py/test_training_toys_models.py      |   2 +-
 python/jiminy_py/unit_py/test_dense_pole.py   |   4 +-
 21 files changed, 362 insertions(+), 281 deletions(-)
diff --git a/core/include/jiminy/core/robot/model.h b/core/include/jiminy/core/robot/model.h
index 97cea4cfed..bed17ae80c 100644
--- a/core/include/jiminy/core/robot/model.h
+++ b/core/include/jiminy/core/robot/model.h
@@ -137,8 +137,8 @@ namespace jiminy
         {
             GenericConfig config;
             config["positionLimitFromUrdf"] = true;
-            config["positionLimitMin"] = Eigen::VectorXd{};
-            config["positionLimitMax"] = Eigen::VectorXd{};
+            config["positionLimitLower"] = Eigen::VectorXd{};
+            config["positionLimitUpper"] = Eigen::VectorXd{};
 
             return config;
         };
@@ -181,13 +181,13 @@ namespace jiminy
         {
             const bool positionLimitFromUrdf;
             /// \brief Min position limit of all the mechanical joints of the theoretical model.
-            const Eigen::VectorXd positionLimitMin;
-            const Eigen::VectorXd positionLimitMax;
+            const Eigen::VectorXd positionLimitLower;
+            const Eigen::VectorXd positionLimitUpper;
 
             JointOptions(const GenericConfig & options) :
             positionLimitFromUrdf{boost::get<bool>(options.at("positionLimitFromUrdf"))},
-            positionLimitMin{boost::get<Eigen::VectorXd>(options.at("positionLimitMin"))},
-            positionLimitMax{boost::get<Eigen::VectorXd>(options.at("positionLimitMax"))}
+            positionLimitLower{boost::get<Eigen::VectorXd>(options.at("positionLimitLower"))},
+            positionLimitUpper{boost::get<Eigen::VectorXd>(options.at("positionLimitUpper"))}
             {
             }
         };
diff --git a/core/src/engine/engine.cc b/core/src/engine/engine.cc
index 22a454e3f5..17352e03f7 100644
--- a/core/src/engine/engine.cc
+++ b/core/src/engine/engine.cc
@@ -3238,8 +3238,8 @@ namespace jiminy
     {
         typedef boost::fusion::vector<
             const Eigen::VectorXd & /* q */,
-            const Eigen::VectorXd & /* positionLimitMin */,
-            const Eigen::VectorXd & /* positionLimitMax */,
+            const Eigen::VectorXd & /* positionLimitLower */,
+            const Eigen::VectorXd & /* positionLimitUpper */,
             const std::unique_ptr<const Engine::EngineOptions> & /* engineOptions */,
             const std::shared_ptr<AbstractConstraintBase> & /* constraint */>
             ArgsType;
@@ -3252,16 +3252,16 @@ namespace jiminy
                                 void>
         algo(const pinocchio::JointModelBase<JointModel> & joint,
              const Eigen::VectorXd & q,
-             const Eigen::VectorXd & positionLimitMin,
-             const Eigen::VectorXd & positionLimitMax,
+             const Eigen::VectorXd & positionLimitLower,
+             const Eigen::VectorXd & positionLimitUpper,
              const std::unique_ptr<const Engine::EngineOptions> & engineOptions,
              const std::shared_ptr<AbstractConstraintBase> & constraint)
         {
             // Define some proxies for convenience
             const Eigen::Index positionIndex = joint.idx_q();
             const double qJoint = q[positionIndex];
-            const double qJointMin = positionLimitMin[positionIndex];
-            const double qJointMax = positionLimitMax[positionIndex];
+            const double qJointMin = positionLimitLower[positionIndex];
+            const double qJointMax = positionLimitUpper[positionIndex];
             const double transitionEps = engineOptions->contacts.transitionEps;
 
             // Check if out-of-bounds
@@ -3287,8 +3287,8 @@ namespace jiminy
                                 void>
         algo(const pinocchio::JointModelBase<JointModel> & /* joint */,
              const Eigen::VectorXd & /* q */,
-             const Eigen::VectorXd & /* positionLimitMin */,
-             const Eigen::VectorXd & /* positionLimitMax */,
+             const Eigen::VectorXd & /* positionLimitLower */,
+             const Eigen::VectorXd & /* positionLimitUpper */,
              const std::unique_ptr<const Engine::EngineOptions> & /* engineOptions */,
              const std::shared_ptr<AbstractConstraintBase> & constraint)
         {
@@ -3307,8 +3307,8 @@ namespace jiminy
                                 void>
         algo(const pinocchio::JointModelBase<JointModel> & /* joint */,
              const Eigen::VectorXd & /* q */,
-             const Eigen::VectorXd & /* positionLimitMin */,
-             const Eigen::VectorXd & /* positionLimitMax */,
+             const Eigen::VectorXd & /* positionLimitLower */,
+             const Eigen::VectorXd & /* positionLimitUpper */,
              const std::unique_ptr<const Engine::EngineOptions> & /* engineOptions */,
              const std::shared_ptr<AbstractConstraintBase> & constraint)
         {
@@ -3332,8 +3332,8 @@ namespace jiminy
 
         /* Enforce position limits for all joints having bounds constraints, ie mechanical and
            backlash joints. */
-        const Eigen::VectorXd & positionLimitMin = robot->pinocchioModel_.lowerPositionLimit;
-        const Eigen::VectorXd & positionLimitMax = robot->pinocchioModel_.upperPositionLimit;
+        const Eigen::VectorXd & positionLimitLower = robot->pinocchioModel_.lowerPositionLimit;
+        const Eigen::VectorXd & positionLimitUpper = robot->pinocchioModel_.upperPositionLimit;
         for (auto & constraintItem : constraints.boundJoints)
         {
             auto & constraint = constraintItem.second;
@@ -3342,7 +3342,7 @@ namespace jiminy
             computePositionLimitsForcesAlgo::run(
                 model.joints[jointIndex],
                 typename computePositionLimitsForcesAlgo::ArgsType(
-                    q, positionLimitMin, positionLimitMax, engineOptions_, constraint));
+                    q, positionLimitLower, positionLimitUpper, engineOptions_, constraint));
         }
 
         // Compute the flexibilities (only support `JointModelType::SPHERICAL` so far)
diff --git a/core/src/hardware/basic_motors.cc b/core/src/hardware/basic_motors.cc
index b96d937ef0..637e460e0b 100644
--- a/core/src/hardware/basic_motors.cc
+++ b/core/src/hardware/basic_motors.cc
@@ -40,6 +40,10 @@ namespace jiminy
     {
         // Check if the friction parameters make sense
         // Make sure the user-defined position limit has the right dimension
+        if (boost::get<double>(motorOptions.at("velocityEffortInvSlope")) < 0.0)
+        {
+            JIMINY_THROW(std::invalid_argument, "'velocityEffortInvSlope' must be positive.");
+        }
         if (boost::get<double>(motorOptions.at("frictionViscousPositive")) > 0.0)
         {
             JIMINY_THROW(std::invalid_argument, "'frictionViscousPositive' must be negative.");
@@ -104,12 +108,15 @@ namespace jiminy
             effortMax = effortLimit_;
             if (motorOptions_->enableVelocityLimit)
             {
-                const double velocityThr = std::max(
-                    velocityLimit_ - effortLimit_ * motorOptions_->velocityEffortInvSlope, 0.0);
-                effortMin *= std::clamp(
-                    (velocityLimit_ + vMotor) / (velocityLimit_ - velocityThr), 0.0, 1.0);
-                effortMax *= std::clamp(
-                    (velocityLimit_ - vMotor) / (velocityLimit_ - velocityThr), 0.0, 1.0);
+                const double velocityDelta = effortLimit_ * motorOptions_->velocityEffortInvSlope;
+                if (velocityDelta > 0.0)
+                {
+                    const double velocityThr = std::max(velocityLimit_ - velocityDelta, 0.0);
+                    effortMin *= std::clamp(
+                        (velocityLimit_ + vMotor) / (velocityLimit_ - velocityThr), 0.0, 1.0);
+                    effortMax *= std::clamp(
+                        (velocityLimit_ - vMotor) / (velocityLimit_ - velocityThr), 0.0, 1.0);
+                }
             }
         }
         uMotor = std::clamp(command, effortMin, effortMax);
diff --git a/core/src/robot/model.cc b/core/src/robot/model.cc
index 694d995406..586adb5bfb 100644
--- a/core/src/robot/model.cc
+++ b/core/src/robot/model.cc
@@ -1368,8 +1368,8 @@ namespace jiminy
         }
 
         // Re-initialize position limits
-        Eigen::VectorXd positionLimitMin = Eigen::VectorXd::Constant(pinocchioModel_.nq, -INF);
-        Eigen::VectorXd positionLimitMax = Eigen::VectorXd::Constant(pinocchioModel_.nq, +INF);
+        Eigen::VectorXd positionLimitLower = Eigen::VectorXd::Constant(pinocchioModel_.nq, -INF);
+        Eigen::VectorXd positionLimitUpper = Eigen::VectorXd::Constant(pinocchioModel_.nq, +INF);
         Eigen::Index idx_q, nq;
         for (const auto & joint : pinocchioModel_.joints)
         {
@@ -1392,16 +1392,16 @@ namespace jiminy
             default:
                 continue;
             }
-            positionLimitMin.segment(idx_q, nq).setConstant(-1.0 - EPS);
-            positionLimitMax.segment(idx_q, nq).setConstant(+1.0 + EPS);
+            positionLimitLower.segment(idx_q, nq).setConstant(-1.0 - EPS);
+            positionLimitUpper.segment(idx_q, nq).setConstant(+1.0 + EPS);
         }
 
         // Copy backlash joint position limits as-is from extended model
         for (pinocchio::JointIndex jointIndex : backlashJointIndices_)
         {
             const Eigen::Index positionIndex = pinocchioModel_.idx_qs[jointIndex];
-            positionLimitMin[positionIndex] = pinocchioModel_.lowerPositionLimit[positionIndex];
-            positionLimitMax[positionIndex] = pinocchioModel_.upperPositionLimit[positionIndex];
+            positionLimitLower[positionIndex] = pinocchioModel_.lowerPositionLimit[positionIndex];
+            positionLimitUpper[positionIndex] = pinocchioModel_.upperPositionLimit[positionIndex];
         }
 
         // Set mechanical joint position limits from theoretical model or user options
@@ -1418,9 +1418,9 @@ namespace jiminy
                 }
                 const auto & joint = pinocchioModel_.joints[jointIndex];
                 const auto & jointTh = pinocchioModelTh_.joints[jointIndexTh];
-                positionLimitMin.segment(joint.idx_q(), joint.nq()) =
+                positionLimitLower.segment(joint.idx_q(), joint.nq()) =
                     pinocchioModelTh_.lowerPositionLimit.segment(jointTh.idx_q(), jointTh.nq());
-                positionLimitMax.segment(joint.idx_q(), joint.nq()) =
+                positionLimitUpper.segment(joint.idx_q(), joint.nq()) =
                     pinocchioModelTh_.upperPositionLimit.segment(jointTh.idx_q(), jointTh.nq());
             }
         }
@@ -1429,14 +1429,14 @@ namespace jiminy
             for (std::size_t i = 0; i < mechanicalJointPositionIndices_.size(); ++i)
             {
                 Eigen::Index positionIndex = mechanicalJointPositionIndices_[i];
-                positionLimitMin[positionIndex] = modelOptions_->joints.positionLimitMin[i];
-                positionLimitMax[positionIndex] = modelOptions_->joints.positionLimitMax[i];
+                positionLimitLower[positionIndex] = modelOptions_->joints.positionLimitLower[i];
+                positionLimitUpper[positionIndex] = modelOptions_->joints.positionLimitUpper[i];
             }
         }
 
         // Overwrite extended model position limits
-        pinocchioModel_.lowerPositionLimit = positionLimitMin;
-        pinocchioModel_.upperPositionLimit = positionLimitMax;
+        pinocchioModel_.lowerPositionLimit = positionLimitLower;
+        pinocchioModel_.upperPositionLimit = positionLimitUpper;
 
         // Initialize backup joint space acceleration
         jointSpatialAccelerations_ =
@@ -1557,33 +1557,33 @@ namespace jiminy
                 boost::get<bool>(jointOptionsHolder.at("positionLimitFromUrdf"));
             if (!positionLimitFromUrdf)
             {
-                const Eigen::VectorXd & jointsPositionLimitMin =
-                    boost::get<Eigen::VectorXd>(jointOptionsHolder.at("positionLimitMin"));
+                const Eigen::VectorXd & jointspositionLimitLower =
+                    boost::get<Eigen::VectorXd>(jointOptionsHolder.at("positionLimitLower"));
                 if (mechanicalJointPositionIndices_.size() !=
-                    static_cast<uint32_t>(jointsPositionLimitMin.size()))
+                    static_cast<uint32_t>(jointspositionLimitLower.size()))
                 {
                     JIMINY_THROW(std::invalid_argument,
-                                 "Wrong vector size for 'positionLimitMin'.");
+                                 "Wrong vector size for 'positionLimitLower'.");
                 }
-                const Eigen::VectorXd & jointsPositionLimitMax =
-                    boost::get<Eigen::VectorXd>(jointOptionsHolder.at("positionLimitMax"));
+                const Eigen::VectorXd & jointspositionLimitUpper =
+                    boost::get<Eigen::VectorXd>(jointOptionsHolder.at("positionLimitUpper"));
                 if (mechanicalJointPositionIndices_.size() !=
-                    static_cast<uint32_t>(jointsPositionLimitMax.size()))
+                    static_cast<uint32_t>(jointspositionLimitUpper.size()))
                 {
                     JIMINY_THROW(std::invalid_argument,
-                                 "Wrong vector size for 'positionLimitMax'.");
+                                 "Wrong vector size for 'positionLimitUpper'.");
                 }
                 if (mechanicalJointPositionIndices_.size() ==
-                    static_cast<uint32_t>(modelOptions_->joints.positionLimitMin.size()))
+                    static_cast<uint32_t>(modelOptions_->joints.positionLimitLower.size()))
                 {
-                    auto jointsPositionLimitMinDiff =
-                        jointsPositionLimitMin - modelOptions_->joints.positionLimitMin;
+                    auto jointspositionLimitLowerDiff =
+                        jointspositionLimitLower - modelOptions_->joints.positionLimitLower;
                     internalBuffersMustBeUpdated |=
-                        (jointsPositionLimitMinDiff.array().abs() >= EPS).all();
-                    auto jointsPositionLimitMaxDiff =
-                        jointsPositionLimitMax - modelOptions_->joints.positionLimitMax;
+                        (jointspositionLimitLowerDiff.array().abs() >= EPS).all();
+                    auto jointspositionLimitUpperDiff =
+                        jointspositionLimitUpper - modelOptions_->joints.positionLimitUpper;
                     internalBuffersMustBeUpdated |=
-                        (jointsPositionLimitMaxDiff.array().abs() >= EPS).all();
+                        (jointspositionLimitUpperDiff.array().abs() >= EPS).all();
                 }
                 else
                 {
diff --git a/core/unit/engine_sanity_check.cc b/core/unit/engine_sanity_check.cc
index 429f46d757..90a2ce6b90 100755
--- a/core/unit/engine_sanity_check.cc
+++ b/core/unit/engine_sanity_check.cc
@@ -68,9 +68,9 @@ TEST(EngineSanity, EnergyConservation)
     GenericConfig & modelOptions = boost::get<GenericConfig>(robotOptions.at("model"));
     GenericConfig & jointsOptions = boost::get<GenericConfig>(modelOptions.at("joints"));
     boost::get<bool>(jointsOptions.at("positionLimitFromUrdf")) = false;
-    boost::get<Eigen::VectorXd>(jointsOptions.at("positionLimitMin")) =
+    boost::get<Eigen::VectorXd>(jointsOptions.at("positionLimitLower")) =
         Eigen::Vector2d::Constant(-INF);
-    boost::get<Eigen::VectorXd>(jointsOptions.at("positionLimitMax")) =
+    boost::get<Eigen::VectorXd>(jointsOptions.at("positionLimitUpper")) =
         Eigen::Vector2d::Constant(+INF);
 
     // Disable velocity and torque limits
diff --git a/python/gym_jiminy/common/gym_jiminy/common/bases/pipeline.py b/python/gym_jiminy/common/gym_jiminy/common/bases/pipeline.py
index 8e544730a2..cedad3e22d 100644
--- a/python/gym_jiminy/common/gym_jiminy/common/bases/pipeline.py
+++ b/python/gym_jiminy/common/gym_jiminy/common/bases/pipeline.py
@@ -42,6 +42,7 @@
 from .blocks import BaseControllerBlock, BaseObserverBlock
 
 from ..utils import (DataNested,
+                     fill,
                      zeros,
                      build_copyto,
                      copy,
@@ -368,13 +369,22 @@ def reset_hook() -> Optional[InterfaceJiminyEnv]:
 
             return env_derived
 
+        # Reset base pipeline.
+        # Note that it is important to reset the base environment first,
+        # otherwise adding extra layers on top of an existing pipeline would
+        # affect random number sampling, and therefore mess up with
+        # repeatability.
+        obs, info = self.env.reset(
+            seed=seed, options={"reset_hook": reset_hook})
+
         # Reset the seed of the action and observation spaces
         if seed is not None:
-            self.observation_space.seed(seed)
-            self.action_space.seed(seed)
+            obs_seed, act_seed = map(int, self.np_random.integers(
+                np.iinfo(int).max, size=(2,), dtype=int))
+            self.observation_space.seed(obs_seed)
+            self.action_space.seed(act_seed)
 
-        # Reset base pipeline
-        return self.env.reset(seed=seed, options={"reset_hook": reset_hook})
+        return obs, info
 
     def step(self,  # type: ignore[override]
              action: Act
@@ -1108,6 +1118,9 @@ def _setup(self) -> None:
         # Call base implementation
         super()._setup()
 
+        # Reset action
+        fill(self.action, 0)
+
         # Compute the observe and control update periods
         self.observe_dt = self.env.observe_dt
         self.control_dt = self.controller.control_dt
diff --git a/python/gym_jiminy/common/gym_jiminy/common/bases/quantities.py b/python/gym_jiminy/common/gym_jiminy/common/bases/quantities.py
index 8bc14ab330..2c0996546d 100644
--- a/python/gym_jiminy/common/gym_jiminy/common/bases/quantities.py
+++ b/python/gym_jiminy/common/gym_jiminy/common/bases/quantities.py
@@ -782,8 +782,13 @@ def initialize(self) -> None:
         # Call base implementation
         super().initialize()
 
-        # Force initializing state quantity
-        self.state.initialize()
+        # Force initializing state quantity if possible
+        try:
+            self.state.initialize()
+        except RuntimeError:
+            # No simulation running. This may be problematic but it is not
+            # blocking at this point.
+            pass
 
         # Refresh robot proxy
         assert isinstance(self.state, StateQuantity)
diff --git a/python/gym_jiminy/common/gym_jiminy/common/blocks/body_orientation_observer.py b/python/gym_jiminy/common/gym_jiminy/common/blocks/body_orientation_observer.py
index ee9785710e..2046e5a546 100644
--- a/python/gym_jiminy/common/gym_jiminy/common/blocks/body_orientation_observer.py
+++ b/python/gym_jiminy/common/gym_jiminy/common/blocks/body_orientation_observer.py
@@ -3,9 +3,10 @@
 """
 from collections import OrderedDict
 from collections.abc import Mapping
-from typing import List, Dict, Sequence, Union
+from typing import List, Dict, Sequence, Union, Optional
 
 import numpy as np
+import numba as nb
 import gymnasium as gym
 
 from jiminy_py.core import ImuSensor  # pylint: disable=no-name-in-module
@@ -13,6 +14,7 @@
 from ..bases import BaseAct, BaseObs, BaseObserverBlock, InterfaceJiminyEnv
 from ..wrappers.observation_layout import NestedData
 from ..utils import (DataNested,
+                     fill,
                      quat_to_rpy,
                      matrices_to_quat,
                      quat_multiply,
@@ -20,10 +22,64 @@
                      remove_twist_from_quat)
 
 
+@nb.jit(nopython=True, cache=True)
+def update_twist(q: np.ndarray,
+                 twist: np.ndarray,
+                 omega: np.ndarray,
+                 time_constant_inv: float,
+                 dt: float) -> None:
+    """Update the twist estimate of the Twist-after-Swing decomposition of
+    given orientations in quaternion representation using leaky integrator.
+
+    :param q: Current swing estimate as a quaternion. It will be updated
+              in-place to add the estimated twist.
+    :param twist: Current twist estimate to update in-place.
+    :param omega: Current angular velocity estimate in local frame.
+    :param time_constant_inv: Inverse of the time constant of the leaky
+                              integrator used to update the twist estimate.
+    :param dt: Time step, in seconds, between consecutive Quaternions.
+    """
+    # Compute the derivative of the twist angle:
+    # The element-wise time-derivative of a quaternion is given by:
+    #   dq = 0.5 * q * Quaternion(axis=gyro, w=0.0)        [1]
+    # The twist around a given axis can be computed as follows:
+    #   p = q_axis.dot(twist_axis) * twist_axis            [2]
+    #   twist = pin.Quaternion(np.array((*p, q_w))).normalized()
+    # The twist angle can be inferred from this expression:
+    #   twist = 2 * atan2(q_axis.dot(twist_axis), q_w)     [3]
+    # The derivative of twist angle can be derived:
+    #  dtwist = 2 * (
+    #     (dq_axis * q_w - q_axis * dq_w).dot(twist_axis)  [4]
+    #  ) / (q_axis.dot(twist_axis) ** 2 + q_w ** 2)
+    # One can show that if q_s is the swing part of the orientation, then:
+    #   q_s_axis.dot(twist_axis) = 0
+    # It yields:
+    #  dtwist = 2 * dq_s_axis.dot(twist_axis) / q_s_w      [5]
+    q_x, q_y, _, q_w = q
+    dtwist = (- q_y * omega[0] + q_x * omega[1]) / q_w + omega[2]
+
+    # Update twist angle using Leaky Integrator scheme to avoid long-term drift
+    twist *= max(0.0, 1.0 - time_constant_inv * dt)
+    twist += dtwist * dt
+
+    # Update quaternion to add estimated twist
+    p_z, p_w = np.sin(0.5 * twist), np.cos(0.5 * twist)
+    q[0], q[1], q[2], q[3] = (
+        p_w * q_x - p_z * q_y,
+        p_z * q_x + p_w * q_y,
+        p_z * q_w,
+        p_w * q_w)
+
+
 class BodyObserver(BaseObserverBlock[
         Dict[str, np.ndarray], np.ndarray, BaseObs, BaseAct]):
     """Compute the orientation and angular velocity in local frame of the
     parent body associated with all the IMU sensors of the robot.
+
+    .. note::
+        The twist angle is not part of the internal state, even if being
+        integrated over time, because it is uniquely determined from the
+        orientation estimate.
     """
     def __init__(self,
                  name: str,
@@ -33,7 +89,7 @@ def __init__(self,
                     "features", "mahony_filter", "quat"),
                  nested_imu_omega_key: NestedData = (
                     "features", "mahony_filter", "omega",),
-                 ignore_twist: bool = False,
+                 twist_time_constant: Optional[float] = None,
                  compute_rpy: bool = True,
                  update_ratio: int = 1) -> None:
         """
@@ -48,8 +104,15 @@ def __init__(self,
                                      local frame estimates. Their ordering must
                                      be consistent with the true IMU sensors of
                                      the robot.
-        :param ignore_twist: Whether to ignore the twist of the IMU quaternion
-                             estimate.
+        :param twist_time_constant:
+            If specified, it corresponds to the time constant of the leaky
+            integrator (Exponential Moving Average) used to estimate the twist
+            part of twist-after-swing decomposition of the estimated
+            orientation in place of the Mahony Filter. If `0.0`, then its is
+            kept constant equal to zero. `None` to kept the original estimate
+            provided by Mahony Filter. See `remove_twist_from_quat` and
+            `update_twist` documentations for details.
+            Optional: `0.0` by default.
         :param compute_rpy: Whether to compute the Yaw-Pitch-Roll Euler angles
                             representation for the 3D orientation of the IMU,
                             in addition to the quaternion representation.
@@ -60,9 +123,22 @@ def __init__(self,
                              Optional: `1` by default.
         """
         # Backup some of the user-argument(s)
-        self.ignore_twist = ignore_twist
         self.compute_rpy = compute_rpy
 
+        # Keep track of how the twist must be computed
+        self.twist_time_constant_inv: Optional[float]
+        if twist_time_constant is None:
+            self.twist_time_constant_inv = None
+        else:
+            if twist_time_constant > 0.0:
+                self.twist_time_constant_inv = 1.0 / twist_time_constant
+            else:
+                self.twist_time_constant_inv = float("inf")
+        self._remove_twist = self.twist_time_constant_inv is not None
+        self._update_twist = (
+            self.twist_time_constant_inv is not None and
+            np.isfinite(self.twist_time_constant_inv))
+
         # Define observed / estimated IMU data proxies for fast access
         obs_imu_data_list: List[np.ndarray] = []
         for nested_imu_key in (nested_imu_quat_key, nested_imu_omega_key):
@@ -96,6 +172,10 @@ def __init__(self,
             imu_rel_rot_mats.append(frame.placement.rotation)
         self._imu_rel_quats = matrices_to_quat(tuple(imu_rel_rot_mats))
 
+        # Allocate twist angle estimate around z-axis in world frame.
+        num_imu_sensors = len(env.robot.sensors[ImuSensor.type])
+        self._twist = np.zeros((1, num_imu_sensors))
+
         # Initialize the observer
         super().__init__(name, env, update_ratio)
 
@@ -131,6 +211,9 @@ def _setup(self) -> None:
         # Call base implementation
         super()._setup()
 
+        # Reset the twist estimate
+        fill(self._twist, 0)
+
         # Fix initialization of the observation to be valid quaternions
         self._quat[-1] = 1.0
 
@@ -157,10 +240,6 @@ def refresh_observation(self, measurement: BaseObs) -> None:
                       out=self._quat,
                       is_right_conjugate=True)
 
-        # Remove twist if requested
-        if self.ignore_twist:
-            remove_twist_from_quat(self._quat)
-
         # Compute the parent body angular velocities in local frame.
         # Note that batched "quaternion apply" is faster than sequential
         # "rotation matrix apply" in practice, both when using `np.einsum` or
@@ -169,6 +248,18 @@ def refresh_observation(self, measurement: BaseObs) -> None:
                    self._obs_imu_omegas,
                    out=self._omega)
 
+        # Remove twist if requested
+        if self._remove_twist:
+            remove_twist_from_quat(self._quat)
+
+        # Update twist if requested
+        if self._update_twist:
+            update_twist(self._quat,
+                         self._twist,
+                         self._omega,
+                         self.twist_time_constant_inv,
+                         self.observe_dt)
+
         # Compute the RPY representation if requested
         if self.compute_rpy:
             quat_to_rpy(self._quat, self._rpy)
diff --git a/python/gym_jiminy/common/gym_jiminy/common/blocks/mahony_filter.py b/python/gym_jiminy/common/gym_jiminy/common/blocks/mahony_filter.py
index 040d8e6058..92434bd4d9 100644
--- a/python/gym_jiminy/common/gym_jiminy/common/blocks/mahony_filter.py
+++ b/python/gym_jiminy/common/gym_jiminy/common/blocks/mahony_filter.py
@@ -3,7 +3,7 @@
 """
 import logging
 from collections import OrderedDict
-from typing import List, Union, Dict, Optional
+from typing import List, Sequence, Union, Dict
 
 import numpy as np
 import numba as nb
@@ -95,56 +95,6 @@ def mahony_filter(q: np.ndarray,
     bias_hat -= ki * dt * omega_mes  # eq. 32b
 
 
-@nb.jit(nopython=True, cache=True)
-def update_twist(q: np.ndarray,
-                 twist: np.ndarray,
-                 omega: np.ndarray,
-                 time_constant_inv: float,
-                 dt: float) -> None:
-    """Update the twist estimate of the Twist-after-Swing decomposition of
-    given orientations in quaternion representation using leaky integrator.
-
-    :param q: Current swing estimate as a quaternion. It will be updated
-              in-place to add the estimated twist.
-    :param twist: Current twist estimate to update in-place.
-    :param omega: Current angular velocity estimate in local frame.
-    :param time_constant_inv: Inverse of the time constant of the leaky
-                              integrator used to update the twist estimate.
-    :param dt: Time step, in seconds, between consecutive Quaternions.
-    """
-    # Compute the derivative of the twist angle:
-    # The element-wise time-derivative of a quaternion is given by:
-    #   dq = 0.5 * q * Quaternion(axis=gyro, w=0.0)        [1]
-    # The twist around a given axis can be computed as follows:
-    #   p = q_axis.dot(twist_axis) * twist_axis            [2]
-    #   twist = pin.Quaternion(np.array((*p, q_w))).normalized()
-    # The twist angle can be inferred from this expression:
-    #   twist = 2 * atan2(q_axis.dot(twist_axis), q_w)     [3]
-    # The derivative of twist angle can be derived:
-    #  dtwist = 2 * (
-    #     (dq_axis * q_w - q_axis * dq_w).dot(twist_axis)  [4]
-    #  ) / (q_axis.dot(twist_axis) ** 2 + q_w ** 2)
-    # One can show that if q_s is the swing part of the orientation, then:
-    #   q_s_axis.dot(twist_axis) = 0
-    # It yields:
-    #  dtwist = 2 * dq_s_axis.dot(twist_axis) / q_s_w      [5]
-    q_x, q_y, _, q_w = q
-    dtwist = (- q_y * omega[0] + q_x * omega[1]) / q_w + omega[2]
-
-    # Update twist angle using Leaky Integrator scheme to avoid long-term drift
-    twist *= max(0.0, 1.0 - time_constant_inv * dt)
-    twist += dtwist * dt
-
-    # Update quaternion to add estimated twist
-    p_z, p_w = np.sin(0.5 * twist), np.cos(0.5 * twist)
-    q[0], q[1], q[2], q[3] = (
-        p_w * q_x - p_z * q_y,
-        p_z * q_x + p_w * q_y,
-        p_z * q_w,
-        p_w * q_w,
-    )
-
-
 class MahonyFilter(BaseObserverBlock[
         Dict[str, np.ndarray], Dict[str, np.ndarray], BaseObs, BaseAct]):
     """Mahony's Nonlinear Complementary Filter on SO(3).
@@ -183,24 +133,17 @@ def __init__(self,
                  name: str,
                  env: InterfaceJiminyEnv[BaseObs, BaseAct],
                  *,
-                 twist_time_constant: Optional[float] = 0.0,
+                 ignore_twist: bool = False,
                  exact_init: bool = True,
-                 kp: Union[np.ndarray, float] = 1.0,
-                 ki: Union[np.ndarray, float] = 0.1,
+                 kp: Union[np.ndarray, Sequence[float], float] = 1.0,
+                 ki: Union[np.ndarray, Sequence[float], float] = 0.1,
                  compute_rpy: bool = False,
                  update_ratio: int = 1) -> None:
         """
         :param name: Name of the block.
         :param env: Environment to connect with.
-        :param twist_time_constant:
-            If specified, it corresponds to the time constant of the leaky
-            integrator (Exponential Moving Average) used to estimate the twist
-            part of twist-after-swing decomposition of the estimated
-            orientation in place of the Mahony Filter. If `0.0`, then its is
-            kept constant equal to zero. `None` to kept the original estimate
-            provided by Mahony Filter. See `remove_twist_from_quat` and
-            `update_twist` documentations for details.
-            Optional: `0.0` by default.
+        :param ignore_twist: Whether to ignore the twist of the IMU quaternion
+                             estimate.
         :param exact_init: Whether to initialize orientation estimate using
                            accelerometer measurements or ground truth. `False`
                            is not recommended because the robot is often
@@ -226,35 +169,22 @@ def __init__(self,
         # Handling of default argument(s)
         num_imu_sensors = len(env.robot.sensors[ImuSensor.type])
         if isinstance(kp, float):
-            kp = np.full((num_imu_sensors,), kp)
+            kp = (kp,) * num_imu_sensors
         if isinstance(ki, float):
-            ki = np.full((num_imu_sensors,), ki)
+            ki = (ki,) * num_imu_sensors
 
         # Backup some of the user arguments
-        self.compute_rpy = compute_rpy
+        self.ignore_twist = ignore_twist
         self.exact_init = exact_init
-        self.kp = kp
-        self.ki = ki
-
-        # Keep track of how the twist must be computed
-        self.twist_time_constant_inv: Optional[float]
-        if twist_time_constant is None:
-            self.twist_time_constant_inv = None
-        else:
-            if twist_time_constant > 0.0:
-                self.twist_time_constant_inv = 1.0 / twist_time_constant
-            else:
-                self.twist_time_constant_inv = float("inf")
-        self._remove_twist = self.twist_time_constant_inv is not None
-        self._update_twist = (
-            self.twist_time_constant_inv is not None and
-            np.isfinite(self.twist_time_constant_inv))
+        self.kp = np.asarray(kp)
+        self.ki = np.asarray(ki)
+        self.compute_rpy = compute_rpy
 
         # Whether the observer has been initialized.
         # This flag must be managed internally because relying on
         # `self.env.is_simulation_running` is not possible for observer blocks.
         # Unlike `compute_command`, the simulation is already running when
-        # `refresh_observation`` is called for the first time of an episode.
+        # `refresh_observation` is called for the first time of an episode.
         self._is_initialized = False
 
         # Whether the observer has been compiled already.
@@ -272,13 +202,8 @@ def __init__(self,
         # Allocate gyroscope bias estimate
         self._bias = np.zeros((3, num_imu_sensors))
 
-        # Allocate twist angle estimate around z-axis in world frame
-        self._twist = np.zeros((1, num_imu_sensors))
-
         # Define the state of the filter
         self._state = {"bias": self._bias}
-        if self._update_twist:
-            self._state["twist"] = self._twist
 
         # Initialize the observer
         super().__init__(name, env, update_ratio)
@@ -294,9 +219,7 @@ def __init__(self,
     def _initialize_state_space(self) -> None:
         """Configure the internal state space of the observer.
 
-        It corresponds to the current gyroscope bias estimate. The twist angle
-        is not part of the internal state although being integrated over time
-        because it is uniquely determined from the orientation estimate.
+        It corresponds to the current gyroscope bias estimate.
         """
         # Strictly speaking, 'q_prev' is part of the internal state of the
         # observer since it is involved in its computations. Yet, it is not an
@@ -309,11 +232,6 @@ def _initialize_state_space(self) -> None:
             low=np.full((3, num_imu_sensors), -np.inf),
             high=np.full((3, num_imu_sensors), np.inf),
             dtype=np.float64)
-        if self._update_twist:
-            state_space["twist"] = gym.spaces.Box(
-                low=np.full((num_imu_sensors,), -np.inf),
-                high=np.full((num_imu_sensors,), np.inf),
-                dtype=np.float64)
         self.state_space = gym.spaces.Dict(state_space)
 
     def _initialize_observation_space(self) -> None:
@@ -350,6 +268,9 @@ def _setup(self) -> None:
         # Call base implementation
         super()._setup()
 
+        # Initialize observation
+        fill(self.observation, 0)
+
         # Fix initialization of the observation to be valid quaternions
         self._quat[-1] = 1.0
 
@@ -361,9 +282,6 @@ def _setup(self) -> None:
         # Reset the sensor bias estimate
         fill(self._bias, 0)
 
-        # Reset the twist estimate
-        fill(self._twist, 0)
-
         # Warn if 'observe_dt' is too large to provide a meaningful
         if self.observe_dt > 0.01 + 1e-6:
             LOGGER.warning(
@@ -373,11 +291,15 @@ def _setup(self) -> None:
 
         # Call `refresh_observation` manually to make sure that all the jitted
         # method of it control flow has been compiled.
+        # Note that setup must be called once again because compilation will
+        # mess up with the internal state of the filter.
         if not self._is_compiled:
             self._is_initialized = False
             for _ in range(2):
                 self.refresh_observation(self.env.observation)
             self._is_compiled = True
+            self._setup()
+            return
 
         # Consider that the observer is not initialized anymore
         self._is_initialized = False
@@ -432,10 +354,6 @@ def refresh_observation(self, measurement: BaseObs) -> None:
                 # Convert the rotation matrices of the IMUs to quaternions
                 matrices_to_quat(tuple(imu_rots), self._quat)
 
-                # Keep track of tilt if necessary
-                if self._update_twist:
-                    self._twist[:] = np.arctan2(self._quat[2], self._quat[3])
-
                 self._is_initialized = True
 
             # Compute the RPY representation if requested
@@ -455,16 +373,9 @@ def refresh_observation(self, measurement: BaseObs) -> None:
                       self.observe_dt)
 
         # Remove twist if requested
-        if self._remove_twist:
+        if self.ignore_twist:
             remove_twist_from_quat(self._quat)
 
-        if self._update_twist:
-            update_twist(self._quat,
-                         self._twist,
-                         self._omega,
-                         self.twist_time_constant_inv,
-                         self.observe_dt)
-
         # Compute the RPY representation if requested
         if self.compute_rpy:
             quat_to_rpy(self._quat, self._rpy)
diff --git a/python/gym_jiminy/common/gym_jiminy/common/blocks/proportional_derivative_controller.py b/python/gym_jiminy/common/gym_jiminy/common/blocks/proportional_derivative_controller.py
index ae2f90e422..106a555e29 100644
--- a/python/gym_jiminy/common/gym_jiminy/common/blocks/proportional_derivative_controller.py
+++ b/python/gym_jiminy/common/gym_jiminy/common/blocks/proportional_derivative_controller.py
@@ -2,7 +2,7 @@
 with gym_jiminy reinforcement learning pipeline environment design.
 """
 import warnings
-from typing import List, Union
+from typing import List, Union, Optional
 
 import numpy as np
 import numba as nb
@@ -101,14 +101,14 @@ def integrate_zoh(state: np.ndarray,
 
 
 @nb.jit(nopython=True, cache=True, fastmath=True)
-def pd_controller(q_measured: np.ndarray,
-                  v_measured: np.ndarray,
+def pd_controller(encoder_data: np.ndarray,
                   command_state: np.ndarray,
                   command_state_lower: np.ndarray,
                   command_state_upper: np.ndarray,
                   kp: np.ndarray,
                   kd: np.ndarray,
                   motors_effort_limit: np.ndarray,
+                  motors_velocity_deadband: Optional[np.ndarray],
                   control_dt: float,
                   out: np.ndarray) -> None:
     """Compute command torques under discrete-time proportional-derivative
@@ -133,8 +133,7 @@ def pd_controller(q_measured: np.ndarray,
         See `PDController` documentation to get more information, and
         `integrate_zoh` documentation for details about the state integration.
 
-    :param q_measured: Current position of the actuators.
-    :param v_measured: Current velocity of the actuators.
+    :param encoder_data: Current position and velocity of the actuators.
     :param command_state: Current command state, namely, all the derivatives of
                           the target motors positions up to acceleration order.
     :param command_state_lower: Lower bound of the command state that must be
@@ -144,6 +143,8 @@ def pd_controller(q_measured: np.ndarray,
     :param kp: PD controller position-proportional gain in motor order.
     :param kd: PD controller velocity-proportional gain in motor order.
     :param motors_effort_limit: Maximum effort that the actuators can output.
+    :param velocity_deadband: Target velocity deadband for which the target
+                              motor velocity will be cancelled out completely.
     :param control_dt: Controller update period. It will be involved in the
                        integration of the command state.
     :param out: Pre-allocated memory to store the command motor torques.
@@ -154,11 +155,13 @@ def pd_controller(q_measured: np.ndarray,
                   command_state_upper,
                   control_dt)
 
-    # Extract targets motors positions and velocities from command state
-    q_target, v_target, _ = command_state
+    # Velocity/Acceleration dead-band to avoid slow drift of target position
+    if motors_velocity_deadband is not None:
+        is_zero_velocity = np.abs(command_state[1]) < motors_velocity_deadband
+        command_state[1, is_zero_velocity] = 0.0
 
     # Compute the joint tracking error
-    q_error, v_error = q_target - q_measured, v_target - v_measured
+    q_error, v_error = command_state[:2] - encoder_data
 
     # Compute PD command
     out[:] = kp * (q_error + kd * v_error)
@@ -174,8 +177,8 @@ def pd_adapter(action: np.ndarray,
                command_state: np.ndarray,
                command_state_lower: np.ndarray,
                command_state_upper: np.ndarray,
-               dt: float,
                is_instantaneous: bool,
+               step_dt: float,
                out: np.ndarray) -> None:
     """Compute the target motor accelerations that must be held constant for a
     given time interval in order to reach the desired value of some derivative
@@ -198,41 +201,45 @@ def pd_adapter(action: np.ndarray,
                                 satisfied at all cost.
     :param command_state_upper: Upper bound of the command state that must be
                                 satisfied at all cost.
-    :param dt: Time interval during which the target motor accelerations will
-               be held constant.
+    :param step_dt: Time interval during which the target motor accelerations
+                    will be held constant.
     :param out: Pre-allocated memory to store the target motor accelerations.
     """
+    # Early return if timestep is too small
+    if abs(step_dt) < 1e-9:
+        return
+
     # Update command accelerations based on the action and its derivative order
     if is_instantaneous:
         # Update the command state directly
         if order == 0:
             # Compute command velocity
-            velocity = (action - command_state[0]) / dt
+            velocity = (action - command_state[0]) / step_dt
 
             # Clip command velocity
             velocity = np.minimum(np.maximum(
                 velocity, command_state_lower[1]), command_state_upper[1])
 
             # Update command position instantaneously
-            command_state[0] = action
+            command_state[0] += velocity * step_dt
             command_state[1] = 0.0
         else:
             # Compute command acceleration
-            acceleration = (action - command_state[1]) / dt
+            acceleration = (action - command_state[1]) / step_dt
 
             # Clip command acceleration
             acceleration = np.minimum(np.maximum(
                 acceleration, command_state_lower[2]), command_state_upper[2])
 
             # Update command velocity instantaneously
-            command_state[1] += acceleration * dt
+            command_state[1] += acceleration * step_dt
 
         # Hold the acceleration constant equal to zero
         out[:] = 0.0
     else:
         if order == 0:
             # Compute command velocity
-            velocity = (action - command_state[0]) / dt
+            velocity = (action - command_state[0]) / step_dt
         else:
             # The action corresponds to the command motor velocities
             velocity = action
@@ -242,7 +249,7 @@ def pd_adapter(action: np.ndarray,
             velocity, command_state_lower[1]), command_state_upper[1])
 
         # Compute command acceleration
-        out[:] = (velocity - command_state[1]) / dt
+        out[:] = (velocity - command_state[1]) / step_dt
 
 
 def get_encoder_to_motor_map(robot: jiminy.Robot) -> Union[slice, List[int]]:
@@ -315,7 +322,8 @@ def __init__(self,
                  kd: Union[float, List[float], np.ndarray],
                  joint_position_margin: float = 0.0,
                  joint_velocity_limit: float = float("inf"),
-                 joint_acceleration_limit: float = float("inf")) -> None:
+                 joint_velocity_deadband: float = 0.0,
+                 joint_acceleration_limit: Optional[float] = None) -> None:
         """
         :param name: Name of the block.
         :param env: Environment to connect with.
@@ -328,11 +336,24 @@ def __init__(self,
         :param joint_position_margin: Minimum distance of the joint target
                                       positions from their respective bounds.
                                       Optional: 0.0 by default.
-        :param joint_velocity_limit: Restrict maximum joint target velocities
-                                     wrt their hardware specifications.
-                                     Optional: "inf" by default.
-        :param joint_acceleration_limit: Maximum joint target acceleration.
-                                         Optional: "inf" by default.
+        :param joint_velocity_limit:
+            Further restrict maximum joint target velocities wrt their
+            hardware specifications.
+            Optional: 'inf' by default.
+        :param joint_velocity_deadband:
+            Target velocity deadband to avoid high-frequency vibrations and
+            drifting of the target motor positions that would result internal
+            efforts in the mechanical structure for hyperstatic postures.
+            Note that it is only enabled during evaluation, not training.
+            Optional: 0.0 by default.
+        :param joint_acceleration_limit:
+            Maximum joint target acceleration. `None` to infer acceleration
+            bounds (from prescribed PD gains plus maximum motor velocities and
+            efforts) that would be as restrictive as possible while allowing
+            bang-band control (i.e. unrestricted jump in target velocity or
+            command torque in a single environment timestep, depending on which
+            of these two criteria is the most limiting).
+            Optional: None by default.
         """
         # Make sure the action space of the environment has not been altered
         if env.action_space is not env.unwrapped.action_space:
@@ -391,16 +412,20 @@ def __init__(self,
         motors_velocity_limit = np.array([
             min(motor.velocity_limit, ratio * joint_velocity_limit)
             for motor, ratio in zip(env.robot.motors, encoder_to_joint_ratio)])
+        self._motors_velocity_deadband = np.array([
+            ratio * joint_velocity_deadband
+            for motor, ratio in zip(env.robot.motors, encoder_to_joint_ratio)])
 
         # Define acceleration bounds allowing unrestricted bang-bang control
-        range_limit = 2 * motors_velocity_limit / env.step_dt
-        effort_limit = self.motors_effort_limit / (
-            self.kp * env.step_dt * np.maximum(env.step_dt / 2, self.kd))
-        target_acceleration_limit = np.array([
-            ratio * joint_acceleration_limit
-            for ratio in encoder_to_joint_ratio])
-        acceleration_limit = np.minimum(
-            np.minimum(range_limit, effort_limit), target_acceleration_limit)
+        if joint_acceleration_limit is None:
+            range_limit = 2 * motors_velocity_limit / env.step_dt
+            effort_limit = self.motors_effort_limit / (
+                self.kp * env.step_dt * np.maximum(env.step_dt, self.kd))
+            acceleration_limit = np.minimum(range_limit, effort_limit)
+        else:
+            acceleration_limit = np.array([
+                ratio * joint_acceleration_limit
+                for ratio in encoder_to_joint_ratio])
 
         # Compute command state bounds
         self._command_state_lower = np.stack([motors_position_lower,
@@ -411,8 +436,7 @@ def __init__(self,
                                               acceleration_limit], axis=0)
 
         # Extract measured motor positions and velocities for fast access
-        self.q_measured, self.v_measured = (
-            env.measurement["measurements"][EncoderSensor.type])
+        self.encoder_data = env.measurement["measurements"][EncoderSensor.type]
 
         # Allocate memory for the command state
         self._command_state = np.zeros((3, env.robot.nmotors))
@@ -480,23 +504,21 @@ def compute_command(self, action: np.ndarray, command: np.ndarray) -> None:
         :param action: Desired target motor acceleration.
         :param command: Current motor torques that will be updated in-place.
         """
+        # Extract motor positions and velocity from encoder data
+        encoder_data = self.encoder_data
+        if not self._is_same_order:
+            encoder_data = encoder_data[:, self.encoder_to_motor_map]
+
         # Re-initialize the command state to the current motor state if the
         # simulation is not running. This must be done here because the
         # command state must be valid prior to calling `refresh_observation`
         # for the first time, which happens at `reset`.
         is_simulation_running = self.env.is_simulation_running
         if not is_simulation_running:
-            for i, value in enumerate((self.q_measured, self.v_measured)):
-                np.clip(value,
-                        self._command_state_lower[i],
-                        self._command_state_upper[i],
-                        out=self._command_state[i])
-
-        # Extract motor positions and velocity from encoder data
-        q_measured, v_measured = self.q_measured, self.v_measured
-        if not self._is_same_order:
-            q_measured = q_measured[self.encoder_to_motor_map]
-            v_measured = v_measured[self.encoder_to_motor_map]
+            np.clip(encoder_data,
+                    self._command_state_lower[:2],
+                    self._command_state_upper[:2],
+                    out=self._command_state[:2])
 
         # Update target motor accelerations
         array_copyto(self._command_acceleration, action)
@@ -504,14 +526,14 @@ def compute_command(self, action: np.ndarray, command: np.ndarray) -> None:
         # Compute the motor efforts using PD control.
         # The command state must not be updated if no simulation is running.
         pd_controller(
-            q_measured,
-            v_measured,
+            encoder_data,
             self._command_state,
             self._command_state_lower,
             self._command_state_upper,
             self.kp,
             self.kd,
             self.motors_effort_limit,
+            None if self.env.is_training else self._motors_velocity_deadband,
             self.control_dt if is_simulation_running else 0.0,
             command)
 
@@ -611,6 +633,6 @@ def compute_command(self, action: np.ndarray, command: np.ndarray) -> None:
             self._pd_controller._command_state,
             self._pd_controller._command_state_lower,
             self._pd_controller._command_state_upper,
-            self.control_dt,
             self.is_instantaneous,
+            self.control_dt if self.env.is_simulation_running else 0.0,
             command)
diff --git a/python/gym_jiminy/common/gym_jiminy/common/envs/generic.py b/python/gym_jiminy/common/gym_jiminy/common/envs/generic.py
index 405cff35d1..a4dd1a9b18 100644
--- a/python/gym_jiminy/common/gym_jiminy/common/envs/generic.py
+++ b/python/gym_jiminy/common/gym_jiminy/common/envs/generic.py
@@ -58,9 +58,12 @@
 
 
 # Maximum realtime slowdown of simulation steps before triggering timeout error
-TIMEOUT_RATIO = 15
+TIMEOUT_RATIO = 25
 
-# Absolute tolerance when checking that observations are valid
+# Absolute tolerance when checking that observations are valid.
+# Note that the joint positions are out-of-bounds when hitting the mechanical
+# stops. Because of this, some tolerance must be added to avoid trigeering
+# termination too easily.
 OBS_CONTAINS_TOL = 0.01
 
 
@@ -246,10 +249,6 @@ def __init__(self,
         # Initialize the seed of the environment
         self._initialize_seed()
 
-        # Initialize the observation and action buffers
-        self.observation: Obs = zeros(self.observation_space)
-        self.action: Act = zeros(self.action_space)
-
         # Check that the action and observation spaces are consistent with
         # 'compute_command' and 'refresh_observation' respectively.
         cls = type(self)
@@ -270,9 +269,14 @@ def __init__(self,
                 "`BaseJiminyEnv.refresh_observation` must be overloaded when "
                 "defining a custom observation space.")
 
-        # Bind the observation to the engine measurements by default
+        # Initialize the observation and action buffers if necessary
         if not is_observation_space_custom:
+            # Bind the observation to the engine measurements by default
             self.observation = cast(Obs, self.measurement)
+        else:
+            self.observation: Obs = zeros(self.observation_space)
+        if not hasattr(self, "action"):
+            self.action: Act = zeros(self.action_space)
 
         # Define specialized operators for efficiency.
         # Note that a partial view of observation corresponding to measurement
@@ -402,8 +406,10 @@ def _initialize_seed(self, seed: Optional[int] = None) -> None:
         self.np_random.bit_generator.state = np.random.SFC64(np_seed).state
 
         # Reset the seed of the action and observation spaces
-        self.observation_space.seed(seed)
-        self.action_space.seed(seed)
+        obs_seed, act_seed = map(int, self.np_random.integers(
+            np.iinfo(int).max, size=(2,), dtype=int))
+        self.observation_space.seed(obs_seed)
+        self.action_space.seed(act_seed)
 
         # Reset the seed of Jiminy Engine
         self.simulator.seed(engine_seed)
diff --git a/python/gym_jiminy/common/gym_jiminy/common/quantities/generic.py b/python/gym_jiminy/common/gym_jiminy/common/quantities/generic.py
index 7ee9af2e11..9190ad0212 100644
--- a/python/gym_jiminy/common/gym_jiminy/common/quantities/generic.py
+++ b/python/gym_jiminy/common/gym_jiminy/common/quantities/generic.py
@@ -1077,7 +1077,7 @@ class MultiFrameCollisionDetection(InterfaceQuantity[bool]):
 
     .. note::
         Jiminy enforces all collision geometries to be either primitive shapes
-        or convex polyhedra for efficiency. In practice, tf meshes where
+        or convex polyhedra for efficiency. In practice, if meshes has been
         specified in the original URDF file, then they will be converted into
         their respective convex hull.
     """
diff --git a/python/gym_jiminy/common/gym_jiminy/common/utils/misc.py b/python/gym_jiminy/common/gym_jiminy/common/utils/misc.py
index 585c6d583a..d6a9a551dd 100644
--- a/python/gym_jiminy/common/gym_jiminy/common/utils/misc.py
+++ b/python/gym_jiminy/common/gym_jiminy/common/utils/misc.py
@@ -167,7 +167,7 @@ def sample(low: Union[float, np.ndarray] = -1.0,
                  Optional: 'uniform' by default.
     :param scale: Shrink the standard deviation of the distribution around the
                   mean by this factor.
-                  Optional: No scaling by default?
+                  Optional: No scaling by default
     :param enable_log_scale: The sampled values are power of 10.
     :param shape: Enforce of the sampling shape. Only available if 'low',
                   'high' and 'scale' are floats. `None` to disable.
diff --git a/python/gym_jiminy/common/gym_jiminy/common/utils/pipeline.py b/python/gym_jiminy/common/gym_jiminy/common/utils/pipeline.py
index 5b36dede0e..7bae689a33 100644
--- a/python/gym_jiminy/common/gym_jiminy/common/utils/pipeline.py
+++ b/python/gym_jiminy/common/gym_jiminy/common/utils/pipeline.py
@@ -26,7 +26,7 @@
 import pinocchio as pin
 from jiminy_py.dynamics import State, Trajectory
 
-from ..quantities import EnergyGenerationMode
+from ..quantities import EnergyGenerationMode, OrientationType
 from ..bases import (QuantityEvalMode,
                      InterfaceJiminyEnv,
                      InterfaceBlock,
@@ -46,6 +46,7 @@
 
 ENUM_TYPES = (EnergyGenerationMode,
               QuantityEvalMode,
+              OrientationType,
               pin.KinematicLevel)
 ENUM_NAME_TO_MODULE_MAP = {enum_type.__name__: enum_type.__module__.split(".")
                            for enum_type in ENUM_TYPES}
@@ -247,15 +248,17 @@ def build_pipeline(env_config: EnvConfig,
                       Optional: `None` by default.
     """
     # Define helper to replace enums string by its corresponding object value
-    def sanitize_enum_string(kwargs: Dict[str, Any]) -> None:
-        """Replace in-place enum string representation with their object
-        counterpart.
+    def sanitize_special_string(kwargs: Dict[str, Any]) -> None:
+        """Replace in-place some special strings with their object counterpart.
+
+        This method deals with enums, None ("none") and special floats ("nan",
+        "+/-inf").
 
         :param kwargs: Nested dictionary of options.
         """
         for key, value in kwargs.items():
             if isinstance(value, dict):
-                sanitize_enum_string(value)
+                sanitize_special_string(value)
                 continue
 
             if not isinstance(value, str):
@@ -264,6 +267,9 @@ def sanitize_enum_string(kwargs: Dict[str, Any]) -> None:
             if value == "none":
                 kwargs[key] = None
                 continue
+            if value == "nan" or value.endswith("inf"):
+                kwargs[key] = float(value)
+                continue
 
             value_path = value.split(".")
             enum_type = value_path[-2] if len(value_path) > 1 else None
@@ -297,8 +303,8 @@ def sanitize_composition_config(composition_config: CompositionConfig,
         # Get its constructor keyword-arguments
         kwargs = composition_config.get("kwargs", {})
 
-        # Special treatment for "none" and enum string
-        sanitize_enum_string(kwargs)
+        # Special treatment for "none", "nan", "+/-inf" and enum string
+        sanitize_special_string(kwargs)
 
         # Special handling for `MixtureReward`
         if is_reward and issubclass(cls, MixtureReward):
@@ -341,8 +347,8 @@ def build_composition(
         # Get its constructor keyword-arguments
         kwargs = composition_config.get("kwargs", {}).copy()
 
-        # Special treatment for "none" and enum string
-        sanitize_enum_string(kwargs)
+        # Special treatment for "none", "nan", "+/-inf" and enum string
+        sanitize_special_string(kwargs)
 
         # Special handling for `MixtureReward`
         if is_reward and issubclass(cls, MixtureReward):
@@ -560,9 +566,9 @@ def build_controller_observer_layer(
         block_kwargs = block_config.get("kwargs", {})
         wrapper_kwargs = wrapper_config.get("kwargs", {})
 
-        # Special treatment for "none" and enum string
+        # Special treatment for "none", "nan", "+/-inf" and enum string
         for kwargs in (block_kwargs, wrapper_kwargs):
-            sanitize_enum_string(kwargs)
+            sanitize_special_string(kwargs)
 
         # Special treatment for "quantity" arg of `QuantityObserver` blocks
         if block_cls_ is not None and issubclass(block_cls_, QuantityObserver):
diff --git a/python/gym_jiminy/common/gym_jiminy/common/wrappers/observation_layout.py b/python/gym_jiminy/common/gym_jiminy/common/wrappers/observation_layout.py
index 97f174e407..dd55485f6c 100644
--- a/python/gym_jiminy/common/gym_jiminy/common/wrappers/observation_layout.py
+++ b/python/gym_jiminy/common/gym_jiminy/common/wrappers/observation_layout.py
@@ -145,7 +145,10 @@ def _adapt_layout(
             nested_key_in = cast(NestedKey, nested_spec_in)
 
         # Extract the input chunk recursively
-        value_in = reduce(getitem, nested_key_in, data)
+        try:
+            value_in = reduce(getitem, nested_key_in, data)
+        except KeyError as e:
+            raise KeyError(f"Nested key '{nested_key_in}' is missing.") from e
         if block_spec_in is not None:
             # Convert array block specification to slices
             slices = []
diff --git a/python/gym_jiminy/examples/tutorial_rl.ipynb b/python/gym_jiminy/examples/tutorial_rl.ipynb
index 6dad16b4ba..ed00e458bd 100644
--- a/python/gym_jiminy/examples/tutorial_rl.ipynb
+++ b/python/gym_jiminy/examples/tutorial_rl.ipynb
@@ -81,8 +81,8 @@
         "\n",
         "        options = robot.get_model_options()\n",
         "        options[\"joints\"][\"positionLimitFromUrdf\"] = False\n",
-        "        options[\"joints\"][\"positionLimitMin\"] = np.full((2,), -1e5)\n",
-        "        options[\"joints\"][\"positionLimitMax\"] = np.full((2,), +1e5)\n",
+        "        options[\"joints\"][\"positionLimitLower\"] = np.full((2,), -1e5)\n",
+        "        options[\"joints\"][\"positionLimitUpper\"] = np.full((2,), +1e5)\n",
         "        robot.set_model_options(options)\n",
         "\n",
         "        # motor\n",
diff --git a/python/gym_jiminy/toolbox/gym_jiminy/toolbox/math/qhull.py b/python/gym_jiminy/toolbox/gym_jiminy/toolbox/math/qhull.py
index 6572e4df6d..78fbd1a71b 100644
--- a/python/gym_jiminy/toolbox/gym_jiminy/toolbox/math/qhull.py
+++ b/python/gym_jiminy/toolbox/gym_jiminy/toolbox/math/qhull.py
@@ -431,23 +431,29 @@ def get_distance_to_point(self, points: np.ndarray) -> np.ndarray:
                        a single query point.
         """
         # Make sure that the input is at least 2D
-        if points.ndim < 2:
-            points = np.atleast_2d(points)
+        assert points.ndim in (1, 2) and points.shape[-1] == 2
+        points2d = np.atleast_2d(points)
 
         # Compute the signed distance between query points and convex hull
         if self.npoints > 2:
-            return compute_distance_convex_to_point(
-                self.vertices, self.vectors, points)
+            dist2d = compute_distance_convex_to_point(
+                self.vertices, self.vectors, points2d)
 
         # Compute the distance between query points and segment
-        if self.npoints == 2:
+        elif self.npoints == 2:
             vec = self.vertices[1] - self.vertices[0]
-            ratio = (points - self.vertices[0]) @ vec / np.dot(vec, vec)
+            ratio = (points2d - self.vertices[0]) @ vec / np.dot(vec, vec)
             proj = self.vertices[0] + np.outer(np.clip(ratio, 0.0, 1.0), vec)
-            return np.linalg.norm(points - proj, axis=1)
+            dist2d = np.linalg.norm(points2d - proj, axis=1)
 
         # Compute the distance between query points and point
-        return np.linalg.norm(points - self.vertices, axis=1)
+        else:
+            dist2d = np.linalg.norm(points2d - self.vertices, axis=1)
+
+        # Ravel extra dimension before returning if not present initially
+        if points.ndim < 2:
+            return dist2d[0]
+        return dist2d
 
     def get_distance_to_ray(self,
                             vector: np.ndarray,
diff --git a/python/gym_jiminy/unit_py/test_deformation_estimator.py b/python/gym_jiminy/unit_py/test_deformation_estimator.py
index f649e9c428..afa0c4f3a4 100644
--- a/python/gym_jiminy/unit_py/test_deformation_estimator.py
+++ b/python/gym_jiminy/unit_py/test_deformation_estimator.py
@@ -131,7 +131,7 @@ def test_arm(self):
             env,
             kp=0.0,
             ki=0.0,
-            twist_time_constant=None,
+            ignore_twist=False,
             exact_init=True,
             update_ratio=1)
         env = ObservedJiminyEnv(env, mahony_filter)
@@ -253,7 +253,7 @@ def _setup(self) -> None:
                         kwargs=dict(
                             kp=0.0,
                             ki=0.0,
-                            twist_time_constant=None,
+                            ignore_twist=False,
                             exact_init=True,
                             update_ratio=1,
                         )
@@ -281,4 +281,4 @@ def _setup(self) -> None:
             env.step(env.action)
 
         # Check that deformation estimates matches ground truth
-        self._test_deformation_estimate(env, imu_atol=1e-4, flex_atol=5e-3)
+        self._test_deformation_estimate(env, imu_atol=1e-4, flex_atol=1e-2)
diff --git a/python/gym_jiminy/unit_py/test_pipeline_control.py b/python/gym_jiminy/unit_py/test_pipeline_control.py
index 0f466aae1e..ad0eb83475 100644
--- a/python/gym_jiminy/unit_py/test_pipeline_control.py
+++ b/python/gym_jiminy/unit_py/test_pipeline_control.py
@@ -17,9 +17,9 @@
 from jiminy_py.core import ImuSensor
 from jiminy_py.viewer import Viewer
 
-from gym_jiminy.envs import (
-    AtlasPDControlJiminyEnv, CassiePDControlJiminyEnv, DigitPDControlJiminyEnv)
-from gym_jiminy.common.blocks import PDController, PDAdapter, MahonyFilter
+from gym_jiminy.common.bases import ObservedJiminyEnv
+from gym_jiminy.common.blocks import (
+    PDController, PDAdapter, MahonyFilter, BodyObserver)
 from gym_jiminy.common.blocks.proportional_derivative_controller import (
     integrate_zoh)
 from gym_jiminy.common.wrappers import (
@@ -27,6 +27,8 @@
     FlattenObservation)
 from gym_jiminy.common.utils import (
     quat_to_rpy, matrix_to_rpy, matrix_to_quat, remove_twist_from_quat)
+from gym_jiminy.envs import (
+    AtlasPDControlJiminyEnv, CassiePDControlJiminyEnv, DigitPDControlJiminyEnv)
 
 
 IMAGE_DIFF_THRESHOLD = 5.0
@@ -128,14 +130,25 @@ def test_pid_standing(self):
                 self._test_pid_standing()
                 Viewer.close()
 
-    def test_mahony_filter(self):
+    def test_mahony_filter_plus_body_observer(self):
         """Check consistency between IMU orientation estimation provided by
         Mahony filter and ground truth for moderately slow motions.
         """
         # Instantiate and reset the environment
         env = AtlasPDControlJiminyEnv()
-        observer = env.observer
-        assert isinstance(observer, MahonyFilter)
+
+        # Overwrite Mahony filter to specify custom constructor arguments
+        assert isinstance(env, ObservedJiminyEnv)
+        assert isinstance(env.observer, MahonyFilter)
+        mahony_filter = MahonyFilter(
+            "mahony_filter",
+            env.env,
+            kp=0.0,
+            ki=0.0,
+            ignore_twist=False,
+            exact_init=True,
+            compute_rpy=False)
+        env = ObservedJiminyEnv(env.env, mahony_filter)
 
         # Define a constant action that move the upper-body in all directions
         robot = env.robot
@@ -150,23 +163,21 @@ def test_mahony_filter(self):
 
         # Check that the estimate IMU orientation is accurate over the episode
         for twist_time_constant in (None, float("inf"), 0.0):
-            # Reinitialize the observer
-            env.observer = observer = MahonyFilter(
-                observer.name,
-                observer.env,
-                kp=0.0,
-                ki=0.0,
+            # Add body observer
+            observer = BodyObserver(
+                "body_observer",
+                env,
                 twist_time_constant=twist_time_constant,
-                exact_init=True,
                 compute_rpy=True)
+            env_derived = ObservedJiminyEnv(env, observer)
 
             # Reset the environment
-            env.reset(seed=0)
+            env_derived.reset(seed=0)
             rpy_est = observer.observation["rpy"][:, 0]
 
             # Run of few simulation steps
             for i in range(200):
-                env.step(action * (1 - 2 * ((i // 50) % 2)))
+                env_derived.step(action * (1 - 2 * ((i // 50) % 2)))
 
                 if twist_time_constant == 0.0:
                     # The twist must be ignored as it is not observable
diff --git a/python/gym_jiminy/unit_py/test_training_toys_models.py b/python/gym_jiminy/unit_py/test_training_toys_models.py
index f2a49c3d60..78d1fa5c41 100644
--- a/python/gym_jiminy/unit_py/test_training_toys_models.py
+++ b/python/gym_jiminy/unit_py/test_training_toys_models.py
@@ -16,7 +16,7 @@
 
 
 # Fix the seed and number of threads for CI stability
-SEED = 0
+SEED = 42
 N_THREADS = 5
 
 # Skip some tests if Jiminy has been compiled in debug
diff --git a/python/jiminy_py/unit_py/test_dense_pole.py b/python/jiminy_py/unit_py/test_dense_pole.py
index e16273169d..d390394a65 100644
--- a/python/jiminy_py/unit_py/test_dense_pole.py
+++ b/python/jiminy_py/unit_py/test_dense_pole.py
@@ -38,8 +38,8 @@ def setUp(self):
 
         # Configure joint bounds
         model_options = self.robot.get_model_options()
-        model_options['joints']['positionLimitMin'] = [-self.joint_limit]
-        model_options['joints']['positionLimitMax'] = [self.joint_limit]
+        model_options['joints']['positionLimitLower'] = [-self.joint_limit]
+        model_options['joints']['positionLimitUpper'] = [self.joint_limit]
         model_options['joints']['positionLimitFromUrdf'] = False
         self.robot.set_model_options(model_options)