[gym/common] Allows PD control with position action. Remove PD target…

… dead band.
duburcqa · Apr 2, 2024 · 1e5dc62 · 1e5dc62
1 parent 8a4e450
commit 1e5dc62
Show file tree

Hide file tree

Showing 3 changed files with 61 additions and 37 deletions.
diff --git a/python/gym_jiminy/common/gym_jiminy/common/blocks/proportional_derivative_controller.py b/python/gym_jiminy/common/gym_jiminy/common/blocks/proportional_derivative_controller.py
@@ -19,9 +19,6 @@
 # Name of the n-th position derivative
 N_ORDER_DERIVATIVE_NAMES = ("Position", "Velocity", "Acceleration")
 
-# Command velocity deadband to reduce vibrations and internal efforts
-EVAL_DEADBAND = 5.0e-3
-
 
 @nb.jit(nopython=True, cache=True, inline='always', fastmath=True)
 def integrate_zoh(state: np.ndarray,
@@ -65,6 +62,8 @@ def integrate_zoh(state: np.ndarray,
 @nb.jit(nopython=True, cache=True, fastmath=True)
 def pd_controller(q_measured: np.ndarray,
                   v_measured: np.ndarray,
+                  action: np.ndarray,
+                  order: int,
                   command_state: np.ndarray,
                   command_state_lower: np.ndarray,
                   command_state_upper: np.ndarray,
@@ -97,6 +96,9 @@ def pd_controller(q_measured: np.ndarray,
 
     :param q_measured: Current position of the actuators.
     :param v_measured: Current velocity of the actuators.
+    :param action: Desired value of the n-th derivative of the command motor
+                   positions at the end of the controller update.
+    :param order: Derivative order of the position associated with the action.
     :param command_state: Current command state, namely, all the derivatives of
                           the target motors positions up to order N. The order
                           must be larger than 1 but can be arbitrarily large.
@@ -114,20 +116,38 @@ def pd_controller(q_measured: np.ndarray,
     :param horizon: Horizon length to start slowing down before hitting bounds.
     :param out: Pre-allocated memory to store the command motor torques.
     """
-    # Compute slowdown horizon.
-    # It must be as short as possible to avoid altering the user-specified
-    # command if not strictly necessary, but long enough to avoid violation of
-    # acceleration bounds when hitting bounds.
-    horizon = np.ceil(
-        command_state_upper[1] / (command_state_upper[2] * control_dt))
+    # Update command accelerations based on the action and its derivative order
+    if order == 2:
+        # The action corresponds to the command motor accelerations
+        acceleration = action
+    else:
+        if order == 0:
+            # Compute command velocity
+            velocity = (action - command_state[0]) / control_dt
+
+            # Clip command velocity
+            velocity = np.minimum(np.maximum(
+                velocity, command_state_lower[1]), command_state_upper[1])
+        else:
+            # The action corresponds to the command motor velocities
+            velocity = action
+
+        # Compute command acceleration
+        acceleration = (velocity - command_state[1]) / control_dt
 
     # Clip command acceleration
-    np.minimum(np.maximum(
-        command_state[2], command_state_lower[2]), command_state_upper[2])
+    command_state[2] = np.minimum(np.maximum(
+        acceleration, command_state_lower[2]), command_state_upper[2])
 
     # Integrate command velocity
     command_state[1] += command_state[2] * control_dt
 
+    # Compute slowdown horizon.
+    # It must be as short as possible to avoid altering the user-specified
+    # command if not strictly necessary, but long enough to avoid violation of
+    # acceleration bounds when hitting bounds.
+    horizon = command_state_upper[1] / (command_state_upper[2] * control_dt)
+
     # Integrate command position, clipping velocity to satisfy state bounds
     integrate_zoh(command_state,
                   command_state_lower,
@@ -238,8 +258,9 @@ def __init__(self,
         :param update_ratio: Ratio between the update period of the controller
                              and the one of the subsequent controller. -1 to
                              match the simulation timestep of the environment.
-        :param order: Derivative order of the action. It accepts 1 (velocity)
-                      or 2 (acceleration). Optional: 1 by default.
+        :param order: Derivative order of the action. It accepts position,
+                      velocity or acceleration (respectively 0, 1 and 2).
+                      Optional: 1 by default.
         :param kp: PD controller position-proportional gains in motor order.
         :param kd: PD controller velocity-proportional gains in motor order.
         :param target_position_margin: Minimum distance of the motor target
@@ -254,7 +275,7 @@ def __init__(self,
             Optional: "inf" by default.
         """
         # Make sure that the specified derivative order is valid
-        assert (0 < order < 3), "Derivative order of command out-of-bounds"
+        assert (0 <= order < 3), "Derivative order of command out-of-bounds"
 
         # Make sure the action space of the environment has not been altered
         if env.action_space is not env.unwrapped.action_space:
@@ -340,8 +361,10 @@ def __init__(self,
         # Initialize the controller
         super().__init__(name, env, update_ratio)
 
-        # Reference to command acceleration for fast access
-        self._command_accel = self._command_state[2]
+        # References to command position, velocity and acceleration
+        (self._command_position,
+         self._command_velocity,
+         self._command_accel) = self._command_state
 
         # Command motor torques buffer for efficiency
         self._u_command = np.array([])
@@ -419,16 +442,6 @@ def compute_command(self, action: np.ndarray) -> np.ndarray:
                         self._command_state_upper[i],
                         out=self._command_state[i])
 
-        # Update the target motor accelerations based on the provided action
-        self._command_accel[:] = (
-            (action - self._command_state[1]) / self.control_dt
-            if self.order == 1 else action)
-
-        # Dead band to avoid slow drift of target at rest for evaluation only
-        if not self.env.is_training:
-            self._command_accel[
-                np.abs(self._command_accel) > EVAL_DEADBAND] = 0.0
-
         # Extract motor positions and velocity from encoder data
         q_measured, v_measured = self.q_measured, self.v_measured
         if not self._is_same_order:
@@ -440,6 +453,8 @@ def compute_command(self, action: np.ndarray) -> np.ndarray:
         return pd_controller(
             q_measured,
             v_measured,
+            action,
+            self.order,
             self._command_state,
             self._command_state_lower,
             self._command_state_upper,

diff --git a/python/gym_jiminy/unit_py/test_pipeline_control.py b/python/gym_jiminy/unit_py/test_pipeline_control.py
@@ -183,14 +183,22 @@ def test_pid_controller(self):
         """
         # Instantiate the environment and run a simulation with random action
         env = AtlasPDControlJiminyEnv()
+
+        # Make sure that a PD controller is plugged to the robot
+        controller = env.controller
+        assert isinstance(controller, PDController) and controller.order == 1
+
+        # Disable acceleration limits of PD controller
+        controller._command_state_lower[2] = float("-inf")
+        controller._command_state_upper[2] = float("inf")
+
+        # Run a few environment steps
         env.reset(seed=0)
         env.unwrapped._height_neutral = float("-inf")
         while env.stepper_state.t < 2.0:
             env.step(0.2 * env.action_space.sample())
 
         # Extract the target position and velocity of a single motor
-        controller = env.env.controller
-        assert isinstance(controller, PDController) and controller.order == 1
         ctrl_name = controller.name
         n_motors = len(controller.fieldnames)
         pos = env.log_data["variables"][".".join((
@@ -202,12 +210,13 @@ def test_pid_controller(self):
         np.testing.assert_allclose(
             np.diff(pos) / controller.control_dt, vel[1:], atol=TOLERANCE)
 
-        # Make sure that the position targets are within bounds.
-        # No such guarantee exists for higher-order derivatives.
+        # Make sure that the position and velocity targets are within bounds
         robot = env.robot
         pos_min = robot.position_limit_lower[robot.motor_position_indices[-1]]
         pos_max = robot.position_limit_upper[robot.motor_position_indices[-1]]
-        self.assertTrue(np.all(np.logical_and(pos_min < pos, pos < pos_max)))
+        vel_limit = robot.velocity_limit[robot.motor_velocity_indices[-1]]
+        self.assertTrue(np.all(np.logical_and(pos_min <= pos, pos <= pos_max)))
+        self.assertTrue(np.all(np.abs(vel) <= vel_limit))
 
     def test_repeatability(self):
         # Instantiate the environment

diff --git a/python/jiminy_pywrap/include/jiminy/python/utilities.h b/python/jiminy_pywrap/include/jiminy/python/utilities.h
@@ -570,7 +570,8 @@ namespace jiminy::python
                         "' was expected.");
         }
 
-        // Check array number of dimensions
+        // Pick the right mapping depending on dimensionality and memory layout
+        int32_t flags = PyArray_FLAGS(arrayPy);
         T * dataPtr = static_cast<T *>(PyArray_DATA(arrayPy));
         switch (PyArray_NDIM(arrayPy))
         {
@@ -584,7 +585,6 @@ namespace jiminy::python
             THROW_ERROR(std::invalid_argument, "Numpy array must be contiguous.");
         case 2:
         {
-            int32_t flags = PyArray_FLAGS(arrayPy);
             npy_intp * arrayPyShape = PyArray_SHAPE(arrayPy);
             if (flags & NPY_ARRAY_C_CONTIGUOUS)
             {
@@ -1160,8 +1160,8 @@ namespace jiminy::python
     };
 
     template<typename R, typename... Args>
-    ConvertGeneratorFromPythonAndInvoke(R (*)(Args...))
-        -> ConvertGeneratorFromPythonAndInvoke<R(Args...), void>;
+    ConvertGeneratorFromPythonAndInvoke(
+        R (*)(Args...)) -> ConvertGeneratorFromPythonAndInvoke<R(Args...), void>;
 
     template<typename T, typename R, typename Generator, typename... Args>
     class ConvertGeneratorFromPythonAndInvoke<R(Generator, Args...), T>
@@ -1187,8 +1187,8 @@ namespace jiminy::python
     };
 
     template<typename T, typename R, typename... Args>
-    ConvertGeneratorFromPythonAndInvoke(R (T::*)(Args...))
-        -> ConvertGeneratorFromPythonAndInvoke<R(Args...), T>;
+    ConvertGeneratorFromPythonAndInvoke(
+        R (T::*)(Args...)) -> ConvertGeneratorFromPythonAndInvoke<R(Args...), T>;
 
     // **************************** Automatic From Python converter **************************** //