"""Implementation of basic Proportional-Derivative controller block compatible
with gym_jiminy reinforcement learning pipeline environment design.
"""
import warnings
from typing import List, Union

import numpy as np
import numba as nb
import gymnasium as gym

import jiminy_py.core as jiminy
from jiminy_py.core import (  # pylint: disable=no-name-in-module
    array_copyto,
    EncoderSensor as encoder)

from ..bases import BaseObsT, InterfaceJiminyEnv, BaseControllerBlock
from ..utils import fill


# Name of the n-th position derivative
N_ORDER_DERIVATIVE_NAMES = ("Position", "Velocity", "Acceleration")


@nb.jit(nopython=True, cache=True, inline='always', fastmath=True)
def integrate_zoh(state: np.ndarray,
                  state_min: np.ndarray,
                  state_max: np.ndarray,
                  dt: float) -> None:
    """Second order approximate integration scheme assuming Zero-Order-Hold
    for the acceleration, taking position, velocity and acceleration bounds
    into account.

    Internally, it simply chains two first-order integrators in cascade. The
    acceleration will be updated in-place if clipping is necessary to satisfy
    bounds.

    :param state: Current state, ordered from lowest to highest derivative
                  order, ie: s[i](t) = s[i](t-1) + integ_{t-1}^{t}(s[i+1](t)),
                  as a 2D array whose first dimension gathers the 3 derivative
                  orders. It will be updated in-place.
    :param state_min: Lower bounds of the state as a 2D array.
    :param state_max: Upper bounds of the state as a 2D array.
    :param dt: Integration delta of time since previous state update.
    """
    # Make sure that dt is not negative
    assert dt >= 0.0, "Integration backward in time is not supported."

    # Early return if timestep is too small
    if abs(dt) < 1e-9:
        return

    # Split position, velocity and acceleration orders for convenience
    position, velocity, acceleration = state
    position_min, velocity_min, acceleration_min = state_min
    position_max, velocity_max, acceleration_max = state_max

    # Clip acceleration
    acceleration[:] = np.minimum(
        np.maximum(acceleration, acceleration_min), acceleration_max)

    # Backup the initial velocity to later compute the clipped acceleration
    velocity_prev = velocity.copy()

    # Integrate acceleration 1-step ahead
    velocity += acceleration * dt

    # Make sure that "true" velocity bounds are satisfied
    velocity[:] = np.minimum(np.maximum(velocity, velocity_min), velocity_max)

    # Force slowing down early enough to avoid violating acceleration limits
    # when hitting position bounds.
    horizon = np.maximum(
        np.floor(np.abs(velocity_prev) / acceleration_max / dt) * dt, dt)
    position_min_delta = position_min - position
    position_max_delta = position_max - position
    horizon_mask = horizon > dt
    drift = 0.5 * (horizon[horizon_mask] * (horizon[horizon_mask] - dt))
    position_min_delta[horizon_mask] -= drift * acceleration_max[horizon_mask]
    position_max_delta[horizon_mask] += drift * acceleration_max[horizon_mask]
    velocity_min = position_min_delta / horizon
    velocity_max = position_max_delta / horizon
    velocity[:] = np.minimum(np.maximum(velocity, velocity_min), velocity_max)

    # Velocity after hitting bounds must be cancellable in a single time step
    velocity_mask = np.abs(velocity) > dt * acceleration_max
    velocity_min = - np.maximum(
        position_min_delta[velocity_mask] / velocity[velocity_mask], dt
        ) * acceleration_max[velocity_mask]
    velocity_max = np.maximum(
        position_max_delta[velocity_mask] / velocity[velocity_mask], dt
        ) * acceleration_max[velocity_mask]
    velocity[velocity_mask] = np.minimum(
        np.maximum(velocity[velocity_mask], velocity_min), velocity_max)

    # Back-propagate velocity clipping at the acceleration-level
    acceleration[:] = (velocity - velocity_prev) / dt

    # Integrate position 1-step ahead
    position += dt * velocity
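

# Illustrative sketch (hypothetical `_integrate_zoh_example` helper with
# made-up values): it demonstrates how `integrate_zoh` clips the acceleration
# in-place before integrating the velocity and position of a single motor.
def _integrate_zoh_example() -> None:
    # Command state layout: rows are (position, velocity, acceleration)
    state = np.array([[0.0], [1.0], [10.0]])
    state_min = np.array([[-1.0], [-2.0], [-5.0]])
    state_max = np.array([[1.0], [2.0], [5.0]])
    integrate_zoh(state, state_min, state_max, 0.01)
    # The acceleration is first clipped from 10.0 down to 5.0, then:
    # velocity = 1.0 + 5.0 * 0.01 = 1.05, position = 0.0 + 0.01 * 1.05
    assert np.allclose(state[2], 5.0)
    assert np.allclose(state[1], 1.05)
    assert np.allclose(state[0], 0.0105)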


@nb.jit(nopython=True, cache=True, fastmath=True)
def pd_controller(q_measured: np.ndarray,
                  v_measured: np.ndarray,
                  command_state: np.ndarray,
                  command_state_lower: np.ndarray,
                  command_state_upper: np.ndarray,
                  kp: np.ndarray,
                  kd: np.ndarray,
                  motors_effort_limit: np.ndarray,
                  control_dt: float,
                  out: np.ndarray) -> None:
    """Compute command torques under discrete-time proportional-derivative
    feedback control.

    Internally, it integrates the command state (position, velocity and
    acceleration) at the controller update period in order to obtain the
    desired motor positions 'q_desired' and velocities 'v_desired', taking
    into account some lower and upper bounds. Computed this way, the target
    motor positions and velocities can be interpreted as targets that have to
    be reached right before updating the command once again. Enforcing
    consistency between target position and velocity in such a way is a
    necessary but insufficient condition for the motors to be able to track
    them.

    The command effort is computed as follows:

        tau = - kp * ((q_measured - q_desired) + kd * (v_measured - v_desired))

    The torque will be held constant until the next controller update.

    .. seealso::
        See `PDController` documentation to get more information, and
        `integrate_zoh` documentation for details about the state integration.

    :param q_measured: Current position of the actuators.
    :param v_measured: Current velocity of the actuators.
    :param command_state: Current command state, namely, all the derivatives
                          of the target motors positions up to acceleration
                          order.
    :param command_state_lower: Lower bound of the command state that must be
                                satisfied at all cost.
    :param command_state_upper: Upper bound of the command state that must be
                                satisfied at all cost.
    :param kp: PD controller position-proportional gain in motor order.
    :param kd: PD controller velocity-proportional gain in motor order.
    :param motors_effort_limit: Maximum effort that the actuators can output.
    :param control_dt: Controller update period. It will be involved in the
                       integration of the command state.
    :param out: Pre-allocated memory to store the command motor torques.
    """
    # Integrate target motor positions and velocities
    integrate_zoh(command_state,
                  command_state_lower,
                  command_state_upper,
                  control_dt)

    # Extract target motor positions and velocities from command state
    q_target, v_target, _ = command_state

    # Compute the joint tracking error
    q_error, v_error = q_target - q_measured, v_target - v_measured

    # Compute PD command
    out[:] = kp * (q_error + kd * v_error)

    # Clip the command motor torques before returning
    out[:] = np.minimum(np.maximum(
        out, -motors_effort_limit), motors_effort_limit)
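

# Illustrative sketch (hypothetical `_pd_controller_example` helper with
# made-up gains and measurements): a single-motor call to `pd_controller`.
# Passing a zero update period skips the command state integration, so the
# output reduces to the raw PD law followed by effort limit clipping.
def _pd_controller_example() -> None:
    command_state = np.array([[0.5], [0.0], [0.0]])  # target pos/vel/acc
    command_state_lower = np.array([[-1.0], [-2.0], [-10.0]])
    command_state_upper = np.array([[1.0], [2.0], [10.0]])
    command = np.zeros((1,))
    pd_controller(np.array([0.4]),    # measured motor position
                  np.array([0.1]),    # measured motor velocity
                  command_state,
                  command_state_lower,
                  command_state_upper,
                  np.array([100.0]),  # kp
                  np.array([0.05]),   # kd
                  np.array([10.0]),   # motors_effort_limit
                  0.0,                # control_dt: skip state integration
                  command)
    # tau = kp * ((q_target - q_measured) + kd * (v_target - v_measured))
    #     = 100.0 * ((0.5 - 0.4) + 0.05 * (0.0 - 0.1)) = 9.5
    assert np.allclose(command, 9.5)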


@nb.jit(nopython=True, cache=True, fastmath=True)
def pd_adapter(action: np.ndarray,
               order: int,
               command_state: np.ndarray,
               command_state_lower: np.ndarray,
               command_state_upper: np.ndarray,
               dt: float,
               out: np.ndarray) -> None:
    """Compute the target motor accelerations that must be held constant for
    a given time interval in order to reach the desired value of some
    derivative of the target motor positions at the end of that interval if
    possible.

    Internally, it applies backward in time the same integration scheme as
    the PD controller. Knowing the initial and final values of the derivative
    over the time interval, constant target motor accelerations can be
    uniquely deduced. In practice, it consists in successive clipped finite
    differences of that derivative up to the acceleration level.

    :param action: Desired value of the n-th derivative of the command motor
                   positions at the end of the controller update.
    :param order: Derivative order of the position associated with the action.
    :param command_state: Current command state, namely, all the derivatives
                          of the target motors positions up to acceleration
                          order.
    :param command_state_lower: Lower bound of the command state that must be
                                satisfied at all cost.
    :param command_state_upper: Upper bound of the command state that must be
                                satisfied at all cost.
    :param dt: Time interval during which the target motor accelerations will
               be held constant.
    :param out: Pre-allocated memory to store the target motor accelerations.
    """
    # Update command accelerations based on the action and its derivative
    # order
    if order == 2:
        # The action corresponds to the command motor accelerations
        out[:] = action
    else:
        if order == 0:
            # Compute command velocity
            velocity = (action - command_state[0]) / dt

            # Clip command velocity
            velocity = np.minimum(np.maximum(
                velocity, command_state_lower[1]), command_state_upper[1])
        else:
            # The action corresponds to the command motor velocities
            velocity = action

        # Compute command acceleration
        out[:] = (velocity - command_state[1]) / dt
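

# Illustrative sketch (hypothetical `_pd_adapter_example` helper with made-up
# values): a velocity-level action (order=1) passed to `pd_adapter`. The
# constant target acceleration is the finite difference between the desired
# and current target velocities.
def _pd_adapter_example() -> None:
    command_state = np.array([[0.0], [0.5], [0.0]])  # target pos/vel/acc
    command_state_lower = np.array([[-1.0], [-2.0], [-50.0]])
    command_state_upper = np.array([[1.0], [2.0], [50.0]])
    out = np.zeros((1,))
    pd_adapter(np.array([1.0]),  # desired target velocity at end of interval
               1,                # derivative order of the action
               command_state,
               command_state_lower,
               command_state_upper,
               0.1,              # time interval
               out)
    # out = (1.0 - 0.5) / 0.1 = 5.0
    assert np.allclose(out, 5.0)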


def get_encoder_to_motor_map(robot: jiminy.Robot) -> Union[slice, List[int]]:
    """Get the mapping from encoder sensors to motors.

    .. warning::
        If reordering is necessary, then a list of indices is returned, which
        can be used to permute encoder sensor data to match the command torque
        vector. Yet, it relies on so-called "fancy" or "advanced" indexing for
        doing so, which means that the returned data is a copy of the original
        data instead of a reference. On the contrary, if reordering is not
        necessary, a slice is returned instead and no copy happens whatsoever.

    :param robot: Jiminy robot for which to compute the mapping.

    :returns: A slice if possible, a list of indices otherwise.
    """
    # Define the mapping from encoders to motors
    encoder_to_motor = [-1 for _ in range(robot.nmotors)]
    encoders = [robot.get_sensor(encoder.type, sensor_name)
                for sensor_name in robot.sensor_names[encoder.type]]
    for i, motor_name in enumerate(robot.motor_names):
        motor = robot.get_motor(motor_name)
        for j, sensor in enumerate(encoders):
            assert isinstance(sensor, encoder)
            if motor.joint_index == sensor.joint_index:
                encoder_to_motor[sensor.index] = i
                encoders.pop(j)
                break
        else:
            raise RuntimeError(
                f"No encoder sensor associated with motor '{motor_name}'. "
                "Every actuated joint must have encoder sensors attached.")

    # Try converting it to slice if possible
    if (np.array(encoder_to_motor) == np.arange(robot.nmotors)).all():
        return slice(None)
    return encoder_to_motor
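

# Illustrative sketch (hypothetical `_slice_vs_fancy_indexing_example`
# helper): the view-versus-copy distinction that motivates returning a slice
# whenever no reordering is needed.
def _slice_vs_fancy_indexing_example() -> None:
    measurements = np.array([1.0, 2.0, 3.0])
    view = measurements[slice(None)]  # no reordering: shares memory
    copy = measurements[[2, 0, 1]]    # reordering: advanced indexing copies
    measurements[0] = -1.0
    assert view[0] == -1.0            # the view reflects the update
    assert copy[1] == 1.0             # the copy still holds the old value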


class PDController(
        BaseControllerBlock[np.ndarray, np.ndarray, BaseObsT, np.ndarray]):
    """Low-level Proportional-Derivative controller.

    The action is the target motor accelerations. The latter are integrated
    twice using two first-order integrators in cascade, considering that the
    acceleration is constant until the next controller update:

        v_{t+1} = v_{t} + dt * a_{t}
        q_{t+1} = q_{t} + dt * v_{t+1}

    .. note::
        The position and velocity bounds on the command correspond to the
        joint limits specified by the dynamical model of the robot. Then, lax
        default acceleration bounds are extrapolated. More precisely, they
        are chosen to be sufficient either to span the whole range of
        velocity or to allow reaching the command effort limits, depending on
        which is the most restrictive.

    .. warning::
        It must be connected directly to the base environment to control
        without any intermediary controllers altering its action space.
    """

    def __init__(self,
                 name: str,
                 env: InterfaceJiminyEnv[BaseObsT, np.ndarray],
                 *,
                 update_ratio: int = 1,
                 kp: Union[float, List[float], np.ndarray],
                 kd: Union[float, List[float], np.ndarray],
                 target_position_margin: float = 0.0,
                 target_velocity_limit: float = float("inf"),
                 target_acceleration_limit: float = float("inf")) -> None:
        """
        :param name: Name of the block.
        :param env: Environment to connect with.
        :param update_ratio: Ratio between the update period of the controller
                             and the one of the subsequent controller. -1 to
                             match the simulation timestep of the environment.
                             Optional: 1 by default.
        :param kp: PD controller position-proportional gains in motor order.
        :param kd: PD controller velocity-proportional gains in motor order.
        :param target_position_margin: Minimum distance of the motor target
                                       positions from their respective bounds.
                                       Optional: 0.0 by default.
        :param target_velocity_limit: Restrict maximum motor target velocities
                                      wrt their hardware specifications.
                                      Optional: "inf" by default.
        :param target_acceleration_limit: Restrict maximum motor target
                                          accelerations wrt their hardware
                                          specifications.
                                          Optional: "inf" by default.
        """
        # Make sure the action space of the environment has not been altered
        if env.action_space is not env.unwrapped.action_space:
            raise RuntimeError(
                "Impossible to connect this block on an environment whose "
                "action space has been altered.")

        # Make sure that the number of PD gains matches the number of motors
        try:
            kp = np.broadcast_to(kp, (env.robot.nmotors,))
            kd = np.broadcast_to(kd, (env.robot.nmotors,))
        except ValueError as e:
            raise TypeError(
                "PD gains inconsistent with number of motors.") from e

        # Backup some user argument(s)
        self.kp = kp
        self.kd = kd

        # Mapping from encoders to motors
        self.encoder_to_motor = get_encoder_to_motor_map(env.robot)

        # Whether stored references to encoder measurements are already in
        # the same order as the motors, allowing skipping re-ordering
        # entirely.
        self._is_same_order = isinstance(self.encoder_to_motor, slice)
        if not self._is_same_order:
            warnings.warn(
                "Consider using the same ordering for encoders and motors "
                "for optimal performance.")

        # Define buffers storing information about the motors for efficiency.
        # Note that even if the robot instance may change from one simulation
        # to another, the observation and action spaces are required to stay
        # the same the whole time. This implies that the motors effort limit
        # must not change, unlike the mapping from full state to motors.
        self.motors_effort_limit = env.robot.command_limit[
            env.robot.motor_velocity_indices]

        # Extract the motors target position bounds from the model
        motors_position_lower: List[float] = []
        motors_position_upper: List[float] = []
        for motor_name in env.robot.motor_names:
            motor = env.robot.get_motor(motor_name)
            joint_type = jiminy.get_joint_type(
                env.robot.pinocchio_model, motor.joint_index)
            if joint_type == jiminy.JointModelType.ROTARY_UNBOUNDED:
                lower, upper = float("-inf"), float("inf")
            else:
                motor_position_index = motor.joint_position_index
                lower = env.robot.position_limit_lower[motor_position_index]
                upper = env.robot.position_limit_upper[motor_position_index]
            motors_position_lower.append(lower + target_position_margin)
            motors_position_upper.append(upper - target_position_margin)

        # Extract the motors target velocity bounds
        motors_velocity_limit = np.minimum(
            env.robot.velocity_limit[env.robot.motor_velocity_indices],
            target_velocity_limit)

        # Compute acceleration bounds allowing unrestricted bang-bang control
        range_limit = 2 * motors_velocity_limit / env.step_dt
        effort_limit = self.motors_effort_limit / (
            self.kp * env.step_dt * np.maximum(env.step_dt / 2, self.kd))
        acceleration_limit = np.minimum(
            np.minimum(range_limit, effort_limit), target_acceleration_limit)

        # Compute command state bounds
        self._command_state_lower = np.stack([
            np.array(motors_position_lower),
            -motors_velocity_limit,
            -acceleration_limit], axis=0)
        self._command_state_upper = np.stack([
            np.array(motors_position_upper),
            motors_velocity_limit,
            acceleration_limit], axis=0)

        # Extract measured motor positions and velocities for fast access.
        # Note that they will be initialized in `_setup` method.
        self.q_measured, self.v_measured = np.array([]), np.array([])

        # Allocate memory for the command state
        self._command_state = np.zeros((3, env.robot.nmotors))

        # Initialize the controller
        super().__init__(name, env, update_ratio)

        # References to command acceleration for fast access
        self._command_acceleration = self._command_state[2]

    def _initialize_state_space(self) -> None:
        """Configure the state space of the controller.

        The state space corresponds to the target motor positions and
        velocities.
        """
        self.state_space = gym.spaces.Box(
            low=self._command_state_lower[:2],
            high=self._command_state_upper[:2],
            dtype=np.float64)

    def _initialize_action_space(self) -> None:
        """Configure the action space of the controller.

        The action space corresponds to the target motor accelerations.
        """
        self.action_space = gym.spaces.Box(
            low=self._command_state_lower[2],
            high=self._command_state_upper[2],
            dtype=np.float64)

    def _setup(self) -> None:
        # Call base implementation
        super()._setup()

        # Make sure control update is discrete-time
        if self.env.control_dt <= 0.0:
            raise ValueError(
                "This block does not support time-continuous update.")

        # Refresh measured motor positions and velocities proxies
        self.q_measured, self.v_measured = (
            self.env.sensor_measurements[encoder.type])

        # Reset the command state
        fill(self._command_state, 0)

    @property
    def fieldnames(self) -> List[str]:
        return [f"currentTarget{N_ORDER_DERIVATIVE_NAMES[2]}{name}"
                for name in self.env.robot.motor_names]

    def get_state(self) -> np.ndarray:
        return self._command_state[:2]

    def compute_command(self,
                        action: np.ndarray,
                        command: np.ndarray) -> None:
        """Compute the target motor torques using a PD controller.

        It is proportional to the error between the observed motor
        positions/velocities and the target ones.

        .. warning::
            Calling this method manually while a simulation is running is
            forbidden, because it would mess with the controller update
            period.

        :param action: Desired target motor acceleration.
        :param command: Current motor torques that will be updated in-place.
        """
        # Re-initialize the command state to the current motor state if the
        # simulation is not running. This must be done here because the
        # command state must be valid prior to calling `refresh_observation`
        # for the first time, which happens at `reset`.
        is_simulation_running = self.env.is_simulation_running
        if not is_simulation_running:
            for i, value in enumerate((self.q_measured, self.v_measured)):
                np.clip(value,
                        self._command_state_lower[i],
                        self._command_state_upper[i],
                        out=self._command_state[i])

        # Extract motor positions and velocities from encoder data
        q_measured, v_measured = self.q_measured, self.v_measured
        if not self._is_same_order:
            q_measured = q_measured[self.encoder_to_motor]
            v_measured = v_measured[self.encoder_to_motor]

        # Update target motor accelerations
        array_copyto(self._command_acceleration, action)

        # Compute the motor efforts using PD control.
        # The command state must not be updated if no simulation is running.
        pd_controller(
            q_measured,
            v_measured,
            self._command_state,
            self._command_state_lower,
            self._command_state_upper,
            self.kp,
            self.kd,
            self.motors_effort_limit,
            self.control_dt if is_simulation_running else 0.0,
            command)
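

# Illustrative sketch (hypothetical `_acceleration_limit_example` helper with
# made-up values): a scalar walk-through of the default acceleration bounds
# computed in `PDController.__init__` above. The retained bound is the
# smallest of the velocity-range limit and the effort-based limit.
def _acceleration_limit_example() -> None:
    step_dt, kp, kd = 0.02, 100.0, 0.1
    velocity_limit, effort_limit = 2.0, 30.0
    # Sufficient to span the whole velocity range within a single step
    range_limit = 2 * velocity_limit / step_dt            # 200.0
    # Sufficient to reach the command effort limit
    effort_based_limit = effort_limit / (
        kp * step_dt * max(step_dt / 2, kd))              # 150.0
    assert min(range_limit, effort_based_limit) == 150.0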


class PDAdapter(
        BaseControllerBlock[np.ndarray, np.ndarray, BaseObsT, np.ndarray]):
    """Adapt the action of a lower-level Proportional-Derivative controller
    to be the target motor positions or velocities rather than accelerations.

    The action is the desired value of some derivative of the target motor
    positions. The target motor accelerations are then deduced so as to reach
    this value by the next update of this controller if possible without
    exceeding the position, velocity and acceleration bounds. Finally, these
    target accelerations are passed to a lower-level PD controller, usually
    running at a higher frequency.

    .. note::
        The higher the derivative order of the action, the smoother the
        command motor torques. Thus, a high order is generally beneficial for
        robotic applications. However, it introduces some kind of delay
        between the action and its effects. This phenomenon limits the
        responsiveness of the agent and therefore impedes its optimal
        performance. Besides, it introduces an additional layer of
        indirection between the action and its effect, which may be difficult
        to grasp for the agent. Finally, exploration usually consists in
        adding a temporally uncorrelated Gaussian random process at the
        action level. The effect of such random processes tends to vanish
        when integrated, making exploration very inefficient.
    """

    def __init__(self,
                 name: str,
                 env: InterfaceJiminyEnv[BaseObsT, np.ndarray],
                 *,
                 update_ratio: int = -1,
                 order: int = 1) -> None:
        """
        :param update_ratio: Ratio between the update period of the controller
                             and the one of the subsequent controller. -1 to
                             match the simulation timestep of the environment.
                             Optional: -1 by default.
        :param order: Derivative order of the action. It accepts position or
                      velocity (respectively 0 or 1).
                      Optional: 1 by default.
        """
        # Make sure that the specified derivative order is valid
        assert order in (0, 1), "Derivative order out-of-bounds"

        # Make sure that a PD controller block is already connected
        controller = env.controller  # type: ignore[attr-defined]
        if not isinstance(controller, PDController):
            raise RuntimeError(
                "This block must be directly connected to a lower-level "
                "`PDController` block.")

        # Backup some user argument(s)
        self.order = order

        # Define some proxies for convenience
        self._pd_controller = controller

        # Allocate memory for the target motor accelerations
        self._target_accelerations = np.zeros((env.robot.nmotors,))

        # Initialize the controller
        super().__init__(name, env, update_ratio)

    def _initialize_action_space(self) -> None:
        """Configure the action space of the controller.

        The action space corresponds to the N-th order derivative of the
        target motor positions.
        """
        self.action_space = gym.spaces.Box(
            low=self._pd_controller._command_state_lower[self.order],
            high=self._pd_controller._command_state_upper[self.order],
            dtype=np.float64)

    def _setup(self) -> None:
        # Call base implementation
        super()._setup()

        # Reset the target motor accelerations
        fill(self._target_accelerations, 0)

    @property
    def fieldnames(self) -> List[str]:
        return [f"nextTarget{N_ORDER_DERIVATIVE_NAMES[self.order]}{name}"
                for name in self.env.robot.motor_names]

    def compute_command(self,
                        action: np.ndarray,
                        command: np.ndarray) -> None:
        """Compute the target motor accelerations from the desired value of
        some derivative of the target motor positions.

        :param action: Desired value of the N-th order derivative of the
                       target motor positions.
        :param command: Target motor accelerations that will be updated
                        in-place.
        """
        pd_adapter(
            action,
            self.order,
            self._pd_controller._command_state,
            self._pd_controller._command_state_lower,
            self._pd_controller._command_state_upper,
            self.control_dt,
            command)
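

# Illustrative sketch (hypothetical `_pd_adapter_position_example` helper with
# made-up values): the position-level path (order=0) of `pd_adapter`, showing
# the intermediate velocity being clipped before the final finite difference.
def _pd_adapter_position_example() -> None:
    command_state = np.array([[0.0], [0.0], [0.0]])  # target pos/vel/acc
    command_state_lower = np.array([[-1.0], [-2.0], [-50.0]])
    command_state_upper = np.array([[1.0], [2.0], [50.0]])
    out = np.zeros((1,))
    pd_adapter(np.array([1.0]),  # desired target position at end of interval
               0,                # derivative order of the action
               command_state,
               command_state_lower,
               command_state_upper,
               0.1,              # time interval
               out)
    # velocity = (1.0 - 0.0) / 0.1 = 10.0, clipped to 2.0 by its bounds,
    # then out = (2.0 - 0.0) / 0.1 = 20.0
    assert np.allclose(out, 20.0)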