Merge branch 'main' into aggregated-profiling

huggingface · Sep 9, 2024 · a7fb0f7 · a7fb0f7
2 parents 602a73d + a60d27b
commit a7fb0f7
Show file tree

Hide file tree

Showing 8 changed files with 123 additions and 48 deletions.
diff --git a/examples/7_get_started_with_real_robot.md b/examples/7_get_started_with_real_robot.md
@@ -41,6 +41,13 @@ Or using `poetry`:
 poetry install --sync --extras "dynamixel"
 ```
 
+/!\ For Linux only, ffmpeg and opencv require a conda install for now. Run this exact sequence of commands:
+```bash
+conda install -c conda-forge ffmpeg
+pip uninstall opencv-python
+conda install -c conda-forge "opencv>=4.10.0"
+```
+
 You are now ready to plug the 5V power supply to the motor bus of the leader arm (the smaller one) since all its motors only require 5V.
 
 Then plug the 12V power supply to the motor bus of the follower arm. It has two motors that need 12V, and the rest will be powered with 5V through the voltage convertor.

diff --git a/lerobot/common/policies/diffusion/configuration_diffusion.py b/lerobot/common/policies/diffusion/configuration_diffusion.py
@@ -196,3 +196,12 @@ def __post_init__(self):
                 f"`noise_scheduler_type` must be one of {supported_noise_schedulers}. "
                 f"Got {self.noise_scheduler_type}."
             )
+
+        # Check that the horizon size and U-Net downsampling are compatible.
+        # U-Net downsamples by 2 with each stage.
+        downsampling_factor = 2 ** len(self.down_dims)
+        if self.horizon % downsampling_factor != 0:
+            raise ValueError(
+                "The horizon should be an integer multiple of the downsampling factor (which is determined "
+                f"by `len(down_dims)`). Got {self.horizon=} and {self.down_dims=}"
+            )
diff --git a/lerobot/common/policies/vqbet/modeling_vqbet.py b/lerobot/common/policies/vqbet/modeling_vqbet.py
@@ -350,17 +350,22 @@ def forward(self, batch: dict[str, Tensor], rollout: bool) -> Tensor:
 
         # get action features (pass through GPT)
         features = self.policy(input_tokens)
-        # len(self.config.input_shapes) is the number of different observation modes. this line gets the index of action prompt tokens.
+        # len(self.config.input_shapes) is the number of different observation modes.
+        # this line gets the index of action prompt tokens.
         historical_act_pred_index = np.arange(0, n_obs_steps) * (len(self.config.input_shapes) + 1) + len(
             self.config.input_shapes
         )
 
         # only extract the output tokens at the position of action query:
-        # Behavior Transformer (BeT), and VQ-BeT are both sequence-to-sequence prediction models, mapping sequential observation to sequential action (please refer to section 2.2 in BeT paper https://arxiv.org/pdf/2206.11251).
-        # Thus, it predict historical action sequence, in addition to current and future actions (predicting future actions : optional).
-        features = torch.cat(
-            [features[:, historical_act_pred_index], features[:, -len_additional_action_token:]], dim=1
-        )
+        # Behavior Transformer (BeT), and VQ-BeT are both sequence-to-sequence prediction models,
+        # mapping sequential observation to sequential action (please refer to section 2.2 in BeT paper https://arxiv.org/pdf/2206.11251).
+        # Thus, it predicts a historical action sequence, in addition to current and future actions (predicting future actions : optional).
+        if len_additional_action_token > 0:
+            features = torch.cat(
+                [features[:, historical_act_pred_index], features[:, -len_additional_action_token:]], dim=1
+            )
+        else:
+            features = features[:, historical_act_pred_index]
         # pass through action head
         action_head_output = self.action_head(features)
         # if rollout, VQ-BeT don't calculate loss

diff --git a/lerobot/common/robot_devices/robots/manipulator.py b/lerobot/common/robot_devices/robots/manipulator.py
@@ -496,6 +496,17 @@ def set_shadow_(arm):
             # Set a velocity limit of 131 as advised by Trossen Robotics
             self.follower_arms[name].write("Velocity_Limit", 131)
 
+            # Use 'extended position mode' for all motors except gripper, because in joint mode the servos can't
+            # rotate more than 360 degrees (from 0 to 4095). Mistakes can also happen while assembling the arm:
+            # you could end up with a servo at position 0 or 4095 at a crucial point. See
+            # https://emanual.robotis.com/docs/en/dxl/x/x_series/#operating-mode11
+            all_motors_except_gripper = [
+                name for name in self.follower_arms[name].motor_names if name != "gripper"
+            ]
+            if len(all_motors_except_gripper) > 0:
+                # 4 corresponds to Extended Position on Aloha motors
+                self.follower_arms[name].write("Operating_Mode", 4, all_motors_except_gripper)
+
             # Use 'position control current based' for follower gripper to be limited by the limit of the current.
             # It can grasp an object without forcing too much even tho,
             # it's goal position is a complete grasp (both gripper fingers are ordered to join and reach a touch).

diff --git a/lerobot/common/robot_devices/utils.py b/lerobot/common/robot_devices/utils.py
@@ -1,13 +1,19 @@
+import platform
 import time
 
 
 def busy_wait(seconds):
-    # Significantly more accurate than `time.sleep`, and mandatory for our use case,
-    # but it consumes CPU cycles.
-    # TODO(rcadene): find an alternative: from python 11, time.sleep is precise
-    end_time = time.perf_counter() + seconds
-    while time.perf_counter() < end_time:
-        pass
+    if platform.system() == "Darwin":
+        # On Mac, `time.sleep` is not accurate and we need to use this while loop trick,
+        # but it consumes CPU cycles.
+        # TODO(rcadene): find an alternative: from Python 3.11, `time.sleep` is precise
+        end_time = time.perf_counter() + seconds
+        while time.perf_counter() < end_time:
+            pass
+    else:
+        # On Linux, `time.sleep` is accurate (NOTE: this branch also runs on every other non-Mac platform)
+        if seconds > 0:
+            time.sleep(seconds)
 
 
 class RobotDeviceNotConnectedError(Exception):

diff --git a/lerobot/configs/robot/aloha.yaml b/lerobot/configs/robot/aloha.yaml
@@ -1,3 +1,11 @@
+# Aloha: A Low-Cost Hardware for Bimanual Teleoperation
+# https://aloha-2.github.io
+# https://www.trossenrobotics.com/aloha-stationary
+
+# Requires installing extra packages
+# With pip: `pip install -e ".[dynamixel, intelrealsense]"`
+# With poetry: `poetry install --sync --extras "dynamixel intelrealsense"`
+
 _target_: lerobot.common.robot_devices.robots.manipulator.ManipulatorRobot
 robot_type: aloha
 # Specific to Aloha, LeRobot comes with default calibration files. Assuming the motors have been

diff --git a/lerobot/scripts/control_robot.py b/lerobot/scripts/control_robot.py
@@ -739,7 +739,7 @@ def replay(robot: Robot, episode: int, fps: int | None = None, root="data", repo
     parser_calib = subparsers.add_parser("calibrate", parents=[base_parser])
     parser_calib.add_argument(
         "--arms",
-        type=int,
+        type=str,
         nargs="*",
         help="List of arms to calibrate (e.g. `--arms left_follower right_follower left_leader`)",
     )

diff --git a/lerobot/templates/visualize_dataset_template.html b/lerobot/templates/visualize_dataset_template.html
@@ -14,7 +14,7 @@
 <!-- Use [Alpin.js](https://alpinejs.dev), a lightweight and easy to learn JS framework -->
 <!-- Use [tailwindcss](https://tailwindcss.com/), CSS classes for styling html -->
 <!-- Use [dygraphs](https://dygraphs.com/), a lightweight JS charting library -->
-<body class="flex h-screen max-h-screen bg-slate-950 text-gray-200" x-data="createAlpineData()" @keydown.window="(e) => {
+<body class="flex flex-col md:flex-row h-screen max-h-screen bg-slate-950 text-gray-200" x-data="createAlpineData()" @keydown.window="(e) => {
     // Use the space bar to play and pause, instead of default action (e.g. scrolling)
     const { keyCode, key } = e;
     if (keyCode === 32 || key === ' ') {
@@ -30,7 +30,7 @@
     }
 }">
     <!-- Sidebar -->
-    <div x-ref="sidebar" class="w-60 bg-slate-900 p-5 break-words max-h-screen overflow-y-auto">
+    <div x-ref="sidebar" class="bg-slate-900 p-5 break-words overflow-y-auto shrink-0 md:shrink md:w-60 md:max-h-screen">
         <h1 class="mb-4 text-xl font-semibold">{{ dataset_info.repo_id }}</h1>
 
         <ul>
@@ -46,7 +46,8 @@ <h1 class="mb-4 text-xl font-semibold">{{ dataset_info.repo_id }}</h1>
         </ul>
 
         <p>Episodes:</p>
-        <ul class="ml-2">
+        <!-- episodes menu for medium & large screens -->
+        <ul class="ml-2 hidden md:block">
             {% for episode in episodes %}
             <li class="font-mono text-sm mt-0.5">
                 <a href="episode_{{ episode }}" class="underline {% if episode_id == episode %}font-bold -ml-1{% endif %}">
@@ -56,24 +57,36 @@ <h1 class="mb-4 text-xl font-semibold">{{ dataset_info.repo_id }}</h1>
             {% endfor %}
         </ul>
 
+        <!-- episodes menu for small screens -->
+        <div class="flex overflow-x-auto md:hidden">
+            {% for episode in episodes %}
+            <p class="font-mono text-sm mt-0.5 border-r last:border-r-0 px-2 {% if episode_id == episode %}font-bold{% endif %}">
+                <a href="episode_{{ episode }}" class="">
+                    {{ episode }}
+                </a>
+            </p>
+            {% endfor %}
+        </div>
+
     </div>
 
     <!-- Toggle sidebar button -->
-    <button class="flex items-center opacity-50 hover:opacity-100 mx-1"
+    <button class="flex items-center opacity-50 hover:opacity-100 mx-1 hidden md:block"
         @click="() => ($refs.sidebar.classList.toggle('hidden'))" title="Toggle sidebar">
         <div class="bg-slate-500 w-2 h-10 rounded-full"></div>
     </button>
 
     <!-- Content -->
-    <div class="flex-1 max-h-screen flex flex-col gap-4 overflow-y-auto">
+    <div class="max-h-screen flex flex-col gap-4 overflow-y-auto md:flex-1">
         <h1 class="text-xl font-bold mt-4 font-mono">
             Episode {{ episode_id }}
         </h1>
 
         <!-- Videos -->
         <div class="flex flex-wrap gap-1">
+            <p x-show="videoCodecError" class="font-medium text-orange-700">Videos could NOT play because <a href="https://en.wikipedia.org/wiki/AV1" target="_blank" class="underline">AV1</a> decoding is not available on your browser. Learn more about <a href="https://huggingface.co/blog/video-encoding" target="_blank" class="underline">LeRobot video encoding</a>.</p>
             {% for video_info in videos_info %}
-            <div class="max-w-96">
+            <div x-show="!videoCodecError" class="max-w-96">
                 <p class="text-sm text-gray-300 bg-gray-800 px-2 rounded-t-xl truncate">{{ video_info.filename }}</p>
                 <video muted loop type="video/mp4" class="min-w-64" @canplaythrough="videoCanPlay" @timeupdate="() => {
                     if (video.duration) {
@@ -183,9 +196,9 @@ <h1 class="text-xl font-bold mt-4 font-mono">
                             </td>
                             <template x-for="(cell, colIndex) in row">
                                 <td x-show="cell" class="border border-slate-700">
-                                    <div class="flex gap-x-2 w-24 justify-between px-2">
+                                    <div class="flex gap-x-2 w-24 justify-between px-2" :class="{ 'hidden': cell.isNull }">
                                         <input type="checkbox" x-model="cell.checked" @change="updateTableValues()">
-                                        <span x-text="`${cell.value.toFixed(2)}`"
+                                        <span x-text="`${!cell.isNull ? cell.value.toFixed(2) : null}`"
                                             :style="`color: ${cell.color}`"></span>
                                     </div>
                                 </td>
@@ -207,7 +220,9 @@ <h1 class="text-xl font-bold mt-4 font-mono">
                 dygraph: null,
                 currentFrameData: null,
                 columnNames: ["state", "action", "pred action"],
-                nColumns: {% if has_policy %}3{% else %}2{% endif %},
+                nColumns: 2,
+                nStates: 0,
+                nActions: 0,
                 checked: [],
                 dygraphTime: 0.0,
                 dygraphIndex: 0,
@@ -216,9 +231,18 @@ <h1 class="text-xl font-bold mt-4 font-mono">
                 colors: null,
                 nVideos: {{ videos_info | length }},
                 nVideoReadyToPlay: 0,
+                videoCodecError: false,
 
                 // alpine initialization
                 init() {
+                    // check if videos can play
+                    const dummyVideo = document.createElement('video');
+                    const canPlayVideos = dummyVideo.canPlayType('video/mp4; codecs="av01.0.05M.08"'); // codec source: https://huggingface.co/blog/video-encoding#results
+                    if(!canPlayVideos){
+                        this.videoCodecError = true;
+                    }
+
+                    // process CSV data
                     this.videos = document.querySelectorAll('video');
                     this.video = this.videos[0];
                     this.dygraph = new Dygraph(document.getElementById("graph"), '{{ ep_csv_url }}', {
@@ -243,17 +267,19 @@ <h1 class="text-xl font-bold mt-4 font-mono">
                                 this.checked = Array(this.colors.length).fill(true);
 
                                 const seriesNames = this.dygraph.getLabels().slice(1);
+                                this.nStates = seriesNames.findIndex(item => item.startsWith('action_'));
+                                this.nActions = seriesNames.length - this.nStates;
                                 const colors = [];
                                 const LIGHTNESS = [30, 65, 85]; // state_lightness, action_lightness, pred_action_lightness
-                                let lightnessIdx = 0;
-                                const chunkSize = Math.ceil(seriesNames.length / this.nColumns);
-                                for (let i = 0; i < seriesNames.length; i += chunkSize) {
-                                    const lightness = LIGHTNESS[lightnessIdx];
-                                    for (let hue = 0; hue < 360; hue += parseInt(360/chunkSize)) {
-                                        const color = `hsl(${hue}, 100%, ${lightness}%)`;
-                                        colors.push(color);
-                                    }
-                                    lightnessIdx += 1;
+                                // colors for "state" lines
+                                for (let hue = 0; hue < 360; hue += parseInt(360/this.nStates)) {
+                                    const color = `hsl(${hue}, 100%, ${LIGHTNESS[0]}%)`;
+                                    colors.push(color);
+                                }
+                                // colors for "action" lines
+                                for (let hue = 0; hue < 360; hue += parseInt(360/this.nActions)) {
+                                    const color = `hsl(${hue}, 100%, ${LIGHTNESS[1]}%)`;
+                                    colors.push(color);
                                 }
                                 this.dygraph.updateOptions({ colors });
                                 this.colors = colors;
@@ -280,37 +306,40 @@ <h1 class="text-xl font-bold mt-4 font-mono">
                     if (!this.currentFrameData) {
                         return [];
                     }
-                    const columnSize = Math.ceil(this.currentFrameData.length / this.nColumns);
-                    return Array.from({
-                        length: columnSize
-                    }, (_, rowIndex) => {
-                        const row = [
-                            this.currentFrameData[rowIndex] || null,
-                            this.currentFrameData[rowIndex + columnSize] || null,
-                        ];
-                        if (this.nColumns === 3) {
-                            row.push(this.currentFrameData[rowIndex + 2 * columnSize] || null)
-                        }
-                        return row;
-                    });
+                    const rows = [];
+                    const nRows = Math.max(this.nStates, this.nActions);
+                    let rowIndex = 0;
+                    while(rowIndex < nRows){
+                        const row = [];
+                        // number of states may NOT match number of actions. In this case, we null-pad the 2D array to make a fully rectangular 2d array
+                        const nullCell = { isNull: true };
+                        const stateValueIdx = rowIndex;
+                        const actionValueIdx = stateValueIdx + this.nStates; // because this.currentFrameData = [state0, state1, ..., stateN, action0, action1, ..., actionN]
+                        // row consists of [state value, action value]
+                        row.push(rowIndex < this.nStates ? this.currentFrameData[stateValueIdx] : nullCell); // push "state value" to row
+                        row.push(rowIndex < this.nActions ? this.currentFrameData[actionValueIdx] : nullCell); // push "action value" to row
+                        rowIndex += 1;
+                        rows.push(row);
+                    }
+                    return rows;
                 },
                 isRowChecked(rowIndex) {
-                    return this.rows[rowIndex].every(cell => cell && cell.checked);
+                    return this.rows[rowIndex].every(cell => cell && (cell.isNull || cell.checked));
                 },
                 isColumnChecked(colIndex) {
-                    return this.rows.every(row => row[colIndex] && row[colIndex].checked);
+                    return this.rows.every(row => row[colIndex] && (row[colIndex].isNull || row[colIndex].checked));
                 },
                 toggleRow(rowIndex) {
                     const newState = !this.isRowChecked(rowIndex);
                     this.rows[rowIndex].forEach(cell => {
-                        if (cell) cell.checked = newState;
+                        if (cell && !cell.isNull) cell.checked = newState;
                     });
                     this.updateTableValues();
                 },
                 toggleColumn(colIndex) {
                     const newState = !this.isColumnChecked(colIndex);
                     this.rows.forEach(row => {
-                        if (row[colIndex]) row[colIndex].checked = newState;
+                        if (row[colIndex] && !row[colIndex].isNull) row[colIndex].checked = newState;
                     });
                     this.updateTableValues();
                 },