From bf3dbbdca6170779f49bfe1a72011ddb6e6af570 Mon Sep 17 00:00:00 2001
From: Simon Alibert <alibert.sim@gmail.com>
Date: Wed, 19 Jun 2024 09:26:00 +0000
Subject: [PATCH] Add code review suggestions

---
 .../datasets/_video_benchmark/run_video_benchmark.py   |  2 +-
 .../datasets/push_dataset_to_hub/cam_png_format.py     | 10 +++++++++-
 lerobot/common/datasets/video_utils.py                 |  7 +++++++
 3 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/lerobot/common/datasets/_video_benchmark/run_video_benchmark.py b/lerobot/common/datasets/_video_benchmark/run_video_benchmark.py
index 04baaa901..25b12b318 100644
--- a/lerobot/common/datasets/_video_benchmark/run_video_benchmark.py
+++ b/lerobot/common/datasets/_video_benchmark/run_video_benchmark.py
@@ -188,7 +188,7 @@ def load_original_frames(imgs_dir, timestamps) -> torch.Tensor:
         elif timestamps_mode == "2_frames":
             timestamps = [ts - 1 / fps, ts]
         elif timestamps_mode == "2_frames_4_space":
-            timestamps = [ts - 4 / fps, ts]
+            timestamps = [ts - 5 / fps, ts]
         elif timestamps_mode == "6_frames":
             timestamps = [ts - i / fps for i in range(6)][::-1]
         else:
diff --git a/lerobot/common/datasets/push_dataset_to_hub/cam_png_format.py b/lerobot/common/datasets/push_dataset_to_hub/cam_png_format.py
index ea5dbef51..4972e6b4a 100644
--- a/lerobot/common/datasets/push_dataset_to_hub/cam_png_format.py
+++ b/lerobot/common/datasets/push_dataset_to_hub/cam_png_format.py
@@ -13,6 +13,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+Contains utilities to process the raw data format of PNG image files recorded with capture_camera_feed.py
+"""
+
 from pathlib import Path
 
 import torch
@@ -30,7 +34,11 @@ def check_format(raw_dir: Path) -> bool:
         raise ValueError
 
 
-def load_from_raw(raw_dir: Path, videos_dir: Path, fps: int, video: bool, episodes: list[int] | None = None):
+def load_from_raw(raw_dir: Path, fps: int, episodes: list[int] | None = None):
+    if episodes is not None:
+        # TODO(aliberts): add support for multi-episodes.
+        raise NotImplementedError()
+
     ep_dict = {}
     ep_idx = 0
 
diff --git a/lerobot/common/datasets/video_utils.py b/lerobot/common/datasets/video_utils.py
index 0ac4ae899..fdc4fbe9c 100644
--- a/lerobot/common/datasets/video_utils.py
+++ b/lerobot/common/datasets/video_utils.py
@@ -77,6 +77,13 @@ def decode_video_frames_torchvision(
     https://github.com/pytorch/vision/blob/main/torchvision/csrc/io/decoder/gpu/README.rst
     (note that you need to compile against ffmpeg<4.3)
 
+    While both backends run on the CPU, "video_reader" is faster than "pyav" but requires additional setup steps.
+    See our benchmark results for more info on performance:
+    https://github.com/huggingface/lerobot/pull/220
+
+    See torchvision doc for more info on these two backends:
+    https://pytorch.org/vision/0.18/index.html?highlight=backend#torchvision.set_video_backend
+
     Note: Video benefits from inter-frame compression. Instead of storing every frame individually,
     the encoder stores a reference frame (or a key frame) and subsequent frames as differences relative to
     that key frame. As a consequence, to access a requested frame, we need to load the preceding key frame,