Add code review suggestions

huggingface · Jun 19, 2024 · bf3dbbd · bf3dbbd
1 parent c345056
commit bf3dbbd
Show file tree

Hide file tree

Showing 3 changed files with 17 additions and 2 deletions.
diff --git a/lerobot/common/datasets/_video_benchmark/run_video_benchmark.py b/lerobot/common/datasets/_video_benchmark/run_video_benchmark.py
@@ -188,7 +188,7 @@ def load_original_frames(imgs_dir, timestamps) -> torch.Tensor:
         elif timestamps_mode == "2_frames":
             timestamps = [ts - 1 / fps, ts]
         elif timestamps_mode == "2_frames_4_space":
-            timestamps = [ts - 4 / fps, ts]
+            timestamps = [ts - 5 / fps, ts]
         elif timestamps_mode == "6_frames":
             timestamps = [ts - i / fps for i in range(6)][::-1]
         else:

diff --git a/lerobot/common/datasets/push_dataset_to_hub/cam_png_format.py b/lerobot/common/datasets/push_dataset_to_hub/cam_png_format.py
@@ -13,6 +13,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+Contains utilities to process the raw data format of PNG image files recorded with capture_camera_feed.py
+"""
+
 from pathlib import Path
 
 import torch
@@ -30,7 +34,11 @@ def check_format(raw_dir: Path) -> bool:
         raise ValueError
 
 
-def load_from_raw(raw_dir: Path, videos_dir: Path, fps: int, video: bool, episodes: list[int] | None = None):
+def load_from_raw(raw_dir: Path, fps: int, episodes: list[int] | None = None):
+    if episodes is not None:
+        # TODO(aliberts): add support for multi-episodes.
+        raise NotImplementedError()
+
     ep_dict = {}
     ep_idx = 0
 

diff --git a/lerobot/common/datasets/video_utils.py b/lerobot/common/datasets/video_utils.py
@@ -77,6 +77,13 @@ def decode_video_frames_torchvision(
     https://github.com/pytorch/vision/blob/main/torchvision/csrc/io/decoder/gpu/README.rst
     (note that you need to compile against ffmpeg<4.3)
 
+    While both use the CPU, "video_reader" is faster than "pyav" but requires additional setup.
+    See our benchmark results for more info on performance:
+    https://github.com/huggingface/lerobot/pull/220
+
+    See the torchvision documentation for more info on these two backends:
+    https://pytorch.org/vision/0.18/index.html?highlight=backend#torchvision.set_video_backend
+
     Note: Video benefits from inter-frame compression. Instead of storing every frame individually,
     the encoder stores a reference frame (or a key frame) and subsequent frames as differences relative to
     that key frame. As a consequence, to access a requested frame, we need to load the preceding key frame,