From bf3dbbdca6170779f49bfe1a72011ddb6e6af570 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Wed, 19 Jun 2024 09:26:00 +0000 Subject: [PATCH] Add code review suggestions --- .../datasets/_video_benchmark/run_video_benchmark.py | 2 +- .../datasets/push_dataset_to_hub/cam_png_format.py | 10 +++++++++- lerobot/common/datasets/video_utils.py | 7 +++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/lerobot/common/datasets/_video_benchmark/run_video_benchmark.py b/lerobot/common/datasets/_video_benchmark/run_video_benchmark.py index 04baaa901..25b12b318 100644 --- a/lerobot/common/datasets/_video_benchmark/run_video_benchmark.py +++ b/lerobot/common/datasets/_video_benchmark/run_video_benchmark.py @@ -188,7 +188,7 @@ def load_original_frames(imgs_dir, timestamps) -> torch.Tensor: elif timestamps_mode == "2_frames": timestamps = [ts - 1 / fps, ts] elif timestamps_mode == "2_frames_4_space": - timestamps = [ts - 4 / fps, ts] + timestamps = [ts - 5 / fps, ts] elif timestamps_mode == "6_frames": timestamps = [ts - i / fps for i in range(6)][::-1] else: diff --git a/lerobot/common/datasets/push_dataset_to_hub/cam_png_format.py b/lerobot/common/datasets/push_dataset_to_hub/cam_png_format.py index ea5dbef51..4972e6b4a 100644 --- a/lerobot/common/datasets/push_dataset_to_hub/cam_png_format.py +++ b/lerobot/common/datasets/push_dataset_to_hub/cam_png_format.py @@ -13,6 +13,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+""" +Contains utilities to process raw data format of PNG image files recorded with capture_camera_feed.py +""" + from pathlib import Path import torch @@ -30,7 +34,11 @@ def check_format(raw_dir: Path) -> bool: raise ValueError -def load_from_raw(raw_dir: Path, videos_dir: Path, fps: int, video: bool, episodes: list[int] | None = None): +def load_from_raw(raw_dir: Path, fps: int, episodes: list[int] | None = None): + if episodes is not None: + # TODO(aliberts): add support for multi-episodes. + raise NotImplementedError() + ep_dict = {} ep_idx = 0 diff --git a/lerobot/common/datasets/video_utils.py b/lerobot/common/datasets/video_utils.py index 0ac4ae899..fdc4fbe9c 100644 --- a/lerobot/common/datasets/video_utils.py +++ b/lerobot/common/datasets/video_utils.py @@ -77,6 +77,13 @@ def decode_video_frames_torchvision( https://github.com/pytorch/vision/blob/main/torchvision/csrc/io/decoder/gpu/README.rst (note that you need to compile against ffmpeg<4.3) + While both use the CPU, "video_reader" is faster than "pyav" but requires additional setup. + See our benchmark results for more info on performance: + https://github.com/huggingface/lerobot/pull/220 + + See torchvision doc for more info on these two backends: + https://pytorch.org/vision/0.18/index.html?highlight=backend#torchvision.set_video_backend + Note: Video benefits from inter-frame compression. Instead of storing every frame individually, the encoder stores a reference frame (or a key frame) and subsequent frames as differences relative to that key frame. As a consequence, to access a requested frame, we need to load the preceding key frame,