allow options on rendering centerlines, directly generating mp4, and … #170
Merged
Commits (21 total; diff shown from 9 commits):

- f3e7cea: allow options on rendering centerlines, directly generating mp4, and …
- 54923b6 (johnwlambert): reduce duplicated code, and add additional compression function for c…
- dea83ae (johnwlambert): clean up whitespace
- 68dacac (johnwlambert): start video writer class
- e911cd1 (johnwlambert): reformat with black
- c27dd37: reformat with black
- 3881748: do not dump frames if generate_video_only specified, and add missing …
- 1eaca1d: improve presentation of codec params
- 5b4ccc0: Fix typo
- 95811fe: update to non-deprecated loader method
- 26d59e4 (johnwlambert): fix merge conflicts
- 878c061: Merge branch 'master' of https://github.com/argoai/argoverse-api into…
- 6bfd801: Merge branch 'master' of https://github.com/argoai/argoverse-api into…
- e2d73b8: Merge branch 'master' of https://github.com/argoai/argoverse-api into…
- d14204e: improve module docstring
- d60c2c2: improve docstrings
- 40757e8: clean up docstrings, no need to include Returns: section for no retur…
- e3147b5: improve docstrings
- 9baa77e: improve readability
- dc6a6d8: ensure writer is not None for mypy's sake
- 98c40ba: reduce required precision when matching Sim(2) and SE(2) results to 1e-5
New file `argoverse/utils/cv2_video_utils.py` (@@ -0,0 +1,51 @@):

```python
#!/usr/bin/python3

import cv2
import numpy as np

"""
Python-based utilities to avoid blowing up the disk with images, as FFMPEG requires.

Inspired by Detectron2 and MSeg:
    https://github.com/facebookresearch/detectron2/blob/bab413cdb822af6214f9b7f70a9b7a9505eb86c5/demo/demo.py
    https://github.com/mseg-dataset/mseg-semantic/blob/master/mseg_semantic/utils/cv2_video_utils.py

See the OpenCV documentation for more details:
    https://docs.opencv.org/2.4/modules/highgui/doc/reading_and_writing_images_and_video.html
"""


class VideoWriter:
    """
    Lazy init, so that the user doesn't have to know width/height a priori.
    Our default codec is "mp4v", though you may prefer "x264", if available
    on your system.
    """

    def __init__(self, output_fpath: str, fps: int = 30) -> None:
        self.output_fpath = output_fpath
        self.fps = fps
        self.writer = None
        self.codec = "mp4v"

    def init_outf(self, height: int, width: int) -> None:
        """Initialize the output file once the frame dimensions are known."""
        self.writer = cv2.VideoWriter(
            filename=self.output_fpath,
            # some installations of opencv may not support x264 (due to its license),
            # you can try another format (e.g. MPEG)
            fourcc=cv2.VideoWriter_fourcc(*self.codec),
            fps=float(self.fps),
            frameSize=(width, height),
            isColor=True,
        )

    def add_frame(self, rgb_frame: np.ndarray) -> None:
        """Append a single RGB frame, converted to BGR channel order for OpenCV."""
        h, w, _ = rgb_frame.shape
        if self.writer is None:
            self.init_outf(height=h, width=w)
        bgr_frame = rgb_frame[:, :, ::-1]
        self.writer.write(bgr_frame)

    def complete(self) -> None:
        """Release the writer, finalizing the output file."""
        self.writer.release()
```
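The headline feature of the class above is lazy initialization: `cv2.VideoWriter` needs the frame size at construction time, so creation is deferred until the first frame arrives. A minimal, OpenCV-free sketch of that pattern (`FakeBackend` and `LazyWriter` are hypothetical stand-ins, not part of this PR):

```python
class FakeBackend:
    """Hypothetical stand-in for cv2.VideoWriter, so the sketch runs without OpenCV."""

    def __init__(self, size):
        self.size = size
        self.frames = 0

    def write(self, frame):
        self.frames += 1


class LazyWriter:
    """Same lazy-init pattern as VideoWriter: the backend is created on the first frame."""

    def __init__(self):
        self.writer = None

    def add_frame(self, frame):
        h, w = len(frame), len(frame[0])
        if self.writer is None:  # first frame: now we know width/height
            self.writer = FakeBackend((w, h))
        self.writer.write(frame)


lw = LazyWriter()
assert lw.writer is None  # nothing allocated yet
lw.add_frame([[0, 0], [0, 0]])  # a 2x2 "frame"
print(lw.writer.size, lw.writer.frames)  # (2, 2) 1
```

The same deferral is why the later commit "ensure writer is not None for mypy's sake" was needed: the attribute is `None` until the first `add_frame` call.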
Modified file (diff):
```diff
@@ -3,9 +3,9 @@
 import copy
 import glob
 import logging
-import multiprocessing
 import os
 import sys
+from multiprocessing import Pool
 from pathlib import Path
 from typing import Any, Iterable, List, Mapping, Sequence, Tuple, Union

@@ -26,7 +26,8 @@
 from argoverse.utils.camera_stats import RING_CAMERA_LIST, STEREO_CAMERA_LIST
 from argoverse.utils.city_visibility_utils import clip_point_cloud_to_visible_region
 from argoverse.utils.cv2_plotting_utils import draw_clipped_line_segment
-from argoverse.utils.ffmpeg_utils import write_nonsequential_idx_video
+from argoverse.utils.cv2_video_utils import VideoWriter
+from argoverse.utils.ffmpeg_utils import ffmpeg_compress_video, write_nonsequential_idx_video
 from argoverse.utils.frustum_clipping import generate_frustum_planes
 from argoverse.utils.ply_loader import load_ply
 from argoverse.utils.se3 import SE3

@@ -39,11 +40,13 @@
 # jigger lane pixel values by [-10,10] range
 LANE_COLOR_NOISE = 20
+STEREO_FPS = 5
+RING_CAM_FPS = 30


 def plot_lane_centerlines_in_img(
     lidar_pts: np.ndarray,
-    city_to_egovehicle_se3: SE3,
+    city_SE3_egovehicle: SE3,
     img: np.ndarray,
     city_name: str,
     avm: ArgoverseMap,

@@ -54,7 +57,7 @@ def plot_lane_centerlines_in_img(
 ) -> np.ndarray:
     """
     Args:
-        city_to_egovehicle_se3: SE3 transformation representing egovehicle to city transformation
+        city_SE3_egovehicle: SE(3) transformation representing egovehicle to city transformation
         img: Array of shape (M,N,3) representing updated image
         city_name: str, string representing city name, i.e. 'PIT' or 'MIA'
         avm: instance of ArgoverseMap

@@ -70,7 +73,7 @@ def plot_lane_centerlines_in_img(
     t = camera_config.extrinsic[:3, 3]
     cam_SE3_egovehicle = SE3(rotation=R, translation=t)

-    query_x, query_y, _ = city_to_egovehicle_se3.translation
+    query_x, query_y, _ = city_SE3_egovehicle.translation
     local_centerlines = avm.find_local_lane_centerlines(query_x, query_y, city_name)

     for centerline_city_fr in local_centerlines:

@@ -81,7 +84,7 @@ def plot_lane_centerlines_in_img(
         valid_idx = np.isnan(ground_heights)
         centerline_city_fr = centerline_city_fr[~valid_idx]

-        centerline_egovehicle_fr = city_to_egovehicle_se3.inverse().transform_point_cloud(centerline_city_fr)
+        centerline_egovehicle_fr = city_SE3_egovehicle.inverse().transform_point_cloud(centerline_city_fr)
         centerline_uv_cam = cam_SE3_egovehicle.transform_point_cloud(centerline_egovehicle_fr)

         # can also clip point cloud to nearest LiDAR point depth

@@ -105,6 +108,8 @@ def dump_clipped_3d_cuboids_to_images(
     data_dir: str,
     experiment_prefix: str,
     motion_compensate: bool = True,
+    omit_centerlines: bool = False,
+    generate_video_only: bool = False,
 ) -> List[str]:
     """
     We bring the 3D points into each camera coordinate system, and do the clipping there in 3D.

@@ -115,24 +120,40 @@ def dump_clipped_3d_cuboids_to_images(
         data_dir: path to dataset with the latest data
         experiment_prefix: Output directory
         motion_compensate: Whether to motion compensate when projecting
+        omit_centerlines: whether to omit map vector lane geometry from rendering
+        generate_video_only: whether to generate mp4s only without dumping individual frames

     Returns:
         saved_img_fpaths
     """
     saved_img_fpaths = []
     dl = SimpleArgoverseTrackingDataLoader(data_dir=data_dir, labels_dir=data_dir)
-    avm = ArgoverseMap()
+    if not omit_centerlines:
+        avm = ArgoverseMap()
+    fps_map = {
+        cam_name: STEREO_FPS if "stereo" in cam_name else RING_CAM_FPS
+        for cam_name in RING_CAMERA_LIST + STEREO_CAMERA_LIST
+    }
+    category_subdir = "amodal_labels"
+    if not Path(f"{experiment_prefix}_{category_subdir}").exists():
+        os.makedirs(f"{experiment_prefix}_{category_subdir}")
+    video_output_dir = f"{experiment_prefix}_{category_subdir}"

     for log_id in log_ids:
         save_dir = f"{experiment_prefix}_{log_id}"
-        if not Path(save_dir).exists():
+        if not generate_video_only and not Path(save_dir).exists():
+            # JPG images will be dumped here, if requested by arguments
             os.makedirs(save_dir)

         city_name = dl.get_city_name(log_id)
         log_calib_data = dl.get_log_calibration_data(log_id)

         flag_done = False
         for cam_idx, camera_name in enumerate(RING_CAMERA_LIST + STEREO_CAMERA_LIST):
+            fps = fps_map[camera_name]
+            if generate_video_only:
+                mp4_path = f"{video_output_dir}/{log_id}_{camera_name}_{fps}fps.mp4"
+                video_writer = VideoWriter(mp4_path)
             cam_im_fpaths = dl.get_ordered_log_cam_fpaths(log_id, camera_name)
             for i, im_fpath in enumerate(cam_im_fpaths):
                 if i % 50 == 0:

@@ -154,8 +175,8 @@ def dump_clipped_3d_cuboids_to_images(
                         break
                     continue

-                city_to_egovehicle_se3 = dl.get_city_to_egovehicle_se3(log_id, cam_timestamp)
-                if city_to_egovehicle_se3 is None:
+                city_SE3_egovehicle = dl.get_city_to_egovehicle_se3(log_id, cam_timestamp)
+                if city_SE3_egovehicle is None:
                     continue

                 lidar_timestamp = Path(ply_fpath).stem.split("_")[-1]

@@ -170,15 +191,16 @@ def dump_clipped_3d_cuboids_to_images(
                 img = imageio.imread(im_fpath)[:, :, ::-1].copy()
                 camera_config = get_calibration_config(log_calib_data, camera_name)
                 planes = generate_frustum_planes(camera_config.intrinsic.copy(), camera_name)
-                img = plot_lane_centerlines_in_img(
-                    lidar_pts,
-                    city_to_egovehicle_se3,
-                    img,
-                    city_name,
-                    avm,
-                    camera_config,
-                    planes,
-                )
+                if not omit_centerlines:
+                    img = plot_lane_centerlines_in_img(
+                        lidar_pts,
+                        city_SE3_egovehicle,
+                        img,
+                        city_name,
+                        avm,
+                        camera_config,
+                        planes,
+                    )

                 for label_idx, label in enumerate(labels):
                     obj_rec = json_label_dict_to_obj_record(label)

@@ -216,40 +238,64 @@ def dump_clipped_3d_cuboids_to_images(
                         copy.deepcopy(camera_config),
                     )

-                cv2.imwrite(save_img_fpath, img)
-                saved_img_fpaths += [save_img_fpath]
-                if max_num_images_to_render != -1 and len(saved_img_fpaths) > max_num_images_to_render:
+                if generate_video_only:
+                    video_writer.add_frame(img[:, :, ::-1])
+                else:
+                    cv2.imwrite(save_img_fpath, img)
+                    saved_img_fpaths += [save_img_fpath]
+                if (
+                    not generate_video_only
+                    and max_num_images_to_render != -1
+                    and len(saved_img_fpaths) > max_num_images_to_render
+                ):
                     flag_done = True
                     break
+            if generate_video_only:
+                video_writer.complete()
+                ffmpeg_compress_video(mp4_path, fps)
             if flag_done:
                 break
-        category_subdir = "amodal_labels"
-
-        if not Path(f"{experiment_prefix}_{category_subdir}").exists():
-            os.makedirs(f"{experiment_prefix}_{category_subdir}")
-
-        for cam_idx, camera_name in enumerate(RING_CAMERA_LIST + STEREO_CAMERA_LIST):
-            # Write the cuboid video -- could also write w/ fps=20,30,40
-            if "stereo" in camera_name:
-                fps = 5
-            else:
-                fps = 30
-            img_wildcard = f"{save_dir}/{camera_name}_%*.jpg"
-            output_fpath = f"{experiment_prefix}_{category_subdir}/{log_id}_{camera_name}_{fps}fps.mp4"
-            write_nonsequential_idx_video(img_wildcard, output_fpath, fps)
+        if not generate_video_only:
+            for cam_idx, camera_name in enumerate(RING_CAMERA_LIST + STEREO_CAMERA_LIST):
+                # Write the cuboid video from individual frames -- could also write w/ fps=20,30,40
+                fps = fps_map[camera_name]
+                img_wildcard = f"{save_dir}/{camera_name}_%*.jpg"
+                output_fpath = f"{video_output_dir}/{log_id}_{camera_name}_{fps}fps.mp4"
+                write_nonsequential_idx_video(img_wildcard, output_fpath, fps)

     return saved_img_fpaths


 def main(args: Any):
     """Run the example."""
     log_ids = [log_id.strip() for log_id in args.log_ids.split(",")]
-    dump_clipped_3d_cuboids_to_images(
-        log_ids,
-        args.max_num_images_to_render * 9,
-        args.dataset_dir,
-        args.experiment_prefix,
-    )
+    if args.use_multiprocessing:
+        single_process_args = [
+            (
+                [log_id],
+                args.max_num_images_to_render * 9,
+                args.dataset_dir,
+                args.experiment_prefix,
+                not args.no_motion_compensation,
+                args.omit_centerlines,
+                args.generate_video_only,
+            )
+            for log_id in log_ids
+        ]
+        with Pool(os.cpu_count()) as p:
```
> Review comment (anchored on the `Pool(os.cpu_count())` line): Since it takes a while to run, it would be nice to be able to specify the number of CPUs to use as well, so we can do something else while rendering.
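The reviewer's suggestion of a configurable worker count could look like the following sketch (the `num_cpus` variable and the `render_log` stand-in are hypothetical; the PR itself hardcodes `Pool(os.cpu_count())`):

```python
import os
from multiprocessing import Pool


def render_log(log_id: str, experiment_prefix: str) -> str:
    """Hypothetical stand-in for dump_clipped_3d_cuboids_to_images."""
    return f"{experiment_prefix}_{log_id}"


# A hypothetical --num-cpus argument could default to os.cpu_count().
num_cpus = min(2, os.cpu_count() or 1)
single_process_args = [(log_id, "demo") for log_id in ["log_a", "log_b", "log_c"]]
with Pool(num_cpus) as p:
    # starmap unpacks each tuple into the function's positional arguments
    results = p.starmap(render_log, single_process_args)
print(results)  # ['demo_log_a', 'demo_log_b', 'demo_log_c']
```

Capping the pool size below `os.cpu_count()` leaves cores free for other work while rendering, which is exactly what the comment asks for.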
```diff
+            accum = p.starmap(dump_clipped_3d_cuboids_to_images, single_process_args)
+
+    else:
+        dump_clipped_3d_cuboids_to_images(
+            log_ids,
+            args.max_num_images_to_render * 9,
+            args.dataset_dir,
+            args.experiment_prefix,
+            not args.no_motion_compensation,
+            args.omit_centerlines,
+            args.generate_video_only,
+        )


 if __name__ == "__main__":

@@ -262,6 +308,26 @@ def main(args: Any):
         help="number of images within which to render 3d cuboids",
     )
     parser.add_argument("--dataset-dir", type=str, required=True, help="path to the dataset folder")
+    parser.add_argument(
+        "--use-multiprocessing",
+        action="store_true",
+        help="uses multiprocessing only if arg is specified on command line, otherwise single process",
+    )
+    parser.add_argument(
+        "--no-motion-compensation",
+        action="store_true",
+        help="motion compensate by default, unless arg is specified on command line to not do so",
+    )
+    parser.add_argument(
+        "--omit-centerlines",
+        action="store_true",
+        help="renders centerlines by default, will omit them if arg is specified on command line",
+    )
+    parser.add_argument(
+        "--generate-video-only",
+        action="store_true",
+        help="produces mp4 files only, without dumping any individual frames/images to JPGs",
+    )
     parser.add_argument(
         "--log-ids",
         type=str,
```
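All four new flags use `action="store_true"`, so each defaults to `False`, and the double negative `not args.no_motion_compensation` keeps motion compensation on by default. A reduced sketch of that default-flipping behavior (only two of the PR's flags are included here):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--no-motion-compensation", action="store_true")
parser.add_argument("--generate-video-only", action="store_true")

# No flags passed: both default to False, so motion compensation stays enabled.
args = parser.parse_args([])
motion_compensate = not args.no_motion_compensation
print(motion_compensate, args.generate_video_only)  # True False

# Both flags passed on the command line.
args = parser.parse_args(["--no-motion-compensation", "--generate-video-only"])
print(not args.no_motion_compensation, args.generate_video_only)  # False True
```

Note that argparse converts the dashes in flag names to underscores on the `args` namespace, which is why the diff reads `args.no_motion_compensation` and `args.generate_video_only`.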
Review discussion:

> Comment: Does this replace this dependency? https://github.com/argoai/argoverse-api/blob/1dfc03ee8dc9b338eac6c86e21d7b999b59e2498/argoverse/visualization/generate_sequence_videos.py#L108
>
> Reply: I didn't realize we had this dependency, to be honest (it's not in our dependency list for some reason); I'll take a look.
>
> Comment: What were your thoughts on this?
>
> Reply: I haven't played around with moviepy before, but we should check it out more: https://zulko.github.io/moviepy/
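For context on the ffmpeg dependency being discussed: the PR calls `ffmpeg_compress_video(mp4_path, fps)` after each video is finalized, but that helper's implementation is not shown in this diff. One plausible sketch of such a helper, which only builds the ffmpeg command rather than running it (the codec flags and output naming here are assumptions, not the actual implementation):

```python
from typing import List


def build_compress_cmd(input_fpath: str, fps: int) -> List[str]:
    """Build an ffmpeg command to re-encode a video at the given fps (not executed here)."""
    output_fpath = input_fpath.replace(".mp4", "_compressed.mp4")
    return [
        "ffmpeg",
        "-i", input_fpath,  # input video
        "-r", str(fps),     # output frame rate
        "-c:v", "libx264",  # H.264 typically compresses better than OpenCV's mp4v output
        "-crf", "27",       # constant rate factor: higher means smaller file, lower quality
        output_fpath,
    ]


cmd = build_compress_cmd("ring_front_center_30fps.mp4", 30)
print(cmd[-1])  # ring_front_center_30fps_compressed.mp4
```

A real helper would hand this list to `subprocess.run`; keeping command construction separate from execution makes the ffmpeg invocation easy to inspect and test.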