Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sensor Dataset Visualization #39

Merged
merged 6 commits into from
Apr 27, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
307 changes: 170 additions & 137 deletions src/av2/datasets/sensor/sensor_dataloader.py

Large diffs are not rendered by default.

14 changes: 9 additions & 5 deletions src/av2/datasets/sensor/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,16 @@ def convert_path_to_named_record(path: Path) -> Dict[str, Union[str, int]]:
Returns:
Mapping of name to record field.
"""
sensor_name = path.parent.stem
sensor_path = path.parent
sensor_name = sensor_path.stem
log_path = sensor_path.parent.parent if sensor_name == "lidar" else sensor_path.parent.parent.parent

# log_id is 2 directories up for the lidar filepaths, but 3 levels up for images
# {log_id}/sensors/cameras/ring_*/*.jpg vs.
# {log_id}/sensors/lidar/*.feather
parent_idx = 2 if sensor_name == "lidar" else 3
log_id = path.parents[parent_idx].stem
sensor_name, timestamp_ns = path.parent.stem, int(path.stem)
return {"log_id": log_id, "sensor_name": sensor_name, "timestamp_ns": timestamp_ns}
return {
"split": log_path.parent.stem,
"log_id": log_path.stem,
"sensor_name": sensor_name,
"timestamp_ns": int(path.stem),
}
28 changes: 24 additions & 4 deletions src/av2/geometry/camera/pinhole_camera.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,9 +181,10 @@ def project_cam_to_img(
is_valid_points: boolean indicator of valid cheirality and within image boundary, as
boolean Numpy array of shape (N,).
"""
uv = self.intrinsics.K @ points_cam[:3, :]
uv = uv.T
points_cam = points_cam.T
points_cam = points_cam.transpose()
uv: NDArrayFloat = self.intrinsics.K @ points_cam
uv = uv.transpose()
points_cam = points_cam.transpose()

if remove_nan:
uv, points_cam = remove_nan_values(uv, points_cam)
Expand Down Expand Up @@ -241,7 +242,7 @@ def project_ego_to_img_motion_compensated(
boolean Numpy array of shape (N,).

Raises:
ValueError: If `city_SE3_ego_cam_t` or `city_SE3_ego_lidar_t` is `None`.
ValueError: If `city_SE3_ego_cam_t` or `city_SE3_ego_lidar_t` is `None`.
"""
if city_SE3_ego_cam_t is None:
raise ValueError("city_SE3_ego_cam_t cannot be `None`!")
Expand Down Expand Up @@ -406,6 +407,25 @@ def compute_pixel_ray_directions(self, uv: Union[NDArrayFloat, NDArrayInt]) -> N
raise RuntimeError("Ray directions must be (N,3)")
return ray_dirs

def scale(self, scale: float) -> PinholeCamera:
    """Return a copy of this camera resized by a uniform factor.

    The focal lengths and principal point are multiplied by `scale`. The image
    width and height are multiplied by `scale` and rounded to whole pixels.
    The extrinsics and the camera name are carried over unchanged.

    Args:
        scale: Scaling factor.

    Returns:
        The scaled pinhole camera model.
    """
    src = self.intrinsics
    focal_and_center = (src.fx_px * scale, src.fy_px * scale, src.cx_px * scale, src.cy_px * scale)
    # Image dimensions must stay integral, so round after scaling.
    image_size = (round(src.width_px * scale), round(src.height_px * scale))
    scaled_intrinsics = Intrinsics(*focal_and_center, *image_size)
    return PinholeCamera(ego_SE3_cam=self.ego_SE3_cam, intrinsics=scaled_intrinsics, cam_name=self.cam_name)


def remove_nan_values(uv: NDArrayFloat, points_cam: NDArrayFloat) -> Tuple[NDArrayFloat, NDArrayFloat]:
"""Remove NaN values from camera coordinates and image plane coordinates (accepts corrupt array).
Expand Down
1 change: 0 additions & 1 deletion src/av2/geometry/utm.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,6 @@ def convert_city_coords_to_utm(points_city: Union[NDArrayFloat, NDArrayInt], cit
latitude, longitude = CITY_ORIGIN_LATLONG_DICT[city_name]
# get (easting, northing) of origin
origin_utm = convert_gps_to_utm(latitude=latitude, longitude=longitude, city_name=city_name)

points_utm: NDArrayFloat = points_city.astype(float) + np.array(origin_utm, dtype=float)
return points_utm

Expand Down
29 changes: 28 additions & 1 deletion src/av2/rendering/color.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@

"""Colormap related constants and functions."""

from enum import Enum, unique
from typing import Final, Sequence, Tuple

import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import LinearSegmentedColormap

from av2.utils.typing import NDArrayFloat
from av2.utils.typing import NDArrayByte, NDArrayFloat

RED_HEX: Final[str] = "#df0101"
GREEN_HEX: Final[str] = "#31b404"
Expand All @@ -31,6 +33,31 @@
TRAFFIC_YELLOW1_BGR: Final[Tuple[int, int, int]] = TRAFFIC_YELLOW1_RGB[::-1]


@unique
class ColorFormats(str, Enum):
    """Supported orderings of the color channels of an image.

    Members are also `str` instances, so each one compares equal to its plain name.
    """

    BGR = "BGR"  # Blue, green, red.
    RGB = "RGB"  # Red, green, blue.


def create_range_map(points_xyz: NDArrayFloat) -> NDArrayByte:
    """Generate an RGB colormap as a function of the lidar range.

    The third coordinate of each point is rounded to the nearest integer and used
    as an index into a lookup table sampled from the "turbo" colormap. Negative
    indices are clamped to 0 instead of wrapping around to the far end of the
    lookup table, and an empty input yields an empty result rather than raising.

    NOTE(review): matplotlib colormaps return RGBA rows, so the result presumably
    carries a trailing alpha channel in addition to RGB -- confirm against callers.

    Args:
        points_xyz: (N,3) Points (x,y,z).

    Returns:
        Per-point colormap values as `uint8`, one row per input point
        (see the NOTE above regarding the channel count).
    """
    range_idx = np.round(points_xyz[..., 2]).astype(int)  # type: ignore
    # A negative index would silently pick a color from the end of the table.
    range_idx = np.clip(range_idx, 0, None)
    # `max()` of an empty array raises, so fall back to a single-entry table.
    max_idx = int(range_idx.max()) if range_idx.size > 0 else 0
    lookup = plt.get_cmap("turbo")(np.arange(0, max_idx + 1))
    color = lookup[range_idx]
    range_cmap: NDArrayByte = (color * 255.0).astype(np.uint8)
    return range_cmap


def create_colormap(color_list: Sequence[str], n_colors: int) -> NDArrayFloat:
"""Create hex colorscale to interpolate between requested colors.

Expand Down
111 changes: 80 additions & 31 deletions src/av2/rendering/video.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,46 @@

"""Rendering tools for video visualizations."""

from __future__ import annotations

from enum import Enum, unique
from pathlib import Path
from typing import Dict, Final, Union
from typing import Dict, Final, Mapping, Optional, Set, Union

import av
import cv2
import numpy as np
import pandas as pd

from av2.rendering.color import ColorFormats
from av2.utils.typing import NDArrayByte

# Maps each color channel format to the pixel-format string passed as `format`
# when video frames are built from arrays in `write_video`.
COLOR_FORMAT_TO_PYAV_COLOR_FORMAT: Final[Dict[ColorFormats, str]] = {
ColorFormats.RGB: "rgb24",
ColorFormats.BGR: "bgr24",
}
# NOTE(review): presumably default FFmpeg encoder options ("crf" = constant rate
# factor); where this constant is consumed is not visible here -- confirm.
FFMPEG_OPTIONS: Final[Dict[str, str]] = {"crf": "27"}


def tile_cameras(named_sensors: Dict[str, Union[NDArrayByte, pd.DataFrame]]) -> NDArrayByte:
@unique
class VideoCodecs(str, Enum):
    """Codec names that can be requested when encoding mp4 videos.

    NOTE: Which of these are actually usable depends on the FFmpeg build
    in use. LIBX264 is the recommended default.
    """

    # https://en.wikipedia.org/wiki/Advanced_Video_Coding
    LIBX264 = "libx264"
    # https://en.wikipedia.org/wiki/High_Efficiency_Video_Coding
    LIBX265 = "libx265"
    # macOS GPU acceleration.
    HEVC_VIDEOTOOLBOX = "hevc_videotoolbox"


# Codecs whose output stream `write_video` tags as "hvc1".
HIGH_EFFICIENCY_VIDEO_CODECS: Final[Set[VideoCodecs]] = {VideoCodecs.LIBX265, VideoCodecs.HEVC_VIDEOTOOLBOX}


def tile_cameras(
named_sensors: Mapping[str, Union[NDArrayByte, pd.DataFrame]],
bev_img: Optional[NDArrayByte] = None,
) -> NDArrayByte:
"""Combine ring cameras into a tiled image.

NOTE: Images are expected in BGR ordering.
Expand All @@ -32,43 +58,62 @@ def tile_cameras(named_sensors: Dict[str, Union[NDArrayByte, pd.DataFrame]]) ->

Args:
named_sensors: Dictionary of camera names to the (height, width, 3) images.
bev_img: (H,W,3) Bird's-eye view image.

Returns:
Tiled image.
"""
landscape_width = 2048
landscape_height = 1550
landscape_height = 2048
landscape_width = 1550
for _, v in named_sensors.items():
landscape_width = max(v.shape[0], v.shape[1])
landscape_height = min(v.shape[0], v.shape[1])
break

height = landscape_height + landscape_height + landscape_height
width = landscape_width + landscape_height + landscape_width
tiled_im_bgr: NDArrayByte = np.zeros((height, width, 3), dtype=np.uint8)

ring_rear_left = named_sensors["ring_rear_left"]
ring_side_left = named_sensors["ring_side_left"]
ring_front_center = named_sensors["ring_front_center"]
ring_front_left = named_sensors["ring_front_left"]
ring_front_right = named_sensors["ring_front_right"]
ring_side_right = named_sensors["ring_side_right"]
ring_rear_right = named_sensors["ring_rear_right"]
if "ring_front_left" in named_sensors:
ring_front_left = named_sensors["ring_front_left"]
tiled_im_bgr[:landscape_height, :landscape_width] = ring_front_left

if "ring_front_center" in named_sensors:
ring_front_center = named_sensors["ring_front_center"]
tiled_im_bgr[:landscape_width, landscape_width : landscape_width + landscape_height] = ring_front_center

if "ring_front_right" in named_sensors:
ring_front_right = named_sensors["ring_front_right"]
tiled_im_bgr[:landscape_height, landscape_width + landscape_height :] = ring_front_right

tiled_im_bgr[:landscape_height, :landscape_width] = ring_front_left
tiled_im_bgr[:landscape_width, landscape_width : landscape_width + landscape_height] = ring_front_center
tiled_im_bgr[:landscape_height, landscape_width + landscape_height :] = ring_front_right
if "ring_side_left" in named_sensors:
ring_side_left = named_sensors["ring_side_left"]
tiled_im_bgr[landscape_height : 2 * landscape_height, :landscape_width] = ring_side_left

tiled_im_bgr[landscape_height:3100, :landscape_width] = ring_side_left
tiled_im_bgr[landscape_height:3100, landscape_width + landscape_height :] = ring_side_right
if "ring_side_right" in named_sensors:
ring_side_right = named_sensors["ring_side_right"]
tiled_im_bgr[landscape_height : 2 * landscape_height, landscape_width + landscape_height :] = ring_side_right

start = (width - 4096) // 2
tiled_im_bgr[3100:4650, start : start + landscape_width] = np.fliplr(ring_rear_left) # type: ignore
tiled_im_bgr[3100:4650, start + landscape_width : start + 4096] = np.fliplr(ring_rear_right) # type: ignore
tiled_im_rgb: NDArrayByte = cv2.cvtColor(tiled_im_bgr, cv2.COLOR_BGR2RGB)
return tiled_im_rgb
if bev_img is not None:
tiled_im_bgr[
landscape_width : 2 * landscape_width, landscape_width : landscape_width + landscape_height
] = bev_img

if "ring_rear_left" in named_sensors:
ring_rear_left = named_sensors["ring_rear_left"]
tiled_im_bgr[2 * landscape_height : 3 * landscape_height, :landscape_width] = ring_rear_left

if "ring_rear_right" in named_sensors:
ring_rear_right = named_sensors["ring_rear_right"]
tiled_im_bgr[2 * landscape_height : 3 * landscape_height, width - landscape_width :] = ring_rear_right
return tiled_im_bgr


def write_video(
video: NDArrayByte,
dst: Path,
codec: str = "libx264",
color_format: ColorFormats = ColorFormats.RGB,
codec: VideoCodecs = VideoCodecs.LIBX264,
fps: int = 10,
crf: int = 27,
preset: str = "veryfast",
Expand All @@ -78,14 +123,15 @@ def write_video(
Reference: https://github.com/PyAV-Org/PyAV

Args:
video: (N,H,W,3) array representing N RGB frames of identical dimensions.
dst: path to save folder.
codec: the name of a codec.
fps: the frame rate for video.
crf: constant rate factor (CRF) parameter of video, controlling the quality.
video: (N,H,W,3) Array representing N frames of identical dimensions, with channels ordered per `color_format`.
dst: Path of the output video file; missing parent directories are created.
color_format: Format of the color channels.
codec: Name of the codec.
fps: Frame rate for video.
crf: Constant rate factor (CRF) parameter of video, controlling the quality.
Lower values would result in better quality, at the expense of higher file sizes.
For x264, the valid Constant Rate Factor (crf) range is 0-51.
preset: file encoding speed. Options range from "ultrafast", ..., "fast", ..., "medium", ..., "slow", ...
preset: File encoding speed. Options range from "ultrafast", ..., "fast", ..., "medium", ..., "slow", ...
Higher compression efficiency often translates to slower video encoding speed, at file write time.
"""
_, H, W, _ = video.shape
Expand All @@ -98,6 +144,8 @@ def write_video(
dst.parent.mkdir(parents=True, exist_ok=True)
with av.open(str(dst), "w") as output:
stream = output.add_stream(codec, fps)
if codec in HIGH_EFFICIENCY_VIDEO_CODECS:
stream.codec_tag = "hvc1"
stream.width = W
stream.height = H
stream.options = {
Expand All @@ -106,10 +154,11 @@ def write_video(
"movflags": "+faststart",
"preset": preset,
"profile:v": "main",
"tag": "hvc1",
}

format = COLOR_FORMAT_TO_PYAV_COLOR_FORMAT[color_format]
for _, img in enumerate(video):
frame = av.VideoFrame.from_ndarray(img)
frame = av.VideoFrame.from_ndarray(img, format=format)
output.mux(stream.encode(frame))
output.mux(stream.encode(None))

Expand Down
Loading