diff --git a/docs/instructions_nuimages.md b/docs/instructions_nuimages.md
new file mode 100644
index 00000000..f7cdfe18
--- /dev/null
+++ b/docs/instructions_nuimages.md
@@ -0,0 +1 @@
+TODO: Coming soon!
diff --git a/docs/schema_nuimages.md b/docs/schema_nuimages.md
index e008e2f5..49c6dd49 100644
--- a/docs/schema_nuimages.md
+++ b/docs/schema_nuimages.md
@@ -1,6 +1,164 @@
 nuImages schema
 ==========
+This document describes the database schema used in nuImages.
+All annotations and meta data (including calibration, maps, vehicle coordinates etc.) are covered in a relational database.
+The database tables are listed below.
+Every row can be identified by its unique primary key `token`.
+Foreign keys such as `sample_token` may be used to link to the `token` of the table `sample`.
+Please refer to the [tutorial](https://www.nuscenes.org/nuimages#tutorial) for an introduction to the most important database tables.
 
-TODO: Coming soon!
+![](https://www.nuscenes.org/public/images/nuimages-schema.svg)
 
-![](https://www.nuscenes.org/public/images/nuimages-schema.svg)
\ No newline at end of file
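+
+As a minimal sketch of how the tables connect (assuming the devkit is installed and the data lives under `/data/sets/nuimages`), any foreign key can be resolved with `NuImages.get()`:
+```
+from nuimages import NuImages
+
+nuim = NuImages(version='v1.0-mini', dataroot='/data/sets/nuimages', lazy=True, verbose=False)
+
+sample = nuim.sample[0]  # Any row of the sample table.
+key_camera = nuim.get('sample_data', sample['key_camera_token'])  # Follow a foreign key.
+log = nuim.get('log', sample['log_token'])  # Every row is indexed by its unique token.
+print(key_camera['filename'], log['location'])
+```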
+ "timestamp": -- Unix time stamp. + "rotation_rate": [3] -- The angular velocity vector (x, y, z) of the vehicle in rad/s. This is expressed in the ego vehicle frame. + "acceleration": [3] -- Acceleration vector (x, y, z) in the ego vehicle frame in m/s/s. The z value is close to the gravitational acceleration `g = 9.81 m/s/s`. + "speed": -- The speed of the ego vehicle in the driving direction in m/s. +} +``` + +log +--------- +Information about the log from which the data was extracted. +``` +log { + "token": -- Unique record identifier. + "logfile": -- Log file name. + "vehicle": -- Vehicle name. + "date_captured": -- Date (YYYY-MM-DD). + "location": -- Area where log was captured, e.g. singapore-onenorth. +} +``` + +object_ann +--------- +The annotation of a foreground object (car, bike, pedestrian) in an image. +Each foreground object is annotated with a 2d box, a 2d instance mask and category-specific attributes. +``` +object_ann { + "token": -- Unique record identifier. + "sample_data_token": -- Foreign key pointing to the sample data, which must be a keyframe image. + "category_token": -- Foreign key pointing to the object category. + "attribute_tokens": [n] -- Foreign keys. List of attributes for this annotation. + "bbox": [4] -- Annotated amodal bounding box. Given as [xmin, ymin, xmax, ymax]. + "mask": -- Run length encoding of instance mask using the pycocotools package. +} +``` + +sample_data +--------- +A sensor data e.g. image or lidar pointcloud. Note that we don't have radar in nuImages. +Sample_data covers all sensor data, regardless of whether it is a keyframe or not. +For every keyframe image or lidar, we also include up to 6 past and 6 future sweeps at 2 Hz. +We can navigate between consecutive lidar or camera sample_datas using the `prev` and `next` pointers. +Only keyframe (sample) images are annotated. +The sample timestamp is inherited from the keyframe camera sample_data timestamp. +``` +sample_data { + "token": -- Unique record identifier. + "sample_token": -- Foreign key. Sample to which this sample_data is associated. + "ego_pose_token": -- Foreign key. + "calibrated_sensor_token": -- Foreign key. + "filename": -- Relative path to data-blob on disk. + "fileformat": -- Data file format. + "width": -- If the sample data is an image, this is the image width in pixels. + "height": -- If the sample data is an image, this is the image height in pixels. + "timestamp": -- Unix time stamp. + "is_key_frame": -- True if sample_data is part of key_frame, else False. + "next": -- Foreign key. Sample data from the same sensor that follows this in time. Empty if end of scene. + "prev": -- Foreign key. Sample data from the same sensor that precedes this in time. Empty if start of scene. +} +``` + +sample +--------- +A sample is an annotated keyframe selected from a large pool of images in a log. +Every sample has up to 13 lidar sample_datas and 13 camera sample_datas corresponding to it. +These include the actual lidar and camera keyframe sample_datas, which can be accessed via the `key_*_token` fields. +``` +sample { + "token": -- Unique record identifier. + "timestamp": -- Unix time stamp. + "log_token": -- Foreign key pointing to the log. + "key_camera_token": -- Foreign key of the sample_data corresponding to the camera keyframe. + "key_lidar_token": -- Foreign key of the sample_data corresponding to the lidar keyframe. +} +``` + +sensor +--------- +--------- +A specific sensor type. +``` +sensor { + "token": -- Unique record identifier. 
+ "channel": -- Sensor channel name. + "modality": {camera, lidar} -- Sensor modality. Supports category(ies) in brackets. +} +``` + +surface_ann +--------- +The annotation of a background object (driveable surface) in an image. +Each background object is annotated with a 2d semantic segmentation mask. +``` +surface_ann { + "token": -- Unique record identifier. + "sample_data_token": -- Foreign key pointing to the sample data, which must be a keyframe image. + "category_token": -- Foreign key pointing to the surface category. + "mask": -- Run length encoding of segmentation mask using the pycocotools package. +} +``` diff --git a/docs/schema_nuscenes.md b/docs/schema_nuscenes.md index 2b62d17c..e69415ca 100644 --- a/docs/schema_nuscenes.md +++ b/docs/schema_nuscenes.md @@ -5,15 +5,14 @@ All annotations and meta data (including calibration, maps, vehicle coordinates The database tables are listed below. Every row can be identified by its unique primary key `token`. Foreign keys such as `sample_token` may be used to link to the `token` of the table `sample`. -Please refer to the [tutorial](https://www.nuscenes.org/tutorial) for an introduction to the most important database tables. +Please refer to the [tutorial](https://www.nuscenes.org/nuimages#tutorial) for an introduction to the most important database tables. ![](https://www.nuscenes.org/public/images/nuscenes-schema.svg) attribute --------- - An attribute is a property of an instance that can change while the category remains the same. - Example: a vehicle being parked/stopped/moving, and whether or not a bicycle has a rider. +Example: a vehicle being parked/stopped/moving, and whether or not a bicycle has a rider. ``` attribute { "token": -- Unique record identifier. @@ -21,9 +20,9 @@ attribute { "description": -- Attribute description. } ``` + calibrated_sensor --------- - Definition of a particular sensor (lidar/radar/camera) as calibrated on a particular vehicle. All extrinsic parameters are given with respect to the ego vehicle body frame. All camera images come undistorted and rectified. @@ -36,11 +35,11 @@ calibrated_sensor { "camera_intrinsic": [3, 3] -- Intrinsic camera calibration. Empty for sensors that are not cameras. } ``` + category --------- - Taxonomy of object categories (e.g. vehicle, human). -Subcategories are delineated by a period (e.g. human.pedestrian.adult). +Subcategories are delineated by a period (e.g. `human.pedestrian.adult`). ``` category { "token": -- Unique record identifier. @@ -49,9 +48,9 @@ category { "index": -- The index of the label used for efficiency reasons in the .bin label files of nuScenes-lidarseg. This field did not exist previously. } ``` + ego_pose --------- - Ego vehicle pose at a particular timestamp. Given with respect to global coordinate system of the log's map. The ego_pose is the output of a lidar map-based localization algorithm described in our paper. The localization is 2-dimensional in the x-y plane. @@ -63,24 +62,24 @@ ego_pose { "timestamp": -- Unix time stamp. } ``` + instance --------- - An object instance, e.g. particular vehicle. This table is an enumeration of all object instances we observed. Note that instances are not tracked across scenes. ``` instance { "token": -- Unique record identifier. - "category_token": -- Foreign key. Object instance category. + "category_token": -- Foreign key pointing to the object category. "nbr_annotations": -- Number of annotations of this instance. "first_annotation_token": -- Foreign key. 
diff --git a/docs/schema_nuscenes.md b/docs/schema_nuscenes.md
index 2b62d17c..e69415ca 100644
--- a/docs/schema_nuscenes.md
+++ b/docs/schema_nuscenes.md
@@ -5,15 +5,14 @@ All annotations and meta data (including calibration, maps, vehicle coordinates
 The database tables are listed below.
 Every row can be identified by its unique primary key `token`.
 Foreign keys such as `sample_token` may be used to link to the `token` of the table `sample`.
-Please refer to the [tutorial](https://www.nuscenes.org/tutorial) for an introduction to the most important database tables.
+Please refer to the [tutorial](https://www.nuscenes.org/nuscenes#tutorial) for an introduction to the most important database tables.
 
 ![](https://www.nuscenes.org/public/images/nuscenes-schema.svg)
 
 attribute
 ---------
-
 An attribute is a property of an instance that can change while the category remains the same.
-Example: a vehicle being parked/stopped/moving, and whether or not a bicycle has a rider.
+Example: a vehicle being parked/stopped/moving, and whether or not a bicycle has a rider.
 ```
 attribute {
    "token": -- Unique record identifier.
@@ -21,9 +20,9 @@ attribute {
    "description": -- Attribute description.
 }
 ```
+
 calibrated_sensor
 ---------
-
 Definition of a particular sensor (lidar/radar/camera) as calibrated on a particular vehicle.
 All extrinsic parameters are given with respect to the ego vehicle body frame.
 All camera images come undistorted and rectified.
@@ -36,11 +35,11 @@ calibrated_sensor {
    "camera_intrinsic": [3, 3] -- Intrinsic camera calibration. Empty for sensors that are not cameras.
 }
 ```
+
 category
 ---------
-
 Taxonomy of object categories (e.g. vehicle, human).
-Subcategories are delineated by a period (e.g. human.pedestrian.adult).
+Subcategories are delineated by a period (e.g. `human.pedestrian.adult`).
 ```
 category {
    "token": -- Unique record identifier.
@@ -49,9 +48,9 @@ category {
    "index": -- The index of the label used for efficiency reasons in the .bin label files of nuScenes-lidarseg. This field did not exist previously.
 }
 ```
+
 ego_pose
 ---------
-
 Ego vehicle pose at a particular timestamp. Given with respect to global coordinate system of the log's map.
 The ego_pose is the output of a lidar map-based localization algorithm described in our paper.
 The localization is 2-dimensional in the x-y plane.
@@ -63,24 +62,24 @@ ego_pose {
    "timestamp": -- Unix time stamp.
 }
 ```
+
 instance
 ---------
-
 An object instance, e.g. particular vehicle.
 This table is an enumeration of all object instances we observed.
 Note that instances are not tracked across scenes.
 ```
 instance {
    "token": -- Unique record identifier.
-   "category_token": -- Foreign key. Object instance category.
+   "category_token": -- Foreign key pointing to the object category.
    "nbr_annotations": -- Number of annotations of this instance.
    "first_annotation_token": -- Foreign key. Points to the first annotation of this instance.
    "last_annotation_token": -- Foreign key. Points to the last annotation of this instance.
 }
 ```
+
 lidarseg
 ---------
-
 Mapping between nuScenes-lidarseg annotations and sample_datas corresponding to the lidar pointcloud associated with a keyframe.
 ```
 lidarseg {
@@ -89,9 +88,9 @@ lidarseg {
    "sample_data_token": -- Foreign key. Sample_data corresponding to the annotated lidar pointcloud with is_key_frame=True.
 }
 ```
+
 log
 ---------
-
 Information about the log from which the data was extracted.
 ```
 log {
@@ -102,9 +101,9 @@ log {
    "location": -- Area where log was captured, e.g. singapore-onenorth.
 }
 ```
+
 map
 ---------
-
 Map data that is stored as binary semantic masks from a top-down view.
 ```
 map {
@@ -114,10 +113,11 @@ map {
    "filename": -- Relative path to the file with the map mask.
 }
 ```
+
 sample
 ---------
-
-A sample is data collected at (approximately) the same timestamp as part of a single LIDAR sweep.
+A sample is an annotated keyframe at 2 Hz.
+The data is collected at (approximately) the same timestamp as part of a single LIDAR sweep.
 ```
 sample {
    "token": -- Unique record identifier.
@@ -127,9 +127,9 @@ sample {
    "prev": -- Foreign key. Sample that precedes this in time. Empty if start of scene.
 }
 ```
+
 sample_annotation
 ---------
-
 A bounding box defining the position of an object seen in a sample.
 All location data is given with respect to the global coordinate system.
 ```
@@ -137,7 +137,7 @@ sample_annotation {
    "token": -- Unique record identifier.
    "sample_token": -- Foreign key. NOTE: this points to a sample NOT a sample_data since annotations are done on the sample level taking all relevant sample_data into account.
    "instance_token": -- Foreign key. Which object instance is this annotating. An instance can have multiple annotations over time.
-   "attribute_tokens": [n] -- Foreign keys. List of attributes for this annotation. Attributes can change over time, so they belong here, not in the object table.
+   "attribute_tokens": [n] -- Foreign keys. List of attributes for this annotation. Attributes can change over time, so they belong here, not in the instance table.
    "visibility_token": -- Foreign key. Visibility may also change over time. If no visibility is annotated, the token is an empty string.
    "translation": [3] -- Bounding box location in meters as center_x, center_y, center_z.
    "size": [3] -- Bounding box size in meters as width, length, height.
@@ -148,9 +148,9 @@ sample_annotation {
    "prev": -- Foreign key. Sample annotation from the same object instance that precedes this in time. Empty if this is the first annotation for this object.
 }
 ```
+
 sample_data
 ---------
-
 A sensor data e.g. image, point cloud or radar return.
 For sample_data with is_key_frame=True, the time-stamps should be very close to the sample it points to.
 For non key-frames the sample_data points to the sample that follows closest in time.
@@ -170,9 +170,9 @@ sample_data {
    "prev": -- Foreign key. Sample data from the same sensor that precedes this in time. Empty if start of scene.
 }
 ```
+
 scene
 ---------
-
 A scene is a 20s long sequence of consecutive frames extracted from a log.
 Multiple scenes can come from the same log.
 Note that object identities (instance tokens) are not preserved across scenes.
@@ -187,9 +187,9 @@ scene {
    "last_sample_token": -- Foreign key. Points to the last sample in scene.
 }
 ```
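+
+As a minimal sketch (assuming the nuScenes devkit and the v1.0-mini split under `/data/sets/nuscenes`), the samples of a scene can be walked via the `first_sample_token` and `next` pointers:
+```
+from nuscenes.nuscenes import NuScenes
+
+nusc = NuScenes(version='v1.0-mini', dataroot='/data/sets/nuscenes', verbose=False)
+scene = nusc.scene[0]
+sample_token = scene['first_sample_token']
+while sample_token:
+    sample = nusc.get('sample', sample_token)
+    sample_token = sample['next']  # Empty string at the end of the scene.
+```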
+
 sensor
 ---------
-
 A specific sensor type.
 ```
 sensor {
@@ -198,9 +198,9 @@ sensor {
    "modality": {camera, lidar, radar} -- Sensor modality. Supports category(ies) in brackets.
 }
 ```
+
 visibility
 ---------
-
 The visibility of an instance is the fraction of annotation visible in all 6 images. Binned into 4 bins 0-40%, 40-60%, 60-80% and 80-100%.
 ```
 visibility {
diff --git a/python-sdk/nuimages/nuimages.py b/python-sdk/nuimages/nuimages.py
index 7d060a2e..e9ab6339 100644
--- a/python-sdk/nuimages/nuimages.py
+++ b/python-sdk/nuimages/nuimages.py
@@ -32,13 +32,13 @@ class NuImages:
     """
 
     def __init__(self,
-                 version: str = 'v1.0-train',
+                 version: str = 'v1.0-mini',
                  dataroot: str = '/data/sets/nuimages',
                  lazy: bool = True,
                  verbose: bool = False):
         """
         Loads database and creates reverse indexes and shortcuts.
-        :param version: Version to load (e.g. "v1.0-train", "v1.0-val").
+        :param version: Version to load (e.g. "v1.0-train", "v1.0-val", "v1.0-test", "v1.0-mini").
         :param dataroot: Path to the tables and data.
         :param lazy: Whether to use lazy loading for the database tables.
         :param verbose: Whether to print status messages during load.
@@ -168,16 +168,16 @@ def __load_table__(self, table_name) -> List[dict]:
 
         return table
 
-    def shortcut(self, src_table: str, dst_table: str, src_token: str) -> Dict[str, Any]:
+    def shortcut(self, src_table: str, tgt_table: str, src_token: str) -> Dict[str, Any]:
         """
         Convenience function to navigate between different tables that have one-to-one relations.
         E.g. we can use this function to conveniently retrieve the sensor for a sample_data.
         :param src_table: The name of the source table.
-        :param dst_table: The name of the destination table.
+        :param tgt_table: The name of the target table.
         :param src_token: The source token.
-        :return: The entry of the destination table correspondings to the source token.
+        :return: The entry of the target table corresponding to the source token.
         """
-        if src_table == 'sample_data' and dst_table == 'sensor':
+        if src_table == 'sample_data' and tgt_table == 'sensor':
             sd_camera = self.get('sample_data', src_token)
             calibrated_sensor = self.get('calibrated_sensor', sd_camera['calibrated_sensor_token'])
             sensor = self.get('sensor', calibrated_sensor['sensor_token'])
@@ -201,6 +201,31 @@ def check_sweeps(self, filename: str) -> None:
                 'directory, but you cannot call methods that use non-keyframe sample_datas.' % sweeps_dir)
 
+    def find_corresponding_sample_data(self, sd_token: str, tgt_modality: str) -> str:
+        """
+        For a sample_data token from either camera or lidar, find the corresponding sample_data token of the
+        other modality.
+        :param sd_token: Source sample_data token.
+        :param tgt_modality: The modality of the target.
+        :return: The corresponding sample_data token with the target modality.
+        """
+        assert tgt_modality in ['camera', 'lidar'], 'Error: Invalid tgt_modality %s!' % tgt_modality
+        sample_data = self.get('sample_data', sd_token)
+
+        tgt_sd_tokens = self.get_sample_content(sample_data['sample_token'], tgt_modality)
+        # Match by timestamp: pick the target sample_data closest in time to the source.
+        timestamps = np.array([self.get('sample_data', sd)['timestamp'] for sd in tgt_sd_tokens])
+        rel_times = np.abs(timestamps - sample_data['timestamp']) / 1e6
+
+        closest_idx = rel_times.argmin()
+        closest_time_diff = rel_times[closest_idx]
+        assert closest_time_diff < 0.25, 'Error: No corresponding sample_data exists! ' \
+                                         'Note that this is the case for 0.9% of all sample_datas.'
+        tgt_sd_token = tgt_sd_tokens[closest_idx]
+        assert tgt_sd_token != sd_token, 'Error: Invalid usage of this method. ' \
+                                         'Source and target modality must differ!'
+
+        return tgt_sd_token
+
     # ### List methods. 
### def list_attributes(self) -> None: @@ -255,10 +280,11 @@ def list_cameras(self) -> None: print(format_str.format( cs_freq, channel_freq, channel)) - def list_categories(self, sample_tokens: List[str] = None) -> None: + def list_categories(self, sample_tokens: List[str] = None, sort_by: str = 'object_freq') -> None: """ List all categories and the number of object_anns and surface_anns for them. :param sample_tokens: A list of sample tokens for which category stats will be shown. + :param sort_by: Sorting criteria, e.g. "name", "object_freq", "surface_freq". """ # Preload data if in lazy load to avoid confusing outputs. if self.lazy: @@ -278,11 +304,24 @@ def list_categories(self, sample_tokens: List[str] = None) -> None: if sample_tokens is None or sample_token in sample_tokens: surface_freqs[surface_ann['category_token']] += 1 + # Sort entries. + if sort_by == 'name': + sort_order = [i for (i, _) in sorted(enumerate(self.category), key=lambda x: x[1]['name'])] + elif sort_by == 'object_freq': + object_freqs_order = [object_freqs[c['token']] for c in self.category] + sort_order = [i for (i, _) in sorted(enumerate(object_freqs_order), key=lambda x: x[1], reverse=True)] + elif sort_by == 'surface_freq': + surface_freqs_order = [surface_freqs[c['token']] for c in self.category] + sort_order = [i for (i, _) in sorted(enumerate(surface_freqs_order), key=lambda x: x[1], reverse=True)] + else: + raise Exception('Error: Invalid sorting criterion %s!' % sort_by) + # Print to stdout. format_str = '{:11} {:12} {:24.24} {:48.48}' print() print(format_str.format('Object_anns', 'Surface_anns', 'Name', 'Description')) - for category in self.category: + for s in sort_order: + category = self.category[s] category_token = category['token'] object_freq = object_freqs[category_token] surface_freq = surface_freqs[category_token] @@ -372,6 +411,8 @@ def get_sample_content(self, # Combine. result = backward[::-1] + [key_sd['token']] + forward + assert 7 <= len(result) <= 13, 'Error: There should be between 7 and 13 %s sample_datas for each sample!' \ + % modality return result def get_depth(self, @@ -741,12 +782,19 @@ def render_pointcloud(self, out_path: str = None) -> None: """ Render sample data onto axis. - :param sd_token_lidar: Sample_data token of the lidar pointcloud. + :param sd_token_lidar: Sample_data token of the lidar. + For compatibility with other render methods we also allow passing a camera sample_data token, + which is then converted to the corresponding lidar token. :param axes_limit: Axes limit for lidar (measured in meters). :param color_mode: How to color the lidar points, e.g. depth or height. :param use_flat_vehicle_coordinates: See get_pointcloud(). :param out_path: Optional path to save the rendered figure to disk. """ + # If we are provided a camera sd_token, we need to find the closest lidar token. + sample_data = self.get('sample_data', sd_token_lidar) + if sample_data['fileformat'] == 'jpg': + sd_token_lidar = self.find_corresponding_sample_data(sd_token_lidar, 'lidar') + # Load the pointcloud and transform it to the specified viewpoint. 
        points, original_points = self.get_pointcloud(sd_token_lidar, use_flat_vehicle_coordinates)
diff --git a/python-sdk/nuimages/scripts/render_images.py b/python-sdk/nuimages/scripts/render_images.py
index af1440b6..94b36dc9 100644
--- a/python-sdk/nuimages/scripts/render_images.py
+++ b/python-sdk/nuimages/scripts/render_images.py
@@ -1,104 +1,207 @@
 import argparse
+import gc
 import os
 import random
+from typing import List
+from collections import defaultdict
 
+import cv2
 import tqdm
 
 from nuimages.nuimages import NuImages
 
 
-class ImageRenderer:
-
-    def __init__(self, version: str = 'v1.0-val', dataroot: str = '/data/sets/nuimages', verbose: bool = False):
-        """
-        Initialize ImageRenderer.
-        :param version: The NuImages version.
-        :param dataroot: The root folder where the dataset is installed.
-        """
-        self.version = version
-        self.dataroot = dataroot
-        self.verbose = verbose
-        self.nuim = NuImages(version=self.version, dataroot=self.dataroot, verbose=self.verbose, lazy=False)
-
-    def render_images(self,
-                      mode: str = 'all',
-                      cam_name: str = None,
-                      image_limit: int = 100,
-                      out_dir: str = '~/Downloads/nuImages') -> None:
-        """
-        Render a random selection of images and save them to disk.
-        Note: The images rendered here are keyframes only.
-        :param mode: What to render:
-            "annotated" for the image with annotations,
-            "raw" for the image without annotations,
-            "depth" for depth image,
-            "all" to render all of the above separately.
-        :param cam_name: Only render images from a particular camera, e.g. "CAM_BACK'.
-        :param image_limit: Maximum number of images to render.
-        :param out_dir: Folder to render the images to.
-        """
-        # Check and convert inputs.
-        all_modes = ['annotated', 'image', 'depth_dense', 'depth_sparse', 'pointcloud', 'trajectory']
-        assert mode in all_modes + ['all']
-        out_dir = os.path.expanduser(out_dir)
-        if not os.path.isdir(out_dir):
-            os.makedirs(out_dir)
 
-        if mode == 'all':
+def render_images(nuim: NuImages,
+                  mode: str = 'all',
+                  cam_name: str = None,
+                  sample_limit: int = 100,
+                  filter_categories: List[str] = None,
+                  out_type: str = 'image',
+                  out_dir: str = '~/Downloads/nuImages',
+                  cleanup: bool = True) -> None:
+    """
+    Render a random selection of images and save them to disk.
+    Note: The images rendered here are keyframes only.
+    :param nuim: NuImages instance.
+    :param mode: What to render:
+        "annotated" for the image with annotations,
+        "raw" for the image without annotations,
+        "depth_dense" for dense depth image,
+        "depth_sparse" for sparse depth image,
+        "pointcloud" for a birds-eye view of the pointcloud,
+        "trajectory" for a rendering of the trajectory of the vehicle,
+        "all" to render all of the above separately.
+    :param cam_name: Only render images from a particular camera, e.g. "CAM_BACK".
+    :param sample_limit: Maximum number of samples (images) to render.
+    :param filter_categories: Specify a list of object_ann category names. Every sample that is rendered must
+        contain annotations of any of those categories.
+    :param out_type: The output type as one of the following:
+        'image': Renders a single image for the image keyframe of each sample.
+        'video': Renders a video for all images/pcls in the clip associated with each sample.
+    :param out_dir: Folder to render the images to.
+    :param cleanup: Whether to delete images after rendering the video. Not relevant for out_type == 'image'.
+    """
+    # Check and convert inputs.
+    assert out_type in ['image', 'video'], 'Error: Unknown out_type %s!' 
% out_type
+    all_modes = ['annotated', 'image', 'depth_dense', 'depth_sparse', 'pointcloud', 'trajectory']
+    assert mode in all_modes + ['all'], 'Error: Unknown mode %s!' % mode
+    assert not (out_type == 'video' and mode == 'trajectory'), 'Error: Cannot render "trajectory" for videos!'
+
+    if mode == 'all':
+        if out_type == 'image':
             modes = all_modes
+        elif out_type == 'video':
+            modes = [m for m in all_modes if m not in ['annotated', 'trajectory']]
         else:
-            modes = [mode]
-
-        # Get a random selection of samples.
-        sample_tokens = [s['token'] for s in self.nuim.sample]
-        random.shuffle(sample_tokens)
-
-        # Filter by camera.
-        if cam_name is not None:
-            sample_tokens_cam = []
-            for sample_token in sample_tokens:
-                sample = self.nuim.get('sample', sample_token)
-                sd_token_camera = sample['key_camera_token']
-                sensor = self.nuim.shortcut('sample_data', 'sensor', sd_token_camera)
-                if sensor['channel'] == cam_name:
-                    sample_tokens_cam.append(sample_token)
-            sample_tokens = sample_tokens_cam
-
-        # Limit number of samples.
-        sample_tokens = sample_tokens[:image_limit]
-
-        print('Rendering images for mode %s to folder %s...' % (mode, out_dir))
-        for sample_token in tqdm.tqdm(sample_tokens):
-            sample = self.nuim.get('sample', sample_token)
+            raise Exception('Error: Unknown out_type %s!' % out_type)
+    else:
+        modes = [mode]
+
+    if filter_categories is not None:
+        category_names = [c['name'] for c in nuim.category]
+        for category_name in filter_categories:
+            assert category_name in category_names, 'Error: Invalid object_ann category %s!' % category_name
+
+    # Create output folder.
+    out_dir = os.path.expanduser(out_dir)
+    if not os.path.isdir(out_dir):
+        os.makedirs(out_dir)
+
+    # Filter by camera.
+    sample_tokens = [s['token'] for s in nuim.sample]
+    if cam_name is not None:
+        sample_tokens_cam = []
+        for sample_token in sample_tokens:
+            sample = nuim.get('sample', sample_token)
             sd_token_camera = sample['key_camera_token']
-            sensor = self.nuim.shortcut('sample_data', 'sensor', sd_token_camera)
-            sample_cam_name = sensor['channel']
-
-            for mode in modes:
-                out_path = os.path.join(out_dir, '%s_%s_%s.jpg' % (sample_token, sample_cam_name, mode))
-                if mode == 'annotated':
-                    self.nuim.render_image(sd_token_camera, with_annotations=True, out_path=out_path)
-                elif mode == 'image':
-                    self.nuim.render_image(sd_token_camera, with_annotations=False, out_path=out_path)
-                elif mode == 'depth_dense':
-                    self.nuim.render_depth(sd_token_camera, mode='dense', out_path=out_path)
-                elif mode == 'depth_sparse':
-                    self.nuim.render_depth(sd_token_camera, mode='sparse', out_path=out_path)
-                elif mode == 'pointcloud':
-                    sd_token_lidar = sample['key_lidar_token']
-                    self.nuim.render_pointcloud(sd_token_lidar, out_path=out_path)
-                elif mode == 'trajectory':
-                    self.nuim.render_trajectory(sample_token, out_path=out_path)
+            sensor = nuim.shortcut('sample_data', 'sensor', sd_token_camera)
+            if sensor['channel'] == cam_name:
+                sample_tokens_cam.append(sample_token)
+        sample_tokens = sample_tokens_cam
+
+    # Filter samples by category.
+    if filter_categories is not None:
+        # Get categories in each sample.
+        sd_to_object_cat_names = defaultdict(lambda: set())
+        for object_ann in nuim.object_ann:
+            category = nuim.get('category', object_ann['category_token'])
+            sd_to_object_cat_names[object_ann['sample_data_token']].add(category['name'])
+
+        # Filter samples. 
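+        # Keep a sample only if its keyframe image contains at least one of the requested categories.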
+ sample_tokens_cleaned = [] + for sample_token in sample_tokens: + sample = nuim.get('sample', sample_token) + sd_token_camera = sample['key_camera_token'] + category_names = sd_to_object_cat_names[sd_token_camera] + if any([c in category_names for c in filter_categories]): + sample_tokens_cleaned.append(sample_token) + sample_tokens = sample_tokens_cleaned + + # Get a random selection of samples. + random.shuffle(sample_tokens) + + # Limit number of samples. + sample_tokens = sample_tokens[:sample_limit] + + print('Rendering %s for mode %s to folder %s...' % (out_type, mode, out_dir)) + for sample_token in tqdm.tqdm(sample_tokens): + sample = nuim.get('sample', sample_token) + sd_token_camera = sample['key_camera_token'] + sensor = nuim.shortcut('sample_data', 'sensor', sd_token_camera) + sample_cam_name = sensor['channel'] + sd_tokens_camera = nuim.get_sample_content(sample_token, modality='camera') + + # We cannot render a video if there are missing camera sample_datas. + if len(sd_tokens_camera) < 13 and out_type == 'video': + continue + + for mode in modes: + out_path_prefix = os.path.join(out_dir, '%s_%s_%s' % (sample_token, sample_cam_name, mode)) + if out_type == 'image': + write_image(nuim, sd_token_camera, mode, '%s.jpg' % out_path_prefix) + elif out_type == 'video': + write_video(nuim, sd_tokens_camera, mode, out_path_prefix, cleanup=cleanup) + + +def write_video(nuim: NuImages, + sd_tokens_camera: List[str], + mode: str, + out_path_prefix: str, + cleanup: bool = True) -> None: + """ + Render a video by combining all the images of type mode for each sample_data. + :param nuim: NuImages instance. + :param sd_tokens_camera: All camera sample_data tokens in chronological order. + :param mode: The mode - see render_images(). + :param out_path_prefix: The file prefix used for the images and video. + :param cleanup: Whether to delete images after rendering the video. + """ + # Loop through each frame to create the video. + out_paths = [] + for i, sd_token_camera in enumerate(sd_tokens_camera): + out_path = '%s_%d.jpg' % (out_path_prefix, i) + out_paths.append(out_path) + write_image(nuim, sd_token_camera, mode, out_path) + + # Create video. + first_im = cv2.imread(out_paths[0]) + freq = 2 # Display frequency (Hz). + fourcc = cv2.VideoWriter_fourcc(*'MJPG') + video_path = '%s.avi' % out_path_prefix + out = cv2.VideoWriter(video_path, fourcc, freq, first_im.shape[1::-1]) + + # Load each image and add to the video. + for out_path in out_paths: + im = cv2.imread(out_path) + out.write(im) + + # Delete temporary image if requested. + if cleanup: + os.remove(out_path) + + # Finalize video. + out.release() + + +def write_image(nuim: NuImages, sd_token_camera: str, mode: str, out_path: str) -> None: + """ + Render a single image of type mode for the given sample_data. + :param nuim: NuImages instance. + :param sd_token_camera: The sample_data token of the camera. + :param mode: The mode - see render_images(). + :param out_path: The file to write the image to. 
+ """ + if mode == 'annotated': + nuim.render_image(sd_token_camera, with_annotations=True, out_path=out_path) + elif mode == 'image': + nuim.render_image(sd_token_camera, with_annotations=False, out_path=out_path) + elif mode == 'depth_dense': + nuim.render_depth(sd_token_camera, mode='dense', out_path=out_path) + elif mode == 'depth_sparse': + nuim.render_depth(sd_token_camera, mode='sparse', out_path=out_path) + elif mode == 'pointcloud': + nuim.render_pointcloud(sd_token_camera, out_path=out_path) + elif mode == 'trajectory': + sd_camera = nuim.get('sample_data', sd_token_camera) + nuim.render_trajectory(sd_camera['sample_token'], out_path=out_path) + else: + raise Exception('Error: Unknown mode %s!' % mode) + + # Trigger garbage collection to avoid memory overflow from the render functions. + gc.collect() if __name__ == '__main__': parser = argparse.ArgumentParser(description='Render a random selection of images and save them to disk.') parser.add_argument('--seed', type=int, default=42) # Set to 0 to disable. - parser.add_argument('--version', type=str, default='v1.0-val') + parser.add_argument('--version', type=str, default='v1.0-mini') parser.add_argument('--dataroot', type=str, default='/data/sets/nuimages') parser.add_argument('--verbose', type=int, default=1) parser.add_argument('--mode', type=str, default='all') parser.add_argument('--cam_name', type=str, default=None) - parser.add_argument('--image_limit', type=int, default=100) + parser.add_argument('--sample_limit', type=int, default=100) + parser.add_argument('--filter_categories', action='append') + parser.add_argument('--out_type', type=str, default='image') parser.add_argument('--out_dir', type=str, default='~/Downloads/nuImages') args = parser.parse_args() @@ -106,6 +209,9 @@ def render_images(self, if args.seed != 0: random.seed(args.seed) + # Initialize NuImages class. + nuim_ = NuImages(version=args.version, dataroot=args.dataroot, verbose=bool(args.verbose), lazy=False) + # Render images. - renderer = ImageRenderer(args.version, args.dataroot, bool(args.verbose)) - renderer.render_images(mode=args.mode, cam_name=args.cam_name, image_limit=args.image_limit, out_dir=args.out_dir) + render_images(nuim_, mode=args.mode, cam_name=args.cam_name, sample_limit=args.sample_limit, + filter_categories=args.filter_categories, out_type=args.out_type, out_dir=args.out_dir) diff --git a/python-sdk/nuimages/scripts/render_rare_classes.py b/python-sdk/nuimages/scripts/render_rare_classes.py new file mode 100644 index 00000000..4cce425f --- /dev/null +++ b/python-sdk/nuimages/scripts/render_rare_classes.py @@ -0,0 +1,81 @@ +import argparse +import random +from collections import defaultdict +from typing import Dict, Any, List + +from nuimages.nuimages import NuImages +from nuimages.scripts.render_images import render_images + + +def render_rare_classes(nuim: NuImages, + render_args: Dict[str, Any], + filter_categories: List[str] = None, + max_frequency: float = 0.001) -> None: + """ + Wrapper around render_images() that renders images with rare classes. + :param nuim: NuImages instance. + :param render_args: The render arguments passed on to the render function. See render_images(). + :param filter_categories: Specify a list of object_ann category names. + Every sample that is rendered must contain annotations of any of those categories. + Filter_categories are a applied on top of the frequency filering. 
+ :param max_frequency: The maximum relative frequency of the categories, at least one of which is required to be + present in the image. E.g. 0.1 indicates that one of the classes that account for at most 10% of the annotations + is present. + """ + # Checks. + assert 'filter_categories' not in render_args.keys(), \ + 'Error: filter_categories is a separate argument and should not be part of render_args!' + assert 0 <= max_frequency <= 1, 'Error: max_frequency must be a ratio between 0 and 1!' + + # Compute object class frequencies. + object_freqs = defaultdict(lambda: 0) + for object_ann in nuim.object_ann: + category = nuim.get('category', object_ann['category_token']) + object_freqs[category['name']] += 1 + + # Find rare classes. + total_freqs = len(nuim.object_ann) + filter_categories_freq = sorted([k for (k, v) in object_freqs.items() if v / total_freqs <= max_frequency]) + assert len(filter_categories_freq) > 0, 'Error: No classes found with the specified max_frequency!' + print('The rare classes are: %s' % filter_categories_freq) + + # If specified, additionally filter these categories by what was requested. + if filter_categories is not None: + filter_categories = list(set(filter_categories_freq).intersection(set(filter_categories))) + assert len(filter_categories) > 0, 'Error: No categories left after applying filter_categories!' + + # Call render function. + render_images(nuim, filter_categories=filter_categories, **render_args) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Render a random selection of images and save them to disk.') + parser.add_argument('--seed', type=int, default=42) # Set to 0 to disable. + parser.add_argument('--version', type=str, default='v1.0-mini') + parser.add_argument('--dataroot', type=str, default='/data/sets/nuimages') + parser.add_argument('--verbose', type=int, default=1) + parser.add_argument('--mode', type=str, default='all') + parser.add_argument('--cam_name', type=str, default=None) + parser.add_argument('--sample_limit', type=int, default=100) + parser.add_argument('--max_frequency', type=float, default=0.001) + parser.add_argument('--filter_categories', action='append') + parser.add_argument('--out_type', type=str, default='image') + parser.add_argument('--out_dir', type=str, default='~/Downloads/nuImages') + args = parser.parse_args() + + # Set random seed for reproducible image selection. + if args.seed != 0: + random.seed(args.seed) + + # Initialize NuImages class. + nuim_ = NuImages(version=args.version, dataroot=args.dataroot, verbose=bool(args.verbose), lazy=False) + + # Render images. + render_args = { + 'mode': args.mode, + 'cam_name': args.cam_name, + 'sample_limit': args.sample_limit, + 'out_type': args.out_type, + 'out_dir': args.out_dir + } + render_rare_classes(nuim_, render_args, filter_categories=args.filter_categories, max_frequency=args.max_frequency) diff --git a/python-sdk/nuimages/tests/test_attributes.py b/python-sdk/nuimages/tests/test_attributes.py index 17b6d8c5..659905f1 100644 --- a/python-sdk/nuimages/tests/test_attributes.py +++ b/python-sdk/nuimages/tests/test_attributes.py @@ -6,10 +6,10 @@ class TestAttributes(unittest.TestCase): - def __init__(self, version: str = 'v1.0-val', dataroot: str = None): + def __init__(self, test_name: str = '', version: str = 'v1.0-mini', dataroot: str = None): """ Initialize TestAttributes. - TODO: Fix automatic discovery for this test. + :param test_name: Dummy parameter required by the TestCase class. :param version: The NuImages version. 
:param dataroot: The root folder where the dataset is installed. """ @@ -26,7 +26,7 @@ def __init__(self, version: str = 'v1.0-val', dataroot: str = None): 'human.pedestrian.adult': ['pedestrian'], 'human.pedestrian.child': ['pedestrian'], 'human.pedestrian.construction_worker': ['pedestrian'], - 'human.pedestrian.personal_mobility': ['has_rider'], + 'human.pedestrian.personal_mobility': ['cycle'], 'human.pedestrian.police_officer': ['pedestrian'], 'human.pedestrian.stroller': [], 'human.pedestrian.wheelchair': [], @@ -35,7 +35,7 @@ def __init__(self, version: str = 'v1.0-val', dataroot: str = None): 'movable_object.pushable_pullable': [], 'movable_object.trafficcone': [], 'static_object.bicycle_rack': [], - 'vehicle.bicycle': ['has_rider'], + 'vehicle.bicycle': ['cycle'], 'vehicle.bus.bendy': ['vehicle'], 'vehicle.bus.rigid': ['vehicle'], 'vehicle.car': ['vehicle'], @@ -43,11 +43,17 @@ def __init__(self, version: str = 'v1.0-val', dataroot: str = None): 'vehicle.ego': [], 'vehicle.emergency.ambulance': ['vehicle', 'vehicle_light.emergency'], 'vehicle.emergency.police': ['vehicle', 'vehicle_light.emergency'], - 'vehicle.motorcycle': ['has_rider'], + 'vehicle.motorcycle': ['cycle'], 'vehicle.trailer': ['vehicle'], 'vehicle.truck': ['vehicle'] } + def runTest(self) -> None: + """ + Dummy function required by the TestCase class. + """ + pass + def test_object_anns(self, print_only: bool = False) -> None: """ For every object_ann, check that all the required attributes for that class are present. @@ -99,7 +105,7 @@ def test_object_anns(self, print_only: bool = False) -> None: if __name__ == '__main__': # Runs the tests without aborting on error. - for nuim_version in ['v1.0-train', 'v1.0-val', 'v1.0-test']: + for nuim_version in ['v1.0-train', 'v1.0-val', 'v1.0-test', 'v1.0-mini']: print('Running TestAttributes for version %s...' % nuim_version) test = TestAttributes(version=nuim_version) test.test_object_anns(print_only=True) diff --git a/python-sdk/nuimages/tests/test_foreign_keys.py b/python-sdk/nuimages/tests/test_foreign_keys.py index 5385ada0..1c2ac3aa 100644 --- a/python-sdk/nuimages/tests/test_foreign_keys.py +++ b/python-sdk/nuimages/tests/test_foreign_keys.py @@ -8,9 +8,10 @@ class TestForeignKeys(unittest.TestCase): - def __init__(self, version: str = 'v1.0-val', dataroot: str = None): + def __init__(self, test_name: str = '', version: str = 'v1.0-mini', dataroot: str = None): """ Initialize TestForeignKeys. + :param test_name: Dummy parameter required by the TestCase class. :param version: The NuImages version. :param dataroot: The root folder where the dataset is installed. """ @@ -23,6 +24,12 @@ def __init__(self, version: str = 'v1.0-val', dataroot: str = None): self.dataroot = dataroot self.nuim = NuImages(version=self.version, dataroot=self.dataroot, verbose=False) + def runTest(self) -> None: + """ + Dummy function required by the TestCase class. + """ + pass + def test_foreign_keys(self) -> None: """ Test that every foreign key points to a valid token. @@ -38,6 +45,8 @@ def test_foreign_keys(self) -> None: # Go through each table and check the foreign_keys. for table_name in self.nuim.table_names: table: List[Dict[str, Any]] = self.nuim.__getattr__(table_name) + if len(table) == 0 and self.version.endswith('-test'): # Skip test annotations. + continue keys = table[0].keys() # Check 1-to-1 link. @@ -128,7 +137,7 @@ def test_prev_next(self) -> None: if __name__ == '__main__': # Runs the tests without aborting on error. 
- for nuim_version in ['v1.0-train', 'v1.0-val', 'v1.0-test']: + for nuim_version in ['v1.0-train', 'v1.0-val', 'v1.0-test', 'v1.0-mini']: print('Running TestForeignKeys for version %s...' % nuim_version) test = TestForeignKeys(version=nuim_version) test.test_foreign_keys() diff --git a/python-sdk/nuimages/tests/test_overflow_bug.py b/python-sdk/nuimages/tests/test_overflow_bug.py index 8cbb253b..bfecd811 100644 --- a/python-sdk/nuimages/tests/test_overflow_bug.py +++ b/python-sdk/nuimages/tests/test_overflow_bug.py @@ -6,6 +6,6 @@ # TODO: Delete this file once everything is well tested. -nuim = NuImages(version='v1.0-val') +nuim = NuImages(version='v1.0-val', verbose=False) for token in tokens: - nuim.render_depth(tokens) + nuim.render_depth(token) diff --git a/python-sdk/nuscenes/can_bus/README.md b/python-sdk/nuscenes/can_bus/README.md index 4422082e..885dc3c1 100644 --- a/python-sdk/nuscenes/can_bus/README.md +++ b/python-sdk/nuscenes/can_bus/README.md @@ -81,7 +81,7 @@ The current pose of the ego vehicle, sampled at 50Hz. - accel: \[3\] Acceleration vector in the ego vehicle frame in m/s/s. - orientation: \[4\] The rotation vector in the ego vehicle frame. - pos: \[3\] The position (x, y, z) in meters in the global frame. This is identical to the [nuScenes ego pose](https://github.com/nutonomy/nuscenes-devkit/blob/master/docs/schema_nuscenes.md#ego_pose), but sampled at a higher frequency. -- rotation_rate: \[3\] The angular velocity vector of the vehicle in rad/s. This is expressed in the ego vehicle frame. +- rotation_rate: \[3\] The angular velocity vector of the vehicle in rad/s. This is expressed in the ego vehicle frame. - vel: \[3\] The velocity in m/s, expressed in the ego vehicle frame. ### Steer Angle Feedback diff --git a/python-sdk/nuscenes/eval/prediction/tests/test_metrics.py b/python-sdk/nuscenes/eval/prediction/tests/test_metrics.py index caadac9a..26e37b9e 100644 --- a/python-sdk/nuscenes/eval/prediction/tests/test_metrics.py +++ b/python-sdk/nuscenes/eval/prediction/tests/test_metrics.py @@ -275,7 +275,7 @@ class TestOffRoadRate(unittest.TestCase): def _do_test(self, map_name, predictions, answer): with patch.object(PredictHelper, 'get_map_name_from_sample_token') as get_map_name: get_map_name.return_value = map_name - nusc = NuScenes('v1.0-mini', dataroot=os.environ['NUSCENES']) + nusc = NuScenes('v1.0-mini', dataroot=os.environ['NUSCENES'], verbose=False) helper = PredictHelper(nusc) off_road_rate = metrics.OffRoadRate(helper, [metrics.RowMean()]) diff --git a/python-sdk/nuscenes/prediction/input_representation/static_layers.py b/python-sdk/nuscenes/prediction/input_representation/static_layers.py index 881a38ca..83c8b330 100644 --- a/python-sdk/nuscenes/prediction/input_representation/static_layers.py +++ b/python-sdk/nuscenes/prediction/input_representation/static_layers.py @@ -20,10 +20,11 @@ Color = Tuple[float, float, float] -def load_all_maps(helper: PredictHelper) -> Dict[str, NuScenesMap]: +def load_all_maps(helper: PredictHelper, verbose: bool = False) -> Dict[str, NuScenesMap]: """ Loads all NuScenesMap instances for all available maps. :param helper: Instance of PredictHelper. + :param verbose: Whether to print to stdout. :return: Mapping from map-name to the NuScenesMap api instance. 
""" dataroot = helper.data.dataroot @@ -35,8 +36,8 @@ def load_all_maps(helper: PredictHelper) -> Dict[str, NuScenesMap]: for map_file in json_files: map_name = str(map_file.split(".")[0]) - - print(f'static_layers.py - Loading Map: {map_name}') + if verbose: + print(f'static_layers.py - Loading Map: {map_name}') maps[map_name] = NuScenesMap(dataroot, map_name=map_name) diff --git a/python-sdk/nuscenes/prediction/tests/test_predict_helper.py b/python-sdk/nuscenes/prediction/tests/test_predict_helper.py index 827fab68..647b1588 100644 --- a/python-sdk/nuscenes/prediction/tests/test_predict_helper.py +++ b/python-sdk/nuscenes/prediction/tests/test_predict_helper.py @@ -12,12 +12,14 @@ class MockNuScenes(NuScenes): - """ Mocks the NuScenes API needed to test PredictHelper. """ def __init__(self, sample_annotations: List[Dict[str, Any]], samples: List[Dict[str, Any]]): - + """ + Mocks the NuScenes API needed to test PredictHelper. + Note that we are skipping the call to the super class constructor on purpose to avoid loading the tables. + """ self._sample_annotation = {r['token']: r for r in sample_annotations} self._sample = {r['token']: r for r in samples} diff --git a/python-sdk/nuscenes/utils/map_mask.py b/python-sdk/nuscenes/utils/map_mask.py index 655d1ad4..0042e73d 100644 --- a/python-sdk/nuscenes/utils/map_mask.py +++ b/python-sdk/nuscenes/utils/map_mask.py @@ -16,7 +16,7 @@ class MapMask: def __init__(self, img_file: str, resolution: float = 0.1): """ - Init a map mask object that contains the semantic prior (drivable surface and sidewalks) mask. + Init a map mask object that contains the semantic prior (driveable surface and sidewalks) mask. :param img_file: File path to map png file. :param resolution: Map resolution in meters. """ diff --git a/python-sdk/tutorials/nuimages_tutorial.ipynb b/python-sdk/tutorials/nuimages_tutorial.ipynb index 64b8fa3c..c19ae36a 100644 --- a/python-sdk/tutorials/nuimages_tutorial.ipynb +++ b/python-sdk/tutorials/nuimages_tutorial.ipynb @@ -50,7 +50,7 @@ "%matplotlib inline\n", "from nuimages import NuImages\n", "\n", - "nuim = NuImages(dataroot='/data/sets/nuimages', version='v1.0-val', verbose=True, lazy=True)" + "nuim = NuImages(dataroot='/data/sets/nuimages', version='v1.0-mini', verbose=True, lazy=True)" ] }, { @@ -296,8 +296,21 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "`>> python nuimages/scripts/render_images.py --mode all --cam_name CAM_FRONT --out_dir ~/Downloads/nuImages\n", - "`" + "`>> python nuimages/scripts/render_images.py --mode all --cam_name CAM_FRONT --out_dir ~/Downloads/nuImages --out_type image`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Instead of rendering the annotated keyframe, we can also render a video of the 13 individual images, spaced at 2 Hz." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`>> python nuimages/scripts/render_images.py --mode all --cam_name CAM_FRONT --out_dir ~/Downloads/nuImages --out_type video`" ] }, { @@ -357,7 +370,7 @@ "metadata": {}, "outputs": [], "source": [ - "nuim.list_categories()" + "nuim.list_categories(sort_by='object_freq')" ] }, {