From 26076a679e4a38c168b8cd175c4ebb312711fa51 Mon Sep 17 00:00:00 2001 From: Holger Caesar Date: Wed, 29 Jul 2020 10:48:01 +0800 Subject: [PATCH 1/7] Purge depth from code --- python-sdk/nuimages/nuimages.py | 367 +----------------- python-sdk/nuimages/scripts/render_images.py | 15 +- .../nuimages/tests/test_foreign_keys.py | 31 +- .../nuimages/tests/test_overflow_bug.py | 11 - python-sdk/nuimages/utils/lidar.py | 147 ------- 5 files changed, 27 insertions(+), 544 deletions(-) delete mode 100644 python-sdk/nuimages/tests/test_overflow_bug.py delete mode 100644 python-sdk/nuimages/utils/lidar.py diff --git a/python-sdk/nuimages/nuimages.py b/python-sdk/nuimages/nuimages.py index 1382f2e6..7ae4e7a9 100644 --- a/python-sdk/nuimages/nuimages.py +++ b/python-sdk/nuimages/nuimages.py @@ -14,11 +14,8 @@ from PIL import Image, ImageDraw from pyquaternion import Quaternion -from nuimages.utils.lidar import depth_map, distort_pointcloud, InvertedNormalize from nuimages.utils.utils import annotation_name, mask_decode, get_font from nuscenes.utils.color_map import get_colormap -from nuscenes.utils.data_classes import LidarPointCloud -from nuscenes.utils.geometry_utils import view_points, transform_matrix PYTHON_VERSION = sys.version_info[0] @@ -207,31 +204,6 @@ def check_sweeps(self, filename: str) -> None: 'directory, but you cannot call methods that use non-keyframe sample_datas.' % sweeps_dir) - def find_corresponding_sample_data(self, sd_token: str, tgt_modality: str) -> str: - """ - For a sample_data token from either camera or lidar, find the corresponding sample_data token of the - other modality. - :param sd_token: Source sample_data token. - :param tgt_modality: The modality of the target. - :return: The corresponding sample_data token with the target modality. - """ - assert tgt_modality in ['camera', 'lidar'], 'Error: Invalid tgt_modality %s!' 
% tgt_modality - sample_data = self.get('sample_data', sd_token) - - tgt_sd_tokens = self.get_sample_content(sample_data['sample_token'], tgt_modality) - timestamps = np.array([self.get('sample_data', sd_token)['timestamp'] for sd_token in tgt_sd_tokens]) - rel_times = np.abs(timestamps - sample_data['timestamp']) / 1e6 - - closest_idx = rel_times.argmin() - closest_time_diff = rel_times[closest_idx] - assert closest_time_diff < 0.25, 'Error: No corresponding sample_data exists!' \ - 'Note that this is the case for 0.9% of all sample_datas.' - tgt_sd_token = tgt_sd_tokens[closest_idx] - assert tgt_sd_token != sd_token, 'Error: Invalid usage of this method. ' \ - 'Source and target modality must differ!' - - return tgt_sd_token - # ### List methods. ### def list_attributes(self, sort_by: str = 'freq') -> None: @@ -421,42 +393,26 @@ def list_sample_content(self, sample_token: str) -> None: # Print content for each modality. sample = self.get('sample', sample_token) - for modality in ['camera', 'lidar']: - sample_data_tokens = self.get_sample_content(sample_token, modality) - timestamps = np.array([self.get('sample_data', sd_token)['timestamp'] for sd_token in sample_data_tokens]) - rel_times = (timestamps - sample['timestamp']) / 1e6 + sample_data_tokens = self.get_sample_content(sample_token) + timestamps = np.array([self.get('sample_data', sd_token)['timestamp'] for sd_token in sample_data_tokens]) + rel_times = (timestamps - sample['timestamp']) / 1e6 - print('\nListing sample_datas for %s...' % modality) - print('Rel. time\tSample_data token') - for rel_time, sample_data_token in zip(rel_times, sample_data_tokens): - print('{:>9.1f}\t{}'.format(rel_time, sample_data_token)) + print('\nListing sample_datas...') + print('Rel. time\tSample_data token') + for rel_time, sample_data_token in zip(rel_times, sample_data_tokens): + print('{:>9.1f}\t{}'.format(rel_time, sample_data_token)) # ### Getter methods. 
### def get_sample_content(self, - sample_token: str, - modality: str) -> List[str]: + sample_token: str) -> List[str]: """ - For a given sample and modality, return all the sample_datas. + For a given sample, return all the sample_datas in chronological order. :param sample_token: Sample token. - :param modality: Sensor modality, either camera or lidar. :return: A list of sample_data tokens sorted by their timestamp. """ - assert modality in ['camera', 'lidar'], 'Error: Invalid modality %s!' % modality sample = self.get('sample', sample_token) - key_name = 'key_%s_token' % modality - if sample[key_name] == '': - # If we don't have a key lidar pointcloud, work the slow way by searching all sample_datas. - if modality == 'camera': - fileformat = 'jpg' - else: - fileformat = 'bin' - sample_datas = [sd for sd in self.sample_data - if sd['sample_token'] == sample_token and sd['fileformat'] == fileformat] - sample_datas = sorted(sample_datas, key=lambda sd: sd['timestamp']) - sample_data_tokens = [sd['token'] for sd in sample_datas] - return sample_data_tokens - key_sd = self.get('sample_data', sample[key_name]) + key_sd = self.get('sample_data', sample['key_camera_token']) # Go forward. cur_sd = key_sd @@ -478,143 +434,13 @@ def get_sample_content(self, # % modality # TODO: adjust return result - def get_depth(self, - sd_token_camera: str, - min_dist: float = 1.0) -> Tuple[np.ndarray, np.ndarray, float, Tuple[int, int]]: - """ - This function picks out the lidar pcl closest to the given image timestamp and projects it onto the image. - :param sd_token_camera: The sample_data token of the camera image. - :param min_dist: Distance from the camera below which points are discarded. - :return: ( - points: Lidar points (x, y) in pixel coordinates. - depths: Depth in meters of each lidar point. - time_diff: Time difference between capturing the lidar and camera data. - im_size: Width and height. - ) - """ - # Find closest pointcloud. 
- sd_camera = self.get('sample_data', sd_token_camera) - sample_lidar_tokens = self.get_sample_content(sd_camera['sample_token'], 'lidar') - timestamps = np.array([self.get('sample_data', t)['timestamp'] for t in sample_lidar_tokens]) - time_diffs = np.abs(timestamps - sd_camera['timestamp']) / 1e6 - closest_idx = int(np.argmin(time_diffs)) - closest_time_diff = time_diffs[closest_idx] - if closest_time_diff > 0.25: - raise Exception('Error: Cannot render depth for an image that has no associated lidar pointcloud!' - 'This is the case for about 0.9%% of the images.') - # TODO: revisit this number, as some of the images may also be missing - sd_token_lidar = sample_lidar_tokens[closest_idx] - sd_lidar = self.get('sample_data', sd_token_lidar) - - # Retrieve size from meta data. - im_size = (sd_camera['width'], sd_camera['height']) - - # Load pointcloud. - self.check_sweeps(sd_lidar['filename']) - pcl_path = osp.join(self.dataroot, sd_lidar['filename']) - pc = LidarPointCloud.from_file(pcl_path) - pointsensor = sd_lidar - cam = sd_camera - - # Points live in the point sensor frame. So they need to be transformed via global to the image plane. - # First step: transform the pointcloud to the ego vehicle frame for the timestamp of the sweep. - cs_record = self.get('calibrated_sensor', pointsensor['calibrated_sensor_token']) - pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix) - pc.translate(np.array(cs_record['translation'])) - - if False: # TODO: For debugging purposes, stay in the ego vehicle frame - # Second step: transform from ego to the global frame. - poserecord = self.get('ego_pose', pointsensor['ego_pose_token']) - pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix) - pc.translate(np.array(poserecord['translation'])) - - # Third step: transform from global into the ego vehicle frame for the timestamp of the image. 
- poserecord = self.get('ego_pose', cam['ego_pose_token']) - pc.translate(-np.array(poserecord['translation'])) - pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix.T) - - # Fourth step: transform from ego into the camera. - cs_record = self.get('calibrated_sensor', cam['calibrated_sensor_token']) - pc.translate(-np.array(cs_record['translation'])) - pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix.T) - - # Fifth step: actually take a "picture" of the pointcloud. - # Distort in camera plane (note that this only happens in nuImages, not nuScenes. - # In nuScenes all images are undistorted, in nuImages they are not. - sensor = self.get('sensor', cs_record['sensor_token']) - points, depths = distort_pointcloud(pc.points, np.array(cs_record['camera_distortion']), - sensor['channel']) - - # Take the actual picture (matrix multiplication with camera-matrix + renormalization). - points = view_points(points[:3, :], np.array(cs_record['camera_intrinsic']), normalize=True) - - # Remove points that are either outside or behind the camera. Leave a margin of 1 pixel for aesthetic reasons. - # Also make sure points are at least 1m in front of the camera to avoid seeing the lidar points on the camera - # casing for non-keyframes which are slightly out of sync. - mask = np.ones(depths.shape[0], dtype=bool) - mask = np.logical_and(mask, depths > min_dist) - mask = np.logical_and(mask, points[0, :] > 1) - mask = np.logical_and(mask, points[0, :] < im_size[0] - 1) - mask = np.logical_and(mask, points[1, :] > 1) - mask = np.logical_and(mask, points[1, :] < im_size[1] - 1) - points = points[:2, mask] - depths = depths[mask].squeeze() - - return points, depths, closest_time_diff, im_size - - def get_pointcloud(self, - sd_token_lidar: str, - use_flat_vehicle_coordinates: bool = True) -> Tuple[np.ndarray, np.ndarray]: - """ - Load a pointcloud and transform it to the specified viewpoint. - :param sd_token_lidar: Sample_data token of the lidar pointcloud. 
- :param use_flat_vehicle_coordinates: Instead of the current sensor's coordinate frame, use ego frame which is - aligned to z-plane in the world. Note: Previously this method did not use flat vehicle coordinates, which - can lead to small errors when the vertical axis of the global frame and lidar are not aligned. The new - setting is more correct and rotates the plot by ~90 degrees. - :return: The points as seen from the specified viewpoint and the points in the original (lidar) frame. - """ - # Load lidar pointcloud. - sd_lidar = self.get('sample_data', sd_token_lidar) - self.check_sweeps(sd_lidar['filename']) - lidar_path = osp.join(self.dataroot, sd_lidar['filename']) - pc = LidarPointCloud.from_file(lidar_path) - - # By default we render the sample_data top down in the sensor frame. - # This is slightly inaccurate when rendering the map as the sensor frame may not be perfectly upright. - # Using use_flat_vehicle_coordinates we can render the map in the ego frame instead. - if use_flat_vehicle_coordinates: - # Retrieve transformation matrices for reference point cloud. - cs_record = self.get('calibrated_sensor', sd_lidar['calibrated_sensor_token']) - pose_record = self.get('ego_pose', sd_lidar['ego_pose_token']) - ref_to_ego = transform_matrix(translation=cs_record['translation'], - rotation=Quaternion(cs_record["rotation"])) - - # Compute rotation between 3D vehicle pose and "flat" vehicle pose (parallel to global z plane). 
- ego_yaw = Quaternion(pose_record['rotation']).yaw_pitch_roll[0] - rotation_vehicle_flat_from_vehicle = np.dot( - Quaternion(scalar=np.cos(ego_yaw / 2), vector=[0, 0, np.sin(ego_yaw / 2)]).rotation_matrix, - Quaternion(pose_record['rotation']).inverse.rotation_matrix) - vehicle_flat_from_vehicle = np.eye(4) - vehicle_flat_from_vehicle[:3, :3] = rotation_vehicle_flat_from_vehicle - viewpoint = np.dot(vehicle_flat_from_vehicle, ref_to_ego) - else: - viewpoint = np.eye(4) - original_points = pc.points[:3, :] - points = view_points(original_points, viewpoint, normalize=False) - - return points, original_points - def get_ego_pose_data(self, sample_token: str, - modality: str = 'camera', attribute_name: str = 'translation') -> Tuple[np.ndarray, np.ndarray]: """ Return the ego pose data of the <= 13 sample_datas associated with this sample. The method return translation, rotation, rotation_rate, acceleration and speed. :param sample_token: Sample token. - :param modality: Whether to look at the ego poses of camera or lidar (very similar, as long as all 13 images - and 13 pointclouds exist. :param attribute_name: The ego_pose field to extract, e.g. "translation", "acceleration" or "speed". :return: ( timestamps: The timestamp of each ego_pose. @@ -630,7 +456,7 @@ def get_ego_pose_data(self, else: attribute_len = 3 - sd_tokens = self.get_sample_content(sample_token, modality) + sd_tokens = self.get_sample_content(sample_token) attributes = np.zeros((len(sd_tokens), attribute_len)) timestamps = np.zeros((len(sd_tokens))) for i, sd_token in enumerate(sd_tokens): @@ -700,7 +526,6 @@ def get_segmentation(self, """ # Validate inputs. sample_data = self.get('sample_data', sd_token_camera) - assert sample_data['fileformat'] == 'jpg', 'Error: Cannot use get_segmentation() on lidar pointclouds!' assert sample_data['is_key_frame'], 'Error: Cannot render annotations for non keyframes!' # Build a mapping from name to index to look up index in O(1) time. 
@@ -812,7 +637,6 @@ def render_image(self, """ # Validate inputs. sample_data = self.get('sample_data', sd_token_camera) - assert sample_data['fileformat'] == 'jpg', 'Error: Cannot use render_image() on lidar pointclouds!' if not sample_data['is_key_frame']: assert not annotation_type, 'Error: Cannot render annotations for non keyframes!' assert not with_attributes, 'Error: Cannot render attributes for non keyframes!' @@ -893,175 +717,6 @@ def render_image(self, plt.savefig(out_path, bbox_inches='tight', dpi=2.295 * pix_to_inch, pad_inches=0) plt.close() - def render_depth_sparse(self, - sd_token_camera: str, - render_scale: float = 1.0, - point_size: float = 10.0, - render_stats: bool = False, - out_path: str = None) -> None: - """ - This function plots an image and the projected lidar points. - The points are colored by depth. - :param sd_token_camera: The sample_data token of the camera image. - :param render_scale: The scale at which the depth image will be rendered. Use 1.0 for the recommended size. - :param point_size: The size of each lidar point in pixels. - :param render_stats: Whether to render information about ego speed and time difference in the top left corner - of the image. - :param out_path: Optional path to save the rendered figure to disk, or otherwise None. - If a path is provided, the plot is not shown to the user. - """ - # Get depth. - try: - points, depths, _, im_size = self.get_depth(sd_token_camera) - except: - print('Warning: Cannot render depth for an image without a lidar pointcloud!') # TODO - return - - # Init plot. - (width, height) = im_size - pix_to_inch = 100 / render_scale - figsize = (height / pix_to_inch, width / pix_to_inch) - plt.figure(figsize=figsize) - plt.axis('off') - - # Show image. - sd_camera = self.get('sample_data', sd_token_camera) - im_path = osp.join(self.dataroot, sd_camera['filename']) - im = Image.open(im_path) - plt.imshow(im) - - # Overlay points. 
- plt.scatter(points[0], points[1], marker='.', s=point_size, c=depths) - - # Print velocity and time difference. - if render_stats: - sd_lidar = self.get('sample_data', self.find_corresponding_sample_data(sd_token_camera, 'lidar')) - ego_pose = self.get('ego_pose', sd_lidar['ego_pose_token']) - vel = ego_pose['speed'] - time_camera = sd_camera['timestamp'] - time_lidar = sd_lidar['timestamp'] - time_diff = np.abs(time_camera - time_lidar) / 1e6 - plt.text(5, 40, 'vel: %.1f, time_diff: %.3f' % (vel, time_diff), color='white') - - # Save to disk. - if out_path is not None: - plt.savefig(out_path, bbox_inches='tight', dpi=2.295 * pix_to_inch, pad_inches=0) - plt.close() - - def render_depth_dense(self, - sd_token_camera: str, - max_depth: float = None, - depth_map_scale: float = 0.5, - n_dilate: int = 23, - n_gauss: int = 11, - sigma_gauss: float = 3, - cmap: str = 'viridis', - render_scale: float = 1.0, - out_path: str = None) -> None: - """ - This function plots a dense depth map using depth completion. - Depth completion dilates the sparse set of points in the image to "interpolate" between them. - Default depth colors range from yellow (close) to blue (far). Missing values are blue. - :param sd_token_camera: The sample_data token of the camera image. - :param max_depth: The maximum depth used for scaling the color values. If None, the actual maximum is used. - :param cmap: The matplotlib color map name. We recommend viridis or magma. - :param depth_map_scale: Down-sampling factor when computing the depth map. - :param n_dilate: Dilation filter size. - :param n_gauss: Gaussian filter size. - :param sigma_gauss: Gaussian filter sigma. - :param render_scale: The scale at which the depth image will be rendered. Use 1.0 for the recommended size. - A larger scale makes the point location more precise, but they will be harder to see. - For the "dense" option, the depth completion parameters are optimized for the recommended size. 
- :param out_path: Optional path to save the rendered figure to disk, or otherwise None. - If a path is provided, the plot is not shown to the user. - """ - # Get depth. - try: - points, depths, _, im_size = self.get_depth(sd_token_camera) - except: - print('Warning: Cannot render depth for an image without a lidar pointcloud!') # TODO - return - - # Compute dense depth image. - depth_im = depth_map(points, depths, im_size, scale=depth_map_scale, n_dilate=n_dilate, n_gauss=n_gauss, - sigma_gauss=sigma_gauss) - - # Scale depth_im to full image size. - depth_im = cv2.resize(depth_im, im_size) - - # Determine color scaling. - min_depth = 0 - if max_depth is None: - max_depth = depth_im.max() - norm = InvertedNormalize(vmin=min_depth, vmax=max_depth) - - # Plot the image. - (width, height) = depth_im.shape[::-1] - pix_to_inch = 100 / render_scale - figsize = (height / pix_to_inch, width / pix_to_inch) - plt.figure(figsize=figsize) - plt.axis('off') - plt.imshow(depth_im, norm=norm, cmap=cmap) - - # Save to disk. - if out_path is not None: - plt.savefig(out_path, bbox_inches='tight', dpi=2.295 * pix_to_inch, pad_inches=0) - plt.close() - - def render_pointcloud(self, - sd_token_lidar: str, - axes_limit: float = 10, - color_mode: str = 'height', - use_flat_vehicle_coordinates: bool = True, - out_path: str = None) -> None: - """ - Render sample data onto axis. - :param sd_token_lidar: Sample_data token of the lidar. - For compatibility with other render methods we also allow passing a camera sample_data token, - which is then converted to the corresponding lidar token. - :param axes_limit: Axes limit for lidar (measured in meters). - :param color_mode: How to color the lidar points, e.g. depth or height. - :param use_flat_vehicle_coordinates: See get_pointcloud(). - :param out_path: Optional path to save the rendered figure to disk. - If a path is provided, the plot is not shown to the user. 
- """ - # If we are provided a camera sd_token, we need to find the closest lidar token. - sample_data = self.get('sample_data', sd_token_lidar) - if sample_data['fileformat'] == 'jpg': - sd_token_lidar = self.find_corresponding_sample_data(sd_token_lidar, 'lidar') - - # Load the pointcloud and transform it to the specified viewpoint. - points, original_points = self.get_pointcloud(sd_token_lidar, use_flat_vehicle_coordinates) - - # Init axes. - plt.figure(figsize=(9, 9)) - plt.axis('off') - - # Show point cloud. - if color_mode == 'depth': - dists = np.sqrt(np.sum(original_points[:2, :] ** 2, axis=0)) - colors = np.minimum(1, dists / axes_limit / np.sqrt(2)) - elif color_mode == 'height': - heights = points[2, :] - colors = (heights - heights.min()) / (heights.max() - heights.min()) - colors = np.sqrt(colors) - else: - raise Exception('Error: Invalid color mode %s!' % color_mode) - point_scale = 0.2 - plt.scatter(points[0, :], points[1, :], c=colors, s=point_scale) - - # Show ego vehicle. - plt.plot(0, 0, 'x', color='red') - - # Limit visible range. - plt.xlim(-axes_limit, axes_limit) - plt.ylim(-axes_limit, axes_limit) - - # Save to disk. - if out_path is not None: - plt.savefig(out_path, bbox_inches='tight', dpi=150, pad_inches=0) - plt.close() - def render_trajectory(self, sample_token: str, rotation_yaw: float = 0.0, diff --git a/python-sdk/nuimages/scripts/render_images.py b/python-sdk/nuimages/scripts/render_images.py index 11a53484..8ffdb3aa 100644 --- a/python-sdk/nuimages/scripts/render_images.py +++ b/python-sdk/nuimages/scripts/render_images.py @@ -25,11 +25,8 @@ def render_images(nuim: NuImages, Note: The images rendered here are keyframes only. :param nuim: NuImages instance. 
:param mode: What to render: + "image" for the image without annotations, "annotated" for the image with annotations, - "raw" for the image without annotations, - "dept_dense" for dense depth image, - "dept_sparse" for sparse depth image, - "pointcloud" for a birds-eye view of the pointcloud, "trajectory" for a rendering of the trajectory of the vehice, "all" to render all of the above separately. :param cam_name: Only render images from a particular camera, e.g. "CAM_BACK'. @@ -45,7 +42,7 @@ def render_images(nuim: NuImages, """ # Check and convert inputs. assert out_type in ['image', 'video'], ' Error: Unknown out_type %s!' % out_type - all_modes = ['annotated', 'image', 'depth_dense', 'depth_sparse', 'pointcloud', 'trajectory'] + all_modes = ['image', 'annotated', 'trajectory'] assert mode in all_modes + ['all'], 'Error: Unknown mode %s!' % mode assert not (out_type == 'video' and mode == 'trajectory'), 'Error: Cannot render "trajectory" for videos!' @@ -123,7 +120,7 @@ def render_images(nuim: NuImages, sd_token_camera = sample['key_camera_token'] sensor = nuim.shortcut('sample_data', 'sensor', sd_token_camera) sample_cam_name = sensor['channel'] - sd_tokens_camera = nuim.get_sample_content(sample_token, modality='camera') + sd_tokens_camera = nuim.get_sample_content(sample_token) # We cannot render a video if there are missing camera sample_datas. 
if len(sd_tokens_camera) < 13 and out_type == 'video': @@ -189,12 +186,6 @@ def write_image(nuim: NuImages, sd_token_camera: str, mode: str, out_path: str) nuim.render_image(sd_token_camera, annotation_type='all', out_path=out_path) elif mode == 'image': nuim.render_image(sd_token_camera, annotation_type='none', out_path=out_path) - elif mode == 'depth_dense': - nuim.render_depth_dense(sd_token_camera, out_path=out_path) - elif mode == 'depth_sparse': - nuim.render_depth_sparse(sd_token_camera, out_path=out_path) - elif mode == 'pointcloud': - nuim.render_pointcloud(sd_token_camera, out_path=out_path) elif mode == 'trajectory': sd_camera = nuim.get('sample_data', sd_token_camera) nuim.render_trajectory(sd_camera['sample_token'], out_path=out_path) diff --git a/python-sdk/nuimages/tests/test_foreign_keys.py b/python-sdk/nuimages/tests/test_foreign_keys.py index e1f9662f..54591676 100644 --- a/python-sdk/nuimages/tests/test_foreign_keys.py +++ b/python-sdk/nuimages/tests/test_foreign_keys.py @@ -115,28 +115,23 @@ def test_prev_next(self) -> None: Test that the prev and next points in sample_data cover all entries and have the correct ordering. """ # Register all sample_datas. - sample_to_sample_datas = {'camera': defaultdict(lambda: []), 'lidar': defaultdict(lambda: [])} + sample_to_sample_datas = defaultdict(lambda: []) for sample_data in self.nuim.sample_data: - if sample_data['fileformat'] == 'jpg': - modality = 'camera' - else: - modality = 'lidar' - sample_to_sample_datas[modality][sample_data['sample_token']].append(sample_data['token']) + sample_to_sample_datas[sample_data['sample_token']].append(sample_data['token']) print('Checking prev-next pointers for completeness and correct ordering...') for sample in self.nuim.sample: - for modality in ['camera', 'lidar']: - # Compare the above sample_datas against those retrieved by using prev and next pointers. 
- sd_tokens_pointers = self.nuim.get_sample_content(sample['token'], modality) - sd_tokens_all = sample_to_sample_datas[modality][sample['token']] - self.assertTrue(set(sd_tokens_pointers) == set(sd_tokens_all), - 'Error: Inconsistency in prev/next pointers!') - - timestamps = [] - for sd_token in sd_tokens_pointers: - sample_data = self.nuim.get('sample_data', sd_token) - timestamps.append(sample_data['timestamp']) - self.assertTrue(sorted(timestamps) == timestamps, 'Error: Timestamps not properly sorted!') + # Compare the above sample_datas against those retrieved by using prev and next pointers. + sd_tokens_pointers = self.nuim.get_sample_content(sample['token']) + sd_tokens_all = sample_to_sample_datas[sample['token']] + self.assertTrue(set(sd_tokens_pointers) == set(sd_tokens_all), + 'Error: Inconsistency in prev/next pointers!') + + timestamps = [] + for sd_token in sd_tokens_pointers: + sample_data = self.nuim.get('sample_data', sd_token) + timestamps.append(sample_data['timestamp']) + self.assertTrue(sorted(timestamps) == timestamps, 'Error: Timestamps not properly sorted!') if __name__ == '__main__': diff --git a/python-sdk/nuimages/tests/test_overflow_bug.py b/python-sdk/nuimages/tests/test_overflow_bug.py deleted file mode 100644 index 253959bf..00000000 --- a/python-sdk/nuimages/tests/test_overflow_bug.py +++ /dev/null @@ -1,11 +0,0 @@ -from nuimages.nuimages import NuImages - -tokens = [ - '6b17bab7b6f849abb7bbae05806eb2b9' # Math overflow bug. -] - -# TODO: Delete this file once everything is well tested. - -nuim = NuImages(version='v1.0-val', verbose=False) -for token in tokens: - nuim.render_depth_dense(token) diff --git a/python-sdk/nuimages/utils/lidar.py b/python-sdk/nuimages/utils/lidar.py deleted file mode 100644 index cad4cec2..00000000 --- a/python-sdk/nuimages/utils/lidar.py +++ /dev/null @@ -1,147 +0,0 @@ -# nuScenes dev-kit. -# Code written by Asha Asvathaman & Holger Caesar, 2020. 
- -from typing import Tuple, Any - -import cv2 -import numpy as np -from matplotlib.colors import Normalize -from numpy.ma.core import MaskedArray - - -def depth_map(pts: np.ndarray, - depths: np.ndarray, - im_size: Tuple[int, int], - scale: float = 1 / 8, - n_dilate: int = None, - n_gauss: int = None, - sigma_gauss: float = None) -> np.ndarray: - """ - This function computes a depth map given a lidar pointcloud projected to the camera. - Depth completion can be used to sparsify the depth map. - :param pts: Lidar pointcloud in image coordinates. - :param depths: Depth of the points. - :param im_size: The image width and height. - :param scale: The scaling factor applied to the depth map. - :param n_dilate: Dilation filter size. - :param n_gauss: Gaussian filter size. - :param sigma_gauss: Gaussian filter sigma. - :return: The depth map. - """ - # Store the minimum depth in the corresponding pixels. - # Apply downsampling to make it more efficient and points larger to be more visible. - pxs = (pts[0, :] * scale).astype(np.int32) - pys = (pts[1, :] * scale).astype(np.int32) - - depth_map_size = np.array(im_size)[::-1] * scale - depth_map_size = np.ceil(depth_map_size).astype(np.int32) - depth_map = np.zeros(depth_map_size, dtype=np.float32) - for x, y, depth in zip(pxs, pys, depths): - if depth_map[y][x] == 0: - depth_map[y][x] = depth - else: - depth_map[y][x] = min(depth_map[y][x], depth) - - # Set invalid pixels to max_depth. - invalid = depth_map == 0 - depth_map[invalid] = np.max(depth_map) - - # Perform erosion to grow points - if n_dilate is not None: - depth_map = cv2.morphologyEx(depth_map, cv2.MORPH_ERODE, np.ones((n_dilate, n_dilate), np.uint8)) - - # Perform Gaussian blur to smoothen points. - # Note that this should be used in moderation as the Gaussian filter also uses invalid depth values. 
- if n_gauss is not None: - blurred = cv2.GaussianBlur(depth_map, (n_gauss, n_gauss), sigma_gauss) - valid = depth_map > 0 - depth_map[valid] = blurred[valid] - - return depth_map - - -def distort_pointcloud(points: np.ndarray, camera_distortion: np.ndarray, cam_name: str) \ - -> Tuple[np.ndarray, np.ndarray]: - """ - Distort the pointcloud coordinates to map into the image. - Note: This function discards some invalid points, that do not project into the image. - This happens if the radial distortion function is not injective, which is the case if k3 is negative. - We also use the same mechanism to avoid float overflows in the k4 portion of CAM_BACK. - :param cam_name: Name of the camera. - :param points: Lidar pointcloud. - :param camera_distortion: Distortion coefficents of the camera. - :return: Distorted pointcloud and depth values. - """ - assert len(camera_distortion) > 0, 'Error: Empty camera_distortion! Check that the camera_distortion is from a' \ - 'lidar and not a camera sample_data!' - k1 = camera_distortion[0] - k2 = camera_distortion[1] - p1 = camera_distortion[2] - p2 = camera_distortion[3] - k3 = camera_distortion[4] - - # Store depth to return it. - depths = points[2, :] - - # Normalize. - points_x = points[0, :] / points[2, :] - points_y = points[1, :] / points[2, :] - r_sq = points_x ** 2 + points_y ** 2 - - # Filter points from outside the frustum that are likely to map inside it. - # This happens when the distortion function is not injective, i.e. when k3 < 0 for all cameras, - # apart from CAM_BACK which has distortion coefficient k6, which prevents warping. - # However, we also do it elsewhere to avoid numerical overflows. - if cam_name == 'CAM_BACK': - r_sq_max = 10000 - else: - r_sq_max = 1 - mask = r_sq < r_sq_max - depths = depths[mask] - points_x = points_x[mask] - points_y = points_y[mask] - r_sq = r_sq[mask] - - # Specify the basic distortion model. 
- radial_distort = 1 + k1 * r_sq + k2 * r_sq ** 2 + k3 * r_sq ** 3 - - # For fish-eye lenses, add another parameter to the distortion model. - if cam_name == 'CAM_BACK': - k4 = camera_distortion[5] - radial_distort = radial_distort + k4 * r_sq ** 4 - assert not np.any(np.isinf(radial_distort)) and not np.any(np.isnan(radial_distort)) - - # Apply distortion to points. - x = radial_distort * points_x + 2 * p1 * points_x * points_y + p2 * (r_sq + 2 * points_x ** 2) - y = radial_distort * points_y + p1 * (r_sq + 2 * points_y ** 2) + 2 * p2 * points_x * points_y - - # Define output. - # Note that the third dimension is 1 as the points are already normalized above. - points = np.ones((3, len(points_x))) - points[0, :] = x - points[1, :] = y - assert points.shape[1] == len(depths), 'Error: Code is inconsistent!' - - return points, depths - - -class InvertedNormalize(Normalize): - - def __call__(self, value: MaskedArray, clip: Any = None) -> MaskedArray: - """ - A custom inverted colormap that stretches the close depth values out to have more color resolution. - :param value: - :param clip: - :return: - """ - assert clip is None, 'Error: Clip option not supported!' - - # Define a non-linear mapping based on 4 keypoints. - scaling_x = [0, 0.2, 0.5, 1] - scaling_y = [0, 0.5, 0.95, 1] - - # Apply that scaling, taking into account the specified minimum and maximum. 
- x = self.vmin + np.array(scaling_x) * (self.vmax - self.vmin) - y = scaling_y - colors = np.interp(value, x, y) - return 1 - np.ma.masked_array(colors) From 1bf5eb532dfe640aeda2c108738507ab13954263 Mon Sep 17 00:00:00 2001 From: Holger Caesar Date: Wed, 29 Jul 2020 10:52:37 +0800 Subject: [PATCH 2/7] Update documentation --- docs/instructions_nuimages.md | 2 +- docs/schema_nuimages.md | 21 ++++++++++----------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/docs/instructions_nuimages.md b/docs/instructions_nuimages.md index e81b396c..36c8529a 100644 --- a/docs/instructions_nuimages.md +++ b/docs/instructions_nuimages.md @@ -119,7 +119,7 @@ nuImages includes surface classes as well: | Label | Short Description | | --- | --- | | [`flat.driveable_surface`](#1-flatdriveable_surface) | All paved or unpaved surfaces that a car can drive on with no concern of traffic rules. | -| [`vehicle.ego`](#2-vehicleego) | The vehicle on which the cameras, radar and lidar are mounted, that is sometimes visible at the bottom of the image. | +| [`vehicle.ego`](#2-vehicleego) | The vehicle on which the sensors are mounted, that is sometimes visible at the bottom of the image. | ### 1. flat.driveable_surface ![driveable_1](https://www.nuscenes.org/public/images/taxonomy_imgs/nuimages/driveable_1.png) diff --git a/docs/schema_nuimages.md b/docs/schema_nuimages.md index 49c6dd49..628ef3ad 100644 --- a/docs/schema_nuimages.md +++ b/docs/schema_nuimages.md @@ -24,9 +24,9 @@ attribute { calibrated_sensor --------- -Definition of a particular sensor (lidar/camera, but no radar) as calibrated on a particular vehicle. +Definition of a particular camera as calibrated on a particular vehicle. All extrinsic parameters are given with respect to the ego vehicle body frame. -All camera images come undistorted and rectified. +Contrary to nuScenes, all camera images come distorted and unrectified. ``` calibrated_sensor { "token": -- Unique record identifier. 
@@ -100,11 +100,11 @@ object_ann { sample_data --------- -A sensor data e.g. image or lidar pointcloud. Note that we don't have radar in nuImages. -Sample_data covers all sensor data, regardless of whether it is a keyframe or not. -For every keyframe image or lidar, we also include up to 6 past and 6 future sweeps at 2 Hz. -We can navigate between consecutive lidar or camera sample_datas using the `prev` and `next` pointers. -Only keyframe (sample) images are annotated. +Sample_data contains the images and information about when they were capture. +Sample_data covers all images, regardless of whether they are a keyframe or not. +Only keyframes are annotated. +For every keyframe, we also include up to 6 past and 6 future sweeps at 2 Hz. +We can navigate between consecutive images using the `prev` and `next` pointers. The sample timestamp is inherited from the keyframe camera sample_data timestamp. ``` sample_data { @@ -126,15 +126,14 @@ sample_data { sample --------- A sample is an annotated keyframe selected from a large pool of images in a log. -Every sample has up to 13 lidar sample_datas and 13 camera sample_datas corresponding to it. -These include the actual lidar and camera keyframe sample_datas, which can be accessed via the `key_*_token` fields. +Every sample has up to 13 camera sample_datas corresponding to it. +These include the keyframe, which can be accessed via `key_camera_token`. ``` sample { "token": -- Unique record identifier. "timestamp": -- Unix time stamp. "log_token": -- Foreign key pointing to the log. "key_camera_token": -- Foreign key of the sample_data corresponding to the camera keyframe. - "key_lidar_token": -- Foreign key of the sample_data corresponding to the lidar keyframe. } ``` @@ -146,7 +145,7 @@ A specific sensor type. sensor { "token": -- Unique record identifier. "channel": -- Sensor channel name. - "modality": {camera, lidar} -- Sensor modality. Supports category(ies) in brackets. + "modality": -- Sensor modality. 
Always "camera" in nuImages. } ``` From 63c51d0e4f50e4049a894fbec6243b08527435d8 Mon Sep 17 00:00:00 2001 From: Holger Caesar Date: Wed, 29 Jul 2020 15:12:45 +0800 Subject: [PATCH 3/7] Simplify variable names --- python-sdk/nuimages/nuimages.py | 35 +++++---- python-sdk/nuimages/scripts/render_images.py | 40 +++++------ python-sdk/tutorials/nuimages_tutorial.ipynb | 74 +++----------------- 3 files changed, 48 insertions(+), 101 deletions(-) diff --git a/python-sdk/nuimages/nuimages.py b/python-sdk/nuimages/nuimages.py index 7ae4e7a9..570dca11 100644 --- a/python-sdk/nuimages/nuimages.py +++ b/python-sdk/nuimages/nuimages.py @@ -8,7 +8,6 @@ from collections import defaultdict from typing import Any, List, Dict, Optional, Tuple, Callable -import cv2 import matplotlib.pyplot as plt import numpy as np from PIL import Image, ImageDraw @@ -175,8 +174,8 @@ def shortcut(self, src_table: str, tgt_table: str, src_token: str) -> Dict[str, :return: The entry of the destination table correspondings to the source token. 
""" if src_table == 'sample_data' and tgt_table == 'sensor': - sd_camera = self.get('sample_data', src_token) - calibrated_sensor = self.get('calibrated_sensor', sd_camera['calibrated_sensor_token']) + sample_data = self.get('sample_data', src_token) + calibrated_sensor = self.get('calibrated_sensor', sample_data['calibrated_sensor_token']) sensor = self.get('sensor', calibrated_sensor['sensor_token']) return sensor @@ -338,9 +337,9 @@ def list_anns(self, sample_token: str, verbose: bool = True) -> Tuple[List[str], self.load_tables(['sample', 'object_ann', 'surface_ann', 'category']) sample = self.get('sample', sample_token) - sd_token_camera = sample['key_camera_token'] - object_anns = [o for o in self.object_ann if o['sample_data_token'] == sd_token_camera] - surface_anns = [o for o in self.surface_ann if o['sample_data_token'] == sd_token_camera] + key_camera_token = sample['key_camera_token'] + object_anns = [o for o in self.object_ann if o['sample_data_token'] == key_camera_token] + surface_anns = [o for o in self.surface_ann if o['sample_data_token'] == key_camera_token] if verbose: print('Printing object annotations:') @@ -493,8 +492,8 @@ def get_trajectory(self, # Find keyframe translation and rotation. 
sample = self.get('sample', sample_token) - sd_camera = self.get('sample_data', sample['key_camera_token']) - ego_pose = self.get('ego_pose', sd_camera['ego_pose_token']) + sample_data = self.get('sample_data', sample['key_camera_token']) + ego_pose = self.get('ego_pose', sample_data['ego_pose_token']) key_rotation = Quaternion(ego_pose['rotation']) key_timestamp = ego_pose['timestamp'] key_index = [i for i, t in enumerate(timestamps) if t == key_timestamp][0] @@ -511,7 +510,7 @@ def get_trajectory(self, return translations, key_index def get_segmentation(self, - sd_token_camera: str) -> Tuple[np.ndarray, np.ndarray]: + sd_token: str) -> Tuple[np.ndarray, np.ndarray]: """ Produces two segmentation masks as numpy arrays of size H x W each, where H and W are the height and width of the camera image respectively: @@ -521,11 +520,11 @@ def get_segmentation(self, - instance mask: A mask in which each pixel is an integer value between 0 to N, where N is the number of objects in a given camera sample_data. Each integer corresponds to the order in which the object was drawn into the mask. - :param sd_token_camera: The token of the sample_data to be rendered. + :param sd_token: The token of the sample_data to be rendered. :return: Two 2D numpy arrays (one semantic mask , and one instance mask ). """ # Validate inputs. - sample_data = self.get('sample_data', sd_token_camera) + sample_data = self.get('sample_data', sd_token) assert sample_data['is_key_frame'], 'Error: Cannot render annotations for non keyframes!' # Build a mapping from name to index to look up index in O(1) time. @@ -562,7 +561,7 @@ def get_segmentation(self, instanceseg_mask = np.zeros((height, width)).astype('int32') # Load stuff / surface regions. - surface_anns = [o for o in self.surface_ann if o['sample_data_token'] == sd_token_camera] + surface_anns = [o for o in self.surface_ann if o['sample_data_token'] == sd_token] # Draw stuff / surface regions. 
for ann in surface_anns: @@ -577,7 +576,7 @@ def get_segmentation(self, semseg_mask[mask == 1] = nuim_name2idx_mapping[category_name] # Load object instances. - object_anns = [o for o in self.object_ann if o['sample_data_token'] == sd_token_camera] + object_anns = [o for o in self.object_ann if o['sample_data_token'] == sd_token] # Sort by token to ensure that objects always appear in the instance mask in the same order. object_anns = sorted(object_anns, key=lambda k: k['token']) @@ -605,7 +604,7 @@ def get_segmentation(self, # ### Rendering methods. ### def render_image(self, - sd_token_camera: str, + sd_token: str, annotation_type: str = 'all', with_category: bool = False, with_attributes: bool = False, @@ -617,7 +616,7 @@ def render_image(self, out_path: str = None) -> None: """ Renders an image (sample_data), optionally with annotations overlaid. - :param sd_token_camera: The token of the sample_data to be rendered. + :param sd_token: The token of the sample_data to be rendered. :param annotation_type: The types of annotations to draw on the image; there are four options: 'all': Draw surfaces and objects, subject to any filtering done by object_tokens and surface_tokens. 'surfaces': Draw only surfaces, subject to any filtering done by surface_tokens. @@ -636,7 +635,7 @@ def render_image(self, If a path is provided, the plot is not shown to the user. """ # Validate inputs. - sample_data = self.get('sample_data', sd_token_camera) + sample_data = self.get('sample_data', sd_token) if not sample_data['is_key_frame']: assert not annotation_type, 'Error: Cannot render annotations for non keyframes!' assert not with_attributes, 'Error: Cannot render attributes for non keyframes!' @@ -662,7 +661,7 @@ def render_image(self, if annotation_type is not 'none': if annotation_type == 'all' or annotation_type == 'surfaces': # Load stuff / surface regions. 
- surface_anns = [o for o in self.surface_ann if o['sample_data_token'] == sd_token_camera] + surface_anns = [o for o in self.surface_ann if o['sample_data_token'] == sd_token] if surface_tokens is not None: surface_anns = [o for o in surface_anns if o['token'] in surface_tokens] @@ -681,7 +680,7 @@ def render_image(self, if annotation_type == 'all' or annotation_type == 'objects': # Load object instances. - object_anns = [o for o in self.object_ann if o['sample_data_token'] == sd_token_camera] + object_anns = [o for o in self.object_ann if o['sample_data_token'] == sd_token] if object_tokens is not None: object_anns = [o for o in object_anns if o['token'] in object_tokens] diff --git a/python-sdk/nuimages/scripts/render_images.py b/python-sdk/nuimages/scripts/render_images.py index 8ffdb3aa..1508fbb3 100644 --- a/python-sdk/nuimages/scripts/render_images.py +++ b/python-sdk/nuimages/scripts/render_images.py @@ -72,8 +72,8 @@ def render_images(nuim: NuImages, sample_tokens_cam = [] for sample_token in sample_tokens: sample = nuim.get('sample', sample_token) - sd_token_camera = sample['key_camera_token'] - sensor = nuim.shortcut('sample_data', 'sensor', sd_token_camera) + key_camera_token = sample['key_camera_token'] + sensor = nuim.shortcut('sample_data', 'sensor', key_camera_token) if sensor['channel'] == cam_name: sample_tokens_cam.append(sample_token) sample_tokens = sample_tokens_cam @@ -100,8 +100,8 @@ def render_images(nuim: NuImages, sample_tokens_cleaned = [] for sample_token in sample_tokens: sample = nuim.get('sample', sample_token) - sd_token_camera = sample['key_camera_token'] - category_names = sd_to_object_cat_names[sd_token_camera] + key_camera_token = sample['key_camera_token'] + category_names = sd_to_object_cat_names[key_camera_token] if any([c in category_names for c in filter_categories]): sample_tokens_cleaned.append(sample_token) sample_tokens = sample_tokens_cleaned @@ -117,42 +117,42 @@ def render_images(nuim: NuImages, sample = 
nuim.get('sample', sample_token) log = nuim.get('log', sample['log_token']) log_name = log['logfile'] - sd_token_camera = sample['key_camera_token'] - sensor = nuim.shortcut('sample_data', 'sensor', sd_token_camera) + key_camera_token = sample['key_camera_token'] + sensor = nuim.shortcut('sample_data', 'sensor', key_camera_token) sample_cam_name = sensor['channel'] - sd_tokens_camera = nuim.get_sample_content(sample_token) + sd_tokens = nuim.get_sample_content(sample_token) # We cannot render a video if there are missing camera sample_datas. - if len(sd_tokens_camera) < 13 and out_type == 'video': + if len(sd_tokens) < 13 and out_type == 'video': continue for mode in modes: out_path_prefix = os.path.join(out_dir, '%s_%s_%s_%s' % (log_name, sample_token, sample_cam_name, mode)) if out_type == 'image': - write_image(nuim, sd_token_camera, mode, '%s.jpg' % out_path_prefix) + write_image(nuim, key_camera_token, mode, '%s.jpg' % out_path_prefix) elif out_type == 'video': - write_video(nuim, sd_tokens_camera, mode, out_path_prefix, cleanup=cleanup) + write_video(nuim, sd_tokens, mode, out_path_prefix, cleanup=cleanup) def write_video(nuim: NuImages, - sd_tokens_camera: List[str], + sd_tokens: List[str], mode: str, out_path_prefix: str, cleanup: bool = True) -> None: """ Render a video by combining all the images of type mode for each sample_data. :param nuim: NuImages instance. - :param sd_tokens_camera: All camera sample_data tokens in chronological order. + :param sd_tokens: All camera sample_data tokens in chronological order. :param mode: The mode - see render_images(). :param out_path_prefix: The file prefix used for the images and video. :param cleanup: Whether to delete images after rendering the video. """ # Loop through each frame to create the video. 
out_paths = [] - for i, sd_token_camera in enumerate(sd_tokens_camera): + for i, sd_token in enumerate(sd_tokens): out_path = '%s_%d.jpg' % (out_path_prefix, i) out_paths.append(out_path) - write_image(nuim, sd_token_camera, mode, out_path) + write_image(nuim, sd_token, mode, out_path) # Create video. first_im = cv2.imread(out_paths[0]) @@ -174,21 +174,21 @@ def write_video(nuim: NuImages, out.release() -def write_image(nuim: NuImages, sd_token_camera: str, mode: str, out_path: str) -> None: +def write_image(nuim: NuImages, sd_token: str, mode: str, out_path: str) -> None: """ Render a single image of type mode for the given sample_data. :param nuim: NuImages instance. - :param sd_token_camera: The sample_data token of the camera. + :param sd_token: The sample_data token. :param mode: The mode - see render_images(). :param out_path: The file to write the image to. """ if mode == 'annotated': - nuim.render_image(sd_token_camera, annotation_type='all', out_path=out_path) + nuim.render_image(sd_token, annotation_type='all', out_path=out_path) elif mode == 'image': - nuim.render_image(sd_token_camera, annotation_type='none', out_path=out_path) + nuim.render_image(sd_token, annotation_type='none', out_path=out_path) elif mode == 'trajectory': - sd_camera = nuim.get('sample_data', sd_token_camera) - nuim.render_trajectory(sd_camera['sample_token'], out_path=out_path) + sample_data = nuim.get('sample_data', sd_token) + nuim.render_trajectory(sample_data['sample_token'], out_path=out_path) else: raise Exception('Error: Unknown mode %s!' % mode) diff --git a/python-sdk/tutorials/nuimages_tutorial.ipynb b/python-sdk/tutorials/nuimages_tutorial.ipynb index 56b7ecc3..f17a6e13 100644 --- a/python-sdk/tutorials/nuimages_tutorial.ipynb +++ b/python-sdk/tutorials/nuimages_tutorial.ipynb @@ -149,7 +149,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "From the sample, we can directly access the sample_data corresponding to lidar and camera keyframe. 
These will be useful further below." + "From the sample, we can directly access the corresponding keyframe. This will be useful further below." ] }, { @@ -158,11 +158,8 @@ "metadata": {}, "outputs": [], "source": [ - "sd_token_camera = sample['key_camera_token']\n", - "print(sd_token_camera)\n", - "\n", - "sd_token_lidar = sample['key_lidar_token']\n", - "print(sd_token_lidar)" + "key_camera_token = sample['key_camera_token']\n", + "print(key_camera_token)" ] }, { @@ -198,7 +195,7 @@ "metadata": {}, "outputs": [], "source": [ - "nuim.render_image(sd_token_camera, annotation_type='all',\n", + "nuim.render_image(key_camera_token, annotation_type='all',\n", " with_category=True, with_attributes=False, box_line_width=0, render_scale=2)" ] }, @@ -233,7 +230,7 @@ }, "outputs": [], "source": [ - "nuim.render_image(sd_token_camera, with_category=True, object_tokens=[object_tokens[0]], surface_tokens=[surface_tokens[0]])" + "nuim.render_image(key_camera_token, with_category=True, object_tokens=[object_tokens[0]], surface_tokens=[surface_tokens[0]])" ] }, { @@ -251,7 +248,7 @@ "source": [ "import matplotlib.pyplot as plt\n", "\n", - "semantic_mask, instance_mask = nuim.get_segmentation(sd_token_camera)\n", + "semantic_mask, instance_mask = nuim.get_segmentation(key_camera_token)\n", "\n", "plt.figure(figsize=(32, 9))\n", "\n", @@ -267,8 +264,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Every annotated image (sample) comes with up to 6 past and 6 future images, spaced evenly at 500ms +- 250ms. For each image we have a matching lidar pointcloud. However, a small percentage of the samples has less sample_datas, either because they were at the beginning or end of a log, or due to delays or dropped data packages.\n", - "`list_sample_content()` shows for each sample all the associated sample_datas, which are images (from the same camera) and lidar pointclouds." 
+ "Every annotated image (sample) comes with up to 6 past and 6 future images, spaced evenly at 500ms +- 250ms. However, a small percentage of the samples has less sample_datas, either because they were at the beginning or end of a log, or due to delays or dropped data packages.\n", + "`list_sample_content()` shows for each sample all the associated sample_datas." ] }, { @@ -293,8 +290,8 @@ "metadata": {}, "outputs": [], "source": [ - "next_sd_token_camera = nuim.get('sample_data', sd_token_camera)['next']\n", - "next_sd_token_camera" + "next_camera_token = nuim.get('sample_data', key_camera_token)['next']\n", + "next_camera_token" ] }, { @@ -313,60 +310,11 @@ "outputs": [], "source": [ "try:\n", - " nuim.render_image(next_sd_token_camera, annotation_type='none')\n", + " nuim.render_image(next_camera_token, annotation_type='none')\n", "except Exception as e:\n", " print('As expected, we encountered this error:', e)" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As mentioned, most images have an associated lidar pointcloud. We can project this pointcloud to the image and display it using `render_depth_sparse()`. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "nuim.render_depth_sparse(sd_token_camera)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Since the pointcloud is relatively sparse, this image is hard to see. We can enable depth completion of the pointcloud to get a denser version of the image using `render_depth_dense()`. To get the raw data of this plot, use `get_depth()`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "nuim.render_depth_dense(sd_token_camera)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Rather than projecting the lidar to the camera, we can also render the \"raw\" pointcloud from a birds-eye view. 
Here we are coloring the pointcloud by point height and limiting the view to +-10m around the lidar. Another color option is to render by depth. To get the raw data of this plot, use `get_pointcloud()`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sd_token_lidar = sample['key_lidar_token']\n", - "nuim.render_pointcloud(sd_token_lidar, color_mode='height', axes_limit=10)" - ] - }, { "cell_type": "markdown", "metadata": {}, From f213b8ee098375226ac52537379aa3a6d56e1f6b Mon Sep 17 00:00:00 2001 From: Holger Caesar Date: Wed, 29 Jul 2020 16:37:07 +0800 Subject: [PATCH 4/7] Wording --- docs/schema_nuimages.md | 2 +- python-sdk/nuimages/scripts/render_images.py | 2 +- python-sdk/tutorials/nuimages_tutorial.ipynb | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/schema_nuimages.md b/docs/schema_nuimages.md index 628ef3ad..04977c82 100644 --- a/docs/schema_nuimages.md +++ b/docs/schema_nuimages.md @@ -100,7 +100,7 @@ object_ann { sample_data --------- -Sample_data contains the images and information about when they were capture. +Sample_data contains the images and information about when they were captured. Sample_data covers all images, regardless of whether they are a keyframe or not. Only keyframes are annotated. For every keyframe, we also include up to 6 past and 6 future sweeps at 2 Hz. diff --git a/python-sdk/nuimages/scripts/render_images.py b/python-sdk/nuimages/scripts/render_images.py index 1508fbb3..655221c9 100644 --- a/python-sdk/nuimages/scripts/render_images.py +++ b/python-sdk/nuimages/scripts/render_images.py @@ -142,7 +142,7 @@ def write_video(nuim: NuImages, """ Render a video by combining all the images of type mode for each sample_data. :param nuim: NuImages instance. - :param sd_tokens: All camera sample_data tokens in chronological order. + :param sd_tokens: All sample_data tokens in chronological order. :param mode: The mode - see render_images(). 
:param out_path_prefix: The file prefix used for the images and video. :param cleanup: Whether to delete images after rendering the video. diff --git a/python-sdk/tutorials/nuimages_tutorial.ipynb b/python-sdk/tutorials/nuimages_tutorial.ipynb index f17a6e13..c97bbf8a 100644 --- a/python-sdk/tutorials/nuimages_tutorial.ipynb +++ b/python-sdk/tutorials/nuimages_tutorial.ipynb @@ -149,7 +149,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "From the sample, we can directly access the corresponding keyframe. This will be useful further below." + "From the sample, we can directly access the corresponding keyframe sample data. This will be useful further below." ] }, { @@ -264,7 +264,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Every annotated image (sample) comes with up to 6 past and 6 future images, spaced evenly at 500ms +- 250ms. However, a small percentage of the samples has less sample_datas, either because they were at the beginning or end of a log, or due to delays or dropped data packages.\n", + "Every annotated image (keyframe) comes with up to 6 past and 6 future images, spaced evenly at 500ms +- 250ms. However, a small percentage of the samples has less sample_datas, either because they were at the beginning or end of a log, or due to delays or dropped data packages.\n", "`list_sample_content()` shows for each sample all the associated sample_datas." ] }, From d78075f3d0bfb49dcd12b7f11e58a87a985b2e60 Mon Sep 17 00:00:00 2001 From: Holger Caesar Date: Wed, 29 Jul 2020 16:39:02 +0800 Subject: [PATCH 5/7] Wording --- docs/schema_nuimages.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/schema_nuimages.md b/docs/schema_nuimages.md index 04977c82..fbbe89a7 100644 --- a/docs/schema_nuimages.md +++ b/docs/schema_nuimages.md @@ -42,7 +42,7 @@ category --------- Taxonomy of object categories (e.g. vehicle, human). Subcategories are delineated by a period (e.g. `human.pedestrian.adult`). 
-The categories in nuImages are the same as in the nuScenes (w/o lidarseg), plus `flat.driveable_surface`. +The categories in nuImages are the same as in nuScenes (w/o lidarseg), plus `flat.driveable_surface`. ``` category { "token": -- Unique record identifier. From ab01197a4d918566d85dd5bb85ed760b04a36d54 Mon Sep 17 00:00:00 2001 From: Holger Caesar Date: Wed, 29 Jul 2020 18:15:30 +0800 Subject: [PATCH 6/7] Fix wrong assertion --- python-sdk/nuimages/nuimages.py | 4 ++-- python-sdk/tutorials/nuimages_tutorial.ipynb | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python-sdk/nuimages/nuimages.py b/python-sdk/nuimages/nuimages.py index 570dca11..65c3985e 100644 --- a/python-sdk/nuimages/nuimages.py +++ b/python-sdk/nuimages/nuimages.py @@ -239,7 +239,7 @@ def list_attributes(self, sort_by: str = 'freq') -> None: print(format_str.format( attribute_freqs[attribute['token']], attribute['name'], attribute['description'])) - def list_sensors(self) -> None: + def list_cameras(self) -> None: """ List all cameras and the number of samples for each. """ @@ -637,7 +637,7 @@ def render_image(self, # Validate inputs. sample_data = self.get('sample_data', sd_token) if not sample_data['is_key_frame']: - assert not annotation_type, 'Error: Cannot render annotations for non keyframes!' + assert annotation_type != 'none', 'Error: Cannot render annotations for non keyframes!' assert not with_attributes, 'Error: Cannot render attributes for non keyframes!' if with_attributes: assert with_category, 'In order to set with_attributes=True, with_category must be True.' 
diff --git a/python-sdk/tutorials/nuimages_tutorial.ipynb b/python-sdk/tutorials/nuimages_tutorial.ipynb index c97bbf8a..51360725 100644 --- a/python-sdk/tutorials/nuimages_tutorial.ipynb +++ b/python-sdk/tutorials/nuimages_tutorial.ipynb @@ -452,7 +452,7 @@ "metadata": {}, "outputs": [], "source": [ - "nuim.list_sensors()" + "nuim.list_cameras()" ] } ], From b36e774dace1031ccef2e9d3d1bf9f9a8eb15b6c Mon Sep 17 00:00:00 2001 From: Holger Caesar Date: Fri, 31 Jul 2020 13:11:45 +0800 Subject: [PATCH 7/7] Typo --- python-sdk/nuimages/nuimages.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python-sdk/nuimages/nuimages.py b/python-sdk/nuimages/nuimages.py index 65c3985e..69472b87 100644 --- a/python-sdk/nuimages/nuimages.py +++ b/python-sdk/nuimages/nuimages.py @@ -171,7 +171,7 @@ def shortcut(self, src_table: str, tgt_table: str, src_token: str) -> Dict[str, :param src_table: The name of the source table. :param tgt_table: The name of the target table. :param src_token: The source token. - :return: The entry of the destination table correspondings to the source token. + :return: The entry of the destination table corresponding to the source token. """ if src_table == 'sample_data' and tgt_table == 'sensor': sample_data = self.get('sample_data', src_token)