From 40bd7b1b27eeb2d4a6316f37bebd268e0248c9d6 Mon Sep 17 00:00:00 2001 From: sunjiahao1999 <578431509@qq.com> Date: Sun, 8 Oct 2023 13:59:13 +0800 Subject: [PATCH 01/11] fix lidar demo --- demo/new_pcd_demo.py | 96 ++++++++++ .../apis/inferencers/base_3d_inferencer.py | 177 ++++++++++++------ .../inferencers/lidar_det3d_inferencer.py | 85 +++++++-- mmdet3d/visualization/local_visualizer.py | 8 +- 4 files changed, 294 insertions(+), 72 deletions(-) create mode 100644 demo/new_pcd_demo.py diff --git a/demo/new_pcd_demo.py b/demo/new_pcd_demo.py new file mode 100644 index 0000000000..9dd80979c7 --- /dev/null +++ b/demo/new_pcd_demo.py @@ -0,0 +1,96 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import logging +import os +from argparse import ArgumentParser + +from mmengine.logging import print_log + +from mmdet3d.apis import LidarDet3DInferencer + + +def parse_args(): + parser = ArgumentParser() + parser.add_argument('pcd', help='Point cloud file') + parser.add_argument('model', help='Config file') + parser.add_argument('weights', help='Checkpoint file') + parser.add_argument( + '--device', default='cuda:0', help='Device used for inference') + parser.add_argument( + '--pred-score-thr', + type=float, + default=0.3, + help='bbox score threshold') + parser.add_argument( + '--out-dir', + type=str, + default='outputs', + help='Output directory of prediction results.') + parser.add_argument( + '--show', + action='store_true', + help='Show online visualization results') + parser.add_argument( + '--wait_time', + type=float, + default=-1, + help='The interval of show (s). Demo will be blocked in showing' + 'results, if wait_time is -1. Defaults to -1.') + parser.add_argument( + '--no-save-vis', + action='store_true', + help='Do not save detection vis results') + parser.add_argument( + '--no-save-pred', + action='store_true', + help='Do not save detection json results') + parser.add_argument( + '--print-result', + action='store_true', + help='Whether to print the results.') + call_args = vars(parser.parse_args()) + + call_args['inputs'] = dict(points=call_args.pop('pcd')) + + if call_args['no_save_vis'] and call_args['no_save_pred']: + call_args['out_dir'] = '' + + init_kws = ['model', 'weights', 'device'] + init_args = {} + for init_kw in init_kws: + init_args[init_kw] = call_args.pop(init_kw) + + # NOTE: If your operating environment does not have a display device, + # (e.g. a remote server), you can save the predictions and visualize + # them in local devices. + if os.environ.get('DISPLAY') is None and call_args['show']: + print_log( + 'Display device not found. `--show` is forced to False', + logger='current', + level=logging.WARNING) + call_args['show'] = False + + return init_args, call_args + + +def main(): + # TODO: Support inference of point cloud numpy file. 
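+    # `init_args` holds the constructor options (model/weights/device),
+    # while the remaining `call_args` are forwarded to the inferencer call.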
+ init_args, call_args = parse_args() + + inferencer = LidarDet3DInferencer(**init_args) + # inferencer(**call_args) + + inputs = call_args.pop('inputs') + _, _, visualize_kwargs, _ = inferencer._dispatch_kwargs(**call_args) + inputs = inferencer._inputs_to_list(inputs) + inferencer.visualize_preds_fromfile(inputs, ['outputs/preds/000008.json'], + **visualize_kwargs) + + if call_args['out_dir'] != '' and not (call_args['no_save_vis'] + and call_args['no_save_pred']): + print_log( + f'results have been saved at {call_args["out_dir"]}', + logger='current') + + +if __name__ == '__main__': + main() diff --git a/mmdet3d/apis/inferencers/base_3d_inferencer.py b/mmdet3d/apis/inferencers/base_3d_inferencer.py index 5bf411a8d4..26004512a3 100644 --- a/mmdet3d/apis/inferencers/base_3d_inferencer.py +++ b/mmdet3d/apis/inferencers/base_3d_inferencer.py @@ -1,18 +1,24 @@ # Copyright (c) OpenMMLab. All rights reserved. +import logging +import os.path as osp +from copy import deepcopy from typing import Dict, List, Optional, Sequence, Tuple, Union -import mmengine import numpy as np import torch.nn as nn +from mmengine import dump, print_log from mmengine.fileio import (get_file_backend, isdir, join_path, list_dir_or_file) from mmengine.infer.infer import BaseInferencer, ModelType +from mmengine.model.utils import revert_sync_batchnorm from mmengine.registry import init_default_scope from mmengine.runner import load_checkpoint from mmengine.structures import InstanceData from mmengine.visualization import Visualizer +from rich.progress import track -from mmdet3d.registry import MODELS +from mmdet3d.registry import DATASETS, MODELS +from mmdet3d.structures import Box3DMode, Det3DDataSample from mmdet3d.utils import ConfigType InstanceList = List[InstanceData] @@ -48,10 +54,10 @@ class Base3DInferencer(BaseInferencer): forward_kwargs: set = set() visualize_kwargs: set = { 'return_vis', 'show', 'wait_time', 'draw_pred', 'pred_score_thr', - 'img_out_dir' + 'img_out_dir', 'no_save_vis' } postprocess_kwargs: set = { - 'print_result', 'pred_out_file', 'return_datasample' + 'print_result', 'pred_out_dir', 'return_datasample', 'no_save_pred' } def __init__(self, @@ -60,10 +66,14 @@ def __init__(self, device: Optional[str] = None, scope: str = 'mmdet3d', palette: str = 'none') -> None: + # A global counter tracking the number of frames processed, for + # naming of the output results + self.num_predicted_frames = 0 self.palette = palette init_default_scope(scope) super().__init__( model=model, weights=weights, device=device, scope=scope) + self.model = revert_sync_batchnorm(self.model) def _convert_syncbn(self, cfg: ConfigType): """Convert config's naiveSyncBN to BN. @@ -108,6 +118,14 @@ def _init_model( if 'PALETTE' in checkpoint.get('meta', {}): # 3D Segmentor model.dataset_meta['palette'] = checkpoint['meta']['PALETTE'] + test_dataset_cfg = deepcopy(cfg.test_dataloader.dataset) + # lazy init. We only need the metainfo. 
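+        # Building the dataset with lazy_init=True skips annotation loading,
+        # so this is cheap and only used to read class names and palette.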
+ test_dataset_cfg['lazy_init'] = True + metainfo = DATASETS.build(test_dataset_cfg).metainfo + cfg_palette = metainfo.get('palette', None) + if cfg_palette is not None: + model.dataset_meta['palette'] = cfg_palette + model.cfg = cfg # save the config in the model for convenience model.to(device) model.eval() @@ -173,64 +191,76 @@ def _init_visualizer(self, cfg: ConfigType) -> Optional[Visualizer]: visualizer.dataset_meta = self.model.dataset_meta return visualizer + def _dispatch_kwargs(self, + out_dir: str = '', + **kwargs) -> Tuple[Dict, Dict, Dict, Dict]: + """Dispatch kwargs to preprocess(), forward(), visualize() and + postprocess() according to the actual demands. + + Args: + out_dir (str): Dir to save the inference results. + **kwargs (dict): Key words arguments passed to :meth:`preprocess`, + :meth:`forward`, :meth:`visualize` and :meth:`postprocess`. + Each key in kwargs should be in the corresponding set of + ``preprocess_kwargs``, ``forward_kwargs``, ``visualize_kwargs`` + and ``postprocess_kwargs``. + + Returns: + Tuple[Dict, Dict, Dict, Dict]: kwargs passed to preprocess, + forward, visualize and postprocess respectively. + """ + kwargs['img_out_dir'] = out_dir + kwargs['pred_out_dir'] = out_dir + return super()._dispatch_kwargs(**kwargs) + def __call__(self, inputs: InputsType, + batch_size=1, return_datasamples: bool = False, - batch_size: int = 1, - return_vis: bool = False, - show: bool = False, - wait_time: int = 0, - draw_pred: bool = True, - pred_score_thr: float = 0.3, - img_out_dir: str = '', - print_result: bool = False, - pred_out_file: str = '', - **kwargs) -> dict: + **kwargs) -> Optional[dict]: """Call the inferencer. Args: inputs (InputsType): Inputs for the inferencer. return_datasamples (bool): Whether to return results as :obj:`BaseDataElement`. Defaults to False. - batch_size (int): Inference batch size. Defaults to 1. - return_vis (bool): Whether to return the visualization result. - Defaults to False. - show (bool): Whether to display the visualization results in a - popup window. Defaults to False. - wait_time (float): The interval of show (s). Defaults to 0. - draw_pred (bool): Whether to draw predicted bounding boxes. - Defaults to True. - pred_score_thr (float): Minimum score of bboxes to draw. - Defaults to 0.3. - img_out_dir (str): Output directory of visualization results. - If left as empty, no file will be saved. Defaults to ''. - print_result (bool): Whether to print the inference result w/o - visualization to the console. Defaults to False. - pred_out_file (str): File to save the inference results w/o - visualization. If left as empty, no file will be saved. - Defaults to ''. - **kwargs: Other keyword arguments passed to :meth:`preprocess`, + batch_size (int): Batch size. Defaults to 1. + **kwargs: Key words arguments passed to :meth:`preprocess`, :meth:`forward`, :meth:`visualize` and :meth:`postprocess`. Each key in kwargs should be in the corresponding set of ``preprocess_kwargs``, ``forward_kwargs``, ``visualize_kwargs`` and ``postprocess_kwargs``. + Returns: dict: Inference and visualization results. 
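+
+        Example (illustrative only; the model alias and the point cloud
+        path are placeholders, not part of this API):
+            >>> inferencer = LidarDet3DInferencer('pointpillars_kitti-3class')
+            >>> results = inferencer(dict(points='./data/kitti/000008.bin'))
+            >>> list(results.keys())
+            ['predictions', 'visualization']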
""" - return super().__call__( - inputs, - return_datasamples, - batch_size, - return_vis=return_vis, - show=show, - wait_time=wait_time, - draw_pred=draw_pred, - pred_score_thr=pred_score_thr, - img_out_dir=img_out_dir, - print_result=print_result, - pred_out_file=pred_out_file, - **kwargs) + + ( + preprocess_kwargs, + forward_kwargs, + visualize_kwargs, + postprocess_kwargs, + ) = self._dispatch_kwargs(**kwargs) + + ori_inputs = self._inputs_to_list(inputs) + inputs = self.preprocess( + ori_inputs, batch_size=batch_size, **preprocess_kwargs) + preds = [] + + results_dict = {'predictions': [], 'visualization': []} + for data in (track(inputs, description='Inference') + if self.show_progress else inputs): + preds.extend(self.forward(data, **forward_kwargs)) + visualization = self.visualize(ori_inputs, preds, + **visualize_kwargs) + results = self.postprocess(preds, visualization, + return_datasamples, + **postprocess_kwargs) + results_dict['predictions'].extend(results['predictions']) + if results['visualization'] is not None: + results_dict['visualization'].extend(results['visualization']) + return results_dict def postprocess( self, @@ -238,7 +268,8 @@ def postprocess( visualization: Optional[List[np.ndarray]] = None, return_datasample: bool = False, print_result: bool = False, - pred_out_file: str = '', + no_save_pred: bool = False, + pred_out_dir: str = '', ) -> Union[ResType, Tuple[ResType, np.ndarray]]: """Process the predictions and visualization results from ``forward`` and ``visualize``. @@ -258,7 +289,7 @@ def postprocess( Defaults to False. print_result (bool): Whether to print the inference result w/o visualization to the console. Defaults to False. - pred_out_file (str): File to save the inference results w/o + pred_out_dir (str): Dir to save the inference results w/o visualization. If left as empty, no file will be saved. Defaults to ''. @@ -273,35 +304,56 @@ def postprocess( json-serializable dict containing only basic data elements such as strings and numbers. """ + if no_save_pred is True: + pred_out_dir = '' + result_dict = {} results = preds if not return_datasample: results = [] for pred in preds: - result = self.pred2dict(pred) + result = self.pred2dict(pred, pred_out_dir) results.append(result) + elif pred_out_dir != '': + print_log( + 'Currently does not support saving datasample ' + 'when return_datasample is set to True. ' + 'Prediction results are not saved!', + level=logging.WARNING) + # Add img to the results after printing and dumping result_dict['predictions'] = results if print_result: print(result_dict) - if pred_out_file != '': - mmengine.dump(result_dict, pred_out_file) result_dict['visualization'] = visualization return result_dict - def pred2dict(self, data_sample: InstanceData) -> Dict: + # TODO: The data format and fields saved in json need further discussion. + # Maybe should include model name, timestamp, filename, image info etc. + def pred2dict(self, + data_sample: Det3DDataSample, + pred_out_dir: str = '') -> Dict: """Extract elements necessary to represent a prediction into a dictionary. It's better to contain only basic data elements such as strings and numbers in order to guarantee it's json-serializable. + + Args: + data_sample (:obj:`DetDataSample`): Predictions of the model. + pred_out_dir: Dir to save the inference results w/o + visualization. If left as empty, no file will be saved. + Defaults to ''. + + Returns: + dict: Prediction results. 
""" result = {} if 'pred_instances_3d' in data_sample: pred_instances_3d = data_sample.pred_instances_3d.numpy() result = { - 'bboxes_3d': pred_instances_3d.bboxes_3d.tensor.cpu().tolist(), 'labels_3d': pred_instances_3d.labels_3d.tolist(), - 'scores_3d': pred_instances_3d.scores_3d.tolist() + 'scores_3d': pred_instances_3d.scores_3d.tolist(), + 'bboxes_3d': pred_instances_3d.bboxes_3d.tensor.cpu().tolist() } if 'pred_pts_seg' in data_sample: @@ -309,4 +361,23 @@ def pred2dict(self, data_sample: InstanceData) -> Dict: result['pts_semantic_mask'] = \ pred_pts_seg.pts_semantic_mask.tolist() + if data_sample.box_mode_3d == Box3DMode.LIDAR: + result['box_type_3d'] = 'LiDAR' + elif data_sample.box_mode_3d == Box3DMode.CAM: + result['box_type_3d'] = 'Camera' + elif data_sample.box_mode_3d == Box3DMode.DEPTH: + result['box_type_3d'] = 'Depth' + + if pred_out_dir != '': + if 'lidar_path' in data_sample: + lidar_path = osp.basename(data_sample.lidar_path) + lidar_path = osp.splitext(lidar_path)[0] + out_json_path = osp.join(pred_out_dir, 'preds', + lidar_path + '.json') + else: + out_json_path = osp.join( + pred_out_dir, 'preds', + f'{str(self.num_visualized_imgs).zfill(8)}.json') + dump(result, out_json_path) + return result diff --git a/mmdet3d/apis/inferencers/lidar_det3d_inferencer.py b/mmdet3d/apis/inferencers/lidar_det3d_inferencer.py index a3fdc479b7..e75a8e553e 100644 --- a/mmdet3d/apis/inferencers/lidar_det3d_inferencer.py +++ b/mmdet3d/apis/inferencers/lidar_det3d_inferencer.py @@ -4,11 +4,14 @@ import mmengine import numpy as np +import torch from mmengine.dataset import Compose from mmengine.infer.infer import ModelType from mmengine.structures import InstanceData from mmdet3d.registry import INFERENCERS +from mmdet3d.structures import (CameraInstance3DBoxes, DepthInstance3DBoxes, + Det3DDataSample, LiDARInstance3DBoxes) from mmdet3d.utils import ConfigType from .base_3d_inferencer import Base3DInferencer @@ -43,16 +46,6 @@ class LidarDet3DInferencer(Base3DInferencer): priority is palette -> config -> checkpoint. Defaults to 'none'. """ - preprocess_kwargs: set = set() - forward_kwargs: set = set() - visualize_kwargs: set = { - 'return_vis', 'show', 'wait_time', 'draw_pred', 'pred_score_thr', - 'img_out_dir' - } - postprocess_kwargs: set = { - 'print_result', 'pred_out_file', 'return_datasample' - } - def __init__(self, model: Union[ModelType, str, None] = None, weights: Optional[str] = None, @@ -113,9 +106,10 @@ def visualize(self, preds: PredType, return_vis: bool = False, show: bool = False, - wait_time: int = 0, + wait_time: int = -1, draw_pred: bool = True, pred_score_thr: float = 0.3, + no_save_vis: bool = False, img_out_dir: str = '') -> Union[List[np.ndarray], None]: """Visualize predictions. @@ -131,6 +125,8 @@ def visualize(self, Defaults to True. pred_score_thr (float): Minimum score of bboxes to draw. Defaults to 0.3. + no_save_vis (bool): Whether to force not to save prediction + vis results. Defaults to False. img_out_dir (str): Output directory of visualization results. If left as empty, no file will be saved. Defaults to ''. @@ -138,8 +134,10 @@ def visualize(self, List[np.ndarray] or None: Returns visualization results only if applicable. 
""" - if self.visualizer is None or (not show and img_out_dir == '' - and not return_vis): + if no_save_vis is True: + img_out_dir = '' + + if not show and img_out_dir == '' and not return_vis: return None if getattr(self, 'visualizer') is None: @@ -156,17 +154,20 @@ def visualize(self, points = points.reshape(-1, self.load_dim) points = points[:, self.use_dim] pc_name = osp.basename(single_input).split('.bin')[0] - pc_name = f'{pc_name}.png' + pc_name = f'vis_lidar/{pc_name}.png' elif isinstance(single_input, np.ndarray): points = single_input.copy() pc_num = str(self.num_visualized_frames).zfill(8) - pc_name = f'pc_{pc_num}.png' + pc_name = f'vis_lidar/{pc_num}.png' else: raise ValueError('Unsupported input type: ' f'{type(single_input)}') - o3d_save_path = osp.join(img_out_dir, pc_name) \ - if img_out_dir != '' else None + if img_out_dir != '' and show: + o3d_save_path = osp.join(img_out_dir, pc_name) + mmengine.mkdir_or_exist(osp.dirname(o3d_save_path)) + else: + o3d_save_path = None data_input = dict(points=points) self.visualizer.add_datasample( @@ -185,3 +186,53 @@ def visualize(self, self.num_visualized_frames += 1 return results + + def visualize_preds_fromfile(self, inputs: InputsType, preds: PredType, + **kwargs) -> Union[List[np.ndarray], None]: + """Visualize predictions from `*.json` files. + + Args: + inputs (InputsType): Inputs for the inferencer. + preds (PredType): Predictions of the model. + return_vis (bool): Whether to return the visualization result. + Defaults to False. + show (bool): Whether to display the image in a popup window. + Defaults to False. + wait_time (float): The interval of show (s). Defaults to 0. + draw_pred (bool): Whether to draw predicted bounding boxes. + Defaults to True. + pred_score_thr (float): Minimum score of bboxes to draw. + Defaults to 0.3. + no_save_vis (bool): Whether to force not to save prediction + vis results. Defaults to False. + img_out_dir (str): Output directory of visualization results. + If left as empty, no file will be saved. Defaults to ''. + + Returns: + List[np.ndarray] or None: Returns visualization results only if + applicable. + """ + data_samples = [] + for pred in preds: + pred = mmengine.load(pred) + data_sample = Det3DDataSample() + data_sample.pred_instances_3d = InstanceData() + + data_sample.pred_instances_3d.labels_3d = torch.tensor( + pred['labels_3d']) + data_sample.pred_instances_3d.scores_3d = torch.tensor( + pred['scores_3d']) + if pred['box_type_3d'] == 'LiDAR': + data_sample.pred_instances_3d.bboxes_3d = \ + LiDARInstance3DBoxes(pred['bboxes_3d']) + elif pred['box_type_3d'] == 'Camera': + data_sample.pred_instances_3d.bboxes_3d = \ + CameraInstance3DBoxes(pred['bboxes_3d']) + elif pred['box_type_3d'] == 'Depth': + data_sample.pred_instances_3d.bboxes_3d = \ + DepthInstance3DBoxes(pred['bboxes_3d']) + else: + raise ValueError('Unsupported box type: ' + f'{pred["box_type_3d"]}') + data_samples.append(data_sample) + return self.visualize(inputs=inputs, preds=data_samples, **kwargs) diff --git a/mmdet3d/visualization/local_visualizer.py b/mmdet3d/visualization/local_visualizer.py index a557b3d426..b99636c084 100644 --- a/mmdet3d/visualization/local_visualizer.py +++ b/mmdet3d/visualization/local_visualizer.py @@ -1,6 +1,8 @@ # Copyright (c) OpenMMLab. All rights reserved. 
import copy +import logging import math +import os import sys import time from typing import List, Optional, Sequence, Tuple, Union @@ -176,8 +178,10 @@ def _initialize_o3d_vis(self) -> Visualizer: o3d_vis.register_key_action_callback(glfw_key_space, self.space_action_callback) o3d_vis.register_key_callback(glfw_key_right, self.right_callback) - o3d_vis.create_window() - self.view_control = o3d_vis.get_view_control() + if os.environ.get('DISPLAY', None) is not None: + o3d_vis.create_window() + print_log('No display device found', level=logging.WARNING) + self.view_control = o3d_vis.get_view_control() return o3d_vis @master_only From 5c51f1a10d42c9b82a80d7b79957ad9a881dd505 Mon Sep 17 00:00:00 2001 From: sunjiahao1999 <578431509@qq.com> Date: Mon, 9 Oct 2023 20:10:21 +0800 Subject: [PATCH 02/11] fix all demo --- demo/inference_demo.ipynb | 133 +++++++----------- demo/new_mono_det_demo.py | 98 +++++++++++++ demo/new_multi_modality_demo.py | 101 +++++++++++++ demo/new_pcd_demo.py | 8 +- demo/new_pcd_seg_demo.py | 85 +++++++++++ mmdet3d/apis/inference.py | 3 +- .../apis/inferencers/base_3d_inferencer.py | 55 +++++++- .../inferencers/lidar_det3d_inferencer.py | 10 +- .../inferencers/lidar_seg3d_inferencer.py | 30 ++-- .../apis/inferencers/mono_det3d_inferencer.py | 28 ++-- .../multi_modality_det3d_inferencer.py | 38 +++-- mmdet3d/datasets/transforms/loading.py | 53 ++----- mmdet3d/visualization/local_visualizer.py | 17 ++- 13 files changed, 453 insertions(+), 206 deletions(-) create mode 100644 demo/new_mono_det_demo.py create mode 100644 demo/new_multi_modality_demo.py create mode 100644 demo/new_pcd_seg_demo.py diff --git a/demo/inference_demo.ipynb b/demo/inference_demo.ipynb index 0989863ddd..14854722d6 100644 --- a/demo/inference_demo.ipynb +++ b/demo/inference_demo.ipynb @@ -2,117 +2,83 @@ "cells": [ { "cell_type": "code", - "execution_count": null, - "source": [ - "from mmdet3d.apis import inference_detector, init_model\n", - "from mmdet3d.registry import VISUALIZERS\n", - "from mmdet3d.utils import register_all_modules" - ], - "outputs": [], + "execution_count": 25, "metadata": { "pycharm": { "is_executing": false } - } - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "# register all modules in mmdet3d into the registries\n", - "register_all_modules()" - ], + }, "outputs": [], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": 8, "source": [ - "config_file = '../configs/second/hv_second_secfpn_6x8_80e_kitti-3d-car.py'\n", - "# download the checkpoint from model zoo and put it in `checkpoints/`\n", - "checkpoint_file = '../work_dirs/second/epoch_40.pth'" - ], - "outputs": [], - "metadata": { - "pycharm": { - "is_executing": false - } - } + "from mmdet3d.apis import LidarDet3DInferencer" + ] }, { "cell_type": "code", "execution_count": null, - "source": [ - "# build the model from a config file and a checkpoint file\n", - "model = init_model(config_file, checkpoint_file, device='cuda:0')" - ], + "metadata": {}, "outputs": [], - "metadata": {} + "source": [ + "# initialize inferencer\n", + "inferencer = LidarDet3DInferencer('pointpillars_kitti-3class')" + ] }, { "cell_type": "code", "execution_count": null, - "source": [ - "# init visualizer\n", - "visualizer = VISUALIZERS.build(model.cfg.visualizer)\n", - "visualizer.dataset_meta = {\n", - " 'CLASSES': model.CLASSES,\n", - " 'PALETTE': model.PALETTE\n", - "}" - ], - "outputs": [], "metadata": { "pycharm": { "is_executing": false } - } + }, + "outputs": [], + "source": [ + "# inference\n", + 
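    "# the input dict maps the modality name ('points') to a local file path\n",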
"inputs = dict(points='./data/kitti/000008.bin')\n", + "inferencer(inputs)" + ] }, { "cell_type": "code", - "execution_count": 11, - "source": [ - "# test a single sample\n", - "pcd = './data/kitti/000008.bin'\n", - "result, data = inference_detector(model, pcd)\n", - "points = data['inputs']['points']\n", - "data_input = dict(points=points)" - ], + "execution_count": null, + "metadata": {}, "outputs": [], - "metadata": { - "pycharm": { - "is_executing": false - } - } + "source": [ + "# inference and visualize\n", + "# NOTE: use the `Esc` key to exit Open3D window in Jupyter Notebook Environment\n", + "inferencer(inputs, show=True)" + ] }, { "cell_type": "code", "execution_count": null, - "source": [ - "# show the results\n", - "out_dir = './'\n", - "visualizer.add_datasample(\n", - " 'result',\n", - " data_input,\n", - " data_sample=result,\n", - " draw_gt=False,\n", - " show=True,\n", - " wait_time=0,\n", - " out_file=out_dir,\n", - " vis_task='det')" - ], + "metadata": {}, "outputs": [], - "metadata": { - "pycharm": { - "is_executing": false - } - } + "source": [ + "# If your operating environment does not have a display device,\n", + "# (e.g. a remote server), you can save the predictions and visualize\n", + "# them in local devices.\n", + "inferencer(inputs, show=False, out_dir='./remote_outputs')\n", + "\n", + "# Simulate the migration process\n", + "%mv ./remote_outputs ./local_outputs\n", + "\n", + "# Visualize the predictions from the saved files\n", + "# NOTE: use the `Esc` key to exit Open3D window in Jupyter Notebook Environment\n", + "local_inferencer = LidarDet3DInferencer('pointpillars_kitti-3class')\n", + "inputs = local_inferencer._inputs_to_list(inputs)\n", + "local_inferencer.visualize_preds_fromfile(inputs, ['local_outputs/preds/000008.json'], show=True)" + ] } ], "metadata": { + "interpreter": { + "hash": "a0c343fece975dd89087e8c2194dd4d3db28d7000f1b32ed9ed9d584dd54dbbe" + }, "kernelspec": { - "name": "python3", - "display_name": "Python 3.7.6 64-bit ('torch1.7-cu10.1': conda)" + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -124,19 +90,16 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.6" + "version": "3.9.16" }, "pycharm": { "stem_cell": { "cell_type": "raw", - "source": [], "metadata": { "collapsed": false - } + }, + "source": [] } - }, - "interpreter": { - "hash": "a0c343fece975dd89087e8c2194dd4d3db28d7000f1b32ed9ed9d584dd54dbbe" } }, "nbformat": 4, diff --git a/demo/new_mono_det_demo.py b/demo/new_mono_det_demo.py new file mode 100644 index 0000000000..08ba916a8f --- /dev/null +++ b/demo/new_mono_det_demo.py @@ -0,0 +1,98 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import logging +import os +from argparse import ArgumentParser + +from mmengine.logging import print_log + +from mmdet3d.apis import MonoDet3DInferencer + + +def parse_args(): + parser = ArgumentParser() + parser.add_argument('img', help='Image file') + parser.add_argument('infos', help='Infos file with annotations') + parser.add_argument('model', help='Config file') + parser.add_argument('weights', help='Checkpoint file') + parser.add_argument( + '--device', default='cuda:0', help='Device used for inference') + parser.add_argument( + '--cam-type', + type=str, + default='CAM_BACK', + help='choose camera type to inference') + parser.add_argument( + '--pred-score-thr', + type=float, + default=0.3, + help='bbox score threshold') + parser.add_argument( + '--out-dir', + type=str, + default='outputs', + help='Output directory of prediction results.') + parser.add_argument( + '--show', + action='store_true', + help='Show online visualization results') + parser.add_argument( + '--wait_time', + type=float, + default=-1, + help='The interval of show (s). Demo will be blocked in showing' + 'results, if wait_time is -1. Defaults to -1.') + parser.add_argument( + '--no-save-vis', + action='store_true', + help='Do not save detection vis results') + parser.add_argument( + '--no-save-pred', + action='store_true', + help='Do not save detection json results') + parser.add_argument( + '--print-result', + action='store_true', + help='Whether to print the results.') + call_args = vars(parser.parse_args()) + + call_args['inputs'] = dict( + img=call_args.pop('img'), infos=call_args.pop('infos')) + call_args.pop('cam_type') + + if call_args['no_save_vis'] and call_args['no_save_pred']: + call_args['out_dir'] = '' + + init_kws = ['model', 'weights', 'device'] + init_args = {} + for init_kw in init_kws: + init_args[init_kw] = call_args.pop(init_kw) + + # NOTE: If your operating environment does not have a display device, + # (e.g. a remote server), you can save the predictions and visualize + # them in local devices. + if os.environ.get('DISPLAY') is None and call_args['show']: + print_log( + 'Display device not found. `--show` is forced to False', + logger='current', + level=logging.WARNING) + call_args['show'] = False + + return init_args, call_args + + +def main(): + # TODO: Support inference of point cloud numpy file. + init_args, call_args = parse_args() + + inferencer = MonoDet3DInferencer(**init_args) + inferencer(**call_args) + + if call_args['out_dir'] != '' and not (call_args['no_save_vis'] + and call_args['no_save_pred']): + print_log( + f'results have been saved at {call_args["out_dir"]}', + logger='current') + + +if __name__ == '__main__': + main() diff --git a/demo/new_multi_modality_demo.py b/demo/new_multi_modality_demo.py new file mode 100644 index 0000000000..3b4605aeeb --- /dev/null +++ b/demo/new_multi_modality_demo.py @@ -0,0 +1,101 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
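+# Example usage (all paths are placeholders):
+#   python demo/new_multi_modality_demo.py ${PCD} ${IMG} ${INFOS} ${CONFIG} ${CHECKPOINT} --show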
+import logging +import os +from argparse import ArgumentParser + +from mmengine.logging import print_log + +from mmdet3d.apis import MultiModalityDet3DInferencer + + +def parse_args(): + parser = ArgumentParser() + parser.add_argument('pcd', help='Point cloud file') + parser.add_argument('img', help='Image file') + parser.add_argument('infos', help='Infos file with annotations') + parser.add_argument('model', help='Config file') + parser.add_argument('weights', help='Checkpoint file') + parser.add_argument( + '--device', default='cuda:0', help='Device used for inference') + parser.add_argument( + '--cam-type', + type=str, + default='CAM_BACK', + help='choose camera type to inference') + parser.add_argument( + '--pred-score-thr', + type=float, + default=0.3, + help='bbox score threshold') + parser.add_argument( + '--out-dir', + type=str, + default='outputs', + help='Output directory of prediction results.') + parser.add_argument( + '--show', + action='store_true', + help='Show online visualization results') + parser.add_argument( + '--wait_time', + type=float, + default=-1, + help='The interval of show (s). Demo will be blocked in showing' + 'results, if wait_time is -1. Defaults to -1.') + parser.add_argument( + '--no-save-vis', + action='store_true', + help='Do not save detection vis results') + parser.add_argument( + '--no-save-pred', + action='store_true', + help='Do not save detection json results') + parser.add_argument( + '--print-result', + action='store_true', + help='Whether to print the results.') + call_args = vars(parser.parse_args()) + + call_args['inputs'] = dict( + points=call_args.pop('pcd'), + img=call_args.pop('img'), + infos=call_args.pop('infos')) + call_args.pop('cam_type') + + if call_args['no_save_vis'] and call_args['no_save_pred']: + call_args['out_dir'] = '' + + init_kws = ['model', 'weights', 'device'] + init_args = {} + for init_kw in init_kws: + init_args[init_kw] = call_args.pop(init_kw) + + # NOTE: If your operating environment does not have a display device, + # (e.g. a remote server), you can save the predictions and visualize + # them in local devices. + if os.environ.get('DISPLAY') is None and call_args['show']: + print_log( + 'Display device not found. `--show` is forced to False', + logger='current', + level=logging.WARNING) + call_args['show'] = False + + return init_args, call_args + + +def main(): + # TODO: Support inference of point cloud numpy file. 
+ init_args, call_args = parse_args() + + inferencer = MultiModalityDet3DInferencer(**init_args) + inferencer(**call_args) + + if call_args['out_dir'] != '' and not (call_args['no_save_vis'] + and call_args['no_save_pred']): + print_log( + f'results have been saved at {call_args["out_dir"]}', + logger='current') + + +if __name__ == '__main__': + main() diff --git a/demo/new_pcd_demo.py b/demo/new_pcd_demo.py index 9dd80979c7..d7d850bc12 100644 --- a/demo/new_pcd_demo.py +++ b/demo/new_pcd_demo.py @@ -77,13 +77,7 @@ def main(): init_args, call_args = parse_args() inferencer = LidarDet3DInferencer(**init_args) - # inferencer(**call_args) - - inputs = call_args.pop('inputs') - _, _, visualize_kwargs, _ = inferencer._dispatch_kwargs(**call_args) - inputs = inferencer._inputs_to_list(inputs) - inferencer.visualize_preds_fromfile(inputs, ['outputs/preds/000008.json'], - **visualize_kwargs) + inferencer(**call_args) if call_args['out_dir'] != '' and not (call_args['no_save_vis'] and call_args['no_save_pred']): diff --git a/demo/new_pcd_seg_demo.py b/demo/new_pcd_seg_demo.py new file mode 100644 index 0000000000..bc6697e165 --- /dev/null +++ b/demo/new_pcd_seg_demo.py @@ -0,0 +1,85 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import logging +import os +from argparse import ArgumentParser + +from mmengine.logging import print_log + +from mmdet3d.apis import LidarSeg3DInferencer + + +def parse_args(): + parser = ArgumentParser() + parser.add_argument('pcd', help='Point cloud file') + parser.add_argument('model', help='Config file') + parser.add_argument('weights', help='Checkpoint file') + parser.add_argument( + '--device', default='cuda:0', help='Device used for inference') + parser.add_argument( + '--out-dir', + type=str, + default='outputs', + help='Output directory of prediction results.') + parser.add_argument( + '--show', + action='store_true', + help='Show online visualization results') + parser.add_argument( + '--wait_time', + type=float, + default=-1, + help='The interval of show (s). Demo will be blocked in showing' + 'results, if wait_time is -1. Defaults to -1.') + parser.add_argument( + '--no-save-vis', + action='store_true', + help='Do not save detection vis results') + parser.add_argument( + '--no-save-pred', + action='store_true', + help='Do not save detection json results') + parser.add_argument( + '--print-result', + action='store_true', + help='Whether to print the results.') + call_args = vars(parser.parse_args()) + + call_args['inputs'] = dict(points=call_args.pop('pcd')) + + if call_args['no_save_vis'] and call_args['no_save_pred']: + call_args['out_dir'] = '' + + init_kws = ['model', 'weights', 'device'] + init_args = {} + for init_kw in init_kws: + init_args[init_kw] = call_args.pop(init_kw) + + # NOTE: If your operating environment does not have a display device, + # (e.g. a remote server), you can save the predictions and visualize + # them in local devices. + if os.environ.get('DISPLAY') is None and call_args['show']: + print_log( + 'Display device not found. `--show` is forced to False', + logger='current', + level=logging.WARNING) + call_args['show'] = False + + return init_args, call_args + + +def main(): + # TODO: Support inference of point cloud numpy file. 
+ init_args, call_args = parse_args() + + inferencer = LidarSeg3DInferencer(**init_args) + inferencer(**call_args) + + if call_args['out_dir'] != '' and not (call_args['no_save_vis'] + and call_args['no_save_pred']): + print_log( + f'results have been saved at {call_args["out_dir"]}', + logger='current') + + +if __name__ == '__main__': + main() diff --git a/mmdet3d/apis/inference.py b/mmdet3d/apis/inference.py index e3fa9d5e3a..e355b958ee 100644 --- a/mmdet3d/apis/inference.py +++ b/mmdet3d/apis/inference.py @@ -392,7 +392,8 @@ def inference_segmentor(model: nn.Module, pcds: PointsType): new_test_pipeline = [] for pipeline in test_pipeline: - if pipeline['type'] != 'LoadAnnotations3D': + if pipeline['type'] != 'LoadAnnotations3D' and pipeline[ + 'type'] != 'PointSegClassMapping': new_test_pipeline.append(pipeline) test_pipeline = Compose(new_test_pipeline) diff --git a/mmdet3d/apis/inferencers/base_3d_inferencer.py b/mmdet3d/apis/inferencers/base_3d_inferencer.py index 26004512a3..8996bb0bab 100644 --- a/mmdet3d/apis/inferencers/base_3d_inferencer.py +++ b/mmdet3d/apis/inferencers/base_3d_inferencer.py @@ -4,6 +4,7 @@ from copy import deepcopy from typing import Dict, List, Optional, Sequence, Tuple, Union +import mmengine import numpy as np import torch.nn as nn from mmengine import dump, print_log @@ -50,11 +51,11 @@ class Base3DInferencer(BaseInferencer): priority is palette -> config -> checkpoint. Defaults to 'none'. """ - preprocess_kwargs: set = set() + preprocess_kwargs: set = {'cam_type'} forward_kwargs: set = set() visualize_kwargs: set = { 'return_vis', 'show', 'wait_time', 'draw_pred', 'pred_score_thr', - 'img_out_dir', 'no_save_vis' + 'img_out_dir', 'no_save_vis', 'cam_type_dir' } postprocess_kwargs: set = { 'print_result', 'pred_out_dir', 'return_datasample', 'no_save_pred' @@ -131,10 +132,10 @@ def _init_model( model.eval() return model - def _inputs_to_list( - self, - inputs: Union[dict, list], - modality_key: Union[str, List[str]] = 'points') -> list: + def _inputs_to_list(self, + inputs: Union[dict, list], + modality_key: Union[str, List[str]] = 'points', + cam_type='CAM2') -> list: """Preprocess the inputs to a list. 
Preprocess inputs to a list according to its type: @@ -158,6 +159,11 @@ def _inputs_to_list( modality_key = [modality_key] assert set(modality_key).issubset({'points', 'img'}) + if 'infos' in inputs: + infos = inputs.pop('infos') + else: + infos = None + for key in modality_key: if isinstance(inputs, dict) and isinstance(inputs[key], str): img = inputs[key] @@ -174,6 +180,31 @@ def _inputs_to_list( if not isinstance(inputs, (list, tuple)): inputs = [inputs] + # get cam2img, lidar2cam and lidar2img from infos + if infos is not None: + info_list = mmengine.load(infos)['data_list'] + assert len(info_list) == len(inputs) + for index, input in enumerate(inputs): + data_info = info_list[index] + img_path = data_info['images'][cam_type]['img_path'] + if osp.basename(img_path) != osp.basename(input['img']): + raise ValueError( + f'the info file of {img_path} is not provided.') + cam2img = np.asarray( + data_info['images'][cam_type]['cam2img'], dtype=np.float32) + lidar2cam = np.asarray( + data_info['images'][cam_type]['lidar2cam'], + dtype=np.float32) + if 'lidar2img' in data_info['images'][cam_type]: + lidar2img = np.asarray( + data_info['images'][cam_type]['lidar2img'], + dtype=np.float32) + else: + lidar2img = cam2img @ lidar2cam + input['cam2img'] = cam2img + input['lidar2cam'] = lidar2cam + input['lidar2img'] = lidar2img + return list(inputs) def _get_transform_idx(self, pipeline_cfg: ConfigType, name: str) -> int: @@ -193,12 +224,14 @@ def _init_visualizer(self, cfg: ConfigType) -> Optional[Visualizer]: def _dispatch_kwargs(self, out_dir: str = '', + cam_type: str = '', **kwargs) -> Tuple[Dict, Dict, Dict, Dict]: """Dispatch kwargs to preprocess(), forward(), visualize() and postprocess() according to the actual demands. Args: out_dir (str): Dir to save the inference results. + cam_type (str): Camera type. Defaults to 'CAM2'. **kwargs (dict): Key words arguments passed to :meth:`preprocess`, :meth:`forward`, :meth:`visualize` and :meth:`postprocess`. Each key in kwargs should be in the corresponding set of @@ -211,6 +244,8 @@ def _dispatch_kwargs(self, """ kwargs['img_out_dir'] = out_dir kwargs['pred_out_dir'] = out_dir + if cam_type != '': + kwargs['cam_type_dir'] = cam_type return super()._dispatch_kwargs(**kwargs) def __call__(self, @@ -243,7 +278,8 @@ def __call__(self, postprocess_kwargs, ) = self._dispatch_kwargs(**kwargs) - ori_inputs = self._inputs_to_list(inputs) + cam_type = preprocess_kwargs.pop('cam_type', 'CAM2') + ori_inputs = self._inputs_to_list(inputs, cam_type=cam_type) inputs = self.preprocess( ori_inputs, batch_size=batch_size, **preprocess_kwargs) preds = [] @@ -374,6 +410,11 @@ def pred2dict(self, lidar_path = osp.splitext(lidar_path)[0] out_json_path = osp.join(pred_out_dir, 'preds', lidar_path + '.json') + elif 'img_path' in data_sample: + img_path = osp.basename(data_sample.img_path) + img_path = osp.splitext(img_path)[0] + out_json_path = osp.join(pred_out_dir, 'preds', + img_path + '.json') else: out_json_path = osp.join( pred_out_dir, 'preds', diff --git a/mmdet3d/apis/inferencers/lidar_det3d_inferencer.py b/mmdet3d/apis/inferencers/lidar_det3d_inferencer.py index e75a8e553e..128c53fb81 100644 --- a/mmdet3d/apis/inferencers/lidar_det3d_inferencer.py +++ b/mmdet3d/apis/inferencers/lidar_det3d_inferencer.py @@ -62,7 +62,7 @@ def __init__(self, scope=scope, palette=palette) - def _inputs_to_list(self, inputs: Union[dict, list]) -> list: + def _inputs_to_list(self, inputs: Union[dict, list], **kwargs) -> list: """Preprocess the inputs to a list. 
Preprocess inputs to a list according to its type: @@ -80,7 +80,7 @@ def _inputs_to_list(self, inputs: Union[dict, list]) -> list: Returns: list: List of input for the :meth:`preprocess`. """ - return super()._inputs_to_list(inputs, modality_key='points') + return super()._inputs_to_list(inputs, modality_key='points', **kwargs) def _init_pipeline(self, cfg: ConfigType) -> Compose: """Initialize the test pipeline.""" @@ -154,17 +154,17 @@ def visualize(self, points = points.reshape(-1, self.load_dim) points = points[:, self.use_dim] pc_name = osp.basename(single_input).split('.bin')[0] - pc_name = f'vis_lidar/{pc_name}.png' + pc_name = f'{pc_name}.png' elif isinstance(single_input, np.ndarray): points = single_input.copy() pc_num = str(self.num_visualized_frames).zfill(8) - pc_name = f'vis_lidar/{pc_num}.png' + pc_name = f'{pc_num}.png' else: raise ValueError('Unsupported input type: ' f'{type(single_input)}') if img_out_dir != '' and show: - o3d_save_path = osp.join(img_out_dir, pc_name) + o3d_save_path = osp.join(img_out_dir, 'vis_lidar', pc_name) mmengine.mkdir_or_exist(osp.dirname(o3d_save_path)) else: o3d_save_path = None diff --git a/mmdet3d/apis/inferencers/lidar_seg3d_inferencer.py b/mmdet3d/apis/inferencers/lidar_seg3d_inferencer.py index 0286a07113..2adb998e4a 100644 --- a/mmdet3d/apis/inferencers/lidar_seg3d_inferencer.py +++ b/mmdet3d/apis/inferencers/lidar_seg3d_inferencer.py @@ -43,16 +43,6 @@ class LidarSeg3DInferencer(Base3DInferencer): priority is palette -> config -> checkpoint. Defaults to 'none'. """ - preprocess_kwargs: set = set() - forward_kwargs: set = set() - visualize_kwargs: set = { - 'return_vis', 'show', 'wait_time', 'draw_pred', 'pred_score_thr', - 'img_out_dir' - } - postprocess_kwargs: set = { - 'print_result', 'pred_out_file', 'return_datasample' - } - def __init__(self, model: Union[ModelType, str, None] = None, weights: Optional[str] = None, @@ -69,7 +59,7 @@ def __init__(self, scope=scope, palette=palette) - def _inputs_to_list(self, inputs: Union[dict, list]) -> list: + def _inputs_to_list(self, inputs: Union[dict, list], **kwargs) -> list: """Preprocess the inputs to a list. Preprocess inputs to a list according to its type: @@ -87,7 +77,7 @@ def _inputs_to_list(self, inputs: Union[dict, list]) -> list: Returns: list: List of input for the :meth:`preprocess`. """ - return super()._inputs_to_list(inputs, modality_key='points') + return super()._inputs_to_list(inputs, modality_key='points', **kwargs) def _init_pipeline(self, cfg: ConfigType) -> Compose: """Initialize the test pipeline.""" @@ -124,6 +114,7 @@ def visualize(self, wait_time: int = 0, draw_pred: bool = True, pred_score_thr: float = 0.3, + no_save_vis: bool = False, img_out_dir: str = '') -> Union[List[np.ndarray], None]: """Visualize predictions. @@ -146,8 +137,10 @@ def visualize(self, List[np.ndarray] or None: Returns visualization results only if applicable. 
""" - if self.visualizer is None or (not show and img_out_dir == '' - and not return_vis): + if no_save_vis is True: + img_out_dir = '' + + if not show and img_out_dir == '' and not return_vis: return None if getattr(self, 'visualizer') is None: @@ -168,13 +161,16 @@ def visualize(self, elif isinstance(single_input, np.ndarray): points = single_input.copy() pc_num = str(self.num_visualized_frames).zfill(8) - pc_name = f'pc_{pc_num}.png' + pc_name = f'{pc_num}.png' else: raise ValueError('Unsupported input type: ' f'{type(single_input)}') - o3d_save_path = osp.join(img_out_dir, pc_name) \ - if img_out_dir != '' else None + if img_out_dir != '' and show: + o3d_save_path = osp.join(img_out_dir, 'vis_lidar', pc_name) + mmengine.mkdir_or_exist(osp.dirname(o3d_save_path)) + else: + o3d_save_path = None data_input = dict(points=points) self.visualizer.add_datasample( diff --git a/mmdet3d/apis/inferencers/mono_det3d_inferencer.py b/mmdet3d/apis/inferencers/mono_det3d_inferencer.py index a0ebfe57fc..d4e231808b 100644 --- a/mmdet3d/apis/inferencers/mono_det3d_inferencer.py +++ b/mmdet3d/apis/inferencers/mono_det3d_inferencer.py @@ -44,16 +44,6 @@ class MonoDet3DInferencer(Base3DInferencer): priority is palette -> config -> checkpoint. Defaults to 'none'. """ - preprocess_kwargs: set = set() - forward_kwargs: set = set() - visualize_kwargs: set = { - 'return_vis', 'show', 'wait_time', 'draw_pred', 'pred_score_thr', - 'img_out_dir' - } - postprocess_kwargs: set = { - 'print_result', 'pred_out_file', 'return_datasample' - } - def __init__(self, model: Union[ModelType, str, None] = None, weights: Optional[str] = None, @@ -70,7 +60,7 @@ def __init__(self, scope=scope, palette=palette) - def _inputs_to_list(self, inputs: Union[dict, list]) -> list: + def _inputs_to_list(self, inputs: Union[dict, list], **kwargs) -> list: """Preprocess the inputs to a list. Preprocess inputs to a list according to its type: @@ -88,7 +78,7 @@ def _inputs_to_list(self, inputs: Union[dict, list]) -> list: Returns: list: List of input for the :meth:`preprocess`. """ - return super()._inputs_to_list(inputs, modality_key='img') + return super()._inputs_to_list(inputs, modality_key='img', **kwargs) def _init_pipeline(self, cfg: ConfigType) -> Compose: """Initialize the test pipeline.""" @@ -110,7 +100,9 @@ def visualize(self, wait_time: int = 0, draw_pred: bool = True, pred_score_thr: float = 0.3, - img_out_dir: str = '') -> Union[List[np.ndarray], None]: + no_save_vis: bool = False, + img_out_dir: str = '', + cam_type_dir: str = 'CAM2') -> Union[List[np.ndarray], None]: """Visualize predictions. Args: @@ -132,8 +124,10 @@ def visualize(self, List[np.ndarray] or None: Returns visualization results only if applicable. 
""" - if self.visualizer is None or (not show and img_out_dir == '' - and not return_vis): + if no_save_vis is True: + img_out_dir = '' + + if not show and img_out_dir == '' and not return_vis: return None if getattr(self, 'visualizer') is None: @@ -156,8 +150,8 @@ def visualize(self, raise ValueError('Unsupported input type: ' f"{type(single_input['img'])}") - out_file = osp.join(img_out_dir, img_name) if img_out_dir != '' \ - else None + out_file = osp.join(img_out_dir, 'vis_camera', cam_type_dir, + img_name) if img_out_dir != '' else None data_input = dict(img=img) self.visualizer.add_datasample( diff --git a/mmdet3d/apis/inferencers/multi_modality_det3d_inferencer.py b/mmdet3d/apis/inferencers/multi_modality_det3d_inferencer.py index ab02e064b8..7f00dd03ff 100644 --- a/mmdet3d/apis/inferencers/multi_modality_det3d_inferencer.py +++ b/mmdet3d/apis/inferencers/multi_modality_det3d_inferencer.py @@ -44,16 +44,6 @@ class MultiModalityDet3DInferencer(Base3DInferencer): palette (str): The palette of visualization. Defaults to 'none'. """ - preprocess_kwargs: set = set() - forward_kwargs: set = set() - visualize_kwargs: set = { - 'return_vis', 'show', 'wait_time', 'draw_pred', 'pred_score_thr', - 'img_out_dir' - } - postprocess_kwargs: set = { - 'print_result', 'pred_out_file', 'return_datasample' - } - def __init__(self, model: Union[ModelType, str, None] = None, weights: Optional[str] = None, @@ -70,7 +60,7 @@ def __init__(self, scope=scope, palette=palette) - def _inputs_to_list(self, inputs: Union[dict, list]) -> list: + def _inputs_to_list(self, inputs: Union[dict, list], **kwargs) -> list: """Preprocess the inputs to a list. Preprocess inputs to a list according to its type: @@ -88,7 +78,8 @@ def _inputs_to_list(self, inputs: Union[dict, list]) -> list: Returns: list: List of input for the :meth:`preprocess`. """ - return super()._inputs_to_list(inputs, modality_key=['points', 'img']) + return super()._inputs_to_list( + inputs, modality_key=['points', 'img'], **kwargs) def _init_pipeline(self, cfg: ConfigType) -> Compose: """Initialize the test pipeline.""" @@ -144,7 +135,9 @@ def visualize(self, wait_time: int = 0, draw_pred: bool = True, pred_score_thr: float = 0.3, - img_out_dir: str = '') -> Union[List[np.ndarray], None]: + no_save_vis: bool = False, + img_out_dir: str = '', + cam_type_dir: str = 'CAM2') -> Union[List[np.ndarray], None]: """Visualize predictions. Args: @@ -166,8 +159,10 @@ def visualize(self, List[np.ndarray] or None: Returns visualization results only if applicable. 
""" - if self.visualizer is None or (not show and img_out_dir == '' - and not return_vis): + if no_save_vis is True: + img_out_dir = '' + + if not show and img_out_dir == '' and not return_vis: return None if getattr(self, 'visualizer') is None: @@ -188,13 +183,16 @@ def visualize(self, elif isinstance(points_input, np.ndarray): points = points_input.copy() pc_num = str(self.num_visualized_frames).zfill(8) - pc_name = f'pc_{pc_num}.png' + pc_name = f'{pc_num}.png' else: raise ValueError('Unsupported input type: ' f'{type(points_input)}') - o3d_save_path = osp.join(img_out_dir, pc_name) \ - if img_out_dir != '' else None + if img_out_dir != '' and show: + o3d_save_path = osp.join(img_out_dir, 'vis_lidar', pc_name) + mmengine.mkdir_or_exist(osp.dirname(o3d_save_path)) + else: + o3d_save_path = None img_input = single_input['img'] if isinstance(single_input['img'], str): @@ -210,8 +208,8 @@ def visualize(self, raise ValueError('Unsupported input type: ' f'{type(img_input)}') - out_file = osp.join(img_out_dir, img_name) if img_out_dir != '' \ - else None + out_file = osp.join(img_out_dir, 'vis_camera', cam_type_dir, + img_name) if img_out_dir != '' else None data_input = dict(points=points, img=img) self.visualizer.add_datasample( diff --git a/mmdet3d/datasets/transforms/loading.py b/mmdet3d/datasets/transforms/loading.py index 1dbad93001..383c44536f 100644 --- a/mmdet3d/datasets/transforms/loading.py +++ b/mmdet3d/datasets/transforms/loading.py @@ -1153,7 +1153,6 @@ class MonoDet3DInferencerLoader(BaseTransform): Added keys: - img - - cam2img - box_type_3d - box_mode_3d @@ -1176,32 +1175,19 @@ def transform(self, single_input: dict) -> dict: dict: The dict contains loaded image and meta information. """ box_type_3d, box_mode_3d = get_box_type('camera') - assert 'calib' in single_input and 'img' in single_input, \ - "key 'calib' and 'img' must be in input dict" - if isinstance(single_input['calib'], str): - calib_path = single_input['calib'] - with open(calib_path, 'r') as f: - lines = f.readlines() - cam2img = np.array([ - float(info) for info in lines[0].split(' ')[0:16] - ]).reshape([4, 4]) - elif isinstance(single_input['calib'], np.ndarray): - cam2img = single_input['calib'] - else: - raise ValueError('Unsupported input calib type: ' - f"{type(single_input['calib'])}") if isinstance(single_input['img'], str): inputs = dict( images=dict( CAM_FRONT=dict( - img_path=single_input['img'], cam2img=cam2img)), + img_path=single_input['img'], + cam2img=single_input['cam2img'])), box_mode_3d=box_mode_3d, box_type_3d=box_type_3d) elif isinstance(single_input['img'], np.ndarray): inputs = dict( img=single_input['img'], - cam2img=cam2img, + cam2img=single_input['cam2img'], box_type_3d=box_type_3d, box_mode_3d=box_mode_3d) else: @@ -1252,9 +1238,9 @@ def transform(self, single_input: dict) -> dict: dict: The dict contains loaded image, point cloud and meta information. 
""" - assert 'points' in single_input and 'img' in single_input and \ - 'calib' in single_input, "key 'points', 'img' and 'calib' must be " - f'in input dict, but got {single_input}' + assert 'points' in single_input and 'img' in single_input, \ + "key 'points', 'img' and must be in input dict," \ + f'but got {single_input}' if isinstance(single_input['points'], str): inputs = dict( lidar_points=dict(lidar_path=single_input['points']), @@ -1283,36 +1269,21 @@ def transform(self, single_input: dict) -> dict: multi_modality_inputs = points_inputs box_type_3d, box_mode_3d = get_box_type('lidar') - if isinstance(single_input['calib'], str): - calib = mmengine.load(single_input['calib']) - - elif isinstance(single_input['calib'], dict): - calib = single_input['calib'] - else: - raise ValueError('Unsupported input calib type: ' - f"{type(single_input['calib'])}") - - cam2img = np.asarray(calib['cam2img'], dtype=np.float32) - lidar2cam = np.asarray(calib['lidar2cam'], dtype=np.float32) - if 'lidar2cam' in calib: - lidar2img = np.asarray(calib['lidar2img'], dtype=np.float32) - else: - lidar2img = cam2img @ lidar2cam if isinstance(single_input['img'], str): inputs = dict( img_path=single_input['img'], - cam2img=cam2img, - lidar2img=lidar2img, - lidar2cam=lidar2cam, + cam2img=single_input['cam2img'], + lidar2img=single_input['lidar2img'], + lidar2cam=single_input['lidar2cam'], box_mode_3d=box_mode_3d, box_type_3d=box_type_3d) elif isinstance(single_input['img'], np.ndarray): inputs = dict( img=single_input['img'], - cam2img=cam2img, - lidar2img=lidar2img, - lidar2cam=lidar2cam, + cam2img=single_input['cam2img'], + lidar2img=single_input['lidar2img'], + lidar2cam=single_input['lidar2cam'], box_type_3d=box_type_3d, box_mode_3d=box_mode_3d) else: diff --git a/mmdet3d/visualization/local_visualizer.py b/mmdet3d/visualization/local_visualizer.py index b99636c084..b8bb2cdd71 100644 --- a/mmdet3d/visualization/local_visualizer.py +++ b/mmdet3d/visualization/local_visualizer.py @@ -1,6 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. import copy -import logging import math import os import sys @@ -157,7 +156,7 @@ def _clear_o3d_vis(self) -> None: if hasattr(self, 'pcd'): del self.pcd - def _initialize_o3d_vis(self) -> Visualizer: + def _initialize_o3d_vis(self, show=True) -> Visualizer: """Initialize open3d vis according to frame_cfg. 
Args: @@ -178,9 +177,8 @@ def _initialize_o3d_vis(self) -> Visualizer: o3d_vis.register_key_action_callback(glfw_key_space, self.space_action_callback) o3d_vis.register_key_callback(glfw_key_right, self.right_callback) - if os.environ.get('DISPLAY', None) is not None: + if os.environ.get('DISPLAY', None) is not None and show: o3d_vis.create_window() - print_log('No display device found', level=logging.WARNING) self.view_control = o3d_vis.get_view_control() return o3d_vis @@ -863,6 +861,9 @@ def show(self, self.view_port) self.flag_exit = not self.o3d_vis.poll_events() self.o3d_vis.update_renderer() + # if not hasattr(self, 'view_control'): + # self.o3d_vis.create_window() + # self.view_control = self.o3d_vis.get_view_control() self.view_port = \ self.view_control.convert_to_pinhole_camera_parameters() # noqa: E501 if wait_time != -1: @@ -980,7 +981,7 @@ def add_datasample(self, # For object detection datasets, no palette is saved palette = self.dataset_meta.get('palette', None) ignore_index = self.dataset_meta.get('ignore_index', None) - if ignore_index is not None and 'gt_pts_seg' in data_sample and vis_task == 'lidar_seg': # noqa: E501 + if vis_task == 'lidar_seg' and ignore_index is not None and 'pts_semantic_mask' in data_sample.gt_pts_seg: # noqa: E501 keep_index = data_sample.gt_pts_seg.pts_semantic_mask != ignore_index # noqa: E501 else: keep_index = None @@ -990,6 +991,9 @@ def add_datasample(self, gt_img_data = None pred_img_data = None + if not hasattr(self, 'o3d_vis'): + self.o3d_vis = self._initialize_o3d_vis(show=show) + if draw_gt and data_sample is not None: if 'gt_instances_3d' in data_sample: gt_data_3d = self._draw_instances_3d( @@ -1087,6 +1091,7 @@ def add_datasample(self, if drawn_img_3d is not None: mmcv.imwrite(drawn_img_3d[..., ::-1], out_file) if drawn_img is not None: - mmcv.imwrite(drawn_img[..., ::-1], out_file) + mmcv.imwrite(drawn_img[..., ::-1], + out_file[:-4] + '_2d' + out_file[-4:]) else: self.add_image(name, drawn_img_3d, step) From 510f50e119d622510fc78cad3126afabf7496aab Mon Sep 17 00:00:00 2001 From: sunjiahao1999 <578431509@qq.com> Date: Wed, 11 Oct 2023 15:21:50 +0800 Subject: [PATCH 03/11] fix bevfusion demo --- projects/BEVFusion/README.md | 2 +- projects/BEVFusion/demo/multi_modalit_demo.py | 78 +++++++++++++++++++ 2 files changed, 79 insertions(+), 1 deletion(-) create mode 100644 projects/BEVFusion/demo/multi_modalit_demo.py diff --git a/projects/BEVFusion/README.md b/projects/BEVFusion/README.md index 0828bd4f8d..9d5ebd4c52 100644 --- a/projects/BEVFusion/README.md +++ b/projects/BEVFusion/README.md @@ -34,7 +34,7 @@ python projects/BEVFusion/setup.py develop Run a demo on NuScenes data using [BEVFusion model](https://drive.google.com/file/d/1QkvbYDk4G2d6SZoeJqish13qSyXA4lp3/view?usp=share_link): ```shell -python demo/multi_modality_demo.py demo/data/nuscenes/n015-2018-07-24-11-22-45+0800__LIDAR_TOP__1532402927647951.pcd.bin demo/data/nuscenes/ demo/data/nuscenes/n015-2018-07-24-11-22-45+0800.pkl projects/BEVFusion/configs/bevfusion_lidar-cam_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d.py ${CHECKPOINT_FILE} --cam-type all --score-thr 0.2 --show +python projects/BEVFusion/demo/multi_modality_demo.py demo/data/nuscenes/n015-2018-07-24-11-22-45+0800__LIDAR_TOP__1532402927647951.pcd.bin demo/data/nuscenes/ demo/data/nuscenes/n015-2018-07-24-11-22-45+0800.pkl projects/BEVFusion/configs/bevfusion_lidar-cam_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d.py ${CHECKPOINT_FILE} --cam-type all --score-thr 0.2 --show ``` ### Training commands diff --git 
a/projects/BEVFusion/demo/multi_modalit_demo.py b/projects/BEVFusion/demo/multi_modalit_demo.py new file mode 100644 index 0000000000..9f82867d40 --- /dev/null +++ b/projects/BEVFusion/demo/multi_modalit_demo.py @@ -0,0 +1,78 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from argparse import ArgumentParser + +import mmcv + +from mmdet3d.apis import inference_multi_modality_detector, init_model +from mmdet3d.registry import VISUALIZERS + + +def parse_args(): + parser = ArgumentParser() + parser.add_argument('pcd', help='Point cloud file') + parser.add_argument('img', help='image file') + parser.add_argument('ann', help='ann file') + parser.add_argument('config', help='Config file') + parser.add_argument('checkpoint', help='Checkpoint file') + parser.add_argument( + '--device', default='cuda:0', help='Device used for inference') + parser.add_argument( + '--cam-type', + type=str, + default='CAM_FRONT', + help='choose camera type to inference') + parser.add_argument( + '--score-thr', type=float, default=0.0, help='bbox score threshold') + parser.add_argument( + '--out-dir', type=str, default='demo', help='dir to save results') + parser.add_argument( + '--show', + action='store_true', + help='show online visualization results') + parser.add_argument( + '--snapshot', + action='store_true', + help='whether to save online visualization results') + args = parser.parse_args() + return args + + +def main(args): + # build the model from a config file and a checkpoint file + model = init_model(args.config, args.checkpoint, device=args.device) + + # init visualizer + visualizer = VISUALIZERS.build(model.cfg.visualizer) + visualizer.dataset_meta = model.dataset_meta + + # test a single image and point cloud sample + result, data = inference_multi_modality_detector(model, args.pcd, args.img, + args.ann, args.cam_type) + points = data['inputs']['points'] + if isinstance(result.img_path, list): + img = [] + for img_path in result.img_path: + single_img = mmcv.imread(img_path) + single_img = mmcv.imconvert(single_img, 'bgr', 'rgb') + img.append(single_img) + else: + img = mmcv.imread(result.img_path) + img = mmcv.imconvert(img, 'bgr', 'rgb') + data_input = dict(points=points, img=img) + + # show the results + visualizer.add_datasample( + 'result', + data_input, + data_sample=result, + draw_gt=False, + show=args.show, + wait_time=-1, + out_file=args.out_dir, + pred_score_thr=args.score_thr, + vis_task='multi-modality_det') + + +if __name__ == '__main__': + args = parse_args() + main(args) From b72da9b1a6b2698d7bebea1a7d8a635d76987e34 Mon Sep 17 00:00:00 2001 From: sunjiahao1999 <578431509@qq.com> Date: Mon, 16 Oct 2023 11:42:23 +0800 Subject: [PATCH 04/11] rename --- demo/mono_det_demo.py | 106 +++++++++++++++++----------- demo/multi_modality_demo.py | 119 +++++++++++++++++++------------- demo/new_mono_det_demo.py | 98 -------------------------- demo/new_multi_modality_demo.py | 101 --------------------------- demo/new_pcd_demo.py | 90 ------------------------ demo/new_pcd_seg_demo.py | 85 ----------------------- demo/pcd_demo.py | 104 ++++++++++++++++++---------- demo/pcd_seg_demo.py | 102 +++++++++++++++++---------- 8 files changed, 271 insertions(+), 534 deletions(-) delete mode 100644 demo/new_mono_det_demo.py delete mode 100644 demo/new_multi_modality_demo.py delete mode 100644 demo/new_pcd_demo.py delete mode 100644 demo/new_pcd_seg_demo.py diff --git a/demo/mono_det_demo.py b/demo/mono_det_demo.py index 0096a649e5..08ba916a8f 100644 --- a/demo/mono_det_demo.py +++ b/demo/mono_det_demo.py @@ 
-1,18 +1,19 @@ # Copyright (c) OpenMMLab. All rights reserved. +import logging +import os from argparse import ArgumentParser -import mmcv +from mmengine.logging import print_log -from mmdet3d.apis import inference_mono_3d_detector, init_model -from mmdet3d.registry import VISUALIZERS +from mmdet3d.apis import MonoDet3DInferencer def parse_args(): parser = ArgumentParser() - parser.add_argument('img', help='image file') - parser.add_argument('ann', help='ann file') - parser.add_argument('config', help='Config file') - parser.add_argument('checkpoint', help='Checkpoint file') + parser.add_argument('img', help='Image file') + parser.add_argument('infos', help='Infos file with annotations') + parser.add_argument('model', help='Config file') + parser.add_argument('weights', help='Checkpoint file') parser.add_argument( '--device', default='cuda:0', help='Device used for inference') parser.add_argument( @@ -21,50 +22,77 @@ def parse_args(): default='CAM_BACK', help='choose camera type to inference') parser.add_argument( - '--score-thr', type=float, default=0.30, help='bbox score threshold') + '--pred-score-thr', + type=float, + default=0.3, + help='bbox score threshold') parser.add_argument( - '--out-dir', type=str, default='demo', help='dir to save results') + '--out-dir', + type=str, + default='outputs', + help='Output directory of prediction results.') parser.add_argument( '--show', action='store_true', - help='show online visualization results') + help='Show online visualization results') + parser.add_argument( + '--wait_time', + type=float, + default=-1, + help='The interval of show (s). Demo will be blocked in showing' + 'results, if wait_time is -1. Defaults to -1.') parser.add_argument( - '--snapshot', + '--no-save-vis', action='store_true', - help='whether to save online visualization results') - args = parser.parse_args() - return args + help='Do not save detection vis results') + parser.add_argument( + '--no-save-pred', + action='store_true', + help='Do not save detection json results') + parser.add_argument( + '--print-result', + action='store_true', + help='Whether to print the results.') + call_args = vars(parser.parse_args()) + + call_args['inputs'] = dict( + img=call_args.pop('img'), infos=call_args.pop('infos')) + call_args.pop('cam_type') + + if call_args['no_save_vis'] and call_args['no_save_pred']: + call_args['out_dir'] = '' + + init_kws = ['model', 'weights', 'device'] + init_args = {} + for init_kw in init_kws: + init_args[init_kw] = call_args.pop(init_kw) + # NOTE: If your operating environment does not have a display device, + # (e.g. a remote server), you can save the predictions and visualize + # them in local devices. + if os.environ.get('DISPLAY') is None and call_args['show']: + print_log( + 'Display device not found. `--show` is forced to False', + logger='current', + level=logging.WARNING) + call_args['show'] = False -def main(args): - # build the model from a config file and a checkpoint file - model = init_model(args.config, args.checkpoint, device=args.device) + return init_args, call_args - # init visualizer - visualizer = VISUALIZERS.build(model.cfg.visualizer) - visualizer.dataset_meta = model.dataset_meta - # test a single image - result = inference_mono_3d_detector(model, args.img, args.ann, - args.cam_type) +def main(): + # TODO: Support inference of point cloud numpy file. 
+ init_args, call_args = parse_args() - img = mmcv.imread(args.img) - img = mmcv.imconvert(img, 'bgr', 'rgb') + inferencer = MonoDet3DInferencer(**init_args) + inferencer(**call_args) - data_input = dict(img=img) - # show the results - visualizer.add_datasample( - 'result', - data_input, - data_sample=result, - draw_gt=False, - show=args.show, - wait_time=-1, - out_file=args.out_dir, - pred_score_thr=args.score_thr, - vis_task='mono_det') + if call_args['out_dir'] != '' and not (call_args['no_save_vis'] + and call_args['no_save_pred']): + print_log( + f'results have been saved at {call_args["out_dir"]}', + logger='current') if __name__ == '__main__': - args = parse_args() - main(args) + main() diff --git a/demo/multi_modality_demo.py b/demo/multi_modality_demo.py index 9f82867d40..3b4605aeeb 100644 --- a/demo/multi_modality_demo.py +++ b/demo/multi_modality_demo.py @@ -1,78 +1,101 @@ # Copyright (c) OpenMMLab. All rights reserved. +import logging +import os from argparse import ArgumentParser -import mmcv +from mmengine.logging import print_log -from mmdet3d.apis import inference_multi_modality_detector, init_model -from mmdet3d.registry import VISUALIZERS +from mmdet3d.apis import MultiModalityDet3DInferencer def parse_args(): parser = ArgumentParser() parser.add_argument('pcd', help='Point cloud file') - parser.add_argument('img', help='image file') - parser.add_argument('ann', help='ann file') - parser.add_argument('config', help='Config file') - parser.add_argument('checkpoint', help='Checkpoint file') + parser.add_argument('img', help='Image file') + parser.add_argument('infos', help='Infos file with annotations') + parser.add_argument('model', help='Config file') + parser.add_argument('weights', help='Checkpoint file') parser.add_argument( '--device', default='cuda:0', help='Device used for inference') parser.add_argument( '--cam-type', type=str, - default='CAM_FRONT', + default='CAM_BACK', help='choose camera type to inference') parser.add_argument( - '--score-thr', type=float, default=0.0, help='bbox score threshold') + '--pred-score-thr', + type=float, + default=0.3, + help='bbox score threshold') parser.add_argument( - '--out-dir', type=str, default='demo', help='dir to save results') + '--out-dir', + type=str, + default='outputs', + help='Output directory of prediction results.') parser.add_argument( '--show', action='store_true', - help='show online visualization results') + help='Show online visualization results') + parser.add_argument( + '--wait_time', + type=float, + default=-1, + help='The interval of show (s). Demo will be blocked in showing' + 'results, if wait_time is -1. 
Defaults to -1.') + parser.add_argument( + '--no-save-vis', + action='store_true', + help='Do not save detection vis results') + parser.add_argument( + '--no-save-pred', + action='store_true', + help='Do not save detection json results') parser.add_argument( - '--snapshot', + '--print-result', action='store_true', - help='whether to save online visualization results') - args = parser.parse_args() - return args + help='Whether to print the results.') + call_args = vars(parser.parse_args()) + + call_args['inputs'] = dict( + points=call_args.pop('pcd'), + img=call_args.pop('img'), + infos=call_args.pop('infos')) + call_args.pop('cam_type') + + if call_args['no_save_vis'] and call_args['no_save_pred']: + call_args['out_dir'] = '' + + init_kws = ['model', 'weights', 'device'] + init_args = {} + for init_kw in init_kws: + init_args[init_kw] = call_args.pop(init_kw) + + # NOTE: If your operating environment does not have a display device, + # (e.g. a remote server), you can save the predictions and visualize + # them in local devices. + if os.environ.get('DISPLAY') is None and call_args['show']: + print_log( + 'Display device not found. `--show` is forced to False', + logger='current', + level=logging.WARNING) + call_args['show'] = False + return init_args, call_args -def main(args): - # build the model from a config file and a checkpoint file - model = init_model(args.config, args.checkpoint, device=args.device) - # init visualizer - visualizer = VISUALIZERS.build(model.cfg.visualizer) - visualizer.dataset_meta = model.dataset_meta +def main(): + # TODO: Support inference of point cloud numpy file. + init_args, call_args = parse_args() - # test a single image and point cloud sample - result, data = inference_multi_modality_detector(model, args.pcd, args.img, - args.ann, args.cam_type) - points = data['inputs']['points'] - if isinstance(result.img_path, list): - img = [] - for img_path in result.img_path: - single_img = mmcv.imread(img_path) - single_img = mmcv.imconvert(single_img, 'bgr', 'rgb') - img.append(single_img) - else: - img = mmcv.imread(result.img_path) - img = mmcv.imconvert(img, 'bgr', 'rgb') - data_input = dict(points=points, img=img) + inferencer = MultiModalityDet3DInferencer(**init_args) + inferencer(**call_args) - # show the results - visualizer.add_datasample( - 'result', - data_input, - data_sample=result, - draw_gt=False, - show=args.show, - wait_time=-1, - out_file=args.out_dir, - pred_score_thr=args.score_thr, - vis_task='multi-modality_det') + if call_args['out_dir'] != '' and not (call_args['no_save_vis'] + and call_args['no_save_pred']): + print_log( + f'results have been saved at {call_args["out_dir"]}', + logger='current') if __name__ == '__main__': - args = parse_args() - main(args) + main() diff --git a/demo/new_mono_det_demo.py b/demo/new_mono_det_demo.py deleted file mode 100644 index 08ba916a8f..0000000000 --- a/demo/new_mono_det_demo.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. 
-import logging -import os -from argparse import ArgumentParser - -from mmengine.logging import print_log - -from mmdet3d.apis import MonoDet3DInferencer - - -def parse_args(): - parser = ArgumentParser() - parser.add_argument('img', help='Image file') - parser.add_argument('infos', help='Infos file with annotations') - parser.add_argument('model', help='Config file') - parser.add_argument('weights', help='Checkpoint file') - parser.add_argument( - '--device', default='cuda:0', help='Device used for inference') - parser.add_argument( - '--cam-type', - type=str, - default='CAM_BACK', - help='choose camera type to inference') - parser.add_argument( - '--pred-score-thr', - type=float, - default=0.3, - help='bbox score threshold') - parser.add_argument( - '--out-dir', - type=str, - default='outputs', - help='Output directory of prediction results.') - parser.add_argument( - '--show', - action='store_true', - help='Show online visualization results') - parser.add_argument( - '--wait_time', - type=float, - default=-1, - help='The interval of show (s). Demo will be blocked in showing' - 'results, if wait_time is -1. Defaults to -1.') - parser.add_argument( - '--no-save-vis', - action='store_true', - help='Do not save detection vis results') - parser.add_argument( - '--no-save-pred', - action='store_true', - help='Do not save detection json results') - parser.add_argument( - '--print-result', - action='store_true', - help='Whether to print the results.') - call_args = vars(parser.parse_args()) - - call_args['inputs'] = dict( - img=call_args.pop('img'), infos=call_args.pop('infos')) - call_args.pop('cam_type') - - if call_args['no_save_vis'] and call_args['no_save_pred']: - call_args['out_dir'] = '' - - init_kws = ['model', 'weights', 'device'] - init_args = {} - for init_kw in init_kws: - init_args[init_kw] = call_args.pop(init_kw) - - # NOTE: If your operating environment does not have a display device, - # (e.g. a remote server), you can save the predictions and visualize - # them in local devices. - if os.environ.get('DISPLAY') is None and call_args['show']: - print_log( - 'Display device not found. `--show` is forced to False', - logger='current', - level=logging.WARNING) - call_args['show'] = False - - return init_args, call_args - - -def main(): - # TODO: Support inference of point cloud numpy file. - init_args, call_args = parse_args() - - inferencer = MonoDet3DInferencer(**init_args) - inferencer(**call_args) - - if call_args['out_dir'] != '' and not (call_args['no_save_vis'] - and call_args['no_save_pred']): - print_log( - f'results have been saved at {call_args["out_dir"]}', - logger='current') - - -if __name__ == '__main__': - main() diff --git a/demo/new_multi_modality_demo.py b/demo/new_multi_modality_demo.py deleted file mode 100644 index 3b4605aeeb..0000000000 --- a/demo/new_multi_modality_demo.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. 
-import logging -import os -from argparse import ArgumentParser - -from mmengine.logging import print_log - -from mmdet3d.apis import MultiModalityDet3DInferencer - - -def parse_args(): - parser = ArgumentParser() - parser.add_argument('pcd', help='Point cloud file') - parser.add_argument('img', help='Image file') - parser.add_argument('infos', help='Infos file with annotations') - parser.add_argument('model', help='Config file') - parser.add_argument('weights', help='Checkpoint file') - parser.add_argument( - '--device', default='cuda:0', help='Device used for inference') - parser.add_argument( - '--cam-type', - type=str, - default='CAM_BACK', - help='choose camera type to inference') - parser.add_argument( - '--pred-score-thr', - type=float, - default=0.3, - help='bbox score threshold') - parser.add_argument( - '--out-dir', - type=str, - default='outputs', - help='Output directory of prediction results.') - parser.add_argument( - '--show', - action='store_true', - help='Show online visualization results') - parser.add_argument( - '--wait_time', - type=float, - default=-1, - help='The interval of show (s). Demo will be blocked in showing' - 'results, if wait_time is -1. Defaults to -1.') - parser.add_argument( - '--no-save-vis', - action='store_true', - help='Do not save detection vis results') - parser.add_argument( - '--no-save-pred', - action='store_true', - help='Do not save detection json results') - parser.add_argument( - '--print-result', - action='store_true', - help='Whether to print the results.') - call_args = vars(parser.parse_args()) - - call_args['inputs'] = dict( - points=call_args.pop('pcd'), - img=call_args.pop('img'), - infos=call_args.pop('infos')) - call_args.pop('cam_type') - - if call_args['no_save_vis'] and call_args['no_save_pred']: - call_args['out_dir'] = '' - - init_kws = ['model', 'weights', 'device'] - init_args = {} - for init_kw in init_kws: - init_args[init_kw] = call_args.pop(init_kw) - - # NOTE: If your operating environment does not have a display device, - # (e.g. a remote server), you can save the predictions and visualize - # them in local devices. - if os.environ.get('DISPLAY') is None and call_args['show']: - print_log( - 'Display device not found. `--show` is forced to False', - logger='current', - level=logging.WARNING) - call_args['show'] = False - - return init_args, call_args - - -def main(): - # TODO: Support inference of point cloud numpy file. - init_args, call_args = parse_args() - - inferencer = MultiModalityDet3DInferencer(**init_args) - inferencer(**call_args) - - if call_args['out_dir'] != '' and not (call_args['no_save_vis'] - and call_args['no_save_pred']): - print_log( - f'results have been saved at {call_args["out_dir"]}', - logger='current') - - -if __name__ == '__main__': - main() diff --git a/demo/new_pcd_demo.py b/demo/new_pcd_demo.py deleted file mode 100644 index d7d850bc12..0000000000 --- a/demo/new_pcd_demo.py +++ /dev/null @@ -1,90 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. 
-import logging -import os -from argparse import ArgumentParser - -from mmengine.logging import print_log - -from mmdet3d.apis import LidarDet3DInferencer - - -def parse_args(): - parser = ArgumentParser() - parser.add_argument('pcd', help='Point cloud file') - parser.add_argument('model', help='Config file') - parser.add_argument('weights', help='Checkpoint file') - parser.add_argument( - '--device', default='cuda:0', help='Device used for inference') - parser.add_argument( - '--pred-score-thr', - type=float, - default=0.3, - help='bbox score threshold') - parser.add_argument( - '--out-dir', - type=str, - default='outputs', - help='Output directory of prediction results.') - parser.add_argument( - '--show', - action='store_true', - help='Show online visualization results') - parser.add_argument( - '--wait_time', - type=float, - default=-1, - help='The interval of show (s). Demo will be blocked in showing' - 'results, if wait_time is -1. Defaults to -1.') - parser.add_argument( - '--no-save-vis', - action='store_true', - help='Do not save detection vis results') - parser.add_argument( - '--no-save-pred', - action='store_true', - help='Do not save detection json results') - parser.add_argument( - '--print-result', - action='store_true', - help='Whether to print the results.') - call_args = vars(parser.parse_args()) - - call_args['inputs'] = dict(points=call_args.pop('pcd')) - - if call_args['no_save_vis'] and call_args['no_save_pred']: - call_args['out_dir'] = '' - - init_kws = ['model', 'weights', 'device'] - init_args = {} - for init_kw in init_kws: - init_args[init_kw] = call_args.pop(init_kw) - - # NOTE: If your operating environment does not have a display device, - # (e.g. a remote server), you can save the predictions and visualize - # them in local devices. - if os.environ.get('DISPLAY') is None and call_args['show']: - print_log( - 'Display device not found. `--show` is forced to False', - logger='current', - level=logging.WARNING) - call_args['show'] = False - - return init_args, call_args - - -def main(): - # TODO: Support inference of point cloud numpy file. - init_args, call_args = parse_args() - - inferencer = LidarDet3DInferencer(**init_args) - inferencer(**call_args) - - if call_args['out_dir'] != '' and not (call_args['no_save_vis'] - and call_args['no_save_pred']): - print_log( - f'results have been saved at {call_args["out_dir"]}', - logger='current') - - -if __name__ == '__main__': - main() diff --git a/demo/new_pcd_seg_demo.py b/demo/new_pcd_seg_demo.py deleted file mode 100644 index bc6697e165..0000000000 --- a/demo/new_pcd_seg_demo.py +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import logging -import os -from argparse import ArgumentParser - -from mmengine.logging import print_log - -from mmdet3d.apis import LidarSeg3DInferencer - - -def parse_args(): - parser = ArgumentParser() - parser.add_argument('pcd', help='Point cloud file') - parser.add_argument('model', help='Config file') - parser.add_argument('weights', help='Checkpoint file') - parser.add_argument( - '--device', default='cuda:0', help='Device used for inference') - parser.add_argument( - '--out-dir', - type=str, - default='outputs', - help='Output directory of prediction results.') - parser.add_argument( - '--show', - action='store_true', - help='Show online visualization results') - parser.add_argument( - '--wait_time', - type=float, - default=-1, - help='The interval of show (s). Demo will be blocked in showing' - 'results, if wait_time is -1. 
Defaults to -1.') - parser.add_argument( - '--no-save-vis', - action='store_true', - help='Do not save detection vis results') - parser.add_argument( - '--no-save-pred', - action='store_true', - help='Do not save detection json results') - parser.add_argument( - '--print-result', - action='store_true', - help='Whether to print the results.') - call_args = vars(parser.parse_args()) - - call_args['inputs'] = dict(points=call_args.pop('pcd')) - - if call_args['no_save_vis'] and call_args['no_save_pred']: - call_args['out_dir'] = '' - - init_kws = ['model', 'weights', 'device'] - init_args = {} - for init_kw in init_kws: - init_args[init_kw] = call_args.pop(init_kw) - - # NOTE: If your operating environment does not have a display device, - # (e.g. a remote server), you can save the predictions and visualize - # them in local devices. - if os.environ.get('DISPLAY') is None and call_args['show']: - print_log( - 'Display device not found. `--show` is forced to False', - logger='current', - level=logging.WARNING) - call_args['show'] = False - - return init_args, call_args - - -def main(): - # TODO: Support inference of point cloud numpy file. - init_args, call_args = parse_args() - - inferencer = LidarSeg3DInferencer(**init_args) - inferencer(**call_args) - - if call_args['out_dir'] != '' and not (call_args['no_save_vis'] - and call_args['no_save_pred']): - print_log( - f'results have been saved at {call_args["out_dir"]}', - logger='current') - - -if __name__ == '__main__': - main() diff --git a/demo/pcd_demo.py b/demo/pcd_demo.py index 6c9266d4e0..d7d850bc12 100644 --- a/demo/pcd_demo.py +++ b/demo/pcd_demo.py @@ -1,60 +1,90 @@ # Copyright (c) OpenMMLab. All rights reserved. +import logging +import os from argparse import ArgumentParser -from mmdet3d.apis import inference_detector, init_model -from mmdet3d.registry import VISUALIZERS +from mmengine.logging import print_log + +from mmdet3d.apis import LidarDet3DInferencer def parse_args(): parser = ArgumentParser() parser.add_argument('pcd', help='Point cloud file') - parser.add_argument('config', help='Config file') - parser.add_argument('checkpoint', help='Checkpoint file') + parser.add_argument('model', help='Config file') + parser.add_argument('weights', help='Checkpoint file') parser.add_argument( '--device', default='cuda:0', help='Device used for inference') parser.add_argument( - '--score-thr', type=float, default=0.0, help='bbox score threshold') + '--pred-score-thr', + type=float, + default=0.3, + help='bbox score threshold') parser.add_argument( - '--out-dir', type=str, default='demo', help='dir to save results') + '--out-dir', + type=str, + default='outputs', + help='Output directory of prediction results.') parser.add_argument( '--show', action='store_true', - help='show online visualization results') + help='Show online visualization results') + parser.add_argument( + '--wait_time', + type=float, + default=-1, + help='The interval of show (s). Demo will be blocked in showing' + 'results, if wait_time is -1. 
Defaults to -1.') + parser.add_argument( + '--no-save-vis', + action='store_true', + help='Do not save detection vis results') parser.add_argument( - '--snapshot', + '--no-save-pred', action='store_true', - help='whether to save online visualization results') - args = parser.parse_args() - return args + help='Do not save detection json results') + parser.add_argument( + '--print-result', + action='store_true', + help='Whether to print the results.') + call_args = vars(parser.parse_args()) + + call_args['inputs'] = dict(points=call_args.pop('pcd')) + if call_args['no_save_vis'] and call_args['no_save_pred']: + call_args['out_dir'] = '' -def main(args): + init_kws = ['model', 'weights', 'device'] + init_args = {} + for init_kw in init_kws: + init_args[init_kw] = call_args.pop(init_kw) + + # NOTE: If your operating environment does not have a display device, + # (e.g. a remote server), you can save the predictions and visualize + # them in local devices. + if os.environ.get('DISPLAY') is None and call_args['show']: + print_log( + 'Display device not found. `--show` is forced to False', + logger='current', + level=logging.WARNING) + call_args['show'] = False + + return init_args, call_args + + +def main(): # TODO: Support inference of point cloud numpy file. - # build the model from a config file and a checkpoint file - model = init_model(args.config, args.checkpoint, device=args.device) - - # init visualizer - visualizer = VISUALIZERS.build(model.cfg.visualizer) - visualizer.dataset_meta = model.dataset_meta - - # test a single point cloud sample - result, data = inference_detector(model, args.pcd) - points = data['inputs']['points'] - data_input = dict(points=points) - - # show the results - visualizer.add_datasample( - 'result', - data_input, - data_sample=result, - draw_gt=False, - show=args.show, - wait_time=-1, - out_file=args.out_dir, - pred_score_thr=args.score_thr, - vis_task='lidar_det') + init_args, call_args = parse_args() + + inferencer = LidarDet3DInferencer(**init_args) + inferencer(**call_args) + + if call_args['out_dir'] != '' and not (call_args['no_save_vis'] + and call_args['no_save_pred']): + print_log( + f'results have been saved at {call_args["out_dir"]}', + logger='current') if __name__ == '__main__': - args = parse_args() - main(args) + main() diff --git a/demo/pcd_seg_demo.py b/demo/pcd_seg_demo.py index 2f1438604b..bc6697e165 100644 --- a/demo/pcd_seg_demo.py +++ b/demo/pcd_seg_demo.py @@ -1,55 +1,85 @@ # Copyright (c) OpenMMLab. All rights reserved. 
+import logging +import os from argparse import ArgumentParser -from mmdet3d.apis import inference_segmentor, init_model -from mmdet3d.registry import VISUALIZERS +from mmengine.logging import print_log + +from mmdet3d.apis import LidarSeg3DInferencer def parse_args(): parser = ArgumentParser() parser.add_argument('pcd', help='Point cloud file') - parser.add_argument('config', help='Config file') - parser.add_argument('checkpoint', help='Checkpoint file') + parser.add_argument('model', help='Config file') + parser.add_argument('weights', help='Checkpoint file') parser.add_argument( '--device', default='cuda:0', help='Device used for inference') parser.add_argument( - '--out-dir', type=str, default='demo', help='dir to save results') + '--out-dir', + type=str, + default='outputs', + help='Output directory of prediction results.') parser.add_argument( '--show', action='store_true', - help='show online visualization results') + help='Show online visualization results') + parser.add_argument( + '--wait_time', + type=float, + default=-1, + help='The interval of show (s). Demo will be blocked in showing' + 'results, if wait_time is -1. Defaults to -1.') + parser.add_argument( + '--no-save-vis', + action='store_true', + help='Do not save detection vis results') parser.add_argument( - '--snapshot', + '--no-save-pred', action='store_true', - help='whether to save online visualization results') - args = parser.parse_args() - return args - - -def main(args): - # build the model from a config file and a checkpoint file - model = init_model(args.config, args.checkpoint, device=args.device) - - # init visualizer - visualizer = VISUALIZERS.build(model.cfg.visualizer) - visualizer.dataset_meta = model.dataset_meta - - # test a single point cloud sample - result, data = inference_segmentor(model, args.pcd) - points = data['inputs']['points'] - data_input = dict(points=points) - # show the results - visualizer.add_datasample( - 'result', - data_input, - data_sample=result, - draw_gt=False, - show=args.show, - wait_time=-1, - out_file=args.out_dir, - vis_task='lidar_seg') + help='Do not save detection json results') + parser.add_argument( + '--print-result', + action='store_true', + help='Whether to print the results.') + call_args = vars(parser.parse_args()) + + call_args['inputs'] = dict(points=call_args.pop('pcd')) + + if call_args['no_save_vis'] and call_args['no_save_pred']: + call_args['out_dir'] = '' + + init_kws = ['model', 'weights', 'device'] + init_args = {} + for init_kw in init_kws: + init_args[init_kw] = call_args.pop(init_kw) + + # NOTE: If your operating environment does not have a display device, + # (e.g. a remote server), you can save the predictions and visualize + # them in local devices. + if os.environ.get('DISPLAY') is None and call_args['show']: + print_log( + 'Display device not found. `--show` is forced to False', + logger='current', + level=logging.WARNING) + call_args['show'] = False + + return init_args, call_args + + +def main(): + # TODO: Support inference of point cloud numpy file. 
+ init_args, call_args = parse_args() + + inferencer = LidarSeg3DInferencer(**init_args) + inferencer(**call_args) + + if call_args['out_dir'] != '' and not (call_args['no_save_vis'] + and call_args['no_save_pred']): + print_log( + f'results have been saved at {call_args["out_dir"]}', + logger='current') if __name__ == '__main__': - args = parse_args() - main(args) + main() From 53515b949247dd086d77cafd6309cf6596f10293 Mon Sep 17 00:00:00 2001 From: sunjiahao1999 <578431509@qq.com> Date: Mon, 16 Oct 2023 11:54:00 +0800 Subject: [PATCH 05/11] fix det version --- mmdet3d/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mmdet3d/__init__.py b/mmdet3d/__init__.py index 3fb8ca0e2b..9584e57741 100644 --- a/mmdet3d/__init__.py +++ b/mmdet3d/__init__.py @@ -14,8 +14,8 @@ mmengine_maximum_version = '1.0.0' mmengine_version = digit_version(mmengine.__version__) -mmdet_minimum_version = '3.0.0' -mmdet_maximum_version = '3.2.0' +mmdet_minimum_version = '3.0.0rc5' +mmdet_maximum_version = '3.3.0' mmdet_version = digit_version(mmdet.__version__) assert (mmcv_version >= digit_version(mmcv_minimum_version) From 114059e3000fff0d4151ef97d8a41c36bf524406 Mon Sep 17 00:00:00 2001 From: sunjiahao1999 <578431509@qq.com> Date: Mon, 16 Oct 2023 17:12:22 +0800 Subject: [PATCH 06/11] fix ut and fix input_to_list --- .../apis/inferencers/base_3d_inferencer.py | 78 ---------------- .../inferencers/lidar_det3d_inferencer.py | 19 +++- .../inferencers/lidar_seg3d_inferencer.py | 19 +++- .../apis/inferencers/mono_det3d_inferencer.py | 81 ++++++++++++++++- .../multi_modality_det3d_inferencer.py | 89 ++++++++++++++++++- .../test_lidar_det3d_inferencer.py | 14 ++- .../test_lidar_seg3d_inferencer.py | 14 ++- .../test_mono_det3d_inferencer.py | 50 +++++------ .../test_multi_modality_det3d_inferencer.py | 46 +++++----- 9 files changed, 259 insertions(+), 151 deletions(-) diff --git a/mmdet3d/apis/inferencers/base_3d_inferencer.py b/mmdet3d/apis/inferencers/base_3d_inferencer.py index 8996bb0bab..d8a39fb3fc 100644 --- a/mmdet3d/apis/inferencers/base_3d_inferencer.py +++ b/mmdet3d/apis/inferencers/base_3d_inferencer.py @@ -4,12 +4,9 @@ from copy import deepcopy from typing import Dict, List, Optional, Sequence, Tuple, Union -import mmengine import numpy as np import torch.nn as nn from mmengine import dump, print_log -from mmengine.fileio import (get_file_backend, isdir, join_path, - list_dir_or_file) from mmengine.infer.infer import BaseInferencer, ModelType from mmengine.model.utils import revert_sync_batchnorm from mmengine.registry import init_default_scope @@ -132,81 +129,6 @@ def _init_model( model.eval() return model - def _inputs_to_list(self, - inputs: Union[dict, list], - modality_key: Union[str, List[str]] = 'points', - cam_type='CAM2') -> list: - """Preprocess the inputs to a list. - - Preprocess inputs to a list according to its type: - - - list or tuple: return inputs - - dict: the value of key 'points'/`img` is - - Directory path: return all files in the directory - - other cases: return a list containing the string. The string - could be a path to file, a url or other types of string according - to the task. - - Args: - inputs (Union[dict, list]): Inputs for the inferencer. - modality_key (Union[str, List[str]]): The key of the modality. - Defaults to 'points'. - - Returns: - list: List of input for the :meth:`preprocess`. 
- """ - if isinstance(modality_key, str): - modality_key = [modality_key] - assert set(modality_key).issubset({'points', 'img'}) - - if 'infos' in inputs: - infos = inputs.pop('infos') - else: - infos = None - - for key in modality_key: - if isinstance(inputs, dict) and isinstance(inputs[key], str): - img = inputs[key] - backend = get_file_backend(img) - if hasattr(backend, 'isdir') and isdir(img): - # Backends like HttpsBackend do not implement `isdir`, so - # only those backends that implement `isdir` could accept - # the inputs as a directory - filename_list = list_dir_or_file(img, list_dir=False) - inputs = [{ - f'{key}': join_path(img, filename) - } for filename in filename_list] - - if not isinstance(inputs, (list, tuple)): - inputs = [inputs] - - # get cam2img, lidar2cam and lidar2img from infos - if infos is not None: - info_list = mmengine.load(infos)['data_list'] - assert len(info_list) == len(inputs) - for index, input in enumerate(inputs): - data_info = info_list[index] - img_path = data_info['images'][cam_type]['img_path'] - if osp.basename(img_path) != osp.basename(input['img']): - raise ValueError( - f'the info file of {img_path} is not provided.') - cam2img = np.asarray( - data_info['images'][cam_type]['cam2img'], dtype=np.float32) - lidar2cam = np.asarray( - data_info['images'][cam_type]['lidar2cam'], - dtype=np.float32) - if 'lidar2img' in data_info['images'][cam_type]: - lidar2img = np.asarray( - data_info['images'][cam_type]['lidar2img'], - dtype=np.float32) - else: - lidar2img = cam2img @ lidar2cam - input['cam2img'] = cam2img - input['lidar2cam'] = lidar2cam - input['lidar2img'] = lidar2img - - return list(inputs) - def _get_transform_idx(self, pipeline_cfg: ConfigType, name: str) -> int: """Returns the index of the transform in a pipeline. diff --git a/mmdet3d/apis/inferencers/lidar_det3d_inferencer.py b/mmdet3d/apis/inferencers/lidar_det3d_inferencer.py index 128c53fb81..9869a87a28 100644 --- a/mmdet3d/apis/inferencers/lidar_det3d_inferencer.py +++ b/mmdet3d/apis/inferencers/lidar_det3d_inferencer.py @@ -6,6 +6,8 @@ import numpy as np import torch from mmengine.dataset import Compose +from mmengine.fileio import (get_file_backend, isdir, join_path, + list_dir_or_file) from mmengine.infer.infer import ModelType from mmengine.structures import InstanceData @@ -80,7 +82,22 @@ def _inputs_to_list(self, inputs: Union[dict, list], **kwargs) -> list: Returns: list: List of input for the :meth:`preprocess`. 
""" - return super()._inputs_to_list(inputs, modality_key='points', **kwargs) + if isinstance(inputs, dict) and isinstance(inputs['points'], str): + pcd = inputs['points'] + backend = get_file_backend(pcd) + if hasattr(backend, 'isdir') and isdir(pcd): + # Backends like HttpsBackend do not implement `isdir`, so + # only those backends that implement `isdir` could accept + # the inputs as a directory + filename_list = list_dir_or_file(pcd, list_dir=False) + inputs = [{ + 'points': join_path(pcd, filename) + } for filename in filename_list] + + if not isinstance(inputs, (list, tuple)): + inputs = [inputs] + + return list(inputs) def _init_pipeline(self, cfg: ConfigType) -> Compose: """Initialize the test pipeline.""" diff --git a/mmdet3d/apis/inferencers/lidar_seg3d_inferencer.py b/mmdet3d/apis/inferencers/lidar_seg3d_inferencer.py index 2adb998e4a..328d865e0d 100644 --- a/mmdet3d/apis/inferencers/lidar_seg3d_inferencer.py +++ b/mmdet3d/apis/inferencers/lidar_seg3d_inferencer.py @@ -5,6 +5,8 @@ import mmengine import numpy as np from mmengine.dataset import Compose +from mmengine.fileio import (get_file_backend, isdir, join_path, + list_dir_or_file) from mmengine.infer.infer import ModelType from mmengine.structures import InstanceData @@ -77,7 +79,22 @@ def _inputs_to_list(self, inputs: Union[dict, list], **kwargs) -> list: Returns: list: List of input for the :meth:`preprocess`. """ - return super()._inputs_to_list(inputs, modality_key='points', **kwargs) + if isinstance(inputs, dict) and isinstance(inputs['points'], str): + pcd = inputs['points'] + backend = get_file_backend(pcd) + if hasattr(backend, 'isdir') and isdir(pcd): + # Backends like HttpsBackend do not implement `isdir`, so + # only those backends that implement `isdir` could accept + # the inputs as a directory + filename_list = list_dir_or_file(pcd, list_dir=False) + inputs = [{ + 'points': join_path(pcd, filename) + } for filename in filename_list] + + if not isinstance(inputs, (list, tuple)): + inputs = [inputs] + + return list(inputs) def _init_pipeline(self, cfg: ConfigType) -> Compose: """Initialize the test pipeline.""" diff --git a/mmdet3d/apis/inferencers/mono_det3d_inferencer.py b/mmdet3d/apis/inferencers/mono_det3d_inferencer.py index d4e231808b..9a65883aed 100644 --- a/mmdet3d/apis/inferencers/mono_det3d_inferencer.py +++ b/mmdet3d/apis/inferencers/mono_det3d_inferencer.py @@ -6,6 +6,8 @@ import mmengine import numpy as np from mmengine.dataset import Compose +from mmengine.fileio import (get_file_backend, isdir, join_path, + list_dir_or_file) from mmengine.infer.infer import ModelType from mmengine.structures import InstanceData @@ -60,7 +62,10 @@ def __init__(self, scope=scope, palette=palette) - def _inputs_to_list(self, inputs: Union[dict, list], **kwargs) -> list: + def _inputs_to_list(self, + inputs: Union[dict, list], + cam_type='CAM2', + **kwargs) -> list: """Preprocess the inputs to a list. Preprocess inputs to a list according to its type: @@ -78,7 +83,79 @@ def _inputs_to_list(self, inputs: Union[dict, list], **kwargs) -> list: Returns: list: List of input for the :meth:`preprocess`. 
""" - return super()._inputs_to_list(inputs, modality_key='img', **kwargs) + if isinstance(inputs, dict): + assert 'infos' in inputs + infos = inputs.pop('infos') + + if isinstance(inputs['img'], str): + img = inputs['img'] + backend = get_file_backend(img) + if hasattr(backend, 'isdir') and isdir(img): + # Backends like HttpsBackend do not implement `isdir`, so + # only those backends that implement `isdir` could accept + # the inputs as a directory + filename_list = list_dir_or_file(img, list_dir=False) + inputs = [{ + 'img': join_path(img, filename) + } for filename in filename_list] + + if not isinstance(inputs, (list, tuple)): + inputs = [inputs] + + # get cam2img, lidar2cam and lidar2img from infos + info_list = mmengine.load(infos)['data_list'] + assert len(info_list) == len(inputs) + for index, input in enumerate(inputs): + data_info = info_list[index] + img_path = data_info['images'][cam_type]['img_path'] + if osp.isfile(input['img']) and osp.basename( + img_path) != osp.basename(input['img']): + raise ValueError( + f'the info file of {img_path} is not provided.') + cam2img = np.asarray( + data_info['images'][cam_type]['cam2img'], dtype=np.float32) + lidar2cam = np.asarray( + data_info['images'][cam_type]['lidar2cam'], + dtype=np.float32) + if 'lidar2img' in data_info['images'][cam_type]: + lidar2img = np.asarray( + data_info['images'][cam_type]['lidar2img'], + dtype=np.float32) + else: + lidar2img = cam2img @ lidar2cam + input['cam2img'] = cam2img + input['lidar2cam'] = lidar2cam + input['lidar2img'] = lidar2img + elif isinstance(inputs, (list, tuple)): + # get cam2img, lidar2cam and lidar2img from infos + for input in inputs: + assert 'infos' in input + infos = input.pop('infos') + info_list = mmengine.load(infos)['data_list'] + assert len(info_list) == 1, 'Only support single sample info' \ + 'in `.pkl`, when inputs is a list.' + data_info = info_list[0] + img_path = data_info['images'][cam_type]['img_path'] + if osp.isfile(input['img']) and osp.basename( + img_path) != osp.basename(input['img']): + raise ValueError( + f'the info file of {img_path} is not provided.') + cam2img = np.asarray( + data_info['images'][cam_type]['cam2img'], dtype=np.float32) + lidar2cam = np.asarray( + data_info['images'][cam_type]['lidar2cam'], + dtype=np.float32) + if 'lidar2img' in data_info['images'][cam_type]: + lidar2img = np.asarray( + data_info['images'][cam_type]['lidar2img'], + dtype=np.float32) + else: + lidar2img = cam2img @ lidar2cam + input['cam2img'] = cam2img + input['lidar2cam'] = lidar2cam + input['lidar2img'] = lidar2img + + return list(inputs) def _init_pipeline(self, cfg: ConfigType) -> Compose: """Initialize the test pipeline.""" diff --git a/mmdet3d/apis/inferencers/multi_modality_det3d_inferencer.py b/mmdet3d/apis/inferencers/multi_modality_det3d_inferencer.py index 7f00dd03ff..b934ed9e21 100644 --- a/mmdet3d/apis/inferencers/multi_modality_det3d_inferencer.py +++ b/mmdet3d/apis/inferencers/multi_modality_det3d_inferencer.py @@ -7,6 +7,8 @@ import mmengine import numpy as np from mmengine.dataset import Compose +from mmengine.fileio import (get_file_backend, isdir, join_path, + list_dir_or_file) from mmengine.infer.infer import ModelType from mmengine.structures import InstanceData @@ -60,7 +62,10 @@ def __init__(self, scope=scope, palette=palette) - def _inputs_to_list(self, inputs: Union[dict, list], **kwargs) -> list: + def _inputs_to_list(self, + inputs: Union[dict, list], + cam_type: str = 'CAM2', + **kwargs) -> list: """Preprocess the inputs to a list. 
Preprocess inputs to a list according to its type: @@ -78,8 +83,86 @@ def _inputs_to_list(self, inputs: Union[dict, list], **kwargs) -> list: Returns: list: List of input for the :meth:`preprocess`. """ - return super()._inputs_to_list( - inputs, modality_key=['points', 'img'], **kwargs) + if isinstance(inputs, dict): + assert 'infos' in inputs + infos = inputs.pop('infos') + + if isinstance(inputs['img'], str): + img, pcd = inputs['img'], inputs['points'] + backend = get_file_backend(img) + if hasattr(backend, 'isdir') and isdir(img) and isdir(pcd): + # Backends like HttpsBackend do not implement `isdir`, so + # only those backends that implement `isdir` could accept + # the inputs as a directory + img_filename_list = list_dir_or_file( + img, list_dir=False, suffix=['.png', '.jpg']) + pcd_filename_list = list_dir_or_file( + pcd, list_dir=False, suffix='.bin') + assert len(img_filename_list) == len(pcd_filename_list) + + inputs = [{ + 'img': join_path(img, img_filename), + 'points': join_path(pcd, pcd_filename) + } for pcd_filename, img_filename in zip( + pcd_filename_list, img_filename_list)] + + if not isinstance(inputs, (list, tuple)): + inputs = [inputs] + + # get cam2img, lidar2cam and lidar2img from infos + info_list = mmengine.load(infos)['data_list'] + assert len(info_list) == len(inputs) + for index, input in enumerate(inputs): + data_info = info_list[index] + img_path = data_info['images'][cam_type]['img_path'] + if osp.isfile(input['img']) and osp.basename( + img_path) != osp.basename(input['img']): + raise ValueError( + f'the info file of {img_path} is not provided.') + cam2img = np.asarray( + data_info['images'][cam_type]['cam2img'], dtype=np.float32) + lidar2cam = np.asarray( + data_info['images'][cam_type]['lidar2cam'], + dtype=np.float32) + if 'lidar2img' in data_info['images'][cam_type]: + lidar2img = np.asarray( + data_info['images'][cam_type]['lidar2img'], + dtype=np.float32) + else: + lidar2img = cam2img @ lidar2cam + input['cam2img'] = cam2img + input['lidar2cam'] = lidar2cam + input['lidar2img'] = lidar2img + elif isinstance(inputs, (list, tuple)): + # get cam2img, lidar2cam and lidar2img from infos + for input in inputs: + assert 'infos' in input + infos = input.pop('infos') + info_list = mmengine.load(infos)['data_list'] + assert len(info_list) == 1, 'Only support single sample' \ + 'info in `.pkl`, when input is a list.' 
+ data_info = info_list[0] + img_path = data_info['images'][cam_type]['img_path'] + if osp.isfile(input['img']) and osp.basename( + img_path) != osp.basename(input['img']): + raise ValueError( + f'the info file of {img_path} is not provided.') + cam2img = np.asarray( + data_info['images'][cam_type]['cam2img'], dtype=np.float32) + lidar2cam = np.asarray( + data_info['images'][cam_type]['lidar2cam'], + dtype=np.float32) + if 'lidar2img' in data_info['images'][cam_type]: + lidar2img = np.asarray( + data_info['images'][cam_type]['lidar2img'], + dtype=np.float32) + else: + lidar2img = cam2img @ lidar2cam + input['cam2img'] = cam2img + input['lidar2cam'] = lidar2cam + input['lidar2img'] = lidar2img + + return list(inputs) def _init_pipeline(self, cfg: ConfigType) -> Compose: """Initialize the test pipeline.""" diff --git a/tests/test_apis/test_inferencers/test_lidar_det3d_inferencer.py b/tests/test_apis/test_inferencers/test_lidar_det3d_inferencer.py index 7d76b627ac..f4fc771606 100644 --- a/tests/test_apis/test_inferencers/test_lidar_det3d_inferencer.py +++ b/tests/test_apis/test_inferencers/test_lidar_det3d_inferencer.py @@ -89,7 +89,7 @@ def test_visualize(self): inputs = dict(points='tests/data/kitti/training/velodyne/000000.bin'), # img_out_dir with tempfile.TemporaryDirectory() as tmp_dir: - self.inferencer(inputs, img_out_dir=tmp_dir) + self.inferencer(inputs, out_dir=tmp_dir) # TODO: For LiDAR-based detection, the saved image only exists when # show=True. # self.assertTrue(osp.exists(osp.join(tmp_dir, '000000.png'))) @@ -102,11 +102,9 @@ def test_postprocess(self): res = self.inferencer(inputs, return_datasamples=True) self.assertTrue(is_list_of(res['predictions'], Det3DDataSample)) - # pred_out_file + # pred_out_dir with tempfile.TemporaryDirectory() as tmp_dir: - pred_out_file = osp.join(tmp_dir, 'tmp.json') - res = self.inferencer( - inputs, print_result=True, pred_out_file=pred_out_file) - dumped_res = mmengine.load(pred_out_file) - self.assert_predictions_equal(res['predictions'], - dumped_res['predictions']) + res = self.inferencer(inputs, print_result=True, out_dir=tmp_dir) + dumped_res = mmengine.load( + osp.join(tmp_dir, 'preds', '000000.json')) + self.assertEqual(res['predictions'][0], dumped_res) diff --git a/tests/test_apis/test_inferencers/test_lidar_seg3d_inferencer.py b/tests/test_apis/test_inferencers/test_lidar_seg3d_inferencer.py index f0bd72b163..9bbdb037a1 100644 --- a/tests/test_apis/test_inferencers/test_lidar_seg3d_inferencer.py +++ b/tests/test_apis/test_inferencers/test_lidar_seg3d_inferencer.py @@ -91,7 +91,7 @@ def test_visualizer(self): inputs = dict(points='tests/data/s3dis/points/Area_1_office_2.bin') # img_out_dir with tempfile.TemporaryDirectory() as tmp_dir: - self.inferencer(inputs, img_out_dir=tmp_dir) + self.inferencer(inputs, out_dir=tmp_dir) def test_post_processor(self): if not torch.cuda.is_available(): @@ -101,11 +101,9 @@ def test_post_processor(self): res = self.inferencer(inputs, return_datasamples=True) self.assertTrue(is_list_of(res['predictions'], Det3DDataSample)) - # pred_out_file + # pred_out_dir with tempfile.TemporaryDirectory() as tmp_dir: - pred_out_file = osp.join(tmp_dir, 'tmp.json') - res = self.inferencer( - inputs, print_result=True, pred_out_file=pred_out_file) - dumped_res = mmengine.load(pred_out_file) - self.assert_predictions_equal(res['predictions'], - dumped_res['predictions']) + res = self.inferencer(inputs, print_result=True, out_dir=tmp_dir) + dumped_res = mmengine.load( + osp.join(tmp_dir, 'preds', 
'Area_1_office_2.json')) + self.assertEqual(res['predictions'][0], dumped_res) diff --git a/tests/test_apis/test_inferencers/test_mono_det3d_inferencer.py b/tests/test_apis/test_inferencers/test_mono_det3d_inferencer.py index 71df09f78f..d30f156485 100644 --- a/tests/test_apis/test_inferencers/test_mono_det3d_inferencer.py +++ b/tests/test_apis/test_inferencers/test_mono_det3d_inferencer.py @@ -42,13 +42,13 @@ def assert_predictions_equal(self, preds1, preds2): def test_call(self, model): # single img img_path = 'demo/data/kitti/000008.png' - calib_path = 'demo/data/kitti/000008.txt' + infos_path = 'demo/data/kitti/000008.pkl' inferencer = MonoDet3DInferencer(model) - inputs = dict(img=img_path, calib=calib_path) + inputs = dict(img=img_path, infos=infos_path) res_path = inferencer(inputs, return_vis=True) # ndarray img = mmcv.imread(img_path) - inputs = dict(img=img, calib=calib_path) + inputs = dict(img=img, infos=infos_path) res_ndarray = inferencer(inputs, return_vis=True) self.assert_predictions_equal(res_path['predictions'], res_ndarray['predictions']) @@ -59,16 +59,18 @@ def test_call(self, model): inputs = [ dict( img='demo/data/kitti/000008.png', - calib='demo/data/kitti/000008.txt'), + infos='demo/data/kitti/000008.pkl'), dict( img='demo/data/kitti/000008.png', - calib='demo/data/kitti/000008.txt') + infos='demo/data/kitti/000008.pkl') ] res_path = inferencer(inputs, return_vis=True) # list of ndarray imgs = [mmcv.imread(p['img']) for p in inputs] - inputs[0]['img'] = imgs[0] - inputs[1]['img'] = imgs[1] + inputs = [ + dict(img=imgs[0], infos='demo/data/kitti/000008.pkl'), + dict(img=imgs[1], infos='demo/data/kitti/000008.pkl') + ] res_ndarray = inferencer(inputs, return_vis=True) self.assert_predictions_equal(res_path['predictions'], res_ndarray['predictions']) @@ -77,36 +79,30 @@ def test_call(self, model): @parameterized.expand(['pgd_kitti']) def test_visualize(self, model): - inputs = [ - dict( - img='demo/data/kitti/000008.png', - calib='demo/data/kitti/000008.txt'), - dict( - img='demo/data/kitti/000008.png', - calib='demo/data/kitti/000008.txt') - ] + inputs = dict( + img='demo/data/kitti/000008.png', + infos='demo/data/kitti/000008.pkl') inferencer = MonoDet3DInferencer(model) # img_out_dir with tempfile.TemporaryDirectory() as tmp_dir: - inferencer(inputs, img_out_dir=tmp_dir) - for img_dir in ['000008.png', '000008.png']: - self.assertTrue(osp.exists(osp.join(tmp_dir, img_dir))) + inferencer(inputs, out_dir=tmp_dir) + self.assertTrue( + osp.exists(osp.join(tmp_dir, 'vis_camera/CAM2/000008.png'))) @parameterized.expand(['pgd_kitti']) def test_postprocess(self, model): # return_datasample img_path = 'demo/data/kitti/000008.png' - calib_path = 'demo/data/kitti/000008.txt' - inputs = dict(img=img_path, calib=calib_path) + infos_path = 'demo/data/kitti/000008.pkl' + inputs = dict(img=img_path, infos=infos_path) inferencer = MonoDet3DInferencer(model) res = inferencer(inputs, return_datasamples=True) self.assertTrue(is_list_of(res['predictions'], Det3DDataSample)) - # pred_out_file + # pred_out_dir with tempfile.TemporaryDirectory() as tmp_dir: - pred_out_file = osp.join(tmp_dir, 'tmp.json') - res = inferencer( - inputs, print_result=True, pred_out_file=pred_out_file) - dumped_res = mmengine.load(pred_out_file) - self.assert_predictions_equal(res['predictions'], - dumped_res['predictions']) + inputs = dict(img=img_path, infos=infos_path) + res = inferencer(inputs, print_result=True, out_dir=tmp_dir) + dumped_res = mmengine.load( + osp.join(tmp_dir, 'preds', '000008.json')) + 
self.assertEqual(res['predictions'][0], dumped_res) diff --git a/tests/test_apis/test_inferencers/test_multi_modality_det3d_inferencer.py b/tests/test_apis/test_inferencers/test_multi_modality_det3d_inferencer.py index e0be781ca2..c51f7c764b 100644 --- a/tests/test_apis/test_inferencers/test_multi_modality_det3d_inferencer.py +++ b/tests/test_apis/test_inferencers/test_multi_modality_det3d_inferencer.py @@ -44,11 +44,11 @@ def assert_predictions_equal(self, preds1, preds2): def test_call(self): if not torch.cuda.is_available(): return - calib_path = 'tests/data/kitti/training/calib/000000.pkl' - points_path = 'tests/data/kitti/training/velodyne/000000.bin' - img_path = 'tests/data/kitti/training/image_2/000000.png' + infos_path = 'demo/data/kitti/000008.pkl' + points_path = 'demo/data/kitti/000008.bin' + img_path = 'demo/data/kitti/000008.png' # single img & point cloud - inputs = dict(points=points_path, img=img_path, calib=calib_path) + inputs = dict(points=points_path, img=img_path, infos=infos_path) res_path = self.inferencer(inputs, return_vis=True) # ndarray @@ -57,7 +57,7 @@ def test_call(self): points = points.reshape(-1, 4) points = points[:, :4] img = mmcv.imread(inputs['img']) - inputs = dict(points=points, img=img, calib=calib_path) + inputs = dict(points=points, img=img, infos=infos_path) res_ndarray = self.inferencer(inputs, return_vis=True) self.assert_predictions_equal(res_path['predictions'], res_ndarray['predictions']) @@ -66,8 +66,8 @@ def test_call(self): # multiple imgs & point clouds inputs = [ - dict(points=points_path, img=img_path, calib=calib_path), - dict(points=points_path, img=img_path, calib=calib_path) + dict(points=points_path, img=img_path, infos=infos_path), + dict(points=points_path, img=img_path, infos=infos_path) ] res_path = self.inferencer(inputs, return_vis=True) # list of ndarray @@ -77,7 +77,7 @@ def test_call(self): points = np.frombuffer(pts_bytes, dtype=np.float32) points = points.reshape(-1, 4) img = mmcv.imread(p['img']) - all_inputs.append(dict(points=points, img=img, calib=p['calib'])) + all_inputs.append(dict(points=points, img=img, infos=infos_path)) res_ndarray = self.inferencer(all_inputs, return_vis=True) self.assert_predictions_equal(res_path['predictions'], @@ -89,12 +89,12 @@ def test_visualize(self): if not torch.cuda.is_available(): return inputs = dict( - points='tests/data/kitti/training/velodyne/000000.bin', - img='tests/data/kitti/training/image_2/000000.png', - calib='tests/data/kitti/training/calib/000000.pkl'), + points='demo/data/kitti/000008.bin', + img='demo/data/kitti/000008.png', + infos='demo/data/kitti/000008.pkl'), # img_out_dir with tempfile.TemporaryDirectory() as tmp_dir: - self.inferencer(inputs, img_out_dir=tmp_dir) + self.inferencer(inputs, out_dir=tmp_dir) # TODO: For results of LiDAR-based detection, the saved image only # exists when show=True. 
# self.assertTrue(osp.exists(osp.join(tmp_dir, '000000.png'))) @@ -103,18 +103,18 @@ def test_postprocess(self): if not torch.cuda.is_available(): return # return_datasample - inputs = dict( - points='tests/data/kitti/training/velodyne/000000.bin', - img='tests/data/kitti/training/image_2/000000.png', - calib='tests/data/kitti/training/calib/000000.pkl') + infos_path = 'demo/data/kitti/000008.pkl' + points_path = 'demo/data/kitti/000008.bin' + img_path = 'demo/data/kitti/000008.png' + # single img & point cloud + inputs = dict(points=points_path, img=img_path, infos=infos_path) res = self.inferencer(inputs, return_datasamples=True) self.assertTrue(is_list_of(res['predictions'], Det3DDataSample)) - # pred_out_file + # pred_out_dir with tempfile.TemporaryDirectory() as tmp_dir: - pred_out_file = osp.join(tmp_dir, 'tmp.json') - res = self.inferencer( - inputs, print_result=True, pred_out_file=pred_out_file) - dumped_res = mmengine.load(pred_out_file) - self.assert_predictions_equal(res['predictions'], - dumped_res['predictions']) + inputs = dict(points=points_path, img=img_path, infos=infos_path) + res = self.inferencer(inputs, print_result=True, out_dir=tmp_dir) + dumped_res = mmengine.load( + osp.join(tmp_dir, 'preds', '000008.json')) + self.assertEqual(res['predictions'][0], dumped_res) From 0b42058030bb76d7c64101b6f54543af13ad6a1a Mon Sep 17 00:00:00 2001 From: sunjiahao1999 <578431509@qq.com> Date: Mon, 16 Oct 2023 17:45:49 +0800 Subject: [PATCH 07/11] resolve comments --- demo/mono_det_demo.py | 8 ++++---- demo/multi_modality_demo.py | 10 +++++----- demo/pcd_demo.py | 8 ++++---- demo/pcd_seg_demo.py | 8 ++++---- mmdet3d/apis/inferencers/base_3d_inferencer.py | 2 +- mmdet3d/visualization/local_visualizer.py | 5 ++++- .../{multi_modalit_demo.py => multi_modality_demo.py} | 0 7 files changed, 22 insertions(+), 19 deletions(-) rename projects/BEVFusion/demo/{multi_modalit_demo.py => multi_modality_demo.py} (100%) diff --git a/demo/mono_det_demo.py b/demo/mono_det_demo.py index 08ba916a8f..42416fd9f4 100644 --- a/demo/mono_det_demo.py +++ b/demo/mono_det_demo.py @@ -30,13 +30,13 @@ def parse_args(): '--out-dir', type=str, default='outputs', - help='Output directory of prediction results.') + help='Output directory of prediction and visualization results.') parser.add_argument( '--show', action='store_true', help='Show online visualization results') parser.add_argument( - '--wait_time', + '--wait-time', type=float, default=-1, help='The interval of show (s). 
Demo will be blocked in showing' @@ -44,11 +44,11 @@ def parse_args(): parser.add_argument( '--no-save-vis', action='store_true', - help='Do not save detection vis results') + help='Do not save detection visualization results') parser.add_argument( '--no-save-pred', action='store_true', - help='Do not save detection json results') + help='Do not save detection prediction results') parser.add_argument( '--print-result', action='store_true', diff --git a/demo/multi_modality_demo.py b/demo/multi_modality_demo.py index 3b4605aeeb..e3c28cdc48 100644 --- a/demo/multi_modality_demo.py +++ b/demo/multi_modality_demo.py @@ -20,7 +20,7 @@ def parse_args(): parser.add_argument( '--cam-type', type=str, - default='CAM_BACK', + default='CAM_FRONT', help='choose camera type to inference') parser.add_argument( '--pred-score-thr', @@ -31,13 +31,13 @@ def parse_args(): '--out-dir', type=str, default='outputs', - help='Output directory of prediction results.') + help='Output directory of prediction and visualization results.') parser.add_argument( '--show', action='store_true', help='Show online visualization results') parser.add_argument( - '--wait_time', + '--wait-time', type=float, default=-1, help='The interval of show (s). Demo will be blocked in showing' @@ -45,11 +45,11 @@ def parse_args(): parser.add_argument( '--no-save-vis', action='store_true', - help='Do not save detection vis results') + help='Do not save detection visualization results') parser.add_argument( '--no-save-pred', action='store_true', - help='Do not save detection json results') + help='Do not save detection prediction results') parser.add_argument( '--print-result', action='store_true', diff --git a/demo/pcd_demo.py b/demo/pcd_demo.py index d7d850bc12..8ff1090f35 100644 --- a/demo/pcd_demo.py +++ b/demo/pcd_demo.py @@ -24,13 +24,13 @@ def parse_args(): '--out-dir', type=str, default='outputs', - help='Output directory of prediction results.') + help='Output directory of prediction and visualization results.') parser.add_argument( '--show', action='store_true', help='Show online visualization results') parser.add_argument( - '--wait_time', + '--wait-time', type=float, default=-1, help='The interval of show (s). Demo will be blocked in showing' @@ -38,11 +38,11 @@ def parse_args(): parser.add_argument( '--no-save-vis', action='store_true', - help='Do not save detection vis results') + help='Do not save detection visualization results') parser.add_argument( '--no-save-pred', action='store_true', - help='Do not save detection json results') + help='Do not save detection prediction results') parser.add_argument( '--print-result', action='store_true', diff --git a/demo/pcd_seg_demo.py b/demo/pcd_seg_demo.py index bc6697e165..574c21eb2d 100644 --- a/demo/pcd_seg_demo.py +++ b/demo/pcd_seg_demo.py @@ -19,13 +19,13 @@ def parse_args(): '--out-dir', type=str, default='outputs', - help='Output directory of prediction results.') + help='Output directory of prediction and visualization results.') parser.add_argument( '--show', action='store_true', help='Show online visualization results') parser.add_argument( - '--wait_time', + '--wait-time', type=float, default=-1, help='The interval of show (s). 
Demo will be blocked in showing' @@ -33,11 +33,11 @@ def parse_args(): parser.add_argument( '--no-save-vis', action='store_true', - help='Do not save detection vis results') + help='Do not save detection visualization results') parser.add_argument( '--no-save-pred', action='store_true', - help='Do not save detection json results') + help='Do not save detection prediction results') parser.add_argument( '--print-result', action='store_true', diff --git a/mmdet3d/apis/inferencers/base_3d_inferencer.py b/mmdet3d/apis/inferencers/base_3d_inferencer.py index d8a39fb3fc..f3959ed2d3 100644 --- a/mmdet3d/apis/inferencers/base_3d_inferencer.py +++ b/mmdet3d/apis/inferencers/base_3d_inferencer.py @@ -247,7 +247,7 @@ def postprocess( Defaults to False. print_result (bool): Whether to print the inference result w/o visualization to the console. Defaults to False. - pred_out_dir (str): Dir to save the inference results w/o + pred_out_dir (str): Directory to save the inference results w/o visualization. If left as empty, no file will be saved. Defaults to ''. diff --git a/mmdet3d/visualization/local_visualizer.py b/mmdet3d/visualization/local_visualizer.py index b8bb2cdd71..d1eff4af97 100644 --- a/mmdet3d/visualization/local_visualizer.py +++ b/mmdet3d/visualization/local_visualizer.py @@ -991,7 +991,10 @@ def add_datasample(self, gt_img_data = None pred_img_data = None - if not hasattr(self, 'o3d_vis'): + if not hasattr(self, 'o3d_vis') and vis_task in [ + 'multi-view_det', 'lidar_det', 'lidar_seg', + 'multi-modality_det' + ]: self.o3d_vis = self._initialize_o3d_vis(show=show) if draw_gt and data_sample is not None: diff --git a/projects/BEVFusion/demo/multi_modalit_demo.py b/projects/BEVFusion/demo/multi_modality_demo.py similarity index 100% rename from projects/BEVFusion/demo/multi_modalit_demo.py rename to projects/BEVFusion/demo/multi_modality_demo.py From b3fb73337ec11f188ca60e36dc8fb91dfe93d8f6 Mon Sep 17 00:00:00 2001 From: sunjiahao1999 <578431509@qq.com> Date: Mon, 16 Oct 2023 18:20:16 +0800 Subject: [PATCH 08/11] fix os.isfile when python=3.7 --- mmdet3d/apis/inferencers/mono_det3d_inferencer.py | 8 ++++---- .../apis/inferencers/multi_modality_det3d_inferencer.py | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/mmdet3d/apis/inferencers/mono_det3d_inferencer.py b/mmdet3d/apis/inferencers/mono_det3d_inferencer.py index 9a65883aed..dc15329ca9 100644 --- a/mmdet3d/apis/inferencers/mono_det3d_inferencer.py +++ b/mmdet3d/apis/inferencers/mono_det3d_inferencer.py @@ -108,8 +108,8 @@ def _inputs_to_list(self, for index, input in enumerate(inputs): data_info = info_list[index] img_path = data_info['images'][cam_type]['img_path'] - if osp.isfile(input['img']) and osp.basename( - img_path) != osp.basename(input['img']): + if isinstance(input['img'], str) and \ + osp.basename(img_path) != osp.basename(input['img']): raise ValueError( f'the info file of {img_path} is not provided.') cam2img = np.asarray( @@ -136,8 +136,8 @@ def _inputs_to_list(self, 'in `.pkl`, when inputs is a list.' 
data_info = info_list[0]
             img_path = data_info['images'][cam_type]['img_path']
-            if osp.isfile(input['img']) and osp.basename(
-                    img_path) != osp.basename(input['img']):
+            if isinstance(input['img'], str) and \
+                    osp.basename(img_path) != osp.basename(input['img']):
                 raise ValueError(
                     f'the info file of {img_path} is not provided.')
             cam2img = np.asarray(
diff --git a/mmdet3d/apis/inferencers/multi_modality_det3d_inferencer.py b/mmdet3d/apis/inferencers/multi_modality_det3d_inferencer.py
index b934ed9e21..35f50c9f14 100644
--- a/mmdet3d/apis/inferencers/multi_modality_det3d_inferencer.py
+++ b/mmdet3d/apis/inferencers/multi_modality_det3d_inferencer.py
@@ -115,8 +115,8 @@ def _inputs_to_list(self,
         for index, input in enumerate(inputs):
             data_info = info_list[index]
             img_path = data_info['images'][cam_type]['img_path']
-            if osp.isfile(input['img']) and osp.basename(
-                    img_path) != osp.basename(input['img']):
+            if isinstance(input['img'], str) and \
+                    osp.basename(img_path) != osp.basename(input['img']):
                 raise ValueError(
                     f'the info file of {img_path} is not provided.')
             cam2img = np.asarray(
@@ -143,8 +143,8 @@ def _inputs_to_list(self,
                 'info in `.pkl`, when input is a list.'
             data_info = info_list[0]
             img_path = data_info['images'][cam_type]['img_path']
-            if osp.isfile(input['img']) and osp.basename(
-                    img_path) != osp.basename(input['img']):
+            if isinstance(input['img'], str) and \
+                    osp.basename(img_path) != osp.basename(input['img']):
                 raise ValueError(
                     f'the info file of {img_path} is not provided.')
             cam2img = np.asarray(

From 0709e04faa5873293700ca7a32f0b02d1d79da9f Mon Sep 17 00:00:00 2001
From: sunjiahao1999 <578431509@qq.com>
Date: Tue, 17 Oct 2023 14:11:52 +0800
Subject: [PATCH 09/11] add note in get_started

---
 docs/en/get_started.md    | 4 ++++
 docs/zh_cn/get_started.md | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/docs/en/get_started.md b/docs/en/get_started.md
index c4f132553a..00437debb7 100644
--- a/docs/en/get_started.md
+++ b/docs/en/get_started.md
@@ -141,6 +141,10 @@ You will see a visualizer interface with point cloud, where bounding boxes are p

 **Note**:

+If you install MMDetection3D on a remote server without a display device, you can leave out the `--show` argument. The demo will still save the prediction results to the `outputs/preds/000008.json` file.
+
+**Note**:
+
 If you want to input a `.ply` file, you can use the following function and convert it to `.bin` format. Then you can use the converted `.bin` file to run the demo. Note that you need to install `pandas` and `plyfile` before using this script. This function can also be used for data preprocessing for training `ply data`.
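**Note**: The conversion function referenced above sits outside this hunk, so it is not shown here. A minimal sketch of such a `.ply`-to-`.bin` converter is given below; it assumes `pandas` and `plyfile` are installed, and the name `convert_ply` and the flat float32 output layout are assumptions of the sketch, not the docs' verbatim function.

```python
# Illustrative sketch only -- not the exact function from the docs.
# Assumes `pandas` and `plyfile` are installed; `convert_ply` and the
# float32 output layout are assumptions of this sketch.
import numpy as np
import pandas as pd
from plyfile import PlyData


def convert_ply(input_path: str, output_path: str) -> None:
    plydata = PlyData.read(input_path)  # parse the .ply file
    data = plydata.elements[0].data  # structured array of vertex records
    data_pd = pd.DataFrame(data)  # one column per vertex property
    data_np = np.zeros(data_pd.shape, dtype=np.float64)  # output buffer
    property_names = data[0].dtype.names  # e.g. ('x', 'y', 'z', 'intensity')
    for i, name in enumerate(property_names):  # copy property by property
        data_np[:, i] = data_pd[name]
    # the demos read point clouds as a flat float32 binary blob
    data_np.astype(np.float32).tofile(output_path)


# usage: convert_ply('./test.ply', './test.bin')
```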
diff --git a/docs/zh_cn/get_started.md b/docs/zh_cn/get_started.md
index 9f0c1cb511..224a44c4ab 100644
--- a/docs/zh_cn/get_started.md
+++ b/docs/zh_cn/get_started.md
@@ -139,6 +139,10 @@ python demo/pcd_demo.py demo/data/kitti/000008.bin pointpillars_hv_secfpn_8xb6-1

 **注意**:

+如果你在没有显示设备的服务器上安装 MMDetection3D,你可以省略 `--show` 参数。Demo 仍会将预测结果保存到 `outputs/preds/000008.json` 文件中。
+
+**注意**:
+
 如果您想输入一个 `.ply` 文件,您可以使用如下函数将它转换成 `.bin` 格式。然后您可以使用转化的 `.bin` 文件来运行样例。请注意在使用此脚本之前,您需要安装 `pandas` 和 `plyfile`。这个函数也可以用于训练 `ply 数据`时作为数据预处理来使用。

 ```python

From 450676623e80d7db37429f17d27da8324bc0b84c Mon Sep 17 00:00:00 2001
From: sunjiahao1999 <578431509@qq.com>
Date: Wed, 18 Oct 2023 17:02:43 +0800
Subject: [PATCH 10/11] fix docstring

---
 mmdet3d/apis/inferencers/base_3d_inferencer.py    |  6 +++---
 .../apis/inferencers/lidar_det3d_inferencer.py    | 15 +--------------
 .../apis/inferencers/lidar_seg3d_inferencer.py    |  1 +
 mmdet3d/apis/inferencers/mono_det3d_inferencer.py |  2 ++
 .../multi_modality_det3d_inferencer.py            |  1 +
 5 files changed, 8 insertions(+), 17 deletions(-)

diff --git a/mmdet3d/apis/inferencers/base_3d_inferencer.py b/mmdet3d/apis/inferencers/base_3d_inferencer.py
index f3959ed2d3..6564325e8c 100644
--- a/mmdet3d/apis/inferencers/base_3d_inferencer.py
+++ b/mmdet3d/apis/inferencers/base_3d_inferencer.py
@@ -153,7 +153,7 @@ def _dispatch_kwargs(self,

         Args:
             out_dir (str): Dir to save the inference results.
-            cam_type (str): Camera type. Defaults to 'CAM2'.
+            cam_type (str): Camera type. Defaults to ''.
             **kwargs (dict): Key words arguments passed to :meth:`preprocess`,
                 :meth:`forward`, :meth:`visualize` and :meth:`postprocess`.
                 Each key in kwargs should be in the corresponding set of
@@ -172,16 +172,16 @@ def _dispatch_kwargs(self,
     def __call__(self,
                  inputs: InputsType,
-                 batch_size=1,
+                 batch_size: int = 1,
                  return_datasamples: bool = False,
                  **kwargs) -> Optional[dict]:
         """Call the inferencer.

         Args:
             inputs (InputsType): Inputs for the inferencer.
+            batch_size (int): Batch size. Defaults to 1.
             return_datasamples (bool): Whether to return results as
                 :obj:`BaseDataElement`. Defaults to False.
-            batch_size (int): Batch size. Defaults to 1.
             **kwargs: Key words arguments passed to :meth:`preprocess`,
                 :meth:`forward`, :meth:`visualize` and :meth:`postprocess`.
                 Each key in kwargs should be in the corresponding set of
diff --git a/mmdet3d/apis/inferencers/lidar_det3d_inferencer.py b/mmdet3d/apis/inferencers/lidar_det3d_inferencer.py
index 9869a87a28..fc513e85d8 100644
--- a/mmdet3d/apis/inferencers/lidar_det3d_inferencer.py
+++ b/mmdet3d/apis/inferencers/lidar_det3d_inferencer.py
@@ -137,7 +137,7 @@ def visualize(self,
                 Defaults to False.
             show (bool): Whether to display the image in a popup window.
                 Defaults to False.
-            wait_time (float): The interval of show (s). Defaults to 0.
+            wait_time (float): The interval of show (s). Defaults to -1.
             draw_pred (bool): Whether to draw predicted bounding boxes.
                 Defaults to True.
             pred_score_thr (float): Minimum score of bboxes to draw.
@@ -211,19 +211,6 @@ def visualize_preds_fromfile(self, inputs: InputsType, preds: PredType,
         Args:
             inputs (InputsType): Inputs for the inferencer.
             preds (PredType): Predictions of the model.
-            return_vis (bool): Whether to return the visualization result.
-                Defaults to False.
-            show (bool): Whether to display the image in a popup window.
-                Defaults to False.
-            wait_time (float): The interval of show (s). Defaults to 0.
-            draw_pred (bool): Whether to draw predicted bounding boxes.
-                Defaults to True.
-            pred_score_thr (float): Minimum score of bboxes to draw.
-                Defaults to 0.3.
-            no_save_vis (bool): Whether to force not to save prediction
-                vis results. Defaults to False.
-            img_out_dir (str): Output directory of visualization results.
-                If left as empty, no file will be saved. Defaults to ''.

         Returns:
             List[np.ndarray] or None: Returns visualization results only if
diff --git a/mmdet3d/apis/inferencers/lidar_seg3d_inferencer.py b/mmdet3d/apis/inferencers/lidar_seg3d_inferencer.py
index 328d865e0d..33ab95c104 100644
--- a/mmdet3d/apis/inferencers/lidar_seg3d_inferencer.py
+++ b/mmdet3d/apis/inferencers/lidar_seg3d_inferencer.py
@@ -147,6 +147,7 @@ def visualize(self,
                 Defaults to True.
             pred_score_thr (float): Minimum score of bboxes to draw.
                 Defaults to 0.3.
+            no_save_vis (bool): Whether to skip saving visualization results.
             img_out_dir (str): Output directory of visualization results.
                 If left as empty, no file will be saved. Defaults to ''.

diff --git a/mmdet3d/apis/inferencers/mono_det3d_inferencer.py b/mmdet3d/apis/inferencers/mono_det3d_inferencer.py
index dc15329ca9..22863ae166 100644
--- a/mmdet3d/apis/inferencers/mono_det3d_inferencer.py
+++ b/mmdet3d/apis/inferencers/mono_det3d_inferencer.py
@@ -194,8 +194,10 @@ def visualize(self,
                 Defaults to True.
             pred_score_thr (float): Minimum score of bboxes to draw.
                 Defaults to 0.3.
+            no_save_vis (bool): Whether to skip saving visualization results.
             img_out_dir (str): Output directory of visualization results.
                 If left as empty, no file will be saved. Defaults to ''.
+            cam_type_dir (str): Camera type directory. Defaults to 'CAM2'.

         Returns:
             List[np.ndarray] or None: Returns visualization results only if
diff --git a/mmdet3d/apis/inferencers/multi_modality_det3d_inferencer.py b/mmdet3d/apis/inferencers/multi_modality_det3d_inferencer.py
index 35f50c9f14..6717bb18c8 100644
--- a/mmdet3d/apis/inferencers/multi_modality_det3d_inferencer.py
+++ b/mmdet3d/apis/inferencers/multi_modality_det3d_inferencer.py
@@ -233,6 +233,7 @@ def visualize(self,
             wait_time (float): The interval of show (s). Defaults to 0.
             draw_pred (bool): Whether to draw predicted bounding boxes.
                 Defaults to True.
+            no_save_vis (bool): Whether to skip saving visualization results.
             pred_score_thr (float): Minimum score of bboxes to draw.
                 Defaults to 0.3.
             img_out_dir (str): Output directory of visualization results.

From 93449e31c3e73d814dfd1ca2bb0bf01d13e215d9 Mon Sep 17 00:00:00 2001
From: sunjiahao1999 <578431509@qq.com>
Date: Wed, 18 Oct 2023 17:40:21 +0800
Subject: [PATCH 11/11] fix mmcv version

---
 mmdet3d/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mmdet3d/__init__.py b/mmdet3d/__init__.py
index 9584e57741..5f7c341783 100644
--- a/mmdet3d/__init__.py
+++ b/mmdet3d/__init__.py
@@ -7,7 +7,7 @@ from .version import __version__, version_info

 mmcv_minimum_version = '2.0.0rc4'
-mmcv_maximum_version = '2.1.0'
+mmcv_maximum_version = '2.2.0'
 mmcv_version = digit_version(mmcv.__version__)

 mmengine_minimum_version = '0.8.0'
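**Note**: The constant changed above feeds mmdet3d's import-time compatibility gate. A minimal sketch of that version-window pattern is shown below; `digit_version` is mmengine's real helper for turning version strings into comparable tuples, while the exact assertion message here is illustrative, not the library's verbatim text.

```python
# Minimal sketch of the import-time version gate that the bump above relaxes.
# `digit_version` is mmengine's helper; the assertion message is illustrative.
import mmcv
from mmengine.utils import digit_version

mmcv_minimum_version = '2.0.0rc4'
mmcv_maximum_version = '2.2.0'  # raised from '2.1.0' by this patch
mmcv_version = digit_version(mmcv.__version__)

# lower bound is inclusive, upper bound is exclusive
assert (mmcv_version >= digit_version(mmcv_minimum_version)
        and mmcv_version < digit_version(mmcv_maximum_version)), \
    (f'MMCV=={mmcv.__version__} is used but incompatible: please install '
     f'mmcv>={mmcv_minimum_version}, <{mmcv_maximum_version}.')
```

Because the upper bound is exclusive, the old limit of `'2.1.0'` rejected mmcv 2.1.x releases outright; raising it to `'2.2.0'` is what lets them pass the gate.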