From 95d0be63e4f51e6338041ae0cd143820d1007191 Mon Sep 17 00:00:00 2001 From: Daigo Hirooka Date: Sun, 26 Jun 2022 18:25:29 +0900 Subject: [PATCH 01/22] add semantic segmentation head Author: Daigo Hirooka Date: Sun Jun 26 18:25:29 2022 +0900 --- mmdeploy/codebase/mmdet/models/__init__.py | 1 + .../mmdet/models/seg_heads/__init__.py | 4 +++ .../models/seg_heads/base_semantic_head.py | 29 +++++++++++++++++++ 3 files changed, 34 insertions(+) create mode 100644 mmdeploy/codebase/mmdet/models/seg_heads/__init__.py create mode 100644 mmdeploy/codebase/mmdet/models/seg_heads/base_semantic_head.py diff --git a/mmdeploy/codebase/mmdet/models/__init__.py b/mmdeploy/codebase/mmdet/models/__init__.py index ee7b60a4e2..8da5d86571 100644 --- a/mmdeploy/codebase/mmdet/models/__init__.py +++ b/mmdeploy/codebase/mmdet/models/__init__.py @@ -5,6 +5,7 @@ from . import layers # noqa: F401,F403 from . import necks # noqa: F401,F403 from . import roi_heads # noqa: F401,F403 +from . import seg_heads # noqa: F401,F403 from . import task_modules # noqa: F401,F403 from . import transformer # noqa: F401,F403 from . import utils # noqa: F401,F403 diff --git a/mmdeploy/codebase/mmdet/models/seg_heads/__init__.py b/mmdeploy/codebase/mmdet/models/seg_heads/__init__.py new file mode 100644 index 0000000000..222a0c876a --- /dev/null +++ b/mmdeploy/codebase/mmdet/models/seg_heads/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .base_semantic_head import base_semantic_head__simple_test + +__all__ = ['base_semantic_head__simple_test'] diff --git a/mmdeploy/codebase/mmdet/models/seg_heads/base_semantic_head.py b/mmdeploy/codebase/mmdet/models/seg_heads/base_semantic_head.py new file mode 100644 index 0000000000..1d09c44ebe --- /dev/null +++ b/mmdeploy/codebase/mmdet/models/seg_heads/base_semantic_head.py @@ -0,0 +1,29 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import torch.nn.functional as F + +from mmdeploy.core import FUNCTION_REWRITER + + +@FUNCTION_REWRITER.register_rewriter( + 'mmdet.models.seg_heads.base_semantic_head.BaseSemanticHead.simple_test') +def base_semantic_head__simple_test(ctx, self, x, img_metas, **kwargs): + """Rewrite `simple_test` for default backend. + Support configured dynamic/static shape for model input and return + semantic-segmentation result as Tensor instead of numpy array. + Args: + ctx (ContextCaller): The context with additional information. + self: The instance of the original class. + img (Tensor | List[Tensor]): Input image tensor(s). + img_meta (list[dict]): Dict containing image's meta information + such as `img_shape`. + + Returns: + Tensor: `semseg` of shape [N, num_sem_class, H, W] + """ + output = self.forward(x) + seg_preds = output['seg_preds'] + + h, w = img_metas[0]['img_shape'][:2] + seg_preds = F.interpolate( + seg_preds, size=(h, w), mode='bilinear', align_corners=False) + return seg_preds From d3928efa6f647350a2ae81829bcec8eb692ec8e7 Mon Sep 17 00:00:00 2001 From: Daigo Hirooka Date: Sun, 26 Jun 2022 18:26:05 +0900 Subject: [PATCH 02/22] add panoptic detection model --- .../mmdet/models/detectors/__init__.py | 6 +- .../codebase/mmdet/models/detectors/base.py | 71 +++++++++++++++++++ .../detectors/panoptic_two_stage_segmentor.py | 41 +++++++++++ 3 files changed, 116 insertions(+), 2 deletions(-) create mode 100644 mmdeploy/codebase/mmdet/models/detectors/base.py create mode 100644 mmdeploy/codebase/mmdet/models/detectors/panoptic_two_stage_segmentor.py diff --git a/mmdeploy/codebase/mmdet/models/detectors/__init__.py b/mmdeploy/codebase/mmdet/models/detectors/__init__.py index 2c0a2f3ed5..f424226b15 100644 --- a/mmdeploy/codebase/mmdet/models/detectors/__init__.py +++ b/mmdeploy/codebase/mmdet/models/detectors/__init__.py @@ -1,6 +1,8 @@ # Copyright (c) OpenMMLab. All rights reserved. -from . import base_detr, single_stage, single_stage_instance_seg, two_stage +from . 
import (base_detr, panoptic_two_stage_segmentor, single_stage, + single_stage_instance_seg, two_stage) __all__ = [ - 'base_detr', 'single_stage', 'single_stage_instance_seg', 'two_stage' + 'base_detr', 'single_stage', 'single_stage_instance_seg', 'two_stage', + 'panoptic_two_stage_segmentor' ] diff --git a/mmdeploy/codebase/mmdet/models/detectors/base.py b/mmdeploy/codebase/mmdet/models/detectors/base.py new file mode 100644 index 0000000000..789d777b49 --- /dev/null +++ b/mmdeploy/codebase/mmdet/models/detectors/base.py @@ -0,0 +1,71 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch + +from mmdeploy.core import FUNCTION_REWRITER, mark +from mmdeploy.utils import is_dynamic_shape + + +@mark( + 'detector_forward', + inputs=['input'], + outputs=['dets', 'labels', 'masks', 'semseg']) +def __forward_impl(ctx, self, img, img_metas=None, **kwargs): + """Rewrite and adding mark for `forward`. + + Encapsulate this function for rewriting `forward` of BaseDetector. + 1. Add mark for BaseDetector. + 2. Support both dynamic and static export to onnx. + """ + assert isinstance(img_metas, dict) + assert isinstance(img, torch.Tensor) + + deploy_cfg = ctx.cfg + is_dynamic_flag = is_dynamic_shape(deploy_cfg) + # get origin input shape as tensor to support onnx dynamic shape + img_shape = torch._shape_as_tensor(img)[2:] + if not is_dynamic_flag: + img_shape = [int(val) for val in img_shape] + img_metas['img_shape'] = img_shape + img_metas = [img_metas] + return self.simple_test(img, img_metas, **kwargs) + + +@FUNCTION_REWRITER.register_rewriter( + 'mmdet.models.detectors.base.BaseDetector.forward') +def base_detector__forward(ctx, self, img, img_metas=None, **kwargs): + """Rewrite `forward` of BaseDetector for default backend. + + Rewrite this function to: + 1. Create img_metas for exporting model to onnx. + 2. Call `simple_test` directly to skip `aug_test`. + 3. Remove `return_loss` because deployment has no need for training + functions. 
+ + Args: + ctx (ContextCaller): The context with additional information. + self: The instance of the class BaseDetector. + img (Tensor): Input images of shape (N, C, H, W). + Typically these should be mean centered and std scaled. + img_metas (Optional[list[dict]]): A list of image info dict where each + dict has: 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys, see + :class:`mmdet.datasets.pipelines.Collect`. + + Returns: + list[list[np.ndarray]]: BBox results of each image and classes. + The outer list corresponds to each image. The inner list + corresponds to each class. + """ + if img_metas is None: + img_metas = {} + + while isinstance(img_metas, list): + img_metas = img_metas[0] + + if isinstance(img, list): + img = torch.cat(img, 0) + + if 'return_loss' in kwargs: + kwargs.pop('return_loss') + return __forward_impl(ctx, self, img, img_metas=img_metas, **kwargs) diff --git a/mmdeploy/codebase/mmdet/models/detectors/panoptic_two_stage_segmentor.py b/mmdeploy/codebase/mmdet/models/detectors/panoptic_two_stage_segmentor.py new file mode 100644 index 0000000000..daeb5ae040 --- /dev/null +++ b/mmdeploy/codebase/mmdet/models/detectors/panoptic_two_stage_segmentor.py @@ -0,0 +1,41 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from mmdeploy.core import FUNCTION_REWRITER + + +@FUNCTION_REWRITER.register_rewriter( + 'mmdet.models.detectors.panoptic_two_stage_segmentor.' + 'TwoStagePanopticSegmentor.simple_test') +def two_stage_panoptic_segmentor__simple_test(ctx, + self, + img, + img_metas, + proposals=None, + **kwargs): + """Rewrite `simple_test` for default backend. + Support configured dynamic/static shape for model input and return + detection result as Tensor instead of numpy array. + Args: + ctx (ContextCaller): The context with additional information. + self: The instance of the original class. 
+ img (Tensor | List[Tensor]): Input image tensor(s). + img_meta (list[dict]): Dict containing image's meta information + such as `img_shape`. + proposals (List[Tensor]): Region proposals. + Default is None. + + Returns: + tuple[Tensor, Tensor, Tensor, Tensor]: + (bboxes, labels, masks, semseg), `bboxes` of shape [N, num_det, 5], + `labels` of shape [N, num_det], `masks` of shape [N, roi_H, roi_W], + `semseg` of shape [N, num_sem_class, sem_H, sem_W]. + """ + assert self.with_bbox, 'Bbox head must be implemented.' + x = self.extract_feat(img) + if proposals is None: + proposals, _ = self.rpn_head.simple_test_rpn(x, img_metas) + + bboxes, labels, masks = self.roi_head.simple_test( + x, proposals, img_metas, rescale=False) + + semseg = self.semantic_head.simple_test(x, img_metas, rescale=False) + return bboxes, labels, masks, semseg From 8f56c7582cd0ce0fca16f9022a0d3ddd903df4a8 Mon Sep 17 00:00:00 2001 From: Daigo Hirooka Date: Sun, 26 Jun 2022 18:39:17 +0900 Subject: [PATCH 03/22] add panoptic segmentation configs --- .../mmdet/_base_/base_panoptic-seg_dynamic.py | 28 +++++++++++++++++++ .../mmdet/_base_/base_panoptic-seg_static.py | 4 +++ .../panoptic-seg_onnxruntime_dynamic.py | 4 +++ .../panoptic-seg_onnxruntime_static.py | 4 +++ 4 files changed, 40 insertions(+) create mode 100644 configs/mmdet/_base_/base_panoptic-seg_dynamic.py create mode 100644 configs/mmdet/_base_/base_panoptic-seg_static.py create mode 100644 configs/mmdet/panoptic-seg/panoptic-seg_onnxruntime_dynamic.py create mode 100644 configs/mmdet/panoptic-seg/panoptic-seg_onnxruntime_static.py diff --git a/configs/mmdet/_base_/base_panoptic-seg_dynamic.py b/configs/mmdet/_base_/base_panoptic-seg_dynamic.py new file mode 100644 index 0000000000..9859fc7942 --- /dev/null +++ b/configs/mmdet/_base_/base_panoptic-seg_dynamic.py @@ -0,0 +1,28 @@ +_base_ = ['./base_panoptic-seg_static.py'] +onnx_config = dict( + dynamic_axes={ + 'input': { + 0: 'batch', + 2: 'height', + 3: 'width' + }, + 'dets': { + 0: 
'batch', + 1: 'num_dets', + }, + 'labels': { + 0: 'batch', + 1: 'num_dets', + }, + 'masks': { + 0: 'batch', + 1: 'num_dets', + 2: 'height', + 3: 'width' + }, + 'semseg': { + 0: 'batch', + 2: 'height', + 3: 'width' + }, + }) diff --git a/configs/mmdet/_base_/base_panoptic-seg_static.py b/configs/mmdet/_base_/base_panoptic-seg_static.py new file mode 100644 index 0000000000..d42442b54d --- /dev/null +++ b/configs/mmdet/_base_/base_panoptic-seg_static.py @@ -0,0 +1,4 @@ +_base_ = ['./base_static.py'] + +onnx_config = dict(output_names=['dets', 'labels', 'masks', 'semseg']) +codebase_config = dict(post_processing=dict(export_postprocess_mask=False)) diff --git a/configs/mmdet/panoptic-seg/panoptic-seg_onnxruntime_dynamic.py b/configs/mmdet/panoptic-seg/panoptic-seg_onnxruntime_dynamic.py new file mode 100644 index 0000000000..990d4e8fc9 --- /dev/null +++ b/configs/mmdet/panoptic-seg/panoptic-seg_onnxruntime_dynamic.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/base_panoptic-seg_dynamic.py', + '../../_base_/backends/onnxruntime.py' +] diff --git a/configs/mmdet/panoptic-seg/panoptic-seg_onnxruntime_static.py b/configs/mmdet/panoptic-seg/panoptic-seg_onnxruntime_static.py new file mode 100644 index 0000000000..c6c630dcf7 --- /dev/null +++ b/configs/mmdet/panoptic-seg/panoptic-seg_onnxruntime_static.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/base_panoptic-seg_static.py', + '../../_base_/backends/onnxruntime.py' +] From 548e8d7c5f948f27444eb147607a0933ad7e1bdd Mon Sep 17 00:00:00 2001 From: RunningLeon Date: Fri, 28 Jul 2023 11:22:35 +0800 Subject: [PATCH 04/22] support panoptic-fpn --- .../panoptic-seg_onnxruntime_static.py | 1 + ...-seg_tensorrt_dynamic-352x512-1344x1344.py | 15 ++++ .../mmdet/deploy/object_detection_model.py | 51 +++++++++++-- .../codebase/mmdet/models/detectors/base.py | 71 ------------------- .../detectors/panoptic_two_stage_segmentor.py | 70 +++++++++++------- .../mmdet/models/seg_heads/__init__.py | 4 +- .../models/seg_heads/base_semantic_head.py | 
30 ++++---- 7 files changed, 123 insertions(+), 119 deletions(-) create mode 100644 configs/mmdet/panoptic-seg/panoptic-seg_tensorrt_dynamic-352x512-1344x1344.py delete mode 100644 mmdeploy/codebase/mmdet/models/detectors/base.py diff --git a/configs/mmdet/panoptic-seg/panoptic-seg_onnxruntime_static.py b/configs/mmdet/panoptic-seg/panoptic-seg_onnxruntime_static.py index c6c630dcf7..d4ff118f2e 100644 --- a/configs/mmdet/panoptic-seg/panoptic-seg_onnxruntime_static.py +++ b/configs/mmdet/panoptic-seg/panoptic-seg_onnxruntime_static.py @@ -2,3 +2,4 @@ '../_base_/base_panoptic-seg_static.py', '../../_base_/backends/onnxruntime.py' ] +onnx_config = dict(input_shape=[1280, 800]) diff --git a/configs/mmdet/panoptic-seg/panoptic-seg_tensorrt_dynamic-352x512-1344x1344.py b/configs/mmdet/panoptic-seg/panoptic-seg_tensorrt_dynamic-352x512-1344x1344.py new file mode 100644 index 0000000000..42e243f349 --- /dev/null +++ b/configs/mmdet/panoptic-seg/panoptic-seg_tensorrt_dynamic-352x512-1344x1344.py @@ -0,0 +1,15 @@ +_base_ = [ + '../_base_/base_panoptic-seg_dynamic.py', + '../../_base_/backends/tensorrt.py' +] + +backend_config = dict( + common_config=dict(max_workspace_size=1 << 30), + model_inputs=[ + dict( + input_shapes=dict( + input=dict( + min_shape=[1, 3, 352, 512], + opt_shape=[1, 3, 800, 1344], + max_shape=[1, 3, 1344, 1344]))) + ]) diff --git a/mmdeploy/codebase/mmdet/deploy/object_detection_model.py b/mmdeploy/codebase/mmdet/deploy/object_detection_model.py index 5fe9874686..9498251c35 100644 --- a/mmdeploy/codebase/mmdet/deploy/object_detection_model.py +++ b/mmdeploy/codebase/mmdet/deploy/object_detection_model.py @@ -92,13 +92,19 @@ def __clear_outputs( batch_size = len(test_outputs[0]) num_outputs = len(test_outputs) + has_inst_seg = num_outputs >= 3 + has_sem_seg = num_outputs == 4 outputs = [[None for _ in range(batch_size)] for _ in range(num_outputs)] for i in range(batch_size): inds = test_outputs[0][i, :, 4] > 0.0 - for output_id in range(num_outputs): - 
outputs[output_id][i] = test_outputs[output_id][i, inds, ...] + outputs[0][i] = test_outputs[0][i, inds, ...] + outputs[1][i] = test_outputs[1][i, inds, ...] + if has_inst_seg: + outputs[2][i] = test_outputs[2][i, inds, ...] + if has_sem_seg: + outputs[3][i] = test_outputs[3][i] return outputs @staticmethod @@ -192,10 +198,9 @@ def forward(self, outputs = self.predict(inputs) outputs = End2EndModel.__clear_outputs(outputs) batch_dets, batch_labels = outputs[:2] - batch_masks = outputs[2] if len(outputs) == 3 else None + batch_masks = outputs[2] if len(outputs) >= 3 else None batch_size = inputs.shape[0] img_metas = [data_sample.metainfo for data_sample in data_samples] - results = [] rescale = kwargs.get('rescale', True) model_type = self.model_cfg.model.type if \ self.model_cfg is not None else None @@ -261,7 +266,7 @@ def forward(self, masks = masks[:, :img_h, :img_w] # avoid to resize masks with zero dim if export_postprocess_mask and rescale and masks.shape[0] != 0: - masks = torch.nn.functional.interpolate( + masks = F.interpolate( masks.unsqueeze(0), size=[ math.ceil(masks.shape[-2] / @@ -275,9 +280,41 @@ def forward(self, # aligned with mmdet to easily convert to numpy masks = masks.cpu() result.masks = masks + data_samples[i].pred_instances = result - results.append(data_samples[i]) - return results + + # deal with panoptic seg + batch_semseg = outputs[3] if len(outputs) == 4 else None + if batch_semseg is not None: + from mmdet.models.seg_heads import (HeuristicFusionHead, + MaskFormerFusionHead) + obj_dict = { + 'HeuristicFusionHead': HeuristicFusionHead, + 'MaskFormerFusionHead': MaskFormerFusionHead + } + head_args = self.model_cfg.model.panoptic_fusion_head.copy() + head_args['test_cfg'] = self.model_cfg.model.test_cfg.panoptic + fusion_head = obj_dict[head_args.pop('type')](**head_args) + if rescale: + seg_pred_list = [] + for i in range(batch_size): + h, w = img_metas[i]['img_shape'] + seg_pred = batch_semseg[i][:, :h, :w] + h, w = 
img_metas[i]['ori_shape'] + seg_pred = F.interpolate( + seg_pred[None], + size=(h, w), + mode='bilinear', + align_corners=False)[0] + seg_pred_list.append(seg_pred) + batch_semseg = seg_pred_list + + masks_results = [ds.pred_instances for ds in data_samples] + semseg_results = fusion_head.predict(masks_results, batch_semseg) + for ds, pred_panoptic_seg in zip(data_samples, semseg_results): + ds.pred_panoptic_seg = pred_panoptic_seg + + return data_samples def predict(self, imgs: Tensor) -> Tuple[np.ndarray, np.ndarray]: """The interface for predict. diff --git a/mmdeploy/codebase/mmdet/models/detectors/base.py b/mmdeploy/codebase/mmdet/models/detectors/base.py deleted file mode 100644 index 789d777b49..0000000000 --- a/mmdeploy/codebase/mmdet/models/detectors/base.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import torch - -from mmdeploy.core import FUNCTION_REWRITER, mark -from mmdeploy.utils import is_dynamic_shape - - -@mark( - 'detector_forward', - inputs=['input'], - outputs=['dets', 'labels', 'masks', 'semseg']) -def __forward_impl(ctx, self, img, img_metas=None, **kwargs): - """Rewrite and adding mark for `forward`. - - Encapsulate this function for rewriting `forward` of BaseDetector. - 1. Add mark for BaseDetector. - 2. Support both dynamic and static export to onnx. 
- """ - assert isinstance(img_metas, dict) - assert isinstance(img, torch.Tensor) - - deploy_cfg = ctx.cfg - is_dynamic_flag = is_dynamic_shape(deploy_cfg) - # get origin input shape as tensor to support onnx dynamic shape - img_shape = torch._shape_as_tensor(img)[2:] - if not is_dynamic_flag: - img_shape = [int(val) for val in img_shape] - img_metas['img_shape'] = img_shape - img_metas = [img_metas] - return self.simple_test(img, img_metas, **kwargs) - - -@FUNCTION_REWRITER.register_rewriter( - 'mmdet.models.detectors.base.BaseDetector.forward') -def base_detector__forward(ctx, self, img, img_metas=None, **kwargs): - """Rewrite `forward` of BaseDetector for default backend. - - Rewrite this function to: - 1. Create img_metas for exporting model to onnx. - 2. Call `simple_test` directly to skip `aug_test`. - 3. Remove `return_loss` because deployment has no need for training - functions. - - Args: - ctx (ContextCaller): The context with additional information. - self: The instance of the class BaseDetector. - img (Tensor): Input images of shape (N, C, H, W). - Typically these should be mean centered and std scaled. - img_metas (Optional[list[dict]]): A list of image info dict where each - dict has: 'img_shape', 'scale_factor', 'flip', and may also contain - 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. - For details on the values of these keys, see - :class:`mmdet.datasets.pipelines.Collect`. - - Returns: - list[list[np.ndarray]]: BBox results of each image and classes. - The outer list corresponds to each image. The inner list - corresponds to each class. 
- """ - if img_metas is None: - img_metas = {} - - while isinstance(img_metas, list): - img_metas = img_metas[0] - - if isinstance(img, list): - img = torch.cat(img, 0) - - if 'return_loss' in kwargs: - kwargs.pop('return_loss') - return __forward_impl(ctx, self, img, img_metas=img_metas, **kwargs) diff --git a/mmdeploy/codebase/mmdet/models/detectors/panoptic_two_stage_segmentor.py b/mmdeploy/codebase/mmdet/models/detectors/panoptic_two_stage_segmentor.py index daeb5ae040..ce6f67e1bd 100644 --- a/mmdeploy/codebase/mmdet/models/detectors/panoptic_two_stage_segmentor.py +++ b/mmdeploy/codebase/mmdet/models/detectors/panoptic_two_stage_segmentor.py @@ -1,27 +1,31 @@ # Copyright (c) OpenMMLab. All rights reserved. +import copy + +import torch + from mmdeploy.core import FUNCTION_REWRITER +from mmdeploy.utils import is_dynamic_shape @FUNCTION_REWRITER.register_rewriter( 'mmdet.models.detectors.panoptic_two_stage_segmentor.' - 'TwoStagePanopticSegmentor.simple_test') -def two_stage_panoptic_segmentor__simple_test(ctx, - self, - img, - img_metas, - proposals=None, - **kwargs): - """Rewrite `simple_test` for default backend. - Support configured dynamic/static shape for model input and return - detection result as Tensor instead of numpy array. + 'TwoStagePanopticSegmentor.forward') +def two_stage_panoptic_segmentor__forward(self, + batch_inputs, + data_samples, + mode='tensor', + **kwargs): + """Rewrite `forward` for default backend. Support configured dynamic/static + shape for model input and return detection result as Tensor instead of + numpy array. + Args: - ctx (ContextCaller): The context with additional information. - self: The instance of the original class. - img (Tensor | List[Tensor]): Input image tensor(s). - img_meta (list[dict]): Dict containing image's meta information - such as `img_shape`. - proposals (List[Tensor]): Region proposals. - Default is None. + batch_inputs (Tensor): Inputs with shape (N, C, H, W). 
+ batch_data_samples (List[:obj:`DetDataSample`]): The Data + Samples. It usually includes information such as + `gt_instance`, `gt_panoptic_seg` and `gt_sem_seg`. + rescale (bool): Whether to rescale the results. + Defaults to True. Returns: tuple[Tensor, Tensor, Tensor, Tensor]: @@ -29,13 +33,31 @@ def two_stage_panoptic_segmentor__simple_test(ctx, `labels` of shape [N, num_det], `masks` of shape [N, roi_H, roi_W], `semseg` of shape [N, num_sem_class, sem_H, sem_W]. """ - assert self.with_bbox, 'Bbox head must be implemented.' - x = self.extract_feat(img) - if proposals is None: - proposals, _ = self.rpn_head.simple_test_rpn(x, img_metas) + ctx = FUNCTION_REWRITER.get_context() + data_samples = copy.deepcopy(data_samples) + deploy_cfg = ctx.cfg + + # get origin input shape as tensor to support onnx dynamic shape + is_dynamic_flag = is_dynamic_shape(deploy_cfg) + img_shape = torch._shape_as_tensor(batch_inputs)[2:] + if not is_dynamic_flag: + img_shape = [int(val) for val in img_shape] + # set the metainfo + # note that we can not use `set_metainfo`, deepcopy would crash the + # onnx trace. 
+ for data_sample in data_samples: + data_sample.set_field( + name='img_shape', value=img_shape, field_type='metainfo') + data_sample.set_field( + name='batch_input_shape', value=img_shape, field_type='metainfo') + + img_metas = [data_samples.metainfo for data_samples in data_samples] + x = self.extract_feat(batch_inputs) + proposals = self.rpn_head.predict(x, data_samples, rescale=False) - bboxes, labels, masks = self.roi_head.simple_test( - x, proposals, img_metas, rescale=False) + bboxes, labels, masks = self.roi_head.predict( + x, proposals, data_samples, rescale=False) - semseg = self.semantic_head.simple_test(x, img_metas, rescale=False) + semseg = self.semantic_head.predict(x, img_metas, rescale=False) + # do not export panoptic_fusion_head return bboxes, labels, masks, semseg diff --git a/mmdeploy/codebase/mmdet/models/seg_heads/__init__.py b/mmdeploy/codebase/mmdet/models/seg_heads/__init__.py index 222a0c876a..db03f1f20c 100644 --- a/mmdeploy/codebase/mmdet/models/seg_heads/__init__.py +++ b/mmdeploy/codebase/mmdet/models/seg_heads/__init__.py @@ -1,4 +1,4 @@ # Copyright (c) OpenMMLab. All rights reserved. -from .base_semantic_head import base_semantic_head__simple_test +from . import base_semantic_head -__all__ = ['base_semantic_head__simple_test'] +__all__ = ['base_semantic_head'] diff --git a/mmdeploy/codebase/mmdet/models/seg_heads/base_semantic_head.py b/mmdeploy/codebase/mmdet/models/seg_heads/base_semantic_head.py index 1d09c44ebe..6b036f78d5 100644 --- a/mmdeploy/codebase/mmdet/models/seg_heads/base_semantic_head.py +++ b/mmdeploy/codebase/mmdet/models/seg_heads/base_semantic_head.py @@ -5,25 +5,25 @@ @FUNCTION_REWRITER.register_rewriter( - 'mmdet.models.seg_heads.base_semantic_head.BaseSemanticHead.simple_test') -def base_semantic_head__simple_test(ctx, self, x, img_metas, **kwargs): - """Rewrite `simple_test` for default backend. 
- Support configured dynamic/static shape for model input and return - semantic-segmentation result as Tensor instead of numpy array. + 'mmdet.models.seg_heads.base_semantic_head.BaseSemanticHead.predict') +def base_semantic_head__predict(self, x, batch_img_metas, rescale=False): + """Rewrite `predict` for default backend. Support configured dynamic/static + shape for model input and return semantic-segmentation result as Tensor + instead of numpy array. + Args: - ctx (ContextCaller): The context with additional information. - self: The instance of the original class. - img (Tensor | List[Tensor]): Input image tensor(s). - img_meta (list[dict]): Dict containing image's meta information - such as `img_shape`. + x (Union[Tensor, Tuple[Tensor]]): Feature maps. + batch_img_metas (List[dict]): List of image information. + rescale (bool): Whether to rescale the results. + Defaults to False. Returns: Tensor: `semseg` of shape [N, num_sem_class, H, W] """ - output = self.forward(x) - seg_preds = output['seg_preds'] - - h, w = img_metas[0]['img_shape'][:2] + seg_preds = self.forward(x)['seg_preds'] seg_preds = F.interpolate( - seg_preds, size=(h, w), mode='bilinear', align_corners=False) + seg_preds, + size=batch_img_metas[0]['batch_input_shape'], + mode='bilinear', + align_corners=False) return seg_preds From 79a51f5c5a8ad4f77453bd3884613b6e6fe8a376 Mon Sep 17 00:00:00 2001 From: RunningLeon Date: Fri, 28 Jul 2023 16:16:46 +0800 Subject: [PATCH 05/22] remove interpolate --- .../mmdet/deploy/object_detection_model.py | 31 +++++++++++-------- .../models/seg_heads/base_semantic_head.py | 11 +++---- 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/mmdeploy/codebase/mmdet/deploy/object_detection_model.py b/mmdeploy/codebase/mmdet/deploy/object_detection_model.py index 9498251c35..08f01e107b 100644 --- a/mmdeploy/codebase/mmdet/deploy/object_detection_model.py +++ b/mmdeploy/codebase/mmdet/deploy/object_detection_model.py @@ -295,19 +295,24 @@ def forward(self, 
head_args = self.model_cfg.model.panoptic_fusion_head.copy() head_args['test_cfg'] = self.model_cfg.model.test_cfg.panoptic fusion_head = obj_dict[head_args.pop('type')](**head_args) - if rescale: - seg_pred_list = [] - for i in range(batch_size): - h, w = img_metas[i]['img_shape'] - seg_pred = batch_semseg[i][:, :h, :w] - h, w = img_metas[i]['ori_shape'] - seg_pred = F.interpolate( - seg_pred[None], - size=(h, w), - mode='bilinear', - align_corners=False)[0] - seg_pred_list.append(seg_pred) - batch_semseg = seg_pred_list + seg_pred_list = [] + for i in range(batch_size): + h, w = img_metas[i]['batch_input_shape'] + seg_pred = F.interpolate( + batch_semseg[i][None], + size=(h, w), + mode='bilinear', + align_corners=False)[0] + h, w = img_metas[i]['img_shape'] + seg_pred = seg_pred[:, :h, :w] + h, w = img_metas[i]['ori_shape'] + seg_pred = F.interpolate( + seg_pred[None], + size=(h, w), + mode='bilinear', + align_corners=False)[0] + seg_pred_list.append(seg_pred) + batch_semseg = seg_pred_list masks_results = [ds.pred_instances for ds in data_samples] semseg_results = fusion_head.predict(masks_results, batch_semseg) diff --git a/mmdeploy/codebase/mmdet/models/seg_heads/base_semantic_head.py b/mmdeploy/codebase/mmdet/models/seg_heads/base_semantic_head.py index 6b036f78d5..83778f2fbe 100644 --- a/mmdeploy/codebase/mmdet/models/seg_heads/base_semantic_head.py +++ b/mmdeploy/codebase/mmdet/models/seg_heads/base_semantic_head.py @@ -1,5 +1,4 @@ # Copyright (c) OpenMMLab. All rights reserved. 
-import torch.nn.functional as F from mmdeploy.core import FUNCTION_REWRITER @@ -21,9 +20,9 @@ def base_semantic_head__predict(self, x, batch_img_metas, rescale=False): Tensor: `semseg` of shape [N, num_sem_class, H, W] """ seg_preds = self.forward(x)['seg_preds'] - seg_preds = F.interpolate( - seg_preds, - size=batch_img_metas[0]['batch_input_shape'], - mode='bilinear', - align_corners=False) + # seg_preds = F.interpolate( + # seg_preds, + # size=batch_img_metas[0]['batch_input_shape'], + # mode='bilinear', + # align_corners=False) return seg_preds From aa94ec71fcbb11ac727221bfc3ef7a341a23ee3e Mon Sep 17 00:00:00 2001 From: RunningLeon Date: Fri, 28 Jul 2023 16:25:03 +0800 Subject: [PATCH 06/22] update --- configs/mmdet/_base_/base_panoptic-seg_dynamic.py | 4 ---- mmdeploy/codebase/mmdet/deploy/object_detection_model.py | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/configs/mmdet/_base_/base_panoptic-seg_dynamic.py b/configs/mmdet/_base_/base_panoptic-seg_dynamic.py index 9859fc7942..c0e9244e9e 100644 --- a/configs/mmdet/_base_/base_panoptic-seg_dynamic.py +++ b/configs/mmdet/_base_/base_panoptic-seg_dynamic.py @@ -17,12 +17,8 @@ 'masks': { 0: 'batch', 1: 'num_dets', - 2: 'height', - 3: 'width' }, 'semseg': { 0: 'batch', - 2: 'height', - 3: 'width' }, }) diff --git a/mmdeploy/codebase/mmdet/deploy/object_detection_model.py b/mmdeploy/codebase/mmdet/deploy/object_detection_model.py index 08f01e107b..bcc88d1243 100644 --- a/mmdeploy/codebase/mmdet/deploy/object_detection_model.py +++ b/mmdeploy/codebase/mmdet/deploy/object_detection_model.py @@ -310,7 +310,7 @@ def forward(self, seg_pred[None], size=(h, w), mode='bilinear', - align_corners=False)[0] + align_corners=False)[0].cpu() seg_pred_list.append(seg_pred) batch_semseg = seg_pred_list From d48eca5b01a158602b420bcebe7830ea045ce781 Mon Sep 17 00:00:00 2001 From: RunningLeon Date: Tue, 1 Aug 2023 15:26:51 +0800 Subject: [PATCH 07/22] support panoptic-fpn mask2former maskformer --- 
.../mmdet/_base_/base_panoptic-seg_static.py | 18 +- ..._maskformer_onnxruntime_static-800x1344.py | 8 + ...eg_maskformer_tensorrt_static-800x1344.py} | 10 +- .../panoptic-seg_onnxruntime_dynamic.py | 4 - .../panoptic-seg_onnxruntime_static.py | 5 - ...c-seg_panoptic-fpn_onnxruntime_dynamic.py} | 10 +- ...-fpn_tensorrt_dynamic-352x512-1344x1344.py | 41 +++ docs/en/04-supported-codebases/mmdet.md | 3 + docs/zh_cn/04-supported-codebases/mmdet.md | 3 + .../mmdet/deploy/object_detection_model.py | 265 +++++++++++++----- .../mmdet/models/detectors/__init__.py | 6 +- .../mmdet/models/detectors/maskformer.py | 57 ++++ tests/regression/mmdet.yml | 36 +++ 13 files changed, 373 insertions(+), 93 deletions(-) create mode 100644 configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_static-800x1344.py rename configs/mmdet/panoptic-seg/{panoptic-seg_tensorrt_dynamic-352x512-1344x1344.py => panoptic-seg_maskformer_tensorrt_static-800x1344.py} (51%) delete mode 100644 configs/mmdet/panoptic-seg/panoptic-seg_onnxruntime_dynamic.py delete mode 100644 configs/mmdet/panoptic-seg/panoptic-seg_onnxruntime_static.py rename configs/mmdet/{_base_/base_panoptic-seg_dynamic.py => panoptic-seg/panoptic-seg_panoptic-fpn_onnxruntime_dynamic.py} (69%) create mode 100644 configs/mmdet/panoptic-seg/panoptic-seg_panoptic-fpn_tensorrt_dynamic-352x512-1344x1344.py create mode 100644 mmdeploy/codebase/mmdet/models/detectors/maskformer.py diff --git a/configs/mmdet/_base_/base_panoptic-seg_static.py b/configs/mmdet/_base_/base_panoptic-seg_static.py index d42442b54d..aeace5065b 100644 --- a/configs/mmdet/_base_/base_panoptic-seg_static.py +++ b/configs/mmdet/_base_/base_panoptic-seg_static.py @@ -1,4 +1,16 @@ -_base_ = ['./base_static.py'] +_base_ = ['../../_base_/onnx_config.py'] -onnx_config = dict(output_names=['dets', 'labels', 'masks', 'semseg']) -codebase_config = dict(post_processing=dict(export_postprocess_mask=False)) +codebase_config = dict( + type='mmdet', + task='ObjectDetection', 
+ model_type='panoptic_end2end', + post_processing=dict( + export_postprocess_mask=False, + score_threshold=0.05, + confidence_threshold=0.005, # for YOLOv3 + iou_threshold=0.5, + max_output_boxes_per_class=200, + pre_top_k=5000, + keep_top_k=100, + background_label_id=-1, + )) diff --git a/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_static-800x1344.py b/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_static-800x1344.py new file mode 100644 index 0000000000..5ee9077731 --- /dev/null +++ b/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_static-800x1344.py @@ -0,0 +1,8 @@ +_base_ = [ + '../_base_/base_panoptic-seg_static.py', + '../../_base_/backends/onnxruntime.py' +] +onnx_config = dict( + opset_version=13, + output_names=['cls_logits', 'mask_logits'], + input_shape=[1344, 800]) diff --git a/configs/mmdet/panoptic-seg/panoptic-seg_tensorrt_dynamic-352x512-1344x1344.py b/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_static-800x1344.py similarity index 51% rename from configs/mmdet/panoptic-seg/panoptic-seg_tensorrt_dynamic-352x512-1344x1344.py rename to configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_static-800x1344.py index 42e243f349..8cfe30734a 100644 --- a/configs/mmdet/panoptic-seg/panoptic-seg_tensorrt_dynamic-352x512-1344x1344.py +++ b/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_static-800x1344.py @@ -1,7 +1,11 @@ _base_ = [ - '../_base_/base_panoptic-seg_dynamic.py', + '../_base_/base_panoptic-seg_static.py', '../../_base_/backends/tensorrt.py' ] +onnx_config = dict( + opset_version=13, + output_names=['cls_logits', 'mask_logits'], + input_shape=[1344, 800]) backend_config = dict( common_config=dict(max_workspace_size=1 << 30), @@ -9,7 +13,7 @@ dict( input_shapes=dict( input=dict( - min_shape=[1, 3, 352, 512], + min_shape=[1, 3, 800, 1344], opt_shape=[1, 3, 800, 1344], - max_shape=[1, 3, 1344, 1344]))) + max_shape=[1, 3, 800, 1344]))) ]) diff --git 
a/configs/mmdet/panoptic-seg/panoptic-seg_onnxruntime_dynamic.py b/configs/mmdet/panoptic-seg/panoptic-seg_onnxruntime_dynamic.py deleted file mode 100644 index 990d4e8fc9..0000000000 --- a/configs/mmdet/panoptic-seg/panoptic-seg_onnxruntime_dynamic.py +++ /dev/null @@ -1,4 +0,0 @@ -_base_ = [ - '../_base_/base_panoptic-seg_dynamic.py', - '../../_base_/backends/onnxruntime.py' -] diff --git a/configs/mmdet/panoptic-seg/panoptic-seg_onnxruntime_static.py b/configs/mmdet/panoptic-seg/panoptic-seg_onnxruntime_static.py deleted file mode 100644 index d4ff118f2e..0000000000 --- a/configs/mmdet/panoptic-seg/panoptic-seg_onnxruntime_static.py +++ /dev/null @@ -1,5 +0,0 @@ -_base_ = [ - '../_base_/base_panoptic-seg_static.py', - '../../_base_/backends/onnxruntime.py' -] -onnx_config = dict(input_shape=[1280, 800]) diff --git a/configs/mmdet/_base_/base_panoptic-seg_dynamic.py b/configs/mmdet/panoptic-seg/panoptic-seg_panoptic-fpn_onnxruntime_dynamic.py similarity index 69% rename from configs/mmdet/_base_/base_panoptic-seg_dynamic.py rename to configs/mmdet/panoptic-seg/panoptic-seg_panoptic-fpn_onnxruntime_dynamic.py index c0e9244e9e..637a906f39 100644 --- a/configs/mmdet/_base_/base_panoptic-seg_dynamic.py +++ b/configs/mmdet/panoptic-seg/panoptic-seg_panoptic-fpn_onnxruntime_dynamic.py @@ -1,5 +1,10 @@ -_base_ = ['./base_panoptic-seg_static.py'] +_base_ = [ + '../_base_/base_panoptic-seg_static.py', + '../../_base_/backends/onnxruntime.py' +] onnx_config = dict( + input_shape=[1344, 800], + output_names=['dets', 'labels', 'masks', 'semseg'], dynamic_axes={ 'input': { 0: 'batch', @@ -21,4 +26,5 @@ 'semseg': { 0: 'batch', }, - }) + }, +) diff --git a/configs/mmdet/panoptic-seg/panoptic-seg_panoptic-fpn_tensorrt_dynamic-352x512-1344x1344.py b/configs/mmdet/panoptic-seg/panoptic-seg_panoptic-fpn_tensorrt_dynamic-352x512-1344x1344.py new file mode 100644 index 0000000000..87c43d1b93 --- /dev/null +++ 
b/configs/mmdet/panoptic-seg/panoptic-seg_panoptic-fpn_tensorrt_dynamic-352x512-1344x1344.py @@ -0,0 +1,41 @@ +_base_ = [ + '../_base_/base_panoptic-seg_static.py', + '../../_base_/backends/tensorrt.py' +] +onnx_config = dict( + input_shape=[1344, 800], + output_names=['dets', 'labels', 'masks', 'semseg'], + dynamic_axes={ + 'input': { + 0: 'batch', + 2: 'height', + 3: 'width' + }, + 'dets': { + 0: 'batch', + 1: 'num_dets', + }, + 'labels': { + 0: 'batch', + 1: 'num_dets', + }, + 'masks': { + 0: 'batch', + 1: 'num_dets', + }, + 'semseg': { + 0: 'batch', + }, + }, +) + +backend_config = dict( + common_config=dict(max_workspace_size=1 << 30), + model_inputs=[ + dict( + input_shapes=dict( + input=dict( + min_shape=[1, 3, 352, 512], + opt_shape=[1, 3, 800, 1344], + max_shape=[1, 3, 1344, 1344]))) + ]) diff --git a/docs/en/04-supported-codebases/mmdet.md b/docs/en/04-supported-codebases/mmdet.md index 847c9311a4..45631bb15e 100644 --- a/docs/en/04-supported-codebases/mmdet.md +++ b/docs/en/04-supported-codebases/mmdet.md @@ -213,3 +213,6 @@ Besides python API, mmdeploy SDK also provides other FFI (Foreign Function Inter | [Swin Transformer](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/swin) | Instance Segmentation | Y | Y | N | N | Y | | [SOLO](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/solo) | Instance Segmentation | Y | N | N | N | Y | | [SOLOv2](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/solov2) | Instance Segmentation | Y | N | N | N | Y | +| [Panoptic FPN](https://github.com/open-mmlab/mmdetection/tree/main/configs/panoptic_fpn) | Panoptic Segmentation | Y | Y | N | N | N | +| [MaskFormer](https://github.com/open-mmlab/mmdetection/tree/main/configs/maskformer) | Panoptic Segmentation | Y | Y | N | N | N | +| [Mask2Former](https://github.com/open-mmlab/mmdetection/tree/main/configs/mask2former) | Panoptic Segmentation | Y | Y | N | N | N | diff --git a/docs/zh_cn/04-supported-codebases/mmdet.md 
b/docs/zh_cn/04-supported-codebases/mmdet.md index 4a42b38ca8..da3ce82ce4 100644 --- a/docs/zh_cn/04-supported-codebases/mmdet.md +++ b/docs/zh_cn/04-supported-codebases/mmdet.md @@ -216,3 +216,6 @@ cv2.imwrite('output_detection.png', img) | [Swin Transformer](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/swin) | Instance Segmentation | Y | Y | N | N | Y | | [SOLO](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/solo) | Instance Segmentation | Y | N | N | N | Y | | [SOLOv2](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/solov2) | Instance Segmentation | Y | N | N | N | Y | +| [Panoptic FPN](https://github.com/open-mmlab/mmdetection/tree/main/configs/panoptic_fpn) | Panoptic Segmentation | Y | Y | N | N | N | +| [MaskFormer](https://github.com/open-mmlab/mmdetection/tree/main/configs/maskformer) | Panoptic Segmentation | Y | Y | N | N | N | +| [Mask2Former](https://github.com/open-mmlab/mmdetection/tree/main/configs/mask2former) | Panoptic Segmentation | Y | Y | N | N | N | diff --git a/mmdeploy/codebase/mmdet/deploy/object_detection_model.py b/mmdeploy/codebase/mmdet/deploy/object_detection_model.py index bcc88d1243..9b4e4c2097 100644 --- a/mmdeploy/codebase/mmdet/deploy/object_detection_model.py +++ b/mmdeploy/codebase/mmdet/deploy/object_detection_model.py @@ -93,7 +93,6 @@ def __clear_outputs( num_outputs = len(test_outputs) has_inst_seg = num_outputs >= 3 - has_sem_seg = num_outputs == 4 outputs = [[None for _ in range(batch_size)] for _ in range(num_outputs)] @@ -103,8 +102,6 @@ def __clear_outputs( outputs[1][i] = test_outputs[1][i, inds, ...] if has_inst_seg: outputs[2][i] = test_outputs[2][i, inds, ...] 
- if has_sem_seg: - outputs[3][i] = test_outputs[3][i] return outputs @staticmethod @@ -177,41 +174,26 @@ def postprocessing_masks(det_bboxes: Union[np.ndarray, Tensor], result_masks = torch.cat(result_masks, 1) return result_masks.squeeze(0) - def forward(self, - inputs: torch.Tensor, - data_samples: Optional[List[BaseDataElement]] = None, - mode: str = 'predict', - **kwargs) -> Any: - """The model forward. - - Args: - inputs (torch.Tensor): The input tensors - data_samples (List[BaseDataElement], optional): The data samples. - Defaults to None. - mode (str, optional): forward mode, only support `predict`. - - Returns: - Any: Model output. - """ - assert mode == 'predict', 'Deploy model only allow mode=="predict".' - inputs = inputs.contiguous() - outputs = self.predict(inputs) - outputs = End2EndModel.__clear_outputs(outputs) - batch_dets, batch_labels = outputs[:2] - batch_masks = outputs[2] if len(outputs) >= 3 else None - batch_size = inputs.shape[0] + def postprocessing_results(self, + batch_dets: torch.Tensor, + batch_labels: torch.Tensor, + batch_masks: torch.Tensor, + data_samples: List[BaseDataElement], + rescale: bool = True): + """Post-processing dets, labels, masks.""" + batch_size = len(batch_dets) img_metas = [data_sample.metainfo for data_sample in data_samples] - rescale = kwargs.get('rescale', True) model_type = self.model_cfg.model.type if \ self.model_cfg is not None else None for i in range(batch_size): dets, labels = batch_dets[i], batch_labels[i] - result = InstanceData() + pred_instances = InstanceData() bboxes = dets[:, :4] scores = dets[:, 4] - # perform rescale - if rescale and 'scale_factor' in img_metas[i]: + scale_factor = bboxes.new_ones(1, 4) + # get scale_factor + if 'scale_factor' in img_metas[i]: scale_factor = img_metas[i]['scale_factor'] if isinstance(scale_factor, (list, tuple, np.ndarray)): if len(scale_factor) == 2: @@ -220,6 +202,7 @@ def forward(self, [scale_factor, scale_factor]) scale_factor = np.array(scale_factor)[None, 
:] # [1,4] scale_factor = torch.from_numpy(scale_factor).to(dets) + if rescale: bboxes /= scale_factor # Most of models in mmdetection 3.x use `pad_param`, but some @@ -233,19 +216,17 @@ def forward(self, elif 'border' in img_metas[i]: pad_key = 'border' if pad_key is not None: - scale_factor = img_metas[i].get('scale_factor', - np.array([1., 1.])) x_off = img_metas[i][pad_key][2] / scale_factor[1] y_off = img_metas[i][pad_key][0] / scale_factor[0] bboxes[:, ::2] -= x_off bboxes[:, 1::2] -= y_off bboxes *= (bboxes > 0) - result.scores = scores - result.bboxes = bboxes + pred_instances.scores = scores + pred_instances.bboxes = bboxes if model_type in ['SOLO', 'SOLOv2']: - result.bboxes = bboxes.new_zeros(bboxes.shape) - result.labels = labels + pred_instances.bboxes = bboxes.new_zeros(bboxes.shape) + pred_instances.labels = labels if batch_masks is not None: masks = batch_masks[i] @@ -253,7 +234,6 @@ def forward(self, ori_h, ori_w = img_metas[i]['ori_shape'][:2] export_postprocess_mask = False if self.deploy_cfg is not None: - mmdet_deploy_cfg = get_post_processing_params( self.deploy_cfg) # this flag enable postprocess when export. 
@@ -269,34 +249,137 @@ def forward(self, masks = F.interpolate( masks.unsqueeze(0), size=[ - math.ceil(masks.shape[-2] / - img_metas[i]['scale_factor'][0]), - math.ceil(masks.shape[-1] / - img_metas[i]['scale_factor'][1]) + math.ceil(masks.shape[-2] / scale_factor[0]), + math.ceil(masks.shape[-1] / scale_factor[1]) ])[..., :ori_h, :ori_w] masks = masks.squeeze(0) if masks.dtype != bool: masks = masks >= 0.5 # aligned with mmdet to easily convert to numpy masks = masks.cpu() - result.masks = masks - - data_samples[i].pred_instances = result - - # deal with panoptic seg - batch_semseg = outputs[3] if len(outputs) == 4 else None - if batch_semseg is not None: - from mmdet.models.seg_heads import (HeuristicFusionHead, - MaskFormerFusionHead) - obj_dict = { - 'HeuristicFusionHead': HeuristicFusionHead, - 'MaskFormerFusionHead': MaskFormerFusionHead - } - head_args = self.model_cfg.model.panoptic_fusion_head.copy() - head_args['test_cfg'] = self.model_cfg.model.test_cfg.panoptic - fusion_head = obj_dict[head_args.pop('type')](**head_args) + pred_instances.masks = masks + + data_samples[i].pred_instances = pred_instances + + def forward(self, + inputs: torch.Tensor, + data_samples: List[BaseDataElement], + mode: str = 'predict', + **kwargs) -> Any: + """The model forward. + + Args: + inputs (torch.Tensor): The input tensors + data_samples (List[BaseDataElement]): The data samples. + Defaults to None. + mode (str, optional): forward mode, only support `predict`. + + Returns: + Any: Model output. + """ + assert mode == 'predict', 'Deploy model only allow mode=="predict".' 
+ inputs = inputs.contiguous() + outputs = self.predict(inputs) + outputs = End2EndModel.__clear_outputs(outputs) + batch_dets, batch_labels = outputs[:2] + batch_masks = outputs[2] if len(outputs) >= 3 else None + self.postprocessing_results(batch_dets, batch_labels, batch_masks, + data_samples) + return data_samples + + def predict(self, imgs: Tensor) -> Tuple[np.ndarray, np.ndarray]: + """The interface for predict. + + Args: + imgs (Tensor): Input image(s) in [N x C x H x W] format. + + Returns: + tuple[np.ndarray, np.ndarray]: dets of shape [N, num_det, 5] + and class labels of shape [N, num_det]. + """ + outputs = self.wrapper({self.input_name: imgs}) + outputs = self.wrapper.output_to_list(outputs) + return outputs + + +@__BACKEND_MODEL.register_module('panoptic_end2end') +class PanOpticEnd2EndModel(End2EndModel): + """End to end model for inference of PanOptic Segmentation. + + Args: + backend (Backend): The backend enum, specifying backend type. + backend_files (Sequence[str]): Paths to all required backend files + (e.g. '.onnx' for ONNX Runtime, '.param' and '.bin' for ncnn). + device (str): A string specifying device type. + deploy_cfg (str|Config): Deployment config file or loaded Config + object. + data_preprocessor (dict|nn.Module): The data preprocessor. 
+ """ + + def __init__(self, + backend: Backend, + backend_files: Sequence[str], + device: str, + deploy_cfg: Union[str, Config], + model_cfg: Optional[Union[str, Config]] = None, + data_preprocessor: Optional[Union[dict, nn.Module]] = None, + **kwargs): + super(PanOpticEnd2EndModel, self).__init__( + backend, + backend_files, + device, + deploy_cfg, + model_cfg=model_cfg, + data_preprocessor=data_preprocessor, + **kwargs) + from mmdet.models.seg_heads import (HeuristicFusionHead, + MaskFormerFusionHead) + obj_dict = { + 'HeuristicFusionHead': HeuristicFusionHead, + 'MaskFormerFusionHead': MaskFormerFusionHead + } + head_args = self.model_cfg.model.panoptic_fusion_head.copy() + test_cfg = self.model_cfg.model.test_cfg + # deal with PanopticFPN + if 'panoptic' in test_cfg: + test_cfg = test_cfg['panoptic'] + head_args['test_cfg'] = test_cfg + self.fusion_head_type = head_args.pop('type') + self.fusion_head = obj_dict[self.fusion_head_type](**head_args) + + def forward(self, + inputs: torch.Tensor, + data_samples: List[BaseDataElement], + mode: str = 'predict', + **kwargs) -> Any: + """The model forward. + + Args: + inputs (torch.Tensor): The input tensors + data_samples (List[BaseDataElement], optional): The data samples. + Defaults to None. + mode (str, optional): forward mode, only support `predict`. + + Returns: + Any: Model output. + """ + assert mode == 'predict', 'Deploy model only allow mode=="predict".' 
+ model_type = self.model_cfg.model.type + + inputs = inputs.contiguous() + outputs = self.predict(inputs) + rescale = kwargs.get('rescale', True) + + if model_type == 'PanopticFPN': + batch_dets, batch_labels, batch_masks = outputs[:3] + batch_semseg = outputs[3] + self.postprocessing_results(batch_dets, batch_labels, batch_masks, + data_samples) + masks_results = [ds.pred_instances for ds in data_samples] + img_metas = [data_sample.metainfo for data_sample in data_samples] seg_pred_list = [] - for i in range(batch_size): + for i in range(len(data_samples)): + # do resize in base_semantic_head h, w = img_metas[i]['batch_input_shape'] seg_pred = F.interpolate( batch_semseg[i][None], @@ -312,28 +395,64 @@ def forward(self, mode='bilinear', align_corners=False)[0].cpu() seg_pred_list.append(seg_pred) - batch_semseg = seg_pred_list - - masks_results = [ds.pred_instances for ds in data_samples] - semseg_results = fusion_head.predict(masks_results, batch_semseg) - for ds, pred_panoptic_seg in zip(data_samples, semseg_results): - ds.pred_panoptic_seg = pred_panoptic_seg - + semseg_results = self.fusion_head.predict(masks_results, + seg_pred_list) + results_list = [dict(pan_results=res) for res in semseg_results] + elif model_type in ['MaskFormer', 'Mask2Former']: + batch_cls_logits = outputs[0] + batch_mask_logits = outputs[1] + + results_list = self.fusion_head.predict( + batch_cls_logits, + batch_mask_logits, + data_samples, + rescale=rescale) + + data_samples = self.add_pred_to_datasample(data_samples, results_list) return data_samples - def predict(self, imgs: Tensor) -> Tuple[np.ndarray, np.ndarray]: - """The interface for predict. + @staticmethod + def add_pred_to_datasample( + data_samples: List[BaseDataElement], + results_list: List[dict]) -> List[BaseDataElement]: + """Add predictions to `DetDataSample`. Args: - imgs (Tensor): Input image(s) in [N x C x H x W] format. 
+ data_samples (list[:obj:`DetDataSample`], optional): A batch of + data samples that contain annotations and predictions. + results_list (List[dict]): Instance segmentation, semantic + segmentation and panoptic segmentation results. Returns: - tuple[np.ndarray, np.ndarray]: dets of shape [N, num_det, 5] - and class labels of shape [N, num_det]. + list[:obj:`DetDataSample`]: Detection results of the + input images. Each DetDataSample usually contains + 'pred_instances' and `pred_panoptic_seg`. And the + ``pred_instances`` usually contains following keys. + + - scores (Tensor): Classification scores, has a shape + (num_instance, ) + - labels (Tensor): Labels of bboxes, has a shape + (num_instances, ). + - bboxes (Tensor): Has a shape (num_instances, 4), + the last dimension 4 arrange as (x1, y1, x2, y2). + - masks (Tensor): Has a shape (num_instances, H, W). + + And the ``pred_panoptic_seg`` contains the following key + + - sem_seg (Tensor): panoptic segmentation mask, has a + shape (1, h, w). """ - outputs = self.wrapper({self.input_name: imgs}) - outputs = self.wrapper.output_to_list(outputs) - return outputs + for data_sample, pred_results in zip(data_samples, results_list): + if 'pan_results' in pred_results: + data_sample.pred_panoptic_seg = pred_results['pan_results'] + + if 'ins_results' in pred_results: + data_sample.pred_instances = pred_results['ins_results'] + + assert 'sem_results' not in pred_results, 'semantic ' \ + 'segmentation results are not supported yet.' + + return data_samples @__BACKEND_MODEL.register_module('single_stage') diff --git a/mmdeploy/codebase/mmdet/models/detectors/__init__.py b/mmdeploy/codebase/mmdet/models/detectors/__init__.py index f424226b15..460694aa72 100644 --- a/mmdeploy/codebase/mmdet/models/detectors/__init__.py +++ b/mmdeploy/codebase/mmdet/models/detectors/__init__.py @@ -1,8 +1,8 @@ # Copyright (c) OpenMMLab. All rights reserved. -from .
import (base_detr, panoptic_two_stage_segmentor, single_stage, - single_stage_instance_seg, two_stage) +from . import (base_detr, maskformer, panoptic_two_stage_segmentor, + single_stage, single_stage_instance_seg, two_stage) __all__ = [ 'base_detr', 'single_stage', 'single_stage_instance_seg', 'two_stage', - 'panoptic_two_stage_segmentor' + 'panoptic_two_stage_segmentor', 'maskformer' ] diff --git a/mmdeploy/codebase/mmdet/models/detectors/maskformer.py b/mmdeploy/codebase/mmdet/models/detectors/maskformer.py new file mode 100644 index 0000000000..f12b31ec43 --- /dev/null +++ b/mmdeploy/codebase/mmdet/models/detectors/maskformer.py @@ -0,0 +1,57 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import copy + +import torch + +from mmdeploy.core import FUNCTION_REWRITER +from mmdeploy.utils import is_dynamic_shape + + +@FUNCTION_REWRITER.register_rewriter('mmdet.models.detectors.maskformer.' + 'MaskFormer.forward') +def maskformer__forward(self, + batch_inputs, + data_samples, + mode='tensor', + **kwargs): + """Rewrite `forward` for default backend. Support configured dynamic/static + shape for model input and return detection result as Tensor instead of + numpy array. + + Args: + batch_inputs (Tensor): Inputs with shape (N, C, H, W). + data_samples (List[:obj:`DetDataSample`]): The Data + Samples. It usually includes information such as + `gt_instance`, `gt_panoptic_seg` and `gt_sem_seg`. + rescale (bool): Whether to rescale the results. + Defaults to True. + + Returns: + tuple[Tensor, Tensor]: + (cls_logits, mask_logits), `cls_logits` of shape + [N, num_queries, cls_out_channels] and `mask_logits` of + shape [N, num_queries, mask_H, mask_W].
+ """ + ctx = FUNCTION_REWRITER.get_context() + data_samples = copy.deepcopy(data_samples) + deploy_cfg = ctx.cfg + + # get origin input shape as tensor to support onnx dynamic shape + is_dynamic_flag = is_dynamic_shape(deploy_cfg) + img_shape = torch._shape_as_tensor(batch_inputs)[2:] + if not is_dynamic_flag: + img_shape = [int(val) for val in img_shape] + # set the metainfo + # note that we can not use `set_metainfo`, deepcopy would crash the + # onnx trace. + for data_sample in data_samples: + data_sample.set_field( + name='img_shape', value=img_shape, field_type='metainfo') + data_sample.set_field( + name='batch_input_shape', value=img_shape, field_type='metainfo') + + feats = self.extract_feat(batch_inputs) + mask_cls_results, mask_pred_results = self.panoptic_head.predict( + feats, data_samples) + # do not export panoptic_fusion_head + return mask_cls_results, mask_pred_results diff --git a/tests/regression/mmdet.yml b/tests/regression/mmdet.yml index 7e35e8a1b6..1d5aa9ea5e 100644 --- a/tests/regression/mmdet.yml +++ b/tests/regression/mmdet.yml @@ -381,3 +381,39 @@ models: - *pipeline_seg_ort_dynamic_fp32 - *pipeline_seg_trt_dynamic_fp32 - *pipeline_seg_openvino_dynamic_fp32 + + - name: PanopticFPN + metafile: configs/panoptic_fpn/metafile.yml + model_configs: + - configs/panoptic_fpn/panoptic-fpn_r50_fpn_1x_coco.py + pipelines: + - deploy_config: configs/mmdet/panoptic-seg/panoptic-seg_panoptic-fpn_onnxruntime_dynamic.py + convert_image: *convert_image + backend_test: False + - deploy_config: configs/mmdet/panoptic-seg/panoptic-seg_panoptic-fpn_tensorrt_dynamic-352x512-1344x1344.py + convert_image: *convert_image + backend_test: True + + - name: MaskFormer + metafile: configs/maskformer/metafile.yml + model_configs: + - configs/maskformer/maskformer_r50_ms-16xb1-75e_coco.py + pipelines: + - deploy_config: configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_static-800x1344.py + convert_image: *convert_image + backend_test: False + - 
deploy_config: configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_static-800x1344.py + convert_image: *convert_image + backend_test: False + + - name: Mask2Former + metafile: configs/mask2former/metafile.yml + model_configs: + - configs/mask2former/mask2former_r50_8xb2-lsj-50e_coco-panoptic.py + pipelines: + - deploy_config: configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_static-800x1344.py + convert_image: *convert_image + backend_test: False + - deploy_config: configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_static-800x1344.py + convert_image: *convert_image + backend_test: False From 931192770a3f0a872dfe3b5a443a536b3f08a21f Mon Sep 17 00:00:00 2001 From: RunningLeon Date: Wed, 2 Aug 2023 10:55:31 +0800 Subject: [PATCH 08/22] update --- mmdeploy/apis/onnx/export.py | 9 ++++++++- .../codebase/mmdet/deploy/object_detection_model.py | 13 +++---------- .../mmdet/models/seg_heads/base_semantic_head.py | 13 ++++++++----- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/mmdeploy/apis/onnx/export.py b/mmdeploy/apis/onnx/export.py index 92a9002d8d..1b788e842d 100644 --- a/mmdeploy/apis/onnx/export.py +++ b/mmdeploy/apis/onnx/export.py @@ -127,7 +127,14 @@ def wrapper(*arg, **kwargs): patched_model.forward = wrap_forward(patched_model.forward) patched_model.forward = partial(patched_model.forward, **input_metas) - + # force to export on cpu + patched_model = patched_model.cpu() + if isinstance(args, torch.Tensor): + args = args.cpu() + elif isinstance(args, (tuple, list)): + args = [_.cpu() for _ in args] + else: + raise RuntimeError(f'Not supported args: {args}') torch.onnx.export( patched_model, args, diff --git a/mmdeploy/codebase/mmdet/deploy/object_detection_model.py b/mmdeploy/codebase/mmdet/deploy/object_detection_model.py index 9b4e4c2097..17b3a35f1c 100644 --- a/mmdeploy/codebase/mmdet/deploy/object_detection_model.py +++ b/mmdeploy/codebase/mmdet/deploy/object_detection_model.py @@ -256,7 +256,7 @@ def 
postprocessing_results(self, if masks.dtype != bool: masks = masks >= 0.5 # aligned with mmdet to easily convert to numpy - masks = masks.cpu() + # masks = masks.cpu() pred_instances.masks = masks data_samples[i].pred_instances = pred_instances @@ -379,21 +379,14 @@ def forward(self, img_metas = [data_sample.metainfo for data_sample in data_samples] seg_pred_list = [] for i in range(len(data_samples)): - # do resize in base_semantic_head - h, w = img_metas[i]['batch_input_shape'] - seg_pred = F.interpolate( - batch_semseg[i][None], - size=(h, w), - mode='bilinear', - align_corners=False)[0] h, w = img_metas[i]['img_shape'] - seg_pred = seg_pred[:, :h, :w] + seg_pred = batch_semseg[i][:, :h, :w] h, w = img_metas[i]['ori_shape'] seg_pred = F.interpolate( seg_pred[None], size=(h, w), mode='bilinear', - align_corners=False)[0].cpu() + align_corners=False)[0] seg_pred_list.append(seg_pred) semseg_results = self.fusion_head.predict(masks_results, seg_pred_list) diff --git a/mmdeploy/codebase/mmdet/models/seg_heads/base_semantic_head.py b/mmdeploy/codebase/mmdet/models/seg_heads/base_semantic_head.py index 83778f2fbe..8ac2982812 100644 --- a/mmdeploy/codebase/mmdet/models/seg_heads/base_semantic_head.py +++ b/mmdeploy/codebase/mmdet/models/seg_heads/base_semantic_head.py @@ -1,5 +1,7 @@ # Copyright (c) OpenMMLab. All rights reserved. 
+import torch.nn.functional as F + from mmdeploy.core import FUNCTION_REWRITER @@ -20,9 +22,10 @@ def base_semantic_head__predict(self, x, batch_img_metas, rescale=False): Tensor: `semseg` of shape [N, num_sem_class, H, W] """ seg_preds = self.forward(x)['seg_preds'] - # seg_preds = F.interpolate( - # seg_preds, - # size=batch_img_metas[0]['batch_input_shape'], - # mode='bilinear', - # align_corners=False) + img_shape = batch_img_metas[0]['batch_input_shape'] + seg_preds = F.interpolate( + seg_preds, + size=(img_shape[0], img_shape[1]), + mode='bilinear', + align_corners=False) return seg_preds From 3f06043c3983d976b150884cc98830134211db76 Mon Sep 17 00:00:00 2001 From: RunningLeon Date: Wed, 2 Aug 2023 18:01:47 +0800 Subject: [PATCH 09/22] support dynamic --- ...ptic-seg_maskformer_onnxruntime_dynamic.py | 25 ++++ ...rmer_tensorrt_dynamic-352x512-1344x1344.py | 19 +++ .../models/dense_heads/mask2former_head.py | 94 +++++++++++++ .../codebase/mmdet/models/layers/__init__.py | 1 + .../layers/msdeformattn_pixel_decoder.py | 125 ++++++++++++++++++ mmdeploy/mmcv/ops/multi_scale_deform_attn.py | 65 ++++++++- 6 files changed, 328 insertions(+), 1 deletion(-) create mode 100644 configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_dynamic.py create mode 100644 configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_dynamic-352x512-1344x1344.py create mode 100644 mmdeploy/codebase/mmdet/models/dense_heads/mask2former_head.py create mode 100644 mmdeploy/codebase/mmdet/models/layers/msdeformattn_pixel_decoder.py diff --git a/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_dynamic.py b/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_dynamic.py new file mode 100644 index 0000000000..72d8af2aec --- /dev/null +++ b/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_dynamic.py @@ -0,0 +1,25 @@ +_base_ = [ + '../_base_/base_panoptic-seg_static.py', + '../../_base_/backends/onnxruntime.py' +] +onnx_config = dict( + 
opset_version=12, + output_names=['cls_logits', 'mask_logits'], + dynamic_axes={ + 'input': { + 0: 'batch', + 2: 'height', + 3: 'width' + }, + 'cls_logits': { + 0: 'batch', + 2: 'h', + 3: 'w', + }, + 'mask_logits': { + 0: 'batch', + 2: 'h', + 3: 'w', + }, + }, + input_shape=None) diff --git a/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_dynamic-352x512-1344x1344.py b/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_dynamic-352x512-1344x1344.py new file mode 100644 index 0000000000..a8eb5d4543 --- /dev/null +++ b/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_dynamic-352x512-1344x1344.py @@ -0,0 +1,19 @@ +_base_ = [ + '../_base_/base_panoptic-seg_static.py', + '../../_base_/backends/tensorrt.py' +] +onnx_config = dict( + opset_version=12, + output_names=['cls_logits', 'mask_logits'], + input_shape=[1344, 800]) + +backend_config = dict( + common_config=dict(max_workspace_size=1 << 30), + model_inputs=[ + dict( + input_shapes=dict( + input=dict( + min_shape=[1, 3, 352, 512], + opt_shape=[1, 3, 800, 1344], + max_shape=[1, 3, 1344, 1344]))) + ]) diff --git a/mmdeploy/codebase/mmdet/models/dense_heads/mask2former_head.py b/mmdeploy/codebase/mmdet/models/dense_heads/mask2former_head.py new file mode 100644 index 0000000000..22e333965e --- /dev/null +++ b/mmdeploy/codebase/mmdet/models/dense_heads/mask2former_head.py @@ -0,0 +1,94 @@ +# Copyright (c) OpenMMLab. All rights reserved. + +import torch + +from mmdeploy.core import FUNCTION_REWRITER + + +@FUNCTION_REWRITER.register_rewriter( + func_name='mmdet.models.dense_heads.mask2former_head.' + 'Mask2FormerHead.forward') +def mask2former_head__forward(self, x, batch_data_samples): + """Rewrite `forward` for default backend. + + Args: + x (list[Tensor]): Multi scale Features from the + upstream network, each is a 4D-tensor. + batch_data_samples (List[:obj:`DetDataSample`]): The Data + Samples. It usually includes information such as + `gt_instance`, `gt_panoptic_seg` and `gt_sem_seg`. 
+ + Returns: + tuple[list[Tensor]]: A tuple contains two elements. + + - cls_pred_list (list[Tensor)]: Classification logits \ + for each decoder layer. Each is a 3D-tensor with shape \ + (batch_size, num_queries, cls_out_channels). \ + Note `cls_out_channels` should includes background. + - mask_pred_list (list[Tensor]): Mask logits for each \ + decoder layer. Each with shape (batch_size, num_queries, \ + h, w). + """ + batch_size = x[0].shape[0] + mask_features, multi_scale_memorys = self.pixel_decoder(x) + # multi_scale_memorys (from low resolution to high resolution) + decoder_inputs = [] + decoder_positional_encodings = [] + for i in range(self.num_transformer_feat_level): + decoder_input = self.decoder_input_projs[i](multi_scale_memorys[i]) + # shape (batch_size, c, h, w) -> (batch_size, h*w, c) + decoder_input = decoder_input.flatten(2).permute(0, 2, 1) + level_embed = self.level_embed.weight[i].view(1, 1, -1) + decoder_input = decoder_input + level_embed + # shape (batch_size, c, h, w) -> (batch_size, h*w, c) + mask = decoder_input.new_zeros( + (batch_size, ) + multi_scale_memorys[i].shape[-2:], + dtype=torch.bool) + decoder_positional_encoding = self.decoder_positional_encoding(mask) + decoder_positional_encoding = decoder_positional_encoding.flatten( + 2).permute(0, 2, 1) + decoder_inputs.append(decoder_input) + decoder_positional_encodings.append(decoder_positional_encoding) + # shape (num_queries, c) -> (batch_size, num_queries, c) + query_feat = self.query_feat.weight.unsqueeze(0).repeat((batch_size, 1, 1)) + query_embed = self.query_embed.weight.unsqueeze(0).repeat( + (batch_size, 1, 1)) + + cls_pred_list = [] + mask_pred_list = [] + cls_pred, mask_pred, attn_mask = self._forward_head( + query_feat, mask_features, multi_scale_memorys[0].shape[-2:]) + cls_pred_list.append(cls_pred) + mask_pred_list.append(mask_pred) + + for i in range(self.num_transformer_decoder_layers): + level_idx = i % self.num_transformer_feat_level + # if a mask is all True(all 
background), then set it all False. + + # to avoid Nonzero, replace with following code + # attn_mask[torch.where( + # attn_mask.sum(-1) == attn_mask.shape[-1])] = False + cond = (attn_mask.sum(-1) != attn_mask.shape[-1]).unsqueeze(2) + attn_mask = attn_mask & cond + + # cross_attn + self_attn + layer = self.transformer_decoder.layers[i] + query_feat = layer( + query=query_feat, + key=decoder_inputs[level_idx], + value=decoder_inputs[level_idx], + query_pos=query_embed, + key_pos=decoder_positional_encodings[level_idx], + cross_attn_mask=attn_mask, + query_key_padding_mask=None, + # here we do not apply masking on padded region + key_padding_mask=None) + cls_pred, mask_pred, attn_mask = self._forward_head( + query_feat, mask_features, + multi_scale_memorys[(i + 1) % + self.num_transformer_feat_level].shape[-2:]) + + cls_pred_list.append(cls_pred) + mask_pred_list.append(mask_pred) + + return cls_pred_list, mask_pred_list diff --git a/mmdeploy/codebase/mmdet/models/layers/__init__.py b/mmdeploy/codebase/mmdet/models/layers/__init__.py index 0559af920f..d431a71071 100644 --- a/mmdeploy/codebase/mmdet/models/layers/__init__.py +++ b/mmdeploy/codebase/mmdet/models/layers/__init__.py @@ -2,5 +2,6 @@ # recovery for mmyolo from mmdeploy.mmcv.ops import multiclass_nms # noqa: F401, F403 from . import matrix_nms # noqa: F401, F403 +from . import msdeformattn_pixel_decoder # noqa: F401, F403 __all__ = ['multiclass_nms'] diff --git a/mmdeploy/codebase/mmdet/models/layers/msdeformattn_pixel_decoder.py b/mmdeploy/codebase/mmdet/models/layers/msdeformattn_pixel_decoder.py new file mode 100644 index 0000000000..cda5cb6d3e --- /dev/null +++ b/mmdeploy/codebase/mmdet/models/layers/msdeformattn_pixel_decoder.py @@ -0,0 +1,125 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+from typing import List + +import torch +import torch.nn.functional as F + +from mmdeploy.core import FUNCTION_REWRITER + + +@FUNCTION_REWRITER.register_rewriter( + 'mmdet.models.layers.msdeformattn_pixel_decoder.' + 'MSDeformAttnPixelDecoder.forward') +def msdeform_attn_pixel_decoder__forward(self, feats: List[torch.Tensor]): + """Rewrite `forward` for default backend. + Args: + feats (list[Tensor]): Feature maps of each level. Each has + shape of (batch_size, c, h, w). + + Returns: + tuple: A tuple containing the following: + + - mask_feature (Tensor): shape (batch_size, c, h, w). + - multi_scale_features (list[Tensor]): Multi scale \ + features, each in shape (batch_size, c, h, w). + """ + # generate padding mask for each level, for each image + batch_size = feats[0].shape[0] + encoder_input_list = [] + padding_mask_list = [] + level_positional_encoding_list = [] + spatial_shapes = [] + reference_points_list = [] + for i in range(self.num_encoder_levels): + level_idx = self.num_input_levels - i - 1 + feat = feats[level_idx] + feat_projected = self.input_convs[i](feat) + feat_hw = torch._shape_as_tensor(feat)[2:] + + # no padding + padding_mask_resized = feat.new_zeros( + (batch_size, ) + feat.shape[-2:], dtype=torch.bool) + pos_embed = self.postional_encoding(padding_mask_resized) + level_embed = self.level_encoding.weight[i] + level_pos_embed = level_embed.view(1, -1, 1, 1) + pos_embed + # (h_i * w_i, 2) + reference_points = self.point_generator.single_level_grid_priors( + feat.shape[-2:], level_idx, device=feat.device) + # normalize + feat_wh = feat_hw.unsqueeze(0).flip(dims=[0, 1]) + factor = feat_wh * self.strides[level_idx] + reference_points = reference_points / factor + + # shape (batch_size, c, h_i, w_i) -> (h_i * w_i, batch_size, c) + feat_projected = feat_projected.flatten(2).permute(0, 2, 1) + level_pos_embed = level_pos_embed.flatten(2).permute(0, 2, 1) + padding_mask_resized = padding_mask_resized.flatten(1) + + 
encoder_input_list.append(feat_projected) + padding_mask_list.append(padding_mask_resized) + level_positional_encoding_list.append(level_pos_embed) + spatial_shapes.append(feat_hw) + reference_points_list.append(reference_points) + # shape (batch_size, total_num_queries), + # total_num_queries=sum([., h_i * w_i,.]) + padding_masks = torch.cat(padding_mask_list, dim=1) + # shape (total_num_queries, batch_size, c) + encoder_inputs = torch.cat(encoder_input_list, dim=1) + level_positional_encodings = torch.cat( + level_positional_encoding_list, dim=1) + # shape (num_encoder_levels, 2), from low + # resolution to high resolution + spatial_shapes = torch.cat(spatial_shapes).view(-1, 2) + + # shape (0, h_0*w_0, h_0*w_0+h_1*w_1, ...) + # keep last index + level_start_index = torch.cat( + [spatial_shapes.new_zeros(1), + spatial_shapes.prod(1).cumsum(0)]).to(torch.long) + reference_points = torch.cat(reference_points_list, dim=0) + reference_points = reference_points[None, :, + None].repeat(batch_size, 1, + self.num_encoder_levels, + 1) + valid_radios = reference_points.new_ones( + (batch_size, self.num_encoder_levels, 2)) + # shape (num_total_queries, batch_size, c) + memory = self.encoder( + query=encoder_inputs, + query_pos=level_positional_encodings, + key_padding_mask=padding_masks, + spatial_shapes=spatial_shapes, + reference_points=reference_points, + level_start_index=level_start_index[:-1], + valid_ratios=valid_radios) + # (batch_size, c, num_total_queries) + memory = memory.permute(0, 2, 1) + + # from low resolution to high resolution + # num_queries_per_level = [e[0] * e[1] for e in spatial_shapes] + # outs = torch.split(memory, num_queries_per_level, dim=-1) + outs = [] + for i in range(self.num_encoder_levels): + outs.append(memory[:, :, + level_start_index[i]:level_start_index[i + 1]]) + + outs = [ + x.reshape(batch_size, -1, spatial_shapes[i][0], spatial_shapes[i][1]) + for i, x in enumerate(outs) + ] + + for i in range(self.num_input_levels - 
self.num_encoder_levels - 1, -1, + -1): + x = feats[i] + cur_feat = self.lateral_convs[i](x) + y = cur_feat + F.interpolate( + outs[-1], + size=cur_feat.shape[-2:], + mode='bilinear', + align_corners=False) + y = self.output_convs[i](y) + outs.append(y) + multi_scale_features = outs[:self.num_outs] + + mask_feature = self.mask_feature(outs[-1]) + return mask_feature, multi_scale_features diff --git a/mmdeploy/mmcv/ops/multi_scale_deform_attn.py b/mmdeploy/mmcv/ops/multi_scale_deform_attn.py index 7a5ccccba8..4633c366ea 100644 --- a/mmdeploy/mmcv/ops/multi_scale_deform_attn.py +++ b/mmdeploy/mmcv/ops/multi_scale_deform_attn.py @@ -1,5 +1,8 @@ # Copyright (c) OpenMMLab. All rights reserved. -from mmdeploy.core import SYMBOLIC_REWRITER +import torch +import torch.nn.functional as F + +from mmdeploy.core import FUNCTION_REWRITER, SYMBOLIC_REWRITER @SYMBOLIC_REWRITER.register_symbolic( @@ -23,3 +26,63 @@ def ms_deform_attn_default( attention_weights, im2col_step_i=im2col_step, ) + + +@FUNCTION_REWRITER.register_rewriter( + 'mmcv.ops.multi_scale_deform_attn.multi_scale_deformable_attn_pytorch') +def multi_scale_deformable_attn_pytorch_default( + value: torch.Tensor, value_spatial_shapes: torch.Tensor, + sampling_locations: torch.Tensor, + attention_weights: torch.Tensor) -> torch.Tensor: + """CPU version of multi-scale deformable attention. + + Args: + value (torch.Tensor): The value has shape + (bs, num_keys, num_heads, embed_dims//num_heads) + value_spatial_shapes (torch.Tensor): Spatial shape of + each feature map, has shape (num_levels, 2), + last dimension 2 represent (h, w) + sampling_locations (torch.Tensor): The location of sampling points, + has shape + (bs ,num_queries, num_heads, num_levels, num_points, 2), + the last dimension 2 represent (x, y). 
+ attention_weights (torch.Tensor): The weight of sampling points used + when calculate the attention, has shape + (bs ,num_queries, num_heads, num_levels, num_points), + + Returns: + torch.Tensor: has shape (bs, num_queries, embed_dims) + """ + + bs, _, num_heads, embed_dims = value.shape + _, num_queries, num_heads, num_levels, num_points, _ =\ + sampling_locations.shape + indices = torch.cat((value_spatial_shapes.new_zeros(1), + value_spatial_shapes.prod(1).cumsum(0))) + # avoid split with dynamic split_sizes + # value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes], + # dim=1) + sampling_grids = 2 * sampling_locations - 1 + sampling_value_list = [] + for i in range(num_levels): + H_, W_ = value_spatial_shapes[i] + value_l_ = value[:, indices[i]:indices[i + 1], :, :] + value_l_ = value_l_.flatten(2).transpose(1, 2).reshape( + bs * num_heads, embed_dims, H_, W_) + sampling_grid_l_ = sampling_grids[:, :, :, + i].transpose(1, 2).flatten(0, 1) + # bs*num_heads, embed_dims, num_queries, num_points + sampling_value_l_ = F.grid_sample( + value_l_, + sampling_grid_l_, + mode='bilinear', + padding_mode='zeros', + align_corners=False) + sampling_value_list.append(sampling_value_l_) + + attention_weights = attention_weights.transpose(1, 2).reshape( + bs * num_heads, 1, num_queries, num_levels * num_points) + output = (torch.stack(sampling_value_list, dim=-2).flatten(-2) * + attention_weights).sum(-1).view(bs, num_heads * embed_dims, + num_queries) + return output.transpose(1, 2).contiguous() From 3a5886978f3babd578bcbbf16f85c292c25236c9 Mon Sep 17 00:00:00 2001 From: RunningLeon Date: Thu, 3 Aug 2023 11:21:48 +0800 Subject: [PATCH 10/22] update --- ...ptic-seg_maskformer_onnxruntime_dynamic.py | 2 +- ...mer_tensorrt_dynamic-320x512-1344x1344.py} | 23 ++++++++++++++++--- .../mmdet/deploy/object_detection_model.py | 5 ++-- .../mmdet/models/detectors/maskformer.py | 3 --- 4 files changed, 24 insertions(+), 9 deletions(-) rename 
configs/mmdet/panoptic-seg/{panoptic-seg_maskformer_tensorrt_dynamic-352x512-1344x1344.py => panoptic-seg_maskformer_tensorrt_dynamic-320x512-1344x1344.py} (51%) diff --git a/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_dynamic.py b/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_dynamic.py index 72d8af2aec..27f0853d7f 100644 --- a/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_dynamic.py +++ b/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_dynamic.py @@ -3,7 +3,7 @@ '../../_base_/backends/onnxruntime.py' ] onnx_config = dict( - opset_version=12, + opset_version=13, output_names=['cls_logits', 'mask_logits'], dynamic_axes={ 'input': { diff --git a/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_dynamic-352x512-1344x1344.py b/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_dynamic-320x512-1344x1344.py similarity index 51% rename from configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_dynamic-352x512-1344x1344.py rename to configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_dynamic-320x512-1344x1344.py index a8eb5d4543..c00079ed69 100644 --- a/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_dynamic-352x512-1344x1344.py +++ b/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_dynamic-320x512-1344x1344.py @@ -3,9 +3,26 @@ '../../_base_/backends/tensorrt.py' ] onnx_config = dict( - opset_version=12, + opset_version=13, output_names=['cls_logits', 'mask_logits'], - input_shape=[1344, 800]) + dynamic_axes={ + 'input': { + 0: 'batch', + 2: 'height', + 3: 'width' + }, + 'cls_logits': { + 0: 'batch', + 2: 'h', + 3: 'w', + }, + 'mask_logits': { + 0: 'batch', + 2: 'h', + 3: 'w', + }, + }, + input_shape=None) backend_config = dict( common_config=dict(max_workspace_size=1 << 30), @@ -13,7 +30,7 @@ dict( input_shapes=dict( input=dict( - min_shape=[1, 3, 352, 512], + min_shape=[1, 3, 320, 512], opt_shape=[1, 3, 800, 1344], max_shape=[1, 3, 1344, 1344]))) 
]) diff --git a/mmdeploy/codebase/mmdet/deploy/object_detection_model.py b/mmdeploy/codebase/mmdet/deploy/object_detection_model.py index 17b3a35f1c..3f9b3ae42e 100644 --- a/mmdeploy/codebase/mmdet/deploy/object_detection_model.py +++ b/mmdeploy/codebase/mmdet/deploy/object_detection_model.py @@ -188,7 +188,8 @@ def postprocessing_results(self, for i in range(batch_size): dets, labels = batch_dets[i], batch_labels[i] pred_instances = InstanceData() - + device = dets.device + labels = labels.to(device) bboxes = dets[:, :4] scores = dets[:, 4] scale_factor = bboxes.new_ones(1, 4) @@ -256,7 +257,7 @@ def postprocessing_results(self, if masks.dtype != bool: masks = masks >= 0.5 # aligned with mmdet to easily convert to numpy - # masks = masks.cpu() + masks = masks.to(device) pred_instances.masks = masks data_samples[i].pred_instances = pred_instances diff --git a/mmdeploy/codebase/mmdet/models/detectors/maskformer.py b/mmdeploy/codebase/mmdet/models/detectors/maskformer.py index f12b31ec43..c430ae9126 100644 --- a/mmdeploy/codebase/mmdet/models/detectors/maskformer.py +++ b/mmdeploy/codebase/mmdet/models/detectors/maskformer.py @@ -1,6 +1,4 @@ # Copyright (c) OpenMMLab. All rights reserved. -import copy - import torch from mmdeploy.core import FUNCTION_REWRITER @@ -33,7 +31,6 @@ def maskformer__forward(self, `semseg` of shape [N, num_sem_class, sem_H, sem_W]. 
""" ctx = FUNCTION_REWRITER.get_context() - data_samples = copy.deepcopy(data_samples) deploy_cfg = ctx.cfg # get origin input shape as tensor to support onnx dynamic shape From 2b6d24ae5fe910b473438e7c780513e31e0c52e1 Mon Sep 17 00:00:00 2001 From: RunningLeon Date: Thu, 3 Aug 2023 11:24:10 +0800 Subject: [PATCH 11/22] remove unused rewritings for mask2former --- ...ptic-seg_maskformer_onnxruntime_dynamic.py | 4 +- ...rmer_tensorrt_dynamic-320x512-1344x1344.py | 4 +- .../models/dense_heads/mask2former_head.py | 94 ------------- .../codebase/mmdet/models/layers/__init__.py | 1 - .../layers/msdeformattn_pixel_decoder.py | 125 ------------------ mmdeploy/mmcv/ops/multi_scale_deform_attn.py | 64 +-------- tests/regression/mmdet.yml | 12 +- 7 files changed, 11 insertions(+), 293 deletions(-) delete mode 100644 mmdeploy/codebase/mmdet/models/dense_heads/mask2former_head.py delete mode 100644 mmdeploy/codebase/mmdet/models/layers/msdeformattn_pixel_decoder.py diff --git a/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_dynamic.py b/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_dynamic.py index 27f0853d7f..762daa5138 100644 --- a/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_dynamic.py +++ b/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_dynamic.py @@ -13,11 +13,11 @@ }, 'cls_logits': { 0: 'batch', - 2: 'h', - 3: 'w', + 1: 'query', }, 'mask_logits': { 0: 'batch', + 1: 'query', 2: 'h', 3: 'w', }, diff --git a/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_dynamic-320x512-1344x1344.py b/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_dynamic-320x512-1344x1344.py index c00079ed69..1dc3e78289 100644 --- a/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_dynamic-320x512-1344x1344.py +++ b/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_dynamic-320x512-1344x1344.py @@ -13,11 +13,11 @@ }, 'cls_logits': { 0: 'batch', - 2: 'h', - 3: 'w', + 1: 'query', }, 'mask_logits': 
{ 0: 'batch', + 1: 'query', 2: 'h', 3: 'w', }, diff --git a/mmdeploy/codebase/mmdet/models/dense_heads/mask2former_head.py b/mmdeploy/codebase/mmdet/models/dense_heads/mask2former_head.py deleted file mode 100644 index 22e333965e..0000000000 --- a/mmdeploy/codebase/mmdet/models/dense_heads/mask2former_head.py +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. - -import torch - -from mmdeploy.core import FUNCTION_REWRITER - - -@FUNCTION_REWRITER.register_rewriter( - func_name='mmdet.models.dense_heads.mask2former_head.' - 'Mask2FormerHead.forward') -def mask2former_head__forward(self, x, batch_data_samples): - """Rewrite `forward` for default backend. - - Args: - x (list[Tensor]): Multi scale Features from the - upstream network, each is a 4D-tensor. - batch_data_samples (List[:obj:`DetDataSample`]): The Data - Samples. It usually includes information such as - `gt_instance`, `gt_panoptic_seg` and `gt_sem_seg`. - - Returns: - tuple[list[Tensor]]: A tuple contains two elements. - - - cls_pred_list (list[Tensor)]: Classification logits \ - for each decoder layer. Each is a 3D-tensor with shape \ - (batch_size, num_queries, cls_out_channels). \ - Note `cls_out_channels` should includes background. - - mask_pred_list (list[Tensor]): Mask logits for each \ - decoder layer. Each with shape (batch_size, num_queries, \ - h, w). 
- """ - batch_size = x[0].shape[0] - mask_features, multi_scale_memorys = self.pixel_decoder(x) - # multi_scale_memorys (from low resolution to high resolution) - decoder_inputs = [] - decoder_positional_encodings = [] - for i in range(self.num_transformer_feat_level): - decoder_input = self.decoder_input_projs[i](multi_scale_memorys[i]) - # shape (batch_size, c, h, w) -> (batch_size, h*w, c) - decoder_input = decoder_input.flatten(2).permute(0, 2, 1) - level_embed = self.level_embed.weight[i].view(1, 1, -1) - decoder_input = decoder_input + level_embed - # shape (batch_size, c, h, w) -> (batch_size, h*w, c) - mask = decoder_input.new_zeros( - (batch_size, ) + multi_scale_memorys[i].shape[-2:], - dtype=torch.bool) - decoder_positional_encoding = self.decoder_positional_encoding(mask) - decoder_positional_encoding = decoder_positional_encoding.flatten( - 2).permute(0, 2, 1) - decoder_inputs.append(decoder_input) - decoder_positional_encodings.append(decoder_positional_encoding) - # shape (num_queries, c) -> (batch_size, num_queries, c) - query_feat = self.query_feat.weight.unsqueeze(0).repeat((batch_size, 1, 1)) - query_embed = self.query_embed.weight.unsqueeze(0).repeat( - (batch_size, 1, 1)) - - cls_pred_list = [] - mask_pred_list = [] - cls_pred, mask_pred, attn_mask = self._forward_head( - query_feat, mask_features, multi_scale_memorys[0].shape[-2:]) - cls_pred_list.append(cls_pred) - mask_pred_list.append(mask_pred) - - for i in range(self.num_transformer_decoder_layers): - level_idx = i % self.num_transformer_feat_level - # if a mask is all True(all background), then set it all False. 
- - # to avoid Nonzero, replace with following code - # attn_mask[torch.where( - # attn_mask.sum(-1) == attn_mask.shape[-1])] = False - cond = (attn_mask.sum(-1) != attn_mask.shape[-1]).unsqueeze(2) - attn_mask = attn_mask & cond - - # cross_attn + self_attn - layer = self.transformer_decoder.layers[i] - query_feat = layer( - query=query_feat, - key=decoder_inputs[level_idx], - value=decoder_inputs[level_idx], - query_pos=query_embed, - key_pos=decoder_positional_encodings[level_idx], - cross_attn_mask=attn_mask, - query_key_padding_mask=None, - # here we do not apply masking on padded region - key_padding_mask=None) - cls_pred, mask_pred, attn_mask = self._forward_head( - query_feat, mask_features, - multi_scale_memorys[(i + 1) % - self.num_transformer_feat_level].shape[-2:]) - - cls_pred_list.append(cls_pred) - mask_pred_list.append(mask_pred) - - return cls_pred_list, mask_pred_list diff --git a/mmdeploy/codebase/mmdet/models/layers/__init__.py b/mmdeploy/codebase/mmdet/models/layers/__init__.py index d431a71071..0559af920f 100644 --- a/mmdeploy/codebase/mmdet/models/layers/__init__.py +++ b/mmdeploy/codebase/mmdet/models/layers/__init__.py @@ -2,6 +2,5 @@ # recovery for mmyolo from mmdeploy.mmcv.ops import multiclass_nms # noqa: F401, F403 from . import matrix_nms # noqa: F401, F403 -from . import msdeformattn_pixel_decoder # noqa: F401, F403 __all__ = ['multiclass_nms'] diff --git a/mmdeploy/codebase/mmdet/models/layers/msdeformattn_pixel_decoder.py b/mmdeploy/codebase/mmdet/models/layers/msdeformattn_pixel_decoder.py deleted file mode 100644 index cda5cb6d3e..0000000000 --- a/mmdeploy/codebase/mmdet/models/layers/msdeformattn_pixel_decoder.py +++ /dev/null @@ -1,125 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from typing import List - -import torch -import torch.nn.functional as F - -from mmdeploy.core import FUNCTION_REWRITER - - -@FUNCTION_REWRITER.register_rewriter( - 'mmdet.models.layers.msdeformattn_pixel_decoder.' 
- 'MSDeformAttnPixelDecoder.forward') -def msdeform_attn_pixel_decoder__forward(self, feats: List[torch.Tensor]): - """Rewrite `forward` for default backend. - Args: - feats (list[Tensor]): Feature maps of each level. Each has - shape of (batch_size, c, h, w). - - Returns: - tuple: A tuple containing the following: - - - mask_feature (Tensor): shape (batch_size, c, h, w). - - multi_scale_features (list[Tensor]): Multi scale \ - features, each in shape (batch_size, c, h, w). - """ - # generate padding mask for each level, for each image - batch_size = feats[0].shape[0] - encoder_input_list = [] - padding_mask_list = [] - level_positional_encoding_list = [] - spatial_shapes = [] - reference_points_list = [] - for i in range(self.num_encoder_levels): - level_idx = self.num_input_levels - i - 1 - feat = feats[level_idx] - feat_projected = self.input_convs[i](feat) - feat_hw = torch._shape_as_tensor(feat)[2:] - - # no padding - padding_mask_resized = feat.new_zeros( - (batch_size, ) + feat.shape[-2:], dtype=torch.bool) - pos_embed = self.postional_encoding(padding_mask_resized) - level_embed = self.level_encoding.weight[i] - level_pos_embed = level_embed.view(1, -1, 1, 1) + pos_embed - # (h_i * w_i, 2) - reference_points = self.point_generator.single_level_grid_priors( - feat.shape[-2:], level_idx, device=feat.device) - # normalize - feat_wh = feat_hw.unsqueeze(0).flip(dims=[0, 1]) - factor = feat_wh * self.strides[level_idx] - reference_points = reference_points / factor - - # shape (batch_size, c, h_i, w_i) -> (h_i * w_i, batch_size, c) - feat_projected = feat_projected.flatten(2).permute(0, 2, 1) - level_pos_embed = level_pos_embed.flatten(2).permute(0, 2, 1) - padding_mask_resized = padding_mask_resized.flatten(1) - - encoder_input_list.append(feat_projected) - padding_mask_list.append(padding_mask_resized) - level_positional_encoding_list.append(level_pos_embed) - spatial_shapes.append(feat_hw) - reference_points_list.append(reference_points) - # shape (batch_size, 
total_num_queries), - # total_num_queries=sum([., h_i * w_i,.]) - padding_masks = torch.cat(padding_mask_list, dim=1) - # shape (total_num_queries, batch_size, c) - encoder_inputs = torch.cat(encoder_input_list, dim=1) - level_positional_encodings = torch.cat( - level_positional_encoding_list, dim=1) - # shape (num_encoder_levels, 2), from low - # resolution to high resolution - spatial_shapes = torch.cat(spatial_shapes).view(-1, 2) - - # shape (0, h_0*w_0, h_0*w_0+h_1*w_1, ...) - # keep last index - level_start_index = torch.cat( - [spatial_shapes.new_zeros(1), - spatial_shapes.prod(1).cumsum(0)]).to(torch.long) - reference_points = torch.cat(reference_points_list, dim=0) - reference_points = reference_points[None, :, - None].repeat(batch_size, 1, - self.num_encoder_levels, - 1) - valid_radios = reference_points.new_ones( - (batch_size, self.num_encoder_levels, 2)) - # shape (num_total_queries, batch_size, c) - memory = self.encoder( - query=encoder_inputs, - query_pos=level_positional_encodings, - key_padding_mask=padding_masks, - spatial_shapes=spatial_shapes, - reference_points=reference_points, - level_start_index=level_start_index[:-1], - valid_ratios=valid_radios) - # (batch_size, c, num_total_queries) - memory = memory.permute(0, 2, 1) - - # from low resolution to high resolution - # num_queries_per_level = [e[0] * e[1] for e in spatial_shapes] - # outs = torch.split(memory, num_queries_per_level, dim=-1) - outs = [] - for i in range(self.num_encoder_levels): - outs.append(memory[:, :, - level_start_index[i]:level_start_index[i + 1]]) - - outs = [ - x.reshape(batch_size, -1, spatial_shapes[i][0], spatial_shapes[i][1]) - for i, x in enumerate(outs) - ] - - for i in range(self.num_input_levels - self.num_encoder_levels - 1, -1, - -1): - x = feats[i] - cur_feat = self.lateral_convs[i](x) - y = cur_feat + F.interpolate( - outs[-1], - size=cur_feat.shape[-2:], - mode='bilinear', - align_corners=False) - y = self.output_convs[i](y) - outs.append(y) - 
multi_scale_features = outs[:self.num_outs] - - mask_feature = self.mask_feature(outs[-1]) - return mask_feature, multi_scale_features diff --git a/mmdeploy/mmcv/ops/multi_scale_deform_attn.py b/mmdeploy/mmcv/ops/multi_scale_deform_attn.py index 4633c366ea..8649294f55 100644 --- a/mmdeploy/mmcv/ops/multi_scale_deform_attn.py +++ b/mmdeploy/mmcv/ops/multi_scale_deform_attn.py @@ -1,8 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. -import torch -import torch.nn.functional as F -from mmdeploy.core import FUNCTION_REWRITER, SYMBOLIC_REWRITER +from mmdeploy.core import SYMBOLIC_REWRITER @SYMBOLIC_REWRITER.register_symbolic( @@ -26,63 +24,3 @@ def ms_deform_attn_default( attention_weights, im2col_step_i=im2col_step, ) - - -@FUNCTION_REWRITER.register_rewriter( - 'mmcv.ops.multi_scale_deform_attn.multi_scale_deformable_attn_pytorch') -def multi_scale_deformable_attn_pytorch_default( - value: torch.Tensor, value_spatial_shapes: torch.Tensor, - sampling_locations: torch.Tensor, - attention_weights: torch.Tensor) -> torch.Tensor: - """CPU version of multi-scale deformable attention. - - Args: - value (torch.Tensor): The value has shape - (bs, num_keys, num_heads, embed_dims//num_heads) - value_spatial_shapes (torch.Tensor): Spatial shape of - each feature map, has shape (num_levels, 2), - last dimension 2 represent (h, w) - sampling_locations (torch.Tensor): The location of sampling points, - has shape - (bs ,num_queries, num_heads, num_levels, num_points, 2), - the last dimension 2 represent (x, y). 
- attention_weights (torch.Tensor): The weight of sampling points used - when calculate the attention, has shape - (bs ,num_queries, num_heads, num_levels, num_points), - - Returns: - torch.Tensor: has shape (bs, num_queries, embed_dims) - """ - - bs, _, num_heads, embed_dims = value.shape - _, num_queries, num_heads, num_levels, num_points, _ =\ - sampling_locations.shape - indices = torch.cat((value_spatial_shapes.new_zeros(1), - value_spatial_shapes.prod(1).cumsum(0))) - # avoid split with dynamic split_sizes - # value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes], - # dim=1) - sampling_grids = 2 * sampling_locations - 1 - sampling_value_list = [] - for i in range(num_levels): - H_, W_ = value_spatial_shapes[i] - value_l_ = value[:, indices[i]:indices[i + 1], :, :] - value_l_ = value_l_.flatten(2).transpose(1, 2).reshape( - bs * num_heads, embed_dims, H_, W_) - sampling_grid_l_ = sampling_grids[:, :, :, - i].transpose(1, 2).flatten(0, 1) - # bs*num_heads, embed_dims, num_queries, num_points - sampling_value_l_ = F.grid_sample( - value_l_, - sampling_grid_l_, - mode='bilinear', - padding_mode='zeros', - align_corners=False) - sampling_value_list.append(sampling_value_l_) - - attention_weights = attention_weights.transpose(1, 2).reshape( - bs * num_heads, 1, num_queries, num_levels * num_points) - output = (torch.stack(sampling_value_list, dim=-2).flatten(-2) * - attention_weights).sum(-1).view(bs, num_heads * embed_dims, - num_queries) - return output.transpose(1, 2).contiguous() diff --git a/tests/regression/mmdet.yml b/tests/regression/mmdet.yml index 1d5aa9ea5e..763cfb7865 100644 --- a/tests/regression/mmdet.yml +++ b/tests/regression/mmdet.yml @@ -399,21 +399,21 @@ models: model_configs: - configs/maskformer/maskformer_r50_ms-16xb1-75e_coco.py pipelines: - - deploy_config: configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_static-800x1344.py + - deploy_config: 
configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_dynamic.py convert_image: *convert_image backend_test: False - - deploy_config: configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_static-800x1344.py + - deploy_config: configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_dynamic-320x512-1344x1344.py convert_image: *convert_image - backend_test: False + backend_test: True - name: Mask2Former metafile: configs/mask2former/metafile.yml model_configs: - configs/mask2former/mask2former_r50_8xb2-lsj-50e_coco-panoptic.py pipelines: - - deploy_config: configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_static-800x1344.py + - deploy_config: configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_dynamic.py convert_image: *convert_image backend_test: False - - deploy_config: configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_static-800x1344.py + - deploy_config: configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_dynamic-320x512-1344x1344.py convert_image: *convert_image - backend_test: False + backend_test: True From 426efce968b3d969e5801184fdb50181c1a453f2 Mon Sep 17 00:00:00 2001 From: RunningLeon Date: Thu, 3 Aug 2023 17:44:12 +0800 Subject: [PATCH 12/22] Revert "remove unused rewritings for mask2former" This reverts commit 2b6d24ae5fe910b473438e7c780513e31e0c52e1. 
--- ...ptic-seg_maskformer_onnxruntime_dynamic.py | 4 +- ...rmer_tensorrt_dynamic-320x512-1344x1344.py | 4 +- .../models/dense_heads/mask2former_head.py | 94 +++++++++++++ .../codebase/mmdet/models/layers/__init__.py | 1 + .../layers/msdeformattn_pixel_decoder.py | 125 ++++++++++++++++++ mmdeploy/mmcv/ops/multi_scale_deform_attn.py | 64 ++++++++- tests/regression/mmdet.yml | 12 +- 7 files changed, 293 insertions(+), 11 deletions(-) create mode 100644 mmdeploy/codebase/mmdet/models/dense_heads/mask2former_head.py create mode 100644 mmdeploy/codebase/mmdet/models/layers/msdeformattn_pixel_decoder.py diff --git a/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_dynamic.py b/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_dynamic.py index 762daa5138..27f0853d7f 100644 --- a/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_dynamic.py +++ b/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_dynamic.py @@ -13,11 +13,11 @@ }, 'cls_logits': { 0: 'batch', - 1: 'query', + 2: 'h', + 3: 'w', }, 'mask_logits': { 0: 'batch', - 1: 'query', 2: 'h', 3: 'w', }, diff --git a/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_dynamic-320x512-1344x1344.py b/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_dynamic-320x512-1344x1344.py index 1dc3e78289..c00079ed69 100644 --- a/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_dynamic-320x512-1344x1344.py +++ b/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_dynamic-320x512-1344x1344.py @@ -13,11 +13,11 @@ }, 'cls_logits': { 0: 'batch', - 1: 'query', + 2: 'h', + 3: 'w', }, 'mask_logits': { 0: 'batch', - 1: 'query', 2: 'h', 3: 'w', }, diff --git a/mmdeploy/codebase/mmdet/models/dense_heads/mask2former_head.py b/mmdeploy/codebase/mmdet/models/dense_heads/mask2former_head.py new file mode 100644 index 0000000000..22e333965e --- /dev/null +++ b/mmdeploy/codebase/mmdet/models/dense_heads/mask2former_head.py @@ -0,0 +1,94 @@ +# Copyright (c) 
OpenMMLab. All rights reserved. + +import torch + +from mmdeploy.core import FUNCTION_REWRITER + + +@FUNCTION_REWRITER.register_rewriter( + func_name='mmdet.models.dense_heads.mask2former_head.' + 'Mask2FormerHead.forward') +def mask2former_head__forward(self, x, batch_data_samples): + """Rewrite `forward` for default backend. + + Args: + x (list[Tensor]): Multi scale Features from the + upstream network, each is a 4D-tensor. + batch_data_samples (List[:obj:`DetDataSample`]): The Data + Samples. It usually includes information such as + `gt_instance`, `gt_panoptic_seg` and `gt_sem_seg`. + + Returns: + tuple[list[Tensor]]: A tuple contains two elements. + + - cls_pred_list (list[Tensor)]: Classification logits \ + for each decoder layer. Each is a 3D-tensor with shape \ + (batch_size, num_queries, cls_out_channels). \ + Note `cls_out_channels` should includes background. + - mask_pred_list (list[Tensor]): Mask logits for each \ + decoder layer. Each with shape (batch_size, num_queries, \ + h, w). 
+ """ + batch_size = x[0].shape[0] + mask_features, multi_scale_memorys = self.pixel_decoder(x) + # multi_scale_memorys (from low resolution to high resolution) + decoder_inputs = [] + decoder_positional_encodings = [] + for i in range(self.num_transformer_feat_level): + decoder_input = self.decoder_input_projs[i](multi_scale_memorys[i]) + # shape (batch_size, c, h, w) -> (batch_size, h*w, c) + decoder_input = decoder_input.flatten(2).permute(0, 2, 1) + level_embed = self.level_embed.weight[i].view(1, 1, -1) + decoder_input = decoder_input + level_embed + # shape (batch_size, c, h, w) -> (batch_size, h*w, c) + mask = decoder_input.new_zeros( + (batch_size, ) + multi_scale_memorys[i].shape[-2:], + dtype=torch.bool) + decoder_positional_encoding = self.decoder_positional_encoding(mask) + decoder_positional_encoding = decoder_positional_encoding.flatten( + 2).permute(0, 2, 1) + decoder_inputs.append(decoder_input) + decoder_positional_encodings.append(decoder_positional_encoding) + # shape (num_queries, c) -> (batch_size, num_queries, c) + query_feat = self.query_feat.weight.unsqueeze(0).repeat((batch_size, 1, 1)) + query_embed = self.query_embed.weight.unsqueeze(0).repeat( + (batch_size, 1, 1)) + + cls_pred_list = [] + mask_pred_list = [] + cls_pred, mask_pred, attn_mask = self._forward_head( + query_feat, mask_features, multi_scale_memorys[0].shape[-2:]) + cls_pred_list.append(cls_pred) + mask_pred_list.append(mask_pred) + + for i in range(self.num_transformer_decoder_layers): + level_idx = i % self.num_transformer_feat_level + # if a mask is all True(all background), then set it all False. 
+ + # to avoid Nonzero, replace with following code + # attn_mask[torch.where( + # attn_mask.sum(-1) == attn_mask.shape[-1])] = False + cond = (attn_mask.sum(-1) != attn_mask.shape[-1]).unsqueeze(2) + attn_mask = attn_mask & cond + + # cross_attn + self_attn + layer = self.transformer_decoder.layers[i] + query_feat = layer( + query=query_feat, + key=decoder_inputs[level_idx], + value=decoder_inputs[level_idx], + query_pos=query_embed, + key_pos=decoder_positional_encodings[level_idx], + cross_attn_mask=attn_mask, + query_key_padding_mask=None, + # here we do not apply masking on padded region + key_padding_mask=None) + cls_pred, mask_pred, attn_mask = self._forward_head( + query_feat, mask_features, + multi_scale_memorys[(i + 1) % + self.num_transformer_feat_level].shape[-2:]) + + cls_pred_list.append(cls_pred) + mask_pred_list.append(mask_pred) + + return cls_pred_list, mask_pred_list diff --git a/mmdeploy/codebase/mmdet/models/layers/__init__.py b/mmdeploy/codebase/mmdet/models/layers/__init__.py index 0559af920f..d431a71071 100644 --- a/mmdeploy/codebase/mmdet/models/layers/__init__.py +++ b/mmdeploy/codebase/mmdet/models/layers/__init__.py @@ -2,5 +2,6 @@ # recovery for mmyolo from mmdeploy.mmcv.ops import multiclass_nms # noqa: F401, F403 from . import matrix_nms # noqa: F401, F403 +from . import msdeformattn_pixel_decoder # noqa: F401, F403 __all__ = ['multiclass_nms'] diff --git a/mmdeploy/codebase/mmdet/models/layers/msdeformattn_pixel_decoder.py b/mmdeploy/codebase/mmdet/models/layers/msdeformattn_pixel_decoder.py new file mode 100644 index 0000000000..cda5cb6d3e --- /dev/null +++ b/mmdeploy/codebase/mmdet/models/layers/msdeformattn_pixel_decoder.py @@ -0,0 +1,125 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import List + +import torch +import torch.nn.functional as F + +from mmdeploy.core import FUNCTION_REWRITER + + +@FUNCTION_REWRITER.register_rewriter( + 'mmdet.models.layers.msdeformattn_pixel_decoder.' 
+ 'MSDeformAttnPixelDecoder.forward') +def msdeform_attn_pixel_decoder__forward(self, feats: List[torch.Tensor]): + """Rewrite `forward` for default backend. + Args: + feats (list[Tensor]): Feature maps of each level. Each has + shape of (batch_size, c, h, w). + + Returns: + tuple: A tuple containing the following: + + - mask_feature (Tensor): shape (batch_size, c, h, w). + - multi_scale_features (list[Tensor]): Multi scale \ + features, each in shape (batch_size, c, h, w). + """ + # generate padding mask for each level, for each image + batch_size = feats[0].shape[0] + encoder_input_list = [] + padding_mask_list = [] + level_positional_encoding_list = [] + spatial_shapes = [] + reference_points_list = [] + for i in range(self.num_encoder_levels): + level_idx = self.num_input_levels - i - 1 + feat = feats[level_idx] + feat_projected = self.input_convs[i](feat) + feat_hw = torch._shape_as_tensor(feat)[2:] + + # no padding + padding_mask_resized = feat.new_zeros( + (batch_size, ) + feat.shape[-2:], dtype=torch.bool) + pos_embed = self.postional_encoding(padding_mask_resized) + level_embed = self.level_encoding.weight[i] + level_pos_embed = level_embed.view(1, -1, 1, 1) + pos_embed + # (h_i * w_i, 2) + reference_points = self.point_generator.single_level_grid_priors( + feat.shape[-2:], level_idx, device=feat.device) + # normalize + feat_wh = feat_hw.unsqueeze(0).flip(dims=[0, 1]) + factor = feat_wh * self.strides[level_idx] + reference_points = reference_points / factor + + # shape (batch_size, c, h_i, w_i) -> (h_i * w_i, batch_size, c) + feat_projected = feat_projected.flatten(2).permute(0, 2, 1) + level_pos_embed = level_pos_embed.flatten(2).permute(0, 2, 1) + padding_mask_resized = padding_mask_resized.flatten(1) + + encoder_input_list.append(feat_projected) + padding_mask_list.append(padding_mask_resized) + level_positional_encoding_list.append(level_pos_embed) + spatial_shapes.append(feat_hw) + reference_points_list.append(reference_points) + # shape (batch_size, 
total_num_queries), + # total_num_queries=sum([., h_i * w_i,.]) + padding_masks = torch.cat(padding_mask_list, dim=1) + # shape (total_num_queries, batch_size, c) + encoder_inputs = torch.cat(encoder_input_list, dim=1) + level_positional_encodings = torch.cat( + level_positional_encoding_list, dim=1) + # shape (num_encoder_levels, 2), from low + # resolution to high resolution + spatial_shapes = torch.cat(spatial_shapes).view(-1, 2) + + # shape (0, h_0*w_0, h_0*w_0+h_1*w_1, ...) + # keep last index + level_start_index = torch.cat( + [spatial_shapes.new_zeros(1), + spatial_shapes.prod(1).cumsum(0)]).to(torch.long) + reference_points = torch.cat(reference_points_list, dim=0) + reference_points = reference_points[None, :, + None].repeat(batch_size, 1, + self.num_encoder_levels, + 1) + valid_radios = reference_points.new_ones( + (batch_size, self.num_encoder_levels, 2)) + # shape (num_total_queries, batch_size, c) + memory = self.encoder( + query=encoder_inputs, + query_pos=level_positional_encodings, + key_padding_mask=padding_masks, + spatial_shapes=spatial_shapes, + reference_points=reference_points, + level_start_index=level_start_index[:-1], + valid_ratios=valid_radios) + # (batch_size, c, num_total_queries) + memory = memory.permute(0, 2, 1) + + # from low resolution to high resolution + # num_queries_per_level = [e[0] * e[1] for e in spatial_shapes] + # outs = torch.split(memory, num_queries_per_level, dim=-1) + outs = [] + for i in range(self.num_encoder_levels): + outs.append(memory[:, :, + level_start_index[i]:level_start_index[i + 1]]) + + outs = [ + x.reshape(batch_size, -1, spatial_shapes[i][0], spatial_shapes[i][1]) + for i, x in enumerate(outs) + ] + + for i in range(self.num_input_levels - self.num_encoder_levels - 1, -1, + -1): + x = feats[i] + cur_feat = self.lateral_convs[i](x) + y = cur_feat + F.interpolate( + outs[-1], + size=cur_feat.shape[-2:], + mode='bilinear', + align_corners=False) + y = self.output_convs[i](y) + outs.append(y) + 
multi_scale_features = outs[:self.num_outs] + + mask_feature = self.mask_feature(outs[-1]) + return mask_feature, multi_scale_features diff --git a/mmdeploy/mmcv/ops/multi_scale_deform_attn.py b/mmdeploy/mmcv/ops/multi_scale_deform_attn.py index 8649294f55..4633c366ea 100644 --- a/mmdeploy/mmcv/ops/multi_scale_deform_attn.py +++ b/mmdeploy/mmcv/ops/multi_scale_deform_attn.py @@ -1,6 +1,8 @@ # Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn.functional as F -from mmdeploy.core import SYMBOLIC_REWRITER +from mmdeploy.core import FUNCTION_REWRITER, SYMBOLIC_REWRITER @SYMBOLIC_REWRITER.register_symbolic( @@ -24,3 +26,63 @@ def ms_deform_attn_default( attention_weights, im2col_step_i=im2col_step, ) + + +@FUNCTION_REWRITER.register_rewriter( + 'mmcv.ops.multi_scale_deform_attn.multi_scale_deformable_attn_pytorch') +def multi_scale_deformable_attn_pytorch_default( + value: torch.Tensor, value_spatial_shapes: torch.Tensor, + sampling_locations: torch.Tensor, + attention_weights: torch.Tensor) -> torch.Tensor: + """CPU version of multi-scale deformable attention. + + Args: + value (torch.Tensor): The value has shape + (bs, num_keys, num_heads, embed_dims//num_heads) + value_spatial_shapes (torch.Tensor): Spatial shape of + each feature map, has shape (num_levels, 2), + last dimension 2 represent (h, w) + sampling_locations (torch.Tensor): The location of sampling points, + has shape + (bs ,num_queries, num_heads, num_levels, num_points, 2), + the last dimension 2 represent (x, y). 
+ attention_weights (torch.Tensor): The weight of sampling points used + when calculate the attention, has shape + (bs ,num_queries, num_heads, num_levels, num_points), + + Returns: + torch.Tensor: has shape (bs, num_queries, embed_dims) + """ + + bs, _, num_heads, embed_dims = value.shape + _, num_queries, num_heads, num_levels, num_points, _ =\ + sampling_locations.shape + indices = torch.cat((value_spatial_shapes.new_zeros(1), + value_spatial_shapes.prod(1).cumsum(0))) + # avoid split with dynamic split_sizes + # value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes], + # dim=1) + sampling_grids = 2 * sampling_locations - 1 + sampling_value_list = [] + for i in range(num_levels): + H_, W_ = value_spatial_shapes[i] + value_l_ = value[:, indices[i]:indices[i + 1], :, :] + value_l_ = value_l_.flatten(2).transpose(1, 2).reshape( + bs * num_heads, embed_dims, H_, W_) + sampling_grid_l_ = sampling_grids[:, :, :, + i].transpose(1, 2).flatten(0, 1) + # bs*num_heads, embed_dims, num_queries, num_points + sampling_value_l_ = F.grid_sample( + value_l_, + sampling_grid_l_, + mode='bilinear', + padding_mode='zeros', + align_corners=False) + sampling_value_list.append(sampling_value_l_) + + attention_weights = attention_weights.transpose(1, 2).reshape( + bs * num_heads, 1, num_queries, num_levels * num_points) + output = (torch.stack(sampling_value_list, dim=-2).flatten(-2) * + attention_weights).sum(-1).view(bs, num_heads * embed_dims, + num_queries) + return output.transpose(1, 2).contiguous() diff --git a/tests/regression/mmdet.yml b/tests/regression/mmdet.yml index 763cfb7865..1d5aa9ea5e 100644 --- a/tests/regression/mmdet.yml +++ b/tests/regression/mmdet.yml @@ -399,21 +399,21 @@ models: model_configs: - configs/maskformer/maskformer_r50_ms-16xb1-75e_coco.py pipelines: - - deploy_config: configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_dynamic.py + - deploy_config: 
configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_static-800x1344.py convert_image: *convert_image backend_test: False - - deploy_config: configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_dynamic-320x512-1344x1344.py + - deploy_config: configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_static-800x1344.py convert_image: *convert_image - backend_test: True + backend_test: False - name: Mask2Former metafile: configs/mask2former/metafile.yml model_configs: - configs/mask2former/mask2former_r50_8xb2-lsj-50e_coco-panoptic.py pipelines: - - deploy_config: configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_dynamic.py + - deploy_config: configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_static-800x1344.py convert_image: *convert_image backend_test: False - - deploy_config: configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_dynamic-320x512-1344x1344.py + - deploy_config: configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_static-800x1344.py convert_image: *convert_image - backend_test: True + backend_test: False From 6a770f08353188137cf98e7f26795f30b34e78cc Mon Sep 17 00:00:00 2001 From: RunningLeon Date: Thu, 3 Aug 2023 17:57:55 +0800 Subject: [PATCH 13/22] update configs and regs --- ...ptic-seg_maskformer_onnxruntime_dynamic.py | 9 ++---- ...rmer_tensorrt_dynamic-320x512-1344x1344.py | 29 +++++++------------ .../layers/msdeformattn_pixel_decoder.py | 2 +- tests/regression/mmdet.yml | 16 +++++----- 4 files changed, 23 insertions(+), 33 deletions(-) diff --git a/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_dynamic.py b/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_dynamic.py index 27f0853d7f..8219316f23 100644 --- a/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_dynamic.py +++ b/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_dynamic.py @@ -1,10 +1,7 @@ _base_ = [ - '../_base_/base_panoptic-seg_static.py', - 
'../../_base_/backends/onnxruntime.py' + './panoptic-seg_maskformer_onnxruntime_static-800x1344.py', ] onnx_config = dict( - opset_version=13, - output_names=['cls_logits', 'mask_logits'], dynamic_axes={ 'input': { 0: 'batch', @@ -13,11 +10,11 @@ }, 'cls_logits': { 0: 'batch', - 2: 'h', - 3: 'w', + 1: 'query', }, 'mask_logits': { 0: 'batch', + 1: 'query', 2: 'h', 3: 'w', }, diff --git a/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_dynamic-320x512-1344x1344.py b/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_dynamic-320x512-1344x1344.py index c00079ed69..aac577bbef 100644 --- a/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_dynamic-320x512-1344x1344.py +++ b/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_dynamic-320x512-1344x1344.py @@ -1,10 +1,5 @@ -_base_ = [ - '../_base_/base_panoptic-seg_static.py', - '../../_base_/backends/tensorrt.py' -] +_base_ = ['./panoptic-seg_maskformer_tensorrt_static-800x1344.py'] onnx_config = dict( - opset_version=13, - output_names=['cls_logits', 'mask_logits'], dynamic_axes={ 'input': { 0: 'batch', @@ -13,24 +8,22 @@ }, 'cls_logits': { 0: 'batch', - 2: 'h', - 3: 'w', + 1: 'query', }, 'mask_logits': { 0: 'batch', + 1: 'query', 2: 'h', 3: 'w', }, }, input_shape=None) -backend_config = dict( - common_config=dict(max_workspace_size=1 << 30), - model_inputs=[ - dict( - input_shapes=dict( - input=dict( - min_shape=[1, 3, 320, 512], - opt_shape=[1, 3, 800, 1344], - max_shape=[1, 3, 1344, 1344]))) - ]) +backend_config = dict(model_inputs=[ + dict( + input_shapes=dict( + input=dict( + min_shape=[1, 3, 320, 512], + opt_shape=[1, 3, 800, 1344], + max_shape=[1, 3, 1344, 1344]))) +]) diff --git a/mmdeploy/codebase/mmdet/models/layers/msdeformattn_pixel_decoder.py b/mmdeploy/codebase/mmdet/models/layers/msdeformattn_pixel_decoder.py index cda5cb6d3e..d0f9734bf2 100644 --- a/mmdeploy/codebase/mmdet/models/layers/msdeformattn_pixel_decoder.py +++ 
b/mmdeploy/codebase/mmdet/models/layers/msdeformattn_pixel_decoder.py @@ -34,7 +34,7 @@ def msdeform_attn_pixel_decoder__forward(self, feats: List[torch.Tensor]): level_idx = self.num_input_levels - i - 1 feat = feats[level_idx] feat_projected = self.input_convs[i](feat) - feat_hw = torch._shape_as_tensor(feat)[2:] + feat_hw = torch._shape_as_tensor(feat)[2:].to(feat.device) # no padding padding_mask_resized = feat.new_zeros( diff --git a/tests/regression/mmdet.yml b/tests/regression/mmdet.yml index 1d5aa9ea5e..f519f7f021 100644 --- a/tests/regression/mmdet.yml +++ b/tests/regression/mmdet.yml @@ -399,21 +399,21 @@ models: model_configs: - configs/maskformer/maskformer_r50_ms-16xb1-75e_coco.py pipelines: - - deploy_config: configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_static-800x1344.py + - deploy_config: configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_dynamic.py convert_image: *convert_image - backend_test: False - - deploy_config: configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_static-800x1344.py + backend_test: True + - deploy_config: configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_dynamic-320x512-1344x1344.py convert_image: *convert_image - backend_test: False + backend_test: True - name: Mask2Former metafile: configs/mask2former/metafile.yml model_configs: - configs/mask2former/mask2former_r50_8xb2-lsj-50e_coco-panoptic.py pipelines: - - deploy_config: configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_static-800x1344.py + - deploy_config: configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_dynamic.py convert_image: *convert_image - backend_test: False - - deploy_config: configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_static-800x1344.py + backend_test: True + - deploy_config: configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_dynamic-320x512-1344x1344.py convert_image: *convert_image - backend_test: False + backend_test: True From 
6ceeba9bbf204ada18be1c7ae54d2900267337d1 Mon Sep 17 00:00:00 2001 From: RunningLeon Date: Fri, 4 Aug 2023 14:06:09 +0800 Subject: [PATCH 14/22] debug dynamic --- .../layers/msdeformattn_pixel_decoder.py | 21 +++++++++++-------- mmdeploy/mmcv/ops/multi_scale_deform_attn.py | 11 +++++----- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/mmdeploy/codebase/mmdet/models/layers/msdeformattn_pixel_decoder.py b/mmdeploy/codebase/mmdet/models/layers/msdeformattn_pixel_decoder.py index d0f9734bf2..798947c34e 100644 --- a/mmdeploy/codebase/mmdet/models/layers/msdeformattn_pixel_decoder.py +++ b/mmdeploy/codebase/mmdet/models/layers/msdeformattn_pixel_decoder.py @@ -30,6 +30,8 @@ def msdeform_attn_pixel_decoder__forward(self, feats: List[torch.Tensor]): level_positional_encoding_list = [] spatial_shapes = [] reference_points_list = [] + num_queries_per_level = [] + for i in range(self.num_encoder_levels): level_idx = self.num_input_levels - i - 1 feat = feats[level_idx] @@ -59,6 +61,7 @@ def msdeform_attn_pixel_decoder__forward(self, feats: List[torch.Tensor]): padding_mask_list.append(padding_mask_resized) level_positional_encoding_list.append(level_pos_embed) spatial_shapes.append(feat_hw) + num_queries_per_level.append(feat_hw[0] * feat_hw[1]) reference_points_list.append(reference_points) # shape (batch_size, total_num_queries), # total_num_queries=sum([., h_i * w_i,.]) @@ -73,9 +76,9 @@ def msdeform_attn_pixel_decoder__forward(self, feats: List[torch.Tensor]): # shape (0, h_0*w_0, h_0*w_0+h_1*w_1, ...) 
# keep last index - level_start_index = torch.cat( - [spatial_shapes.new_zeros(1), - spatial_shapes.prod(1).cumsum(0)]).to(torch.long) + # level_start_index = torch.cat( + # [spatial_shapes.new_zeros(1), + # spatial_shapes.prod(1).cumsum(0)]).to(torch.long) reference_points = torch.cat(reference_points_list, dim=0) reference_points = reference_points[None, :, None].repeat(batch_size, 1, @@ -90,18 +93,18 @@ def msdeform_attn_pixel_decoder__forward(self, feats: List[torch.Tensor]): key_padding_mask=padding_masks, spatial_shapes=spatial_shapes, reference_points=reference_points, - level_start_index=level_start_index[:-1], + level_start_index=None, valid_ratios=valid_radios) # (batch_size, c, num_total_queries) memory = memory.permute(0, 2, 1) # from low resolution to high resolution # num_queries_per_level = [e[0] * e[1] for e in spatial_shapes] - # outs = torch.split(memory, num_queries_per_level, dim=-1) - outs = [] - for i in range(self.num_encoder_levels): - outs.append(memory[:, :, - level_start_index[i]:level_start_index[i + 1]]) + outs = torch.split(memory, num_queries_per_level, dim=-1) + # outs = [] + # for i in range(self.num_encoder_levels): + # outs.append(memory[:, :, + # level_start_index[i]:level_start_index[i + 1]]) outs = [ x.reshape(batch_size, -1, spatial_shapes[i][0], spatial_shapes[i][1]) diff --git a/mmdeploy/mmcv/ops/multi_scale_deform_attn.py b/mmdeploy/mmcv/ops/multi_scale_deform_attn.py index 4633c366ea..f18aad681c 100644 --- a/mmdeploy/mmcv/ops/multi_scale_deform_attn.py +++ b/mmdeploy/mmcv/ops/multi_scale_deform_attn.py @@ -57,16 +57,17 @@ def multi_scale_deformable_attn_pytorch_default( bs, _, num_heads, embed_dims = value.shape _, num_queries, num_heads, num_levels, num_points, _ =\ sampling_locations.shape - indices = torch.cat((value_spatial_shapes.new_zeros(1), - value_spatial_shapes.prod(1).cumsum(0))) + # indices = torch.cat((value_spatial_shapes.new_zeros(1), + # value_spatial_shapes.prod(1).cumsum(0))) # avoid split with dynamic 
split_sizes - # value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes], - # dim=1) + value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes], + dim=1) sampling_grids = 2 * sampling_locations - 1 sampling_value_list = [] for i in range(num_levels): H_, W_ = value_spatial_shapes[i] - value_l_ = value[:, indices[i]:indices[i + 1], :, :] + # value_l_ = value[:, indices[i]:indices[i + 1], :, :] + value_l_ = value_list[i] value_l_ = value_l_.flatten(2).transpose(1, 2).reshape( bs * num_heads, embed_dims, H_, W_) sampling_grid_l_ = sampling_grids[:, :, :, From 421d2e75b7270fef19e9b6e5b49b0bce30765e5a Mon Sep 17 00:00:00 2001 From: RunningLeon Date: Tue, 8 Aug 2023 16:09:56 +0800 Subject: [PATCH 15/22] fix for panoptic-fpn --- configs/mmdet/_base_/base_panoptic-seg_static.py | 2 +- .../panoptic-seg_maskformer_onnxruntime_dynamic.py | 2 -- ...maskformer_tensorrt_dynamic-320x512-1344x1344.py | 2 -- ...panoptic-seg_panoptic-fpn_onnxruntime_dynamic.py | 4 +++- ...noptic-fpn_tensorrt_dynamic-352x512-1344x1344.py | 4 +++- .../codebase/mmdet/deploy/object_detection_model.py | 13 ++++++++----- .../detectors/panoptic_two_stage_segmentor.py | 8 ++++++-- tests/regression/mmdet.yml | 2 +- 8 files changed, 22 insertions(+), 15 deletions(-) diff --git a/configs/mmdet/_base_/base_panoptic-seg_static.py b/configs/mmdet/_base_/base_panoptic-seg_static.py index aeace5065b..aee09e633e 100644 --- a/configs/mmdet/_base_/base_panoptic-seg_static.py +++ b/configs/mmdet/_base_/base_panoptic-seg_static.py @@ -6,7 +6,7 @@ model_type='panoptic_end2end', post_processing=dict( export_postprocess_mask=False, - score_threshold=0.05, + score_threshold=0.0, confidence_threshold=0.005, # for YOLOv3 iou_threshold=0.5, max_output_boxes_per_class=200, diff --git a/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_dynamic.py b/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_dynamic.py index 8219316f23..5ccd7794e2 100644 --- 
a/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_dynamic.py +++ b/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_dynamic.py @@ -10,11 +10,9 @@ }, 'cls_logits': { 0: 'batch', - 1: 'query', }, 'mask_logits': { 0: 'batch', - 1: 'query', 2: 'h', 3: 'w', }, diff --git a/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_dynamic-320x512-1344x1344.py b/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_dynamic-320x512-1344x1344.py index aac577bbef..8dcddabc2b 100644 --- a/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_dynamic-320x512-1344x1344.py +++ b/configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_dynamic-320x512-1344x1344.py @@ -8,11 +8,9 @@ }, 'cls_logits': { 0: 'batch', - 1: 'query', }, 'mask_logits': { 0: 'batch', - 1: 'query', 2: 'h', 3: 'w', }, diff --git a/configs/mmdet/panoptic-seg/panoptic-seg_panoptic-fpn_onnxruntime_dynamic.py b/configs/mmdet/panoptic-seg/panoptic-seg_panoptic-fpn_onnxruntime_dynamic.py index 637a906f39..88bf4944f7 100644 --- a/configs/mmdet/panoptic-seg/panoptic-seg_panoptic-fpn_onnxruntime_dynamic.py +++ b/configs/mmdet/panoptic-seg/panoptic-seg_panoptic-fpn_onnxruntime_dynamic.py @@ -3,7 +3,7 @@ '../../_base_/backends/onnxruntime.py' ] onnx_config = dict( - input_shape=[1344, 800], + input_shape=None, output_names=['dets', 'labels', 'masks', 'semseg'], dynamic_axes={ 'input': { @@ -25,6 +25,8 @@ }, 'semseg': { 0: 'batch', + 2: 'height', + 3: 'width' }, }, ) diff --git a/configs/mmdet/panoptic-seg/panoptic-seg_panoptic-fpn_tensorrt_dynamic-352x512-1344x1344.py b/configs/mmdet/panoptic-seg/panoptic-seg_panoptic-fpn_tensorrt_dynamic-352x512-1344x1344.py index 87c43d1b93..79f50ab512 100644 --- a/configs/mmdet/panoptic-seg/panoptic-seg_panoptic-fpn_tensorrt_dynamic-352x512-1344x1344.py +++ b/configs/mmdet/panoptic-seg/panoptic-seg_panoptic-fpn_tensorrt_dynamic-352x512-1344x1344.py @@ -3,7 +3,7 @@ '../../_base_/backends/tensorrt.py' ] onnx_config = dict( - 
input_shape=[1344, 800], + input_shape=None, output_names=['dets', 'labels', 'masks', 'semseg'], dynamic_axes={ 'input': { @@ -25,6 +25,8 @@ }, 'semseg': { 0: 'batch', + 2: 'height', + 3: 'width' }, }, ) diff --git a/mmdeploy/codebase/mmdet/deploy/object_detection_model.py b/mmdeploy/codebase/mmdet/deploy/object_detection_model.py index 3f9b3ae42e..739929488b 100644 --- a/mmdeploy/codebase/mmdet/deploy/object_detection_model.py +++ b/mmdeploy/codebase/mmdet/deploy/object_detection_model.py @@ -1,4 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. +import copy import math from functools import partial from typing import Any, List, Optional, Sequence, Tuple, Union @@ -92,7 +93,6 @@ def __clear_outputs( batch_size = len(test_outputs[0]) num_outputs = len(test_outputs) - has_inst_seg = num_outputs >= 3 outputs = [[None for _ in range(batch_size)] for _ in range(num_outputs)] @@ -100,7 +100,7 @@ def __clear_outputs( inds = test_outputs[0][i, :, 4] > 0.0 outputs[0][i] = test_outputs[0][i, inds, ...] outputs[1][i] = test_outputs[1][i, inds, ...] - if has_inst_seg: + if num_outputs >= 3 and test_outputs[2][i] is not None: outputs[2][i] = test_outputs[2][i, inds, ...] return outputs @@ -182,6 +182,9 @@ def postprocessing_results(self, rescale: bool = True): """Post-processing dets, labels, masks.""" batch_size = len(batch_dets) + tmp_outputs = [batch_dets, batch_labels, batch_masks] + outputs = End2EndModel.__clear_outputs(tmp_outputs) + batch_dets, batch_labels, batch_masks = outputs img_metas = [data_sample.metainfo for data_sample in data_samples] model_type = self.model_cfg.model.type if \ self.model_cfg is not None else None @@ -281,7 +284,6 @@ def forward(self, assert mode == 'predict', 'Deploy model only allow mode=="predict".' 
inputs = inputs.contiguous() outputs = self.predict(inputs) - outputs = End2EndModel.__clear_outputs(outputs) batch_dets, batch_labels = outputs[:2] batch_masks = outputs[2] if len(outputs) >= 3 else None self.postprocessing_results(batch_dets, batch_labels, batch_masks, @@ -374,9 +376,10 @@ def forward(self, if model_type == 'PanopticFPN': batch_dets, batch_labels, batch_masks = outputs[:3] batch_semseg = outputs[3] + tmp_data_samples = copy.deepcopy(data_samples) self.postprocessing_results(batch_dets, batch_labels, batch_masks, - data_samples) - masks_results = [ds.pred_instances for ds in data_samples] + tmp_data_samples) + masks_results = [ds.pred_instances for ds in tmp_data_samples] img_metas = [data_sample.metainfo for data_sample in data_samples] seg_pred_list = [] for i in range(len(data_samples)): diff --git a/mmdeploy/codebase/mmdet/models/detectors/panoptic_two_stage_segmentor.py b/mmdeploy/codebase/mmdet/models/detectors/panoptic_two_stage_segmentor.py index ce6f67e1bd..d606209793 100644 --- a/mmdeploy/codebase/mmdet/models/detectors/panoptic_two_stage_segmentor.py +++ b/mmdeploy/codebase/mmdet/models/detectors/panoptic_two_stage_segmentor.py @@ -39,7 +39,8 @@ def two_stage_panoptic_segmentor__forward(self, # get origin input shape as tensor to support onnx dynamic shape is_dynamic_flag = is_dynamic_shape(deploy_cfg) - img_shape = torch._shape_as_tensor(batch_inputs)[2:] + img_shape = torch._shape_as_tensor(batch_inputs)[2:].to( + batch_inputs.device) if not is_dynamic_flag: img_shape = [int(val) for val in img_shape] # set the metainfo @@ -53,7 +54,10 @@ def two_stage_panoptic_segmentor__forward(self, img_metas = [data_samples.metainfo for data_samples in data_samples] x = self.extract_feat(batch_inputs) - proposals = self.rpn_head.predict(x, data_samples, rescale=False) + if data_samples[0].get('proposals', None) is None: + proposals = self.rpn_head.predict(x, data_samples, rescale=False) + else: + proposals = [data_sample.proposals for data_sample 
in data_samples] bboxes, labels, masks = self.roi_head.predict( x, proposals, data_samples, rescale=False) diff --git a/tests/regression/mmdet.yml b/tests/regression/mmdet.yml index f519f7f021..9cb8bbddfa 100644 --- a/tests/regression/mmdet.yml +++ b/tests/regression/mmdet.yml @@ -15,7 +15,7 @@ globals: tolerance: 1 # metric ±n% multi_value: 100 PQ: - metric_key: '?' + metric_key: 'coco_panoptic/PQ' tolerance: 0.1 # metric ±n% convert_image: &convert_image input_img: *input_img From a3ee2dfd393a0c79b0d7440daf68264457c99f35 Mon Sep 17 00:00:00 2001 From: RunningLeon Date: Wed, 9 Aug 2023 10:23:09 +0800 Subject: [PATCH 16/22] update --- tests/regression/mmdet.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/regression/mmdet.yml b/tests/regression/mmdet.yml index 9cb8bbddfa..06d2d55e07 100644 --- a/tests/regression/mmdet.yml +++ b/tests/regression/mmdet.yml @@ -16,7 +16,7 @@ globals: multi_value: 100 PQ: metric_key: 'coco_panoptic/PQ' - tolerance: 0.1 # metric ±n% + tolerance: 0.5 # metric ±n% convert_image: &convert_image input_img: *input_img test_img: *test_img From 06235f31a2a0bb69a4b6a914cf7f0eae4a49c7ac Mon Sep 17 00:00:00 2001 From: RunningLeon Date: Wed, 9 Aug 2023 14:07:17 +0800 Subject: [PATCH 17/22] remove rewritings for mask2former --- .../models/dense_heads/mask2former_head.py | 94 ------------- .../codebase/mmdet/models/layers/__init__.py | 1 - .../layers/msdeformattn_pixel_decoder.py | 128 ------------------ mmdeploy/mmcv/ops/multi_scale_deform_attn.py | 65 +-------- 4 files changed, 1 insertion(+), 287 deletions(-) delete mode 100644 mmdeploy/codebase/mmdet/models/dense_heads/mask2former_head.py delete mode 100644 mmdeploy/codebase/mmdet/models/layers/msdeformattn_pixel_decoder.py diff --git a/mmdeploy/codebase/mmdet/models/dense_heads/mask2former_head.py b/mmdeploy/codebase/mmdet/models/dense_heads/mask2former_head.py deleted file mode 100644 index 22e333965e..0000000000 --- 
a/mmdeploy/codebase/mmdet/models/dense_heads/mask2former_head.py +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. - -import torch - -from mmdeploy.core import FUNCTION_REWRITER - - -@FUNCTION_REWRITER.register_rewriter( - func_name='mmdet.models.dense_heads.mask2former_head.' - 'Mask2FormerHead.forward') -def mask2former_head__forward(self, x, batch_data_samples): - """Rewrite `forward` for default backend. - - Args: - x (list[Tensor]): Multi scale Features from the - upstream network, each is a 4D-tensor. - batch_data_samples (List[:obj:`DetDataSample`]): The Data - Samples. It usually includes information such as - `gt_instance`, `gt_panoptic_seg` and `gt_sem_seg`. - - Returns: - tuple[list[Tensor]]: A tuple contains two elements. - - - cls_pred_list (list[Tensor)]: Classification logits \ - for each decoder layer. Each is a 3D-tensor with shape \ - (batch_size, num_queries, cls_out_channels). \ - Note `cls_out_channels` should includes background. - - mask_pred_list (list[Tensor]): Mask logits for each \ - decoder layer. Each with shape (batch_size, num_queries, \ - h, w). 
- """ - batch_size = x[0].shape[0] - mask_features, multi_scale_memorys = self.pixel_decoder(x) - # multi_scale_memorys (from low resolution to high resolution) - decoder_inputs = [] - decoder_positional_encodings = [] - for i in range(self.num_transformer_feat_level): - decoder_input = self.decoder_input_projs[i](multi_scale_memorys[i]) - # shape (batch_size, c, h, w) -> (batch_size, h*w, c) - decoder_input = decoder_input.flatten(2).permute(0, 2, 1) - level_embed = self.level_embed.weight[i].view(1, 1, -1) - decoder_input = decoder_input + level_embed - # shape (batch_size, c, h, w) -> (batch_size, h*w, c) - mask = decoder_input.new_zeros( - (batch_size, ) + multi_scale_memorys[i].shape[-2:], - dtype=torch.bool) - decoder_positional_encoding = self.decoder_positional_encoding(mask) - decoder_positional_encoding = decoder_positional_encoding.flatten( - 2).permute(0, 2, 1) - decoder_inputs.append(decoder_input) - decoder_positional_encodings.append(decoder_positional_encoding) - # shape (num_queries, c) -> (batch_size, num_queries, c) - query_feat = self.query_feat.weight.unsqueeze(0).repeat((batch_size, 1, 1)) - query_embed = self.query_embed.weight.unsqueeze(0).repeat( - (batch_size, 1, 1)) - - cls_pred_list = [] - mask_pred_list = [] - cls_pred, mask_pred, attn_mask = self._forward_head( - query_feat, mask_features, multi_scale_memorys[0].shape[-2:]) - cls_pred_list.append(cls_pred) - mask_pred_list.append(mask_pred) - - for i in range(self.num_transformer_decoder_layers): - level_idx = i % self.num_transformer_feat_level - # if a mask is all True(all background), then set it all False. 
- - # to avoid Nonzero, replace with following code - # attn_mask[torch.where( - # attn_mask.sum(-1) == attn_mask.shape[-1])] = False - cond = (attn_mask.sum(-1) != attn_mask.shape[-1]).unsqueeze(2) - attn_mask = attn_mask & cond - - # cross_attn + self_attn - layer = self.transformer_decoder.layers[i] - query_feat = layer( - query=query_feat, - key=decoder_inputs[level_idx], - value=decoder_inputs[level_idx], - query_pos=query_embed, - key_pos=decoder_positional_encodings[level_idx], - cross_attn_mask=attn_mask, - query_key_padding_mask=None, - # here we do not apply masking on padded region - key_padding_mask=None) - cls_pred, mask_pred, attn_mask = self._forward_head( - query_feat, mask_features, - multi_scale_memorys[(i + 1) % - self.num_transformer_feat_level].shape[-2:]) - - cls_pred_list.append(cls_pred) - mask_pred_list.append(mask_pred) - - return cls_pred_list, mask_pred_list diff --git a/mmdeploy/codebase/mmdet/models/layers/__init__.py b/mmdeploy/codebase/mmdet/models/layers/__init__.py index d431a71071..0559af920f 100644 --- a/mmdeploy/codebase/mmdet/models/layers/__init__.py +++ b/mmdeploy/codebase/mmdet/models/layers/__init__.py @@ -2,6 +2,5 @@ # recovery for mmyolo from mmdeploy.mmcv.ops import multiclass_nms # noqa: F401, F403 from . import matrix_nms # noqa: F401, F403 -from . import msdeformattn_pixel_decoder # noqa: F401, F403 __all__ = ['multiclass_nms'] diff --git a/mmdeploy/codebase/mmdet/models/layers/msdeformattn_pixel_decoder.py b/mmdeploy/codebase/mmdet/models/layers/msdeformattn_pixel_decoder.py deleted file mode 100644 index 798947c34e..0000000000 --- a/mmdeploy/codebase/mmdet/models/layers/msdeformattn_pixel_decoder.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from typing import List - -import torch -import torch.nn.functional as F - -from mmdeploy.core import FUNCTION_REWRITER - - -@FUNCTION_REWRITER.register_rewriter( - 'mmdet.models.layers.msdeformattn_pixel_decoder.' 
- 'MSDeformAttnPixelDecoder.forward') -def msdeform_attn_pixel_decoder__forward(self, feats: List[torch.Tensor]): - """Rewrite `forward` for default backend. - Args: - feats (list[Tensor]): Feature maps of each level. Each has - shape of (batch_size, c, h, w). - - Returns: - tuple: A tuple containing the following: - - - mask_feature (Tensor): shape (batch_size, c, h, w). - - multi_scale_features (list[Tensor]): Multi scale \ - features, each in shape (batch_size, c, h, w). - """ - # generate padding mask for each level, for each image - batch_size = feats[0].shape[0] - encoder_input_list = [] - padding_mask_list = [] - level_positional_encoding_list = [] - spatial_shapes = [] - reference_points_list = [] - num_queries_per_level = [] - - for i in range(self.num_encoder_levels): - level_idx = self.num_input_levels - i - 1 - feat = feats[level_idx] - feat_projected = self.input_convs[i](feat) - feat_hw = torch._shape_as_tensor(feat)[2:].to(feat.device) - - # no padding - padding_mask_resized = feat.new_zeros( - (batch_size, ) + feat.shape[-2:], dtype=torch.bool) - pos_embed = self.postional_encoding(padding_mask_resized) - level_embed = self.level_encoding.weight[i] - level_pos_embed = level_embed.view(1, -1, 1, 1) + pos_embed - # (h_i * w_i, 2) - reference_points = self.point_generator.single_level_grid_priors( - feat.shape[-2:], level_idx, device=feat.device) - # normalize - feat_wh = feat_hw.unsqueeze(0).flip(dims=[0, 1]) - factor = feat_wh * self.strides[level_idx] - reference_points = reference_points / factor - - # shape (batch_size, c, h_i, w_i) -> (h_i * w_i, batch_size, c) - feat_projected = feat_projected.flatten(2).permute(0, 2, 1) - level_pos_embed = level_pos_embed.flatten(2).permute(0, 2, 1) - padding_mask_resized = padding_mask_resized.flatten(1) - - encoder_input_list.append(feat_projected) - padding_mask_list.append(padding_mask_resized) - level_positional_encoding_list.append(level_pos_embed) - spatial_shapes.append(feat_hw) - 
num_queries_per_level.append(feat_hw[0] * feat_hw[1]) - reference_points_list.append(reference_points) - # shape (batch_size, total_num_queries), - # total_num_queries=sum([., h_i * w_i,.]) - padding_masks = torch.cat(padding_mask_list, dim=1) - # shape (total_num_queries, batch_size, c) - encoder_inputs = torch.cat(encoder_input_list, dim=1) - level_positional_encodings = torch.cat( - level_positional_encoding_list, dim=1) - # shape (num_encoder_levels, 2), from low - # resolution to high resolution - spatial_shapes = torch.cat(spatial_shapes).view(-1, 2) - - # shape (0, h_0*w_0, h_0*w_0+h_1*w_1, ...) - # keep last index - # level_start_index = torch.cat( - # [spatial_shapes.new_zeros(1), - # spatial_shapes.prod(1).cumsum(0)]).to(torch.long) - reference_points = torch.cat(reference_points_list, dim=0) - reference_points = reference_points[None, :, - None].repeat(batch_size, 1, - self.num_encoder_levels, - 1) - valid_radios = reference_points.new_ones( - (batch_size, self.num_encoder_levels, 2)) - # shape (num_total_queries, batch_size, c) - memory = self.encoder( - query=encoder_inputs, - query_pos=level_positional_encodings, - key_padding_mask=padding_masks, - spatial_shapes=spatial_shapes, - reference_points=reference_points, - level_start_index=None, - valid_ratios=valid_radios) - # (batch_size, c, num_total_queries) - memory = memory.permute(0, 2, 1) - - # from low resolution to high resolution - # num_queries_per_level = [e[0] * e[1] for e in spatial_shapes] - outs = torch.split(memory, num_queries_per_level, dim=-1) - # outs = [] - # for i in range(self.num_encoder_levels): - # outs.append(memory[:, :, - # level_start_index[i]:level_start_index[i + 1]]) - - outs = [ - x.reshape(batch_size, -1, spatial_shapes[i][0], spatial_shapes[i][1]) - for i, x in enumerate(outs) - ] - - for i in range(self.num_input_levels - self.num_encoder_levels - 1, -1, - -1): - x = feats[i] - cur_feat = self.lateral_convs[i](x) - y = cur_feat + F.interpolate( - outs[-1], - 
size=cur_feat.shape[-2:], - mode='bilinear', - align_corners=False) - y = self.output_convs[i](y) - outs.append(y) - multi_scale_features = outs[:self.num_outs] - - mask_feature = self.mask_feature(outs[-1]) - return mask_feature, multi_scale_features diff --git a/mmdeploy/mmcv/ops/multi_scale_deform_attn.py b/mmdeploy/mmcv/ops/multi_scale_deform_attn.py index f18aad681c..8649294f55 100644 --- a/mmdeploy/mmcv/ops/multi_scale_deform_attn.py +++ b/mmdeploy/mmcv/ops/multi_scale_deform_attn.py @@ -1,8 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. -import torch -import torch.nn.functional as F -from mmdeploy.core import FUNCTION_REWRITER, SYMBOLIC_REWRITER +from mmdeploy.core import SYMBOLIC_REWRITER @SYMBOLIC_REWRITER.register_symbolic( @@ -26,64 +24,3 @@ def ms_deform_attn_default( attention_weights, im2col_step_i=im2col_step, ) - - -@FUNCTION_REWRITER.register_rewriter( - 'mmcv.ops.multi_scale_deform_attn.multi_scale_deformable_attn_pytorch') -def multi_scale_deformable_attn_pytorch_default( - value: torch.Tensor, value_spatial_shapes: torch.Tensor, - sampling_locations: torch.Tensor, - attention_weights: torch.Tensor) -> torch.Tensor: - """CPU version of multi-scale deformable attention. - - Args: - value (torch.Tensor): The value has shape - (bs, num_keys, num_heads, embed_dims//num_heads) - value_spatial_shapes (torch.Tensor): Spatial shape of - each feature map, has shape (num_levels, 2), - last dimension 2 represent (h, w) - sampling_locations (torch.Tensor): The location of sampling points, - has shape - (bs ,num_queries, num_heads, num_levels, num_points, 2), - the last dimension 2 represent (x, y). 
- attention_weights (torch.Tensor): The weight of sampling points used - when calculate the attention, has shape - (bs ,num_queries, num_heads, num_levels, num_points), - - Returns: - torch.Tensor: has shape (bs, num_queries, embed_dims) - """ - - bs, _, num_heads, embed_dims = value.shape - _, num_queries, num_heads, num_levels, num_points, _ =\ - sampling_locations.shape - # indices = torch.cat((value_spatial_shapes.new_zeros(1), - # value_spatial_shapes.prod(1).cumsum(0))) - # avoid split with dynamic split_sizes - value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes], - dim=1) - sampling_grids = 2 * sampling_locations - 1 - sampling_value_list = [] - for i in range(num_levels): - H_, W_ = value_spatial_shapes[i] - # value_l_ = value[:, indices[i]:indices[i + 1], :, :] - value_l_ = value_list[i] - value_l_ = value_l_.flatten(2).transpose(1, 2).reshape( - bs * num_heads, embed_dims, H_, W_) - sampling_grid_l_ = sampling_grids[:, :, :, - i].transpose(1, 2).flatten(0, 1) - # bs*num_heads, embed_dims, num_queries, num_points - sampling_value_l_ = F.grid_sample( - value_l_, - sampling_grid_l_, - mode='bilinear', - padding_mode='zeros', - align_corners=False) - sampling_value_list.append(sampling_value_l_) - - attention_weights = attention_weights.transpose(1, 2).reshape( - bs * num_heads, 1, num_queries, num_levels * num_points) - output = (torch.stack(sampling_value_list, dim=-2).flatten(-2) * - attention_weights).sum(-1).view(bs, num_heads * embed_dims, - num_queries) - return output.transpose(1, 2).contiguous() From a7fdccf1c97505ac8adee1e645e380c48b3e511b Mon Sep 17 00:00:00 2001 From: RunningLeon Date: Wed, 9 Aug 2023 14:09:34 +0800 Subject: [PATCH 18/22] update reg test config --- tests/regression/mmdet.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/regression/mmdet.yml b/tests/regression/mmdet.yml index 06d2d55e07..ed16bff92f 100644 --- a/tests/regression/mmdet.yml +++ b/tests/regression/mmdet.yml @@ -401,7 
+401,7 @@ models: pipelines: - deploy_config: configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_dynamic.py convert_image: *convert_image - backend_test: True + backend_test: False - deploy_config: configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_dynamic-320x512-1344x1344.py convert_image: *convert_image backend_test: True @@ -413,7 +413,7 @@ models: pipelines: - deploy_config: configs/mmdet/panoptic-seg/panoptic-seg_maskformer_onnxruntime_dynamic.py convert_image: *convert_image - backend_test: True - - deploy_config: configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_dynamic-320x512-1344x1344.py + backend_test: False + - deploy_config: configs/mmdet/panoptic-seg/panoptic-seg_maskformer_tensorrt_static-800x1344.py convert_image: *convert_image - backend_test: True + backend_test: False From 7594d3f344e767512bc8dd2d204c0a7c2b3a7ad9 Mon Sep 17 00:00:00 2001 From: RunningLeon Date: Wed, 9 Aug 2023 14:39:23 +0800 Subject: [PATCH 19/22] fix --- mmdeploy/codebase/mmdet/deploy/object_detection_model.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mmdeploy/codebase/mmdet/deploy/object_detection_model.py b/mmdeploy/codebase/mmdet/deploy/object_detection_model.py index 739929488b..1ed05a2da5 100644 --- a/mmdeploy/codebase/mmdet/deploy/object_detection_model.py +++ b/mmdeploy/codebase/mmdet/deploy/object_detection_model.py @@ -375,6 +375,8 @@ def forward(self, if model_type == 'PanopticFPN': batch_dets, batch_labels, batch_masks = outputs[:3] + # fix int32 and int64 mismatch in fusion head + batch_labels = batch_labels.to(torch.long) batch_semseg = outputs[3] tmp_data_samples = copy.deepcopy(data_samples) self.postprocessing_results(batch_dets, batch_labels, batch_masks, From 99374540f093f49778fa32b862bd3fdd78a049da Mon Sep 17 00:00:00 2001 From: RunningLeon Date: Fri, 11 Aug 2023 11:46:17 +0800 Subject: [PATCH 20/22] update docs --- docs/en/04-supported-codebases/mmdet.md | 60 ++++++++++++---------- 
docs/zh_cn/04-supported-codebases/mmdet.md | 59 +++++++++++---------- 2 files changed, 65 insertions(+), 54 deletions(-) diff --git a/docs/en/04-supported-codebases/mmdet.md b/docs/en/04-supported-codebases/mmdet.md index 45631bb15e..84e1fe5922 100644 --- a/docs/en/04-supported-codebases/mmdet.md +++ b/docs/en/04-supported-codebases/mmdet.md @@ -10,6 +10,7 @@ - [Backend model inference](#backend-model-inference) - [SDK model inference](#sdk-model-inference) - [Supported models](#supported-models) + - [Reminder](#reminder) ______________________________________________________________________ @@ -189,30 +190,35 @@ Besides python API, mmdeploy SDK also provides other FFI (Foreign Function Inter ## Supported models -| Model | Task | OnnxRuntime | TensorRT | ncnn | PPLNN | OpenVINO | -| :-------------------------------------------------------------------------------------------: | :-------------------: | :---------: | :------: | :--: | :---: | :------: | -| [ATSS](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/atss) | Object Detection | Y | Y | N | N | Y | -| [FCOS](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/fcos) | Object Detection | Y | Y | Y | N | Y | -| [FoveaBox](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/foveabox) | Object Detection | Y | N | N | N | Y | -| [FSAF](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/fsaf) | Object Detection | Y | Y | Y | Y | Y | -| [RetinaNet](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/retinanet) | Object Detection | Y | Y | Y | Y | Y | -| [SSD](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/ssd) | Object Detection | Y | Y | Y | N | Y | -| [VFNet](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/vfnet) | Object Detection | N | N | N | N | Y | -| [YOLOv3](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/yolo) | Object Detection | Y | Y | Y | N | Y | -| [YOLOX](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/yolox) | Object 
Detection | Y | Y | Y | N | Y | -| [Cascade R-CNN](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/cascade_rcnn) | Object Detection | Y | Y | N | Y | Y | -| [Faster R-CNN](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/faster_rcnn) | Object Detection | Y | Y | Y | Y | Y | -| [Faster R-CNN + DCN](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/faster_rcnn) | Object Detection | Y | Y | Y | Y | Y | -| [GFL](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/gfl) | Object Detection | Y | Y | N | ? | Y | -| [RepPoints](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/reppoints) | Object Detection | N | Y | N | ? | Y | -| [DETR](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/detr) | Object Detection | Y | Y | N | ? | Y | -| [CenterNet](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/centernet) | Object Detection | Y | Y | N | ? | Y | -| [RTMDet](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/rtmdet) | Object Detection | Y | Y | N | ? 
| Y | -| [Cascade Mask R-CNN](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/cascade_rcnn) | Instance Segmentation | Y | Y | N | N | Y | -| [Mask R-CNN](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/mask_rcnn) | Instance Segmentation | Y | Y | N | N | Y | -| [Swin Transformer](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/swin) | Instance Segmentation | Y | Y | N | N | Y | -| [SOLO](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/solo) | Instance Segmentation | Y | N | N | N | Y | -| [SOLOv2](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/solov2) | Instance Segmentation | Y | N | N | N | Y | -| [Panoptic FPN](https://github.com/open-mmlab/mmdetection/tree/main/configs/panoptic_fpn) | Panoptic Segmentation | Y | Y | N | N | N | -| [MaskFormer](https://github.com/open-mmlab/mmdetection/tree/main/configs/maskformer) | Panoptic Segmentation | Y | Y | N | N | N | -| [Mask2Former](https://github.com/open-mmlab/mmdetection/tree/main/configs/mask2former) | Panoptic Segmentation | Y | Y | N | N | N | +| Model | Task | OnnxRuntime | TensorRT | ncnn | PPLNN | OpenVINO | +| :------------------------------------------------------------------------------------------------------: | :-------------------: | :---------: | :------: | :--: | :---: | :------: | +| [ATSS](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/atss) | Object Detection | Y | Y | N | N | Y | +| [FCOS](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/fcos) | Object Detection | Y | Y | Y | N | Y | +| [FoveaBox](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/foveabox) | Object Detection | Y | N | N | N | Y | +| [FSAF](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/fsaf) | Object Detection | Y | Y | Y | Y | Y | +| [RetinaNet](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/retinanet) | Object Detection | Y | Y | Y | Y | Y | +| [SSD](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/ssd) | 
Object Detection | Y | Y | Y | N | Y | +| [VFNet](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/vfnet) | Object Detection | N | N | N | N | Y | +| [YOLOv3](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/yolo) | Object Detection | Y | Y | Y | N | Y | +| [YOLOX](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/yolox) | Object Detection | Y | Y | Y | N | Y | +| [Cascade R-CNN](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/cascade_rcnn) | Object Detection | Y | Y | N | Y | Y | +| [Faster R-CNN](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/faster_rcnn) | Object Detection | Y | Y | Y | Y | Y | +| [Faster R-CNN + DCN](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/faster_rcnn) | Object Detection | Y | Y | Y | Y | Y | +| [GFL](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/gfl) | Object Detection | Y | Y | N | ? | Y | +| [RepPoints](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/reppoints) | Object Detection | N | Y | N | ? | Y | +| [DETR](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/detr) | Object Detection | Y | Y | N | ? | Y | +| [CenterNet](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/centernet) | Object Detection | Y | Y | N | ? | Y | +| [RTMDet](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/rtmdet) | Object Detection | Y | Y | N | ? 
| Y | +| [Cascade Mask R-CNN](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/cascade_rcnn) | Instance Segmentation | Y | Y | N | N | Y | +| [Mask R-CNN](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/mask_rcnn) | Instance Segmentation | Y | Y | N | N | Y | +| [Swin Transformer](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/swin) | Instance Segmentation | Y | Y | N | N | Y | +| [SOLO](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/solo) | Instance Segmentation | Y | N | N | N | Y | +| [SOLOv2](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/solov2) | Instance Segmentation | Y | N | N | N | Y | +| [Panoptic FPN](https://github.com/open-mmlab/mmdetection/tree/main/configs/panoptic_fpn) | Panoptic Segmentation | Y | Y | N | N | N | +| [MaskFormer](https://github.com/open-mmlab/mmdetection/tree/main/configs/maskformer) | Panoptic Segmentation | Y | Y | N | N | N | +| [Mask2Former](https://github.com/open-mmlab/mmdetection/tree/main/configs/mask2former)[\*](#mask2former) | Panoptic Segmentation | Y | Y | N | N | N | + +## Reminder + +- For transformer based models, strongly suggest use `TensorRT>=8.4`. +- Mask2Former should use `TensorRT>=8.6.1` for dynamic shape inference. 
diff --git a/docs/zh_cn/04-supported-codebases/mmdet.md b/docs/zh_cn/04-supported-codebases/mmdet.md index da3ce82ce4..17c501630f 100644 --- a/docs/zh_cn/04-supported-codebases/mmdet.md +++ b/docs/zh_cn/04-supported-codebases/mmdet.md @@ -192,30 +192,35 @@ cv2.imwrite('output_detection.png', img) ## 模型支持列表 -| Model | Task | OnnxRuntime | TensorRT | ncnn | PPLNN | OpenVINO | -| :-------------------------------------------------------------------------------------------: | :-------------------: | :---------: | :------: | :--: | :---: | :------: | -| [ATSS](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/atss) | Object Detection | Y | Y | N | N | Y | -| [FCOS](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/fcos) | Object Detection | Y | Y | Y | N | Y | -| [FoveaBox](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/foveabox) | Object Detection | Y | N | N | N | Y | -| [FSAF](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/fsaf) | Object Detection | Y | Y | Y | Y | Y | -| [RetinaNet](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/retinanet) | Object Detection | Y | Y | Y | Y | Y | -| [SSD](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/ssd) | Object Detection | Y | Y | Y | N | Y | -| [VFNet](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/vfnet) | Object Detection | N | N | N | N | Y | -| [YOLOv3](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/yolo) | Object Detection | Y | Y | Y | N | Y | -| [YOLOX](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/yolox) | Object Detection | Y | Y | Y | N | Y | -| [Cascade R-CNN](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/cascade_rcnn) | Object Detection | Y | Y | N | Y | Y | -| [Faster R-CNN](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/faster_rcnn) | Object Detection | Y | Y | Y | Y | Y | -| [Faster R-CNN + DCN](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/faster_rcnn) | Object Detection | Y 
| Y | Y | Y | Y | -| [GFL](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/gfl) | Object Detection | Y | Y | N | ? | Y | -| [RepPoints](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/reppoints) | Object Detection | N | Y | N | ? | Y | -| [DETR](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/detr) | Object Detection | Y | Y | N | ? | Y | -| [CenterNet](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/centernet) | Object Detection | Y | Y | N | ? | Y | -| [RTMDet](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/rtmdet) | Object Detection | Y | Y | N | ? | Y | -| [Cascade Mask R-CNN](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/cascade_rcnn) | Instance Segmentation | Y | Y | N | N | Y | -| [Mask R-CNN](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/mask_rcnn) | Instance Segmentation | Y | Y | N | N | Y | -| [Swin Transformer](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/swin) | Instance Segmentation | Y | Y | N | N | Y | -| [SOLO](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/solo) | Instance Segmentation | Y | N | N | N | Y | -| [SOLOv2](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/solov2) | Instance Segmentation | Y | N | N | N | Y | -| [Panoptic FPN](https://github.com/open-mmlab/mmdetection/tree/main/configs/panoptic_fpn) | Panoptic Segmentation | Y | Y | N | N | N | -| [MaskFormer](https://github.com/open-mmlab/mmdetection/tree/main/configs/maskformer) | Panoptic Segmentation | Y | Y | N | N | N | -| [Mask2Former](https://github.com/open-mmlab/mmdetection/tree/main/configs/mask2former) | Panoptic Segmentation | Y | Y | N | N | N | +| Model | Task | OnnxRuntime | TensorRT | ncnn | PPLNN | OpenVINO | +| :------------------------------------------------------------------------------------------------------: | :-------------------: | :---------: | :------: | :--: | :---: | :------: | +| 
[ATSS](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/atss) | Object Detection | Y | Y | N | N | Y | +| [FCOS](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/fcos) | Object Detection | Y | Y | Y | N | Y | +| [FoveaBox](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/foveabox) | Object Detection | Y | N | N | N | Y | +| [FSAF](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/fsaf) | Object Detection | Y | Y | Y | Y | Y | +| [RetinaNet](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/retinanet) | Object Detection | Y | Y | Y | Y | Y | +| [SSD](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/ssd) | Object Detection | Y | Y | Y | N | Y | +| [VFNet](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/vfnet) | Object Detection | N | N | N | N | Y | +| [YOLOv3](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/yolo) | Object Detection | Y | Y | Y | N | Y | +| [YOLOX](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/yolox) | Object Detection | Y | Y | Y | N | Y | +| [Cascade R-CNN](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/cascade_rcnn) | Object Detection | Y | Y | N | Y | Y | +| [Faster R-CNN](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/faster_rcnn) | Object Detection | Y | Y | Y | Y | Y | +| [Faster R-CNN + DCN](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/faster_rcnn) | Object Detection | Y | Y | Y | Y | Y | +| [GFL](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/gfl) | Object Detection | Y | Y | N | ? | Y | +| [RepPoints](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/reppoints) | Object Detection | N | Y | N | ? | Y | +| [DETR](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/detr) | Object Detection | Y | Y | N | ? | Y | +| [CenterNet](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/centernet) | Object Detection | Y | Y | N | ? 
| Y | +| [RTMDet](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/rtmdet) | Object Detection | Y | Y | N | ? | Y | +| [Cascade Mask R-CNN](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/cascade_rcnn) | Instance Segmentation | Y | Y | N | N | Y | +| [Mask R-CNN](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/mask_rcnn) | Instance Segmentation | Y | Y | N | N | Y | +| [Swin Transformer](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/swin) | Instance Segmentation | Y | Y | N | N | Y | +| [SOLO](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/solo) | Instance Segmentation | Y | N | N | N | Y | +| [SOLOv2](https://github.com/open-mmlab/mmdetection/tree/3.x/configs/solov2) | Instance Segmentation | Y | N | N | N | Y | +| [Panoptic FPN](https://github.com/open-mmlab/mmdetection/tree/main/configs/panoptic_fpn) | Panoptic Segmentation | Y | Y | N | N | N | +| [MaskFormer](https://github.com/open-mmlab/mmdetection/tree/main/configs/maskformer) | Panoptic Segmentation | Y | Y | N | N | N | +| [Mask2Former](https://github.com/open-mmlab/mmdetection/tree/main/configs/mask2former)[\*](#mask2former) | Panoptic Segmentation | Y | Y | N | N | N | + +## 注意事项 + +- 强烈建议使用`TensorRT>=8.4`来转换基于 `transformer` 的模型. +- Mask2Former 请使用 `TensorRT>=8.6.1` 以保证动态尺寸正常推理. 
From 65b1e5ca4d09925f45d57d1e35f49f267d9534fd Mon Sep 17 00:00:00 2001 From: RunningLeon Date: Wed, 23 Aug 2023 11:34:51 +0800 Subject: [PATCH 21/22] fix comments --- configs/mmdet/_base_/base_panoptic-seg_static.py | 1 - .../mmdet/models/detectors/panoptic_two_stage_segmentor.py | 2 -- mmdeploy/codebase/mmdet/models/detectors/single_stage.py | 4 ---- mmdeploy/codebase/mmdet/models/detectors/two_stage.py | 2 -- 4 files changed, 9 deletions(-) diff --git a/configs/mmdet/_base_/base_panoptic-seg_static.py b/configs/mmdet/_base_/base_panoptic-seg_static.py index aee09e633e..bc117ce6ae 100644 --- a/configs/mmdet/_base_/base_panoptic-seg_static.py +++ b/configs/mmdet/_base_/base_panoptic-seg_static.py @@ -7,7 +7,6 @@ post_processing=dict( export_postprocess_mask=False, score_threshold=0.0, - confidence_threshold=0.005, # for YOLOv3 iou_threshold=0.5, max_output_boxes_per_class=200, pre_top_k=5000, diff --git a/mmdeploy/codebase/mmdet/models/detectors/panoptic_two_stage_segmentor.py b/mmdeploy/codebase/mmdet/models/detectors/panoptic_two_stage_segmentor.py index d606209793..8fa4475723 100644 --- a/mmdeploy/codebase/mmdet/models/detectors/panoptic_two_stage_segmentor.py +++ b/mmdeploy/codebase/mmdet/models/detectors/panoptic_two_stage_segmentor.py @@ -1,5 +1,4 @@ # Copyright (c) OpenMMLab. All rights reserved. -import copy import torch @@ -34,7 +33,6 @@ def two_stage_panoptic_segmentor__forward(self, `semseg` of shape [N, num_sem_class, sem_H, sem_W]. """ ctx = FUNCTION_REWRITER.get_context() - data_samples = copy.deepcopy(data_samples) deploy_cfg = ctx.cfg # get origin input shape as tensor to support onnx dynamic shape diff --git a/mmdeploy/codebase/mmdet/models/detectors/single_stage.py b/mmdeploy/codebase/mmdet/models/detectors/single_stage.py index 5f3872c8b6..b25f70c028 100644 --- a/mmdeploy/codebase/mmdet/models/detectors/single_stage.py +++ b/mmdeploy/codebase/mmdet/models/detectors/single_stage.py @@ -1,5 +1,4 @@ # Copyright (c) OpenMMLab. All rights reserved. 
-import copy import torch from mmdet.models.detectors.base import ForwardResults @@ -31,9 +30,6 @@ def _set_metainfo(data_samples, img_shape): Code in this function cannot be traced by fx. """ - - # fx can not trace deepcopy correctly - data_samples = copy.deepcopy(data_samples) if data_samples is None: data_samples = [DetDataSample()] diff --git a/mmdeploy/codebase/mmdet/models/detectors/two_stage.py b/mmdeploy/codebase/mmdet/models/detectors/two_stage.py index d0bd140003..477571327e 100644 --- a/mmdeploy/codebase/mmdet/models/detectors/two_stage.py +++ b/mmdeploy/codebase/mmdet/models/detectors/two_stage.py @@ -1,5 +1,4 @@ # Copyright (c) OpenMMLab. All rights reserved. -import copy import torch from mmdet.models.detectors.base import ForwardResults @@ -63,7 +62,6 @@ def two_stage_detector__forward(self, (num_instances, ). """ ctx = FUNCTION_REWRITER.get_context() - data_samples = copy.deepcopy(data_samples) deploy_cfg = ctx.cfg # get origin input shape as tensor to support onnx dynamic shape From e279a2954eee1f9e145eb1ece6e35306b98dfb1e Mon Sep 17 00:00:00 2001 From: RunningLeon Date: Wed, 23 Aug 2023 14:55:06 +0800 Subject: [PATCH 22/22] fix --- .github/scripts/prepare_reg_test.py | 6 ++++++ mmdeploy/codebase/mmdet/deploy/object_detection_model.py | 8 ++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/.github/scripts/prepare_reg_test.py b/.github/scripts/prepare_reg_test.py index 96065ef3e6..e7f6e67751 100644 --- a/.github/scripts/prepare_reg_test.py +++ b/.github/scripts/prepare_reg_test.py @@ -98,6 +98,12 @@ def prepare_codebases(codebases): f'{MMDEPLOY_DIR}/configs/mmyolo') shutil.copy(f'{target_dir}/tests/regression/mmyolo.yml', f'{MMDEPLOY_DIR}/tests/regression/mmyolo.yml') + elif codebase == 'mmdet': + # for panoptic + run_cmd([ + 'python -m pip install ', + 'git+https://github.com/cocodataset/panopticapi.git', + ]) def install_torch(torch_version): diff --git a/mmdeploy/codebase/mmdet/deploy/object_detection_model.py 
b/mmdeploy/codebase/mmdet/deploy/object_detection_model.py index 1ed05a2da5..e9f3b6cf7b 100644 --- a/mmdeploy/codebase/mmdet/deploy/object_detection_model.py +++ b/mmdeploy/codebase/mmdet/deploy/object_detection_model.py @@ -182,9 +182,13 @@ def postprocessing_results(self, rescale: bool = True): """Post-processing dets, labels, masks.""" batch_size = len(batch_dets) - tmp_outputs = [batch_dets, batch_labels, batch_masks] + tmp_outputs = [batch_dets, batch_labels] + has_mask = batch_masks is not None + if has_mask: + tmp_outputs.append(batch_masks) outputs = End2EndModel.__clear_outputs(tmp_outputs) - batch_dets, batch_labels, batch_masks = outputs + batch_dets, batch_labels = outputs[:2] + batch_masks = outputs[2] if has_mask else None img_metas = [data_sample.metainfo for data_sample in data_samples] model_type = self.model_cfg.model.type if \ self.model_cfg is not None else None