diff --git a/configs/mmocr/text-detection/text-detection_mrcnn_onnxruntime_dynamic.py b/configs/mmocr/text-detection/text-detection_mrcnn_onnxruntime_dynamic.py
new file mode 100644
index 0000000000..947d028fd6
--- /dev/null
+++ b/configs/mmocr/text-detection/text-detection_mrcnn_onnxruntime_dynamic.py
@@ -0,0 +1,33 @@
+_base_ = ['./text-detection_static.py', '../../_base_/backends/onnxruntime.py']
+onnx_config = dict(
+ output_names=['dets', 'labels', 'masks'],
+ dynamic_axes=dict(
+ input=dict({
+ 0: 'batch',
+ 2: 'height',
+ 3: 'width'
+ }),
+ dets=dict({
+ 0: 'batch',
+ 1: 'num_dets'
+ }),
+ labels=dict({
+ 0: 'batch',
+ 1: 'num_dets'
+ }),
+ masks=dict({
+ 0: 'batch',
+ 1: 'num_dets',
+ 2: 'height',
+ 3: 'width'
+ })))
+codebase_config = dict(
+ post_processing=dict(
+ score_threshold=0.05,
+ confidence_threshold=0.005,
+ iou_threshold=0.5,
+ max_output_boxes_per_class=200,
+ pre_top_k=5000,
+ keep_top_k=100,
+ background_label_id=-1,
+ export_postprocess_mask=False))
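
The `dynamic_axes` above name the dimensions that stay symbolic in the exported graph. A minimal sketch for verifying the export, assuming the converter has produced the default `end2end.onnx` in the work directory:

```python
import onnxruntime as ort

sess = ort.InferenceSession('work_dir/end2end.onnx',
                            providers=['CPUExecutionProvider'])
# Dynamic dimensions surface as their symbolic names from `dynamic_axes`.
print(sess.get_inputs()[0].shape)   # e.g. ['batch', 3, 'height', 'width']
for out in sess.get_outputs():
    print(out.name, out.shape)      # dets/labels/masks with 'num_dets' axes
```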
diff --git a/configs/mmocr/text-detection/text-detection_mrcnn_tensorrt-fp16_dynamic-320x320-2240x2240.py b/configs/mmocr/text-detection/text-detection_mrcnn_tensorrt-fp16_dynamic-320x320-2240x2240.py
new file mode 100644
index 0000000000..2cd6c220d1
--- /dev/null
+++ b/configs/mmocr/text-detection/text-detection_mrcnn_tensorrt-fp16_dynamic-320x320-2240x2240.py
@@ -0,0 +1,2 @@
+_base_ = ['./text-detection_mrcnn_tensorrt_dynamic-320x320-2240x2240.py']
+backend_config = dict(common_config=dict(fp16_mode=True))
diff --git a/configs/mmocr/text-detection/text-detection_mrcnn_tensorrt-int8_dynamic-320x320-2240x2240.py b/configs/mmocr/text-detection/text-detection_mrcnn_tensorrt-int8_dynamic-320x320-2240x2240.py
new file mode 100644
index 0000000000..f08c95f113
--- /dev/null
+++ b/configs/mmocr/text-detection/text-detection_mrcnn_tensorrt-int8_dynamic-320x320-2240x2240.py
@@ -0,0 +1,5 @@
+_base_ = ['./text-detection_mrcnn_tensorrt_dynamic-320x320-2240x2240.py']
+
+backend_config = dict(common_config=dict(fp16_mode=True, int8_mode=True))
+
+calib_config = dict(create_calib=True, calib_file='calib_data.h5')
diff --git a/configs/mmocr/text-detection/text-detection_mrcnn_tensorrt_dynamic-320x320-2240x2240.py b/configs/mmocr/text-detection/text-detection_mrcnn_tensorrt_dynamic-320x320-2240x2240.py
new file mode 100644
index 0000000000..12a03c8c45
--- /dev/null
+++ b/configs/mmocr/text-detection/text-detection_mrcnn_tensorrt_dynamic-320x320-2240x2240.py
@@ -0,0 +1,45 @@
+_base_ = ['./text-detection_static.py', '../../_base_/backends/tensorrt.py']
+onnx_config = dict(
+ output_names=['dets', 'labels', 'masks'],
+ dynamic_axes=dict(
+ input=dict({
+ 0: 'batch',
+ 2: 'height',
+ 3: 'width'
+ }),
+ dets=dict({
+ 0: 'batch',
+ 1: 'num_dets'
+ }),
+ labels=dict({
+ 0: 'batch',
+ 1: 'num_dets'
+ }),
+ masks=dict({
+ 0: 'batch',
+ 1: 'num_dets',
+ 2: 'height',
+ 3: 'width'
+ })))
+
+backend_config = dict(
+ common_config=dict(max_workspace_size=1 << 30),
+ model_inputs=[
+ dict(
+ input_shapes=dict(
+ input=dict(
+ min_shape=[1, 3, 320, 320],
+ opt_shape=[1, 3, 600, 800],
+ max_shape=[1, 3, 2240, 2240])))
+ ])
+
+codebase_config = dict(
+ post_processing=dict(
+ score_threshold=0.05,
+ confidence_threshold=0.005,
+ iou_threshold=0.5,
+ max_output_boxes_per_class=200,
+ pre_top_k=5000,
+ keep_top_k=100,
+ background_label_id=-1,
+ export_postprocess_mask=False))
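
TensorRT builds its optimization profile from `min_shape`/`opt_shape`/`max_shape`, and inputs outside that range are rejected at runtime. A small illustrative check (the helper name is made up, bounds copied from the config above):

```python
def fits_profile(shape,
                 min_shape=(1, 3, 320, 320),
                 max_shape=(1, 3, 2240, 2240)):
    """Return True if an NCHW shape falls inside the engine's profile."""
    return all(lo <= v <= hi
               for v, lo, hi in zip(shape, min_shape, max_shape))

assert fits_profile((1, 3, 600, 800))       # the opt_shape, fastest case
assert not fits_profile((1, 3, 2500, 640))  # height exceeds max_shape
```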
diff --git a/configs/mmocr/text-detection/text-detection_mrcnn_torchscript.py b/configs/mmocr/text-detection/text-detection_mrcnn_torchscript.py
new file mode 100644
index 0000000000..073560e704
--- /dev/null
+++ b/configs/mmocr/text-detection/text-detection_mrcnn_torchscript.py
@@ -0,0 +1,18 @@
+_base_ = [
+ '../../_base_/torchscript_config.py',
+ '../../_base_/backends/torchscript.py'
+]
+
+ir_config = dict(input_shape=None, output_names=['dets', 'labels', 'masks'])
+codebase_config = dict(
+ type='mmocr',
+ task='TextDetection',
+ post_processing=dict(
+ score_threshold=0.05,
+ confidence_threshold=0.005,
+ iou_threshold=0.5,
+ max_output_boxes_per_class=200,
+ pre_top_k=5000,
+ keep_top_k=100,
+ background_label_id=-1,
+ export_postprocess_mask=False))
diff --git a/configs/mmocr/text-recognition/text-recognition_tensorrt-fp16_dynamic-1x32x32-1x32x640.py b/configs/mmocr/text-recognition/text-recognition_tensorrt-fp16_dynamic-1x32x32-1x32x640.py
index 68cb0ea73f..f0a6ea4a86 100644
--- a/configs/mmocr/text-recognition/text-recognition_tensorrt-fp16_dynamic-1x32x32-1x32x640.py
+++ b/configs/mmocr/text-recognition/text-recognition_tensorrt-fp16_dynamic-1x32x32-1x32x640.py
@@ -1,3 +1,4 @@
+# 1-channel input for CRNN models
_base_ = [
'./text-recognition_dynamic.py', '../../_base_/backends/tensorrt-fp16.py'
]
diff --git a/configs/mmocr/text-recognition/text-recognition_tensorrt-fp16_dynamic-32x32-32x640.py b/configs/mmocr/text-recognition/text-recognition_tensorrt-fp16_dynamic-32x32-32x640.py
index 87a144391e..fe85452ace 100644
--- a/configs/mmocr/text-recognition/text-recognition_tensorrt-fp16_dynamic-32x32-32x640.py
+++ b/configs/mmocr/text-recognition/text-recognition_tensorrt-fp16_dynamic-32x32-32x640.py
@@ -1,3 +1,4 @@
+# 3-channel input with height 32 for SATRN models
_base_ = [
'./text-recognition_dynamic.py', '../../_base_/backends/tensorrt-fp16.py'
]
diff --git a/configs/mmocr/text-recognition/text-recognition_tensorrt-fp16_dynamic-48x64-48x640.py b/configs/mmocr/text-recognition/text-recognition_tensorrt-fp16_dynamic-48x64-48x640.py
new file mode 100644
index 0000000000..fcbabb63bf
--- /dev/null
+++ b/configs/mmocr/text-recognition/text-recognition_tensorrt-fp16_dynamic-48x64-48x640.py
@@ -0,0 +1,14 @@
+# 3-channel input with height 48 for SAR models
+_base_ = [
+ './text-recognition_dynamic.py', '../../_base_/backends/tensorrt-fp16.py'
+]
+backend_config = dict(
+ common_config=dict(max_workspace_size=1 << 30),
+ model_inputs=[
+ dict(
+ input_shapes=dict(
+ input=dict(
+ min_shape=[1, 3, 48, 64],
+ opt_shape=[1, 3, 48, 64],
+ max_shape=[1, 3, 48, 640])))
+ ])
diff --git a/configs/mmocr/text-recognition/text-recognition_tensorrt-fp16_static-1x32x32.py b/configs/mmocr/text-recognition/text-recognition_tensorrt-fp16_static-1x32x32.py
index d1621913df..b1acd5069e 100644
--- a/configs/mmocr/text-recognition/text-recognition_tensorrt-fp16_static-1x32x32.py
+++ b/configs/mmocr/text-recognition/text-recognition_tensorrt-fp16_static-1x32x32.py
@@ -1,3 +1,4 @@
+# 1-channel input for CRNN models
_base_ = [
'./text-recognition_static.py', '../../_base_/backends/tensorrt-fp16.py'
]
diff --git a/configs/mmocr/text-recognition/text-recognition_tensorrt-fp16_static-32x128.py b/configs/mmocr/text-recognition/text-recognition_tensorrt-fp16_static-32x128.py
new file mode 100644
index 0000000000..bc5865260c
--- /dev/null
+++ b/configs/mmocr/text-recognition/text-recognition_tensorrt-fp16_static-32x128.py
@@ -0,0 +1,16 @@
+# ABINet models use static input 32x128
+_base_ = [
+ './text-recognition_static.py', '../../_base_/backends/tensorrt-fp16.py'
+]
+
+onnx_config = dict(input_shape=[128, 32])
+backend_config = dict(
+ common_config=dict(max_workspace_size=1 << 30),
+ model_inputs=[
+ dict(
+ input_shapes=dict(
+ input=dict(
+ min_shape=[1, 3, 32, 128],
+ opt_shape=[1, 3, 32, 128],
+ max_shape=[1, 3, 32, 128])))
+ ])
diff --git a/configs/mmocr/text-recognition/text-recognition_tensorrt-int8_dynamic-1x32x32-1x32x640.py b/configs/mmocr/text-recognition/text-recognition_tensorrt-int8_dynamic-1x32x32-1x32x640.py
index 49194a862b..c749c4a5cc 100644
--- a/configs/mmocr/text-recognition/text-recognition_tensorrt-int8_dynamic-1x32x32-1x32x640.py
+++ b/configs/mmocr/text-recognition/text-recognition_tensorrt-int8_dynamic-1x32x32-1x32x640.py
@@ -1,3 +1,4 @@
+# 1-channel input for CRNN models
_base_ = [
'./text-recognition_dynamic.py', '../../_base_/backends/tensorrt-int8.py'
]
diff --git a/configs/mmocr/text-recognition/text-recognition_tensorrt-int8_dynamic-32x32-32x640.py b/configs/mmocr/text-recognition/text-recognition_tensorrt-int8_dynamic-32x32-32x640.py
index cebb2674c9..136afc410b 100644
--- a/configs/mmocr/text-recognition/text-recognition_tensorrt-int8_dynamic-32x32-32x640.py
+++ b/configs/mmocr/text-recognition/text-recognition_tensorrt-int8_dynamic-32x32-32x640.py
@@ -1,3 +1,4 @@
+# 3-channel input with height 32 for SATRN models
_base_ = [
'./text-recognition_dynamic.py', '../../_base_/backends/tensorrt-int8.py'
]
diff --git a/configs/mmocr/text-recognition/text-recognition_tensorrt-int8_dynamic-48x64-48x640.py b/configs/mmocr/text-recognition/text-recognition_tensorrt-int8_dynamic-48x64-48x640.py
new file mode 100644
index 0000000000..8289b0ce82
--- /dev/null
+++ b/configs/mmocr/text-recognition/text-recognition_tensorrt-int8_dynamic-48x64-48x640.py
@@ -0,0 +1,14 @@
+# 3-channel input with height 48 for SAR models
+_base_ = [
+ './text-recognition_dynamic.py', '../../_base_/backends/tensorrt-int8.py'
+]
+backend_config = dict(
+ common_config=dict(max_workspace_size=1 << 30),
+ model_inputs=[
+ dict(
+ input_shapes=dict(
+ input=dict(
+ min_shape=[1, 3, 48, 64],
+ opt_shape=[1, 3, 48, 64],
+ max_shape=[1, 3, 48, 640])))
+ ])
diff --git a/configs/mmocr/text-recognition/text-recognition_tensorrt-int8_static-1x32x32.py b/configs/mmocr/text-recognition/text-recognition_tensorrt-int8_static-1x32x32.py
index df36ce93e6..6fbeabc272 100644
--- a/configs/mmocr/text-recognition/text-recognition_tensorrt-int8_static-1x32x32.py
+++ b/configs/mmocr/text-recognition/text-recognition_tensorrt-int8_static-1x32x32.py
@@ -1,3 +1,4 @@
+# 1-channel input for CRNN models
_base_ = [
'./text-recognition_static.py', '../../_base_/backends/tensorrt-int8.py'
]
diff --git a/configs/mmocr/text-recognition/text-recognition_tensorrt-int8_static-32x128.py b/configs/mmocr/text-recognition/text-recognition_tensorrt-int8_static-32x128.py
new file mode 100644
index 0000000000..64e8954c3a
--- /dev/null
+++ b/configs/mmocr/text-recognition/text-recognition_tensorrt-int8_static-32x128.py
@@ -0,0 +1,16 @@
+# ABINet models use static input 32x128
+_base_ = [
+ './text-recognition_static.py', '../../_base_/backends/tensorrt-int8.py'
+]
+
+onnx_config = dict(input_shape=[128, 32])
+backend_config = dict(
+ common_config=dict(max_workspace_size=1 << 30),
+ model_inputs=[
+ dict(
+ input_shapes=dict(
+ input=dict(
+ min_shape=[1, 3, 32, 128],
+ opt_shape=[1, 3, 32, 128],
+ max_shape=[1, 3, 32, 128])))
+ ])
diff --git a/configs/mmocr/text-recognition/text-recognition_tensorrt_dynamic-1x32x32-1x32x640.py b/configs/mmocr/text-recognition/text-recognition_tensorrt_dynamic-1x32x32-1x32x640.py
index 6fca1265a3..795b1566d6 100644
--- a/configs/mmocr/text-recognition/text-recognition_tensorrt_dynamic-1x32x32-1x32x640.py
+++ b/configs/mmocr/text-recognition/text-recognition_tensorrt_dynamic-1x32x32-1x32x640.py
@@ -1,3 +1,4 @@
+# 1-channel input for CRNN models
_base_ = ['./text-recognition_dynamic.py', '../../_base_/backends/tensorrt.py']
backend_config = dict(
common_config=dict(max_workspace_size=1 << 30),
diff --git a/configs/mmocr/text-recognition/text-recognition_tensorrt_dynamic-32x32-32x640.py b/configs/mmocr/text-recognition/text-recognition_tensorrt_dynamic-32x32-32x640.py
index 4f26716e13..2f6c98a61b 100644
--- a/configs/mmocr/text-recognition/text-recognition_tensorrt_dynamic-32x32-32x640.py
+++ b/configs/mmocr/text-recognition/text-recognition_tensorrt_dynamic-32x32-32x640.py
@@ -1,3 +1,4 @@
+# 3-channel input with height 32 for SATRN models
_base_ = ['./text-recognition_dynamic.py', '../../_base_/backends/tensorrt.py']
backend_config = dict(
common_config=dict(max_workspace_size=1 << 30),
diff --git a/configs/mmocr/text-recognition/text-recognition_tensorrt_dynamic-48x64-48x640.py b/configs/mmocr/text-recognition/text-recognition_tensorrt_dynamic-48x64-48x640.py
new file mode 100644
index 0000000000..932470d35b
--- /dev/null
+++ b/configs/mmocr/text-recognition/text-recognition_tensorrt_dynamic-48x64-48x640.py
@@ -0,0 +1,12 @@
+# 3-channel input with height 48 for SAR models
+_base_ = ['./text-recognition_dynamic.py', '../../_base_/backends/tensorrt.py']
+backend_config = dict(
+ common_config=dict(max_workspace_size=1 << 30),
+ model_inputs=[
+ dict(
+ input_shapes=dict(
+ input=dict(
+ min_shape=[1, 3, 48, 64],
+ opt_shape=[1, 3, 48, 64],
+ max_shape=[1, 3, 48, 640])))
+ ])
diff --git a/configs/mmocr/text-recognition/text-recognition_tensorrt_static-1x32x32.py b/configs/mmocr/text-recognition/text-recognition_tensorrt_static-1x32x32.py
index a7e653c8a6..9a4b122c59 100644
--- a/configs/mmocr/text-recognition/text-recognition_tensorrt_static-1x32x32.py
+++ b/configs/mmocr/text-recognition/text-recognition_tensorrt_static-1x32x32.py
@@ -1,3 +1,4 @@
+# 1-channel input for CRNN models
_base_ = ['./text-recognition_static.py', '../../_base_/backends/tensorrt.py']
onnx_config = dict(input_shape=[32, 32])
diff --git a/configs/mmocr/text-recognition/text-recognition_tensorrt_static-32x128.py b/configs/mmocr/text-recognition/text-recognition_tensorrt_static-32x128.py
new file mode 100644
index 0000000000..9b7029ff66
--- /dev/null
+++ b/configs/mmocr/text-recognition/text-recognition_tensorrt_static-32x128.py
@@ -0,0 +1,14 @@
+# ABINet models use static input 32x128
+_base_ = ['./text-recognition_static.py', '../../_base_/backends/tensorrt.py']
+
+onnx_config = dict(input_shape=[128, 32])
+backend_config = dict(
+ common_config=dict(max_workspace_size=1 << 30),
+ model_inputs=[
+ dict(
+ input_shapes=dict(
+ input=dict(
+ min_shape=[1, 3, 32, 128],
+ opt_shape=[1, 3, 32, 128],
+ max_shape=[1, 3, 32, 128])))
+ ])
diff --git a/docs/en/03-benchmark/benchmark.md b/docs/en/03-benchmark/benchmark.md
index ed7604f72a..18ef2faa3b 100644
--- a/docs/en/03-benchmark/benchmark.md
+++ b/docs/en/03-benchmark/benchmark.md
@@ -1178,6 +1178,42 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../
     <td align="center">0.7949</td>
     <td align="center">0.7950</td>
   </tr>
+  <tr>
+    <td align="center" rowspan="3">DBNetpp</td>
+    <td align="center" rowspan="3">TextDetection</td>
+    <td align="center" rowspan="3">ICDAR2015</td>
+    <td align="center">recall</td>
+    <td align="center">0.8209</td>
+    <td align="center">0.8209</td>
+    <td align="center">0.8209</td>
+    <td align="center">0.8199</td>
+    <td align="center">0.8204</td>
+    <td align="center">0.8204</td>
+    <td align="center">-</td>
+    <td align="center">0.8209</td>
+  </tr>
+  <tr>
+    <td align="center">precision</td>
+    <td align="center">0.9079</td>
+    <td align="center">0.9079</td>
+    <td align="center">0.9079</td>
+    <td align="center">0.9117</td>
+    <td align="center">0.9117</td>
+    <td align="center">0.9142</td>
+    <td align="center">-</td>
+    <td align="center">0.9079</td>
+  </tr>
+  <tr>
+    <td align="center">hmean</td>
+    <td align="center">0.8622</td>
+    <td align="center">0.8622</td>
+    <td align="center">0.8622</td>
+    <td align="center">0.8634</td>
+    <td align="center">0.8637</td>
+    <td align="center">0.8648</td>
+    <td align="center">-</td>
+    <td align="center">0.8622</td>
+  </tr>
   <tr>
     <td align="center" rowspan="3">PSENet</td>
     <td align="center" rowspan="3">TextDetection</td>
@@ -1250,6 +1286,78 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../
     <td align="center">-</td>
     <td align="center">0.7955</td>
   </tr>
+  <tr>
+    <td align="center" rowspan="3">TextSnake</td>
+    <td align="center" rowspan="3">TextDetection</td>
+    <td align="center" rowspan="3">CTW1500</td>
+    <td align="center">recall</td>
+    <td align="center">0.8052</td>
+    <td align="center">0.8052</td>
+    <td align="center">0.8052</td>
+    <td align="center">0.8055</td>
+    <td align="center">-</td>
+    <td align="center">-</td>
+    <td align="center">-</td>
+    <td align="center">-</td>
+  </tr>
+  <tr>
+    <td align="center">precision</td>
+    <td align="center">0.8535</td>
+    <td align="center">0.8535</td>
+    <td align="center">0.8535</td>
+    <td align="center">0.8538</td>
+    <td align="center">-</td>
+    <td align="center">-</td>
+    <td align="center">-</td>
+    <td align="center">-</td>
+  </tr>
+  <tr>
+    <td align="center">hmean</td>
+    <td align="center">0.8286</td>
+    <td align="center">0.8286</td>
+    <td align="center">0.8286</td>
+    <td align="center">0.8290</td>
+    <td align="center">-</td>
+    <td align="center">-</td>
+    <td align="center">-</td>
+    <td align="center">-</td>
+  </tr>
+  <tr>
+    <td align="center" rowspan="3">MaskRCNN</td>
+    <td align="center" rowspan="3">TextDetection</td>
+    <td align="center" rowspan="3">ICDAR2015</td>
+    <td align="center">recall</td>
+    <td align="center">0.7766</td>
+    <td align="center">0.7766</td>
+    <td align="center">0.7766</td>
+    <td align="center">0.7766</td>
+    <td align="center">0.7761</td>
+    <td align="center">0.7670</td>
+    <td align="center">-</td>
+    <td align="center">-</td>
+  </tr>
+  <tr>
+    <td align="center">precision</td>
+    <td align="center">0.8644</td>
+    <td align="center">0.8644</td>
+    <td align="center">0.8644</td>
+    <td align="center">0.8644</td>
+    <td align="center">0.8630</td>
+    <td align="center">0.8705</td>
+    <td align="center">-</td>
+    <td align="center">-</td>
+  </tr>
+  <tr>
+    <td align="center">hmean</td>
+    <td align="center">0.8182</td>
+    <td align="center">0.8182</td>
+    <td align="center">0.8182</td>
+    <td align="center">0.8182</td>
+    <td align="center">0.8172</td>
+    <td align="center">0.8155</td>
+    <td align="center">-</td>
+    <td align="center">-</td>
+  </tr>
   <tr>
     <td align="center">CRNN</td>
     <td align="center">TextRecognition</td>
@@ -1292,6 +1400,20 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../
     <td align="center">-</td>
     <td align="center">-</td>
   </tr>
+  <tr>
+    <td align="center">ABINet</td>
+    <td align="center">TextRecognition</td>
+    <td align="center">IIIT5K</td>
+    <td align="center">acc</td>
+    <td align="center">0.9603</td>
+    <td align="center">0.9563</td>
+    <td align="center">0.9563</td>
+    <td align="center">0.9573</td>
+    <td align="center">0.9507</td>
+    <td align="center">0.9510</td>
+    <td align="center">-</td>
+    <td align="center">-</td>
+  </tr>
diff --git a/docs/en/03-benchmark/supported_models.md b/docs/en/03-benchmark/supported_models.md
index ec459dc3e8..d9b2400f5c 100644
--- a/docs/en/03-benchmark/supported_models.md
+++ b/docs/en/03-benchmark/supported_models.md
@@ -68,11 +68,15 @@ The table below lists the models that are guaranteed to be exportable to other b
| [EDSR](https://github.com/open-mmlab/mmediting/tree/1.x/configs/edsr) | MMEditing | Y | Y | Y | Y | N | Y | N | N |
| [RDN](https://github.com/open-mmlab/mmediting/tree/1.x/configs/rdn) | MMEditing | Y | Y | Y | Y | Y | Y | N | N |
| [DBNet](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textdet/dbnet) | MMOCR | Y | Y | Y | Y | Y | Y | Y | N |
+| [DBNetpp](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textdet/dbnetpp) | MMOCR | Y | Y | Y | ? | ? | Y | ? | N |
| [PANet](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textdet/panet) | MMOCR | Y | Y | Y | Y | ? | Y | Y | N |
| [PSENet](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textdet/psenet) | MMOCR | Y | Y | Y | Y | ? | Y | Y | N |
+| [TextSnake](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textdet/textsnake) | MMOCR | Y | Y | Y | Y | ? | ? | ? | N |
+| [MaskRCNN](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textdet/maskrcnn) | MMOCR | Y | Y | Y | ? | ? | ? | ? | N |
| [CRNN](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textrecog/crnn) | MMOCR | Y | Y | Y | Y | Y | N | N | N |
| [SAR](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textrecog/sar) | MMOCR | N | Y | N | N | N | N | N | N |
| [SATRN](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textrecog/satrn) | MMOCR | Y | Y | Y | N | N | N | N | N |
+| [ABINet](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textrecog/abinet) | MMOCR | Y | Y | Y | N | N | N | N | N |
| [HRNet](https://mmpose.readthedocs.io/en/1.x/model_zoo_papers/backbones.html#hrnet-cvpr-2019) | MMPose | N | Y | Y | Y | N | Y | N | N |
| [MSPN](https://mmpose.readthedocs.io/en/1.x/model_zoo_papers/backbones.html#mspn-arxiv-2019) | MMPose | N | Y | Y | Y | N | Y | N | N |
| [LiteHRNet](https://mmpose.readthedocs.io/en/1.x/model_zoo_papers/backbones.html#litehrnet-cvpr-2021) | MMPose | N | Y | Y | N | N | Y | N | N |
diff --git a/docs/en/04-supported-codebases/mmocr.md b/docs/en/04-supported-codebases/mmocr.md
index 592eca36d1..d07d1d6b95 100644
--- a/docs/en/04-supported-codebases/mmocr.md
+++ b/docs/en/04-supported-codebases/mmocr.md
@@ -1,16 +1,20 @@
# MMOCR Deployment
-- [Installation](#installation)
- - [Install mmocr](#install-mmocr)
- - [Install mmdeploy](#install-mmdeploy)
-- [Convert model](#convert-model)
- - [Convert text detection model](#convert-text-detection-model)
- - [Convert text recognition model](#convert-text-recognition-model)
-- [Model specification](#model-specification)
-- [Model Inference](#model-inference)
- - [Backend model inference](#backend-model-inference)
- - [SDK model inference](#sdk-model-inference)
-- [Supported models](#supported-models)
+- [MMOCR Deployment](#mmocr-deployment)
+ - [Installation](#installation)
+ - [Install mmocr](#install-mmocr)
+ - [Install mmdeploy](#install-mmdeploy)
+ - [Convert model](#convert-model)
+ - [Convert text detection model](#convert-text-detection-model)
+ - [Convert text recognition model](#convert-text-recognition-model)
+ - [Model specification](#model-specification)
+ - [Model Inference](#model-inference)
+ - [Backend model inference](#backend-model-inference)
+ - [SDK model inference](#sdk-model-inference)
+ - [Text detection SDK model inference](#text-detection-sdk-model-inference)
+ - [Text Recognition SDK model inference](#text-recognition-sdk-model-inference)
+ - [Supported models](#supported-models)
+ - [Reminder](#reminder)
______________________________________________________________________
@@ -230,11 +234,29 @@ Besides python API, mmdeploy SDK also provides other FFI (Foreign Function Inter
## Supported models
-| Model | Task | TorchScript | OnnxRuntime | TensorRT | ncnn | PPLNN | OpenVINO |
-| :---------------------------------------------------------------------------- | :--------------- | :---------: | :---------: | :------: | :--: | :---: | :------: |
-| [DBNet](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textdet/dbnet) | text-detection | Y | Y | Y | Y | Y | Y |
-| [PSENet](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textdet/psenet) | text-detection | Y | Y | Y | Y | N | Y |
-| [PANet](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textdet/panet) | text-detection | Y | Y | Y | Y | N | Y |
-| [CRNN](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textrecog/crnn) | text-recognition | Y | Y | Y | Y | Y | N |
-| [SAR](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textrecog/sar) | text-recognition | N | Y | N | N | N | N |
-| [SATRN](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textrecog/satrn) | text-recognition | Y | Y | Y | N | N | N |
+| Model | Task | TorchScript | OnnxRuntime | TensorRT | ncnn | PPLNN | OpenVINO |
+| :---------------------------------------------------------------------------------- | :--------------- | :---------: | :---------: | :------: | :--: | :---: | :------: |
+| [DBNet](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textdet/dbnet) | text-detection | Y | Y | Y | Y | Y | Y |
+| [DBNetpp](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textdet/dbnetpp) | text-detection | N | Y | Y | ? | ? | Y |
+| [PSENet](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textdet/psenet) | text-detection | Y | Y | Y | Y | N | Y |
+| [PANet](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textdet/panet) | text-detection | Y | Y | Y | Y | N | Y |
+| [TextSnake](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textdet/textsnake) | text-detection | Y | Y | Y | ? | ? | ? |
+| [MaskRCNN](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textdet/maskrcnn) | text-detection | Y | Y | Y | ? | ? | ? |
+| [CRNN](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textrecog/crnn) | text-recognition | Y | Y | Y | Y | Y | N |
+| [SAR](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textrecog/sar) | text-recognition | N | Y | Y | N | N | N |
+| [SATRN](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textrecog/satrn) | text-recognition | Y | Y | Y | N | N | N |
+| [ABINet](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textrecog/abinet) | text-recognition | Y | Y | Y | ? | ? | ? |
+
+## Reminder
+
+- ABINet on TensorRT requires PyTorch 1.10+ and TensorRT 8.4+.
+
+- For the TensorRT backend, users have to choose the right config. For example, CRNN accepts only 1-channel inputs. Here is a recommendation table:
+
+ | Model | Config |
+ | :------- | :--------------------------------------------------------- |
+ | MaskRCNN | text-detection_mrcnn_tensorrt_dynamic-320x320-2240x2240.py |
+ | CRNN | text-recognition_tensorrt_dynamic-1x32x32-1x32x640.py |
+ | SATRN | text-recognition_tensorrt_dynamic-32x32-32x640.py |
+ | SAR | text-recognition_tensorrt_dynamic-48x64-48x640.py |
+ | ABINet | text-recognition_tensorrt_static-32x128.py |
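
To double-check that a config matches a model's expected input before converting, the deploy config can be resolved with mmengine; a minimal sketch, assuming an mmdeploy checkout:

```python
from mmengine import Config

# SAR expects 3-channel inputs with height 48; verify the resolved profile.
cfg = Config.fromfile('configs/mmocr/text-recognition/'
                      'text-recognition_tensorrt_dynamic-48x64-48x640.py')
shapes = cfg.backend_config.model_inputs[0].input_shapes.input
print(shapes.min_shape, shapes.opt_shape, shapes.max_shape)
# [1, 3, 48, 64] [1, 3, 48, 64] [1, 3, 48, 640]
```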
diff --git a/docs/zh_cn/03-benchmark/benchmark.md b/docs/zh_cn/03-benchmark/benchmark.md
index 04d3e5e067..1e1a0c5d10 100644
--- a/docs/zh_cn/03-benchmark/benchmark.md
+++ b/docs/zh_cn/03-benchmark/benchmark.md
@@ -1173,6 +1173,42 @@ GPU: ncnn, TensorRT, PPLNN
     <td align="center">0.7949</td>
     <td align="center">0.7950</td>
   </tr>
+  <tr>
+    <td align="center" rowspan="3">DBNetpp</td>
+    <td align="center" rowspan="3">TextDetection</td>
+    <td align="center" rowspan="3">ICDAR2015</td>
+    <td align="center">recall</td>
+    <td align="center">0.8209</td>
+    <td align="center">0.8209</td>
+    <td align="center">0.8209</td>
+    <td align="center">0.8199</td>
+    <td align="center">0.8204</td>
+    <td align="center">0.8204</td>
+    <td align="center">-</td>
+    <td align="center">0.8209</td>
+  </tr>
+  <tr>
+    <td align="center">precision</td>
+    <td align="center">0.9079</td>
+    <td align="center">0.9079</td>
+    <td align="center">0.9079</td>
+    <td align="center">0.9117</td>
+    <td align="center">0.9117</td>
+    <td align="center">0.9142</td>
+    <td align="center">-</td>
+    <td align="center">0.9079</td>
+  </tr>
+  <tr>
+    <td align="center">hmean</td>
+    <td align="center">0.8622</td>
+    <td align="center">0.8622</td>
+    <td align="center">0.8622</td>
+    <td align="center">0.8634</td>
+    <td align="center">0.8637</td>
+    <td align="center">0.8648</td>
+    <td align="center">-</td>
+    <td align="center">0.8622</td>
+  </tr>
   <tr>
     <td align="center" rowspan="3">PSENet</td>
     <td align="center" rowspan="3">TextDetection</td>
@@ -1245,6 +1281,78 @@ GPU: ncnn, TensorRT, PPLNN
     <td align="center">-</td>
     <td align="center">0.7955</td>
   </tr>
+  <tr>
+    <td align="center" rowspan="3">TextSnake</td>
+    <td align="center" rowspan="3">TextDetection</td>
+    <td align="center" rowspan="3">CTW1500</td>
+    <td align="center">recall</td>
+    <td align="center">0.8052</td>
+    <td align="center">0.8052</td>
+    <td align="center">0.8052</td>
+    <td align="center">0.8055</td>
+    <td align="center">-</td>
+    <td align="center">-</td>
+    <td align="center">-</td>
+    <td align="center">-</td>
+  </tr>
+  <tr>
+    <td align="center">precision</td>
+    <td align="center">0.8535</td>
+    <td align="center">0.8535</td>
+    <td align="center">0.8535</td>
+    <td align="center">0.8538</td>
+    <td align="center">-</td>
+    <td align="center">-</td>
+    <td align="center">-</td>
+    <td align="center">-</td>
+  </tr>
+  <tr>
+    <td align="center">hmean</td>
+    <td align="center">0.8286</td>
+    <td align="center">0.8286</td>
+    <td align="center">0.8286</td>
+    <td align="center">0.8290</td>
+    <td align="center">-</td>
+    <td align="center">-</td>
+    <td align="center">-</td>
+    <td align="center">-</td>
+  </tr>
+  <tr>
+    <td align="center" rowspan="3">MaskRCNN</td>
+    <td align="center" rowspan="3">TextDetection</td>
+    <td align="center" rowspan="3">ICDAR2015</td>
+    <td align="center">recall</td>
+    <td align="center">0.7766</td>
+    <td align="center">0.7766</td>
+    <td align="center">0.7766</td>
+    <td align="center">0.7766</td>
+    <td align="center">0.7761</td>
+    <td align="center">0.7670</td>
+    <td align="center">-</td>
+    <td align="center">-</td>
+  </tr>
+  <tr>
+    <td align="center">precision</td>
+    <td align="center">0.8644</td>
+    <td align="center">0.8644</td>
+    <td align="center">0.8644</td>
+    <td align="center">0.8644</td>
+    <td align="center">0.8630</td>
+    <td align="center">0.8705</td>
+    <td align="center">-</td>
+    <td align="center">-</td>
+  </tr>
+  <tr>
+    <td align="center">hmean</td>
+    <td align="center">0.8182</td>
+    <td align="center">0.8182</td>
+    <td align="center">0.8182</td>
+    <td align="center">0.8182</td>
+    <td align="center">0.8172</td>
+    <td align="center">0.8155</td>
+    <td align="center">-</td>
+    <td align="center">-</td>
+  </tr>
   <tr>
     <td align="center">CRNN</td>
     <td align="center">TextRecognition</td>
@@ -1287,6 +1395,20 @@ GPU: ncnn, TensorRT, PPLNN
     <td align="center">-</td>
     <td align="center">-</td>
   </tr>
+  <tr>
+    <td align="center">ABINet</td>
+    <td align="center">TextRecognition</td>
+    <td align="center">IIIT5K</td>
+    <td align="center">acc</td>
+    <td align="center">0.9603</td>
+    <td align="center">0.9563</td>
+    <td align="center">0.9563</td>
+    <td align="center">0.9573</td>
+    <td align="center">0.9507</td>
+    <td align="center">0.9510</td>
+    <td align="center">-</td>
+    <td align="center">-</td>
+  </tr>
diff --git a/docs/zh_cn/03-benchmark/supported_models.md b/docs/zh_cn/03-benchmark/supported_models.md
index a58071df5a..a9eb83b747 100644
--- a/docs/zh_cn/03-benchmark/supported_models.md
+++ b/docs/zh_cn/03-benchmark/supported_models.md
@@ -68,11 +68,15 @@
| [EDSR](https://github.com/open-mmlab/mmediting/tree/1.x/configs/edsr) | MMEditing | Y | Y | Y | Y | N | Y | N | N |
| [RDN](https://github.com/open-mmlab/mmediting/tree/1.x/configs/rdn) | MMEditing | Y | Y | Y | Y | Y | Y | N | N |
| [DBNet](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textdet/dbnet) | MMOCR | Y | Y | Y | Y | Y | Y | Y | N |
+| [DBNetpp](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textdet/dbnetpp) | MMOCR | Y | Y | Y | ? | ? | Y | ? | N |
| [PANet](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textdet/panet) | MMOCR | Y | Y | Y | Y | ? | Y | Y | N |
| [PSENet](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textdet/psenet) | MMOCR | Y | Y | Y | Y | ? | Y | Y | N |
+| [TextSnake](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textdet/textsnake) | MMOCR | Y | Y | Y | Y | ? | ? | ? | N |
+| [MaskRCNN](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textdet/maskrcnn) | MMOCR | Y | Y | Y | ? | ? | ? | ? | N |
| [CRNN](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textrecog/crnn) | MMOCR | Y | Y | Y | Y | Y | N | N | N |
| [SAR](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textrecog/sar) | MMOCR | N | Y | N | N | N | N | N | N |
| [SATRN](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textrecog/satrn) | MMOCR | Y | Y | Y | N | N | N | N | N |
+| [ABINet](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textrecog/abinet) | MMOCR | Y | Y | Y | N | N | N | N | N |
| [HRNet](https://mmpose.readthedocs.io/en/1.x/model_zoo_papers/backbones.html#hrnet-cvpr-2019) | MMPose | N | Y | Y | Y | N | Y | N | N |
| [MSPN](https://mmpose.readthedocs.io/en/1.x/model_zoo_papers/backbones.html#mspn-arxiv-2019) | MMPose | N | Y | Y | Y | N | Y | N | N |
| [LiteHRNet](https://mmpose.readthedocs.io/en/1.x/model_zoo_papers/backbones.html#litehrnet-cvpr-2021) | MMPose | N | Y | Y | N | N | Y | N | N |
diff --git a/docs/zh_cn/04-supported-codebases/mmocr.md b/docs/zh_cn/04-supported-codebases/mmocr.md
index 73aea75d48..ff970f4b5b 100644
--- a/docs/zh_cn/04-supported-codebases/mmocr.md
+++ b/docs/zh_cn/04-supported-codebases/mmocr.md
@@ -1,18 +1,20 @@
# MMOCR 模型部署
-- [安装](#安装)
- - [安装 mmocr](#安装-mmocr)
- - [安装 mmdeploy](#安装-mmdeploy)
-- [模型转换](#模型转换)
- - [文字检测任务模型转换](#文字检测任务模型转换)
- - [文字识别任务模型转换](#文字识别任务模型转换)
-- [模型规范](#模型规范)
-- [模型推理](#模型推理)
- - [后端模型推理](#后端模型推理)
- - [SDK 模型推理](#sdk-模型推理)
- - [文字检测 SDK 模型推理](#文字检测-sdk-模型推理)
- - [文字识别 SDK 模型推理](#文字识别-sdk-模型推理)
-- [模型支持列表](#模型支持列表)
+- [MMOCR 模型部署](#mmocr-模型部署)
+ - [安装](#安装)
+ - [安装 mmocr](#安装-mmocr)
+ - [安装 mmdeploy](#安装-mmdeploy)
+ - [模型转换](#模型转换)
+ - [文字检测任务模型转换](#文字检测任务模型转换)
+ - [文字识别任务模型转换](#文字识别任务模型转换)
+ - [模型规范](#模型规范)
+ - [模型推理](#模型推理)
+ - [后端模型推理](#后端模型推理)
+ - [SDK 模型推理](#sdk-模型推理)
+ - [文字检测 SDK 模型推理](#文字检测-sdk-模型推理)
+ - [文字识别 SDK 模型推理](#文字识别-sdk-模型推理)
+ - [模型支持列表](#模型支持列表)
+ - [注意事项](#注意事项)
______________________________________________________________________
@@ -236,11 +238,29 @@ print(texts)
## 模型支持列表
-| Model | Task | TorchScript | OnnxRuntime | TensorRT | ncnn | PPLNN | OpenVINO |
-| :---------------------------------------------------------------------------- | :--------------- | :---------: | :---------: | :------: | :--: | :---: | :------: |
-| [DBNet](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textdet/dbnet) | text-detection | Y | Y | Y | Y | Y | Y |
-| [PSENet](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textdet/psenet) | text-detection | Y | Y | Y | Y | N | Y |
-| [PANet](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textdet/panet) | text-detection | Y | Y | Y | Y | N | Y |
-| [CRNN](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textrecog/crnn) | text-recognition | Y | Y | Y | Y | Y | N |
-| [SAR](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textrecog/sar) | text-recognition | N | Y | N | N | N | N |
-| [SATRN](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textrecog/satrn) | text-recognition | Y | Y | Y | N | N | N |
+| Model | Task | TorchScript | OnnxRuntime | TensorRT | ncnn | PPLNN | OpenVINO |
+| :---------------------------------------------------------------------------------- | :--------------- | :---------: | :---------: | :------: | :--: | :---: | :------: |
+| [DBNet](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textdet/dbnet) | text-detection | Y | Y | Y | Y | Y | Y |
+| [DBNetpp](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textdet/dbnetpp) | text-detection | N | Y | Y | ? | ? | Y |
+| [PSENet](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textdet/psenet) | text-detection | Y | Y | Y | Y | N | Y |
+| [PANet](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textdet/panet) | text-detection | Y | Y | Y | Y | N | Y |
+| [TextSnake](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textdet/textsnake) | text-detection | Y | Y | Y | ? | ? | ? |
+| [MaskRCNN](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textdet/maskrcnn) | text-detection | Y | Y | Y | ? | ? | ? |
+| [CRNN](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textrecog/crnn) | text-recognition | Y | Y | Y | Y | Y | N |
+| [SAR](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textrecog/sar) | text-recognition | N | Y | Y | N | N | N |
+| [SATRN](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textrecog/satrn) | text-recognition | Y | Y | Y | N | N | N |
+| [ABINet](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textrecog/abinet) | text-recognition | Y | Y | Y | ? | ? | ? |
+
+## 注意事项
+
+- ABINet 在 TensorRT 后端要求使用 PyTorch 1.10+ 和 TensorRT 8.4+。
+
+- 对于 TensorRT 后端,用户需要使用正确的配置文件。比如 CRNN 只接受单通道输入。下面是一个示例表格:
+
+ | Model | Config |
+ | :------- | :--------------------------------------------------------- |
+ | MaskRCNN | text-detection_mrcnn_tensorrt_dynamic-320x320-2240x2240.py |
+ | CRNN | text-recognition_tensorrt_dynamic-1x32x32-1x32x640.py |
+ | SATRN | text-recognition_tensorrt_dynamic-32x32-32x640.py |
+ | SAR | text-recognition_tensorrt_dynamic-48x64-48x640.py |
+ | ABINet | text-recognition_tensorrt_static-32x128.py |
diff --git a/mmdeploy/codebase/mmocr/deploy/text_detection.py b/mmdeploy/codebase/mmocr/deploy/text_detection.py
index 5d2f7e720a..8305199e0d 100644
--- a/mmdeploy/codebase/mmocr/deploy/text_detection.py
+++ b/mmdeploy/codebase/mmocr/deploy/text_detection.py
@@ -1,4 +1,5 @@
# Copyright (c) OpenMMLab. All rights reserved.
+from copy import deepcopy
from typing import Callable, Dict, Optional, Sequence, Tuple, Union
import mmengine
@@ -98,6 +99,41 @@ def __init__(self, model_cfg: mmengine.Config, deploy_cfg: mmengine.Config,
device: str):
super(TextDetection, self).__init__(model_cfg, deploy_cfg, device)
+ def build_pytorch_model(self,
+ model_checkpoint: Optional[str] = None,
+ cfg_options: Optional[Dict] = None,
+ **kwargs) -> torch.nn.Module:
+ """Initialize torch model.
+
+ Args:
+ model_checkpoint (str): The checkpoint file of torch model,
+ defaults to `None`.
+ cfg_options (dict): Optional config key-pair parameters.
+
+ Returns:
+ nn.Module: An initialized torch model generated by other OpenMMLab
+ codebases.
+ """
+ from mmengine.model import revert_sync_batchnorm
+ from mmengine.registry import MODELS
+
+ model = deepcopy(self.model_cfg.model)
+ preprocess_cfg = deepcopy(self.model_cfg.get('preprocess_cfg', {}))
+ preprocess_cfg.update(
+ deepcopy(self.model_cfg.get('data_preprocessor', {})))
+ model.setdefault('data_preprocessor', preprocess_cfg)
+ if model.type == 'MMDetWrapper': # Mask-RCNN in MMOCR
+ model = deepcopy(self.model_cfg.model)
+ model = MODELS.build(model)
+ if model_checkpoint is not None:
+ from mmengine.runner.checkpoint import load_checkpoint
+ load_checkpoint(model, model_checkpoint, map_location=self.device)
+
+ model = revert_sync_batchnorm(model)
+ model = model.to(self.device)
+ model.eval()
+ return model
+
def build_backend_model(self,
model_files: Optional[str] = None,
**kwargs) -> torch.nn.Module:
@@ -225,7 +261,12 @@ def get_preprocess(self, *args, **kwargs) -> Dict:
if transform['type'] == 'Resize':
transforms[i]['size'] = transforms[i].pop('scale')
- data_preprocessor = model_cfg.model.data_preprocessor
+ if 'data_preprocessor' in model_cfg.model:
+ data_preprocessor = model_cfg.model.data_preprocessor
+ elif 'MMDetWrapper' == self.model_cfg.model.type:
+ data_preprocessor = model_cfg.model.cfg.data_preprocessor
+ else:
+ raise ValueError(f'Unsupported model config {model_cfg.model}')
transforms.insert(-1, dict(type='DefaultFormatBundle'))
transforms.insert(
-2,
@@ -247,7 +288,20 @@ def get_postprocess(self, *args, **kwargs) -> Dict:
Return:
dict: Composed of the postprocess information.
"""
- postprocess = self.model_cfg.model.det_head
+ if 'det_head' in self.model_cfg.model:
+ postprocess = self.model_cfg.model.det_head
+ elif 'MMDetWrapper' == self.model_cfg.model.type:
+ params = self.model_cfg.model.cfg.test_cfg
+ type = 'ResizeInstanceMask' # default post-processing type
+ if 'rpn' in params:
+ params['min_bbox_size'] = params['rpn']['min_bbox_size']
+ if 'rcnn' in params:
+ params['score_thr'] = params['rcnn']['score_thr']
+ if 'mask_thr_binary' in params['rcnn']:
+ params['mask_thr_binary'] = params['rcnn'][
+ 'mask_thr_binary']
+ type = 'ResizeInstanceMask' # instance-seg: model exports masks
+ return dict(type=type, params=params)
return postprocess
def get_model_name(self, *args, **kwargs) -> str:
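
The new branches exist because the two model-config layouts differ. Trimmed to the keys these methods read (values illustrative only):

```python
# Native MMOCR detectors keep everything at the top level of `model`.
native_cfg = dict(
    type='DBNet',
    data_preprocessor=dict(type='TextDetDataPreprocessor'),
    det_head=dict(type='DBHead'))

# The MMDetWrapper used for Mask R-CNN nests the detector under `cfg`,
# so data_preprocessor and test_cfg live one level deeper.
wrapped_cfg = dict(
    type='MMDetWrapper',
    cfg=dict(
        type='MaskRCNN',
        data_preprocessor=dict(type='DetDataPreprocessor'),
        test_cfg=dict(
            rpn=dict(min_bbox_size=0),
            rcnn=dict(score_thr=0.05, mask_thr_binary=0.5))))
```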
diff --git a/mmdeploy/codebase/mmocr/deploy/text_detection_model.py b/mmdeploy/codebase/mmocr/deploy/text_detection_model.py
index da5d8720f5..de0c407666 100644
--- a/mmdeploy/codebase/mmocr/deploy/text_detection_model.py
+++ b/mmdeploy/codebase/mmocr/deploy/text_detection_model.py
@@ -1,6 +1,7 @@
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List, Optional, Sequence, Union
+import cv2
import mmengine
import torch
from mmengine.registry import Registry
@@ -36,14 +37,22 @@ def __init__(
model_cfg: Optional[mmengine.Config] = None,
**kwargs,
):
+ data_preprocessor = model_cfg.model.get('data_preprocessor', {})
+ if data_preprocessor is not None: # None for SDKEnd2EndModel
+ data_preprocessor.update(
+ model_cfg.model.get('cfg', {}).get('data_preprocessor', {}))
+ if data_preprocessor.get('type', None) == 'DetDataPreprocessor':
+ data_preprocessor.update(_scope_='mmdet') # MRCNN
super(End2EndModel, self).__init__(
- deploy_cfg=deploy_cfg,
- data_preprocessor=model_cfg.model.data_preprocessor)
+ deploy_cfg=deploy_cfg, data_preprocessor=data_preprocessor)
self.deploy_cfg = deploy_cfg
self.show_score = False
from mmocr.registry import MODELS
- self.det_head = MODELS.build(model_cfg.model.det_head)
+ if hasattr(model_cfg.model, 'det_head'):
+ self.det_head = MODELS.build(model_cfg.model.det_head)
+ else:
+ self.text_repr_type = model_cfg.model.get('text_repr_type', 'poly')
self._init_wrapper(
backend=backend,
backend_files=backend_files,
@@ -101,7 +110,79 @@ def forward(self,
instance, in (xn, yn) order.
"""
x = self.extract_feat(inputs)
- return self.det_head.postprocessor(x[0], data_samples)
+ if hasattr(self, 'det_head'):
+ return self.det_head.postprocessor(x[0], data_samples)
+ # post-process of mmdet models
+ from mmdet.structures.mask import bitmap_to_polygon
+ from mmocr.utils.bbox_utils import bbox2poly
+
+ from mmdeploy.codebase.mmdet.deploy import get_post_processing_params
+ from mmdeploy.codebase.mmdet.deploy.object_detection_model import \
+ End2EndModel as DetModel
+ if len(x) == 3: # instance seg
+ batch_dets, _, batch_masks = x
+ for i in range(batch_dets.size(0)):
+ masks = batch_masks[i]
+ bboxes = batch_dets[i, :, :4]
+ bboxes[:, ::2] /= data_samples[i].scale_factor[0]
+ bboxes[:, 1::2] /= data_samples[i].scale_factor[1]
+ ori_h, ori_w = data_samples[i].ori_shape[:2]
+ img_h, img_w = data_samples[i].img_shape[:2]
+ export_postprocess_mask = True
+ polygons = []
+ scores = []
+ if self.deploy_cfg is not None:
+ codebase_cfg = get_post_processing_params(self.deploy_cfg)
+ # this flag enable postprocess when export.
+ export_postprocess_mask = codebase_cfg.get(
+ 'export_postprocess_mask', True)
+ if not export_postprocess_mask:
+ masks = DetModel.postprocessing_masks(
+ bboxes, masks, ori_w, ori_h, batch_masks.device)
+ else:
+ masks = masks[:, :img_h, :img_w]
+ masks = torch.nn.functional.interpolate(
+ masks.unsqueeze(0).float(), size=(ori_h, ori_w))
+ masks = masks.squeeze(0)
+ if masks.dtype != bool:
+ masks = masks >= 0.5
+
+ for mask_idx, mask in enumerate(masks.cpu()):
+ contours, _ = bitmap_to_polygon(mask)
+ polygons += [contour.reshape(-1) for contour in contours]
+ scores += [batch_dets[i, :, 4][mask_idx].cpu()
+ ] * len(contours)
+ # filter invalid polygons
+ filtered_polygons = []
+ keep_idx = []
+ for poly_idx, polygon in enumerate(polygons):
+ if len(polygon) < 6:
+ continue
+ filtered_polygons.append(polygon)
+ keep_idx.append(poly_idx)
+ # convert by text_repr_type
+ if self.text_repr_type == 'quad':
+ for j, poly in enumerate(filtered_polygons):
+ rect = cv2.minAreaRect(poly.reshape(-1, 2))  # needs (N, 2) points
+ vertices = cv2.boxPoints(rect)
+ poly = vertices.flatten()
+ filtered_polygons[j] = poly
+ pred_instances = InstanceData()
+ pred_instances.polygons = filtered_polygons
+ pred_instances.scores = torch.FloatTensor(scores)[keep_idx]
+ data_samples[i].pred_instances = pred_instances
+ else:
+ dets = x[0]
+ for i in range(dets.size(0)):
+ bboxes = dets[i, :, :4].cpu().numpy()
+ bboxes[:, ::2] /= data_samples[i].scale_factor[0]
+ bboxes[:, 1::2] /= data_samples[i].scale_factor[1]
+ polygons = [bbox2poly(bbox) for bbox in bboxes]
+ pred_instances = InstanceData()
+ pred_instances.polygons = polygons
+ pred_instances.scores = torch.FloatTensor(dets[i, :, 4].cpu())
+ data_samples[i].pred_instances = pred_instances
+ return data_samples
def extract_feat(self, batch_inputs: torch.Tensor) -> torch.Tensor:
"""The interface for forward test.
diff --git a/mmdeploy/codebase/mmocr/models/text_detection/__init__.py b/mmdeploy/codebase/mmocr/models/text_detection/__init__.py
index abaed26541..91cd1655f3 100644
--- a/mmdeploy/codebase/mmocr/models/text_detection/__init__.py
+++ b/mmdeploy/codebase/mmocr/models/text_detection/__init__.py
@@ -1,4 +1,5 @@
# Copyright (c) OpenMMLab. All rights reserved.
from . import fpn_cat # noqa: F401,F403
from . import heads # noqa: F401,F403
+from . import mmdet_wrapper # noqa: F401,F403
from . import single_stage_text_detector # noqa: F401,F403
diff --git a/mmdeploy/codebase/mmocr/models/text_detection/heads.py b/mmdeploy/codebase/mmocr/models/text_detection/heads.py
index d23c957ff4..3c408de304 100644
--- a/mmdeploy/codebase/mmocr/models/text_detection/heads.py
+++ b/mmdeploy/codebase/mmocr/models/text_detection/heads.py
@@ -2,7 +2,7 @@
from typing import Dict
import torch
-from mmocr.utils.typing_utils import DetSampleList
+from mmocr.utils import DetSampleList
from mmdeploy.core import FUNCTION_REWRITER
diff --git a/mmdeploy/codebase/mmocr/models/text_detection/mmdet_wrapper.py b/mmdeploy/codebase/mmocr/models/text_detection/mmdet_wrapper.py
new file mode 100644
index 0000000000..67adc3a45c
--- /dev/null
+++ b/mmdeploy/codebase/mmocr/models/text_detection/mmdet_wrapper.py
@@ -0,0 +1,56 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from typing import Optional, Sequence, Union
+
+import torch
+from mmdet.structures import DetDataSample
+from mmdet.structures import SampleList as MMDET_SampleList
+from mmocr.structures import TextDetDataSample
+from mmocr.utils.typing_utils import DetSampleList
+
+from mmdeploy.core import FUNCTION_REWRITER
+
+
+@FUNCTION_REWRITER.register_rewriter(
+ func_name='mmocr.models.textdet.detectors.MMDetWrapper.forward')
+def mmdet_wrapper__forward(self,
+ inputs: torch.Tensor,
+ data_samples: Optional[Union[
+ DetSampleList, MMDET_SampleList]] = None,
+ mode: str = 'tensor',
+ **kwargs) -> Sequence[TextDetDataSample]:
+ """The unified entry for a forward process in both training and test.
+
+ The method works in three modes: "tensor", "predict" and "loss":
+
+ - "tensor": Forward the whole network and return tensor or tuple of
+ tensor without any post-processing, same as a common nn.Module.
+ - "predict": Forward and return the predictions, which are fully
+ processed to a list of :obj:`DetDataSample`.
+ - "loss": Forward and return a dict of losses according to the given
+ inputs and data samples.
+
+ Note that this method doesn't handle either back propagation or
+ parameter update, which are supposed to be done in :meth:`train_step`.
+
+ Args:
+ inputs (torch.Tensor): The input tensor with shape
+ (N, C, ...) in general.
+ data_samples (list[:obj:`DetDataSample`] or
+ list[:obj:`TextDetDataSample`]): The annotation data of every
+ sample. When in "predict" mode, it should be a list of
+ :obj:`TextDetDataSample`. Otherwise they are
+ :obj:`DetDataSample`s. Defaults to None.
+ mode (str): Running mode. Defaults to 'tensor'.
+
+ Returns:
+ results (Sequence[torch.Tensor]): Output of the wrapped MMDet model.
+ """
+ if mode == 'predict':
+ ocr_data_samples = data_samples
+ data_samples = []
+ for i in range(len(ocr_data_samples)):
+ data_samples.append(
+ DetDataSample(metainfo=ocr_data_samples[i].metainfo))
+
+ results = self.wrapped_model.forward(inputs, data_samples, mode, **kwargs)
+ return results
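
The 'predict' branch only re-wraps metadata: each `TextDetDataSample`'s metainfo is copied into a fresh `DetDataSample` so the wrapped MMDet model receives the sample type it expects. A small illustration, assuming mmdet and mmocr are installed:

```python
from mmdet.structures import DetDataSample
from mmocr.structures import TextDetDataSample

ocr_sample = TextDetDataSample(
    metainfo=dict(img_shape=(800, 800), scale_factor=(1.0, 1.0)))
det_sample = DetDataSample(metainfo=ocr_sample.metainfo)
print(det_sample.img_shape)  # (800, 800)
```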
diff --git a/mmdeploy/codebase/mmocr/models/text_recognition/__init__.py b/mmdeploy/codebase/mmocr/models/text_recognition/__init__.py
index f3ed1e7976..b2991af218 100644
--- a/mmdeploy/codebase/mmocr/models/text_recognition/__init__.py
+++ b/mmdeploy/codebase/mmocr/models/text_recognition/__init__.py
@@ -1,7 +1,9 @@
# Copyright (c) OpenMMLab. All rights reserved.
+from . import abi_language_decoder # noqa: F401,F403
from . import base_decoder # noqa: F401,F403
from . import crnn_decoder # noqa: F401,F403
from . import encoder_decoder_recognizer # noqa: F401,F403
from . import lstm_layer # noqa: F401,F403
from . import sar_decoder # noqa: F401,F403
from . import sar_encoder # noqa: F401,F403
+from . import transformer_module # noqa: F401,F403
diff --git a/mmdeploy/codebase/mmocr/models/text_recognition/abi_language_decoder.py b/mmdeploy/codebase/mmocr/models/text_recognition/abi_language_decoder.py
new file mode 100644
index 0000000000..3e5d242297
--- /dev/null
+++ b/mmdeploy/codebase/mmocr/models/text_recognition/abi_language_decoder.py
@@ -0,0 +1,31 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+
+from mmdeploy.core import FUNCTION_REWRITER
+from mmdeploy.utils import IR
+
+
+@FUNCTION_REWRITER.register_rewriter(
+ func_name='mmocr.models.textrecog.decoders.ABILanguageDecoder._get_length',
+ IR=IR.ONNX)
+def abi_language_decoder___get_length__default(self,
+ logit: torch.Tensor,
+ dim: int = -1,
+ **kwargs) -> torch.Tensor:
+ """Rewrite `_get_length`. Add `.float()` to cast Tensors from bool to float
+ for `cumsum` and `argmax`.
+
+ Returns the first location of padding index or the length of the entire
+ tensor otherwise.
+ """
+ # out as a boolean vector indicating the existence of end token(s)
+ out = (logit.argmax(dim=-1) == self.dictionary.end_idx)
+ abn = out.any(dim)
+ # Get the first index of end token
+ # add `.float()` to `out` for onnxruntime `cumsum()`
+ # add `.float()` before `argmax()`
+ out = ((out.float().cumsum(dim) == 1) & out).float().argmax(dim)
+ out = out + 1
+ out = torch.where(abn, out,
+ out.new_tensor(logit.shape[1]).to(out.device)).float()
+ return out
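
A worked example of the cumsum/argmax trick: the first end-token position is the unique index where the running count of matches equals 1 and the element itself is True.

```python
import torch

out = torch.tensor([[False, True, False, True]])
first = ((out.float().cumsum(-1) == 1) & out).float().argmax(-1)
print(first)  # tensor([1]): the end token first appears at index 1
```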
diff --git a/mmdeploy/codebase/mmocr/models/text_recognition/transformer_module.py b/mmdeploy/codebase/mmocr/models/text_recognition/transformer_module.py
new file mode 100644
index 0000000000..d5cfad4de7
--- /dev/null
+++ b/mmdeploy/codebase/mmocr/models/text_recognition/transformer_module.py
@@ -0,0 +1,44 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+from torch import nn
+
+from mmdeploy.core import MODULE_REWRITER
+
+
+@MODULE_REWRITER.register_rewrite_module(
+ 'mmocr.models.common.modules.PositionalEncoding', backend='default')
+class PositionalEncoding(nn.Module):
+ """Rewrite Position Encoding module in `ABINet."""
+
+ def __init__(self, module, deploy_cfg, **kwargs):
+ super(PositionalEncoding, self).__init__()
+ self._module = module
+ self.deploy_cfg = deploy_cfg
+ self.n_position = module.position_table.size(1)
+ self.d_hid = module.position_table.size(2)
+
+ def _get_sinusoid_encoding_table(self, n_position, d_hid, device):
+ """Sinusoid position encoding table."""
+ denominator = torch.Tensor([
+ 1.0 / torch.tensor(10000).to(device).pow(
+ torch.tensor(2 * (hid_j // 2) / d_hid)).to(device)
+ for hid_j in range(d_hid)
+ ]).to(device)
+ denominator = denominator.view(1, -1)
+ pos_tensor = torch.arange(n_position).to(device).unsqueeze(-1).float()
+ sinusoid_table = pos_tensor * denominator
+ sinusoid_table[:, 0::2] = torch.sin(sinusoid_table[:, 0::2])
+ sinusoid_table[:, 1::2] = torch.cos(sinusoid_table[:, 1::2])
+
+ return sinusoid_table.unsqueeze(0)
+
+ def forward(self, x):
+ """
+ Args:
+ x (Tensor): Tensor of shape (batch_size, pos_len, d_hid, ...)
+ """
+ device = x.device
+ position_table = self._get_sinusoid_encoding_table(
+ self.n_position, self.d_hid, device)
+ x = x + position_table[:, :x.size(1), ...]
+ return x
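
The table computed on the fly matches the classic sinusoid formula PE[pos, 2i] = sin(pos / 10000^(2i/d)), PE[pos, 2i+1] = cos(pos / 10000^(2i/d)); a quick standalone check:

```python
import math
import torch

n_position, d_hid = 8, 4
pos = torch.arange(n_position).unsqueeze(-1).float()
denom = torch.tensor([1.0 / 10000**(2 * (j // 2) / d_hid)
                      for j in range(d_hid)]).view(1, -1)
table = pos * denom
table[:, 0::2] = torch.sin(table[:, 0::2])
table[:, 1::2] = torch.cos(table[:, 1::2])
assert math.isclose(table[1, 0].item(), math.sin(1.0), rel_tol=1e-6)
```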
diff --git a/mmdeploy/pytorch/functions/__init__.py b/mmdeploy/pytorch/functions/__init__.py
index a07a2aab06..5ee8ef348d 100644
--- a/mmdeploy/pytorch/functions/__init__.py
+++ b/mmdeploy/pytorch/functions/__init__.py
@@ -1,5 +1,6 @@
# Copyright (c) OpenMMLab. All rights reserved.
from . import adaptive_pool # noqa: F401,F403
+from . import any # noqa: F401,F403
from . import atan2 # noqa: F401,F403
from . import chunk # noqa: F401,F403
from . import clip # noqa: F401,F403
diff --git a/mmdeploy/pytorch/functions/any.py b/mmdeploy/pytorch/functions/any.py
new file mode 100644
index 0000000000..469b7c327b
--- /dev/null
+++ b/mmdeploy/pytorch/functions/any.py
@@ -0,0 +1,19 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from typing import Optional
+
+import torch
+
+from mmdeploy.core import FUNCTION_REWRITER
+
+
+@FUNCTION_REWRITER.register_rewriter(func_name='torch.Tensor.any')
+@FUNCTION_REWRITER.register_rewriter(func_name='torch.any')
+def any__default(input: torch.Tensor,
+ dim: Optional[int] = None,
+ keepdim: bool = False,
+ **kwargs) -> torch.Tensor:
+ """Rewrite `any` for ONNX."""
+ if dim is None and keepdim is False:
+ return (input != 0).sum() > 0
+
+ return (input != 0).sum(dim, keepdim=keepdim) > 0
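
The rewrite reproduces `torch.any` with operators that export cleanly to ONNX: count the non-zero elements and compare with zero. A quick equivalence check:

```python
import torch

x = torch.tensor([[0, 0, 3], [0, 0, 0]])
assert torch.equal((x != 0).sum(1) > 0, x.any(1))
assert bool((x != 0).sum() > 0) == bool(x.any())
```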
diff --git a/tests/regression/mmocr.yml b/tests/regression/mmocr.yml
index 71df518d42..16ce8c5a18 100644
--- a/tests/regression/mmocr.yml
+++ b/tests/regression/mmocr.yml
@@ -34,6 +34,10 @@ onnxruntime:
convert_image: *convert_image_det
deploy_config: configs/mmocr/text-detection/text-detection_onnxruntime_dynamic.py
+ pipeline_ort_detection_mrcnn_dynamic_fp32: &pipeline_ort_detection_mrcnn_dynamic_fp32
+ convert_image: *convert_image_det
+ deploy_config: configs/mmocr/text-detection/text-detection_mrcnn_onnxruntime_dynamic.py
+
# ======= recognition =======
pipeline_ort_recognition_static_fp32: &pipeline_ort_recognition_static_fp32
convert_image: *convert_image_rec
@@ -69,12 +73,24 @@ tensorrt:
sdk_config: *sdk_detection_dynamic
deploy_config: configs/mmocr/text-detection/text-detection_tensorrt_dynamic-320x320-2240x2240.py
+ pipeline_trt_detection_mrcnn_dynamic_fp32: &pipeline_trt_detection_mrcnn_dynamic_fp32
+ convert_image: *convert_image_det
+ backend_test: *default_backend_test
+ sdk_config: *sdk_detection_dynamic
+ deploy_config: configs/mmocr/text-detection/text-detection_mrcnn_tensorrt_dynamic-320x320-2240x2240.py
+
pipeline_trt_detection_dynamic_fp16: &pipeline_trt_detection_dynamic_fp16
convert_image: *convert_image_det
backend_test: *default_backend_test
sdk_config: *sdk_detection_dynamic
deploy_config: configs/mmocr/text-detection/text-detection_tensorrt-fp16_dynamic-320x320-2240x2240.py
+ pipeline_trt_detection_mrcnn_dynamic_fp16: &pipeline_trt_detection_mrcnn_dynamic_fp16
+ convert_image: *convert_image_det
+ backend_test: *default_backend_test
+ sdk_config: *sdk_detection_dynamic
+ deploy_config: configs/mmocr/text-detection/text-detection_mrcnn_tensorrt-fp16_dynamic-320x320-2240x2240.py
+
pipeline_trt_detection_dynamic_int8: &pipeline_trt_detection_dynamic_int8
convert_image: *convert_image_det
backend_test: *default_backend_test
@@ -82,42 +98,82 @@ tensorrt:
deploy_config: configs/mmocr/text-detection/text-detection_tensorrt-int8_dynamic-320x320-2240x2240.py
# ======= recognition =======
- pipeline_trt_recognition_static_fp32: &pipeline_trt_recognition_static_fp32
+ pipeline_trt_recognition_static_fp32_C1: &pipeline_trt_recognition_static_fp32_C1
convert_image: *convert_image_rec
backend_test: *default_backend_test
sdk_config: *sdk_recognition_dynamic
deploy_config: configs/mmocr/text-recognition/text-recognition_tensorrt_static-1x32x32.py
- pipeline_trt_recognition_static_fp16: &pipeline_trt_recognition_static_fp16
+ # ABINet models with static shape 32x128
+ pipeline_trt_recognition_static_fp32_C3: &pipeline_trt_recognition_static_fp32_C3
+ convert_image: *convert_image_rec
+ backend_test: *default_backend_test
+ sdk_config: *sdk_recognition_dynamic
+ deploy_config: configs/mmocr/text-recognition/text-recognition_tensorrt_static-32x128.py
+
+ pipeline_trt_recognition_static_fp16_C3: &pipeline_trt_recognition_static_fp16_C3
+ convert_image: *convert_image_rec
+ backend_test: *default_backend_test
+ sdk_config: *sdk_recognition_dynamic
+ deploy_config: configs/mmocr/text-recognition/text-recognition_tensorrt-fp16_static-32x128.py
+
+ # SAR models with height 48 and channel 3
+ pipeline_trt_recognition_dynamic_fp32_H48_C3: &pipeline_trt_recognition_dynamic_fp32_H48_C3
+ convert_image: *convert_image_rec
+ backend_test: *default_backend_test
+ sdk_config: *sdk_recognition_dynamic
+ deploy_config: configs/mmocr/text-recognition/text-recognition_tensorrt_dynamic-48x64-48x640.py
+
+ pipeline_trt_recognition_dynamic_fp16_H48_C3: &pipeline_trt_recognition_dynamic_fp16_H48_C3
convert_image: *convert_image_rec
backend_test: *default_backend_test
sdk_config: *sdk_recognition_dynamic
- deploy_config: configs/mmocr/text-recognition/text-recognition_tensorrt-fp16_static-1x32x32.py
+ deploy_config: configs/mmocr/text-recognition/text-recognition_tensorrt-fp16_dynamic-48x64-48x640.py
- pipeline_trt_recognition_static_int8: &pipeline_trt_recognition_static_int8
+ pipeline_trt_recognition_dynamic_int8_H48_C3: &pipeline_trt_recognition_dynamic_int8_H48_C3
convert_image: *convert_image_rec
backend_test: *default_backend_test
sdk_config: *sdk_recognition_dynamic
- deploy_config: configs/mmocr/text-recognition/text-recognition_tensorrt-int8_static-1x32x32.py
+ deploy_config: configs/mmocr/text-recognition/text-recognition_tensorrt-int8_dynamic-48x64-48x640.py
- pipeline_trt_recognition_dynamic_fp32: &pipeline_trt_recognition_dynamic_fp32
+ # CRNN models with height 32 and channel 1
+ pipeline_trt_recognition_dynamic_fp32_H32_C1: &pipeline_trt_recognition_dynamic_fp32_H32_C1
convert_image: *convert_image_rec
backend_test: *default_backend_test
sdk_config: *sdk_recognition_dynamic
deploy_config: configs/mmocr/text-recognition/text-recognition_tensorrt_dynamic-1x32x32-1x32x640.py
- pipeline_trt_recognition_dynamic_fp16: &pipeline_trt_recognition_dynamic_fp16
+ pipeline_trt_recognition_dynamic_fp16_H32_C1: &pipeline_trt_recognition_dynamic_fp16_H32_C1
convert_image: *convert_image_rec
backend_test: *default_backend_test
sdk_config: *sdk_recognition_dynamic
deploy_config: configs/mmocr/text-recognition/text-recognition_tensorrt-fp16_dynamic-1x32x32-1x32x640.py
- pipeline_trt_recognition_dynamic_int8: &pipeline_trt_recognition_dynamic_int8
+ pipeline_trt_recognition_dynamic_int8_H32_C1: &pipeline_trt_recognition_dynamic_int8_H32_C1
convert_image: *convert_image_rec
backend_test: *default_backend_test
sdk_config: *sdk_recognition_dynamic
deploy_config: configs/mmocr/text-recognition/text-recognition_tensorrt-int8_dynamic-1x32x32-1x32x640.py
+ # SATRN models with height 32 and channel 3
+ pipeline_trt_recognition_dynamic_fp32_H32_C3: &pipeline_trt_recognition_dynamic_fp32_H32_C3
+ convert_image: *convert_image_rec
+ backend_test: *default_backend_test
+ sdk_config: *sdk_recognition_dynamic
+ deploy_config: configs/mmocr/text-recognition/text-recognition_tensorrt_dynamic-32x32-32x640.py
+
+ pipeline_trt_recognition_dynamic_fp16_H32_C3: &pipeline_trt_recognition_dynamic_fp16_H32_C3
+ convert_image: *convert_image_rec
+ backend_test: *default_backend_test
+ sdk_config: *sdk_recognition_dynamic
+ deploy_config: configs/mmocr/text-recognition/text-recognition_tensorrt-fp16_dynamic-32x32-32x640.py
+
+ pipeline_trt_recognition_dynamic_int8_H32_C3: &pipeline_trt_recognition_dynamic_int8_H32_C3
+ convert_image: *convert_image_rec
+ backend_test: *default_backend_test
+ sdk_config: *sdk_recognition_dynamic
+ deploy_config: configs/mmocr/text-recognition/text-recognition_tensorrt-int8_dynamic-32x32-32x640.py
+
openvino:
pipeline_openvino_detection_dynamic_fp32: &pipeline_openvino_detection_dynamic_fp32
convert_image: *convert_image_det
@@ -157,6 +213,11 @@ torchscript:
backend_test: False
deploy_config: configs/mmocr/text-detection/text-detection_torchscript.py
+ pipeline_ts_detection_mrcnn_fp32: &pipeline_ts_detection_mrcnn_fp32
+ convert_image: *convert_image_det
+ backend_test: False
+ deploy_config: configs/mmocr/text-detection/text-detection_mrcnn_torchscript.py
+
pipeline_ts_recognition_fp32: &pipeline_ts_recognition_fp32
convert_image: *convert_image_rec
backend_test: False
@@ -176,6 +237,16 @@ models:
- *pipeline_pplnn_detection_dynamic_fp32
- *pipeline_openvino_detection_dynamic_fp32
+ - name: DBNetpp
+ metafile: configs/textdet/dbnetpp/metafile.yml
+ model_configs:
+ - configs/textdet/dbnetpp/dbnetpp_resnet50_fpnc_1200e_icdar2015.py
+ pipelines:
+ - *pipeline_ort_detection_dynamic_fp32
+ - *pipeline_trt_detection_dynamic_fp16
+ - *pipeline_ncnn_detection_static_fp32
+ - *pipeline_openvino_detection_dynamic_fp32
+
- name: PANet
metafile: configs/textdet/panet/metafile.yml
model_configs:
@@ -188,6 +259,36 @@ models:
- *pipeline_pplnn_detection_dynamic_fp32
- *pipeline_openvino_detection_dynamic_fp32
+ - name: PSENet
+ metafile: configs/textdet/psenet/metafile.yml
+ model_configs:
+ - configs/textdet/psenet/psenet_resnet50_fpnf_600e_icdar2015.py
+ pipelines:
+ - *pipeline_ts_detection_fp32
+ - *pipeline_ort_detection_dynamic_fp32
+ - *pipeline_trt_detection_dynamic_fp16
+ - *pipeline_ncnn_detection_static_fp32
+ - *pipeline_pplnn_detection_dynamic_fp32
+ - *pipeline_openvino_detection_dynamic_fp32
+
+ - name: TextSnake
+ metafile: configs/textdet/textsnake/metafile.yml
+ model_configs:
+ - configs/textdet/textsnake/textsnake_resnet50_fpn-unet_1200e_ctw1500.py
+ pipelines:
+ - *pipeline_ts_detection_fp32
+ - *pipeline_ort_detection_dynamic_fp32
+ - *pipeline_trt_detection_dynamic_fp32
+
+ - name: MaskRCNN
+ metafile: configs/textdet/maskrcnn/metafile.yml
+ model_configs:
+ - configs/textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_icdar2015.py
+ pipelines:
+ - *pipeline_ts_detection_mrcnn_fp32
+ - *pipeline_ort_detection_mrcnn_dynamic_fp32
+ - *pipeline_trt_detection_mrcnn_dynamic_fp32
+
- name: CRNN
metafile: configs/textrecog/crnn/metafile.yml
model_configs:
@@ -195,6 +296,33 @@ models:
pipelines:
- *pipeline_ts_recognition_fp32
- *pipeline_ort_recognition_dynamic_fp32
- - *pipeline_trt_recognition_dynamic_fp16
+ - *pipeline_trt_recognition_dynamic_fp16_H32_C1
- *pipeline_ncnn_recognition_static_fp32
- *pipeline_pplnn_recognition_dynamic_fp32
+
+ - name: SAR
+ metafile: configs/textrecog/sar/metafile.yml
+ model_configs:
+ - configs/textrecog/sar/sar_resnet31_parallel-decoder_5e_st-sub_mj-sub_sa_real.py
+ pipelines:
+ - *pipeline_ts_recognition_fp32
+ - *pipeline_ort_recognition_dynamic_fp32
+ - *pipeline_trt_recognition_dynamic_fp32_H48_C3
+
+ - name: SATRN
+ metafile: configs/textrecog/satrn/metafile.yml
+ model_configs:
+ - configs/textrecog/satrn/satrn_shallow-small_5e_st_mj.py
+ pipelines:
+ - *pipeline_ts_recognition_fp32
+ - *pipeline_ort_recognition_dynamic_fp32
+ - *pipeline_trt_recognition_dynamic_fp32_H32_C3
+
+ - name: ABINet
+ metafile: configs/textrecog/abinet/metafile.yml
+ model_configs:
+ - configs/textrecog/abinet/abinet_20e_st-an_mj.py
+ pipelines:
+ - *pipeline_ts_recognition_fp32
+ - *pipeline_ort_recognition_static_fp32
+ - *pipeline_trt_recognition_static_fp16_C3
diff --git a/tests/test_codebase/test_mmocr/data/mrcnn.py b/tests/test_codebase/test_mmocr/data/mrcnn.py
new file mode 100644
index 0000000000..732c834423
--- /dev/null
+++ b/tests/test_codebase/test_mmocr/data/mrcnn.py
@@ -0,0 +1,135 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+model = dict(
+ type='MMDetWrapper',
+ text_repr_type='poly',
+ cfg=dict(
+ type='MaskRCNN',
+ data_preprocessor=dict(
+ type='DetDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True,
+ pad_mask=False,
+ pad_size_divisor=32),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ num_stages=4,
+ out_indices=(0, 1, 2, 3),
+ frozen_stages=1,
+ norm_cfg=dict(type='BN', requires_grad=True),
+ norm_eval=True,
+ style='pytorch',
+ init_cfg=dict(
+ type='Pretrained', checkpoint='torchvision://resnet50')),
+ neck=dict(
+ type='FPN',
+ in_channels=[256, 512, 1024, 2048],
+ out_channels=256,
+ num_outs=5),
+ rpn_head=dict(
+ type='RPNHead',
+ in_channels=256,
+ feat_channels=256,
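+ # These ratios differ from mmdet's default [0.5, 1.0, 2.0]; they look
+ # tuned for the elongated aspect ratios of text instances.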
+ anchor_generator=dict(
+ type='AnchorGenerator',
+ scales=[4],
+ ratios=[0.17, 0.44, 1.13, 2.9, 7.46],
+ strides=[4, 8, 16, 32, 64]),
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0.0, 0.0, 0.0, 0.0],
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
+ loss_cls=dict(
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
+ roi_head=dict(
+ type='StandardRoIHead',
+ bbox_roi_extractor=dict(
+ type='SingleRoIExtractor',
+ roi_layer=dict(
+ type='RoIAlign', output_size=7, sampling_ratio=0),
+ out_channels=256,
+ featmap_strides=[4, 8, 16, 32]),
+ bbox_head=dict(
+ type='Shared2FCBBoxHead',
+ in_channels=256,
+ fc_out_channels=1024,
+ roi_feat_size=7,
+ num_classes=1,
+ bbox_coder=dict(
+ type='DeltaXYWHBBoxCoder',
+ target_means=[0.0, 0.0, 0.0, 0.0],
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
+ reg_class_agnostic=False,
+ loss_cls=dict(
+ type='CrossEntropyLoss',
+ use_sigmoid=False,
+ loss_weight=1.0),
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
+ mask_roi_extractor=dict(
+ type='SingleRoIExtractor',
+ roi_layer=dict(
+ type='RoIAlign', output_size=14, sampling_ratio=0),
+ out_channels=256,
+ featmap_strides=[4, 8, 16, 32]),
+ mask_head=dict(
+ type='FCNMaskHead',
+ num_convs=4,
+ in_channels=256,
+ conv_out_channels=256,
+ num_classes=1,
+ loss_mask=dict(
+ type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
+ train_cfg=dict(
+ rpn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.7,
+ neg_iou_thr=0.3,
+ min_pos_iou=0.3,
+ match_low_quality=True,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=256,
+ pos_fraction=0.5,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=False),
+ allowed_border=-1,
+ pos_weight=-1,
+ debug=False),
+ rpn_proposal=dict(
+ nms_pre=2000,
+ max_per_img=1000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=dict(
+ assigner=dict(
+ type='MaxIoUAssigner',
+ pos_iou_thr=0.5,
+ neg_iou_thr=0.5,
+ min_pos_iou=0.5,
+ match_low_quality=True,
+ ignore_iof_thr=-1),
+ sampler=dict(
+ type='RandomSampler',
+ num=512,
+ pos_fraction=0.25,
+ neg_pos_ub=-1,
+ add_gt_as_proposals=True),
+ mask_size=28,
+ pos_weight=-1,
+ debug=False)),
+ test_cfg=dict(
+ rpn=dict(
+ nms_pre=1000,
+ max_per_img=1000,
+ nms=dict(type='nms', iou_threshold=0.7),
+ min_bbox_size=0),
+ rcnn=dict(
+ score_thr=0.05,
+ nms=dict(type='nms', iou_threshold=0.5),
+ max_per_img=100,
+ mask_thr_binary=0.5)),
+ _scope_='mmdet'))
diff --git a/tests/test_codebase/test_mmocr/test_mmocr_models.py b/tests/test_codebase/test_mmocr/test_mmocr_models.py
index 87dcd49783..1d07d5f9e6 100644
--- a/tests/test_codebase/test_mmocr/test_mmocr_models.py
+++ b/tests/test_codebase/test_mmocr/test_mmocr_models.py
@@ -9,6 +9,7 @@
from mmdeploy.codebase import import_codebase
from mmdeploy.core import RewriterContext, patch_model
from mmdeploy.utils import Backend, Codebase
+from mmdeploy.utils.config_utils import load_config
from mmdeploy.utils.test import (WrapModel, check_backend, get_model_outputs,
get_rewrite_outputs)
@@ -22,7 +23,8 @@
dictionary = dict(
type='Dictionary',
dict_file='tests/test_codebase/test_mmocr/data/lower_english_digits.txt',
- with_padding=True)
+ with_padding=True,
+ with_end=True)
class FPNCNeckModel(FPNC):
@@ -408,16 +410,6 @@ def test_sar_model(backend: Backend, decoder_type):
sar_cfg.model.pop('type')
pytorch_model = SARNet(**(sar_cfg.model))
- # img_meta = {
- # 'ori_shape': [48, 160],
- # 'img_shape': [48, 160, 3],
- # 'scale_factor': [1., 1.]
- # }
- # from mmengine.structures import InstanceData
- # from mmocr.structures import TextRecogDataSample
- # pred_instances = InstanceData(metainfo=img_meta)
- # data_sample = TextRecogDataSample(pred_instances=pred_instances)
- # data_sample.set_metainfo(img_meta)
model_inputs = {'inputs': torch.rand(1, 3, 48, 160), 'data_samples': None}
deploy_cfg = mmengine.Config(
@@ -461,3 +453,121 @@ def test_sar_model(backend: Backend, decoder_type):
onnx.checker.check_model(model)
except onnx.checker.ValidationError:
assert False
+
+
+@pytest.mark.parametrize('backend', [Backend.ONNXRUNTIME])
+def test_mmdet_wrapper__forward(backend):
+ check_backend(backend)
+ from mmdet.structures import DetDataSample
+ from mmengine.structures import InstanceData
+ from mmocr.models.textdet import MMDetWrapper
+ cfg, = load_config('tests/test_codebase/test_mmocr/data/mrcnn.py')
+
+ model = MMDetWrapper(cfg.model.cfg)
+ model.eval()
+ deploy_cfg = mmengine.Config(
+ dict(
+ backend_config=dict(
+ type=backend.value,
+ common_config=dict(max_workspace_size=1 << 30)),
+ onnx_config=dict(
+ input_shape=None,
+ input_names=['inputs'],
+ output_names=['output']),
+ codebase_config=dict(
+ type='mmocr',
+ task='TextDetection',
+ post_processing=dict(
+ score_threshold=0.05,
+ confidence_threshold=0.005,
+ iou_threshold=0.5,
+ max_output_boxes_per_class=200,
+ pre_top_k=5000,
+ keep_top_k=100,
+ background_label_id=-1,
+ export_postprocess_mask=False))))
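+ # export_postprocess_mask=False presumably keeps mask pasting/resizing out
+ # of the exported graph, leaving it to the runtime side.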
+
+ input = torch.rand(1, 3, 64, 64)
+ img_meta = {
+ 'ori_shape': [64, 64],
+ 'img_shape': [64, 64],
+ 'scale_factor': [1., 1.],
+ 'img_path': ''
+ }
+ pred_instances = InstanceData(metainfo=img_meta)
+ data_sample = DetDataSample(pred_instances=pred_instances)
+ data_sample.set_metainfo(img_meta)
+ wrapped_model = WrapModel(model, 'forward', data_samples=[data_sample])
+
+ rewrite_inputs = {'inputs': input}
+
+ rewrite_outputs, _ = get_rewrite_outputs(
+ wrapped_model=wrapped_model,
+ model_inputs=rewrite_inputs,
+ deploy_cfg=deploy_cfg,
+ run_with_backend=False)
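+ # With run_with_backend=False this is a smoke test: it exercises the
+ # rewritten export path only and makes no numerical comparison against a
+ # backend engine, hence the bare not-None assert below.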
+ assert rewrite_outputs is not None
+
+
+@pytest.mark.parametrize('backend', [Backend.ONNXRUNTIME])
+def test_abi_language_decoder___get_length(backend):
+ check_backend(backend)
+ from mmocr.models.textrecog.decoders import ABILanguageDecoder
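+ # _get_length locates the end token in the logits, which is presumably why
+ # the shared dictionary above is now built with with_end=True.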
+ model = ABILanguageDecoder(dictionary=dictionary)
+ input = torch.randn(1, 26, 37)
+ model_inputs = {'logit': input}
+ model_outputs = get_model_outputs(model, '_get_length', model_inputs)
+ wrapped_model = WrapModel(model, '_get_length')
+ rewrite_inputs = {'logit': input}
+ deploy_cfg = mmengine.Config(
+ dict(
+ backend_config=dict(type=backend.value),
+ onnx_config=dict(input_shape=None),
+ codebase_config=dict(
+ type='mmocr',
+ task='TextRecognition',
+ )))
+ rewrite_outputs, is_backend_output = get_rewrite_outputs(
+ wrapped_model=wrapped_model,
+ model_inputs=rewrite_inputs,
+ deploy_cfg=deploy_cfg)
+
+ if is_backend_output:
+ rewrite_outputs = rewrite_outputs[0]
+
+ model_outputs = model_outputs.float().cpu().numpy()
+ rewrite_outputs = rewrite_outputs.cpu().numpy()
+ assert np.allclose(model_outputs, rewrite_outputs, rtol=1e-03, atol=1e-05)
+
+
+@pytest.mark.parametrize('backend', [Backend.ONNXRUNTIME])
+def test__positional_encoding(backend):
+ check_backend(backend)
+ from mmocr.models.common.modules import PositionalEncoding
+ pytorch_model = PositionalEncoding(64, 20)
+ input = torch.rand(1, 20, 64)
+ model_inputs = {'x': input}
+ model_outputs = get_model_outputs(pytorch_model, 'forward', model_inputs)
+ wrapped_model = WrapModel(pytorch_model, 'forward')
+ rewrite_inputs = {'x': input}
+ deploy_cfg = mmengine.Config(
+ dict(
+ backend_config=dict(type=backend.value),
+ onnx_config=dict(input_shape=None),
+ codebase_config=dict(
+ type='mmocr',
+ task='TextRecognition',
+ )))
+ rewrite_outputs, is_backend_output = get_rewrite_outputs(
+ wrapped_model=wrapped_model,
+ model_inputs=rewrite_inputs,
+ deploy_cfg=deploy_cfg)
+
+ if is_backend_output:
+ rewrite_outputs = rewrite_outputs[0]
+
+ model_outputs = model_outputs.float().cpu().numpy()
+ rewrite_outputs = rewrite_outputs.cpu().numpy()
+ assert np.allclose(model_outputs, rewrite_outputs, rtol=1e-03, atol=1e-05)
diff --git a/tests/test_pytorch/test_pytorch_functions.py b/tests/test_pytorch/test_pytorch_functions.py
index 6b296c9198..245bfba9d4 100644
--- a/tests/test_pytorch/test_pytorch_functions.py
+++ b/tests/test_pytorch/test_pytorch_functions.py
@@ -557,6 +557,31 @@ def _pad_(x):
pytorch_output, rewrite_output[0], rtol=1e-3, atol=1e-5)
+@backend_checker(Backend.ONNXRUNTIME)
+@pytest.mark.parametrize('dim', [0, -1])
+@pytest.mark.parametrize('keepdim', [True, False])
+def test_any__default(dim, keepdim):
+ input = torch.rand(2, 4)
+ model = WrapFunction(lambda input: input.any(dim, keepdim=keepdim))
+ pytorch_output = model(input)
+ deploy_cfg_ort = Config(
+ dict(
+ onnx_config=dict(input_shape=None),
+ backend_config=dict(type='onnxruntime'),
+ codebase_config=dict(type='mmdet', task='ObjectDetection')))
+ rewrite_output, _ = get_rewrite_outputs(
+ model,
+ model_inputs={'input': input},
+ deploy_cfg=deploy_cfg_ort,
+ run_with_backend=True)
+ assert pytorch_output.dtype == rewrite_output[0].dtype
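+ # any() returns a bool tensor; the explicit float() casts below are needed
+ # because torch.allclose is not defined for bool inputs, while the dtype
+ # assert above still checks that the boolean type survives the rewrite.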
+ assert torch.allclose(
+ pytorch_output.float(),
+ rewrite_output[0].float(),
+ rtol=1e-3,
+ atol=1e-5)
+
+
@backend_checker(Backend.ONNXRUNTIME)
def test_linspace__default():
import random