Fix the consistency of pre-processing with yolov5 #293

Merged · 49 commits · Feb 4, 2022

Commits
45dc664
Modify copyrights
zhiqwang Jan 26, 2022
bf1dd4e
Fix docstring
zhiqwang Jan 26, 2022
fe76c96
add letterbox function
Jan 26, 2022
ed406a7
Apply pre-commit format
Jan 26, 2022
339f07e
Fix importing torch.nn.functional
zhiqwang Jan 27, 2022
92ab438
Add unittest for letterbox
zhiqwang Jan 27, 2022
1c58ac7
Move letterbox into yolort.models.transform
zhiqwang Jan 27, 2022
8595755
Cleanup letterbox
zhiqwang Jan 27, 2022
3e2a0d8
Move 255 into the letterbox method
zhiqwang Jan 28, 2022
8e7b4a4
Add more test for letterbox
zhiqwang Jan 28, 2022
25aae82
Add fill_color to be filled in NestedTensor
zhiqwang Jan 28, 2022
69e2e49
Use numpy testing in test_letterbox
zhiqwang Jan 28, 2022
3a1f4cd
Add attributes size_divisible and fill_color in YOLOTransform
zhiqwang Jan 29, 2022
0926c15
Fixing docstrings
zhiqwang Jan 29, 2022
c83e23f
Adopt torchvision's structure
zhiqwang Jan 29, 2022
855c61d
Fix testing for batch_images
zhiqwang Jan 29, 2022
a58e187
Fixing docstrings
zhiqwang Jan 29, 2022
ee947ec
Remove fixed_size from YOLOTransform
zhiqwang Jan 29, 2022
170f108
Fixing parameters and docstrings in YOLOv5
zhiqwang Jan 30, 2022
028073a
Updating with torchvision
zhiqwang Jan 30, 2022
6503980
Padding into 2 sides in YOLOTransform
zhiqwang Jan 30, 2022
21f3fe6
Fixing type annotation
zhiqwang Jan 31, 2022
62e50b1
Move up parameter num_classes
zhiqwang Jan 31, 2022
3496b7b
Fix fill_color scale in YOLOTransform
zhiqwang Jan 31, 2022
210ac5c
Rename resize_boxes to scale_coords
zhiqwang Feb 1, 2022
1a1a4c6
Fix classmethod load_from_yolov5 in YOLOv5
zhiqwang Feb 1, 2022
98778aa
Fix the image_sizes in NestedTensor
zhiqwang Feb 1, 2022
ce6a951
Fixing docstrings
zhiqwang Feb 1, 2022
791fc7a
Fix size_divisible in test_load_from_yolov5
zhiqwang Feb 1, 2022
9652ae8
Minor fixes for type annotation
zhiqwang Feb 1, 2022
cfb250e
Fixing exporting ONNX
zhiqwang Feb 1, 2022
e0a770a
Fixing TestONNXExporter
zhiqwang Feb 1, 2022
4a09f34
Cleanup padding rule when batching
zhiqwang Feb 2, 2022
ecc96a5
Minor fixes
zhiqwang Feb 2, 2022
9af5443
Set fail-fast to false in GH Actions
zhiqwang Feb 2, 2022
1b7383e
Minor fixes
zhiqwang Feb 2, 2022
36ba3aa
Fixing type casting and annotations
zhiqwang Feb 2, 2022
0338fb8
Using consistent torch.int32 when casting in YOLOTransform
zhiqwang Feb 2, 2022
f285ac4
Fixing batching inference
zhiqwang Feb 3, 2022
a39710c
Cleanup ONNX Tester
zhiqwang Feb 3, 2022
11fbab3
Fixing casting Tensor.item() when exporting ONNX
zhiqwang Feb 3, 2022
f51fd33
Just test ONNX export without postprocess
zhiqwang Feb 4, 2022
2be9407
Apply pre-commit
zhiqwang Feb 4, 2022
614a40f
Fixing shape inference when tracing
zhiqwang Feb 4, 2022
15c60ed
Add types to letterbox
zhiqwang Feb 4, 2022
af38239
Cleanup YOLOTransform
zhiqwang Feb 4, 2022
b95e357
Fixing YOLOTransform letterbox batching
zhiqwang Feb 4, 2022
8bc3e78
Updating Intuition for yolort Notebook
zhiqwang Feb 4, 2022
3394321
Minor fixes
zhiqwang Feb 4, 2022
1 change: 1 addition & 0 deletions .github/workflows/ci-test.yml
@@ -12,6 +12,7 @@ jobs:
   Unittest:
     runs-on: ${{ matrix.image }}
     strategy:
+      fail-fast: false
       matrix:
         image: [ 'ubuntu-latest' ]
         torch: [ 'PyTorch 1.9.1+cpu', 'PyTorch 1.10.2+cpu' ]
137 changes: 79 additions & 58 deletions notebooks/inference-pytorch-export-libtorch.ipynb

Large diffs are not rendered by default.

7 changes: 4 additions & 3 deletions test/test_engine.py
@@ -7,7 +7,7 @@
 from torchvision.io import read_image
 from yolort.data import COCOEvaluator, DetectionDataModule, _helper as data_helper
 from yolort.models import yolov5s
-from yolort.models.transform import nested_tensor_from_tensor_list
+from yolort.models.transform import YOLOTransform
 from yolort.models.yolo import yolov5_darknet_pan_s_r31


@@ -28,9 +28,10 @@ def test_train_with_vanilla_model():
     img_tensor = default_loader(img_name)
     assert img_tensor.ndim == 3
     # Add a dummy image to train
-    img_dummy = torch.rand((3, 416, 360), dtype=torch.float32)
+    img_dummy = torch.rand((3, 1080, 810), dtype=torch.float32)

-    images = nested_tensor_from_tensor_list([img_tensor, img_dummy])
+    yolo_transform = YOLOTransform(640, 640)
+    images = yolo_transform.batch_images([img_tensor, img_dummy])
     targets = torch.tensor(
         [
             [0, 7, 0.3790, 0.5487, 0.3220, 0.2047],
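For orientation, a minimal usage sketch (assuming the YOLOTransform API this PR introduces) of the new preprocessing entry point that replaces nested_tensor_from_tensor_list:

```python
# Illustrative only: batch two differently sized images the way the updated test does.
# YOLOTransform resizes with the letterbox rule and pads the batch to a shape divisible
# by its size_divisible attribute (32 for the non-P6 models in these tests).
import torch
from yolort.models.transform import YOLOTransform

images = [torch.rand(3, 720, 480), torch.rand(3, 1080, 810)]
yolo_transform = YOLOTransform(640, 640)
batched = yolo_transform.batch_images(images)
print(batched.shape)  # a single 4-D tensor; the exact H and W depend on the padding rule
```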
37 changes: 14 additions & 23 deletions test/test_models.py
@@ -340,20 +340,15 @@ def test_torchscript(arch):


 @pytest.mark.parametrize(
-    "arch, version, upstream_version, hash_prefix",
+    "arch, size_divisible, version, upstream_version, hash_prefix",
     [
-        ("yolov5s", "r4.0", "v4.0", "9ca9a642"),
-        ("yolov5n", "r6.0", "v6.0", "649e089f"),
-        ("yolov5s", "r6.0", "v6.0", "c3b140f3"),
-        ("yolov5n6", "r6.0", "v6.0", "beecbbae"),
+        ("yolov5s", 32, "r4.0", "v4.0", "9ca9a642"),
+        ("yolov5n", 32, "r6.0", "v6.0", "649e089f"),
+        ("yolov5s", 32, "r6.0", "v6.0", "c3b140f3"),
+        ("yolov5n6", 64, "r6.0", "v6.0", "beecbbae"),
     ],
 )
-def test_load_from_yolov5(
-    arch: str,
-    version: str,
-    upstream_version: str,
-    hash_prefix: str,
-):
+def test_load_from_yolov5(arch, size_divisible, version, upstream_version, hash_prefix):
     img_path = "test/assets/bus.jpg"

     base_url = "https://github.com/ultralytics/yolov5/releases/download/"
@@ -366,6 +361,7 @@ def test_load_from_yolov5(
         checkpoint_path,
         score_thresh=score_thresh,
         version=version,
+        size_divisible=size_divisible,
     )
     model_yolov5.eval()
     out_from_yolov5 = model_yolov5.predict(img_path)
@@ -388,28 +384,23 @@ def test_load_from_yolov5(


 @pytest.mark.parametrize(
-    "arch, version, upstream_version, hash_prefix",
+    "arch, size_divisible, version, upstream_version, hash_prefix",
     [
-        ("yolov5s", "r4.0", "v4.0", "9ca9a642"),
-        ("yolov5n", "r6.0", "v6.0", "649e089f"),
-        ("yolov5s", "r6.0", "v6.0", "c3b140f3"),
-        ("yolov5n6", "r6.0", "v6.0", "beecbbae"),
+        ("yolov5s", 32, "r4.0", "v4.0", "9ca9a642"),
+        ("yolov5n", 32, "r6.0", "v6.0", "649e089f"),
+        ("yolov5s", 32, "r6.0", "v6.0", "c3b140f3"),
+        ("yolov5n6", 64, "r6.0", "v6.0", "beecbbae"),
     ],
 )
-def test_load_from_yolov5_torchscript(
-    arch: str,
-    version: str,
-    upstream_version: str,
-    hash_prefix: str,
-):
+def test_load_from_yolov5_torchscript(arch, size_divisible, version, upstream_version, hash_prefix):
     import cv2
     from yolort.utils import read_image_to_tensor
     from yolort.v5 import letterbox

     # Loading and pre-processing the image
     img_path = "test/assets/zidane.jpg"
     img_raw = cv2.imread(img_path)
-    img = letterbox(img_raw, new_shape=(640, 640))[0]
+    img = letterbox(img_raw, new_shape=(640, 640), stride=size_divisible)[0]
     img = read_image_to_tensor(img)

     base_url = "https://github.com/ultralytics/yolov5/releases/download/"
32 changes: 32 additions & 0 deletions test/test_models_transform.py
@@ -1,5 +1,8 @@
+# Copyright (c) 2022, yolort team. All rights reserved.
 import copy

+import numpy as np
+import pytest
 import torch
 from yolort.models.transform import YOLOTransform, NestedTensor

@@ -19,3 +22,32 @@ def test_yolo_transform():
     # Test annotations after transformation
     torch.testing.assert_close(annotations[0]["boxes"], annotations_copy[0]["boxes"], rtol=0, atol=0)
     torch.testing.assert_close(annotations[1]["boxes"], annotations_copy[1]["boxes"], rtol=0, atol=0)
+
+
+@pytest.mark.parametrize("img_h", [300, 500, 720, 800, 1080, 1280])
+@pytest.mark.parametrize("img_w", [300, 500, 720, 800, 1080, 1280])
+@pytest.mark.parametrize("auto", [True])
+@pytest.mark.parametrize("stride", [32, 64])
+def test_letterbox(img_h, img_w, auto, stride):
+
+    from yolort.models.transform import _resize_image_and_masks
+    from yolort.v5 import letterbox
+
+    new_shape = (640, 640)  # height, width
+
+    img_tensor = torch.randint(0, 255, (3, img_h, img_w))
+    img_numpy = img_tensor.permute(1, 2, 0).numpy().astype("uint8")
+
+    yolo_transform = YOLOTransform(new_shape[0], new_shape[1], size_divisible=stride, auto_rectangle=auto)
+
+    im3 = img_tensor / 255
+    im3, _ = _resize_image_and_masks(im3.float(), new_shape)
+    out1 = yolo_transform.batch_images([im3])
+
+    out2 = letterbox(img_numpy, new_shape=new_shape, auto=auto, stride=stride)
+
+    aug1 = out1[0].numpy()
+    aug2 = out2[0].astype(np.float32)  # uint8 to float32
+    aug2 = np.transpose(aug2 / 255.0, [2, 0, 1])
+    assert aug1.shape == aug2.shape
+    np.testing.assert_allclose(aug1, aug2, rtol=1e-4, atol=1e-2)
Comment from @zhiqwang (owner) · Feb 4, 2022:

PyTorch's interpolate operator currently only matches OpenCV for floating-point inputs, while the letterbox implemented in yolov5 operates on uint8 images, so the tolerance set here is relatively loose.

Check pytorch/pytorch#5580 (comment) for more details.
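A rough standalone sketch of the effect described in this comment (not part of the PR): resizing the same image through OpenCV's uint8 path, as yolov5's letterbox does, versus torch.nn.functional.interpolate on normalized floats, as YOLOTransform does, leaves small rounding differences, which is why the assertion above uses atol=1e-2 rather than an exact match.

```python
# Hypothetical comparison of the two resize paths; the exact error magnitude
# depends on the image, but it is nonzero because OpenCV rounds to uint8.
import cv2
import numpy as np
import torch
import torch.nn.functional as F

img = np.random.randint(0, 255, (320, 320, 3), dtype=np.uint8)

# OpenCV path: bilinear resize in uint8, then normalize (yolov5-style).
ref = cv2.resize(img, (640, 640), interpolation=cv2.INTER_LINEAR).astype(np.float32) / 255.0

# PyTorch path: normalize first, then bilinear interpolation in float32.
ten = torch.from_numpy(img).permute(2, 0, 1).float() / 255.0
out = F.interpolate(ten[None], size=(640, 640), mode="bilinear", align_corners=False)[0]
out = out.permute(1, 2, 0).numpy()

print(np.abs(out - ref).max())  # small but nonzero, hence the loose tolerance
```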

93 changes: 41 additions & 52 deletions test/test_onnx.py
@@ -1,42 +1,37 @@
 """
-Test for exporting model to ONNX and inference with ONNXRuntime
+Test for exporting model to ONNX and inference with ONNX Runtime
 """
 import io
 from pathlib import Path

 import pytest
 import torch
-from PIL import Image
 from torch import Tensor
-from torchvision import transforms
+from torchvision.io import read_image
 from torchvision.ops._register_onnx_ops import _onnx_opset_version
 from yolort import models
+from yolort.utils.image_utils import to_numpy

 # In environments without onnxruntime we prefer to
 # invoke all tests in the repo and have this one skipped rather than fail.
 onnxruntime = pytest.importorskip("onnxruntime")


 class TestONNXExporter:
-    @classmethod
-    def setUpClass(cls):
-        torch.manual_seed(123)
-
     def run_model(
         self,
         model,
         inputs_list,
-        tolerate_small_mismatch=False,
         do_constant_folding=True,
-        dynamic_axes=None,
-        output_names=None,
         input_names=None,
+        output_names=None,
+        dynamic_axes=None,
     ):
         """
-        The core part of exporting model to ONNX and inference with ONNXRuntime
+        The core part of exporting model to ONNX and inference with ONNX Runtime
         Copy-paste from <https://github.com/pytorch/vision/blob/07fb8ba/test/test_onnx.py#L34>
         """
-        model.eval()
+        model = model.eval()

         onnx_io = io.BytesIO()
         if isinstance(inputs_list[0][-1], dict):
@@ -50,9 +45,9 @@ def run_model(
                 onnx_io,
                 do_constant_folding=do_constant_folding,
                 opset_version=_onnx_opset_version,
-                dynamic_axes=dynamic_axes,
                 input_names=input_names,
                 output_names=output_names,
+                dynamic_axes=dynamic_axes,
             )
         # validate the exported model with onnx runtime
         for test_inputs in inputs_list:
@@ -62,85 +57,79 @@ def run_model(
             test_outputs = model(*test_inputs)
             if isinstance(test_outputs, Tensor):
                 test_outputs = (test_outputs,)
-            self.ort_validate(onnx_io, test_inputs, test_outputs, tolerate_small_mismatch)
+            self.ort_validate(onnx_io, test_inputs, test_outputs)

-    def ort_validate(self, onnx_io, inputs, outputs, tolerate_small_mismatch=False):
+    def ort_validate(self, onnx_io, inputs, outputs):

         inputs, _ = torch.jit._flatten(inputs)
         outputs, _ = torch.jit._flatten(outputs)

-        def to_numpy(tensor):
-            if tensor.requires_grad:
-                return tensor.detach().cpu().numpy()
-            else:
-                return tensor.cpu().numpy()
-
         inputs = list(map(to_numpy, inputs))
         outputs = list(map(to_numpy, outputs))

         ort_session = onnxruntime.InferenceSession(onnx_io.getvalue())
-        # compute onnxruntime output prediction
+        # Inference on ONNX Runtime
         ort_inputs = dict((ort_session.get_inputs()[i].name, inpt) for i, inpt in enumerate(inputs))
         ort_outs = ort_session.run(None, ort_inputs)

         for i in range(0, len(outputs)):
-            try:
-                torch.testing.assert_close(outputs[i], ort_outs[i], rtol=1e-03, atol=1e-05)
-            except AssertionError as error:
-                if tolerate_small_mismatch:
-                    self.assertIn("(0.00%)", str(error), str(error))
-                else:
-                    raise
+            torch.testing.assert_allclose(outputs[i], ort_outs[i], rtol=1e-03, atol=1e-05)

-    def get_image(self, img_name, size):
+    def get_image(self, img_name):

         img_path = Path(__file__).parent.resolve() / "assets" / img_name
-        image = Image.open(img_path).convert("RGB").resize(size, Image.BILINEAR)
+        image = read_image(str(img_path)) / 255

-        return transforms.ToTensor()(image)
+        return image

     def get_test_images(self):
-        return (
-            [self.get_image("bus.jpg", (416, 320))],
-            [self.get_image("zidane.jpg", (352, 480))],
-        )
+        return [self.get_image("bus.jpg")], [self.get_image("zidane.jpg")]

     @pytest.mark.parametrize(
-        "arch, upstream_version",
+        "arch, auto_rectangle, upstream_version",
         [
-            ("yolov5s", "r3.1"),
-            ("yolov5m", "r4.0"),
-            # ("yolov5ts", "r4.0"),
+            ("yolov5s", True, "r3.1"),
+            ("yolov5m", True, "r4.0"),
+            ("yolov5n", True, "r6.0"),
+            ("yolov5n6", True, "r6.0"),
         ],
     )
-    def test_yolort_export_onnx(self, arch, upstream_version):
+    def test_yolort_export_onnx(self, arch, auto_rectangle, upstream_version):
         images_one, images_two = self.get_test_images()
-        images_dummy = [torch.ones(3, 100, 100) * 0.3]
+        images_dummy = [torch.ones(3, 1080, 720) * 0.3]

         model = models.__dict__[arch](
             upstream_version=upstream_version,
             export_friendly=True,
             pretrained=True,
+            size=(640, 640),
+            auto_rectangle=auto_rectangle,
             score_thresh=0.45,
         )
-        model.eval()
+        model = model.eval()
         model(images_one)
         # Test exported model on images of different size, or dummy input
         self.run_model(
             model,
             [(images_one,), (images_two,), (images_dummy,)],
-            input_names=["images_tensors"],
-            output_names=["outputs"],
-            dynamic_axes={"images_tensors": [0, 1, 2], "outputs": [0, 1, 2]},
-            tolerate_small_mismatch=True,
+            input_names=["images"],
+            output_names=["scores", "labels", "boxes"],
+            dynamic_axes={
+                "images": [1, 2],
+                "boxes": [0, 1],
+                "labels": [0],
+                "scores": [0],
+            },
         )
         # Test exported model for an image with no detections on other images
         self.run_model(
             model,
             [(images_dummy,), (images_one,)],
-            input_names=["images_tensors"],
-            output_names=["outputs"],
-            dynamic_axes={"images_tensors": [0, 1, 2], "outputs": [0, 1, 2]},
-            tolerate_small_mismatch=True,
+            input_names=["images"],
+            output_names=["scores", "labels", "boxes"],
+            dynamic_axes={
+                "images": [1, 2],
+                "boxes": [0, 1],
+                "labels": [0],
+                "scores": [0],
+            },
         )
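For context, a hedged sketch (not taken verbatim from the PR) of the export call that run_model drives for these tests; the model keyword arguments mirror the parametrization above and may differ in other yolort versions.

```python
# Sketch of exporting a yolort model with the named inputs/outputs and
# per-axis dynamic shapes used by the updated test.
import io

import torch
from torchvision.ops._register_onnx_ops import _onnx_opset_version
from yolort import models

model = models.yolov5s(
    upstream_version="r6.0",
    export_friendly=True,
    pretrained=True,
    size=(640, 640),
    score_thresh=0.45,
)
model = model.eval()

dummy_inputs = [torch.rand(3, 640, 640)]
onnx_io = io.BytesIO()
torch.onnx.export(
    model,
    (dummy_inputs,),
    onnx_io,
    do_constant_folding=True,
    opset_version=_onnx_opset_version,
    input_names=["images"],
    output_names=["scores", "labels", "boxes"],
    dynamic_axes={"images": [1, 2], "boxes": [0, 1], "labels": [0], "scores": [0]},
)
```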
6 changes: 3 additions & 3 deletions yolort/models/__init__.py
@@ -119,7 +119,7 @@ def yolov5n6(upstream_version: str = "r6.0", export_friendly: bool = False, **kw
             Default: False.
     """
     if upstream_version == "r6.0":
-        model = YOLOv5(arch="yolov5_darknet_pan_n6_r60", **kwargs)
+        model = YOLOv5(arch="yolov5_darknet_pan_n6_r60", size_divisible=64, **kwargs)
     else:
         raise NotImplementedError("Currently only supports r6.0 version")

@@ -138,7 +138,7 @@ def yolov5s6(upstream_version: str = "r6.0", export_friendly: bool = False, **kw
             Default: False.
     """
     if upstream_version == "r6.0":
-        model = YOLOv5(arch="yolov5_darknet_pan_s6_r60", **kwargs)
+        model = YOLOv5(arch="yolov5_darknet_pan_s6_r60", size_divisible=64, **kwargs)
     else:
         raise NotImplementedError("Currently only supports r5.0 and r6.0 versions")

@@ -157,7 +157,7 @@ def yolov5m6(upstream_version: str = "r6.0", export_friendly: bool = False, **kw
             Default: False.
     """
     if upstream_version == "r6.0":
-        model = YOLOv5(arch="yolov5_darknet_pan_m6_r60", **kwargs)
+        model = YOLOv5(arch="yolov5_darknet_pan_m6_r60", size_divisible=64, **kwargs)
     else:
         raise NotImplementedError("Currently only supports r5.0 and r6.0 versions")
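A small illustration (not from the PR) of why the *6 architectures above pass size_divisible=64: their extra P6 head downsamples by 64, so the batched image shape must divide evenly by that stride. The helper below mirrors the rounding that a letterbox-style transform performs; the function name is hypothetical.

```python
import math

def letterboxed_shape(height, width, new_size=640, stride=32):
    # Scale the longer side down to new_size, keeping the aspect ratio.
    ratio = new_size / max(height, width)
    unpad_h, unpad_w = round(height * ratio), round(width * ratio)
    # Pad each dimension up to the next multiple of the stride.
    pad_h = math.ceil(unpad_h / stride) * stride
    pad_w = math.ceil(unpad_w / stride) * stride
    return pad_h, pad_w

print(letterboxed_shape(1080, 810, stride=32))  # (640, 480)
print(letterboxed_shape(1080, 810, stride=64))  # (640, 512)
```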