From e70c91a9ff9b8a20e05c133aec6ec3ed538c32fb Mon Sep 17 00:00:00 2001
From: Aditya Oke <47158509+oke-aditya@users.noreply.github.com>
Date: Thu, 1 Oct 2020 16:51:22 +0530
Subject: [PATCH] Adds bounding boxes conversion (#2710)

* adds boxes conversion

* adds documentation

* adds xywh tests

* fixes small typo

* adds tests

* Remove sphinx theme

* corrects assertions

* cleans code as per suggestion

Signed-off-by: Aditya Oke <okeaditya315@gmail.com>

* reverts assertion

* fixes to assertEqual

* fixes inplace operations

* Adds docstrings

* added documentation

* changes tests

* moves code to box_convert

* adds more tests

* Apply suggestions from code review

Let's leave those changes to a separate PR

* fixes documentation

Co-authored-by: Francisco Massa <fvsmassa@gmail.com>
---
 docs/source/ops.rst             |  1 +
 test/test_ops.py                | 96 +++++++++++++++++++++++++++++++++
 torchvision/ops/__init__.py     |  4 +-
 torchvision/ops/_box_convert.py | 83 ++++++++++++++++++++++++++++
 torchvision/ops/boxes.py        | 56 +++++++++++++++++++
 5 files changed, 239 insertions(+), 1 deletion(-)
 create mode 100644 torchvision/ops/_box_convert.py

diff --git a/docs/source/ops.rst b/docs/source/ops.rst
index b332119f04f..6b3b2451009 100644
--- a/docs/source/ops.rst
+++ b/docs/source/ops.rst
@@ -13,6 +13,7 @@ torchvision.ops
 .. autofunction:: batched_nms
 .. autofunction:: remove_small_boxes
 .. autofunction:: clip_boxes_to_image
+.. autofunction:: box_convert
 .. autofunction:: box_area
 .. autofunction:: box_iou
 .. autofunction:: generalized_box_iou
diff --git a/test/test_ops.py b/test/test_ops.py
index 515b2f11541..5ee30e77705 100644
--- a/test/test_ops.py
+++ b/test/test_ops.py
@@ -647,6 +647,102 @@ def test_convert_boxes_to_roi_format(self):
                 self.assertTrue(torch.equal(ref_tensor, ops._utils.convert_boxes_to_roi_format(box_sequence)))
 
 
+class BoxTester(unittest.TestCase):
+    def test_bbox_same(self):
+        box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
+                                  [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)
+
+        exp_xyxy = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
+                                [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)
+
+        box_same = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="xyxy")
+        self.assertEqual(exp_xyxy.size(), torch.Size([4, 4]))
+        self.assertEqual(exp_xyxy.dtype, box_tensor.dtype)
+        assert torch.all(torch.eq(box_same, exp_xyxy)).item()
+
+        box_same = ops.box_convert(box_tensor, in_fmt="xywh", out_fmt="xywh")
+        self.assertEqual(exp_xyxy.size(), torch.Size([4, 4]))
+        self.assertEqual(exp_xyxy.dtype, box_tensor.dtype)
+        assert torch.all(torch.eq(box_same, exp_xyxy)).item()
+
+        box_same = ops.box_convert(box_tensor, in_fmt="cxcywh", out_fmt="cxcywh")
+        self.assertEqual(exp_xyxy.size(), torch.Size([4, 4]))
+        self.assertEqual(exp_xyxy.dtype, box_tensor.dtype)
+        assert torch.all(torch.eq(box_same, exp_xyxy)).item()
+
+    def test_bbox_xyxy_xywh(self):
+        # Simple test convert boxes to xywh and back. Make sure they are same.
+        # box_tensor is in x1 y1 x2 y2 format.
+        box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
+                                  [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)
+        exp_xywh = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
+                                [10, 15, 20, 20], [23, 35, 70, 60]], dtype=torch.float)
+
+        box_xywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="xywh")
+        self.assertEqual(exp_xywh.size(), torch.Size([4, 4]))
+        self.assertEqual(exp_xywh.dtype, box_tensor.dtype)
+        assert torch.all(torch.eq(box_xywh, exp_xywh)).item()
+
+        # Reverse conversion
+        box_xyxy = ops.box_convert(box_xywh, in_fmt="xywh", out_fmt="xyxy")
+        self.assertEqual(box_xyxy.size(), torch.Size([4, 4]))
+        self.assertEqual(box_xyxy.dtype, box_tensor.dtype)
+        assert torch.all(torch.eq(box_xyxy, box_tensor)).item()
+
+    def test_bbox_xyxy_cxcywh(self):
+        # Simple test convert boxes to cxcywh and back. Make sure they are same.
+        # box_tensor is in x1 y1 x2 y2 format.
+        box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
+                                  [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)
+        exp_cxcywh = torch.tensor([[50, 50, 100, 100], [0, 0, 0, 0],
+                                  [20, 25, 20, 20], [58, 65, 70, 60]], dtype=torch.float)
+
+        box_cxcywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="cxcywh")
+        self.assertEqual(exp_cxcywh.size(), torch.Size([4, 4]))
+        self.assertEqual(exp_cxcywh.dtype, box_tensor.dtype)
+        assert torch.all(torch.eq(box_cxcywh, exp_cxcywh)).item()
+
+        # Reverse conversion
+        box_xyxy = ops.box_convert(box_cxcywh, in_fmt="cxcywh", out_fmt="xyxy")
+        self.assertEqual(box_xyxy.size(), torch.Size([4, 4]))
+        self.assertEqual(box_xyxy.dtype, box_tensor.dtype)
+        assert torch.all(torch.eq(box_xyxy, box_tensor)).item()
+
+    def test_bbox_xywh_cxcywh(self):
+        box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
+                                  [10, 15, 20, 20], [23, 35, 70, 60]], dtype=torch.float)
+
+        # Expected boxes in (cx, cy, w, h) format: cx = x + w / 2, cy = y + h / 2.
+        exp_cxcywh = torch.tensor([[50, 50, 100, 100], [0, 0, 0, 0],
+                                  [20, 25, 20, 20], [58, 65, 70, 60]], dtype=torch.float)
+
+        box_cxcywh = ops.box_convert(box_tensor, in_fmt="xywh", out_fmt="cxcywh")
+        self.assertEqual(exp_cxcywh.size(), torch.Size([4, 4]))
+        self.assertEqual(exp_cxcywh.dtype, box_tensor.dtype)
+        assert torch.all(torch.eq(box_cxcywh, exp_cxcywh)).item()
+
+        # Reverse conversion
+        box_xywh = ops.box_convert(box_cxcywh, in_fmt="cxcywh", out_fmt="xywh")
+        self.assertEqual(box_xywh.size(), torch.Size([4, 4]))
+        self.assertEqual(box_xywh.dtype, box_tensor.dtype)
+        assert torch.all(torch.eq(box_xywh, box_tensor)).item()
+
+    # def test_bbox_convert_jit(self):
+    #     box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
+    #                               [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)
+
+    #     scripted_fn = torch.jit.script(ops.box_convert)
+    #     TOLERANCE = 1e-3
+
+    #     box_xywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="xywh")
+    #     scripted_xywh = scripted_fn(box_tensor, 'xyxy', 'xywh')
+    #     self.assertTrue((scripted_xywh - box_xywh).abs().max() < TOLERANCE)
+
+    #     box_cxcywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="cxcywh")
+    #     scripted_cxcywh = scripted_fn(box_tensor, 'xyxy', 'cxcywh')
+    #     self.assertTrue((scripted_cxcywh - box_cxcywh).abs().max() < TOLERANCE)
+
+
 class BoxAreaTester(unittest.TestCase):
     def test_box_area(self):
         # A bounding box of area 10000 and a degenerate case
diff --git a/torchvision/ops/__init__.py b/torchvision/ops/__init__.py
index 45b59f628a9..4f94ac447c8 100644
--- a/torchvision/ops/__init__.py
+++ b/torchvision/ops/__init__.py
@@ -1,4 +1,5 @@
 from .boxes import nms, batched_nms, remove_small_boxes, clip_boxes_to_image, box_area, box_iou, generalized_box_iou
+from .boxes import box_convert
 from .new_empty_tensor import _new_empty_tensor
 from .deform_conv import deform_conv2d, DeformConv2d
 from .roi_align import roi_align, RoIAlign
@@ -15,7 +16,8 @@
 
 __all__ = [
     'deform_conv2d', 'DeformConv2d', 'nms', 'batched_nms', 'remove_small_boxes',
-    'clip_boxes_to_image', 'box_area', 'box_iou', 'generalized_box_iou', 'roi_align', 'RoIAlign', 'roi_pool',
+    'clip_boxes_to_image', 'box_convert',
+    'box_area', 'box_iou', 'generalized_box_iou', 'roi_align', 'RoIAlign', 'roi_pool',
     'RoIPool', '_new_empty_tensor', 'ps_roi_align', 'PSRoIAlign', 'ps_roi_pool',
     'PSRoIPool', 'MultiScaleRoIAlign', 'FeaturePyramidNetwork'
 ]
diff --git a/torchvision/ops/_box_convert.py b/torchvision/ops/_box_convert.py
new file mode 100644
index 00000000000..2f7ea0af360
--- /dev/null
+++ b/torchvision/ops/_box_convert.py
@@ -0,0 +1,83 @@
+import torch
+from torch.jit.annotations import Tuple
+from torch import Tensor
+import torchvision
+
+
+def _box_cxcywh_to_xyxy(boxes: Tensor) -> Tensor:
+    """
+    Converts bounding boxes from (cx, cy, w, h) format to (x1, y1, x2, y2) format.
+    (cx, cy) refers to center of bounding box
+    (w, h) are width and height of bounding box
+    Arguments:
+        boxes (Tensor[N, 4]): boxes in (cx, cy, w, h) format which will be converted.
+
+    Returns:
+        boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format.
+    """
+    # We need to change all 4 of them so some temporary variable is needed.
+    cx, cy, w, h = boxes.unbind(-1)
+    x1 = cx - 0.5 * w
+    y1 = cy - 0.5 * h
+    x2 = cx + 0.5 * w
+    y2 = cy + 0.5 * h
+
+    boxes = torch.stack((x1, y1, x2, y2), dim=-1)
+
+    return boxes
+
+
+def _box_xyxy_to_cxcywh(boxes: Tensor) -> Tensor:
+    """
+    Converts bounding boxes from (x1, y1, x2, y2) format to (cx, cy, w, h) format.
+    (x1, y1) refer to top left of bounding box
+    (x2, y2) refer to bottom right of bounding box
+    Arguments:
+        boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format which will be converted.
+
+    Returns:
+        boxes (Tensor[N, 4]): boxes in (cx, cy, w, h) format.
+    """
+    x1, y1, x2, y2 = boxes.unbind(-1)
+    cx = (x1 + x2) / 2
+    cy = (y1 + y2) / 2
+    w = x2 - x1
+    h = y2 - y1
+
+    boxes = torch.stack((cx, cy, w, h), dim=-1)
+
+    return boxes
+
+
+def _box_xywh_to_xyxy(boxes: Tensor) -> Tensor:
+    """
+    Converts bounding boxes from (x, y, w, h) format to (x1, y1, x2, y2) format.
+    (x, y) refers to top left of bounding box.
+    (w, h) refers to width and height of box.
+    Arguments:
+        boxes (Tensor[N, 4]): boxes in (x, y, w, h) which will be converted.
+
+    Returns:
+        boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format.
+    """
+    x, y, w, h = boxes.unbind(-1)
+    boxes = torch.stack([x, y, x + w, y + h], dim=-1)
+    return boxes
+
+
+def _box_xyxy_to_xywh(boxes: Tensor) -> Tensor:
+    """
+    Converts bounding boxes from (x1, y1, x2, y2) format to (x, y, w, h) format.
+    (x1, y1) refer to top left of bounding box
+    (x2, y2) refer to bottom right of bounding box
+    Arguments:
+        boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) which will be converted.
+
+    Returns:
+        boxes (Tensor[N, 4]): boxes in (x, y, w, h) format.
+    """
+    x1, y1, x2, y2 = boxes.unbind(-1)
+    x2 = x2 - x1  # x2 - x1
+    y2 = y2 - y1  # y2 - y1
+    boxes = torch.stack((x1, y1, x2, y2), dim=-1)
+    return boxes
diff --git a/torchvision/ops/boxes.py b/torchvision/ops/boxes.py
index 6899445e867..a914543b1ec 100644
--- a/torchvision/ops/boxes.py
+++ b/torchvision/ops/boxes.py
@@ -1,6 +1,7 @@
 import torch
 from torch.jit.annotations import Tuple
 from torch import Tensor
+from ._box_convert import _box_cxcywh_to_xyxy, _box_xyxy_to_cxcywh, _box_xywh_to_xyxy, _box_xyxy_to_xywh
 import torchvision
 
 
@@ -133,6 +134,61 @@ def clip_boxes_to_image(boxes: Tensor, size: Tuple[int, int]) -> Tensor:
     return clipped_boxes.reshape(boxes.shape)
 
 
+def box_convert(boxes: Tensor, in_fmt: str, out_fmt: str) -> Tensor:
+    """
+    Converts boxes from given in_fmt to out_fmt.
+    Supported in_fmt and out_fmt are:
+
+    'xyxy': boxes are represented via corners, x1, y1 being top left and x2, y2 being bottom right.
+
+    'xywh' : boxes are represented via corner, width and height, x1, y1 being top left, w, h being width and height.
+
+    'cxcywh' : boxes are represented via center, width and height, cx, cy being center of box, w, h
+    being width and height.
+
+    Arguments:
+        boxes (Tensor[N, 4]): boxes which will be converted.
+        in_fmt (str): Input format of given boxes. Supported formats are ['xyxy', 'xywh', 'cxcywh'].
+        out_fmt (str): Output format of given boxes. Supported formats are ['xyxy', 'xywh', 'cxcywh']
+
+    Returns:
+        boxes (Tensor[N, 4]): Boxes in the converted format.
+    """
+    allowed_fmts = ("xyxy", "xywh", "cxcywh")
+    assert in_fmt in allowed_fmts
+    assert out_fmt in allowed_fmts
+
+    if in_fmt == out_fmt:
+        boxes_converted = boxes.clone()
+        return boxes_converted
+
+    if in_fmt != 'xyxy' and out_fmt != 'xyxy':
+        if in_fmt == "xywh":
+            boxes_xyxy = _box_xywh_to_xyxy(boxes)
+            if out_fmt == "cxcywh":
+                boxes_converted = _box_xyxy_to_cxcywh(boxes_xyxy)
+
+        elif in_fmt == "cxcywh":
+            boxes_xyxy = _box_cxcywh_to_xyxy(boxes)
+            if out_fmt == "xywh":
+                boxes_converted = _box_xyxy_to_xywh(boxes_xyxy)
+
+        # convert one to xyxy and change either in_fmt or out_fmt to xyxy
+    else:
+        if in_fmt == "xyxy":
+            if out_fmt == "xywh":
+                boxes_converted = _box_xyxy_to_xywh(boxes)
+            elif out_fmt == "cxcywh":
+                boxes_converted = _box_xyxy_to_cxcywh(boxes)
+        elif out_fmt == "xyxy":
+            if in_fmt == "xywh":
+                boxes_converted = _box_xywh_to_xyxy(boxes)
+            elif in_fmt == "cxcywh":
+                boxes_converted = _box_cxcywh_to_xyxy(boxes)
+
+    return boxes_converted
+
+
 def box_area(boxes: Tensor) -> Tensor:
     """
     Computes the area of a set of bounding boxes, which are specified by its