From e70c91a9ff9b8a20e05c133aec6ec3ed538c32fb Mon Sep 17 00:00:00 2001 From: Aditya Oke <47158509+oke-aditya@users.noreply.github.com> Date: Thu, 1 Oct 2020 16:51:22 +0530 Subject: [PATCH] Adds bounding boxes conversion (#2710) * adds boxes conversion * adds documentation * adds xywh tests * fixes small typo * adds tests * Remove sphinx theme * corrects assertions * cleans code as per suggestion Signed-off-by: Aditya Oke * reverts assertion * fixes to assertEqual * fixes inplace operations * Adds docstrings * added documentation * changes tests * moves code to box_convert * adds more tests * Apply suggestions from code review Let's leave those changes to a separate PR * fixes documentation Co-authored-by: Francisco Massa --- docs/source/ops.rst | 1 + test/test_ops.py | 96 +++++++++++++++++++++++++++++++++ torchvision/ops/__init__.py | 4 +- torchvision/ops/_box_convert.py | 83 ++++++++++++++++++++++++++++ torchvision/ops/boxes.py | 56 +++++++++++++++++++ 5 files changed, 239 insertions(+), 1 deletion(-) create mode 100644 torchvision/ops/_box_convert.py diff --git a/docs/source/ops.rst b/docs/source/ops.rst index b332119f04f..6b3b2451009 100644 --- a/docs/source/ops.rst +++ b/docs/source/ops.rst @@ -13,6 +13,7 @@ torchvision.ops .. autofunction:: batched_nms .. autofunction:: remove_small_boxes .. autofunction:: clip_boxes_to_image +.. autofunction:: box_convert .. autofunction:: box_area .. autofunction:: box_iou .. 
class BoxTester(unittest.TestCase):
    """Tests for ``ops.box_convert`` across the 'xyxy', 'xywh' and 'cxcywh' formats."""

    def _check_boxes(self, actual, expected):
        """Assert that ``actual`` matches ``expected`` in shape, dtype and values.

        The checks run on the tensor returned by ``ops.box_convert`` so that a
        wrong shape or dtype in the conversion result is actually detected.
        """
        self.assertEqual(actual.size(), expected.size())
        self.assertEqual(actual.dtype, expected.dtype)
        self.assertTrue(torch.equal(actual, expected))

    def test_bbox_same(self):
        # Converting a format to itself must return the values unchanged.
        box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                                   [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)

        exp_same = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                                 [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)

        for fmt in ("xyxy", "xywh", "cxcywh"):
            with self.subTest(fmt=fmt):
                box_same = ops.box_convert(box_tensor, in_fmt=fmt, out_fmt=fmt)
                self._check_boxes(box_same, exp_same)

    def test_bbox_xyxy_xywh(self):
        # Convert boxes to xywh and back; the round trip must be lossless.
        # box_tensor is in x1 y1 x2 y2 format.
        box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                                   [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)
        exp_xywh = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                                 [10, 15, 20, 20], [23, 35, 70, 60]], dtype=torch.float)

        box_xywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="xywh")
        self._check_boxes(box_xywh, exp_xywh)

        # Reverse conversion
        box_xyxy = ops.box_convert(box_xywh, in_fmt="xywh", out_fmt="xyxy")
        self._check_boxes(box_xyxy, box_tensor)

    def test_bbox_xyxy_cxcywh(self):
        # Convert boxes to cxcywh and back; the round trip must be lossless.
        # box_tensor is in x1 y1 x2 y2 format.
        box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                                   [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)
        exp_cxcywh = torch.tensor([[50, 50, 100, 100], [0, 0, 0, 0],
                                   [20, 25, 20, 20], [58, 65, 70, 60]], dtype=torch.float)

        box_cxcywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="cxcywh")
        self._check_boxes(box_cxcywh, exp_cxcywh)

        # Reverse conversion
        box_xyxy = ops.box_convert(box_cxcywh, in_fmt="cxcywh", out_fmt="xyxy")
        self._check_boxes(box_xyxy, box_tensor)

    def test_bbox_xywh_cxcywh(self):
        # box_tensor is in x y w h format.
        box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                                   [10, 15, 20, 20], [23, 35, 70, 60]], dtype=torch.float)

        # Centre = top-left + half the size, e.g. (10 + 20 / 2, 15 + 20 / 2) = (20, 25);
        # width and height are carried over unchanged.
        exp_cxcywh = torch.tensor([[50, 50, 100, 100], [0, 0, 0, 0],
                                   [20, 25, 20, 20], [58, 65, 70, 60]], dtype=torch.float)

        box_cxcywh = ops.box_convert(box_tensor, in_fmt="xywh", out_fmt="cxcywh")
        self._check_boxes(box_cxcywh, exp_cxcywh)

        # Reverse conversion
        box_xywh = ops.box_convert(box_cxcywh, in_fmt="cxcywh", out_fmt="xywh")
        self._check_boxes(box_xywh, box_tensor)

    # NOTE(review): the TorchScript parity test below was already disabled in the
    # original change; the reason is not recorded here. Confirm that
    # ops.box_convert can be scripted before re-enabling it.
    #
    # def test_bbox_convert_jit(self):
    #     box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
    #                                [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)
    #
    #     scripted_fn = torch.jit.script(ops.box_convert)
    #     TOLERANCE = 1e-3
    #
    #     box_xywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="xywh")
    #     scripted_xywh = scripted_fn(box_tensor, 'xyxy', 'xywh')
    #     self.assertTrue((scripted_xywh - box_xywh).abs().max() < TOLERANCE)
    #
    #     box_cxcywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="cxcywh")
    #     scripted_cxcywh = scripted_fn(box_tensor, 'xyxy', 'cxcywh')
    #     self.assertTrue((scripted_cxcywh - box_cxcywh).abs().max() < TOLERANCE)
import torch
from torch import Tensor


def _box_cxcywh_to_xyxy(boxes: Tensor) -> Tensor:
    """
    Converts bounding boxes from (cx, cy, w, h) format to (x1, y1, x2, y2) format.
    (cx, cy) refers to center of bounding box
    (w, h) are width and height of bounding box

    Arguments:
        boxes (Tensor[N, 4]): boxes in (cx, cy, w, h) format which will be converted.

    Returns:
        boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format.
    """
    # All four coordinates change, so unpack them and build a fresh tensor
    # instead of modifying the input in place.
    cx, cy, w, h = boxes.unbind(-1)
    x1 = cx - 0.5 * w
    y1 = cy - 0.5 * h
    x2 = cx + 0.5 * w
    y2 = cy + 0.5 * h

    return torch.stack((x1, y1, x2, y2), dim=-1)


def _box_xyxy_to_cxcywh(boxes: Tensor) -> Tensor:
    """
    Converts bounding boxes from (x1, y1, x2, y2) format to (cx, cy, w, h) format.
    (x1, y1) refer to top left of bounding box
    (x2, y2) refer to bottom right of bounding box

    Arguments:
        boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format which will be converted.

    Returns:
        boxes (Tensor[N, 4]): boxes in (cx, cy, w, h) format.
    """
    x1, y1, x2, y2 = boxes.unbind(-1)
    cx = (x1 + x2) / 2
    cy = (y1 + y2) / 2
    w = x2 - x1
    h = y2 - y1

    return torch.stack((cx, cy, w, h), dim=-1)


def _box_xywh_to_xyxy(boxes: Tensor) -> Tensor:
    """
    Converts bounding boxes from (x, y, w, h) format to (x1, y1, x2, y2) format.
    (x, y) refers to top left of bounding box.
    (w, h) refers to width and height of box.

    Arguments:
        boxes (Tensor[N, 4]): boxes in (x, y, w, h) which will be converted.

    Returns:
        boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format.
    """
    x, y, w, h = boxes.unbind(-1)
    return torch.stack((x, y, x + w, y + h), dim=-1)


def _box_xyxy_to_xywh(boxes: Tensor) -> Tensor:
    """
    Converts bounding boxes from (x1, y1, x2, y2) format to (x, y, w, h) format.
    (x1, y1) refer to top left of bounding box
    (x2, y2) refer to bottom right of bounding box

    Arguments:
        boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) which will be converted.

    Returns:
        boxes (Tensor[N, 4]): boxes in (x, y, w, h) format.
    """
    x1, y1, x2, y2 = boxes.unbind(-1)
    w = x2 - x1
    h = y2 - y1
    return torch.stack((x1, y1, w, h), dim=-1)


def box_convert(boxes: Tensor, in_fmt: str, out_fmt: str) -> Tensor:
    """
    Converts boxes from given in_fmt to out_fmt.
    Supported in_fmt and out_fmt are:

    'xyxy': boxes are represented via corners, x1, y1 being top left and x2, y2 being bottom right.

    'xywh' : boxes are represented via corner, width and height, x1, y1 being top left, w, h being width and height.

    'cxcywh' : boxes are represented via centre, width and height, cx, cy being center of box, w, h
    being width and height.

    Arguments:
        boxes (Tensor[N, 4]): boxes which will be converted.
        in_fmt (str): Input format of given boxes. Supported formats are ['xyxy', 'xywh', 'cxcywh'].
        out_fmt (str): Output format of given boxes. Supported formats are ['xyxy', 'xywh', 'cxcywh']

    Returns:
        boxes (Tensor[N, 4]): Boxes into converted format. This is always a new tensor;
        the input is never modified or returned as-is.

    Raises:
        ValueError: if in_fmt or out_fmt is not one of the supported formats.
    """
    allowed_fmts = ("xyxy", "xywh", "cxcywh")
    # Raise explicitly instead of using `assert`: assertions are removed under
    # `python -O`, which would let an unknown format fall through silently.
    if in_fmt not in allowed_fmts or out_fmt not in allowed_fmts:
        raise ValueError("Unsupported Bounding Box Conversions for given in_fmt and out_fmt")

    if in_fmt == out_fmt:
        # Nothing to convert; hand back a copy so callers may mutate the result.
        return boxes.clone()

    # Step 1: bring the input to the common 'xyxy' representation.
    if in_fmt == "xywh":
        boxes = _box_xywh_to_xyxy(boxes)
    elif in_fmt == "cxcywh":
        boxes = _box_cxcywh_to_xyxy(boxes)

    # Step 2: go from 'xyxy' to the requested format (no-op when it is 'xyxy').
    # Because in_fmt != out_fmt here, at least one of the two steps runs, and
    # every helper builds a new tensor.
    if out_fmt == "xywh":
        boxes = _box_xyxy_to_xywh(boxes)
    elif out_fmt == "cxcywh":
        boxes = _box_xyxy_to_cxcywh(boxes)

    return boxes