Skip to content

Commit

Permalink
Adds bounding boxes conversion (#2710)
Browse files Browse the repository at this point in the history
* adds boxes conversion

* adds documentation

* adds xywh tests

* fixes small typo

* adds tests

* Remove sphinx theme

* corrects assertions

* cleans code as per suggestion

Signed-off-by: Aditya Oke <okeaditya315@gmail.com>

* reverts assertion

* fixes to assertEqual

* fixes inplace operations

* Adds docstrings

* added documentation

* changes tests

* moves code to box_convert

* adds more tests

* Apply suggestions from code review

Let's leave those changes to a separate PR

* fixes documentation

Co-authored-by: Francisco Massa <fvsmassa@gmail.com>
  • Loading branch information
oke-aditya and fmassa authored Oct 1, 2020
1 parent 786ec32 commit e70c91a
Show file tree
Hide file tree
Showing 5 changed files with 239 additions and 1 deletion.
1 change: 1 addition & 0 deletions docs/source/ops.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ torchvision.ops
.. autofunction:: batched_nms
.. autofunction:: remove_small_boxes
.. autofunction:: clip_boxes_to_image
.. autofunction:: box_convert
.. autofunction:: box_area
.. autofunction:: box_iou
.. autofunction:: generalized_box_iou
Expand Down
96 changes: 96 additions & 0 deletions test/test_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,102 @@ def test_convert_boxes_to_roi_format(self):
self.assertTrue(torch.equal(ref_tensor, ops._utils.convert_boxes_to_roi_format(box_sequence)))


class BoxTester(unittest.TestCase):
    """Tests for ``ops.box_convert`` across the 'xyxy', 'xywh' and 'cxcywh' formats."""

    def _assert_boxes_equal(self, actual, expected):
        # Inspect the tensor returned by box_convert (not the expected fixture),
        # so that a wrong output shape or dtype actually fails the test.
        self.assertEqual(actual.size(), expected.size())
        self.assertEqual(actual.dtype, expected.dtype)
        # Use a unittest assertion rather than a bare ``assert`` so the value
        # check is still executed when Python runs with ``-O``.
        self.assertTrue(torch.equal(actual, expected))

    def test_bbox_same(self):
        # Converting to the same format must return the box values unchanged.
        box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                                   [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)

        exp_same = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                                 [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)

        for fmt in ("xyxy", "xywh", "cxcywh"):
            box_same = ops.box_convert(box_tensor, in_fmt=fmt, out_fmt=fmt)
            self._assert_boxes_equal(box_same, exp_same)

    def test_bbox_xyxy_xywh(self):
        # Convert boxes to xywh and back. Make sure they are the same.
        # box_tensor is in x1 y1 x2 y2 format.
        box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                                   [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)
        exp_xywh = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                                 [10, 15, 20, 20], [23, 35, 70, 60]], dtype=torch.float)

        box_xywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="xywh")
        self._assert_boxes_equal(box_xywh, exp_xywh)

        # Reverse conversion
        box_xyxy = ops.box_convert(box_xywh, in_fmt="xywh", out_fmt="xyxy")
        self._assert_boxes_equal(box_xyxy, box_tensor)

    def test_bbox_xyxy_cxcywh(self):
        # Convert boxes to cxcywh and back. Make sure they are the same.
        # box_tensor is in x1 y1 x2 y2 format.
        box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                                   [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)
        exp_cxcywh = torch.tensor([[50, 50, 100, 100], [0, 0, 0, 0],
                                   [20, 25, 20, 20], [58, 65, 70, 60]], dtype=torch.float)

        box_cxcywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="cxcywh")
        self._assert_boxes_equal(box_cxcywh, exp_cxcywh)

        # Reverse conversion
        box_xyxy = ops.box_convert(box_cxcywh, in_fmt="cxcywh", out_fmt="xyxy")
        self._assert_boxes_equal(box_xyxy, box_tensor)

    def test_bbox_xywh_cxcywh(self):
        # box_tensor is in x y w h format (top-left corner plus width/height).
        box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                                   [10, 15, 20, 20], [23, 35, 70, 60]], dtype=torch.float)

        # Expected centers are (x + w / 2, y + h / 2); width and height are unchanged,
        # e.g. [10, 15, 20, 20] -> [20, 25, 20, 20].
        exp_cxcywh = torch.tensor([[50, 50, 100, 100], [0, 0, 0, 0],
                                   [20, 25, 20, 20], [58, 65, 70, 60]], dtype=torch.float)

        box_cxcywh = ops.box_convert(box_tensor, in_fmt="xywh", out_fmt="cxcywh")
        self._assert_boxes_equal(box_cxcywh, exp_cxcywh)

        # Reverse conversion
        box_xywh = ops.box_convert(box_cxcywh, in_fmt="cxcywh", out_fmt="xywh")
        self._assert_boxes_equal(box_xywh, box_tensor)

    # NOTE(review): the TorchScript test below was left disabled by the original
    # commit; presumably scripting box_convert was not working yet -- confirm
    # before re-enabling.
    # def test_bbox_convert_jit(self):
    #     box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
    #                                [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)

    #     scripted_fn = torch.jit.script(ops.box_convert)
    #     TOLERANCE = 1e-3

    #     box_xywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="xywh")
    #     scripted_xywh = scripted_fn(box_tensor, 'xyxy', 'xywh')
    #     self.assertTrue((scripted_xywh - box_xywh).abs().max() < TOLERANCE)

    #     box_cxcywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="cxcywh")
    #     scripted_cxcywh = scripted_fn(box_tensor, 'xyxy', 'cxcywh')
    #     self.assertTrue((scripted_cxcywh - box_cxcywh).abs().max() < TOLERANCE)


class BoxAreaTester(unittest.TestCase):
def test_box_area(self):
# A bounding box of area 10000 and a degenerate case
Expand Down
4 changes: 3 additions & 1 deletion torchvision/ops/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .boxes import nms, batched_nms, remove_small_boxes, clip_boxes_to_image, box_area, box_iou, generalized_box_iou
from .boxes import box_convert
from .new_empty_tensor import _new_empty_tensor
from .deform_conv import deform_conv2d, DeformConv2d
from .roi_align import roi_align, RoIAlign
Expand All @@ -15,7 +16,8 @@

__all__ = [
'deform_conv2d', 'DeformConv2d', 'nms', 'batched_nms', 'remove_small_boxes',
'clip_boxes_to_image', 'box_area', 'box_iou', 'generalized_box_iou', 'roi_align', 'RoIAlign', 'roi_pool',
'clip_boxes_to_image', 'box_convert',
'box_area', 'box_iou', 'generalized_box_iou', 'roi_align', 'RoIAlign', 'roi_pool',
'RoIPool', '_new_empty_tensor', 'ps_roi_align', 'PSRoIAlign', 'ps_roi_pool',
'PSRoIPool', 'MultiScaleRoIAlign', 'FeaturePyramidNetwork'
]
83 changes: 83 additions & 0 deletions torchvision/ops/_box_convert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import torch
from torch.jit.annotations import Tuple
from torch import Tensor
import torchvision


def _box_cxcywh_to_xyxy(boxes: Tensor) -> Tensor:
    """
    Turns center-format boxes (cx, cy, w, h) into corner-format boxes (x1, y1, x2, y2).
    (cx, cy) is the center of each box and (w, h) are its width and height.

    Arguments:
        boxes (Tensor[N, 4]): boxes in (cx, cy, w, h) format which will be converted.

    Returns:
        boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format.
    """
    center_x, center_y, width, height = boxes.unbind(-1)

    # Every corner lies half an extent away from the center along each axis.
    half_w = 0.5 * width
    half_h = 0.5 * height

    return torch.stack(
        (center_x - half_w, center_y - half_h, center_x + half_w, center_y + half_h),
        dim=-1,
    )


def _box_xyxy_to_cxcywh(boxes: Tensor) -> Tensor:
    """
    Turns corner-format boxes (x1, y1, x2, y2) into center-format boxes (cx, cy, w, h).
    (x1, y1) is the top left corner and (x2, y2) the bottom right corner of each box.

    Arguments:
        boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format which will be converted.

    Returns:
        boxes (Tensor[N, 4]): boxes in (cx, cy, w, h) format.
    """
    left, top, right, bottom = boxes.unbind(-1)

    return torch.stack(
        (
            (left + right) / 2,  # cx: midpoint of the horizontal edges
            (top + bottom) / 2,  # cy: midpoint of the vertical edges
            right - left,  # w
            bottom - top,  # h
        ),
        dim=-1,
    )


def _box_xywh_to_xyxy(boxes: Tensor) -> Tensor:
    """
    Turns (x, y, w, h) boxes into corner-format (x1, y1, x2, y2) boxes.
    (x, y) is the top left corner of each bounding box.
    (w, h) are the width and height of each box.

    Arguments:
        boxes (Tensor[N, 4]): boxes in (x, y, w, h) which will be converted.

    Returns:
        boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format.
    """
    left, top, width, height = boxes.unbind(-1)
    # The bottom right corner is the top left corner shifted by the box extent.
    right = left + width
    bottom = top + height
    return torch.stack([left, top, right, bottom], dim=-1)


def _box_xyxy_to_xywh(boxes: Tensor) -> Tensor:
    """
    Turns corner-format (x1, y1, x2, y2) boxes into (x, y, w, h) boxes.
    (x1, y1) is the top left corner and (x2, y2) the bottom right corner of each box.

    Arguments:
        boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) which will be converted.

    Returns:
        boxes (Tensor[N, 4]): boxes in (x, y, w, h) format.
    """
    left, top, right, bottom = boxes.unbind(-1)
    # The top left corner is kept; the opposite corner is replaced by the extent.
    width = right - left
    height = bottom - top
    return torch.stack((left, top, width, height), dim=-1)
56 changes: 56 additions & 0 deletions torchvision/ops/boxes.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import torch
from torch.jit.annotations import Tuple
from torch import Tensor
from ._box_convert import _box_cxcywh_to_xyxy, _box_xyxy_to_cxcywh, _box_xywh_to_xyxy, _box_xyxy_to_xywh
import torchvision


Expand Down Expand Up @@ -133,6 +134,61 @@ def clip_boxes_to_image(boxes: Tensor, size: Tuple[int, int]) -> Tensor:
return clipped_boxes.reshape(boxes.shape)


def box_convert(boxes: Tensor, in_fmt: str, out_fmt: str) -> Tensor:
    """
    Converts boxes from given in_fmt to out_fmt.
    Supported in_fmt and out_fmt are:

    'xyxy': boxes are represented via corners, x1, y1 being top left and x2, y2 being bottom right.

    'xywh' : boxes are represented via corner, width and height, x1, y1 being top left, w, h being
    width and height.

    'cxcywh' : boxes are represented via centre, width and height, cx, cy being center of box, w, h
    being width and height.

    Arguments:
        boxes (Tensor[N, 4]): boxes which will be converted.
        in_fmt (str): Input format of given boxes. Supported formats are ['xyxy', 'xywh', 'cxcywh'].
        out_fmt (str): Output format of given boxes. Supported formats are ['xyxy', 'xywh', 'cxcywh']

    Returns:
        boxes (Tensor[N, 4]): Boxes into converted format. This is always a new tensor;
        the input tensor is not modified.

    Raises:
        ValueError: if in_fmt or out_fmt is not one of the supported formats.
    """
    allowed_fmts = ("xyxy", "xywh", "cxcywh")
    # Explicit raise instead of ``assert``: assertions are stripped under ``python -O``,
    # which would let an unsupported format fall through every branch below.
    if in_fmt not in allowed_fmts or out_fmt not in allowed_fmts:
        raise ValueError("Unsupported Bounding Box Conversions for given in_fmt and out_fmt")

    if in_fmt == out_fmt:
        # Nothing to convert, but still hand back a copy so callers never alias the input.
        return boxes.clone()

    # 'xyxy' is the pivot format: first bring the input to 'xyxy' (if it is not already) ...
    if in_fmt == "xywh":
        boxes = _box_xywh_to_xyxy(boxes)
    elif in_fmt == "cxcywh":
        boxes = _box_cxcywh_to_xyxy(boxes)

    # ... then go from 'xyxy' to the requested output format (if it is not 'xyxy').
    if out_fmt == "xywh":
        boxes = _box_xyxy_to_xywh(boxes)
    elif out_fmt == "cxcywh":
        boxes = _box_xyxy_to_cxcywh(boxes)

    return boxes


def box_area(boxes: Tensor) -> Tensor:
"""
Computes the area of a set of bounding boxes, which are specified by its
Expand Down

0 comments on commit e70c91a

Please sign in to comment.