From 11b702902b3b16aed54979384d49c5e0e69be4d5 Mon Sep 17 00:00:00 2001 From: Tao Gong Date: Mon, 13 Dec 2021 10:25:12 +0800 Subject: [PATCH] Support `bbox_clip_border` for the augmentations of YOLOX (#6730) * support 'bbox_clip_border' for the augmentations of YOLOX * update based on 1-st comments * add comments * fix typos * rename remove_ouside_bboxes to find_inside_bboxes * move comments to docstring --- mmdet/core/bbox/__init__.py | 4 +- mmdet/core/bbox/transforms.py | 16 +++++ mmdet/datasets/pipelines/transforms.py | 96 +++++++++++++++++++------- 3 files changed, 89 insertions(+), 27 deletions(-) diff --git a/mmdet/core/bbox/__init__.py b/mmdet/core/bbox/__init__.py index 1e3fa12d8fe..371eba198e9 100644 --- a/mmdet/core/bbox/__init__.py +++ b/mmdet/core/bbox/__init__.py @@ -12,7 +12,7 @@ from .transforms import (bbox2distance, bbox2result, bbox2roi, bbox_cxcywh_to_xyxy, bbox_flip, bbox_mapping, bbox_mapping_back, bbox_rescale, bbox_xyxy_to_cxcywh, - distance2bbox, roi2bbox) + distance2bbox, find_inside_bboxes, roi2bbox) __all__ = [ 'bbox_overlaps', 'BboxOverlaps2D', 'BaseAssigner', 'MaxIoUAssigner', @@ -24,5 +24,5 @@ 'build_bbox_coder', 'BaseBBoxCoder', 'PseudoBBoxCoder', 'DeltaXYWHBBoxCoder', 'TBLRBBoxCoder', 'DistancePointBBoxCoder', 'CenterRegionAssigner', 'bbox_rescale', 'bbox_cxcywh_to_xyxy', - 'bbox_xyxy_to_cxcywh', 'RegionAssigner' + 'bbox_xyxy_to_cxcywh', 'RegionAssigner', 'find_inside_bboxes' ] diff --git a/mmdet/core/bbox/transforms.py b/mmdet/core/bbox/transforms.py index 246028b439e..6d72076a562 100644 --- a/mmdet/core/bbox/transforms.py +++ b/mmdet/core/bbox/transforms.py @@ -3,6 +3,22 @@ import torch +def find_inside_bboxes(bboxes, img_h, img_w): + """Find bboxes as long as a part of bboxes is inside the image. + + Args: + bboxes (Tensor): Shape (N, 4). + img_h (int): Image height. + img_w (int): Image width. + + Returns: + Tensor: Index of the remaining bboxes. + """ + inside_inds = (bboxes[:, 0] < img_w) & (bboxes[:, 2] > 0) \ + & (bboxes[:, 1] < img_h) & (bboxes[:, 3] > 0) + return inside_inds + + def bbox_flip(bboxes, img_shape, direction='horizontal'): """Flip bboxes horizontally or vertically. diff --git a/mmdet/datasets/pipelines/transforms.py b/mmdet/datasets/pipelines/transforms.py index 47f25c2697d..06c27bfa8c3 100644 --- a/mmdet/datasets/pipelines/transforms.py +++ b/mmdet/datasets/pipelines/transforms.py @@ -9,7 +9,7 @@ import numpy as np from numpy import random -from mmdet.core import PolygonMasks +from mmdet.core import PolygonMasks, find_inside_bboxes from mmdet.core.evaluation.bbox_overlaps import bbox_overlaps from ..builder import PIPELINES @@ -54,8 +54,10 @@ class Resize: ratio_range (tuple[float]): (min_ratio, max_ratio) keep_ratio (bool): Whether to keep the aspect ratio when resizing the image. - bbox_clip_border (bool, optional): Whether clip the objects outside - the border of the image. Defaults to True. + bbox_clip_border (bool, optional): Whether to clip the objects outside + the border of the image. In some dataset like MOT17, the gt bboxes + are allowed to cross the border of images. Therefore, we don't + need to clip the gt bboxes in these cases. Defaults to True. backend (str): Image resize backend, choices are 'cv2' and 'pillow'. These two backends generates slightly different results. Defaults to 'cv2'. @@ -1982,6 +1984,10 @@ class Mosaic: output. Default to (0.5, 1.5). min_bbox_size (int | float): The minimum pixel for filtering invalid bboxes after the mosaic pipeline. Default to 0. + bbox_clip_border (bool, optional): Whether to clip the objects outside + the border of the image. In some dataset like MOT17, the gt bboxes + are allowed to cross the border of images. Therefore, we don't + need to clip the gt bboxes in these cases. Defaults to True. skip_filter (bool): Whether to skip filtering rules. If it is True, the filter rule will not be applied, and the `min_bbox_size` is invalid. Default to True. @@ -1992,12 +1998,14 @@ def __init__(self, img_scale=(640, 640), center_ratio_range=(0.5, 1.5), min_bbox_size=0, + bbox_clip_border=True, skip_filter=True, pad_val=114): assert isinstance(img_scale, tuple) self.img_scale = img_scale self.center_ratio_range = center_ratio_range self.min_bbox_size = min_bbox_size + self.bbox_clip_border = bbox_clip_border self.skip_filter = skip_filter self.pad_val = pad_val @@ -2099,16 +2107,24 @@ def _mosaic_transform(self, results): if len(mosaic_labels) > 0: mosaic_bboxes = np.concatenate(mosaic_bboxes, 0) - mosaic_bboxes[:, 0::2] = np.clip(mosaic_bboxes[:, 0::2], 0, - 2 * self.img_scale[1]) - mosaic_bboxes[:, 1::2] = np.clip(mosaic_bboxes[:, 1::2], 0, - 2 * self.img_scale[0]) mosaic_labels = np.concatenate(mosaic_labels, 0) + if self.bbox_clip_border: + mosaic_bboxes[:, 0::2] = np.clip(mosaic_bboxes[:, 0::2], 0, + 2 * self.img_scale[1]) + mosaic_bboxes[:, 1::2] = np.clip(mosaic_bboxes[:, 1::2], 0, + 2 * self.img_scale[0]) + if not self.skip_filter: mosaic_bboxes, mosaic_labels = \ self._filter_box_candidates(mosaic_bboxes, mosaic_labels) + # remove outside bboxes + inside_inds = find_inside_bboxes(mosaic_bboxes, 2 * self.img_scale[0], + 2 * self.img_scale[1]) + mosaic_bboxes = mosaic_bboxes[inside_inds] + mosaic_labels = mosaic_labels[inside_inds] + results['img'] = mosaic_img results['img_shape'] = mosaic_img.shape results['gt_bboxes'] = mosaic_bboxes @@ -2243,6 +2259,10 @@ class MixUp: max_aspect_ratio (float): Aspect ratio of width and height threshold to filter bboxes. If max(h/w, w/h) larger than this value, the box will be removed. Default: 20. + bbox_clip_border (bool, optional): Whether to clip the objects outside + the border of the image. In some dataset like MOT17, the gt bboxes + are allowed to cross the border of images. Therefore, we don't + need to clip the gt bboxes in these cases. Defaults to True. skip_filter (bool): Whether to skip filtering rules. If it is True, the filter rule will not be applied, and the `min_bbox_size` and `min_area_ratio` and `max_aspect_ratio` @@ -2258,6 +2278,7 @@ def __init__(self, min_bbox_size=5, min_area_ratio=0.2, max_aspect_ratio=20, + bbox_clip_border=True, skip_filter=True): assert isinstance(img_scale, tuple) self.dynamic_scale = img_scale @@ -2268,6 +2289,7 @@ def __init__(self, self.min_bbox_size = min_bbox_size self.min_area_ratio = min_area_ratio self.max_aspect_ratio = max_aspect_ratio + self.bbox_clip_border = bbox_clip_border self.skip_filter = skip_filter def __call__(self, results): @@ -2371,10 +2393,13 @@ def _mixup_transform(self, results): # 6. adjust bbox retrieve_gt_bboxes = retrieve_results['gt_bboxes'] - retrieve_gt_bboxes[:, 0::2] = np.clip( - retrieve_gt_bboxes[:, 0::2] * scale_ratio, 0, origin_w) - retrieve_gt_bboxes[:, 1::2] = np.clip( - retrieve_gt_bboxes[:, 1::2] * scale_ratio, 0, origin_h) + retrieve_gt_bboxes[:, 0::2] = retrieve_gt_bboxes[:, 0::2] * scale_ratio + retrieve_gt_bboxes[:, 1::2] = retrieve_gt_bboxes[:, 1::2] * scale_ratio + if self.bbox_clip_border: + retrieve_gt_bboxes[:, 0::2] = np.clip(retrieve_gt_bboxes[:, 0::2], + 0, origin_w) + retrieve_gt_bboxes[:, 1::2] = np.clip(retrieve_gt_bboxes[:, 1::2], + 0, origin_h) if is_filp: retrieve_gt_bboxes[:, 0::2] = ( @@ -2382,10 +2407,15 @@ def _mixup_transform(self, results): # 7. filter cp_retrieve_gt_bboxes = retrieve_gt_bboxes.copy() - cp_retrieve_gt_bboxes[:, 0::2] = np.clip( - cp_retrieve_gt_bboxes[:, 0::2] - x_offset, 0, target_w) - cp_retrieve_gt_bboxes[:, 1::2] = np.clip( - cp_retrieve_gt_bboxes[:, 1::2] - y_offset, 0, target_h) + cp_retrieve_gt_bboxes[:, 0::2] = \ + cp_retrieve_gt_bboxes[:, 0::2] - x_offset + cp_retrieve_gt_bboxes[:, 1::2] = \ + cp_retrieve_gt_bboxes[:, 1::2] - y_offset + if self.bbox_clip_border: + cp_retrieve_gt_bboxes[:, 0::2] = np.clip( + cp_retrieve_gt_bboxes[:, 0::2], 0, target_w) + cp_retrieve_gt_bboxes[:, 1::2] = np.clip( + cp_retrieve_gt_bboxes[:, 1::2], 0, target_h) # 8. mix up ori_img = ori_img.astype(np.float32) @@ -2405,6 +2435,11 @@ def _mixup_transform(self, results): mixup_gt_labels = np.concatenate( (results['gt_labels'], retrieve_gt_labels), axis=0) + # remove outside bbox + inside_inds = find_inside_bboxes(mixup_gt_bboxes, target_h, target_w) + mixup_gt_bboxes = mixup_gt_bboxes[inside_inds] + mixup_gt_labels = mixup_gt_labels[inside_inds] + results['img'] = mixup_img.astype(np.uint8) results['img_shape'] = mixup_img.shape results['gt_bboxes'] = mixup_gt_bboxes @@ -2471,6 +2506,10 @@ class RandomAffine: max_aspect_ratio (float): Aspect ratio of width and height threshold to filter bboxes. If max(h/w, w/h) larger than this value, the box will be removed. + bbox_clip_border (bool, optional): Whether to clip the objects outside + the border of the image. In some dataset like MOT17, the gt bboxes + are allowed to cross the border of images. Therefore, we don't + need to clip the gt bboxes in these cases. Defaults to True. skip_filter (bool): Whether to skip filtering rules. If it is True, the filter rule will not be applied, and the `min_bbox_size` and `min_area_ratio` and `max_aspect_ratio` @@ -2487,6 +2526,7 @@ def __init__(self, min_bbox_size=2, min_area_ratio=0.2, max_aspect_ratio=20, + bbox_clip_border=True, skip_filter=True): assert 0 <= max_translate_ratio <= 1 assert scaling_ratio_range[0] <= scaling_ratio_range[1] @@ -2500,6 +2540,7 @@ def __init__(self, self.min_bbox_size = min_bbox_size self.min_area_ratio = min_area_ratio self.max_aspect_ratio = max_aspect_ratio + self.bbox_clip_border = bbox_clip_border self.skip_filter = skip_filter def __call__(self, results): @@ -2560,20 +2601,25 @@ def __call__(self, results): warp_bboxes = np.vstack( (xs.min(1), ys.min(1), xs.max(1), ys.max(1))).T - warp_bboxes[:, [0, 2]] = warp_bboxes[:, [0, 2]].clip(0, width) - warp_bboxes[:, [1, 3]] = warp_bboxes[:, [1, 3]].clip(0, height) + if self.bbox_clip_border: + warp_bboxes[:, [0, 2]] = \ + warp_bboxes[:, [0, 2]].clip(0, width) + warp_bboxes[:, [1, 3]] = \ + warp_bboxes[:, [1, 3]].clip(0, height) + # remove outside bbox + valid_index = find_inside_bboxes(warp_bboxes, height, width) if not self.skip_filter: # filter bboxes - valid_index = self.filter_gt_bboxes( + filter_index = self.filter_gt_bboxes( bboxes * scaling_ratio, warp_bboxes) - results[key] = warp_bboxes[valid_index] - if key in ['gt_bboxes']: - if 'gt_labels' in results: - results['gt_labels'] = results['gt_labels'][ - valid_index] - else: - results[key] = warp_bboxes + valid_index = valid_index & filter_index + + results[key] = warp_bboxes[valid_index] + if key in ['gt_bboxes']: + if 'gt_labels' in results: + results['gt_labels'] = results['gt_labels'][ + valid_index] if 'gt_masks' in results: raise NotImplementedError(