diff --git a/test/test_transforms_v2_consistency.py b/test/test_transforms_v2_consistency.py
index 92635f8cdbd..f4ec9516ea4 100644
--- a/test/test_transforms_v2_consistency.py
+++ b/test/test_transforms_v2_consistency.py
@@ -82,14 +82,6 @@ def __init__(
         supports_pil=False,
         make_images_kwargs=dict(DEFAULT_MAKE_IMAGES_KWARGS, dtypes=[torch.float]),
     ),
-    ConsistencyConfig(
-        v2_transforms.CenterCrop,
-        legacy_transforms.CenterCrop,
-        [
-            ArgsKwargs(18),
-            ArgsKwargs((18, 13)),
-        ],
-    ),
     ConsistencyConfig(
         v2_transforms.FiveCrop,
         legacy_transforms.FiveCrop,
diff --git a/test/test_transforms_v2_functional.py b/test/test_transforms_v2_functional.py
index cb72e8ce28b..fc9958966c4 100644
--- a/test/test_transforms_v2_functional.py
+++ b/test/test_transforms_v2_functional.py
@@ -12,7 +12,6 @@
 from torchvision.transforms.functional import _get_perspective_coeffs
 from torchvision.transforms.v2 import functional as F
 from torchvision.transforms.v2._utils import is_pure_tensor
-from torchvision.transforms.v2.functional._geometry import _center_crop_compute_padding
 from torchvision.transforms.v2.functional._meta import clamp_bounding_boxes, convert_bounding_box_format
 from transforms_v2_dispatcher_infos import DISPATCHER_INFOS
 from transforms_v2_kernel_infos import KERNEL_INFOS
@@ -601,75 +600,6 @@ def _compute_expected_bbox(bbox, format_, canvas_size_, pcoeffs_):
     torch.testing.assert_close(output_bboxes, expected_bboxes, rtol=0, atol=1)
 
 
-@pytest.mark.parametrize("device", cpu_and_cuda())
-@pytest.mark.parametrize(
-    "output_size",
-    [(18, 18), [18, 15], (16, 19), [12], [46, 48]],
-)
-def test_correctness_center_crop_bounding_boxes(device, output_size):
-    def _compute_expected_bbox(bbox, format_, canvas_size_, output_size_):
-        dtype = bbox.dtype
-        bbox = convert_bounding_box_format(bbox.float(), format_, tv_tensors.BoundingBoxFormat.XYWH)
-
-        if len(output_size_) == 1:
-            output_size_.append(output_size_[-1])
-
-        cy = int(round((canvas_size_[0] - output_size_[0]) * 0.5))
-        cx = int(round((canvas_size_[1] - output_size_[1]) * 0.5))
-        out_bbox = [
-            bbox[0].item() - cx,
-            bbox[1].item() - cy,
-            bbox[2].item(),
-            bbox[3].item(),
-        ]
-        out_bbox = torch.tensor(out_bbox)
-        out_bbox = convert_bounding_box_format(out_bbox, tv_tensors.BoundingBoxFormat.XYWH, format_)
-        out_bbox = clamp_bounding_boxes(out_bbox, format=format_, canvas_size=output_size)
-        return out_bbox.to(dtype=dtype, device=bbox.device)
-
-    for bboxes in make_multiple_bounding_boxes(extra_dims=((4,),)):
-        bboxes = bboxes.to(device)
-        bboxes_format = bboxes.format
-        bboxes_canvas_size = bboxes.canvas_size
-
-        output_boxes, output_canvas_size = F.center_crop_bounding_boxes(
-            bboxes, bboxes_format, bboxes_canvas_size, output_size
-        )
-
-        expected_bboxes = torch.stack(
-            [
-                _compute_expected_bbox(b, bboxes_format, bboxes_canvas_size, output_size)
-                for b in bboxes.reshape(-1, 4).unbind()
-            ]
-        ).reshape(bboxes.shape)
-
-        torch.testing.assert_close(output_boxes, expected_bboxes, atol=1, rtol=0)
-        torch.testing.assert_close(output_canvas_size, output_size)
-
-
-@pytest.mark.parametrize("device", cpu_and_cuda())
-@pytest.mark.parametrize("output_size", [[4, 2], [4], [7, 6]])
-def test_correctness_center_crop_mask(device, output_size):
-    def _compute_expected_mask(mask, output_size):
-        crop_height, crop_width = output_size if len(output_size) > 1 else [output_size[0], output_size[0]]
-
-        _, image_height, image_width = mask.shape
-        if crop_width > image_height or crop_height > image_width:
-            padding = _center_crop_compute_padding(crop_height, crop_width, image_height, image_width)
-            mask = F.pad_image(mask, padding, fill=0)
-
-        left = round((image_width - crop_width) * 0.5)
-        top = round((image_height - crop_height) * 0.5)
-
-        return mask[:, top : top + crop_height, left : left + crop_width]
-
-    mask = torch.randint(0, 2, size=(1, 6, 6), dtype=torch.long, device=device)
-    actual = F.center_crop_mask(mask, output_size)
-
-    expected = _compute_expected_mask(mask, output_size)
-    torch.testing.assert_close(expected, actual)
-
-
 @pytest.mark.parametrize(
     "inpt",
     [
diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py
index b6a2a7053cb..f9d00610d57 100644
--- a/test/test_transforms_v2_refactored.py
+++ b/test/test_transforms_v2_refactored.py
@@ -3525,3 +3525,111 @@ def test_bounding_boxes_correctness(self, padding, format, dtype, device, fn):
         expected = self._reference_pad_bounding_boxes(bounding_boxes, padding=padding)
 
         assert_equal(actual, expected)
+
+
+class TestCenterCrop:
+    INPUT_SIZE = (17, 11)
+    OUTPUT_SIZES = [(3, 5), (5, 3), (4, 4), (21, 9), (13, 15), (19, 14), 3, (4,), [5], INPUT_SIZE]
+
+    @pytest.mark.parametrize("output_size", OUTPUT_SIZES)
+    @pytest.mark.parametrize("dtype", [torch.int64, torch.float32])
+    @pytest.mark.parametrize("device", cpu_and_cuda())
+    def test_kernel_image(self, output_size, dtype, device):
+        check_kernel(
+            F.center_crop_image,
+            make_image(self.INPUT_SIZE, dtype=dtype, device=device),
+            output_size=output_size,
+            check_scripted_vs_eager=not isinstance(output_size, int),
+        )
+
+    @pytest.mark.parametrize("output_size", OUTPUT_SIZES)
+    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    def test_kernel_bounding_boxes(self, output_size, format):
+        bounding_boxes = make_bounding_boxes(self.INPUT_SIZE, format=format)
+        check_kernel(
+            F.center_crop_bounding_boxes,
+            bounding_boxes,
+            format=bounding_boxes.format,
+            canvas_size=bounding_boxes.canvas_size,
+            output_size=output_size,
+            check_scripted_vs_eager=not isinstance(output_size, int),
+        )
+
+    @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_mask])
+    def test_kernel_mask(self, make_mask):
+        check_kernel(F.center_crop_mask, make_mask(), output_size=self.OUTPUT_SIZES[0])
+
+    def test_kernel_video(self):
+        check_kernel(F.center_crop_video, make_video(self.INPUT_SIZE), output_size=self.OUTPUT_SIZES[0])
+
+    @pytest.mark.parametrize(
+        "make_input",
+        [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
+    )
+    def test_functional(self, make_input):
+        check_functional(F.center_crop, make_input(self.INPUT_SIZE), output_size=self.OUTPUT_SIZES[0])
+
+    @pytest.mark.parametrize(
+        ("kernel", "input_type"),
+        [
+            (F.center_crop_image, torch.Tensor),
+            (F._center_crop_image_pil, PIL.Image.Image),
+            (F.center_crop_image, tv_tensors.Image),
+            (F.center_crop_bounding_boxes, tv_tensors.BoundingBoxes),
+            (F.center_crop_mask, tv_tensors.Mask),
+            (F.center_crop_video, tv_tensors.Video),
+        ],
+    )
+    def test_functional_signature(self, kernel, input_type):
+        check_functional_kernel_signature_match(F.center_crop, kernel=kernel, input_type=input_type)
+
+    @pytest.mark.parametrize(
+        "make_input",
+        [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
+    )
+    def test_transform(self, make_input):
+        check_transform(transforms.CenterCrop(self.OUTPUT_SIZES[0]), make_input(self.INPUT_SIZE))
+
+    @pytest.mark.parametrize("output_size", OUTPUT_SIZES)
+    @pytest.mark.parametrize("fn", [F.center_crop, transform_cls_to_functional(transforms.CenterCrop)])
+    def test_image_correctness(self, output_size, fn):
+        image = make_image(self.INPUT_SIZE, dtype=torch.uint8, device="cpu")
+
+        actual = fn(image, output_size)
+        expected = F.to_image(F.center_crop(F.to_pil_image(image), output_size=output_size))
+
+        assert_equal(actual, expected)
+
+    def _reference_center_crop_bounding_boxes(self, bounding_boxes, output_size):
+        image_height, image_width = bounding_boxes.canvas_size
+        if isinstance(output_size, int):
+            output_size = (output_size, output_size)
+        elif len(output_size) == 1:
+            output_size *= 2
+        crop_height, crop_width = output_size
+
+        top = int(round((image_height - crop_height) / 2))
+        left = int(round((image_width - crop_width) / 2))
+
+        affine_matrix = np.array(
+            [
+                [1, 0, -left],
+                [0, 1, -top],
+            ],
+        )
+        return reference_affine_bounding_boxes_helper(
+            bounding_boxes, affine_matrix=affine_matrix, new_canvas_size=output_size
+        )
+
+    @pytest.mark.parametrize("output_size", OUTPUT_SIZES)
+    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("dtype", [torch.int64, torch.float32])
+    @pytest.mark.parametrize("device", cpu_and_cuda())
+    @pytest.mark.parametrize("fn", [F.center_crop, transform_cls_to_functional(transforms.CenterCrop)])
+    def test_bounding_boxes_correctness(self, output_size, format, dtype, device, fn):
+        bounding_boxes = make_bounding_boxes(self.INPUT_SIZE, format=format, dtype=dtype, device=device)
+
+        actual = fn(bounding_boxes, output_size)
+        expected = self._reference_center_crop_bounding_boxes(bounding_boxes, output_size)
+
+        assert_equal(actual, expected)
diff --git a/test/transforms_v2_dispatcher_infos.py b/test/transforms_v2_dispatcher_infos.py
index 0071c6d3c51..806e58f3526 100644
--- a/test/transforms_v2_dispatcher_infos.py
+++ b/test/transforms_v2_dispatcher_infos.py
@@ -135,19 +135,6 @@ def xfail_jit_python_scalar_arg(name, *, reason=None):
         pil_kernel_info=PILKernelInfo(F._elastic_image_pil),
         test_marks=[xfail_jit_python_scalar_arg("fill")],
     ),
-    DispatcherInfo(
-        F.center_crop,
-        kernels={
-            tv_tensors.Image: F.center_crop_image,
-            tv_tensors.Video: F.center_crop_video,
-            tv_tensors.BoundingBoxes: F.center_crop_bounding_boxes,
-            tv_tensors.Mask: F.center_crop_mask,
-        },
-        pil_kernel_info=PILKernelInfo(F._center_crop_image_pil),
-        test_marks=[
-            xfail_jit_python_scalar_arg("output_size"),
-        ],
-    ),
     DispatcherInfo(
         F.equalize,
         kernels={
diff --git a/test/transforms_v2_kernel_infos.py b/test/transforms_v2_kernel_infos.py
index 3e0d2e0eeca..ca422115705 100644
--- a/test/transforms_v2_kernel_infos.py
+++ b/test/transforms_v2_kernel_infos.py
@@ -389,97 +389,6 @@ def sample_inputs_elastic_video():
     )
 
 
-_CENTER_CROP_SPATIAL_SIZES = [(16, 16), (7, 33), (31, 9)]
-_CENTER_CROP_OUTPUT_SIZES = [[4, 3], [42, 70], [4], 3, (5, 2), (6,)]
-
-
-def sample_inputs_center_crop_image_tensor():
-    for image_loader, output_size in itertools.product(
-        make_image_loaders(sizes=[(16, 17)], color_spaces=["RGB"], dtypes=[torch.float32]),
-        [
-            # valid `output_size` types for which cropping is applied to both dimensions
-            *[5, (4,), (2, 3), [6], [3, 2]],
-            # `output_size`'s for which at least one dimension needs to be padded
-            *[[4, 18], [17, 5], [17, 18]],
-        ],
-    ):
-        yield ArgsKwargs(image_loader, output_size=output_size)
-
-
-def reference_inputs_center_crop_image_tensor():
-    for image_loader, output_size in itertools.product(
-        make_image_loaders(sizes=_CENTER_CROP_SPATIAL_SIZES, extra_dims=[()], dtypes=[torch.uint8]),
-        _CENTER_CROP_OUTPUT_SIZES,
-    ):
-        yield ArgsKwargs(image_loader, output_size=output_size)
-
-
-def sample_inputs_center_crop_bounding_boxes():
-    for bounding_boxes_loader, output_size in itertools.product(make_bounding_box_loaders(), _CENTER_CROP_OUTPUT_SIZES):
-        yield ArgsKwargs(
-            bounding_boxes_loader,
-            format=bounding_boxes_loader.format,
-            canvas_size=bounding_boxes_loader.canvas_size,
-            output_size=output_size,
-        )
-
-
-def sample_inputs_center_crop_mask():
-    for mask_loader in make_mask_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_categories=[10], num_objects=[5]):
-        height, width = mask_loader.shape[-2:]
-        yield ArgsKwargs(mask_loader, output_size=(height // 2, width // 2))
-
-
-def reference_inputs_center_crop_mask():
-    for mask_loader, output_size in itertools.product(
-        make_mask_loaders(sizes=_CENTER_CROP_SPATIAL_SIZES, extra_dims=[()], num_objects=[1]), _CENTER_CROP_OUTPUT_SIZES
-    ):
-        yield ArgsKwargs(mask_loader, output_size=output_size)
-
-
-def sample_inputs_center_crop_video():
-    for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]):
-        height, width = video_loader.shape[-2:]
-        yield ArgsKwargs(video_loader, output_size=(height // 2, width // 2))
-
-
-KERNEL_INFOS.extend(
-    [
-        KernelInfo(
-            F.center_crop_image,
-            sample_inputs_fn=sample_inputs_center_crop_image_tensor,
-            reference_fn=pil_reference_wrapper(F._center_crop_image_pil),
-            reference_inputs_fn=reference_inputs_center_crop_image_tensor,
-            float32_vs_uint8=True,
-            test_marks=[
-                xfail_jit_python_scalar_arg("output_size"),
-            ],
-        ),
-        KernelInfo(
-            F.center_crop_bounding_boxes,
-            sample_inputs_fn=sample_inputs_center_crop_bounding_boxes,
-            test_marks=[
-                xfail_jit_python_scalar_arg("output_size"),
-            ],
-        ),
-        KernelInfo(
-            F.center_crop_mask,
-            sample_inputs_fn=sample_inputs_center_crop_mask,
-            reference_fn=pil_reference_wrapper(F._center_crop_image_pil),
-            reference_inputs_fn=reference_inputs_center_crop_mask,
-            float32_vs_uint8=True,
-            test_marks=[
-                xfail_jit_python_scalar_arg("output_size"),
-            ],
-        ),
-        KernelInfo(
-            F.center_crop_video,
-            sample_inputs_fn=sample_inputs_center_crop_video,
-        ),
-    ]
-)
-
-
 def sample_inputs_equalize_image_tensor():
     for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")):
         yield ArgsKwargs(image_loader)