From ce441f6bde971a4231e9178b8531b620e478bed3 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Wed, 30 Aug 2023 14:58:01 +0200 Subject: [PATCH 1/8] remove trailing _tensor from image kernel checks (#7908) --- test/test_transforms_v2_refactored.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py index 9de1ed6d78a..544484b8fff 100644 --- a/test/test_transforms_v2_refactored.py +++ b/test/test_transforms_v2_refactored.py @@ -459,7 +459,7 @@ def _compute_output_size(self, *, input_size, size, max_size): @pytest.mark.parametrize("antialias", [True, False]) @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8]) @pytest.mark.parametrize("device", cpu_and_cuda()) - def test_kernel_image_tensor(self, size, interpolation, use_max_size, antialias, dtype, device): + def test_kernel_image(self, size, interpolation, use_max_size, antialias, dtype, device): if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)): return @@ -830,7 +830,7 @@ def test_float16_no_rounding(self): class TestHorizontalFlip: @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8]) @pytest.mark.parametrize("device", cpu_and_cuda()) - def test_kernel_image_tensor(self, dtype, device): + def test_kernel_image(self, dtype, device): check_kernel(F.horizontal_flip_image, make_image(dtype=dtype, device=device)) @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) @@ -980,7 +980,7 @@ def _check_kernel(self, kernel, input, *args, **kwargs): ) @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8]) @pytest.mark.parametrize("device", cpu_and_cuda()) - def test_kernel_image_tensor(self, param, value, dtype, device): + def test_kernel_image(self, param, value, dtype, device): if param == "fill": value = adapt_fill(value, dtype=dtype) self._check_kernel( @@ -1280,7 +1280,7 @@ def test_transform_unknown_fill_error(self): class TestVerticalFlip: @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8]) @pytest.mark.parametrize("device", cpu_and_cuda()) - def test_kernel_image_tensor(self, dtype, device): + def test_kernel_image(self, dtype, device): check_kernel(F.vertical_flip_image, make_image(dtype=dtype, device=device)) @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) @@ -1404,7 +1404,7 @@ class TestRotate: ) @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8]) @pytest.mark.parametrize("device", cpu_and_cuda()) - def test_kernel_image_tensor(self, param, value, dtype, device): + def test_kernel_image(self, param, value, dtype, device): kwargs = {param: value} if param != "angle": kwargs["angle"] = self._MINIMAL_AFFINE_KWARGS["angle"] @@ -2382,7 +2382,7 @@ def _make_displacement(self, inpt): ) @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8]) @pytest.mark.parametrize("device", cpu_and_cuda()) - def test_kernel_image_tensor(self, param, value, dtype, device): + def test_kernel_image(self, param, value, dtype, device): image = make_image_tensor(dtype=dtype, device=device) check_kernel( From 58f834a39a2328da666fe08ad0be3e2ae4aaa604 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Wed, 30 Aug 2023 14:16:14 +0100 Subject: [PATCH 2/8] Bunch of doc edits (#7906) --- docs/source/transforms.rst | 45 ++++++++++++------- docs/source/tv_tensors.rst | 10 +++-- gallery/README.rst | 2 + gallery/transforms/plot_transforms_e2e.py | 13 ++++++ .../plot_transforms_getting_started.py | 2 + 5 files changed, 52 insertions(+), 20 
deletions(-)

diff --git a/docs/source/transforms.rst b/docs/source/transforms.rst
index 3cae407a70a..2aa1fc5ba1e 100644
--- a/docs/source/transforms.rst
+++ b/docs/source/transforms.rst
@@ -33,20 +33,33 @@ tasks (image classification, detection, segmentation, video classification).
     from torchvision import tv_tensors

     img = torch.randint(0, 256, size=(3, H, W), dtype=torch.uint8)
-    bboxes = torch.randint(0, H // 2, size=(3, 4))
-    bboxes[:, 2:] += bboxes[:, :2]
-    bboxes = tv_tensors.BoundingBoxes(bboxes, format="XYXY", canvas_size=(H, W))
+    boxes = torch.randint(0, H // 2, size=(3, 4))
+    boxes[:, 2:] += boxes[:, :2]
+    boxes = tv_tensors.BoundingBoxes(boxes, format="XYXY", canvas_size=(H, W))

     # The same transforms can be used!
-    img, bboxes = transforms(img, bboxes)
+    img, boxes = transforms(img, boxes)

     # And you can pass arbitrary input structures
-    output_dict = transforms({"image": img, "bboxes": bboxes})
+    output_dict = transforms({"image": img, "boxes": boxes})

 Transforms are typically passed as the ``transform`` or ``transforms`` argument
 to the :ref:`Datasets `.

-.. TODO: Reader guide, i.e. what to read depending on what you're looking for
-.. TODO: add link to getting started guide here.
+Start here
+----------
+
+Whether you're new to Torchvision transforms, or you're already experienced with
+them, we encourage you to start with
+:ref:`sphx_glr_auto_examples_transforms_plot_transforms_getting_started.py` in
+order to learn more about what can be done with the new v2 transforms.
+
+Then, browse the sections below on this page for general information and
+performance tips. The available transforms and functionals are listed in the
+:ref:`API reference `.
+
+More information and tutorials can also be found in our :ref:`example gallery
+`, e.g. :ref:`sphx_glr_auto_examples_transforms_plot_transforms_e2e.py`
+or :ref:`sphx_glr_auto_examples_transforms_plot_custom_transforms.py`.

 .. _conventions:

@@ -98,25 +111,21 @@ advantages compared to the v1 ones (in ``torchvision.transforms``):

 - They can transform images **but also** bounding boxes, masks, or videos. This
   provides support for tasks beyond image classification: detection, segmentation,
-  video classification, etc.
+  video classification, etc. See
+  :ref:`sphx_glr_auto_examples_transforms_plot_transforms_getting_started.py`
+  and :ref:`sphx_glr_auto_examples_transforms_plot_transforms_e2e.py`.
 - They support more transforms like :class:`~torchvision.transforms.v2.CutMix`
-  and :class:`~torchvision.transforms.v2.MixUp`.
+  and :class:`~torchvision.transforms.v2.MixUp`. See
+  :ref:`sphx_glr_auto_examples_transforms_plot_cutmix_mixup.py`.
 - They're :ref:`faster `.
 - They support arbitrary input structures (dicts, lists, tuples, etc.).
 - Future improvements and features will be added to the v2 transforms only.

-.. TODO: Add link to e2e example for first bullet point.
-
 These transforms are **fully backward compatible** with the v1 ones, so if you're
 already using transforms from ``torchvision.transforms``, all you need to do is
 to update the import to ``torchvision.transforms.v2``. In terms of output, there
 might be negligible differences due to implementation differences.

-To learn more about the v2 transforms, check out
-:ref:`sphx_glr_auto_examples_transforms_plot_transforms_getting_started.py`.
-
-.. TODO: make sure link is still good!!
-
 ..
note:: The v2 transforms are still BETA, but at this point we do not expect @@ -184,7 +193,7 @@ This is very much like the :mod:`torch.nn` package which defines both classes and functional equivalents in :mod:`torch.nn.functional`. The functionals support PIL images, pure tensors, or :ref:`TVTensors -`, e.g. both ``resize(image_tensor)`` and ``resize(bboxes)`` are +`, e.g. both ``resize(image_tensor)`` and ``resize(boxes)`` are valid. .. note:: @@ -248,6 +257,8 @@ be derived from ``torch.nn.Module``. See also: :ref:`sphx_glr_auto_examples_others_plot_scripted_tensor_transforms.py`. +.. _v2_api_ref: + V2 API reference - Recommended ------------------------------ diff --git a/docs/source/tv_tensors.rst b/docs/source/tv_tensors.rst index e80a1ed88fb..cb8a3c45fa9 100644 --- a/docs/source/tv_tensors.rst +++ b/docs/source/tv_tensors.rst @@ -7,9 +7,13 @@ TVTensors TVTensors are :class:`torch.Tensor` subclasses which the v2 :ref:`transforms ` use under the hood to dispatch their inputs to the appropriate -lower-level kernels. Most users do not need to manipulate TVTensors directly and -can simply rely on dataset wrapping - see e.g. -:ref:`sphx_glr_auto_examples_transforms_plot_transforms_e2e.py`. +lower-level kernels. Most users do not need to manipulate TVTensors directly. + +Refer to +:ref:`sphx_glr_auto_examples_transforms_plot_transforms_getting_started.py` for +an introduction to TVTensors, or +:ref:`sphx_glr_auto_examples_transforms_plot_tv_tensors.py` for more advanced +info. .. autosummary:: :toctree: generated/ diff --git a/gallery/README.rst b/gallery/README.rst index 9a0838f493f..8dfea355276 100644 --- a/gallery/README.rst +++ b/gallery/README.rst @@ -1,2 +1,4 @@ +.. _gallery: + Examples and tutorials ====================== diff --git a/gallery/transforms/plot_transforms_e2e.py b/gallery/transforms/plot_transforms_e2e.py index 66d9203d70c..6c58b4a5a9a 100644 --- a/gallery/transforms/plot_transforms_e2e.py +++ b/gallery/transforms/plot_transforms_e2e.py @@ -166,3 +166,16 @@ print(f"{[type(target) for target in targets] = }") for name, loss_val in loss_dict.items(): print(f"{name:<20}{loss_val:.3f}") + +# %% +# Training References +# ------------------- +# +# From there, you can check out the `torchvision references +# `_ where you'll find +# the actual training scripts we use to train our models. +# +# **Disclaimer** The code in our references is more complex than what you'll +# need for your own use-cases: this is because we're supporting different +# backends (PIL, tensors, TVTensors) and different transforms namespaces (v1 and +# v2). So don't be afraid to simplify and only keep what you need. diff --git a/gallery/transforms/plot_transforms_getting_started.py b/gallery/transforms/plot_transforms_getting_started.py index cbaab3dc97d..c61d1cc1be0 100644 --- a/gallery/transforms/plot_transforms_getting_started.py +++ b/gallery/transforms/plot_transforms_getting_started.py @@ -217,6 +217,8 @@ # can still be transformed by some transforms like # :class:`~torchvision.transforms.v2.SanitizeBoundingBoxes`!). # +# .. 
_transforms_datasets_intercompatibility: +# # Transforms and Datasets intercompatibility # ------------------------------------------ # From a06df0d9229e49bd859e2ff0355a48b3bddd1e10 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Wed, 30 Aug 2023 16:01:10 +0200 Subject: [PATCH 3/8] add tests for F.crop and transforms.RandomCrop (#7892) Co-authored-by: Nicolas Hug --- test/test_transforms_v2.py | 62 ----- test/test_transforms_v2_consistency.py | 21 -- test/test_transforms_v2_functional.py | 57 ----- test/test_transforms_v2_refactored.py | 240 +++++++++++++++++- test/transforms_v2_dispatcher_infos.py | 10 - test/transforms_v2_kernel_infos.py | 99 -------- .../transforms/v2/functional/_geometry.py | 2 +- 7 files changed, 237 insertions(+), 254 deletions(-) diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py index 65e65481b78..c92a8cd7c52 100644 --- a/test/test_transforms_v2.py +++ b/test/test_transforms_v2.py @@ -449,68 +449,6 @@ def test__get_params(self, fill, side_range): assert 0 <= params["padding"][3] <= (side_range[1] - 1) * h -class TestRandomCrop: - def test_assertions(self): - with pytest.raises(ValueError, match="Please provide only two dimensions"): - transforms.RandomCrop([10, 12, 14]) - - with pytest.raises(TypeError, match="Got inappropriate padding arg"): - transforms.RandomCrop([10, 12], padding="abc") - - with pytest.raises(ValueError, match="Padding must be an int or a 1, 2, or 4"): - transforms.RandomCrop([10, 12], padding=[-0.7, 0, 0.7]) - - with pytest.raises(TypeError, match="Got inappropriate fill arg"): - transforms.RandomCrop([10, 12], padding=1, fill="abc") - - with pytest.raises(ValueError, match="Padding mode should be either"): - transforms.RandomCrop([10, 12], padding=1, padding_mode="abc") - - @pytest.mark.parametrize("padding", [None, 1, [2, 3], [1, 2, 3, 4]]) - @pytest.mark.parametrize("size, pad_if_needed", [((10, 10), False), ((50, 25), True)]) - def test__get_params(self, padding, pad_if_needed, size): - h, w = size = (24, 32) - image = make_image(size) - - transform = transforms.RandomCrop(size, padding=padding, pad_if_needed=pad_if_needed) - params = transform._get_params([image]) - - if padding is not None: - if isinstance(padding, int): - pad_top = pad_bottom = pad_left = pad_right = padding - elif isinstance(padding, list) and len(padding) == 2: - pad_left = pad_right = padding[0] - pad_top = pad_bottom = padding[1] - elif isinstance(padding, list) and len(padding) == 4: - pad_left, pad_top, pad_right, pad_bottom = padding - - h += pad_top + pad_bottom - w += pad_left + pad_right - else: - pad_left = pad_right = pad_top = pad_bottom = 0 - - if pad_if_needed: - if w < size[1]: - diff = size[1] - w - pad_left += diff - pad_right += diff - w += 2 * diff - if h < size[0]: - diff = size[0] - h - pad_top += diff - pad_bottom += diff - h += 2 * diff - - padding = [pad_left, pad_top, pad_right, pad_bottom] - - assert 0 <= params["top"] <= h - size[0] + 1 - assert 0 <= params["left"] <= w - size[1] + 1 - assert params["height"] == size[0] - assert params["width"] == size[1] - assert params["needs_pad"] is any(padding) - assert params["padding"] == padding - - class TestGaussianBlur: def test_assertions(self): with pytest.raises(ValueError, match="Kernel size should be a tuple/list of two integers"): diff --git a/test/test_transforms_v2_consistency.py b/test/test_transforms_v2_consistency.py index e37913a9422..e8d6487e92a 100644 --- a/test/test_transforms_v2_consistency.py +++ b/test/test_transforms_v2_consistency.py @@ -318,26 +318,6 @@ def 
__init__( ], closeness_kwargs={"rtol": 1e-5, "atol": 1e-5}, ), - ConsistencyConfig( - v2_transforms.RandomCrop, - legacy_transforms.RandomCrop, - [ - ArgsKwargs(12), - ArgsKwargs((15, 17)), - NotScriptableArgsKwargs(11, padding=1), - ArgsKwargs(11, padding=[1]), - ArgsKwargs((8, 13), padding=(2, 3)), - ArgsKwargs((14, 9), padding=(0, 2, 1, 0)), - ArgsKwargs(36, pad_if_needed=True), - ArgsKwargs((7, 8), fill=1), - NotScriptableArgsKwargs(5, fill=(1, 2, 3)), - ArgsKwargs(12), - NotScriptableArgsKwargs(15, padding=2, padding_mode="edge"), - ArgsKwargs(17, padding=(1, 0), padding_mode="reflect"), - ArgsKwargs(8, padding=(3, 0, 0, 1), padding_mode="symmetric"), - ], - make_images_kwargs=dict(DEFAULT_MAKE_IMAGES_KWARGS, sizes=[(26, 26), (18, 33), (29, 22)]), - ), ConsistencyConfig( v2_transforms.RandomPerspective, legacy_transforms.RandomPerspective, @@ -573,7 +553,6 @@ def test_call_consistency(config, args_kwargs): (v2_transforms.RandomErasing, ArgsKwargs(make_image(), scale=(0.3, 0.7), ratio=(0.5, 1.5))), (v2_transforms.ColorJitter, ArgsKwargs(brightness=None, contrast=None, saturation=None, hue=None)), (v2_transforms.GaussianBlur, ArgsKwargs(0.3, 1.4)), - (v2_transforms.RandomCrop, ArgsKwargs(make_image(size=(61, 47)), output_size=(19, 25))), (v2_transforms.RandomPerspective, ArgsKwargs(23, 17, 0.5)), (v2_transforms.AutoAugment, ArgsKwargs(5)), ] diff --git a/test/test_transforms_v2_functional.py b/test/test_transforms_v2_functional.py index cdd75ca0fbf..23f06475cf1 100644 --- a/test/test_transforms_v2_functional.py +++ b/test/test_transforms_v2_functional.py @@ -576,63 +576,6 @@ def _compute_affine_matrix(angle_, translate_, scale_, shear_, center_): return true_matrix -@pytest.mark.parametrize("device", cpu_and_cuda()) -@pytest.mark.parametrize( - "format", - [tv_tensors.BoundingBoxFormat.XYXY, tv_tensors.BoundingBoxFormat.XYWH, tv_tensors.BoundingBoxFormat.CXCYWH], -) -@pytest.mark.parametrize( - "top, left, height, width, expected_bboxes", - [ - [8, 12, 30, 40, [(-2.0, 7.0, 13.0, 27.0), (38.0, -3.0, 58.0, 14.0), (33.0, 38.0, 44.0, 54.0)]], - [-8, 12, 70, 40, [(-2.0, 23.0, 13.0, 43.0), (38.0, 13.0, 58.0, 30.0), (33.0, 54.0, 44.0, 70.0)]], - ], -) -def test_correctness_crop_bounding_boxes(device, format, top, left, height, width, expected_bboxes): - - # Expected bboxes computed using Albumentations: - # import numpy as np - # from albumentations.augmentations.crops.functional import crop_bbox_by_coords, normalize_bbox, denormalize_bbox - # expected_bboxes = [] - # for in_box in in_boxes: - # n_in_box = normalize_bbox(in_box, *size) - # n_out_box = crop_bbox_by_coords( - # n_in_box, (left, top, left + width, top + height), height, width, *size - # ) - # out_box = denormalize_bbox(n_out_box, height, width) - # expected_bboxes.append(out_box) - - format = tv_tensors.BoundingBoxFormat.XYXY - canvas_size = (64, 76) - in_boxes = [ - [10.0, 15.0, 25.0, 35.0], - [50.0, 5.0, 70.0, 22.0], - [45.0, 46.0, 56.0, 62.0], - ] - in_boxes = torch.tensor(in_boxes, device=device) - if format != tv_tensors.BoundingBoxFormat.XYXY: - in_boxes = convert_bounding_box_format(in_boxes, tv_tensors.BoundingBoxFormat.XYXY, format) - - expected_bboxes = clamp_bounding_boxes( - tv_tensors.BoundingBoxes(expected_bboxes, format="XYXY", canvas_size=canvas_size) - ).tolist() - - output_boxes, output_canvas_size = F.crop_bounding_boxes( - in_boxes, - format, - top, - left, - canvas_size[0], - canvas_size[1], - ) - - if format != tv_tensors.BoundingBoxFormat.XYXY: - output_boxes = convert_bounding_box_format(output_boxes, 
format, tv_tensors.BoundingBoxFormat.XYXY) - - torch.testing.assert_close(output_boxes.tolist(), expected_bboxes) - torch.testing.assert_close(output_canvas_size, canvas_size) - - @pytest.mark.parametrize("device", cpu_and_cuda()) def test_correctness_vertical_flip_segmentation_mask_on_fixed_input(device): mask = torch.zeros((3, 3, 3), dtype=torch.long, device=device) diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py index 544484b8fff..6492aead369 100644 --- a/test/test_transforms_v2_refactored.py +++ b/test/test_transforms_v2_refactored.py @@ -228,7 +228,7 @@ def check_functional_kernel_signature_match(functional, *, kernel, input_type): assert functional_param == kernel_param -def _check_transform_v1_compatibility(transform, input, rtol, atol): +def _check_transform_v1_compatibility(transform, input, *, rtol, atol): """If the transform defines the ``_v1_transform_cls`` attribute, checks if the transform has a public, static ``get_params`` method that is the v1 equivalent, the output is close to v1, is scriptable, and the scripted version can be called without error.""" @@ -357,10 +357,11 @@ def reference_affine_bounding_boxes_helper(bounding_boxes, *, affine_matrix, new def affine_bounding_boxes(bounding_boxes): dtype = bounding_boxes.dtype + device = bounding_boxes.device # Go to float before converting to prevent precision loss in case of CXCYWH -> XYXY and W or H is 1 input_xyxy = F.convert_bounding_box_format( - bounding_boxes.to(torch.float64, copy=True), + bounding_boxes.to(dtype=torch.float64, device="cpu", copy=True), old_format=format, new_format=tv_tensors.BoundingBoxFormat.XYXY, inplace=True, @@ -396,9 +397,13 @@ def affine_bounding_boxes(bounding_boxes): output, format=format, canvas_size=canvas_size, - ).to(dtype) + ) + else: + # We leave the bounding box as float64 so the caller gets the full precision to perform any additional + # operation + dtype = output.dtype - return output + return output.to(dtype=dtype, device=device) return tv_tensors.BoundingBoxes( torch.cat([affine_bounding_boxes(b) for b in bounding_boxes.reshape(-1, 4).unbind()], dim=0).reshape( @@ -2486,3 +2491,230 @@ def test_correctness(self): assert isinstance(out_value, torch.Tensor) and not isinstance(out_value, tv_tensors.TVTensor) else: assert isinstance(out_value, type(input_value)) + + +class TestCrop: + INPUT_SIZE = (21, 11) + + CORRECTNESS_CROP_KWARGS = [ + # center + dict(top=5, left=5, height=10, width=5), + # larger than input, i.e. 
pad + dict(top=-5, left=-5, height=30, width=20), + # sides: left, right, top, bottom + dict(top=-5, left=-5, height=30, width=10), + dict(top=-5, left=5, height=30, width=10), + dict(top=-5, left=-5, height=20, width=20), + dict(top=5, left=-5, height=20, width=20), + # corners: top-left, top-right, bottom-left, bottom-right + dict(top=-5, left=-5, height=20, width=10), + dict(top=-5, left=5, height=20, width=10), + dict(top=5, left=-5, height=20, width=10), + dict(top=5, left=5, height=20, width=10), + ] + MINIMAL_CROP_KWARGS = CORRECTNESS_CROP_KWARGS[0] + + @pytest.mark.parametrize("kwargs", CORRECTNESS_CROP_KWARGS) + @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image(self, kwargs, dtype, device): + check_kernel(F.crop_image, make_image(self.INPUT_SIZE, dtype=dtype, device=device), **kwargs) + + @pytest.mark.parametrize("kwargs", CORRECTNESS_CROP_KWARGS) + @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) + @pytest.mark.parametrize("dtype", [torch.float32, torch.int64]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_bounding_box(self, kwargs, format, dtype, device): + bounding_boxes = make_bounding_boxes(self.INPUT_SIZE, format=format, dtype=dtype, device=device) + check_kernel(F.crop_bounding_boxes, bounding_boxes, format=format, **kwargs) + + @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_mask]) + def test_kernel_mask(self, make_mask): + check_kernel(F.crop_mask, make_mask(self.INPUT_SIZE), **self.MINIMAL_CROP_KWARGS) + + def test_kernel_video(self): + check_kernel(F.crop_video, make_video(self.INPUT_SIZE), **self.MINIMAL_CROP_KWARGS) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], + ) + def test_functional(self, make_input): + check_functional(F.crop, make_input(self.INPUT_SIZE), **self.MINIMAL_CROP_KWARGS) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.crop_image, torch.Tensor), + (F._crop_image_pil, PIL.Image.Image), + (F.crop_image, tv_tensors.Image), + (F.crop_bounding_boxes, tv_tensors.BoundingBoxes), + (F.crop_mask, tv_tensors.Mask), + (F.crop_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.crop, kernel=kernel, input_type=input_type) + + @pytest.mark.parametrize("kwargs", CORRECTNESS_CROP_KWARGS) + def test_functional_image_correctness(self, kwargs): + image = make_image(self.INPUT_SIZE, dtype=torch.uint8, device="cpu") + + actual = F.crop(image, **kwargs) + expected = F.to_image(F.crop(F.to_pil_image(image), **kwargs)) + + assert_equal(actual, expected) + + @param_value_parametrization( + size=[(10, 5), (25, 15), (25, 5), (10, 15)], + fill=EXHAUSTIVE_TYPE_FILLS, + ) + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], + ) + def test_transform(self, param, value, make_input): + input = make_input(self.INPUT_SIZE) + + kwargs = {param: value} + if param == "fill": + # 1. size is required + # 2. 
the fill parameter only has an effect if we need padding
+            kwargs["size"] = [s + 4 for s in self.INPUT_SIZE]
+
+        if isinstance(input, PIL.Image.Image) and isinstance(value, (tuple, list)) and len(value) == 1:
+            pytest.xfail("F._pad_image_pil does not support sequences of length 1 for fill.")
+
+        if isinstance(input, tv_tensors.Mask) and isinstance(value, (tuple, list)):
+            pytest.skip("F.pad_mask doesn't support non-scalar fill.")
+
+        check_transform(
+            transforms.RandomCrop(**kwargs, pad_if_needed=True),
+            input,
+            check_v1_compatibility=param != "fill" or isinstance(value, (int, float)),
+        )
+
+    @pytest.mark.parametrize("padding", [1, (1, 1), (1, 1, 1, 1)])
+    def test_transform_padding(self, padding):
+        inpt = make_image(self.INPUT_SIZE)
+
+        output_size = [s + 2 for s in F.get_size(inpt)]
+        transform = transforms.RandomCrop(output_size, padding=padding)
+
+        output = transform(inpt)
+
+        assert F.get_size(output) == output_size
+
+    @pytest.mark.parametrize("padding", [None, 1, (1, 1), (1, 1, 1, 1)])
+    def test_transform_insufficient_padding(self, padding):
+        inpt = make_image(self.INPUT_SIZE)
+
+        output_size = [s + 3 for s in F.get_size(inpt)]
+        transform = transforms.RandomCrop(output_size, padding=padding)
+
+        with pytest.raises(ValueError, match="larger than (padded )?input image size"):
+            transform(inpt)
+
+    def test_transform_pad_if_needed(self):
+        inpt = make_image(self.INPUT_SIZE)
+
+        output_size = [s * 2 for s in F.get_size(inpt)]
+        transform = transforms.RandomCrop(output_size, pad_if_needed=True)
+
+        output = transform(inpt)
+
+        assert F.get_size(output) == output_size
+
+    @param_value_parametrization(
+        size=[(10, 5), (25, 15), (25, 5), (10, 15)],
+        fill=CORRECTNESS_FILLS,
+        padding_mode=["constant", "edge", "reflect", "symmetric"],
+    )
+    @pytest.mark.parametrize("seed", list(range(5)))
+    def test_transform_image_correctness(self, param, value, seed):
+        kwargs = {param: value}
+        if param != "size":
+            # 1. size is required
+            # 2.
the fill / padding_mode parameters only have an effect if we need padding
+            kwargs["size"] = [s + 4 for s in self.INPUT_SIZE]
+        if param == "fill":
+            kwargs["fill"] = adapt_fill(kwargs["fill"], dtype=torch.uint8)
+
+        transform = transforms.RandomCrop(pad_if_needed=True, **kwargs)
+
+        image = make_image(self.INPUT_SIZE)
+
+        with freeze_rng_state():
+            torch.manual_seed(seed)
+            actual = transform(image)
+
+            torch.manual_seed(seed)
+            expected = F.to_image(transform(F.to_pil_image(image)))
+
+        assert_equal(actual, expected)
+
+    def _reference_crop_bounding_boxes(self, bounding_boxes, *, top, left, height, width):
+        affine_matrix = np.array(
+            [
+                [1, 0, -left],
+                [0, 1, -top],
+            ],
+        )
+        return reference_affine_bounding_boxes_helper(
+            bounding_boxes, affine_matrix=affine_matrix, new_canvas_size=(height, width)
+        )
+
+    @pytest.mark.parametrize("kwargs", CORRECTNESS_CROP_KWARGS)
+    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
+    @pytest.mark.parametrize("device", cpu_and_cuda())
+    def test_functional_bounding_box_correctness(self, kwargs, format, dtype, device):
+        bounding_boxes = make_bounding_boxes(self.INPUT_SIZE, format=format, dtype=dtype, device=device)
+
+        actual = F.crop(bounding_boxes, **kwargs)
+        expected = self._reference_crop_bounding_boxes(bounding_boxes, **kwargs)
+
+        assert_equal(actual, expected, atol=1, rtol=0)
+        assert_equal(F.get_size(actual), F.get_size(expected))
+
+    @pytest.mark.parametrize("output_size", [(17, 11), (11, 17), (11, 11)])
+    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
+    @pytest.mark.parametrize("device", cpu_and_cuda())
+    @pytest.mark.parametrize("seed", list(range(5)))
+    def test_transform_bounding_boxes_correctness(self, output_size, format, dtype, device, seed):
+        input_size = [s * 2 for s in output_size]
+        bounding_boxes = make_bounding_boxes(input_size, format=format, dtype=dtype, device=device)
+
+        transform = transforms.RandomCrop(output_size)
+
+        with freeze_rng_state():
+            torch.manual_seed(seed)
+            params = transform._get_params([bounding_boxes])
+            assert not params.pop("needs_pad")
+            del params["padding"]
+            assert params.pop("needs_crop")
+
+            torch.manual_seed(seed)
+            actual = transform(bounding_boxes)
+
+        expected = self._reference_crop_bounding_boxes(bounding_boxes, **params)
+
+        assert_equal(actual, expected)
+        assert_equal(F.get_size(actual), F.get_size(expected))
+
+    def test_errors(self):
+        with pytest.raises(ValueError, match="Please provide only two dimensions"):
+            transforms.RandomCrop([10, 12, 14])
+
+        with pytest.raises(TypeError, match="Got inappropriate padding arg"):
+            transforms.RandomCrop([10, 12], padding="abc")
+
+        with pytest.raises(ValueError, match="Padding must be an int or a 1, 2, or 4"):
+            transforms.RandomCrop([10, 12], padding=[-0.7, 0, 0.7])
+
+        with pytest.raises(TypeError, match="Got inappropriate fill arg"):
+            transforms.RandomCrop([10, 12], padding=1, fill="abc")
+
+        with pytest.raises(ValueError, match="Padding mode should be either"):
+            transforms.RandomCrop([10, 12], padding=1, padding_mode="abc")

diff --git a/test/transforms_v2_dispatcher_infos.py b/test/transforms_v2_dispatcher_infos.py
index 31d98db7f3d..12c1417d9a7 100644
--- a/test/transforms_v2_dispatcher_infos.py
+++ b/test/transforms_v2_dispatcher_infos.py
@@ -139,16 +139,6 @@ def fill_sequence_needs_broadcast(args_kwargs):

 DISPATCHER_INFOS = [
-    DispatcherInfo(
-        F.crop,
-
kernels={ - tv_tensors.Image: F.crop_image, - tv_tensors.Video: F.crop_video, - tv_tensors.BoundingBoxes: F.crop_bounding_boxes, - tv_tensors.Mask: F.crop_mask, - }, - pil_kernel_info=PILKernelInfo(F._crop_image_pil, kernel_name="crop_image_pil"), - ), DispatcherInfo( F.resized_crop, kernels={ diff --git a/test/transforms_v2_kernel_infos.py b/test/transforms_v2_kernel_infos.py index b682c992d26..22eda35397c 100644 --- a/test/transforms_v2_kernel_infos.py +++ b/test/transforms_v2_kernel_infos.py @@ -259,105 +259,6 @@ def reference_inputs_convert_bounding_box_format(): ) -_CROP_PARAMS = combinations_grid(top=[-8, 0, 9], left=[-8, 0, 9], height=[12, 20], width=[12, 20]) - - -def sample_inputs_crop_image_tensor(): - for image_loader, params in itertools.product( - make_image_loaders(sizes=[(16, 17)], color_spaces=["RGB"], dtypes=[torch.float32]), - [ - dict(top=4, left=3, height=7, width=8), - dict(top=-1, left=3, height=7, width=8), - dict(top=4, left=-1, height=7, width=8), - dict(top=4, left=3, height=17, width=8), - dict(top=4, left=3, height=7, width=18), - ], - ): - yield ArgsKwargs(image_loader, **params) - - -def reference_inputs_crop_image_tensor(): - for image_loader, params in itertools.product( - make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]), _CROP_PARAMS - ): - yield ArgsKwargs(image_loader, **params) - - -def sample_inputs_crop_bounding_boxes(): - for bounding_boxes_loader, params in itertools.product( - make_bounding_box_loaders(), [_CROP_PARAMS[0], _CROP_PARAMS[-1]] - ): - yield ArgsKwargs(bounding_boxes_loader, format=bounding_boxes_loader.format, **params) - - -def sample_inputs_crop_mask(): - for mask_loader in make_mask_loaders(sizes=[(16, 17)], num_categories=[10], num_objects=[5]): - yield ArgsKwargs(mask_loader, top=4, left=3, height=7, width=8) - - -def reference_inputs_crop_mask(): - for mask_loader, params in itertools.product(make_mask_loaders(extra_dims=[()], num_objects=[1]), _CROP_PARAMS): - yield ArgsKwargs(mask_loader, **params) - - -def sample_inputs_crop_video(): - for video_loader in make_video_loaders(sizes=[(16, 17)], num_frames=[3]): - yield ArgsKwargs(video_loader, top=4, left=3, height=7, width=8) - - -def reference_crop_bounding_boxes(bounding_boxes, *, format, top, left, height, width): - affine_matrix = np.array( - [ - [1, 0, -left], - [0, 1, -top], - ], - dtype="float64" if bounding_boxes.dtype == torch.float64 else "float32", - ) - - canvas_size = (height, width) - expected_bboxes = reference_affine_bounding_boxes_helper( - bounding_boxes, format=format, canvas_size=canvas_size, affine_matrix=affine_matrix - ) - return expected_bboxes, canvas_size - - -def reference_inputs_crop_bounding_boxes(): - for bounding_boxes_loader, params in itertools.product( - make_bounding_box_loaders(extra_dims=((), (4,))), [_CROP_PARAMS[0], _CROP_PARAMS[-1]] - ): - yield ArgsKwargs(bounding_boxes_loader, format=bounding_boxes_loader.format, **params) - - -KERNEL_INFOS.extend( - [ - KernelInfo( - F.crop_image, - kernel_name="crop_image_tensor", - sample_inputs_fn=sample_inputs_crop_image_tensor, - reference_fn=pil_reference_wrapper(F._crop_image_pil), - reference_inputs_fn=reference_inputs_crop_image_tensor, - float32_vs_uint8=True, - ), - KernelInfo( - F.crop_bounding_boxes, - sample_inputs_fn=sample_inputs_crop_bounding_boxes, - reference_fn=reference_crop_bounding_boxes, - reference_inputs_fn=reference_inputs_crop_bounding_boxes, - ), - KernelInfo( - F.crop_mask, - sample_inputs_fn=sample_inputs_crop_mask, - 
reference_fn=pil_reference_wrapper(F._crop_image_pil), - reference_inputs_fn=reference_inputs_crop_mask, - float32_vs_uint8=True, - ), - KernelInfo( - F.crop_video, - sample_inputs_fn=sample_inputs_crop_video, - ), - ] -) - _RESIZED_CROP_PARAMS = combinations_grid(top=[-8, 9], left=[-8, 9], height=[12], width=[12], size=[(16, 18)]) diff --git a/torchvision/transforms/v2/functional/_geometry.py b/torchvision/transforms/v2/functional/_geometry.py index 7838d7e3eae..8c74f600285 100644 --- a/torchvision/transforms/v2/functional/_geometry.py +++ b/torchvision/transforms/v2/functional/_geometry.py @@ -1165,7 +1165,7 @@ def pad_image( fill: Optional[Union[int, float, List[float]]] = None, padding_mode: str = "constant", ) -> torch.Tensor: - # Be aware that while `padding` has order `[left, top, right, bottom]` has order, `torch_padding` uses + # Be aware that while `padding` has order `[left, top, right, bottom]`, `torch_padding` uses # `[left, right, top, bottom]`. This stems from the fact that we align our API with PIL, but need to use `torch_pad` # internally. torch_padding = _parse_pad_padding(padding) From a2f8f8e9bd40805b6342b998a25e665d748d4fc7 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Wed, 30 Aug 2023 17:00:15 +0200 Subject: [PATCH 4/8] port tests for F.erase and transforms.RandomErasing (#7902) --- test/test_transforms_v2.py | 47 --------- test/test_transforms_v2_consistency.py | 15 --- test/test_transforms_v2_refactored.py | 141 +++++++++++++++++++++++++ test/transforms_v2_dispatcher_infos.py | 11 -- test/transforms_v2_kernel_infos.py | 30 ------ 5 files changed, 141 insertions(+), 103 deletions(-) diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py index c92a8cd7c52..175a3ac161c 100644 --- a/test/test_transforms_v2.py +++ b/test/test_transforms_v2.py @@ -540,53 +540,6 @@ def test__get_params(self): assert (-alpha / h <= displacement[0, ..., 1]).all() and (displacement[0, ..., 1] <= alpha / h).all() -class TestRandomErasing: - def test_assertions(self): - with pytest.raises(TypeError, match="Argument value should be either a number or str or a sequence"): - transforms.RandomErasing(value={}) - - with pytest.raises(ValueError, match="If value is str, it should be 'random'"): - transforms.RandomErasing(value="abc") - - with pytest.raises(TypeError, match="Scale should be a sequence"): - transforms.RandomErasing(scale=123) - - with pytest.raises(TypeError, match="Ratio should be a sequence"): - transforms.RandomErasing(ratio=123) - - with pytest.raises(ValueError, match="Scale should be between 0 and 1"): - transforms.RandomErasing(scale=[-1, 2]) - - image = make_image((24, 32)) - - transform = transforms.RandomErasing(value=[1, 2, 3, 4]) - - with pytest.raises(ValueError, match="If value is a sequence, it should have either a single value"): - transform._get_params([image]) - - @pytest.mark.parametrize("value", [5.0, [1, 2, 3], "random"]) - def test__get_params(self, value): - image = make_image((24, 32)) - num_channels, height, width = F.get_dimensions(image) - - transform = transforms.RandomErasing(value=value) - params = transform._get_params([image]) - - v = params["v"] - h, w = params["h"], params["w"] - i, j = params["i"], params["j"] - assert isinstance(v, torch.Tensor) - if value == "random": - assert v.shape == (num_channels, h, w) - elif isinstance(value, (int, float)): - assert v.shape == (1, 1, 1) - elif isinstance(value, (list, tuple)): - assert v.shape == (num_channels, 1, 1) - - assert 0 <= i <= height - h - assert 0 <= j <= width - w - - class 
TestTransform: @pytest.mark.parametrize( "inpt_type", diff --git a/test/test_transforms_v2_consistency.py b/test/test_transforms_v2_consistency.py index e8d6487e92a..1f96caa247f 100644 --- a/test/test_transforms_v2_consistency.py +++ b/test/test_transforms_v2_consistency.py @@ -276,20 +276,6 @@ def __init__( ], closeness_kwargs=dict(rtol=0, atol=21), ), - ConsistencyConfig( - v2_transforms.RandomErasing, - legacy_transforms.RandomErasing, - [ - ArgsKwargs(p=0), - ArgsKwargs(p=1), - ArgsKwargs(p=1, scale=(0.3, 0.7)), - ArgsKwargs(p=1, ratio=(0.5, 1.5)), - ArgsKwargs(p=1, value=1), - ArgsKwargs(p=1, value=(1, 2, 3)), - ArgsKwargs(p=1, value="random"), - ], - supports_pil=False, - ), ConsistencyConfig( v2_transforms.ColorJitter, legacy_transforms.ColorJitter, @@ -550,7 +536,6 @@ def test_call_consistency(config, args_kwargs): ) for transform_cls, get_params_args_kwargs in [ (v2_transforms.RandomResizedCrop, ArgsKwargs(make_image(), scale=[0.3, 0.7], ratio=[0.5, 1.5])), - (v2_transforms.RandomErasing, ArgsKwargs(make_image(), scale=(0.3, 0.7), ratio=(0.5, 1.5))), (v2_transforms.ColorJitter, ArgsKwargs(brightness=None, contrast=None, saturation=None, hue=None)), (v2_transforms.GaussianBlur, ArgsKwargs(0.3, 1.4)), (v2_transforms.RandomPerspective, ArgsKwargs(23, 17, 0.5)), diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py index 6492aead369..ad5cd8e00d8 100644 --- a/test/test_transforms_v2_refactored.py +++ b/test/test_transforms_v2_refactored.py @@ -2718,3 +2718,144 @@ def test_errors(self): with pytest.raises(ValueError, match="Padding mode should be either"): transforms.RandomCrop([10, 12], padding=1, padding_mode="abc") + + +class TestErase: + INPUT_SIZE = (17, 11) + FUNCTIONAL_KWARGS = dict( + zip("ijhwv", [2, 2, 10, 8, torch.tensor(0.0, dtype=torch.float32, device="cpu").reshape(-1, 1, 1)]) + ) + + @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image(self, dtype, device): + check_kernel(F.erase_image, make_image(self.INPUT_SIZE, dtype=dtype, device=device), **self.FUNCTIONAL_KWARGS) + + @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image_inplace(self, dtype, device): + input = make_image(self.INPUT_SIZE, dtype=dtype, device=device) + input_version = input._version + + output_out_of_place = F.erase_image(input, **self.FUNCTIONAL_KWARGS) + assert output_out_of_place.data_ptr() != input.data_ptr() + assert output_out_of_place is not input + + output_inplace = F.erase_image(input, **self.FUNCTIONAL_KWARGS, inplace=True) + assert output_inplace.data_ptr() == input.data_ptr() + assert output_inplace._version > input_version + assert output_inplace is input + + assert_equal(output_inplace, output_out_of_place) + + def test_kernel_video(self): + check_kernel(F.erase_video, make_video(self.INPUT_SIZE), **self.FUNCTIONAL_KWARGS) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_video], + ) + def test_functional(self, make_input): + check_functional(F.erase, make_input(), **self.FUNCTIONAL_KWARGS) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.erase_image, torch.Tensor), + (F._erase_image_pil, PIL.Image.Image), + (F.erase_image, tv_tensors.Image), + (F.erase_video, tv_tensors.Video), + ], + ) + def test_functional_signature(self, kernel, input_type): + check_functional_kernel_signature_match(F.erase, kernel=kernel, 
input_type=input_type) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_video], + ) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_transform(self, make_input, device): + check_transform(transforms.RandomErasing(p=1), make_input(device=device)) + + def _reference_erase_image(self, image, *, i, j, h, w, v): + mask = torch.zeros_like(image, dtype=torch.bool) + mask[..., i : i + h, j : j + w] = True + + # The broadcasting and type casting logic is handled automagically in the kernel through indexing + value = torch.broadcast_to(v, (*image.shape[:-2], h, w)).to(image) + + erased_image = torch.empty_like(image) + erased_image[mask] = value.flatten() + erased_image[~mask] = image[~mask] + + return erased_image + + @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_functional_image_correctness(self, dtype, device): + image = make_image(dtype=dtype, device=device) + + actual = F.erase(image, **self.FUNCTIONAL_KWARGS) + expected = self._reference_erase_image(image, **self.FUNCTIONAL_KWARGS) + + assert_equal(actual, expected) + + @param_value_parametrization( + scale=[(0.1, 0.2), [0.0, 1.0]], + ratio=[(0.3, 0.7), [0.1, 5.0]], + value=[0, 0.5, (0, 1, 0), [-0.2, 0.0, 1.3], "random"], + ) + @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + @pytest.mark.parametrize("seed", list(range(5))) + def test_transform_image_correctness(self, param, value, dtype, device, seed): + transform = transforms.RandomErasing(**{param: value}, p=1) + + image = make_image(dtype=dtype, device=device) + + with freeze_rng_state(): + torch.manual_seed(seed) + # This emulates the random apply check that happens before _get_params is called + torch.rand(1) + params = transform._get_params([image]) + + torch.manual_seed(seed) + actual = transform(image) + + expected = self._reference_erase_image(image, **params) + + assert_equal(actual, expected) + + def test_transform_errors(self): + with pytest.raises(TypeError, match="Argument value should be either a number or str or a sequence"): + transforms.RandomErasing(value={}) + + with pytest.raises(ValueError, match="If value is str, it should be 'random'"): + transforms.RandomErasing(value="abc") + + with pytest.raises(TypeError, match="Scale should be a sequence"): + transforms.RandomErasing(scale=123) + + with pytest.raises(TypeError, match="Ratio should be a sequence"): + transforms.RandomErasing(ratio=123) + + with pytest.raises(ValueError, match="Scale should be between 0 and 1"): + transforms.RandomErasing(scale=[-1, 2]) + + transform = transforms.RandomErasing(value=[1, 2, 3, 4]) + + with pytest.raises(ValueError, match="If value is a sequence, it should have either a single value"): + transform._get_params([make_image()]) + + @pytest.mark.parametrize("make_input", [make_bounding_boxes, make_detection_mask]) + def test_transform_passthrough(self, make_input): + transform = transforms.RandomErasing(p=1) + + input = make_input(self.INPUT_SIZE) + + with pytest.warns(UserWarning, match="currently passing through inputs of type"): + # RandomErasing requires an image or video to be present + _, output = transform(make_image(self.INPUT_SIZE), input) + + assert output is input diff --git a/test/transforms_v2_dispatcher_infos.py b/test/transforms_v2_dispatcher_infos.py index 12c1417d9a7..6d7ee64d21a 100644 --- a/test/transforms_v2_dispatcher_infos.py +++ 
b/test/transforms_v2_dispatcher_infos.py @@ -269,17 +269,6 @@ def fill_sequence_needs_broadcast(args_kwargs): }, pil_kernel_info=PILKernelInfo(F._adjust_sharpness_image_pil, kernel_name="adjust_sharpness_image_pil"), ), - DispatcherInfo( - F.erase, - kernels={ - tv_tensors.Image: F.erase_image, - tv_tensors.Video: F.erase_video, - }, - pil_kernel_info=PILKernelInfo(F._erase_image_pil), - test_marks=[ - skip_dispatch_tv_tensor, - ], - ), DispatcherInfo( F.adjust_contrast, kernels={ diff --git a/test/transforms_v2_kernel_infos.py b/test/transforms_v2_kernel_infos.py index 22eda35397c..a549bfe72dd 100644 --- a/test/transforms_v2_kernel_infos.py +++ b/test/transforms_v2_kernel_infos.py @@ -1123,36 +1123,6 @@ def sample_inputs_adjust_sharpness_video(): ) -def sample_inputs_erase_image_tensor(): - for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE]): - # FIXME: make the parameters more diverse - h, w = 6, 7 - v = torch.rand(image_loader.num_channels, h, w) - yield ArgsKwargs(image_loader, i=1, j=2, h=h, w=w, v=v) - - -def sample_inputs_erase_video(): - for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): - # FIXME: make the parameters more diverse - h, w = 6, 7 - v = torch.rand(video_loader.num_channels, h, w) - yield ArgsKwargs(video_loader, i=1, j=2, h=h, w=w, v=v) - - -KERNEL_INFOS.extend( - [ - KernelInfo( - F.erase_image, - kernel_name="erase_image_tensor", - sample_inputs_fn=sample_inputs_erase_image_tensor, - ), - KernelInfo( - F.erase_video, - sample_inputs_fn=sample_inputs_erase_video, - ), - ] -) - _ADJUST_CONTRAST_FACTORS = [0.1, 0.5] From f1b4c7a6fd65479a096ed6ae44fb5e762af6c0f4 Mon Sep 17 00:00:00 2001 From: vfdev Date: Wed, 30 Aug 2023 18:13:02 +0200 Subject: [PATCH 5/8] Fixed sigma input type for v2.GaussianBlur (#7887) Co-authored-by: Philip Meier Co-authored-by: Nicolas Hug --- test/test_transforms_v2.py | 45 +++----------------------- test/test_transforms_v2_refactored.py | 43 ++++++++++++++++++++++++ torchvision/transforms/v2/_geometry.py | 6 ++-- torchvision/transforms/v2/_misc.py | 15 +++------ torchvision/transforms/v2/_utils.py | 23 +++++++------ 5 files changed, 67 insertions(+), 65 deletions(-) diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py index 175a3ac161c..3f0056e96ab 100644 --- a/test/test_transforms_v2.py +++ b/test/test_transforms_v2.py @@ -449,37 +449,6 @@ def test__get_params(self, fill, side_range): assert 0 <= params["padding"][3] <= (side_range[1] - 1) * h -class TestGaussianBlur: - def test_assertions(self): - with pytest.raises(ValueError, match="Kernel size should be a tuple/list of two integers"): - transforms.GaussianBlur([10, 12, 14]) - - with pytest.raises(ValueError, match="Kernel size value should be an odd and positive number"): - transforms.GaussianBlur(4) - - with pytest.raises( - TypeError, match="sigma should be a single int or float or a list/tuple with length 2 floats." 
- ): - transforms.GaussianBlur(3, sigma=[1, 2, 3]) - - with pytest.raises(ValueError, match="If sigma is a single number, it must be positive"): - transforms.GaussianBlur(3, sigma=-1.0) - - with pytest.raises(ValueError, match="sigma values should be positive and of the form"): - transforms.GaussianBlur(3, sigma=[2.0, 1.0]) - - @pytest.mark.parametrize("sigma", [10.0, [10.0, 12.0]]) - def test__get_params(self, sigma): - transform = transforms.GaussianBlur(3, sigma=sigma) - params = transform._get_params([]) - - if isinstance(sigma, float): - assert params["sigma"][0] == params["sigma"][1] == 10 - else: - assert sigma[0] <= params["sigma"][0] <= sigma[1] - assert sigma[0] <= params["sigma"][1] <= sigma[1] - - class TestRandomPerspective: def test_assertions(self): with pytest.raises(ValueError, match="Argument distortion_scale value should be between 0 and 1"): @@ -503,24 +472,18 @@ def test__get_params(self): class TestElasticTransform: def test_assertions(self): - with pytest.raises(TypeError, match="alpha should be float or a sequence of floats"): + with pytest.raises(TypeError, match="alpha should be a number or a sequence of numbers"): transforms.ElasticTransform({}) - with pytest.raises(ValueError, match="alpha is a sequence its length should be one of 2"): + with pytest.raises(ValueError, match="alpha is a sequence its length should be 1 or 2"): transforms.ElasticTransform([1.0, 2.0, 3.0]) - with pytest.raises(ValueError, match="alpha should be a sequence of floats"): - transforms.ElasticTransform([1, 2]) - - with pytest.raises(TypeError, match="sigma should be float or a sequence of floats"): + with pytest.raises(TypeError, match="sigma should be a number or a sequence of numbers"): transforms.ElasticTransform(1.0, {}) - with pytest.raises(ValueError, match="sigma is a sequence its length should be one of 2"): + with pytest.raises(ValueError, match="sigma is a sequence its length should be 1 or 2"): transforms.ElasticTransform(1.0, [1.0, 2.0, 3.0]) - with pytest.raises(ValueError, match="sigma should be a sequence of floats"): - transforms.ElasticTransform(1.0, [1, 2]) - with pytest.raises(TypeError, match="Got inappropriate fill arg"): transforms.ElasticTransform(1.0, 2.0, fill="abc") diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py index ad5cd8e00d8..b2e21fc4aca 100644 --- a/test/test_transforms_v2_refactored.py +++ b/test/test_transforms_v2_refactored.py @@ -2859,3 +2859,46 @@ def test_transform_passthrough(self, make_input): _, output = transform(make_image(self.INPUT_SIZE), input) assert output is input + + +class TestGaussianBlur: + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], + ) + @pytest.mark.parametrize("device", cpu_and_cuda()) + @pytest.mark.parametrize("sigma", [5, (0.5, 2)]) + def test_transform(self, make_input, device, sigma): + check_transform(transforms.GaussianBlur(kernel_size=3, sigma=sigma), make_input(device=device)) + + def test_assertions(self): + with pytest.raises(ValueError, match="Kernel size should be a tuple/list of two integers"): + transforms.GaussianBlur([10, 12, 14]) + + with pytest.raises(ValueError, match="Kernel size value should be an odd and positive number"): + transforms.GaussianBlur(4) + + with pytest.raises(ValueError, match="If sigma is a sequence its length should be 1 or 2. 
Got 3"): + transforms.GaussianBlur(3, sigma=[1, 2, 3]) + + with pytest.raises(ValueError, match="sigma values should be positive and of the form"): + transforms.GaussianBlur(3, sigma=-1.0) + + with pytest.raises(ValueError, match="sigma values should be positive and of the form"): + transforms.GaussianBlur(3, sigma=[2.0, 1.0]) + + with pytest.raises(TypeError, match="sigma should be a number or a sequence of numbers"): + transforms.GaussianBlur(3, sigma={}) + + @pytest.mark.parametrize("sigma", [10.0, [10.0, 12.0], (10, 12.0), [10]]) + def test__get_params(self, sigma): + transform = transforms.GaussianBlur(3, sigma=sigma) + params = transform._get_params([]) + + if isinstance(sigma, float): + assert params["sigma"][0] == params["sigma"][1] == sigma + elif isinstance(sigma, list) and len(sigma) == 1: + assert params["sigma"][0] == params["sigma"][1] == sigma[0] + else: + assert sigma[0] <= params["sigma"][0] <= sigma[1] + assert sigma[0] <= params["sigma"][1] <= sigma[1] diff --git a/torchvision/transforms/v2/_geometry.py b/torchvision/transforms/v2/_geometry.py index ba3e690dd4d..721e9b7e452 100644 --- a/torchvision/transforms/v2/_geometry.py +++ b/torchvision/transforms/v2/_geometry.py @@ -21,7 +21,7 @@ _get_fill, _setup_angle, _setup_fill_arg, - _setup_float_or_seq, + _setup_number_or_seq, _setup_size, get_bounding_boxes, has_all, @@ -1060,8 +1060,8 @@ def __init__( fill: Union[_FillType, Dict[Union[Type, str], _FillType]] = 0, ) -> None: super().__init__() - self.alpha = _setup_float_or_seq(alpha, "alpha", 2) - self.sigma = _setup_float_or_seq(sigma, "sigma", 2) + self.alpha = _setup_number_or_seq(alpha, "alpha") + self.sigma = _setup_number_or_seq(sigma, "sigma") self.interpolation = _check_interpolation(interpolation) self.fill = fill diff --git a/torchvision/transforms/v2/_misc.py b/torchvision/transforms/v2/_misc.py index 739f2fb7ff5..67aaf4f3753 100644 --- a/torchvision/transforms/v2/_misc.py +++ b/torchvision/transforms/v2/_misc.py @@ -9,7 +9,7 @@ from torchvision import transforms as _transforms, tv_tensors from torchvision.transforms.v2 import functional as F, Transform -from ._utils import _parse_labels_getter, _setup_float_or_seq, _setup_size, get_bounding_boxes, has_any, is_pure_tensor +from ._utils import _parse_labels_getter, _setup_number_or_seq, _setup_size, get_bounding_boxes, has_any, is_pure_tensor # TODO: do we want/need to expose this? @@ -198,17 +198,10 @@ def __init__( if ks <= 0 or ks % 2 == 0: raise ValueError("Kernel size value should be an odd and positive number.") - if isinstance(sigma, (int, float)): - if sigma <= 0: - raise ValueError("If sigma is a single number, it must be positive.") - sigma = float(sigma) - elif isinstance(sigma, Sequence) and len(sigma) == 2: - if not 0.0 < sigma[0] <= sigma[1]: - raise ValueError("sigma values should be positive and of the form (min, max).") - else: - raise TypeError("sigma should be a single int or float or a list/tuple with length 2 floats.") + self.sigma = _setup_number_or_seq(sigma, "sigma") - self.sigma = _setup_float_or_seq(sigma, "sigma", 2) + if not 0.0 < self.sigma[0] <= self.sigma[1]: + raise ValueError(f"sigma values should be positive and of the form (min, max). 
Got {self.sigma}") def _get_params(self, flat_inputs: List[Any]) -> Dict[str, Any]: sigma = torch.empty(1).uniform_(self.sigma[0], self.sigma[1]).item() diff --git a/torchvision/transforms/v2/_utils.py b/torchvision/transforms/v2/_utils.py index d5669f5739f..6147180a986 100644 --- a/torchvision/transforms/v2/_utils.py +++ b/torchvision/transforms/v2/_utils.py @@ -18,20 +18,23 @@ from torchvision.transforms.v2.functional._utils import _FillType, _FillTypeJIT -def _setup_float_or_seq(arg: Union[float, Sequence[float]], name: str, req_size: int = 2) -> Sequence[float]: - if not isinstance(arg, (float, Sequence)): - raise TypeError(f"{name} should be float or a sequence of floats. Got {type(arg)}") - if isinstance(arg, Sequence) and len(arg) != req_size: - raise ValueError(f"If {name} is a sequence its length should be one of {req_size}. Got {len(arg)}") +def _setup_number_or_seq(arg: Union[int, float, Sequence[Union[int, float]]], name: str) -> Sequence[float]: + if not isinstance(arg, (int, float, Sequence)): + raise TypeError(f"{name} should be a number or a sequence of numbers. Got {type(arg)}") + if isinstance(arg, Sequence) and len(arg) not in (1, 2): + raise ValueError(f"If {name} is a sequence its length should be 1 or 2. Got {len(arg)}") if isinstance(arg, Sequence): for element in arg: - if not isinstance(element, float): - raise ValueError(f"{name} should be a sequence of floats. Got {type(element)}") + if not isinstance(element, (int, float)): + raise ValueError(f"{name} should be a sequence of numbers. Got {type(element)}") - if isinstance(arg, float): + if isinstance(arg, (int, float)): arg = [float(arg), float(arg)] - if isinstance(arg, (list, tuple)) and len(arg) == 1: - arg = [arg[0], arg[0]] + elif isinstance(arg, Sequence): + if len(arg) == 1: + arg = [float(arg[0]), float(arg[0])] + else: + arg = [float(arg[0]), float(arg[1])] return arg From d073a19454e5bf089b7aa70937c390a9bebea88e Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Wed, 30 Aug 2023 23:03:36 +0200 Subject: [PATCH 6/8] rename BoundingBoxes module to match class name (#7910) --- torchvision/tv_tensors/__init__.py | 2 +- torchvision/tv_tensors/{_bounding_box.py => _bounding_boxes.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename torchvision/tv_tensors/{_bounding_box.py => _bounding_boxes.py} (100%) diff --git a/torchvision/tv_tensors/__init__.py b/torchvision/tv_tensors/__init__.py index cb9bf702fb3..d55e10e8620 100644 --- a/torchvision/tv_tensors/__init__.py +++ b/torchvision/tv_tensors/__init__.py @@ -1,6 +1,6 @@ import torch -from ._bounding_box import BoundingBoxes, BoundingBoxFormat +from ._bounding_boxes import BoundingBoxes, BoundingBoxFormat from ._image import Image from ._mask import Mask from ._torch_function_helpers import set_return_type diff --git a/torchvision/tv_tensors/_bounding_box.py b/torchvision/tv_tensors/_bounding_boxes.py similarity index 100% rename from torchvision/tv_tensors/_bounding_box.py rename to torchvision/tv_tensors/_bounding_boxes.py From 96950a5cff2cf3ebee1b91cd3fbafe086d54c9c5 Mon Sep 17 00:00:00 2001 From: Andrey Talman Date: Wed, 30 Aug 2023 18:00:42 -0400 Subject: [PATCH 7/8] Try to turn off DEBUG mode when building for release (#7914) --- setup.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/setup.py b/setup.py index 482ab914692..ce67413f410 100644 --- a/setup.py +++ b/setup.py @@ -223,6 +223,9 @@ def get_extensions(): extra_compile_args["nvcc"] = [f for f in nvcc_flags if not ("-O" in f or "-g" in f)] extra_compile_args["nvcc"].append("-O0") 
extra_compile_args["nvcc"].append("-g") + else: + print("Compiling with debug mode OFF") + extra_compile_args["cxx"].append("-g0") sources = [os.path.join(extensions_dir, s) for s in sources] From b8286712ce73f5de986d7683f4b3d9826c686eb5 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Thu, 31 Aug 2023 13:05:37 +0200 Subject: [PATCH 8/8] allow sequence fill for v2 AA scripted (#7919) --- test/test_transforms_v2_consistency.py | 21 ++++++++++++--------- torchvision/transforms/v2/_auto_augment.py | 4 ++-- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/test/test_transforms_v2_consistency.py b/test/test_transforms_v2_consistency.py index 1f96caa247f..1f47eb2117f 100644 --- a/test/test_transforms_v2_consistency.py +++ b/test/test_transforms_v2_consistency.py @@ -755,10 +755,11 @@ def test_randaug(self, inpt, interpolation, mocker): v2_transforms.InterpolationMode.BILINEAR, ], ) - def test_randaug_jit(self, interpolation): + @pytest.mark.parametrize("fill", [None, 85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1], 1]) + def test_randaug_jit(self, interpolation, fill): inpt = torch.randint(0, 256, size=(1, 3, 256, 256), dtype=torch.uint8) - t_ref = legacy_transforms.RandAugment(interpolation=interpolation, num_ops=1) - t = v2_transforms.RandAugment(interpolation=interpolation, num_ops=1) + t_ref = legacy_transforms.RandAugment(interpolation=interpolation, num_ops=1, fill=fill) + t = v2_transforms.RandAugment(interpolation=interpolation, num_ops=1, fill=fill) tt_ref = torch.jit.script(t_ref) tt = torch.jit.script(t) @@ -830,10 +831,11 @@ def test_trivial_aug(self, inpt, interpolation, mocker): v2_transforms.InterpolationMode.BILINEAR, ], ) - def test_trivial_aug_jit(self, interpolation): + @pytest.mark.parametrize("fill", [None, 85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1], 1]) + def test_trivial_aug_jit(self, interpolation, fill): inpt = torch.randint(0, 256, size=(1, 3, 256, 256), dtype=torch.uint8) - t_ref = legacy_transforms.TrivialAugmentWide(interpolation=interpolation) - t = v2_transforms.TrivialAugmentWide(interpolation=interpolation) + t_ref = legacy_transforms.TrivialAugmentWide(interpolation=interpolation, fill=fill) + t = v2_transforms.TrivialAugmentWide(interpolation=interpolation, fill=fill) tt_ref = torch.jit.script(t_ref) tt = torch.jit.script(t) @@ -906,11 +908,12 @@ def test_augmix(self, inpt, interpolation, mocker): v2_transforms.InterpolationMode.BILINEAR, ], ) - def test_augmix_jit(self, interpolation): + @pytest.mark.parametrize("fill", [None, 85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1], 1]) + def test_augmix_jit(self, interpolation, fill): inpt = torch.randint(0, 256, size=(1, 3, 256, 256), dtype=torch.uint8) - t_ref = legacy_transforms.AugMix(interpolation=interpolation, mixture_width=1, chain_depth=1) - t = v2_transforms.AugMix(interpolation=interpolation, mixture_width=1, chain_depth=1) + t_ref = legacy_transforms.AugMix(interpolation=interpolation, mixture_width=1, chain_depth=1, fill=fill) + t = v2_transforms.AugMix(interpolation=interpolation, mixture_width=1, chain_depth=1, fill=fill) tt_ref = torch.jit.script(t_ref) tt = torch.jit.script(t) diff --git a/torchvision/transforms/v2/_auto_augment.py b/torchvision/transforms/v2/_auto_augment.py index 4fec62f1b11..8ddd5aacdc3 100644 --- a/torchvision/transforms/v2/_auto_augment.py +++ b/torchvision/transforms/v2/_auto_augment.py @@ -33,8 +33,8 @@ def __init__( def _extract_params_for_v1_transform(self) -> Dict[str, Any]: params = super()._extract_params_for_v1_transform() - if not (params["fill"] is None or 
isinstance(params["fill"], (int, float))):
-            raise ValueError(f"{type(self).__name__}() can only be scripted for a scalar `fill`, but got {self.fill}.")
+        if isinstance(params["fill"], dict):
+            raise ValueError(f"{type(self).__name__}() cannot be scripted when `fill` is a dictionary.")

         return params