pytorch · datumbox · Jan 26, 2021 · Jan 26, 2021
diff --git a/torchvision/transforms/functional.py b/torchvision/transforms/functional.py
@@ -384,7 +384,9 @@ def scale(*args, **kwargs):
 def pad(img: Tensor, padding: List[int], fill: int = 0, padding_mode: str = "constant") -> Tensor:
     r"""Pad the given image on all sides with the given "pad" value.
     If the image is torch Tensor, it is expected
-    to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions
+    to have [..., H, W] shape, where ... means at most 2 leading dimensions for mode reflect and symmetric,
+    at most 3 leading dimensions for mode edge,
+    and an arbitrary number of leading dimensions for mode constant.
 
     Args:
         img (PIL Image or Tensor): Image to be padded.
@@ -402,7 +404,8 @@ def pad(img: Tensor, padding: List[int], fill: int = 0, padding_mode: str = "con
 
             - constant: pads with a constant value, this value is specified with fill
 
-            - edge: pads with the last value on the edge of the image
+            - edge: pads with the last value on the edge of the image.
+                    If the input is a 5D torch Tensor, the last 3 dimensions will be padded instead of the last 2
 
             - reflect: pads with reflection of image (without repeating the last value on the edge)
 

diff --git a/torchvision/transforms/transforms.py b/torchvision/transforms/transforms.py
@@ -319,7 +319,9 @@ def __repr__(self):
 class Pad(torch.nn.Module):
     """Pad the given image on all sides with the given "pad" value.
     If the image is torch Tensor, it is expected
-    to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions
+    to have [..., H, W] shape, where ... means at most 2 leading dimensions for mode reflect and symmetric,
+    at most 3 leading dimensions for mode edge,
+    and an arbitrary number of leading dimensions for mode constant.
 
     Args:
         padding (int or sequence): Padding on each border. If a single int is provided this
@@ -337,7 +339,8 @@ class Pad(torch.nn.Module):
 
             - constant: pads with a constant value, this value is specified with fill
 
-            - edge: pads with the last value at the edge of the image
+            - edge: pads with the last value at the edge of the image.
+                    If the input is a 5D torch Tensor, the last 3 dimensions will be padded instead of the last 2
 
             - reflect: pads with reflection of image without repeating the last value on the edge
 
@@ -491,7 +494,8 @@ def __call__(self, img):
 class RandomCrop(torch.nn.Module):
     """Crop the given image at a random location.
     If the image is torch Tensor, it is expected
-    to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions
+    to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions,
+    but if non-constant padding is used, the input is expected to have at most 2 leading dimensions.
 
     Args:
         size (sequence or int): Desired output size of the crop. If size is an