pydata · fujiisoup · Mar 1, 2021 · Mar 1, 2021 · Mar 2, 2021 · Mar 2, 2021
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -22,6 +22,10 @@ v0.17.1 (unreleased)
 
 New Features
 ~~~~~~~~~~~~
+- :py:meth:`DataArray.pad` and :py:meth:`Dataset.pad` now accept sequences of index
+  values in place of integer pad widths. In this case, these values are used as the
+  newly extended parts of the IndexVariable.
+  By `Keisuke Fujii <https://github.com/fujiisoup>`_.
 
 
 Breaking changes

diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
@@ -3788,7 +3788,9 @@ def polyfit(
 
     def pad(
         self,
-        pad_width: Mapping[Hashable, Union[int, Tuple[int, int]]] = None,
+        pad_width: Mapping[
+            Hashable, Union[int, Tuple[Union[int, Sequence], Union[int, Sequence]]]
+        ] = None,
         mode: str = "constant",
         stat_length: Union[
             int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]]
@@ -3818,6 +3820,11 @@ def pad(
             Mapping with the form of {dim: (pad_before, pad_after)}
             describing the number of values padded along each dimension.
             {dim: pad} is a shortcut for pad_before = pad_after = pad
+            Note that padding an IndexVariable with np.nan loses most of the
+            useful functionality of xarray. To avoid this problem, an iterable,
+            such as a list or np.array, can be used for either pad_before or pad_after.
+            In this case, these values are used to extend the IndexVariable, which
+            prevents the loss of functionality.
         mode : str, default: "constant"
             One of the following string values (taken from numpy docs)
 
@@ -3942,6 +3949,18 @@ def pad(
           * x        (x) float64 nan 0.0 1.0 nan
           * y        (y) int64 10 20 30 40
             z        (x) float64 nan 100.0 200.0 nan
+
+        >>> da.pad(x=([-2, -1], [2]))
+        <xarray.DataArray (x: 5, y: 4)>
+        array([[nan, nan, nan, nan],
+               [nan, nan, nan, nan],
+               [ 0.,  1.,  2.,  3.],
+               [10., 11., 12., 13.],
+               [nan, nan, nan, nan]])
+        Coordinates:
+          * x        (x) int64 -2 -1 0 1 2
+          * y        (y) int64 10 20 30 40
+            z        (x) float64 nan nan 100.0 200.0 nan
         """
         ds = self._to_temp_dataset().pad(
             pad_width=pad_width,

diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
@@ -6496,7 +6496,9 @@ def polyfit(
 
     def pad(
         self,
-        pad_width: Mapping[Hashable, Union[int, Tuple[int, int]]] = None,
+        pad_width: Mapping[
+            Hashable, Union[int, Tuple[Union[int, Sequence], Union[int, Sequence]]]
+        ] = None,
         mode: str = "constant",
         stat_length: Union[
             int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]]
@@ -6522,10 +6524,15 @@ def pad(
 
         Parameters
         ----------
-        pad_width : mapping of hashable to tuple of int
+        pad_width : mapping of hashable to tuple of int or iterable
             Mapping with the form of {dim: (pad_before, pad_after)}
             describing the number of values padded along each dimension.
             {dim: pad} is a shortcut for pad_before = pad_after = pad
+            Note that padding an IndexVariable with np.nan loses most of the
+            useful functionality of xarray. To avoid this problem, an iterable,
+            such as a list or np.array, can be used for either pad_before or pad_after.
+            In this case, these values are used to extend the IndexVariable, which
+            prevents the loss of functionality.
         mode : str, default: "constant"
             One of the following string values (taken from numpy docs).
 
@@ -6622,6 +6629,14 @@ def pad(
         Dimensions without coordinates: x
         Data variables:
             foo      (x) float64 nan 0.0 1.0 2.0 3.0 4.0 nan nan
+        >>> ds = xr.Dataset({"foo": ("x", range(3))}, coords={"x": [0, 1, 2]})
+        >>> ds.pad(x=([-1], [3]))
+        <xarray.Dataset>
+        Dimensions:  (x: 5)
+        Coordinates:
+          * x        (x) int64 -1 0 1 2 3
+        Data variables:
+            foo      (x) float64 nan 0.0 1.0 2.0 nan
         """
         pad_width = either_dict_or_kwargs(pad_width, pad_width_kwargs, "pad")
 
@@ -6638,8 +6653,25 @@ def pad(
             coord_pad_options = {}
 
         variables = {}
+
+        # standardize pad_width
+        pad_width_standardized = {}  # type: Dict[Hashable, Tuple[int, int]]
+        for k, v in pad_width.items():
+            if not isinstance(v, int):
+                # if pad_width is a tuple of iterable, we use its length for
+                # pad_width_standardized
+                # mypy does not know the length here and infers Tuple[int, ...]
+                # see https://github.com/python/mypy/issues/7509
+                pad_width_standardized[k] = tuple(  # type: ignore
+                    len(v1) if isinstance(v1, Sequence) else v1 for v1 in v
+                )
+            else:  # just an int
+                pad_width_standardized[k] = (v, v)
+
         for name, var in self.variables.items():
-            var_pad_width = {k: v for k, v in pad_width.items() if k in var.dims}
+            var_pad_width = {
+                k: v for k, v in pad_width_standardized.items() if k in var.dims
+            }
             if not var_pad_width:
                 variables[name] = var
             elif name in self.data_vars:
@@ -6651,6 +6683,20 @@ def pad(
                     end_values=end_values,
                     reflect_type=reflect_type,
                 )
+            elif name in var_pad_width.keys() and not isinstance(
+                var_pad_width[name], int
+            ):  # dimension coordinates
+                w0, w1 = pad_width[name]  # type: ignore
+                fill_value_ind = dtypes.get_fill_value(var.dtype)
+                if isinstance(w0, int):
+                    w0_ = IndexVariable(name, [fill_value_ind] * w0)
+                else:
+                    w0_ = IndexVariable(name, w0)
+                if isinstance(w1, int):
+                    w1_ = IndexVariable(name, [fill_value_ind] * w1)
+                else:
+                    w1_ = IndexVariable(name, w1)
+                variables[name] = var.concat([w0_, var, w1_], dim=name)
             else:
                 variables[name] = var.pad(
                     pad_width=var_pad_width,

diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
@@ -5798,6 +5798,23 @@ def test_pad(self):
         np.testing.assert_equal(padded["var1"].isel(dim2=[0, -1]).data, 42)
         np.testing.assert_equal(padded["dim2"][[0, -1]].data, np.nan)
 
+    def test_pad_index(self):
+        """Index values passed as pad widths extend the index without NaN."""
+        ds = create_test_data(seed=1)
+        padded = ds.pad(dim2=([0, 1, 2], 0), constant_values=42)
+        assert padded["dim2"].shape == (12,)
+        assert padded["var1"].shape == (8, 12)
+        assert padded["var2"].shape == (8, 12)
+        assert padded["var3"].shape == (10, 8)
+        assert dict(padded.dims) == {"dim1": 8, "dim2": 12, "dim3": 10, "time": 20}
+        assert not padded["dim2"].isnull().any()  # `nan in x` is always False
+        # index values supplied for the trailing side only
+        padded = ds.pad(dim2=(0, [0, 1, 2]), constant_values=42)
+        assert not padded["dim2"].isnull().any()
+        # index values supplied for both sides
+        padded = ds.pad(dim2=([0, 1], [0, 1, 2]), constant_values=42)
+        assert not padded["dim2"].isnull().any()
+
     def test_astype_attrs(self):
         data = create_test_data(seed=123)
         data.attrs["foo"] = "bar"