Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

implemented pad with new-indexes #4974

Closed
wants to merge 16 commits into from
Closed
4 changes: 4 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ v0.17.1 (unreleased)

New Features
~~~~~~~~~~~~
- :py:meth:`DataArray.pad` and :py:meth:`Dataset.pad` now accept iterables of index values
in place of the integer pad widths. In this case, these values will be used as the newly extended parts
fujiisoup marked this conversation as resolved.
Show resolved Hide resolved
of the IndexVariable.
By `Keisuke Fujii <https://github.com/fujiisoup>`_.


Breaking changes
Expand Down
21 changes: 20 additions & 1 deletion xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -3788,7 +3788,9 @@ def polyfit(

def pad(
self,
pad_width: Mapping[Hashable, Union[int, Tuple[int, int]]] = None,
pad_width: Mapping[
Hashable, Union[int, Tuple[Union[int, Iterable], Union[int, Iterable]]]
fujiisoup marked this conversation as resolved.
Show resolved Hide resolved
] = None,
mode: str = "constant",
stat_length: Union[
int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]]
Expand Down Expand Up @@ -3818,6 +3820,11 @@ def pad(
Mapping with the form of {dim: (pad_before, pad_after)}
describing the number of values padded along each dimension.
{dim: pad} is a shortcut for pad_before = pad_after = pad
Note that having np.nan in an IndexVariable loses most of the useful
functionality of xarray. To avoid this problem, an iterable,
such as a list or np.array, can be used for either pad_before or pad_after.
In this case, these values are used to extend the IndexVariable, which
prevents that loss of functionality.
mode : str, default: "constant"
One of the following string values (taken from numpy docs)

Expand Down Expand Up @@ -3942,6 +3949,18 @@ def pad(
* x (x) float64 nan 0.0 1.0 nan
* y (y) int64 10 20 30 40
z (x) float64 nan 100.0 200.0 nan

>>> da.pad(x=([-2, -1], [2]))
dcherian marked this conversation as resolved.
Show resolved Hide resolved
<xarray.DataArray (x: 5, y: 4)>
array([[nan, nan, nan, nan],
[nan, nan, nan, nan],
[ 0., 1., 2., 3.],
[10., 11., 12., 13.],
[nan, nan, nan, nan]])
Coordinates:
* x (x) int64 -2 -1 0 1 2
* y (y) int64 10 20 30 40
z (x) float64 nan nan 100.0 200.0 nan
"""
ds = self._to_temp_dataset().pad(
pad_width=pad_width,
Expand Down
50 changes: 47 additions & 3 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -6496,7 +6496,9 @@ def polyfit(

def pad(
self,
pad_width: Mapping[Hashable, Union[int, Tuple[int, int]]] = None,
pad_width: Mapping[
Hashable, Union[int, Tuple[Union[int, Iterable], Union[int, Iterable]]]
fujiisoup marked this conversation as resolved.
Show resolved Hide resolved
] = None,
mode: str = "constant",
stat_length: Union[
int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]]
Expand All @@ -6522,10 +6524,15 @@ def pad(

Parameters
----------
pad_width : mapping of hashable to tuple of int
pad_width : mapping of hashable to tuple of int or Iterable.
fujiisoup marked this conversation as resolved.
Show resolved Hide resolved
Mapping with the form of {dim: (pad_before, pad_after)}
describing the number of values padded along each dimension.
{dim: pad} is a shortcut for pad_before = pad_after = pad
Note that having np.nan in IndexVariable loses most of the useful
functionalities of xarray. To avoid this problem, an iterable,
fujiisoup marked this conversation as resolved.
Show resolved Hide resolved
such as a list or np.array, can be used for either pad_before or pad_after.
In this case, these values are used to extend the IndexVariable, which prevents
fujiisoup marked this conversation as resolved.
Show resolved Hide resolved
that loss of functionality.
mode : str, default: "constant"
One of the following string values (taken from numpy docs).

Expand Down Expand Up @@ -6622,6 +6629,14 @@ def pad(
Dimensions without coordinates: x
Data variables:
foo (x) float64 nan 0.0 1.0 2.0 3.0 4.0 nan nan
>>> ds = xr.Dataset({"foo": ("x", range(3))}, coords={"x": [0, 1, 2]})
>>> ds.pad(x=([-1], [3]))
<xarray.Dataset>
Dimensions: (x: 5)
Coordinates:
* x (x) int64 -1 0 1 2 3
Data variables:
foo (x) float64 nan 0.0 1.0 2.0 nan
"""
pad_width = either_dict_or_kwargs(pad_width, pad_width_kwargs, "pad")

Expand All @@ -6638,8 +6653,23 @@ def pad(
coord_pad_options = {}

variables = {}

# standardize pad_width
pad_width_standarized = {} # type: Mapping[Hashable, Tuple[int, int]]
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Needs help for correcting the typing.
Now, pad_width_standarized should be Mapping[Hashable, Tuple[int, int]] instead of Mapping[ Hashable, Union[int, Tuple[Union[int, Iterable], Union[int, Iterable]]] ]
mypy is complaining...

fujiisoup marked this conversation as resolved.
Show resolved Hide resolved
for k, v in pad_width.items():
if not isinstance(v, int):
# if pad_width is a tuple of iterable, we use its length for
# pad_width_standarized
mathause marked this conversation as resolved.
Show resolved Hide resolved
pad_width_standarized[k] = [
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
pad_width_standarized[k] = [
pad_width_standardized[k] = [

len(v1) if isinstance(v1, Iterable) else v1 for v1 in v
]
fujiisoup marked this conversation as resolved.
Show resolved Hide resolved
else: # just an int
pad_width_standarized[k] = [v, v]
fujiisoup marked this conversation as resolved.
Show resolved Hide resolved

for name, var in self.variables.items():
var_pad_width = {k: v for k, v in pad_width.items() if k in var.dims}
var_pad_width = {
k: v for k, v in pad_width_standarized.items() if k in var.dims
mathause marked this conversation as resolved.
Show resolved Hide resolved
}
if not var_pad_width:
variables[name] = var
elif name in self.data_vars:
Expand All @@ -6651,6 +6681,20 @@ def pad(
end_values=end_values,
reflect_type=reflect_type,
)
elif name in var_pad_width.keys() and not isinstance(
var_pad_width[name], int
): # dimension coordinates
w0, w1 = pad_width[name]
fujiisoup marked this conversation as resolved.
Show resolved Hide resolved
fill_value_ind = dtypes.get_fill_value(var.dtype)
if isinstance(w0, int):
w0 = IndexVariable(name, [fill_value_ind] * w0)
fujiisoup marked this conversation as resolved.
Show resolved Hide resolved
else:
w0 = IndexVariable(name, w0)
fujiisoup marked this conversation as resolved.
Show resolved Hide resolved
if isinstance(w1, int):
w1 = IndexVariable(name, [fill_value_ind] * w1)
fujiisoup marked this conversation as resolved.
Show resolved Hide resolved
else:
w1 = IndexVariable(name, w1)
variables[name] = var.concat([w0, var, w1], dim=name)
fujiisoup marked this conversation as resolved.
Show resolved Hide resolved
else:
variables[name] = var.pad(
pad_width=var_pad_width,
Expand Down
17 changes: 17 additions & 0 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5798,6 +5798,23 @@ def test_pad(self):
np.testing.assert_equal(padded["var1"].isel(dim2=[0, -1]).data, 42)
np.testing.assert_equal(padded["dim2"][[0, -1]].data, np.nan)

def test_pad_index(self):
    """Check that padding with explicit index values extends ``dim2`` without NaN.

    Each side of the pad specification may be either an integer width or an
    iterable of new coordinate values; all three combinations are exercised.
    """
    ds = create_test_data(seed=1)

    # Iterable before, integer (zero-width) after.
    padded = ds.pad(dim2=([0, 1, 2], 0), constant_values=42)

    assert padded["dim2"].shape == (12,)
    assert padded["var1"].shape == (8, 12)
    assert padded["var2"].shape == (8, 12)
    assert padded["var3"].shape == (10, 8)
    assert dict(padded.dims) == {"dim1": 8, "dim2": 12, "dim3": 10, "time": 20}
    # NOTE: ``np.nan not in x`` is always true because NaN never compares
    # equal to itself, so missing values must be tested for explicitly.
    assert not padded["dim2"].isnull().any()
    np.testing.assert_equal(padded["dim2"][:3].data, [0, 1, 2])

    # Integer (zero-width) before, iterable after.
    padded = ds.pad(dim2=(0, [0, 1, 2]), constant_values=42)
    assert padded["dim2"].shape == (12,)
    assert not padded["dim2"].isnull().any()
    np.testing.assert_equal(padded["dim2"][-3:].data, [0, 1, 2])

    # Iterables on both sides, with different lengths.
    padded = ds.pad(dim2=([0, 1], [0, 1, 2]), constant_values=42)
    assert padded["dim2"].shape == (14,)
    assert not padded["dim2"].isnull().any()
    np.testing.assert_equal(padded["dim2"][:2].data, [0, 1])
    np.testing.assert_equal(padded["dim2"][-3:].data, [0, 1, 2])

def test_astype_attrs(self):
data = create_test_data(seed=123)
data.attrs["foo"] = "bar"
Expand Down