From b8cc48bb7964b1461175150e85acb7d73712d399 Mon Sep 17 00:00:00 2001 From: keisuke fujii Date: Mon, 1 Mar 2021 16:43:46 +0900 Subject: [PATCH 01/14] implemented pad with new-indexes --- doc/whats-new.rst | 4 +++ xarray/core/dataarray.py | 19 +++++++++++- xarray/core/dataset.py | 58 ++++++++++++++++++++++++++++++------ xarray/tests/test_dataset.py | 18 +++++++++++ 4 files changed, 89 insertions(+), 10 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index eed4e16eb62..c48224beaaa 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -22,6 +22,10 @@ v0.17.1 (unreleased) New Features ~~~~~~~~~~~~ +- Now :py:meth:`DataArray.pad` and :py:meth:`Dataset.pad` accept a tuple of indexes + as its arguments. In this case, these values will be used as the newly extended parts + of the IndexVariable. + By `Keisuke Fujii `_. Breaking changes diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index e6209b0604b..2e64600ff47 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3788,7 +3788,7 @@ def polyfit( def pad( self, - pad_width: Mapping[Hashable, Union[int, Tuple[int, int]]] = None, + pad_width: Mapping[Hashable, Union[int, Tuple[Union[int, Iterable], Union[int, Iterable]]]] = None, mode: str = "constant", stat_length: Union[ int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] @@ -3818,6 +3818,11 @@ def pad( Mapping with the form of {dim: (pad_before, pad_after)} describing the number of values padded along each dimension. {dim: pad} is a shortcut for pad_before = pad_after = pad + Note that having np.nan in IndexVariable loses most of the useful + functionalities of xarray. To avoid this problem, an iterable, + such as a list or np.array, can be used for either pad_before or pad_after. + In this case, these values will be used for an IndexVariable and preventing + from the loss of functionalities. mode : str, default: "constant" One of the following string values (taken from numpy docs) @@ -3942,6 +3947,18 @@ def pad( * x (x) float64 nan 0.0 1.0 nan * y (y) int64 10 20 30 40 z (x) float64 nan 100.0 200.0 nan + + >>> da.pad(x=([-2, -1], [2])) + + array([[nan, nan, nan, nan], + [nan, nan, nan, nan], + [ 0., 1., 2., 3.], + [10., 11., 12., 13.], + [nan, nan, nan, nan]]) + Coordinates: + * x (x) int64 -2 -1 0 1 2 + * y (y) int64 10 20 30 40 + z (x) float64 nan nan 100.0 200.0 nan """ ds = self._to_temp_dataset().pad( pad_width=pad_width, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 9faf74dd4bc..b0591c90774 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3016,9 +3016,7 @@ def _rename_all(self, name_dict, dims_dict): return variables, coord_names, dims, indexes def rename( - self, - name_dict: Mapping[Hashable, Hashable] = None, - **names: Hashable, + self, name_dict: Mapping[Hashable, Hashable] = None, **names: Hashable, ) -> "Dataset": """Returns a new object with renamed variables and dimensions. @@ -3440,9 +3438,7 @@ def set_index( return self._replace_vars_and_dims(variables, coord_names=coord_names) def reset_index( - self, - dims_or_levels: Union[Hashable, Sequence[Hashable]], - drop: bool = False, + self, dims_or_levels: Union[Hashable, Sequence[Hashable]], drop: bool = False, ) -> "Dataset": """Reset the specified index(es) or multi-index level(s). @@ -6496,7 +6492,9 @@ def polyfit( def pad( self, - pad_width: Mapping[Hashable, Union[int, Tuple[int, int]]] = None, + pad_width: Mapping[ + Hashable, Union[int, Tuple[Union[int, Iterable], Union[int, Iterable]]] + ] = None, mode: str = "constant", stat_length: Union[ int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] @@ -6522,10 +6520,15 @@ def pad( Parameters ---------- - pad_width : mapping of hashable to tuple of int + pad_width : mapping of hashable to tuple of int or Iterable. Mapping with the form of {dim: (pad_before, pad_after)} describing the number of values padded along each dimension. {dim: pad} is a shortcut for pad_before = pad_after = pad + Note that having np.nan in IndexVariable loses most of the useful + functionalities of xarray. To avoid this problem, an iterable, + such as a list or np.array, can be used for either pad_before or pad_after. + In this case, these values will be used for an IndexVariable and preventing + from the loss of functionalities. mode : str, default: "constant" One of the following string values (taken from numpy docs). @@ -6622,6 +6625,14 @@ def pad( Dimensions without coordinates: x Data variables: foo (x) float64 nan 0.0 1.0 2.0 3.0 4.0 nan nan + >>> ds = xr.Dataset({"foo": ("x", range(3))}, coords={"x": [0, 1, 2]}) + >>> ds.pad(x=([-1], [3])) + + Dimensions: (x: 5) + Coordinates: + * x (x) int64 -1 0 1 2 3 + Data variables: + foo (x) float64 nan 0.0 1.0 2.0 nan """ pad_width = either_dict_or_kwargs(pad_width, pad_width_kwargs, "pad") @@ -6638,8 +6649,23 @@ def pad( coord_pad_options = {} variables = {} + + # standarize pad_width + pad_width_standarized = {} + for k, v in pad_width.items(): + if not isinstance(v, int): + # if pad_width is a tuple of iterable, we use its length for + # pad_width_standarized + pad_width_standarized[k] = [ + len(v1) if hasattr(v1, "__len__") else v1 for v1 in v + ] + else: # just an int + pad_width_standarized[k] = [v, v] + for name, var in self.variables.items(): - var_pad_width = {k: v for k, v in pad_width.items() if k in var.dims} + var_pad_width = { + k: v for k, v in pad_width_standarized.items() if k in var.dims + } if not var_pad_width: variables[name] = var elif name in self.data_vars: @@ -6651,6 +6677,20 @@ def pad( end_values=end_values, reflect_type=reflect_type, ) + elif name in var_pad_width.keys() and not isinstance( + var_pad_width[name], int + ): # dimension coordinates + w0, w1 = pad_width[name] + fill_value_ind = dtypes.get_fill_value(var.dtype) + if isinstance(w0, int): + w0 = IndexVariable(name, [fill_value_ind] * w0) + else: + w0 = IndexVariable(name, w0) + if isinstance(w1, int): + w1 = IndexVariable(name, [fill_value_ind] * w1) + else: + w1 = IndexVariable(name, w1) + variables[name] = var.concat([w0, var, w1], dim=name) else: variables[name] = var.pad( pad_width=var_pad_width, diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 2118bc8b780..999cf1c716f 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -5798,6 +5798,24 @@ def test_pad(self): np.testing.assert_equal(padded["var1"].isel(dim2=[0, -1]).data, 42) np.testing.assert_equal(padded["dim2"][[0, -1]].data, np.nan) + def test_pad_index(self): + ds = create_test_data(seed=1) + padded = ds.pad(dim2=([0, 1, 2], 0), constant_values=42) + + assert padded["dim2"].shape == (12,) + assert padded["var1"].shape == (8, 12) + assert padded["var2"].shape == (8, 12) + assert padded["var3"].shape == (10, 8) + assert dict(padded.dims) == {"dim1": 8, "dim2": 12, "dim3": 10, "time": 20} + assert np.nan not in padded["dim2"] + + padded = ds.pad(dim2=(0, [0, 1, 2]), constant_values=42) + assert np.nan not in padded["dim2"] + + padded = ds.pad(dim2=([0, 1], [0, 1, 2]), constant_values=42) + assert np.nan not in padded["dim2"] + + def test_astype_attrs(self): data = create_test_data(seed=123) data.attrs["foo"] = "bar" From f65c4c9bf3b1ad2434346002ccd5e40bddd1d276 Mon Sep 17 00:00:00 2001 From: keisuke fujii Date: Mon, 1 Mar 2021 16:59:05 +0900 Subject: [PATCH 02/14] pre-commit hook --- doc/whats-new.rst | 4 ++-- xarray/core/dataarray.py | 12 +++++++----- xarray/core/dataset.py | 18 +++++++++++------- xarray/tests/test_dataset.py | 1 - 4 files changed, 20 insertions(+), 15 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c48224beaaa..452496163ba 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -22,8 +22,8 @@ v0.17.1 (unreleased) New Features ~~~~~~~~~~~~ -- Now :py:meth:`DataArray.pad` and :py:meth:`Dataset.pad` accept a tuple of indexes - as its arguments. In this case, these values will be used as the newly extended parts +- Now :py:meth:`DataArray.pad` and :py:meth:`Dataset.pad` accept a tuple of indexes + as its arguments. In this case, these values will be used as the newly extended parts of the IndexVariable. By `Keisuke Fujii `_. diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 2e64600ff47..f360ab8f6a4 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3788,7 +3788,9 @@ def polyfit( def pad( self, - pad_width: Mapping[Hashable, Union[int, Tuple[Union[int, Iterable], Union[int, Iterable]]]] = None, + pad_width: Mapping[ + Hashable, Union[int, Tuple[Union[int, Iterable], Union[int, Iterable]]] + ] = None, mode: str = "constant", stat_length: Union[ int, Tuple[int, int], Mapping[Hashable, Tuple[int, int]] @@ -3818,10 +3820,10 @@ def pad( Mapping with the form of {dim: (pad_before, pad_after)} describing the number of values padded along each dimension. {dim: pad} is a shortcut for pad_before = pad_after = pad - Note that having np.nan in IndexVariable loses most of the useful - functionalities of xarray. To avoid this problem, an iterable, + Note that having np.nan in IndexVariable loses most of the useful + functionalities of xarray. To avoid this problem, an iterable, such as a list or np.array, can be used for either pad_before or pad_after. - In this case, these values will be used for an IndexVariable and preventing + In this case, these values will be used for an IndexVariable and preventing from the loss of functionalities. mode : str, default: "constant" One of the following string values (taken from numpy docs) @@ -3948,7 +3950,7 @@ def pad( * y (y) int64 10 20 30 40 z (x) float64 nan 100.0 200.0 nan - >>> da.pad(x=([-2, -1], [2])) + >>> da.pad(x=([-2, -1], [2])) array([[nan, nan, nan, nan], [nan, nan, nan, nan], diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index b0591c90774..4bb71790589 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3016,7 +3016,9 @@ def _rename_all(self, name_dict, dims_dict): return variables, coord_names, dims, indexes def rename( - self, name_dict: Mapping[Hashable, Hashable] = None, **names: Hashable, + self, + name_dict: Mapping[Hashable, Hashable] = None, + **names: Hashable, ) -> "Dataset": """Returns a new object with renamed variables and dimensions. @@ -3438,7 +3440,9 @@ def set_index( return self._replace_vars_and_dims(variables, coord_names=coord_names) def reset_index( - self, dims_or_levels: Union[Hashable, Sequence[Hashable]], drop: bool = False, + self, + dims_or_levels: Union[Hashable, Sequence[Hashable]], + drop: bool = False, ) -> "Dataset": """Reset the specified index(es) or multi-index level(s). @@ -6524,10 +6528,10 @@ def pad( Mapping with the form of {dim: (pad_before, pad_after)} describing the number of values padded along each dimension. {dim: pad} is a shortcut for pad_before = pad_after = pad - Note that having np.nan in IndexVariable loses most of the useful - functionalities of xarray. To avoid this problem, an iterable, + Note that having np.nan in IndexVariable loses most of the useful + functionalities of xarray. To avoid this problem, an iterable, such as a list or np.array, can be used for either pad_before or pad_after. - In this case, these values will be used for an IndexVariable and preventing + In this case, these values will be used for an IndexVariable and preventing from the loss of functionalities. mode : str, default: "constant" One of the following string values (taken from numpy docs). @@ -6651,13 +6655,13 @@ def pad( variables = {} # standarize pad_width - pad_width_standarized = {} + pad_width_standarized = {} # type: Mapping[Hashable, Tuple[int, int]] for k, v in pad_width.items(): if not isinstance(v, int): # if pad_width is a tuple of iterable, we use its length for # pad_width_standarized pad_width_standarized[k] = [ - len(v1) if hasattr(v1, "__len__") else v1 for v1 in v + len(v1) if isinstance(v1, Iterable) else v1 for v1 in v ] else: # just an int pad_width_standarized[k] = [v, v] diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 999cf1c716f..2f526a72aac 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -5815,7 +5815,6 @@ def test_pad_index(self): padded = ds.pad(dim2=([0, 1], [0, 1, 2]), constant_values=42) assert np.nan not in padded["dim2"] - def test_astype_attrs(self): data = create_test_data(seed=123) data.attrs["foo"] = "bar" From 4a4c514040c5b2c128db0c3d89e3f9cd5fcac4d7 Mon Sep 17 00:00:00 2001 From: Keisuke Fujii Date: Tue, 2 Mar 2021 17:01:49 +0900 Subject: [PATCH 03/14] Update xarray/core/dataset.py Co-authored-by: Mathias Hauser --- xarray/core/dataset.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 4bb71790589..bb5dd01aef5 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -6660,9 +6660,11 @@ def pad( if not isinstance(v, int): # if pad_width is a tuple of iterable, we use its length for # pad_width_standarized - pad_width_standarized[k] = [ - len(v1) if isinstance(v1, Iterable) else v1 for v1 in v - ] + # mypy does not know the length here and infers Tuple[int, ...] + # see https://github.com/python/mypy/issues/7509 + pad_width_standardized[k] = tuple( # type: ignore + len(v1) if isinstance(v1, Sequence) else v1 for v1 in v + ) else: # just an int pad_width_standarized[k] = [v, v] From f40ae1d76b1ee4b8fd44c7c0f89666f9a2bcc233 Mon Sep 17 00:00:00 2001 From: Keisuke Fujii Date: Tue, 2 Mar 2021 17:02:01 +0900 Subject: [PATCH 04/14] Update xarray/core/dataarray.py Co-authored-by: Mathias Hauser --- xarray/core/dataarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index f360ab8f6a4..046585523b7 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3789,7 +3789,7 @@ def polyfit( def pad( self, pad_width: Mapping[ - Hashable, Union[int, Tuple[Union[int, Iterable], Union[int, Iterable]]] + Hashable, Union[int, Tuple[Union[int, Sequence], Union[int, Sequence]]] ] = None, mode: str = "constant", stat_length: Union[ From ed7cfa2138f38813c291aae59213310a375a762f Mon Sep 17 00:00:00 2001 From: Keisuke Fujii Date: Tue, 2 Mar 2021 17:03:05 +0900 Subject: [PATCH 05/14] Apply suggestions from code review Co-authored-by: Mathias Hauser --- xarray/core/dataset.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index bb5dd01aef5..bbf5387da2f 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -6497,7 +6497,7 @@ def polyfit( def pad( self, pad_width: Mapping[ - Hashable, Union[int, Tuple[Union[int, Iterable], Union[int, Iterable]]] + Hashable, Union[int, Tuple[Union[int, Sequence], Union[int, Sequence]]] ] = None, mode: str = "constant", stat_length: Union[ @@ -6655,7 +6655,7 @@ def pad( variables = {} # standarize pad_width - pad_width_standarized = {} # type: Mapping[Hashable, Tuple[int, int]] + pad_width_standardized = {} # type: Dict[Hashable, Tuple[int, int]] for k, v in pad_width.items(): if not isinstance(v, int): # if pad_width is a tuple of iterable, we use its length for @@ -6666,7 +6666,7 @@ def pad( len(v1) if isinstance(v1, Sequence) else v1 for v1 in v ) else: # just an int - pad_width_standarized[k] = [v, v] + pad_width_standarized[k] = (v, v) for name, var in self.variables.items(): var_pad_width = { @@ -6686,17 +6686,17 @@ def pad( elif name in var_pad_width.keys() and not isinstance( var_pad_width[name], int ): # dimension coordinates - w0, w1 = pad_width[name] + w0, w1 = pad_width[name] # type: ignore fill_value_ind = dtypes.get_fill_value(var.dtype) if isinstance(w0, int): - w0 = IndexVariable(name, [fill_value_ind] * w0) + w0_ = IndexVariable(name, [fill_value_ind] * w0) else: - w0 = IndexVariable(name, w0) + w0_ = IndexVariable(name, w0) if isinstance(w1, int): - w1 = IndexVariable(name, [fill_value_ind] * w1) + w1_ = IndexVariable(name, [fill_value_ind] * w1) else: - w1 = IndexVariable(name, w1) - variables[name] = var.concat([w0, var, w1], dim=name) + w1_ = IndexVariable(name, w1) + variables[name] = var.concat([w0_, var, w1_], dim=name) else: variables[name] = var.pad( pad_width=var_pad_width, From 36e3e395655488f8612c706831494b72c922d7cf Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Tue, 2 Mar 2021 14:09:20 +0100 Subject: [PATCH 06/14] Apply suggestions from code review --- xarray/core/dataset.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index bbf5387da2f..d7d78628ecf 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -6659,18 +6659,18 @@ def pad( for k, v in pad_width.items(): if not isinstance(v, int): # if pad_width is a tuple of iterable, we use its length for - # pad_width_standarized + # pad_width_standardized # mypy does not know the length here and infers Tuple[int, ...] # see https://github.com/python/mypy/issues/7509 pad_width_standardized[k] = tuple( # type: ignore len(v1) if isinstance(v1, Sequence) else v1 for v1 in v ) else: # just an int - pad_width_standarized[k] = (v, v) + pad_width_standardized[k] = (v, v) for name, var in self.variables.items(): var_pad_width = { - k: v for k, v in pad_width_standarized.items() if k in var.dims + k: v for k, v in pad_width_standardized.items() if k in var.dims } if not var_pad_width: variables[name] = var From c49ef3a132deea755c0c2ba14a5437ff2aa249a9 Mon Sep 17 00:00:00 2001 From: Keisuke Fujii Date: Wed, 3 Mar 2021 16:13:57 +0900 Subject: [PATCH 07/14] Apply suggestions from code review Co-authored-by: Deepak Cherian Co-authored-by: Mathias Hauser --- doc/whats-new.rst | 2 +- xarray/core/dataset.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 452496163ba..1999cbc39e2 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -23,7 +23,7 @@ v0.17.1 (unreleased) New Features ~~~~~~~~~~~~ - Now :py:meth:`DataArray.pad` and :py:meth:`Dataset.pad` accept a tuple of indexes - as its arguments. In this case, these values will be used as the newly extended parts + as its arguments. In this case, these values will be used as the newly extended coordinate labels of the IndexVariable. By `Keisuke Fujii `_. diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index d7d78628ecf..d2140c83641 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -6529,9 +6529,9 @@ def pad( describing the number of values padded along each dimension. {dim: pad} is a shortcut for pad_before = pad_after = pad Note that having np.nan in IndexVariable loses most of the useful - functionalities of xarray. To avoid this problem, an iterable, + functionalities of xarray. To avoid this problem, a sequence, such as a list or np.array, can be used for either pad_before or pad_after. - In this case, these values will be used for an IndexVariable and preventing + In this case, these values will be used for an IndexVariable preventing from the loss of functionalities. mode : str, default: "constant" One of the following string values (taken from numpy docs). From 99daf649fbf1d4b5d28d91747347d6686194f1fc Mon Sep 17 00:00:00 2001 From: Keisuke Fujii Date: Wed, 3 Mar 2021 16:15:44 +0900 Subject: [PATCH 08/14] Update xarray/core/dataset.py Co-authored-by: Mathias Hauser --- xarray/core/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index d2140c83641..4cd2f5f4d11 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -6524,7 +6524,7 @@ def pad( Parameters ---------- - pad_width : mapping of hashable to tuple of int or Iterable. + pad_width : mapping of hashable to int or tuple of int or Sequence. Mapping with the form of {dim: (pad_before, pad_after)} describing the number of values padded along each dimension. {dim: pad} is a shortcut for pad_before = pad_after = pad From 07a01b402ff2fe182a5c167846804d0689bb5f38 Mon Sep 17 00:00:00 2001 From: keisuke fujii Date: Wed, 3 Mar 2021 16:31:21 +0900 Subject: [PATCH 09/14] A bug fix; now int can be passed as an argument. --- xarray/core/dataset.py | 2 +- xarray/tests/test_dataset.py | 17 +++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 4cd2f5f4d11..6db32ae5a8a 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -6684,7 +6684,7 @@ def pad( reflect_type=reflect_type, ) elif name in var_pad_width.keys() and not isinstance( - var_pad_width[name], int + pad_width[name], int ): # dimension coordinates w0, w1 = pad_width[name] # type: ignore fill_value_ind = dtypes.get_fill_value(var.dtype) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 2f526a72aac..047678383f2 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -5787,16 +5787,17 @@ def test_polyfit_warnings(self): def test_pad(self): ds = create_test_data(seed=1) - padded = ds.pad(dim2=(1, 1), constant_values=42) + for width in [(1, 1), 1]: + padded = ds.pad(dim2=width, constant_values=42) - assert padded["dim2"].shape == (11,) - assert padded["var1"].shape == (8, 11) - assert padded["var2"].shape == (8, 11) - assert padded["var3"].shape == (10, 8) - assert dict(padded.dims) == {"dim1": 8, "dim2": 11, "dim3": 10, "time": 20} + assert padded["dim2"].shape == (11,) + assert padded["var1"].shape == (8, 11) + assert padded["var2"].shape == (8, 11) + assert padded["var3"].shape == (10, 8) + assert dict(padded.dims) == {"dim1": 8, "dim2": 11, "dim3": 10, "time": 20} - np.testing.assert_equal(padded["var1"].isel(dim2=[0, -1]).data, 42) - np.testing.assert_equal(padded["dim2"][[0, -1]].data, np.nan) + np.testing.assert_equal(padded["var1"].isel(dim2=[0, -1]).data, 42) + np.testing.assert_equal(padded["dim2"][[0, -1]].data, np.nan) def test_pad_index(self): ds = create_test_data(seed=1) From 6662385cd20472ca11c651a7ae605d3ded5bb2c6 Mon Sep 17 00:00:00 2001 From: keisuke fujii Date: Wed, 3 Mar 2021 16:41:23 +0900 Subject: [PATCH 10/14] Added equivalent tests for doctest, but not failing --- xarray/tests/test_dataset.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 047678383f2..d109e08f0ca 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -5816,6 +5816,22 @@ def test_pad_index(self): padded = ds.pad(dim2=([0, 1], [0, 1, 2]), constant_values=42) assert np.nan not in padded["dim2"] + padded = ds.pad(dim2=([0, 1], [2]), constant_values=42) + assert np.nan not in padded["dim2"] + + def test_pad_index_doc(self): + ds = xr.Dataset({"foo": ("x", range(3))}, coords={"x": [0, 1, 2]}) + padded = ds.pad(x=([-1], [3])) + assert np.nan not in padded["x"] + + da = xr.DataArray( + [[0, 1, 2, 3], [10, 11, 12, 13]], + dims=["x", "y"], + coords={"x": [0, 1], "y": [10, 20, 30, 40], "z": ("x", [100, 200])}, + ) + padded = da.pad(x=([-2, -1], [2])) + assert np.nan not in padded["x"] + def test_astype_attrs(self): data = create_test_data(seed=123) data.attrs["foo"] = "bar" From c130af44fdc2575e9f80e9ed108c6ae4d398184b Mon Sep 17 00:00:00 2001 From: keisuke fujii Date: Thu, 4 Mar 2021 04:09:52 +0900 Subject: [PATCH 11/14] Fix indent --- xarray/core/dataarray.py | 8 ++++---- xarray/core/dataset.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 046585523b7..47b9415293b 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3953,10 +3953,10 @@ def pad( >>> da.pad(x=([-2, -1], [2])) array([[nan, nan, nan, nan], - [nan, nan, nan, nan], - [ 0., 1., 2., 3.], - [10., 11., 12., 13.], - [nan, nan, nan, nan]]) + [nan, nan, nan, nan], + [ 0., 1., 2., 3.], + [10., 11., 12., 13.], + [nan, nan, nan, nan]]) Coordinates: * x (x) int64 -2 -1 0 1 2 * y (y) int64 10 20 30 40 diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 6db32ae5a8a..afda9125d69 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -6634,7 +6634,7 @@ def pad( Dimensions: (x: 5) Coordinates: - * x (x) int64 -1 0 1 2 3 + * x (x) int64 -1 0 1 2 3 Data variables: foo (x) float64 nan 0.0 1.0 2.0 nan """ From ac7bf7039d931111ce903fbbe0f90c420ccb548c Mon Sep 17 00:00:00 2001 From: keisuke fujii Date: Thu, 4 Mar 2021 04:38:12 +0900 Subject: [PATCH 12/14] doctest --- xarray/core/dataarray.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 47b9415293b..24e8943f45f 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3953,10 +3953,10 @@ def pad( >>> da.pad(x=([-2, -1], [2])) array([[nan, nan, nan, nan], - [nan, nan, nan, nan], - [ 0., 1., 2., 3.], - [10., 11., 12., 13.], - [nan, nan, nan, nan]]) + [nan, nan, nan, nan], + [ 0., 1., 2., 3.], + [10., 11., 12., 13.], + [nan, nan, nan, nan]]) Coordinates: * x (x) int64 -2 -1 0 1 2 * y (y) int64 10 20 30 40 From 9b907697dd47e5272442bdeb07c8a6d379f0802b Mon Sep 17 00:00:00 2001 From: keisuke fujii Date: Sat, 6 Mar 2021 06:34:21 +0900 Subject: [PATCH 13/14] Added Variable.pad_indexes disallow [Sequence, int] as an argument. --- xarray/core/dataarray.py | 8 ++++---- xarray/core/dataset.py | 38 ++++++++++++++++++++---------------- xarray/core/variable.py | 9 +++++++++ xarray/tests/test_dataset.py | 9 +++++++-- 4 files changed, 41 insertions(+), 23 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 24e8943f45f..734fb65cca6 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3789,7 +3789,7 @@ def polyfit( def pad( self, pad_width: Mapping[ - Hashable, Union[int, Tuple[Union[int, Sequence], Union[int, Sequence]]] + Hashable, Union[int, Tuple[int, int], Tuple[Sequence, Sequence]] ] = None, mode: str = "constant", stat_length: Union[ @@ -3821,9 +3821,9 @@ def pad( describing the number of values padded along each dimension. {dim: pad} is a shortcut for pad_before = pad_after = pad Note that having np.nan in IndexVariable loses most of the useful - functionalities of xarray. To avoid this problem, an iterable, - such as a list or np.array, can be used for either pad_before or pad_after. - In this case, these values will be used for an IndexVariable and preventing + functionalities of xarray. To avoid this problem, sequences, + such as lists or np.arrays, can be used for pad_before and pad_after. + In this case, these values will be used for an IndexVariable preventing from the loss of functionalities. mode : str, default: "constant" One of the following string values (taken from numpy docs) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index afda9125d69..0285ab73e28 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -6497,7 +6497,8 @@ def polyfit( def pad( self, pad_width: Mapping[ - Hashable, Union[int, Tuple[Union[int, Sequence], Union[int, Sequence]]] + Hashable, + Union[int, Tuple[Union[int], Union[int]], Tuple[Sequence, Sequence]], ] = None, mode: str = "constant", stat_length: Union[ @@ -6529,8 +6530,8 @@ def pad( describing the number of values padded along each dimension. {dim: pad} is a shortcut for pad_before = pad_after = pad Note that having np.nan in IndexVariable loses most of the useful - functionalities of xarray. To avoid this problem, a sequence, - such as a list or np.array, can be used for either pad_before or pad_after. + functionalities of xarray. To avoid this problem, sequences, + such as lists or np.arrays, can be used for pad_before and pad_after. In this case, these values will be used for an IndexVariable preventing from the loss of functionalities. mode : str, default: "constant" @@ -6683,20 +6684,23 @@ def pad( end_values=end_values, reflect_type=reflect_type, ) - elif name in var_pad_width.keys() and not isinstance( - pad_width[name], int - ): # dimension coordinates - w0, w1 = pad_width[name] # type: ignore - fill_value_ind = dtypes.get_fill_value(var.dtype) - if isinstance(w0, int): - w0_ = IndexVariable(name, [fill_value_ind] * w0) - else: - w0_ = IndexVariable(name, w0) - if isinstance(w1, int): - w1_ = IndexVariable(name, [fill_value_ind] * w1) - else: - w1_ = IndexVariable(name, w1) - variables[name] = var.concat([w0_, var, w1_], dim=name) + elif ( + name in var_pad_width.keys() # dimension coordinates + and isinstance(pad_width[name], Sequence) + and ( + isinstance(pad_width[name][0], Sequence) # type: ignore + or isinstance(pad_width[name][1], Sequence) # type: ignore + ) + ): + pad_start, pad_end = pad_width[name] # type: ignore + if isinstance(pad_start, int) or isinstance(pad_end, int): + # do not allow [Sequence, int] as pad_width + raise TypeError( + "({}, {}) is used for pad_width[{}]. Must be either (int, int) or (Sequence, Sequence).".format( + type(pad_start), type(pad_end), name + ) + ) + variables[name] = var.pad_indexes(pad_start=pad_start, pad_end=pad_end) else: variables[name] = var.pad( pad_width=var_pad_width, diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 9b70f721689..e5e8557e5d4 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1362,6 +1362,15 @@ def pad( return type(self)(self.dims, array) + def pad_indexes(self, pad_start: Sequence, pad_end: Sequence): + """ + Return a new (Index)Variable with [pad_start, pad_end] padded at the head and tail + of the original array. Used in dataset.pad + """ + start = type(self)(self.dims[0], pad_start) + end = type(self)(self.dims[0], pad_end) + return type(self).concat([start, self, end], dim=self.dims[0]) + def _roll_one_dim(self, dim, count): axis = self.get_axis_num(dim) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index d109e08f0ca..52858a27a53 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -5801,7 +5801,7 @@ def test_pad(self): def test_pad_index(self): ds = create_test_data(seed=1) - padded = ds.pad(dim2=([0, 1, 2], 0), constant_values=42) + padded = ds.pad(dim2=([0, 1, 2], []), constant_values=42) assert padded["dim2"].shape == (12,) assert padded["var1"].shape == (8, 12) @@ -5810,7 +5810,7 @@ def test_pad_index(self): assert dict(padded.dims) == {"dim1": 8, "dim2": 12, "dim3": 10, "time": 20} assert np.nan not in padded["dim2"] - padded = ds.pad(dim2=(0, [0, 1, 2]), constant_values=42) + padded = ds.pad(dim2=([], [0, 1, 2]), constant_values=42) assert np.nan not in padded["dim2"] padded = ds.pad(dim2=([0, 1], [0, 1, 2]), constant_values=42) @@ -5819,6 +5819,11 @@ def test_pad_index(self): padded = ds.pad(dim2=([0, 1], [2]), constant_values=42) assert np.nan not in padded["dim2"] + def test_pad_index_error(self): + with pytest.raises(TypeError): + ds = create_test_data(seed=1) + ds.pad(dim2=(0, [1, 2])) + def test_pad_index_doc(self): ds = xr.Dataset({"foo": ("x", range(3))}, coords={"x": [0, 1, 2]}) padded = ds.pad(x=([-1], [3])) From 30391c64c809686bfefd3bb878ca66eaf86016a5 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 29 Jun 2021 05:56:32 -0600 Subject: [PATCH 14/14] Update xarray/core/dataarray.py --- xarray/core/dataarray.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index f2a2101b2c8..053f1bf2f87 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3950,6 +3950,8 @@ def pad( * y (y) int64 10 20 30 40 z (x) float64 nan 100.0 200.0 nan + Specify coordinate labels for padded values by passing a tuple of sequences + >>> da.pad(x=([-2, -1], [2])) array([[nan, nan, nan, nan],