Skip to content

Commit

Permalink
Add join='override' (#3175)
Browse files Browse the repository at this point in the history
* Add join='override'

* Add coords='skip_nondim'

* Revert "Add coords='skip_nondim'"

This reverts commit 8263d38.

* black

* black2

* join='override' concat tests.

* Add whats-new.rst

* Improve error message.

* da error message.

* Refactor + fix edge cases.

* Add da test.

* more darray tests.

* Update docstrings.

* Address review comments.
  • Loading branch information
dcherian authored Aug 16, 2019
1 parent e678ec9 commit 7866587
Show file tree
Hide file tree
Showing 10 changed files with 133 additions and 3 deletions.
2 changes: 2 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ New functions/methods
Enhancements
~~~~~~~~~~~~

- Added ``join='override'``. This only checks that index sizes are equal among objects and skips
checking indexes for equality. By `Deepak Cherian <https://github.com/dcherian>`_.
- :py:func:`~xarray.concat` and :py:func:`~xarray.open_mfdataset` now support the ``join`` kwarg.
It is passed down to :py:func:`~xarray.align`. By `Deepak Cherian <https://github.com/dcherian>`_.
- In :py:meth:`~xarray.Dataset.to_zarr`, passing ``mode`` is not mandatory if
Expand Down
5 changes: 4 additions & 1 deletion xarray/backends/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -813,7 +813,7 @@ def open_mfdataset(
parallel : bool, optional
If True, the open and preprocess steps of this function will be
performed in parallel using ``dask.delayed``. Default is False.
join : {'outer', 'inner', 'left', 'right', 'exact'}, optional
join : {'outer', 'inner', 'left', 'right', 'exact', 'override'}, optional
String indicating how to combine differing indexes
(excluding concat_dim) in objects
Expand All @@ -823,6 +823,9 @@ def open_mfdataset(
- 'right': use indexes from the last object with each dimension
- 'exact': instead of aligning, raise `ValueError` when indexes to be
aligned are not equal
- 'override': if indexes are of same size, rewrite indexes to be
those of the first object with that dimension. Indexes for the same
dimension must have the same size in all objects.
**kwargs : optional
Additional arguments passed on to :py:func:`xarray.open_dataset`.
Expand Down
32 changes: 31 additions & 1 deletion xarray/core/alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,34 @@ def _get_joiner(join):
# We cannot return a function to "align" in this case, because it needs
# access to the dimension name to give a good error message.
return None
elif join == "override":
# We rewrite all indexes and then use join='left'
return operator.itemgetter(0)
else:
raise ValueError("invalid value for join: %s" % join)


def _override_indexes(objects, all_indexes, exclude):
for dim, dim_indexes in all_indexes.items():
if dim not in exclude:
lengths = {index.size for index in dim_indexes}
if len(lengths) != 1:
raise ValueError(
"Indexes along dimension %r don't have the same length."
" Cannot use join='override'." % dim
)

objects = list(objects)
for idx, obj in enumerate(objects[1:]):
new_indexes = dict()
for dim in obj.dims:
if dim not in exclude:
new_indexes[dim] = all_indexes[dim][0]
objects[idx + 1] = obj._overwrite_indexes(new_indexes)

return objects


def align(
*objects,
join="inner",
Expand All @@ -57,7 +81,7 @@ def align(
----------
*objects : Dataset or DataArray
Objects to align.
join : {'outer', 'inner', 'left', 'right', 'exact'}, optional
join : {'outer', 'inner', 'left', 'right', 'exact', 'override'}, optional
Method for joining the indexes of the passed objects along each
dimension:
Expand All @@ -67,6 +91,9 @@ def align(
- 'right': use indexes from the last object with each dimension
- 'exact': instead of aligning, raise `ValueError` when indexes to be
aligned are not equal
- 'override': if indexes are of same size, rewrite indexes to be
those of the first object with that dimension. Indexes for the same
dimension must have the same size in all objects.
copy : bool, optional
If ``copy=True``, data in the return values is always copied. If
``copy=False`` and reindexing is unnecessary, or can be performed with
Expand Down Expand Up @@ -111,6 +138,9 @@ def align(
else:
all_indexes[dim].append(index)

if join == "override":
objects = _override_indexes(list(objects), all_indexes, exclude)

# We don't reindex over dimensions with all equal indexes for two reasons:
# - It's faster for the usual case (already aligned objects).
# - It ensures it's possible to do operations that don't require alignment
Expand Down
9 changes: 9 additions & 0 deletions xarray/core/combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,9 @@ def combine_nested(
- 'right': use indexes from the last object with each dimension
- 'exact': instead of aligning, raise `ValueError` when indexes to be
aligned are not equal
- 'override': if indexes are of same size, rewrite indexes to be
those of the first object with that dimension. Indexes for the same
dimension must have the same size in all objects.
Returns
-------
Expand Down Expand Up @@ -529,6 +532,9 @@ def combine_by_coords(
- 'right': use indexes from the last object with each dimension
- 'exact': instead of aligning, raise `ValueError` when indexes to be
aligned are not equal
- 'override': if indexes are of same size, rewrite indexes to be
those of the first object with that dimension. Indexes for the same
dimension must have the same size in all objects.
Returns
-------
Expand Down Expand Up @@ -688,6 +694,9 @@ def auto_combine(
- 'right': use indexes from the last object with each dimension
- 'exact': instead of aligning, raise `ValueError` when indexes to be
aligned are not equal
- 'override': if indexes are of same size, rewrite indexes to be
those of the first object with that dimension. Indexes for the same
dimension must have the same size in all objects.
Returns
-------
Expand Down
3 changes: 3 additions & 0 deletions xarray/core/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@ def concat(
- 'right': use indexes from the last object with each dimension
- 'exact': instead of aligning, raise `ValueError` when indexes to be
aligned are not equal
- 'override': if indexes are of same size, rewrite indexes to be
those of the first object with that dimension. Indexes for the same
dimension must have the same size in all objects.
indexers, mode, concat_over : deprecated
Expand Down
2 changes: 1 addition & 1 deletion xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ def _replace_maybe_drop_dims(
)
return self._replace(variable, coords, name)

def _replace_indexes(self, indexes: Mapping[Hashable, Any]) -> "DataArray":
def _overwrite_indexes(self, indexes: Mapping[Hashable, Any]) -> "DataArray":
if not len(indexes):
return self
coords = self._coords.copy()
Expand Down
3 changes: 3 additions & 0 deletions xarray/core/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,9 @@ def merge(objects, compat="no_conflicts", join="outer", fill_value=dtypes.NA):
- 'right': use indexes from the last object with each dimension
- 'exact': instead of aligning, raise `ValueError` when indexes to be
aligned are not equal
- 'override': if indexes are of same size, rewrite indexes to be
those of the first object with that dimension. Indexes for the same
dimension must have the same size in all objects.
fill_value : scalar, optional
Value to use for newly missing values
Expand Down
8 changes: 8 additions & 0 deletions xarray/tests/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,10 @@ def test_concat_join_kwarg(self):
{"a": (("x", "y"), np.array([np.nan, 0], ndmin=2).T)},
coords={"x": [0, 1], "y": [0.0001]},
)
expected["override"] = Dataset(
{"a": (("x", "y"), np.array([0, 0], ndmin=2).T)},
coords={"x": [0, 1], "y": [0]},
)

with raises_regex(ValueError, "indexes along dimension 'y'"):
actual = concat([ds1, ds2], join="exact", dim="x")
Expand Down Expand Up @@ -396,6 +400,10 @@ def test_concat_join_kwarg(self):
{"a": (("x", "y"), np.array([np.nan, 0], ndmin=2).T)},
coords={"x": [0, 1], "y": [0.0001]},
)
expected["override"] = Dataset(
{"a": (("x", "y"), np.array([0, 0], ndmin=2).T)},
coords={"x": [0, 1], "y": [0]},
)

with raises_regex(ValueError, "indexes along dimension 'y'"):
actual = concat([ds1, ds2], join="exact", dim="x")
Expand Down
50 changes: 50 additions & 0 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -3144,6 +3144,56 @@ def test_align_copy(self):
assert_identical(x, x2)
assert source_ndarray(x2.data) is not source_ndarray(x.data)

def test_align_override(self):
    """Exercise ``join="override"`` on DataArrays, with and without ``exclude``."""
    x_labels = [0, 1, 2]
    y_labels = [1, 2, 3]

    left = DataArray([1, 2, 3], dims="x", coords={"x": x_labels})
    right = DataArray(
        np.arange(9).reshape((3, 3)),
        dims=["x", "y"],
        coords={"x": [0.1, 1.1, 2.1], "y": y_labels},
    )
    # Same data as ``right`` but labelled along x like ``left``.
    expected_right = DataArray(
        np.arange(9).reshape(3, 3),
        dims=["x", "y"],
        coords={"x": x_labels, "y": y_labels},
    )

    # Plain override: first object unchanged, second takes its x labels.
    got_left, got_right = align(left, right, join="override")
    assert_identical(left, got_left)
    assert_identical(got_right, expected_right)

    # With "x" excluded both objects are expected back unchanged.
    got_left, got_right = align(left, right, exclude="x", join="override")
    assert_identical(left, got_left)
    assert_identical(right, got_right)

    # Same expectation when the first object was reduced with isel(drop=True).
    reduced_left = left.isel(x=0, drop=True)
    got_left, got_right = xr.align(
        reduced_left, right, exclude="x", join="override"
    )
    assert_identical(reduced_left, got_left)
    assert_identical(right, got_right)

    # This pairing is expected to trip the length-mismatch error.
    shortened_left = left.isel(x=0).expand_dims("x")
    with raises_regex(ValueError, "Indexes along dimension 'x' don't have"):
        align(shortened_left, right, join="override")

@pytest.mark.parametrize(
    "darrays",
    [
        # Scalar first, then x indexes of length 1 and 2.
        [
            DataArray(0),
            DataArray([1], coords=[("x", [1])]),
            DataArray([2, 3], coords=[("x", [2, 3])]),
        ],
        # The same three arrays in the opposite order.
        [
            DataArray([2, 3], coords=[("x", [2, 3])]),
            DataArray([1], coords=[("x", [1])]),
            DataArray(0),
        ],
    ],
)
def test_align_override_error(self, darrays):
    """``join="override"`` must raise for x indexes of unequal length."""
    expected_message = "Indexes along dimension 'x' don't have"
    with raises_regex(ValueError, expected_message):
        xr.align(*darrays, join="override")

def test_align_exclude(self):
x = DataArray([[1, 2], [3, 4]], coords=[("a", [-1, -2]), ("b", [3, 4])])
y = DataArray([[1, 2], [3, 4]], coords=[("a", [-1, 20]), ("b", [5, 6])])
Expand Down
22 changes: 22 additions & 0 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1921,6 +1921,28 @@ def test_align_exact(self):
with raises_regex(ValueError, "indexes .* not equal"):
xr.align(left, right, join="exact")

def test_align_override(self):
    """Exercise ``join="override"`` on Datasets, with and without ``exclude``."""
    x_labels = [0, 1, 2]
    y_labels = [1, 2, 3]

    left = xr.Dataset(coords={"x": x_labels})
    right = xr.Dataset(coords={"x": [0.1, 1.1, 2.1], "y": y_labels})
    # ``right`` relabelled along x with the labels of ``left``.
    expected_right = xr.Dataset(coords={"x": x_labels, "y": y_labels})

    # Plain override: first object unchanged, second takes its x labels.
    got_left, got_right = xr.align(left, right, join="override")
    assert_identical(left, got_left)
    assert_identical(got_right, expected_right)

    # With "x" excluded both objects are expected back unchanged.
    got_left, got_right = xr.align(left, right, exclude="x", join="override")
    assert_identical(left, got_left)
    assert_identical(right, got_right)

    # Same expectation when the first object was reduced with isel(drop=True).
    reduced_left = left.isel(x=0, drop=True)
    got_left, got_right = xr.align(
        reduced_left, right, exclude="x", join="override"
    )
    assert_identical(reduced_left, got_left)
    assert_identical(right, got_right)

    # This pairing is expected to trip the length-mismatch error.
    shortened_left = left.isel(x=0).expand_dims("x")
    with raises_regex(ValueError, "Indexes along dimension 'x' don't have"):
        xr.align(shortened_left, right, join="override")

def test_align_exclude(self):
x = Dataset(
{
Expand Down

0 comments on commit 7866587

Please sign in to comment.