diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 83f59d9eea4..87954275821 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -44,6 +44,8 @@ New functions/methods Enhancements ~~~~~~~~~~~~ +- Added ``join='override'``. This only checks that index sizes are equal among objects and skips + checking indexes for equality. By `Deepak Cherian <https://github.com/dcherian>`_. - :py:func:`~xarray.concat` and :py:func:`~xarray.open_mfdataset` now support the ``join`` kwarg. It is passed down to :py:func:`~xarray.align`. By `Deepak Cherian <https://github.com/dcherian>`_. - In :py:meth:`~xarray.Dataset.to_zarr`, passing ``mode`` is not mandatory if diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 8d5f7f05a9f..a2e6164e070 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -813,7 +813,7 @@ def open_mfdataset( parallel : bool, optional If True, the open and preprocess steps of this function will be performed in parallel using ``dask.delayed``. Default is False. - join : {'outer', 'inner', 'left', 'right', 'exact'}, optional + join : {'outer', 'inner', 'left', 'right', 'exact', 'override'}, optional String indicating how to combine differing indexes (excluding concat_dim) in objects @@ -823,6 +823,9 @@ def open_mfdataset( - 'right': use indexes from the last object with each dimension - 'exact': instead of aligning, raise `ValueError` when indexes to be aligned are not equal + - 'override': if indexes are of same size, rewrite indexes to be + those of the first object with that dimension. Indexes for the same + dimension must have the same size in all objects. **kwargs : optional Additional arguments passed on to :py:func:`xarray.open_dataset`. diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index 56f060fd713..bb44f48fb9b 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -31,10 +31,34 @@ def _get_joiner(join): # We cannot return a function to "align" in this case, because it needs # access to the dimension name to give a good error message. 
return None + elif join == "override": + # We rewrite all indexes and then use join='left' + return operator.itemgetter(0) else: raise ValueError("invalid value for join: %s" % join) +def _override_indexes(objects, all_indexes, exclude): + for dim, dim_indexes in all_indexes.items(): + if dim not in exclude: + lengths = {index.size for index in dim_indexes} + if len(lengths) != 1: + raise ValueError( + "Indexes along dimension %r don't have the same length." + " Cannot use join='override'." % dim + ) + + objects = list(objects) + for idx, obj in enumerate(objects[1:]): + new_indexes = dict() + for dim in obj.dims: + if dim not in exclude: + new_indexes[dim] = all_indexes[dim][0] + objects[idx + 1] = obj._overwrite_indexes(new_indexes) + + return objects + + def align( *objects, join="inner", @@ -57,7 +81,7 @@ def align( ---------- *objects : Dataset or DataArray Objects to align. - join : {'outer', 'inner', 'left', 'right', 'exact'}, optional + join : {'outer', 'inner', 'left', 'right', 'exact', 'override'}, optional Method for joining the indexes of the passed objects along each dimension: @@ -67,6 +91,9 @@ def align( - 'right': use indexes from the last object with each dimension - 'exact': instead of aligning, raise `ValueError` when indexes to be aligned are not equal + - 'override': if indexes are of same size, rewrite indexes to be + those of the first object with that dimension. Indexes for the same + dimension must have the same size in all objects. copy : bool, optional If ``copy=True``, data in the return values is always copied. If ``copy=False`` and reindexing is unnecessary, or can be performed with @@ -111,6 +138,9 @@ def align( else: all_indexes[dim].append(index) + if join == "override": + objects = _override_indexes(list(objects), all_indexes, exclude) + # We don't reindex over dimensions with all equal indexes for two reasons: # - It's faster for the usual case (already aligned objects). 
# - It ensures it's possible to do operations that don't require alignment diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 740cb68c862..3aae12c3b66 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -379,6 +379,9 @@ def combine_nested( - 'right': use indexes from the last object with each dimension - 'exact': instead of aligning, raise `ValueError` when indexes to be aligned are not equal + - 'override': if indexes are of same size, rewrite indexes to be + those of the first object with that dimension. Indexes for the same + dimension must have the same size in all objects. Returns ------- @@ -529,6 +532,9 @@ def combine_by_coords( - 'right': use indexes from the last object with each dimension - 'exact': instead of aligning, raise `ValueError` when indexes to be aligned are not equal + - 'override': if indexes are of same size, rewrite indexes to be + those of the first object with that dimension. Indexes for the same + dimension must have the same size in all objects. Returns ------- @@ -688,6 +694,9 @@ def auto_combine( - 'right': use indexes from the last object with each dimension - 'exact': instead of aligning, raise `ValueError` when indexes to be aligned are not equal + - 'override': if indexes are of same size, rewrite indexes to be + those of the first object with that dimension. Indexes for the same + dimension must have the same size in all objects. Returns ------- diff --git a/xarray/core/concat.py b/xarray/core/concat.py index 19609308e78..014b615f2a7 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -85,6 +85,9 @@ def concat( - 'right': use indexes from the last object with each dimension - 'exact': instead of aligning, raise `ValueError` when indexes to be aligned are not equal + - 'override': if indexes are of same size, rewrite indexes to be + those of the first object with that dimension. Indexes for the same + dimension must have the same size in all objects. 
indexers, mode, concat_over : deprecated diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 33be8d96e91..40ee9b7992f 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -375,7 +375,7 @@ def _replace_maybe_drop_dims( ) return self._replace(variable, coords, name) - def _replace_indexes(self, indexes: Mapping[Hashable, Any]) -> "DataArray": + def _overwrite_indexes(self, indexes: Mapping[Hashable, Any]) -> "DataArray": if not len(indexes): return self coords = self._coords.copy() diff --git a/xarray/core/merge.py b/xarray/core/merge.py index b8d9e1a795c..882667dbaaa 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -540,6 +540,9 @@ def merge(objects, compat="no_conflicts", join="outer", fill_value=dtypes.NA): - 'right': use indexes from the last object with each dimension - 'exact': instead of aligning, raise `ValueError` when indexes to be aligned are not equal + - 'override': if indexes are of same size, rewrite indexes to be + those of the first object with that dimension. Indexes for the same + dimension must have the same size in all objects. 
fill_value : scalar, optional Value to use for newly missing values diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index d16ebeeb53d..153588dad7b 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -190,6 +190,10 @@ def test_concat_join_kwarg(self): {"a": (("x", "y"), np.array([np.nan, 0], ndmin=2).T)}, coords={"x": [0, 1], "y": [0.0001]}, ) + expected["override"] = Dataset( + {"a": (("x", "y"), np.array([0, 0], ndmin=2).T)}, + coords={"x": [0, 1], "y": [0]}, + ) with raises_regex(ValueError, "indexes along dimension 'y'"): actual = concat([ds1, ds2], join="exact", dim="x") @@ -399,6 +403,10 @@ def test_concat_join_kwarg(self): {"a": (("x", "y"), np.array([np.nan, 0], ndmin=2).T)}, coords={"x": [0, 1], "y": [0.0001]}, ) + expected["override"] = Dataset( + {"a": (("x", "y"), np.array([0, 0], ndmin=2).T)}, + coords={"x": [0, 1], "y": [0]}, + ) with raises_regex(ValueError, "indexes along dimension 'y'"): actual = concat([ds1, ds2], join="exact", dim="x") diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 8b63b650dc2..3872f0d23e2 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3113,6 +3113,56 @@ def test_align_copy(self): assert_identical(x, x2) assert source_ndarray(x2.data) is not source_ndarray(x.data) + def test_align_override(self): + left = DataArray([1, 2, 3], dims="x", coords={"x": [0, 1, 2]}) + right = DataArray( + np.arange(9).reshape((3, 3)), + dims=["x", "y"], + coords={"x": [0.1, 1.1, 2.1], "y": [1, 2, 3]}, + ) + + expected_right = DataArray( + np.arange(9).reshape(3, 3), + dims=["x", "y"], + coords={"x": [0, 1, 2], "y": [1, 2, 3]}, + ) + + new_left, new_right = align(left, right, join="override") + assert_identical(left, new_left) + assert_identical(new_right, expected_right) + + new_left, new_right = align(left, right, exclude="x", join="override") + assert_identical(left, new_left) + assert_identical(right, new_right) + + new_left, 
new_right = xr.align( + left.isel(x=0, drop=True), right, exclude="x", join="override" + ) + assert_identical(left.isel(x=0, drop=True), new_left) + assert_identical(right, new_right) + + with raises_regex(ValueError, "Indexes along dimension 'x' don't have"): + align(left.isel(x=0).expand_dims("x"), right, join="override") + + @pytest.mark.parametrize( + "darrays", + [ + [ + DataArray(0), + DataArray([1], [("x", [1])]), + DataArray([2, 3], [("x", [2, 3])]), + ], + [ + DataArray([2, 3], [("x", [2, 3])]), + DataArray([1], [("x", [1])]), + DataArray(0), + ], + ], + ) + def test_align_override_error(self, darrays): + with raises_regex(ValueError, "Indexes along dimension 'x' don't have"): + xr.align(*darrays, join="override") + def test_align_exclude(self): x = DataArray([[1, 2], [3, 4]], coords=[("a", [-1, -2]), ("b", [3, 4])]) y = DataArray([[1, 2], [3, 4]], coords=[("a", [-1, 20]), ("b", [5, 6])]) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 75325a77b36..a072383c72d 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1898,6 +1898,28 @@ def test_align_exact(self): with raises_regex(ValueError, "indexes .* not equal"): xr.align(left, right, join="exact") + def test_align_override(self): + left = xr.Dataset(coords={"x": [0, 1, 2]}) + right = xr.Dataset(coords={"x": [0.1, 1.1, 2.1], "y": [1, 2, 3]}) + expected_right = xr.Dataset(coords={"x": [0, 1, 2], "y": [1, 2, 3]}) + + new_left, new_right = xr.align(left, right, join="override") + assert_identical(left, new_left) + assert_identical(new_right, expected_right) + + new_left, new_right = xr.align(left, right, exclude="x", join="override") + assert_identical(left, new_left) + assert_identical(right, new_right) + + new_left, new_right = xr.align( + left.isel(x=0, drop=True), right, exclude="x", join="override" + ) + assert_identical(left.isel(x=0, drop=True), new_left) + assert_identical(right, new_right) + + with raises_regex(ValueError, "Indexes along 
dimension 'x' don't have"): + xr.align(left.isel(x=0).expand_dims("x"), right, join="override") + def test_align_exclude(self): x = Dataset( {