diff --git a/doc/whats-new.rst b/doc/whats-new.rst index b83697a3b20..1d5be3847c4 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -65,6 +65,8 @@ Documentation Internal Changes ~~~~~~~~~~~~~~~~ +- Many error messages related to invalid dimensions or coordinates now always show the list of valid dims/coords (:pull:`8079`). + By `András Gunyhó `_. .. _whats-new.2023.08.0: diff --git a/xarray/core/computation.py b/xarray/core/computation.py index fe89672e392..235b52402f1 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -2046,9 +2046,13 @@ def _calc_idxminmax( raise ValueError("Must supply 'dim' argument for multidimensional arrays") if dim not in array.dims: - raise KeyError(f'Dimension "{dim}" not in dimension') + raise KeyError( + f"Dimension {dim!r} not found in array dimensions {array.dims!r}" + ) if dim not in array.coords: - raise KeyError(f'Dimension "{dim}" does not have coordinates') + raise KeyError( + f"Dimension {dim!r} is not one of the coordinates {tuple(array.coords.keys())}" + ) # These are dtypes with NaN values argmin and argmax can handle na_dtypes = "cfO" diff --git a/xarray/core/concat.py b/xarray/core/concat.py index d7aad8c7188..a76bb6b0033 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -391,17 +391,20 @@ def process_subset_opt(opt, subset): else: raise ValueError(f"unexpected value for {subset}: {opt}") else: - invalid_vars = [k for k in opt if k not in getattr(datasets[0], subset)] + valid_vars = tuple(getattr(datasets[0], subset)) + invalid_vars = [k for k in opt if k not in valid_vars] if invalid_vars: if subset == "coords": raise ValueError( - "some variables in coords are not coordinates on " - f"the first dataset: {invalid_vars}" + f"the variables {invalid_vars} in coords are not " + f"found in the coordinates of the first dataset {valid_vars}" ) else: + # note: data_vars are not listed in the error message here, + # because there may be lots of them raise ValueError( - "some variables in data_vars are not data variables " - f"on the first dataset: {invalid_vars}" + f"the variables {invalid_vars} in data_vars are not " + f"found in the data variables of the first dataset" ) concat_over.update(opt) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 489b6f0d04e..e20c022e637 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -769,7 +769,9 @@ def __delitem__(self, key: Hashable) -> None: if key in self: del self._data[key] else: - raise KeyError(f"{key!r} is not a coordinate variable.") + raise KeyError( + f"{key!r} is not in coordinate variables {tuple(self.keys())}" + ) def _ipython_key_completions_(self): """Provide method for the key-autocompletions in IPython.""" @@ -855,7 +857,9 @@ def to_dataset(self) -> Dataset: def __delitem__(self, key: Hashable) -> None: if key not in self: - raise KeyError(f"{key!r} is not a coordinate variable.") + raise KeyError( + f"{key!r} is not in coordinate variables {tuple(self.keys())}" + ) assert_no_index_corrupted(self._data.xindexes, {key}) del self._data._coords[key] diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 97f528aea7d..4e5ca3746f0 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -213,11 +213,6 @@ def _get_virtual_variable( return ref_name, var_name, virtual_var -def _assert_empty(args: tuple, msg: str = "%s") -> None: - if args: - raise ValueError(msg % args) - - def _get_chunk(var: Variable, chunks, chunkmanager: ChunkManagerEntrypoint): """ Return map from each dim to chunk sizes, accounting for backend's preferred chunks. @@ -2640,7 +2635,7 @@ def chunk( bad_dims = chunks.keys() - self.dims.keys() if bad_dims: raise ValueError( - f"some chunks keys are not dimensions on this object: {bad_dims}" + f"chunks keys {tuple(bad_dims)} not found in data dimensions {tuple(self.dims)}" ) chunkmanager = guess_chunkmanager(chunked_array_type) @@ -4243,8 +4238,8 @@ def rename_dims( for k, v in dims_dict.items(): if k not in self.dims: raise ValueError( - f"cannot rename {k!r} because it is not a " - "dimension in this dataset" + f"cannot rename {k!r} because it is not found " + f"in the dimensions of this dataset {tuple(self.dims)}" ) if v in self.dims or v in self: raise ValueError( @@ -4366,7 +4361,7 @@ def swap_dims( if k not in self.dims: raise ValueError( f"cannot swap from dimension {k!r} because it is " - "not an existing dimension" + f"not one of the dimensions of this dataset {tuple(self.dims)}" ) if v in self.variables and self.variables[v].dims != (k,): raise ValueError( @@ -5448,10 +5443,10 @@ def unstack( else: dims = list(dim) - missing_dims = [d for d in dims if d not in self.dims] + missing_dims = set(dims) - set(self.dims) if missing_dims: raise ValueError( - f"Dataset does not contain the dimensions: {missing_dims}" + f"Dimensions {tuple(missing_dims)} not found in data dimensions {tuple(self.dims)}" ) # each specified dimension must have exactly one multi-index @@ -5836,7 +5831,10 @@ def drop_indexes( if errors == "raise": invalid_coords = coord_names - self._coord_names if invalid_coords: - raise ValueError(f"those coordinates don't exist: {invalid_coords}") + raise ValueError( + f"The coordinates {tuple(invalid_coords)} are not found in the " + f"dataset coordinates {tuple(self.coords.keys())}" + ) unindexed_coords = set(coord_names) - set(self._indexes) if unindexed_coords: @@ -6084,7 +6082,7 @@ def drop_dims( missing_dims = drop_dims - set(self.dims) if missing_dims: raise ValueError( - f"Dataset does not contain the dimensions: {missing_dims}" + f"Dimensions {tuple(missing_dims)} not found in data dimensions {tuple(self.dims)}" ) drop_vars = {k for k, v in self._variables.items() if set(v.dims) & drop_dims} @@ -6244,7 +6242,9 @@ def dropna( # depending on the order of the supplied axes. if dim not in self.dims: - raise ValueError(f"{dim} must be a single dataset dimension") + raise ValueError( + f"Dimension {dim!r} not found in data dimensions {tuple(self.dims)}" + ) if subset is None: subset = iter(self.data_vars) @@ -6725,10 +6725,10 @@ def reduce( else: dims = set(dim) - missing_dimensions = [d for d in dims if d not in self.dims] + missing_dimensions = tuple(d for d in dims if d not in self.dims) if missing_dimensions: raise ValueError( - f"Dataset does not contain the dimensions: {missing_dimensions}" + f"Dimensions {missing_dimensions} not found in data dimensions {tuple(self.dims)}" ) if keep_attrs is None: @@ -7710,9 +7710,11 @@ def shift( foo (x) object nan nan 'a' 'b' 'c' """ shifts = either_dict_or_kwargs(shifts, shifts_kwargs, "shift") - invalid = [k for k in shifts if k not in self.dims] + invalid = tuple(k for k in shifts if k not in self.dims) if invalid: - raise ValueError(f"dimensions {invalid!r} do not exist") + raise ValueError( + f"Dimensions {invalid} not found in data dimensions {tuple(self.dims)}" + ) variables = {} for name, var in self.variables.items(): @@ -7789,7 +7791,9 @@ def roll( shifts = either_dict_or_kwargs(shifts, shifts_kwargs, "roll") invalid = [k for k in shifts if k not in self.dims] if invalid: - raise ValueError(f"dimensions {invalid!r} do not exist") + raise ValueError( + f"Dimensions {invalid} not found in data dimensions {tuple(self.dims)}" + ) unrolled_vars: tuple[Hashable, ...] @@ -8038,10 +8042,11 @@ def quantile( else: dims = set(dim) - _assert_empty( - tuple(d for d in dims if d not in self.dims), - "Dataset does not contain the dimensions: %s", - ) + invalid_dims = set(dims) - set(self.dims) + if invalid_dims: + raise ValueError( + f"Dimensions {tuple(invalid_dims)} not found in data dimensions {tuple(self.dims)}" + ) q = np.asarray(q, dtype=np.float64) @@ -8117,7 +8122,9 @@ def rank( ) if dim not in self.dims: - raise ValueError(f"Dataset does not contain the dimension: {dim}") + raise ValueError( + f"Dimension {dim!r} not found in data dimensions {tuple(self.dims)}" + ) variables = {} for name, var in self.variables.items(): @@ -8167,7 +8174,10 @@ def differentiate( from xarray.core.variable import Variable if coord not in self.variables and coord not in self.dims: - raise ValueError(f"Coordinate {coord} does not exist.") + variables_and_dims = tuple(set(self.variables.keys()).union(self.dims)) + raise ValueError( + f"Coordinate {coord!r} not found in variables or dimensions {variables_and_dims}." + ) coord_var = self[coord].variable if coord_var.ndim != 1: @@ -8269,7 +8279,10 @@ def _integrate_one(self, coord, datetime_unit=None, cumulative=False): from xarray.core.variable import Variable if coord not in self.variables and coord not in self.dims: - raise ValueError(f"Coordinate {coord} does not exist.") + variables_and_dims = tuple(set(self.variables.keys()).union(self.dims)) + raise ValueError( + f"Coordinate {coord!r} not found in variables or dimensions {variables_and_dims}." + ) coord_var = self[coord].variable if coord_var.ndim != 1: @@ -9771,7 +9784,9 @@ def drop_duplicates( missing_dims = set(dims) - set(self.dims) if missing_dims: - raise ValueError(f"'{missing_dims}' not found in dimensions") + raise ValueError( + f"Dimensions {tuple(missing_dims)} not found in data dimensions {tuple(self.dims)}" + ) indexes = {dim: ~self.get_index(dim).duplicated(keep=keep) for dim in dims} return self.isel(indexes) diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index b5e396963a1..dffc012c582 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -1203,12 +1203,12 @@ def sel(self, labels, method=None, tolerance=None) -> IndexSelResult: coord_name, label = next(iter(labels.items())) if is_dict_like(label): - invalid_levels = [ + invalid_levels = tuple( name for name in label if name not in self.index.names - ] + ) if invalid_levels: raise ValueError( - f"invalid multi-index level names {invalid_levels}" + f"multi-index level names {invalid_levels} not found in indexes {tuple(self.index.names)}" ) return self.sel(label) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 7969ded3102..6e6ce01a41f 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -143,7 +143,10 @@ def group_indexers_by_index( elif key in obj.coords: raise KeyError(f"no index found for coordinate {key!r}") elif key not in obj.dims: - raise KeyError(f"{key!r} is not a valid dimension or coordinate") + raise KeyError( + f"{key!r} is not a valid dimension or coordinate for " + f"{obj.__class__.__name__} with dimensions {obj.dims!r}" + ) elif len(options): raise ValueError( f"cannot supply selection options {options!r} for dimension {key!r}" diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 916fabe42ac..dcd01a0e0f1 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -102,6 +102,14 @@ def __init__( self.center = self._mapping_to_list(center, default=False) self.obj: T_Xarray = obj + missing_dims = tuple(dim for dim in self.dim if dim not in self.obj.dims) + if missing_dims: + # NOTE: we raise KeyError here but ValueError in Coarsen. + raise KeyError( + f"Window dimensions {missing_dims} not found in {self.obj.__class__.__name__} " + f"dimensions {tuple(self.obj.dims)}" + ) + # attributes if min_periods is not None and min_periods <= 0: raise ValueError("min_periods must be greater than zero or None") @@ -624,8 +632,7 @@ def __init__( xarray.DataArray.groupby """ super().__init__(obj, windows, min_periods, center) - if any(d not in self.obj.dims for d in self.dim): - raise KeyError(self.dim) + # Keep each Rolling object as a dictionary self.rollings = {} for key, da in self.obj.data_vars.items(): @@ -839,10 +846,11 @@ def __init__( self.side = side self.boundary = boundary - absent_dims = [dim for dim in windows.keys() if dim not in self.obj.dims] - if absent_dims: + missing_dims = tuple(dim for dim in windows.keys() if dim not in self.obj.dims) + if missing_dims: raise ValueError( - f"Dimensions {absent_dims!r} not found in {self.obj.__class__.__name__}." + f"Window dimensions {missing_dims} not found in {self.obj.__class__.__name__} " + f"dimensions {tuple(self.obj.dims)}" ) if not utils.is_dict_like(coord_func): coord_func = {d: coord_func for d in self.obj.dims} # type: ignore[misc] diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 05f9930aacd..c08f0738d6e 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -2117,7 +2117,7 @@ def concat( for var in variables: if var.dims != first_var_dims: raise ValueError( - f"Variable has dimensions {list(var.dims)} but first Variable has dimensions {list(first_var_dims)}" + f"Variable has dimensions {tuple(var.dims)} but first Variable has dimensions {tuple(first_var_dims)}" ) return cls(dims, data, attrs, encoding, fastpath=True) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index e21091fad6b..82ffe684ec7 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -198,10 +198,11 @@ def _check_dim(self, dim: Dims): dims = [dim] if dim else [] else: dims = list(dim) - missing_dims = set(dims) - set(self.obj.dims) - set(self.weights.dims) + all_dims = set(self.obj.dims).union(set(self.weights.dims)) + missing_dims = set(dims) - all_dims if missing_dims: raise ValueError( - f"{self.__class__.__name__} does not contain the dimensions: {missing_dims}" + f"Dimensions {tuple(missing_dims)} not found in {self.__class__.__name__} dimensions {tuple(all_dims)}" ) @staticmethod diff --git a/xarray/tests/test_coarsen.py b/xarray/tests/test_coarsen.py index d58361afdd3..e345ae691ec 100644 --- a/xarray/tests/test_coarsen.py +++ b/xarray/tests/test_coarsen.py @@ -17,7 +17,10 @@ def test_coarsen_absent_dims_error(ds: Dataset) -> None: - with pytest.raises(ValueError, match=r"not found in Dataset."): + with pytest.raises( + ValueError, + match=r"Window dimensions \('foo',\) not found in Dataset dimensions", + ): ds.coarsen(foo=2) diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index 030f653e031..543b6d33cb9 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -614,9 +614,12 @@ def test_concat_errors(self): with pytest.raises(ValueError, match=r"must supply at least one"): concat([], "dim1") - with pytest.raises(ValueError, match=r"are not coordinates"): + with pytest.raises(ValueError, match=r"are not found in the coordinates"): concat([data, data], "new_dim", coords=["not_found"]) + with pytest.raises(ValueError, match=r"are not found in the data variables"): + concat([data, data], "new_dim", data_vars=["not_found"]) + with pytest.raises(ValueError, match=r"global attributes not"): # call deepcopy seperately to get unique attrs data0 = deepcopy(split_data[0]) diff --git a/xarray/tests/test_coordinates.py b/xarray/tests/test_coordinates.py index 27abc6c0ae2..ef73371dfe4 100644 --- a/xarray/tests/test_coordinates.py +++ b/xarray/tests/test_coordinates.py @@ -103,6 +103,11 @@ def test_delitem(self) -> None: del coords["x"] assert "x" not in coords + with pytest.raises( + KeyError, match="'nonexistent' is not in coordinate variables" + ): + del coords["nonexistent"] + def test_update(self) -> None: coords = Coordinates(coords={"x": [0, 1, 2]}) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index b4efe4ab2a7..2a28939df41 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1,6 +1,7 @@ from __future__ import annotations import pickle +import re import sys import warnings from collections.abc import Hashable @@ -4886,8 +4887,10 @@ def test_idxmin( else: ar0 = ar0_raw - # dim doesn't exist - with pytest.raises(KeyError): + with pytest.raises( + KeyError, + match=r"'spam' not found in array dimensions", + ): ar0.idxmin(dim="spam") # Scalar Dataarray @@ -4999,8 +5002,10 @@ def test_idxmax( else: ar0 = ar0_raw - # dim doesn't exist - with pytest.raises(KeyError): + with pytest.raises( + KeyError, + match=r"'spam' not found in array dimensions", + ): ar0.idxmax(dim="spam") # Scalar Dataarray @@ -6954,7 +6959,12 @@ def test_drop_duplicates_1d(self, keep) -> None: result = da.drop_duplicates("time", keep=keep) assert_equal(expected, result) - with pytest.raises(ValueError, match="['space'] not found"): + with pytest.raises( + ValueError, + match=re.escape( + "Dimensions ('space',) not found in data dimensions ('time',)" + ), + ): da.drop_duplicates("space", keep=keep) def test_drop_duplicates_2d(self) -> None: diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 01bdf2cef0c..226e2b6dc78 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1158,7 +1158,12 @@ def get_dask_names(ds): for k, v in new_dask_names.items(): assert v == orig_dask_names[k] - with pytest.raises(ValueError, match=r"some chunks"): + with pytest.raises( + ValueError, + match=re.escape( + "chunks keys ('foo',) not found in data dimensions ('dim2', 'dim3', 'time', 'dim1')" + ), + ): data.chunk({"foo": 10}) @requires_dask @@ -2780,7 +2785,10 @@ def test_drop_indexes(self) -> None: assert type(actual.x.variable) is Variable assert type(actual.y.variable) is Variable - with pytest.raises(ValueError, match="those coordinates don't exist"): + with pytest.raises( + ValueError, + match=r"The coordinates \('not_a_coord',\) are not found in the dataset coordinates", + ): ds.drop_indexes("not_a_coord") with pytest.raises(ValueError, match="those coordinates do not have an index"): @@ -3672,7 +3680,10 @@ def test_unstack(self) -> None: def test_unstack_errors(self) -> None: ds = Dataset({"x": [1, 2, 3]}) - with pytest.raises(ValueError, match=r"does not contain the dimensions"): + with pytest.raises( + ValueError, + match=re.escape("Dimensions ('foo',) not found in data dimensions ('x',)"), + ): ds.unstack("foo") with pytest.raises(ValueError, match=r".*do not have exactly one multi-index"): ds.unstack("x") @@ -4962,7 +4973,10 @@ def test_dropna(self) -> None: expected = ds.isel(a=[1, 3]) assert_identical(actual, ds) - with pytest.raises(ValueError, match=r"a single dataset dimension"): + with pytest.raises( + ValueError, + match=r"'foo' not found in data dimensions \('a', 'b'\)", + ): ds.dropna("foo") with pytest.raises(ValueError, match=r"invalid how"): ds.dropna("a", how="somehow") # type: ignore @@ -5280,7 +5294,10 @@ def test_mean_uint_dtype(self) -> None: def test_reduce_bad_dim(self) -> None: data = create_test_data() - with pytest.raises(ValueError, match=r"Dataset does not contain"): + with pytest.raises( + ValueError, + match=r"Dimensions \('bad_dim',\) not found in data dimensions", + ): data.mean(dim="bad_dim") def test_reduce_cumsum(self) -> None: @@ -5306,7 +5323,10 @@ def test_reduce_cumsum(self) -> None: @pytest.mark.parametrize("func", ["cumsum", "cumprod"]) def test_reduce_cumsum_test_dims(self, reduct, expected, func) -> None: data = create_test_data() - with pytest.raises(ValueError, match=r"Dataset does not contain"): + with pytest.raises( + ValueError, + match=r"Dimensions \('bad_dim',\) not found in data dimensions", + ): getattr(data, func)(dim="bad_dim") # ensure dimensions are correct @@ -5554,7 +5574,12 @@ def test_rank(self) -> None: assert list(z.coords) == list(ds.coords) assert list(x.coords) == list(y.coords) # invalid dim - with pytest.raises(ValueError, match=r"does not contain"): + with pytest.raises( + ValueError, + match=re.escape( + "Dimension 'invalid_dim' not found in data dimensions ('dim3', 'dim1')" + ), + ): x.rank("invalid_dim") def test_rank_use_bottleneck(self) -> None: @@ -7087,7 +7112,12 @@ def test_drop_duplicates_1d(self, keep) -> None: result = ds.drop_duplicates("time", keep=keep) assert_equal(expected, result) - with pytest.raises(ValueError, match="['space'] not found"): + with pytest.raises( + ValueError, + match=re.escape( + "Dimensions ('space',) not found in data dimensions ('time',)" + ), + ): ds.drop_duplicates("space", keep=keep) diff --git a/xarray/tests/test_indexes.py b/xarray/tests/test_indexes.py index 05d748541ed..866c2ef7e85 100644 --- a/xarray/tests/test_indexes.py +++ b/xarray/tests/test_indexes.py @@ -487,7 +487,10 @@ def test_sel(self) -> None: index.sel({"x": 0}) with pytest.raises(ValueError, match=r"cannot provide labels for both.*"): index.sel({"one": 0, "x": "a"}) - with pytest.raises(ValueError, match=r"invalid multi-index level names"): + with pytest.raises( + ValueError, + match=r"multi-index level names \('three',\) not found in indexes", + ): index.sel({"x": {"three": 0}}) with pytest.raises(IndexError): index.sel({"x": (slice(None), 1, "no_level")}) diff --git a/xarray/tests/test_rolling.py b/xarray/tests/test_rolling.py index 73aebc1b1f0..0e3c0874a0a 100644 --- a/xarray/tests/test_rolling.py +++ b/xarray/tests/test_rolling.py @@ -77,6 +77,12 @@ def test_rolling_properties(self, da) -> None: with pytest.raises(ValueError, match="min_periods must be greater than zero"): da.rolling(time=2, min_periods=0) + with pytest.raises( + KeyError, + match=r"\('foo',\) not found in DataArray dimensions", + ): + da.rolling(foo=2) + @pytest.mark.parametrize("name", ("sum", "mean", "std", "min", "max", "median")) @pytest.mark.parametrize("center", (True, False, None)) @pytest.mark.parametrize("min_periods", (1, None)) @@ -540,6 +546,11 @@ def test_rolling_properties(self, ds) -> None: ds.rolling(time=2, min_periods=0) with pytest.raises(KeyError, match="time2"): ds.rolling(time2=2) + with pytest.raises( + KeyError, + match=r"\('foo',\) not found in Dataset dimensions", + ): + ds.rolling(foo=2) @pytest.mark.parametrize( "name", ("sum", "mean", "std", "var", "min", "max", "median") diff --git a/xarray/tests/test_weighted.py b/xarray/tests/test_weighted.py index e2530d41fbe..628d6310945 100644 --- a/xarray/tests/test_weighted.py +++ b/xarray/tests/test_weighted.py @@ -782,9 +782,12 @@ def test_weighted_bad_dim(operation, as_dataset): if operation == "quantile": kwargs["q"] = 0.5 - error_msg = ( - f"{data.__class__.__name__}Weighted" - " does not contain the dimensions: {'bad_dim'}" - ) - with pytest.raises(ValueError, match=error_msg): + with pytest.raises( + ValueError, + match=( + f"Dimensions \\('bad_dim',\\) not found in {data.__class__.__name__}Weighted " + # the order of (dim_0, dim_1) varies + "dimensions \\(('dim_0', 'dim_1'|'dim_1', 'dim_0')\\)" + ), + ): getattr(data.weighted(weights), operation)(**kwargs)