diff --git a/doc/api.rst b/doc/api.rst
index 413bd703a5d..03e7f6766ea 100644
--- a/doc/api.rst
+++ b/doc/api.rst
@@ -87,6 +87,8 @@ Dataset contents
    Dataset.pipe
    Dataset.merge
    Dataset.rename
+   Dataset.rename_vars
+   Dataset.rename_dims
    Dataset.swap_dims
    Dataset.expand_dims
    Dataset.drop
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 56f2446627a..034461e0ded 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -21,6 +21,12 @@ v0.12.3 (unreleased)
 Enhancements
 ~~~~~~~~~~~~
 
+- Add ``Dataset.rename_dims`` and ``Dataset.rename_vars`` for renaming
+  dimensions and variables independently: datasets with coordinate
+  dimensions can now have only the dimension or only the coordinate
+  variable renamed, instead of ``rename`` applying to both. (:issue:`3026`)
+  By `Julia Kent `_.
+
 Bug fixes
 ~~~~~~~~~
 
@@ -102,6 +108,8 @@ Enhancements to existing functionality
   accept a keyword argument ``restore_coord_dims`` which keeps the order
   of the dimensions of multi-dimensional coordinates intact (:issue:`1856`).
   By `Peter Hausamann `_.
+- Clean up Python 2 compatibility in code (:issue:`2950`)
+  By `Guido Imperiale `_.
 - Better warning message when supplying invalid objects to ``xr.merge``
   (:issue:`2948`). By `Mathias Hauser `_.
 - Add ``errors`` keyword argument to :py:meth:`Dataset.drop` and :py:meth:`Dataset.drop_dims`
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 500f2bb5d7f..796de7e928e 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -149,8 +149,8 @@ def merge_indexes(
 
     for n in var_names:
         var = variables[n]
-        if (current_index_variable is not None and
-                var.dims != current_index_variable.dims):
+        if (current_index_variable is not None
+                and var.dims != current_index_variable.dims):
             raise ValueError(
                 "dimension mismatch between %r %s and %r %s"
                 % (dim, current_index_variable.dims, n, var.dims))
@@ -209,8 +209,8 @@ def split_indexes(
 
     Not public API. Used in Dataset and DataArray reset_index methods.
     """
-    if (isinstance(dims_or_levels, str) or
-            not isinstance(dims_or_levels, Sequence)):
+    if (isinstance(dims_or_levels, str)
+            or not isinstance(dims_or_levels, Sequence)):
         dims_or_levels = [dims_or_levels]
 
     dim_levels \
@@ -287,8 +287,8 @@ def __len__(self) -> int:
         return len(self._dataset._variables) - len(self._dataset._coord_names)
 
     def __contains__(self, key) -> bool:
-        return (key in self._dataset._variables and
-                key not in self._dataset._coord_names)
+        return (key in self._dataset._variables
+                and key not in self._dataset._coord_names)
 
     def __getitem__(self, key) -> 'DataArray':
         if key not in self._dataset._coord_names:
@@ -1188,8 +1188,8 @@ def identical(self, other):
         Dataset.equals
         """
         try:
-            return (utils.dict_equiv(self.attrs, other.attrs) and
-                    self._all_compat(other, 'identical'))
+            return (utils.dict_equiv(self.attrs, other.attrs)
+                    and self._all_compat(other, 'identical'))
         except (TypeError, AttributeError):
             return False
 
@@ -2151,8 +2151,8 @@ def _validate_interp_indexer(x, new_x):
             # In the case of datetimes, the restrictions placed on indexers
            # used with interp are stronger than those which are placed on
            # isel, so we need an additional check after _validate_indexers.
-            if (_contains_datetime_like_objects(x) and
-                    not _contains_datetime_like_objects(new_x)):
+            if (_contains_datetime_like_objects(x)
+                    and not _contains_datetime_like_objects(new_x)):
                 raise TypeError('When interpolating over a datetime-like '
                                 'coordinate, the coordinates to '
                                 'interpolate to must be either datetime '
@@ -2264,10 +2264,9 @@ def _rename_vars(self, name_dict, dims_dict):
         variables = OrderedDict()
         coord_names = set()
         for k, v in self.variables.items():
-            name = name_dict.get(k, k)
-            dims = tuple(dims_dict.get(dim, dim) for dim in v.dims)
             var = v.copy(deep=False)
-            var.dims = dims
+            var.dims = tuple(dims_dict.get(dim, dim) for dim in v.dims)
+            name = name_dict.get(k, k)
             if name in variables:
                 raise ValueError('the new name %r conflicts' % (name,))
             variables[name] = var
@@ -2275,8 +2274,8 @@ def _rename_vars(self, name_dict, dims_dict):
                 coord_names.add(name)
         return variables, coord_names
 
-    def _rename_dims(self, dims_dict):
-        return {dims_dict.get(k, k): v for k, v in self.dims.items()}
+    def _rename_dims(self, name_dict):
+        return {name_dict.get(k, k): v for k, v in self.dims.items()}
 
     def _rename_indexes(self, name_dict):
         if self._indexes is None:
@@ -2293,9 +2292,9 @@ def _rename_indexes(self, name_dict):
             indexes[new_name] = index
         return indexes
 
-    def _rename_all(self, name_dict, dim_dict):
-        variables, coord_names = self._rename_vars(name_dict, dim_dict)
-        dims = self._rename_dims(dim_dict)
+    def _rename_all(self, name_dict, dims_dict):
+        variables, coord_names = self._rename_vars(name_dict, dims_dict)
+        dims = self._rename_dims(dims_dict)
         indexes = self._rename_indexes(name_dict)
         return variables, coord_names, dims, indexes
 
@@ -2322,21 +2321,91 @@ def rename(self, name_dict=None, inplace=None, **names):
         See Also
         --------
         Dataset.swap_dims
+        Dataset.rename_vars
+        Dataset.rename_dims
         DataArray.rename
         """
-        # TODO: add separate rename_vars and rename_dims methods.
         inplace = _check_inplace(inplace)
         name_dict = either_dict_or_kwargs(name_dict, names, 'rename')
-        for k, v in name_dict.items():
+        for k in name_dict.keys():
             if k not in self and k not in self.dims:
                 raise ValueError("cannot rename %r because it is not a "
                                  "variable or dimension in this dataset" % k)
 
         variables, coord_names, dims, indexes = self._rename_all(
-            name_dict=name_dict, dim_dict=name_dict)
+            name_dict=name_dict, dims_dict=name_dict)
         return self._replace(variables, coord_names, dims=dims,
                              indexes=indexes, inplace=inplace)
 
+    def rename_dims(self, dims_dict=None, **dims):
+        """Returns a new object with renamed dimensions only.
+
+        Parameters
+        ----------
+        dims_dict : dict-like, optional
+            Dictionary whose keys are current dimension names and
+            whose values are the desired names.
+        **dims, optional
+            Keyword form of ``dims_dict``.
+            One of dims_dict or dims must be provided.
+
+        Returns
+        -------
+        renamed : Dataset
+            Dataset with renamed dimensions.
+
+        See Also
+        --------
+        Dataset.swap_dims
+        Dataset.rename
+        Dataset.rename_vars
+        DataArray.rename
+        """
+        dims_dict = either_dict_or_kwargs(dims_dict, dims, 'rename_dims')
+        for k in dims_dict:
+            if k not in self.dims:
+                raise ValueError("cannot rename %r because it is not a "
+                                 "dimension in this dataset" % k)
+
+        variables, coord_names, dims, indexes = self._rename_all(
+            name_dict={}, dims_dict=dims_dict)
+        return self._replace(variables, coord_names, dims=dims,
+                             indexes=indexes)
+
+    def rename_vars(self, name_dict=None, **names):
+        """Returns a new object with renamed variables, including coordinates.
+
+        Parameters
+        ----------
+        name_dict : dict-like, optional
+            Dictionary whose keys are current variable or coordinate names and
+            whose values are the desired names.
+        **names, optional
+            Keyword form of ``name_dict``.
+            One of name_dict or names must be provided.
+
+        Returns
+        -------
+        renamed : Dataset
+            Dataset with renamed variables, including coordinates.
+
+        See Also
+        --------
+        Dataset.swap_dims
+        Dataset.rename
+        Dataset.rename_dims
+        DataArray.rename
+        """
+        name_dict = either_dict_or_kwargs(name_dict, names, 'rename_vars')
+        for k in name_dict:
+            if k not in self:
+                raise ValueError("cannot rename %r because it is not a "
+                                 "variable or coordinate in this dataset" % k)
+        variables, coord_names, dims, indexes = self._rename_all(
+            name_dict=name_dict, dims_dict={})
+        return self._replace(variables, coord_names, dims=dims,
+                             indexes=indexes)
+
     def swap_dims(self, dims_dict, inplace=None):
         """Returns a new object with swapped dimensions.
 
@@ -2464,8 +2533,8 @@ def expand_dims(self, dim=None, axis=None, **dim_kwargs):
             if d in self.dims:
                 raise ValueError(
                     'Dimension {dim} already exists.'.format(dim=d))
-            if (d in self._variables and
-                    not utils.is_scalar(self._variables[d])):
+            if (d in self._variables
+                    and not utils.is_scalar(self._variables[d])):
                 raise ValueError(
                     '{dim} already exists as coordinate or'
                     ' variable name.'.format(dim=d))
@@ -3256,9 +3325,9 @@ def reduce(self, func, dim=None, keep_attrs=None, keepdims=False,
                 if not reduce_dims:
                     variables[name] = var
             else:
-                if (not numeric_only or
-                        np.issubdtype(var.dtype, np.number) or
-                        (var.dtype == np.bool_)):
+                if (not numeric_only
+                        or np.issubdtype(var.dtype, np.number)
+                        or (var.dtype == np.bool_)):
                     if len(reduce_dims) == 1:
                         # unpack dimensions for the benefit of functions
                         # like np.argmin which can't handle tuple arguments
@@ -3791,8 +3860,8 @@ def diff(self, dim, n=1, label='upper'):
         for name, var in self.variables.items():
             if dim in var.dims:
                 if name in self.data_vars:
-                    variables[name] = (var.isel(**kwargs_end) -
-                                       var.isel(**kwargs_start))
+                    variables[name] = (var.isel(**kwargs_end)
+                                       - var.isel(**kwargs_start))
                 else:
                     variables[name] = var.isel(**kwargs_new)
             else:
@@ -3976,8 +4045,8 @@ def sortby(self, variables, ascending=True):
         for data_array in aligned_other_vars:
             if data_array.ndim != 1:
                 raise ValueError("Input DataArray is not 1-D.")
-            if (data_array.dtype == object and
-                    LooseVersion(np.__version__) < LooseVersion('1.11.0')):
+            if (data_array.dtype == object
+                    and LooseVersion(np.__version__) < LooseVersion('1.11.0')):
                 raise NotImplementedError(
                     'sortby uses np.lexsort under the hood, which requires '
                     'numpy 1.11.0 or later to support object data-type.')
@@ -4053,9 +4122,9 @@ def quantile(self, q, dim=None, interpolation='linear',
             reduce_dims = [d for d in var.dims if d in dims]
             if reduce_dims or not var.dims:
                 if name not in self.coords:
-                    if (not numeric_only or
-                            np.issubdtype(var.dtype, np.number) or
-                            var.dtype == np.bool_):
+                    if (not numeric_only
+                            or np.issubdtype(var.dtype, np.number)
+                            or var.dtype == np.bool_):
                         if len(reduce_dims) == var.ndim:
                             # prefer to aggregate over axis=None rather than
                             # axis=(0, 1) if they will be equivalent, because
@@ -4171,8 +4240,8 @@ def differentiate(self, coord, edge_order=1, datetime_unit=None):
 
         variables = OrderedDict()
         for k, v in self.variables.items():
-            if (k in self.data_vars and dim in v.dims and
-                    k not in self.coords):
+            if (k in self.data_vars and dim in v.dims
+                    and k not in self.coords):
                 if _contains_datetime_like_objects(v):
                     v = v._to_numeric(datetime_unit=datetime_unit)
                 grad = duck_array_ops.gradient(
@@ -4348,8 +4417,8 @@ def filter_by_attrs(self, **kwargs):
             has_value_flag = False
             for attr_name, pattern in kwargs.items():
                 attr_value = variable.attrs.get(attr_name)
-                if ((callable(pattern) and pattern(attr_value)) or
-                        attr_value == pattern):
+                if ((callable(pattern) and pattern(attr_value))
+                        or attr_value == pattern):
                     has_value_flag = True
                 else:
                     has_value_flag = False
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index 25b7c6b8b14..ac759ac9c24 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -2162,6 +2162,40 @@ def test_rename_inplace(self):
         # check virtual variables
         assert_array_equal(data['t.dayofyear'], [1, 2, 3])
 
+    def test_rename_dims(self):
+        original = Dataset(
+            {'x': ('x', [0, 1, 2]), 'y': ('x', [10, 11, 12]), 'z': 42})
+        expected = Dataset(
+            {'x': ('x_new', [0, 1, 2]), 'y': ('x_new', [10, 11, 12]), 'z': 42})
+        expected = expected.set_coords('x')
+        dims_dict = {'x': 'x_new'}
+        actual = original.rename_dims(dims_dict)
+        assert_identical(expected, actual)
+        actual_2 = original.rename_dims(**dims_dict)
+        assert_identical(expected, actual_2)
+
+        # Test to raise ValueError
+        dims_dict_bad = {'x_bad': 'x_new'}
+        with pytest.raises(ValueError):
+            original.rename_dims(dims_dict_bad)
+
+    def test_rename_vars(self):
+        original = Dataset(
+            {'x': ('x', [0, 1, 2]), 'y': ('x', [10, 11, 12]), 'z': 42})
+        expected = Dataset(
+            {'x_new': ('x', [0, 1, 2]), 'y': ('x', [10, 11, 12]), 'z': 42})
+        expected = expected.set_coords('x_new')
+        name_dict = {'x': 'x_new'}
+        actual = original.rename_vars(name_dict)
+        assert_identical(expected, actual)
+        actual_2 = original.rename_vars(**name_dict)
+        assert_identical(expected, actual_2)
+
+        # Test to raise ValueError
+        names_dict_bad = {'x_bad': 'x_new'}
+        with pytest.raises(ValueError):
+            original.rename_vars(names_dict_bad)
+
     def test_swap_dims(self):
         original = Dataset({'x': [1, 2, 3], 'y': ('x', list('abc')), 'z': 42})
         expected = Dataset({'z': 42},
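
For reference, a minimal usage sketch of the two methods this patch adds. The snippet is not part of the diff; it assumes an xarray build that includes the changes above and mirrors the behaviour exercised by the new tests.

import xarray as xr

# Same layout as the Dataset built in test_rename_dims/test_rename_vars.
ds = xr.Dataset({'x': ('x', [0, 1, 2]), 'y': ('x', [10, 11, 12]), 'z': 42})

# Rename only the dimension: the coordinate keeps its name 'x' but now lies
# along the renamed dimension 'x_new'.
renamed_dims = ds.rename_dims({'x': 'x_new'})

# Rename only the variable/coordinate: the dimension keeps its name 'x'.
renamed_vars = ds.rename_vars(x='x_new')

# Dataset.rename still renames the coordinate and its dimension together.
renamed_both = ds.rename({'x': 'x_new'})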