Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a .drop_attrs method #8258

Merged
merged 25 commits into from
Jul 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ Dataset contents
Dataset.drop_duplicates
Dataset.drop_dims
Dataset.drop_encoding
Dataset.drop_attrs
Dataset.set_coords
Dataset.reset_coords
Dataset.convert_calendar
Expand Down Expand Up @@ -306,6 +307,7 @@ DataArray contents
DataArray.drop_indexes
DataArray.drop_duplicates
DataArray.drop_encoding
DataArray.drop_attrs
DataArray.reset_coords
DataArray.copy
DataArray.convert_calendar
Expand Down
4 changes: 4 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ New Features
By `Martin Raspaud <https://github.com/mraspaud>`_.
- Extract the source url from fsspec objects (:issue:`9142`, :pull:`8923`).
By `Justus Magin <https://github.com/keewis>`_.
- Add :py:meth:`DataArray.drop_attrs` & :py:meth:`Dataset.drop_attrs` methods,
to return an object without ``attrs``. A ``deep`` parameter controls whether
variables' ``attrs`` are also dropped.
By `Maximilian Roos <https://github.com/max-sixty>`_. (:pull:`8258`)

Breaking changes
~~~~~~~~~~~~~~~~
Expand Down
17 changes: 17 additions & 0 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -7456,3 +7456,20 @@ def to_dask_dataframe(
# this needs to be at the end, or mypy will confuse with `str`
# https://mypy.readthedocs.io/en/latest/common_issues.html#dealing-with-conflicting-names
str = utils.UncachedAccessor(StringAccessor["DataArray"])

def drop_attrs(self, *, deep: bool = True) -> Self:
    """
    Removes all attributes from the DataArray.

    The work is delegated to ``Dataset.drop_attrs`` by round-tripping
    through a temporary dataset.

    Parameters
    ----------
    deep : bool, default True
        Removes attributes from coordinates. The value is forwarded
        unchanged to ``Dataset.drop_attrs``.

    Returns
    -------
    DataArray
    """
    # Convert to a temporary Dataset so the attribute-dropping logic lives
    # in one place, then convert the stripped dataset back.
    stripped = self._to_temp_dataset().drop_attrs(deep=deep)
    return self._from_temp_dataset(stripped)
42 changes: 42 additions & 0 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -10680,3 +10680,45 @@ def resample(
restore_coord_dims=restore_coord_dims,
**indexer_kwargs,
)

def drop_attrs(self, *, deep: bool = True) -> Self:
"""
Removes all attributes from the Dataset and its variables.

Parameters
----------
deep : bool, default True
Removes attributes from all variables. If False, only the Dataset's own
``attrs`` are replaced with an empty dict and the variables keep theirs.

Returns
-------
Dataset
The object returned by ``self._replace(attrs={})``, with variable
attributes additionally cleared when ``deep`` is True.
"""
# Remove attributes from the dataset
# ``self`` is rebound here, so every assignment below acts on the object
# returned by ``_replace`` (presumably a new Dataset, leaving the caller's
# object untouched -- TODO confirm against ``_replace``).
self = self._replace(attrs={})

if not deep:
return self

# Remove attributes from each variable in the dataset
# Names found in ``self.indexes`` are skipped in this loop; they are
# handled by the index loop further down.
for var in self.variables:
# variables don't have a `._replace` method, so we copy and then remove
# attrs. If we added a `._replace` method, we could use that instead.
if var not in self.indexes:
self[var] = self[var].copy()
self[var].attrs = {}

# Collects the rebuilt coordinate variables of every index so they can be
# assigned back in a single ``assign`` call.
new_idx_variables = {}
# Not sure this is the most elegant way of doing this, but it works.
# (Should we have a more general "map over all variables, including
# indexes" approach?)
for idx, idx_vars in self.xindexes.group_by_index():
# copy each coordinate variable of an index and drop their attrs
temp_idx_variables = {k: v.copy() for k, v in idx_vars.items()}
for v in temp_idx_variables.values():
v.attrs = {}
# re-wrap the index object in new coordinate variables
new_idx_variables.update(idx.create_variables(temp_idx_variables))
self = self.assign(new_idx_variables)

return self
5 changes: 5 additions & 0 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -2980,6 +2980,11 @@ def test_assign_attrs(self) -> None:
assert_identical(new_actual, expected)
assert actual.attrs == {"a": 1, "b": 2}

def test_drop_attrs(self) -> None:
    # Minimal smoke test only; the thorough coverage lives in test_dataset.py.
    arr = DataArray([], attrs={"a": 1, "b": 2})
    stripped = arr.drop_attrs()
    assert stripped.attrs == {}

@pytest.mark.parametrize(
"func", [lambda x: x.clip(0, 1), lambda x: np.float64(1.0) * x, np.abs, abs]
)
Expand Down
48 changes: 48 additions & 0 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -4450,6 +4450,54 @@ def test_assign_attrs(self) -> None:
assert_identical(new_actual, expected)
assert actual.attrs == dict(a=1, b=2)

def test_drop_attrs(self) -> None:
    # Covers Dataset.drop_attrs in both deep (default) and shallow modes and
    # checks that the input dataset is never modified.

    # Simple example
    ds = Dataset().assign_attrs(a=1, b=2)
    original = ds.copy()
    expected = Dataset()
    result = ds.drop_attrs()
    assert_identical(result, expected)

    # Doesn't change original
    assert_identical(ds, original)

    # Example with variables and coords with attrs, and a multiindex. (arguably
    # should have used a canonical dataset with all the features we should
    # support...)
    var = Variable("x", [1, 2, 3], attrs=dict(x=1, y=2))
    idx = IndexVariable("y", [1, 2, 3], attrs=dict(c=1, d=2))
    mx = xr.Coordinates.from_pandas_multiindex(
        pd.MultiIndex.from_tuples([(1, 2), (3, 4)], names=["d", "e"]), "z"
    )
    ds = Dataset(dict(var1=var), coords=dict(y=idx, z=mx)).assign_attrs(a=1, b=2)
    assert ds.attrs != {}
    assert ds["var1"].attrs != {}
    assert ds["y"].attrs != {}
    assert ds.coords["y"].attrs != {}

    original = ds.copy(deep=True)
    result = ds.drop_attrs()

    assert result.attrs == {}
    assert result["var1"].attrs == {}
    assert result["y"].attrs == {}
    assert list(result.data_vars) == list(ds.data_vars)
    assert list(result.coords) == list(ds.coords)

    # Doesn't change original
    assert_identical(ds, original)
    # Specifically test that the attrs on the coords are still there. (The index
    # can't currently contain `attrs`, so we can't test those.)
    assert ds.coords["y"].attrs != {}

    # Test for deep=False
    result_shallow = ds.drop_attrs(deep=False)
    assert result_shallow.attrs == {}
    assert result_shallow["var1"].attrs != {}
    assert result_shallow["y"].attrs != {}
    # Check the shallow result itself; asserting on the earlier deep `result`
    # here would leave the deep=False structure unverified.
    assert list(result_shallow.data_vars) == list(ds.data_vars)
    assert list(result_shallow.coords) == list(ds.coords)

def test_assign_multiindex_level(self) -> None:
data = create_test_multiindex()
with pytest.raises(ValueError, match=r"cannot drop or update.*corrupt.*index "):
Expand Down
Loading