Skip to content
This repository has been archived by the owner on Oct 24, 2024. It is now read-only.

Commit

Permalink
DatasetView in map_over_subtree (#269)
Browse files Browse the repository at this point in the history
* re-forbid initializing a DatasetView directly

* test that map_over_subtree definitely doesn't modify data in-place

* chnge behaviour to return an immmutable DatasetView within map_over_subtree

* improve error messages

* whatsew

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* remove unused return variable

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
TomNicholas and pre-commit-ci[bot] authored Oct 24, 2023
1 parent ff86d3c commit 07ac0ae
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 16 deletions.
23 changes: 15 additions & 8 deletions datatree/datatree.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,13 +108,10 @@ class DatasetView(Dataset):
An immutable Dataset-like view onto the data in a single DataTree node.
In-place operations modifying this object should raise an AttributeError.
This requires overriding all inherited constructors.
Operations returning a new result will return a new xarray.Dataset object.
This includes all API on Dataset, which will be inherited.
This requires overriding all inherited private constructors.
We leave the public init constructor because it is used by type() in some xarray code (see datatree GH issue #188)
"""

# TODO what happens if user alters (in-place) a DataArray they extracted from this object?
Expand All @@ -130,6 +127,14 @@ class DatasetView(Dataset):
"_variables",
)

def __init__(
self,
data_vars: Optional[Mapping[Any, Any]] = None,
coords: Optional[Mapping[Any, Any]] = None,
attrs: Optional[Mapping[Any, Any]] = None,
):
raise AttributeError("DatasetView objects are not to be initialized directly")

@classmethod
def _from_node(
cls,
Expand All @@ -150,14 +155,16 @@ def _from_node(

def __setitem__(self, key, val) -> None:
raise AttributeError(
"Mutation of the DatasetView is not allowed, please use __setitem__ on the wrapping DataTree node, "
"or use `DataTree.to_dataset()` if you want a mutable dataset"
"Mutation of the DatasetView is not allowed, please use `.__setitem__` on the wrapping DataTree node, "
"or use `dt.to_dataset()` if you want a mutable dataset. If calling this from within `map_over_subtree`,"
"use `.copy()` first to get a mutable version of the input dataset."
)

def update(self, other) -> None:
raise AttributeError(
"Mutation of the DatasetView is not allowed, please use .update on the wrapping DataTree node, "
"or use `DataTree.to_dataset()` if you want a mutable dataset"
"Mutation of the DatasetView is not allowed, please use `.update` on the wrapping DataTree node, "
"or use `dt.to_dataset()` if you want a mutable dataset. If calling this from within `map_over_subtree`,"
"use `.copy()` first to get a mutable version of the input dataset."
)

# FIXME https://github.com/python/mypy/issues/7328
Expand Down
11 changes: 5 additions & 6 deletions datatree/mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,15 +190,14 @@ def _map_over_subtree(*args, **kwargs) -> DataTree | Tuple[DataTree, ...]:
*args_as_tree_length_iterables,
*list(kwargs_as_tree_length_iterables.values()),
):
node_args_as_datasets = [
a.to_dataset() if isinstance(a, DataTree) else a
for a in all_node_args[:n_args]
node_args_as_datasetviews = [
a.ds if isinstance(a, DataTree) else a for a in all_node_args[:n_args]
]
node_kwargs_as_datasets = dict(
node_kwargs_as_datasetviews = dict(
zip(
[k for k in kwargs_as_tree_length_iterables.keys()],
[
v.to_dataset() if isinstance(v, DataTree) else v
v.ds if isinstance(v, DataTree) else v
for v in all_node_args[n_args:]
],
)
Expand All @@ -210,7 +209,7 @@ def _map_over_subtree(*args, **kwargs) -> DataTree | Tuple[DataTree, ...]:
# Now we can call func on the data in this particular set of corresponding nodes
results = (
func_with_error_context(
*node_args_as_datasets, **node_kwargs_as_datasets
*node_args_as_datasetviews, **node_kwargs_as_datasetviews
)
if node_of_first_tree.has_data
else None
Expand Down
5 changes: 3 additions & 2 deletions datatree/tests/test_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ def weighted_mean(ds):

dt.map_over_subtree(weighted_mean)

def test_alter_inplace(self):
def test_alter_inplace_forbidden(self):
simpsons = DataTree.from_dict(
d={
"/": xr.Dataset({"age": 83}),
Expand All @@ -322,7 +322,8 @@ def fast_forward(ds: xr.Dataset, years: float) -> xr.Dataset:
ds["age"] = ds["age"] + years
return ds

simpsons.map_over_subtree(fast_forward, years=10)
with pytest.raises(AttributeError):
simpsons.map_over_subtree(fast_forward, years=10)


@pytest.mark.xfail
Expand Down
2 changes: 2 additions & 0 deletions docs/source/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ Breaking changes

- Nodes containing only attributes but no data are now ignored by :py:func:`map_over_subtree` (:issue:`262`, :pull:`263`)
By `Tom Nicholas <https://github.com/TomNicholas>`_.
- Disallow altering of given dataset inside function called by :py:func:`map_over_subtree` (:pull:`269`, reverts part of :pull:`194`).
By `Tom Nicholas <https://github.com/TomNicholas>`_.

Deprecations
~~~~~~~~~~~~
Expand Down

0 comments on commit 07ac0ae

Please sign in to comment.