diff --git a/README.md b/README.md index f0f0d1a..bea4bdd 100644 --- a/README.md +++ b/README.md @@ -38,13 +38,13 @@ during construction. ``` Size: 16kB -array([[166, 225, 182, ..., 118, 153, 82], - [194, 64, 15, ..., 50, 45, 146], - [232, 41, 44, ..., 148, 245, 170], +array([[170, 79, 215, ..., 31, 151, 150], + [ 77, 181, 1, ..., 217, 176, 228], + [193, 91, 240, ..., 132, 152, 41], ..., - [ 99, 94, 7, ..., 136, 205, 30], - [ 52, 244, 229, ..., 108, 45, 58], - [ 72, 122, 212, ..., 183, 73, 188]], dtype=uint8) + [ 50, 140, 231, ..., 80, 236, 28], + [ 89, 46, 180, ..., 84, 42, 140], + [ 96, 148, 240, ..., 61, 43, 255]], dtype=uint8) Coordinates: * y (y) float64 1kB 0.0 1.0 2.0 3.0 4.0 ... 124.0 125.0 126.0 127.0 * x (x) float64 1kB 0.0 1.0 2.0 3.0 4.0 ... 124.0 125.0 126.0 127.0 @@ -136,6 +136,20 @@ Group: / image (y, x, c) float64 40kB dask.array ``` +While the decorator allows you to define your own methods to map over datasets +in the `DataTree` while ignoring those datasets not having dimensions, this +library also provides a few convenience methods. For example, the transpose +method we saw earlier can also be applied as follows: + +```python +multiscale = multiscale.msi.transpose("y", "x", "c") +``` + +Other methods implemented this way are `reindex_data_arrays`, equivalent to the +`xr.DataArray` +[reindex](https://docs.xarray.dev/en/stable/generated/xarray.DataArray.reindex.html) +method, and `assign_coords`, equivalent to the `xr.Dataset` `assign_coords` method. + Store as an Open Microscopy Environment-Next Generation File Format ([OME-NGFF]) / [netCDF] [Zarr] store. 
diff --git a/multiscale_spatial_image/multiscale_spatial_image.py b/multiscale_spatial_image/multiscale_spatial_image.py index b2d3eab..f87ea8f 100644 --- a/multiscale_spatial_image/multiscale_spatial_image.py +++ b/multiscale_spatial_image/multiscale_spatial_image.py @@ -1,10 +1,15 @@ -from typing import Union +from typing import Union, Iterable, Any from xarray import DataTree, register_datatree_accessor import numpy as np -from collections.abc import MutableMapping +from collections.abc import MutableMapping, Hashable from pathlib import Path from zarr.storage import BaseStore +from multiscale_spatial_image.operations import ( + transpose, + reindex_data_arrays, + assign_coords, +) @register_datatree_accessor("msi") @@ -121,3 +126,94 @@ def to_zarr( self._dt.ds = self._dt.ds.assign_attrs(**ngff_metadata) self._dt.to_zarr(store, mode=mode, **kwargs) + + def transpose(self, *dims: Hashable) -> DataTree: + """Return a `DataTree` with all dimensions of arrays in datasets transposed. + + This method automatically skips those nodes of the `DataTree` that do not contain + dimensions. Note that for `Dataset`s themselves, the order of dimensions stays the same. + In case of a `DataTree` node missing specified dimensions an error is raised. + + Parameters + ---------- + *dims : Hashable + If not specified, reverse the dimensions on each array. Otherwise, + reorder the dimensions to the order that the `dims` are specified. + """ + return self._dt.map_over_datasets(transpose, *dims) + + def reindex_data_arrays( + self, + indexers: dict[str, Any], + method: str | None = None, + tolerance: float | Iterable[float] | str | None = None, + copy: bool = False, + fill_value: int | dict[str, int] | None = None, + **indexer_kwargs: Any, + ): + """ + Reindex the `DataArray`s present in the datasets at each scale level of the MultiscaleSpatialImage. 
+ + From the original xarray docstring: Conform this object onto the indexes of another object, filling in missing + values with fill_value. The default fill value is NaN. + + Parameters + ---------- + indexers : dict | None + Dictionary with keys given by dimension names and values given by arrays of coordinates tick labels. + Any mis-matched coordinate values will be filled in with NaN, and any mis-matched dimension names will + simply be ignored. One of indexers or indexers_kwargs must be provided. + method : str | None + Method to use for filling index values in indexers not found on this data array: + - None (default): don’t fill gaps + - pad / ffill: propagate last valid index value forward + - backfill / bfill: propagate next valid index value backward + - nearest: use nearest valid index value + tolerance: float | Iterable[float] | str | None + Maximum distance between original and new labels for inexact matches. The values of the index at the + matching locations must satisfy the equation abs(index[indexer] - target) <= tolerance. Tolerance may + be a scalar value, which applies the same tolerance to all values, or list-like, which applies variable + tolerance per element. List-like must be the same size as the index and its dtype must exactly match the + index’s type. + copy : bool + If copy=True, data in the return value is always copied. If copy=False and reindexing is unnecessary, or + can be performed with only slice operations, then the output may share memory with the input. In either + case, a new xarray object is always returned. + fill_value: int | dict[str, int] | None + Value to use for newly missing values. If a dict-like, maps variable names (including coordinates) to fill + values. Use this data array’s name to refer to the data array’s values. + **indexer_kwargs + The keyword arguments form of indexers. One of indexers or indexers_kwargs must be provided. 
+ + """ + # NOTE(review): map_over_datasets forwards positional args only; `*indexer_kwargs` + # would unpack just the dict *keys*. Fold keyword-form indexers into the mapping. + indexers = {**(indexers or {}), **indexer_kwargs} + return self._dt.map_over_datasets( + reindex_data_arrays, + indexers, + method, + tolerance, + copy, + fill_value, + ) + + def assign_coords(self, coords, **coords_kwargs): + """ + Assign new coordinates to all `Dataset`s in the `DataTree` having dimensions. + + Returns a new `Dataset` at each scale level of the `MultiscaleSpatialImage` with all the original data in + addition to the new coordinates. + + Parameters + ---------- + coords + A mapping whose keys are the names of the coordinates and values are the coordinates to assign. + The mapping will generally be a dict or Coordinates. + - If a value is a standard data value — for example, a DataArray, scalar, or array — the data is simply + assigned as a coordinate. + - If a value is callable, it is called with this object as the only parameter, and the return value is + used as new coordinate variables. + - A coordinate can also be defined and attached to an existing dimension using a tuple with the first + element the dimension name and the second element the values for this new coordinate. + **coords_kwargs + The keyword arguments form of coords. One of coords or coords_kwargs must be provided. 
+ """ + return self._dt.map_over_datasets(assign_coords, {**(coords or {}), **coords_kwargs}) diff --git a/multiscale_spatial_image/operations/__init__.py b/multiscale_spatial_image/operations/__init__.py new file mode 100644 index 0000000..283c1dc --- /dev/null +++ b/multiscale_spatial_image/operations/__init__.py @@ -0,0 +1,3 @@ +from .operations import assign_coords, transpose, reindex_data_arrays + +__all__ = ["assign_coords", "transpose", "reindex_data_arrays"] diff --git a/multiscale_spatial_image/operations/operations.py b/multiscale_spatial_image/operations/operations.py new file mode 100644 index 0000000..6560993 --- /dev/null +++ b/multiscale_spatial_image/operations/operations.py @@ -0,0 +1,20 @@ +from multiscale_spatial_image.utils import skip_non_dimension_nodes +from xarray import Dataset +from typing import Any + + +@skip_non_dimension_nodes +def assign_coords(ds: Dataset, *args: Any, **kwargs: Any) -> Dataset: + return ds.assign_coords(*args, **kwargs) + + +@skip_non_dimension_nodes +def transpose(ds: Dataset, *args: Any, **kwargs: Any) -> Dataset: + return ds.transpose(*args, **kwargs) + + +@skip_non_dimension_nodes +def reindex_data_arrays(ds: Dataset, *args: Any, **kwargs: Any) -> Dataset: + # DataArray.reindex takes its options keyword-only; remap positionals to keywords. + names = ("indexers", "method", "tolerance", "copy", "fill_value") + return ds["image"].reindex(**{**dict(zip(names, args)), **kwargs}).to_dataset() diff --git a/test/conftest.py b/test/conftest.py new file mode 100644 index 0000000..85497e6 --- /dev/null +++ b/test/conftest.py @@ -0,0 +1,13 @@ +import pytest +import numpy as np +from spatial_image import to_spatial_image +from multiscale_spatial_image import to_multiscale + + +@pytest.fixture() +def multiscale_data(): + data = np.zeros((3, 200, 200)) + dims = ("c", "y", "x") + scale_factors = [2, 2] + image = to_spatial_image(array_like=data, dims=dims) + return to_multiscale(image, scale_factors=scale_factors) diff --git a/test/test_operations.py b/test/test_operations.py new file mode 100644 index 0000000..c110044 --- /dev/null +++ b/test/test_operations.py @@ -0,0 +1,17 @@ +def 
test_transpose(multiscale_data): + multiscale_data = multiscale_data.msi.transpose("y", "x", "c") + + for scale in list(multiscale_data.keys()): + assert multiscale_data[scale]["image"].dims == ("y", "x", "c") + + +def test_reindex_arrays(multiscale_data): + multiscale_data = multiscale_data.msi.reindex_data_arrays({"c": ["r", "g", "b"]}) + for scale in list(multiscale_data.keys()): + assert multiscale_data[scale].c.data.tolist() == ["r", "g", "b"] + + +def test_assign_coords(multiscale_data): + multiscale_data = multiscale_data.msi.assign_coords({"c": ["r", "g", "b"]}) + for scale in list(multiscale_data.keys()): + assert multiscale_data[scale].c.data.tolist() == ["r", "g", "b"] diff --git a/test/test_utils.py b/test/test_utils.py index 173a76a..ee9a9d3 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1,23 +1,15 @@ -import numpy as np -from spatial_image import to_spatial_image -from multiscale_spatial_image import skip_non_dimension_nodes, to_multiscale +from multiscale_spatial_image import skip_non_dimension_nodes -def test_skip_nodes(): - data = np.zeros((2, 200, 200)) - dims = ("c", "y", "x") - scale_factors = [2, 2] - image = to_spatial_image(array_like=data, dims=dims) - multiscale_img = to_multiscale(image, scale_factors=scale_factors) - +def test_skip_nodes(multiscale_data): @skip_non_dimension_nodes def transpose(ds, *args, **kwargs): return ds.transpose(*args, **kwargs) - for scale in list(multiscale_img.keys()): - assert multiscale_img[scale]["image"].dims == ("c", "y", "x") + for scale in list(multiscale_data.keys()): + assert multiscale_data[scale]["image"].dims == ("c", "y", "x") # applying this function without skipping the root node would fail as the root node does not have dimensions. - result = multiscale_img.map_over_datasets(transpose, "y", "x", "c") + result = multiscale_data.map_over_datasets(transpose, "y", "x", "c") for scale in list(result.keys()): assert result[scale]["image"].dims == ("y", "x", "c")