Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add operations to multiscale spatial image #103

Merged
merged 9 commits into from
Nov 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 20 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,13 @@ during construction.

```
<xarray.DataArray 'image' (y: 128, x: 128)> Size: 16kB
array([[166, 225, 182, ..., 118, 153, 82],
[194, 64, 15, ..., 50, 45, 146],
[232, 41, 44, ..., 148, 245, 170],
array([[170, 79, 215, ..., 31, 151, 150],
[ 77, 181, 1, ..., 217, 176, 228],
[193, 91, 240, ..., 132, 152, 41],
...,
[ 99, 94, 7, ..., 136, 205, 30],
[ 52, 244, 229, ..., 108, 45, 58],
[ 72, 122, 212, ..., 183, 73, 188]], dtype=uint8)
[ 50, 140, 231, ..., 80, 236, 28],
[ 89, 46, 180, ..., 84, 42, 140],
[ 96, 148, 240, ..., 61, 43, 255]], dtype=uint8)
Coordinates:
* y (y) float64 1kB 0.0 1.0 2.0 3.0 4.0 ... 124.0 125.0 126.0 127.0
* x (x) float64 1kB 0.0 1.0 2.0 3.0 4.0 ... 124.0 125.0 126.0 127.0
Expand Down Expand Up @@ -136,6 +136,20 @@ Group: /
image (y, x, c) float64 40kB dask.array<chunksize=(50, 50, 2), meta=np.ndarray>
```

While the decorator allows you to define your own methods to map over datasets
in the `DataTree` while ignoring those datasets not having dimensions, this
library also provides a few convenience methods. For example, the transpose
method we saw earlier can also be applied as follows:

```python
multiscale = multiscale.msi.transpose("y", "x", "c")
```
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lovely


Other methods implemented this way are `reindex`, which is equivalent to the
`xr.DataArray`
[reindex](https://docs.xarray.dev/en/stable/generated/xarray.DataArray.reindex.html)
method, and `assign_coords`, which is equivalent to the `xr.Dataset` `assign_coords` method.

Store as an Open Microscopy Environment-Next Generation File Format ([OME-NGFF])
/ [netCDF] [Zarr] store.

Expand Down
100 changes: 98 additions & 2 deletions multiscale_spatial_image/multiscale_spatial_image.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
from typing import Union
from typing import Union, Iterable, Any

from xarray import DataTree, register_datatree_accessor
import numpy as np
from collections.abc import MutableMapping
from collections.abc import MutableMapping, Hashable
from pathlib import Path
from zarr.storage import BaseStore
from multiscale_spatial_image.operations import (
transpose,
reindex_data_arrays,
assign_coords,
)


@register_datatree_accessor("msi")
Expand Down Expand Up @@ -121,3 +126,94 @@ def to_zarr(
self._dt.ds = self._dt.ds.assign_attrs(**ngff_metadata)

self._dt.to_zarr(store, mode=mode, **kwargs)

def transpose(self, *dims: Hashable) -> DataTree:
    """Return a `DataTree` with all dimensions of arrays in datasets transposed.

    Nodes of the `DataTree` that do not contain dimensions (e.g. the root)
    are skipped automatically. Note that for the `Dataset`s themselves the
    order of dimensions stays the same. If a node is missing one of the
    specified dimensions, an error is raised.

    Parameters
    ----------
    *dims : Hashable | None
        If not specified, reverse the dimensions on each array. Otherwise,
        reorder the dimensions to the order in which `dims` are given.
    """
    # `transpose` here resolves to the operations helper imported at module
    # level, which skips nodes without dimensions.
    transposed = self._dt.map_over_datasets(transpose, *dims)
    return transposed

def reindex_data_arrays(
    self,
    indexers: dict[str, Any],
    method: str | None = None,
    tolerance: float | Iterable[float] | str | None = None,
    copy: bool = False,
    fill_value: int | dict[str, int] | None = None,
    **indexer_kwargs: Any,
):
    """
    Reindex the `DataArray`s present in the datasets at each scale level of the MultiscaleSpatialImage.

    From the original xarray docstring: Conform this object onto the indexes of another object, filling in missing
    values with fill_value. The default fill value is NaN.

    Parameters
    ----------
    indexers : dict | None
        Dictionary with keys given by dimension names and values given by arrays of coordinates tick labels.
        Any mis-matched coordinate values will be filled in with NaN, and any mis-matched dimension names will
        simply be ignored. One of indexers or indexer_kwargs must be provided.
    method : str | None
        Method to use for filling index values in indexers not found on this data array:
        - None (default): don't fill gaps
        - pad / ffill: propagate last valid index value forward
        - backfill / bfill: propagate next valid index value backward
        - nearest: use nearest valid index value
    tolerance: float | Iterable[float] | str | None
        Maximum distance between original and new labels for inexact matches. The values of the index at the
        matching locations must satisfy the equation abs(index[indexer] - target) <= tolerance. Tolerance may
        be a scalar value, which applies the same tolerance to all values, or list-like, which applies variable
        tolerance per element. List-like must be the same size as the index and its dtype must exactly match the
        index's type.
    copy : bool
        If copy=True, data in the return value is always copied. If copy=False and reindexing is unnecessary, or
        can be performed with only slice operations, then the output may share memory with the input. In either
        case, a new xarray object is always returned.
    fill_value: int | dict[str, int] | None
        Value to use for newly missing values. If a dict-like, maps variable names (including coordinates) to fill
        values. Use this data array's name to refer to the data array's values.
    **indexer_kwargs
        The keyword arguments form of indexers. One of indexers or indexer_kwargs must be provided.
    """
    # BUG FIX: the previous implementation spread `*indexer_kwargs` as extra
    # positional arguments, which passed only the dict *keys* and dropped
    # their values. Merge the keyword form into the mapping form instead,
    # mirroring how `xr.DataArray.reindex` treats `indexers` vs kwargs.
    all_indexers = {**(indexers or {}), **indexer_kwargs}
    return self._dt.map_over_datasets(
        reindex_data_arrays,
        all_indexers,
        method,
        tolerance,
        copy,
        fill_value,
    )

def assign_coords(self, coords=None, **coords_kwargs):
    """
    Assign new coordinates to all `Dataset`s in the `DataTree` having dimensions.

    Returns a new `Dataset` at each scale level of the `MultiscaleSpatialImage` with all the original data in
    addition to the new coordinates.

    Parameters
    ----------
    coords
        A mapping whose keys are the names of the coordinates and values are the coordinates to assign.
        The mapping will generally be a dict or Coordinates.
        - If a value is a standard data value — for example, a DataArray, scalar, or array — the data is simply
          assigned as a coordinate.
        - If a value is callable, it is called with this object as the only parameter, and the return value is
          used as new coordinate variables.
        - A coordinate can also be defined and attached to an existing dimension using a tuple with the first
          element the dimension name and the second element the values for this new coordinate.
    **coords_kwargs
        The keyword arguments form of coords. One of coords or coords_kwargs must be provided.
    """
    # BUG FIX: the previous implementation spread `*coords_kwargs` as extra
    # positional arguments, passing only the dict keys and dropping their
    # values. Merge the keyword form into the mapping form instead. `coords`
    # now also defaults to None so the keyword-only form works (backward
    # compatible: positional callers are unaffected).
    all_coords = {**(coords or {}), **coords_kwargs}
    return self._dt.map_over_datasets(assign_coords, all_coords)
3 changes: 3 additions & 0 deletions multiscale_spatial_image/operations/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .operations import assign_coords, transpose, reindex_data_arrays

__all__ = ["assign_coords", "transpose", "reindex_data_arrays"]
18 changes: 18 additions & 0 deletions multiscale_spatial_image/operations/operations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from multiscale_spatial_image.utils import skip_non_dimension_nodes
from xarray import Dataset
from typing import Any


@skip_non_dimension_nodes
def assign_coords(ds: Dataset, *args: Any, **kwargs: Any) -> Dataset:
    """Assign coordinates on a single dataset node; nodes without dimensions are skipped by the decorator."""
    updated = ds.assign_coords(*args, **kwargs)
    return updated


@skip_non_dimension_nodes
def transpose(ds: Dataset, *args: Any, **kwargs: Any) -> Dataset:
    """Transpose a single dataset node; nodes without dimensions are skipped by the decorator."""
    reordered = ds.transpose(*args, **kwargs)
    return reordered


@skip_non_dimension_nodes
def reindex_data_arrays(ds: Dataset, *args: Any, **kwargs: Any) -> Dataset:
    """Reindex the "image" array of a dataset node and wrap it back into a Dataset.

    NOTE(review): only the "image" data variable is reindexed; any other data
    variables in the node would be dropped — confirm this is intended.
    """
    reindexed = ds["image"].reindex(*args, **kwargs)
    return reindexed.to_dataset()
13 changes: 13 additions & 0 deletions test/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import pytest
import numpy as np
from spatial_image import to_spatial_image
from multiscale_spatial_image import to_multiscale


@pytest.fixture()
def multiscale_data():
    """A two-level multiscale image of zeros with dims (c, y, x) = (3, 200, 200)."""
    image = to_spatial_image(
        array_like=np.zeros((3, 200, 200)), dims=("c", "y", "x")
    )
    return to_multiscale(image, scale_factors=[2, 2])
17 changes: 17 additions & 0 deletions test/test_operations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
def test_transpose(multiscale_data):
    """The accessor's transpose reorders dims at every scale level."""
    transposed = multiscale_data.msi.transpose("y", "x", "c")

    for scale in transposed.keys():
        assert transposed[scale]["image"].dims == ("y", "x", "c")


def test_reindex_arrays(multiscale_data):
    """Reindexing the channel axis relabels its coordinates at every scale level."""
    reindexed = multiscale_data.msi.reindex_data_arrays({"c": ["r", "g", "b"]})

    for scale in reindexed.keys():
        assert reindexed[scale].c.data.tolist() == ["r", "g", "b"]


def test_assign_coords(multiscale_data):
    """Assigning channel coordinates applies to every scale level."""
    relabeled = multiscale_data.msi.assign_coords({"c": ["r", "g", "b"]})

    for scale in relabeled.keys():
        assert relabeled[scale].c.data.tolist() == ["r", "g", "b"]
18 changes: 5 additions & 13 deletions test/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,15 @@
import numpy as np
from spatial_image import to_spatial_image
from multiscale_spatial_image import skip_non_dimension_nodes, to_multiscale
from multiscale_spatial_image import skip_non_dimension_nodes


def test_skip_nodes():
data = np.zeros((2, 200, 200))
dims = ("c", "y", "x")
scale_factors = [2, 2]
image = to_spatial_image(array_like=data, dims=dims)
multiscale_img = to_multiscale(image, scale_factors=scale_factors)

def test_skip_nodes(multiscale_data):
@skip_non_dimension_nodes
def transpose(ds, *args, **kwargs):
return ds.transpose(*args, **kwargs)

for scale in list(multiscale_img.keys()):
assert multiscale_img[scale]["image"].dims == ("c", "y", "x")
for scale in list(multiscale_data.keys()):
assert multiscale_data[scale]["image"].dims == ("c", "y", "x")

# applying this function without skipping the root node would fail as the root node does not have dimensions.
result = multiscale_img.map_over_datasets(transpose, "y", "x", "c")
result = multiscale_data.map_over_datasets(transpose, "y", "x", "c")
for scale in list(result.keys()):
assert result[scale]["image"].dims == ("y", "x", "c")
Loading