From bde03144bc380d4f3d458c14505b37160f642b16 Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Thu, 25 Jan 2024 15:07:55 +0100 Subject: [PATCH 01/25] Add function to export spatialdata to legacy anndata format --- docs/api.md | 11 ++++++ pyproject.toml | 2 ++ src/spatialdata_io/__init__.py | 3 ++ src/spatialdata_io/converters/__init__.py | 0 .../converters/legacy_anndata.py | 36 +++++++++++++++++++ 5 files changed, 52 insertions(+) create mode 100644 src/spatialdata_io/converters/__init__.py create mode 100644 src/spatialdata_io/converters/legacy_anndata.py diff --git a/docs/api.md b/docs/api.md index 1b9509e0..19083501 100644 --- a/docs/api.md +++ b/docs/api.md @@ -8,6 +8,8 @@ I/O for the `spatialdata` project. +### Readers + ```{eval-rst} .. autosummary:: :toctree: generated @@ -21,3 +23,12 @@ I/O for the `spatialdata` project. merscope mcmicro ``` + +### Conversion functions + +```{eval-rst} +.. autosummary:: + :toctree: generated + + to_legacy_anndata +``` diff --git a/pyproject.toml b/pyproject.toml index 58f6858a..2e40e45c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,6 +22,8 @@ urls.Documentation = "https://spatialdata-io.readthedocs.io/" urls.Source = "https://github.com/scverse/spatialdata-io" urls.Home-page = "https://github.com/scverse/spatialdata-io" dependencies = [ + "anndata", + "numpy", "spatialdata", "scikit-image", "h5py", diff --git a/src/spatialdata_io/__init__.py b/src/spatialdata_io/__init__.py index 7b63c9d4..57bae68b 100644 --- a/src/spatialdata_io/__init__.py +++ b/src/spatialdata_io/__init__.py @@ -9,6 +9,8 @@ from spatialdata_io.readers.visium import visium from spatialdata_io.readers.xenium import xenium +from spatialdata_io.converters.legacy_anndata import to_legacy_anndata + __all__ = [ "curio", "visium", @@ -18,6 +20,7 @@ "mcmicro", "steinbock", "merscope", + "to_legacy_anndata", ] __version__ = version("spatialdata-io") diff --git a/src/spatialdata_io/converters/__init__.py b/src/spatialdata_io/converters/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/spatialdata_io/converters/legacy_anndata.py b/src/spatialdata_io/converters/legacy_anndata.py new file mode 100644 index 00000000..b1bf50e1 --- /dev/null +++ b/src/spatialdata_io/converters/legacy_anndata.py @@ -0,0 +1,36 @@ +from anndata import AnnData +from spatialdata import SpatialData +from spatialdata.transformations import get_transformation +import numpy as np + + +def to_legacy_anndata(sdata: SpatialData) -> AnnData: + """ + Convert SpatialData object to a (legacy) spatial AnnData object. + + This is useful for converting a Spatialdata object with Visium data for the use + with packages using spatial information in AnnData as used by scanpy and older versions of Squidpy. + Using this format for any new package is not recommended. + + Parameters + ---------- + sdata + SpatialData object + """ + adata = sdata.table.copy() + for dataset_id in adata.uns["spatial"]: + adata.uns["spatial"][dataset_id]["images"] = { + "hires": np.array(sdata.images[f"{dataset_id}_hires_image"]).transpose([1, 2, 0]), + "lowres": np.array(sdata.images[f"{dataset_id}_lowres_image"]).transpose([1, 2, 0]), + } + adata.uns["spatial"][dataset_id]["scalefactors"] = { + "tissue_hires_scalef": get_transformation( + sdata.shapes[dataset_id], to_coordinate_system="downscaled_hires" + ).scale[0], + "tissue_lowres_scalef": get_transformation( + sdata.shapes[dataset_id], to_coordinate_system="downscaled_lowres" + ).scale[0], + "spot_diameter_fullres": sdata.shapes[dataset_id]["radius"][0] * 2, + } + + return adata From 560fe96721ee84a287cd2a290b593a0455b53d13 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 25 Jan 2024 14:09:29 +0000 Subject: [PATCH 02/25] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/spatialdata_io/__init__.py | 3 +-- src/spatialdata_io/converters/legacy_anndata.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/spatialdata_io/__init__.py b/src/spatialdata_io/__init__.py index 57bae68b..d13be57a 100644 --- a/src/spatialdata_io/__init__.py +++ b/src/spatialdata_io/__init__.py @@ -1,5 +1,6 @@ from importlib.metadata import version +from spatialdata_io.converters.legacy_anndata import to_legacy_anndata from spatialdata_io.readers.codex import codex from spatialdata_io.readers.cosmx import cosmx from spatialdata_io.readers.curio import curio @@ -9,8 +10,6 @@ from spatialdata_io.readers.visium import visium from spatialdata_io.readers.xenium import xenium -from spatialdata_io.converters.legacy_anndata import to_legacy_anndata - __all__ = [ "curio", "visium", diff --git a/src/spatialdata_io/converters/legacy_anndata.py b/src/spatialdata_io/converters/legacy_anndata.py index b1bf50e1..4d812ff2 100644 --- a/src/spatialdata_io/converters/legacy_anndata.py +++ b/src/spatialdata_io/converters/legacy_anndata.py @@ -1,7 +1,7 @@ +import numpy as np from anndata import AnnData from spatialdata import SpatialData from spatialdata.transformations import get_transformation -import numpy as np def to_legacy_anndata(sdata: SpatialData) -> AnnData: From 2d595294ad285e9456af7636a00a2971beb48c62 Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Thu, 25 Jan 2024 16:35:23 +0100 Subject: [PATCH 03/25] tiny fix docstring --- docs/changelog.md | 3 --- src/spatialdata_io/converters/legacy_anndata.py | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) delete mode 100644 docs/changelog.md diff --git a/docs/changelog.md b/docs/changelog.md deleted file mode 100644 index d9e79ba6..00000000 --- a/docs/changelog.md +++ /dev/null @@ -1,3 +0,0 @@ -```{include} ../CHANGELOG.md - -``` diff --git a/src/spatialdata_io/converters/legacy_anndata.py b/src/spatialdata_io/converters/legacy_anndata.py index 4d812ff2..302f07a0 100644 --- a/src/spatialdata_io/converters/legacy_anndata.py +++ b/src/spatialdata_io/converters/legacy_anndata.py @@ -9,7 +9,7 @@ def to_legacy_anndata(sdata: SpatialData) -> AnnData: Convert SpatialData object to a (legacy) spatial AnnData object. This is useful for converting a Spatialdata object with Visium data for the use - with packages using spatial information in AnnData as used by scanpy and older versions of Squidpy. + with packages using spatial information in AnnData as used by Scanpy and older versions of Squidpy. Using this format for any new package is not recommended. Parameters From 47e9dc20eef31e0c1d0b5a2b4601307e0625a22b Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Thu, 25 Jan 2024 18:19:18 +0100 Subject: [PATCH 04/25] added from_legacy_anndata() --- CHANGELOG.md | 1 + docs/api.md | 1 + src/spatialdata_io/__init__.py | 6 +- .../converters/legacy_anndata.py | 135 +++++++++++++++++- 4 files changed, 141 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1862d40b..ba403922 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning][]. - (MCMICRO) support for TMAs (such as the data of exemplar-002) - (Xenium) support for post-xenium aligned images (IF, HE) +- converter function `to_legacy_anndata()` ### Fixed diff --git a/docs/api.md b/docs/api.md index 19083501..9502f2da 100644 --- a/docs/api.md +++ b/docs/api.md @@ -30,5 +30,6 @@ I/O for the `spatialdata` project. .. autosummary:: :toctree: generated + from_legacy_anndata to_legacy_anndata ``` diff --git a/src/spatialdata_io/__init__.py b/src/spatialdata_io/__init__.py index d13be57a..8c951c3c 100644 --- a/src/spatialdata_io/__init__.py +++ b/src/spatialdata_io/__init__.py @@ -1,6 +1,9 @@ from importlib.metadata import version -from spatialdata_io.converters.legacy_anndata import to_legacy_anndata +from spatialdata_io.converters.legacy_anndata import ( + from_legacy_anndata, + to_legacy_anndata, +) from spatialdata_io.readers.codex import codex from spatialdata_io.readers.cosmx import cosmx from spatialdata_io.readers.curio import curio @@ -19,6 +22,7 @@ "mcmicro", "steinbock", "merscope", + "from_legacy_anndata", "to_legacy_anndata", ] diff --git a/src/spatialdata_io/converters/legacy_anndata.py b/src/spatialdata_io/converters/legacy_anndata.py index 302f07a0..9c089557 100644 --- a/src/spatialdata_io/converters/legacy_anndata.py +++ b/src/spatialdata_io/converters/legacy_anndata.py @@ -1,7 +1,10 @@ +import warnings + import numpy as np from anndata import AnnData from spatialdata import SpatialData -from spatialdata.transformations import get_transformation +from spatialdata.models import Image2DModel, ShapesModel, TableModel +from spatialdata.transformations import Identity, Scale, get_transformation def to_legacy_anndata(sdata: SpatialData) -> AnnData: @@ -16,6 +19,7 @@ def to_legacy_anndata(sdata: SpatialData) -> AnnData: ---------- sdata SpatialData object + """ adata = sdata.table.copy() for dataset_id in adata.uns["spatial"]: @@ -34,3 +38,132 @@ def to_legacy_anndata(sdata: SpatialData) -> AnnData: } return adata + + +def from_legacy_anndata(adata: AnnData) -> SpatialData: + """ + Convert (legacy) spatial AnnData object to SpatialData object. + + This is useful for parsing a (legacy) spatial AnnData object, for example the ones produced by Scanpy and older + version of Squidpy. + + Parameters + ---------- + adata + (legacy) spatial AnnData object + """ + # AnnData keys + SPATIAL = "spatial" + + SCALEFACTORS = "scalefactors" + TISSUE_HIRES_SCALEF = "tissue_hires_scalef" + TISSUE_LORES_SCALEF = "tissue_lowres_scalef" + SPOT_DIAMETER_FULLRES = "spot_diameter_fullres" + + IMAGES = "images" + HIRES = "hires" + LORES = "lowres" + + # SpatialData keys + REGION = "locations" + REGION_KEY = "region" + INSTANCE_KEY = "instance_id" + SPOT_DIAMETER_FULLRES_DEFAULT = 10 + + images = {} + shapes = {} + spot_diameter_fullres_list = [] + shapes_transformations = {} + + if SPATIAL in adata.uns: + dataset_ids = list(adata.uns[SPATIAL].keys()) + for dataset_id in dataset_ids: + # read the image data and the scale factors for the shapes + keys = set(adata.uns[SPATIAL][dataset_id].keys()) + tissue_hires_scalef = None + tissue_lores_scalef = None + if SCALEFACTORS in keys: + scalefactors = adata.uns[SPATIAL][dataset_id][SCALEFACTORS] + if TISSUE_HIRES_SCALEF in scalefactors: + tissue_hires_scalef = scalefactors[TISSUE_HIRES_SCALEF] + if TISSUE_LORES_SCALEF in scalefactors: + tissue_lores_scalef = scalefactors[TISSUE_LORES_SCALEF] + if SPOT_DIAMETER_FULLRES in scalefactors: + spot_diameter_fullres_list.append(scalefactors[SPOT_DIAMETER_FULLRES]) + if IMAGES in keys: + image_data = adata.uns[SPATIAL][dataset_id][IMAGES] + if HIRES in image_data: + hires = image_data[HIRES] + if LORES in image_data: + lores = image_data[LORES] + + # construct the spatialdata elements + if hires is not None: + # prepare the hires image + assert ( + tissue_hires_scalef is not None + ), "tissue_hires_scalef is required when an the hires image is present" + hires_image = Image2DModel.parse( + hires, dims=("y", "x", "c"), transformations={f"{dataset_id}_downscaled_hires": Identity()} + ) + images[f"{dataset_id}_hires_image"] = hires_image + + # prepare the transformation to the hires image for the shapes + scale_hires = Scale([tissue_hires_scalef, tissue_hires_scalef], axes=("x", "y")) + shapes_transformations[f"{dataset_id}_downscaled_hires"] = scale_hires + if lores is not None: + # prepare the lores image + assert ( + tissue_lores_scalef is not None + ), "tissue_lores_scalef is required when an the lores image is present" + lores_image = Image2DModel.parse( + lores, dims=("y", "x", "c"), transformations={f"{dataset_id}_downscaled_lowres": Identity()} + ) + images[f"{dataset_id}_lowres_image"] = lores_image + + # prepare the transformation to the lores image for the shapes + scale_lores = Scale([tissue_lores_scalef, tissue_lores_scalef], axes=("x", "y")) + shapes_transformations[f"{dataset_id}_downscaled_lowres"] = scale_lores + + # validate the spot_diameter_fullres value + if len(spot_diameter_fullres_list) > 0: + d = np.array(spot_diameter_fullres_list) + if not np.allclose(d, d[0]): + warnings.warn( + "spot_diameter_fullres is not constant across datasets. Using the average value.", + UserWarning, + stacklevel=2, + ) + spot_diameter_fullres = d.mean() + else: + spot_diameter_fullres = d[0] + else: + warnings.warn( + f"spot_diameter_fullres is not present. Using {SPOT_DIAMETER_FULLRES_DEFAULT} as default value.", + UserWarning, + stacklevel=2, + ) + spot_diameter_fullres = SPOT_DIAMETER_FULLRES_DEFAULT + + # parse and prepare the shapes + if SPATIAL in adata.obsm: + xy = adata.obsm[SPATIAL] + radius = spot_diameter_fullres / 2 + shapes[REGION] = ShapesModel.parse(xy, geometry=0, radius=radius, transformations=shapes_transformations) + + # link the shapes to the table + new_table = adata.copy() + new_table.obs[REGION_KEY] = REGION + new_table.obs[REGION_KEY] = new_table.obs[REGION_KEY].astype("category") + new_table.obs[INSTANCE_KEY] = shapes[REGION].index.values + new_table = TableModel.parse(new_table, region=REGION, region_key=REGION_KEY, instance_key=INSTANCE_KEY) + else: + new_table = adata.copy() + # workaround for https://github.com/scverse/spatialdata/issues/306, not anymore needed as soona ss the + # multi_table branch is merged + new_table.obs[REGION_KEY] = "dummy" + new_table.obs[REGION_KEY] = new_table.obs[REGION_KEY].astype("category") + new_table.obs[INSTANCE_KEY] = np.arange(len(new_table)) + + new_table = TableModel.parse(new_table, region="dummy", region_key=REGION_KEY, instance_key=INSTANCE_KEY) + return SpatialData(table=new_table, images=images, shapes=shapes) From 32bd148322b11977b11dd3554f8ab162f5c2e983 Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Thu, 25 Jan 2024 18:21:14 +0100 Subject: [PATCH 05/25] fix docs --- docs/changelog.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 docs/changelog.md diff --git a/docs/changelog.md b/docs/changelog.md new file mode 100644 index 00000000..d9e79ba6 --- /dev/null +++ b/docs/changelog.md @@ -0,0 +1,3 @@ +```{include} ../CHANGELOG.md + +``` From 39073973822a8cf30a858194f78c742eca416037 Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Fri, 26 Jan 2024 16:52:19 +0100 Subject: [PATCH 06/25] wip --- .../converters/legacy_anndata.py | 152 +++++++++++++++--- tests/converters/test_legacy_anndata.py | 59 +++++++ 2 files changed, 190 insertions(+), 21 deletions(-) create mode 100644 tests/converters/test_legacy_anndata.py diff --git a/src/spatialdata_io/converters/legacy_anndata.py b/src/spatialdata_io/converters/legacy_anndata.py index 9c089557..3bef277b 100644 --- a/src/spatialdata_io/converters/legacy_anndata.py +++ b/src/spatialdata_io/converters/legacy_anndata.py @@ -2,41 +2,143 @@ import numpy as np from anndata import AnnData -from spatialdata import SpatialData +from dask.dataframe import DataFrame as DaskDataFrame +from geopandas import GeoDataFrame +from spatialdata import SpatialData, transform from spatialdata.models import Image2DModel, ShapesModel, TableModel from spatialdata.transformations import Identity, Scale, get_transformation -def to_legacy_anndata(sdata: SpatialData) -> AnnData: +def to_legacy_anndata(sdata: SpatialData, coordinate_system: str | None = None) -> AnnData: """ - Convert SpatialData object to a (legacy) spatial AnnData object. + Convert a SpatialData object to a (legacy) spatial AnnData object. - This is useful for converting a Spatialdata object with Visium data for the use - with packages using spatial information in AnnData as used by Scanpy and older versions of Squidpy. - Using this format for any new package is not recommended. + This is useful for using packages expecting spatial information in AnnData, for example Scanpy and older versions + of Squidpy. Using this format for any new package is not recommended. Parameters ---------- sdata SpatialData object + coordinate_system + The coordinate system to consider. The AnnData object will be populated with the data aligned to this coordinate + system. + Returns + ------- + The legacy spatial AnnData object + + Notes + ----- + Limitations and edge cases: + + - The Table can only annotate Shapes, not Labels. If labels are needed, please manually compute the centroids, + approximate radii from the Labels areas, add this as a Shapes element to the SpatialData object and make the + table annotate the Shapes element. + - The Table cannot annotate more than one Shapes element. If more than one Shapes element are present, please + merge them into a single Shapes element and make the table annotate the new Shapes element. + - Table rows not annotating geometries in the coordinate system will be dropped. Similarly, Shapes rows + not annotated by Table rows will be dropped. + + How resolutions, coordinates, indices and Table rows will be affected: + + - The Images will be scaled (see later) and will not maintain the original resolution. + - The Shapes centroids will likely change to match to the image bounding boxes (see below) and coordinate + systems. + - The Table and Shapes rows will have the same indices but, as mentioned above, some rows may be dropped. + + The AnnData object does support only low-resolution images and limited coordinate transformations; eventual + transformations are applied before conversion. Precisely, this is how the Images are prepared for the AnnData + object: + + - For each Image aligned to the specified coordinate system, the coordinate transformation to the coordinate + system is applied; all the Images are rendered to a common target bounding box, which is the bounding box of + the union of the Images aligned to the target coordinate system. + - Practically the above implies that if the Images have very dissimilar locations, most the rendered Images will + have empty spaces. + - Each Image is downscaled during rendering to fit a 2000x2000 pixels image (downscaled_hires) and a 600x600 + pixels image (downscaled_lowres). """ - adata = sdata.table.copy() - for dataset_id in adata.uns["spatial"]: - adata.uns["spatial"][dataset_id]["images"] = { - "hires": np.array(sdata.images[f"{dataset_id}_hires_image"]).transpose([1, 2, 0]), - "lowres": np.array(sdata.images[f"{dataset_id}_lowres_image"]).transpose([1, 2, 0]), - } - adata.uns["spatial"][dataset_id]["scalefactors"] = { - "tissue_hires_scalef": get_transformation( - sdata.shapes[dataset_id], to_coordinate_system="downscaled_hires" - ).scale[0], - "tissue_lowres_scalef": get_transformation( - sdata.shapes[dataset_id], to_coordinate_system="downscaled_lowres" - ).scale[0], - "spot_diameter_fullres": sdata.shapes[dataset_id]["radius"][0] * 2, - } + # check that coordinate system is valid + css = sdata.coordinate_systems + if coordinate_system is None: + assert len(css) == 1, "The SpatialData object has more than one coordinate system. Please specify one." + coordinate_system = css[0] + else: + assert ( + coordinate_system in css + ), f"The SpatialData object does not have the coordinate system {coordinate_system}." + sdata = sdata.filter_by_coordinate_system(coordinate_system) + + def _get_region(sdata: SpatialData) -> list[str]: + region = sdata.table.uns[TableModel.ATTRS_KEY]["region"] + if not isinstance(region, list): + region = [region] + return region + + # the table needs to annotate exactly one Shapes element + region = _get_region(sdata) + if len(region) != 1: + raise ValueError(f"The table needs to annotate exactly one Shapes element. Found {len(region)}.") + if region[0] not in sdata.shapes: + raise ValueError(f"The table needs to annotate a Shapes element, not Labels or Points.") + shapes = sdata[region[0]] + + # matches the table and the shapes geometries + # useful code to be refactored into spatialdata/_core/query/relational_query.py + instance_key = sdata.table.uns[TableModel.ATTRS_KEY]["instance_key"] + table_instances = sdata.table.obs[instance_key].values + shapes_instances = shapes.index.values + common = np.intersect1d(table_instances, shapes_instances) + new_table = sdata.table[sdata.table.obs[instance_key].isin(common)] + new_shapes = shapes.loc[new_table.obs[instance_key].values] + t = get_transformation(new_shapes, to_coordinate_system=coordinate_system) + new_shapes = transform(new_shapes, t) + # the table after the filtering must not be empty + assert len(new_table) > 0, "The table does not annotate any geometry in the Shapes element." + + # get the centroids + # useful function to move to spatialdata + def get_centroids(shapes: GeoDataFrame) -> DaskDataFrame: + if shapes.iloc[0].geometry.type in ["Polygon", "MultiPolygon"]: + return np.array((shapes.geometry.centroid.x, shapes.geometry.centroid.y)).T + elif shapes.iloc[0].geometry.type == "Point": + return np.array((shapes.geometry.x, shapes.geometry.y)).T + else: + raise ValueError(f"Unexpected geometry type: {shapes.iloc[0].geometry.type}") + + # need to recompute because we don't support in-memory points yet + xy = get_centroids(new_shapes) + + # get the average radius; approximates polygons/multipolygons as circles + if new_shapes.iloc[0] == "Point": + np.mean(new_shapes["radius"]) + else: + np.mean(np.sqrt(new_shapes.geometry.area / np.pi)) + + adata = new_table.copy() + adata.obsm["spatial"] = xy + # # process the images + # sdata_images = sdata.subset(element_names=list(sdata.images.keys())) + # bb = get_extent(sdata_images) + + # adata = sdata.table.copy() + # for dataset_id in adata.uns["spatial"]: + # adata.uns["spatial"][dataset_id]["images"] = { + # "hires": np.array(sdata.images[f"{dataset_id}_hires_image"]).transpose([1, 2, 0]), + # "lowres": np.array(sdata.images[f"{dataset_id}_lowres_image"]).transpose([1, 2, 0]), + # } + # adata.uns["spatial"][dataset_id]["scalefactors"] = { + # "tissue_hires_scalef": get_transformation( + # sdata.shapes[dataset_id], to_coordinate_system="downscaled_hires" + # ).scale[0], + # "tissue_lowres_scalef": get_transformation( + # sdata.shapes[dataset_id], to_coordinate_system="downscaled_lowres" + # ).scale[0], + # "spot_diameter_fullres": sdata.shapes[dataset_id]["radius"][0] * 2, + # } + # return adata @@ -51,6 +153,14 @@ def from_legacy_anndata(adata: AnnData) -> SpatialData: ---------- adata (legacy) spatial AnnData object + + Returns + ------- + The SpatialData object + + Notes + ----- + The SpatialData object will have one hires and one lores image for each dataset in the AnnData object. """ # AnnData keys SPATIAL = "spatial" diff --git a/tests/converters/test_legacy_anndata.py b/tests/converters/test_legacy_anndata.py new file mode 100644 index 00000000..0ad68c21 --- /dev/null +++ b/tests/converters/test_legacy_anndata.py @@ -0,0 +1,59 @@ +from typing import Literal + +import pytest +from anndata import AnnData +from anndata.tests.helpers import assert_equal +from spatialdata.datasets import blobs +from spatialdata.models import TableModel + +from spatialdata_io import from_legacy_anndata, to_legacy_anndata + +from spatialdata. + + +def blobs_annotating_element(name: Literal["blobs_labels", "blobs_circles", "blobs_polygons", "blobs_multipolygons"]): + sdata = blobs() + n = len(sdata[name]) + new_table = AnnData(shape=(n, 0), obs={"region": [name for _ in range(n)], "instance_id": sdata[name].index}) + new_table = TableModel.parse(new_table, region=name, region_key="region", instance_key="instance_id") + del sdata.table + sdata.table = new_table + # TODO: call helper function to shuffle the order of the rows of the table and of the shapes + return sdata + +def test_invalid_coordinate_system(): + pass + # coordinate system not passed but multiple present + # coordinate system passed but multiple present, not matching +def test_invalid_annotations(): + pass + # table annotating labels + # table annotating multiple shapes + # table not annotating any shapes + # table annotating a shapes but with instance_key not matching + +# valid coordinate systems combinations: + # not passed but only one present + # passed and multiple present, matching with one of them +# valid shapes combinations: polygons, multipolygons, circles +# images: no images, one image, multiple images, negative translation and rotation +def test_bidectional_convesion(): + pass + # test idempotency + +def idempotency_check_to_anndata(sdata): + adata = to_legacy_anndata(sdata) + adata2 = to_legacy_anndata(from_legacy_anndata(adata)) + assert_equal(adata, adata2) + +def idempotency_check_from_anndata(adata): + sdata = from_legacy_anndata(adata) + sdata2 = from_legacy_anndata(to_legacy_anndata(sdata)) + + # assert_spatialdata_objects_seem_equal(sdata, sdata2) + +# new branch in spatialdata +# TODO: make assert spatialdata objects seem equal public +# TODO: make get_centroids public +# TODO: make a simple helper function in relational query +# TODO: create _util function transform_to_data_extent() in spatialdata From ec0abc89777459b4b94b424fa0e704cf58d96f0f Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Fri, 23 Feb 2024 02:40:29 +0100 Subject: [PATCH 07/25] wip --- .../converters/legacy_anndata.py | 135 ++++++++++-------- tests/converters/test_legacy_anndata.py | 53 +++---- 2 files changed, 93 insertions(+), 95 deletions(-) diff --git a/src/spatialdata_io/converters/legacy_anndata.py b/src/spatialdata_io/converters/legacy_anndata.py index 95672a3d..d01a353b 100644 --- a/src/spatialdata_io/converters/legacy_anndata.py +++ b/src/spatialdata_io/converters/legacy_anndata.py @@ -4,14 +4,16 @@ import numpy as np from anndata import AnnData -from dask.dataframe import DataFrame as DaskDataFrame -from geopandas import GeoDataFrame -from spatialdata import SpatialData, transform -from spatialdata.models import Image2DModel, ShapesModel, TableModel -from spatialdata.transformations import Identity, Scale, get_transformation +from multiscale_spatial_image import MultiscaleSpatialImage +from spatial_image import SpatialImage +from spatialdata import SpatialData, join_sdata_spatialelement_table +from spatialdata.models import Image2DModel, ShapesModel, TableModel, get_table_keys +from spatialdata.transformations import Identity, Scale -def to_legacy_anndata(sdata: SpatialData, coordinate_system: str | None = None) -> AnnData: +def to_legacy_anndata( + sdata: SpatialData, coordinate_system: str | None = None, table_name: str | None = None +) -> AnnData: """ Convert a SpatialData object to a (legacy) spatial AnnData object. @@ -23,8 +25,11 @@ def to_legacy_anndata(sdata: SpatialData, coordinate_system: str | None = None) sdata SpatialData object coordinate_system - The coordinate system to consider. The AnnData object will be populated with the data aligned to this coordinate - system. + The coordinate system to consider. The AnnData object will be populated with the data transformed to this + coordinate system. + table_name + The name of the table in the SpatialData object to consider. If None and the SpatialData object has only one + table, that table will be used. Returns ------- @@ -32,34 +37,48 @@ def to_legacy_anndata(sdata: SpatialData, coordinate_system: str | None = None) Notes ----- - Limitations and edge cases: + Edge cases and limitations: - - The Table can only annotate Shapes, not Labels. If labels are needed, please manually compute the centroids, - approximate radii from the Labels areas, add this as a Shapes element to the SpatialData object and make the - table annotate the Shapes element. - - The Table cannot annotate more than one Shapes element. If more than one Shapes element are present, please - merge them into a single Shapes element and make the table annotate the new Shapes element. - - Table rows not annotating geometries in the coordinate system will be dropped. Similarly, Shapes rows - not annotated by Table rows will be dropped. + - The Table can only annotate Shapes or Labels. + - Labels will be approximated to circles by using the centroids of each label and an average approximated + radius. + - The Table cannot annotate more than one Shapes element. If multiple Shapes elements are present, please merge + them into a single Shapes element and make the table annotate the new Shapes element. + - Table rows not annotating geometries in the coordinate system will be dropped. Similarly, Shapes rows (in the + case of Labels, the circle rows approximating the Labels) not annotated by Table rows will be dropped. How resolutions, coordinates, indices and Table rows will be affected: - The Images will be scaled (see later) and will not maintain the original resolution. - - The Shapes centroids will likely change to match to the image bounding boxes (see below) and coordinate - systems. - - The Table and Shapes rows will have the same indices but, as mentioned above, some rows may be dropped. + - The Shapes centroids will likely change to match the alignment in the specified coordinate system, and to the + images bounding boxes (see below). + - The Table and Shapes rows will have the same indices as before the conversion, which is useful for plugging + results back to the SpatialData object, but as mentioned above, some rows may be dropped. - The AnnData object does support only low-resolution images and limited coordinate transformations; eventual - transformations are applied before conversion. Precisely, this is how the Images are prepared for the AnnData - object: + The generated AnnData object will contain only low-resolution images and their origin will be reset to the pixel + (0, 0). In particular, the ImageContainer class used by Squidpy is not used. Eventual transformations are applied + before conversion. Precisely, this is how the Images are prepared for the AnnData object: - For each Image aligned to the specified coordinate system, the coordinate transformation to the coordinate system is applied; all the Images are rendered to a common target bounding box, which is the bounding box of the union of the Images aligned to the target coordinate system. - - Practically the above implies that if the Images have very dissimilar locations, most the rendered Images will - have empty spaces. + - The origins of the new images will match the pixel (0, 0) of the target bounding box. - Each Image is downscaled during rendering to fit a 2000x2000 pixels image (downscaled_hires) and a 600x600 pixels image (downscaled_lowres). + - Practically the above implies that if the Images have very dissimilar locations, most the rendered Images will + have empty spaces; in such cases it is recommended to drop or crop some images before the conversion. + + Matching of spatial coordinates and pixel coordinates: + + - The coordinates in `obsm['spatial']` will match the high-resolution (generated) images (i.e. coordinate (x, y) + will match with the pixel (y, x) in the image). + - The above matching is not perfect due to this bug: https://github.com/scverse/spatialdata/issues/165, which + will eventually be addressed. + + Final consideration: + + - Due to the legacy nature of the AnnData object generated, this function may not handle all the edge cases, + please report any issue you may find. """ # check that coordinate system is valid css = sdata.coordinate_systems @@ -72,46 +91,44 @@ def to_legacy_anndata(sdata: SpatialData, coordinate_system: str | None = None) ), f"The SpatialData object does not have the coordinate system {coordinate_system}." sdata = sdata.filter_by_coordinate_system(coordinate_system) - def _get_region(sdata: SpatialData) -> list[str]: - region = sdata.table.uns[TableModel.ATTRS_KEY]["region"] - if not isinstance(region, list): - region = [region] - return region + if table_name is None: + assert ( + len(sdata.tables) == 1 + ), "When the table_name is not specified, the SpatialData object must have exactly one table." + table_name = next(iter(sdata.tables)) + else: + assert table_name in sdata.tables, f"The table {table_name} is not present in the SpatialData object." + + table = sdata[table_name] + ( + region, + _, + instance_key, + ) = get_table_keys(table) + if not isinstance(region, list): + region = [region] # the table needs to annotate exactly one Shapes element - region = _get_region(sdata) if len(region) != 1: - raise ValueError(f"The table needs to annotate exactly one Shapes element. Found {len(region)}.") - if region[0] not in sdata.shapes: - raise ValueError(f"The table needs to annotate a Shapes element, not Labels or Points.") - shapes = sdata[region[0]] - - # matches the table and the shapes geometries - # useful code to be refactored into spatialdata/_core/query/relational_query.py - instance_key = sdata.table.uns[TableModel.ATTRS_KEY]["instance_key"] - table_instances = sdata.table.obs[instance_key].values - shapes_instances = shapes.index.values - common = np.intersect1d(table_instances, shapes_instances) - new_table = sdata.table[sdata.table.obs[instance_key].isin(common)] - new_shapes = shapes.loc[new_table.obs[instance_key].values] - t = get_transformation(new_shapes, to_coordinate_system=coordinate_system) - new_shapes = transform(new_shapes, t) + raise ValueError(f"The table needs to annotate exactly one element. Found {len(region)}.") + if region[0] not in sdata.shapes and region[0] not in sdata.labels: + raise ValueError(f"The table needs to annotate a Shapes or Labels element, not Points.") + element = sdata[region[0]] - # the table after the filtering must not be empty - assert len(new_table) > 0, "The table does not annotate any geometry in the Shapes element." - - # get the centroids - # useful function to move to spatialdata - def get_centroids(shapes: GeoDataFrame) -> DaskDataFrame: - if shapes.iloc[0].geometry.type in ["Polygon", "MultiPolygon"]: - return np.array((shapes.geometry.centroid.x, shapes.geometry.centroid.y)).T - elif shapes.iloc[0].geometry.type == "Point": - return np.array((shapes.geometry.x, shapes.geometry.y)).T - else: - raise ValueError(f"Unexpected geometry type: {shapes.iloc[0].geometry.type}") + # TODO: call get_centroids() here and convert polygons/multipolygons/labels to circles + + if isinstance(element, (SpatialImage, MultiscaleSpatialImage)): + table.obs[instance_key] = table.obs[instance_key].astype(element.dtype) + joined_elements, new_table = join_sdata_spatialelement_table( + sdata=sdata, spatial_element_name=region[0], table_name=table_name, how="inner", match_rows="left" + ) + joined_elements[region[0]] - # need to recompute because we don't support in-memory points yet - xy = get_centroids(new_shapes) + # the table after the filtering must not be empty + assert len(new_table) > 0, ( + "The table does not annotate any geometry in the Shapes element. This could also be caused by a mismatch " + "between the type of the indices of the geometries and the type of the INSTANCE_KEY column of the table." + ) # get the average radius; approximates polygons/multipolygons as circles if new_shapes.iloc[0] == "Point": diff --git a/tests/converters/test_legacy_anndata.py b/tests/converters/test_legacy_anndata.py index f0bb3629..57368bd5 100644 --- a/tests/converters/test_legacy_anndata.py +++ b/tests/converters/test_legacy_anndata.py @@ -1,50 +1,30 @@ from typing import Literal +import pytest from anndata import AnnData from anndata.tests.helpers import assert_equal +from spatialdata._core.query.relational_query import _get_unique_label_values_as_index +from spatialdata._utils import _assert_spatialdata_objects_seem_identical from spatialdata.datasets import blobs from spatialdata.models import TableModel from spatialdata_io import from_legacy_anndata, to_legacy_anndata -# from spatialdata. - def blobs_annotating_element(name: Literal["blobs_labels", "blobs_circles", "blobs_polygons", "blobs_multipolygons"]): sdata = blobs() - n = len(sdata[name]) - new_table = AnnData(shape=(n, 0), obs={"region": [name for _ in range(n)], "instance_id": sdata[name].index}) + if name == "blobs_labels": + instance_id = _get_unique_label_values_as_index(sdata[name]).tolist() + else: + instance_id = sdata[name].index.tolist() + n = len(instance_id) + new_table = AnnData(shape=(n, 0), obs={"region": [name for _ in range(n)], "instance_id": instance_id}) new_table = TableModel.parse(new_table, region=name, region_key="region", instance_key="instance_id") del sdata.table sdata.table = new_table - # TODO: call helper function to shuffle the order of the rows of the table and of the shapes return sdata -def test_invalid_coordinate_system(): - pass - # coordinate system not passed but multiple present - # coordinate system passed but multiple present, not matching - - -def test_invalid_annotations(): - pass - # table annotating labels - # table annotating multiple shapes - # table not annotating any shapes - # table annotating a shapes but with instance_key not matching - - -# valid coordinate systems combinations: -# not passed but only one present -# passed and multiple present, matching with one of them -# valid shapes combinations: polygons, multipolygons, circles -# images: no images, one image, multiple images, negative translation and rotation -def test_bidectional_convesion(): - pass - # test idempotency - - def idempotency_check_to_anndata(sdata): adata = to_legacy_anndata(sdata) adata2 = to_legacy_anndata(from_legacy_anndata(adata)) @@ -53,13 +33,14 @@ def idempotency_check_to_anndata(sdata): def idempotency_check_from_anndata(adata): sdata = from_legacy_anndata(adata) - from_legacy_anndata(to_legacy_anndata(sdata)) + sdata1 = from_legacy_anndata(to_legacy_anndata(sdata)) + _assert_spatialdata_objects_seem_identical(sdata, sdata1) - # assert_spatialdata_objects_seem_equal(sdata, sdata2) +@pytest.mark.parametrize("name", ["blobs_labels", "blobs_circles", "blobs_polygons", "blobs_multipolygons"]) +def test_bidectional_convesion(name): + sdata = blobs_annotating_element(name) + adata = to_legacy_anndata(sdata) -# new branch in spatialdata -# TODO: make assert spatialdata objects seem equal public -# TODO: make get_centroids public -# TODO: make a simple helper function in relational query -# TODO: create _util function transform_to_data_extent() in spatialdata + idempotency_check_to_anndata(sdata) + idempotency_check_from_anndata(adata) From 8268cf63925ec5fa344118208872efbc633e5c99 Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Sun, 25 Feb 2024 23:08:05 +0100 Subject: [PATCH 08/25] done to_legacy_anndata() for shapes, labels and points; wip images --- .../converters/legacy_anndata.py | 173 +++++++++++++----- tests/converters/test_legacy_anndata.py | 9 +- 2 files changed, 135 insertions(+), 47 deletions(-) diff --git a/src/spatialdata_io/converters/legacy_anndata.py b/src/spatialdata_io/converters/legacy_anndata.py index d01a353b..fe5c2594 100644 --- a/src/spatialdata_io/converters/legacy_anndata.py +++ b/src/spatialdata_io/converters/legacy_anndata.py @@ -3,16 +3,34 @@ import warnings import numpy as np +import pandas as pd from anndata import AnnData from multiscale_spatial_image import MultiscaleSpatialImage +from shapely import MultiPolygon, Polygon from spatial_image import SpatialImage -from spatialdata import SpatialData, join_sdata_spatialelement_table -from spatialdata.models import Image2DModel, ShapesModel, TableModel, get_table_keys +from spatialdata import ( + SpatialData, + aggregate, + get_centroids, + get_extent, + join_sdata_spatialelement_table, +) +from spatialdata.models import ( + Image2DModel, + Image3DModel, + ShapesModel, + TableModel, + get_axes_names, + get_table_keys, +) from spatialdata.transformations import Identity, Scale def to_legacy_anndata( - sdata: SpatialData, coordinate_system: str | None = None, table_name: str | None = None + sdata: SpatialData, + coordinate_system: str | None = None, + table_name: str | None = None, + include_images: bool = False, ) -> AnnData: """ Convert a SpatialData object to a (legacy) spatial AnnData object. @@ -20,6 +38,9 @@ def to_legacy_anndata( This is useful for using packages expecting spatial information in AnnData, for example Scanpy and older versions of Squidpy. Using this format for any new package is not recommended. + This function by default ignores images (recommended); setting the `include_images` parameter to `True` will + include a downscaled version of the images in the output AnnData object. + Parameters ---------- sdata @@ -30,10 +51,13 @@ def to_legacy_anndata( table_name The name of the table in the SpatialData object to consider. If None and the SpatialData object has only one table, that table will be used. + include_images + If True, includes downscaled versions of the images in the output AnnData object. It is recommended to handle + the full resolution images separately and keep them in the SpatialData object. Returns ------- - The legacy spatial AnnData object + The legacy spatial AnnData object. Notes ----- @@ -49,15 +73,17 @@ def to_legacy_anndata( How resolutions, coordinates, indices and Table rows will be affected: - - The Images will be scaled (see later) and will not maintain the original resolution. + - When `include_images` is `True`, the Images will be scaled (see later) and will not maintain the original + resolution. - The Shapes centroids will likely change to match the alignment in the specified coordinate system, and to the - images bounding boxes (see below). + images bounding boxes when `include_images` is `True` (see below). - The Table and Shapes rows will have the same indices as before the conversion, which is useful for plugging results back to the SpatialData object, but as mentioned above, some rows may be dropped. - The generated AnnData object will contain only low-resolution images and their origin will be reset to the pixel - (0, 0). In particular, the ImageContainer class used by Squidpy is not used. Eventual transformations are applied - before conversion. Precisely, this is how the Images are prepared for the AnnData object: + When `inlcude_images` is `True`, the generated AnnData object will contain low-resolution images and their origin + will be reset to the pixel (0, 0). In particular, the ImageContainer class used by Squidpy is not used. Eventual + transformations are applied before conversion. Precisely, this is how the Images are prepared for the AnnData + object: - For each Image aligned to the specified coordinate system, the coordinate transformation to the coordinate system is applied; all the Images are rendered to a common target bounding box, which is the bounding box of @@ -70,8 +96,10 @@ def to_legacy_anndata( Matching of spatial coordinates and pixel coordinates: - - The coordinates in `obsm['spatial']` will match the high-resolution (generated) images (i.e. coordinate (x, y) - will match with the pixel (y, x) in the image). + - When `include_images` is `True`, the coordinates in `obsm['spatial']` will match the downscaled (generated) + image with suffix "*_downscaled_hires" (i.e. coordinate (x, y) will match with the pixel (y, x) in the image). + Please note that the suffix "_hires" is used for legacy reasons, but the image is actually downscaled and + fairly low resolution. - The above matching is not perfect due to this bug: https://github.com/scverse/spatialdata/issues/165, which will eventually be addressed. @@ -114,15 +142,60 @@ def to_legacy_anndata( if region[0] not in sdata.shapes and region[0] not in sdata.labels: raise ValueError(f"The table needs to annotate a Shapes or Labels element, not Points.") element = sdata[region[0]] + region_name = region[0] - # TODO: call get_centroids() here and convert polygons/multipolygons/labels to circles - + # convert polygons, multipolygons and labels to circles + obs = None if isinstance(element, (SpatialImage, MultiscaleSpatialImage)): - table.obs[instance_key] = table.obs[instance_key].astype(element.dtype) + # find the area of labels, estimate the radius from it; find the centroids + if isinstance(element, MultiscaleSpatialImage): + shape = element["scale0"].values().__iter__().__next__().shape + else: + shape = element.shape + + axes = get_axes_names(element) + if "z" in axes: + model = Image3DModel + else: + model = Image2DModel + ones = model.parse(np.ones((1,) + shape), dims=("c",) + axes) + aggregated = aggregate(values=ones, by=element, agg_func="sum").table + areas = aggregated.X.todense().A1.reshape(-1) + aobs = aggregated.obs + aobs["areas"] = areas + aobs["radius"] = np.sqrt(areas / np.pi) + + # get the centroids; remove the background if present (the background is not considered during aggregation) + centroids = get_centroids(element, coordinate_system=coordinate_system).compute() + if 0 in centroids.index: + centroids = centroids.drop(index=0) + centroids.index = centroids.index + aobs.index = aobs[instance_key] + assert len(aobs) == len(centroids) + obs = pd.merge(aobs, centroids, left_index=True, right_index=True, how="inner") + assert len(obs) == len(centroids) + elif isinstance(element.geometry.iloc[0], (Polygon, MultiPolygon)): + radius = np.sqrt(element.geometry.area / np.pi) + centroids = get_centroids(element, coordinate_system=coordinate_system).compute() + obs = pd.DataFrame({"radius": radius}) + obs = pd.merge(obs, centroids, left_index=True, right_index=True, how="inner") + if obs is not None: + spatial_axes = sorted(get_axes_names(element)) + centroids = obs[spatial_axes].values + shapes = ShapesModel.parse( + centroids, + geometry=0, + index=obs.index, + radius=obs["radius"].values, + transformations={coordinate_system: Identity()}, + ) + else: + shapes = sdata[region_name] + circles_sdata = SpatialData(tables={table_name: table}, shapes={region_name: shapes}) + joined_elements, new_table = join_sdata_spatialelement_table( - sdata=sdata, spatial_element_name=region[0], table_name=table_name, how="inner", match_rows="left" + sdata=circles_sdata, spatial_element_name=region_name, table_name=table_name, how="inner", match_rows="left" ) - joined_elements[region[0]] # the table after the filtering must not be empty assert len(new_table) > 0, ( @@ -130,34 +203,46 @@ def to_legacy_anndata( "between the type of the indices of the geometries and the type of the INSTANCE_KEY column of the table." ) - # get the average radius; approximates polygons/multipolygons as circles - if new_shapes.iloc[0] == "Point": - np.mean(new_shapes["radius"]) + sdata_pre_rasterize = SpatialData( + tables={table_name: new_table}, shapes={region_name: joined_elements[region_name]} + ) + + # process the images + if not include_images: + sdata_post_rasterize = sdata_pre_rasterize else: - np.mean(np.sqrt(new_shapes.geometry.area / np.pi)) - - adata = new_table.copy() - adata.obsm["spatial"] = xy - # # process the images - # sdata_images = sdata.subset(element_names=list(sdata.images.keys())) - # bb = get_extent(sdata_images) - - # adata = sdata.table.copy() - # for dataset_id in adata.uns["spatial"]: - # adata.uns["spatial"][dataset_id]["images"] = { - # "hires": np.array(sdata.images[f"{dataset_id}_hires_image"]).transpose([1, 2, 0]), - # "lowres": np.array(sdata.images[f"{dataset_id}_lowres_image"]).transpose([1, 2, 0]), - # } - # adata.uns["spatial"][dataset_id]["scalefactors"] = { - # "tissue_hires_scalef": get_transformation( - # sdata.shapes[dataset_id], to_coordinate_system="downscaled_hires" - # ).scale[0], - # "tissue_lowres_scalef": get_transformation( - # sdata.shapes[dataset_id], to_coordinate_system="downscaled_lowres" - # ).scale[0], - # "spot_diameter_fullres": sdata.shapes[dataset_id]["radius"][0] * 2, - # } - # + sdata_images = sdata.subset(element_names=list(sdata.images.keys())) + for image_name, image in sdata_images.images.items(): + sdata_pre_rasterize[image_name] = image + get_extent(sdata_images) + + # TODO: transform to data extent highres + # TODO: transform to data extent lowres + + for image_name, image in sdata_images.images.items(): + if "spatial" not in adata.uns: + adata.uns["spatial"] = {} + adata.uns["spatial"][image_name] = {} + adata.uns["spatial"][image_name]["images"] = {} + adata.uns["spatial"][image_name]["scalefactors"] = {} + + # for dataset_id in adata.uns["spatial"]: + # adata.uns["spatial"][dataset_id]["images"] = { + # "hires": np.array(sdata.images[f"{dataset_id}_hires_image"]).transpose([1, 2, 0]), + # "lowres": np.array(sdata.images[f"{dataset_id}_lowres_image"]).transpose([1, 2, 0]), + # } + # adata.uns["spatial"][dataset_id]["scalefactors"] = { + # "tissue_hires_scalef": get_transformation( + # sdata.shapes[dataset_id], to_coordinate_system="downscaled_hires" + # ).scale[0], + # "tissue_lowres_scalef": get_transformation( + # sdata.shapes[dataset_id], to_coordinate_system="downscaled_lowres" + # ).scale[0], + # "spot_diameter_fullres": sdata.shapes[dataset_id]["radius"][0] * 2, + # } + + adata = sdata_post_rasterize[table_name].copy() + adata.obsm["spatial"] = get_centroids(sdata_post_rasterize[region_name]).compute().values return adata @@ -282,6 +367,8 @@ def from_legacy_anndata(adata: AnnData) -> SpatialData: # link the shapes to the table new_table = adata.copy() + if TableModel.ATTRS_KEY in new_table.uns: + del new_table.uns[TableModel.ATTRS_KEY] new_table.obs[REGION_KEY] = REGION new_table.obs[REGION_KEY] = new_table.obs[REGION_KEY].astype("category") new_table.obs[INSTANCE_KEY] = shapes[REGION].index.values diff --git a/tests/converters/test_legacy_anndata.py b/tests/converters/test_legacy_anndata.py index 57368bd5..77d2324a 100644 --- a/tests/converters/test_legacy_anndata.py +++ b/tests/converters/test_legacy_anndata.py @@ -39,8 +39,9 @@ def idempotency_check_from_anndata(adata): @pytest.mark.parametrize("name", ["blobs_labels", "blobs_circles", "blobs_polygons", "blobs_multipolygons"]) def test_bidectional_convesion(name): - sdata = blobs_annotating_element(name) - adata = to_legacy_anndata(sdata) + sdata0 = blobs_annotating_element(name) + adata0 = to_legacy_anndata(sdata0) + sdata1 = from_legacy_anndata(adata0) - idempotency_check_to_anndata(sdata) - idempotency_check_from_anndata(adata) + idempotency_check_to_anndata(sdata1) + idempotency_check_from_anndata(adata0) From a6921f541c9a2571a11e892ae6847f9487fc7ec5 Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Mon, 26 Feb 2024 01:14:01 +0100 Subject: [PATCH 09/25] fixed non-image tests, skipped image tests --- .../converters/legacy_anndata.py | 119 +++++++++++------- tests/converters/test_legacy_anndata.py | 61 ++++++--- 2 files changed, 120 insertions(+), 60 deletions(-) diff --git a/src/spatialdata_io/converters/legacy_anndata.py b/src/spatialdata_io/converters/legacy_anndata.py index fe5c2594..6370d1c3 100644 --- a/src/spatialdata_io/converters/legacy_anndata.py +++ b/src/spatialdata_io/converters/legacy_anndata.py @@ -15,6 +15,7 @@ get_extent, join_sdata_spatialelement_table, ) +from spatialdata._core.operations._utils import transform_to_data_extent from spatialdata.models import ( Image2DModel, Image3DModel, @@ -80,7 +81,7 @@ def to_legacy_anndata( - The Table and Shapes rows will have the same indices as before the conversion, which is useful for plugging results back to the SpatialData object, but as mentioned above, some rows may be dropped. - When `inlcude_images` is `True`, the generated AnnData object will contain low-resolution images and their origin + When `include_images` is `True`, the generated AnnData object will contain low-resolution images and their origin will be reset to the pixel (0, 0). In particular, the ImageContainer class used by Squidpy is not used. Eventual transformations are applied before conversion. Precisely, this is how the Images are prepared for the AnnData object: @@ -89,25 +90,34 @@ def to_legacy_anndata( system is applied; all the Images are rendered to a common target bounding box, which is the bounding box of the union of the Images aligned to the target coordinate system. - The origins of the new images will match the pixel (0, 0) of the target bounding box. - - Each Image is downscaled during rendering to fit a 2000x2000 pixels image (downscaled_hires) and a 600x600 - pixels image (downscaled_lowres). + - Each Image is downscaled (or upscaled) during rendering to fit a 2000x2000 pixels image (downscaled_hires) and + a 600x600 pixels image (downscaled_lowres). - Practically the above implies that if the Images have very dissimilar locations, most the rendered Images will have empty spaces; in such cases it is recommended to drop or crop some images before the conversion. Matching of spatial coordinates and pixel coordinates: - When `include_images` is `True`, the coordinates in `obsm['spatial']` will match the downscaled (generated) - image with suffix "*_downscaled_hires" (i.e. coordinate (x, y) will match with the pixel (y, x) in the image). - Please note that the suffix "_hires" is used for legacy reasons, but the image is actually downscaled and - fairly low resolution. + image "hires" (i.e. coordinate (x, y) will match with the pixel (y, x) in the image). Please note that the + "hires" naming is used for legacy reasons, but the image is actually downscaled and fairly low resolution. - The above matching is not perfect due to this bug: https://github.com/scverse/spatialdata/issues/165, which will eventually be addressed. + Imperfect downscaling: + + - Due to the bug mentioned above, https://github.com/scverse/spatialdata/issues/165, when the downscaling factor + is large, the error between the original and the downscale image can be significant. This is another reason to + not recommend using include_images=True. Note, however, that the error can be large only between the original + and the downscaled images, not between the downscaled images and the new spatial coordinates. Thus the use + may be acceptable for some use cases. + Final consideration: - Due to the legacy nature of the AnnData object generated, this function may not handle all the edge cases, please report any issue you may find. """ + DOWNSCALED_HIRES_LENGTH = 2000 + DOWNSCALED_LOWRES_LENGTH = 600 # check that coordinate system is valid css = sdata.coordinate_systems if coordinate_system is None: @@ -140,7 +150,7 @@ def to_legacy_anndata( if len(region) != 1: raise ValueError(f"The table needs to annotate exactly one element. Found {len(region)}.") if region[0] not in sdata.shapes and region[0] not in sdata.labels: - raise ValueError(f"The table needs to annotate a Shapes or Labels element, not Points.") + raise ValueError("The table needs to annotate a Shapes or Labels element, not Points.") element = sdata[region[0]] region_name = region[0] @@ -171,6 +181,7 @@ def to_legacy_anndata( centroids = centroids.drop(index=0) centroids.index = centroids.index aobs.index = aobs[instance_key] + aobs.index.name = None assert len(aobs) == len(centroids) obs = pd.merge(aobs, centroids, left_index=True, right_index=True, how="inner") assert len(obs) == len(centroids) @@ -208,40 +219,56 @@ def to_legacy_anndata( ) # process the images + downscaled_images_hires = {} + downscaled_images_lowres = {} + sdata_post_rasterize = None if not include_images: sdata_post_rasterize = sdata_pre_rasterize else: sdata_images = sdata.subset(element_names=list(sdata.images.keys())) for image_name, image in sdata_images.images.items(): sdata_pre_rasterize[image_name] = image - get_extent(sdata_images) + bb = get_extent(sdata_images) # TODO: transform to data extent highres # TODO: transform to data extent lowres + parameters_dict = {"x": "target_width", "y": "target_height", "z": "target_depth"} + longest_side = max(bb, key=bb.get) + parameter_hires = {parameters_dict[longest_side]: DOWNSCALED_HIRES_LENGTH} + parameter_lowres = {parameters_dict[longest_side]: DOWNSCALED_LOWRES_LENGTH} + downscaled_hires = transform_to_data_extent( + sdata_pre_rasterize, coordinate_system=coordinate_system, maintain_positioning=False, **parameter_hires + ) + downscaled_lowres = transform_to_data_extent( + sdata_pre_rasterize, coordinate_system=coordinate_system, maintain_positioning=False, **parameter_lowres + ) + for image_name in sdata_images.images.keys(): + downscaled_images_hires[image_name] = downscaled_hires[image_name] + downscaled_images_lowres[image_name] = downscaled_lowres[image_name] - for image_name, image in sdata_images.images.items(): + if sdata_post_rasterize is None: + sdata_post_rasterize = downscaled_hires + + adata = sdata_post_rasterize[table_name].copy() + + if len(downscaled_images_hires) > 0: + for image_name in sdata_images.images.keys(): if "spatial" not in adata.uns: adata.uns["spatial"] = {} adata.uns["spatial"][image_name] = {} - adata.uns["spatial"][image_name]["images"] = {} - adata.uns["spatial"][image_name]["scalefactors"] = {} - - # for dataset_id in adata.uns["spatial"]: - # adata.uns["spatial"][dataset_id]["images"] = { - # "hires": np.array(sdata.images[f"{dataset_id}_hires_image"]).transpose([1, 2, 0]), - # "lowres": np.array(sdata.images[f"{dataset_id}_lowres_image"]).transpose([1, 2, 0]), - # } - # adata.uns["spatial"][dataset_id]["scalefactors"] = { - # "tissue_hires_scalef": get_transformation( - # sdata.shapes[dataset_id], to_coordinate_system="downscaled_hires" - # ).scale[0], - # "tissue_lowres_scalef": get_transformation( - # sdata.shapes[dataset_id], to_coordinate_system="downscaled_lowres" - # ).scale[0], - # "spot_diameter_fullres": sdata.shapes[dataset_id]["radius"][0] * 2, - # } + adata.uns["spatial"][image_name]["images"] = { + "hires": downscaled_images_hires[image_name].data.compute().transpose([1, 2, 0]), + "lowres": downscaled_images_lowres[image_name].data.compute().transpose([1, 2, 0]), + } + try: + adata.uns["spatial"][image_name]["scalefactors"] = { + "tissue_hires_scalef": 1.0, + "tissue_lowres_scalef": DOWNSCALED_LOWRES_LENGTH / DOWNSCALED_HIRES_LENGTH, + "spot_diameter_fullres": sdata_post_rasterize.shapes[region_name]["radius"].iloc[0] * 2, + } + except KeyError: + pass - adata = sdata_post_rasterize[table_name].copy() adata.obsm["spatial"] = get_centroids(sdata_post_rasterize[region_name]).compute().values return adata @@ -264,19 +291,19 @@ def from_legacy_anndata(adata: AnnData) -> SpatialData: Notes ----- - The SpatialData object will have one hires and one lores image for each dataset in the AnnData object. + The SpatialData object will have one hires and one lowres image for each dataset in the AnnData object. """ # AnnData keys SPATIAL = "spatial" SCALEFACTORS = "scalefactors" TISSUE_HIRES_SCALEF = "tissue_hires_scalef" - TISSUE_LORES_SCALEF = "tissue_lowres_scalef" + TISSUE_LOWRES_SCALEF = "tissue_lowres_scalef" SPOT_DIAMETER_FULLRES = "spot_diameter_fullres" IMAGES = "images" HIRES = "hires" - LORES = "lowres" + LOWRES = "lowres" # SpatialData keys REGION = "locations" @@ -295,21 +322,21 @@ def from_legacy_anndata(adata: AnnData) -> SpatialData: # read the image data and the scale factors for the shapes keys = set(adata.uns[SPATIAL][dataset_id].keys()) tissue_hires_scalef = None - tissue_lores_scalef = None + tissue_lowres_scalef = None if SCALEFACTORS in keys: scalefactors = adata.uns[SPATIAL][dataset_id][SCALEFACTORS] if TISSUE_HIRES_SCALEF in scalefactors: tissue_hires_scalef = scalefactors[TISSUE_HIRES_SCALEF] - if TISSUE_LORES_SCALEF in scalefactors: - tissue_lores_scalef = scalefactors[TISSUE_LORES_SCALEF] + if TISSUE_LOWRES_SCALEF in scalefactors: + tissue_lowres_scalef = scalefactors[TISSUE_LOWRES_SCALEF] if SPOT_DIAMETER_FULLRES in scalefactors: spot_diameter_fullres_list.append(scalefactors[SPOT_DIAMETER_FULLRES]) if IMAGES in keys: image_data = adata.uns[SPATIAL][dataset_id][IMAGES] if HIRES in image_data: hires = image_data[HIRES] - if LORES in image_data: - lores = image_data[LORES] + if LOWRES in image_data: + lowres = image_data[LOWRES] # construct the spatialdata elements if hires is not None: @@ -325,19 +352,19 @@ def from_legacy_anndata(adata: AnnData) -> SpatialData: # prepare the transformation to the hires image for the shapes scale_hires = Scale([tissue_hires_scalef, tissue_hires_scalef], axes=("x", "y")) shapes_transformations[f"{dataset_id}_downscaled_hires"] = scale_hires - if lores is not None: - # prepare the lores image + if lowres is not None: + # prepare the lowres image assert ( - tissue_lores_scalef is not None - ), "tissue_lores_scalef is required when an the lores image is present" - lores_image = Image2DModel.parse( - lores, dims=("y", "x", "c"), transformations={f"{dataset_id}_downscaled_lowres": Identity()} + tissue_lowres_scalef is not None + ), "tissue_lowres_scalef is required when an the lowres image is present" + lowres_image = Image2DModel.parse( + lowres, dims=("y", "x", "c"), transformations={f"{dataset_id}_downscaled_lowres": Identity()} ) - images[f"{dataset_id}_lowres_image"] = lores_image + images[f"{dataset_id}_lowres_image"] = lowres_image - # prepare the transformation to the lores image for the shapes - scale_lores = Scale([tissue_lores_scalef, tissue_lores_scalef], axes=("x", "y")) - shapes_transformations[f"{dataset_id}_downscaled_lowres"] = scale_lores + # prepare the transformation to the lowres image for the shapes + scale_lowres = Scale([tissue_lowres_scalef, tissue_lowres_scalef], axes=("x", "y")) + shapes_transformations[f"{dataset_id}_downscaled_lowres"] = scale_lowres # validate the spot_diameter_fullres value if len(spot_diameter_fullres_list) > 0: @@ -375,7 +402,7 @@ def from_legacy_anndata(adata: AnnData) -> SpatialData: new_table = TableModel.parse(new_table, region=REGION, region_key=REGION_KEY, instance_key=INSTANCE_KEY) else: new_table = adata.copy() - # workaround for https://github.com/scverse/spatialdata/issues/306, not anymore needed as soona ss the + # workaround for https://github.com/scverse/spatialdata/issues/306, not anymore needed as soon as the # multi_table branch is merged new_table.obs[REGION_KEY] = "dummy" new_table.obs[REGION_KEY] = new_table.obs[REGION_KEY].astype("category") diff --git a/tests/converters/test_legacy_anndata.py b/tests/converters/test_legacy_anndata.py index 77d2324a..6a1754c9 100644 --- a/tests/converters/test_legacy_anndata.py +++ b/tests/converters/test_legacy_anndata.py @@ -3,16 +3,20 @@ import pytest from anndata import AnnData from anndata.tests.helpers import assert_equal +from spatialdata import SpatialData from spatialdata._core.query.relational_query import _get_unique_label_values_as_index from spatialdata._utils import _assert_spatialdata_objects_seem_identical from spatialdata.datasets import blobs from spatialdata.models import TableModel +from spatialdata.transformations import get_transformation, set_transformation from spatialdata_io import from_legacy_anndata, to_legacy_anndata +BlobsTypes = Literal["blobs_labels", "blobs_circles", "blobs_polygons", "blobs_multipolygons"] -def blobs_annotating_element(name: Literal["blobs_labels", "blobs_circles", "blobs_polygons", "blobs_multipolygons"]): - sdata = blobs() + +def blobs_annotating_element(name: BlobsTypes) -> SpatialData: + sdata = blobs(length=50) if name == "blobs_labels": instance_id = _get_unique_label_values_as_index(sdata[name]).tolist() else: @@ -25,23 +29,52 @@ def blobs_annotating_element(name: Literal["blobs_labels", "blobs_circles", "blo return sdata -def idempotency_check_to_anndata(sdata): - adata = to_legacy_anndata(sdata) - adata2 = to_legacy_anndata(from_legacy_anndata(adata)) - assert_equal(adata, adata2) +def _adjust_table_for_idempotency_check(sdata: SpatialData, include_images: bool) -> SpatialData: + """We need to adjust the coordinate systems from from_legacy_anndata() to perform the idempotency check.""" + if not include_images: + # nothing needs to be adjusted + return sdata + CS_NAME = "blobs_multiscale_image_downscaled_hires" + sdata = sdata.filter_by_coordinate_system(CS_NAME) + sdata.rename_coordinate_systems({CS_NAME: "global"}) + t = get_transformation(sdata["locations"], "global") + set_transformation(sdata["locations"], {"global": t}, set_all=True) + IMG_NAME = "blobs_multiscale_image_hires_image" + im = sdata.images[IMG_NAME] + del sdata.images[IMG_NAME] + sdata.images["blobs_multiscale_image"] = im + return sdata + + +def idempotency_check_to_anndata(sdata0: SpatialData, include_images: bool) -> None: + adata0 = to_legacy_anndata(sdata0, include_images=include_images) + sdata1 = from_legacy_anndata(adata0) + sdata1 = _adjust_table_for_idempotency_check(sdata1, include_images=include_images) + adata2 = to_legacy_anndata(sdata1, include_images=include_images) + assert_equal(adata0, adata2) -def idempotency_check_from_anndata(adata): - sdata = from_legacy_anndata(adata) - sdata1 = from_legacy_anndata(to_legacy_anndata(sdata)) - _assert_spatialdata_objects_seem_identical(sdata, sdata1) +def idempotency_check_from_anndata(adata0: SpatialData, include_images: bool) -> None: + sdata0 = from_legacy_anndata(adata0) + sdata0 = _adjust_table_for_idempotency_check(sdata0, include_images=include_images) + adata1 = to_legacy_anndata(sdata0, include_images=include_images) + sdata1 = from_legacy_anndata(adata1) + sdata1 = _adjust_table_for_idempotency_check(sdata1, include_images=include_images) + _assert_spatialdata_objects_seem_identical(sdata0, sdata1) @pytest.mark.parametrize("name", ["blobs_labels", "blobs_circles", "blobs_polygons", "blobs_multipolygons"]) -def test_bidectional_convesion(name): +@pytest.mark.parametrize("include_images", [False, True]) +def test_bidectional_convesion(name: BlobsTypes, include_images: bool) -> None: + if include_images: + pytest.skip( + "include_images=True can't be tested because the bug " + "https://github.com/scverse/spatialdata/issues/165 causes a large error" + ) sdata0 = blobs_annotating_element(name) - adata0 = to_legacy_anndata(sdata0) + adata0 = to_legacy_anndata(sdata0, include_images=include_images) sdata1 = from_legacy_anndata(adata0) + sdata1 = _adjust_table_for_idempotency_check(sdata1, include_images=include_images) - idempotency_check_to_anndata(sdata1) - idempotency_check_from_anndata(adata0) + idempotency_check_to_anndata(sdata1, include_images=include_images) + idempotency_check_from_anndata(adata0, include_images=include_images) From 307c923dedb15b911653a03e7658beedc8a530ee Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Fri, 8 Mar 2024 16:11:49 +0100 Subject: [PATCH 10/25] fix --- tests/converters/test_legacy_anndata.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/converters/test_legacy_anndata.py b/tests/converters/test_legacy_anndata.py index 6a1754c9..313ace48 100644 --- a/tests/converters/test_legacy_anndata.py +++ b/tests/converters/test_legacy_anndata.py @@ -5,9 +5,9 @@ from anndata.tests.helpers import assert_equal from spatialdata import SpatialData from spatialdata._core.query.relational_query import _get_unique_label_values_as_index -from spatialdata._utils import _assert_spatialdata_objects_seem_identical from spatialdata.datasets import blobs from spatialdata.models import TableModel +from spatialdata.testing import assert_spatial_data_objects_are_identical from spatialdata.transformations import get_transformation, set_transformation from spatialdata_io import from_legacy_anndata, to_legacy_anndata @@ -60,7 +60,7 @@ def idempotency_check_from_anndata(adata0: SpatialData, include_images: bool) -> adata1 = to_legacy_anndata(sdata0, include_images=include_images) sdata1 = from_legacy_anndata(adata1) sdata1 = _adjust_table_for_idempotency_check(sdata1, include_images=include_images) - _assert_spatialdata_objects_seem_identical(sdata0, sdata1) + assert_spatial_data_objects_are_identical(sdata0, sdata1) @pytest.mark.parametrize("name", ["blobs_labels", "blobs_circles", "blobs_polygons", "blobs_multipolygons"]) From 5f9d905eb302319d6457ce8d5c7f6912292eeacc Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Mon, 18 Mar 2024 19:16:43 +0100 Subject: [PATCH 11/25] fix coordinate system --- .../converters/legacy_anndata.py | 124 +++++++++--------- 1 file changed, 59 insertions(+), 65 deletions(-) diff --git a/src/spatialdata_io/converters/legacy_anndata.py b/src/spatialdata_io/converters/legacy_anndata.py index 6370d1c3..da47b5e1 100644 --- a/src/spatialdata_io/converters/legacy_anndata.py +++ b/src/spatialdata_io/converters/legacy_anndata.py @@ -3,27 +3,16 @@ import warnings import numpy as np -import pandas as pd from anndata import AnnData -from multiscale_spatial_image import MultiscaleSpatialImage -from shapely import MultiPolygon, Polygon -from spatial_image import SpatialImage from spatialdata import ( SpatialData, - aggregate, get_centroids, get_extent, join_sdata_spatialelement_table, + to_circles, ) from spatialdata._core.operations._utils import transform_to_data_extent -from spatialdata.models import ( - Image2DModel, - Image3DModel, - ShapesModel, - TableModel, - get_axes_names, - get_table_keys, -) +from spatialdata.models import Image2DModel, ShapesModel, TableModel, get_table_keys from spatialdata.transformations import Identity, Scale @@ -130,9 +119,11 @@ def to_legacy_anndata( sdata = sdata.filter_by_coordinate_system(coordinate_system) if table_name is None: - assert ( - len(sdata.tables) == 1 - ), "When the table_name is not specified, the SpatialData object must have exactly one table." + assert len(sdata.tables) == 1, ( + "When the table_name is not specified, the SpatialData object must have exactly one table. The specified " + f"sdata object, after being filtered for the coordinate system {coordinate_system}, has " + f"{len(sdata.tables)} tables." + ) table_name = next(iter(sdata.tables)) else: assert table_name in sdata.tables, f"The table {table_name} is not present in the SpatialData object." @@ -154,54 +145,55 @@ def to_legacy_anndata( element = sdata[region[0]] region_name = region[0] - # convert polygons, multipolygons and labels to circles - obs = None - if isinstance(element, (SpatialImage, MultiscaleSpatialImage)): - # find the area of labels, estimate the radius from it; find the centroids - if isinstance(element, MultiscaleSpatialImage): - shape = element["scale0"].values().__iter__().__next__().shape - else: - shape = element.shape - - axes = get_axes_names(element) - if "z" in axes: - model = Image3DModel - else: - model = Image2DModel - ones = model.parse(np.ones((1,) + shape), dims=("c",) + axes) - aggregated = aggregate(values=ones, by=element, agg_func="sum").table - areas = aggregated.X.todense().A1.reshape(-1) - aobs = aggregated.obs - aobs["areas"] = areas - aobs["radius"] = np.sqrt(areas / np.pi) - - # get the centroids; remove the background if present (the background is not considered during aggregation) - centroids = get_centroids(element, coordinate_system=coordinate_system).compute() - if 0 in centroids.index: - centroids = centroids.drop(index=0) - centroids.index = centroids.index - aobs.index = aobs[instance_key] - aobs.index.name = None - assert len(aobs) == len(centroids) - obs = pd.merge(aobs, centroids, left_index=True, right_index=True, how="inner") - assert len(obs) == len(centroids) - elif isinstance(element.geometry.iloc[0], (Polygon, MultiPolygon)): - radius = np.sqrt(element.geometry.area / np.pi) - centroids = get_centroids(element, coordinate_system=coordinate_system).compute() - obs = pd.DataFrame({"radius": radius}) - obs = pd.merge(obs, centroids, left_index=True, right_index=True, how="inner") - if obs is not None: - spatial_axes = sorted(get_axes_names(element)) - centroids = obs[spatial_axes].values - shapes = ShapesModel.parse( - centroids, - geometry=0, - index=obs.index, - radius=obs["radius"].values, - transformations={coordinate_system: Identity()}, - ) - else: - shapes = sdata[region_name] + # # convert polygons, multipolygons and labels to circles + shapes = to_circles(element) + # obs = None + # if isinstance(element, (SpatialImage, MultiscaleSpatialImage)): + # # find the area of labels, estimate the radius from it; find the centroids + # if isinstance(element, MultiscaleSpatialImage): + # shape = element["scale0"].values().__iter__().__next__().shape + # else: + # shape = element.shape + # + # axes = get_axes_names(element) + # if "z" in axes: + # model = Image3DModel + # else: + # model = Image2DModel + # ones = model.parse(np.ones((1,) + shape), dims=("c",) + axes) + # aggregated = aggregate(values=ones, by=element, agg_func="sum").table + # areas = aggregated.X.todense().A1.reshape(-1) + # aobs = aggregated.obs + # aobs["areas"] = areas + # aobs["radius"] = np.sqrt(areas / np.pi) + # + # # get the centroids; remove the background if present (the background is not considered during aggregation) + # centroids = get_centroids(element, coordinate_system=coordinate_system).compute() + # if 0 in centroids.index: + # centroids = centroids.drop(index=0) + # centroids.index = centroids.index + # aobs.index = aobs[instance_key] + # aobs.index.name = None + # assert len(aobs) == len(centroids) + # obs = pd.merge(aobs, centroids, left_index=True, right_index=True, how="inner") + # assert len(obs) == len(centroids) + # elif isinstance(element.geometry.iloc[0], (Polygon, MultiPolygon)): + # radius = np.sqrt(element.geometry.area / np.pi) + # centroids = get_centroids(element, coordinate_system=coordinate_system).compute() + # obs = pd.DataFrame({"radius": radius}) + # obs = pd.merge(obs, centroids, left_index=True, right_index=True, how="inner") + # if obs is not None: + # spatial_axes = sorted(get_axes_names(element)) + # centroids = obs[spatial_axes].values + # shapes = ShapesModel.parse( + # centroids, + # geometry=0, + # index=obs.index, + # radius=obs["radius"].values, + # transformations={coordinate_system: Identity()}, + # ) + # else: + # shapes = sdata[region_name] circles_sdata = SpatialData(tables={table_name: table}, shapes={region_name: shapes}) joined_elements, new_table = join_sdata_spatialelement_table( @@ -269,7 +261,9 @@ def to_legacy_anndata( except KeyError: pass - adata.obsm["spatial"] = get_centroids(sdata_post_rasterize[region_name]).compute().values + adata.obsm["spatial"] = ( + get_centroids(sdata_post_rasterize[region_name], coordinate_system=coordinate_system).compute().values + ) return adata From 849b17fbe440356c7a6b7340d85e08cb881cc5eb Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Mon, 18 Mar 2024 19:28:11 +0100 Subject: [PATCH 12/25] created experimental submodule --- src/spatialdata_io/__init__.py | 6 ------ src/spatialdata_io/experimental/__init__.py | 6 ++++++ tests/converters/test_legacy_anndata.py | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) create mode 100644 src/spatialdata_io/experimental/__init__.py diff --git a/src/spatialdata_io/__init__.py b/src/spatialdata_io/__init__.py index b3d325ad..e4eb41ae 100644 --- a/src/spatialdata_io/__init__.py +++ b/src/spatialdata_io/__init__.py @@ -1,9 +1,5 @@ from importlib.metadata import version -from spatialdata_io.converters.legacy_anndata import ( - from_legacy_anndata, - to_legacy_anndata, -) from spatialdata_io.readers.codex import codex from spatialdata_io.readers.cosmx import cosmx from spatialdata_io.readers.curio import curio @@ -30,8 +26,6 @@ "xenium_aligned_image", "xenium_explorer_selection", "dbit", - "from_legacy_anndata", - "to_legacy_anndata", ] __version__ = version("spatialdata-io") diff --git a/src/spatialdata_io/experimental/__init__.py b/src/spatialdata_io/experimental/__init__.py new file mode 100644 index 00000000..4f87b75b --- /dev/null +++ b/src/spatialdata_io/experimental/__init__.py @@ -0,0 +1,6 @@ +from spatialdata_io.converters.legacy_anndata import ( + from_legacy_anndata, + to_legacy_anndata, +) + +__all__ = ["from_legacy_anndata", "to_legacy_anndata"] diff --git a/tests/converters/test_legacy_anndata.py b/tests/converters/test_legacy_anndata.py index 313ace48..8cebad8a 100644 --- a/tests/converters/test_legacy_anndata.py +++ b/tests/converters/test_legacy_anndata.py @@ -10,7 +10,7 @@ from spatialdata.testing import assert_spatial_data_objects_are_identical from spatialdata.transformations import get_transformation, set_transformation -from spatialdata_io import from_legacy_anndata, to_legacy_anndata +from spatialdata_io.experimental import from_legacy_anndata, to_legacy_anndata BlobsTypes = Literal["blobs_labels", "blobs_circles", "blobs_polygons", "blobs_multipolygons"] From 8a1430fc3b075d615c3ce546df1339199784a4ac Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Mon, 18 Mar 2024 19:34:46 +0100 Subject: [PATCH 13/25] fix docs --- docs/api.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/api.md b/docs/api.md index 5454098d..280df0cb 100644 --- a/docs/api.md +++ b/docs/api.md @@ -25,6 +25,8 @@ I/O for the `spatialdata` project. dbit ``` +### Conversion functions + ### Utility functions ```{eval-rst} @@ -35,9 +37,13 @@ I/O for the `spatialdata` project. xenium_explorer_selection ``` -### Conversion functions +### Experimental readers + +### Experimental conversion functions ```{eval-rst} +.. currentmodule:: spatialdata_io.experimental + .. autosummary:: :toctree: generated From e8c8828a1b21760beb591e84118c8d1f32abaf14 Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Mon, 18 Mar 2024 19:41:32 +0100 Subject: [PATCH 14/25] attempt fix tests --- tests/__init__.py | 0 tests/converters/__init__.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/__init__.py create mode 100644 tests/converters/__init__.py diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/converters/__init__.py b/tests/converters/__init__.py new file mode 100644 index 00000000..e69de29b From 41313ac8f03aeb692dba6167dba37db88cc2f8be Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Mon, 18 Mar 2024 19:54:59 +0100 Subject: [PATCH 15/25] removed old comments --- .../converters/legacy_anndata.py | 51 +------------------ 1 file changed, 1 insertion(+), 50 deletions(-) diff --git a/src/spatialdata_io/converters/legacy_anndata.py b/src/spatialdata_io/converters/legacy_anndata.py index da47b5e1..5025e92f 100644 --- a/src/spatialdata_io/converters/legacy_anndata.py +++ b/src/spatialdata_io/converters/legacy_anndata.py @@ -145,55 +145,8 @@ def to_legacy_anndata( element = sdata[region[0]] region_name = region[0] - # # convert polygons, multipolygons and labels to circles + # convert polygons, multipolygons and labels to circles shapes = to_circles(element) - # obs = None - # if isinstance(element, (SpatialImage, MultiscaleSpatialImage)): - # # find the area of labels, estimate the radius from it; find the centroids - # if isinstance(element, MultiscaleSpatialImage): - # shape = element["scale0"].values().__iter__().__next__().shape - # else: - # shape = element.shape - # - # axes = get_axes_names(element) - # if "z" in axes: - # model = Image3DModel - # else: - # model = Image2DModel - # ones = model.parse(np.ones((1,) + shape), dims=("c",) + axes) - # aggregated = aggregate(values=ones, by=element, agg_func="sum").table - # areas = aggregated.X.todense().A1.reshape(-1) - # aobs = aggregated.obs - # aobs["areas"] = areas - # aobs["radius"] = np.sqrt(areas / np.pi) - # - # # get the centroids; remove the background if present (the background is not considered during aggregation) - # centroids = get_centroids(element, coordinate_system=coordinate_system).compute() - # if 0 in centroids.index: - # centroids = centroids.drop(index=0) - # centroids.index = centroids.index - # aobs.index = aobs[instance_key] - # aobs.index.name = None - # assert len(aobs) == len(centroids) - # obs = pd.merge(aobs, centroids, left_index=True, right_index=True, how="inner") - # assert len(obs) == len(centroids) - # elif isinstance(element.geometry.iloc[0], (Polygon, MultiPolygon)): - # radius = np.sqrt(element.geometry.area / np.pi) - # centroids = get_centroids(element, coordinate_system=coordinate_system).compute() - # obs = pd.DataFrame({"radius": radius}) - # obs = pd.merge(obs, centroids, left_index=True, right_index=True, how="inner") - # if obs is not None: - # spatial_axes = sorted(get_axes_names(element)) - # centroids = obs[spatial_axes].values - # shapes = ShapesModel.parse( - # centroids, - # geometry=0, - # index=obs.index, - # radius=obs["radius"].values, - # transformations={coordinate_system: Identity()}, - # ) - # else: - # shapes = sdata[region_name] circles_sdata = SpatialData(tables={table_name: table}, shapes={region_name: shapes}) joined_elements, new_table = join_sdata_spatialelement_table( @@ -222,8 +175,6 @@ def to_legacy_anndata( sdata_pre_rasterize[image_name] = image bb = get_extent(sdata_images) - # TODO: transform to data extent highres - # TODO: transform to data extent lowres parameters_dict = {"x": "target_width", "y": "target_height", "z": "target_depth"} longest_side = max(bb, key=bb.get) parameter_hires = {parameters_dict[longest_side]: DOWNSCALED_HIRES_LENGTH} From ddc0cdd5b738ace7b32ad610125bb99282011dab Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Mon, 18 Mar 2024 19:56:23 +0100 Subject: [PATCH 16/25] removed old workaround --- src/spatialdata_io/converters/legacy_anndata.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/spatialdata_io/converters/legacy_anndata.py b/src/spatialdata_io/converters/legacy_anndata.py index 5025e92f..17406a9a 100644 --- a/src/spatialdata_io/converters/legacy_anndata.py +++ b/src/spatialdata_io/converters/legacy_anndata.py @@ -347,11 +347,4 @@ def from_legacy_anndata(adata: AnnData) -> SpatialData: new_table = TableModel.parse(new_table, region=REGION, region_key=REGION_KEY, instance_key=INSTANCE_KEY) else: new_table = adata.copy() - # workaround for https://github.com/scverse/spatialdata/issues/306, not anymore needed as soon as the - # multi_table branch is merged - new_table.obs[REGION_KEY] = "dummy" - new_table.obs[REGION_KEY] = new_table.obs[REGION_KEY].astype("category") - new_table.obs[INSTANCE_KEY] = np.arange(len(new_table)) - - new_table = TableModel.parse(new_table, region="dummy", region_key=REGION_KEY, instance_key=INSTANCE_KEY) return SpatialData(table=new_table, images=images, shapes=shapes) From 51c738d06498f31503c47eb33f3519021ac612df Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Wed, 20 Mar 2024 03:23:45 +0100 Subject: [PATCH 17/25] attempt fix docs --- docs/conf.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/conf.py b/docs/conf.py index 6fdc257c..d8cc098e 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -9,6 +9,9 @@ from datetime import datetime from importlib.metadata import metadata from pathlib import Path +import spatialdata_io.experimental + +_ = spatialdata_io.experimental HERE = Path(__file__).parent sys.path.insert(0, str(HERE / "extensions")) From 27fbd6b76cffc59bae71898272b750f09292c6b1 Mon Sep 17 00:00:00 2001 From: LucaMarconato <2664412+LucaMarconato@users.noreply.github.com> Date: Wed, 20 Mar 2024 03:37:34 +0100 Subject: [PATCH 18/25] Update test_and_deploy.yaml Testing against pre releases --- .github/workflows/test_and_deploy.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_and_deploy.yaml b/.github/workflows/test_and_deploy.yaml index bf07a32b..41aeda25 100644 --- a/.github/workflows/test_and_deploy.yaml +++ b/.github/workflows/test_and_deploy.yaml @@ -45,7 +45,7 @@ jobs: pip-${{ runner.os }}-${{ env.pythonLocation }}- - name: Install test dependencies run: | - python -m pip install --upgrade pip wheel + python -m pip install --pre --upgrade pip wheel pip install pytest-cov - name: Install dependencies run: | From a9e348f6cd7d4eb86d5bc367da6268521747dc10 Mon Sep 17 00:00:00 2001 From: LucaMarconato <2664412+LucaMarconato@users.noreply.github.com> Date: Wed, 20 Mar 2024 13:30:03 +0100 Subject: [PATCH 19/25] test against pre-release --- .github/workflows/test_and_deploy.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test_and_deploy.yaml b/.github/workflows/test_and_deploy.yaml index 41aeda25..c65910a5 100644 --- a/.github/workflows/test_and_deploy.yaml +++ b/.github/workflows/test_and_deploy.yaml @@ -45,11 +45,11 @@ jobs: pip-${{ runner.os }}-${{ env.pythonLocation }}- - name: Install test dependencies run: | - python -m pip install --pre --upgrade pip wheel + python -m pip install --upgrade pip wheel pip install pytest-cov - name: Install dependencies run: | - pip install -e ".[dev,test]" + pip install -e --pre ".[dev,test]" - name: Test env: MPLBACKEND: agg From 2d32017dc79952fabd8c1be456ef41ee281e0f6b Mon Sep 17 00:00:00 2001 From: LucaMarconato <2664412+LucaMarconato@users.noreply.github.com> Date: Wed, 20 Mar 2024 13:31:28 +0100 Subject: [PATCH 20/25] fix --- .github/workflows/test_and_deploy.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_and_deploy.yaml b/.github/workflows/test_and_deploy.yaml index c65910a5..06167e61 100644 --- a/.github/workflows/test_and_deploy.yaml +++ b/.github/workflows/test_and_deploy.yaml @@ -49,7 +49,7 @@ jobs: pip install pytest-cov - name: Install dependencies run: | - pip install -e --pre ".[dev,test]" + pip install --pre -e ".[dev,test]" - name: Test env: MPLBACKEND: agg From 53b30e8ec95a6f2b4a0c5eb8e34b96ad8b1bffa3 Mon Sep 17 00:00:00 2001 From: LucaMarconato <2664412+LucaMarconato@users.noreply.github.com> Date: Wed, 20 Mar 2024 13:49:42 +0100 Subject: [PATCH 21/25] building docs against pre-release --- .readthedocs.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index b29bfd66..c6c730fd 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -13,3 +13,5 @@ python: path: . extra_requirements: - doc + extra_pip_args: + - "--pre" \ No newline at end of file From 3a4f87aff102cdee198accd5d97d318a7fabdb6a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 20 Mar 2024 12:50:01 +0000 Subject: [PATCH 22/25] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .readthedocs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index c6c730fd..9f5af428 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -14,4 +14,4 @@ python: extra_requirements: - doc extra_pip_args: - - "--pre" \ No newline at end of file + - "--pre" From f895c4786e1c8a9de19d58816f7520d2de3fd166 Mon Sep 17 00:00:00 2001 From: LucaMarconato <2664412+LucaMarconato@users.noreply.github.com> Date: Wed, 20 Mar 2024 14:00:42 +0100 Subject: [PATCH 23/25] attemp docs from pre-released spatialdata --- pyproject.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 2e40e45c..44ff6a85 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,6 +55,10 @@ test = [ "pytest", "pytest-cov", ] +# this will be used by readthedocs and will make pip also look for pre-releases, generally installing the latest available version +pre = [ + "spatialdata>=0.1.0-pre0" +] [tool.coverage.run] source = ["spatialdata_io"] From b41ea186ecbb0f7d121f75e315275486bb27f187 Mon Sep 17 00:00:00 2001 From: LucaMarconato <2664412+LucaMarconato@users.noreply.github.com> Date: Wed, 20 Mar 2024 14:01:19 +0100 Subject: [PATCH 24/25] fix --- .readthedocs.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 9f5af428..89c704cf 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -13,5 +13,4 @@ python: path: . extra_requirements: - doc - extra_pip_args: - - "--pre" + - pre \ No newline at end of file From e4f7f819c654e468648e02e0b0c276628e706c0e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 20 Mar 2024 13:01:34 +0000 Subject: [PATCH 25/25] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .readthedocs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 89c704cf..ec4886d3 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -13,4 +13,4 @@ python: path: . extra_requirements: - doc - - pre \ No newline at end of file + - pre