Skip to content

Commit

Permalink
first tables implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
lorenzocerrone committed Sep 25, 2024
1 parent c491d43 commit 13ce548
Show file tree
Hide file tree
Showing 10 changed files with 673 additions and 0 deletions.
21 changes: 21 additions & 0 deletions src/ngio/tables/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Module for handling tables in the Fractal format."""

from ngio.tables._utils import df_from_andata, df_to_andata, validate_roi_table
from ngio.tables.tables_group import (
FeatureTable,
MaskingROITable,
ROITable,
Table,
TableGroup,
)

__all__ = [
"Table",
"ROITable",
"FeatureTable",
"MaskingROITable",
"TableGroup",
"df_from_andata",
"df_to_andata",
"validate_roi_table",
]
128 changes: 128 additions & 0 deletions src/ngio/tables/_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
import anndata as ad
import pandas as pd
import pandas.api.types as ptypes


class TableValidationError(Exception):
    """Raised when a table does not follow the expected format."""


def _safe_to_df(data_frame: pd.DataFrame, index_key: str) -> "ad.AnnData":
    """Validate a fractal table and convert it to an AnnData object.

    String columns (including ``index_key``, which is cast to string) are
    stored in ``obs``; numeric columns are cast to float32 and stored in ``X``.
    The caller's data frame is left untouched.

    Args:
        data_frame: The table to convert.
        index_key: Name of the integer column identifying each row.

    Returns:
        The AnnData representation of the table.

    Raises:
        TableValidationError: If ``index_key`` is missing or is not of integer
            type, if a column mixes Python types, or if a column is neither
            string nor numeric.
    """
    columns = data_frame.columns

    if index_key not in columns:
        raise TableValidationError(f"index_key {index_key} not found in data frame")

    if not ptypes.is_integer_dtype(data_frame[index_key]):
        raise TableValidationError(f"index_key {index_key} must be of integer type")

    # Work on a copy: the index column is rewritten below and the caller's
    # data frame must not be modified as a side effect of the conversion.
    data_frame = data_frame.copy()
    data_frame[index_key] = data_frame[index_key].astype(str)

    str_columns, num_columns = [], []
    for c_name in columns:
        column_df = data_frame[c_name]

        # Compute the element types once; reused for the error message.
        element_types = column_df.apply(type).unique()
        if len(element_types) > 1:
            raise TableValidationError(
                f"Column {c_name} has mixed types: "
                f"{element_types}. "
                "Type of all elements must be the same."
            )

        if ptypes.is_string_dtype(column_df):
            str_columns.append(c_name)
        elif ptypes.is_numeric_dtype(column_df):
            num_columns.append(c_name)
        else:
            raise TableValidationError(
                f"Column {c_name} has unsupported type: {column_df.dtype}."
                " Supported types are string and numerics."
            )

    # String columns become the observation annotations (string index).
    obs_df = data_frame[str_columns]
    obs_df.index = obs_df.index.astype(str)

    # Numeric columns become the float32 data matrix.
    x_df = data_frame[num_columns].astype("float32")
    return ad.AnnData(X=x_df, obs=obs_df)


def df_to_andata(
    data_frame: pd.DataFrame,
    index_key: str = "label",
    inplicit_conversion: bool = False,
) -> ad.AnnData:
    """Build an AnnData object from a DataFrame holding a fractal table.

    Args:
        data_frame: The fractal table as a pandas DataFrame.
        index_key: Name of the column used as the index of the table.
            Default is 'label'.
        inplicit_conversion: If True, the data frame is handed to AnnData
            as is, without any check. If False, the data frame is validated
            and converted by ``_safe_to_df``.
            Default is False.
    """
    if not inplicit_conversion:
        return _safe_to_df(data_frame, index_key)

    return ad.AnnData(data_frame)


def df_from_andata(andata_table: ad.AnnData, index_key: str = "label") -> pd.DataFrame:
    """Build a pandas DataFrame from an AnnData object holding a fractal table.

    The data matrix and the ``obs`` columns are merged into a single data
    frame, and the ``index_key`` column is cast to integers.

    Args:
        andata_table: The fractal table as an AnnData object.
        index_key: Name of the column used as the index of the table.
            Default is 'label'.

    Raises:
        TableValidationError: If ``index_key`` is not a column of the
            resulting data frame.
    """
    table_df = andata_table.to_df()

    # Append the observation annotations next to the data matrix columns.
    obs_columns = andata_table.obs_keys()
    table_df[obs_columns] = andata_table.obs

    if index_key in table_df.columns:
        table_df[index_key] = table_df[index_key].astype(int)
        return table_df

    raise TableValidationError(f"index_key {index_key} not found in data frame.")


def validate_roi_table(
    data_frame: pd.DataFrame,
    required_columns: list[str],
    optional_columns: list[str],
    index_name: str = "FieldIndex",
) -> pd.DataFrame:
    """Check that a DataFrame is a well-formed ROI table.

    Args:
        data_frame: The ROI table as a DataFrame.
        required_columns: Columns that must be present in the ROI table.
        optional_columns: Columns that may be present in the ROI table.
        index_name: The name of the index column in the ROI table.
            Default is 'FieldIndex'.

    Returns:
        The validated table, indexed by ``index_name``.

    Raises:
        TableValidationError: If ``index_name`` is neither the index nor a
            column, if a required column is missing, or if a column is
            neither required nor optional.
    """
    already_indexed = data_frame.index.name == index_name
    if not already_indexed:
        if index_name not in data_frame.columns:
            raise TableValidationError(
                f"{index_name} is required in ROI table. It must be the index or a "
                "column"
            )
        # Promote the column to be the index of the table.
        data_frame = data_frame.set_index(index_name)

    present_columns = data_frame.columns
    for required in required_columns:
        if required in present_columns:
            continue
        raise TableValidationError(f"Column {required} is required in ROI table")

    allowed_columns = list(required_columns) + list(optional_columns)
    for present in present_columns:
        if present in allowed_columns:
            continue
        raise TableValidationError(
            f"Column {present} is not recognized in ROI table"
        )

    return data_frame
Empty file removed src/ngio/tables/fractal_tables.py
Empty file.
161 changes: 161 additions & 0 deletions src/ngio/tables/tables_group.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
"""Module for handling the /tables group in an OME-NGFF file.
The /tables group contains the ROI, feature and masking ROI tables of an image.
"""

from typing import Literal

import pandas as pd
import zarr

from ngio.io import StoreLike
from ngio.pydantic_utils import BaseWithExtraFields
from ngio.tables.v1 import FeatureTableV1, ROITableV1


class MaskingROITableV1:
    """Placeholder for Masking ROI Table V1.

    The table type is registered but not implemented yet: both opening an
    existing table and creating a new one raise ``NotImplementedError``.
    """

    def __init__(self, group: zarr.Group) -> None:
        """Initialize the class from an existing group.

        Raises:
            NotImplementedError: Always, the table type is not implemented.
        """
        raise NotImplementedError("Masking ROI Table V1 not implemented.")

    @classmethod
    def _create_new(
        cls,
        parent_group: zarr.Group,
        name: str,
        table: pd.DataFrame | None = None,
        overwrite: bool = False,
        **type_specific_kwargs,
    ) -> "MaskingROITableV1":
        """Creation hook invoked by ``TableGroup.new``.

        Defined so that requesting a new masking ROI table fails with the
        same explicit error as opening one, instead of an AttributeError on
        the missing hook.

        Raises:
            NotImplementedError: Always, the table type is not implemented.
        """
        raise NotImplementedError("Masking ROI Table V1 not implemented.")


# Public aliases of the table implementations.
ROITable = ROITableV1
FeatureTable = FeatureTableV1
MaskingROITable = MaskingROITableV1

# Table implementations of each type, indexed by `fractal_table_version`.
IMPLEMENTED_ROI_TABLES = {"1": ROITableV1}
IMPLEMENTED_FEATURE_TABLES = {"1": FeatureTableV1}
IMPLEMENTED_MASKING_ROI_TABLES = {"1": MaskingROITableV1}

# Union of all the table classes, used in type annotations.
Table = ROITable | FeatureTable | MaskingROITable


class CommonMeta(BaseWithExtraFields):
    """Common metadata for all tables.

    Built from the attributes of a table group to find out which table
    implementation must be used to open it.
    """

    # Kind of table stored in the group.
    type: Literal["roi_table", "feature_table", "masking_roi_table"]
    # Version string used to look up the table implementation.
    fractal_table_version: str


def _find_table_impl(
table_type: Literal["roi_table", "feature_table", "masking_roi_table"],
version: str,
) -> Table:
"""Find the type of table in the group."""
if table_type == "roi_table":
if version not in IMPLEMENTED_ROI_TABLES:
raise ValueError(f"ROI Table version {version} not implemented.")
return IMPLEMENTED_ROI_TABLES[version]

elif table_type == "feature_table":
if version not in IMPLEMENTED_FEATURE_TABLES:
raise ValueError(f"Feature Table version {version} not implemented.")
return IMPLEMENTED_FEATURE_TABLES[version]

elif table_type == "masking_roi_table":
if version not in IMPLEMENTED_MASKING_ROI_TABLES:
raise ValueError(f"Masking ROI Table version {version} not implemented.")
return IMPLEMENTED_MASKING_ROI_TABLES[version]

else:
raise ValueError(f"Table type {table_type} not recognized.")


def _get_table_impl(group: zarr.Group) -> Table:
    """Open ``group`` with the table implementation declared in its attributes."""
    meta = CommonMeta(**group.attrs)
    table_cls = _find_table_impl(
        table_type=meta.type,
        version=meta.fractal_table_version,
    )
    return table_cls(group=group)


class TableGroup:
    """A class to handle the /tables group in an OME-NGFF file.

    The names of the available tables are read from the 'tables' attribute
    of the group, and every listed name must be a sub-group.
    """

    def __init__(self, group: StoreLike) -> None:
        """Initialize the handler from the group containing 'tables'.

        Args:
            group: The container of the 'tables' group. It is used with
                ``in`` and item access.
                NOTE(review): annotated as StoreLike but used like an opened
                zarr group; confirm what callers pass.

        Raises:
            ValueError: If ``group`` contains no 'tables' entry.
        """
        if "tables" not in group:
            raise ValueError("The NGFF image contains no 'tables' Group.")

        self._group: zarr.Group = group["tables"]

    def _validate_list_of_tables(self, list_of_tables: list[str]) -> None:
        """Check that every listed table is a sub-group of the tables group.

        Raises:
            ValueError: If a table name has no matching sub-group.
        """
        list_of_groups = list(self._group.group_keys())

        for table_name in list_of_tables:
            if table_name not in list_of_groups:
                raise ValueError(f"Table {table_name} not found in the group.")

    def _get_list_of_tables(self) -> list[str]:
        """Return the validated table names from the 'tables' attribute."""
        list_of_tables = self._group.attrs.get("tables", [])
        self._validate_list_of_tables(list_of_tables)
        return list_of_tables

    def list(
        self,
        type: Literal["roi_table", "feature_table", "masking_roi_table"] | None = None,
    ) -> list[str]:
        """List the tables in the group.

        Args:
            type (str): The type of table to list.
                If None, all tables are listed.
                Allowed values are: 'roi_table', 'feature_table',
                'masking_roi_table'.

        Returns:
            The names of the tables of the requested type.

        Raises:
            ValueError: If ``type`` is not an allowed value, or if a listed
                table has no matching sub-group.
        """
        # The names are already validated by `_get_list_of_tables`.
        list_of_tables = self._get_list_of_tables()
        if type is None:
            return list_of_tables

        allowed_types = ("roi_table", "feature_table", "masking_roi_table")
        if type not in allowed_types:
            raise ValueError(
                f"Table type {type} not recognized. "
                "Allowed values are: 'roi_table', 'feature_table', "
                "'masking_roi_table'."
            )

        # Keep only the tables whose metadata declares the requested type.
        return [
            table_name
            for table_name in list_of_tables
            if CommonMeta(**self._group[table_name].attrs).type == type
        ]

    def get(self, name: str) -> Table:
        """Get a table from the group.

        Args:
            name: The name of the table.

        Raises:
            ValueError: If no table with this name is listed in the group.
        """
        list_of_tables = self._get_list_of_tables()
        if name not in list_of_tables:
            raise ValueError(f"Table {name} not found in the group.")

        return _get_table_impl(group=self._group[name])

    def new(
        self,
        name: str,
        table: pd.DataFrame | None = None,
        table_type: str = "roi_table",
        overwrite: bool = False,
        version: str = "1",
        **type_specific_kwargs,
    ) -> Table:
        """Add a new table to the group.

        Args:
            name: The name of the new table.
            table: The content of the table, forwarded to the table
                implementation.
            table_type: The type of table to create.
                Allowed values are: 'roi_table', 'feature_table',
                'masking_roi_table'.
            overwrite: If False, an existing table with the same name is
                an error.
            version: The version of the table specification.
            **type_specific_kwargs: Extra arguments forwarded to the table
                implementation.

        Raises:
            ValueError: If the table already exists and ``overwrite`` is
                False, or if the table type or version is not supported.
        """
        list_of_tables = self._get_list_of_tables()
        if not overwrite and name in list_of_tables:
            raise ValueError(f"Table {name} already exists in the group.")

        table_impl = _find_table_impl(table_type=table_type, version=version)
        new_table = table_impl._create_new(
            parent_group=self._group,
            name=name,
            table=table,
            overwrite=overwrite,
            **type_specific_kwargs,
        )

        # Register the table only once; an overwritten table is already listed.
        if name not in list_of_tables:
            self._group.attrs["tables"] = [*list_of_tables, name]
        return new_table
Empty file removed src/ngio/tables/tables_handler.py
Empty file.
6 changes: 6 additions & 0 deletions src/ngio/tables/v1/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
"""This module contains the objects to handle the fractal tables V1."""

from ngio.tables.v1.feature_tables import FeatureTableV1
from ngio.tables.v1.roi_tables import ROITableV1

__all__ = ["ROITableV1", "FeatureTableV1"]
Loading

0 comments on commit 13ce548

Please sign in to comment.