constantinpape · anwai98 · May 20, 2024 · Jul 29, 2024 · Jul 29, 2024
diff --git a/scripts/datasets/light_microscopy/check_cellpose.py b/scripts/datasets/light_microscopy/check_cellpose.py
@@ -0,0 +1,20 @@
+from torch_em.util.debug import check_loader
+from torch_em.data.datasets.light_microscopy import get_cellpose_loader
+
+
+ROOT = "/media/anwai/ANWAI/data/cellpose/"  # machine-specific data folder, passed as `path` below
+
+
+def check_cellpose():
+    loader = get_cellpose_loader(
+        path=ROOT,
+        split="train",
+        patch_shape=(512, 512),
+        batch_size=1,
+        choice="cyto",
+    )
+    check_loader(loader, 8, instance_labels=True)
+
+
+if __name__ == "__main__":  # run the check only when this file is executed as a script
+    check_cellpose()
diff --git a/torch_em/data/datasets/light_microscopy/__init__.py b/torch_em/data/datasets/light_microscopy/__init__.py
@@ -1,3 +1,4 @@
+from .cellpose import get_cellpose_loader, get_cellpose_dataset
 from .cellseg_3d import get_cellseg_3d_loader, get_cellseg_3d_dataset
 from .covid_if import get_covid_if_loader, get_covid_if_dataset
 from .ctc import get_ctc_segmentation_loader, get_ctc_segmentation_dataset

diff --git a/torch_em/data/datasets/light_microscopy/cellpose.py b/torch_em/data/datasets/light_microscopy/cellpose.py
@@ -0,0 +1,118 @@
+"""This dataset contains annotations for cell segmentation in fluorescently-labeled microscopy images.
+
+This dataset is from the publication https://doi.org/10.1038/s41592-020-01018-x.
+Please cite it if you use this dataset in your research.
+"""
+
+
+import os
+from glob import glob
+from natsort import natsorted
+from typing import Union, Tuple
+
+import torch_em
+
+from .. import util
+from .neurips_cell_seg import to_rgb
+from ... import ImageCollectionDataset
+
+
+URL = "https://www.cellpose.org/dataset"  # web page of the CellPose dataset
+
+
+def _get_cellpose_paths(path, split, choice):
+    if choice == "cyto":
+        assert split in ["train", "test"], f"'{split}' is not a valid split in '{choice}'."
+    elif choice == "cyto2":
+        assert split == "train", f"'{split}' is not a valid split in '{choice}'."
+    else:
+        raise ValueError(f"'{choice}' is not a valid dataset choice.")
+
+    image_paths = natsorted(glob(os.path.join(path, choice, split, "*_img.png")))
+    gt_paths = natsorted(glob(os.path.join(path, choice, split, "*_masks.png")))
+
+    return image_paths, gt_paths
+
+
+def get_cellpose_dataset(
+    path: Union[os.PathLike, str],
+    split: str,
+    patch_shape: Tuple[int, int],
+    choice: str = "cyto",
+    download: bool = False,
+    **kwargs
+):
+    """Get the CellPose dataset for cell segmentation.
+
+    Args:
+        TODO
+
+    Returns:
+        The segmentation dataset.
+    """
+    assert choice in ["cyto", "cyto2"]
+    assert split in ["train", "test"]
+
+    if download:
+        assert NotImplementedError(
+            "The dataset cannot be automatically downloaded. ",
+            "Please see 'get_cellpose_dataset' in 'torch_em/data/datasets/cellpose.py' for details."
+        )
+
+    image_paths, gt_paths = _get_cellpose_paths(path=path, split=split, choice=choice)
+
+    if "raw_transform" not in kwargs:
+        raw_transform = torch_em.transform.get_raw_transform(augmentation2=to_rgb)
+
+    if "transform" not in kwargs:
+        transform = torch_em.transform.get_augmentations(ndim=2)
+
+    dataset = torch_em.default_segmentation_dataset(
+        raw_paths=image_paths,
+        raw_key=None,
+        label_paths=gt_paths,
+        label_key=None,
+        patch_shape=patch_shape,
+        raw_transform=raw_transform,
+        transform=transform,
+        **kwargs
+    )
+    dataset = ImageCollectionDataset(
+        raw_image_paths=image_paths,
+        label_image_paths=gt_paths,
+        patch_shape=patch_shape,
+        raw_transform=raw_transform,
+        transform=transform,
+    )
+
+    return dataset
+
+
+def get_cellpose_loader(
+    path: Union[os.PathLike, str],
+    split: str,
+    patch_shape: Tuple[int, int],
+    batch_size: int,
+    choice: str = "cyto",
+    download: bool = False,
+    **kwargs
+):
+    """Get the CellPose dataloader for cell segmentation.
+
+    Args:
+        TODO
+
+    Returns:
+        The DataLoader.
+    """
+    ds_kwargs, loader_kwargs = util.split_kwargs(torch_em.default_segmentation_dataset, **kwargs)
+    dataset = get_cellpose_dataset(
+        path=path,
+        split=split,
+        patch_shape=patch_shape,
+        choice=choice,
+        download=download,
+        **ds_kwargs
+    )
+    loader = torch_em.get_data_loader(dataset=dataset, batch_size=batch_size, **loader_kwargs)
+    return loader