angelolab · ngreenwald · Jan 20, 2023 · Nov 10, 2022 · Nov 10, 2022 · Nov 10, 2022
diff --git a/ark/utils/example_dataset.py b/ark/utils/example_dataset.py
@@ -28,6 +28,7 @@ def __init__(self, dataset: str, overwrite_existing: bool = True, cache_dir: str
                     * `"LDA_training_inference"`
                     * `"neighborhood_analysis"`
                     * `"pairwise_spatial_enrichment"`
+                    * `"ome_tiff"`
             overwrite_existing (bool): A flag to overwrite existing data. Defaults to `True`.
             cache_dir (str, optional): The directory to save the cache dir. Defaults to `None`,
                 which internally in Hugging Face defaults to `~/.cache/huggingface/datasets`.
@@ -50,6 +51,7 @@ def __init__(self, dataset: str, overwrite_existing: bool = True, cache_dir: str
             "example_cell_output_dir": "pixie/example_cell_output_dir",
             "spatial_lda": "spatial_analysis/spatial_lda",
             "post_clustering": "post_clustering",
+            "ome_tiff": "ome_tiff"
         }
         """
         Path suffixes for mapping each downloaded dataset partition to it's appropriate
@@ -173,7 +175,8 @@ def get_example_dataset(dataset: str, save_dir: Union[str, pathlib.Path],
                       "LDA_preprocessing",
                       "LDA_training_inference",
                       "neighborhood_analysis",
-                      "pairwise_spatial_enrichment"]
+                      "pairwise_spatial_enrichment",
+                      "ome_tiff"]
 
     # Check the appropriate dataset name
     try:

diff --git a/ark/utils/example_dataset_test.py b/ark/utils/example_dataset_test.py
@@ -1,5 +1,5 @@
 import pathlib
-from typing import Callable, Generator, Iterator
+from typing import Callable, Dict, Generator, Iterator, List
 
 import pytest
 from tmi import test_utils
@@ -16,7 +16,8 @@
                                          "LDA_preprocessing",
                                          "LDA_training_inference",
                                          "neighborhood_analysis",
-                                         "pairwise_spatial_enrichment"])
+                                         "pairwise_spatial_enrichment",
+                                         "ome_tiff"])
 def dataset_download(request) -> Iterator[ExampleDataset]:
     """
     A Fixture which instantiates and downloads the dataset with respect to each
@@ -80,7 +81,7 @@ def _setup(self):
             "cell_masks": [f"fov{i}_cell_mask" for i in range(2)]
         }
 
-        self._spatial_analysis_lda_preprocessed_files = [
+        self._spatial_analysis_lda_preprocessed_files: List[str] = [
             "difference_mats",
             "featurized_cell_table",
             "formatted_cell_table",
@@ -90,14 +91,17 @@ def _setup(self):
         self._post_clustering_files = ["cell_table_thresholded",
                                        "marker_thresholds", "updated_cell_table"]
 
+        self._ome_tiff_files: List[str] = ["fov1.ome"]
+
         self.dataset_test_fns: dict[str, Callable] = {
             "image_data": self._image_data_check,
             "cell_table": self._cell_table_check,
             "deepcell_output": self._deepcell_output_check,
             "example_pixel_output_dir": self._example_pixel_output_dir_check,
             "example_cell_output_dir": self._example_cell_output_dir_check,
             "spatial_lda": self._spatial_lda_output_dir_check,
-            "post_clustering": self._post_clustering_output_dir_check
+            "post_clustering": self._post_clustering_output_dir_check,
+            "ome_tiff": self._ome_tiff_check
         }
 
         # Mapping the datasets to their respective test functions.
@@ -110,6 +114,7 @@ def _setup(self):
             "example_cell_output_dir": "pixie/example_cell_output_dir",
             "spatial_lda": "spatial_analysis/spatial_lda",
             "post_clustering": "post_clustering",
+            "ome_tiff": "ome_tiff",
         }
 
     def test_download_example_dataset(self, dataset_download: ExampleDataset):
@@ -392,6 +397,17 @@ def _post_clustering_output_dir_check(self, dir_p: pathlib.Path):
         downloaded_post_cluster_names = [f.stem for f in downloaded_post_cluster]
         assert set(self._post_clustering_files) == set(downloaded_post_cluster_names)
 
+    def _ome_tiff_check(self, dir_p: pathlib.Path):
+        """
+        Checks that the stems of the `*.ome.tiff` files in `dir_p` match `self._ome_tiff_files`.
+
+        Args:
+            dir_p (pathlib.Path): The directory to check.
+        """
+        downloaded_ome_tiff = list(dir_p.glob("*.ome.tiff"))
+        downloaded_ome_tiff_names = [f.stem for f in downloaded_ome_tiff]
+        assert set(self._ome_tiff_files) == set(downloaded_ome_tiff_names)
+
     def _suffix_paths(self, dataset_download: ExampleDataset,
                       parent_dir: pathlib.Path) -> Generator:
         """

diff --git a/docs/conf.py b/docs/conf.py
@@ -96,7 +96,8 @@
                         'kiosk_client',
                         'mpl_toolkits',
                         'tqdm',
-                        'ark.utils._bootstrapping']
+                        'ark.utils._bootstrapping',
+                        'xmltodict']
 
 # prefix each section label with the name of the document it is in, followed by a colon
 # autosection_label_prefix_document = True

diff --git a/requirements.txt b/requirements.txt
@@ -16,9 +16,11 @@ matplotlib==3.4.3
 multiprocess==0.70.13
 natsort>=8.1.0,<9
 numpy>=1.23.4,<2
+numpy>=1.23.4,<2
 palettable>=3.3.0,<4
 pandas>=1.3.5,<2
 pillow>=9.1.0,<9.2
+pillow>=9.1.0,<9.2
 pyFlowSOM>=0.1.12,<1
 requests>=2.27.1,<3
 scikit-image>=0.19.3,<0.20
@@ -33,4 +35,5 @@ tmi @ git+https://github.com/angelolab/tmi.git@v0.1.0
 umap-learn>=0.5.3,<1
 xarray>=2022.6.0,<2023
 tqdm>=4.64.0,<5
+xmltodict>=0.13.0,<1
 zstandard>=0.19.0,<1
diff --git a/templates/OME-TIFF_Conversion.ipynb b/templates/OME-TIFF_Conversion.ipynb
@@ -0,0 +1,219 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Utility Notebook - OME-TIFF Conversion\n",
+    "\n",
+    "This small notebook contains utilities to convert:\n",
+    "- Multichannel OME-TIFFs into single channel tiff files, with one tiff file per channel in the OME-TIFF.\n",
+    "- Single-Channel tiffs per FOV into a multichannel OME-TIFF.\n",
+    "\n",
+    "\n",
+    "Single channel tiff structure:\n",
+    "```sh\n",
+    "fov_1/\n",
+    "├── channel_1.tiff\n",
+    "├── channel_2.tiff\n",
+    "├── channel_3.tiff\n",
+    "├── ...\n",
+    "└── channel_n.tiff\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from ark.utils import load_utils, example_dataset\n",
+    "from typing import List\n",
+    "import pathlib\n",
+    "from tqdm.notebook import tqdm\n",
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## OME-TIFF to Single Channel TIFFs"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "If you would like to test this feature using `Ark`'s example dataset, run the cell below. To use your own data, skip the cell below. The dataset contains one OME-TIFF (a converted version of FOV 1 from the example dataset)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "example_dataset.get_example_dataset(dataset=\"ome_tiff\", save_dir = pathlib.Path(\"../data/example_dataset/\"), overwrite_existing = True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Variables:**\n",
+    "* `ome_dir_path`: The directory containing all the OME-TIFFs which will be used.\n",
+    "* `data_dir_path`: The directory where the single channel TIFFs are saved. Recommended to be a subdirectory in `../data/`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ome_dir_path: str = \"../data/example_dataset/ome_tiff/\"\n",
+    "data_dir_path: str = \"../data/your_data/\"\n",
+    "\n",
+    "data_dir: pathlib.Path = pathlib.Path(data_dir_path)\n",
+    "ome_dir: pathlib.Path = pathlib.Path(ome_dir_path)\n",
+    "\n",
+    "# Create both directories (and any missing parents) if they do not already exist\n",
+    "data_dir.mkdir(parents=True, exist_ok=True)\n",
+    "ome_dir.mkdir(parents=True, exist_ok=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Convert all the OME-TIFFS to Single Channel TIFFs and save them in `data_dir`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Gather all the OME-TIFFs (both `.ome.tif` and `.ome.tiff` extensions)\n",
+    "ome_tiffs = list(ome_dir.glob(\"*.ome.tif*\"))\n",
+    "\n",
+    "# Convert all the OME-TIFFs to Single Channel TIFFs\n",
+    "for ome_tiff in tqdm(ome_tiffs):\n",
+    "    load_utils.ome_to_fov(ome_tiff, data_dir=data_dir)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Single Channel TIFFs to OME-TIFF"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "If you would like to test this feature using `Ark`'s example dataset, run the cell below. To use your own data, skip the cell below. The dataset contains 11 FOVs, each with 22 single channel TIFF images (one per channel)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "example_dataset.get_example_dataset(dataset=\"segment_image_data\", save_dir = pathlib.Path(\"../data/example_dataset/\"), overwrite_existing = True)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Variables**\n",
+    "\n",
+    "- `fovs`: A list of FOVs (folders containing single channel TIFFs) to convert to OME-TIFFs. If `None`, all FOVs in `data_dir` are converted to OME-TIFFs.\n",
+    "- `channels`: The channels of each FOV to include in its OME-TIFF. Defaults to `None`, which uses all channels in each FOV; alternatively, provide a list of desired channels (e.g. `[\"chan1\", \"chan2\", ..., \"chan7\"]`).\n",
+    "- `img_sub_folder`: The subdirectory within each of the `fovs` containing the Single Channel TIFFs, an optional parameter.\n",
+    "- `data_dir`: The directory containing the Single Channel TIFFs.\n",
+    "- `save_dir`: The directory to save the OME-TIFF.\n",
+    "\n",
+    "Note: the name of each OME-TIFF is based on the name of its FOV folder (e.g. the folder `fov1` will become `fov1.ome.tiff`)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_dir: str = \"../data/example_dataset/image_data/\"\n",
+    "save_dir: str = \"../data/ome_tiffs/\"\n",
+    "\n",
+    "# Make the `save_dir` if it doesn't exist\n",
+    "pathlib.Path(save_dir).mkdir(parents=True, exist_ok=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fovs: List[str] = [\"fov1\"] # naming schema for the example dataset is fov0, fov1, fov2, ..., fov10\n",
+    "channels = None\n",
+    "img_sub_folder = None"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Converts a folder of single channel tiffs into a multichannel OME-TIFF."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "load_utils.fov_to_ome(data_dir=data_dir, ome_save_dir=save_dir, img_sub_folder=img_sub_folder, fovs=fovs, channels=channels)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3.8.15 ('ark38')",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.15 | packaged by conda-forge | (default, Nov 22 2022, 08:49:06) \n[Clang 14.0.6 ]"
+  },
+  "orig_nbformat": 4,
+  "vscode": {
+   "interpreter": {
+    "hash": "9cd428f2623867f362c6ffd1805d28fe273bb79d15f4a3a73107e7f51d98be79"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}