From 13175874b2a19aa41b02e858695966784e17cfd3 Mon Sep 17 00:00:00 2001 From: Najib Ishaq Date: Tue, 27 Jun 2023 15:09:32 -0400 Subject: [PATCH] feat: updated theia plugin to new standards --- .../.bumpversion.cfg | 29 +++ .../.dockerignore | 171 +++++++++++++++ .../.gitignore | 1 + .../Dockerfile | 53 +++++ .../README.md | 90 ++++++++ .../VERSION | 1 + .../plugin.json | 126 +++++++++++ .../pyproject.toml | 46 ++++ .../run-plugin.sh | 41 ++++ .../theia_bleedthrough_estimation/__init__.py | 6 + .../theia_bleedthrough_estimation/__main__.py | 163 ++++++++++++++ .../theia_bleedthrough_estimation/model.py | 204 ++++++++++++++++++ .../tile_selectors/__init__.py | 89 ++++++++ .../tile_selectors/selector.py | 166 ++++++++++++++ .../utils/__init__.py | 4 + .../utils/constants.py | 15 ++ .../utils/helpers.py | 95 ++++++++ .../tests/__init__.py | 1 + 18 files changed, 1301 insertions(+) create mode 100644 regression/theia-bleedthrough-estimation-plugin/.bumpversion.cfg create mode 100644 regression/theia-bleedthrough-estimation-plugin/.dockerignore create mode 100644 regression/theia-bleedthrough-estimation-plugin/.gitignore create mode 100644 regression/theia-bleedthrough-estimation-plugin/Dockerfile create mode 100644 regression/theia-bleedthrough-estimation-plugin/README.md create mode 100644 regression/theia-bleedthrough-estimation-plugin/VERSION create mode 100644 regression/theia-bleedthrough-estimation-plugin/plugin.json create mode 100644 regression/theia-bleedthrough-estimation-plugin/pyproject.toml create mode 100755 regression/theia-bleedthrough-estimation-plugin/run-plugin.sh create mode 100644 regression/theia-bleedthrough-estimation-plugin/src/polus/plugins/regression/theia_bleedthrough_estimation/__init__.py create mode 100644 regression/theia-bleedthrough-estimation-plugin/src/polus/plugins/regression/theia_bleedthrough_estimation/__main__.py create mode 100644 
regression/theia-bleedthrough-estimation-plugin/src/polus/plugins/regression/theia_bleedthrough_estimation/model.py create mode 100644 regression/theia-bleedthrough-estimation-plugin/src/polus/plugins/regression/theia_bleedthrough_estimation/tile_selectors/__init__.py create mode 100644 regression/theia-bleedthrough-estimation-plugin/src/polus/plugins/regression/theia_bleedthrough_estimation/tile_selectors/selector.py create mode 100644 regression/theia-bleedthrough-estimation-plugin/src/polus/plugins/regression/theia_bleedthrough_estimation/utils/__init__.py create mode 100644 regression/theia-bleedthrough-estimation-plugin/src/polus/plugins/regression/theia_bleedthrough_estimation/utils/constants.py create mode 100644 regression/theia-bleedthrough-estimation-plugin/src/polus/plugins/regression/theia_bleedthrough_estimation/utils/helpers.py create mode 100644 regression/theia-bleedthrough-estimation-plugin/tests/__init__.py diff --git a/regression/theia-bleedthrough-estimation-plugin/.bumpversion.cfg b/regression/theia-bleedthrough-estimation-plugin/.bumpversion.cfg new file mode 100644 index 000000000..0add190e0 --- /dev/null +++ b/regression/theia-bleedthrough-estimation-plugin/.bumpversion.cfg @@ -0,0 +1,29 @@ +[bumpversion] +current_version = 0.5.0-dev0 +commit = False +tag = False +parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-(?P[a-z]+)(?P\d+))? 
+serialize = + {major}.{minor}.{patch}-{release}{dev} + {major}.{minor}.{patch} + +[bumpversion:part:release] +optional_value = _ +first_value = dev +values = + dev + _ + +[bumpversion:part:dev] + +[bumpversion:file:pyproject.toml] +search = version = "{current_version}" +replace = version = "{new_version}" + +[bumpversion:file:plugin.json] + +[bumpversion:file:VERSION] + +[bumpversion:file:README.md] + +[bumpversion:file:src/polus/plugins/regression/theia_bleedthrough_estimation/__init__.py] diff --git a/regression/theia-bleedthrough-estimation-plugin/.dockerignore b/regression/theia-bleedthrough-estimation-plugin/.dockerignore new file mode 100644 index 000000000..97191f3ab --- /dev/null +++ b/regression/theia-bleedthrough-estimation-plugin/.dockerignore @@ -0,0 +1,171 @@ +################################################################################ +# Local Files and Folders +################################################################################ +/data +requirements.txt +**/__pycache__ +**/*.so + +################################################################################ +# Python Template from github +################################################################################ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +.idea/ diff --git a/regression/theia-bleedthrough-estimation-plugin/.gitignore b/regression/theia-bleedthrough-estimation-plugin/.gitignore new file mode 100644 index 000000000..598ae7928 --- /dev/null +++ b/regression/theia-bleedthrough-estimation-plugin/.gitignore @@ -0,0 +1 @@ +/.vscode diff --git a/regression/theia-bleedthrough-estimation-plugin/Dockerfile b/regression/theia-bleedthrough-estimation-plugin/Dockerfile new file mode 100644 index 000000000..364c1a305 --- /dev/null +++ b/regression/theia-bleedthrough-estimation-plugin/Dockerfile @@ -0,0 +1,53 @@ +FROM tensorflow/tensorflow:2.12.0-gpu + +# cat /etc/os-release +# +# NAME="Ubuntu" +# VERSION="20.04.5 LTS (Focal Fossa)" +# ID=ubuntu +# ID_LIKE=debian +# PRETTY_NAME="Ubuntu 20.04.5 LTS" +# VERSION_ID="20.04" +# HOME_URL="https://www.ubuntu.com/" +# SUPPORT_URL="https://help.ubuntu.com/" +# BUG_REPORT_URL="https://bugs.launchpad.net/ubuntu/" +# PRIVACY_POLICY_URL="https://www.ubuntu.com/legal/terms-and-policies/privacy-policy" +# VERSION_CODENAME=focal +# UBUNTU_CODENAME=focal + +# Instal Python +RUN apt 
update && \ + apt install software-properties-common -y && \ + add-apt-repository ppa:deadsnakes/ppa && \ + apt install python3.9 python3.9-distutils curl -y && \ + curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \ + python3.9 get-pip.py && \ + apt autoremove -y && \ + rm -rf /var/lib/apt/lists/* + +# Symbolic link to python3.9 +RUN ln -sf /usr/bin/python3.9 /usr/bin/python3 + +# environment variables defined in polusai/bfio +ENV EXEC_DIR="/opt/executables" +ENV DATA_DIR="/data" +ENV POLUS_LOG="INFO" +ENV POLUS_IMG_EXT=".ome.tif" +ENV POLUS_TAB_EXT=".csv" + +RUN mkdir /.cache && chmod 777 /.cache + +# Work directory defined in the base container +WORKDIR ${EXEC_DIR} + +# Copy the python package +COPY pyproject.toml ${EXEC_DIR} +COPY VERSION ${EXEC_DIR} +COPY README.md ${EXEC_DIR} +COPY src ${EXEC_DIR}/src + +# Install the python package +RUN pip3 install ${EXEC_DIR} --no-cache-dir + +ENTRYPOINT ["python3", "-m", "polus.plugins.regression.theia_bleedthrough_estimation"] +CMD ["--help"] diff --git a/regression/theia-bleedthrough-estimation-plugin/README.md b/regression/theia-bleedthrough-estimation-plugin/README.md new file mode 100644 index 000000000..98c91c014 --- /dev/null +++ b/regression/theia-bleedthrough-estimation-plugin/README.md @@ -0,0 +1,90 @@ +# Theia Bleedthrough Estimation (v0.5.0-dev0) + +This WIPP plugin estimates the bleed-through in a collection of 2d images. +It uses the Theia algorithm from [this repo](https://github.com/PolusAI/theia). + +## File Patterns + +This plugin uses [file-patterns](https://filepattern.readthedocs.io/en/latest/Examples.html#what-is-filepattern) to create subsets of an input collection. +In particular, defining a filename variable is surrounded by `{}`, and the variable name and number of spaces dedicated to the variable are denoted by repeated characters for the variable. 
+For example, if all filenames follow the structure `prefix_tTTT.ome.tif`, where `TTT` indicates the time-point of capture of the image, then the file-pattern would be `prefix_t{ttt}.ome.tif`. + +## Optional Parameters + +### --groupBy + +This parameter can be used to group images into subsets. +This plugin will apply bleed-through correction to each subset. +Each subset should contain all channels for one image/tile/FOV. +The images in each subset should all have the same size (in pixels and dimensions) and contain one channel each. + +If no `--groupBy` is specified, then the plugin will assume that all images in the input collection are part of the same subset. + +### --selectionCriterion + +Which method to use to rank and select tiles in images. +The available options are: + +1. `"MeanIntensity"`: Select tiles with the highest mean pixel intensity. This is the default. +2. `"Entropy"`: Select tiles with the highest entropy. +3. `"MedianIntensity"`: Select tiles with the highest median pixel intensity. +4. `"IntensityRange"`: Select tiles with the largest difference between the 90th- and 10th-percentile pixel intensities. + +We rank-order all tiles based on one of these criteria and then select the best few tiles from each channel. +If the images are small enough, we select all tiles from each channel. + +### --channelOrdering + +By default, we assume that the order of channel numbers is the same as the order, in increasing wavelength, of the emission filters for those channels. +If this is not the case, use this parameter to specify, as a string of comma-separated integers, the wavelength-order of the channels. + +For example, if the channels are `0, 1, 2, 3, 4` and they correspond to wavelengths (of the emission filter) of `420nm, 350nm, 600nm, 510nm, 580nm`, then `--channelOrdering` should be `"1,0,3,4,2"`. + +If this parameter is not specified, then we assume that the channel numbers are in increasing wavelength order. 
+ +If you do not know the channel ordering, you can use the `--channelOverlap` parameter to specify a higher number of adjacent channels to consider as contributors to bleed-through. + +### --channelOverlap + +For each channel in the image, we assume that the only noticeable bleed-through is from a small number of adjacent channels. +By default, we consider only `1` adjacent channel on each side of the wavelength scale as contributors to bleed-through. + +For example, for channel 3, we would consider channels 2 and 4 to contribute bleed-through components. + +Use a higher value for `--channelOverlap` to have our model look for bleed-through from farther channels. + +### --kernelSize + +We learn a convolutional kernel for estimating the bleed-through from each channel to each neighboring channel. +This parameter specifies the size of those kernels. + +We recommend one of `3`, `5`, or `7` and use `3` as the default. + +## TODOs: + +1. Handle case where each image file contains all channels. +2. Extend to 3d images. + +## Build the plugin + +To build the Docker image for this plugin, run `./build-docker.sh`. + +## Install WIPP Plugin + +In WIPP, navigate to the plugins page and add a new plugin. +Paste the contents of `plugin.json` into the pop-up window and submit. + +## Options + +This plugin takes 7 input arguments and 1 output argument: + +| Name | Description | I/O | Type | Default | +| ---------------------- | ---------------------------------------- | ------ | ------- | --------------- | +| `--inpDir` | Input image collection. | Input | String | N/A | +| `--filePattern` | File pattern to subset images. | Input | String | ".*" | +| `--groupBy` | Variables to group together. | Input | String | "" | +| `--channelOrdering` | Channel ordering by wavelength scale. | Input | String | "" | +| `--selectionCriterion` | Method to use for selecting tiles. | Input | Enum | "MeanIntensity" | +| `--channelOverlap` | Number of adjacent channels to consider. 
| Input | Integer | 1 | +| `--kernelSize` | Size of convolutional kernels to learn. | Input | Integer | 3 | +| `--outDir` | Output image collection. | Output | String | N/A | diff --git a/regression/theia-bleedthrough-estimation-plugin/VERSION b/regression/theia-bleedthrough-estimation-plugin/VERSION new file mode 100644 index 000000000..412ff8c77 --- /dev/null +++ b/regression/theia-bleedthrough-estimation-plugin/VERSION @@ -0,0 +1 @@ +0.5.0-dev0 diff --git a/regression/theia-bleedthrough-estimation-plugin/plugin.json b/regression/theia-bleedthrough-estimation-plugin/plugin.json new file mode 100644 index 000000000..656014ef9 --- /dev/null +++ b/regression/theia-bleedthrough-estimation-plugin/plugin.json @@ -0,0 +1,126 @@ +{ + "name": "Bleed-Through Estimation Plugin", + "version": "0.5.0-dev0", + "title": "BleedThroughEstimation", + "description": "Performs bleed-through estimation for images.", + "author": "Najib Ishaq (najib.ishaq@nih.gov), Nick Schaub (nick.schaub@nih.gov)", + "institution": "National Center for the Advancing Translational Sciences, National Institutes of Health", + "repository": "https://github.com/polusai/polus-plugins/tree/dev/regression", + "website": "https://ncats.nih.gov/preclinical/core/informatics", + "citation": "https://doi.org/10.1038/s41467-021-21735-x", + "containerId": "polusai/bleed-through-estimation-plugin:0.5.0-dev0", + "baseCommand": [ + "python3", + "-m", + "polus.plugins.regression.theia_bleedthrough_estimation" + ], + "inputs": [ + { + "name": "inpDir", + "type": "collection", + "description": "Input image collection.", + "required": true + }, + { + "name": "filePattern", + "type": "string", + "description": "File pattern to subset images.", + "required": false + }, + { + "name": "groupBy", + "type": "string", + "description": "Variables to group together.", + "required": false + }, + { + "name": "channelOrdering", + "type": "string", + "description": "Channel ordering by wavelength scale.", + "required": false + }, + { 
+ "name": "selectionCriterion", + "type": "enum", + "description": "Method to use for selecting tiles.", + "options": { + "values": [ + "MeanIntensity", + "Entropy", + "MedianIntensity", + "IntensityRange" + ] + }, + "required": false + }, + { + "name": "channelOverlap", + "type": "number", + "description": "Number of adjacent channels to consider.", + "required": false + }, + { + "name": "kernelSize", + "type": "number", + "description": "Size of convolution kernels to learn.", + "options": { + "values": [ + 3, + 5, + 7 + ] + }, + "required": false + } + ], + "outputs": [ + { + "name": "outDir", + "type": "collection", + "description": "Location for writing bleed-through components." + } + ], + "ui": [ + { + "key": "inputs.inpDir", + "title": "Input image collection: ", + "description": "Image collection..." + }, + { + "key": "inputs.filePattern", + "title": "Filename pattern: ", + "description": "File pattern to subset images.", + "default": ".*" + }, + { + "key": "inputs.groupBy", + "title": "Grouping Variables: ", + "description": "Variables to group together.", + "default": "" + }, + { + "key": "inputs.channelOrdering", + "title": "Channel Ordering: ", + "description": "Channel ordering by wavelength scale.", + "default": "" + }, + { + "key": "inputs.selectionCriterion", + "title": "Selection Criterion: ", + "description": "Method to use for selecting tiles.", + "default": "MeanIntensity" + }, + { + "key": "inputs.channelOverlap", + "title": "Channel Overlap: ", + "description": "Number of adjacent channels to consider.", + "default": 1 + }, + { + "key": "inputs.kernelSize", + "title": "Kernel Size: ", + "description": "Size of convolutional kernels to learn.", + "default": 3 + } + ] +} diff --git a/regression/theia-bleedthrough-estimation-plugin/pyproject.toml b/regression/theia-bleedthrough-estimation-plugin/pyproject.toml new file mode 100644 index 000000000..58b6d3fc1 --- /dev/null +++ b/regression/theia-bleedthrough-estimation-plugin/pyproject.toml @@ 
-0,0 +1,46 @@ +[tool.poetry] +name = "polus-plugins-regression-theia-bleedthrough-estimation" +version = "0.5.0-dev0" +description = "" +authors = [ + "Nick Schaub ", + "Najib Ishaq ", +] +readme = "README.md" +packages = [{include = "polus", from = "src"}] + +[tool.poetry.dependencies] +python = ">=3.9,<3.12,!=3.9.7" +bfio = { version = "2.1.9", extras = ["all"] } +filepattern = [ + { version = "^2.0.0", platform = "linux" }, + { version = "^2.0.0", platform = "win32" }, + { git = "https://github.com/PolusAI/filepattern", rev = "c07bf543c435cbc4cf264effd5a178868e9eaf19", platform = "darwin" }, +] +typer = { version = "^0.7.0", extras = ["all"] } +tqdm = "^4.65.0" +# TODO: Update this to a version number after the branch is merged. +theia-py = {git = "https://github.com/nishaq503/theia.git", rev = "updates/docs"} +scipy = "^1.9.0" +numpy = "^1.23.2" + +[tool.poetry.group.dev.dependencies] +bump2version = "^1.0.1" +pre-commit = "^3.0.4" +black = "^23.1.0" +ruff = "^0.0.265" +pytest = "^7.2.1" +pytest-cov = "^4.0.0" +pytest-sugar = "^0.9.6" +pytest-xdist = "^3.2.0" +pytest-benchmark = "^4.0.0" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" + +# scipy restricts us to the most recently released version of Python. +# See [here](https://github.com/scipy/scipy/blob/ace5787b8e5d28cc5e3ad7594d287ea47a249b46/pyproject.toml#L1-L4) +# for more information. + +# streamlit is not compatible with Python 3.9.7. 
See [here](https://github.com/streamlit/streamlit/blob/c0378a39b1ded984b7668a4a58fbe0fba04a2c20/lib/setup.py#L147-L150) diff --git a/regression/theia-bleedthrough-estimation-plugin/run-plugin.sh b/regression/theia-bleedthrough-estimation-plugin/run-plugin.sh new file mode 100755 index 000000000..f8d19e0e6 --- /dev/null +++ b/regression/theia-bleedthrough-estimation-plugin/run-plugin.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +version=$( None: + """CLI for estimating bleedthrough using Theia.""" + inp_dir = inp_dir.resolve() + if inp_dir.joinpath("images").exists(): + inp_dir = inp_dir.joinpath("images") + + grouping_variables = list(group_by) + + channel_order: typing.Optional[list[int]] = None + if channel_ordering: + channel_order = list(map(int, channel_ordering.split(","))) + + out_dir = out_dir.resolve() + + logger.info(f"--inpDir = {inp_dir}") + logger.info(f'--filePattern = "{pattern}"') + logger.info(f"--groupBy = \"{''.join(grouping_variables)}\"") + logger.info(f'--channelOrdering = "{channel_ordering}"') + logger.info(f'--selectionCriterion = "{selection_criterion.value}"') + logger.info(f"--channelOverlap = {channel_overlap}") + logger.info(f"--kernelSize = {kernel_size}") + logger.info(f"--removeInteractions = {remove_interactions}") + logger.info(f"--outDir = {out_dir}") + logger.info(f"--preview = {preview}") + + fp = filepattern.FilePattern(str(inp_dir), pattern) + groups = [ + [pathlib.Path(p) for _, [p] in files] + for _, files in fp(group_by=grouping_variables) + ] + + if preview: + logger.info("Previewing results without running any computation ...") + metadata: dict[str, list[str]] = {"files": []} + for image_paths in groups: + for path in image_paths: + metadata["files"].append(str(path)) + with out_dir.joinpath("preview.json").open("w") as f: + json.dump(metadata, f, indent=2) + return + + logger.info("Running Bleedthrough Estimation ...") + with concurrent.futures.ProcessPoolExecutor( + max_workers=constants.NUM_THREADS, + ) as executor: + futures = 
def estimate_bleedthrough(  # noqa: PLR0913
    image_paths: list[pathlib.Path],
    channel_order: typing.Optional[list[int]],
    selection_criterion: tile_selectors.Selectors,
    channel_overlap: int,
    kernel_size: int,
    remove_interactions: bool,
    out_dir: pathlib.Path,
) -> None:
    """Estimate bleedthrough using Theia.

    The images in `image_paths` are treated as the channels of one
    image/tile/FOV, one channel per file. A Theia neural model is trained on
    the best-scoring tiles and then applied tile-by-tile; the estimated
    component for each channel is written to `out_dir/images/<input name>`.

    Args:
        image_paths: List of paths to images, one per channel.
        channel_order: Wavelength order of the channels, given as a permutation
            of `range(len(image_paths))`, or `None` to keep the given order.
        selection_criterion: Criterion to select tiles for training.
        channel_overlap: Number of adjacent channels to consider.
        kernel_size: Size of the kernel to use for the convolution.
        remove_interactions: Whether to remove interactions between channels.
        out_dir: Path to the output directory.

    Raises:
        ValueError: If `image_paths` is empty, or if `channel_order` is not a
            permutation of the channel indices.
    """
    # Function-scope import so this block does not depend on the module header.
    import contextlib

    if not image_paths:
        msg = "No images were provided for bleedthrough estimation."
        logger.critical(msg)
        raise ValueError(msg)

    components_dir = out_dir.joinpath("images")
    components_dir.mkdir(parents=True, exist_ok=True)

    metadata_dir = out_dir.joinpath("metadata")
    metadata_dir.mkdir(parents=True, exist_ok=True)

    # One file per channel: the tiles fed to the model are stacked from one
    # tile per file, so the channel count is the number of files rather than
    # the `C` dimension of any single file.
    num_channels = len(image_paths)

    with bfio.BioReader(image_paths[0], max_workers=1) as br:
        num_tiles = helpers.count_tiles_2d(br)

    if num_tiles > constants.MAX_2D_TILES:
        logger.warning(
            f"Image has {num_tiles} tiles. Using only the best "
            f"{constants.MAX_2D_TILES} tiles for training.",
        )
        num_tiles = constants.MAX_2D_TILES

    if channel_order is not None:
        if len(channel_order) != num_channels:
            msg = (
                f"Number of channels in the channel ordering "
                f"({','.join(map(str, channel_order))}) does not match the number "
                f"of channels in the image ({num_channels})."
            )
            logger.critical(msg)
            raise ValueError(msg)

        # Reject duplicates and out-of-range indices up front instead of
        # silently repeating a channel or failing later with an IndexError.
        if sorted(channel_order) != list(range(num_channels)):
            msg = (
                f"The channel ordering ({','.join(map(str, channel_order))}) must "
                f"be a permutation of the channel indices 0 to {num_channels - 1}."
            )
            logger.critical(msg)
            raise ValueError(msg)

        image_paths = [image_paths[i] for i in channel_order]

    selector = selection_criterion()(
        files=image_paths,
        num_tiles_per_channel=num_tiles,
    )
    selector.fit()

    tile_indices = selector.selected_tiles
    tiles = load_tiles(image_paths, tile_indices)

    tile_size = 256

    def make_generator(images: list[numpy.ndarray]) -> typing.Any:
        """Wrap a list of stacked tiles in a Theia tile generator."""
        return theia.TileGenerator(
            images=images,
            tile_size=tile_size,
            shuffle=True,
            normalize=False,
        )

    if len(tile_indices) > constants.MIN_2D_TILES:
        # Hold out a quarter of the selected tiles for validation.
        val_size = len(tile_indices) // 4
        valid_generator = make_generator(tiles[:val_size])
        train_generator = make_generator(tiles[val_size:])
    else:
        train_generator = make_generator(tiles)
        valid_generator = None

    model = theia.models.Neural(
        num_channels=num_channels,
        channel_overlap=channel_overlap,
        kernel_size=kernel_size,
        alpha=1,
        beta=1,
        tile_size=tile_size,
    )
    model.early_stopping(
        min_delta=1e-3,
        patience=4,
        verbose=1,
        restore_best_weights=True,
    )
    model.compile(optimizer="adam")
    model.fit_theia(
        train_gen=train_generator,
        valid_gen=valid_generator,
        epochs=128,
        verbose=1,
    )

    transformer = model.transformer
    out_paths = [components_dir.joinpath(p.name) for p in image_paths]

    # The ExitStack closes every reader and writer even if a tile fails.
    # Callbacks run in reverse order, so writers are closed before readers.
    with contextlib.ExitStack() as stack:
        readers = []
        for image_path in image_paths:
            reader = bfio.BioReader(image_path, max_workers=1)
            stack.callback(reader.close)
            readers.append(reader)

        writers = []
        for out_path, reader in zip(out_paths, readers):
            writer = bfio.BioWriter(out_path, metadata=reader.metadata)
            stack.callback(writer.close)
            writers.append(writer)

        for z, y_min, y_max, x_min, x_max in helpers.tile_indices_2d(readers[0]):
            # bfio indexes images as [Y, X, Z, C, T].
            channels = numpy.stack(
                [
                    numpy.squeeze(reader[y_min:y_max, x_min:x_max, z, 0, 0])
                    for reader in readers
                ],
                axis=-1,
            )

            # NOTE(review): assumes the transformer returns an array shaped like
            # its input, i.e. (Y, X, C) -- confirm against the Theia API.
            component = transformer.total_bleedthrough(channels)
            if remove_interactions:
                # Interactions are estimated from the input channels, just like
                # the bleed-through term, not from the bleed-through estimate.
                component += transformer.total_interactions(channels)

            # Each output file receives only its own channel's component.
            for c, writer in enumerate(writers):
                writer[y_min:y_max, x_min:x_max, z, 0, 0] = component[..., c]


def load_tiles(
    image_paths: list[pathlib.Path],
    tile_indices: tile_selectors.TileIndices,
) -> list[numpy.ndarray]:
    """Load tiles from the given images.

    For every tile index, the tile is read from each image (one image per
    channel) and the per-channel tiles are stacked along the last axis.
    These stacked arrays are returned as a list, in the order of
    `tile_indices`.

    Args:
        image_paths: List of paths to images.
        tile_indices: List of tile indices to load, each one a 6-tuple
            (z_min, z_max, y_min, y_max, x_min, x_max).

    Returns:
        List of tiles as numpy arrays.
    """
    # Function-scope import so this block does not depend on the module header.
    import contextlib

    tiles = []

    # The ExitStack closes every reader even if reading a tile fails.
    with contextlib.ExitStack() as stack:
        readers = []
        for image_path in image_paths:
            reader = bfio.BioReader(image_path, max_workers=1)
            stack.callback(reader.close)
            readers.append(reader)

        for z_min, z_max, y_min, y_max, x_min, x_max in tile_indices:
            # bfio indexes images as [Y, X, Z, C, T].
            channel_tiles = [
                numpy.squeeze(
                    reader[y_min:y_max, x_min:x_max, z_min:z_max, 0, 0],
                )
                for reader in readers
            ]
            tiles.append(numpy.stack(channel_tiles, axis=-1))

    return tiles
class Entropy(Selector):
    """Tile selector that prefers tiles with the highest entropy."""

    def __init__(self, files: list[pathlib.Path], num_tiles_per_channel: int) -> None:
        """Initializes an Entropy tile selector."""
        super().__init__(files, num_tiles_per_channel)

    def _score_tile(self, tile: numpy.ndarray) -> float:
        # Entropy of the 128-bin intensity density of the tile.
        density = numpy.histogram(tile.flat, bins=128, density=True)[0]
        return float(scipy.stats.entropy(density))


class MeanIntensity(Selector):
    """Tile selector that prefers tiles with the highest mean intensity."""

    def __init__(self, files: list[pathlib.Path], num_tiles_per_channel: int) -> None:
        """Initializes a MeanIntensity tile selector."""
        super().__init__(files, num_tiles_per_channel)

    def _score_tile(self, tile: numpy.ndarray) -> float:
        mean_value = numpy.mean(tile)
        return float(mean_value)


class MedianIntensity(Selector):
    """Tile selector that prefers tiles with the highest median intensity."""

    def __init__(self, files: list[pathlib.Path], num_tiles_per_channel: int) -> None:
        """Initializes a MedianIntensity tile selector."""
        super().__init__(files, num_tiles_per_channel)

    def _score_tile(self, tile: numpy.ndarray) -> float:
        median_value = numpy.median(tile)
        return float(median_value)


class IntensityRange(Selector):
    """Tile selector that prefers the largest 90-10 percentile intensity spread."""

    def __init__(self, files: list[pathlib.Path], num_tiles_per_channel: int) -> None:
        """Initializes an IntensityRange tile selector."""
        super().__init__(files, num_tiles_per_channel)

    def _score_tile(self, tile: numpy.ndarray) -> float:
        upper = numpy.percentile(tile, 90)
        lower = numpy.percentile(tile, 10)
        return float(upper - lower)


# Lookup table from selector name to selector class, so that a selector can be
# chosen by name. The keys are the class names.
SELECTORS: dict[str, type[Selector]] = {
    selector_class.__name__: selector_class
    for selector_class in (Entropy, MeanIntensity, MedianIntensity, IntensityRange)
}


class Selectors(str, enum.Enum):
    """Names of the tile selectors offered by the plugin.

    Calling a member returns the selector class registered under its name.
    """

    Entropy = "Entropy"
    MeanIntensity = "MeanIntensity"
    MedianIntensity = "MedianIntensity"
    IntensityRange = "IntensityRange"

    def __call__(self) -> type[Selector]:
        """Returns the selector class for this enum value."""
        selector_name = self.value
        return SELECTORS[selector_name]


__all__ = [
    "TileIndices",
    "Entropy",
    "IntensityRange",
    "MeanIntensity",
    "MedianIntensity",
    "Selectors",
    "SELECTORS",
]
ScoresDict = dict[tuple[int, int, int, int, int, int], float]
"""A dictionary of scores for each tile in an image.

key: (6-tuple of indices) (z_min, z_max, y_min, y_max, x_min, x_max)
value: (float) score
"""

TileIndices = list[tuple[int, int, int, int, int, int]]
"""A list of coordinates for each tile that was selected by a Selector.

Each item is a 6-tuple of indices: (z_min, z_max, y_min, y_max, x_min, x_max)
"""


class Selector(abc.ABC):
    """Base class for tile-selection methods.

    Subclasses implement `_score_tile`. Calling `fit` scores every tile of every
    file and stores the best `num_tiles_per_channel` tiles of each file, along
    with the per-image minimum and maximum intensities.
    """

    __slots__ = (
        "__files",
        "__is_high_better",
        "__num_tiles_per_channel",
        "__scores",
        "__selected_tiles",
        "__image_mins",
        "__image_maxs",
    )

    def __init__(
        self,
        files: list[pathlib.Path],
        num_tiles_per_channel: int,
        is_high_better: bool = True,
    ) -> None:
        """Stores the configuration; no image is read until `fit` is called.

        Args:
            files: List of paths to images from which tiles will be selected.
            num_tiles_per_channel: How many tiles to select from each channel.
            is_high_better: Whether higher scoring tiles are better.
        """
        self.__files = files
        self.__num_tiles_per_channel = num_tiles_per_channel
        self.__is_high_better = is_high_better

        self.__image_mins: list[int] = []
        self.__image_maxs: list[int] = []
        self.__scores: list[ScoresDict] = []
        self.__selected_tiles: TileIndices = []

    def fit(self) -> None:
        """Scores all tiles in images and selects the best few for training a model.

        This method must be called before using the `selected_tiles` property.
        Each file is scored in a worker of a process pool. Results of any
        earlier call are replaced, so calling `fit` again does not accumulate
        duplicate entries.
        """
        scores: list[ScoresDict] = []
        image_mins: list[int] = []
        image_maxs: list[int] = []

        with concurrent.futures.ProcessPoolExecutor(
            max_workers=constants.NUM_THREADS,
        ) as executor:
            futures: list[concurrent.futures.Future[tuple[ScoresDict, int, int]]] = [
                executor.submit(self._score_tiles_thread, file_path)
                for file_path in self.__files
            ]
            # Futures are consumed in submission order so that the i-th entry
            # of every result list corresponds to the i-th input file.
            for future in futures:
                score, image_min, image_max = future.result()
                scores.append(score)
                image_mins.append(image_min)
                image_maxs.append(image_max)

        # Update in place (keeps the identity of the lists handed out by the
        # properties) and only after every file has been scored successfully.
        self.__scores[:] = scores
        self.__image_mins[:] = image_mins
        self.__image_maxs[:] = image_maxs

        self.__selected_tiles = self._select_best_tiles()

    @property
    def selected_tiles(self) -> TileIndices:
        """Returns the indices of the selected tiles."""
        return self.__selected_tiles

    @property
    def image_mins(self) -> list[int]:
        """Returns the minimum intensity of each image."""
        return self.__image_mins

    @property
    def image_maxs(self) -> list[int]:
        """Returns the maximum intensity of each image."""
        return self.__image_maxs

    @abc.abstractmethod
    def _score_tile(self, tile: numpy.ndarray) -> float:
        """Returns the score of a single tile; implemented by subclasses."""

    @staticmethod
    def _initial_extrema(dtype: typing.Any) -> tuple[typing.Any, typing.Any]:
        """Returns the seeds for a running (minimum, maximum) over `dtype` data.

        The running minimum starts at the largest representable value and the
        running maximum at the smallest one, so that the first observed pixel
        replaces both.

        Args:
            dtype: Anything accepted by `numpy.dtype`; integer and
                floating-point types are supported.

        Returns:
            A 2-tuple `(initial_min, initial_max)`.
        """
        dtype = numpy.dtype(dtype)
        if numpy.issubdtype(dtype, numpy.integer):
            info = numpy.iinfo(dtype)
        else:
            info = numpy.finfo(dtype)
        return info.max, info.min

    def _score_tiles_thread(
        self,
        file_path: pathlib.Path,
    ) -> tuple[ScoresDict, int, int]:
        """Scores all tiles of a single file; run as one task of the pool in `fit`.

        Args:
            file_path: Path to image for which the tiles need to be scored.

        Returns:
            A 3-tuple of: the dictionary of tile-scores, the smallest strictly
            positive intensity seen, and the largest intensity seen.
        """
        with bfio.BioReader(file_path, max_workers=constants.NUM_THREADS) as reader:
            scores_dict: ScoresDict = {}
            logger.info(f"Ranking tiles in {file_path.name}...")
            num_tiles = helpers.count_tiles_2d(reader)
            image_min, image_max = self._initial_extrema(reader.dtype)
            pick_better = max if self.__is_high_better else min

            for i, (_, y_min, y_max, x_min, x_max) in enumerate(
                helpers.tile_indices_2d(reader),
            ):
                if i % 10 == 0:
                    logger.info(
                        f"Ranking tiles in {file_path.name}. "
                        f"Progress {100 * i / num_tiles:6.2f} %",
                    )

                # NOTE(review): the z index yielded by the generator is ignored
                # and plane 0 is always read, matching the hard-coded (0, 1) z
                # range in the key below.
                tile = numpy.squeeze(reader[y_min:y_max, x_min:x_max, 0, 0, 0])

                # TODO: Actually handle 3d images properly with 3d tile-chunks.
                key = (0, 1, y_min, y_max, x_min, x_max)
                score = self._score_tile(tile)
                if key in scores_dict:
                    scores_dict[key] = pick_better(scores_dict[key], score)
                else:
                    scores_dict[key] = score

                # Zero-valued pixels are excluded from the minimum.
                image_min = numpy.min(tile[tile > 0], initial=image_min)
                image_max = numpy.max(tile, initial=image_max)

        return scores_dict, image_min, image_max

    def _select_best_tiles(self) -> TileIndices:
        """Sort the tiles by their scores and select the best few from each channel.

        Coordinates chosen for more than one channel appear only once.

        Returns:
            List of indices of the best tiles.
        """
        return list(
            {
                coordinates
                for scores_dict in self.__scores
                for coordinates, _ in sorted(
                    scores_dict.items(),
                    key=operator.itemgetter(1),
                    reverse=self.__is_high_better,
                )[: self.__num_tiles_per_channel]
            },
        )
def parse_log_level(name: str, default: int = logging.INFO) -> int:
    """Converts the name of a logging level into its numeric value.

    The lookup is case-insensitive and ignores surrounding whitespace. A plain
    `getattr(logging, name)` would raise on unknown names and, for lowercase
    names such as "debug", return the `logging.debug` function instead of a
    level.

    Args:
        name: Name of a level defined by the `logging` module, e.g. "INFO".
        default: Level to use when `name` is not a known level.

    Returns:
        The numeric logging level, or `default` (after emitting a warning) if
        `name` does not name an integer level.
    """
    level = getattr(logging, name.strip().upper(), None)
    if isinstance(level, int):
        return level

    import warnings

    warnings.warn(
        f"Unknown logging level {name!r}; falling back to {default}.",
        stacklevel=2,
    )
    return default


# Logging level for the plugin, read from the POLUS_LOG environment variable.
POLUS_LOG = parse_log_level(os.environ.get("POLUS_LOG", "INFO"))
# Extension given to output images, read from POLUS_IMG_EXT.
POLUS_IMG_EXT = os.environ.get("POLUS_IMG_EXT", ".ome.tif")

# Half of the available CPUs, but never fewer than one.
NUM_THREADS = max(1, multiprocessing.cpu_count() // 2)
TILE_SIZE_2D = 1024 * 2
TILE_SIZE_3D = 128
MIN_2D_TILES = 8
MAX_2D_TILES = 16
EPSILON = 1e-8  # To avoid divide-by-zero errors
ReaderOrWriter = typing.Union[bfio.BioReader, bfio.BioWriter]
Tiles2D = typing.Generator[tuple[int, int, int, int, int], None, None]
Tiles3D = typing.Generator[tuple[int, int, int, int, int, int], None, None]

# Image extensions that `replace_extension` knows how to swap out.
_KNOWN_EXTENSIONS = (".ome.tif", ".ome.zarr")


def make_logger(name: str, level: int = constants.POLUS_LOG) -> logging.Logger:
    """Creates a logger with the given name and level.

    Args:
        name: Name of the logger, typically `__name__` of the calling module.
        level: Logging level; defaults to `constants.POLUS_LOG`.

    Returns:
        The logger registered under `name`, with its level set to `level`.
    """
    logger = logging.getLogger(name)
    logger.setLevel(level)
    return logger


def replace_extension(name: str, new_extension: typing.Optional[str] = None) -> str:
    """Replaces the extension in the name of an input image with `POLUS_IMG_EXT`.

    Only a trailing ".ome.tif" or ".ome.zarr" is replaced. Matching the suffix
    (instead of replacing the text anywhere in the name) keeps names such as
    "x.ome.tiff" intact and prevents the new extension from being substituted
    a second time when it contains one of the known extensions itself.

    Args:
        name: File name of the image.
        new_extension: Extension to use; `constants.POLUS_IMG_EXT` if None.

    Returns:
        The name with its extension replaced, or `name` unchanged if it does
        not end with a known extension.
    """
    new_extension = constants.POLUS_IMG_EXT if new_extension is None else new_extension
    for known in _KNOWN_EXTENSIONS:
        if name.endswith(known):
            return name[: -len(known)] + new_extension
    return name


def _tile_size(reader_or_writer: ReaderOrWriter) -> int:
    """Returns the tile edge length: the 2d size for single-plane images, else 3d."""
    if reader_or_writer.Z == 1:
        return constants.TILE_SIZE_2D
    return constants.TILE_SIZE_3D


def _num_steps(length: int, step: int) -> int:
    """Returns `len(range(0, length, step))` without building the range."""
    return -(-length // step)


def tile_indices_2d(reader_or_writer: ReaderOrWriter) -> Tiles2D:
    """A generator for the indices of all 2d tiles in a BioReader/BioWriter.

    Yields:
        5-tuples `(z, y_min, y_max, x_min, x_max)`, one per tile of every
        z-plane; tiles on the far edges are clipped to the image bounds.
    """
    tile_size = _tile_size(reader_or_writer)

    for z in range(reader_or_writer.Z):
        for y_min in range(0, reader_or_writer.Y, tile_size):
            y_max = min(reader_or_writer.Y, y_min + tile_size)

            for x_min in range(0, reader_or_writer.X, tile_size):
                x_max = min(reader_or_writer.X, x_min + tile_size)

                yield z, y_min, y_max, x_min, x_max


def count_tiles_2d(reader_or_writer: ReaderOrWriter) -> int:
    """Returns the number of 2d tiles in a BioReader/BioWriter.

    The count is computed arithmetically and equals the number of items that
    `tile_indices_2d` yields, without generating them.
    """
    tile_size = _tile_size(reader_or_writer)
    return (
        reader_or_writer.Z
        * _num_steps(reader_or_writer.Y, tile_size)
        * _num_steps(reader_or_writer.X, tile_size)
    )


def tile_indices_3d(reader_or_writer: ReaderOrWriter) -> Tiles3D:
    """A generator for the indices of all 3d chunks in a BioReader/BioWriter.

    Yields:
        6-tuples `(z_min, z_max, y_min, y_max, x_min, x_max)`; chunks on the
        far edges are clipped to the image bounds.
    """
    tile_size = _tile_size(reader_or_writer)

    for z_min in range(0, reader_or_writer.Z, tile_size):
        z_max = min(reader_or_writer.Z, z_min + tile_size)

        for y_min in range(0, reader_or_writer.Y, tile_size):
            y_max = min(reader_or_writer.Y, y_min + tile_size)

            for x_min in range(0, reader_or_writer.X, tile_size):
                x_max = min(reader_or_writer.X, x_min + tile_size)

                yield z_min, z_max, y_min, y_max, x_min, x_max


def count_tiles_3d(reader_or_writer: ReaderOrWriter) -> int:
    """Returns the number of 3d chunks in a BioReader/BioWriter.

    The count is computed arithmetically and equals the number of items that
    `tile_indices_3d` yields, without generating them.
    """
    tile_size = _tile_size(reader_or_writer)
    return (
        _num_steps(reader_or_writer.Z, tile_size)
        * _num_steps(reader_or_writer.Y, tile_size)
        * _num_steps(reader_or_writer.X, tile_size)
    )


class TimeIt:
    """A class to provide a decorator for timing the execution of a function.

    An instance is used as a decorator; after each successful call of the
    wrapped function, the function's name and elapsed time are logged at INFO
    level using `template`. Nothing is logged if the function raises.
    """

    def __init__(  # noqa: D107
        self,
        logger: logging.Logger,
        template: str = "completed {:s} in {:.3f} seconds",
    ) -> None:
        self.template: str = template
        self.logger: logging.Logger = logger

    def __call__(self, function: typing.Callable):  # noqa: D102, ANN204
        @functools.wraps(function)
        def wrapper(*args, **kwargs):  # noqa: ANN002, ANN003, ANN202
            # perf_counter is monotonic, so the measured duration cannot be
            # distorted by adjustments of the system clock.
            start = time.perf_counter()
            result = function(*args, **kwargs)
            elapsed = time.perf_counter() - start

            self.logger.info(self.template.format(function.__name__, elapsed))
            return result

        return wrapper