From 8c0b6ba4aa8edf168c7f5347c25271ec105d0c4c Mon Sep 17 00:00:00 2001 From: Shannon Axelrod Date: Tue, 1 Oct 2019 12:11:03 -0400 Subject: [PATCH] deleting Decode and Detect modules in lieu of spot finding refactor --- .../data_processing_examples/3d_smFISH.py | 22 +- docs/source/api/spots/index.rst | 37 +- docs/source/index.rst | 4 +- starfish/core/pipeline/algorithmbase.py | 19 +- starfish/core/spots/Decode/__init__.py | 11 - starfish/core/spots/Decode/_base.py | 12 - starfish/core/spots/Decode/metric_decoder.py | 72 --- .../Decode/per_round_max_channel_decoder.py | 40 -- starfish/core/spots/Decode/test/__init__.py | 0 .../test/test_decoding_without_spots.py | 54 --- starfish/core/spots/DetectSpots/__init__.py | 13 - starfish/core/spots/DetectSpots/_base.py | 104 ---- starfish/core/spots/DetectSpots/blob.py | 163 ------- starfish/core/spots/DetectSpots/detect.py | 249 ---------- .../DetectSpots/local_max_peak_finder.py | 299 ------------ .../DetectSpots/local_search_blob_detector.py | 453 ------------------ .../core/spots/DetectSpots/test/__init__.py | 0 .../test/test_local_search_blob_detector.py | 154 ------ .../DetectSpots/test/test_spot_detection.py | 210 -------- .../DetectSpots/test/test_synthetic_data.py | 91 ---- .../trackpy_local_max_peak_finder.py | 181 ------- starfish/spots.py | 2 - workflows/wdl/iss_published/recipe.py | 14 +- 23 files changed, 33 insertions(+), 2171 deletions(-) delete mode 100644 starfish/core/spots/Decode/__init__.py delete mode 100644 starfish/core/spots/Decode/_base.py delete mode 100644 starfish/core/spots/Decode/metric_decoder.py delete mode 100644 starfish/core/spots/Decode/per_round_max_channel_decoder.py delete mode 100644 starfish/core/spots/Decode/test/__init__.py delete mode 100644 starfish/core/spots/Decode/test/test_decoding_without_spots.py delete mode 100644 starfish/core/spots/DetectSpots/__init__.py delete mode 100644 starfish/core/spots/DetectSpots/_base.py delete mode 100644 starfish/core/spots/DetectSpots/blob.py delete mode 100644 starfish/core/spots/DetectSpots/detect.py delete mode 100644 starfish/core/spots/DetectSpots/local_max_peak_finder.py delete mode 100644 starfish/core/spots/DetectSpots/local_search_blob_detector.py delete mode 100644 starfish/core/spots/DetectSpots/test/__init__.py delete mode 100644 starfish/core/spots/DetectSpots/test/test_local_search_blob_detector.py delete mode 100644 starfish/core/spots/DetectSpots/test/test_spot_detection.py delete mode 100644 starfish/core/spots/DetectSpots/test/test_synthetic_data.py delete mode 100644 starfish/core/spots/DetectSpots/trackpy_local_max_peak_finder.py diff --git a/docs/source/_static/data_processing_examples/3d_smFISH.py b/docs/source/_static/data_processing_examples/3d_smFISH.py index 2b8a538ed..99e650115 100644 --- a/docs/source/_static/data_processing_examples/3d_smFISH.py +++ b/docs/source/_static/data_processing_examples/3d_smFISH.py @@ -20,7 +20,8 @@ import starfish import starfish.data -from starfish import FieldOfView, IntensityTable +from starfish import FieldOfView, DecodedIntensityTable +from starfish.types import TraceBuildingStrategies # equivalent to %gui qt ipython = get_ipython() @@ -57,12 +58,11 @@ # local intensity maxima, and spots are matched to the gene they represent by looking them up in a # codebook that records which (round, channel) matches which gene target. -tlmpf = starfish.spots.DetectSpots.TrackpyLocalMaxPeakFinder( +tlmpf = starfish.spots.FindSpots.TrackpyLocalMaxPeakFinder( spot_diameter=5, # must be odd integer min_mass=0.02, max_size=2, # this is max radius separation=7, - noise_size=0.65, # this is not used because preprocess is False preprocess=False, percentile=10, # this is irrelevant when min_mass, spot_diameter, and max_size are set properly verbose=True, @@ -100,6 +100,11 @@ def processing_pipeline( print("Loading images...") images = enumerate(experiment[fov_name].get_images(FieldOfView.PRIMARY_IMAGES)) + decoder = starfish.spots.DecodeSpots.PerRoundMaxChannel( + codebook=codebook, + trace_building_strategy=TraceBuildingStrategies.SEQUENTIAL + ) + for image_number, primary_image in images: print(f"Filtering image {image_number}...") filter_kwargs = dict( @@ -113,13 +118,12 @@ def processing_pipeline( clip2.run(primary_image, **filter_kwargs) print("Calling spots...") - spot_attributes = tlmpf.run(primary_image) - all_intensities.append(spot_attributes) - - spot_attributes = IntensityTable.concatenate_intensity_tables(all_intensities) + spots = tlmpf.run(primary_image) + print("Decoding spots...") + decoded_intensities = decoder.run(spots) + all_intensities.append(decoded_intensities) - print("Decoding spots...") - decoded = codebook.decode_per_round_max(spot_attributes) + decoded = DecodedIntensityTable.concatenate_intensity_tables(all_intensities) decoded = decoded[decoded["total_intensity"] > .025] return primary_image, decoded diff --git a/docs/source/api/spots/index.rst b/docs/source/api/spots/index.rst index 13da9b572..ee25f16c2 100644 --- a/docs/source/api/spots/index.rst +++ b/docs/source/api/spots/index.rst @@ -5,8 +5,8 @@ Spots Starfish provides a number of methods for which spots (or other regions of interest) are the main substrate. These include :py:class:`starfish.spots.DetectPixels`, which exposes methods that identify which target code best corresponds to each pixel, and merges adjacent pixels into ROIs, -:py:class:`starfish.spots.DetectSpots`, which exposes methods that find bright spots against dark backgrounds, -:py:class:`starfish.spots.Decode`, which exposes methods that match patterns of spots detected across rounds and channels in the same spatial positions with target codes, and +:py:class:`starfish.spots.FindSpots`, which exposes methods that find bright spots against dark backgrounds, +:py:class:`starfish.spots.DecodeSpots`, which exposes methods that match patterns of spots detected across rounds and channels in the same spatial positions with target codes, and :py:class:`starfish.spots.AssignTargets`, which exposes methods to assign spots to cells. .. _detect_pixels: @@ -23,29 +23,11 @@ Pixel Detectors can be imported using ``starfish.spots.DetectPixels``, which reg .. automodule:: starfish.spots.DetectPixels :members: - -.. _detection: - -Detecting Spots ---------------- - -Spot Detectors can be imported using ``starfish.spots.DetectSpots``, which registers all classes that subclass ``DetectSpotsAlgorithm``: - -.. code-block:: python - - from starfish.spots import DetectSpots - -.. automodule:: starfish.spots.DetectSpots - :members: - .. _spot_finding: Finding Spots --------------- -NOTE: Starfish is embarking on a SpotFinding data structures refactor see `Spot Finding Refactor Plan`_ -DetectSpots will be replaced by the FindSpots module documented below. - Spot Finders can be imported using ``starfish.spots.FindSpots``, which registers all classes that subclass ``FindSpotsAlgorithm``: @@ -58,21 +40,6 @@ Spot Finders can be imported using ``starfish.spots.FindSpots``, which registers .. automodule:: starfish.spots.FindSpots :members: - -.. _decoding: - -Decoding --------- - -Decoders can be imported using ``starfish.spots.Decode``, which registers all classes that subclass ``DecodeAlgorithm``: - -.. code-block:: python - - from starfish.spots import Decode - -.. automodule:: starfish.spots.Decode - :members: - .. _decode_spots: Decoding Spots diff --git a/docs/source/index.rst b/docs/source/index.rst index eab3a656c..e2c6ce293 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -69,8 +69,8 @@ To see what improvements the developers have planned for starfish, please see th

Features

* Filtering: :ref:`API ` -* Spot-Finding: :ref:`API ` -* Decoding: :ref:`API ` +* Spot-Finding: :ref:`API ` +* Decoding: :ref:`API ` * Segmenting: :ref:`API ` .. raw:: html diff --git a/starfish/core/pipeline/algorithmbase.py b/starfish/core/pipeline/algorithmbase.py index f41d03dc4..e3c7a87ef 100644 --- a/starfish/core/pipeline/algorithmbase.py +++ b/starfish/core/pipeline/algorithmbase.py @@ -2,7 +2,6 @@ import inspect from abc import ABCMeta -from starfish.core.imagestack.imagestack import ImageStack from starfish.core.types._constants import STARFISH_EXTRAS_KEY @@ -35,16 +34,12 @@ def helper(*args, **kwargs): spot_results = kwargs['spots'] spot_results.log.update_log(args[0]) result.attrs[STARFISH_EXTRAS_KEY] = spot_results.log.encode() - # OLD CODE FOR DETECT WILL GET REMOVED - if 'DetectSpots' in method_class_str or 'DetectPixels' in method_class_str: - if isinstance(args[1], ImageStack): - stack = args[1] - # update log with spot detection instance args[0] - stack.log.update_log(args[0]) - # get resulting intensity table and set log - it = result - if isinstance(result, tuple): - it = result[0] - it.attrs[STARFISH_EXTRAS_KEY] = stack.log.encode() + if 'DetectPixels' in method_class_str: + stack = args[1] + # update log with spot detection instance args[0] + stack.log.update_log(args[0]) + # get resulting intensity table and set log + it = result[0] + it.attrs[STARFISH_EXTRAS_KEY] = stack.log.encode() return result return helper diff --git a/starfish/core/spots/Decode/__init__.py b/starfish/core/spots/Decode/__init__.py deleted file mode 100644 index 44e3024a8..000000000 --- a/starfish/core/spots/Decode/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -from ._base import DecodeAlgorithm -from .metric_decoder import MetricDistance -from .per_round_max_channel_decoder import PerRoundMaxChannel - -# autodoc's automodule directive only captures the modules explicitly listed in __all__. -all_filters = { - filter_name: filter_cls - for filter_name, filter_cls in locals().items() - if isinstance(filter_cls, type) and issubclass(filter_cls, DecodeAlgorithm) -} -__all__ = list(all_filters.keys()) diff --git a/starfish/core/spots/Decode/_base.py b/starfish/core/spots/Decode/_base.py deleted file mode 100644 index c7048ce96..000000000 --- a/starfish/core/spots/Decode/_base.py +++ /dev/null @@ -1,12 +0,0 @@ -from abc import abstractmethod - -from starfish.core.intensity_table.intensity_table import IntensityTable -from starfish.core.pipeline.algorithmbase import AlgorithmBase - - -class DecodeAlgorithm(metaclass=AlgorithmBase): - - @abstractmethod - def run(self, intensities: IntensityTable, *args): - """Performs decoding on the spots found, using the codebook specified.""" - raise NotImplementedError() diff --git a/starfish/core/spots/Decode/metric_decoder.py b/starfish/core/spots/Decode/metric_decoder.py deleted file mode 100644 index 13b890e31..000000000 --- a/starfish/core/spots/Decode/metric_decoder.py +++ /dev/null @@ -1,72 +0,0 @@ -from starfish.core.codebook.codebook import Codebook -from starfish.core.intensity_table.decoded_intensity_table import DecodedIntensityTable -from starfish.core.intensity_table.intensity_table import IntensityTable -from starfish.core.types import Number -from ._base import DecodeAlgorithm - - -# TODO ambrosejcarr add tests -class MetricDistance(DecodeAlgorithm): - """ - Normalizes both the magnitudes of the codes and the spot intensities, then decodes spots by - assigning each spot to the closest code, measured by the provided metric. - - Codes greater than max_distance from the nearest code, or dimmer than min_intensity, are - discarded. - - Parameters - ---------- - codebook : Codebook - codebook containing targets the experiment was designed to quantify - max_distance : Number - spots greater than this distance from their nearest target are not decoded - min_intensity : Number - spots dimmer than this intensity are not decoded - norm_order : int - the norm to use to normalize the magnitudes of spots and codes (default 2, L2 norm) - metric : str - the metric to use to measure distance. Can be any metric that satisfies the triangle - inequality that is implemented by :py:mod:`scipy.spatial.distance` (default "euclidean") - """ - - def __init__( - self, - codebook: Codebook, - max_distance: Number, - min_intensity: Number, - norm_order: int = 2, - metric: str = "euclidean", - ) -> None: - self.codebook = codebook - self.max_distance = max_distance - self.min_intensity = min_intensity - self.norm_order = norm_order - self.metric = metric - - def run( - self, - intensities: IntensityTable, - *args - ) -> DecodedIntensityTable: - """Decode spots by selecting the max-valued channel in each sequencing round - - Parameters - ---------- - intensities : IntensityTable - IntensityTable to be decoded - codebook : Codebook - Contains codes to decode IntensityTable - - Returns - ------- - IntensityTable : - IntensityTable decoded and appended with Features.TARGET and Features.QUALITY values. - - """ - return self.codebook.decode_metric( - intensities, - max_distance=self.max_distance, - min_intensity=self.min_intensity, - norm_order=self.norm_order, - metric=self.metric, - ) diff --git a/starfish/core/spots/Decode/per_round_max_channel_decoder.py b/starfish/core/spots/Decode/per_round_max_channel_decoder.py deleted file mode 100644 index df560c7a9..000000000 --- a/starfish/core/spots/Decode/per_round_max_channel_decoder.py +++ /dev/null @@ -1,40 +0,0 @@ -from starfish.core.codebook.codebook import Codebook -from starfish.core.intensity_table.decoded_intensity_table import DecodedIntensityTable -from starfish.core.intensity_table.intensity_table import IntensityTable -from ._base import DecodeAlgorithm - - -class PerRoundMaxChannel(DecodeAlgorithm): - """ - Decode spots by selecting the max-valued channel in each sequencing round. - - Note that this assumes that the codebook contains only one "on" channel per sequencing round, - a common pattern in experiments that assign one fluorophore to each DNA nucleotide and - read DNA sequentially. It is also a characteristic of single-molecule FISH and RNAscope - codebooks. - - Parameters - ---------- - codebook : Codebook - Contains codes to decode IntensityTable - - """ - - def __init__(self, codebook: Codebook): - self.codebook = codebook - - def run(self, intensities: IntensityTable, *args) -> DecodedIntensityTable: - """Decode spots by selecting the max-valued channel in each sequencing round - - Parameters - ---------- - intensities : IntensityTable - IntensityTable to be decoded - - Returns - ------- - IntensityTable : - IntensityTable decoded and appended with Features.TARGET and Features.QUALITY values. - - """ - return self.codebook.decode_per_round_max(intensities) diff --git a/starfish/core/spots/Decode/test/__init__.py b/starfish/core/spots/Decode/test/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/starfish/core/spots/Decode/test/test_decoding_without_spots.py b/starfish/core/spots/Decode/test/test_decoding_without_spots.py deleted file mode 100644 index 81fe94523..000000000 --- a/starfish/core/spots/Decode/test/test_decoding_without_spots.py +++ /dev/null @@ -1,54 +0,0 @@ -import os -from tempfile import TemporaryDirectory - -import pandas as pd - -import starfish -from starfish.core.test.factories import two_spot_sparse_coded_data_factory - - -def test_per_round_max_spot_decoding_without_spots(): - - codebook, image_stack, max_intensity = two_spot_sparse_coded_data_factory() - - bd = starfish.spots.DetectSpots.BlobDetector( - min_sigma=1, max_sigma=1, num_sigma=1, threshold=max_intensity + 0.1) - no_spots = bd.run(image_stack) - - decode = starfish.spots.Decode.PerRoundMaxChannel(codebook) - decoded_no_spots: starfish.DecodedIntensityTable = decode.run(no_spots) - - decoded_spot_table = decoded_no_spots.to_decoded_dataframe() - - with TemporaryDirectory() as dir_: - filename = os.path.join(dir_, 'test.csv') - decoded_spot_table.save_csv(os.path.join(dir_, 'test.csv')) - - # verify we can concatenate two empty tables - table1 = pd.read_csv(filename, index_col=0) - table2 = pd.read_csv(filename, index_col=0) - pd.concat([table1, table2], axis=0) - -def test_metric_decoding_without_spots(): - - codebook, image_stack, max_intensity = two_spot_sparse_coded_data_factory() - - bd = starfish.spots.DetectSpots.BlobDetector( - min_sigma=1, max_sigma=1, num_sigma=1, threshold=max_intensity + 0.1) - no_spots = bd.run(image_stack) - - decode = starfish.spots.Decode.MetricDistance( - codebook, max_distance=0, min_intensity=max_intensity + 0.1 - ) - decoded_no_spots: starfish.DecodedIntensityTable = decode.run(no_spots) - - decoded_spot_table = decoded_no_spots.to_decoded_dataframe() - - with TemporaryDirectory() as dir_: - filename = os.path.join(dir_, 'test.csv') - decoded_spot_table.save_csv(os.path.join(dir_, 'test.csv')) - - # verify we can concatenate two empty tables - table1 = pd.read_csv(filename, index_col=0) - table2 = pd.read_csv(filename, index_col=0) - pd.concat([table1, table2], axis=0) diff --git a/starfish/core/spots/DetectSpots/__init__.py b/starfish/core/spots/DetectSpots/__init__.py deleted file mode 100644 index 126b1b715..000000000 --- a/starfish/core/spots/DetectSpots/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -from ._base import DetectSpotsAlgorithm -from .blob import BlobDetector -from .local_max_peak_finder import LocalMaxPeakFinder -from .local_search_blob_detector import LocalSearchBlobDetector -from .trackpy_local_max_peak_finder import TrackpyLocalMaxPeakFinder - -# autodoc's automodule directive only captures the modules explicitly listed in __all__. -all_filters = { - filter_name: filter_cls - for filter_name, filter_cls in locals().items() - if isinstance(filter_cls, type) and issubclass(filter_cls, DetectSpotsAlgorithm) -} -__all__ = list(all_filters.keys()) diff --git a/starfish/core/spots/DetectSpots/_base.py b/starfish/core/spots/DetectSpots/_base.py deleted file mode 100644 index 3fafedffe..000000000 --- a/starfish/core/spots/DetectSpots/_base.py +++ /dev/null @@ -1,104 +0,0 @@ -from abc import abstractmethod -from typing import Any, Callable, Optional, Tuple, Union - -import numpy as np -import xarray as xr - -from starfish.core.imagestack.imagestack import ImageStack -from starfish.core.intensity_table.intensity_table import IntensityTable -from starfish.core.pipeline.algorithmbase import AlgorithmBase -from starfish.core.types import Axes, Number, SpotAttributes - - -class DetectSpotsAlgorithm(metaclass=AlgorithmBase): - """ - Starfish spot detectors use a variety of means to detect bright spots against - dark backgrounds. Starfish's spot detectors each have different strengths and weaknesses. - - **Fixed-position spot finders** - - The following spot finders have two modes of operation. - - The first mode is suitable for coded - experiments where genes are identified by patterns of spots over all rounds and channels of the - experiment. In this mode, the spot finders identify spots in a single image, which can be either - a dots auxiliary image, or a maximum intensity projection of the primary images. - The positions of the maxima are then measured in all other images, and the intensities across - the complete experiment are stored in an :ref:`IntensityTable` - - The second mode is suitable for assays that detect spots in a single round, such as single - molecule FISH and RNAscope. This mode simply finds all the spots and concatenates them into a - long-form IntensityTable. In this mode, the spots are not measured in images that correspond to - other :code:`(round, channel)` pairs; those positions of the IntensityTable are filled with - :code:`np.nan`. - - 1. The :py:class:`~starfish.spots._detect_spots.blob.BlobDetector` allows the user to pre-filter - an image using either a Laplacian-of-Gaussians or - Difference-of-Gaussians (fast approximation to Laplacian-of-Gaussians). These filters are - applied at with a user-specified variety of Gaussian kernel sizes, and the best-fitting size is - automatically selected. This allows this filter to detect Gaussian shaped blobs of various - sizes. - - 2. The :py:class:`~starfish.spots._detect_spots.local_max_peak_finder.LocalMaxPeakFinder` - identifies local maxima using the same machinery as the BlobDetector, except that - it requires that the user to pre-apply any filters to enhance spots. In exchange, it allows a - user to automatically select the threshold that separates foreground (spots) from background - (noise). - - In the future, starfish will combine the functionality of LocalMaxPeakFinder into BlobDetector - so that a user can detect blobs of multiple sizes *and* automatically find a stable threshold. - - 3. The - :py:class:`~starfish.spots._detect_spots.trackpy_local_max_peak_finder.TrackpyLocalMaxPeakFinder` - provides an implementation of the `Crocker-Grier `_ spot finding - algorithm. This method optionally preprocesses the image by performing a band pass and a - threshold. It then locates all peaks of brightness, characterizes the neighborhoods of the peaks - and takes only those with given total brightness (“mass”). Finally, it refines the positions of - each peak. - - .. _crocker_grier: https://physics.nyu.edu/grierlab/methods3c/ - - **Fuzzy-position spot finders** - - In addition to the spot finders above, we expose single additional spot finder that, while very - similar in implementation to the BlobFinder, is able to adjust to small local spatial - perturbations in the centroid of the spot across rounds and channels, such as those that might - occur from small shifts to the tissue or stage. - - 1. The - :py:class:`~starfish.spots._detect_spots.local_search_blob_detector.LocalSearchBlobDetector` - is a Gaussian blob detector finds spots in all rounds and channels independently, then, given - each spot in a user specified "anchor round", selects the closest spot by spatial position in - all other rounds and aggregates those into codes which can subsequently be decoded. This Spot - detector is only applicable to experiments with "one-hot" codebooks, such as those generated - by in-situ sequencing, which guarantee that only one channel will be "on" per round. - - """ - - @abstractmethod - def run( - self, - primary_image: ImageStack, - blobs_image: Optional[ImageStack] = None, - blobs_axes: Optional[Tuple[Axes, ...]] = None, - *args, - ) -> Union[IntensityTable, Tuple[IntensityTable, Any]]: - """Finds spots in an ImageStack""" - raise NotImplementedError() - - @abstractmethod - def image_to_spots(self, data_image: Union[np.ndarray, xr.DataArray]) -> SpotAttributes: - """Finds spots in a 3d volume""" - raise NotImplementedError() - - @staticmethod - def _get_measurement_function( - measurement_type: str - ) -> Callable[[Union[np.ndarray, xr.DataArray]], Number]: - try: - measurement_function = getattr(np, measurement_type) - except AttributeError: - raise ValueError( - f'measurement_type must be a numpy reduce function such as "max" or "mean". ' - f'{measurement_type} not found.') - return measurement_function diff --git a/starfish/core/spots/DetectSpots/blob.py b/starfish/core/spots/DetectSpots/blob.py deleted file mode 100644 index cec5444ff..000000000 --- a/starfish/core/spots/DetectSpots/blob.py +++ /dev/null @@ -1,163 +0,0 @@ -from typing import Optional, Tuple, Union - -import numpy as np -import pandas as pd -import xarray as xr -from skimage.feature import blob_dog, blob_doh, blob_log - -from starfish.core.imagestack.imagestack import ImageStack -from starfish.core.intensity_table.intensity_table import IntensityTable -from starfish.core.types import Axes, Features, Number, SpotAttributes -from ._base import DetectSpotsAlgorithm -from .detect import detect_spots, measure_spot_intensity - -blob_detectors = { - 'blob_dog': blob_dog, - 'blob_doh': blob_doh, - 'blob_log': blob_log -} - - -class BlobDetector(DetectSpotsAlgorithm): - """ - Multi-dimensional gaussian spot detector - - This method is a wrapper for skimage.feature.blob_log - - Parameters - ---------- - min_sigma : float - The minimum standard deviation for Gaussian Kernel. Keep this low to - detect smaller blobs. - max_sigma : float - The maximum standard deviation for Gaussian Kernel. Keep this high to - detect larger blobs. - num_sigma : int - The number of intermediate values of standard deviations to consider - between `min_sigma` and `max_sigma`. - threshold : float - The absolute lower bound for scale space maxima. Local maxima smaller - than thresh are ignored. Reduce this to detect blobs with less - intensities. - overlap : float [0, 1] - If two spots have more than this fraction of overlap, the spots are combined - (default = 0.5) - measurement_type : str ['max', 'mean'] - name of the function used to calculate the intensity for each identified spot area - detector_method: str ['blob_dog', 'blob_doh', 'blob_log'] - name of the type of detection method used from skimage.feature, default: blob_log - - Notes - ----- - see also: http://scikit-image.org/docs/dev/auto_examples/features_detection/plot_blob.html - - """ - - def __init__( - self, - min_sigma: Number, - max_sigma: Number, - num_sigma: int, - threshold: Number, - overlap: float = 0.5, - measurement_type='max', - is_volume: bool = True, - detector_method: str = 'blob_log' - ) -> None: - - self.min_sigma = min_sigma - self.max_sigma = max_sigma - self.num_sigma = num_sigma - self.threshold = threshold - self.overlap = overlap - self.is_volume = is_volume - self.measurement_function = self._get_measurement_function(measurement_type) - try: - self.detector_method = blob_detectors[detector_method] - except ValueError: - raise ValueError("Detector method must be one of {blob_log, blob_dog, blob_doh}") - - def image_to_spots(self, data_image: Union[np.ndarray, xr.DataArray]) -> SpotAttributes: - """ - Find spots using a gaussian blob finding algorithm - - Parameters - ---------- - data_image : Union[np.ndarray, xr.DataArray] - image containing spots to be detected - - Returns - ------- - SpotAttributes : - DataFrame of metadata containing the coordinates, intensity and radius of each spot - - """ - - fitted_blobs_array: np.ndarray = self.detector_method( - data_image, - self.min_sigma, - self.max_sigma, - self.num_sigma, - self.threshold, - self.overlap - ) - - if fitted_blobs_array.shape[0] == 0: - return SpotAttributes.empty(extra_fields=['intensity', 'spot_id']) - - # create the SpotAttributes Table - columns = [Axes.ZPLANE.value, Axes.Y.value, Axes.X.value, Features.SPOT_RADIUS] - fitted_blobs = pd.DataFrame(data=fitted_blobs_array, columns=columns) - - # convert standard deviation of gaussian kernel used to identify spot to radius of spot - converted_radius = np.round(fitted_blobs[Features.SPOT_RADIUS] * np.sqrt(3)) - fitted_blobs[Features.SPOT_RADIUS] = converted_radius - - # convert the array to int so it can be used to index - spots = SpotAttributes(fitted_blobs) - - spots.data['intensity'] = measure_spot_intensity( - data_image, spots, self.measurement_function) - spots.data['spot_id'] = np.arange(spots.data.shape[0]) - - return spots - - def run( - self, - primary_image: ImageStack, - blobs_image: Optional[ImageStack] = None, - blobs_axes: Optional[Tuple[Axes, ...]] = None, - n_processes: Optional[int] = None, - *args, - ) -> IntensityTable: - """ - Find spots. - - Parameters - ---------- - primary_image : ImageStack - ImageStack where we find the spots in. - blobs_image : Optional[ImageStack] - If provided, spots will be found in the blobs image, and intensities will be measured - across rounds and channels. Otherwise, spots are measured independently for each channel - and round. - blobs_axes : Optional[Tuple[Axes, ...]] - If blobs_image is provided, blobs_axes must be provided as well. blobs_axes represents - the axes across which the blobs image is max projected before spot detection is done. - n_processes : Optional[int] = None, - Number of processes to devote to spot finding. - """ - DeprecationWarning("Starfish is embarking on a SpotFinding data structures refactor" - "(See https://github.com/spacetx/starfish/issues/1514) This version of " - "BlobDetection will soon be deleted. To find and decode your spots " - "please instead use FindSpots.BlobDetector followed by DecodeSpots.") - intensity_table = detect_spots( - data_stack=primary_image, - spot_finding_method=self.image_to_spots, - reference_image=blobs_image, - reference_image_max_projection_axes=blobs_axes, - measurement_function=self.measurement_function, - n_processes=n_processes, - radius_is_gyration=False) - - return intensity_table diff --git a/starfish/core/spots/DetectSpots/detect.py b/starfish/core/spots/DetectSpots/detect.py deleted file mode 100644 index 2c8cbc203..000000000 --- a/starfish/core/spots/DetectSpots/detect.py +++ /dev/null @@ -1,249 +0,0 @@ -from functools import partial -from itertools import product -from typing import Callable, Dict, Optional, Sequence, Tuple, Union - -import numpy as np -import pandas as pd -import xarray as xr - -from starfish.core.imagestack.imagestack import ImageStack -from starfish.core.intensity_table.intensity_table import IntensityTable -from starfish.core.intensity_table.intensity_table_coordinates import \ - transfer_physical_coords_to_intensity_table -from starfish.core.types import Axes, Features, Number, SpotAttributes - - -def measure_spot_intensity( - image: Union[np.ndarray, xr.DataArray], - spots: SpotAttributes, - measurement_function: Callable[[Union[np.ndarray, xr.DataArray]], Number], - radius_is_gyration: bool=False, -) -> pd.Series: - """measure the intensity of each spot in spots in the corresponding image - - Parameters - ---------- - image : Union[np.ndarray, xr.DataArray], - 3-d volume in which to measure intensities - spots : pd.DataFrame - SpotAttributes table containing coordinates and radii of spots - measurement_function : Callable[[Union[np.ndarray, xr.DataArray]], Number]) - Function to apply over the spot volumes to identify the intensity (e.g. max, mean, ...) - radius_is_gyration : bool - if True, indicates that the radius corresponds to radius of gyration, which is a function of - spot intensity, but typically is a smaller unit than the sigma generated by blob_log. - In this case, the spot's bounding box is rounded up instead of down when measuring - intensity. (default False) - - Returns - ------- - pd.Series : - Intensities for each spot in SpotAttributes - - """ - - def fn(row: pd.Series) -> Number: - data = image[ - row['z_min']:row['z_max'], - row['y_min']:row['y_max'], - row['x_min']:row['x_max'] - ] - return measurement_function(data) - - if radius_is_gyration: - radius = np.ceil(spots.data[Features.SPOT_RADIUS]).astype(int) + 1 # round up - else: - radius = spots.data[Features.SPOT_RADIUS].astype(int) # truncate down to nearest integer - for v, max_size in zip(['z', 'y', 'x'], image.shape): - # numpy does exclusive max indexing, so need to subtract 1 from min to get centered box - spots.data[f'{v}_min'] = np.clip(spots.data[v] - (radius - 1), 0, None) - spots.data[f'{v}_max'] = np.clip(spots.data[v] + radius, None, max_size) - return spots.data[['z_min', 'z_max', 'y_min', 'y_max', 'x_min', 'x_max']].astype(int).apply( - fn, - axis=1 - ) - - -def measure_spot_intensities( - data_image: ImageStack, - spot_attributes: SpotAttributes, - measurement_function: Callable[[Union[np.ndarray, xr.DataArray]], Number], - radius_is_gyration: bool=False, -) -> IntensityTable: - """given spots found from a reference image, find those spots across a data_image - - Parameters - ---------- - data_image : ImageStack - ImageStack containing multiple volumes for which spots' intensities must be calculated - spot_attributes : pd.Dataframe - Locations and radii of spots - measurement_function : Callable[[Union[np.ndarray, xr.DataArray]], Number]) - Function to apply over the spot volumes to identify the intensity (e.g. max, mean, ...) - radius_is_gyration : bool - if True, indicates that the radius corresponds to radius of gyration, which is a function of - spot intensity, but typically is a smaller unit than the sigma generated by blob_log. - In this case, the spot's bounding box is rounded up instead of down when measuring - intensity. (default False) - - Returns - ------- - IntensityTable : - 3d tensor of (spot, channel, round) information for each coded spot - - """ - - # determine the shape of the intensity table - ch_labels = data_image.axis_labels(Axes.CH) - round_labels = data_image.axis_labels(Axes.ROUND) - - # construct the empty intensity table - intensity_table = IntensityTable.zeros( - spot_attributes=spot_attributes, - round_labels=round_labels, - ch_labels=ch_labels, - ) - - # if no spots were detected, return the empty IntensityTable - if intensity_table.sizes[Features.AXIS] == 0: - return intensity_table - - # fill the intensity table - indices = product(ch_labels, round_labels) - for c, r in indices: - image, _ = data_image.get_slice({Axes.CH: c, Axes.ROUND: r}) - blob_intensities: pd.Series = measure_spot_intensity( - image, - spot_attributes, - measurement_function, - radius_is_gyration=radius_is_gyration - ) - intensity_table.loc[dict(c=c, r=r)] = blob_intensities - - return intensity_table - - -def concatenate_spot_attributes_to_intensities( - spot_attributes: Sequence[Tuple[SpotAttributes, Dict[Axes, int]]] -) -> IntensityTable: - """ - Merge multiple spot attributes frames into a single IntensityTable without merging across - channels and imaging rounds - - Parameters - ---------- - spot_attributes : Sequence[Tuple[SpotAttributes, Dict[Axes, int]]] - A sequence of SpotAttribute objects and the indices (channel, round) that each object is - associated with. - - Returns - ------- - IntensityTable : - concatenated input SpotAttributes, converted to an IntensityTable object - - """ - ch_values: Sequence[int] = sorted(set(inds[Axes.CH] for _, inds in spot_attributes)) - round_values: Sequence[int] = sorted(set(inds[Axes.ROUND] for _, inds in spot_attributes)) - - all_spots = pd.concat([sa.data for sa, inds in spot_attributes], sort=True) - # this drop call ensures only x, y, z, radius, and quality, are passed to the IntensityTable - features_coordinates = all_spots.drop(['spot_id', 'intensity'], axis=1) - - intensity_table = IntensityTable.zeros( - SpotAttributes(features_coordinates), round_values, ch_values, - ) - - i = 0 - for attrs, inds in spot_attributes: - for _, row in attrs.data.iterrows(): - selector = dict(features=i, c=inds[Axes.CH], r=inds[Axes.ROUND]) - intensity_table.loc[selector] = row['intensity'] - i += 1 - - return intensity_table - - -def detect_spots(data_stack: ImageStack, - spot_finding_method: Callable[..., SpotAttributes], - spot_finding_kwargs: Dict = None, - reference_image: Optional[ImageStack] = None, - reference_image_max_projection_axes: Optional[Tuple[Axes, ...]] = None, - measurement_function: Callable[[Union[np.ndarray, xr.DataArray]], Number] = np.max, - radius_is_gyration: bool = False, - n_processes: Optional[int] = None) -> IntensityTable: - """Apply a spot_finding_method to a ImageStack - - Parameters - ---------- - data_stack : ImageStack - The ImageStack containing spots - spot_finding_method : Callable[..., IntensityTable] - The method to identify spots - spot_finding_kwargs : Dict - additional keyword arguments to pass to spot_finding_method - reference_image : xr.DataArray - (Optional) a reference image. If provided, spots will be found in this image, and then - the locations that correspond to these spots will be measured across each channel and round, - filling in the values in the IntensityTable - reference_image_max_projection_axes : Tuple[Axes] - Generate the reference image by max-projecting reference_image across these axes. - measurement_function : Callable[[Union[np.ndarray, xr.DataArray]], Number] - the function to apply over the spot area to extract the intensity value (default 'np.max') - radius_is_gyration : bool - if True, indicates that the radius corresponds to radius of gyration, which is a function of - spot intensity, but typically is a smaller unit than the sigma generated by blob_log. - In this case, the spot's bounding box is rounded up instead of down when measuring - intensity. (default False) - is_volume: bool - If True, pass 3d volumes (x, y, z) to func, else pass 2d tiles (x, y) to func. (default - True) - n_processes : Optional[int] - The number of processes to use in stack.transform if reference image is None. - If None, uses the output of os.cpu_count() (default = None). - - Notes - ----- - - This class will always detect spots in 3d. If 2d spot detection is desired, the data should - be projected down to "fake 3d" prior to submission to this function - - If neither reference_image nor reference_from_max_projection are passed, spots will be - detected _independently_ in each channel. This assumes a non-multiplex imaging experiment, - as only one (ch, round) will be measured for each spot. - - Returns - ------- - IntensityTable : - IntensityTable containing the intensity of each spot, its radius, and location in pixel - coordinates - - """ - - if spot_finding_kwargs is None: - spot_finding_kwargs = {} - - if reference_image is not None: - if reference_image_max_projection_axes is not None: - reference_image = reference_image.reduce( - reference_image_max_projection_axes, func="max") - data_image = reference_image._squeezed_numpy(*reference_image_max_projection_axes) - else: - data_image = reference_image.xarray - reference_spot_locations = spot_finding_method(data_image, **spot_finding_kwargs) - intensity_table = measure_spot_intensities( - data_image=data_stack, - spot_attributes=reference_spot_locations, - measurement_function=measurement_function, - radius_is_gyration=radius_is_gyration, - ) - else: # don't use a reference image, measure each - spot_finding_method = partial(spot_finding_method, **spot_finding_kwargs) - spot_attributes_list = data_stack.transform( - func=spot_finding_method, - group_by={Axes.ROUND, Axes.CH}, - n_processes=n_processes - ) - intensity_table = concatenate_spot_attributes_to_intensities(spot_attributes_list) - - transfer_physical_coords_to_intensity_table(image_stack=data_stack, - intensity_table=intensity_table) - - return intensity_table diff --git a/starfish/core/spots/DetectSpots/local_max_peak_finder.py b/starfish/core/spots/DetectSpots/local_max_peak_finder.py deleted file mode 100644 index d75c7571c..000000000 --- a/starfish/core/spots/DetectSpots/local_max_peak_finder.py +++ /dev/null @@ -1,299 +0,0 @@ -from typing import List, Optional, Tuple, Union - -import numpy as np -import pandas as pd -import xarray as xr -from scipy.ndimage import label -from skimage.feature import peak_local_max -from skimage.measure import regionprops -from sympy import Line, Point -from tqdm import tqdm - -from starfish.core.config import StarfishConfig -from starfish.core.imagestack.imagestack import ImageStack -from starfish.core.intensity_table.intensity_table import IntensityTable -from starfish.core.types import Axes, Features, Number, SpotAttributes -from ._base import DetectSpotsAlgorithm -from .detect import detect_spots - - -# TODO ambrosejcarr, ttung: https://github.com/spacetx/starfish/issues/708 -# Currently, spot finders cannot propagate state, which makes the flow for this -# spot finder confusing. One would expect to have access to the private parameters -# however, they are lost due to the memory-space forking induced by multi-processing. - -class LocalMaxPeakFinder(DetectSpotsAlgorithm): - """ - 2-dimensional local-max peak finder that wraps skimage.feature.peak_local_max - - Parameters - ---------- - min_distance : int - Minimum number of pixels separating peaks in a region of 2 * min_distance + 1 - (i.e. peaks are separated by at least min_distance). To find the maximum number of - peaks, use min_distance=1. - stringency : int - min_obj_area : int - max_obj_area : int - threshold : Optional[Number] - measurement_type : str, {'max', 'mean'} - default 'max' calculates the maximum intensity inside the object - min_num_spots_detected : int - When fewer than this number of spots are detected, spot searching for higher threshold - values. (default = 3) - is_volume : bool - Not supported. For 3d peak detection please use TrackpyLocalMaxPeakFinder. - (default=False) - verbose : bool - If True, report the percentage completed (default = False) during processing - - Notes - ----- - http://scikit-image.org/docs/dev/api/skimage.feature.html#skimage.feature.peak_local_max - """ - - def __init__( - self, min_distance: int, stringency: int, min_obj_area: int, max_obj_area: int, - threshold: Optional[Number]=None, measurement_type: str='max', - min_num_spots_detected: int=3, is_volume: bool=False, verbose: bool=True - ) -> None: - - self.min_distance = min_distance - self.stringency = stringency - self.min_obj_area = min_obj_area - self.max_obj_area = max_obj_area - self.threshold = threshold - self.min_num_spots_detected = min_num_spots_detected - - self.measurement_function = self._get_measurement_function(measurement_type) - - self.is_volume = is_volume - self.verbose = verbose - - def _compute_num_spots_per_threshold(self, img: np.ndarray) -> Tuple[np.ndarray, List[int]]: - """Computes the number of detected spots for each threshold - - Parameters - ---------- - img : np.ndarray - The image in which to count spots - - Returns - ------- - np.ndarray : - thresholds - List[int] : - spot counts - """ - - # thresholds to search over - thresholds = np.linspace(img.min(), img.max(), num=100) - - # number of spots detected at each threshold - spot_counts = [] - - # where we stop our threshold search - stop_threshold = None - - if self.verbose and StarfishConfig().verbose: - threshold_iter = tqdm(thresholds) - print('Determining optimal threshold ...') - else: - threshold_iter = thresholds - - for stop_index, threshold in enumerate(threshold_iter): - spots = peak_local_max( - img, - min_distance=self.min_distance, - threshold_abs=threshold, - exclude_border=False, - indices=True, - num_peaks=np.inf, - footprint=None, - labels=None - ) - - # stop spot finding when the number of detected spots falls below min_num_spots_detected - if len(spots) <= self.min_num_spots_detected: - stop_threshold = threshold - if self.verbose: - print(f'Stopping early at threshold={threshold}. Number of spots fell below: ' - f'{self.min_num_spots_detected}') - break - else: - spot_counts.append(len(spots)) - - if stop_threshold is None: - stop_threshold = thresholds.max() - - if len(thresholds > 1): - thresholds = thresholds[:stop_index] - spot_counts = spot_counts[:stop_index] - - return thresholds, spot_counts - - def _select_optimal_threshold(self, thresholds: np.ndarray, spot_counts: List[int]) -> float: - # calculate the gradient of the number of spots - grad = np.gradient(spot_counts) - optimal_threshold_index = np.argmin(grad) - - # only consider thresholds > than optimal threshold - thresholds = thresholds[optimal_threshold_index:] - grad = grad[optimal_threshold_index:] - - # if all else fails, return 0. - selected_thr = 0 - - if len(thresholds) > 1: - - distances = [] - - # create a line whose end points are the threshold and and corresponding gradient value - # for spot_counts corresponding to the threshold - start_point = Point(thresholds[0], grad[0]) - end_point = Point(thresholds[-1], grad[-1]) - line = Line(start_point, end_point) - - # calculate the distance between all points and the line - for k in range(len(thresholds)): - p = Point(thresholds[k], grad[k]) - dst = line.distance(p) - distances.append(dst.evalf()) - - # remove the end points - thresholds = thresholds[1:-1] - distances = distances[1:-1] - - # select the threshold that has the maximum distance from the line - # if stringency is passed, select a threshold that is n steps higher, where n is the - # value of stringency - if distances: - thr_idx = np.argmax(np.array(distances)) - - if thr_idx + self.stringency < len(thresholds): - selected_thr = thresholds[thr_idx + self.stringency] - else: - selected_thr = thresholds[thr_idx] - - return selected_thr - - def _compute_threshold(self, img: Union[np.ndarray, xr.DataArray]) -> float: - """Finds spots on a number of thresholds then selects and returns the optimal threshold - - Parameters - ---------- - img: Union[np.ndarray, xr.DataArray] - data array in which spots should be detected and over which to compute different - intensity thresholds - - Returns - ------- - Number : #TODO ambrosejcarr this should probably be a float - The intensity threshold - """ - img = np.asarray(img) - thresholds, spot_counts = self._compute_num_spots_per_threshold(img) - if len(spot_counts) == 0: - # this only happens when we never find more spots than `self.min_num_spots_detected` - return img.min() - return self._select_optimal_threshold(thresholds, spot_counts) - - def image_to_spots(self, data_image: Union[np.ndarray, xr.DataArray]) -> SpotAttributes: - """measure attributes of spots detected by binarizing the image using the selected threshold - - Parameters - ---------- - data_image : Union[np.ndarray, xr.DataArray] - image containing spots to be detected - - Returns - ------- - SpotAttributes - Attributes for each detected spot - """ - - threshold = self._compute_threshold(data_image) - - data_image = np.asarray(data_image) - - # identify each spot's size by binarizing and calculating regionprops - masked_image = data_image[:, :] > threshold - labels = label(masked_image)[0] - spot_props = regionprops(np.squeeze(labels)) - - # mask spots whose areas are too small or too large - for spot_prop in spot_props: - if spot_prop.area < self.min_obj_area or spot_prop.area > self.max_obj_area: - masked_image[0, spot_prop.coords[:, 0], spot_prop.coords[:, 1]] = 0 - - # store re-calculated regionprops and labels based on the area-masked image - labels = label(masked_image)[0] - - if self.verbose: - print('computing final spots ...') - - spot_coords = peak_local_max( - data_image, - min_distance=self.min_distance, - threshold_abs=threshold, - exclude_border=False, - indices=True, - num_peaks=np.inf, - footprint=None, - labels=labels - ) - - # this is a workaround for the fix https://github.com/scikit-image/scikit-image/pull/4263 - if spot_coords.shape[0] == 0: - spot_coords = np.empty((0, data_image.ndim), np.int) - - # TODO how to get the radius? unlikely that this can be pulled out of - # self._spot_props, since the last call to peak_local_max can find multiple - # peaks per label - res = {Axes.X.value: spot_coords[:, 2], - Axes.Y.value: spot_coords[:, 1], - Axes.ZPLANE.value: spot_coords[:, 0], - Features.SPOT_RADIUS: 1, - Features.SPOT_ID: np.arange(spot_coords.shape[0]), - Features.INTENSITY: data_image[spot_coords[:, 0], - spot_coords[:, 1], - spot_coords[:, 2]] - } - - return SpotAttributes(pd.DataFrame(res)) - - def run( - self, - primary_image: ImageStack, - blobs_image: Optional[ImageStack] = None, - blobs_axes: Optional[Tuple[Axes, ...]] = None, - n_processes: Optional[int] = None, - *args, - ) -> IntensityTable: - """ - Find spots. - - Parameters - ---------- - primary_image : ImageStack - ImageStack where we find the spots in. - blobs_image : Optional[ImageStack] - If provided, spots will be found in the blobs image, and intensities will be measured - across rounds and channels. Otherwise, spots are measured independently for each channel - and round. - blobs_axes : Optional[Tuple[Axes, ...]] - If blobs_image is provided, blobs_axes must be provided as well. blobs_axes represents - the axes across which the blobs image is max projected before spot detection is done. - n_processes : Optional[int] = None, - Number of processes to devote to spot finding. - """ - intensity_table = detect_spots( - data_stack=primary_image, - spot_finding_method=self.image_to_spots, - reference_image=blobs_image, - reference_image_max_projection_axes=blobs_axes, - measurement_function=self.measurement_function, - n_processes=n_processes, - radius_is_gyration=False) - - return intensity_table diff --git a/starfish/core/spots/DetectSpots/local_search_blob_detector.py b/starfish/core/spots/DetectSpots/local_search_blob_detector.py deleted file mode 100644 index 666edb5ae..000000000 --- a/starfish/core/spots/DetectSpots/local_search_blob_detector.py +++ /dev/null @@ -1,453 +0,0 @@ -""" -Note: this blob detector uses the same underlying methods (skimage.blob_log, skimage.blob_dog) as -the starfish.spots.SpotFinder.BlobDetector. In the future, this and the packages other blob -detectors should be refactored to merge their functionalities. - -See: https://github.com/spacetx/starfish/issues/1005 -""" - -from collections import defaultdict -from typing import Any, Dict, Hashable, List, Mapping, Optional, Sequence, Tuple, Union - -import numpy as np -import pandas as pd -import xarray as xr -from sklearn.neighbors import NearestNeighbors - -from starfish.core.compat import blob_dog, blob_log -from starfish.core.image.Filter.util import determine_axes_to_group_by -from starfish.core.imagestack.imagestack import ImageStack -from starfish.core.intensity_table.intensity_table import IntensityTable -from starfish.core.intensity_table.intensity_table_coordinates import \ - transfer_physical_coords_to_intensity_table -from starfish.core.types import Axes, Features, Number, SpotAttributes -from ._base import DetectSpotsAlgorithm - -blob_detectors = { - 'blob_dog': blob_dog, - 'blob_log': blob_log -} - - -class LocalSearchBlobDetector(DetectSpotsAlgorithm): - """ - Multi-dimensional gaussian spot detector. - - This method is a wrapper for skimage.feature.blob_log that engages in a local radius search - to match spots up across rounds. In sparse conditions, the search can be seeded in one round - with a relatively large radius. In crowded images, the spots can be seeded in all rounds and - consensus filtering can be used to extract codes that are consistently extracted across - rounds. - - In brief, this spot finder operates on a few assumptions: - 1. Codes that represent transcripts are one-hot, meaning that in each round, one and only one - channel should be "on" for a given transcript. - 2. Due to experimental conditions, there may be a small amount of jitter the the exact position - of each spots. - 3. To build codes in these circumstances, one must be able to account for small deviations in - physical position to reconstruct codes. - - The LocalSearchBlobDetector accomplishes this as follows: - - 1. Identify spots independently in all rounds and channels. - 2. Identify an `anchor round`. Spots in this round will serve as the central location of a - local search. The best round to select for the anchor round is the one that has the best - signal to noise ratio. - 3. In rounds other than the anchor round, search for the closest spot that is within - `search_radius` pixels of the anchor spot. Intuitively, search radius should be the smallest - value possible that accounts for local jitter in spot position introduced by your - experimental approach. A common value is 2.5 pixels. - 4. Construct an IntensityTable, where for each anchor spot, spots in other rounds and channels - are merged into single features, storing the (z, y, x) coordinates of the anchor spots. - - The IntensityTable can then be decoded with downstream tools. This approach was used to good - effect in the STARmap paper (DOI: 10.1126/science.aat5691) - - Parameters - ---------- - min_sigma : float - The minimum standard deviation for Gaussian Kernel. Keep this low to - detect smaller blobs. - max_sigma : float - The maximum standard deviation for Gaussian Kernel. Keep this high to - detect larger blobs. - threshold : float - The absolute lower bound for scale space maxima. Local maxima smaller - than thresh are ignored. Reduce this to detect blobs with less - intensities. - detector_method : str ['blob_dog', 'blob_log'] - Name of the type of detection method used from skimage.feature, default: blob_log. - search_radius : int - Number of pixels over which to search for spots in other rounds and channels. - anchor_round : int - The imaging round against which other rounds will be checked for spots in the same - approximate pixel location. - detector_kwargs : Dict[str, Any] - Additional keyword arguments to pass to the detector_method. - - """ - - def __init__( - self, - min_sigma: Union[Number, Tuple[Number, ...]], - max_sigma: Union[Number, Tuple[Number, ...]], - num_sigma: int, - threshold: Number, - detector_method: str='blob_log', - exclude_border: Optional[int]=None, - search_radius: int=3, - anchor_round: int=1, - **detector_kwargs, - ) -> None: - - self.min_sigma = min_sigma - self.max_sigma = max_sigma - self.threshold = threshold - self.is_volume = True # TODO test 2-d spot calling - self.exclude_border = exclude_border - self.search_radius = search_radius - self.anchor_round = anchor_round - self.detector_kwargs = detector_kwargs - try: - self.detector_method = blob_detectors[detector_method] - except ValueError: - raise ValueError(f"Detector method must be one of {list(blob_detectors.keys())}") - - def _spot_finder(self, data: xr.DataArray) -> pd.DataFrame: - """Find spots in a data volume. - - Parameters - ---------- - data : xr.DataArray - 3D (z, y, x) data volume in which spots will be identified. - - Returns - ------- - pd.DataFrame - DataFrame wrapping the output of skimage blob_log or blob_dog with named outputs. - - """ - - # results = np.ndarray: n_spots x (z, y, x, radius, sigma_z, sigma_y, sigma_x) - results = self.detector_method( - data, - min_sigma=self.min_sigma, - max_sigma=self.max_sigma, - threshold=self.threshold, - exclude_border=self.exclude_border, - **self.detector_kwargs - ) - - # if spots were detected - if results.shape[0]: - - # measure intensities - z_inds = results[:, 0].astype(int) - y_inds = results[:, 1].astype(int) - x_inds = results[:, 2].astype(int) - intensities = data.values[tuple([z_inds, y_inds, x_inds])] - - # collapse radius if sigma is non-scalar - if all(np.isscalar(s) for s in (self.min_sigma, self.max_sigma)): - radius = results[:, 3] - else: - radius = np.mean(results[:, -3:], axis=1) - - # construct dataframe - spot_data = pd.DataFrame( - data={ - "intensity": intensities, - Axes.ZPLANE: z_inds, - Axes.Y: y_inds, - Axes.X: x_inds, - Features.SPOT_RADIUS: radius - } - ) - - else: - spot_data = pd.DataFrame( - data=np.array( - [], - dtype=[ - ('intensity', float), ('z', int), ('y', int), ('x', int), - (Features.SPOT_RADIUS, float) - ] - ) - ) - - return spot_data - - def _find_spots( - self, - data_stack: ImageStack, - verbose: bool=False, - n_processes: Optional[int]=None - ) -> Dict[Tuple[int, int], np.ndarray]: - """Find spots in all (z, y, x) volumes of an ImageStack. - - Parameters - ---------- - data_stack : ImageStack - Stack containing spots to find. - - Returns - ------- - Dict[Tuple[int, int], np.ndarray] - Dictionary mapping (round, channel) pairs to a spot table generated by skimage blob_log - or blob_dog. - - """ - # find spots in each (r, c) volume - transform_results = data_stack.transform( - self._spot_finder, - group_by=determine_axes_to_group_by(self.is_volume), - n_processes=n_processes, - ) - - # create output dictionary - spot_results = {} - for spot_calls, axes_dict in transform_results: - r = axes_dict[Axes.ROUND] - c = axes_dict[Axes.CH] - spot_results[r, c] = spot_calls - - return spot_results - - @staticmethod - def _merge_spots_by_round( - spot_results: Dict[Tuple[int, int], pd.DataFrame] - ) -> Dict[int, pd.DataFrame]: - """Merge DataFrames containing spots from different channels into one DataFrame per round. - - Executed on the output of - Parameters - ---------- - spot_results : Dict[Tuple[int, int], pd.DataFrame] - Output of _find_spots. Dictionary mapping (round, channel) volumes to the spots detected - in them. - - Returns - ------- - Dict[int, pd.DataFrame] - Dictionary mapping round volumes to the spots detected in them. Contains an additional - column labeled by Axes.CH which identifies the channel in which a given spot was - detected. - - """ - - # add channel information to each table and add it to round_data - round_data: Mapping[int, List] = defaultdict(list) - for (r, c), df in spot_results.items(): - df[Axes.CH.value] = np.full(df.shape[0], c) - round_data[r].append(df) - - # create one dataframe per round - round_dataframes = { - k: pd.concat(v, axis=0).reset_index().drop('index', axis=1) - for k, v in round_data.items() - } - - return round_dataframes - - @staticmethod - def _match_spots( - round_dataframes: Dict[int, pd.DataFrame], search_radius: int, anchor_round: int - ) -> Tuple[pd.DataFrame, pd.DataFrame]: - """ For each spot in anchor round, find the closest spot within search_radius in all rounds. - - Parameters - ---------- - round_dataframes : Dict[int, pd.DataFrame] - Output from _merge_spots_by_round, contains mapping of image volumes from each round to - all the spots detected in them. - search_radius : int - The maximum (euclidean) distance in pixels for a spot to be considered matching in - a round subsequent to the anchor round. - anchor_round : int - The imaging round to seed the local search from. - - Returns - ------- - pd.DataFrame - Spots x rounds dataframe containing the distances to the nearest spot. np.nan if - no spot is detected within search radius - pd.DataFrame - Spots x rounds dataframe containing the indices of the nearest spot to the - corresponding round_dataframe. np.nan if no spot is detected within search radius. - - """ - reference_df = round_dataframes[anchor_round] - reference_coordinates = reference_df[[Axes.ZPLANE, Axes.Y, Axes.X]] - - dist = pd.DataFrame( - data=np.zeros((reference_df.shape[0], len(round_dataframes)), dtype=float), - columns=list(round_dataframes.keys()) - ) - ind = pd.DataFrame( - data=np.zeros((reference_df.shape[0], len(round_dataframes)), dtype=np.int32), - columns=list(round_dataframes.keys()) - ) - - # fill data for anchor round; every spot is a perfect match to itself. - ind[anchor_round] = np.arange(reference_df.shape[0], dtype=np.int32) - - # get spots matching across rounds - for r in sorted(set(round_dataframes.keys()) - {anchor_round, }): - query_df = round_dataframes[r] - query_coordinates = query_df[[Axes.ZPLANE, Axes.Y, Axes.X]] - - # Build the classifier; chose NN over radius neighbors because data structures are - # amenable to vectorization, which improves execution time. - # TODO ambrosejcarr use n_neighbors > 1 to break ties, enable codebook-based finding - # use additional axes in dist, ind to retain vectorization. - nn = NearestNeighbors(n_neighbors=1) - nn.fit(query_coordinates) - - distances, indices = nn.kneighbors(reference_coordinates) - dist[r] = distances - ind[r] = indices - - return dist, ind - - @staticmethod - def _build_intensity_table( - round_dataframes: Dict[int, pd.DataFrame], - dist: pd.DataFrame, - ind: pd.DataFrame, - channels: Sequence[int], - rounds: Sequence[int], - search_radius: int, - anchor_round: int, - ) -> IntensityTable: - """Construct an intensity table from the results of a local search over detected spots - - Parameters - ---------- - round_dataframes : Dict[int, pd.DataFrame] - Output from _merge_spots_by_round, contains mapping of image volumes from each round to - all the spots detected in them. - dist, ind : pd.DataFrame - Output from _match_spots, contains distances and indices to the nearest spot for each - spot in anchor_round. - channels, rounds : Sequence[int] - Channels and rounds present in the ImageStack from which spots were detected. - search_radius : int - The maximum (euclidean) distance in pixels for a spot to be considered matching in - a round subsequent to the anchor round. - anchor_round : int - The imaging round to seed the local search from. - - """ - - anchor_df = round_dataframes[anchor_round] - - # create empty IntensityTable filled with np.nan - data = np.full((dist.shape[0], len(channels), len(rounds)), fill_value=np.nan) - dims = (Features.AXIS, Axes.CH.value, Axes.ROUND.value) - coords: Mapping[Hashable, Tuple[str, Any]] = { - Features.SPOT_RADIUS: (Features.AXIS, anchor_df[Features.SPOT_RADIUS]), - Axes.ZPLANE.value: (Features.AXIS, anchor_df[Axes.ZPLANE]), - Axes.Y.value: (Features.AXIS, anchor_df[Axes.Y]), - Axes.X.value: (Features.AXIS, anchor_df[Axes.X]), - Axes.ROUND.value: (Axes.ROUND.value, rounds), - Axes.CH.value: (Axes.CH.value, channels) - } - intensity_table = IntensityTable(data=data, dims=dims, coords=coords) - - # fill IntensityTable - for r in rounds: - - # get intensity data and indices - spot_indices = ind[r] - intensity_data = round_dataframes[r].loc[spot_indices, 'intensity'] - channel_index = round_dataframes[r].loc[spot_indices, Axes.CH] - round_index = np.full(ind.shape[0], fill_value=r, dtype=int) - feature_index = np.arange(ind.shape[0], dtype=int) - - # mask spots that are outside the search radius - mask = np.asarray(dist[r] < search_radius) # indices need not match - feature_index = feature_index[mask] - channel_index = channel_index[mask] - round_index = round_index[mask] - intensity_data = intensity_data[mask] - - # need numpy indexing to set values in vectorized manner - intensity_table.values[feature_index, channel_index, round_index] = intensity_data - - return intensity_table - - def run( - self, - primary_image: ImageStack, - blobs_image: Optional[ImageStack] = None, - blobs_axes: Optional[Tuple[Axes, ...]] = None, - verbose: bool = False, - n_processes: Optional[int] = None, - *args, - ) -> IntensityTable: - """Find 1-hot coded spots in data. - - Parameters - ---------- - data : ImageStack - Image data containing coded spots. - verbose : bool - If True, report on progress of spot finding. - n_processes : Optional[int] - Number of processes to devote to spot finding. If None, will use the number of available - cpus (Default None). - - Notes - ----- - blobs_image is an unused parameter that is included for testing purposes. It should not - be passed to this method. If it is passed, the method will trigger a ValueError. - - Returns - ------- - IntensityTable - Contains detected coded spots. - - """ - DeprecationWarning("Starfish is embarking on a SpotFinding data structures refactor" - "(See https://github.com/spacetx/starfish/issues/1514) This version of " - "LocalSearchBlobDetector will soon be deleted. To find and decode your" - " spots please instead use FindSpots.BlobDetector then " - "DecodeSpots.PerRoundMaxChannel with the parameter " - "TraceBuildingStrategies.NEAREST_NEIGHBOR. See example in STARmap.py") - - if blobs_image is not None: - raise ValueError( - "blobs_image shouldn't be set for LocalSearchBlobDetector. This is likely a usage " - "error." - ) - - per_tile_spot_results = self._find_spots( - primary_image, verbose=verbose, n_processes=n_processes) - - per_round_spot_results = self._merge_spots_by_round(per_tile_spot_results) - - distances, indices = self._match_spots( - per_round_spot_results, - search_radius=self.search_radius, anchor_round=self.anchor_round - ) - - # TODO implement consensus seeding (SeqFISH) - - intensity_table = self._build_intensity_table( - per_round_spot_results, distances, indices, - rounds=primary_image.xarray[Axes.ROUND.value].values, - channels=primary_image.xarray[Axes.CH.value].values, - search_radius=self.search_radius, - anchor_round=self.anchor_round - ) - - transfer_physical_coords_to_intensity_table( - image_stack=primary_image, intensity_table=intensity_table - ) - - return intensity_table - - def image_to_spots(self, data_image: Union[np.ndarray, xr.DataArray]) -> SpotAttributes: - # LocalSearchBlobDetector does not follow the same contract as the remaining spot detectors. - # TODO: (ambrosejcarr) Rationalize the spot detectors by contract and then remove this hack. - raise NotImplementedError() diff --git a/starfish/core/spots/DetectSpots/test/__init__.py b/starfish/core/spots/DetectSpots/test/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/starfish/core/spots/DetectSpots/test/test_local_search_blob_detector.py b/starfish/core/spots/DetectSpots/test/test_local_search_blob_detector.py deleted file mode 100644 index 41580639d..000000000 --- a/starfish/core/spots/DetectSpots/test/test_local_search_blob_detector.py +++ /dev/null @@ -1,154 +0,0 @@ -import numpy as np -from scipy.ndimage.filters import gaussian_filter - -from starfish import ImageStack -from starfish.core.spots.DetectSpots.local_search_blob_detector import LocalSearchBlobDetector -from starfish.core.types import Axes - - -def traversing_code() -> ImageStack: - """this code walks in a sequential direction, and should only be detectable from some anchors""" - img = np.zeros((3, 2, 20, 50, 50), dtype=np.float32) - - # code 1 - img[0, 0, 5, 35, 35] = 10 - img[1, 1, 5, 32, 32] = 10 - img[2, 0, 5, 29, 29] = 10 - - # blur points - gaussian_filter(img, (0, 0, 0.5, 1.5, 1.5), output=img) - - return ImageStack.from_numpy(img) - - -def multiple_possible_neighbors() -> ImageStack: - """this image is intended to be tested with anchor_round in {0, 1}, last round has more spots""" - img = np.zeros((3, 2, 20, 50, 50), dtype=np.float32) - - # round 1 - img[0, 0, 5, 20, 40] = 10 - img[0, 0, 5, 40, 20] = 10 - - # round 2 - img[1, 1, 5, 20, 40] = 10 - img[1, 1, 5, 40, 20] = 10 - - # round 3 - img[2, 0, 5, 20, 40] = 10 - img[2, 0, 5, 35, 35] = 10 - img[2, 0, 5, 40, 20] = 10 - - # blur points - gaussian_filter(img, (0, 0, 0.5, 1.5, 1.5), output=img) - - return ImageStack.from_numpy(img) - - -def jitter_code() -> ImageStack: - """this code has some minor jitter <= 3px at the most distant point""" - img = np.zeros((3, 2, 20, 50, 50), dtype=np.float32) - - # code 1 - img[0, 0, 5, 35, 35] = 10 - img[1, 1, 5, 34, 35] = 10 - img[2, 0, 6, 35, 33] = 10 - - # blur points - gaussian_filter(img, (0, 0, 0.5, 1.5, 1.5), output=img) - - return ImageStack.from_numpy(img) - - -def two_perfect_codes() -> ImageStack: - """this code has no jitter""" - img = np.zeros((3, 2, 20, 50, 50), dtype=np.float32) - - # code 1 - img[0, 0, 5, 20, 35] = 10 - img[1, 1, 5, 20, 35] = 10 - img[2, 0, 5, 20, 35] = 10 - - # code 1 - img[0, 0, 5, 40, 45] = 10 - img[1, 1, 5, 40, 45] = 10 - img[2, 0, 5, 40, 45] = 10 - - # blur points - gaussian_filter(img, (0, 0, 0.5, 1.5, 1.5), output=img) - - return ImageStack.from_numpy(img) - - -def local_search_blob_detector(search_radius: int, anchor_channel=0) -> LocalSearchBlobDetector: - return LocalSearchBlobDetector( - min_sigma=(0.4, 1.2, 1.2), - max_sigma=(0.6, 1.7, 1.7), - num_sigma=3, - threshold=0.1, - overlap=0.5, - search_radius=search_radius, - anchor_round=anchor_channel, - ) - - -def test_local_search_blob_detector_two_codes(): - stack = two_perfect_codes() - lsbd = local_search_blob_detector(search_radius=1) - intensity_table = lsbd.run(stack, n_processes=1) - - assert intensity_table.shape == (2, 2, 3) - assert np.all(intensity_table[0][Axes.X.value] == 45) - assert np.all(intensity_table[0][Axes.Y.value] == 40) - assert np.all(intensity_table[0][Axes.ZPLANE.value] == 5) - - -def test_local_search_blob_detector_jitter_code(): - stack = jitter_code() - lsbd = local_search_blob_detector(search_radius=3) - intensity_table = lsbd.run(stack, n_processes=1) - - assert intensity_table.shape == (1, 2, 3) - f, c, r = np.where(~intensity_table.isnull()) - assert np.all(f == np.array([0, 0, 0])) - assert np.all(c == np.array([0, 0, 1])) - assert np.all(r == np.array([0, 2, 1])) - - # test again with smaller search radius - lsbd = local_search_blob_detector(search_radius=1) - intensity_table = lsbd.run(stack, n_processes=1) - assert intensity_table.shape == (1, 2, 3) - f, c, r = np.where(~intensity_table.isnull()) - assert np.all(f == np.array([0])) - assert np.all(c == np.array([0])) - assert np.all(r == np.array([0])) - - -def test_local_search_blob_detector_traversing_code(): - stack = traversing_code() - lsbd = local_search_blob_detector(search_radius=5, anchor_channel=0) - intensity_table = lsbd.run(stack, n_processes=1) - - assert intensity_table.shape == (1, 2, 3) - f, c, r = np.where(~intensity_table.isnull()) - assert np.all(f == np.array([0, 0])) - assert np.all(c == np.array([0, 1])) - assert np.all(r == np.array([0, 1])) - - lsbd = local_search_blob_detector(search_radius=5, anchor_channel=1) - intensity_table = lsbd.run(stack, n_processes=1) - f, c, r = np.where(~intensity_table.isnull()) - assert np.all(f == np.array([0, 0, 0])) - assert np.all(c == np.array([0, 0, 1])) - assert np.all(r == np.array([0, 2, 1])) - - -def test_local_search_blob_detector_multiple_neighbors(): - stack = multiple_possible_neighbors() - lsbd = local_search_blob_detector(search_radius=4, anchor_channel=0) - intensity_table = lsbd.run(stack, n_processes=1) - - assert intensity_table.shape == (2, 2, 3) - f, c, r = np.where(~intensity_table.isnull()) - assert np.all(intensity_table[Axes.ZPLANE.value] == (5, 5)) - assert np.all(intensity_table[Axes.Y.value] == (40, 20)) - assert np.all(intensity_table[Axes.X.value] == (20, 40)) diff --git a/starfish/core/spots/DetectSpots/test/test_spot_detection.py b/starfish/core/spots/DetectSpots/test/test_spot_detection.py deleted file mode 100644 index af222600e..000000000 --- a/starfish/core/spots/DetectSpots/test/test_spot_detection.py +++ /dev/null @@ -1,210 +0,0 @@ -import numpy as np -import pytest - -from starfish import ImageStack -from starfish.core.imagestack.test.factories import unique_tiles_imagestack -from starfish.core.test.factories import ( - two_spot_informative_blank_coded_data_factory, - two_spot_one_hot_coded_data_factory, - two_spot_sparse_coded_data_factory, -) -from starfish.types import Axes, Features -from .._base import DetectSpotsAlgorithm -from ..blob import BlobDetector -from ..detect import detect_spots -from ..local_max_peak_finder import LocalMaxPeakFinder -from ..trackpy_local_max_peak_finder import TrackpyLocalMaxPeakFinder - -# verify all spot finders handle different coding types -_, ONE_HOT_IMAGESTACK, ONE_HOT_MAX_INTENSITY = two_spot_one_hot_coded_data_factory() -_, SPARSE_IMAGESTACK, SPARSE_MAX_INTENSITY = two_spot_sparse_coded_data_factory() -_, BLANK_IMAGESTACK, BLANK_MAX_INTENSITY = two_spot_informative_blank_coded_data_factory() - -# make sure that all spot finders handle empty arrays -EMPTY_IMAGESTACK = ImageStack.from_numpy(np.zeros((4, 2, 10, 100, 100), dtype=np.float32)) - - -def simple_gaussian_spot_detector() -> BlobDetector: - """create a basic gaussian spot detector""" - return BlobDetector(min_sigma=1, max_sigma=4, num_sigma=5, threshold=0, measurement_type='max') - - -def simple_trackpy_local_max_spot_detector() -> TrackpyLocalMaxPeakFinder: - """create a basic local max peak finder""" - return TrackpyLocalMaxPeakFinder( - spot_diameter=3, - min_mass=0.01, - max_size=10, - separation=2, - ) - - -def simple_local_max_spot_detector() -> LocalMaxPeakFinder: - return LocalMaxPeakFinder( - min_distance=6, - stringency=0, - min_obj_area=0, - max_obj_area=np.inf, - threshold=0 - ) - - -# initialize spot detectors -gaussian_spot_detector = simple_gaussian_spot_detector() -trackpy_local_max_spot_detector = simple_trackpy_local_max_spot_detector() -local_max_spot_detector = simple_local_max_spot_detector() - -# test parameterization -test_parameters = ( - 'data_stack, spot_detector, radius_is_gyration, max_intensity', - [ - (ONE_HOT_IMAGESTACK, gaussian_spot_detector, False, ONE_HOT_MAX_INTENSITY), - (ONE_HOT_IMAGESTACK, trackpy_local_max_spot_detector, True, ONE_HOT_MAX_INTENSITY), - (ONE_HOT_IMAGESTACK, local_max_spot_detector, False, ONE_HOT_MAX_INTENSITY), - (SPARSE_IMAGESTACK, gaussian_spot_detector, False, SPARSE_MAX_INTENSITY), - (SPARSE_IMAGESTACK, trackpy_local_max_spot_detector, True, SPARSE_MAX_INTENSITY), - (SPARSE_IMAGESTACK, local_max_spot_detector, False, SPARSE_MAX_INTENSITY), - (BLANK_IMAGESTACK, gaussian_spot_detector, False, BLANK_MAX_INTENSITY), - (BLANK_IMAGESTACK, trackpy_local_max_spot_detector, True, BLANK_MAX_INTENSITY), - (BLANK_IMAGESTACK, local_max_spot_detector, False, BLANK_MAX_INTENSITY), - ] -) - - -@pytest.mark.parametrize(*test_parameters) -def test_spot_detection_with_reference_image( - data_stack: ImageStack, - spot_detector: DetectSpotsAlgorithm, - radius_is_gyration: bool, - max_intensity: float, -): - """This testing method uses a reference image to identify spot locations.""" - - def call_detect_spots(stack): - - return detect_spots( - data_stack=stack, - spot_finding_method=spot_detector.image_to_spots, - reference_image=stack, - reference_image_max_projection_axes=(Axes.ROUND, Axes.CH), - measurement_function=np.max, - radius_is_gyration=radius_is_gyration, - n_processes=1, - ) - - intensity_table = call_detect_spots(data_stack) - assert intensity_table.sizes[Features.AXIS] == 2, "wrong number of spots detected" - expected = [max_intensity * 2, max_intensity * 2] - assert np.allclose(intensity_table.sum((Axes.ROUND, Axes.CH)).values, expected), \ - "wrong spot intensities detected" - - # verify this execution strategy produces an empty intensitytable when called with a blank image - empty_intensity_table = call_detect_spots(EMPTY_IMAGESTACK) - assert empty_intensity_table.sizes[Features.AXIS] == 0 - - -@pytest.mark.parametrize(*test_parameters) -def test_spot_detection_with_reference_image_from_max_projection( - data_stack: ImageStack, - spot_detector: DetectSpotsAlgorithm, - radius_is_gyration: bool, - max_intensity: float, -): - """This testing method builds a reference image to identify spot locations.""" - - def call_detect_spots(stack): - return detect_spots( - data_stack=stack, - spot_finding_method=spot_detector.image_to_spots, - reference_image=stack, - reference_image_max_projection_axes=(Axes.ROUND, Axes.CH), - measurement_function=np.max, - radius_is_gyration=radius_is_gyration, - n_processes=1, - ) - - intensity_table = call_detect_spots(data_stack) - assert intensity_table.sizes[Features.AXIS] == 2, "wrong number of spots detected" - expected = [max_intensity * 2, max_intensity * 2] - assert np.allclose(intensity_table.sum((Axes.ROUND, Axes.CH)).values, expected), \ - "wrong spot intensities detected" - - empty_intensity_table = call_detect_spots(EMPTY_IMAGESTACK) - assert empty_intensity_table.sizes[Features.AXIS] == 0 - -@pytest.mark.parametrize(*test_parameters) -def test_spot_finding_no_reference_image( - data_stack: ImageStack, - spot_detector: DetectSpotsAlgorithm, - radius_is_gyration: bool, - max_intensity: float, -): - """ - This testing method does not provide a reference image, and should therefore check for spots - in each (round, ch) combination in sequence. With the given input, it should detect 4 spots. - """ - - def call_detect_spots(stack): - return detect_spots( - data_stack=stack, - spot_finding_method=spot_detector.image_to_spots, - measurement_function=np.max, - radius_is_gyration=radius_is_gyration, - n_processes=1, - ) - - intensity_table = call_detect_spots(data_stack) - assert intensity_table.sizes[Features.AXIS] == 4, "wrong number of spots detected" - expected = [max_intensity] * 4 - assert np.allclose(intensity_table.sum((Axes.ROUND, Axes.CH)).values, expected), \ - "wrong spot intensities detected" - - empty_intensity_table = call_detect_spots(EMPTY_IMAGESTACK) - assert empty_intensity_table.sizes[Features.AXIS] == 0 - - -def _make_labeled_image() -> ImageStack: - ROUND_LABELS = (1, 4, 6) - CH_LABELS = (2, 4, 6, 8) - ZPLANE_LABELS = (3, 4) - HEIGHT = 2 - WIDTH = 4 - - return unique_tiles_imagestack( - ROUND_LABELS, CH_LABELS, ZPLANE_LABELS, HEIGHT, WIDTH) - - -def test_reference_image_spot_detection_with_image_with_labeled_axes(monkeypatch): - """This testing method uses a reference image to identify spot locations.""" - - def call_detect_spots(stack): - - return detect_spots( - data_stack=stack, - spot_finding_method=gaussian_spot_detector.image_to_spots, - reference_image=stack, - reference_image_max_projection_axes=(Axes.ROUND, Axes.CH), - measurement_function=np.max, - radius_is_gyration=False, - n_processes=1, - ) - - data_stack = _make_labeled_image() - call_detect_spots(data_stack) - - -def test_spot_detection_with_image_with_labeled_axes(): - """This testing method uses no reference image to identify spot locations.""" - - def call_detect_spots(stack): - - return detect_spots( - data_stack=stack, - spot_finding_method=gaussian_spot_detector.image_to_spots, - measurement_function=np.max, - radius_is_gyration=False, - n_processes=1, - ) - - data_stack = _make_labeled_image() - call_detect_spots(data_stack) diff --git a/starfish/core/spots/DetectSpots/test/test_synthetic_data.py b/starfish/core/spots/DetectSpots/test/test_synthetic_data.py deleted file mode 100644 index ce2925b16..000000000 --- a/starfish/core/spots/DetectSpots/test/test_synthetic_data.py +++ /dev/null @@ -1,91 +0,0 @@ -import numpy as np -import pytest - -from starfish.core.spots.DetectSpots.blob import BlobDetector -from starfish.core.test.factories import SyntheticData -from starfish.core.types import Axes, Features - - -def test_round_trip_synthetic_data(): - np.random.seed(0) - - sd = SyntheticData( - n_ch=2, - n_round=3, - n_spots=1, - n_codes=4, - n_photons_background=0, - background_electrons=0, - camera_detection_efficiency=1.0, - gray_level=1, - ad_conversion_bits=16, - point_spread_function=(2, 2, 2), - ) - - codebook = sd.codebook() - intensities = sd.intensities(codebook=codebook) - spots = sd.spots(intensities=intensities) - gsd = BlobDetector(min_sigma=1, max_sigma=4, num_sigma=5, threshold=0) - calculated_intensities = gsd.run(spots, blobs_image=spots, blobs_axes=(Axes.ROUND, Axes.CH)) - decoded_intensities = codebook.decode_metric( - calculated_intensities, - max_distance=1, - min_intensity=0, - norm_order=2 - ) - - # applying the gaussian blur to the intensities causes them to be reduced in magnitude, so - # they won't be the same size, but they should be in the same place, and decode the same - # way - spot1, round1, ch1 = np.where(intensities.values) - spot2, round2, ch2 = np.where(calculated_intensities.values) - assert np.array_equal(spot1, spot2) - assert np.array_equal(round1, round2) - assert np.array_equal(ch1, ch2) - assert len(decoded_intensities.coords[Features.TARGET]) == 1 - - -@pytest.mark.slow -def test_medium_synthetic_stack(): - np.random.seed(0) - - n_z = 40 - height = 300 - width = 400 - sigma = 2 - - sd = SyntheticData( - n_round=4, - n_ch=4, - n_z=n_z, - height=height, - width=width, - n_spots=100, - n_codes=10, - point_spread_function=(sigma, sigma, sigma), - ) - - codebook = sd.codebook() - intensities = sd.intensities(codebook=codebook) - - # some spots won't be detected properly because they will spread outside the image when blurred, - # so we'll remove those from intensities before we generate spots. - - spot_radius = sigma * np.sqrt(2) # this is the radius of the spot in pixels - - valid_z = np.logical_and(intensities.z.values > spot_radius, - intensities.z.values < (n_z - spot_radius)) - valid_y = np.logical_and(intensities.y.values > spot_radius, - intensities.y.values < (height - spot_radius)) - valid_x = np.logical_and(intensities.x.values > spot_radius, - intensities.x.values < (width - spot_radius)) - - valid_locations = valid_z & valid_y & valid_x - intensities = intensities[np.where(valid_locations)] - spots = sd.spots(intensities=intensities) - gsd = BlobDetector(min_sigma=1, max_sigma=4, num_sigma=5, threshold=1e-4) - calculated_intensities = gsd.run(spots, blobs_image=spots, blobs_axes=(Axes.ROUND, Axes.CH)) - calculated_intensities = codebook.decode_metric( - calculated_intensities, max_distance=1, min_intensity=0, norm_order=2 - ) - assert len(calculated_intensities.coords[Features.TARGET]) == 80 diff --git a/starfish/core/spots/DetectSpots/trackpy_local_max_peak_finder.py b/starfish/core/spots/DetectSpots/trackpy_local_max_peak_finder.py deleted file mode 100644 index a7cc9fc8b..000000000 --- a/starfish/core/spots/DetectSpots/trackpy_local_max_peak_finder.py +++ /dev/null @@ -1,181 +0,0 @@ -import warnings -from typing import Optional, Tuple, Union - -import numpy as np -import xarray as xr -from trackpy import locate - -from starfish.core.imagestack.imagestack import ImageStack -from starfish.core.intensity_table.intensity_table import IntensityTable -from starfish.core.types import Axes, SpotAttributes -from ._base import DetectSpotsAlgorithm -from .detect import detect_spots - - -class TrackpyLocalMaxPeakFinder(DetectSpotsAlgorithm): - """ - Find spots using a local max peak finding algorithm - - This is a wrapper for :code:`trackpy.locate`, which implements a version of the - `Crocker-Grier `_ algorithm. - - .. _crocker_grier: https://physics.nyu.edu/grierlab/methods3c/ - - Parameters - ---------- - - spot_diameter : odd integer or tuple of odd integers. - This may be a single number or a tuple giving the feature’s extent in each dimension, - useful when the dimensions do not have equal resolution (e.g. confocal microscopy). - The tuple order is the same as the image shape, conventionally (z, y, x) or (y, x). - The number(s) must be odd integers. When in doubt, round up. - min_mass : float, optional - The minimum integrated brightness. This is a crucial parameter for eliminating spurious - features. Recommended minimum values are 100 for integer images and 1 for float images. - Defaults to 0 (no filtering). - max_size : float - maximum radius-of-gyration of brightness, default None - separation : float or tuple - Minimum separtion between features. Default is diameter + 1. May be a tuple, see - diameter for details. - percentile : float - Features must have a peak brighter than pixels in this percentile. This helps eliminate - spurious peaks. - noise_size : float or tuple - Width of Gaussian blurring kernel, in pixels Default is 1. May be a tuple, see diameter - for details. - smoothing_size : float or tuple - The size of the sides of the square kernel used in boxcar (rolling average) smoothing, - in pixels Default is diameter. May be a tuple, making the kernel rectangular. - threshold : float - Clip bandpass result below this value. Thresholding is done on the already - background-subtracted image. By default, 1 for integer images and 1/255 for float - images. - measurement_type : str ['max', 'mean'] - name of the function used to calculate the intensity for each identified spot area - preprocess : boolean - Set to False to turn off bandpass preprocessing. - max_iterations : integer - Max number of loops to refine the center of mass, default 10 - is_volume : bool - if True, run the algorithm on 3d volumes of the provided stack - verbose : bool - If True, report the percentage completed (default = False) during processing - - Notes - ----- - See also trackpy locate: http://soft-matter.github.io/trackpy/dev/generated/trackpy.locate.html - - """ - - def __init__( - self, spot_diameter, min_mass, max_size, separation, percentile=0, - noise_size: Tuple[int, int, int] = (1, 1, 1), smoothing_size=None, threshold=None, - preprocess: bool = False, max_iterations: int = 10, measurement_type: str = 'max', - is_volume: bool = False, verbose=False) -> None: - - self.diameter = spot_diameter - self.minmass = min_mass - self.maxsize = max_size - self.separation = separation - self.noise_size = noise_size - self.smoothing_size = smoothing_size - self.percentile = percentile - self.threshold = threshold - self.measurement_function = self._get_measurement_function(measurement_type) - self.preprocess = preprocess - self.max_iterations = max_iterations - self.is_volume = is_volume - self.verbose = verbose - - def image_to_spots(self, data_image: Union[np.ndarray, xr.DataArray]) -> SpotAttributes: - """ - - Parameters - ---------- - data_image : np.ndarray - three-dimensional image containing spots to be detected - - Returns - ------- - SpotAttributes : - spot attributes table for all detected spots - - """ - data_image = np.asarray(data_image) - with warnings.catch_warnings(): - warnings.simplefilter('ignore', FutureWarning) # trackpy numpy indexing warning - warnings.simplefilter('ignore', UserWarning) # yielded if black images - attributes = locate( - data_image, - diameter=self.diameter, - minmass=self.minmass, - maxsize=self.maxsize, - separation=self.separation, - noise_size=self.noise_size, - smoothing_size=self.smoothing_size, - threshold=self.threshold, - percentile=self.percentile, - preprocess=self.preprocess, - max_iterations=self.max_iterations, - ) - - # when zero spots are detected, 'ep' is missing from the trackpy locate results. - if attributes.shape[0] == 0: - attributes['ep'] = [] - - # TODO ambrosejcarr: data should always be at least pseudo-3d, this may not be necessary - # TODO ambrosejcarr: this is where max vs. sum vs. mean would be parametrized. - # here, total_intensity = sum, intensity = max - new_colnames = [ - 'y', 'x', 'total_intensity', 'radius', 'eccentricity', 'intensity', 'raw_mass', 'ep' - ] - if len(data_image.shape) == 3: - attributes.columns = ['z'] + new_colnames - else: - attributes.columns = new_colnames - - attributes['spot_id'] = np.arange(attributes.shape[0]) - return SpotAttributes(attributes) - - def run( - self, - primary_image: ImageStack, - blobs_image: Optional[ImageStack] = None, - blobs_axes: Optional[Tuple[Axes, ...]] = None, - n_processes: Optional[int] = None, - *args, - ) -> IntensityTable: - """ - Find spots. - - Parameters - ---------- - primary_image : ImageStack - ImageStack where we find the spots in. - blobs_image : Optional[ImageStack] - If provided, spots will be found in the blobs image, and intensities will be measured - across rounds and channels. Otherwise, spots are measured independently for each channel - and round. - blobs_axes : Optional[Tuple[Axes, ...]] - If blobs_image is provided, blobs_axes must be provided as well. blobs_axes represents - the axes across which the blobs image is max projected before spot detection is done. - n_processes : Optional[int] = None, - Number of processes to devote to spot finding. - """ - DeprecationWarning("Starfish is embarking on a SpotFinding data structures refactor" - "(See https://github.com/spacetx/starfish/issues/1514) This version of " - "TrackpyLocalMaxPeakFinder will soon be deleted. To find and decode your" - "spots please instead use FindSpots.TrackpyLocalMaxPeakFinder then " - "DecodeSpots.PerRoundMaxChannel with the parameter " - "TraceBuildingStrategies.SEQUENTIAL. See example in smFISH.py") - intensity_table = detect_spots( - data_stack=primary_image, - spot_finding_method=self.image_to_spots, - reference_image=blobs_image, - reference_image_max_projection_axes=blobs_axes, - measurement_function=self.measurement_function, - n_processes=n_processes, - radius_is_gyration=True) - - return intensity_table diff --git a/starfish/spots.py b/starfish/spots.py index 7bb8a7fd1..0df6c28bf 100644 --- a/starfish/spots.py +++ b/starfish/spots.py @@ -1,8 +1,6 @@ from starfish.core.spots import ( # noqa: F401 AssignTargets, - Decode, DecodeSpots, DetectPixels, - DetectSpots, FindSpots, ) diff --git a/workflows/wdl/iss_published/recipe.py b/workflows/wdl/iss_published/recipe.py index b4f94f29e..dd12cfdb9 100644 --- a/workflows/wdl/iss_published/recipe.py +++ b/workflows/wdl/iss_published/recipe.py @@ -2,8 +2,8 @@ from starfish import FieldOfView from starfish.image import Filter from starfish.image import ApplyTransform, LearnTransform -from starfish.spots import DetectSpots -from starfish.types import Axes +from starfish.spots import FindSpots, DecodeSpots +from starfish.types import Axes, FunctionSource def process_fov(field_num: int, experiment_str: str): @@ -50,16 +50,20 @@ def process_fov(field_num: int, experiment_str: str): filt.run(nuclei, verbose=True, in_place=True) print("Detecting") - detector = DetectSpots.BlobDetector( + detector = FindSpots.BlobDetector( min_sigma=1, max_sigma=10, num_sigma=30, threshold=0.01, measurement_type='mean', ) + dots_max_projector = Filter.Reduce((Axes.ROUND, Axes.ZPLANE), func="max", + module=FunctionSource.np) + dots_max = dots_max_projector.run(dots) - intensities = detector.run(filtered_imgs, blobs_image=dots, blobs_axes=(Axes.ROUND, Axes.ZPLANE)) + spots = detector.run(image_stack=filtered_imgs, reference_image=dots_max) - decoded = experiment.codebook.decode_per_round_max(intensities) + decoder = DecodeSpots.PerRoundMaxChannel(codebook=experiment.codebook) + decoded = decoder.run(spots=spots) df = decoded.to_decoded_dataframe() return df