Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

1.) Provenance logging refactor and support for SpotFindingResults #1517

Merged
merged 1 commit into from
Oct 3, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 4 additions & 22 deletions starfish/core/imagestack/imagestack.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,11 @@
Clip,
Coordinates,
CoordinateValue,
LOG,
Number,
STARFISH_EXTRAS_KEY
)
from starfish.core.util import logging
from starfish.core.util.dtype import preserve_float_range
from starfish.core.util.logging import Log
from .dataorder import AXES_DATA, N_AXES


Expand Down Expand Up @@ -91,7 +90,7 @@ def __init__(self, data: xr.DataArray, tile_data: Optional[TileCollectionData]=N
self._data = data
self._data_loaded = False
self._tile_data = tile_data
self._log: List[dict] = list()
self._log: Log = Log()

@classmethod
def from_tile_collection_data(cls, tile_data: TileCollectionData) -> "ImageStack":
Expand Down Expand Up @@ -948,7 +947,7 @@ def tile_metadata(self) -> pd.DataFrame:
return pd.DataFrame(data)

@property
def log(self) -> List[dict]:
def log(self) -> Log:
"""
Returns a list of pipeline components that have been applied to this imagestack
as well as their corresponding runtime parameters.
Expand All @@ -963,23 +962,6 @@ def log(self) -> List[dict]:
"""
return self._log

def update_log(self, class_instance) -> None:
"""
Adds a new entry to the log list.

Parameters
----------
class_instance: The instance of a class being applied to the imagestack
"""
entry = {"method": class_instance.__class__.__name__,
"arguments": class_instance.__dict__,
"os": logging.get_os_info(),
"dependencies": logging.get_core_dependency_info(),
"release tag": logging.get_release_tag(),
"starfish version": logging.get_dependency_version('starfish')
}
self._log.append(entry)

@property
def raw_shape(self) -> Tuple[int, int, int, int, int]:
"""
Expand Down Expand Up @@ -1087,7 +1069,7 @@ def export(self,
"""
# Add log data to extras
tileset_extras = self._tile_data.extras if self._tile_data else {}
tileset_extras[STARFISH_EXTRAS_KEY] = logging.LogEncoder().encode({LOG: self.log})
tileset_extras[STARFISH_EXTRAS_KEY] = self.log.encode()
tileset = TileSet(
dimensions={
Axes.ROUND,
Expand Down
47 changes: 24 additions & 23 deletions starfish/core/pipeline/algorithmbase.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,7 @@
from abc import ABCMeta

from starfish.core.imagestack.imagestack import ImageStack
from starfish.core.intensity_table.intensity_table import IntensityTable
from starfish.core.types import LOG
from starfish.core.types._constants import STARFISH_EXTRAS_KEY
from starfish.core.util.logging import LogEncoder


class AlgorithmBase(ABCMeta):
Expand All @@ -21,29 +18,33 @@ def run_with_logging(func):
This method extends each pipeline component.run() method to also log itself and
runtime parameters to the IntensityTable and ImageStack objects. There are two
scenarios for this method:
1.) Filtering/ApplyTransform:
Imagestack -> Imagestack
2.) Spot Detection:
ImageStack -> IntensityTable
ImageStack -> [IntensityTable, ConnectedComponentDecodingResult]
TODO segmentation and decoding
"""
@functools.wraps(func)
def helper(*args, **kwargs):
result = func(*args, **kwargs)
# Scenario 1, Filtering, ApplyTransform
if isinstance(result, ImageStack):
result.update_log(args[0])
# Scenario 2, Spot detection
elif isinstance(result, tuple) or isinstance(result, IntensityTable):
if isinstance(args[1], ImageStack):
stack = args[1]
# update log with spot detection instance args[0]
stack.update_log(args[0])
# get resulting intensity table and set log
it = result
if isinstance(result, tuple):
it = result[0]
it.attrs[STARFISH_EXTRAS_KEY] = LogEncoder().encode({LOG: stack.log})
if result is not None:
method_class_str = str(args[0].__class__)
if 'ApplyTransform' in method_class_str or 'Filter' in method_class_str:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's plausible that we have mechanisms that take multiple loggable objects.

It might be a good idea to have a base ProvenanceTracked class that all objects (ImageStack, IntensityTable, etc.) inherit, and then to walk through all the arguments to find objects that extend ProvenanceTracked. Then scrape the logs from those objects and build up a provenance graph.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this seems like a larger refactor, like maybe something post spot finding stuff

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, can you tack that onto the logging epic?

There are a lot of gaps in the logging -- for instance, ApplyTransform plausibly should log the origin of the warping data. Also can be covered afterwards.

# Update the log on the resulting ImageStack
result.log.update_log(args[0])
if 'FindSpots' in method_class_str:
# Update the log on the resulting SpotFindingResults
result.log.update_log(args[0])
if 'DecodeSpots' in method_class_str:
# update log then transfer to DecodedIntensityTable
spot_results = kwargs['spots']
spot_results.log.update_log(args[0])
result.attrs[STARFISH_EXTRAS_KEY] = spot_results.log.encode()
# OLD CODE FOR DETECT WILL GET REMOVED
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# OLD CODE FOR DETECT WILL GET REMOVED
# TODO shanaxel42 OLD CODE FOR DETECT WILL GET REMOVED

if 'DetectSpots' in method_class_str or 'DetectPixels' in method_class_str:
if isinstance(args[1], ImageStack):
stack = args[1]
# update log with spot detection instance args[0]
stack.log.update_log(args[0])
# get resulting intensity table and set log
it = result
if isinstance(result, tuple):
it = result[0]
it.attrs[STARFISH_EXTRAS_KEY] = stack.log.encode()
return result
return helper
21 changes: 20 additions & 1 deletion starfish/core/types/_spot_finding_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import xarray as xr

from starfish.core.types import Axes, Coordinates, SpotAttributes
from starfish.core.util.logging import Log


AXES_ORDER = (Axes.ROUND, Axes.CH)
Expand All @@ -15,7 +16,8 @@ class SpotFindingResults:
SpotAttributes.
"""

def __init__(self, imagestack_coords, spot_attributes_list: Optional[List[Tuple]] = None):
def __init__(
self, imagestack_coords, log: Log, spot_attributes_list: Optional[List[Tuple]] = None):
"""
Construct a SpotFindingResults instance

Expand All @@ -35,6 +37,7 @@ def __init__(self, imagestack_coords, spot_attributes_list: Optional[List[Tuple]
Axes.X.value: imagestack_coords[Coordinates.X.value],
Axes.Y.value: imagestack_coords[Coordinates.Y.value],
Axes.ZPLANE.value: imagestack_coords[Coordinates.Z.value]}
self._log: Log = log

def __setitem__(self, indices: Mapping[Axes, int], spots: SpotAttributes):
"""
Expand Down Expand Up @@ -100,3 +103,19 @@ def get_physical_coord_ranges(self) -> Mapping[Hashable, xr.DataArray]:
information for calculating the physical coordinate values of decoded spots.
"""
return self.physical_coord_ranges

@property
def log(self) -> Log:
"""
Returns a list of pipeline components that have been applied to this get these SpotResults
as well as their corresponding runtime parameters.

For more information about provenance logging see
`Provenance Logging
<https://spacetx-starfish.readthedocs.io/en/latest/help_and_reference/api/utils/ilogging.html>`_

Returns
-------
Log
"""
return self._log
37 changes: 35 additions & 2 deletions starfish/core/util/logging.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,45 @@
import platform
from functools import lru_cache
from json import JSONEncoder
from typing import Mapping
from typing import List, Mapping

import pkg_resources

import starfish.core
from starfish.core.types import CORE_DEPENDENCIES
from starfish.core.types import CORE_DEPENDENCIES, LOG


class Log:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Love the idea. Is this only used by spot finding? My intuition is that this should trigger a bigger refactor.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nah this is used by everything. This PR was basically the "bigger" logging refactor needed to keep everything working throughout the refactor

def __init__(self):
"""
Class for capturing methods and their parameters applied by an analysis
pipeline.
"""
self._log: List[dict] = list()

def update_log(self, class_instance) -> None:
"""
Adds a new entry to the log list.

Parameters
----------
class_instance: The instance of a class being applied to the imagestack
shanaxel42 marked this conversation as resolved.
Show resolved Hide resolved
"""
entry = {"method": class_instance.__class__.__name__,
"arguments": class_instance.__dict__,
"os": get_os_info(),
"dependencies": get_core_dependency_info(),
"release tag": get_release_tag(),
"starfish version": get_dependency_version('starfish')
}
self._log.append(entry)

def encode(self):
return LogEncoder().encode({LOG: self.data})

@property
def data(self):
return self._log


@lru_cache(maxsize=1)
Expand Down
2 changes: 1 addition & 1 deletion starfish/test/full_pipelines/api/test_dartfish.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def test_dartfish_pipeline_cropped_data(tmpdir):
zero_norm_stack.xarray[0, 0, 0, 50:60, 60:70]
)

pipeline_log = zero_norm_stack.log
pipeline_log = zero_norm_stack.log.data

assert pipeline_log[0]['method'] == 'Clip'
assert pipeline_log[1]['method'] == 'ZeroByChannelMagnitude'
Expand Down
2 changes: 1 addition & 1 deletion starfish/test/full_pipelines/api/test_iss_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def test_iss_pipeline_cropped_data(tmpdir):
registered_image.xarray[2, 2, 0, 40:50, 40:50]
)

pipeline_log = registered_image.log
pipeline_log = registered_image.log.data

assert pipeline_log[0]['method'] == 'WhiteTophat'
assert pipeline_log[1]['method'] == 'Warp'
Expand Down