Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Intensity Table Concat Processing #1118

Merged
merged 23 commits into from
Apr 10, 2019
Merged
32 changes: 28 additions & 4 deletions starfish/intensity_table/intensity_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,12 @@

from starfish.expression_matrix.expression_matrix import ExpressionMatrix
from starfish.types import Axes, DecodedSpots, Features, LOG, SpotAttributes, STARFISH_EXTRAS_KEY
from starfish.types._constants import OverlapStrategy
shanaxel42 marked this conversation as resolved.
Show resolved Hide resolved
from starfish.util.dtype import preserve_float_range
from starfish.util.overlap_utils import (
find_overlaps_of_xarrays,
OVERLAP_STRATEGY_MAP,
)


class IntensityTable(xr.DataArray):
Expand Down Expand Up @@ -389,10 +394,29 @@ def from_image_stack(
return IntensityTable.from_spot_data(intensity_data, pixel_coordinates)

@staticmethod
def concatanate_intensity_tables(intensity_tables: List["IntensityTable"]):
# TODO VARY CONCAT LOGIC IF TILES OVERLAP
# This method is a starting point for handling tile overlap, right now
# it does a simple concat but people want other overlap logic implmented
def process_overlaps(its: List["IntensityTable"],
shanaxel42 marked this conversation as resolved.
Show resolved Hide resolved
overlap_strategy: OverlapStrategy
) -> List["IntensityTable"]:
"""Find the overlapping sections between IntensityTables
and process them according to the give overlap strategy
ttung marked this conversation as resolved.
Show resolved Hide resolved
"""
overlap_pairs = find_overlaps_of_xarrays(its)
for indices, intersection_area in overlap_pairs.items():
overlap_method = OVERLAP_STRATEGY_MAP[overlap_strategy]
idx1, idx2 = indices
# modify IntensityTables based on overlap strategy
it1, it2 = overlap_method(intersection_area, its[idx1], its[idx2])
# replace IntensityTables in list
its[idx1] = it1
its[idx2] = it2
return its

@staticmethod
def concatanate_intensity_tables(intensity_tables: List["IntensityTable"],
overlap_strategy: OverlapStrategy = None):
shanaxel42 marked this conversation as resolved.
Show resolved Hide resolved
if overlap_strategy:
intensity_tables = IntensityTable.process_overlaps(intensity_tables,
overlap_strategy)
return xr.concat(intensity_tables, dim=Features.AXIS)

def to_features_dataframe(self) -> pd.DataFrame:
Expand Down
157 changes: 157 additions & 0 deletions starfish/test/test_overlap_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
import numpy as np
import xarray as xr

from starfish import IntensityTable
from starfish.test import test_utils
from starfish.types import Coordinates, Features
from starfish.types._constants import OverlapStrategy
from starfish.util.overlap_utils import (
Area,
find_overlaps_of_xarrays,
remove_area_of_xarray,
sel_area_of_xarray
)


def create_intenisty_table_with_coords(area: Area, n_spots=10):
ttung marked this conversation as resolved.
Show resolved Hide resolved
"""
Creates a 50X50 intensity table with physical coordinates within
the given Area.

Parameters
----------
area: Area
The area of physical space the IntensityTable should be defined over
n_spots:
Number of spots to add to the IntensityTable
"""
codebook = test_utils.codebook_array_factory()
it = IntensityTable.synthetic_intensities(
codebook,
num_z=1,
height=50,
width=50,
n_spots=n_spots
)
# intensity table 1 has 10 spots, xmin = 0, ymin = 0, xmax = 2, ymax = 1
it[Coordinates.X.value] = xr.DataArray(np.linspace(area.min_x, area.max_x, n_spots),
dims=Features.AXIS)
it[Coordinates.Y.value] = xr.DataArray(np.linspace(area.min_y, area.max_y, n_spots),
dims=Features.AXIS)
return it


def test_find_area_intersection():
"""
Create various Area objects and verify their intersection are calculated correctly
"""
area1 = Area(min_x=0, max_x=2, min_y=0, max_y=2)
area2 = Area(min_x=1, max_x=2, min_y=1, max_y=3)
intersection = Area.find_intersection(area1, area2)
# intersection should be area with bottom point (1,1) and top point (2,2)
assert intersection == Area(min_x=1, max_x=2, min_y=1, max_y=2)

area2 = Area(min_x=3, max_x=5, min_y=3, max_y=5)
intersection = Area.find_intersection(area1, area2)
# no intersection
assert intersection is None

area2 = Area(min_x=0, max_x=5, min_y=3, max_y=5)
intersection = Area.find_intersection(area1, area2)
# area 2 right above area one
assert intersection is None

# try negatives
area1 = Area(min_x=-1, max_x=1, min_y=0, max_y=2)
area2 = Area(min_x=0, max_x=2, min_y=0, max_y=2)
intersection = Area.find_intersection(area1, area2)
assert intersection == Area(min_x=0, max_x=1, min_y=0, max_y=2)

area2 = Area(min_x=-3, max_x=-2, min_y=0, max_y=2)
intersection = Area.find_intersection(area1, area2)
assert intersection is None


def test_find_overlaps_of_xarrays():
"""
Create a list of overlapping IntensityTables and verify we identify the correct
overlapping sections
"""
# Create some overlapping intensity tables
it0 = create_intenisty_table_with_coords(Area(min_x=0, max_x=1,
min_y=0, max_y=1))
it1 = create_intenisty_table_with_coords(Area(min_x=.5, max_x=2,
min_y=.5, max_y=1.5))
it2 = create_intenisty_table_with_coords(Area(min_x=1.5, max_x=2.5,
min_y=0, max_y=1))
it3 = create_intenisty_table_with_coords(Area(min_x=0, max_x=1,
min_y=1, max_y=2))
overlaps = find_overlaps_of_xarrays([it0, it1, it2, it3])
# should have 4 total overlaps
assert len(overlaps) == 4
# overlap 1 between it0 and it1:
overlap_1 = Area(min_x=.5, max_x=1, min_y=.5, max_y=1)
assert overlap_1 == overlaps[(0, 1)]
# overlap 2 between it1 and it2
overlap_2 = Area(min_x=1.5, max_x=2, min_y=.5, max_y=1)
assert overlap_2 == overlaps[(1, 2)]
# overlap 3 between it1 and it3
overlap_3 = Area(min_x=.5, max_x=1, min_y=1, max_y=1.5)
assert overlap_3 == overlaps[(1, 3)]
# overlap 4 between it0 and it3
overlap_4 = Area(min_x=0, max_x=1, min_y=1, max_y=1)
assert overlap_4 == overlaps[(0, 3)]


def test_remove_area_of_xarray():
"""
Tests removing a section of an IntensityTable defined by its physical area
"""
it = create_intenisty_table_with_coords(Area(min_x=0, max_x=2,
min_y=0, max_y=2), n_spots=10)

area = Area(min_x=1, max_x=2, min_y=1, max_y=3)
# grab some random coord values in this range
removed_x = it.where(it.xc > 1, drop=True)[Coordinates.X.value].data[0]
removed_y = it.where(it.yc > 1, drop=True)[Coordinates.X.value].data[3]

it = remove_area_of_xarray(it, area)
# assert coords from removed section are no longer in it
assert not np.any(np.isclose(it[Coordinates.X.value], removed_x))
assert not np.any(np.isclose(it[Coordinates.Y.value], removed_y))


def test_sel_area_of_xarray():
"""
Tests selecting a section of an IntensityTable defined by its physical area
"""
it = create_intenisty_table_with_coords(Area(min_x=0, max_x=2, min_y=0, max_y=2), n_spots=10)

area = Area(min_x=1, max_x=2, min_y=1, max_y=3)
it = sel_area_of_xarray(it, area)

# Assert new min/max values
assert min(it[Coordinates.X.value]).data >= 1
assert max(it[Coordinates.X.value]).data <= 2
assert min(it[Coordinates.Y.value]).data >= 1
assert max(it[Coordinates.X.value]).data <= 2


def test_take_max():
"""
Create two overlapping IntensityTables with differing number of spots and verify that
by concatenating them with the TAKE_MAX strategy we only include spots in the overlapping
section from the IntensityTable that had the most.
"""
it1 = create_intenisty_table_with_coords(Area(min_x=0, max_x=2,
min_y=0, max_y=2), n_spots=10)
it2 = create_intenisty_table_with_coords(Area(min_x=1, max_x=2,
min_y=1, max_y=3), n_spots=20)

concatenated = IntensityTable.concatanate_intensity_tables(
[it1, it2], overlap_strategy=OverlapStrategy.TAKE_MAX)

# The overlap section hits half of the spots from each intensity table, 5 from it1
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

wait what? if it hits 5 of the spots from it1, then shouldn't we get a total of 25 spots?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

both the sel and remove_area_of_xarray methods are inclusive...so we get one spot in the comparison count and the concatenation...maybe this is wrong?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would dump the table to make sure it is consistent with your understanding, though I suspect you are correct. :)

# and 10 from i21. It2 wins and the resulting concatenated table should have all the
# spots from it2 (20) and 6 from it1 (6) for a total of 26 spots
assert concatenated.sizes[Features.AXIS] == 26
8 changes: 8 additions & 0 deletions starfish/types/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,14 @@ class Features:
INTENSITY = 'intensity'


class OverlapStrategy(AugmentedEnum):
"""
contains options to use when processes physically overlapping IntensityTables
or ImageStacks
"""
TAKE_MAX = 'take_max'


class Clip(AugmentedEnum):
"""
contains clipping options that determine how out-of-bounds values produced by filters are
Expand Down
162 changes: 162 additions & 0 deletions starfish/util/overlap_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
import itertools
from typing import Dict, List, Tuple, Union

import xarray as xr

from starfish.types import Coordinates, Features
from starfish.types._constants import OverlapStrategy


class Area:
"""
Small class that defines rectangular area of physical space by
its bottom left and top right coordinates.
"""
def __init__(self, min_x, max_x, min_y, max_y):
ttung marked this conversation as resolved.
Show resolved Hide resolved
self.min_x = min_x
self.max_x = max_x
self.min_y = min_y
self.max_y = max_y

def __eq__(self, other):
shanaxel42 marked this conversation as resolved.
Show resolved Hide resolved
return (self.min_x == other.min_x
and self.min_y == other.min_y
and self.max_x == other.max_x
and self.max_y == other.max_y)

@staticmethod
def _overlap(area1: "Area", area2: "Area") -> bool:
"""Return True if two rectangles overlap"""
if (area1.max_x < area2.min_x) or (area1.min_x > area2.max_x):
return False
if (area1.max_y < area2.min_y) or (area1.min_y > area2.max_y):
return False
return True

@staticmethod
def find_intersection(area1: "Area", area2: "Area") -> Union[None, "Area"]:
ttung marked this conversation as resolved.
Show resolved Hide resolved
"""
Find the overlap area of two rectangles and return as new Area object.
If no overlap return none.
ttung marked this conversation as resolved.
Show resolved Hide resolved
"""
if Area._overlap(area1, area2):
return Area(max(area1.min_x, area2.min_x),
min(area1.max_x, area2.max_x),
max(area1.min_y, area2.min_y),
min(area1.max_y, area2.max_y))
return None


def find_overlaps_of_xarrays(xarrays: List[xr.DataArray]
) -> Dict[Tuple[int, int], "Area"]:
"""
Find all the overlap areas within a list of xarrays.

Parameters
----------
xarrays : List[xr.DataArray]
The list of xarrays to find overlaps in.

Returns
-------
Dict[Tuple[int, int], "Area"]] :
A dictionary of tuples containing the indices of two overlapping
IntensityTables and their Area of intersection.

"""
all_overlaps: Dict[Tuple[int, int], "Area"] = dict()
for idx1, idx2 in itertools.combinations(range(len(xarrays)), 2):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so this is, as you pointed out, a n^2 operation. but each operation requires scanning the table to find the min/max for the coordinates. can you get some numbers from @ambrosejcarr for realistic FOV and spot counts, and build a set of tables that reflect that, and see what the perf is like? we can likely significantly shrink the cost by precomputing a min-max for each xarray and reusing that. it would also help inform whether we need to do the nlogn approach.

Finally, it might be worth trying to actually merge a large set of intensity tables, even if synthetic, to see if there are any performance implications to any of the xarray ops used during the merge.

xr1 = xarrays[idx1]
xr2 = xarrays[idx2]
area1 = Area(min(xr1[Coordinates.X.value]).data,
max(xr1[Coordinates.X.value]).data,
min(xr1[Coordinates.Y.value]).data,
max(xr1[Coordinates.Y.value]).data)

area2 = Area(min(xr2[Coordinates.X.value]).data,
max(xr2[Coordinates.X.value]).data,
min(xr2[Coordinates.Y.value]).data,
max(xr2[Coordinates.Y.value]).data)
intersection = Area.find_intersection(area1, area2)
if intersection:
all_overlaps[(idx1, idx2)] = intersection
return all_overlaps


def remove_area_of_xarray(it: xr.DataArray, area: Area) -> xr.DataArray:
"""Return everything in the xarray defined OUTSIDE the input area
shanaxel42 marked this conversation as resolved.
Show resolved Hide resolved

Parameters
----------
it: xr.DataArray
The xarray to modify
area: Area
The area to not include in the modified xarray

Returns
-------
xr.DataArray :
ttung marked this conversation as resolved.
Show resolved Hide resolved
The xarray without the defined area.
"""
return it.where((it.xc <= area.min_x)
| (it.xc >= area.max_x)
| (it.yc <= area.min_y)
| (it.yc >= area.max_y),
drop=True)


def sel_area_of_xarray(it: xr.DataArray, area: Area) -> xr.DataArray:
"""Return everything in the xarray defined WITHIN the input area
shanaxel42 marked this conversation as resolved.
Show resolved Hide resolved

Parameters
----------
it: xr.DataArray
The xarray to modify
area: Area
The area to include in the modified xarray

Returns
-------
xr.DataArray :
The xarray within the defined area.
"""
return it.where((it.xc >= area.min_x)
& (it.xc <= area.max_x)
& (it.yc >= area.min_y)
& (it.yc <= area.max_y), drop=True)


def take_max(intersection_rect: Area,
it1: xr.DataArray,
it2: xr.DataArray
):
"""
Compare two overlapping xarrays and remove spots from whichever
ttung marked this conversation as resolved.
Show resolved Hide resolved
has less in the overlapping section.

Parameters
----------
intersection_rect : Area
ttung marked this conversation as resolved.
Show resolved Hide resolved
The area of physical overalap between two xarrays
ttung marked this conversation as resolved.
Show resolved Hide resolved
it1 : xr.DataArray
The first overlapping xarray
it2 : xr.DataArray
The second overlapping xarray
"""
# # compare to see which section has more spots
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# # compare to see which section has more spots
# compare to see which section has more spots

intersect1 = sel_area_of_xarray(it1, intersection_rect)
intersect2 = sel_area_of_xarray(it2, intersection_rect)
if intersect1.sizes[Features.AXIS] >= intersect2.sizes[Features.AXIS]:
# I1 has more spots remove intesection2 from it2
it2 = remove_area_of_xarray(it2, intersection_rect)
else:
it1 = remove_area_of_xarray(it1, intersection_rect)
return it1, it2


"""
The mapping between OverlapStrategy type and the method to use for each.
"""
OVERLAP_STRATEGY_MAP = {
OverlapStrategy.TAKE_MAX: take_max
}