Skip to content

Commit

Permalink
GA: add 'intersection' method (closes #340)
Browse files Browse the repository at this point in the history
  • Loading branch information
etal committed Jun 30, 2018
1 parent 0ae50d5 commit f41eeb4
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 1 deletion.
19 changes: 18 additions & 1 deletion skgenome/gary.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import pandas as pd

from .chromsort import sorter_chrom
from .intersect import by_ranges, into_ranges, iter_ranges, iter_slices
from .intersect import by_ranges, into_ranges, iter_ranges, iter_slices, _monotonic
from .merge import flatten, merge
from .rangelabel import to_label
from .subtract import subtract
Expand Down Expand Up @@ -617,6 +617,23 @@ def flatten(self, combine=None, split_columns=None):
return self.as_dataframe(flatten(self.data, combine=combine,
split_columns=split_columns))

def intersection(self, other, mode='outer'):
    """Select the bins in `self` that overlap the regions in `other`.

    The extra fields of `self`, but not `other`, are retained in the output.

    Parameters
    ----------
    other
        Object holding the selecting regions. It is handed to
        `self.by_ranges` when `mode` is 'trim'; otherwise its `.data`
        table is handed to `iter_slices`.
    mode : str
        Overlap-handling mode, forwarded unchanged to `by_ranges` or
        `iter_slices`. 'trim' takes the slower chunk-by-chunk path; any
        other value takes the index-based path.

    Returns
    -------
    The result of `self.as_dataframe` applied to the selected rows. When
    nothing is selected, a zero-row selection of `self.data` (same
    columns) is passed to `as_dataframe` instead of raising.
    """
    # TODO options for which extra fields to keep
    #   by default, keep just the fields in 'table'
    if mode == 'trim':
        # Slower: every selected chunk is materialized before joining.
        chunks = [chunk.data for _, chunk in
                  self.by_ranges(other, mode=mode, keep_empty=False)]
        if not chunks:
            # pd.concat raises ValueError when given an empty list.
            return self.as_dataframe(self.data.iloc[:0])
        return self.as_dataframe(pd.concat(chunks))

    labels = [self.data.index[slc]
              for slc in iter_slices(self.data, other.data, mode, False)]
    if not labels:
        # np.concatenate raises ValueError when given an empty list.
        return self.as_dataframe(self.data.iloc[:0])
    # `self.data.index[...]` yields index labels, not row positions, so the
    # rows must be looked up by label (.loc) rather than by position (.iloc).
    return self.as_dataframe(self.data.loc[np.concatenate(labels)])

def merge(self, bp=0, stranded=False, combine=None):
"""Merge adjacent or overlapping regions into single rows.
Expand Down
5 changes: 5 additions & 0 deletions test/test_genome.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,10 +453,15 @@ def test_intersect(self):
(self.regions_2, selections2, expectations2),
):
for mode in ('outer', 'trim', 'inner'):
# Iterative intersection
grouped_results = regions.by_ranges(selections, mode=mode)
for (_coord, result), expect in zip(grouped_results,
expectations[mode]):
self._compare_regions(result, self._from_intervals(expect))
# Single-object intersect
result = regions.intersection(selections, mode=mode)
expect = self._from_intervals(sum(expectations[mode], []))
self._compare_regions(result, expect)

def test_subtract(self):
# Test cases:
Expand Down

0 comments on commit f41eeb4

Please sign in to comment.