Skip to content

Commit

Permalink
[ADAM-1728] Expose region join API through Python and R.
Browse files Browse the repository at this point in the history
Resolves #1728.
  • Loading branch information
Frank Austin Nothaft authored and heuermh committed Mar 20, 2018
1 parent 9798fbb commit a28da44
Show file tree
Hide file tree
Showing 8 changed files with 1,622 additions and 75 deletions.
625 changes: 625 additions & 0 deletions adam-core/src/main/scala/org/bdgenomics/adam/rdd/GenomicRDD.scala

Large diffs are not rendered by default.

315 changes: 284 additions & 31 deletions adam-python/bdgenomics/adam/rdd.py

Large diffs are not rendered by default.

120 changes: 120 additions & 0 deletions adam-python/bdgenomics/adam/test/alignmentRecordRdd_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,3 +236,123 @@ def test_persisting(self):
persistedReads.unpersist()
cached = self.sc._jsc.getPersistentRDDs()
self.assertEquals(cached.isEmpty(), True)


def test_broadcast_inner_join(self):
    """Check the row count produced by ``broadcastRegionJoin``.

    Loads ``small.1.sam`` as alignments and ``small.1.bed`` as features,
    joins the reads against the features with ``broadcastRegionJoin`` and
    expects the joined DataFrame to contain 5 rows.
    """

    readsPath = self.resourceFile("small.1.sam")
    targetsPath = self.resourceFile("small.1.bed")

    ac = ADAMContext(self.ss)

    reads = ac.loadAlignments(readsPath)
    targets = ac.loadFeatures(targetsPath)

    jRdd = reads.broadcastRegionJoin(targets)

    # assertEqual, not assertEquals: the alias is deprecated and was
    # removed from unittest in Python 3.12.
    self.assertEqual(jRdd.toDF().count(), 5)


def test_broadcast_right_outer_join(self):
    """Check the row count produced by ``rightOuterBroadcastRegionJoin``.

    Loads ``small.1.sam`` as alignments and ``small.1.bed`` as features,
    joins the reads against the features with
    ``rightOuterBroadcastRegionJoin`` and expects the joined DataFrame to
    contain 6 rows.
    """

    readsPath = self.resourceFile("small.1.sam")
    targetsPath = self.resourceFile("small.1.bed")

    ac = ADAMContext(self.ss)

    reads = ac.loadAlignments(readsPath)
    targets = ac.loadFeatures(targetsPath)

    jRdd = reads.rightOuterBroadcastRegionJoin(targets)

    # assertEqual, not assertEquals: the alias is deprecated and was
    # removed from unittest in Python 3.12.
    self.assertEqual(jRdd.toDF().count(), 6)


def test_shuffle_inner_join(self):
    """Check the row count produced by ``shuffleRegionJoin``.

    Loads ``small.1.sam`` as alignments and ``small.1.bed`` as features,
    joins the reads against the features with ``shuffleRegionJoin`` and
    expects the joined DataFrame to contain 5 rows.
    """

    readsPath = self.resourceFile("small.1.sam")
    targetsPath = self.resourceFile("small.1.bed")

    ac = ADAMContext(self.ss)

    reads = ac.loadAlignments(readsPath)
    targets = ac.loadFeatures(targetsPath)

    jRdd = reads.shuffleRegionJoin(targets)

    # assertEqual, not assertEquals: the alias is deprecated and was
    # removed from unittest in Python 3.12.
    self.assertEqual(jRdd.toDF().count(), 5)


def test_shuffle_right_outer_join(self):
    """Check the row count produced by ``rightOuterShuffleRegionJoin``.

    Loads ``small.1.sam`` as alignments and ``small.1.bed`` as features,
    joins the reads against the features with
    ``rightOuterShuffleRegionJoin`` and expects the joined DataFrame to
    contain 6 rows.
    """

    readsPath = self.resourceFile("small.1.sam")
    targetsPath = self.resourceFile("small.1.bed")

    ac = ADAMContext(self.ss)

    reads = ac.loadAlignments(readsPath)
    targets = ac.loadFeatures(targetsPath)

    jRdd = reads.rightOuterShuffleRegionJoin(targets)

    # assertEqual, not assertEquals: the alias is deprecated and was
    # removed from unittest in Python 3.12.
    self.assertEqual(jRdd.toDF().count(), 6)


def test_shuffle_left_outer_join(self):
    """Check the row count produced by ``leftOuterShuffleRegionJoin``.

    Loads ``small.1.sam`` as alignments and ``small.1.bed`` as features,
    joins the reads against the features with
    ``leftOuterShuffleRegionJoin`` and expects the joined DataFrame to
    contain 20 rows.
    """

    readsPath = self.resourceFile("small.1.sam")
    targetsPath = self.resourceFile("small.1.bed")

    ac = ADAMContext(self.ss)

    reads = ac.loadAlignments(readsPath)
    targets = ac.loadFeatures(targetsPath)

    jRdd = reads.leftOuterShuffleRegionJoin(targets)

    # assertEqual, not assertEquals: the alias is deprecated and was
    # removed from unittest in Python 3.12.
    self.assertEqual(jRdd.toDF().count(), 20)


def test_shuffle_full_outer_join(self):
    """Check the row count produced by ``fullOuterShuffleRegionJoin``.

    Loads ``small.1.sam`` as alignments and ``small.1.bed`` as features,
    joins the reads against the features with
    ``fullOuterShuffleRegionJoin`` and expects the joined DataFrame to
    contain 21 rows.
    """

    readsPath = self.resourceFile("small.1.sam")
    targetsPath = self.resourceFile("small.1.bed")

    ac = ADAMContext(self.ss)

    reads = ac.loadAlignments(readsPath)
    targets = ac.loadFeatures(targetsPath)

    jRdd = reads.fullOuterShuffleRegionJoin(targets)

    # assertEqual, not assertEquals: the alias is deprecated and was
    # removed from unittest in Python 3.12.
    self.assertEqual(jRdd.toDF().count(), 21)


def test_shuffle_inner_join_groupBy_left(self):
    """Check the row count produced by ``shuffleRegionJoinAndGroupByLeft``.

    Loads ``small.1.sam`` as alignments and ``small.1.bed`` as features,
    joins the reads against the features with
    ``shuffleRegionJoinAndGroupByLeft`` and expects the resulting
    DataFrame to contain 5 rows.
    """

    readsPath = self.resourceFile("small.1.sam")
    targetsPath = self.resourceFile("small.1.bed")

    ac = ADAMContext(self.ss)

    reads = ac.loadAlignments(readsPath)
    targets = ac.loadFeatures(targetsPath)

    jRdd = reads.shuffleRegionJoinAndGroupByLeft(targets)

    # assertEqual, not assertEquals: the alias is deprecated and was
    # removed from unittest in Python 3.12.
    self.assertEqual(jRdd.toDF().count(), 5)


def test_shuffle_right_outer_join_groupBy_left(self):
    """Check the row count of ``rightOuterShuffleRegionJoinAndGroupByLeft``.

    Loads ``small.1.sam`` as alignments and ``small.1.bed`` as features,
    joins the reads against the features with
    ``rightOuterShuffleRegionJoinAndGroupByLeft`` and expects the
    resulting DataFrame to contain 21 rows.
    """

    readsPath = self.resourceFile("small.1.sam")
    targetsPath = self.resourceFile("small.1.bed")

    ac = ADAMContext(self.ss)

    reads = ac.loadAlignments(readsPath)
    targets = ac.loadFeatures(targetsPath)

    jRdd = reads.rightOuterShuffleRegionJoinAndGroupByLeft(targets)

    # assertEqual, not assertEquals: the alias is deprecated and was
    # removed from unittest in Python 3.12.
    self.assertEqual(jRdd.toDF().count(), 21)
23 changes: 22 additions & 1 deletion adam-r/bdgenomics.adam/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,18 @@

export(ADAMContext)
export(aggregatedCoverage)
export(broadcastRegionJoin)
export(broadcastRegionJoinAndGroupByRight)
export(cache)
export(collapse)
export(countKmers)
export(coverage)
export(createADAMContext)
export(flankAdjacentFragments)
export(flatten)
export(fullOuterShuffleRegionJoin)
export(leftOuterShuffleRegionJoin)
export(leftOuterShuffleRegionJoinAndGroupByLeft)
export(loadAlignments)
export(loadContigFragments)
export(loadCoverage)
Expand All @@ -21,10 +26,16 @@ export(persist)
export(pipe)
export(realignIndels)
export(recalibrateBaseQualities)
export(rightOuterBroadcastRegionJoin)
export(rightOuterBroadcastRegionJoinAndGroupByRight)
export(rightOuterShuffleRegionJoin)
export(rightOuterShuffleRegionJoinAndGroupByLeft)
export(save)
export(saveAsParquet)
export(saveAsSam)
export(saveAsVcf)
export(shuffleRegionJoin)
export(shuffleRegionJoinAndGroupByLeft)
export(sort)
export(sortLexicographically)
export(sortReadsByReferencePosition)
Expand All @@ -45,18 +56,22 @@ exportClasses(CoverageRDD)
exportClasses(FeatureRDD)
exportClasses(FragmentRDD)
exportClasses(GenomicDataset)
exportClasses(GenomicRDD)
exportClasses(GenotypeRDD)
exportClasses(NucleotideContigFragmentRDD)
exportClasses(VariantContextRDD)
exportClasses(VariantRDD)
exportMethods(aggregatedCoverage)
exportMethods(broadcastRegionJoin)
exportMethods(broadcastRegionJoinAndGroupByRight)
exportMethods(cache)
exportMethods(collapse)
exportMethods(countKmers)
exportMethods(coverage)
exportMethods(flankAdjacentFragments)
exportMethods(flatten)
exportMethods(fullOuterShuffleRegionJoin)
exportMethods(leftOuterShuffleRegionJoin)
exportMethods(leftOuterShuffleRegionJoinAndGroupByLeft)
exportMethods(loadAlignments)
exportMethods(loadContigFragments)
exportMethods(loadCoverage)
Expand All @@ -69,10 +84,16 @@ exportMethods(persist)
exportMethods(pipe)
exportMethods(realignIndels)
exportMethods(recalibrateBaseQualities)
exportMethods(rightOuterBroadcastRegionJoin)
exportMethods(rightOuterBroadcastRegionJoinAndGroupByRight)
exportMethods(rightOuterShuffleRegionJoin)
exportMethods(rightOuterShuffleRegionJoinAndGroupByLeft)
exportMethods(save)
exportMethods(saveAsParquet)
exportMethods(saveAsSam)
exportMethods(saveAsVcf)
exportMethods(shuffleRegionJoin)
exportMethods(shuffleRegionJoinAndGroupByLeft)
exportMethods(sort)
exportMethods(sortLexicographically)
exportMethods(sortReadsByReferencePosition)
Expand Down
Loading

0 comments on commit a28da44

Please sign in to comment.