diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/IndelRealignmentTarget.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/IndelRealignmentTarget.scala index 183c798f1e..ce71ef0939 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/IndelRealignmentTarget.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/IndelRealignmentTarget.scala @@ -65,7 +65,7 @@ private[realignment] object TargetOrdering extends Ordering[IndelRealignmentTarg } /** - * Compares a read to an indel realignment target to see if it starts before the start of the indel realignment target. + * Compares a read to an indel realignment target to see if the target is before the read. * * @param target Realignment target to compare. * @param read Read to compare. diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/RealignIndels.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/RealignIndels.scala index 0e72bf5fbf..8a4f1f0412 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/RealignIndels.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/RealignIndels.scala @@ -74,24 +74,27 @@ private[read] object RealignIndels extends Serializable with Logging { read: RichAlignmentRecord, targets: TreeSet[(IndelRealignmentTarget, Int)]): Int = { // Perform tail call recursive binary search - if (targets.size == 1) { - if (TargetOrdering.contains(targets.head._1, read)) { - // if there is overlap, return the overlapping target - targets.head._2 - } else { - // else, return an empty target (negative index) - // to prevent key skew, split up by max indel alignment length - (-1 - (read.record.getStart / 3000L)).toInt - } + if (TargetOrdering.contains(targets.head._1, read)) { + // if there is overlap, return the overlapping target + targets.head._2 + } else if (targets.size <= 1) { + // else, return an empty target (negative index) + // to prevent key skew, split up by max indel alignment length + (-1 - (read.record.getStart / 3000L)).toInt } else { // split the set and recurse val (head, tail) = targets.splitAt(targets.size / 2) - val reducedSet = if (TargetOrdering.lt(tail.head._1, read)) { - head + + if (TargetOrdering.contains(tail.head._1, read)) { + tail.head._2 } else { - tail + val reducedSet = if (TargetOrdering.lt(tail.head._1, read)) { + tail + } else { + head + } + mapToTarget(read, reducedSet, test) } - mapToTarget(read, reducedSet) } } diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/realignment/IndelRealignmentTargetSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/realignment/IndelRealignmentTargetSuite.scala index cb95e99221..2552e50829 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/realignment/IndelRealignmentTargetSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/realignment/IndelRealignmentTargetSuite.scala @@ -79,6 +79,12 @@ class IndelRealignmentTargetSuite extends ADAMFunSuite { assert(targets.head.variation.get.end === 8) assert(targets.head.readRange.start === 3) assert(targets.head.readRange.end === 10) + assert(TargetOrdering.contains(targets.head, read)) + assert(!TargetOrdering.lt(targets.head, read)) + val read2 = make_read(2L, "2M3D2M", "2^AAA2", 4, 7) + val read4 = make_read(4L, "2M3D2M", "2^AAA2", 4, 7) + assert(!TargetOrdering.lt(targets.head, read)) + assert(TargetOrdering.lt(targets.head, read4)) } sparkTest("creating simple target from read with insertion") { diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/realignment/RealignIndelsSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/realignment/RealignIndelsSuite.scala index 1cbe55694b..8dd1780402 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/realignment/RealignIndelsSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/realignment/RealignIndelsSuite.scala @@ -22,13 +22,14 @@ import org.bdgenomics.adam.algorithms.consensus.{ Consensus, ConsensusGenerator } -import org.bdgenomics.adam.models.ReferencePosition +import org.bdgenomics.adam.models.{ ReferencePosition, ReferenceRegion } import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.rdd.read.AlignmentRecordRDD import org.bdgenomics.adam.rdd.variant.VariantRDD import org.bdgenomics.adam.rich.RichAlignmentRecord import org.bdgenomics.adam.util.ADAMFunSuite import org.bdgenomics.formats.avro.{ AlignmentRecord, Contig, Variant } +import scala.collection.immutable.TreeSet class RealignIndelsSuite extends ADAMFunSuite { @@ -53,6 +54,40 @@ class RealignIndelsSuite extends ADAMFunSuite { sc.loadAlignments(path).rdd } + def makeRead(start: Long, end: Long): RichAlignmentRecord = { + RichAlignmentRecord(AlignmentRecord.newBuilder() + .setContigName("ctg") + .setStart(start) + .setEnd(end) + .setReadMapped(true) + .build()) + } + + test("map reads to targets") { + val ts = Array( + new IndelRealignmentTarget(None, ReferenceRegion("ctg", 1L, 4L)), + new IndelRealignmentTarget(None, ReferenceRegion("ctg", 10L, 44L)), + new IndelRealignmentTarget(None, ReferenceRegion("ctg", 100L, 400L))) + .zipWithIndex + val targets = TreeSet(ts: _*)(ZippedTargetOrdering) + + assert(RealignIndels.mapToTarget(makeRead(0L, 1L), targets) < 0) + assert(RealignIndels.mapToTarget(makeRead(0L, 2L), targets) === 0) + assert(RealignIndels.mapToTarget(makeRead(1L, 2L), targets) === 0) + assert(RealignIndels.mapToTarget(makeRead(3L, 6L), targets) === 0) + assert(RealignIndels.mapToTarget(makeRead(6L, 8L), targets) < 0) + assert(RealignIndels.mapToTarget(makeRead(8L, 12L), targets) === 1) + assert(RealignIndels.mapToTarget(makeRead(10L, 12L), targets) === 1) + assert(RealignIndels.mapToTarget(makeRead(14L, 36L), targets) === 1) + assert(RealignIndels.mapToTarget(makeRead(35L, 50L), targets) === 1) + assert(RealignIndels.mapToTarget(makeRead(45L, 50L), targets) < 0) + assert(RealignIndels.mapToTarget(makeRead(90L, 100L), targets) < 0) + assert(RealignIndels.mapToTarget(makeRead(90L, 101L), targets) === 2) + assert(RealignIndels.mapToTarget(makeRead(200L, 300L), targets) === 2) + assert(RealignIndels.mapToTarget(makeRead(200L, 600L), targets) === 2) + assert(RealignIndels.mapToTarget(makeRead(700L, 1000L), targets) < 0) + } + sparkTest("checking mapping to targets for artificial reads") { val targets = RealignmentTargetFinder(artificialReads.map(RichAlignmentRecord(_))) assert(targets.size === 1)