Skip to content

Commit

Permalink
[ADAM-1402] Fix INDEL realigner bad binary search.
Browse files Browse the repository at this point in the history
  • Loading branch information
fnothaft committed Mar 23, 2017
1 parent 629b778 commit f7c6578
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ private[realignment] object TargetOrdering extends Ordering[IndelRealignmentTarg
}

/**
* Compares a read to an indel realignment target to see if it starts before the start of the indel realignment target.
* Compares a read to an indel realignment target to see if the target is before the read.
*
* @param target Realignment target to compare.
* @param read Read to compare.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,24 +74,27 @@ private[read] object RealignIndels extends Serializable with Logging {
read: RichAlignmentRecord,
targets: TreeSet[(IndelRealignmentTarget, Int)]): Int = {
// Perform tail call recursive binary search
if (targets.size == 1) {
if (TargetOrdering.contains(targets.head._1, read)) {
// if there is overlap, return the overlapping target
targets.head._2
} else {
// else, return an empty target (negative index)
// to prevent key skew, split up by max indel alignment length
(-1 - (read.record.getStart / 3000L)).toInt
}
if (TargetOrdering.contains(targets.head._1, read)) {
// if there is overlap, return the overlapping target
targets.head._2
} else if (targets.size <= 1) {
// else, return an empty target (negative index)
// to prevent key skew, split up by max indel alignment length
(-1 - (read.record.getStart / 3000L)).toInt
} else {
// split the set and recurse
val (head, tail) = targets.splitAt(targets.size / 2)
val reducedSet = if (TargetOrdering.lt(tail.head._1, read)) {
head

if (TargetOrdering.contains(tail.head._1, read)) {
tail.head._2
} else {
tail
val reducedSet = if (TargetOrdering.lt(tail.head._1, read)) {
tail
} else {
head
}
mapToTarget(read, reducedSet, test)
}
mapToTarget(read, reducedSet)
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,12 @@ class IndelRealignmentTargetSuite extends ADAMFunSuite {
assert(targets.head.variation.get.end === 8)
assert(targets.head.readRange.start === 3)
assert(targets.head.readRange.end === 10)
assert(TargetOrdering.contains(targets.head, read))
assert(!TargetOrdering.lt(targets.head, read))
val read2 = make_read(2L, "2M3D2M", "2^AAA2", 4, 7)
val read4 = make_read(4L, "2M3D2M", "2^AAA2", 4, 7)
assert(!TargetOrdering.lt(targets.head, read))
assert(TargetOrdering.lt(targets.head, read4))
}

sparkTest("creating simple target from read with insertion") {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,14 @@ import org.bdgenomics.adam.algorithms.consensus.{
Consensus,
ConsensusGenerator
}
import org.bdgenomics.adam.models.ReferencePosition
import org.bdgenomics.adam.models.{ ReferencePosition, ReferenceRegion }
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.adam.rdd.read.AlignmentRecordRDD
import org.bdgenomics.adam.rdd.variant.VariantRDD
import org.bdgenomics.adam.rich.RichAlignmentRecord
import org.bdgenomics.adam.util.ADAMFunSuite
import org.bdgenomics.formats.avro.{ AlignmentRecord, Contig, Variant }
import scala.collection.immutable.TreeSet

class RealignIndelsSuite extends ADAMFunSuite {

Expand All @@ -53,6 +54,40 @@ class RealignIndelsSuite extends ADAMFunSuite {
sc.loadAlignments(path).rdd
}

def makeRead(start: Long, end: Long): RichAlignmentRecord = {
RichAlignmentRecord(AlignmentRecord.newBuilder()
.setContigName("ctg")
.setStart(start)
.setEnd(end)
.setReadMapped(true)
.build())
}

test("map reads to targets") {
val ts = Array(
new IndelRealignmentTarget(None, ReferenceRegion("ctg", 1L, 4L)),
new IndelRealignmentTarget(None, ReferenceRegion("ctg", 10L, 44L)),
new IndelRealignmentTarget(None, ReferenceRegion("ctg", 100L, 400L)))
.zipWithIndex
val targets = TreeSet(ts: _*)(ZippedTargetOrdering)

assert(RealignIndels.mapToTarget(makeRead(0L, 1L), targets) < 0)
assert(RealignIndels.mapToTarget(makeRead(0L, 2L), targets) === 0)
assert(RealignIndels.mapToTarget(makeRead(1L, 2L), targets) === 0)
assert(RealignIndels.mapToTarget(makeRead(3L, 6L), targets) === 0)
assert(RealignIndels.mapToTarget(makeRead(6L, 8L), targets) < 0)
assert(RealignIndels.mapToTarget(makeRead(8L, 12L), targets) === 1)
assert(RealignIndels.mapToTarget(makeRead(10L, 12L), targets) === 1)
assert(RealignIndels.mapToTarget(makeRead(14L, 36L), targets) === 1)
assert(RealignIndels.mapToTarget(makeRead(35L, 50L), targets) === 1)
assert(RealignIndels.mapToTarget(makeRead(45L, 50L), targets) < 0)
assert(RealignIndels.mapToTarget(makeRead(90L, 100L), targets) < 0)
assert(RealignIndels.mapToTarget(makeRead(90L, 101L), targets) === 2)
assert(RealignIndels.mapToTarget(makeRead(200L, 300L), targets) === 2)
assert(RealignIndels.mapToTarget(makeRead(200L, 600L), targets) === 2)
assert(RealignIndels.mapToTarget(makeRead(700L, 1000L), targets) < 0)
}

sparkTest("checking mapping to targets for artificial reads") {
val targets = RealignmentTargetFinder(artificialReads.map(RichAlignmentRecord(_)))
assert(targets.size === 1)
Expand Down

0 comments on commit f7c6578

Please sign in to comment.