Skip to content

Commit

Permalink
[ADAM-1381] Fix Variant end position.
Browse files Browse the repository at this point in the history
Resolves bigdatagenomics#1381. Sets variant end position to the proper site for symbolic alleles.
  • Loading branch information
fnothaft authored and heuermh committed Feb 21, 2017
1 parent 8c8b944 commit 0fee9a4
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,16 @@ private[adam] object VariantContextConverter {
}
}

// Pre-built Some wrapping the symbolic "<NON_REF>" allele, created once and
// handed out by optNonRef. The `false` argument is presumably htsjdk's isRef
// flag (i.e. this is not a reference allele) -- confirm against Allele.create.
private val OPT_NON_REF = Some(Allele.create("<NON_REF>", false))

/**
 * Chooses the optional extra allele that accompanies a variant's own alleles.
 *
 * @param v Variant whose alternate allele field is inspected.
 * @return None when the variant has an alternate allele set; otherwise the
 *   shared OPT_NON_REF value.
 */
private def optNonRef(v: Variant): Option[Allele] = {
  // Option(...) maps a null alternate allele to None, so the match below is
  // equivalent to an explicit null check.
  Option(v.getAlternateAllele) match {
    case Some(_) => None
    case None    => OPT_NON_REF
  }
}

/**
* Converts the alleles in a variant into a Java collection of htsjdk alleles.
*
Expand All @@ -115,7 +125,8 @@ private[adam] object VariantContextConverter {
*/
private def convertAlleles(v: Variant): java.util.Collection[Allele] = {
// Gathers the candidate alleles and flattens them; the elements are presumably
// Options (optNonRef returns Option[Allele]), so absent alleles drop out.
val asSeq = Seq(convertAlleleOpt(v.getReferenceAllele, true),
// NOTE(review): the next line is the pre-change side of this diff hunk; the
// commit replaces it with the two lines that follow it.
convertAlleleOpt(v.getAlternateAllele)).flatten
// Post-change side: the alternate allele, followed by the optional allele
// supplied by optNonRef.
convertAlleleOpt(v.getAlternateAllele),
optNonRef(v)).flatten

// NOTE(review): a Scala Seq is returned where java.util.Collection is declared;
// presumably an implicit Scala-to-Java collection conversion is in scope -- confirm.
asSeq
}
Expand Down Expand Up @@ -1848,7 +1859,7 @@ private[adam] class VariantContextConverter(
// Seed the htsjdk builder with the variant's contig, coordinates and alleles.
val builder = new VariantContextBuilder()
.chr(v.getContigName)
// The start handed to htsjdk is the variant's start plus one.
.start(v.getStart + 1)
// NOTE(review): pre-change side of this diff hunk -- stop was derived from the
// start plus the reference allele length.
.stop(v.getStart + v.getReferenceAllele.length)
// Post-change side: stop comes from the variant's own end field, which the
// commit message says gives the proper site for symbolic alleles.
.stop(v.getEnd)
.alleles(VariantContextConverter.convertAlleles(v))

// bind the conversion functions and fold
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ import org.bdgenomics.adam.models.{
SequenceDictionary,
VariantContext => ADAMVariantContext
}
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.adam.util.{ ADAMFunSuite, PhredUtils }
import org.bdgenomics.formats.avro._
import scala.collection.JavaConversions._
Expand Down Expand Up @@ -85,6 +86,7 @@ class VariantContextConverterSuite extends ADAMFunSuite {
/**
 * Creates a Variant builder pre-populated as an A to T substitution.
 *
 * @param contig Contig name to set on the variant; defaults to "1".
 * @return A builder with contig name, start 0, end 1, reference allele "A"
 *   and alternate allele "T" already set.
 */
def adamSNVBuilder(contig: String = "1"): Variant.Builder = {
  // set the location first ...
  val located = Variant.newBuilder()
    .setContigName(contig)
    .setStart(0L)
    .setEnd(1L)

  // ... then the two alleles; the result of the last setter is returned
  located
    .setReferenceAllele("A")
    .setAlternateAllele("T")
}

Expand Down Expand Up @@ -1863,6 +1865,7 @@ class VariantContextConverterSuite extends ADAMFunSuite {
// Fixture variant: A to T on contig "1" with start 0 and end 1. The setEnd
// call is the one line this diff hunk adds.
val v = Variant.newBuilder
.setContigName("1")
.setStart(0L)
.setEnd(1L)
.setReferenceAllele("A")
.setAlternateAllele("T")
.build
Expand Down Expand Up @@ -2534,4 +2537,17 @@ class VariantContextConverterSuite extends ADAMFunSuite {
assert(adamGt.getVariantCallingAnnotations.getAttributes.containsKey("STRING_G"))
assert(adamGt.getVariantCallingAnnotations.getAttributes.get("STRING_G") === "foo,bar,baz")
}

sparkTest("respect end position for symbolic alts") {
  // pull every record of the gVCF fixture back to the driver
  val gvcfPath = testFile("gvcf_dir/gvcf_multiallelic.g.vcf")
  val loaded = sc.loadVcf(gvcfPath).rdd.collect()

  // select the record that starts at 16157520 and run it through the converter
  val symbolic = loaded.filter(rec => rec.variant.variant.getStart == 16157520L).head
  val converted = converter.convert(symbolic)

  // the conversion must succeed and report 16157602 as the end position
  assert(converted.isDefined)
  assert(converted.get.getEnd === 16157602)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -399,8 +399,6 @@ class ADAMContextSuite extends ADAMFunSuite {
val path = new File(testFile("gvcf_dir/gvcf_multiallelic.g.vcf")).getParent()

val variants = sc.loadVcf(path).toVariantRDD
// Not sure that the count should be 7 below; however, the current failure to read the multi-allelic site happens
// before this assertion is even reached
assert(variants.rdd.count === 6)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ class VariantContextRDDSuite extends ADAMFunSuite {
val v0 = Variant.newBuilder
.setContigName("chr11")
.setStart(17409572L)
.setEnd(17409573L)
.setReferenceAllele("T")
.setAlternateAllele("C")
.setNames(ImmutableList.of("rs3131972", "rs201888535"))
Expand Down

0 comments on commit 0fee9a4

Please sign in to comment.