Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Increasing unit test coverage for VariantContextConverter #1276

Merged
merged 2 commits into from
Nov 18, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,9 @@ private[adam] class VariantContextConverter(dict: Option[SequenceDictionary] = N
if (vc.isFiltered) {
builder.setFiltersFailed(new java.util.ArrayList(vc.getFilters));
}
if (vc.getAttributeAsBoolean("SOMATIC", false)) {
builder.setSomatic(true)
}
builder.build
}

Expand Down Expand Up @@ -607,6 +610,21 @@ private[adam] class VariantContextConverter(dict: Option[SequenceDictionary] = N
case Some(s) => vcb.id(s)
}

val filtersApplied = Option(variant.getFiltersApplied).getOrElse(false)
val filtersPassed = Option(variant.getFiltersPassed).getOrElse(false)

(filtersApplied, filtersPassed) match {
case (false, false) => vcb.unfiltered
case (false, true) => vcb.passFilters // log warning?
case (true, false) => vcb.filters(new java.util.HashSet(variant.getFiltersFailed()))
case (true, true) => vcb.passFilters
}

val somatic: java.lang.Boolean = Option(variant.getSomatic).getOrElse(false)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd drop the `: java.lang.Boolean` type annotation. Is there a reason you need it?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, it wouldn't compile without it. Odd that the lines above were OK.

if (somatic) {
vcb.attribute("SOMATIC", true)
}

// TODO: Extract provenance INFO fields
try {
vcb.genotypes(vc.genotypes.map(g => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,7 @@ import htsjdk.variant.variantcontext.writer.{
}
import htsjdk.variant.vcf.{ VCFHeader, VCFHeaderLine }
import java.io.OutputStream
import org.bdgenomics.adam.converters.{
SupportedHeaderLines,
VariantContextConverter
}
import org.bdgenomics.adam.converters.VariantContextConverter
import org.bdgenomics.adam.models.{
SequenceDictionary,
VariantContext
Expand All @@ -47,13 +44,14 @@ object VCFInFormatter extends InFormatterCompanion[VariantContext, VariantContex
* VCF header.
*/
def apply(gRdd: VariantContextRDD): VCFInFormatter = {
VCFInFormatter(gRdd.sequences, gRdd.samples.map(_.getSampleId))
VCFInFormatter(gRdd.sequences, gRdd.samples.map(_.getSampleId), gRdd.headerLines)
}
}

private[variant] case class VCFInFormatter private (
sequences: SequenceDictionary,
samples: Seq[String]) extends InFormatter[VariantContext, VariantContextRDD, VCFInFormatter] {
samples: Seq[String],
headerLines: Seq[VCFHeaderLine]) extends InFormatter[VariantContext, VariantContextRDD, VCFInFormatter] {

protected val companion = VCFInFormatter

Expand All @@ -75,9 +73,7 @@ private[variant] case class VCFInFormatter private (
.unsetOption(Options.INDEX_ON_THE_FLY)
.build()

val headerLines: Set[VCFHeaderLine] = (SupportedHeaderLines.infoHeaderLines ++
SupportedHeaderLines.formatHeaderLines).toSet
val header = new VCFHeader(headerLines, samples)
val header = new VCFHeader(headerLines.toSet, samples)
header.setSequenceDictionary(sequences.toSAMSequenceDictionary)
writer.writeHeader(header)

Expand Down
12 changes: 6 additions & 6 deletions adam-core/src/test/resources/sorted.lex.vcf
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,9 @@
##contig=<ID=2,length=249250621>
##contig=<ID=13,length=249250621>
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 NA12891 NA12892
1 14397 . CTGT C . . . GT:AD:DP:FT:GQ:PL 0/1:16,4:20:rd:99:120,0,256 0/1:8,2:10:dp;rd:60:60,0,256 0/0:39,0:39:PASS:99:0,116,256
1 14522 . G A . . . GT:AD:DP:FT:GQ:PL 0/1:10,5:15:dp:99:99,0,256 0/1:2,5:7:dp;rd:34:128,0,34 0/0:26,0:26:PASS:78:0,78,256
1 63735 rs201888535 CCTA C . . . GT:AD:DP:FT:GQ:PL 0/0:27,0:27:PASS:79:0,79,256 0/0:40,0:40:PASS:99:0,117,256 0/1:23,74:97:rd:99:256,0,256
13 752721 rs3131972 A G . . . GT:AD:DP:FT:GQ:PL 1/1:0,27:27:PASS:81:256,81,0 1/1:0,19:19:dp:57:256,57,0 1/1:0,22:22:PASS:66:256,66,0
13 752791 . A G . . . GT:AD:DP:FT:GQ:PL:SB 1/1:0,27:27:PASS:81:256,81,0:0,1,2,3 1/1:0,19:19:dp:57:256,57,0:4,5,6,7 1/1:0,22:22:PASS:66:256,66,0:2,3,4,5
2 19190 . GC G . . . GT:AD:DP:FT:GQ:PL 0/1:8,14:22:PASS:99:256,0,256 0/1:18,13:31:PASS:99:256,0,256 0/1:5,15:20:rd:99:256,0,107
1 14397 . CTGT C . IndelQD . GT:AD:DP:FT:GQ:PL 0/1:16,4:20:rd:99:120,0,256 0/1:8,2:10:dp;rd:60:60,0,256 0/0:39,0:39:PASS:99:0,116,256
1 14522 . G A . VQSRTrancheSNP99.95to100.00 . GT:AD:DP:FT:GQ:PL 0/1:10,5:15:dp:99:99,0,256 0/1:2,5:7:dp;rd:34:128,0,34 0/0:26,0:26:PASS:78:0,78,256
1 63735 rs201888535 CCTA C . PASS . GT:AD:DP:FT:GQ:PL 0/0:27,0:27:PASS:79:0,79,256 0/0:40,0:40:PASS:99:0,117,256 0/1:23,74:97:rd:99:256,0,256
13 752721 rs3131972 A G . PASS . GT:AD:DP:FT:GQ:PL 1/1:0,27:27:PASS:81:256,81,0 1/1:0,19:19:dp:57:256,57,0 1/1:0,22:22:PASS:66:256,66,0
13 752791 . A G . PASS . GT:AD:DP:FT:GQ:PL:SB 1/1:0,27:27:PASS:81:256,81,0:0,1,2,3 1/1:0,19:19:dp:57:256,57,0:4,5,6,7 1/1:0,22:22:PASS:66:256,66,0:2,3,4,5
2 19190 . GC G . PASS . GT:AD:DP:FT:GQ:PL 0/1:8,14:22:PASS:99:256,0,256 0/1:18,13:31:PASS:99:256,0,256 0/1:5,15:20:rd:99:256,0,107
12 changes: 6 additions & 6 deletions adam-core/src/test/resources/sorted.vcf
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,9 @@
##contig=<ID=2,length=249250621>
##contig=<ID=13,length=249250621>
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 NA12891 NA12892
1 14397 . CTGT C . . . GT:AD:DP:FT:GQ:PL 0/1:16,4:20:rd:99:120,0,256 0/1:8,2:10:dp;rd:60:60,0,256 0/0:39,0:39:PASS:99:0,116,256
1 14522 . G A . . . GT:AD:DP:FT:GQ:PL 0/1:10,5:15:dp:99:99,0,256 0/1:2,5:7:dp;rd:34:128,0,34 0/0:26,0:26:PASS:78:0,78,256
1 63735 rs201888535 CCTA C . . . GT:AD:DP:FT:GQ:PL 0/0:27,0:27:PASS:79:0,79,256 0/0:40,0:40:PASS:99:0,117,256 0/1:23,74:97:rd:99:256,0,256
2 19190 . GC G . . . GT:AD:DP:FT:GQ:PL 0/1:8,14:22:PASS:99:256,0,256 0/1:18,13:31:PASS:99:256,0,256 0/1:5,15:20:rd:99:256,0,107
13 752721 rs3131972 A G . . . GT:AD:DP:FT:GQ:PL 1/1:0,27:27:PASS:81:256,81,0 1/1:0,19:19:dp:57:256,57,0 1/1:0,22:22:PASS:66:256,66,0
13 752791 . A G . . . GT:AD:DP:FT:GQ:PL:SB 1/1:0,27:27:PASS:81:256,81,0:0,1,2,3 1/1:0,19:19:dp:57:256,57,0:4,5,6,7 1/1:0,22:22:PASS:66:256,66,0:2,3,4,5
1 14397 . CTGT C . IndelQD . GT:AD:DP:FT:GQ:PL 0/1:16,4:20:rd:99:120,0,256 0/1:8,2:10:dp;rd:60:60,0,256 0/0:39,0:39:PASS:99:0,116,256
1 14522 . G A . VQSRTrancheSNP99.95to100.00 . GT:AD:DP:FT:GQ:PL 0/1:10,5:15:dp:99:99,0,256 0/1:2,5:7:dp;rd:34:128,0,34 0/0:26,0:26:PASS:78:0,78,256
1 63735 rs201888535 CCTA C . PASS . GT:AD:DP:FT:GQ:PL 0/0:27,0:27:PASS:79:0,79,256 0/0:40,0:40:PASS:99:0,117,256 0/1:23,74:97:rd:99:256,0,256
2 19190 . GC G . PASS . GT:AD:DP:FT:GQ:PL 0/1:8,14:22:PASS:99:256,0,256 0/1:18,13:31:PASS:99:256,0,256 0/1:5,15:20:rd:99:256,0,107
13 752721 rs3131972 A G . PASS . GT:AD:DP:FT:GQ:PL 1/1:0,27:27:PASS:81:256,81,0 1/1:0,19:19:dp:57:256,57,0 1/1:0,22:22:PASS:66:256,66,0
13 752791 . A G . PASS . GT:AD:DP:FT:GQ:PL:SB 1/1:0,27:27:PASS:81:256,81,0:0,1,2,3 1/1:0,19:19:dp:57:256,57,0:4,5,6,7 1/1:0,22:22:PASS:66:256,66,0:2,3,4,5
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,23 @@ class VariantContextConverterSuite extends ADAMFunSuite {

assert(variant.getReferenceAllele === "A")
assert(variant.getStart === 0L)
assert(variant.getSomatic === false)
}

test("Convert somatic htsjdk site-only SNV to ADAM") {
  // Build an htsjdk context with alleles A and T on contig "1"; no genotypes
  // are attached, and the SOMATIC attribute is set.
  val somaticSite: VariantContextBuilder = new VariantContextBuilder()
    .alleles(List(Allele.create("A", true), Allele.create("T")))
    .start(1L)
    .stop(1L)
    .chr("1")
    .attribute("SOMATIC", true)
  val htsjdkVC = somaticSite.make

  val converter = new VariantContextConverter
  val converted = converter.convert(htsjdkVC)

  // The first converted ADAM variant is expected to carry the somatic flag.
  val adamVariant = converted.head.variant.variant
  assert(adamVariant.getSomatic === true)
}

test("Convert htsjdk site-only SNV to ADAM with contig conversion") {
Expand Down Expand Up @@ -413,4 +430,94 @@ class VariantContextConverterSuite extends ADAMFunSuite {
assert(htsjdkVC.hasID)
assert(htsjdkVC.getID === "rs3131972;rs201888535")
}

test("Convert ADAM variant context with null filters applied to htsjdk") {
  val converter = new VariantContextConverter

  // filtersApplied is explicitly nulled out on the ADAM variant.
  val adamVariant = adamSNVBuilder().setFiltersApplied(null).build
  val converted = converter.convert(ADAMVariantContext(adamVariant))

  // Expect the htsjdk context to report no applied filters and no failures.
  assert(!converted.filtersWereApplied)
  assert(!converted.isFiltered)
  assert(converted.getFilters.isEmpty)
}

test("Convert ADAM variant context with no filters applied to htsjdk") {
  val converter = new VariantContextConverter

  // filtersApplied is explicitly false on the ADAM variant.
  val adamVariant = adamSNVBuilder().setFiltersApplied(false).build
  val converted = converter.convert(ADAMVariantContext(adamVariant))

  // Expect the htsjdk context to report no applied filters and no failures.
  assert(!converted.filtersWereApplied)
  assert(!converted.isFiltered)
  assert(converted.getFilters.isEmpty)
}

test("Convert ADAM variant context with passing filters to htsjdk") {
  val converter = new VariantContextConverter

  // Filters were applied and the variant passed them.
  val builder = adamSNVBuilder()
    .setFiltersApplied(true)
    .setFiltersPassed(true)
  val adamVariant = builder.build
  val converted = converter.convert(ADAMVariantContext(adamVariant))

  // Expect filters to be reported as applied, with nothing failing.
  assert(converted.filtersWereApplied)
  assert(!converted.isFiltered)
  assert(converted.getFilters.isEmpty)
}

test("Convert ADAM variant context with failing filters to htsjdk") {
  val converter = new VariantContextConverter

  // Filters were applied and the variant failed FILTER1 and FILTER2.
  val builder = adamSNVBuilder()
    .setFiltersApplied(true)
    .setFiltersPassed(false)
    .setFiltersFailed(ImmutableList.of("FILTER1", "FILTER2"))
  val adamVariant = builder.build
  val converted = converter.convert(ADAMVariantContext(adamVariant))

  // Expect the htsjdk context to be filtered and to list both failed filters.
  assert(converted.filtersWereApplied)
  assert(converted.isFiltered)
  assert(converted.getFilters.contains("FILTER1"))
  assert(converted.getFilters.contains("FILTER2"))
}

test("Convert ADAM variant context with null somatic flag to htsjdk") {
  val converter = new VariantContextConverter

  // The somatic flag is explicitly nulled out on the ADAM variant.
  val adamVariant = adamSNVBuilder().setSomatic(null).build
  val converted = converter.convert(ADAMVariantContext(adamVariant))

  // Expect no SOMATIC attribute on the htsjdk context.
  assert(!converted.hasAttribute("SOMATIC"))
}

test("Convert ADAM variant context with non-somatic variant to htsjdk") {
  val converter = new VariantContextConverter

  // The somatic flag is explicitly false on the ADAM variant.
  val adamVariant = adamSNVBuilder().setSomatic(false).build
  val converted = converter.convert(ADAMVariantContext(adamVariant))

  // Expect no SOMATIC attribute on the htsjdk context.
  assert(!converted.hasAttribute("SOMATIC"))
}

test("Convert ADAM variant context with somatic variant to htsjdk") {
  val converter = new VariantContextConverter

  // The somatic flag is set on the ADAM variant.
  val adamVariant = adamSNVBuilder().setSomatic(true).build
  val converted = converter.convert(ADAMVariantContext(adamVariant))

  // Expect a SOMATIC attribute that reads back as true.
  assert(converted.hasAttribute("SOMATIC"))
  assert(converted.getAttributeAsBoolean("SOMATIC", false) === true)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,18 @@ class ADAMContextSuite extends ADAMFunSuite {
assert(vcs.size === 6)

val vc = vcs.head
val variant = vc.variant.variant
assert(variant.getContigName === "1")
assert(variant.getStart === 14396L)
assert(variant.getEnd === 14400L)
assert(variant.getReferenceAllele === "CTGT")
assert(variant.getAlternateAllele === "C")
assert(variant.getNames.isEmpty)
assert(variant.getFiltersApplied === true)
assert(variant.getFiltersPassed === false)
assert(variant.getFiltersFailed.contains("IndelQD"))
assert(variant.getSomatic === false)

assert(vc.genotypes.size === 3)

val gt = vc.genotypes.head
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ class VariantContextRDDSuite extends ADAMFunSuite {
.setReferenceAllele("T")
.setAlternateAllele("C")
.setNames(ImmutableList.of("rs3131972", "rs201888535"))
.setFiltersApplied(true)
.setFiltersPassed(true)
.build

val g0 = Genotype.newBuilder().setVariant(v0)
Expand All @@ -67,14 +69,18 @@ class VariantContextRDDSuite extends ADAMFunSuite {
val vcRdd = sc.loadVcf("%s/test.vcf/part-r-00000".format(tempDir))
assert(vcRdd.rdd.count === 1)

val variant = vcRdd.rdd.first.variant
assert(variant.variant.getContigName === "chr11")
assert(variant.variant.getStart === 17409572)
assert(variant.variant.getReferenceAllele === "T")
assert(variant.variant.getAlternateAllele === "C")
assert(variant.variant.getNames.length === 2)
assert(variant.variant.getNames.get(0) === "rs3131972")
assert(variant.variant.getNames.get(1) === "rs201888535")
val variant = vcRdd.rdd.first.variant.variant
assert(variant.getContigName === "chr11")
assert(variant.getStart === 17409572)
assert(variant.getReferenceAllele === "T")
assert(variant.getAlternateAllele === "C")
assert(variant.getNames.length === 2)
assert(variant.getNames.get(0) === "rs3131972")
assert(variant.getNames.get(1) === "rs201888535")
assert(variant.getFiltersApplied === true)
assert(variant.getFiltersPassed === true)
assert(variant.getFiltersFailed.isEmpty)
assert(variant.getSomatic === false)

assert(vcRdd.sequences.records.size === 1)
assert(vcRdd.sequences.records(0).name === "chr11")
Expand Down Expand Up @@ -116,9 +122,7 @@ class VariantContextRDDSuite extends ADAMFunSuite {
}

sparkTest("don't lose any variants when piping as VCF") {
val smallVcf = Thread.currentThread()
.getContextClassLoader
.getResource("small.vcf").getFile
val smallVcf = testFile("small.vcf")
val rdd: VariantContextRDD = sc.loadVcf(smallVcf)
val records = rdd.rdd.count

Expand Down