From 6835e2e895f0f753663a45d8a69df2ff37ff1220 Mon Sep 17 00:00:00 2001 From: Michael L Heuer Date: Wed, 16 Nov 2016 12:20:29 -0600 Subject: [PATCH] Increasing unit test coverage for VariantContextConverter --- .../converters/VariantContextConverter.scala | 18 +++ .../VariantContextConverterSuite.scala | 111 ++++++++++++++++++ .../adam/rdd/ADAMContextSuite.scala | 16 +++ .../rdd/variant/VariantContextRDDSuite.scala | 22 ++-- 4 files changed, 159 insertions(+), 8 deletions(-) diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/converters/VariantContextConverter.scala b/adam-core/src/main/scala/org/bdgenomics/adam/converters/VariantContextConverter.scala index b951556481..f7d8fa2668 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/converters/VariantContextConverter.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/converters/VariantContextConverter.scala @@ -397,6 +397,9 @@ private[adam] class VariantContextConverter(dict: Option[SequenceDictionary] = N if (vc.isFiltered) { builder.setFiltersFailed(new java.util.ArrayList(vc.getFilters)); } + if (vc.getAttributeAsBoolean("SOMATIC", false)) { + builder.setSomatic(true) + } builder.build } @@ -607,6 +610,21 @@ private[adam] class VariantContextConverter(dict: Option[SequenceDictionary] = N case Some(s) => vcb.id(s) } + val filtersApplied = Option(variant.getFiltersApplied).getOrElse(false) + val filtersPassed = Option(variant.getFiltersPassed).getOrElse(false) + + (filtersApplied, filtersPassed) match { + case (false, false) => vcb.unfiltered + case (false, true) => vcb.passFilters // log warning? + case (true, false) => vcb.filters(new java.util.HashSet(variant.getFiltersFailed())) + case (true, true) => vcb.passFilters + } + + val somatic: java.lang.Boolean = Option(variant.getSomatic).getOrElse(false) + if (somatic) { + vcb.attribute("SOMATIC", true) + } + // TODO: Extract provenance INFO fields try { vcb.genotypes(vc.genotypes.map(g => { diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/converters/VariantContextConverterSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/converters/VariantContextConverterSuite.scala index df3d562b28..49860e1397 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/converters/VariantContextConverterSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/converters/VariantContextConverterSuite.scala @@ -86,6 +86,27 @@ class VariantContextConverterSuite extends ADAMFunSuite { assert(variant.getReferenceAllele === "A") assert(variant.getStart === 0L) + assert(variant.getSomatic === false) + } + + test("Convert somatic htsjdk site-only SNV to ADAM") { + val converter = new VariantContextConverter + + // not sure why this doesn't work + //htsjdkSNVBuilder.attribute("SOMATIC", true) + //val adamVCs = converter.convert(htsjdkSNVBuilder.make) + + val vcb: VariantContextBuilder = new VariantContextBuilder() + .alleles(List(Allele.create("A", true), Allele.create("T"))) + .start(1L) + .stop(1L) + .chr("1") + .attribute("SOMATIC", true) + + val adamVCs = converter.convert(vcb.make) + val adamVC = adamVCs.head + val variant = adamVC.variant.variant + assert(variant.getSomatic === true) } test("Convert htsjdk site-only SNV to ADAM with contig conversion") { @@ -413,4 +434,94 @@ class VariantContextConverterSuite extends ADAMFunSuite { assert(htsjdkVC.hasID) assert(htsjdkVC.getID === "rs3131972;rs201888535") } + + test("Convert ADAM variant context with null filters applied to htsjdk") { + val variant = adamSNVBuilder() + .setFiltersApplied(null) + .build + + val converter = new VariantContextConverter + + val htsjdkVC = converter.convert(ADAMVariantContext(variant)) + assert(!htsjdkVC.filtersWereApplied) + assert(!htsjdkVC.isFiltered) + assert(htsjdkVC.getFilters.isEmpty) + } + + test("Convert ADAM variant context with no filters applied to htsjdk") { + val variant = adamSNVBuilder() + .setFiltersApplied(false) + .build + + val converter = new VariantContextConverter + + val htsjdkVC = converter.convert(ADAMVariantContext(variant)) + assert(!htsjdkVC.filtersWereApplied) + assert(!htsjdkVC.isFiltered) + assert(htsjdkVC.getFilters.isEmpty) + } + + test("Convert ADAM variant context with passing filters to htsjdk") { + val variant = adamSNVBuilder() + .setFiltersApplied(true) + .setFiltersPassed(true) + .build + + val converter = new VariantContextConverter + + val htsjdkVC = converter.convert(ADAMVariantContext(variant)) + assert(htsjdkVC.filtersWereApplied) + assert(!htsjdkVC.isFiltered) + assert(htsjdkVC.getFilters.isEmpty) + } + + test("Convert ADAM variant context with failing filters to htsjdk") { + val variant = adamSNVBuilder() + .setFiltersApplied(true) + .setFiltersPassed(false) + .setFiltersFailed(ImmutableList.of("FILTER1", "FILTER2")) + .build + + val converter = new VariantContextConverter + + val htsjdkVC = converter.convert(ADAMVariantContext(variant)) + assert(htsjdkVC.filtersWereApplied) + assert(htsjdkVC.isFiltered) + assert(htsjdkVC.getFilters.contains("FILTER1")) + assert(htsjdkVC.getFilters.contains("FILTER2")) + } + + test("Convert ADAM variant context with null somatic flag to htsjdk") { + val variant = adamSNVBuilder() + .setSomatic(null) + .build + + val converter = new VariantContextConverter + + val htsjdkVC = converter.convert(ADAMVariantContext(variant)) + assert(!htsjdkVC.hasAttribute("SOMATIC")) + } + + test("Convert ADAM variant context with non-somatic variant to htsjdk") { + val variant = adamSNVBuilder() + .setSomatic(false) + .build + + val converter = new VariantContextConverter + + val htsjdkVC = converter.convert(ADAMVariantContext(variant)) + assert(!htsjdkVC.hasAttribute("SOMATIC")) + } + + test("Convert ADAM variant context with somatic variant to htsjdk") { + val variant = adamSNVBuilder() + .setSomatic(true) + .build + + val converter = new VariantContextConverter + + val htsjdkVC = converter.convert(ADAMVariantContext(variant)) + assert(htsjdkVC.hasAttribute("SOMATIC")) + assert(htsjdkVC.getAttributeAsBoolean("SOMATIC", false) === true) + } } diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala index 2b080bf035..be9385c51c 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala @@ -152,6 +152,22 @@ class ADAMContextSuite extends ADAMFunSuite { assert(vcs.size === 6) val vc = vcs.head + + /* +1 14397 . CTGT C 139.12 IndelQD AC=2;AF=0.333;AN=6;BaseQRankSum=1.800;ClippingRankSum=0.138;DP=69;FS=7.786;MLEAC=2;MLEAF=0.333;MQ=26.84;MQ0=0;MQRankSum=-1.906;QD=1.55;ReadPosRankSum=0.384 GT:AD:DP:FT:GQ:PL 0/1:16,4:20:rd:99:120,0,827 0/1:8,2:10:dp;rd:60:60,0,414 0/0:39,0:39:PASS:99:0,116,2114 + */ + val variant = vc.variant.variant + assert(variant.getContigName === "1") + assert(variant.getStart === 14396L) + assert(variant.getEnd === 14400L) + assert(variant.getReferenceAllele === "CTGT") + assert(variant.getAlternateAllele === "C") + assert(variant.getNames.isEmpty) + assert(variant.getFiltersApplied === true) + assert(variant.getFiltersPassed === false) + assert(variant.getFiltersFailed.contains("IndelQD")) + assert(variant.getSomatic === false) + assert(vc.genotypes.size === 3) val gt = vc.genotypes.head diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/variant/VariantContextRDDSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/variant/VariantContextRDDSuite.scala index ddbc72effe..015247bfff 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/variant/VariantContextRDDSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/variant/VariantContextRDDSuite.scala @@ -45,6 +45,8 @@ class VariantContextRDDSuite extends ADAMFunSuite { .setReferenceAllele("T") .setAlternateAllele("C") .setNames(ImmutableList.of("rs3131972", "rs201888535")) + .setFiltersApplied(true) + .setFiltersPassed(true) .build val g0 = Genotype.newBuilder().setVariant(v0) @@ -67,14 +69,18 @@ class VariantContextRDDSuite extends ADAMFunSuite { val vcRdd = sc.loadVcf("%s/test.vcf/part-r-00000".format(tempDir)) assert(vcRdd.rdd.count === 1) - val variant = vcRdd.rdd.first.variant - assert(variant.variant.getContigName === "chr11") - assert(variant.variant.getStart === 17409572) - assert(variant.variant.getReferenceAllele === "T") - assert(variant.variant.getAlternateAllele === "C") - assert(variant.variant.getNames.length === 2) - assert(variant.variant.getNames.get(0) === "rs3131972") - assert(variant.variant.getNames.get(1) === "rs201888535") + val variant = vcRdd.rdd.first.variant.variant + assert(variant.getContigName === "chr11") + assert(variant.getStart === 17409572) + assert(variant.getReferenceAllele === "T") + assert(variant.getAlternateAllele === "C") + assert(variant.getNames.length === 2) + assert(variant.getNames.get(0) === "rs3131972") + assert(variant.getNames.get(1) === "rs201888535") + assert(variant.getFiltersApplied === true) + assert(variant.getFiltersPassed === true) + assert(variant.getFiltersFailed.isEmpty) + assert(variant.getSomatic === false) assert(vcRdd.sequences.records.size === 1) assert(vcRdd.sequences.records(0).name === "chr11")