From 33ac71e23d0794780f23cc37226364705419a689 Mon Sep 17 00:00:00 2001
From: David Benjamin
Date: Fri, 19 May 2023 12:04:42 -0400
Subject: [PATCH] Using a light ref/alt/pos Event class for assembled variation, pileup events, and force-calling alleles

---
 .../walkers/annotator/AssemblyComplexity.java |  22 +-
 .../tools/walkers/annotator/TandemRepeat.java |   7 +
 .../annotator/VariantOverlapAnnotator.java    |  11 +-
 .../walkers/genotyper/GenotypingEngine.java   |   7 +-
 .../afcalc/AlleleFrequencyCalculator.java     |  18 +-
 .../haplotypecaller/AlleleAndContext.java     |  56 ---
 .../haplotypecaller/AlleleFiltering.java      | 200 ++-----
 .../AssemblyBasedCallerUtils.java             | 413 +++---------
 .../AssemblyRegionTrimmer.java                |  85 ++--
 .../haplotypecaller/AssemblyResultSet.java    | 204 ++++++++-
 .../HaplotypeCallerEngine.java                |  80 ++--
 .../HaplotypeCallerGenotypingEngine.java      |  18 +-
 .../haplotypecaller/LocationAndAlleles.java   |  46 --
 ...yDeterminedHaplotypeComputationEngine.java | 314 +++++------
 .../PileupDetectionArgumentCollection.java    |   3 +-
 .../RampedHaplotypeCallerEngine.java          |  17 +-
 .../readthreading/ReadThreadingAssembler.java |   3 +-
 .../tools/walkers/mutect/Mutect2Engine.java   |  52 +--
 .../mutect/SomaticGenotypingEngine.java       |  15 +-
 .../HaplotypeBasedVariantRecaller.java        |   5 +-
 .../LeftAlignAndTrimVariants.java             |   6 -
 .../hellbender/utils/haplotype/Event.java     | 117 +++++
 .../hellbender/utils/haplotype/EventMap.java  | 246 ++++-------
 .../PartiallyDeterminedHaplotype.java         |  26 +-
 .../utils/pileup/PileupBasedAlleles.java      | 187 +++-----
 .../variant/GATKVariantContextUtils.java      | 163 +++----
 .../AlleleFilteringUnitTest.java              |  39 +-
 .../AssemblyBasedCallerUtilsUnitTest.java     | 378 ++++++----------
 ...plotypeCallerGenotypingEngineUnitTest.java |  16 +-
 ...nedHaplotypeComputationEngineUnitTest.java | 104 +++--
 .../utils/haplotype/EventMapUnitTest.java     |  42 +-
 .../GATKVariantContextUtilsUnitTest.java      |  68 +--
 ...ected.pileupCallerDRAGEN.378.gatk4.vcf.idx | Bin 2923 -> 2914 bytes
 ...ed.pileupCallerDRAGEN.WithIndels.gatk4.vcf |  56 +--
 .../expected.pileupCallerDRAGEN.gatk4.vcf     |   6 +-
 .../expected.pileupCallerDRAGEN.gatk4.vcf.idx | Bin 1611 -> 1602 bytes
 .../expected.pileupCallerDefaults.gatk4.vcf   |  92 ++--
 ...xpected.pileupCallerDefaults.gatk4.vcf.idx | Bin 2920 -> 2911 bytes
 ...OfSmithWatermanParameters.HC.gatk4.vcf.idx | Bin 10536 -> 10508 bytes
 ...tForceCallingNotProducingNoCalls.gatk4.vcf |   1 +
 ...estGVCFMode.gatk4.alleleSpecific.g.vcf.idx | Bin 40822 -> 40794 bytes
 .../expected.testGVCFMode.gatk4.g.vcf.idx     | Bin 40807 -> 40779 bytes
 ...ltAlleleBasedOnHaptypeScores.gatk4.vcf.idx | Bin 2154 -> 2126 bytes
 ...ected.testLinkedDebruijnMode.gatk4.vcf.idx | Bin 2941 -> 2913 bytes
 .../expected.testVCFMode.gatk4.DRAGEN.vcf.idx | Bin 2937 -> 2909 bytes
 .../expected.testVCFMode.gatk4.FRDBQD.vcf.idx | Bin 3874 -> 3846 bytes
 .../expected.testVCFMode.gatk4.vcf.idx        | Bin 2929 -> 2901 bytes
 ...vcfBeforeRebase.expected.flowbased.vcf.idx | Bin 438884 -> 438883 bytes
 ...lowModeAdvanced.expected.flowbased.vcf.idx | Bin 438914 -> 438905 bytes
 ...lowModeStandard.expected.flowbased.vcf.idx | Bin 438907 -> 438906 bytes
 ...KeepLoneAlleles.expected.flowbased.vcf.idx | Bin 438888 -> 438887 bytes
 ...nnotationRevamp.expected.flowbased.vcf.idx | Bin 438919 -> 438910 bytes
 ...VcfBeforeRebase.expected.flowbased.vcf.idx | Bin 114794 -> 114793 bytes
 ...exityAnnotation.expected.flowbased.vcf.idx | Bin 114822 -> 114813 bytes
 .../test_flowBasedHMM.expected.vcf.idx        | Bin 114805 -> 114796 bytes
 ...est_flowBasedHMM_Stepwise.expected.vcf.idx | Bin 114806
-> 114805 bytes 56 files changed, 1264 insertions(+), 1859 deletions(-) delete mode 100644 src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleAndContext.java delete mode 100644 src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/LocationAndAlleles.java create mode 100644 src/main/java/org/broadinstitute/hellbender/utils/haplotype/Event.java diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/AssemblyComplexity.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/AssemblyComplexity.java index a33796672e0..931a6bb5256 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/AssemblyComplexity.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/AssemblyComplexity.java @@ -12,6 +12,7 @@ import org.broadinstitute.hellbender.engine.ReferenceContext; import org.broadinstitute.hellbender.utils.MathUtils; import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods; +import org.broadinstitute.hellbender.utils.haplotype.Event; import org.broadinstitute.hellbender.utils.haplotype.EventMap; import org.broadinstitute.hellbender.utils.haplotype.Haplotype; import org.broadinstitute.hellbender.utils.help.HelpConstants; @@ -64,9 +65,9 @@ public static Triple annotate(final VariantContext vc, // encode each haplotype as a string of variant starts and alt allele strings, excluding the locus of vc (to avoid reference/germline bias) // note that VariantContexts in an EventMap are always biallelic, so var.getAlternateAllele(0) is valid final Map> haplotypeGroups = haplotypeLikelihoods.alleles().stream() - .collect(Collectors.groupingBy(hap -> hap.getEventMap().getVariantContexts().stream() - .filter(var -> var.getStart() != vc.getStart()) - .map(var -> var.getStart() + var.getAlternateAllele(0).getBaseString()) + .collect(Collectors.groupingBy(hap -> hap.getEventMap().getEvents().stream() + .filter(event -> event.getStart() != vc.getStart()) + .map(event -> event.getStart() + event.altAllele().getBaseString()) .collect(Collectors.joining()))); // sum the read support counts for all haplotypes within each group @@ -143,16 +144,16 @@ public List getKeyNames() { // k bases longer we simply check that the event map alt allele matches the variant context alt allele excluding the // latter's last k bases private static boolean containsAltAllele(final EventMap eventMap, final VariantContext vc, final int altAlleleIndex) { - final List overlapping = eventMap.getOverlappingEvents(vc.getStart()); + final List overlapping = eventMap.getOverlappingEvents(vc.getStart()); if (overlapping.isEmpty()) { return false; } else if (overlapping.get(0).getStart() != vc.getStart()) { return false; } else { - final VariantContext eventMapVC = overlapping.get(0); - final int excessBases = vc.getReference().length() - eventMapVC.getReference().length(); + final Event overlappingEvent = overlapping.get(0); + final int excessBases = vc.getReference().length() - overlappingEvent.refAllele().length(); - return equalBasesExcludingSuffix(eventMapVC.getAlternateAllele(0).getBases(), + return equalBasesExcludingSuffix(overlappingEvent.altAllele().getBases(), vc.getAlternateAllele(altAlleleIndex).getBases(), excessBases); } } @@ -177,12 +178,11 @@ private static boolean equalBasesExcludingSuffix(final byte[] eventMapBases, fin } // count variants in one haplotype but not the other - // note that we use the fact that EventMap VariantContexts are biallelic private static int 
uniqueVariants(final Haplotype hap1, final Haplotype hap2, final int excludedPosition) { final EventMap eventMap2 = hap2.getEventMap(); - return (int) hap1.getEventMap().getVariantContexts().stream() - .filter(vc -> vc.getStart() != excludedPosition) - .filter(vc -> !containsAltAllele(eventMap2, vc, 0)) + return (int) hap1.getEventMap().getEvents().stream() + .filter(event -> event.getStart() != excludedPosition) + .filter(event -> !event.equals(eventMap2.get(event.getStart()))) .count(); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/TandemRepeat.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/TandemRepeat.java index 385660b34de..ec787ab5065 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/TandemRepeat.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/TandemRepeat.java @@ -7,6 +7,7 @@ import org.broadinstitute.hellbender.engine.ReferenceContext; import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods; +import org.broadinstitute.hellbender.utils.haplotype.Event; import org.broadinstitute.hellbender.utils.help.HelpConstants; import org.broadinstitute.hellbender.utils.read.GATKRead; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; @@ -48,6 +49,12 @@ public Map annotate(final ReferenceContext ref, return Collections.unmodifiableMap(map); } + public static Pair, byte[]> getNumTandemRepeatUnits(final ReferenceContext ref, final Event event) { + final byte[] refBases = ref.getBases(); + final int startIndex = event.getStart() + 1 - ref.getWindow().getStart(); // +1 to exclude leading match base common to VC's ref and alt alleles + return GATKVariantContextUtils.getNumTandemRepeatUnits(event.refAllele(), Collections.singletonList(event.altAllele()), Arrays.copyOfRange(refBases, startIndex, refBases.length)); + } + public static Pair, byte[]> getNumTandemRepeatUnits(final ReferenceContext ref, final VariantContext vc) { final byte[] refBases = ref.getBases(); final int startIndex = vc.getStart() + 1 - ref.getWindow().getStart(); // +1 to exclude leading match base common to VC's ref and alt alleles diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/VariantOverlapAnnotator.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/VariantOverlapAnnotator.java index 920c029de5d..e7a28ec023f 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/VariantOverlapAnnotator.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/VariantOverlapAnnotator.java @@ -8,6 +8,7 @@ import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeAssignmentMethod; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; +import org.broadinstitute.hellbender.utils.haplotype.Event; import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils; import java.util.ArrayList; @@ -162,7 +163,7 @@ private static String getRsID(final List rsIDSourceVCs, final Va Utils.nonNull(vcToAnnotate, "vcToAnnotate cannot be null"); final List rsids = new ArrayList<>(); - final List vcAnnotateList = GATKVariantContextUtils.splitVariantContextToBiallelics(vcToAnnotate, true, + final List vcAnnotateList = GATKVariantContextUtils.splitVariantContextToEvents(vcToAnnotate, true, GenotypeAssignmentMethod.SET_TO_NO_CALL_NO_ANNOTATIONS, true); for ( final VariantContext 
vcCompSource : rsIDSourceVCs ) { @@ -174,12 +175,12 @@ private static String getRsID(final List rsIDSourceVCs, final Va throw new IllegalArgumentException("source rsID VariantContext " + vcCompSource + " is not on same chromosome as vcToAnnotate " + vcToAnnotate); } - final List vcCompList = GATKVariantContextUtils.splitVariantContextToBiallelics(vcCompSource, true, + final List vcCompList = GATKVariantContextUtils.splitVariantContextToEvents(vcCompSource, true, GenotypeAssignmentMethod.SET_TO_NO_CALL_NO_ANNOTATIONS, true); boolean addThisID = false; - for (final VariantContext vcComp : vcCompList) { - for (final VariantContext vcToAnnotateBi : vcAnnotateList) { - if (vcComp.getStart() == vcToAnnotateBi.getStart() && vcToAnnotateBi.getReference().equals(vcComp.getReference()) && vcComp.getAlternateAlleles().equals(vcToAnnotateBi.getAlternateAlleles())) { + for (final Event vcComp : vcCompList) { + for (final Event vcToAnnotateBi : vcAnnotateList) { + if (vcComp.equals(vcToAnnotateBi)) { addThisID = true; break; } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingEngine.java index 350fd4f102b..1846fbe806d 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingEngine.java @@ -13,6 +13,7 @@ import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyBasedCallerUtils; import org.broadinstitute.hellbender.utils.*; import org.broadinstitute.hellbender.utils.genotyper.SampleList; +import org.broadinstitute.hellbender.utils.haplotype.Event; import org.broadinstitute.hellbender.utils.logging.OneShotLogger; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines; @@ -124,7 +125,7 @@ public Config getConfiguration() { * @param vc Input variant context to complete. * @return VC with assigned genotypes */ - public VariantContext calculateGenotypes(final VariantContext vc, final GenotypePriorCalculator gpc, final List givenAlleles) { + public VariantContext calculateGenotypes(final VariantContext vc, final GenotypePriorCalculator gpc, final List givenAlleles) { // if input VC can't be genotyped, exit with either null VCC or, in case where we need to emit all sites, an empty call if (cannotBeGenotyped(vc) || vc.getNSamples() == 0) { return null; @@ -150,7 +151,7 @@ public VariantContext calculateGenotypes(final VariantContext vc, final Genotype } final AFCalculationResult AFresult = alleleFrequencyCalculator.calculate(reducedVC, defaultPloidy); - final Set forcedAlleles = AssemblyBasedCallerUtils.getAllelesConsistentWithGivenAlleles(givenAlleles, vc); + final Set forcedAlleles = AssemblyBasedCallerUtils.allelesConsistentWithGivenAlleles(givenAlleles, vc); final OutputAlleleSubset outputAlternativeAlleles = calculateOutputAlleleSubset(AFresult, vc, forcedAlleles); // note the math.abs is necessary because -10 * 0.0 => -0.0 which isn't nice @@ -289,7 +290,7 @@ public List alternativeAlleleMLECounts() { * Provided the exact mode computations it returns the appropriate subset of alleles that progress to genotyping. * @param afCalculationResult the allele fraction calculation result. 
* @param vc the variant context - * @param forcedAlleles alleles from the vc input that are consistent with forced alleles in the assembly region {@link AssemblyBasedCallerUtils#getAllelesConsistentWithGivenAlleles} + * @param forcedAlleles alleles from the vc input that are consistent with forced alleles in the assembly region {@link AssemblyBasedCallerUtils#allelesConsistentWithGivenAlleles} * @return information about the alternative allele subsetting {@code null}. */ private OutputAlleleSubset calculateOutputAlleleSubset(final AFCalculationResult afCalculationResult, final VariantContext vc, final Set forcedAlleles) { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AlleleFrequencyCalculator.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AlleleFrequencyCalculator.java index eb1a1547f09..9255091a07c 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AlleleFrequencyCalculator.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AlleleFrequencyCalculator.java @@ -1,6 +1,9 @@ package org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc; -import htsjdk.variant.variantcontext.*; +import htsjdk.variant.variantcontext.Allele; +import htsjdk.variant.variantcontext.Genotype; +import htsjdk.variant.variantcontext.GenotypeBuilder; +import htsjdk.variant.variantcontext.VariantContext; import it.unimi.dsi.fastutil.doubles.DoubleArrayList; import it.unimi.dsi.fastutil.ints.Int2ObjectArrayMap; import org.apache.commons.math3.special.Gamma; @@ -10,11 +13,9 @@ import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeCalculationArgumentCollection; import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeIndexCalculator; import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypingLikelihoods; -import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AlleleAndContext; import org.broadinstitute.hellbender.utils.*; import org.broadinstitute.hellbender.utils.dragstr.DragstrParams; -import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Map; @@ -154,22 +155,13 @@ public AFCalculationResult fastCalculateDiploidBasedOnGLs(final GenotypingLikeli final int numAlleles = gls.numberOfAlleles(); final List alleles = gls.asListOfAlleles(); - final List alleleLengths = new ArrayList<>(); - for (Allele al : gls.asListOfAlleles()) { - if (al instanceof AlleleAndContext) { - alleleLengths.add(((AlleleAndContext) al).maxAlleleLength()); - } else { - alleleLengths.add(al.length()); - } - } - final int alleleLength = alleleLengths.stream().max(Integer::compare).get(); + final int alleleLength = gls.asListOfAlleles().stream().map(Allele::length).max(Integer::compare).get(); final List samples = gls.asListOfSamples(); final List genotypes = IntStream.range(0, samples.size()).mapToObj(idx -> new GenotypeBuilder(samples.get(idx)).alleles(alleles).PL(gls.sampleLikelihoods(idx).getAsPLs()).make()).collect(Collectors.toList()); return calculate(numAlleles, alleles, genotypes, defaultPloidy, alleleLength); } - /** * Private function that actually calculates allele frequencies etc. 
* diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleAndContext.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleAndContext.java deleted file mode 100644 index c2eae4bd150..00000000000 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleAndContext.java +++ /dev/null @@ -1,56 +0,0 @@ -package org.broadinstitute.hellbender.tools.walkers.haplotypecaller; -import htsjdk.variant.variantcontext.Allele; -import htsjdk.variant.variantcontext.SimpleAllele; - -/** - * This class is similar to {@link org.broadinstitute.hellbender.tools.walkers.haplotypecaller.LocationAndAlleles} but - * allows keeping only an allele/ref pair rather than a list of alleles. The comparison is done on allele by allele basis and - * not in the way it is done on LocationAndAlleles - */ - -public class AlleleAndContext extends SimpleAllele { - final static public long serialVersionUID = 1L; - private final int loc; - private final String contig; - private final Allele refAllele; - - public AlleleAndContext(final String contig, final int loc, final Allele allele, final Allele refAllele) { - super(allele.getBases(), allele.isReference()); - this.loc = loc; - this.contig = contig; - this.refAllele = refAllele; - } - - public int getLoc() { - return loc; - } - - public String getContig() { return contig; } - - public Allele getAllele() { - return this; - } - - @Override - public boolean equals(final Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - - final AlleleAndContext that = (AlleleAndContext) o; - - if (loc != that.loc) return false; - return super.equals(that) && this.refAllele.equals(that.getRefAllele()); - } - - @Override - public int hashCode() { - return 31 * loc + super.hashCode(); - } - - public String toString() {return String.format("(%d) %s/%s", loc, getBaseString(), getRefAllele().getBaseString());} - public Allele getRefAllele() { return refAllele;} - public int maxAlleleLength() { - return Math.max(getAllele().length(), refAllele.length()); - } -} - diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFiltering.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFiltering.java index f1a1d8acc14..17728fa6035 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFiltering.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFiltering.java @@ -2,7 +2,6 @@ import htsjdk.samtools.util.CollectionUtil; import htsjdk.variant.variantcontext.Allele; -import htsjdk.variant.variantcontext.VariantContext; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; import org.apache.logging.log4j.LogManager; @@ -13,6 +12,7 @@ import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.InverseAllele; import org.broadinstitute.hellbender.utils.BaseUtils; import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods; +import org.broadinstitute.hellbender.utils.haplotype.Event; import org.broadinstitute.hellbender.utils.haplotype.Haplotype; import org.broadinstitute.hellbender.utils.read.GATKRead; import org.jgrapht.graph.DefaultDirectedWeightedGraph; @@ -21,7 +21,10 @@ import org.jgrapht.io.DOTExporter; import org.jgrapht.io.IntegerComponentNameProvider; -import java.io.*; +import java.io.FileWriter; +import java.io.IOException; +import 
java.io.OutputStreamWriter; +import java.io.Writer; import java.util.*; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -91,20 +94,6 @@ public AlleleLikelihoods filterAlleles(final AlleleLikeliho return subsettedReadLikelihoodsFinal; } - /** - * Returns all alleles from haplotype - * @param haplotype Input - * @return set of AlleleAndContext - */ - static private Set getAlleles(final Haplotype haplotype){ - final Collection vcs = haplotype.getEventMap().getVariantContexts(); - - return vcs.stream().flatMap( - vc -> vc.getAlleles().stream().map( - al -> new AlleleAndContext(vc.getContig(), vc.getStart(), al, vc.getReference())) - ).collect(Collectors.toSet()); - } - /** * Main function that filters haplotypes that contribute weak alleles * @param readLikelihoods read x haplotype matrix @@ -119,8 +108,8 @@ private AlleleLikelihoods subsetHaplotypesByAlleles(final A final int activeWindowStart, Set suspiciousLocations) { // 1. Collect all alleles in the active region final Set disabledHaplotypes = new HashSet<>(); - final Map> haplotypeAlleleMap = new CollectionUtil.DefaultingMap<>((k) -> new ArrayList<>(), true); - readLikelihoods.alleles().forEach(h -> getAlleles(h).stream().filter(al -> !al.isReference()).forEach(jh -> haplotypeAlleleMap.get(h).add(jh))); + final Map> haplotypeAlleleMap = new CollectionUtil.DefaultingMap<>((k) -> new ArrayList<>(), true); + readLikelihoods.alleles().forEach(h -> h.getEventMap().getEvents().stream().forEach(jh -> haplotypeAlleleMap.get(h).add(jh))); // 2. Split them into sets to genotype together. The goal is to cluster true allele with all its variants from // biased seq. error. @@ -128,16 +117,16 @@ private AlleleLikelihoods subsetHaplotypesByAlleles(final A // First we generate a graph with edge for each pair of alleles that do not occur in the same haplotype // Then we only keep the edges where the alleles are close or up to hmer indel from each other // the connected components of the graph are genotyped together - final OccurrenceMatrix occm = new OccurrenceMatrix<>(haplotypeAlleleMap); - List> nonCoOcurringAlleles = occm.nonCoOcurringColumns(); - final List> closeNonCoOccurringAlleles = filterByDistance(nonCoOcurringAlleles, 0, 3); + final OccurrenceMatrix occm = new OccurrenceMatrix<>(haplotypeAlleleMap); + List> nonCoOcurringAlleles = occm.nonCoOcurringColumns(); + final List> closeNonCoOccurringAlleles = filterByDistance(nonCoOcurringAlleles, 0, 3); nonCoOcurringAlleles = filterSameUpToHmerPairs(filterByDistance(nonCoOcurringAlleles,0,20), findReferenceHaplotype(readLikelihoods.alleles()), activeWindowStart); nonCoOcurringAlleles.addAll(closeNonCoOccurringAlleles); - final List> independentAlleles = occm.getIndependentSets(nonCoOcurringAlleles); + final List> independentAlleles = occm.getIndependentSets(nonCoOcurringAlleles); // 3. For each cluster - remove weak alleles - for (final Set alleleSet : independentAlleles) { + for (final Set alleleSet : independentAlleles) { // debugging - write the interaction map of the location (we will keep this function from the unused approach // where we only attempted to filter alleles that strongly affect an another allele's quality. 
This approach @@ -145,9 +134,9 @@ private AlleleLikelihoods subsetHaplotypesByAlleles(final A // interaction map is the graph of how much quality of each allele is improved when another allele is removed if (assemblyArgs.writeFilteringGraphs) { if (alleleSet.size() > 1 ) { - final List alleleSetAsList = new ArrayList<>(alleleSet); - final Map initialRPLsMap = new HashMap<>(); - final DefaultDirectedWeightedGraph intm = + final List alleleSetAsList = new ArrayList<>(alleleSet); + final Map initialRPLsMap = new HashMap<>(); + final DefaultDirectedWeightedGraph intm = interactionMatrixToGraph(getInteractionMatrix(alleleSetAsList, haplotypeAlleleMap, readLikelihoods, initialRPLsMap), initialRPLsMap); printInteractionGraph(intm, initialRPLsMap, alleleSet); @@ -161,26 +150,25 @@ private AlleleLikelihoods subsetHaplotypesByAlleles(final A removedAlleles = false; // b. Marginalize: calculate quality of each allele relative to all other alleles logger.debug("GAL::start of iteration"); - final List activeAlleles = activeHaplotypes.stream() - .flatMap(h -> getAlleles(h).stream().filter(alleleSet::contains)) + final List activeAlleles = activeHaplotypes.stream() + .flatMap(h -> h.getEventMap().getEvents().stream().filter(alleleSet::contains)) .distinct() .collect(Collectors.toList());; - final Map> alleleHaplotypeMap = new CollectionUtil.DefaultingMap<>((k) -> new ArrayList<>(), true); + final Map> alleleHaplotypeMap = new CollectionUtil.DefaultingMap<>((k) -> new ArrayList<>(), true); readLikelihoods.alleles().stream().filter(activeHaplotypes::contains) .forEach(h -> - getAlleles(h).stream() + h.getEventMap().getEvents().stream() .filter(alleleSet::contains) - .filter(al -> !al.isReference()) .forEach(jh -> alleleHaplotypeMap.get(jh).add(h)) ); logger.debug("AHM::printout start"); - for (final AlleleAndContext al : alleleHaplotypeMap.keySet()) { + for (final Event al : alleleHaplotypeMap.keySet()) { logger.debug("AHM::allele block ---> "); for (final Allele h : alleleHaplotypeMap.get(al)) { - logger.debug(() -> String.format("AHM:: (%d) %s/%s: %s", al.getLoc(), al.getAllele().getBaseString(), al.getRefAllele().getBaseString(), h.getBaseString())); + logger.debug(() -> String.format("AHM:: (%d) %s/%s: %s", al.getStart(), al.altAllele().getBaseString(), al.refAllele().getBaseString(), h.getBaseString())); } logger.debug("AHM::allele block ---< "); @@ -194,11 +182,11 @@ private AlleleLikelihoods subsetHaplotypesByAlleles(final A haplotypeAlleleMap, activeHaplotypes)).collect(Collectors.toList()); // c. Calculate SOR and RPL // Note that the QUAL is calculated as a PL, that is -10*log likelihood. This means that high PL is low quality allele - final List collectedRPLs = IntStream.range(0, activeAlleles.size()).mapToObj(i -> getAlleleLikelihoodVsInverse(alleleLikelihoods.get(i), activeAlleles.get(i))).collect(Collectors.toList()); - final List collectedSORs = IntStream.range(0, activeAlleles.size()).mapToObj(i -> getAlleleSOR(alleleLikelihoods.get(i), activeAlleles.get(i))).collect(Collectors.toList()); + final List collectedRPLs = IntStream.range(0, activeAlleles.size()).mapToObj(i -> getAlleleLikelihoodVsInverse(alleleLikelihoods.get(i), activeAlleles.get(i).altAllele())).collect(Collectors.toList()); + final List collectedSORs = IntStream.range(0, activeAlleles.size()).mapToObj(i -> getAlleleSOR(alleleLikelihoods.get(i), activeAlleles.get(i).altAllele())).collect(Collectors.toList()); // d. 
Generate variants that are below SOR threshold and below RPL threshold - final List filteringCandidates = identifyBadAlleles(collectedRPLs, + final List filteringCandidates = identifyBadAlleles(collectedRPLs, collectedSORs, activeAlleles, assemblyArgs.prefilterQualThreshold, @@ -207,7 +195,7 @@ private AlleleLikelihoods subsetHaplotypesByAlleles(final A //very weak candidates are filtered out in any case, even if they are alone (they will be filtered anyway even in the GVCF mode) // the very weak quality is hardcoded - final List filteringCandidatesStringent = identifyBadAlleles(collectedRPLs, + final List filteringCandidatesStringent = identifyBadAlleles(collectedRPLs, collectedSORs, activeAlleles, 1, @@ -217,14 +205,14 @@ private AlleleLikelihoods subsetHaplotypesByAlleles(final A //for now we just mark all locations with close alleles, one of which is weak. //We write them in suspiciousLocations and they will be then annotated as SUSP_NOISY... in the VCF if ((filteringCandidates.size() > 0 ) && (alleleSet.size()>0)) { - activeAlleles.forEach(laa -> suspiciousLocations.add(laa.getLoc())); + activeAlleles.forEach(laa -> suspiciousLocations.add(laa.getStart())); } // e. For every variant - calculate what is the effect of its deletion and if higher than threshold - delete and continue // (This is a currently disabled code from the approach that would disable only the candidates that strongly // affect other alleles - //AlleleAndContext candidateToDisable = identifyStrongInteractingAllele(filteringCandidates, + //Event candidateToDisable = identifyStrongInteractingAllele(filteringCandidates, // hcargs.prefilterQualThreshold, activeAlleles, collectedRPLs, readLikelihoods, haplotypeAlleleMap, alleleHaplotypeMap); ) // if weak candidate had been identified - add its haplotypes into blacklist, remove the allele from the @@ -238,7 +226,7 @@ private AlleleLikelihoods subsetHaplotypesByAlleles(final A "filtering should always be higher than for the relaxed one"); } - final AlleleAndContext candidateToDisable = filteringCandidates.get(0); + final Event candidateToDisable = filteringCandidates.get(0); logger.debug(() -> String.format("GAL:: Remove %s", candidateToDisable.toString())); removedAlleles = true; final List haplotypesToRemove = alleleHaplotypeMap.get(candidateToDisable); @@ -269,9 +257,9 @@ private AlleleLikelihoods subsetHaplotypesByAlleles(final A final AlleleLikelihoods currentReadLikelihoods = readLikelihoods.removeAllelesToSubset(eventualAlleles); logger.debug("----- SHA list of remaining alleles start ----"); - final Set locAllele = new HashSet<>(); - currentReadLikelihoods.alleles().forEach(h -> getAlleles(h).stream().filter(al -> !al.isReference()).forEach(locAllele::add)); - for (final AlleleAndContext al: locAllele) { + final Set locAllele = new HashSet<>(); + currentReadLikelihoods.alleles().forEach(h -> h.getEventMap().getEvents().stream().forEach(locAllele::add)); + for (final Event al: locAllele) { logger.debug(() -> String.format("---- SHA :: %s ", al.toString())); } logger.debug("----- SHA list of remaining alleles end ----"); @@ -290,8 +278,8 @@ private AlleleLikelihoods subsetHaplotypesByAlleles(final A * @param sorThreshold only variants with SOR above threshold will be considered * @return list of alleles that can be removed */ - private List identifyBadAlleles(final List collectedRPLs, final List collectedSORs, - final List alleles, + private List identifyBadAlleles(final List collectedRPLs, final List collectedSORs, + final List alleles, final double qualThreshold, 
final double sorThreshold) { @@ -302,7 +290,7 @@ private List identifyBadAlleles(final List collectedR //this list will contain all alleles that should be filtered in the order of priority - final List result = new ArrayList<>(); + final List result = new ArrayList<>(); final double THRESHOLD = -1 * qualThreshold; // quality threshold is like in GATK (GL) and we collected PL, so QUAL 30 will appear as -30. final double SOR_THRESHOLD = sorThreshold; @@ -337,16 +325,16 @@ private List identifyBadAlleles(final List collectedR * @return read x allele matrix */ private AlleleLikelihoods getAlleleLikelihoodMatrix(final AlleleLikelihoods readLikelihoods, - final AlleleAndContext allele, - final Map> haplotypeAlleleMap, + final Event allele, + final Map> haplotypeAlleleMap, final Set enabledHaplotypes ){ final Map> alleleHaplotypeMap = new CollectionUtil.DefaultingMap<>((k) -> new ArrayList<>(), true); - final Allele notAllele= InverseAllele.of(allele.getAllele(), true); + final Allele notAllele= InverseAllele.of(allele.altAllele(), true); readLikelihoods.alleles().stream().filter(enabledHaplotypes::contains) .filter(h->haplotypeAlleleMap.get(h).contains(allele)) - .forEach(alleleHaplotypeMap.get(allele)::add); + .forEach(alleleHaplotypeMap.get(allele.altAllele())::add); readLikelihoods.alleles().stream().filter(enabledHaplotypes::contains) .filter(h -> !haplotypeAlleleMap.get(h).contains(allele)) .forEach(alleleHaplotypeMap.get(notAllele)::add); @@ -369,33 +357,33 @@ private double getAlleleSOR(final AlleleLikelihoods alleleLike } //filters pairs of alleles by distance - private List> filterByDistance( - final List> allelePairs, + private List> filterByDistance( + final List> allelePairs, final int minDist, final int maxDist) { logger.debug(() -> String.format("FBD: input %d pairs ", allelePairs.size())); - final List> result = new ArrayList<>(allelePairs); - result.removeIf(v -> Math.abs(v.getLeft().getLoc() - v.getRight().getLoc())>maxDist); - result.removeIf(v -> Math.abs(v.getLeft().getLoc() - v.getRight().getLoc())> result = new ArrayList<>(allelePairs); + result.removeIf(v -> Math.abs(v.getLeft().getStart() - v.getRight().getStart())>maxDist); + result.removeIf(v -> Math.abs(v.getLeft().getStart() - v.getRight().getStart()) String.format("FBD: output %d pairs ", allelePairs.size())); return result; } //filters pairs of alleles that are not same up to hmer indel - private List> filterSameUpToHmerPairs(final List> allelePairs, final Haplotype refHaplotype, final int activeWindowStart) { + private List> filterSameUpToHmerPairs(final List> allelePairs, final Haplotype refHaplotype, final int activeWindowStart) { - final List> result = new ArrayList<>(); - for (final Pair allelePair: allelePairs) { + final List> result = new ArrayList<>(); + for (final Pair allelePair: allelePairs) { final Pair modifiedHaplotypes = new ImmutablePair<>( refHaplotype.insertAllele( - allelePair.getLeft().getRefAllele(), - allelePair.getLeft().getAllele(), - allelePair.getLeft().getLoc()), + allelePair.getLeft().refAllele(), + allelePair.getLeft().altAllele(), + allelePair.getLeft().getStart()), refHaplotype.insertAllele( - allelePair.getRight().getRefAllele(), - allelePair.getRight().getAllele(), - allelePair.getRight().getLoc())); + allelePair.getRight().refAllele(), + allelePair.getRight().altAllele(), + allelePair.getRight().getStart())); if ( BaseUtils.equalUpToHmerChange(modifiedHaplotypes.getLeft().getBases(), modifiedHaplotypes.getRight().getBases()) ) { result.add(allelePair); @@ -439,21 +427,21 @@ private 
int[] getSortedIndexList(final List values) { // The goal of these functions is to look at how one allele affects the other and make decisions // only for the alleles that really affect others. The approach did not currently work that well @SuppressWarnings("unused") - private AlleleAndContext identifyStrongInteractingAllele(final List candidateList, + private Event identifyStrongInteractingAllele(final List candidateList, final float prefilterThreshold, - final List allAlleles, + final List allAlleles, final List rpls, final AlleleLikelihoods readLikelihoods, - final Map> haplotypeAlleleMap, - final Map> alleleHaplotypeMap + final Map> haplotypeAlleleMap, + final Map> alleleHaplotypeMap ){ logger.debug("ISIA :: start"); - final Map initialRPLsMap = new HashMap<>(); + final Map initialRPLsMap = new HashMap<>(); IntStream.range(0, allAlleles.size()).forEach(i -> initialRPLsMap.put(allAlleles.get(i), rpls.get(i))); - for (final AlleleAndContext cand: candidateList){ + for (final Event cand: candidateList){ logger.debug(() -> String.format("ISIA :: test %s", cand.toString())); if ( initialRPLsMap.get(cand) > (-1)*prefilterThreshold){ logger.debug( String.format("ISIA:: selected %s due to low QUAL", cand)); @@ -464,9 +452,9 @@ private AlleleAndContext identifyStrongInteractingAllele(final List interactionVector = getInteractionVector(cand, + final Map interactionVector = getInteractionVector(cand, haplotypeAlleleMap, alleleHaplotypeMap, readLikelihoods, initialRPLsMap); - for (final AlleleAndContext allele: interactionVector.keySet()){ + for (final Event allele: interactionVector.keySet()){ logger.debug(() -> String.format(" --- %s: %d", allele.toString(), initialRPLsMap.get(allele) - interactionVector.get(allele))); if (initialRPLsMap.get(allele) - interactionVector.get(allele) > prefilterThreshold ){ logger.debug(String.format("ISIA:: selected %s", cand)); @@ -482,33 +470,33 @@ private AlleleAndContext identifyStrongInteractingAllele(final List> getInteractionMatrix( - final List alleles, - final Map> haplotypeAlleleMap, + private Map> getInteractionMatrix( + final List alleles, + final Map> haplotypeAlleleMap, final AlleleLikelihoods readLikelihoods, - final Map initialRPLsMap) { + final Map initialRPLsMap) { - final Map> alleleHaplotypeMap = new CollectionUtil.DefaultingMap<>((k) -> new ArrayList<>(), true); + final Map> alleleHaplotypeMap = new CollectionUtil.DefaultingMap<>((k) -> new ArrayList<>(), true); final Set haplotypes = new HashSet<>(readLikelihoods.alleles()); - readLikelihoods.alleles().stream().forEach(h -> getAlleles(h).stream().filter(al -> alleles.contains(al)).filter(al -> !al.isReference()).forEach( + readLikelihoods.alleles().stream().forEach(h -> h.getEventMap().getEvents().stream().filter(al -> alleles.contains(al)).forEach( jh -> alleleHaplotypeMap.get(jh).add(h)) ); - final List allAlleles = new ArrayList<>(alleleHaplotypeMap.keySet()); + final List allAlleles = new ArrayList<>(alleleHaplotypeMap.keySet()); final List> initialAlleleLikelihoods = allAlleles.stream().map(c -> getAlleleLikelihoodMatrix(readLikelihoods, c, haplotypeAlleleMap, haplotypes)).collect(Collectors.toList()); final List initialRPLs = IntStream.range(0, allAlleles.size()).mapToObj(i -> getAlleleLikelihoodVsInverse(initialAlleleLikelihoods.get(i), - allAlleles.get(i))).collect(Collectors.toList()); + allAlleles.get(i).altAllele())).collect(Collectors.toList()); for (int i = 0 ; i < allAlleles.size(); i++) { initialRPLsMap.put(allAlleles.get(i), initialRPLs.get(i)); } - final Map> result = new 
HashMap<>(); - for ( final AlleleAndContext alleleToDisable : allAlleles) { - Map rplsWithoutAlleleMap = getInteractionVector(alleleToDisable, haplotypeAlleleMap, alleleHaplotypeMap, readLikelihoods, initialRPLsMap); + final Map> result = new HashMap<>(); + for ( final Event alleleToDisable : allAlleles) { + Map rplsWithoutAlleleMap = getInteractionVector(alleleToDisable, haplotypeAlleleMap, alleleHaplotypeMap, readLikelihoods, initialRPLsMap); result.put(alleleToDisable, rplsWithoutAlleleMap); } @@ -516,16 +504,16 @@ private Map> getInteractionMatr } // function to create interaction of a single allele with other alleles - private Map getInteractionVector( - final AlleleAndContext alleleToDisable, - final Map> haplotypeAlleleMap, - final Map> alleleHaplotypeMap, + private Map getInteractionVector( + final Event alleleToDisable, + final Map> haplotypeAlleleMap, + final Map> alleleHaplotypeMap, final AlleleLikelihoods readLikelihoods, - final Map initialRPLsMap) { + final Map initialRPLsMap) { - final Set allAlleles = initialRPLsMap.keySet(); - final List allelesWithoutDisabledAllele = allAlleles.stream().filter(al -> al!=alleleToDisable).collect(Collectors.toList()); + final Set allAlleles = initialRPLsMap.keySet(); + final List allelesWithoutDisabledAllele = allAlleles.stream().filter(al -> al!=alleleToDisable).collect(Collectors.toList()); final Set haplotypes = haplotypeAlleleMap.keySet(); final Set haplotypesWithoutDisabledAllele = haplotypes.stream().filter( h -> !alleleHaplotypeMap.get(alleleToDisable).contains(h)).collect(Collectors.toSet()); @@ -533,23 +521,23 @@ private Map getInteractionVector( allelesWithoutDisabledAllele.stream().map(c -> getAlleleLikelihoodMatrix(readLikelihoods, c, haplotypeAlleleMap, haplotypesWithoutDisabledAllele)).collect(Collectors.toList()); final List rplsWithoutAllele = IntStream.range(0, allelesWithoutDisabledAllele.size()).mapToObj(i -> getAlleleLikelihoodVsInverse(disabledAlleleLikelihood.get(i), - allelesWithoutDisabledAllele.get(i))).collect(Collectors.toList()); + allelesWithoutDisabledAllele.get(i).altAllele())).collect(Collectors.toList()); - final Map rplsWithoutAlleleMap = new HashMap<>(); + final Map rplsWithoutAlleleMap = new HashMap<>(); IntStream.range(0, allelesWithoutDisabledAllele.size()).forEach( i -> rplsWithoutAlleleMap.put(allelesWithoutDisabledAllele.get(i), rplsWithoutAllele.get(i))); return rplsWithoutAlleleMap; } - private DefaultDirectedWeightedGraph interactionMatrixToGraph(final Map> interactionMatrix, - final Map initialRPL ){ - final DefaultDirectedWeightedGraph result = new DefaultDirectedWeightedGraph<>(DefaultWeightedEdge.class); + private DefaultDirectedWeightedGraph interactionMatrixToGraph(final Map> interactionMatrix, + final Map initialRPL ){ + final DefaultDirectedWeightedGraph result = new DefaultDirectedWeightedGraph<>(DefaultWeightedEdge.class); initialRPL.keySet().stream().forEach(x -> result.addVertex(x)); - for ( final AlleleAndContext loc1 : interactionMatrix.keySet() ) { - for ( final AlleleAndContext loc2 : interactionMatrix.get(loc1).keySet()){ + for ( final Event loc1 : interactionMatrix.keySet() ) { + for ( final Event loc2 : interactionMatrix.get(loc1).keySet()){ final int diff = interactionMatrix.get(loc1).get(loc2) - initialRPL.get(loc2); if (diff < 0){ final DefaultWeightedEdge edge = result.addEdge(loc1, loc2); @@ -561,18 +549,18 @@ private DefaultDirectedWeightedGraph inte } //debug function - prints dot file with edges between the alleles that affect each other - void 
printInteractionGraph(final DefaultDirectedWeightedGraph intm, - final Map rpls, - final Set alleleSet ){ - final IntegerComponentNameProvider p1 = new IntegerComponentNameProvider<>(); - final ComponentNameProvider p2 = (v -> v.toString() + " = " + rpls.get(v)) ; + void printInteractionGraph(final DefaultDirectedWeightedGraph intm, + final Map rpls, + final Set alleleSet ){ + final IntegerComponentNameProvider p1 = new IntegerComponentNameProvider<>(); + final ComponentNameProvider p2 = (v -> v.toString() + " = " + rpls.get(v)) ; final ComponentNameProvider p4 = (e -> String.valueOf(intm.getEdgeWeight(e))); - final DOTExporter dotExporter = new DOTExporter<>(p1, p2, p4, + final DOTExporter dotExporter = new DOTExporter<>(p1, p2, p4, null, null); final String contig = alleleSet.iterator().next().getContig(); - final int rangeStart = alleleSet.stream().mapToInt(al -> al.getLoc()).min().getAsInt(); - final int rangeEnd = alleleSet.stream().mapToInt(al -> al.getLoc()).max().getAsInt(); + final int rangeStart = alleleSet.stream().mapToInt(al -> al.getStart()).min().getAsInt(); + final int rangeEnd = alleleSet.stream().mapToInt(al -> al.getStart()).max().getAsInt(); try { final Writer outfile = new FileWriter(String.format("allele.interaction.%s.%d-%d.dot", contig, rangeStart, rangeEnd)); dotExporter.exportGraph(intm, outfile); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerUtils.java index c02f5bde71b..5e711ac285c 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerUtils.java @@ -9,16 +9,16 @@ import htsjdk.samtools.util.Locatable; import htsjdk.variant.variantcontext.*; import htsjdk.variant.vcf.VCFConstants; -import org.apache.commons.lang3.tuple.ImmutablePair; +import org.apache.commons.lang3.mutable.MutableInt; import org.apache.commons.lang3.tuple.Pair; import org.apache.logging.log4j.Logger; -import org.broadinstitute.gatk.nativebindings.smithwaterman.SWOverhangStrategy; import org.broadinstitute.gatk.nativebindings.smithwaterman.SWParameters; import org.broadinstitute.hellbender.engine.AlignmentContext; import org.broadinstitute.hellbender.engine.AssemblyRegion; import org.broadinstitute.hellbender.exceptions.UserException; -import org.broadinstitute.hellbender.tools.walkers.ReferenceConfidenceVariantContextMerger; +import org.broadinstitute.hellbender.tools.FlowBasedArgumentCollection; import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.ReadThreadingAssembler; +import org.broadinstitute.hellbender.utils.IndexRange; import org.broadinstitute.hellbender.utils.QualityUtils; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; @@ -29,12 +29,13 @@ import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods; import org.broadinstitute.hellbender.utils.genotyper.IndexedAlleleList; import org.broadinstitute.hellbender.utils.genotyper.SampleList; +import org.broadinstitute.hellbender.utils.haplotype.Event; import org.broadinstitute.hellbender.utils.haplotype.Haplotype; import org.broadinstitute.hellbender.utils.haplotype.HaplotypeBAMWriter; import org.broadinstitute.hellbender.utils.haplotype.PartiallyDeterminedHaplotype; import org.broadinstitute.hellbender.utils.io.IOUtils; 
 import org.broadinstitute.hellbender.utils.locusiterator.LocusIteratorByState;
-import org.broadinstitute.hellbender.utils.logging.OneShotLogger;
+
 import org.broadinstitute.hellbender.utils.pileup.ReadPileup;
 import org.broadinstitute.hellbender.utils.read.*;
 import org.broadinstitute.hellbender.utils.smithwaterman.SmithWatermanAligner;
@@ -61,6 +62,7 @@ public final class AssemblyBasedCallerUtils {
     public static final String ALIGNMENT_REGION_TAG = "AR";
     public static final String EXT_COLLAPSED_TAG = "XC";
     public static final String EXT_SPECIAL_TAG = "XS"; // added to haplotype to assist in reading them back in with all fields restored.
+
     public static final Function<Haplotype, Double> HAPLOTYPE_ALIGNMENT_TIEBREAKING_PRIORITY = h -> {
         final Cigar cigar = h.getCigar();
         final int referenceTerm = (h.isReference() ? 1 : 0);
@@ -73,8 +75,6 @@ public final class AssemblyBasedCallerUtils {
     // get realigned incorrectly. See https://github.com/broadinstitute/gatk/issues/5060
     public static final int MINIMUM_READ_LENGTH_AFTER_TRIMMING = 10;
 
-    public static final OneShotLogger haplotypeDeletionWarningLogger = new OneShotLogger(AssemblyBasedCallerUtils.class);
-
     // Phase group notation can be interpreted as a representation of the alleles present on the two phased haplotypes at the site:
     // "0": REF or '*'; "1": site-specific alt allele
     enum PhaseGroup {
@@ -428,215 +428,21 @@ private static int determineFlowAssemblyColapseHmer(SAMFileHeader readsHeader) {
         return result;
     }
 
-    /**
-     * Helper method that handles the actual "GGA-like" Merging of haplotype alleles into an assembly result set.
-     *
-     * First this method will filter out haplotypes that contain alleles that have failed the pileup calling filtering steps,
-     * Then the list will attempt to poke into the haplotype list artificial haplotypes that have the found alleles present.
-     *
-     * @param region
-     * @param argumentCollection
-     * @param aligner
-     * @param refHaplotype
-     * @param assemblyResultSet
-     * @param pileupAllelesFoundShouldFilter
-     * @param pileupAllelesPassingFilters
-     * @return
-     */
-    public static AssemblyResultSet applyPileupEventsAsForcedAlleles(final AssemblyRegion region, final AssemblyBasedCallerArgumentCollection argumentCollection,
-                                                                     final SmithWatermanAligner aligner, final Haplotype refHaplotype,
-                                                                     final AssemblyResultSet assemblyResultSet, final List<VariantContext> pileupAllelesFoundShouldFilter,
-                                                                     final List<VariantContext> pileupAllelesPassingFilters, final boolean debug) {
-        List<Haplotype> haplotypesWithFilterAlleles = new ArrayList<>();
-        // IF we find pileup alleles that we want to filter... AND we are not generating the partially determined haplotypes,
-        // we resort to a messy approach where we filter alleles by throwing away every haplotype supporting an allele. This is
-        // very dangerous since this could easily destroy phased variants with the haplotype.
-        if (!pileupAllelesFoundShouldFilter.isEmpty() && !argumentCollection.pileupDetectionArgs.generatePDHaplotypes) {
-            // TODO this is a bad algorithm for bad people
-            for(VariantContext delVariant : pileupAllelesFoundShouldFilter) {
-                for (Haplotype hap : assemblyResultSet.getHaplotypeList()) {
-                    if (hap.getEventMap()==null) {
-                        // NOTE: This check should be a reasonable sanity check on the inputs. However, there are edge cases in the assembly engine + SW realignment that can cause
-                        // haplotypes to re-merge into the reference and end up with an empty event map. (Also the event map code explicitly expects to fail in some instances)
-                        // thus we can't enforce that the haplotypes have no variants.
-//                        if (!hap.isReference()) {
-//                            //throw new RuntimeException("empty event map for haplotype" + hap);
-//                        }
-                    } else {
-                        if (hap.getEventMap().getVariantContexts().stream().anyMatch(v -> v.getStart() == delVariant.getStart()
-                                && delVariant.getReference().equals(v.getReference())
-                                && delVariant.getAlternateAllele(0).equals(v.getAlternateAllele(0)))) {
-                            if (argumentCollection.pileupDetectionArgs.debugPileupStdout) System.err.println("Flagging hap " + hap + " for containing variant " + delVariant);
-                            haplotypesWithFilterAlleles.add(hap);
-                        }
-                    }
-                }
-            }
-        }
-        // TODO removing haplotypes whole-cloth is messy and will likely lead to errors and dropped variants in this code
-        if (!haplotypesWithFilterAlleles.isEmpty()) {
-            if (debug) System.out.println("Found Assembly Haps with filtered Variants:\n"+haplotypesWithFilterAlleles.stream().map(Haplotype::toString).collect(Collectors.joining("\n")));
-
-            haplotypeDeletionWarningLogger.warn(() -> "Haplotypes from Assembly are being filtered by heuristics from the PileupCaller. This can lead to missing variants. See --"+PileupDetectionArgumentCollection.PILEUP_DETECTION_FILTER_ASSEMBLY_HAPS_THRESHOLD+" for more details");
-
-            for (Haplotype hap : haplotypesWithFilterAlleles) {
-                assemblyResultSet.removeHapltotype(hap);
-            }
-        }
-        if (!pileupAllelesPassingFilters.isEmpty()) {
-            processPileupAlleles(region, pileupAllelesPassingFilters, argumentCollection.maxMnpDistance, argumentCollection.pileupDetectionArgs.snpAdajacentToAssemblyIndel, aligner, refHaplotype, assemblyResultSet, argumentCollection.pileupDetectionArgs.numHaplotypesToIterate, argumentCollection.pileupDetectionArgs.filteringKmerSize, argumentCollection.getHaplotypeToReferenceSWParameters(), debug);
-        }
-        return assemblyResultSet;
-    }
-
-    /**
-     * Handle pileup detected alternate alleles.
-     */
-    @VisibleForTesting
-    @SuppressWarnings("deprecation")
-    static void processPileupAlleles(final AssemblyRegion region, final List<VariantContext> pileupVC, final int maxMnpDistance,
-                                     final int snpAdjacentToIndelLimit, final SmithWatermanAligner aligner, final Haplotype refHaplotype,
-                                     final AssemblyResultSet assemblyResultSet, final int numHaplotypesPerIteration, final int hapFilteringKmerSize,
-                                     final SWParameters haplotypeToReferenceSWParameters, final boolean debug) {
-        final int activeRegionStart = refHaplotype.getAlignmentStartHapwrtRef();
-        final Map<Integer, VariantContext> assembledVariants = assemblyResultSet.getVariationEvents(maxMnpDistance).stream()
-                .collect(Collectors.groupingBy(VariantContext::getStart, Collectors.collectingAndThen(Collectors.toList(), AssemblyBasedCallerUtils::makeMergedVariantContext)));
-        final Collection<VariantContext> assembledIndels = assemblyResultSet.getVariationEvents(maxMnpDistance).stream().filter(VariantContext::isIndel)
-                .collect(Collectors.groupingBy(VariantContext::getStart, Collectors.collectingAndThen(Collectors.toList(), AssemblyBasedCallerUtils::makeMergedVariantContext))).values();
-
-        Set<Haplotype> baseHaplotypes = new TreeSet<>();
-        baseHaplotypes.addAll(assemblyResultSet.getHaplotypeList().stream()
-                .sorted(Comparator.comparingInt((Haplotype hap) -> hap.isReference() ? 1 : 0).thenComparingDouble(hap -> hap.getScore()).reversed())
-                .limit(NUM_HAPLOTYPES_TO_INJECT_FORCE_CALLING_ALLELES_INTO)
-                .collect(Collectors.toList()));
-
-        //TODO it's unclear whether the correct answer here is to use the hardclipped pileup reads (which we used in generating the pileup alleles for specificity reasons)
-        //TODO or if it would be more accurate to use the softclipped bases here in filtering down the haplotypes.
I suspect the latter but I will evaluate later. - Map kmerReadCounts = getKmerReadCounts(region.getHardClippedPileupReads(), hapFilteringKmerSize); - - // Remove SNPs that are too close to assembled indels. - final List givenAllelesFiltered = pileupVC.stream().filter(vc -> vc.isIndel() || assembledIndels.stream().noneMatch(indel -> vc.withinDistanceOf(indel, snpAdjacentToIndelLimit))).collect(Collectors.toList()); - - for (final VariantContext givenVC : givenAllelesFiltered) { - final VariantContext assembledVC = assembledVariants.get(givenVC.getStart()); - final int givenVCRefLength = givenVC.getReference().length(); - final Allele longerRef = (assembledVC == null || givenVCRefLength > assembledVC.getReference().length()) ? givenVC.getReference() : assembledVC.getReference(); - final List unassembledGivenAlleles = getAllelesNotPresentInAssembly(givenVC, assembledVC, givenVCRefLength, longerRef); - - final List unassembledNonSymbolicAlleles = unassembledGivenAlleles.stream().filter(a -> { - final byte[] bases = a.getBases(); - return !(Allele.wouldBeNoCallAllele(bases) || Allele.wouldBeNullAllele(bases) || Allele.wouldBeStarAllele(bases) || Allele.wouldBeSymbolicAllele(bases)); - }).collect(Collectors.toList()); - - final List newPileupHaplotypes = new ArrayList<>(); - for (final Allele givenAllele : unassembledNonSymbolicAlleles) { - if (debug) System.out.println("Processing new Haplotypes for Pileup Allele that was not in the assembly: "+givenVC); - for (final Haplotype baseHaplotype : baseHaplotypes) { - // make sure this allele doesn't collide with a variant on the haplotype - if (baseHaplotype.getEventMap() == null || baseHaplotype.getEventMap().getVariantContexts().stream().noneMatch(vc -> vc.overlaps(givenVC))) { - // BG & AH this is the right way to insert a new haplotype - final Haplotype insertedHaplotype = baseHaplotype.insertAllele(longerRef, givenAllele, givenVC.getStart()); - if (insertedHaplotype != null) { // can be null if the requested allele can't be inserted into the haplotype - final Cigar cigar = CigarUtils.calculateCigar(refHaplotype.getBases(), insertedHaplotype.getBases(), aligner, haplotypeToReferenceSWParameters, SWOverhangStrategy.INDEL); - insertedHaplotype.setCigar(cigar); - insertedHaplotype.setGenomeLocation(refHaplotype.getGenomeLocation()); - insertedHaplotype.setAlignmentStartHapwrtRef(activeRegionStart); - - // and add to our internal list so we get haplotypes that contain all given alleles - // do we want a flag to control this behavior - newPileupHaplotypes.add(insertedHaplotype); - } - } - - } - } - - Set refactoredHaps = filterPileupHaplotypes(newPileupHaplotypes, kmerReadCounts, numHaplotypesPerIteration, hapFilteringKmerSize); - baseHaplotypes.addAll(refactoredHaps); - if (debug) System.out.println("Constructed the following new Pileup Haplotypes after filtering:\n"+refactoredHaps.stream().map(Haplotype::toString).collect(Collectors.joining("\n"))); - - } - baseHaplotypes.forEach(assemblyResultSet::add); - assemblyResultSet.regenerateVariationEvents(maxMnpDistance); - } - - @VisibleForTesting - public static void addGivenAlleles(final List givenAlleles, final int maxMnpDistance, final SmithWatermanAligner aligner, - final SWParameters haplotypeToReferenceSWParameters, final AssemblyResultSet assemblyResultSet) { - if (givenAlleles.isEmpty()) { - return; - } - final Haplotype refHaplotype = assemblyResultSet.getReferenceHaplotype(); - final Map assembledVariants = assemblyResultSet.getVariationEvents(maxMnpDistance).stream() - 
.collect(Collectors.groupingBy(VariantContext::getStart, Collectors.collectingAndThen(Collectors.toList(), AssemblyBasedCallerUtils::makeMergedVariantContext))); - - final List assembledHaplotypes = assemblyResultSet.getHaplotypeList(); - for (final VariantContext givenVC : givenAlleles) { - final VariantContext assembledVC = assembledVariants.get(givenVC.getStart()); - final int givenVCRefLength = givenVC.getReference().length(); - final Allele longerRef = (assembledVC == null || givenVCRefLength > assembledVC.getReference().length()) ? givenVC.getReference() : assembledVC.getReference(); - final List unassembledGivenAlleles = getAllelesNotPresentInAssembly(givenVC, assembledVC, givenVCRefLength, longerRef); - - final List unassembledNonSymbolicAlleles = unassembledGivenAlleles.stream() - //TODO, update the null allele check when htsjdk adds a NULL_ALLELE constant to Allele - .filter(a -> !(a.equals(Allele.NO_CALL) || a.getDisplayString().equals(String.valueOf(VCFConstants.NULL_ALLELE)) || a.equals(Allele.SPAN_DEL) || a.isSymbolic())) - .collect(Collectors.toList()); - - // choose the highest-scoring haplotypes along with the reference for building force-calling haplotypes - final List baseHaplotypes = unassembledNonSymbolicAlleles.isEmpty() ? Collections.emptyList() : assembledHaplotypes.stream() - .sorted(Comparator.comparingInt((Haplotype hap) -> hap.isReference() ? 1 : 0).thenComparingDouble(hap -> hap.getScore()).reversed()) - .limit(NUM_HAPLOTYPES_TO_INJECT_FORCE_CALLING_ALLELES_INTO) - .collect(Collectors.toList()); - - for (final Allele givenAllele : unassembledNonSymbolicAlleles) { - for (final Haplotype baseHaplotype : baseHaplotypes) { - // make sure this allele doesn't collide with a variant on the haplotype - if (baseHaplotype.getEventMap()!= null && baseHaplotype.getEventMap().getVariantContexts().stream().anyMatch(vc -> vc.overlaps(givenVC))) { - continue; - } - - final Haplotype insertedHaplotype = baseHaplotype.insertAllele(longerRef, givenAllele, givenVC.getStart()); - if (insertedHaplotype != null) { // can be null if the requested allele can't be inserted into the haplotype - final Cigar cigar = CigarUtils.calculateCigar(refHaplotype.getBases(), insertedHaplotype.getBases(), aligner, haplotypeToReferenceSWParameters, SWOverhangStrategy.INDEL); - insertedHaplotype.setCigar(cigar); - insertedHaplotype.setGenomeLocation(refHaplotype.getGenomeLocation()); - insertedHaplotype.setAlignmentStartHapwrtRef(refHaplotype.getAlignmentStartHapwrtRef()); - assemblyResultSet.add(insertedHaplotype); - } - } - } - } - assemblyResultSet.regenerateVariationEvents(maxMnpDistance); - } - - private static List getAllelesNotPresentInAssembly(VariantContext givenVC, VariantContext assembledVC, int givenVCRefLength, Allele longerRef) { - List unassembledGivenAlleles; - if (assembledVC == null) { - unassembledGivenAlleles = givenVC.getAlternateAlleles(); - } else { - // map all alleles to the longest common reference - final Set assembledAlleleSet = new HashSet<>(longerRef.length() == assembledVC.getReference().length() ? assembledVC.getAlternateAlleles() : - ReferenceConfidenceVariantContextMerger.remapAlleles(assembledVC, longerRef)); - final Set givenAlleleSet = new HashSet<>(longerRef.length() == givenVCRefLength ? 
givenVC.getAlternateAlleles() :
-                ReferenceConfidenceVariantContextMerger.remapAlleles(givenVC, longerRef));
-            //We must filter out the Ref alleles here to protect against edge cases and undexpected behavior from the pileupcalling code
-            unassembledGivenAlleles = givenAlleleSet.stream().filter(a -> !assembledAlleleSet.contains(a)).filter(a -> !a.isReference()).collect(Collectors.toList());
-        }
-        return unassembledGivenAlleles;
-    }
-
    /**
     * Returns a map of kmer -> count of total unique occurrences across all of the provided reads. This is a necessary step
-     * in the {@link AssemblyBasedCallerUtils#processPileupAlleles} pileup
-     * haplotype filtering.
+     * in the {@link AssemblyResultSet#injectPileupEvents} pileup haplotype filtering.
     *
     * @param hardClippedPileupReads Reads to scan to generate kmer counts from
     * @param kmerSize kmer size to use in kmerizing the reads
     * @return a map of kmer to the number of occurrences in the data.
     */
-    static Map<Kmer, Integer> getKmerReadCounts(final List<GATKRead> hardClippedPileupReads, int kmerSize) {
-        Map<Kmer, Integer> kmerReadCounts = new HashMap<>();
-        hardClippedPileupReads.forEach(read -> kmerizeAndCountOccurences(read.getBases(), kmerSize, kmerReadCounts));
+    static Map<Kmer, MutableInt> getKmerReadCounts(final List<GATKRead> hardClippedPileupReads, int kmerSize) {
+        Map<Kmer, MutableInt> kmerReadCounts = new HashMap<>();
+        for (final GATKRead read : hardClippedPileupReads) {
+            final byte[] bases = read.getBasesNoCopy();
+            new IndexRange(0, Math.max(bases.length - kmerSize + 1, 0))
+                    .forEach(i -> kmerReadCounts.computeIfAbsent(new Kmer(bases, i, kmerSize), k -> new MutableInt(0)).increment());
+        }
        return kmerReadCounts;
    }

@@ -650,63 +456,28 @@ static Map getKmerReadCounts(final List hardClippedPil
     * @return A set of artificial haplotypes limited to at most numPileupHaplotypes
     */
    @VisibleForTesting
-    static Set<Haplotype> filterPileupHaplotypes(final List<Haplotype> onlyNewHaplotypes,
-                                                 final Map<Kmer, Integer> kmerReadCounts,
+    static Set<Haplotype> filterPileupHaplotypes(final Set<Haplotype> onlyNewHaplotypes,
+                                                 final Map<Kmer, MutableInt> kmerReadCounts,
                                                 final int numPileupHaplotypes,
                                                 final int kmerSize ) {
-
-        // Get haplotypes from assemblyResultSet and kmerize. for each haplotype create a set of kmers.
-        // for each haplotype, look up the kmers in the read-map and sum the counts fo the haplotype score
-        // create a Map
-        LinkedHashMap<Haplotype, Integer> haplotypeScores = new LinkedHashMap<>();
-        for (Haplotype haplotype : onlyNewHaplotypes) {
-            // TODO this code might use some normalizing based on haplotype length in the future
-            Set<Kmer> hapKmers = kmerizeBytes(haplotype.getBases(), kmerSize);
-            int hapKmerCount = 0;
-            for(Kmer kmer : hapKmers) {
-                hapKmerCount += kmerReadCounts.containsKey(kmer) ?
1 : 0; - } - - haplotypeScores.put(haplotype, hapKmerCount); + if (onlyNewHaplotypes.size() <= numPileupHaplotypes) { + return onlyNewHaplotypes; // if the limiting haplotype count is more haplotypes than we have, everything passes } - // Select the top haplotypes in order of how many of their kmers have supports in the underlying reads - Map topScoringHaplotypes = - haplotypeScores.entrySet().stream() - .sorted(Collections.reverseOrder(Map.Entry.comparingByValue())) - .limit(numPileupHaplotypes) - .collect(Collectors.toMap( - Map.Entry::getKey, Map.Entry::getValue, (e1, e2) -> e1, LinkedHashMap::new)); + // sort by score = # of kmers in haplotype that appear in any read + // TODO this code might use some normalizing based on haplotype length in the future + final Map scores = onlyNewHaplotypes.stream() + .collect(Collectors.toMap(h -> h, h -> kmerizeSequence(h.getBases(), kmerSize).stream() + .filter(kmer -> kmerReadCounts.getOrDefault(kmer, new MutableInt(0)).intValue() > 0) + .count())); - return topScoringHaplotypes.keySet(); + // if there are ties, we pass any haplotype with a score as good as the numPileupHaplotypes-th best + final long minimumPassingScore = scores.values().stream().sorted(Comparator.reverseOrder()).skip(numPileupHaplotypes - 1).findFirst().get(); + return onlyNewHaplotypes.stream().filter(h -> scores.get(h) >= minimumPassingScore).collect(Collectors.toSet()); } - /** A utility method that increments a counter-map - * or sets the counter to 1 if the key is missing. - * - * @param map a Map - * @param key The Key to increment - * @param the type that Maps to an Integer - * - */ - synchronized private static void increment(Map map, Type key){ - if (!map.containsKey(key)) { - map.put(key, 1); - } else { - map.put(key, map.get(key) + 1); - } - } - static void kmerizeAndCountOccurences(byte[] sequence, int kmerSize, Map results){ - final int stopPosition = sequence.length - kmerSize; - for (int i = 0; i <= stopPosition; i++) { - final Kmer kmer = new Kmer(sequence, i, kmerSize); - increment(results, kmer); - } - } - - - static Set kmerizeBytes(byte[] sequence, int kmerSize){ + private static Set kmerizeSequence(byte[] sequence, int kmerSize){ final Set allKmers = new LinkedHashSet<>(); final int stopPosition = sequence.length - kmerSize; for (int i = 0; i <= stopPosition; i++) { @@ -821,51 +592,6 @@ public static List getPileupsOverReference(final SAMFileHeader reads return pileups; } - /** - * Returns the list of given alleles active at this location. This method will include events that span the current - * location if includeSpanningEvents is set to true; otherwise it will only include events that have loc as their \ - * start position. - * @param loc The start position we are genotyping - * @param activeAllelesToGenotype The list of given alleles for the current active region, empty unless we are in GGA mode - * @param includeSpanningEvents If true, will also return events that span loc - */ - public static List getVariantContextsFromGivenAlleles(final int loc, - final List activeAllelesToGenotype, - final boolean includeSpanningEvents) { - final Set uniqueLocationsAndAlleles = new HashSet<>(); - final List results = new ArrayList<>(); - - int givenAlleleSourceCount = 0; - for( final VariantContext givenAlleleVC : activeAllelesToGenotype ) { - if( givenAlleleVC.getStart() <= loc && givenAlleleVC.getEnd() >= loc) { - if (! 
(includeSpanningEvents || givenAlleleVC.getStart() == loc)) { - continue; - } - int alleleCount = 0; - for( final Allele givenAltAllele : givenAlleleVC.getAlternateAlleles() ) { - final List alleleSet = Arrays.asList(givenAlleleVC.getReference(), givenAltAllele); - - //TODO: this source name seems arbitrary and probably just has to be unique - //TODO: how about replace it by vcSourceName = String.parseInt(nameCounter++)? - final String vcSourceName = "Comp" + givenAlleleSourceCount + "Allele" + alleleCount; - // check if this event is already in the list of events due to a repeat in the input alleles track - final VariantContext candidateEventToAdd = new VariantContextBuilder(givenAlleleVC).alleles(alleleSet) - .genotypes(GenotypesContext.NO_GENOTYPES).source(vcSourceName).make(); - - final LocationAndAlleles locationAndAlleles = new LocationAndAlleles(candidateEventToAdd.getStart(), candidateEventToAdd.getAlleles()); - if (! uniqueLocationsAndAlleles.contains(locationAndAlleles)) { - uniqueLocationsAndAlleles.add(locationAndAlleles); - results.add(candidateEventToAdd); - } - - alleleCount++; - } - } - givenAlleleSourceCount++; - } - return results; - } - /** * Returns the list of events discovered in assembled haplotypes that are active at this location. The results will @@ -875,24 +601,23 @@ public static List getVariantContextsFromGivenAlleles(final int * @param haplotypes list of active haplotypes at the current location * @param includeSpanningEvents If true, will also return events that span loc */ - public static List getVariantContextsFromActiveHaplotypes(final int loc, - final List haplotypes, - final boolean includeSpanningEvents) { - final List results = new ArrayList<>(); - final Set uniqueLocationsAndAlleles = new HashSet<>(); - - haplotypes.stream() - .flatMap(h -> Utils.stream(h.getEventMap().getOverlappingEvents(loc))) - .filter(Objects::nonNull) - .filter(v -> (includeSpanningEvents || v.getStart() == loc)) - .forEach(v -> { - final LocationAndAlleles locationAndAlleles = new LocationAndAlleles(v.getStart(), v.getAlleles()); - if (! 
uniqueLocationsAndAlleles.contains(locationAndAlleles)) { - uniqueLocationsAndAlleles.add(locationAndAlleles); - results.add(v); - } - }); - return results; + public static List getEventsFromActiveHaplotypes(final int loc, final List haplotypes, final boolean includeSpanningEvents) { + final Set events = new HashSet<>(); + final List result = new ArrayList<>(); + + int hapNumber = 0; + for (final Haplotype haplotype : haplotypes) { + final String sourceName = "HC" + hapNumber++; + for (final Event event : haplotype.getEventMap().getOverlappingEvents(loc)) { + if (event == null || (!includeSpanningEvents && event.getStart() != loc)) { + continue; + } else if (events.add(event)) { + result.add(event.asVariantContext(sourceName)); + } + } + } + + return result; } /** @@ -924,30 +649,30 @@ public static Map> createAlleleMapper(final VariantConte if (h.isPartiallyDetermined() && ((PartiallyDeterminedHaplotype) h).getDeterminedPosition() != loc) { continue; } - final List spanningEvents = h.getEventMap().getOverlappingEvents(loc); + final List overlappingEvents = h.getEventMap().getOverlappingEvents(loc); - if (spanningEvents.isEmpty()) { //no events --> this haplotype supports the reference at this locus + if (overlappingEvents.isEmpty()) { //no events --> this haplotype supports the reference at this locus result.get(ref).add(h); continue; } - for (VariantContext spanningEvent : spanningEvents) { - if (spanningEvent.getStart() == loc) { + for (Event overlappingEvent : overlappingEvents) { + if (overlappingEvent.getStart() == loc) { // the event starts at the current location - if (spanningEvent.getReference().length() == mergedVC.getReference().length()) { + if (overlappingEvent.refAllele().length() == mergedVC.getReference().length()) { // reference allele lengths are equal; we can just use the spanning event's alt allele // in the case of GGA mode the spanning event might not match an allele in the mergedVC - if (result.containsKey(spanningEvent.getAlternateAllele(0))) { + if (result.containsKey(overlappingEvent.altAllele())) { // variant contexts derived from the event map have only one alt allele each, so we can just // grab the first one (we're not assuming that the sample is biallelic) - result.get(spanningEvent.getAlternateAllele(0)).add(h); + result.get(overlappingEvent.altAllele()).add(h); } - } else if (spanningEvent.getReference().length() < mergedVC.getReference().length()) { + } else if (overlappingEvent.refAllele().length() < mergedVC.getReference().length()) { // spanning event has shorter ref allele than merged VC; we need to pad out its alt allele final Map spanningEventAlleleMappingToMergedVc - = GATKVariantContextUtils.createAlleleMapping(mergedVC.getReference(), spanningEvent); - final Allele remappedSpanningEventAltAllele = spanningEventAlleleMappingToMergedVc.get(spanningEvent.getAlternateAllele(0)); + = GATKVariantContextUtils.createAlleleMapping(mergedVC.getReference(), overlappingEvent.refAllele(), Collections.singletonList(overlappingEvent.altAllele())); + final Allele remappedSpanningEventAltAllele = spanningEventAlleleMappingToMergedVc.get(overlappingEvent.altAllele()); // in the case of GGA mode the spanning event might not match an allele in the mergedVC if (result.containsKey(remappedSpanningEventAltAllele)) { result.get(remappedSpanningEventAltAllele).add(h); @@ -1021,9 +746,9 @@ static Map> constructHaplotypeMapping(final List< // keep track of the haplotypes that contain this particular alternate allele final Allele alt = getSiteSpecificAlternateAllele(call); 
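A note on the allele remapping in the createAlleleMapper hunk above: when a spanning event's reference allele is shorter than the merged site's reference, the event's alt allele must inherit the reference suffix it does not cover before it can be matched against the merged alleles. Below is a minimal, self-contained sketch of that suffix-padding idea, using plain strings rather than htsjdk Allele objects; remapAltToLongerRef is a hypothetical helper written only for illustration, not GATK or htsjdk API.

public class RemapAltDemo {
    // Pad an event's alt allele with the reference suffix it does not cover, so the
    // event can be expressed against a longer merged reference allele.
    static String remapAltToLongerRef(final String eventRef, final String eventAlt, final String mergedRef) {
        if (!mergedRef.startsWith(eventRef)) {
            throw new IllegalArgumentException("merged reference must extend the event's reference");
        }
        return eventAlt + mergedRef.substring(eventRef.length());
    }

    public static void main(String[] args) {
        // A deletion AC->A remapped onto the merged reference ACGT becomes ACGT->AGT.
        System.out.println(remapAltToLongerRef("AC", "A", "ACGT")); // prints AGT
    }
}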
- final Predicate hasThisAlt = vc -> (vc.getStart() == call.getStart() && vc.getAlternateAlleles().contains(alt)); + final Predicate hasThisAlt = vc -> (vc.getStart() == call.getStart() && vc.altAllele().equals(alt)); final Set hapsWithAllele = calledHaplotypes.stream() - .filter(h -> h.getEventMap().getVariantContexts().stream().anyMatch(hasThisAlt)) + .filter(h -> h.getEventMap().getEvents().stream().anyMatch(hasThisAlt)) .collect(Collectors.toCollection(HashSet::new)); haplotypeMap.put(call, hapsWithAllele); @@ -1263,31 +988,21 @@ private static VariantContext phaseVC(final VariantContext vc, final String ID, return new VariantContextBuilder(vc).genotypes(phasedGenotypes).make(); } - public static Set getAllelesConsistentWithGivenAlleles(final List givenAlleles, final VariantContext mergedVC) { - if (givenAlleles.isEmpty()) { - return Collections.emptySet(); - } - - final List> givenAltAndRefAllelesInOriginalContext = getVariantContextsFromGivenAlleles(mergedVC.getStart(), givenAlleles, false).stream() - .flatMap(vc -> vc.getAlternateAlleles().stream().map(allele -> ImmutablePair.of(allele, vc.getReference()))).collect(Collectors.toList()); - - return mergedVC.getAlternateAlleles().stream() - .map(allele -> ImmutablePair.of(allele, mergedVC.getReference())) - .filter(altAndRef -> givenAltAndRefAllelesInOriginalContext.stream().anyMatch(givenAltAndRef -> allelesAreConsistent(givenAltAndRef, altAndRef))) - .map(altAndRefPair -> altAndRefPair.getLeft()) + // find all alleles in a VariantContext that encode an equivalent ref -> alt as at least one given event + public static Set allelesConsistentWithGivenAlleles(final Collection givenAlleles, final VariantContext mergedVC) { + return givenAlleles.isEmpty() ? Collections.emptySet() : mergedVC.getAlternateAlleles().stream() + .filter(allele -> givenAlleles.stream().anyMatch(ga -> allelesAreConsistent(ga.refAllele(), ga.altAllele(), mergedVC.getReference(), allele))) .collect(Collectors.toSet()); } // check whether two alleles coming from different variant contexts and with possibly different reference alleles // could in fact be the same. The condition is that one is a prefix of the other - private static boolean allelesAreConsistent(final Pair altAndRef1, final Pair altAndRef2) { - final Allele alt1 = altAndRef1.getLeft(); - final Allele alt2 = altAndRef2.getLeft(); + private static boolean allelesAreConsistent(final Allele ref1, final Allele alt1, final Allele ref2, final Allele alt2) { if (alt1.isSymbolic() || alt2.isSymbolic()) { return false; } else { - final int sizeDiff1 = alt1.length() - altAndRef1.getRight().length(); - final int sizeDiff2 = alt2.length() - altAndRef2.getRight().length(); + final int sizeDiff1 = alt1.length() - ref1.length(); + final int sizeDiff2 = alt2.length() - ref2.length(); return (sizeDiff1 == sizeDiff2) && (alt1.length() < alt2.length() ? 
alt1.basesMatch(Arrays.copyOf(alt2.getBases(), alt1.length())) :
                    alt2.basesMatch(Arrays.copyOf(alt1.getBases(), alt2.length())));

diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyRegionTrimmer.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyRegionTrimmer.java
index 20f2a7cb1c0..fe7602c1602 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyRegionTrimmer.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyRegionTrimmer.java
@@ -1,7 +1,6 @@
 package org.broadinstitute.hellbender.tools.walkers.haplotypecaller;

 import htsjdk.samtools.SAMSequenceDictionary;
-import htsjdk.variant.variantcontext.VariantContext;
 import org.apache.commons.lang3.tuple.Pair;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
@@ -12,8 +11,12 @@
 import org.broadinstitute.hellbender.tools.walkers.annotator.TandemRepeat;
 import org.broadinstitute.hellbender.utils.SimpleInterval;
 import org.broadinstitute.hellbender.utils.Utils;
+import org.broadinstitute.hellbender.utils.haplotype.Event;

-import java.util.*;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Optional;
+import java.util.SortedSet;
 import java.util.stream.Collectors;

 /**
@@ -30,11 +33,6 @@ public final class AssemblyRegionTrimmer {

    private SAMSequenceDictionary sequenceDictionary;

-    /**
-     * Holds a reference the trimmer logger.
-     */
-    private static final Logger logger = LogManager.getLogger(AssemblyRegionTrimmer.class);
-
    /**
     * Initializes the trimmer.
     *
@@ -162,31 +160,31 @@ protected Result noVariation(final AssemblyRegion targetRegion) {
     * can be recovered later.
     *
     * @param region the genome location range to trim.
-     * @param variants list of variants contained in the trimming location. Variants therein
+     * @param events list of variants contained in the trimming location. Variants therein
     *                 not overlapping with {@code region} are simply ignored.
     * @param referenceContext
     * @return never {@code null}.
     */
-    public Result trim(final AssemblyRegion region, final SortedSet<VariantContext> variants, ReferenceContext referenceContext) {
+    public Result trim(final AssemblyRegion region, final SortedSet<Event> events, ReferenceContext referenceContext) {
        if (assemblyRegionArgs.enableLegacyAssemblyRegionTrimming) {
-            return trimLegacy(region, variants);
+            return trimLegacy(region, events);
        }

-        final List<VariantContext> variantsInRegion = variants.stream().filter(region::overlaps).collect(Collectors.toList());
+        final List<Event> eventsInRegion = events.stream().filter(region::overlaps).collect(Collectors.toList());

-        if ( variantsInRegion.isEmpty() ) {
+        if ( eventsInRegion.isEmpty() ) {
            return noVariation(region);
        }

-        int minStart = variantsInRegion.stream().mapToInt(VariantContext::getStart).min().getAsInt();
-        int maxEnd = variantsInRegion.stream().mapToInt(VariantContext::getEnd).max().getAsInt();
+        int minStart = eventsInRegion.stream().mapToInt(Event::getStart).min().getAsInt();
+        int maxEnd = eventsInRegion.stream().mapToInt(Event::getEnd).max().getAsInt();
        final SimpleInterval variantSpan = new SimpleInterval(region.getContig(), minStart, maxEnd).intersect(region);

-        for (final VariantContext vc : variantsInRegion) {
+        for (final Event event : eventsInRegion) {
            int padding = assemblyRegionArgs.snpPaddingForGenotyping;
-            if (vc.isIndel()) {
+            if (event.isIndel()) {
                padding = assemblyRegionArgs.indelPaddingForGenotyping;
-                final Pair<List<Integer>, byte[]> numRepeatsAndUnit = TandemRepeat.getNumTandemRepeatUnits(referenceContext, vc);
+                final Pair<List<Integer>, byte[]> numRepeatsAndUnit = TandemRepeat.getNumTandemRepeatUnits(referenceContext, event);
                if (numRepeatsAndUnit != null && numRepeatsAndUnit.getRight() != null) {
                    final int repeatLength = numRepeatsAndUnit.getRight() == null ? 0 : numRepeatsAndUnit.getRight().length;
                    final int mostRepeats = numRepeatsAndUnit.getLeft().stream().max(Integer::compareTo).orElse(0);
@@ -195,8 +193,8 @@ public Result trim(final AssemblyRegion region, final SortedSet
            }

-            minStart = Math.min(minStart, Math.max(vc.getStart() - padding,1));
-            maxEnd = Math.max(maxEnd, vc.getEnd() + padding);
+            minStart = Math.min(minStart, Math.max(event.getStart() - padding,1));
+            maxEnd = Math.max(maxEnd, event.getEnd() + padding);
        }
        final SimpleInterval paddedVariantSpan = new SimpleInterval(region.getContig(), minStart, maxEnd).intersect(region.getPaddedSpan());
@@ -213,28 +211,26 @@ public Result trim(final AssemblyRegion region, final SortedSet
     * can be recovered later.
     *
     * @param originalRegion the genome location range to trim.
-     * @param allVariantsWithinExtendedRegion list of variants contained in the trimming location. Variants therein
+     * @param allEventsInExtendedRegion list of variants contained in the trimming location. Variants therein
     *                                        not overlapping with {@code originalRegion} are simply ignored.
     * @return never {@code null}.
*/ - public Result trimLegacy(final AssemblyRegion originalRegion, - final SortedSet allVariantsWithinExtendedRegion) { + public Result trimLegacy(final AssemblyRegion originalRegion, final SortedSet allEventsInExtendedRegion) { - if ( allVariantsWithinExtendedRegion.isEmpty() ) // no variants, - { + if ( allEventsInExtendedRegion.isEmpty() ) { return noVariation(originalRegion); } - final List withinActiveRegion = new LinkedList<>(); + final List withinActiveRegion = new LinkedList<>(); final SimpleInterval originalRegionRange = originalRegion.getSpan(); boolean foundNonSnp = false; SimpleInterval variantSpan = null; - for ( final VariantContext vc : allVariantsWithinExtendedRegion ) { - final SimpleInterval vcLoc = new SimpleInterval(vc); + for ( final Event event : allEventsInExtendedRegion ) { + final SimpleInterval vcLoc = new SimpleInterval(event); if ( originalRegionRange.overlaps(vcLoc) ) { - foundNonSnp = foundNonSnp || ! vc.isSNP(); + foundNonSnp = foundNonSnp || ! event.isSNP(); variantSpan = variantSpan == null ? vcLoc : variantSpan.spanWith(vcLoc); - withinActiveRegion.add(vc); + withinActiveRegion.add(event); } } final int padding = foundNonSnp ? assemblyRegionArgs.indelPaddingForGenotyping : assemblyRegionArgs.snpPaddingForGenotyping; @@ -254,8 +250,6 @@ public Result trimLegacy(final AssemblyRegion originalRegion, // final SimpleInterval callableSpan = emitReferenceConfidence ? variantSpan.intersect(originalRegionRange) : variantSpan; final SimpleInterval callableSpan = variantSpan; - final Pair nonVariantRegions = nonVariantTargetRegions(originalRegion, callableSpan); - // TODO add equivalent debug garbage to the real assembly region trimming code if (HaplotypeCallerGenotypingDebugger.isEnabled()) { HaplotypeCallerGenotypingDebugger.println("events : " + withinActiveRegion); @@ -271,33 +265,4 @@ public Result trimLegacy(final AssemblyRegion originalRegion, return new Result(originalRegion, variantSpan, finalSpan); } - /** - * Calculates the list of region to trim away. - * - * NOTE: This is part of the legacy assembly region trimming code - * - * @param targetRegion region for which to generate the flanking regions. - * @param variantSpan the span of the core region containing relevant variation and required padding. - * @return never {@code null}; 0, 1 or 2 element list. - */ - private Pair nonVariantTargetRegions(final AssemblyRegion targetRegion, final SimpleInterval variantSpan) { - final SimpleInterval targetRegionRange = targetRegion.getSpan(); - final int finalStart = variantSpan.getStart(); - final int finalStop = variantSpan.getEnd(); - - final int targetStart = targetRegionRange.getStart(); - final int targetStop = targetRegionRange.getEnd(); - - final boolean preTrimmingRequired = targetStart < finalStart; - final boolean postTrimmingRequired = targetStop > finalStop; - if (preTrimmingRequired) { - final String contig = targetRegionRange.getContig(); - return postTrimmingRequired ? 
Pair.of(new SimpleInterval(contig, targetStart, finalStart - 1), new SimpleInterval(contig, finalStop + 1, targetStop)) : - Pair.of(new SimpleInterval(contig, targetStart, finalStart - 1), null); - } else if (postTrimmingRequired) { - return Pair.of(null, new SimpleInterval(targetRegionRange.getContig(), finalStop + 1, targetStop)); - } else { - return Pair.of(null, null); - } - } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyResultSet.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyResultSet.java index 574ea5f562b..07f319fbd27 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyResultSet.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyResultSet.java @@ -1,21 +1,33 @@ package org.broadinstitute.hellbender.tools.walkers.haplotypecaller; +import com.google.common.annotations.VisibleForTesting; +import htsjdk.samtools.Cigar; import htsjdk.samtools.util.Locatable; -import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.variantcontext.Allele; +import htsjdk.variant.vcf.VCFConstants; +import org.apache.commons.lang3.mutable.MutableInt; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.broadinstitute.gatk.nativebindings.smithwaterman.SWOverhangStrategy; +import org.broadinstitute.gatk.nativebindings.smithwaterman.SWParameters; import org.broadinstitute.hellbender.engine.AssemblyRegion; +import org.broadinstitute.hellbender.tools.walkers.ReferenceConfidenceVariantContextMerger; import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.AbstractReadThreadingGraph; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; +import org.broadinstitute.hellbender.utils.haplotype.Event; import org.broadinstitute.hellbender.utils.haplotype.EventMap; import org.broadinstitute.hellbender.utils.haplotype.Haplotype; +import org.broadinstitute.hellbender.utils.logging.OneShotLogger; import org.broadinstitute.hellbender.utils.param.ParamUtils; +import org.broadinstitute.hellbender.utils.read.CigarUtils; +import org.broadinstitute.hellbender.utils.smithwaterman.SmithWatermanAligner; import java.io.PrintWriter; import java.io.StringWriter; import java.util.*; +import java.util.stream.Collectors; /** * Collection of read assembly using several kmerSizes. 
@@ -32,11 +44,10 @@ * @author Valentin Ruano-Rubio <valentin@broadinstitute.com> */ public final class AssemblyResultSet { - - public static final Comparator HAPLOTYPE_VARIANT_CONTEXT_COMPARATOR = Comparator.comparingInt(VariantContext::getStart) + public static final Comparator HAPLOTYPE_EVENT_COMPARATOR = Comparator.comparingInt(Event::getStart) // Decide arbitrarily so as not to accidentally throw away overlapping variants - .thenComparingInt(vc -> vc.getReference().length()) - .thenComparing(vc -> vc.getAlternateAllele(0)); + .thenComparingInt(e -> e.refAllele().length()) + .thenComparing(e -> e.altAllele()); private final Map assemblyResultByKmerSize; private final Set haplotypes; private Set originalAssemblyHaps = new LinkedHashSet<>(); @@ -47,10 +58,11 @@ public final class AssemblyResultSet { private boolean variationPresent; private Haplotype refHaplotype; private final SortedSet kmerSizes; - private SortedSet variationEvents; + private SortedSet variationEvents; private OptionalInt lastMaxMnpDistanceUsed = OptionalInt.empty(); private boolean debug; private static final Logger logger = LogManager.getLogger(AssemblyResultSet.class); + public static final OneShotLogger haplotypeDeletionWarningLogger = new OneShotLogger(AssemblyBasedCallerUtils.class); private LongHomopolymerHaplotypeCollapsingEngine haplotypeCollapsingEngine; // this is nullable - indicating no collapsing engine (flow-based specific) private boolean isPartiallyDeterminedList = false; @@ -530,7 +542,7 @@ private void updateReferenceHaplotype(final Haplotype newHaplotype) { * at 10-12, a MNP at 14-15, and a SNP at 17. May not be negative. * @return never {@code null}, but perhaps an empty collection. */ - public SortedSet getVariationEvents(final int maxMnpDistance) { + public SortedSet getVariationEvents(final int maxMnpDistance) { ParamUtils.isPositiveOrZero(maxMnpDistance, "maxMnpDistance may not be negative."); final boolean sameMnpDistance = lastMaxMnpDistanceUsed.isPresent() && maxMnpDistance == lastMaxMnpDistanceUsed.getAsInt(); @@ -555,16 +567,9 @@ public void regenerateVariationEvents(int maxMnpDistance) { * @param haplotypes the set of haplotypes to grab the VCs from * @return a sorted set of variant contexts */ - private static SortedSet getAllVariantContexts( final List haplotypes ) { - // Using the cigar from each called haplotype figure out what events need to be written out in a VCF file - final TreeSet vcs = new TreeSet<>( - HAPLOTYPE_VARIANT_CONTEXT_COMPARATOR); - - for( final Haplotype h : haplotypes ) { - vcs.addAll(h.getEventMap().getVariantContexts()); - } - - return vcs; + private static SortedSet getAllVariantContexts(final List haplotypes ) { + return haplotypes.stream().flatMap(h -> h.getEventMap().getEvents().stream()) + .collect(Collectors.toCollection(() -> new TreeSet<>(HAPLOTYPE_EVENT_COMPARATOR))); } public void setDebug(boolean debug) { @@ -580,11 +585,11 @@ public void setHaplotypeCollapsingEngine(LongHomopolymerHaplotypeCollapsingEngin } public void clearHaplotypes() { - haplotypes.clear();; + haplotypes.clear(); refHaplotype = null; } public void replaceAllHaplotypes(Set list) { - haplotypes.clear();; + haplotypes.clear(); refHaplotype = null; for ( Haplotype h : list ) { add(h); @@ -597,8 +602,7 @@ public void replaceAllHaplotypes(Set list) { // For PDHMM use: Remove a haplotype from this AssemblyResultSet and update all of the various references in the // object to that haplotype to be current. 
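Why HAPLOTYPE_EVENT_COMPARATOR above breaks ties on ref-allele length and alt allele rather than comparing start positions alone: a TreeSet treats comparator ties as duplicates, so two distinct events at the same start would silently collapse into one, exactly the hazard the "Decide arbitrarily so as not to accidentally throw away overlapping variants" comment warns about. A self-contained sketch; the Event record below is a stand-in for the new utils.haplotype.Event class, assumed only to carry start/ref/alt:

import java.util.Comparator;
import java.util.TreeSet;

public class ComparatorTieDemo {
    // Stand-in for the patch's Event class: a start position plus ref/alt strings.
    record Event(int start, String ref, String alt) {}

    public static void main(String[] args) {
        final Event snp = new Event(100, "A", "C");   // SNP at 100
        final Event del = new Event(100, "AT", "A");  // deletion starting at 100

        // Comparing by start alone: the TreeSet sees a tie and drops one event.
        final TreeSet<Event> byStartOnly = new TreeSet<>(Comparator.comparingInt(Event::start));
        byStartOnly.add(snp);
        byStartOnly.add(del);
        System.out.println(byStartOnly.size()); // 1 -- the deletion was discarded as a "duplicate"

        // Breaking ties by ref length and then alt keeps both, as the comparator above does.
        final TreeSet<Event> withTieBreakers = new TreeSet<>(Comparator.comparingInt(Event::start)
                .thenComparingInt(e -> e.ref().length())
                .thenComparing(Event::alt));
        withTieBreakers.add(snp);
        withTieBreakers.add(del);
        System.out.println(withTieBreakers.size()); // 2
    }
}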
// WARNING: Deleting haplotypes in this manner is highly dangerous and will likely lead to lost variants - public void removeHapltotype(final Haplotype hap) { - + private void removeHaplotype(final Haplotype hap) { haplotypes.remove(hap); assemblyResultByHaplotype.remove(hap); for (Integer kmerSize : assemblyResultByKmerSize.keySet()) { @@ -625,4 +629,162 @@ public void storeAssemblyHaplotypes() { public boolean hasOverwrittenHaps() { return !originalAssemblyHaps.isEmpty(); } + + /** + * Remove haplotypes with alleles we wish to filter + * // TODO this is a bad algorithm for bad people -- it might eliminate good alleles on the same haplotypes + * // TODO: this should be a method of AssemblyResultSet, not an external static method + */ + public void removeHaplotypesWithBadAlleles(final AssemblyBasedCallerArgumentCollection argumentCollection, + final Collection badPileupEvents) { + if (badPileupEvents.isEmpty()) { + return; // nothing to do + } + List haplotypesWithFilterAlleles = new ArrayList<>(); + // If we are not using partially determined haplotypes, we discard every haplotype containing a bad pileup allele. + + + for(Event badEvent : badPileupEvents) { + for (Haplotype hap : getHaplotypeList()) { + // NOTE: The event map may be null due to edge cases in the assembly engine + SW realignment that can cause + // haplotypes to re-merge into the reference and end up with an empty event map. (Also the event map + // code explicitly expects to fail in some instances) + if (hap.getEventMap() != null && hap.getEventMap().getEvents().stream().anyMatch(badEvent::equals)) { + if (argumentCollection.pileupDetectionArgs.debugPileupStdout) System.err.println("Flagging hap " + hap + " for containing variant " + badEvent); + haplotypesWithFilterAlleles.add(hap); + } + } + } + + if (!haplotypesWithFilterAlleles.isEmpty()) { + if (argumentCollection.pileupDetectionArgs.debugPileupStdout) System.out.println("Found Assembly Haps with filtered Variants:\n"+haplotypesWithFilterAlleles.stream().map(Haplotype::toString).collect(Collectors.joining("\n"))); + haplotypeDeletionWarningLogger.warn(() -> "Haplotypes from Assembly are being filtered by heuristics from the PileupCaller. This can lead to missing variants. See --"+PileupDetectionArgumentCollection.PILEUP_DETECTION_FILTER_ASSEMBLY_HAPS_THRESHOLD+" for more details"); + haplotypesWithFilterAlleles.forEach(this::removeHaplotype); + } + } + + /** + * Helper method that handles the actual "GGA-like" Merging of haplotype alleles into an assembly result set. + * + * First this method will filter out haplotypes that contain alleles that have failed the pileup calling filtering steps, + * Then the list will attempt to poke into the haplotype list artificial haplotypes that have the found alleles present. + */ + @SuppressWarnings({"deprecation"}) + public void injectPileupEvents(final AssemblyRegion region, final AssemblyBasedCallerArgumentCollection argumentCollection, + final SmithWatermanAligner aligner, final Collection goodPileupEvents) { + if (goodPileupEvents.isEmpty()) { + return; // nothing to do + } + + final Haplotype refHaplotype = getReferenceHaplotype(); + final Map> assembledEventByStart = getVariationEvents(argumentCollection.maxMnpDistance).stream() + .collect(Collectors.groupingBy(Event::getStart)); + final Collection assembledIndels = getVariationEvents(argumentCollection.maxMnpDistance).stream(). 
+                filter(Event::isIndel).collect(Collectors.toList());
+
+        Set<Haplotype> baseHaplotypes = new TreeSet<>();
+        baseHaplotypes.addAll(getHaplotypeList().stream()
+                .sorted(Comparator.comparingInt((Haplotype hap) -> hap.isReference() ? 1 : 0).thenComparingDouble(hap -> hap.getScore()).reversed())
+                .limit(AssemblyBasedCallerUtils.NUM_HAPLOTYPES_TO_INJECT_FORCE_CALLING_ALLELES_INTO)
+                .collect(Collectors.toList()));
+
+        //TODO it's unclear whether the correct answer here is to use the hardclipped pileup reads (which we used in generating the pileup alleles for specificity reasons)
+        //TODO or if it would be more accurate to use the softclipped bases here in filtering down the haplotypes. I suspect the latter but I will evaluate later.
+        Map<Kmer, MutableInt> kmerReadCounts = AssemblyBasedCallerUtils.getKmerReadCounts(region.getHardClippedPileupReads(), argumentCollection.pileupDetectionArgs.filteringKmerSize);
+
+        for (final Event event : goodPileupEvents.stream().sorted(Comparator.comparingInt(Event::getStart)).collect(Collectors.toList())) {
+
+            if (argumentCollection.pileupDetectionArgs.debugPileupStdout) System.out.println("Processing new Haplotypes for Pileup Allele that was not in the assembly: " + event.asVariantContext());
+
+            // skip SNPs that are too close to assembled indels.
+            if (!event.isIndel() && assembledIndels.stream().anyMatch(indel -> event.withinDistanceOf(indel, argumentCollection.pileupDetectionArgs.snpAdjacentToAssemblyIndel))) {
+                continue;
+            }
+            final List<Event> assembledEvents = assembledEventByStart.getOrDefault(event.getStart(), Collections.emptyList());
+
+            if (isEventPresentInAssembly(event, assembledEvents) || isSymbolic(event.altAllele())) {
+                continue;
+            }
+
+            final Set<Haplotype> newPileupHaplotypes = new HashSet<>();
+            for (final Haplotype baseHaplotype : baseHaplotypes) {
+                final Haplotype insertedHaplotype = makeHaplotypeWithInsertedEvent(baseHaplotype, refHaplotype, event, aligner, argumentCollection.getHaplotypeToReferenceSWParameters());
+                if (insertedHaplotype != null) {
+                    newPileupHaplotypes.add(insertedHaplotype);
+                }
+            }
+
+            final Set<Haplotype> refactoredHaps = AssemblyBasedCallerUtils.filterPileupHaplotypes(newPileupHaplotypes, kmerReadCounts,
+                    argumentCollection.pileupDetectionArgs.numHaplotypesToIterate, argumentCollection.pileupDetectionArgs.filteringKmerSize);
+            if (argumentCollection.pileupDetectionArgs.debugPileupStdout) {
+                System.out.println("Constructed the following new Pileup Haplotypes after filtering:\n"+
+                        refactoredHaps.stream().map(Haplotype::toString).collect(Collectors.joining("\n")));
+            }
+
+            baseHaplotypes.addAll(refactoredHaps);
+        }
+
+
+        baseHaplotypes.forEach(this::add);
+        regenerateVariationEvents(argumentCollection.maxMnpDistance);
+    }
+
+    @VisibleForTesting
+    public void addGivenAlleles(final List<Event> givenAlleles, final int maxMnpDistance, final SmithWatermanAligner aligner,
+                                final SWParameters haplotypeToReferenceSWParameters) {
+        if (givenAlleles.isEmpty()) {
+            return;
+        }
+        final Haplotype refHaplotype = getReferenceHaplotype();
+        final Map<Integer, List<Event>> assembledEventsByStart = getVariationEvents(maxMnpDistance).stream()
+                .collect(Collectors.groupingBy(Event::getStart));
+
+        // choose the highest-scoring haplotypes along with the reference for building force-calling haplotypes
+        final List<Haplotype> baseHaplotypes = getHaplotypeList().stream()
+                .sorted(Comparator.comparing(Haplotype::isReference).thenComparingDouble(hap -> hap.getScore()).reversed())
+                .limit(AssemblyBasedCallerUtils.NUM_HAPLOTYPES_TO_INJECT_FORCE_CALLING_ALLELES_INTO)
+                .collect(Collectors.toList());
+
+        for (final Event given :
givenAlleles) { + final List assembledEvents = assembledEventsByStart.getOrDefault(given.getStart(), Collections.emptyList()); + + if (isEventPresentInAssembly(given, assembledEvents) || isSymbolic(given.altAllele())) { + continue; + } + + for (final Haplotype baseHaplotype : baseHaplotypes) { + final Haplotype insertedHaplotype = makeHaplotypeWithInsertedEvent(baseHaplotype, refHaplotype, given, aligner, haplotypeToReferenceSWParameters); + if (insertedHaplotype != null) { + add(insertedHaplotype); + } + } + } + regenerateVariationEvents(maxMnpDistance); + } + + private static boolean isEventPresentInAssembly(final Event event, final List assembledEvents) { + return assembledEvents.stream().anyMatch(event::equals); // note that Events are forced to have a minimal representation + } + + private static boolean isSymbolic(final Allele a) { + return a.equals(Allele.NO_CALL) || a.getDisplayString().equals(String.valueOf(VCFConstants.NULL_ALLELE)) || a.equals(Allele.SPAN_DEL) || a.isSymbolic(); + } + + // returns a new haplotype with the event inserted or null if that is not possible + private static Haplotype makeHaplotypeWithInsertedEvent(final Haplotype baseHaplotype, final Haplotype refHaplotype, final Event event, + final SmithWatermanAligner aligner, final SWParameters haplotypeToReferenceSWParameters) { + if (baseHaplotype.getEventMap() != null && baseHaplotype.getEventMap().getEvents().stream().anyMatch(event::overlaps)) { + return null; // Can't insert because the base haplotype already has an event there. + } + + final Haplotype insertedHaplotype = baseHaplotype.insertAllele(event.refAllele(), event.altAllele(), event.getStart()); + if (insertedHaplotype != null) { // can be null if the requested allele can't be inserted into the haplotype + final Cigar cigar = CigarUtils.calculateCigar(refHaplotype.getBases(), insertedHaplotype.getBases(), aligner, haplotypeToReferenceSWParameters, SWOverhangStrategy.INDEL); + insertedHaplotype.setCigar(cigar); + insertedHaplotype.setGenomeLocation(refHaplotype.getGenomeLocation()); + insertedHaplotype.setAlignmentStartHapwrtRef(refHaplotype.getAlignmentStartHapwrtRef()); + + } + return insertedHaplotype; + } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java index d96fffb3875..60170907857 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java @@ -8,6 +8,7 @@ import htsjdk.variant.variantcontext.writer.Options; import htsjdk.variant.variantcontext.writer.VariantContextWriter; import htsjdk.variant.vcf.*; +import org.apache.commons.lang3.tuple.Pair; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.broadinstitute.barclay.argparser.CommandLineException; @@ -26,6 +27,7 @@ import org.broadinstitute.hellbender.tools.walkers.genotyper.StandardCallerArgumentCollection; import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.ReadThreadingAssembler; import org.broadinstitute.hellbender.utils.fasta.CachingIndexedFastaSequenceFile; +import org.broadinstitute.hellbender.utils.haplotype.Event; import org.broadinstitute.hellbender.utils.pileup.PileupBasedAlleles; import org.broadinstitute.hellbender.transformers.IUPACReadTransformer; import 
org.broadinstitute.hellbender.transformers.ReadTransformer; @@ -627,8 +629,10 @@ public List callRegion(final AssemblyRegion region, final Featur return referenceModelForNoVariation(region, true, VCpriors); } - final List givenAlleles = features.getValues(hcArgs.alleles).stream() - .filter(vc -> hcArgs.forceCallFiltered || vc.isNotFiltered()).collect(Collectors.toList()); + final List givenAlleles = features.getValues(hcArgs.alleles).stream() + .filter(vc -> hcArgs.forceCallFiltered || vc.isNotFiltered()) + .flatMap(vc -> GATKVariantContextUtils.splitVariantContextToEvents(vc, false, GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, false).stream()) + .collect(Collectors.toList()); if( givenAlleles.isEmpty() && region.size() == 0 ) { // No reads here so nothing to do! @@ -646,11 +650,6 @@ public List callRegion(final AssemblyRegion region, final Featur } } - List forcedPileupAlleles = Collections.emptyList(); - if(hcArgs.pileupDetectionArgs.usePileupDetection){ - forcedPileupAlleles = PileupBasedAlleles.getPileupVariantContexts(region.getAlignmentData(), hcArgs.pileupDetectionArgs, readsHeader, hcArgs.minBaseQualityScore); - } - // run the local assembler, getting back a collection of information on how we should proceed final AssemblyResultSet untrimmedAssemblyResult = AssemblyBasedCallerUtils.assembleReads(region, hcArgs, readsHeader, samplesList, logger, referenceReader, assemblyEngine, aligner, !hcArgs.doNotCorrectOverlappingBaseQualities, hcArgs.fbargs, false); ReadThreadingAssembler.addAssembledVariantsToEventMapOutput(untrimmedAssemblyResult, assembledEventMapVariants, hcArgs.maxMnpDistance, assembledEventMapVcfOutputWriter); @@ -667,38 +666,18 @@ public List callRegion(final AssemblyRegion region, final Featur } } - // PileupCaller events if we need them - List pileupAllelesFoundShouldFilter = forcedPileupAlleles.stream() - .filter(v -> PileupBasedAlleles.shouldFilterAssemblyVariant(hcArgs.pileupDetectionArgs, v)) - .collect(Collectors.toList()); - List pileupAllelesPassingFilters = forcedPileupAlleles.stream() - .filter(v -> PileupBasedAlleles.passesFilters(hcArgs.pileupDetectionArgs, v)) - .collect(Collectors.toList()); + Pair, Set> goodAndBadPileupEvents = + PileupBasedAlleles.goodAndBadPileupEvents(region.getAlignmentData(), hcArgs.pileupDetectionArgs, readsHeader, hcArgs.minBaseQualityScore); + final Set goodPileupEvents = goodAndBadPileupEvents.getLeft(); + final Set badPileupEvents = goodAndBadPileupEvents.getRight(); // Regenerate the list of AllVariationEvents, filtering out assembled variants that must be filtered according to the pileupcaller code. - final SortedSet allVariationEvents = new TreeSet<>( - AssemblyResultSet.HAPLOTYPE_VARIANT_CONTEXT_COMPARATOR); - allVariationEvents.addAll(untrimmedAssemblyResult.getVariationEvents(hcArgs.maxMnpDistance).stream() - .filter(outerVC -> { return pileupAllelesFoundShouldFilter.stream() // Stream over the PileupAllelesThatMustBeFiltered - // Does the variant match? 
- .noneMatch(filterAllle -> filterAllle.getStart() == outerVC.getStart() - && filterAllle.getReference().equals(outerVC.getReference()) - && filterAllle.getAlternateAllele(0).equals(outerVC.getAlternateAllele(0))); - }).collect(Collectors.toList())); - // Add any new pileupcaller alleles to the variation events - for (final VariantContext pileupAllele : pileupAllelesPassingFilters) { - //these are events from single haplotypes, so we can do a simple comparison without trimming - if (allVariationEvents.stream().noneMatch(vc -> vc.getStart() == pileupAllele.getStart() && vc.getReference().basesMatch(pileupAllele.getReference()) && vc.getAlternateAllele(0).basesMatch(pileupAllele.getAlternateAllele(0)))) { - allVariationEvents.add(pileupAllele); - } - } - // Add given alleles to the variation events - for (final VariantContext given : givenAlleles) { - //these are events from single haplotypes, so we can do a simple comparison without trimming - if (allVariationEvents.stream().noneMatch(vc -> vc.getStart() == given.getStart() && vc.getReference().basesMatch(given.getReference()) && vc.getAlternateAllele(0).basesMatch(given.getAlternateAllele(0)))) { - allVariationEvents.add(given); - } - } + final SortedSet allVariationEvents = untrimmedAssemblyResult.getVariationEvents(hcArgs.maxMnpDistance).stream() + .filter(event -> !badPileupEvents.contains(event)) + .collect(Collectors.toCollection(() -> new TreeSet<>(AssemblyResultSet.HAPLOTYPE_EVENT_COMPARATOR))); + + goodPileupEvents.forEach(allVariationEvents::add); + givenAlleles.forEach(allVariationEvents::add); final AssemblyRegionTrimmer.Result trimmingResult = trimmer.trim(region, allVariationEvents, referenceContext); @@ -707,18 +686,20 @@ public List callRegion(final AssemblyRegion region, final Featur } AssemblyResultSet assemblyResult = untrimmedAssemblyResult.trimTo(trimmingResult.getVariantRegion()); - AssemblyBasedCallerUtils.addGivenAlleles(givenAlleles, hcArgs.maxMnpDistance, aligner, hcArgs.getHaplotypeToReferenceSWParameters(), assemblyResult); + assemblyResult.addGivenAlleles(givenAlleles, hcArgs.maxMnpDistance, aligner, hcArgs.getHaplotypeToReferenceSWParameters()); // Pre-work for the PDHMM, if we are in PDHMM mode then here is where we re-compute the haplotypes as PD haplotypes. 
        if (hcArgs.pileupDetectionArgs.generatePDHaplotypes) {
            // Note: we ignore maxMNPDistance here because the PartiallyDeterminedHaplotypeComputationEngine does not currently handle MNPs
-            SortedSet<VariantContext> assemblyVariants = assemblyResult.getVariationEvents(0);
+            SortedSet<Event> assemblyVariants = assemblyResult.getVariationEvents(0);
            if (hcArgs.pileupDetectionArgs.debugPileupStdout) {
                System.out.println("Generating PDHaplotypes for PDHMM");
                System.out.println("Assembled Variants to use:");
                assemblyVariants.forEach(System.out::println);
-                System.out.println("Pileup Variants to use:");
-                forcedPileupAlleles.forEach(System.out::println);
+                System.out.println("Good Pileup Variants to use:");
+                goodPileupEvents.forEach(System.out::println);
+                System.out.println("Bad Pileup Variants to filter:");
+                badPileupEvents.forEach(System.out::println);
                System.out.println("Adding Variants To Reference Haplotype:");
                System.out.println(assemblyResult.getReferenceHaplotype());
                System.out.println("FinalSpan: " + assemblyResult.getReferenceHaplotype().getGenomeLocation());
@@ -728,9 +709,9 @@ public List callRegion(final AssemblyRegion region, final Featur
                    region.getSpan(),
                    assemblyResult.getReferenceHaplotype(),
                    assemblyVariants,
-                    pileupAllelesFoundShouldFilter,
-                    pileupAllelesPassingFilters,
-                    hcArgs.pileupDetectionArgs.snpAdajacentToAssemblyIndel,
+                    badPileupEvents,
+                    goodPileupEvents,
+                    hcArgs.pileupDetectionArgs.snpAdjacentToAssemblyIndel,
                    aligner,
                    hcArgs.getHaplotypeToReferenceSWParameters(),
                    hcArgs.pileupDetectionArgs.determinePDHaps,
@@ -742,11 +723,10 @@ public List callRegion(final AssemblyRegion region, final Featur
        // NOTE: We might also call this if hcArgs.pileupDetectionArgs.useGGAFallback is set and we are making PD haplotypes. This
        // fallback handles edge cases where the PartiallyDeterminedHaplotypeComputationEngine generates too many haplotypes
        // from a very complex assembly region and we want to fall back to assembly.
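The pileup bookkeeping in this file (for instance the `!badPileupEvents.contains(event)` filter earlier in callRegion, and the `filter(delVariant::equals)` removal later in the PD engine) leans on Event having value-based equality, which htsjdk's VariantContext never had. A short self-contained sketch of the difference, with a record standing in for the real Event class (illustration only, not the actual GATK class):

import java.util.Set;

public class ValueEqualityDemo {
    // Records get equals/hashCode over their components for free, so two Event
    // instances describing the same ref->alt change at the same start compare equal.
    record Event(int start, String ref, String alt) {}

    public static void main(String[] args) {
        final Set<Event> badPileupEvents = Set.of(new Event(1042, "A", "AT"));

        // A freshly built instance of the same variation is still "contained":
        System.out.println(badPileupEvents.contains(new Event(1042, "A", "AT"))); // true

        // VariantContext does not override equals, so the deleted code had to match
        // start, reference, and alternate allele field by field instead.
    }
}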
-        if ((!pileupAllelesFoundShouldFilter.isEmpty() || !pileupAllelesPassingFilters.isEmpty()) && // Assert that we did find pileup events to process before calling this code
-                (!hcArgs.pileupDetectionArgs.generatePDHaplotypes ||
-                        (hcArgs.pileupDetectionArgs.useGGAFallback && !assemblyResult.hasOverwrittenHaps()))) { // If we are generating PDHaps assert that it failed before calling this
-            if (hcArgs.pileupDetectionArgs.debugPileupStdout) { System.out.println("Falling back to GGA based Pileup Allele mode!"); }
-            assemblyResult = AssemblyBasedCallerUtils.applyPileupEventsAsForcedAlleles(region, hcArgs, aligner, assemblyResult.getReferenceHaplotype(), assemblyResult, pileupAllelesFoundShouldFilter, pileupAllelesPassingFilters, hcArgs.pileupDetectionArgs.debugPileupStdout);
+        if (!hcArgs.pileupDetectionArgs.generatePDHaplotypes ||
+                (hcArgs.pileupDetectionArgs.useGGAFallback && !assemblyResult.hasOverwrittenHaps())) { // If we are generating PDHaps assert that it failed before calling this
+            assemblyResult.removeHaplotypesWithBadAlleles(hcArgs, badPileupEvents);
+            assemblyResult.injectPileupEvents(region, hcArgs, aligner, goodPileupEvents);
        }
        final AssemblyRegion regionForGenotyping = assemblyResult.getRegionForGenotyping();
        final List<GATKRead> readStubs = regionForGenotyping.getReads().stream()
@@ -1008,7 +988,7 @@ public void shutdown() {
        if (pdhmmLikelihoodCalculationEngine != null) pdhmmLikelihoodCalculationEngine.close();
        aligner.close();
        haplotypeBAMWriter.ifPresent(HaplotypeBAMWriter::close);
-        assembledEventMapVcfOutputWriter.ifPresent(writer -> {assembledEventMapVariants.get().forEach(writer::add); writer.close();});
+        assembledEventMapVcfOutputWriter.ifPresent(writer -> {assembledEventMapVariants.get().forEach(event -> writer.add(event)); writer.close();});
        if ( referenceReader != null) {
            try {
                referenceReader.close();

diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java
index 0d05f78fb27..63e9666a805 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java
@@ -22,6 +22,7 @@
 import org.broadinstitute.hellbender.utils.genotyper.AlleleList;
 import org.broadinstitute.hellbender.utils.genotyper.IndexedAlleleList;
 import org.broadinstitute.hellbender.utils.genotyper.SampleList;
+import org.broadinstitute.hellbender.utils.haplotype.Event;
 import org.broadinstitute.hellbender.utils.haplotype.EventMap;
 import org.broadinstitute.hellbender.utils.haplotype.Haplotype;
 import org.broadinstitute.hellbender.utils.logging.OneShotLogger;
@@ -123,7 +124,7 @@ public CalledHaplotypes assignGenotypeLikelihoods(final List haplotyp
                                                      final SimpleInterval refLoc,
                                                      final SimpleInterval activeRegionWindow,
                                                      final FeatureContext tracker,
-                                                      final List<VariantContext> givenAlleles,
+                                                      final List<Event> givenAlleles,
                                                      final boolean emitReferenceConfidence,
                                                      final int maxMnpDistance,
                                                      final SAMFileHeader header,
@@ -143,7 +144,8 @@ public CalledHaplotypes assignGenotypeLikelihoods(final List haplotyp

        // update the haplotypes so we're ready to call, getting the ordered list of positions on the reference
        // that carry events among the haplotypes
-        final SortedSet<Integer> startPosKeySet = EventMap.buildEventMapsForHaplotypes(haplotypes, ref, refLoc, hcArgs.assemblerArgs.debugAssembly, maxMnpDistance);
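The removed line above consumed the SortedSet<Integer> of start positions returned by buildEventMapsForHaplotypes; the replacement just below recomputes them afterwards via EventMap.getEventStartPositions. Presumably (an assumption inferred from this hunk, not a quote of the new method) that helper simply pools each haplotype's event start coordinates into one sorted set, along these lines:

import java.util.Collection;
import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.stream.Collectors;

public class EventStartsDemo {
    // Minimal stand-ins for the real Event and Haplotype classes (illustration only).
    record Event(int start, String ref, String alt) {}
    record Haplotype(List<Event> events) {}

    // Sketch of what EventMap.getEventStartPositions plausibly does: collect the start
    // coordinate of every event on every haplotype into one sorted, de-duplicated set.
    static SortedSet<Integer> eventStartPositions(final Collection<Haplotype> haplotypes) {
        return haplotypes.stream()
                .flatMap(h -> h.events().stream())
                .map(Event::start)
                .collect(Collectors.toCollection(TreeSet::new));
    }

    public static void main(String[] args) {
        final Haplotype h1 = new Haplotype(List.of(new Event(100, "A", "C"), new Event(140, "G", "GA")));
        final Haplotype h2 = new Haplotype(List.of(new Event(100, "A", "T")));
        System.out.println(eventStartPositions(List.of(h1, h2))); // [100, 140]
    }
}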
+ EventMap.buildEventMapsForHaplotypes(haplotypes, ref, refLoc, hcArgs.assemblerArgs.debugAssembly, maxMnpDistance); + final SortedSet eventStarts = EventMap.getEventStartPositions(haplotypes); // Walk along each position in the key set and create each event to be outputted final Set calledHaplotypes = new HashSet<>(); @@ -157,16 +159,16 @@ public CalledHaplotypes assignGenotypeLikelihoods(final List haplotyp } // null if there is no potential uses of DRAGstr in this region. - final DragstrReferenceAnalyzer dragstrs = constructDragstrReferenceSTRAnalyzerIfNecessary(haplotypes, ref, refLoc, startPosKeySet); + final DragstrReferenceAnalyzer dragstrs = constructDragstrReferenceSTRAnalyzerIfNecessary(haplotypes, ref, refLoc, eventStarts); final BiPredicate readQualifiesForGenotypingPredicate = composeReadQualifiesForGenotypingPredicate(hcArgs); - for( final int loc : startPosKeySet ) { + for( final int loc : eventStarts ) { if( loc < activeRegionWindow.getStart() || loc > activeRegionWindow.getEnd() ) { continue; } - final List eventsAtThisLoc = AssemblyBasedCallerUtils.getVariantContextsFromActiveHaplotypes(loc, + final List eventsAtThisLoc = AssemblyBasedCallerUtils.getEventsFromActiveHaplotypes(loc, haplotypes, !hcArgs.disableSpanningEventGenotyping); final List eventsAtThisLocWithSpanDelsReplaced = replaceSpanDels(eventsAtThisLoc, @@ -326,8 +328,8 @@ private boolean isDragstrSTRAnalyzerNecessary(SortedSet startPosKeySet, return !startPosKeySet.isEmpty() && dragstrParams != null && !hcArgs.standardArgs.genotypeArgs.dontUseDragstrPriors && haplotypes.stream() - .anyMatch(h -> h.getEventMap().getVariantContexts().stream() - .anyMatch(GATKVariantContextUtils::containsInlineIndel)); + .anyMatch(h -> h.getEventMap().getEvents().stream() + .anyMatch(e -> GATKVariantContextUtils.containsInlineIndel(e.refAllele(), Collections.singletonList(e.altAllele())))); } @@ -468,7 +470,7 @@ static List whichAllelesToKeepBasedonHapScores(final Map{ private final Allele allele; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/LocationAndAlleles.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/LocationAndAlleles.java deleted file mode 100644 index 8551e919464..00000000000 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/LocationAndAlleles.java +++ /dev/null @@ -1,46 +0,0 @@ -package org.broadinstitute.hellbender.tools.walkers.haplotypecaller; - -import htsjdk.variant.variantcontext.Allele; - -import java.util.List; - -/** - * This class exists to allow VariantContext objects to be compared based only on their location and set of alleles, - * providing a more liberal equals method so that VariantContext objects can be placed into a Set - * which retains only VCs that have non-redundant location and Allele lists. - */ -public class LocationAndAlleles { - private final int loc; - private final List alleles; - - public LocationAndAlleles(final int loc, final List alleles) { - this.loc = loc; - this.alleles = alleles; - } - - public int getLoc() { - return loc; - } - - public List getAlleles() { - return alleles; - } - - @Override - public boolean equals(final Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - - final LocationAndAlleles that = (LocationAndAlleles) o; - - if (loc != that.loc) return false; - return alleles != null ? alleles.equals(that.alleles) : that.alleles == null; - } - - @Override - public int hashCode() { - return 31 * loc + (alleles != null ? 
alleles.hashCode() : 0); - } - - -} \ No newline at end of file diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/PartiallyDeterminedHaplotypeComputationEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/PartiallyDeterminedHaplotypeComputationEngine.java index 621b4ad6404..5e7460b47bb 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/PartiallyDeterminedHaplotypeComputationEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/PartiallyDeterminedHaplotypeComputationEngine.java @@ -6,12 +6,12 @@ import htsjdk.samtools.util.Locatable; import htsjdk.samtools.util.Tuple; import htsjdk.variant.variantcontext.Allele; -import htsjdk.variant.variantcontext.VariantContext; import org.apache.commons.lang3.ArrayUtils; import org.broadinstitute.gatk.nativebindings.smithwaterman.SWOverhangStrategy; import org.broadinstitute.gatk.nativebindings.smithwaterman.SWParameters; import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.utils.bwa.InvalidInputException; +import org.broadinstitute.hellbender.utils.haplotype.Event; import org.broadinstitute.hellbender.utils.haplotype.EventMap; import org.broadinstitute.hellbender.utils.haplotype.Haplotype; import org.broadinstitute.hellbender.utils.haplotype.PartiallyDeterminedHaplotype; @@ -20,7 +20,6 @@ import org.broadinstitute.hellbender.utils.smithwaterman.SmithWatermanAligner; import java.util.*; -import java.util.List; import java.util.stream.Collectors; /** @@ -35,7 +34,7 @@ * processing would become too complicated. */ public class PartiallyDeterminedHaplotypeComputationEngine { - final static int MAX_PD_HAPS_TO_GENERATE = 256*2;; //(2048 is illuminas #) (without optimizing the hmm to some degree this is probably unattainable) + final static int MAX_PD_HAPS_TO_GENERATE = 256*2; //(2048 is illuminas #) (without optimizing the hmm to some degree this is probably unattainable) final static int MAX_BRANCH_PD_HAPS = 128; //(128 is illuminas #) final static int MAX_VAR_IN_EVENT_GROUP = 17; // (20 is illuminas #) @@ -43,11 +42,11 @@ public class PartiallyDeterminedHaplotypeComputationEngine { //U is a 0->N (N = region size) based space where Insertions are +0.5 and deletions are + 1 from their original position // We use this comparator for haplotype construcion to make it slightly easier to build compound haplotypes (i.e. snp+insertion/deletion at the same anchor base) - public static final Comparator HAPLOTYPE_SNP_FIRST_COMPARATOR = Comparator.comparingInt(VariantContext::getStart) + public static final Comparator HAPLOTYPE_SNP_FIRST_COMPARATOR = Comparator.comparingInt(Event::getStart) // Decide arbitrarily so as not to accidentally throw away overlapping variants - .thenComparingInt(vc -> vc.getReference().length()) - .thenComparingInt(vc -> vc.getAlternateAllele(0).length()) - .thenComparing(vc -> vc.getAlternateAllele(0)); + .thenComparingInt(e -> e.refAllele().length()) + .thenComparingInt(e -> e.altAllele().length()) + .thenComparing(e -> e.altAllele()); /** @@ -71,9 +70,9 @@ public class PartiallyDeterminedHaplotypeComputationEngine { * @param sourceSet AssemblyResultSet to be modified with the new haplotypes * @param callingSpan Calling span to subset determined events to (to handle padding) * @param referenceHaplotype Reference haplotype against which to build artifical haps - * @param assemblyVariants Assembly variants. 
- * @param pileupAllelesFoundShouldFilter Pileup alleles that should be filtered if they are part of the assembly - * @param pileupAllelesPassingFilters Pileup alleles that pass the heuristics to be included in genotyping + * @param assemblyEvents Assembly variants. + * @param badPileupEvents Pileup alleles that should be filtered if they are part of the assembly + * @param goodPileupEvents Pileup alleles that pass the heuristics to be included in genotyping * @param snpAdjacentToIndelLimit If pileup allele snps are too close to assembled indels we thorw them out. * @param aligner SmithWatermanAligner to use for filtering out equivalent event sets * @param swParameters Parameters for hap-hap comparisons @@ -84,9 +83,9 @@ public class PartiallyDeterminedHaplotypeComputationEngine { public static AssemblyResultSet generatePDHaplotypes(final AssemblyResultSet sourceSet, final Locatable callingSpan, final Haplotype referenceHaplotype, - final SortedSet assemblyVariants, - final List pileupAllelesFoundShouldFilter, - final List pileupAllelesPassingFilters, + final SortedSet assemblyEvents, + final Collection badPileupEvents, + final Collection goodPileupEvents, final int snpAdjacentToIndelLimit, final SmithWatermanAligner aligner, final SWParameters swParameters, @@ -94,50 +93,49 @@ public static AssemblyResultSet generatePDHaplotypes(final AssemblyResultSet sou final boolean debugSite) { //We currently don't support MNPs in here, assert nothing coming in IS a MNP - if (assemblyVariants.stream().anyMatch(VariantContext::isMNP) || pileupAllelesPassingFilters.stream().anyMatch(VariantContext::isMNP)) { + if (assemblyEvents.stream().anyMatch(Event::isMNP) || goodPileupEvents.stream().anyMatch(Event::isMNP)) { throw new InvalidInputException("PartiallyDeterminedHaplotypeComputationEngine currently doesn't support any MNP variants"); } - final TreeSet variantsInOrder = new TreeSet<>( - HAPLOTYPE_SNP_FIRST_COMPARATOR); + final TreeSet eventsInOrder = new TreeSet<>(HAPLOTYPE_SNP_FIRST_COMPARATOR); // First we filter the assembly variants based on badness from the graph - for (VariantContext delVariant : pileupAllelesFoundShouldFilter) { + for (Event delVariant : badPileupEvents) { - List variantsToRemove = assemblyVariants.stream().filter( - v -> v.getStart() == delVariant.getStart() && - delVariant.getReference().equals(v.getReference()) && - delVariant.getAlternateAllele(0).equals(v.getAlternateAllele(0))).collect(Collectors.toList()); + List variantsToRemove = assemblyEvents.stream().filter(delVariant::equals).collect(Collectors.toList()); if (!variantsToRemove.isEmpty()) { if (debugSite) System.out.println("Removing assembly variants due to columnwise heuristics: " + variantsToRemove); - variantsToRemove.forEach(assemblyVariants::remove); + variantsToRemove.forEach(assemblyEvents::remove); } } // Ignore any snps from pileups that were close to indels - final List givenAllelesFiltered = pileupAllelesPassingFilters.stream() - .filter(vc -> vc.isIndel() || - assemblyVariants.stream().filter(VariantContext::isIndel).noneMatch(indel -> vc.withinDistanceOf(indel, snpAdjacentToIndelLimit))).collect(Collectors.toList()); + final List givenAllelesFiltered = goodPileupEvents.stream() + .filter(event -> event.isIndel() || + assemblyEvents.stream().filter(Event::isIndel).noneMatch(indel -> event.withinDistanceOf(indel, snpAdjacentToIndelLimit))) + .collect(Collectors.toList()); // TODO make sure this TREE-SET is properly comparing the VCs - variantsInOrder.addAll(assemblyVariants); - 
variantsInOrder.addAll(givenAllelesFiltered); + eventsInOrder.addAll(assemblyEvents); + eventsInOrder.addAll(givenAllelesFiltered); - if (debugSite) System.out.println("Variants to PDHapDetermination:\n"+ - variantsInOrder.stream().map(PartiallyDeterminedHaplotype.getDRAGENDebugVariantContextString((int) referenceHaplotype.getStartPosition())).collect(Collectors.joining("\n"))); + if (debugSite) { + System.out.println("Variants to PDHapDetermination:\n"+ + eventsInOrder.stream().map(Event::asVariantContext).map(PartiallyDeterminedHaplotype.getDRAGENDebugVariantContextString((int) referenceHaplotype.getStartPosition())).collect(Collectors.joining("\n"))); + } // TODO this is where we filter out if indels > 32 (a heuristic known from DRAGEN that is not implemented here) - List vcsAsList = new ArrayList<>(variantsInOrder); + List vcsAsList = new ArrayList<>(eventsInOrder); // NOTE: we iterate over this several times and expect it to be sorted. - Map> eventsByDRAGENCoordinates = new LinkedHashMap<>(); - SortedMap> variantsByStartPos = new TreeMap<>(); + Map> eventsByDRAGENCoordinates = new LinkedHashMap<>(); + SortedMap> variantsByStartPos = new TreeMap<>(); List eventGroups = new ArrayList<>(); int lastEventEnd = -1; - for (VariantContext vc : variantsInOrder) { + for (Event vc : eventsInOrder) { // Break everything into independent groups (don't worry about transitivity right now) - Double eventKey = vc.getStart() + (vc.isSimpleInsertion()? 0.5:0) + (vc.isSimpleDeletion()? 1 : 0) - referenceHaplotype.getStartPosition(); + Double eventKey = vc.getStart() + (vc.isSimpleInsertion() ? 0.5 : 0) + (vc.isSimpleDeletion() ? 1 : 0) - referenceHaplotype.getStartPosition(); eventsByDRAGENCoordinates.putIfAbsent(eventKey, new ArrayList<>()); eventsByDRAGENCoordinates.get(eventKey).add(vc); variantsByStartPos.putIfAbsent(vc.getStart(), new ArrayList<>()); @@ -155,17 +153,17 @@ public static AssemblyResultSet generatePDHaplotypes(final AssemblyResultSet sou } //Print the event groups if (debugSite) eventsByDRAGENCoordinates.entrySet().stream().map(e -> { - return String.format("%.1f", e.getKey()) + " -> " + e.getValue().stream() + return String.format("%.1f", e.getKey()) + " -> " + e.getValue().stream().map(Event::asVariantContext) .map(PartiallyDeterminedHaplotype.getDRAGENDebugVariantContextString((int) referenceHaplotype.getStartPosition())) .collect(Collectors.joining(",")); }).forEach(System.out::println); // Iterate over all events starting with all indels - List> disallowedPairs = smithWatermanRealignPairsOfVariantsForEquivalentEvents(referenceHaplotype, aligner, swParameters, debugSite, variantsInOrder, vcsAsList); + List> disallowedPairs = smithWatermanRealignPairsOfVariantsForEquivalentEvents(referenceHaplotype, aligner, swParameters, debugSite, eventsInOrder, vcsAsList); if (debugSite) { System.out.println("disallowed Variant pairs:"); - disallowedPairs.stream().map(l -> l.stream().map(PartiallyDeterminedHaplotype.getDRAGENDebugVariantContextString((int) referenceHaplotype.getStartPosition())).collect(Collectors.joining("->"))).forEach(System.out::println); + disallowedPairs.stream().map(l -> l.stream().map(Event::asVariantContext).map(PartiallyDeterminedHaplotype.getDRAGENDebugVariantContextString((int) referenceHaplotype.getStartPosition())).collect(Collectors.joining("->"))).forEach(System.out::println); } if (debugSite) { @@ -173,9 +171,9 @@ public static AssemblyResultSet generatePDHaplotypes(final AssemblyResultSet sou } //Now that we have the disallowed groups, let's merge any of them
from separate groups: //TODO this is not an efficient way of doing this - for (List pair : disallowedPairs) { + for (List pair : disallowedPairs) { EventGroup eventGrpLeft = null; - for (VariantContext event : pair) { + for (Event event : pair) { EventGroup grpForEvent = eventGroups.stream().filter(grp -> grp.contains(event)).findFirst().get(); // If the event isn't in the same event group as its predecessor, merge this group with that one if (eventGrpLeft != grpForEvent) { @@ -197,7 +195,7 @@ public static AssemblyResultSet generatePDHaplotypes(final AssemblyResultSet sou // if any of our event groups is too large, abort. if (debugSite ) System.out.println("Found event group with too many variants! Aborting haplotype building"); return sourceSet; - }; + } Set outputHaplotypes = new LinkedHashSet<>(); if (makeDeterminedHapsInstead) { @@ -206,13 +204,13 @@ public static AssemblyResultSet generatePDHaplotypes(final AssemblyResultSet sou } //Iterate over every VCF start position in R space - List>> entriesRInOrder = new ArrayList<>(variantsByStartPos.entrySet()); + List>> entriesRInOrder = new ArrayList<>(variantsByStartPos.entrySet()); /** * Overall Loop: * Iterate over every cluster of variants with the same start position. */ for (int indexOfDeterminedInR = 0; indexOfDeterminedInR < entriesRInOrder.size(); indexOfDeterminedInR++) { - Map.Entry> variantSiteGroup = entriesRInOrder.get(indexOfDeterminedInR); + Map.Entry> variantSiteGroup = entriesRInOrder.get(indexOfDeterminedInR); if (debugSite) System.out.println("working with variants of the group: " + variantSiteGroup); // Skip if (entriesRInOrder.get(indexOfDeterminedInR).getKey() < callingSpan.getStart() || entriesRInOrder.get(indexOfDeterminedInR).getKey() > callingSpan.getEnd()) { @@ -220,7 +218,7 @@ public static AssemblyResultSet generatePDHaplotypes(final AssemblyResultSet sou continue; } - final List determinedVariants = variantSiteGroup.getValue(); + final List determinedVariants = variantSiteGroup.getValue(); /** * Determined Event Loop: @@ -229,25 +227,25 @@ public static AssemblyResultSet generatePDHaplotypes(final AssemblyResultSet sou * NOTE: we skip the reference allele in the event that we are making determined haplotypes instead of undetermined haplotypes */ for (int IndexOfAllele = (makeDeterminedHapsInstead?0:-1); IndexOfAllele < determinedVariants.size(); IndexOfAllele++) { //note -1 for I here corresponds to the reference allele at this site - if (debugSite) System.out.println("Working with allele at site: "+(IndexOfAllele ==-1? "[ref:"+(variantSiteGroup.getKey()-referenceHaplotype.getStartPosition())+"]" : PartiallyDeterminedHaplotype.getDRAGENDebugVariantContextString((int)referenceHaplotype.getStartPosition()).apply(determinedVariants.get(IndexOfAllele)))); + if (debugSite) System.out.println("Working with allele at site: "+(IndexOfAllele ==-1? "[ref:"+(variantSiteGroup.getKey()-referenceHaplotype.getStartPosition())+"]" : PartiallyDeterminedHaplotype.getDRAGENDebugVariantContextString((int)referenceHaplotype.getStartPosition()).apply(determinedVariants.get(IndexOfAllele).asVariantContext()))); // This corresponds to the DRAGEN code for // 0 0 // 0 1 // 1 0 final boolean isRef = IndexOfAllele == -1; - final VariantContext determinedEventToTest = determinedVariants.get(isRef ? 0 : IndexOfAllele); + final Event determinedEventToTest = determinedVariants.get(isRef ?
0 : IndexOfAllele); /* * Here we handle any of the necessary work to deal with the event groups and maybe forming compound branches out of the groups */ //Set Determined pairs: - List> determinedPairs = new ArrayList<>(); + List> determinedPairs = new ArrayList<>(); for(int j = 0; j < determinedVariants.size(); j++) { determinedPairs.add(new Tuple<>(determinedVariants.get(j), IndexOfAllele == j)); } // Loop over eventGroups, have each of them return a list of Events - List> branchExcludeAlleles = new ArrayList<>(); + List> branchExcludeAlleles = new ArrayList<>(); branchExcludeAlleles.add(new HashSet<>()); // Add the null branch (assuming no exclusions) /* Note for future posterity: @@ -259,13 +257,13 @@ public static AssemblyResultSet generatePDHaplotypes(final AssemblyResultSet sou */ for(EventGroup group : eventGroups ) { if (group.causesBranching()) { - List>> groupVCs = group.getVariantGroupsForEvent(determinedPairs, true); + List>> groupVCs = group.getVariantGroupsForEvent(determinedPairs, true); // Combinatorially expand the branches as necessary - List> newBranchesToAdd = new ArrayList<>(); - for (Set excludedVars : branchExcludeAlleles) { + List> newBranchesToAdd = new ArrayList<>(); + for (Set excludedVars : branchExcludeAlleles) { //For every exclude group, fork it by each subset we have: for (int i = 1; i < groupVCs.size(); i++) { //NOTE: iterate starting at 1 here because we special case that branch at the end - Set newSet = new HashSet<>(excludedVars); + Set newSet = new HashSet<>(excludedVars); groupVCs.get(i).stream().filter(t -> !t.b).forEach(t -> newSet.add(t.a)); newBranchesToAdd.add(newSet); } @@ -288,9 +286,9 @@ public static AssemblyResultSet generatePDHaplotypes(final AssemblyResultSet sou for (int i = 0; i < branchExcludeAlleles.size(); i++) { final int ifinal = i; System.out.println("Branch "+i+" VCs:"); - System.out.println("exclude:"+branchExcludeAlleles.get(i).stream().map(PartiallyDeterminedHaplotype.getDRAGENDebugVariantContextString((int)referenceHaplotype.getStartPosition())).collect(Collectors.joining("->"))); + System.out.println("exclude:"+branchExcludeAlleles.get(i).stream().map(Event::asVariantContext).map(PartiallyDeterminedHaplotype.getDRAGENDebugVariantContextString((int)referenceHaplotype.getStartPosition())).collect(Collectors.joining("->"))); //to match dragen debug output for personal sanity - System.out.println("include:"+variantsInOrder.stream().filter(variantContext -> !branchExcludeAlleles.get(ifinal).contains(variantContext)).map(PartiallyDeterminedHaplotype.getDRAGENDebugVariantContextString((int)referenceHaplotype.getStartPosition())).collect(Collectors.joining("->"))); + System.out.println("include:"+eventsInOrder.stream().filter(variantContext -> !branchExcludeAlleles.get(ifinal).contains(variantContext)).map(Event::asVariantContext).map(PartiallyDeterminedHaplotype.getDRAGENDebugVariantContextString((int)referenceHaplotype.getStartPosition())).collect(Collectors.joining("->"))); } } @@ -298,10 +296,10 @@ public static AssemblyResultSet generatePDHaplotypes(final AssemblyResultSet sou /** * Now handle each branch independently of the others. (the logic is the same in every case except we must ensure that none of the excluded alleles get included when constructing haps.
*/ - for (Set excludeEvents : branchExcludeAlleles) { + for (Set excludeEvents : branchExcludeAlleles) { List branchHaps = new ArrayList<>(); - List newBranch = new ArrayList<>(); + List newBranch = new ArrayList<>(); // Handle the simple case of making PD haplotypes if (!makeDeterminedHapsInstead) { @@ -325,7 +323,7 @@ public static AssemblyResultSet generatePDHaplotypes(final AssemblyResultSet sou } else { // TODO currently this approach doesn't properly handle a bunch of duplicate events... // If we are producing determined bases, then we want to enforce that every new event at least has THIS event as a variant. - List> variantGroupsCombinatorialExpansion = new ArrayList<>(); + List> variantGroupsCombinatorialExpansion = new ArrayList<>(); variantGroupsCombinatorialExpansion.add(new ArrayList<>()); // We can drastically cut down on combinatorial expansion here by saying each allele is the FIRST variant in the list, thus preventing double counting. for (int secondRIndex = indexOfDeterminedInR; secondRIndex < entriesRInOrder.size(); secondRIndex++) { @@ -335,16 +333,16 @@ public static AssemblyResultSet generatePDHaplotypes(final AssemblyResultSet sou } // Iterate through the growing combinatorial expansion of haps, split it into either having or not having the variant. if (secondRIndex == indexOfDeterminedInR) { - for (List hclist : variantGroupsCombinatorialExpansion) { + for (List hclist : variantGroupsCombinatorialExpansion) { hclist.add(determinedEventToTest); } // Otherwise make sure to include the combinatorial expansion of events at the other site } else { - List> hapsPerVCsAtRSite = new ArrayList<>(); - for (VariantContext vc : entriesRInOrder.get(secondRIndex).getValue()) { - for (List hclist : variantGroupsCombinatorialExpansion) { + List> hapsPerVCsAtRSite = new ArrayList<>(); + for (Event vc : entriesRInOrder.get(secondRIndex).getValue()) { + for (List hclist : variantGroupsCombinatorialExpansion) { if (!excludeEvents.contains(vc)) { - List newList = new ArrayList<>(hclist); + List newList = new ArrayList<>(hclist); newList.add(vc); hapsPerVCsAtRSite.add(newList); } @@ -355,15 +353,15 @@ public static AssemblyResultSet generatePDHaplotypes(final AssemblyResultSet sou } } - for (List subset : variantGroupsCombinatorialExpansion) { + for (List subset : variantGroupsCombinatorialExpansion) { subset.sort(HAPLOTYPE_SNP_FIRST_COMPARATOR); - if (debugSite) System.out.println("Construcing Hap From Events:"+ subset.stream().map(PartiallyDeterminedHaplotype.getDRAGENDebugVariantContextString((int) referenceHaplotype.getStartPosition())).collect(Collectors.joining("->"))); + if (debugSite) System.out.println("Constructing Hap From Events:"+ subset.stream().map(Event::asVariantContext).map(PartiallyDeterminedHaplotype.getDRAGENDebugVariantContextString((int) referenceHaplotype.getStartPosition())).collect(Collectors.joining("->"))); branchHaps.add(constructHaplotypeFromVariants(referenceHaplotype, subset, true)); } } // Add the branch haps to the results: if (debugSite) { - System.out.println("Constructed Haps for Branch"+excludeEvents.stream().map(PartiallyDeterminedHaplotype.getDRAGENDebugVariantContextString((int) referenceHaplotype.getStartPosition())).collect(Collectors.joining(",")) + ":"); + System.out.println("Constructed Haps for Branch"+excludeEvents.stream().map(Event::asVariantContext).map(PartiallyDeterminedHaplotype.getDRAGENDebugVariantContextString((int) referenceHaplotype.getStartPosition())).collect(Collectors.joining(",")) + ":");
System.out.println(branchHaps.stream().map(h -> h.getCigar() + " " + h.toString()).collect(Collectors.joining("\n"))); } @@ -417,22 +415,22 @@ public int compare(Haplotype o1, Haplotype o2) { * * @return A list of lists of variant contexts that correspond to disallowed groups. This list may be empty if none are found. */ - private static List> smithWatermanRealignPairsOfVariantsForEquivalentEvents(Haplotype referenceHaplotype, SmithWatermanAligner aligner, SWParameters swParameters, boolean debugSite, TreeSet variantsInOrder, List vcsAsList) { - List> disallowedPairs = new ArrayList<>(); + private static List> smithWatermanRealignPairsOfVariantsForEquivalentEvents(Haplotype referenceHaplotype, SmithWatermanAligner aligner, SWParameters swParameters, boolean debugSite, TreeSet eventsInOrder, List eventsAsList) { + List> disallowedPairs = new ArrayList<>(); //Iterate over all 2 element permutations in which one element is an indel and test for alignments - for (int i = 0; i < vcsAsList.size(); i++) { - final VariantContext firstEvent = vcsAsList.get(i); + for (int i = 0; i < eventsAsList.size(); i++) { + final Event firstEvent = eventsAsList.get(i); if (firstEvent.isIndel()) { // For every indel, make every 2-3 element subset (without overlapping) of variants to test for equivalency - for (int j = 0; j < vcsAsList.size(); j++) { - final VariantContext secondEvent = vcsAsList.get(j); + for (int j = 0; j < eventsAsList.size(); j++) { + final Event secondEvent = eventsAsList.get(j); // Don't compare myself, anything overlapping me, or indels I've already examined (to prevent double counting) if (j != i && !eventsOverlapForPDHapsCode(firstEvent, secondEvent, true) && ((!secondEvent.isIndel()) || j > i)) { - final List events = new ArrayList<>(Arrays.asList(firstEvent, secondEvent)); + final List events = new ArrayList<>(Arrays.asList(firstEvent, secondEvent)); events.sort(HAPLOTYPE_SNP_FIRST_COMPARATOR); - if (debugSite) System.out.println("Testing events: "+ events.stream().map(PartiallyDeterminedHaplotype.getDRAGENDebugVariantContextString((int) referenceHaplotype.getStartPosition())).collect(Collectors.joining("->"))); - if (constructArtificialHaplotypeAndTestEquivalentEvents(referenceHaplotype, aligner, swParameters, variantsInOrder, events, debugSite)) { + if (debugSite) System.out.println("Testing events: "+ events.stream().map(Event::asVariantContext).map(PartiallyDeterminedHaplotype.getDRAGENDebugVariantContextString((int) referenceHaplotype.getStartPosition())).collect(Collectors.joining("->"))); + if (constructArtificialHaplotypeAndTestEquivalentEvents(referenceHaplotype, aligner, swParameters, eventsInOrder, events, debugSite)) { disallowedPairs.add(events); } } @@ -443,32 +441,32 @@ private static List> smithWatermanRealignPairsOfVariantsFor //TODO NOTE: there are some discrepancies with the iteration over 3x variants in some complicated cases involving //TODO lots of transitively disallowed pairs. Hopefully this is a minor effect.
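// Illustrative (hypothetical) example of the equivalence being tested here: against reference
// bases CAT, the pair {deletion CA->C at position 1, SNP T->A at position 3} spells the haplotype
// "CA", which Smith-Waterman realignment recovers as the single deletion AT->A at position 2.
// If that deletion is already among the discovered events, the pair is merely an alternate
// spelling of existing variation, so it is recorded as disallowed and the two events will never
// co-occur on a determined haplotype.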
//Now iterate over all 3 element subsets and check those for equivalent events as well - for (int i = 0; i < vcsAsList.size(); i++) { - final VariantContext firstEvent = vcsAsList.get(i); + for (int i = 0; i < eventsAsList.size(); i++) { + final Event firstEvent = eventsAsList.get(i); if (firstEvent.isIndel()) { // For every indel, make every 2-3 element subset (without overlapping) of variants to test for equivalency - for (int j = 0; j < vcsAsList.size(); j++) { - final VariantContext secondEvent = vcsAsList.get(j); + for (int j = 0; j < eventsAsList.size(); j++) { + final Event secondEvent = eventsAsList.get(j); // Don't compare myself, anything overlapping me, or indels I've already examined (to prevent double counting) if (j != i && !eventsOverlapForPDHapsCode(firstEvent, secondEvent, true) && ((!secondEvent.isIndel()) || j > i)) { // if i and j are already disallowed keep going if (disallowedPairs.stream().anyMatch(p -> p.contains(firstEvent) && p.contains(secondEvent))) { continue; } - final List events = new ArrayList<>(Arrays.asList(firstEvent, secondEvent)); + final List events = new ArrayList<>(Arrays.asList(firstEvent, secondEvent)); // If our 2 element subset wasn't equivalent, test subsets of 3 including this: - for (int k = j+1; k < vcsAsList.size(); k++) { - final VariantContext thirdEvent = vcsAsList.get(k); + for (int k = j+1; k < eventsAsList.size(); k++) { + final Event thirdEvent = eventsAsList.get(k); if (k != i && !eventsOverlapForPDHapsCode(thirdEvent, firstEvent, true) && !eventsOverlapForPDHapsCode(thirdEvent, secondEvent, true)) { // if k and j or k and i are disallowed, keep looking if (disallowedPairs.stream().anyMatch(p -> (p.contains(firstEvent) && p.contains(thirdEvent)) || (p.contains(secondEvent) && p.contains(thirdEvent)))) { continue; } - List subList = new ArrayList<>(events); + List subList = new ArrayList<>(events); subList.add(thirdEvent); subList.sort(HAPLOTYPE_SNP_FIRST_COMPARATOR); - if (debugSite) System.out.println("Testing events: " + subList.stream().map(PartiallyDeterminedHaplotype.getDRAGENDebugVariantContextString((int) referenceHaplotype.getStartPosition())).collect(Collectors.joining("->"))); - if (constructArtificialHaplotypeAndTestEquivalentEvents(referenceHaplotype, aligner, swParameters, variantsInOrder, subList, debugSite)) { + if (debugSite) System.out.println("Testing events: " + subList.stream().map(Event::asVariantContext).map(PartiallyDeterminedHaplotype.getDRAGENDebugVariantContextString((int) referenceHaplotype.getStartPosition())).collect(Collectors.joining("->"))); + if (constructArtificialHaplotypeAndTestEquivalentEvents(referenceHaplotype, aligner, swParameters, eventsInOrder, subList, debugSite)) { disallowedPairs.add(subList); } } @@ -488,17 +486,17 @@ private static List> smithWatermanRealignPairsOfVariantsFor * * @param snpsOverlap if false, don't ever evaluate snps as overlapping other snps (we do this because sometimes we need to construct artificial haps where we don't allow overlapping) */ - static boolean eventsOverlapForPDHapsCode(VariantContext vc1, VariantContext vc2, boolean snpsOverlap){ - if (!snpsOverlap && vc2.isSNP() && vc1.isSNP()) { + static boolean eventsOverlapForPDHapsCode(Event e1, Event e2, boolean snpsOverlap){ + if (!snpsOverlap && e2.isSNP() && e1.isSNP()) { return false; } - if (!vc1.getContig().equals(vc2.getContig())) { + if (!e1.getContig().equals(e2.getContig())) { return false; } - double vc1start = vc1.isIndel() ? (vc1.isSimpleDeletion() ?
vc1.getStart() + 1 : vc1.getStart() + 0.5) : vc1.getStart(); - double vc1end = vc1.isSimpleInsertion() ? vc1.getEnd() + 0.5 : vc1.getEnd(); - double vc2start = vc2.isIndel() ? (vc2.isSimpleDeletion() ? vc2.getStart() + 1 : vc2.getStart() + 0.5) : vc2.getStart(); - double vc2end = vc2.isSimpleInsertion() ? vc2.getEnd() + 0.5 : vc2.getEnd(); + double vc1start = e1.isIndel() ? (e1.isSimpleDeletion() ? e1.getStart() + 1 : e1.getStart() + 0.5) : e1.getStart(); + double vc1end = e1.isSimpleInsertion() ? e1.getEnd() + 0.5 : e1.getEnd(); + double vc2start = e2.isIndel() ? (e2.isSimpleDeletion() ? e2.getStart() + 1 : e2.getStart() + 0.5) : e2.getStart(); + double vc2end = e2.isSimpleInsertion() ? e2.getEnd() + 0.5 : e2.getEnd(); //Pulled directly from CoordMath.java (not using here because of doubles) return (vc2start >= vc1start && vc2start <= vc1end) || (vc2end >=vc1start && vc2end <= vc1end) || vc1start >= vc2start && vc1end <= vc2end; @@ -523,7 +521,7 @@ static boolean eventsOverlapForPDHapsCode(VariantContext vc1, VariantContext vc2 * @return true if we SHOULD NOT allow the eventsToTest alleles to appear as alleles together in determined haplotypes */ @VisibleForTesting - private static boolean constructArtificialHaplotypeAndTestEquivalentEvents(Haplotype referenceHaplotype, SmithWatermanAligner aligner, SWParameters swParameters, TreeSet vcs, List eventsToTest, boolean debugSite) { + private static boolean constructArtificialHaplotypeAndTestEquivalentEvents(Haplotype referenceHaplotype, SmithWatermanAligner aligner, SWParameters swParameters, TreeSet events, List eventsToTest, boolean debugSite) { final Haplotype realignHap = constructHaplotypeFromVariants(referenceHaplotype, eventsToTest, false); //Special case to capture events that equal the reference (and thus have empty event maps). if (Arrays.equals(realignHap.getBases(), referenceHaplotype.getBases())) { @@ -538,17 +536,13 @@ private static boolean constructArtificialHaplotypeAndTestEquivalentEvents(Haplo //TODO for the existing ones. Since we are simply realigning and checking the event map outputs its possible that we consider events to //TODO be disallowed that have an equal SmithWaterman score to the original but a different (but equivalent) variant representation. //TODO This is likely a minor effect on the overall correctness.
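// In plain terms, the check below realigns the artificial haplotype built from eventsToTest and
// inspects its event map: if realignment yields an event that is NOT one of the events we applied
// but DOES match some other event already in the overall set, then this combination is just an
// alternate representation of existing variation and must be disallowed.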
- final boolean wasEquivalentEvent = realignHap.getEventMap().getVariantContexts().stream().filter(eMapVC -> + final boolean wasEquivalentEvent = realignHap.getEventMap().getEvents().stream() // Are there any variants NOT in our initial list - eventsToTest.stream().noneMatch(v -> { - return doVariantsMatch(eMapVC, v); - })) + .filter(event -> eventsToTest.stream().noneMatch(event::equals)) // Do any of variants (that were not in our set of 2-3 targets) appear in our overall list of alleles - .anyMatch(eMapVc -> vcs.stream().anyMatch(v -> { - return doVariantsMatch(eMapVc, v); - })); + .anyMatch(event -> events.stream().anyMatch(event::equals)); if (debugSite) System.out.println( - realignHap.getEventMap().getVariantContexts().stream() + realignHap.getEventMap().getEvents().stream().map(Event::asVariantContext) .map(PartiallyDeterminedHaplotype.getDRAGENDebugVariantContextString((int) referenceHaplotype.getStartPosition())) .collect(Collectors.joining("\n"))); if (wasEquivalentEvent) { @@ -558,27 +552,18 @@ private static boolean constructArtificialHaplotypeAndTestEquivalentEvents(Haplo return wasEquivalentEvent; } - // A helper method to assert that variant contexts in the event map match those outside of it. - private static boolean doVariantsMatch(VariantContext eMapVC, VariantContext v) { - return eMapVC.getStart() == v.getStart() && - eMapVC.getReference().equals(v.getReference()) && - eMapVC.getAlternateAllele(0).equals(v.getAlternateAllele(0)) && - eMapVC.getAlternateAlleles().size() == v.getAlternateAlleles().size(); - } - - /** * NOTE: this accepts multiple alleles stacked up at the same base (assuming the order is SNP -> INDEL) * NOTE: However this class does NOT accept multiple SNPs overlapping or SNPs overlapping deletions */ @VisibleForTesting - public static Haplotype constructHaplotypeFromVariants(final Haplotype refHap, final List variantContexts, final boolean setEventMap) { + public static Haplotype constructHaplotypeFromVariants(final Haplotype refHap, final List events, final boolean setEventMap) { //ASSERT that the base is ref and cool if (!refHap.isReference() || refHap.getCigar().numCigarElements() > 1) { throw new GATKException("This is not a valid base haplotype for construction"); } //ASSERT that everything is fully overlapping the reference.
- variantContexts.stream().forEach(v -> {if (!refHap.getGenomeLocation().contains(v)) throw new GATKException("Provided Variant Context"+v+"doesn't overlap haplotype "+refHap);}); + events.stream().forEach(v -> {if (!refHap.getGenomeLocation().contains(v)) throw new GATKException("Provided event "+v+" doesn't overlap haplotype "+refHap);}); final long genomicStartPosition = refHap.getStartPosition(); long refOffsetOfNextBaseToAdd = genomicStartPosition; @@ -589,24 +574,21 @@ public static Haplotype constructHaplotypeFromVariants(final Haplotype refHap, f //ASSUME sorted for now // use the reverse list to save myself figuring out cigars for right now - for (VariantContext vc : variantContexts) { - if (vc.getAlternateAlleles().size() > 1) { - throw new GATKException("too may alt alleles"); - } - Allele refAllele = vc.getReference(); - Allele altAllele = vc.getAlternateAllele(0); + for (Event event : events) { + Allele refAllele = event.refAllele(); + Allele altAllele = event.altAllele(); int intermediateRefStartPosition = (int) (refOffsetOfNextBaseToAdd - genomicStartPosition); - int intermediateRefEndPosition = Math.toIntExact(vc.getStart() - genomicStartPosition); + int intermediateRefEndPosition = Math.toIntExact(event.getStart() - genomicStartPosition); - if ((vc.isIndel() && intermediateRefEndPosition - intermediateRefStartPosition < -1) || (!vc.isIndel() && intermediateRefEndPosition - intermediateRefStartPosition < 0)) {//todo clean this up - throw new GATKException("Variant "+vc+" is out of order in the PD event list: "+variantContexts); + if ((event.isIndel() && intermediateRefEndPosition - intermediateRefStartPosition < -1) || (!event.isIndel() && intermediateRefEndPosition - intermediateRefStartPosition < 0)) {//todo clean this up + throw new GATKException("Variant "+event+" is out of order in the PD event list: "+events); } if (intermediateRefEndPosition - intermediateRefStartPosition > 0) { // Append the cigar element for the anchor base if necessary.
runningCigar.add(new CigarElement(intermediateRefEndPosition - intermediateRefStartPosition, CigarOperator.M)); } // Include the ref base for indel if the base immediately preceding this event is not already tracked - boolean includeRefBaseForIndel = vc.isIndel() && (intermediateRefStartPosition <= intermediateRefEndPosition); + boolean includeRefBaseForIndel = event.isIndel() && (intermediateRefStartPosition <= intermediateRefEndPosition); CigarElement newCigarElement; if (refAllele.length() == altAllele.length()) { @@ -623,7 +605,7 @@ public static Haplotype constructHaplotypeFromVariants(final Haplotype refHap, f runningCigar.add(newCigarElement); if (intermediateRefEndPosition - intermediateRefStartPosition > 0) { - newRefBases = ArrayUtils.addAll(newRefBases, ArrayUtils.subarray(refbases, intermediateRefStartPosition, (int) (vc.getStart() - genomicStartPosition))); // bases before the variant + newRefBases = ArrayUtils.addAll(newRefBases, ArrayUtils.subarray(refbases, intermediateRefStartPosition, (int) (event.getStart() - genomicStartPosition))); // bases before the variant } // Handle the ref base for indels that exclude their ref bases if (refAllele.length() != altAllele.length() && !includeRefBaseForIndel) { @@ -632,7 +614,7 @@ public static Haplotype constructHaplotypeFromVariants(final Haplotype refHap, f } else { newRefBases = ArrayUtils.addAll(newRefBases, altAllele.getBases()); // refbases added } - refOffsetOfNextBaseToAdd = vc.getEnd() + 1; //TODO this is probably not set for future reference + refOffsetOfNextBaseToAdd = event.getEnd() + 1; //TODO this is probably not set for future reference } // Finish off the haplotype with the final bases @@ -659,7 +641,7 @@ public static Haplotype constructHaplotypeFromVariants(final Haplotype refHap, f */ @VisibleForTesting //TODO When we implement JointDetection we will need to allow multiple eventWithVariants to be present...
- static PartiallyDeterminedHaplotype createNewPDHaplotypeFromEvents(final Haplotype base, final VariantContext eventWithVariant, final boolean useRef, final List constituentEvents) { + static PartiallyDeterminedHaplotype createNewPDHaplotypeFromEvents(final Haplotype base, final Event eventWithVariant, final boolean useRef, final List constituentEvents) { //ASSERT that the base is ref and cool if (!base.isReference() || base.getCigar().numCigarElements() > 1) { throw new RuntimeException("This is not a valid base haplotype for construction"); } @@ -675,25 +657,25 @@ static PartiallyDeterminedHaplotype createNewPDHaplotypeFromEvents(final Haploty //ASSUME sorted for now // use the reverse list to save myself figuring out cigars for right now - for (VariantContext vc : constituentEvents) { + for (Event event : constituentEvents) { int intermediateRefStartPosition = (int) (refOffsetOfNextBaseToAdd - genomicStartPosition); - int intermediateRefEndPosition = Math.toIntExact(vc.getStart() - genomicStartPosition); + int intermediateRefEndPosition = Math.toIntExact(event.getStart() - genomicStartPosition); // An extra special case if we are a SNP following a SNP - if (vc.isSNP() && intermediateRefEndPosition - intermediateRefStartPosition == -1 && ((pdBytes[pdBytes.length-1] & PartiallyDeterminedHaplotype.SNP) != 0) ) { - byte[] array = PartiallyDeterminedHaplotype.getPDBytesForHaplotypes(vc.getReference(), vc.getAlternateAllele(0)); + if (event.isSNP() && intermediateRefEndPosition - intermediateRefStartPosition == -1 && ((pdBytes[pdBytes.length-1] & PartiallyDeterminedHaplotype.SNP) != 0) ) { + byte[] array = PartiallyDeterminedHaplotype.getPDBytesForHaplotypes(event.refAllele(), event.altAllele()); pdBytes[pdBytes.length-1] = (byte) (pdBytes[pdBytes.length-1] | array[0]); // adding any partial bases if necessary continue; } // Ref alleles (even if they overlap undetermined events) should be skipped - if (vc.getStart()==eventWithVariant.getStart() && useRef) { + if (event.getStart()==eventWithVariant.getStart() && useRef) { continue; } //Check that we are allowed to add this event (and fail if we are not) - if ((vc.isIndel() && intermediateRefEndPosition - intermediateRefStartPosition < -1) || (!vc.isIndel() && intermediateRefEndPosition - intermediateRefStartPosition < 0)) {//todo clean this up - throw new RuntimeException("Variant "+vc+" is out of order in the PD event list: "+constituentEvents); + if ((event.isIndel() && intermediateRefEndPosition - intermediateRefStartPosition < -1) || (!event.isIndel() && intermediateRefEndPosition - intermediateRefStartPosition < 0)) {//todo clean this up + throw new RuntimeException("Variant "+event+" is out of order in the PD event list: "+constituentEvents); } // Add the cigar for bases we skip over @@ -702,19 +684,16 @@ static PartiallyDeterminedHaplotype createNewPDHaplotypeFromEvents(final Haploty } // Include the ref base for indel if the base immediately preceding this event is not already tracked - boolean includeRefBaseForIndel = vc.isIndel() && (intermediateRefStartPosition <= intermediateRefEndPosition); + boolean includeRefBaseForIndel = event.isIndel() && (intermediateRefStartPosition <= intermediateRefEndPosition); // Determine the alleles to add - Allele refAllele = vc.getReference(); - Allele altAllele = vc.getAlternateAllele(0); + Allele refAllele = event.refAllele(); + Allele altAllele = event.altAllele(); boolean isInsertion = altAllele.length() > refAllele.length(); // If it's an insertion we flip to "ADD" the bases to the ref.
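// The logic below decides which bases each event contributes to the PD haplotype: the single
// determined ("blessed") event contributes its ref or alt bases outright, while every other,
// undetermined event contributes its longer allele together with PD bytes that mark the
// alternate bases as merely possible rather than certain.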
boolean isEvent = false; byte[] basesToAdd; // If this is the blessed variant, add - if (vc.getStart()==eventWithVariant.getStart()) { - if (!useRef && eventWithVariant.getAlternateAlleles().size() > 1) { - throw new RuntimeException("the eventWithVariant variant must be monoallelic"); - } + if (event.getStart()==eventWithVariant.getStart()) { isEvent = true; basesToAdd = useRef? refAllele.getBases() : altAllele.getBases(); // Otherwise make sure we are adding the longest allele (for indels) or the ref allele for SNPs. @@ -723,7 +702,7 @@ static PartiallyDeterminedHaplotype createNewPDHaplotypeFromEvents(final Haploty } // Remove anchor base if necessary - if (vc.isIndel() && !includeRefBaseForIndel) { + if (event.isIndel() && !includeRefBaseForIndel) { basesToAdd = Arrays.copyOfRange(basesToAdd, 1, basesToAdd.length); } @@ -733,10 +712,10 @@ static PartiallyDeterminedHaplotype createNewPDHaplotypeFromEvents(final Haploty CigarElement newCigarElement; // if this is the event special case if (isEvent) { - if (vc.isSNP()) { + if (event.isSNP()) { newCigarElement = new CigarElement(refAllele.length(), useRef? CigarOperator.M : CigarOperator.X); } else { - if (vc.isIndel() && includeRefBaseForIndel) { + if (event.isIndel() && includeRefBaseForIndel) { runningCigar.add(new CigarElement( 1, CigarOperator.M)); } // For Insertions: mark the Cigar as I if we aren't in ref @@ -749,7 +728,7 @@ static PartiallyDeterminedHaplotype createNewPDHaplotypeFromEvents(final Haploty } // If we aren't in the blessed variant, add a match and make sure the array is set accordingly } else { - if (!vc.isIndel()) { + if (!event.isIndel()) { newCigarElement = new CigarElement(refAllele.length() , CigarOperator.M); } else { // Maybe add the cigar for the anchor base @@ -770,8 +749,8 @@ static PartiallyDeterminedHaplotype createNewPDHaplotypeFromEvents(final Haploty // Add ref bases up to this point if necessary if (intermediateRefEndPosition - intermediateRefStartPosition > 0) { - newHaplotypeBasees = ArrayUtils.addAll(newHaplotypeBasees, ArrayUtils.subarray(refBasesToAddTo, intermediateRefStartPosition, (int) (vc.getStart() - genomicStartPosition))); // bases before the variant - pdBytes = ArrayUtils.addAll(pdBytes, new byte[vc.getStart() - (int)refOffsetOfNextBaseToAdd]); // bases before the variant + newHaplotypeBasees = ArrayUtils.addAll(newHaplotypeBasees, ArrayUtils.subarray(refBasesToAddTo, intermediateRefStartPosition, (int) (event.getStart() - genomicStartPosition))); // bases before the variant + pdBytes = ArrayUtils.addAll(pdBytes, new byte[event.getStart() - (int)refOffsetOfNextBaseToAdd]); // bases before the variant } newHaplotypeBasees = ArrayUtils.addAll(newHaplotypeBasees, basesToAdd); // refbases added if (includeRefBaseForIndel) { @@ -784,7 +763,7 @@ static PartiallyDeterminedHaplotype createNewPDHaplotypeFromEvents(final Haploty refAllele, isInsertion? refAllele : altAllele)); // refbases added - refOffsetOfNextBaseToAdd = vc.getEnd() + 1; //TODO this is probably not set for future reference + refOffsetOfNextBaseToAdd = event.getEnd() + 1; //TODO this is probably not set for future reference } // Finish off the haplotype with the final bases @@ -806,23 +785,22 @@ static PartiallyDeterminedHaplotype createNewPDHaplotypeFromEvents(final Haploty // A helper class for managing mutually exclusive event clusters and the logic around forming valid events against each other.
private static class EventGroup { - List variantsInBitmapOrder; - HashSet variantContextSet; + List variantsInBitmapOrder; + HashSet variantContextSet; //From Illumina (there is a LOT of math that will eventually go into these) BitSet allowedEvents = null; - // Optimizaiton to save ourselves recomputing the subsets at every point its necessary to do so. - List>> cachedEventLitsts = null; + // Optimization to save ourselves recomputing the subsets at every point it's necessary to do so. + List>> cachedEventLists = null; - public EventGroup(final VariantContext variantContext) { - variantsInBitmapOrder = new ArrayList<>(); - variantContextSet = new HashSet<>(); - variantsInBitmapOrder.add(variantContext); - variantContextSet.add(variantContext); - } - public EventGroup() { + public EventGroup(final Event ... events) { variantsInBitmapOrder = new ArrayList<>(); variantContextSet = new HashSet<>(); + + for (final Event event : events) { + variantsInBitmapOrder.add(event); + variantContextSet.add(event); + } } /** @@ -840,7 +818,7 @@ public EventGroup() { * @param disallowedEvents Pairs of events disallowed * @return false if the event group is too large to process */ - public boolean populateBitset(List> disallowedEvents) { + public boolean populateBitset(List> disallowedEvents) { if (variantsInBitmapOrder.size() > MAX_VAR_IN_EVENT_GROUP) { return false; } @@ -864,11 +842,11 @@ public boolean populateBitset(List> disallowedEvents) { } } // mark as disallowed any sets of variants from the bitmask. - for (List disallowed : disallowedEvents) { + for (List disallowed : disallowedEvents) { // if (disallowed.stream().anyMatch(v -> variantContextSet.contains(v))){ int bitmask = 0; - for (VariantContext v : disallowed) { + for (Event v : disallowed) { int indexOfV = variantsInBitmapOrder.indexOf(v); if (indexOfV < 0) { throw new RuntimeException("Something went wrong in event group merging, variant "+v+" is missing from the event group despite being in a mutex pair: "+disallowed+"\n"+this); @@ -905,11 +883,11 @@ public boolean populateBitset(List> disallowedEvents) { * @param disallowSubsets * @return */ - public List>> getVariantGroupsForEvent(final List> eventsForMask, final boolean disallowSubsets) { + public List>> getVariantGroupsForEvent(final List> eventsForMask, final boolean disallowSubsets) { // If we are dealing with an external to this list event int eventMask = 0; int maskValues = 0; - for(Tuple event : eventsForMask) { + for(Tuple event : eventsForMask) { if (variantContextSet.contains(event.a)) { int index = variantsInBitmapOrder.indexOf(event.a); eventMask = eventMask | (1 << index); @@ -917,8 +895,8 @@ public List>> getVariantGroupsForEvent(final } } // Special case (if we are determining bases outside of this mutex cluster we can reuse the work from previous iterations) - if (eventMask == 0 && cachedEventLitsts != null) { - return cachedEventLitsts; + if (eventMask == 0 && cachedEventLists != null) { + return cachedEventLists; } List ints = new ArrayList<>(); @@ -942,9 +920,9 @@ public List>> getVariantGroupsForEvent(final } // Now that we have all the mutex groups, unpack them into lists of variants - List>> output = new ArrayList<>(); + List>> output = new ArrayList<>(); for (Integer grp : ints) { - List> newGrp = new ArrayList<>(); + List> newGrp = new ArrayList<>(); for (int i = 0; i < variantsInBitmapOrder.size(); i++) { // if the corresponding bit is 1, set it as such, otherwise set it as 0.
newGrp.add(new Tuple<>(variantsInBitmapOrder.get(i), ((1<>> getVariantGroupsForEvent(final } // Cache the result if(eventMask==0) { - cachedEventLitsts = Collections.unmodifiableList(output); + cachedEventLists = Collections.unmodifiableList(output); } return output; } @@ -964,14 +942,14 @@ public boolean causesBranching() { //Print The event group in Illumina indexed ordering: public String toDisplayString(int startPos) { - return "EventGroup: " + variantsInBitmapOrder.stream().map(vc -> PartiallyDeterminedHaplotype.getDRAGENDebugVariantContextString(startPos).apply(vc)).collect(Collectors.joining("->")); + return "EventGroup: " + variantsInBitmapOrder.stream().map(Event::asVariantContext).map(vc -> PartiallyDeterminedHaplotype.getDRAGENDebugVariantContextString(startPos).apply(vc)).collect(Collectors.joining("->")); } - public boolean contains(final VariantContext event) { + public boolean contains(final Event event) { return variantContextSet.contains(event); } - public void addEvent(final VariantContext event) { + public void addEvent(final Event event) { variantsInBitmapOrder.add(event); variantContextSet.add(event); allowedEvents = null; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/PileupDetectionArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/PileupDetectionArgumentCollection.java index 9350d48f470..2e134ce8d3a 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/PileupDetectionArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/PileupDetectionArgumentCollection.java @@ -122,7 +122,8 @@ public final class PileupDetectionArgumentCollection { public double pileupAbsoluteDepth = 0; @Hidden @Argument(fullName= PILEUP_DETECTION_INDEL_SNP_BLOCKING_RANGE, doc = "Pileup Detection: Filters out pileup snps within this many bases of an assembled indel. (Requires '--"+PILEUP_DETECTION_LONG_NAME+"' argument)", optional = true, minValue = 0D) - public int snpAdajacentToAssemblyIndel = 5; + public int snpAdjacentToAssemblyIndel = 5; + @Hidden @Argument(fullName= PILEUP_DETECTION_SNP_BASEQUALITY_THRESHOLD, doc = "Pileup Detection: Filters out reads from pileup SNPs with base quality lower than this threshold. (Requires '--"+PILEUP_DETECTION_LONG_NAME+"' argument)", optional = true) public int qualityForSnpsInPileupDetection = 12; //WHY is this two different than the regular active region determination limit (10)? Ask DRAGEN engineers. 
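The value semantics of the new Event class are what license the simplifications above, such as replacing the old three-way start/ref/alt comparison with delVariant::equals and collecting events directly into hash sets. A minimal sketch of that behavior, using only the constructor and equals/hashCode defined in Event.java further below (coordinates and bases are invented for illustration):

import htsjdk.variant.variantcontext.Allele;
import org.broadinstitute.hellbender.utils.haplotype.Event;

import java.util.HashSet;
import java.util.Set;

public class EventEqualityDemo {
    public static void main(String[] args) {
        // Two independently discovered copies of the same biallelic SNP...
        final Event fromAssembly = new Event("chr1", 1000, Allele.create("A", true), Allele.create("T"));
        final Event fromPileup = new Event("chr1", 1000, Allele.create("A", true), Allele.create("T"));

        // ...compare equal on (start, ref, alt), so set membership just works:
        final Set<Event> assembled = new HashSet<>();
        assembled.add(fromAssembly);
        System.out.println(assembled.contains(fromPileup)); // true

        // A different alt allele at the same locus is a different event:
        final Event other = new Event("chr1", 1000, Allele.create("A", true), Allele.create("G"));
        System.out.println(assembled.contains(other)); // false
    }
}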
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/RampedHaplotypeCallerEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/RampedHaplotypeCallerEngine.java index 42e2335f63e..b5b5c2cfa88 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/RampedHaplotypeCallerEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/RampedHaplotypeCallerEngine.java @@ -1,9 +1,6 @@ package org.broadinstitute.hellbender.tools.walkers.haplotypecaller; import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.reference.ReferenceSequenceFile; -import htsjdk.samtools.util.Tuple; -import htsjdk.samtools.util.Tuple; import htsjdk.variant.variantcontext.VariantContext; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -13,14 +10,16 @@ import org.broadinstitute.hellbender.engine.spark.AssemblyRegionArgumentCollection; import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.tools.walkers.annotator.*; +import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeAssignmentMethod; import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.ramps.*; import org.broadinstitute.hellbender.utils.SimpleInterval; -import org.broadinstitute.hellbender.utils.downsampling.ReservoirDownsampler; import org.broadinstitute.hellbender.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods; +import org.broadinstitute.hellbender.utils.haplotype.Event; import org.broadinstitute.hellbender.utils.haplotype.EventMap; import org.broadinstitute.hellbender.utils.haplotype.Haplotype; import org.broadinstitute.hellbender.utils.read.GATKRead; +import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils; import java.io.IOException; import java.util.*; @@ -155,7 +154,7 @@ private class CallRegionContext { // prepared fields or fields we're able to regenerate List VCpriors; - List givenAlleles; + List givenAlleles; LongHomopolymerHaplotypeCollapsingEngine haplotypeCollapsing; // assembly results @@ -247,7 +246,9 @@ private void prepare(final CallRegionContext context) { } context.givenAlleles = context.features.getValues(hcArgs.alleles).stream() - .filter(vc -> hcArgs.forceCallFiltered || vc.isNotFiltered()).collect(Collectors.toList()); + .filter(vc -> hcArgs.forceCallFiltered || vc.isNotFiltered()) + .flatMap(vc -> GATKVariantContextUtils.splitVariantContextToEvents(vc, false, GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, false).stream()) + .collect(Collectors.toList()); if (context.givenAlleles.isEmpty() && context.region.size() == 0) { // No reads here so nothing to do! 
@@ -300,7 +301,7 @@ private void assemble(final CallRegionContext context) { // restore trimmed reads - final SortedSet allVariationEvents = context.assemblyResult.getVariationEvents(hcArgs.maxMnpDistance); + final SortedSet allVariationEvents = context.assemblyResult.getVariationEvents(hcArgs.maxMnpDistance); final AssemblyRegionTrimmer.Result trimmingResult = trimmer.trim(context.region, allVariationEvents, context.referenceContext); try { context.assemblyResult = context.assemblyResult.trimTo(trimmingResult.getVariantRegion()); @@ -346,7 +347,7 @@ private void assemble(final CallRegionContext context) { } } - final SortedSet allVariationEvents = untrimmedAssemblyResult.getVariationEvents(hcArgs.maxMnpDistance); + final SortedSet allVariationEvents = untrimmedAssemblyResult.getVariationEvents(hcArgs.maxMnpDistance); AssemblyRegionTrimmer.Result trimmingResult = trimmer.trim(context.region, allVariationEvents, context.referenceContext); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java index 0f768202afa..ab69d224c20 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java @@ -21,6 +21,7 @@ import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.clipping.ReadClipper; +import org.broadinstitute.hellbender.utils.haplotype.Event; import org.broadinstitute.hellbender.utils.haplotype.Haplotype; import org.broadinstitute.hellbender.utils.param.ParamUtils; import org.broadinstitute.hellbender.utils.read.CigarUtils; @@ -917,7 +918,7 @@ public static void addAssembledVariantsToEventMapOutput(final AssemblyResultSet if (queue.size() >= 300) { queue.stream().limit(200).forEachOrdered(vc -> assembledEventMapVcfOutputWriter.get().add(vc)); } - queue.add(event);})); + queue.add(event.asVariantContext());})); } public AssemblyResultSet generateEmptyLLocalAssemblyResult(final AssemblyRegion assemblyRegion, diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java index c71f83df315..0c02ac547a5 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java @@ -29,6 +29,7 @@ import org.broadinstitute.hellbender.tools.walkers.annotator.Annotation; import org.broadinstitute.hellbender.tools.walkers.annotator.StandardMutectAnnotation; import org.broadinstitute.hellbender.tools.walkers.annotator.VariantAnnotatorEngine; +import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeAssignmentMethod; import org.broadinstitute.hellbender.tools.walkers.genotyper.HomogeneousPloidyModel; import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.*; import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.ReadThreadingAssembler; @@ -43,6 +44,7 @@ import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods; import org.broadinstitute.hellbender.utils.genotyper.IndexedSampleList; import org.broadinstitute.hellbender.utils.genotyper.SampleList; 
+import org.broadinstitute.hellbender.utils.haplotype.Event; import org.broadinstitute.hellbender.utils.haplotype.EventMap; import org.broadinstitute.hellbender.utils.haplotype.Haplotype; import org.broadinstitute.hellbender.utils.haplotype.HaplotypeBAMWriter; @@ -56,8 +58,6 @@ import org.broadinstitute.hellbender.utils.smithwaterman.SmithWatermanAligner; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines; -import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AlleleFilteringMutect; -import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.LongHomopolymerHaplotypeCollapsingEngine; import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils; import java.io.File; @@ -253,48 +253,34 @@ public List callRegion(final AssemblyRegion originalAssemblyRegi removeUnmarkedDuplicates(originalAssemblyRegion); - final List givenAlleles = featureContext.getValues(MTAC.alleles).stream() - .filter(vc -> MTAC.forceCallFiltered || vc.isNotFiltered()).collect(Collectors.toList()); - - final List forcedPileupAlleles= MTAC.pileupDetectionArgs.usePileupDetection ? - PileupBasedAlleles.getPileupVariantContexts(originalAssemblyRegion.getAlignmentData(), MTAC.pileupDetectionArgs, header, MTAC.minBaseQualityScore) : - Collections.emptyList(); - + final List givenAlleles = featureContext.getValues(MTAC.alleles).stream() + .filter(vc -> MTAC.forceCallFiltered || vc.isNotFiltered()) + .flatMap(vc -> GATKVariantContextUtils.splitVariantContextToEvents(vc, false, GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, false).stream()) + .collect(Collectors.toList()); final AssemblyResultSet untrimmedAssemblyResult = AssemblyBasedCallerUtils.assembleReads(originalAssemblyRegion, MTAC, header, samplesList, logger, referenceReader, assemblyEngine, aligner, false, MTAC.fbargs, false); ReadThreadingAssembler.addAssembledVariantsToEventMapOutput(untrimmedAssemblyResult, assembledEventMapVariants, MTAC.maxMnpDistance, assembledEventMapVcfOutputWriter); final LongHomopolymerHaplotypeCollapsingEngine haplotypeCollapsing = untrimmedAssemblyResult.getHaplotypeCollapsingEngine(); - final SortedSet allVariationEvents = untrimmedAssemblyResult.getVariationEvents(MTAC.maxMnpDistance); - // PileupCaller events if we need to apply them - final List pileupAllelesFoundShouldFilter = forcedPileupAlleles.stream() - .filter(v -> PileupBasedAlleles.shouldFilterAssemblyVariant(MTAC.pileupDetectionArgs, v)) - .collect(Collectors.toList()); - final List pileupAllelesPassingFilters = forcedPileupAlleles.stream() - .filter(v -> PileupBasedAlleles.passesFilters(MTAC.pileupDetectionArgs, v)) - .collect(Collectors.toList()); - for (final VariantContext given : givenAlleles) { - if (allVariationEvents.stream().noneMatch(vc -> vc.getStart() == given.getStart() && vc.getReference().basesMatch(given.getReference()) && vc.getAlternateAllele(0).basesMatch(given.getAlternateAllele(0)) )) { - allVariationEvents.add(given); - } - } - for (final VariantContext pileupAllele : pileupAllelesPassingFilters) { - if (allVariationEvents.stream().noneMatch(vc -> vc.getStart() == pileupAllele.getStart() && vc.getReference().basesMatch(pileupAllele.getReference()) && vc.getAlternateAllele(0).basesMatch(pileupAllele.getAlternateAllele(0)))) { - allVariationEvents.add(pileupAllele); - } - } + final SortedSet allVariationEvents = untrimmedAssemblyResult.getVariationEvents(MTAC.maxMnpDistance); + + Pair, Set> goodAndBadPileupEvents = + 
PileupBasedAlleles.goodAndBadPileupEvents(originalAssemblyRegion.getAlignmentData(), MTAC.pileupDetectionArgs, header, MTAC.minBaseQualityScore); + final Set goodPileupEvents = goodAndBadPileupEvents.getLeft(); + final Set badPileupEvents = goodAndBadPileupEvents.getRight(); + + goodPileupEvents.forEach(allVariationEvents::add); + givenAlleles.forEach(allVariationEvents::add); final AssemblyRegionTrimmer.Result trimmingResult = trimmer.trim(originalAssemblyRegion, allVariationEvents, referenceContext); if (!trimmingResult.isVariationPresent()) { return emitReferenceConfidence() ? referenceModelForNoVariation(originalAssemblyRegion) : NO_CALLS; } - AssemblyResultSet assemblyResult = untrimmedAssemblyResult.trimTo(trimmingResult.getVariantRegion()); - AssemblyBasedCallerUtils.addGivenAlleles(givenAlleles, MTAC.maxMnpDistance, aligner, MTAC.getHaplotypeToReferenceSWParameters(), assemblyResult); - // Apply the forced pileup calling alleles if there are any that we must filter - if ((!pileupAllelesFoundShouldFilter.isEmpty() || !pileupAllelesPassingFilters.isEmpty())) { - AssemblyBasedCallerUtils.applyPileupEventsAsForcedAlleles(originalAssemblyRegion, MTAC, aligner, assemblyResult.getReferenceHaplotype(), assemblyResult, pileupAllelesFoundShouldFilter, pileupAllelesPassingFilters, MTAC.pileupDetectionArgs.debugPileupStdout); - } + AssemblyResultSet assemblyResult = untrimmedAssemblyResult.trimTo(trimmingResult.getVariantRegion()); + assemblyResult.addGivenAlleles(givenAlleles, MTAC.maxMnpDistance, aligner, MTAC.getHaplotypeToReferenceSWParameters()); + assemblyResult.removeHaplotypesWithBadAlleles(MTAC, badPileupEvents); + assemblyResult.injectPileupEvents(originalAssemblyRegion, MTAC, aligner, goodPileupEvents); // we might find out after assembly that the "active" region actually has no variants if( ! 
assemblyResult.isVariationPresent() ) { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngine.java index e51388efa41..8d603c2b00a 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngine.java @@ -23,6 +23,7 @@ import org.broadinstitute.hellbender.utils.genotyper.AlleleList; import org.broadinstitute.hellbender.utils.genotyper.LikelihoodMatrix; import org.broadinstitute.hellbender.utils.genotyper.SampleList; +import org.broadinstitute.hellbender.utils.haplotype.Event; import org.broadinstitute.hellbender.utils.haplotype.EventMap; import org.broadinstitute.hellbender.utils.haplotype.Haplotype; import org.broadinstitute.hellbender.utils.read.Fragment; @@ -81,7 +82,7 @@ public CalledHaplotypes callMutations( final ReferenceContext referenceContext, final SimpleInterval activeRegionWindow, final FeatureContext featureContext, - final List givenAlleles, + final List givenAlleles, final SAMFileHeader header, final boolean withBamOut, final boolean emitRefConf, @@ -92,8 +93,10 @@ public CalledHaplotypes callMutations( final List haplotypes = logReadLikelihoods.alleles(); - final List startPosKeySet = EventMap.buildEventMapsForHaplotypes(haplotypes, assemblyResultSet.getFullReferenceWithPadding(), - assemblyResultSet.getPaddedReferenceLoc(), MTAC.assemblerArgs.debugAssembly, MTAC.maxMnpDistance).stream() + EventMap.buildEventMapsForHaplotypes(haplotypes, assemblyResultSet.getFullReferenceWithPadding(), + assemblyResultSet.getPaddedReferenceLoc(), MTAC.assemblerArgs.debugAssembly, MTAC.maxMnpDistance); + + final List eventStarts = EventMap.getEventStartPositions(haplotypes).stream() .filter(loc -> activeRegionWindow.getStart() <= loc && loc <= activeRegionWindow.getEnd()) .collect(Collectors.toList()); @@ -110,8 +113,8 @@ public CalledHaplotypes callMutations( } final AlleleLikelihoods logFragmentLikelihoods = logReadLikelihoods.groupEvidence(MTAC.independentMates ? 
read -> read : GATKRead::getName, Fragment::createAndAvoidFailure); - for( final int loc : startPosKeySet ) { - final List eventsAtThisLoc = AssemblyBasedCallerUtils.getVariantContextsFromActiveHaplotypes(loc, haplotypes, false); + for( final int loc : eventStarts ) { + final List eventsAtThisLoc = AssemblyBasedCallerUtils.getEventsFromActiveHaplotypes(loc, haplotypes, false); VariantContext mergedVC = AssemblyBasedCallerUtils.makeMergedVariantContext(eventsAtThisLoc); if( mergedVC == null ) { continue; @@ -144,7 +147,7 @@ public CalledHaplotypes callMutations( final PerAlleleCollection normalArtifactLogOdds = somaticLogOdds(logNormalMatrix); - final Set forcedAlleles = AssemblyBasedCallerUtils.getAllelesConsistentWithGivenAlleles(givenAlleles, mergedVC); + final Set forcedAlleles = AssemblyBasedCallerUtils.allelesConsistentWithGivenAlleles(givenAlleles, mergedVC); final List tumorAltAlleles = mergedVC.getAlternateAlleles().stream() .filter(allele -> forcedAlleles.contains(allele) || tumorLogOdds.getAlt(allele) > MTAC.getEmissionLogOdds()) .collect(Collectors.toList()); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantrecalling/HaplotypeBasedVariantRecaller.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantrecalling/HaplotypeBasedVariantRecaller.java index ee087ff2b68..0babe3ec0e4 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantrecalling/HaplotypeBasedVariantRecaller.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantrecalling/HaplotypeBasedVariantRecaller.java @@ -212,12 +212,13 @@ public Map> simplifiedAssignGenotype Map> result = new LinkedHashMap<>(); // walk on starting locations for haplotypes - for( final int loc : EventMap.buildEventMapsForHaplotypes(haplotypes, ref, refLoc, hcArgs.assemblerArgs.debugAssembly, hcArgs.maxMnpDistance) ) { + EventMap.buildEventMapsForHaplotypes(haplotypes, ref, refLoc, hcArgs.assemblerArgs.debugAssembly, hcArgs.maxMnpDistance); + for( final int loc : EventMap.getEventStartPositions(haplotypes) ) { if ( activeRegionWindow.contains(new SimpleInterval(activeRegionWindow.getContig(), loc, loc)) ) { // collect events - final List eventsAtThisLoc = AssemblyBasedCallerUtils.getVariantContextsFromActiveHaplotypes(loc, + final List eventsAtThisLoc = AssemblyBasedCallerUtils.getEventsFromActiveHaplotypes(loc, haplotypes, true); final List eventsAtThisLocWithSpanDelsReplaced = HaplotypeCallerGenotypingEngine.replaceSpanDels(eventsAtThisLoc, Allele.create(ref[loc - refLoc.getStart()], true), loc); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants.java index 2262ad74184..70c4c46b724 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants.java @@ -4,7 +4,6 @@ import htsjdk.variant.variantcontext.*; import htsjdk.variant.variantcontext.writer.VariantContextWriter; import htsjdk.variant.vcf.*; -import org.apache.commons.lang3.tuple.Pair; import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import org.broadinstitute.barclay.argparser.Hidden; import org.broadinstitute.barclay.help.DocumentedFeature; @@ -15,20 +14,15 @@ import org.broadinstitute.hellbender.engine.ReadsContext; import 
org.broadinstitute.hellbender.engine.ReferenceContext; import org.broadinstitute.hellbender.engine.VariantWalker; -import org.broadinstitute.hellbender.utils.IndexRange; -import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines; import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils; import org.broadinstitute.hellbender.utils.variant.VcfUtils; import picard.cmdline.programgroups.VariantManipulationProgramGroup; import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeAssignmentMethod; -import org.broadinstitute.hellbender.utils.read.AlignmentUtils; import java.nio.file.Path; import java.util.*; -import java.util.stream.Collectors; -import java.util.stream.IntStream; /** * Left-align indels in a variant callset diff --git a/src/main/java/org/broadinstitute/hellbender/utils/haplotype/Event.java b/src/main/java/org/broadinstitute/hellbender/utils/haplotype/Event.java new file mode 100644 index 00000000000..6a94f34710d --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/utils/haplotype/Event.java @@ -0,0 +1,117 @@ +package org.broadinstitute.hellbender.utils.haplotype; + +import htsjdk.samtools.util.Locatable; +import htsjdk.variant.variantcontext.Allele; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.variantcontext.VariantContextBuilder; +import org.apache.commons.lang.builder.HashCodeBuilder; +import org.broadinstitute.hellbender.utils.Utils; + +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; + +/** + * Very simple class wrapping VariantContext when we want to be explicit that a variant is biallelic, such as + * in a haplotype EventMap + */ +public class Event implements Locatable { + private final String contig; + private final int start; + private final int stop; + private final Allele refAllele; + private final Allele altAllele; + + private Map attributesForVariantContext = null; + + private static final long serialVersionUID = 1L; + + public Event(final String contig, final int start, final Allele ref, final Allele alt) { + // TODO: Should this instead silently trim to the minimal representation? 
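+ // Illustrative example (not in the original validation message): ref AC with alt GC shares the trailing base C, so its minimal representation is the SNP A -> G;
+ // the check below rejects such non-minimal pairs rather than silently trimming them.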
+ Utils.validateArg(ref.length() == 1 || alt.length() == 1 || differentLastBase(ref, alt), "Ref and alt alleles have same last base, hence this event is not in its minimal representation."); + Utils.validateArg(ref.isReference(), "ref is not ref"); + this.contig = contig; + this.start = start; + stop = start + ref.length() - 1; + refAllele = ref; + altAllele = alt; + } + + public static Event of(final VariantContext vc) { + Utils.validateArg(vc.isBiallelic(), "variant must be biallelic"); + return new Event(vc.getContig(), vc.getStart(), vc.getReference(), vc.getAlternateAllele(0)); + } + + // only use this for debugging, file output etc -- creating a full VariantContext is slow and defeats the purpose of this slim class + public VariantContext asVariantContext() { + return asVariantContext("source"); + } + + // This should only be used once in the lifecycle of an event: when we make the jump from discovered event to variant context for output + public VariantContext asVariantContext(final String source) { + final VariantContext result = new VariantContextBuilder(source, contig, start, stop, Arrays.asList(refAllele, altAllele)).make(); + if (attributesForVariantContext != null) { + attributesForVariantContext.entrySet().forEach(entry -> result.getCommonInfo().putAttribute(entry.getKey(), entry.getValue())); + } + return result; + } + + @Override + public String getContig() { return contig; } + + @Override + public int getStart() { + return start; + } + + @Override + public int getEnd() { return stop; } + + public Allele refAllele() { + return refAllele; + } + + public Allele altAllele() { + return altAllele; + } + + public boolean isSNP() { return refAllele.length() == 1 && refAllele.length() == altAllele.length(); } + + public boolean isIndel() { return refAllele.length() != altAllele.length() && !altAllele.isSymbolic(); } + + public boolean isSimpleInsertion() { return refAllele.length() == 1 && altAllele.length() > 1; } + + public boolean isSimpleDeletion() { return refAllele.length() > 1 && altAllele.length() == 1; } + + public boolean isMNP() { return refAllele.length() > 1 && refAllele.length() == altAllele.length(); } + + public void setVariantAttribute(final String key, final String value) { + if (attributesForVariantContext == null) { + attributesForVariantContext = new HashMap<>(); + } + attributesForVariantContext.put(key, value); + } + + @Override + public boolean equals(Object obj) { + if (obj == null || obj.getClass() != this.getClass()) { + return false; + } + + final Event other = (Event) obj; + + return this.start == other.start && this.refAllele.equals(other.refAllele) && this.altAllele.equals(other.altAllele); + } + + @Override + public int hashCode() { + return new HashCodeBuilder().append(start).append(refAllele).append(altAllele).hashCode(); + } + + private static boolean differentLastBase(final Allele ref, final Allele alt) { + final byte[] refBases = ref.getBases(); + final byte[] altBases = alt.getBases(); + return refBases.length == 0 || altBases.length == 0 || refBases[refBases.length-1] != altBases[altBases.length-1]; + } +} diff --git a/src/main/java/org/broadinstitute/hellbender/utils/haplotype/EventMap.java b/src/main/java/org/broadinstitute/hellbender/utils/haplotype/EventMap.java index bbc8bd70412..19fcd31135c 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/haplotype/EventMap.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/haplotype/EventMap.java @@ -5,7 +5,6 @@ import htsjdk.samtools.util.Locatable; import 
htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.VariantContext; -import htsjdk.variant.variantcontext.VariantContextBuilder; import org.apache.commons.lang3.ArrayUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -15,104 +14,71 @@ import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.param.ParamUtils; -import java.util.ArrayDeque; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.List; -import java.util.Queue; -import java.util.Set; -import java.util.TreeMap; -import java.util.TreeSet; +import java.util.*; +import java.util.function.Predicate; import java.util.stream.Collectors; /** * Extract simple VariantContext events from a single haplotype */ -public final class EventMap extends TreeMap { +public final class EventMap extends TreeMap { private static final long serialVersionUID = 1L; private static final Logger logger = LogManager.getLogger(EventMap.class); - protected static final int MIN_NUMBER_OF_EVENTS_TO_COMBINE_INTO_BLOCK_SUBSTITUTION = 3; - private static final int MAX_EVENTS_PER_HAPLOTYPE = 3; - private static final int MAX_INDELS_PER_HAPLOTYPE = 2; - public static final Allele SYMBOLIC_UNASSEMBLED_EVENT_ALLELE = Allele.create("", false); - private final Haplotype haplotype; - private final byte[] ref; - private final Locatable refLoc; - private final String sourceNameToAdd; - - public EventMap(final Haplotype haplotype, final byte[] ref, final Locatable refLoc, final String sourceNameToAdd, final int maxMnpDistance) { + public EventMap(final Collection events) { super(); - this.haplotype = haplotype; - this.ref = ref; - this.refLoc = refLoc; - this.sourceNameToAdd = sourceNameToAdd; - processCigarForInitialEvents(maxMnpDistance); + events.forEach(this::addEvent); } - /** - * For testing. Let's you set up a explicit configuration without having to process a haplotype and reference - * @param stateForTesting - */ - public EventMap(final Collection stateForTesting) { - haplotype = null; - ref = null; - refLoc = null; - sourceNameToAdd = null; - for ( final VariantContext vc : stateForTesting ) - addVC(vc); + public static EventMap fromHaplotype(final Haplotype haplotype, final byte[] ref, final Locatable refLoc, final int maxMnpDistance) { + return new EventMap(getEvents(haplotype, ref, refLoc, maxMnpDistance)); + } + + public static EventMap fromHaplotype(final Haplotype haplotype, final byte[] ref, final int maxMnpDistance) { + return new EventMap(getEvents(haplotype, ref, haplotype.getLocation(), maxMnpDistance)); + } + + public static EventMap of(final Event ... events) { + return new EventMap(Arrays.asList(events)); } /** - * * @param maxMnpDistance Phased substitutions separated by this distance or less are merged into MNPs. More than * two substitutions occurring in the same alignment block (ie the same M/X/EQ CIGAR element) * are merged until a substitution is separated from the previous one by a greater distance. * That is, if maxMnpDistance = 1, substitutions at 10,11,12,14,15,17 are partitioned into a MNP * at 10-12, a MNP at 14-15, and a SNP at 17. May not be negative. 
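* For example, maxMnpDistance = 0 merges nothing, so every substituted base becomes its own SNP event.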
*/ - protected void processCigarForInitialEvents(final int maxMnpDistance) { + private static List getEvents(final Haplotype haplotype, final byte[] ref, final Locatable refLoc, final int maxMnpDistance) { ParamUtils.isPositiveOrZero(maxMnpDistance, "maxMnpDistance may not be negative."); final Cigar cigar = haplotype.getCigar(); final byte[] alignment = haplotype.getBases(); int refPos = haplotype.getAlignmentStartHapwrtRef(); if( refPos < 0 ) { - return; + return Collections.emptyList(); } // Protection against SW failures - final List proposedEvents = new ArrayList<>(); + final List proposedEvents = new ArrayList<>(); int alignmentPos = 0; for( int cigarIndex = 0; cigarIndex < cigar.numCigarElements(); cigarIndex++ ) { final CigarElement ce = cigar.getCigarElement(cigarIndex); final int elementLength = ce.getLength(); + switch( ce.getOperator() ) { case I: { - if( refPos > 0 ) { // protect against trying to create insertions/deletions at the beginning of a contig - final List insertionAlleles = new ArrayList<>(); + if( refPos > 0 && cigarIndex > 0 && cigarIndex < cigar.numCigarElements() - 1) { // forbid insertions at start of contig or not resolved within the haplotype final int insertionStart = refLoc.getStart() + refPos - 1; final byte refByte = ref[refPos-1]; - if( BaseUtils.isRegularBase(refByte) ) { - insertionAlleles.add( Allele.create(refByte, true) ); - } - if( cigarIndex == 0 || cigarIndex == cigar.numCigarElements() - 1 ) { - // if the insertion isn't completely resolved in the haplotype, skip it - // note this used to emit SYMBOLIC_UNASSEMBLED_EVENT_ALLELE but that seems dangerous - } else { - byte[] insertionBases = {}; - insertionBases = ArrayUtils.add(insertionBases, ref[refPos - 1]); // add the padding base - insertionBases = ArrayUtils.addAll(insertionBases, Arrays.copyOfRange(alignment, alignmentPos, alignmentPos + elementLength)); - if( BaseUtils.isAllRegularBases(insertionBases) ) { - insertionAlleles.add( Allele.create(insertionBases, false) ); - } - } - if( insertionAlleles.size() == 2 ) { // found a proper ref and alt allele - proposedEvents.add(new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make()); + + byte[] insertionBases = {refByte}; // add the padding base + insertionBases = ArrayUtils.addAll(insertionBases, Arrays.copyOfRange(alignment, alignmentPos, alignmentPos + elementLength)); + if( BaseUtils.isAllRegularBases(insertionBases) ) { + proposedEvents.add(new Event(refLoc.getContig(), insertionStart, Allele.create(refByte, true), Allele.create(insertionBases))); } } alignmentPos += elementLength; @@ -125,15 +91,12 @@ protected void processCigarForInitialEvents(final int maxMnpDistance) { } case D: { - if( refPos > 0 ) { // protect against trying to create insertions/deletions at the beginning of a contig + if( refPos > 0 ) { // forbid deletions at the beginning of a contig final byte[] deletionBases = Arrays.copyOfRange( ref, refPos - 1, refPos + elementLength ); // add padding base - final List deletionAlleles = new ArrayList<>(); final int deletionStart = refLoc.getStart() + refPos - 1; final byte refByte = ref[refPos-1]; if( BaseUtils.isRegularBase(refByte) && BaseUtils.isAllRegularBases(deletionBases) ) { - deletionAlleles.add( Allele.create(deletionBases, true) ); - deletionAlleles.add( Allele.create(refByte, false) ); - proposedEvents.add(new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), deletionStart, deletionStart + elementLength, deletionAlleles).make()); + 
proposedEvents.add(new Event(refLoc.getContig(), deletionStart, Allele.create(deletionBases, true), Allele.create(refByte))); } } refPos += elementLength; @@ -161,12 +124,12 @@ protected void processCigarForInitialEvents(final int maxMnpDistance) { } final Allele refAllele = Allele.create(Arrays.copyOfRange(ref, refPos + start, refPos + end + 1), true); final Allele altAllele = Allele.create(Arrays.copyOfRange(alignment, alignmentPos + start, alignmentPos + end + 1), false); - VariantContext vc; - proposedEvents.add(vc = new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), refLoc.getStart() + refPos + start, refLoc.getStart() + refPos + end, Arrays.asList(refAllele, altAllele)).make()); - if ( haplotype.isCollapsed() ) { + final Event event = new Event(refLoc.getContig(), refLoc.getStart() + refPos + start, refAllele, altAllele); - vc.getCommonInfo().putAttribute(AssemblyBasedCallerUtils.EXT_COLLAPSED_TAG, "1"); + if ( haplotype.isCollapsed() ) { + event.setVariantAttribute(AssemblyBasedCallerUtils.EXT_COLLAPSED_TAG, "1"); } + proposedEvents.add(event); } // move refPos and alignmentPos forward to the end of this cigar element @@ -181,77 +144,45 @@ protected void processCigarForInitialEvents(final int maxMnpDistance) { throw new GATKException( "Unsupported cigar operator created during SW alignment: " + ce.getOperator() ); } } - - for ( final VariantContext proposedEvent : proposedEvents ) - addVC(proposedEvent, true); - } - - /** - * Add VariantContext vc to this map, merging events with the same start sites if necessary - * @param vc the variant context to add - */ - public void addVC(final VariantContext vc) { - addVC(vc, true); + return proposedEvents; } - /** - * Add VariantContext vc to this map - * @param vc the variant context to add - * @param merge should we attempt to merge it with an already existing element, or should we throw an error in that case? - */ - public void addVC(final VariantContext vc, final boolean merge) { - Utils.nonNull(vc); - if ( containsKey(vc.getStart()) ) { - Utils.validate(merge, () -> "Will not merge previously bound variant contexts as merge is false at " + vc); - final VariantContext prev = get(vc.getStart()); - put(vc.getStart(), makeBlock(prev, vc)); - } else - put(vc.getStart(), vc); + // add an event, combining it into a compound event if an event already exists at the same position + private void addEvent(final Event newEvent) { + Utils.nonNull(newEvent); + computeIfPresent(newEvent.getStart(), (pos, oldEvent) -> combineEvents(oldEvent, newEvent)); + putIfAbsent(newEvent.getStart(), newEvent); } /** - * Create a block substitution out of two variant contexts that start at the same position - * - * vc1 can be SNP, and vc2 can then be either a insertion or deletion. - * If vc1 is an indel, then vc2 must be the opposite type (vc1 deletion => vc2 must be an insertion) + * Combine two events with the same start into a single compound event. The resulting event will not be a SNP + * or a simple indel. * - * @param vc1 the first variant context we want to merge - * @param vc2 the second - * @return a block substitution that represents the composite substitution implied by vc1 and vc2 + * e1 can be a SNP, and e2 can then be either an insertion or deletion.
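+ * For example, mirroring the examples in the method body, the SNP A -> T combined with the deletion AC -> A yields the compound event AC -> T.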
+ * If e1 is an indel, then e2 must be the opposite type (e1 deletion => e2 must be an insertion) */ - protected VariantContext makeBlock(final VariantContext vc1, final VariantContext vc2) { - Utils.validateArg( vc1.getStart() == vc2.getStart(), () -> "vc1 and 2 must have the same start but got " + vc1 + " and " + vc2); - Utils.validateArg( vc1.isBiallelic(), "vc1 must be biallelic"); - if ( ! vc1.isSNP() ) { - Utils.validateArg ( (vc1.isSimpleDeletion() && vc2.isSimpleInsertion()) || (vc1.isSimpleInsertion() && vc2.isSimpleDeletion()), - () -> "Can only merge single insertion with deletion (or vice versa) but got " + vc1 + " merging with " + vc2); - } else { - Utils.validateArg(!vc2.isSNP(), () -> "vc1 is " + vc1 + " but vc2 is a SNP, which implies there's been some terrible bug in the cigar " + vc2); - } + protected static Event combineEvents(final Event e1, final Event e2) { + Utils.validateArg( e1.getStart() == e2.getStart(), "e1 and e2 must have the same start"); + + if ( e1.isSNP() || e2.isSNP()) { + Utils.validateArg(!(e1.isSNP() && e2.isSNP()), "Trying to put two overlapping SNPs in one EventMap. This could be a CIGAR bug."); + final Event snp = e1.isSNP() ? e1 : e2; + final Event indel = e1.isSNP() ? e2 : e1; - final Allele ref, alt; - final VariantContextBuilder b = new VariantContextBuilder(vc1); - if ( vc1.isSNP() ) { - // we have to repair the first base, so SNP case is special cased - if ( vc1.getReference().equals(vc2.getReference()) ) { - // we've got an insertion, so we just update the alt to have the prev alt - ref = vc1.getReference(); - alt = Allele.create(vc1.getAlternateAllele(0).getDisplayString() + vc2.getAlternateAllele(0).getDisplayString().substring(1), false); + // SNP + insertion. Example: A -> G (e1) + A -> CT (e2) = A -> GT + if ( snp.refAllele().equals(indel.refAllele()) ) { + return new Event(snp.getContig(), snp.getStart(), snp.refAllele(), Allele.create(snp.altAllele().getDisplayString() + indel.altAllele().getDisplayString().substring(1))); } else { - // we're dealing with a deletion, so we patch the ref - ref = vc2.getReference(); - alt = vc1.getAlternateAllele(0); - b.stop(vc2.getEnd()); + // SNP + deletion. Example: A -> T + AC -> A = AC -> T + return new Event(snp.getContig(), snp.getStart(), indel.refAllele(), snp.altAllele()); } - } else { - final VariantContext insertion = vc1.isSimpleInsertion() ? vc1 : vc2; - final VariantContext deletion = vc1.isSimpleInsertion() ? vc2 : vc1; - ref = deletion.getReference(); - alt = insertion.getAlternateAllele(0); - b.stop(deletion.getEnd()); + } else { // insertion + deletion. Example: AC -> A + A -> AGT = AC -> AGT + Utils.validateArg ( (e1.isSimpleDeletion() && e2.isSimpleInsertion()) || (e1.isSimpleInsertion() && e2.isSimpleDeletion()), + () -> "Can only merge single insertion with deletion (or vice versa)"); + final Event insertion = e1.isSimpleInsertion() ? e1 : e2; + final Event deletion = e1.isSimpleInsertion() ? 
e2 : e1; + return new Event(e1.getContig(), e1.getStart(), deletion.refAllele(), insertion.altAllele()); } - - return b.alleles(Arrays.asList(ref, alt)).make(); } /** @@ -266,7 +197,7 @@ public Set<Integer> getStartPositions() { * Get the variant contexts in order of start position in this event map * @return */ - public Collection<VariantContext> getVariantContexts() { + public Collection<Event> getEvents() { return values(); } @@ -281,15 +212,14 @@ public int getNumberOfEvents() { @Override public String toString() { final StringBuilder b = new StringBuilder("EventMap{"); - for ( final VariantContext vc : getVariantContexts() ) - b.append(String.format("%s:%d-%d %s,", vc.getContig(), vc.getStart(), vc.getEnd(), vc.getAlleles())); + for ( final Event event : getEvents() ) + b.append(String.format("%s:%d-%d %s,", event.getContig(), event.getStart(), event.getEnd(), Arrays.asList(event.refAllele(), event.altAllele()))); b.append("}"); return b.toString(); } /** - * Build event maps for each haplotype, returning the sorted set of all of the starting positions of all - * events across all haplotypes + * Build event maps for each haplotype * * @param haplotypes a list of haplotypes * @param ref the reference bases * @@ -300,31 +230,23 @@ public String toString() { * are merged until a substitution is separated from the previous one by a greater distance. * That is, if maxMnpDistance = 1, substitutions at 10,11,12,14,15,17 are partitioned into a MNP * at 10-12, a MNP at 14-15, and a SNP at 17. May not be negative. - * @return a sorted set of start positions of all events among all haplotypes */ - public static TreeSet<Integer> buildEventMapsForHaplotypes( final List<Haplotype> haplotypes, + public static void buildEventMapsForHaplotypes( final Collection<Haplotype> haplotypes, final byte[] ref, final Locatable refLoc, final boolean debug, final int maxMnpDistance) { ParamUtils.isPositiveOrZero(maxMnpDistance, "maxMnpDistance may not be negative."); - // Using the cigar from each called haplotype figure out what events need to be written out in a VCF file - final TreeSet<Integer> startPosKeySet = new TreeSet<>(); int hapNumber = 0; if( debug ) logger.info("=== Best Haplotypes ==="); for( final Haplotype h : haplotypes ) { //TODO h.recomputeAndSetEventMap() with overrides for the two haplotype classes should replace this casting+checking code here which is prone to error. - if (h.isPartiallyDetermined()) { - // Since PD haplotypes Know what allels are variants, simply ask it and generate the map that way. - h.setEventMap(new EventMap(((PartiallyDeterminedHaplotype) h).getDeterminedAlleles())); - } else { - // Walk along the alignment and turn any difference from the reference into an event - h.setEventMap(new EventMap(h, ref, refLoc, "HC" + hapNumber++, maxMnpDistance)); - } - startPosKeySet.addAll(h.getEventMap().getStartPositions()); - // Assert that all of the events discovered have 2 alleles - h.getEventMap().getVariantContexts().forEach(vc -> Utils.validate(vc.getAlleles().size() == 2, () -> "Error Haplotype event map Variant Context has too many alleles " + vc.getAlleles() + " for haplotype: " + h)); + + // Since PD haplotypes know what alleles are variants, simply ask them and generate the map that way. + final EventMap events = h.isPartiallyDetermined() ?
new EventMap(((PartiallyDeterminedHaplotype) h).getDeterminedAlleles()) : + fromHaplotype(h, ref, refLoc, maxMnpDistance); + h.setEventMap(events); if (debug) { logger.info(h.toString()); @@ -332,24 +254,28 @@ public static TreeSet buildEventMapsForHaplotypes( final List> Events = " + h.getEventMap()); } } + } - return startPosKeySet; + /** + * Return the sorted set of all of the starting positions of all events across all haplotypes + */ + public static TreeSet getEventStartPositions(final Collection haplotypes) { + final TreeSet result = new TreeSet<>(); + for( final Haplotype h : haplotypes ) { + Utils.nonNull(h.getEventMap(), "Haplotype event map has not been set"); + result.addAll(h.getEventMap().getStartPositions()); + } + return result; } /** * Returns any events in the map that overlap loc, including spanning deletions and events that start at loc. */ - public List getOverlappingEvents(final int loc) { - final List overlappingEvents = headMap(loc, true).values().stream().filter(v -> v.getEnd() >= loc).collect(Collectors.toList()); - final List deletionEventsEndingAtLoc = overlappingEvents.stream() - .filter(v -> v.isSimpleDeletion() && v.getEnd() == loc).collect(Collectors.toList()); - final boolean containsDeletionEndingAtLoc = deletionEventsEndingAtLoc.size() > 0; - final boolean containsInsertionAtLoc = overlappingEvents.stream().anyMatch(VariantContext::isSimpleInsertion); - if (containsDeletionEndingAtLoc && containsInsertionAtLoc){ - // We are at the end of a deletion and the start of an insertion; - // only the insertion should be kept in this case. - overlappingEvents.remove(deletionEventsEndingAtLoc.get(0)); - } - return overlappingEvents; + public List getOverlappingEvents(final int loc) { + final List overlappingEvents = headMap(loc, true).values().stream().filter(v -> v.getEnd() >= loc).collect(Collectors.toList()); + // if we're at the start of an insertion, exclude deletions that end here; otherwise keep everything + final Predicate filter = overlappingEvents.stream().anyMatch(Event::isSimpleInsertion) ? + v -> !(v.isSimpleDeletion() && v.getEnd() == loc) : v -> true; + return overlappingEvents.stream().filter(filter).collect(Collectors.toList()); } } diff --git a/src/main/java/org/broadinstitute/hellbender/utils/haplotype/PartiallyDeterminedHaplotype.java b/src/main/java/org/broadinstitute/hellbender/utils/haplotype/PartiallyDeterminedHaplotype.java index 8ea5b1e0bd0..f2f50ce618a 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/haplotype/PartiallyDeterminedHaplotype.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/haplotype/PartiallyDeterminedHaplotype.java @@ -74,16 +74,16 @@ public byte[] getAlternateBases() { } private final byte[] alternateBases; - private final List constituentBuiltEvents; + private final List constituentBuiltEvents; // NOTE: we must store ALL of the determined events at this site (which is different than the constituent events, we expect the constituent // events for one of these objects to only be a single element) for the purposes of the overlapping reads PDHMM optimization. // At Multiallelic sites, we ultimately genotype all of the alleles at once. If we aren't careful, reads that only overlap some // alleles at a site will end up with incorrect/undetermined PDHMM scores for a subset of alleles in the genotyper which can // lead to false positives/poorly called sites. 
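// For example, a read that overlaps only one of two alt alleles at a multiallelic site must still end up with defined PDHMM scores for both.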
- private List allDeterminedEventsAtThisSite; + private List allDeterminedEventsAtThisSite; //TODO Eventually these will have to be refactored to support multiple determined alleles per PDHaplotype - private final VariantContext alleleBearingVariantContext; // NOTE this must be in both of the previous lists + private final Event alleleBearingEvent; // NOTE this must be in both of the previous lists private final long determinedPosition; private final boolean isDeterminedAlleleRef; @@ -106,13 +106,13 @@ public byte[] getAlternateBases() { * @param determinedPosition position (wrt the reference contig) that the haplotype should be considered determined //TODO this will be refactored to be a range of events in JointDetection * @param getAlignmentStartHapwrtRef alignment startHapwrtRef from baseHaplotype corresponding to the in-memory storage of reference bases (must be set for trimming/clipping ops to work) */ - public PartiallyDeterminedHaplotype(final Haplotype base, boolean isRefAllele, byte[] pdBytes, List constituentEvents, - VariantContext eventWithVariant, Cigar cigar, long determinedPosition, int getAlignmentStartHapwrtRef) { + public PartiallyDeterminedHaplotype(final Haplotype base, boolean isRefAllele, byte[] pdBytes, List constituentEvents, + final Event eventWithVariant, final Cigar cigar, long determinedPosition, int getAlignmentStartHapwrtRef) { super(base.getBases(), false, base.getAlignmentStartHapwrtRef(), cigar); this.setGenomeLocation(base.getGenomeLocation()); this.alternateBases = pdBytes; this.constituentBuiltEvents = constituentEvents; - this.alleleBearingVariantContext = eventWithVariant; + this.alleleBearingEvent = eventWithVariant; this.allDeterminedEventsAtThisSite = Collections.singletonList(eventWithVariant); this.determinedPosition = determinedPosition; this.isDeterminedAlleleRef = isRefAllele; @@ -135,7 +135,7 @@ public String toString() { String output = "HapLen:"+length() +", "+new String(getDisplayBases()); output = output + "\nUnresolved Bases["+alternateBases.length+"] "+Arrays.toString(alternateBases); return output + "\n"+getCigar().toString()+" "+ constituentBuiltEvents.stream() - .map(v ->(v==this.alleleBearingVariantContext ?"*":"")+getDRAGENDebugVariantContextString((int)getStartPosition()).apply(v) ) + .map(v ->(v==this.alleleBearingEvent ?"*":"")+getDRAGENDebugVariantContextString((int)getStartPosition()).apply(v.asVariantContext()) ) .collect(Collectors.joining("->")); } @@ -161,11 +161,11 @@ public int hashCode() { return Objects.hash(Arrays.hashCode(getBases()),Arrays.hashCode(alternateBases)); } - public List getDeterminedAlleles() { - return isDeterminedAlleleRef ? Collections.emptyList() : Collections.singletonList(alleleBearingVariantContext); + public List getDeterminedAlleles() { + return isDeterminedAlleleRef ? 
Collections.emptyList() : Collections.singletonList(alleleBearingEvent); } - public void setAllDeterminedEventsAtThisSite(List allDeterminedEventsAtThisSite) { + public void setAllDeterminedEventsAtThisSite(List allDeterminedEventsAtThisSite) { this.allDeterminedEventsAtThisSite = allDeterminedEventsAtThisSite; cachedExtent = null; } @@ -173,9 +173,9 @@ public void setAllDeterminedEventsAtThisSite(List allDeterminedE //NOTE: we never want the genotyper to handle reads that were not HMM scored, caching this extent helps keep us safe from messy sites public SimpleInterval getMaximumExtentOfSiteDeterminedAlleles() { if (cachedExtent == null) { - cachedExtent = new SimpleInterval(alleleBearingVariantContext); - for( VariantContext variantContext : allDeterminedEventsAtThisSite) { - cachedExtent = cachedExtent.mergeWithContiguous(variantContext); + cachedExtent = new SimpleInterval(alleleBearingEvent); + for( Event event : allDeterminedEventsAtThisSite) { + cachedExtent = cachedExtent.mergeWithContiguous(event); } } return cachedExtent; diff --git a/src/main/java/org/broadinstitute/hellbender/utils/pileup/PileupBasedAlleles.java b/src/main/java/org/broadinstitute/hellbender/utils/pileup/PileupBasedAlleles.java index a2eec754330..d45dcd54a28 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/pileup/PileupBasedAlleles.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/pileup/PileupBasedAlleles.java @@ -1,21 +1,24 @@ package org.broadinstitute.hellbender.utils.pileup; import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Streams; import htsjdk.samtools.*; import htsjdk.samtools.util.SequenceUtil; import htsjdk.variant.variantcontext.Allele; -import htsjdk.variant.variantcontext.VariantContext; -import htsjdk.variant.variantcontext.VariantContextBuilder; +import org.apache.commons.lang3.mutable.MutableInt; +import org.apache.commons.lang3.tuple.ImmutablePair; +import org.apache.commons.lang3.tuple.Pair; import org.broadinstitute.hellbender.engine.AlignmentAndReferenceContext; import org.broadinstitute.hellbender.engine.AlignmentContext; import org.broadinstitute.hellbender.engine.ReferenceContext; -import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.PileupDetectionArgumentCollection; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; +import org.broadinstitute.hellbender.utils.haplotype.Event; import org.broadinstitute.hellbender.utils.read.GATKRead; import java.util.*; +import java.util.stream.Collectors; /** @@ -27,12 +30,6 @@ public final class PileupBasedAlleles { final static String MISMATCH_BASES_PERCENTAGE_TAG = "MZ"; public static final double MISMATCH_BASES_PERCENTAGE_ADJUSMTENT = 1000.0; - // internal values to be used for tagging the VCF info fields appropriately - public static String PILEUP_ALLELE_SUPPORTING_READS = "PileupSupportingReads"; - public static String PILEUP_ALLELE_BAD_READS_TAG = "PileupSupportingBadReads"; - public static String PILEUP_ALLELE_ASSEMBLY_BAD_READS_TAG = "PileupAssemblyBadReads"; - public static String PILEUP_ALLELE_TOTAL_READS = "PileupAssemblyTotalReads"; - private final static int COUNT_IDX = 0; private final static int BAD_COUNT_IDX = 1; private final static int ASSEMBLY_BAD_COUNT_IDX = 2; @@ -52,9 +49,13 @@ public final class PileupBasedAlleles { * @param headerForReads Header for the reads (only necessary for SAM file conversion) * @return A list of variant 
context objects corresponding to potential variants that pass our heuristics. */ - public static List getPileupVariantContexts(final List alignmentAndReferenceContextList, final PileupDetectionArgumentCollection args, final SAMFileHeader headerForReads, final int minBaseQualityScore) { + public static Pair, Set> goodAndBadPileupEvents(final List alignmentAndReferenceContextList, final PileupDetectionArgumentCollection args, final SAMFileHeader headerForReads, final int minBaseQualityScore) { + if (!args.usePileupDetection) { + return ImmutablePair.of(Collections.emptySet(), Collections.emptySet()); + } - final List pileupVariantList = new ArrayList<>(); + final Set goodEvents = new HashSet<>(); + final Set badEvents = new HashSet<>(); // Iterate over every base for (int i = 0; i < alignmentAndReferenceContextList.size(); i++) { @@ -72,99 +73,73 @@ public static List getPileupVariantContexts(final List insertionCounts = new HashMap<>(); Map deletionCounts = new HashMap<>(); - Map altCounts = new HashMap<>(); + Map SNPCounts = new HashMap<>(); for (PileupElement element : pileup) { final byte eachBase = element.getBase(); // Subtract out low quality bases to mimic the reading active region determination //TODO this might need to also ignore the qual basees if (element.getQual() < minBaseQualityScore) { - numOfBases--; + pileupDepth.decrement(); } - // check to see that the base is not ref (and non-deletion) and increment the alt counts (and evaluate if the read is "bad") - if (refBase != eachBase && eachBase != 'D' && element.getQual() > args.qualityForSnpsInPileupDetection) { - incrementAltCount(eachBase, altCounts, - evaluateBadRead(element.getRead(), referenceContext, args, headerForReads), - evaluateBadReadForAssembly(element.getRead(), referenceContext, args, headerForReads)); - } + final boolean SNPFound = !onlyTrackDeletions && refBase != eachBase && eachBase != 'D' && element.getQual() > args.qualityForSnpsInPileupDetection; + final boolean insertionFound = !onlyTrackDeletions && args.detectIndels && element.isBeforeInsertion(); + final boolean deletionFound = args.detectIndels && element.isBeforeDeletionStart(); - //NOTE: we count indels - if (args.detectIndels) { - // now look for indels - if (element.isBeforeInsertion()) { - incrementInsertionCount(element.getBasesOfImmediatelyFollowingInsertion(), insertionCounts, - evaluateBadRead(element.getRead(), referenceContext, args, headerForReads), - evaluateBadReadForAssembly(element.getRead(), referenceContext, args, headerForReads)); - } - if (element.isBeforeDeletionStart()) { - incrementDeletionCount(element.getLengthOfImmediatelyFollowingIndel(), deletionCounts, - evaluateBadRead(element.getRead(), referenceContext, args, headerForReads), - evaluateBadReadForAssembly(element.getRead(), referenceContext, args, headerForReads)); + if (SNPFound || insertionFound || deletionFound) { + final boolean badPileup = badPileupRead(element.getRead(), args, headerForReads); + final boolean badAssembly = badAssemblyRead(element.getRead(), args); + if (SNPFound) { + incrementCounts(eachBase, SNPCounts, badPileup, badAssembly); } - } - } - - // Evaluate the detected SNP alleles for this site - if (!onlyTrackDeletions) { - for (Map.Entry allele : altCounts.entrySet()) { - List alleles = new ArrayList<>(); - alleles.add(Allele.create(referenceContext.getBase(), true)); - alleles.add(Allele.create(allele.getKey())); - final VariantContextBuilder pileupSNP = new VariantContextBuilder("pileup", alignmentContext.getContig(), alignmentContext.getStart(), 
alignmentContext.getEnd(), alleles); - pileupVariantList.add(pileupSNP - .attribute(PILEUP_ALLELE_SUPPORTING_READS, allele.getValue()[COUNT_IDX]) - .attribute(PILEUP_ALLELE_BAD_READS_TAG, allele.getValue()[BAD_COUNT_IDX]) - .attribute(PILEUP_ALLELE_ASSEMBLY_BAD_READS_TAG, allele.getValue()[ASSEMBLY_BAD_COUNT_IDX]) - .attribute(PILEUP_ALLELE_TOTAL_READS, numOfBases).make()); - } - } - if (args.detectIndels) { - // Evaluate the detected Insertions alleles for this site - if (!onlyTrackDeletions) { - for (Map.Entry allele : insertionCounts.entrySet()) { - List delAlleles = new ArrayList<>(); - delAlleles.add(Allele.create(referenceContext.getBase(), true)); - delAlleles.add(Allele.create((char) referenceContext.getBase() + allele.getKey())); - final VariantContextBuilder pileupInsertion = new VariantContextBuilder("pileup", alignmentContext.getContig(), alignmentContext.getStart(), alignmentContext.getEnd(), delAlleles); - pileupVariantList.add(pileupInsertion - .attribute(PILEUP_ALLELE_SUPPORTING_READS, allele.getValue()[COUNT_IDX]) - .attribute(PILEUP_ALLELE_BAD_READS_TAG, allele.getValue()[BAD_COUNT_IDX]) - .attribute(PILEUP_ALLELE_ASSEMBLY_BAD_READS_TAG, allele.getValue()[ASSEMBLY_BAD_COUNT_IDX]) - .attribute(PILEUP_ALLELE_TOTAL_READS, numOfBases).make()); + if (insertionFound) { + incrementCounts(element.getBasesOfImmediatelyFollowingInsertion(), insertionCounts, badPileup, badAssembly); } - } - // Evaluate the detected Deletions alleles for this site - for (Map.Entry allele : deletionCounts.entrySet()) { - List insAlleles = new ArrayList<>(); - insAlleles.add(Allele.create(referenceContext.getBase(), false)); - insAlleles.add(Allele.create(referenceContext.getBases( - new SimpleInterval(referenceContext.getContig(), - alignmentContext.getStart(), - alignmentContext.getEnd() + allele.getKey())), - true)); - final VariantContextBuilder pileupInsertion = new VariantContextBuilder("pileup", alignmentContext.getContig(), alignmentContext.getStart(), alignmentContext.getEnd() + allele.getKey(), insAlleles); - pileupVariantList.add(pileupInsertion - .attribute(PILEUP_ALLELE_SUPPORTING_READS, allele.getValue()[COUNT_IDX]) - .attribute(PILEUP_ALLELE_BAD_READS_TAG, allele.getValue()[BAD_COUNT_IDX]) - .attribute(PILEUP_ALLELE_ASSEMBLY_BAD_READS_TAG, allele.getValue()[ASSEMBLY_BAD_COUNT_IDX]) - .attribute(PILEUP_ALLELE_TOTAL_READS, numOfBases).make()); + if (deletionFound) { + incrementCounts(element.getLengthOfImmediatelyFollowingIndel(), deletionCounts, badPileup, badAssembly); + } } } + + final Map SNPEventsAndCounts = SNPCounts.entrySet().stream() + .collect(Collectors.toMap(entry -> new Event(contig, start, Allele.create(refBase, true), Allele.create(entry.getKey())), entry -> entry.getValue())); + final Map insertionEventsAndCounts = insertionCounts.entrySet().stream() + .collect(Collectors.toMap(entry -> new Event(contig, start, Allele.create(refBase, true), Allele.create((char) refBase + entry.getKey())), entry -> entry.getValue())); + final Map deletionEventsAndCounts = deletionCounts.entrySet().stream() + .collect(Collectors.toMap(entry -> new Event(contig, start, Allele.create(referenceContext.getBases(new SimpleInterval(contig, start, end + entry.getKey())), true), Allele.create(refBase)), entry -> entry.getValue())); + + Streams.concat(SNPEventsAndCounts.entrySet().stream(), insertionEventsAndCounts.entrySet().stream(), deletionEventsAndCounts.entrySet().stream()) + .forEach(eventAndCounts -> { + final Event event = eventAndCounts.getKey(); + final int[] counts = 
eventAndCounts.getValue(); + + if (passesPileupFilters(args, counts[COUNT_IDX], counts[BAD_COUNT_IDX], pileupDepth.intValue(), event.isIndel())) { + goodEvents.add(event); + } + + if (failsAssemblyFilters(args, counts[COUNT_IDX], counts[ASSEMBLY_BAD_COUNT_IDX])) { + badEvents.add(event); + } + }); } - return pileupVariantList; + return ImmutablePair.of(goodEvents, badEvents); } /** @@ -173,35 +148,15 @@ public static List getPileupVariantContexts(final List (pileupVariant.isIndel() ? args.indelThreshold : args.snpThreshold) - && totalDepth >= args.pileupAbsoluteDepth - && ((args.badReadThreshold <= 0.0) || (double) pileupVariant.getAttributeAsInt(PILEUP_ALLELE_BAD_READS_TAG,0) / (double)pileupSupport <= args.badReadThreshold); + private static boolean passesPileupFilters(final PileupDetectionArgumentCollection args, final int pileupSupport, final int pileupBadReads, final int pileupDepth, final boolean isIndel) { + return ((double) pileupSupport / (double) pileupDepth) > (isIndel ? args.indelThreshold : args.snpThreshold) + && pileupDepth >= args.pileupAbsoluteDepth + && ((args.badReadThreshold <= 0.0) || (double) pileupBadReads / (double)pileupSupport <= args.badReadThreshold); } // TODO this is the most sketchy one... does a variant that fails pileup calling with only one bad read as support count as garbage by this tool... - public static boolean shouldFilterAssemblyVariant(final PileupDetectionArgumentCollection args, final VariantContext pileupVariant) { - //Validation that this VC is the correct object type - validatePileupVariant(pileupVariant); - final int pileupSupport = pileupVariant.getAttributeAsInt(PILEUP_ALLELE_SUPPORTING_READS, 0); - final int assemblyBadReads = pileupVariant.getAttributeAsInt(PILEUP_ALLELE_ASSEMBLY_BAD_READS_TAG, 0); - return ((args.assemblyBadReadThreshold > 0.0) && (double) assemblyBadReads / (double) pileupSupport >= args.assemblyBadReadThreshold); - } - - private static void validatePileupVariant(final VariantContext pileupVariant) { - Utils.nonNull(pileupVariant); - if (pileupVariant.getAlleles().size() != 2 || - !pileupVariant.hasAttribute(PILEUP_ALLELE_ASSEMBLY_BAD_READS_TAG) || - !pileupVariant.hasAttribute(PILEUP_ALLELE_SUPPORTING_READS) || - !pileupVariant.hasAttribute(PILEUP_ALLELE_TOTAL_READS) || - !pileupVariant.hasAttribute(PILEUP_ALLELE_BAD_READS_TAG)) { - throw new GATKException.ShouldNeverReachHereException("The supplied Variant Context "+pileupVariant.toString()+" is not a PileupVariantContext"); - } + private static boolean failsAssemblyFilters(final PileupDetectionArgumentCollection args, final int pileupSupport, final int assemblyBadReads) { + return (args.assemblyBadReadThreshold > 0.0) && (double) assemblyBadReads / (double) pileupSupport >= args.assemblyBadReadThreshold; } /** @@ -213,13 +168,12 @@ private static void validatePileupVariant(final VariantContext pileupVariant) { * - Reads 2 std deviations away from the standard insert size are bad (not implemented) * * @param read - * @param referenceContext * @param args * @param headerForRead TODO get rid of this sam record conversion * @return true if any of the "badness" heuristics suggest we should consider the read suspect, false otherwise. 
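* For example, with the stated 8% edit-distance threshold, a 100bp read carrying 9 mismatched bases counts as bad.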
*/ @VisibleForTesting - static boolean evaluateBadRead(final GATKRead read, final ReferenceContext referenceContext, final PileupDetectionArgumentCollection args, final SAMFileHeader headerForRead) { + static boolean badPileupRead(final GATKRead read, final PileupDetectionArgumentCollection args, final SAMFileHeader headerForRead) { if (args.badReadThreshold <= 0.0) { return false; } @@ -252,7 +206,7 @@ static boolean evaluateBadRead(final GATKRead read, final ReferenceContext refer } @VisibleForTesting - static boolean evaluateBadReadForAssembly(final GATKRead read, final ReferenceContext referenceContext, final PileupDetectionArgumentCollection args, final SAMFileHeader headerForRead) { + static boolean badAssemblyRead(final GATKRead read, final PileupDetectionArgumentCollection args) { if (args.assemblyBadReadThreshold <= 0.0) { return false; } @@ -261,18 +215,11 @@ static boolean evaluateBadReadForAssembly(final GATKRead read, final ReferenceCo return (read.getAttributeAsInteger(MISMATCH_BASES_PERCENTAGE_TAG) / MISMATCH_BASES_PERCENTAGE_ADJUSMTENT) > args.assemblyBadReadEditDistance; } - // Helper methods to manage the badness counting arrays - private static void incrementInsertionCount(String insertion, Map insertionCounts, boolean bad, boolean assemblybad){ - int[] values = insertionCounts.computeIfAbsent(insertion, (i) -> new int[3] ); - values[COUNT_IDX]+=1; values[BAD_COUNT_IDX]+=bad?1:0; values[ASSEMBLY_BAD_COUNT_IDX]+=assemblybad?1:0; - } - private static void incrementDeletionCount(Integer deletion, Map deletionCounts, boolean bad, boolean assemblybad){ - int[] values = deletionCounts.computeIfAbsent(deletion, (i) -> new int[3] ); - values[COUNT_IDX]+=1; values[BAD_COUNT_IDX]+=bad?1:0; values[ASSEMBLY_BAD_COUNT_IDX]+=assemblybad?1:0; - } - private static void incrementAltCount(byte base, Map altCounts, boolean bad, boolean assemblybad){ - int[] values = altCounts.computeIfAbsent(base, (i) -> new int[3] ); - values[COUNT_IDX]+=1; values[BAD_COUNT_IDX]+=bad?1:0; values[ASSEMBLY_BAD_COUNT_IDX]+=assemblybad?1:0; + // Helper method to manage the badness counting arrays + // T is a Byte for a SNP (alt base), String for insertion (inserted bases), Integer for deletion (deletion length) + private static void incrementCounts(T altAllele, Map altCounts, boolean pileupBad, boolean assemblyBad){ + final int[] values = altCounts.computeIfAbsent(altAllele, (i) -> new int[3] ); + values[COUNT_IDX]+=1; values[BAD_COUNT_IDX]+=pileupBad?1:0; values[ASSEMBLY_BAD_COUNT_IDX]+=assemblyBad?1:0; } diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java index c02d90a0958..f78324e8969 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java @@ -25,6 +25,7 @@ import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypesCache; import org.broadinstitute.hellbender.utils.*; import org.broadinstitute.hellbender.utils.genotyper.GenotypePriorCalculator; +import org.broadinstitute.hellbender.utils.haplotype.Event; import org.broadinstitute.hellbender.utils.param.ParamUtils; import org.broadinstitute.hellbender.utils.pileup.PileupElement; import org.broadinstitute.hellbender.utils.read.AlignmentUtils; @@ -715,10 +716,12 @@ private static int getMinBaseQualityForAlleleInRead(final PileupElement pileupEl } public static boolean 
containsInlineIndel(final VariantContext vc) { - final List<Allele> alleles = vc.getAlleles(); - final int refLength = alleles.get(0).length(); - for (int i = 1; i < alleles.size(); i++) { - final Allele alt = alleles.get(i); + return containsInlineIndel(vc.getReference(), vc.getAlternateAlleles()); + } + + public static boolean containsInlineIndel(final Allele ref, final Collection<Allele> altAlleles) { + final int refLength = ref.length(); + for (final Allele alt : altAlleles) { if (!alt.isSymbolic() && alt != Allele.SPAN_DEL && alt.length() != refLength) { return true; } @@ -810,21 +813,21 @@ public static boolean isTandemRepeat(final VariantContext vc, final byte[] refBa * the padding base. In this case, {@code refBasesStartingAtVCWithoutPad} is CCACCACCAGTCGA. * @return */ - public static Pair<List<Integer>, byte[]> getNumTandemRepeatUnits(final VariantContext vc, final byte[] refBasesStartingAtVCWithoutPad) { - Utils.nonNull(vc); + public static Pair<List<Integer>, byte[]> getNumTandemRepeatUnits(final Allele refAllele, final List<Allele> altAlleles, final byte[] refBasesStartingAtVCWithoutPad) { + Utils.nonNull(refAllele); + Utils.nonNull(altAlleles); Utils.nonNull(refBasesStartingAtVCWithoutPad); - if ( ! vc.isIndel() ){ // only indels are tandem repeats + if ( altAlleles.stream().allMatch(a -> a.length() == refAllele.length()) ){ // only indels are tandem repeats return null; } - final Allele refAllele = vc.getReference(); final byte[] refAlleleBases = Arrays.copyOfRange(refAllele.getBases(), 1, refAllele.length()); byte[] repeatUnit = null; final List<Integer> lengths = new ArrayList<>(); - for ( final Allele allele : vc.getAlternateAlleles() ) { + for ( final Allele allele : altAlleles ) { Pair<int[], byte[]> result = getNumTandemRepeatUnits(refAlleleBases, Arrays.copyOfRange(allele.getBases(), 1, allele.length()), refBasesStartingAtVCWithoutPad); final int[] repetitionCount = result.getLeft(); @@ -843,6 +846,10 @@ public static Pair<List<Integer>, byte[]> getNumTandemRepeatUnits(final VariantC return new MutablePair<>(lengths,repeatUnit); } + public static Pair<List<Integer>, byte[]> getNumTandemRepeatUnits(final VariantContext vc, final byte[] refBasesStartingAtVCWithoutPad) { + return getNumTandemRepeatUnits(vc.getReference(), vc.getAlternateAlleles(), refBasesStartingAtVCWithoutPad); + } + public static Pair<int[], byte[]> getNumTandemRepeatUnits(final byte[] refBases, final byte[] altBases, final byte[] remainingRefContext) { /* we can't exactly apply same logic as in basesAreRepeated() to compute tandem unit and number of repeated units. Consider case where ref = ATATAT and we have an insertion of ATAT. Natural description is (AT)3 -> (AT)5.
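To make the counting contract concrete, here is a minimal sketch (not part of this patch) of the low-level overload applied to the (AT)3 insertion example above; it assumes the commons-lang3 Pair used elsewhere in this class, and alleles are passed minus their padding base, as the callers above do:

    // Insertion of ATAT into an (AT)3 run.
    final Pair<int[], byte[]> repeats = GATKVariantContextUtils.getNumTandemRepeatUnits(
            new byte[0],               // ref allele minus the padding base (pure insertion)
            "ATAT".getBytes(),         // alt allele minus the padding base
            "ATATATGC".getBytes());    // reference bases following the variant
    // repeats.getRight() should be the repeat unit AT, and repeats.getLeft() the ref/alt unit counts, here 3 and 5.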
@@ -1250,8 +1257,7 @@ public static AlleleMapper resolveIncompatibleAlleles(final Allele refAllele, fi } } - public static Map createAlleleMapping(final Allele refAllele, - final VariantContext oneVC) { + public static Map createAlleleMapping(final Allele refAllele, final VariantContext oneVC) { return createAlleleMapping(refAllele, oneVC.getReference(), oneVC.getAlternateAlleles()); } @@ -1273,8 +1279,7 @@ public static Map createAlleleMapping(final Allele refAllele, * @param inputAlts the alternate alleles that may need to be extended * @return a non-null mapping of original alleles to new (extended) ones */ - public static Map createAlleleMapping(final Allele refAllele, - final Allele inputRef, final List inputAlts) { + public static Map createAlleleMapping(final Allele refAllele, final Allele inputRef, final List inputAlts) { Utils.validate( refAllele.length() >= inputRef.length(), () -> "BUG: inputRef="+inputRef+" is longer than refAllele="+refAllele); // frequent simple case where there is already a common reference @@ -1654,10 +1659,11 @@ public static List> splitAttributesIntoPerAlleleLists(Varian * @return a list of bi-allelic (or monomorphic) variant context */ public static List splitVariantContextToBiallelics(final VariantContext vc, final boolean trimLeft, final GenotypeAssignmentMethod genotypeAssignmentMethod, - final boolean keepOriginalChrCounts) { + final boolean keepOriginalChrCounts) { Utils.nonNull(vc); - - if (!vc.isVariant() || vc.isBiallelic()) + if (!vc.isVariant()) { + return Collections.emptyList(); + } else if (vc.isBiallelic()) // non variant or biallelics already satisfy the contract return Collections.singletonList(vc); else { @@ -1695,6 +1701,29 @@ public static List splitVariantContextToBiallelics(final Variant } } + /** + * Split variant context into its biallelic components if there are more than 2 alleles + *
<p>
+ * For a VC with A/B/C alleles, returns A/B and A/C events. + * Alleles are right trimmed to satisfy VCF conventions +
* <p>
+ * If vc is biallelic or non-variant it is just returned + *
* <p>
+ * Chromosome counts are updated (but they are by definition 0) + * + * @param vc a potentially multi-allelic variant context + * @param trimLeft if true, we will also left trim alleles, potentially moving the resulting vcs forward on the genome + * @param genotypeAssignmentMethod assignment strategy for the (subsetted) PLs + * @param keepOriginalChrCounts keep the original chromosome counts before subsetting + * @return a list of biallelic (or monomorphic) events + */ + public static List<Event> splitVariantContextToEvents(final VariantContext vc, final boolean trimLeft, final GenotypeAssignmentMethod genotypeAssignmentMethod, + final boolean keepOriginalChrCounts) { + + return splitVariantContextToBiallelics(vc, trimLeft, genotypeAssignmentMethod, keepOriginalChrCounts).stream() + .map(Event::of).collect(Collectors.toList()); + } + /** * Check if any of the genotypes is heterozygous, non-reference (i.e. 1/2) * @@ -1709,59 +1738,6 @@ private static boolean hasHetNonRef(final GenotypesContext genotypesContext) { return false; } - /** - * Splits the alleles for the provided variant context into its primitive parts. - * Requires that the input VC be bi-allelic, so calling methods should first call splitVariantContextToBiallelics() if needed. - * Currently works only for MNPs. - * - * @param vc the non-null VC to split - * @return a non-empty list of VCs split into primitive parts or the original VC otherwise - */ - public static List<VariantContext> splitIntoPrimitiveAlleles(final VariantContext vc) { - Utils.nonNull(vc); - if ( !vc.isBiallelic() ) { - throw new IllegalArgumentException("Trying to break a multi-allelic Variant Context into primitive parts"); - } - - // currently only works for MNPs - if ( !vc.isMNP() ) - return Arrays.asList(vc); - - final byte[] ref = vc.getReference().getBases(); - final byte[] alt = vc.getAlternateAllele(0).getBases(); - - Utils.validate(ref.length == alt.length, "ref and alt alleles for MNP have different lengths"); - - final List<VariantContext> result = new ArrayList<>(ref.length); - - for ( int i = 0; i < ref.length; i++ ) { - - // if the ref and alt bases are different at a given position, create a new SNP record (otherwise do nothing) - if ( ref[i] != alt[i] ) { - - // create the ref and alt SNP alleles - final Allele newRefAllele = Allele.create(ref[i], true); - final Allele newAltAllele = Allele.create(alt[i], false); - - // create a new VariantContext with the new SNP alleles - final VariantContextBuilder newVC = new VariantContextBuilder(vc).start(vc.getStart() + i).stop(vc.getStart() + i).alleles(Arrays.asList(newRefAllele, newAltAllele)); - - // create new genotypes with updated alleles - final Map<Allele, Allele> alleleMap = new LinkedHashMap<>(); - alleleMap.put(vc.getReference(), newRefAllele); - alleleMap.put(vc.getAlternateAllele(0), newAltAllele); - final GenotypesContext newGenotypes = updateGenotypesWithMappedAlleles(vc.getGenotypes(), new AlleleMapper(alleleMap)); - - result.add(newVC.genotypes(newGenotypes).make()); - } - } - - if ( result.isEmpty() ) - result.add(vc); - - return result; - } - /** * Add chromosome counts (AC, AN and AF) to the VCF header lines * @@ -2091,37 +2067,14 @@ public static int[] matchAllelesOnly(final VariantContext variant1, final Varian Utils.nonNull(variant1); Utils.nonNull(variant2); - // Grab the trivial case: - if (variant1.isBiallelic() && variant2.isBiallelic()) { - if (variant1.getAlternateAllele(0).equals(variant2.getAlternateAllele(0)) && - (variant1.getReference().equals(variant2.getReference()))) { - return new int[]{0}; - } else { -
return new int[]{-1}; - } - } - - // Handle the case where one or both of the input VCs are not biallelic. - final int[] result = new int[variant1.getAlternateAlleles().size()]; + // First split and trim all variant contexts into events + final List events1 = simpleSplitIntoBiallelics(variant1); + final List events2 = simpleSplitIntoBiallelics(variant2); - // First split (and trim) all variant contexts into biallelics. We are only going ot be interested in the alleles. - final List splitVariants1 = simpleSplitIntoBiallelics(variant1); - final List splitVariants2 = simpleSplitIntoBiallelics(variant2); - - // Second, match on ref and alt. If match occurs add it to the output list. - for (int i = 0; i < splitVariants1.size(); i++) { - result[i] = -1; - for (int j = 0; j < splitVariants2.size(); j++) { - final VariantContext splitVariant1 = splitVariants1.get(i); - final VariantContext splitVariant2 = splitVariants2.get(j); - if (splitVariant1.getAlternateAllele(0).equals(splitVariant2.getAlternateAllele(0)) - && splitVariant1.getReference().equals(splitVariant2.getReference())) { - result[i] = j; - } - } - } - - return result; + // result[i] = j such that variant1's ith alt and variant2's jth alt are equivalent, or -1 if no such j exists + return IntStream.range(0, events1.size()) + .map(i -> IntStream.range(0, events2.size()).filter(j -> events1.get(i).equals(events2.get(j))).findFirst().orElse(-1)) + .toArray(); } /** @@ -2137,22 +2090,20 @@ public static int[] matchAllelesOnly(final VariantContext variant1, final Varian * Note that the variant contexts are usually stripped of attributes and genotypes. Never {@code null}. Empty list * if variant context has no alt alleles. */ - private static List simpleSplitIntoBiallelics(final VariantContext vc) { + private static List simpleSplitIntoBiallelics(final VariantContext vc) { Utils.nonNull(vc); - final List result = new ArrayList<>(); + final List result = new ArrayList<>(); if (vc.isBiallelic()) { - return Collections.singletonList(vc); + return Collections.singletonList(Event.of(vc)); } else { // Since variant context builders are slow to keep re-creating. Just create one and spew variant contexts from it, since // the only difference will be the alternate allele. Initialize the VCB with a dummy alternate allele, // since it will be overwritten in all cases. 
final VariantContextBuilder vcb = new VariantContextBuilder("SimpleSplit", vc.getContig(), vc.getStart(), vc.getEnd(), Arrays.asList(vc.getReference(), Allele.NO_CALL)); - vc.getAlternateAlleles().forEach(allele -> result.add(GATKVariantContextUtils.trimAlleles( - vcb.alleles(Arrays.asList(vc.getReference(), allele)).make(true), true, true) - ) - ); + vc.getAlternateAlleles().forEach(allele -> result.add(Event.of(GATKVariantContextUtils.trimAlleles( + vcb.alleles(Arrays.asList(vc.getReference(), allele)).make(true), true, true)))); } return result; diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringUnitTest.java index 0f0596942b4..c57dcc343c0 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringUnitTest.java @@ -24,8 +24,7 @@ public void testNoNeedToFilter(){ final byte[] fullReferenceWithPadding = "CATGCATG".getBytes(); Haplotype haplotype = new Haplotype(fullReferenceWithPadding, true, 0, TextCigarCodec.decode("8M")); haplotype.setGenomeLocation(new SimpleInterval("chr", 100, 108)); - haplotype.setEventMap(new EventMap(haplotype, fullReferenceWithPadding, - new SimpleInterval("chr", (int)haplotype.getStartPosition(),(int)haplotype.getStopPosition()), "test", 0)); + haplotype.setEventMap(EventMap.fromHaplotype(haplotype, fullReferenceWithPadding, 0)); haplotypeList.add(haplotype); @@ -33,8 +32,7 @@ public void testNoNeedToFilter(){ haplotype.setGenomeLocation(new SimpleInterval("chr", 100, 108)); haplotypeList.add(haplotype); - haplotype.setEventMap(new EventMap(haplotype, fullReferenceWithPadding, - new SimpleInterval("chr", (int)haplotype.getStartPosition(), (int)haplotype.getStopPosition()), "test", 0)); + haplotype.setEventMap(EventMap.fromHaplotype(haplotype, fullReferenceWithPadding, 0)); AlleleList haplotypes = new IndexedAlleleList<>(haplotypeList); SampleList samples = new IndexedSampleList(Arrays.asList("sm1")); @@ -75,22 +73,19 @@ public void testFilterCloseMis(){ final byte[] fullReferenceWithPadding = "CATGCATG".getBytes(); Haplotype haplotype = new Haplotype(fullReferenceWithPadding, true, 0, TextCigarCodec.decode("8M")); haplotype.setGenomeLocation(new SimpleInterval("chr", 100, 108)); - haplotype.setEventMap(new EventMap(haplotype, fullReferenceWithPadding, - new SimpleInterval("chr", (int)haplotype.getStartPosition(),(int)haplotype.getStopPosition()), "test", 0)); + haplotype.setEventMap(EventMap.fromHaplotype(haplotype, fullReferenceWithPadding, 0)); haplotypeList.add(haplotype); haplotype = new Haplotype("CAGGCATG".getBytes(), false, 0, TextCigarCodec.decode("8M")); haplotype.setGenomeLocation(new SimpleInterval("chr", 100, 108)); - haplotype.setEventMap(new EventMap(haplotype, fullReferenceWithPadding, - new SimpleInterval("chr", (int)haplotype.getStartPosition(), (int)haplotype.getStopPosition()), "test", 0)); + haplotype.setEventMap(EventMap.fromHaplotype(haplotype, fullReferenceWithPadding, 0)); haplotypeList.add(haplotype); haplotype = new Haplotype("CAGTCATG".getBytes(), false, 0, TextCigarCodec.decode("8M")); haplotype.setGenomeLocation(new SimpleInterval("chr", 100, 108)); - haplotype.setEventMap(new EventMap(haplotype, fullReferenceWithPadding, - new SimpleInterval("chr", (int)haplotype.getStartPosition(), (int)haplotype.getStopPosition()), "test", 0)); 
+ haplotype.setEventMap(EventMap.fromHaplotype(haplotype, fullReferenceWithPadding, 0)); haplotypeList.add(haplotype); AlleleList haplotypes = new IndexedAlleleList<>(haplotypeList); @@ -136,22 +131,19 @@ public void testFilterDistantHindel(){ final byte[] fullReferenceWithPadding = "CATGCATG".getBytes(); Haplotype haplotype = new Haplotype(fullReferenceWithPadding, true, 0, TextCigarCodec.decode("8M")); haplotype.setGenomeLocation(new SimpleInterval("chr", 100, 108)); - haplotype.setEventMap(new EventMap(haplotype, fullReferenceWithPadding, - new SimpleInterval("chr", (int)haplotype.getStartPosition(),(int)haplotype.getStopPosition()), "test", 0)); + haplotype.setEventMap(EventMap.fromHaplotype(haplotype, fullReferenceWithPadding, 0)); haplotypeList.add(haplotype); haplotype = new Haplotype("CAGGCATG".getBytes(), false, 0, TextCigarCodec.decode("8M")); haplotype.setGenomeLocation(new SimpleInterval("chr", 100, 108)); - haplotype.setEventMap(new EventMap(haplotype, fullReferenceWithPadding, - new SimpleInterval("chr", (int)haplotype.getStartPosition(), (int)haplotype.getStopPosition()), "test", 0)); + haplotype.setEventMap(EventMap.fromHaplotype(haplotype, fullReferenceWithPadding, 0)); haplotypeList.add(haplotype); haplotype = new Haplotype("CAGGCATTG".getBytes(), false, 0, TextCigarCodec.decode("7M1I1M")); haplotype.setGenomeLocation(new SimpleInterval("chr", 100, 109)); - haplotype.setEventMap(new EventMap(haplotype, fullReferenceWithPadding, - new SimpleInterval("chr", (int)haplotype.getStartPosition(), (int)haplotype.getStopPosition()), "test", 0)); + haplotype.setEventMap(EventMap.fromHaplotype(haplotype, fullReferenceWithPadding, 0)); haplotypeList.add(haplotype); AlleleList haplotypes = new IndexedAlleleList<>(haplotypeList); @@ -197,22 +189,19 @@ public void testNotFilterDistantM(){ final byte[] fullReferenceWithPadding = "CATGCATG".getBytes(); Haplotype haplotype = new Haplotype(fullReferenceWithPadding, true, 0, TextCigarCodec.decode("8M")); haplotype.setGenomeLocation(new SimpleInterval("chr", 100, 108)); - haplotype.setEventMap(new EventMap(haplotype, fullReferenceWithPadding, - new SimpleInterval("chr", (int)haplotype.getStartPosition(),(int)haplotype.getStopPosition()), "test", 0)); + haplotype.setEventMap(EventMap.fromHaplotype(haplotype, fullReferenceWithPadding, 0)); haplotypeList.add(haplotype); haplotype = new Haplotype("CAGGCATG".getBytes(), false, 0, TextCigarCodec.decode("8M")); haplotype.setGenomeLocation(new SimpleInterval("chr", 100, 108)); - haplotype.setEventMap(new EventMap(haplotype, fullReferenceWithPadding, - new SimpleInterval("chr", (int)haplotype.getStartPosition(), (int)haplotype.getStopPosition()), "test", 0)); + haplotype.setEventMap(EventMap.fromHaplotype(haplotype, fullReferenceWithPadding, 0)); haplotypeList.add(haplotype); haplotype = new Haplotype("CATGCATC".getBytes(), false, 0, TextCigarCodec.decode("8M")); haplotype.setGenomeLocation(new SimpleInterval("chr", 100, 108)); - haplotype.setEventMap(new EventMap(haplotype, fullReferenceWithPadding, - new SimpleInterval("chr", (int)haplotype.getStartPosition(), (int)haplotype.getStopPosition()), "test", 0)); + haplotype.setEventMap(EventMap.fromHaplotype(haplotype, fullReferenceWithPadding, 0)); haplotypeList.add(haplotype); AlleleList haplotypes = new IndexedAlleleList<>(haplotypeList); @@ -259,15 +248,13 @@ public void testNotFilterLoneWeakAllele(){ final byte[] fullReferenceWithPadding = "CATGCATG".getBytes(); Haplotype haplotype = new Haplotype(fullReferenceWithPadding, true, 0, 
TextCigarCodec.decode("8M")); haplotype.setGenomeLocation(new SimpleInterval("chr", 100, 108)); - haplotype.setEventMap(new EventMap(haplotype, fullReferenceWithPadding, - new SimpleInterval("chr", (int)haplotype.getStartPosition(),(int)haplotype.getStopPosition()), "test", 0)); + haplotype.setEventMap(EventMap.fromHaplotype(haplotype, fullReferenceWithPadding, 0)); haplotypeList.add(haplotype); haplotype = new Haplotype("CAGGCATG".getBytes(), false, 0, TextCigarCodec.decode("8M")); haplotype.setGenomeLocation(new SimpleInterval("chr", 100, 108)); - haplotype.setEventMap(new EventMap(haplotype, fullReferenceWithPadding, - new SimpleInterval("chr", (int)haplotype.getStartPosition(), (int)haplotype.getStopPosition()), "test", 0)); + haplotype.setEventMap(EventMap.fromHaplotype(haplotype, fullReferenceWithPadding, 0)); haplotypeList.add(haplotype); AlleleList haplotypes = new IndexedAlleleList<>(haplotypeList); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerUtilsUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerUtilsUnitTest.java index fae99f3a587..14b8aecd24b 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerUtilsUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerUtilsUnitTest.java @@ -5,6 +5,7 @@ import htsjdk.samtools.util.Locatable; import htsjdk.variant.variantcontext.*; import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.mutable.MutableInt; import org.apache.commons.lang3.tuple.Pair; import org.broadinstitute.hellbender.GATKBaseTest; import org.broadinstitute.gatk.nativebindings.smithwaterman.SWParameters; @@ -14,6 +15,7 @@ import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.genotyper.*; +import org.broadinstitute.hellbender.utils.haplotype.Event; import org.broadinstitute.hellbender.utils.haplotype.EventMap; import org.broadinstitute.hellbender.utils.haplotype.Haplotype; import org.broadinstitute.hellbender.utils.read.ArtificialReadUtils; @@ -358,96 +360,6 @@ public void testAnnotateReadLikelihoodsWithSupportedAlleles(List activeAllelesToGenotype, - final List expectedVcsAtThisLocation) { - - final List vcsAtThisPosition = getVariantContextsFromGivenAlleles(loc, activeAllelesToGenotype, true); - Assert.assertEquals(vcsAtThisPosition.size(), expectedVcsAtThisLocation.size()); - for (int i = 0; i < expectedVcsAtThisLocation.size(); i++) { - VariantContextTestUtils.assertVariantContextsAreEqual(vcsAtThisPosition.get(i), expectedVcsAtThisLocation.get(i), new ArrayList<>(), Collections.emptyList()); - Assert.assertEquals(vcsAtThisPosition.get(i).getSource(), expectedVcsAtThisLocation.get(i).getSource()); - } - } - - @DataProvider(name = "getVariantContextsFromGivenAlleles") - public Object[][] getVcsAtThisLocationFromGivenAllelesData() { - final List tests = new ArrayList<>(); - - tests.add(new Object[]{1000, new ArrayList<>(), new ArrayList<>()}); - - final Haplotype snpHaplotype = new Haplotype("ACTGGTCAACTGGTCAACTGGTCAACTGGTCA".getBytes()); - final List snpAlleles = Arrays.asList(Allele.create("A", true), Allele.create("G")); - final VariantContextBuilder snpVCBuilder = new VariantContextBuilder("a", "20", 1000, 1000, snpAlleles); - final VariantContext snpVc = snpVCBuilder.make(); - snpHaplotype.setEventMap(new 
EventMap(Arrays.asList(snpVc))); - - // this one matches the snp haplotype above (to test duplicate removal) - final Haplotype snpHaplotypeDuplicate = new Haplotype("ACTGGTCAACTGGTCAACTGGTCAACTGGACA".getBytes()); - final List snpAlleles2 = Arrays.asList(Allele.create("A", true), Allele.create("G")); - final VariantContextBuilder svpVC2Builder = new VariantContextBuilder("a", "20", 1000, 1000, snpAlleles2); - final VariantContext snpVc2 = svpVC2Builder.make(); - final List snpAlleles3 = Arrays.asList(Allele.create("T", true), Allele.create("A")); - final VariantContextBuilder snpVC3Builder = new VariantContextBuilder("a", "20", 1020, 1020, snpAlleles3); - final VariantContext snpVc3 = snpVC3Builder.make(); - snpHaplotypeDuplicate.setEventMap(new EventMap(Arrays.asList(snpVc2, snpVc3))); - - - final Haplotype deletionHaplotype = new Haplotype("ACTGGTCAGGTCAACTGGTCA".getBytes()); - final List deletionAlleles = Arrays.asList(Allele.create("ACTGGTCAACT", true), Allele.create("A")); - final VariantContextBuilder deletionVCBuilder = new VariantContextBuilder("a", "20", 995, 1005, deletionAlleles); - final VariantContext deletionVc = deletionVCBuilder.make(); - deletionHaplotype.setEventMap(new EventMap(Arrays.asList(deletionVc))); - - // matches the deletion alleles above but at a different position (to catch an edge case in duplicate removal) - final Haplotype deletionHaplotypeFalseDuplicate = new Haplotype("ACTGGTCAGGTCAACTGGTCA".getBytes()); - final List deletionAllelesFalseDuplicate = Arrays.asList(Allele.create("ACTGGTCAACT", true), Allele.create("A")); - final VariantContextBuilder deletionFalseDuplicateBuilder = new VariantContextBuilder("a", "20", 998, 1008, deletionAllelesFalseDuplicate); - final VariantContext deletionVcFalseDuplicate = deletionFalseDuplicateBuilder.make(); - deletionHaplotypeFalseDuplicate.setEventMap(new EventMap(Arrays.asList(deletionVcFalseDuplicate))); - - // doesn't overlap 1000 - final Haplotype deletionHaplotypeNoSpan = new Haplotype("CAACTGGTCAACTGGTCAACTGGTCAACTGGTCAACTGGTCA".getBytes()); - final List deletionAllelesNoSpan = Arrays.asList(Allele.create("GTCAA", true), Allele.create("G")); - final VariantContextBuilder deletionVcNoSpanBuilder = new VariantContextBuilder("a", "20", 990, 994, deletionAllelesNoSpan); - final VariantContext deletionVcNoSpan = deletionVcNoSpanBuilder.make(); - deletionHaplotypeNoSpan.setEventMap(new EventMap(Arrays.asList(deletionVcNoSpan))); - - final Haplotype sameLocDelHap1 = new Haplotype("AAAAAAAGAAA".getBytes()); - final List sameLocDelAlleles1 = Arrays.asList(Allele.create("GTT", true), Allele.create("G")); - final VariantContext sameLocDelVc1 = new VariantContextBuilder("a", "20", 10093568, 10093570, sameLocDelAlleles1).make(); - sameLocDelHap1.setEventMap(new EventMap(Arrays.asList(sameLocDelVc1))); - - final Haplotype sameLocDelHap2 = new Haplotype("AAAAAAAGTAAA".getBytes()); - final List sameLocDelAlleles2 = Arrays.asList(Allele.create("GT", true), Allele.create("G")); - final VariantContext sameLocDelVc2 = new VariantContextBuilder("a", "20", 10093568, 10093569, sameLocDelAlleles2).make(); - sameLocDelHap2.setEventMap(new EventMap(Arrays.asList(sameLocDelVc2))); - - final Haplotype sameLocInsHap1 = new Haplotype("AAAAAAAGTTTAAA".getBytes()); - final List sameLocInsAlleles1 = Arrays.asList(Allele.create("G", true), Allele.create("GT")); - final VariantContext sameLocInsVc1 = new VariantContextBuilder("a", "20", 10093568, 10093568, sameLocInsAlleles1).make(); - sameLocInsHap1.setEventMap(new 
EventMap(Arrays.asList(sameLocInsVc1))); - - final VariantContextBuilder deletionVCBuilderWithGts = new VariantContextBuilder("a", "20", 995, 1005, deletionAlleles) - .genotypes(new GenotypeBuilder("TEST", Arrays.asList(deletionAlleles.get(0), deletionAlleles.get(1))).make()); - final VariantContext deletionVcWithGts = deletionVCBuilderWithGts.make(); - - tests.add(new Object[]{1000, Arrays.asList(snpVc), Arrays.asList(snpVCBuilder.source("Comp0Allele0").make())}); - tests.add(new Object[]{995, Arrays.asList(deletionVc), Arrays.asList(deletionVCBuilder.source("Comp0Allele0").make())}); - tests.add(new Object[]{1000, Arrays.asList(deletionVc), Arrays.asList(deletionVCBuilder.source("Comp0Allele0").make())}); - tests.add(new Object[]{1000, Arrays.asList(deletionVc, snpVc), - Arrays.asList(deletionVCBuilder.source("Comp0Allele0").make(), snpVCBuilder.source("Comp1Allele0").make())}); - tests.add(new Object[]{1000, Arrays.asList(deletionVc, deletionVcNoSpan), Arrays.asList(deletionVCBuilder.source("Comp0Allele0").make())}); - tests.add(new Object[]{1000, Arrays.asList(deletionVc, deletionVcFalseDuplicate, deletionVcNoSpan), - Arrays.asList(deletionVCBuilder.source("Comp0Allele0").make(), deletionFalseDuplicateBuilder.source("Comp1Allele0").make())}); - - tests.add(new Object[]{1000, Arrays.asList(deletionVcWithGts, snpVc), - Arrays.asList(deletionVCBuilder.source("Comp0Allele0").make(), snpVCBuilder.source("Comp1Allele0").make())}); - - return tests.toArray(new Object[][]{}); - } - @DataProvider(name = "getVariantContextsFromActiveHaplotypes") public Object[][] getVariantContextsFromActiveHaplotypesData() { final List tests = new ArrayList<>(); @@ -455,67 +367,52 @@ public Object[][] getVariantContextsFromActiveHaplotypesData() { tests.add(new Object[]{new ArrayList<>(), 1000, new ArrayList<>()}); final Haplotype snpHaplotype = new Haplotype("ACTGGTCAACTGGTCAACTGGTCAACTGGTCA".getBytes()); - final List snpAlleles = Arrays.asList(Allele.create("A", true), Allele.create("G")); - final VariantContextBuilder snpVCBuilder = new VariantContextBuilder("a", "20", 1000, 1000, snpAlleles); - final VariantContext snpVc = snpVCBuilder.make(); - snpHaplotype.setEventMap(new EventMap(Arrays.asList(snpVc))); + final Event snpEvent = new Event("20", 1000, Allele.create("A", true), Allele.create("G")); + snpHaplotype.setEventMap(EventMap.of(snpEvent)); // this one matches the snp haplotype above (to test duplicate removal) final Haplotype snpHaplotypeDuplicate = new Haplotype("ACTGGTCAACTGGTCAACTGGTCAACTGGACA".getBytes()); - final List snpAlleles2 = Arrays.asList(Allele.create("A", true), Allele.create("G")); - final VariantContextBuilder svpVC2Builder = new VariantContextBuilder("a", "20", 1000, 1000, snpAlleles2); - final VariantContext snpVc2 = svpVC2Builder.make(); - final List snpAlleles3 = Arrays.asList(Allele.create("T", true), Allele.create("A")); - final VariantContextBuilder snpVC3Builder = new VariantContextBuilder("a", "20", 1020, 1020, snpAlleles3); - final VariantContext snpVc3 = snpVC3Builder.make(); - snpHaplotypeDuplicate.setEventMap(new EventMap(Arrays.asList(snpVc2, snpVc3))); + final Event snpEvent2 = new Event("20", 1000, Allele.create("A", true), Allele.create("G")); + final Event snpEvent3 = new Event("20", 1020, Allele.create("T", true), Allele.create("A")); + snpHaplotypeDuplicate.setEventMap(EventMap.of(snpEvent2, snpEvent3)); final Haplotype deletionHaplotype = new Haplotype("ACTGGTCAGGTCAACTGGTCA".getBytes()); - final List deletionAlleles = Arrays.asList(Allele.create("ACTGGTCAACT", 
true), Allele.create("A")); - final VariantContextBuilder deletionVCBuilder = new VariantContextBuilder("a", "20", 995, 1005, deletionAlleles); - final VariantContext deletionVc = deletionVCBuilder.make(); - deletionHaplotype.setEventMap(new EventMap(Arrays.asList(deletionVc))); + final Event deletionEvent = new Event("20", 995, Allele.create("ACTGGTCAACT", true), Allele.create("A")); + deletionHaplotype.setEventMap(EventMap.of(deletionEvent)); // matches the deletion alleles above but at a different position (to catch an edge case in duplicate removal) final Haplotype deletionHaplotypeFalseDuplicate = new Haplotype("ACTGGTCAGGTCAACTGGTCA".getBytes()); - final List deletionAllelesFalseDuplicate = Arrays.asList(Allele.create("ACTGGTCAACT", true), Allele.create("A")); - final VariantContextBuilder deletionFalseDuplicateBuilder = new VariantContextBuilder("a", "20", 998, 1008, deletionAllelesFalseDuplicate); - final VariantContext deletionVcFalseDuplicate = deletionFalseDuplicateBuilder.make(); - deletionHaplotypeFalseDuplicate.setEventMap(new EventMap(Arrays.asList(deletionVcFalseDuplicate))); + final Event deletionEventFalseDuplicate = new Event("20", 998, Allele.create("ACTGGTCAACT", true), Allele.create("A")); + deletionHaplotypeFalseDuplicate.setEventMap(EventMap.of(deletionEventFalseDuplicate)); // doesn't overlap 1000 final Haplotype deletionHaplotypeNoSpan = new Haplotype("CAACTGGTCAACTGGTCAACTGGTCAACTGGTCAACTGGTCA".getBytes()); - final List deletionAllelesNoSpan = Arrays.asList(Allele.create("GTCAA", true), Allele.create("G")); - final VariantContextBuilder deletionVcNoSpanBuilder = new VariantContextBuilder("a", "20", 990, 994, deletionAllelesNoSpan); - final VariantContext deletionVcNoSpan = deletionVcNoSpanBuilder.make(); - deletionHaplotypeNoSpan.setEventMap(new EventMap(Arrays.asList(deletionVcNoSpan))); + final Event deletionEventNoSpan = new Event("20", 990, Allele.create("GTCAA", true), Allele.create("G")); + deletionHaplotypeNoSpan.setEventMap(EventMap.of(deletionEventNoSpan)); - tests.add(new Object[]{Arrays.asList(snpHaplotype), 1000, Arrays.asList(snpVc)}); - tests.add(new Object[]{Arrays.asList(snpHaplotype, snpHaplotypeDuplicate), 1000, Arrays.asList(snpVc)}); - tests.add(new Object[]{Arrays.asList(deletionHaplotype), 995, Arrays.asList(deletionVc)}); - tests.add(new Object[]{Arrays.asList(deletionHaplotype), 1000, Arrays.asList(deletionVc)}); - tests.add(new Object[]{Arrays.asList(deletionHaplotype, deletionHaplotypeNoSpan), 1000, Arrays.asList(deletionVc)}); - tests.add(new Object[]{Arrays.asList(deletionHaplotype, deletionHaplotypeFalseDuplicate, deletionHaplotypeNoSpan), 1000, Arrays.asList(deletionVc, deletionVcFalseDuplicate)}); + tests.add(new Object[]{Arrays.asList(snpHaplotype), 1000, Arrays.asList(snpEvent)}); + tests.add(new Object[]{Arrays.asList(snpHaplotype, snpHaplotypeDuplicate), 1000, Arrays.asList(snpEvent)}); + tests.add(new Object[]{Arrays.asList(deletionHaplotype), 995, Arrays.asList(deletionEvent)}); + tests.add(new Object[]{Arrays.asList(deletionHaplotype), 1000, Arrays.asList(deletionEvent)}); + tests.add(new Object[]{Arrays.asList(deletionHaplotype, deletionHaplotypeNoSpan), 1000, Arrays.asList(deletionEvent)}); + tests.add(new Object[]{Arrays.asList(deletionHaplotype, deletionHaplotypeFalseDuplicate, deletionHaplotypeNoSpan), 1000, Arrays.asList(deletionEvent, deletionEventFalseDuplicate)}); - tests.add(new Object[]{Arrays.asList(deletionHaplotype, snpHaplotype), 1000, Arrays.asList(deletionVc, snpVc)}); + tests.add(new 
Object[]{Arrays.asList(deletionHaplotype, snpHaplotype), 1000, Arrays.asList(deletionEvent, snpEvent)}); final Haplotype sameLocDelHap1 = new Haplotype("AAAAAAAGAAA".getBytes()); - final List sameLocDelAlleles1 = Arrays.asList(Allele.create("GTT", true), Allele.create("G")); - final VariantContext sameLocDelVc1 = new VariantContextBuilder("a", "20", 10093568, 10093570, sameLocDelAlleles1).make(); - sameLocDelHap1.setEventMap(new EventMap(Arrays.asList(sameLocDelVc1))); + final Event sameLocDelEvent1 = new Event("20", 10093568, Allele.create("GTT", true), Allele.create("G")); + sameLocDelHap1.setEventMap(EventMap.of(sameLocDelEvent1)); final Haplotype sameLocDelHap2 = new Haplotype("AAAAAAAGTAAA".getBytes()); - final List sameLocDelAlleles2 = Arrays.asList(Allele.create("GT", true), Allele.create("G")); - final VariantContext sameLocDelVc2 = new VariantContextBuilder("a", "20", 10093568, 10093569, sameLocDelAlleles2).make(); - sameLocDelHap2.setEventMap(new EventMap(Arrays.asList(sameLocDelVc2))); + final Event sameLocDelEvent2 = new Event("20", 10093568, Allele.create("GT", true), Allele.create("G")); + sameLocDelHap2.setEventMap(EventMap.of(sameLocDelEvent2)); final Haplotype sameLocInsHap1 = new Haplotype("AAAAAAAGTTTAAA".getBytes()); - final List sameLocInsAlleles1 = Arrays.asList(Allele.create("G", true), Allele.create("GT")); - final VariantContext sameLocInsVc1 = new VariantContextBuilder("a", "20", 10093568, 10093568, sameLocInsAlleles1).make(); - sameLocInsHap1.setEventMap(new EventMap(Arrays.asList(sameLocInsVc1))); + final Event sameLocInsEvent1 = new Event("20", 10093568, Allele.create("G", true), Allele.create("GT")); + sameLocInsHap1.setEventMap(EventMap.of(sameLocInsEvent1)); - tests.add(new Object[]{Arrays.asList(sameLocDelHap1, sameLocDelHap2, sameLocInsHap1), 10093568, Arrays.asList(sameLocDelVc1, sameLocDelVc2, sameLocInsVc1)}); + tests.add(new Object[]{Arrays.asList(sameLocDelHap1, sameLocDelHap2, sameLocInsHap1), 10093568, Arrays.asList(sameLocDelEvent1, sameLocDelEvent2, sameLocInsEvent1)}); return tests.toArray(new Object[][]{}); } @@ -523,13 +420,12 @@ public Object[][] getVariantContextsFromActiveHaplotypesData() { @Test(dataProvider = "getVariantContextsFromActiveHaplotypes") public void testGetVariantContextsFromActiveHaplotypes(final List haplotypes, final int loc, - final List expectedVcsAtThisLocation) { + final List expectedEventsAtThisLocation) { - final List vcsAtThisPosition = getVariantContextsFromActiveHaplotypes(loc, haplotypes, true); - Assert.assertEquals(vcsAtThisPosition.size(), expectedVcsAtThisLocation.size()); - for (int i = 0; i < expectedVcsAtThisLocation.size(); i++) { - VariantContextTestUtils.assertVariantContextsAreEqual(vcsAtThisPosition.get(i), expectedVcsAtThisLocation.get(i), new ArrayList<>(), Collections.emptyList()); - Assert.assertEquals(vcsAtThisPosition.get(i).getSource(), expectedVcsAtThisLocation.get(i).getSource()); + final List vcsAtThisPosition = getEventsFromActiveHaplotypes(loc, haplotypes, true); + Assert.assertEquals(vcsAtThisPosition.size(), expectedEventsAtThisLocation.size()); + for (int i = 0; i < expectedEventsAtThisLocation.size(); i++) { + VariantContextTestUtils.assertVariantContextsAreEqual(vcsAtThisPosition.get(i), expectedEventsAtThisLocation.get(i).asVariantContext(), new ArrayList<>(), Collections.emptyList()); } } @@ -537,43 +433,35 @@ public void testGetVariantContextsFromActiveHaplotypes(final List hap public Object[][] getEventMapperData() { final Haplotype refHaplotype = new 
Haplotype("ACTGGTCAACTAGTCAACTGGTCAACTGGTCA".getBytes()); - refHaplotype.setEventMap(new EventMap(new HashSet<>())); + refHaplotype.setEventMap(EventMap.of()); final Haplotype snpHaplotype = new Haplotype("ACTGGTCAACTGGTCAACTGGTCAACTGGTCA".getBytes()); final Allele refAllele = Allele.create("A", true); final List snpAlleles = Arrays.asList(refAllele, Allele.create("G")); - final VariantContextBuilder snpVCBuilder = new VariantContextBuilder("a", "20", 1000, 1000, snpAlleles); - final VariantContext snpVc = snpVCBuilder.make(); - snpHaplotype.setEventMap(new EventMap(Arrays.asList(snpVc))); + final Event snpEvent = new Event("20", 1000, refAllele, Allele.create("G")); + snpHaplotype.setEventMap(EventMap.of(snpEvent)); final Haplotype snpHaplotypeNotPresentInEventsAtThisLoc = new Haplotype("ACTGGTCAACTTGTCAACTGGTCAACTGGTCA".getBytes()); - final List snpAllelesNotPresentInEventsAtThisLoc = Arrays.asList(refAllele, Allele.create("T")); - final VariantContextBuilder snpNotPresentInEventsAtThisLocVCBuilder = new VariantContextBuilder("a", "20", 1000, 1000, snpAllelesNotPresentInEventsAtThisLoc); - final VariantContext snpVcNotPresentInEventsAtThisLoc = snpNotPresentInEventsAtThisLocVCBuilder.make(); - snpHaplotypeNotPresentInEventsAtThisLoc.setEventMap(new EventMap(Arrays.asList(snpVcNotPresentInEventsAtThisLoc))); + final Event snpEventNotPresentInEventsAtThisLoc = new Event("20", 1000, refAllele, Allele.create("T")); + snpHaplotypeNotPresentInEventsAtThisLoc.setEventMap(EventMap.of(snpEventNotPresentInEventsAtThisLoc)); final Haplotype deletionHaplotype = new Haplotype("ACTGGTCAGGTCAACTGGTCA".getBytes()); - final List deletionAlleles = Arrays.asList(Allele.create("ACTGGTCAACT", true), Allele.create("A")); - final VariantContextBuilder deletionVCBuilder = new VariantContextBuilder("a", "20", 995, 1005, deletionAlleles); - final VariantContext deletionVc = deletionVCBuilder.make(); - deletionHaplotype.setEventMap(new EventMap(Arrays.asList(deletionVc))); + final Event deletionEvent = new Event("20", 995, Allele.create("ACTGGTCAACT", true), Allele.create("A")); + deletionHaplotype.setEventMap(EventMap.of(deletionEvent)); final VariantContext spandDelVc = new VariantContextBuilder("a", "20", 1000, 1000, Arrays.asList(refAllele, Allele.SPAN_DEL)).make(); final Haplotype deletionHaplotype2 = new Haplotype("ACTGGTCAGGTCAAGGTCA".getBytes()); - final List deletionAlleles2 = Arrays.asList(Allele.create("ACTGGTCAACTCT", true), Allele.create("A")); - final VariantContextBuilder deletionVCBuilder2 = new VariantContextBuilder("b", "20", 995, 1007, deletionAlleles2); - final VariantContext deletionVc2 = deletionVCBuilder2.make(); - deletionHaplotype2.setEventMap(new EventMap(Arrays.asList(deletionVc2))); + final Event deletionEvent2 = new Event("20", 995, Allele.create("ACTGGTCAACTCT", true), Allele.create("A")); + deletionHaplotype2.setEventMap(EventMap.of(deletionEvent2)); final VariantContext spandDelVc2 = new VariantContextBuilder("b", "20", 1000, 1000, Arrays.asList(refAllele, Allele.SPAN_DEL)).make(); final Haplotype deletionStartingAtLocHaplotype = new Haplotype("ACTGGTCAGGTCAAGGTCA".getBytes()); final Allele deletionStartingAtLocRefAllele = Allele.create("ACTGGTCAACTCT", true); final List deletionStartingAtLocAlleles = Arrays.asList(deletionStartingAtLocRefAllele, Allele.create("A")); - final VariantContextBuilder deletionStartingAtLocVCBuilder = new VariantContextBuilder("b", "20", 1000, 1012, deletionStartingAtLocAlleles); - final VariantContext deletionStartingAtLocVc = 
deletionStartingAtLocVCBuilder.make(); - deletionStartingAtLocHaplotype.setEventMap(new EventMap(Arrays.asList(deletionStartingAtLocVc))); + final Event deletionStartingAtLocEvent = new Event("20", 1000, deletionStartingAtLocRefAllele, Allele.create("A")); + deletionStartingAtLocHaplotype.setEventMap(EventMap.of(deletionStartingAtLocEvent)); final Allele remappedSNPAllele = Allele.create("GCTGGTCAACTCT"); final VariantContext mergedSnpAndDelStartingAtLocVC = new VariantContextBuilder("a", "20", 1000, 1012, @@ -594,8 +482,8 @@ public Object[][] getEventMapperData() { final List tests = new ArrayList<>(); tests.add(new Object[]{ - snpVc, - snpVc.getStart(), + snpEvent.asVariantContext(), + snpEvent.getStart(), Arrays.asList(snpHaplotype, refHaplotype), Maps.asMap(new HashSet<>(snpAlleles), (key) -> { @@ -616,10 +504,10 @@ public Object[][] getEventMapperData() { }); // includes a SNP haplotype not present in events at this loc (which might happen in GGA mode) tests.add(new Object[]{ - snpVc, - snpVc.getStart(), + snpEvent.asVariantContext(), + snpEvent.getStart(), Arrays.asList(snpHaplotype, refHaplotype, snpHaplotypeNotPresentInEventsAtThisLoc), - Maps.asMap(new HashSet<>(snpVc.getAlleles()), + Maps.asMap(new HashSet<>(Arrays.asList(snpEvent.refAllele(), snpEvent.altAllele())), (key) -> { if (snpAlleles.get(1).equals(key)) return Arrays.asList(snpHaplotype); return Arrays.asList(refHaplotype); @@ -629,7 +517,7 @@ public Object[][] getEventMapperData() { // two spanning deletions, no given alleles -> both dels should be in event map for span del tests.add(new Object[]{ mergedSnpAndDelVC, - snpVc.getStart(), + snpEvent.getStart(), Arrays.asList(snpHaplotype, refHaplotype, deletionHaplotype, deletionHaplotype2), Maps.asMap(new HashSet<>(mergedSnpAndDelVC.getAlleles()), (key) -> { @@ -642,7 +530,7 @@ public Object[][] getEventMapperData() { // two spanning deletions, one in given alleles tests.add(new Object[]{ mergedSnpAndDelVC, - snpVc.getStart(), + snpEvent.getStart(), Arrays.asList(snpHaplotype, refHaplotype, deletionHaplotype, deletionHaplotype2), Maps.asMap(new HashSet<>(mergedSnpAndDelVC.getAlleles()), (key) -> { @@ -654,10 +542,10 @@ public Object[][] getEventMapperData() { // A deletion starting at the loc in the given alleles, the snp not in the given alleles tests.add(new Object[]{ - deletionStartingAtLocVc, - deletionStartingAtLocVc.getStart(), + deletionStartingAtLocEvent.asVariantContext(), + deletionStartingAtLocEvent.getStart(), Arrays.asList(snpHaplotype, refHaplotype, deletionStartingAtLocHaplotype), - Maps.asMap(new HashSet<>(deletionStartingAtLocVc.getAlleles()), + Maps.asMap(new HashSet<>(Arrays.asList(deletionStartingAtLocEvent.refAllele(), deletionStartingAtLocEvent.altAllele())), (key) -> { if (deletionStartingAtLocAlleles.get(1).equals(key)) return Arrays.asList(deletionStartingAtLocHaplotype); return Arrays.asList(refHaplotype); @@ -666,10 +554,10 @@ public Object[][] getEventMapperData() { // A deletion starting at the loc not in the given alleles, the snp in the given alleles tests.add(new Object[]{ - snpVc, - snpVc.getStart(), + snpEvent.asVariantContext(), + snpEvent.getStart(), Arrays.asList(snpHaplotype, refHaplotype, deletionStartingAtLocHaplotype), - Maps.asMap(new HashSet<>(snpVc.getAlleles()), + Maps.asMap(new HashSet<>(Arrays.asList(snpEvent.refAllele(), snpEvent.altAllele())), (key) -> { if (snpAlleles.get(1).equals(key)) return Arrays.asList(snpHaplotype); return Arrays.asList(refHaplotype); @@ -679,7 +567,7 @@ public Object[][] getEventMapperData() { // A 
deletion starting at the loc and the SNP in the given alleles tests.add(new Object[]{ mergedSnpAndDelStartingAtLocVC, - snpVc.getStart(), + snpEvent.getStart(), Arrays.asList(snpHaplotype, refHaplotype, deletionStartingAtLocHaplotype), Maps.asMap(new HashSet<>(mergedSnpAndDelStartingAtLocVC.getAlleles()), (key) -> { @@ -691,11 +579,11 @@ public Object[][] getEventMapperData() { // location forced to be after alleles - to activate Allele.SPAN_DEL insertion into the result - HashSet deletionLocationAfterAlleles = new HashSet<>(deletionStartingAtLocVc.getAlleles()); + HashSet deletionLocationAfterAlleles = new HashSet<>(Arrays.asList(deletionStartingAtLocEvent.refAllele(), deletionStartingAtLocEvent.altAllele())); deletionLocationAfterAlleles.add(Allele.SPAN_DEL); tests.add(new Object[]{ - deletionStartingAtLocVc, - deletionStartingAtLocVc.getStart() + 1, + deletionStartingAtLocEvent.asVariantContext(), + deletionStartingAtLocEvent.getStart() + 1, Arrays.asList(snpHaplotype, refHaplotype, deletionStartingAtLocHaplotype), Maps.asMap(new HashSet<>(deletionLocationAfterAlleles), (key) -> { @@ -854,35 +742,21 @@ public Object[][] makeConstructPhaseSetMappingData() { final List calls = Arrays.asList(vc2, vc3, vc4); final Haplotype pos1 = new Haplotype("CAAAA".getBytes()); - pos1.setEventMap(new EventMap(Arrays.asList(vc1))); - pos1.getEventMap().put(1, vc1); + pos1.setEventMap(EventMap.of(vcToEvent(vc1))); final Haplotype pos2 = new Haplotype("ACAAA".getBytes()); - pos2.setEventMap(new EventMap(Arrays.asList(vc2))); - pos2.getEventMap().put(2, vc2); + pos2.setEventMap(EventMap.of(vcToEvent(vc2))); final Haplotype pos3 = new Haplotype("AACAA".getBytes()); - pos3.setEventMap(new EventMap(Arrays.asList(vc3))); - pos3.getEventMap().put(3, vc3); + pos3.setEventMap(EventMap.of(vcToEvent(vc3))); final Haplotype pos4 = new Haplotype("AAACA".getBytes()); - pos4.setEventMap(new EventMap(Arrays.asList(vc4))); - pos4.getEventMap().put(4, vc4); + pos4.setEventMap(EventMap.of(vcToEvent(vc4))); final Haplotype pos24 = new Haplotype("ACACA".getBytes()); - pos24.setEventMap(new EventMap(Arrays.asList(vc2, vc4))); - pos24.getEventMap().put(2, vc2); - pos24.getEventMap().put(4, vc4); + pos24.setEventMap(EventMap.of(vcToEvent(vc2), vcToEvent(vc4))); final Haplotype pos34 = new Haplotype("AACCA".getBytes()); - pos34.setEventMap(new EventMap(Arrays.asList(vc3, vc4))); - pos34.getEventMap().put(3, vc3); - pos34.getEventMap().put(4, vc4); + pos34.setEventMap(EventMap.of(vcToEvent(vc3), vcToEvent(vc4))); final Haplotype pos234 = new Haplotype("ACCCA".getBytes()); - pos234.setEventMap(new EventMap(Arrays.asList(vc2, vc3, vc4))); - pos234.getEventMap().put(2, vc2); - pos234.getEventMap().put(3, vc3); - pos234.getEventMap().put(4, vc4); + pos234.setEventMap(EventMap.of(vcToEvent(vc2), vcToEvent(vc3), vcToEvent(vc4))); final Haplotype pos23 = new Haplotype("ACCAA".getBytes()); - pos24.setEventMap(new EventMap(Arrays.asList(vc2, vc3))); - pos24.getEventMap().put(2, vc2); - pos24.getEventMap().put(3, vc3); - + pos24.setEventMap(EventMap.of(vcToEvent(vc2), vcToEvent(vc3))); final Map> haplotypeMap = new HashMap<>(); @@ -978,10 +852,11 @@ public Object[][] makeConstructPhaseSetMappingData() { // the ref haplotype would be "TAGCA" final Haplotype spandelHap = new Haplotype("TACA".getBytes()); - spandelHap.setEventMap(new EventMap(Arrays.asList(delVC))); + spandelHap.setEventMap(EventMap.of(vcToEvent(delVC))); final Haplotype spannedSnp = new Haplotype("TATCA".getBytes()); - spannedSnp.setEventMap(new 
EventMap(Arrays.asList(spannedSnpVC))); + // set the event map to just the SNP, not the SPAN_DEL symbolic allele, from spannedSnpVC + spannedSnp.setEventMap(EventMap.of(new Event(spannedSnpVC.getContig(), spannedSnpVC.getStart(), spannedSnpVC.getReference(), altT))); final Set haplotypesWithSpanDel = new HashSet<>(); haplotypesWithSpanDel.add(spandelHap); @@ -1065,19 +940,11 @@ public Object[][] makeConstructPhaseSetMappingData() { // test 15: create two phase groups broken by an unphased SNP final Map> haplotypeMap15 = new HashMap<>(); final Haplotype pos12 = new Haplotype("CCAAA".getBytes()); - pos12.setEventMap(new EventMap(Arrays.asList(vc1, vc2))); - pos12.getEventMap().put(1, vc1); - pos12.getEventMap().put(2, vc2); + pos12.setEventMap(EventMap.of(vcToEvent(vc1), vcToEvent(vc2))); final Haplotype pos1245 = new Haplotype("CCACC".getBytes()); - pos12.setEventMap(new EventMap(Arrays.asList(vc1, vc2, vc4, vc5))); - pos12.getEventMap().put(1, vc1); - pos12.getEventMap().put(2, vc2); - pos12.getEventMap().put(4, vc5); - pos12.getEventMap().put(5, vc5); + pos12.setEventMap(EventMap.of(vcToEvent(vc1), vcToEvent(vc2), vcToEvent(vc4), vcToEvent(vc5))); final Haplotype pos45 = new Haplotype("AAACC".getBytes()); - pos45.setEventMap(new EventMap(Arrays.asList(vc4, vc5))); - pos45.getEventMap().put(4, vc4); - pos45.getEventMap().put(5, vc5); + pos45.setEventMap(EventMap.of(vcToEvent(vc4), vcToEvent(vc5))); final Set haplotypes1het15 = new HashSet<>(); haplotypes1het15.add(pos12); @@ -1153,14 +1020,12 @@ public Object[][] makeCreateHaplotypeMappingData() { final Haplotype AtoC1 = new Haplotype("AACAA".getBytes()); final VariantContext vc1 = new VariantContextBuilder().chr("20").start(3).stop(3).alleles(Arrays.asList(ref, altC)).make(); - AtoC1.setEventMap(new EventMap(Arrays.asList(vc1))); - AtoC1.getEventMap().put(3, vc1); + AtoC1.setEventMap(EventMap.of(vcToEvent(vc1))); haplotypes.add(AtoC1); final Haplotype AtoC2 = new Haplotype("AAACA".getBytes()); final VariantContext vc2 = new VariantContextBuilder().chr("20").start(4).stop(4).alleles(Arrays.asList(ref, altT)).make(); - AtoC2.setEventMap(new EventMap(Arrays.asList(vc2))); - AtoC2.getEventMap().put(4, vc2); + AtoC2.setEventMap(EventMap.of(vcToEvent(vc2))); haplotypes.add(AtoC2); final VariantContext spannedSnpVC = new VariantContextBuilder().chr("20").start(4).stop(4).alleles(Arrays.asList(ref, altT, Allele.SPAN_DEL)).make(); @@ -1170,7 +1035,7 @@ public Object[][] makeCreateHaplotypeMappingData() { final List deletionAlleles = Arrays.asList(Allele.create("AA", true), Allele.create("A")); final VariantContextBuilder deletionVCBuilder = new VariantContextBuilder("a", "20", 3, 4, deletionAlleles); final VariantContext deletionVc = deletionVCBuilder.make(); - spandelHap.setEventMap(new EventMap(Arrays.asList(deletionVc))); + spandelHap.setEventMap(EventMap.of(vcToEvent(deletionVc))); final Set haplotypesWithSpanDel = new HashSet<>(haplotypes); haplotypesWithSpanDel.add(spandelHap); @@ -1217,11 +1082,11 @@ public Object[][] makePhaseCallsData() { final Set haplotypes = new HashSet<>(); final Haplotype hap1 = new Haplotype("ACATAA".getBytes()); - hap1.setEventMap(new EventMap(Arrays.asList(vc1, vc3))); + hap1.setEventMap(EventMap.of(vcToEvent(vc1), vcToEvent(vc3))); haplotypes.add(hap1); final Haplotype hap2 = new Haplotype("AACATA".getBytes()); - hap2.setEventMap(new EventMap(Arrays.asList(vc2, vc4))); + hap2.setEventMap(EventMap.of(vcToEvent(vc2), vcToEvent(vc4))); haplotypes.add(hap2); @@ -1244,7 +1109,7 @@ public Object[][] 
makePhaseCallsData() { final Set haplotypesPlusUncalledVariant = new HashSet<>(haplotypes); final Haplotype hap3 = new Haplotype("AAAAAT".getBytes()); - hap3.setEventMap(new EventMap(Arrays.asList(vc5Uncalled))); + hap3.setEventMap(EventMap.of(vcToEvent(vc5Uncalled))); haplotypesPlusUncalledVariant.add(hap3); tests.add(new Object[]{calls, haplotypesPlusUncalledVariant, phasedCalls}); @@ -1280,25 +1145,23 @@ public void testAddGivenAlleles() { assemblyResultSet.setFullReferenceWithPadding(fullReferenceWithPadding); // add a SNP - final VariantContext givenVC = new VariantContextBuilder("test", "chr", 2, 2, - Arrays.asList(Allele.create((byte) 'A', true), Allele.create((byte) 'C', false))).make(); + final Event givenEvent = new Event("chr", 2, Allele.create((byte) 'A', true), Allele.create((byte) 'C')); - addGivenAlleles(Collections.singletonList(givenVC), maxMnpDistance, - aligner, HAPLOTYPE_TO_REFERENCE_SW_PARAMETERS, assemblyResultSet); + assemblyResultSet.addGivenAlleles(Collections.singletonList(givenEvent), maxMnpDistance, + aligner, HAPLOTYPE_TO_REFERENCE_SW_PARAMETERS); Assert.assertEquals(assemblyResultSet.getHaplotypeCount(), 2); Assert.assertEquals(assemblyResultSet.getHaplotypeList().get(1).getBaseString(), "ACAACCCCGGGGTTTT"); // adding the same VC should have no effect - addGivenAlleles(Collections.singletonList(givenVC), maxMnpDistance, - aligner, HAPLOTYPE_TO_REFERENCE_SW_PARAMETERS, assemblyResultSet); + assemblyResultSet.addGivenAlleles(Collections.singletonList(givenEvent), maxMnpDistance, + aligner, HAPLOTYPE_TO_REFERENCE_SW_PARAMETERS); Assert.assertEquals(assemblyResultSet.getHaplotypeCount(), 2); // add another SNP - final VariantContext givenVC2 = new VariantContextBuilder("test", "chr", 5, 5, - Arrays.asList(Allele.create((byte) 'C', true), Allele.create((byte) 'G', false))).make(); - addGivenAlleles(Collections.singletonList(givenVC2), maxMnpDistance, - aligner, HAPLOTYPE_TO_REFERENCE_SW_PARAMETERS, assemblyResultSet); + final Event givenEvent2 = new Event("chr", 5, Allele.create((byte) 'C', true), Allele.create((byte) 'G')); + assemblyResultSet.addGivenAlleles(Collections.singletonList(givenEvent2), maxMnpDistance, + aligner, HAPLOTYPE_TO_REFERENCE_SW_PARAMETERS); // SNP is not found in existing variation, so it's added to the ref and the first SNP Assert.assertEquals(assemblyResultSet.getHaplotypeCount(), 4); Assert.assertEquals(assemblyResultSet.getHaplotypeList().get(2).getBaseString(), "AAAAGCCCGGGGTTTT"); @@ -1306,19 +1169,17 @@ public void testAddGivenAlleles() { // add a deletion that overlaps the second SNP. 
This variant gets added to the ref and first SNP haplotypes but not either // haplotype that contains the overlapping 2nd SNP - final VariantContext givenVC3 = new VariantContextBuilder("test", "chr", 5, 7, - Arrays.asList(Allele.create("CCC".getBytes(), true), Allele.create((byte) 'C', false))).make(); - addGivenAlleles(Collections.singletonList(givenVC3), maxMnpDistance, - aligner, HAPLOTYPE_TO_REFERENCE_SW_PARAMETERS, assemblyResultSet); + final Event givenEvent3 = new Event("chr", 5, Allele.create("CCC".getBytes(), true), Allele.create((byte) 'C')); + assemblyResultSet.addGivenAlleles(Collections.singletonList(givenEvent3), maxMnpDistance, + aligner, HAPLOTYPE_TO_REFERENCE_SW_PARAMETERS); Assert.assertEquals(assemblyResultSet.getHaplotypeCount(), 6); Assert.assertEquals(assemblyResultSet.getHaplotypeList().get(4).getBaseString(), "AAAACCGGGGTTTT"); Assert.assertEquals(assemblyResultSet.getHaplotypeList().get(5).getBaseString(), "ACAACCGGGGTTTT"); - // adding an equivalent deletion should do nothing - final VariantContext givenVC4 = new VariantContextBuilder("test", "chr", 5, 8, - Arrays.asList(Allele.create("CCCC".getBytes(), true), Allele.create("CC".getBytes(), false))).make(); - addGivenAlleles(Collections.singletonList(givenVC4), maxMnpDistance, - aligner, HAPLOTYPE_TO_REFERENCE_SW_PARAMETERS, assemblyResultSet); + // adding the same deletion should do nothing + final Event givenEvent4 = new Event("chr", 5, Allele.create("CCC".getBytes(), true), Allele.create("C".getBytes(), false)); + assemblyResultSet.addGivenAlleles(Collections.singletonList(givenEvent4), maxMnpDistance, + aligner, HAPLOTYPE_TO_REFERENCE_SW_PARAMETERS); Assert.assertEquals(assemblyResultSet.getHaplotypeCount(), 6); // finally, add a haplotype with two new phased SNPs, after which adding an allele with one of these SNPs does nothing @@ -1332,10 +1193,9 @@ public void testAddGivenAlleles() { Assert.assertEquals(assemblyResultSet.getHaplotypeCount(), 7); - final VariantContext givenVC5 = new VariantContextBuilder("test", "chr", 8, 8, - Arrays.asList(Allele.create((byte) 'C', true), Allele.create((byte) 'T', false))).make(); - addGivenAlleles(Collections.singletonList(givenVC5), maxMnpDistance, - aligner, HAPLOTYPE_TO_REFERENCE_SW_PARAMETERS, assemblyResultSet); + final Event givenEvent5 = new Event("chr", 8, Allele.create((byte) 'C', true), Allele.create((byte) 'T')); + assemblyResultSet.addGivenAlleles(Collections.singletonList(givenEvent5), maxMnpDistance, + aligner, HAPLOTYPE_TO_REFERENCE_SW_PARAMETERS); Assert.assertEquals(assemblyResultSet.getHaplotypeCount(), 7); } @@ -1356,11 +1216,11 @@ public void testAddMultiallelicGivenAlleles() { assemblyResultSet.setFullReferenceWithPadding(fullReferenceWithPadding); // add two SNPs at the same locus - final VariantContext givenVC = new VariantContextBuilder("test", "chr", 2, 2, - Arrays.asList(Allele.create((byte) 'A', true), Allele.create((byte) 'C', false), Allele.create((byte) 'T', false))).make(); + final Event givenEvent1 = new Event("chr", 2, Allele.create((byte) 'A', true), Allele.create((byte) 'C')); + final Event givenEvent2 = new Event("chr", 2, Allele.create((byte) 'A', true), Allele.create((byte) 'T')); - addGivenAlleles(Collections.singletonList(givenVC), maxMnpDistance, - aligner, HAPLOTYPE_TO_REFERENCE_SW_PARAMETERS, assemblyResultSet); + assemblyResultSet.addGivenAlleles(List.of(givenEvent1, givenEvent2), maxMnpDistance, + aligner, HAPLOTYPE_TO_REFERENCE_SW_PARAMETERS); Assert.assertEquals(assemblyResultSet.getHaplotypeCount(), 3); 
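The multiallelic test above shows the new convention for force-called alleles: one biallelic Event per alt allele rather than a single multiallelic VariantContext. A hedged sketch of that decomposition, using the Event(contig, start, ref, alt) constructor this patch introduces (htsjdk and java.util.stream imports assumed; the per-pair reference trimming that production code performs, as in the trimAlleles call earlier in this patch, is omitted):

    // Sketch: split a multiallelic VariantContext into biallelic, untrimmed Events.
    static List<Event> splitIntoEvents(final VariantContext vc) {
        return vc.getAlternateAlleles().stream()
                .map(alt -> new Event(vc.getContig(), vc.getStart(), vc.getReference(), alt))
                .collect(Collectors.toList());
    }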
Assert.assertEquals(assemblyResultSet.getHaplotypeList().get(1).getBaseString(), "ACAACCCCGGGGTTTT"); Assert.assertEquals(assemblyResultSet.getHaplotypeList().get(2).getBaseString(), "ATAACCCCGGGGTTTT"); @@ -1388,11 +1248,10 @@ public void testGivenAllelesHugeInsertion() { // add huge insertion - final VariantContext givenVC = new VariantContextBuilder("test", "chr", 2, 2, - Arrays.asList(Allele.create((byte) 'A', true), Allele.create('A' + new String(insertedBases), false))).make(); + final Event givenEvent = new Event("chr", 2, Allele.create((byte) 'A', true), Allele.create('A' + new String(insertedBases))); - addGivenAlleles(Collections.singletonList(givenVC), maxMnpDistance, - aligner, HAPLOTYPE_TO_REFERENCE_SW_PARAMETERS, assemblyResultSet); + assemblyResultSet.addGivenAlleles(Collections.singletonList(givenEvent), maxMnpDistance, + aligner, HAPLOTYPE_TO_REFERENCE_SW_PARAMETERS); Assert.assertEquals(assemblyResultSet.getHaplotypeCount(), 2); Assert.assertEquals(assemblyResultSet.getHaplotypeList().get(1).getBaseString(), "AA" + new String(insertedBases) + "AACCCCGGGGTTTT"); } @@ -1454,24 +1313,24 @@ public Object[][] filterPileupHaplotypesDataProvider() { Object[][] tests = new Object[][] { new Object[]{Arrays.asList(hapA,hapB,hapC,hapD),flatSupportAllKmers,5,3,Arrays.asList(hapA,hapB,hapC,hapD)}, //returns all when no filtering required // These haplotypes are all equivalent, these test stability of the filtering - new Object[]{Arrays.asList(hapA,hapB,hapC,hapD),flatSupportAllKmers,1,3,Arrays.asList(hapA)}, - new Object[]{Arrays.asList(hapA,hapB,hapC,hapD),flatSupportAllKmers,2,3,Arrays.asList(hapA,hapB)}, - new Object[]{Arrays.asList(hapA,hapB,hapC,hapD),flatSupportAllKmers,3,3,Arrays.asList(hapA,hapB,hapC)}, + new Object[]{Arrays.asList(hapA,hapB,hapC,hapD),flatSupportAllKmers,1,3,Arrays.asList(hapA,hapB,hapC,hapD)}, + new Object[]{Arrays.asList(hapA,hapB,hapC,hapD),flatSupportAllKmers,2,3,Arrays.asList(hapA,hapB,hapC,hapD)}, + new Object[]{Arrays.asList(hapA,hapB,hapC,hapD),flatSupportAllKmers,3,3,Arrays.asList(hapA,hapB,hapC,hapD)}, // Repetitive kmers in hapF don't get double counted new Object[]{Arrays.asList(hapA,hapB,hapD,hapF),hapFRepeatedKmers,2,3,Arrays.asList(hapF,hapD)}, - new Object[]{Arrays.asList(hapA,hapB,hapD,hapF),hapFRepeatedKmers,1,3,Arrays.asList(hapD)}, //currently repeated kmers only count as singular evidence + new Object[]{Arrays.asList(hapA,hapB,hapD,hapF),hapFRepeatedKmers,1,3,Arrays.asList(hapF, hapD)}, //currently repeated kmers only count as singular evidence // These tests demonstrate that the weights in the map don't matter - new Object[]{Arrays.asList(hapA,hapB,hapC,hapD),hapDKmersHighSupport,1,3,Arrays.asList(hapA)}, - new Object[]{Arrays.asList(hapA,hapB,hapC,hapD),hapDKmersHighSupport,2,3,Arrays.asList(hapA,hapB)}, - new Object[]{Arrays.asList(hapA,hapB,hapC,hapD),hapDKmersHighSupport,3,3,Arrays.asList(hapA,hapB,hapC)}, // Despite hapD having good support it is not weighted higher + new Object[]{Arrays.asList(hapA,hapB,hapC,hapD),hapDKmersHighSupport,1,3,Arrays.asList(hapA,hapB,hapC,hapD)}, + new Object[]{Arrays.asList(hapA,hapB,hapC,hapD),hapDKmersHighSupport,2,3,Arrays.asList(hapA,hapB,hapC,hapD)}, + new Object[]{Arrays.asList(hapA,hapB,hapC,hapD),hapDKmersHighSupport,3,3,Arrays.asList(hapA,hapB,hapC,hapD)}, // Despite hapD having good support it is not weighted higher // Test of the output when only one hap has support new Object[]{Arrays.asList(hapA,hapB,hapC,hapD,hapF),hapDKmers,1,3,Arrays.asList(hapD)}, - new 
Object[]{Arrays.asList(hapA,hapB,hapC,hapD,hapF),hapDKmers,2,3,Arrays.asList(hapD,hapA)}, + new Object[]{Arrays.asList(hapA,hapB,hapC,hapD,hapF),hapDKmers,2,3,Arrays.asList(hapD,hapA, hapC)}, new Object[]{Arrays.asList(hapA,hapB,hapC,hapD,hapF),hapDKmers,3,3,Arrays.asList(hapD,hapA,hapC)}, - new Object[]{Arrays.asList(hapA,hapB,hapC,hapD,hapF),hapDKmers,4,3,Arrays.asList(hapD,hapA,hapC,hapB)}, + new Object[]{Arrays.asList(hapA,hapB,hapC,hapD,hapF),hapDKmers,4,3,Arrays.asList(hapD,hapA,hapC,hapB,hapF)}, }; return tests; @@ -1484,8 +1343,15 @@ public void testFilterPileupHaplotypes(final List inputHaplotypes, final int numPileupHaplotypes, final int kmerSize, final List expected) { - Set actual = AssemblyBasedCallerUtils.filterPileupHaplotypes(inputHaplotypes, kmerReadCounts, numPileupHaplotypes, kmerSize); + final Map counts = kmerReadCounts.entrySet().stream() + .collect(Collectors.toMap(entry -> entry.getKey(), entry -> new MutableInt(entry.getValue()))); + Set actual = AssemblyBasedCallerUtils.filterPileupHaplotypes(new HashSet<>(inputHaplotypes), counts, numPileupHaplotypes, kmerSize); Assert.assertEquals(actual, new HashSet<>(expected)); } + + private static Event vcToEvent(final VariantContext vc) { + Utils.validate(vc.getNAlleles() == 2, "must be biallelic"); + return new Event(vc.getContig(), vc.getStart(), vc.getReference(), vc.getAlternateAllele(0)); + } } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngineUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngineUnitTest.java index 96f5fb208ed..d3055a0f224 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngineUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngineUnitTest.java @@ -5,27 +5,22 @@ import htsjdk.variant.variantcontext.*; import org.broadinstitute.gatk.nativebindings.smithwaterman.SWOverhangStrategy; import org.broadinstitute.gatk.nativebindings.smithwaterman.SWParameters; -import org.broadinstitute.hellbender.engine.AlignmentContext; import org.broadinstitute.hellbender.engine.FeatureContext; import org.broadinstitute.hellbender.engine.FeatureInput; -import org.broadinstitute.hellbender.engine.ReadsDataSource; import org.broadinstitute.hellbender.testutils.VariantContextTestUtils; import org.broadinstitute.hellbender.tools.walkers.annotator.VariantAnnotatorEngine; -import org.broadinstitute.hellbender.tools.walkers.qc.Pileup; import org.broadinstitute.hellbender.utils.QualityUtils; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; -import org.broadinstitute.hellbender.utils.downsampling.DownsamplingMethod; import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods; import org.broadinstitute.hellbender.utils.genotyper.IndexedAlleleList; import org.broadinstitute.hellbender.utils.genotyper.SampleList; +import org.broadinstitute.hellbender.utils.haplotype.Event; import org.broadinstitute.hellbender.utils.haplotype.EventMap; import org.broadinstitute.hellbender.utils.haplotype.Haplotype; -import org.broadinstitute.hellbender.utils.locusiterator.LocusIteratorByState; import org.broadinstitute.hellbender.utils.pileup.ReadPileup; import org.broadinstitute.hellbender.utils.read.ArtificialReadUtils; import org.broadinstitute.hellbender.utils.read.GATKRead; -import 
org.broadinstitute.hellbender.utils.read.ReadUtils; import org.broadinstitute.hellbender.utils.smithwaterman.SmithWatermanJavaAligner; import org.broadinstitute.hellbender.utils.smithwaterman.SmithWatermanAlignment; import org.broadinstitute.hellbender.GATKBaseTest; @@ -34,7 +29,6 @@ import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -import java.io.File; import java.util.*; /** @@ -55,10 +49,10 @@ public BasicGenotypingTestProvider(String refString, String hapString, Map calcAlignment() { + public Map calcAlignment() { final SmithWatermanAlignment alignment = SmithWatermanJavaAligner.getInstance().align(ref, hap, new SWParameters(3, -1, -4, -1), SWOverhangStrategy.SOFTCLIP); final Haplotype h = new Haplotype(hap, false, alignment.getAlignmentOffset(), alignment.getCigar()); - return new EventMap(h, ref, new SimpleInterval("4", 1, 1 + ref.length), "name", 1); + return EventMap.fromHaplotype(h, ref, new SimpleInterval("4", 1, 1 + ref.length), 1); } public String toString() { @@ -132,7 +126,7 @@ public Object[][] makeBasicGenotypingTests() { @Test(dataProvider = "BasicGenotypingTestProvider", enabled = true) public void testHaplotypeToVCF(BasicGenotypingTestProvider cfg) { - Map calculatedMap = cfg.calcAlignment(); + Map calculatedMap = cfg.calcAlignment(); Map expectedMap = cfg.expected; logger.warn(String.format("Test: %s", cfg.toString())); if(!compareVCMaps(calculatedMap, expectedMap)) { @@ -199,7 +193,7 @@ public Iterator addMiscellaneousAlleleDataProvider() { /** * Private function to compare Map of VCs, it only checks the types and start locations of the VariantContext */ - private boolean compareVCMaps(Map calc, Map expected) { + private boolean compareVCMaps(Map calc, Map expected) { if( !calc.keySet().equals(expected.keySet()) ) { return false; } // sanity check for( Integer loc : expected.keySet() ) { Byte type = expected.get(loc); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/PartiallyDeterminedHaplotypeComputationEngineUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/PartiallyDeterminedHaplotypeComputationEngineUnitTest.java index 8fe8ee94efa..1cea02ba6a3 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/PartiallyDeterminedHaplotypeComputationEngineUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/PartiallyDeterminedHaplotypeComputationEngineUnitTest.java @@ -2,11 +2,10 @@ import htsjdk.samtools.TextCigarCodec; import htsjdk.variant.variantcontext.Allele; -import htsjdk.variant.variantcontext.VariantContext; -import htsjdk.variant.variantcontext.VariantContextBuilder; import org.broadinstitute.hellbender.GATKBaseTest; import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.utils.SimpleInterval; +import org.broadinstitute.hellbender.utils.haplotype.Event; import org.broadinstitute.hellbender.utils.haplotype.EventMap; import org.broadinstitute.hellbender.utils.haplotype.Haplotype; import org.broadinstitute.hellbender.utils.haplotype.PartiallyDeterminedHaplotype; @@ -14,44 +13,39 @@ import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; -import static org.testng.Assert.*; - public class PartiallyDeterminedHaplotypeComputationEngineUnitTest extends GATKBaseTest { - VariantContext SNP_C_90 = new 
VariantContextBuilder("a","20",90, 90, Arrays.asList(Allele.REF_A,Allele.ALT_C)).make(); - VariantContext DEL_AAAAAAA_98 = new VariantContextBuilder("a","20",98, 104, Arrays.asList(Allele.create("AAAAAAA", true),Allele.ALT_A)).make(); - VariantContext SNP_C_100 = new VariantContextBuilder("a","20",100, 100, Arrays.asList(Allele.REF_A,Allele.ALT_C)).make(); - VariantContext SNP_G_101 = new VariantContextBuilder("a","20",101, 101, Arrays.asList(Allele.REF_A,Allele.ALT_G)).make(); - VariantContext SNP_G_102 = new VariantContextBuilder("a","20",102, 102, Arrays.asList(Allele.REF_A,Allele.ALT_G)).make(); - VariantContext SNP_C_104 = new VariantContextBuilder("a","20",104, 104, Arrays.asList(Allele.REF_A,Allele.ALT_C)).make(); - VariantContext SNP_C_105 = new VariantContextBuilder("a","20",105, 105, Arrays.asList(Allele.REF_A,Allele.ALT_C)).make(); - VariantContext SNP_G_105 = new VariantContextBuilder("a","20",105, 105, Arrays.asList(Allele.REF_A,Allele.ALT_G)).make(); - VariantContext SNP_C_106 = new VariantContextBuilder("a","20",106, 106, Arrays.asList(Allele.REF_A,Allele.ALT_C)).make(); - VariantContext SNP_T_106 = new VariantContextBuilder("a","20",106, 106, Arrays.asList(Allele.REF_A,Allele.ALT_T)).make(); - VariantContext SNP_C_109 = new VariantContextBuilder("a","20",109, 109, Arrays.asList(Allele.REF_A,Allele.ALT_C)).make(); - VariantContext SNP_C_107 = new VariantContextBuilder("a","20",107, 107, Arrays.asList(Allele.REF_A,Allele.ALT_C)).make(); - - VariantContext DEL_AA_105 = new VariantContextBuilder("a","20",105, 106, Arrays.asList(Allele.create("AA", true),Allele.ALT_A)).make(); - VariantContext DEL_AA_100 = new VariantContextBuilder("a","20",100, 101, Arrays.asList(Allele.create("AA", true),Allele.ALT_A)).make(); - VariantContext DEL_AAA_102 = new VariantContextBuilder("a","20",102, 104, Arrays.asList(Allele.create("AAA", true),Allele.ALT_A)).make(); - VariantContext DEL_AAAAAAA_102 = new VariantContextBuilder("a","20",102, 108, Arrays.asList(Allele.create("AAAAAAA", true),Allele.ALT_A)).make(); - - - VariantContext INS_TT_105 = new VariantContextBuilder("a","20",105, 105, Arrays.asList(Allele.REF_A, Allele.create("AT"))).make(); - VariantContext INS_TT_103 = new VariantContextBuilder("a","20",103, 103, Arrays.asList(Allele.REF_A, Allele.create("AT"))).make(); - VariantContext INS_TT_100 = new VariantContextBuilder("a","20",100, 100, Arrays.asList(Allele.REF_A, Allele.create("AT"))).make(); - VariantContext INS_GGG_106 = new VariantContextBuilder("a","20",106, 106, Arrays.asList(Allele.REF_A, Allele.create("AGG"))).make(); + Event SNP_C_90 = new Event("20",90, Allele.REF_A,Allele.ALT_C); + Event DEL_AAAAAAA_98 = new Event("20",98, Allele.create("AAAAAAA", true),Allele.ALT_A); + Event SNP_C_100 = new Event("20",100, Allele.REF_A,Allele.ALT_C); + Event SNP_G_101 = new Event("20",101, Allele.REF_A,Allele.ALT_G); + Event SNP_G_102 = new Event("20",102, Allele.REF_A,Allele.ALT_G); + Event SNP_C_104 = new Event("20",104, Allele.REF_A,Allele.ALT_C); + Event SNP_C_105 = new Event("20",105, Allele.REF_A,Allele.ALT_C); + Event SNP_G_105 = new Event("20",105, Allele.REF_A,Allele.ALT_G); + Event SNP_C_106 = new Event("20",106, Allele.REF_A,Allele.ALT_C); + Event SNP_T_106 = new Event("20",106, Allele.REF_A,Allele.ALT_T); + Event SNP_C_109 = new Event("20",109, Allele.REF_A,Allele.ALT_C); + Event SNP_C_107 = new Event("20",107, Allele.REF_A,Allele.ALT_C); + + Event DEL_AA_105 = new Event("20",105, Allele.create("AA", true),Allele.ALT_A); + Event DEL_AA_100 = new Event("20",100, 
Allele.create("AA", true),Allele.ALT_A); + Event DEL_AAA_102 = new Event("20",102, Allele.create("AAA", true),Allele.ALT_A); + Event DEL_AAAAAAA_102 = new Event("20",102, Allele.create("AAAAAAA", true),Allele.ALT_A); + + + Event INS_TT_105 = new Event("20",105, Allele.REF_A, Allele.create("AT")); + Event INS_TT_103 = new Event("20",103, Allele.REF_A, Allele.create("AT")); + Event INS_TT_100 = new Event("20",100, Allele.REF_A, Allele.create("AT")); + Event INS_GGG_106 = new Event("20",106, Allele.REF_A, Allele.create("AGG")); // TODO THESE ARE FOR INVALID TEST CASES - VariantContext SNP_C_99 = new VariantContextBuilder("a","20",99, 99, Arrays.asList(Allele.REF_A,Allele.ALT_C)).make(); - VariantContext SNP_C_120 = new VariantContextBuilder("a","20",120, 120, Arrays.asList(Allele.REF_A,Allele.ALT_C)).make(); - - + Event SNP_C_99 = new Event("20",99, Allele.REF_A,Allele.ALT_C); + Event SNP_C_120 = new Event("20",120, Allele.REF_A,Allele.ALT_C); @DataProvider public Object[][] testConstructHaplotypeFromVariantsDataProvider() { @@ -86,25 +80,25 @@ public Object[][] testConstructHaplotypeFromVariantsDataProvider() { }; } @Test(dataProvider = "testConstructHaplotypeFromVariantsDataProvider") - public void basicConstructHaplotypeFromVariants(List variants, String expectedBases, String expectedCigar, int numberOfCompounds) { + public void basicConstructHaplotypeFromVariants(List events, String expectedBases, String expectedCigar, int numberOfCompounds) { Haplotype ref = new Haplotype("AAAAAAAAAA".getBytes(), true, 500, TextCigarCodec.decode("10M")); ref.setGenomeLocation(new SimpleInterval("20", 100, 110)); - Haplotype result = PartiallyDeterminedHaplotypeComputationEngine.constructHaplotypeFromVariants(ref, variants, true); + Haplotype result = PartiallyDeterminedHaplotypeComputationEngine.constructHaplotypeFromVariants(ref, events, true); Assert.assertEquals(result.getBases(), expectedBases.getBytes()); Assert.assertEquals(result.getCigar(), TextCigarCodec.decode(expectedCigar)); // Assert that the resulting event map matches the input variants: EventMap resultEMap = result.getEventMap(); // NOTE, because of representation in VCF lines, the compound alleles get compressed into a single in the event map, here we assert that this is correct. 
- Assert.assertEquals(resultEMap.getNumberOfEvents(), variants.size() - numberOfCompounds); + Assert.assertEquals(resultEMap.getNumberOfEvents(), events.size() - numberOfCompounds); } @Test(expectedExceptions = GATKException.class) public void TestOutOfOrderInputs() { Haplotype ref = new Haplotype("AAAAAAAAAA".getBytes(), true, 500, TextCigarCodec.decode("10M")); ref.setGenomeLocation(new SimpleInterval("20", 100, 110)); - List variants = Arrays.asList(SNP_C_105, SNP_G_105); + List variants = Arrays.asList(SNP_C_105, SNP_G_105); Haplotype result = PartiallyDeterminedHaplotypeComputationEngine.constructHaplotypeFromVariants(ref, variants, true); } @@ -113,27 +107,27 @@ public void TestOutOfOrderInputs() { public void TestSNPsOverlapping() { Haplotype ref = new Haplotype("AAAAAAAAAA".getBytes(), true, 500, TextCigarCodec.decode("10M")); ref.setGenomeLocation(new SimpleInterval("20", 100, 110)); - List variants = Arrays.asList(SNP_C_109, DEL_AA_100); + List events = Arrays.asList(SNP_C_109, DEL_AA_100); - Haplotype result = PartiallyDeterminedHaplotypeComputationEngine.constructHaplotypeFromVariants(ref, variants, true); + Haplotype result = PartiallyDeterminedHaplotypeComputationEngine.constructHaplotypeFromVariants(ref, events, true); } @Test(expectedExceptions = GATKException.class) public void TestVariantNotOverlappingHap() { Haplotype ref = new Haplotype("AAAAAAAAAA".getBytes(), true, 500, TextCigarCodec.decode("10M")); ref.setGenomeLocation(new SimpleInterval("20", 100, 110)); - List variants = Arrays.asList(SNP_C_90); + List events = Arrays.asList(SNP_C_90); - Haplotype result = PartiallyDeterminedHaplotypeComputationEngine.constructHaplotypeFromVariants(ref, variants, true); + Haplotype result = PartiallyDeterminedHaplotypeComputationEngine.constructHaplotypeFromVariants(ref, events, true); } @Test(expectedExceptions = GATKException.class) public void TestVariantIndelPartiallyOverlapping() { Haplotype ref = new Haplotype("AAAAAAAAAA".getBytes(), true, 500, TextCigarCodec.decode("10M")); ref.setGenomeLocation(new SimpleInterval("20", 100, 110)); - List variants = Arrays.asList(DEL_AAAAAAA_98); + List events = Arrays.asList(DEL_AAAAAAA_98); - Haplotype result = PartiallyDeterminedHaplotypeComputationEngine.constructHaplotypeFromVariants(ref, variants, true); + Haplotype result = PartiallyDeterminedHaplotypeComputationEngine.constructHaplotypeFromVariants(ref, events, true); } //This is a test asserting that a real edge case that was prone to cause failures in the PDHMM is handled properly when compound variants are taken into account. 
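The hunks above pin down the surface of the new Event type: it is constructed as new Event(contig, start, refAllele, altAllele), queried via getStart(), refAllele(), and altAllele(), and compared by value with Assert.assertEquals. What follows is a minimal sketch of that shape, assuming htsjdk's Allele and VCF-style 1-based coordinates; the field names and the getEnd() convention are inferred from the calls in this patch, not copied from the real org.broadinstitute.hellbender.utils.haplotype.Event.

    import htsjdk.variant.variantcontext.Allele;
    import java.util.Objects;

    // Sketch only: a light ref/alt/pos value class matching how the tests use Event.
    final class EventSketch {
        private final String contig;
        private final int start;          // 1-based position of the first reference base
        private final Allele refAllele;
        private final Allele altAllele;

        EventSketch(final String contig, final int start, final Allele refAllele, final Allele altAllele) {
            this.contig = contig;
            this.start = start;
            this.refAllele = refAllele;
            this.altAllele = altAllele;
        }

        public int getStart() { return start; }
        // A SNP ends where it starts; a deletion's reference span covers every deleted base.
        public int getEnd() { return start + refAllele.length() - 1; }
        public Allele refAllele() { return refAllele; }
        public Allele altAllele() { return altAllele; }

        @Override
        public boolean equals(final Object o) {
            return o instanceof EventSketch
                    && start == ((EventSketch) o).start
                    && contig.equals(((EventSketch) o).contig)
                    && refAllele.equals(((EventSketch) o).refAllele)
                    && altAllele.equals(((EventSketch) o).altAllele);
        }

        @Override
        public int hashCode() {
            return Objects.hash(contig, start, refAllele, altAllele);
        }
    }

Value equality of this kind is what lets the messy-alignment test in the next hunk assert Assert.assertEquals(actualEvent, e) rather than comparing start and alleles field by field.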
@@ -143,22 +137,22 @@ public void testMessyAlignemntSite() {
         Haplotype ref = new Haplotype("AAGAAAGATGGAGGCCCAGCCAGATCTGGACCCCACAGGCCGTCTCCCCACACAGCCATTCATGTGGTCTACTTCCAGCCATTCATGTGGTCTATTTCCAAGAAAATAGCCCATCCCCCCAAGATAACACCTTCTCAAAAACTTTACAGCTTTGTGTCTACACTGATATTTAGGTATTTTCTTTCTTTTTTTTTTATGATTAACACATCTAATTCAAGAATATCTTGGCAGGATATTCCCCGCTTAGGAAATG".getBytes(), true, 575, TextCigarCodec.decode("253M"));
         ref.setGenomeLocation(new SimpleInterval("20", 24152646, 24152898));
 
-        VariantContext VC1 = new VariantContextBuilder("a", "20", 24152708, 24152708, Arrays.asList(Allele.REF_T, Allele.ALT_C)).make();
-        VariantContext VC2 = new VariantContextBuilder("a", "20", 24152728, 24152728, Arrays.asList(Allele.REF_T, Allele.ALT_C)).make();
-        VariantContext VC3 = new VariantContextBuilder("a", "20", 24152729, 24152741, Arrays.asList(Allele.create("CATGTGGTCTATT", true), Allele.ALT_C)).make();
+        final Event e1 = new Event("20", 24152708, Allele.REF_T, Allele.ALT_C);
+        final Event e2 = new Event("20", 24152728, Allele.REF_T, Allele.ALT_C);
+        final Event e3 = new Event("20", 24152729, Allele.create("CATGTGGTCTATT", true), Allele.ALT_C);
 
-        List<VariantContext> variants = Arrays.asList(VC1, VC2, VC3);
+        final List<Event> events = Arrays.asList(e1, e2, e3);
 
-        Haplotype result = PartiallyDeterminedHaplotypeComputationEngine.constructHaplotypeFromVariants(ref, variants, true);
+        Haplotype result = PartiallyDeterminedHaplotypeComputationEngine.constructHaplotypeFromVariants(ref, events, true);
         Assert.assertEquals(result.getCigar(), TextCigarCodec.decode("62M1X19M1X1M12D157M"));
 
         // Assert that the resulting event map matches the input variants:
         EventMap resultEMap = result.getEventMap();
-        Assert.assertEquals(resultEMap.getNumberOfEvents(), variants.size());
-        for (VariantContext v : variants) {
-            VariantContext actualVC = resultEMap.get(v.getStart());
-            Assert.assertNotNull(actualVC);
-            Assert.assertEquals(actualVC.getAlleles(), v.getAlleles());
+        Assert.assertEquals(resultEMap.getNumberOfEvents(), events.size());
+        for (Event e : events) {
+            Event actualEvent = resultEMap.get(e.getStart());
+            Assert.assertNotNull(actualEvent);
+            Assert.assertEquals(actualEvent, e);
         }
     }
 
@@ -192,15 +186,15 @@ public Object[][] testGeneratePDHaplotypeDataProvider() {
         };
     }
 
     @Test(dataProvider = "testGeneratePDHaplotypeDataProvider")
-    public void testGeneratePDHaplotypeFromVariants(List<VariantContext> variants, VariantContext targetVariant, boolean useRefBase, String expectedBases, byte[] expectedAltArray, String expectedCigar) {
+    public void testGeneratePDHaplotypeFromVariants(List<Event> events, Event targetEvent, boolean useRefBase, String expectedBases, byte[] expectedAltArray, String expectedCigar) {
         Haplotype ref = new Haplotype("AAAAAAAAAA".getBytes(), true, 500, TextCigarCodec.decode("10M"));
         ref.setGenomeLocation(new SimpleInterval("20", 100, 110));
 
-        PartiallyDeterminedHaplotype result = PartiallyDeterminedHaplotypeComputationEngine.createNewPDHaplotypeFromEvents(ref, targetVariant, useRefBase, variants);
+        PartiallyDeterminedHaplotype result = PartiallyDeterminedHaplotypeComputationEngine.createNewPDHaplotypeFromEvents(ref, targetEvent, useRefBase, events);
         Assert.assertEquals(new String(result.getBases()), expectedBases);
         Assert.assertEquals(result.getAlternateBases(), expectedAltArray);
         Assert.assertEquals(result.getCigar(), TextCigarCodec.decode(expectedCigar));
-        Assert.assertEquals(result.getDeterminedPosition(), targetVariant.getStart());
+        Assert.assertEquals(result.getDeterminedPosition(), targetEvent.getStart());
     }
 
     // NOTE: This is an enforcement of a behavior that I consider to be a bug in DRAGEN. Specifically my assumption that we needn't ever concern
diff --git a/src/test/java/org/broadinstitute/hellbender/utils/haplotype/EventMapUnitTest.java b/src/test/java/org/broadinstitute/hellbender/utils/haplotype/EventMapUnitTest.java
index d0c9c8248bc..8da29ae758c 100644
--- a/src/test/java/org/broadinstitute/hellbender/utils/haplotype/EventMapUnitTest.java
+++ b/src/test/java/org/broadinstitute/hellbender/utils/haplotype/EventMapUnitTest.java
@@ -3,20 +3,18 @@
 
 import htsjdk.samtools.TextCigarCodec;
 import htsjdk.variant.variantcontext.Allele;
-import htsjdk.variant.variantcontext.VariantContext;
-import htsjdk.variant.variantcontext.VariantContextBuilder;
 import org.apache.commons.lang3.StringUtils;
+import org.broadinstitute.hellbender.GATKBaseTest;
 import org.broadinstitute.hellbender.utils.GenomeLoc;
-import org.broadinstitute.hellbender.utils.SimpleInterval;
 import org.broadinstitute.hellbender.utils.UnvalidatingGenomeLoc;
-import org.broadinstitute.hellbender.utils.Utils;
-import org.broadinstitute.hellbender.GATKBaseTest;
-import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils;
 import org.testng.Assert;
 import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
 
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
 
 public final class EventMapUnitTest extends GATKBaseTest {
     private final static String CHR = "20";
@@ -55,13 +53,13 @@ public void testMNPs(final String refBases, final String haplotypeBases, final S
         final Haplotype hap = new Haplotype(haplotypeBases.getBytes(), false, 0, TextCigarCodec.decode(cigar));
         final GenomeLoc loc = new UnvalidatingGenomeLoc(CHR, 0, 1, refBases.length());
         for (final int maxDist : maxMnpDistance) {
-            final EventMap events = new EventMap(hap, refBases.getBytes(), loc, NAME, maxDist);
+            final EventMap events = EventMap.fromHaplotype(hap, refBases.getBytes(), loc, maxDist);
             Assert.assertEquals(events.getNumberOfEvents(), expectedAlleles.size());
-            final List<VariantContext> foundAlleles = new ArrayList<>(events.getVariantContexts());
+            final List<Event> foundAlleles = new ArrayList<>(events.getEvents());
             for (int i = 0; i < events.getNumberOfEvents(); i++) {
-                final VariantContext actual = foundAlleles.get(i);
-                Assert.assertEquals(actual.getReference().getDisplayString(), expectedAlleles.get(i).get(0));
-                Assert.assertEquals(actual.getAlternateAllele(0).getDisplayString(), expectedAlleles.get(i).get(1));
+                final Event actual = foundAlleles.get(i);
+                Assert.assertEquals(actual.refAllele().getDisplayString(), expectedAlleles.get(i).get(0));
+                Assert.assertEquals(actual.altAllele().getDisplayString(), expectedAlleles.get(i).get(1));
             }
         }
     }
@@ -122,16 +120,16 @@ public void testGetOverlappingEvents(final String haplotypeBases, final String c
         final GenomeLoc refLoc = new UnvalidatingGenomeLoc(CHR, 0, 1, refBases.length());
         final Haplotype hap = new Haplotype(haplotypeBases.getBytes(), false, hapStartWrtRef, TextCigarCodec.decode(cigar));
-        final EventMap eventMap = new EventMap(hap, refBases.getBytes(), refLoc, NAME, 1);
+        final EventMap eventMap = EventMap.fromHaplotype(hap, refBases.getBytes(), refLoc, 1);
 
-        final List<VariantContext> overlappingEvents = eventMap.getOverlappingEvents(queryLoc);
+        final List<Event> overlappingEvents = eventMap.getOverlappingEvents(queryLoc);
         final boolean eventsExpected = expectedAlt != null || expectedRef != null;
         Assert.assertEquals(overlappingEvents.size(), eventsExpected ? 1 : 0);
 
         if (eventsExpected) {
-            Assert.assertEquals(overlappingEvents.get(0).getReference(), expectedRef);
-            Assert.assertEquals(overlappingEvents.get(0).getAlternateAllele(0), expectedAlt);
+            Assert.assertEquals(overlappingEvents.get(0).refAllele(), expectedRef);
+            Assert.assertEquals(overlappingEvents.get(0).altAllele(), expectedAlt);
         }
     }
 
@@ -156,14 +154,12 @@ public Object[][] makeMakeBlockData() {
      */
     @Test(dataProvider = "MakeBlockData")
     public void testGetNeighborhood(final List<String> firstAlleles, final List<String> secondAlleles, final List<String> expectedAlleles) {
-        final VariantContext vc1 = GATKVariantContextUtils.makeFromAlleles("x", "20", 10, firstAlleles);
-        final VariantContext vc2 = GATKVariantContextUtils.makeFromAlleles("x", "20", 10, secondAlleles);
-        final VariantContext expected = GATKVariantContextUtils.makeFromAlleles("x", "20", 10, expectedAlleles);
+        final Event e1 = new Event("20", 10, Allele.create(firstAlleles.get(0), true), Allele.create(firstAlleles.get(1)));
+        final Event e2 = new Event("20", 10, Allele.create(secondAlleles.get(0), true), Allele.create(secondAlleles.get(1)));
+        final Event expected = new Event("20", 10, Allele.create(expectedAlleles.get(0), true), Allele.create(expectedAlleles.get(1)));
 
-        final EventMap eventMap = new EventMap(Collections.emptyList());
-        final VariantContext block = eventMap.makeBlock(vc1, vc2);
+        final Event block = EventMap.combineEvents(e1, e2);
 
-        Assert.assertEquals(block.getStart(), expected.getStart());
-        Assert.assertEquals(block.getAlleles(), expected.getAlleles());
+        Assert.assertEquals(block, expected);
     }
 }
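The getOverlappingEvents test above expects exactly one event back when a query position falls inside an event's reference span. A toy version of the overlap predicate being exercised, assuming 1-based inclusive coordinates and the end convention from the earlier sketch (illustrative only, not GATK's implementation):

    final class OverlapSketch {
        // True when a 1-based query position falls inside an event's reference span
        // [start, start + refAlleleLength - 1]: a SNP covers only its own base, while
        // a deletion's reference allele covers every deleted base.
        static boolean overlapsPosition(final int eventStart, final int refAlleleLength, final int queryPos) {
            return eventStart <= queryPos && queryPos <= eventStart + refAlleleLength - 1;
        }
    }

Under this convention, overlapsPosition(105, 2, 106) is true for the two-base deletion DEL_AA_105 defined earlier, but overlapsPosition(105, 1, 106) is false for SNP_C_105.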
tests.add(new Object[]{"ACA", "AGG", 2, Arrays.asList(1, 2)}); - - // three splits - tests.add(new Object[]{"ACA", "GGG", 3, Arrays.asList(0, 1, 2)}); - - return tests.toArray(new Object[][]{}); - } - - @Test(dataProvider = "PrimitiveAlleleSplittingData") - public void testPrimitiveAlleleSplitting(final String ref, final String alt, final int expectedSplit, final List variantPositions) { - - final int start = 10; - final VariantContext vc = GATKVariantContextUtils.makeFromAlleles("test", "20", start, Arrays.asList(ref, alt)); - - final List result = GATKVariantContextUtils.splitIntoPrimitiveAlleles(vc); - - if ( expectedSplit > 0 ) { - Assert.assertEquals(result.size(), expectedSplit); - for ( int i = 0; i < variantPositions.size(); i++ ) { - Assert.assertEquals(result.get(i).getStart(), start + variantPositions.get(i)); - } - } else { - Assert.assertEquals(result.size(), 1); - Assert.assertEquals(vc, result.get(0)); - } - } - // -------------------------------------------------------------------------------- // // test allele remapping @@ -2248,13 +2198,13 @@ public Object[][] provideMatchAlleles() { new SimpleInterval("3", 69521, 69521), Arrays.asList("T", "A", "C"), new int[]{1, 0, -1}}, {new SimpleInterval("3", 69552, 69552), Arrays.asList("G", "A"), - new SimpleInterval("3", 69521, 69521), Arrays.asList("G", "T", "A", "C"), + new SimpleInterval("3", 69552, 69552), Arrays.asList("G", "T", "A", "C"), new int[]{1}}, - {new SimpleInterval("3", 69552, 69552), Arrays.asList("G", "T"), + {new SimpleInterval("3", 69521, 69521), Arrays.asList("G", "T"), new SimpleInterval("3", 69521, 69521), Arrays.asList("G", "T", "A", "C"), new int[]{0}}, - {new SimpleInterval("3", 69552, 69552), Arrays.asList("G", "C"), - new SimpleInterval("3", 69521, 69521), Arrays.asList("G", "T", "A", "C"), + {new SimpleInterval("3", 69550, 69550), Arrays.asList("G", "C"), + new SimpleInterval("3", 69550, 69550), Arrays.asList("G", "T", "A", "C"), new int[]{2}}, {new SimpleInterval("3", 324682, 324714), Arrays.asList("ACCAGGCCCAGCTCATGCTTCTTTGCAGCCTCT", "A"), new SimpleInterval("3", 324682, 324714), Arrays.asList("ACCAGGCCCAGCTCATGCTTCTTTGCAGCCTCT", "TCCAGGCCCAGCTCATGCTTCTTTGCAGCCTCT", "A"), @@ -2346,11 +2296,11 @@ public Object[][] multiAllelicDataProvider() { } @Test(dataProvider = "multiAllelic") - public void testSplitMultiAllelic(final VariantContext vcToSplit, final List expectedVcs, Boolean keepOriginalChrCounts) { + public void testSplitMultiAllelic(final VariantContext vcToSplit, final List expectedVariants, Boolean keepOriginalChrCounts) { final List outVcs = GATKVariantContextUtils.splitVariantContextToBiallelics(vcToSplit, true, GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, keepOriginalChrCounts); - Assert.assertEquals(outVcs.size(), expectedVcs.size()); + Assert.assertEquals(outVcs.size(), expectedVariants.size()); for (int i = 0; i < outVcs.size(); i++) { - VariantContextTestUtils.assertVariantContextsAreEqual(outVcs.get(i), expectedVcs.get(i), new ArrayList(), Collections.emptyList()); + VariantContextTestUtils.assertVariantContextsAreEqual(outVcs.get(i), expectedVariants.get(i), new ArrayList(), Collections.emptyList()); } } diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.pileupCallerDRAGEN.378.gatk4.vcf.idx b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.pileupCallerDRAGEN.378.gatk4.vcf.idx index faaa8db8f33f5465dd7a04f43478d68d10e33f15..28f0f470488a5eb7da48ddc83b30001e15699ab1 100644 GIT binary patch delta 28 
diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.pileupCallerDRAGEN.378.gatk4.vcf.idx b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.pileupCallerDRAGEN.378.gatk4.vcf.idx
index faaa8db8f33f5465dd7a04f43478d68d10e33f15..28f0f470488a5eb7da48ddc83b30001e15699ab1 100644
GIT binary patch
delta 28
kcmaDY_DF1k6h}&8S!PO7YTiWcde%s`*EJm**YDv10HX>Eod5s;

delta 37
tcmaDP_F8O$lwfLZYEfmDenx6ePEu-KN@~$WrFzyC>bLY;HZI%41ppc@4>bS)

diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.pileupCallerDRAGEN.WithIndels.gatk4.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.pileupCallerDRAGEN.WithIndels.gatk4.vcf
index 09a2fbc648e..cc3a6dbae7f 100644
--- a/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.pileupCallerDRAGEN.WithIndels.gatk4.vcf
+++ b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.pileupCallerDRAGEN.WithIndels.gatk4.vcf
@@ -51,8 +51,8 @@
 20 10008146 . TA T 2165.03 . AC=2;AF=1.00;AN=2;DP=77;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.56;QD=30.93;SOR=0.874 GT:AD:DP:GQ:PL 1/1:0,70:70:99:2179,211,0
 20 10008458 . T G 2669.06 . AC=2;AF=1.00;AN=2;DP=72;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=52.76;QD=23.23;SOR=0.980 GT:AD:DP:GQ:PL 1/1:0,69:69:99:2683,207,0
 20 10008921 . C CA 283.60 . AC=1;AF=0.500;AN=2;BaseQRankSum=0.610;DP=60;ExcessHet=0.0000;FS=1.311;MLEAC=1;MLEAF=0.500;MQ=56.00;MQRankSum=-1.306;QD=8.10;ReadPosRankSum=-0.017;SOR=0.644 GT:AD:DP:GQ:PL 0/1:18,17:35:99:291,0,366
-20 10008948 . TA T 663.02 . AC=2;AF=1.00;AN=2;BaseQRankSum=0.090;DP=55;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=51.58;MQRankSum=0.565;QD=23.68;ReadPosRankSum=0.403;SOR=0.527 GT:AD:DP:GQ:PL 1/1:2,26:28:29:677,29,0
-20 10008952 . CACACACACACA CCACACACACA,C 766.97 . AC=1,1;AF=0.500,0.500;AN=2;BaseQRankSum=-0.634;DP=58;ExcessHet=0.0000;FS=0.000;MLEAC=1,1;MLEAF=0.500,0.500;MQ=51.40;MQRankSum=0.426;QD=25.57;ReadPosRankSum=0.125;SOR=0.473 GT:AD:DP:GQ:PL 1/2:2,12,16:30:99:802,564,583,218,0,925
+20 10008948 . TA T 548.01 . AC=2;AF=1.00;AN=2;BaseQRankSum=0.090;DP=55;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=51.58;MQRankSum=0.565;QD=19.57;ReadPosRankSum=0.403;SOR=0.527 GT:AD:DP:GQ:PL 1/1:2,26:28:27:562,27,0
+20 10008952 . CACACACACACA CCACACACACA,C 651.04 . AC=1,1;AF=0.500,0.500;AN=2;BaseQRankSum=-0.634;DP=58;ExcessHet=0.0000;FS=0.000;MLEAC=1,1;MLEAF=0.500,0.500;MQ=51.40;MQRankSum=0.426;QD=21.70;ReadPosRankSum=0.125;SOR=0.473 GT:AD:DP:GQ:PL 1/2:2,12,16:30:99:802,564,583,218,0,925
 20 10009400 . T A 3013.06 . AC=2;AF=1.00;AN=2;DP=75;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=29.03;SOR=0.804 GT:AD:DP:GQ:PL 1/1:0,74:74:99:3027,223,0
 20 10009512 . C G 3054.06 . AC=2;AF=1.00;AN=2;DP=80;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=58.53;QD=30.67;SOR=1.223 GT:AD:DP:GQ:PL 1/1:0,77:77:99:3068,231,0
 20 10009871 . A G 41.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-2.394;DP=18;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=57.39;MQRankSum=0.696;QD=2.97;ReadPosRankSum=-2.638;SOR=0.368 GT:AD:DP:GQ:PL 0/1:11,3:14:49:49,0,128
@@ -67,7 +67,7 @@
 20 10012362 . G T 850.06 . AC=2;AF=1.00;AN=2;DP=25;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=54.31;QD=34.42;SOR=4.003 GT:AD:DP:GQ:PL 1/1:0,23:23:69:864,69,0
 20 10012384 . T C 250.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-1.773;DP=18;ExcessHet=0.0000;FS=7.375;MLEAC=1;MLEAF=0.500;MQ=53.38;MQRankSum=-0.811;QD=13.92;ReadPosRankSum=0.800;SOR=0.044 GT:AD:DP:GQ:PL 0/1:10,8:18:99:258,0,355
 20 10012387 . T C 399.06 . AC=2;AF=1.00;AN=2;DP=18;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=53.38;QD=24.94;SOR=3.258 GT:AD:DP:GQ:PL 1/1:0,16:16:46:413,46,0
-20 10012570 . G GCA 275.98 . AC=2;AF=1.00;AN=2;DP=8;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=42.80;QD=28.53;SOR=4.174 GT:AD:DP:GQ:PL 1/1:0,7:7:21:290,21,0
+20 10012570 . G GCA 270.98 . AC=2;AF=1.00;AN=2;DP=8;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=42.80;QD=28.53;SOR=4.174 GT:AD:DP:GQ:PL 1/1:0,7:7:21:285,21,0
 20 10012572 . GT G 191.93 . AC=2;AF=1.00;AN=2;DP=9;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=43.79;QD=31.99;SOR=3.912 GT:AD:DP:GQ:PL 1/1:0,6:6:18:206,18,0
 20 10012631 . C CG 407.60 . AC=1;AF=0.500;AN=2;BaseQRankSum=-1.030;DP=21;ExcessHet=0.0000;FS=8.822;MLEAC=1;MLEAF=0.500;MQ=48.34;MQRankSum=-1.670;QD=19.41;ReadPosRankSum=-1.777;SOR=0.048 GT:AD:DP:GQ:PL 0/1:8,13:21:99:415,0,237
 20 10012636 . G C 397.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=2.711;DP=22;ExcessHet=0.0000;FS=5.959;MLEAC=1;MLEAF=0.500;MQ=48.93;MQRankSum=-1.513;QD=18.07;ReadPosRankSum=-1.777;SOR=0.058 GT:AD:DP:GQ:PL 0/1:8,14:22:99:405,0,175
@@ -95,31 +95,31 @@
 20 10032972 . C CAT 1152.60 . AC=1;AF=0.500;AN=2;BaseQRankSum=-0.354;DP=67;ExcessHet=0.0000;FS=5.062;MLEAC=1;MLEAF=0.500;MQ=59.66;MQRankSum=-1.017;QD=18.30;ReadPosRankSum=-0.723;SOR=1.300 GT:AD:DP:GQ:PL 0/1:33,30:63:99:1160,0,1295
 20 10034306 . T C 803.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-3.661;DP=80;ExcessHet=0.0000;FS=0.890;MLEAC=1;MLEAF=0.500;MQ=59.48;MQRankSum=1.150;QD=10.72;ReadPosRankSum=0.539;SOR=0.685 GT:AD:DP:GQ:PL 0/1:46,29:75:99:811,0,1593
 20 10036930 . CGATAGCCCTAGCCCTAGATA C 1069.60 . AC=1;AF=0.500;AN=2;BaseQRankSum=0.720;DP=96;ExcessHet=0.0000;FS=13.351;MLEAC=1;MLEAF=0.500;MQ=54.33;MQRankSum=-3.973;QD=16.21;ReadPosRankSum=0.460;SOR=0.256 GT:AD:DP:GQ:PL 0/1:37,29:66:99:1077,0,1441
-20 10037037 . C T 1453.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=3.504;DP=88;ExcessHet=0.0000;FS=1.943;MLEAC=1;MLEAF=0.500;MQ=60.08;MQRankSum=-1.193;QD=17.95;ReadPosRankSum=-1.609;SOR=0.495 GT:AD:DP:GQ:PL 0/1:39,42:81:99:1461,0,1127
-20 10037110 . T TGATA 1196.60 . AC=1;AF=0.500;AN=2;BaseQRankSum=2.089;DP=83;ExcessHet=0.0000;FS=16.682;MLEAC=1;MLEAF=0.500;MQ=55.24;MQRankSum=5.270;QD=24.42;ReadPosRankSum=4.165;SOR=1.122 GT:AD:DP:GQ:PL 0/1:13,36:49:99:1204,0,402
-20 10037144 . T TGATAGATA 667.01 . AC=1;AF=0.500;AN=2;BaseQRankSum=1.744;DP=80;ExcessHet=0.0000;FS=13.898;MLEAC=1;MLEAF=0.500;MQ=53.44;MQRankSum=-5.852;QD=8.55;ReadPosRankSum=2.953;SOR=1.320 GT:AD:DP:GQ:PL 0/1:39,39:78:99:1428,0,1388
+20 10037037 . C T 1457.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=3.504;DP=88;ExcessHet=0.0000;FS=1.943;MLEAC=1;MLEAF=0.500;MQ=60.08;MQRankSum=-1.193;QD=18.00;ReadPosRankSum=-1.609;SOR=0.495 GT:AD:DP:GQ:PL 0/1:39,42:81:99:1465,0,1124
+20 10037110 . T TGATA 1701.03 . AC=2;AF=1.00;AN=2;DP=84;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=55.01;QD=29.52;SOR=1.127 GT:AD:DP:GQ:PL 1/1:0,42:42:99:1715,123,0
+20 10037144 . T TGATAGATA 1103.02 . AC=2;AF=1.00;AN=2;DP=81;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=53.21;QD=25.65;SOR=0.874 GT:AD:DP:GQ:PL 1/1:0,43:43:99:1857,127,0
 20 10037709 . A T 1024.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-2.367;DP=73;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=59.68;MQRankSum=0.934;QD=14.43;ReadPosRankSum=0.040;SOR=0.608 GT:AD:DP:GQ:PL 0/1:39,32:71:99:1032,0,1354
 20 10039371 . T G 1345.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-3.260;DP=76;ExcessHet=0.0000;FS=6.144;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.000;QD=18.18;ReadPosRankSum=0.391;SOR=1.031 GT:AD:DP:GQ:PL 0/1:34,40:74:99:1353,0,1194
 20 10040772 . C CT 1467.03 . AC=2;AF=1.00;AN=2;DP=64;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=56.07;QD=28.77;SOR=0.813 GT:AD:DP:GQ:PL 1/1:0,51:51:99:1481,153,0
-20 10042761 . A G 3536.06 . AC=2;AF=1.00;AN=2;DP=93;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=29.52;SOR=1.364 GT:AD:DP:GQ:PL 1/1:0,90:90:99:3550,269,0
+20 10042761 . A G 3536.06 . AC=2;AF=1.00;AN=2;DP=93;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=33.47;SOR=1.364 GT:AD:DP:GQ:PL 1/1:0,90:90:99:3550,269,0
 20 10043002 . A T 1308.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=3.986;DP=89;ExcessHet=0.0000;FS=2.877;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.000;QD=15.22;ReadPosRankSum=-0.035;SOR=0.434 GT:AD:DP:GQ:PL 0/1:48,38:86:99:1316,0,1658
-20 10044849 . A G 2106.06 . AC=2;AF=1.00;AN=2;DP=59;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.61;QD=33.47;SOR=1.316 GT:AD:DP:GQ:PL 1/1:0,59:59:99:2120,177,0
+20 10044849 . A G 2106.06 . AC=2;AF=1.00;AN=2;DP=59;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.61;QD=32.91;SOR=1.316 GT:AD:DP:GQ:PL 1/1:0,59:59:99:2120,177,0
 20 10046537 . A G 840.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-3.909;DP=57;ExcessHet=0.0000;FS=3.724;MLEAC=1;MLEAF=0.500;MQ=54.50;MQRankSum=0.395;QD=14.75;ReadPosRankSum=0.727;SOR=1.262 GT:AD:DP:GQ:PL 0/1:28,29:57:99:848,0,902
 20 10050828 . T C 1248.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-2.704;DP=91;ExcessHet=0.0000;FS=2.887;MLEAC=1;MLEAF=0.500;MQ=59.54;MQRankSum=0.892;QD=14.52;ReadPosRankSum=-0.140;SOR=0.521 GT:AD:DP:GQ:PL 0/1:49,37:86:99:1256,0,1768
 20 10052688 . C A 1083.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=0.180;DP=83;ExcessHet=0.0000;FS=3.135;MLEAC=1;MLEAF=0.500;MQ=59.78;MQRankSum=0.870;QD=14.07;ReadPosRankSum=1.443;SOR=0.379 GT:AD:DP:GQ:PL 0/1:45,32:77:99:1091,0,1645
 20 10058022 . T C 996.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-2.346;DP=72;ExcessHet=0.0000;FS=0.920;MLEAC=1;MLEAF=0.500;MQ=59.16;MQRankSum=0.466;QD=14.66;ReadPosRankSum=0.018;SOR=0.849 GT:AD:DP:GQ:PL 0/1:36,32:68:99:1004,0,1261
 20 10067049 . TAAAAAAA T 461.20 . AC=2;AF=1.00;AN=2;BaseQRankSum=-0.254;DP=61;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=55.70;MQRankSum=-0.200;QD=30.75;ReadPosRankSum=-1.490;SOR=1.179 GT:AD:DP:GQ:PL 1/1:2,13:15:8:474,8,0
-20 10067264 . G A 2937.06 . AC=2;AF=1.00;AN=2;DP=73;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.37;QD=32.91;SOR=0.776 GT:AD:DP:GQ:PL 1/1:0,73:73:99:2951,219,0
-20 10067722 . A C 2161.06 . AC=2;AF=1.00;AN=2;DP=59;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=31.31;SOR=0.846 GT:AD:DP:GQ:PL 1/1:0,54:54:99:2175,163,0
+20 10067264 . G A 2937.06 . AC=2;AF=1.00;AN=2;DP=73;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.37;QD=31.31;SOR=0.776 GT:AD:DP:GQ:PL 1/1:0,73:73:99:2951,219,0
+20 10067722 . A C 2161.06 . AC=2;AF=1.00;AN=2;DP=59;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=31.45;SOR=0.846 GT:AD:DP:GQ:PL 1/1:0,54:54:99:2175,163,0
 20 10068172 . G A 532.04 . AC=2;AF=1.00;AN=2;BaseQRankSum=1.453;DP=18;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=56.04;MQRankSum=-0.309;QD=31.30;ReadPosRankSum=-0.436;SOR=0.481 GT:AD:DP:GQ:PL 1/1:1,16:17:25:546,25,0
-20 10068981 . G A 2444.06 . AC=2;AF=1.00;AN=2;DP=63;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=31.45;SOR=0.950 GT:AD:DP:GQ:PL 1/1:0,59:59:99:2458,178,0
-20 10071135 . C T 3272.06 . AC=2;AF=1.00;AN=2;DP=88;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.21;QD=33.27;SOR=1.003 GT:AD:DP:GQ:PL 1/1:0,79:79:99:3286,238,0
-20 10071187 . G A 3425.06 . AC=2;AF=1.00;AN=2;DP=85;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.73;QD=31.26;SOR=1.367 GT:AD:DP:GQ:PL 1/1:0,82:82:99:3439,246,0
+20 10068981 . G A 2444.06 . AC=2;AF=1.00;AN=2;DP=63;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=33.27;SOR=0.950 GT:AD:DP:GQ:PL 1/1:0,59:59:99:2458,178,0
+20 10071135 . C T 3272.06 . AC=2;AF=1.00;AN=2;DP=88;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.21;QD=31.26;SOR=1.003 GT:AD:DP:GQ:PL 1/1:0,79:79:99:3286,238,0
+20 10071187 . G A 3425.06 . AC=2;AF=1.00;AN=2;DP=85;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.73;QD=27.76;SOR=1.367 GT:AD:DP:GQ:PL 1/1:0,82:82:99:3439,246,0
 20 10072505 . A G 2114.06 . AC=2;AF=1.00;AN=2;DP=67;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=33.03;SOR=0.822 GT:AD:DP:GQ:PL 1/1:0,64:64:99:2128,191,0
-20 10074716 . G A 2415.06 . AC=2;AF=1.00;AN=2;DP=70;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.67;QD=27.76;SOR=1.214 GT:AD:DP:GQ:PL 1/1:0,64:64:99:2429,192,0
-20 10074806 . G A 2832.06 . AC=2;AF=1.00;AN=2;DP=75;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.38;QD=29.09;SOR=0.840 GT:AD:DP:GQ:PL 1/1:0,71:71:99:2846,213,0
-20 10075043 . T C 2420.06 . AC=2;AF=1.00;AN=2;DP=64;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.64;QD=26.08;SOR=1.107 GT:AD:DP:GQ:PL 1/1:0,61:61:99:2434,184,0
-20 10075168 . C T 3627.06 . AC=2;AF=1.00;AN=2;DP=91;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.37;QD=33.85;SOR=0.997 GT:AD:DP:GQ:PL 1/1:0,88:88:99:3641,264,0
+20 10074716 . G A 2415.06 . AC=2;AF=1.00;AN=2;DP=70;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.67;QD=29.09;SOR=1.214 GT:AD:DP:GQ:PL 1/1:0,64:64:99:2429,192,0
+20 10074806 . G A 2832.06 . AC=2;AF=1.00;AN=2;DP=75;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.38;QD=26.08;SOR=0.840 GT:AD:DP:GQ:PL 1/1:0,71:71:99:2846,213,0
+20 10075043 . T C 2420.06 . AC=2;AF=1.00;AN=2;DP=64;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.64;QD=33.85;SOR=1.107 GT:AD:DP:GQ:PL 1/1:0,61:61:99:2434,184,0
+20 10075168 . C T 3627.06 . AC=2;AF=1.00;AN=2;DP=91;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.37;QD=27.08;SOR=0.997 GT:AD:DP:GQ:PL 1/1:0,88:88:99:3641,264,0
 20 10075508 . GA G 1952.03 . AC=2;AF=1.00;AN=2;DP=81;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=61.11;QD=27.89;SOR=1.085 GT:AD:DP:GQ:PL 1/1:0,70:70:99:1966,210,0
 20 10076250 . A G 1128.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-3.386;DP=82;ExcessHet=0.0000;FS=7.546;MLEAC=1;MLEAF=0.500;MQ=59.41;MQRankSum=-1.423;QD=14.29;ReadPosRankSum=0.270;SOR=0.344 GT:AD:DP:GQ:PL 0/1:40,39:79:99:1136,0,1339
 20 10076339 . A G 1330.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-0.069;DP=76;ExcessHet=0.0000;FS=1.992;MLEAC=1;MLEAF=0.500;MQ=59.70;MQRankSum=-0.866;QD=18.48;ReadPosRankSum=1.015;SOR=0.446 GT:AD:DP:GQ:PL 0/1:32,40:72:99:1338,0,1035
@@ -128,7 +128,7 @@
 20 10077752 . T C 1366.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-6.503;DP=83;ExcessHet=0.0000;FS=4.458;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.000;QD=17.30;ReadPosRankSum=-0.352;SOR=1.251 GT:AD:DP:GQ:PL 0/1:34,45:79:99:1374,0,1187
 20 10081750 . C A 1374.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=2.670;DP=92;ExcessHet=0.0000;FS=0.824;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.000;QD=16.56;ReadPosRankSum=-0.406;SOR=0.664 GT:AD:DP:GQ:PL 0/1:44,39:83:99:1382,0,1525
 20 10081800 . C T 1130.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=0.241;DP=69;ExcessHet=0.0000;FS=11.086;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.000;QD=16.63;ReadPosRankSum=-2.622;SOR=0.143 GT:AD:DP:GQ:PL 0/1:35,33:68:99:1138,0,1222
-20 10082892 . C T 1673.06 . AC=2;AF=1.00;AN=2;DP=44;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.48;QD=27.08;SOR=0.836 GT:AD:DP:GQ:PL 1/1:0,43:43:99:1687,129,0
+20 10082892 . C T 1673.06 . AC=2;AF=1.00;AN=2;DP=44;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.48;QD=25.42;SOR=0.836 GT:AD:DP:GQ:PL 1/1:0,43:43:99:1687,129,0
 20 10085211 . A T 1231.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-1.501;DP=81;ExcessHet=0.0000;FS=0.870;MLEAC=1;MLEAF=0.500;MQ=59.72;MQRankSum=-1.028;QD=16.21;ReadPosRankSum=-1.405;SOR=0.723 GT:AD:DP:GQ:PL 0/1:40,36:76:99:1239,0,1443
 20 10086110 . G A 1295.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-0.343;DP=83;ExcessHet=0.0000;FS=1.810;MLEAC=1;MLEAF=0.500;MQ=59.16;MQRankSum=-0.680;QD=16.00;ReadPosRankSum=0.170;SOR=0.495 GT:AD:DP:GQ:PL 0/1:43,38:81:99:1303,0,1506
 20 10086283 . G T 1046.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=2.304;DP=86;ExcessHet=0.0000;FS=0.855;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.000;QD=12.46;ReadPosRankSum=1.889;SOR=0.529 GT:AD:DP:GQ:PL 0/1:52,32:84:99:1054,0,1771
@@ -138,8 +138,8 @@
 20 10087394 . T G 1709.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-2.340;DP=85;ExcessHet=0.0000;FS=4.225;MLEAC=1;MLEAF=0.500;MQ=59.46;MQRankSum=1.207;QD=20.35;ReadPosRankSum=0.885;SOR=0.436 GT:AD:DP:GQ:PL 0/1:35,49:84:99:1717,0,1186
 20 10087754 . T G 1403.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=0.239;DP=87;ExcessHet=0.0000;FS=0.851;MLEAC=1;MLEAF=0.500;MQ=60.60;MQRankSum=2.166;QD=17.77;ReadPosRankSum=-0.378;SOR=0.596 GT:AD:DP:GQ:PL 0/1:37,42:79:99:1411,0,1236
 20 10088699 . C T 797.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=3.803;DP=67;ExcessHet=0.0000;FS=16.952;MLEAC=1;MLEAF=0.500;MQ=56.90;MQRankSum=-3.140;QD=12.87;ReadPosRankSum=-1.456;SOR=1.914 GT:AD:DP:GQ:PL 0/1:36,26:62:99:805,0,1088
-20 10088730 . G A 961.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=0.224;DP=57;ExcessHet=0.0000;FS=9.673;MLEAC=1;MLEAF=0.500;MQ=52.92;MQRankSum=-5.143;QD=17.48;ReadPosRankSum=0.581;SOR=1.025 GT:AD:DP:GQ:PL 0/1:29,26:55:99:969,0,956
-20 10088736 . A C 884.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=0.282;DP=60;ExcessHet=0.0000;FS=9.673;MLEAC=1;MLEAF=0.500;MQ=52.86;MQRankSum=-5.169;QD=16.08;ReadPosRankSum=0.573;SOR=0.851 GT:AD:DP:GQ:PL 0/1:28,27:55:99:892,0,854
+20 10088730 . G A 637.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=0.224;DP=57;ExcessHet=0.0000;FS=15.383;MLEAC=1;MLEAF=0.500;MQ=52.92;MQRankSum=-4.839;QD=12.26;ReadPosRankSum=0.581;SOR=1.592 GT:AD:DP:GQ:PL 0/1:29,23:52:99:645,0,965
+20 10088736 . A C 774.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=0.282;DP=60;ExcessHet=0.0000;FS=9.673;MLEAC=1;MLEAF=0.500;MQ=52.86;MQRankSum=-5.169;QD=14.08;ReadPosRankSum=0.573;SOR=0.851 GT:AD:DP:GQ:PL 0/1:28,27:55:99:782,0,854
 20 10088747 . A G 798.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-3.877;DP=56;ExcessHet=0.0000;FS=7.653;MLEAC=1;MLEAF=0.500;MQ=52.20;MQRankSum=-4.907;QD=14.79;ReadPosRankSum=0.009;SOR=1.092 GT:AD:DP:GQ:PL 0/1:25,29:54:99:806,0,854
 20 10088895 . C T 556.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=2.330;DP=43;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=57.80;MQRankSum=0.260;QD=13.92;ReadPosRankSum=1.238;SOR=0.727 GT:AD:DP:GQ:PL 0/1:22,18:40:99:564,0,651
 20 10088968 . C CAAA 211.60 . AC=1;AF=0.500;AN=2;BaseQRankSum=1.220;DP=54;ExcessHet=0.0000;FS=9.387;MLEAC=1;MLEAF=0.500;MQ=57.02;MQRankSum=-3.733;QD=5.43;ReadPosRankSum=0.617;SOR=2.819 GT:AD:DP:GQ:PL 0/1:30,9:39:99:219,0,1177
@@ -159,12 +159,12 @@
 20 10096596 . C T 1583.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=2.728;DP=91;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=58.82;MQRankSum=0.612;QD=18.20;ReadPosRankSum=-0.170;SOR=0.640 GT:AD:DP:GQ:PL 0/1:40,47:87:99:1591,0,1273
 20 10096768 . A C 1703.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-2.764;DP=96;ExcessHet=0.0000;FS=3.983;MLEAC=1;MLEAF=0.500;MQ=59.28;MQRankSum=-1.609;QD=18.72;ReadPosRankSum=0.613;SOR=0.445 GT:AD:DP:GQ:PL 0/1:42,49:91:99:1711,0,1452
 20 10096899 . G T 1405.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=2.755;DP=84;ExcessHet=0.0000;FS=9.606;MLEAC=1;MLEAF=0.500;MQ=59.28;MQRankSum=-1.387;QD=17.79;ReadPosRankSum=0.422;SOR=1.707 GT:AD:DP:GQ:PL 0/1:39,40:79:99:1413,0,1365
-20 10096905 . TA T 1522.60 . AC=1;AF=0.500;AN=2;BaseQRankSum=-0.897;DP=86;ExcessHet=0.0000;FS=11.448;MLEAC=1;MLEAF=0.500;MQ=59.30;MQRankSum=-1.656;QD=18.57;ReadPosRankSum=0.269;SOR=1.760 GT:AD:DP:GQ:PL 0/1:39,43:82:99:1530,0,1212
+20 10096905 . TA T 1357.60 . AC=1;AF=0.500;AN=2;BaseQRankSum=-0.897;DP=86;ExcessHet=0.0000;FS=11.448;MLEAC=1;MLEAF=0.500;MQ=59.30;MQRankSum=-1.656;QD=16.56;ReadPosRankSum=0.269;SOR=1.760 GT:AD:DP:GQ:PL 0/1:39,43:82:99:1365,0,1212
 20 10096933 . G C 1336.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-1.914;DP=89;ExcessHet=0.0000;FS=23.320;MLEAC=1;MLEAF=0.500;MQ=59.32;MQRankSum=-1.802;QD=15.73;ReadPosRankSum=0.383;SOR=1.534 GT:AD:DP:GQ:PL 0/1:44,41:85:99:1344,0,1514
 20 10096958 . G A 1628.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=2.697;DP=93;ExcessHet=0.0000;FS=19.237;MLEAC=1;MLEAF=0.500;MQ=59.35;MQRankSum=-1.682;QD=17.90;ReadPosRankSum=-1.593;SOR=1.375 GT:AD:DP:GQ:PL 0/1:44,47:91:99:1636,0,1487
 20 10097075 . T G 1466.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-2.533;DP=96;ExcessHet=0.0000;FS=6.613;MLEAC=1;MLEAF=0.500;MQ=58.14;MQRankSum=-1.389;QD=15.94;ReadPosRankSum=0.235;SOR=0.628 GT:AD:DP:GQ:PL 0/1:44,48:92:99:1474,0,1482
 20 10097101 . C CTTT 1132.60 . AC=1;AF=0.500;AN=2;BaseQRankSum=-0.907;DP=80;ExcessHet=0.0000;FS=4.931;MLEAC=1;MLEAF=0.500;MQ=58.24;MQRankSum=-0.684;QD=17.16;ReadPosRankSum=1.777;SOR=0.385 GT:AD:DP:GQ:PL 0/1:33,33:66:99:1140,0,1220
-20 10097437 . TTTTC CTTTC,T 1114.06 . AC=1,1;AF=0.500,0.500;AN=2;BaseQRankSum=-0.660;DP=60;ExcessHet=0.0000;FS=0.000;MLEAC=1,1;MLEAF=0.500,0.500;MQ=58.33;MQRankSum=3.089;QD=30.95;ReadPosRankSum=-0.138;SOR=0.859 GT:AD:DP:GQ:PL 1/2:2,11,23:36:99:1131,862,1679,307,0,383
+20 10097437 . TTTTC CTTTC,T 1151.06 . AC=1,1;AF=0.500,0.500;AN=2;DP=60;ExcessHet=0.0000;FS=0.000;MLEAC=1,1;MLEAF=0.500,0.500;MQ=58.33;QD=31.11;SOR=2.774 GT:AD:DP:GQ:PL 1/2:0,14,23:37:99:1168,811,1679,418,0,395
 20 10097789 . T C 709.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-1.157;DP=60;ExcessHet=0.0000;FS=5.374;MLEAC=1;MLEAF=0.500;MQ=58.05;MQRankSum=-2.805;QD=12.45;ReadPosRankSum=0.220;SOR=0.756 GT:AD:DP:GQ:PL 0/1:34,23:57:99:717,0,1190
 20 10097928 . G A 591.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-1.203;DP=37;ExcessHet=0.0000;FS=1.315;MLEAC=1;MLEAF=0.500;MQ=58.11;MQRankSum=-1.654;QD=16.43;ReadPosRankSum=-0.079;SOR=0.951 GT:AD:DP:GQ:PL 0/1:17,19:36:99:599,0,586
 20 10098110 . G C 573.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=2.510;DP=36;ExcessHet=0.0000;FS=6.658;MLEAC=1;MLEAF=0.500;MQ=58.71;MQRankSum=-1.477;QD=15.93;ReadPosRankSum=-0.730;SOR=0.069 GT:AD:DP:GQ:PL 0/1:19,17:36:99:581,0,631
@@ -175,12 +175,12 @@
 20 10098344 . A G 62.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-0.674;DP=5;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=47.08;MQRankSum=-1.383;QD=15.66;ReadPosRankSum=0.000;SOR=0.693 GT:AD:DP:GQ:PL 0/1:2,2:4:67:70,0,67
 20 10098786 . C T 32.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=0.431;DP=9;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=54.65;MQRankSum=-2.200;QD=3.63;ReadPosRankSum=-1.383;SOR=0.132 GT:AD:DP:GQ:PL 0/1:7,2:9:40:40,0,217
 20 10098885 . C CA 463.60 . AC=1;AF=0.500;AN=2;BaseQRankSum=0.165;DP=33;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=49.73;MQRankSum=-4.449;QD=14.95;ReadPosRankSum=-0.954;SOR=0.518 GT:AD:DP:GQ:PL 0/1:17,14:31:99:471,0,572
-20 10099044 . A C 115.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-1.165;DP=24;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=54.53;MQRankSum=-3.771;QD=5.78;ReadPosRankSum=-1.601;SOR=0.465 GT:AD:DP:GQ:PL 0/1:15,5:20:99:123,0,493
-20 10099046 . T C 177.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-2.002;DP=21;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=52.78;MQRankSum=-3.858;QD=8.46;ReadPosRankSum=-2.053;SOR=0.616 GT:AD:DP:GQ:PL 0/1:15,6:21:99:185,0,552
-20 10099055 . T C 156.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-1.462;DP=26;ExcessHet=0.0000;FS=2.016;MLEAC=1;MLEAF=0.500;MQ=50.95;MQRankSum=-3.426;QD=6.53;ReadPosRankSum=-1.973;SOR=1.284 GT:AD:DP:GQ:PL 0/1:17,7:24:99:164,0,578
-20 10099079 . C T 241.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=1.367;DP=33;ExcessHet=0.0000;FS=1.397;MLEAC=1;MLEAF=0.500;MQ=50.89;MQRankSum=-4.614;QD=7.55;ReadPosRankSum=-1.267;SOR=1.051 GT:AD:DP:GQ:PL 0/1:19,13:32:99:249,0,528
-20 10099111 . T TTTTGTTTG 954.60 . AC=1;AF=0.500;AN=2;BaseQRankSum=-0.832;DP=55;ExcessHet=0.0000;FS=4.525;MLEAC=1;MLEAF=0.500;MQ=53.38;MQRankSum=-2.973;QD=22.73;ReadPosRankSum=-0.526;SOR=1.445 GT:AD:DP:GQ:PL 0/1:17,25:42:99:962,0,568
-20 10099140 . G T 995.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=0.167;DP=68;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=55.01;MQRankSum=-2.069;QD=15.32;ReadPosRankSum=1.073;SOR=0.776 GT:AD:DP:GQ:PL 0/1:33,32:65:99:1003,0,1014
+20 10099044 . A C 107.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-1.165;DP=24;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=54.53;MQRankSum=-3.771;QD=5.38;ReadPosRankSum=-1.601;SOR=0.465 GT:AD:DP:GQ:PL 0/1:15,5:20:99:115,0,493
+20 10099046 . T C 130.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-2.002;DP=21;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=52.78;MQRankSum=-3.771;QD=6.53;ReadPosRankSum=-2.053;SOR=0.465 GT:AD:DP:GQ:PL 0/1:15,5:20:99:138,0,555
+20 10099055 . T C 253.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-2.102;DP=26;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=50.95;MQRankSum=-4.101;QD=10.57;ReadPosRankSum=-2.806;SOR=0.804 GT:AD:DP:GQ:PL 0/1:15,9:24:99:261,0,539
+20 10099079 . C T 341.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=1.367;DP=33;ExcessHet=0.0000;FS=1.397;MLEAC=1;MLEAF=0.500;MQ=50.89;MQRankSum=-4.614;QD=10.68;ReadPosRankSum=-1.267;SOR=1.051 GT:AD:DP:GQ:PL 0/1:19,13:32:99:349,0,524
+20 10099111 . T TTTTGTTTG 950.60 . AC=1;AF=0.500;AN=2;BaseQRankSum=-0.832;DP=55;ExcessHet=0.0000;FS=4.525;MLEAC=1;MLEAF=0.500;MQ=53.38;MQRankSum=-2.973;QD=22.63;ReadPosRankSum=-0.526;SOR=1.445 GT:AD:DP:GQ:PL 0/1:17,25:42:99:958,0,568
+20 10099140 . G T 1102.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=0.164;DP=68;ExcessHet=0.0000;FS=2.177;MLEAC=1;MLEAF=0.500;MQ=55.01;MQRankSum=-2.787;QD=17.23;ReadPosRankSum=0.606;SOR=1.055 GT:AD:DP:GQ:PL 0/1:30,34:64:99:1110,0,925
 20 10099190 . G T 1055.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-0.034;DP=65;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=57.56;MQRankSum=-1.711;QD=16.24;ReadPosRankSum=-1.260;SOR=0.776 GT:AD:DP:GQ:PL 0/1:33,32:65:99:1063,0,1154
 20 10099220 . A G 598.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-1.664;DP=47;ExcessHet=0.0000;FS=1.137;MLEAC=1;MLEAF=0.500;MQ=57.74;MQRankSum=-2.596;QD=13.30;ReadPosRankSum=0.629;SOR=0.519 GT:AD:DP:GQ:PL 0/1:25,20:45:99:606,0,873
 20 10099535 . G A 1356.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=5.034;DP=68;ExcessHet=0.0000;FS=2.172;MLEAC=1;MLEAF=0.500;MQ=59.04;MQRankSum=-1.418;QD=20.87;ReadPosRankSum=-0.777;SOR=0.408 GT:AD:DP:GQ:PL 0/1:26,39:65:99:1364,0,686
diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.pileupCallerDRAGEN.gatk4.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.pileupCallerDRAGEN.gatk4.vcf
index 83681307ab8..9a545c2ec14 100644
--- a/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.pileupCallerDRAGEN.gatk4.vcf
+++ b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.pileupCallerDRAGEN.gatk4.vcf
@@ -118,8 +118,8 @@
 20 10087394 . T G 1709.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-2.340;DP=85;ExcessHet=0.0000;FS=4.225;MLEAC=1;MLEAF=0.500;MQ=59.46;MQRankSum=1.207;QD=20.35;ReadPosRankSum=0.885;SOR=0.436 GT:AD:DP:GQ:PL 0/1:35,49:84:99:1717,0,1186
 20 10087754 . T G 1403.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=0.239;DP=87;ExcessHet=0.0000;FS=0.851;MLEAC=1;MLEAF=0.500;MQ=60.60;MQRankSum=2.166;QD=17.77;ReadPosRankSum=-0.378;SOR=0.596 GT:AD:DP:GQ:PL 0/1:37,42:79:99:1411,0,1236
 20 10088699 . C T 797.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=3.803;DP=67;ExcessHet=0.0000;FS=16.952;MLEAC=1;MLEAF=0.500;MQ=56.90;MQRankSum=-3.140;QD=12.87;ReadPosRankSum=-1.456;SOR=1.914 GT:AD:DP:GQ:PL 0/1:36,26:62:99:805,0,1088
-20 10088730 . G A 961.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=0.224;DP=57;ExcessHet=0.0000;FS=9.673;MLEAC=1;MLEAF=0.500;MQ=52.92;MQRankSum=-5.143;QD=17.48;ReadPosRankSum=0.581;SOR=1.025 GT:AD:DP:GQ:PL 0/1:29,26:55:99:969,0,956
-20 10088736 . A C 884.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=0.282;DP=60;ExcessHet=0.0000;FS=9.673;MLEAC=1;MLEAF=0.500;MQ=52.86;MQRankSum=-5.169;QD=16.08;ReadPosRankSum=0.573;SOR=0.851 GT:AD:DP:GQ:PL 0/1:28,27:55:99:892,0,854
+20 10088730 . G A 637.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=0.224;DP=57;ExcessHet=0.0000;FS=15.383;MLEAC=1;MLEAF=0.500;MQ=52.92;MQRankSum=-4.839;QD=12.26;ReadPosRankSum=0.581;SOR=1.592 GT:AD:DP:GQ:PL 0/1:29,23:52:99:645,0,965
+20 10088736 . A C 774.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=0.282;DP=60;ExcessHet=0.0000;FS=9.673;MLEAC=1;MLEAF=0.500;MQ=52.86;MQRankSum=-5.169;QD=14.08;ReadPosRankSum=0.573;SOR=0.851 GT:AD:DP:GQ:PL 0/1:28,27:55:99:782,0,854
 20 10088747 . A G 798.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-3.877;DP=56;ExcessHet=0.0000;FS=7.653;MLEAC=1;MLEAF=0.500;MQ=52.20;MQRankSum=-4.907;QD=14.79;ReadPosRankSum=0.009;SOR=1.092 GT:AD:DP:GQ:PL 0/1:25,29:54:99:806,0,854
 20 10088895 . C T 524.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=2.730;DP=43;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=57.40;MQRankSum=0.507;QD=13.12;ReadPosRankSum=1.109;SOR=0.567 GT:AD:DP:GQ:PL 0/1:23,17:40:99:532,0,676
 20 10089441 . A G 1200.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-4.885;DP=79;ExcessHet=0.0000;FS=6.071;MLEAC=1;MLEAF=0.500;MQ=59.71;MQRankSum=-0.948;QD=15.80;ReadPosRankSum=-0.156;SOR=1.363 GT:AD:DP:GQ:PL 0/1:37,39:76:99:1208,0,1313
@@ -147,7 +147,7 @@
 20 10098786 . C T 32.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=0.431;DP=9;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=54.65;MQRankSum=-2.200;QD=3.63;ReadPosRankSum=-1.383;SOR=0.132 GT:AD:DP:GQ:PL 0/1:7,2:9:40:40,0,217
 20 10099044 . A C 49.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-1.201;DP=22;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=56.15;MQRankSum=-3.419;QD=2.76;ReadPosRankSum=-1.481;SOR=0.193 GT:AD:DP:GQ:PL 0/1:15,3:18:57:57,0,499
 20 10099046 . T C 73.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-1.822;DP=18;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=55.26;MQRankSum=-3.419;QD=4.09;ReadPosRankSum=-1.364;SOR=0.193 GT:AD:DP:GQ:PL 0/1:15,3:18:81:81,0,561
-20 10099055 . T C 50.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-1.460;DP=19;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=55.52;MQRankSum=-3.419;QD=2.81;ReadPosRankSum=-0.830;SOR=0.330 GT:AD:DP:GQ:PL 0/1:15,3:18:58:58,0,557
+20 10099055 . T C 47.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-1.460;DP=19;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=55.52;MQRankSum=-3.419;QD=2.65;ReadPosRankSum=-0.830;SOR=0.330 GT:AD:DP:GQ:PL 0/1:15,3:18:55:55,0,557
 20 10099079 . C T 40.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=0.694;DP=23;ExcessHet=0.0000;FS=2.192;MLEAC=1;MLEAF=0.500;MQ=56.33;MQRankSum=-3.254;QD=1.77;ReadPosRankSum=-0.568;SOR=1.460 GT:AD:DP:GQ:PL 0/1:19,4:23:48:48,0,551
 20 10099140 . G T 583.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-0.176;DP=49;ExcessHet=0.0000;FS=6.794;MLEAC=1;MLEAF=0.500;MQ=60.07;MQRankSum=1.344;QD=12.42;ReadPosRankSum=-0.466;SOR=1.944 GT:AD:DP:GQ:PL 0/1:28,19:47:99:591,0,932
 20 10099190 . G T 1055.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-0.034;DP=65;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=57.56;MQRankSum=-1.711;QD=16.24;ReadPosRankSum=-1.260;SOR=0.776 GT:AD:DP:GQ:PL 0/1:33,32:65:99:1063,0,1154
diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.pileupCallerDRAGEN.gatk4.vcf.idx b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.pileupCallerDRAGEN.gatk4.vcf.idx
index 3d8e4b970205cc4e70cf7f49aefa2078f60f9d1a..12ebd6bccc2a15c57340b736212719c444b7fc15 100644
GIT binary patch
delta 45
rcmX@jbBJex6h}&8S!PO7YTiWcV%|R`3}BF}@VcgB
diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.pileupCallerDefaults.gatk4.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.pileupCallerDefaults.gatk4.vcf
index aa1708e5d9c..f14eb79614d 100644
--- a/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.pileupCallerDefaults.gatk4.vcf
+++ b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.pileupCallerDefaults.gatk4.vcf
@@ -34,12 +34,12 @@
 20 10001436 . A AAGGCT 2325.03 . AC=2;AF=1.00;AN=2;DP=58;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=41.22;QD=28.20;SOR=3.014 GT:AD:DP:GQ:PL 1/1:0,52:52:99:2339,156,0
 20 10001474 . C T 2655.06 . AC=2;AF=1.00;AN=2;DP=77;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=44.87;QD=25.00;SOR=1.516 GT:AD:DP:GQ:PL 1/1:0,73:73:99:2669,219,0
 20 10001617 . C A 1752.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=1.261;DP=105;ExcessHet=0.0000;FS=13.163;MLEAC=1;MLEAF=0.500;MQ=59.02;MQRankSum=1.373;QD=17.02;ReadPosRankSum=-0.280;SOR=1.346 GT:AD:DP:GQ:PL 0/1:52,51:103:99:1760,0,1781
-20 10001628 . G A 4009.06 . AC=2;AF=1.00;AN=2;DP=98;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.19;QD=29.56;SOR=1.053 GT:AD:DP:GQ:PL 1/1:0,95:95:99:4023,286,0
+20 10001628 . G A 3913.06 . AC=2;AF=1.00;AN=2;DP=98;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.19;QD=29.56;SOR=1.053 GT:AD:DP:GQ:PL 1/1:0,95:95:99:3927,285,0
 20 10001661 . T C 3177.06 . AC=2;AF=1.00;AN=2;DP=85;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=58.13;QD=30.62;SOR=1.193 GT:AD:DP:GQ:PL 1/1:0,81:81:99:3191,243,0
 20 10001670 . T G 3232.06 . AC=2;AF=1.00;AN=2;DP=86;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=57.60;QD=28.17;SOR=0.995 GT:AD:DP:GQ:PL 1/1:0,81:81:99:3246,244,0
 20 10002058 . T G 2978.06 . AC=2;AF=1.00;AN=2;BaseQRankSum=1.931;DP=79;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=57.87;MQRankSum=3.068;QD=26.80;ReadPosRankSum=0.624;SOR=0.776 GT:AD:DP:GQ:PL 1/1:1,74:75:99:2992,216,0
 20 10002099 . C T 1215.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-0.282;DP=68;ExcessHet=0.0000;FS=3.528;MLEAC=1;MLEAF=0.500;MQ=56.46;MQRankSum=-0.850;QD=18.70;ReadPosRankSum=0.173;SOR=0.375 GT:AD:DP:GQ:PL 0/1:27,38:65:99:1223,0,844
-20 10002138 . C G 2113.06 . AC=2;AF=1.00;AN=2;DP=57;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=56.64;QD=26.00;SOR=0.730 GT:AD:DP:GQ:PL 1/1:0,53:53:99:2127,159,0
+20 10002138 . C G 2035.06 . AC=2;AF=1.00;AN=2;DP=57;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=56.64;QD=26.00;SOR=0.730 GT:AD:DP:GQ:PL 1/1:0,53:53:99:2049,159,0
 20 10002142 . G C 2003.06 . AC=2;AF=1.00;AN=2;DP=56;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=56.08;QD=30.02;SOR=0.853 GT:AD:DP:GQ:PL 1/1:0,52:52:99:2017,156,0
 20 10002470 . C T 1255.06 . AC=2;AF=1.00;AN=2;DP=43;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=51.98;QD=33.03;SOR=0.693 GT:AD:DP:GQ:PL 1/1:0,38:38:99:1269,114,0
 20 10002478 . A T 80.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-0.096;DP=47;ExcessHet=0.0000;FS=34.492;MLEAC=1;MLEAF=0.500;MQ=52.71;MQRankSum=-1.393;QD=1.92;ReadPosRankSum=2.806;SOR=5.281 GT:AD:DP:GQ:PL 0/1:29,13:42:88:88,0,791
@@ -50,7 +50,7 @@
 20 10003692 . A G 2624.06 . AC=2;AF=1.00;AN=2;DP=72;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.48;QD=28.08;SOR=1.352 GT:AD:DP:GQ:PL 1/1:0,68:68:99:2638,205,0
 20 10003832 . G A 2720.06 . AC=2;AF=1.00;AN=2;DP=67;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=56.82;QD=23.23;SOR=1.473 GT:AD:DP:GQ:PL 1/1:0,66:66:99:2734,199,0
 20 10004094 . A C 1840.06 . AC=2;AF=1.00;AN=2;DP=55;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=52.62;QD=34.72;SOR=1.524 GT:AD:DP:GQ:PL 1/1:0,53:53:99:1854,157,0
-20 10004147 . A G 1907.06 . AC=2;AF=1.00;AN=2;DP=54;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=55.19;QD=29.03;SOR=1.136 GT:AD:DP:GQ:PL 1/1:0,52:52:99:1921,156,0
+20 10004147 . A G 1852.06 . AC=2;AF=1.00;AN=2;DP=54;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=55.19;QD=29.03;SOR=1.136 GT:AD:DP:GQ:PL 1/1:0,52:52:99:1866,156,0
 20 10004193 . G T 937.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-0.392;DP=54;ExcessHet=0.0000;FS=2.369;MLEAC=1;MLEAF=0.500;MQ=59.14;MQRankSum=3.385;QD=18.39;ReadPosRankSum=1.724;SOR=0.941 GT:AD:DP:GQ:PL 0/1:23,28:51:99:945,0,776
 20 10004223 . A AG 755.60 . AC=1;AF=0.500;AN=2;BaseQRankSum=-0.124;DP=66;ExcessHet=0.0000;FS=3.679;MLEAC=1;MLEAF=0.500;MQ=58.93;MQRankSum=3.229;QD=13.03;ReadPosRankSum=-1.206;SOR=1.220 GT:AD:DP:GQ:PL 0/1:29,29:58:99:763,0,678
 20 10004351 . C G 2770.06 . AC=2;AF=1.00;AN=2;DP=71;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.05;QD=30.67;SOR=0.874 GT:AD:DP:GQ:PL 1/1:0,70:70:99:2784,211,0
@@ -81,14 +81,14 @@
 20 10008742 . G T 736.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=3.893;DP=50;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=37.07;MQRankSum=-1.502;QD=15.03;ReadPosRankSum=0.200;SOR=0.741 GT:AD:DP:GQ:PL 0/1:23,26:49:99:744,0,580
 20 10008758 . C CA 707.49 . AC=2;AF=1.00;AN=2;BaseQRankSum=0.130;DP=51;ExcessHet=0.0000;FS=2.191;MLEAC=1;MLEAF=0.500;MQ=37.72;MQRankSum=-0.875;QD=18.62;ReadPosRankSum=-0.262;SOR=0.405 GT:AD:DP:GQ:PL 1/1:4,34:38:10:721,10,0
 20 10008921 . C CA 283.60 . AC=1;AF=0.500;AN=2;BaseQRankSum=0.610;DP=60;ExcessHet=0.0000;FS=1.311;MLEAC=1;MLEAF=0.500;MQ=56.00;MQRankSum=-1.306;QD=8.10;ReadPosRankSum=-0.017;SOR=0.644 GT:AD:DP:GQ:PL 0/1:18,17:35:99:291,0,366
-20 10008948 . TA T 663.02 . AC=2;AF=1.00;AN=2;BaseQRankSum=0.090;DP=55;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=51.58;MQRankSum=0.565;QD=23.68;ReadPosRankSum=0.403;SOR=0.527 GT:AD:DP:GQ:PL 1/1:2,26:28:29:677,29,0
-20 10008952 . CACACACACACA CCACACACACA,C 766.97 . AC=1,1;AF=0.500,0.500;AN=2;BaseQRankSum=-0.634;DP=58;ExcessHet=0.0000;FS=0.000;MLEAC=1,1;MLEAF=0.500,0.500;MQ=51.40;MQRankSum=0.426;QD=25.57;ReadPosRankSum=0.125;SOR=0.473 GT:AD:DP:GQ:PL 1/2:2,12,16:30:99:802,564,583,218,0,925
+20 10008948 . TA T 548.01 . AC=2;AF=1.00;AN=2;BaseQRankSum=0.090;DP=55;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=51.58;MQRankSum=0.565;QD=19.57;ReadPosRankSum=0.403;SOR=0.527 GT:AD:DP:GQ:PL 1/1:2,26:28:27:562,27,0
+20 10008952 . CACACACACACA CCACACACACA,C 651.04 . AC=1,1;AF=0.500,0.500;AN=2;BaseQRankSum=-0.634;DP=58;ExcessHet=0.0000;FS=0.000;MLEAC=1,1;MLEAF=0.500,0.500;MQ=51.40;MQRankSum=0.426;QD=21.70;ReadPosRankSum=0.125;SOR=0.473 GT:AD:DP:GQ:PL 1/2:2,12,16:30:99:802,564,583,218,0,925
 20 10009227 . A G 1390.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-2.220;DP=63;ExcessHet=0.0000;FS=9.110;MLEAC=1;MLEAF=0.500;MQ=56.82;MQRankSum=1.428;QD=22.43;ReadPosRankSum=-1.159;SOR=0.281 GT:AD:DP:GQ:PL 0/1:23,39:62:99:1398,0,740
 20 10009246 . A G 3017.06 . AC=2;AF=1.00;AN=2;DP=81;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=55.68;QD=26.08;SOR=0.887 GT:AD:DP:GQ:PL 1/1:0,77:77:99:3031,231,0
 20 10009400 . T A 3013.06 . AC=2;AF=1.00;AN=2;DP=75;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=33.85;SOR=0.804 GT:AD:DP:GQ:PL 1/1:0,74:74:99:3027,223,0
 20 10009512 . C G 3054.06 . AC=2;AF=1.00;AN=2;DP=80;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=58.53;QD=27.08;SOR=1.223 GT:AD:DP:GQ:PL 1/1:0,77:77:99:3068,231,0
 20 10009719 . A G 1901.06 . AC=2;AF=1.00;AN=2;DP=65;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.71;QD=33.35;SOR=1.047 GT:AD:DP:GQ:PL 1/1:0,57:57:99:1915,170,0
-20 10009795 . A G 1921.06 . AC=2;AF=1.00;AN=2;DP=49;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.62;QD=25.42;SOR=1.514 GT:AD:DP:GQ:PL 1/1:0,47:47:99:1935,141,0
+20 10009795 . A G 1899.06 . AC=2;AF=1.00;AN=2;DP=49;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.62;QD=25.42;SOR=1.514 GT:AD:DP:GQ:PL 1/1:0,47:47:99:1913,141,0
 20 10009844 . G A 1703.06 . AC=2;AF=1.00;AN=2;BaseQRankSum=1.130;DP=48;ExcessHet=0.0000;FS=3.297;MLEAC=2;MLEAF=1.00;MQ=56.01;MQRankSum=2.656;QD=27.65;ReadPosRankSum=-1.079;SOR=0.499 GT:AD:DP:GQ:PL 1/1:1,46:47:99:1717,113,0
 20 10009871 . A G 1326.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-0.305;DP=55;ExcessHet=0.0000;FS=5.432;MLEAC=1;MLEAF=0.500;MQ=52.86;MQRankSum=0.468;QD=24.57;ReadPosRankSum=1.080;SOR=0.620 GT:AD:DP:GQ:PL 0/1:9,45:54:20:1334,0,20
 20 10009875 . A G 1404.06 . AC=2;AF=1.00;AN=2;BaseQRankSum=1.365;DP=52;ExcessHet=0.0000;FS=31.135;MLEAC=2;MLEAF=1.00;MQ=52.42;MQRankSum=-0.433;QD=27.53;ReadPosRankSum=-0.052;SOR=1.193 GT:AD:DP:GQ:PL 1/1:8,43:51:76:1418,76,0
@@ -108,14 +108,14 @@
 20 10012384 . T C 250.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-1.773;DP=18;ExcessHet=0.0000;FS=7.375;MLEAC=1;MLEAF=0.500;MQ=53.38;MQRankSum=-0.811;QD=13.92;ReadPosRankSum=0.800;SOR=0.044 GT:AD:DP:GQ:PL 0/1:10,8:18:99:258,0,355
 20 10012387 . T C 399.06 . AC=2;AF=1.00;AN=2;DP=18;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=53.38;QD=24.94;SOR=3.258 GT:AD:DP:GQ:PL 1/1:0,16:16:46:413,46,0
 20 10012479 . A G 507.06 . AC=2;AF=1.00;AN=2;DP=18;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=51.29;QD=29.83;SOR=0.804 GT:AD:DP:GQ:PL 1/1:0,17:17:50:521,50,0
-20 10012498 . C G 392.06 . AC=2;AF=1.00;AN=2;DP=11;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=42.15;QD=35.36;SOR=1.270 GT:AD:DP:GQ:PL 1/1:0,11:11:33:406,33,0
-20 10012518 . T C 440.06 . AC=2;AF=1.00;AN=2;DP=13;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=42.64;QD=34.30;SOR=0.693 GT:AD:DP:GQ:PL 1/1:0,12:12:36:454,36,0
+20 10012498 . C G 369.06 . AC=2;AF=1.00;AN=2;DP=11;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=42.15;QD=33.55;SOR=1.270 GT:AD:DP:GQ:PL 1/1:0,11:11:33:383,33,0
+20 10012518 . T C 339.06 . AC=2;AF=1.00;AN=2;DP=13;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=42.64;QD=30.82;SOR=0.859 GT:AD:DP:GQ:PL 1/1:0,11:11:33:353,33,0
 20 10012521 . C T 89.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=1.531;DP=13;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=40.07;MQRankSum=-0.224;QD=6.90;ReadPosRankSum=2.125;SOR=0.527 GT:AD:DP:GQ:PL 0/1:9,4:13:97:97,0,247
-20 10012570 . G GCA 406.02 . AC=2;AF=1.00;AN=2;DP=11;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=39.52;QD=27.00;SOR=3.258 GT:AD:DP:GQ:PL 1/1:0,10:10:30:420,30,0
+20 10012570 . G GCA 393.02 . AC=2;AF=1.00;AN=2;DP=11;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=39.52;QD=35.36;SOR=3.258 GT:AD:DP:GQ:PL 1/1:0,10:10:30:407,30,0
 20 10012572 . GT G 295.01 . AC=2;AF=1.00;AN=2;DP=12;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=40.60;QD=32.78;SOR=3.056 GT:AD:DP:GQ:PL 1/1:0,9:9:27:309,27,0
 20 10012631 . C CG 407.60 . AC=1;AF=0.500;AN=2;BaseQRankSum=-1.030;DP=21;ExcessHet=0.0000;FS=8.822;MLEAC=1;MLEAF=0.500;MQ=48.34;MQRankSum=-1.670;QD=19.41;ReadPosRankSum=-1.777;SOR=0.048 GT:AD:DP:GQ:PL 0/1:8,13:21:99:415,0,237
 20 10012636 . G C 397.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=2.711;DP=22;ExcessHet=0.0000;FS=5.959;MLEAC=1;MLEAF=0.500;MQ=48.93;MQRankSum=-1.513;QD=18.07;ReadPosRankSum=-1.777;SOR=0.058 GT:AD:DP:GQ:PL 0/1:8,14:22:99:405,0,175
-20 10012714 . G C 1315.06 . AC=2;AF=1.00;AN=2;DP=36;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=52.52;QD=30.82;SOR=2.964 GT:AD:DP:GQ:PL 1/1:0,35:35:99:1329,105,0
+20 10012714 . G C 1315.06 . AC=2;AF=1.00;AN=2;DP=36;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=52.52;QD=34.30;SOR=2.964 GT:AD:DP:GQ:PL 1/1:0,35:35:99:1329,105,0
 20 10012751 . T C 952.06 . AC=2;AF=1.00;AN=2;DP=31;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=50.73;QD=31.74;SOR=0.976 GT:AD:DP:GQ:PL 1/1:0,30:30:89:966,89,0
 20 10013119 . C T 1032.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=2.724;DP=61;ExcessHet=0.0000;FS=2.248;MLEAC=1;MLEAF=0.500;MQ=51.43;MQRankSum=-1.089;QD=17.50;ReadPosRankSum=0.159;SOR=0.405 GT:AD:DP:GQ:PL 0/1:29,30:59:99:1040,0,936
 20 10013574 . G A 1071.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=3.469;DP=73;ExcessHet=0.0000;FS=7.109;MLEAC=1;MLEAF=0.500;MQ=58.17;MQRankSum=-0.271;QD=15.53;ReadPosRankSum=0.706;SOR=1.675 GT:AD:DP:GQ:PL 0/1:38,31:69:99:1079,0,1301
@@ -139,7 +139,7 @@
 20 10030188 . T A 1393.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=0.711;DP=82;ExcessHet=0.0000;FS=9.186;MLEAC=1;MLEAF=0.500;MQ=59.49;MQRankSum=-1.013;QD=17.21;ReadPosRankSum=-0.553;SOR=1.105 GT:AD:DP:GQ:PL 0/1:42,39:81:99:1401,0,1524
 20 10031254 . A AT 1493.60 . AC=1;AF=0.500;AN=2;BaseQRankSum=-0.091;DP=90;ExcessHet=0.0000;FS=9.250;MLEAC=1;MLEAF=0.500;MQ=59.91;MQRankSum=0.017;QD=17.57;ReadPosRankSum=0.663;SOR=0.211 GT:AD:DP:GQ:PL 0/1:38,47:85:99:1501,0,1159
 20 10031342 . G A 1855.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=1.833;DP=93;ExcessHet=0.0000;FS=1.714;MLEAC=1;MLEAF=0.500;MQ=59.75;MQRankSum=-0.866;QD=19.95;ReadPosRankSum=-0.012;SOR=0.955 GT:AD:DP:GQ:PL 0/1:41,52:93:99:1863,0,1375
-20 10031798 . G A 4311.06 . AC=2;AF=1.00;AN=2;DP=107;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.61;QD=29.27;SOR=0.900 GT:AD:DP:GQ:PL 1/1:0,104:104:99:4325,313,0
+20 10031798 . G A 4311.06 . AC=2;AF=1.00;AN=2;DP=107;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.61;QD=27.00;SOR=0.900 GT:AD:DP:GQ:PL 1/1:0,104:104:99:4325,313,0
 20 10031827 . C T 2025.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=3.552;DP=115;ExcessHet=0.0000;FS=4.729;MLEAC=1;MLEAF=0.500;MQ=59.84;MQRankSum=-0.945;QD=18.76;ReadPosRankSum=-1.396;SOR=0.382 GT:AD:DP:GQ:PL 0/1:52,56:108:99:2033,0,1798
 20 10032094 . G A 1241.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=5.488;DP=86;ExcessHet=0.0000;FS=6.039;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.000;QD=14.96;ReadPosRankSum=-0.579;SOR=0.260 GT:AD:DP:GQ:PL 0/1:47,36:83:99:1249,0,1497
 20 10032413 . T G 890.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-6.142;DP=66;ExcessHet=0.0000;FS=10.444;MLEAC=1;MLEAF=0.500;MQ=59.43;MQRankSum=-0.065;QD=13.49;ReadPosRankSum=-1.003;SOR=1.293 GT:AD:DP:GQ:PL 0/1:35,31:66:99:898,0,1277
@@ -147,48 +147,48 @@
 20 10032972 . C CAT 1152.60 . AC=1;AF=0.500;AN=2;BaseQRankSum=-0.354;DP=67;ExcessHet=0.0000;FS=5.062;MLEAC=1;MLEAF=0.500;MQ=59.66;MQRankSum=-1.017;QD=18.30;ReadPosRankSum=-0.723;SOR=1.300 GT:AD:DP:GQ:PL 0/1:33,30:63:99:1160,0,1295
 20 10034306 . T C 803.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-3.661;DP=80;ExcessHet=0.0000;FS=0.890;MLEAC=1;MLEAF=0.500;MQ=59.48;MQRankSum=1.150;QD=10.72;ReadPosRankSum=0.539;SOR=0.685 GT:AD:DP:GQ:PL 0/1:46,29:75:99:811,0,1593
 20 10036930 . CGATAGCCCTAGCCCTAGATA C 1069.60 . AC=1;AF=0.500;AN=2;BaseQRankSum=0.720;DP=96;ExcessHet=0.0000;FS=13.351;MLEAC=1;MLEAF=0.500;MQ=54.33;MQRankSum=-3.973;QD=16.21;ReadPosRankSum=0.460;SOR=0.256 GT:AD:DP:GQ:PL 0/1:37,29:66:99:1077,0,1441
-20 10037037 . C T 1453.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=3.504;DP=88;ExcessHet=0.0000;FS=1.943;MLEAC=1;MLEAF=0.500;MQ=60.08;MQRankSum=-1.193;QD=17.95;ReadPosRankSum=-1.609;SOR=0.495 GT:AD:DP:GQ:PL 0/1:39,42:81:99:1461,0,1127
-20 10037110 . T TGATA 1196.60 . AC=1;AF=0.500;AN=2;BaseQRankSum=2.089;DP=83;ExcessHet=0.0000;FS=16.682;MLEAC=1;MLEAF=0.500;MQ=55.24;MQRankSum=5.270;QD=24.42;ReadPosRankSum=4.165;SOR=1.122 GT:AD:DP:GQ:PL 0/1:13,36:49:99:1204,0,402
-20 10037144 . T TGATAGATA 667.01 . AC=1;AF=0.500;AN=2;BaseQRankSum=1.744;DP=80;ExcessHet=0.0000;FS=13.898;MLEAC=1;MLEAF=0.500;MQ=53.44;MQRankSum=-5.852;QD=8.55;ReadPosRankSum=2.953;SOR=1.320 GT:AD:DP:GQ:PL 0/1:39,39:78:99:1428,0,1388
+20 10037037 . C T 1457.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=3.504;DP=88;ExcessHet=0.0000;FS=1.943;MLEAC=1;MLEAF=0.500;MQ=60.08;MQRankSum=-1.193;QD=18.00;ReadPosRankSum=-1.609;SOR=0.495 GT:AD:DP:GQ:PL 0/1:39,42:81:99:1465,0,1124
+20 10037110 . T TGATA 1701.03 . AC=2;AF=1.00;AN=2;DP=84;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=55.01;QD=30.82;SOR=1.127 GT:AD:DP:GQ:PL 1/1:0,42:42:99:1715,123,0
+20 10037144 . T TGATAGATA 1103.02 . AC=2;AF=1.00;AN=2;DP=81;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=53.21;QD=25.65;SOR=0.874 GT:AD:DP:GQ:PL 1/1:0,43:43:99:1857,127,0
 20 10037709 . A T 1024.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-2.367;DP=73;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=59.68;MQRankSum=0.934;QD=14.43;ReadPosRankSum=0.040;SOR=0.608 GT:AD:DP:GQ:PL 0/1:39,32:71:99:1032,0,1354
 20 10039371 . T G 1345.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-3.260;DP=76;ExcessHet=0.0000;FS=6.144;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.000;QD=18.18;ReadPosRankSum=0.391;SOR=1.031 GT:AD:DP:GQ:PL 0/1:34,40:74:99:1353,0,1194
 20 10040772 . C CT 2020.03 . AC=2;AF=1.00;AN=2;DP=85;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=52.32;QD=28.86;SOR=0.811 GT:AD:DP:GQ:PL 1/1:0,70:70:99:2034,210,0
 20 10040812 . AT A 2008.03 . AC=2;AF=1.00;AN=2;DP=83;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=51.21;QD=29.97;SOR=0.784 GT:AD:DP:GQ:PL 1/1:0,67:67:99:2022,202,0
 20 10040821 . T A 1204.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=0.084;DP=84;ExcessHet=0.0000;FS=0.872;MLEAC=1;MLEAF=0.500;MQ=51.33;MQRankSum=0.265;QD=16.28;ReadPosRankSum=-2.132;SOR=0.573 GT:AD:DP:GQ:PL 0/1:34,40:74:99:1212,0,814
-20 10041304 . C T 2780.06 . AC=2;AF=1.00;AN=2;DP=68;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.39;QD=35.92;SOR=0.693 GT:AD:DP:GQ:PL 1/1:0,68:68:99:2794,204,0
-20 10041701 . A ATATG 2854.03 . AC=2;AF=1.00;AN=2;BaseQRankSum=1.731;DP=80;ExcessHet=0.0000;FS=5.755;MLEAC=2;MLEAF=1.00;MQ=58.93;MQRankSum=1.185;QD=29.63;ReadPosRankSum=0.347;SOR=0.107 GT:AD:DP:GQ:PL 1/1:3,73:76:99:2868,178,0
-20 10042319 . C T 2673.06 . AC=2;AF=1.00;AN=2;DP=69;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.06;QD=28.76;SOR=1.033 GT:AD:DP:GQ:PL 1/1:0,66:66:99:2687,199,0
-20 10042761 . A G 3568.06 . AC=2;AF=1.00;AN=2;DP=94;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=29.94;SOR=1.389 GT:AD:DP:GQ:PL 1/1:0,91:91:99:3582,272,0
-20 10042829 . A G 3070.06 . AC=2;AF=1.00;AN=2;DP=82;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.77;QD=31.54;SOR=0.855 GT:AD:DP:GQ:PL 1/1:0,78:78:99:3084,235,0
+20 10041304 . C T 2780.06 . AC=2;AF=1.00;AN=2;DP=68;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.39;QD=29.27;SOR=0.693 GT:AD:DP:GQ:PL 1/1:0,68:68:99:2794,204,0
+20 10041701 . A ATATG 2854.03 . AC=2;AF=1.00;AN=2;BaseQRankSum=1.731;DP=80;ExcessHet=0.0000;FS=5.755;MLEAC=2;MLEAF=1.00;MQ=58.93;MQRankSum=1.185;QD=35.92;ReadPosRankSum=0.347;SOR=0.107 GT:AD:DP:GQ:PL 1/1:3,73:76:99:2868,178,0
+20 10042319 . C T 2673.06 . AC=2;AF=1.00;AN=2;DP=69;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.06;QD=29.63;SOR=1.033 GT:AD:DP:GQ:PL 1/1:0,66:66:99:2687,199,0
+20 10042761 . A G 3568.06 . AC=2;AF=1.00;AN=2;DP=94;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=28.76;SOR=1.389 GT:AD:DP:GQ:PL 1/1:0,91:91:99:3582,272,0
+20 10042829 . A G 3070.06 . AC=2;AF=1.00;AN=2;DP=82;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.77;QD=29.94;SOR=0.855 GT:AD:DP:GQ:PL 1/1:0,78:78:99:3084,235,0
 20 10043002 . A T 1308.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=3.986;DP=89;ExcessHet=0.0000;FS=2.877;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.000;QD=15.22;ReadPosRankSum=-0.035;SOR=0.434 GT:AD:DP:GQ:PL 0/1:48,38:86:99:1316,0,1658
-20 10044849 . A G 2106.06 . AC=2;AF=1.00;AN=2;DP=59;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.61;QD=28.86;SOR=1.316 GT:AD:DP:GQ:PL 1/1:0,59:59:99:2120,177,0
-20 10045078 . G T 3286.06 . AC=2;AF=1.00;AN=2;DP=84;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=58.78;QD=29.41;SOR=0.876 GT:AD:DP:GQ:PL 1/1:0,81:81:99:3300,244,0
-20 10045642 . G C 3127.06 . AC=2;AF=1.00;AN=2;DP=82;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.77;QD=31.11;SOR=0.821 GT:AD:DP:GQ:PL 1/1:0,81:81:99:3141,243,0
-20 10046178 . AAGAAAGAAAG A 2136.03 . AC=2;AF=1.00;AN=2;DP=63;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=48.77;QD=34.66;SOR=0.818 GT:AD:DP:GQ:PL 1/1:0,49:49:99:2150,149,0
+20 10044849 . A G 2106.06 . AC=2;AF=1.00;AN=2;DP=59;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.61;QD=31.54;SOR=1.316 GT:AD:DP:GQ:PL 1/1:0,59:59:99:2120,177,0
+20 10045078 . G T 3286.06 .
AC=2;AF=1.00;AN=2;DP=84;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=58.78;QD=28.86;SOR=0.876 GT:AD:DP:GQ:PL 1/1:0,81:81:99:3300,244,0 +20 10045642 . G C 3127.06 . AC=2;AF=1.00;AN=2;DP=82;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.77;QD=29.41;SOR=0.821 GT:AD:DP:GQ:PL 1/1:0,81:81:99:3141,243,0 +20 10046178 . AAGAAAGAAAG A 2136.03 . AC=2;AF=1.00;AN=2;DP=63;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=48.77;QD=31.11;SOR=0.818 GT:AD:DP:GQ:PL 1/1:0,49:49:99:2150,149,0 20 10046537 . A G 840.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-3.909;DP=57;ExcessHet=0.0000;FS=3.724;MLEAC=1;MLEAF=0.500;MQ=54.50;MQRankSum=0.395;QD=14.75;ReadPosRankSum=0.727;SOR=1.262 GT:AD:DP:GQ:PL 0/1:28,29:57:99:848,0,902 20 10050828 . T C 1248.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-2.704;DP=91;ExcessHet=0.0000;FS=2.887;MLEAC=1;MLEAF=0.500;MQ=59.54;MQRankSum=0.892;QD=14.52;ReadPosRankSum=-0.140;SOR=0.521 GT:AD:DP:GQ:PL 0/1:49,37:86:99:1256,0,1768 20 10051448 . T C 1155.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-0.501;DP=71;ExcessHet=0.0000;FS=3.687;MLEAC=1;MLEAF=0.500;MQ=59.41;MQRankSum=-1.404;QD=16.51;ReadPosRankSum=-0.705;SOR=0.302 GT:AD:DP:GQ:PL 0/1:35,35:70:99:1163,0,1150 20 10052688 . C A 1083.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=0.180;DP=83;ExcessHet=0.0000;FS=3.135;MLEAC=1;MLEAF=0.500;MQ=59.78;MQRankSum=0.870;QD=14.07;ReadPosRankSum=1.443;SOR=0.379 GT:AD:DP:GQ:PL 0/1:45,32:77:99:1091,0,1645 20 10058022 . T C 996.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-2.346;DP=72;ExcessHet=0.0000;FS=0.920;MLEAC=1;MLEAF=0.500;MQ=59.16;MQRankSum=0.466;QD=14.66;ReadPosRankSum=0.018;SOR=0.849 GT:AD:DP:GQ:PL 0/1:36,32:68:99:1004,0,1261 20 10067049 . TAAAAAAA T 672.99 . AC=2;AF=1.00;AN=2;BaseQRankSum=-0.095;DP=69;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=54.77;MQRankSum=-0.381;QD=33.65;ReadPosRankSum=-1.280;SOR=1.179 GT:AD:DP:GQ:PL 1/1:2,18:20:23:687,23,0 -20 10067090 . C A 2010.06 . AC=2;AF=1.00;AN=2;DP=55;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=54.93;QD=29.40;SOR=0.767 GT:AD:DP:GQ:PL 1/1:0,54:54:99:2024,161,0 -20 10067264 . G A 2937.06 . AC=2;AF=1.00;AN=2;DP=73;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.37;QD=36.76;SOR=0.776 GT:AD:DP:GQ:PL 1/1:0,73:73:99:2951,219,0 -20 10067722 . A C 2161.06 . AC=2;AF=1.00;AN=2;DP=59;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=30.54;SOR=0.846 GT:AD:DP:GQ:PL 1/1:0,54:54:99:2175,163,0 +20 10067090 . C A 2010.06 . AC=2;AF=1.00;AN=2;DP=55;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=54.93;QD=34.66;SOR=0.767 GT:AD:DP:GQ:PL 1/1:0,54:54:99:2024,161,0 +20 10067264 . G A 2937.06 . AC=2;AF=1.00;AN=2;DP=73;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.37;QD=29.40;SOR=0.776 GT:AD:DP:GQ:PL 1/1:0,73:73:99:2951,219,0 +20 10067722 . A C 2161.06 . AC=2;AF=1.00;AN=2;DP=59;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=36.76;SOR=0.846 GT:AD:DP:GQ:PL 1/1:0,54:54:99:2175,163,0 20 10068172 . G A 532.04 . AC=2;AF=1.00;AN=2;BaseQRankSum=1.453;DP=18;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=56.04;MQRankSum=-0.309;QD=31.30;ReadPosRankSum=-0.436;SOR=0.481 GT:AD:DP:GQ:PL 1/1:1,16:17:25:546,25,0 -20 10068981 . G A 2444.06 . AC=2;AF=1.00;AN=2;DP=63;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=21.53;SOR=0.950 GT:AD:DP:GQ:PL 1/1:0,59:59:99:2458,178,0 -20 10070602 . T C 2706.06 . AC=2;AF=1.00;AN=2;DP=76;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=58.78;QD=34.87;SOR=0.749 GT:AD:DP:GQ:PL 1/1:0,72:72:99:2720,216,0 -20 10070936 . T A 3330.06 . 
AC=2;AF=1.00;AN=2;DP=86;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=58.96;QD=31.04;SOR=0.876 GT:AD:DP:GQ:PL 1/1:0,81:81:99:3344,244,0 -20 10070938 . G GA 2934.03 . AC=2;AF=1.00;AN=2;DP=85;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.07;QD=27.86;SOR=0.824 GT:AD:DP:GQ:PL 1/1:0,79:79:99:2948,238,0 -20 10071135 . C T 3272.06 . AC=2;AF=1.00;AN=2;DP=88;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.21;QD=34.17;SOR=1.003 GT:AD:DP:GQ:PL 1/1:0,79:79:99:3286,238,0 -20 10071187 . G A 3425.06 . AC=2;AF=1.00;AN=2;DP=85;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.73;QD=28.60;SOR=1.367 GT:AD:DP:GQ:PL 1/1:0,82:82:99:3439,246,0 +20 10068981 . G A 2444.06 . AC=2;AF=1.00;AN=2;DP=63;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=30.54;SOR=0.950 GT:AD:DP:GQ:PL 1/1:0,59:59:99:2458,178,0 +20 10070602 . T C 2706.06 . AC=2;AF=1.00;AN=2;DP=76;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=58.78;QD=21.53;SOR=0.749 GT:AD:DP:GQ:PL 1/1:0,72:72:99:2720,216,0 +20 10070936 . T A 3330.06 . AC=2;AF=1.00;AN=2;DP=86;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=58.96;QD=34.87;SOR=0.876 GT:AD:DP:GQ:PL 1/1:0,81:81:99:3344,244,0 +20 10070938 . G GA 2934.03 . AC=2;AF=1.00;AN=2;DP=85;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.07;QD=31.04;SOR=0.824 GT:AD:DP:GQ:PL 1/1:0,79:79:99:2948,238,0 +20 10071135 . C T 3272.06 . AC=2;AF=1.00;AN=2;DP=88;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.21;QD=27.86;SOR=1.003 GT:AD:DP:GQ:PL 1/1:0,79:79:99:3286,238,0 +20 10071187 . G A 3425.06 . AC=2;AF=1.00;AN=2;DP=85;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.73;QD=34.17;SOR=1.367 GT:AD:DP:GQ:PL 1/1:0,82:82:99:3439,246,0 20 10071890 . T C 1989.06 . AC=2;AF=1.00;AN=2;DP=61;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.31;QD=34.90;SOR=0.728 GT:AD:DP:GQ:PL 1/1:0,57:57:99:2003,171,0 20 10072505 . A G 2114.06 . AC=2;AF=1.00;AN=2;DP=67;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=33.03;SOR=0.822 GT:AD:DP:GQ:PL 1/1:0,64:64:99:2128,191,0 -20 10074187 . A G 3116.06 . AC=2;AF=1.00;AN=2;DP=81;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.14;QD=25.57;SOR=0.881 GT:AD:DP:GQ:PL 1/1:0,79:79:99:3130,238,0 -20 10074240 . T C 3421.06 . AC=2;AF=1.00;AN=2;DP=89;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.79;QD=33.11;SOR=0.922 GT:AD:DP:GQ:PL 1/1:0,85:85:99:3435,255,0 -20 10074716 . G A 2415.06 . AC=2;AF=1.00;AN=2;DP=70;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.67;QD=30.20;SOR=1.214 GT:AD:DP:GQ:PL 1/1:0,64:64:99:2429,192,0 -20 10074806 . G A 2832.06 . AC=2;AF=1.00;AN=2;DP=75;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.38;QD=30.49;SOR=0.840 GT:AD:DP:GQ:PL 1/1:0,71:71:99:2846,213,0 -20 10075043 . T C 2420.06 . AC=2;AF=1.00;AN=2;DP=64;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.64;QD=31.67;SOR=1.107 GT:AD:DP:GQ:PL 1/1:0,61:61:99:2434,184,0 -20 10075168 . C T 3627.06 . AC=2;AF=1.00;AN=2;DP=91;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.37;QD=29.53;SOR=0.997 GT:AD:DP:GQ:PL 1/1:0,88:88:99:3641,264,0 +20 10074187 . A G 3116.06 . AC=2;AF=1.00;AN=2;DP=81;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.14;QD=28.60;SOR=0.881 GT:AD:DP:GQ:PL 1/1:0,79:79:99:3130,238,0 +20 10074240 . T C 3421.06 . AC=2;AF=1.00;AN=2;DP=89;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.79;QD=25.57;SOR=0.922 GT:AD:DP:GQ:PL 1/1:0,85:85:99:3435,255,0 +20 10074716 . G A 2415.06 . AC=2;AF=1.00;AN=2;DP=70;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.67;QD=33.11;SOR=1.214 GT:AD:DP:GQ:PL 1/1:0,64:64:99:2429,192,0 +20 10074806 . G A 2832.06 . 
AC=2;AF=1.00;AN=2;DP=75;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.38;QD=30.20;SOR=0.840 GT:AD:DP:GQ:PL 1/1:0,71:71:99:2846,213,0 +20 10075043 . T C 2420.06 . AC=2;AF=1.00;AN=2;DP=64;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.64;QD=30.49;SOR=1.107 GT:AD:DP:GQ:PL 1/1:0,61:61:99:2434,184,0 +20 10075168 . C T 3627.06 . AC=2;AF=1.00;AN=2;DP=91;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.37;QD=31.67;SOR=0.997 GT:AD:DP:GQ:PL 1/1:0,88:88:99:3641,264,0 20 10075508 . GA G 1952.03 . AC=2;AF=1.00;AN=2;DP=81;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=61.11;QD=27.89;SOR=1.085 GT:AD:DP:GQ:PL 1/1:0,70:70:99:1966,210,0 20 10076250 . A G 1128.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-3.386;DP=82;ExcessHet=0.0000;FS=7.546;MLEAC=1;MLEAF=0.500;MQ=59.41;MQRankSum=-1.423;QD=14.29;ReadPosRankSum=0.270;SOR=0.344 GT:AD:DP:GQ:PL 0/1:40,39:79:99:1136,0,1339 20 10076339 . A G 1330.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-0.069;DP=76;ExcessHet=0.0000;FS=1.992;MLEAC=1;MLEAF=0.500;MQ=59.70;MQRankSum=-0.866;QD=18.48;ReadPosRankSum=1.015;SOR=0.446 GT:AD:DP:GQ:PL 0/1:32,40:72:99:1338,0,1035 @@ -198,7 +198,7 @@ 20 10079476 . T G 31.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-4.570;DP=55;ExcessHet=0.0000;FS=57.922;MLEAC=1;MLEAF=0.500;MQ=58.81;MQRankSum=-0.826;QD=0.72;ReadPosRankSum=1.533;SOR=4.970 GT:AD:DP:GQ:PL 0/1:33,11:44:39:39,0,1137 20 10081750 . C A 1374.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=2.670;DP=92;ExcessHet=0.0000;FS=0.824;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.000;QD=16.56;ReadPosRankSum=-0.406;SOR=0.664 GT:AD:DP:GQ:PL 0/1:44,39:83:99:1382,0,1525 20 10081800 . C T 1130.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=0.241;DP=69;ExcessHet=0.0000;FS=11.086;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.000;QD=16.63;ReadPosRankSum=-2.622;SOR=0.143 GT:AD:DP:GQ:PL 0/1:35,33:68:99:1138,0,1222 -20 10082892 . C T 1673.06 . AC=2;AF=1.00;AN=2;DP=44;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.48;QD=32.63;SOR=0.836 GT:AD:DP:GQ:PL 1/1:0,43:43:99:1687,129,0 +20 10082892 . C T 1673.06 . AC=2;AF=1.00;AN=2;DP=44;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.48;QD=29.53;SOR=0.836 GT:AD:DP:GQ:PL 1/1:0,43:43:99:1687,129,0 20 10085211 . A T 1231.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-1.501;DP=81;ExcessHet=0.0000;FS=0.870;MLEAC=1;MLEAF=0.500;MQ=59.72;MQRankSum=-1.028;QD=16.21;ReadPosRankSum=-1.405;SOR=0.723 GT:AD:DP:GQ:PL 0/1:40,36:76:99:1239,0,1443 20 10086110 . G A 1295.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-0.343;DP=83;ExcessHet=0.0000;FS=1.810;MLEAC=1;MLEAF=0.500;MQ=59.16;MQRankSum=-0.680;QD=16.00;ReadPosRankSum=0.170;SOR=0.495 GT:AD:DP:GQ:PL 0/1:43,38:81:99:1303,0,1506 20 10086283 . G T 1046.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=2.304;DP=86;ExcessHet=0.0000;FS=0.855;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.000;QD=12.46;ReadPosRankSum=1.889;SOR=0.529 GT:AD:DP:GQ:PL 0/1:52,32:84:99:1054,0,1771 @@ -212,14 +212,14 @@ 20 10087820 . C CAG 802.60 . AC=1;AF=0.500;AN=2;BaseQRankSum=-2.081;DP=100;ExcessHet=0.0000;FS=0.949;MLEAC=1;MLEAF=0.500;MQ=60.75;MQRankSum=2.850;QD=12.35;ReadPosRankSum=-0.323;SOR=0.906 GT:AD:DP:GQ:PL 0/1:36,29:65:99:810,0,1047 20 10088063 . C T 1438.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=3.405;DP=94;ExcessHet=0.0000;FS=2.739;MLEAC=1;MLEAF=0.500;MQ=59.36;MQRankSum=0.515;QD=15.47;ReadPosRankSum=0.285;SOR=1.053 GT:AD:DP:GQ:PL 0/1:49,44:93:99:1446,0,1403 20 10088699 . C T 797.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=3.803;DP=67;ExcessHet=0.0000;FS=16.952;MLEAC=1;MLEAF=0.500;MQ=56.90;MQRankSum=-3.140;QD=12.87;ReadPosRankSum=-1.456;SOR=1.914 GT:AD:DP:GQ:PL 0/1:36,26:62:99:805,0,1088 -20 10088730 . G A 961.64 . 
AC=1;AF=0.500;AN=2;BaseQRankSum=0.224;DP=57;ExcessHet=0.0000;FS=9.673;MLEAC=1;MLEAF=0.500;MQ=52.92;MQRankSum=-5.143;QD=17.48;ReadPosRankSum=0.581;SOR=1.025 GT:AD:DP:GQ:PL 0/1:29,26:55:99:969,0,956 -20 10088736 . A C 973.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=0.282;DP=60;ExcessHet=0.0000;FS=9.673;MLEAC=1;MLEAF=0.500;MQ=52.86;MQRankSum=-5.169;QD=17.70;ReadPosRankSum=0.573;SOR=0.851 GT:AD:DP:GQ:PL 0/1:28,27:55:99:981,0,854 +20 10088730 . G A 812.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=0.224;DP=57;ExcessHet=0.0000;FS=9.673;MLEAC=1;MLEAF=0.500;MQ=52.92;MQRankSum=-5.143;QD=14.78;ReadPosRankSum=0.581;SOR=1.025 GT:AD:DP:GQ:PL 0/1:29,26:55:99:820,0,956 +20 10088736 . A C 863.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=0.282;DP=60;ExcessHet=0.0000;FS=9.673;MLEAC=1;MLEAF=0.500;MQ=52.86;MQRankSum=-5.169;QD=15.70;ReadPosRankSum=0.573;SOR=0.851 GT:AD:DP:GQ:PL 0/1:28,27:55:99:871,0,854 20 10088747 . A G 798.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-3.877;DP=56;ExcessHet=0.0000;FS=7.653;MLEAC=1;MLEAF=0.500;MQ=52.20;MQRankSum=-4.907;QD=14.79;ReadPosRankSum=0.009;SOR=1.092 GT:AD:DP:GQ:PL 0/1:25,29:54:99:806,0,854 20 10088799 . G A 1039.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=5.503;DP=54;ExcessHet=0.0000;FS=10.098;MLEAC=1;MLEAF=0.500;MQ=53.34;MQRankSum=-2.498;QD=19.62;ReadPosRankSum=1.075;SOR=1.003 GT:AD:DP:GQ:PL 0/1:22,31:53:99:1047,0,617 20 10088895 . C T 556.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=2.330;DP=43;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=57.80;MQRankSum=0.260;QD=13.92;ReadPosRankSum=1.238;SOR=0.727 GT:AD:DP:GQ:PL 0/1:22,18:40:99:564,0,651 20 10088968 . C CAAA 1042.60 . AC=1;AF=0.500;AN=2;BaseQRankSum=0.267;DP=78;ExcessHet=0.0000;FS=0.971;MLEAC=1;MLEAF=0.500;MQ=52.91;MQRankSum=-5.108;QD=17.09;ReadPosRankSum=0.614;SOR=0.646 GT:AD:DP:GQ:PL 0/1:30,31:61:99:1050,0,1111 -20 10088980 . T C 1111.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-1.522;DP=75;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=51.54;MQRankSum=-5.302;QD=16.11;ReadPosRankSum=1.708;SOR=0.756 GT:AD:DP:GQ:PL 0/1:38,31:69:99:1119,0,1146 -20 10088985 . T C 846.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-0.145;DP=78;ExcessHet=0.0000;FS=0.880;MLEAC=1;MLEAF=0.500;MQ=51.41;MQRankSum=-5.314;QD=11.29;ReadPosRankSum=1.586;SOR=0.780 GT:AD:DP:GQ:PL 0/1:43,32:75:99:854,0,1357 +20 10088980 . T C 765.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-1.522;DP=75;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=51.54;MQRankSum=-5.302;QD=11.10;ReadPosRankSum=1.708;SOR=0.756 GT:AD:DP:GQ:PL 0/1:38,31:69:99:773,0,1147 +20 10088985 . T C 846.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-0.145;DP=78;ExcessHet=0.0000;FS=0.880;MLEAC=1;MLEAF=0.500;MQ=51.41;MQRankSum=-5.314;QD=11.29;ReadPosRankSum=1.586;SOR=0.780 GT:AD:DP:GQ:PL 0/1:43,32:75:99:854,0,1323 20 10089441 . A G 1200.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-4.885;DP=79;ExcessHet=0.0000;FS=6.071;MLEAC=1;MLEAF=0.500;MQ=59.71;MQRankSum=-0.948;QD=15.80;ReadPosRankSum=-0.156;SOR=1.363 GT:AD:DP:GQ:PL 0/1:37,39:76:99:1208,0,1313 20 10089525 . C T 1569.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=5.859;DP=83;ExcessHet=0.0000;FS=4.260;MLEAC=1;MLEAF=0.500;MQ=59.50;MQRankSum=1.531;QD=19.38;ReadPosRankSum=0.862;SOR=0.617 GT:AD:DP:GQ:PL 0/1:38,43:81:99:1577,0,1088 20 10090289 . CA C 414.60 . AC=1;AF=0.500;AN=2;BaseQRankSum=0.431;DP=84;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=59.97;MQRankSum=1.393;QD=10.91;ReadPosRankSum=0.048;SOR=0.859 GT:AD:DP:GQ:PL 0/1:14,24:38:99:422,0,231 @@ -239,12 +239,12 @@ 20 10096596 . C T 1583.64 . 
AC=1;AF=0.500;AN=2;BaseQRankSum=2.728;DP=91;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=58.82;MQRankSum=0.612;QD=18.20;ReadPosRankSum=-0.170;SOR=0.640 GT:AD:DP:GQ:PL 0/1:40,47:87:99:1591,0,1273 20 10096768 . A C 1703.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-2.764;DP=96;ExcessHet=0.0000;FS=3.983;MLEAC=1;MLEAF=0.500;MQ=59.28;MQRankSum=-1.609;QD=18.72;ReadPosRankSum=0.613;SOR=0.445 GT:AD:DP:GQ:PL 0/1:42,49:91:99:1711,0,1452 20 10096899 . G T 1405.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=2.755;DP=84;ExcessHet=0.0000;FS=9.606;MLEAC=1;MLEAF=0.500;MQ=59.28;MQRankSum=-1.387;QD=17.79;ReadPosRankSum=0.422;SOR=1.707 GT:AD:DP:GQ:PL 0/1:39,40:79:99:1413,0,1365 -20 10096905 . TA T 1522.60 . AC=1;AF=0.500;AN=2;BaseQRankSum=-0.897;DP=86;ExcessHet=0.0000;FS=11.448;MLEAC=1;MLEAF=0.500;MQ=59.30;MQRankSum=-1.656;QD=18.57;ReadPosRankSum=0.269;SOR=1.760 GT:AD:DP:GQ:PL 0/1:39,43:82:99:1530,0,1212 +20 10096905 . TA T 1357.60 . AC=1;AF=0.500;AN=2;BaseQRankSum=-0.897;DP=86;ExcessHet=0.0000;FS=11.448;MLEAC=1;MLEAF=0.500;MQ=59.30;MQRankSum=-1.656;QD=16.56;ReadPosRankSum=0.269;SOR=1.760 GT:AD:DP:GQ:PL 0/1:39,43:82:99:1365,0,1212 20 10096933 . G C 1336.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-1.914;DP=89;ExcessHet=0.0000;FS=23.320;MLEAC=1;MLEAF=0.500;MQ=59.32;MQRankSum=-1.802;QD=15.73;ReadPosRankSum=0.383;SOR=1.534 GT:AD:DP:GQ:PL 0/1:44,41:85:99:1344,0,1514 20 10096958 . G A 1628.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=2.697;DP=93;ExcessHet=0.0000;FS=19.237;MLEAC=1;MLEAF=0.500;MQ=59.35;MQRankSum=-1.682;QD=17.90;ReadPosRankSum=-1.593;SOR=1.375 GT:AD:DP:GQ:PL 0/1:44,47:91:99:1636,0,1487 20 10097075 . T G 1466.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-2.533;DP=96;ExcessHet=0.0000;FS=6.613;MLEAC=1;MLEAF=0.500;MQ=58.14;MQRankSum=-1.389;QD=15.94;ReadPosRankSum=0.235;SOR=0.628 GT:AD:DP:GQ:PL 0/1:44,48:92:99:1474,0,1482 20 10097101 . C CTTT 1132.60 . AC=1;AF=0.500;AN=2;BaseQRankSum=-0.907;DP=80;ExcessHet=0.0000;FS=4.931;MLEAC=1;MLEAF=0.500;MQ=58.24;MQRankSum=-0.684;QD=17.16;ReadPosRankSum=1.777;SOR=0.385 GT:AD:DP:GQ:PL 0/1:33,33:66:99:1140,0,1220 -20 10097437 . TTTTC CTTTC,T 1114.06 . AC=1,1;AF=0.500,0.500;AN=2;BaseQRankSum=-0.660;DP=60;ExcessHet=0.0000;FS=0.000;MLEAC=1,1;MLEAF=0.500,0.500;MQ=58.33;MQRankSum=3.089;QD=30.95;ReadPosRankSum=-0.138;SOR=0.859 GT:AD:DP:GQ:PL 1/2:2,11,23:36:99:1131,862,1679,307,0,383 +20 10097437 . TTTTC CTTTC,T 1151.06 . AC=1,1;AF=0.500,0.500;AN=2;DP=60;ExcessHet=0.0000;FS=0.000;MLEAC=1,1;MLEAF=0.500,0.500;MQ=58.33;QD=31.11;SOR=2.774 GT:AD:DP:GQ:PL 1/2:0,14,23:37:99:1168,811,1679,418,0,395 20 10097626 . C A 647.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=1.085;DP=54;ExcessHet=0.0000;FS=2.701;MLEAC=1;MLEAF=0.500;MQ=58.00;MQRankSum=-1.331;QD=13.49;ReadPosRankSum=1.015;SOR=0.324 GT:AD:DP:GQ:PL 0/1:28,20:48:99:655,0,957 20 10097789 . T C 709.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-1.157;DP=60;ExcessHet=0.0000;FS=5.374;MLEAC=1;MLEAF=0.500;MQ=58.05;MQRankSum=-2.805;QD=12.45;ReadPosRankSum=0.220;SOR=0.756 GT:AD:DP:GQ:PL 0/1:34,23:57:99:717,0,1190 20 10097928 . G A 591.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-1.203;DP=37;ExcessHet=0.0000;FS=1.315;MLEAC=1;MLEAF=0.500;MQ=58.11;MQRankSum=-1.654;QD=16.43;ReadPosRankSum=-0.079;SOR=0.951 GT:AD:DP:GQ:PL 0/1:17,19:36:99:599,0,586 @@ -258,7 +258,7 @@ 20 10098885 . C CA 463.60 . AC=1;AF=0.500;AN=2;BaseQRankSum=0.165;DP=33;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=49.73;MQRankSum=-4.449;QD=14.95;ReadPosRankSum=-0.954;SOR=0.518 GT:AD:DP:GQ:PL 0/1:17,14:31:99:471,0,572 20 10098945 . T C 412.64 . 
AC=1;AF=0.500;AN=2;BaseQRankSum=1.475;DP=22;ExcessHet=0.0000;FS=3.882;MLEAC=1;MLEAF=0.500;MQ=51.15;MQRankSum=-2.504;QD=18.76;ReadPosRankSum=1.038;SOR=2.368 GT:AD:DP:GQ:PL 0/1:9,13:22:99:420,0,248 20 10098987 . C T 400.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=3.127;DP=25;ExcessHet=0.0000;FS=7.005;MLEAC=1;MLEAF=0.500;MQ=51.36;MQRankSum=-2.734;QD=16.69;ReadPosRankSum=0.436;SOR=2.925 GT:AD:DP:GQ:PL 0/1:11,13:24:99:408,0,350 -20 10099029 . T C 358.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-1.809;DP=30;ExcessHet=0.0000;FS=2.032;MLEAC=1;MLEAF=0.500;MQ=52.93;MQRankSum=-3.641;QD=12.81;ReadPosRankSum=-1.224;SOR=1.329 GT:AD:DP:GQ:PL 0/1:18,10:28:99:366,0,634 +20 10099029 . T C 328.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-1.809;DP=30;ExcessHet=0.0000;FS=2.032;MLEAC=1;MLEAF=0.500;MQ=52.93;MQRankSum=-3.641;QD=11.74;ReadPosRankSum=-1.224;SOR=1.329 GT:AD:DP:GQ:PL 0/1:18,10:28:99:336,0,634 20 10099034 . C A 338.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=2.757;DP=29;ExcessHet=0.0000;FS=4.265;MLEAC=1;MLEAF=0.500;MQ=53.57;MQRankSum=-3.641;QD=12.09;ReadPosRankSum=-1.850;SOR=1.721 GT:AD:DP:GQ:PL 0/1:18,10:28:99:346,0,578 20 10099044 . A C 367.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-1.148;DP=30;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=51.44;MQRankSum=-4.449;QD=14.71;ReadPosRankSum=-2.207;SOR=0.616 GT:AD:DP:GQ:PL 0/1:15,10:25:99:375,0,478 20 10099046 . T C 401.64 . AC=1;AF=0.500;AN=2;BaseQRankSum=-2.264;DP=26;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=49.18;MQRankSum=-4.484;QD=15.45;ReadPosRankSum=-2.553;SOR=0.719 GT:AD:DP:GQ:PL 0/1:15,11:26:99:409,0,597 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.pileupCallerDefaults.gatk4.vcf.idx b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.pileupCallerDefaults.gatk4.vcf.idx index 62a6855975131d45ff20a22b85f9c60fe12fb415..b56e032d77515c19a2221b2f2373e224d241861a 100644 GIT binary patch delta 61 zcmaDMc3*6Q6h}&8S!PO7YTiWcQr`KO7{K6P;p>`?jcXTkNis9Ktw$H IAv8<{0AMo|Jpcdz diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.testExposureOfSmithWatermanParameters.HC.gatk4.vcf.idx b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.testExposureOfSmithWatermanParameters.HC.gatk4.vcf.idx index aa56e24f7b6558073040cc05157ac8d1fc3d140e..4f739f406bc6a6f188b3eb437bf0f09f58385de0 100644 GIT binary patch delta 32 ncmZ1x)Dtv8nkTU=GbJfCPd`1eBzvNB4{NaAi<*v&S3)!Z&mIjD delta 60 zcmeAPS`josTB9hxC_A;dI48A8-z7h}G&eP`q*&iQv&5q`Nk2WYBwM$jD6=fFBz2;C Q59`M-BF%LhcZ6sF0MrN=aR2}S diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.testForceCallingNotProducingNoCalls.gatk4.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.testForceCallingNotProducingNoCalls.gatk4.vcf index 75e8c19b0d4..785e44fb112 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.testForceCallingNotProducingNoCalls.gatk4.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.testForceCallingNotProducingNoCalls.gatk4.vcf @@ -47,6 +47,7 @@ 20 10003832 . G A 2720.06 . AC=2;AF=1.00;AN=2;DP=67;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=56.82;QD=29.03;SOR=1.473 GT:AD:DP:GQ:PL 1/1:0,66:66:99:2734,199,0 20 10004094 . A C,T 1840.06 . AC=2,0;AF=1.00,0.00;AN=2;DP=55;ExcessHet=0.0000;FS=0.000;MLEAC=2,0;MLEAF=1.00,0.00;MQ=52.62;QD=34.72;SOR=1.524 GT:AD:DP:GQ:PL 1/1:0,53,0:53:99:1854,157,0,1899,164,2091 20 10004147 . A G 1852.06 . 
AC=2;AF=1.00;AN=2;DP=54;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=55.19;QD=30.67;SOR=1.136 GT:AD:DP:GQ:PL 1/1:0,52:52:99:1866,156,0 +20 10004193 . G T 0 LowQual AC=1;AF=0.500;AN=2;BaseQRankSum=-0.392;DP=54;ExcessHet=0.0000;FS=2.369;MLEAC=1;MLEAF=0.500;MQ=59.14;MQRankSum=3.385;QD=0.00;ReadPosRankSum=1.724;SOR=0.941 GT:AD:DP:GQ:PL 0/1:23,28:51:99:945,0,776 20 10004351 . C G 2770.06 . AC=2;AF=1.00;AN=2;DP=71;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.05;QD=26.61;SOR=0.874 GT:AD:DP:GQ:PL 1/1:0,70:70:99:2784,211,0 20 10004389 . T G 2509.06 . AC=2;AF=1.00;AN=2;DP=67;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=56.62;QD=29.20;SOR=1.143 GT:AD:DP:GQ:PL 1/1:0,62:62:99:2523,187,0 20 10004610 . A C 3640.06 . AC=2;AF=1.00;AN=2;DP=93;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=58.75;QD=28.55;SOR=0.761 GT:AD:DP:GQ:PL 1/1:0,89:89:99:3654,267,0 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.testGVCFMode.gatk4.alleleSpecific.g.vcf.idx b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.testGVCFMode.gatk4.alleleSpecific.g.vcf.idx index 2f7a1a840801ee8826fb00e06662f3010ab83eb1..389f432c6de78c23a86085bd2b50a1f8c8db3de9 100644 GIT binary patch delta 34 qcmeyikLlJvrU}wKiDj87NvV1I>4_!T6P+7bIo`ae>DYL3?K}VkxDU1f delta 62 zcmcb$kLlY!rU}v-MfpY9sl~-PsYUuO`N^fZsd**E`tF$}9;He8>4_!Tx&=j4_!T6P=4#`&PfG>Dahy?K}Vi#Sgy# delta 62 zcmX@TkLme7rU}v-MfpY9sl~-PsYUuO`N^fZsd**E`tF$}9;He8>4_!Tx&=jR diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.testHaplotypeCallerRemoveAltAlleleBasedOnHaptypeScores.gatk4.vcf.idx b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.testHaplotypeCallerRemoveAltAlleleBasedOnHaptypeScores.gatk4.vcf.idx index d77d9bc95660e1e11a8d2f3afecc5190635c96f7..453c33e22cc4c763f4ccb505e9270d8a072bbc20 100644 GIT binary patch delta 32 ocmaDQa86)?G*4n#W=c|Oo_=~_N%lnNS*!(NFKaqBKI&ix0MhvlhX4Qo delta 60 zcmX>n@Je8Uv_?^WQFdx^aZYNHzDs^`X>Mv>NwL0rW{F2>l74z(Nw#i5QD#|UN$N!P QS*+ddV$F3Mk9DvE0M4fu&j0`b diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.testLinkedDebruijnMode.gatk4.vcf.idx b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.testLinkedDebruijnMode.gatk4.vcf.idx index 6124fd5755e8fb786d845d5b79c6229a97e8ad74..28cbc25149ec1d28522390b3a0134af71c863996 100644 GIT binary patch delta 32 ocmew>_E2nsG*4n#W=c|Oo_=~_N%lnND%SkEmo*(55AEgx0NC3N)&Kwi delta 60 zcmaDT_E&6zv_?^WQFdx^aZYNHzDs^`X>Mv>NwL0rW{F2>l74z(Nw#i5QD#|UN$N!P QDpu~xV$F3Mm+j^P0M@D(!vFvP diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.testVCFMode.gatk4.DRAGEN.vcf.idx b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.testVCFMode.gatk4.DRAGEN.vcf.idx index b4174b6f4b7a418b2d2ade815fb54e5428254d27..3dd3b6c21f02957e2feec6a443eb3bfef2f6340d 100644 GIT binary patch delta 32 ocmewMv>NwL0rW{F2>l74z(Nw#i5QD#|UN$N!P QGFHniBF%Lh7wzT(0M#-VzyJUM diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.testVCFMode.gatk4.FRDBQD.vcf.idx b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.testVCFMode.gatk4.FRDBQD.vcf.idx index 5e9286a84c65b080e356d64512f032e4d30aa35d..89ff250f6162bcc04252724c587512cec4cb7b2e 100644 GIT binary patch delta 32 ocmZ1^*CsbXnkTU=GbJfCPd`1eBzvNB8S7$$7d0Il_oeUw0K+T{x&QzG delta 60 zcmZpZTO>C@TB9hxC_A;dI48A8-z7h}G&eP`q*&iQv&5q`Nk2WYBwM$jD6=fFBz2;C 
Q8S9eoBF%Lh7p3q40JZ5CeEMv>NwL0rW{F2>l74z(Nw#i5QD#|UN$N!P
QeAZpH63ulRXYb+y0MrW??*IS*
diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testGvcfBeforeRebase.expected.flowbased.vcf.idx b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testGvcfBeforeRebase.expected.flowbased.vcf.idx
index 1014e277132aa058876d8d44cb910f2fb5092cf1..7a5cb8011a644b8d4a4e76347ec7c4d93785e23f 100644
GIT binary patch
delta 54
zcmV-60LlO4b1!6Jc4=f{Wp0rpfdwZB>3oQRsfDQlg{cCCsRMb1!6Jc4=f{Wo|ELVRUPeFp&iTDCvBNfyRZ#0foi_
Rg~kJg#sr1N1+~Toys_<58LR*R
delta 68
zcmV-K0K5PBb1!6Jc4=f{Wp0rpmIdWQ>3oQRzJV6i1fxU&j0foH+g}noX
Ny#$551+~2fy|J$27={1<
diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testGvcfKeepLoneAlleles.expected.flowbased.vcf.idx b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testGvcfKeepLoneAlleles.expected.flowbased.vcf.idx
index 400e0f018bbfd5ee937ed04d04b60a8f73e77830..3b0000bf6b53b466bcb60f4bcb03fdfb83b9513b 100644
GIT binary patch
delta 54
zcmV-60LlO8b1!6Jc4=f{Wp0rpgaw@y>3oQRtc9!rg{%UFtOJFt
M1cj^xwX6obv64I&-2eap
delta 55
zcmV-70LcI6Ofvbh90fnmqg{uRF
Ns|1Cs1+}XNys?_q7oz|G
diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testGvcfWithAssemblyComplexityAnnotationRevamp.expected.flowbased.vcf.idx b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testGvcfWithAssemblyComplexityAnnotationRevamp.expected.flowbased.vcf.idx
index d9dbf915d6cdaff937d0e4525cbbcec53590cf68..cfc4af15754202c473d98c7ba66823df81df8e5e 100644
GIT binary patch
delta 59
zcmV-B0L1@?b1!6Jc4=f{Wo|ELVRUPeFqQ?XRq1?)fy;%<0fox~
Rh06nl%LIkX1+~itys-#b8ngfa
delta 68
zcmV-K0K5PGvSh
diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testVcfBeforeRebase.expected.flowbased.vcf.idx b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testVcfBeforeRebase.expected.flowbased.vcf.idx
index 879511a4a5092a0b1993137e8756be215d726344..fd50c515731fc6746b46a06942b43fa9addc8892 100644
GIT binary patch
delta 39
vcmaFW!2YsSU)APEHfo3HBUc1u_Sw