Using a light ref/alt/pos Event class for assembled variation, pileup events, and force-calling alleles
davidbenjamin committed Jun 2, 2023
1 parent 4456792 commit 33ac71e
Showing 56 changed files with 1,264 additions and 1,859 deletions.
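The new Event class itself is not among the hunks shown below. Judging from the accessors the changed code relies on (getStart(), refAllele(), altAllele()) and from the value-style equality the rsID and haplotype comparisons use, a minimal sketch of what such a class might look like; the record form, the contig field, and everything beyond those three accessors are assumptions rather than content of this commit:

import htsjdk.variant.variantcontext.Allele;

// Sketch only: a light value class keyed by position plus ref and alt alleles.
// Records generate equals/hashCode over all components, matching the value
// equality that the rsID matching and haplotype comparisons below appear to rely on.
public record Event(String contig, int start, Allele refAllele, Allele altAllele) {

    // the diff uses getStart(), so expose it alongside the record accessor
    public int getStart() {
        return start;
    }

    public String getContig() {
        return contig;
    }
}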
@@ -12,6 +12,7 @@
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.utils.MathUtils;
import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods;
+ import org.broadinstitute.hellbender.utils.haplotype.Event;
import org.broadinstitute.hellbender.utils.haplotype.EventMap;
import org.broadinstitute.hellbender.utils.haplotype.Haplotype;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
@@ -64,9 +65,9 @@ public static Triple<int[], int[], double[]> annotate(final VariantContext vc,
// encode each haplotype as a string of variant starts and alt allele strings, excluding the locus of vc (to avoid reference/germline bias)
// note that VariantContexts in an EventMap are always biallelic, so var.getAlternateAllele(0) is valid
final Map<String, List<Haplotype>> haplotypeGroups = haplotypeLikelihoods.alleles().stream()
- .collect(Collectors.groupingBy(hap -> hap.getEventMap().getVariantContexts().stream()
- .filter(var -> var.getStart() != vc.getStart())
- .map(var -> var.getStart() + var.getAlternateAllele(0).getBaseString())
+ .collect(Collectors.groupingBy(hap -> hap.getEventMap().getEvents().stream()
+ .filter(event -> event.getStart() != vc.getStart())
+ .map(event -> event.getStart() + event.altAllele().getBaseString())
.collect(Collectors.joining())));

// sum the read support counts for all haplotypes within each group
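The first comment in the hunk above describes keying each haplotype by the concatenation of its event starts and alt allele strings, skipping the locus of vc. A self-contained illustration of that key construction, using a plain record as a stand-in for the real Event class (positions and alleles are invented):

import java.util.List;
import java.util.stream.Collectors;

public class HaplotypeKeyExample {
    // stand-in for the real Event class
    record SimpleEvent(int start, String altBases) {}

    public static void main(String[] args) {
        final int annotatedStart = 205;  // the locus of vc, excluded to avoid reference/germline bias
        final List<SimpleEvent> events = List.of(new SimpleEvent(100, "T"), new SimpleEvent(205, "A"));

        final String key = events.stream()
                .filter(event -> event.start() != annotatedStart)
                .map(event -> event.start() + event.altBases())
                .collect(Collectors.joining());

        System.out.println(key);  // prints "100T"; haplotypes with identical keys land in the same group
    }
}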
@@ -143,16 +144,16 @@ public List<String> getKeyNames() {
// k bases longer we simply check that the event map alt allele matches the variant context alt allele excluding the
// latter's last k bases
private static boolean containsAltAllele(final EventMap eventMap, final VariantContext vc, final int altAlleleIndex) {
- final List<VariantContext> overlapping = eventMap.getOverlappingEvents(vc.getStart());
+ final List<Event> overlapping = eventMap.getOverlappingEvents(vc.getStart());
if (overlapping.isEmpty()) {
return false;
} else if (overlapping.get(0).getStart() != vc.getStart()) {
return false;
} else {
- final VariantContext eventMapVC = overlapping.get(0);
- final int excessBases = vc.getReference().length() - eventMapVC.getReference().length();
+ final Event overlappingEvent = overlapping.get(0);
+ final int excessBases = vc.getReference().length() - overlappingEvent.refAllele().length();

- return equalBasesExcludingSuffix(eventMapVC.getAlternateAllele(0).getBases(),
+ return equalBasesExcludingSuffix(overlappingEvent.altAllele().getBases(),
vc.getAlternateAllele(altAlleleIndex).getBases(), excessBases);
}
}
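The body of equalBasesExcludingSuffix sits outside the displayed hunks. A hypothetical sketch of the suffix-excluding comparison described by the comment at the top of this hunk, in which the variant context alt allele may carry excessBases extra trailing reference bases (the real signature and implementation may differ):

import java.util.Arrays;

public class SuffixComparisonExample {
    // Assumed behavior: the two alt alleles match when the variant-context allele is
    // the event-map allele followed by exactly excessBases additional bases.
    static boolean equalBasesExcludingSuffix(final byte[] eventMapBases, final byte[] vcBases, final int excessBases) {
        return excessBases >= 0
                && vcBases.length - excessBases == eventMapBases.length
                && Arrays.equals(eventMapBases, Arrays.copyOf(vcBases, eventMapBases.length));
    }

    public static void main(String[] args) {
        // event-map alt "T" versus variant-context alt "TAA" with 2 excess reference bases
        System.out.println(equalBasesExcludingSuffix("T".getBytes(), "TAA".getBytes(), 2));  // true
    }
}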
@@ -177,12 +178,11 @@ private static boolean equalBasesExcludingSuffix(final byte[] eventMapBases, fin
}

// count variants in one haplotype but not the other
- // note that we use the fact that EventMap VariantContexts are biallelic
private static int uniqueVariants(final Haplotype hap1, final Haplotype hap2, final int excludedPosition) {
final EventMap eventMap2 = hap2.getEventMap();
- return (int) hap1.getEventMap().getVariantContexts().stream()
- .filter(vc -> vc.getStart() != excludedPosition)
- .filter(vc -> !containsAltAllele(eventMap2, vc, 0))
+ return (int) hap1.getEventMap().getEvents().stream()
+ .filter(event -> event.getStart() != excludedPosition)
+ .filter(event -> !event.equals(eventMap2.get(event.getStart())))
.count();
}
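The rewritten uniqueVariants compares Events directly through eventMap2.get(event.getStart()), which suggests that EventMap supports lookup by start position and that Event has value equality over position and alleles; both are assumptions here, not shown in the diff. A small illustration of the counting logic on plain maps keyed by start position:

import java.util.Map;

public class UniqueVariantsExample {
    // stand-in for the real Event class
    record SimpleEvent(int start, String ref, String alt) {}

    static int uniqueVariants(final Map<Integer, SimpleEvent> hap1, final Map<Integer, SimpleEvent> hap2, final int excludedPosition) {
        return (int) hap1.values().stream()
                .filter(event -> event.start() != excludedPosition)
                .filter(event -> !event.equals(hap2.get(event.start())))   // a missing lookup returns null and counts as unique
                .count();
    }

    public static void main(String[] args) {
        final Map<Integer, SimpleEvent> hap1 = Map.of(100, new SimpleEvent(100, "A", "T"), 200, new SimpleEvent(200, "G", "C"));
        final Map<Integer, SimpleEvent> hap2 = Map.of(100, new SimpleEvent(100, "A", "T"));
        System.out.println(uniqueVariants(hap1, hap2, -1));  // 1: only the event at 200 is unique to hap1
    }
}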

@@ -7,6 +7,7 @@
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods;
+ import org.broadinstitute.hellbender.utils.haplotype.Event;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
@@ -48,6 +49,12 @@ public Map<String, Object> annotate(final ReferenceContext ref,
return Collections.unmodifiableMap(map);
}

+ public static Pair<List<Integer>, byte[]> getNumTandemRepeatUnits(final ReferenceContext ref, final Event event) {
+ final byte[] refBases = ref.getBases();
+ final int startIndex = event.getStart() + 1 - ref.getWindow().getStart(); // +1 to exclude leading match base common to VC's ref and alt alleles
+ return GATKVariantContextUtils.getNumTandemRepeatUnits(event.refAllele(), Collections.singletonList(event.altAllele()), Arrays.copyOfRange(refBases, startIndex, refBases.length));
+ }

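A small worked example of the index arithmetic in the added Event-based overload, with invented coordinates; it mirrors the "+1 to exclude leading match base" comment in that overload:

public class RepeatWindowIndexExample {
    public static void main(String[] args) {
        final int eventStart = 1000;   // event.getStart()
        final int windowStart = 951;   // ref.getWindow().getStart()
        final int startIndex = eventStart + 1 - windowStart;
        System.out.println(startIndex);  // 50: the repeat search starts one base past the shared leading base
    }
}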
public static Pair<List<Integer>, byte[]> getNumTandemRepeatUnits(final ReferenceContext ref, final VariantContext vc) {
final byte[] refBases = ref.getBases();
final int startIndex = vc.getStart() + 1 - ref.getWindow().getStart(); // +1 to exclude leading match base common to VC's ref and alt alleles
@@ -8,6 +8,7 @@
import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeAssignmentMethod;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
+ import org.broadinstitute.hellbender.utils.haplotype.Event;
import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils;

import java.util.ArrayList;
@@ -162,7 +163,7 @@ private static String getRsID(final List<VariantContext> rsIDSourceVCs, final Va
Utils.nonNull(vcToAnnotate, "vcToAnnotate cannot be null");
final List<String> rsids = new ArrayList<>();

- final List<VariantContext> vcAnnotateList = GATKVariantContextUtils.splitVariantContextToBiallelics(vcToAnnotate, true,
+ final List<Event> vcAnnotateList = GATKVariantContextUtils.splitVariantContextToEvents(vcToAnnotate, true,
GenotypeAssignmentMethod.SET_TO_NO_CALL_NO_ANNOTATIONS, true);

for ( final VariantContext vcCompSource : rsIDSourceVCs ) {
@@ -174,12 +175,12 @@ private static String getRsID(final List<VariantContext> rsIDSourceVCs, final Va
throw new IllegalArgumentException("source rsID VariantContext " + vcCompSource + " is not on same chromosome as vcToAnnotate " + vcToAnnotate);
}

- final List<VariantContext> vcCompList = GATKVariantContextUtils.splitVariantContextToBiallelics(vcCompSource, true,
+ final List<Event> vcCompList = GATKVariantContextUtils.splitVariantContextToEvents(vcCompSource, true,
GenotypeAssignmentMethod.SET_TO_NO_CALL_NO_ANNOTATIONS, true);
boolean addThisID = false;
- for (final VariantContext vcComp : vcCompList) {
- for (final VariantContext vcToAnnotateBi : vcAnnotateList) {
- if (vcComp.getStart() == vcToAnnotateBi.getStart() && vcToAnnotateBi.getReference().equals(vcComp.getReference()) && vcComp.getAlternateAlleles().equals(vcToAnnotateBi.getAlternateAlleles())) {
+ for (final Event vcComp : vcCompList) {
+ for (final Event vcToAnnotateBi : vcAnnotateList) {
+ if (vcComp.equals(vcToAnnotateBi)) {
addThisID = true;
break;
}
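The previous code matched start position, reference allele, and alternate alleles separately; the new code relies on a single Event.equals call to cover all three, assuming the value equality sketched near the top of this page. A small illustration with a stand-in record:

public class EventEqualityExample {
    // stand-in for the real Event class; records compare all components in equals()
    record SimpleEvent(String contig, int start, String ref, String alt) {}

    public static void main(String[] args) {
        final SimpleEvent fromAnnotatedVc = new SimpleEvent("chr1", 12345, "A", "T");
        final SimpleEvent fromRsIdSource = new SimpleEvent("chr1", 12345, "A", "T");
        // one equals call replaces the separate start, reference, and alternate-allele comparisons
        System.out.println(fromAnnotatedVc.equals(fromRsIdSource));  // true
    }
}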
@@ -13,6 +13,7 @@
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyBasedCallerUtils;
import org.broadinstitute.hellbender.utils.*;
import org.broadinstitute.hellbender.utils.genotyper.SampleList;
+ import org.broadinstitute.hellbender.utils.haplotype.Event;
import org.broadinstitute.hellbender.utils.logging.OneShotLogger;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines;
@@ -124,7 +125,7 @@ public Config getConfiguration() {
* @param vc Input variant context to complete.
* @return VC with assigned genotypes
*/
- public VariantContext calculateGenotypes(final VariantContext vc, final GenotypePriorCalculator gpc, final List<VariantContext> givenAlleles) {
+ public VariantContext calculateGenotypes(final VariantContext vc, final GenotypePriorCalculator gpc, final List<Event> givenAlleles) {
// if input VC can't be genotyped, exit with either null VCC or, in case where we need to emit all sites, an empty call
if (cannotBeGenotyped(vc) || vc.getNSamples() == 0) {
return null;
@@ -150,7 +151,7 @@ public VariantContext calculateGenotypes(final VariantContext vc, final Genotype
}

final AFCalculationResult AFresult = alleleFrequencyCalculator.calculate(reducedVC, defaultPloidy);
- final Set<Allele> forcedAlleles = AssemblyBasedCallerUtils.getAllelesConsistentWithGivenAlleles(givenAlleles, vc);
+ final Set<Allele> forcedAlleles = AssemblyBasedCallerUtils.allelesConsistentWithGivenAlleles(givenAlleles, vc);
final OutputAlleleSubset outputAlternativeAlleles = calculateOutputAlleleSubset(AFresult, vc, forcedAlleles);

// note the math.abs is necessary because -10 * 0.0 => -0.0 which isn't nice
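allelesConsistentWithGivenAlleles is not shown in this diff. A deliberately simplified sketch of the kind of matching it presumably performs, keeping only the candidate alleles whose start, ref, and alt coincide with a forced-call Event; the real method very likely also handles allele remapping and spanning events, which this ignores:

import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

public class ForcedAlleleMatchingExample {
    // stand-ins for Event and for a candidate (start, ref, alt) triple from the call being genotyped
    record SimpleEvent(int start, String ref, String alt) {}
    record CandidateAllele(int start, String ref, String alt) {}

    static Set<CandidateAllele> consistentWithGivenAlleles(final List<SimpleEvent> givenAlleles,
                                                           final List<CandidateAllele> candidates) {
        return candidates.stream()
                .filter(c -> givenAlleles.stream().anyMatch(e ->
                        e.start() == c.start() && e.ref().equals(c.ref()) && e.alt().equals(c.alt())))
                .collect(Collectors.toSet());
    }

    public static void main(String[] args) {
        final List<SimpleEvent> given = List.of(new SimpleEvent(500, "G", "GA"));
        final List<CandidateAllele> candidates =
                List.of(new CandidateAllele(500, "G", "GA"), new CandidateAllele(500, "G", "C"));
        System.out.println(consistentWithGivenAlleles(given, candidates));  // only the G->GA insertion is kept as forced
    }
}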
@@ -289,7 +290,7 @@ public List<Integer> alternativeAlleleMLECounts() {
* Provided the exact mode computations it returns the appropriate subset of alleles that progress to genotyping.
* @param afCalculationResult the allele fraction calculation result.
* @param vc the variant context
- * @param forcedAlleles alleles from the vc input that are consistent with forced alleles in the assembly region {@link AssemblyBasedCallerUtils#getAllelesConsistentWithGivenAlleles}
+ * @param forcedAlleles alleles from the vc input that are consistent with forced alleles in the assembly region {@link AssemblyBasedCallerUtils#allelesConsistentWithGivenAlleles}
* @return information about the alternative allele subsetting {@code null}.
*/
private OutputAlleleSubset calculateOutputAlleleSubset(final AFCalculationResult afCalculationResult, final VariantContext vc, final Set<Allele> forcedAlleles) {
@@ -1,6 +1,9 @@
package org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc;

- import htsjdk.variant.variantcontext.*;
+ import htsjdk.variant.variantcontext.Allele;
+ import htsjdk.variant.variantcontext.Genotype;
+ import htsjdk.variant.variantcontext.GenotypeBuilder;
+ import htsjdk.variant.variantcontext.VariantContext;
import it.unimi.dsi.fastutil.doubles.DoubleArrayList;
import it.unimi.dsi.fastutil.ints.Int2ObjectArrayMap;
import org.apache.commons.math3.special.Gamma;
@@ -10,11 +13,9 @@
import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeCalculationArgumentCollection;
import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeIndexCalculator;
import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypingLikelihoods;
- import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AlleleAndContext;
import org.broadinstitute.hellbender.utils.*;
import org.broadinstitute.hellbender.utils.dragstr.DragstrParams;

- import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
@@ -154,22 +155,13 @@ public AFCalculationResult fastCalculateDiploidBasedOnGLs(final GenotypingLikeli
final int numAlleles = gls.numberOfAlleles();
final List<Allele> alleles = gls.asListOfAlleles();

- final List<Integer> alleleLengths = new ArrayList<>();
- for (Allele al : gls.asListOfAlleles()) {
- if (al instanceof AlleleAndContext) {
- alleleLengths.add(((AlleleAndContext) al).maxAlleleLength());
- } else {
- alleleLengths.add(al.length());
- }
- }
- final int alleleLength = alleleLengths.stream().max(Integer::compare).get();
+ final int alleleLength = gls.asListOfAlleles().stream().map(Allele::length).max(Integer::compare).get();

final List<String> samples = gls.asListOfSamples();
final List<Genotype> genotypes = IntStream.range(0, samples.size()).mapToObj(idx -> new GenotypeBuilder(samples.get(idx)).alleles(alleles).PL(gls.sampleLikelihoods(idx).getAsPLs()).make()).collect(Collectors.toList());
return calculate(numAlleles, alleles, genotypes, defaultPloidy, alleleLength);
}


/**
* Private function that actually calculates allele frequencies etc.
*

This file was deleted.
