Skip to content

Commit

Permalink
Fix bug which caused --max_alternate_alleles to be ignored when using…
Browse files Browse the repository at this point in the history
… GenomicsDB (#7576)

* Fix a bug where --max_alternate_alleles was being ignored instead of being passed to GenomicsDB
* Cleanup some variable names which shouldn't have been uppercase
  • Loading branch information
nalinigans authored Jan 4, 2022
1 parent 9c7eaf7 commit 5b87367
Show file tree
Hide file tree
Showing 11 changed files with 58 additions and 32 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,18 +25,24 @@ public GenomicsDBOptions(final Path reference) {
}

public GenomicsDBOptions(final Path reference, GenomicsDBArgumentCollection genomicsdbArgs) {
this(reference, genomicsdbArgs, new GenotypeCalculationArgumentCollection(), false);
this(reference, genomicsdbArgs, new GenotypeCalculationArgumentCollection());
}

public GenomicsDBOptions(final Path reference, final GenomicsDBArgumentCollection genomicsdbArgs,
final GenotypeCalculationArgumentCollection genotypeCalcArgs, final boolean forceCallGenotypes) {
final GenotypeCalculationArgumentCollection genotypeCalcArgs) {
this.reference = reference;
this.callGenotypes = genomicsdbArgs.callGenotypes || forceCallGenotypes;
this.maxDiploidAltAllelesThatCanBeGenotyped = genomicsdbArgs.maxDiploidAltAllelesThatCanBeGenotyped;
this.maxGenotypeCount = genotypeCalcArgs.MAX_GENOTYPE_COUNT;
this.callGenotypes = genomicsdbArgs.callGenotypes;
this.useBCFCodec = genomicsdbArgs.useBCFCodec;
this.sharedPosixFSOptimizations = genomicsdbArgs.sharedPosixFSOptimizations;
this.useGcsHdfsConnector = genomicsdbArgs.useGcsHdfsConnector;
if (genotypeCalcArgs != null) {
this.maxDiploidAltAllelesThatCanBeGenotyped = genotypeCalcArgs.maxAlternateAlleles;
this.maxGenotypeCount = genotypeCalcArgs.maxGenotypeCount;
} else {
// Some defaults
this.maxDiploidAltAllelesThatCanBeGenotyped = genomicsdbArgs.maxDiploidAltAllelesThatCanBeGenotyped;
this.maxGenotypeCount = GenotypeCalculationArgumentCollection.DEFAULT_MAX_GENOTYPE_COUNT;
}
}

public Path getReference() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -208,9 +208,10 @@ public boolean requiresReference() {

@Override
protected GenomicsDBOptions getGenomicsDBOptions() {
//extract called genotypes so hom refs with no PLs aren't ambiguous
if (genomicsDBOptions == null) {
genomicsDBOptions = new GenomicsDBOptions(referenceArguments.getReferencePath(), genomicsdbArgs, genotypeArgs, true);
//extract called genotypes so hom refs with no PLs aren't ambiguous
genomicsdbArgs.callGenotypes = true;
genomicsDBOptions = new GenomicsDBOptions(referenceArguments.getReferencePath(), genomicsdbArgs, genotypeArgs);
}
return genomicsDBOptions;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ private VariantContext callSomaticGenotypes(final VariantContext vc, double tlod
final VariantContextBuilder builder = new VariantContextBuilder(vc);
final VariantContext regenotypedVC = builder.genotypes(newGenotypes).make();

final int maxAltAlleles = genotypingEngine.getConfiguration().genotypeArgs.MAX_ALTERNATE_ALLELES;
final int maxAltAlleles = genotypingEngine.getConfiguration().genotypeArgs.maxAlternateAlleles;
List<Allele> allelesToKeep;

//we need to make sure all alleles pass the tlodThreshold
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.hellbender.engine.FeatureInput;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.variant.HomoSapiensConstants;

import java.io.Serializable;
Expand Down Expand Up @@ -127,19 +126,19 @@ public GenotypeCalculationArgumentCollection clone() {
* Note that the default was changed from 10.0 to 30.0 in version 4.1.0.0 to accompany the switch to use the the new quality score by default.
*/
@Argument(fullName = CALL_CONFIDENCE_LONG_NAME, shortName = CALL_CONFIDENCE_SHORT_NAME, doc = "The minimum phred-scaled confidence threshold at which variants should be called", optional = true)
public double STANDARD_CONFIDENCE_FOR_CALLING = DEFAULT_STANDARD_CONFIDENCE_FOR_CALLING;
public double standardConfidenceForCalling = DEFAULT_STANDARD_CONFIDENCE_FOR_CALLING;

/**
* If there are more than this number of alternate alleles presented to the genotyper (either through discovery or GENOTYPE_GIVEN ALLELES),
* then only this many alleles will be used. Note that genotyping sites with many alternate alleles is both CPU and memory intensive and it
* scales exponentially based on the number of alternate alleles. Unless there is a good reason to change the default value, we highly recommend
* that you not play around with this parameter.
*
* See also {@link #MAX_GENOTYPE_COUNT}.
* See also {@link #maxGenotypeCount}.
*/
@Advanced
@Argument(fullName = MAX_ALTERNATE_ALLELES_LONG_NAME, doc = "Maximum number of alternate alleles to genotype", optional = true)
public int MAX_ALTERNATE_ALLELES = DEFAULT_MAX_ALTERNATE_ALLELES;
public int maxAlternateAlleles = DEFAULT_MAX_ALTERNATE_ALLELES;

/**
* If there are more than this number of genotypes at a locus presented to the genotyper, then only this many genotypes will be used.
Expand All @@ -152,17 +151,18 @@ public GenotypeCalculationArgumentCollection clone() {
* Unless there is a good reason to change the default value, we highly recommend that you not play around with this parameter.
*
* The maximum number of alternative alleles used in the genotyping step will be the lesser of the two:
* 1. the largest number of alt alleles, given ploidy, that yields a genotype count no higher than {@link #MAX_GENOTYPE_COUNT}
* 2. the value of {@link #MAX_ALTERNATE_ALLELES}
* 1. the largest number of alt alleles, given ploidy, that yields a genotype count no higher than {@link #maxGenotypeCount}
* 2. the value of {@link #maxAlternateAlleles}
*
* See also {@link #MAX_ALTERNATE_ALLELES}.
* See also {@link #maxAlternateAlleles}.
*/
@Advanced
@Argument(fullName = MAX_GENOTYPE_COUNT_LONG_NAME, doc = "Maximum number of genotypes to consider at any site", optional = true)
public int MAX_GENOTYPE_COUNT = DEFAULT_MAX_GENOTYPE_COUNT;
public int maxGenotypeCount = DEFAULT_MAX_GENOTYPE_COUNT;

/**
* Sample ploidy - equivalent to number of chromosomes per pool. In pooled experiments this should be = # of samples in pool * individual sample ploidy
* Sample ploidy - equivalent to number of chromoso
* mes per pool. In pooled experiments this should be = # of samples in pool * individual sample ploidy
*/
@Argument(shortName = SAMPLE_PLOIDY_SHORT_NAME, fullName = SAMPLE_PLOIDY_LONG_NAME, doc="Ploidy (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy).", optional=true)
public int samplePloidy = HomoSapiensConstants.DEFAULT_PLOIDY;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ public VariantContext calculateGenotypes(final VariantContext vc, final Genotype
}

final int defaultPloidy = configuration.genotypeArgs.samplePloidy;
final int maxAltAlleles = configuration.genotypeArgs.MAX_ALTERNATE_ALLELES;
final int maxAltAlleles = configuration.genotypeArgs.maxAlternateAlleles;

VariantContext reducedVC = vc;
if (maxAltAlleles < vc.getAlternateAlleles().size()) {
Expand Down Expand Up @@ -305,7 +305,7 @@ private OutputAlleleSubset calculateOutputAlleleSubset(final AFCalculationResult
// we want to keep the NON_REF symbolic allele but only in the absence of a non-symbolic allele, e.g.
// if we combined a ref / NON_REF gVCF with a ref / alt gVCF
final boolean isNonRefWhichIsLoneAltAllele = alternativeAlleleCount == 1 && allele.equals(Allele.NON_REF_ALLELE);
final boolean isPlausible = afCalculationResult.passesThreshold(allele, configuration.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING);
final boolean isPlausible = afCalculationResult.passesThreshold(allele, configuration.genotypeArgs.standardConfidenceForCalling);

//it's possible that the upstream deletion that spanned this site was not emitted, mooting the symbolic spanning deletion allele
final boolean isSpuriousSpanningDeletion = GATKVCFConstants.isSpanningDeletion(allele) && !isVcCoveredByDeletion(vc);
Expand Down Expand Up @@ -414,7 +414,7 @@ protected final boolean passesEmitThreshold(final double conf, final boolean bes
}

protected final boolean passesCallThreshold(final double conf) {
return conf >= configuration.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING;
return conf >= configuration.genotypeArgs.standardConfidenceForCalling;
}

protected Map<String,Object> composeCallAttributes(final VariantContext vc, final List<Integer> alleleCountsofMLE,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -173,8 +173,9 @@ protected List<SimpleInterval> transformTraversalIntervals(final List<SimpleInte
@Override
protected GenomicsDBOptions getGenomicsDBOptions() {
if (genomicsDBOptions == null) {
genomicsdbArgs.maxDiploidAltAllelesThatCanBeGenotyped = PIPELINE_MAX_ALT_COUNT;
genomicsDBOptions = new GenomicsDBOptions(referenceArguments.getReferencePath(), genomicsdbArgs, genotypeArgs, CALL_GENOTYPES);
genomicsdbArgs.callGenotypes = CALL_GENOTYPES;
genotypeArgs.maxAlternateAlleles = PIPELINE_MAX_ALT_COUNT;
genomicsDBOptions = new GenomicsDBOptions(referenceArguments.getReferencePath(), genomicsdbArgs, genotypeArgs);
}
return genomicsDBOptions;
}
Expand All @@ -195,7 +196,7 @@ public void onTraversalStart() {

setupVCFWriter(inputVCFHeader, samples);

genotyperEngine = new GnarlyGenotyperEngine(keepAllSites, genotypeArgs.MAX_ALTERNATE_ALLELES, SUMMARIZE_PLs, stripASAnnotations);
genotyperEngine = new GnarlyGenotyperEngine(keepAllSites, genotypeArgs.maxAlternateAlleles, SUMMARIZE_PLs, stripASAnnotations);

Reflections reflections = new Reflections("org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific");
//not InfoFieldAnnotation.class because we don't want AS_InbreedingCoeff
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ public void onTraversalStart() {
hcArgs.likelihoodArgs.enableDynamicReadDisqualification = true;
hcArgs.likelihoodArgs.expectedErrorRatePerBase = 0.03;
hcArgs.standardArgs.genotypeArgs.genotypeAssignmentMethod = GenotypeAssignmentMethod.USE_POSTERIOR_PROBABILITIES;
hcArgs.standardArgs.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING = 3.0;
hcArgs.standardArgs.genotypeArgs.standardConfidenceForCalling = 3.0;
hcArgs.standardArgs.genotypeArgs.usePosteriorProbabilitiesToCalculateQual = true;
assemblyRegionArgs.indelPaddingForGenotyping = 150;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ private void validateAndInitializeArgs() {
}

if ( emitReferenceConfidence() ) {
hcArgs.standardArgs.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING = -0.0;
hcArgs.standardArgs.genotypeArgs.standardConfidenceForCalling = -0.0;

logger.info("Standard Emitting and Calling confidence set to 0.0 for reference-model confidence output");
if ( ! hcArgs.standardArgs.annotateAllSitesWithPLs ) {
Expand Down Expand Up @@ -333,7 +333,7 @@ private void initializeActiveRegionEvaluationGenotyperEngine() {
activeRegionArgs.copyStandardCallerArgsFrom(hcArgs.standardArgs);

activeRegionArgs.outputMode = OutputMode.EMIT_VARIANTS_ONLY;
activeRegionArgs.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING = Math.min(MAXMIN_CONFIDENCE_FOR_CONSIDERING_A_SITE_AS_POSSIBLE_VARIANT_IN_ACTIVE_REGION_DISCOVERY, hcArgs.standardArgs.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING ); // low values used for isActive determination only, default/user-specified values used for actual calling
activeRegionArgs.genotypeArgs.standardConfidenceForCalling = Math.min(MAXMIN_CONFIDENCE_FOR_CONSIDERING_A_SITE_AS_POSSIBLE_VARIANT_IN_ACTIVE_REGION_DISCOVERY, hcArgs.standardArgs.genotypeArgs.standardConfidenceForCalling); // low values used for isActive determination only, default/user-specified values used for actual calling
activeRegionArgs.CONTAMINATION_FRACTION = 0.0;
activeRegionArgs.CONTAMINATION_FRACTION_FILE = null;
// Seems that at least with some test data we can lose genuine haploid variation if we use ploidy == 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ public HaplotypeCallerGenotypingEngine(final HaplotypeCallerArgumentCollection c
genotypingModel = hcArgs.applyBQD || hcArgs.applyFRD ?
new DRAGENGenotypesModel(applyBQD, hcArgs.applyFRD, hcArgs.informativeReadOverlapMargin, hcArgs.maxEffectiveDepthAdjustment, dragstrParams) :
new IndependentSampleGenotypesModel();
maxGenotypeCountToEnumerate = configuration.standardArgs.genotypeArgs.MAX_GENOTYPE_COUNT;
maxGenotypeCountToEnumerate = configuration.standardArgs.genotypeArgs.maxGenotypeCount;
referenceConfidenceMode = configuration.emitReferenceConfidence;
snpHeterozygosity = configuration.standardArgs.genotypeArgs.snpHeterozygosity;
indelHeterozygosity = configuration.standardArgs.genotypeArgs.indelHeterozygosity;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ private HaplotypeCallerGenotypingEngine createGenotypingEngine(final SampleList
hcArgs.standardArgs.annotateAllSitesWithPLs = true;
hcArgs.standardArgs.genotypeArgs = genotypeArgs.clone();
hcArgs.emitReferenceConfidence = ReferenceConfidenceMode.GVCF; //this is important to force emission of all alleles at a multiallelic site
hcArgs.standardArgs.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING = dropLowQuals ? genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING : 0.0;
hcArgs.standardArgs.genotypeArgs.standardConfidenceForCalling = dropLowQuals ? genotypeArgs.standardConfidenceForCalling : 0.0;
return new HaplotypeCallerGenotypingEngine(hcArgs, samples, true, false);

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -260,10 +260,22 @@ public Object[][] getGVCFsForGenomicsDBOverMultipleIntervals() {
public void assertMatchingGenotypesFromGenomicsDB(File input, File expected, Locatable interval, String reference) throws IOException {
final File tempGenomicsDB = GenomicsDBTestUtils.createTempGenomicsDB(input, interval);
final String genomicsDBUri = GenomicsDBTestUtils.makeGenomicsDBUri(tempGenomicsDB);
runGenotypeGVCFSAndAssertSomething(genomicsDBUri, expected, NO_EXTRA_ARGS, VariantContextTestUtils::assertVariantContextsHaveSameGenotypes, reference);
final List<String> args = new ArrayList<String>();
args.add("--"+GenotypeCalculationArgumentCollection.MAX_ALTERNATE_ALLELES_LONG_NAME);
args.add("2"); // Too small max_alternate_alleles arg to GenomicsDB, should fail
try {
File output = runGenotypeGVCFS(genomicsDBUri, expected, args, reference);
Assert.fail("Expected exception not thrown");
} catch (IllegalStateException e) {
// Pass
}

args.clear();
args.add("--"+GenotypeCalculationArgumentCollection.MAX_ALTERNATE_ALLELES_LONG_NAME);
args.add("8");
runGenotypeGVCFSAndAssertSomething(genomicsDBUri, expected, args, VariantContextTestUtils::assertVariantContextsHaveSameGenotypes, reference);

// The default option with GenomicsDB input uses VCFCodec for decoding, test BCFCodec explicitly
final List<String> args = new ArrayList<String>();
args.add("--"+GenomicsDBArgumentCollection.USE_BCF_CODEC_LONG_NAME);
runGenotypeGVCFSAndAssertSomething(genomicsDBUri, expected, args, VariantContextTestUtils::assertVariantContextsHaveSameGenotypes, reference);
}
Expand Down Expand Up @@ -362,9 +374,8 @@ private void runGenotypeGVCFSAndAssertSomething(File input, File expected, List<
);
}

private void runGenotypeGVCFSAndAssertSomething(String input, File expected, List<String> additionalArguments, BiConsumer<VariantContext, VariantContext> assertion, String reference) throws IOException {
private File runGenotypeGVCFS(String input, File expected, List<String> additionalArguments, String reference) {
final File output = UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS ? expected : createTempFile("genotypegvcf", ".vcf");

final ArgumentsBuilder args = new ArgumentsBuilder();
args.addReference(new File(reference))
.add("V", input)
Expand All @@ -377,6 +388,13 @@ private void runGenotypeGVCFSAndAssertSomething(String input, File expected, Lis
Utils.resetRandomGenerator();
runCommandLine(args);

return output;
}

private void runGenotypeGVCFSAndAssertSomething(String input, File expected, List<String> additionalArguments, BiConsumer<VariantContext, VariantContext> assertion, String reference) throws IOException {
final File output = runGenotypeGVCFS(input, expected, additionalArguments, reference);
Assert.assertTrue(output.exists());

if (! UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS) {
final List<VariantContext> expectedVC = VariantContextTestUtils.getVariantContexts(expected);
final List<VariantContext> actualVC = VariantContextTestUtils.getVariantContexts(output);
Expand Down

0 comments on commit 5b87367

Please sign in to comment.