Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added custom VariantClassification severity ordering. #7673

Merged
merged 9 commits into from
Mar 8, 2022
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,14 @@
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.Hidden;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.engine.GATKPath;
import org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation;

import java.io.File;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.nio.file.Path;
import java.util.*;
import java.util.stream.Collectors;

public abstract class BaseFuncotatorArgumentCollection implements Serializable {
private static final long serialVersionUID = 1L;
Expand Down Expand Up @@ -115,4 +116,11 @@ public abstract class BaseFuncotatorArgumentCollection implements Serializable {
doc = "The minimum number of bases for a variant to be annotated as a segment. Recommended to be changed only for use with FuncotateSegments. Defaults to " + FuncotatorUtils.DEFAULT_MIN_NUM_BASES_FOR_VALID_SEGMENT
)
public int minNumBasesForValidSegment = FuncotatorUtils.DEFAULT_MIN_NUM_BASES_FOR_VALID_SEGMENT;

/**
 * Optional TSV file of custom {@link GencodeFuncotation.VariantClassification} severities.
 * When this is non-null, the {@link FuncotatorEngine} constructor hands it to
 * {@link FuncotatorUtils#setVariantClassificationCustomSeverity(GATKPath)}.
 * Defaults to {@code null}, in which case no custom severities are loaded.
 */
@Argument(
fullName = FuncotatorArgumentDefinitions.CUSTOM_VARIANT_CLASS_ORDER_FILE,
optional = true,
doc = "TSV File containing custom Variant Classification severity map of the form: VARIANT_CLASSIFICATION\tSEV. VARIANT_CLASSIFICATION must match one of the VariantClassification names (" + GencodeFuncotation.VariantClassification.ALL_VC_NAMES + "). SEV is an unsigned integer, where lower is sorted first. When using this option it is HIGHLY recommended you also use the `BEST_EFFECT` transcript selection mode."
)
public GATKPath customVariantClassificationOrderFile = null;
}
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ public class FuncotatorArgumentDefinitions {
public static final String FORCE_B37_TO_HG19_REFERENCE_CONTIG_CONVERSION = "force-b37-to-hg19-reference-contig-conversion";
public static final String MIN_NUM_BASES_FOR_SEGMENT_FUNCOTATION = "min-num-bases-for-segment-funcotation";

public static final String CUSTOM_VARIANT_CLASS_ORDER_FILE = "custom-variant-classification-order";

// ------------------------------------------------------------
// Helper Types:

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,11 @@ public FuncotatorEngine(final BaseFuncotatorArgumentCollection funcotatorArgs,

// Determine whether we have to convert given variants from B37 to HG19:
mustConvertInputContigsToHg19 = determineReferenceAndDatasourceCompatibility();

// Read in the custom variant classification order file here so that it can be shared across all engines:
if (funcotatorArgs.customVariantClassificationOrderFile != null) {
FuncotatorUtils.setVariantClassificationCustomSeverity(funcotatorArgs.customVariantClassificationOrderFile);
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import org.apache.commons.lang3.tuple.Pair;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.hellbender.engine.GATKPath;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.exceptions.UserException;
Expand All @@ -34,9 +35,12 @@
import org.broadinstitute.hellbender.utils.reference.ReferenceUtils;
import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.*;
import java.util.function.Function;
import java.util.stream.Collectors;
Expand Down Expand Up @@ -2344,4 +2348,57 @@ public static <T,U> LinkedHashMap<T,U> createLinkedHashMapFromLists(final List<T
}, LinkedHashMap::new));
}

/**
* Set the severity for {@link org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation.VariantClassification}s as specified in a given input file.
* @param customSeverityFile {@link GATKPath} to TSV file containing VARIANT_CLASSIFICATION SEV information.
*/
public static void setVariantClassificationCustomSeverity(final GATKPath customSeverityFile) {
jonn-smith marked this conversation as resolved.
Show resolved Hide resolved
try {
logger.info("Setting custom variant classification severities from: " + customSeverityFile);

if ( !Files.exists(customSeverityFile.toPath()) ) {
throw new UserException.CouldNotReadInputFile("Custom severity file does not exist: " + customSeverityFile);
}
try (final BufferedReader reader = new BufferedReader(new InputStreamReader(customSeverityFile.getInputStream()))) {

int lineNum = 1;
String line = null;
while ( (line = reader.readLine()) != null ) {

// Ignore empty lines:
if ( line.length() == 0 ) {
continue;
}

final String[] lineFields = line.split("\t", -1);
if ( lineFields.length != 2 ) {
throw new UserException.MalformedFile(customSeverityFile + ":" + lineNum + " has " + lineFields.length + " fields! Each TSV line must have 2 fields!");
}

try {
final String vcName = lineFields[ 0 ];
final int sev = Integer.parseInt(lineFields[ 1 ]);

try {
logger.info(" Setting new Variant Classification severity: " + vcName + " = " + sev);
GencodeFuncotation.VariantClassification.valueOf(vcName).setSeverity(sev);
}
catch (final IllegalArgumentException ex) {
throw new UserException.MalformedFile(customSeverityFile + ":" + lineNum + ": invalid/unknown variant classification specified (possible typo): " + vcName);
}
}
catch ( final NumberFormatException ex ) {
throw new UserException.MalformedFile(customSeverityFile + ":" + lineNum + ": severity is not an integer (" + lineFields[ 1 ] + ")! Custom severities must be integer values!");
}

lineNum += 1;
}
}
}
catch (final IOException ex) {
throw new UserException.CouldNotReadInputFile("Could not read from custom Variant Classification file: " + customSeverityFile, ex);
}
}


}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation;

import java.io.File;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.stream.Collectors;

/**
* Arguments to be used by the {@link Funcotator} {@link org.broadinstitute.hellbender.engine.GATKTool},
Expand Down Expand Up @@ -47,6 +53,4 @@ public class FuncotatorVariantArgumentCollection extends BaseFuncotatorArgumentC
doc = "When input VCF has already been annotated, still annotate again."
)
public boolean reannotateVCF = false;


}
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import java.util.LinkedHashSet;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import java.util.stream.Stream;

/**
* A class to represent a Functional Annotation. Each instance represents the annotations on a single transcript.
Expand Down Expand Up @@ -754,6 +754,18 @@ public String toString() {
*/
public enum VariantClassification {

// __ ___ ____ _ _ ___ _ _ ____
// \ \ / / \ | _ \| \ | |_ _| \ | |/ ___|
// \ \ /\ / / _ \ | |_) | \| || || \| | | _
// \ V V / ___ \| _ <| |\ || || |\ | |_| |
// \_/\_/_/ \_\_| \_\_| \_|___|_| \_|\____|
//
// When new types are added to VariantClassification
// or when existing types are modified, they MUST
// also be added to the ALL_VC_NAMES variable.
//
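// There does not seem to be a good way around this: ALL_VC_NAMES is used inside
// an @Argument annotation, and annotation values must be compile-time constants.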

/** Variant classification could not be determined. */
COULD_NOT_DETERMINE("COULD_NOT_DETERMINE",99),

Expand Down Expand Up @@ -824,26 +836,58 @@ public enum VariantClassification {
/** Variant lies on one of the lincRNAs. */
LINCRNA("LINCRNA", 4);

/**
* Variable to store the list of all valid {@link VariantClassification} types.
* This is used for command-line argument documentation and MUST be maintained if / when any of the
* VariantClassification values / names are updated.
*/
public static final String ALL_VC_NAMES = "COULD_NOT_DETERMINE, INTRON, FIVE_PRIME_UTR, THREE_PRIME_UTR, IGR, FIVE_PRIME_FLANK, THREE_PRIME_FLANK, MISSENSE, NONSENSE, NONSTOP, SILENT, SPLICE_SITE, IN_FRAME_DEL, IN_FRAME_INS, FRAME_SHIFT_INS, FRAME_SHIFT_DEL, START_CODON_SNP, START_CODON_INS, START_CODON_DEL, DE_NOVO_START_IN_FRAME, DE_NOVO_START_OUT_FRAME, RNA, LINCRNA";
jonn-smith marked this conversation as resolved.
Show resolved Hide resolved

/**
 * The relative severity of each {@link VariantClassification}.
 * Lower numbers are considered more severe.
 * Higher numbers are considered less severe.
 *
 * Deliberately not {@code final}: the value can be overridden through
 * {@link #setSeverity(int)} and restored through {@link #resetSeveritiesToDefault()}.
 */
private int relativeSeverity;

/**
* The default value for the {@link VariantClassification#relativeSeverity} of this {@link VariantClassification}.
*/
final private int defaultRelativeSeverity;

/** The serialized version of this {@link VariantClassification} */
final private String serialized;

/**
 * Create a {@link VariantClassification}.
 *
 * @param serialized The serialized (string) form of this {@link VariantClassification}.
 * @param sev The initial relative severity, which is also recorded as the default severity.
 */
VariantClassification(final String serialized, final int sev) {
    // The initial severity doubles as the default that can be restored later:
    this.defaultRelativeSeverity = sev;
    this.relativeSeverity = sev;
    this.serialized = serialized;
}

/**
 * Restore the severity of every {@link VariantClassification} to its default value.
 */
public static void resetSeveritiesToDefault() {
    for ( final VariantClassification classification : values() ) {
        final int defaultSeverity = classification.getDefaultSeverity();
        classification.setSeverity(defaultSeverity);
    }
}

/**
 * Get the current relative severity of {@code this} {@link VariantClassification}.
 *
 * @return The {@link VariantClassification#relativeSeverity} of {@code this} {@link VariantClassification}.
 */
public int getSeverity() {
    return this.relativeSeverity;
}

/**
 * Get the default relative severity of {@code this} {@link VariantClassification}.
 *
 * @return The {@link VariantClassification#defaultRelativeSeverity} of {@code this} {@link VariantClassification}.
 */
public int getDefaultSeverity() {
    return this.defaultRelativeSeverity;
}

/**
 * Set the {@link VariantClassification#relativeSeverity} of {@code this} {@link VariantClassification}.
 *
 * @param sev The new relative severity to store.
 */
public void setSeverity(final int sev) {
    relativeSeverity = sev;
}

@Override
public String toString() {
return serialized;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.broadinstitute.hellbender.CommandLineProgramTest;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.engine.FeatureDataSource;
import org.broadinstitute.hellbender.engine.GATKPath;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.testutils.ArgumentsBuilder;
Expand All @@ -33,11 +34,9 @@
import org.testng.annotations.Test;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.sql.Timestamp;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
Expand Down Expand Up @@ -81,6 +80,9 @@ public class FuncotatorIntegrationTest extends CommandLineProgramTest {

// TODO: Get rid of this variable and use the general data sources path (issue #5350 - https://github.com/broadinstitute/gatk/issues/5350):
private static final String DS_PIK3CA_DIR = largeFileTestDir + "funcotator" + File.separator + "small_ds_pik3ca" + File.separator;

private static final String MEDIUM_DATASOURCES_DIR = largeFileTestDir + "funcotator" + File.separator + "funcotator_dataSources" + File.separator;

private static final String MAF_TEST_CONFIG = toolsTestDir + "funcotator" + File.separator + "maf.config";
private static final String XSV_CLINVAR_COL_TEST_VCF = toolsTestDir + "funcotator" + File.separator + "clinvar_hg19_column_test.vcf";
private static final String DS_XSV_CLINVAR_COL_TEST = largeFileTestDir + "funcotator" + File.separator + "small_ds_clinvar_hg19" + File.separator;
Expand Down Expand Up @@ -2073,6 +2075,56 @@ public void testAlreadyAnnotatedInputWithOverrideArgument(final String inputVcfN

}

/**
 * Runs Funcotator with a custom Variant Classification severity file and compares
 * the produced VCF against the expected output file.
 *
 * The custom severities are stored on the {@link GencodeFuncotation.VariantClassification}
 * enum constants, so they are reset in a {@code finally} block: a failing run or assertion
 * must not leave the custom ordering behind.
 */
@Test
public void testCustomVariantClassificationOrder() {

    // All of the files used by this test live in the same directory:
    final String customVcOrderDir = largeFileTestDir + "funcotator" + File.separator + "custom_vc_order_files" + File.separator;

    final FuncotatorArgumentDefinitions.OutputFormatType outputFormatType = FuncotatorArgumentDefinitions.OutputFormatType.VCF;
    final File outputFile = getOutputFile(outputFormatType);

    final ArgumentsBuilder arguments = createBaselineArgumentsForFuncotator(
            customVcOrderDir + "custom_vc_input_test.vcf",
            outputFile,
            b37Reference,
            MEDIUM_DATASOURCES_DIR,
            FuncotatorTestConstants.REFERENCE_VERSION_HG19,
            outputFormatType,
            false);

    // We need this argument since we are testing on a subset of b37
    arguments.add(FuncotatorArgumentDefinitions.FORCE_B37_TO_HG19_REFERENCE_CONTIG_CONVERSION, true);

    // It's best to make this run on BEST_EFFECT with this test
    arguments.add(FuncotatorArgumentDefinitions.TRANSCRIPT_SELECTION_MODE_LONG_NAME, TranscriptSelectionMode.BEST_EFFECT);

    // Add the new ordering of the variant classifications:
    final GATKPath customVcPath = new GATKPath(customVcOrderDir + "custom_vc_order_for_int_test.tsv");
    arguments.add(FuncotatorArgumentDefinitions.CUSTOM_VARIANT_CLASS_ORDER_FILE, customVcPath.toPath().toUri().toString());

    final String expectedFilePath = customVcOrderDir + "custom_vc_expected_out.vcf";

    try {
        // Run Funcotator:
        runCommandLine(arguments);

        // Validate results:
        final Pair<VCFHeader, List<VariantContext>> vcfInfo = VariantContextTestUtils.readEntireVCFIntoMemory(outputFile.getAbsolutePath());
        final List<VariantContext> variantContexts = vcfInfo.getRight();
        final Pair<VCFHeader, List<VariantContext>> expectedVcfInfo = VariantContextTestUtils.readEntireVCFIntoMemory(expectedFilePath);
        final List<VariantContext> expectedVariantContexts = expectedVcfInfo.getRight();

        Assert.assertEquals(variantContexts.size(), expectedVariantContexts.size());

        try {
            IntegrationTestSpec.assertEqualTextFiles(outputFile, new File(expectedFilePath), "#");
        }
        catch ( final IOException ex ) {
            throw new GATKException("Error opening expected file: " + expectedFilePath, ex);
        }
    }
    finally {
        // Reset severity, even when the run or an assertion above failed:
        GencodeFuncotation.VariantClassification.resetSeveritiesToDefault();
    }
}

}


Loading