Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed support for newer Gencode GTF versions #8351

Merged
merged 5 commits into from
Jun 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.broadinstitute.hellbender.tools.funcotator;

import org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation;
import org.broadinstitute.hellbender.utils.codecs.gtf.GencodeGTFFieldConstants;
import org.broadinstitute.hellbender.utils.codecs.gtf.GencodeGtfFeature;

import java.util.Comparator;
Expand Down Expand Up @@ -246,8 +247,8 @@ public ComparatorByProteinCodingStatus(){}
@Override
public int compare( final GencodeFuncotation a, final GencodeFuncotation b ) {
// Is it protein coding?
final boolean isAProteinCoding = GencodeGtfFeature.KnownGeneBiotype.PROTEIN_CODING.toString().equals(a.getGeneTranscriptType());
final boolean isBProteinCoding = GencodeGtfFeature.KnownGeneBiotype.PROTEIN_CODING.toString().equals(b.getGeneTranscriptType());
final boolean isAProteinCoding = GencodeGTFFieldConstants.KnownGeneBiotype.PROTEIN_CODING.toString().equals(a.getGeneTranscriptType());
final boolean isBProteinCoding = GencodeGTFFieldConstants.KnownGeneBiotype.PROTEIN_CODING.toString().equals(b.getGeneTranscriptType());
if ( isAProteinCoding != isBProteinCoding ) {
if ( isAProteinCoding ) {
return -1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import org.broadinstitute.hellbender.tools.funcotator.Funcotation;
import org.broadinstitute.hellbender.tools.funcotator.metadata.FuncotationMetadata;
import org.broadinstitute.hellbender.tools.funcotator.vcfOutput.VcfOutputRenderer;
import org.broadinstitute.hellbender.utils.codecs.gtf.GencodeGTFFieldConstants;
import org.broadinstitute.hellbender.utils.codecs.gtf.GencodeGtfGeneFeature;

import java.util.Arrays;
Expand Down Expand Up @@ -74,7 +75,7 @@ public class GencodeFuncotation implements Funcotation {

// These are included because they help determine the transcript selection
private Integer locusLevel;
private GencodeGtfGeneFeature.FeatureTag apprisRank;
private GencodeGTFFieldConstants.FeatureTag apprisRank;
private Integer transcriptLength;
private String version;
private String geneTranscriptType;
Expand Down Expand Up @@ -378,7 +379,7 @@ public boolean equals(final Object o) {
if (transcriptLength != null ? !transcriptLength.equals(that.transcriptLength) : that.transcriptLength != null)
return false;
if (version != null ? !version.equals(that.version) : that.version != null) return false;
if (geneTranscriptType != that.geneTranscriptType) return false;
if (geneTranscriptType != that.geneTranscriptType) return false; //TODO this is a problem string equality comparison.... it breaks tests to fix it though...
jamesemery marked this conversation as resolved.
Show resolved Hide resolved
if (hugoSymbolSerializedOverride != null ? !hugoSymbolSerializedOverride.equals(that.hugoSymbolSerializedOverride) : that.hugoSymbolSerializedOverride != null)
return false;
if (ncbiBuildSerializedOverride != null ? !ncbiBuildSerializedOverride.equals(that.ncbiBuildSerializedOverride) : that.ncbiBuildSerializedOverride != null)
Expand Down Expand Up @@ -660,11 +661,11 @@ public void setLocusLevel(final Integer locusLevel) {
this.locusLevel = locusLevel;
}

public GencodeGtfGeneFeature.FeatureTag getApprisRank() {
public GencodeGTFFieldConstants.FeatureTag getApprisRank() {
return apprisRank;
}

public void setApprisRank(final GencodeGtfGeneFeature.FeatureTag apprisRank) {
public void setApprisRank(final GencodeGTFFieldConstants.FeatureTag apprisRank) {
this.apprisRank = apprisRank;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import htsjdk.variant.variantcontext.Allele;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.tools.funcotator.metadata.FuncotationMetadataUtils;
import org.broadinstitute.hellbender.utils.codecs.gtf.GencodeGTFFieldConstants;
import org.broadinstitute.hellbender.utils.codecs.gtf.GencodeGtfGeneFeature;

import java.util.ArrayList;
Expand Down Expand Up @@ -260,10 +261,10 @@ public GencodeFuncotationBuilder setLocusLevel( final Integer locusLevel ) {

/**
* Set the Appris Rank in the {@link GencodeFuncotation}.
* @param apprisRank The {@link GencodeGtfGeneFeature.FeatureTag} containing the Appris Rank for the {@link GencodeFuncotation}.
* @param apprisRank The {@link GencodeGTFFieldConstants.FeatureTag} containing the Appris Rank for the {@link GencodeFuncotation}.
* @return {@code this} {@link GencodeFuncotationBuilder}
*/
public GencodeFuncotationBuilder setApprisRank( final GencodeGtfGeneFeature.FeatureTag apprisRank ) {
public GencodeFuncotationBuilder setApprisRank( final GencodeGTFFieldConstants.FeatureTag apprisRank ) {
gencodeFuncotation.setApprisRank( apprisRank );
return this;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import static org.broadinstitute.hellbender.utils.codecs.gtf.GencodeGtfFeature.FeatureTag.*;
import static org.broadinstitute.hellbender.utils.codecs.gtf.GencodeGTFFieldConstants.FeatureTag.*;

/**
* A factory to create {@link GencodeFuncotation}s.
Expand Down Expand Up @@ -111,24 +111,22 @@ public class GencodeFuncotationFactory extends DataSourceFuncotationFactory {
/**
* List of valid Appris Ranks used for sorting funcotations to get the "best" one.
*/
private static final HashSet<GencodeGtfGeneFeature.FeatureTag> apprisRanks = new HashSet<>(
Arrays.asList(
APPRIS_PRINCIPAL,
APPRIS_PRINCIPAL_1,
APPRIS_PRINCIPAL_2,
APPRIS_PRINCIPAL_3,
APPRIS_PRINCIPAL_4,
APPRIS_PRINCIPAL_5,
APPRIS_ALTERNATIVE_1,
APPRIS_ALTERNATIVE_2,
APPRIS_CANDIDATE_HIGHEST_SCORE,
APPRIS_CANDIDATE_LONGEST_CCDS,
APPRIS_CANDIDATE_CCDS,
APPRIS_CANDIDATE_LONGEST_SEQ,
APPRIS_CANDIDATE_LONGEST,
APPRIS_CANDIDATE
)
);
private static final LinkedHashMap<String, GencodeGTFFieldConstants.FeatureTag> apprisRanks = new LinkedHashMap<>() {{
jonn-smith marked this conversation as resolved.
Show resolved Hide resolved
put(APPRIS_PRINCIPAL.toString(), APPRIS_PRINCIPAL);
put(APPRIS_PRINCIPAL_1.toString(), APPRIS_PRINCIPAL_1);
put(APPRIS_PRINCIPAL_2.toString(), APPRIS_PRINCIPAL_2);
put(APPRIS_PRINCIPAL_3.toString(), APPRIS_PRINCIPAL_3);
put(APPRIS_PRINCIPAL_4.toString(), APPRIS_PRINCIPAL_4);
put(APPRIS_PRINCIPAL_5.toString(), APPRIS_PRINCIPAL_5);
put(APPRIS_ALTERNATIVE_1.toString(), APPRIS_ALTERNATIVE_1);
put(APPRIS_ALTERNATIVE_2.toString(), APPRIS_ALTERNATIVE_2);
put(APPRIS_CANDIDATE_HIGHEST_SCORE.toString(), APPRIS_CANDIDATE_HIGHEST_SCORE);
put(APPRIS_CANDIDATE_LONGEST_CCDS.toString(), APPRIS_CANDIDATE_LONGEST_CCDS);
put(APPRIS_CANDIDATE_CCDS.toString(), APPRIS_CANDIDATE_CCDS);
put(APPRIS_CANDIDATE_LONGEST_SEQ.toString(), APPRIS_CANDIDATE_LONGEST_SEQ);
put(APPRIS_CANDIDATE_LONGEST.toString(), APPRIS_CANDIDATE_LONGEST);
put(APPRIS_CANDIDATE.toString(), APPRIS_CANDIDATE);
}};

/**
* The set of {@link GencodeFuncotation.VariantClassification} types that are valid for coding regions.
Expand Down Expand Up @@ -946,8 +944,7 @@ private static boolean isBasic(final GencodeGtfTranscriptFeature transcript) {
// Check if this transcript has the `basic` tag:
return transcript.getOptionalFields().stream()
.filter( f -> f.getName().equals("tag") )
.filter( f -> f.getValue() instanceof GencodeGtfFeature.FeatureTag )
.filter( f -> f.getValue().equals(GencodeGtfFeature.FeatureTag.BASIC) )
.filter( f -> f.getValue().equals(GencodeGTFFieldConstants.FeatureTag.BASIC.toString()) )
.count() > 0;
}

Expand Down Expand Up @@ -1079,7 +1076,7 @@ private GencodeFuncotation createExonFuncotation(final VariantContext variant,

// Before we get started, check to see if this is a non-protein-coding feature.
// If it is, we must handle it differently:
if ( GencodeGtfFeature.KnownGeneBiotype.PROTEIN_CODING.toString().equals(transcript.getGeneType()) ) {
if ( GencodeGTFFieldConstants.KnownGeneBiotype.PROTEIN_CODING.toString().equals(transcript.getGeneType()) ) {
return createCodingRegionFuncotationForProteinCodingFeature(variant, altAllele, reference, transcript, exon);
}
else {
Expand Down Expand Up @@ -1700,7 +1697,7 @@ private GencodeFuncotation createIntronFuncotation(final VariantContext variant,
gencodeFuncotationBuilder.setReferenceContext(referenceBases.getBaseString(Strand.POSITIVE));

// Set the VariantClassification:
if ( GencodeGtfFeature.KnownGeneBiotype.PROTEIN_CODING.toString().equals(transcript.getGeneType()) ) {
if ( GencodeGTFFieldConstants.KnownGeneBiotype.PROTEIN_CODING.toString().equals(transcript.getGeneType()) ) {
gencodeFuncotationBuilder.setVariantClassification(GencodeFuncotation.VariantClassification.INTRON);
}
else {
Expand Down Expand Up @@ -2708,19 +2705,18 @@ else if (altAllele.length() < refAllele.length()) {

/**
* Get the Appris Rank from the given {@link GencodeGtfTranscriptFeature}.
* Appris ranks are specified as annotations using {@link org.broadinstitute.hellbender.utils.codecs.gtf.GencodeGtfFeature.FeatureTag}s.
* Appris ranks are specified as annotations using {@link GencodeGTFFieldConstants.FeatureTag}s.
* @param gtfFeature The {@link GencodeGtfTranscriptFeature} from which to get the Appris Rank.
* @return The highest Appris Rank found in the given {@code gtfFeature}; if no Appris Rank exists, {@code null}.
*/
@VisibleForTesting
static GencodeGtfFeature.FeatureTag getApprisRank( final GencodeGtfTranscriptFeature gtfFeature ) {
static GencodeGTFFieldConstants.FeatureTag getApprisRank(final GencodeGtfTranscriptFeature gtfFeature ) {

// Get our appris tag(s) if it/they exist(s):
final List<GencodeGtfFeature.FeatureTag> gtfApprisTags = gtfFeature.getOptionalFields().stream()
// Get the Appris Rank tags and convert them to Sortable Enums:
final List<GencodeGTFFieldConstants.FeatureTag> gtfApprisTags = gtfFeature.getOptionalFields().stream()
.filter( f -> f.getName().equals("tag") )
.filter( f -> f.getValue() instanceof GencodeGtfFeature.FeatureTag )
.filter( f -> apprisRanks.contains( f.getValue() ) )
.map( f -> (GencodeGtfFeature.FeatureTag)f.getValue() ).collect(Collectors.toList());
.filter( f -> apprisRanks.containsKey( f.getValue() ) )
.map( f -> apprisRanks.get(f.getValue()) ).collect(Collectors.toList());

if ( gtfApprisTags.isEmpty() ) {
return null;
Expand All @@ -2737,16 +2733,16 @@ else if ( gtfApprisTags.size() == 1 ) {

/**
* Converts a given GeneTranscriptType {@link String} to a {@link org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation.VariantClassification}.
* Assumes the given {@code type} is not {@link GencodeGtfFeature.KnownGeneBiotype#PROTEIN_CODING}.
* Assumes the given {@code type} is not {@link GencodeGTFFieldConstants.KnownGeneBiotype#PROTEIN_CODING}.
* If no type can be assessed, returns {@code null}.
* @param type A {@link String} representing a GeneTranscriptType to convert to a {@link org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation.VariantClassification}.
* @return A {@link org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation.VariantClassification} representing the given GeneTranscriptType {@link String}, or {@code null}.
*/
private static GencodeFuncotation.VariantClassification convertGeneTranscriptTypeToVariantClassification (final String type ) {

//TODO: This all needs to be fixed so there is a 1:1 mapping of GencodeGTFFieldConstants.KnownGeneBiotype->VariantClassification - Issue #4405
if (GencodeGtfFeature.KnownGeneBiotype.LINCRNA.toString().equals(type) ||
GencodeGtfFeature.KnownGeneBiotype.MACRO_LNCRNA.toString().equals(type)) {
if (GencodeGTFFieldConstants.KnownGeneBiotype.LINCRNA.toString().equals(type) ||
GencodeGTFFieldConstants.KnownGeneBiotype.MACRO_LNCRNA.toString().equals(type)) {
return GencodeFuncotation.VariantClassification.LINCRNA;
}
return GencodeFuncotation.VariantClassification.RNA;
Expand Down
Loading