Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Output VRS_Error INFO field for VCF sites where VRS computation fails #301

Merged
merged 8 commits into from
Dec 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 36 additions & 6 deletions src/ga4gh/vrs/extras/vcf_annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,17 @@ class VCFAnnotator: # pylint: disable=too-few-public-methods
VRS_STARTS_FIELD = "VRS_Starts"
VRS_ENDS_FIELD = "VRS_Ends"
VRS_STATES_FIELD = "VRS_States"
VRS_ERROR_FIELD = "VRS_Error"

# VCF character escape map
VCF_ESCAPE_MAP = [
("%", "%25"),
(";", "%3B"),
(",", "%2C"),
("\r", "%0D"),
("\n", "%0A"),
("\t", "%09"),
]

def __init__(self, seqrepo_dp_type: SeqRepoProxyType = SeqRepoProxyType.LOCAL,
seqrepo_base_url: str = "http://localhost:5000/seqrepo",
Expand Down Expand Up @@ -181,8 +192,11 @@ def annotate( # pylint: disable=too-many-arguments,too-many-locals
("The computed identifiers for the GA4GH VRS Alleles corresponding to the "
f"GT indexes of the {info_field_desc} alleles")
)
vcf_in.header.info.add(
self.VRS_ERROR_FIELD, ".", "String",
("If an error occurred computing a VRS Identifier, the error message")
)

additional_info_fields = [self.VRS_ALLELE_IDS_FIELD]
if vrs_attributes:
vcf_in.header.info.add(
self.VRS_STARTS_FIELD, info_field_num, "String",
Expand All @@ -199,7 +213,6 @@ def annotate( # pylint: disable=too-many-arguments,too-many-locals
("The literal sequence states used for the GA4GH VRS Alleles "
f"corresponding to the GT indexes of the {info_field_desc} alleles")
)
additional_info_fields += [self.VRS_STARTS_FIELD, self.VRS_ENDS_FIELD, self.VRS_STATES_FIELD]

if vcf_out:
vcf_out = pysam.VariantFile(vcf_out, "w", header=vcf_in.header) # pylint: disable=no-member
Expand All @@ -208,10 +221,23 @@ def annotate( # pylint: disable=too-many-arguments,too-many-locals
output_pickle = bool(vrs_pickle_out)

for record in vcf_in:
vrs_field_data = self._get_vrs_data(
record, vrs_data, assembly, additional_info_fields, vrs_attributes,
output_pickle, output_vcf, compute_for_ref
)
additional_info_fields = [self.VRS_ALLELE_IDS_FIELD]
if vrs_attributes:
additional_info_fields += [self.VRS_STARTS_FIELD, self.VRS_ENDS_FIELD, self.VRS_STATES_FIELD]
try:
vrs_field_data = self._get_vrs_data(
record, vrs_data, assembly, additional_info_fields, vrs_attributes,
output_pickle, output_vcf, compute_for_ref
)
except Exception as ex:
_logger.exception("VRS error on %s-%s", record.chrom, record.pos)
err_msg = f"{ex}" or f"{type(ex)}"
for search_repl in VCFAnnotator.VCF_ESCAPE_MAP:
err_msg = err_msg.replace(search_repl[0], search_repl[1])
additional_info_fields = [self.VRS_ERROR_FIELD]
vrs_field_data = {self.VRS_ERROR_FIELD: [err_msg]}

_logger.debug("VCF record %s-%s generated vrs_field_data %s", record.chrom, record.pos, vrs_field_data)

if output_vcf:
for k in additional_info_fields:
Expand Down Expand Up @@ -256,15 +282,19 @@ def _get_vrs_object( # pylint: disable=too-many-arguments,too-many-locals
except (ValidationError, TranslatorValidationError) as e:
vrs_obj = None
_logger.error("ValidationError when translating %s from gnomad: %s", vcf_coords, str(e))
raise
except KeyError as e:
vrs_obj = None
_logger.error("KeyError when translating %s from gnomad: %s", vcf_coords, str(e))
raise
except AssertionError as e:
vrs_obj = None
_logger.error("AssertionError when translating %s from gnomad: %s", vcf_coords, str(e))
raise
except Exception as e: # pylint: disable=broad-except
vrs_obj = None
_logger.error("Unhandled Exception when translating %s from gnomad: %s", vcf_coords, str(e))
raise
else:
if not vrs_obj:
_logger.debug("None was returned when translating %s from gnomad", vcf_coords)
Expand Down
Loading
Loading