From 97d8138e03f9cf38c712c9846df9232f498a9448 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 18 Dec 2024 10:46:01 +0100 Subject: [PATCH] Report the location on error --- CHANGELOG.rst | 2 ++ includes/snv-indels/scripts/filter_vep.py | 18 ++++++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 4d37b42..a22c553 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -26,6 +26,8 @@ Novel module Bugfixes ======== * Fix a rare bug where different modules use the same multiqc file list. +* Fix a bug with filtering VEP records that contain multiple population + frequency records for a single variant. Updates ======= diff --git a/includes/snv-indels/scripts/filter_vep.py b/includes/snv-indels/scripts/filter_vep.py index c9be456..8e78d82 100644 --- a/includes/snv-indels/scripts/filter_vep.py +++ b/includes/snv-indels/scripts/filter_vep.py @@ -119,11 +119,13 @@ def _extract_frequencies(self) -> FrequenciesType: if not frequencies: frequencies = var["frequencies"] elif var["frequencies"] != frequencies: - msg = "Multiple colocated variants with 'frequencies' entry encountered" + location = self.location + msg = f"Multiple colocated variants with 'frequencies' entry encountered on {location}" raise RuntimeError(msg) if len(frequencies) > 1: - msg = "'frequencies' entry from VEP should only contain a single key" + location = self.location + msg = f"'frequencies' entry with multiple keys encountered on {location}" raise RuntimeError(msg) return frequencies @@ -162,6 +164,18 @@ def above_population_threshold(self, population: str, threshold: float) -> bool: """ return self.population_frequency(population) > threshold + @property + def location(self) -> str: + """ + Return a representation of the location of the VEP record on the genome + """ + input = self.get("input") + if input is None: + return "unknown location" + + chrom, pos = input.split("\t")[:2] + return f"{chrom}:{pos}" + def read_goi_file(fname: str) 
-> Tuple[Set[str], Set[str]]: goi = set()