diff --git a/pyard/ard.py b/pyard/ard.py index fb8d618..6881393 100644 --- a/pyard/ard.py +++ b/pyard/ard.py @@ -25,6 +25,7 @@ import re import sqlite3 import sys +from collections import Counter from typing import Iterable, List from . import broad_splits, smart_sort @@ -408,8 +409,8 @@ def redux(self, glstring: str, redux_type: VALID_REDUCTION_TYPES) -> str: ) # Handle MAC - if self._config["reduce_MAC"] and self.is_mac(glstring): - if db.is_valid_mac_code(self.db_connection, code): + if self._config["reduce_MAC"] and code.isalpha(): + if self.is_mac(glstring): # Make sure it's a valid MAC if HLA_regex.search(glstring): # Remove HLA- prefix allele_name = glstring.split("-")[1] @@ -436,12 +437,7 @@ def validate(self, glstring): :param glstring: GL String to validate :return: boolean indicating success """ - try: - return self._is_valid_gl(glstring) - except InvalidAlleleError as e: - raise InvalidTypingError( - f"{glstring} is not valid GL String. \n {e.message}", e - ) from None + return self._is_valid_gl(glstring) def is_XX(self, glstring: str, loc_antigen: str = None, code: str = None) -> bool: if loc_antigen is None or code is None: @@ -484,12 +480,39 @@ def is_mac(self, allele: str) -> bool: :return: True if MAC """ if ":" in allele: - code = allele.split(":")[1] - try: + allele_split = allele.split(":") + if len(allele_split) == 2: # MACs have only single : + locus_antigen, code = allele_split if code.isalpha(): - return db.is_valid_mac_code(self.db_connection, code) - except sqlite3.OperationalError as e: - print("Error: ", e) + try: + alleles = db.mac_code_to_alleles(self.db_connection, code) + if alleles: + if any(map(lambda a: ":" in a, alleles)): + # allele specific antigen codes have ':' in the MAC mapping + # e.g. 
CFWRN -> 15:01/15:98/15:157/15:202/ + # 15:239/15:280/15:340/35:43/35:67/35:79/35:102/35:118/35:185/51:220 + # Extract the antigens from the mapped alleles + antigen_groups = map(lambda a: a.split(":")[0], alleles) + # Rule 1: The 1st field with the most allele designations in the request is + # the 1st field of the allele code designation + # Rule 2: If there is a tie in the number of allele designations sharing the 1st field, + # the 1st field with the lowest numeric value is selected. + antigen_counts = Counter(antigen_groups) + # Create a table of antigen to its counts + # '15': 7 + # '35': 6 + # '51': 1 + # Valid antigen is the first most common one. + # As it's presorted in db, also satisfies Rule 2. + valid_antigen = antigen_counts.most_common(1).pop()[0] + # Get antigen value 15 from 'DRB1*15' + provided_antigen = locus_antigen.split("*").pop() + # The MAC is only valid if the given antigen satisfies the antigen matching Rule 1 and 2 + return provided_antigen == valid_antigen + # Valid when antigen group codes + return True + except sqlite3.OperationalError as e: + print("Error: ", e) return False def is_v2(self, allele: str) -> bool: @@ -719,8 +742,8 @@ def expand_mac(self, mac_code: str): :return: GL String of expanded alleles :rtype: str """ - locus_antigen, code = mac_code.split(":") - if db.is_valid_mac_code(self.db_connection, code): + if self.is_mac(mac_code): # Validate MAC first + locus_antigen, code = mac_code.split(":") if HLA_regex.search(mac_code): locus_antigen = locus_antigen.split("-")[1] # Remove HLA- prefix return "/".join( diff --git a/pyard/db.py b/pyard/db.py index e08b506..a53ec66 100644 --- a/pyard/db.py +++ b/pyard/db.py @@ -172,21 +172,6 @@ def alleles_to_mac_code( return None -def is_valid_mac_code(connection: sqlite3.Connection, code: str) -> bool: - """ - Check db if the MAC code exists. - - :param connection: db connection of type sqlite.Connection - :param code: MAC code - :return: code is MAC code ? 
- """ - mac_query = "SELECT count(alleles) from mac_codes where code = ?" - cursor = connection.execute(mac_query, (code,)) - result = cursor.fetchone() - cursor.close() - return result[0] > 0 - - def serology_to_alleles(connection: sqlite3.Connection, serology: str) -> List[str]: """ Look up Serology in the database and return corresponding list of alleles. diff --git a/scripts/pyard b/scripts/pyard index 035c9ee..99b92b4 100755 --- a/scripts/pyard +++ b/scripts/pyard @@ -155,6 +155,9 @@ if __name__ == "__main__": except InvalidTypingError as e: print("Typing Error:", e.message, file=sys.stderr) sys.exit(2) + except InvalidMACError as e: + print("MAC Error:", e.message, file=sys.stderr) + sys.exit(3) else: # Remove ard and close db connection del ard diff --git a/tests/features/mac.feature b/tests/features/mac.feature index 4cc3160..f517a73 100644 --- a/tests/features/mac.feature +++ b/tests/features/mac.feature @@ -48,3 +48,26 @@ Feature: MAC (Multiple Allele Code) | A*01:01/A*01:02 | A*01:AB | | HLA-A*25:01/HLA-A*26:01 | HLA-A*25:BYHR | | HLA-A*02:01/HLA-A*02:09/HLA-A*02:43N | HLA-A*02:GNF | + + + Scenario Outline: Validate allele specific antigen MACs + + MAC validation rules for allele specific antigen codes: + - The 1st field with the most allele designations in the request is + the 1st field of the allele code designation + - If there is a tie in the number of alleles designations sharing the 1st field, + the 1st field with the lowest numeric value is selected. 
+ + + Given the MAC code is <MAC> + When checking for validity of the MAC + Then the validness is <Validity> + + Examples: + | MAC | Validity | + | DRB1*07:DFJR | Invalid | + | DRB1*15:DFJR | Valid | + | DPB1*08:BHHE | Invalid | + | DPB1*19:BHHE | Valid | + | A*31:CMZEY | Invalid | + | A*02:CMZEY | Valid | diff --git a/tests/steps/mac.py b/tests/steps/mac.py index 91f1919..64069d5 100644 --- a/tests/steps/mac.py +++ b/tests/steps/mac.py @@ -1,6 +1,8 @@ from behave import * from hamcrest import assert_that, is_ +from pyard.exceptions import InvalidAlleleError + @given("the MAC code is {mac_code}") def step_impl(context, mac_code): @@ -30,3 +32,17 @@ def step_impl(context): @then("the decoded MAC is {mac_code}") def step_impl(context, mac_code): assert_that(context.mac_code, is_(mac_code)) + + +@when("checking for validity of the MAC") +def step_impl(context): + try: + context.is_valid = context.ard.validate(context.mac_code) + except InvalidAlleleError: + context.is_valid = False + + +@then("the validness is {validity}") +def step_impl(context, validity): + valid = validity == "Valid" + assert_that(context.is_valid, is_(valid)) diff --git a/tests/test_pyard.py b/tests/test_pyard.py index 0dcf87a..39dbd60 100644 --- a/tests/test_pyard.py +++ b/tests/test_pyard.py @@ -34,7 +34,7 @@ import pyard from pyard.constants import DEFAULT_CACHE_SIZE -from pyard.exceptions import InvalidAlleleError, InvalidMACError, InvalidTypingError +from pyard.exceptions import InvalidAlleleError from pyard.misc import validate_reduction_type @@ -135,15 +135,15 @@ def test_redux_types(self): validate_reduction_type("XX") def test_empty_allele(self): - with self.assertRaises(InvalidTypingError): + with self.assertRaises(InvalidAlleleError): self.ard.redux("A*", "lgx") def test_fp_allele(self): - with self.assertRaises(InvalidTypingError): + with self.assertRaises(InvalidAlleleError): self.ard.redux("A*0.123", "lgx") def test_empty_fields(self): - with self.assertRaises(InvalidTypingError): + with 
self.assertRaises(InvalidAlleleError): # : without any data self.ard.redux("DQA1*01:01:01:G", "lgx") @@ -152,7 +152,7 @@ def test_invalid_serology(self): serology_a10 = self.ard.redux("A10", "lgx") self.assertEqual(serology_a10.split("/")[0], "A*25:01") # And A100 isn't a valid typing - with self.assertRaises(InvalidTypingError): + with self.assertRaises(InvalidAlleleError): self.ard.redux("A100", "lgx") def test_allele_duplicated(self):