Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Validation of allele specific MAC codes #276

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 38 additions & 15 deletions pyard/ard.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import re
import sqlite3
import sys
from collections import Counter
from typing import Iterable, List

from . import broad_splits, smart_sort
Expand Down Expand Up @@ -408,8 +409,8 @@ def redux(self, glstring: str, redux_type: VALID_REDUCTION_TYPES) -> str:
)

# Handle MAC
if self._config["reduce_MAC"] and self.is_mac(glstring):
if db.is_valid_mac_code(self.db_connection, code):
if self._config["reduce_MAC"] and code.isalpha():
if self.is_mac(glstring): # Make sure it's a valid MAC
if HLA_regex.search(glstring):
# Remove HLA- prefix
allele_name = glstring.split("-")[1]
Expand All @@ -436,12 +437,7 @@ def validate(self, glstring):
:param glstring: GL String to validate
:return: boolean indicating success
"""
try:
return self._is_valid_gl(glstring)
except InvalidAlleleError as e:
raise InvalidTypingError(
f"{glstring} is not valid GL String. \n {e.message}", e
) from None
return self._is_valid_gl(glstring)

def is_XX(self, glstring: str, loc_antigen: str = None, code: str = None) -> bool:
if loc_antigen is None or code is None:
Expand Down Expand Up @@ -484,12 +480,39 @@ def is_mac(self, allele: str) -> bool:
:return: True if MAC
"""
if ":" in allele:
code = allele.split(":")[1]
try:
allele_split = allele.split(":")
if len(allele_split) == 2: # MACs have only single :
locus_antigen, code = allele_split
if code.isalpha():
return db.is_valid_mac_code(self.db_connection, code)
except sqlite3.OperationalError as e:
print("Error: ", e)
try:
alleles = db.mac_code_to_alleles(self.db_connection, code)
if alleles:
if any(map(lambda a: ":" in a, alleles)):
# allele specific antigen codes have ':' in the MAC mapping
# e.g. CFWRN -> 15:01/15:98/15:157/15:202/
# 15:239/15:280/15:340/35:43/35:67/35:79/35:102/35:118/35:185/51:220
# Extract the antigens from the mapped alleles
antigen_groups = map(lambda a: a.split(":")[0], alleles)
# Rule 1: The 1st field with the most allele designations in the request is
# the 1st field of the allele code designation
# Rule 2: If there is a tie in the number of allele designations sharing the 1st field,
# the 1st field with the lowest numeric value is selected.
antigen_counts = Counter(antigen_groups)
# Create a table mapping each antigen to its count
# '15': 7
# '35': 6
# '51': 1
# Valid antigen is the first most common one.
# As it's presorted in db, also satisfies Rule 2.
valid_antigen = antigen_counts.most_common(1).pop()[0]
# Get antigen value 15 from 'DRB1*15'
provided_antigen = locus_antigen.split("*").pop()
# The MAC is only valid if the given antigen satisfies the antigen matching Rule 1 and 2
return provided_antigen == valid_antigen
# Valid when antigen group codes
return True
except sqlite3.OperationalError as e:
print("Error: ", e)
return False

def is_v2(self, allele: str) -> bool:
Expand Down Expand Up @@ -719,8 +742,8 @@ def expand_mac(self, mac_code: str):
:return: GL String of expanded alleles
:rtype: str
"""
locus_antigen, code = mac_code.split(":")
if db.is_valid_mac_code(self.db_connection, code):
if self.is_mac(mac_code): # Validate MAC first
locus_antigen, code = mac_code.split(":")
if HLA_regex.search(mac_code):
locus_antigen = locus_antigen.split("-")[1] # Remove HLA- prefix
return "/".join(
Expand Down
15 changes: 0 additions & 15 deletions pyard/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,21 +172,6 @@ def alleles_to_mac_code(
return None


def is_valid_mac_code(connection: sqlite3.Connection, code: str) -> bool:
    """
    Check db if the MAC code exists.

    Runs a parameterized count query against the ``mac_codes`` table and
    reports whether at least one row with a non-NULL ``alleles`` value
    matches the given code.

    :param connection: db connection of type sqlite.Connection
    :param code: MAC code
    :return: code is MAC code ?
    """
    mac_query = "SELECT count(alleles) from mac_codes where code = ?"
    cursor = connection.execute(mac_query, (code,))
    try:
        # count() always yields exactly one row, so fetchone() is sufficient
        result = cursor.fetchone()
    finally:
        # Release the cursor even if fetching the row fails
        cursor.close()
    return result[0] > 0


def serology_to_alleles(connection: sqlite3.Connection, serology: str) -> List[str]:
"""
Look up Serology in the database and return corresponding list of alleles.
Expand Down
3 changes: 3 additions & 0 deletions scripts/pyard
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,9 @@ if __name__ == "__main__":
except InvalidTypingError as e:
print("Typing Error:", e.message, file=sys.stderr)
sys.exit(2)
except InvalidMACError as e:
print("MAC Error:", e.message, file=sys.stderr)
sys.exit(3)
else:
# Remove ard and close db connection
del ard
23 changes: 23 additions & 0 deletions tests/features/mac.feature
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,26 @@ Feature: MAC (Multiple Allele Code)
| A*01:01/A*01:02 | A*01:AB |
| HLA-A*25:01/HLA-A*26:01 | HLA-A*25:BYHR |
| HLA-A*02:01/HLA-A*02:09/HLA-A*02:43N | HLA-A*02:GNF |


Scenario Outline: Validate allele specific antigen MACs

MAC validation rules for allele specific antigen codes:
- The 1st field with the most allele designations in the request is
the 1st field of the allele code designation
- If there is a tie in the number of allele designations sharing the 1st field,
the 1st field with the lowest numeric value is selected.


Given the MAC code is <MAC>
When checking for validity of the MAC
Then the validness is <Validity>

Examples:
| MAC | Validity |
| DRB1*07:DFJR | Invalid |
| DRB1*15:DFJR | Valid |
| DPB1*08:BHHE | Invalid |
| DPB1*19:BHHE | Valid |
| A*31:CMZEY | Invalid |
| A*02:CMZEY | Valid |
16 changes: 16 additions & 0 deletions tests/steps/mac.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from behave import *
from hamcrest import assert_that, is_

from pyard.exceptions import InvalidAlleleError


@given("the MAC code is {mac_code}")
def step_impl(context, mac_code):
Expand Down Expand Up @@ -30,3 +32,17 @@ def step_impl(context):
@then("the decoded MAC is {mac_code}")
def step_impl(context, mac_code):
assert_that(context.mac_code, is_(mac_code))


@when("checking for validity of the MAC")
def step_impl(context):
    """Validate the scenario's MAC code and record the outcome on the context.

    Stores the return value of ``context.ard.validate`` in
    ``context.is_valid``; an ``InvalidAlleleError`` is recorded as ``False``.
    """
    try:
        outcome = context.ard.validate(context.mac_code)
    except InvalidAlleleError:
        # A rejected MAC is an expected result for this step, not a failure
        outcome = False
    context.is_valid = outcome


@then("the validness is {validity}")
def step_impl(context, validity):
    """Assert that the recorded validity matches the scenario's expectation.

    The ``validity`` column is treated as true only when it is exactly
    the string "Valid"; any other value means the MAC is expected to be invalid.
    """
    assert_that(context.is_valid, is_(validity == "Valid"))
10 changes: 5 additions & 5 deletions tests/test_pyard.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@

import pyard
from pyard.constants import DEFAULT_CACHE_SIZE
from pyard.exceptions import InvalidAlleleError, InvalidMACError, InvalidTypingError
from pyard.exceptions import InvalidAlleleError
from pyard.misc import validate_reduction_type


Expand Down Expand Up @@ -135,15 +135,15 @@ def test_redux_types(self):
validate_reduction_type("XX")

def test_empty_allele(self):
with self.assertRaises(InvalidTypingError):
with self.assertRaises(InvalidAlleleError):
self.ard.redux("A*", "lgx")

def test_fp_allele(self):
with self.assertRaises(InvalidTypingError):
with self.assertRaises(InvalidAlleleError):
self.ard.redux("A*0.123", "lgx")

def test_empty_fields(self):
with self.assertRaises(InvalidTypingError):
with self.assertRaises(InvalidAlleleError):
# : without any data
self.ard.redux("DQA1*01:01:01:G", "lgx")

Expand All @@ -152,7 +152,7 @@ def test_invalid_serology(self):
serology_a10 = self.ard.redux("A10", "lgx")
self.assertEqual(serology_a10.split("/")[0], "A*25:01")
# And A100 isn't a valid typing
with self.assertRaises(InvalidTypingError):
with self.assertRaises(InvalidAlleleError):
self.ard.redux("A100", "lgx")

def test_allele_duplicated(self):
Expand Down