Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add XX mappings of Serology #310

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 24 additions & 11 deletions pyard/ard.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def __init__(
dr.generate_serology_mapping(
self.db_connection, imgt_version, self.serology_mapping, self._redux_allele
)
self.valid_serology_set = dr.build_valid_serology_set(self.db_connection)
self.valid_serology_set = SerologyMapping.get_valid_serology_names()

# Load V2 to V3 mappings
dr.generate_v2_to_v3_mapping(self.db_connection, imgt_version)
Expand Down Expand Up @@ -436,16 +436,23 @@ def redux(self, glstring: str, redux_type: VALID_REDUCTION_TYPES) -> str:
is_hla_prefix = HLA_regex.search(loc_antigen)
if is_hla_prefix:
loc_antigen = loc_antigen.split("-")[1]
if self.is_XX(glstring, loc_antigen, code):
if is_hla_prefix:
reduced_alleles = self.redux(
"/".join(self.code_mappings.xx_codes[loc_antigen]), redux_type
)
return "/".join(["HLA-" + a for a in reduced_alleles.split("/")])
if code == "XX":
if self.is_XX(glstring, loc_antigen, code):
if is_hla_prefix:
reduced_alleles = self.redux(
"/".join(self.code_mappings.xx_codes[loc_antigen]),
redux_type,
)
return "/".join(
["HLA-" + a for a in reduced_alleles.split("/")]
)
else:
return self.redux(
"/".join(self.code_mappings.xx_codes[loc_antigen]),
redux_type,
)
else:
return self.redux(
"/".join(self.code_mappings.xx_codes[loc_antigen]), redux_type
)
raise InvalidTypingError(f"{glstring} is not valid XX code")

# Handle MAC
if self._config["reduce_MAC"] and code.isalpha():
Expand Down Expand Up @@ -633,7 +640,13 @@ def find_broad_splits(self, allele) -> tuple:
return self.serology_mapping.find_splits(allele)

def find_associated_antigen(self, serology) -> str:
return self.serology_mapping.serology_associated_map.get(serology, serology)
return self.serology_mapping.find_associated_antigen(serology)

@functools.lru_cache()
def find_xx_from_serology(self, serology):
    """
    Look up the XX allele recorded for a serology.

    :param serology: serology name to look up
    :return: whatever `db.find_xx_for_serology` returns for this serology
    :raises InvalidAlleleError: if `self.is_serology` rejects the name
    """
    # NOTE(review): lru_cache on an instance method keys the cache on `self`
    # and keeps the instance alive for as long as the cache exists.
    if not self.is_serology(serology):
        raise InvalidAlleleError(f"{serology} is not a valid serology")
    return db.find_xx_for_serology(self.db_connection, serology)

def _get_alleles(self, code, locus_antigen) -> Iterable[str]:
"""
Expand Down
54 changes: 27 additions & 27 deletions pyard/data_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,14 @@
import pyard.load
from pyard.smart_sort import smart_sort_comparator
from . import db
from .serology import broad_splits_dna_mapping, get_all_valid_serology_names
from .constants import expression_chars
from .load import (
load_g_group,
load_p_group,
load_allele_list,
load_serology_mappings,
load_latest_version,
)
from .constants import expression_chars
from .mappings import (
ars_mapping_tables,
ARSMapping,
Expand All @@ -50,6 +49,7 @@
number_of_fields,
get_1field_allele,
)
from .serology import broad_splits_dna_mapping, SerologyMapping


def expression_reduce(df):
Expand Down Expand Up @@ -356,7 +356,10 @@ def to_serological_name(locus_name: str):


def generate_serology_mapping(
db_connection: sqlite3.Connection, imgt_version, serology_mapping, redux_function
db_connection: sqlite3.Connection,
imgt_version: str,
serology_mapping: SerologyMapping,
redux_function,
):
if not db.table_exists(db_connection, "serology_mapping"):
df_sero = load_serology_mappings(imgt_version)
Expand Down Expand Up @@ -412,22 +415,28 @@ def generate_serology_mapping(
if split in sero_mapping:
sero_mapping[broad] = sero_mapping[split]

# re-sort allele lists into smartsort order
for sero in sero_mapping.keys():
sero_mapping[sero] = (
"/".join(
sorted(
sero_mapping[sero][0],
key=functools.cmp_to_key(smart_sort_comparator),
)
),
"/".join(
sorted(
sero_mapping[sero][1],
key=functools.cmp_to_key(smart_sort_comparator),
# Create a mapping of serology to alleles, lgx_alleles and associated XX allele
serology_xx_mapping = serology_mapping.get_xx_mappings()
# re-sort allele lists into smart-sort order
for sero in serology_xx_mapping:
if sero in sero_mapping:
sero_mapping[sero] = (
"/".join(
sorted(
sero_mapping[sero][0],
key=functools.cmp_to_key(smart_sort_comparator),
)
),
"/".join(
sorted(
sero_mapping[sero][1],
key=functools.cmp_to_key(smart_sort_comparator),
),
),
),
)
serology_xx_mapping[sero],
)
else:
sero_mapping[sero] = (None, None, serology_xx_mapping[sero])

db.save_serology_mappings(db_connection, sero_mapping)

Expand Down Expand Up @@ -483,12 +492,3 @@ def generate_cwd_mapping(db_connection: sqlite3.Connection):
if not db.table_exists(db_connection, "cwd2"):
cwd2_map = pyard.load.load_cwd2()
db.save_cwd2(db_connection, cwd2_map)


def build_valid_serology_set(db_connection: sqlite3.Connection):
    """
    Return the set of valid serology names, persisting them on first use.

    :param db_connection: db connection of type sqlite.Connection
    :return: set built from `get_all_valid_serology_names()`
    """
    serology_names = get_all_valid_serology_names()

    # Only write the names when the `valid_serology` table does not exist yet
    table_missing = not db.table_exists(db_connection, "valid_serology")
    if table_missing:
        db.save_set(db_connection, "valid_serology", serology_names, "serology")

    return set(serology_names)
29 changes: 22 additions & 7 deletions pyard/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,22 @@ def find_serology_for_allele(
return serology_mapping


def find_xx_for_serology(connection: sqlite3.Connection, serology: str) -> str:
    """
    Find the corresponding XX allele for the given serology

    :param connection: db connection of type sqlite.Connection
    :param serology: serology for which to find XX allele
    :return: XX allele for given serology, or None when the serology has no
        row in the `serology_mapping` table
    """
    # Plain string: the value is bound through the `?` placeholder, so no
    # f-string interpolation is needed (or wanted) here.
    query = "SELECT xx FROM serology_mapping WHERE serology = ?"
    cursor = connection.execute(query, (serology,))
    try:
        row = cursor.fetchone()
    finally:
        # Release the cursor even if fetching fails
        cursor.close()

    if row:
        return row[0]
    return None


def get_user_version(connection: sqlite3.Connection) -> int:
"""
Retrieve user_version from db
Expand All @@ -424,9 +440,7 @@ def get_user_version(connection: sqlite3.Connection) -> int:
version = result[0]
cursor.close()

if version:
return version
return None
return version


def set_user_version(connection: sqlite3.Connection, version: int):
Expand Down Expand Up @@ -579,15 +593,16 @@ def save_serology_mappings(db_connection, sero_mapping):
# Create table
create_table_sql = f"""CREATE TABLE serology_mapping (
serology TEXT PRIMARY KEY,
allele_list TEXT NOT NULL,
lgx_allele_list TEXT NOT NULL
allele_list TEXT,
lgx_allele_list TEXT,
xx TEXT NOT NULL
)"""
cursor.execute(create_table_sql)

rows = ((k, v[0], v[1]) for k, v in sero_mapping.items())
rows = ((k, v[0], v[1], v[2]) for k, v in sero_mapping.items())

# insert
cursor.executemany(f"INSERT INTO serology_mapping VALUES (?, ?, ?)", rows)
cursor.executemany(f"INSERT INTO serology_mapping VALUES (?, ?, ?, ?)", rows)

# commit transaction - writes to the db
db_connection.commit()
Expand Down
136 changes: 99 additions & 37 deletions pyard/serology.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,72 @@
# > http://www.fsf.org/licensing/licenses/lgpl.html
# > http://www.opensource.org/licenses/lgpl-license.php
#
from pyard.constants import HLA_regex
import re

from pyard.constants import HLA_regex

#
# HLA Antigens
# List of all recognised serological collected from:
# https://hla.alleles.org/antigens/recognised_serology.html
#
def get_all_valid_serology_names():


# -#
# Broad, Splits and Associated Antigens
# http://hla.alleles.org/antigens/broads_splits.html
#
#
# Mapping Generated from `dna_relshp.csv` file
#
broad_splits_dna_mapping = {
"A*09": ["A*23", "A*24"],
"A*10": ["A*25", "A*26", "A*34", "A*66"],
"A*19": ["A*29", "A*30", "A*31", "A*32", "A*33", "A*74"],
"A*28": ["A*68", "A*69"],
"B*05": ["B*51", "B*52"],
"B*12": ["B*44", "B*45"],
"B*16": ["B*38", "B*39"],
"B*17": ["B*57", "B*58"],
"B*21": ["B*49", "B*50"],
"B*22": ["B*54", "B*55", "B*56"],
"C*10": ["C*03", "C*04"],
"DQB1*01": ["DQB1*05", "DQB1*06"],
"DRB1*02": ["DRB1*15", "DRB1*16"],
"DRB1*06": ["DRB1*13", "DRB1*14"],
}

# Serologies whose XX allele is looked up directly instead of being derived
# from the antigen number: `SerologyMapping._map_serology_to_xx` returns the
# value stored here before attempting any derivation.
serology_xx_exception_mapping = {
    # Locus B
    # Broad B40
    "B60": "B*40:XX",
    "B61": "B*40:XX",
    # Broad B14
    "B64": "B*14:XX",
    "B65": "B*14:XX",
    # Broad B15
    "B62": "B*15:XX",
    "B63": "B*15:XX",
    "B70": "B*15:XX",
    "B75": "B*15:XX",
    "B76": "B*15:XX",
    "B77": "B*15:XX",
    # Broad B70
    "B71": "B*15:XX",
    "B72": "B*15:XX",
    # Locus DRB1 (both entries map to DRB1*03:XX)
    "DR17": "DRB1*03:XX",
    "DR18": "DRB1*03:XX",
    # Locus DQB1
    # Broad DQ3
    "DQ7": "DQB1*03:XX",
    "DQ8": "DQB1*03:XX",
    "DQ9": "DQB1*03:XX",
}

# Splits a serology name into a leading run of non-digits (group 1) and the
# run of digits that follows it (group 2); `_map_serology_to_xx` reads group 2.
sero_antigen_regex = re.compile(r"(\D+)(\d+)")


class SerologyMapping:
valid_serology_map = {
"A": [
"A1",
Expand Down Expand Up @@ -153,7 +210,7 @@ def get_all_valid_serology_names():
"Dw25",
"Dw26",
],
"DR": [
"DRB1": [
"DR1",
"DR103",
"DR2",
Expand All @@ -179,40 +236,10 @@ def get_all_valid_serology_names():
"DR52",
"DR53",
],
"DQ": ["DQ1", "DQ2", "DQ3", "DQ4", "DQ5", "DQ6", "DQ7", "DQ8", "DQ9"],
"DP": ["DPw1", "DPw2", "DPw3", "DPw4", "DPw5", "DPw6"],
"DQB1": ["DQ1", "DQ2", "DQ3", "DQ4", "DQ5", "DQ6", "DQ7", "DQ8", "DQ9"],
"DPB1": ["DPw1", "DPw2", "DPw3", "DPw4", "DPw5", "DPw6"],
}

all_serology_names = [x for v in valid_serology_map.values() for x in v]
return all_serology_names


# -#
# Broad, Splits and Associated Antigens
# http://hla.alleles.org/antigens/broads_splits.html
#
#
# Mapping Generated from `dna_relshp.csv` file
#
broad_splits_dna_mapping = {
"A*09": ["A*23", "A*24"],
"A*10": ["A*25", "A*26", "A*34", "A*66"],
"A*19": ["A*29", "A*30", "A*31", "A*32", "A*33", "A*74"],
"A*28": ["A*68", "A*69"],
"B*05": ["B*51", "B*52"],
"B*12": ["B*44", "B*45"],
"B*16": ["B*38", "B*39"],
"B*17": ["B*57", "B*58"],
"B*21": ["B*49", "B*50"],
"B*22": ["B*54", "B*55", "B*56"],
"C*10": ["C*03", "C*04"],
"DQB1*01": ["DQB1*05", "DQB1*06"],
"DRB1*02": ["DRB1*15", "DRB1*16"],
"DRB1*06": ["DRB1*13", "DRB1*14"],
}


class SerologyMapping:
def __init__(self, broad_splits_mapping, associated_mapping):
self.broad_splits_map = broad_splits_mapping
self.serology_associated_map = associated_mapping
Expand All @@ -237,8 +264,43 @@ def find_splits(self, allele: str) -> tuple:
if allele_name in mapping[broad]:
return self._get_mapping(broad, mapping, prefix)

@staticmethod
def _get_mapping(broad, mapping, prefix):
def find_associated_antigen(self, serology):
    """
    Return the antigen associated with a serology.

    :param serology: serology name to look up in `serology_associated_map`
    :return: the mapped antigen, or `serology` itself when it has no entry
    """
    associations = self.serology_associated_map
    if serology in associations:
        return associations[serology]
    # No association recorded: the serology stands for itself
    return serology

def get_xx_mappings(self):
    """
    Build the serology -> XX allele mapping for every valid serology.

    Walks `SerologyMapping.valid_serology_map` locus by locus and maps each
    serology through `_map_serology_to_xx`.

    :return: dict keyed by serology name with the XX allele as value
    """
    return {
        serology: self._map_serology_to_xx(locus, serology)
        for locus, serologies in SerologyMapping.valid_serology_map.items()
        for serology in serologies
    }

@classmethod
def get_valid_serology_names(cls):
    """
    Collect every serology name listed in `valid_serology_map`.

    :return: set with the names from all loci merged together
    """
    names = set()
    for serologies in cls.valid_serology_map.values():
        names.update(serologies)
    return names

def _map_serology_to_xx(self, locus, serology):
    """
    Derive the XX allele for a serology at the given locus.

    :param locus: locus name used as the allele prefix
    :param serology: serology name to convert
    :return: XX allele string in the form `<locus>*<antigen group>:XX`, or
        the entry from `serology_xx_exception_mapping` when one exists
    """
    # Entries in the exception table bypass the derivation below
    try:
        return serology_xx_exception_mapping[serology]
    except KeyError:
        pass

    # The XX version is built from the associated serology
    associated = self.find_associated_antigen(serology)

    # Group 2 of the pattern holds just the digits of the name
    digits = sero_antigen_regex.match(associated).group(2)

    # Single-digit antigen groups are zero-padded to two characters
    number = int(digits)
    if number < 10:
        digits = f"{number:02}"

    return f"{locus}*{digits}:XX"

@classmethod
def _get_mapping(cls, broad, mapping, prefix):
if prefix:
return "HLA-" + broad, list(map(lambda x: "HLA-" + x, mapping[broad]))
else:
Expand Down
Loading
Loading