Skip to content

Commit

Permalink
Merge pull request #305 from pbashyal-nmdp/290_serology_lgx_table
Browse files Browse the repository at this point in the history
Serology reduction based on lgx
  • Loading branch information
mmaiers-nmdp authored Feb 20, 2024
2 parents 4665c1d + ab99745 commit 9d1792d
Show file tree
Hide file tree
Showing 10 changed files with 73 additions and 28 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ LABEL MAINTAINER="Pradeep Bashyal"

WORKDIR /app

ARG PY_ARD_VERSION=1.1.0
ARG PY_ARD_VERSION=1.1.1

COPY requirements.txt /app
RUN pip install --no-cache-dir --upgrade pip && \
Expand Down
2 changes: 1 addition & 1 deletion api-spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ openapi: 3.0.3
info:
title: ARD Reduction
description: Reduce to ARD Level
version: "1.1.0"
version: "1.1.1"
servers:
- url: 'http://localhost:8080'
tags:
Expand Down
2 changes: 1 addition & 1 deletion pyard/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from .misc import get_imgt_db_versions as db_versions

__author__ = """NMDP Bioinformatics"""
__version__ = "1.1.0"
__version__ = "1.1.1"


def init(
Expand Down
13 changes: 11 additions & 2 deletions pyard/ard.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,9 @@ def __init__(
)

dr.generate_serology_mapping(
self.db_connection, self.serology_mapping, imgt_version
self.db_connection, imgt_version, self.serology_mapping, self._redux_allele
)

# Load V2 to V3 mappings
dr.generate_v2_to_v3_mapping(self.db_connection, imgt_version)
# Save IMGT database version
Expand Down Expand Up @@ -271,7 +272,15 @@ def _redux_allele(
return self._redux_allele(allele, "lgx")
elif redux_type == "S":
# find serology equivalent in serology_mapping
serology_mapping = db.find_serology_for_allele(self.db_connection, allele)
if is_2_field_allele(allele):
allele = self._redux_allele(allele, "lgx")
serology_mapping = db.find_serology_for_allele(
self.db_connection, allele, "lgx_allele_list"
)
else:
serology_mapping = db.find_serology_for_allele(
self.db_connection, allele
)
serology_set = set()
for serology, allele_list in serology_mapping.items():
if allele in allele_list.split("/"):
Expand Down
31 changes: 21 additions & 10 deletions pyard/data_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ def to_serological_name(locus_name: str):


def generate_serology_mapping(
db_connection: sqlite3.Connection, serology_mapping, imgt_version
db_connection: sqlite3.Connection, imgt_version, serology_mapping, redux_function
):
if not db.table_exists(db_connection, "serology_mapping"):
df_sero = load_serology_mappings(imgt_version)
Expand Down Expand Up @@ -390,10 +390,12 @@ def generate_serology_mapping(
sero_mapping_combined["Sero"] = sero_mapping_combined["Sero"].apply(
to_serological_name
)

sero_mapping_combined["lgx"] = sero_mapping_combined["Allele"].apply(
lambda allele: redux_function(allele, "lgx")
)
sero_mapping = (
sero_mapping_combined.groupby("Sero")
.apply(lambda x: "/".join(sorted(x["Allele"])))
.apply(lambda x: (set(x["Allele"]), set(x["lgx"])))
.to_dict()
)

Expand All @@ -402,20 +404,29 @@ def generate_serology_mapping(
for broad, splits in serology_mapping.broad_splits_map.items():
for split in splits:
try:
sero_mapping[broad] = "/".join(
[sero_mapping[broad], sero_mapping[split]]
sero_mapping[broad] = (
sero_mapping[broad][0].union(sero_mapping[split][0]),
sero_mapping[broad][1].union(sero_mapping[split][1]),
)
except KeyError:
if split in sero_mapping:
sero_mapping[broad] = sero_mapping[split]

# re-sort allele lists into smartsort order
for sero in sero_mapping.keys():
sero_mapping[sero] = "/".join(
sorted(
sero_mapping[sero].split("/"),
key=functools.cmp_to_key(smart_sort_comparator),
)
sero_mapping[sero] = (
"/".join(
sorted(
sero_mapping[sero][0],
key=functools.cmp_to_key(smart_sort_comparator),
)
),
"/".join(
sorted(
sero_mapping[sero][1],
key=functools.cmp_to_key(smart_sort_comparator),
),
),
)

db.save_serology_mappings(db_connection, sero_mapping)
Expand Down
33 changes: 23 additions & 10 deletions pyard/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,18 +393,17 @@ def similar_mac(connection: sqlite3.Connection, mac_prefix: str) -> Set[str]:


def find_serology_for_allele(
connection: sqlite3.Connection, allele_name: str
connection: sqlite3.Connection, allele_name: str, column: str = "allele_list"
) -> Dict[str, str]:
"""
Find serology mappings whose allele list contains the provided allele_name.
:param connection: db connection of type sqlite3.Connection
:param allele_name: Allele name to match as a substring of the stored allele list
:param column: Column to look for allele, "allele_list" or "lgx_allele_list"
:return: dictionary of serology name to the allele list from the selected column
"""
query = (
"SELECT serology, allele_list FROM serology_mapping WHERE allele_list LIKE ?"
)
query = f"SELECT serology, {column} FROM serology_mapping WHERE {column} LIKE ?"
cursor = connection.execute(query, (f"%{allele_name}%",))
results = cursor.fetchall()
# fetchall() returns a list of tuples of results
Expand Down Expand Up @@ -574,12 +573,26 @@ def save_mac_codes(db_connection, mac, mac_table_name):

def save_serology_mappings(db_connection, sero_mapping):
# Save the serology mapping to db
save_dict(
db_connection,
table_name="serology_mapping",
dictionary=sero_mapping,
columns=("serology", "allele_list"),
)
cursor = db_connection.cursor()
# Drop the table first
cursor.execute("DROP TABLE IF EXISTS serology_mapping")
# Create table
create_table_sql = f"""CREATE TABLE serology_mapping (
serology TEXT PRIMARY KEY,
allele_list TEXT NOT NULL,
lgx_allele_list TEXT NOT NULL
)"""
cursor.execute(create_table_sql)

rows = ((k, v[0], v[1]) for k, v in sero_mapping.items())

# insert
cursor.executemany(f"INSERT INTO serology_mapping VALUES (?, ?, ?)", rows)

# commit transaction - writes to the db
db_connection.commit()
# close the cursor
cursor.close()


def load_v2_v3_mappings(db_connection):
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 1.1.0
current_version = 1.1.1
commit = True
tag = True

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@

setup(
name="py-ard",
version="1.1.0",
version="1.1.1",
description="ARD reduction for HLA with Python",
long_description=readme,
long_description_content_type="text/markdown",
Expand Down
8 changes: 8 additions & 0 deletions tests/features/serology_redux.feature
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,11 @@ Feature: Serology Reduction
Examples: Skip Loci that don't have Serology mappings
| Allele | Level | Redux Serology |
| A*01:01+A*01:01^B*08:ASXJP+B*07:02^C*02:02+C*07:HTGM^DPB1*28:01:01G+DPB1*296:01 | S | A1+A1^B7+B8^Cw2+Cw7 |

Examples: 2 field Serology Reduction uses lgx version of serology mapping

| Allele | Level | Redux Serology |
| DRB1*07:34 | S | DR7 |
| DRB1*07:34:01 | S | DR7 |
| DRB1*07:34:02 | S | DR7 |
| DRB4*01:03N | S | X |
6 changes: 5 additions & 1 deletion tests/steps/redux_allele.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,11 @@ def step_impl(context, level):
@when("reducing on the {level} level with ping")
def step_impl(context, level):
context.level = level
context.redux_allele = context.ard_ping.redux(context.allele, level)
redux_allele = context.ard_ping.redux(context.allele, level)
if not redux_allele:
context.redux_allele = "X"
else:
context.redux_allele = redux_allele


@when("reducing on the {level} level with ARS suffix enabled")
Expand Down

0 comments on commit 9d1792d

Please sign in to comment.