Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support Serology Associated Antigens #303

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ LABEL MAINTAINER="Pradeep Bashyal"

WORKDIR /app

ARG PY_ARD_VERSION=1.0.11
ARG PY_ARD_VERSION=1.1.0

COPY requirements.txt /app
RUN pip install --no-cache-dir --upgrade pip && \
Expand Down
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,10 @@ dist: clean ## builds source and wheel package
ls -l dist

docker-build: ## build a docker image for the service
docker build -t pyard-service:latest .
docker build --platform=linux/amd64 -t nmdpbioinformatics/pyard-service:latest .

docker: docker-build ## build a docker image and run the service
docker run --rm --name pyard-service -p 8080:8080 pyard-service:latest
docker run --platform=linux/amd64 --rm --name pyard-service -p 8080:8080 nmdpbioinformatics/pyard-service:latest

install: clean ## install the package to the active Python's site-packages
pip install --upgrade pip
Expand Down
2 changes: 1 addition & 1 deletion api-spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ openapi: 3.0.3
info:
title: ARD Reduction
description: Reduce to ARD Level
version: "1.0.11"
version: "1.1.0"
servers:
- url: 'http://localhost:8080'
tags:
Expand Down
3 changes: 1 addition & 2 deletions pyard/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,11 @@
# > http://www.opensource.org/licenses/lgpl-license.php
#
from .blender import blender as dr_blender
from .broad_splits import find_splits as find_broad_splits
from .constants import DEFAULT_CACHE_SIZE
from .misc import get_imgt_db_versions as db_versions

__author__ = """NMDP Bioinformatics"""
__version__ = "1.0.11"
__version__ = "1.1.0"


def init(
Expand Down
18 changes: 15 additions & 3 deletions pyard/ard.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,10 +113,16 @@ def __init__(
)

# Load Serology mappings
broad_splits.broad_splits_ser_mapping = (
dr.generate_serology_broad_split_mapping(self.db_connection, imgt_version)
broad_splits_mapping, associated_mapping = dr.generate_broad_splits_mapping(
self.db_connection, imgt_version
)
self.serology_mapping = broad_splits.SerologyMapping(
broad_splits_mapping, associated_mapping
)

dr.generate_serology_mapping(
self.db_connection, self.serology_mapping, imgt_version
)
dr.generate_serology_mapping(self.db_connection, imgt_version)
# Load V2 to V3 mappings
dr.generate_v2_to_v3_mapping(self.db_connection, imgt_version)
# Save IMGT database version
Expand Down Expand Up @@ -608,6 +614,12 @@ def is_exp_allele(self, allele):
"""
return allele in self.allele_group.exp_alleles

def find_broad_splits(self, allele) -> tuple:
    """
    Find the broad/splits group for an allele or serology name.

    Thin wrapper that delegates to ``find_splits`` of this instance's
    ``serology_mapping``.

    :param allele: allele or serology name to look up
    :return: whatever ``serology_mapping.find_splits`` returns for ``allele``
    """
    serology_mapping = self.serology_mapping
    return serology_mapping.find_splits(allele)

def find_associated_antigen(self, serology) -> str:
    """
    Return the antigen that the given serology is associated with.

    The lookup uses ``serology_mapping.serology_associated_map``. A name that
    is not in the map is returned unchanged. For consistency with
    ``find_splits``, an ``HLA-`` prefixed name that is not found as-is is
    looked up without the prefix, and the prefix is restored on the result.

    :param serology: serology antigen name, with or without ``HLA-`` prefix
    :return: the associated antigen, or ``serology`` itself when no
        association is known
    """
    associated_map = self.serology_mapping.serology_associated_map

    # A direct hit always wins, so every previously resolvable name
    # resolves exactly as before.
    if serology in associated_map:
        return associated_map[serology]

    hla_prefix = "HLA-"
    if isinstance(serology, str) and serology.startswith(hla_prefix):
        bare_name = serology[len(hla_prefix):]
        if bare_name in associated_map:
            return hla_prefix + associated_map[bare_name]

    return serology

def _get_alleles(self, code, locus_antigen) -> Iterable[str]:
"""
Look up allele code in database and generate alleles
Expand Down
55 changes: 28 additions & 27 deletions pyard/broad_splits.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
# > http://www.fsf.org/licensing/licenses/lgpl.html
# > http://www.opensource.org/licenses/lgpl-license.php
#
import re

from pyard.constants import HLA_regex

#
# Broad, Splits and Associated Antigens
Expand All @@ -46,35 +47,35 @@
"DRB1*06": ["DRB1*13", "DRB1*14"],
}

# Loaded at runtime
broad_splits_ser_mapping = None

HLA_regex = re.compile("^HLA-")


class SerologyMapping:
    """
    Holds the serology broad/splits mapping and the associated-antigen mapping.

    ``broad_splits_map`` maps a broad serology name to the list of its splits.
    ``serology_associated_map`` maps an associated antigen to its antigen.
    Both are supplied by the caller at construction time.
    """

    def __init__(self, broad_splits_mapping, associated_mapping):
        self.broad_splits_map = broad_splits_mapping
        self.serology_associated_map = associated_mapping

    def find_splits(self, allele: str) -> tuple:
        """
        Find the broad and its splits for a DNA allele or serology name.

        Names containing ``*`` are looked up in the module-level
        ``broad_splits_dna_mapping``; all other names are looked up in this
        instance's ``broad_splits_map``. The name may be either a broad or one
        of the splits.

        :param allele: DNA or serology name, optionally prefixed with ``HLA-``
        :return: ``(broad, splits)`` tuple, with ``HLA-`` prepended to the
            broad and to every split when the input carried the prefix;
            ``None`` when the name is neither a broad nor a split
        """
        prefix = bool(HLA_regex.search(allele))
        # Strip only the leading "HLA-"; keep everything after the first dash.
        allele_name = allele.split("-", 1)[1] if prefix else allele

        if "*" in allele_name:
            mapping = broad_splits_dna_mapping
        else:
            mapping = self.broad_splits_map

        # The name is itself a broad
        if allele_name in mapping:
            return self._get_mapping(allele_name, mapping, prefix)

        # The name is a split: report the broad it belongs to
        for broad, splits in mapping.items():
            if allele_name in splits:
                return self._get_mapping(broad, mapping, prefix)

        # Not part of any broad/splits group
        return None

    @staticmethod
    def _get_mapping(broad, mapping, prefix):
        """
        Build the ``(broad, splits)`` result for ``broad`` from ``mapping``.

        :param broad: key of the broad in ``mapping``
        :param mapping: broad to list-of-splits dictionary
        :param prefix: when true, prepend ``HLA-`` to the broad and each split
        :return: ``(broad, splits)`` tuple
        """
        if prefix:
            return "HLA-" + broad, ["HLA-" + split for split in mapping[broad]]
        return broad, mapping[broad]
27 changes: 17 additions & 10 deletions pyard/data_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@

import pyard.load
from pyard.smart_sort import smart_sort_comparator
from . import db, broad_splits
from . import db
from .broad_splits import broad_splits_dna_mapping
from .load import (
load_g_group,
load_p_group,
Expand Down Expand Up @@ -216,7 +217,7 @@ def generate_alleles_and_xx_codes_and_who(
xx_codes = xx_df.groupby(["1d"]).apply(lambda x: list(x["Allele"])).to_dict()

# Update xx codes with broads and splits
for broad, splits in broad_splits.broad_splits_dna_mapping.items():
for broad, splits in broad_splits_dna_mapping.items():
for split in splits:
if broad in xx_codes:
xx_codes[broad].extend(xx_codes[split])
Expand Down Expand Up @@ -354,7 +355,9 @@ def to_serological_name(locus_name: str):
return sero_name


def generate_serology_mapping(db_connection: sqlite3.Connection, imgt_version):
def generate_serology_mapping(
db_connection: sqlite3.Connection, serology_mapping, imgt_version
):
if not db.table_exists(db_connection, "serology_mapping"):
df_sero = load_serology_mappings(imgt_version)

Expand Down Expand Up @@ -396,7 +399,7 @@ def generate_serology_mapping(db_connection: sqlite3.Connection, imgt_version):

# map alleles for split serology to their corresponding broad
# Update xx codes with broads and splits
for broad, splits in broad_splits.broad_splits_ser_mapping.items():
for broad, splits in serology_mapping.broad_splits_map.items():
for split in splits:
try:
sero_mapping[broad] = "/".join(
Expand Down Expand Up @@ -450,15 +453,19 @@ def get_db_version(db_connection: sqlite3.Connection):
return db.get_user_version(db_connection)


def generate_broad_splits_mapping(db_connection: sqlite3.Connection, imgt_version):
    """
    Produce the serology broad/splits mapping and the associated-antigen mapping.

    When both mapping tables are already in the database they are read back
    from it. Otherwise both mappings are loaded through
    ``pyard.load.load_serology_broad_split_mapping`` and saved to the database.

    :param db_connection: connection to the sqlite database used as cache
    :param imgt_version: IMGT database version passed on to the loader
    :return: ``(sero_mapping, associated_mapping)`` tuple
    """
    # Require BOTH tables. A database that holds only the broad/splits table
    # has no associated-antigen table to read from, so rebuild in that case
    # instead of failing on the load.
    # NOTE(review): this relies on db.save_dict being able to replace an
    # already existing table - confirm.
    tables_cached = db.table_exists(
        db_connection, "serology_broad_split_mapping"
    ) and db.table_exists(db_connection, "serology_associated_mappings")

    if not tables_cached:
        sero_mapping, associated_mapping = pyard.load.load_serology_broad_split_mapping(
            imgt_version
        )
        db.save_serology_broad_split_mappings(db_connection, sero_mapping)
        db.save_serology_associated_mappings(db_connection, associated_mapping)
        return sero_mapping, associated_mapping

    sero_mapping = db.load_serology_broad_split_mappings(db_connection)
    associated_mapping = db.load_serology_associated_mappings(db_connection)

    return sero_mapping, associated_mapping


def generate_cwd_mapping(db_connection: sqlite3.Connection):
Expand Down
20 changes: 18 additions & 2 deletions pyard/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -609,20 +609,36 @@ def load_v2_v3_mappings(db_connection):

def load_serology_broad_split_mappings(db_connection):
    """
    Load the serology broad/splits mapping from the database.

    The ``splits`` column holds a "/" delimited string (see
    ``save_serology_broad_split_mappings``); it is split back into a list.

    :param db_connection: database connection handed to ``load_dict``
    :return: dictionary of broad name to list of split names
    """
    # Column names must match the ones used when the table was saved.
    sero_mapping = load_dict(
        db_connection, "serology_broad_split_mapping", ("broad", "splits")
    )
    return {broad: splits.split("/") for broad, splits in sero_mapping.items()}


def load_serology_associated_mappings(db_connection):
    """
    Load the associated-antigen mapping from the database.

    :param db_connection: database connection handed to ``load_dict``
    :return: the result of ``load_dict`` for the ``serology_associated_mappings``
        table, using the columns ``associated`` and ``antigen``
    """
    table_name = "serology_associated_mappings"
    columns = ("associated", "antigen")
    return load_dict(db_connection, table_name, columns)


def save_serology_broad_split_mappings(db_connection, sero_mapping):
    """
    Save the serology broad/splits mapping to the database.

    :param db_connection: database connection handed to ``save_dict``
    :param sero_mapping: dictionary of broad name to list of split names
    """
    # Save the `splits` as a "/" delimited string to db
    sero_splits = {broad: "/".join(splits) for broad, splits in sero_mapping.items()}
    # Column names must match the ones used when the table is loaded.
    save_dict(
        db_connection,
        table_name="serology_broad_split_mapping",
        dictionary=sero_splits,
        columns=("broad", "splits"),
    )


def save_serology_associated_mappings(db_connection, associated_mapping):
    """
    Save the associated-antigen mapping to the database.

    :param db_connection: database connection handed to ``save_dict``
    :param associated_mapping: dictionary stored as-is in the
        ``serology_associated_mappings`` table
    """
    table_name = "serology_associated_mappings"
    columns = ("associated", "antigen")
    save_dict(
        db_connection,
        table_name=table_name,
        dictionary=associated_mapping,
        columns=columns,
    )


Expand Down
31 changes: 23 additions & 8 deletions pyard/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
# > http://www.opensource.org/licenses/lgpl-license.php
#
import sys
from typing import Dict, List
from typing import Dict, List, Tuple
from urllib.error import URLError

from pyard.misc import get_G_name, get_2field_allele, get_3field_allele, get_P_name
Expand All @@ -38,7 +38,7 @@ def add_locus_name(locus: str, splits: str) -> List:
# Derived from rel_ser_ser.txt
# https://raw.githubusercontent.com/ANHIG/IMGTHLA/Latest/wmda/rel_ser_ser.txt
#
def load_serology_broad_split_mapping(imgt_version: str) -> Dict:
def load_serology_broad_split_mapping(imgt_version: str) -> Tuple[Dict, Dict]:
import pandas as pd

ser_ser_url = f"{IMGT_HLA_URL}{imgt_version}/wmda/rel_ser_ser.txt"
Expand All @@ -47,21 +47,36 @@ def load_serology_broad_split_mapping(imgt_version: str) -> Dict:
ser_ser_url,
skiprows=6,
names=["Locus", "A", "Splits", "Associated"],
usecols=[0, 1, 2],
dtype="string",
sep=";",
).dropna()
)
except URLError as e:
print(f"Error downloading {ser_ser_url}", e, file=sys.stderr)
sys.exit(1)

df_p["Sero"] = df_p["Locus"] + df_p["A"]
df_p["Splits"] = df_p[["Locus", "Splits"]].apply(
splits_df = df_p[["Locus", "A", "Splits"]].dropna()
associated_df = df_p[["Locus", "A", "Associated"]].dropna()

splits_df["Sero"] = splits_df["Locus"] + splits_df["A"]
splits_df["Splits"] = splits_df[["Locus", "Splits"]].apply(
lambda x: add_locus_name(x["Locus"], x["Splits"]), axis=1
)
splits_df = splits_df.astype({"A": "int32"}).sort_values(by=["Locus", "A"])

associated_df["Sero"] = associated_df["Locus"] + associated_df["A"]
associated_df["Associated"] = associated_df[["Locus", "Associated"]].apply(
lambda x: add_locus_name(x["Locus"], x["Associated"]), axis=1
)
associated_df = associated_df.astype({"A": "int32"}).sort_values(by=["Locus", "A"])

splits_mapping = splits_df[["Sero", "Splits"]].set_index("Sero")["Splits"].to_dict()
associated_mapping = (
associated_df.explode("Associated")[["Associated", "Sero"]]
.set_index("Associated")["Sero"]
.to_dict()
)

sero_mapping = df_p[["Sero", "Splits"]].set_index("Sero")["Splits"].to_dict()
return sero_mapping
return splits_mapping, associated_mapping


def load_g_group(imgt_version):
Expand Down
10 changes: 5 additions & 5 deletions scripts/pyard
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ from pyard.exceptions import InvalidAlleleError, InvalidTypingError, InvalidMACE
from pyard.misc import get_data_dir, get_imgt_version


def find_similar_alleles(prefix):
def find_similar_alleles(ard, prefix):
alleles = ard.similar_alleles(prefix)
if alleles:
for allele in alleles:
Expand Down Expand Up @@ -62,8 +62,8 @@ def expand_mac_code():
sys.exit(0)


def find_broad_splits():
mapping = pyard.find_broad_splits(args.splits)
def find_broad_splits(ard):
mapping = ard.find_broad_splits(args.splits)
if mapping:
print(f"{mapping[0]} = {'/'.join(mapping[1])}")
sys.exit(0)
Expand Down Expand Up @@ -166,7 +166,7 @@ if __name__ == "__main__":

# Handle --splits option
if args.splits:
find_broad_splits()
find_broad_splits(ard)

# Handle --expand-mac option
if args.expand_mac:
Expand All @@ -178,7 +178,7 @@ if __name__ == "__main__":

# Handle --similar option
if args.similar_allele:
find_similar_alleles(args.similar_allele)
find_similar_alleles(ard, args.similar_allele)

try:
if args.cwd:
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 1.0.11
current_version = 1.1.0
commit = True
tag = True

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@

setup(
name="py-ard",
version="1.0.11",
version="1.1.0",
description="ARD reduction for HLA with Python",
long_description=readme,
long_description_content_type="text/markdown",
Expand Down
16 changes: 16 additions & 0 deletions tests/features/broad_splits.feature
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,19 @@ Feature: Broad Splits for DNA/Serology
| DQB1*05 | DQB1*06 | DQB1*01 |
| B*55 | B*54/B*56 | B*22 |
| A25 | A26/A34/A66 | A10 |


Scenario Outline: Associated Serology

Given the serology antigen is <Serology>
When looking for associated serology
Then the associated serology is found to be <Associated Serology>

Examples: Serology to Associated Serology
| Serology | Associated Serology |
| A23 | A23 |
| A24 | A24 |
| A2403 | A24 |
| DR1403 | DR14 |
| DR1404 | DR14 |
| B5 | B5 |
Loading
Loading