From 31f133787bf817e19f0867d00ee04ed9c959e5cb Mon Sep 17 00:00:00 2001 From: Pradeep Bashyal Date: Fri, 15 Dec 2023 08:59:24 -0600 Subject: [PATCH] Update cwd2 (#295) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update CWD2 - Update CWD2 mapping to one from igdawg. - Update cwd redux to handle MACs and nulls without reductions * Update Python to 3.11 for Docker * Bump version: 1.0.8 → 1.0.9 --- Dockerfile | 4 +- api-spec.yaml | 4 +- pyard/CWD2.csv | 855 ++++++++++++++++++++++++++++++++++--- pyard/__init__.py | 2 +- pyard/ard.py | 89 ++-- setup.cfg | 2 +- setup.py | 2 +- tests/features/cwd.feature | 21 +- tests/steps/cwd.py | 2 +- 9 files changed, 871 insertions(+), 110 deletions(-) diff --git a/Dockerfile b/Dockerfile index eb4d101..9f867bb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,10 +1,10 @@ -FROM python:3.10-slim-buster +FROM python:3.11-slim-buster LABEL MAINTAINER="Pradeep Bashyal" WORKDIR /app -ARG PY_ARD_VERSION=1.0.8 +ARG PY_ARD_VERSION=1.0.9 COPY requirements.txt /app RUN pip install --no-cache-dir --upgrade pip && \ diff --git a/api-spec.yaml b/api-spec.yaml index 3d9fc39..d7df6ba 100644 --- a/api-spec.yaml +++ b/api-spec.yaml @@ -2,7 +2,7 @@ openapi: 3.0.3 info: title: ARD Reduction description: Reduce to ARD Level - version: "1.0.8" + version: "1.0.9" servers: - url: 'http://localhost:8080' tags: @@ -126,7 +126,7 @@ paths: cwd_mac: description: MAC of reduced CWD if available type: string - example: "A*26:BYHR" + example: "" 400: description: Invalid GL String Form content: diff --git a/pyard/CWD2.csv b/pyard/CWD2.csv index 9f66a10..4ec1e60 100644 --- a/pyard/CWD2.csv +++ b/pyard/CWD2.csv @@ -2,6 +2,12 @@ LOCUS,ALLELE A,A*01:01 A,A*01:02 A,A*01:03 +A,A*01:04N +A,A*01:06 +A,A*01:09 +A,A*01:12 +A,A*01:17 +A,A*01:25 A,A*02:01 A,A*02:02 A,A*02:03 @@ -13,58 +19,277 @@ A,A*02:08 A,A*02:09 A,A*02:10 A,A*02:11 +A,A*02:12 +A,A*02:13 A,A*02:14 +A,A*02:16 A,A*02:17 +A,A*02:18 +A,A*02:19 A,A*02:20 +A,A*02:21 
+A,A*02:22 +A,A*02:24 +A,A*02:25 +A,A*02:27 +A,A*02:29 +A,A*02:30 +A,A*02:33 +A,A*02:34 +A,A*02:35 +A,A*02:36 +A,A*02:38 +A,A*02:39 +A,A*02:44 +A,A*02:45 +A,A*02:49 +A,A*02:53N +A,A*02:58 +A,A*02:60 +A,A*02:63 +A,A*02:64 +A,A*02:74 +A,A*02:84 +A,A*02:85 +A,A*02:86 +A,A*02:87 +A,A*02:93 +A,A*02:96 +A,A*02:119 +A,A*02:122 +A,A*02:123 +A,A*02:131 +A,A*02:137 A,A*03:01 A,A*03:02 +A,A*03:05 +A,A*03:06 +A,A*03:07 +A,A*03:08 +A,A*03:09 +A,A*03:17 +A,A*03:21N +A,A*03:22 +A,A*03:26 +A,A*03:50 +A,A*03:108 A,A*11:01 A,A*11:02 +A,A*11:03 +A,A*11:04 +A,A*11:05 +A,A*11:08 +A,A*11:09 +A,A*11:10 +A,A*11:12 +A,A*11:19 +A,A*11:29 +A,A*11:30 A,A*23:01 +A,A*23:03 +A,A*23:04 +A,A*23:05 +A,A*23:06 +A,A*23:13 +A,A*23:15 +A,A*23:17 +A,A*23:19Q A,A*24:02 +A,A*24:02L A,A*24:03 +A,A*24:04 +A,A*24:05 +A,A*24:06 A,A*24:07 +A,A*24:08 +A,A*24:09N A,A*24:10 +A,A*24:11N +A,A*24:13 +A,A*24:14 +A,A*24:15 +A,A*24:17 +A,A*24:20 +A,A*24:22 +A,A*24:23 +A,A*24:25 +A,A*24:26 +A,A*24:29 +A,A*24:30 +A,A*24:31 +A,A*24:32 A,A*24:33 +A,A*24:35 +A,A*24:37 +A,A*24:38 +A,A*24:43 +A,A*24:47 +A,A*24:53 +A,A*24:56 +A,A*24:57 +A,A*24:58 +A,A*24:64 +A,A*24:72 +A,A*24:81 +A,A*24:90N +A,A*24:95 A,A*25:01 +A,A*25:02 A,A*26:01 +A,A*26:02 +A,A*26:03 +A,A*26:07 A,A*26:08 +A,A*26:09 +A,A*26:12 +A,A*26:13 +A,A*26:14 +A,A*26:15 +A,A*26:16 +A,A*26:17 +A,A*26:18 +A,A*26:20 +A,A*26:31 A,A*29:01 A,A*29:02 +A,A*29:04 +A,A*29:09 +A,A*29:10 +A,A*29:12 A,A*30:01 A,A*30:02 +A,A*30:03 A,A*30:04 +A,A*30:07 +A,A*30:08 +A,A*30:09 +A,A*30:10 +A,A*30:11 +A,A*30:15 +A,A*30:16 A,A*31:01 +A,A*31:02 +A,A*31:03 +A,A*31:04 +A,A*31:06 +A,A*31:08 +A,A*31:09 +A,A*31:11 +A,A*31:12 +A,A*31:13 +A,A*31:15 +A,A*31:16 +A,A*31:18 +A,A*31:20 +A,A*31:26 A,A*32:01 +A,A*32:02 +A,A*32:03 +A,A*32:04 +A,A*32:06 +A,A*32:07 +A,A*32:08 A,A*33:01 A,A*33:03 +A,A*33:05 +A,A*33:09 A,A*34:01 A,A*34:02 +A,A*34:05 A,A*36:01 +A,A*36:02 +A,A*36:03 +A,A*36:04 A,A*43:01 A,A*66:01 A,A*66:02 -A,A*68:01 +A,A*66:03 +A,A*66:04 A,A*68:01 A,A*68:02 A,A*68:03 A,A*68:05 +A,A*68:06 
+A,A*68:07 +A,A*68:08 +A,A*68:10 +A,A*68:11N +A,A*68:12 +A,A*68:15 +A,A*68:17 +A,A*68:20 +A,A*68:22 +A,A*68:24 +A,A*68:25 +A,A*68:27 +A,A*68:30 +A,A*68:31 +A,A*68:35 +A,A*68:37 +A,A*68:40 A,A*69:01 A,A*74:01 +A,A*74:03 +A,A*74:05 +A,A*74:06 +A,A*74:09 +A,A*74:10 +A,A*74:11 +A,A*74:13 A,A*80:01 B,B*07:02 +B,B*07:03 B,B*07:04 B,B*07:05 B,B*07:06 +B,B*07:07 +B,B*07:08 +B,B*07:09 B,B*07:10 +B,B*07:12 +B,B*07:13 +B,B*07:14 +B,B*07:15 +B,B*07:16 +B,B*07:20 +B,B*07:23 +B,B*07:26 +B,B*07:36 +B,B*07:37 +B,B*07:42 +B,B*07:43 +B,B*07:46 +B,B*07:51 B,B*08:01 +B,B*08:02 +B,B*08:03 +B,B*08:04 +B,B*08:07 +B,B*08:09 +B,B*08:13 +B,B*08:18 +B,B*08:20 +B,B*08:23 +B,B*08:35 B,B*13:01 B,B*13:02 +B,B*13:03 +B,B*13:04 +B,B*13:08Q +B,B*13:10 +B,B*13:12 B,B*14:01 B,B*14:02 +B,B*14:03 +B,B*14:05 +B,B*14:09 B,B*15:01 +B,B*15:01N B,B*15:02 B,B*15:03 +B,B*15:04 +B,B*15:05 +B,B*15:06 B,B*15:07 B,B*15:08 B,B*15:09 @@ -72,21 +297,67 @@ B,B*15:10 B,B*15:11 B,B*15:12 B,B*15:13 +B,B*15:14 B,B*15:15 B,B*15:16 B,B*15:17 B,B*15:18 +B,B*15:20 B,B*15:21 +B,B*15:23 +B,B*15:24 B,B*15:25 +B,B*15:27 +B,B*15:28 +B,B*15:29 B,B*15:30 +B,B*15:31 B,B*15:32 +B,B*15:33 B,B*15:34 B,B*15:35 +B,B*15:37 +B,B*15:38 B,B*15:39 +B,B*15:40 +B,B*15:42 +B,B*15:44 +B,B*15:45 +B,B*15:46 +B,B*15:47 +B,B*15:48 +B,B*15:52 +B,B*15:53 +B,B*15:54 +B,B*15:55 +B,B*15:56 +B,B*15:58 +B,B*15:63 +B,B*15:65 +B,B*15:71 +B,B*15:73 +B,B*15:75 +B,B*15:82 +B,B*15:113 +B,B*15:125 +B,B*15:132 +B,B*15:141 B,B*18:01 B,B*18:02 B,B*18:03 B,B*18:04 +B,B*18:05 +B,B*18:06 +B,B*18:08 +B,B*18:11 +B,B*18:12 +B,B*18:13 +B,B*18:14 +B,B*18:18 +B,B*18:20 +B,B*18:25 +B,B*18:33 +B,B*27:01 B,B*27:02 B,B*27:03 B,B*27:04 @@ -94,185 +365,383 @@ B,B*27:05 B,B*27:06 B,B*27:07 B,B*27:08 +B,B*27:09 +B,B*27:10 +B,B*27:12 +B,B*27:14 +B,B*27:15 +B,B*27:17 +B,B*27:19 +B,B*27:20 +B,B*27:24 +B,B*27:27 B,B*35:01 B,B*35:02 B,B*35:03 +B,B*35:04 B,B*35:05 +B,B*35:06 B,B*35:08 +B,B*35:09 +B,B*35:10 +B,B*35:11 B,B*35:12 +B,B*35:13 B,B*35:14 +B,B*35:15 B,B*35:16 B,B*35:17 +B,B*35:18 
+B,B*35:19 B,B*35:20 B,B*35:21 +B,B*35:22 +B,B*35:23 +B,B*35:24 +B,B*35:25 +B,B*35:26 +B,B*35:27 +B,B*35:28 +B,B*35:29 +B,B*35:30 +B,B*35:31 +B,B*35:32 +B,B*35:34 +B,B*35:37 +B,B*35:41 B,B*35:43 +B,B*35:47 +B,B*35:48 +B,B*35:49 +B,B*35:55 +B,B*35:77 B,B*37:01 +B,B*37:02 +B,B*37:03N +B,B*37:07 B,B*38:01 B,B*38:02 +B,B*38:06 +B,B*38:09 B,B*39:01 B,B*39:02 +B,B*39:03 +B,B*39:04 B,B*39:05 B,B*39:06 +B,B*39:07 B,B*39:08 +B,B*39:09 B,B*39:10 +B,B*39:11 +B,B*39:12 B,B*39:13 +B,B*39:14 B,B*39:15 +B,B*39:20 +B,B*39:22 B,B*39:24 +B,B*39:26 +B,B*39:31 +B,B*39:37 +B,B*39:39 B,B*40:01 B,B*40:02 +B,B*40:03 +B,B*40:04 B,B*40:05 B,B*40:06 +B,B*40:07 B,B*40:08 +B,B*40:09 B,B*40:10 +B,B*40:11 +B,B*40:12 +B,B*40:13 +B,B*40:16 +B,B*40:19 +B,B*40:20 +B,B*40:22N +B,B*40:23 +B,B*40:25 +B,B*40:26 B,B*40:27 +B,B*40:30 +B,B*40:32 +B,B*40:40 +B,B*40:43 +B,B*40:50 +B,B*40:64 +B,B*40:90 +B,B*40:94 +B,B*40:114 +B,B*40:129 B,B*41:01 B,B*41:02 B,B*41:03 +B,B*41:07 B,B*42:01 B,B*42:02 +B,B*42:05 B,B*44:02 B,B*44:03 -B,B*44:03 B,B*44:04 B,B*44:05 +B,B*44:06 +B,B*44:07 +B,B*44:08 +B,B*44:09 B,B*44:10 +B,B*44:12 +B,B*44:15 +B,B*44:18 +B,B*44:20 +B,B*44:21 +B,B*44:22 +B,B*44:23N +B,B*44:24 +B,B*44:25 B,B*44:27 +B,B*44:29 +B,B*44:50 B,B*45:01 B,B*46:01 +B,B*46:02 B,B*47:01 +B,B*47:02 +B,B*47:03 B,B*48:01 +B,B*48:02 B,B*48:03 +B,B*48:04 +B,B*48:05 +B,B*48:06 +B,B*48:07 +B,B*48:12 +B,B*48:13 B,B*49:01 B,B*50:01 B,B*50:02 +B,B*50:04 B,B*51:01 B,B*51:02 +B,B*51:03 +B,B*51:04 +B,B*51:05 B,B*51:06 B,B*51:07 B,B*51:08 B,B*51:09 +B,B*51:10 +B,B*51:11N +B,B*51:13 +B,B*51:14 +B,B*51:17 +B,B*51:19 +B,B*51:21 +B,B*51:22 +B,B*51:23 +B,B*51:29 +B,B*51:34 +B,B*51:40 +B,B*51:43 +B,B*51:49 +B,B*51:50 +B,B*51:55 B,B*52:01 -B,B*52:01 +B,B*52:02 +B,B*52:03 +B,B*52:04 +B,B*52:21 B,B*53:01 +B,B*53:02 +B,B*53:04 +B,B*53:05 +B,B*53:06 B,B*54:01 B,B*55:01 B,B*55:02 +B,B*55:04 +B,B*55:07 +B,B*55:12 +B,B*55:23 B,B*56:01 +B,B*56:02 +B,B*56:03 +B,B*56:04 +B,B*56:10 +B,B*56:15 +B,B*56:16 B,B*57:01 B,B*57:02 B,B*57:03 +B,B*57:04 
+B,B*57:05 B,B*58:01 B,B*58:02 +B,B*58:04 +B,B*58:06 B,B*59:01 B,B*67:01 +B,B*67:02 B,B*73:01 B,B*78:01 +B,B*78:02 +B,B*78:04 B,B*81:01 +B,B*81:02 B,B*82:01 +B,B*82:02 C,C*01:02 +C,C*01:03 +C,C*01:06 +C,C*01:07 +C,C*01:08 +C,C*01:10 +C,C*01:17 +C,C*01:27 C,C*02:02 +C,C*02:04 +C,C*02:06 +C,C*02:07 +C,C*02:08 C,C*02:10 +C,C*02:14 +C,C*02:19 C,C*03:02 C,C*03:03 -C,C*03:03 -C,C*03:04 C,C*03:04 C,C*03:05 C,C*03:06 +C,C*03:07 +C,C*03:08 +C,C*03:09 +C,C*03:10 +C,C*03:14 +C,C*03:16 +C,C*03:17 +C,C*03:19 +C,C*03:35 +C,C*03:36 +C,C*03:40 +C,C*03:88 C,C*04:01 C,C*04:03 +C,C*04:04 +C,C*04:05 +C,C*04:06 C,C*04:07 -C,C*04:09 +C,C*04:08 +C,C*04:09N +C,C*04:10 +C,C*04:13 +C,C*04:19 +C,C*04:27 +C,C*04:82 C,C*05:01 -C,C*05:07 +C,C*05:04 +C,C*05:05 +C,C*05:07N +C,C*05:08 +C,C*05:09 +C,C*05:10 +C,C*05:14 C,C*06:02 +C,C*06:04 +C,C*06:06 +C,C*06:07 +C,C*06:08 +C,C*06:09 +C,C*06:27 C,C*07:01 C,C*07:02 C,C*07:04 +C,C*07:05 C,C*07:06 +C,C*07:07 +C,C*07:10 +C,C*07:104N +C,C*07:12 +C,C*07:13 +C,C*07:17 C,C*07:18 +C,C*07:19 +C,C*07:21 +C,C*07:22 +C,C*07:24 +C,C*07:25 +C,C*07:26 +C,C*07:27 +C,C*07:29 +C,C*07:32N +C,C*07:35 +C,C*07:40 +C,C*07:43 +C,C*07:46 +C,C*07:56 +C,C*07:60 +C,C*07:61N +C,C*07:66 +C,C*07:67 +C,C*07:72 C,C*08:01 C,C*08:02 C,C*08:03 C,C*08:04 +C,C*08:05 C,C*08:06 +C,C*08:10 +C,C*08:12 +C,C*08:13 +C,C*08:22 C,C*12:02 C,C*12:03 C,C*12:04 +C,C*12:05 +C,C*12:09 +C,C*12:12 +C,C*12:13 +C,C*12:16 C,C*14:02 C,C*14:03 +C,C*14:04 +C,C*14:05 +C,C*14:11 C,C*15:02 +C,C*15:03 C,C*15:04 C,C*15:05 C,C*15:06 +C,C*15:07 +C,C*15:08 +C,C*15:09 +C,C*15:11 +C,C*15:13 +C,C*15:22 C,C*16:01 C,C*16:02 C,C*16:04 C,C*17:01 +C,C*17:03 C,C*18:01 C,C*18:02 -DPB1,DPB1*01:01 -DPB1,DPB1*02:01 -DPB1,DPB1*02:02 -DPB1,DPB1*03:01 -DPB1,DPB1*04:01 -DPB1,DPB1*04:02 -DPB1,DPB1*05:01 -DPB1,DPB1*06:01 -DPB1,DPB1*105:01 -DPB1,DPB1*09:01 -DPB1,DPB1*10:01 -DPB1,DPB1*11:01 -DPB1,DPB1*13:01 -DPB1,DPB1*14:01 -DPB1,DPB1*15:01 -DPB1,DPB1*16:01 -DPB1,DPB1*17:01 -DPB1,DPB1*18:01 -DPB1,DPB1*19:01 -DPB1,DPB1*20:01 
-DPB1,DPB1*21:01 -DPB1,DPB1*23:01 -DPB1,DPB1*30:01 -DPB1,DPB1*40:01 -DPB1,DPB1*85:01 -DQA1,DQA1*01:01 -DQA1,DQA1*01:02 -DQA1,DQA1*01:03 -DQA1,DQA1*01:04 -DQA1,DQA1*02:01 -DQA1,DQA1*03:01 -DQA1,DQA1*03:02 -DQA1,DQA1*03:03 -DQA1,DQA1*04:01 -DQA1,DQA1*05:01 -DQA1,DQA1*05:03 -DQA1,DQA1*05:05 -DQA1,DQA1*06:01 -DQB1,DQB1*02:01 -DQB1,DQB1*02:02 -DQB1,DQB1*03:01 -DQB1,DQB1*03:02 -DQB1,DQB1*03:03 -DQB1,DQB1*03:04 -DQB1,DQB1*03:05 -DQB1,DQB1*03:19 -DQB1,DQB1*04:01 -DQB1,DQB1*04:02 -DQB1,DQB1*05:01 -DQB1,DQB1*05:02 -DQB1,DQB1*05:03 -DQB1,DQB1*05:04 -DQB1,DQB1*06:01 -DQB1,DQB1*06:02 -DQB1,DQB1*06:03 -DQB1,DQB1*06:04 -DQB1,DQB1*06:09 DRB1,DRB1*01:01 DRB1,DRB1*01:02 DRB1,DRB1*01:03 +DRB1,DRB1*01:04 +DRB1,DRB1*01:07 +DRB1,DRB1*01:11 DRB1,DRB1*03:01 DRB1,DRB1*03:02 +DRB1,DRB1*03:03 +DRB1,DRB1*03:04 +DRB1,DRB1*03:05 +DRB1,DRB1*03:06 +DRB1,DRB1*03:07 +DRB1,DRB1*03:15 +DRB1,DRB1*03:16 +DRB1,DRB1*03:22 +DRB1,DRB1*03:23 DRB1,DRB1*04:01 DRB1,DRB1*04:02 DRB1,DRB1*04:03 @@ -281,40 +750,140 @@ DRB1,DRB1*04:05 DRB1,DRB1*04:06 DRB1,DRB1*04:07 DRB1,DRB1*04:08 +DRB1,DRB1*04:09 DRB1,DRB1*04:10 DRB1,DRB1*04:11 +DRB1,DRB1*04:12 +DRB1,DRB1*04:13 +DRB1,DRB1*04:14 +DRB1,DRB1*04:15 +DRB1,DRB1*04:16 +DRB1,DRB1*04:17 +DRB1,DRB1*04:18 +DRB1,DRB1*04:19 +DRB1,DRB1*04:22 +DRB1,DRB1*04:23 +DRB1,DRB1*04:25 +DRB1,DRB1*04:26 +DRB1,DRB1*04:33 +DRB1,DRB1*04:35 DRB1,DRB1*04:38 +DRB1,DRB1*04:40 +DRB1,DRB1*04:41 +DRB1,DRB1*04:50 +DRB1,DRB1*04:51 +DRB1,DRB1*04:54 DRB1,DRB1*07:01 +DRB1,DRB1*07:03 +DRB1,DRB1*07:05 +DRB1,DRB1*07:07 +DRB1,DRB1*07:11 +DRB1,DRB1*07:13 DRB1,DRB1*08:01 DRB1,DRB1*08:02 DRB1,DRB1*08:03 DRB1,DRB1*08:04 -DRB1,DRB1*08:04 +DRB1,DRB1*08:05 DRB1,DRB1*08:06 +DRB1,DRB1*08:07 +DRB1,DRB1*08:08 +DRB1,DRB1*08:09 DRB1,DRB1*08:10 DRB1,DRB1*08:11 +DRB1,DRB1*08:12 +DRB1,DRB1*08:13 DRB1,DRB1*08:14 +DRB1,DRB1*08:17 +DRB1,DRB1*08:18 +DRB1,DRB1*08:19 +DRB1,DRB1*08:20 +DRB1,DRB1*08:26 +DRB1,DRB1*08:28 DRB1,DRB1*09:01 +DRB1,DRB1*09:04 +DRB1,DRB1*09:06 DRB1,DRB1*10:01 DRB1,DRB1*11:01 -DRB1,DRB1*11:01 
DRB1,DRB1*11:02 DRB1,DRB1*11:03 DRB1,DRB1*11:04 +DRB1,DRB1*11:05 DRB1,DRB1*11:06 +DRB1,DRB1*11:07 +DRB1,DRB1*11:08 DRB1,DRB1*11:09 DRB1,DRB1*11:10 +DRB1,DRB1*11:11 DRB1,DRB1*11:12 +DRB1,DRB1*11:13 DRB1,DRB1*11:14 DRB1,DRB1*11:15 +DRB1,DRB1*11:16 +DRB1,DRB1*11:17 +DRB1,DRB1*11:18 +DRB1,DRB1*11:19 +DRB1,DRB1*11:20 +DRB1,DRB1*11:24 +DRB1,DRB1*11:25 +DRB1,DRB1*11:28 +DRB1,DRB1*11:29 +DRB1,DRB1*11:32 +DRB1,DRB1*11:34 +DRB1,DRB1*11:36 +DRB1,DRB1*11:37 +DRB1,DRB1*11:39 +DRB1,DRB1*11:42 +DRB1,DRB1*11:43 +DRB1,DRB1*11:45 +DRB1,DRB1*11:47 +DRB1,DRB1*11:56 +DRB1,DRB1*11:66 +DRB1,DRB1*11:69 DRB1,DRB1*12:01 DRB1,DRB1*12:02 +DRB1,DRB1*12:04 +DRB1,DRB1*12:05 +DRB1,DRB1*12:10 DRB1,DRB1*13:01 DRB1,DRB1*13:02 DRB1,DRB1*13:03 DRB1,DRB1*13:04 DRB1,DRB1*13:05 +DRB1,DRB1*13:06 +DRB1,DRB1*13:07 +DRB1,DRB1*13:08 +DRB1,DRB1*13:09 DRB1,DRB1*13:10 +DRB1,DRB1*13:11 +DRB1,DRB1*13:12 +DRB1,DRB1*13:13 +DRB1,DRB1*13:14 +DRB1,DRB1*13:15 +DRB1,DRB1*13:16 +DRB1,DRB1*13:17 +DRB1,DRB1*13:18 +DRB1,DRB1*13:19 +DRB1,DRB1*13:20 +DRB1,DRB1*13:22 +DRB1,DRB1*13:25 +DRB1,DRB1*13:26 +DRB1,DRB1*13:27 +DRB1,DRB1*13:29 +DRB1,DRB1*13:31 +DRB1,DRB1*13:36 +DRB1,DRB1*13:37 +DRB1,DRB1*13:38 +DRB1,DRB1*13:39 +DRB1,DRB1*13:40 +DRB1,DRB1*13:41 +DRB1,DRB1*13:42 +DRB1,DRB1*13:49 +DRB1,DRB1*13:56 +DRB1,DRB1*13:59 +DRB1,DRB1*13:60 +DRB1,DRB1*13:76 +DRB1,DRB1*13:81 +DRB1,DRB1*13:82 DRB1,DRB1*14:01 DRB1,DRB1*14:02 DRB1,DRB1*14:03 @@ -322,26 +891,174 @@ DRB1,DRB1*14:04 DRB1,DRB1*14:05 DRB1,DRB1*14:06 DRB1,DRB1*14:07 +DRB1,DRB1*14:08 +DRB1,DRB1*14:09 +DRB1,DRB1*14:10 +DRB1,DRB1*14:11 +DRB1,DRB1*14:12 +DRB1,DRB1*14:13 +DRB1,DRB1*14:14 +DRB1,DRB1*14:15 +DRB1,DRB1*14:16 +DRB1,DRB1*14:17 DRB1,DRB1*14:18 +DRB1,DRB1*14:19 +DRB1,DRB1*14:20 +DRB1,DRB1*14:21 +DRB1,DRB1*14:22 +DRB1,DRB1*14:24 +DRB1,DRB1*14:25 +DRB1,DRB1*14:28 +DRB1,DRB1*14:29 +DRB1,DRB1*14:33 +DRB1,DRB1*14:48 DRB1,DRB1*14:54 +DRB1,DRB1*14:61 +DRB1,DRB1*14:70 DRB1,DRB1*15:01 DRB1,DRB1*15:02 DRB1,DRB1*15:03 +DRB1,DRB1*15:04 DRB1,DRB1*15:06 +DRB1,DRB1*15:07 +DRB1,DRB1*15:10 
+DRB1,DRB1*15:11 +DRB1,DRB1*15:14 +DRB1,DRB1*15:18 +DRB1,DRB1*15:20 +DRB1,DRB1*15:22 +DRB1,DRB1*15:23 +DRB1,DRB1*15:24 +DRB1,DRB1*15:38 DRB1,DRB1*16:01 DRB1,DRB1*16:02 +DRB1,DRB1*16:04 +DRB1,DRB1*16:05 +DRB1,DRB1*16:07 +DRB1,DRB1*16:10 +DRB1,DRB1*16:12 DRB3,DRB3*01:01 -DRB3,DRB3*01:01 +DRB3,DRB3*01:02 +DRB3,DRB3*01:03 DRB3,DRB3*02:01 DRB3,DRB3*02:02 DRB3,DRB3*02:03 +DRB3,DRB3*02:06 +DRB3,DRB3*02:10 +DRB3,DRB3*02:11 +DRB3,DRB3*02:17 DRB3,DRB3*03:01 DRB4,DRB4*01:01 DRB4,DRB4*01:02 DRB4,DRB4*01:03 -DRB4,DRB4*01:03 -DRB5,DRB5*01:01 +DRB4,DRB4*01:03N +DRB4,DRB4*02:01N +DRB4,DRB4*03:01N DRB5,DRB5*01:01 DRB5,DRB5*01:02 +DRB5,DRB5*01:03 +DRB5,DRB5*01:08N +DRB5,DRB5*01:10N DRB5,DRB5*02:02 DRB5,DRB5*02:03 +DQA1,DQA1*01:01 +DQA1,DQA1*01:02 +DQA1,DQA1*01:03 +DQA1,DQA1*01:04 +DQA1,DQA1*01:05 +DQA1,DQA1*02:01 +DQA1,DQA1*03:01 +DQA1,DQA1*03:02 +DQA1,DQA1*03:03 +DQA1,DQA1*04:01 +DQA1,DQA1*05:01 +DQA1,DQA1*05:02 +DQA1,DQA1*05:03 +DQA1,DQA1*05:05 +DQA1,DQA1*05:08 +DQA1,DQA1*05:09 +DQA1,DQA1*06:01 +DQB1,DQB1*02:01 +DQB1,DQB1*02:02 +DQB1,DQB1*02:03 +DQB1,DQB1*03:01 +DQB1,DQB1*03:02 +DQB1,DQB1*03:03 +DQB1,DQB1*03:04 +DQB1,DQB1*03:05 +DQB1,DQB1*03:09 +DQB1,DQB1*03:13 +DQB1,DQB1*03:19 +DQB1,DQB1*04:01 +DQB1,DQB1*04:02 +DQB1,DQB1*05:01 +DQB1,DQB1*05:02 +DQB1,DQB1*05:03 +DQB1,DQB1*05:04 +DQB1,DQB1*06:01 +DQB1,DQB1*06:02 +DQB1,DQB1*06:03 +DQB1,DQB1*06:04 +DQB1,DQB1*06:05 +DQB1,DQB1*06:08 +DQB1,DQB1*06:09 +DQB1,DQB1*06:10 +DQB1,DQB1*06:11 +DPA1,DPA1*01:03 +DPA1,DPA1*01:04 +DPA1,DPA1*02:01 +DPA1,DPA1*02:02 +DPA1,DPA1*03:01 +DPA1,DPA1*04:01 +DPB1,DPB1*01:01 +DPB1,DPB1*02:01 +DPB1,DPB1*02:02 +DPB1,DPB1*03:01 +DPB1,DPB1*04:01 +DPB1,DPB1*04:02 +DPB1,DPB1*05:01 +DPB1,DPB1*06:01 +DPB1,DPB1*09:01 +DPB1,DPB1*10:01 +DPB1,DPB1*11:01 +DPB1,DPB1*13:01 +DPB1,DPB1*14:01 +DPB1,DPB1*15:01 +DPB1,DPB1*16:01 +DPB1,DPB1*17:01 +DPB1,DPB1*18:01 +DPB1,DPB1*19:01 +DPB1,DPB1*20:01 +DPB1,DPB1*21:01 +DPB1,DPB1*22:01 +DPB1,DPB1*23:01 +DPB1,DPB1*26:01 +DPB1,DPB1*27:01 +DPB1,DPB1*28:01 +DPB1,DPB1*29:01 +DPB1,DPB1*30:01 
+DPB1,DPB1*31:01 +DPB1,DPB1*33:01 +DPB1,DPB1*34:01 +DPB1,DPB1*35:01 +DPB1,DPB1*36:01 +DPB1,DPB1*38:01 +DPB1,DPB1*39:01 +DPB1,DPB1*40:01 +DPB1,DPB1*45:01 +DPB1,DPB1*46:01 +DPB1,DPB1*47:01 +DPB1,DPB1*49:01 +DPB1,DPB1*51:01 +DPB1,DPB1*55:01 +DPB1,DPB1*57:01 +DPB1,DPB1*59:01 +DPB1,DPB1*63:01 +DPB1,DPB1*69:01 +DPB1,DPB1*72:01 +DPB1,DPB1*78:01 +DPB1,DPB1*81:01 +DPB1,DPB1*85:01 +DPB1,DPB1*104:01 +DPB1,DPB1*105:01 +DPB1,DPB1*106:01 diff --git a/pyard/__init__.py b/pyard/__init__.py index 39fe97a..a0d5ad3 100644 --- a/pyard/__init__.py +++ b/pyard/__init__.py @@ -27,7 +27,7 @@ from .misc import get_imgt_db_versions as db_versions __author__ = """NMDP Bioinformatics""" -__version__ = "1.0.8" +__version__ = "1.0.9" def init( diff --git a/pyard/ard.py b/pyard/ard.py index ca49965..12d6e94 100644 --- a/pyard/ard.py +++ b/pyard/ard.py @@ -258,7 +258,7 @@ def _redux_allele( return allele # If the 2 field reduction is unambiguous, reduce to 2 field level allele_2_fields = get_n_field_allele(allele, 2, preserve_expression=True) - if self._is_valid_allele(allele_2_fields): + if self._is_allele_in_db(allele_2_fields): return allele_2_fields else: # If ambiguous, reduce to G group level @@ -290,7 +290,7 @@ def _redux_allele( if allele in self.ars_mappings.g_group.values(): return allele - if self._is_valid_allele(allele): + if self._is_allele_in_db(allele): return allele else: raise InvalidAlleleError(f"{allele} is an invalid allele.") @@ -303,9 +303,9 @@ def _get_non_strict_allele(self, allele): @param allele: allele that might have non-strict version @return: non-strict version of the allele if it exists """ - if not self._is_valid_allele(allele): + if not self._is_allele_in_db(allele): for expr_char in expression_chars: - if self._is_valid_allele(allele + expr_char): + if self._is_allele_in_db(allele + expr_char): if self._config["verbose_log"]: print(f"{allele} is not valid. 
Using {allele}{expr_char}") allele = allele + expr_char @@ -559,7 +559,7 @@ def is_v2(self, allele: str) -> bool: # If the last field of the allele is alpha, check if it's a MAC if v3_format_allele.split(":").pop().isalpha(): return self.is_mac(v3_format_allele) - return self._is_valid_allele(v3_format_allele) + return self._is_allele_in_db(v3_format_allele) return False @@ -571,7 +571,7 @@ def _is_who_allele(self, allele): """ return allele in self.allele_group.who_alleles - def _is_valid_allele(self, allele): + def _is_allele_in_db(self, allele): """ Test if allele is valid in the current imgt database :param allele: Allele to test @@ -623,11 +623,11 @@ def _get_alleles(self, code, locus_antigen) -> Iterable[str]: else: alleles = [f"{locus_antigen}:{a}" for a in alleles] - return list(filter(self._is_valid_allele, alleles)) + return list(filter(self._is_allele_in_db, alleles)) def _get_alleles_from_serology(self, serology) -> Iterable[str]: alleles = db.serology_to_alleles(self.db_connection, serology) - return filter(self._is_valid_allele, alleles) + return filter(self._is_allele_in_db, alleles) @staticmethod def _combine_with_colon(digits_field): @@ -716,24 +716,33 @@ def _is_valid(self, allele: str) -> bool: and not self.is_v2(allele) and not self.is_shortnull(allele) ): - # Alleles ending with P or G are valid_alleles - if allele.endswith(("P", "G")): - # remove the last character - allele = allele[:-1] - # validate format: there are no empty fields eg, 2 :: together - if "*" in allele: - _, fields = allele.split("*") - if not all(map(str.isalnum, fields.split(":"))): - return False - # The allele is valid as whole or as a 2 field version - if self._is_valid_allele(allele): - return True - else: - allele = get_2field_allele(allele) - return self._is_valid_allele(allele) + return self._is_valid_allele(allele) return True + def _is_valid_allele(self, allele): + """ + Is the given allele valid? 
+ + @param allele: + @return: + """ + # Alleles ending with P or G are valid_alleles + if allele.endswith(("P", "G")): + # remove the last character + allele = allele[:-1] + # validate format: there are no empty fields eg, 2 :: together + if "*" in allele: + _, fields = allele.split("*") + if not all(map(str.isalnum, fields.split(":"))): + return False + # The allele is valid as whole or as a 2 field version + if self._is_allele_in_db(allele): + return True + else: + allele = get_2field_allele(allele) + return self._is_allele_in_db(allele) + def _is_valid_gl(self, glstring: str) -> bool: """ Determines validity of glstring @@ -815,16 +824,38 @@ def lookup_mac(self, allelelist_gl: str): raise InvalidMACError(f"{allelelist_gl} does not have a MAC.") def cwd_redux(self, allele_list_gl): - lgx_redux = self.redux(allele_list_gl, "lgx") + """ + Reduce alleles from allele_list_gl to a list that + consists of only ones appearing in CWD 2 + + If it's a MAC, use the expanded list to compare with CWD list + if it's an allele(may have null), use the allele. 
+ + @param allele_list_gl: allele, allele list or MAC + @return: CWD alleles as an allele list GL String + """ + alleles = [] + for allele in allele_list_gl.split("/"): + if self.is_mac(allele): + alleles.extend(self.expand_mac(allele).split("/")) + elif is_2_field_allele(allele) and not self.is_mac(allele): + alleles.append(allele) + else: + alleles.extend(self.redux(allele, "lgx").split("/")) + + # get the CWD for the locus and find the containing CWD alleles locus = allele_list_gl.split("*")[0] if HLA_regex.search(locus): locus = locus.split("-")[1] ciwd_for_locus = db.load_cwd(self.db_connection, locus) - lgx_redux_alleles = set(lgx_redux.split("/")) - alleles_in_ciwd = ciwd_for_locus.intersection(lgx_redux_alleles) - sorted_alleles = sorted( - alleles_in_ciwd, key=functools.cmp_to_key(self.smart_sort_comparator) - ) + + alleles_in_ciwd = ciwd_for_locus.intersection(alleles) + sorted_alleles = sorted(alleles_in_ciwd) + # TODO: doesn't sort when compared with sorting with null + # E.g. 
B*15:01/B*15:01N + # sorted_alleles = sorted( + # alleles_in_ciwd, key=functools.cmp_to_key(self.smart_sort_comparator) + # ) return "/".join(sorted_alleles) def v2_to_v3(self, v2_allele) -> str: diff --git a/setup.cfg b/setup.cfg index d178781..0c1b534 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.0.8 +current_version = 1.0.9 commit = True tag = True diff --git a/setup.py b/setup.py index 86281d2..858feab 100644 --- a/setup.py +++ b/setup.py @@ -36,7 +36,7 @@ setup( name="py-ard", - version="1.0.8", + version="1.0.9", description="ARD reduction for HLA with Python", long_description=readme, long_description_content_type="text/markdown", diff --git a/tests/features/cwd.feature b/tests/features/cwd.feature index 73e8b40..d70d312 100644 --- a/tests/features/cwd.feature +++ b/tests/features/cwd.feature @@ -10,21 +10,20 @@ Feature: CWD Reduction Given the MAC Code we want to find CWD of is "A*26:CBJTR" When we reduce MAC code to lgx and find CWD alleles in the expansion - Then the CWD alleles should be "A*25:01/A*26:01" - And the MAC Code for CWD alleles should be "A*25:BYHR" + Then the CWD alleles should be "A*25:01/A*26:01/A*26:02/A*26:09/A*26:15/A*26:17/A*26:20" Scenario: Using Allele list GL String of 1 CWD allele Given the GL String we want to find CWD of is "B*08:01/B*08:05/B*08:08N/B*08:10/B*08:15/B*08:18/B*08:19N/B*08:22/B*08:24/B*08:27/B*08:30N" When we find CWD alleles for the GL String - Then the CWD alleles should be "B*08:01" + Then the CWD alleles should be "B*08:01/B*08:18" Scenario: Using Allele list GL String of 2 CWD allele Given the GL String we want to find CWD of is "B*15:01:01/B*15:01:03/B*15:04/B*15:07/B*15:26N/B*15:27" When we find CWD alleles for the GL String - Then the CWD alleles should be "B*15:01/B*15:07" + Then the CWD alleles should be "B*15:01/B*15:04/B*15:07/B*15:27" Scenario: Using Allele list GL String that also has a MAC mapping @@ -32,3 +31,17 @@ Feature: CWD Reduction When we find CWD 
alleles for the GL String Then the CWD alleles should be "A*01:01/A*01:02/A*01:03" And the MAC Code for CWD alleles should be "A*01:MN" + + Scenario Outline: CWD Alleles with Nulls + + Given the GL String we want to find CWD of is "<gl_string>" + When we find CWD alleles for the GL String + Then the CWD alleles should be "<cwd_list>" + Examples: + | gl_string | cwd_list | + | A*03:01/A*03:01N | A*03:01 | + | C*04:09N | C*04:09N | + | C*04:01:01 | C*04:01 | + | C*04:KBG | C*04:01/C*04:09N | + | C*04:01:01G/C*04:09N | C*04:01/C*04:09N | + | B*15:01/B*15:01N/B*15:102/B*15:104 | B*15:01/B*15:01N | diff --git a/tests/steps/cwd.py b/tests/steps/cwd.py index e28ef64..6dedc09 100644 --- a/tests/steps/cwd.py +++ b/tests/steps/cwd.py @@ -30,4 +30,4 @@ def step_impl(context, gl_string): @when("we find CWD alleles for the GL String") def step_impl(context): - context.cwd = context.ard.cwd_redux(context.gl_string) + context.cwd = context.ard_non_strict.cwd_redux(context.gl_string)