From bb52ef64c1498de8f7d8a9fd4ac561c3b8baf16e Mon Sep 17 00:00:00 2001 From: Anuradha Date: Mon, 24 Apr 2023 06:23:27 +0000 Subject: [PATCH 1/2] Adding test data --- README.md | 7 +- examples/test-data/GUIDE.md | 31 + examples/test-data/chr1.vcf.gz | Bin 0 -> 4838 bytes examples/test-data/chr1.vcf.gz.tbi | Bin 0 -> 149 bytes examples/test-data/submission.json | 1069 ++++++++++++++++++++++++++++ 5 files changed, 1106 insertions(+), 1 deletion(-) create mode 100644 examples/test-data/GUIDE.md create mode 100644 examples/test-data/chr1.vcf.gz create mode 100644 examples/test-data/chr1.vcf.gz.tbi create mode 100644 examples/test-data/submission.json diff --git a/README.md b/README.md index 0bd9dd3..f912e85 100644 --- a/README.md +++ b/README.md @@ -155,7 +155,7 @@ module "serverless-beacon" { region = "REGION" } ``` -Please refer to [./examples](./examples) to find a minimal and a complete setup. +Please refer to [./examples/minimum/](./examples/minimum/) or [./examples/full](./examples/full) to find a minimal and a complete setup. ## Development All the layers needed for the program to run are in layers folder. To add a new layer for immediate use with additional configs, run the following commands. Once the decision to use the library is finalised update the `init.sh` script to automate the process. @@ -188,6 +188,10 @@ Please make a copy of `backend.tf.template` with suited parameters and rename as ## API +### Example data + +Please find the data in [./examples/test-data/](./examples/test-data/) and use the [./examples/test-data/GUIDE.md](./examples/test-data/GUIDE.md) to try the provided test data. + ### Data ingestion API Use the following schemas for data submission @@ -224,3 +228,4 @@ $ ./init.sh -msse4.2 -O3 ### Provider produced inconsistent final plan If `terraform apply --auto-approve` complaints about a provider error. Please retry. If the issue persists, please raise an issue with the complete terraform log. 
+ diff --git a/examples/test-data/GUIDE.md b/examples/test-data/GUIDE.md new file mode 100644 index 0000000..a5c1f63 --- /dev/null +++ b/examples/test-data/GUIDE.md @@ -0,0 +1,31 @@ +# Getting started with test data + +Please ensure you first upload the `chr1.vcf.gz` and `chr1.vcf.gz.tbi` files to an S3 bucket that is accessible from the sBeacon deployment account. Obtain the S3 URI for the `chr1.vcf.gz` from the uploaded destination. Note that both `vcf.gz` and `vcf.gz.tbi` files must have the same prefix in S3 for this to work. + +Now edit the `submission.json` file such that they match the S3 URI of the `vcf.gz` file. + +```json +... + "vcfLocations": [ + "s3:////chr1.vcf.gz" + ] +... +``` + +You can submit the data in two ways. + +### Submission as request body + +You can simply copy the edited JSON content into the API gateway `/submit` POST endpoint. If you're using a REST client make sure you add authorization headers before you make the request. For example, Postman supports Authorization type AWS Signature and there you can enter AWS Keys. + +### Submission as an S3 payload + +Alternatively, you can upload edited `submission.json` file to an S3 location accessible from deployment. Then you can use the file's S3 URI as follows in the API Gateway or in your REST client. + +```json +{ + "s3Payload": "s3:////submission.json" +} +``` + +This approach is recommended for larger submissions with thousands of metadata entries. 
\ No newline at end of file diff --git a/examples/test-data/chr1.vcf.gz b/examples/test-data/chr1.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..e410ba518be740e8cb4a45b34ef007a727dd2d32 GIT binary patch literal 4838 zcma*rXH*kho(6CXp-L|b3L-^HsCg-&SC!s7211eEL8K=jC`uLS2m}P_Efhg%LJSB9 zh%`eFAd%i{fQ++e&z{+to!z}3?)^RIKKIl8_UB5V1d{xH5y?r^H2@_2Of`91ro10V zKdS^HFynJ^D-}`38@3E4zZ@MjSnGk|Edj{#ZkomTg(a|;Of%wQESLTEkd=26wyjc$ z)Izsr8~N#x_lpI1IEP_;-&sIzok3?_LD#^Yi~Sj2{LL(M<4jtYU4-n%HF|OaSA0Md zhq%S^+2+dB)#Eos-@L zT$!=8z9786wGX8}C=5#p-C65u4Uj`vFEzkXDMyYFIGw+w9ZT9?^=IzlGcRd)r3@8e zgUWFGITp#%ST=D$m6a@9DtA$%l*?R9CM%HmJMqTo=%UE7V2^z9J@746ZJ#HuP4Ouo zzC|y6QfNs|k%>x^^p-Eyk7x?GTRk~vJALX_@jSD2bl6QZk(>@N^pl-dL>Vf0uO!tL z!@aG~F`RAjy_x&V9F@8i*<{7GUk@@$Qo++JSQcmG68l8wsVy*8lid;!yD`nUBo?G6 z=x#Qp$q-yY<`<971o>y(8cf6mcq09?ctmIAXC`9%lq$)#P4{Z~6}OGtquabG_F7d~ zDfu(D!-&iG9#SgWDFJSkzW?p9@YPS9Twk0Mm0UQ98n(Kbm71Q2UViXZFo5$3MbTUS z$FZ2_OFS=_!1kZu?)OEwm)II(Ulz+O(?iW9huPdoV&ZtkG}P`&dF{==G@{WW7m25J zWdQ&xxGjn(>+gO@_U2Uw*q1bX`HO{wsnHF>58+`Ud9&eD?ZKx-4CGWH$xFTJ`^}_6 z!Zt+7{;f!+mi;wY)sx4^ukeE2;t?xE@=w$id%5-JU?cq^F9m@1pE8nAUrWb{!N;Q< zNt{)g2t|S?h@qVmDSO0=XPv7x#&E0oW0^mg}$QRz*0$@L@oQDs9529J7G5NfL zl&#(C5HWQ;pk}F@iuzhb9v@3FdvZ&W@{1OXIqnTqVbEwnjK0T7+Nhgra z7cWI?7|r*2Ru6+w_7L9eE88UYiSldjIN9)8 z6bY-fKkt3;Rfi$%mIHX2@Iuz{&0(kcqtOcwt^4q1b4e(29_%8;D+bGUXn~#@f*$yx z$0vsf!Ak*ri9f|e*9{&buD-2B?1{F)bLMXjZn`L+$>fXQ z99a%H_Y^$BZ8nG=6=kzrRUMa)K=O^n6Z%8aeQDcU?{L`Wa~_GSwDiHg4LGiKeOpao z?|h2`jX8dZr-Io*k==BA>zB67G@XaRFPGbzcI!n$F8|79Is2>aR^4hb{%q4hZdd^8 z@+tWuYrC=z)38aW(BZ-e8S;SqN~5ZLnvU|4f+Z$2@KWvQKFhk8OkLg?+&hh5iqvMz zO5f2PXe|kYnPBc65#Gp)xzA@w`D+t8OOkXyo!sN>Rs*Q3g3|$v3=LFwuccz|zfXxERk2 z-2>Lj{;zxmtOa&wXf5h6K>yB+GZPc8f5`uidW$+t(0*VoUw6iT(cJVsi#gEpuV`Uy zzkAHBZ~m3%XKEL3Cs@aUnnPGGw6oN?xlJI-c^^!HL)>=WG1 zLZT)QkF>0H($bA>O)`J*JDVZ@Tp3w@(`6d9$VJ7tL+^U-(2!g5&O!}1C!Qq zTH>OJO)~W8OS?QRd7+3+HT39DQ#vatRm5f(dJLLIhHANgBb~^JOP^FU>i83eNhk8- z(vKC5F8_q@8f_YgtF_}wnBlH}D-3ZF2_T?-6P6y=+a$XhZV6qQUK5UJWa?~HCYsen 
z`aivN5FPSdOTbo!fHG~9TvC=Ph%0I`y(d|9XMBTM2U%E+7m|-t4xD8Q#4<5%&>L9K zP7ZvdIo5V#b!<^!s)zl@6 z5SUWRV6)AZka(?+&xnCIx$DL&Yo)z2zMb-?C@k!wO=qHoxNei0bQz*cX=7q7?9-i& zsVc`aGexs-QH9$b@&~Y5{A-2P?1dBo%y|~|cSy=XSl9EDeyO%?SpF9Mi_00D@~xdZ z2vUsDaudCf(wN}!ooUY6;i$YHsnhYbl6y@=oMM+Zx$3xo9%M`=)?aI9RX z(#h~h#IDc=Oiw7qKbPn2YPTNi!?zO+1m7IXW6H}gqnuyj^++7_^q_B#=H$$bN->e^ zt!4>B*2L7iY-#3k-MNB%%Xa&6?td%5SD~&&2mOjLW7on0nQ-W&ApGDU94~r+HBG(D*+LVW z%euB7?)+A+g(wIfto3YM1|_a2u2i)bd;iFplhJ(#9zKt0V~IJWk}Qn)kS%^8^x5Be z!%q}HR*kTcjOZZN;)azMuJS-gU!tJ-IbNlVdVuIq3B4=CwVy}GwYP4->;)hdx`l${B*-qGmjIeR3Iq^KifLoX+hQ=?>=h^-gBTIoz?j3 zUOkxJni_kOiOpi5Ia)s!^!zRzzeEcJF90R+v9UWp*+6O~tZ&+_{6^!LqzP zVdwMOvV7WN@T^ccv`qFQIoiK3rkNwYe;H0(Sw7iWNa><6B%gwWbH#;kDpXym9q3$C z9k$p6j1!mbR7w#?_cJq7IitwlLli>IcQpFOyRcHA%g`EE=#g zO3MZt#|%ov?0hNtVI^4x`Z9BC(8dWT;>L(-MaUlyE%n5IUgD{5S%r424UzS9p;{J6 z>1tM+^S8JhaF3Cp6hfGhP+l_r^YL39^+t$aCC-cy zlYQnM=t!-frLY_ZuJ_MqC2spfIh;}r*VH?-?`^*%ZQ9^!zgCjhh#G5~C!uszESb5` zEmOY=@{0g#RoIix?n>-tt0ER1w7nLLha#tTQ$2* zK_PJc5IAcbrHXg6L!b}$yG~*tALG>&ehS2@(8a-;$s_x@m;A!>rWMSbbn5&%!bIjg zaGTdW7U7mZ;YXi;kZ`X1)0~SEYMish$@Yr~(xQAFN-AjIN8qv7B&JwsP{wO(_7p8SJ3DHHX(B11w9paOe@ zJd1M+Dw+4smIEJqXIvlEiEaTszaOLVZSf6;#>X@0UZzRGYz=yy$fq6^&!SHD{uzxE zEqP4TO`#qyVG_rtVBT<%KyR5s&@@%}vaM?TSMpZOYo@|vu<*-_5HiXJ)=5b8S`AIK ztt)td7VRK_Iq7Mkw1%+iLchZT8>NUj^_p%3ZM1rrgSTr>9Dru(&Rvy7ImV2YR&izmfW4)5 zX+GiLKnK{si<1CK zkbXZ}oy(K<%4hib&;wzY@Hn58ytx;g?c+AA6=M+)(WY_1MN|5Cs?e!LU92Pxr+>YRc1!%(ci)~5xnIUue*3}|pFAC^vjRqr z7Wzs&c;MJYl+@^sqhotFje=#@Pmeon_C03qz4I16>VbUKLi^ilpUIQpS;gbc*#898Af!9kjs=I1W1!m;g921k8$x!D(s$|IBMy`e1)BpyE!=toO zd{&~u)~Ttb&;7k9y5ttJNCU~DW5sW_{3{eAI@xVz*=@0x{j<(&c0y0nea1zv%erY> zt6Z1?6eZM7=~c|SbLQx!Gg`)5)NL54RPrXyAJGKMFd^!f=#|m?Xuwqso$^tWI$0!B ziL`{ue3Aa42DjE3{;2P_Z*UHBxMP4__Xk(O!nU_%?BUdV{BB?F`Gzu@NLsWM5;1H` TLc;Y=ldRh$r2jG4`up}bV|#ph literal 0 HcmV?d00001 diff --git a/examples/test-data/chr1.vcf.gz.tbi b/examples/test-data/chr1.vcf.gz.tbi new file mode 100644 index 
0000000000000000000000000000000000000000..691236597cd5646bd27c65c8ca3c1c073f8885f4 GIT binary patch literal 149 zcmb2|=3rp}f&Xj_PR>jW6%5>kpHfm%5)u-ak|cPUP6bFEF;7Wgo;1@*KveLXz)ab5 z0=Ke-Ttt25G(Man&~YWjfi*JVuR*QYIsq$bExyOUk2<`NSz*z0UYvVj@xn(2c4zED jRm^)zq-=XG9u{C@Ft+P9{>R9`AdhCTGy^l(9UuY#wuLLG literal 0 HcmV?d00001 diff --git a/examples/test-data/submission.json b/examples/test-data/submission.json new file mode 100644 index 0000000..c2ee649 --- /dev/null +++ b/examples/test-data/submission.json @@ -0,0 +1,1069 @@ +{ + "datasetId": "UNQ_1", + "dataset": { + "id": "UNQ_1", + "createDateTime": "2021-03-21T02:37:00-08:00", + "dataUseConditions": { + "duoDataUse": [ + { + "id": "DUO:0000042", + "label": "general research use", + "version": "17-07-2016" + } + ] + }, + "description": "Simulation set 1.", + "externalUrl": "http://example.org/wiki/Main_Page", + "info": {}, + "name": "Dataset with fake data", + "updateDateTime": "2022-08-05T17:21:00+01:00", + "version": "v1.1" + }, + "assemblyId": "GRCH38", + "cohortId": "UNQ_1", + "cohort": { + "id": "UNQ_1", + "cohortDataTypes": [ + { + "id": "OMIABIS:0000060", + "label": "survey data" + }, + { + "id": "OBI:0000070", + "label": "genotyping assay" + } + ], + "cohortDesign": { + "id": "orcid:0000-0003-3463-0775" + }, + "cohortSize": -1, + "cohortType": "beacon-defined", + "name": "CGG group" + }, + "vcfLocations": [ + "s3:////chr1.vcf.gz" + ], + "individuals": [ + { + "id": "UNQ_1-1", + "ethnicity": { + "id": "SNOMED:52075006", + "label": "Congolese" + }, + "geographicOrigin": { + "id": "SNOMED:223688001", + "label": "United States of America" + }, + "interventionsOrProcedures": [ + { + "procedureCode": { + "id": "NCIT:C79426" + } + }, + { + "procedureCode": { + "id": "NCIT:C64264" + } + } + ], + "karyotypicSex": "XXY", + "sex": { + "id": "SNOMED:407378000", + "label": "Surgically transgendered transsexual, male-to-female" + } + }, + { + "id": "UNQ_1-2", + "diseases": [ + { + "diseaseCode": { + "id": "SNOMED:734099007", + 
"label": "Neuroblastoma of central nervous system" + } + }, + { + "diseaseCode": { + "id": "SNOMED:135811000119107", + "label": "Lewy body dementia with behavioral disturbance (disorder)" + } + }, + { + "diseaseCode": { + "id": "SNOMED:23853001", + "label": "Disorder of the central nervous system" + } + } + ], + "ethnicity": { + "id": "SNOMED:12556008", + "label": "Tamils" + }, + "geographicOrigin": { + "id": "SNOMED:223688001", + "label": "United States of America" + }, + "interventionsOrProcedures": [ + { + "procedureCode": { + "id": "NCIT:C79426" + } + }, + { + "procedureCode": { + "id": "NCIT:C64264" + } + } + ], + "karyotypicSex": "XXYY", + "sex": { + "id": "SNOMED:407378000", + "label": "Surgically transgendered transsexual, male-to-female" + } + }, + { + "id": "UNQ_1-3", + "diseases": [ + { + "diseaseCode": { + "id": "SNOMED:26929004", + "label": "Alzheimer's disease" + } + }, + { + "diseaseCode": { + "id": "SNOMED:23853001", + "label": "Disorder of the central nervous system" + } + }, + { + "diseaseCode": { + "id": "SNOMED:359642000", + "label": "Diabetes mellitus type 2 in nonobese (disorder)" + } + } + ], + "ethnicity": { + "id": "SNOMED:113170005", + "label": "Aymara" + }, + "geographicOrigin": { + "id": "SNOMED:223688001", + "label": "United States of America" + }, + "interventionsOrProcedures": [ + { + "procedureCode": { + "id": "NCIT:C79426" + } + }, + { + "procedureCode": { + "id": "NCIT:C64263" + } + } + ], + "karyotypicSex": "XXX", + "sex": { + "id": "SNOMED:407374003", + "label": "Transsexual" + } + }, + { + "id": "UNQ_1-4", + "ethnicity": { + "id": "SNOMED:10432001", + "label": "Onge" + }, + "geographicOrigin": { + "id": "SNOMED:223600005", + "label": "India" + }, + "interventionsOrProcedures": [ + { + "procedureCode": { + "id": "NCIT:C79426" + } + }, + { + "procedureCode": { + "id": "NCIT:C64264" + } + } + ], + "karyotypicSex": "XYY", + "sex": { + "id": "SNOMED:407377005", + "label": "Female-to-male transsexual" + } + }, + { + "id": "UNQ_1-5", + 
"diseases": [ + { + "diseaseCode": { + "id": "SNOMED:254955001", + "label": "Pituitary carcinoma" + } + } + ], + "ethnicity": { + "id": "SNOMED:12556008", + "label": "Tamils" + }, + "geographicOrigin": { + "id": "SNOMED:223498002", + "label": "Africa" + }, + "interventionsOrProcedures": [ + { + "procedureCode": { + "id": "NCIT:C64263" + } + }, + { + "procedureCode": { + "id": "NCIT:C64264" + } + } + ], + "karyotypicSex": "XXXX", + "sex": { + "id": "SNOMED:407374003", + "label": "Transsexual" + } + }, + { + "id": "UNQ_1-6", + "diseases": [ + { + "diseaseCode": { + "id": "SNOMED:56265001", + "label": "Heart disease (disorder)" + } + } + ], + "ethnicity": { + "id": "SNOMED:17789004", + "label": "Papuans" + }, + "geographicOrigin": { + "id": "SNOMED:223713009", + "label": "Argentina" + }, + "interventionsOrProcedures": [ + { + "procedureCode": { + "id": "NCIT:C93025" + } + } + ], + "karyotypicSex": "XX", + "sex": { + "id": "SNOMED:248152002", + "label": "Female" + } + }, + { + "id": "UNQ_1-7", + "ethnicity": { + "id": "SNOMED:77502007", + "label": "Atacamenos" + }, + "geographicOrigin": { + "id": "SNOMED:223498002", + "label": "Africa" + }, + "interventionsOrProcedures": [ + { + "procedureCode": { + "id": "NCIT:C79426" + } + } + ], + "karyotypicSex": "XXXY", + "sex": { + "id": "SNOMED:407377005", + "label": "Female-to-male transsexual" + } + }, + { + "id": "UNQ_1-8", + "diseases": [ + { + "diseaseCode": { + "id": "SNOMED:359642000", + "label": "Diabetes mellitus type 2 in nonobese (disorder)" + } + }, + { + "diseaseCode": { + "id": "SNOMED:312991009", + "label": "Senile dementia of the Lewy body type (disorder)" + } + }, + { + "diseaseCode": { + "id": "SNOMED:81531005", + "label": "Diabetes mellitus type 2 in obese (disorder)" + } + } + ], + "ethnicity": { + "id": "SNOMED:89026003", + "label": "Alacaluf" + }, + "geographicOrigin": { + "id": "SNOMED:223498002", + "label": "Africa" + }, + "interventionsOrProcedures": [ + { + "procedureCode": { + "id": "NCIT:C64263" + } + 
} + ], + "karyotypicSex": "XX", + "sex": { + "id": "SNOMED:407378000", + "label": "Surgically transgendered transsexual, male-to-female" + } + }, + { + "id": "UNQ_1-9", + "diseases": [ + { + "diseaseCode": { + "id": "SNOMED:26929004", + "label": "Alzheimer's disease" + } + }, + { + "diseaseCode": { + "id": "SNOMED:81531005", + "label": "Diabetes mellitus type 2 in obese (disorder)" + } + }, + { + "diseaseCode": { + "id": "SNOMED:135811000119107", + "label": "Lewy body dementia with behavioral disturbance (disorder)" + } + } + ], + "ethnicity": { + "id": "SNOMED:10292001", + "label": "Guamians" + }, + "geographicOrigin": { + "id": "SNOMED:223498002", + "label": "Africa" + }, + "karyotypicSex": "XXXX", + "sex": { + "id": "SNOMED:407377005", + "label": "Female-to-male transsexual" + } + }, + { + "id": "UNQ_1-10", + "ethnicity": { + "id": "SNOMED:76460008", + "label": "Yanomama" + }, + "geographicOrigin": { + "id": "SNOMED:223688001", + "label": "United States of America" + }, + "interventionsOrProcedures": [ + { + "procedureCode": { + "id": "NCIT:C64264" + } + } + ], + "karyotypicSex": "XXXY", + "sex": { + "id": "SNOMED:248153007", + "label": "Male" + } + } + ], + "biosamples": [ + { + "id": "UNQ_1-1", + "individualId": "UNQ_1-1", + "biosampleStatus": { + "id": "SNOMED:365641003", + "label": "Minor blood groups - finding" + }, + "collectionDate": "2019-04-23", + "collectionMoment": "P32Y6M1D", + "histologicalDiagnosis": { + "id": "SNOMED:719046005", + "label": "12q14 microdeletion syndrome" + }, + "obtentionProcedure": { + "procedureCode": { + "id": "NCIT:C157179", + "label": "FGFR1 Mutation Analysis" + } + }, + "pathologicalTnmFinding": [ + { + "id": "NCIT:C48725", + "label": "T2a Stage Finding" + } + ], + "sampleOriginDetail": { + "id": "SNOMED:258497007", + "label": "Abscess swab" + }, + "sampleOriginType": { + "id": "SNOMED:31675002", + "label": "Capillary blood" + }, + "tumorProgression": { + "id": "NCIT:C84509", + "label": "Primary Malignant Neoplasm" + }, + 
"info": {}, + "notes": "" + }, + { + "id": "UNQ_1-2", + "individualId": "UNQ_1-2", + "biosampleStatus": { + "id": "SNOMED:702782002", + "label": "Mitochondrial 1555 A to G mutation positive" + }, + "collectionDate": "2022-04-23", + "collectionMoment": "P32Y6M1D", + "histologicalDiagnosis": { + "id": "SNOMED:771439009", + "label": "14q22q23 microdeletion syndrome" + }, + "pathologicalTnmFinding": [ + { + "id": "NCIT:C48699", + "label": "M0 Stage Finding" + } + ], + "sampleOriginDetail": { + "id": "SNOMED:734336008", + "label": "Specimen from aorta" + }, + "sampleProcessing": { + "id": "SNOMED:18809007", + "label": "Meckel's ganglionectomy" + }, + "tumorGrade": { + "id": "NCIT:C28080", + "label": "Grade 3a" + }, + "info": {}, + "notes": "", + "sampleOriginType": { + "id": "SNOMED:31675002", + "label": "Capillary blood" + } + }, + { + "id": "UNQ_1-3", + "individualId": "UNQ_1-3", + "biosampleStatus": { + "id": "SNOMED:702782002", + "label": "Mitochondrial 1555 A to G mutation positive" + }, + "collectionDate": "2021-04-23", + "collectionMoment": "P32Y6M1D", + "histologicalDiagnosis": { + "id": "SNOMED:771439009", + "label": "14q22q23 microdeletion syndrome" + }, + "pathologicalTnmFinding": [ + { + "id": "NCIT:C48725", + "label": "T2a Stage Finding" + } + ], + "sampleOriginType": { + "id": "SNOMED:702451000", + "label": "Cultured cells" + }, + "sampleProcessing": { + "id": "SNOMED:18809007", + "label": "Meckel's ganglionectomy" + }, + "tumorGrade": { + "id": "NCIT:C28080", + "label": "Grade 3a" + }, + "tumorProgression": { + "id": "NCIT:C4813", + "label": "Recurrent Malignant Neoplasm" + }, + "info": {}, + "notes": "" + }, + { + "id": "UNQ_1-4", + "individualId": "UNQ_1-4", + "biosampleStatus": { + "id": "SNOMED:365641003", + "label": "Minor blood groups - finding" + }, + "collectionDate": "2021-04-23", + "collectionMoment": "P7D", + "histologicalDiagnosis": { + "id": "SNOMED:771439009", + "label": "14q22q23 microdeletion syndrome" + }, + "obtentionProcedure": { + 
"procedureCode": { + "id": "NCIT:C157179", + "label": "FGFR1 Mutation Analysis" + } + }, + "pathologicalTnmFinding": [ + { + "id": "NCIT:C48725", + "label": "T2a Stage Finding" + } + ], + "sampleOriginDetail": { + "id": "SNOMED:258603007", + "label": "Respiratory specimen" + }, + "sampleOriginType": { + "id": "SNOMED:782814004", + "label": "Cultured autograft of skin" + }, + "sampleProcessing": { + "id": "SNOMED:18809007", + "label": "Meckel's ganglionectomy" + }, + "tumorGrade": { + "id": "NCIT:C28080", + "label": "Grade 3a" + }, + "tumorProgression": { + "id": "NCIT:C84509", + "label": "Primary Malignant Neoplasm" + }, + "info": {}, + "notes": "" + }, + { + "id": "UNQ_1-5", + "individualId": "UNQ_1-5", + "biosampleStatus": { + "id": "SNOMED:310294002", + "label": "Mitochondrial antibodies positive" + }, + "collectionDate": "2022-04-23", + "collectionMoment": "P7D", + "histologicalDiagnosis": { + "id": "SNOMED:362965005", + "label": "Disorder of body system (disorder)" + }, + "pathologicalStage": { + "id": "NCIT:C27977", + "label": "Stage IIIA" + }, + "pathologicalTnmFinding": [ + { + "id": "NCIT:C48725", + "label": "T2a Stage Finding" + } + ], + "sampleOriginDetail": { + "id": "SNOMED:258500001", + "label": "Nasopharyngeal swab" + }, + "sampleOriginType": { + "id": "SNOMED:782814004", + "label": "Cultured autograft of skin" + }, + "sampleProcessing": { + "id": "SNOMED:72019009", + "label": "Mechanical vitrectomy by posterior approach" + }, + "info": {}, + "notes": "" + }, + { + "id": "UNQ_1-6", + "individualId": "UNQ_1-6", + "biosampleStatus": { + "id": "SNOMED:276447000", + "label": "Mite present" + }, + "collectionDate": "2018-04-23", + "collectionMoment": "P32Y6M1D", + "histologicalDiagnosis": { + "id": "SNOMED:719046005", + "label": "12q14 microdeletion syndrome" + }, + "obtentionProcedure": { + "procedureCode": { + "id": "NCIT:C15189", + "label": "biopsy" + } + }, + "sampleOriginType": { + "id": "SNOMED:782814004", + "label": "Cultured autograft of skin" + 
}, + "sampleProcessing": { + "id": "SNOMED:87021001", + "label": "Mechanical vitrectomy by pars plana approach" + }, + "tumorGrade": { + "id": "NCIT:C28080", + "label": "Grade 3a" + }, + "tumorProgression": { + "id": "NCIT:C84509", + "label": "Primary Malignant Neoplasm" + }, + "info": {}, + "notes": "" + }, + { + "id": "UNQ_1-7", + "individualId": "UNQ_1-7", + "biosampleStatus": { + "id": "SNOMED:310294002", + "label": "Mitochondrial antibodies positive" + }, + "collectionDate": "2021-04-23", + "collectionMoment": "P32Y6M1D", + "histologicalDiagnosis": { + "id": "SNOMED:237592006", + "label": "Abnormality of bombesin secretion" + }, + "pathologicalTnmFinding": [ + { + "id": "NCIT:C48699", + "label": "M0 Stage Finding" + } + ], + "sampleOriginDetail": { + "id": "SNOMED:734336008", + "label": "Specimen from aorta" + }, + "sampleOriginType": { + "id": "SNOMED:31675002", + "label": "Capillary blood" + }, + "tumorProgression": { + "id": "NCIT:C84509", + "label": "Primary Malignant Neoplasm" + }, + "info": {}, + "notes": "" + }, + { + "id": "UNQ_1-8", + "individualId": "UNQ_1-8", + "biosampleStatus": { + "id": "SNOMED:702782002", + "label": "Mitochondrial 1555 A to G mutation positive" + }, + "collectionDate": "2015-04-23", + "collectionMoment": "P32Y6M1D", + "histologicalDiagnosis": { + "id": "SNOMED:237592006", + "label": "Abnormality of bombesin secretion" + }, + "sampleOriginDetail": { + "id": "SNOMED:385338007", + "label": "Specimen from anus obtained by transanal disk excision" + }, + "sampleOriginType": { + "id": "SNOMED:422236008", + "label": "Agar medium" + }, + "tumorGrade": { + "id": "NCIT:C28080", + "label": "Grade 3a" + }, + "info": {}, + "notes": "" + }, + { + "id": "UNQ_1-9", + "individualId": "UNQ_1-9", + "biosampleStatus": { + "id": "SNOMED:310293008", + "label": "Mitochondrial antibodies negative" + }, + "collectionDate": "2018-04-23", + "collectionMoment": "P32Y6M1D", + "histologicalDiagnosis": { + "id": "SNOMED:771439009", + "label": "14q22q23 
microdeletion syndrome" + }, + "pathologicalTnmFinding": [ + { + "id": "NCIT:C48709", + "label": "N1c Stage Finding" + } + ], + "sampleOriginType": { + "id": "SNOMED:31675002", + "label": "Capillary blood" + }, + "sampleProcessing": { + "id": "SNOMED:87021001", + "label": "Mechanical vitrectomy by pars plana approach" + }, + "info": {}, + "notes": "" + }, + { + "id": "UNQ_1-10", + "individualId": "UNQ_1-10", + "biosampleStatus": { + "id": "SNOMED:365641003", + "label": "Minor blood groups - finding" + }, + "collectionDate": "2022-04-23", + "collectionMoment": "P7D", + "histologicalDiagnosis": { + "id": "SNOMED:719046005", + "label": "12q14 microdeletion syndrome" + }, + "pathologicalTnmFinding": [ + { + "id": "NCIT:C48709", + "label": "N1c Stage Finding" + } + ], + "sampleOriginType": { + "id": "SNOMED:422236008", + "label": "Agar medium" + }, + "sampleProcessing": { + "id": "SNOMED:72019009", + "label": "Mechanical vitrectomy by posterior approach" + }, + "tumorGrade": { + "id": "NCIT:C28080", + "label": "Grade 3a" + }, + "tumorProgression": { + "id": "NCIT:C84509", + "label": "Primary Malignant Neoplasm" + }, + "info": {}, + "notes": "" + } + ], + "runs": [ + { + "id": "UNQ_1-1", + "biosampleId": "UNQ_1-1", + "individualId": "UNQ_1-1", + "libraryLayout": "PAIRED", + "librarySelection": "RANDOM", + "librarySource": { + "id": "GENEPIO:0001969", + "label": "other library source" + }, + "libraryStrategy": "WGS", + "platform": "PacBio", + "platformModel": { + "id": "OBI:0002012", + "label": "PacBio RS II" + }, + "runDate": "2021-10-18" + }, + { + "id": "UNQ_1-2", + "biosampleId": "UNQ_1-2", + "individualId": "UNQ_1-2", + "libraryLayout": "PAIRED", + "librarySelection": "RANDOM", + "librarySource": { + "id": "GENEPIO:0001966", + "label": "genomic source" + }, + "libraryStrategy": "WGS", + "platform": "Illumina", + "platformModel": { + "id": "OBI:0002048", + "label": "Illumina HiSeq 3000" + }, + "runDate": "2021-10-18" + }, + { + "id": "UNQ_1-3", + "biosampleId": 
"UNQ_1-3", + "individualId": "UNQ_1-3", + "libraryLayout": "PAIRED", + "librarySelection": "RANDOM", + "librarySource": { + "id": "GENEPIO:0001966", + "label": "genomic source" + }, + "libraryStrategy": "WGS", + "platform": "NanoPore", + "platformModel": { + "id": "OBI:0002750", + "label": "Oxford Nanopore MinION" + }, + "runDate": "2021-10-18" + }, + { + "id": "UNQ_1-4", + "biosampleId": "UNQ_1-4", + "individualId": "UNQ_1-4", + "libraryLayout": "PAIRED", + "librarySelection": "RANDOM", + "librarySource": { + "id": "GENEPIO:0001969", + "label": "other library source" + }, + "libraryStrategy": "WGS", + "platform": "NanoPore", + "platformModel": { + "id": "OBI:0002750", + "label": "Oxford Nanopore MinION" + }, + "runDate": "2021-10-18" + }, + { + "id": "UNQ_1-5", + "biosampleId": "UNQ_1-5", + "individualId": "UNQ_1-5", + "libraryLayout": "PAIRED", + "librarySelection": "RANDOM", + "librarySource": { + "id": "GENEPIO:0001969", + "label": "other library source" + }, + "libraryStrategy": "WGS", + "platform": "PacBio", + "platformModel": { + "id": "OBI:0002012", + "label": "PacBio RS II" + }, + "runDate": "2018-01-01" + }, + { + "id": "UNQ_1-6", + "biosampleId": "UNQ_1-6", + "individualId": "UNQ_1-6", + "libraryLayout": "PAIRED", + "librarySelection": "RANDOM", + "librarySource": { + "id": "GENEPIO:0001969", + "label": "other library source" + }, + "libraryStrategy": "WGS", + "platform": "PacBio", + "platformModel": { + "id": "OBI:0002012", + "label": "PacBio RS II" + }, + "runDate": "2018-01-01" + }, + { + "id": "UNQ_1-7", + "biosampleId": "UNQ_1-7", + "individualId": "UNQ_1-7", + "libraryLayout": "PAIRED", + "librarySelection": "RANDOM", + "librarySource": { + "id": "GENEPIO:0001969", + "label": "other library source" + }, + "libraryStrategy": "WGS", + "platform": "Illumina", + "platformModel": { + "id": "OBI:0002048", + "label": "Illumina HiSeq 3000" + }, + "runDate": "2021-10-18" + }, + { + "id": "UNQ_1-8", + "biosampleId": "UNQ_1-8", + "individualId": "UNQ_1-8", + 
"libraryLayout": "PAIRED", + "librarySelection": "RANDOM", + "librarySource": { + "id": "GENEPIO:0001966", + "label": "genomic source" + }, + "libraryStrategy": "WGS", + "platform": "NanoPore", + "platformModel": { + "id": "OBI:0002750", + "label": "Oxford Nanopore MinION" + }, + "runDate": "2021-10-18" + }, + { + "id": "UNQ_1-9", + "biosampleId": "UNQ_1-9", + "individualId": "UNQ_1-9", + "libraryLayout": "PAIRED", + "librarySelection": "RANDOM", + "librarySource": { + "id": "GENEPIO:0001966", + "label": "genomic source" + }, + "libraryStrategy": "WGS", + "platform": "Illumina", + "platformModel": { + "id": "OBI:0002048", + "label": "Illumina HiSeq 3000" + }, + "runDate": "2018-01-01" + }, + { + "id": "UNQ_1-10", + "biosampleId": "UNQ_1-10", + "individualId": "UNQ_1-10", + "libraryLayout": "PAIRED", + "librarySelection": "RANDOM", + "librarySource": { + "id": "GENEPIO:0001969", + "label": "other library source" + }, + "libraryStrategy": "WGS", + "platform": "Illumina", + "platformModel": { + "id": "OBI:0002048", + "label": "Illumina HiSeq 3000" + }, + "runDate": "2022-08-08" + } + ], + "analyses": [ + { + "id": "UNQ_1-1", + "individualId": "UNQ_1-1", + "biosampleId": "UNQ_1-1", + "runId": "UNQ_1-1", + "aligner": "bwa-0.7.8", + "analysisDate": "2020-2-15", + "pipelineName": "pipeline 5", + "pipelineRef": "Example", + "variantCaller": "SoapSNP", + "vcfSampleId": "HG00096" + }, + { + "id": "UNQ_1-2", + "individualId": "UNQ_1-2", + "biosampleId": "UNQ_1-2", + "runId": "UNQ_1-2", + "aligner": "minimap2", + "analysisDate": "2019-3-17", + "pipelineName": "pipeline 1", + "pipelineRef": "Example", + "variantCaller": "GATK4.0", + "vcfSampleId": "HG00097" + }, + { + "id": "UNQ_1-3", + "individualId": "UNQ_1-3", + "biosampleId": "UNQ_1-3", + "runId": "UNQ_1-3", + "aligner": "minimap2", + "analysisDate": "2018-10-2", + "pipelineName": "pipeline 5", + "pipelineRef": "Example", + "variantCaller": "GATK4.0", + "vcfSampleId": "HG00099" + }, + { + "id": "UNQ_1-4", + "individualId": 
"UNQ_1-4", + "biosampleId": "UNQ_1-4", + "runId": "UNQ_1-4", + "aligner": "bwa-0.7.8", + "analysisDate": "2018-11-9", + "pipelineName": "pipeline 5", + "pipelineRef": "Example", + "variantCaller": "kmer2snp", + "vcfSampleId": "HG00100" + }, + { + "id": "UNQ_1-5", + "individualId": "UNQ_1-5", + "biosampleId": "UNQ_1-5", + "runId": "UNQ_1-5", + "aligner": "bowtie", + "analysisDate": "2019-5-27", + "pipelineName": "pipeline 3", + "pipelineRef": "Example", + "variantCaller": "GATK4.0", + "vcfSampleId": "HG00101" + }, + { + "id": "UNQ_1-6", + "individualId": "UNQ_1-6", + "biosampleId": "UNQ_1-6", + "runId": "UNQ_1-6", + "aligner": "bwa-0.7.8", + "analysisDate": "2021-11-22", + "pipelineName": "pipeline 1", + "pipelineRef": "Example", + "variantCaller": "SoapSNP", + "vcfSampleId": "HG00102" + }, + { + "id": "UNQ_1-7", + "individualId": "UNQ_1-7", + "biosampleId": "UNQ_1-7", + "runId": "UNQ_1-7", + "aligner": "bowtie", + "analysisDate": "2018-1-8", + "pipelineName": "pipeline 1", + "pipelineRef": "Example", + "variantCaller": "SoapSNP", + "vcfSampleId": "HG00103" + }, + { + "id": "UNQ_1-8", + "individualId": "UNQ_1-8", + "biosampleId": "UNQ_1-8", + "runId": "UNQ_1-8", + "aligner": "minimap2", + "analysisDate": "2022-3-6", + "pipelineName": "pipeline 1", + "pipelineRef": "Example", + "variantCaller": "GATK4.0", + "vcfSampleId": "HG00105" + }, + { + "id": "UNQ_1-9", + "individualId": "UNQ_1-9", + "biosampleId": "UNQ_1-9", + "runId": "UNQ_1-9", + "aligner": "bowtie", + "analysisDate": "2021-2-17", + "pipelineName": "pipeline 2", + "pipelineRef": "Example", + "variantCaller": "SoapSNP", + "vcfSampleId": "HG00106" + }, + { + "id": "UNQ_1-10", + "individualId": "UNQ_1-10", + "biosampleId": "UNQ_1-10", + "runId": "UNQ_1-10", + "aligner": "bwa-0.7.8", + "analysisDate": "2019-8-13", + "pipelineName": "pipeline 1", + "pipelineRef": "Example", + "variantCaller": "SoapSNP", + "vcfSampleId": "HG00107" + } + ], + "index": true +} \ No newline at end of file From 
b3efa98996a8ca66f0067b31e636313551f3d923 Mon Sep 17 00:00:00 2001 From: Anuradha Date: Mon, 24 Apr 2023 06:54:33 +0000 Subject: [PATCH 2/2] Refactoring results count, updated examples guide --- examples/test-data/GUIDE.md | 298 +++++++++++++++++- .../route_g_variants_id_biosamples.py | 58 ++-- .../route_g_variants_id_individuals.py | 54 ++-- 3 files changed, 348 insertions(+), 62 deletions(-) diff --git a/examples/test-data/GUIDE.md b/examples/test-data/GUIDE.md index a5c1f63..44256ae 100644 --- a/examples/test-data/GUIDE.md +++ b/examples/test-data/GUIDE.md @@ -12,6 +12,8 @@ Now edit the `submission.json` file such that they match the S3 URI of the `vcf. ... ``` +## Data submission + You can submit the data in two ways. ### Submission as request body @@ -28,4 +30,298 @@ Alternatively, you can upload edited `submission.json` file to an S3 location ac } ``` -This approach is recommended for larger submissions with thousands of metadata entries. \ No newline at end of file +This approach is recommended for larger submissions with thousands of metadata entries. 
+ +## API testing + +### POST request to `/g_variants` with following payload + +```json +{ + "meta": { + "apiVersion": "v2.0" + }, + "query": { + "pagination": {}, + "includeResultsetResponses": "HIT", + "requestedGranularity": "record", + "filters": [ + ], + "requestParameters": { + "assemblyId": "GRCH38", + "start": [ + 546801 + ], + "end": [ + 546810 + ], + "referenceName": "1" + } + } +} +``` + +Result + +```json +{ + "meta": { + "beaconId": "au.csiro-serverless.beacon", + "apiVersion": "v2.0.0", + "returnedGranularity": "record", + "receivedRequestSummary": { + "apiVersion": "v2.0", + "requestedSchemas": [], + "filters": [], + "req_params": { + "assemblyId": "GRCH38", + "start": [ + 546801 + ], + "end": [ + 546810 + ], + "referenceName": "1" + }, + "includeResultsetResponses": "HIT", + "pagination": { + "skip": 0, + "limit": 10 + }, + "requestedGranularity": "record", + "testMode": false + }, + "returnedSchemas": [ + { + "entityType": "genomicVariation", + "schema": "beacon-g_variant-v2.0.0" + } + ] + }, + "responseSummary": { + "exists": true, + "numTotalResults": 2 + }, + "response": { + "resultSets": [ + { + "id": "", + "setType": "", + "exists": true, + "resultsCount": 2, + "results": [ + { + "variantInternalId": "R1JDSDM4CTEJNTQ2ODAyCUcJQw==", + "variation": { + "referenceBases": "G", + "alternateBases": "C", + "location": { + "interval": { + "start": { + "type": "Number", + "value": 546802 + }, + "end": { + "type": "Number", + "value": 546803 + }, + "type": "SequenceInterval" + }, + "sequence_id": "GRCH38", + "type": "SequenceLocation" + }, + "variantType": "SNP" + } + }, + { + "variantInternalId": "R1JDSDM4CTEJNTQ2ODA1CVQJQw==", + "variation": { + "referenceBases": "T", + "alternateBases": "C", + "location": { + "interval": { + "start": { + "type": "Number", + "value": 546805 + }, + "end": { + "type": "Number", + "value": 546806 + }, + "type": "SequenceInterval" + }, + "sequence_id": "GRCH38", + "type": "SequenceLocation" + }, + "variantType": "SNP" + } 
+ } + ], + "resultsHandover": null + } + ] + }, + "beaconHandovers": [] +} +``` + +### POST request to `/g_variants/R1JDSDM4CTEJNTQ2ODAyCUcJQw==/individuals` with following payload + +```json +{ + "meta": { + "apiVersion": "v2.0" + }, + "query": { + "requestedGranularity": "record", + "pagination": { + "limit": 1 + }, + "filters": [] + } +} +``` + +Result + +```json +{ + "meta": { + "beaconId": "au.csiro-serverless.beacon", + "apiVersion": "v2.0.0", + "returnedGranularity": "record", + "receivedRequestSummary": { + "apiVersion": "v2.0", + "requestedSchemas": [], + "filters": [], + "req_params": {}, + "includeResultsetResponses": "HIT", + "pagination": { + "skip": 0, + "limit": 1 + }, + "requestedGranularity": "record", + "testMode": false + }, + "returnedSchemas": [ + { + "entityType": "individual", + "schema": "beacon-individual-v2.0.0" + } + ] + }, + "responseSummary": { + "exists": true, + "numTotalResults": 9 + }, + "response": { + "resultSets": [ + { + "id": "", + "setType": "", + "exists": true, + "resultsCount": 9, + "results": [ + { + "diseases": [ + { + "diseaseCode": { + "id": "SNOMED:56265001", + "label": "Heart disease (disorder)" + } + } + ], + "ethnicity": { + "id": "SNOMED:17789004", + "label": "Papuans" + }, + "exposures": "", + "geographicOrigin": { + "id": "SNOMED:223713009", + "label": "Argentina" + }, + "id": "UNQ_1-6", + "info": "", + "interventionsOrProcedures": [ + { + "procedureCode": { + "id": "NCIT:C93025" + } + } + ], + "karyotypicSex": "XX", + "measures": "", + "pedigrees": "", + "phenotypicFeatures": "", + "sex": { + "id": "SNOMED:248152002", + "label": "Female" + }, + "treatments": "" + } + ], + "resultsHandover": null + } + ] + }, + "beaconHandovers": [] +} +``` + +### POST request to `/individuals` with following payload + +```json +{ + "query": { + "filters": [ + { + "id": "SNOMED:223688001" + } + ], + "requestedGranularity": "count" + }, + "meta": { + "apiVersion": "v2.0" + } +} +``` + +Result + +```json +{ + "meta": { + 
"beaconId": "au.csiro-serverless.beacon", + "apiVersion": "v2.0.0", + "returnedGranularity": "count", + "receivedRequestSummary": { + "apiVersion": "v2.0", + "requestedSchemas": [], + "filters": [ + { + "id": "SNOMED:223688001" + } + ], + "req_params": {}, + "includeResultsetResponses": "HIT", + "pagination": { + "skip": 0, + "limit": 10 + }, + "requestedGranularity": "count", + "testMode": false + }, + "returnedSchemas": [ + { + "entityType": "individual", + "schema": "beacon-individual-v2.0.0" + } + ] + }, + "responseSummary": { + "exists": true, + "numTotalResults": 4 + }, + "beaconHandovers": [] +} +``` \ No newline at end of file diff --git a/lambda/getGenomicVariants/route_g_variants_id_biosamples.py b/lambda/getGenomicVariants/route_g_variants_id_biosamples.py index 93598a9..bc3d70d 100644 --- a/lambda/getGenomicVariants/route_g_variants_id_biosamples.py +++ b/lambda/getGenomicVariants/route_g_variants_id_biosamples.py @@ -1,4 +1,4 @@ -from collections import defaultdict +from collections import defaultdict, OrderedDict import json import base64 @@ -168,46 +168,42 @@ def route(request: RequestParams, variant_id): ) queries = [] + + dataset_samples_sorted = OrderedDict(sorted(dataset_samples.items())) iterated_biosamples = 0 chosen_biosamples = 0 + total_biosamples = sum([len(sample_names) for sample_names in dataset_samples_sorted.values()]) for dataset_id, sample_names in dataset_samples.items(): - if (len(sample_names)) > 0: - if request.query.requested_granularity == "count": - # query = get_count_query(dataset_id, sample_names) - # queries.append(query) - # TODO optimise for duplicate individuals - iterated_biosamples += len(sample_names) - elif request.query.requested_granularity == Granularity.RECORD: - # TODO optimise for duplicate individuals - chosen_samples = [] - - for sample_name in sample_names: - iterated_biosamples += 1 - if ( - iterated_biosamples > request.query.pagination.skip - and chosen_biosamples < request.query.pagination.limit - ): - 
chosen_samples.append(sample_name) - chosen_biosamples += 1 - - if chosen_biosamples == request.query.pagination.limit: - break - if len(chosen_samples) > 0: - query = get_record_query(dataset_id, chosen_samples) - queries.append(query) - - if request.query.requested_granularity == "boolean": + if len(sample_names) > 0 and request.query.requested_granularity == Granularity.RECORD: + # TODO optimise for duplicate individuals + chosen_samples = [] + + for sample_name in sample_names: + iterated_biosamples += 1 + if ( + iterated_biosamples > request.query.pagination.skip + and chosen_biosamples < request.query.pagination.limit + ): + chosen_samples.append(sample_name) + chosen_biosamples += 1 + + if chosen_biosamples == request.query.pagination.limit: + break + if len(chosen_samples) > 0: + query = get_record_query(dataset_id, chosen_samples) + queries.append(query) + + if request.query.requested_granularity == Granularity.BOOLEAN: response = build_beacon_boolean_response( {}, 1 if exists else 0, request, {}, DefaultSchemas.BIOSAMPLES ) print("Returning Response: {}".format(json.dumps(response))) return bundle_response(200, response) - if request.query.requested_granularity == "count": - count = iterated_biosamples + if request.query.requested_granularity == Granularity.COUNT: response = build_beacon_count_response( - {}, count, request, {}, DefaultSchemas.BIOSAMPLES + {}, total_biosamples, request, {}, DefaultSchemas.BIOSAMPLES ) print("Returning Response: {}".format(json.dumps(response))) return bundle_response(200, response) @@ -217,7 +213,7 @@ def route(request: RequestParams, variant_id): biosamples = Biosample.get_by_query(query) if len(queries) > 0 else [] response = build_beacon_resultset_response( jsons.dump(biosamples, strip_privates=True), - len(biosamples), + total_biosamples, request, {}, DefaultSchemas.BIOSAMPLES, diff --git a/lambda/getGenomicVariants/route_g_variants_id_individuals.py b/lambda/getGenomicVariants/route_g_variants_id_individuals.py index 
b977a90..645aa00 100644 --- a/lambda/getGenomicVariants/route_g_variants_id_individuals.py +++ b/lambda/getGenomicVariants/route_g_variants_id_individuals.py @@ -139,44 +139,38 @@ def route(request: RequestParams, variant_id): dataset_samples_sorted = OrderedDict(sorted(dataset_samples.items())) iterated_individuals = 0 chosen_individuals = 0 + total_individuals = sum([len(sample_names) for sample_names in dataset_samples_sorted.values()]) for dataset_id, sample_names in dataset_samples_sorted.items(): - if (len(sample_names)) > 0: - if request.query.requested_granularity == "count": - # query = get_count_query(dataset_id, sample_names) - # queries.append(query) - # TODO optimise for duplicate individuals - iterated_individuals += len(sample_names) - elif request.query.requested_granularity == Granularity.RECORD: - # TODO optimise for duplicate individuals - chosen_samples = [] - - for sample_name in sample_names: - iterated_individuals += 1 - if ( - iterated_individuals > request.query.pagination.skip - and chosen_individuals < request.query.pagination.limit - ): - chosen_samples.append(sample_name) - chosen_individuals += 1 - - if chosen_individuals == request.query.pagination.limit: - break - if len(chosen_samples) > 0: - query = get_record_query(dataset_id, chosen_samples) - queries.append(query) - - if request.query.requested_granularity == "boolean": + if len(sample_names) > 0 and request.query.requested_granularity == Granularity.RECORD: + # TODO optimise for duplicate individuals + chosen_samples = [] + + for sample_name in sample_names: + iterated_individuals += 1 + if ( + iterated_individuals > request.query.pagination.skip + and chosen_individuals < request.query.pagination.limit + ): + chosen_samples.append(sample_name) + chosen_individuals += 1 + + if chosen_individuals == request.query.pagination.limit: + break + if len(chosen_samples) > 0: + query = get_record_query(dataset_id, chosen_samples) + queries.append(query) + + if 
request.query.requested_granularity == Granularity.BOOLEAN: response = build_beacon_boolean_response( {}, 1 if exists else 0, request, {}, DefaultSchemas.INDIVIDUALS ) print("Returning Response: {}".format(json.dumps(response))) return bundle_response(200, response) - if request.query.requested_granularity == "count": - count = iterated_individuals + if request.query.requested_granularity == Granularity.COUNT: response = build_beacon_count_response( - {}, count, request, {}, DefaultSchemas.INDIVIDUALS + {}, total_individuals, request, {}, DefaultSchemas.INDIVIDUALS ) print("Returning Response: {}".format(json.dumps(response))) return bundle_response(200, response) @@ -186,7 +180,7 @@ def route(request: RequestParams, variant_id): individuals = Individual.get_by_query(query) if len(queries) > 0 else [] response = build_beacon_resultset_response( jsons.dump(individuals, strip_privates=True), - len(individuals), + total_individuals, request, {}, DefaultSchemas.INDIVIDUALS,