diff --git a/README.md b/README.md index 0bd9dd3..f912e85 100644 --- a/README.md +++ b/README.md @@ -155,7 +155,7 @@ module "serverless-beacon" { region = "REGION" } ``` -Please refer to [./examples](./examples) to find a minimal and a complete setup. +Please refer to [./examples/minimum/](./examples/minimum/) or [./examples/full](./examples/full) to find a minimal and a complete setup. ## Development All the layers needed for the program to run are in layers folder. To add a new layer for immediate use with additional configs, run the following commands. Once the decision to use the library is finalised update the `init.sh` script to automate the process. @@ -188,6 +188,10 @@ Please make a copy of `backend.tf.template` with suited parameters and rename as ## API +### Example data + +Please find the data in [./examples/test-data/](./examples/test-data/) and use the [./examples/test-data/GUIDE.md](./examples/test-data/GUIDE.md) to try the provided test data. + ### Data ingestion API Use the following schemas for data submission @@ -224,3 +228,4 @@ $ ./init.sh -msse4.2 -O3 ### Provider produced inconsistent final plan If `terraform apply --auto-approve` complaints about a provider error. Please retry. If the issue persists, please raise an issue with the complete terraform log. + diff --git a/examples/test-data/GUIDE.md b/examples/test-data/GUIDE.md new file mode 100644 index 0000000..44256ae --- /dev/null +++ b/examples/test-data/GUIDE.md @@ -0,0 +1,327 @@ +# Getting started with test data + +Please ensure you first upload the `chr1.vcf.gz` and `chr1.vcf.gz.tbi` files to an S3 bucket that is accessible from the sBeacon deployment account. Obtain the S3 URI for the `chr1.vcf.gz` from the upload destination. Note that both `vcf.gz` and `vcf.gz.tbi` files must have the same prefix in S3 for this to work. + +Now edit the `vcfLocations` entry in the `submission.json` file so that it matches the S3 URI of the `vcf.gz` file. + +```json +... + "vcfLocations": [ + "s3://<bucket>/<prefix>/chr1.vcf.gz" + ] +...
+``` + +## Data submission + +You can submit the data in two ways. + +### Submission as request body + +You can simply copy the edited JSON content into the API Gateway `/submit` POST endpoint. If you're using a REST client, make sure you add authorization headers before you make the request. For example, Postman supports the Authorization type AWS Signature, where you can enter your AWS keys. + +### Submission as an S3 payload + +Alternatively, you can upload the edited `submission.json` file to an S3 location accessible from the deployment. Then you can use the file's S3 URI as follows in the API Gateway or in your REST client. + +```json +{ + "s3Payload": "s3://<bucket>/<prefix>/submission.json" +} +``` + +This approach is recommended for larger submissions with thousands of metadata entries. + +## API testing + +### POST request to `/g_variants` with following payload + +```json +{ + "meta": { + "apiVersion": "v2.0" + }, + "query": { + "pagination": {}, + "includeResultsetResponses": "HIT", + "requestedGranularity": "record", + "filters": [ + ], + "requestParameters": { + "assemblyId": "GRCH38", + "start": [ + 546801 + ], + "end": [ + 546810 + ], + "referenceName": "1" + } + } +} +``` + +Result + +```json +{ + "meta": { + "beaconId": "au.csiro-serverless.beacon", + "apiVersion": "v2.0.0", + "returnedGranularity": "record", + "receivedRequestSummary": { + "apiVersion": "v2.0", + "requestedSchemas": [], + "filters": [], + "req_params": { + "assemblyId": "GRCH38", + "start": [ + 546801 + ], + "end": [ + 546810 + ], + "referenceName": "1" + }, + "includeResultsetResponses": "HIT", + "pagination": { + "skip": 0, + "limit": 10 + }, + "requestedGranularity": "record", + "testMode": false + }, + "returnedSchemas": [ + { + "entityType": "genomicVariation", + "schema": "beacon-g_variant-v2.0.0" + } + ] + }, + "responseSummary": { + "exists": true, + "numTotalResults": 2 + }, + "response": { + "resultSets": [ + { + "id": "", + "setType": "", + "exists": true, + "resultsCount": 2, + "results": [ + { +
"variantInternalId": "R1JDSDM4CTEJNTQ2ODAyCUcJQw==", + "variation": { + "referenceBases": "G", + "alternateBases": "C", + "location": { + "interval": { + "start": { + "type": "Number", + "value": 546802 + }, + "end": { + "type": "Number", + "value": 546803 + }, + "type": "SequenceInterval" + }, + "sequence_id": "GRCH38", + "type": "SequenceLocation" + }, + "variantType": "SNP" + } + }, + { + "variantInternalId": "R1JDSDM4CTEJNTQ2ODA1CVQJQw==", + "variation": { + "referenceBases": "T", + "alternateBases": "C", + "location": { + "interval": { + "start": { + "type": "Number", + "value": 546805 + }, + "end": { + "type": "Number", + "value": 546806 + }, + "type": "SequenceInterval" + }, + "sequence_id": "GRCH38", + "type": "SequenceLocation" + }, + "variantType": "SNP" + } + } + ], + "resultsHandover": null + } + ] + }, + "beaconHandovers": [] +} +``` + +### POST request to `/g_variants/R1JDSDM4CTEJNTQ2ODAyCUcJQw==/individuals` with following payload + +```json +{ + "meta": { + "apiVersion": "v2.0" + }, + "query": { + "requestedGranularity": "record", + "pagination": { + "limit": 1 + }, + "filters": [] + } +} +``` + +Result + +```json +{ + "meta": { + "beaconId": "au.csiro-serverless.beacon", + "apiVersion": "v2.0.0", + "returnedGranularity": "record", + "receivedRequestSummary": { + "apiVersion": "v2.0", + "requestedSchemas": [], + "filters": [], + "req_params": {}, + "includeResultsetResponses": "HIT", + "pagination": { + "skip": 0, + "limit": 1 + }, + "requestedGranularity": "record", + "testMode": false + }, + "returnedSchemas": [ + { + "entityType": "individual", + "schema": "beacon-individual-v2.0.0" + } + ] + }, + "responseSummary": { + "exists": true, + "numTotalResults": 9 + }, + "response": { + "resultSets": [ + { + "id": "", + "setType": "", + "exists": true, + "resultsCount": 9, + "results": [ + { + "diseases": [ + { + "diseaseCode": { + "id": "SNOMED:56265001", + "label": "Heart disease (disorder)" + } + } + ], + "ethnicity": { + "id": "SNOMED:17789004", + 
"label": "Papuans" + }, + "exposures": "", + "geographicOrigin": { + "id": "SNOMED:223713009", + "label": "Argentina" + }, + "id": "UNQ_1-6", + "info": "", + "interventionsOrProcedures": [ + { + "procedureCode": { + "id": "NCIT:C93025" + } + } + ], + "karyotypicSex": "XX", + "measures": "", + "pedigrees": "", + "phenotypicFeatures": "", + "sex": { + "id": "SNOMED:248152002", + "label": "Female" + }, + "treatments": "" + } + ], + "resultsHandover": null + } + ] + }, + "beaconHandovers": [] +} +``` + +### POST request to `/individuals` with following payload + +```json +{ + "query": { + "filters": [ + { + "id": "SNOMED:223688001" + } + ], + "requestedGranularity": "count" + }, + "meta": { + "apiVersion": "v2.0" + } +} +``` + +Result + +```json +{ + "meta": { + "beaconId": "au.csiro-serverless.beacon", + "apiVersion": "v2.0.0", + "returnedGranularity": "count", + "receivedRequestSummary": { + "apiVersion": "v2.0", + "requestedSchemas": [], + "filters": [ + { + "id": "SNOMED:223688001" + } + ], + "req_params": {}, + "includeResultsetResponses": "HIT", + "pagination": { + "skip": 0, + "limit": 10 + }, + "requestedGranularity": "count", + "testMode": false + }, + "returnedSchemas": [ + { + "entityType": "individual", + "schema": "beacon-individual-v2.0.0" + } + ] + }, + "responseSummary": { + "exists": true, + "numTotalResults": 4 + }, + "beaconHandovers": [] +} +``` \ No newline at end of file diff --git a/examples/test-data/chr1.vcf.gz b/examples/test-data/chr1.vcf.gz new file mode 100644 index 0000000..e410ba5 Binary files /dev/null and b/examples/test-data/chr1.vcf.gz differ diff --git a/examples/test-data/chr1.vcf.gz.tbi b/examples/test-data/chr1.vcf.gz.tbi new file mode 100644 index 0000000..6912365 Binary files /dev/null and b/examples/test-data/chr1.vcf.gz.tbi differ diff --git a/examples/test-data/submission.json b/examples/test-data/submission.json new file mode 100644 index 0000000..c2ee649 --- /dev/null +++ b/examples/test-data/submission.json @@ -0,0 +1,1069 
@@ +{ + "datasetId": "UNQ_1", + "dataset": { + "id": "UNQ_1", + "createDateTime": "2021-03-21T02:37:00-08:00", + "dataUseConditions": { + "duoDataUse": [ + { + "id": "DUO:0000042", + "label": "general research use", + "version": "17-07-2016" + } + ] + }, + "description": "Simulation set 1.", + "externalUrl": "http://example.org/wiki/Main_Page", + "info": {}, + "name": "Dataset with fake data", + "updateDateTime": "2022-08-05T17:21:00+01:00", + "version": "v1.1" + }, + "assemblyId": "GRCH38", + "cohortId": "UNQ_1", + "cohort": { + "id": "UNQ_1", + "cohortDataTypes": [ + { + "id": "OMIABIS:0000060", + "label": "survey data" + }, + { + "id": "OBI:0000070", + "label": "genotyping assay" + } + ], + "cohortDesign": { + "id": "orcid:0000-0003-3463-0775" + }, + "cohortSize": -1, + "cohortType": "beacon-defined", + "name": "CGG group" + }, + "vcfLocations": [ + "s3:////chr1.vcf.gz" + ], + "individuals": [ + { + "id": "UNQ_1-1", + "ethnicity": { + "id": "SNOMED:52075006", + "label": "Congolese" + }, + "geographicOrigin": { + "id": "SNOMED:223688001", + "label": "United States of America" + }, + "interventionsOrProcedures": [ + { + "procedureCode": { + "id": "NCIT:C79426" + } + }, + { + "procedureCode": { + "id": "NCIT:C64264" + } + } + ], + "karyotypicSex": "XXY", + "sex": { + "id": "SNOMED:407378000", + "label": "Surgically transgendered transsexual, male-to-female" + } + }, + { + "id": "UNQ_1-2", + "diseases": [ + { + "diseaseCode": { + "id": "SNOMED:734099007", + "label": "Neuroblastoma of central nervous system" + } + }, + { + "diseaseCode": { + "id": "SNOMED:135811000119107", + "label": "Lewy body dementia with behavioral disturbance (disorder)" + } + }, + { + "diseaseCode": { + "id": "SNOMED:23853001", + "label": "Disorder of the central nervous system" + } + } + ], + "ethnicity": { + "id": "SNOMED:12556008", + "label": "Tamils" + }, + "geographicOrigin": { + "id": "SNOMED:223688001", + "label": "United States of America" + }, + "interventionsOrProcedures": [ + { + 
"procedureCode": { + "id": "NCIT:C79426" + } + }, + { + "procedureCode": { + "id": "NCIT:C64264" + } + } + ], + "karyotypicSex": "XXYY", + "sex": { + "id": "SNOMED:407378000", + "label": "Surgically transgendered transsexual, male-to-female" + } + }, + { + "id": "UNQ_1-3", + "diseases": [ + { + "diseaseCode": { + "id": "SNOMED:26929004", + "label": "Alzheimer's disease" + } + }, + { + "diseaseCode": { + "id": "SNOMED:23853001", + "label": "Disorder of the central nervous system" + } + }, + { + "diseaseCode": { + "id": "SNOMED:359642000", + "label": "Diabetes mellitus type 2 in nonobese (disorder)" + } + } + ], + "ethnicity": { + "id": "SNOMED:113170005", + "label": "Aymara" + }, + "geographicOrigin": { + "id": "SNOMED:223688001", + "label": "United States of America" + }, + "interventionsOrProcedures": [ + { + "procedureCode": { + "id": "NCIT:C79426" + } + }, + { + "procedureCode": { + "id": "NCIT:C64263" + } + } + ], + "karyotypicSex": "XXX", + "sex": { + "id": "SNOMED:407374003", + "label": "Transsexual" + } + }, + { + "id": "UNQ_1-4", + "ethnicity": { + "id": "SNOMED:10432001", + "label": "Onge" + }, + "geographicOrigin": { + "id": "SNOMED:223600005", + "label": "India" + }, + "interventionsOrProcedures": [ + { + "procedureCode": { + "id": "NCIT:C79426" + } + }, + { + "procedureCode": { + "id": "NCIT:C64264" + } + } + ], + "karyotypicSex": "XYY", + "sex": { + "id": "SNOMED:407377005", + "label": "Female-to-male transsexual" + } + }, + { + "id": "UNQ_1-5", + "diseases": [ + { + "diseaseCode": { + "id": "SNOMED:254955001", + "label": "Pituitary carcinoma" + } + } + ], + "ethnicity": { + "id": "SNOMED:12556008", + "label": "Tamils" + }, + "geographicOrigin": { + "id": "SNOMED:223498002", + "label": "Africa" + }, + "interventionsOrProcedures": [ + { + "procedureCode": { + "id": "NCIT:C64263" + } + }, + { + "procedureCode": { + "id": "NCIT:C64264" + } + } + ], + "karyotypicSex": "XXXX", + "sex": { + "id": "SNOMED:407374003", + "label": "Transsexual" + } + }, + { + 
"id": "UNQ_1-6", + "diseases": [ + { + "diseaseCode": { + "id": "SNOMED:56265001", + "label": "Heart disease (disorder)" + } + } + ], + "ethnicity": { + "id": "SNOMED:17789004", + "label": "Papuans" + }, + "geographicOrigin": { + "id": "SNOMED:223713009", + "label": "Argentina" + }, + "interventionsOrProcedures": [ + { + "procedureCode": { + "id": "NCIT:C93025" + } + } + ], + "karyotypicSex": "XX", + "sex": { + "id": "SNOMED:248152002", + "label": "Female" + } + }, + { + "id": "UNQ_1-7", + "ethnicity": { + "id": "SNOMED:77502007", + "label": "Atacamenos" + }, + "geographicOrigin": { + "id": "SNOMED:223498002", + "label": "Africa" + }, + "interventionsOrProcedures": [ + { + "procedureCode": { + "id": "NCIT:C79426" + } + } + ], + "karyotypicSex": "XXXY", + "sex": { + "id": "SNOMED:407377005", + "label": "Female-to-male transsexual" + } + }, + { + "id": "UNQ_1-8", + "diseases": [ + { + "diseaseCode": { + "id": "SNOMED:359642000", + "label": "Diabetes mellitus type 2 in nonobese (disorder)" + } + }, + { + "diseaseCode": { + "id": "SNOMED:312991009", + "label": "Senile dementia of the Lewy body type (disorder)" + } + }, + { + "diseaseCode": { + "id": "SNOMED:81531005", + "label": "Diabetes mellitus type 2 in obese (disorder)" + } + } + ], + "ethnicity": { + "id": "SNOMED:89026003", + "label": "Alacaluf" + }, + "geographicOrigin": { + "id": "SNOMED:223498002", + "label": "Africa" + }, + "interventionsOrProcedures": [ + { + "procedureCode": { + "id": "NCIT:C64263" + } + } + ], + "karyotypicSex": "XX", + "sex": { + "id": "SNOMED:407378000", + "label": "Surgically transgendered transsexual, male-to-female" + } + }, + { + "id": "UNQ_1-9", + "diseases": [ + { + "diseaseCode": { + "id": "SNOMED:26929004", + "label": "Alzheimer's disease" + } + }, + { + "diseaseCode": { + "id": "SNOMED:81531005", + "label": "Diabetes mellitus type 2 in obese (disorder)" + } + }, + { + "diseaseCode": { + "id": "SNOMED:135811000119107", + "label": "Lewy body dementia with behavioral disturbance 
(disorder)" + } + } + ], + "ethnicity": { + "id": "SNOMED:10292001", + "label": "Guamians" + }, + "geographicOrigin": { + "id": "SNOMED:223498002", + "label": "Africa" + }, + "karyotypicSex": "XXXX", + "sex": { + "id": "SNOMED:407377005", + "label": "Female-to-male transsexual" + } + }, + { + "id": "UNQ_1-10", + "ethnicity": { + "id": "SNOMED:76460008", + "label": "Yanomama" + }, + "geographicOrigin": { + "id": "SNOMED:223688001", + "label": "United States of America" + }, + "interventionsOrProcedures": [ + { + "procedureCode": { + "id": "NCIT:C64264" + } + } + ], + "karyotypicSex": "XXXY", + "sex": { + "id": "SNOMED:248153007", + "label": "Male" + } + } + ], + "biosamples": [ + { + "id": "UNQ_1-1", + "individualId": "UNQ_1-1", + "biosampleStatus": { + "id": "SNOMED:365641003", + "label": "Minor blood groups - finding" + }, + "collectionDate": "2019-04-23", + "collectionMoment": "P32Y6M1D", + "histologicalDiagnosis": { + "id": "SNOMED:719046005", + "label": "12q14 microdeletion syndrome" + }, + "obtentionProcedure": { + "procedureCode": { + "id": "NCIT:C157179", + "label": "FGFR1 Mutation Analysis" + } + }, + "pathologicalTnmFinding": [ + { + "id": "NCIT:C48725", + "label": "T2a Stage Finding" + } + ], + "sampleOriginDetail": { + "id": "SNOMED:258497007", + "label": "Abscess swab" + }, + "sampleOriginType": { + "id": "SNOMED:31675002", + "label": "Capillary blood" + }, + "tumorProgression": { + "id": "NCIT:C84509", + "label": "Primary Malignant Neoplasm" + }, + "info": {}, + "notes": "" + }, + { + "id": "UNQ_1-2", + "individualId": "UNQ_1-2", + "biosampleStatus": { + "id": "SNOMED:702782002", + "label": "Mitochondrial 1555 A to G mutation positive" + }, + "collectionDate": "2022-04-23", + "collectionMoment": "P32Y6M1D", + "histologicalDiagnosis": { + "id": "SNOMED:771439009", + "label": "14q22q23 microdeletion syndrome" + }, + "pathologicalTnmFinding": [ + { + "id": "NCIT:C48699", + "label": "M0 Stage Finding" + } + ], + "sampleOriginDetail": { + "id": 
"SNOMED:734336008", + "label": "Specimen from aorta" + }, + "sampleProcessing": { + "id": "SNOMED:18809007", + "label": "Meckel's ganglionectomy" + }, + "tumorGrade": { + "id": "NCIT:C28080", + "label": "Grade 3a" + }, + "info": {}, + "notes": "", + "sampleOriginType": { + "id": "SNOMED:31675002", + "label": "Capillary blood" + } + }, + { + "id": "UNQ_1-3", + "individualId": "UNQ_1-3", + "biosampleStatus": { + "id": "SNOMED:702782002", + "label": "Mitochondrial 1555 A to G mutation positive" + }, + "collectionDate": "2021-04-23", + "collectionMoment": "P32Y6M1D", + "histologicalDiagnosis": { + "id": "SNOMED:771439009", + "label": "14q22q23 microdeletion syndrome" + }, + "pathologicalTnmFinding": [ + { + "id": "NCIT:C48725", + "label": "T2a Stage Finding" + } + ], + "sampleOriginType": { + "id": "SNOMED:702451000", + "label": "Cultured cells" + }, + "sampleProcessing": { + "id": "SNOMED:18809007", + "label": "Meckel's ganglionectomy" + }, + "tumorGrade": { + "id": "NCIT:C28080", + "label": "Grade 3a" + }, + "tumorProgression": { + "id": "NCIT:C4813", + "label": "Recurrent Malignant Neoplasm" + }, + "info": {}, + "notes": "" + }, + { + "id": "UNQ_1-4", + "individualId": "UNQ_1-4", + "biosampleStatus": { + "id": "SNOMED:365641003", + "label": "Minor blood groups - finding" + }, + "collectionDate": "2021-04-23", + "collectionMoment": "P7D", + "histologicalDiagnosis": { + "id": "SNOMED:771439009", + "label": "14q22q23 microdeletion syndrome" + }, + "obtentionProcedure": { + "procedureCode": { + "id": "NCIT:C157179", + "label": "FGFR1 Mutation Analysis" + } + }, + "pathologicalTnmFinding": [ + { + "id": "NCIT:C48725", + "label": "T2a Stage Finding" + } + ], + "sampleOriginDetail": { + "id": "SNOMED:258603007", + "label": "Respiratory specimen" + }, + "sampleOriginType": { + "id": "SNOMED:782814004", + "label": "Cultured autograft of skin" + }, + "sampleProcessing": { + "id": "SNOMED:18809007", + "label": "Meckel's ganglionectomy" + }, + "tumorGrade": { + "id": 
"NCIT:C28080", + "label": "Grade 3a" + }, + "tumorProgression": { + "id": "NCIT:C84509", + "label": "Primary Malignant Neoplasm" + }, + "info": {}, + "notes": "" + }, + { + "id": "UNQ_1-5", + "individualId": "UNQ_1-5", + "biosampleStatus": { + "id": "SNOMED:310294002", + "label": "Mitochondrial antibodies positive" + }, + "collectionDate": "2022-04-23", + "collectionMoment": "P7D", + "histologicalDiagnosis": { + "id": "SNOMED:362965005", + "label": "Disorder of body system (disorder)" + }, + "pathologicalStage": { + "id": "NCIT:C27977", + "label": "Stage IIIA" + }, + "pathologicalTnmFinding": [ + { + "id": "NCIT:C48725", + "label": "T2a Stage Finding" + } + ], + "sampleOriginDetail": { + "id": "SNOMED:258500001", + "label": "Nasopharyngeal swab" + }, + "sampleOriginType": { + "id": "SNOMED:782814004", + "label": "Cultured autograft of skin" + }, + "sampleProcessing": { + "id": "SNOMED:72019009", + "label": "Mechanical vitrectomy by posterior approach" + }, + "info": {}, + "notes": "" + }, + { + "id": "UNQ_1-6", + "individualId": "UNQ_1-6", + "biosampleStatus": { + "id": "SNOMED:276447000", + "label": "Mite present" + }, + "collectionDate": "2018-04-23", + "collectionMoment": "P32Y6M1D", + "histologicalDiagnosis": { + "id": "SNOMED:719046005", + "label": "12q14 microdeletion syndrome" + }, + "obtentionProcedure": { + "procedureCode": { + "id": "NCIT:C15189", + "label": "biopsy" + } + }, + "sampleOriginType": { + "id": "SNOMED:782814004", + "label": "Cultured autograft of skin" + }, + "sampleProcessing": { + "id": "SNOMED:87021001", + "label": "Mechanical vitrectomy by pars plana approach" + }, + "tumorGrade": { + "id": "NCIT:C28080", + "label": "Grade 3a" + }, + "tumorProgression": { + "id": "NCIT:C84509", + "label": "Primary Malignant Neoplasm" + }, + "info": {}, + "notes": "" + }, + { + "id": "UNQ_1-7", + "individualId": "UNQ_1-7", + "biosampleStatus": { + "id": "SNOMED:310294002", + "label": "Mitochondrial antibodies positive" + }, + "collectionDate": 
"2021-04-23", + "collectionMoment": "P32Y6M1D", + "histologicalDiagnosis": { + "id": "SNOMED:237592006", + "label": "Abnormality of bombesin secretion" + }, + "pathologicalTnmFinding": [ + { + "id": "NCIT:C48699", + "label": "M0 Stage Finding" + } + ], + "sampleOriginDetail": { + "id": "SNOMED:734336008", + "label": "Specimen from aorta" + }, + "sampleOriginType": { + "id": "SNOMED:31675002", + "label": "Capillary blood" + }, + "tumorProgression": { + "id": "NCIT:C84509", + "label": "Primary Malignant Neoplasm" + }, + "info": {}, + "notes": "" + }, + { + "id": "UNQ_1-8", + "individualId": "UNQ_1-8", + "biosampleStatus": { + "id": "SNOMED:702782002", + "label": "Mitochondrial 1555 A to G mutation positive" + }, + "collectionDate": "2015-04-23", + "collectionMoment": "P32Y6M1D", + "histologicalDiagnosis": { + "id": "SNOMED:237592006", + "label": "Abnormality of bombesin secretion" + }, + "sampleOriginDetail": { + "id": "SNOMED:385338007", + "label": "Specimen from anus obtained by transanal disk excision" + }, + "sampleOriginType": { + "id": "SNOMED:422236008", + "label": "Agar medium" + }, + "tumorGrade": { + "id": "NCIT:C28080", + "label": "Grade 3a" + }, + "info": {}, + "notes": "" + }, + { + "id": "UNQ_1-9", + "individualId": "UNQ_1-9", + "biosampleStatus": { + "id": "SNOMED:310293008", + "label": "Mitochondrial antibodies negative" + }, + "collectionDate": "2018-04-23", + "collectionMoment": "P32Y6M1D", + "histologicalDiagnosis": { + "id": "SNOMED:771439009", + "label": "14q22q23 microdeletion syndrome" + }, + "pathologicalTnmFinding": [ + { + "id": "NCIT:C48709", + "label": "N1c Stage Finding" + } + ], + "sampleOriginType": { + "id": "SNOMED:31675002", + "label": "Capillary blood" + }, + "sampleProcessing": { + "id": "SNOMED:87021001", + "label": "Mechanical vitrectomy by pars plana approach" + }, + "info": {}, + "notes": "" + }, + { + "id": "UNQ_1-10", + "individualId": "UNQ_1-10", + "biosampleStatus": { + "id": "SNOMED:365641003", + "label": "Minor blood 
groups - finding" + }, + "collectionDate": "2022-04-23", + "collectionMoment": "P7D", + "histologicalDiagnosis": { + "id": "SNOMED:719046005", + "label": "12q14 microdeletion syndrome" + }, + "pathologicalTnmFinding": [ + { + "id": "NCIT:C48709", + "label": "N1c Stage Finding" + } + ], + "sampleOriginType": { + "id": "SNOMED:422236008", + "label": "Agar medium" + }, + "sampleProcessing": { + "id": "SNOMED:72019009", + "label": "Mechanical vitrectomy by posterior approach" + }, + "tumorGrade": { + "id": "NCIT:C28080", + "label": "Grade 3a" + }, + "tumorProgression": { + "id": "NCIT:C84509", + "label": "Primary Malignant Neoplasm" + }, + "info": {}, + "notes": "" + } + ], + "runs": [ + { + "id": "UNQ_1-1", + "biosampleId": "UNQ_1-1", + "individualId": "UNQ_1-1", + "libraryLayout": "PAIRED", + "librarySelection": "RANDOM", + "librarySource": { + "id": "GENEPIO:0001969", + "label": "other library source" + }, + "libraryStrategy": "WGS", + "platform": "PacBio", + "platformModel": { + "id": "OBI:0002012", + "label": "PacBio RS II" + }, + "runDate": "2021-10-18" + }, + { + "id": "UNQ_1-2", + "biosampleId": "UNQ_1-2", + "individualId": "UNQ_1-2", + "libraryLayout": "PAIRED", + "librarySelection": "RANDOM", + "librarySource": { + "id": "GENEPIO:0001966", + "label": "genomic source" + }, + "libraryStrategy": "WGS", + "platform": "Illumina", + "platformModel": { + "id": "OBI:0002048", + "label": "Illumina HiSeq 3000" + }, + "runDate": "2021-10-18" + }, + { + "id": "UNQ_1-3", + "biosampleId": "UNQ_1-3", + "individualId": "UNQ_1-3", + "libraryLayout": "PAIRED", + "librarySelection": "RANDOM", + "librarySource": { + "id": "GENEPIO:0001966", + "label": "genomic source" + }, + "libraryStrategy": "WGS", + "platform": "NanoPore", + "platformModel": { + "id": "OBI:0002750", + "label": "Oxford Nanopore MinION" + }, + "runDate": "2021-10-18" + }, + { + "id": "UNQ_1-4", + "biosampleId": "UNQ_1-4", + "individualId": "UNQ_1-4", + "libraryLayout": "PAIRED", + "librarySelection": "RANDOM", 
+ "librarySource": { + "id": "GENEPIO:0001969", + "label": "other library source" + }, + "libraryStrategy": "WGS", + "platform": "NanoPore", + "platformModel": { + "id": "OBI:0002750", + "label": "Oxford Nanopore MinION" + }, + "runDate": "2021-10-18" + }, + { + "id": "UNQ_1-5", + "biosampleId": "UNQ_1-5", + "individualId": "UNQ_1-5", + "libraryLayout": "PAIRED", + "librarySelection": "RANDOM", + "librarySource": { + "id": "GENEPIO:0001969", + "label": "other library source" + }, + "libraryStrategy": "WGS", + "platform": "PacBio", + "platformModel": { + "id": "OBI:0002012", + "label": "PacBio RS II" + }, + "runDate": "2018-01-01" + }, + { + "id": "UNQ_1-6", + "biosampleId": "UNQ_1-6", + "individualId": "UNQ_1-6", + "libraryLayout": "PAIRED", + "librarySelection": "RANDOM", + "librarySource": { + "id": "GENEPIO:0001969", + "label": "other library source" + }, + "libraryStrategy": "WGS", + "platform": "PacBio", + "platformModel": { + "id": "OBI:0002012", + "label": "PacBio RS II" + }, + "runDate": "2018-01-01" + }, + { + "id": "UNQ_1-7", + "biosampleId": "UNQ_1-7", + "individualId": "UNQ_1-7", + "libraryLayout": "PAIRED", + "librarySelection": "RANDOM", + "librarySource": { + "id": "GENEPIO:0001969", + "label": "other library source" + }, + "libraryStrategy": "WGS", + "platform": "Illumina", + "platformModel": { + "id": "OBI:0002048", + "label": "Illumina HiSeq 3000" + }, + "runDate": "2021-10-18" + }, + { + "id": "UNQ_1-8", + "biosampleId": "UNQ_1-8", + "individualId": "UNQ_1-8", + "libraryLayout": "PAIRED", + "librarySelection": "RANDOM", + "librarySource": { + "id": "GENEPIO:0001966", + "label": "genomic source" + }, + "libraryStrategy": "WGS", + "platform": "NanoPore", + "platformModel": { + "id": "OBI:0002750", + "label": "Oxford Nanopore MinION" + }, + "runDate": "2021-10-18" + }, + { + "id": "UNQ_1-9", + "biosampleId": "UNQ_1-9", + "individualId": "UNQ_1-9", + "libraryLayout": "PAIRED", + "librarySelection": "RANDOM", + "librarySource": { + "id": 
"GENEPIO:0001966", + "label": "genomic source" + }, + "libraryStrategy": "WGS", + "platform": "Illumina", + "platformModel": { + "id": "OBI:0002048", + "label": "Illumina HiSeq 3000" + }, + "runDate": "2018-01-01" + }, + { + "id": "UNQ_1-10", + "biosampleId": "UNQ_1-10", + "individualId": "UNQ_1-10", + "libraryLayout": "PAIRED", + "librarySelection": "RANDOM", + "librarySource": { + "id": "GENEPIO:0001969", + "label": "other library source" + }, + "libraryStrategy": "WGS", + "platform": "Illumina", + "platformModel": { + "id": "OBI:0002048", + "label": "Illumina HiSeq 3000" + }, + "runDate": "2022-08-08" + } + ], + "analyses": [ + { + "id": "UNQ_1-1", + "individualId": "UNQ_1-1", + "biosampleId": "UNQ_1-1", + "runId": "UNQ_1-1", + "aligner": "bwa-0.7.8", + "analysisDate": "2020-2-15", + "pipelineName": "pipeline 5", + "pipelineRef": "Example", + "variantCaller": "SoapSNP", + "vcfSampleId": "HG00096" + }, + { + "id": "UNQ_1-2", + "individualId": "UNQ_1-2", + "biosampleId": "UNQ_1-2", + "runId": "UNQ_1-2", + "aligner": "minimap2", + "analysisDate": "2019-3-17", + "pipelineName": "pipeline 1", + "pipelineRef": "Example", + "variantCaller": "GATK4.0", + "vcfSampleId": "HG00097" + }, + { + "id": "UNQ_1-3", + "individualId": "UNQ_1-3", + "biosampleId": "UNQ_1-3", + "runId": "UNQ_1-3", + "aligner": "minimap2", + "analysisDate": "2018-10-2", + "pipelineName": "pipeline 5", + "pipelineRef": "Example", + "variantCaller": "GATK4.0", + "vcfSampleId": "HG00099" + }, + { + "id": "UNQ_1-4", + "individualId": "UNQ_1-4", + "biosampleId": "UNQ_1-4", + "runId": "UNQ_1-4", + "aligner": "bwa-0.7.8", + "analysisDate": "2018-11-9", + "pipelineName": "pipeline 5", + "pipelineRef": "Example", + "variantCaller": "kmer2snp", + "vcfSampleId": "HG00100" + }, + { + "id": "UNQ_1-5", + "individualId": "UNQ_1-5", + "biosampleId": "UNQ_1-5", + "runId": "UNQ_1-5", + "aligner": "bowtie", + "analysisDate": "2019-5-27", + "pipelineName": "pipeline 3", + "pipelineRef": "Example", + "variantCaller": 
"GATK4.0", + "vcfSampleId": "HG00101" + }, + { + "id": "UNQ_1-6", + "individualId": "UNQ_1-6", + "biosampleId": "UNQ_1-6", + "runId": "UNQ_1-6", + "aligner": "bwa-0.7.8", + "analysisDate": "2021-11-22", + "pipelineName": "pipeline 1", + "pipelineRef": "Example", + "variantCaller": "SoapSNP", + "vcfSampleId": "HG00102" + }, + { + "id": "UNQ_1-7", + "individualId": "UNQ_1-7", + "biosampleId": "UNQ_1-7", + "runId": "UNQ_1-7", + "aligner": "bowtie", + "analysisDate": "2018-1-8", + "pipelineName": "pipeline 1", + "pipelineRef": "Example", + "variantCaller": "SoapSNP", + "vcfSampleId": "HG00103" + }, + { + "id": "UNQ_1-8", + "individualId": "UNQ_1-8", + "biosampleId": "UNQ_1-8", + "runId": "UNQ_1-8", + "aligner": "minimap2", + "analysisDate": "2022-3-6", + "pipelineName": "pipeline 1", + "pipelineRef": "Example", + "variantCaller": "GATK4.0", + "vcfSampleId": "HG00105" + }, + { + "id": "UNQ_1-9", + "individualId": "UNQ_1-9", + "biosampleId": "UNQ_1-9", + "runId": "UNQ_1-9", + "aligner": "bowtie", + "analysisDate": "2021-2-17", + "pipelineName": "pipeline 2", + "pipelineRef": "Example", + "variantCaller": "SoapSNP", + "vcfSampleId": "HG00106" + }, + { + "id": "UNQ_1-10", + "individualId": "UNQ_1-10", + "biosampleId": "UNQ_1-10", + "runId": "UNQ_1-10", + "aligner": "bwa-0.7.8", + "analysisDate": "2019-8-13", + "pipelineName": "pipeline 1", + "pipelineRef": "Example", + "variantCaller": "SoapSNP", + "vcfSampleId": "HG00107" + } + ], + "index": true +} \ No newline at end of file diff --git a/lambda/getGenomicVariants/route_g_variants_id_biosamples.py b/lambda/getGenomicVariants/route_g_variants_id_biosamples.py index 93598a9..bc3d70d 100644 --- a/lambda/getGenomicVariants/route_g_variants_id_biosamples.py +++ b/lambda/getGenomicVariants/route_g_variants_id_biosamples.py @@ -1,4 +1,4 @@ -from collections import defaultdict +from collections import defaultdict, OrderedDict import json import base64 @@ -168,46 +168,42 @@ def route(request: RequestParams, variant_id): ) queries 
= [] + + dataset_samples_sorted = OrderedDict(sorted(dataset_samples.items())) iterated_biosamples = 0 chosen_biosamples = 0 + total_biosamples = sum([len(sample_names) for sample_names in dataset_samples_sorted.values()]) for dataset_id, sample_names in dataset_samples.items(): - if (len(sample_names)) > 0: - if request.query.requested_granularity == "count": - # query = get_count_query(dataset_id, sample_names) - # queries.append(query) - # TODO optimise for duplicate individuals - iterated_biosamples += len(sample_names) - elif request.query.requested_granularity == Granularity.RECORD: - # TODO optimise for duplicate individuals - chosen_samples = [] - - for sample_name in sample_names: - iterated_biosamples += 1 - if ( - iterated_biosamples > request.query.pagination.skip - and chosen_biosamples < request.query.pagination.limit - ): - chosen_samples.append(sample_name) - chosen_biosamples += 1 - - if chosen_biosamples == request.query.pagination.limit: - break - if len(chosen_samples) > 0: - query = get_record_query(dataset_id, chosen_samples) - queries.append(query) - - if request.query.requested_granularity == "boolean": + if len(sample_names) > 0 and request.query.requested_granularity == Granularity.RECORD: + # TODO optimise for duplicate individuals + chosen_samples = [] + + for sample_name in sample_names: + iterated_biosamples += 1 + if ( + iterated_biosamples > request.query.pagination.skip + and chosen_biosamples < request.query.pagination.limit + ): + chosen_samples.append(sample_name) + chosen_biosamples += 1 + + if chosen_biosamples == request.query.pagination.limit: + break + if len(chosen_samples) > 0: + query = get_record_query(dataset_id, chosen_samples) + queries.append(query) + + if request.query.requested_granularity == Granularity.BOOLEAN: response = build_beacon_boolean_response( {}, 1 if exists else 0, request, {}, DefaultSchemas.BIOSAMPLES ) print("Returning Response: {}".format(json.dumps(response))) return bundle_response(200, response) 
- if request.query.requested_granularity == "count": - count = iterated_biosamples + if request.query.requested_granularity == Granularity.COUNT: response = build_beacon_count_response( - {}, count, request, {}, DefaultSchemas.BIOSAMPLES + {}, total_biosamples, request, {}, DefaultSchemas.BIOSAMPLES ) print("Returning Response: {}".format(json.dumps(response))) return bundle_response(200, response) @@ -217,7 +213,7 @@ def route(request: RequestParams, variant_id): biosamples = Biosample.get_by_query(query) if len(queries) > 0 else [] response = build_beacon_resultset_response( jsons.dump(biosamples, strip_privates=True), - len(biosamples), + total_biosamples, request, {}, DefaultSchemas.BIOSAMPLES, diff --git a/lambda/getGenomicVariants/route_g_variants_id_individuals.py b/lambda/getGenomicVariants/route_g_variants_id_individuals.py index b977a90..645aa00 100644 --- a/lambda/getGenomicVariants/route_g_variants_id_individuals.py +++ b/lambda/getGenomicVariants/route_g_variants_id_individuals.py @@ -139,44 +139,38 @@ def route(request: RequestParams, variant_id): dataset_samples_sorted = OrderedDict(sorted(dataset_samples.items())) iterated_individuals = 0 chosen_individuals = 0 + total_individuals = sum([len(sample_names) for sample_names in dataset_samples_sorted.values()]) for dataset_id, sample_names in dataset_samples_sorted.items(): - if (len(sample_names)) > 0: - if request.query.requested_granularity == "count": - # query = get_count_query(dataset_id, sample_names) - # queries.append(query) - # TODO optimise for duplicate individuals - iterated_individuals += len(sample_names) - elif request.query.requested_granularity == Granularity.RECORD: - # TODO optimise for duplicate individuals - chosen_samples = [] - - for sample_name in sample_names: - iterated_individuals += 1 - if ( - iterated_individuals > request.query.pagination.skip - and chosen_individuals < request.query.pagination.limit - ): - chosen_samples.append(sample_name) - chosen_individuals += 1 - - 
if chosen_individuals == request.query.pagination.limit: - break - if len(chosen_samples) > 0: - query = get_record_query(dataset_id, chosen_samples) - queries.append(query) - - if request.query.requested_granularity == "boolean": + if len(sample_names) > 0 and request.query.requested_granularity == Granularity.RECORD: + # TODO optimise for duplicate individuals + chosen_samples = [] + + for sample_name in sample_names: + iterated_individuals += 1 + if ( + iterated_individuals > request.query.pagination.skip + and chosen_individuals < request.query.pagination.limit + ): + chosen_samples.append(sample_name) + chosen_individuals += 1 + + if chosen_individuals == request.query.pagination.limit: + break + if len(chosen_samples) > 0: + query = get_record_query(dataset_id, chosen_samples) + queries.append(query) + + if request.query.requested_granularity == Granularity.BOOLEAN: response = build_beacon_boolean_response( {}, 1 if exists else 0, request, {}, DefaultSchemas.INDIVIDUALS ) print("Returning Response: {}".format(json.dumps(response))) return bundle_response(200, response) - if request.query.requested_granularity == "count": - count = iterated_individuals + if request.query.requested_granularity == Granularity.COUNT: response = build_beacon_count_response( - {}, count, request, {}, DefaultSchemas.INDIVIDUALS + {}, total_individuals, request, {}, DefaultSchemas.INDIVIDUALS ) print("Returning Response: {}".format(json.dumps(response))) return bundle_response(200, response) @@ -186,7 +180,7 @@ def route(request: RequestParams, variant_id): individuals = Individual.get_by_query(query) if len(queries) > 0 else [] response = build_beacon_resultset_response( jsons.dump(individuals, strip_privates=True), - len(individuals), + total_individuals, request, {}, DefaultSchemas.INDIVIDUALS,