diff --git a/data_processors/pipeline/lambdas/rnasum.py b/data_processors/pipeline/lambdas/rnasum.py index 5d982445..4de0fa62 100644 --- a/data_processors/pipeline/lambdas/rnasum.py +++ b/data_processors/pipeline/lambdas/rnasum.py @@ -75,20 +75,20 @@ def sqs_handler(event, context): def handler(event, context) -> dict: """event payload dict { - "dragen_transcriptome_directory": { + "dragen_wts_dir": { "class": "Directory", "location": "gds://path/to/WTS/output/dir" }, - "umccrise_directory": { + "umccrise": { "class": "Directory", "location": "gds://path/to/umccrise/output/dir" }, - "arriba_directory": { + "arriba_dir": { "class": "Directory", "location": "gds://path/to/arriba/output/dir" }, "sample_name": "TUMOR_SAMPLE_ID", - "report_directory": "SUBJECT_ID__WTS_TUMOR_LIBRARY_ID", + "report_dir": "SUBJECT_ID__WTS_TUMOR_LIBRARY_ID", "dataset": "reference_data", "subject_id": "SUBJECT_ID", "tumor_library_id": "WTS_TUMOR_LIBRARY_ID" @@ -112,10 +112,10 @@ def handler(event, context) -> dict: # Read input template from parameter store workflow_input: dict = wfl_helper.get_workflow_input() - workflow_input['dragen_transcriptome_directory'] = event['dragen_transcriptome_directory'] - workflow_input['umccrise_directory'] = event['umccrise_directory'] - workflow_input['arriba_directory'] = event['arriba_directory'] - workflow_input['report_directory'] = event['report_directory'] + workflow_input['dragen_wts_dir'] = event['dragen_wts_dir'] + workflow_input['umccrise'] = event['umccrise'] + workflow_input['arriba_dir'] = event['arriba_dir'] + workflow_input['report_dir'] = event['report_dir'] workflow_input['sample_name'] = sample_name # TCGA dataset diff --git a/data_processors/pipeline/lambdas/tests/test_rnasum.py b/data_processors/pipeline/lambdas/tests/test_rnasum.py index 0ccff977..3134fa90 100644 --- a/data_processors/pipeline/lambdas/tests/test_rnasum.py +++ b/data_processors/pipeline/lambdas/tests/test_rnasum.py @@ -24,20 +24,20 @@ def test_handler(self): result: dict = rnasum.handler( { - "dragen_transcriptome_directory": { + "dragen_wts_dir": { "class": "Directory", "location": "gds://path/to/WTS/output/dir" }, - "umccrise_directory": { + "umccrise": { "class": "Directory", "location": "gds://path/to/umccrise/output/dir" }, - "arriba_directory": { + "arriba_dir": { "class": "Directory", "location": "gds://path/to/arriba/output/dir" }, "sample_name": "TUMOR_SAMPLE_ID", - "report_directory": "SUBJECT_ID__WTS_TUMOR_LIBRARY_ID", + "report_dir": "SUBJECT_ID__WTS_TUMOR_LIBRARY_ID", "dataset": "BRCA", "subject_id": "SUBJECT_ID", "tumor_library_id": mock_tumor_library_run.library_id @@ -60,6 +60,14 @@ def test_handler(self): logger.info(lib_run) self.assertIn(lib_run.library_id, fixtures) + # Assert CWL workflow input keys payload interface are in-placed + # See https://github.com/umccr/data-portal-apis/issues/672 + rnasum_wfl_input = json.loads(wfl.input) + self.assertIn("dragen_wts_dir", rnasum_wfl_input.keys()) + self.assertIn("umccrise", rnasum_wfl_input.keys()) + self.assertIn("arriba_dir", rnasum_wfl_input.keys()) + self.assertIn("report_dir", rnasum_wfl_input.keys()) + def test_handler_dataset_none(self): """ python manage.py test data_processors.pipeline.lambdas.tests.test_rnasum.RNAsumLambdaUnitTests.test_handler_dataset_none @@ -67,15 +75,12 @@ def test_handler_dataset_none(self): input_tpl = json.dumps( { - "dragen_transcriptome_directory": None, - "umccrise_directory": None, + "dragen_wts_dir": None, + "umccrise": None, + "arriba_dir": None, "sample_name": None, "dataset": None, - "report_directory": None, - "ref_data_directory": { - "class": "File", - "location": "gds://development/reference-data/rnasum/" - } + "report_dir": None } ) @@ -85,20 +90,20 @@ def test_handler_dataset_none(self): result: dict = rnasum.handler( { - "dragen_transcriptome_directory": { + "dragen_wts_dir": { "class": "Directory", "location": "gds://path/to/WTS/output/dir" }, - "umccrise_directory": { + "umccrise": { "class": "Directory", "location": "gds://path/to/umccrise/output/dir" }, - "arriba_directory": { + "arriba_dir": { "class": "Directory", "location": "gds://path/to/arriba/output/dir" }, "sample_name": "TUMOR_SAMPLE_ID", - "report_directory": "SUBJECT_ID__WTS_TUMOR_LIBRARY_ID", + "report_dir": "SUBJECT_ID__WTS_TUMOR_LIBRARY_ID", "dataset": None, "subject_id": "SUBJECT_ID", "tumor_library_id": "WTS_TUMOR_LIBRARY_ID" @@ -124,15 +129,12 @@ def test_handler_dataset_brca(self): input_tpl = json.dumps( { - "dragen_transcriptome_directory": None, - "umccrise_directory": None, + "dragen_wts_dir": None, + "umccrise": None, + "arriba_dir": None, "sample_name": None, "dataset": "BRCA", # BRCA as default dataset in input template - "report_directory": None, - "ref_data_directory": { - "class": "File", - "location": "gds://development/reference-data/rnasum/" - } + "report_dir": None } ) @@ -142,20 +144,20 @@ def test_handler_dataset_brca(self): result: dict = rnasum.handler( { - "dragen_transcriptome_directory": { + "dragen_wts_dir": { "class": "Directory", "location": "gds://path/to/WTS/output/dir" }, - "umccrise_directory": { + "umccrise": { "class": "Directory", "location": "gds://path/to/umccrise/output/dir" }, - "arriba_directory": { + "arriba_dir": { "class": "Directory", "location": "gds://path/to/arriba/output/dir" }, "sample_name": "TUMOR_SAMPLE_ID", - "report_directory": "SUBJECT_ID__WTS_TUMOR_LIBRARY_ID", + "report_dir": "SUBJECT_ID__WTS_TUMOR_LIBRARY_ID", "dataset": None, # User payload None dataset "subject_id": "SUBJECT_ID", "tumor_library_id": "WTS_TUMOR_LIBRARY_ID" diff --git a/data_processors/pipeline/orchestration/rnasum_step.py b/data_processors/pipeline/orchestration/rnasum_step.py index 60cf323b..5075ebe6 100644 --- a/data_processors/pipeline/orchestration/rnasum_step.py +++ b/data_processors/pipeline/orchestration/rnasum_step.py @@ -160,11 +160,11 @@ def prepare_rnasum_jobs(this_workflow: Workflow) -> List[Dict]: deduce_umccrise_result_location_from_root_dir(umccrise_directory, this_subject, this_wgs_tumor_sample) job = { - "dragen_transcriptome_directory": dragen_transcriptome_directory, - "umccrise_directory": umccrise_directory, - "arriba_directory": arriba_directory, + "dragen_wts_dir": dragen_transcriptome_directory, + "umccrise": umccrise_directory, + "arriba_dir": arriba_directory, "sample_name": wts_tumor_meta.sample_id, - "report_directory": f"{this_subject}__{this_wts_tumor_library}", + "report_dir": f"{this_subject}__{this_wts_tumor_library}", "dataset": tumor_dataset, "subject_id": this_subject, "tumor_library_id": this_wts_tumor_library, diff --git a/data_processors/pipeline/orchestration/tests/test_rnasum_step.py b/data_processors/pipeline/orchestration/tests/test_rnasum_step.py index 0a7b5b36..ceaa4663 100644 --- a/data_processors/pipeline/orchestration/tests/test_rnasum_step.py +++ b/data_processors/pipeline/orchestration/tests/test_rnasum_step.py @@ -115,8 +115,8 @@ def test_prepare_rnasum_jobs(self): for job in job_list: logger.info(f"\n{libjson.dumps(job)}") # NOTE libjson is intentional and part of ser/deser test - self.assertIn("dragen_transcriptome_directory", job.keys()) - self.assertIn("umccrise_directory", job.keys()) + self.assertIn("dragen_wts_dir", job.keys()) + self.assertIn("umccrise", job.keys()) self.assertEqual(job['subject_id'], TestConstant.subject_id.value) self.assertEqual(job['tumor_library_id'], TestConstant.wts_library_id_tumor.value) @@ -234,7 +234,7 @@ def test_prepare_rnasum_jobs_SBJ01670(self): # Extra assert that we have really pointed to umccrise sample result subdirectory wgs_tumor_sample_id = LabMetadata.objects.get(library_id__iexact="L2200320").sample_id - self.assertIn(wgs_tumor_sample_id, job_list[0]['umccrise_directory']['location']) + self.assertIn(wgs_tumor_sample_id, job_list[0]['umccrise']['location']) @skip def test_prepare_rnasum_jobs_SBJ01285(self): @@ -321,6 +321,10 @@ def test_prepare_rnasum_jobs_SBJ01285(self): self.assertIsNotNone(job_list) self.assertEqual(len(job_list), 1) + # Extra assert that we have really pointed to umccrise sample result subdirectory + wgs_tumor_sample_id = LabMetadata.objects.get(library_id__iexact="L2101732").sample_id + self.assertIn(wgs_tumor_sample_id, job_list[0]['umccrise']['location']) + @skip def test_prepare_rnasum_jobs_SBJ01625(self): """ @@ -397,6 +401,10 @@ def test_prepare_rnasum_jobs_SBJ01625(self): self.assertIsNotNone(job_list) self.assertEqual(len(job_list), 1) + # Extra assert that we have really pointed to umccrise sample result subdirectory + wgs_tumor_sample_id = LabMetadata.objects.get(library_id__iexact="L2200229").sample_id + self.assertIn(wgs_tumor_sample_id, job_list[0]['umccrise']['location']) + @skip def test_prepare_rnasum_jobs_SBJ00910(self): """ @@ -472,6 +480,10 @@ def test_prepare_rnasum_jobs_SBJ00910(self): self.assertIsNotNone(job_list) self.assertEqual(len(job_list), 1) + # Extra assert that we have really pointed to umccrise sample result subdirectory + wgs_tumor_sample_id = LabMetadata.objects.get(library_id__iexact="L2100746").sample_id + self.assertIn(wgs_tumor_sample_id, job_list[0]['umccrise']['location']) + @skip def test_prepare_rnasum_jobs_SBJ02060(self): """ @@ -599,6 +611,10 @@ def test_prepare_rnasum_jobs_SBJ02060(self): self.assertEqual(len(job_list), 1) self.assertEqual(job_list[0]['tumor_library_id'], "L2201545") + # Extra assert that we have really pointed to umccrise sample result subdirectory + wgs_tumor_sample_id = LabMetadata.objects.get(library_id__iexact="L2201539").sample_id + self.assertIn(wgs_tumor_sample_id, job_list[0]['umccrise']['location']) + @skip def test_prepare_rnasum_jobs_SBJ02569(self): """ @@ -699,6 +715,10 @@ def test_prepare_rnasum_jobs_SBJ02569(self): self.assertEqual(len(job_list), 1) self.assertEqual(job_list[0]['tumor_library_id'], "L2201035") + # Extra assert that we have really pointed to umccrise sample result subdirectory + wgs_tumor_sample_id = LabMetadata.objects.get(library_id__iexact="L2200985").sample_id + self.assertIn(wgs_tumor_sample_id, job_list[0]['umccrise']['location']) + @skip def test_lookup_tcga_dataset(self): """ diff --git a/docs/pipeline/automation/rnasum.md b/docs/pipeline/automation/rnasum.md index a89d8bd5..f56ee156 100644 --- a/docs/pipeline/automation/rnasum.md +++ b/docs/pipeline/automation/rnasum.md @@ -75,20 +75,20 @@ aws lambda invoke --profile prodops \ --function-name data-portal-api-prod-rnasum \ --cli-binary-format raw-in-base64-out \ --payload '{ - "dragen_transcriptome_directory": { + "dragen_wts_dir": { "class": "Directory", "location": "gds://path/to/WTS/output/dir" }, - "arriba_directory": { + "arriba_dir": { "class": "Directory", "location": "gds://path/to/arriba/output/dir" }, - "umccrise_directory": { + "umccrise": { "class": "Directory", "location": "gds://path/to/umccrise/output/dir" }, "sample_name": "TUMOR_SAMPLE_ID", - "report_directory": "SUBJECT_ID__WTS_TUMOR_LIBRARY_ID", + "report_dir": "SUBJECT_ID__WTS_TUMOR_LIBRARY_ID", "dataset": "reference_data", "subject_id": "SUBJECT_ID", "tumor_library_id": "WTS_TUMOR_LIBRARY_ID"