umccr · skanwal · May 27, 2024 · May 23, 2024 · May 23, 2024 · May 27, 2024
diff --git a/data_processors/pipeline/lambdas/rnasum.py b/data_processors/pipeline/lambdas/rnasum.py
@@ -75,20 +75,20 @@ def sqs_handler(event, context):
 def handler(event, context) -> dict:
     """event payload dict
     {
-        "dragen_transcriptome_directory": {
+        "dragen_wts_dir": {
             "class": "Directory",
             "location": "gds://path/to/WTS/output/dir"
         },
-        "umccrise_directory": {
+        "umccrise": {
             "class": "Directory",
             "location": "gds://path/to/umccrise/output/dir"
         },
-        "arriba_directory": {
+        "arriba_dir": {
             "class": "Directory",
             "location": "gds://path/to/arriba/output/dir"
         },
         "sample_name": "TUMOR_SAMPLE_ID",
-        "report_directory": "SUBJECT_ID__WTS_TUMOR_LIBRARY_ID",
+        "report_dir": "SUBJECT_ID__WTS_TUMOR_LIBRARY_ID",
         "dataset": "reference_data",
         "subject_id": "SUBJECT_ID",
         "tumor_library_id": "WTS_TUMOR_LIBRARY_ID"
@@ -112,10 +112,10 @@ def handler(event, context) -> dict:
 
     # Read input template from parameter store
     workflow_input: dict = wfl_helper.get_workflow_input()
-    workflow_input['dragen_transcriptome_directory'] = event['dragen_transcriptome_directory']
-    workflow_input['umccrise_directory'] = event['umccrise_directory']
-    workflow_input['arriba_directory'] = event['arriba_directory']
-    workflow_input['report_directory'] = event['report_directory']
+    workflow_input['dragen_wts_dir'] = event['dragen_wts_dir']
+    workflow_input['umccrise'] = event['umccrise']
+    workflow_input['arriba_dir'] = event['arriba_dir']
+    workflow_input['report_dir'] = event['report_dir']
     workflow_input['sample_name'] = sample_name
 
     # TCGA dataset

diff --git a/data_processors/pipeline/lambdas/tests/test_rnasum.py b/data_processors/pipeline/lambdas/tests/test_rnasum.py
@@ -24,20 +24,20 @@ def test_handler(self):
 
         result: dict = rnasum.handler(
             {
-                "dragen_transcriptome_directory": {
+                "dragen_wts_dir": {
                     "class": "Directory",
                     "location": "gds://path/to/WTS/output/dir"
                 },
-                "umccrise_directory": {
+                "umccrise": {
                     "class": "Directory",
                     "location": "gds://path/to/umccrise/output/dir"
                 },
-                "arriba_directory": {
+                "arriba_dir": {
                     "class": "Directory",
                     "location": "gds://path/to/arriba/output/dir"
                 },
                 "sample_name": "TUMOR_SAMPLE_ID",
-                "report_directory": "SUBJECT_ID__WTS_TUMOR_LIBRARY_ID",
+                "report_dir": "SUBJECT_ID__WTS_TUMOR_LIBRARY_ID",
                 "dataset": "BRCA",
                 "subject_id": "SUBJECT_ID",
                 "tumor_library_id": mock_tumor_library_run.library_id
@@ -60,22 +60,27 @@ def test_handler(self):
             logger.info(lib_run)
             self.assertIn(lib_run.library_id, fixtures)
 
+        # Assert CWL workflow input keys payload interface are in-placed
+        # See https://github.com/umccr/data-portal-apis/issues/672
+        rnasum_wfl_input = json.loads(wfl.input)
+        self.assertIn("dragen_wts_dir", rnasum_wfl_input.keys())
+        self.assertIn("umccrise", rnasum_wfl_input.keys())
+        self.assertIn("arriba_dir", rnasum_wfl_input.keys())
+        self.assertIn("report_dir", rnasum_wfl_input.keys())
+
     def test_handler_dataset_none(self):
         """
         python manage.py test data_processors.pipeline.lambdas.tests.test_rnasum.RNAsumLambdaUnitTests.test_handler_dataset_none
         """
 
         input_tpl = json.dumps(
             {
-                "dragen_transcriptome_directory": None,
-                "umccrise_directory": None,
+                "dragen_wts_dir": None,
+                "umccrise": None,
+                "arriba_dir": None,
                 "sample_name": None,
                 "dataset": None,
-                "report_directory": None,
-                "ref_data_directory": {
-                    "class": "File",
-                    "location": "gds://development/reference-data/rnasum/"
-                }
+                "report_dir": None
             }
         )
 
@@ -85,20 +90,20 @@ def test_handler_dataset_none(self):
 
         result: dict = rnasum.handler(
             {
-                "dragen_transcriptome_directory": {
+                "dragen_wts_dir": {
                     "class": "Directory",
                     "location": "gds://path/to/WTS/output/dir"
                 },
-                "umccrise_directory": {
+                "umccrise": {
                     "class": "Directory",
                     "location": "gds://path/to/umccrise/output/dir"
                 },
-                "arriba_directory": {
+                "arriba_dir": {
                     "class": "Directory",
                     "location": "gds://path/to/arriba/output/dir"
                 },
                 "sample_name": "TUMOR_SAMPLE_ID",
-                "report_directory": "SUBJECT_ID__WTS_TUMOR_LIBRARY_ID",
+                "report_dir": "SUBJECT_ID__WTS_TUMOR_LIBRARY_ID",
                 "dataset": None,
                 "subject_id": "SUBJECT_ID",
                 "tumor_library_id": "WTS_TUMOR_LIBRARY_ID"
@@ -124,15 +129,12 @@ def test_handler_dataset_brca(self):
 
         input_tpl = json.dumps(
             {
-                "dragen_transcriptome_directory": None,
-                "umccrise_directory": None,
+                "dragen_wts_dir": None,
+                "umccrise": None,
+                "arriba_dir": None,
                 "sample_name": None,
                 "dataset": "BRCA",  # BRCA as default dataset in input template
-                "report_directory": None,
-                "ref_data_directory": {
-                    "class": "File",
-                    "location": "gds://development/reference-data/rnasum/"
-                }
+                "report_dir": None
             }
         )
 
@@ -142,20 +144,20 @@ def test_handler_dataset_brca(self):
 
         result: dict = rnasum.handler(
             {
-                "dragen_transcriptome_directory": {
+                "dragen_wts_dir": {
                     "class": "Directory",
                     "location": "gds://path/to/WTS/output/dir"
                 },
-                "umccrise_directory": {
+                "umccrise": {
                     "class": "Directory",
                     "location": "gds://path/to/umccrise/output/dir"
                 },
-                "arriba_directory": {
+                "arriba_dir": {
                     "class": "Directory",
                     "location": "gds://path/to/arriba/output/dir"
                 },
                 "sample_name": "TUMOR_SAMPLE_ID",
-                "report_directory": "SUBJECT_ID__WTS_TUMOR_LIBRARY_ID",
+                "report_dir": "SUBJECT_ID__WTS_TUMOR_LIBRARY_ID",
                 "dataset": None,  # User payload None dataset
                 "subject_id": "SUBJECT_ID",
                 "tumor_library_id": "WTS_TUMOR_LIBRARY_ID"

diff --git a/data_processors/pipeline/orchestration/rnasum_step.py b/data_processors/pipeline/orchestration/rnasum_step.py
@@ -160,11 +160,11 @@ def prepare_rnasum_jobs(this_workflow: Workflow) -> List[Dict]:
     deduce_umccrise_result_location_from_root_dir(umccrise_directory, this_subject, this_wgs_tumor_sample)
 
     job = {
-        "dragen_transcriptome_directory": dragen_transcriptome_directory,
-        "umccrise_directory": umccrise_directory,
-        "arriba_directory": arriba_directory,
+        "dragen_wts_dir": dragen_transcriptome_directory,
+        "umccrise": umccrise_directory,
+        "arriba_dir": arriba_directory,
         "sample_name": wts_tumor_meta.sample_id,
-        "report_directory": f"{this_subject}__{this_wts_tumor_library}",
+        "report_dir": f"{this_subject}__{this_wts_tumor_library}",
         "dataset": tumor_dataset,
         "subject_id": this_subject,
         "tumor_library_id": this_wts_tumor_library,

diff --git a/data_processors/pipeline/orchestration/tests/test_rnasum_step.py b/data_processors/pipeline/orchestration/tests/test_rnasum_step.py
@@ -115,8 +115,8 @@ def test_prepare_rnasum_jobs(self):
 
         for job in job_list:
             logger.info(f"\n{libjson.dumps(job)}")  # NOTE libjson is intentional and part of ser/deser test
-            self.assertIn("dragen_transcriptome_directory", job.keys())
-            self.assertIn("umccrise_directory", job.keys())
+            self.assertIn("dragen_wts_dir", job.keys())
+            self.assertIn("umccrise", job.keys())
             self.assertEqual(job['subject_id'], TestConstant.subject_id.value)
             self.assertEqual(job['tumor_library_id'], TestConstant.wts_library_id_tumor.value)
 
@@ -234,7 +234,7 @@ def test_prepare_rnasum_jobs_SBJ01670(self):
 
         # Extra assert that we have really pointed to umccrise sample result subdirectory
         wgs_tumor_sample_id = LabMetadata.objects.get(library_id__iexact="L2200320").sample_id
-        self.assertIn(wgs_tumor_sample_id, job_list[0]['umccrise_directory']['location'])
+        self.assertIn(wgs_tumor_sample_id, job_list[0]['umccrise']['location'])
 
     @skip
     def test_prepare_rnasum_jobs_SBJ01285(self):
@@ -321,6 +321,10 @@ def test_prepare_rnasum_jobs_SBJ01285(self):
         self.assertIsNotNone(job_list)
         self.assertEqual(len(job_list), 1)
 
+        # Extra assert that we have really pointed to umccrise sample result subdirectory
+        wgs_tumor_sample_id = LabMetadata.objects.get(library_id__iexact="L2101732").sample_id
+        self.assertIn(wgs_tumor_sample_id, job_list[0]['umccrise']['location'])
+
     @skip
     def test_prepare_rnasum_jobs_SBJ01625(self):
         """
@@ -397,6 +401,10 @@ def test_prepare_rnasum_jobs_SBJ01625(self):
         self.assertIsNotNone(job_list)
         self.assertEqual(len(job_list), 1)
 
+        # Extra assert that we have really pointed to umccrise sample result subdirectory
+        wgs_tumor_sample_id = LabMetadata.objects.get(library_id__iexact="L2200229").sample_id
+        self.assertIn(wgs_tumor_sample_id, job_list[0]['umccrise']['location'])
+
     @skip
     def test_prepare_rnasum_jobs_SBJ00910(self):
         """
@@ -472,6 +480,10 @@ def test_prepare_rnasum_jobs_SBJ00910(self):
         self.assertIsNotNone(job_list)
         self.assertEqual(len(job_list), 1)
 
+        # Extra assert that we have really pointed to umccrise sample result subdirectory
+        wgs_tumor_sample_id = LabMetadata.objects.get(library_id__iexact="L2100746").sample_id
+        self.assertIn(wgs_tumor_sample_id, job_list[0]['umccrise']['location'])
+
     @skip
     def test_prepare_rnasum_jobs_SBJ02060(self):
         """
@@ -599,6 +611,10 @@ def test_prepare_rnasum_jobs_SBJ02060(self):
         self.assertEqual(len(job_list), 1)
         self.assertEqual(job_list[0]['tumor_library_id'], "L2201545")
 
+        # Extra assert that we have really pointed to umccrise sample result subdirectory
+        wgs_tumor_sample_id = LabMetadata.objects.get(library_id__iexact="L2201539").sample_id
+        self.assertIn(wgs_tumor_sample_id, job_list[0]['umccrise']['location'])
+
     @skip
     def test_prepare_rnasum_jobs_SBJ02569(self):
         """
@@ -699,6 +715,10 @@ def test_prepare_rnasum_jobs_SBJ02569(self):
         self.assertEqual(len(job_list), 1)
         self.assertEqual(job_list[0]['tumor_library_id'], "L2201035")
 
+        # Extra assert that we have really pointed to umccrise sample result subdirectory
+        wgs_tumor_sample_id = LabMetadata.objects.get(library_id__iexact="L2200985").sample_id
+        self.assertIn(wgs_tumor_sample_id, job_list[0]['umccrise']['location'])
+
     @skip
     def test_lookup_tcga_dataset(self):
         """

diff --git a/docs/pipeline/automation/rnasum.md b/docs/pipeline/automation/rnasum.md
@@ -75,20 +75,20 @@ aws lambda invoke --profile prodops \
   --function-name data-portal-api-prod-rnasum \
   --cli-binary-format raw-in-base64-out \
   --payload '{
-        "dragen_transcriptome_directory": {
+        "dragen_wts_dir": {
             "class": "Directory",
             "location": "gds://path/to/WTS/output/dir"
         },
-        "arriba_directory": {
+        "arriba_dir": {
             "class": "Directory",
             "location": "gds://path/to/arriba/output/dir"
         },
-        "umccrise_directory": {
+        "umccrise": {
             "class": "Directory",
             "location": "gds://path/to/umccrise/output/dir"
         },
         "sample_name": "TUMOR_SAMPLE_ID",
-        "report_directory": "SUBJECT_ID__WTS_TUMOR_LIBRARY_ID",
+        "report_dir": "SUBJECT_ID__WTS_TUMOR_LIBRARY_ID",
         "dataset": "reference_data",
         "subject_id": "SUBJECT_ID",
         "tumor_library_id": "WTS_TUMOR_LIBRARY_ID"