Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update RNAsum orchestration #674

Merged
merged 4 commits into from
May 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions data_processors/pipeline/lambdas/rnasum.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,20 +75,20 @@ def sqs_handler(event, context):
def handler(event, context) -> dict:
"""event payload dict
{
"dragen_transcriptome_directory": {
"dragen_wts_dir": {
"class": "Directory",
"location": "gds://path/to/WTS/output/dir"
},
"umccrise_directory": {
"umccrise": {
"class": "Directory",
"location": "gds://path/to/umccrise/output/dir"
},
"arriba_directory": {
"arriba_dir": {
"class": "Directory",
"location": "gds://path/to/arriba/output/dir"
},
"sample_name": "TUMOR_SAMPLE_ID",
"report_directory": "SUBJECT_ID__WTS_TUMOR_LIBRARY_ID",
"report_dir": "SUBJECT_ID__WTS_TUMOR_LIBRARY_ID",
"dataset": "reference_data",
"subject_id": "SUBJECT_ID",
"tumor_library_id": "WTS_TUMOR_LIBRARY_ID"
Expand All @@ -112,10 +112,10 @@ def handler(event, context) -> dict:

# Read input template from parameter store
workflow_input: dict = wfl_helper.get_workflow_input()
workflow_input['dragen_transcriptome_directory'] = event['dragen_transcriptome_directory']
workflow_input['umccrise_directory'] = event['umccrise_directory']
workflow_input['arriba_directory'] = event['arriba_directory']
workflow_input['report_directory'] = event['report_directory']
workflow_input['dragen_wts_dir'] = event['dragen_wts_dir']
workflow_input['umccrise'] = event['umccrise']
workflow_input['arriba_dir'] = event['arriba_dir']
workflow_input['report_dir'] = event['report_dir']
workflow_input['sample_name'] = sample_name

# TCGA dataset
Expand Down
54 changes: 28 additions & 26 deletions data_processors/pipeline/lambdas/tests/test_rnasum.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,20 +24,20 @@ def test_handler(self):

result: dict = rnasum.handler(
{
"dragen_transcriptome_directory": {
"dragen_wts_dir": {
"class": "Directory",
"location": "gds://path/to/WTS/output/dir"
},
"umccrise_directory": {
"umccrise": {
"class": "Directory",
"location": "gds://path/to/umccrise/output/dir"
},
"arriba_directory": {
"arriba_dir": {
"class": "Directory",
"location": "gds://path/to/arriba/output/dir"
},
"sample_name": "TUMOR_SAMPLE_ID",
"report_directory": "SUBJECT_ID__WTS_TUMOR_LIBRARY_ID",
"report_dir": "SUBJECT_ID__WTS_TUMOR_LIBRARY_ID",
"dataset": "BRCA",
"subject_id": "SUBJECT_ID",
"tumor_library_id": mock_tumor_library_run.library_id
Expand All @@ -60,22 +60,27 @@ def test_handler(self):
logger.info(lib_run)
self.assertIn(lib_run.library_id, fixtures)

# Assert CWL workflow input keys payload interface are in-placed
# See https://github.com/umccr/data-portal-apis/issues/672
rnasum_wfl_input = json.loads(wfl.input)
self.assertIn("dragen_wts_dir", rnasum_wfl_input.keys())
self.assertIn("umccrise", rnasum_wfl_input.keys())
self.assertIn("arriba_dir", rnasum_wfl_input.keys())
self.assertIn("report_dir", rnasum_wfl_input.keys())

def test_handler_dataset_none(self):
"""
python manage.py test data_processors.pipeline.lambdas.tests.test_rnasum.RNAsumLambdaUnitTests.test_handler_dataset_none
"""

input_tpl = json.dumps(
{
"dragen_transcriptome_directory": None,
"umccrise_directory": None,
"dragen_wts_dir": None,
"umccrise": None,
"arriba_dir": None,
"sample_name": None,
"dataset": None,
"report_directory": None,
"ref_data_directory": {
"class": "File",
"location": "gds://development/reference-data/rnasum/"
}
"report_dir": None
}
)

Expand All @@ -85,20 +90,20 @@ def test_handler_dataset_none(self):

result: dict = rnasum.handler(
{
"dragen_transcriptome_directory": {
"dragen_wts_dir": {
"class": "Directory",
"location": "gds://path/to/WTS/output/dir"
},
"umccrise_directory": {
"umccrise": {
"class": "Directory",
"location": "gds://path/to/umccrise/output/dir"
},
"arriba_directory": {
"arriba_dir": {
"class": "Directory",
"location": "gds://path/to/arriba/output/dir"
},
"sample_name": "TUMOR_SAMPLE_ID",
"report_directory": "SUBJECT_ID__WTS_TUMOR_LIBRARY_ID",
"report_dir": "SUBJECT_ID__WTS_TUMOR_LIBRARY_ID",
"dataset": None,
"subject_id": "SUBJECT_ID",
"tumor_library_id": "WTS_TUMOR_LIBRARY_ID"
Expand All @@ -124,15 +129,12 @@ def test_handler_dataset_brca(self):

input_tpl = json.dumps(
{
"dragen_transcriptome_directory": None,
"umccrise_directory": None,
"dragen_wts_dir": None,
"umccrise": None,
"arriba_dir": None,
"sample_name": None,
"dataset": "BRCA", # BRCA as default dataset in input template
"report_directory": None,
"ref_data_directory": {
"class": "File",
"location": "gds://development/reference-data/rnasum/"
}
"report_dir": None
}
)

Expand All @@ -142,20 +144,20 @@ def test_handler_dataset_brca(self):

result: dict = rnasum.handler(
{
"dragen_transcriptome_directory": {
"dragen_wts_dir": {
"class": "Directory",
"location": "gds://path/to/WTS/output/dir"
},
"umccrise_directory": {
"umccrise": {
"class": "Directory",
"location": "gds://path/to/umccrise/output/dir"
},
"arriba_directory": {
"arriba_dir": {
"class": "Directory",
"location": "gds://path/to/arriba/output/dir"
},
"sample_name": "TUMOR_SAMPLE_ID",
"report_directory": "SUBJECT_ID__WTS_TUMOR_LIBRARY_ID",
"report_dir": "SUBJECT_ID__WTS_TUMOR_LIBRARY_ID",
"dataset": None, # User payload None dataset
"subject_id": "SUBJECT_ID",
"tumor_library_id": "WTS_TUMOR_LIBRARY_ID"
Expand Down
8 changes: 4 additions & 4 deletions data_processors/pipeline/orchestration/rnasum_step.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,11 +160,11 @@ def prepare_rnasum_jobs(this_workflow: Workflow) -> List[Dict]:
deduce_umccrise_result_location_from_root_dir(umccrise_directory, this_subject, this_wgs_tumor_sample)

job = {
"dragen_transcriptome_directory": dragen_transcriptome_directory,
"umccrise_directory": umccrise_directory,
"arriba_directory": arriba_directory,
"dragen_wts_dir": dragen_transcriptome_directory,
"umccrise": umccrise_directory,
"arriba_dir": arriba_directory,
"sample_name": wts_tumor_meta.sample_id,
"report_directory": f"{this_subject}__{this_wts_tumor_library}",
"report_dir": f"{this_subject}__{this_wts_tumor_library}",
"dataset": tumor_dataset,
"subject_id": this_subject,
"tumor_library_id": this_wts_tumor_library,
Expand Down
26 changes: 23 additions & 3 deletions data_processors/pipeline/orchestration/tests/test_rnasum_step.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,8 @@ def test_prepare_rnasum_jobs(self):

for job in job_list:
logger.info(f"\n{libjson.dumps(job)}") # NOTE libjson is intentional and part of ser/deser test
self.assertIn("dragen_transcriptome_directory", job.keys())
self.assertIn("umccrise_directory", job.keys())
self.assertIn("dragen_wts_dir", job.keys())
self.assertIn("umccrise", job.keys())
self.assertEqual(job['subject_id'], TestConstant.subject_id.value)
self.assertEqual(job['tumor_library_id'], TestConstant.wts_library_id_tumor.value)

Expand Down Expand Up @@ -234,7 +234,7 @@ def test_prepare_rnasum_jobs_SBJ01670(self):

# Extra assert that we have really pointed to umccrise sample result subdirectory
wgs_tumor_sample_id = LabMetadata.objects.get(library_id__iexact="L2200320").sample_id
self.assertIn(wgs_tumor_sample_id, job_list[0]['umccrise_directory']['location'])
self.assertIn(wgs_tumor_sample_id, job_list[0]['umccrise']['location'])

@skip
def test_prepare_rnasum_jobs_SBJ01285(self):
Expand Down Expand Up @@ -321,6 +321,10 @@ def test_prepare_rnasum_jobs_SBJ01285(self):
self.assertIsNotNone(job_list)
self.assertEqual(len(job_list), 1)

# Extra assert that we have really pointed to umccrise sample result subdirectory
wgs_tumor_sample_id = LabMetadata.objects.get(library_id__iexact="L2101732").sample_id
self.assertIn(wgs_tumor_sample_id, job_list[0]['umccrise']['location'])

@skip
def test_prepare_rnasum_jobs_SBJ01625(self):
"""
Expand Down Expand Up @@ -397,6 +401,10 @@ def test_prepare_rnasum_jobs_SBJ01625(self):
self.assertIsNotNone(job_list)
self.assertEqual(len(job_list), 1)

# Extra assert that we have really pointed to umccrise sample result subdirectory
wgs_tumor_sample_id = LabMetadata.objects.get(library_id__iexact="L2200229").sample_id
self.assertIn(wgs_tumor_sample_id, job_list[0]['umccrise']['location'])

@skip
def test_prepare_rnasum_jobs_SBJ00910(self):
"""
Expand Down Expand Up @@ -472,6 +480,10 @@ def test_prepare_rnasum_jobs_SBJ00910(self):
self.assertIsNotNone(job_list)
self.assertEqual(len(job_list), 1)

# Extra assert that we have really pointed to umccrise sample result subdirectory
wgs_tumor_sample_id = LabMetadata.objects.get(library_id__iexact="L2100746").sample_id
self.assertIn(wgs_tumor_sample_id, job_list[0]['umccrise']['location'])

@skip
def test_prepare_rnasum_jobs_SBJ02060(self):
"""
Expand Down Expand Up @@ -599,6 +611,10 @@ def test_prepare_rnasum_jobs_SBJ02060(self):
self.assertEqual(len(job_list), 1)
self.assertEqual(job_list[0]['tumor_library_id'], "L2201545")

# Extra assert that we have really pointed to umccrise sample result subdirectory
wgs_tumor_sample_id = LabMetadata.objects.get(library_id__iexact="L2201539").sample_id
self.assertIn(wgs_tumor_sample_id, job_list[0]['umccrise']['location'])

@skip
def test_prepare_rnasum_jobs_SBJ02569(self):
"""
Expand Down Expand Up @@ -699,6 +715,10 @@ def test_prepare_rnasum_jobs_SBJ02569(self):
self.assertEqual(len(job_list), 1)
self.assertEqual(job_list[0]['tumor_library_id'], "L2201035")

# Extra assert that we have really pointed to umccrise sample result subdirectory
wgs_tumor_sample_id = LabMetadata.objects.get(library_id__iexact="L2200985").sample_id
self.assertIn(wgs_tumor_sample_id, job_list[0]['umccrise']['location'])

@skip
def test_lookup_tcga_dataset(self):
"""
Expand Down
8 changes: 4 additions & 4 deletions docs/pipeline/automation/rnasum.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,20 +75,20 @@ aws lambda invoke --profile prodops \
--function-name data-portal-api-prod-rnasum \
--cli-binary-format raw-in-base64-out \
--payload '{
"dragen_transcriptome_directory": {
"dragen_wts_dir": {
"class": "Directory",
"location": "gds://path/to/WTS/output/dir"
},
"arriba_directory": {
"arriba_dir": {
"class": "Directory",
"location": "gds://path/to/arriba/output/dir"
},
"umccrise_directory": {
"umccrise": {
"class": "Directory",
"location": "gds://path/to/umccrise/output/dir"
},
"sample_name": "TUMOR_SAMPLE_ID",
"report_directory": "SUBJECT_ID__WTS_TUMOR_LIBRARY_ID",
"report_dir": "SUBJECT_ID__WTS_TUMOR_LIBRARY_ID",
"dataset": "reference_data",
"subject_id": "SUBJECT_ID",
"tumor_library_id": "WTS_TUMOR_LIBRARY_ID"
Expand Down