diff --git a/BALSAMIC/assets/scripts/collect_qc_metrics.py b/BALSAMIC/assets/scripts/collect_qc_metrics.py
index 917f36cbf..46377bd0d 100644
--- a/BALSAMIC/assets/scripts/collect_qc_metrics.py
+++ b/BALSAMIC/assets/scripts/collect_qc_metrics.py
@@ -96,15 +96,11 @@ def get_multiqc_data_source(multiqc_data: dict, sample: str, tool: str) -> str:
     )
 
 
-def get_qc_available_panel_beds(metrics: List[str]) -> List[str]:
-    """Returns available panel bed file names from a list of requested metrics"""
-    available_beds = []
+def get_qc_supported_capture_kit(capture_kit, metrics: List[str]) -> str:
+    """Returns the first BALSAMIC supported panel bed name contained in the capture_kit parameter (None if there is no match)"""
+    available_panel_beds = [k for k in metrics if k != "default"]
 
-    for k in metrics:
-        if k != "default":
-            available_beds.append(k)
-
-    return available_beds
+    return next((i for i in available_panel_beds if i in capture_kit), None)
 
 
 def get_requested_metrics(
@@ -115,8 +111,12 @@ def get_requested_metrics(
     requested_metrics = metrics[sequencing_type]
     if capture_kit:
-        requested_metrics = metrics[sequencing_type]["default"]
-        if capture_kit in get_qc_available_panel_beds(metrics[sequencing_type]):
-            requested_metrics.update(metrics[sequencing_type][capture_kit])
+        # Copy the defaults to avoid modifying the shared metrics dictionary in place
+        requested_metrics = dict(metrics[sequencing_type]["default"])
+        supported_capture_kit = get_qc_supported_capture_kit(
+            capture_kit, metrics[sequencing_type]
+        )
+        if supported_capture_kit:
+            requested_metrics.update(metrics[sequencing_type][supported_capture_kit])
 
     return requested_metrics
 
diff --git a/BALSAMIC/constants/quality_check_reporting.py b/BALSAMIC/constants/quality_check_reporting.py
index 272cb01e5..dba06df8e 100644
--- a/BALSAMIC/constants/quality_check_reporting.py
+++ b/BALSAMIC/constants/quality_check_reporting.py
@@ -55,37 +55,37 @@
         "default": {
             "MEAN_INSERT_SIZE": {"condition": None},
             "PERCENT_DUPLICATION": {"condition": None},
-            "MEDIAN_TARGET_COVERAGE": {"condition": None},
+            "MEDIAN_TARGET_COVERAGE": {"condition": {"norm": "gt", "threshold": 500}},
             "PCT_TARGET_BASES_50X": {"condition": None},
             "PCT_TARGET_BASES_100X": {"condition": None},
             "PCT_TARGET_BASES_250X": {"condition": None},
             "PCT_TARGET_BASES_500X": {"condition": None},
             "PCT_TARGET_BASES_1000X": {"condition": None},
             "MEAN_TARGET_COVERAGE": {"condition": None},
-            "FOLD_80_BASE_PENALTY": {"condition": None},
+            "FOLD_80_BASE_PENALTY": {"condition": {"norm": "lt", "threshold": 1.8}},
             "PCT_OFF_BAIT": {"condition": None},
         },
-        "gicfdna_3.1_hg19_design.bed": {
+        "gicfdna": {
             "MEDIAN_TARGET_COVERAGE": {"condition": {"norm": "gt", "threshold": 1000}},
             "FOLD_80_BASE_PENALTY": {"condition": {"norm": "lt", "threshold": 1.6}},
         },
-        "gmcksolid_4.1_hg19_design.bed": {
+        "gmcksolid": {
             "MEDIAN_TARGET_COVERAGE": {"condition": {"norm": "gt", "threshold": 500}},
             "FOLD_80_BASE_PENALTY": {"condition": {"norm": "lt", "threshold": 1.8}},
         },
-        "gmsmyeloid_5.2_hg19_design.bed": {
+        "gmsmyeloid": {
             "MEDIAN_TARGET_COVERAGE": {"condition": {"norm": "gt", "threshold": 1000}},
             "FOLD_80_BASE_PENALTY": {"condition": {"norm": "lt", "threshold": 1.6}},
         },
-        "lymphoma_6.1_hg19_design.bed": {
+        "lymphoma": {
             "MEDIAN_TARGET_COVERAGE": {"condition": {"norm": "gt", "threshold": 1000}},
             "FOLD_80_BASE_PENALTY": {"condition": {"norm": "lt", "threshold": 1.6}},
         },
-        "gmslymphoid_7.1_hg19_design.bed": {
+        "gmslymphoid": {
             "MEDIAN_TARGET_COVERAGE": {"condition": {"norm": "gt", "threshold": 1000}},
             "FOLD_80_BASE_PENALTY": {"condition": {"norm": "lt", "threshold": 1.6}},
         },
-        "twistexomerefseq_9.1_hg19_design.bed": {
+        "twistexome": {
             "MEDIAN_TARGET_COVERAGE": {"condition": {"norm": "gt", "threshold": 100}},
             "FOLD_80_BASE_PENALTY": {"condition": {"norm": "lt", "threshold": 1.8}},
         },
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index a83defdbc..5fa79e25f 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -7,6 +7,7 @@ Added:
 * Snakemake workflow to create canfam3 reference #843
 * Call umi variants using TNscope in bed defined regions #821
 * UMI duplication metrics to report in multiqc_picard_dups.json #844
+* QC default validation conditions (for capture kits that are not defined) #855
 
 Changed:
 ^^^^^^^^
@@ -15,7 +16,9 @@ Changed:
 
 Fixed:
 ^^^^^^
+
 * ``collect_qc_metrics.py`` failing for WGS cases with empty ``capture_kit`` argument #850
+* QC metric validation for different panel bed versions #855
 
 Removed
 ^^^^^^^
diff --git a/tests/conftest.py b/tests/conftest.py
index 0657f3291..cff2d544a 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -518,11 +518,12 @@ def qc_requested_metrics():
                 "METRIC_1": {"condition": None},
                 "METRIC_2": {"condition": {"norm": "gt", "threshold": 2}},
             },
-            "panel_1.bed": {
+            "panel_1": {
                 "METRIC_3": {"condition": {"norm": "gt", "threshold": 3}},
             },
-            "panel_2.bed": {
+            "panel_2": {
                 "METRIC_1": {"condition": {"norm": "gt", "threshold": 1}},
+                "METRIC_2": {"condition": {"norm": "gt", "threshold": 22}},
                 "METRIC_4": {"condition": {"norm": "gt", "threshold": 4}},
             },
         },
diff --git a/tests/scripts/test_collect_qc_metrics.py b/tests/scripts/test_collect_qc_metrics.py
index 5808b08bb..9b2cb7e16 100644
--- a/tests/scripts/test_collect_qc_metrics.py
+++ b/tests/scripts/test_collect_qc_metrics.py
@@ -5,7 +5,7 @@
     get_multiqc_data_source,
     get_multiqc_metrics,
     collect_qc_metrics,
-    get_qc_available_panel_beds,
+    get_qc_supported_capture_kit,
     get_requested_metrics,
     capture_kit_resolve_type,
 )
@@ -22,17 +22,22 @@ def test_capture_kit_resolve_type():
     assert capture_kit_resolve_type(capture_kit) == capture_kit
 
 
-def test_get_qc_available_panel_beds(qc_requested_metrics):
-    """test extraction of capture kits available for analysis"""
+def test_get_qc_supported_capture_kit(qc_requested_metrics):
+    """test extraction of the capture kit name available for analysis"""
+
+    # GIVEN a capture kit
+    capture_kit = "panel_1_v1.0_hg19_design.bed"
 
     # GIVEN an expected output
-    expected_output = ["panel_1.bed", "panel_2.bed"]
+    expected_output = "panel_1"
 
     # WHEN calling the function
-    available_panel_beds = get_qc_available_panel_beds(qc_requested_metrics["targeted"])
+    supported_capture_kit = get_qc_supported_capture_kit(
+        capture_kit, qc_requested_metrics["targeted"]
+    )
 
-    # THEN check if the extracted bed file names correspond to the expected ones
-    assert available_panel_beds == expected_output
+    # THEN check if the extracted bed file name corresponds to the expected one
+    assert supported_capture_kit == expected_output
 
 
 def test_get_requested_metrics_targeted(qc_requested_metrics):
@@ -40,13 +45,13 @@ def test_get_requested_metrics_targeted(qc_requested_metrics):
 
     # GIVEN a sequencing type and a capture kit
     seq_type = "targeted"
-    capture_kit = "panel_1.bed"
+    capture_kit = "panel_2_v1.0_hg19_design.bed"
 
     # GIVEN the expected output
     expected_output = {
-        "METRIC_1": {"condition": None},
-        "METRIC_2": {"condition": {"norm": "gt", "threshold": 2}},
-        "METRIC_3": {"condition": {"norm": "gt", "threshold": 3}},
+        "METRIC_1": {"condition": {"norm": "gt", "threshold": 1}},
+        "METRIC_2": {"condition": {"norm": "gt", "threshold": 22}},
+        "METRIC_4": {"condition": {"norm": "gt", "threshold": 4}},
     }
 
     # WHEN calling the function