Skip to content

Commit

Permalink
fix: QC panel bed version generalisation (#855)
Browse files Browse the repository at this point in the history
* version-independent panel bed names
* default conditions for panel QC validation
  • Loading branch information
ivadym authored Feb 7, 2022
1 parent 4e29a32 commit b4cda37
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 28 deletions.
17 changes: 10 additions & 7 deletions BALSAMIC/assets/scripts/collect_qc_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,15 +96,15 @@ def get_multiqc_data_source(multiqc_data: dict, sample: str, tool: str) -> str:
)


def get_qc_available_panel_beds(metrics: List[str]) -> List[str]:
"""Returns available panel bed file names from a list of requested metrics"""
available_beds = []
def get_qc_supported_capture_kit(capture_kit, metrics: List[str]) -> str:
"""Returns a BALSAMIC supported panel bed name associated to a specific capture_kit parameter"""
available_panel_beds = []

for k in metrics:
if k != "default":
available_beds.append(k)
available_panel_beds.append(k)

return available_beds
return next((i for i in available_panel_beds if i in capture_kit), None)


def get_requested_metrics(
Expand All @@ -115,8 +115,11 @@ def get_requested_metrics(
requested_metrics = metrics[sequencing_type]
if capture_kit:
requested_metrics = metrics[sequencing_type]["default"]
if capture_kit in get_qc_available_panel_beds(metrics[sequencing_type]):
requested_metrics.update(metrics[sequencing_type][capture_kit])
supported_capture_kit = get_qc_supported_capture_kit(
capture_kit, metrics[sequencing_type]
)
if supported_capture_kit:
requested_metrics.update(metrics[sequencing_type][supported_capture_kit])

return requested_metrics

Expand Down
16 changes: 8 additions & 8 deletions BALSAMIC/constants/quality_check_reporting.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,37 +55,37 @@
"default": {
"MEAN_INSERT_SIZE": {"condition": None},
"PERCENT_DUPLICATION": {"condition": None},
"MEDIAN_TARGET_COVERAGE": {"condition": None},
"MEDIAN_TARGET_COVERAGE": {"condition": {"norm": "gt", "threshold": 500}},
"PCT_TARGET_BASES_50X": {"condition": None},
"PCT_TARGET_BASES_100X": {"condition": None},
"PCT_TARGET_BASES_250X": {"condition": None},
"PCT_TARGET_BASES_500X": {"condition": None},
"PCT_TARGET_BASES_1000X": {"condition": None},
"MEAN_TARGET_COVERAGE": {"condition": None},
"FOLD_80_BASE_PENALTY": {"condition": None},
"FOLD_80_BASE_PENALTY": {"condition": {"norm": "lt", "threshold": 1.8}},
"PCT_OFF_BAIT": {"condition": None},
},
"gicfdna_3.1_hg19_design.bed": {
"gicfdna": {
"MEDIAN_TARGET_COVERAGE": {"condition": {"norm": "gt", "threshold": 1000}},
"FOLD_80_BASE_PENALTY": {"condition": {"norm": "lt", "threshold": 1.6}},
},
"gmcksolid_4.1_hg19_design.bed": {
"gmcksolid": {
"MEDIAN_TARGET_COVERAGE": {"condition": {"norm": "gt", "threshold": 500}},
"FOLD_80_BASE_PENALTY": {"condition": {"norm": "lt", "threshold": 1.8}},
},
"gmsmyeloid_5.2_hg19_design.bed": {
"gmsmyeloid": {
"MEDIAN_TARGET_COVERAGE": {"condition": {"norm": "gt", "threshold": 1000}},
"FOLD_80_BASE_PENALTY": {"condition": {"norm": "lt", "threshold": 1.6}},
},
"lymphoma_6.1_hg19_design.bed": {
"lymphoma": {
"MEDIAN_TARGET_COVERAGE": {"condition": {"norm": "gt", "threshold": 1000}},
"FOLD_80_BASE_PENALTY": {"condition": {"norm": "lt", "threshold": 1.6}},
},
"gmslymphoid_7.1_hg19_design.bed": {
"gmslymphoid": {
"MEDIAN_TARGET_COVERAGE": {"condition": {"norm": "gt", "threshold": 1000}},
"FOLD_80_BASE_PENALTY": {"condition": {"norm": "lt", "threshold": 1.6}},
},
"twistexomerefseq_9.1_hg19_design.bed": {
"twistexome": {
"MEDIAN_TARGET_COVERAGE": {"condition": {"norm": "gt", "threshold": 100}},
"FOLD_80_BASE_PENALTY": {"condition": {"norm": "lt", "threshold": 1.8}},
},
Expand Down
3 changes: 3 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ Added:
* Snakemake workflow to create canfam3 reference #843
* Call umi variants using TNscope in bed defined regions #821
* UMI duplication metrics to report in multiqc_picard_dups.json #844
* Default QC validation conditions (for capture kits that are not explicitly defined) #855

Changed:
^^^^^^^^
Expand All @@ -15,7 +16,9 @@ Changed:

Fixed:
^^^^^^

* ``collect_qc_metrics.py`` failing for WGS cases with empty ``capture_kit`` argument #850
* QC metric validation for different panel bed versions #855

Removed
^^^^^^^
Expand Down
5 changes: 3 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -518,11 +518,12 @@ def qc_requested_metrics():
"METRIC_1": {"condition": None},
"METRIC_2": {"condition": {"norm": "gt", "threshold": 2}},
},
"panel_1.bed": {
"panel_1": {
"METRIC_3": {"condition": {"norm": "gt", "threshold": 3}},
},
"panel_2.bed": {
"panel_2": {
"METRIC_1": {"condition": {"norm": "gt", "threshold": 1}},
"METRIC_2": {"condition": {"norm": "gt", "threshold": 22}},
"METRIC_4": {"condition": {"norm": "gt", "threshold": 4}},
},
},
Expand Down
27 changes: 16 additions & 11 deletions tests/scripts/test_collect_qc_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
get_multiqc_data_source,
get_multiqc_metrics,
collect_qc_metrics,
get_qc_available_panel_beds,
get_qc_supported_capture_kit,
get_requested_metrics,
capture_kit_resolve_type,
)
Expand All @@ -22,31 +22,36 @@ def test_capture_kit_resolve_type():
assert capture_kit_resolve_type(capture_kit) == capture_kit


def test_get_qc_available_panel_beds(qc_requested_metrics):
"""test extraction of capture kits available for analysis"""
def test_get_qc_supported_capture_kit(qc_requested_metrics):
"""test extraction of the capture kit name available for analysis"""

# GIVEN a capture kit
capture_kit = "panel_1_v1.0_hg19_design.bed"

# GIVEN an expected output
expected_output = ["panel_1.bed", "panel_2.bed"]
expected_output = "panel_1"

# WHEN calling the function
available_panel_beds = get_qc_available_panel_beds(qc_requested_metrics["targeted"])
supported_capture_kit = get_qc_supported_capture_kit(
capture_kit, qc_requested_metrics["targeted"]
)

# THEN check if the extracted bed file names correspond to the expected ones
assert available_panel_beds == expected_output
# THEN check if the extracted bed file name corresponds to the expected one
assert supported_capture_kit == expected_output


def test_get_requested_metrics_targeted(qc_requested_metrics):
"""test retrieval of the requested targeted metrics"""

# GIVEN a sequencing type and a capture kit
seq_type = "targeted"
capture_kit = "panel_1.bed"
capture_kit = "panel_2_v1.0_hg19_design.bed"

# GIVEN the expected output
expected_output = {
"METRIC_1": {"condition": None},
"METRIC_2": {"condition": {"norm": "gt", "threshold": 2}},
"METRIC_3": {"condition": {"norm": "gt", "threshold": 3}},
"METRIC_1": {"condition": {"norm": "gt", "threshold": 1}},
"METRIC_2": {"condition": {"norm": "gt", "threshold": 22}},
"METRIC_4": {"condition": {"norm": "gt", "threshold": 4}},
}

# WHEN calling the function
Expand Down

0 comments on commit b4cda37

Please sign in to comment.