From 3ab49439a595e135ed5ec6b67379a293892e5fbe Mon Sep 17 00:00:00 2001 From: ashwini06 Date: Fri, 20 May 2022 18:19:03 +0200 Subject: [PATCH 1/8] add analysis workflow option to config case --- BALSAMIC/commands/config/case.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/BALSAMIC/commands/config/case.py b/BALSAMIC/commands/config/case.py index b52d11824..657d47772 100644 --- a/BALSAMIC/commands/config/case.py +++ b/BALSAMIC/commands/config/case.py @@ -117,13 +117,6 @@ multiple=True, help="Fastq files for normal sample.", ) -@click.option( - "--umiworkflow/--no-umiworkflow", - default=True, - show_default=True, - is_flag=True, - help="Enable running UMI workflow", -) @click.option("--tumor-sample-name", help="Tumor sample name") @click.option("--normal-sample-name", help="Normal sample name") @click.option( @@ -136,6 +129,19 @@ "will be /genome_version" ), ) +@click.option( + "-w", + "--analysis-workflow", + default="balsamic", + show_default=True, + type=click.Choice(["balsamic", "balsamic-umi"]), + help=( + 'Analysis workflow to run. By default: "balsamic" only ' + "workflow will be running. 
If you want to run both " + "balsamic and UMI workflow together for panel data; " + 'choose "balsamic-umi" option ' + ), +) @click.pass_context def case_config( context, @@ -150,12 +156,12 @@ def case_config( analysis_dir, tumor, normal, - umiworkflow, tumor_sample_name, normal_sample_name, genome_version, balsamic_cache, container_version, + analysis_workflow, ): try: @@ -190,6 +196,7 @@ def case_config( "analysis_dir": analysis_dir, "analysis_type": "paired" if normal else "single", "sequencing_type": "targeted" if panel_bed else "wgs", + "analysis_workflow": analysis_workflow, }, reference=reference_dict, singularity=os.path.join(balsamic_cache, balsamic_version, "containers"), @@ -207,7 +214,6 @@ def case_config( } if panel_bed else None, - umiworkflow=umiworkflow if panel_bed else False, ).dict(by_alias=True, exclude_none=True) LOG.info("Config file generated successfully") From 217928b2b730d9d0bfacb47d8962500fcbb07496 Mon Sep 17 00:00:00 2001 From: ashwini06 Date: Fri, 20 May 2022 18:19:36 +0200 Subject: [PATCH 2/8] add analysisworkflow defaults to pon and qc config --- BALSAMIC/commands/config/pon.py | 1 + 1 file changed, 1 insertion(+) diff --git a/BALSAMIC/commands/config/pon.py b/BALSAMIC/commands/config/pon.py index 3e5146418..b8b889488 100644 --- a/BALSAMIC/commands/config/pon.py +++ b/BALSAMIC/commands/config/pon.py @@ -118,6 +118,7 @@ def pon_config( "case_id": case_id, "analysis_dir": analysis_dir, "analysis_type": "pon", + "analysis_workflow": "balsamic", "sequencing_type": "targeted" if panel_bed else "wgs", }, reference=reference_dict, From a1587647e55597a09dadb991e869e66f42bcef47 Mon Sep 17 00:00:00 2001 From: ashwini06 Date: Fri, 20 May 2022 18:19:56 +0200 Subject: [PATCH 3/8] add analysisworkflow defaults to qc config --- BALSAMIC/commands/config/qc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/BALSAMIC/commands/config/qc.py b/BALSAMIC/commands/config/qc.py index e56f7f6f1..905beb954 100644 --- 
a/BALSAMIC/commands/config/qc.py +++ b/BALSAMIC/commands/config/qc.py @@ -166,6 +166,7 @@ def qc_config( "case_id": case_id, "analysis_dir": analysis_dir, "analysis_type": "qc_panel", + "analysis_workflow": "balsamic", "sequencing_type": "targeted" if panel_bed else "wgs", }, reference=reference_dict, @@ -181,7 +182,6 @@ def qc_config( } if panel_bed else None, - umiworkflow=False, ).dict(by_alias=True, exclude_none=True) LOG.info("QC config file generated successfully") From edc3cdf96bbd729be70ed21038664d0836d9a2b1 Mon Sep 17 00:00:00 2001 From: ashwini06 Date: Fri, 20 May 2022 18:20:53 +0200 Subject: [PATCH 4/8] fix models and constants --- BALSAMIC/constants/common.py | 6 +++--- BALSAMIC/utils/models.py | 19 ++++++++++++++++--- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/BALSAMIC/constants/common.py b/BALSAMIC/constants/common.py index ecf9eca5e..3db6ca020 100644 --- a/BALSAMIC/constants/common.py +++ b/BALSAMIC/constants/common.py @@ -34,12 +34,12 @@ ) # Analysis related constants +ANALYSIS_TYPES = ["paired", "single", "qc_panel", "pon"] +ANALYSIS_WORKFLOW = ["balsamic", "balsamic-umi"] +SEQUENCING_TYPE = ["wgs", "targeted"] MUTATION_CLASS = ["somatic", "germline"] MUTATION_TYPE = ["SNV", "SV", "CNV"] -ANALYSIS_TYPES = ["paired", "single", "qc_panel", "pon"] WORKFLOW_SOLUTION = ["BALSAMIC", "Sentieon", "DRAGEN", "Sentieon_umi"] -SEQUENCING_TYPE = ["wgs", "targeted"] - # list of bioinfo tools for each conda env VALID_CONTAINER_CONDA_NAME = { diff --git a/BALSAMIC/utils/models.py b/BALSAMIC/utils/models.py index dc300a54b..2ec981874 100644 --- a/BALSAMIC/utils/models.py +++ b/BALSAMIC/utils/models.py @@ -14,6 +14,7 @@ BIOINFO_TOOL_ENV, SEQUENCING_TYPE, ANALYSIS_TYPES, + ANALYSIS_WORKFLOW, WORKFLOW_SOLUTION, MUTATION_CLASS, MUTATION_TYPE, @@ -208,7 +209,10 @@ class AnalysisModel(BaseModel): sequencing_type : Field(required); string literal [targeted, wgs] targeted : if capture kit was used to enrich specific genomic regions wgs : if whole genome 
sequencing was performed - analysis_dir : Field(required); existing path where to save files + analysis_workflow: Field(required); string literal [balsamic, balsamic-umi] + balsamic: execute balsamic workflow + balsamic-umi: execute balsamic along with UMIworkflow for panels + analysis_dir : Field(required); existing path where to save files fastq_path : Field(optional); Path where fastq files will be stored script : Field(optional); Path where snakemake scripts will be stored log : Field(optional); Path where logs will be saved @@ -222,11 +226,13 @@ class AnalysisModel(BaseModel): ValueError: When analysis_type is set to any value other than [single, paired, pon, qc_panel] When sequencing_type is set to any value other than [wgs, targeted] + When analysis_workflow is set to any value other than [balsamic, balsamic-umi] """ case_id: str analysis_type: str sequencing_type: str + analysis_workflow: str analysis_dir: DirectoryPath fastq_path: Optional[DirectoryPath] script: Optional[DirectoryPath] @@ -258,6 +264,15 @@ def sequencing_type_literal(cls, value) -> str: ) return value + @validator("analysis_workflow", check_fields=True) + def analysis_workflow_literal(cls, value) -> str: + balsamic_analysis_workflow = ANALYSIS_WORKFLOW + if value not in balsamic_analysis_workflow: + raise ValueError( + f"Provided analysis workflow ({value}) not supported in BALSAMIC" + ) + return value + @validator("analysis_dir") def dirpath_always_abspath(cls, value) -> str: return Path(value).resolve().as_posix() @@ -426,7 +441,6 @@ class BalsamicConfigModel(BaseModel): singularity : Field(Path); path to singularity container of BALSAMIC background_variants: Field(Path(optional)); path to BACKGROUND VARIANTS for UMI rule_directory : Field(Path(RULE_DIRECTORY)); path where snakemake rules can be found - umiworkflow : Field(bool); whether UMI workflow to run in parallel """ QC: QCModel @@ -439,7 +453,6 @@ class BalsamicConfigModel(BaseModel): bioinfo_tools: dict bioinfo_tools_version: dict
panel: Optional[PanelModel] - umiworkflow: bool @validator("reference") def abspath_as_str(cls, value): From d93231eca69813f6327dfda7ba20d67d24f17529 Mon Sep 17 00:00:00 2001 From: ashwini06 Date: Fri, 20 May 2022 18:21:23 +0200 Subject: [PATCH 5/8] fix multiqc and qc_metrics conditional for umi --- BALSAMIC/snakemake_rules/quality_control/multiqc.rule | 2 +- BALSAMIC/snakemake_rules/quality_control/qc_metrics.rule | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/BALSAMIC/snakemake_rules/quality_control/multiqc.rule b/BALSAMIC/snakemake_rules/quality_control/multiqc.rule index aa1605095..2724303ed 100644 --- a/BALSAMIC/snakemake_rules/quality_control/multiqc.rule +++ b/BALSAMIC/snakemake_rules/quality_control/multiqc.rule @@ -55,7 +55,7 @@ else: # samtools metrics multiqc_input.extend(expand(bam_dir + "{sample}.samtools.{stats}.txt", sample=config["samples"], stats=['flagstats', 'idxstats', 'stats'])) - if config["umiworkflow"]: + if config["analysis"]["analysis_workflow"]=="balsamic-umi": # UMI picard metrics multiqc_input.extend(expand(umi_qc_dir + "{sample}.umi.collect_hsmetric", sample=config["samples"])) multiqc_input.extend(expand(umi_qc_dir + "{sample}.umi.metrics", sample=config["samples"])) diff --git a/BALSAMIC/snakemake_rules/quality_control/qc_metrics.rule b/BALSAMIC/snakemake_rules/quality_control/qc_metrics.rule index b5ac0851d..68798bfc2 100644 --- a/BALSAMIC/snakemake_rules/quality_control/qc_metrics.rule +++ b/BALSAMIC/snakemake_rules/quality_control/qc_metrics.rule @@ -9,7 +9,7 @@ if config["analysis"]["sequencing_type"] == 'wgs': else: bcftools_counts_input.append(vep_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".vardict.all.filtered.pass.stats") - if config["umiworkflow"]: + if config["analysis"]["analysis_workflow"]=="balsamic-umi": # bcftools counts bcftools_counts_input.append(vep_dir + "SNV.somatic." 
+ config["analysis"]["case_id"] + ".TNscope_umi.all.filtered.pass.stats") From 06dab1995cdf7c279b14de7bdcad282abc952360 Mon Sep 17 00:00:00 2001 From: ashwini06 Date: Fri, 20 May 2022 18:21:41 +0200 Subject: [PATCH 6/8] changes to balsamic workflow --- BALSAMIC/workflows/balsamic.smk | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/BALSAMIC/workflows/balsamic.smk b/BALSAMIC/workflows/balsamic.smk index a36e91bff..54f40d42f 100644 --- a/BALSAMIC/workflows/balsamic.smk +++ b/BALSAMIC/workflows/balsamic.smk @@ -213,9 +213,6 @@ if "disable_variant_caller" in config: if var_caller in germline_caller: germline_caller.remove(var_caller) -LOG.info(f"The following Germline variant callers will be included in the workflow: {germline_caller}") -LOG.info(f"The following somatic variant callers will be included in the workflow: {somatic_caller}") - rules_to_include = [] analysis_type = config['analysis']["analysis_type"] sequence_type = config['analysis']["sequencing_type"] @@ -225,7 +222,16 @@ for sub,value in SNAKEMAKE_RULES.items(): for module_name,module_rules in value.items(): rules_to_include.extend(module_rules) +if config["analysis"]["analysis_workflow"] == "balsamic": + rules_to_include = [rule for rule in rules_to_include if "umi" not in rule] + somatic_caller = [var_caller for var_caller in somatic_caller if "umi" not in var_caller] + somatic_caller_tmb = [var_caller for var_caller in somatic_caller_tmb if "umi" not in var_caller] + + LOG.info(f"The following rules will be included in the workflow: {rules_to_include}") +LOG.info(f"The following Germline variant callers will be included in the workflow: {germline_caller}") +LOG.info(f"The following somatic variant callers will be included in the workflow: {somatic_caller}") + for r in rules_to_include: include: Path(RULE_DIRECTORY, r).as_posix() @@ -277,10 +283,11 @@ if config["analysis"]["sequencing_type"] != "wgs": expand(vep_dir + "{vcf}.all.filtered.pass.ranked.vcf.gz", 
vcf=get_vcf(config, ["vardict"], [case_id])) ) # UMI - analysis_specific_results.extend(expand(umi_qc_dir + "{sample}.umi.mean_family_depth",sample=config["samples"])) - if background_variant_file: - analysis_specific_results.extend( - expand(umi_qc_dir + "{case_name}.{var_caller}.AFtable.txt", case_name=case_id, var_caller=["TNscope_umi"]) + if config["analysis"]["analysis_workflow"]=="balsamic-umi": + analysis_specific_results.extend(expand(umi_qc_dir + "{sample}.umi.mean_family_depth",sample=config["samples"])) + if background_variant_file: + analysis_specific_results.extend( + expand(umi_qc_dir + "{case_name}.{var_caller}.AFtable.txt", case_name=case_id, var_caller=["TNscope_umi"]) ) # AscatNgs From 88ca6e590270e02c9935718d8b36e366baf8d942 Mon Sep 17 00:00:00 2001 From: ashwini06 Date: Fri, 20 May 2022 18:22:19 +0200 Subject: [PATCH 7/8] update test models --- tests/utils/test_models.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/utils/test_models.py b/tests/utils/test_models.py index aa056e3f6..a967797e4 100644 --- a/tests/utils/test_models.py +++ b/tests/utils/test_models.py @@ -261,7 +261,7 @@ def test_analysis_model(): "analysis_type": "paired", "sequencing_type": "targeted", "analysis_dir": "tests/test_data", - "umiworkflow": "true", + "analysis_workflow": "balsamic-umi", } # THEN we can successully create a config dict assert AnalysisModel.parse_obj(valid_args) @@ -272,6 +272,7 @@ def test_analysis_model(): "analysis_type": "odd", "sequencing_type": "wrong", "analysis_dir": "tests/test_data", + "analysis_workflow": "umi", } # THEN should trigger ValueError with pytest.raises(ValueError) as excinfo: From 458b022d13facfde85e8b8b17b41b7ecbb5f02e0 Mon Sep 17 00:00:00 2001 From: ashwini06 Date: Fri, 20 May 2022 18:22:32 +0200 Subject: [PATCH 8/8] update changelog --- CHANGELOG.rst | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index d1ae531f8..657e4d57a 100644 --- a/CHANGELOG.rst 
+++ b/CHANGELOG.rst @@ -1,3 +1,17 @@ +[X.X.X] +------- + +Added: +^^^^^^ + +* New option `--analysis-workflow` for the `balsamic config case` CLI https://github.com/Clinical-Genomics/BALSAMIC/pull/932 + + +Changed: +^^^^^^^^ +* UMI workflow for panel cases now runs only when `--analysis-workflow balsamic-umi` is selected https://github.com/Clinical-Genomics/BALSAMIC/issues/896 + + [9.0.1] -------