Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Option to support new analysis workflows #936

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 15 additions & 9 deletions BALSAMIC/commands/config/case.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,13 +117,6 @@
multiple=True,
help="Fastq files for normal sample.",
)
@click.option(
"--umiworkflow/--no-umiworkflow",
default=True,
show_default=True,
is_flag=True,
help="Enable running UMI workflow",
)
@click.option("--tumor-sample-name", help="Tumor sample name")
@click.option("--normal-sample-name", help="Normal sample name")
@click.option(
Expand All @@ -136,6 +129,19 @@
"will be <outdir>/genome_version"
),
)
@click.option(
"-w",
"--analysis-workflow",
default="balsamic",
show_default=True,
type=click.Choice(["balsamic", "balsamic-umi"]),
help=(
'Analysis workflow to run. By default: "balsamic" only '
"workflow will be running. If you want to run both "
"balsamic and UMI workflow together for panel data; "
'choose "balsamic-umi" option '
),
)
@click.pass_context
def case_config(
context,
Expand All @@ -150,12 +156,12 @@ def case_config(
analysis_dir,
tumor,
normal,
umiworkflow,
tumor_sample_name,
normal_sample_name,
genome_version,
balsamic_cache,
container_version,
analysis_workflow,
):

try:
Expand Down Expand Up @@ -190,6 +196,7 @@ def case_config(
"analysis_dir": analysis_dir,
"analysis_type": "paired" if normal else "single",
"sequencing_type": "targeted" if panel_bed else "wgs",
"analysis_workflow": analysis_workflow,
},
reference=reference_dict,
singularity=os.path.join(balsamic_cache, balsamic_version, "containers"),
Expand All @@ -207,7 +214,6 @@ def case_config(
}
if panel_bed
else None,
umiworkflow=umiworkflow if panel_bed else False,
).dict(by_alias=True, exclude_none=True)
LOG.info("Config file generated successfully")

Expand Down
1 change: 1 addition & 0 deletions BALSAMIC/commands/config/pon.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ def pon_config(
"case_id": case_id,
"analysis_dir": analysis_dir,
"analysis_type": "pon",
"analysis_workflow": "balsamic",
"sequencing_type": "targeted" if panel_bed else "wgs",
},
reference=reference_dict,
Expand Down
2 changes: 1 addition & 1 deletion BALSAMIC/commands/config/qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ def qc_config(
"case_id": case_id,
"analysis_dir": analysis_dir,
"analysis_type": "qc_panel",
"analysis_workflow": "balsamic",
"sequencing_type": "targeted" if panel_bed else "wgs",
},
reference=reference_dict,
Expand All @@ -181,7 +182,6 @@ def qc_config(
}
if panel_bed
else None,
umiworkflow=False,
).dict(by_alias=True, exclude_none=True)
LOG.info("QC config file generated successfully")

Expand Down
6 changes: 3 additions & 3 deletions BALSAMIC/constants/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,12 @@
)

# Analysis related constants
ANALYSIS_TYPES = ["paired", "single", "qc_panel", "pon"]
ANALYSIS_WORKFLOW = ["balsamic", "balsamic-umi"]
SEQUENCING_TYPE = ["wgs", "targeted"]
MUTATION_CLASS = ["somatic", "germline"]
MUTATION_TYPE = ["SNV", "SV", "CNV"]
ANALYSIS_TYPES = ["paired", "single", "qc_panel", "pon"]
WORKFLOW_SOLUTION = ["BALSAMIC", "Sentieon", "DRAGEN", "Sentieon_umi"]
SEQUENCING_TYPE = ["wgs", "targeted"]


# list of bioinfo tools for each conda env
VALID_CONTAINER_CONDA_NAME = {
Expand Down
2 changes: 1 addition & 1 deletion BALSAMIC/snakemake_rules/quality_control/multiqc.rule
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ else:
# samtools metrics
multiqc_input.extend(expand(bam_dir + "{sample}.samtools.{stats}.txt", sample=config["samples"], stats=['flagstats', 'idxstats', 'stats']))

if config["umiworkflow"]:
if config["analysis"]["analysis_workflow"]=="balsamic-umi":
# UMI picard metrics
multiqc_input.extend(expand(umi_qc_dir + "{sample}.umi.collect_hsmetric", sample=config["samples"]))
multiqc_input.extend(expand(umi_qc_dir + "{sample}.umi.metrics", sample=config["samples"]))
Expand Down
2 changes: 1 addition & 1 deletion BALSAMIC/snakemake_rules/quality_control/qc_metrics.rule
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ if config["analysis"]["sequencing_type"] == 'wgs':
else:
bcftools_counts_input.append(vep_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".vardict.all.filtered.pass.stats")

if config["umiworkflow"]:
if config["analysis"]["analysis_workflow"]=="balsamic-umi":
# bcftools counts
bcftools_counts_input.append(vep_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".TNscope_umi.all.filtered.pass.stats")

Expand Down
19 changes: 16 additions & 3 deletions BALSAMIC/utils/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
BIOINFO_TOOL_ENV,
SEQUENCING_TYPE,
ANALYSIS_TYPES,
ANALYSIS_WORKFLOW,
WORKFLOW_SOLUTION,
MUTATION_CLASS,
MUTATION_TYPE,
Expand Down Expand Up @@ -208,7 +209,10 @@ class AnalysisModel(BaseModel):
sequencing_type : Field(required); string literal [targeted, wgs]
targeted : if capture kit was used to enrich specific genomic regions
wgs : if whole genome sequencing was performed
analysis_dir : Field(required); existing path where to save files
analysis_workflow: Field(required); string literal [balsamic, balsamic-umi]
balsamic: execute balsamic workflow
balsamic-umi: execute balsamic along with the UMI workflow for panels
analysis_dir : Field(required); existing path where to save files
fastq_path : Field(optional); Path where fastq files will be stored
script : Field(optional); Path where snakemake scripts will be stored
log : Field(optional); Path where logs will be saved
Expand All @@ -222,11 +226,13 @@ class AnalysisModel(BaseModel):
ValueError:
When analysis_type is set to any value other than [single, paired, pon, qc_panel]
When sequencing_type is set to any value other than [wgs, targeted]
When analysis_workflow is set to any value other than [balsamic, balsamic-umi]
"""

case_id: str
analysis_type: str
sequencing_type: str
analysis_workflow: str
analysis_dir: DirectoryPath
fastq_path: Optional[DirectoryPath]
script: Optional[DirectoryPath]
Expand Down Expand Up @@ -258,6 +264,15 @@ def sequencing_type_literal(cls, value) -> str:
)
return value

@validator("analysis_workflow", check_fields=True)
def analysis_workflow_literal(cls, value) -> str:
    """Validate that the requested analysis workflow is supported.

    Args:
        value: workflow name supplied for the ``analysis_workflow`` field.

    Returns:
        The unchanged ``value`` when it is one of ``ANALYSIS_WORKFLOW``.

    Raises:
        ValueError: when ``value`` is not listed in ``ANALYSIS_WORKFLOW``.
    """
    if value not in ANALYSIS_WORKFLOW:
        # Close the parenthesis around the offending value so the message
        # reads cleanly (the previous text left it unbalanced).
        raise ValueError(
            f"Provided analysis workflow ({value}) not supported in BALSAMIC"
        )
    return value

@validator("analysis_dir")
def dirpath_always_abspath(cls, value) -> str:
return Path(value).resolve().as_posix()
Expand Down Expand Up @@ -426,7 +441,6 @@ class BalsamicConfigModel(BaseModel):
singularity : Field(Path); path to singularity container of BALSAMIC
background_variants: Field(Path(optional)); path to BACKGROUND VARIANTS for UMI
rule_directory : Field(Path(RULE_DIRECTORY)); path where snakemake rules can be found
umiworkflow : Field(bool); whether UMI workflow to run in parallel
"""

QC: QCModel
Expand All @@ -439,7 +453,6 @@ class BalsamicConfigModel(BaseModel):
bioinfo_tools: dict
bioinfo_tools_version: dict
panel: Optional[PanelModel]
umiworkflow: bool

@validator("reference")
def abspath_as_str(cls, value):
Expand Down
21 changes: 14 additions & 7 deletions BALSAMIC/workflows/balsamic.smk
Original file line number Diff line number Diff line change
Expand Up @@ -213,9 +213,6 @@ if "disable_variant_caller" in config:
if var_caller in germline_caller:
germline_caller.remove(var_caller)

LOG.info(f"The following Germline variant callers will be included in the workflow: {germline_caller}")
LOG.info(f"The following somatic variant callers will be included in the workflow: {somatic_caller}")

rules_to_include = []
analysis_type = config['analysis']["analysis_type"]
sequence_type = config['analysis']["sequencing_type"]
Expand All @@ -225,7 +222,16 @@ for sub,value in SNAKEMAKE_RULES.items():
for module_name,module_rules in value.items():
rules_to_include.extend(module_rules)

if config["analysis"]["analysis_workflow"] == "balsamic":
rules_to_include = [rule for rule in rules_to_include if "umi" not in rule]
somatic_caller = [var_caller for var_caller in somatic_caller if "umi" not in var_caller]
somatic_caller_tmb = [var_caller for var_caller in somatic_caller_tmb if "umi" not in var_caller]


LOG.info(f"The following rules will be included in the workflow: {rules_to_include}")
LOG.info(f"The following Germline variant callers will be included in the workflow: {germline_caller}")
LOG.info(f"The following somatic variant callers will be included in the workflow: {somatic_caller}")


for r in rules_to_include:
include: Path(RULE_DIRECTORY, r).as_posix()
Expand Down Expand Up @@ -277,10 +283,11 @@ if config["analysis"]["sequencing_type"] != "wgs":
expand(vep_dir + "{vcf}.all.filtered.pass.ranked.vcf.gz", vcf=get_vcf(config, ["vardict"], [case_id]))
)
# UMI
analysis_specific_results.extend(expand(umi_qc_dir + "{sample}.umi.mean_family_depth",sample=config["samples"]))
if background_variant_file:
analysis_specific_results.extend(
expand(umi_qc_dir + "{case_name}.{var_caller}.AFtable.txt", case_name=case_id, var_caller=["TNscope_umi"])
if config["analysis"]["analysis_workflow"]=="balsamic-umi":
analysis_specific_results.extend(expand(umi_qc_dir + "{sample}.umi.mean_family_depth",sample=config["samples"]))
if background_variant_file:
analysis_specific_results.extend(
expand(umi_qc_dir + "{case_name}.{var_caller}.AFtable.txt", case_name=case_id, var_caller=["TNscope_umi"])
)

# AscatNgs
Expand Down
14 changes: 14 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
[X.X.X]
-------

Added:
^^^^^^

* New option `analysis-workflow` to balsamic config case CLI https://github.com/Clinical-Genomics/BALSAMIC/pull/932


Changed:
^^^^^^^^
* UMI workflow for panel cases now runs only when `--analysis-workflow balsamic-umi` is selected https://github.com/Clinical-Genomics/BALSAMIC/issues/896


[9.0.1]
-------

Expand Down
3 changes: 2 additions & 1 deletion tests/utils/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ def test_analysis_model():
"analysis_type": "paired",
"sequencing_type": "targeted",
"analysis_dir": "tests/test_data",
"umiworkflow": "true",
"analysis_workflow": "balsamic-umi",
}
# THEN we can successfully create a config dict
assert AnalysisModel.parse_obj(valid_args)
Expand All @@ -272,6 +272,7 @@ def test_analysis_model():
"analysis_type": "odd",
"sequencing_type": "wrong",
"analysis_dir": "tests/test_data",
"analysis_workflow": "umi",
}
# THEN should trigger ValueError
with pytest.raises(ValueError) as excinfo:
Expand Down