diff --git a/BALSAMIC/constants/rules.py b/BALSAMIC/constants/rules.py index 9bb961e79..a6c9ffea7 100644 --- a/BALSAMIC/constants/rules.py +++ b/BALSAMIC/constants/rules.py @@ -30,6 +30,7 @@ SNAKEMAKE_RULES: Dict[str, Dict[str, list]] = { "common": { + "misc": ["snakemake_rules/misc/sleep.rule"], "qc": [ "snakemake_rules/quality_control/fastqc.rule", "snakemake_rules/quality_control/multiqc.rule", diff --git a/BALSAMIC/constants/workflow_params.py b/BALSAMIC/constants/workflow_params.py index d3a2486b6..2c7f0b04a 100644 --- a/BALSAMIC/constants/workflow_params.py +++ b/BALSAMIC/constants/workflow_params.py @@ -108,6 +108,8 @@ }, } +SLEEP_BEFORE_START = 300 + WORKFLOW_PARAMS = { "common": { "pcr_model": "NONE", diff --git a/BALSAMIC/snakemake_rules/misc/sleep.rule b/BALSAMIC/snakemake_rules/misc/sleep.rule new file mode 100644 index 000000000..ddd79784e --- /dev/null +++ b/BALSAMIC/snakemake_rules/misc/sleep.rule @@ -0,0 +1,15 @@ + +rule sleep_before_start: + """Wait the specified number of seconds before starting any processing to avoid key_error issue.""" + output: + wake_up = result_dir + "start_analysis" + params: + sleep_seconds = SLEEP_BEFORE_START + threads: get_threads(cluster_config, "sleep_before_start") + message: + "Sleeping for {params.sleep_seconds} seconds before starting analysis." + shell: + """ +sleep {params.sleep_seconds} +echo "Waited: {params.sleep_seconds} seconds. Now starting analysis." 
>> {output.wake_up} + """ diff --git a/BALSAMIC/snakemake_rules/quality_control/fastp_tga.rule b/BALSAMIC/snakemake_rules/quality_control/fastp_tga.rule index a9abae72f..841eca850 100644 --- a/BALSAMIC/snakemake_rules/quality_control/fastp_tga.rule +++ b/BALSAMIC/snakemake_rules/quality_control/fastp_tga.rule @@ -3,6 +3,7 @@ rule fastp_umi_trim: """Fastq TGA data pre-processing to remove UMIs.""" input: + wake_up = result_dir + "start_analysis", fastq_r1 = lambda wildcards: config_model.get_fastq_by_fastq_pattern(wildcards.fastq_pattern, FastqName.FWD), fastq_r2 = lambda wildcards: config_model.get_fastq_by_fastq_pattern(wildcards.fastq_pattern, FastqName.REV) output: diff --git a/BALSAMIC/snakemake_rules/quality_control/fastp_wgs.rule b/BALSAMIC/snakemake_rules/quality_control/fastp_wgs.rule index 743d50db3..0b4bba3dc 100644 --- a/BALSAMIC/snakemake_rules/quality_control/fastp_wgs.rule +++ b/BALSAMIC/snakemake_rules/quality_control/fastp_wgs.rule @@ -4,6 +4,7 @@ rule fastp_quality_trim_wgs: """Fastq data pre-processing for WGS.""" input: + wake_up = result_dir + "start_analysis", fastq_r1 = lambda wildcards: config_model.get_fastq_by_fastq_pattern(wildcards.fastq_pattern, FastqName.FWD), fastq_r2 = lambda wildcards: config_model.get_fastq_by_fastq_pattern(wildcards.fastq_pattern, FastqName.REV) output: diff --git a/BALSAMIC/snakemake_rules/quality_control/fastqc.rule b/BALSAMIC/snakemake_rules/quality_control/fastqc.rule index 493a892fd..4d1d895f5 100644 --- a/BALSAMIC/snakemake_rules/quality_control/fastqc.rule +++ b/BALSAMIC/snakemake_rules/quality_control/fastqc.rule @@ -4,6 +4,7 @@ rule fastqc: """Perform quality control checks on raw sequence data.""" input: + wake_up = result_dir + "start_analysis", fastq = input_fastq_dir + "{fastq_file_names}.fastq.gz" output: fastqc_zip = fastqc_dir + "{fastq_file_names}_fastqc.zip" diff --git a/BALSAMIC/snakemake_rules/umi/concatenation_umi.rule b/BALSAMIC/snakemake_rules/umi/concatenation_umi.rule index 
23de2c483..4f502e6dd 100644 --- a/BALSAMIC/snakemake_rules/umi/concatenation_umi.rule +++ b/BALSAMIC/snakemake_rules/umi/concatenation_umi.rule @@ -4,6 +4,7 @@ rule concatenate_umi_reads: input: + wake_up = result_dir + "start_analysis", fastqs_fwd=lambda wildcards: config_model.get_all_fastqs_for_sample( sample_name=wildcards.sample, fastq_types=[FastqName.FWD] ), diff --git a/BALSAMIC/workflows/PON.smk b/BALSAMIC/workflows/PON.smk index 0ed589067..63ae6472c 100644 --- a/BALSAMIC/workflows/PON.smk +++ b/BALSAMIC/workflows/PON.smk @@ -10,13 +10,14 @@ from typing import Dict, List from BALSAMIC.constants.analysis import FastqName, Gender, PONWorkflow, SampleType, SequencingType from BALSAMIC.constants.paths import BALSAMIC_DIR -from BALSAMIC.constants.workflow_params import WORKFLOW_PARAMS +from BALSAMIC.constants.workflow_params import WORKFLOW_PARAMS, SLEEP_BEFORE_START from BALSAMIC.models.config import ConfigModel from BALSAMIC.models.params import BalsamicWorkflowConfig from BALSAMIC.utils.exc import BalsamicError from BALSAMIC.utils.io import write_finish_file from BALSAMIC.utils.rule import get_fastp_parameters, get_result_dir, get_threads + # Initialize ConfigModel config_model = ConfigModel.model_validate(config) @@ -81,6 +82,7 @@ if not Path(config["SENTIEON_EXEC"]).exists(): sequence_type = config['analysis']["sequencing_type"] rules_to_include = [] +rules_to_include.append("snakemake_rules/misc/sleep.rule") if sequence_type == SequencingType.TARGETED: rules_to_include.append("snakemake_rules/quality_control/fastp_tga.rule") else: diff --git a/BALSAMIC/workflows/QC.smk b/BALSAMIC/workflows/QC.smk index 997d55782..b721be365 100644 --- a/BALSAMIC/workflows/QC.smk +++ b/BALSAMIC/workflows/QC.smk @@ -9,7 +9,7 @@ from typing import Dict, List from BALSAMIC.constants.analysis import AnalysisType, FastqName, SampleType from BALSAMIC.constants.paths import BALSAMIC_DIR from BALSAMIC.constants.rules import SNAKEMAKE_RULES -from BALSAMIC.constants.workflow_params 
import WORKFLOW_PARAMS +from BALSAMIC.constants.workflow_params import WORKFLOW_PARAMS, SLEEP_BEFORE_START from BALSAMIC.models.config import ConfigModel from BALSAMIC.models.params import BalsamicWorkflowConfig from BALSAMIC.utils.cli import check_executable, generate_h5 @@ -112,9 +112,10 @@ sequence_type = config['analysis']["sequencing_type"] rules_to_include = [] for workflow_type, value in SNAKEMAKE_RULES.items(): if workflow_type in ["common", analysis_type + "_" + sequence_type]: - rules_to_include.extend(value.get("qc", []) + value.get("align", [])) + rules_to_include.extend(value.get("qc", []) + value.get("align", []) + value.get("misc", [])) rules_to_include = [rule for rule in rules_to_include if "umi" not in rule and "report" not in rule] + # Somalier only implemented for hg38 and hg19 if "canfam3" in config["reference"]["reference_genome"]: rules_to_include.remove("snakemake_rules/quality_control/somalier.rule") diff --git a/BALSAMIC/workflows/balsamic.smk b/BALSAMIC/workflows/balsamic.smk index 8a3cd272a..25a1d6aee 100644 --- a/BALSAMIC/workflows/balsamic.smk +++ b/BALSAMIC/workflows/balsamic.smk @@ -18,7 +18,7 @@ from BALSAMIC.constants.variant_filters import ( SVDB_FILTER_SETTINGS, VARDICT_SETTINGS, ) -from BALSAMIC.constants.workflow_params import VARCALL_PARAMS, WORKFLOW_PARAMS +from BALSAMIC.constants.workflow_params import VARCALL_PARAMS, WORKFLOW_PARAMS, SLEEP_BEFORE_START from BALSAMIC.models.config import ConfigModel from BALSAMIC.models.params import BalsamicWorkflowConfig, VarCallerFilter from BALSAMIC.utils.cli import check_executable, generate_h5 @@ -83,7 +83,6 @@ delivery_dir: str = Path(result_dir, "delivery").as_posix() + "/" umi_dir: str = Path(result_dir, "umi").as_posix() + "/" umi_qc_dir: str = Path(qc_dir, "umi_qc").as_posix() + "/" - # Annotations research_annotations = [] clinical_annotations = [] diff --git a/CHANGELOG.rst b/CHANGELOG.rst index f5e05d067..f1a26ba75 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,3 +1,11 @@ 
+[13.0.1] +-------- + +Added: +^^^^^^ +* Sleep rule that delays the start of the analysis to avoid the key_error issue https://github.com/Clinical-Genomics/BALSAMIC/pull/1311 + + [13.0.0] ------- @@ -35,6 +43,7 @@ Added: * `wkhtmltopdf` to system requirements https://github.com/Clinical-Genomics/BALSAMIC/pull/1339 * Store WGS CNV report plots https://github.com/Clinical-Genomics/BALSAMIC/pull/1347 + Changed: ^^^^^^^^ * Changed CN header field in cnvpytor in cnvpytor_tumor_only to be Float instead of Integer https://github.com/Clinical-Genomics/BALSAMIC/pull/1182