From 8d348cf7f3a4f597e34736d047e6dd73250220ad Mon Sep 17 00:00:00 2001 From: Mathias Johansson <math.joh.bio@gmail.com> Date: Wed, 1 Nov 2023 15:11:15 +0100 Subject: [PATCH 01/10] add rule sleep before starting analysis --- BALSAMIC/constants/rules.py | 1 + BALSAMIC/constants/workflow_params.py | 2 ++ BALSAMIC/snakemake_rules/misc/sleep.rule | 15 +++++++++++++++ .../quality_control/fastp_tga.rule | 1 + .../quality_control/fastp_wgs.rule | 1 + .../snakemake_rules/quality_control/fastqc.rule | 1 + BALSAMIC/workflows/PON.smk | 6 +++++- BALSAMIC/workflows/QC.smk | 6 ++++-- BALSAMIC/workflows/balsamic.smk | 4 +++- CHANGELOG.rst | 2 ++ 10 files changed, 35 insertions(+), 4 deletions(-) create mode 100644 BALSAMIC/snakemake_rules/misc/sleep.rule diff --git a/BALSAMIC/constants/rules.py b/BALSAMIC/constants/rules.py index a9d440b9c..2b96cf2d6 100644 --- a/BALSAMIC/constants/rules.py +++ b/BALSAMIC/constants/rules.py @@ -30,6 +30,7 @@ SNAKEMAKE_RULES: Dict[str, Dict[str, list]] = { "common": { + "misc": ["snakemake_rules/misc/sleep.rule"], "qc": [ "snakemake_rules/quality_control/fastqc.rule", "snakemake_rules/quality_control/multiqc.rule", diff --git a/BALSAMIC/constants/workflow_params.py b/BALSAMIC/constants/workflow_params.py index 3c6d46614..fcf822b54 100644 --- a/BALSAMIC/constants/workflow_params.py +++ b/BALSAMIC/constants/workflow_params.py @@ -108,6 +108,8 @@ }, } +SLEEP_BEFORE_START = 120 + WORKFLOW_PARAMS = { "common": { "pcr_model": "NONE", diff --git a/BALSAMIC/snakemake_rules/misc/sleep.rule b/BALSAMIC/snakemake_rules/misc/sleep.rule new file mode 100644 index 000000000..c97deb58d --- /dev/null +++ b/BALSAMIC/snakemake_rules/misc/sleep.rule @@ -0,0 +1,15 @@ + +rule sleep_before_start: + """Wait 120s before starting any processing to avoid key_error issue.""" + output: + wake_up = result_dir + "start_analysis" + params: + sleep_seconds = seconds_before_start + threads: get_threads(cluster_config, "sleep_before_start") + message: + "Sleeping for {params.sleep_seconds} seconds before starting analysis." + shell: + """ +sleep {params.sleep_seconds} +echo "Waited: {params.sleep_seconds} seconds. Now starting analysis." >> {output.wake_up} + """ diff --git a/BALSAMIC/snakemake_rules/quality_control/fastp_tga.rule b/BALSAMIC/snakemake_rules/quality_control/fastp_tga.rule index a9abae72f..841eca850 100644 --- a/BALSAMIC/snakemake_rules/quality_control/fastp_tga.rule +++ b/BALSAMIC/snakemake_rules/quality_control/fastp_tga.rule @@ -3,6 +3,7 @@ rule fastp_umi_trim: """Fastq TGA data pre-processing to remove UMIs.""" input: + wake_up = result_dir + "start_analysis", fastq_r1 = lambda wildcards: config_model.get_fastq_by_fastq_pattern(wildcards.fastq_pattern, FastqName.FWD), fastq_r2 = lambda wildcards: config_model.get_fastq_by_fastq_pattern(wildcards.fastq_pattern, FastqName.REV) output: diff --git a/BALSAMIC/snakemake_rules/quality_control/fastp_wgs.rule b/BALSAMIC/snakemake_rules/quality_control/fastp_wgs.rule index 743d50db3..0b4bba3dc 100644 --- a/BALSAMIC/snakemake_rules/quality_control/fastp_wgs.rule +++ b/BALSAMIC/snakemake_rules/quality_control/fastp_wgs.rule @@ -4,6 +4,7 @@ rule fastp_quality_trim_wgs: """Fastq data pre-processing for WGS.""" input: + wake_up = result_dir + "start_analysis", fastq_r1 = lambda wildcards: config_model.get_fastq_by_fastq_pattern(wildcards.fastq_pattern, FastqName.FWD), fastq_r2 = lambda wildcards: config_model.get_fastq_by_fastq_pattern(wildcards.fastq_pattern, FastqName.REV) output: diff --git a/BALSAMIC/snakemake_rules/quality_control/fastqc.rule b/BALSAMIC/snakemake_rules/quality_control/fastqc.rule index 493a892fd..4d1d895f5 100644 --- a/BALSAMIC/snakemake_rules/quality_control/fastqc.rule +++ b/BALSAMIC/snakemake_rules/quality_control/fastqc.rule @@ -4,6 +4,7 @@ rule fastqc: """Perform quality control checks on raw sequence data.""" input: + wake_up = result_dir + "start_analysis", fastq = input_fastq_dir + "{fastq_file_names}.fastq.gz" output: fastqc_zip = fastqc_dir + "{fastq_file_names}_fastqc.zip" diff --git a/BALSAMIC/workflows/PON.smk b/BALSAMIC/workflows/PON.smk index 134e5ff06..092029cb8 100644 --- a/BALSAMIC/workflows/PON.smk +++ b/BALSAMIC/workflows/PON.smk @@ -15,7 +15,7 @@ from BALSAMIC.constants.paths import BALSAMIC_DIR from BALSAMIC.constants.analysis import FastqName, SampleType, SequencingType, PONWorkflow, Gender from BALSAMIC.utils.io import write_finish_file from BALSAMIC.utils.rule import get_fastp_parameters, get_threads, get_result_dir -from BALSAMIC.constants.workflow_params import WORKFLOW_PARAMS +from BALSAMIC.constants.workflow_params import WORKFLOW_PARAMS, SLEEP_BEFORE_START from BALSAMIC.models.analysis import BalsamicWorkflowConfig, ConfigModel @@ -50,6 +50,9 @@ bam_dir: str = Path(result_dir, "bam", "").as_posix() + "/" cnv_dir: str = Path(result_dir, "cnv", "").as_posix() + "/" qc_dir: str = Path(result_dir, "qc", "").as_posix() + "/" +# Pre run parameters +seconds_before_start: int = SLEEP_BEFORE_START + # PON setting pon_workflow: PONWorkflow = config_model.analysis.pon_workflow @@ -83,6 +86,7 @@ if not Path(config["SENTIEON_EXEC"]).exists(): sequence_type = config['analysis']["sequencing_type"] rules_to_include = [] +rules_to_include.append("snakemake_rules/misc/sleep.rule") if sequence_type == SequencingType.TARGETED: rules_to_include.append("snakemake_rules/quality_control/fastp_tga.rule") else: diff --git a/BALSAMIC/workflows/QC.smk b/BALSAMIC/workflows/QC.smk index c8bbb140f..49bba14fb 100644 --- a/BALSAMIC/workflows/QC.smk +++ b/BALSAMIC/workflows/QC.smk @@ -24,7 +24,7 @@ from BALSAMIC.utils.rule import (get_fastp_parameters, get_rule_output, get_resu get_script_path, get_threads, get_sequencing_type, get_capture_kit) -from BALSAMIC.constants.workflow_params import WORKFLOW_PARAMS +from BALSAMIC.constants.workflow_params import WORKFLOW_PARAMS, SLEEP_BEFORE_START # Initialize ConfigModel config_model = ConfigModel.parse_obj(config) @@ -56,6 +56,8 @@ vcf_dir: str = Path(result_dir, "vcf").as_posix() + "/" qc_dir: str = Path(result_dir, "qc").as_posix() + "/" delivery_dir: str = Path(result_dir, "delivery").as_posix() + "/" +# Pre run parameters +seconds_before_start: int = SLEEP_BEFORE_START # Run information singularity_image: str = config_model.singularity['image'] @@ -112,7 +114,7 @@ sequence_type = config['analysis']["sequencing_type"] rules_to_include = [] for workflow_type, value in SNAKEMAKE_RULES.items(): if workflow_type in ["common", analysis_type + "_" + sequence_type]: - rules_to_include.extend(value.get("qc", []) + value.get("align", [])) + rules_to_include.extend(value.get("misc", []) + value.get("qc", []) + value.get("align", [])) rules_to_include = [rule for rule in rules_to_include if "umi" not in rule] if "snakemake_rules/quality_control/report.rule" in rules_to_include: rules_to_include = [rule for rule in rules_to_include if "quality_control/report.rule" not in rule] diff --git a/BALSAMIC/workflows/balsamic.smk b/BALSAMIC/workflows/balsamic.smk index eb7f4f4fc..8555b19c6 100644 --- a/BALSAMIC/workflows/balsamic.smk +++ b/BALSAMIC/workflows/balsamic.smk @@ -34,7 +34,7 @@ from BALSAMIC.utils.rule import (get_fastp_parameters, get_variant_callers, get_ from BALSAMIC.constants.analysis import MutationType, FastqName, SampleType from BALSAMIC.constants.variant_filters import (COMMON_SETTINGS, VARDICT_SETTINGS, SENTIEON_VARCALL_SETTINGS, SVDB_FILTER_SETTINGS) -from BALSAMIC.constants.workflow_params import (WORKFLOW_PARAMS, VARCALL_PARAMS) +from BALSAMIC.constants.workflow_params import (WORKFLOW_PARAMS, VARCALL_PARAMS, SLEEP_BEFORE_START) from BALSAMIC.constants.rules import SNAKEMAKE_RULES # Initialize ConfigModel @@ -71,6 +71,8 @@ delivery_dir: str = Path(result_dir, "delivery").as_posix() + "/" umi_dir: str = Path(result_dir, "umi").as_posix() + "/" umi_qc_dir: str = Path(qc_dir, "umi_qc").as_posix() + "/" +# Pre run parameters +seconds_before_start: int = SLEEP_BEFORE_START # Annotations research_annotations = [] diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 4a249a2c7..31c764fe0 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -29,6 +29,8 @@ Added: * CNVs from PureCN to TGA workflow https://Clinical-Genomics/BALSAMIC/pull/1278 * CNVkit Panel of Normal for gmsmyeloid_5.3 to TGA workflow https://Clinical-Genomics/BALSAMIC/pull/1278 * Command-line arguments and rules for creation of GENS files https://github.com/Clinical-Genomics/BALSAMIC/pull/1279 +* Sleep rule before start to fix key_error https://github.com/Clinical-Genomics/BALSAMIC/pull/1310 + Changed: From f18446069d1a0cfe5200c753053c44b9c1716791 Mon Sep 17 00:00:00 2001 From: Mathias Johansson <math.joh.bio@gmail.com> Date: Thu, 4 Jan 2024 15:39:51 +0100 Subject: [PATCH 02/10] increase time to 200 sec --- BALSAMIC/constants/workflow_params.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/BALSAMIC/constants/workflow_params.py b/BALSAMIC/constants/workflow_params.py index 1d3988708..bfe5d26d4 100644 --- a/BALSAMIC/constants/workflow_params.py +++ b/BALSAMIC/constants/workflow_params.py @@ -108,7 +108,7 @@ }, } -SLEEP_BEFORE_START = 120 +SLEEP_BEFORE_START = 200 WORKFLOW_PARAMS = { "common": { From 3ac67a5ab1ceb7039ea716d02816b940722d8140 Mon Sep 17 00:00:00 2001 From: Mathias Johansson <math.joh.bio@gmail.com> Date: Thu, 4 Jan 2024 15:50:36 +0100 Subject: [PATCH 03/10] add sleep requirement to concatenate umi --- BALSAMIC/snakemake_rules/umi/concatenation_umi.rule | 1 + 1 file changed, 1 insertion(+) diff --git a/BALSAMIC/snakemake_rules/umi/concatenation_umi.rule b/BALSAMIC/snakemake_rules/umi/concatenation_umi.rule index 23de2c483..4f502e6dd 100644 --- a/BALSAMIC/snakemake_rules/umi/concatenation_umi.rule +++ b/BALSAMIC/snakemake_rules/umi/concatenation_umi.rule @@ -4,6 +4,7 @@ rule concatenate_umi_reads: input: + wake_up = result_dir + "start_analysis", fastqs_fwd=lambda wildcards: config_model.get_all_fastqs_for_sample( sample_name=wildcards.sample, fastq_types=[FastqName.FWD] ), From 11064da6985ba0784e05b1d556320ae1b27c5b73 Mon Sep 17 00:00:00 2001 From: Mathias Johansson <math.joh.bio@gmail.com> Date: Mon, 15 Jan 2024 14:31:52 +0100 Subject: [PATCH 04/10] fix qc workflow --- BALSAMIC/workflows/QC.smk | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/BALSAMIC/workflows/QC.smk b/BALSAMIC/workflows/QC.smk index df88e3b5b..6874ecf8b 100644 --- a/BALSAMIC/workflows/QC.smk +++ b/BALSAMIC/workflows/QC.smk @@ -57,12 +57,9 @@ vcf_dir: str = Path(result_dir, "vcf").as_posix() + "/" qc_dir: str = Path(result_dir, "qc").as_posix() + "/" delivery_dir: str = Path(result_dir, "delivery").as_posix() + "/" -<<<<<<< HEAD # Pre run parameters seconds_before_start: int = SLEEP_BEFORE_START -======= ->>>>>>> release_v13.0.0 # Run information singularity_image: str = config_model.singularity['image'] sample_names: List[str] = config_model.get_all_sample_names() @@ -118,15 +115,9 @@ sequence_type = config['analysis']["sequencing_type"] rules_to_include = [] for workflow_type, value in SNAKEMAKE_RULES.items(): if workflow_type in ["common", analysis_type + "_" + sequence_type]: -<<<<<<< HEAD - rules_to_include.extend(value.get("misc", []) + value.get("qc", []) + value.get("align", [])) -rules_to_include = [rule for rule in rules_to_include if "umi" not in rule] -if "snakemake_rules/quality_control/report.rule" in rules_to_include: - rules_to_include = [rule for rule in rules_to_include if "quality_control/report.rule" not in rule] -======= - rules_to_include.extend(value.get("qc", []) + value.get("align", [])) + rules_to_include.extend(value.get("qc", []) + value.get("align", []) + value.get("misc", [])) rules_to_include = [rule for rule in rules_to_include if "umi" not in rule and "report" not in rule] ->>>>>>> release_v13.0.0 + # Somalier only implemented for hg38 and hg19 if "canfam3" in config["reference"]["reference_genome"]: From 0c200e06324f7b376911ae37ac51b4d50a170d3c Mon Sep 17 00:00:00 2001 From: Mathias Johansson <math.joh.bio@gmail.com> Date: Mon, 22 Jan 2024 11:38:04 +0100 Subject: [PATCH 05/10] increase to 5 mins --- BALSAMIC/constants/workflow_params.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/BALSAMIC/constants/workflow_params.py b/BALSAMIC/constants/workflow_params.py index bfe5d26d4..2c7f0b04a 100644 --- a/BALSAMIC/constants/workflow_params.py +++ b/BALSAMIC/constants/workflow_params.py @@ -108,7 +108,7 @@ }, } -SLEEP_BEFORE_START = 200 +SLEEP_BEFORE_START = 300 WORKFLOW_PARAMS = { "common": { From 105439450112f64dfa9293ad73b5d370744d4df6 Mon Sep 17 00:00:00 2001 From: Mathias Johansson <math.joh.bio@gmail.com> Date: Mon, 22 Jan 2024 11:41:47 +0100 Subject: [PATCH 06/10] changelog --- CHANGELOG.rst | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 49f536249..7f81f168f 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,3 +1,11 @@ +[X.X.X] +------- + +Added: +^^^^^^ +* Sleep rule before start to fix key_error https://github.com/Clinical-Genomics/BALSAMIC/pull/1310 + + [13.0.0] ------- @@ -34,7 +42,6 @@ Added: * CNV report for TGA workflow https://github.com/Clinical-Genomics/BALSAMIC/pull/1339 * `wkhtmltopdf` to system requirements https://github.com/Clinical-Genomics/BALSAMIC/pull/1339 * Store WGS CNV report plots https://github.com/Clinical-Genomics/BALSAMIC/pull/1347 -* Sleep rule before start to fix key_error https://github.com/Clinical-Genomics/BALSAMIC/pull/1310 Changed: From 9e0a4bb9c82e19aba23c28d2133b9d1dfbe79bfb Mon Sep 17 00:00:00 2001 From: Mathias Johansson <math.joh.bio@gmail.com> Date: Mon, 22 Jan 2024 11:44:25 +0100 Subject: [PATCH 07/10] remove unnecessary variables in smks --- BALSAMIC/snakemake_rules/misc/sleep.rule | 2 +- BALSAMIC/workflows/PON.smk | 3 --- BALSAMIC/workflows/QC.smk | 3 --- BALSAMIC/workflows/balsamic.smk | 3 --- 4 files changed, 1 insertion(+), 10 deletions(-) diff --git a/BALSAMIC/snakemake_rules/misc/sleep.rule b/BALSAMIC/snakemake_rules/misc/sleep.rule index c97deb58d..ba960cdab 100644 --- a/BALSAMIC/snakemake_rules/misc/sleep.rule +++ b/BALSAMIC/snakemake_rules/misc/sleep.rule @@ -4,7 +4,7 @@ rule sleep_before_start: output: wake_up = result_dir + "start_analysis" params: - sleep_seconds = seconds_before_start + sleep_seconds = SLEEP_BEFORE_START threads: get_threads(cluster_config, "sleep_before_start") message: "Sleeping for {params.sleep_seconds} seconds before starting analysis." diff --git a/BALSAMIC/workflows/PON.smk b/BALSAMIC/workflows/PON.smk index d7f7827e1..63ae6472c 100644 --- a/BALSAMIC/workflows/PON.smk +++ b/BALSAMIC/workflows/PON.smk @@ -49,9 +49,6 @@ bam_dir: str = Path(result_dir, "bam", "").as_posix() + "/" cnv_dir: str = Path(result_dir, "cnv", "").as_posix() + "/" qc_dir: str = Path(result_dir, "qc", "").as_posix() + "/" -# Pre run parameters -seconds_before_start: int = SLEEP_BEFORE_START - # PON setting pon_workflow: PONWorkflow = config_model.analysis.pon_workflow diff --git a/BALSAMIC/workflows/QC.smk b/BALSAMIC/workflows/QC.smk index 6874ecf8b..b721be365 100644 --- a/BALSAMIC/workflows/QC.smk +++ b/BALSAMIC/workflows/QC.smk @@ -57,9 +57,6 @@ vcf_dir: str = Path(result_dir, "vcf").as_posix() + "/" qc_dir: str = Path(result_dir, "qc").as_posix() + "/" delivery_dir: str = Path(result_dir, "delivery").as_posix() + "/" -# Pre run parameters -seconds_before_start: int = SLEEP_BEFORE_START - # Run information singularity_image: str = config_model.singularity['image'] sample_names: List[str] = config_model.get_all_sample_names() diff --git a/BALSAMIC/workflows/balsamic.smk b/BALSAMIC/workflows/balsamic.smk index e6b315eaa..25a1d6aee 100644 --- a/BALSAMIC/workflows/balsamic.smk +++ b/BALSAMIC/workflows/balsamic.smk @@ -83,9 +83,6 @@ delivery_dir: str = Path(result_dir, "delivery").as_posix() + "/" umi_dir: str = Path(result_dir, "umi").as_posix() + "/" umi_qc_dir: str = Path(qc_dir, "umi_qc").as_posix() + "/" -# Pre run parameters -seconds_before_start: int = SLEEP_BEFORE_START - # Annotations research_annotations = [] clinical_annotations = [] From 259bbed2ed950d627e2d799c5f36cc00c6c0201d Mon Sep 17 00:00:00 2001 From: Mathias Johansson <math.joh.bio@gmail.com> Date: Mon, 22 Jan 2024 11:45:40 +0100 Subject: [PATCH 08/10] fix string --- BALSAMIC/snakemake_rules/misc/sleep.rule | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/BALSAMIC/snakemake_rules/misc/sleep.rule b/BALSAMIC/snakemake_rules/misc/sleep.rule index ba960cdab..ddd79784e 100644 --- a/BALSAMIC/snakemake_rules/misc/sleep.rule +++ b/BALSAMIC/snakemake_rules/misc/sleep.rule @@ -1,6 +1,6 @@ rule sleep_before_start: - """Wait 120s before starting any processing to avoid key_error issue.""" + """Wait the specified number of seconds before starting any processing to avoid key_error issue.""" output: wake_up = result_dir + "start_analysis" params: From be3cf2cac01e64d82838a824083a94e183dc18e4 Mon Sep 17 00:00:00 2001 From: Mathias Johansson <math.joh.bio@gmail.com> Date: Mon, 22 Jan 2024 12:18:05 +0100 Subject: [PATCH 09/10] changelog version bump --- CHANGELOG.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 7f81f168f..769753891 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,4 +1,4 @@ -[X.X.X] +[13.0.1] ------- Added: From 17e55fd0a18bafa216ea3194af788aa4a71554e0 Mon Sep 17 00:00:00 2001 From: Mathias Johansson <math.joh.bio@gmail.com> Date: Mon, 22 Jan 2024 13:29:29 +0100 Subject: [PATCH 10/10] fix pr link --- CHANGELOG.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 769753891..f1a26ba75 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -3,7 +3,7 @@ Added: ^^^^^^ -* Sleep rule before start to fix key_error https://github.com/Clinical-Genomics/BALSAMIC/pull/1310 +* Sleep rule before start to fix key_error https://github.com/Clinical-Genomics/BALSAMIC/pull/1311 [13.0.0]