From 2b019107cd051d7367fd5cd722af5422dd4fa88c Mon Sep 17 00:00:00 2001 From: sroener <40714954+sroener@users.noreply.github.com> Date: Wed, 20 Apr 2022 20:39:32 +0200 Subject: [PATCH 01/19] fix: correct check for trimmed reads --- workflow/rules/common.smk | 3 ++- workflow/scripts/bwa-mem2_wrapper.py | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 8f2eb67..d14762d 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -215,6 +215,7 @@ def get_mapping_input(wildcards): elif trimming_algorithm.lower() == "trimmomatic": mapping_input["reads"] = ["results/{ID}/trimmed/trimmomatic/{SAMPLE}.1.fastq.gz", "results/{ID}/trimmed/trimmomatic/{SAMPLE}.2.fastq.gz",] if all_data: - mapping_input["non_merged"] = ["results/{ID}/trimmed/trimmomatic/{SAMPLE}.1.unpaired.fastq.gz","results/{ID}/trimmed/trimmomatic/{SAMPLE}.2.unpaired.fastq.gz"] + mapping_input["noadapter_R1"] = "results/{ID}/trimmed/trimmomatic/{SAMPLE}.1.unpaired.fastq.gz" + mapping_input["noadapter_R2"] = "results/{ID}/trimmed/trimmomatic/{SAMPLE}.2.unpaired.fastq.gz" return mapping_input diff --git a/workflow/scripts/bwa-mem2_wrapper.py b/workflow/scripts/bwa-mem2_wrapper.py index 490d465..35ca2c7 100644 --- a/workflow/scripts/bwa-mem2_wrapper.py +++ b/workflow/scripts/bwa-mem2_wrapper.py @@ -8,6 +8,7 @@ from os import path +import sys from wsgiref.handlers import BaseCGIHandler from snakemake.shell import shell @@ -28,7 +29,7 @@ non_merged_cmd = "" singleton_cmd = "" -if snakemake.input.get("non_merged"): +if snakemake.input.get("noadapter_R1") and snakemake.input.get("noadapter_R2"): non_merged_cmd = "bwa-mem2 mem -t {snakemake.threads} -R \"{snakemake.params.RG}\" {snakemake.input.ref} {snakemake.input.noadapter_R1} {snakemake.input.noadapter_R2}| grep -v \"^@\" || true ; " if snakemake.input.get("single_reads"): @@ -39,6 +40,7 @@ {singleton_cmd}\ ) | samtools view -b -o {{snakemake.output.mapped_reads}} - ) 2>{{snakemake.log}}" +print(f"Executing command:\n {base_cmd}", file=sys.stderr) shell(base_cmd) From dbd5d274a75f761211095c69e80c0a90792a5934 Mon Sep 17 00:00:00 2001 From: sroener <40714954+sroener@users.noreply.github.com> Date: Tue, 28 Jun 2022 11:25:59 +0200 Subject: [PATCH 02/19] initial commit --- .editorconfig | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 .editorconfig diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..b94edaa --- /dev/null +++ b/.editorconfig @@ -0,0 +1,32 @@ +# EditorConfig is awesome: https://EditorConfig.org + +# top-most EditorConfig file +root = true + +# Unix-style newlines with a newline ending every file +[*] +end_of_line = lf +insert_final_newline = true + +# Matches multiple files with brace expansion notation +# Set default charset +[*.{js,py}] +charset = utf-8 + +# 4 space indentation +[*.py] +indent_style = space +indent_size = 4 + +# Tab indentation (no size specified) +[Makefile] +indent_style = tab + +# Indentation override for .tsv files +[*.tsv] +indent_style = tab + +# Matches the exact files either package.json or .travis.yml +[{package.json,.travis.yml}] +indent_style = space +indent_size = 2 From 6ad926e5576ab540c8e16aef11610eecfe6318ed Mon Sep 17 00:00:00 2001 From: sroener <40714954+sroener@users.noreply.github.com> Date: Tue, 28 Jun 2022 11:31:33 +0200 Subject: [PATCH 03/19] feat: improved thread/core handling; minor refactoring --- workflow/rules/bam_to_fastq.smk | 6 +++--- workflow/rules/mapping.smk | 6 +++--- workflow/rules/reference.smk | 5 +---- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/workflow/rules/bam_to_fastq.smk b/workflow/rules/bam_to_fastq.smk index 574353e..56403fd 100644 --- a/workflow/rules/bam_to_fastq.smk +++ b/workflow/rules/bam_to_fastq.smk @@ -19,9 +19,9 @@ rule bam_to_fastq: shell: """set +o pipefail; ( - samtools sort -T {params.TMPDIR} -n -@ {threads} {input.bam} | \ - samtools fastq -@ {threads} -t \ + samtools sort -T {params.TMPDIR} -n -@ $(({threads}/2)) {input.bam} | \ + samtools fastq -@ $(({threads}/2)) -t \ -1 {output.r1} \ -2 {output.r2} \ -0 {output.s1} \ - )2> {log}""" \ No newline at end of file + )2> {log}""" diff --git a/workflow/rules/mapping.smk b/workflow/rules/mapping.smk index 7c8cf5f..9784d41 100644 --- a/workflow/rules/mapping.smk +++ b/workflow/rules/mapping.smk @@ -27,9 +27,9 @@ rule mark_duplicates: threads: 64 shell: #"set +o pipefail;" - "(samtools fixmate -u -@ {threads} -m {input.mapped_reads} - | " - "samtools sort -u -@ {threads} -T {params.TMPDIR} - | " - "samtools markdup -@ {threads} - {output.processed_reads}) 2>{log}" + "(samtools fixmate -u -@ $(({threads}/3)) -m {input.mapped_reads} - | " + "samtools sort -u -@ $(({threads}/3)) -T {params.TMPDIR} - | " + "samtools markdup -@ $(({threads}/3)) - {output.processed_reads}) 2>{log}" rule index_bam: input: diff --git a/workflow/rules/reference.smk b/workflow/rules/reference.smk index 52a17b7..de047ea 100644 --- a/workflow/rules/reference.smk +++ b/workflow/rules/reference.smk @@ -1,7 +1,4 @@ -#! wrapper rules können auch anders benannte inputs und outputs haben -> output based on configs! - - rule get_reference: output: "resources/reference/{GENOME}.fa", @@ -50,4 +47,4 @@ rule get_trimmomatic_adapters: log: "logs/get_trimmomatic_adapter.log" shell: - "wget -P {params.prefix} {params.URLs} -o {log}" \ No newline at end of file + "wget -P {params.prefix} {params.URLs} -o {log}" From 5aa85618ab35e6bcecbf24fe9610da497d373eff Mon Sep 17 00:00:00 2001 From: sroener <40714954+sroener@users.noreply.github.com> Date: Tue, 28 Jun 2022 11:33:47 +0200 Subject: [PATCH 04/19] feat: generalization of bam indexing rule --- workflow/rules/mapping.smk | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/workflow/rules/mapping.smk b/workflow/rules/mapping.smk index 9784d41..3c383ff 100644 --- a/workflow/rules/mapping.smk +++ b/workflow/rules/mapping.smk @@ -33,11 +33,11 @@ rule mark_duplicates: rule index_bam: input: - "results/{ID}/mapped_reads/{SAMPLE}_processed.{GENOME}.bam" + "{path}.bam" output: - "results/{ID}/mapped_reads/{SAMPLE}_processed.{GENOME}.bam.bai" - log:"results/logs/{ID}/index_bam/{SAMPLE}.{GENOME}.log", + "{path}.bam.bai" conda: "../envs/cfDNA_prep.yaml" threads: 32 shell: - "samtools index -@ {threads} {input} {output} 2> {log}" + "samtools index -@ {threads} {input} {output}" + From a8985f585fcb11eb065e8587714031919bba3531 Mon Sep 17 00:00:00 2001 From: sroener <40714954+sroener@users.noreply.github.com> Date: Thu, 11 Aug 2022 12:06:40 +0200 Subject: [PATCH 05/19] initial commit --- workflow/envs/icorCNA.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 workflow/envs/icorCNA.yaml diff --git a/workflow/envs/icorCNA.yaml b/workflow/envs/icorCNA.yaml new file mode 100644 index 0000000..2553821 --- /dev/null +++ b/workflow/envs/icorCNA.yaml @@ -0,0 +1,9 @@ +name: icorCNA +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - r-ichorcna + - hmmcopy + From d18b8768ac46376d34223dc946cf94afbefd94ae Mon Sep 17 00:00:00 2001 From: sroener <40714954+sroener@users.noreply.github.com> Date: Thu, 11 Aug 2022 12:06:55 +0200 Subject: [PATCH 06/19] initial commit --- workflow/rules/icorCNA.smk | 81 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 workflow/rules/icorCNA.smk diff --git a/workflow/rules/icorCNA.smk b/workflow/rules/icorCNA.smk new file mode 100644 index 0000000..af09900 --- /dev/null +++ b/workflow/rules/icorCNA.smk @@ -0,0 +1,81 @@ + +rule get_chroms: + input: + bam="results/{ID}/mapped_reads/{SAMPLE}_processed.{GENOME}.bam", + bai="results/{ID}/mapped_reads/{SAMPLE}_processed.{GENOME}.bam.bai", + output: + chroms="results/{ID}/icorCNA/chroms/{SAMPLE}_processed.{GENOME}.chromosomes.txt" + log: + "results/logs/{ID}/get_chroms/{SAMPLE}_processed.{GENOME}.log", + conda: + "../envs/cfDNA_prep.yaml" + shell: + """samtools idxstats {input.bam} | \ + cut -f 1 | grep -w 'chr[1-9]\|chr[1-2][0-9]\|chr[X,Y]\|^[1-2][0-9]\|^[0-9]\|^[X,Y]' \ + | tr '\n' ','| sed 's/,*\r*$//' \ + 1> {output.chroms} 2>{log}""" + + + +rule read_counter: + input: + bam="results/{ID}/mapped_reads/{SAMPLE}_processed.{GENOME}.bam", + bai="results/{ID}/mapped_reads/{SAMPLE}_processed.{GENOME}.bam.bai", + output: + wig="results/{ID}/icorCNA/readcounts/{SAMPLE}_processed.{GENOME}.wig" + log: + "results/logs/{ID}/mapping/{SAMPLE}_all.{GENOME}.log", + params: + window=1000000 + quality=20 + chroms="chr1,chr2,chr3,chr4,chr5,chr6,chr7,chr8,chr9,chr10,chr11,chr12,chr13,chr14,chr15,chr16,chr17,chr18,chr19,chr20,chr21,chr22,chrX,chrY" + conda: + "../envs/icorCNA.yaml" + shell: + """ + readCounter --window {params.window} --quality {params.quality} \ + --chromosome {params.chroms} {input.bam} 1>{output.wig} 2>{log} + """ + + +rule icorCNA: + input: + bam="results/{ID}/mapped_reads/{SAMPLE}_processed.{GENOME}.bam", + bai="results/{ID}/mapped_reads/{SAMPLE}_processed.{GENOME}.bam.bai", + wig="results/{ID}/icorCNA/readcounts/{SAMPLE}_processed.{GENOME}.wig", + output: + outDir=directory("results/{ID}/icorCNA/{SAMPLE}_processed_{GENOME}") + params: + ID="{SAMPLE}_processed", + ploidy='"c(2,3)"', + normal='"c(0.5,0.6,0.7,0.8,0.9)"', + maxCN=5, + gcWig="../ichorCNA/inst/extdata/gc_hg38_1000kb.wig", + mapWIG="../ichorCNA/inst/extdata/map_hg38_1000kb.wig", + centro="../ichorCNA/inst/extdata/GRCh38.GCA_000001405.2_centromere_acen.txt", + normalPanel="../ichorCNA/inst/extdata/HD_ULP_PoN_1Mb_median_normAutosome_mapScoreFiltered_median.rds", + includeHOMD="False", + chrs='"c(1:22, \"X\")"', + chrTrain='"c(1:22)"', + estimateNormal="True", + estimatePloidy="True", + estimateScPrevalence="True", + scStates='"c(1,3)"', + txnE=0.9999, + txnStrength=10000, + log: + "results/logs/{ID}/mapping/{SAMPLE}_all.{GENOME}.log", + conda: + "../envs/icorCNA.yaml" + shell: + """ + runIchorCNA.R --id {params.ID} --WIG {input.wig} \ + --ploidy {params.ploidy} --normal {params.normal} --maxCN {params.maxCN} \ + --gcWig {params.gcWIG} --mapWig {params.mapWIG} \ + --centromere {params.centro} --normalPanel {params.normalPanel} \ + --includeHOMD {params.includeHOMD} --chrs {params.chrs} \ + --chrTrain {params.chrTrain} --estimateNormal {params.estimateNormal} \ + --estimatePloidy {params.estimatePloidy} \ + --estimateScPrevalence {params.estimateScPrevalence} --scStates {params.scStates} \ + --txnE {params.txnE} --txnStrength {params.txnStrength} --outDir {output.outDir} + """ From 2bd913d20b15acd05bc7072a1593c5a5dc8b1896 Mon Sep 17 00:00:00 2001 From: sroener <40714954+sroener@users.noreply.github.com> Date: Thu, 11 Aug 2022 12:14:11 +0200 Subject: [PATCH 07/19] refactor: corrected rule name and snakefmt --- workflow/rules/{icorCNA.smk => ichorCNA.smk} | 59 ++++++++++---------- 1 file changed, 29 insertions(+), 30 deletions(-) rename workflow/rules/{icorCNA.smk => ichorCNA.smk} (71%) diff --git a/workflow/rules/icorCNA.smk b/workflow/rules/ichorCNA.smk similarity index 71% rename from workflow/rules/icorCNA.smk rename to workflow/rules/ichorCNA.smk index af09900..0ca4f40 100644 --- a/workflow/rules/icorCNA.smk +++ b/workflow/rules/ichorCNA.smk @@ -4,7 +4,7 @@ rule get_chroms: bam="results/{ID}/mapped_reads/{SAMPLE}_processed.{GENOME}.bam", bai="results/{ID}/mapped_reads/{SAMPLE}_processed.{GENOME}.bam.bai", output: - chroms="results/{ID}/icorCNA/chroms/{SAMPLE}_processed.{GENOME}.chromosomes.txt" + chroms="results/{ID}/icorCNA/chroms/{SAMPLE}_processed.{GENOME}.chromosomes.txt", log: "results/logs/{ID}/get_chroms/{SAMPLE}_processed.{GENOME}.log", conda: @@ -16,19 +16,18 @@ rule get_chroms: 1> {output.chroms} 2>{log}""" - rule read_counter: - input: + input: bam="results/{ID}/mapped_reads/{SAMPLE}_processed.{GENOME}.bam", bai="results/{ID}/mapped_reads/{SAMPLE}_processed.{GENOME}.bam.bai", - output: - wig="results/{ID}/icorCNA/readcounts/{SAMPLE}_processed.{GENOME}.wig" + output: + wig="results/{ID}/icorCNA/readcounts/{SAMPLE}_processed.{GENOME}.wig", log: "results/logs/{ID}/mapping/{SAMPLE}_all.{GENOME}.log", params: - window=1000000 - quality=20 - chroms="chr1,chr2,chr3,chr4,chr5,chr6,chr7,chr8,chr9,chr10,chr11,chr12,chr13,chr14,chr15,chr16,chr17,chr18,chr19,chr20,chr21,chr22,chrX,chrY" + window=1000000, + quality=20, + chroms="chr1,chr2,chr3,chr4,chr5,chr6,chr7,chr8,chr9,chr10,chr11,chr12,chr13,chr14,chr15,chr16,chr17,chr18,chr19,chr20,chr21,chr22,chrX,chrY", conda: "../envs/icorCNA.yaml" shell: @@ -38,38 +37,38 @@ rule read_counter: """ -rule icorCNA: - input: +rule ichorCNA: + input: bam="results/{ID}/mapped_reads/{SAMPLE}_processed.{GENOME}.bam", bai="results/{ID}/mapped_reads/{SAMPLE}_processed.{GENOME}.bam.bai", wig="results/{ID}/icorCNA/readcounts/{SAMPLE}_processed.{GENOME}.wig", - output: - outDir=directory("results/{ID}/icorCNA/{SAMPLE}_processed_{GENOME}") + output: + outDir=directory("results/{ID}/icorCNA/{SAMPLE}_processed_{GENOME}"), params: - ID="{SAMPLE}_processed", - ploidy='"c(2,3)"', - normal='"c(0.5,0.6,0.7,0.8,0.9)"', - maxCN=5, - gcWig="../ichorCNA/inst/extdata/gc_hg38_1000kb.wig", - mapWIG="../ichorCNA/inst/extdata/map_hg38_1000kb.wig", - centro="../ichorCNA/inst/extdata/GRCh38.GCA_000001405.2_centromere_acen.txt", - normalPanel="../ichorCNA/inst/extdata/HD_ULP_PoN_1Mb_median_normAutosome_mapScoreFiltered_median.rds", - includeHOMD="False", - chrs='"c(1:22, \"X\")"', - chrTrain='"c(1:22)"', - estimateNormal="True", - estimatePloidy="True", - estimateScPrevalence="True", - scStates='"c(1,3)"', - txnE=0.9999, - txnStrength=10000, + sID="{SAMPLE}_processed", + ploidy='"c(2,3)"', + normal='"c(0.5,0.6,0.7,0.8,0.9)"', + maxCN=5, + gcWig="../ichorCNA/inst/extdata/gc_hg38_1000kb.wig", + mapWIG="../ichorCNA/inst/extdata/map_hg38_1000kb.wig", + centro="../ichorCNA/inst/extdata/GRCh38.GCA_000001405.2_centromere_acen.txt", + normalPanel="../ichorCNA/inst/extdata/HD_ULP_PoN_1Mb_median_normAutosome_mapScoreFiltered_median.rds", + includeHOMD="False", + chrs='"c(1:22, "X")"', + chrTrain='"c(1:22)"', + estimateNormal="True", + estimatePloidy="True", + estimateScPrevalence="True", + scStates='"c(1,3)"', + txnE=0.9999, + txnStrength=10000, log: "results/logs/{ID}/mapping/{SAMPLE}_all.{GENOME}.log", conda: "../envs/icorCNA.yaml" shell: """ - runIchorCNA.R --id {params.ID} --WIG {input.wig} \ + runIchorCNA.R --id {params.sID} --WIG {input.wig} \ --ploidy {params.ploidy} --normal {params.normal} --maxCN {params.maxCN} \ --gcWig {params.gcWIG} --mapWig {params.mapWIG} \ --centromere {params.centro} --normalPanel {params.normalPanel} \ From 203013833d3e07df9151962169e6f237737b2d80 Mon Sep 17 00:00:00 2001 From: sroener <40714954+sroener@users.noreply.github.com> Date: Thu, 11 Aug 2022 12:14:41 +0200 Subject: [PATCH 08/19] feat: add ichorCNA rule --- workflow/Snakefile | 1 + 1 file changed, 1 insertion(+) diff --git a/workflow/Snakefile b/workflow/Snakefile index ae65c0a..b07852e 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -12,6 +12,7 @@ include: "rules/bam_to_fastq.smk" include: "rules/trimming.smk" include: "rules/filtering.smk" include: "rules/mapping.smk" +include: "rules/ichorCNA.smk" include: "rules/QualityControl.smk" From 41933aa1098f35c373ca06339d255ab7c0a95302 Mon Sep 17 00:00:00 2001 From: sroener <40714954+sroener@users.noreply.github.com> Date: Thu, 11 Aug 2022 12:15:33 +0200 Subject: [PATCH 09/19] feat: add readCount wig file to outputs --- workflow/rules/common.smk | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index d14762d..b20fc98 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -48,6 +48,15 @@ def get_final_output(): final_output.extend( expand("results/{ID}/qc/multiqc.html", ID=samples["ID"].unique()) ) + final_output.extend( + expand( + "results/{ID}/icorCNA/readcounts/{SAMPLE}_processed.{GENOME}.wig", + zip, + ID=samples["ID"], + SAMPLE=samples["sample"], + GENOME=samples["genome_build"], + ) + ) return final_output From b617a5a9243b0c595a86321c0cfc04d500a5dd86 Mon Sep 17 00:00:00 2001 From: sroener <40714954+sroener@users.noreply.github.com> Date: Thu, 11 Aug 2022 12:20:29 +0200 Subject: [PATCH 10/19] fix: typo in ichorCNA params --- workflow/rules/ichorCNA.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/rules/ichorCNA.smk b/workflow/rules/ichorCNA.smk index 0ca4f40..ababbcf 100644 --- a/workflow/rules/ichorCNA.smk +++ b/workflow/rules/ichorCNA.smk @@ -49,7 +49,7 @@ rule ichorCNA: ploidy='"c(2,3)"', normal='"c(0.5,0.6,0.7,0.8,0.9)"', maxCN=5, - gcWig="../ichorCNA/inst/extdata/gc_hg38_1000kb.wig", + gcWIG="../ichorCNA/inst/extdata/gc_hg38_1000kb.wig", mapWIG="../ichorCNA/inst/extdata/map_hg38_1000kb.wig", centro="../ichorCNA/inst/extdata/GRCh38.GCA_000001405.2_centromere_acen.txt", normalPanel="../ichorCNA/inst/extdata/HD_ULP_PoN_1Mb_median_normAutosome_mapScoreFiltered_median.rds", From 23b3bd80d98b9a39b2dca390e0358a250846c74d Mon Sep 17 00:00:00 2001 From: sroener <40714954+sroener@users.noreply.github.com> Date: Thu, 11 Aug 2022 12:21:01 +0200 Subject: [PATCH 11/19] feat: add ichorCNA output dir to final outputs --- workflow/rules/common.smk | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index b20fc98..b6fcac1 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -57,6 +57,18 @@ def get_final_output(): GENOME=samples["genome_build"], ) ) + final_output.extend( + expand( + "results/{ID}/icorCNA/{SAMPLE}_processed_{GENOME}", + zip, + ID=samples["ID"], + SAMPLE=samples["sample"], + GENOME=samples["genome_build"], + ) + ) + + + return final_output From 283720d45c4a118b48b115b1ac69bad11628d446 Mon Sep 17 00:00:00 2001 From: sroener <40714954+sroener@users.noreply.github.com> Date: Thu, 11 Aug 2022 15:14:32 +0200 Subject: [PATCH 12/19] feat: add rule for getting ichorCNA files --- workflow/rules/reference.smk | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/workflow/rules/reference.smk b/workflow/rules/reference.smk index de047ea..dd6d130 100644 --- a/workflow/rules/reference.smk +++ b/workflow/rules/reference.smk @@ -48,3 +48,15 @@ rule get_trimmomatic_adapters: "logs/get_trimmomatic_adapter.log" shell: "wget -P {params.prefix} {params.URLs} -o {log}" + +rule get_ichorCNA_files: + output: + zip="ichorCNA_master.zip", + dir=directory("resources/ichorCNA") + params: + prefix="resources/", + URL="https://github.com/broadinstitute/ichorCNA/archive/refs/heads/master.zip" + log: + + shell: + 'wget -P {params.prefix} -c {params.URL} -O {output.zip}; unzip -j {output.zip} "ichorCNA-master/inst/*" -d {output.dir}' From 094746124332d555d3233b33e7039158b9f80448 Mon Sep 17 00:00:00 2001 From: sroener <40714954+sroener@users.noreply.github.com> Date: Thu, 11 Aug 2022 15:36:42 +0200 Subject: [PATCH 13/19] fix: add resources path to input; add one result file for creating output dir; fix relative paths --- workflow/rules/ichorCNA.smk | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/workflow/rules/ichorCNA.smk b/workflow/rules/ichorCNA.smk index ababbcf..4738912 100644 --- a/workflow/rules/ichorCNA.smk +++ b/workflow/rules/ichorCNA.smk @@ -42,19 +42,21 @@ rule ichorCNA: bam="results/{ID}/mapped_reads/{SAMPLE}_processed.{GENOME}.bam", bai="results/{ID}/mapped_reads/{SAMPLE}_processed.{GENOME}.bam.bai", wig="results/{ID}/icorCNA/readcounts/{SAMPLE}_processed.{GENOME}.wig", + resources="resources/ichorCNA/" output: - outDir=directory("results/{ID}/icorCNA/{SAMPLE}_processed_{GENOME}"), + outDir=directory("results/{ID}/icorCNA/{SAMPLE}_processed_{GENOME}/"), + summary="results/{ID}/icorCNA/{SAMPLE}_processed_{GENOME}/{SAMPLE}_processed.params.txt" params: sID="{SAMPLE}_processed", ploidy='"c(2,3)"', normal='"c(0.5,0.6,0.7,0.8,0.9)"', maxCN=5, - gcWIG="../ichorCNA/inst/extdata/gc_hg38_1000kb.wig", - mapWIG="../ichorCNA/inst/extdata/map_hg38_1000kb.wig", - centro="../ichorCNA/inst/extdata/GRCh38.GCA_000001405.2_centromere_acen.txt", - normalPanel="../ichorCNA/inst/extdata/HD_ULP_PoN_1Mb_median_normAutosome_mapScoreFiltered_median.rds", + gcWIG="resources/ichorCNA/gc_hg38_1000kb.wig", + mapWIG="resources/ichorCNA/map_hg38_1000kb.wig", + centro="resources/ichorCNA/GRCh38.GCA_000001405.2_centromere_acen.txt", + normalPanel="resources/ichorCNA/HD_ULP_PoN_1Mb_median_normAutosome_mapScoreFiltered_median.rds", includeHOMD="False", - chrs='"c(1:22, "X")"', + chrs="'c(1:22)'", chrTrain='"c(1:22)"', estimateNormal="True", estimatePloidy="True", From 2bfa9705152737220685074e611ae91d93f7d4be Mon Sep 17 00:00:00 2001 From: sroener <40714954+sroener@users.noreply.github.com> Date: Thu, 11 Aug 2022 15:37:22 +0200 Subject: [PATCH 14/19] feat: add logging to get_ichorCNA_files rule --- workflow/rules/reference.smk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflow/rules/reference.smk b/workflow/rules/reference.smk index dd6d130..7a3aaaf 100644 --- a/workflow/rules/reference.smk +++ b/workflow/rules/reference.smk @@ -57,6 +57,6 @@ rule get_ichorCNA_files: prefix="resources/", URL="https://github.com/broadinstitute/ichorCNA/archive/refs/heads/master.zip" log: - + "logs/get_ichorCNA_files.log" shell: - 'wget -P {params.prefix} -c {params.URL} -O {output.zip}; unzip -j {output.zip} "ichorCNA-master/inst/*" -d {output.dir}' + '(wget -P {params.prefix} -c {params.URL} -O {output.zip}; unzip -j {output.zip} "ichorCNA-master/inst/*" -d {output.dir}) 2> {log}' From 048178618d4849ba6de53600f962c51a16070a0c Mon Sep 17 00:00:00 2001 From: sroener <40714954+sroener@users.noreply.github.com> Date: Tue, 8 Nov 2022 16:24:38 +0100 Subject: [PATCH 15/19] feat: use a higher number of maximum threads --- workflow/rules/QualityControl.smk | 2 ++ 1 file changed, 2 insertions(+) diff --git a/workflow/rules/QualityControl.smk b/workflow/rules/QualityControl.smk index 6595903..1be197a 100644 --- a/workflow/rules/QualityControl.smk +++ b/workflow/rules/QualityControl.smk @@ -7,6 +7,7 @@ rule fastqc: zip="results/{ID}/qc/fastqc/{SAMPLE}.{GENOME}_fastqc.zip", log: "results/logs/{ID}/fastqc/{SAMPLE}_all.{GENOME}.log", + threads: 8 wrapper: "0.75.0/bio/fastqc" @@ -18,6 +19,7 @@ rule samtools_stats: "results/{ID}/qc/samtools-stats/{SAMPLE}.{GENOME}.txt", log: "results/logs/{ID}/fastqc/{SAMPLE}.{GENOME}.log", + threads: 8 wrapper: "0.75.0/bio/samtools/stats" From 95a4e460cf57fd8eb65db9cacf904dfc3859f554 Mon Sep 17 00:00:00 2001 From: sroener <40714954+sroener@users.noreply.github.com> Date: Wed, 9 Nov 2022 13:40:25 +0100 Subject: [PATCH 16/19] feat: improve distribution of provided CPU cores in pipe command --- workflow/scripts/bwa-mem2_wrapper.py | 29 ++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/workflow/scripts/bwa-mem2_wrapper.py b/workflow/scripts/bwa-mem2_wrapper.py index 35ca2c7..0700b87 100644 --- a/workflow/scripts/bwa-mem2_wrapper.py +++ b/workflow/scripts/bwa-mem2_wrapper.py @@ -15,9 +15,17 @@ inputs = snakemake.input params = snakemake.params +threads = snakemake.threads + +print(threads) log = snakemake.log_fmt_shell(stdout=False, stderr=True) +samtools_CPU = max(1,threads//5) +remaining_CPU = threads - samtools_CPU + + + # Check inputs/arguments. if not isinstance(snakemake.input.reads, str) and len(snakemake.input.reads) not in { 1, @@ -29,16 +37,29 @@ non_merged_cmd = "" singleton_cmd = "" + +### it is okay, that all bwa-mem2 commands get the same number of CPUs, as they are run sequentially. +### Samtools after the pipe is run in a separate subshell, therefore being run in an additional process + + if snakemake.input.get("noadapter_R1") and snakemake.input.get("noadapter_R2"): - non_merged_cmd = "bwa-mem2 mem -t {snakemake.threads} -R \"{snakemake.params.RG}\" {snakemake.input.ref} {snakemake.input.noadapter_R1} {snakemake.input.noadapter_R2}| grep -v \"^@\" || true ; " + non_merged_cmd = f"bwa-mem2 mem -t {remaining_CPU} -R \"{{snakemake.params.RG}}\" {{snakemake.input.ref}} {{snakemake.input.noadapter_R1}} {{snakemake.input.noadapter_R2}}| grep -v \"^@\" || true ; " +# non_merged_cmd = "bwa-mem2 mem -t {snakemake.threads} -R \"{snakemake.params.RG}\" {snakemake.input.ref} {snakemake.input.noadapter_R1} {snakemake.input.noadapter_R2}| grep -v \"^@\" || true ; " if snakemake.input.get("single_reads"): - singleton_cmd = "bwa-mem2 mem -t {snakemake.threads} -R \"{snakemake.params.RG}\" {snakemake.input.ref} {snakemake.input.single_reads} | grep -v \"^@\" || true ; " + singleton_cmd = f"bwa-mem2 mem -t {remaining_CPU} -R \"{{snakemake.params.RG}}\" {{snakemake.input.ref}} {{snakemake.input.single_reads}} | grep -v \"^@\" || true ; " + #singleton_cmd = "bwa-mem2 mem -t {snakemake.threads} -R \"{snakemake.params.RG}\" {snakemake.input.ref} {snakemake.input.single_reads} | grep -v \"^@\" || true ; " -base_cmd = f"((bwa-mem2 mem -t {{snakemake.threads}} -R \"{{snakemake.params.RG}}\" {{snakemake.input.ref}} {{snakemake.input.reads}}; \ + +base_cmd = f"((bwa-mem2 mem -t {remaining_CPU} -R \"{{snakemake.params.RG}}\" {{snakemake.input.ref}} {{snakemake.input.reads}}; \ {non_merged_cmd}\ {singleton_cmd}\ -) | samtools view -b -o {{snakemake.output.mapped_reads}} - ) 2>{{snakemake.log}}" +) | samtools view -b -@ {samtools_CPU} -o {{snakemake.output.mapped_reads}} - ) 2>{{snakemake.log}}" + +#base_cmd = f"((bwa-mem2 mem -t {{snakemake.threads}} -R \"{{snakemake.params.RG}}\" {{snakemake.input.ref}} {{snakemake.input.reads}}; \ +#{non_merged_cmd}\ +#{singleton_cmd}\ +#) | samtools view -b -@ {{snakemake.threads}} -o {{snakemake.output.mapped_reads}} - ) 2>{{snakemake.log}}" print(f"Executing command:\n {base_cmd}", file=sys.stderr) From 916db886b32d0862330634128f3dfba08c0296f7 Mon Sep 17 00:00:00 2001 From: sroener <40714954+sroener@users.noreply.github.com> Date: Wed, 9 Nov 2022 13:41:56 +0100 Subject: [PATCH 17/19] refactor: make parameters configurable in config file --- workflow/rules/ichorCNA.smk | 38 ++++++++++++++++----------------- workflow/rules/raw_input_QC.smk | 2 +- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/workflow/rules/ichorCNA.smk b/workflow/rules/ichorCNA.smk index 4738912..10e26eb 100644 --- a/workflow/rules/ichorCNA.smk +++ b/workflow/rules/ichorCNA.smk @@ -25,9 +25,9 @@ rule read_counter: log: "results/logs/{ID}/mapping/{SAMPLE}_all.{GENOME}.log", params: - window=1000000, - quality=20, - chroms="chr1,chr2,chr3,chr4,chr5,chr6,chr7,chr8,chr9,chr10,chr11,chr12,chr13,chr14,chr15,chr16,chr17,chr18,chr19,chr20,chr21,chr22,chrX,chrY", + window = config["read_counter"]["window"], + quality = config["read_counter"]["quality"], + chroms= config["read_counter"]["chroms"], conda: "../envs/icorCNA.yaml" shell: @@ -48,22 +48,22 @@ rule ichorCNA: summary="results/{ID}/icorCNA/{SAMPLE}_processed_{GENOME}/{SAMPLE}_processed.params.txt" params: sID="{SAMPLE}_processed", - ploidy='"c(2,3)"', - normal='"c(0.5,0.6,0.7,0.8,0.9)"', - maxCN=5, - gcWIG="resources/ichorCNA/gc_hg38_1000kb.wig", - mapWIG="resources/ichorCNA/map_hg38_1000kb.wig", - centro="resources/ichorCNA/GRCh38.GCA_000001405.2_centromere_acen.txt", - normalPanel="resources/ichorCNA/HD_ULP_PoN_1Mb_median_normAutosome_mapScoreFiltered_median.rds", - includeHOMD="False", - chrs="'c(1:22)'", - chrTrain='"c(1:22)"', - estimateNormal="True", - estimatePloidy="True", - estimateScPrevalence="True", - scStates='"c(1,3)"', - txnE=0.9999, - txnStrength=10000, + ploidy = config["ichorCNA"]["ploidy"], + normal = config["ichorCNA"]["normal"], + maxCN = config["ichorCNA"]["maxCN"], + gcWIG = lambda wc: config["ichorCNA"]["gcWIG"][wc.GENOME], + mapWIG = lambda wc: config["ichorCNA"]["mapWIG"][wc.GENOME], + centro = lambda wc: config["ichorCNA"]["centro"][wc.GENOME], + normalPanel = config["ichorCNA"]["normalPanel"], + includeHOMD = config["ichorCNA"]["includeHOMD"], + chrs = config["ichorCNA"]["chrs"], + chrTrain = config["ichorCNA"]["chrTrain"], + estimateNormal = config["ichorCNA"]["estimateNormal"], + estimatePloidy = config["ichorCNA"]["estimatePloidy"], + estimateScPrevalence = config["ichorCNA"]["estimateScPrevalence"], + scStates = config["ichorCNA"]["scStates"], + txnE = config["ichorCNA"]["txnE"], + txnStrength = config["ichorCNA"]["txnStrength"], log: "results/logs/{ID}/mapping/{SAMPLE}_all.{GENOME}.log", conda: diff --git a/workflow/rules/raw_input_QC.smk b/workflow/rules/raw_input_QC.smk index b7e411b..839438d 100644 --- a/workflow/rules/raw_input_QC.smk +++ b/workflow/rules/raw_input_QC.smk @@ -19,4 +19,4 @@ rule flagstat_pre_conversion: ## samtools stats for bam files -## FastQC: BAM, SAM or FastQ files \ No newline at end of file +## FastQC: BAM, SAM or FastQ files From 5ef2fe314e305eb29ff39f91a6f81c8e406344ab Mon Sep 17 00:00:00 2001 From: sroener <40714954+sroener@users.noreply.github.com> Date: Thu, 17 Nov 2022 16:47:08 +0100 Subject: [PATCH 18/19] chore: update mosdepth wrapper version; remove --fast-mode for better coverage estimation --- workflow/rules/QualityControl.smk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflow/rules/QualityControl.smk b/workflow/rules/QualityControl.smk index 1be197a..1544192 100644 --- a/workflow/rules/QualityControl.smk +++ b/workflow/rules/QualityControl.smk @@ -35,12 +35,12 @@ rule mosdepth: log: "results/logs/{ID}/mosdepth/{SAMPLE}.{GENOME}.log", params: - extra="--no-per-base --fast-mode", # optional + extra="--no-per-base", # optional by="500", # additional decompression threads through `--threads` threads: 32 # This value - 1 will be sent to `--threads` wrapper: - "v1.1.0/bio/mosdepth" + "v1.19.1/bio/mosdepth" rule multiqc: From 7aa79bd5af75a38defdd813beb6553dec41b6f26 Mon Sep 17 00:00:00 2001 From: sroener <40714954+sroener@users.noreply.github.com> Date: Thu, 17 Nov 2022 16:48:23 +0100 Subject: [PATCH 19/19] feat: add automatic chromosome parsing for read_counter rule --- workflow/rules/ichorCNA.smk | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/workflow/rules/ichorCNA.smk b/workflow/rules/ichorCNA.smk index 10e26eb..80482a5 100644 --- a/workflow/rules/ichorCNA.smk +++ b/workflow/rules/ichorCNA.smk @@ -1,3 +1,13 @@ +def get_chroms_readcounter(chromfile): + if config["read_counter"]["chroms"]["autoselect_chroms"]: + with open(chromfile,"r") as f: + chrom = f.read() + return chrom + else: + return config["read_counter"]["chroms"]["chrom_list"] + + + rule get_chroms: input: @@ -20,6 +30,7 @@ rule read_counter: input: bam="results/{ID}/mapped_reads/{SAMPLE}_processed.{GENOME}.bam", bai="results/{ID}/mapped_reads/{SAMPLE}_processed.{GENOME}.bam.bai", + chroms="results/{ID}/icorCNA/chroms/{SAMPLE}_processed.{GENOME}.chromosomes.txt", output: wig="results/{ID}/icorCNA/readcounts/{SAMPLE}_processed.{GENOME}.wig", log: @@ -27,7 +38,7 @@ rule read_counter: params: window = config["read_counter"]["window"], quality = config["read_counter"]["quality"], - chroms= config["read_counter"]["chroms"], + chroms= lambda wc: get_chroms_readcounter(f"test/{wc.ID}/icorCNA/chroms/{wc.SAMPLE}_processed.{wc.GENOME}.chromosomes.txt"), conda: "../envs/icorCNA.yaml" shell: