From 2b019107cd051d7367fd5cd722af5422dd4fa88c Mon Sep 17 00:00:00 2001
From: sroener <40714954+sroener@users.noreply.github.com>
Date: Wed, 20 Apr 2022 20:39:32 +0200
Subject: [PATCH 01/19] fix: correct check for trimmed reads

---
 workflow/rules/common.smk            | 3 ++-
 workflow/scripts/bwa-mem2_wrapper.py | 4 +++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk
index 8f2eb67..d14762d 100644
--- a/workflow/rules/common.smk
+++ b/workflow/rules/common.smk
@@ -215,6 +215,7 @@ def get_mapping_input(wildcards):
     elif trimming_algorithm.lower() == "trimmomatic":
         mapping_input["reads"] = ["results/{ID}/trimmed/trimmomatic/{SAMPLE}.1.fastq.gz", "results/{ID}/trimmed/trimmomatic/{SAMPLE}.2.fastq.gz",]
         if all_data:
-            mapping_input["non_merged"] = ["results/{ID}/trimmed/trimmomatic/{SAMPLE}.1.unpaired.fastq.gz","results/{ID}/trimmed/trimmomatic/{SAMPLE}.2.unpaired.fastq.gz"]
+            mapping_input["noadapter_R1"] = "results/{ID}/trimmed/trimmomatic/{SAMPLE}.1.unpaired.fastq.gz"
+            mapping_input["noadapter_R2"] = "results/{ID}/trimmed/trimmomatic/{SAMPLE}.2.unpaired.fastq.gz"
         
     return mapping_input
diff --git a/workflow/scripts/bwa-mem2_wrapper.py b/workflow/scripts/bwa-mem2_wrapper.py
index 490d465..35ca2c7 100644
--- a/workflow/scripts/bwa-mem2_wrapper.py
+++ b/workflow/scripts/bwa-mem2_wrapper.py
@@ -8,6 +8,7 @@
 
 
 from os import path
+import sys
 from wsgiref.handlers import BaseCGIHandler
 
 from snakemake.shell import shell
@@ -28,7 +29,7 @@
 non_merged_cmd = ""
 singleton_cmd = ""
 
-if snakemake.input.get("non_merged"):
+if snakemake.input.get("noadapter_R1") and snakemake.input.get("noadapter_R2"):
     non_merged_cmd = "bwa-mem2 mem -t {snakemake.threads} -R \"{snakemake.params.RG}\" {snakemake.input.ref} {snakemake.input.noadapter_R1} {snakemake.input.noadapter_R2}| grep -v \"^@\" || true ; "
 
 if snakemake.input.get("single_reads"):
@@ -39,6 +40,7 @@
 {singleton_cmd}\
 ) | samtools view -b -o {{snakemake.output.mapped_reads}} - ) 2>{{snakemake.log}}"
 
+print(f"Executing command:\n {base_cmd}", file=sys.stderr)
 
 shell(base_cmd)
 

From dbd5d274a75f761211095c69e80c0a90792a5934 Mon Sep 17 00:00:00 2001
From: sroener <40714954+sroener@users.noreply.github.com>
Date: Tue, 28 Jun 2022 11:25:59 +0200
Subject: [PATCH 02/19] initial commit

---
 .editorconfig | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)
 create mode 100644 .editorconfig

diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..b94edaa
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,32 @@
+# EditorConfig is awesome: https://EditorConfig.org
+
+# top-most EditorConfig file
+root = true
+
+# Unix-style newlines with a newline ending every file
+[*]
+end_of_line = lf
+insert_final_newline = true
+
+# Matches multiple files with brace expansion notation
+# Set default charset
+[*.{js,py}]
+charset = utf-8
+
+# 4 space indentation
+[*.py]
+indent_style = space
+indent_size = 4
+
+# Tab indentation (no size specified)
+[Makefile]
+indent_style = tab
+
+# Indentation override for .tsv files
+[*.tsv]
+indent_style = tab
+
+# Matches the exact files either package.json or .travis.yml
+[{package.json,.travis.yml}]
+indent_style = space
+indent_size = 2

From 6ad926e5576ab540c8e16aef11610eecfe6318ed Mon Sep 17 00:00:00 2001
From: sroener <40714954+sroener@users.noreply.github.com>
Date: Tue, 28 Jun 2022 11:31:33 +0200
Subject: [PATCH 03/19] feat: improved thread/core handling; minor refactoring

---
 workflow/rules/bam_to_fastq.smk | 6 +++---
 workflow/rules/mapping.smk      | 6 +++---
 workflow/rules/reference.smk    | 5 +----
 3 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/workflow/rules/bam_to_fastq.smk b/workflow/rules/bam_to_fastq.smk
index 574353e..56403fd 100644
--- a/workflow/rules/bam_to_fastq.smk
+++ b/workflow/rules/bam_to_fastq.smk
@@ -19,9 +19,9 @@ rule bam_to_fastq:
     shell:
         """set +o pipefail;
         (
-                    samtools sort -T {params.TMPDIR} -n -@ {threads} {input.bam} | \
-        samtools fastq -@ {threads} -t \
+        samtools sort -T {params.TMPDIR} -n -@ $(({threads}/2)) {input.bam} | \
+        samtools fastq -@ $(({threads}/2)) -t \
         -1 {output.r1} \
         -2 {output.r2} \
         -0 {output.s1} \
-        )2> {log}"""
\ No newline at end of file
+        )2> {log}"""
diff --git a/workflow/rules/mapping.smk b/workflow/rules/mapping.smk
index 7c8cf5f..9784d41 100644
--- a/workflow/rules/mapping.smk
+++ b/workflow/rules/mapping.smk
@@ -27,9 +27,9 @@ rule mark_duplicates:
     threads: 64
     shell:
         #"set +o pipefail;"
-        "(samtools fixmate -u -@ {threads} -m {input.mapped_reads} - | "
-        "samtools sort -u -@ {threads} -T {params.TMPDIR} - | "
-        "samtools markdup -@ {threads} - {output.processed_reads}) 2>{log}"
+        "(samtools fixmate -u -@ $(({threads}/3)) -m {input.mapped_reads} - | "
+        "samtools sort -u -@ $(({threads}/3)) -T {params.TMPDIR} - | "
+        "samtools markdup -@ $(({threads}/3)) - {output.processed_reads}) 2>{log}"
 
 rule index_bam:
     input:
diff --git a/workflow/rules/reference.smk b/workflow/rules/reference.smk
index 52a17b7..de047ea 100644
--- a/workflow/rules/reference.smk
+++ b/workflow/rules/reference.smk
@@ -1,7 +1,4 @@
 
-#! wrapper rules können auch anders benannte inputs und outputs haben -> output based on configs!
-
-
 rule get_reference:
     output:
         "resources/reference/{GENOME}.fa",
@@ -50,4 +47,4 @@ rule get_trimmomatic_adapters:
     log:
         "logs/get_trimmomatic_adapter.log"
     shell:
-        "wget -P {params.prefix} {params.URLs} -o {log}"
\ No newline at end of file
+        "wget -P {params.prefix} {params.URLs} -o {log}"

From 5aa85618ab35e6bcecbf24fe9610da497d373eff Mon Sep 17 00:00:00 2001
From: sroener <40714954+sroener@users.noreply.github.com>
Date: Tue, 28 Jun 2022 11:33:47 +0200
Subject: [PATCH 04/19] feat: generalization of bam indexing rule

---
 workflow/rules/mapping.smk | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/workflow/rules/mapping.smk b/workflow/rules/mapping.smk
index 9784d41..3c383ff 100644
--- a/workflow/rules/mapping.smk
+++ b/workflow/rules/mapping.smk
@@ -33,11 +33,11 @@ rule mark_duplicates:
 
 rule index_bam:
     input:
-        "results/{ID}/mapped_reads/{SAMPLE}_processed.{GENOME}.bam"
+        "{path}.bam"
     output:
-        "results/{ID}/mapped_reads/{SAMPLE}_processed.{GENOME}.bam.bai"
-    log:"results/logs/{ID}/index_bam/{SAMPLE}.{GENOME}.log",
+        "{path}.bam.bai"
     conda: "../envs/cfDNA_prep.yaml"
     threads: 32
     shell:
-        "samtools index -@ {threads} {input} {output} 2> {log}"
+        "samtools index -@ {threads} {input} {output}"
+

From a8985f585fcb11eb065e8587714031919bba3531 Mon Sep 17 00:00:00 2001
From: sroener <40714954+sroener@users.noreply.github.com>
Date: Thu, 11 Aug 2022 12:06:40 +0200
Subject: [PATCH 05/19] initial commit

---
 workflow/envs/icorCNA.yaml | 9 +++++++++
 1 file changed, 9 insertions(+)
 create mode 100644 workflow/envs/icorCNA.yaml

diff --git a/workflow/envs/icorCNA.yaml b/workflow/envs/icorCNA.yaml
new file mode 100644
index 0000000..2553821
--- /dev/null
+++ b/workflow/envs/icorCNA.yaml
@@ -0,0 +1,9 @@
+name: icorCNA
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - r-ichorcna
+  - hmmcopy
+

From d18b8768ac46376d34223dc946cf94afbefd94ae Mon Sep 17 00:00:00 2001
From: sroener <40714954+sroener@users.noreply.github.com>
Date: Thu, 11 Aug 2022 12:06:55 +0200
Subject: [PATCH 06/19] initial commit

---
 workflow/rules/icorCNA.smk | 81 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)
 create mode 100644 workflow/rules/icorCNA.smk

diff --git a/workflow/rules/icorCNA.smk b/workflow/rules/icorCNA.smk
new file mode 100644
index 0000000..af09900
--- /dev/null
+++ b/workflow/rules/icorCNA.smk
@@ -0,0 +1,81 @@
+
+rule get_chroms:
+    input:
+        bam="results/{ID}/mapped_reads/{SAMPLE}_processed.{GENOME}.bam",
+        bai="results/{ID}/mapped_reads/{SAMPLE}_processed.{GENOME}.bam.bai",
+    output:
+        chroms="results/{ID}/icorCNA/chroms/{SAMPLE}_processed.{GENOME}.chromosomes.txt"
+    log:
+        "results/logs/{ID}/get_chroms/{SAMPLE}_processed.{GENOME}.log",
+    conda:
+        "../envs/cfDNA_prep.yaml"
+    shell:
+        """samtools idxstats {input.bam} | \
+        cut -f 1 | grep -w 'chr[1-9]\|chr[1-2][0-9]\|chr[X,Y]\|^[1-2][0-9]\|^[0-9]\|^[X,Y]' \
+        | tr '\n' ','| sed 's/,*\r*$//' \
+        1> {output.chroms} 2>{log}"""
+
+
+
+rule read_counter:
+    input: 
+        bam="results/{ID}/mapped_reads/{SAMPLE}_processed.{GENOME}.bam",
+        bai="results/{ID}/mapped_reads/{SAMPLE}_processed.{GENOME}.bam.bai",
+    output: 
+        wig="results/{ID}/icorCNA/readcounts/{SAMPLE}_processed.{GENOME}.wig"
+    log:
+        "results/logs/{ID}/mapping/{SAMPLE}_all.{GENOME}.log",
+    params:
+        window=1000000
+        quality=20
+        chroms="chr1,chr2,chr3,chr4,chr5,chr6,chr7,chr8,chr9,chr10,chr11,chr12,chr13,chr14,chr15,chr16,chr17,chr18,chr19,chr20,chr21,chr22,chrX,chrY"
+    conda:
+        "../envs/icorCNA.yaml"
+    shell:
+        """
+        readCounter --window {params.window} --quality {params.quality} \
+        --chromosome {params.chroms} {input.bam} 1>{output.wig} 2>{log}
+        """
+
+
+rule icorCNA:
+    input: 
+        bam="results/{ID}/mapped_reads/{SAMPLE}_processed.{GENOME}.bam",
+        bai="results/{ID}/mapped_reads/{SAMPLE}_processed.{GENOME}.bam.bai",
+        wig="results/{ID}/icorCNA/readcounts/{SAMPLE}_processed.{GENOME}.wig",
+    output: 
+        outDir=directory("results/{ID}/icorCNA/{SAMPLE}_processed_{GENOME}")
+    params:
+    ID="{SAMPLE}_processed",
+    ploidy='"c(2,3)"',
+    normal='"c(0.5,0.6,0.7,0.8,0.9)"',
+    maxCN=5,
+    gcWig="../ichorCNA/inst/extdata/gc_hg38_1000kb.wig",
+    mapWIG="../ichorCNA/inst/extdata/map_hg38_1000kb.wig",
+    centro="../ichorCNA/inst/extdata/GRCh38.GCA_000001405.2_centromere_acen.txt",
+    normalPanel="../ichorCNA/inst/extdata/HD_ULP_PoN_1Mb_median_normAutosome_mapScoreFiltered_median.rds",
+    includeHOMD="False",
+    chrs='"c(1:22, \"X\")"',
+    chrTrain='"c(1:22)"',
+    estimateNormal="True",
+    estimatePloidy="True",
+    estimateScPrevalence="True",
+    scStates='"c(1,3)"',
+    txnE=0.9999,
+    txnStrength=10000,
+    log:
+        "results/logs/{ID}/mapping/{SAMPLE}_all.{GENOME}.log",
+    conda:
+        "../envs/icorCNA.yaml"
+    shell:
+        """
+        runIchorCNA.R --id {params.ID} --WIG {input.wig} \
+        --ploidy {params.ploidy} --normal {params.normal} --maxCN {params.maxCN} \
+        --gcWig {params.gcWIG} --mapWig {params.mapWIG} \
+        --centromere {params.centro} --normalPanel {params.normalPanel} \
+        --includeHOMD {params.includeHOMD} --chrs {params.chrs} \
+        --chrTrain {params.chrTrain} --estimateNormal {params.estimateNormal} \
+        --estimatePloidy {params.estimatePloidy} \
+        --estimateScPrevalence {params.estimateScPrevalence} --scStates {params.scStates} \
+        --txnE {params.txnE} --txnStrength {params.txnStrength} --outDir {output.outDir}
+        """

From 2bd913d20b15acd05bc7072a1593c5a5dc8b1896 Mon Sep 17 00:00:00 2001
From: sroener <40714954+sroener@users.noreply.github.com>
Date: Thu, 11 Aug 2022 12:14:11 +0200
Subject: [PATCH 07/19] refactor: corrected rule name and snakefmt

---
 workflow/rules/{icorCNA.smk => ichorCNA.smk} | 59 ++++++++++----------
 1 file changed, 29 insertions(+), 30 deletions(-)
 rename workflow/rules/{icorCNA.smk => ichorCNA.smk} (71%)

diff --git a/workflow/rules/icorCNA.smk b/workflow/rules/ichorCNA.smk
similarity index 71%
rename from workflow/rules/icorCNA.smk
rename to workflow/rules/ichorCNA.smk
index af09900..0ca4f40 100644
--- a/workflow/rules/icorCNA.smk
+++ b/workflow/rules/ichorCNA.smk
@@ -4,7 +4,7 @@ rule get_chroms:
         bam="results/{ID}/mapped_reads/{SAMPLE}_processed.{GENOME}.bam",
         bai="results/{ID}/mapped_reads/{SAMPLE}_processed.{GENOME}.bam.bai",
     output:
-        chroms="results/{ID}/icorCNA/chroms/{SAMPLE}_processed.{GENOME}.chromosomes.txt"
+        chroms="results/{ID}/icorCNA/chroms/{SAMPLE}_processed.{GENOME}.chromosomes.txt",
     log:
         "results/logs/{ID}/get_chroms/{SAMPLE}_processed.{GENOME}.log",
     conda:
@@ -16,19 +16,18 @@ rule get_chroms:
         1> {output.chroms} 2>{log}"""
 
 
-
 rule read_counter:
-    input: 
+    input:
         bam="results/{ID}/mapped_reads/{SAMPLE}_processed.{GENOME}.bam",
         bai="results/{ID}/mapped_reads/{SAMPLE}_processed.{GENOME}.bam.bai",
-    output: 
-        wig="results/{ID}/icorCNA/readcounts/{SAMPLE}_processed.{GENOME}.wig"
+    output:
+        wig="results/{ID}/icorCNA/readcounts/{SAMPLE}_processed.{GENOME}.wig",
     log:
         "results/logs/{ID}/mapping/{SAMPLE}_all.{GENOME}.log",
     params:
-        window=1000000
-        quality=20
-        chroms="chr1,chr2,chr3,chr4,chr5,chr6,chr7,chr8,chr9,chr10,chr11,chr12,chr13,chr14,chr15,chr16,chr17,chr18,chr19,chr20,chr21,chr22,chrX,chrY"
+        window=1000000,
+        quality=20,
+        chroms="chr1,chr2,chr3,chr4,chr5,chr6,chr7,chr8,chr9,chr10,chr11,chr12,chr13,chr14,chr15,chr16,chr17,chr18,chr19,chr20,chr21,chr22,chrX,chrY",
     conda:
         "../envs/icorCNA.yaml"
     shell:
@@ -38,38 +37,38 @@ rule read_counter:
         """
 
 
-rule icorCNA:
-    input: 
+rule ichorCNA:
+    input:
         bam="results/{ID}/mapped_reads/{SAMPLE}_processed.{GENOME}.bam",
         bai="results/{ID}/mapped_reads/{SAMPLE}_processed.{GENOME}.bam.bai",
         wig="results/{ID}/icorCNA/readcounts/{SAMPLE}_processed.{GENOME}.wig",
-    output: 
-        outDir=directory("results/{ID}/icorCNA/{SAMPLE}_processed_{GENOME}")
+    output:
+        outDir=directory("results/{ID}/icorCNA/{SAMPLE}_processed_{GENOME}"),
     params:
-    ID="{SAMPLE}_processed",
-    ploidy='"c(2,3)"',
-    normal='"c(0.5,0.6,0.7,0.8,0.9)"',
-    maxCN=5,
-    gcWig="../ichorCNA/inst/extdata/gc_hg38_1000kb.wig",
-    mapWIG="../ichorCNA/inst/extdata/map_hg38_1000kb.wig",
-    centro="../ichorCNA/inst/extdata/GRCh38.GCA_000001405.2_centromere_acen.txt",
-    normalPanel="../ichorCNA/inst/extdata/HD_ULP_PoN_1Mb_median_normAutosome_mapScoreFiltered_median.rds",
-    includeHOMD="False",
-    chrs='"c(1:22, \"X\")"',
-    chrTrain='"c(1:22)"',
-    estimateNormal="True",
-    estimatePloidy="True",
-    estimateScPrevalence="True",
-    scStates='"c(1,3)"',
-    txnE=0.9999,
-    txnStrength=10000,
+        sID="{SAMPLE}_processed",
+        ploidy='"c(2,3)"',
+        normal='"c(0.5,0.6,0.7,0.8,0.9)"',
+        maxCN=5,
+        gcWig="../ichorCNA/inst/extdata/gc_hg38_1000kb.wig",
+        mapWIG="../ichorCNA/inst/extdata/map_hg38_1000kb.wig",
+        centro="../ichorCNA/inst/extdata/GRCh38.GCA_000001405.2_centromere_acen.txt",
+        normalPanel="../ichorCNA/inst/extdata/HD_ULP_PoN_1Mb_median_normAutosome_mapScoreFiltered_median.rds",
+        includeHOMD="False",
+        chrs='"c(1:22, "X")"',
+        chrTrain='"c(1:22)"',
+        estimateNormal="True",
+        estimatePloidy="True",
+        estimateScPrevalence="True",
+        scStates='"c(1,3)"',
+        txnE=0.9999,
+        txnStrength=10000,
     log:
         "results/logs/{ID}/mapping/{SAMPLE}_all.{GENOME}.log",
     conda:
         "../envs/icorCNA.yaml"
     shell:
         """
-        runIchorCNA.R --id {params.ID} --WIG {input.wig} \
+        runIchorCNA.R --id {params.sID} --WIG {input.wig} \
         --ploidy {params.ploidy} --normal {params.normal} --maxCN {params.maxCN} \
         --gcWig {params.gcWIG} --mapWig {params.mapWIG} \
         --centromere {params.centro} --normalPanel {params.normalPanel} \

From 203013833d3e07df9151962169e6f237737b2d80 Mon Sep 17 00:00:00 2001
From: sroener <40714954+sroener@users.noreply.github.com>
Date: Thu, 11 Aug 2022 12:14:41 +0200
Subject: [PATCH 08/19] feat: add ichorCNA rule

---
 workflow/Snakefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/workflow/Snakefile b/workflow/Snakefile
index ae65c0a..b07852e 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -12,6 +12,7 @@ include: "rules/bam_to_fastq.smk"
 include: "rules/trimming.smk"
 include: "rules/filtering.smk"
 include: "rules/mapping.smk"
+include: "rules/ichorCNA.smk"
 include: "rules/QualityControl.smk"
 
 

From 41933aa1098f35c373ca06339d255ab7c0a95302 Mon Sep 17 00:00:00 2001
From: sroener <40714954+sroener@users.noreply.github.com>
Date: Thu, 11 Aug 2022 12:15:33 +0200
Subject: [PATCH 09/19] feat: add readCount wig file to outputs

---
 workflow/rules/common.smk | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk
index d14762d..b20fc98 100644
--- a/workflow/rules/common.smk
+++ b/workflow/rules/common.smk
@@ -48,6 +48,15 @@ def get_final_output():
     final_output.extend(
         expand("results/{ID}/qc/multiqc.html", ID=samples["ID"].unique())
     )
+    final_output.extend(
+        expand(
+            "results/{ID}/icorCNA/readcounts/{SAMPLE}_processed.{GENOME}.wig",
+            zip,
+            ID=samples["ID"],
+            SAMPLE=samples["sample"],
+            GENOME=samples["genome_build"],
+        )
+    )
 
     return final_output
 

From b617a5a9243b0c595a86321c0cfc04d500a5dd86 Mon Sep 17 00:00:00 2001
From: sroener <40714954+sroener@users.noreply.github.com>
Date: Thu, 11 Aug 2022 12:20:29 +0200
Subject: [PATCH 10/19] fix: typo in ichorCNA params

---
 workflow/rules/ichorCNA.smk | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/workflow/rules/ichorCNA.smk b/workflow/rules/ichorCNA.smk
index 0ca4f40..ababbcf 100644
--- a/workflow/rules/ichorCNA.smk
+++ b/workflow/rules/ichorCNA.smk
@@ -49,7 +49,7 @@ rule ichorCNA:
         ploidy='"c(2,3)"',
         normal='"c(0.5,0.6,0.7,0.8,0.9)"',
         maxCN=5,
-        gcWig="../ichorCNA/inst/extdata/gc_hg38_1000kb.wig",
+        gcWIG="../ichorCNA/inst/extdata/gc_hg38_1000kb.wig",
         mapWIG="../ichorCNA/inst/extdata/map_hg38_1000kb.wig",
         centro="../ichorCNA/inst/extdata/GRCh38.GCA_000001405.2_centromere_acen.txt",
         normalPanel="../ichorCNA/inst/extdata/HD_ULP_PoN_1Mb_median_normAutosome_mapScoreFiltered_median.rds",

From 23b3bd80d98b9a39b2dca390e0358a250846c74d Mon Sep 17 00:00:00 2001
From: sroener <40714954+sroener@users.noreply.github.com>
Date: Thu, 11 Aug 2022 12:21:01 +0200
Subject: [PATCH 11/19] feat: add ichorCNA output dir to final outputs

---
 workflow/rules/common.smk | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk
index b20fc98..b6fcac1 100644
--- a/workflow/rules/common.smk
+++ b/workflow/rules/common.smk
@@ -57,6 +57,18 @@ def get_final_output():
             GENOME=samples["genome_build"],
         )
     )
+    final_output.extend(
+        expand(
+            "results/{ID}/icorCNA/{SAMPLE}_processed_{GENOME}",
+            zip,
+            ID=samples["ID"],
+            SAMPLE=samples["sample"],
+            GENOME=samples["genome_build"],
+        )
+    )
+
+
+
 
     return final_output
 

From 283720d45c4a118b48b115b1ac69bad11628d446 Mon Sep 17 00:00:00 2001
From: sroener <40714954+sroener@users.noreply.github.com>
Date: Thu, 11 Aug 2022 15:14:32 +0200
Subject: [PATCH 12/19] feat: add rule for getting ichorCNA files

---
 workflow/rules/reference.smk | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/workflow/rules/reference.smk b/workflow/rules/reference.smk
index de047ea..dd6d130 100644
--- a/workflow/rules/reference.smk
+++ b/workflow/rules/reference.smk
@@ -48,3 +48,15 @@ rule get_trimmomatic_adapters:
         "logs/get_trimmomatic_adapter.log"
     shell:
         "wget -P {params.prefix} {params.URLs} -o {log}"
+
+rule get_ichorCNA_files:
+    output:
+        zip="ichorCNA_master.zip",
+        dir=directory("resources/ichorCNA")
+    params:
+        prefix="resources/",
+        URL="https://github.com/broadinstitute/ichorCNA/archive/refs/heads/master.zip"
+    log:
+
+    shell:
+        'wget -P {params.prefix} -c {params.URL} -O {output.zip}; unzip -j {output.zip} "ichorCNA-master/inst/*" -d {output.dir}'

From 094746124332d555d3233b33e7039158b9f80448 Mon Sep 17 00:00:00 2001
From: sroener <40714954+sroener@users.noreply.github.com>
Date: Thu, 11 Aug 2022 15:36:42 +0200
Subject: [PATCH 13/19] fix: add resources path to input; add one result file
 for creating output dir; fix relative paths

---
 workflow/rules/ichorCNA.smk | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/workflow/rules/ichorCNA.smk b/workflow/rules/ichorCNA.smk
index ababbcf..4738912 100644
--- a/workflow/rules/ichorCNA.smk
+++ b/workflow/rules/ichorCNA.smk
@@ -42,19 +42,21 @@ rule ichorCNA:
         bam="results/{ID}/mapped_reads/{SAMPLE}_processed.{GENOME}.bam",
         bai="results/{ID}/mapped_reads/{SAMPLE}_processed.{GENOME}.bam.bai",
         wig="results/{ID}/icorCNA/readcounts/{SAMPLE}_processed.{GENOME}.wig",
+        resources="resources/ichorCNA/"
     output:
-        outDir=directory("results/{ID}/icorCNA/{SAMPLE}_processed_{GENOME}"),
+        outDir=directory("results/{ID}/icorCNA/{SAMPLE}_processed_{GENOME}/"),
+        summary="results/{ID}/icorCNA/{SAMPLE}_processed_{GENOME}/{SAMPLE}_processed.params.txt"
     params:
         sID="{SAMPLE}_processed",
         ploidy='"c(2,3)"',
         normal='"c(0.5,0.6,0.7,0.8,0.9)"',
         maxCN=5,
-        gcWIG="../ichorCNA/inst/extdata/gc_hg38_1000kb.wig",
-        mapWIG="../ichorCNA/inst/extdata/map_hg38_1000kb.wig",
-        centro="../ichorCNA/inst/extdata/GRCh38.GCA_000001405.2_centromere_acen.txt",
-        normalPanel="../ichorCNA/inst/extdata/HD_ULP_PoN_1Mb_median_normAutosome_mapScoreFiltered_median.rds",
+        gcWIG="resources/ichorCNA/gc_hg38_1000kb.wig",
+        mapWIG="resources/ichorCNA/map_hg38_1000kb.wig",
+        centro="resources/ichorCNA/GRCh38.GCA_000001405.2_centromere_acen.txt",
+        normalPanel="resources/ichorCNA/HD_ULP_PoN_1Mb_median_normAutosome_mapScoreFiltered_median.rds",
         includeHOMD="False",
-        chrs='"c(1:22, "X")"',
+        chrs="'c(1:22)'",
         chrTrain='"c(1:22)"',
         estimateNormal="True",
         estimatePloidy="True",

From 2bfa9705152737220685074e611ae91d93f7d4be Mon Sep 17 00:00:00 2001
From: sroener <40714954+sroener@users.noreply.github.com>
Date: Thu, 11 Aug 2022 15:37:22 +0200
Subject: [PATCH 14/19] feat: add logging to get_ichorCNA_files rule

---
 workflow/rules/reference.smk | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/workflow/rules/reference.smk b/workflow/rules/reference.smk
index dd6d130..7a3aaaf 100644
--- a/workflow/rules/reference.smk
+++ b/workflow/rules/reference.smk
@@ -57,6 +57,6 @@ rule get_ichorCNA_files:
         prefix="resources/",
         URL="https://github.com/broadinstitute/ichorCNA/archive/refs/heads/master.zip"
     log:
-
+        "logs/get_ichorCNA_files.log"
     shell:
-        'wget -P {params.prefix} -c {params.URL} -O {output.zip}; unzip -j {output.zip} "ichorCNA-master/inst/*" -d {output.dir}'
+        '(wget -P {params.prefix} -c {params.URL} -O {output.zip}; unzip -j {output.zip} "ichorCNA-master/inst/*" -d {output.dir}) 2> {log}'

From 048178618d4849ba6de53600f962c51a16070a0c Mon Sep 17 00:00:00 2001
From: sroener <40714954+sroener@users.noreply.github.com>
Date: Tue, 8 Nov 2022 16:24:38 +0100
Subject: [PATCH 15/19] feat: use a higher number of maximum threads

---
 workflow/rules/QualityControl.smk | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/workflow/rules/QualityControl.smk b/workflow/rules/QualityControl.smk
index 6595903..1be197a 100644
--- a/workflow/rules/QualityControl.smk
+++ b/workflow/rules/QualityControl.smk
@@ -7,6 +7,7 @@ rule fastqc:
         zip="results/{ID}/qc/fastqc/{SAMPLE}.{GENOME}_fastqc.zip",
     log:
         "results/logs/{ID}/fastqc/{SAMPLE}_all.{GENOME}.log",
+    threads: 8
     wrapper:
         "0.75.0/bio/fastqc"
 
@@ -18,6 +19,7 @@ rule samtools_stats:
         "results/{ID}/qc/samtools-stats/{SAMPLE}.{GENOME}.txt",
     log:
         "results/logs/{ID}/fastqc/{SAMPLE}.{GENOME}.log",
+    threads: 8
     wrapper:
         "0.75.0/bio/samtools/stats"
 

From 95a4e460cf57fd8eb65db9cacf904dfc3859f554 Mon Sep 17 00:00:00 2001
From: sroener <40714954+sroener@users.noreply.github.com>
Date: Wed, 9 Nov 2022 13:40:25 +0100
Subject: [PATCH 16/19] feat: improve distribution of provided CPU cores in
 pipe command

---
 workflow/scripts/bwa-mem2_wrapper.py | 29 ++++++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/workflow/scripts/bwa-mem2_wrapper.py b/workflow/scripts/bwa-mem2_wrapper.py
index 35ca2c7..0700b87 100644
--- a/workflow/scripts/bwa-mem2_wrapper.py
+++ b/workflow/scripts/bwa-mem2_wrapper.py
@@ -15,9 +15,17 @@
 
 inputs = snakemake.input
 params = snakemake.params
+threads = snakemake.threads
+
+print(threads)
 
 log = snakemake.log_fmt_shell(stdout=False, stderr=True)
 
+samtools_CPU = max(1,threads//5)
+remaining_CPU = threads - samtools_CPU
+
+
+
 # Check inputs/arguments.
 if not isinstance(snakemake.input.reads, str) and len(snakemake.input.reads) not in {
     1,
@@ -29,16 +37,29 @@
 non_merged_cmd = ""
 singleton_cmd = ""
 
+
+### it is okay, that all bwa-mem2 commands get the same number of CPUs, as they are run sequentially.
+### Samtools after the pipe is run in a separate subshell, therefore being run in an additional process
+
+
 if snakemake.input.get("noadapter_R1") and snakemake.input.get("noadapter_R2"):
-    non_merged_cmd = "bwa-mem2 mem -t {snakemake.threads} -R \"{snakemake.params.RG}\" {snakemake.input.ref} {snakemake.input.noadapter_R1} {snakemake.input.noadapter_R2}| grep -v \"^@\" || true ; "
+    non_merged_cmd = f"bwa-mem2 mem -t {remaining_CPU} -R \"{{snakemake.params.RG}}\" {{snakemake.input.ref}} {{snakemake.input.noadapter_R1}} {{snakemake.input.noadapter_R2}}| grep -v \"^@\" || true ; "
+#    non_merged_cmd = "bwa-mem2 mem -t {snakemake.threads} -R \"{snakemake.params.RG}\" {snakemake.input.ref} {snakemake.input.noadapter_R1} {snakemake.input.noadapter_R2}| grep -v \"^@\" || true ; "
 
 if snakemake.input.get("single_reads"):
-    singleton_cmd = "bwa-mem2 mem -t {snakemake.threads} -R \"{snakemake.params.RG}\" {snakemake.input.ref} {snakemake.input.single_reads} | grep -v \"^@\" || true ; "
+    singleton_cmd = f"bwa-mem2 mem -t {remaining_CPU} -R \"{{snakemake.params.RG}}\" {{snakemake.input.ref}} {{snakemake.input.single_reads}} | grep -v \"^@\" || true ; "
+    #singleton_cmd = "bwa-mem2 mem -t {snakemake.threads} -R \"{snakemake.params.RG}\" {snakemake.input.ref} {snakemake.input.single_reads} | grep -v \"^@\" || true ; "
 
-base_cmd = f"((bwa-mem2 mem -t {{snakemake.threads}} -R \"{{snakemake.params.RG}}\" {{snakemake.input.ref}} {{snakemake.input.reads}}; \
+
+base_cmd = f"((bwa-mem2 mem -t {remaining_CPU} -R \"{{snakemake.params.RG}}\" {{snakemake.input.ref}} {{snakemake.input.reads}}; \
 {non_merged_cmd}\
 {singleton_cmd}\
-) | samtools view -b -o {{snakemake.output.mapped_reads}} - ) 2>{{snakemake.log}}"
+) | samtools view -b -@ {samtools_CPU} -o {{snakemake.output.mapped_reads}} - ) 2>{{snakemake.log}}"
+
+#base_cmd = f"((bwa-mem2 mem -t {{snakemake.threads}} -R \"{{snakemake.params.RG}}\" {{snakemake.input.ref}} {{snakemake.input.reads}}; \
+#{non_merged_cmd}\
+#{singleton_cmd}\
+#) | samtools view -b -@ {{snakemake.threads}} -o {{snakemake.output.mapped_reads}} - ) 2>{{snakemake.log}}"
 
 print(f"Executing command:\n {base_cmd}", file=sys.stderr)
 

From 916db886b32d0862330634128f3dfba08c0296f7 Mon Sep 17 00:00:00 2001
From: sroener <40714954+sroener@users.noreply.github.com>
Date: Wed, 9 Nov 2022 13:41:56 +0100
Subject: [PATCH 17/19] refactor: make parameters configurable in config file

---
 workflow/rules/ichorCNA.smk     | 38 ++++++++++++++++-----------------
 workflow/rules/raw_input_QC.smk |  2 +-
 2 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/workflow/rules/ichorCNA.smk b/workflow/rules/ichorCNA.smk
index 4738912..10e26eb 100644
--- a/workflow/rules/ichorCNA.smk
+++ b/workflow/rules/ichorCNA.smk
@@ -25,9 +25,9 @@ rule read_counter:
     log:
         "results/logs/{ID}/mapping/{SAMPLE}_all.{GENOME}.log",
     params:
-        window=1000000,
-        quality=20,
-        chroms="chr1,chr2,chr3,chr4,chr5,chr6,chr7,chr8,chr9,chr10,chr11,chr12,chr13,chr14,chr15,chr16,chr17,chr18,chr19,chr20,chr21,chr22,chrX,chrY",
+        window = config["read_counter"]["window"],
+        quality = config["read_counter"]["quality"],
+        chroms= config["read_counter"]["chroms"],
     conda:
         "../envs/icorCNA.yaml"
     shell:
@@ -48,22 +48,22 @@ rule ichorCNA:
         summary="results/{ID}/icorCNA/{SAMPLE}_processed_{GENOME}/{SAMPLE}_processed.params.txt"
     params:
         sID="{SAMPLE}_processed",
-        ploidy='"c(2,3)"',
-        normal='"c(0.5,0.6,0.7,0.8,0.9)"',
-        maxCN=5,
-        gcWIG="resources/ichorCNA/gc_hg38_1000kb.wig",
-        mapWIG="resources/ichorCNA/map_hg38_1000kb.wig",
-        centro="resources/ichorCNA/GRCh38.GCA_000001405.2_centromere_acen.txt",
-        normalPanel="resources/ichorCNA/HD_ULP_PoN_1Mb_median_normAutosome_mapScoreFiltered_median.rds",
-        includeHOMD="False",
-        chrs="'c(1:22)'",
-        chrTrain='"c(1:22)"',
-        estimateNormal="True",
-        estimatePloidy="True",
-        estimateScPrevalence="True",
-        scStates='"c(1,3)"',
-        txnE=0.9999,
-        txnStrength=10000,
+        ploidy = config["ichorCNA"]["ploidy"],
+        normal = config["ichorCNA"]["normal"],
+        maxCN = config["ichorCNA"]["maxCN"],
+        gcWIG = lambda wc: config["ichorCNA"]["gcWIG"][wc.GENOME],
+        mapWIG = lambda wc: config["ichorCNA"]["mapWIG"][wc.GENOME],
+        centro = lambda wc: config["ichorCNA"]["centro"][wc.GENOME],
+        normalPanel = config["ichorCNA"]["normalPanel"],
+        includeHOMD = config["ichorCNA"]["includeHOMD"],
+        chrs = config["ichorCNA"]["chrs"],
+        chrTrain = config["ichorCNA"]["chrTrain"],
+        estimateNormal = config["ichorCNA"]["estimateNormal"],
+        estimatePloidy = config["ichorCNA"]["estimatePloidy"],
+        estimateScPrevalence = config["ichorCNA"]["estimateScPrevalence"],
+        scStates = config["ichorCNA"]["scStates"],
+        txnE = config["ichorCNA"]["txnE"],
+        txnStrength = config["ichorCNA"]["txnStrength"],
     log:
         "results/logs/{ID}/mapping/{SAMPLE}_all.{GENOME}.log",
     conda:
diff --git a/workflow/rules/raw_input_QC.smk b/workflow/rules/raw_input_QC.smk
index b7e411b..839438d 100644
--- a/workflow/rules/raw_input_QC.smk
+++ b/workflow/rules/raw_input_QC.smk
@@ -19,4 +19,4 @@ rule flagstat_pre_conversion:
 ## samtools stats for bam files
 
 
-## FastQC: BAM, SAM or FastQ files
\ No newline at end of file
+## FastQC: BAM, SAM or FastQ files

From 5ef2fe314e305eb29ff39f91a6f81c8e406344ab Mon Sep 17 00:00:00 2001
From: sroener <40714954+sroener@users.noreply.github.com>
Date: Thu, 17 Nov 2022 16:47:08 +0100
Subject: [PATCH 18/19] chore: update mosdepth wrapper version; remove
 --fast-mode for better coverage estimation

---
 workflow/rules/QualityControl.smk | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/workflow/rules/QualityControl.smk b/workflow/rules/QualityControl.smk
index 1be197a..1544192 100644
--- a/workflow/rules/QualityControl.smk
+++ b/workflow/rules/QualityControl.smk
@@ -35,12 +35,12 @@ rule mosdepth:
     log:
         "results/logs/{ID}/mosdepth/{SAMPLE}.{GENOME}.log",
     params:
-        extra="--no-per-base --fast-mode",  # optional
+        extra="--no-per-base",  # optional
         by="500",
     # additional decompression threads through `--threads`
     threads: 32  # This value - 1 will be sent to `--threads`
     wrapper:
-        "v1.1.0/bio/mosdepth"
+        "v1.19.1/bio/mosdepth"
 
 
 rule multiqc:

From 7aa79bd5af75a38defdd813beb6553dec41b6f26 Mon Sep 17 00:00:00 2001
From: sroener <40714954+sroener@users.noreply.github.com>
Date: Thu, 17 Nov 2022 16:48:23 +0100
Subject: [PATCH 19/19] feat: add automatic chromosome parsing for read_counter
 rule

---
 workflow/rules/ichorCNA.smk | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/workflow/rules/ichorCNA.smk b/workflow/rules/ichorCNA.smk
index 10e26eb..80482a5 100644
--- a/workflow/rules/ichorCNA.smk
+++ b/workflow/rules/ichorCNA.smk
@@ -1,3 +1,13 @@
+def get_chroms_readcounter(chromfile):
+    if config["read_counter"]["chroms"]["autoselect_chroms"]:
+        with open(chromfile,"r") as f:
+            chrom = f.read()
+        return chrom
+    else:
+        return config["read_counter"]["chroms"]["chrom_list"]
+
+
+
 
 rule get_chroms:
     input:
@@ -20,6 +30,7 @@ rule read_counter:
     input:
         bam="results/{ID}/mapped_reads/{SAMPLE}_processed.{GENOME}.bam",
         bai="results/{ID}/mapped_reads/{SAMPLE}_processed.{GENOME}.bam.bai",
+        chroms="results/{ID}/icorCNA/chroms/{SAMPLE}_processed.{GENOME}.chromosomes.txt",
     output:
         wig="results/{ID}/icorCNA/readcounts/{SAMPLE}_processed.{GENOME}.wig",
     log:
@@ -27,7 +38,7 @@ rule read_counter:
     params:
         window = config["read_counter"]["window"],
         quality = config["read_counter"]["quality"],
-        chroms= config["read_counter"]["chroms"],
+        chroms= lambda wc: get_chroms_readcounter(f"test/{wc.ID}/icorCNA/chroms/{wc.SAMPLE}_processed.{wc.GENOME}.chromosomes.txt"),
     conda:
         "../envs/icorCNA.yaml"
     shell: