From e7be14690dbf97c2f06b89e0b39f07d7dd200c16 Mon Sep 17 00:00:00 2001 From: ashwini06 Date: Fri, 20 Aug 2021 16:03:53 +0200 Subject: [PATCH 1/5] refactor quality control rules according to snakemake etiquette --- .../snakemake_rules/quality_control/GATK.rule | 73 +++--- .../quality_control/contest.rule | 49 ++-- .../quality_control/fastp.rule | 10 +- .../quality_control/fastqc.rule | 45 ++-- .../quality_control/mosdepth.rule | 57 +++-- .../quality_control/multiqc.rule | 25 ++- .../quality_control/picard.rule | 212 ++++++++++-------- .../quality_control/picard_wgs.rule | 70 +++--- .../quality_control/sambamba_depth.rule | 118 +++++----- .../quality_control/sentieon_qc_metrics.rule | 35 ++- 10 files changed, 385 insertions(+), 309 deletions(-) diff --git a/BALSAMIC/snakemake_rules/quality_control/GATK.rule b/BALSAMIC/snakemake_rules/quality_control/GATK.rule index 4e61949a9..0f97ea576 100644 --- a/BALSAMIC/snakemake_rules/quality_control/GATK.rule +++ b/BALSAMIC/snakemake_rules/quality_control/GATK.rule @@ -2,42 +2,41 @@ # coding: utf-8 rule PreparePopVCF: - input: - bam = bam_dir + "tumor.merged.bam", - ref1kg = config["reference"]["1kg_snps_all"], - output: - popvcf = result_dir + "popvcf.vcf" - params: - conda = config["bioinfo_tools"].get("bcftools"), - anno_str1 = "FORMAT/GT,FORMAT/GL,FORMAT/DS,^INFO/AC,^INFO/AF,^INFO/AN,^INFO/", - popcode = "EUR" - singularity: Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix() - benchmark: - benchmark_dir + "PreparePopVCF_" + "tumor_prepare_pop_vcf.tsv" - shell: - "source activate {params.conda}; " - "readlink -f {input.bam}; " - "bcftools annotate " - "-x {params.anno_str1}{params.popcode}_AF " - "{input.ref1kg} " - " | " - "bcftools annotate " - "-i INFO/{params.popcode}_AF!=0.0 " - " | " - "awk -v OFS=\"\\t\" " - "'$1~/^#/ {{ print; }} " - " $1!~/^#/ {{ " - "split($8,INFO,\";\"); " - "newINFO=\"\";" - "for (i in INFO) {{ " - "if (INFO[i]~\"{params.popcode}_AF\") {{ " - 
"split(INFO[i],AF,\"=\"); " - "P=substr(AF[1], 1, length(AF[1])-3); " - "INFO[i]=P\"={{\"$4\"*=\"AF[2]\",\"$5\"=\"1-AF[2]\"}}\"; " - "INFO[i]=INFO[i]\";set=\"P;}} " - "newINFO=INFO[i] \";\" newINFO; " - "}} " - "$8=substr(newINFO, 1, length(newINFO)-1); " - "print; }}' > {output.popvcf}; " + input: + bam = bam_dir + "tumor.merged.bam", + ref1kg = config["reference"]["1kg_snps_all"], + output: + popvcf = result_dir + "popvcf.vcf" + benchmark: + Path(benchmark_dir, "PreparePopVCF_" + "tumor.tsv").as_posix() + singularity: + Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix() + params: + conda = config["bioinfo_tools"].get("bcftools"), + anno_str1 = "FORMAT/GT,FORMAT/GL,FORMAT/DS,^INFO/AC,^INFO/AF,^INFO/AN,^INFO/", + popcode = "EUR" + message: + "Generate intermediate pop vcf file for gatk analysis" + shell: + """ +source activate {params.conda}; +readlink -f {input.bam}; + +bcftools annotate \ +-x {params.anno_str1}{params.popcode}_AF \ +{input.ref1kg} \ +| bcftools annotate \ +-i INFO/{params.popcode}_AF!=0.0 \ +| awk -v OFS=\"\\t\" '$1~/^#/ {{ print; }} $1!~/^#/ {{ split($8,INFO,\";\"); newINFO=\"\";" + +for (i in INFO) {{ \ +if (INFO[i]~\"{params.popcode}_AF\") {{ \ +split(INFO[i],AF,\"=\"); P=substr(AF[1], 1, length(AF[1])-3); \ +INFO[i]=P\"={{\"$4\"*=\"AF[2]\",\"$5\"=\"1-AF[2]\"}}\"; \ +INFO[i]=INFO[i]\";set=\"P; }} \ +newINFO=INFO[i] \";\" newINFO; }} \ +$8=sustr(newINFO, 1, length(newINFO)-1); print; }}' \ + > {output.popvcf}; + """ diff --git a/BALSAMIC/snakemake_rules/quality_control/contest.rule b/BALSAMIC/snakemake_rules/quality_control/contest.rule index 2173cb55a..a16943d71 100644 --- a/BALSAMIC/snakemake_rules/quality_control/contest.rule +++ b/BALSAMIC/snakemake_rules/quality_control/contest.rule @@ -2,32 +2,31 @@ # coding: utf-8 rule gatk_contest: - input: - bamN = bam_dir + "normal.merged.bam", - bamT = bam_dir + "tumor.merged.bam", - fa = config["reference"]["reference_genome"], - popvcf = result_dir + "popvcf.vcf", 
- output: - N_vs_T = bam_dir + "normal_tumor.contest", - T_vs_N = bam_dir + "tumor_normal.contest", - benchmark: - benchmark_dir + "gatk_contest_" + config["analysis"]["case_id"] + "tsv" - singularity: - Path(singularity_image, config["bioinfo_tools"].get("gatk") + ".sif").as_posix() - params: - conda = config["bioinfo_tools"].get("gatk"), - min_genotype_ratio="0.95", - popcode = "EUR", - tmpdir = tempfile.mkdtemp(prefix=tmp_dir), - message: - "Running gatk contamination estimation between normal and tumor" - shell: - """ + input: + bamN = bam_dir + "normal.merged.bam", + bamT = bam_dir + "tumor.merged.bam", + fa = config["reference"]["reference_genome"], + popvcf = result_dir + "popvcf.vcf", + output: + N_vs_T = bam_dir + "normal_tumor.contest", + T_vs_N = bam_dir + "tumor_normal.contest", + benchmark: + Path(benchmark_dir, "gatk_contest_" + config["analysis"]["case_id"] + ".tsv").as_posix() + singularity: + Path(singularity_image, config["bioinfo_tools"].get("gatk") + ".sif").as_posix() + params: + conda = config["bioinfo_tools"].get("gatk"), + min_genotype_ratio="0.95", + popcode = "EUR", + tmpdir = tempfile.mkdtemp(prefix=tmp_dir), + message: + "Running gatk contamination estimation between normal and tumor" + shell: + """ source activate {params.conda}; - mkdir -p {params.tmpdir}; export TMPDIR={params.tmpdir}; - + java -jar -Djava.io.tmpdir={params.tmpdir} \ -Xms8G -Xmx16G \ $CONDA_PREFIX/opt/gatk-3.8/GenomeAnalysisTK.jar \ @@ -39,7 +38,7 @@ $CONDA_PREFIX/opt/gatk-3.8/GenomeAnalysisTK.jar \ --population {params.popcode} \ --min_genotype_ratio {params.min_genotype_ratio} \ -o {output.N_vs_T}; - + java -jar -Djava.io.tmpdir={params.tmpdir} \ -Xms8G -Xmx16G \ $CONDA_PREFIX/opt/gatk-3.8/GenomeAnalysisTK.jar \ @@ -51,4 +50,4 @@ $CONDA_PREFIX/opt/gatk-3.8/GenomeAnalysisTK.jar \ --population {params.popcode} \ --min_genotype_ratio {params.min_genotype_ratio} \ -o {output.T_vs_N}; - """ + """ diff --git a/BALSAMIC/snakemake_rules/quality_control/fastp.rule 
b/BALSAMIC/snakemake_rules/quality_control/fastp.rule index b1c41d57d..96cee3745 100644 --- a/BALSAMIC/snakemake_rules/quality_control/fastp.rule +++ b/BALSAMIC/snakemake_rules/quality_control/fastp.rule @@ -35,7 +35,7 @@ rule fastp_umi: json = qc_dir + "fastp/{sample}_fastp_umi.json", html = qc_dir + "fastp/{sample}_fastp_umi.html", benchmark: - benchmark_dir + "fastp_umi" + "{sample}_fastp.tsv" + Path(benchmark_dir, "fastp_umi" + "{sample}.tsv").as_posix() singularity: Path(singularity_image, config["bioinfo_tools"].get("fastp") + ".sif").as_posix() params: @@ -44,7 +44,8 @@ rule fastp_umi: adapter = " ".join(fastp_param_adapter), sample_name = "{sample}", conda = config["bioinfo_tools"].get("fastp") - threads: get_threads(cluster_config, 'fastp') + threads: + get_threads(cluster_config, 'fastp') message: "Quality control and trimming input fastq for {params.sample_name}" shell: @@ -76,7 +77,7 @@ rule fastp: json = qc_dir + "fastp/{sample}_fastp.json", html = qc_dir + "fastp/{sample}_fastp.html" benchmark: - benchmark_dir + "fastp_" + "{sample}_fastp.tsv" + Path(benchmark_dir, "fastp_" + "{sample}.tsv").as_posix() singularity: Path(singularity_image, config["bioinfo_tools"].get("fastp") + ".sif").as_posix() params: @@ -86,7 +87,8 @@ rule fastp: minimum_length = config["QC"]["min_seq_length"], sample_name = "{sample}", conda = config["bioinfo_tools"].get("fastp") - threads: get_threads(cluster_config, 'fastp') + threads: + get_threads(cluster_config, 'fastp') message: "Quality control and trimming of umi optimized fastq file for {params.sample_name}" shell: diff --git a/BALSAMIC/snakemake_rules/quality_control/fastqc.rule b/BALSAMIC/snakemake_rules/quality_control/fastqc.rule index cdef5075e..2abc0e305 100644 --- a/BALSAMIC/snakemake_rules/quality_control/fastqc.rule +++ b/BALSAMIC/snakemake_rules/quality_control/fastqc.rule @@ -3,35 +3,34 @@ # Following rule will take input fastq files, align them using bwa mem, and convert the output to sam format rule fastqc: - 
input: - read1 = fastq_dir + "{sample}_1.fastq.gz", - read2 = fastq_dir + "{sample}_2.fastq.gz", - output: - read1 = fastqc_dir + "{sample}_1_fastqc.zip", - read2 = fastqc_dir + "{sample}_2_fastqc.zip" - benchmark: - benchmark_dir + "fastqc_{sample}.tsv" - singularity: - Path(singularity_image, config["bioinfo_tools"].get("fastqc") + ".sif").as_posix() - params: - conda = config["bioinfo_tools"].get("fastqc"), - fastqc_dir = fastqc_dir, - sample = "{sample}", - tmpdir = tempfile.mkdtemp(prefix=tmp_dir), - message: - "Running FastQC on {params.sample}" - shell: - """ + input: + read1 = fastq_dir + "{sample}_1.fastq.gz", + read2 = fastq_dir + "{sample}_2.fastq.gz", + output: + read1 = fastqc_dir + "{sample}_1_fastqc.zip", + read2 = fastqc_dir + "{sample}_2_fastqc.zip" + benchmark: + Path(benchmark_dir, "fastqc_{sample}.tsv").as_posix() + singularity: + Path(singularity_image, config["bioinfo_tools"].get("fastqc") + ".sif").as_posix() + params: + conda = config["bioinfo_tools"].get("fastqc"), + fastqc_dir = fastqc_dir, + sample = "{sample}", + tmpdir = tempfile.mkdtemp(prefix=tmp_dir), + message: + "Running FastQC on {params.sample}" + shell: + """ source activate {params.conda}; - mkdir -p {params.tmpdir}; export TMPDIR={params.tmpdir}; - + fastqc {input.read1} \ --dir {params.tmpdir} \ --outdir {params.fastqc_dir}; - + fastqc {input.read2} \ --dir {params.tmpdir} \ --outdir {params.fastqc_dir}; - """ + """ diff --git a/BALSAMIC/snakemake_rules/quality_control/mosdepth.rule b/BALSAMIC/snakemake_rules/quality_control/mosdepth.rule index 11367812d..fdcd629d2 100644 --- a/BALSAMIC/snakemake_rules/quality_control/mosdepth.rule +++ b/BALSAMIC/snakemake_rules/quality_control/mosdepth.rule @@ -2,39 +2,38 @@ # coding: utf-8 rule mosdepth_coverage: - input: - bam = bam_dir + "{sample}" + ".sorted." 
+ picarddup + ".bam", - bed = config["panel"]["capture_kit"] - output: - bam_dir + "{sample}.mosdepth.global.dist.txt", - bam_dir + "{sample}.mosdepth.region.dist.txt", - bam_dir + "{sample}.mosdepth.summary.txt", - bam_dir + "{sample}.per-base.bed.gz", - bam_dir + "{sample}.regions.bed.gz" - benchmark: - benchmark_dir + "mosdepth_covearge_" + "{sample}.tsv" - singularity: - Path(singularity_image, config["bioinfo_tools"].get("mosdepth") + ".sif").as_posix() - params: - mapq='20', - samflag='1796', - quantize='0:1:50:150:', - sample_name='{sample}', - output_dir=bam_dir, - conda = config["bioinfo_tools"].get("mosdepth") - threads: - get_threads(cluster_config, "mosdepth") - message: - "Running mosdepth for coveage on {params.sample_name}" - shell: - """ + input: + bam = bam_dir + "{sample}" + ".sorted." + picarddup + ".bam", + bed = config["panel"]["capture_kit"] + output: + bam_dir + "{sample}.mosdepth.global.dist.txt", + bam_dir + "{sample}.mosdepth.region.dist.txt", + bam_dir + "{sample}.mosdepth.summary.txt", + bam_dir + "{sample}.per-base.bed.gz", + bam_dir + "{sample}.regions.bed.gz" + benchmark: + Path(benchmark_dir, "mosdepth_coverage_" + "{sample}.tsv").as_posix() + singularity: + Path(singularity_image, config["bioinfo_tools"].get("mosdepth") + ".sif").as_posix() + params: + mapq = '20', + samflag = '1796', + quantize = '0:1:50:150:', + sample_name = '{sample}', + output_dir = bam_dir, + conda = config["bioinfo_tools"].get("mosdepth") + threads: + get_threads(cluster_config, "mosdepth_coverage") + message: + "Calculate coverage using mosdepth for sample {params.sample_name}" + shell: + """ source activate {params.conda}; - export MOSDEPTH_Q0=NO_COVERAGE; export MOSDEPTH_Q1=LOW_COVERAGE; export MOSDEPTH_Q2=CALLABLE; export MOSDEPTH_Q3=HIGH_COVERAGE; - + mosdepth \ --by {input.bed} \ --mapq {params.mapq} \ @@ -43,4 +42,4 @@ mosdepth \ --threads {threads} \ {params.output_dir}/{params.sample_name} \ {input.bam}; - """ + """ diff --git 
a/BALSAMIC/snakemake_rules/quality_control/multiqc.rule b/BALSAMIC/snakemake_rules/quality_control/multiqc.rule index 77bd78cb2..bba315c37 100644 --- a/BALSAMIC/snakemake_rules/quality_control/multiqc.rule +++ b/BALSAMIC/snakemake_rules/quality_control/multiqc.rule @@ -10,8 +10,8 @@ if config['analysis']['analysis_type'] == "paired": if config["analysis"]["sequencing_type"] == 'wgs': picard_metrics_wildcard = ["alignment_summary_metrics", "base_distribution_by_cycle_metrics", "base_distribution_by_cycle.pdf", "insert_size_histogram.pdf", "insert_size_metrics", - "quality_by_cycle_metrics", - "quality_by_cycle.pdf", "quality_distribution_metrics", "quality_distribution.pdf"] + "quality_by_cycle_metrics", "quality_by_cycle.pdf", + "quality_distribution_metrics", "quality_distribution.pdf"] # fastqc metrics multiqc_input.extend(expand(fastqc_dir + "{sample}_{read_num}_fastqc.zip", sample=config["samples"], read_num=[1, 2])) @@ -66,16 +66,23 @@ rule multiqc: output: html = qc_dir + "multiqc_report.html", json = qc_dir + "multiqc_data/multiqc_data.json", + benchmark: + Path(benchmark_dir, "multiqc_" + config["analysis"]["case_id"] + ".multiqc.tsv").as_posix() + singularity: + Path(singularity_image, config["bioinfo_tools"].get("multiqc") + ".sif").as_posix() params: housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "multiqc"}, dir_list = result_dir, qc_dir = qc_dir, conda = config["bioinfo_tools"].get("multiqc"), - singularity: Path(singularity_image, config["bioinfo_tools"].get("multiqc") + ".sif").as_posix() - benchmark: - benchmark_dir + "multiqc_" + config["analysis"]["case_id"] + ".multiqc.tsv" + case_name = config["analysis"]["case_id"] + message: + "Aggregate quality metrics results using multiqc for sample {params.case_name}" shell: - "source activate {params.conda};" - "echo -e \"{params.dir_list}\" > {params.qc_dir}/dir_list; " - "multiqc --force --outdir {params.qc_dir} --data-format json -l {params.qc_dir}/dir_list; " - "chmod -R 777 
{params.qc_dir};" + """ +source activate {params.conda}; + +echo -e \"{params.dir_list}\" > {params.qc_dir}/dir_list; +multiqc --force --outdir {params.qc_dir} --data-format json -l {params.qc_dir}/dir_list; +chmod -R 777 {params.qc_dir}; + """ diff --git a/BALSAMIC/snakemake_rules/quality_control/picard.rule b/BALSAMIC/snakemake_rules/quality_control/picard.rule index 81d9f812c..fac7b459f 100644 --- a/BALSAMIC/snakemake_rules/quality_control/picard.rule +++ b/BALSAMIC/snakemake_rules/quality_control/picard.rule @@ -2,99 +2,125 @@ # coding: utf-8 def picard_flag(picarddup): - if picarddup == "mrkdup": - return "FALSE" - else: - return "TRUE" + if picarddup == "mrkdup": + return "FALSE" + else: + return "TRUE" -rule CollectHsMetrics: - input: - fadict = (config["reference"]["reference_genome"]).replace(".fasta",".dict"), - bed = config["panel"]["capture_kit"], - bam = bam_dir + "{sample}.sorted." + picarddup + ".bam", - fa = config["reference"]["reference_genome"], - output: - mrkdup = bam_dir + "{sample}.sorted." 
+ picarddup + ".hsmetric" - params: - mem = "16g", - tmpdir = tempfile.mkdtemp(prefix=tmp_dir), - conda = config["bioinfo_tools"].get("picard"), - baitsetname = os.path.basename(config["panel"]["capture_kit"]) - singularity: Path(singularity_image, config["bioinfo_tools"].get("picard") + ".sif").as_posix() - benchmark: - benchmark_dir + "CollectHsMetrics_" + "{sample}.collect_hsmetrics.tsv" - shell: - "source activate {params.conda};" - "mkdir -p {params.tmpdir}; " - "export TMPDIR={params.tmpdir}; " - "picard -Djava.io.tmpdir={params.tmpdir} -Xmx{params.mem} " - "BedToIntervalList " - "I={input.bed} " - "O={input.bam}.picard.bedintervals " - "SD={input.fadict}; " - "picard -Djava.io.tmpdir={params.tmpdir} -Xmx{params.mem} " - "CollectHsMetrics " - "BI={input.bam}.picard.bedintervals " - "TI={input.bam}.picard.bedintervals " - "I={input.bam} " - "O={output.mrkdup} " - "R={input.fa} " - "BAIT_SET_NAME={params.baitsetname} " - "COVERAGE_CAP=50000 " - "METRIC_ACCUMULATION_LEVEL=ALL_READS; " - +rule picard_CollectHsMetrics: + input: + fadict = (config["reference"]["reference_genome"]).replace(".fasta",".dict"), + bed = config["panel"]["capture_kit"], + bam = bam_dir + "{sample}.sorted." + picarddup + ".bam", + fa = config["reference"]["reference_genome"], + output: + mrkdup = bam_dir + "{sample}.sorted." 
+ picarddup + ".hsmetric" + benchmark: + Path(benchmark_dir + "picard_CollectHsMetrics_" + "{sample}.tsv").as_posix() + singularity: + Path(singularity_image,config["bioinfo_tools"].get("picard") + ".sif").as_posix() + params: + mem = "16g", + tmpdir = tempfile.mkdtemp(prefix=tmp_dir), + conda = config["bioinfo_tools"].get("picard"), + baitsetname = os.path.basename(config["panel"]["capture_kit"]), + sample = '{sample}' + threads: + get_threads(cluster_config, 'picard_CollectHsMetrics') + message: + "Calculating picard HsMetrics for sample '{params.sample}'" + shell: + """ +source activate {params.conda}; +mkdir -p {params.tmpdir}; +export TMPDIR={params.tmpdir}; -rule CollectAlignmentSummaryMetrics: - input: - bam = bam_dir + "{sample}.sorted.bam", - fa = config["reference"]["reference_genome"] - output: - bam_dir + "{sample}.sorted.alignmetric" - params: - mem = "16g", - tmpdir = tempfile.mkdtemp(prefix=tmp_dir), - conda = config["bioinfo_tools"].get("picard"), - adapter = config["QC"]["adapter"] - singularity: Path(singularity_image, config["bioinfo_tools"].get("picard") + ".sif").as_posix() - benchmark: - benchmark_dir + "CollectAlignmentSummaryMetrics_" + "{sample}.collect_alignment_summary.tsv" - shell: - "source activate {params.conda};" - "mkdir -p {params.tmpdir}; " - "export TMPDIR={params.tmpdir}; " - "picard -Djava.io.tmpdir={params.tmpdir} -Xmx{params.mem} " - "CollectAlignmentSummaryMetrics " - "R={input.fa} " - "I={input.bam} " - "O={output} " - "ADAPTER_SEQUENCE={params.adapter} " - "METRIC_ACCUMULATION_LEVEL=ALL_READS " - "METRIC_ACCUMULATION_LEVEL=LIBRARY;" - +picard -Djava.io.tmpdir={params.tmpdir} -Xmx{params.mem} \ +BedToIntervalList \ +I={input.bed} \ +O={input.bam}.picard.bedintervals \ +SD={input.fadict}; -rule CollectInsertSizeMetrics: - input: - bam = bam_dir + "{sample}.sorted.bam" - output: - pdf = bam_dir + "{sample}.sorted.insertsizemetric.pdf", - txt = bam_dir + "{sample}.sorted.insertsizemetric" - params: - mem = "16g", - tmpdir = 
tempfile.mkdtemp(prefix=tmp_dir), - conda = config["bioinfo_tools"].get("picard") - singularity: Path(singularity_image, config["bioinfo_tools"].get("picard") + ".sif").as_posix() - benchmark: - benchmark_dir + "CollectInsertSizeMetrics_" + "{sample}.collect_insertsize_metrics.tsv" - shell: - "source activate {params.conda};" - "mkdir -p {params.tmpdir}; " - "export TMPDIR={params.tmpdir}; " - "picard -Djava.io.tmpdir={params.tmpdir} -Xmx{params.mem} " - "CollectInsertSizeMetrics " - "I={input.bam} " - "H={output.pdf} " - "O={output.txt} " - "M=0.01 " - "INCLUDE_DUPLICATES=TRUE " - "METRIC_ACCUMULATION_LEVEL=ALL_READS " - "METRIC_ACCUMULATION_LEVEL=LIBRARY; " +picard -Djava.io.tmpdir={params.tmpdir} -Xmx{params.mem} \ +CollectHsMetrics \ +BI={input.bam}.picard.bedintervals \ +TI={input.bam}.picard.bedintervals \ +I={input.bam} \ +O={output.mrkdup} \ +R={input.fa} \ +BAIT_SET_NAME={params.baitsetname} \ +COVERAGE_CAP=50000 \ +METRIC_ACCUMULATION_LEVEL=ALL_READS; + """ + +rule picard_CollectAlignmentSummaryMetrics: + input: + bam = bam_dir + "{sample}.sorted.bam", + fa = config["reference"]["reference_genome"] + output: + bam_dir + "{sample}.sorted.alignmetric" + benchmark: + Path(benchmark_dir, "CollectAlignmentSummaryMetrics_" + "{sample}.tsv").as_posix() + singularity: + Path(singularity_image,config["bioinfo_tools"].get("picard") + ".sif").as_posix() + params: + mem = "16g", + tmpdir = tempfile.mkdtemp(prefix=tmp_dir), + conda = config["bioinfo_tools"].get("picard"), + adapter = config["QC"]["adapter"], + sample = '{sample}' + threads: + get_threads(cluster_config,'picard_CollectAlignmentSummaryMetrics') + message: + "Calculating picard alignment summary metrics for sample '{params.sample}'" + shell: + """ +source activate {params.conda}; +mkdir -p {params.tmpdir}; +export TMPDIR={params.tmpdir}; + +picard -Djava.io.tmpdir={params.tmpdir} -Xmx{params.mem} \ +CollectAlignmentSummaryMetrics \ +R={input.fa} \ +I={input.bam} \ +O={output} \ 
+ADAPTER_SEQUENCE={params.adapter} \ +METRIC_ACCUMULATION_LEVEL=ALL_READS \ +METRIC_ACCUMULATION_LEVEL=LIBRARY; + """ + +rule picard_CollectInsertSizeMetrics: + input: + bam = bam_dir + "{sample}.sorted.bam" + output: + pdf = bam_dir + "{sample}.sorted.insertsizemetric.pdf", + txt = bam_dir + "{sample}.sorted.insertsizemetric" + benchmark: + Path(benchmark_dir, "picard_CollectInsertSizeMetrics_" + "{sample}.tsv").as_posix() + singularity: + Path(singularity_image, config["bioinfo_tools"].get("picard") + ".sif").as_posix() + params: + mem = "16g", + tmpdir = tempfile.mkdtemp(prefix=tmp_dir), + conda = config["bioinfo_tools"].get("picard"), + sample = '{sample}' + threads: + get_threads(cluster_config,'picard_CollectInsertSizeMetrics') + message: + "Calculating picard InsertSize metrics for sample '{params.sample}'" + shell: + """ +source activate {params.conda}; +mkdir -p {params.tmpdir}; +export TMPDIR={params.tmpdir}; + +picard -Djava.io.tmpdir={params.tmpdir} -Xmx{params.mem} \ +CollectInsertSizeMetrics \ +I={input.bam} \ +H={output.pdf} \ +O={output.txt} \ +M=0.01 \ +INCLUDE_DUPLICATES=TRUE \ +METRIC_ACCUMULATION_LEVEL=ALL_READS \ +METRIC_ACCUMULATION_LEVEL=LIBRARY; + """ \ No newline at end of file diff --git a/BALSAMIC/snakemake_rules/quality_control/picard_wgs.rule b/BALSAMIC/snakemake_rules/quality_control/picard_wgs.rule index cc8d60a16..5cf3133bd 100644 --- a/BALSAMIC/snakemake_rules/quality_control/picard_wgs.rule +++ b/BALSAMIC/snakemake_rules/quality_control/picard_wgs.rule @@ -6,53 +6,69 @@ picard_metrics_wildcard = ["alignment_summary_metrics", "base_distribution_by_cy "insert_size_metrics", "quality_by_cycle_metrics", "quality_by_cycle.pdf", "quality_distribution_metrics", "quality_distribution.pdf"] -rule CollectMultipleMetrics: +rule picard_CollectMultipleMetrics: input: bam = bam_dir + "{sample}.dedup.bam", reference = config["reference"]["reference_genome"] output: - expand(qc_dir + "{{sample}}.multiple_metrics.{metrics_wc}", 
sample=config["samples"], metrics_wc=picard_metrics_wildcard) + expand(qc_dir + "{{sample}}.multiple_metrics.{metrics_wc}", sample = config["samples"], metrics_wc = picard_metrics_wildcard) + benchmark: + Path(benchmark_dir, "picard_CollectMultipleMetrics_" + "{sample}.tsv").as_posix() + singularity: + Path(singularity_image, config["bioinfo_tools"].get("picard") + ".sif").as_posix() params: mem = "16g", tmpdir = tempfile.mkdtemp(prefix=tmp_dir), output_prefix = qc_dir + "{sample}.multiple_metrics", conda = config["bioinfo_tools"].get("picard"), - singularity: Path(singularity_image, config["bioinfo_tools"].get("picard") + ".sif").as_posix() - benchmark: - benchmark_dir + "CollectMultipleMetrics_" + "{sample}.picard_collect_multiple_metrics.tsv" + sample = '{sample}' + threads: + get_threads(cluster_config,'picard_CollectMultipleMetrics') + message: + "Collecting picard multiple quality metrics for wgs sample {params.sample}" shell: - "source activate {params.conda};" - "mkdir -p {params.tmpdir}; " - "export TMPDIR={params.tmpdir}; " - "picard -Djava.io.tmpdir={params.tmpdir} -Xmx{params.mem} " - " CollectMultipleMetrics " - " I={input.bam} " - " O={params.output_prefix} " - " R={input.reference}; " - + """ +source activate {params.conda}; +mkdir -p {params.tmpdir}; +export TMPDIR={params.tmpdir}; + +picard -Djava.io.tmpdir={params.tmpdir} -Xmx{params.mem} \ +CollectMultipleMetrics \ +I={input.bam} \ +O={params.output_prefix} \ +R={input.reference}; + """ -rule CollectWgsMetrics: +rule picard_CollectWgsMetrics: input: bam = bam_dir + "{sample}.dedup.bam", reference = config["reference"]["reference_genome"] output: qc_dir + "{sample}_picard_wgs_metrics.txt" + benchmark: + Path(benchmark_dir + "picard_CollectWgsMetrics_" + "{sample}.tsv").as_posix() + singularity: + Path(singularity_image,config[ "bioinfo_tools" ].get("picard") + ".sif").as_posix() params: mem = "16g", tmpdir = tempfile.mkdtemp(prefix=tmp_dir), conda = config["bioinfo_tools"].get("picard"), - 
singularity: Path(singularity_image, config["bioinfo_tools"].get("picard") + ".sif").as_posix() - benchmark: - benchmark_dir + "CollectWgsMetrics_" + "{sample}.picard_collect_wgs_metrics.tsv" + sample = '{sample}' + threads: + get_threads(cluster_config,'picard_CollectWgsMetrics') + message: + "Collecting various picard quality metrics for wgs sample '{params.sample}'" shell: - "source activate {params.conda};" - "mkdir -p {params.tmpdir}; " - "export TMPDIR={params.tmpdir}; " - "picard -Djava.io.tmpdir={params.tmpdir} -Xmx{params.mem} " - " CollectWgsMetrics " - " I={input.bam} " - " O={output} " - " R={input.reference}; " - + """ +source activate {params.conda}; +mkdir -p {params.tmpdir}; +export TMPDIR={params.tmpdir}; + +picard -Djava.io.tmpdir={params.tmpdir} -Xmx{params.mem} \ +CollectWgsMetrics \ +I={input.bam} \ +O={output} \ +R={input.reference}; + """ diff --git a/BALSAMIC/snakemake_rules/quality_control/sambamba_depth.rule b/BALSAMIC/snakemake_rules/quality_control/sambamba_depth.rule index 0387e02ca..fc21048ad 100644 --- a/BALSAMIC/snakemake_rules/quality_control/sambamba_depth.rule +++ b/BALSAMIC/snakemake_rules/quality_control/sambamba_depth.rule @@ -2,56 +2,72 @@ # coding: utf-8 rule sambamba_panel_depth: - input: - bam = bam_dir + "{sample}" + ".sorted." + picarddup + ".bam", - bed = config["panel"]["capture_kit"] - output: - bam_dir + "{sample}.sorted." 
+ picarddup + ".cov.bed" - params: - base_qual=10, - cov_start=50, - cov_end=1000, - cov_step=50, - filter_string="'not (unmapped or mate_is_unmapped) and not duplicate and not failed_quality_control and mapping_quality > 10'", - conda = config["bioinfo_tools"].get("sambamba") - singularity: Path(singularity_image, config["bioinfo_tools"].get("sambamba") + ".sif").as_posix() - benchmark: - benchmark_dir + "panel_depth_" + "{sample}.sambamba_panel_depth.tsv" - shell: - "source activate {params.conda}; " - "covStr=`seq {params.cov_start} {params.cov_step} {params.cov_end} | xargs -n1 echo -n \" --cov-threshold\"`; " - "sambamba depth region " - "--regions {input.bed} " - "--min-base-quality={params.base_qual} " - "--filter {params.filter_string} " - "`echo $covStr` {input.bam} > {output}; " - + input: + bam = bam_dir + "{sample}" + ".sorted." + picarddup + ".bam", + bed = config["panel"]["capture_kit"] + output: + bam_dir + "{sample}.sorted." + picarddup + ".cov.bed" + benchmark: + Path(benchmark_dir, "sambamba_panel_depth_" + "{sample}.tsv").as_posix() + singularity: + Path(singularity_image,config[ "bioinfo_tools" ].get("sambamba") + ".sif").as_posix() + params: + base_qual=10, + cov_start=50, + cov_end=1000, + cov_step=50, + filter_string="'not (unmapped or mate_is_unmapped) and not duplicate and not failed_quality_control and mapping_quality > 10'", + conda = config["bioinfo_tools"].get("sambamba"), + sample = '{sample}' + threads: + get_threads(cluster_config, 'sambamba_panel_depth') + message: + "Calculate depth statistics using sambamba for sample {params.sample}" + shell: + """ +source activate {params.conda}; +covStr=`seq {params.cov_start} {params.cov_step} {params.cov_end} | xargs -n1 echo -n \" --cov-threshold\"`; + +sambamba depth region \ +--regions {input.bed} \ +--min-base-quality={params.base_qual} \ +--filter {params.filter_string} \ +`echo $covStr` {input.bam} > {output}; + """ rule sambamba_exon_depth: - input: - bam = bam_dir + "{sample}" + 
".sorted." + picarddup + ".bam", - bed = config["reference"]["exon_bed"] - output: - bam_dir + "{sample}.sorted." + picarddup + ".exon.cov.bed" - params: - base_qual=10, - cov_1="50", - cov_2="100", - cov_3="150", - cov_4="200", - cov_5="250", - filter_string="'not (unmapped or mate_is_unmapped) and not duplicate and not failed_quality_control and mapping_quality > 10'", - conda = config["bioinfo_tools"].get("sambamba") - singularity: Path(singularity_image, config["bioinfo_tools"].get("sambamba") + ".sif").as_posix() - benchmark: - benchmark_dir + "exon_depth_" + "{sample}.sambamba_exon_depth.tsv" - shell: - "source activate {params.conda}; " - "sambamba depth region " - "--regions {input.bed} " - "--min-base-quality={params.base_qual} " - "--filter {params.filter_string} " - "--cov-threshold {params.cov_1} --cov-threshold {params.cov_2} " - "--cov-threshold {params.cov_3} --cov-threshold {params.cov_4} " - "--cov-threshold {params.cov_5} {input.bam} > {output}; " - + input: + bam = bam_dir + "{sample}" + ".sorted." + picarddup + ".bam", + bed = config["reference"]["exon_bed"] + output: + bam_dir + "{sample}.sorted." 
+ picarddup + ".exon.cov.bed" + benchmark: + Path(benchmark_dir, "sambamba_exon_depth_" + "{sample}.tsv").as_posix() + singularity: + Path(singularity_image,config[ "bioinfo_tools" ].get("sambamba") + ".sif").as_posix() + params: + base_qual = 10, + cov_1 = "50", + cov_2 = "100", + cov_3 = "150", + cov_4 = "200", + cov_5 = "250", + filter_string = "'not (unmapped or mate_is_unmapped) and not duplicate and not failed_quality_control and mapping_quality > 10'", + conda = config["bioinfo_tools"].get("sambamba"), + sample = 'sample' + threads: + get_threads(cluster_config,'sambamba_exon_depth') + message: + "Calculate exon depth stastics using sambamba for sample {params.sample}" + shell: + """ +source activate {params.conda}; + +sambamba depth region \ +--regions {input.bed} \ +--min-base-quality={params.base_qual} \ +--filter {params.filter_string} \ +--cov-threshold {params.cov_1} --cov-threshold {params.cov_2} \ +--cov-threshold {params.cov_3} --cov-threshold {params.cov_4} \ +--cov-threshold {params.cov_5} {input.bam} > {output}; + """ \ No newline at end of file diff --git a/BALSAMIC/snakemake_rules/quality_control/sentieon_qc_metrics.rule b/BALSAMIC/snakemake_rules/quality_control/sentieon_qc_metrics.rule index c3dc99236..f233d2631 100644 --- a/BALSAMIC/snakemake_rules/quality_control/sentieon_qc_metrics.rule +++ b/BALSAMIC/snakemake_rules/quality_control/sentieon_qc_metrics.rule @@ -17,29 +17,42 @@ rule sentieon_wgs_metrics: output: wgs_metrics = qc_dir + "{sample}_sentieon_wgs_metrics.txt", coverage_metrics = qc_dir + "{sample}_coverage.gz" + benchmark: + Path(benchmark_dir,'sentieon_wgs_metrics_' + "{sample}.tsv").as_posix() params: - tmpdir = tmp_dir, + tmpdir = tempfile.mkdtemp(prefix=tmp_dir), min_base_qual = '10', gene_list = config["reference"]["refGene"], cov_threshold = repeat("--cov_thresh", [50, 100, 150, 200, 250]), sentieon_exec = config["SENTIEON_EXEC"], sentieon_lic = config["SENTIEON_LICENSE"], - threads: get_threads(cluster_config, 
'sentieon_wgs_metrics') - benchmark: - benchmark_dir + 'sentieon_wgs_metrics_' + "{sample}_wgs_metrics.tsv" + sample = '{sample}' + threads: + get_threads(cluster_config, 'sentieon_wgs_metrics') + message: + "Calculate coverage metrics for wgs cases using sentieon tools for sample {params.sample}" shell: """ -rand_str=$(openssl rand -hex 5); -tmpdir={params.tmpdir}/${{rand_str}}; -mkdir -p ${{tmpdir}}; -export TMPDIR=${{tmpdir}}; -export SENTIEON_TMPDIR=${{tmpdir}}; +mkdir -p {params.tmpdir}; +export TMPDIR={params.tmpdir}; +export SENTIEON_TMPDIR={params.tmpdir}; export SENTIEON_LICENSE={params.sentieon_lic}; -{params.sentieon_exec} driver -i {input.bam} -r {input.reference} --algo WgsMetricsAlgo --min_base_qual {params.min_base_qual} {output.wgs_metrics}; +{params.sentieon_exec} driver \ +-i {input.bam} \ +-r {input.reference} \ +--algo WgsMetricsAlgo \ +--min_base_qual {params.min_base_qual} \ +{output.wgs_metrics}; -{params.sentieon_exec} driver -i {input.bam} -r {input.reference} --algo CoverageMetrics --gene_list {params.gene_list} {params.cov_threshold} {output.coverage_metrics}_tmp; +{params.sentieon_exec} driver \ +-i {input.bam} \ +-r {input.reference} \ +--algo CoverageMetrics \ +--gene_list {params.gene_list} {params.cov_threshold} \ +{output.coverage_metrics}_tmp; gzip -c {output.coverage_metrics}_tmp > {output.coverage_metrics}; rm {output.coverage_metrics}_tmp; +rm -rf {params.tmpdir}; """ From 3e457114935de25da9f53c15ea0fe6253983ba3a Mon Sep 17 00:00:00 2001 From: ashwini06 Date: Fri, 20 Aug 2021 16:08:10 +0200 Subject: [PATCH 2/5] update changelog --- CHANGELOG.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index ac3ef3da0..ff96c0b37 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -52,7 +52,7 @@ Fixed: * Refactor snakemake ``umi`` rules according to snakemake etiquette #636 * Refactor snakemake ``variant calling`` rules according to snakemake etiquette #636 * Wrong spacing in reference json 
issue #704 - +* Refactor snakemake ``quality control`` rules according to snakemake etiquette #636 Removed: ^^^^^^^^ From 91d8bfdc9b4340184fcdfe8f514b4edab334f1fe Mon Sep 17 00:00:00 2001 From: ashwini06 Date: Mon, 23 Aug 2021 10:43:46 +0200 Subject: [PATCH 3/5] refactor dragen rule --- BALSAMIC/snakemake_rules/dragen_suite/dragen_dna.rule | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/BALSAMIC/snakemake_rules/dragen_suite/dragen_dna.rule b/BALSAMIC/snakemake_rules/dragen_suite/dragen_dna.rule index 05bd3f488..b565a59ae 100644 --- a/BALSAMIC/snakemake_rules/dragen_suite/dragen_dna.rule +++ b/BALSAMIC/snakemake_rules/dragen_suite/dragen_dna.rule @@ -4,8 +4,8 @@ rule dragen_align_call_tumor_only: input: reference = config["reference"]["reference_genome"], - read1 = Path(fastq_dir, "{mysample}_1.fp.fastq.gz".format(mysample=tumor_sample)).as_posix(), - read2 = Path(fastq_dir, "{mysample}_2.fp.fastq.gz".format(mysample=tumor_sample)).as_posix(), + read1 = Path(fastq_dir, "{mysample}_1.fp.fastq.gz".format(mysample = tumor_sample)).as_posix(), + read2 = Path(fastq_dir, "{mysample}_2.fp.fastq.gz".format(mysample = tumor_sample)).as_posix(), output: bam = Path(result_dir, "dragen", "SNV.somatic." + config["analysis"]["case_id"] + ".dragen_tumor.bam").as_posix(), vcf = Path(result_dir, "dragen", "SNV.somatic." 
+ config["analysis"]["case_id"] + ".dragen.vcf.gz").as_posix() @@ -37,8 +37,8 @@ mkdir -p {params.tmp_reference_dir} # Build reference dragen --build-hash-table true \ - --ht-reference {input.reference} \ - --output-directory {params.tmp_reference_dir} #--ht-alt-liftover /opt/edico/liftover/hg19_alt_liftover.sam +--ht-reference {input.reference} \ +--output-directory {params.tmp_reference_dir} #--ht-alt-liftover /opt/edico/liftover/hg19_alt_liftover.sam # Make sure reference loads properly dragen -l -r {params.tmp_reference_dir} From 8bd05c046adb697f16d023e7333c1fb19b07206f Mon Sep 17 00:00:00 2001 From: ashwini06 Date: Mon, 23 Aug 2021 11:48:52 +0200 Subject: [PATCH 4/5] add message to variantcalling splitbed rule --- BALSAMIC/snakemake_rules/variant_calling/split_bed.rule | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/BALSAMIC/snakemake_rules/variant_calling/split_bed.rule b/BALSAMIC/snakemake_rules/variant_calling/split_bed.rule index badc95206..762af1e5d 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/split_bed.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/split_bed.rule @@ -1,7 +1,7 @@ # vim: syntax=python tabstop=4 expandtab # coding: utf-8 -rule split_bed_by_chrom: +rule bedtools_splitbed_by_chrom: input: bed = config["panel"]["capture_kit"], chrom = config["reference"]["genome_chrom_size"], @@ -9,7 +9,7 @@ rule split_bed_by_chrom: output: bed = expand(vcf_dir + "split_bed/" + "{chrom}." 
+ capture_kit, chrom=chromlist)
     benchmark:
-        Path(benchmark_dir, 'split_bed_by_chrom.tsv').as_posix()
+        Path(benchmark_dir, 'bedtools_splitbed_by_chrom.tsv').as_posix()
     singularity:
         Path(singularity_image, config["bioinfo_tools"].get("bedtools") + ".sif").as_posix()
     params:
@@ -17,6 +17,9 @@
         split_bed_dir = vcf_dir + "split_bed/",
         origin_bed = capture_kit,
         conda = config["bioinfo_tools"].get("bedtools"),
+    message:
+        ("Capturing reference genome chromosome size and splitting the panel bed per chromosome. "
+        "Extend the region by 100bp on each direction, sort and merge the overlapping intervals using bedtools")
     shell:
         """
         source activate {params.conda};

From 3296b472bd293de73297b497b7b8ec19ecef4896 Mon Sep 17 00:00:00 2001
From: ashwini06 
Date: Mon, 23 Aug 2021 12:05:56 +0200
Subject: [PATCH 5/5] finalcheck- fix spaces for multiple rules

---
 .../annotation/varcaller_wgs_filter_tumor_only.rule | 6 +++---
 BALSAMIC/snakemake_rules/quality_control/picard.rule | 2 +-
 .../quality_control/sambamba_depth.rule | 12 ++++++------
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/BALSAMIC/snakemake_rules/annotation/varcaller_wgs_filter_tumor_only.rule b/BALSAMIC/snakemake_rules/annotation/varcaller_wgs_filter_tumor_only.rule
index 7f6b9b143..bcdb72eba 100644
--- a/BALSAMIC/snakemake_rules/annotation/varcaller_wgs_filter_tumor_only.rule
+++ b/BALSAMIC/snakemake_rules/annotation/varcaller_wgs_filter_tumor_only.rule
@@ -11,7 +11,7 @@ rule bcftools_filter_tnscope_tumor_only:
     benchmark:
         Path(benchmark_dir, 'bcftools_filter_tnscope_tumor_only_' + "{var_type}.somatic.{case_name}.tsv").as_posix()
     singularity:
-        Path(singularity_image,config[ "bioinfo_tools" ].get("bcftools") + ".sif").as_posix()
+        Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix()
     params:
         conda = config["bioinfo_tools"].get("bcftools"),
         DP = [SENTIEON_CALLER.DP.tag_value, SENTIEON_CALLER.DP.filter_name],
@@ -56,7 +56,7 @@ rule 
bcftools_filter_tnhaplotyper_tumor_only: benchmark: Path(benchmark_dir, 'bcftools_filter_tnhaplotyper_tumor_only_' + "{var_type}.somatic.{case_name}.tsv").as_posix() singularity: - Path(singularity_image, config[ "bioinfo_tools" ].get("bcftools") + ".sif").as_posix() + Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix() params: conda = config["bioinfo_tools"].get("bcftools"), DP = [SENTIEON_CALLER.DP.tag_value, SENTIEON_CALLER.DP.filter_name], @@ -100,7 +100,7 @@ rule bcftools_intersect_tumor_only: benchmark: Path(benchmark_dir, 'bcftools_intersect_tumor_only_' + "{var_type}.somatic.{case_name}.tsv").as_posix() singularity: - Path(singularity_image,config[ "bioinfo_tools" ].get("bcftools") + ".sif").as_posix() + Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix() params: conda = config["bioinfo_tools"].get("bcftools"), vcf_dir = vep_dir + "sentieon_callers_intersect", diff --git a/BALSAMIC/snakemake_rules/quality_control/picard.rule b/BALSAMIC/snakemake_rules/quality_control/picard.rule index fac7b459f..e47ee92e3 100644 --- a/BALSAMIC/snakemake_rules/quality_control/picard.rule +++ b/BALSAMIC/snakemake_rules/quality_control/picard.rule @@ -18,7 +18,7 @@ rule picard_CollectHsMetrics: benchmark: Path(benchmark_dir + "picard_CollectHsMetrics_" + "{sample}.tsv").as_posix() singularity: - Path(singularity_image,config["bioinfo_tools"].get("picard") + ".sif").as_posix() + Path(singularity_image, config["bioinfo_tools"].get("picard") + ".sif").as_posix() params: mem = "16g", tmpdir = tempfile.mkdtemp(prefix=tmp_dir), diff --git a/BALSAMIC/snakemake_rules/quality_control/sambamba_depth.rule b/BALSAMIC/snakemake_rules/quality_control/sambamba_depth.rule index fc21048ad..803e50a68 100644 --- a/BALSAMIC/snakemake_rules/quality_control/sambamba_depth.rule +++ b/BALSAMIC/snakemake_rules/quality_control/sambamba_depth.rule @@ -10,12 +10,12 @@ rule sambamba_panel_depth: benchmark: Path(benchmark_dir, 
"sambamba_panel_depth_" + "{sample}.tsv").as_posix() singularity: - Path(singularity_image,config[ "bioinfo_tools" ].get("sambamba") + ".sif").as_posix() + Path(singularity_image, config["bioinfo_tools"].get("sambamba") + ".sif").as_posix() params: - base_qual=10, - cov_start=50, - cov_end=1000, - cov_step=50, + base_qual = 10, + cov_start = 50, + cov_end = 1000, + cov_step = 50, filter_string="'not (unmapped or mate_is_unmapped) and not duplicate and not failed_quality_control and mapping_quality > 10'", conda = config["bioinfo_tools"].get("sambamba"), sample = '{sample}' @@ -44,7 +44,7 @@ rule sambamba_exon_depth: benchmark: Path(benchmark_dir, "sambamba_exon_depth_" + "{sample}.tsv").as_posix() singularity: - Path(singularity_image,config[ "bioinfo_tools" ].get("sambamba") + ".sif").as_posix() + Path(singularity_image, config["bioinfo_tools"].get("sambamba") + ".sif").as_posix() params: base_qual = 10, cov_1 = "50",