From ea5800f268540b882896016c67545925f564ed26 Mon Sep 17 00:00:00 2001 From: ashwini06 Date: Fri, 20 Aug 2021 14:25:19 +0200 Subject: [PATCH] refactor annotation rules according snakemake git etiquette (#696) * refactor annotation rules according snakemake git etiquette * update changelog * fix full abbreviations * replace message text with case name * refactored names in default_rules_to_deliver * add issues links to changelog * fix changelog * change tumoronly to tumor_only --- BALSAMIC/commands/report/deliver.py | 12 ++-- .../snakemake_rules/annotation/rankscore.rule | 21 +++++-- .../varcaller_filter_tumor_normal.rule | 19 ++++-- .../varcaller_filter_tumor_only.rule | 19 ++++-- .../annotation/varcaller_sv_filter.rule | 11 ++-- .../varcaller_wgs_filter_tumor_normal.rule | 21 +++++-- .../varcaller_wgs_filter_tumor_only.rule | 61 +++++++++++++------ BALSAMIC/snakemake_rules/annotation/vep.rule | 10 +-- CHANGELOG.rst | 14 ++--- 9 files changed, 123 insertions(+), 65 deletions(-) diff --git a/BALSAMIC/commands/report/deliver.py b/BALSAMIC/commands/report/deliver.py index 1700f4d7d..1425d49b9 100644 --- a/BALSAMIC/commands/report/deliver.py +++ b/BALSAMIC/commands/report/deliver.py @@ -97,11 +97,13 @@ def deliver(context, sample_config, analysis_type, rules_to_deliver, "vep_somatic", "vep_germline", "vep_stat", - "ngs_filter_vardict", - "ngs_filter_tnscope", - "ngs_filter_manta", - "ngs_filter_intersect", - "rankscore", + "bcftools_filter_vardict_tumor_only", + "bcftools_filter_vardict_tumor_normal", + "bcftools_filter_tnscope_tumor_only", + "bcftools_filter_tnscope_tumor_normal", + "bcftools_filter_manta", + "bcftools_intersect_tumor_only", + "genmod_score_vardict", "mergeBam_tumor", "mergeBam_normal", "cnvkit_paired", diff --git a/BALSAMIC/snakemake_rules/annotation/rankscore.rule b/BALSAMIC/snakemake_rules/annotation/rankscore.rule index fe4646dbc..6af5247b3 100644 --- a/BALSAMIC/snakemake_rules/annotation/rankscore.rule +++ b/BALSAMIC/snakemake_rules/annotation/rankscore.rule @@ -2,22 +2,31 @@ # coding: utf-8 # Rank variants according to a rankscore model -rule rankscore: +rule genmod_score_vardict: input: vcf = vep_dir + "{var_type}.somatic.{case_name}.vardict.all.filtered.pass.vcf.gz", rankscore = config["reference"]["rankscore"] output: vcf_pass = vep_dir + "{var_type}.somatic.{case_name}.vardict.all.filtered.pass.ranked.vcf.gz", + benchmark: + Path(benchmark_dir, 'genmod_score_vardict_' + "{var_type}.somatic.{case_name}.tsv").as_posix() + singularity: + Path(singularity_image, config["bioinfo_tools"].get("genmod") + ".sif").as_posix() params: housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "research"}, conda = config["bioinfo_tools"].get("genmod"), - threads: get_threads(cluster_config, 'rankscore') - singularity: Path(singularity_image, config["bioinfo_tools"].get("genmod") + ".sif").as_posix() - benchmark: - benchmark_dir + 'rankscore_' + "{var_type}.somatic.{case_name}.vardict.vep.tsv" + case_name = "{case_name}" + threads: + get_threads(cluster_config, 'genmod_score_vardict') + message: + ("Score annotated vardict variants using genmod" + "and compress vcf using bcftools on {params.case_name}") shell: """ source activate {params.conda}; -genmod score -r -c {input.rankscore} {input.vcf} | bcftools view -o {output.vcf_pass} -O z + +genmod score -r -c {input.rankscore} {input.vcf} | \ +bcftools view -o {output.vcf_pass} -O z; + tabix -p vcf -f {output.vcf_pass}; """ diff --git a/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_normal.rule b/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_normal.rule index 491d8f12d..e41374d6c 100644 --- a/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_normal.rule +++ b/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_normal.rule @@ -2,12 +2,16 @@ # coding: utf-8 # NGS filters for various scenarios -rule ngs_filter_vardict: +rule bcftools_filter_vardict_tumor_normal: input: vcf = vep_dir + "{var_type}.somatic.{case_name}.vardict.all.vcf.gz", output: vcf_filtered = vep_dir + "{var_type}.somatic.{case_name}.vardict.all.filtered.vcf.gz", vcf_pass = vep_dir + "{var_type}.somatic.{case_name}.vardict.all.filtered.pass.vcf.gz", + benchmark: + Path(benchmark_dir, 'bcftools_filter_vardict_tumor_normal_' + "{var_type}.somatic.{case_name}.tsv").as_posix() + singularity: + Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix() params: housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "clinical"}, conda = config["bioinfo_tools"].get("bcftools"), @@ -18,13 +22,15 @@ rule ngs_filter_vardict: AF_max = [VARDICT.AF_max.tag_value, VARDICT.AF_max.filter_name], possible_germline = "balsamic_possible_germline", pop_freq = [VARDICT.pop_freq.tag_value, VARDICT.pop_freq.filter_name], - threads: get_threads(cluster_config, 'ngs_filter_vardict') - singularity: Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix() - benchmark: - benchmark_dir + 'ngs_filter_' + "{var_type}.somatic.{case_name}.vardict.vep.tsv" + case_name = '{case_name}' + threads: + get_threads(cluster_config, 'bcftools_filter_vardict_tumor_normal') + message: + "Filtering vardict tumor-normal annotated variants using bcftools on {params.case_name}" shell: """ source activate {params.conda}; + bcftools view {input.vcf} \ | bcftools filter --include 'SMPL_MIN(FMT/MQ) >= {params.MQ[0]}' --soft-filter '{params.MQ[1]}' --mode + \ | bcftools filter --include 'INFO/DP >= {params.DP[0]}' --soft-filter '{params.DP[1]}' --mode '+' \ @@ -34,7 +40,10 @@ bcftools view {input.vcf} \ | bcftools filter --include 'INFO/GNOMADAF_popmax <= {params.pop_freq[0]} || INFO/GNOMADAF_popmax == \".\"' --soft-filter '{params.pop_freq[1]}' --mode '+' \ | bcftools filter --exclude 'INFO/STATUS ~ "germline/i"' --soft-filter '{params.possible_germline}' --mode '+' \ | bcftools view -o {output.vcf_filtered} -O z; + tabix -p vcf -f {output.vcf_filtered}; + bcftools view -f PASS -o {output.vcf_pass} -O z {output.vcf_filtered}; + tabix -p vcf -f {output.vcf_pass}; """ diff --git a/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_only.rule b/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_only.rule index 3c8929db3..5f80888b9 100644 --- a/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_only.rule +++ b/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_only.rule @@ -2,12 +2,16 @@ # coding: utf-8 # NGS filters for various scenarios -rule ngs_filter_vardict: +rule bcftools_filter_vardict_tumor_only: input: vcf = vep_dir + "{var_type}.somatic.{case_name}.vardict.all.vcf.gz", output: vcf_filtered = vep_dir + "{var_type}.somatic.{case_name}.vardict.all.filtered.vcf.gz", vcf_pass = vep_dir + "{var_type}.somatic.{case_name}.vardict.all.filtered.pass.vcf.gz", + benchmark: + Path(benchmark_dir, 'bcftools_filter_vardict_tumor_only_' + "{var_type}.somatic.{case_name}.tsv").as_posix() + singularity: + Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix() params: housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "clinical"}, conda = config["bioinfo_tools"].get("bcftools"), @@ -17,13 +21,15 @@ rule ngs_filter_vardict: AF_min = [VARDICT.AF_min.tag_value, VARDICT.AF_min.filter_name], AF_max = [VARDICT.AF_max.tag_value, VARDICT.AF_max.filter_name], pop_freq = [VARDICT.pop_freq.tag_value, VARDICT.pop_freq.filter_name], - threads: get_threads(cluster_config, 'ngs_filter_vardict') - singularity: Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix() - benchmark: - benchmark_dir + 'ngs_filter_' + "{var_type}.somatic.{case_name}.vardict.vep.tsv" + case_name = '{case_name}' + threads: + get_threads(cluster_config, 'bcftools_filter_vardict_tumor_only') + message: + "Filtering vardict tumor-only annotated variants using bcftools on {params.case_name}" shell: """ source activate {params.conda}; + bcftools view {input.vcf} \ | bcftools filter --include 'INFO/MQ >= {params.MQ[0]}' --soft-filter '{params.MQ[1]}' --mode '+' \ | bcftools filter --include 'INFO/DP >= {params.DP[0]}' --soft-filter '{params.DP[1]}' --mode '+' \ @@ -32,7 +38,10 @@ bcftools view {input.vcf} \ | bcftools filter --include 'INFO/AF < {params.AF_max[0]}' --soft-filter '{params.AF_max[1]}' --mode '+' \ | bcftools filter --include 'INFO/GNOMADAF_popmax <= {params.pop_freq[0]} || INFO/GNOMADAF_popmax == \".\"' --soft-filter '{params.pop_freq[1]}' --mode '+' \ | bcftools view -o {output.vcf_filtered} -O z; + tabix -p vcf -f {output.vcf_filtered}; + bcftools view -f PASS -o {output.vcf_pass} -O z {output.vcf_filtered}; + tabix -p vcf -f {output.vcf_pass}; """ diff --git a/BALSAMIC/snakemake_rules/annotation/varcaller_sv_filter.rule b/BALSAMIC/snakemake_rules/annotation/varcaller_sv_filter.rule index 1f0ca6d70..02305423e 100644 --- a/BALSAMIC/snakemake_rules/annotation/varcaller_sv_filter.rule +++ b/BALSAMIC/snakemake_rules/annotation/varcaller_sv_filter.rule @@ -3,27 +3,28 @@ # NGS filters for various scenarios -rule ngs_filter_manta: +rule bcftools_filter_manta: input: vcf = vep_dir + "{var_type}.somatic.{case_name}.manta.all.vcf.gz", output: vcf_sv_pass = vep_dir + "{var_type}.somatic.{case_name}.manta.all.filtered.pass.vcf.gz", benchmark: - benchmark_dir + 'ngs_filter_' + "{var_type}.somatic.{case_name}.manta.vep.tsv" + Path(benchmark_dir, 'bcftools_filter_manta_' + "{var_type}.somatic.{case_name}.tsv").as_posix() singularity: Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix() params: - case_name = "{case_name}", + case_name = '{case_name}', conda = config["bioinfo_tools"].get("bcftools"), housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "clinical"} threads: - get_threads(cluster_config, 'ngs_filter_manta') + get_threads(cluster_config, 'bcftools_filter_manta') message: - ("Running ngs filters on annotated Manta structural variant calling results.") + ("Filtering manta annotated structural variants using bcftools on {params.case_name}") shell: """ source activate {params.conda}; bcftools view --threads {threads} -f PASS -o {output.vcf_sv_pass} -O z {input.vcf}; + tabix -p vcf -f {output.vcf_sv_pass}; """ diff --git a/BALSAMIC/snakemake_rules/annotation/varcaller_wgs_filter_tumor_normal.rule b/BALSAMIC/snakemake_rules/annotation/varcaller_wgs_filter_tumor_normal.rule index e479a0f29..413db2d2d 100644 --- a/BALSAMIC/snakemake_rules/annotation/varcaller_wgs_filter_tumor_normal.rule +++ b/BALSAMIC/snakemake_rules/annotation/varcaller_wgs_filter_tumor_normal.rule @@ -2,12 +2,16 @@ # coding: utf-8 # NGS filters for various scenarios -rule ngs_filter_tnscope: +rule bcftools_filter_tnscope_tumor_normal: input: vcf = vep_dir + "{var_type}.somatic.{case_name}.tnscope.all.vcf.gz", output: vcf_filtered = vep_dir + "{var_type}.somatic.{case_name}.tnscope.filtered.vcf.gz", vcf_pass = vep_dir + "{var_type}.somatic.{case_name}.tnscope.filtered.pass.vcf.gz", + benchmark: + Path(benchmark_dir, 'bcftools_filter_tnscope_tumor_normal_' + "{var_type}.somatic.{case_name}.tsv").as_posix() + singularity: + Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix() params: conda = config["bioinfo_tools"].get("bcftools"), AD = [SENTIEON_CALLER.AD.tag_value, SENTIEON_CALLER.AD.filter_name], @@ -15,14 +19,16 @@ rule ngs_filter_tnscope: AF_min = [SENTIEON_CALLER.AF_min.tag_value, SENTIEON_CALLER.AF_min.filter_name], AF_max = [SENTIEON_CALLER.AF_max.tag_value, SENTIEON_CALLER.AF_max.filter_name], pop_freq = [SENTIEON_CALLER.pop_freq.tag_value, SENTIEON_CALLER.pop_freq.filter_name], - housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "clinical"} - threads: get_threads(cluster_config, 'ngs_filter_tnscope') - singularity: Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix() - benchmark: - benchmark_dir + 'ngs_filter_' + "{var_type}.somatic.{case_name}.tnscope.tsv" + housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "clinical"}, + case_name = '{case_name}' + threads: + get_threads(cluster_config, 'bcftools_filter_tnscope_tumor_normal') + message: + "Filtering wgs tumor-normal tnscope annotated variants using bcftools on {params.case_name}" shell: """ source activate {params.conda}; + bcftools view {input.vcf} \ | bcftools filter --threads {threads} --include 'SUM(FORMAT/AD[0:0]+FORMAT/AD[0:1]) >= {params.DP[0]} || SUM(FORMAT/AD[1:0]+FORMAT/AD[1:1]) >= {params.DP[0]}' --soft-filter '{params.DP[1]}' --mode '+' \ | bcftools filter --threads {threads} --include 'FORMAT/AD[0:1] >= {params.AD[0]}' --soft-filter '{params.AD[1]}' --mode '+' \ @@ -30,7 +36,10 @@ bcftools view {input.vcf} \ | bcftools filter --threads {threads} --include 'FORMAT/AF[0] < {params.AF_max[0]}' --soft-filter '{params.AF_max[1]}' --mode '+' \ | bcftools filter --threads {threads} --include 'INFO/GNOMADAF_popmax <= {params.pop_freq[0]} || INFO/GNOMADAF_popmax == \".\"' --soft-filter '{params.pop_freq[1]}' --mode '+' \ | bcftools view -o {output.vcf_filtered} -O z; + tabix -p vcf -f {output.vcf_filtered}; + bcftools view -f PASS -o {output.vcf_pass} -O z {output.vcf_filtered}; + tabix -p vcf -f {output.vcf_pass}; """ diff --git a/BALSAMIC/snakemake_rules/annotation/varcaller_wgs_filter_tumor_only.rule b/BALSAMIC/snakemake_rules/annotation/varcaller_wgs_filter_tumor_only.rule index 91704f3cb..7f6b9b143 100644 --- a/BALSAMIC/snakemake_rules/annotation/varcaller_wgs_filter_tumor_only.rule +++ b/BALSAMIC/snakemake_rules/annotation/varcaller_wgs_filter_tumor_only.rule @@ -2,12 +2,16 @@ # coding: utf-8 # NGS filters for various scenarios -rule ngs_filter_tnscope: +rule bcftools_filter_tnscope_tumor_only: input: vcf = vep_dir + "{var_type}.somatic.{case_name}.tnscope.all.vcf.gz", wgs_calling_file = config["reference"]["wgs_calling_interval"] output: vcf_filtered = vep_dir + "{var_type}.somatic.{case_name}.tnscope_isec.filtered.vcf.gz", + benchmark: + Path(benchmark_dir, 'bcftools_filter_tnscope_tumor_only_' + "{var_type}.somatic.{case_name}.tsv").as_posix() + singularity: + Path(singularity_image,config[ "bioinfo_tools" ].get("bcftools") + ".sif").as_posix() params: conda = config["bioinfo_tools"].get("bcftools"), DP = [SENTIEON_CALLER.DP.tag_value, SENTIEON_CALLER.DP.filter_name], @@ -18,16 +22,17 @@ rule ngs_filter_tnscope: strand_reads = [SENTIEON_CALLER.strand_reads.tag_value, SENTIEON_CALLER.strand_reads.filter_name], qss = [SENTIEON_CALLER.qss.tag_value, SENTIEON_CALLER.qss.filter_name], sor = [SENTIEON_CALLER.sor.tag_value, SENTIEON_CALLER.sor.filter_name], + case_name = '{case_name}' threads: - get_threads(cluster_config, 'ngs_filter_tnscope') - singularity: - Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix() - benchmark: - benchmark_dir + 'ngs_filter_' + "{var_type}.somatic.{case_name}.tnscope.tsv" + get_threads(cluster_config, 'bcftools_filter_tnscope_tumor_only') + message: + "Filtering wgs tumor-only tnscope annotated variants using bcftools on {params.case_name}" shell: """ source activate {params.conda}; + grep -v '^@' {input.wgs_calling_file} > {input.wgs_calling_file}.bed + bcftools view -f PASS --threads {threads} --regions-file {input.wgs_calling_file}.bed {input.vcf} \ | bcftools filter --threads {threads} --include 'SUM(FORMAT/AD[0:0]+FORMAT/AD[0:1]) >= {params.DP[0]}' --soft-filter '{params.DP[1]}' --mode '+' \ | bcftools filter --threads {threads} --include 'FORMAT/AD[0:1] > {params.AD[0]}' --soft-filter '{params.AD[1]}' --mode '+' \ @@ -38,15 +43,20 @@ bcftools view -f PASS --threads {threads} --regions-file {input.wgs_calling_file | bcftools filter --threads {threads} --include 'FORMAT/ALT_F1R2 > {params.strand_reads[0]} && (FORMAT/ALT_F1R2 > 0 && FORMAT/ALT_F2R1 > {params.strand_reads[0]} && FORMAT/REF_F1R2 > {params.strand_reads[0]} && FORMAT/REF_F2R1 > {params.strand_reads[0]})' --soft-filter '{params.strand_reads[1]}' --mode '+' \ | bcftools filter --threads {threads} --include "INFO/SOR < {params.sor[0]}" --soft-filter '{params.sor[1]}' --mode '+' \ | bcftools view -o {output.vcf_filtered} -O z; + tabix -p vcf -f {output.vcf_filtered}; """ -rule ngs_filter_tnhaplotyper: +rule bcftools_filter_tnhaplotyper_tumor_only: input: vcf = vep_dir + "{var_type}.somatic.{case_name}.tnhaplotyper.all.vcf.gz", wgs_calling_file = config["reference"]["wgs_calling_interval"] output: vcf_filtered = vep_dir + "{var_type}.somatic.{case_name}.tnhaplotyper_isec.filtered.vcf.gz", + benchmark: + Path(benchmark_dir, 'bcftools_filter_tnhaplotyper_tumor_only_' + "{var_type}.somatic.{case_name}.tsv").as_posix() + singularity: + Path(singularity_image, config[ "bioinfo_tools" ].get("bcftools") + ".sif").as_posix() params: conda = config["bioinfo_tools"].get("bcftools"), DP = [SENTIEON_CALLER.DP.tag_value, SENTIEON_CALLER.DP.filter_name], @@ -55,17 +65,18 @@ rule ngs_filter_tnhaplotyper: AF_max = [SENTIEON_CALLER.AF_max.tag_value, SENTIEON_CALLER.AF_max.filter_name], pop_freq = [SENTIEON_CALLER.pop_freq.tag_value, SENTIEON_CALLER.pop_freq.filter_name], strand_reads = [SENTIEON_CALLER.strand_reads.tag_value, SENTIEON_CALLER.strand_reads.filter_name], - qss = [SENTIEON_CALLER.qss.tag_value, SENTIEON_CALLER.qss.filter_name] + qss = [SENTIEON_CALLER.qss.tag_value, SENTIEON_CALLER.qss.filter_name], + case_name = '{case_name}' threads: - get_threads(cluster_config, 'ngs_filter_tnhaplotyper') - singularity: - Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix() - benchmark: - benchmark_dir + 'ngs_filter_' + "{var_type}.somatic.{case_name}.tnhaplotyper.tsv" + get_threads(cluster_config, 'bcftools_filter_tnhaplotyper_tumor_only') + message: + "Filtering wgs tumor-only tnhaplotyper annotated variants using bcftools on {params.case_name}" shell: """ source activate {params.conda}; + grep -v '^@' {input.wgs_calling_file} > {input.wgs_calling_file}.bed + bcftools view -f PASS --threads {threads} --regions-file {input.wgs_calling_file}.bed {input.vcf} \ | bcftools filter --threads {threads} --include 'SUM(FORMAT/AD[0:0]+FORMAT/AD[0:1]) >= {params.DP[0]}' --soft-filter '{params.DP[1]}' --mode '+' \ | bcftools filter --threads {threads} --include 'FORMAT/AD[0:1] >= {params.AD[0]}' --soft-filter '{params.AD[1]}' --mode '+' \ @@ -75,33 +86,43 @@ bcftools view -f PASS --threads {threads} --regions-file {input.wgs_calling_file | bcftools filter --threads {threads} --include 'SUM(FORMAT/QSS)/SUM(FORMAT/AD) >= {params.qss[0]}' --soft-filter '{params.qss[1]}' --mode '+' \ | bcftools filter --threads {threads} --include 'FORMAT/ALT_F1R2 > {params.strand_reads[0]} && (FORMAT/ALT_F1R2 > 0 && FORMAT/ALT_F2R1 > {params.strand_reads[0]} && FORMAT/REF_F1R2 > {params.strand_reads[0]} && FORMAT/REF_F2R1 > {params.strand_reads[0]})' --soft-filter '{params.strand_reads[1]}' --mode '+' \ | bcftools view -o {output.vcf_filtered} -O z; + tabix -p vcf -f {output.vcf_filtered}; """ -rule ngs_filter_intersect: +rule bcftools_intersect_tumor_only: input: tnscope = vep_dir + "{var_type}.somatic.{case_name}.tnscope_isec.filtered.vcf.gz", tnhaplotyper = vep_dir + "{var_type}.somatic.{case_name}.tnhaplotyper_isec.filtered.vcf.gz" output: vcf_filtered = vep_dir + "{var_type}.somatic.{case_name}.tnscope.filtered.vcf.gz", vcf_pass = vep_dir + "{var_type}.somatic.{case_name}.tnscope.filtered.pass.vcf.gz", + benchmark: + Path(benchmark_dir, 'bcftools_intersect_tumor_only_' + "{var_type}.somatic.{case_name}.tsv").as_posix() + singularity: + Path(singularity_image,config[ "bioinfo_tools" ].get("bcftools") + ".sif").as_posix() params: conda = config["bioinfo_tools"].get("bcftools"), vcf_dir = vep_dir + "sentieon_callers_intersect", - housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "clinical"} + housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "clinical"}, + case_name = '{case_name}' threads: - get_threads(cluster_config, 'ngs_filter_intersect') - singularity: - Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix() - benchmark: - benchmark_dir + 'ngs_filter_intersect' + "{var_type}.somatic.{case_name}.tsv" + get_threads(cluster_config, 'bcftools_intersect_tumor_only') + message: + "Retrieving common annotated variants between tnscope and tnhaplotyper using bcftools for {params.case_name}" shell: """ source activate {params.conda}; + bcftools isec {input.tnscope} {input.tnhaplotyper} -p {params.vcf_dir} -O z; + cp {params.vcf_dir}/0002.vcf.gz {output.vcf_filtered}; + tabix -p vcf -f {output.vcf_filtered}; + bcftools view -f PASS -o {output.vcf_pass} -O z {output.vcf_filtered}; + tabix -p vcf -f {output.vcf_pass}; + rm -r {params.vcf_dir} """ diff --git a/BALSAMIC/snakemake_rules/annotation/vep.rule b/BALSAMIC/snakemake_rules/annotation/vep.rule index 9d409e6a5..192ee5eda 100644 --- a/BALSAMIC/snakemake_rules/annotation/vep.rule +++ b/BALSAMIC/snakemake_rules/annotation/vep.rule @@ -11,7 +11,7 @@ rule vep_somatic: vcf_all = vep_dir + "{var_type}.somatic.{case_name}.{var_caller}.all.vcf.gz", vcf_summary = vep_dir + "{var_type}.somatic.{case_name}.{var_caller}.all.vcf.gz_summary.html", benchmark: - benchmark_dir + "vep_somatic_{var_type}.somatic.{case_name}.{var_caller}.tsv" + Path(benchmark_dir, "vep_somatic_{var_type}.somatic.{case_name}.{var_caller}.tsv").as_posix() singularity: Path(singularity_image, config["bioinfo_tools"].get("ensembl-vep") + ".sif").as_posix() params: @@ -70,7 +70,7 @@ rule tmb_calculation: output: tmb = vep_dir + "SNV.somatic.{case_name}.{var_caller}.balsamic_stat" benchmark: - benchmark_dir + "vep_stat_SNV.somatic.{case_name}.{var_caller}.tsv" + Path(benchmark_dir, "vep_stat_SNV.somatic.{case_name}.{var_caller}.tsv").as_posix() singularity: Path(singularity_image, config["bioinfo_tools"].get("ensembl-vep") + ".sif").as_posix() params: @@ -109,18 +109,18 @@ rule vep_germline: vcf_all = vep_dir + "{var_type}.germline.{sample}.{var_caller}.vcf.gz", vcf_summary = vep_dir + "{var_type}.germline.{sample}.{var_caller}.vcf.gz_summary.html", benchmark: - benchmark_dir + "vep_germline_{var_type}.germline.{sample}.{var_caller}.tsv" + Path(benchmark_dir, "vep_germline_{var_type}.germline.{sample}.{var_caller}.tsv").as_posix() singularity: Path(singularity_image, config["bioinfo_tools"].get("ensembl-vep") + ".sif").as_posix() params: conda = config["bioinfo_tools"].get("ensembl-vep"), housekeeper_id = {"id": "{sample}", "tags": "annotated-germline"}, - message_text = "{var_type}.germline.{sample}.{var_caller}", + sample = '{sample}', vep_cache = config["reference"]["vep"] threads: get_threads(cluster_config, 'vep') message: - "Running vep annotation on {params.message_text}" + "Running vep annotation on {params.sample}" shell: """ source activate {params.conda}; diff --git a/CHANGELOG.rst b/CHANGELOG.rst index c6f829049..ac3ef3da0 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -42,17 +42,15 @@ Fixed: * post-processing of the umi consensus in handling BI tags * vcf-filtered-clinical tag files will have all variants including PASS -* Refactor ``snakemake`` align rules according to ``snakemake`` etiquette -* Refactor snakemake ``fastqc`` ``vep`` contest and ``mosdepth`` rules according to ``snakemake`` etiquette -* Refactor snakemake ``manta`` rule according to snakemake etiquette +* Refactor snakemake ``annotate`` rules according to snakemake etiquette #636 +* Refactor snakemake ``align`` rules according to snakemake etiquette #636 +* Refactor snakemake ``fastqc`` ``vep`` contest and ``mosdepth`` rules according to ``snakemake`` etiquette #636 * Order of columns in QC and coverage report issue #601 * ``delly`` not showing in workflow at runtime #644 -* ``ascatngs`` documentaion links in ``FAQs`` #672 +* ``ascatngs`` documentation links in ``FAQs`` #672 * ``varcall_py36`` container build and push #703 -* delly run -* Refactor snakemake umi rules according to snakemake etiquette -* Refactor snakemake variant calling rules according to snakemake etiquette -* ascat documentaion links in FAQs +* Refactor snakemake ``umi`` rules according to snakemake etiquette #636 +* Refactor snakemake ``variant calling`` rules according to snakemake etiquette #636 * Wrong spacing in reference json issue #704