Skip to content

Commit

Permalink
refactor annotation rules according snakemake git etiquette (#696)
Browse files Browse the repository at this point in the history
* refactor annotation rules according snakemake git etiquette

* update changelog

* fix full abbreviations

* replace message text with case name

* refactored names in default_rules_to_deliver

* add issues links to changelog

* fix changelog

* change tumoronly to tumor_only
  • Loading branch information
ashwini06 authored Aug 20, 2021
1 parent 2d6fdbd commit ea5800f
Show file tree
Hide file tree
Showing 9 changed files with 123 additions and 65 deletions.
12 changes: 7 additions & 5 deletions BALSAMIC/commands/report/deliver.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,11 +97,13 @@ def deliver(context, sample_config, analysis_type, rules_to_deliver,
"vep_somatic",
"vep_germline",
"vep_stat",
"ngs_filter_vardict",
"ngs_filter_tnscope",
"ngs_filter_manta",
"ngs_filter_intersect",
"rankscore",
"bcftools_filter_vardict_tumor_only",
"bcftools_filter_vardict_tumor_normal",
"bcftools_filter_tnscope_tumor_only",
"bcftools_filter_tnscope_tumor_normal",
"bcftools_filter_manta",
"bcftools_intersect_tumor_only",
"genmod_score_vardict",
"mergeBam_tumor",
"mergeBam_normal",
"cnvkit_paired",
Expand Down
21 changes: 15 additions & 6 deletions BALSAMIC/snakemake_rules/annotation/rankscore.rule
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,31 @@
# coding: utf-8
# Rank variants according to a rankscore model

rule rankscore:
rule genmod_score_vardict:
input:
vcf = vep_dir + "{var_type}.somatic.{case_name}.vardict.all.filtered.pass.vcf.gz",
rankscore = config["reference"]["rankscore"]
output:
vcf_pass = vep_dir + "{var_type}.somatic.{case_name}.vardict.all.filtered.pass.ranked.vcf.gz",
benchmark:
Path(benchmark_dir, 'genmod_score_vardict_' + "{var_type}.somatic.{case_name}.tsv").as_posix()
singularity:
Path(singularity_image, config["bioinfo_tools"].get("genmod") + ".sif").as_posix()
params:
housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "research"},
conda = config["bioinfo_tools"].get("genmod"),
threads: get_threads(cluster_config, 'rankscore')
singularity: Path(singularity_image, config["bioinfo_tools"].get("genmod") + ".sif").as_posix()
benchmark:
benchmark_dir + 'rankscore_' + "{var_type}.somatic.{case_name}.vardict.vep.tsv"
case_name = "{case_name}"
threads:
get_threads(cluster_config, 'genmod_score_vardict')
message:
("Score annotated vardict variants using genmod"
"and compress vcf using bcftools on {params.case_name}")
shell:
"""
source activate {params.conda};
genmod score -r -c {input.rankscore} {input.vcf} | bcftools view -o {output.vcf_pass} -O z
genmod score -r -c {input.rankscore} {input.vcf} | \
bcftools view -o {output.vcf_pass} -O z;
tabix -p vcf -f {output.vcf_pass};
"""
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,16 @@
# coding: utf-8
# NGS filters for various scenarios

rule ngs_filter_vardict:
rule bcftools_filter_vardict_tumor_normal:
input:
vcf = vep_dir + "{var_type}.somatic.{case_name}.vardict.all.vcf.gz",
output:
vcf_filtered = vep_dir + "{var_type}.somatic.{case_name}.vardict.all.filtered.vcf.gz",
vcf_pass = vep_dir + "{var_type}.somatic.{case_name}.vardict.all.filtered.pass.vcf.gz",
benchmark:
Path(benchmark_dir, 'bcftools_filter_vardict_tumor_normal_' + "{var_type}.somatic.{case_name}.tsv").as_posix()
singularity:
Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix()
params:
housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "clinical"},
conda = config["bioinfo_tools"].get("bcftools"),
Expand All @@ -18,13 +22,15 @@ rule ngs_filter_vardict:
AF_max = [VARDICT.AF_max.tag_value, VARDICT.AF_max.filter_name],
possible_germline = "balsamic_possible_germline",
pop_freq = [VARDICT.pop_freq.tag_value, VARDICT.pop_freq.filter_name],
threads: get_threads(cluster_config, 'ngs_filter_vardict')
singularity: Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix()
benchmark:
benchmark_dir + 'ngs_filter_' + "{var_type}.somatic.{case_name}.vardict.vep.tsv"
case_name = '{case_name}'
threads:
get_threads(cluster_config, 'bcftools_filter_vardict_tumor_normal')
message:
"Filtering vardict tumor-normal annotated variants using bcftools on {params.case_name}"
shell:
"""
source activate {params.conda};
bcftools view {input.vcf} \
| bcftools filter --include 'SMPL_MIN(FMT/MQ) >= {params.MQ[0]}' --soft-filter '{params.MQ[1]}' --mode + \
| bcftools filter --include 'INFO/DP >= {params.DP[0]}' --soft-filter '{params.DP[1]}' --mode '+' \
Expand All @@ -34,7 +40,10 @@ bcftools view {input.vcf} \
| bcftools filter --include 'INFO/GNOMADAF_popmax <= {params.pop_freq[0]} || INFO/GNOMADAF_popmax == \".\"' --soft-filter '{params.pop_freq[1]}' --mode '+' \
| bcftools filter --exclude 'INFO/STATUS ~ "germline/i"' --soft-filter '{params.possible_germline}' --mode '+' \
| bcftools view -o {output.vcf_filtered} -O z;
tabix -p vcf -f {output.vcf_filtered};
bcftools view -f PASS -o {output.vcf_pass} -O z {output.vcf_filtered};
tabix -p vcf -f {output.vcf_pass};
"""
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,16 @@
# coding: utf-8
# NGS filters for various scenarios

rule ngs_filter_vardict:
rule bcftools_filter_vardict_tumor_only:
input:
vcf = vep_dir + "{var_type}.somatic.{case_name}.vardict.all.vcf.gz",
output:
vcf_filtered = vep_dir + "{var_type}.somatic.{case_name}.vardict.all.filtered.vcf.gz",
vcf_pass = vep_dir + "{var_type}.somatic.{case_name}.vardict.all.filtered.pass.vcf.gz",
benchmark:
Path(benchmark_dir, 'bcftools_filter_vardict_tumor_only_' + "{var_type}.somatic.{case_name}.tsv").as_posix()
singularity:
Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix()
params:
housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "clinical"},
conda = config["bioinfo_tools"].get("bcftools"),
Expand All @@ -17,13 +21,15 @@ rule ngs_filter_vardict:
AF_min = [VARDICT.AF_min.tag_value, VARDICT.AF_min.filter_name],
AF_max = [VARDICT.AF_max.tag_value, VARDICT.AF_max.filter_name],
pop_freq = [VARDICT.pop_freq.tag_value, VARDICT.pop_freq.filter_name],
threads: get_threads(cluster_config, 'ngs_filter_vardict')
singularity: Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix()
benchmark:
benchmark_dir + 'ngs_filter_' + "{var_type}.somatic.{case_name}.vardict.vep.tsv"
case_name = '{case_name}'
threads:
get_threads(cluster_config, 'bcftools_filter_vardict_tumor_only')
message:
"Filtering vardict tumor-only annotated variants using bcftools on {params.case_name}"
shell:
"""
source activate {params.conda};
bcftools view {input.vcf} \
| bcftools filter --include 'INFO/MQ >= {params.MQ[0]}' --soft-filter '{params.MQ[1]}' --mode '+' \
| bcftools filter --include 'INFO/DP >= {params.DP[0]}' --soft-filter '{params.DP[1]}' --mode '+' \
Expand All @@ -32,7 +38,10 @@ bcftools view {input.vcf} \
| bcftools filter --include 'INFO/AF < {params.AF_max[0]}' --soft-filter '{params.AF_max[1]}' --mode '+' \
| bcftools filter --include 'INFO/GNOMADAF_popmax <= {params.pop_freq[0]} || INFO/GNOMADAF_popmax == \".\"' --soft-filter '{params.pop_freq[1]}' --mode '+' \
| bcftools view -o {output.vcf_filtered} -O z;
tabix -p vcf -f {output.vcf_filtered};
bcftools view -f PASS -o {output.vcf_pass} -O z {output.vcf_filtered};
tabix -p vcf -f {output.vcf_pass};
"""
11 changes: 6 additions & 5 deletions BALSAMIC/snakemake_rules/annotation/varcaller_sv_filter.rule
Original file line number Diff line number Diff line change
Expand Up @@ -3,27 +3,28 @@
# NGS filters for various scenarios


rule ngs_filter_manta:
rule bcftools_filter_manta:
input:
vcf = vep_dir + "{var_type}.somatic.{case_name}.manta.all.vcf.gz",
output:
vcf_sv_pass = vep_dir + "{var_type}.somatic.{case_name}.manta.all.filtered.pass.vcf.gz",
benchmark:
benchmark_dir + 'ngs_filter_' + "{var_type}.somatic.{case_name}.manta.vep.tsv"
Path(benchmark_dir, 'bcftools_filter_manta_' + "{var_type}.somatic.{case_name}.tsv").as_posix()
singularity:
Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix()
params:
case_name = "{case_name}",
case_name = '{case_name}',
conda = config["bioinfo_tools"].get("bcftools"),
housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "clinical"}
threads:
get_threads(cluster_config, 'ngs_filter_manta')
get_threads(cluster_config, 'bcftools_filter_manta')
message:
("Running ngs filters on annotated Manta structural variant calling results.")
("Filtering manta annotated structural variants using bcftools on {params.case_name}")
shell:
"""
source activate {params.conda};
bcftools view --threads {threads} -f PASS -o {output.vcf_sv_pass} -O z {input.vcf};
tabix -p vcf -f {output.vcf_sv_pass};
"""
Original file line number Diff line number Diff line change
Expand Up @@ -2,35 +2,44 @@
# coding: utf-8
# NGS filters for various scenarios

rule ngs_filter_tnscope:
rule bcftools_filter_tnscope_tumor_normal:
input:
vcf = vep_dir + "{var_type}.somatic.{case_name}.tnscope.all.vcf.gz",
output:
vcf_filtered = vep_dir + "{var_type}.somatic.{case_name}.tnscope.filtered.vcf.gz",
vcf_pass = vep_dir + "{var_type}.somatic.{case_name}.tnscope.filtered.pass.vcf.gz",
benchmark:
Path(benchmark_dir, 'bcftools_filter_tnscope_tumor_normal_' + "{var_type}.somatic.{case_name}.tsv").as_posix()
singularity:
Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix()
params:
conda = config["bioinfo_tools"].get("bcftools"),
AD = [SENTIEON_CALLER.AD.tag_value, SENTIEON_CALLER.AD.filter_name],
DP = [SENTIEON_CALLER.DP.tag_value, SENTIEON_CALLER.DP.filter_name],
AF_min = [SENTIEON_CALLER.AF_min.tag_value, SENTIEON_CALLER.AF_min.filter_name],
AF_max = [SENTIEON_CALLER.AF_max.tag_value, SENTIEON_CALLER.AF_max.filter_name],
pop_freq = [SENTIEON_CALLER.pop_freq.tag_value, SENTIEON_CALLER.pop_freq.filter_name],
housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "clinical"}
threads: get_threads(cluster_config, 'ngs_filter_tnscope')
singularity: Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix()
benchmark:
benchmark_dir + 'ngs_filter_' + "{var_type}.somatic.{case_name}.tnscope.tsv"
housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "clinical"},
case_name = '{case_name}'
threads:
get_threads(cluster_config, 'bcftools_filter_tnscope_tumor_normal')
message:
"Filtering wgs tumor-normal tnscope annotated variants using bcftools on {params.case_name}"
shell:
"""
source activate {params.conda};
bcftools view {input.vcf} \
| bcftools filter --threads {threads} --include 'SUM(FORMAT/AD[0:0]+FORMAT/AD[0:1]) >= {params.DP[0]} || SUM(FORMAT/AD[1:0]+FORMAT/AD[1:1]) >= {params.DP[0]}' --soft-filter '{params.DP[1]}' --mode '+' \
| bcftools filter --threads {threads} --include 'FORMAT/AD[0:1] >= {params.AD[0]}' --soft-filter '{params.AD[1]}' --mode '+' \
| bcftools filter --threads {threads} --include 'FORMAT/AF[0] >= {params.AF_min[0]}' --soft-filter '{params.AF_min[1]}' --mode '+' \
| bcftools filter --threads {threads} --include 'FORMAT/AF[0] < {params.AF_max[0]}' --soft-filter '{params.AF_max[1]}' --mode '+' \
| bcftools filter --threads {threads} --include 'INFO/GNOMADAF_popmax <= {params.pop_freq[0]} || INFO/GNOMADAF_popmax == \".\"' --soft-filter '{params.pop_freq[1]}' --mode '+' \
| bcftools view -o {output.vcf_filtered} -O z;
tabix -p vcf -f {output.vcf_filtered};
bcftools view -f PASS -o {output.vcf_pass} -O z {output.vcf_filtered};
tabix -p vcf -f {output.vcf_pass};
"""
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,16 @@
# coding: utf-8
# NGS filters for various scenarios

rule ngs_filter_tnscope:
rule bcftools_filter_tnscope_tumor_only:
input:
vcf = vep_dir + "{var_type}.somatic.{case_name}.tnscope.all.vcf.gz",
wgs_calling_file = config["reference"]["wgs_calling_interval"]
output:
vcf_filtered = vep_dir + "{var_type}.somatic.{case_name}.tnscope_isec.filtered.vcf.gz",
benchmark:
Path(benchmark_dir, 'bcftools_filter_tnscope_tumor_only_' + "{var_type}.somatic.{case_name}.tsv").as_posix()
singularity:
Path(singularity_image,config[ "bioinfo_tools" ].get("bcftools") + ".sif").as_posix()
params:
conda = config["bioinfo_tools"].get("bcftools"),
DP = [SENTIEON_CALLER.DP.tag_value, SENTIEON_CALLER.DP.filter_name],
Expand All @@ -18,16 +22,17 @@ rule ngs_filter_tnscope:
strand_reads = [SENTIEON_CALLER.strand_reads.tag_value, SENTIEON_CALLER.strand_reads.filter_name],
qss = [SENTIEON_CALLER.qss.tag_value, SENTIEON_CALLER.qss.filter_name],
sor = [SENTIEON_CALLER.sor.tag_value, SENTIEON_CALLER.sor.filter_name],
case_name = '{case_name}'
threads:
get_threads(cluster_config, 'ngs_filter_tnscope')
singularity:
Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix()
benchmark:
benchmark_dir + 'ngs_filter_' + "{var_type}.somatic.{case_name}.tnscope.tsv"
get_threads(cluster_config, 'bcftools_filter_tnscope_tumor_only')
message:
"Filtering wgs tumor-only tnscope annotated variants using bcftools on {params.case_name}"
shell:
"""
source activate {params.conda};
grep -v '^@' {input.wgs_calling_file} > {input.wgs_calling_file}.bed
bcftools view -f PASS --threads {threads} --regions-file {input.wgs_calling_file}.bed {input.vcf} \
| bcftools filter --threads {threads} --include 'SUM(FORMAT/AD[0:0]+FORMAT/AD[0:1]) >= {params.DP[0]}' --soft-filter '{params.DP[1]}' --mode '+' \
| bcftools filter --threads {threads} --include 'FORMAT/AD[0:1] > {params.AD[0]}' --soft-filter '{params.AD[1]}' --mode '+' \
Expand All @@ -38,15 +43,20 @@ bcftools view -f PASS --threads {threads} --regions-file {input.wgs_calling_file
| bcftools filter --threads {threads} --include 'FORMAT/ALT_F1R2 > {params.strand_reads[0]} && (FORMAT/ALT_F1R2 > 0 && FORMAT/ALT_F2R1 > {params.strand_reads[0]} && FORMAT/REF_F1R2 > {params.strand_reads[0]} && FORMAT/REF_F2R1 > {params.strand_reads[0]})' --soft-filter '{params.strand_reads[1]}' --mode '+' \
| bcftools filter --threads {threads} --include "INFO/SOR < {params.sor[0]}" --soft-filter '{params.sor[1]}' --mode '+' \
| bcftools view -o {output.vcf_filtered} -O z;
tabix -p vcf -f {output.vcf_filtered};
"""

rule ngs_filter_tnhaplotyper:
rule bcftools_filter_tnhaplotyper_tumor_only:
input:
vcf = vep_dir + "{var_type}.somatic.{case_name}.tnhaplotyper.all.vcf.gz",
wgs_calling_file = config["reference"]["wgs_calling_interval"]
output:
vcf_filtered = vep_dir + "{var_type}.somatic.{case_name}.tnhaplotyper_isec.filtered.vcf.gz",
benchmark:
Path(benchmark_dir, 'bcftools_filter_tnhaplotyper_tumor_only_' + "{var_type}.somatic.{case_name}.tsv").as_posix()
singularity:
Path(singularity_image, config[ "bioinfo_tools" ].get("bcftools") + ".sif").as_posix()
params:
conda = config["bioinfo_tools"].get("bcftools"),
DP = [SENTIEON_CALLER.DP.tag_value, SENTIEON_CALLER.DP.filter_name],
Expand All @@ -55,17 +65,18 @@ rule ngs_filter_tnhaplotyper:
AF_max = [SENTIEON_CALLER.AF_max.tag_value, SENTIEON_CALLER.AF_max.filter_name],
pop_freq = [SENTIEON_CALLER.pop_freq.tag_value, SENTIEON_CALLER.pop_freq.filter_name],
strand_reads = [SENTIEON_CALLER.strand_reads.tag_value, SENTIEON_CALLER.strand_reads.filter_name],
qss = [SENTIEON_CALLER.qss.tag_value, SENTIEON_CALLER.qss.filter_name]
qss = [SENTIEON_CALLER.qss.tag_value, SENTIEON_CALLER.qss.filter_name],
case_name = '{case_name}'
threads:
get_threads(cluster_config, 'ngs_filter_tnhaplotyper')
singularity:
Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix()
benchmark:
benchmark_dir + 'ngs_filter_' + "{var_type}.somatic.{case_name}.tnhaplotyper.tsv"
get_threads(cluster_config, 'bcftools_filter_tnhaplotyper_tumor_only')
message:
"Filtering wgs tumor-only tnhaplotyper annotated variants using bcftools on {params.case_name}"
shell:
"""
source activate {params.conda};
grep -v '^@' {input.wgs_calling_file} > {input.wgs_calling_file}.bed
bcftools view -f PASS --threads {threads} --regions-file {input.wgs_calling_file}.bed {input.vcf} \
| bcftools filter --threads {threads} --include 'SUM(FORMAT/AD[0:0]+FORMAT/AD[0:1]) >= {params.DP[0]}' --soft-filter '{params.DP[1]}' --mode '+' \
| bcftools filter --threads {threads} --include 'FORMAT/AD[0:1] >= {params.AD[0]}' --soft-filter '{params.AD[1]}' --mode '+' \
Expand All @@ -75,33 +86,43 @@ bcftools view -f PASS --threads {threads} --regions-file {input.wgs_calling_file
| bcftools filter --threads {threads} --include 'SUM(FORMAT/QSS)/SUM(FORMAT/AD) >= {params.qss[0]}' --soft-filter '{params.qss[1]}' --mode '+' \
| bcftools filter --threads {threads} --include 'FORMAT/ALT_F1R2 > {params.strand_reads[0]} && (FORMAT/ALT_F1R2 > 0 && FORMAT/ALT_F2R1 > {params.strand_reads[0]} && FORMAT/REF_F1R2 > {params.strand_reads[0]} && FORMAT/REF_F2R1 > {params.strand_reads[0]})' --soft-filter '{params.strand_reads[1]}' --mode '+' \
| bcftools view -o {output.vcf_filtered} -O z;
tabix -p vcf -f {output.vcf_filtered};
"""

rule ngs_filter_intersect:
rule bcftools_intersect_tumor_only:
input:
tnscope = vep_dir + "{var_type}.somatic.{case_name}.tnscope_isec.filtered.vcf.gz",
tnhaplotyper = vep_dir + "{var_type}.somatic.{case_name}.tnhaplotyper_isec.filtered.vcf.gz"
output:
vcf_filtered = vep_dir + "{var_type}.somatic.{case_name}.tnscope.filtered.vcf.gz",
vcf_pass = vep_dir + "{var_type}.somatic.{case_name}.tnscope.filtered.pass.vcf.gz",
benchmark:
Path(benchmark_dir, 'bcftools_intersect_tumor_only_' + "{var_type}.somatic.{case_name}.tsv").as_posix()
singularity:
Path(singularity_image,config[ "bioinfo_tools" ].get("bcftools") + ".sif").as_posix()
params:
conda = config["bioinfo_tools"].get("bcftools"),
vcf_dir = vep_dir + "sentieon_callers_intersect",
housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "clinical"}
housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "clinical"},
case_name = '{case_name}'
threads:
get_threads(cluster_config, 'ngs_filter_intersect')
singularity:
Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix()
benchmark:
benchmark_dir + 'ngs_filter_intersect' + "{var_type}.somatic.{case_name}.tsv"
get_threads(cluster_config, 'bcftools_intersect_tumor_only')
message:
"Retrieving common annotated variants between tnscope and tnhaplotyper using bcftools for {params.case_name}"
shell:
"""
source activate {params.conda};
bcftools isec {input.tnscope} {input.tnhaplotyper} -p {params.vcf_dir} -O z;
cp {params.vcf_dir}/0002.vcf.gz {output.vcf_filtered};
tabix -p vcf -f {output.vcf_filtered};
bcftools view -f PASS -o {output.vcf_pass} -O z {output.vcf_filtered};
tabix -p vcf -f {output.vcf_pass};
rm -r {params.vcf_dir}
"""
Loading

0 comments on commit ea5800f

Please sign in to comment.