Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: remove stat files and add count files for scout upload VCFs #899

Merged
merged 12 commits into from
Apr 13, 2022
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ rule bcftools_filter_vardict_tumor_normal:
vcf = vep_dir + "{var_type}.somatic.{case_name}.vardict.all.vcf.gz",
output:
vcf_filtered = vep_dir + "{var_type}.somatic.{case_name}.vardict.all.filtered.vcf.gz",
vcf_pass = vep_dir + "{var_type}.somatic.{case_name}.vardict.all.filtered.pass.vcf.gz",
vcf_pass_vardict = vep_dir + "{var_type}.somatic.{case_name}.vardict.all.filtered.pass.vcf.gz",
bcftools_counts = vep_dir + "{var_type}.somatic.{case_name}.vardict.all.filtered.pass.stats"
benchmark:
Path(benchmark_dir, 'bcftools_filter_vardict_tumor_normal_' + "{var_type}.somatic.{case_name}.tsv").as_posix()
singularity:
Expand Down Expand Up @@ -42,9 +43,11 @@ bcftools view {input.vcf} \

tabix -p vcf -f {output.vcf_filtered};

bcftools view -f PASS -o {output.vcf_pass} -O z {output.vcf_filtered};
bcftools view -f PASS -o {output.vcf_pass_vardict} -O z {output.vcf_filtered};

tabix -p vcf -f {output.vcf_pass};
tabix -p vcf -f {output.vcf_pass_vardict};

bcftools +counts {output.vcf_pass_vardict} > {output.bcftools_counts};
"""


Expand All @@ -53,7 +56,7 @@ rule bcftools_filter_tnhaplotyper_tumor_normal:
vcf = vep_dir + "{var_type}.somatic.{case_name}.tnhaplotyper.all.vcf.gz",
output:
vcf_filtered = vep_dir + "{var_type}.somatic.{case_name}.tnhaplotyper.all.filtered.vcf.gz",
vcf_pass = vep_dir + "{var_type}.somatic.{case_name}.tnhaplotyper.all.filtered.pass.vcf.gz",
vcf_pass_tnhaplotyper = vep_dir + "{var_type}.somatic.{case_name}.tnhaplotyper.all.filtered.pass.vcf.gz",
benchmark:
Path(benchmark_dir, 'bcftools_filter_tnhaplotyper_tumor_normal' + "{var_type}.somatic.{case_name}.tsv").as_posix()
singularity:
Expand All @@ -74,9 +77,10 @@ bcftools view {input.vcf} \

tabix -p vcf -f {output.vcf_filtered};

bcftools view -f PASS -o {output.vcf_pass} -O z {output.vcf_filtered};
bcftools view -f PASS -o {output.vcf_pass_tnhaplotyper} -O z {output.vcf_filtered};

tabix -p vcf -f {output.vcf_pass_tnhaplotyper};

tabix -p vcf -f {output.vcf_pass};
"""


Expand All @@ -85,7 +89,8 @@ rule bcftools_filter_TNscope_umi_tumor_normal:
vcf = vep_dir + "{var_type}.somatic.{case_name}.TNscope_umi.all.vcf.gz",
output:
vcf_filtered = vep_dir + "{var_type}.somatic.{case_name}.TNscope_umi.all.filtered.vcf.gz",
vcf_pass = vep_dir + "{var_type}.somatic.{case_name}.TNscope_umi.all.filtered.pass.vcf.gz",
vcf_pass_TNscope_umi = vep_dir + "{var_type}.somatic.{case_name}.TNscope_umi.all.filtered.pass.vcf.gz",
bcftools_counts = vep_dir + "{var_type}.somatic.{case_name}.TNscope_umi.all.filtered.pass.stats"
benchmark:
Path(benchmark_dir, 'bcftools_filter_TNscope_umi_tumor_normal' + "{var_type}.somatic.{case_name}.tsv").as_posix()
singularity:
Expand All @@ -106,7 +111,9 @@ bcftools view {input.vcf} \

tabix -p vcf -f {output.vcf_filtered};

bcftools view -f PASS -o {output.vcf_pass} -O z {output.vcf_filtered};
bcftools view -f PASS -o {output.vcf_pass_TNscope_umi} -O z {output.vcf_filtered};

tabix -p vcf -f {output.vcf_pass_TNscope_umi};

tabix -p vcf -f {output.vcf_pass};
bcftools +counts {output.vcf_pass_TNscope_umi} > {output.bcftools_counts};
"""
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ rule bcftools_filter_vardict_tumor_only:
vcf = vep_dir + "{var_type}.somatic.{case_name}.vardict.all.vcf.gz",
output:
vcf_filtered = vep_dir + "{var_type}.somatic.{case_name}.vardict.all.filtered.vcf.gz",
vcf_pass = vep_dir + "{var_type}.somatic.{case_name}.vardict.all.filtered.pass.vcf.gz",
vcf_pass_vardict = vep_dir + "{var_type}.somatic.{case_name}.vardict.all.filtered.pass.vcf.gz",
bcftools_counts = vep_dir + "{var_type}.somatic.{case_name}.vardict.all.filtered.pass.stats"
benchmark:
Path(benchmark_dir, 'bcftools_filter_vardict_tumor_only_' + "{var_type}.somatic.{case_name}.tsv").as_posix()
singularity:
Expand Down Expand Up @@ -40,9 +41,11 @@ bcftools view {input.vcf} \

tabix -p vcf -f {output.vcf_filtered};

bcftools view -f PASS -o {output.vcf_pass} -O z {output.vcf_filtered};
bcftools view -f PASS -o {output.vcf_pass_vardict} -O z {output.vcf_filtered};

tabix -p vcf -f {output.vcf_pass};
tabix -p vcf -f {output.vcf_pass_vardict};

bcftools +counts {output.vcf_pass_vardict} > {output.bcftools_counts};
"""


Expand All @@ -51,7 +54,7 @@ rule bcftools_filter_tnhaplotyper_tumor_only:
vcf = vep_dir + "{var_type}.somatic.{case_name}.tnhaplotyper.all.vcf.gz",
output:
vcf_filtered = vep_dir + "{var_type}.somatic.{case_name}.tnhaplotyper.all.filtered.vcf.gz",
vcf_pass = vep_dir + "{var_type}.somatic.{case_name}.tnhaplotyper.all.filtered.pass.vcf.gz",
vcf_pass_tnhaplotyper = vep_dir + "{var_type}.somatic.{case_name}.tnhaplotyper.all.filtered.pass.vcf.gz",
benchmark:
Path(benchmark_dir, 'bcftools_filter_tnhaplotyper_tumor_only' + "{var_type}.somatic.{case_name}.tsv").as_posix()
singularity:
Expand All @@ -72,9 +75,10 @@ bcftools view {input.vcf} \

tabix -p vcf -f {output.vcf_filtered};

bcftools view -f PASS -o {output.vcf_pass} -O z {output.vcf_filtered};
bcftools view -f PASS -o {output.vcf_pass_tnhaplotyper} -O z {output.vcf_filtered};

tabix -p vcf -f {output.vcf_pass_tnhaplotyper};

tabix -p vcf -f {output.vcf_pass};
"""


Expand All @@ -83,7 +87,8 @@ rule bcftools_filter_TNscope_umi_tumor_only:
vcf = vep_dir + "{var_type}.somatic.{case_name}.TNscope_umi.all.vcf.gz",
output:
vcf_filtered = vep_dir + "{var_type}.somatic.{case_name}.TNscope_umi.all.filtered.vcf.gz",
vcf_pass = vep_dir + "{var_type}.somatic.{case_name}.TNscope_umi.all.filtered.pass.vcf.gz",
vcf_pass_TNscope_umi = vep_dir + "{var_type}.somatic.{case_name}.TNscope_umi.all.filtered.pass.vcf.gz",
bcftools_counts = vep_dir + "{var_type}.somatic.{case_name}.TNscope_umi.all.filtered.pass.stats"
benchmark:
Path(benchmark_dir, 'bcftools_filter_TNscope_umi_tumor_only' + "{var_type}.somatic.{case_name}.tsv").as_posix()
singularity:
Expand All @@ -104,7 +109,9 @@ bcftools view {input.vcf} \

tabix -p vcf -f {output.vcf_filtered};

bcftools view -f PASS -o {output.vcf_pass} -O z {output.vcf_filtered};
bcftools view -f PASS -o {output.vcf_pass_TNscope_umi} -O z {output.vcf_filtered};

tabix -p vcf -f {output.vcf_pass_TNscope_umi};

tabix -p vcf -f {output.vcf_pass};
bcftools +counts {output.vcf_pass_TNscope_umi} > {output.bcftools_counts};
"""
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ rule bcftools_filter_svdb:
input:
vcf = vep_dir + "SV.somatic.{case_name}.svdb.all.vcf.gz",
output:
vcf_sv_pass_svdb = vep_dir + "SV.somatic.{case_name}.svdb.all.filtered.pass.vcf.gz",
vcf_pass_svdb = vep_dir + "SV.somatic.{case_name}.svdb.all.filtered.pass.vcf.gz",
bcftools_counts = vep_dir + "SV.somatic.{case_name}.svdb.all.filtered.pass.stats"
benchmark:
benchmark_dir + "bcftools_filter_svdb_SV.somatic.{case_name}.svdb.vep.tsv"
singularity:
Expand All @@ -20,7 +21,9 @@ rule bcftools_filter_svdb:
"Filtering svdb merged Manta and Delly results for PASS variants using bcftools for sample '{params.case_name}' "
shell:
"""
bcftools view --threads {threads} -f .,PASS -o {output.vcf_sv_pass_svdb} -O z {input.vcf};
bcftools view --threads {threads} -f .,PASS -o {output.vcf_pass_svdb} -O z {input.vcf};

tabix -p vcf -f {output.vcf_sv_pass_svdb};
tabix -p vcf -f {output.vcf_pass_svdb};

bcftools +counts {output.vcf_pass_svdb} > {output.bcftools_counts};
"""
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ rule bcftools_filter_tnscope_tumor_normal:
vcf = vep_dir + "{var_type}.somatic.{case_name}.tnscope.all.vcf.gz",
output:
vcf_filtered = vep_dir + "{var_type}.somatic.{case_name}.tnscope.all.filtered.vcf.gz",
vcf_pass = vep_dir + "{var_type}.somatic.{case_name}.tnscope.all.filtered.pass.vcf.gz",
vcf_pass_tnscope = vep_dir + "{var_type}.somatic.{case_name}.tnscope.all.filtered.pass.vcf.gz",
bcftools_counts = vep_dir + "{var_type}.somatic.{case_name}.tnscope.all.filtered.pass.stats"
benchmark:
Path(benchmark_dir, 'bcftools_filter_tnscope_tumor_normal_' + "{var_type}.somatic.{case_name}.tsv").as_posix()
singularity:
Expand Down Expand Up @@ -38,9 +39,11 @@ bcftools view {input.vcf} \

tabix -p vcf -f {output.vcf_filtered};

bcftools view -f PASS -o {output.vcf_pass} -O z {output.vcf_filtered};
bcftools view -f PASS -o {output.vcf_pass_tnscope} -O z {output.vcf_filtered};

tabix -p vcf -f {output.vcf_pass};
tabix -p vcf -f {output.vcf_pass_tnscope};

bcftools +counts {output.vcf_pass_tnscope} > {output.bcftools_counts};
"""


Expand All @@ -50,7 +53,7 @@ rule bcftools_filter_tnhaplotyper_tumor_normal:
wgs_calling_file = config["reference"]["wgs_calling_interval"]
output:
vcf_filtered = vep_dir + "{var_type}.somatic.{case_name}.tnhaplotyper.all.filtered.vcf.gz",
vcf_pass = vep_dir + "{var_type}.somatic.{case_name}.tnhaplotyper.all.filtered.pass.vcf.gz",
vcf_pass_tnhaplotyper = vep_dir + "{var_type}.somatic.{case_name}.tnhaplotyper.all.filtered.pass.vcf.gz",
benchmark:
Path(benchmark_dir, 'bcftools_filter_tnhaplotyper_tumor_normal_' + "{var_type}.somatic.{case_name}.tsv").as_posix()
singularity:
Expand All @@ -73,7 +76,8 @@ bcftools view -f PASS --threads {threads} --regions-file {input.wgs_calling_file

tabix -p vcf -f {output.vcf_filtered};

bcftools view -f PASS --threads {threads} -O z -o {output.vcf_pass} {output.vcf_filtered}
bcftools view -f PASS --threads {threads} -O z -o {output.vcf_pass_tnhaplotyper} {output.vcf_filtered}

tabix -p vcf -f {output.vcf_pass_tnhaplotyper}

tabix -p vcf -f {output.vcf_pass}
"""
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ rule bcftools_filter_tnhaplotyper_tumor_only:
wgs_calling_file = config["reference"]["wgs_calling_interval"]
output:
vcf_filtered = vep_dir + "{var_type}.somatic.{case_name}.tnhaplotyper.all.filtered.vcf.gz",
vcf_pass = vep_dir + "{var_type}.somatic.{case_name}.tnhaplotyper.all.filtered.pass.vcf.gz",
vcf_pass_tnhaplotyper = vep_dir + "{var_type}.somatic.{case_name}.tnhaplotyper.all.filtered.pass.vcf.gz",
benchmark:
Path(benchmark_dir, 'bcftools_filter_tnhaplotyper_tumor_only_' + "{var_type}.somatic.{case_name}.tsv").as_posix()
singularity:
Expand Down Expand Up @@ -89,9 +89,10 @@ bcftools view -f PASS --threads {threads} --regions-file {input.wgs_calling_file

tabix -p vcf -f {output.vcf_filtered};

bcftools view -f PASS --threads {threads} -O z -o {output.vcf_pass} {output.vcf_filtered}
bcftools view -f PASS --threads {threads} -O z -o {output.vcf_pass_tnhaplotyper} {output.vcf_filtered}

tabix -p vcf -f {output.vcf_pass_tnhaplotyper}

tabix -p vcf -f {output.vcf_pass}
"""


Expand All @@ -101,7 +102,8 @@ rule bcftools_intersect_tumor_only:
tnhaplotyper = vep_dir + "{var_type}.somatic.{case_name}.tnhaplotyper.all.filtered.vcf.gz"
output:
vcf_filtered = vep_dir + "{var_type}.somatic.{case_name}.tnscope.all.filtered.vcf.gz",
vcf_pass = vep_dir + "{var_type}.somatic.{case_name}.tnscope.all.filtered.pass.vcf.gz",
vcf_pass_tnscope = vep_dir + "{var_type}.somatic.{case_name}.tnscope.all.filtered.pass.vcf.gz",
bcftools_counts = vep_dir + "{var_type}.somatic.{case_name}.tnscope.all.filtered.pass.stats"
benchmark:
Path(benchmark_dir, 'bcftools_intersect_tumor_only_' + "{var_type}.somatic.{case_name}.tsv").as_posix()
singularity:
Expand All @@ -122,9 +124,11 @@ cp {params.vcf_dir}/0002.vcf.gz {output.vcf_filtered};

tabix -p vcf -f {output.vcf_filtered};

bcftools view -f PASS -o {output.vcf_pass} -O z {output.vcf_filtered};
bcftools view -f PASS -o {output.vcf_pass_tnscope} -O z {output.vcf_filtered};

tabix -p vcf -f {output.vcf_pass_tnscope};

tabix -p vcf -f {output.vcf_pass};
bcftools +counts {output.vcf_pass_tnscope} > {output.bcftools_counts};

rm -r {params.vcf_dir}
"""
12 changes: 0 additions & 12 deletions BALSAMIC/snakemake_rules/annotation/vep.rule
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@ rule vep_somatic_snv:
cosmic = config["reference"]["cosmic"]
output:
vcf_all = temp(vep_dir + "SNV.somatic.{case_name}.{var_caller}.all.vcf.gz"),
vcf_summary = vep_dir + "SNV.somatic.{case_name}.{var_caller}.all.vcf.gz_summary.html",
bcftools_stats = vep_dir + "SNV.somatic.{case_name}.{var_caller}.all.stats"
benchmark:
Path(benchmark_dir, "vep_somatic_SNV.somatic.{case_name}.{var_caller}.tsv").as_posix()
singularity:
Expand Down Expand Up @@ -50,8 +48,6 @@ vep \

tabix -p vcf -f {output.vcf_all};

bcftools stats {output.vcf_all} > {output.bcftools_stats};

rm $tmpvcf;
"""

Expand All @@ -61,8 +57,6 @@ rule vep_somatic_sv:
header = vcf_dir + "SV.somatic.{case_name}.svdb.sample_name_map",
output:
vcf_all = temp(vep_dir + "SV.somatic.{case_name}.svdb.all.vcf.gz"),
vcf_summary = vep_dir + "SV.somatic.{case_name}.svdb.all.vcf.gz_summary.html",
bcftools_stats = vep_dir + "SV.somatic.{case_name}.svdb.all.stats"
benchmark:
Path(benchmark_dir, "vep_somatic_SV.somatic.{case_name}.svdb.tsv").as_posix()
singularity:
Expand Down Expand Up @@ -92,9 +86,6 @@ vep \
{params.vep_defaults} \

tabix -p vcf -f {output.vcf_all};

bcftools stats {output.vcf_all} > {output.bcftools_stats};

"""

rule tmb_calculation:
Expand Down Expand Up @@ -160,8 +151,6 @@ rule vep_germline:
cosmic = config["reference"]["cosmic"]
output:
vcf_all = vep_dir + "{var_type}.germline.{sample}.{var_caller}.vcf.gz",
vcf_summary = vep_dir + "{var_type}.germline.{sample}.{var_caller}.vcf.gz_summary.html",
bcftools_stats = vep_dir + "{var_type}.germline.{sample}.{var_caller}.all.stats"
benchmark:
Path(benchmark_dir, "vep_germline_{var_type}.germline.{sample}.{var_caller}.tsv").as_posix()
singularity:
Expand Down Expand Up @@ -192,5 +181,4 @@ vep \

tabix -p vcf -f {output.vcf_all};

bcftools stats {output.vcf_all} > {output.bcftools_stats};
"""
2 changes: 2 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ Added:
* Snakemake rule for creating `.cgh` files from `CNVkit` outputs #858
* SVdb to TGA workflow #871
* SVdb merge SV and CNV #871
* bcftools counts to varcall filter rules #898
* Additional WGS metrics to be stored in ``<case>_metrics_deliverables.yaml`` #907

Changed:
Expand All @@ -39,6 +40,7 @@ Removed
* vcfanno from SV annotation
* Removed `MSK_impact` and `MSK_impact_noStrelka` json files from config
* Cleanup of `strelka`, `pindel`, `mutect2` variables from BALSAMIC
* bcftools_stats from vep #898

[8.2.10]
--------
Expand Down