Skip to content

Commit

Permalink
refactor: Update the list of files to be stored and delivered (#915)
Browse files: browse the repository at this point in the history
  • Loading branch information
ivadym authored and rannick committed May 23, 2022
1 parent 523026d commit 3c87fe3
Show file tree
Hide file tree
Showing 22 changed files with 275 additions and 124 deletions.
4 changes: 2 additions & 2 deletions BALSAMIC/assets/scripts/collect_qc_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ def capture_kit_resolve_type(capture_kit: str):

if capture_kit == "None":
return None
else:
return capture_kit

return capture_kit


def get_multiqc_data_source(multiqc_data: dict, sample: str, tool: str) -> str:
Expand Down
38 changes: 19 additions & 19 deletions BALSAMIC/constants/workflow_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,31 +113,31 @@


DELIVERY_RULES = [
"fastp",
"multiqc",
"collect_custom_qc_metrics",
"vep_somatic_snv",
"vep_somatic_sv",
"vep_germline",
"tmb_calculation",
"bcftools_filter_TNscope_umi_tumor_only",
"bcftools_filter_TNscope_umi_tumor_normal",
"bcftools_filter_vardict_tumor_only",
"bcftools_filter_vardict_tumor_normal",
"bcftools_filter_tnscope_tumor_only",
"bcftools_filter_tnscope_tumor_normal",
"bcftools_filter_tnhaplotyper_tumor_only",
"bcftools_filter_tnhaplotyper_tumor_normal",
"bcftools_filter_svdb",
"bcftools_intersect_tumor_only",
"bcftools_filter_TNscope_umi_tumor_only",
"genmod_score_vardict",
"mergeBam_tumor",
"mergeBam_normal",
"mergeBam_tumor_umiconsensus",
"mergeBam_normal_umiconsensus",
"cnvkit_paired",
"vep_germline",
"svdb_merge_tumor_only",
"svdb_merge_tumor_normal",
"sentieon_TNscope_tumor_only",
"sentieon_TNscope",
"vardict_merge",
"sentieon_tnscope_umi",
"sentieon_tnscope_umi_tn",
"ascat_tumor_normal",
"ascat_tumor_normal_merge_output",
"delly_cnv_tumor_only",
"cnvkit_single",
"cnvkit_paired",
"vcf2cytosure_convert",
"ascat_tumor_normal_merge_output",
"bcftools_filter_svdb",
"bcftools_intersect_tumor_only",
"bcftools_filter_tnscope_tumor_normal",
"bcftools_filter_vardict_tumor_only",
"bcftools_filter_vardict_tumor_normal",
"bcftools_filter_TNscope_umi_tumor_only",
"bcftools_filter_TNscope_umi_tumor_normal",
]
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@ rule bcftools_filter_tnhaplotyper_tumor_normal:
Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix()
params:
pop_freq = [COMMON_FILTERS.pop_freq.tag_value, COMMON_FILTERS.pop_freq.filter_name],
housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "research"},
case_name = '{case_name}'
threads:
get_threads(cluster_config, 'bcftools_filter_tnhaplotyper_tumor_normal')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ rule bcftools_filter_tnhaplotyper_tumor_only:
Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix()
params:
pop_freq = [COMMON_FILTERS.pop_freq.tag_value, COMMON_FILTERS.pop_freq.filter_name],
housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "research"},
case_name = '{case_name}'
threads:
get_threads(cluster_config, 'bcftools_filter_tnhaplotyper_tumor_only')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ rule bcftools_filter_tnhaplotyper_tumor_normal:
Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix()
params:
pop_freq = [SENTIEON_CALLER.pop_freq.tag_value, SENTIEON_CALLER.pop_freq.filter_name],
housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "research"},
case_name = '{case_name}'
threads:
get_threads(cluster_config, 'bcftools_filter_tnhaplotyper_tumor_normal')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ rule bcftools_filter_tnscope_tumor_only:
strand_reads = [SENTIEON_CALLER.strand_reads.tag_value, SENTIEON_CALLER.strand_reads.filter_name],
qss = [SENTIEON_CALLER.qss.tag_value, SENTIEON_CALLER.qss.filter_name],
sor = [SENTIEON_CALLER.sor.tag_value, SENTIEON_CALLER.sor.filter_name],
housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "research"},
case_name = '{case_name}'
threads:
get_threads(cluster_config, 'bcftools_filter_tnscope_tumor_only')
Expand Down Expand Up @@ -67,7 +66,6 @@ rule bcftools_filter_tnhaplotyper_tumor_only:
pop_freq = [SENTIEON_CALLER.pop_freq.tag_value, SENTIEON_CALLER.pop_freq.filter_name],
strand_reads = [SENTIEON_CALLER.strand_reads.tag_value, SENTIEON_CALLER.strand_reads.filter_name],
qss = [SENTIEON_CALLER.qss.tag_value, SENTIEON_CALLER.qss.filter_name],
housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "research"},
case_name = '{case_name}'
threads:
get_threads(cluster_config, 'bcftools_filter_tnhaplotyper_tumor_only')
Expand Down
5 changes: 1 addition & 4 deletions BALSAMIC/snakemake_rules/annotation/vep.rule
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ rule vep_somatic_snv:
singularity:
Path(singularity_image, config["bioinfo_tools"].get("ensembl-vep") + ".sif").as_posix()
params:
housekeeper_id = {"id": "{case_name}", "tags": "annotated-somatic"},
ref_path = Path(config["reference"]["gnomad_variant"]).parent.as_posix(),
message_text = "SNV.somatic.{case_name}.{var_caller}.vcf.gz",
tmpvcf = vep_dir + "SNV.somatic.{case_name}.{var_caller}.tmp.vcf.gz",
Expand Down Expand Up @@ -62,7 +61,6 @@ rule vep_somatic_sv:
singularity:
Path(singularity_image, config["bioinfo_tools"].get("ensembl-vep") + ".sif").as_posix()
params:
housekeeper_id = {"id": "{case_name}", "tags": "annotated-somatic"},
message_text = "SV.somatic.{case_name}.svdb.vcf.gz",
vep_cache = config["reference"]["vep"],
vep_defaults = params.vep.vep_filters
Expand Down Expand Up @@ -100,7 +98,6 @@ rule tmb_calculation:
params:
af_cutoff = "0.05",
bed = config["panel"]["capture_kit"] if "panel" in config else "",
housekeeper_id = {"id": "{case_name}", "tags": "stat-somatic"},
message_text = "{var_type}.somatic.{case_name}.{var_caller}.all",
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
threads:
Expand Down Expand Up @@ -156,7 +153,7 @@ rule vep_germline:
singularity:
Path(singularity_image, config["bioinfo_tools"].get("ensembl-vep") + ".sif").as_posix()
params:
housekeeper_id = {"id": "{sample}", "tags": "annotated-germline"},
housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "annotated-germline"},
sample = '{sample}',
vep_cache = config["reference"]["vep"],
vep_defaults = params.vep.vep_filters
Expand Down
9 changes: 4 additions & 5 deletions BALSAMIC/snakemake_rules/quality_control/fastp.rule
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ rule fastp_umi:
read1=config["analysis"]["fastq_path"] + "{sample}" + "_1.fastq.gz",
read2=config["analysis"]["fastq_path"] + "{sample}" + "_2.fastq.gz",
output:
read1 = fastq_dir + "{sample}_1.umi_optimized.fastq.gz",
read2 = fastq_dir + "{sample}_2.umi_optimized.fastq.gz",
read1 = temp(fastq_dir + "{sample}_1.umi_optimized.fastq.gz"),
read2 = temp(fastq_dir + "{sample}_2.umi_optimized.fastq.gz"),
json = qc_dir + "fastp/{sample}_fastp_umi.json",
html = qc_dir + "fastp/{sample}_fastp_umi.html",
benchmark:
Expand Down Expand Up @@ -73,16 +73,15 @@ rule fastp:
read1 = fastq_dir + "{sample}_1.umi_optimized.fastq.gz",
read2 = fastq_dir + "{sample}_2.umi_optimized.fastq.gz"
output:
read1 = fastq_dir + "{sample}_1.fp.fastq.gz",
read2 = fastq_dir + "{sample}_2.fp.fastq.gz",
read1 = temp(fastq_dir + "{sample}_1.fp.fastq.gz"),
read2 = temp(fastq_dir + "{sample}_2.fp.fastq.gz"),
json = qc_dir + "fastp/{sample}_fastp.json",
html = qc_dir + "fastp/{sample}_fastp.html"
benchmark:
Path(benchmark_dir, "fastp_" + "{sample}.tsv").as_posix()
singularity:
Path(singularity_image, config["bioinfo_tools"].get("fastp") + ".sif").as_posix()
params:
housekeeper_id = {"id": "{sample}", "tags": "quality-trimmed-fastq"},
tmpdir = tmp_dir,
umi = " ".join(fastp_param_umi),
minimum_length = config["QC"]["min_seq_length"],
Expand Down
5 changes: 3 additions & 2 deletions BALSAMIC/snakemake_rules/umi/sentieon_varcall_tnscope.rule
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,12 @@ rule sentieon_tnscope_umi:
bed = config["panel"]["capture_kit"],
dbsnp = config["reference"]["dbsnp"]
output:
vcf = vcf_dir + "SNV.somatic."+ config["analysis"]["case_id"] + ".TNscope_umi.vcf.gz",
vcf_tnscope_umi = vcf_dir + "SNV.somatic."+ config["analysis"]["case_id"] + ".TNscope_umi.vcf.gz",
namemap = vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".TNscope_umi.sample_name_map"
benchmark:
Path(benchmark_dir, "sentieon_tnscope_umi_" + config["analysis"]["case_id"] + ".tsv").as_posix()
params:
housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "research"},
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
Expand Down Expand Up @@ -58,7 +59,7 @@ export SENTIEON_LICENSE={params.sentieon_lic};
--max_error_per_read {params.error_rate} \
--pcr_indel_model {params.pcr_model} \
--prune_factor {params.prune_factor} \
{output.vcf};
{output.vcf_tnscope_umi};
echo -e \"{params.tumor}\\tTUMOR\" > {output.namemap};
"""
5 changes: 3 additions & 2 deletions BALSAMIC/snakemake_rules/umi/sentieon_varcall_tnscope_tn.rule
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,12 @@ rule sentieon_tnscope_umi_tn:
bed = config["panel"]["capture_kit"],
dbsnp = config["reference"]["dbsnp"]
output:
vcf = vcf_dir + "SNV.somatic."+ config["analysis"]["case_id"] + ".TNscope_umi.vcf.gz",
vcf_tnscope_umi = vcf_dir + "SNV.somatic."+ config["analysis"]["case_id"] + ".TNscope_umi.vcf.gz",
namemap = vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".TNscope_umi.sample_name_map"
benchmark:
Path(benchmark_dir, "sentieon_tnscope_umi_" + config["analysis"]["case_id"] + ".tsv").as_posix()
params:
housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "research"},
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
Expand Down Expand Up @@ -62,7 +63,7 @@ export SENTIEON_LICENSE={params.sentieon_lic};
--max_error_per_read {params.error_rate} \
--pcr_indel_model {params.pcr_model} \
--prune_factor {params.prune_factor} \
{output.vcf};
{output.vcf_tnscope_umi};
echo -e \"{params.tumor}\\tTUMOR\\n{params.normal}\\tNORMAL\" > {output.namemap};
"""
Original file line number Diff line number Diff line change
Expand Up @@ -127,12 +127,13 @@ rule sentieon_TNscope_tumor_only:
bam = expand(bam_dir + "tumor.merged.bam"),
recal = expand(bam_dir + "tumor.merged.recal_data.table")
output:
vcf = vcf_dir + "sentieon_tnscope" + "/" + "ALL.somatic." + config["analysis"]["case_id"] + ".tnscope.vcf.gz",
vcf_tnscope = vcf_dir + "sentieon_tnscope" + "/" + "ALL.somatic." + config["analysis"]["case_id"] + ".tnscope.vcf.gz",
namemap_snv = vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".tnscope.sample_name_map",
namemap_sv = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".tnscope.sample_name_map",
benchmark:
Path(benchmark_dir, "sentieon_TNscope_tumor_only_" + config["analysis"]["case_id"] + ".tsv").as_posix()
params:
housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "research"},
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
tumor = "TUMOR",
tumor_options = VARCALL_PARAMS["tnscope"]["tumor"],
Expand Down Expand Up @@ -162,7 +163,7 @@ export SENTIEON_LICENSE={params.sentieon_lic};
--tumor_sample {params.tumor} {params.pon} \
--dbsnp {input.dbsnp} \
--pcr_indel_mode {params.pcr_model} \
{params.tumor_options} {output.vcf};
{params.tumor_options} {output.vcf_tnscope};
echo -e \"{params.tumor}\\tTUMOR\" > {output.namemap_snv};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -162,12 +162,13 @@ rule sentieon_TNscope:
recalT = expand(bam_dir + "tumor.merged.recal_data.table"),
recalN = expand(bam_dir + "normal.merged.recal_data.table"),
output:
vcf_all = vcf_dir + "sentieon_tnscope/ALL.somatic." + config["analysis"]["case_id"] + ".tnscope.vcf.gz",
vcf_tnscope = vcf_dir + "sentieon_tnscope/ALL.somatic." + config["analysis"]["case_id"] + ".tnscope.vcf.gz",
namemap_snv = vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".tnscope.sample_name_map",
namemap_sv = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".tnscope.sample_name_map",
benchmark:
Path(benchmark_dir, 'sentieon_TNscope_' + config[ "analysis" ][ "case_id" ] + ".tsv").as_posix()
params:
housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "research"},
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
tumor = "TUMOR",
normal = "NORMAL",
Expand Down Expand Up @@ -211,7 +212,7 @@ intermediate_vcf={params.tmpdir}/tn_sentieon_varcall_file
-r {input.ref} \
--algo TNModelApply \
-m {params.sentieon_ml_tnscope} \
-v $intermediate_vcf {output.vcf_all};
-v $intermediate_vcf {output.vcf_tnscope};
echo -e \"{params.tumor}\\tTUMOR\\n{params.normal}\\tNORMAL\" > {output.namemap_snv};
cp {output.namemap_snv} {output.namemap_sv}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,17 +122,20 @@ rule ascat_tumor_normal:
output:
final_vcf = vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat.vcf.gz",
ascat_copynumber = vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat.copynumber.txt.gz",
sample_statistics = vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat.samplestatistics.txt",
ascat_plots= expand(
vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat." + "{output_suffix}" + ".png",
output_suffix=["ascatprofile", "rawprofile", "ASPCF", "tumor", "germline", "sunrise"]
),
sample_statistics = temp(vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat.samplestatistics.txt"),
plot_ascat_profile = temp(vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat.ascatprofile.png"),
plot_raw_profile = temp(vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat.rawprofile.png"),
plot_aspcf = temp(vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat.ASPCF.png"),
plot_tumor = temp(vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat.tumor.png"),
plot_germline = temp(vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat.germline.png"),
plot_sunrise = temp(vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat.sunrise.png"),
namemap = vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat.sample_name_map",
benchmark:
benchmark_dir + 'ascat_tumor_normal_' + config["analysis"]["case_id"] + "_ascat.tsv"
singularity:
Path(singularity_image, config["bioinfo_tools"].get("ascatNgs") + ".sif").as_posix()
params:
housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "clinical"},
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
tumor = "TUMOR",
normal = "NORMAL",
Expand Down Expand Up @@ -164,17 +167,17 @@ cp {params.tmpdir}/{params.tumor}.copynumber.txt.gz {output.ascat_copynumber}
cp {params.tmpdir}/{params.tumor}.samplestatistics.txt {output.sample_statistics};
cp {params.tmpdir}/{params.tumor}.ASCATprofile.png {output.ascat_plots[0]};
cp {params.tmpdir}/{params.tumor}.ASCATprofile.png {output.plot_ascat_profile};
cp {params.tmpdir}/{params.tumor}.rawprofile.png {output.ascat_plots[1]};
cp {params.tmpdir}/{params.tumor}.rawprofile.png {output.plot_raw_profile};
cp {params.tmpdir}/{params.tumor}.ASPCF.png {output.ascat_plots[2]};
cp {params.tmpdir}/{params.tumor}.ASPCF.png {output.plot_aspcf};
cp {params.tmpdir}/{params.tumor}.tumour.png {output.ascat_plots[3]};
cp {params.tmpdir}/{params.tumor}.tumour.png {output.plot_tumor};
cp {params.tmpdir}/{params.tumor}.germline.png {output.ascat_plots[4]};
cp {params.tmpdir}/{params.tumor}.germline.png {output.plot_germline};
cp {params.tmpdir}/{params.tumor}.sunrise.png {output.ascat_plots[5]};
cp {params.tmpdir}/{params.tumor}.sunrise.png {output.plot_sunrise};
tabix -p vcf -f {output.final_vcf};
Expand All @@ -191,9 +194,9 @@ rule ascat_tumor_normal_merge_output:
output_suffix=["ascatprofile", "rawprofile", "ASPCF", "tumor", "germline", "sunrise"]
),
output:
ascat_output_pdf = vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat.output.pdf"
ascat_pdf = vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat.output.pdf"
params:
housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "research"},
housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "clinical"},
merge_ascat_output_script= get_script_path("create_pdf.py"),
singularity:
Path(singularity_image, "balsamic.sif").as_posix()
Expand All @@ -203,7 +206,7 @@ rule ascat_tumor_normal_merge_output:
"Merging the output plots and the sample statistics from ascatNGS into a single PDF"
shell:
"""
python {params.merge_ascat_output_script} {output.ascat_output_pdf} {input.sample_statistics} {input.ascat_plots}
python {params.merge_ascat_output_script} {output.ascat_pdf} {input.sample_statistics} {input.ascat_plots}
"""

rule svdb_merge_tumor_normal:
Expand All @@ -215,13 +218,14 @@ rule svdb_merge_tumor_normal:
vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".{caller}.vcf.gz",
caller=somatic_caller_cnv)
output:
svdb_vcf = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".svdb.vcf.gz",
vcf_svdb = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".svdb.vcf.gz",
namemap = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".svdb.sample_name_map",
benchmark:
Path(benchmark_dir, 'svdb_merge_tumor_normal_' + config["analysis"]["case_id"] + ".tsv")
singularity:
Path(singularity_image, config["bioinfo_tools"].get("svdb") + ".sif").as_posix()
params:
housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "research"},
tumor = get_sample_type(config["samples"], "tumor"),
normal = get_sample_type(config["samples"], "normal"),
case_name = config["analysis"]["case_id"],
Expand All @@ -236,7 +240,8 @@ rule svdb_merge_tumor_normal:
svdb --merge --no_intra --bnd_distance 5000 --overlap 0.80 \
--vcf {params.vcf} \
--priority {params.svdb_priority} | \
bgzip -l 9 -c > {output.svdb_vcf};
bgzip -l 9 -c > {output.vcf_svdb};
tabix -p vcf -f {output.vcf_svdb};
echo -e \"{params.tumor}\\tTUMOR\\n{params.normal}\\tNORMAL\" > {output.namemap};
"""
Loading

0 comments on commit 3c87fe3

Please sign in to comment.