diff --git a/BALSAMIC/assets/scripts/collect_qc_metrics.py b/BALSAMIC/assets/scripts/collect_qc_metrics.py index e376430f4..33834ec10 100755 --- a/BALSAMIC/assets/scripts/collect_qc_metrics.py +++ b/BALSAMIC/assets/scripts/collect_qc_metrics.py @@ -51,8 +51,8 @@ def capture_kit_resolve_type(capture_kit: str): if capture_kit == "None": return None - else: - return capture_kit + + return capture_kit def get_multiqc_data_source(multiqc_data: dict, sample: str, tool: str) -> str: diff --git a/BALSAMIC/constants/workflow_rules.py b/BALSAMIC/constants/workflow_rules.py index bbd5c0d0a..4f77c2731 100644 --- a/BALSAMIC/constants/workflow_rules.py +++ b/BALSAMIC/constants/workflow_rules.py @@ -113,31 +113,31 @@ DELIVERY_RULES = [ - "fastp", "multiqc", "collect_custom_qc_metrics", - "vep_somatic_snv", - "vep_somatic_sv", - "vep_germline", - "tmb_calculation", - "bcftools_filter_TNscope_umi_tumor_only", - "bcftools_filter_TNscope_umi_tumor_normal", - "bcftools_filter_vardict_tumor_only", - "bcftools_filter_vardict_tumor_normal", - "bcftools_filter_tnscope_tumor_only", - "bcftools_filter_tnscope_tumor_normal", - "bcftools_filter_tnhaplotyper_tumor_only", - "bcftools_filter_tnhaplotyper_tumor_normal", - "bcftools_filter_svdb", - "bcftools_intersect_tumor_only", - "bcftools_filter_TNscope_umi_tumor_only", - "genmod_score_vardict", "mergeBam_tumor", "mergeBam_normal", "mergeBam_tumor_umiconsensus", "mergeBam_normal_umiconsensus", - "cnvkit_paired", + "vep_germline", + "svdb_merge_tumor_only", + "svdb_merge_tumor_normal", + "sentieon_TNscope_tumor_only", + "sentieon_TNscope", + "vardict_merge", + "sentieon_tnscope_umi", + "sentieon_tnscope_umi_tn", + "ascat_tumor_normal", + "ascat_tumor_normal_merge_output", + "delly_cnv_tumor_only", "cnvkit_single", + "cnvkit_paired", "vcf2cytosure_convert", - "ascat_tumor_normal_merge_output", + "bcftools_filter_svdb", + "bcftools_intersect_tumor_only", + "bcftools_filter_tnscope_tumor_normal", + "bcftools_filter_vardict_tumor_only", + 
"bcftools_filter_vardict_tumor_normal", + "bcftools_filter_TNscope_umi_tumor_only", + "bcftools_filter_TNscope_umi_tumor_normal", ] diff --git a/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_normal.rule b/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_normal.rule index 532702769..a6068ea4e 100644 --- a/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_normal.rule +++ b/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_normal.rule @@ -63,7 +63,6 @@ rule bcftools_filter_tnhaplotyper_tumor_normal: Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix() params: pop_freq = [COMMON_FILTERS.pop_freq.tag_value, COMMON_FILTERS.pop_freq.filter_name], - housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "research"}, case_name = '{case_name}' threads: get_threads(cluster_config, 'bcftools_filter_tnhaplotyper_tumor_normal') diff --git a/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_only.rule b/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_only.rule index 7c90f12f8..90e338f76 100644 --- a/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_only.rule +++ b/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_only.rule @@ -61,7 +61,6 @@ rule bcftools_filter_tnhaplotyper_tumor_only: Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix() params: pop_freq = [COMMON_FILTERS.pop_freq.tag_value, COMMON_FILTERS.pop_freq.filter_name], - housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "research"}, case_name = '{case_name}' threads: get_threads(cluster_config, 'bcftools_filter_tnhaplotyper_tumor_only') diff --git a/BALSAMIC/snakemake_rules/annotation/varcaller_wgs_filter_tumor_normal.rule b/BALSAMIC/snakemake_rules/annotation/varcaller_wgs_filter_tumor_normal.rule index 017937cd3..4473d2974 100644 --- a/BALSAMIC/snakemake_rules/annotation/varcaller_wgs_filter_tumor_normal.rule +++ 
b/BALSAMIC/snakemake_rules/annotation/varcaller_wgs_filter_tumor_normal.rule @@ -60,7 +60,6 @@ rule bcftools_filter_tnhaplotyper_tumor_normal: Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix() params: pop_freq = [SENTIEON_CALLER.pop_freq.tag_value, SENTIEON_CALLER.pop_freq.filter_name], - housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "research"}, case_name = '{case_name}' threads: get_threads(cluster_config, 'bcftools_filter_tnhaplotyper_tumor_normal') diff --git a/BALSAMIC/snakemake_rules/annotation/varcaller_wgs_filter_tumor_only.rule b/BALSAMIC/snakemake_rules/annotation/varcaller_wgs_filter_tumor_only.rule index 4a73b17be..79befa5e7 100644 --- a/BALSAMIC/snakemake_rules/annotation/varcaller_wgs_filter_tumor_only.rule +++ b/BALSAMIC/snakemake_rules/annotation/varcaller_wgs_filter_tumor_only.rule @@ -23,7 +23,6 @@ rule bcftools_filter_tnscope_tumor_only: strand_reads = [SENTIEON_CALLER.strand_reads.tag_value, SENTIEON_CALLER.strand_reads.filter_name], qss = [SENTIEON_CALLER.qss.tag_value, SENTIEON_CALLER.qss.filter_name], sor = [SENTIEON_CALLER.sor.tag_value, SENTIEON_CALLER.sor.filter_name], - housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "research"}, case_name = '{case_name}' threads: get_threads(cluster_config, 'bcftools_filter_tnscope_tumor_only') @@ -67,7 +66,6 @@ rule bcftools_filter_tnhaplotyper_tumor_only: pop_freq = [SENTIEON_CALLER.pop_freq.tag_value, SENTIEON_CALLER.pop_freq.filter_name], strand_reads = [SENTIEON_CALLER.strand_reads.tag_value, SENTIEON_CALLER.strand_reads.filter_name], qss = [SENTIEON_CALLER.qss.tag_value, SENTIEON_CALLER.qss.filter_name], - housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "research"}, case_name = '{case_name}' threads: get_threads(cluster_config, 'bcftools_filter_tnhaplotyper_tumor_only') diff --git a/BALSAMIC/snakemake_rules/annotation/vep.rule b/BALSAMIC/snakemake_rules/annotation/vep.rule index a79526cc6..ad30ab891 100644 --- 
a/BALSAMIC/snakemake_rules/annotation/vep.rule +++ b/BALSAMIC/snakemake_rules/annotation/vep.rule @@ -15,7 +15,6 @@ rule vep_somatic_snv: singularity: Path(singularity_image, config["bioinfo_tools"].get("ensembl-vep") + ".sif").as_posix() params: - housekeeper_id = {"id": "{case_name}", "tags": "annotated-somatic"}, ref_path = Path(config["reference"]["gnomad_variant"]).parent.as_posix(), message_text = "SNV.somatic.{case_name}.{var_caller}.vcf.gz", tmpvcf = vep_dir + "SNV.somatic.{case_name}.{var_caller}.tmp.vcf.gz", @@ -62,7 +61,6 @@ rule vep_somatic_sv: singularity: Path(singularity_image, config["bioinfo_tools"].get("ensembl-vep") + ".sif").as_posix() params: - housekeeper_id = {"id": "{case_name}", "tags": "annotated-somatic"}, message_text = "SV.somatic.{case_name}.svdb.vcf.gz", vep_cache = config["reference"]["vep"], vep_defaults = params.vep.vep_filters @@ -100,7 +98,6 @@ rule tmb_calculation: params: af_cutoff = "0.05", bed = config["panel"]["capture_kit"] if "panel" in config else "", - housekeeper_id = {"id": "{case_name}", "tags": "stat-somatic"}, message_text = "{var_type}.somatic.{case_name}.{var_caller}.all", tmpdir = tempfile.mkdtemp(prefix=tmp_dir), threads: @@ -156,7 +153,7 @@ rule vep_germline: singularity: Path(singularity_image, config["bioinfo_tools"].get("ensembl-vep") + ".sif").as_posix() params: - housekeeper_id = {"id": "{sample}", "tags": "annotated-germline"}, + housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "annotated-germline"}, sample = '{sample}', vep_cache = config["reference"]["vep"], vep_defaults = params.vep.vep_filters diff --git a/BALSAMIC/snakemake_rules/quality_control/fastp.rule b/BALSAMIC/snakemake_rules/quality_control/fastp.rule index 0c1bd57fe..06296f4fc 100644 --- a/BALSAMIC/snakemake_rules/quality_control/fastp.rule +++ b/BALSAMIC/snakemake_rules/quality_control/fastp.rule @@ -32,8 +32,8 @@ rule fastp_umi: read1=config["analysis"]["fastq_path"] + "{sample}" + "_1.fastq.gz", 
read2=config["analysis"]["fastq_path"] + "{sample}" + "_2.fastq.gz", output: - read1 = fastq_dir + "{sample}_1.umi_optimized.fastq.gz", - read2 = fastq_dir + "{sample}_2.umi_optimized.fastq.gz", + read1 = temp(fastq_dir + "{sample}_1.umi_optimized.fastq.gz"), + read2 = temp(fastq_dir + "{sample}_2.umi_optimized.fastq.gz"), json = qc_dir + "fastp/{sample}_fastp_umi.json", html = qc_dir + "fastp/{sample}_fastp_umi.html", benchmark: @@ -73,8 +73,8 @@ rule fastp: read1 = fastq_dir + "{sample}_1.umi_optimized.fastq.gz", read2 = fastq_dir + "{sample}_2.umi_optimized.fastq.gz" output: - read1 = fastq_dir + "{sample}_1.fp.fastq.gz", - read2 = fastq_dir + "{sample}_2.fp.fastq.gz", + read1 = temp(fastq_dir + "{sample}_1.fp.fastq.gz"), + read2 = temp(fastq_dir + "{sample}_2.fp.fastq.gz"), json = qc_dir + "fastp/{sample}_fastp.json", html = qc_dir + "fastp/{sample}_fastp.html" benchmark: @@ -82,7 +82,6 @@ rule fastp: singularity: Path(singularity_image, config["bioinfo_tools"].get("fastp") + ".sif").as_posix() params: - housekeeper_id = {"id": "{sample}", "tags": "quality-trimmed-fastq"}, tmpdir = tmp_dir, umi = " ".join(fastp_param_umi), minimum_length = config["QC"]["min_seq_length"], diff --git a/BALSAMIC/snakemake_rules/umi/sentieon_varcall_tnscope.rule b/BALSAMIC/snakemake_rules/umi/sentieon_varcall_tnscope.rule index cff784b12..6dac5db99 100644 --- a/BALSAMIC/snakemake_rules/umi/sentieon_varcall_tnscope.rule +++ b/BALSAMIC/snakemake_rules/umi/sentieon_varcall_tnscope.rule @@ -12,11 +12,12 @@ rule sentieon_tnscope_umi: bed = config["panel"]["capture_kit"], dbsnp = config["reference"]["dbsnp"] output: - vcf = vcf_dir + "SNV.somatic."+ config["analysis"]["case_id"] + ".TNscope_umi.vcf.gz", + vcf_tnscope_umi = vcf_dir + "SNV.somatic."+ config["analysis"]["case_id"] + ".TNscope_umi.vcf.gz", namemap = vcf_dir + "SNV.somatic." 
+ config["analysis"]["case_id"] + ".TNscope_umi.sample_name_map" benchmark: Path(benchmark_dir, "sentieon_tnscope_umi_" + config["analysis"]["case_id"] + ".tsv").as_posix() params: + housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "research"}, tmpdir = tempfile.mkdtemp(prefix=tmp_dir), sentieon_exec = config["SENTIEON_EXEC"], sentieon_lic = config["SENTIEON_LICENSE"], @@ -58,7 +59,7 @@ export SENTIEON_LICENSE={params.sentieon_lic}; --max_error_per_read {params.error_rate} \ --pcr_indel_model {params.pcr_model} \ --prune_factor {params.prune_factor} \ -{output.vcf}; +{output.vcf_tnscope_umi}; echo -e \"{params.tumor}\\tTUMOR\" > {output.namemap}; """ diff --git a/BALSAMIC/snakemake_rules/umi/sentieon_varcall_tnscope_tn.rule b/BALSAMIC/snakemake_rules/umi/sentieon_varcall_tnscope_tn.rule index 64ded84bb..7205afc2c 100644 --- a/BALSAMIC/snakemake_rules/umi/sentieon_varcall_tnscope_tn.rule +++ b/BALSAMIC/snakemake_rules/umi/sentieon_varcall_tnscope_tn.rule @@ -12,11 +12,12 @@ rule sentieon_tnscope_umi_tn: bed = config["panel"]["capture_kit"], dbsnp = config["reference"]["dbsnp"] output: - vcf = vcf_dir + "SNV.somatic."+ config["analysis"]["case_id"] + ".TNscope_umi.vcf.gz", + vcf_tnscope_umi = vcf_dir + "SNV.somatic."+ config["analysis"]["case_id"] + ".TNscope_umi.vcf.gz", namemap = vcf_dir + "SNV.somatic." 
+ config["analysis"]["case_id"] + ".TNscope_umi.sample_name_map" benchmark: Path(benchmark_dir, "sentieon_tnscope_umi_" + config["analysis"]["case_id"] + ".tsv").as_posix() params: + housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "research"}, tmpdir = tempfile.mkdtemp(prefix=tmp_dir), sentieon_exec = config["SENTIEON_EXEC"], sentieon_lic = config["SENTIEON_LICENSE"], @@ -62,7 +63,7 @@ export SENTIEON_LICENSE={params.sentieon_lic}; --max_error_per_read {params.error_rate} \ --pcr_indel_model {params.pcr_model} \ --prune_factor {params.prune_factor} \ -{output.vcf}; +{output.vcf_tnscope_umi}; echo -e \"{params.tumor}\\tTUMOR\\n{params.normal}\\tNORMAL\" > {output.namemap}; """ diff --git a/BALSAMIC/snakemake_rules/variant_calling/sentieon_t_varcall.rule b/BALSAMIC/snakemake_rules/variant_calling/sentieon_t_varcall.rule index 1c9750b5e..75a8cefcf 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/sentieon_t_varcall.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/sentieon_t_varcall.rule @@ -127,12 +127,13 @@ rule sentieon_TNscope_tumor_only: bam = expand(bam_dir + "tumor.merged.bam"), recal = expand(bam_dir + "tumor.merged.recal_data.table") output: - vcf = vcf_dir + "sentieon_tnscope" + "/" + "ALL.somatic." + config["analysis"]["case_id"] + ".tnscope.vcf.gz", + vcf_tnscope = vcf_dir + "sentieon_tnscope" + "/" + "ALL.somatic." + config["analysis"]["case_id"] + ".tnscope.vcf.gz", namemap_snv = vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".tnscope.sample_name_map", namemap_sv = vcf_dir + "SV.somatic." 
+ config["analysis"]["case_id"] + ".tnscope.sample_name_map", benchmark: Path(benchmark_dir, "sentieon_TNscope_tumor_only_" + config["analysis"]["case_id"] + ".tsv").as_posix() params: + housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "research"}, tmpdir = tempfile.mkdtemp(prefix=tmp_dir), tumor = "TUMOR", tumor_options = VARCALL_PARAMS["tnscope"]["tumor"], @@ -162,7 +163,7 @@ export SENTIEON_LICENSE={params.sentieon_lic}; --tumor_sample {params.tumor} {params.pon} \ --dbsnp {input.dbsnp} \ --pcr_indel_mode {params.pcr_model} \ -{params.tumor_options} {output.vcf}; +{params.tumor_options} {output.vcf_tnscope}; echo -e \"{params.tumor}\\tTUMOR\" > {output.namemap_snv}; diff --git a/BALSAMIC/snakemake_rules/variant_calling/sentieon_tn_varcall.rule b/BALSAMIC/snakemake_rules/variant_calling/sentieon_tn_varcall.rule index f2b756710..91113ff1b 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/sentieon_tn_varcall.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/sentieon_tn_varcall.rule @@ -162,12 +162,13 @@ rule sentieon_TNscope: recalT = expand(bam_dir + "tumor.merged.recal_data.table"), recalN = expand(bam_dir + "normal.merged.recal_data.table"), output: - vcf_all = vcf_dir + "sentieon_tnscope/ALL.somatic." + config["analysis"]["case_id"] + ".tnscope.vcf.gz", + vcf_tnscope = vcf_dir + "sentieon_tnscope/ALL.somatic." + config["analysis"]["case_id"] + ".tnscope.vcf.gz", namemap_snv = vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".tnscope.sample_name_map", namemap_sv = vcf_dir + "SV.somatic." 
+ config["analysis"]["case_id"] + ".tnscope.sample_name_map", benchmark: Path(benchmark_dir, 'sentieon_TNscope_' + config[ "analysis" ][ "case_id" ] + ".tsv").as_posix() params: + housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "research"}, tmpdir = tempfile.mkdtemp(prefix=tmp_dir), tumor = "TUMOR", normal = "NORMAL", @@ -211,7 +212,7 @@ intermediate_vcf={params.tmpdir}/tn_sentieon_varcall_file -r {input.ref} \ --algo TNModelApply \ -m {params.sentieon_ml_tnscope} \ --v $intermediate_vcf {output.vcf_all}; +-v $intermediate_vcf {output.vcf_tnscope}; echo -e \"{params.tumor}\\tTUMOR\\n{params.normal}\\tNORMAL\" > {output.namemap_snv}; cp {output.namemap_snv} {output.namemap_sv} diff --git a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule index d08d06914..6c5fe40fb 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule @@ -122,17 +122,20 @@ rule ascat_tumor_normal: output: final_vcf = vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat.vcf.gz", ascat_copynumber = vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat.copynumber.txt.gz", - sample_statistics = vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat.samplestatistics.txt", - ascat_plots= expand( - vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat." + "{output_suffix}" + ".png", - output_suffix=["ascatprofile", "rawprofile", "ASPCF", "tumor", "germline", "sunrise"] - ), + sample_statistics = temp(vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat.samplestatistics.txt"), + plot_ascat_profile = temp(vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat.ascatprofile.png"), + plot_raw_profile = temp(vcf_dir + "CNV.somatic." 
+ config["analysis"]["case_id"] + ".ascat.rawprofile.png"), + plot_aspcf = temp(vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat.ASPCF.png"), + plot_tumor = temp(vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat.tumor.png"), + plot_germline = temp(vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat.germline.png"), + plot_sunrise = temp(vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat.sunrise.png"), namemap = vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat.sample_name_map", benchmark: benchmark_dir + 'ascat_tumor_normal_' + config["analysis"]["case_id"] + "_ascat.tsv" singularity: Path(singularity_image, config["bioinfo_tools"].get("ascatNgs") + ".sif").as_posix() params: + housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "clinical"}, tmpdir = tempfile.mkdtemp(prefix=tmp_dir), tumor = "TUMOR", normal = "NORMAL", @@ -164,17 +167,17 @@ cp {params.tmpdir}/{params.tumor}.copynumber.txt.gz {output.ascat_copynumber} cp {params.tmpdir}/{params.tumor}.samplestatistics.txt {output.sample_statistics}; -cp {params.tmpdir}/{params.tumor}.ASCATprofile.png {output.ascat_plots[0]}; +cp {params.tmpdir}/{params.tumor}.ASCATprofile.png {output.plot_ascat_profile}; -cp {params.tmpdir}/{params.tumor}.rawprofile.png {output.ascat_plots[1]}; +cp {params.tmpdir}/{params.tumor}.rawprofile.png {output.plot_raw_profile}; -cp {params.tmpdir}/{params.tumor}.ASPCF.png {output.ascat_plots[2]}; +cp {params.tmpdir}/{params.tumor}.ASPCF.png {output.plot_aspcf}; -cp {params.tmpdir}/{params.tumor}.tumour.png {output.ascat_plots[3]}; +cp {params.tmpdir}/{params.tumor}.tumour.png {output.plot_tumor}; -cp {params.tmpdir}/{params.tumor}.germline.png {output.ascat_plots[4]}; +cp {params.tmpdir}/{params.tumor}.germline.png {output.plot_germline}; -cp {params.tmpdir}/{params.tumor}.sunrise.png {output.ascat_plots[5]}; +cp {params.tmpdir}/{params.tumor}.sunrise.png {output.plot_sunrise}; tabix -p vcf -f 
{output.final_vcf}; @@ -191,9 +194,9 @@ rule ascat_tumor_normal_merge_output: output_suffix=["ascatprofile", "rawprofile", "ASPCF", "tumor", "germline", "sunrise"] ), output: - ascat_output_pdf = vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat.output.pdf" + ascat_pdf = vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat.output.pdf" params: - housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "research"}, + housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "clinical"}, merge_ascat_output_script= get_script_path("create_pdf.py"), singularity: Path(singularity_image, "balsamic.sif").as_posix() @@ -203,7 +206,7 @@ rule ascat_tumor_normal_merge_output: "Merging the output plots and the sample statistics from ascatNGS into a single PDF" shell: """ -python {params.merge_ascat_output_script} {output.ascat_output_pdf} {input.sample_statistics} {input.ascat_plots} +python {params.merge_ascat_output_script} {output.ascat_pdf} {input.sample_statistics} {input.ascat_plots} """ rule svdb_merge_tumor_normal: @@ -215,13 +218,14 @@ rule svdb_merge_tumor_normal: vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".{caller}.vcf.gz", caller=somatic_caller_cnv) output: - svdb_vcf = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".svdb.vcf.gz", + vcf_svdb = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".svdb.vcf.gz", namemap = vcf_dir + "SV.somatic." 
+ config["analysis"]["case_id"] + ".svdb.sample_name_map", benchmark: Path(benchmark_dir, 'svdb_merge_tumor_normal_' + config["analysis"]["case_id"] + ".tsv") singularity: Path(singularity_image, config["bioinfo_tools"].get("svdb") + ".sif").as_posix() params: + housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "research"}, tumor = get_sample_type(config["samples"], "tumor"), normal = get_sample_type(config["samples"], "normal"), case_name = config["analysis"]["case_id"], @@ -236,7 +240,8 @@ rule svdb_merge_tumor_normal: svdb --merge --no_intra --bnd_distance 5000 --overlap 0.80 \ --vcf {params.vcf} \ --priority {params.svdb_priority} | \ -bgzip -l 9 -c > {output.svdb_vcf}; +bgzip -l 9 -c > {output.vcf_svdb}; +tabix -p vcf -f {output.vcf_svdb}; echo -e \"{params.tumor}\\tTUMOR\\n{params.normal}\\tNORMAL\" > {output.namemap}; """ diff --git a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only.rule b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only.rule index ac11b2ae5..4a3620f8a 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only.rule @@ -87,8 +87,8 @@ rule delly_cnv_tumor_only: bcf = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".delly.bcf", map = config["reference"]["delly_mappability"], output: - cnv = vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".delly.bcf", - rd = vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".delly.cov.gz", + cnv_delly = vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".delly.bcf", + rd_delly = vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".dellycnv.cov.gz", namemap= vcf_dir + "CNV.somatic." 
+ config["analysis"]["case_id"] + ".dellycnv.sample_name_map", benchmark: benchmark_dir + 'delly_cnv_tumor_only_' + config["analysis"]["case_id"] + ".tsv" @@ -96,6 +96,7 @@ rule delly_cnv_tumor_only: Path(singularity_image, config["bioinfo_tools"].get("delly") + ".sif").as_posix() params: tmpdir = tempfile.mkdtemp(prefix=tmp_dir), + housekeeper_id= {"id": config["analysis"]["case_id"],"tags": "clinical"}, runmode = "local", tumor = "TUMOR", case_name = config["analysis"]["case_id"] @@ -105,7 +106,7 @@ rule delly_cnv_tumor_only: ("Calling copy number variants using delly for {params.case_name}") shell: """ -delly cnv -m {input.map} -g {input.fa} -c {output.rd} -o {output.cnv} -l {input.bcf} {input.bamT} +delly cnv -m {input.map} -g {input.fa} -c {output.rd_delly} -o {output.cnv_delly} -l {input.bcf} {input.bamT} echo -e \"{params.tumor}\\tTUMOR\" > {output.namemap}; @@ -149,13 +150,14 @@ rule svdb_merge_tumor_only: vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".{caller}.vcf.gz", caller=somatic_caller_cnv) output: - svdb_vcf = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".svdb.vcf.gz", + vcf_svdb = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".svdb.vcf.gz", namemap = vcf_dir + "SV.somatic." 
+ config["analysis"]["case_id"] + ".svdb.sample_name_map", benchmark: Path(benchmark_dir, 'svdb_merge_tumor_only_' + config["analysis"]["case_id"] + ".tsv") singularity: Path(singularity_image, config["bioinfo_tools"].get("svdb") + ".sif").as_posix() params: + housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "research"}, tumor = get_sample_type(config["samples"], "tumor"), case_name = config["analysis"]["case_id"], vcf= lambda wildcards, input:[input[index] + ":" + svdb_callers_prio[index] for index in range(0,len(input))], @@ -169,6 +171,8 @@ rule svdb_merge_tumor_only: svdb --merge --no_intra --bnd_distance 5000 --overlap 0.80 \ --vcf {params.vcf} \ --priority {params.svdb_priority} | \ -bgzip -l 9 -c > {output.svdb_vcf}; +bgzip -l 9 -c > {output.vcf_svdb}; +tabix -p vcf -f {output.vcf_svdb}; + echo -e \"{params.tumor}\\tTUMOR\" > {output.namemap}; """ diff --git a/BALSAMIC/snakemake_rules/variant_calling/somatic_tumor_normal.rule b/BALSAMIC/snakemake_rules/variant_calling/somatic_tumor_normal.rule index 532460b78..95d14e31c 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/somatic_tumor_normal.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/somatic_tumor_normal.rule @@ -50,10 +50,11 @@ rule vardict_merge: input: expand(vcf_dir + "vardict/split_vcf/{chrom}_vardict.vcf.gz", chrom=chromlist) output: - vcf = vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".vardict.vcf.gz", + vcf_vardict = vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".vardict.vcf.gz", yaml = vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".vardict.yaml", namemap = vcf_dir + "SNV.somatic." 
+ config["analysis"]["case_id"] + ".vardict.sample_name_map" params: + housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "research"}, tmpdir = tempfile.mkdtemp(prefix=tmp_dir), case_name = config["analysis"]["case_id"], benchmark: @@ -68,13 +69,13 @@ rule vardict_merge: """ mkdir -p {params.tmpdir}; -bcftools concat {input} | bcftools sort --temp-dir {params.tmpdir} - | bgzip > {output.vcf}; -tabix -f -p vcf {output.vcf}; +bcftools concat {input} | bcftools sort --temp-dir {params.tmpdir} - | bgzip > {output.vcf_vardict}; +tabix -f -p vcf {output.vcf_vardict}; echo -e \"{params.case_name}\\tTUMOR\\n{params.case_name}-match\\tNORMAL\" > {output.namemap}; echo -e \"{params.case_name}\" > {output.namemap}.tumor; echo -e \"{params.case_name}-match\" > {output.namemap}.normal; -echo '{{ vcf: {{ vardict: {{ name: vardict, path: {output.vcf} }} }} }}' > {output.yaml}; +echo '{{ vcf: {{ vardict: {{ name: vardict, path: {output.vcf_vardict} }} }} }}' > {output.yaml}; rm -rf {params.tmpdir}; """ diff --git a/BALSAMIC/snakemake_rules/variant_calling/somatic_tumor_only.rule b/BALSAMIC/snakemake_rules/variant_calling/somatic_tumor_only.rule index 37992d751..267fd091a 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/somatic_tumor_only.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/somatic_tumor_only.rule @@ -53,12 +53,13 @@ rule vardict_merge: output: namemap = vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".vardict.sample_name_map", yaml = vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".vardict.yaml", - vcf = vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".vardict.vcf.gz" + vcf_vardict = vcf_dir + "SNV.somatic." 
+ config["analysis"]["case_id"] + ".vardict.vcf.gz" benchmark: Path(benchmark_dir, 'vardict_merge_' + config["analysis"]["case_id"] + ".tsv").as_posix() singularity: Path(singularity_image, config["bioinfo_tools"].get("vardict") + ".sif").as_posix() params: + housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "research"}, tmpdir = tempfile.mkdtemp(prefix=tmp_dir), case_name = config["analysis"]["case_id"], threads: @@ -72,12 +73,12 @@ export TMPDIR={params.tmpdir}; bcftools concat {input} \ | bcftools sort --temp-dir {params.tmpdir} - \ -| bgzip > {output.vcf}; -tabix -f -p vcf {output.vcf}; +| bgzip > {output.vcf_vardict}; +tabix -f -p vcf {output.vcf_vardict}; echo -e \"{params.case_name}\\tTUMOR\" > {output.namemap}; echo -e \"{params.case_name}\" > {output.namemap}.tumor; -echo '{{ vcf: {{ vardict: {{ name: vardict , path: {output.vcf} }} }} }}' > {output.yaml}; +echo '{{ vcf: {{ vardict: {{ name: vardict , path: {output.vcf_vardict} }} }} }}' > {output.yaml}; """ diff --git a/BALSAMIC/workflows/balsamic.smk b/BALSAMIC/workflows/balsamic.smk index 071e9d743..a36e91bff 100644 --- a/BALSAMIC/workflows/balsamic.smk +++ b/BALSAMIC/workflows/balsamic.smk @@ -40,6 +40,11 @@ logging.getLogger("filelock").setLevel("WARN") tmp_dir = os.path.join(get_result_dir(config), "tmp", "" ) Path.mkdir(Path(tmp_dir), exist_ok=True) +# Set case id/name +case_id = config["analysis"]["case_id"] + +# Directories +analysis_dir = config["analysis"]["analysis_dir"] + "/" +case_id + "/" benchmark_dir = config["analysis"]["benchmark"] fastq_dir = get_result_dir(config) + "/fastq/" bam_dir = get_result_dir(config) + "/bam/" @@ -50,7 +55,6 @@ vcf_dir = get_result_dir(config) + "/vcf/" vep_dir = get_result_dir(config) + "/vep/" qc_dir = get_result_dir(config) + "/qc/" delivery_dir = get_result_dir(config) + "/delivery/" - umi_dir = get_result_dir(config) + "/umi/" umi_qc_dir = qc_dir + "umi_qc/" @@ -76,9 +80,6 @@ tumor_sample = get_sample_type(config["samples"], "tumor")[0] if 
config['analysis']['analysis_type'] == "paired": normal_sample = get_sample_type(config["samples"], "normal")[0] -# Set case id/name -case_id = config["analysis"]["case_id"] - # explicitly check if cluster_config dict has zero keys. if len(cluster_config.keys()) == 0: cluster_config = config @@ -231,47 +232,81 @@ for r in rules_to_include: # Define common and analysis specific outputs quality_control_results = [ + os.path.join(qc_dir,case_id + "_metrics_deliverables.yaml"), os.path.join(qc_dir, "multiqc_report.html"), - os.path.join(qc_dir, case_id + "_metrics_deliverables.yaml"), + os.path.join(qc_dir, "multiqc_data/multiqc_data.json") ] -analysis_specific_results = [expand(vep_dir + "{vcf}.vcf.gz", - vcf=get_vcf(config, germline_caller, germline_call_samples)), - expand(vep_dir + "{vcf}.all.vcf.gz", - vcf=get_vcf(config, somatic_caller, [config["analysis"]["case_id"]]))] +# Analysis results +analysis_specific_results = [] -if config["analysis"]["sequencing_type"] != "wgs": - analysis_specific_results.append(expand(vep_dir + "{vcf}.all.filtered.pass.ranked.vcf.gz", - vcf=get_vcf(config, ["vardict"], [config["analysis"]["case_id"]]))) +# Germline SNVs/SVs +analysis_specific_results.extend( + expand(vep_dir + "{vcf}.vcf.gz", vcf=get_vcf(config, germline_caller, germline_call_samples)) +) - analysis_specific_results.append(expand(vcf_dir + "CNV.somatic.{case_name}.{var_caller}.vcf2cytosure.cgh", - case_name=config["analysis"]["case_id"], - var_caller=["cnvkit"])) +# Raw VCFs +analysis_specific_results.extend( + expand(vcf_dir + "{vcf}.vcf.gz", vcf=get_vcf(config, somatic_caller, [case_id])) +) - analysis_specific_results.append(expand(umi_qc_dir + "{sample}.umi.mean_family_depth", sample=config["samples"])) - - if background_variant_file: - analysis_specific_results.extend([expand(umi_qc_dir + "{case_name}.{var_caller}.AFtable.txt", - case_name=config["analysis"]["case_id"], - var_caller=["TNscope_umi"])]), +# Filtered and passed post annotation VCFs 
+analysis_specific_results.extend( + expand(vep_dir + "{vcf}.all.filtered.pass.vcf.gz", vcf=get_vcf(config, somatic_caller, [case_id])) +) -#Calculate TMB per somatic variant caller -analysis_specific_results.extend(expand(vep_dir + "{vcf}.balsamic_stat", - vcf=get_vcf(config, somatic_caller_tmb, [config["analysis"]["case_id"]]))) +# TMB +analysis_specific_results.extend( + expand(vep_dir + "{vcf}.balsamic_stat", vcf=get_vcf(config, somatic_caller_tmb, [case_id])) +) -#Gather all the filtered and PASSed variants post annotation -analysis_specific_results.extend([expand(vep_dir + "{vcf}.all.filtered.pass.vcf.gz", - vcf=get_vcf(config, somatic_caller, [config["analysis"]["case_id"]]))]) - -LOG.info(f"Following outputs will be delivered {analysis_specific_results}") +# TGA specific files +if config["analysis"]["sequencing_type"] != "wgs": + # CNVkit + analysis_specific_results.append(cnv_dir + "tumor.merged.cns") + analysis_specific_results.extend(expand(cnv_dir + "tumor.merged-{plot}", plot=["diagram.pdf", "scatter.pdf"])) + analysis_specific_results.append(cnv_dir + case_id +".gene_metrics") + # vcf2cytosure + analysis_specific_results.extend(expand( + vcf_dir + "CNV.somatic.{case_name}.{var_caller}.vcf2cytosure.cgh", + case_name=case_id, + var_caller=["cnvkit"] + )) + # VarDict + analysis_specific_results.extend( + expand(vep_dir + "{vcf}.all.filtered.pass.ranked.vcf.gz", vcf=get_vcf(config, ["vardict"], [case_id])) + ) + # UMI + analysis_specific_results.extend(expand(umi_qc_dir + "{sample}.umi.mean_family_depth",sample=config["samples"])) + if background_variant_file: + analysis_specific_results.extend( + expand(umi_qc_dir + "{case_name}.{var_caller}.AFtable.txt", case_name=case_id, var_caller=["TNscope_umi"]) + ) +# AscatNgs +if config["analysis"]["sequencing_type"] == "wgs" and config['analysis']['analysis_type'] == "paired": + analysis_specific_results.extend( + expand(vcf_dir + "{vcf}.output.pdf", vcf=get_vcf(config, ["ascat"], [case_id])) + ) + 
analysis_specific_results.extend( + expand(vcf_dir + "{vcf}.copynumber.txt.gz", vcf=get_vcf(config, ["ascat"], [case_id])) + ) + +# Delly CNV +if config['analysis']['analysis_type'] == "single": + analysis_specific_results.extend( + expand(vcf_dir + "{vcf}.cov.gz",vcf=get_vcf(config,["dellycnv"],[case_id])) + ) + +# Dragen if config["analysis"]["sequencing_type"] == "wgs" and config['analysis']['analysis_type'] == "single": if "dragen" in config: - analysis_specific_results.extend([Path(result_dir, "dragen", "SNV.somatic." + config["analysis"]["case_id"] + ".dragen_tumor.bam").as_posix(), - Path(result_dir, "dragen", "SNV.somatic." + config["analysis"]["case_id"] + ".dragen.vcf.gz").as_posix()]) + analysis_specific_results.extend([ + Path(result_dir, "dragen", "SNV.somatic." + case_id + ".dragen_tumor.bam").as_posix(), + Path(result_dir, "dragen", "SNV.somatic." + case_id + ".dragen.vcf.gz").as_posix() + ]) -if config["analysis"]["sequencing_type"] == "wgs" and config['analysis']['analysis_type'] == "paired": - analysis_specific_results.append(expand(vcf_dir + "{vcf}.output.pdf", vcf=get_vcf(config, ["ascat"], [config["analysis"]["case_id"]]))) +LOG.info(f"Following outputs will be delivered {analysis_specific_results}") if 'benchmark_plots' in config: log_dir = config["analysis"]["log"] @@ -303,20 +338,20 @@ if 'benchmark_plots' in config: for plots in my_rule_plots: plots.unlink() - - if 'delivery' in config: - wildcard_dict = {"sample": list(config["samples"].keys())+["tumor", "normal"], - "case_name": config["analysis"]["case_id"], - "allow_missing": True - } + wildcard_dict = { + "sample": list(config["samples"].keys())+["tumor", "normal"], + "case_name": case_id, + "allow_missing": True + } if config['analysis']["analysis_type"] in ["paired", "single"]: - wildcard_dict.update({"var_type": ["CNV", "SNV", "SV"], - "var_class": ["somatic", "germline"], - "var_caller": somatic_caller + germline_caller, - "bedchrom": config["panel"]["chrom"] if "panel" in config 
else [], - }) + wildcard_dict.update({ + "var_type": ["CNV", "SNV", "SV"], + "var_class": ["somatic", "germline"], + "var_caller": somatic_caller + germline_caller, + "bedchrom": config["panel"]["chrom"] if "panel" in config else [], + }) if 'rules_to_deliver' in config: rules_to_deliver = config['rules_to_deliver'].split(",") @@ -338,9 +373,7 @@ if 'delivery' in config: output_files_ready.extend(files_to_deliver) output_files_ready = [dict(zip(output_files_ready[0], value)) for value in output_files_ready[1:]] - delivery_ready = os.path.join(get_result_dir(config), - "delivery_report", - config["analysis"]["case_id"] + "_delivery_ready.hk") + delivery_ready = os.path.join(get_result_dir(config), "delivery_report", case_id + "_delivery_ready.hk") write_json(output_files_ready, delivery_ready) FormatFile(delivery_ready) @@ -359,7 +392,7 @@ rule all: # Perform validation of extracted QC metrics try: - validate_qc_metrics(read_yaml(input[1])) + validate_qc_metrics(read_yaml(input[0])) except ValueError as val_exc: LOG.error(val_exc) raise BalsamicError diff --git a/CHANGELOG.rst b/CHANGELOG.rst index cc44815ed..cee4722ac 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -36,6 +36,7 @@ Changed: * container base_image (align_qc, annotate, coverage_qc, varcall_cnvkit, varcall_py36) to 4.10.3-alpine #921 * update container (align_qc, annotate, coverage_qc, varcall_cnvkit,varcall_py36) bioinfo tool versions #921 * update tool versions (align_qc, annotate, coverage_qc, varcall_cnvkit) in methods and softwares docs #921 +* Updated the list of files to be stored and delivered #848 Fixed: ^^^^^^ @@ -43,6 +44,7 @@ Fixed: * ``collect_qc_metrics.py`` failing for WGS cases with empty ``capture_kit`` argument #850 * QC metric validation for different panel bed version #855 * Fixed development version of ``fpdf2`` to ``2.4.6`` #878 +* Added missing svdb index file #848 Removed ^^^^^^^ diff --git a/tests/commands/report/test_deliver.py b/tests/commands/report/test_deliver.py index 
43477c106..6a5e52754 100644 --- a/tests/commands/report/test_deliver.py +++ b/tests/commands/report/test_deliver.py @@ -58,17 +58,17 @@ def test_deliver_tumor_normal_panel( # Actual delivery files dummies with and without index cnv_result_dir = Path(helpers.result_dir, "cnv") cnv_result_dir.mkdir(parents=True, exist_ok=True) - actual_delivery_file = Path(cnv_result_dir, "tumor.merged.cnr") + actual_delivery_file = Path(cnv_result_dir, "tumor.merged.cns") actual_delivery_file.touch() vep_result_dir = Path(helpers.result_dir, "vep") vep_result_dir.mkdir(parents=True, exist_ok=True) touch_vcf_delivery_file = Path( - vep_result_dir, "SNV.somatic." + helpers.case_id + ".vardict.all.vcf.gz" + vep_result_dir, "SNV.somatic." + helpers.case_id + ".vardict.vcf.gz" ) touch_vcf_delivery_file.touch() touch_vcf_delivery_file_index = Path( - vep_result_dir, "SNV.somatic." + helpers.case_id + ".vardict.all.vcf.gz.tbi" + vep_result_dir, "SNV.somatic." + helpers.case_id + ".vardict.vcf.gz.tbi" ) touch_vcf_delivery_file_index.touch() diff --git a/tests/conftest.py b/tests/conftest.py index bfbd55af8..f5eb9aefb 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -9,7 +9,7 @@ from click.testing import CliRunner from BALSAMIC.utils.cli import read_yaml -from .helpers import ConfigHelper +from .helpers import ConfigHelper, Map from BALSAMIC.commands.base import cli from BALSAMIC import __version__ as balsamic_version @@ -673,3 +673,49 @@ def qc_requested_metrics(): def qc_extracted_metrics(metrics_yaml_path): """Extracted and formatted QC metrics""" return read_yaml(metrics_yaml_path) + + +@pytest.fixture(scope="function") +def snakemake_fastqc_rule(tumor_only_config, helpers): + """FastQC snakemake mock rule""" + + helpers.read_config(tumor_only_config) + fastq_path = os.path.join( + helpers.analysis_dir, + helpers.case_id, + "analysis", + "fastq", + "concatenated_tumor_XXXXXX_R_{read}.fastq.gz", + ) + + return Map( + { + "fastqc": Map( + { + "params": Map( + { + "housekeeper_id": 
class Map(dict):
    """Mock dictionary whose values can also be read and written with dot notation.

    Used in tests to imitate nested objects (e.g. ``rules.fastqc.output``) with
    plain nested dictionaries.

    Behaviour:
      * ``m.key`` returns ``m["key"]``; a missing, non-dunder key yields ``None``.
      * ``m.key = value`` and ``m["key"] = value`` are equivalent.
      * ``del m.key`` and ``del m["key"]`` are equivalent and raise ``KeyError``
        when the key is absent.
      * Every item stored through ``__init__``, ``update``, item assignment or
        attribute assignment is mirrored into the instance ``__dict__`` so that
        ``vars(m)`` exposes the same content.

    Note: ``pop``, ``popitem``, ``clear`` and ``setdefault`` are inherited from
    ``dict`` unchanged and do not touch the ``__dict__`` mirror.
    """

    def __init__(self, *args, **kwargs):
        """Build the mapping from the same arguments accepted by ``dict``."""
        super().__init__()
        # Route every initial item through __setitem__ so the __dict__ mirror is
        # filled for mappings, iterables of pairs and keyword arguments alike.
        self.update(*args, **kwargs)

    def update(self, *args, **kwargs):
        """Update the mapping like ``dict.update`` while keeping the mirror in sync."""
        for key, value in dict(*args, **kwargs).items():
            self[key] = value

    def __getattr__(self, attr):
        """Return the value stored under ``attr`` or ``None`` if it is missing.

        Only called when regular attribute lookup fails. Dunder names raise
        ``AttributeError`` so that protocol probes (``copy``, ``pickle``,
        ``hasattr(obj, "__setstate__")`` ...) do not receive a non-callable ``None``.
        """
        if attr.startswith("__") and attr.endswith("__"):
            raise AttributeError(attr)
        # dict.get is called explicitly: a stored key named "get" would otherwise
        # shadow the method through the __dict__ mirror.
        return dict.get(self, attr)

    def __setattr__(self, key, value):
        """Store attribute assignments as dictionary items."""
        self[key] = value

    def __setitem__(self, key, value):
        """Store the item and mirror it into the instance ``__dict__``."""
        super().__setitem__(key, value)
        self.__dict__[key] = value

    def __delattr__(self, item):
        """Delete the dictionary item named ``item`` (``KeyError`` if absent)."""
        del self[item]

    def __delitem__(self, key):
        """Delete the item and its mirror entry (``KeyError`` if the key is absent)."""
        super().__delitem__(key)
        # The mirror entry may be missing (e.g. item added via setdefault); the
        # deletion above already succeeded, so a missing mirror is not an error.
        self.__dict__.pop(key, None)
"""Tests retrieval of existing output files from a specific workflow""" + + # GIVEN a snakemake fastqc rule object, a rule name and a list of associated wildcards + rules = snakemake_fastqc_rule + rule_name = "fastqc" + output_file_wildcards = { + "sample": ["concatenated_tumor_XXXXXX_R", "tumor", "normal"], + "case_name": "sample_tumor_only", + } + + # THEN retrieve the output files + output_files = get_rule_output(rules, rule_name, output_file_wildcards) + + # THEN check that the fastq files has been picked up by the function and that the tags has been correctly created + assert len(output_files) == 2 + for file in output_files: + # Expected file names + assert ( + os.path.basename(file[0]) == "concatenated_tumor_XXXXXX_R_1.fastq.gz" + or os.path.basename(file[0]) == "concatenated_tumor_XXXXXX_R_2.fastq.gz" + ) + # Expected tags + assert ( + file[3] == "1,fastqc,quality-trimmed-seq-fastqc" + or file[3] == "2,fastqc,quality-trimmed-seq-fastqc" + )