From d5a0eb11a620a4d2339a5513b7b76d6953fd2af2 Mon Sep 17 00:00:00 2001 From: Vadym Ivanchuk Date: Mon, 29 Aug 2022 11:06:40 +0200 Subject: [PATCH 1/4] remove tnhaplotyper for paired wgs --- BALSAMIC/workflows/balsamic.smk | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/BALSAMIC/workflows/balsamic.smk b/BALSAMIC/workflows/balsamic.smk index db27ef06a..b8a84ed4c 100644 --- a/BALSAMIC/workflows/balsamic.smk +++ b/BALSAMIC/workflows/balsamic.smk @@ -218,6 +218,12 @@ for ws in ["BALSAMIC","Sentieon","Sentieon_umi"]: mutation_class="somatic") somatic_caller_tmb += somatic_caller_snv + +# Remove TNhaplotyer for WGS-TN analysis +if config["analysis"]["sequencing_type"] == "wgs" and config['analysis']['analysis_type'] == "paired": + somatic_caller.remove("tnhaplotyper") + somatic_caller_tmb.remove("tnhaplotyper") + # Remove variant callers from list of callers if "disable_variant_caller" in config: variant_callers_to_remove = config["disable_variant_caller"].split(",") From e7711f7feee2b58e6dff7755da61f6c0ce9fd864 Mon Sep 17 00:00:00 2001 From: Vadym Ivanchuk Date: Mon, 29 Aug 2022 11:12:04 +0200 Subject: [PATCH 2/4] changelog --- CHANGELOG.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index cdafc50f3..5aea53b38 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -16,6 +16,7 @@ Removed ^^^^^^^ * case ID from the PON `.cnn` output file https://github.com/Clinical-Genomics/BALSAMIC/pull/983 +* TNhaplotyper for paired WGS analysis https://github.com/Clinical-Genomics/BALSAMIC/pull/988 [10.0.2] --------- From 7130066415ca0aa8f5dc1d817c37e58b12761992 Mon Sep 17 00:00:00 2001 From: Vadym Ivanchuk Date: Mon, 29 Aug 2022 17:10:40 +0200 Subject: [PATCH 3/4] remove unused rules --- BALSAMIC/config/cluster.json | 4 - .../varcaller_wgs_filter_tumor_normal.rule | 35 ------- .../variant_calling/sentieon_tn_varcall.rule | 93 +------------------ 3 files changed, 2 insertions(+), 130 deletions(-) diff --git a/BALSAMIC/config/cluster.json 
b/BALSAMIC/config/cluster.json index 06d339758..ac16bf69b 100644 --- a/BALSAMIC/config/cluster.json +++ b/BALSAMIC/config/cluster.json @@ -100,10 +100,6 @@ "time": "24:00:00", "n": 24 }, - "sentieon_TN_corealign": { - "time": "24:00:00", - "n": 24 - }, "sentieon_TNhaplotyper": { "time": "24:00:00", "n": 24 diff --git a/BALSAMIC/snakemake_rules/annotation/varcaller_wgs_filter_tumor_normal.rule b/BALSAMIC/snakemake_rules/annotation/varcaller_wgs_filter_tumor_normal.rule index 4473d2974..b8b972ffd 100644 --- a/BALSAMIC/snakemake_rules/annotation/varcaller_wgs_filter_tumor_normal.rule +++ b/BALSAMIC/snakemake_rules/annotation/varcaller_wgs_filter_tumor_normal.rule @@ -45,38 +45,3 @@ tabix -p vcf -f {output.vcf_pass_tnscope}; bcftools +counts {output.vcf_pass_tnscope} > {output.bcftools_counts}; """ - - -rule bcftools_filter_tnhaplotyper_tumor_normal: - input: - vcf = vep_dir + "{var_type}.somatic.{case_name}.tnhaplotyper.all.vcf.gz", - wgs_calling_file = config["reference"]["wgs_calling_interval"] - output: - vcf_filtered = vep_dir + "{var_type}.somatic.{case_name}.tnhaplotyper.all.filtered.vcf.gz", - vcf_pass_tnhaplotyper = vep_dir + "{var_type}.somatic.{case_name}.tnhaplotyper.all.filtered.pass.vcf.gz", - benchmark: - Path(benchmark_dir, 'bcftools_filter_tnhaplotyper_tumor_normal_' + "{var_type}.somatic.{case_name}.tsv").as_posix() - singularity: - Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix() - params: - pop_freq = [SENTIEON_CALLER.pop_freq.tag_value, SENTIEON_CALLER.pop_freq.filter_name], - case_name = '{case_name}' - threads: - get_threads(cluster_config, 'bcftools_filter_tnhaplotyper_tumor_normal') - message: - "Filtering WGS tumor-normal tnhaplotyper annotated variants using bcftools for {params.case_name}" - shell: - """ -grep -v '^@' {input.wgs_calling_file} > {input.wgs_calling_file}.bed - -bcftools view -f PASS --threads {threads} --regions-file {input.wgs_calling_file}.bed {input.vcf} \ -| bcftools filter --threads 
{threads} --include 'INFO/GNOMADAF_popmax <= {params.pop_freq[0]} || INFO/GNOMADAF_popmax == \".\"' --soft-filter '{params.pop_freq[1]}' --mode '+' \ -| bcftools view -o {output.vcf_filtered} -O z; - -tabix -p vcf -f {output.vcf_filtered}; - -bcftools view -f PASS --threads {threads} -O z -o {output.vcf_pass_tnhaplotyper} {output.vcf_filtered} - -tabix -p vcf -f {output.vcf_pass_tnhaplotyper} - - """ diff --git a/BALSAMIC/snakemake_rules/variant_calling/sentieon_tn_varcall.rule b/BALSAMIC/snakemake_rules/variant_calling/sentieon_tn_varcall.rule index 91113ff1b..d64af4833 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/sentieon_tn_varcall.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/sentieon_tn_varcall.rule @@ -64,95 +64,6 @@ rm -rf {params.tmpdir}; """ -rule sentieon_TN_corealign: - input: - ref = config["reference"]["reference_genome"], - bamT = expand(bam_dir + "tumor.merged.bam"), - bamN = expand(bam_dir + "normal.merged.bam"), - recalT = expand(bam_dir + "tumor.merged.recal_data.table"), - recalN = expand(bam_dir + "normal.merged.recal_data.table"), - mills = config["reference"]["mills_1kg"], - indel_1kg = config["reference"]["1kg_known_indel"], - output: - bam = bam_dir + config["analysis"]["case_id"] + ".corealign.bam" - benchmark: - Path(benchmark_dir, 'sentieon_TN_corealign_' + config[ "analysis" ][ "case_id" ] + ".tsv").as_posix() - params: - tmpdir = tempfile.mkdtemp(prefix=tmp_dir), - sentieon_exec = config["SENTIEON_EXEC"], - sentieon_lic = config["SENTIEON_LICENSE"], - case_name = config["analysis"]["case_id"] - threads: - get_threads(cluster_config, 'sentieon_TN_corealign') - message: - ("Perform local realignment around indels on the bam files" - " using Sentieon tools for sample {params.case_name}") - shell: - """ -mkdir -p {params.tmpdir}; -export TMPDIR={params.tmpdir}; -export SENTIEON_TMPDIR={params.tmpdir}; -export SENTIEON_LICENSE={params.sentieon_lic}; - -{params.sentieon_exec} driver \ --r {input.ref} \ --t {threads} \ --i 
{input.bamT} \ --i {input.bamN} \ --q {input.recalT} \ --q {input.recalN} \ ---algo Realigner \ --k {input.mills} \ --k {input.indel_1kg} {output.bam} - -rm -rf {params.tmpdir}; - """ - - -rule sentieon_TNhaplotyper: - input: - bam = bam_dir + config["analysis"]["case_id"] + ".corealign.bam", - ref = config["reference"]["reference_genome"], - dbsnp = config["reference"]["dbsnp"], - output: - vcf = vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".tnhaplotyper.vcf.gz", - namemap = vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".tnhaplotyper.sample_name_map", - benchmark: - Path(benchmark_dir, 'sentieon_TNhaplotyper_' + config[ "analysis" ][ "case_id" ] + ".tsv").as_posix() - params: - tmpdir = tempfile.mkdtemp(prefix=tmp_dir), - tumor = "TUMOR", - normal = "NORMAL", - pcr_model = params.common.pcr_model, - sentieon_exec = config["SENTIEON_EXEC"], - sentieon_lic = config["SENTIEON_LICENSE"], - case_name = config["analysis"]["case_id"] - threads: - get_threads(cluster_config, 'sentieon_TNhaplotyper') - message: - "Calling SNV variants using Sentieon TNhaplotyper for sample {params.case_name}" - shell: - """ -mkdir -p {params.tmpdir}; -export TMPDIR={params.tmpdir}; -export SENTIEON_TMPDIR={params.tmpdir}; -export SENTIEON_LICENSE={params.sentieon_lic}; - -{params.sentieon_exec} driver \ --r {input.ref} \ --t {threads} \ --i {input.bam} \ ---algo TNhaplotyper \ ---tumor_sample {params.tumor} \ ---normal_sample {params.normal} \ ---pcr_indel_mode {params.pcr_model} \ ---dbsnp {input.dbsnp} {output.vcf}; - -echo -e \"{params.tumor}\\tTUMOR\\n{params.normal}\\tNORMAL\" > {output.namemap}; -rm -rf {params.tmpdir}; - """ - - rule sentieon_TNscope: input: ref = config["reference"]["reference_genome"], @@ -173,8 +84,8 @@ rule sentieon_TNscope: tumor = "TUMOR", normal = "NORMAL", pcr_model = params.common.pcr_model, - tumor_options = VARCALL_PARAMS["tnscope"]["tumor"], - normal_options = VARCALL_PARAMS["tnscope"]["normal"], + tumor_options = 
VARCALL_PARAMS["tnscope"]["tumor"], + normal_options = VARCALL_PARAMS["tnscope"]["normal"], sentieon_ml_tnscope = config["SENTIEON_TNSCOPE"], sentieon_exec = config["SENTIEON_EXEC"], sentieon_lic = config["SENTIEON_LICENSE"], From c2ef5927fd867ef08b922288d1ecd91c30d084e5 Mon Sep 17 00:00:00 2001 From: Vadym Ivanchuk Date: Tue, 30 Aug 2022 15:06:55 +0200 Subject: [PATCH 4/4] typo --- BALSAMIC/workflows/balsamic.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/BALSAMIC/workflows/balsamic.smk b/BALSAMIC/workflows/balsamic.smk index b8a84ed4c..1cb3682fe 100644 --- a/BALSAMIC/workflows/balsamic.smk +++ b/BALSAMIC/workflows/balsamic.smk @@ -219,7 +219,7 @@ for ws in ["BALSAMIC","Sentieon","Sentieon_umi"]: somatic_caller_tmb += somatic_caller_snv -# Remove TNhaplotyer for WGS-TN analysis +# Remove TNhaplotyper for WGS-TN analyses if config["analysis"]["sequencing_type"] == "wgs" and config['analysis']['analysis_type'] == "paired": somatic_caller.remove("tnhaplotyper") somatic_caller_tmb.remove("tnhaplotyper")