diff --git a/BALSAMIC/assets/scripts/create_pdf.py b/BALSAMIC/assets/scripts/create_pdf.py
new file mode 100644
index 000000000..1a1b10313
--- /dev/null
+++ b/BALSAMIC/assets/scripts/create_pdf.py
@@ -0,0 +1,113 @@
+#!/usr/bin/env python
+import os
+import click
+
+from fpdf import FPDF
+from PIL import Image
+
+
+@click.command(
+    short_help="Merge images and a txt file into a single PDF",
+)
+@click.argument("output", type=click.Path(exists=False), required=True)
+@click.argument("data", type=click.Path(exists=True), required=True)
+@click.argument("images", nargs=-1, type=click.Path(exists=True), required=True)
+def create_pdf(output, data, images):
+    """Merge the DATA statistics table and the IMAGES plots into the OUTPUT PDF."""
+    pdf = generate_fpdf()
+    pdf = add_table_pdf(pdf, data)
+    pdf = add_images_pdf(pdf, images)
+    pdf.output(output)
+
+
+class PDF(FPDF):
+    """FPDF document that prints a centred page counter in its footer."""
+
+    def footer(self):
+        """Render "Page <current>/<total>" centred near the bottom of the page."""
+        self.set_y(-15)
+        self.set_font("helvetica", "I", 8)
+        self.cell(0, 10, f"Page {self.page_no()}/{{nb}}", 0, 0, "C")
+
+
+def generate_fpdf():
+    """Return an empty PDF whose "{nb}" alias expands to the total page count."""
+    pdf = PDF()
+    pdf.alias_nb_pages(alias="{nb}")
+    return pdf
+
+
+def add_images_pdf(pdf, img_paths):
+    """Append one titled page per image (sunrise: portrait, others: landscape)."""
+    pdf.set_font("helvetica", "B", 15)
+
+    for path in img_paths:
+        title = os.path.basename(path).replace(".png", "")
+
+        # Image & page layout parameters
+        if "sunrise" in title:
+            page_orientation = "portrait"
+            img_size = 500, 500
+            title_w_pos = 25
+            title_wh = 140, 10
+            img_xy = 10, 55
+        else:
+            page_orientation = "landscape"
+            img_size = 800, 800
+            title_w_pos = 68.5
+            title_wh = 140, 10
+            img_xy = 5, 40
+
+        pdf.add_page(orientation=page_orientation)
+
+        # Title position & styling
+        pdf.cell(title_w_pos)
+        pdf.cell(title_wh[0], title_wh[1], title, 1, 0, "C")
+
+        # Image position & resizing (LANCZOS is the filter behind the ANTIALIAS
+        # alias, which Pillow 10 removed)
+        with Image.open(path) as img:
+            img.thumbnail(img_size, Image.LANCZOS)
+            pdf.image(img, img_xy[0], img_xy[1])
+
+    return pdf
+
+
+def add_table_pdf(pdf, data_path):
+    """Append a titled page holding one bordered table row per line of the file."""
+
+    with open(data_path) as data_file:
+        data = data_file.readlines()
+
+    pdf.add_page()
+    pdf.set_font("helvetica", "B", 15)
+
+    # Title layout & styling
+    title = os.path.basename(data_path).replace(".txt", "")
+    pdf.cell(25)
+    pdf.cell(140, 10, title, 1, 0, "C")
+    pdf.cell(35, 25, ln=1)  # Post title indentation
+
+    # Table layout & styling
+    pdf.set_font("Times", size=11)
+    line_height = pdf.font_size * 2.5
+    col_width = pdf.epw / 4  # Even distribution of the content
+    for row in data:
+        pdf.cell(45)
+        for statistic in row.split():
+            pdf.multi_cell(
+                col_width,
+                line_height,
+                statistic,
+                align="C",
+                border=1,
+                ln=3,
+                max_line_height=pdf.font_size,
+            )
+        pdf.ln(line_height)
+
+    return pdf
+
+
+if __name__ == "__main__":
+    create_pdf()
diff --git a/BALSAMIC/config/cluster.json b/BALSAMIC/config/cluster.json
index 15f466b6f..e468df258 100644
--- a/BALSAMIC/config/cluster.json
+++ b/BALSAMIC/config/cluster.json
@@ -251,5 +251,9 @@
     "ascat_tumor_normal": {
         "time": "8:00:00",
         "n": 36
+    },
+    "ascat_tumor_normal_merge_output": {
+        "time": "00:15:00",
+        "n": 1
     }
 }
diff --git a/BALSAMIC/constants/workflow_params.py b/BALSAMIC/constants/workflow_params.py
index 129e133ad..44d45e30b 100644
--- a/BALSAMIC/constants/workflow_params.py
+++ b/BALSAMIC/constants/workflow_params.py
@@ -118,7 +118,7 @@
         "column_info": "-c 1 -S 2 -E 3 -g 4",
     },
     "vep": {
-        "vep_filters": "--compress_output bgzip --vcf --everything --allow_non_variant --dont_skip --buffer_size 20000 --format vcf --offline --variant_class --merged --cache --verbose --force_overwrite"
+        "vep_filters": "--compress_output bgzip --vcf --everything --hgvsg --allow_non_variant --dont_skip --buffer_size 20000 --format vcf --offline --variant_class --merged --cache --verbose --force_overwrite"
     },
     "umicommon": {
         "align_header": "'@RG\\tID:{sample}\\tSM:{sample}\\tLB:TargetPanel\\tPL:ILLUMINA'",
diff --git a/BALSAMIC/constants/workflow_rules.py b/BALSAMIC/constants/workflow_rules.py
index dfcfb9035..064d87501 100644
--- a/BALSAMIC/constants/workflow_rules.py
+++ b/BALSAMIC/constants/workflow_rules.py
@@ -136,5 +136,5 @@
     "mergeBam_normal",
     "cnvkit_paired",
     "cnvkit_single",
-    "ascat_tumor_normal",
+    "ascat_tumor_normal_merge_output",
 ]
diff --git a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule
index de29f6f7e..432c523cd 100644
--- a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule
+++ b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal.rule
@@ -125,21 +125,18 @@ rule ascat_tumor_normal:
         gccorrection = config["reference"]["ascat_gccorrection"],
         chryloci= config["reference"]["ascat_chryloci"],
     output:
-        finalvcf = temp(vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat.vcf.gz"),
-        samplestatistics = vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat.samplestatistics.txt",
-        germlineplot = vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat.germline.png",
-        rawprofileplot = vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat.rawprofile.png",
-        sunriseplot = vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat.sunrise.png",
-        tumorplot = vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat.tumor.png",
-        aspcfplot = vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat.ASPCF.png",
-        ascatprofileplot = vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat.ascatprofile.png",
-        namemap = temp(vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat.sample_name_map"),
+        final_vcf = vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat.vcf.gz",
+        sample_statistics = vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat.samplestatistics.txt",
+        ascat_plots= expand(
+            vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat." + "{output_suffix}" + ".png",
+            output_suffix=["ascatprofile", "rawprofile", "ASPCF", "tumor", "germline", "sunrise"]
+        ),
+        namemap = vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat.sample_name_map",
     benchmark:
         benchmark_dir + 'ascat_tumor_normal_' + config["analysis"]["case_id"] + "_ascat.tsv"
     singularity:
         Path(singularity_image, config["bioinfo_tools"].get("ascatNgs") + ".sif").as_posix()
     params:
-        housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "ascat-ngs"},
         tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
         tumor = "TUMOR",
         normal = "NORMAL",
@@ -165,25 +162,46 @@ ascat.pl \
   -tumour {input.bamT} \
   -normal {input.bamN};
 
-cp {params.tmpdir}/{params.tumor}.copynumber.caveman.vcf.gz {output.finalvcf};
+cp {params.tmpdir}/{params.tumor}.copynumber.caveman.vcf.gz {output.final_vcf};
 
-cp {params.tmpdir}/{params.tumor}.samplestatistics.txt {output.samplestatistics};
+cp {params.tmpdir}/{params.tumor}.samplestatistics.txt {output.sample_statistics};
 
-cp {params.tmpdir}/{params.tumor}.germline.png {output.germlineplot};
+cp {params.tmpdir}/{params.tumor}.ASCATprofile.png {output.ascat_plots[0]};
 
-cp {params.tmpdir}/{params.tumor}.rawprofile.png {output.rawprofileplot};
+cp {params.tmpdir}/{params.tumor}.rawprofile.png {output.ascat_plots[1]};
 
-cp {params.tmpdir}/{params.tumor}.sunrise.png {output.sunriseplot};
+cp {params.tmpdir}/{params.tumor}.ASPCF.png {output.ascat_plots[2]};
 
-cp {params.tmpdir}/{params.tumor}.tumour.png {output.tumorplot};
+cp {params.tmpdir}/{params.tumor}.tumour.png {output.ascat_plots[3]};
 
-cp {params.tmpdir}/{params.tumor}.ASCATprofile.png {output.ascatprofileplot};
+cp {params.tmpdir}/{params.tumor}.germline.png {output.ascat_plots[4]};
 
-cp {params.tmpdir}/{params.tumor}.ASPCF.png {output.aspcfplot};
+cp {params.tmpdir}/{params.tumor}.sunrise.png {output.ascat_plots[5]};
 
-tabix -p vcf -f {output.finalvcf};
+tabix -p vcf -f {output.final_vcf};
 
 echo -e \"{params.tumor}\\tTUMOR\\n{params.normal}\\tNORMAL\" > {output.namemap};
 
 rm -rf {params.tmpdir};
         """
+
+rule ascat_tumor_normal_merge_output:
+    input:
+        sample_statistics = vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat.samplestatistics.txt",
+        ascat_plots= expand(
+            vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat." + "{output_suffix}" + ".png",
+            output_suffix=["ascatprofile", "rawprofile", "ASPCF", "tumor", "germline", "sunrise"]
+        ),
+    output:
+        ascat_output_pdf = vcf_dir + "CNV.somatic." + config["analysis"]["case_id"] + ".ascat.output.pdf"
+    params:
+        housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "research"},
+        merge_ascat_output_script= get_script_path("create_pdf.py"),
+    threads:
+        get_threads(cluster_config, "ascat_tumor_normal_merge_output")
+    message:
+        "Merge the ascatNgs output plots together with the sample statistics into a single PDF"
+    shell:
+        """
+        python {params.merge_ascat_output_script} {output.ascat_output_pdf} {input.sample_statistics} {input.ascat_plots}
+        """
diff --git a/BALSAMIC/workflows/balsamic.smk b/BALSAMIC/workflows/balsamic.smk
index 7df20c012..f15fe175f 100644
--- a/BALSAMIC/workflows/balsamic.smk
+++ b/BALSAMIC/workflows/balsamic.smk
@@ -241,6 +241,9 @@ if config["analysis"]["sequencing_type"] == "wgs" and config['analysis']['analys
     analysis_specific_results.extend([Path(result_dir, "dragen", "SNV.somatic." + config["analysis"]["case_id"] + ".dragen_tumor.bam").as_posix(),
                                       Path(result_dir, "dragen", "SNV.somatic." + config["analysis"]["case_id"] + ".dragen.vcf.gz").as_posix()])
 
+if config["analysis"]["sequencing_type"] == "wgs" and config['analysis']['analysis_type'] == "paired":
+    analysis_specific_results.extend(expand(vcf_dir + "{vcf}.output.pdf", vcf=get_vcf(config, ["ascat"], [config["analysis"]["case_id"]])))
+
 if 'benchmark_plots' in config:
     log_dir = config["analysis"]["log"]
     if not check_executable("sh5util"):
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 8ed66fd1b..6a5a315f8 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -1,3 +1,12 @@
+[8.2.4]
+-------
+
+Added:
+^^^^^^
+
+* ``--hgvsg`` annotation to VEP #830
+* ``ascatNgs`` PDF delivery (plots & statistics) #828
+
 [8.2.3]
 -------
 Fixed:
diff --git a/setup.py b/setup.py
index eaef3709e..c78b7391a 100644
--- a/setup.py
+++ b/setup.py
@@ -19,6 +19,7 @@
     "colorclass>=2.2.0",
     "coloredlogs>=14.0",
     "cyvcf2<0.10.0",
+    "fpdf2>=2.4.6",
     "graphviz==0.16",
     "gsutil==4.50",
     "jinja2>=2.11.2",
@@ -26,6 +27,7 @@
     "networkx>=2.4",
     "numpy>=1.19.2",
     "pandas>1.1.0",
+    "pillow>=8.4.0",
     "psutil>=5.7.0",
     "pydantic>=1.5.1",
     "pygments>=2.6.1",
diff --git a/tests/scripts/test_create_pdf.py b/tests/scripts/test_create_pdf.py
new file mode 100644
index 000000000..849232bc9
--- /dev/null
+++ b/tests/scripts/test_create_pdf.py
@@ -0,0 +1,72 @@
+from pathlib import Path
+
+from fpdf import FPDF
+
+from BALSAMIC.assets.scripts.create_pdf import (
+    generate_fpdf,
+    add_images_pdf,
+    add_table_pdf,
+    create_pdf,
+)
+
+
+def test_generate_fpdf():
+    # WHEN creating a dummy FPDF file
+    pdf = generate_fpdf()
+
+    # THEN check if the pdf has been correctly created
+    assert isinstance(pdf, FPDF)
+
+
+def test_add_images_pdf():
+    # GIVEN ascatNgs output PNG images
+    test_images_path = [
+        "tests/test_data/ascat_output/CNV.somatic.SAMPLE.ascat.sunrise.png",
+        "tests/test_data/ascat_output/CNV.somatic.SAMPLE.ascat.germline.png",
+    ]
+
+    # WHEN calling the function
+    pdf = add_images_pdf(generate_fpdf(), test_images_path)
+
+    # THEN check if the images are appended to the PDF
+    assert isinstance(pdf, FPDF)
+    assert pdf.page_no() == 2
+
+
+def test_add_table_pdf():
+    # GIVEN ascatNgs output sample statistics .txt
+    test_statistics_path = (
+        "tests/test_data/ascat_output/CNV.somatic.SAMPLE.ascat.samplestatistics.txt"
+    )
+
+    # WHEN calling the function
+    pdf = add_table_pdf(generate_fpdf(), test_statistics_path)
+
+    # THEN check if the table is appended to the created PDF
+    assert isinstance(pdf, FPDF)
+    assert pdf.page_no() == 1
+
+
+def test_create_pdf(tmp_path, cli_runner):
+    # GIVEN ascatNgs output statistics
+    statistics_path = (
+        "tests/test_data/ascat_output/CNV.somatic.SAMPLE.ascat.samplestatistics.txt"
+    )
+
+    # GIVEN ascatNgs output plots
+    plots_path = [
+        "tests/test_data/ascat_output/CNV.somatic.SAMPLE.ascat.germline.png",
+        "tests/test_data/ascat_output/CNV.somatic.SAMPLE.ascat.sunrise.png",
+    ]
+
+    # GIVEN the output path
+    output_path = tmp_path / "ascat.output.pdf"
+
+    # WHEN invoking the python script
+    result = cli_runner.invoke(
+        create_pdf, [str(output_path), statistics_path, plots_path[0], plots_path[1]]
+    )
+
+    # THEN check if the PDF is correctly created and there are no errors
+    assert result.exit_code == 0
+    assert Path(output_path).exists()
diff --git a/tests/test_data/ascat_output/CNV.somatic.SAMPLE.ascat.germline.png b/tests/test_data/ascat_output/CNV.somatic.SAMPLE.ascat.germline.png
new file mode 100644
index 000000000..8d956976d
Binary files /dev/null and b/tests/test_data/ascat_output/CNV.somatic.SAMPLE.ascat.germline.png differ
diff --git a/tests/test_data/ascat_output/CNV.somatic.SAMPLE.ascat.samplestatistics.txt b/tests/test_data/ascat_output/CNV.somatic.SAMPLE.ascat.samplestatistics.txt
new file mode 100644
index 000000000..d681e2968
--- /dev/null
+++ b/tests/test_data/ascat_output/CNV.somatic.SAMPLE.ascat.samplestatistics.txt
@@ -0,0 +1,7 @@
+NormalContamination 0.378222936036507
+Ploidy 2.69008904657384
+rho 0.55
+psi 2.75
+goodnessOfFit 93.9311185291303
+GenderChr Y
+GenderChrFound N
diff --git a/tests/test_data/ascat_output/CNV.somatic.SAMPLE.ascat.sunrise.png b/tests/test_data/ascat_output/CNV.somatic.SAMPLE.ascat.sunrise.png
new file mode 100644
index 000000000..e232162e9
Binary files /dev/null and b/tests/test_data/ascat_output/CNV.somatic.SAMPLE.ascat.sunrise.png differ