From 375d13954db7edb23600c76c466e3110959fd633 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Wed, 26 Feb 2020 18:09:41 +0100 Subject: [PATCH 01/11] fix issue with channel ch_software_versions_yaml --- main.nf | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index d3f930be4a..973cacb35e 100644 --- a/main.nf +++ b/main.nf @@ -622,7 +622,7 @@ process Get_software_versions { """ } -yamlSoftwareVersion = yamlSoftwareVersion.dump(tag:'SOFTWARE VERSIONS') +ch_software_versions_yaml = ch_software_versions_yaml.dump(tag:'SOFTWARE VERSIONS') /* ================================================================================ @@ -1377,7 +1377,12 @@ process GatherBQSRReports { tag {idPatient + "-" + idSample} - publishDir "${params.outdir}/Preprocessing/${idSample}/DuplicateMarked", mode: params.publish_dir_mode, overwrite: false + publishDir "${params.outdir}", mode: params.publish_dir_mode, overwrite: false, + saveAs: { + if (it == "${idSample}.recal.table" && 'baserecalibrator' in skipQC) null + else if (it == "${idSample}.recal.table") "Reports/${idSample}/BaseRecalibrator/${it}" + else "Preprocessing/${idSample}/DuplicateMarked/${it}" + } input: set idPatient, idSample, file(recal) from tableGatherBQSRReports @@ -1399,6 +1404,8 @@ process GatherBQSRReports { """ } +if ('baserecalibrator' in skipQC) baseRecalibratorReport.close() + recalTable = recalTable.dump(tag:'RECAL TABLE') (recalTableTSV, recalTableSampleTSV) = recalTableTSV.mix(recalTableTSVnoInt).into(2) @@ -3541,6 +3548,7 @@ def defineAnnoList() { def defineSkipQClist() { return [ 'bamqc', + 'baserecalibrator', 'bcftools', 'fastqc', 'markduplicates', From 2a6853d77efc708cc5c6fb4dbaa48fbf48788c97 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Wed, 26 Feb 2020 18:12:47 +0100 Subject: [PATCH 02/11] update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2823a1c30e..fcdb05c361 100644 --- a/CHANGELOG.md +++ 
b/CHANGELOG.md @@ -34,6 +34,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a - [#110](https://github.com/nf-core/sarek/pull/110) - Fix `snpEff` report issue cf [#106](https://github.com/nf-core/sarek/issues/106) - [#126](https://github.com/nf-core/sarek/pull/126) - Fix `iGenomes` paths - [#127](https://github.com/nf-core/sarek/pull/127), [#128](https://github.com/nf-core/sarek/pull/128) - Fix `ASCAT` +- [#129](https://github.com/nf-core/sarek/pull/129)- Fix issue with Channel `channel ch_software_versions_yaml` ### `Deprecated` From 39f01cdee345fcea442467ae3769e18e1cb126d8 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 27 Feb 2020 09:21:16 +0100 Subject: [PATCH 03/11] use @drpatelh fix --- bin/markdown_to_html.py | 3 ++- environment.yml | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/markdown_to_html.py b/bin/markdown_to_html.py index 57cc4263fe..690b5d9602 100755 --- a/bin/markdown_to_html.py +++ b/bin/markdown_to_html.py @@ -4,9 +4,10 @@ import markdown import os import sys +import io def convert_markdown(in_fn): - input_md = open(in_fn, mode="r", encoding="utf-8").read() + input_md = io.open(in_fn, mode="r", encoding='utf-8').read() html = markdown.markdown( "[TOC]\n" + input_md, extensions = [ diff --git a/environment.yml b/environment.yml index a268548910..4d4851d98f 100644 --- a/environment.yml +++ b/environment.yml @@ -6,7 +6,6 @@ channels: - bioconda - defaults dependencies: - - conda-forge::python=3.7.3 - conda-forge::markdown=3.1.1 - conda-forge::pymdown-extensions=6.0 - conda-forge::pygments=2.5.2 From ef4c8bd4bee609612477c19b6904907abdccd7ef Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 27 Feb 2020 09:37:52 +0100 Subject: [PATCH 04/11] change :warning: to ascii style emoji --- docs/output.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/output.md b/docs/output.md index a915674114..c30ce3718d 100644 --- a/docs/output.md +++ b/docs/output.md @@ 
-119,7 +119,7 @@ For all samples: - `duplicateMarked_[SAMPLE].tsv` and `recalibrated_[SAMPLE].tsv` - TSV files to start Sarek from `recalibration` or `variantcalling` steps for a specific sample. -> :warning: Only with [`--sentieon`](usage.md#--sentieon) +> `/!\` Only with [`--sentieon`](usage.md#--sentieon) For all samples: **Output directory: `results/Preprocessing/TSV`** @@ -242,7 +242,7 @@ Using [Strelka Best Practices](https://github.com/Illumina/strelka/blob/v2.9.x/d #### Sentieon DNAseq -> :warning: Only with [`--sentieon`](usage.md#--sentieon) +> `/!\` Only with [`--sentieon`](usage.md#--sentieon) [Sentieon DNAseq](https://www.sentieon.com/products/#dnaseq) implements the same mathematics used in the Broad Institute’s BWA-GATK HaplotypeCaller 3.3-4.1 Best Practices Workflow pipeline. @@ -256,7 +256,7 @@ For all samples: #### Sentieon DNAscope -> :warning: Only with [`--sentieon`](usage.md#--sentieon) +> `/!\` Only with [`--sentieon`](usage.md#--sentieon) [Sentieon DNAscope](https://www.sentieon.com/products) calls SNPs and small indels. @@ -270,7 +270,7 @@ For all samples: #### Sentieon TNscope -> :warning: Only with [`--sentieon`](usage.md#--sentieon) +> `/!\` Only with [`--sentieon`](usage.md#--sentieon) [Sentieon TNscope](https://www.sentieon.com/products/#tnscope) calls SNPs and small indels on an Tumor/Normal pair. @@ -349,7 +349,7 @@ For all samples: #### Sentieon DNAscope SV -> :warning: Only with [`--sentieon`](usage.md#--sentieon) +> `/!\` Only with [`--sentieon`](usage.md#--sentieon) [Sentieon DNAscope](https://www.sentieon.com/products) can perform structural variant calling in addition to calling SNPs and small indels. 
From de5439f147ac5595ec5509358fb821aba673eea7 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 27 Feb 2020 09:45:54 +0100 Subject: [PATCH 05/11] =?UTF-8?q?replace=20=E2=80=99=20by=20'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/output.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/output.md b/docs/output.md index c30ce3718d..174239d791 100644 --- a/docs/output.md +++ b/docs/output.md @@ -244,7 +244,7 @@ Using [Strelka Best Practices](https://github.com/Illumina/strelka/blob/v2.9.x/d > `/!\` Only with [`--sentieon`](usage.md#--sentieon) -[Sentieon DNAseq](https://www.sentieon.com/products/#dnaseq) implements the same mathematics used in the Broad Institute’s BWA-GATK HaplotypeCaller 3.3-4.1 Best Practices Workflow pipeline. +[Sentieon DNAseq](https://www.sentieon.com/products/#dnaseq) implements the same mathematics used in the Broad Institute's BWA-GATK HaplotypeCaller 3.3-4.1 Best Practices Workflow pipeline. For further reading and documentation see the [Sentieon DNAseq user guide](https://support.sentieon.com/manual/DNAseq_usage/dnaseq/). 
From ca4840db99372a9cf6925fbd7e2d94d910ea6447 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 27 Feb 2020 09:49:35 +0100 Subject: [PATCH 06/11] =?UTF-8?q?=C3=AF=20->=20i?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/output.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/output.md b/docs/output.md index 174239d791..493957cf9f 100644 --- a/docs/output.md +++ b/docs/output.md @@ -339,7 +339,7 @@ For all samples: - `TIDDIT_[SAMPLE].signals.tab` - tab file describing coverage across the genome, binned per 50 bp - `TIDDIT_[SAMPLE].ploidy.tab` - - tab file describing the estimated ploïdy and coverage across each contig + - tab file describing the estimated ploidy and coverage across each contig - `TIDDIT_[SAMPLE].old.vcf` - VCF including the low qualiy calls - `TIDDIT_[SAMPLE].wig` From ef255feba7542399d001916807e15db8b0960f4d Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 27 Feb 2020 09:56:11 +0100 Subject: [PATCH 07/11] update CHANGELOG --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index fcdb05c361..b2fc20aeeb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,6 +35,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a - [#126](https://github.com/nf-core/sarek/pull/126) - Fix `iGenomes` paths - [#127](https://github.com/nf-core/sarek/pull/127), [#128](https://github.com/nf-core/sarek/pull/128) - Fix `ASCAT` - [#129](https://github.com/nf-core/sarek/pull/129)- Fix issue with Channel `channel ch_software_versions_yaml` +- [#129](https://github.com/nf-core/sarek/pull/129)- Apply @drpatelh fix for `markdown_to_html.py` compatibility with Python 2 +- [#129](https://github.com/nf-core/sarek/pull/129)- Removed `Python` `3.7.3` from conda environment due to incompatibility +- [#129](https://github.com/nf-core/sarek/pull/129)- Replace non-ASCII characters that were not supported in the `output.md` docs ### 
`Deprecated` From d562a2a4a2c6a68aa36ae071ee30eee09e8d953b Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 27 Feb 2020 10:05:13 +0100 Subject: [PATCH 08/11] fix GatherBQSRReports output --- conf/base.config | 2 +- main.nf | 7 +------ 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/conf/base.config b/conf/base.config index ee3bfa7604..929158315c 100644 --- a/conf/base.config +++ b/conf/base.config @@ -48,7 +48,7 @@ process { memory = {params.max_memory} } - withName:get_software_versions { + withName:Get_software_versions { cache = false } diff --git a/main.nf b/main.nf index 973cacb35e..e9323396d3 100644 --- a/main.nf +++ b/main.nf @@ -1377,12 +1377,7 @@ process GatherBQSRReports { tag {idPatient + "-" + idSample} - publishDir "${params.outdir}", mode: params.publish_dir_mode, overwrite: false, - saveAs: { - if (it == "${idSample}.recal.table" && 'baserecalibrator' in skipQC) null - else if (it == "${idSample}.recal.table") "Reports/${idSample}/BaseRecalibrator/${it}" - else "Preprocessing/${idSample}/DuplicateMarked/${it}" - } + publishDir "${params.outdir}/Preprocessing/${idSample}/DuplicateMarked/", mode: params.publish_dir_mode, overwrite: false input: set idPatient, idSample, file(recal) from tableGatherBQSRReports From c5745325fb2f06e2f7fa18eb109e6ba9599dddc8 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 27 Feb 2020 10:09:28 +0100 Subject: [PATCH 09/11] code polishing + update docs --- docs/usage.md | 2 +- main.nf | 21 ++++++++++++--------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 9c9dd45a73..c06a875743 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -292,7 +292,7 @@ Use this to disable g.vcf from `HaplotypeCaller`. ### --skip_qc Use this to disable specific QC and Reporting tools. 
-Available: `all`, `bamQC`, `BCFtools`, `FastQC`, `MultiQC`, `samtools`, `vcftools`, `versions` +Available: `all`, `bamQC`, `BaseRecalibrator`, `BCFtools`, `Documentation`, `FastQC`, `MultiQC`, `samtools`, `vcftools`, `versions` Default: `None` ### --skipQC diff --git a/main.nf b/main.nf index e9323396d3..19fc959cc1 100644 --- a/main.nf +++ b/main.nf @@ -30,14 +30,14 @@ def helpMessage() { nextflow run nf-core/sarek --input sample.tsv -profile docker Mandatory arguments: - --input [file] Path to input TSV file on mapping, recalibrate and variantcalling steps - Multiple TSV files can be specified with quotes - Works also with the path to a directory on mapping step with a single germline sample only - Alternatively, path to VCF input file on annotate step - Multiple VCF files can be specified with quotes - -profile [str] Configuration profile to use - Can use multiple (comma separated) - Available: conda, docker, singularity, test and more + --input [file] Path to input TSV file on mapping, recalibrate and variantcalling steps + Multiple TSV files can be specified with quotes + Works also with the path to a directory on mapping step with a single germline sample only + Alternatively, path to VCF input file on annotate step + Multiple VCF files can be specified with quotes + -profile [str] Configuration profile to use + Can use multiple (comma separated) + Available: conda, docker, singularity, test and more --genome [str] Name of iGenomes reference --step [str] Specify starting step Available: Mapping, Recalibrate, VariantCalling, Annotate @@ -57,7 +57,7 @@ def helpMessage() { snpEff, VEP, merge Default: None --skip_qc [str] Specify which QC tools to skip when running Sarek - Available: all, bamQC, BCFtools, FastQC, MultiQC, samtools, vcftools, versions + Available: all, bamQC, BaseRecalibrator, BCFtools, Documentation, FastQC, MultiQC, samtools, vcftools, versions Default: None --annotate_tools [str] Specify from which tools Sarek will look for VCF files to 
annotate, only for step annotate Available: HaplotypeCaller, Manta, Mutect2, Strelka, TIDDIT @@ -3311,6 +3311,8 @@ process Output_documentation { output: file "results_description.html" + when: !('documentation' in skipQC) + script: """ markdown_to_html.py $output_docs -o results_description.html @@ -3545,6 +3547,7 @@ def defineSkipQClist() { 'bamqc', 'baserecalibrator', 'bcftools', + 'documentation', 'fastqc', 'markduplicates', 'multiqc', From 6ff582cf34591a16c4a574814d88aeaa96fb62fb Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 27 Feb 2020 10:11:33 +0100 Subject: [PATCH 10/11] code polishing --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 19fc959cc1..4d0507bd56 100644 --- a/main.nf +++ b/main.nf @@ -1377,7 +1377,7 @@ process GatherBQSRReports { tag {idPatient + "-" + idSample} - publishDir "${params.outdir}/Preprocessing/${idSample}/DuplicateMarked/", mode: params.publish_dir_mode, overwrite: false + publishDir "${params.outdir}/Preprocessing/${idSample}/DuplicateMarked", mode: params.publish_dir_mode, overwrite: false input: set idPatient, idSample, file(recal) from tableGatherBQSRReports From 3bd4729d3136c780185541e37cc91fd495753315 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 27 Feb 2020 10:36:53 +0100 Subject: [PATCH 11/11] code polishing --- main.nf | 2 -- 1 file changed, 2 deletions(-) diff --git a/main.nf b/main.nf index 4d0507bd56..708b2cce92 100644 --- a/main.nf +++ b/main.nf @@ -3350,8 +3350,6 @@ workflow.onComplete { email_fields['summary']['Nextflow Build'] = workflow.nextflow.build email_fields['summary']['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - // TODO nf-core: If not using MultiQC, strip out this code (including params.max_multiqc_email_size) - // On success try attach the multiqc report def mqc_report = null try { if (workflow.success) {