From 87df112641f1578b4b2e8c9184fee3fce61fb74d Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 16 Jul 2020 15:57:44 +0200 Subject: [PATCH 1/9] update Nextflow version to 20.06.0-edge --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index bb70fe3771..353d9e3194 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ > **An open-source analysis pipeline to detect germline or somatic variants from whole genome or targeted sequencing** -[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A520.04.1-brightgreen.svg)](https://www.nextflow.io/) +[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A520.06.0--edge-brightgreen.svg)](https://www.nextflow.io/) [![nf-core](https://img.shields.io/badge/nf--core-pipeline-brightgreen.svg)](https://nf-co.re/) [![DOI](https://zenodo.org/badge/184289291.svg)](https://zenodo.org/badge/latestdoi/184289291) From 5d53898af76073861b019a59c67e4ed1d1b8660a Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 16 Jul 2020 16:05:40 +0200 Subject: [PATCH 2/9] skip_qc is back + collect() too as it was actually useful --- main.nf | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/main.nf b/main.nf index 75240b1fa6..0c704af8fb 100644 --- a/main.nf +++ b/main.nf @@ -38,6 +38,7 @@ if (params.help) { include { check_parameter_existence; check_parameter_list; + define_skip_qc_list; define_step_list; define_tool_list; extract_bam; @@ -86,9 +87,9 @@ tools = params.tools ? params.tools.split(',').collect{it.trim().toLowerCase().r if (step == 'controlfreec') tools = ['controlfreec'] if (!check_parameter_list(tools, tool_list)) exit 1, 'Unknown tool(s), see --help for more information' -// skip_qc_list = define_skip_qc_list() -// skip_qc = params.skip_qc ? params.skip_qc == 'all' ? 
skip_qc_list : params.skip_qc.split(',').collect{it.trim().toLowerCase().replaceAll('-', '').replaceAll('_', '')} : [] -// if (!check_parameter_list(skip_qc, skip_qc_list)) exit 1, 'Unknown QC tool(s), see --help for more information' +skip_qc_list = define_skip_qc_list() +skip_qc = params.skip_qc ? params.skip_qc == 'all' ? skip_qc_list : params.skip_qc.split(',').collect{it.trim().toLowerCase().replaceAll('-', '').replaceAll('_', '')} : [] +if (!check_parameter_list(skip_qc, skip_qc_list)) exit 1, 'Unknown QC tool(s), see --help for more information' // anno_list = define_anno_list() // annotate_tools = params.annotate_tools ? params.annotate_tools.split(',').collect{it.trim().toLowerCase().replaceAll('-', '')} : [] @@ -362,8 +363,10 @@ workflow { intervals_bed = Channel.from(file("${params.outdir}/no_intervals.bed")) } - // if(!('fastqc' in skip_qc)) - FASTQC(input_sample) + if(!('fastqc' in skip_qc)) + result_fastqc = FASTQC(input_sample) + else + result_fastqc = Channel.empty() if (params.trim_fastq) { TRIM_GALORE(input_sample) @@ -382,11 +385,11 @@ workflow { GET_SOFTWARE_VERSIONS() MULTIQC( - FASTQC.out.ifEmpty([]), + result_fastqc.collect().ifEmpty([]), multiqc_config, multiqc_custom_config.ifEmpty([]), GET_SOFTWARE_VERSIONS.out.yml, - result_trim_galore.ifEmpty([]), + result_trim_galore.collect().ifEmpty([]), workflow_summary) } From 1756f8105985e39a249293c4bca04a73ec95d2d2 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 16 Jul 2020 16:22:32 +0200 Subject: [PATCH 3/9] fix bwa-mem2 version --- bin/scrape_software_versions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py index 2d878f7f72..5f42871207 100755 --- a/bin/scrape_software_versions.py +++ b/bin/scrape_software_versions.py @@ -7,7 +7,7 @@ 'AlleleCount': ['v_allelecount.txt', r"(\S+)"], 'ASCAT': ['v_ascat.txt', r"Version: (\S+)"], 'bcftools': ['v_bcftools.txt', r"bcftools (\S+)"], - 'BWAMEM2': ['v_bwamem2.txt', 
r"Version: (\S+)"], + 'BWA-MEM2': ['v_bwamem2.txt', r"(\S+)"], 'CNVkit': ['v_cnvkit.txt', r"(\S+)"], 'Control-FREEC': ['v_controlfreec.txt', r"Control-FREEC\s(\S+)"], 'FastQC': ['v_fastqc.txt', r"FastQC v(\S+)"], From be8b3d2e7321b86b9fccfd49e02ecec3ac97d166 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 16 Jul 2020 16:22:59 +0200 Subject: [PATCH 4/9] remove unused params(params) and addParams(params) --- main.nf | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/main.nf b/main.nf index 0c704af8fb..95ee1168f4 100644 --- a/main.nf +++ b/main.nf @@ -253,10 +253,18 @@ if (params.sentieon) log.warn "[nf-core/sarek] Sentieon will be used, only works ================================================================================ */ -include { BWAMEM2_MEM } from './modules/local/bwamem2_mem.nf' addParams(params) -include { GET_SOFTWARE_VERSIONS } from './modules/local/get_software_versions' params(params) -include { OUTPUT_DOCUMENTATION } from './modules/local/output_documentation' params(params) -include { TRIM_GALORE } from './modules/local/trim_galore.nf' addParams(params) +include { BWAMEM2_MEM } from './modules/local/bwamem2_mem.nf' +include { GET_SOFTWARE_VERSIONS } from './modules/local/get_software_versions' +include { OUTPUT_DOCUMENTATION } from './modules/local/output_documentation' +include { TRIM_GALORE } from './modules/local/trim_galore.nf' + +/* +================================================================================ + INCLUDE LOCAL PIPELINE SUBWORKFLOWS +================================================================================ +*/ + +include { BUILD_INDICES } from './modules/subworkflows/build_indices' /* ================================================================================ @@ -264,8 +272,8 @@ include { TRIM_GALORE } from './modules/local/trim_galore.nf' addParam ================================================================================ */ -include { FASTQC } from 
'./modules/nf-core/fastqc' params(params) -include { MULTIQC } from './modules/nf-core/multiqc' params(params) +include { FASTQC } from './modules/nf-core/fastqc' +include { MULTIQC } from './modules/nf-core/multiqc' // PREPARING CHANNELS FOR PREPROCESSING AND QC @@ -314,8 +322,6 @@ include { MULTIQC } from './modules/nf-core/multiqc' params(params) ================================================================================ */ -include { BUILD_INDICES } from './modules/subworkflows/build_indices' addParams(params) - workflow { BUILD_INDICES( From f683c50d5d575d0b59687716313f83a06069f90d Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 16 Jul 2020 16:24:46 +0200 Subject: [PATCH 5/9] center headers --- main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index 95ee1168f4..277cba5e4c 100644 --- a/main.nf +++ b/main.nf @@ -249,7 +249,7 @@ if (params.sentieon) log.warn "[nf-core/sarek] Sentieon will be used, only works /* ================================================================================ - INCLUDE LOCAL PIPELINE MODULES + INCLUDE LOCAL PIPELINE MODULES ================================================================================ */ @@ -260,7 +260,7 @@ include { TRIM_GALORE } from './modules/local/trim_galore.nf' /* ================================================================================ - INCLUDE LOCAL PIPELINE SUBWORKFLOWS + INCLUDE LOCAL PIPELINE SUBWORKFLOWS ================================================================================ */ From 5d8fc2b55c0bf36e4ce757816b2a28226b7d5de7 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 16 Jul 2020 17:07:43 +0200 Subject: [PATCH 6/9] all intervals stuff is out of main.nf --- main.nf | 28 ++------------------------- modules/subworkflows/build_indices.nf | 26 ++++++++++++++++++++++--- 2 files changed, 25 insertions(+), 29 deletions(-) diff --git a/main.nf b/main.nf index 277cba5e4c..116f6d69bb 100644 --- a/main.nf +++ b/main.nf @@ -338,37
+338,13 @@ workflow { dict = params.dict ?: BUILD_INDICES.out.dict fai = params.fasta_fai ? params.fasta_fai : BUILD_INDICES.out.fai germline_resource_tbi = params.germline_resource ? params.germline_resource_index ?: BUILD_INDICES.out.germline_resource_tbi : Channel.empty() - intervals_bed = params.no_intervals ? Channel.empty() : BUILD_INDICES.out.intervals_bed - known_indels_tbi = params.known_indels ? params.known_indels_index ?: BUILD_INDICES.out.known_indels_tbi : Channel.empty() - // known_indels_tbi = params.known_indels ? params.known_indels_index ?: BUILD_INDICES.out.known_indels_tbi.collect() : Channel.empty() + intervals_bed = BUILD_INDICES.out.intervals_bed + known_indels_tbi = params.known_indels ? params.known_indels_index ?: BUILD_INDICES.out.known_indels_tbi.collect() : Channel.empty() pon_tbi = params.pon ? params.pon_index ?: BUILD_INDICES.out.pon_tbi : Channel.empty() // PREPROCESSING - - // BED INTERVAL CHANNEL TRANSFORMING - intervals_bed.flatten() - .map { intervalFile -> - def duration = 0.0 - for (line in intervalFile.readLines()) { - final fields = line.split('\t') - if (fields.size() >= 5) duration += fields[4].toFloat() - else { - start = fields[1].toInteger() - end = fields[2].toInteger() - duration += (end - start) / params.nucleotides_per_second - } - } - [ duration, intervalFile] - }.toSortedList({ a, b -> b[0] <=> a[0] }) - .flatten().collate(2) - .map{duration, intervalFile -> intervalFile} intervals_bed.dump(tag:'bedintervals') - if (params.no_intervals && step != 'annotate') { - file("${params.outdir}/no_intervals.bed").text = "no_intervals\n" - intervals_bed = Channel.from(file("${params.outdir}/no_intervals.bed")) - } - if(!('fastqc' in skip_qc)) result_fastqc = FASTQC(input_sample) else diff --git a/modules/subworkflows/build_indices.nf b/modules/subworkflows/build_indices.nf index 731ce3bfbb..bc0fa1ba51 100644 --- a/modules/subworkflows/build_indices.nf +++ b/modules/subworkflows/build_indices.nf @@ -65,13 +65,33 @@ workflow 
BUILD_INDICES{ else result_pon_tbi = Channel.empty() - if (!('annotate' in step) && !('controlfreec' in step)) + if (params.no_intervals) { + file("${params.outdir}/no_intervals.bed").text = "no_intervals\n" + result_intervals = Channel.from(file("${params.outdir}/no_intervals.bed")) + } else if (!('annotate' in step) && !('controlfreec' in step)) if (!params.intervals) result_intervals = CREATE_INTERVALS_BED(BUILD_INTERVALS(SAMTOOLS_FAIDX.out)) else result_intervals = CREATE_INTERVALS_BED(params.intervals) - else - result_intervals = Channel.empty() + + if (!params.no_intervals) { + result_intervals.flatten() + .map { intervalFile -> + def duration = 0.0 + for (line in intervalFile.readLines()) { + final fields = line.split('\t') + if (fields.size() >= 5) duration += fields[4].toFloat() + else { + start = fields[1].toInteger() + end = fields[2].toInteger() + duration += (end - start) / params.nucleotides_per_second + } + } + [duration, intervalFile] + }.toSortedList({ a, b -> b[0] <=> a[0] }) + .flatten().collate(2) + .map{duration, intervalFile -> intervalFile} + } emit: bwa = result_bwa From b8df2ddf63796da91ccc3a55c8cce28134441d99 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 16 Jul 2020 18:58:15 +0200 Subject: [PATCH 7/9] restore multiple channels for bedintervals --- modules/subworkflows/build_indices.nf | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/modules/subworkflows/build_indices.nf b/modules/subworkflows/build_indices.nf index bc0fa1ba51..20ac7cf005 100644 --- a/modules/subworkflows/build_indices.nf +++ b/modules/subworkflows/build_indices.nf @@ -70,12 +70,12 @@ workflow BUILD_INDICES{ result_intervals = Channel.from(file("${params.outdir}/no_intervals.bed")) } else if (!('annotate' in step) && !('controlfreec' in step)) if (!params.intervals) - result_intervals = CREATE_INTERVALS_BED(BUILD_INTERVALS(SAMTOOLS_FAIDX.out)) + intervals = CREATE_INTERVALS_BED(BUILD_INTERVALS(SAMTOOLS_FAIDX.out)) else - result_intervals = 
CREATE_INTERVALS_BED(params.intervals) + intervals = CREATE_INTERVALS_BED(params.intervals) if (!params.no_intervals) { - result_intervals.flatten() + intervals.flatten() .map { intervalFile -> def duration = 0.0 for (line in intervalFile.readLines()) { @@ -91,6 +91,11 @@ workflow BUILD_INDICES{ }.toSortedList({ a, b -> b[0] <=> a[0] }) .flatten().collate(2) .map{duration, intervalFile -> intervalFile} + .multiMap{ + all: it + empty: "" + }.set{bed} + result_intervals = bed.all } emit: From 1e15297771664e5829ace89ee7cdcb52ab73ab26 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 16 Jul 2020 19:04:01 +0200 Subject: [PATCH 8/9] code polishing --- modules/subworkflows/build_indices.nf | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/modules/subworkflows/build_indices.nf b/modules/subworkflows/build_indices.nf index 20ac7cf005..588571a4e3 100644 --- a/modules/subworkflows/build_indices.nf +++ b/modules/subworkflows/build_indices.nf @@ -70,12 +70,12 @@ workflow BUILD_INDICES{ result_intervals = Channel.from(file("${params.outdir}/no_intervals.bed")) } else if (!('annotate' in step) && !('controlfreec' in step)) if (!params.intervals) - intervals = CREATE_INTERVALS_BED(BUILD_INTERVALS(SAMTOOLS_FAIDX.out)) + result_intervals = CREATE_INTERVALS_BED(BUILD_INTERVALS(SAMTOOLS_FAIDX.out)) else - intervals = CREATE_INTERVALS_BED(params.intervals) + result_intervals = CREATE_INTERVALS_BED(params.intervals) if (!params.no_intervals) { - intervals.flatten() + result_intervals = result_intervals.flatten() .map { intervalFile -> def duration = 0.0 for (line in intervalFile.readLines()) { @@ -91,11 +91,7 @@ workflow BUILD_INDICES{ }.toSortedList({ a, b -> b[0] <=> a[0] }) .flatten().collate(2) .map{duration, intervalFile -> intervalFile} - .multiMap{ - all: it - empty: "" - }.set{bed} - result_intervals = bed.all + .flatten() } emit: From cfb2ffdfcf6886d5ccb3a3730aa35422bc31c5d7 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 16 Jul 2020 19:04:55 
+0200 Subject: [PATCH 9/9] further polishing --- modules/subworkflows/build_indices.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/subworkflows/build_indices.nf b/modules/subworkflows/build_indices.nf index 588571a4e3..f7868fb104 100644 --- a/modules/subworkflows/build_indices.nf +++ b/modules/subworkflows/build_indices.nf @@ -91,7 +91,6 @@ workflow BUILD_INDICES{ }.toSortedList({ a, b -> b[0] <=> a[0] }) .flatten().collate(2) .map{duration, intervalFile -> intervalFile} - .flatten() } emit: