From d248e7ce20ce59bf63ccfd713870d7dbe0498a24 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Thu, 9 Aug 2018 14:39:49 +0200 Subject: [PATCH 01/32] First efforts to generate bigWig files --- main.nf | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/main.nf b/main.nf index 9098207b2..5433c37f5 100644 --- a/main.nf +++ b/main.nf @@ -675,7 +675,7 @@ if(params.aligner == 'hisat2'){ file wherearemyfiles output: - file "${hisat2_bam.baseName}.sorted.bam" into bam_count, bam_rseqc, bam_preseq, bam_markduplicates, bam_featurecounts, bam_stringtieFPKM, bam_forSubsamp, bam_skipSubsamp + file "${hisat2_bam.baseName}.sorted.bam" into bam_count, bam_rseqc, bam_preseq, bam_markduplicates, bam_featurecounts, bam_stringtieFPKM, bam_for_genebody, bam_skipSubsamp file "where_are_my_files.txt" script: @@ -748,31 +748,30 @@ process rseqc { """ } + /* - * Step 4.1 Subsample the BAM files if necessary - */ -bam_forSubsamp - .filter { it.size() > params.subsampFilesizeThreshold } - .map { [it, params.subsampFilesizeThreshold / it.size() ] } - .set{ bam_forSubsampFiltered } -bam_skipSubsamp - .filter { it.size() <= params.subsampFilesizeThreshold } - .set{ bam_skipSubsampFiltered } -process bam_subsample { + * Step 4.1 Rseqc create BigWig coverage + */ + +process createBigWig { tag "${bam.baseName - '.sorted'}" + publishDir "${params.outdir}/bigwig", mode: 'copy' - input: - set file(bam), val(fraction) from bam_forSubsampFiltered + when: + !params.skip_qc && !params.skip_genebody_coverage + + input: + file bam from bam_for_genebody output: - file "*_subsamp.bam" into bam_subsampled + file "*.bigwig" into bigwig_for_genebody script: """ - samtools view -s $fraction -b $bam | samtools sort -o ${bam.baseName}_subsamp.bam + samtools index $bam + bamCoverage -b $bam -p ${task.cpus} -o $bam.bigwig """ } - /* * Step 4.2 Rseqc genebody_coverage */ @@ -791,7 +790,7 @@ process genebody_coverage { !params.skip_qc && !params.skip_genebody_coverage input: - file bam from bam_subsampled.concat(bam_skipSubsampFiltered) + file bigwig from bigwig_for_genebody file bed12 from bed_genebody_coverage.collect() output: @@ -799,9 +798,8 @@ process genebody_coverage { script: """ - samtools index $bam - geneBody_coverage.py \\ - -i $bam \\ + geneBody_coverage2.py \\ + -i $bigwig \\ -o ${bam.baseName}.rseqc \\ -r $bed12 mv log.txt ${bam.baseName}.rseqc.log.txt From 9fbefcba560e1340814ad42e04b8637478c448bf Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Thu, 9 Aug 2018 14:42:47 +0200 Subject: [PATCH 02/32] Add deeptools as dependency --- environment.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/environment.yml b/environment.yml index 1f9d5c554..7ec37e097 100644 --- a/environment.yml +++ b/environment.yml @@ -24,3 +24,4 @@ dependencies: - subread=1.6.1 - multiqc=1.5 - gffread=0.9.9 + - deeptools=3.1.1 From 16bab7f0ad4ee23d79e1fa9508d0db2a1b06a86c Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Thu, 9 Aug 2018 14:45:51 +0200 Subject: [PATCH 03/32] Fix subsampling steps in RNAseq --- main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index 5433c37f5..517c3a15d 100644 --- a/main.nf +++ b/main.nf @@ -592,7 +592,7 @@ if(params.aligner == 'star'){ star_aligned .filter { logs, bams -> check_log(logs) } .flatMap { logs, bams -> bams } - .into { bam_count; bam_rseqc; bam_preseq; bam_markduplicates; bam_featurecounts; bam_stringtieFPKM; bam_forSubsamp; bam_skipSubsamp } + .into { bam_count; bam_rseqc; bam_preseq; bam_markduplicates; bam_featurecounts; bam_stringtieFPKM; bam_for_genebody } } @@ -675,7 +675,7 @@ if(params.aligner == 'hisat2'){ file wherearemyfiles output: - file "${hisat2_bam.baseName}.sorted.bam" into bam_count, bam_rseqc, bam_preseq, bam_markduplicates, bam_featurecounts, bam_stringtieFPKM, bam_for_genebody, bam_skipSubsamp + file "${hisat2_bam.baseName}.sorted.bam" into bam_count, bam_rseqc, bam_preseq, bam_markduplicates, bam_featurecounts, bam_stringtieFPKM, bam_for_genebody file "where_are_my_files.txt" script: From 88cfb2972cf901fa1bca8b042099a60ed631814c Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Thu, 9 Aug 2018 14:46:49 +0200 Subject: [PATCH 04/32] Add createBigWig to base.config --- conf/base.config | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/conf/base.config b/conf/base.config index 24fa6dce0..a80593342 100644 --- a/conf/base.config +++ b/conf/base.config @@ -61,6 +61,12 @@ process { time = { check_max( 7.h * task.attempt, 'time' ) } errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } } + withName:createBigWig { + cpus = { check_max( 8 * task.attempt, 'cpus' ) } + memory = { check_max( 32.GB * task.attempt, 'memory' ) } + time = { check_max( 7.h * task.attempt, 'time' ) } + errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } + } withName:genebody_coverage { cpus = { check_max( 1 * task.attempt, 'cpus' ) } memory = { check_max( 32.GB * task.attempt, 'memory' ) } From 5801cca7ce825ce2c1dacec76ac196a55b3842b6 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Thu, 9 Aug 2018 14:48:13 +0200 Subject: [PATCH 05/32] Probably missing collect --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 517c3a15d..2611eea6c 100644 --- a/main.nf +++ b/main.nf @@ -790,7 +790,7 @@ process genebody_coverage { !params.skip_qc && !params.skip_genebody_coverage input: - file bigwig from bigwig_for_genebody + file bigwig from bigwig_for_genebody.collect() file bed12 from bed_genebody_coverage.collect() output: From 36059f9ec72a77e203877c77d6b4df63fa2aabf2 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Thu, 9 Aug 2018 15:01:00 +0200 Subject: [PATCH 06/32] Fix suffix --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 2611eea6c..86924526d 100644 --- a/main.nf +++ b/main.nf @@ -769,7 +769,7 @@ process createBigWig { script: """ samtools index $bam - bamCoverage -b $bam -p ${task.cpus} -o $bam.bigwig + bamCoverage -b $bam -p ${task.cpus} -o ${bam.baseName - 'sorted'}.bigwig """ } /* From c1f36aa17c268d3c82a3ccb1a4fd593538628c92 Mon Sep 17 00:00:00 2001 From: jun-wan Date: Fri, 10 Aug 2018 10:37:51 +0200 Subject: [PATCH 07/32] Fix rnaseq pipeline crashes with large input files; Add sequencing center in BAM --- conf/base.config | 18 +++++++++++++++--- main.nf | 10 ++++++++-- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/conf/base.config b/conf/base.config index c239d8972..25482a38f 100644 --- a/conf/base.config +++ b/conf/base.config @@ -41,9 +41,11 @@ process { time = { check_max( 8.h * task.attempt, 'time' ) } } $star { - cpus = { check_max( 10 * task.attempt, 'cpus' ) } + cpus = 10 memory = { check_max( 80.GB * task.attempt, 'memory' ) } time = { check_max( 8.h * task.attempt, 'time' ) } + errorStrategy = { task.exitStatus in [1,143,137,104] ? 'retry' : 'terminate' } + maxRetries = 2 } $hisat2Align { cpus = { check_max( 8 * task.attempt, 'cpus' ) } @@ -71,12 +73,22 @@ process { errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } } $markDuplicates { - cpus = { check_max( 2 * task.attempt, 'cpus' ) } + cpus = 2 memory = { check_max( 16.GB * task.attempt, 'memory' ) } + errorStrategy = { task.exitStatus in [1,143,137] ? 'retry' : 'terminate' } + maxRetries = 3 } $dupradar { - cpus = { check_max( 2 * task.attempt, 'cpus' ) } + cpus = 1 + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + errorStrategy = { task.exitStatus in [1,143,137,139] ? 'retry' : 'terminate' } + maxRetries = 3 + } + $featureCounts { + cpus = 2 memory = { check_max( 16.GB * task.attempt, 'memory' ) } + errorStrategy = { task.exitStatus in [1,143,137,134,104] ? 'retry' : 'terminate' } + maxRetries = 3 } $sample_correlation { cpus = { check_max( 2 * task.attempt, 'cpus' ) } diff --git a/main.nf b/main.nf index e47e1d7c0..d3a50aed7 100644 --- a/main.nf +++ b/main.nf @@ -100,6 +100,7 @@ params.hisat2_index = params.genome ? params.genomes[ params.genome ].hisat2 ?: params.multiqc_config = "$baseDir/assets/multiqc_config.yaml" params.email = false params.plaintext_email = false +params.seqCenter = false mdsplot_header = file("$baseDir/assets/mdsplot_header.txt") heatmap_header = file("$baseDir/assets/heatmap_header.txt") @@ -529,6 +530,7 @@ if(params.aligner == 'star'){ script: prefix = reads[0].toString() - ~/(_R1)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/ def avail_mem = task.memory == null ? '' : "--limitBAMsortRAM ${task.memory.toBytes() - 100000000}" + RG = params.seqCenter ? "--outSAMattrRGline ID:$prefix 'CN:$params.seqCenter'" : '' """ STAR --genomeDir $index \\ --sjdbGTFfile $gtf \\ @@ -539,7 +541,8 @@ if(params.aligner == 'star'){ --outSAMtype BAM SortedByCoordinate $avail_mem \\ --readFilesCommand zcat \\ --runDirPerm All_RWX \\ - --outFileNamePrefix $prefix + --outFileNamePrefix $prefix \\ + $RG """ } // Filter removes all 'aligned' channels that fail the check @@ -579,6 +582,7 @@ if(params.aligner == 'hisat2'){ script: index_base = hs2_indices[0].toString() - ~/.\d.ht2/ prefix = reads[0].toString() - ~/(_R1)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/ + RG = params.seqCenter ? "--rg-id ${prefix} --rg 'CN:${params.seqCenter}'" : '' def rnastrandness = '' if (forward_stranded && !unstranded){ rnastrandness = params.singleEnd ? '--rna-strandness F' : '--rna-strandness FR' @@ -595,6 +599,7 @@ if(params.aligner == 'hisat2'){ --met-stderr \\ --new-summary \\ --summary-file ${prefix}.hisat2_summary.txt \\ + $RG \\ | samtools view -bS -F 4 -F 256 - > ${prefix}.bam """ } else { @@ -610,6 +615,7 @@ if(params.aligner == 'hisat2'){ --met-stderr \\ --new-summary \\ --summary-file ${prefix}.hisat2_summary.txt \\ + $RG \\ | samtools view -bS -F 4 -F 8 -F 256 - > ${prefix}.bam """ } @@ -800,7 +806,7 @@ process markDuplicates { avail_mem = task.memory.toGiga() } """ - picard MarkDuplicates \\ + picard -Xmx${avail_mem}g MarkDuplicates \\ INPUT=$bam \\ OUTPUT=${bam.baseName}.markDups.bam \\ METRICS_FILE=${bam.baseName}.markDups_metrics.txt \\ From 86387c729dd2326d0d78d0b875b5ea6822e32a2a Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Fri, 10 Aug 2018 11:09:46 +0200 Subject: [PATCH 08/32] Back to old Dockerfile syntax --- Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index d4fdb9492..d6d7de824 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,4 +4,5 @@ LABEL authors="phil.ewels@scilifelab.se" \ description="Docker image containing all requirements for the nfcore/rnaseq pipeline" COPY environment.yml / -RUN conda env update -n root -f /environment.yml && conda clean -a +RUN conda env create -f /environment.yml && conda clean -a +ENV PATH /opt/conda/envs/nfcore-rnaseq-1.0dev/bin:$PATH \ No newline at end of file From 7d25565a45780d76c8e801f59a97e6c63ae444c9 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Fri, 10 Aug 2018 11:10:54 +0200 Subject: [PATCH 09/32] Removed bigwig from basename - its not bam anymore --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 86924526d..0a33327b7 100644 --- a/main.nf +++ b/main.nf @@ -776,7 +776,7 @@ process createBigWig { * Step 4.2 Rseqc genebody_coverage */ process genebody_coverage { - tag "${bam.baseName - '.sorted'}" + tag "${bigwig.baseName - '.bigwig'}" publishDir "${params.outdir}/rseqc" , mode: 'copy', saveAs: {filename -> if (filename.indexOf("geneBodyCoverage.curves.pdf") > 0) "geneBodyCoverage/$filename" From c47ca5ac3d26bea1e6f572edbbf1013292e89ef4 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Sat, 11 Aug 2018 00:16:21 +0200 Subject: [PATCH 10/32] Little update for environment --- environment.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/environment.yml b/environment.yml index daa8eae3b..3078fdefa 100644 --- a/environment.yml +++ b/environment.yml @@ -8,10 +8,10 @@ channels: dependencies: - conda-forge::openjdk=8.0.144 # Needed for FastQC - conda build hangs without this - fastqc=0.11.7 - - trim-galore=0.5 + - trim-galore=0.5.0 - star=2.6.0c - hisat2=2.1.0 - - picard=2.18.7 + - picard=2.18.11 - bioconductor-dupradar=1.8.0 - conda-forge::r-data.table=1.11.4 - conda-forge::r-gplots=3.0.1 @@ -21,7 +21,7 @@ dependencies: - rseqc=2.6.4 - samtools=1.9 - stringtie=1.3.4 - - subread=1.6.1 + - subread=1.6.2 - gffread=0.9.9 - deeptools=3.1.1 - multiqc=1.6 \ No newline at end of file From 4d8e91df21bdc2d5dee71f7ca5ecc4b3c3f63c59 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Sat, 11 Aug 2018 00:20:23 +0200 Subject: [PATCH 11/32] Just remove sorted suffix --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 0a33327b7..339c198e9 100644 --- a/main.nf +++ b/main.nf @@ -776,7 +776,7 @@ process createBigWig { * Step 4.2 Rseqc genebody_coverage */ process genebody_coverage { - tag "${bigwig.baseName - '.bigwig'}" + tag "${bigwig.baseName - '.sorted'}" publishDir "${params.outdir}/rseqc" , mode: 'copy', saveAs: {filename -> if (filename.indexOf("geneBodyCoverage.curves.pdf") > 0) "geneBodyCoverage/$filename" From 7808e62b688453973dee6090ba5d3521da9c8d3e Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Sat, 11 Aug 2018 00:28:53 +0200 Subject: [PATCH 12/32] Remove entire suffix --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 339c198e9..562dccf3b 100644 --- a/main.nf +++ b/main.nf @@ -754,7 +754,7 @@ process rseqc { */ process createBigWig { - tag "${bam.baseName - '.sorted'}" + tag "${bam.baseName}" publishDir "${params.outdir}/bigwig", mode: 'copy' when: From 8c1a4a693bde976dfc4b0552028c26a9c39f9485 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Sat, 11 Aug 2018 10:55:17 +0200 Subject: [PATCH 13/32] Fixing baseName usage --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 562dccf3b..489343016 100644 --- a/main.nf +++ b/main.nf @@ -769,7 +769,7 @@ process createBigWig { script: """ samtools index $bam - bamCoverage -b $bam -p ${task.cpus} -o ${bam.baseName - 'sorted'}.bigwig + bamCoverage -b $bam -p ${task.cpus} -o ${bam.baseName}.bigwig """ } /* From 3442d2555e25f2610349b12cfa99f8a571cccedb Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Sat, 11 Aug 2018 11:00:55 +0200 Subject: [PATCH 14/32] Don't remove sorted --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 489343016..5f87e7c34 100644 --- a/main.nf +++ b/main.nf @@ -776,7 +776,7 @@ process createBigWig { * Step 4.2 Rseqc genebody_coverage */ process genebody_coverage { - tag "${bigwig.baseName - '.sorted'}" + tag "${bigwig.baseName}" publishDir "${params.outdir}/rseqc" , mode: 'copy', saveAs: {filename -> if (filename.indexOf("geneBodyCoverage.curves.pdf") > 0) "geneBodyCoverage/$filename" From 140d7a2f844bb6a905b103dab94882c94b7aa4cf Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Sat, 11 Aug 2018 11:01:29 +0200 Subject: [PATCH 15/32] Theres no bam --- main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index 5f87e7c34..824988e7a 100644 --- a/main.nf +++ b/main.nf @@ -800,9 +800,9 @@ process genebody_coverage { """ geneBody_coverage2.py \\ -i $bigwig \\ - -o ${bam.baseName}.rseqc \\ + -o ${bigwig.baseName}.rseqc \\ -r $bed12 - mv log.txt ${bam.baseName}.rseqc.log.txt + mv log.txt ${bigwig.baseName}.rseqc.log.txt """ } From 16fab8a3d55b02e4cc865b52f0ac87dbef716af8 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Sat, 11 Aug 2018 11:16:45 +0200 Subject: [PATCH 16/32] remove collect --- main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index 824988e7a..a40da659b 100644 --- a/main.nf +++ b/main.nf @@ -754,7 +754,7 @@ process rseqc { */ process createBigWig { - tag "${bam.baseName}" + tag "${bam.baseName - 'sortedByCoord.out'}" publishDir "${params.outdir}/bigwig", mode: 'copy' when: @@ -790,7 +790,7 @@ process genebody_coverage { !params.skip_qc && !params.skip_genebody_coverage input: - file bigwig from bigwig_for_genebody.collect() + file bigwig from bigwig_for_genebody file bed12 from bed_genebody_coverage.collect() output: From 8575d4eece402e9a1d7bf4f1c318fb0ecae2e54e Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Sat, 11 Aug 2018 12:15:45 +0200 Subject: [PATCH 17/32] Add travis_wait / hopefully no more timeouts --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index c7b117c85..efee5c0f8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -28,6 +28,6 @@ script: # Lint the pipeline code - "nf-core lint ${TRAVIS_BUILD_DIR}" # Run, build reference genome with STAR - - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker + - travis_wait 40 nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker # Run, build reference genome with HISAT2 - - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --aligner hisat2 + - travis_wait 40 nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --aligner hisat2 From ee627490b5f2101c6e2a6d752fe65ade72682c7c Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Sat, 11 Aug 2018 15:08:09 +0200 Subject: [PATCH 18/32] Moving the log not required anymore --- main.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/main.nf b/main.nf index a40da659b..8485cf14f 100644 --- a/main.nf +++ b/main.nf @@ -802,7 +802,6 @@ process genebody_coverage { -i $bigwig \\ -o ${bigwig.baseName}.rseqc \\ -r $bed12 - mv log.txt ${bigwig.baseName}.rseqc.log.txt """ } From fdd7ad60ea31706b7184918fd9343678bd072096 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Sat, 11 Aug 2018 20:29:16 +0200 Subject: [PATCH 19/32] Still need a txt extension though --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 8485cf14f..83dc3a275 100644 --- a/main.nf +++ b/main.nf @@ -800,7 +800,7 @@ process genebody_coverage { """ geneBody_coverage2.py \\ -i $bigwig \\ - -o ${bigwig.baseName}.rseqc \\ + -o ${bigwig.baseName}.rseqc.txt \\ -r $bed12 """ } From 76044e0bb72e0ab931ae99a9b4b1f50d4d7fd06d Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Sun, 12 Aug 2018 21:58:35 +0200 Subject: [PATCH 20/32] Remove travis wait --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index efee5c0f8..c7b117c85 100644 --- a/.travis.yml +++ b/.travis.yml @@ -28,6 +28,6 @@ script: # Lint the pipeline code - "nf-core lint ${TRAVIS_BUILD_DIR}" # Run, build reference genome with STAR - - travis_wait 40 nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker + - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker # Run, build reference genome with HISAT2 - - travis_wait 40 nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --aligner hisat2 + - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --aligner hisat2 From 4822d1b21eb1fac38b20c5005377868cd8d78aec Mon Sep 17 00:00:00 2001 From: jun-wan Date: Mon, 13 Aug 2018 11:23:36 +0200 Subject: [PATCH 21/32] update default seeting in base.config and minor changes in main.nf --- conf/base.config | 43 +++++++++++++++---------------------------- main.nf | 14 ++++++-------- 2 files changed, 21 insertions(+), 36 deletions(-) diff --git a/conf/base.config b/conf/base.config index 25482a38f..ab73dae76 100644 --- a/conf/base.config +++ b/conf/base.config @@ -13,90 +13,77 @@ process { container = params.container - cpus = { check_max( 1 * task.attempt, 'cpus' ) } + cpus = { check_max( 2, 'cpus' ) } memory = { check_max( 8.GB * task.attempt, 'memory' ) } time = { check_max( 2.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'terminate' } - maxRetries = 1 + errorStrategy = { task.exitStatus in [1,143,137,104,134,139] ? 'retry' : 'terminate' } + maxRetries = 3 maxErrors = '-1' // Process-specific resource requirements $makeSTARindex { - cpus = { check_max( 10 * task.attempt, 'cpus' ) } + cpus = { check_max( 10, 'cpus' ) } memory = { check_max( 80.GB * task.attempt, 'memory' ) } time = { check_max( 5.h * task.attempt, 'time' ) } } $makeHISATindex { - cpus = { check_max( 10 * task.attempt, 'cpus' ) } + cpus = { check_max( 10, 'cpus' ) } memory = { check_max( 80.GB * task.attempt, 'memory' ) } time = { check_max( 5.h * task.attempt, 'time' ) } } $fastqc { - errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } + errorStrategy = 'ignore' } $trim_galore { - cpus = { check_max( 2 * task.attempt, 'cpus' ) } memory = { check_max( 16.GB * task.attempt, 'memory' ) } time = { check_max( 8.h * task.attempt, 'time' ) } } $star { - cpus = 10 + cpus = { check_max (10, 'cpus')} memory = { check_max( 80.GB * task.attempt, 'memory' ) } time = { check_max( 8.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [1,143,137,104] ? 'retry' : 'terminate' } - maxRetries = 2 } $hisat2Align { - cpus = { check_max( 8 * task.attempt, 'cpus' ) } + cpus = { check_max( 8, 'cpus' ) } memory = { check_max( 64.GB * task.attempt, 'memory' ) } time = { check_max( 8.h * task.attempt, 'time' ) } } $hisat2_sortOutput { - cpus = { check_max( 4 * task.attempt, 'cpus' ) } + cpus = { check_max( 4, 'cpus' ) } memory = { check_max( 32.GB * task.attempt, 'memory' ) } time = { check_max( 8.h * task.attempt, 'time' ) } } $rseqc { - cpus = { check_max( 8 * task.attempt, 'cpus' ) } + cpus = { check_max( 8, 'cpus' ) } memory = { check_max( 32.GB * task.attempt, 'memory' ) } time = { check_max( 7.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } + errorStrategy = 'ignore' } $genebody_coverage { - cpus = { check_max( 1 * task.attempt, 'cpus' ) } + cpus = { check_max( 1, 'cpus' ) } memory = { check_max( 32.GB * task.attempt, 'memory' ) } time = { check_max( 7.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } } $preseq { - errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } + errorStrategy = 'ignore' } $markDuplicates { - cpus = 2 memory = { check_max( 16.GB * task.attempt, 'memory' ) } - errorStrategy = { task.exitStatus in [1,143,137] ? 'retry' : 'terminate' } - maxRetries = 3 } $dupradar { - cpus = 1 + cpus = { check_max( 1, 'cpus' ) } memory = { check_max( 16.GB * task.attempt, 'memory' ) } - errorStrategy = { task.exitStatus in [1,143,137,139] ? 'retry' : 'terminate' } - maxRetries = 3 } $featureCounts { - cpus = 2 memory = { check_max( 16.GB * task.attempt, 'memory' ) } - errorStrategy = { task.exitStatus in [1,143,137,134,104] ? 'retry' : 'terminate' } - maxRetries = 3 } $sample_correlation { - cpus = { check_max( 2 * task.attempt, 'cpus' ) } memory = { check_max( 16.GB * task.attempt, 'memory' ) } } $multiqc { memory = { check_max( 2.GB * task.attempt, 'memory' ) } - errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } + errorStrategy = 'ignore' } $get_software_versions { memory = { check_max( 2.GB, 'memory' ) } diff --git a/main.nf b/main.nf index d3a50aed7..1e0138b22 100644 --- a/main.nf +++ b/main.nf @@ -64,6 +64,7 @@ def helpMessage() { --clusterOptions Extra SLURM options, used in conjunction with Uppmax.config --maxMultiqcEmailFileSize Theshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB) -name Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic. + --seqCenter Add sequencing center in @RG line of output BAM header AWSBatch options: --awsqueue The AWSBatch JobQueue that needs to be set when running on AWSBatch @@ -530,7 +531,7 @@ if(params.aligner == 'star'){ script: prefix = reads[0].toString() - ~/(_R1)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/ def avail_mem = task.memory == null ? '' : "--limitBAMsortRAM ${task.memory.toBytes() - 100000000}" - RG = params.seqCenter ? "--outSAMattrRGline ID:$prefix 'CN:$params.seqCenter'" : '' + def seqCenter = params.seqCenter ? "--outSAMattrRGline ID:$prefix 'CN:$params.seqCenter'" : '' """ STAR --genomeDir $index \\ --sjdbGTFfile $gtf \\ @@ -541,8 +542,7 @@ if(params.aligner == 'star'){ --outSAMtype BAM SortedByCoordinate $avail_mem \\ --readFilesCommand zcat \\ --runDirPerm All_RWX \\ - --outFileNamePrefix $prefix \\ - $RG + --outFileNamePrefix $prefix $seqCenter \\ """ } // Filter removes all 'aligned' channels that fail the check @@ -582,7 +582,7 @@ if(params.aligner == 'hisat2'){ script: index_base = hs2_indices[0].toString() - ~/.\d.ht2/ prefix = reads[0].toString() - ~/(_R1)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/ - RG = params.seqCenter ? "--rg-id ${prefix} --rg 'CN:${params.seqCenter}'" : '' + def seqCenter = params.seqCenter ? "--rg-id ${prefix} --rg CN:${params.seqCenter.replaceAll("\\s","_")}" : '' def rnastrandness = '' if (forward_stranded && !unstranded){ rnastrandness = params.singleEnd ? '--rna-strandness F' : '--rna-strandness FR' @@ -598,8 +598,7 @@ if(params.aligner == 'hisat2'){ -p ${task.cpus} \\ --met-stderr \\ --new-summary \\ - --summary-file ${prefix}.hisat2_summary.txt \\ - $RG \\ + --summary-file ${prefix}.hisat2_summary.txt $seqCenter \\ | samtools view -bS -F 4 -F 256 - > ${prefix}.bam """ } else { @@ -614,8 +613,7 @@ if(params.aligner == 'hisat2'){ -p ${task.cpus} \\ --met-stderr \\ --new-summary \\ - --summary-file ${prefix}.hisat2_summary.txt \\ - $RG \\ + --summary-file ${prefix}.hisat2_summary.txt $seqCenter \\ | samtools view -bS -F 4 -F 8 -F 256 - > ${prefix}.bam """ } From 8dbe57c25ba3e290fe67a9748ecf387cd6a3ce26 Mon Sep 17 00:00:00 2001 From: jun-wan Date: Tue, 14 Aug 2018 13:19:37 +0200 Subject: [PATCH 22/32] Fix syntax error from the Travis CI test --- main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index 02de02bc9..11af7ab88 100644 --- a/main.nf +++ b/main.nf @@ -577,7 +577,7 @@ if(params.aligner == 'star'){ script: prefix = reads[0].toString() - ~/(_R1)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/ def avail_mem = task.memory == null ? '' : "--limitBAMsortRAM ${task.memory.toBytes() - 100000000}" - def seqCenter = params.seqCenter ? "--outSAMattrRGline ID:$prefix 'CN:$params.seqCenter'" : '' + seqCenter = params.seqCenter ? "--outSAMattrRGline ID:$prefix 'CN:$params.seqCenter'" : '' """ STAR --genomeDir $index \\ --sjdbGTFfile $gtf \\ @@ -628,7 +628,7 @@ if(params.aligner == 'hisat2'){ script: index_base = hs2_indices[0].toString() - ~/.\d.ht2/ prefix = reads[0].toString() - ~/(_R1)?(_trimmed)?(_val_1)?(\.fq)?(\.fastq)?(\.gz)?$/ - def seqCenter = params.seqCenter ? "--rg-id ${prefix} --rg CN:${params.seqCenter.replaceAll("\\s","_")}" : '' + seqCenter = params.seqCenter ? "--rg-id ${prefix} --rg CN:${params.seqCenter.replaceAll('\\s','_')}" : '' def rnastrandness = '' if (forward_stranded && !unstranded){ rnastrandness = params.singleEnd ? '--rna-strandness F' : '--rna-strandness FR' From 92cade7c081e3be16f2a0553d29aab6dd2062786 Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Thu, 16 Aug 2018 13:26:40 +0200 Subject: [PATCH 23/32] Pull upstream and fix conflicts --- conf/base.config | 9 --------- nextflow.config | 15 +++++++++++---- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/conf/base.config b/conf/base.config index 48a59fdf1..c78239b5b 100644 --- a/conf/base.config +++ b/conf/base.config @@ -32,9 +32,6 @@ process { memory = { check_max( 80.GB * task.attempt, 'memory' ) } time = { check_max( 5.h * task.attempt, 'time' ) } } - withName:fastqc { - errorStrategy = 'ignore' - } withName:trim_galore { memory = { check_max( 16.GB * task.attempt, 'memory' ) } time = { check_max( 8.h * task.attempt, 'time' ) } @@ -64,16 +61,12 @@ process { cpus = { check_max( 8 * task.attempt, 'cpus' ) } memory = { check_max( 32.GB * task.attempt, 'memory' ) } time = { check_max( 7.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } } withName:genebody_coverage { cpus = { check_max( 1, 'cpus' ) } memory = { check_max( 32.GB * task.attempt, 'memory' ) } time = { check_max( 7.h * task.attempt, 'time' ) } } - withName:preseq { - errorStrategy = 'ignore' - } withName:markDuplicates { memory = { check_max( 16.GB * task.attempt, 'memory' ) } } @@ -99,7 +92,6 @@ process { withName:workflow_summary_mqc { memory = { check_max( 2.GB, 'memory' ) } cache = false - executor = 'local' errorStrategy = 'ignore' } } @@ -110,5 +102,4 @@ params { max_cpus = 16 max_time = 240.h igenomes_base = 's3://ngi-igenomes/igenomes/' - maxMultiqcEmailFileSize = 25.MB } diff --git a/nextflow.config b/nextflow.config index 9de09a9b6..c3a7316da 100644 --- a/nextflow.config +++ b/nextflow.config @@ -21,23 +21,30 @@ params { unstranded = false splicesites = false outdir = './results' - hisatBuildMemory = 200 // Required amount of memory in GB to build HISAT2 index with splice sites - subsampFilesizeThreshold = 10000000000 // Don't subsample BAMs for RSeQC gene_body_coverage if less than this saveReference = false saveTrimmed = false saveAlignedIntermediates = false singleEnd = false reads = "data/*{1,2}.fastq.gz" outdir = './results' + // Custom trimming options clip_r1 = 0 clip_r2 = 0 three_prime_clip_r1 = 0 three_prime_clip_r2 = 0 - sampleLevel = false - clusterOptions = false + + // AWS Batch awsqueue = false awsregion = 'eu-west-1' + + // Defaults + sampleLevel = false + clusterOptions = false + hisatBuildMemory = 200 // Required amount of memory in GB to build HISAT2 index with splice sites + subsampFilesizeThreshold = 10000000000 // Don't subsample BAMs for RSeQC gene_body_coverage if less than this + maxMultiqcEmailFileSize = 25.MB + readPaths = null tracedir = "${params.outdir}/pipeline_info" } From 42bea74bbc80f83861a0b37c05d6e51a165e01a3 Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Thu, 16 Aug 2018 13:31:00 +0200 Subject: [PATCH 24/32] Get the linting tests to pass --- .travis.yml | 31 ++++++++++++++++++------------- Dockerfile | 2 +- README.md | 2 +- Singularity | 8 ++++++-- environment.yml | 4 ++-- nextflow.config | 8 +++++--- 6 files changed, 33 insertions(+), 22 deletions(-) diff --git a/.travis.yml b/.travis.yml index c7b117c85..d99bbd9fc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,28 +1,33 @@ sudo: required -language: java +language: python jdk: openjdk8 services: docker +python: '3.6' +cache: pip +matrix: + fast_finish: true -before_install: docker pull nfcore/rnaseq:latest +before_install: + # PRs to master are only ok if coming from dev branch + - '[ $TRAVIS_PULL_REQUEST = "false" ] || [ $TRAVIS_BRANCH != "master" ] || ([ $TRAVIS_PULL_REQUEST_SLUG = $TRAVIS_REPO_SLUG ] && [ $TRAVIS_PULL_REQUEST_BRANCH = "dev" ])' + # Pull the docker image first so the test doesn't wait for this + - docker pull nfcore/rnaseq + # Fake the tag locally so that the pipeline runs properly + - docker tag nfcore/rnaseq nfcore/rnaseq:latest install: # Install Nextflow - - mkdir /tmp/nextflow - - cd /tmp/nextflow + - mkdir /tmp/nextflow && cd /tmp/nextflow - wget -qO- get.nextflow.io | bash - sudo ln -s /tmp/nextflow/nextflow /usr/local/bin/nextflow # Install nf-core/tools - - git clone https://github.com/nf-core/tools.git /tmp/nf-core-tools - - cd /tmp/nf-core-tools - - pip install --user -e . - # Make test directories - - mkdir ${TRAVIS_BUILD_DIR}/tests - - cd ${TRAVIS_BUILD_DIR}/tests - + - pip install nf-core + # Reset + - mkdir ${TRAVIS_BUILD_DIR}/tests && cd ${TRAVIS_BUILD_DIR}/tests env: - - NXF_VER=0.30.1 - - '' + - NXF_VER='0.31.1' # Specify a minimum NF version that should be tested and work + - NXF_VER='' # Plus: get the latest NF version and check that it works script: # Lint the pipeline code diff --git a/Dockerfile b/Dockerfile index d6d7de824..7e7bb43c7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,4 +5,4 @@ LABEL authors="phil.ewels@scilifelab.se" \ COPY environment.yml / RUN conda env create -f /environment.yml && conda clean -a -ENV PATH /opt/conda/envs/nfcore-rnaseq-1.0dev/bin:$PATH \ No newline at end of file +ENV PATH /opt/conda/envs/nf-core-rnaseq-1.0dev/bin:$PATH diff --git a/README.md b/README.md index 2cedeabd0..3a769ec03 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # ![nfcore/rnaseq](docs/images/nfcore-rnaseq_logo.png) [![Build Status](https://travis-ci.org/nf-core/rnaseq.svg?branch=master)](https://travis-ci.org/nf-core/rnaseq) -[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A50.30.1-brightgreen.svg)](https://www.nextflow.io/) +[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A50.31.1-brightgreen.svg)](https://www.nextflow.io/) [![Gitter](https://img.shields.io/badge/gitter-%20join%20chat%20%E2%86%92-4fb99a.svg)](https://gitter.im/nf-core/Lobby) [![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg)](http://bioconda.github.io/) diff --git a/Singularity b/Singularity index d286cbf35..ce0b0f59e 100644 --- a/Singularity +++ b/Singularity @@ -3,12 +3,16 @@ Bootstrap:docker %labels MAINTAINER Phil Ewels - DESCRIPTION Container image containing all requirements for the nf-core/rnaseq pipeline + DESCRIPTION Singularity image containing all requirements for nf-core/rnaseq pipeline VERSION 1.0dev +%environment + PATH=/opt/conda/envs/nf-core-rnaseq-1.0dev/bin:$PATH + export PATH + %files environment.yml / %post - /opt/conda/bin/conda env update -n root -f /environment.yml + /opt/conda/bin/conda env create -f /environment.yml /opt/conda/bin/conda clean -a diff --git a/environment.yml b/environment.yml index 3078fdefa..8b8966a22 100644 --- a/environment.yml +++ b/environment.yml @@ -1,6 +1,6 @@ # You can use this file to create a conda environment for this pipeline: # conda env create -f environment.yml -name: nfcore-rnaseq-1.0dev +name: nf-core-rnaseq-1.0dev channels: - bioconda - conda-forge @@ -24,4 +24,4 @@ dependencies: - subread=1.6.2 - gffread=0.9.9 - deeptools=3.1.1 - - multiqc=1.6 \ No newline at end of file + - multiqc=1.6 diff --git a/nextflow.config b/nextflow.config index c3a7316da..8b5837ab0 100644 --- a/nextflow.config +++ b/nextflow.config @@ -10,8 +10,6 @@ // Global default params, used in configs params { - version = '1.0dev' //Pipeline version - nf_required_version = '0.30.1' //Minimum version of Nextflow required container = 'nfcore/rnaseq:latest' // Container slug. Stable releases should specify release tag! // Pipeline Options @@ -137,9 +135,13 @@ dag { } manifest { - homePage = 'https://github.com/nf-core/rnaseq' + name = 'nf-core/rnaseq' description = 'Nextflow RNA-Seq analysis pipeline, part of the nf-core community.' + homePage = 'https://github.com/nf-core/rnaseq' + author = 'Phil Ewels, Rickard Hammarén' + pipelineVersion = '1.0dev' mainScript = 'main.nf' + nextflowVersion = '>=0.31.1' } // Function to ensure that resource requirements don't go beyond From a484c3e8b9c4b7e74f9dc9d24e3095a99f827e61 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Thu, 16 Aug 2018 13:33:15 +0200 Subject: [PATCH 25/32] Update STAR version to latest (fixing segfaults) --- environment.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/environment.yml b/environment.yml index 3078fdefa..1414a8d94 100644 --- a/environment.yml +++ b/environment.yml @@ -9,7 +9,7 @@ dependencies: - conda-forge::openjdk=8.0.144 # Needed for FastQC - conda build hangs without this - fastqc=0.11.7 - trim-galore=0.5.0 - - star=2.6.0c + - star=2.6.1a - hisat2=2.1.0 - picard=2.18.11 - bioconductor-dupradar=1.8.0 @@ -24,4 +24,4 @@ dependencies: - subread=1.6.2 - gffread=0.9.9 - deeptools=3.1.1 - - multiqc=1.6 \ No newline at end of file + - multiqc=1.6 From d648825f12e3969f8485e1fd6217382a0d5fe150 Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Thu, 16 Aug 2018 13:37:52 +0200 Subject: [PATCH 26/32] Switch from old nextflow & pipeline param usage to new manifest. --- Singularity | 2 +- main.nf | 40 +++++++--------------------------------- 2 files changed, 8 insertions(+), 34 deletions(-) diff --git a/Singularity b/Singularity index ce0b0f59e..86630108d 100644 --- a/Singularity +++ b/Singularity @@ -3,7 +3,7 @@ Bootstrap:docker %labels MAINTAINER Phil Ewels - DESCRIPTION Singularity image containing all requirements for nf-core/rnaseq pipeline + DESCRIPTION Singularity image containing all requirements for the nf-core/rnaseq pipeline VERSION 1.0dev %environment diff --git a/main.nf b/main.nf index acd1cedd9..2dc83ed23 100644 --- a/main.nf +++ b/main.nf @@ -15,7 +15,7 @@ def helpMessage() { log.info""" =================================== - nfcore/rnaseq ~ version ${params.version} + nfcore/rnaseq ~ version ${manifest.pipelineVersion} =================================== Usage: @@ -246,7 +246,7 @@ log.info """======================================================= | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-, `._,._,\' - nf-core/rnaseq : RNA-Seq Best Practice v${params.version} + nf-core/rnaseq : RNA-Seq Best Practice v${manifest.pipelineVersion} =======================================================""" def summary = [:] summary['Run Name'] = custom_runName ?: workflow.runName @@ -296,20 +296,6 @@ log.info summary.collect { k,v -> "${k.padRight(15)}: $v" }.join("\n") log.info "=========================================" -// Check that Nextflow version is up to date enough -// try / throw / catch works for NF versions < 0.25 when this was implemented -try { - if( ! nextflow.version.matches(">= $params.nf_required_version") ){ - throw GroovyException('Nextflow version too old') - } -} catch (all) { - log.error "====================================================\n" + - " Nextflow version $params.nf_required_version required! You are running v$workflow.nextflow.version.\n" + - " Pipeline execution will continue, but things may break.\n" + - " Please run `nextflow self-update` to update Nextflow.\n" + - "============================================================" -} - // Show a big error message if we're running on the base config and an uppmax cluster if( workflow.profile == 'standard'){ if ( "hostname".execute().text.contains('.uppmax.uu.se') ) { @@ -754,8 +740,8 @@ process rseqc { /* - * Step 4.1 Rseqc create BigWig coverage - */ + * Step 4.1 Rseqc create BigWig coverage + */ process createBigWig { tag "${bam.baseName - 'sortedByCoord.out'}" @@ -764,7 +750,7 @@ process createBigWig { when: !params.skip_qc && !params.skip_genebody_coverage - input: + input: file bam from bam_for_genebody output: @@ -1052,7 +1038,7 @@ process get_software_versions { script: """ - echo $params.version &> v_ngi_rnaseq.txt + echo $manifest.pipelineVersion &> v_ngi_rnaseq.txt echo $workflow.nextflow.version &> v_nextflow.txt fastqc --version &> v_fastqc.txt cutadapt --version &> v_cutadapt.txt @@ -1167,7 +1153,7 @@ workflow.onComplete { subject = "[nfcore/rnaseq] FAILED: $workflow.runName" } def email_fields = [:] - email_fields['version'] = params.version + email_fields['version'] = manifest.pipelineVersion email_fields['runName'] = custom_runName ?: workflow.runName email_fields['success'] = workflow.success email_fields['dateComplete'] = workflow.complete @@ -1253,18 +1239,6 @@ workflow.onComplete { log.info "[nfcore/rnaseq] Pipeline Complete" - try { - if( ! nextflow.version.matches(">= $params.nf_required_version") ){ - throw GroovyException('Nextflow version too old') - } - } catch (all) { - log.error "====================================================\n" + - " Nextflow version $params.nf_required_version required! You are running v$workflow.nextflow.version.\n" + - " Please be extra careful with pipeline results.\n" + - " Run `nextflow self-update` to update Nextflow.\n" + - "============================================================" - } - if(!workflow.success){ if( workflow.profile == 'standard'){ if ( "hostname".execute().text.contains('.uppmax.uu.se') ) { From cfadba521f0a0dbb300edfdb442fb145f1b304b1 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Fri, 17 Aug 2018 14:28:50 +0200 Subject: [PATCH 27/32] Fix containsKey if no genome is used --- main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index acd1cedd9..6dff222b1 100644 --- a/main.nf +++ b/main.nf @@ -95,8 +95,8 @@ if (params.help){ } // Check if genome exists in the config file -if (!params.genomes.containsKey(params.genome) && params.genome) { - exit 1, "The provided genome '${params.genome}' is not available in the iGenomes file. Currently the available genomes are ${params.genomes.keySet().join(", ")}" +if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { + exit 1, "The provided genome '${params.genome}' is not available in the iGenomes file. Currently the available genomes are ${params.genomes.keySet().join(", ")}" } // Configurable variables From 499f202bab1a0073d05ccdc5a8754a840888f5c0 Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Fri, 17 Aug 2018 15:19:07 +0200 Subject: [PATCH 28/32] Remove manifest.pipelineVersion from main.nf Hopefully we can go back to only using this in a future version of nextflow. --- main.nf | 21 ++++++++++++++------- nextflow.config | 5 ++++- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/main.nf b/main.nf index 2dc83ed23..5a8eefa45 100644 --- a/main.nf +++ b/main.nf @@ -13,10 +13,17 @@ */ def helpMessage() { - log.info""" - =================================== - nfcore/rnaseq ~ version ${manifest.pipelineVersion} - =================================== + log.info """ + ======================================================= + ,--./,-. + ___ __ __ __ ___ /,-._.--~\' + |\\ | |__ __ / ` / \\ |__) |__ } { + | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-, + `._,._,\' + + nf-core/rnaseq : RNA-Seq Best Practice v${params.pipelineVersion} + ======================================================= + Usage: The typical command for running the pipeline is as follows: @@ -246,7 +253,7 @@ log.info """======================================================= | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-, `._,._,\' - nf-core/rnaseq : RNA-Seq Best Practice v${manifest.pipelineVersion} + nf-core/rnaseq : RNA-Seq Best Practice v${params.pipelineVersion} =======================================================""" def summary = [:] summary['Run Name'] = custom_runName ?: workflow.runName @@ -1038,7 +1045,7 @@ process get_software_versions { script: """ - echo $manifest.pipelineVersion &> v_ngi_rnaseq.txt + echo $params.pipelineVersion &> v_ngi_rnaseq.txt echo $workflow.nextflow.version &> v_nextflow.txt fastqc --version &> v_fastqc.txt cutadapt --version &> v_cutadapt.txt @@ -1153,7 +1160,7 @@ workflow.onComplete { subject = "[nfcore/rnaseq] FAILED: $workflow.runName" } def email_fields = [:] - email_fields['version'] = manifest.pipelineVersion + email_fields['version'] = params.pipelineVersion email_fields['runName'] = custom_runName ?: workflow.runName email_fields['success'] = workflow.success email_fields['dateComplete'] = workflow.complete diff --git a/nextflow.config b/nextflow.config index 8b5837ab0..d6e923b5b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -44,6 +44,8 @@ params { maxMultiqcEmailFileSize = 25.MB readPaths = null tracedir = "${params.outdir}/pipeline_info" + // TODO: Remove this if/when we can. See https://github.com/nextflow-io/nextflow/issues/840 + pipelineVersion = '1.0dev' } profiles { @@ -139,7 +141,8 @@ manifest { description = 'Nextflow RNA-Seq analysis pipeline, part of the nf-core community.' homePage = 'https://github.com/nf-core/rnaseq' author = 'Phil Ewels, Rickard Hammarén' - pipelineVersion = '1.0dev' + // TODO: Define only here if/when we can. See https://github.com/nextflow-io/nextflow/issues/840 + pipelineVersion = params.pipelineVersion mainScript = 'main.nf' nextflowVersion = '>=0.31.1' } From d4b504906f8aa6a8e7c722887be6d9493eafa0c5 Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Mon, 20 Aug 2018 16:30:23 +0200 Subject: [PATCH 29/32] Bump version number to v1.0 and write changelog --- .travis.yml | 2 +- CHANGELOG.md | 43 ++++++++++++++++++++++++++++++++++++++++--- Dockerfile | 2 +- Singularity | 4 ++-- environment.yml | 2 +- nextflow.config | 5 +++-- 6 files changed, 48 insertions(+), 10 deletions(-) diff --git a/.travis.yml b/.travis.yml index d99bbd9fc..ba99d8215 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,7 +13,7 @@ before_install: # Pull the docker image first so the test doesn't wait for this - docker pull nfcore/rnaseq # Fake the tag locally so that the pipeline runs properly - - docker tag nfcore/rnaseq nfcore/rnaseq:latest + - docker tag nfcore/rnaseq nfcore/rnaseq:1.0 install: # Install Nextflow diff --git a/CHANGELOG.md b/CHANGELOG.md index 8c2a40096..4880f5608 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,42 @@ -## nf-core/rnaseq v1.0dev -This release marks the point where the pipeline was moved from SciLifeLab/NGI-RNAseq -over to the new nf-core community, at nf-core/rnaseq. +# nf-core/rnaseq + +## [Version 1.0](https://github.com/nf-core/rnaseq/releases/tag/1.0) - 2018-08-20 + +This release marks the point where the pipeline was moved from [SciLifeLab/NGI-RNAseq](https://github.com/SciLifeLab/NGI-RNAseq) +over to the new [nf-core](http://nf-co.re/) community, at [nf-core/rnaseq](https://github.com/nf-core/rnaseq). View the previous changelog at [SciLifeLab/NGI-RNAseq/CHANGELOG.md](https://github.com/SciLifeLab/NGI-RNAseq/blob/master/CHANGELOG.md) + +In addition to porting to the new nf-core community, the pipeline has had a number of major changes in this version. +There have been 157 commits by 16 different contributors coving 70 different files in the pipeline: 7,357 additions and 8,236 deletions! + +In summary, the main changes are: + +* Rebranding and renaming throughout the pipeline to nf-core +* Updating many parts of the pipeline config and style to meet nf-core standards +* Support for GFF files in addition to GTF files + * Just use `--gff` instead of `--gtf` when specifying a file path +* New command line options to skip various quality control steps +* More safety checks when launching a pipeline + * Several new sanity checks - for example, that the specified reference genome exists +* Improved performance with memory usage (especially STAR and Picard) +* New BigWig file outputs for plotting coverage across the genome +* Refactored gene body coverage calculation, now much faster and using much less memory +* Bugfixes in the MultiQC process to avoid edge cases where it wouldn't run +* MultiQC report now automatically attached to the email sent when the pipeline completes +* New testing method, with data on GitHub + * Now run pipeline with `-profile test` instead of using bash scripts +* Rewritten continuous integration tests with Travis CI +* New explicit support for Singularity containers +* Improved MultiQC support for DupRadar and featureCounts + * Now works for all users instead of just NGI Stockholm +* New configuration for use on AWS batch +* Updated config syntax to support latest versions of Nextflow +* Built-in support for a number of new local HPC systems + * CCGA, GIS, UCT HEX, updates to UPPMAX, CFC, BINAC, Hebbe, c3se +* Slightly improved documentation (more updates to come) +* Updated software packages + +...and many more minor tweaks. + +Thanks to everyone who has worked on this release! diff --git a/Dockerfile b/Dockerfile index 7e7bb43c7..431bc2793 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,4 +5,4 @@ LABEL authors="phil.ewels@scilifelab.se" \ COPY environment.yml / RUN conda env create -f /environment.yml && conda clean -a -ENV PATH /opt/conda/envs/nf-core-rnaseq-1.0dev/bin:$PATH +ENV PATH /opt/conda/envs/nf-core-rnaseq-1.0/bin:\$PATH diff --git a/Singularity b/Singularity index 86630108d..fd55e6532 100644 --- a/Singularity +++ b/Singularity @@ -4,10 +4,10 @@ Bootstrap:docker %labels MAINTAINER Phil Ewels DESCRIPTION Singularity image containing all requirements for the nf-core/rnaseq pipeline - VERSION 1.0dev + VERSION 1.0 %environment - PATH=/opt/conda/envs/nf-core-rnaseq-1.0dev/bin:$PATH + PATH=/opt/conda/envs/nf-core-rnaseq-1.0/bin:\$PATH export PATH %files diff --git a/environment.yml b/environment.yml index 6b613db65..6f78e33ea 100644 --- a/environment.yml +++ b/environment.yml @@ -1,6 +1,6 @@ # You can use this file to create a conda environment for this pipeline: # conda env create -f environment.yml -name: nf-core-rnaseq-1.0dev +name: nf-core-rnaseq-1.0 channels: - bioconda - conda-forge diff --git a/nextflow.config b/nextflow.config index d6e923b5b..50aa65607 100644 --- a/nextflow.config +++ b/nextflow.config @@ -10,10 +10,11 @@ // Global default params, used in configs params { - container = 'nfcore/rnaseq:latest' // Container slug. Stable releases should specify release tag! + container = 'nfcore/rnaseq:1.0' // Container slug. Stable releases should specify release tag! // Pipeline Options aligner = 'star' + genome = false forward_stranded = false reverse_stranded = false unstranded = false @@ -45,7 +46,7 @@ params { readPaths = null tracedir = "${params.outdir}/pipeline_info" // TODO: Remove this if/when we can. See https://github.com/nextflow-io/nextflow/issues/840 - pipelineVersion = '1.0dev' + pipelineVersion = '1.0' } profiles { From 0d0dab91b12b4c85cbdcbd9d5df49ee9e94858dd Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Mon, 20 Aug 2018 16:40:22 +0200 Subject: [PATCH 30/32] Changelog typo fixed --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4880f5608..8b41e1d39 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ over to the new [nf-core](http://nf-co.re/) community, at [nf-core/rnaseq](https View the previous changelog at [SciLifeLab/NGI-RNAseq/CHANGELOG.md](https://github.com/SciLifeLab/NGI-RNAseq/blob/master/CHANGELOG.md) In addition to porting to the new nf-core community, the pipeline has had a number of major changes in this version. -There have been 157 commits by 16 different contributors coving 70 different files in the pipeline: 7,357 additions and 8,236 deletions! +There have been 157 commits by 16 different contributors covering 70 different files in the pipeline: 7,357 additions and 8,236 deletions! In summary, the main changes are: From 3197bddf55e94ef6957509519cb97338889c4cb6 Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Mon, 20 Aug 2018 17:09:22 +0200 Subject: [PATCH 31/32] Remove cheeky backslashes that crept in after version bump --- Dockerfile | 2 +- Singularity | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 431bc2793..1c7574a5d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,4 +5,4 @@ LABEL authors="phil.ewels@scilifelab.se" \ COPY environment.yml / RUN conda env create -f /environment.yml && conda clean -a -ENV PATH /opt/conda/envs/nf-core-rnaseq-1.0/bin:\$PATH +ENV PATH /opt/conda/envs/nf-core-rnaseq-1.0/bin:$PATH diff --git a/Singularity b/Singularity index fd55e6532..fa004de48 100644 --- a/Singularity +++ b/Singularity @@ -7,7 +7,7 @@ Bootstrap:docker VERSION 1.0 %environment - PATH=/opt/conda/envs/nf-core-rnaseq-1.0/bin:\$PATH + PATH=/opt/conda/envs/nf-core-rnaseq-1.0/bin:$PATH export PATH %files From 0fe8b6cb7439567722809d68ab97f1faefd3e18b Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Mon, 20 Aug 2018 17:45:31 +0200 Subject: [PATCH 32/32] Add missing deepTools output for MultiQC/Report --- bin/scrape_software_versions.py | 2 ++ main.nf | 1 + 2 files changed, 3 insertions(+) diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py index ba6e83ed5..eb7eb55c5 100755 --- a/bin/scrape_software_versions.py +++ b/bin/scrape_software_versions.py @@ -14,6 +14,7 @@ 'Picard MarkDuplicates': ['v_markduplicates.txt', r"([\d\.]+)-SNAPSHOT"], 'Samtools': ['v_samtools.txt', r"samtools (\S+)"], 'featureCounts': ['v_featurecounts.txt', r"featureCounts v(\S+)"], + 'deepTools': ['v_deeptools.txt', r"bamCoverage (\S+)"], 'StringTie': ['v_stringtie.txt', r"(\S+)"], 'Preseq': ['v_preseq.txt', r"Version: (\S+)"], 'RSeQC': ['v_rseqc.txt', r"read_duplication.py ([\d\.]+)"], @@ -32,6 +33,7 @@ results['featureCounts'] = 'N/A' results['StringTie'] = 'N/A' results['Preseq'] = 'N/A' +results['deepTools'] = 'N/A' results['RSeQC'] = 'N/A' results['MultiQC'] = 'N/A' diff --git a/main.nf b/main.nf index 68afeca2b..a1abc371c 100644 --- a/main.nf +++ b/main.nf @@ -1055,6 +1055,7 @@ process get_software_versions { stringtie --version &> v_stringtie.txt preseq &> v_preseq.txt read_duplication.py --version &> v_rseqc.txt + echo \$(bamCoverage --version 2>&1) > v_deeptools.txt featureCounts -v &> v_featurecounts.txt picard MarkDuplicates --version &> v_markduplicates.txt || true samtools --version &> v_samtools.txt