From 1e5034e65f8681dbbe6c84722d1188af2f5016fc Mon Sep 17 00:00:00 2001 From: Smith Nicholas Date: Fri, 14 Jan 2022 22:12:34 +0100 Subject: [PATCH] fix to match nf-core updates --- .gitignore | 1 + bin/concatenateVCFs.sh | 18 ++++++++---------- conf/base.config | 2 +- lib/WorkflowMain.groovy | 6 ------ modules/nf-core/modules/samtools/merge/main.nf | 2 +- subworkflows/local/germline_variant_calling.nf | 5 +++-- 6 files changed, 14 insertions(+), 20 deletions(-) diff --git a/.gitignore b/.gitignore index 09ece0f765..fe141f73d4 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ results/ testing/ testing* *.pyc +*.swp diff --git a/bin/concatenateVCFs.sh b/bin/concatenateVCFs.sh index b9672abf17..2cbdcd05c1 100755 --- a/bin/concatenateVCFs.sh +++ b/bin/concatenateVCFs.sh @@ -1,6 +1,5 @@ #!/usr/bin/env bash set -euo pipefail - # This script concatenates all VCF files that are in the local directory, # that were created from different intervals to make a single final VCF @@ -49,8 +48,8 @@ if [ -z ${noInt+x} ] then # First make a header from one of the VCF # Remove interval information from the GATK command-line, but leave the rest - FIRSTVCF=$(set +o pipefail; ls *.vcf | head -n 1) - sed -n '/^[^#]/q;p' $FIRSTVCF | \ + FIRSTVCF=$(set +o pipefail; ls *.vcf.gz | head -n 1) + sed -n '/^[^#]/q;p' <(zcat $FIRSTVCF) | \ awk '!/GATKCommandLine/{print}/GATKCommandLine/{for(i=1;i<=NF;i++){if($i!~/intervals=/ && $i !~ /out=/){printf("%s ",$i)}}printf("\n")}' \ > header @@ -65,9 +64,9 @@ then for chr in "${CONTIGS[@]}"; do # Skip if globbing would not match any file to avoid errors such as - # "ls: cannot access chr3_*.vcf: No such file or directory" when chr3 + # "ls: cannot access chr3_*.vcf.gz: No such file or directory" when chr3 # was not processed. - pattern="${chr}_*.vcf" + pattern="*_${chr}_*.vcf.gz" if ! compgen -G "${pattern}" > /dev/null; then continue; fi # ls -v sorts by numeric value ("version"), which means that chr1_100_ @@ -76,20 +75,19 @@ then # Determine length of header. # The 'q' command makes sed exit when it sees the first non-header # line, which avoids reading in the entire file. - L=$(sed -n '/^[^#]/q;p' ${vcf} | wc -l) + L=$(sed -n '/^[^#]/q;p' <(zcat ${vcf}) | wc -l) # Then print all non-header lines. Since tail is very fast (nearly as # fast as cat), this is way more efficient than using a single sed, # awk or grep command. - tail -n +$((L+1)) ${vcf} + tail -n +$((L+1)) <(zcat ${vcf}) done done ) | bgzip -@${cpus} > rawcalls.vcf.gz tabix rawcalls.vcf.gz else - VCF=$(ls no_intervals*.vcf) - cp $VCF rawcalls.vcf - bgzip -@${cpus} rawcalls.vcf + VCF=$(ls no_intervals*.vcf.gz) + mv -v $VCF rawcalls.vcf.gz tabix rawcalls.vcf.gz fi diff --git a/conf/base.config b/conf/base.config index ce9e5c254b..a51094177f 100644 --- a/conf/base.config +++ b/conf/base.config @@ -14,7 +14,7 @@ process { time = { check_max( 4.h * task.attempt, 'time' ) } shell = ['/bin/bash', '-euo', 'pipefail'] - errorStrategy = { task.exitStatus in [143,137,104,134,139, 247] ? 'retry' : 'finish' } + errorStrategy = { task.exitStatus in [143,137,104,134,139,140,247] ? 'retry' : 'finish' } maxRetries = 1 maxErrors = '-1' diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index c0760a9da4..981243dbe3 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -71,12 +71,6 @@ class WorkflowMain { // Check AWS batch settings NfcoreTemplate.awsBatch(workflow, params) - - // Check input has been provided - if (!params.input) { - log.error "Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'" - System.exit(1) - } } // diff --git a/modules/nf-core/modules/samtools/merge/main.nf b/modules/nf-core/modules/samtools/merge/main.nf index fcfcf61f3d..b97620f32b 100644 --- a/modules/nf-core/modules/samtools/merge/main.nf +++ b/modules/nf-core/modules/samtools/merge/main.nf @@ -1,6 +1,6 @@ process SAMTOOLS_MERGE { tag "$meta.id" - label 'process_low' + label 'process_medium' conda (params.enable_conda ? "bioconda::samtools=1.14" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/subworkflows/local/germline_variant_calling.nf b/subworkflows/local/germline_variant_calling.nf index 235f95c09f..bea569d511 100644 --- a/subworkflows/local/germline_variant_calling.nf +++ b/subworkflows/local/germline_variant_calling.nf @@ -70,12 +70,13 @@ workflow GERMLINE_VARIANT_CALLING { haplotypecaller_gvcf = CONCAT_GVCF.out.vcf // include interval name into meta.id - HAPLOTYPECALLER.out.interval_vcf.map{ meta, vcf, intervals -> + haplotypecaller_gvcf.combine(intervals).map{ meta, vcf, vcf_index, intervals -> new_meta = meta.clone() new_meta.id = meta.sample + "_" + intervals.baseName - [new_meta, vcf, intervals] + [new_meta, vcf, vcf_index, intervals] }.set{haplotypecaller_interval_vcf} + // STEP GATK HAPLOTYPECALLER.2 GENOTYPEGVCF( haplotypecaller_interval_vcf,