Merge pull request #379 from maxulysse/dsl2_modules_update

Update to DSL2 Best Practices
nf-core · Jun 11, 2021 · 39fd254
2 parents 1b7432c + 68e1cc4
commit 39fd254
Show file tree

Hide file tree

Showing 158 changed files with 4,908 additions and 3,987 deletions.
diff --git a/.github/workflows/local_modules.yml b/.github/workflows/local_modules.yml
@@ -23,7 +23,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        nxf_version: ['20.11.0-edge']
+        nxf_version: ['21.04.0']
         tags: ['${{ fromJson(needs.changes.outputs.modules) }}']
         profile: ['docker', 'singularity'] ## 'conda'
     env:

diff --git a/.gitignore b/.gitignore
@@ -4,7 +4,6 @@ work/
 data/
 results/
 .DS_Store
-tests/
 testing/
 testing*
 *.pyc
diff --git a/.nf-core-lint.yml b/.nf-core-lint.yml
@@ -2,5 +2,7 @@ files_unchanged:
   - .github/ISSUE_TEMPLATE/bug_report.md
   - .github/ISSUE_TEMPLATE/feature_request.md
   - .github/PULL_REQUEST_TEMPLATE.md
+  - .gitignore
   - assets/nf-core-sarek_logo.png
   - docs/images/nf-core-sarek_logo.png
+  - lib/NfcoreSchema.groovy
diff --git a/assets/dummy_file.txt b/assets/dummy_file.txt
diff --git a/bin/concatenateVCFs.sh b/bin/concatenateVCFs.sh
@@ -8,27 +8,27 @@ usage() { echo "Usage: $0 [-i genome_index_file] [-o output.file.no.gz.extension
 
 while [[ $# -gt 0 ]]
 do
-  key=$1
-  case $key in
+	key=$1
+	case $key in
 		-i)
 			genomeIndex=$2
 			shift # past argument
-	    shift # past value
+			shift # past value
 			;;
 		-c)
 			cpus=$2
 			shift # past argument
-	    shift # past value
+			shift # past value
 			;;
 		-o)
 			outputFile=$2
 			shift # past argument
-	    shift # past value
+			shift # past value
 			;;
 		-t)
 			targetBED=$2
 			shift # past argument
-	    shift # past value
+			shift # past value
 			;;
 		-n)
 			noInt=1
@@ -46,7 +46,7 @@ if [ -z ${cpus} ]; then echo "No CPUs defined: setting to 1"; cpus=1; fi
 if [ -z ${outputFile} ]; then echo "Missing output file name"; usage; fi
 
 
-if [ -z ${noInt+x} ] 
+if [ -z ${noInt+x} ]
 then
 	# First make a header from one of the VCF
 	# Remove interval information from the GATK command-line, but leave the rest
@@ -62,36 +62,36 @@ then
 
 	# Concatenate VCFs in the correct order
 	(
-	  cat header
+		cat header
 
-	  for chr in "${CONTIGS[@]}"; do
-	    # Skip if globbing would not match any file to avoid errors such as
-	    # "ls: cannot access chr3_*.vcf: No such file or directory" when chr3
-	    # was not processed.
-	    pattern="${chr}_*.vcf"
-	    if ! compgen -G "${pattern}" > /dev/null; then continue; fi
+		for chr in "${CONTIGS[@]}"; do
+			# Skip if globbing would not match any file to avoid errors such as
+			# "ls: cannot access chr3_*.vcf: No such file or directory" when chr3
+			# was not processed.
+			pattern="${chr}_*.vcf"
+			if ! compgen -G "${pattern}" > /dev/null; then continue; fi
 
-	    # ls -v sorts by numeric value ("version"), which means that chr1_100_
-	    # is sorted *after* chr1_99_.
-	    for vcf in $(ls -v ${pattern}); do
-	      # Determine length of header.
-	      # The 'q' command makes sed exit when it sees the first non-header
-	      # line, which avoids reading in the entire file.
-	      L=$(sed -n '/^[^#]/q;p' ${vcf} | wc -l)
-	
-	      # Then print all non-header lines. Since tail is very fast (nearly as
-	      # fast as cat), this is way more efficient than using a single sed,
-	      # awk or grep command.
-	      tail -n +$((L+1)) ${vcf}
-	    done
-	  done
+			# ls -v sorts by numeric value ("version"), which means that chr1_100_
+			# is sorted *after* chr1_99_.
+			for vcf in $(ls -v ${pattern}); do
+				# Determine length of header.
+				# The 'q' command makes sed exit when it sees the first non-header
+				# line, which avoids reading in the entire file.
+				L=$(sed -n '/^[^#]/q;p' ${vcf} | wc -l)
+
+				# Then print all non-header lines. Since tail is very fast (nearly as
+				# fast as cat), this is way more efficient than using a single sed,
+				# awk or grep command.
+				tail -n +$((L+1)) ${vcf}
+			done
+		done
 	) | bgzip -@${cpus} > rawcalls.vcf.gz
 	tabix rawcalls.vcf.gz
 else
-        VCF=$(ls no_intervals*.vcf)
-        cp $VCF rawcalls.vcf 
-        bgzip -@${cpus} rawcalls.vcf
-        tabix rawcalls.vcf.gz
+	VCF=$(ls no_intervals*.vcf)
+	cp $VCF rawcalls.vcf
+	bgzip -@${cpus} rawcalls.vcf
+	tabix rawcalls.vcf.gz
 fi
 
 set +u

diff --git a/conf/genomes.config b/conf/genomes.config
@@ -79,5 +79,15 @@ params {
     'custom' {
       fasta                   = null
     }
+    'small_hg38' {
+      dbsnp                   = "${params.genomes_base}/data/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz"
+      fasta                   = "${params.genomes_base}/data/genomics/homo_sapiens/genome/genome.fasta"
+      fasta_fai               = "${params.genomes_base}/data/genomics/homo_sapiens/genome/genome.fasta.fai"
+      germline_resource       = "${params.genomes_base}/data/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz"
+      known_indels            = "${params.genomes_base}/data/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz"
+      snpeff_db               = 'GRCh38.86'
+      species                 = 'homo_sapiens'
+      vep_cache_version       = '99'
+    }
   }
 }