Skip to content

Commit

Permalink
Merge pull request #379 from maxulysse/dsl2_modules_update
Browse files Browse the repository at this point in the history
Update to DSL2 Best Practices
  • Loading branch information
maxulysse authored Jun 11, 2021
2 parents 1b7432c + 68e1cc4 commit 39fd254
Show file tree
Hide file tree
Showing 158 changed files with 4,908 additions and 3,987 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/local_modules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ jobs:
strategy:
fail-fast: false
matrix:
nxf_version: ['20.11.0-edge']
nxf_version: ['21.04.0']
tags: ['${{ fromJson(needs.changes.outputs.modules) }}']
profile: ['docker', 'singularity'] ## 'conda'
env:
Expand Down
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ work/
data/
results/
.DS_Store
tests/
testing/
testing*
*.pyc
2 changes: 2 additions & 0 deletions .nf-core-lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,7 @@ files_unchanged:
- .github/ISSUE_TEMPLATE/bug_report.md
- .github/ISSUE_TEMPLATE/feature_request.md
- .github/PULL_REQUEST_TEMPLATE.md
- .gitignore
- assets/nf-core-sarek_logo.png
- docs/images/nf-core-sarek_logo.png
- lib/NfcoreSchema.groovy
Empty file added assets/dummy_file.txt
Empty file.
64 changes: 32 additions & 32 deletions bin/concatenateVCFs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,27 @@ usage() { echo "Usage: $0 [-i genome_index_file] [-o output.file.no.gz.extension

while [[ $# -gt 0 ]]
do
key=$1
case $key in
key=$1
case $key in
-i)
genomeIndex=$2
shift # past argument
shift # past value
shift # past value
;;
-c)
cpus=$2
shift # past argument
shift # past value
shift # past value
;;
-o)
outputFile=$2
shift # past argument
shift # past value
shift # past value
;;
-t)
targetBED=$2
shift # past argument
shift # past value
shift # past value
;;
-n)
noInt=1
Expand All @@ -46,7 +46,7 @@ if [ -z ${cpus} ]; then echo "No CPUs defined: setting to 1"; cpus=1; fi
if [ -z ${outputFile} ]; then echo "Missing output file name"; usage; fi


if [ -z ${noInt+x} ]
if [ -z ${noInt+x} ]
then
# First make a header from one of the VCFs
# Remove interval information from the GATK command-line, but leave the rest
Expand All @@ -62,36 +62,36 @@ then

# Concatenate VCFs in the correct order
(
cat header
cat header

for chr in "${CONTIGS[@]}"; do
# Skip if globbing would not match any file to avoid errors such as
# "ls: cannot access chr3_*.vcf: No such file or directory" when chr3
# was not processed.
pattern="${chr}_*.vcf"
if ! compgen -G "${pattern}" > /dev/null; then continue; fi
for chr in "${CONTIGS[@]}"; do
# Skip if globbing would not match any file to avoid errors such as
# "ls: cannot access chr3_*.vcf: No such file or directory" when chr3
# was not processed.
pattern="${chr}_*.vcf"
if ! compgen -G "${pattern}" > /dev/null; then continue; fi

# ls -v sorts by numeric value ("version"), which means that chr1_100_
# is sorted *after* chr1_99_.
for vcf in $(ls -v ${pattern}); do
# Determine length of header.
# The 'q' command makes sed exit when it sees the first non-header
# line, which avoids reading in the entire file.
L=$(sed -n '/^[^#]/q;p' ${vcf} | wc -l)
# Then print all non-header lines. Since tail is very fast (nearly as
# fast as cat), this is way more efficient than using a single sed,
# awk or grep command.
tail -n +$((L+1)) ${vcf}
done
done
# ls -v sorts by numeric value ("version"), which means that chr1_100_
# is sorted *after* chr1_99_.
for vcf in $(ls -v ${pattern}); do
# Determine length of header.
# The 'q' command makes sed exit when it sees the first non-header
# line, which avoids reading in the entire file.
L=$(sed -n '/^[^#]/q;p' ${vcf} | wc -l)

# Then print all non-header lines. Since tail is very fast (nearly as
# fast as cat), this is way more efficient than using a single sed,
# awk or grep command.
tail -n +$((L+1)) ${vcf}
done
done
) | bgzip -@${cpus} > rawcalls.vcf.gz
tabix rawcalls.vcf.gz
else
VCF=$(ls no_intervals*.vcf)
cp $VCF rawcalls.vcf
bgzip -@${cpus} rawcalls.vcf
tabix rawcalls.vcf.gz
VCF=$(ls no_intervals*.vcf)
cp $VCF rawcalls.vcf
bgzip -@${cpus} rawcalls.vcf
tabix rawcalls.vcf.gz
fi

set +u
Expand Down
10 changes: 10 additions & 0 deletions conf/genomes.config
Original file line number Diff line number Diff line change
Expand Up @@ -79,5 +79,15 @@ params {
'custom' {
fasta = null
}
'small_hg38' {
dbsnp = "${params.genomes_base}/data/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz"
fasta = "${params.genomes_base}/data/genomics/homo_sapiens/genome/genome.fasta"
fasta_fai = "${params.genomes_base}/data/genomics/homo_sapiens/genome/genome.fasta.fai"
germline_resource = "${params.genomes_base}/data/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz"
known_indels = "${params.genomes_base}/data/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz"
snpeff_db = 'GRCh38.86'
species = 'homo_sapiens'
vep_cache_version = '99'
}
}
}
Loading

0 comments on commit 39fd254

Please sign in to comment.