Skip to content

Commit

Permalink
feat: code polishing
Browse files Browse the repository at this point in the history
  • Loading branch information
maxulysse committed Jun 8, 2021
1 parent 0dd07ea commit 68e1cc4
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 59 deletions.
64 changes: 32 additions & 32 deletions bin/concatenateVCFs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,27 @@ usage() { echo "Usage: $0 [-i genome_index_file] [-o output.file.no.gz.extension

while [[ $# -gt 0 ]]
do
key=$1
case $key in
key=$1
case $key in
-i)
genomeIndex=$2
shift # past argument
shift # past value
shift # past value
;;
-c)
cpus=$2
shift # past argument
shift # past value
shift # past value
;;
-o)
outputFile=$2
shift # past argument
shift # past value
shift # past value
;;
-t)
targetBED=$2
shift # past argument
shift # past value
shift # past value
;;
-n)
noInt=1
Expand All @@ -47,7 +47,7 @@ if [ -z ${outputFile} ]; then echo "Missing output file name"; usage; fi


if [ -z ${noInt+x} ]
then
then
# First make a header from one of the VCF
# Remove interval information from the GATK command-line, but leave the rest
FIRSTVCF=$(set +o pipefail; ls *.vcf | head -n 1)
Expand All @@ -62,36 +62,36 @@ if [ -z ${noInt+x} ]

# Concatenate VCFs in the correct order
(
cat header
cat header

for chr in "${CONTIGS[@]}"; do
# Skip if globbing would not match any file to avoid errors such as
# "ls: cannot access chr3_*.vcf: No such file or directory" when chr3
# was not processed.
pattern="${chr}_*.vcf"
if ! compgen -G "${pattern}" > /dev/null; then continue; fi
for chr in "${CONTIGS[@]}"; do
# Skip if globbing would not match any file to avoid errors such as
# "ls: cannot access chr3_*.vcf: No such file or directory" when chr3
# was not processed.
pattern="${chr}_*.vcf"
if ! compgen -G "${pattern}" > /dev/null; then continue; fi

# ls -v sorts by numeric value ("version"), which means that chr1_100_
# is sorted *after* chr1_99_.
for vcf in $(ls -v ${pattern}); do
# Determine length of header.
# The 'q' command makes sed exit when it sees the first non-header
# line, which avoids reading in the entire file.
L=$(sed -n '/^[^#]/q;p' ${vcf} | wc -l)
# Then print all non-header lines. Since tail is very fast (nearly as
# fast as cat), this is way more efficient than using a single sed,
# awk or grep command.
tail -n +$((L+1)) ${vcf}
done
done
# ls -v sorts by numeric value ("version"), which means that chr1_100_
# is sorted *after* chr1_99_.
for vcf in $(ls -v ${pattern}); do
# Determine length of header.
# The 'q' command makes sed exit when it sees the first non-header
# line, which avoids reading in the entire file.
L=$(sed -n '/^[^#]/q;p' ${vcf} | wc -l)

# Then print all non-header lines. Since tail is very fast (nearly as
# fast as cat), this is way more efficient than using a single sed,
# awk or grep command.
tail -n +$((L+1)) ${vcf}
done
done
) | bgzip -@${cpus} > rawcalls.vcf.gz
tabix rawcalls.vcf.gz
else
VCF=$(ls no_intervals*.vcf)
cp $VCF rawcalls.vcf
bgzip -@${cpus} rawcalls.vcf
tabix rawcalls.vcf.gz
VCF=$(ls no_intervals*.vcf)
cp $VCF rawcalls.vcf
bgzip -@${cpus} rawcalls.vcf
tabix rawcalls.vcf.gz
fi

set +u
Expand Down
10 changes: 10 additions & 0 deletions conf/genomes.config
Original file line number Diff line number Diff line change
Expand Up @@ -79,5 +79,15 @@ params {
'custom' {
fasta = null
}
'small_hg38' {
dbsnp = "${params.genomes_base}/data/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz"
fasta = "${params.genomes_base}/data/genomics/homo_sapiens/genome/genome.fasta"
fasta_fai = "${params.genomes_base}/data/genomics/homo_sapiens/genome/genome.fasta.fai"
germline_resource = "${params.genomes_base}/data/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz"
known_indels = "${params.genomes_base}/data/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz"
snpeff_db = 'GRCh38.86'
species = 'homo_sapiens'
vep_cache_version = '99'
}
}
}
27 changes: 0 additions & 27 deletions tests/test_aligner.yml
Original file line number Diff line number Diff line change
@@ -1,30 +1,3 @@
- name: Run bwa-mem
command: nextflow run main.nf -profile test,docker --aligner bwa-mem
tags:
- aligner
- bwa-mem
files:
- path: results/preprocessing/1234N/mapped/1234N.bam
- path: results/preprocessing/1234N/mapped/1234N.bam.bai
- path: results/preprocessing/1234N/markduplicates/1234N.md.bam
- path: results/preprocessing/1234N/markduplicates/1234N.md.bam.bai
- path: results/preprocessing/1234N/recal_table/1234N.recal.table
- path: results/preprocessing/1234N/recalibrated/1234N.recal.bam
- path: results/preprocessing/1234N/recalibrated/1234N.recal.bam.bai
- path: results/preprocessing/csv/markduplicates.csv
- path: results/preprocessing/csv/markduplicates_1234N.csv
- path: results/preprocessing/csv/markduplicates_no_table.csv
- path: results/preprocessing/csv/markduplicates_no_table_1234N.csv
- path: results/preprocessing/csv/recalibrated.csv
- path: results/preprocessing/csv/recalibrated_1234N.csv
- path: results/reports/fastqc/1234N-1234N_M1
- path: results/reports/fastqc/1234N-1234N_M2
- path: results/reports/fastqc/1234N-1234N_M4
- path: results/reports/fastqc/1234N-1234N_M5
- path: results/reports/fastqc/1234N-1234N_M6
- path: results/reports/fastqc/1234N-1234N_M7
- path: results/reports/qualimap/1234N
- path: results/reports/samtools_stats/1234N/1234N.bam.stats
- name: Run bwa-mem2
command: nextflow run main.nf -profile test,docker --aligner bwa-mem2
tags:
Expand Down

0 comments on commit 68e1cc4

Please sign in to comment.