forked from alexjvr1/UKButterflies
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path00_parallel_fastqc_bcp3.sh
194 lines (158 loc) · 3.79 KB
/
00_parallel_fastqc_bcp3.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
#!/bin/bash
# (c) Romain Villoutreix
# romain.villoutreix@gmail.com
# Last modified: 06/10/2018
# Description:
# Given an input directory with raw fastq files (gzip and bzip2 compression allowed)
# it will produce the fastqc analysis
# Changelog
#
#QUEUE deprecated
#Email forwarding not active on bluecrystalp3
#Look into core options
# Release identifier printed by the banner.
VERSION="1.1-2018.10.06"
# Program that the generated job script runs on every fastq file.
CMD='fastqc'

# Defaults for the optional settings.
# EXTRA and NOEXTRACT are not referenced anywhere else in the visible script.
EXTRA=""
NOEXTRACT="yes"
# Overridable on the command line with -n, -t (hours) and -m (gigabytes).
NCORES=1 HRS=8 MEM=4
# Print the banner: script name, version and author contact, framed by a
# blank line and a row of '#' above and below.
# Globals:   VERSION (read)
# Arguments: none
# Outputs:   eight lines on stdout
function author {
  # ${0##*/} strips the directory part without word-splitting $0, so a script
  # path containing whitespace no longer breaks the banner.
  local script_name=${0##*/}
  local rule="#########################################"

  printf '%s\n' \
    "" \
    "$rule" \
    " $script_name" \
    " version $VERSION" \
    " (c) Romain Villoutreix" \
    " romain.villoutreix@gmail.com" \
    "$rule" \
    ""
}
# Print the command-line help and terminate the script.
# Globals:   NCORES, HRS, MEM (read; shown as the current defaults)
# Arguments: none
# Outputs:   help text on stdout
# Returns:   never returns; exits the calling shell with status 0
function usage {
  # ${0##*/} strips the directory part without word-splitting $0, so a script
  # path containing whitespace is handled.
  local script_name=${0##*/}

  cat <<EOF

Usage:
 $script_name
 -i <input directory> => Folder with fastq files (.gz accepted)
 -o <output directory> => Output folder to save fastQC reports
 -n <number of processors> => processors per library (optional, default=$NCORES)
 -t <allocated time> => Allocated time (in hours) for each analysis (optional: default=$HRS)
 -m <allocated memory> => Allocated memory (in gigabytes) for each analysis (optional: default=$MEM)
 -h => show this help

 Example:
 $script_name -i raw_reads -o initial_fastqc


EOF
  exit 0
}
author

# Print "ERROR - <message>" followed by the usage text on stderr, then exit 1.
# usage ends with its own "exit 0", so it is run in a subshell here; that keeps
# control in this function and lets a bad invocation fail with a non-zero status.
# Arguments: $1 - error message
function usage_error {
  {
    echo
    echo "ERROR - $1"
    echo
    (usage)
  } >&2
  exit 1
}

# Parse the command line.
# At least two words are needed (-i <input directory> and -o <output directory>
# are mandatory); with fewer, show the help and stop.
if [ "$#" -lt 2 ]; then
  usage
fi

while [ "$#" -gt 0 ]; do
  case "$1" in
    -h|-help)
      usage
      ;;
    -i|-o|-n|-t|-m)
      # Every one of these options takes a value; refuse a trailing option
      # instead of silently assigning an empty string.
      [ "$#" -ge 2 ] || usage_error "Option $1 requires an argument"
      case "$1" in
        -i) INDIR=$(readlink -f -- "$2") ;;
        -o) OUTDIR=$(readlink -f -- "$2") ;;
        -n) NCORES=$2 ;;
        -t) HRS=$2 ;;
        -m) MEM=$2 ;;
      esac
      shift
      ;;
    *)
      usage_error "Invalid option: $1"
      ;;
  esac
  shift
done

# Both directories are mandatory. readlink -f prints nothing when it cannot
# resolve the path, which would otherwise make the later steps operate on "/"
# or on the current directory.
if [ -z "${INDIR:-}" ] || [ ! -d "$INDIR" ]; then
  usage_error "Input directory (-i) is missing or is not a directory"
fi
if [ -z "${OUTDIR:-}" ]; then
  usage_error "Output directory (-o) is missing or cannot be resolved"
fi

# These values are pasted into PBS directives, so insist on plain integers.
[[ "$NCORES" =~ ^[0-9]+$ ]] || usage_error "Number of processors (-n) must be an integer: $NCORES"
[[ "$HRS" =~ ^[0-9]+$ ]] || usage_error "Allocated time (-t) must be an integer number of hours: $HRS"
[[ "$MEM" =~ ^[0-9]+$ ]] || usage_error "Allocated memory (-m) must be an integer number of gigabytes: $MEM"
# Number of array tasks = number of input files.
# Count with the same glob the job script expands ($INDIR/*.fastq*) so the
# array range always matches the file list each task indexes into. The glob is
# evaluated in a subshell so that nullglob does not leak into the rest of the
# script.
N=$(
  shopt -s nullglob
  fastq_files=("$INDIR"/*.fastq*)
  echo "${#fastq_files[@]}"
)
if [ "$N" -eq 0 ]; then
  # "#PBS -t 1-0" would not be a valid array range.
  echo "ERROR - No fastq files found in: $INDIR" >&2
  exit 1
fi

TIMESTAMP=$(date +%Y%m%d-%H%M%S)
# Unique tag for this run's scratch directory. Kept in its own variable rather
# than TMPDIR, which is the conventional environment variable tools consult for
# the temporary-file location and should not be overwritten with a bare name.
JOBTAG="parallel_fastqc_${TIMESTAMP}_${RANDOM}"

# Node-local scratch directory used by each task for its input copy.
INPUT_LOCALDIR="/local/${JOBTAG}_in"
# QUEUE is not assigned anywhere in the visible script, so this branch is only
# taken when QUEUE is inherited from the environment.
if [[ "${QUEUE:-}" == "popgenom" || "${QUEUE:-}" == "molecol" ]]; then
  INPUT_LOCALDIR="/local/tmp/${JOBTAG}_in"
fi

# Generated submission script and the scheduler log it requests.
SMSJOB="$OUTDIR/parallel_fastqc.$TIMESTAMP.smsjob.sh"
LOG="$OUTDIR/parallel_fastqc.$TIMESTAMP.smsjob.log"
# Initialize submission script
if ! mkdir -p -- "$OUTDIR"; then
  echo "ERROR - Cannot create output directory: $OUTDIR" >&2
  exit 1
fi

# PBS OPTIONS
# -----------------------------------------
# Written in one group so a failure to create the file is detected once.
if ! {
  printf '%s\n' '#!/bin/bash'
  printf '#PBS -l walltime=%s:00:00\n' "$HRS"
  printf '#PBS -l mem=%sgb\n' "$MEM"
  # fastqc is started with "-t $NCORES" threads, so request that many cores on
  # one node. Skipped for a single core to leave the scheduler default as is.
  if [[ "$NCORES" =~ ^[0-9]+$ ]] && (( 10#$NCORES > 1 )); then
    printf '#PBS -l nodes=1:ppn=%s\n' "$NCORES"
  fi
  # One array task per input file.
  printf '#PBS -t 1-%s\n' "$N"
  # Merge stderr into stdout and send both to the scheduler log.
  printf '#PBS -j oe\n'
  printf '#PBS -o %s\n' "$LOG"
} > "$SMSJOB"; then
  echo "ERROR - Cannot write job script: $SMSJOB" >&2
  exit 1
fi
# -----------------------------------------
# Append the job body.
# Values known at generation time are written as shell-quoted assignments (%q)
# so paths containing spaces or glob characters survive. Everything after them
# is a literal script (quoted here-document: nothing is expanded now) that each
# array task runs.
{
  printf 'module load apps/fastqc-0.11.5\n'
  printf 'INDIR=%q\n' "$INDIR"
  printf 'OUTDIR=%q\n' "$OUTDIR"
  printf 'INPUT_TMPDIR=%q\n' "$INPUT_LOCALDIR"
  printf 'CMD=%q\n' "$CMD"
  printf 'NCORES=%q\n' "$NCORES"
} >> "$SMSJOB"

cat >> "$SMSJOB" <<'EOF'

# Each array task handles one fastq file; PBS_ARRAYID is 1-based.
: "${PBS_ARRAYID:?PBS_ARRAYID is not set - submit this script as an array job}"
FQFILES=("$INDIR"/*.fastq*)
#INDEX=$((SGE_TASK_ID-1))
INDEX=$((PBS_ARRAYID-1))
FQ=${FQFILES[$INDEX]}
if [ ! -f "$FQ" ]; then
  echo "ERROR - No fastq file for array index $PBS_ARRAYID in $INDIR" >&2
  exit 1
fi

# Create temporary local directory and copy the input into it
mkdir -p "$INPUT_TMPDIR" || exit 1
cp "$FQ" "$INPUT_TMPDIR"/ || exit 1
FQ=$(basename "$FQ")

# Per-file log. Only the ".fastq..." suffix is stripped, so names that contain
# other dots (sample.R1.fastq.gz / sample.R2.fastq.gz) get distinct logs.
LOG="$OUTDIR/${FQ%%.fastq*}.log"
{
  echo "fastQC $FQ file..."
  echo
  echo "CMD: "
  echo "$CMD \\"
  echo "$FQ \\"
  echo "-o $OUTDIR \\"
  echo "-t $NCORES \\"
  echo "--no-extract \\"
  echo
  echo "---------------------------------------------------"
  echo
} > "$LOG"

cd "$INPUT_TMPDIR" || exit 1
# CMD is intentionally unquoted so it may carry extra arguments.
$CMD \
  "$FQ" \
  -o "$OUTDIR" \
  -t "$NCORES" \
  --no-extract \
  >> "$LOG" 2>&1
STATUS=$?

#remove temporary files
rm -f "$INPUT_TMPDIR/$FQ"
# Report the analysis result, not the result of the cleanup.
exit $STATUS
EOF
# Make the generated script executable and tell the user how to submit it.
# A chmod failure is only reported and does not stop the script.
chmod +x -- "$SMSJOB" || echo "WARNING - Cannot make job script executable: $SMSJOB" >&2

# QUEUE is not assigned anywhere in the visible script; mention it only when it
# actually has a value instead of printing "( queue)".
if [ -n "${QUEUE:-}" ]; then
  echo "Command to submit the job to bluecrystal ($QUEUE queue):"
else
  echo "Command to submit the job to bluecrystal:"
fi
echo
# %q keeps the printed command copy-pasteable when the path has special
# characters; ordinary paths are printed unchanged.
printf 'qsub %q\n' "$SMSJOB"
echo