-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsam_compress
39 lines (35 loc) · 1.76 KB
/
sam_compress
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#!/bin/bash
#SBATCH -n 8 # Number of cores
#SBATCH -N 1 # Ensure that all cores are on one machine
#SBATCH --constraint=intel # only use faster intel cores
#SBATCH -t 3-00:00 # Runtime in D-HH:MM, minimum of 10 minutes
#SBATCH -p shared # Partition to submit to
#SBATCH --mem=16000 # Memory pool for all cores (see also --mem-per-cpu)
#SBATCH -J sam_compress # Job name for job
#SBATCH -o sam_compress_%j.out # File to which STDOUT will be written, %j inserts jobid
#SBATCH -e sam_compress_%j.err # File to which STDERR will be written, %j inserts jobid
#SBATCH --mail-type=ALL # Type of email notification- BEGIN,END,FAIL,ALL
#SBATCH --mail-user= # Email to which notifications will be sent
# arguments
# ${1} = the directory to search (recursively) for SAM files
# within this directory, every .sam file larger than 100 MB is converted to
# an indexed .bam file; the original .sam file is deleted only after both the
# conversion and the indexing have succeeded
# Refuse to run without a usable target directory.
# An empty first argument is rejected as well as a missing one, so that the
# search below can never silently fall back to some other location.
# The usage text is a diagnostic, so it is written to stderr. The here-doc
# delimiter is quoted to keep "${directory}" literal in the printed example.
if [[ $# -eq 0 || -z "${1}" ]]; then
  cat >&2 <<'EOF'
ERROR: No directory provided to search for SAM files
Try issuing the following commands with <target_directory> set to the directory where you want to compress SAM files:
$ directory=<target_directory>
$ sbatch -o ${directory}_sam_compress.out -e ${directory}_sam_compress.err sam_compress ${directory}
EOF
  exit 1
fi
# load modules and environments
# Order matters: first clear whatever modules the submitting shell had loaded
# (presumably so that only the samtools loaded here is in effect -- confirm
# against the cluster's module setup), then load samtools, which every
# command in the loop below depends on.
module purge
module load samtools
# commands to run

# Copy stdin to stderr, prefixing every line with "<label>: " so that messages
# in the job's error log can be traced back to the file being processed.
#   $1 - label to prepend (the SAM path)
# The label reaches awk through the environment instead of -v because awk -v
# interprets backslash escapes in the assigned value.
prefix_stderr() {
  SAM_LABEL="$1" awk '{ print ENVIRON["SAM_LABEL"] ": " $0 }' >&2
}

# Convert one SAM file to BAM, index the BAM, then delete the SAM.
#   $1 - path to the SAM file; the BAM is written next to it with the
#        trailing ".sam" replaced by ".bam"
# Returns 0 only if conversion, indexing and deletion all succeeded. The SAM
# file is never removed unless both samtools steps succeeded.
compress_sam() {
  local sam="$1"
  local bam="${sam%.sam}.bam"
  # -@ requires a value; fall back to 1 when not running under SLURM.
  local threads="${SLURM_NTASKS:-1}"

  if ! samtools view -@ "${threads}" -b "${sam}" > "${bam}" \
    2> >(prefix_stderr "${sam}"); then
    # Do not leave a truncated BAM next to the still-intact SAM.
    rm -f -- "${bam}"
    return 1
  fi

  samtools index -@ "${threads}" "${bam}" 2> >(prefix_stderr "${sam}") &&
    rm -- "${sam}" 2> >(prefix_stderr "${sam}")
}

# Walk the target directory for *.sam files larger than 100 MB. Paths are
# passed NUL-delimited so that names containing spaces, backslashes or
# newlines are handled intact. A failure on one file does not stop the loop.
find "${1}" -type f -name "*.sam" -size +100M -print0 |
  while IFS= read -r -d '' sam_file; do
    # stdin is detached so nothing in the body can consume find's output.
    compress_sam "${sam_file}" < /dev/null
  done