forked from MorrellLAB/sequence_handling
-
Notifications
You must be signed in to change notification settings - Fork 0
/
read_counts.sh
executable file
·69 lines (52 loc) · 1.46 KB
/
read_counts.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/bin/bash
set -e
set -u
set -o pipefail
# This script counts the number of reads
# in each sample file named in a list of samples.
# It uses bioawk to do the counting,
# so please make sure bioawk is installed
# and on your PATH before running this script.
# Print the command-line help to stderr and terminate with exit status 1.
# Takes no arguments. Every text line is emitted with a single space before
# its newline, and the message ends with one extra blank line.
usage() {
    {
        printf '%s \n' \
            'Usage: ./read_counts.sh sample_info outdirectory' \
            'where: sample_info is a list of samples to be processed'
        printf '\n'
        printf '%s \n\n' \
            'outdirectory is the directory where the file should be placed'
    } >&2
    exit 1
}
# Both positional arguments are required; otherwise show the help and stop.
[ "$#" -ge 2 ] || usage
# First argument: the list of samples to be processed
sample_info="$1"
# Second argument: path to the output directory
OUTDIR="$2"
# Check to see if bioawk is installed.
# The exit status of `command -v` is tested directly: wrapping it in
# backticks would try to run its output as a command instead.
if command -v bioawk > /dev/null 2>&1; then
    echo "Bioawk is installed"
else
    # Diagnostics go to stderr so they are not mixed into captured stdout.
    {
        echo "Please install Bioawk and add it to your PATH"
        echo
        echo "Running 'installer.sh bioawk' will do this"
    } >&2
    exit 1
fi
# Derive the output file name from the sample list name (minus a .txt suffix)
outfile=$(basename "$sample_info" .txt)
echo "$outfile"
echo "$sample_info"
# Fail early with a clear message if the sample list cannot be read;
# a failing `cut` inside the redirection below would otherwise go unnoticed.
if [ ! -r "$sample_info" ]; then
    echo "Cannot read sample list: $sample_info" >&2
    exit 1
fi
# Create a bash array of sample names from the first tab-delimited column.
# Names are read one per line (surrounding whitespace trimmed, blank lines
# skipped) so they are never glob-expanded or split on internal spaces.
sample_names=()
while read -r sample || [ -n "$sample" ]; do
    if [ -n "$sample" ]; then
        sample_names+=("$sample")
    fi
done < <(cut -f 1 "$sample_info")
# An empty list is reported explicitly; expanding an empty array would
# also abort under `set -u` on bash versions before 4.4.
if [ "${#sample_names[@]}" -eq 0 ]; then
    echo "No sample names found in $sample_info" >&2
    exit 1
fi
# Create the output directory (if needed) and the output file.
# The same path is used for creating, writing and reporting the results.
out_path="${OUTDIR}/${outfile}_out.txt"
mkdir -p "$OUTDIR"
touch "$out_path"
echo "${sample_names[*]}"
# Iterate over each of the sample names and count its reads
for sample in "${sample_names[@]}"; do
    # NR in the END block is the number of records bioawk read from the file
    count="$(bioawk -cfastx 'END{print NR}' "$sample")"
    # Fixed format string: the sample name is data, never a printf directive.
    # Results are appended, so earlier content of the file is kept.
    printf '%s \t %s \n' "$sample" "$count" >> "$out_path"
done
echo "Results can be found at ${out_path}"