-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathCRISPRlib.sh
executable file
·98 lines (81 loc) · 2.8 KB
/
CRISPRlib.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/bin/bash
#####################################
requires=("cutadapt" "python3" "bowtie" "samtools")
for i in ${requires[@]};do
which $i &>/dev/null || { echo $i not found; exit 1; }
done
help(){
cat <<-EOF
Usage: CRISPRlib.sh <options> <reads_clean.fq.gz>
### INPUT: fastq files ###
This script will trim the input fastq to 20nt after the given sequence with cutadapt, and align the trimmed reads to the reference library build with Bowtie1, depending on the library selection passed by -l or the index and adapter sequence passed by -i and -a,
then statisticize each sequence's frequency, and all results will be store in current (./) directory.
### python3/cutadapt/bowtie1/samtools required ###
Options:
-l [str] library selection <110066|160129|162256>
-i [str] Custom bowtie index PATH
-a [str] Custom adapter sequence
-p [str] Prefix of output
-n [int] Threads (1 default)
-h Print this help message
EOF
exit 0
}
main(){
cutadapt -g $adpt -j $threads -l 20 -m 19 -o ${pre}tr.fq.gz $1 > ${pre}log
bowtie -x $idx -n 0 -p $threads --no-unal -l 20 ${pre}tr.fq.gz -S ${pre}sam 2>&1|tee -a ${pre}log
samtools view -@ $threads -o ${pre}bam ${pre}sam
samtools sort -@ $threads -o ${pre}srt.bam ${pre}bam
samtools index -@ $threads ${pre}srt.bam
samtools idxstats ${pre}srt.bam |awk '$3+0>0' |awk '{print $1"\t"$3}' > ${pre}counts.tsv
awk 'substr($1,1,1)!="@"' ${pre}sam|awk '{print $3"\t"$10}'|sort |uniq -c |awk '{print $2"\t"$3"\t"$1}' > ${pre}table.tsv
}
# no ARGs error
if [ $# -lt 1 ];then
help
exit 1
fi
while getopts "l:i:a:n:p:h" arg
do
case $arg in
l) if [ $OPTARG = "110066" ]; then
idx='/mnt/date3/Project/zhaoqy/genome/CRISPR/hs_metabolism_110066/metabolism'
adpt='TTTCTAGCTCTAAAAC'
elif [ $OPTARG = "160129" ]; then
idx='/mnt/date3/Project/zhaoqy/genome/CRISPR/mm_metabolism_160129/sgRNA_lib'
adpt='TGTTTCCAGCATAGCTCTTAAAC'
elif [ $OPTARG = "162256" ]; then
idx='/mnt/date3/Project/zhaoqy/genome/CRISPR/hs_epigentics_162256/lib'
adpt='ATAGCTCTTAAAC'
else
echo "Only support 110066, 160129, 162256 libraries, or pass your own bowtie1 index and adapter sequences"
exit 1
fi;;
i) idx=$OPTARG;;
a) adpt=$OPTARG;;
n) threads=$OPTARG;;
p) pre=$OPTARG;;
h) help ;;
?) help
exit 1;;
esac
done
# shift ARGs to reads
shift $(($OPTIND - 1))
# get prefix of output
if [ -z $pre ];then
echo "No -p <prefix> given, use file name as prefix"
pre=${1/clean.fq.gz/}
fi
if [ -z $threads ];then
echo "Using 12 threads as default"
threads=12
fi
main $1
# check running status
if [ $? -ne 0 ]; then
help
exit 1
else
echo "Run succeed"
fi