-
Notifications
You must be signed in to change notification settings - Fork 5
/
step_1_filter.sh
executable file
·28 lines (24 loc) · 1.16 KB
/
step_1_filter.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
#!/bin/bash
# Step 1 (part 1): load the run configuration and validate it.
#
# Reads ./profile from the current working directory, then verifies that
#   - ADAPTOR_F and ADAPTOR_R are both non-empty, and
#   - both split read files, ${SPLIT}.1.fq.gz and ${SPLIT}.2.fq.gz, exist.
# These variables are not assigned in this script; they are expected to be
# provided by ./profile.
# Exits with status 1 as soon as any check fails.
#SCRIPT_PATH=`dirname $0`

# Fail early with a clear message instead of continuing with an empty
# configuration when the profile is absent.
if [[ ! -r ./profile ]]; then
  echo "ERROR : ./profile not found or not readable . exit ..."
  exit 1
fi
source ./profile

echo "check files ..."
date

if [[ -z "$ADAPTOR_F" || -z "$ADAPTOR_R" ]]; then
  echo "ERROR : no adaptor sequence assigned in profile . exit ..."
  exit 1
fi

# Both mates must be present: check read 1 AND read 2 (the previous version
# tested the read-1 file twice, so a missing read-2 file went unnoticed).
if [[ ! -f "${SPLIT}.1.fq.gz" || ! -f "${SPLIT}.2.fq.gz" ]]; then
  echo "error : file $SPLIT.1.fq.gz or $SPLIT.2.fq.gz is not exsist !!! exit ..."
  exit 1
fi
# Step 1 (part 2): run the SOAP filter over the lanes listed in lane.lst.
#
# Variables read: ADAPTOR_F, ADAPTOR_R, SCRIPT_PATH, SOAP_FILTER, THREADS
# (none are assigned in this script).
# Files written in the current directory: lane.lst, stat.txt (passed to the
# filter), and the SOAPfilter_<tag>.log / SOAPfilter_<tag>.err pair.
# Exits with status 1 if the lane list cannot be copied or the filter fails.
echo "NOTICE : use adaptor F : $ADAPTOR_F R: $ADAPTOR_R"

# The filter is handed "lane.lst" by relative path, so the list must be
# staged into the working directory first. A failed copy is fatal: otherwise
# the filter would run against a missing or stale lane.lst.
if ! cp -- "$SCRIPT_PATH/data/lane.lst" ./; then
  echo "ERROR : failed to copy $SCRIPT_PATH/data/lane.lst . exit ..."
  exit 1
fi

echo "run SOAP_filter ... maybe long time ... "
date

# Timestamp suffix (_MM_DD_HH_MM_SS) so each run gets its own log files.
tag=$(date +_%m_%d_%H_%M_%S)

# NOTE(review): $SOAP_FILTER is intentionally left unquoted in case the
# profile defines it as a command plus arguments — confirm and quote if it is
# always a single path.
$SOAP_FILTER -q 33 -t "$THREADS" -y -F "$ADAPTOR_F" -R "$ADAPTOR_R" \
  -p -M 2 -f -1 -Q 10 lane.lst stat.txt \
  >"SOAPfilter_${tag}.log" 2>"SOAPfilter_${tag}.err" || exit 1
# Step 1 (part 3): rebuild the barcode frequency table from the cleaned reads.
#
# Input : ${SPLIT}.1.fq.gz.clean.gz
# Output: $CLEAN_BARCODE_FREQ, one "value<TAB>count" line per distinct value.
#         Line order is whatever awk's "for (x in t)" yields (unspecified).
# Exits with status 1 if any stage of the pipeline fails.
echo "re-generate new barcode.freq from clean data .. may cost hours ..."

# An earlier version counted with "sort | uniq -c"; counting in an awk array
# gives the same value/count pairs without sorting the whole stream.
#
# Pipeline stages:
#   1. keep lines 1, 5, 9, ... (the first line of every group of four)
#   2. take the 2nd field when splitting on '#', ' ' or '|'
#   3. keep the part before the first '/'
#   4. tally occurrences and print "value<TAB>count"
#
# pipefail makes a failing gzip (missing or corrupt input) fail the whole
# pipeline; without it only the last awk's status is seen and the step would
# report success with an empty table.
set -o pipefail
gzip -dc "${SPLIT}.1.fq.gz.clean.gz" \
  | awk '!(NR%4-1)' \
  | awk -F '[# |]' '{print$2}' \
  | awk -F '/' '{print $1}' \
  | awk '{t[$1]+=1;}END{for(x in t) printf("%s\t%s\n",x,t[x]);}' \
  >"$CLEAN_BARCODE_FREQ" \
  || { echo "ERROR : failed to generate barcode frequency from ${SPLIT}.1.fq.gz.clean.gz . exit ..."; exit 1; }

echo "step 1 done ..."
date