-
Notifications
You must be signed in to change notification settings - Fork 1
/
delete_invalid_sifts_worker.sh
executable file
·125 lines (85 loc) · 1.79 KB
/
delete_invalid_sifts_worker.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#!/bin/bash
#
# Worker for the invalid-SIFTS cleanup: walks a list of SIFTS XML files,
# handles only the entries assigned to this worker, and removes files whose
# second line does not carry the expected dbAccessionId.
#
# Usage: delete_invalid_sifts_worker.sh -c DIR -l FILE -n IofN -t TOTAL
#   -c DIR    directory holding the <pdb_id>.xml.md5 checksum files
#   -l FILE   list of SIFTS XML paths, one per line
#   -n IofN   this worker's slot, e.g. "2of8"
#   -t TOTAL  number shown as the total in the progress line
#
# ./scripts/env.sh is sourced relative to the current working directory.
source ./scripts/env.sh

CHK_SUM_DIR=
FILE_LIST=
TOTAL=
# NOTE(review): PROC_INFO is not reset like the three variables above, so a
# value inherited from the environment or from env.sh survives when -n is
# omitted -- confirm whether that is intended.

#######################################
# Parse the worker's command-line options into global variables.
# Globals:   CHK_SUM_DIR, FILE_LIST, PROC_INFO, TOTAL (written)
# Arguments: the script's command-line arguments
# Returns:   0 on success, 2 if getopt rejects the command line
#######################################
parse_args() {
  local parsed

  # Short options only. The "--" keeps getopt from interpreting any of the
  # caller's arguments as options of getopt itself.
  parsed=$(getopt -n "${0##*/}" -o "c:l:n:t:" -- "$@") || return 2

  # getopt emits a shell-quoted, normalised argument list; eval is the
  # documented way to load it back into the positional parameters.
  eval set -- "$parsed"

  while (( $# > 0 )); do
    case "$1" in
      -c)
        CHK_SUM_DIR=$2
        shift 2
        ;;
      -l)
        FILE_LIST=$2
        shift 2
        ;;
      -n)
        PROC_INFO=$2
        shift 2
        ;;
      -t)
        TOTAL=$2
        shift 2
        ;;
      --)
        # end-of-options marker appended by getopt
        shift
        break
        ;;
      *)
        break
        ;;
    esac
  done
}

# getopt has already printed the reason on stderr when this fails.
parse_args "$@" || exit 2
# Validate the -n value ("<index>of<count>", index counted from 1) and split
# it into MAXPROCS (the count) and PROC_ID (the index shifted to start at 0).
# Whitespace around the numbers is tolerated; anything else is rejected here
# rather than surfacing later as an arithmetic error or a division by zero.
proc_info_re='^[[:space:]]*([0-9]+)[[:space:]]*of[[:space:]]*([0-9]+)[[:space:]]*$'
if ! [[ $PROC_INFO =~ $proc_info_re ]]; then
  echo "Invalid thread id ($PROC_INFO)." >&2
  exit 1
fi

# 10# forces base 10 so a zero-padded value such as "08of10" is not parsed
# as (invalid) octal.
PROC_ID=$(( 10#${BASH_REMATCH[1]} ))
MAXPROCS=$(( 10#${BASH_REMATCH[2]} ))

# An index outside 1..count would match no list entry at all, so the worker
# would silently do nothing; a count of 0 would divide by zero.
if (( MAXPROCS < 1 || PROC_ID < 1 || PROC_ID > MAXPROCS )); then
  echo "Invalid thread id ($PROC_INFO)." >&2
  exit 1
fi

PROC_ID=$(( PROC_ID - 1 ))
# TOTAL comes from -t; the per-worker recount below is disabled.
# TOTAL=`wc -l < $FILE_LIST`

#######################################
# Rewrite the one-line progress indicator in place (leading \r, no newline).
# Only the worker with PROC_ID 0 prints it -- presumably so that parallel
# workers do not overwrite each other's line.
# Globals:   PROC_ID, TOTAL (read)
# Arguments: $1 - zero-based position of the current entry in the list
# Outputs:   progress text on stdout
#######################################
report_progress() {
  local index=$1

  if (( PROC_ID == 0 )); then
    printf '\rDone %s of %s ...' "$(( index + 1 ))" "$TOTAL"
  fi
}

#######################################
# Validate one existing SIFTS XML file against its recorded checksum.
#   * checksum file newer than the XML      -> nothing to do
#   * recorded checksum equals current one  -> refresh the checksum file's
#                                              mtime if it is older
#   * otherwise the XML's second line must contain dbAccessionId="<pdb_id>";
#     if so the new checksum is recorded, if not the XML file is deleted.
# Arguments: $1 - path of the SIFTS XML file (<pdb_id>.xml)
#            $2 - directory holding the <pdb_id>.xml.md5 files
# Outputs:   "deleting <path>" on stdout when a file is removed
# Returns:   0
#######################################
check_sifts_file() {
  local sifts_xml_file=$1
  local chk_sum_dir=$2
  local pdb_id chk_sum_file new_chk_sum old_chk_sum

  pdb_id=$(basename -- "$sifts_xml_file" .xml)
  chk_sum_file=$chk_sum_dir/$pdb_id.xml.md5

  if [[ "$chk_sum_file" -nt "$sifts_xml_file" ]]; then
    return 0
  fi

  # Reading via redirection keeps the file name out of md5sum's output and
  # guarantees md5sum never falls back to the caller's stdin.
  new_chk_sum=$(md5sum < "$sifts_xml_file" | cut -d ' ' -f 1)
  if [[ -z "$new_chk_sum" ]]; then
    # A file that cannot be read has not been shown to be invalid, so it is
    # left alone instead of being deleted.
    echo "Cannot checksum $sifts_xml_file; skipping." >&2
    return 0
  fi

  if [[ -e "$chk_sum_file" ]]; then
    old_chk_sum=$(head -n 1 -- "$chk_sum_file")
    if [[ "$old_chk_sum" == "$new_chk_sum" ]]; then
      if [[ "$chk_sum_file" -ot "$sifts_xml_file" ]]; then
        # content unchanged: bump the mtime so the cheap -nt test above
        # short-circuits on the next run
        touch -- "$chk_sum_file"
      fi
      return 0
    fi
  fi

  # Only line 2 is inspected; sed quits right after printing it. -F makes
  # the id match literally rather than as a regular expression.
  if sed -n '2{p;q;}' < "$sifts_xml_file" \
    | grep -q -F -- "dbAccessionId=\"$pdb_id\""; then
    printf '%s\n' "$new_chk_sum" > "$chk_sum_file"
  else
    echo "deleting $sifts_xml_file"
    rm -f -- "$sifts_xml_file"
  fi
  return 0
}

#######################################
# Process this worker's share of FILE_LIST: entry i (counted from 0) belongs
# to the worker for which i % MAXPROCS == PROC_ID.
# Globals:   FILE_LIST, CHK_SUM_DIR, MAXPROCS, PROC_ID, TOTAL (read)
# Returns:   0 on success, 1 if the list or checksum directory is unusable
#######################################
run_worker() {
  local chk_sum_dir sifts_xml_file
  local proc_id=0

  if [[ -z "$FILE_LIST" || ! -r "$FILE_LIST" ]]; then
    echo "Cannot read file list ($FILE_LIST)." >&2
    return 1
  fi
  if [[ -z "$CHK_SUM_DIR" ]]; then
    echo "No checksum directory given (-c)." >&2
    return 1
  fi
  chk_sum_dir=$(readlink -f -- "$CHK_SUM_DIR")
  if [[ -z "$chk_sum_dir" ]]; then
    # without this guard checksum files would be written to "/"
    echo "Cannot resolve checksum directory ($CHK_SUM_DIR)." >&2
    return 1
  fi

  # read -r keeps backslashes intact; the default IFS still trims
  # surrounding whitespace. The "|| [[ -n ... ]]" also picks up a final line
  # that lacks a trailing newline.
  while read -r sifts_xml_file || [[ -n "$sifts_xml_file" ]]; do
    # Blank lines and paths that no longer exist are skipped without a
    # progress update, but still consume a position in the list.
    if (( proc_id % MAXPROCS == PROC_ID )) && [[ -e "$sifts_xml_file" ]]; then
      check_sifts_file "$sifts_xml_file" "$chk_sum_dir"
      report_progress "$proc_id"
    fi
    proc_id=$(( proc_id + 1 ))
  done < "$FILE_LIST"
}

# Keep this as the last command: its return status is the script's exit status.
run_worker