forked from sftcd/tek_transparency
-
Notifications
You must be signed in to change notification settings - Fork 0
/
new-ie-ukni-sort.sh
executable file
·133 lines (113 loc) · 2.82 KB
/
new-ie-ukni-sort.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#!/bin/bash
# sort IE and UKNI TEKs apart, in an incremental manner
#
# Every setting below keeps a value already present in the environment and
# only falls back to the default shown here when it is unset or empty.
# set -x
: "${TOP:=$HOME/code/tek_transparency}"
: "${IEF:=new-iefirsts}"              # TEKs whose earliest line is tagged "ie"
: "${UKNIF:=new-uknifirsts}"          # TEKs whose earliest line is tagged "ukni"
: "${DODGY:=new-notieuknifirsts}"     # TEKs whose earliest line is neither
: "${CSVDIR:=$(/bin/pwd)}"            # directory holding the 202*.csv inputs

# Scratch files. The EXIT trap is installed before the first mktemp so the
# files are removed on every exit path, including early exits.
tmpf=""
tmpf1=""
tmpf2=""
_cleanup_tmp() {
  rm -f -- "$tmpf" "$tmpf1" "$tmpf2"
}
trap _cleanup_tmp EXIT
tmpf=$(mktemp /tmp/newsortXXXX) || exit 1
tmpf1=$(mktemp /tmp/newsortXXXX) || exit 1
tmpf2=$(mktemp /tmp/newsortXXXX) || exit 1
# find modification time
# Print the modification time (seconds since the epoch) of the regular file
# named by $1, or 0 when no such file exists. The argument is quoted so that
# paths containing spaces, and empty values, are handled correctly.
_mtime_or_zero() {
  if [[ -f "$1" ]]; then
    stat -c %Y -- "$1"
  else
    echo 0
  fi
}

iemod=$(_mtime_or_zero "$IEF")
uknimod=$(_mtime_or_zero "$UKNIF")
# "early" is the older of the two output timestamps; it is 0 as soon as
# either output file is missing.
early=$iemod
if (( uknimod < iemod )); then
  early=$uknimod
fi
maxfdate=0
# build list of files newer than our outputs
# An array keeps each path intact as one word when it is passed to grep.
oflist=()
for f in "$CSVDIR"/202*.csv; do
  # With no matching CSV the glob is left as the literal pattern; skip it
  # rather than handing a non-existent name to stat.
  [[ -f "$f" ]] || continue
  fdate=$(stat -c %Y -- "$f") || continue
  if (( fdate > maxfdate )); then
    maxfdate=$fdate
  fi
  if (( fdate >= early )); then
    oflist+=("$f")
  fi
done
ofcount=${#oflist[@]}
if (( ofcount == 0 )); then
  mstr=$(date -d "@$maxfdate")
  istr=$(date -d "@$early")
  echo "Nothing to do - exiting - newest CSV ($mstr) older than output ($istr)"
  exit 0
fi
echo "Will search $ofcount CSVs"
# grep entire lines
# -H always prefixes the file name, so the line layout (and therefore the
# sort order) is the same whether one CSV or many are searched. A line that
# contains both markers is emitted once.
grep -H -e ",ie," -e ",ukni," -- "${oflist[@]}" | sort >"$tmpf2"
mv -- "$tmpf2" "$tmpf1"
# ditch any TEKs we've already assigned - can happen if
# some manual messing with files or modification times

# Print the lines of data file $2 that contain none of the strings listed
# (one per line) in file $1. Strings are matched literally (-F). Blank lines
# in $1 are dropped first, because an empty pattern matches every line and
# would discard all of the data.
_without_known() {
  grep -v -F -f <(grep -v '^$' -- "$1") -- "$2"
}

somedels="False"
# Line counts default to "nothing removed" so the summary below is
# well-defined even when only one of the two output files exists.
b4ie=$(wc -l <"$tmpf1")
aftrie=$b4ie
if [[ -f "$IEF" ]]; then
  _without_known "$IEF" "$tmpf1" >"$tmpf2"
  mv -- "$tmpf2" "$tmpf1"
  aftrie=$(wc -l <"$tmpf1")
  somedels="True"
fi
b4ukni=$aftrie
aftrukni=$aftrie
if [[ -f "$UKNIF" ]]; then
  _without_known "$UKNIF" "$tmpf1" >"$tmpf2"
  mv -- "$tmpf2" "$tmpf1"
  aftrukni=$(wc -l <"$tmpf1")
  somedels="True"
fi
if [[ "$somedels" != "False" ]]; then
  echo "Ditched some already-known TEKS:"
  # printf is used so that \t is emitted as a real tab.
  printf '\tWe started with %s apparently new TEKs\n' "$b4ie"
  printf '\tThere were %s already known in .ie out of %s\n' "$((b4ie-aftrie))" "$b4ie"
  printf '\tThere were %s already known in .ukni out of %s\n' "$((b4ukni-aftrukni))" "$b4ukni"
  printf '\tWe ended with %s apparently new TEKs\n' "$aftrukni"
fi
# grep tek values
awk -F, '{print $9}' <"$tmpf1" | sort -u >"$tmpf"
iecnt=$(wc -l <"$tmpf")
echo "Found $iecnt .ie TEKS"
# rm -f new-iefirsts new-uknifirsts new-notieuknifirsts

# Read comma-separated lines on stdin, already sorted so that the earliest
# line for each TEK comes first. For the first line seen per TEK (field 9),
# append the TEK to:
#   $1 when field 2 is "ie"
#   $2 when field 2 is "ukni"
#   $3 otherwise (a notice is also printed on stdout)
# Lines with an empty field 9 are ignored. Output paths are passed through
# the environment so awk does not interpret backslashes in them.
_route_first_seen() {
  IE_OUT="$1" UKNI_OUT="$2" DODGY_OUT="$3" awk -F, '
    BEGIN {
      ie_out = ENVIRON["IE_OUT"]
      ukni_out = ENVIRON["UKNI_OUT"]
      dodgy_out = ENVIRON["DODGY_OUT"]
    }
    $9 == "" || seen[$9]++ { next }
    $2 == "ie" { print $9 >> ie_out; next }
    $2 == "ukni" { print $9 >> ukni_out; next }
    {
      print $9 >> dodgy_out
      print "We have a dodgy one!"
    }
  '
}

# One pass over the sorted lines replaces a grep+sort per TEK.
sort <"$tmpf1" | _route_first_seen "$IEF" "$UKNIF" "$DODGY"
# All three scratch files are finished with at this point.
rm -f -- "$tmpf" "$tmpf1" "$tmpf2"

# Now reduce size in case there're dups
# Sort the file named by $1 and drop duplicate lines, rewriting it in place.
# A missing file is created empty first, so both output files exist after a
# run. sort -o may safely name its own input as the output file.
_dedupe_in_place() {
  [[ -n "$1" ]] || return 0
  : >>"$1"
  sort -u -o "$1" -- "$1"
}

_dedupe_in_place "$IEF"
_dedupe_in_place "$UKNIF"