forked from sftcd/tek_transparency
-
Notifications
You must be signed in to change notification settings - Fork 0
/
sf-evol.sh
executable file
·320 lines (291 loc) · 9.46 KB
/
sf-evol.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
#!/bin/bash
# Make a CSV showing the evolution of shortfalls
# set -x

# Environment-overridable settings. ":" is the no-op builtin, so each line
# only has the side effect of assigning the default.
# HOME and TOP are defaulted when unset *or empty* (:=); the three file names
# are defaulted only when unset (=), so an explicitly empty value is kept.
: "${HOME:=/home/stephen}"
: "${TOP:=$HOME/code/tek_transparency}"
: "${INCSVFILE=country-counts.csv}"
: "${OUTCSVFILE=shortfall-evol.csv}"
: "${PNGFILE=shortfall-evol.png}"

# NOTE(review): the paragraph below talks about counting TEKs from a
# run-directory; this script itself only reads $INCSVFILE, so presumably it
# describes how that input CSV was produced - confirm.
# script to count each day's TEKs for each country/region
# Our definition of that day's TEKs is the number of TEKs
# that were first seen on that day for that country/region
# The input here is the run-directory for the run at
# UTC midnight each day (currently, 1am Irish Summer Time)

# Pull in shared definitions; $COUNTRY_LIST (used below as the default set of
# countries) is expected to come from here. Quoted so a $TOP containing
# whitespace still resolves to a single path.
. "$TOP/country_list.sh"

# default values for parameters
verbose="no"
# start/end of the period of interest, in seconds since the UNIX epoch
START=$(date +%s -d 2020-06-25T00:00:00Z)
STARTGIVEN="no"
END=$(date +%s)
# extra arguments handed to shortfalls.py when -a is given (empty otherwise)
AUCSTR=""
# Print the command-line help on stdout, one line per option, then terminate
# the whole script with exit status 99.
# Reads (for the "default:" hints): COUNTRY_LIST, END, INCSVFILE, OUTCSVFILE,
# PNGFILE, START.
usage() {
  printf '%s\n' \
    "$0 [-acehiopsv] - track evolution of shortfall" \
    " -a use daily/weekly counts where possible" \
    " -c [country-list] specifies which countries to process (defailt: all)" \
    " provide the country list as a space separatead lsit of 2-letter codes" \
    " e.g. '-c \"$COUNTRY_LIST\"'" \
    " -e specifies the end time, in secs since UNIX epoch (default: $END)" \
    " -h means print this" \
    " -i specifies the input CSV file (default: $INCSVFILE)" \
    " -o specifies the output CSV file (default: $OUTCSVFILE)" \
    " -p specifies the output PNG file (default: $PNGFILE)" \
    " -s specifies the start time, in secs since UNIX epoch (default: $START)" \
    " -v means be verbose"
  exit 99
}
# Command-line parsing via util-linux getopt(1).
# getopt re-emits the arguments in a normalised, shell-quoted form: every
# long option appears under the exact name declared with -l below, and a
# literal "--" marks the end of the options.
# options may be followed by one colon to indicate they have a required argument
if ! options=$(/usr/bin/getopt -s bash -o ac:e:hi:o:p:s:v -l actives,countries:,end:,help,infile:,outfile:,pngfile:,start:,verbose -- "$@")
then
  # something went wrong, getopt will put out an error message for us
  exit 1
fi
#echo "|$options|"
# Replace the positional parameters with getopt's normalised list; eval is
# needed to undo the quoting getopt added.
eval set -- "$options"
while [ $# -gt 0 ]
do
  case "$1" in
    -a|--actives) AUCSTR="-D $TOP/aucs ";;
    -c|--countries) COUNTRY_LIST=$2; shift;;
    -e|--end) END=$2; shift;;
    -h|--help) usage;;
    # The long names must match the -l list above (infile/outfile); the
    # previous --input/--output labels could never match what getopt emits,
    # so the long forms were rejected as unrecognized.
    -i|--infile) INCSVFILE=$2; shift;;
    -o|--outfile) OUTCSVFILE=$2; shift;;
    -p|--pngfile) PNGFILE=$2; shift;;
    # NOTE(review): START and STARTGIVEN are recorded here but nothing else
    # in this file reads them.
    -s|--start) STARTGIVEN="yes"; START=$2; shift;;
    -v|--verbose) verbose="yes" ;;
    (--) shift; break;;
    (-*) echo "$0: error - unrecognized option $1" 1>&2; exit 1;;
    (*) break;;
  esac
  shift
done
# Write the current UTC time to stdout as YYYYmmdd-HHMMSS.
# Returns date's exit status if the timestamp cannot be produced.
whenisitagain() {
  local stamp
  stamp=$(date -u '+%Y%m%d-%H%M%S') || return
  printf '%s\n' "$stamp"
}
# Stamp this run; the same stamp is reused in the backup file names below.
# "$*" here is whatever is left of the command line after option parsing.
NOW=$(whenisitagain)
echo "At $NOW: Running $0 $*"

# seconds per week (the 2-week pass uses 2*WKSECS)
WKSECS=$((7*60*60*24))

# for each country:
# - find the start date (if any)
# - for each 1- or 2-week period 'till the end time, figure shortfall
# - whack into CSV
# plot that CSV

# end of loop time_t: honour -e/--end. END defaults to the time the script
# started, so without -e this is "now" as before.
endtt=${END:-$(date +%s)}
# earliest (Monday-aligned) start seen so far; lowered by the passes below
mintt=$endtt

if [ ! -f "$INCSVFILE" ]
then
  echo "No input $INCSVFILE - exiting"
  exit 1
fi

# Keep the previous outputs around under a timestamped name.
if [ -f "$OUTCSVFILE" ]
then
  mv "$OUTCSVFILE" "$OUTCSVFILE.backup-$NOW.csv"
fi
# NOTE(review): only this header line is ever written to $OUTCSVFILE; the
# per-window rows go to $csvbase-1w.csv, $csvbase-2w.csv and $csvbase-aw.csv.
echo "country,start,end,shortfall" >"$OUTCSVFILE"
if [ -f "$PNGFILE" ]
then
  cp "$PNGFILE" "$PNGFILE.backup-$NOW.png"
fi

# Base names (directory and extension stripped) used to derive the names of
# the per-window CSV/PNG files. basename also drops any directory part, so
# the warnings below fire for names that include a path as well.
pngbase=$(basename "$PNGFILE" .png)
if [[ "$pngbase.png" != "$PNGFILE" ]]
then
  echo "Output PNG should be a .png file - things may get weird, but I'll try"
fi
csvbase=$(basename "$OUTCSVFILE" .csv)
# Compare against OUTCSVFILE: the former misspelling (OUTSCVFILE) was always
# empty, which made this warning fire on every run.
if [[ "$csvbase.csv" != "$OUTCSVFILE" ]]
then
  echo "Output CSV should be a .csv file - things may get weird, but I'll try"
fi

# a temp file is always handy:-)
# (holds the rows of one country while that country is being plotted)
ctmp=$(mktemp /tmp/sfevolXXXX)
# We do this 3 times: with a 1 week window ("1w"), with a 2 week window
# ("2w"), and from the first TEK date extending by one week each time
# ("aw", all weeks). The three passes differ only in window length, week
# alignment and the start date handed to shortfalls.py, so they share one
# function.

#######################################
# Print the epoch of the Monday of the week containing the given instant.
# Arguments: $1 - seconds since the UNIX epoch
# Outputs:   $1 moved back by (weekday-1) whole days, weekday per `date +%u`
#######################################
sfevol_monday_of() {
  local tt=$1 dow
  dow=$(date +%u -d "@$tt") || return
  printf '%s\n' "$(( tt - (dow - 1) * 60 * 60 * 24 ))"
}

#######################################
# Put a week start on the even-numbered-week schedule, so that all countries
# share the same 2-week boundaries.
# Globals:   WKSECS (read)
# Arguments: $1 - seconds since the UNIX epoch
# Outputs:   $1 unchanged if its ISO week number is even, else $1 - WKSECS
#######################################
sfevol_even_week_of() {
  local tt=$1 weekno
  weekno=$(date +%V -d "@$tt") || return
  # %V is zero padded ("08", "09"); without the explicit base bash reads a
  # leading 0 as octal and rejects those two values, which left week 09
  # unaligned.
  if (( 10#$weekno % 2 == 1 )); then
    # go back a week further
    tt=$(( tt - WKSECS ))
  fi
  printf '%s\n' "$tt"
}

#######################################
# Run one pass over all countries: compute the shortfall per window, write
# the rows to $csvbase-TAG.csv and plot per country and overall.
# Globals:   COUNTRY_LIST, INCSVFILE, TOP, AUCSTR, WKSECS, endtt, csvbase,
#            pngbase, ctmp (read); mintt (lowered to the earliest start seen)
# Arguments: $1 - tag used in file names and messages (1w, 2w, aw)
#            $2 - window length and step, in weeks
#            $3 - "yes" to align the start to an even ISO week
#            $4 - "yes" to always measure from the first TEK date
#                 (cumulative) instead of from the window start
# Outputs:   progress messages on stdout; CSV and PNG files as above
#######################################
sfevol_pass() {
  local tag=$1 weeks=$2 even=$3 cumulative=$4
  local outcsv="$csvbase-$tag.csv"
  local step=$(( weeks * WKSECS ))
  local country sdate sdtt gotone sstr estr from sfo sfr ccnt

  # Every run recomputes all windows from each country's first TEK, so start
  # from an empty file: appending to a previous run's file would duplicate
  # rows, and grep below needs the file to exist even if nothing is written.
  : >"$outcsv"

  # COUNTRY_LIST is a space separated list - splitting is intended
  for country in $COUNTRY_LIST; do
    # earliest date (2nd CSV field) among this country's rows that do not
    # contain a ",0," field
    sdate=$(grep "$country," "$INCSVFILE" | grep -v ",0," | sort -t, -k2 | head -1 | awk -F, '{print $2}')
    if [[ -z "$sdate" ]]; then
      echo "Skipping $country - no first TEK"
      continue
    fi
    sdtt=$(date +%s -d "$sdate")
    echo "Doing $country $tag starting from $sdate"
    # move back to the prev monday
    sdtt=$(sfevol_monday_of "$sdtt")
    if [[ "$even" == "yes" ]]; then
      sdtt=$(sfevol_even_week_of "$sdtt")
    fi
    # keep track of min date
    if (( sdtt < mintt )); then
      mintt=$sdtt
    fi

    gotone="False"
    while (( sdtt + step < endtt )); do
      sstr=$(date +%Y-%m-%d -d "@$sdtt")
      estr=$(date +%Y-%m-%d -d "@$(( sdtt + step ))")
      if [[ "$cumulative" == "yes" ]]; then
        from=$sdate
      else
        from=$sstr
      fi
      # AUCSTR is "" or "-D <dir> " and must split into separate words
      # shellcheck disable=SC2086
      sfo=$("$TOP/shortfalls.py" -c "$country" -t "$INCSVFILE" -d "$TOP/country-pops.csv" -rn -s "$from" -e "$estr" $AUCSTR)
      # 6th comma separated field, minus quotes and blanks; the unquoted
      # echo deliberately folds the output onto one line first
      # shellcheck disable=SC2086
      sfr=$(echo $sfo | awk -F, '{print $6}' | sed -e "s/'//g" | sed -e 's/ //g')
      if [[ "$gotone" == "False" ]]; then
        # no figure for the very first window => give up on this country
        if [[ -z "$sfr" || "$sfr" == "-" ]]; then
          echo "Breaking out of $country"
          break
        fi
        gotone="True"
      fi
      if [[ -z "$sfr" || "$sfr" == "-" ]]; then
        # expected something, got nothing => 100% shortfall
        echo "$country,$sstr,$estr,100.0" >>"$outcsv"
      else
        echo "$country,$sstr,$estr,$sfr" >>"$outcsv"
      fi
      sdtt=$(( sdtt + step ))
    done

    # do a bit of plotting; rows written above start with the country code,
    # so anchor the match there
    ccnt=$(grep -c "^$country," "$outcsv")
    if [[ "$ccnt" == "0" ]]; then
      echo "Not plotting $country - nothing there"
    else
      echo "Plotting $country into $pngbase-$tag-$country.png"
      grep "^$country," "$outcsv" >"$ctmp"
      "$TOP/plot-evol.py" -i "$ctmp" -o "$pngbase-$tag-$country.png"
    fi
  done

  # last - plot all at once
  "$TOP/plot-evol.py" -i "$outcsv" -o "$pngbase-$tag.png"
}

### 1 week at a time
sfevol_pass 1w 1 no no
### 2 weeks at a time, on even-numbered weeks
sfevol_pass 2w 2 yes no
### 1 week at a time, but with start date of earliest TEK
### so not 1w or 2w but all weeks, so aw
sfevol_pass aw 1 no yes
# Done: discard the scratch file that held the per-country rows for plotting.
rm -f -- "$ctmp"