-
Notifications
You must be signed in to change notification settings - Fork 0
/
cityscrape
executable file
·154 lines (123 loc) · 4.05 KB
/
cityscrape
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#!/bin/sh
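# cityscrape - total the City of Ann Arbor precinct reports published on
# electionresults.ewashtenaw.org for one election.
# usage: cityscrape mon year
# where mon is a three-letter lowercase month designator, e.g., may, aug, or nov
# and year is a four-digit year, e.g., 2018
# output is CSV, sorted by race total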
# Print the command-line synopsis on stderr and terminate with status 1.
# Takes no arguments; never returns to the caller.
usage() {
  printf '%s\n' \
    'usage: cityscrape mon year' \
    'where mon is three-letter lowercase month designator, e.g., may, aug, or nov' \
    'and year is a four digit year, e.g., 2018' >&2
  exit 1
}
### args checking
# Exactly two arguments are required: the month and the year.
case $# in
  2) ;;
  *) usage ;;
esac

# Months other than the usual election months are allowed, but warned about.
# The warning goes to stderr so that it never ends up inside the CSV written
# to stdout, and it is quoted so the "?" cannot be expanded as a glob.
case "$1" in
  may|aug|nov) ;;
  *) echo "is $1 an election month? i will try ..." >&2 ;;
esac

# The year must be four digits in the range 2000-2099.
case "$2" in
  20[0-9][0-9]) ;;
  *)
    echo "$2 is not a valid year" >&2
    usage
    ;;
esac

# Anything before nov 2013 is rejected: every earlier year, and every 2013
# month other than nov. Separate quoted tests replace the -o/-a form so that
# an unusual $1 cannot break the expression.
if [ "$2" -lt 2013 ] || { [ "$2" -eq 2013 ] && [ "$1" != "nov" ]; }; then
  echo "cityscrape can't process elections prior to nov 2013" >&2
  exit 1
fi
### end args checking
# <mon><year>, e.g. nov2018; used both as the cache subdirectory name and as
# a path component of the download URLs.
monyear=$1$2
# Set up the cache directory cachedir/<mon><year>. mkdir -p creates both
# levels and succeeds when they already exist. The path is quoted because $1
# is not strictly validated, and a failure stops the script here instead of
# surfacing later as a misleading download error.
if ! mkdir -p "cachedir/$monyear"; then
  echo "cityscrape: can't create cache directory cachedir/$monyear" >&2
  exit 1
fi
### web scraping
# Download command shared by all fetches; expanded unquoted on purpose so the
# options are split into separate words.
# NOTE(review): --no-check-certificate turns off TLS certificate verification,
# so the downloaded pages are not authenticated - confirm this is still needed.
WGET="wget -q --no-check-certificate"

# Fetch the election index page unless a non-empty copy is already cached.
index_page="cachedir/$monyear/indexprecinctreport.html"
if [ ! -s "$index_page" ]; then
  # shellcheck disable=SC2086 # $WGET must be word-split
  if ! $WGET -O "$index_page" \
    "https://electionresults.ewashtenaw.org/electionreporting/$monyear/indexprecinctreport.html"; then
    # Remove whatever wget left behind so a truncated page is not mistaken
    # for a valid cache entry on the next run.
    rm -f "$index_page"
    echo "can't find $1 $2 election" >&2
    exit 1
  fi
fi
# city_report: read the combined per-ward scrape on stdin and write the
# city-wide CSV report on stdout.
#
# Input lines, comma separated:
#   10 fields - a ward/precinct header; field 6 is added to the registered
#               voter total and field 8 to the ballots-cast total
#    2 fields - race name, votes; a name containing "@" is a partisan race
# Anything else is an internal error: a diagnostic is written to stderr,
# nothing is written to stdout, and the function returns 1.
#
# Output: the header line, then one line per total / city-wide race, sorted
# by descending vote count, with "@" shown as " partisan".
city_report() {
  rows=$(awk -F, '
    # Percentage helper; a zero denominator yields 0 instead of a fatal
    # division by zero (e.g. when every ballot was a straight party ballot).
    function pct(part, whole) {
      return (whole == 0) ? 0 : 100 * part / whole
    }

    # Print one race. partyline is the number of straight party votes to
    # discount; when it is non-zero the race is partisan and also gets a
    # hand cast dropoff column.
    function printRace(race, partyline,    totalHandCast, raceHandCast) {
      totalHandCast = TotalBallotsCast - partyline
      raceHandCast = votes[race] - partyline
      if (partyline) {
        printf("%s,%d,%d,%2.2f%%,%2.2f%%\n",
               race,                                                    # Race
               votes[race],                                             # Vote Total
               raceHandCast,                                            # Hand Cast
               pct(TotalBallotsCast - votes[race], TotalBallotsCast),   # Dropoff
               pct(totalHandCast - raceHandCast, totalHandCast))        # Hand Cast Dropoff
      } else {
        printf("%s,%d,%d,%2.2f%%\n",
               race,                                                    # Race
               votes[race],                                             # Vote Total
               raceHandCast,                                            # Hand Cast
               pct(TotalBallotsCast - votes[race], TotalBallotsCast))   # Total Dropoff
      }
    }

    # Ward/precinct header line.
    NF == 10 {
      WardCount++
      TotalRegisteredVoters += $6
      TotalBallotsCast += $8
      next
    }

    # Unexpected shape: report it on stderr with the input line number and
    # stop. The flag is needed because exit still runs the END rule.
    NF != 2 {
      printf("cityscrape internal error: NF is %d on line %d: %s\n", NF, NR, $0) | "cat 1>&2"
      failed = 1
      exit 1
    }

    # NF == 2. $1 is the name of the race, $2 is the number of votes.
    {
      # In 2020 the race was renamed from "Straight Party" to
      # "Straight Party Ticket"; fold both names into one.
      raceName = $1
      if (raceName ~ "Straight Party Ticket@")
        raceName = "Straight Party@"
      wards[raceName]++        # number of wards this race was seen in
      votes[raceName] += $2    # running vote total
    }

    END {
      if (failed)
        exit 1
      if (TotalBallotsCast == 0)    # nothing usable was scraped
        exit 0
      print "Registered Voters" "," TotalRegisteredVoters
      print "Ballots Cast" "," TotalBallotsCast
      # Look the straight party total up once, outside the loop, so that a
      # missing entry is never created while the array is being iterated.
      straight = ("Straight Party@" in votes) ? votes["Straight Party@"] : 0
      for (race in votes) {
        if (wards[race] != WardCount)    # city-wide races only
          continue
        if (race == "Straight Party@")
          print "Straight Party" "," straight
        else if (race ~ "@")
          printRace(race, straight)      # partisan: discount straight party votes
        else
          printRace(race, 0)             # nonpartisan: no straight party votes
      }
    }
  ') || return 1

  echo "Race,Total,Hand Cast,Dropoff,HC Dropoff"
  if [ -n "$rows" ]; then
    # -k2,2 limits the numeric key to the vote total column.
    printf '%s\n' "$rows" | sort -t, -nr -k2,2 | sed -e 's/@/ partisan/'
  fi
}

# Names of the per-ward report files: the href target on each
# "City of Ann Arbor" line of the cached index page.
ward_files=$(grep 'City of Ann Arbor' "cachedir/$monyear/indexprecinctreport.html" |
  sed -e 's/.*<a href=.//' -e 's/.>.*//')

# Pass 1: make sure every ward file is cached. The loop reads from a here
# document rather than a pipe so that it runs in the main shell and the
# exit below really stops the script before any report is produced.
while read -r file; do
  [ -n "$file" ] || continue
  if [ ! -s "cachedir/$monyear/$file" ]; then
    # shellcheck disable=SC2086 # $WGET must be word-split
    if ! $WGET -O "cachedir/$monyear/$file" \
      "https://electionresults.ewashtenaw.org/electionreporting/$monyear/$file"; then
      # Do not leave a truncated file behind as a bogus cache entry.
      rm -f "cachedir/$monyear/$file"
      echo "cityscrape internal error: can't cache file for $1 $2" >&2
      exit 1
    fi
  fi
done <<EOF
$ward_files
EOF

# Pass 2: scrape each cached ward file and total the results.
printf '%s\n' "$ward_files" |
  while read -r file; do
    [ -n "$file" ] || continue
    wardscrape "cachedir/$monyear/$file"
  done |
  city_report || exit 1
### end scraping
exit