-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlobbyists.mk
71 lines (62 loc) · 2.14 KB
/
lobbyists.mk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# Lobbyist expenditures and contributions
#
# The extracted transaction CSVs are only chain steps on the way to
# data/processed/lobbyist_%.csv, so make would normally delete them once the
# processed file is built. Listing them under .SECONDARY keeps them on disk.
# .SECONDARY is used rather than .PRECIOUS because .PRECIOUS additionally
# preserves a half-written file when make is interrupted (and exempts it from
# .DELETE_ON_ERROR), leaving a truncated CSV behind.
.SECONDARY : data/intermediate/lobbyist_contributions.csv data/intermediate/lobbyist_expenditures.csv
# Build the lobbyist/employer table.
#
# The raw lobbyist CSV is fed to csvsql on standard input (referenced as STDIN
# in the query). For each (ClientID, MemberID) pair the inner query computes the
# highest MemberVersionID plus the first and last Year; joining back to the
# input on (ClientID, MemberID, MemberVersionID, Year) picks up the contact
# columns from the matching row. The result is then joined on ClientID with the
# second prerequisite, data/intermediate/clients.csv.
#
# NOTE(review): MAX(MemberVersionID) and MAX(Year) are computed independently
# and the JOIN is an inner join, so a pair whose highest version and latest year
# sit on different input rows produces no output row -- confirm this cannot
# happen in the data.
data/processed/lobbyist_employer.csv : data/raw/lobbyists.csv data/intermediate/clients.csv
	csvsql --query "SELECT \
	ClientID, \
	MemberID, \
	Phone, \
	LobbyistName, \
	LobbyistAddress, \
	Email, \
	StartYear, \
	EndYear \
	FROM ( \
	SELECT \
	ClientID, \
	MemberID, \
	MAX(MemberVersionID) AS MemberVersionID, \
	MAX(Year) AS Year, \
	MIN(Year) AS StartYear, \
	MAX(Year) AS EndYear \
	FROM STDIN \
	GROUP BY ClientID, MemberID \
	) AS lobbyists \
	JOIN STDIN \
	USING (ClientID, MemberID, MemberVersionID, Year)" < $< | \
	csvjoin -c ClientID - $(word 2, $^) > $@
# Enrich a transaction table (the stem is e.g. "contributions" or
# "expenditures") with filing metadata and lobbyist contact details.
#
# Step 1 left-joins the transactions to the filings table, pairing the
# transactions' Source column with the filings' ReportFileName column.
# Step 2 left-joins that result to the lobbyist table on MemberID.
# Both joins use --left, so transaction rows without a match are retained.
data/processed/lobbyist_%.csv : data/intermediate/lobbyist_%.csv \
    data/intermediate/filings.csv data/intermediate/lobbyists.csv
	csvjoin --left -c Source,ReportFileName $< data/intermediate/filings.csv | \
		csvjoin --left -c MemberID - data/intermediate/lobbyists.csv > $@
# Extract one kind of transaction into a CSV. The pattern stem (the text matched
# by %) is passed to the extractor as its only argument, and the extractor's
# standard output becomes the target file.
# Presumably the extractor reads the files fetched by the `filings` target --
# verify against scrapers.lobbyist.extract_transactions.
# NOTE(review): the `filings` recipe does not write a file named `filings`, so
# this prerequisite appears to be permanently out of date and the extraction
# reruns on every build -- confirm that is intended.
data/intermediate/lobbyist_%.csv : filings
	python -m scrapers.lobbyist.extract_transactions $* > $@
# Download the filings listed in data/intermediate/filings.csv, skipping rows
# whose ReportTypeCode matches "LNA" (csvgrep's -i inverts the -m match).
#
# The recipe does not write to $@, so `filings` is a command name rather than a
# file and is declared phony: the download runs whenever it is requested, and a
# stray file or directory called `filings` cannot cause make to skip it.
# NOTE(review): confirm download_filings does not itself create a `filings`
# path that was being relied on as a timestamp.
.PHONY : filings
filings : data/intermediate/filings.csv
	csvgrep -c ReportTypeCode -m "LNA" -i < $< | \
		python -m scrapers.lobbyist.download_filings
# Select contact details (Phone, LobbyistName, LobbyistAddress, Email) per
# MemberID from the raw lobbyist CSV, which csvsql reads on standard input
# (referenced as STDIN in the query).
#
# The inner query computes, for each MemberID, the highest MemberVersionID,
# Year and ClientID; the join back to the input keeps the contact columns of the
# row that carries all three of those values.
#
# NOTE(review): the three maxima are computed independently and the JOIN is an
# inner join, so a MemberID whose maxima fall on different input rows is absent
# from the output -- confirm against the data.
data/intermediate/lobbyists.csv : data/raw/lobbyists.csv
	csvsql --query "SELECT \
	MemberID, \
	Phone, \
	LobbyistName, \
	LobbyistAddress, \
	Email \
	FROM ( \
	SELECT \
	MemberID, \
	MAX(MemberVersionID) AS MemberVersionID, \
	MAX(Year) AS Year, \
	MAX(ClientID) AS ClientID \
	FROM STDIN \
	GROUP BY MemberID \
	) AS lobbyists \
	JOIN STDIN \
	USING (MemberID, MemberVersionID, Year, ClientID)" < $< > $@
# Collapse the raw client CSV to one row per ClientID, reporting the greatest
# ClientName in each group.
# NOTE(review): ClientVersionID is neither aggregated nor part of the GROUP BY.
# Assuming csvsql runs the query in its default SQLite backend, such a bare
# column takes its value from the row that holds MAX(ClientName) -- confirm that
# this is the version the downstream scraper should receive.
data/intermediate/clients.csv : data/raw/clients.csv
	csvsql --query "SELECT ClientID, ClientVersionID, MAX(ClientName) AS ClientName FROM STDIN GROUP BY ClientID" < $< > $@
# Build the filings index: the distinct (MemberID, MemberVersionID) pairs from
# the raw lobbyist CSV are piped to the filings scraper, whose standard output
# becomes the target.
#
# Output is written to $@.tmp and renamed only after the scraper exits
# successfully. Redirecting straight into $@ would leave a truncated CSV behind
# when the scrape fails part-way, and make would then treat that file as up to
# date on the next run.
data/intermediate/filings.csv : data/raw/lobbyists.csv
	csvsql --query "SELECT DISTINCT MemberID, MemberVersionID FROM STDIN" < $< | \
		python -m scrapers.lobbyist.scrape_filings > $@.tmp
	mv -f $@.tmp $@
# Scrape lobbyist records. The deduplicated client table is supplied to the
# scraper on standard input and the scraper's standard output becomes the
# target.
#
# Output is written to $@.tmp and renamed only after the scraper exits
# successfully, so a scrape that fails part-way cannot leave a truncated CSV
# that make would consider up to date.
data/raw/lobbyists.csv : data/intermediate/clients.csv
	python -m scrapers.lobbyist.scrape_lobbyists < $< > $@.tmp
	mv -f $@.tmp $@
# Scrape the client list; the scraper's standard output becomes the target.
#
# This target has no prerequisites, so once the file exists make never rebuilds
# it. Output is therefore written to $@.tmp and renamed only after the scraper
# exits successfully -- otherwise a scrape that fails part-way would leave a
# truncated CSV in place permanently.
data/raw/clients.csv :
	python -m scrapers.lobbyist.scrape_clients > $@.tmp
	mv -f $@.tmp $@