-
Notifications
You must be signed in to change notification settings - Fork 0
/
graph_generation.py
85 lines (78 loc) · 2.73 KB
/
graph_generation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import snap
import json
import simplejson
import pdb
# Party label constants. All four are currently empty strings and none of them
# is referenced by the functions visible in this file.
# NOTE(review): presumably placeholders for the party strings found in the
# congress data -- confirm the intended values or remove if unused.
REPUBLICAN = ''
DEMOCRAT = ''
INDEPENDENT = ''
THIRD = ''
def load_data(filename):
    """Parse the JSON file at *filename* and return the decoded object.

    The file is opened in text mode with the platform default encoding and
    is closed before the function returns.
    """
    with open(filename) as handle:
        return json.load(handle)
def get_candidate_info(filename='congress_full.json'):
    """Build a lookup from person id to ``[firstname, lastname, party]``.

    Args:
        filename: Path of the JSON file to read. Defaults to
            ``'congress_full.json'``, so calling the function without
            arguments reads the same file as before.

    Returns:
        A dict keyed by ``entry['person']['id']`` for every entry in the
        document's ``'objects'`` list. Each value is the list
        ``[firstname, lastname, party]``; the two names are taken from the
        entry's ``'person'`` object and the party from the entry itself.
        If an id occurs more than once, the last entry wins.

    Raises:
        KeyError: if the document has no ``'objects'`` key or an entry is
            missing one of the keys read here.
    """
    with open(filename) as congress:
        data = json.load(congress)

    people_data = {}
    for candidate in data['objects']:
        cand_info = candidate['person']
        people_data[cand_info['id']] = [
            cand_info['firstname'],
            cand_info['lastname'],
            candidate['party'],
        ]
    return people_data
def create_recipients_list(contributions, candidate_info):
    """Collect graph nodes and a contributor -> recipients mapping.

    Args:
        contributions: Iterable of records, each providing
            ``"recipient_id"`` and a ``"contributors"`` iterable whose items
            provide ``"contributor_ext_id"``.
        candidate_info: Mapping from recipient id to a sequence whose first
            three items are first name, last name and party.

    Returns:
        A ``(sources, nodes)`` tuple:

        * ``sources`` maps each contributor id to the list of distinct
          recipient ids it appears under, in first-seen order.
        * ``nodes`` holds one dict per distinct recipient id, in first-seen
          order, with keys ``"id"``, ``"name"`` ("first last") and
          ``"group"`` (party). Name and group are encoded with the
          ``'ascii'`` codec, ignoring characters that cannot be encoded.

    Raises:
        KeyError: if a record lacks an expected key or a recipient id is not
            present in ``candidate_info``.
    """
    sources = {}
    nodes = []
    seen_recipients = set()

    for contribution in contributions:
        pid = contribution["recipient_id"]

        # Emit each recipient only once. Without this, several records for
        # the same recipient would append duplicate nodes, and any id -> index
        # mapping built from the node list can point at just one of the
        # copies, leaving the others disconnected.
        if pid not in seen_recipients:
            seen_recipients.add(pid)
            info = candidate_info[pid]
            nodes.append({
                "id": pid,
                "name": (info[0] + ' ' + info[1]).encode('ascii', 'ignore'),
                "group": info[2].encode('ascii', 'ignore'),
            })

        for contributor in contribution["contributors"]:
            cid = contributor["contributor_ext_id"]
            recipients = sources.setdefault(cid, [])
            # Kept as an ordered list (not a set) so the recipient order is
            # deterministic for whoever enumerates pairs from it.
            if pid not in recipients:
                recipients.append(pid)

    return sources, nodes
def create_edges(sources, nodes):
    """Link every pair of recipients that share at least one contributor.

    Side effects:
        * Writes the recipient-id -> node-index mapping to
          ``int_to_ids.json`` (relative path) as indented JSON. Despite the
          file name, the keys are recipient ids and the values are indices.
        * Prints the number of contributors, then one ``done/total``
          progress line per contributor.

    Args:
        sources: Mapping of contributor id -> list of recipient ids.
        nodes: List of node dicts, each with an ``'id'`` key; a node's
            position in this list is the index used in the edges.

    Returns:
        A list of ``{"source": i, "target": j, "value": 1}`` dicts, one per
        unordered pair of node indices, in the order the pairs are first met.
    """
    # Position of every recipient id inside ``nodes``; if an id is repeated
    # the later position overwrites the earlier one.
    index_of = {}
    for position, node in enumerate(nodes):
        index_of[node['id']] = position

    with open('int_to_ids.json', 'w') as mapping_file:
        mapping_file.write(simplejson.dumps(index_of, indent=4))

    total = len(sources)
    print(total)

    links = []
    seen_pairs = set()
    for done, (_contributor, recipients) in enumerate(sources.items()):
        print(str(done) + '/' + str(total))
        for offset, left in enumerate(recipients):
            for right in recipients[offset + 1:]:
                src = index_of[left]
                dst = index_of[right]
                # frozenset makes (a, b) and (b, a) the same key, so each
                # undirected pair is emitted only once.
                pair = frozenset((src, dst))
                if pair in seen_pairs:
                    continue
                seen_pairs.add(pair)
                links.append({"source": src, "target": dst, "value": 1})
    return links
def generate_graph():
    """Run the whole pipeline and write ``recipients_graph.json``.

    Reads ``contributions.json`` through ``load_data`` and the candidate
    lookup through ``get_candidate_info``, derives nodes and edges, and
    writes ``{"nodes": ..., "links": ...}`` as indented JSON to
    ``recipients_graph.json``. Prints ``done nodes`` and ``done edges`` as
    the two stages finish.

    Returns:
        The list of node dicts that was written to the output file.
    """
    raw_contributions = load_data('contributions.json')
    people = get_candidate_info()

    sources, nodes = create_recipients_list(raw_contributions, people)
    print("done nodes")

    links = create_edges(sources, nodes)
    print('done edges')

    with open('recipients_graph.json', 'w') as graph_file:
        graph = {"nodes": nodes, "links": links}
        graph_file.write(simplejson.dumps(graph, indent=4))
    return nodes
# Script entry point: build the graph files only when this module is executed
# directly, not when it is imported.
if __name__ == "__main__":
    # The returned node list is bound to the module-level name ``n``.
    n = generate_graph()