# trustlist.py
# SETTINGS
import sys
import datetime
import argparse

import tweepy

import settings
import useful
import netdb

auth = tweepy.OAuthHandler(settings.CONSUMER_KEY, settings.CONSUMER_SECRET)
auth.set_access_token(settings.ACCESS_KEY, settings.ACCESS_SECRET)
api = tweepy.API(auth)
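# For reference, settings.py is expected to define the Twitter credentials used
# above and the defaults used below. A minimal sketch (every value here is a
# placeholder, not a real key):
#
#   CONSUMER_KEY = 'your-consumer-key'
#   CONSUMER_SECRET = 'your-consumer-secret'
#   ACCESS_KEY = 'your-access-key'
#   ACCESS_SECRET = 'your-access-secret'
#   seed_user = 'some_user'    # default for -s / --seed
#   list_name = 'some_list'    # default for -l / --list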
parser = argparse.ArgumentParser(description='Get seed and list information')
parser.add_argument('-s', '--seed', dest='seed_user', default=settings.seed_user)
parser.add_argument('-l', '--list', dest='list_name', default=settings.list_name)
parser.add_argument('-d', '--dot', dest='dot_file_name')
parser.add_argument('-w', action='store_true', default=False)  # generates Phil's web format
parser.add_argument('-n', '--net', dest='net_file_name')
args = parser.parse_args()
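# Example invocation (the user, list, and file names here are hypothetical):
#   python trustlist.py -s interstar -l tne-github -d trust.dot -n trust.net
# Add -w to use Phil's alternative crawler / web format instead of the original code.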
# BUILD TRUSTNET
trust_list = []
new_list = []
dotfile = None
netfile = None
def get_list(seed_user, list_name):
    try:
        users = api.list_members(seed_user, list_name)
        # Fix for differences between Phil's and Eli's list_members function. (A tweepy issue?)
        # Please remove when we've resolved this.
        if not (users[0].__class__.__name__ == "User"):
            users = users[0]
        # End of fix
    except Exception, e:
        #print e, e.__class__
        users = []
    return users
def buildList(seed_user, list_name):
    users = get_list(seed_user, list_name)
    for user in users:
        trust_list.append(user.screen_name.lower())
    # crawl deeper, repeatedly, until a pass turns up no new users
    new_list = crawlDeeper(trust_list, list_name)
    while len(new_list) > 0:
        new_list = crawlDeeper(new_list, list_name)
    # close the .dot graph (Phil's crawler closes it in __main__ instead)
    if args.dot_file_name != None:
        dotfile.write("}\n")
    return trust_list
# CRAWL DEEPER (only call from buildList())
def crawlDeeper(user_list, list_name):
    # Take a copy first: on later passes the caller hands us the global
    # new_list itself, and clearing it below would otherwise empty our input.
    user_list = user_list[:]
    new_list[:] = []
    for user in user_list:
        print 'checking %s' % user
        user = user.lower()
        try:
            candidates = get_list(user, list_name)
            for candidate in candidates:
                print '--checking candidate %s isn\'t already in trust list' % candidate.screen_name
                #makeobservation('interstar', '1mentat', 'tne-github', datetime.datetime.now())
                try:
                    netdb.makeobservation(user, candidate.screen_name.lower(), list_name, datetime.datetime.now())
                except:
                    print "Unexpected error:", sys.exc_info()[0]
                    print "netdb observation failed"
                if args.dot_file_name != None:
                    dotfile.write("  \"{0}\" -> \"{1}\"\n".format(user, candidate.screen_name.lower()))
                if candidate.screen_name.lower() not in trust_list:
                    print '--adding user %s to trust list' % candidate.screen_name.lower()
                    trust_list.append(candidate.screen_name.lower())
                    new_list.append(candidate.screen_name.lower())
        except:
            continue
    return new_list
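# For illustration, with -d the generated .dot file ends up looking like this
# (screen names are hypothetical):
#
#   digraph G {
#     "alice" -> "bob"
#     "alice" -> "carol"
#     "bob" -> "dave"
#   }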
# Phil's Alternative Crawler
# An alternative recursive crawler (not using buildList and crawlDeeper) that builds trust-lists
# into a SetDict (i.e. a dictionary of sets). One set is created for each layer of depth /
# distance from the root user. The SetDict has a pp (pretty-print) method which can output data
# suitable for another program to format (e.g. into a web page). This also updates the .dot file.
# This is an experiment: it's quite compact, and closer to the way I tend to write code these days.
# See if it's the style you'd like to use.
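# SetDict itself lives in useful.py and isn't shown here. Judging from how it's
# used below (contains, insert, pp), a minimal sketch of the interface might
# look like this -- an assumption about the shape, not the actual implementation:
#
#   class SetDict:
#       def __init__(self):
#           self.sets = {}                       # depth -> set of screen names
#       def contains(self, item):
#           return any(item in s for s in self.sets.values())
#       def insert(self, depth, item):
#           self.sets.setdefault(depth, set()).add(item)
#       def pp(self):
#           for depth in sorted(self.sets):      # one depth level per line
#               print depth, sorted(self.sets[depth])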
visited = useful.SetDict()

def recurse(depth, user_name, list_name):
    """The recursive step: crawls the tree and fills the "visited" SetDict.
    Breadth-first search (so that we place people as high as they deserve in the depth tree)."""
    people = [p.screen_name.lower() for p in get_list(user_name, list_name)]
    queue = []
    for p in people:
        if dotfile:
            dotfile.write('  "%s" -> "%s"\n' % (user_name, p))
        netdb.makeobservation(user_name, p, list_name, datetime.datetime.now())
        if not visited.contains(p):
            visited.insert(depth, p)
            queue.append(p)
    for p in queue:
        recurse(depth + 1, p, list_name)
def build(user, list_name):
    """Call this to start the crawler."""
    visited.insert(0, user)
    recurse(1, user, list_name)
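# e.g. after build('alice', 'some_list') (hypothetical names) the visited SetDict
# might hold {0: set(['alice']), 1: set(['bob', 'carol']), 2: set(['dave'])} --
# each user filed under the shallowest depth at which the crawl reached them.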
# End of Phil's alternative
if __name__ == '__main__':
    if args.dot_file_name != None:
        dotfile = open(args.dot_file_name, 'w+')
    if dotfile:
        dotfile.write("digraph G {\n")
    if not args.w:
        # Use James / Eli's original code
        netdb.setupdb()
        print buildList(args.seed_user, args.list_name)
        if args.net_file_name != None:
            graph = netdb.rendergraph(args.list_name)
            netfile = open(args.net_file_name, 'w+')
            netfile.write(graph)
    else:
        # my alternative (used in the current web-based test)
        netdb.setupdb()
        visited = useful.SetDict()  # a dictionary of sets; one set for each "depth" (distance from the root)
        build(args.seed_user, args.list_name)
        if dotfile:
            dotfile.write("}\n")
        if args.net_file_name != None:
            graph = netdb.rendergraph(args.list_name)
            netfile = open(args.net_file_name, 'w+')
            netfile.write(graph)
        visited.pp()