-
Notifications
You must be signed in to change notification settings - Fork 5
/
twitter_get_user.py
72 lines (61 loc) · 2.36 KB
/
twitter_get_user.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import os
import sys
import time
import json
import math
from twitterclient import get_twitter_client
from tweepy import Cursor
# Cap on how many follower/friend ids are fetched per user: the script divides
# this by 5000 to get the maximum number of id pages it requests.
MAX_FRIENDS = 15000
def usage():
    """Print the command-line usage message for this script to stdout."""
    print("Usage: ")
    print("python {} <username>".format(sys.argv[0]))
def paginate(items, n):
    """
    Yield consecutive slices of items, each holding at most n elements.

    The last slice is shorter when len(items) is not a multiple of n.
    """
    offsets = range(0, len(items), n)
    yield from (items[start:start + n] for start in offsets)
def _dump_users(client, ids_method, screen_name, fname, max_pages):
    """
    Write the profiles of a user's followers or friends to a JSON Lines file.

    ids_method is the client call that returns pages of user ids
    (client.followers_ids or client.friends_ids). Each page of ids is
    resolved to profiles in batches of 100 via client.lookup_users, and
    each profile is written to fname as one JSON document per line.
    At most max_pages pages of ids are requested.
    """
    with open(fname, 'w') as f:
        for ids in Cursor(ids_method, screen_name=screen_name).pages(max_pages):
            for chunk in paginate(ids, 100):
                for user in client.lookup_users(user_ids=chunk):
                    f.write(json.dumps(user._json) + "\n")
            # A page holding exactly 5000 ids is treated as "more pages follow",
            # so pause before requesting the next one.
            if len(ids) == 5000:
                print("More results available. Sleeping for 60 seconds to avoid rate limit")
                time.sleep(60)


def main():
    """
    Download followers, friends and profile for the user named on the command line.

    Output goes to Twitter_Profiles/<username>/ as followers.jsonl,
    friends.jsonl and user_profile.json. Exits with status 1 on a wrong
    argument count or when the output directory cannot be created.
    """
    if len(sys.argv) != 2:
        usage()
        sys.exit(1)
    screen_name = sys.argv[1]
    client = get_twitter_client()
    dirname = "Twitter_Profiles/{}".format(screen_name)
    max_pages = math.ceil(MAX_FRIENDS / 5000)
    try:
        # exist_ok=True means an already-existing directory is not an error,
        # so any OSError raised here is a real failure and we cannot continue.
        os.makedirs(dirname, mode=0o755, exist_ok=True)
    except OSError as e:
        print("Error while creating directory {}".format(dirname))
        print(e)
        sys.exit(1)

    # get followers for a given user
    _dump_users(client, client.followers_ids, screen_name,
                "{}/followers.jsonl".format(dirname), max_pages)

    # get friends for a given user
    _dump_users(client, client.friends_ids, screen_name,
                "{}/friends.jsonl".format(dirname), max_pages)

    # get user's profile; fetch it before opening the file so a failed request
    # does not leave an empty or truncated user_profile.json behind
    profile = client.get_user(screen_name=screen_name)
    with open("{}/user_profile.json".format(dirname), 'w') as f:
        f.write(json.dumps(profile._json, indent=4))


if __name__ == '__main__':
    main()
# https://www.geeksforgeeks.org/convert-json-to-csv-in-python/