-
Notifications
You must be signed in to change notification settings - Fork 0
/
reddit.py
64 lines (55 loc) · 2.57 KB
/
reddit.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import praw
import pickle
import time
import cred
class pull_reddit:
    """Collect lines written by one Reddit user into a pickled list.

    Every run loads the previously collected lines from a pickle file,
    appends any new, not-yet-seen lines found on Reddit, and writes the
    list back to the same file.
    """

    # Account whose comments are harvested.
    TARGET_USER = "throwawaybutalsome"
    # Subreddit that is scanned in subreddit mode.
    TARGET_SUBREDDIT = "NEU"
    # Subreddit mode stops after this many seconds (10 minutes).
    MAX_SCAN_SECONDS = 60 * 10
    # Pause after each submission in subreddit mode. The original author's
    # note: the API is limited to 30 calls a minute, hence 2 seconds.
    SCAN_DELAY_SECONDS = 2

    def updateRedditFile(self, *, user_based_collection=True, reddit=None,
                         data_path="data.pkl"):
        """Fetch new lines from Reddit and persist them to *data_path*.

        Args:
            user_based_collection: When true (the default), read the target
                user's newest comments directly. When false, walk the newest
                submissions of the target subreddit and look for the user's
                top-level comments instead.
            reddit: An already constructed Reddit client. When omitted, one
                is built with ``praw.Reddit`` from the values in ``cred``.
            data_path: Pickle file holding the list of collected lines.

        Raises:
            OSError: If *data_path* cannot be written.
        """
        if reddit is None:
            reddit = praw.Reddit(client_id=cred.client_id,
                                 client_secret=cred.secret,
                                 username=cred.username,
                                 password=cred.password,
                                 user_agent=cred.user_agent)

        data = self._load_data(data_path)
        # Set mirror of ``data`` so duplicate checks do not rescan the list.
        seen = set(data)

        if user_based_collection:
            self._collect_from_user(reddit, data, seen)
        else:
            self._collect_from_subreddit(reddit, data, seen)

        with open(data_path, "wb") as f:
            pickle.dump(data, f)
            print("Saved data")

    @staticmethod
    def _load_data(data_path):
        """Return the list pickled at *data_path*, or create an empty file."""
        # SECURITY: unpickling can execute arbitrary code. Only point this at
        # a file that this program wrote itself.
        try:
            with open(data_path, "rb") as f:
                data = pickle.load(f)
        except (OSError, EOFError):
            # Missing/unreadable file, or an empty/truncated one: start over.
            data = []
            with open(data_path, "wb") as f:
                pickle.dump(data, f)
            print("Unable to load file. Created new one.")
        else:
            print("Loaded file correctly")
        return data

    @staticmethod
    def _remember(line, data, seen):
        """Append *line* to *data* if unseen; return True when it was added."""
        if line in seen:
            return False
        seen.add(line)
        data.append(line)
        return True

    def _collect_from_user(self, reddit, data, seen):
        """Add lines from the target user's newest comments made in r/NEU."""
        redditor = reddit.redditor(self.TARGET_USER)
        for comment in redditor.comments.new():
            if not comment.subreddit == "neu":
                print("========== In subreddit: " + comment.subreddit.title)
                continue
            for line in comment.body.split("\n"):
                # Skip empty lines and lines starting with a digit (those
                # lines are useless and mess up the data).
                if not line or line[0].isdigit():
                    continue
                if self._remember(line, data, seen):
                    print(line)
                else:
                    print("========== Found Duplicate")

    def _collect_from_subreddit(self, reddit, data, seen):
        """Add lines from the target user's top-level comments in r/NEU."""
        started = time.monotonic()
        for submission in reddit.subreddit(self.TARGET_SUBREDDIT).new():
            # Drop "load more comments" placeholders: they have no author or
            # body and would raise AttributeError in the loop below.
            submission.comments.replace_more(limit=0)
            for comment in submission.comments:
                if comment.author == self.TARGET_USER:
                    for line in comment.body.split("\n"):
                        if not line:
                            continue
                        if self._remember(line, data, seen):
                            print("Added " + line)
                        else:
                            print("Found duplicate")
            # NOTE(review): the time limit and delay are applied once per
            # submission; confirm this matches the intended nesting.
            if time.monotonic() - started > self.MAX_SCAN_SECONDS:
                break
            time.sleep(self.SCAN_DELAY_SECONDS)