-
Notifications
You must be signed in to change notification settings - Fork 0
/
vminstancescript.py
101 lines (84 loc) · 3.37 KB
/
vminstancescript.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# coding: utf-8
import datetime
import json
import time
import tweepy
from google.cloud import pubsub_v1
from tweepy.streaming import StreamListener
# Twitter API credentials used to build the OAuth handler below.
# The values look like placeholders that must be replaced before running.
# NOTE(review): secrets are hard-coded in source; consider loading them from
# environment variables or a secret store instead of committing real values.
api_key = "HEREAPIKEY"
api_key_secret = "HEREAPISECRETKEY"
access_token = "HEREACCESSTOKEN"
access_token_secret = "HERESECRETACCESSTOKEN"
# Config
# Pub/Sub publisher client and the topic path that write_to_pubsub publishes
# to; 'projectid' and 'pubname' are presumably placeholders for the real
# project id and topic name.
publisher = pubsub_v1.PublisherClient()
topic_path = publisher.topic_path('projectid','pubname')
# OAuth handler shared by the API client below and the stream created at the
# bottom of the file.
auth = tweepy.OAuthHandler(api_key, api_key_secret)
auth.set_access_token(access_token, access_token_secret)
# API client configured to wait when rate limited; `api` is not referenced
# anywhere else in the visible part of this file.
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=False)
# Define the list of terms to listen to
# (passed as the `track` filter when the stream is started).
lst_hashtags = ["#eth", "#ethereum"]
# Method to push messages to pubsub
def write_to_pubsub(data):
    """Publish an English-language tweet document to the Pub/Sub topic.

    Documents whose ``lang`` is not ``"en"`` are skipped without publishing.

    Args:
        data: dict in the shape produced by ``reformat_tweet``. Reads the keys
            ``lang``, ``text``, ``user_id``, ``id`` and ``created_at``
            (seconds since the epoch).

    Raises:
        KeyError: if one of the keys above is missing.
        Exception: anything raised while serialising or publishing
            propagates to the caller unchanged.
    """
    # Guard clause: only English tweets are forwarded.
    if data["lang"] != "en":
        return

    # fromtimestamp() renders the epoch value in the host's local timezone.
    posted_at = datetime.datetime.fromtimestamp(
        data["created_at"]
    ).strftime('%Y-%m-%d %H:%M:%S')

    message = {
        "text": data["text"],
        "user_id": data["user_id"],
        "id": data["id"],
        "posted_at": posted_at,
    }

    # The tweet id is also attached as a message attribute.
    # NOTE(review): the value returned by publish() is discarded, so a failed
    # publish is not detected here; confirm this best-effort mode is intended.
    publisher.publish(
        topic_path,
        data=json.dumps(message).encode("utf-8"),
        tweet_id=str(data["id"]).encode("utf-8"),
    )
# Method to format a tweet from tweepy
def reformat_tweet(tweet):
    """Flatten a raw tweet payload into the document sent downstream.

    Args:
        tweet: tweet JSON as a dict. ``id``, ``lang``, ``user["id"]``,
            ``created_at`` and one of the text fields are required;
            ``coordinates``, ``place`` and ``entities`` may be absent or null.

    Returns:
        dict with the keys ``id``, ``lang``, ``retweeted_id``,
        ``favorite_count``, ``retweet_count``, ``coordinates_latitude``,
        ``coordinates_longitude``, ``place``, ``user_id``, ``created_at``,
        ``hashtags``, ``usermentions`` and ``text``.

    Raises:
        KeyError: if a required field is missing.
        ValueError: if ``created_at`` is not in the
            ``"%a %b %d %H:%M:%S +0000 %Y"`` format.
    """
    # GeoJSON points are ordered [longitude, latitude]; the previous code read
    # index 0 for both values, so latitude was really the longitude.
    geo = tweet.get("coordinates")
    if geo:
        longitude = geo["coordinates"][0]
        latitude = geo["coordinates"][1]
    else:
        # 0 is the existing "no location" default for both values.
        longitude = 0
        latitude = 0

    place = tweet.get("place")
    entities = tweet.get("entities") or {}

    processed_doc = {
        "id": tweet["id"],
        "lang": tweet["lang"],
        "retweeted_id": (
            tweet["retweeted_status"]["id"]
            if "retweeted_status" in tweet
            else None
        ),
        "favorite_count": tweet.get("favorite_count", 0),
        "retweet_count": tweet.get("retweet_count", 0),
        "coordinates_latitude": latitude,
        "coordinates_longitude": longitude,
        "place": place["country_code"] if place else None,
        "user_id": tweet["user"]["id"],
        # NOTE(review): mktime() treats the parsed (UTC) fields as local time;
        # write_to_pubsub converts back with fromtimestamp(), so the pair
        # round-trips. Change both together if switching to true UTC epochs.
        "created_at": time.mktime(
            time.strptime(tweet["created_at"], "%a %b %d %H:%M:%S +0000 %Y")
        ),
    }

    # An empty or missing entity list simply yields an empty list.
    processed_doc["hashtags"] = [
        {"text": tag["text"], "startindex": tag["indices"][0]}
        for tag in entities.get("hashtags") or []
    ]
    processed_doc["usermentions"] = [
        {"screen_name": mention["screen_name"], "startindex": mention["indices"][0]}
        for mention in entities.get("user_mentions") or []
    ]

    # Prefer the untruncated text when the payload carries it.
    if "extended_tweet" in tweet:
        processed_doc["text"] = tweet["extended_tweet"]["full_text"]
    elif "full_text" in tweet:
        processed_doc["text"] = tweet["full_text"]
    else:
        processed_doc["text"] = tweet["text"]

    return processed_doc
# Custom listener class
class StdOutListener(StreamListener):
    """Stream listener that forwards each received status to Pub/Sub.

    Every status is flattened with ``reformat_tweet`` and handed to
    ``write_to_pubsub``; the number of handled statuses is kept in
    ``_counter``.
    """

    def __init__(self):
        super(StdOutListener, self).__init__()
        # Count of statuses processed by on_status so far.
        self._counter = 0

    def on_status(self, data):
        """Reformat and publish one status; always returns True."""
        document = reformat_tweet(data._json)
        write_to_pubsub(document)
        # Printed for every status, including ones write_to_pubsub skipped
        # because they were not English.
        print("sent tweet")
        self._counter = self._counter + 1
        return True

    def on_error(self, status):
        """Return False for status 420, otherwise None.

        NOTE(review): presumably tweepy treats a False return as a request
        to disconnect; confirm against the installed tweepy version.
        """
        if status != 420:
            return None
        print("rate limit active")
        return False
# Start listening
# These statements run at module level, so executing (or importing) this file
# creates the listener and starts the filtered stream immediately.
l = StdOutListener()
# NOTE(review): tweet_mode='extended' is forwarded to tweepy.Stream as an
# extra option; confirm the installed tweepy version actually uses it for
# streaming (reformat_tweet already handles the `extended_tweet` field).
stream = tweepy.Stream(auth, l, tweet_mode='extended')
# Only statuses matching the terms in lst_hashtags are requested.
stream.filter(track=lst_hashtags)