# twitter_db.py
#
# Streams English-language tweets matching settings.TRACK_WORDS, scores each
# tweet's sentiment with TextBlob, and stores the result in the MySQL table
# named by settings.TABLE_NAME.
import credentials
import settings
import re
import tweepy
import mysql.connector
import pandas as pd
from textblob import TextBlob
# Pre-processing
def deEmojify(text):
    """Strip all non-ASCII characters (emoji included) from ``text``.

    Args:
        text: The string to clean. May be ``None`` or empty.

    Returns:
        ``text`` with every non-ASCII character removed, or ``None`` when
        ``text`` is ``None`` or an empty string.
    """
    if not text:
        return None
    # errors='ignore' silently drops characters that have no ASCII encoding
    # instead of raising UnicodeEncodeError.
    return text.encode('ascii', 'ignore').decode('ascii')
# Connecting with Database
# NOTE(review): the connection credentials are hard-coded here; consider
# loading them from the `credentials` module or the environment instead.
mydb = mysql.connector.connect(
    host="localhost",
    user="root",
    passwd="neha",
    database="TwitterDB",
    charset='utf8',
)
if mydb.is_connected():
    # Create the tweet table on first run.
    # The lookup is limited to the current database (DATABASE()) so that a
    # table with the same name in another schema cannot skew the count, and
    # the table name is bound as a parameter rather than spliced into the SQL.
    mycursor = mydb.cursor()
    try:
        mycursor.execute(
            """
            SELECT COUNT(*)
            FROM information_schema.tables
            WHERE table_schema = DATABASE()
              AND table_name = %s
            """,
            (settings.TABLE_NAME,),
        )
        if mycursor.fetchone()[0] == 0:
            # Identifiers cannot be bound as parameters, so the table name and
            # column definitions (both from settings) are formatted in.
            mycursor.execute(
                "CREATE TABLE {} ({})".format(
                    settings.TABLE_NAME, settings.TABLE_ATTRIBUTES
                )
            )
            mydb.commit()
    finally:
        # Always release the cursor, even if one of the statements failed.
        mycursor.close()
# Streaming tweets. These tweets will be stored in our database
class MyStreamListener(tweepy.StreamListener):
    """Stream listener that stores each original (non-retweet) tweet in MySQL.

    Relies on the module-level ``mydb`` connection, ``settings.TABLE_NAME``
    and the ``deEmojify`` helper.

    NOTE(review): ``tweepy.StreamListener`` is a Tweepy 3.x API; confirm the
    pinned Tweepy version before upgrading.
    """

    def on_status(self, status):
        """Extract the attributes of one tweet and insert them as a row.

        Args:
            status: The tweet object delivered by the stream.

        Returns:
            ``True`` in every case, so the stream keeps running.
        """
        # The goal is to keep only original tweets. `status.retweeted` merely
        # reports whether the authenticating user retweeted this tweet; an
        # actual retweet is recognised by its `retweeted_status` attribute.
        if status.retweeted or hasattr(status, 'retweeted_status'):
            return True

        # Extract attributes from the 'status' tweet object.
        id_str = status.id_str
        created_at = status.created_at
        # deEmojify returns None for empty input; TextBlob needs a string.
        text = deEmojify(status.text) or ''
        sentiment = TextBlob(text).sentiment
        polarity = sentiment.polarity
        subjectivity = sentiment.subjectivity
        user_created_at = status.user.created_at
        user_location = deEmojify(status.user.location)
        user_description = deEmojify(status.user.description)
        user_followers_count = status.user.followers_count
        longitude = None
        latitude = None
        if status.coordinates:
            # Index 0 is read as longitude and index 1 as latitude.
            longitude = status.coordinates['coordinates'][0]
            latitude = status.coordinates['coordinates'][1]
        retweet_count = status.retweet_count
        favorite_count = status.favorite_count

        print(status.text)
        print("Long: {}, Lati: {}".format(longitude, latitude))

        # Store all data in MySQL.
        if mydb.is_connected():
            sql = (
                "INSERT INTO {} (id_str, created_at, text, polarity, "
                "subjectivity, user_created_at, user_location, "
                "user_description, user_followers_count, longitude, "
                "latitude, retweet_count, favorite_count) "
                "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
            ).format(settings.TABLE_NAME)
            val = (
                id_str, created_at, text, polarity, subjectivity,
                user_created_at, user_location, user_description,
                user_followers_count, longitude, latitude, retweet_count,
                favorite_count,
            )
            mycursor = mydb.cursor()
            try:
                mycursor.execute(sql, val)
                mydb.commit()
            finally:
                # Release the cursor even when the insert or commit fails.
                mycursor.close()
        return True

    def on_error(self, status_code):
        """Stop streaming once Twitter signals that the rate limit was hit.

        Args:
            status_code: The HTTP status code reported for the stream.

        Returns:
            ``False`` for status 420 (rate limited), which disconnects the
            stream; ``True`` for any other status code.
        """
        if status_code == 420:
            # Returning False disconnects the stream.
            return False
        return True
# Connecting twitter application credentials to this program
auth = tweepy.OAuthHandler(credentials.API_KEY, credentials.API_SECRET_KEY)
# NOTE(review): `ACCESS_TOEKN` looks like a misspelling of ACCESS_TOKEN, but it
# must match the attribute name defined in credentials.py — confirm before
# renaming it in either place.
auth.set_access_token(credentials.ACCESS_TOEKN, credentials.ACCESS_TOKEN_SECRET)
api = tweepy.API(auth)
# Streaming data
myStreamListener = MyStreamListener()
myStream = tweepy.Stream(auth=api.auth, listener=myStreamListener)
try:
    # Receives English tweets that match the tracked keywords.
    myStream.filter(languages=["en"], track=settings.TRACK_WORDS)
finally:
    # Close the database connection even if streaming is interrupted or fails.
    mydb.close()