# old_tweets.py
# Scrapes tweets with GetOldTweets3 and saves them as CSV files.
import pandas as pd
import numpy as np
import seaborn as sns
import GetOldTweets3 as got
def get_tweets(username, max_tweets):
    """Scrape one user's tweets and return them as a DataFrame.

    Args:
        username: Account name handed to ``TweetCriteria.setUsername``.
        max_tweets: Upper bound handed to ``TweetCriteria.setMaxTweets``.
            The calling script passes a value below 1 to request every
            tweet the scraper can retrieve.

    Returns:
        A ``pandas.DataFrame`` with one row per tweet and the columns
        ``id, user, text, date, favorites, retweets, mentions, hashTags,
        geo``.
    """
    # Describe the search, then let the scraper collect the tweets.
    criteria = (
        got.manager.TweetCriteria()
        .setUsername(username)
        .setMaxTweets(max_tweets)
    )
    tweets = got.manager.TweetManager.getTweets(criteria)

    columns = ['id', 'user', 'text', 'date', 'favorites', 'retweets',
               'mentions', 'hashTags', 'geo']
    # The values in each row must follow the same order as ``columns``.
    # Previously retweets and favorites were emitted in the opposite order
    # to their labels, so each count ended up under the other's header.
    rows = [
        [
            tw.id,
            tw.username,
            tw.text,
            tw.date,
            tw.favorites,
            tw.retweets,
            tw.mentions,
            tw.hashtags,
            tw.geo,
        ]
        for tw in tweets
    ]
    return pd.DataFrame(rows, columns=columns)
# Scrape a single account. max_tweets is below 1 so that every tweet the
# scraper can retrieve is returned.
df = get_tweets("@elonmusk", 0)
# Preview the first rows, then report how many tweets were collected.
print(df.head(), len(df), sep="\n")
df.to_csv(
    "/home/apurva/Documents/code/reddit/elon.csv",
    index=False,
    encoding="utf-8",
)
def get_search(query, max_tweets, since):
    """Scrape tweets matching a search query and return them as a DataFrame.

    Args:
        query: Search text handed to ``TweetCriteria.setQuerySearch``.
        max_tweets: Upper bound handed to ``TweetCriteria.setMaxTweets``.
            A value below 1 requests every tweet the scraper can retrieve.
        since: Start date handed to ``TweetCriteria.setSince``, for
            example ``"2015-05-30"``.

    Returns:
        A ``pandas.DataFrame`` with one row per tweet and the columns
        ``id, user, text, date, favorites, retweets, mentions, hashTags,
        geo``.
    """
    criteria = (
        got.manager.TweetCriteria()
        .setQuerySearch(query)
        .setSince(since)
        .setMaxTweets(max_tweets)
    )
    tweets = got.manager.TweetManager.getTweets(criteria)

    columns = ['id', 'user', 'text', 'date', 'favorites', 'retweets',
               'mentions', 'hashTags', 'geo']
    # The values in each row must follow the same order as ``columns``.
    # Previously retweets and favorites were emitted in the opposite order
    # to their labels, so each count ended up under the other's header.
    rows = [
        [
            tw.id,
            tw.username,
            tw.text,
            tw.date,
            tw.favorites,
            tw.retweets,
            tw.mentions,
            tw.hashtags,
            tw.geo,
        ]
        for tw in tweets
    ]
    return pd.DataFrame(rows, columns=columns)
# Disabled experiment: the triple-quoted string below is a bare expression
# statement, so none of the code inside it runs. It calls
# get_search("covid-19", 0, dat) for each start date in dates_list, merges
# each result with the previous frame, drops duplicate rows by 'id', and
# writes the result to covid_dec.csv.
# NOTE(review): the indentation inside the string is missing, so where the
# if/else branches and the loop body end cannot be told from this text.
# NOTE(review): the dates here use "YYYY/MM/DD", while get_search gives
# "2015-05-30" as an example value - confirm the accepted format before
# re-enabling this code.
'''
dates_list=[ "2019/12/01", "2020/01/01", "2020/02/01", "2020/03/01", "2020/04/01", "2020/05/01"]
counter=0
for dat in dates_list:
if(counter>0):
df_old=df.copy()
df=get_search("covid-19", 0, dat)
print("df_old",len(df_old))
print("df",len(df))
df = df_old.merge(df, on=list(df_old), how='outer')
df.drop_duplicates(subset=['id'], inplace=True, keep='last')
print("joined",len(df))
else:
df=get_search("covid-19", 0, dat)
counter+=1
print(df.head())
print(len(df))
df.to_csv("/home/apurva/Documents/code/reddit/covid_dec.csv", encoding = "utf-8",index = False)
'''