-
Notifications
You must be signed in to change notification settings - Fork 1
/
plot_hashtag_bar.py
84 lines (68 loc) · 2.13 KB
/
plot_hashtag_bar.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ast import literal_eval as make_tuple
def plotHashtags(filename, savePng=True):
    """Create a stacked bar plot for the given hashtag data csv.

    The csv must provide a ``date`` column plus the hashtag columns selected
    by the label slice ``str(n_columns - 2)`` .. ``'0'``.  Every hashtag cell
    is the text of a tuple whose element 0 is the hashtag and element 1 is
    its count; only the counts are plotted (one stacked bar per csv row).

    :input: filename: path to file of csv
            savePng: if user wants the plot saved as a png (written to
                     ``filename + '.png'``)
    :return: None.  A message is printed and the function returns early when
             the file cannot be read or contains no rows.
    :raises AssertionError: if ``filename`` is not a string
    """
    # ``basestring`` only exists on Python 2; fall back to ``str`` elsewhere
    # so the same check works on both interpreters.
    try:
        string_types = basestring
    except NameError:
        string_types = str
    assert isinstance(filename, string_types)

    # Best-effort read: report and bail out on any read/parse failure, but do
    # not swallow KeyboardInterrupt/SystemExit as a bare ``except`` would.
    try:
        df = pd.read_csv(filename)
    except Exception:
        print("error reading file " + filename)
        return

    row_number, col_number = df.shape
    if row_number == 0:
        # Nothing to draw; the plotting call below would raise on empty data.
        print("no rows to plot in " + filename)
        return

    # Raw csv rows, written next to the chart further down.
    row_values = [row.tolist() for _, row in df.iterrows()]
    xDates = df['date']

    # Hashtag columns are picked by label, from str(col_number - 2) to '0'.
    first_label = str(col_number - 2)
    counts = [
        [make_tuple(cell)[1] for cell in df[label].tolist()]
        for label in df.loc[:, first_label:'0']
    ]

    # One list per csv column -> transpose so each csv row becomes one bar.
    counts_df = pd.DataFrame(counts).transpose()
    ax = counts_df.plot.bar(stacked=True, legend=False)

    # Label totals at top of bar.  The last ``row_number`` patches belong to
    # the top-most stacked series, so bottom + height is the full bar total.
    label_offset = 75
    for patch in ax.patches[len(ax.patches) - row_number:]:
        total = patch.get_y() + patch.get_height()
        plt.text(patch.get_x(), total + label_offset, str(int(total)),
                 fontsize=10)

    plt.gcf().subplots_adjust(bottom=0.15)
    # One tick per csv row (the original hard-coded 10 rows).
    plt.xticks(range(row_number), xDates, rotation=30)
    plt.ylabel('number of hashtags')
    plt.xlabel('date')

    # Write each csv row to the right of the last bar, top to bottom.
    height = 2000
    for values in row_values:
        plt.text(row_number, height, str(values))
        height -= 200

    # if set, save as png
    if savePng:
        plt.savefig(filename + '.png')
    plt.show()
    return
# Script entry: plot the United Airlines hashtag csv and save the chart as a png.
plotHashtags(
    filename='twitter_analyzer/United_Airlines_Popular_Hashtags_2017-04-04_to_2017-04-13.csv',
    savePng=True,
)