forked from tbourrely/tweets2polarity
-
Notifications
You must be signed in to change notification settings - Fork 0
/
tweets2polarity.py
executable file
·89 lines (65 loc) · 2.38 KB
/
tweets2polarity.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/usr/bin/env python3
#======= IMPORTS =======
import argparse
import ndjson
import sys
import os
from polarityComputers.hashtagPolarity import HashtagsPolarity
#======= ARGS ========
# Command-line interface: three required positionals followed by one
# optional flag. Positionals are consumed in the order declared here.
parser = argparse.ArgumentParser()
# Integer id selecting the polarity computer (main() only accepts 0).
parser.add_argument("model", type=int, help="Polarity Computing Class")
# Path of the input file holding the tweets.
parser.add_argument("tweets", help="tweets as an ndjson file")
# Integer substituted into the polarisation CSV file name.
parser.add_argument("version", type=int, help="Polarisation file version")
# Optional integer cap used by main()'s processing loop.
parser.add_argument("--limit", type=int, help="iteration limit")
# Parsed once at module level; main() reads this namespace directly.
args = parser.parse_args()
#======= Functions ======
def loadTweetsFromNDJson(filepath):
    """Read an NDJSON file and return its parsed content.

    Args:
        filepath: Path of the newline-delimited JSON file to read.

    Returns:
        The value produced by ``ndjson.loads`` for the file's full text.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager guarantees the handle is closed, even when the
    # read fails; the previous version left the file open.
    with open(filepath) as source:
        content = source.read()
    return ndjson.loads(content)
def validateTweetStructure(tweet):
    """Tell whether *tweet* contains a ``'hashtags'`` entry.

    Only the presence of the key is checked; its value is not inspected.
    """
    required_key = 'hashtags'
    return required_key in tweet
def writeNDJsonToFile(filepath, content):
    """Serialise *content* into *filepath* using ``ndjson.dump``.

    The target is opened in write mode, so an existing file is overwritten.
    """
    with open(filepath, mode='w') as sink:
        ndjson.dump(content, sink)
def defineOutputFilename(model):
    """Build the output file name associated with a polarity model.

    Args:
        model: Model identifier; only ``0`` (hashtags) has a file name.

    Returns:
        ``'tweetsWithPolarity-hashtags.json'`` for model 0, otherwise ``None``.
    """
    if model != 0:
        # No file name is defined for any other model.
        return None
    stem = 'tweetsWithPolarity'
    suffix = 'json'
    return "{}-{}.{}".format(stem, 'hashtags', suffix)
def main():
    """Label tweets with a polarity and write them out as NDJSON.

    Settings come from the module-level ``args`` namespace:

    * ``args.model``   -- polarity computer to use; only ``0`` (hashtags) is
      supported.
    * ``args.version`` -- number inserted into the polarisation CSV file name,
      which is looked up in the ``polarityCsv`` directory next to this script.
    * ``args.tweets``  -- path of the NDJSON input file.
    * ``args.limit``   -- optional cap on the number of *labelled* tweets.
      Tweets skipped as invalid do not count towards it, and a missing or
      zero value means "no cap".

    Each tweet containing a ``'hashtags'`` key gets a ``'polarity'`` entry of
    ``'neutral'``, ``'positive'`` or ``'negative'``. The label comes from
    comparing the first two values returned by
    ``polarityComputer.getPolarityScores`` (presumably the positive and
    negative scores, in that order -- confirm against HashtagsPolarity).

    Exits with status 1 after listing the available models when
    ``args.model`` is not supported.
    """
    if args.model != 0:
        print("Available models : \n")
        print("0 - Hashtags")
        # Unsupported model is a usage error: report it with a non-zero
        # status so calling shells/scripts can detect the failure.
        sys.exit(1)

    scriptDirectory = os.path.dirname(os.path.abspath(__file__))
    polarisationFile = os.path.join(
        scriptDirectory,
        'polarityCsv',
        'PolarisationV{}.csv'.format(args.version),
    )
    polarityComputer = HashtagsPolarity(polarisationFile)

    print("Loading tweets from {}".format(args.tweets))
    tweets = loadTweetsFromNDJson(args.tweets)

    tweetsWithPolarity = []
    outputFile = defineOutputFilename(args.model)
    # A falsy limit (absent or 0) falls back to the number of loaded tweets,
    # which the counter below can only reach on the last one.
    limit = args.limit if args.limit else len(tweets)

    # Counts labelled tweets only; invalid tweets leave it unchanged.
    i = 0
    for tweet in tweets:
        print('Processing tweet {}'.format(i))

        if not validateTweetStructure(tweet):
            print('-- invalid --')
            continue

        print('-- passing --')
        scores = polarityComputer.getPolarityScores(tweet.get('hashtags'))

        if scores[0] == scores[1]:
            tweet['polarity'] = 'neutral'
        elif scores[0] > scores[1]:
            tweet['polarity'] = 'positive'
        else:
            tweet['polarity'] = 'negative'

        tweetsWithPolarity.append(tweet)

        i += 1
        if i == limit:
            break

    print('Processed {}/{}'.format(len(tweetsWithPolarity), len(tweets)))
    print('Writing processed tweets to {}'.format(outputFile))
    writeNDJsonToFile(outputFile, tweetsWithPolarity)
#======= Main =======
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()