-
Notifications
You must be signed in to change notification settings - Fork 1
/
test_main.py
135 lines (91 loc) · 3.67 KB
/
test_main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# QUICKTEST: python -m pytest -s -o log_cli=true -k "optional_method" test_main.py
import os
import click
import shutil
import unittest
import pandas as pd
from click.testing import CliRunner
from main import (extract,
utils)
class ExtractTest(unittest.TestCase):
    """End-to-end tests for the ``extract`` command imported from ``main``.

    Every test copies the mock sentiment CSV to ``views/data.csv``, invokes
    one subcommand through click's ``CliRunner`` with the positional
    arguments ``data`` and ``newdata``, and inspects ``views/newdata.csv``.
    NOTE(review): the mapping from the ``data``/``newdata`` arguments to
    files under ``views/`` is implemented in ``main`` -- confirm there.

    There is no ``tearDown``: generated files are left in ``views/`` after
    each test. To keep tests independent anyway, ``setUp`` deletes any
    output left over from a previous test before the command runs.
    """

    @classmethod
    def setUpClass(cls):
        """Create one ``CliRunner`` shared by all tests of this class."""
        cls.runner = CliRunner()

    def setUp(self):
        """Install a fresh input file and drop stale output."""
        self.data = "views/data.csv"
        self.newdata = "views/newdata.csv"
        shutil.copyfile("test_files/MOCK_DATA_SENTIMENT.csv", self.data)
        # Without this, a test could read the CSV written by an earlier
        # test and pass (or fail) on data its own command never produced.
        try:
            os.remove(self.newdata)
        except FileNotFoundError:
            pass

    def _run_extract(self, *args):
        """Run ``extract data newdata <args>`` and return the output frame.

        The exit code is checked *before* the CSV is read so that a failing
        command is reported with its own output instead of surfacing as a
        missing-file error from ``pd.read_csv``.
        """
        result = self.runner.invoke(extract, ['data', 'newdata', *args])
        self.assertEqual(result.exit_code, 0, msg=result.output)
        return pd.read_csv(self.newdata)

    def _assert_has_columns(self, frame, expected):
        """Assert that every name in ``expected`` is a column of ``frame``."""
        for colname in expected:
            self.assertIn(colname, frame.columns)

    def test_kwic(self):
        """``kwic`` writes the keyword-in-context columns and 31 rows."""
        d = self._run_extract('kwic',
                              'test_files/test_keywords.txt',
                              'text')
        self._assert_has_columns(
            d, ['sent_ranges', 'context', 'keyword', 'parent_id'])
        self.assertEqual(len(d), 31)

    def test_ctfidf(self):
        """``ctfidf`` writes 1620 rows with a known score for one word."""
        d = self._run_extract('ctfidf', 'topic', 'text')
        self._assert_has_columns(d, ['group_label', 'word', 'rank', 'tfidf'])
        self.assertEqual(len(d), 1620)
        mask = (d["word"] == "bad") & (d["group_label"] == '"booz allen"')
        # .item() requires exactly one matching row and avoids the
        # deprecated float(<single-element Series>) conversion.
        score = d.loc[mask, "tfidf"].item()
        # Tolerant comparison: an exact == on floats is brittle.
        self.assertAlmostEqual(score, 0.0878212304837641, places=12)

    def test_similarity(self):
        """``similarity`` writes 81 rows with a known value for row ``g2``."""
        d = self._run_extract('similarity',
                              '--lang',
                              'english',
                              'topic',
                              'text')
        self.assertEqual(len(d), 81)
        value = d.loc[d["Unnamed: 0"] == "g2", '"booz allen"'].item()
        self.assertAlmostEqual(value, 0.0009484395652787, places=12)
class UtilsTest(unittest.TestCase):
    """End-to-end tests for the ``utils`` command imported from ``main``.

    Every test starts from a fresh copy of the mock sentiment CSV at
    ``views/data.csv``. There is no ``tearDown``: the file is left in place
    after each test and overwritten by the next ``setUp``.
    """

    @classmethod
    def setUpClass(cls):
        """Create one ``CliRunner`` shared by all tests of this class."""
        cls.runner = CliRunner()

    def setUp(self):
        """Install a fresh copy of the mock input file."""
        self.data = "views/data.csv"
        shutil.copyfile("test_files/MOCK_DATA_SENTIMENT.csv", self.data)

    def test_sentiment(self):
        """``sentiment`` adds a ``sentiment_score`` column within [-1, 1]."""
        result = self.runner.invoke(utils, ['text',
                                            'data',
                                            'sentiment'])
        print(result.output)
        # Attach the CLI output so a non-zero exit explains itself.
        self.assertEqual(result.exit_code, 0, msg=result.output)
        # The test re-reads the input path: it expects the score column
        # to appear in views/data.csv itself.
        d = pd.read_csv(self.data)
        self.assertIn("sentiment_score", d.columns)
        self.assertTrue(d["sentiment_score"].between(-1, 1).all())

    @unittest.skip("matchcounter test not implemented yet")
    def test_matchcounter(self):
        """Placeholder; skipped so it is not reported as a passing test."""
# def test_segment(self):
# result = self.runner.invoke(segment, ['someview', 'world', 'no2'])
# print(result.output)
# assert result.exit_code == 0
# if __name__ == "__main__":
# test_kwic()
# test_segment()