-
Notifications
You must be signed in to change notification settings - Fork 13
/
app.py
110 lines (87 loc) · 2.49 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
__version__ = '0.0.1'
import os
import sys
from jina.flow import Flow
from jina import Document
def config():
    """
    Configure environment variables.

    Sets JINA_PARALLEL, JINA_SHARDS, WORKDIR and JINA_DATA_PATH to fixed
    values, creates the WORKDIR directory when it does not exist yet, and
    gives JINA_PORT a default of 65481 unless the environment already
    defines it.
    """
    # Every mode uses the same value here. The previous conditional on
    # sys.argv[1] produced 1 on both branches, so all it contributed was an
    # IndexError when the function ran without a CLI mode argument.
    parallel = 1
    shards = 1
    os.environ['JINA_PARALLEL'] = str(parallel)
    os.environ['JINA_SHARDS'] = str(shards)
    os.environ['WORKDIR'] = './workspace'
    os.makedirs(os.environ['WORKDIR'], exist_ok=True)
    # Keep a port that the caller exported; only fill in the default.
    os.environ.setdefault('JINA_PORT', str(65481))
    os.environ['JINA_DATA_PATH'] = 'dataset/test_answers.csv'
def index_generator():
    """
    Define data as Document to be indexed.

    Reads the tab-separated file named by JINA_DATA_PATH (resolved relative
    to the directory of this file) and yields one Document per non-empty
    row: the first column is stored as the integer tag 'id' and the second
    column becomes the Document text.
    """
    import csv
    data_path = os.path.join(os.path.dirname(__file__), os.environ['JINA_DATA_PATH'])
    # newline='' lets the csv module handle line endings itself, as its
    # documentation requires. The explicit encoding avoids depending on the
    # platform locale (assumes the dataset is UTF-8 - TODO confirm).
    with open(data_path, encoding='utf-8', newline='') as f:
        for row in csv.reader(f, delimiter='\t'):
            # csv.reader yields [] for blank lines; skip them instead of
            # failing on row[0].
            if not row:
                continue
            d = Document()
            # docid
            d.tags['id'] = int(row[0])
            # doc
            d.text = row[1]
            yield d
def index():
    """
    Index data using Index Flow.

    Loads the Flow described by 'flows/index.yml' and, while it is open,
    feeds it the Documents produced by index_generator in batches of 16.
    """
    flow = Flow.load_config('flows/index.yml')
    with flow:
        flow.index(
            input_fn=index_generator,
            batch_size=16,
        )
def print_resp(resp, question):
    """
    Print response.

    For every document in ``resp.search.docs`` a header containing
    ``question`` is printed, followed by one entry per match showing its
    1-based position, stripped text and score. Matches whose score is below
    zero are left out, but still count towards the position numbering.
    """
    for doc in resp.search.docs:
        print(f"🔮 Ranked list of answers to the question: {question} \n")
        for rank, hit in enumerate(doc.matches, 1):
            score = hit.score.value
            # Same comparison as a `score < 0.0` skip, so any value that is
            # not strictly negative is still printed.
            if not score < 0.0:
                answer = hit.text.strip()
                print(f'> {rank:>2d}. "{answer}"\n Score: ({score:.2f})')
def search():
    """
    Search results using Query Flow.

    Loads the Flow described by 'flows/query.yml' and, while it is open,
    keeps prompting for a question on standard input. Each non-empty
    question is sent through the Flow with top_k=50 and the response is
    handed to print_resp; an empty input ends the loop.
    """
    prompt = "Please type a question: "
    flow = Flow.load_config('flows/query.yml')
    with flow:
        question = input(prompt)
        while question:
            flow.search_lines(
                lines=[question],
                # The callback passes the question along so that it appears
                # in the printed header.
                output_fn=lambda resp: print_resp(resp, question),
                top_k=50,
            )
            question = input(prompt)
def dryrun():
    """
    Dry run.

    Loads the Flow described by 'flows/index.yml' and, while it is open,
    calls its dry_run() method.
    """
    # Call load_config on the class, as index() and search() do, instead of
    # first constructing a Flow instance that is immediately discarded.
    f = Flow.load_config("flows/index.yml")
    with f:
        f.dry_run()
if __name__ == '__main__':
    # Command-line entry point: the first argument selects the mode.
    if len(sys.argv) < 2:
        print('choose between "index/search/dryrun" mode')
        # sys.exit is always available, whereas the bare exit() helper is
        # injected by the site module and may be missing.
        sys.exit(1)

    # Map each supported mode to the function that runs it.
    modes = {
        'index': index,
        'search': search,
        'dryrun': dryrun,
    }
    mode = sys.argv[1]
    if mode not in modes:
        raise NotImplementedError(f'unsupported mode {mode}')

    # The environment is configured only once the mode is known to be valid.
    config()
    modes[mode]()