-
Notifications
You must be signed in to change notification settings - Fork 0
/
eval.py
70 lines (53 loc) · 1.89 KB
/
eval.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import json
import nest_asyncio
import numpy as np
from trulens_eval import Tru, Feedback, TruLlama, OpenAI
from trulens_eval.feedback import Groundedness
# Patch asyncio at import time so an event loop can be re-entered.
# NOTE(review): presumably needed because the evaluation is driven from an
# environment that already runs a loop (e.g. a notebook) -- confirm.
nest_asyncio.apply()
def get_trulens_recorder(query_engine, id):
    """Wrap ``query_engine`` in a ``TruLlama`` recorder with three feedbacks.

    All feedback functions share a single ``OpenAI`` provider:

    * "Answer Relevance": ``relevance_with_cot_reasons`` applied to the
      input/output pair.
    * "Context Relevance": ``relevance_with_cot_reasons`` applied to the
      input and the text of the selected source nodes, aggregated with
      ``np.mean``.
    * "Groundedness": ``groundedness_measure_with_cot_reasons`` applied to
      the source-node text and the output, aggregated with
      ``grounded_statements_aggregator``.

    Args:
        query_engine: Engine handed to ``TruLlama`` (presumably a LlamaIndex
            query engine -- confirm against the caller).
        id: Value forwarded to ``TruLlama`` as ``app_id``.

    Returns:
        The configured ``TruLlama`` instance.
    """
    provider = OpenAI()

    answer_relevance = Feedback(
        provider.relevance_with_cot_reasons, name="Answer Relevance"
    ).on_input_output()

    context_relevance = Feedback(
        provider.relevance_with_cot_reasons, name="Context Relevance"
    )
    context_relevance = context_relevance.on_input()
    context_relevance = context_relevance.on(
        TruLlama.select_source_nodes().node.text
    )
    context_relevance = context_relevance.aggregate(np.mean)

    # NOTE: an earlier variant also passed summarize_provider=openai; only the
    # groundedness provider is configured here.
    checker = Groundedness(groundedness_provider=provider)
    groundedness = Feedback(
        checker.groundedness_measure_with_cot_reasons, name="Groundedness"
    )
    groundedness = groundedness.on(TruLlama.select_source_nodes().node.text)
    groundedness = groundedness.on_output()
    groundedness = groundedness.aggregate(checker.grounded_statements_aggregator)

    return TruLlama(
        query_engine,
        app_id=id,
        feedbacks=[answer_relevance, context_relevance, groundedness],
    )
def read_evals(path="evals/practicalai.json"):
    """Load the evaluation cases from a JSON file.

    Args:
        path: Location of the JSON file. Defaults to the project's
            ``evals/practicalai.json`` so existing zero-argument callers
            keep working.

    Returns:
        The value stored under the top-level ``"evals"`` key (presumably a
        list of dicts with ``"q"`` and ``"a"`` entries, as consumed by
        ``run_evals`` -- confirm against the data file).

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If the document has no ``"evals"`` key.
    """
    # Decode explicitly as UTF-8: relying on the platform default encoding
    # breaks on non-ASCII questions/answers under non-UTF-8 locales.
    with open(path, "r", encoding="utf-8") as file:
        data = json.load(file)
    return data["evals"]
def run_evals(evals, query_engine, id):
    """Query the engine once per evaluation case and print a comparison.

    A recorder is built once via ``get_trulens_recorder`` and entered as a
    context manager around every query. For each case the question, the
    engine's response and the reference answer are printed.

    Args:
        evals: Iterable of mappings providing a question under ``'q'`` and a
            reference answer under ``'a'``.
        query_engine: Object exposing ``query(question)``.
        id: Identifier forwarded to ``get_trulens_recorder``.
    """
    recorder = get_trulens_recorder(query_engine, id)
    for case in evals:
        question = case['q']
        with recorder:
            response = query_engine.query(question)
            print("==========")
            print(f"Question: {question}")
            print(f"Actual response: {response}")
            # The reference answer is looked up only now, after the query ran.
            print(f"Reference response: {case['a']}")
def run(query_engine, reset_eval_db=False, id="<default>"):
    """Load the evaluation cases and run them against ``query_engine``.

    Args:
        query_engine: Engine passed through to ``run_evals``.
        reset_eval_db: When truthy, ``Tru().reset_database()`` is called
            after the cases are loaded and before any query is issued.
        id: Identifier passed through to ``run_evals``.
    """
    cases = read_evals()

    # Resetting happens only after the cases loaded successfully.
    if reset_eval_db:
        tru = Tru()
        tru.reset_database()

    run_evals(cases, query_engine, id=id)