Skip to content

Commit

Permalink
Verbose output for more tasks (Stability-AI#92)
Browse files Browse the repository at this point in the history
* Add output to jaqket v2

* Add details to jsquad

* Add verbose output to xlsum

---------

Co-authored-by: Paul O'Leary McCann <polm@dampfkraft.com>
  • Loading branch information
polm-stability and polm committed Oct 11, 2023
1 parent 1547fc7 commit ec0e2a5
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 3 deletions.
12 changes: 11 additions & 1 deletion lm_eval/tasks/ja/jaqket_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ def process_results(self, doc, results):
"id": doc["qid"],
"answers": doc["answers"],
}
return {
out = {
"exact_match": (
predictions,
references,
Expand All @@ -290,6 +290,16 @@ def process_results(self, doc, results):
), # The F-score of predicted tokens versus the gold answer
}

# add details. Because the metric computation isn't simple (probably?)
# always include it.
out["details"] = {
"question": doc["question"],
"response": continuation,
"gold": doc["answers"]
}

return out


def aggregation(self):
return {
Expand Down
12 changes: 11 additions & 1 deletion lm_eval/tasks/ja/jsquad.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def process_results(self, doc, results):
"id": doc["id"],
"answers": doc["answers"],
}
return {
out = {
"exact_match": (
predictions,
references,
Expand All @@ -162,6 +162,16 @@ def process_results(self, doc, results):
), # The F-score of predicted tokens versus the gold answer
}

# add verbose output
out["details"] = {
"question": doc["question"],
"response": continuation,
"gold": doc["answers"]
}

return out


def aggregation(self):
return {
"exact_match": partial(
Expand Down
11 changes: 10 additions & 1 deletion lm_eval/tasks/ja/xlsum_ja.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,12 +137,21 @@ def construct_requests(self, doc, ctx):
def process_results(self, doc, results):
continuation = results[0]
ground_truth = doc["summary"]
return {
out = {
"rouge2": (
continuation,
ground_truth,
)
}
# add verbose output
out["details"] = {
# this isn't really a question, but keeping it this way for
# consistency
"question": doc["text"],
"response": continuation,
"gold": doc["summary"]
}
return out

def aggregation(self):
return {
Expand Down

0 comments on commit ec0e2a5

Please sign in to comment.