bug: fix MRR and MAP calculations (#7841)
Amnah199 authored Jun 25, 2024
1 parent c51f8ff commit fc011d7
Showing 4 changed files with 41 additions and 32 deletions.
33 changes: 15 additions & 18 deletions haystack/components/evaluators/document_map.py
@@ -43,6 +43,7 @@ class DocumentMAPEvaluator:
     ```
     """
 
+    # Refer to https://www.pinecone.io/learn/offline-evaluation/ for the algorithm.
     @component.output_types(score=float, individual_scores=List[float])
     def run(
         self, ground_truth_documents: List[List[Document]], retrieved_documents: List[List[Document]]
@@ -69,25 +70,21 @@ def run(
         individual_scores = []
 
         for ground_truth, retrieved in zip(ground_truth_documents, retrieved_documents):
-            score = 0.0
-            for ground_document in ground_truth:
-                if ground_document.content is None:
-                    continue
-
-                average_precision = 0.0
-                relevant_documents = 0
-
-                for rank, retrieved_document in enumerate(retrieved):
-                    if retrieved_document.content is None:
-                        continue
-
-                    if ground_document.content in retrieved_document.content:
-                        relevant_documents += 1
-                        average_precision += relevant_documents / (rank + 1)
-                if relevant_documents > 0:
-                    score = average_precision / relevant_documents
-            individual_scores.append(score)
-
-        score = sum(individual_scores) / len(retrieved_documents)
+            average_precision = 0.0
+            average_precision_numerator = 0.0
+            relevant_documents = 0
+
+            ground_truth_contents = [doc.content for doc in ground_truth if doc.content is not None]
+            for rank, retrieved_document in enumerate(retrieved):
+                if retrieved_document.content is None:
+                    continue
+
+                if retrieved_document.content in ground_truth_contents:
+                    relevant_documents += 1
+                    average_precision_numerator += relevant_documents / (rank + 1)
+            if relevant_documents > 0:
+                average_precision = average_precision_numerator / relevant_documents
+            individual_scores.append(average_precision)
+
+        score = sum(individual_scores) / len(ground_truth_documents)
         return {"score": score, "individual_scores": individual_scores}
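
The corrected MAP logic above walks the ranked list once per query: each time a retrieved document's content matches a ground-truth content, it accumulates precision at that rank, divides by the number of relevant documents found, and finally averages the per-query scores over `len(ground_truth_documents)`. A minimal standalone sketch of that per-query computation (the `average_precision` helper below is hypothetical, written only to illustrate the formula, and is not part of the component):

```python
from typing import List


def average_precision(ground_truth_contents: List[str], retrieved_contents: List[str]) -> float:
    """Mean of precision@k over the ranks k at which a ground-truth document is retrieved."""
    relevant_found = 0
    numerator = 0.0
    for rank, content in enumerate(retrieved_contents):
        if content in ground_truth_contents:
            relevant_found += 1
            numerator += relevant_found / (rank + 1)  # precision at this rank
    return numerator / relevant_found if relevant_found > 0 else 0.0


# Relevant hits at ranks 2 and 3 out of two ground-truth documents:
# AP = (1/2 + 2/3) / 2 ≈ 0.583
print(average_precision(["a", "b"], ["x", "a", "b"]))
```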
24 changes: 11 additions & 13 deletions haystack/components/evaluators/document_mrr.py
@@ -41,6 +41,7 @@ class DocumentMRREvaluator:
     ```
     """
 
+    # Refer to https://www.pinecone.io/learn/offline-evaluation/ for the algorithm.
     @component.output_types(score=float, individual_scores=List[float])
     def run(
         self, ground_truth_documents: List[List[Document]], retrieved_documents: List[List[Document]]
@@ -67,20 +68,17 @@ def run(
         individual_scores = []
 
         for ground_truth, retrieved in zip(ground_truth_documents, retrieved_documents):
-            score = 0.0
-            for ground_document in ground_truth:
-                if ground_document.content is None:
-                    continue
-
-                for rank, retrieved_document in enumerate(retrieved):
-                    if retrieved_document.content is None:
-                        continue
-
-                    if ground_document.content in retrieved_document.content:
-                        score = 1 / (rank + 1)
-                        break
-            individual_scores.append(score)
+            reciprocal_rank = 0.0
+
+            ground_truth_contents = [doc.content for doc in ground_truth if doc.content is not None]
+            for rank, retrieved_document in enumerate(retrieved):
+                if retrieved_document.content is None:
+                    continue
+                if retrieved_document.content in ground_truth_contents:
+                    reciprocal_rank = 1 / (rank + 1)
+                    break
+            individual_scores.append(reciprocal_rank)
 
-        score = sum(individual_scores) / len(retrieved_documents)
+        score = sum(individual_scores) / len(ground_truth_documents)
 
         return {"score": score, "individual_scores": individual_scores}
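
With the rework, MRR for each query is the reciprocal rank of the first retrieved document whose content appears in that query's ground-truth contents, and the aggregate score averages those values over `len(ground_truth_documents)`. A short usage sketch against the `run()` signature shown above; the import path and the expected numbers are inferred from this diff, so treat them as illustrative rather than authoritative:

```python
from haystack import Document
from haystack.components.evaluators import DocumentMRREvaluator

evaluator = DocumentMRREvaluator()
result = evaluator.run(
    ground_truth_documents=[[Document(content="Paris")], [Document(content="Berlin")]],
    retrieved_documents=[
        [Document(content="Paris"), Document(content="Lyon")],     # first relevant hit at rank 1 -> 1.0
        [Document(content="Munich"), Document(content="Berlin")],  # first relevant hit at rank 2 -> 0.5
    ],
)
# Expected with the fixed logic: individual_scores == [1.0, 0.5], score == 0.75
print(result)
```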
4 changes: 4 additions & 0 deletions releasenotes/notes/fix-issue-7758-d35b687ca226a707.yaml
@@ -0,0 +1,4 @@
+---
+fixes:
+  - |
+    Fixed the calculation for MRR and MAP scores.
12 changes: 11 additions & 1 deletion test/components/evaluators/test_document_map.py
@@ -62,7 +62,17 @@ def test_run_with_complex_data():
             ],
         ],
     )
-    assert result == {"individual_scores": [1.0, 0.8333333333333333, 1.0, 0.5, 0.0, 1.0], "score": 0.7222222222222222}
+    assert result == {
+        "individual_scores": [
+            1.0,
+            pytest.approx(0.8333333333333333),
+            1.0,
+            pytest.approx(0.5833333333333333),
+            0.0,
+            pytest.approx(0.8055555555555555),
+        ],
+        "score": pytest.approx(0.7037037037037037),
+    }
 
 
 def test_run_with_different_lengths():
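
The updated expectations are internally consistent with the new aggregation: the overall score is the mean of the per-query scores. A quick arithmetic check using the values from the assertion above (plain Python, not part of the test suite):

```python
individual_scores = [1.0, 0.8333333333333333, 1.0, 0.5833333333333333, 0.0, 0.8055555555555555]
print(sum(individual_scores) / len(individual_scores))  # ≈ 0.7037037037037037
```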
