-
-
Notifications
You must be signed in to change notification settings - Fork 228
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add sem_score metric for generation evaluation (#155)
* add all-mpnet-base-v2 embedding model as default * add all-mpnet-base-v2 to local model docs * add sem score metric and test code for it * add cast_metrics for processing metrics list or dict from yaml file. * add cast_metrics at evaluate for new List[Dict] input type * edit metrics type and add new metric sem_score to full.yaml * add documentation about sem_score * add api specification for new files --------- Co-authored-by: jeffrey <vkefhdl1@gmail.com>
- Loading branch information
Showing
15 changed files
with
194 additions
and
37 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
import numpy as np | ||
|
||
|
||
def calculate_cosine_similarity(a, b):
    """Return the cosine similarity between two 1-D vectors.

    :param a: First vector (array-like of numbers).
    :param b: Second vector, same length as ``a``.
    :return: Cosine similarity in [-1.0, 1.0], or 0.0 when either vector
        has zero norm (the original formula would divide by zero).
    """
    a = np.asarray(a, dtype=float)
    b = np.asarray(b, dtype=float)
    norm_a = np.linalg.norm(a)
    norm_b = np.linalg.norm(b)
    # Guard: a zero-length vector has no defined direction; treat its
    # similarity to anything as 0 instead of emitting nan.
    if norm_a == 0.0 or norm_b == 0.0:
        return 0.0
    return np.dot(a, b) / (norm_a * norm_b)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
from typing import Union, List, Dict, Tuple, Any | ||
|
||
from autorag import embedding_models | ||
|
||
|
||
def cast_metrics(metrics: Union[List[str], List[Dict]]) -> Tuple[List[str], List[Dict[str, Any]]]:
    """
    Turn metrics into a list of metric names and a parallel list of parameter dicts.

    :param metrics: List of metric names (str), or list of dicts each holding a
        'metric_name' key plus optional parameters (e.g. 'embedding_model').
    :return: The list of metric names and the list of metric parameter dicts.
    :raises ValueError: If metrics is not a list of strings or dictionaries.
    :raises KeyError: If a dict entry is missing the 'metric_name' key.
    """
    if not isinstance(metrics, list):
        raise ValueError("metrics must be a list of string or dictionary.")
    # Empty list: nothing to evaluate (previously raised IndexError on metrics[0]).
    if not metrics:
        return [], []
    if isinstance(metrics[0], str):
        return list(metrics), [{} for _ in metrics]
    elif isinstance(metrics[0], dict):
        metric_names = []
        metric_params = []
        for metric in metrics:
            # Copy before popping 'metric_name' so the caller's dicts are not
            # mutated (the previous in-place pop made a second call on the
            # same list raise KeyError).
            params = dict(metric)
            metric_names.append(params.pop('metric_name'))
            metric_params.append(dict(cast_embedding_model(key, value)
                                      for key, value in params.items()))
        return metric_names, metric_params
    else:
        raise ValueError("metrics must be a list of string or dictionary.")
|
||
|
||
def cast_embedding_model(key, value):
    """Resolve an 'embedding_model' parameter to its registered model object.

    :param key: Metric parameter name.
    :param value: Metric parameter value; for the 'embedding_model' key this is
        the name under which the model is registered in ``embedding_models``.
    :return: The (key, value) pair, with the value replaced by the registered
        embedding model when the key is 'embedding_model'.
    """
    if key != 'embedding_model':
        return key, value
    return key, embedding_models[value]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
from llama_index import OpenAIEmbedding | ||
|
||
from autorag.evaluate.util import cast_metrics | ||
|
||
|
||
def test_cast_metrics():
    """Check cast_metrics across string, dict, and parameterized metric specs."""
    # Plain string metrics: names pass through, params default to empty dicts.
    names, params = cast_metrics(['bleu', 'meteor', 'rouge'])
    assert names == ['bleu', 'meteor', 'rouge']
    assert params == [{}, {}, {}]

    # Dict metrics carrying only 'metric_name': equivalent to the string form.
    names, params = cast_metrics(
        [{'metric_name': 'bleu'}, {'metric_name': 'meteor'}, {'metric_name': 'rouge'}])
    assert names == ['bleu', 'meteor', 'rouge']
    assert params == [{}, {}, {}]

    # 'embedding_model' values are resolved to embedding model instances.
    names, params = cast_metrics(
        [{'metric_name': 'bleu'},
         {'metric_name': 'sem_score', 'embedding_model': 'openai'}])
    assert names == ['bleu', 'sem_score']
    assert params == [{}, {'embedding_model': OpenAIEmbedding()}]

    # Extra parameters survive alongside the resolved embedding model.
    names, params = cast_metrics(
        [{'metric_name': 'bleu', 'extra_param': 'extra'},
         {'metric_name': 'sem_score', 'embedding_model': 'openai', 'extra_param': 'extra'}])
    assert names == ['bleu', 'sem_score']
    assert params == [{'extra_param': 'extra'},
                      {'embedding_model': OpenAIEmbedding(), 'extra_param': 'extra'}]
Oops, something went wrong.