Skip to content

Commit

Permalink
Rename EmbeddingDistance to EmbeddingSimilarity (#41)
Browse files Browse the repository at this point in the history
Forgot to do this before landing 🤦
  • Loading branch information
ankrgyl authored Dec 15, 2023
1 parent 8a00ce6 commit 31c0ee2
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 13 deletions.
6 changes: 3 additions & 3 deletions js/embeddings.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { EmbeddingDistance } from "./string.js";
import { EmbeddingSimilarity } from "./string.js";

const SYNONYMS = [
{
Expand All @@ -25,7 +25,7 @@ test("Embeddings Test", async () => {
const prefix = "resource type: ";
for (const { word, synonyms } of SYNONYMS) {
for (const synonym of synonyms) {
const result = await EmbeddingDistance({
const result = await EmbeddingSimilarity({
prefix,
output: word,
expected: synonym,
Expand All @@ -42,7 +42,7 @@ test("Embeddings Test", async () => {

const word1 = UNRELATED[i];
const word2 = UNRELATED[j];
const result = await EmbeddingDistance({
const result = await EmbeddingSimilarity({
prefix,
output: word1,
expected: word2,
Expand Down
14 changes: 12 additions & 2 deletions js/string.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,17 @@ export const LevenshteinScorer: Scorer<string, {}> = (args) => {
};
};

export const EmbeddingDistance: Scorer<
/**
* A scorer that uses cosine similarity to compare two strings.
*
* @param args
* @param args.prefix A prefix to prepend to the prompt. This is useful for specifying the domain of the inputs.
 * @param args.model The model to use for the embedding similarity. Defaults to "text-embedding-ada-002".
* @param args.expectedMin The minimum expected score. Defaults to 0.7. Values below this will be scored as 0, and
* values between this and 1 will be scaled linearly.
* @returns A score between 0 and 1, where 1 is a perfect match.
*/
export const EmbeddingSimilarity: Scorer<
string,
{
prefix?: string;
Expand All @@ -37,7 +47,7 @@ export const EmbeddingDistance: Scorer<
} & OpenAIAuth
> = async (args) => {
if (args.expected === undefined) {
throw new Error("EmbeddingDistance requires an expected value");
throw new Error("EmbeddingSimilarity requires an expected value");
}

const prefix = args.prefix ?? "";
Expand Down
12 changes: 6 additions & 6 deletions py/autoevals/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,16 @@ def _run_eval_sync(self, output, expected=None, **kwargs):
LevenshteinScorer = Levenshtein # backcompat


class EmbeddingDistance(Scorer):
class EmbeddingSimilarity(Scorer):
"""
A simple scorer that uses embedding distance to compare two strings.
A simple scorer that uses cosine similarity to compare two strings.
"""

MODEL = "text-embedding-ada-002"

def __init__(self, prefix="", model=MODEL, expected_min=0.7, api_key=None, base_url=None):
"""
Create a new EmbeddingDistance scorer.
Create a new EmbeddingSimilarity scorer.
:param prefix: A prefix to prepend to the prompt. This is useful for specifying the domain of the inputs.
:param model: The model to use for the embedding similarity. Defaults to "text-embedding-ada-002".
Expand All @@ -53,7 +53,7 @@ def __init__(self, prefix="", model=MODEL, expected_min=0.7, api_key=None, base_

async def _run_eval_async(self, output, expected=None, **kwargs):
if expected is None:
raise ValueError("EmbeddingDistance requires an expected value")
raise ValueError("EmbeddingSimilarity requires an expected value")

output_embedding_p = arun_cached_request(input=f"{self.prefix}{output}", **self.extra_args)
expected_embedding_p = arun_cached_request(input=f"{self.prefix}{expected}", **self.extra_args)
Expand All @@ -69,7 +69,7 @@ async def _run_eval_async(self, output, expected=None, **kwargs):

def _run_eval_sync(self, output, expected=None, **kwargs):
if expected is None:
raise ValueError("EmbeddingDistance requires an expected value")
raise ValueError("EmbeddingSimilarity requires an expected value")

output_result = run_cached_request("embed", input=f"{self.prefix}{output}", **self.extra_args)
expected_result = run_cached_request("embed", input=f"{self.prefix}{expected}", **self.extra_args)
Expand Down Expand Up @@ -104,4 +104,4 @@ def cosine_similarity(list1, list2):
return min(dot_product / (magnitude_list1 * magnitude_list2), 1)


__all__ = ["LevenshteinScorer", "Levenshtein", "EmbeddingDistance"]
__all__ = ["LevenshteinScorer", "Levenshtein", "EmbeddingSimilarity"]
4 changes: 2 additions & 2 deletions py/autoevals/test_embeddings.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from autoevals import EmbeddingDistance
from autoevals import EmbeddingSimilarity

SYNONYMS = [
("water", ["water", "H2O", "agua"]),
Expand All @@ -10,7 +10,7 @@


def test_embeddings():
evaluator = EmbeddingDistance(prefix="resource type: ")
evaluator = EmbeddingSimilarity(prefix="resource type: ")
for word, synonyms in SYNONYMS:
for synonym in synonyms:
result = evaluator(word, synonym)
Expand Down

0 comments on commit 31c0ee2

Please sign in to comment.