Skip to content

Commit

Permalink
Fix for llm similar tils2 -c python, refs #190
Browse files Browse the repository at this point in the history
  • Loading branch information
simonw committed Sep 1, 2023
1 parent 5e82929 commit 2463b28
Showing 1 changed file with 14 additions and 9 deletions.
23 changes: 14 additions & 9 deletions llm/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -1042,6 +1042,7 @@ def similar(collection, id, input, content, number, database):
if not matches:
raise click.ClickException("No match found for id: {}".format(id))
embedding = matches[0]["embedding"]
comparison_vector = decode(embedding)
else:
# Embed the content that was provided instead
if not content:
Expand All @@ -1056,28 +1057,32 @@ def similar(collection, id, input, content, number, database):
model = get_embedding_model(model)
except UnknownModelError as ex:
raise click.ClickException(str(ex))
embedding = model.embed(content)

# Now we have an embedding for the comparison
comparison_vector = decode(embedding)
comparison_vector = model.embed(content)

# Scoring callback that is registered on the database (db.register_function)
# and invoked from the SQL query as distance_score(embedding).
# It decodes the stored, encoded embedding of a row and returns its cosine
# similarity to comparison_vector, which is captured from the enclosing scope.
# NOTE: despite the "distance" name, the value returned is a similarity; the
# query orders by it descending.
def distance_score(other_encoded):
other_vector = decode(other_encoded)
return cosine_similarity(other_vector, comparison_vector)

db.register_function(distance_score)

where_bits = ["collection_id = ?"]
where_args = [collection_row["id"]]

if id:
where_bits.append("id != ?")
where_args.append(id)

results = db.query(
"""
select id, distance_score(embedding) as score
from embeddings
where collection_id = ?
and id != ?
order by score desc limit {}
where {where}
order by score desc limit {number}
""".format(
number
where=" and ".join(where_bits),
number=number,
),
[collection_row["id"], id],
where_args,
)

for result in results:
Expand Down

0 comments on commit 2463b28

Please sign in to comment.