Skip to content

Commit

Permalink
Tests for llm embed-multi --files, refs #215
Browse files Browse the repository at this point in the history
  • Loading branch information
simonw committed Sep 3, 2023
1 parent 889374e commit afb6b5d
Showing 1 changed file with 46 additions and 1 deletion.
47 changes: 46 additions & 1 deletion tests/test_embed_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from llm.cli import cli
from llm import Collection
import json
import pathlib
import pytest
import sqlite_utils
from unittest.mock import ANY
Expand Down Expand Up @@ -268,7 +269,7 @@ def test_embed_multi_file_input(tmpdir, use_stdin, prefix, filename, content):

@pytest.mark.parametrize("use_other_db", (True, False))
@pytest.mark.parametrize("prefix", (None, "prefix"))
def test_sql(tmpdir, use_other_db, prefix):
def test_embed_multi_sql(tmpdir, use_other_db, prefix):
db_path = str(tmpdir / "embeddings.db")
db = sqlite_utils.Database(db_path)
extra_args = []
Expand Down Expand Up @@ -313,6 +314,50 @@ def test_sql(tmpdir, use_other_db, prefix):
]


def test_embed_multi_files(tmpdir):
    """Check that ``llm embed-multi --files`` stores every file matching the glob.

    Builds a small directory tree under ``tmpdir``, runs ``embed-multi`` against
    it with the ``**/*.txt`` pattern and ``--store``, then verifies that the five
    ``.txt`` files were written to the ``embeddings`` table (with their content)
    and that the ``.ini`` file was left out.
    """
    db_path = str(tmpdir / "files.db")
    files = tmpdir / "files"
    for filename, content in (
        ("file1.txt", "hello world"),
        ("file2.txt", "goodbye world"),
        ("nested/one.txt", "one"),
        ("nested/two.txt", "two"),
        ("nested/more/three.txt", "three"),
        ("nested/more/ignored.ini", "Does not match glob"),
    ):
        path = pathlib.Path(files / filename)
        # Nested fixtures need their parent directories created first.
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(content, "utf-8")

    runner = CliRunner()
    result = runner.invoke(
        cli,
        [
            "embed-multi",
            "files",
            "-d",
            db_path,
            "--files",
            str(files),
            "**/*.txt",
            "-m",
            "embed-demo",
            "--store",
        ],
    )
    # Surface the CLI output on failure so a non-zero exit is diagnosable.
    assert result.exit_code == 0, result.output
    embeddings_db = sqlite_utils.Database(db_path)
    assert embeddings_db["embeddings"].count == 5
    # Directory/glob traversal order is filesystem-dependent, and SQL gives no
    # ordering guarantee without ORDER BY, so sort explicitly to keep the
    # comparison deterministic across platforms.
    # NOTE(review): ids are asserted with forward slashes - confirm on Windows.
    rows = list(
        embeddings_db.query("select id, content from embeddings order by id")
    )
    assert rows == [
        {"id": "file1.txt", "content": "hello world"},
        {"id": "file2.txt", "content": "goodbye world"},
        {"id": "nested/more/three.txt", "content": "three"},
        {"id": "nested/one.txt", "content": "one"},
        {"id": "nested/two.txt", "content": "two"},
    ]


def test_default_embedding_model():
runner = CliRunner()
result = runner.invoke(cli, ["embed-models", "default"])
Expand Down

0 comments on commit afb6b5d

Please sign in to comment.