llm logs now defaults to text output, use --json for JSON, use -c X for specific conversation

Refs #160
simonw · Aug 17, 2023 · 113df5d · 113df5d
1 parent 8bf27b3
commit 113df5d
Show file tree

Hide file tree

Showing 3 changed files with 144 additions and 28 deletions.
diff --git a/docs/help.md b/docs/help.md
@@ -183,12 +183,15 @@ Usage: llm logs list [OPTIONS]
   Show recent logged prompts and their responses
 
 Options:
-  -n, --count INTEGER  Number of entries to show - 0 for all
-  -p, --path FILE      Path to log database
-  -m, --model TEXT     Filter by model or model alias
-  -q, --query TEXT     Search for logs matching this string
-  -t, --truncate       Truncate long strings in output
-  --help               Show this message and exit.
+  -n, --count INTEGER      Number of entries to show - defaults to 3, use 0 for
+                           all
+  -p, --path FILE          Path to log database
+  -m, --model TEXT         Filter by model or model alias
+  -q, --query TEXT         Search for logs matching this string
+  -t, --truncate           Truncate long strings in output
+  -c, --conversation TEXT  Show logs for this conversation ID
+  --json                   Output logs as JSON
+  --help                   Show this message and exit.
 ```
 ### llm models --help
 ```

diff --git a/llm/cli.py b/llm/cli.py
@@ -391,7 +391,7 @@ def logs_turn_off():
 {columns}
 from
     responses
-left join conversations on responses.conversation_id = conversations.id{where}
+left join conversations on responses.conversation_id = conversations.id{extra_where}
 order by responses.id desc{limit}
 """
 LOGS_SQL_SEARCH = """
@@ -410,8 +410,9 @@ def logs_turn_off():
 @click.option(
     "-n",
     "--count",
-    default=3,
-    help="Number of entries to show - 0 for all",
+    type=int,
+    default=None,
+    help="Number of entries to show - defaults to 3, use 0 for all",
 )
 @click.option(
     "-p",
@@ -422,14 +423,32 @@ def logs_turn_off():
 @click.option("-m", "--model", help="Filter by model or model alias")
 @click.option("-q", "--query", help="Search for logs matching this string")
 @click.option("-t", "--truncate", is_flag=True, help="Truncate long strings in output")
-def logs_list(count, path, model, query, truncate):
+@click.option(
+    "-c",
+    "--conversation",
+    help="Show logs for this conversation ID",
+)
+@click.option(
+    "json_output",
+    "--json",
+    is_flag=True,
+    help="Output logs as JSON",
+)
+def logs_list(count, path, model, query, truncate, conversation, json_output):
     "Show recent logged prompts and their responses"
     path = pathlib.Path(path or logs_db_path())
     if not path.exists():
         raise click.ClickException("No log database found at {}".format(path))
     db = sqlite_utils.Database(path)
     migrate(db)
 
+    # For --conversation set limit 0, if not explicitly set
+    if count is None:
+        if conversation:
+            count = 0
+        else:
+            count = 3
+
     model_id = None
     if model:
         # Resolve alias, if any
@@ -440,21 +459,38 @@ def logs_list(count, path, model, query, truncate):
             model_id = model
 
     sql = LOGS_SQL
-    format_kwargs = {
-        "limit": " limit {}".format(count) if count else "",
-        "columns": LOGS_COLUMNS,
-    }
     if query:
         sql = LOGS_SQL_SEARCH
-        format_kwargs["extra_where"] = (
-            " and responses.model = :model" if model_id else ""
-        )
-    else:
-        format_kwargs["where"] = " where responses.model = :model" if model_id else ""
 
+    limit = ""
+    if count is not None and count > 0:
+        limit = " limit {}".format(count)
+
+    sql_format = {
+        "limit": limit,
+        "columns": LOGS_COLUMNS,
+        "extra_where": "",
+    }
+    where_bits = []
+    if model_id:
+        where_bits.append("responses.model = :model")
+    if conversation:
+        where_bits.append("responses.conversation_id = :conversation")
+    if where_bits:
+        sql_format["extra_where"] = " where " + " and ".join(where_bits)
+
+    final_sql = sql.format(**sql_format)
     rows = list(
-        db.query(sql.format(**format_kwargs), {"model": model_id, "query": query})
+        db.query(
+            final_sql,
+            {"model": model_id, "query": query, "conversation": conversation},
+        )
     )
+    # Reverse the order - we do this because we 'order by id desc limit 3' to get the
+    # 3 most recent results, but we still want to display them in chronological order
+    # ... except for searches where we don't do this
+    if not query:
+        rows.reverse()
     for row in rows:
         if truncate:
             row["prompt"] = _truncate_string(row["prompt"])
@@ -467,7 +503,37 @@ def logs_list(count, path, model, query, truncate):
                     del row[key]
                 else:
                     row[key] = json.loads(row[key])
-    click.echo(json.dumps(list(rows), indent=2))
+
+    # Output as JSON if requested
+    if json_output:
+        click.echo(json.dumps(list(rows), indent=2))
+    else:
+        # Output neatly formatted human-readable logs
+        current_system = None
+        should_show_conversation = True
+        for row in rows:
+            click.echo(
+                "{}{}{}\n".format(
+                    row["datetime_utc"].split(".")[0],
+                    "    {}".format(row["model"]) if should_show_conversation else "",
+                    "    conversation: {}".format(row["conversation_id"])
+                    if should_show_conversation
+                    else "",
+                )
+            )
+            # In conversation log mode only show it for the first one
+            if conversation:
+                should_show_conversation = False
+            click.echo("  Prompt:\n{}".format(textwrap.indent(row["prompt"], "    ")))
+            if row["system"] != current_system:
+                if row["system"] is not None:
+                    click.echo(
+                        "\n  System:\n{}".format(textwrap.indent(row["system"], "    "))
+                    )
+                current_system = row["system"]
+            click.echo(
+                "\n  Response:\n{}\n".format(textwrap.indent(row["response"], "    "))
+            )
 
 
 @cli.group()

diff --git a/tests/test_llm.py b/tests/test_llm.py
@@ -1,9 +1,11 @@
 from click.testing import CliRunner
+import datetime
 from llm.cli import cli
 from llm.migrations import migrate
 import json
 import os
 import pytest
+import re
 import sqlite_utils
 from ulid import ULID
 from unittest import mock
@@ -17,24 +19,65 @@ def test_version():
         assert result.output.startswith("cli, version ")
 
 
-@pytest.mark.parametrize("n", (None, 0, 2))
-def test_logs(n, user_path):
-    "Test that logs command correctly returns requested -n records"
+@pytest.fixture
+def log_path(user_path):
     log_path = str(user_path / "logs.db")
     db = sqlite_utils.Database(log_path)
     migrate(db)
+    start = datetime.datetime.utcnow()
     db["responses"].insert_all(
         {
             "id": str(ULID()).lower(),
             "system": "system",
             "prompt": "prompt",
             "response": "response",
             "model": "davinci",
+            "datetime_utc": (start + datetime.timedelta(seconds=i)).isoformat(),
+            "conversation_id": "abc123",
         }
         for i in range(100)
     )
+    return log_path
+
+
+datetime_re = re.compile(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}")
+
+
+def test_logs_text(log_path):
     runner = CliRunner()
     args = ["logs", "-p", str(log_path)]
+    result = runner.invoke(cli, args, catch_exceptions=False)
+    assert result.exit_code == 0
+    output = result.output
+    # Replace 2023-08-17T20:53:58 with YYYY-MM-DDTHH:MM:SS
+    output = datetime_re.sub("YYYY-MM-DDTHH:MM:SS", output)
+
+    assert output == (
+        "YYYY-MM-DDTHH:MM:SS    davinci    conversation: abc123\n\n"
+        "  Prompt:\n"
+        "    prompt\n\n"
+        "  System:\n"
+        "    system\n\n"
+        "  Response:\n"
+        "    response\n\n"
+        "YYYY-MM-DDTHH:MM:SS    davinci    conversation: abc123\n\n"
+        "  Prompt:\n"
+        "    prompt\n\n"
+        "  Response:\n"
+        "    response\n\n"
+        "YYYY-MM-DDTHH:MM:SS    davinci    conversation: abc123\n\n"
+        "  Prompt:\n"
+        "    prompt\n\n"
+        "  Response:\n"
+        "    response\n\n"
+    )
+
+
+@pytest.mark.parametrize("n", (None, 0, 2))
+def test_logs_json(n, log_path):
+    "Test that logs command correctly returns requested -n records"
+    runner = CliRunner()
+    args = ["logs", "-p", str(log_path), "--json"]
     if n is not None:
         args.extend(["-n", str(n)])
     result = runner.invoke(cli, args, catch_exceptions=False)
@@ -79,7 +122,7 @@ def test_logs_filtered(user_path, model):
         for i in range(100)
     )
     runner = CliRunner()
-    result = runner.invoke(cli, ["logs", "list", "-m", model])
+    result = runner.invoke(cli, ["logs", "list", "-m", model, "--json"])
     assert result.exit_code == 0
     records = json.loads(result.output.strip())
     assert all(record["model"] == model for record in records)
@@ -88,7 +131,9 @@ def test_logs_filtered(user_path, model):
 @pytest.mark.parametrize(
     "query,expected",
     (
-        ("", ["doc3", "doc2", "doc1"]),
+        # With no search term order should be by datetime
+        ("", ["doc1", "doc2", "doc3"]),
+        # With a search it's order by rank instead
         ("llama", ["doc1", "doc3"]),
         ("alpaca", ["doc2"]),
     ),
@@ -113,7 +158,7 @@ def _insert(id, text):
     _insert("doc2", "alpaca")
     _insert("doc3", "llama llama")
     runner = CliRunner()
-    result = runner.invoke(cli, ["logs", "list", "-q", query])
+    result = runner.invoke(cli, ["logs", "list", "-q", query, "--json"])
     assert result.exit_code == 0
     records = json.loads(result.output.strip())
     assert [record["id"] for record in records] == expected
@@ -195,7 +240,9 @@ def test_llm_default_prompt(
     }
 
     # Test "llm logs"
-    log_result = runner.invoke(cli, ["logs", "-n", "1"], catch_exceptions=False)
+    log_result = runner.invoke(
+        cli, ["logs", "-n", "1", "--json"], catch_exceptions=False
+    )
     log_json = json.loads(log_result.output)
 
     # Should have logged correctly: