From 6f021e8a1a3e075e705afa94351a6266f20214cc Mon Sep 17 00:00:00 2001 From: Will Chen Date: Tue, 10 Sep 2024 16:33:41 -0700 Subject: [PATCH] Create MVP AI console (#934) --- .gitignore | 3 + .gitmodules | 4 + ai/README.md | 18 ++ ai/src/ai/common/diff.py | 36 +++ ai/src/ai/common/entity_store.py | 49 ++++ ai/src/ai/common/example.py | 137 +++++++++++ ai/src/ai/common/executor.py | 128 +++++++++++ ai/src/ai/common/model.py | 17 ++ ai/src/ai/common/producer.py | 16 ++ ai/src/ai/common/prompt_context.py | 15 ++ ai/src/ai/common/prompt_fragment.py | 31 +++ ai/src/ai/console/__init__.py | 0 ai/src/ai/console/pages/__init__.py | 0 ai/src/ai/console/pages/add_edit_eval_page.py | 53 +++++ .../pages/add_edit_expected_examples_page.py | 56 +++++ .../pages/add_edit_golden_examples_page.py | 126 ++++++++++ .../ai/console/pages/add_edit_model_page.py | 25 ++ .../ai/console/pages/add_edit_page_helper.py | 165 ++++++++++++++ .../console/pages/add_edit_producer_page.py | 74 ++++++ .../pages/add_edit_prompt_context_page.py | 78 +++++++ .../pages/add_edit_prompt_fragment_page.py | 84 +++++++ .../pages/create_golden_dataset_page.py | 81 +++++++ ai/src/ai/console/pages/eval_item_page.py | 111 +++++++++ ai/src/ai/console/pages/eval_page.py | 100 ++++++++ ai/src/ai/console/pages/evals_page.py | 48 ++++ .../console/pages/expected_examples_page.py | 51 +++++ .../ai/console/pages/golden_examples_page.py | 61 +++++ ai/src/ai/console/pages/models_page.py | 44 ++++ ai/src/ai/console/pages/producers_page.py | 64 ++++++ .../ai/console/pages/prompt_contexts_page.py | 55 +++++ .../ai/console/pages/prompt_fragments_page.py | 57 +++++ ai/src/ai/console/scaffold.py | 190 ++++++++++++++++ ai/src/ai/offline_common/eval.py | 215 ++++++++++++++++++ ai/src/ai/offline_common/golden_dataset.py | 9 + ai/src/console.py | 68 ++++++ ai/src/migrate_goldens.py | 55 +++++ ai/src/service.py | 29 ++- 37 files changed, 2342 insertions(+), 11 deletions(-) create mode 100644 ai/src/ai/common/diff.py create mode 100644 ai/src/ai/common/entity_store.py create mode 100644 ai/src/ai/common/example.py create mode 100644 ai/src/ai/common/executor.py create mode 100644 ai/src/ai/common/model.py create mode 100644 ai/src/ai/common/producer.py create mode 100644 ai/src/ai/common/prompt_context.py create mode 100644 ai/src/ai/common/prompt_fragment.py create mode 100644 ai/src/ai/console/__init__.py create mode 100644 ai/src/ai/console/pages/__init__.py create mode 100644 ai/src/ai/console/pages/add_edit_eval_page.py create mode 100644 ai/src/ai/console/pages/add_edit_expected_examples_page.py create mode 100644 ai/src/ai/console/pages/add_edit_golden_examples_page.py create mode 100644 ai/src/ai/console/pages/add_edit_model_page.py create mode 100644 ai/src/ai/console/pages/add_edit_page_helper.py create mode 100644 ai/src/ai/console/pages/add_edit_producer_page.py create mode 100644 ai/src/ai/console/pages/add_edit_prompt_context_page.py create mode 100644 ai/src/ai/console/pages/add_edit_prompt_fragment_page.py create mode 100644 ai/src/ai/console/pages/create_golden_dataset_page.py create mode 100644 ai/src/ai/console/pages/eval_item_page.py create mode 100644 ai/src/ai/console/pages/eval_page.py create mode 100644 ai/src/ai/console/pages/evals_page.py create mode 100644 ai/src/ai/console/pages/expected_examples_page.py create mode 100644 ai/src/ai/console/pages/golden_examples_page.py create mode 100644 ai/src/ai/console/pages/models_page.py create mode 100644 ai/src/ai/console/pages/producers_page.py create mode 100644 
ai/src/ai/console/pages/prompt_contexts_page.py
 create mode 100644 ai/src/ai/console/pages/prompt_fragments_page.py
 create mode 100644 ai/src/ai/console/scaffold.py
 create mode 100644 ai/src/ai/offline_common/eval.py
 create mode 100644 ai/src/ai/offline_common/golden_dataset.py
 create mode 100644 ai/src/console.py
 create mode 100644 ai/src/migrate_goldens.py

diff --git a/.gitignore b/.gitignore
index e6ff7466a..e4db60611 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,9 @@
 __pycache__
 *.log
 
+# This is a git submodule
+/ai/data/
+
 # Do not save generated files
 /ai/ft/outputs/
 /ai/outputs/
diff --git a/.gitmodules b/.gitmodules
index 6317b4346..2a17e79e4 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,7 @@
 [submodule "third_party/angular_components"]
   path = third_party/angular_components
   url = https://github.com/angular/components.git
+
+[submodule "ai/data"]
+  path = ai/data
+  url = git@hf.co:datasets/wwwillchen/mesop-data
diff --git a/ai/README.md b/ai/README.md
index f9fc5a291..a9a61be6d 100644
--- a/ai/README.md
+++ b/ai/README.md
@@ -9,6 +9,24 @@ All the commands should be run from the `ai/` directory.
 - All entry-points are in `src/*.py` - this includes the AI service and scripts.
 - `src/common` contains code that's shared between offline scripts and the online service.
 
+## AI Console
+
+**Setup**:
+
+```sh
+git clone git@hf.co:datasets/wwwillchen/mesop-data data
+```
+
+**Running**:
+
+Inside `ai/src/`, run the following command:
+
+```sh
+mesop console.py --port=32124
+```
+
+> Note: you can run this on a separate port to avoid conflicting with the main Mesop development app.
+
 ## Scripts
 
 These are scripts used to generate and process data for offline evaluation.
diff --git a/ai/src/ai/common/diff.py b/ai/src/ai/common/diff.py
new file mode 100644
index 000000000..4e82a5deb
--- /dev/null
+++ b/ai/src/ai/common/diff.py
@@ -0,0 +1,36 @@
+import re
+from typing import NamedTuple
+
+EDIT_HERE_MARKER = " # <--- EDIT HERE"
+
+
+class ApplyPatchResult(NamedTuple):
+  has_error: bool
+  result: str
+
+
+def apply_patch(original_code: str, patch: str) -> ApplyPatchResult:
+  # Extract the diff content
+  diff_pattern = r"<<<<<<< ORIGINAL(.*?)=======\n(.*?)>>>>>>> UPDATED"
+  matches = re.findall(diff_pattern, patch, re.DOTALL)
+  patched_code = original_code
+  if len(matches) == 0:
+    print("[WARN] No diff found:", patch)
+    return ApplyPatchResult(
+      True,
+      "[AI-001] Sorry! AI output was mis-formatted. Please try again.",
+    )
+  for original, updated in matches:
+    original = original.strip().replace(EDIT_HERE_MARKER, "")
+    updated = updated.strip().replace(EDIT_HERE_MARKER, "")
+
+    # Replace the original part with the updated part
+    new_patched_code = patched_code.replace(original, updated, 1)
+    if new_patched_code == patched_code:
+      return ApplyPatchResult(
+        True,
+        "[AI-002] Sorry! AI output could not be used.
Please try again.", + ) + patched_code = new_patched_code + + return ApplyPatchResult(False, patched_code) diff --git a/ai/src/ai/common/entity_store.py b/ai/src/ai/common/entity_store.py new file mode 100644 index 000000000..b17f68bed --- /dev/null +++ b/ai/src/ai/common/entity_store.py @@ -0,0 +1,49 @@ +import os +from typing import Generic, TypeVar + +from pydantic import BaseModel + +T = TypeVar("T", bound=BaseModel) + + +def get_data_path(dirname: str) -> str: + return os.path.join( + os.path.dirname(__file__), "..", "..", "..", "data", dirname + ) + + +class EntityStore(Generic[T]): + def __init__(self, entity_type: type[T], *, dirname: str): + self.entity_type = entity_type + self.directory_path = get_data_path(dirname) + + def get(self, id: str) -> T: + file_path = os.path.join(self.directory_path, f"{id}.json") + with open(file_path) as f: + entity_json = f.read() + entity = self.entity_type.model_validate_json(entity_json) + return entity + + def get_all(self) -> list[T]: + entities: list[T] = [] + for filename in os.listdir(self.directory_path): + if filename.endswith(".json"): + file_path = os.path.join(self.directory_path, filename) + with open(file_path) as f: + entity_json = f.read() + entities.append(self.entity_type.model_validate_json(entity_json)) + entities.sort(key=lambda x: x.id, reverse=True) + return entities + + def save(self, entity: T, overwrite: bool = False): + id = entity.id # type: ignore + entity_path = os.path.join(self.directory_path, f"{id}.json") + if not overwrite and os.path.exists(entity_path): + raise ValueError( + f"{self.entity_type.__name__} with id {id} already exists" + ) + with open(entity_path, "w") as f: + f.write(entity.model_dump_json(indent=4)) + + def delete(self, entity_id: str): + os.remove(os.path.join(self.directory_path, f"{entity_id}.json")) diff --git a/ai/src/ai/common/example.py b/ai/src/ai/common/example.py new file mode 100644 index 000000000..ec91a7a0b --- /dev/null +++ b/ai/src/ai/common/example.py @@ -0,0 +1,137 @@ +""" +An example is a single input/output pair. + - Examples are used for fine-tuning a model (i.e. golden example) or running an eval (i.e. expected example). + - There are two types of examples: + - **Golden Example**: A golden example is an example that is used to create a golden dataset. + - **Expected Example**: An expected example is an example that is used to evaluate a producer. + Internally, once an expected example has been run through an eval, we create an **evaluated example**, but you don't need to create this manually in the UI. 
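+  - On disk, each example is stored as a directory named by its id, holding an
+    example_input.json file plus optional input.py, output.py, and raw_output.txt
+    files (see ExampleStore below).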
+""" + +import os +import shutil +from typing import Generic, Literal, TypeVar + +from pydantic import BaseModel + + +class ExampleInput(BaseModel): + prompt: str + input_code: str | None = None + line_number_target: int | None = None + + +class BaseExample(BaseModel): + id: str + input: ExampleInput + + +class ExampleOutput(BaseModel): + output_code: str | None = None + raw_output: str | None = None + output_type: Literal["full", "diff"] = "diff" + + +class ExpectedExample(BaseExample): + expect_executable: bool = True + expect_type_checkable: bool = True + + +class ExpectResult(BaseModel): + name: Literal["executable", "type_checkable", "patchable"] + score: int # 0 or 1 + message: str | None = None + + +class EvaluatedExampleOutput(BaseModel): + time_spent_secs: float + tokens: int + output: ExampleOutput + expect_results: list[ExpectResult] + + +class EvaluatedExample(BaseModel): + expected: ExpectedExample + outputs: list[EvaluatedExampleOutput] + + +class GoldenExample(BaseExample): + output: ExampleOutput + + +T = TypeVar("T", bound=BaseExample) + + +class ExampleStore(Generic[T]): + def __init__(self, entity_type: type[T], *, dirname: str): + self.entity_type = entity_type + self.directory_path = os.path.join( + os.path.dirname(__file__), "..", "..", "..", "data", dirname + ) + + def get(self, id: str) -> T: + dir_path = os.path.join(self.directory_path, id) + json_path = os.path.join(dir_path, "example_input.json") + with open(json_path) as f: + entity_json = f.read() + entity = self.entity_type.model_validate_json(entity_json) + input = entity.input + input_py_path = os.path.join(dir_path, "input.py") + if os.path.exists(input_py_path): + with open(input_py_path) as f: + input.input_code = f.read() + if isinstance(entity, GoldenExample): + output_py_path = os.path.join(dir_path, "output.py") + if os.path.exists(output_py_path): + with open(output_py_path) as f: + entity.output.output_code = f.read() + raw_output_path = os.path.join(dir_path, "raw_output.txt") + if os.path.exists(raw_output_path): + with open(raw_output_path) as f: + entity.output.raw_output = f.read() + return entity + + def get_all(self) -> list[T]: + entities: list[T] = [] + for filename in os.listdir(self.directory_path): + entities.append(self.get(filename)) + return entities + + def save(self, entity: T, overwrite: bool = False): + id = entity.id + dir_path = os.path.join(self.directory_path, id) + + if not overwrite: + if os.path.exists(dir_path): + raise ValueError( + f"{self.entity_type.__name__} with id {id} already exists" + ) + else: + os.mkdir(dir_path) + json_path = os.path.join(dir_path, "example_input.json") + input_code = entity.input.input_code + if input_code: + input_py_path = os.path.join(dir_path, "input.py") + with open(input_py_path, "w") as f: + f.write(input_code) + entity.input.input_code = None + + if isinstance(entity, GoldenExample): + output_py_path = os.path.join(dir_path, "output.py") + with open(output_py_path, "w") as f: + f.write(entity.output.output_code) + raw_output_path = os.path.join(dir_path, "raw_output.txt") + with open(raw_output_path, "w") as f: + f.write(entity.output.raw_output) + entity.output.output_code = None + entity.output.raw_output = None + with open(json_path, "w") as f: + f.write(entity.model_dump_json(indent=4)) + + def delete(self, entity_id: str): + shutil.rmtree(os.path.join(self.directory_path, entity_id)) + + +expected_example_store = ExampleStore( + ExpectedExample, dirname="expected_examples" +) +golden_example_store = ExampleStore(GoldenExample, 
dirname="golden_examples") diff --git a/ai/src/ai/common/executor.py b/ai/src/ai/common/executor.py new file mode 100644 index 000000000..5e905b326 --- /dev/null +++ b/ai/src/ai/common/executor.py @@ -0,0 +1,128 @@ +from os import getenv +from typing import Iterator + +from openai import OpenAI +from openai.types.chat import ( + ChatCompletionMessageParam, +) + +from ai.common.diff import EDIT_HERE_MARKER, ApplyPatchResult, apply_patch +from ai.common.entity_store import get_data_path +from ai.common.example import ExampleInput +from ai.common.model import model_store +from ai.common.producer import producer_store +from ai.common.prompt_context import prompt_context_store +from ai.common.prompt_fragment import PromptFragment, prompt_fragment_store + + +class ProviderExecutor: + def __init__(self, model_name: str, prompt_fragments: list[PromptFragment]): + self.model_name = model_name + + self.prompt_fragments = [ + PromptFragment( + id=pf.id, + role=pf.role, + chain_of_thought=pf.chain_of_thought, + content_value=get_content_value(pf), + content_path=None, + ) + for pf in prompt_fragments + ] + + def format_messages( + self, input: ExampleInput + ) -> list[ChatCompletionMessageParam]: + code = input.input_code or "" + # Add sentinel token based on line_number (1-indexed) + if input.line_number_target is not None: + code_lines = code.splitlines() + if 1 <= input.line_number_target <= len(code_lines): + code_lines[input.line_number_target - 1] += EDIT_HERE_MARKER + code = "\n".join(code_lines) + + return [ + { + "role": pf.role, + "content": pf.content_value.replace("", code).replace( # type: ignore + "", input.prompt + ), + } + for pf in self.prompt_fragments + ] + + def execute(self, input: ExampleInput) -> str: ... + + def execute_stream(self, input: ExampleInput) -> Iterator[str]: ... 
+ + +class OpenaiExecutor(ProviderExecutor): + def __init__(self, model_name: str, prompt_fragments: list[PromptFragment]): + super().__init__(model_name, prompt_fragments) + self.client = OpenAI( + api_key=getenv("OPENAI_API_KEY"), + ) + + def execute(self, input: ExampleInput) -> str: + response = self.client.chat.completions.create( + model=self.model_name, + max_tokens=10_000, + messages=self.format_messages(input), + ) + return response.choices[0].message.content or "" + + def execute_stream(self, input: ExampleInput) -> Iterator[str]: + stream = self.client.chat.completions.create( + model=self.model_name, + max_tokens=10_000, + messages=self.format_messages(input), + stream=True, + ) + for chunk in stream: + content = chunk.choices[0].delta.content + yield content or "" + + +provider_executors: dict[str, type[ProviderExecutor]] = { + "openai": OpenaiExecutor, +} + + +class ProducerExecutor: + def __init__(self, producer_id: str): + self.producer = producer_store.get(producer_id) + + def get_provider_executor(self) -> ProviderExecutor: + prompt_context = prompt_context_store.get(self.producer.prompt_context_id) + prompt_fragments = [ + prompt_fragment_store.get(pfid) for pfid in prompt_context.fragment_ids + ] + model = model_store.get(self.producer.mesop_model_id) + provider_executor_type = provider_executors.get(model.provider) + if provider_executor_type is None: + raise ValueError(f"Provider {model.provider} not supported") + provider_executor = provider_executor_type(model.name, prompt_fragments) + return provider_executor + + def execute(self, input: ExampleInput): + return self.get_provider_executor().execute(input) + + def execute_stream(self, input: ExampleInput): + return self.get_provider_executor().execute_stream(input) + + def transform_output(self, input_code: str, output: str): + if self.producer.output_format == "diff": + return apply_patch(input_code, output) + elif self.producer.output_format == "full": + return ApplyPatchResult(True, output) + else: + raise ValueError(f"Unknown output format: {self.producer.output_format}") + + +def get_content_value(pf: PromptFragment) -> str | None: + if pf.content_value is not None: + return pf.content_value + if pf.content_path is not None: + with open(get_data_path(pf.content_path.replace("//", ""))) as f: + return f.read() + return None diff --git a/ai/src/ai/common/model.py b/ai/src/ai/common/model.py new file mode 100644 index 000000000..f9c877e30 --- /dev/null +++ b/ai/src/ai/common/model.py @@ -0,0 +1,17 @@ +from pydantic import BaseModel + +from ai.common.entity_store import EntityStore + + +class Model(BaseModel): + """ + Model represents an LLM. + Name should match the model name used by the provider for the API call. 
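+  e.g. provider="openai", name="gpt-4o".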
+ """ + + id: str + name: str + provider: str + + +model_store = EntityStore(Model, dirname="models") diff --git a/ai/src/ai/common/producer.py b/ai/src/ai/common/producer.py new file mode 100644 index 000000000..d23921baa --- /dev/null +++ b/ai/src/ai/common/producer.py @@ -0,0 +1,16 @@ +from typing import Literal + +from pydantic import BaseModel + +from ai.common.entity_store import EntityStore + + +class Producer(BaseModel): + id: str + mesop_model_id: str # using model_id has a conflict with Pydantic + prompt_context_id: str + output_format: Literal["full", "diff"] + temperature: float = 0.8 + + +producer_store = EntityStore(Producer, dirname="producers") diff --git a/ai/src/ai/common/prompt_context.py b/ai/src/ai/common/prompt_context.py new file mode 100644 index 000000000..7c25592e4 --- /dev/null +++ b/ai/src/ai/common/prompt_context.py @@ -0,0 +1,15 @@ +from pydantic import BaseModel + +from ai.common.entity_store import EntityStore + + +class PromptContext(BaseModel): + """ + PromptContext represents the context of a prompt. + """ + + id: str + fragment_ids: list[str] + + +prompt_context_store = EntityStore(PromptContext, dirname="prompt_contexts") diff --git a/ai/src/ai/common/prompt_fragment.py b/ai/src/ai/common/prompt_fragment.py new file mode 100644 index 000000000..9cd148399 --- /dev/null +++ b/ai/src/ai/common/prompt_fragment.py @@ -0,0 +1,31 @@ +from typing import Literal + +from pydantic import BaseModel, model_validator + +from ai.common.entity_store import EntityStore + + +class PromptFragment(BaseModel): + id: str + content_value: str | None = None + content_path: str | None = None + role: Literal["user", "assistant", "system"] + chain_of_thought: bool = False + + @model_validator(mode="after") + def check_content_value_or_path(self): + if self.content_value == "": + self.content_value = None + if self.content_path == "": + self.content_path = None + + content_value = self.content_value + content_path = self.content_path + if content_value is not None and content_path is not None: + raise ValueError("Only one of content_value or content_path is allowed") + if content_value is None and content_path is None: + raise ValueError("Either content_value or content_path is required") + return self + + +prompt_fragment_store = EntityStore(PromptFragment, dirname="prompt_fragments") diff --git a/ai/src/ai/console/__init__.py b/ai/src/ai/console/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/ai/src/ai/console/pages/__init__.py b/ai/src/ai/console/pages/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/ai/src/ai/console/pages/add_edit_eval_page.py b/ai/src/ai/console/pages/add_edit_eval_page.py new file mode 100644 index 000000000..ab3e35433 --- /dev/null +++ b/ai/src/ai/console/pages/add_edit_eval_page.py @@ -0,0 +1,53 @@ +import datetime +from typing import Any + +import mesop as me +from ai.common.producer import producer_store +from ai.console.pages.add_edit_page_helper import ( + create_add_edit_page, + form_field, + get_field_value, + update_state, +) +from ai.offline_common.eval import ( + Eval, +) +from ai.offline_common.eval import ( + eval_store as store, +) + + +def get_producer_ids(): + options: list[me.AutocompleteOption] = [] + + for producer in producer_store.get_all(): + options.append(me.AutocompleteOption(label=producer.id, value=producer.id)) + + return options + + +def form(): + form_field("id", "Eval id") + me.autocomplete( + value=get_field_value("producer_id"), + label="Producer id", + 
options=get_producer_ids(),
+    style=me.Style(width="100%"),
+    on_selection_change=lambda e: update_state("producer_id", e.value),
+  )
+
+
+def create_default_eval() -> dict[str, Any]:
+  id = datetime.datetime.now().replace(microsecond=0).isoformat()
+  return {"id": id, "producer_id": ""}
+
+
+create_add_edit_page(
+  store=store,
+  entity_type=Eval,
+  entity_name="Eval",
+  root_path="/evals",
+  form=form,
+  create_default_entity=create_default_eval,
+  disable_edit=True,
+)
diff --git a/ai/src/ai/console/pages/add_edit_expected_examples_page.py b/ai/src/ai/console/pages/add_edit_expected_examples_page.py
new file mode 100644
index 000000000..9cfc9d6cf
--- /dev/null
+++ b/ai/src/ai/console/pages/add_edit_expected_examples_page.py
@@ -0,0 +1,56 @@
+import mesop as me
+from ai.common.example import (
+  ExpectedExample,
+)
+from ai.common.example import (
+  expected_example_store as store,
+)
+from ai.console.pages.add_edit_page_helper import (
+  create_add_edit_page,
+  form_field,
+  get_field_value,
+  update_state,
+)
+
+
+def form():
+  form_field("id", "Unique identifier for the example")
+  form_field("input.prompt", "Input: prompt")
+  me.textarea(
+    value=get_field_value("input.input_code"),
+    appearance="outline",
+    label="Input code",
+    on_blur=lambda e: update_state(e.key, e.value),
+    key="input.input_code",
+    hint_label=f"Input code path: data/expected_examples/{get_field_value('id')}/input.py",
+    style=me.Style(width="min(100%, 360px)"),
+  )
+  form_field(
+    "input.line_number_target", "Input: line number target", type="number"
+  )
+
+  me.checkbox(
+    checked=bool(get_field_value("expect_executable")),
+    label="Expect executable",
+    key="expect_executable",
+    on_change=lambda e: update_state("expect_executable", e.checked),
+  )
+  me.checkbox(
+    checked=bool(get_field_value("expect_type_checkable")),
+    label="Expect type checkable",
+    key="expect_type_checkable",
+    on_change=lambda e: update_state("expect_type_checkable", e.checked),
+  )
+
+
+create_add_edit_page(
+  store=store,
+  entity_type=ExpectedExample,
+  entity_name="Expected Example",
+  root_path="/expected-examples",
+  form=form,
+  create_default_entity=lambda: {
+    "expect_executable": True,
+    "expect_type_checkable": True,
+  },
+)
diff --git a/ai/src/ai/console/pages/add_edit_golden_examples_page.py b/ai/src/ai/console/pages/add_edit_golden_examples_page.py
new file mode 100644
index 000000000..66d6f1746
--- /dev/null
+++ b/ai/src/ai/console/pages/add_edit_golden_examples_page.py
@@ -0,0 +1,126 @@
+import base64
+
+import requests
+
+import mesop as me
+from ai.common.diff import apply_patch
+from ai.common.example import (
+  GoldenExample,
+)
+from ai.common.example import (
+  golden_example_store as store,
+)
+from ai.console.pages.add_edit_page_helper import (
+  create_add_edit_page,
+  form_field,
+  get_field_value,
+  update_state,
+)
+from ai.offline_common.eval import SANDBOX_URL
+
+
+@me.stateclass
+class State:
+  preview_url: str
+  preview_error: str
+
+
+def load_preview(e: me.ClickEvent):
+  state = me.state(State)
+  code = get_field_value("output.output_code")
+  result = requests.post(
+    SANDBOX_URL + "/exec",
+    data={"code": base64.b64encode(code.encode("utf-8"))},
+  )
+  if result.status_code == 200:
+    url_path = result.content.decode("utf-8")
+    state.preview_url = SANDBOX_URL + url_path
+    state.preview_error = ""
+  else:
+    state.preview_error = result.content.decode("utf-8")
+
+
+def form():
+  state = me.state(State)
+  me.button("Load preview", on_click=load_preview, type="flat")
+  if state.preview_url:
+    me.link(
text="Open preview", + url=state.preview_url, + style=me.Style(color=me.theme_var("primary"), text_decoration="none"), + open_in_new_tab=True, + ) + if state.preview_error: + me.text( + state.preview_error, + style=me.Style(font_family="monospace", white_space="pre"), + ) + + form_field("id", "Unique identifier for the example") + form_field("input.prompt", "Input: prompt") + me.textarea( + value=get_field_value("input.input_code"), # type: ignore + appearance="outline", + label="Input code", + on_blur=lambda e: update_state(e.key, e.value), + key="input.input_code", + hint_label=f"Input code path: data/expected_examples/{get_field_value('id')}/input.py", + style=me.Style(width="100%"), + ) + form_field( + "input.line_number_target", "Input: line number target", type="number" + ) + me.select( + value=get_field_value("output.output_type"), # type: ignore + options=[ + me.SelectOption(label="Full", value="full"), + me.SelectOption(label="Diff", value="diff"), + ], + on_selection_change=lambda e: update_state(e.key, e.value), + key="output.output_type", + label="Output type", + style=me.Style(width="min(100%, 360px)"), + ) + me.textarea( + value=get_field_value("output.raw_output"), # type: ignore + appearance="outline", + label="Raw output", + on_blur=update_raw_output, + key="output.raw_output", + hint_label=f"Output code path: data/expected_examples/{get_field_value('id')}/raw_output.txt", + style=me.Style(width="100%"), + ) + me.textarea( + readonly=True, + value=get_field_value("output.output_code"), # type: ignore + appearance="outline", + label="Generated output code (read-only)", + hint_label=f"Output code path: data/expected_examples/{get_field_value('id')}/output.py", + style=me.Style(width="100%"), + ) + + +def update_raw_output(e: me.InputBlurEvent): + update_state(e.key, e.value) + output_type = get_field_value("output.output_type") + if output_type == "full": + update_state("output.output_code", e.value) + elif output_type == "diff": + result = apply_patch(get_field_value("input.input_code"), e.value) + update_state("output.output_code", result.result) + else: + raise ValueError(f"Unknown output type: {output_type}") + + +create_add_edit_page( + store=store, + entity_type=GoldenExample, + entity_name="Golden Example", + root_path="/golden-examples", + form=form, + create_default_entity=lambda: { + "output": { + "output_type": "diff", + }, + }, +) diff --git a/ai/src/ai/console/pages/add_edit_model_page.py b/ai/src/ai/console/pages/add_edit_model_page.py new file mode 100644 index 000000000..891ce0899 --- /dev/null +++ b/ai/src/ai/console/pages/add_edit_model_page.py @@ -0,0 +1,25 @@ +from ai.common.model import ( + Model, +) +from ai.common.model import ( + model_store as store, +) +from ai.console.pages.add_edit_page_helper import ( + create_add_edit_page, + form_field, +) + + +def form(): + form_field("provider", "Provider of the model") + form_field("name", "Descriptive name for the model") + form_field("id", "Unique identifier for the model") + + +create_add_edit_page( + store=store, + entity_type=Model, + entity_name="Model", + root_path="/models", + form=form, +) diff --git a/ai/src/ai/console/pages/add_edit_page_helper.py b/ai/src/ai/console/pages/add_edit_page_helper.py new file mode 100644 index 000000000..5e496fdeb --- /dev/null +++ b/ai/src/ai/console/pages/add_edit_page_helper.py @@ -0,0 +1,165 @@ +from functools import partial +from typing import Any, Callable, Type, TypeVar + +from pydantic import BaseModel + +import mesop as me +from ai.common.entity_store import 
EntityStore +from ai.common.example import BaseExample, ExampleStore +from ai.console.scaffold import page_scaffold + + +@me.stateclass +class State: + entity: dict[str, Any] + + +def form_field(field: str, description: str, type: str | None = None): + disabled = "id" in me.query_params and field == "id" + me.input( + disabled=disabled, + value=str(get_field_value(field)), + appearance="outline", + type=type, # type: ignore + label=field, + on_blur=lambda e: update_state(e.key, e.value), + key=field, + hint_label=description, + style=me.Style(width="min(100%, 360px)"), + ) + + +def update_state(key: str, value: Any): + state = me.state(State) + state.entity[key] = value + + +def get_field_value(field_name: str): + state = me.state(State) + # We do some hacky-ish logic to support both dot notation and nested dicts. + + # When the field is set in the current page, we set it with dot notation. + if field_name in state.entity: + return state.entity[field_name] or "" + + # Otherwise, if we loaded the entity from the store (i.e. filesystem), + # we access it as a nested dict. + keys = field_name.split(".") + value = state.entity + for key in keys: + if isinstance(value, dict): + value = value.get(key, "") + else: + return "" + return value or "" + + +T = TypeVar("T", bound=BaseModel) +E = TypeVar("E", bound=BaseExample) + + +def create_add_edit_page( + *, + store: EntityStore[T] | ExampleStore[E], + entity_type: Type[T] | Type[E], + entity_name: str, + root_path: str, + form: Callable[[], None], + create_default_entity: Callable[[], dict[str, Any]] | None = None, + disable_edit: bool = False, +): + def on_load_edit_page(e: me.LoadEvent): + me.set_theme_mode("system") + id = me.query_params.get("id") + assert id is not None + entity = store.get(id) + state = me.state(State) + state.entity = entity.model_dump() + + def delete(e: me.ClickEvent): + store.delete(me.state(State).entity["id"]) + reset_and_navigate() + + if not disable_edit: + + @me.page(path=root_path + "/edit", on_load=on_load_edit_page) + def edit_page(): # type: ignore + with page_scaffold(title=f"Edit {entity_name}"): + with me.box( + style=me.Style( + display="flex", flex_direction="column", gap=24, max_width=640 + ) + ): + form() + with me.box( + style=me.Style( + display="flex", + flex_direction="row", + justify_content="space-between", + gap=16, + ) + ): + me.button( + "Back", + type="stroked", + on_click=lambda e: reset_and_navigate(), + ) + me.button("Delete", type="flat", color="warn", on_click=delete) + me.button( + "Save", type="flat", on_click=partial(update, overwrite=True) + ) + + def on_load_add_page(e: me.LoadEvent): + me.set_theme_mode("system") + state = me.state(State) + if create_default_entity is not None: + state.entity = create_default_entity() + else: + state.entity = {} + + @me.page(path=root_path + "/add", on_load=on_load_add_page) + def add_page(): # type: ignore + with page_scaffold( + title=f"Add {entity_name}", + ): + with me.box( + style=me.Style( + display="flex", flex_direction="column", gap=24, max_width=640 + ) + ): + form() + with me.box( + style=me.Style( + display="flex", + flex_direction="row", + justify_content="space-between", + gap=16, + ) + ): + me.button( + "Back", + type="stroked", + on_click=lambda e: reset_and_navigate(), + ) + me.button("Add", type="flat", on_click=update) + + def update(e: me.ClickEvent, *, overwrite: bool = False): + state = me.state(State) + # convert dot notation to nested dicts + converted: dict[str, Any] = {} + for key in state.entity: + keys = key.split(".") + 
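+      # e.g. {"input.prompt": "hi"} becomes {"input": {"prompt": "hi"}}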
current = converted + for k in keys[:-1]: + if k not in current: + current[k] = {} + current = current[k] + current[keys[-1]] = state.entity[key] + + store.save(entity_type(**converted), overwrite=overwrite) # type: ignore + reset_and_navigate() + + def reset_and_navigate(): + state = me.state(State) + state.entity = {} + me.navigate(root_path or "/") diff --git a/ai/src/ai/console/pages/add_edit_producer_page.py b/ai/src/ai/console/pages/add_edit_producer_page.py new file mode 100644 index 000000000..e7a3a332b --- /dev/null +++ b/ai/src/ai/console/pages/add_edit_producer_page.py @@ -0,0 +1,74 @@ +import mesop as me +from ai.common.model import model_store +from ai.common.producer import ( + Producer, +) +from ai.common.producer import ( + producer_store as store, +) +from ai.common.prompt_context import prompt_context_store +from ai.console.pages.add_edit_page_helper import ( + create_add_edit_page, + form_field, + get_field_value, + update_state, +) + + +def get_model_ids(): + options: list[me.AutocompleteOption] = [] + + for model in model_store.get_all(): + options.append(me.AutocompleteOption(label=model.id, value=model.id)) + + return options + + +def get_prompt_context_ids(): + options: list[me.AutocompleteOption] = [] + + for prompt_context in prompt_context_store.get_all(): + options.append( + me.AutocompleteOption(label=prompt_context.id, value=prompt_context.id) + ) + + return options + + +def form(): + form_field("id", "Unique identifier for the producer") + me.autocomplete( + value=get_field_value("mesop_model_id"), + label="Model id", + options=get_model_ids(), + style=me.Style(width="min(100%, 360px)"), + on_selection_change=lambda e: update_state("mesop_model_id", e.value), + ) + me.autocomplete( + value=get_field_value("prompt_context_id"), + label="Prompt context id", + options=get_prompt_context_ids(), + style=me.Style(width="min(100%, 360px)"), + on_selection_change=lambda e: update_state("prompt_context_id", e.value), + ) + + me.select( + value=get_field_value("output_format"), + label="Output format", + options=[ + me.SelectOption(label="Full", value="full"), + me.SelectOption(label="Diff", value="diff"), + ], + on_selection_change=lambda e: update_state("output_format", e.value), + style=me.Style(width="min(100%, 360px)"), + ) + form_field("temperature", "temperature (default 0.8)", type="number") + + +create_add_edit_page( + store=store, + entity_type=Producer, + entity_name="Producer", + root_path="/producers", + form=form, +) diff --git a/ai/src/ai/console/pages/add_edit_prompt_context_page.py b/ai/src/ai/console/pages/add_edit_prompt_context_page.py new file mode 100644 index 000000000..4ca1f1b33 --- /dev/null +++ b/ai/src/ai/console/pages/add_edit_prompt_context_page.py @@ -0,0 +1,78 @@ +from functools import partial + +import mesop as me +from ai.common.prompt_context import ( + PromptContext, +) +from ai.common.prompt_context import ( + prompt_context_store as store, +) +from ai.common.prompt_fragment import ( + prompt_fragment_store, +) +from ai.console.pages.add_edit_page_helper import ( + create_add_edit_page, + form_field, + get_field_value, + update_state, +) + + +def update_fragment_id(e: me.SelectSelectionChangeEvent, index: int): + fragment_ids = get_field_value("fragment_ids") + fragment_ids[index] = e.value + update_state("fragment_ids", fragment_ids) + + +def delete_fragment_id(e: me.ClickEvent, index: int): + fragment_ids = get_field_value("fragment_ids") + fragment_ids.pop(index) + update_state("fragment_ids", fragment_ids) + + +def 
append_fragment_id(e: me.SelectSelectionChangeEvent): + fragment_ids = get_field_value("fragment_ids") + if fragment_ids is None or fragment_ids == "": + fragment_ids = [] + fragment_ids.append(e.value) + update_state("fragment_ids", fragment_ids) + + +def form(): + form_field("id", "Unique identifier") + fragment_ids = get_field_value("fragment_ids") + for index, fragment_id in enumerate(fragment_ids): + with me.box(style=me.Style(display="flex", gap=8)): + me.select( + value=fragment_id, + label="Fragment IDs", + options=get_fragment_options(), + style=me.Style(width="360px"), + on_selection_change=partial(update_fragment_id, index=index), + ) + me.button( + "Remove", + on_click=partial(delete_fragment_id, index=index), + ) + me.select( + label="Fragment IDs", + options=get_fragment_options(), + style=me.Style(width="min(100%, 360px)"), + on_selection_change=append_fragment_id, + ) + + +def get_fragment_options(): + return [ + me.SelectOption(label=fragment.id, value=fragment.id) + for fragment in prompt_fragment_store.get_all() + ] + + +create_add_edit_page( + store=store, + entity_type=PromptContext, + entity_name="Prompt Context", + root_path="/prompt-contexts", + form=form, +) diff --git a/ai/src/ai/console/pages/add_edit_prompt_fragment_page.py b/ai/src/ai/console/pages/add_edit_prompt_fragment_page.py new file mode 100644 index 000000000..f5ea466be --- /dev/null +++ b/ai/src/ai/console/pages/add_edit_prompt_fragment_page.py @@ -0,0 +1,84 @@ +import os + +import mesop as me +from ai.common.prompt_fragment import ( + PromptFragment, +) +from ai.common.prompt_fragment import ( + prompt_fragment_store as store, +) +from ai.console.pages.add_edit_page_helper import ( + create_add_edit_page, + form_field, + get_field_value, + update_state, +) + + +def get_autocomplete_options(): + options: list[me.AutocompleteOption] = [] + prompt_contents_dir = os.path.join( + os.path.dirname(__file__), "..", "..", "..", "..", "data", "prompt_contents" + ) + if os.path.exists(prompt_contents_dir): + for filename in os.listdir(prompt_contents_dir): + file_path = os.path.join(prompt_contents_dir, filename) + if os.path.isfile(file_path): + options.append( + me.AutocompleteOption( + label=filename, value="//prompt_contents/" + filename + ) + ) + + return options + + +def form(): + form_field("id", "Unique identifier") + me.select( + value=get_field_value("role"), + label="Role", + options=[ + me.SelectOption(label="User", value="user"), + me.SelectOption(label="Assistant", value="assistant"), + me.SelectOption(label="System", value="system"), + ], + on_selection_change=lambda e: update_state("role", e.value), + style=me.Style(width="min(100%, 360px)"), + ) + me.divider() + me.text("Content (set either value or path)") + me.textarea( + value=get_field_value("content_value"), + appearance="outline", + label="Content value", + on_blur=lambda e: update_state(e.key, e.value), + key="content_value", + # TODO: potentially support golden example variables + # for more powerful few-shot prompting, e.g. 
+ hint_label="Variables: ", + style=me.Style(width="min(100%)"), + ) + me.autocomplete( + value=get_field_value("content_path"), + label="Content path", + options=get_autocomplete_options(), + hint_label="(absolute) path to a file containing the content", + style=me.Style(width="min(100%, 360px)"), + on_selection_change=lambda e: update_state("content_path", e.value), + ) + me.divider() + me.checkbox( + checked=bool(get_field_value("chain_of_thought")), + label="Chain of thought", + on_change=lambda e: update_state("chain_of_thought", e.checked), + ) + + +create_add_edit_page( + store=store, + entity_type=PromptFragment, + entity_name="Prompt Fragment", + root_path="/prompt-fragments", + form=form, +) diff --git a/ai/src/ai/console/pages/create_golden_dataset_page.py b/ai/src/ai/console/pages/create_golden_dataset_page.py new file mode 100644 index 000000000..acb501b98 --- /dev/null +++ b/ai/src/ai/console/pages/create_golden_dataset_page.py @@ -0,0 +1,81 @@ +import mesop as me +from ai.common.prompt_context import prompt_context_store +from ai.console.scaffold import page_scaffold +from ai.offline_common.golden_dataset import create_golden_dataset + + +def on_load(e: me.LoadEvent): + me.set_theme_mode("system") + + +def get_prompt_context_options(): + return [ + me.SelectOption(label=context.id, value=context.id) + for context in prompt_context_store.get_all() + ] + + +@me.stateclass +class State: + prompt_context_id: str + dataset_name: str + dataset_path: str + + +def select_prompt_context(e: me.SelectSelectionChangeEvent): + state = me.state(State) + state.prompt_context_id = e.value + + +def on_dataset_name_blur(e: me.InputBlurEvent): + state = me.state(State) + state.dataset_name = e.value + + +@me.page(path="/create-golden-dataset", on_load=on_load) +def create_golden_dataset_page(): + state = me.state(State) + with page_scaffold( + current_path="/create-golden-dataset", title="Create golden dataset" + ): + me.input( + label="Dataset name", + on_blur=on_dataset_name_blur, + ) + me.select( + label="Prompt Context", + options=get_prompt_context_options(), + style=me.Style(width="min(100%, 360px)"), + on_selection_change=select_prompt_context, + ) + with me.box( + style=me.Style( + padding=me.Padding(bottom=16), + display="flex", + justify_content="space-between", + ) + ): + me.button( + "Back", + on_click=lambda e: me.navigate("/golden-examples"), + type="stroked", + color="accent", + ) + me.button( + "Create dataset", + on_click=create_dataset, + type="flat", + color="accent", + ) + if state.dataset_path: + me.text(state.dataset_path) + + +def create_dataset(e: me.ClickEvent): + state = me.state(State) + prompt_context_id = state.prompt_context_id + dataset_name = state.dataset_name + prompt_context = prompt_context_store.get(prompt_context_id) + dataset_path = create_golden_dataset(prompt_context, dataset_name) + state.dataset_path = dataset_path + print("dataset_path", dataset_path) diff --git a/ai/src/ai/console/pages/eval_item_page.py b/ai/src/ai/console/pages/eval_item_page.py new file mode 100644 index 000000000..eae5f8ee3 --- /dev/null +++ b/ai/src/ai/console/pages/eval_item_page.py @@ -0,0 +1,111 @@ +import base64 + +import requests + +import mesop as me +from ai.console.scaffold import page_scaffold +from ai.offline_common.eval import ( + SANDBOX_URL, + get_eval_example, +) + + +def on_load(e: me.LoadEvent): + me.set_theme_mode("system") + state = me.state(State) + example = get_eval_example( + me.query_params["eval-id"], me.query_params["example-id"] + ) + code = 
example.outputs[0].output.output_code or "" + result = requests.post( + SANDBOX_URL + "/exec", + data={"code": base64.b64encode(code.encode("utf-8"))}, + ) + if result.status_code == 200: + url_path = result.content.decode("utf-8") + state.loaded_url = SANDBOX_URL + url_path + state.error = "" + else: + state.error = result.content.decode("utf-8") + + +@me.stateclass +class State: + loaded_url: str + error: str + + +@me.page(title="Mesop AI Console - Eval", path="/eval-item", on_load=on_load) +def eval_item_page(): + state = me.state(State) + example = get_eval_example( + me.query_params["eval-id"], me.query_params["example-id"] + ) + with page_scaffold(current_path="/eval", title="Eval"): + with me.box( + style=me.Style( + display="grid", + grid_template_columns="80px 1fr", + gap=8, + justify_items="start", + margin=me.Margin(bottom=8), + ) + ): + with me.box( + style=me.Style( + display="grid", + grid_template_columns="repeat(2, calc(calc(100vw - 310px)/2))", + gap=16, + align_items="start", + ) + ): + # Header + me.text("Result", style=me.Style(font_weight="bold")) + me.text("Preview", style=me.Style(font_weight="bold")) + + # Body + with me.box( + style=me.Style( + display="flex", + flex_direction="column", + gap=8, + height="calc(100vh - 160px)", + overflow_y="auto", + ) + ): + me.text("ID", style=me.Style(font_weight="bold")) + me.text(example.expected.id) + + me.text("Results", style=me.Style(font_weight="bold")) + for result in example.outputs[0].expect_results: + with me.box( + style=me.Style(display="flex", flex_direction="row", gap=8) + ): + me.text(result.name) + me.text(str(result.score)) + + me.text( + result.message, + style=me.Style(font_family="monospace", white_space="pre"), + ) + + me.text("Code") + me.markdown( + "```\n" + (example.outputs[0].output.output_code or "") + "\n```", + style=me.Style(font_size=14), + ) + me.divider() + me.text("Raw output") + me.markdown( + "```\n" + (example.outputs[0].output.raw_output or "") + "\n```", + style=me.Style(font_size=14), + ) + with me.box( + style=me.Style(display="flex", flex_direction="column", gap=8) + ): + if state.error: + me.text("Error") + me.text(state.error) + me.embed( + src=state.loaded_url, style=me.Style(width="100%", height="80vh") + ) diff --git a/ai/src/ai/console/pages/eval_page.py b/ai/src/ai/console/pages/eval_page.py new file mode 100644 index 000000000..3a9c5b391 --- /dev/null +++ b/ai/src/ai/console/pages/eval_page.py @@ -0,0 +1,100 @@ +import mesop as me +from ai.console.scaffold import page_scaffold +from ai.offline_common.eval import EvalRunner, get_eval_examples +from ai.offline_common.eval import eval_store as store + + +def on_load(e: me.LoadEvent): + me.set_theme_mode("system") + + +def run_eval(e: me.ClickEvent): + eval = store.get(me.query_params["id"]) + EvalRunner(eval).run() + + +@me.page(title="Mesop AI Console - Eval", path="/eval", on_load=on_load) +def eval_page(): + eval = store.get(me.query_params["id"]) + examples = get_eval_examples(eval.id) + with page_scaffold(current_path="/eval", title="Eval"): + with me.box( + style=me.Style( + display="grid", + grid_template_columns="80px 1fr", + gap=8, + justify_items="start", + ) + ): + me.text("ID", style=me.Style(font_weight="bold")) + me.text(eval.id) + me.text("State", style=me.Style(font_weight="bold")) + me.text(eval.state) + me.text("Examples", style=me.Style(font_weight="bold")) + me.text(str(len(examples))) + if eval.eval_outcome: + me.text("Score", style=me.Style(font_weight="bold")) + with me.tooltip( + message=f"Score: 
{eval.eval_outcome.score} / Max score: {eval.eval_outcome.max_score}"
+        ):
+          me.text(
+            f"{eval.eval_outcome.score / eval.eval_outcome.max_score * 100:.0f}%"
+          )
+    with me.box(style=me.Style(padding=me.Padding(top=32))):
+      if eval.state == "pending":
+        me.button(
+          "Run eval",
+          on_click=run_eval,
+          type="flat",
+          color="accent",
+        )
+
+    if eval.state == "complete":
+      with me.box(
+        style=me.Style(
+          display="grid",
+          grid_template_columns="220px 300px 32px 48px 1fr",
+          gap=16,
+          align_items="center",
+        )
+      ):
+        # Header
+        me.text("ID", style=me.Style(font_weight="bold"))
+        me.text("Prompt", style=me.Style(font_weight="bold"))
+        me.text("Secs", style=me.Style(font_weight="bold"))
+        me.text("Tokens", style=me.Style(font_weight="bold"))
+        me.text("Expect results", style=me.Style(font_weight="bold"))
+        # Body
+        for example in examples:
+          # use a link because back navigation drops the query params
+          me.link(
+            text=example.expected.id,
+            style=me.Style(
+              font_size=16,
+              text_decoration="none",
+              color=me.theme_var("primary"),
+            ),
+            url=f"/eval-item?example-id={example.expected.id}&eval-id={eval.id}",
+          )
+          me.text(example.expected.input.prompt)
+          me.text(f"{example.outputs[0].time_spent_secs:.1f}")
+          me.text(str(example.outputs[0].tokens))
+          with me.box(
+            style=me.Style(display="flex", flex_direction="row", gap=12)
+          ):
+            for result in example.outputs[0].expect_results:
+              with me.tooltip(message=result.message or ""):
+                with me.box(
+                  style=me.Style(
+                    display="flex",
+                    flex_direction="column",
+                    gap=8,
+                    background=me.theme_var("error-container")
+                    if result.score == 0
+                    else None,
+                    padding=me.Padding.all(4),
+                    border_radius=8,
+                  )
+                ):
+                  me.text(result.name[:5], style=me.Style(font_weight="bold"))
+                  me.text(str(result.score))
diff --git a/ai/src/ai/console/pages/evals_page.py b/ai/src/ai/console/pages/evals_page.py
new file mode 100644
index 000000000..d3e0899ea
--- /dev/null
+++ b/ai/src/ai/console/pages/evals_page.py
@@ -0,0 +1,48 @@
+import mesop as me
+from ai.console.scaffold import page_scaffold
+from ai.offline_common.eval import eval_store as store
+
+
+def on_load(e: me.LoadEvent):
+  me.set_theme_mode("system")
+
+
+@me.page(title="Mesop AI Console - Evals", path="/evals", on_load=on_load)
+def evals_page():
+  with page_scaffold(current_path="/evals", title="Evals"):
+    evals = store.get_all()
+    with me.box(
+      style=me.Style(
+        display="grid",
+        grid_template_columns="repeat(2, 1fr)",
+        gap=16,
+        align_items="center",
+      )
+    ):
+      # Header
+      me.text("ID", style=me.Style(font_weight="bold"))
+      me.text("Producer", style=me.Style(font_weight="bold"))
+      # Body
+      for eval in evals:
+        me.button(
+          eval.id,
+          on_click=lambda e: me.navigate("/eval", query_params={"id": e.key}),
+          key=eval.id,
+          style=me.Style(font_size=16),
+        )
+        me.button(
+          eval.producer_id,
+          on_click=lambda e: me.navigate(
+            "/producers/edit", query_params={"id": e.key}
+          ),
+          key=eval.producer_id,
+          style=me.Style(font_size=16),
+        )
+
+    with me.box(style=me.Style(padding=me.Padding(top=32))):
+      me.button(
+        "Create eval",
+        on_click=lambda e: me.navigate("/evals/add"),
+        type="flat",
+        color="accent",
+      )
diff --git a/ai/src/ai/console/pages/expected_examples_page.py b/ai/src/ai/console/pages/expected_examples_page.py
new file mode 100644
index 000000000..dc334c477
--- /dev/null
+++ b/ai/src/ai/console/pages/expected_examples_page.py
@@ -0,0 +1,51 @@
+import mesop as me
+from ai.common.example import expected_example_store as store
+from ai.console.scaffold import page_scaffold
+
+
+def on_load(e: me.LoadEvent):
+  me.set_theme_mode("system")
+
+
+@me.page(path="/expected-examples", on_load=on_load)
+def expected_examples_page():
+  with page_scaffold(
+    current_path="/expected-examples", title="Expected Examples"
+  ):
+    with me.box(style=me.Style(padding=me.Padding(bottom=16))):
+      me.button(
+        "Add Expected Example",
+        on_click=lambda e: me.navigate("/expected-examples/add"),
+        type="flat",
+        color="accent",
+      )
+
+    examples = store.get_all()
+    with me.box(
+      style=me.Style(
+        display="grid",
+        grid_template_columns="repeat(4, 1fr)",
+        gap=16,
+        align_items="center",
+        overflow_y="auto",
+        height="100%",
+      )
+    ):
+      # Header
+      me.text("ID", style=me.Style(font_weight="bold"))
+      me.text("Prompt", style=me.Style(font_weight="bold"))
+      me.text("Has input code", style=me.Style(font_weight="bold"))
+      me.text("Has line # target", style=me.Style(font_weight="bold"))
+      # Body
+      for example in examples:
+        me.button(
+          example.id,
+          on_click=lambda e: me.navigate(
+            "/expected-examples/edit", query_params={"id": e.key}
+          ),
+          key=example.id,
+          style=me.Style(font_size=16),
+        )
+        me.text(example.input.prompt)
+        me.text(str(bool(example.input.input_code)))
+        me.text(str(bool(example.input.line_number_target)))
diff --git a/ai/src/ai/console/pages/golden_examples_page.py b/ai/src/ai/console/pages/golden_examples_page.py
new file mode 100644
index 000000000..ad401350b
--- /dev/null
+++ b/ai/src/ai/console/pages/golden_examples_page.py
@@ -0,0 +1,61 @@
+import mesop as me
+from ai.common.example import golden_example_store as store
+from ai.console.scaffold import page_scaffold
+
+
+def on_load(e: me.LoadEvent):
+  me.set_theme_mode("system")
+
+
+@me.page(path="/golden-examples", on_load=on_load)
+def golden_examples_page():
+  with page_scaffold(current_path="/golden-examples", title="Golden Examples"):
+    examples = store.get_all()
+    with me.box(
+      style=me.Style(
+        padding=me.Padding(bottom=16),
+        display="flex",
+        justify_content="space-between",
+      )
+    ):
+      me.button(
+        "Add Golden Example",
+        on_click=lambda e: me.navigate("/golden-examples/add"),
+        type="flat",
+        color="accent",
+      )
+      with me.tooltip(message="Create a golden dataset for fine-tuning"):
+        me.button(
+          "Create golden dataset",
+          on_click=lambda e: me.navigate("/create-golden-dataset"),
+          type="flat",
+          color="accent",
+        )
+    with me.box(
+      style=me.Style(
+        display="grid",
+        grid_template_columns="200px 1fr 48px 48px",
+        gap=12,
+        align_items="center",
+        overflow_y="auto",
+        height="100%",
+      )
+    ):
+      # Header
+      me.text("ID", style=me.Style(font_weight="bold"))
+      me.text("Prompt", style=me.Style(font_weight="bold"))
+      me.text("Has input code", style=me.Style(font_weight="bold"))
+      me.text("Has line # target", style=me.Style(font_weight="bold"))
+      # Body
+      for example in examples:
+        me.button(
+          example.id[0:20] + "..."
if len(example.id) > 20 else example.id, + on_click=lambda e: me.navigate( + "/golden-examples/edit", query_params={"id": e.key} + ), + key=example.id, + style=me.Style(font_size=16), + ) + me.text(example.input.prompt) + me.text(str(bool(example.input.input_code))) + me.text(str(bool(example.input.line_number_target))) diff --git a/ai/src/ai/console/pages/models_page.py b/ai/src/ai/console/pages/models_page.py new file mode 100644 index 000000000..2e9b468f9 --- /dev/null +++ b/ai/src/ai/console/pages/models_page.py @@ -0,0 +1,44 @@ +import mesop as me +from ai.common.model import model_store as store +from ai.console.scaffold import page_scaffold + + +def on_load(e: me.LoadEvent): + me.set_theme_mode("system") + + +@me.page(title="Mesop AI Console - Models", path="/models", on_load=on_load) +def models_page(): + with page_scaffold(current_path="/models", title="Models"): + models = store.get_all() + with me.box( + style=me.Style( + display="grid", + grid_template_columns="repeat(3, 1fr)", + gap=16, + align_items="center", + ) + ): + # Header + me.text("ID", style=me.Style(font_weight="bold")) + me.text("Name", style=me.Style(font_weight="bold")) + me.text("Provider", style=me.Style(font_weight="bold")) + # Body + for model in models: + me.button( + model.id, + on_click=lambda e: me.navigate( + "/models/edit", query_params={"id": e.key} + ), + key=model.id, + style=me.Style(font_size=16), + ) + me.text(model.name) + me.text(model.provider) + with me.box(style=me.Style(padding=me.Padding(top=32))): + me.button( + "Add Model", + on_click=lambda e: me.navigate("/models/add"), + type="flat", + color="accent", + ) diff --git a/ai/src/ai/console/pages/producers_page.py b/ai/src/ai/console/pages/producers_page.py new file mode 100644 index 000000000..34cc5bebe --- /dev/null +++ b/ai/src/ai/console/pages/producers_page.py @@ -0,0 +1,64 @@ +import mesop as me +from ai.common.producer import producer_store as store +from ai.console.scaffold import page_scaffold + + +def on_load(e: me.LoadEvent): + me.set_theme_mode("system") + + +@me.page( + title="Mesop AI Console - Producers", path="/producers", on_load=on_load +) +def producers_page(): + with page_scaffold(current_path="/producers", title="Producers"): + producers = store.get_all() + with me.box( + style=me.Style( + display="grid", + grid_template_columns="repeat(5, 1fr)", + gap=16, + align_items="center", + ) + ): + # Header + me.text("ID", style=me.Style(font_weight="bold")) + me.text("Model", style=me.Style(font_weight="bold")) + me.text("Prompt Context", style=me.Style(font_weight="bold")) + me.text("Output Format", style=me.Style(font_weight="bold")) + me.text("Temperature", style=me.Style(font_weight="bold")) + # Body + for producer in producers: + me.button( + producer.id, + on_click=lambda e: me.navigate( + "/producers/edit", query_params={"id": e.key} + ), + key=producer.id, + style=me.Style(font_size=16), + ) + me.button( + producer.mesop_model_id, + on_click=lambda e: me.navigate( + "/models/edit", query_params={"id": e.key} + ), + key=producer.mesop_model_id, + style=me.Style(font_size=16), + ) + me.button( + producer.prompt_context_id, + on_click=lambda e: me.navigate( + "/prompt-contexts/edit", query_params={"id": e.key} + ), + key=producer.prompt_context_id, + style=me.Style(font_size=16), + ) + me.text(producer.output_format) + me.text(str(producer.temperature)) + with me.box(style=me.Style(padding=me.Padding(top=32))): + me.button( + "Add Producer", + on_click=lambda e: me.navigate("/producers/add"), + type="flat", + 
color="accent", + ) diff --git a/ai/src/ai/console/pages/prompt_contexts_page.py b/ai/src/ai/console/pages/prompt_contexts_page.py new file mode 100644 index 000000000..2d4c8ac83 --- /dev/null +++ b/ai/src/ai/console/pages/prompt_contexts_page.py @@ -0,0 +1,55 @@ +import mesop as me +from ai.common.prompt_context import prompt_context_store +from ai.console.scaffold import page_scaffold + + +def on_load(e: me.LoadEvent): + me.set_theme_mode("system") + + +@me.page( + title="Mesop AI Console - Prompt Contexts", + path="/prompt-contexts", + on_load=on_load, +) +def prompt_contexts_page(): + with page_scaffold(current_path="/prompt-contexts", title="Prompt Contexts"): + prompt_contexts = prompt_context_store.get_all() + with me.box( + style=me.Style( + display="grid", + grid_template_columns="400px 400px", + gap=16, + align_items="center", + ) + ): + # Header + me.text("ID", style=me.Style(font_weight="bold")) + me.text("Fragments", style=me.Style(font_weight="bold")) + # Body + for prompt_context in prompt_contexts: + me.button( + prompt_context.id, + on_click=lambda e: me.navigate( + "/prompt-contexts/edit", query_params={"id": e.key} + ), + key=prompt_context.id, + style=me.Style(font_size=16, flex_wrap="wrap", word_wrap="anywhere"), + ) + with me.box(style=me.Style(display="flex-wrap", flex_direction="row")): + for fragment_id in prompt_context.fragment_ids: + me.button( + fragment_id, + on_click=lambda e: me.navigate( + "/prompt-fragments/edit", query_params={"id": e.key} + ), + key=fragment_id, + style=me.Style(font_size=16), + ) + with me.box(style=me.Style(padding=me.Padding(top=32))): + me.button( + "Add Prompt Context", + on_click=lambda e: me.navigate("/prompt-contexts/add"), + type="flat", + color="accent", + ) diff --git a/ai/src/ai/console/pages/prompt_fragments_page.py b/ai/src/ai/console/pages/prompt_fragments_page.py new file mode 100644 index 000000000..24f4feaa1 --- /dev/null +++ b/ai/src/ai/console/pages/prompt_fragments_page.py @@ -0,0 +1,57 @@ +import mesop as me +from ai.common.prompt_fragment import prompt_fragment_store +from ai.console.scaffold import page_scaffold + + +def on_load(e: me.LoadEvent): + me.set_theme_mode("system") + + +@me.page( + title="Mesop AI Console - Prompt Fragments", + path="/prompt-fragments", + on_load=on_load, +) +def prompt_fragments_page(): + with page_scaffold( + current_path="/prompt-fragments", title="Prompt Fragments" + ): + prompt_fragments = prompt_fragment_store.get_all() + with me.box( + style=me.Style( + display="grid", + grid_template_columns="1fr 1fr 1fr 48px", + gap=16, + align_items="center", + ) + ): + # Header + me.text("ID", style=me.Style(font_weight="bold")) + me.text("Contents", style=me.Style(font_weight="bold")) + me.text("Role", style=me.Style(font_weight="bold")) + with me.tooltip(message="Chain of Thought"): + me.text("CoT", style=me.Style(font_weight="bold")) + # Body + for prompt_fragment in prompt_fragments: + me.button( + prompt_fragment.id, + on_click=lambda e: me.navigate( + "/prompt-fragments/edit", query_params={"id": e.key} + ), + key=prompt_fragment.id, + style=me.Style(font_size=16), + ) + if prompt_fragment.content_value: + me.text("Value: " + prompt_fragment.content_value[:10]) + elif prompt_fragment.content_path: + me.text("Path: " + prompt_fragment.content_path) + + me.text(prompt_fragment.role) + me.text(str(prompt_fragment.chain_of_thought)) + with me.box(style=me.Style(padding=me.Padding(top=32))): + me.button( + "Add Prompt Fragment", + on_click=lambda e: me.navigate("/prompt-fragments/add"), + 
type="flat", + color="accent", + ) diff --git a/ai/src/ai/console/scaffold.py b/ai/src/ai/console/scaffold.py new file mode 100644 index 000000000..179eb296b --- /dev/null +++ b/ai/src/ai/console/scaffold.py @@ -0,0 +1,190 @@ +import mesop as me + + +@me.stateclass +class State: + sidenav_menu_open: bool + + +def toggle_menu_button(e: me.ClickEvent): + s = me.state(State) + s.sidenav_menu_open = not s.sidenav_menu_open + + +def is_mobile(): + return me.viewport_size().width < 640 + + +@me.content_component +def page_scaffold(current_path: str = "", title: str = "Mesop AI Console"): + with me.box(style=me.Style(display="flex", height="100%")): + if is_mobile(): + with me.content_button( + type="icon", + style=me.Style(top=6, left=8, position="absolute", z_index=9), + on_click=toggle_menu_button, + ): + me.icon("menu") + with me.sidenav( + opened=me.state(State).sidenav_menu_open, + style=me.Style( + background=me.theme_var("surface-container-low"), + ), + ): + sidenav(current_path) + else: + sidenav(current_path) + with me.box( + style=me.Style( + background=me.theme_var("surface-container-low"), + display="flex", + flex_direction="column", + flex_grow=1, + ) + ): + header(title) + with me.box( + style=me.Style( + background=me.theme_var("background"), + flex_grow=1, + padding=me.Padding( + left=16, + right=16, + top=16, + ), + border_radius=16, + overflow_y="auto", + display="flex", + flex_direction="column", + ) + ): + me.slot() + + +def toggle_theme(e: me.ClickEvent): + if me.theme_brightness() == "light": + me.set_theme_mode("dark") + else: + me.set_theme_mode("light") + + +def header(title: str): + with me.box( + style=me.Style( + height=64, + width="100%", + padding=me.Padding.all(16), + display="flex", + align_items="center", + ), + ): + me.text( + title, + style=me.Style( + color=me.theme_var("on-background"), + font_size=22, + font_weight=500, + letter_spacing="0.8px", + padding=me.Padding(left=36) if is_mobile() else None, + ), + ) + + with me.content_button( + type="icon", + style=me.Style(position="absolute", right=4, top=8), + on_click=toggle_theme, + ): + me.icon("light_mode" if me.theme_brightness() == "dark" else "dark_mode") + + +def sidenav(current_path: str): + with me.box( + style=me.Style( + width=240, + min_width=240, + max_width=240, + height="100%", + background=me.theme_var("surface-container-low"), + padding=me.Padding.all(16), + ) + ): + with me.box( + style=me.Style( + padding=me.Padding(top=24), + display="flex", + flex_direction="column", + gap=12, + ), + ): + nav_link("Home", icon="home", path="/", current_path=current_path) + nav_link("Evals", icon="labs", path="/evals", current_path=current_path) + nav_link( + "Producers", + icon="precision_manufacturing", + path="/producers", + current_path=current_path, + ) + nav_link( + "Models", icon="model", path="/models", current_path=current_path + ) + me.text( + "Prompts", + style=me.Style( + font_weight=500, font_size=16, margin=me.Margin(top=4, left=4) + ), + ) + nav_link( + "Prompt Contexts", + icon="notebook", + path="/prompt-contexts", + current_path=current_path, + ) + nav_link( + "Prompt Fragments", + icon="description", + path="/prompt-fragments", + current_path=current_path, + ) + me.text( + "Examples", + style=me.Style( + font_weight=500, font_size=16, margin=me.Margin(top=4, left=4) + ), + ) + nav_link( + "Expected Examples", + icon="labs", + path="/expected-examples", + current_path=current_path, + ) + nav_link( + "Golden Examples", + icon="school", + path="/golden-examples", + current_path=current_path, 
+ ) + + +def nav_link( + label: str, icon: str, path: str, current_path: str, nested: bool = False +): + with me.box( + style=me.Style( + cursor="pointer", + margin=me.Margin(left=32) if nested else None, + padding=me.Padding.all(12), + border_radius=12, + display="flex", + align_items="center", + gap=12, + background=me.theme_var("secondary-container") + if path == current_path + else None, + font_weight=500, + font_size=16, + ), + key=path, + on_click=lambda e: me.navigate(e.key), + ): + me.icon(icon) + me.text(label) diff --git a/ai/src/ai/offline_common/eval.py b/ai/src/ai/offline_common/eval.py new file mode 100644 index 000000000..56035d9cf --- /dev/null +++ b/ai/src/ai/offline_common/eval.py @@ -0,0 +1,215 @@ +import base64 +import concurrent.futures +import os +import subprocess +import time +from concurrent.futures import ThreadPoolExecutor +from typing import Literal + +import requests +from pydantic import BaseModel + +from ai.common.entity_store import EntityStore, get_data_path +from ai.common.example import ( + EvaluatedExample, + EvaluatedExampleOutput, + ExampleOutput, + ExpectedExample, + ExpectResult, + expected_example_store, +) +from ai.common.executor import ProducerExecutor + +SANDBOX_URL = "http://localhost:8080" + + +class EvalOutcome(BaseModel): + examples_run: int + examples_succeeded: int + score: float # sum of scores across expect_results in examples + max_score: float # potential max score across expect_results in examples + + +class Eval(BaseModel): + id: str + producer_id: str + state: Literal["pending", "running", "complete", "failed"] = "pending" + eval_outcome: EvalOutcome | None = None + + +eval_store = EntityStore(Eval, dirname="evals") + + +def get_eval_example(eval_id: str, example_id: str) -> EvaluatedExample: + eval_path = get_data_path(os.path.join("evals", eval_id)) + if not os.path.exists(eval_path): + raise ValueError(f"Eval {eval_id} example {example_id} not found") + with open(os.path.join(eval_path, example_id, "evaluated_example.json")) as f: + evaluated_example = EvaluatedExample.model_validate_json(f.read()) + with open(os.path.join(eval_path, example_id, "output.txt")) as f: + evaluated_example.outputs[0].output.raw_output = f.read() + with open(os.path.join(eval_path, example_id, "patched.py")) as f: + evaluated_example.outputs[0].output.output_code = f.read() + return evaluated_example + + +def get_eval_examples(eval_id: str) -> list[EvaluatedExample]: + eval_path = get_data_path(os.path.join("evals", eval_id)) + if not os.path.exists(eval_path): + return [] + examples: list[EvaluatedExample] = [] + for file in os.listdir(eval_path): + with open(os.path.join(eval_path, file, "evaluated_example.json")) as f: + examples.append(EvaluatedExample.model_validate_json(f.read())) + return examples + + +class EvalRunner: + def __init__(self, eval: Eval): + self.eval = eval + self.producer_executor = ProducerExecutor(self.eval.producer_id) + self.eval_path = get_data_path(os.path.join("evals", self.eval.id)) + + def run(self): + os.makedirs(self.eval_path, exist_ok=True) + + examples = expected_example_store.get_all() + eval_outcome = EvalOutcome( + examples_run=0, examples_succeeded=0, score=0, max_score=0 + ) + + try: + with ThreadPoolExecutor() as executor: + future_to_example = { + executor.submit(self.eval_example, example): example + for example in examples + } + for future in concurrent.futures.as_completed(future_to_example): + evaluated_example = future.result() + eval_outcome.examples_run += 1 + for result in 
evaluated_example.outputs[0].expect_results:
+            eval_outcome.score += result.score
+            eval_outcome.max_score += 1
+          if all(
+            result.score == 1
+            for result in evaluated_example.outputs[0].expect_results
+          ):
+            eval_outcome.examples_succeeded += 1
+          print("---")
+          print("Processed:", evaluated_example.expected.id)
+          print(
+            f"Examples succeeded: {eval_outcome.examples_succeeded}/{len(examples)}"
+          )
+          print("---")
+    except Exception as e:
+      self.eval.state = "failed"
+      eval_store.save(self.eval, overwrite=True)
+      raise e
+
+    self.eval.state = "complete"
+    self.eval.eval_outcome = eval_outcome
+    eval_store.save(self.eval, overwrite=True)
+
+  def eval_example(self, example: ExpectedExample) -> EvaluatedExample:
+    example_path = os.path.join(self.eval_path, example.id)
+    os.makedirs(example_path)
+
+    start_time = time.time()
+    output = self.producer_executor.execute(example.input)
+    end_time = time.time()
+    time_elapsed = end_time - start_time
+    with open(os.path.join(example_path, "output.txt"), "w") as f:
+      f.write(output)
+
+    evaluated_example_output = EvaluatedExampleOutput(
+      time_spent_secs=time_elapsed,
+      tokens=int(len(output) / 4),  # rough estimate: ~4 chars per token
+      output=ExampleOutput(
+        output_type=self.producer_executor.producer.output_format,
+      ),
+      expect_results=[],
+    )
+
+    patched_code = self.producer_executor.transform_output(
+      input_code=example.input.input_code or "", output=output
+    )
+    evaluated_example_output.expect_results.append(
+      ExpectResult(
+        name="patchable",
+        score=0 if patched_code.has_error else 1,
+        message=patched_code.result if patched_code.has_error else "Success",
+      )
+    )
+    if not patched_code.has_error:
+      patched_code_path = os.path.join(example_path, "patched.py")
+      with open(patched_code_path, "w") as f:
+        f.write(patched_code.result)
+
+      self.check_executable(evaluated_example_output, patched_code_path)
+      self.check_type_checkable(evaluated_example_output, patched_code_path)
+    evaluated_example = EvaluatedExample(
+      expected=example,
+      outputs=[evaluated_example_output],
+    )
+
+    with open(os.path.join(example_path, "evaluated_example.json"), "w") as f:
+      f.write(evaluated_example.model_dump_json(indent=4))
+    return evaluated_example
+
+  def check_type_checkable(
+    self,
+    evaluated_example_output: EvaluatedExampleOutput,
+    patched_code_path: str,
+  ):
+    try:
+      subprocess.run(
+        [
+          "yarn",
+          "pyright",
+          patched_code_path,
+        ],
+        capture_output=True,
+        text=True,
+        check=True,
+      )
+      evaluated_example_output.expect_results.append(
+        ExpectResult(
+          name="type_checkable",
+          score=1,
+          message="Success",
+        )
+      )
+    except subprocess.CalledProcessError as e:
+      evaluated_example_output.expect_results.append(
+        ExpectResult(
+          name="type_checkable", score=0, message=e.stdout + e.stderr
+        )
+      )
+
+  def check_executable(
+    self,
+    evaluated_example_output: EvaluatedExampleOutput,
+    patched_code_path: str,
+  ):
+    with open(patched_code_path) as f:
+      code = f.read()
+    result = requests.post(
+      SANDBOX_URL + "/exec-py",
+      data={"code": base64.b64encode(code.encode("utf-8"))},
+    )
+    if result.status_code == 200:
+      evaluated_example_output.expect_results.append(
+        ExpectResult(
+          name="executable",
+          score=1,
+          message="Success",
+        )
+      )
+    else:
+      evaluated_example_output.expect_results.append(
+        ExpectResult(
+          name="executable",
+          score=0,
+          message=result.text,
+        )
+      )
diff --git a/ai/src/ai/offline_common/golden_dataset.py b/ai/src/ai/offline_common/golden_dataset.py
new file mode 100644
index 000000000..955839d7d
--- /dev/null
+++
b/ai/src/ai/offline_common/golden_dataset.py
@@ -0,0 +1,9 @@
+from ai.common.example import GoldenExample
+from ai.common.prompt_context import PromptContext
+
+
+def create_golden_dataset(
+  examples: list[GoldenExample], prompt_context: PromptContext
+) -> str:
+  print("create_golden_dataset", examples, prompt_context)
+  return "not yet implemented"
diff --git a/ai/src/console.py b/ai/src/console.py
new file mode 100644
index 000000000..05077ac87
--- /dev/null
+++ b/ai/src/console.py
@@ -0,0 +1,68 @@
+import ai.console.scaffold as scaffold
+import mesop as me
+from ai.console.pages import add_edit_eval_page as add_edit_eval_page
+from ai.console.pages import (
+  add_edit_expected_examples_page as add_edit_expected_examples_page,
+)
+from ai.console.pages import (
+  add_edit_golden_examples_page as add_edit_golden_examples_page,
+)
+from ai.console.pages import add_edit_model_page as add_edit_model_page
+from ai.console.pages import add_edit_producer_page as add_edit_producer_page
+from ai.console.pages import (
+  add_edit_prompt_context_page as add_edit_prompt_context_page,
+)
+from ai.console.pages import (
+  add_edit_prompt_fragment_page as add_edit_prompt_fragment_page,
+)
+from ai.console.pages import (
+  create_golden_dataset_page as create_golden_dataset_page,
+)
+from ai.console.pages import eval_item_page as eval_item_page
+from ai.console.pages import eval_page as eval_page
+from ai.console.pages import evals_page as evals_page
+from ai.console.pages import expected_examples_page as expected_examples_page
+from ai.console.pages import golden_examples_page as golden_examples_page
+from ai.console.pages import models_page as models_page
+from ai.console.pages import producers_page as producers_page
+from ai.console.pages import prompt_contexts_page as prompt_contexts_page
+from ai.console.pages import prompt_fragments_page as prompt_fragments_page
+
+
+def on_load(e: me.LoadEvent):
+  me.set_theme_mode("system")
+
+
+@me.page(title="Mesop AI Console", path="/", on_load=on_load)
+def index_page():
+  with scaffold.page_scaffold(current_path="/", title="Home"):
+    me.markdown(
+      """
+# Mesop AI Console Overview
+
+## Principles
+
+- **Version Control**: Mesop AI Console is a UI on top of the [mesop-data](https://huggingface.co/datasets/wwwillchen/mesop-data) Git repo.
+  - If you make changes, you should `cd ai/data`, commit the changes, and push them (or open a pull request).
+
+## Core Entities
+
+- **Producer**: A producer fully specifies how to call a model, including configurations like temperature, prompt context, and how to process its outputs (e.g. taking the diff and applying it to the input code).
+  - A producer can be used for inference (online) or evaluation (offline).
+  - Producer = Model + Prompt Context + Settings (e.g. temperature, output format)
+
+- **Prompt Context**: A prompt context is a prompt template with variables that are filled in at execution time.
+  - A prompt context consists of one or more prompt fragments.
+
+- **Prompt Fragment**: A prompt fragment is a chunk of a prompt for a specific role, e.g. `user` or `system`.
+  - Note: you can have multiple fragments with the same role, which are effectively concatenated together.
+
+- **Example**: An example is a single input/output pair.
+  - Examples are used for fine-tuning a model (i.e. golden example) or running an eval (i.e. expected example).
+  - There are two types of examples:
+    - **Golden Example**: A golden example is an example that is used to create a golden dataset.
+ - **Expected Example**: An expected example is an example that is used to evaluate a producer. + Internally, once an expected example has been run through an eval, we create an **evaluated example**, but you don't need to create this manually in the UI. + """, + style=me.Style(line_height=1.5), + ) diff --git a/ai/src/migrate_goldens.py b/ai/src/migrate_goldens.py new file mode 100644 index 000000000..7967622c4 --- /dev/null +++ b/ai/src/migrate_goldens.py @@ -0,0 +1,55 @@ +import json +import os + +from ai.common.example import ( + ExampleInput, + ExampleOutput, + GoldenExample, + golden_example_store, +) + +OLD_GOLDENS_DIR = os.path.join(os.path.dirname(__file__), "..", "ft", "goldens") +NEW_GOLDENS_DIR = os.path.join( + os.path.dirname(__file__), "..", "data", "golden_examples" +) + + +def migrate_goldens(): + for filename in os.listdir(OLD_GOLDENS_DIR): + old_dir_path = os.path.join(OLD_GOLDENS_DIR, filename) + if not os.path.isdir(old_dir_path): + continue + with open(os.path.join(old_dir_path, "diff.txt")) as f: + diff = f.read() + with open(os.path.join(old_dir_path, "prompt.txt")) as f: + prompt = f.read() + source = None + if os.path.exists(os.path.join(old_dir_path, "source.py")): + with open(os.path.join(old_dir_path, "source.py")) as f: + source = f.read() + with open(os.path.join(old_dir_path, "patched.py")) as f: + patched = f.read() + line_number = None + if os.path.exists(os.path.join(old_dir_path, "metadata.json")): + with open(os.path.join(old_dir_path, "metadata.json")) as f: + metadata = json.load(f) + line_number = metadata.get("line_number", None) + golden_example = GoldenExample( + id=filename, + input=ExampleInput( + prompt=prompt, input_code=source, line_number_target=line_number + ), + output=ExampleOutput( + output_code=patched, raw_output=diff, output_type="diff" + ), + # diff=diff, + # prompt=prompt, + # source=source, + # patched=patched, + # metadata=metadata, + ) + golden_example_store.save(golden_example) + + +if __name__ == "__main__": + migrate_goldens() diff --git a/ai/src/service.py b/ai/src/service.py index 968824ae9..546ad28d2 100644 --- a/ai/src/service.py +++ b/ai/src/service.py @@ -8,7 +8,10 @@ from flask import Flask, Response, request, stream_with_context -from ai.common.llm_lib import adjust_mesop_app_stream, apply_patch +from ai.common.example import ExampleInput +from ai.common.executor import ( + ProducerExecutor, +) app = Flask(__name__) @@ -53,6 +56,9 @@ def save_interaction_endpoint() -> Response | dict[str, str]: return {"folder": folder_name} +DEFAULT_PRODUCER_ID = "openai-gpt4o-mini-ft-2024-08-default" + + @app.route("/adjust-mesop-app", methods=["POST"]) def adjust_mesop_app_endpoint(): data = request.json @@ -65,23 +71,24 @@ def adjust_mesop_app_endpoint(): return Response("Both 'code' and 'prompt' are required", status=400) def generate(): - stream = adjust_mesop_app_stream( - code=code, - user_input=prompt, - line_number=line_number, + executor = ProducerExecutor(DEFAULT_PRODUCER_ID) + stream = executor.execute_stream( + ExampleInput( + input_code=code, prompt=prompt, line_number_target=line_number + ) ) - diff = "" + + acc = "" for chunk in stream: - if chunk: - diff += chunk - yield f"data: {json.dumps({'type': 'progress', 'data': chunk})}\n\n" + acc += chunk + yield f"data: {json.dumps({'type': 'progress', 'data': chunk})}\n\n" - result = apply_patch(code, diff) + result = executor.transform_output(input_code=code, output=acc) if result.has_error: yield f"data: {json.dumps({'type': 'error', 'error': result.result})}\n\n" return 
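+      # Terminal SSE event: deliver the fully patched code plus the raw diff.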
- yield f"data: {json.dumps({'type': 'end', 'code': result.result, 'diff': diff})}\n\n" + yield f"data: {json.dumps({'type': 'end', 'code': result.result, 'diff': acc})}\n\n" return Response( stream_with_context(generate()), content_type="text/event-stream"
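Taken together, the console overview above describes a small, composable data model. The sketch below shows how the pieces could fit at runtime. It is illustrative only: the entity class names and constructor fields (`Model`, `Producer`, `PromptFragment`, `PromptContext`, and all the IDs used) are assumptions inferred from the console page columns in this patch, not the exact pydantic schemas in `ai/common/*.py`; only the store objects, `ProducerExecutor`, and `ExampleInput` appear verbatim in the patch.

```python
# Illustrative sketch; class names and fields are assumptions inferred from
# the console pages (models, producers, prompt contexts, prompt fragments).
from ai.common.example import ExampleInput
from ai.common.executor import ProducerExecutor
from ai.common.model import Model, model_store
from ai.common.producer import Producer, producer_store
from ai.common.prompt_context import PromptContext, prompt_context_store
from ai.common.prompt_fragment import PromptFragment, prompt_fragment_store

# A model is a provider-specific LLM endpoint (columns: ID, Name, Provider).
model_store.save(Model(id="gpt-4o-mini", name="gpt-4o-mini", provider="openai"))

# Fragments are per-role prompt chunks; a context stitches them together.
prompt_fragment_store.save(
  PromptFragment(
    id="system-base",
    role="system",
    content_value="You are an expert Mesop developer.",
    chain_of_thought=False,
  )
)
prompt_context_store.save(
  PromptContext(id="default-context", fragment_ids=["system-base"])
)

# Producer = Model + Prompt Context + Settings.
producer_store.save(
  Producer(
    id="demo-producer",
    mesop_model_id="gpt-4o-mini",
    prompt_context_id="default-context",
    output_format="diff",
    temperature=0.8,
  )
)

# The same producer drives online inference and offline evals.
executor = ProducerExecutor("demo-producer")
raw_output = executor.execute(ExampleInput(prompt="Make the button red"))
patched = executor.transform_output(input_code="", output=raw_output)
if not patched.has_error:
  print(patched.result)
```

If the field names differ in practice, the console's add/edit pages (e.g. `/producers/add`) are the authoritative way to create these entities, since each page is backed by the same entity stores.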