feat: InstructionFinetuningDataRepository
WIP: implement initial interface

WIP: minimal working implementation

WIP: store multiple samples for postgres repo

WIP: poetry lock, linting

WIP: actually running poetry lock

WIP: separate functions for single and batch storing

WIP: test sample validations

WIP: `InstructionFinetuningDataHandler`

WIP: Support filtering

WIP: linting
NickyHavoc committed Sep 27, 2024
1 parent 7ccd666 commit 1c8e68a
Showing 15 changed files with 2,412 additions and 810 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -33,7 +33,7 @@ repos:
"-L",
"newyorker,te,responde,ist,als,oder,technik,sie,rouge,unter,juli,fiel,couldn,mke, vor,fille,ans",
]
exclude: '^(poetry\.lock|tests/connectors/retrievers/test_document_index_retriever\.py|src/intelligence_layer/examples/qa/multiple_chunk_qa.py|src/intelligence_layer/examples/summarize/.*|tests/connectors/retrievers/test_document_index_retriever\.py|src/intelligence_layer/examples/classify/keyword_extract.py|tests/examples/summarize/test_single_chunk_few_shot_summarize.py|tests/examples/summarize/very_long_text.txt)$'
exclude: '^(poetry\.lock|tests/connectors/retrievers/test_document_index_retriever\.py|src/intelligence_layer/examples/qa/multiple_chunk_qa.py|src/intelligence_layer/examples/summarize/.*|tests/connectors/retrievers/test_document_index_retriever\.py|src/intelligence_layer/examples/classify/keyword_extract.py|tests/examples/summarize/test_single_chunk_few_shot_summarize.py|tests/examples/summarize/very_long_text.txt|src/intelligence_layer/learning/enrich.py)$'
- repo: https://github.com/akaihola/darglint2
rev: v1.8.2
hooks:
1,924 changes: 1,119 additions & 805 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions pyproject.toml
@@ -31,6 +31,8 @@ rouge-score = "^0.1.2"
sacrebleu = "^2.4.3"
lingua-language-detector = "^2.0.2"
argilla = "^2.1.0"
sqlalchemy = "^2.0.35"
psycopg2-binary = "^2.9.9"

[tool.poetry.group.dev.dependencies]
# lint & format
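The two new dependencies back the `PostgresInstructionFinetuningDataRepository` added later in this commit. A minimal sketch of wiring up a SQLAlchemy engine against Postgres (the connection URL is a placeholder, and how the repository consumes the engine is an assumption, not shown in this diff):

# Sketch only: placeholder URL; repository wiring is an assumption.
from sqlalchemy import create_engine

engine = create_engine(
    "postgresql+psycopg2://user:password@localhost:5432/finetuning_db"
)
with engine.connect() as connection:
    assert not connection.closed  # the engine can reach the database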
2 changes: 2 additions & 0 deletions src/intelligence_layer/core/__init__.py
@@ -15,13 +15,15 @@
from .echo import TokenWithLogProb as TokenWithLogProb
from .instruct import Instruct as Instruct
from .instruct import InstructInput as InstructInput
from .model import AlephAlphaChatModel as AlephAlphaChatModel
from .model import AlephAlphaModel as AlephAlphaModel
from .model import ChatModel as ChatModel
from .model import CompleteInput as CompleteInput
from .model import CompleteOutput as CompleteOutput
from .model import ControlModel as ControlModel
from .model import ExplainInput as ExplainInput
from .model import ExplainOutput as ExplainOutput
from .model import FinetuningMessage as FinetuningMessage
from .model import LanguageModel as LanguageModel
from .model import Llama2InstructModel as Llama2InstructModel
from .model import Llama3ChatModel as Llama3ChatModel
83 changes: 80 additions & 3 deletions src/intelligence_layer/core/model.py
@@ -171,11 +171,17 @@ def echo(
...


class Message(BaseModel):
class Message(BaseModel, frozen=True):
role: Literal["system", "user", "assistant"]
content: str


class FinetuningMessage(BaseModel):
has_loss: bool
content: str
type: str = "text"
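For context, `FinetuningMessage` is a plain record; a sketch of constructing one by hand (values invented), where `has_loss` marks whether a trainer computes loss on this chunk:

# Invented values; the llama-3 helper below sets has_loss=True only for assistant turns.
msg = FinetuningMessage(has_loss=True, content="Hello, how can I help?<|eot_id|>")
assert msg.type == "text"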


class ChatModel(LanguageModel):
"""Abstract base class to implement any model that supports chat."""

@@ -553,8 +559,22 @@ class AlephAlphaChatModel(ChatModel, AlephAlphaModel):
CHAT_PROMPT_TEMPLATE: PromptTemplate
RECOMMENDED_MODELS: ClassVar[list[str]] = []

@abstractmethod
def to_finetuning_sample(
self, messages: Sequence[Message]
) -> Sequence[FinetuningMessage]:
"""Abstract function allowing a user to what the model's finetuning samples should look like.
Args:
messages: The messages making up the finetuning sample
Returns:
A finetuning sample containing the input messages
"""
...

def to_chat_prompt(
self, messages: list[Message], response_prefix: str | None = None
self, messages: Sequence[Message], response_prefix: str | None = None
) -> RichPrompt:
"""Method to create a chat-`RichPrompt` object to use with any `AlephAlphaModel`.
@@ -564,13 +584,16 @@ def to_chat_prompt(
messages: A number of messages to use as prompt for the model
response_prefix: Append the given string to the beginning of the final agent message to
steer the generation. Defaults to None.
Returns:
A RichPrompt object to be consumed by the Aleph Alpha client
"""
return self.CHAT_PROMPT_TEMPLATE.to_rich_prompt(
messages=[m.model_dump() for m in messages], response_prefix=response_prefix
)

def generate_chat(
self, messages: list[Message], response_prefix: str | None, tracer: Tracer
self, messages: Sequence[Message], response_prefix: str | None, tracer: Tracer
) -> str:
"""Generate a raw completion to messages for any `AlephAlphaChatModel`.
@@ -579,6 +602,9 @@ def generate_chat(
response_prefix: Optional argument to append a string to the beginning of the
final agent message to steer the generation
tracer: Valid instance of a tracer
Returns:
An LLM completion
"""
prompt = self.to_chat_prompt(messages, response_prefix)
prompt_item = prompt.items[0]
@@ -609,6 +635,47 @@ def echo_chat(
)


def to_llama_3_finetuning_sample(
messages: Sequence[Message], eot_token: str
) -> Sequence[FinetuningMessage]:
"""Turn a sequence of messages into a finetuning train sample using the llama-3 format.
Args:
messages: The messages making up the finetuning sample
eot_token: The end-of-turn token used to separate the messages
Returns:
A sequence of formatted message for finetuning
"""

def get_content(
message: Message, is_first_message: bool, is_preceding_assistant_message: bool
) -> str:
prompt = "<|begin_of_text|>" if is_first_message else ""
prompt += (
    f"<|start_header_id|>{message.role}<|end_header_id|>\n\n{message.content}{eot_token}"
    if message.role != "assistant"
    else f"{message.content}{eot_token}"
)
if is_preceding_assistant_message:
prompt += "<|start_header_id|>assistant<|end_header_id|>\n\n"
return prompt

return [
FinetuningMessage(
has_loss=message.role == "assistant",
content=get_content(
message,
index == 0,
messages[index + 1].role == "assistant"
if index + 1 < len(messages)
else False,
),
)
for index, message in enumerate(messages)
]
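To illustrate, a hedged sketch of what this helper returns for a two-turn exchange (message contents invented):

# Invented messages; the token layout follows the llama-3 format built above.
messages = [
    Message(role="user", content="Hi!"),
    Message(role="assistant", content="Hello, how can I help?"),
]
sample = to_llama_3_finetuning_sample(messages, eot_token="<|eot_id|>")
# sample[0]: has_loss=False, content starts with "<|begin_of_text|>" and ends with
#   the "<|start_header_id|>assistant<|end_header_id|>\n\n" generation header
# sample[1]: has_loss=True, content == "Hello, how can I help?<|eot_id|>"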


class Pharia1ChatModel(AlephAlphaChatModel):
"""Chat model to be used for any `"Pharia-1-LLM-*` model.
@@ -648,6 +715,11 @@ def complete(self, input: CompleteInput, tracer: Tracer) -> CompleteOutput:
def eot_token(self) -> str:
return "<|endoftext|>"

def to_finetuning_sample(
self, messages: Sequence[Message]
) -> Sequence[FinetuningMessage]:
return to_llama_3_finetuning_sample(messages, self.eot_token)


class Llama3ChatModel(AlephAlphaChatModel):
"""Chat model to be used for `llama-3-*` and `llama-3.1-*` models.
@@ -678,3 +750,8 @@ def __init__(
@property
def eot_token(self) -> str:
return "<|eot_id|>"

def to_finetuning_sample(
self, messages: Sequence[Message]
) -> Sequence[FinetuningMessage]:
return to_llama_3_finetuning_sample(messages, self.eot_token)
23 changes: 23 additions & 0 deletions src/intelligence_layer/learning/__init__.py
@@ -0,0 +1,23 @@
from .enrich import EnrichDomain as EnrichDomain
from .enrich import EnrichQuality as EnrichQuality
from .instruction_finetuning_data_handler import EnrichAction as EnrichAction
from .instruction_finetuning_data_handler import (
InstructionFinetuningDataHandler as InstructionFinetuningDataHandler,
)
from .instruction_finetuning_data_repository import (
InstructionFinetuningDataRepository as InstructionFinetuningDataRepository,
)
from .models import InstructionFinetuningSample as InstructionFinetuningSample
from .models import (
InstructionFinetuningSample_ as InstructionFinetuningSample_,
)
from .models import (
InstructionFinetuningSampleAttributes as InstructionFinetuningSampleAttributes,
)
from .models import InvalidSampleError as InvalidSampleError
from .models import RawInstructionFinetuningSample as RawInstructionFinetuningSample
from .postgres_instruction_finetuning_data_repository import (
PostgresInstructionFinetuningDataRepository as PostgresInstructionFinetuningDataRepository,
)

__all__ = [symbol for symbol in dir()]
163 changes: 163 additions & 0 deletions src/intelligence_layer/learning/enrich.py
@@ -0,0 +1,163 @@
import json
from collections.abc import Mapping, Sequence
from typing import Any, Literal, Optional

from liquid import Template
from pydantic import BaseModel

from intelligence_layer.core.detect_language import Language
from intelligence_layer.core.model import ChatModel, Llama3ChatModel, Message
from intelligence_layer.core.task import Task
from intelligence_layer.core.tracer.tracer import TaskSpan


class EnrichmentInput(BaseModel):
messages: Sequence[Message]
language: Language


class EnrichDomainConfig(BaseModel):
prompt_template: str
system_verbose: str
user_verbose: str
assistant_verbose: str


ENRICH_DOMAIN_INSTRUCTIONS = {
Language("en"): EnrichDomainConfig(
prompt_template="Classify the given text into one of these domains: {{domains}}.",
system_verbose="System instructions",
user_verbose="User message",
assistant_verbose="Assistant message",
),
Language("de"): EnrichDomainConfig(
prompt_template="Ordne den gegebenen Text in eine der folgenden Domänen ein: {{domains}}.",
system_verbose="Systeminstruktionen",
user_verbose="Nutzernachricht",
assistant_verbose="Assistentennachricht",
),
}


class EnrichDomain(Task[EnrichmentInput, Optional[str]]):
def __init__(
self,
domains: Sequence[str],
chat_model: Optional[ChatModel] = None,
instruction_config: Mapping[
Language, EnrichDomainConfig
] = ENRICH_DOMAIN_INSTRUCTIONS,
) -> None:
self._domains = domains
self._chat_model = chat_model or Llama3ChatModel()
self._instruction_config = instruction_config

def do_run(self, input: EnrichmentInput, task_span: TaskSpan) -> Optional[str]:
instruction_config = input.language.language_config(self._instruction_config)
instruction = Template(instruction_config.prompt_template).render(
domains=", ".join(self._domains)
)
generation = self._chat_model.generate_chat(
messages=[
Message(role="system", content=instruction),
Message(
role="user",
content=self.input_messages_to_text(
input.messages, instruction_config
),
),
],
response_prefix=None,
tracer=task_span,
)
return next((d for d in self._domains if d in generation), None)

@staticmethod
def input_messages_to_text(
messages: Sequence[Message], instruction_config: EnrichDomainConfig
) -> str:
role_mapping = {
"system": instruction_config.system_verbose,
"user": instruction_config.user_verbose,
"assistant": instruction_config.assistant_verbose,
}

def get_role(role: Literal["system", "user", "assistant"]) -> str:
if role not in role_mapping:
raise ValueError(f"Got unexpected role in messages: {role}.")
return role_mapping[role]

return "\n\n".join(f"{get_role(m.role)}: {m.content}" for m in messages)


class EnrichQualityConfig(BaseModel):
system_prompt: str
final_user_prompt: str
grading_scale: Mapping[Any, int]


ENRICH_QUALITY_INSTRUCTIONS = {
Language("en"): EnrichQualityConfig(
system_prompt="You pretend to be an AI assistant assisting the user with his queries. At the end, you will be asked to critique your own responses with regard to their helpfulness.",
final_user_prompt="""Now, critique all past responses.
The score should be given in the form of an American school grade, with "A" meaning exceptional performance and "F" meaning bad performance. Please respond with a JSON representing the evaluation. Respond in the format:
```
{
"explanation": "One short and concise sentence explaining the evaluation, avoiding any potential bias. Use no more than 3 sentences.",
"grade": Literal[A, B, C, D, E, F]
}
```
""",
grading_scale={"A": 5, "B": 4, "C": 3, "D": 2, "F": 1},
),
Language("de"): EnrichQualityConfig(
system_prompt="Gib vor, ein KI-Assistent zu sein, der dem Benutzer bei seinen Fragen hilft. Am Ende wirst du gebeten, deine eigenen Antworten auf ihre Hilfsbereitschaft hin zu überprüfen.",
final_user_prompt="""Bewerte nun alle bisherigen Antworten.
Die Bewertung sollte in Form einer deutschen Schulnote erfolgen, wobei "1" für hervorragende Leistung und "6" für schlechte Leistung steht. Bitte antworte mit einem JSON, das die Bewertung darstellt. Antworte in diesem Format:
```
{
"explanation": „Ein kurzer und prägnanter Satz zur Erläuterung der Bewertung, wobei eine mögliche Voreingenommenheit zu vermeiden ist. Verwenden Sie nicht mehr als 3 Sätze.“,
"grade": Literal[1, 2, 3, 4, 5, 6]
}
```
""",
grading_scale={"1": 5, "2": 4, "3": 3, "4": 2, "5": 1, "6": 1},
),
}


class EnrichQuality(Task[EnrichmentInput, Optional[int]]):
def __init__(
self,
chat_model: Optional[ChatModel] = None,
instruction_config: Mapping[
Language, EnrichQualityConfig
] = ENRICH_QUALITY_INSTRUCTIONS,
) -> None:
self._chat_model = chat_model or Llama3ChatModel(name="llama-3.1-70b-instruct")
self._instruction_config = instruction_config

def do_run(self, input: EnrichmentInput, task_span: TaskSpan) -> Optional[int]:
instruction_config = input.language.language_config(self._instruction_config)
response_prefix = "```\n{"
generation = response_prefix + self._chat_model.generate_chat(
messages=[
Message(role="system", content=instruction_config.system_prompt),
*input.messages,
Message(role="user", content=instruction_config.final_user_prompt),
],
response_prefix=response_prefix,
tracer=task_span,
)
try:
return self._parse_response(generation, instruction_config)
except Exception:
return None

@staticmethod
def _parse_response(
generation: str, instruction_config: EnrichQualityConfig
) -> Optional[int]:
loaded_json: Mapping[str, Any] = json.loads(generation.replace("```", ""))
generated_grade = loaded_json.get("grade")
return instruction_config.grading_scale.get(str(generated_grade))
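Finally, a hedged usage sketch for the enrichment tasks (tracer choice and model availability are assumptions; `InMemoryTracer` and `Task.run` come from `intelligence_layer.core`):

# Usage sketch; domains, messages, and tracer are invented for illustration.
from intelligence_layer.core import InMemoryTracer

enrich_domain = EnrichDomain(domains=["finance", "medicine", "law"])
domain = enrich_domain.run(
    EnrichmentInput(
        messages=[Message(role="user", content="How do I dispute an invoice?")],
        language=Language("en"),
    ),
    InMemoryTracer(),
)
# domain is the first configured label found in the model's answer, or None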