Skip to content

Commit

Permalink
Merge pull request #39 from Kiln-AI/main
Browse files Browse the repository at this point in the history
Add docs for v0.6.1 (redux)
  • Loading branch information
scosman authored Nov 20, 2024
2 parents e290aa0 + 4cb1554 commit 8d95683
Showing 1 changed file with 67 additions and 0 deletions.
67 changes: 67 additions & 0 deletions libs/core/kiln_ai/adapters/data_gen/data_gen_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,16 @@


class DataGenCategoriesTaskInput(BaseModel):
"""Input model for generating categories/subtopics.
Attributes:
node_path: List of strings representing the hierarchical path to current node
system_prompt: System prompt to guide the AI generation
num_subtopics: Number of subtopics to generate
human_guidance: Optional human guidance to influence generation
existing_topics: Optional list of existing topics to avoid duplication
"""

node_path: list[str]
system_prompt: str
num_subtopics: int
Expand All @@ -27,6 +37,18 @@ def from_task(
human_guidance: str | None = None,
existing_topics: list[str] | None = None,
) -> "DataGenCategoriesTaskInput":
"""Create a DataGenCategoriesTaskInput instance from a Task.
Args:
task: The source Task object
node_path: Path to current node in topic hierarchy
num_subtopics: Number of subtopics to generate
human_guidance: Optional guidance for generation
existing_topics: Optional list of existing topics
Returns:
A new DataGenCategoriesTaskInput instance
"""
prompt_builder = SimplePromptBuilder(task=task)
return cls(
node_path=node_path,
Expand All @@ -38,10 +60,22 @@ def from_task(


class DataGenCategoriesTaskOutput(BaseModel):
    """Output model for generated categories/subtopics.

    Attributes:
        subtopics: List of generated subtopic strings
    """

    # The generated subtopics, one string per subtopic.
    # NOTE(review): pydantic can surface the class docstring as the schema
    # "description"; if this model's JSON schema is sent to an LLM, confirm
    # before rewording the docstring above.
    subtopics: list[str]


class DataGenCategoriesTask(Task, parent_of={}):
"""Task for generating hierarchical categories/subtopics.
Generates synthetic data categories which can be used to generate
training data for model learning.
"""

def __init__(self):
# Keep the typechecker happy. TODO: shouldn't need this or parent_of above.
tmp_project = Project(name="DataGen")
Expand All @@ -60,6 +94,15 @@ def __init__(self):


class DataGenSampleTaskInput(BaseModel):
"""Input model for generating data samples for a kiln task.
Attributes:
topic: List of strings representing the topic path
system_prompt: System prompt to guide the AI generation
num_samples: Number of samples to generate
human_guidance: Optional human guidance to influence generation
"""

topic: list[str]
system_prompt: str
num_samples: int
Expand All @@ -73,6 +116,17 @@ def from_task(
num_samples: int = 8,
human_guidance: str | None = None,
) -> "DataGenSampleTaskInput":
"""Create a DataGenSampleTaskInput instance from a Task.
Args:
task: The source Task object
topic: Topic path for sample generation
num_samples: Number of samples to generate
human_guidance: Optional guidance for generation
Returns:
A new DataGenSampleTaskInput instance
"""
prompt_builder = SimplePromptBuilder(task=task)
return cls(
topic=topic,
Expand All @@ -83,6 +137,14 @@ def from_task(


def list_json_schema_for_task(task: Task) -> str:
"""Generate a JSON schema for a list of task inputs (json schema)
Args:
task: Task object whose input schema will be used
Returns:
JSON string representing the schema for a list of task inputs
"""
if task.input_json_schema:
items_schema = json.loads(task.input_json_schema)
else:
Expand All @@ -105,6 +167,11 @@ def list_json_schema_for_task(task: Task) -> str:


class DataGenSampleTask(Task, parent_of={}):
"""Task for generating data samples for a given topic.
Generates synthetic data samples based on provided topics and subtopics.
"""

def __init__(self, target_task: Task, num_samples: int = 8):
# Keep the typechecker happy. TODO: shouldn't need this or parent_of above.
tmp_project = Project(name="DataGenSample")
Expand Down

0 comments on commit 8d95683

Please sign in to comment.