Skip to content

Commit

Permalink
Added: StructuredOutputParser, which takes a "plain PHP class" and converts …
Browse files Browse the repository at this point in the history
…it into a json schema, and marshals the JSON response back into an instance of that class.

Moved the OutputParser tests into their own folder, and added a new method on the LLM interface that takes a PromptTemplate instead of a string.
  • Loading branch information
HelgeSverre committed May 29, 2023
1 parent c0043f0 commit 941c51f
Show file tree
Hide file tree
Showing 12 changed files with 369 additions and 65 deletions.
6 changes: 5 additions & 1 deletion src/Contracts/LLM.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,13 @@

namespace Mindwave\Mindwave\Contracts;

use Mindwave\Mindwave\Prompts\PromptTemplate;

/**
 * Contract for the LLM drivers in this package (for example Fake and OpenAICompletion).
 */
interface LLM
{
// TODO(11 May 2023) ~ Helge: make an interface that makes sense
// TODO(29 May 2023) ~ Helge: These method names are vague, rename them to something better.

/**
 * Send a plain-text prompt to the model and return its reply.
 *
 * @param  string  $prompt  The already-formatted prompt text.
 * @return string|null The reply text; the signature allows null.
 */
public function predict(string $prompt): ?string;

/**
 * Execute a prompt template and return the result.
 *
 * The OpenAI drivers implement this as format() -> predict() -> parse() on the template.
 * They also accept an optional second `array $inputs = []` argument that is not declared here.
 *
 * @param  PromptTemplate  $promptTemplate  Template to run.
 * @return mixed Implementation-defined; the OpenAI drivers return the template's parse() result.
 */
public function run(PromptTemplate $promptTemplate): mixed;
}
6 changes: 6 additions & 0 deletions src/LLM/Drivers/Fake.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,17 @@
namespace Mindwave\Mindwave\LLM\Drivers;

use Mindwave\Mindwave\Contracts\LLM;
use Mindwave\Mindwave\Prompts\PromptTemplate;

/**
 * LLM driver that performs no remote call.
 *
 * predict() echoes the prompt back and run() is still a placeholder.
 */
class Fake implements LLM
{
    /**
     * Return the prompt unchanged.
     *
     * @param  string  $prompt  Prompt text.
     * @return string|null The same text that was passed in.
     */
    public function predict(string $prompt): ?string
    {
        return $prompt;
    }

    /**
     * Placeholder implementation: always returns the literal string 'implement this'.
     *
     * The optional $inputs parameter mirrors the signature of the OpenAI drivers, so callers
     * that pass template inputs can target any driver. It is currently unused.
     *
     * @param  PromptTemplate  $promptTemplate  Template to run (currently ignored).
     * @param  array  $inputs  Template placeholder values (currently ignored).
     * @return mixed Always 'implement this'.
     */
    public function run(PromptTemplate $promptTemplate, array $inputs = []): mixed
    {
        return 'implement this';
    }
}
10 changes: 10 additions & 0 deletions src/LLM/Drivers/OpenAIChat.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
namespace Mindwave\Mindwave\LLM\Drivers;

use Mindwave\Mindwave\Contracts\LLM;
use Mindwave\Mindwave\Prompts\PromptTemplate;
use OpenAI\Client;
use OpenAI\Responses\Chat\CreateResponseMessage;

Expand Down Expand Up @@ -48,4 +49,13 @@ public function predict(string $prompt): ?string

return $message->content;
}

/**
 * Render the template with the given inputs, send it to the model and let the template parse the reply.
 *
 * NOTE(review): predict() is declared as ?string, so a null reply is passed straight to
 * parse() — confirm PromptTemplate::parse() accepts null.
 *
 * @param  PromptTemplate  $promptTemplate  Template to format and to parse the reply with.
 * @param  array  $inputs  Values handed to the template's format() call.
 * @return mixed Whatever the template's parse() returns.
 */
public function run(PromptTemplate $promptTemplate, array $inputs = []): mixed
{
    return $promptTemplate->parse(
        $this->predict($promptTemplate->format($inputs))
    );
}
}
10 changes: 10 additions & 0 deletions src/LLM/Drivers/OpenAICompletion.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
namespace Mindwave\Mindwave\LLM\Drivers;

use Mindwave\Mindwave\Contracts\LLM;
use Mindwave\Mindwave\Prompts\PromptTemplate;
use OpenAI\Client;

class OpenAICompletion implements LLM
Expand Down Expand Up @@ -38,4 +39,13 @@ public function predict(string $prompt): ?string

return $response->choices[0]?->text;
}

/**
 * Format the template, run the resulting prompt through predict() and parse the completion.
 *
 * NOTE(review): predict() may return null (its return type is ?string); verify that
 * PromptTemplate::parse() copes with a null argument.
 *
 * @param  PromptTemplate  $promptTemplate  Template used for both formatting and parsing.
 * @param  array  $inputs  Values handed to the template's format() call.
 * @return mixed The value produced by the template's parse().
 */
public function run(PromptTemplate $promptTemplate, array $inputs = []): mixed
{
    $prompt = $promptTemplate->format($inputs);
    $completion = $this->predict($prompt);

    return $promptTemplate->parse($completion);
}
}
101 changes: 101 additions & 0 deletions src/Prompts/OutputParsers/StructuredOutputParser.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
<?php

namespace Mindwave\Mindwave\Prompts\OutputParsers;

use Illuminate\Support\Collection;
use Mindwave\Mindwave\Contracts\OutputParser;
use ReflectionClass;

/**
 * Output parser driven by a plain PHP class.
 *
 * The class' properties are reflected into a schema-like array that is embedded in the
 * prompt, and the model's JSON reply is hydrated back into a new instance of that class.
 */
class StructuredOutputParser implements OutputParser
{
    /**
     * Class name that is reflected for the schema and instantiated by parse().
     */
    protected $schema;

    /**
     * @param  mixed  $schema  Class name to use as schema; may be set later via fromClass().
     */
    public function __construct($schema = null)
    {
        $this->schema = $schema;
    }

    /**
     * Set (or replace) the schema class.
     *
     * @param  mixed  $schema  Class name to use as schema.
     * @return self The same parser instance, for chaining.
     */
    public function fromClass($schema): self
    {
        $this->schema = $schema;

        return $this;
    }

    /**
     * Describe the schema class as `['properties' => [...], 'required' => [...]]`.
     *
     * Only public, non-static properties are listed, because those are the only ones parse()
     * can assign. A property is required when it has a type that does not allow null.
     * Untyped properties and union/intersection types have no single type name and are
     * reported with the fallback type 'object'.
     *
     * @return array{properties: array<string, array{type: string}>, required: list<string>}
     *
     * @throws \ReflectionException If the schema class does not exist.
     */
    public function getSchemaStructure(): array
    {
        $reflectionClass = new ReflectionClass($this->schema);
        $properties = [];
        $required = [];

        foreach ($reflectionClass->getProperties() as $property) {
            if (! $this->isAssignable($property)) {
                continue;
            }

            $propertyName = $property->getName();
            $type = $property->getType();

            // getType() is null for untyped properties; only named types expose getName().
            $typeName = $type instanceof \ReflectionNamedType ? $type->getName() : null;

            if ($type !== null && $type->allowsNull() === false) {
                $required[] = $propertyName;
            }

            $properties[$propertyName] = [
                'type' => match ($typeName) {
                    'string', 'int', 'float', 'bool' => $typeName,
                    'array', Collection::class => 'array',
                    default => 'object',
                },
            ];
        }

        return [
            'properties' => $properties,
            'required' => $required,
        ];
    }

    /**
     * Build the instruction text appended to the prompt, including the JSON-encoded schema.
     *
     * NOTE(review): the doubled braces in the example look like template escapes carried over
     * from another templating syntax — confirm the prompt template un-escapes them.
     *
     * @throws \ReflectionException If the schema class does not exist.
     */
    public function getFormatInstructions(): string
    {
        $schema = json_encode($this->getSchemaStructure());

        return trim('
RESPONSE FORMAT INSTRUCTIONS
----------------------------
The output should be formatted as a JSON instance that conforms to the JSON schema below.
As an example, for the schema {{"properties": {{"foo": {{"title": "Foo", "description": "a list of strings", "type": "array", "items": {{"type": "string"}}}}}}, "required": ["foo"]}}}}
the object {{"foo": ["bar", "baz"]}} is a well-formatted instance of the schema. The object {{"properties": {{"foo": ["bar", "baz"]}}}} is not well-formatted.
Here is the output schema:
```json
'.$schema.'
```
Remember to respond with a JSON blob, and NOTHING else.');
    }

    /**
     * Hydrate an instance of the schema class from the model's JSON reply.
     *
     * - A surrounding Markdown code fence (``` or ```json) is removed before decoding.
     * - Replies that do not decode to a non-empty JSON object/array yield null.
     * - Keys that do not match a public, non-static property are ignored instead of throwing.
     * - A JSON null is kept as null when the property allows it, rather than being coerced
     *   to 0 / false / an empty collection.
     *
     * @param  string  $text  Raw model reply.
     * @return mixed The populated instance, or null when the reply cannot be used.
     *
     * @throws \ReflectionException If the schema class does not exist.
     */
    public function parse(string $text): mixed
    {
        $reflectionClass = new ReflectionClass($this->schema);
        $data = json_decode($this->stripCodeFence($text), true);

        // Scalars ("42", "true") decode successfully but cannot be mapped onto properties.
        if (! is_array($data) || $data === []) {
            // TODO(29 May 2023) ~ Helge: Throw custom exception
            return null;
        }

        $instance = new $this->schema();

        foreach ($data as $key => $value) {
            // A JSON list decodes to integer keys; normalise before the property lookup.
            $key = (string) $key;

            if (! $reflectionClass->hasProperty($key)) {
                continue;
            }

            $property = $reflectionClass->getProperty($key);

            if (! $this->isAssignable($property)) {
                continue;
            }

            $type = $property->getType();

            if ($value === null && ($type === null || $type->allowsNull())) {
                $instance->{$key} = null;

                continue;
            }

            $typeName = $type instanceof \ReflectionNamedType ? $type->getName() : null;

            // TODO(29 May 2023) ~ Helge: There are probably libraries that do this in a more clever way, but this is fine for now.
            $instance->{$key} = match ($typeName) {
                'bool' => boolval($value),
                'int' => intval($value),
                'float' => floatval($value),
                Collection::class => collect($value),
                default => $value,
            };
        }

        return $instance;
    }

    /**
     * Whether the property can be written from outside the class on an instance.
     */
    private function isAssignable(\ReflectionProperty $property): bool
    {
        return $property->isPublic() && ! $property->isStatic();
    }

    /**
     * Trim the reply and unwrap a single surrounding ``` / ```json fence, if present.
     */
    private function stripCodeFence(string $text): string
    {
        $text = trim($text);

        if (preg_match('/^```(?:json)?\s*(.*?)\s*```$/is', $text, $matches) === 1) {
            return $matches[1];
        }

        return $text;
    }
}
72 changes: 72 additions & 0 deletions tests/LLMTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
<?php

use Illuminate\Support\Collection;
use Illuminate\Support\Facades\Config;
use Mindwave\Mindwave\Facades\Mindwave;
use Mindwave\Mindwave\Prompts\OutputParsers\StructuredOutputParser;
use Mindwave\Mindwave\Prompts\PromptTemplate;

it('can use a structured output parser', function () {
    Config::set('mindwave-vectorstore.default', 'array');
    Config::set('mindwave-embeddings.embeddings.openai.api_key', env('MINDWAVE_OPENAI_API_KEY'));
    Config::set('mindwave-llm.llms.openai_chat.api_key', env('MINDWAVE_OPENAI_API_KEY'));

    // Deliberately not named `Person`: StructuredOutputParserTest.php already declares a global
    // `Person`, and a second declaration in the same test process is a fatal
    // "Cannot declare class" error.
    class LLMTestPerson
    {
        public string $name;

        public ?int $age;

        public ?bool $hasBusiness;

        public ?array $interests;

        public ?Collection $tags;
    }

    $model = Mindwave::llm();
    $parser = new StructuredOutputParser(LLMTestPerson::class);

    $result = $model->run(PromptTemplate::create(
        'Generate random details about a fictional person', $parser
    ));

    expect($result)->toBeInstanceOf(LLMTestPerson::class);

    dump($result);
});

it('We can parse a small recipe into an object', function () {
    // Point the package at the in-memory vector store and the OpenAI chat driver.
    $openAiKey = env('MINDWAVE_OPENAI_API_KEY');

    $settings = [
        'mindwave-vectorstore.default' => 'array',
        'mindwave-embeddings.embeddings.openai.api_key' => $openAiKey,
        'mindwave-llm.llms.openai_chat.api_key' => $openAiKey,
        'mindwave-llm.llms.openai_chat.max_tokens' => 2500,
        'mindwave-llm.llms.openai_chat.temperature' => 0.2,
    ];

    foreach ($settings as $configKey => $configValue) {
        Config::set($configKey, $configValue);
    }

    // Target shape the recipe text is extracted into.
    class Recipe
    {
        public string $dishName;

        public ?string $description;

        public ?int $portions;

        public ?array $steps;
    }

    $parser = new StructuredOutputParser(Recipe::class);

    $recipeTemplate = PromptTemplate::create(
        template: 'Extract details from this recipe: {recipe}',
        outputParser: $parser
    );

    // Source: https://sugarspunrun.com/the-best-pizza-dough-recipe/
    $recipeText = file_get_contents(__DIR__.'/data/samples/pizza-recipe.txt');

    $extracted = Mindwave::llm()->run($recipeTemplate, ['recipe' => $recipeText]);

    expect($extracted)->toBeInstanceOf(Recipe::class);

    dump($extracted);
});
15 changes: 15 additions & 0 deletions tests/Prompts/OutputParser/CommaSeparatedListOutputParserTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<?php

use Mindwave\Mindwave\Prompts\OutputParsers\CommaSeparatedListOutputParser;

it('can parse comma separated output', function () {
    // A comma separated reply is expected to come back as a flat list of the items.
    $expectedItems = ['monsters', 'bananas', 'flies', 'sausages'];

    $parsedItems = (new CommaSeparatedListOutputParser())->parse('monsters, bananas, flies, sausages');

    expect($parsedItems)->toEqual($expectedItems);
});
42 changes: 42 additions & 0 deletions tests/Prompts/OutputParser/JsonListOutputParserTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
<?php

use Mindwave\Mindwave\Prompts\OutputParsers\JsonListOutputParser;
use Mindwave\Mindwave\Prompts\PromptTemplate;

it('json list output parser generates a list from constructor', function () {
    $parser = new JsonListOutputParser();

    // The parser is handed to the template at creation time.
    $template = PromptTemplate::create(
        template: 'Generate 10 keywords for {topic}',
        outputParser: $parser
    );

    $rendered = $template->format(['topic' => 'Mindwave']);

    // The rendered prompt must hold both the filled-in template and the parser's instructions.
    expect($rendered)
        ->toContain('Generate 10 keywords for Mindwave')
        ->toContain($parser->getFormatInstructions());
});

it('json list output parser generates a list from method', function () {
    $parser = new JsonListOutputParser();

    // Here the parser is attached afterwards through the fluent withOutputParser() call.
    $template = PromptTemplate::create(template: 'Generate 10 keywords for {topic}')
        ->withOutputParser($parser);

    $rendered = $template->format(['topic' => 'Laravel']);

    expect($rendered)
        ->toContain('Generate 10 keywords for Laravel')
        ->toContain($parser->getFormatInstructions());
});

it('can parse json array as array', function () {
    // The reply is wrapped in a Markdown json fence and nests the list under "data".
    $reply = '```json{"data": ["monsters", "bananas", "flies", "sausages"]}```';

    $keywords = (new JsonListOutputParser())->parse($reply);

    expect($keywords)->toEqual(['monsters', 'bananas', 'flies', 'sausages']);
});
15 changes: 15 additions & 0 deletions tests/Prompts/OutputParser/JsonOutputParserTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<?php

use Mindwave\Mindwave\Prompts\OutputParsers\JsonOutputParser;
use Mindwave\Mindwave\Prompts\PromptTemplate;

it('can parse a response', function () {
    $template = PromptTemplate::create('Test prompt', new JsonOutputParser());

    // parse() is called on the template and receives a fenced JSON reply.
    $decoded = $template->parse('```json { "hello": "world", "nice":["mindwave", "package"] } ```');

    expect($decoded)
        ->toBeArray()
        ->toHaveKey('hello', 'world')
        ->toHaveKey('nice', ['mindwave', 'package']);
});
55 changes: 55 additions & 0 deletions tests/Prompts/OutputParser/StructuredOutputParserTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
<?php

use Illuminate\Support\Collection;
use Mindwave\Mindwave\Prompts\OutputParsers\StructuredOutputParser;

/**
 * Fixture class used as the schema for the StructuredOutputParser tests in this file.
 *
 * The schema test compares the generated properties with toBe(), so the declaration
 * order of these properties is part of what is asserted.
 */
class Person
{
// The only non-nullable property, hence the only entry expected under 'required'.
public string $name;

public ?int $age;

public ?bool $hasBusiness;

public ?array $interests;

// Expected to appear as type 'array' in the schema and to be hydrated as a Collection.
public ?Collection $tags;
}

it('can convert a class into a schema for StructuredOutputParser', function () {
    // Property order and the single required field mirror the Person fixture.
    $expectedSchema = [
        'properties' => [
            'name' => ['type' => 'string'],
            'age' => ['type' => 'int'],
            'hasBusiness' => ['type' => 'bool'],
            'interests' => ['type' => 'array'],
            'tags' => ['type' => 'array'],
        ],
        'required' => ['name'],
    ];

    $actualSchema = (new StructuredOutputParser(Person::class))->getSchemaStructure();

    expect($actualSchema)->toBe($expectedSchema);
});

it('can parse response into class instance', function () {
    $reply = '{"name": "Lila Jones", "age": 28, "hasBusiness": true, "interests": ["hiking", "reading", "painting"], "tags": ["adventurous", "creative", "entrepreneur"]}';

    /** @var Person $hydrated */
    $hydrated = (new StructuredOutputParser(Person::class))->parse($reply);

    // Scalars and the plain array are compared strictly; the Collection is compared by value.
    expect($hydrated)->toBeInstanceOf(Person::class)
        ->and($hydrated->name)->toBe('Lila Jones')
        ->and($hydrated->age)->toBe(28)
        ->and($hydrated->hasBusiness)->toBeTrue()
        ->and($hydrated->interests)->toBe(['hiking', 'reading', 'painting'])
        ->and($hydrated->tags)->toEqual(collect(['adventurous', 'creative', 'entrepreneur']));
});

it('can returns null if parsing data fails.', function () {
    $parser = new StructuredOutputParser(Person::class);

    $person = $parser->parse('broken and invalid data');

    // toBeNull() takes no expected value; the class name passed before was at best ignored
    // and at worst used as the failure message.
    expect($person)->toBeNull();
});
Loading

0 comments on commit 941c51f

Please sign in to comment.