From 941c51fe3be6579ffdd69be98422b8b457c8d0cb Mon Sep 17 00:00:00 2001 From: Helge Sverre Date: Mon, 29 May 2023 09:40:48 +0200 Subject: [PATCH] Added: StructuredOutputParser which takes "plain php class" converts it into a json schema, and marshals the JSON response back into an instance of that class. Moved OutputParser tests into its own folder, added new method on LLM interace that takes a prompttemplate instead of a string. --- src/Contracts/LLM.php | 6 +- src/LLM/Drivers/Fake.php | 6 ++ src/LLM/Drivers/OpenAIChat.php | 10 ++ src/LLM/Drivers/OpenAICompletion.php | 10 ++ .../OutputParsers/StructuredOutputParser.php | 101 ++++++++++++++++++ tests/LLMTest.php | 72 +++++++++++++ .../CommaSeparatedListOutputParserTest.php | 15 +++ .../OutputParser/JsonListOutputParserTest.php | 42 ++++++++ .../OutputParser/JsonOutputParserTest.php | 15 +++ .../StructuredOutputParserTest.php | 55 ++++++++++ tests/Prompts/PromptTemplateTest.php | 64 ----------- tests/data/samples/pizza-recipe.txt | 38 +++++++ 12 files changed, 369 insertions(+), 65 deletions(-) create mode 100644 src/Prompts/OutputParsers/StructuredOutputParser.php create mode 100644 tests/LLMTest.php create mode 100644 tests/Prompts/OutputParser/CommaSeparatedListOutputParserTest.php create mode 100644 tests/Prompts/OutputParser/JsonListOutputParserTest.php create mode 100644 tests/Prompts/OutputParser/JsonOutputParserTest.php create mode 100644 tests/Prompts/OutputParser/StructuredOutputParserTest.php create mode 100644 tests/data/samples/pizza-recipe.txt diff --git a/src/Contracts/LLM.php b/src/Contracts/LLM.php index 891d8d7..c469da2 100644 --- a/src/Contracts/LLM.php +++ b/src/Contracts/LLM.php @@ -2,9 +2,13 @@ namespace Mindwave\Mindwave\Contracts; +use Mindwave\Mindwave\Prompts\PromptTemplate; + interface LLM { - // TODO(11 May 2023) ~ Helge: make an interface that makes sense + // TODO(29 May 2023) ~ Helge: These methods names are vague, rename them to something better. public function predict(string $prompt): ?string; + + public function run(PromptTemplate $promptTemplate): mixed; } diff --git a/src/LLM/Drivers/Fake.php b/src/LLM/Drivers/Fake.php index 5987505..87259b6 100644 --- a/src/LLM/Drivers/Fake.php +++ b/src/LLM/Drivers/Fake.php @@ -3,6 +3,7 @@ namespace Mindwave\Mindwave\LLM\Drivers; use Mindwave\Mindwave\Contracts\LLM; +use Mindwave\Mindwave\Prompts\PromptTemplate; class Fake implements LLM { @@ -10,4 +11,9 @@ public function predict(string $prompt): ?string { return $prompt; } + + public function run(PromptTemplate $promptTemplate): mixed + { + return 'implement this'; + } } diff --git a/src/LLM/Drivers/OpenAIChat.php b/src/LLM/Drivers/OpenAIChat.php index 01fcd5d..25f1732 100644 --- a/src/LLM/Drivers/OpenAIChat.php +++ b/src/LLM/Drivers/OpenAIChat.php @@ -3,6 +3,7 @@ namespace Mindwave\Mindwave\LLM\Drivers; use Mindwave\Mindwave\Contracts\LLM; +use Mindwave\Mindwave\Prompts\PromptTemplate; use OpenAI\Client; use OpenAI\Responses\Chat\CreateResponseMessage; @@ -48,4 +49,13 @@ public function predict(string $prompt): ?string return $message->content; } + + public function run(PromptTemplate $promptTemplate, array $inputs = []): mixed + { + $formatted = $promptTemplate->format($inputs); + + $response = $this->predict($formatted); + + return $promptTemplate->parse($response); + } } diff --git a/src/LLM/Drivers/OpenAICompletion.php b/src/LLM/Drivers/OpenAICompletion.php index 1737e28..f4f10cd 100644 --- a/src/LLM/Drivers/OpenAICompletion.php +++ b/src/LLM/Drivers/OpenAICompletion.php @@ -3,6 +3,7 @@ namespace Mindwave\Mindwave\LLM\Drivers; use Mindwave\Mindwave\Contracts\LLM; +use Mindwave\Mindwave\Prompts\PromptTemplate; use OpenAI\Client; class OpenAICompletion implements LLM @@ -38,4 +39,13 @@ public function predict(string $prompt): ?string return $response->choices[0]?->text; } + + public function run(PromptTemplate $promptTemplate, array $inputs = []): mixed + { + $formatted = $promptTemplate->format($inputs); + + $response = $this->predict($formatted); + + return $promptTemplate->parse($response); + } } diff --git a/src/Prompts/OutputParsers/StructuredOutputParser.php b/src/Prompts/OutputParsers/StructuredOutputParser.php new file mode 100644 index 0000000..92244c8 --- /dev/null +++ b/src/Prompts/OutputParsers/StructuredOutputParser.php @@ -0,0 +1,101 @@ +schema = $schema; + } + + public function fromClass($schema): self + { + $this->schema = $schema; + + return $this; + } + + public function getSchemaStructure(): array + { + $reflectionClass = new ReflectionClass($this->schema); + $properties = []; + $required = []; + + foreach ($reflectionClass->getProperties() as $property) { + $propertyName = $property->getName(); + $propertyType = $property->getType()->getName(); + + if ($property->getType()->allowsNull() === false) { + $required[] = $propertyName; + } + + $properties[$propertyName] = [ + 'type' => match ($propertyType) { + 'string', 'int', 'float', 'bool' => $propertyType, + 'array', Collection::class => 'array', + default => 'object', + }, + ]; + } + + return [ + 'properties' => $properties, + 'required' => $required, + ]; + } + + public function getFormatInstructions(): string + { + $schema = json_encode($this->getSchemaStructure()); + + return trim(' +RESPONSE FORMAT INSTRUCTIONS +---------------------------- +The output should be formatted as a JSON instance that conforms to the JSON schema below. + +As an example, for the schema {{"properties": {{"foo": {{"title": "Foo", "description": "a list of strings", "type": "array", "items": {{"type": "string"}}}}}}, "required": ["foo"]}}}} +the object {{"foo": ["bar", "baz"]}} is a well-formatted instance of the schema. The object {{"properties": {{"foo": ["bar", "baz"]}}}} is not well-formatted. + +Here is the output schema: +```json +'.$schema.' +``` +Remember to respond with a JSON blob, and NOTHING else.'); + } + + public function parse(string $text): mixed + { + $reflectionClass = new ReflectionClass($this->schema); + $data = json_decode($text, true); + + if (! $data) { + // TODO(29 May 2023) ~ Helge: Throw custom exception + return null; + } + + $instance = new $this->schema(); + + foreach ($data as $key => $value) { + + $type = $reflectionClass->getProperty($key)->getType(); + + // TODO(29 May 2023) ~ Helge: There are probably libraries that do this in a more clever way, but this is fine for now. + $instance->{$key} = match ($type->getName()) { + 'bool' => boolval($value), + 'int' => intval($value), + 'float' => floatval($value), + Collection::class => collect($value), + default => $value, + }; + } + + return $instance; + } +} diff --git a/tests/LLMTest.php b/tests/LLMTest.php new file mode 100644 index 0000000..e138df4 --- /dev/null +++ b/tests/LLMTest.php @@ -0,0 +1,72 @@ +run(PromptTemplate::create( + 'Generate random details about a fictional person', $parser + )); + + expect($result)->toBeInstanceOf(Person::class); + + dump($result); +}); + +it('We can parse a small recipe into an object', function () { + Config::set('mindwave-vectorstore.default', 'array'); + Config::set('mindwave-embeddings.embeddings.openai.api_key', env('MINDWAVE_OPENAI_API_KEY')); + Config::set('mindwave-llm.llms.openai_chat.api_key', env('MINDWAVE_OPENAI_API_KEY')); + Config::set('mindwave-llm.llms.openai_chat.max_tokens', 2500); + Config::set('mindwave-llm.llms.openai_chat.temperature', 0.2); + + class Recipe + { + public string $dishName; + + public ?string $description; + + public ?int $portions; + + public ?array $steps; + } + + // Source: https://sugarspunrun.com/the-best-pizza-dough-recipe/ + $rawRecipeText = file_get_contents(__DIR__.'/data/samples/pizza-recipe.txt'); + + $template = PromptTemplate::create( + template: 'Extract details from this recipe: {recipe}', + outputParser: new StructuredOutputParser(Recipe::class) + ); + + $result = Mindwave::llm()->run($template, [ + 'recipe' => $rawRecipeText, + ]); + + expect($result)->toBeInstanceOf(Recipe::class); + + dump($result); +}); diff --git a/tests/Prompts/OutputParser/CommaSeparatedListOutputParserTest.php b/tests/Prompts/OutputParser/CommaSeparatedListOutputParserTest.php new file mode 100644 index 0000000..e7c7333 --- /dev/null +++ b/tests/Prompts/OutputParser/CommaSeparatedListOutputParserTest.php @@ -0,0 +1,15 @@ +parse('monsters, bananas, flies, sausages'))->toEqual([ + 'monsters', + 'bananas', + 'flies', + 'sausages', + ]); +}); diff --git a/tests/Prompts/OutputParser/JsonListOutputParserTest.php b/tests/Prompts/OutputParser/JsonListOutputParserTest.php new file mode 100644 index 0000000..2bdf5ec --- /dev/null +++ b/tests/Prompts/OutputParser/JsonListOutputParserTest.php @@ -0,0 +1,42 @@ +format([ + 'topic' => 'Mindwave', + ]); + + expect($prompt)->toContain('Generate 10 keywords for Mindwave'); + expect($prompt)->toContain($outputParser->getFormatInstructions()); +}); + +it('json list output parser generates a list from method', function () { + $outputParser = new JsonListOutputParser(); + + $prompt = PromptTemplate::create( + template: 'Generate 10 keywords for {topic}', + )->withOutputParser($outputParser)->format([ + 'topic' => 'Laravel', + ]); + + expect($prompt)->toContain('Generate 10 keywords for Laravel'); + expect($prompt)->toContain($outputParser->getFormatInstructions()); +}); + +it('can parse json array as array', function () { + + $parser = new JsonListOutputParser(); + + expect($parser->parse('```json{"data": ["monsters", "bananas", "flies", "sausages"]}```'))->toEqual([ + 'monsters', + 'bananas', + 'flies', + 'sausages', + ]); +}); diff --git a/tests/Prompts/OutputParser/JsonOutputParserTest.php b/tests/Prompts/OutputParser/JsonOutputParserTest.php new file mode 100644 index 0000000..62d4c31 --- /dev/null +++ b/tests/Prompts/OutputParser/JsonOutputParserTest.php @@ -0,0 +1,15 @@ +parse('```json { "hello": "world", "nice":["mindwave", "package"] } ```'); + + expect($prompt) + ->toBeArray() + ->and($prompt) + ->toHaveKey('hello', 'world') + ->toHaveKey('nice', ['mindwave', 'package']); +}); diff --git a/tests/Prompts/OutputParser/StructuredOutputParserTest.php b/tests/Prompts/OutputParser/StructuredOutputParserTest.php new file mode 100644 index 0000000..0953525 --- /dev/null +++ b/tests/Prompts/OutputParser/StructuredOutputParserTest.php @@ -0,0 +1,55 @@ +getSchemaStructure()) + ->toBe([ + 'properties' => [ + 'name' => ['type' => 'string'], + 'age' => ['type' => 'int'], + 'hasBusiness' => ['type' => 'bool'], + 'interests' => ['type' => 'array'], + 'tags' => ['type' => 'array'], + ], + 'required' => ['name'], + ]); +}); + +it('can parse response into class instance', function () { + $parser = new StructuredOutputParser(Person::class); + + /** @var Person $person */ + $person = $parser->parse('{"name": "Lila Jones", "age": 28, "hasBusiness": true, "interests": ["hiking", "reading", "painting"], "tags": ["adventurous", "creative", "entrepreneur"]}'); + + expect($person)->toBeInstanceOf(Person::class); + expect($person->name)->toBe('Lila Jones'); + expect($person->age)->toBe(28); + expect($person->hasBusiness)->toBe(true); + expect($person->interests)->toBe(['hiking', 'reading', 'painting']); + expect($person->tags)->toEqual(collect(['adventurous', 'creative', 'entrepreneur'])); +}); + +it('can returns null if parsing data fails.', function () { + $parser = new StructuredOutputParser(Person::class); + + $person = $parser->parse('broken and invalid data'); + + expect($person)->toBeNull(Person::class); +}); diff --git a/tests/Prompts/PromptTemplateTest.php b/tests/Prompts/PromptTemplateTest.php index 4586edb..3cb54a7 100644 --- a/tests/Prompts/PromptTemplateTest.php +++ b/tests/Prompts/PromptTemplateTest.php @@ -1,9 +1,6 @@ format())->toEndWith('TESTING'); }); -it('can parse comma separated output', function () { - - $parser = new CommaSeparatedListOutputParser(); - - expect($parser->parse('monsters, bananas, flies, sausages'))->toEqual([ - 'monsters', - 'bananas', - 'flies', - 'sausages', - ]); -}); - -it('can parse json array as array', function () { - - $parser = new JsonListOutputParser(); - - expect($parser->parse('```json{"data": ["monsters", "bananas", "flies", "sausages"]}```'))->toEqual([ - 'monsters', - 'bananas', - 'flies', - 'sausages', - ]); -}); - -it('can parse a response', function () { - $prompt = PromptTemplate::create('Test prompt', new JsonOutputParser()) - ->parse('```json { "hello": "world", "nice":["mindwave", "package"] } ```'); - - expect($prompt) - ->toBeArray() - ->and($prompt) - ->toHaveKey('hello', 'world') - ->toHaveKey('nice', ['mindwave', 'package']); -}); - it('can convert a template to a string', function () { $prompt = PromptTemplate::create('This is a {variable} template. ') ->format(['variable' => 'test']); @@ -83,32 +45,6 @@ public function parse(string $text): mixed expect($prompt)->toBe('This is a test template.'); }); -it('json list output parser generates a list from constructor', function () { - $outputParser = new JsonListOutputParser(); - $prompt = PromptTemplate::create( - template: 'Generate 10 keywords for {topic}', - outputParser: $outputParser - )->format([ - 'topic' => 'Mindwave', - ]); - - expect($prompt)->toContain('Generate 10 keywords for Mindwave'); - expect($prompt)->toContain($outputParser->getFormatInstructions()); -}); - -it('json list output parser generates a list from method', function () { - $outputParser = new JsonListOutputParser(); - - $prompt = PromptTemplate::create( - template: 'Generate 10 keywords for {topic}', - )->withOutputParser($outputParser)->format([ - 'topic' => 'Laravel', - ]); - - expect($prompt)->toContain('Generate 10 keywords for Laravel'); - expect($prompt)->toContain($outputParser->getFormatInstructions()); -}); - it('formats the template with input variables', function () { expect( PromptTemplate::create('Hello, {name}! Your {product} is ready.') diff --git a/tests/data/samples/pizza-recipe.txt b/tests/data/samples/pizza-recipe.txt new file mode 100644 index 0000000..f74afe0 --- /dev/null +++ b/tests/data/samples/pizza-recipe.txt @@ -0,0 +1,38 @@ +The Best Pizza Dough Recipe +How to make the BEST Pizza Dough Recipe +4.96 from 3755 votes + Print Pin Rate +Course: Main CourseCuisine: American Prep Time: 15minutes minutesCook Time: 15minutes minutesRising Time: 30minutes minutesTotal Time: 1hour hour Servings: 12 servings (makes one 10-12" pizza) Calories: 113kcal Author: Sam Merritt +Ingredients +▢2-2 ⅓ cups all-purpose flour OR bread flour¹ divided (250-295g) +▢1 packet instant yeast² (2 ¼ teaspoon) +▢1 ½ teaspoons sugar +▢¾ teaspoon salt +▢⅛-¼ teaspoon garlic powder and/or dried basil leaves optional +▢2 Tablespoons olive oil + additional +▢¾ cup warm water³ (175ml) +Cook Mode +Prevent your screen from going dark +Instructions +Combine 1 cup (125g) of flour, instant yeast, sugar, and salt in a large bowl. If desired, add garlic powder and dried basil at this point as well. +Add olive oil and warm water and use a wooden spoon to stir well very well. +Gradually add another 1 cup (125g) of flour. Add any additional flour as needed (I've found that sometimes I need as much as an additional ⅓ cup), stirring until the dough is forming into a cohesive, elastic ball and is beginning to pull away from the sides of the bowl (see video above recipe for visual cue). The dough will still be slightly sticky but still should be manageable with your hands. +Drizzle a separate, large, clean bowl generously with olive oil and use a pastry brush to brush up the sides of the bowl. +Lightly dust your hands with flour and form your pizza dough into a round ball and transfer to your olive oil-brushed bowl. Use your hands to roll the pizza dough along the inside of the bowl until it is coated in olive oil, then cover the bowl tightly with plastic wrap and place it in a warm place. +Allow dough to rise for 30 minutes or until doubled in size. If you intend to bake this dough into a pizza, I also recommend preheating your oven to 425F (215C) at this point so that it will have reached temperature once your pizza is ready to bake. +Once the dough has risen, use your hands to gently deflate it and transfer to a lightly floured surface and knead briefly until smooth (about 3-5 times). +Use either your hands or a rolling pin to work the dough into 12" circle. +Transfer dough to a parchment paper lined pizza pan and either pinch the edges or fold them over to form a crust. +Drizzle additional olive oil (about a Tablespoon) over the top of the pizza and use your pastry brush to brush the entire surface of the pizza (including the crust) with olive oil. +Use a fork to poke holes all over the center of the pizza to keep the dough from bubbling up in the oven. +Add desired toppings (see the notes for a link to my favorite, 5-minute pizza sauce recipe!) and bake in a 425F (215C) preheated oven for 13-15 minutes or until toppings are golden brown. Slice and serve. +Notes +¹I've found that all-purpose flour yields a softer crust while bread flour gives a slightly crispier exterior. Please see the post for more information on all-purpose vs. bread flour in pizza dough. +²Many commenters have reported using active dry yeast (use the same amount, 2 ¼ teaspoon) with success. Some people have proofed the yeast first with the ¾ cup warm water, which is what I would recommend, while others have just mixed it into the dough as instructed. Both have had success! +³Ideally your water should be between 105-115F (40-46C). I usually just use warm tap water but do make sure that your water isn't too hot or it will kill your yeast! +Making in Advance: +To make in advance, let the dough rise covered at room temperature as indicated in the recipe, then deflate it, wrap it tightly so it doesn't dry out, and store in the refrigerator for up to several days or it will freeze for up to a month. +Top your pizza off with my favorite, easy, homemade pizza sauce! +Nutrition +Serving: 1serving | Calories: 113kcal | Carbohydrates: 19g | Protein: 3g | Fat: 3g | Saturated Fat: 1g | Sodium: 146mg | Potassium: 32mg | Fiber: 1g | Sugar: 1g | Calcium: 4mg | Iron: 1mg +Nutritional information is based on third-party calculations and should be considered an estimate only. Actual nutritional content will vary based upon brands used, measuring methods, cooking method, portion sizes, and more.