From 1894977ecd1a7ad8dbe3951cb11f58ba08ff0b22 Mon Sep 17 00:00:00 2001 From: Omar Khattab Date: Sat, 28 Dec 2024 13:33:04 -0800 Subject: [PATCH] Add AlfWorld dataset and tutorial. Improve BootstrapFT. --- docs/docs/tutorials/games/index.ipynb | 656 +++++++++++++++++++++++++ dspy/datasets/__init__.py | 1 + dspy/datasets/alfworld/__init__.py | 1 + dspy/datasets/alfworld/alfworld.py | 149 ++++++ dspy/datasets/alfworld/base_config.yml | 145 ++++++ dspy/teleprompt/bootstrap_finetune.py | 79 +-- 6 files changed, 997 insertions(+), 34 deletions(-) create mode 100644 docs/docs/tutorials/games/index.ipynb create mode 100644 dspy/datasets/alfworld/__init__.py create mode 100644 dspy/datasets/alfworld/alfworld.py create mode 100644 dspy/datasets/alfworld/base_config.yml diff --git a/docs/docs/tutorials/games/index.ipynb b/docs/docs/tutorials/games/index.ipynb new file mode 100644 index 000000000..e6b794353 --- /dev/null +++ b/docs/docs/tutorials/games/index.ipynb @@ -0,0 +1,656 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Tutorial: Fine-tuning Agents\n", + "\n", + "Let's walk through a quick example of optimizing the _language model weights_ (i.e., fine-tuning) inside a DSPy module that represents a ReAct agent playing a game with 50-step tasks.\n", + "\n", + "### Install dependencies and download data\n", + "\n", + "Install the latest DSPy via `pip install -U --pre dspy` and follow along. 
This tutorial uses the AlfWorld dataset, which depends on DSPy 2.6.0 (pre-release).\n", + "\n", + "You will also need the following dependencies:\n", + "\n", + "```shell\n", + "> pip install -U alfworld==0.3.5 multiprocess\n", + "> alfworld-download\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set up the language models\n", + "\n", + "Our goal is to allow `gpt-4o-mini` to play the AlfWorld household game proficiently, without tinkering with string prompts or example trajectories by hand.\n", + "\n", + "Though it's not strictly necessary, we'll make our job a little easier by using the larger `gpt-4o` for prompt optimization and fine-tuning, building our small `gpt-4o-mini` agent." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import dspy\n", + "\n", + "gpt4o_mini = dspy.LM('gpt-4o-mini-2024-07-18')\n", + "gpt4o = dspy.LM('openai/gpt-4o')\n", + "dspy.configure(experimental=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's load 200 training and 200 development tasks from AlfWorld. The dataset is much larger, but a small number of examples will help keep this tutorial's runtime to 1-2 hours, including fine-tuning.\n", + "\n", + "With just 200 training tasks, we'll teach 4o-mini to go from 15% (can barely play the game) to 72%. If you use 500 tasks and retain the demonstrations during fine-tuning, you can push that easily to 82%." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(200, 200)" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from dspy.datasets.alfworld import AlfWorld\n", + "\n", + "alfworld = AlfWorld()\n", + "trainset, devset = alfworld.trainset[:200], alfworld.devset[-200:]\n", + "len(trainset), len(devset)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Before we proceed, let's view an example of this task." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-= Welcome to TextWorld, ALFRED! =-\n", + "\n", + "You are in the middle of a room. Looking quickly around you, you see a countertop 1, a drawer 8, a drawer 7, a drawer 6, a drawer 5, a drawer 4, a drawer 3, a drawer 2, a drawer 1, a garbagecan 1, a handtowelholder 1, a sinkbasin 2, a sinkbasin 1, a toilet 1, a toiletpaperhanger 1, and a towelholder 1.\n", + "\n", + "Your task is to: put a clean soapbar in garbagecan.\n" + ] + } + ], + "source": [ + "example = trainset[0]\n", + "\n", + "with alfworld.POOL.session() as env:\n", + " task, info = env.init(**example.inputs())\n", + "\n", + "print(task)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Defining the Agent program\n", + "\n", + "The agent is a pretty simple `dspy.Module` with one sub-module called `self.react`.\n", + "\n", + "This sub-module consumes a definition of a specific `task`, sees its previous `trajectory`, and sees a list of\n", + "`possible_actions` it can take. It responds simply with the next action.\n", + "\n", + "In the `forward` method, we just initialize an environment for the given task `idx`. And we loop up to `self.max_iters`,\n", + "repeatedly invoking the `self.react` module to take the next action." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "class Agent(dspy.Module):\n", + " def __init__(self, max_iters=50, verbose=False):\n", + " self.max_iters = max_iters\n", + " self.verbose = verbose\n", + " self.react = dspy.Predict(\"task, trajectory, possible_actions: list[str] -> action\")\n", + "\n", + " def forward(self, idx):\n", + " with alfworld.POOL.session() as env:\n", + " trajectory = []\n", + " task, info = env.init(idx)\n", + " if self.verbose:\n", + " print(f\"Task: {task}\")\n", + "\n", + " for _ in range(self.max_iters):\n", + " trajectory_ = \"\\n\".join(trajectory)\n", + " possible_actions = info[\"admissible_commands\"][0] + [\"think: ${...thoughts...}\"]\n", + " prediction = self.react(task=task, trajectory=trajectory_, possible_actions=possible_actions)\n", + " trajectory.append(f\"> {prediction.action}\")\n", + "\n", + " if prediction.action.startswith(\"think:\"):\n", + " trajectory.append(\"OK.\")\n", + " continue\n", + "\n", + " obs, reward, done, info = env.step(prediction.action)\n", + " obs, reward, done = obs[0], reward[0], done[0]\n", + " trajectory.append(obs)\n", + "\n", + " if self.verbose:\n", + " print(\"\\n\".join(trajectory[-2:]))\n", + "\n", + " if done:\n", + " break\n", + "\n", + " assert reward == int(info[\"won\"][0]), (reward, info[\"won\"][0])\n", + " return dspy.Prediction(trajecotry=trajectory, success=reward)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Aside: If you wanted to include instructions for your agent...\n", + "\n", + "Above, we opted to keep the agent super simple, without even providing short instructions that describe the task.\n", + "\n", + "In principle, you can copy a short definition of the AlfWorld task (based on Yao et al., 2022) and use that as the\n", + "instruction for your agent. 
This is not inherently essential, but it helps illustrate the role that\n", + "instructions play in DSPy: they're not for coercing the model to exhibit a certain behavior, but they're there to\n", + "describe the fundamentals of the task in a straightforward, human-readable way.\n", + "\n", + "If you want to do that, you can simply replace this:\n", + "\n", + "```python\n", + "self.react = dspy.Predict(\"task, trajectory, possible_actions: list[str] -> action\")\n", + "```\n", + "\n", + "with this:\n", + "\n", + "```python\n", + "INSTRUCTIONS = \"\"\"\n", + "Interact with a simulated household to achieve a high-level goal. Make sure to plan, track subgoals,\n", + "determine likely locations for common household items (e.g. desklamps will likely be on desks, shelfs, or dressers),\n", + "and explore systematically (e.g. check all desks one by one for desklamp).\n", + "\"\"\".strip()\n", + "\n", + "self.react = dspy.Predict(dspy.Signature(\"task, trajectory, possible_actions: list[str] -> action\", INSTRUCTIONS))\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Zero-shot evaluation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, let's try this simple program, prior to any optimization work." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Task: -= Welcome to TextWorld, ALFRED! =-\n", + "\n", + "You are in the middle of a room. Looking quickly around you, you see a countertop 1, a drawer 8, a drawer 7, a drawer 6, a drawer 5, a drawer 4, a drawer 3, a drawer 2, a drawer 1, a garbagecan 1, a handtowelholder 1, a sinkbasin 2, a sinkbasin 1, a toilet 1, a toiletpaperhanger 1, and a towelholder 1.\n", + "\n", + "Your task is to: put a clean soapbar in garbagecan.\n", + "> go to countertop 1\n", + "You arrive at countertop 1. 
On the countertop 1, you see a candle 1, a soapbar 1, a soapbottle 2, a soapbottle 1, and a spraybottle 1.\n", + "> take soapbar 1 from countertop 1\n", + "You pick up the soapbar 1 from the countertop 1.\n", + "> go to garbagecan 1\n", + "You arrive at garbagecan 1. On the garbagecan 1, you see nothing.\n", + "> move soapbar 1 to garbagecan 1\n", + "You move the soapbar 1 to the garbagecan 1.\n", + "> examine garbagecan 1\n", + "On the garbagecan 1, you see a soapbar 1.\n", + "> examine garbagecan 1\n", + "On the garbagecan 1, you see a soapbar 1.\n", + "> examine garbagecan 1\n", + "On the garbagecan 1, you see a soapbar 1.\n", + "> examine garbagecan 1\n", + "On the garbagecan 1, you see a soapbar 1.\n", + "> look\n", + "You are facing the garbagecan 1. Next to it, you see nothing.\n", + "> look\n", + "You are facing the garbagecan 1. Next to it, you see nothing.\n", + "> look\n", + "You are facing the garbagecan 1. Next to it, you see nothing.\n", + "> take soapbar 1 from garbagecan 1\n", + "You pick up the soapbar 1 from the garbagecan 1.\n", + "> move soapbar 1 to garbagecan 1\n", + "You move the soapbar 1 to the garbagecan 1.\n", + "> examine garbagecan 1\n", + "On the garbagecan 1, you see a soapbar 1.\n", + "> examine garbagecan 1\n", + "On the garbagecan 1, you see a soapbar 1.\n", + "> examine garbagecan 1\n", + "On the garbagecan 1, you see a soapbar 1.\n", + "> examine garbagecan 1\n", + "On the garbagecan 1, you see a soapbar 1.\n", + "> examine garbagecan 1\n", + "On the garbagecan 1, you see a soapbar 1.\n", + "> look\n", + "You are facing the garbagecan 1. Next to it, you see nothing.\n", + "> look\n", + "You are facing the garbagecan 1. Next to it, you see nothing.\n", + "> look\n", + "You are facing the garbagecan 1. Next to it, you see nothing.\n", + "> look\n", + "You are facing the garbagecan 1. 
Next to it, you see nothing.\n", + "> examine garbagecan 1\n", + "On the garbagecan 1, you see a soapbar 1.\n", + "> look\n", + "You are facing the garbagecan 1. Next to it, you see nothing.\n", + "> look\n", + "You are facing the garbagecan 1. Next to it, you see nothing.\n", + "> look\n", + "You are facing the garbagecan 1. Next to it, you see nothing.\n", + "> look\n", + "You are facing the garbagecan 1. Next to it, you see nothing.\n", + "> look\n", + "You are facing the garbagecan 1. Next to it, you see nothing.\n", + "> examine garbagecan 1\n", + "On the garbagecan 1, you see a soapbar 1.\n", + "> look\n", + "You are facing the garbagecan 1. Next to it, you see nothing.\n", + "> look\n", + "You are facing the garbagecan 1. Next to it, you see nothing.\n", + "> look\n", + "You are facing the garbagecan 1. Next to it, you see nothing.\n", + "> look\n", + "You are facing the garbagecan 1. Next to it, you see nothing.\n", + "> look\n", + "You are facing the garbagecan 1. Next to it, you see nothing.\n", + "> look\n", + "You are facing the garbagecan 1. Next to it, you see nothing.\n", + "> look\n", + "You are facing the garbagecan 1. Next to it, you see nothing.\n", + "> look\n", + "You are facing the garbagecan 1. Next to it, you see nothing.\n", + "> look\n", + "You are facing the garbagecan 1. Next to it, you see nothing.\n", + "> look\n", + "You are facing the garbagecan 1. Next to it, you see nothing.\n", + "> look\n", + "You are facing the garbagecan 1. Next to it, you see nothing.\n", + "> look\n", + "You are facing the garbagecan 1. Next to it, you see nothing.\n", + "> look\n", + "You are facing the garbagecan 1. Next to it, you see nothing.\n", + "> look\n", + "You are facing the garbagecan 1. Next to it, you see nothing.\n", + "> look\n", + "You are facing the garbagecan 1. Next to it, you see nothing.\n", + "> look\n", + "You are facing the garbagecan 1. Next to it, you see nothing.\n", + "> look\n", + "You are facing the garbagecan 1. 
Next to it, you see nothing.\n", + "> look\n", + "You are facing the garbagecan 1. Next to it, you see nothing.\n", + "> look\n", + "You are facing the garbagecan 1. Next to it, you see nothing.\n", + "> examine garbagecan 1\n", + "On the garbagecan 1, you see a soapbar 1.\n", + "> look\n", + "You are facing the garbagecan 1. Next to it, you see nothing.\n" + ] + }, + { + "data": { + "text/plain": [ + "Prediction(\n", + " trajecotry=['> go to countertop 1', 'You arrive at countertop 1. On the countertop 1, you see a candle 1, a soapbar 1, a soapbottle 2, a soapbottle 1, and a spraybottle 1.', '> take soapbar 1 from countertop 1', 'You pick up the soapbar 1 from the countertop 1.', '> go to garbagecan 1', 'You arrive at garbagecan 1. On the garbagecan 1, you see nothing.', '> move soapbar 1 to garbagecan 1', 'You move the soapbar 1 to the garbagecan 1.', '> examine garbagecan 1', 'On the garbagecan 1, you see a soapbar 1.', '> examine garbagecan 1', 'On the garbagecan 1, you see a soapbar 1.', '> examine garbagecan 1', 'On the garbagecan 1, you see a soapbar 1.', '> examine garbagecan 1', 'On the garbagecan 1, you see a soapbar 1.', '> look', 'You are facing the garbagecan 1. Next to it, you see nothing.', '> look', 'You are facing the garbagecan 1. Next to it, you see nothing.', '> look', 'You are facing the garbagecan 1. Next to it, you see nothing.', '> take soapbar 1 from garbagecan 1', 'You pick up the soapbar 1 from the garbagecan 1.', '> move soapbar 1 to garbagecan 1', 'You move the soapbar 1 to the garbagecan 1.', '> examine garbagecan 1', 'On the garbagecan 1, you see a soapbar 1.', '> examine garbagecan 1', 'On the garbagecan 1, you see a soapbar 1.', '> examine garbagecan 1', 'On the garbagecan 1, you see a soapbar 1.', '> examine garbagecan 1', 'On the garbagecan 1, you see a soapbar 1.', '> examine garbagecan 1', 'On the garbagecan 1, you see a soapbar 1.', '> look', 'You are facing the garbagecan 1. 
Next to it, you see nothing.', '> look', 'You are facing the garbagecan 1. Next to it, you see nothing.', '> look', 'You are facing the garbagecan 1. Next to it, you see nothing.', '> look', 'You are facing the garbagecan 1. Next to it, you see nothing.', '> examine garbagecan 1', 'On the garbagecan 1, you see a soapbar 1.', '> look', 'You are facing the garbagecan 1. Next to it, you see nothing.', '> look', 'You are facing the garbagecan 1. Next to it, you see nothing.', '> look', 'You are facing the garbagecan 1. Next to it, you see nothing.', '> look', 'You are facing the garbagecan 1. Next to it, you see nothing.', '> look', 'You are facing the garbagecan 1. Next to it, you see nothing.', '> examine garbagecan 1', 'On the garbagecan 1, you see a soapbar 1.', '> look', 'You are facing the garbagecan 1. Next to it, you see nothing.', '> look', 'You are facing the garbagecan 1. Next to it, you see nothing.', '> look', 'You are facing the garbagecan 1. Next to it, you see nothing.', '> look', 'You are facing the garbagecan 1. Next to it, you see nothing.', '> look', 'You are facing the garbagecan 1. Next to it, you see nothing.', '> look', 'You are facing the garbagecan 1. Next to it, you see nothing.', '> look', 'You are facing the garbagecan 1. Next to it, you see nothing.', '> look', 'You are facing the garbagecan 1. Next to it, you see nothing.', '> look', 'You are facing the garbagecan 1. Next to it, you see nothing.', '> look', 'You are facing the garbagecan 1. Next to it, you see nothing.', '> look', 'You are facing the garbagecan 1. Next to it, you see nothing.', '> look', 'You are facing the garbagecan 1. Next to it, you see nothing.', '> look', 'You are facing the garbagecan 1. Next to it, you see nothing.', '> look', 'You are facing the garbagecan 1. Next to it, you see nothing.', '> look', 'You are facing the garbagecan 1. Next to it, you see nothing.', '> look', 'You are facing the garbagecan 1. 
Next to it, you see nothing.', '> look', 'You are facing the garbagecan 1. Next to it, you see nothing.', '> look', 'You are facing the garbagecan 1. Next to it, you see nothing.', '> look', 'You are facing the garbagecan 1. Next to it, you see nothing.', '> examine garbagecan 1', 'On the garbagecan 1, you see a soapbar 1.', '> look', 'You are facing the garbagecan 1. Next to it, you see nothing.'],\n", + " success=0\n", + ")" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agent_4o = Agent()\n", + "agent_4o.set_lm(gpt4o)\n", + "agent_4o.verbose = True\n", + "\n", + "agent_4o(**example.inputs())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Okay, in this case it couldn't solve this example! Now, let's check the average quality of 4o and 4o-mini." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "metric = lambda x, y, trace=None: y.success\n", + "evaluate = dspy.Evaluate(devset=devset, metric=metric, display_progress=True, num_threads=16)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 115.00 / 200 (57.5%): 100%|██████████| 200/200 [06:14<00:00, 1.87s/it]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024/12/28 11:10:25 INFO dspy.evaluate.evaluate: Average Metric: 115 / 200 (57.5%)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "data": { + "text/plain": [ + "57.5" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agent_4o.verbose = False\n", + "evaluate(agent_4o)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 30.00 / 200 
(15.0%): 100%|██████████| 200/200 [08:33<00:00, 2.57s/it]" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024/12/28 11:18:59 INFO dspy.evaluate.evaluate: Average Metric: 30 / 200 (15.0%)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "data": { + "text/plain": [ + "15.0" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agent_4o_mini = Agent()\n", + "agent_4o_mini.set_lm(gpt4o_mini)\n", + "\n", + "evaluate(agent_4o_mini)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Out of the box, on this task, 4o is decent (58% success rate) while 4o-mini struggles (15% success rate).\n", + "\n", + "Let's apply the following strategy:\n", + "\n", + "1. We'll optimize the _prompts_ for gpt-4o in a lightweight way.\n", + "2. We'll then use this prompt-optimized agent as a teacher to fine-tune gpt-4o-mini on the task. This will increase its quality from 15% to 72% (or 82% if you use 500 trainset examples)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Prompt-optimizing GPT-4o" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "optimizer = dspy.MIPROv2(metric=metric, auto=\"light\", num_threads=16, prompt_model=gpt4o)\n", + "\n", + "config = dict(max_bootstrapped_demos=1, max_labeled_demos=0, minibatch_size=40)\n", + "optimized_4o = optimizer.compile(agent_4o, trainset=trainset, **config, requires_permission_to_run=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Fine-tuning GPT-4o-mini\n", + "\n", + "For fine-tuning, we'll need a teacher program (`optimized_4o` above) and a student program derived from it (`student_4o_mini` below)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "student_4o_mini = optimized_4o.deepcopy()\n", + "student_4o_mini.set_lm(gpt4o_mini)\n", + "# student_4o_mini.react.demos = [] # you can optionally reset the demos" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "optimizer = dspy.BootstrapFinetune(metric=metric, num_threads=16)\n", + "finetuned_4o_mini = optimizer.compile(student_4o_mini, teacher=optimized_4o, trainset=trainset)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Evaluate the finetuned GPT-4o-mini agent" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average Metric: 143.00 / 200 (71.5%): 100%|██████████| 200/200 [03:15<00:00, 1.05it/s]" + ] + } + ], + "source": [ + "evaluate(finetuned_4o_mini)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Having done all this optimization, let's save our program so we can use it later! This will keep a reference to the fine-tuned model as well, as long as it continues to exist with the same identifier on the provider side." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "finetuned_4o_mini.save('finetuned_4o_mini_001.pkl')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's now check out one task using our fine-tuned agent program!" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Task: -= Welcome to TextWorld, ALFRED! =-\n", + "\n", + "You are in the middle of a room. 
Looking quickly around you, you see a armchair 1, a cabinet 1, a drawer 21, a drawer 20, a drawer 19, a drawer 18, a drawer 17, a drawer 16, a drawer 15, a drawer 14, a drawer 13, a drawer 12, a drawer 11, a drawer 10, a drawer 9, a drawer 8, a drawer 7, a drawer 6, a drawer 5, a drawer 4, a drawer 3, a drawer 2, a drawer 1, a dresser 1, a garbagecan 1, a sidetable 5, a sidetable 4, a sidetable 3, a sidetable 2, a sidetable 1, and a sofa 1.\n", + "\n", + "Your task is to: put some box on dresser.\n", + "> go to cabinet 1\n", + "You arrive at cabinet 1. On the cabinet 1, you see nothing.\n", + "> go to dresser 1\n", + "You arrive at dresser 1. On the dresser 1, you see a book 1, a newspaper 1, a remotecontrol 1, a statue 3, and a television 1.\n", + "> look\n", + "You are facing the dresser 1. Next to it, you see nothing.\n", + "> go to sidetable 1\n", + "You arrive at sidetable 1. On the sidetable 1, you see a cellphone 1, and a desklamp 1.\n", + "> go to sidetable 2\n", + "You arrive at sidetable 2. On the sidetable 2, you see a box 2.\n", + "> take box 2 from sidetable 2\n", + "You pick up the box 2 from the sidetable 2.\n", + "> go to dresser 1\n", + "You arrive at dresser 1. On the dresser 1, you see a book 1, a newspaper 1, a remotecontrol 1, a statue 3, and a television 1.\n", + "> move box 2 to dresser 1\n", + "You move the box 2 to the dresser 1.\n" + ] + }, + { + "data": { + "text/plain": [ + "Prediction(\n", + " trajecotry=['> go to cabinet 1', 'You arrive at cabinet 1. On the cabinet 1, you see nothing.', '> go to dresser 1', 'You arrive at dresser 1. On the dresser 1, you see a book 1, a newspaper 1, a remotecontrol 1, a statue 3, and a television 1.', '> look', 'You are facing the dresser 1. Next to it, you see nothing.', '> go to sidetable 1', 'You arrive at sidetable 1. On the sidetable 1, you see a cellphone 1, and a desklamp 1.', '> go to sidetable 2', 'You arrive at sidetable 2. 
On the sidetable 2, you see a box 2.', '> take box 2 from sidetable 2', 'You pick up the box 2 from the sidetable 2.', '> go to dresser 1', 'You arrive at dresser 1. On the dresser 1, you see a book 1, a newspaper 1, a remotecontrol 1, a statue 3, and a television 1.', '> move box 2 to dresser 1', 'You move the box 2 to the dresser 1.'],\n", + " success=1\n", + ")" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "finetuned_4o_mini.verbose = True\n", + "finetuned_4o_mini(**devset[0].inputs())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you want to load and use the agent program, you can do that as follows." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "loaded = Agent()\n", + "loaded.load('finetuned_4o_mini_001.pkl')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "jun2024_py310", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/dspy/datasets/__init__.py b/dspy/datasets/__init__.py index 582be6e29..d9d8c510f 100644 --- a/dspy/datasets/__init__.py +++ b/dspy/datasets/__init__.py @@ -3,6 +3,7 @@ from dspy.datasets.dataset import Dataset from dspy.datasets.hotpotqa import HotPotQA from dspy.datasets.math import MATH +from dspy.datasets.alfworld import AlfWorld __all__ = [ "Colors", diff --git a/dspy/datasets/alfworld/__init__.py b/dspy/datasets/alfworld/__init__.py new file mode 100644 index 000000000..9a1bc42d5 --- /dev/null +++ b/dspy/datasets/alfworld/__init__.py @@ -0,0 +1 @@ +from dspy.datasets.alfworld.alfworld import AlfWorld \ No newline at end of file diff --git 
a/dspy/datasets/alfworld/alfworld.py b/dspy/datasets/alfworld/alfworld.py new file mode 100644 index 000000000..8a78a3e01 --- /dev/null +++ b/dspy/datasets/alfworld/alfworld.py @@ -0,0 +1,149 @@ +import os +import queue +import random + +def env_worker(inq, outq): + """ + Worker process: creates a single AlfredTWEnv instance, + handles 'init' (with task idx) and 'step' (with action). + """ + + try: + import io + import yaml + import alfworld.agents.environment as environment + from contextlib import redirect_stdout, redirect_stderr + except ImportError: + raise ImportError("alfworld is not installed. " \ + "Please install it via `pip install alfworld==0.3.5` then run `alfworld-download`.") + + buf = io.StringIO() + base_dir = os.path.dirname(os.path.abspath(__file__)) + config_path = os.path.join(base_dir, 'base_config.yml') + + with open(config_path) as f: + config = yaml.safe_load(f) + + with redirect_stdout(buf), redirect_stderr(buf): + base_env = environment.AlfredTWEnv(config, train_eval="train") + + env = None + while True: + cmd, data = inq.get() + if cmd == 'init': + env = base_env.init_env(batch_size=1) + env.skip(data) + task_def, info = env.reset() + outq.put((task_def[0], info)) + elif cmd == 'step': + obs, rew, done, info = env.step([data]) + outq.put((obs, rew, done, info)) + elif cmd == 'close': + outq.put('CLOSED') + break + else: + outq.put('UNKNOWN_CMD') + + +class EnvPool: + """ + Pool of processes, each with a unique env_worker. + Acquire a worker using a context manager for safe usage: + with pool.session() as sess: + sess.init(5) # init with idx=5 + obs, rew, done, info = sess.step("go north") + ... + """ + def __init__(self, size=2): + self.size = size + self.workers = [] + self.available = queue.Queue() + + try: + import multiprocess as mp + except ImportError: + raise ImportError("multiprocess is not installed. 
" \ + "Please install it via `pip install multiprocess`.") + + # Must call set_start_method('spawn') here, before creating any processes + try: + mp.set_start_method("spawn", force=True) + except RuntimeError: + # If it's already set, ignore + pass + + ctx = mp.get_context("spawn") + for i in range(size): + inq = ctx.Queue() + outq = ctx.Queue() + p = ctx.Process(target=env_worker, args=(inq, outq), daemon=True) + p.start() + self.workers.append((inq, outq, p)) + self.available.put(i) + + def _acquire(self): + wid = self.available.get() + return wid, self.workers[wid][0], self.workers[wid][1] + + def _release(self, wid): + self.available.put(wid) + + def close_all(self): + """Close all processes in the pool.""" + while not self.available.empty(): + wid = self.available.get() + inq, outq, proc = self.workers[wid] + inq.put(('close', None)) + outq.get() # Wait 'CLOSED' + inq.close() + outq.close() + proc.join() + + def session(self): + """Context manager that acquires/releases a single worker.""" + return _EnvSession(self) + + +class _EnvSession: + """ + A context manager that acquires a worker from the pool, + provides .init(idx) and .step(action), then releases the worker. 
+ """ + def __init__(self, pool: EnvPool): + self.pool = pool + self.wid = None + self.inq = None + self.outq = None + + def __enter__(self): + self.wid, self.inq, self.outq = self.pool._acquire() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.pool._release(self.wid) + + def init(self, idx): + self.inq.put(('init', idx)) + return self.outq.get() # (task_def, info) + + def step(self, action): + self.inq.put(('step', action)) + return self.outq.get() # (obs, rew, done, info) + + +class AlfWorld: + def __init__(self, max_threads=20): + self.POOL = EnvPool(size=max_threads) + + import dspy + dataset = [dspy.Example(idx=idx).with_inputs('idx') for idx in range(3500)] + random.Random(0).shuffle(dataset) + + trainset, devset = dataset[:3000], dataset[-500:] + assert len(trainset) + len(devset) <= len(dataset) + + self.trainset = trainset + self.devset = devset + + def __del__(self): + self.POOL.close_all() diff --git a/dspy/datasets/alfworld/base_config.yml b/dspy/datasets/alfworld/base_config.yml new file mode 100644 index 000000000..03c6d146b --- /dev/null +++ b/dspy/datasets/alfworld/base_config.yml @@ -0,0 +1,145 @@ +dataset: + data_path: '$ALFWORLD_DATA/json_2.1.1/train' + eval_id_data_path: '$ALFWORLD_DATA/json_2.1.1/valid_seen' # null/None to disable + eval_ood_data_path: '$ALFWORLD_DATA/json_2.1.1/valid_unseen' # null/None to disable + num_train_games: -1 # max training games (<=0 indicates full dataset) + num_eval_games: -1 # max evaluation games (<=0 indicates full dataset) + +logic: + domain: '$ALFWORLD_DATA/logic/alfred.pddl' # PDDL domain file that defines the world dynamics + grammar: '$ALFWORLD_DATA/logic/alfred.twl2' # Grammar file that defines the text feedbacks + +env: + type: 'AlfredTWEnv' # 'AlfredTWEnv' or 'AlfredThorEnv' or 'AlfredHybrid' + regen_game_files: False # check if game is solvable by expert and save to game.tw-pddl file + domain_randomization: False # shuffle Textworld print order and object id nums + task_types: 
[1, 2, 3, 4, 5, 6] # task-type ids: 1 - Pick & Place, 2 - Examine in Light, 3 - Clean & Place, 4 - Heat & Place, 5 - Cool & Place, 6 - Pick Two & Place + expert_timeout_steps: 150 # max steps before timeout for expert to solve the task + expert_type: "handcoded" # 'handcoded' or 'downward'. Note: the downward planner is very slow for real-time use + goal_desc_human_anns_prob: 0.0 # prob of using human-annotated goal language instead of templated goals (1.0 indicates all human annotations from ALFRED) + + hybrid: + start_eps: 100000 # starting episode of hybrid training, tw-only training upto this point + thor_prob: 0.5 # prob of AlfredThorEnv during hybrid training + eval_mode: "tw" # 'tw' or 'thor' - env used for evaluation during hybrid training + + thor: + screen_width: 300 # width of THOR window + screen_height: 300 # height of THOR window + smooth_nav: False # smooth rotations, looks, and translations during navigation (very slow) + save_frames_to_disk: False # save frame PNGs to disk (useful for making videos) + save_frames_path: './videos/' # path to save frame PNGs + +controller: + type: 'oracle' # 'oracle' or 'oracle_astar' or 'mrcnn' or 'mrcnn_astar' (aka BUTLER) + debug: False + load_receps: True # load receptacle locations from precomputed dict (if available) + +mask_rcnn: + pretrained_model_path: '$ALFWORLD_DATA/detectors/mrcnn.pth' + +general: + random_seed: 42 + use_cuda: True # disable this when running on machine without cuda + visdom: False # plot training/eval curves, run with visdom server + task: 'alfred' + training_method: 'dagger' # 'dqn' or 'dagger' + save_path: './training/' # path to save pytorch models + observation_pool_capacity: 3 # k-size queue, 0 indicates no observation + hide_init_receptacles: False # remove initial observation containing navigable receptacles + + training: + batch_size: 10 + max_episode: 50000 + smoothing_eps: 0.1 + optimizer: + learning_rate: 0.001 + clip_grad_norm: 5 + + evaluate: + run_eval: True + batch_size: 
10 + env: + type: "AlfredTWEnv" + + checkpoint: + report_frequency: 1000 # report every N episodes + experiment_tag: 'test' # name of experiment + load_pretrained: False # during test, enable this so that the agent loads your pretrained model + load_from_tag: 'not loading anything' # name of pre-trained model to load in save_path + + model: + encoder_layers: 1 + decoder_layers: 1 + encoder_conv_num: 5 + block_hidden_dim: 64 + n_heads: 1 + dropout: 0.1 + block_dropout: 0.1 + recurrent: True + +rl: + action_space: "admissible" # 'admissible' (candidates from text engine) or 'generation' (seq2seq-style generation) or 'beam_search_choice' or 'exhaustive' (not working) + max_target_length: 20 # max token length for seq2seq generation + beam_width: 10 # 1 means greedy + generate_top_k: 3 + + training: + max_nb_steps_per_episode: 50 # terminate after this many steps + learn_start_from_this_episode: 0 # delay updates until this episode + target_net_update_frequency: 500 # sync target net with online net per this many epochs + + replay: + accumulate_reward_from_final: True + count_reward_lambda: 0.0 # 0 to disable + novel_object_reward_lambda: 0.0 # 0 to disable + discount_gamma_game_reward: 0.9 + discount_gamma_count_reward: 0.5 + discount_gamma_novel_object_reward: 0.5 + replay_memory_capacity: 500000 # adjust this depending on your RAM size + replay_memory_priority_fraction: 0.5 + update_per_k_game_steps: 5 + replay_batch_size: 64 + multi_step: 3 + replay_sample_history_length: 4 + replay_sample_update_from: 2 + + epsilon_greedy: + noisy_net: False # if this is true, then epsilon greedy is disabled + epsilon_anneal_episodes: 1000 # -1 if not annealing + epsilon_anneal_from: 0.3 + epsilon_anneal_to: 0.1 + +dagger: + action_space: "generation" # 'admissible' (candidates from text engine) or 'generation' (seq2seq-style generation) or 'exhaustive' (not working) + max_target_length: 20 # max token length for seq2seq generation + beam_width: 10 # 1 means greedy + generate_top_k: 
5 + unstick_by_beam_search: False # use beam-search for failed actions, set True during evaluation + + training: + max_nb_steps_per_episode: 50 # terminate after this many steps + + fraction_assist: + fraction_assist_anneal_episodes: 50000 + fraction_assist_anneal_from: 1.0 + fraction_assist_anneal_to: 0.01 + + fraction_random: + fraction_random_anneal_episodes: 0 + fraction_random_anneal_from: 0.0 + fraction_random_anneal_to: 0.0 + + replay: + replay_memory_capacity: 500000 + update_per_k_game_steps: 5 + replay_batch_size: 64 + replay_sample_history_length: 4 + replay_sample_update_from: 2 + +vision_dagger: + model_type: "resnet" # 'resnet' (whole image features) or 'maskrcnn_whole' (whole image MaskRCNN feats) or 'maskrcnn' (top k MaskRCNN detection feats) or 'no_vision' (zero vision input) + resnet_fc_dim: 64 + maskrcnn_top_k_boxes: 10 # top k box features + use_exploration_frame_feats: False # append feats from initial exploration (memory intensive!) + sequence_aggregation_method: "average" # 'sum' or 'average' or 'rnn' \ No newline at end of file diff --git a/dspy/teleprompt/bootstrap_finetune.py b/dspy/teleprompt/bootstrap_finetune.py index af75c7249..c69694625 100644 --- a/dspy/teleprompt/bootstrap_finetune.py +++ b/dspy/teleprompt/bootstrap_finetune.py @@ -65,12 +65,16 @@ def compile(self, student: Program, trainset: List[Example], teacher: Optional[P # environments. 
print("[BootstrapFinetune] Preparing the student and teacher programs...") student = prepare_student(student) - teacher = prepare_teacher(student, teacher) + teachers = teacher if isinstance(teacher, list) else [teacher] + teachers = [prepare_teacher(student, teacher) for teacher in teachers] set_missing_predictor_lms(student) - set_missing_predictor_lms(teacher) print("[BootstrapFinetune] Bootstrapping data...") - trace_data = bootstrap_trace_data(program=teacher, dataset=trainset, metric=self.metric, num_threads=self.num_threads) + trace_data = [] + + for teacher in teachers: + set_missing_predictor_lms(teacher) + trace_data += bootstrap_trace_data(program=teacher, dataset=trainset, metric=self.metric, num_threads=self.num_threads) print("[BootstrapFinetune] Preparing the train data...") key_to_data = {} @@ -179,46 +183,53 @@ def bootstrap_trace_data( # Return a list of dicts with the following keys: # example_ind, example, prediction, trace, and score (if metric != None) evaluator = Evaluate( - devset=dataset, num_threads=num_threads, display_progress=True, + devset=dataset, num_threads=num_threads, display_progress=True, return_outputs=True, provide_traceback=True # TODO(check with team) ) - # TODO(PR): Should "trace" not be included in the lambda function? 
- _metric = metric if metric else lambda example, prediction: 1 - evaluator(program, metric=_metric) + + def wrapped_metric(example, prediction, trace=None): + prediction, _ = prediction + return metric(example, prediction, trace) if metric else True + + def wrapped_program(**kwargs): + with dspy.context(trace=[]): + return program(**kwargs), dspy.settings.trace.copy() + + _, outputs = evaluator(wrapped_program, metric=wrapped_metric) data = [] - for example_ind, example in enumerate(dataset): - data_dict = bootstrap_trace_data_one_example( - example=example, program=program, metric=metric - ) - data_dict["example_ind"] = example_ind + for example_ind, (example, prediction, score) in enumerate(outputs): + prediction, trace = prediction + data_dict = dict(example=example, prediction=prediction, trace=trace, example_ind=example_ind) + if metric: + data_dict["score"] = score data.append(data_dict) return data -# TODO(PR) check with team -def bootstrap_trace_data_one_example( - example: Example, - program: Program, - metric: Optional[Callable] = None -) -> Dict[str, Any]: - # Return a dict with the following keys: - # example, prediction, trace, and score (if metric != None) - with dspy.context(trace=[]): - prediction = program(**example.inputs()) - trace = dspy.settings.trace - score = metric(example, prediction, trace) if metric else None - - data_dict = dict( - example=example, - prediction=prediction, - trace=trace, - ) - if metric: - data_dict["score"] = score +# # TODO(PR) check with team +# def bootstrap_trace_data_one_example( +# example: Example, +# program: Program, +# metric: Optional[Callable] = None +# ) -> Dict[str, Any]: +# # Return a dict with the following keys: +# # example, prediction, trace, and score (if metric != None) +# with dspy.context(trace=[]): +# prediction = program(**example.inputs()) +# trace = dspy.settings.trace +# score = metric(example, prediction, trace) if metric else None + +# data_dict = dict( +# example=example, +# 
prediction=prediction, +# trace=trace, +# ) +# if metric: +# data_dict["score"] = score - return data_dict +# return data_dict # Note: Shared below are useful functions for preparing student/teacher programs @@ -273,7 +284,7 @@ def assert_structural_equivalency(program1: object, program2: object): assert name1 == name2, err assert isinstance(pred1, Predict) assert isinstance(pred2, Predict) - assert pred1.signature.equals(pred2.signature) + # assert pred1.signature.equals(pred2.signature) def assert_no_shared_predictor(program1: Program, program2: Program):