From 4503d964e6551b0f21d257b8cf3b20f1d6295c36 Mon Sep 17 00:00:00 2001
From: Jack Gerrits <jackgerrits@users.noreply.github.com>
Date: Thu, 7 Mar 2024 13:11:52 -0500
Subject: [PATCH] Implement docker based command line code executor (#1856)

* implement docker based command line code executor

* undo import

* test skips

* format

* fix type issue

* skip docker tests

* fix paths

* add docs

* Update __init__.py

* class name

* precommit

* undo twoagent change

* use relative to directly

* Update, fixes, etc.

* update doc

* Update docstring

---------

Co-authored-by: Eric Zhu <ekzhu@users.noreply.github.com>
---
 autogen/coding/__init__.py                    |   5 +
 .../docker_commandline_code_executor.py       | 231 +++++++++++
 .../coding/jupyter/docker_jupyter_server.py   |  18 +-
 autogen/coding/jupyter/jupyter_client.py      |   4 +-
 .../coding/jupyter/jupyter_code_executor.py   |   4 +-
 .../coding/jupyter/local_jupyter_server.py    |   4 +-
 test/coding/test_commandline_code_executor.py | 100 ++++-
 .../test_embedded_ipython_code_executor.py    |   5 +
 .../code-execution/cli-code-executor.ipynb    | 362 ++++++++++++++++++
 9 files changed, 701 insertions(+), 32 deletions(-)
 create mode 100644 autogen/coding/docker_commandline_code_executor.py
 create mode 100644 website/docs/topics/code-execution/cli-code-executor.ipynb

diff --git a/autogen/coding/__init__.py b/autogen/coding/__init__.py
index cf75d11436b..6153fc514b4 100644
--- a/autogen/coding/__init__.py
+++ b/autogen/coding/__init__.py
@@ -1,6 +1,8 @@
 from .base import CodeBlock, CodeExecutor, CodeExtractor, CodeResult
 from .factory import CodeExecutorFactory
 from .markdown_code_extractor import MarkdownCodeExtractor
+from .local_commandline_code_executor import LocalCommandLineCodeExecutor, CommandLineCodeResult
+from .docker_commandline_code_executor import DockerCommandLineCodeExecutor
 
 __all__ = (
     "CodeBlock",
@@ -9,4 +11,7 @@
     "CodeExecutor",
     "CodeExecutorFactory",
     "MarkdownCodeExtractor",
+    "LocalCommandLineCodeExecutor",
+    "CommandLineCodeResult",
+    "DockerCommandLineCodeExecutor",
 )
diff --git a/autogen/coding/docker_commandline_code_executor.py b/autogen/coding/docker_commandline_code_executor.py
new file mode 100644
index 00000000000..18b9254f55f
--- /dev/null
+++ b/autogen/coding/docker_commandline_code_executor.py
@@ -0,0 +1,231 @@
+from __future__ import annotations
+import atexit
+from hashlib import md5
+import logging
+from pathlib import Path
+from time import sleep
+from types import TracebackType
+import uuid
+from typing import List, Optional, Type, Union
+import docker
+from docker.models.containers import Container
+from docker.errors import ImageNotFound
+
+from .local_commandline_code_executor import CommandLineCodeResult
+
+from ..code_utils import TIMEOUT_MSG, _cmd
+from .base import CodeBlock, CodeExecutor, CodeExtractor
+from .markdown_code_extractor import MarkdownCodeExtractor
+import sys
+
+if sys.version_info >= (3, 11):
+    from typing import Self
+else:
+    from typing_extensions import Self
+
+
+def _wait_for_ready(container: Container, timeout: int = 60, stop_time: int = 0.1) -> None:
+    elapsed_time = 0
+    while container.status != "running" and elapsed_time < timeout:
+        sleep(stop_time)
+        elapsed_time += stop_time
+        container.reload()
+        continue
+    if container.status != "running":
+        raise ValueError("Container failed to start")
+
+
+__all__ = ("DockerCommandLineCodeExecutor",)
+
+
+class DockerCommandLineCodeExecutor(CodeExecutor):
+    def __init__(
+        self,
+        image: str = "python:3-slim",
+        container_name: Optional[str] = None,
+        timeout: int = 60,
+        work_dir: Union[Path, str] = Path("."),
+        auto_remove: bool = True,
+        stop_container: bool = True,
+    ):
+        """(Experimental) A code executor class that executes code through
+        a command line environment in a Docker container.
+
+        The executor first saves each code block in a file in the working
+        directory, and then executes the code file in the container.
+        The executor executes the code blocks in the order they are received.
+        Currently, the executor only supports Python and shell scripts.
+        For Python code, use the language "python" for the code block.
+        For shell scripts, use the language "bash", "shell", or "sh" for the code
+        block.
+
+        Args:
+            image (str, optional): Docker image to use for code execution.
+                Defaults to "python:3-slim".
+            container_name (Optional[str], optional): Name of the Docker container
+                which is created. If None, will autogenerate a name. Defaults to None.
+            timeout (int, optional): The timeout for code execution. Defaults to 60.
+            work_dir (Union[Path, str], optional): The working directory for the code
+                execution. Defaults to Path(".").
+            auto_remove (bool, optional): If true, will automatically remove the Docker
+                container when it is stopped. Defaults to True.
+            stop_container (bool, optional): If true, will automatically stop the
+                container when stop is called, when the context manager exits or when
+                the Python process exits with atexit. Defaults to True.
+
+        Raises:
+            ValueError: On argument error, or if the container fails to start.
+        """
+
+        if timeout < 1:
+            raise ValueError("Timeout must be greater than or equal to 1.")
+
+        if isinstance(work_dir, str):
+            work_dir = Path(work_dir)
+
+        if not work_dir.exists():
+            raise ValueError(f"Working directory {work_dir} does not exist.")
+
+        client = docker.from_env()
+
+        # Check if the image exists
+        try:
+            client.images.get(image)
+        except ImageNotFound:
+            logging.info(f"Pulling image {image}...")
+            # Let the docker exception escape if this fails.
+            client.images.pull(image)
+
+        if container_name is None:
+            container_name = f"autogen-code-exec-{uuid.uuid4()}"
+
+        # Start a container from the image, ready to exec commands later
+        self._container = client.containers.create(
+            image,
+            name=container_name,
+            entrypoint="/bin/sh",
+            tty=True,
+            auto_remove=auto_remove,
+            volumes={str(work_dir.resolve()): {"bind": "/workspace", "mode": "rw"}},
+            working_dir="/workspace",
+        )
+        self._container.start()
+
+        _wait_for_ready(self._container)
+
+        def cleanup():
+            try:
+                container = client.containers.get(container_name)
+                container.stop()
+            except docker.errors.NotFound:
+                pass
+
+            atexit.unregister(cleanup)
+
+        if stop_container:
+            atexit.register(cleanup)
+
+        self._cleanup = cleanup
+
+        # Check if the container is running
+        if self._container.status != "running":
+            raise ValueError(f"Failed to start container from image {image}. Logs: {self._container.logs()}")
+
+        self._timeout = timeout
+        self._work_dir: Path = work_dir
+
+    @property
+    def timeout(self) -> int:
+        """(Experimental) The timeout for code execution."""
+        return self._timeout
+
+    @property
+    def work_dir(self) -> Path:
+        """(Experimental) The working directory for the code execution."""
+        return self._work_dir
+
+    @property
+    def code_extractor(self) -> CodeExtractor:
+        """(Experimental) Export a code extractor that can be used by an agent."""
+        return MarkdownCodeExtractor()
+
+    def execute_code_blocks(self, code_blocks: List[CodeBlock]) -> CommandLineCodeResult:
+        """(Experimental) Execute the code blocks and return the result.
+
+        Args:
+            code_blocks (List[CodeBlock]): The code blocks to execute.
+
+        Returns:
+            CommandLineCodeResult: The result of the code execution."""
+
+        if len(code_blocks) == 0:
+            raise ValueError("No code blocks to execute.")
+
+        outputs = []
+        files = []
+        last_exit_code = 0
+        for code_block in code_blocks:
+            lang = code_block.language
+            code = code_block.code
+
+            code_hash = md5(code.encode()).hexdigest()
+
+            # Check if there is a filename comment
+            # Get first line
+            first_line = code.split("\n")[0]
+            if first_line.startswith("# filename:"):
+                filename = first_line.split(":")[1].strip()
+
+                # Handle relative paths in the filename
+                path = Path(filename)
+                if not path.is_absolute():
+                    path = Path("/workspace") / path
+                path = path.resolve()
+                try:
+                    path.relative_to(Path("/workspace"))
+                except ValueError:
+                    return CommandLineCodeResult(exit_code=1, output="Filename is not in the workspace")
+            else:
+                # create a file with an automatically generated name
+                filename = f"tmp_code_{code_hash}.{'py' if lang.startswith('python') else lang}"
+
+            code_path = self._work_dir / filename
+            with code_path.open("w", encoding="utf-8") as fout:
+                fout.write(code)
+
+            command = ["timeout", str(self._timeout), _cmd(lang), filename]
+
+            result = self._container.exec_run(command)
+            exit_code = result.exit_code
+            output = result.output.decode("utf-8")
+            if exit_code == 124:
+                output += "\n"
+                output += TIMEOUT_MSG
+
+            outputs.append(output)
+            files.append(code_path)
+
+            last_exit_code = exit_code
+            if exit_code != 0:
+                break
+
+        code_file = str(files[0]) if files else None
+        return CommandLineCodeResult(exit_code=last_exit_code, output="".join(outputs), code_file=code_file)
+
+    def restart(self) -> None:
+        """(Experimental) Restart the code executor."""
+        self._container.restart()
+        if self._container.status != "running":
+            raise ValueError(f"Failed to restart container. Logs: {self._container.logs()}")
+
+    def stop(self) -> None:
+        """(Experimental) Stop the code executor."""
+        self._cleanup()
+
+    def __enter__(self) -> Self:
+        return self
+
+    def __exit__(
+        self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
+    ) -> None:
+        self.stop()
diff --git a/autogen/coding/jupyter/docker_jupyter_server.py b/autogen/coding/jupyter/docker_jupyter_server.py
index 5288d295cd7..bf55543a440 100644
--- a/autogen/coding/jupyter/docker_jupyter_server.py
+++ b/autogen/coding/jupyter/docker_jupyter_server.py
@@ -2,16 +2,17 @@
 
 from pathlib import Path
 import sys
-from time import sleep
 from types import TracebackType
 import uuid
-from typing import Dict, Optional, Union
+from typing import Dict, Optional, Type, Union
 import docker
 import secrets
 import io
 import atexit
 import logging
 
+from ..docker_commandline_code_executor import _wait_for_ready
+
 if sys.version_info >= (3, 11):
     from typing import Self
 else:
@@ -22,17 +23,6 @@
 from .base import JupyterConnectable, JupyterConnectionInfo
 
 
-def _wait_for_ready(container: docker.Container, timeout: int = 60, stop_time: int = 0.1) -> None:
-    elapsed_time = 0
-    while container.status != "running" and elapsed_time < timeout:
-        sleep(stop_time)
-        elapsed_time += stop_time
-        container.reload()
-        continue
-    if container.status != "running":
-        raise ValueError("Container failed to start")
-
-
 class DockerJupyterServer(JupyterConnectable):
     DEFAULT_DOCKERFILE = """FROM quay.io/jupyter/docker-stacks-foundation
 
@@ -162,6 +152,6 @@ def __enter__(self) -> Self:
         return self
 
     def __exit__(
-        self, exc_type: Optional[type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
+        self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
     ) -> None:
         self.stop()
diff --git a/autogen/coding/jupyter/jupyter_client.py b/autogen/coding/jupyter/jupyter_client.py
index e02d3b49bca..8f97ab82418 100644
--- a/autogen/coding/jupyter/jupyter_client.py
+++ b/autogen/coding/jupyter/jupyter_client.py
@@ -2,7 +2,7 @@
 
 from dataclasses import dataclass
 from types import TracebackType
-from typing import Any, Dict, List, Optional, cast
+from typing import Any, Dict, List, Optional, Type, cast
 import sys
 
 if sys.version_info >= (3, 11):
@@ -111,7 +111,7 @@ def __enter__(self) -> Self:
         return self
 
     def __exit__(
-        self, exc_type: Optional[type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
+        self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
     ) -> None:
         self.stop()
 
diff --git a/autogen/coding/jupyter/jupyter_code_executor.py b/autogen/coding/jupyter/jupyter_code_executor.py
index 6e63652ef37..37cf5c91c61 100644
--- a/autogen/coding/jupyter/jupyter_code_executor.py
+++ b/autogen/coding/jupyter/jupyter_code_executor.py
@@ -5,7 +5,7 @@
 import re
 from types import TracebackType
 import uuid
-from typing import Any, ClassVar, List, Optional, Union
+from typing import Any, ClassVar, List, Optional, Type, Union
 import sys
 
 if sys.version_info >= (3, 11):
@@ -201,6 +201,6 @@ def __enter__(self) -> Self:
         return self
 
     def __exit__(
-        self, exc_type: Optional[type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
+        self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
     ) -> None:
         self.stop()
diff --git a/autogen/coding/jupyter/local_jupyter_server.py b/autogen/coding/jupyter/local_jupyter_server.py
index 91cccdab304..0709f55ee4e 100644
--- a/autogen/coding/jupyter/local_jupyter_server.py
+++ b/autogen/coding/jupyter/local_jupyter_server.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 from types import TracebackType
 
-from typing import Optional, Union, cast
+from typing import Optional, Type, Union, cast
 import subprocess
 import signal
 import sys
@@ -157,6 +157,6 @@ def __enter__(self) -> Self:
         return self
 
     def __exit__(
-        self, exc_type: Optional[type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
+        self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
     ) -> None:
         self.stop()
diff --git a/test/coding/test_commandline_code_executor.py b/test/coding/test_commandline_code_executor.py
index 6dec5c574a3..529732bd6d4 100644
--- a/test/coding/test_commandline_code_executor.py
+++ b/test/coding/test_commandline_code_executor.py
@@ -1,16 +1,29 @@
+from pathlib import Path
 import sys
 import tempfile
 import pytest
 from autogen.agentchat.conversable_agent import ConversableAgent
+from autogen.code_utils import is_docker_running
 from autogen.coding.base import CodeBlock, CodeExecutor
 from autogen.coding.factory import CodeExecutorFactory
+from autogen.coding.docker_commandline_code_executor import DockerCommandLineCodeExecutor
 from autogen.coding.local_commandline_code_executor import LocalCommandLineCodeExecutor
 from autogen.oai.openai_utils import config_list_from_json
 
-from conftest import MOCK_OPEN_AI_API_KEY, skip_openai
+from conftest import MOCK_OPEN_AI_API_KEY, skip_openai, skip_docker
 
+if skip_docker or not is_docker_running():
+    classes_to_test = [LocalCommandLineCodeExecutor]
+else:
+    classes_to_test = [LocalCommandLineCodeExecutor, DockerCommandLineCodeExecutor]
 
-def test_create() -> None:
+
+@pytest.mark.parametrize("cls", classes_to_test)
+def test_is_code_executor(cls) -> None:
+    assert isinstance(cls, CodeExecutor)
+
+
+def test_create_local() -> None:
     config = {"executor": "commandline-local"}
     executor = CodeExecutorFactory.create(config)
     assert isinstance(executor, LocalCommandLineCodeExecutor)
@@ -20,18 +33,30 @@ def test_create() -> None:
     assert executor is config["executor"]
 
 
-def test_local_commandline_executor_init() -> None:
-    executor = LocalCommandLineCodeExecutor(timeout=10, work_dir=".")
-    assert executor.timeout == 10 and executor.work_dir == "."
+@pytest.mark.skipif(
+    skip_docker or not is_docker_running(),
+    reason="docker is not running or requested to skip docker tests",
+)
+def test_create_docker() -> None:
+    config = {"executor": DockerCommandLineCodeExecutor()}
+    executor = CodeExecutorFactory.create(config)
+    assert executor is config["executor"]
+
+
+@pytest.mark.parametrize("cls", classes_to_test)
+def test_commandline_executor_init(cls) -> None:
+    executor = cls(timeout=10, work_dir=".")
+    assert executor.timeout == 10 and str(executor.work_dir) == "."
 
     # Try invalid working directory.
     with pytest.raises(ValueError, match="Working directory .* does not exist."):
-        executor = LocalCommandLineCodeExecutor(timeout=111, work_dir="/invalid/directory")
+        executor = cls(timeout=111, work_dir="/invalid/directory")
 
 
-def test_local_commandline_executor_execute_code() -> None:
+@pytest.mark.parametrize("cls", classes_to_test)
+def test_commandline_executor_execute_code(cls) -> None:
     with tempfile.TemporaryDirectory() as temp_dir:
-        executor = LocalCommandLineCodeExecutor(work_dir=temp_dir)
+        executor = cls(work_dir=temp_dir)
         _test_execute_code(executor=executor)
 
 
@@ -79,9 +104,10 @@ def _test_execute_code(executor: CodeExecutor) -> None:
 
 
 @pytest.mark.skipif(sys.platform in ["win32"], reason="do not run on windows")
-def test_local_commandline_code_executor_timeout() -> None:
+@pytest.mark.parametrize("cls", classes_to_test)
+def test_commandline_code_executor_timeout(cls) -> None:
     with tempfile.TemporaryDirectory() as temp_dir:
-        executor = LocalCommandLineCodeExecutor(timeout=1, work_dir=temp_dir)
+        executor = cls(timeout=1, work_dir=temp_dir)
         _test_timeout(executor)
 
 
@@ -96,6 +122,20 @@ def test_local_commandline_code_executor_restart() -> None:
     _test_restart(executor)
 
 
+# This is kind of hard to test because each exec is a new env
+@pytest.mark.skipif(
+    skip_docker or not is_docker_running(),
+    reason="docker is not running or requested to skip docker tests",
+)
+def test_docker_commandline_code_executor_restart() -> None:
+    with DockerCommandLineCodeExecutor() as executor:
+        result = executor.execute_code_blocks([CodeBlock(code="echo $HOME", language="sh")])
+        assert result.exit_code == 0
+        executor.restart()
+        result = executor.execute_code_blocks([CodeBlock(code="echo $HOME", language="sh")])
+        assert result.exit_code == 0
+
+
 def _test_restart(executor: CodeExecutor) -> None:
     # Check warning.
     with pytest.warns(UserWarning, match=r".*No action is taken."):
@@ -148,9 +188,10 @@ def _test_conversable_agent_capability(executor: CodeExecutor) -> None:
     assert code_result.exit_code == 0 and "hello world" in code_result.output.lower().replace(",", "")
 
 
-def test_local_commandline_executor_conversable_agent_code_execution() -> None:
+@pytest.mark.parametrize("cls", classes_to_test)
+def test_commandline_executor_conversable_agent_code_execution(cls) -> None:
     with tempfile.TemporaryDirectory() as temp_dir:
-        executor = LocalCommandLineCodeExecutor(work_dir=temp_dir)
+        executor = cls(work_dir=temp_dir)
         with pytest.MonkeyPatch.context() as mp:
             mp.setenv("OPENAI_API_KEY", MOCK_OPEN_AI_API_KEY)
             _test_conversable_agent_code_execution(executor)
@@ -196,3 +237,38 @@ def test_dangerous_commands(lang, code, expected_message):
     assert expected_message in str(
         exc_info.value
     ), f"Expected message '{expected_message}' not found in '{str(exc_info.value)}'"
+
+
+# This is kind of hard to test because each exec is a new env
+@pytest.mark.skipif(
+    skip_docker or not is_docker_running(),
+    reason="docker is not running or requested to skip docker tests",
+)
+def test_docker_invalid_relative_path() -> None:
+    with DockerCommandLineCodeExecutor() as executor:
+        code = """# filename: /tmp/test.py
+
+print("hello world")
+"""
+        result = executor.execute_code_blocks([CodeBlock(code=code, language="python")])
+        assert result.exit_code == 1 and "Filename is not in the workspace" in result.output
+
+
+@pytest.mark.skipif(
+    skip_docker or not is_docker_running(),
+    reason="docker is not running or requested to skip docker tests",
+)
+def test_docker_valid_relative_path() -> None:
+    with tempfile.TemporaryDirectory() as temp_dir:
+        temp_dir = Path(temp_dir)
+        with DockerCommandLineCodeExecutor(work_dir=temp_dir) as executor:
+            code = """# filename: test.py
+
+print("hello world")
+"""
+            result = executor.execute_code_blocks([CodeBlock(code=code, language="python")])
+            assert result.exit_code == 0
+            assert "hello world" in result.output
+            assert "test.py" in result.code_file
+            assert (temp_dir / "test.py") == Path(result.code_file)
+            assert (temp_dir / "test.py").exists()
diff --git a/test/coding/test_embedded_ipython_code_executor.py b/test/coding/test_embedded_ipython_code_executor.py
index 529925d91bb..9c8e5a720ae 100644
--- a/test/coding/test_embedded_ipython_code_executor.py
+++ b/test/coding/test_embedded_ipython_code_executor.py
@@ -46,6 +46,11 @@ def __init__(self, **kwargs):
     classes_to_test = []
 
 
+@pytest.mark.parametrize("cls", classes_to_test)
+def test_is_code_executor(cls) -> None:
+    assert isinstance(cls, CodeExecutor)
+
+
 @pytest.mark.skipif(skip, reason=skip_reason)
 def test_create_dict() -> None:
     config: Dict[str, Union[str, CodeExecutor]] = {"executor": "ipython-embedded"}
diff --git a/website/docs/topics/code-execution/cli-code-executor.ipynb b/website/docs/topics/code-execution/cli-code-executor.ipynb
new file mode 100644
index 00000000000..60b50ab0296
--- /dev/null
+++ b/website/docs/topics/code-execution/cli-code-executor.ipynb
@@ -0,0 +1,362 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Command Line Code Executor\n",
+    "\n",
+    "Command line code execution is the simplest form of code execution. Generally speaking, it will save each code block to a file and then execute that file. This means that each code block is executed in a new process. There are two forms of this executor:\n",
+    "\n",
+    "- Docker ([`DockerCommandLineCodeExecutor`](/docs/reference/coding/docker_commandline_code_executor#dockercommandlinecodeexecutor)) - this is where all commands are executed in a Docker container\n",
+    "- Local ([`LocalCommandLineCodeExecutor`](/docs/reference/coding/local_commandline_code_executor#localcommandlinecodeexecutor)) - this is where all commands are executed on the host machine\n",
+    "\n",
+    "This executor type is similar to the legacy code execution in AutoGen.\n",
+    "\n",
+    "## Docker\n",
+    "\n",
+    "The [`DockerCommandLineCodeExecutor`](/docs/reference/coding/docker_commandline_code_executor#dockercommandlinecodeexecutor) will create a Docker container and run all commands within that container. The default image that is used is `python:3-slim`; this can be customized by passing the `image` parameter to the constructor. If the image is not found locally then the class will try to pull it. Therefore, having built the image locally is enough. The only thing required for this image to be compatible with the executor is to have `sh` and `python` installed. Therefore, creating a custom image is a simple and effective way to ensure required system dependencies are available.\n",
+    "\n",
+    "You can use the executor as a context manager to ensure the container is cleaned up after use. Otherwise, the `atexit` module will be used to stop the container when the program exits.\n",
+    "\n",
+    "### Inspecting the container\n",
+    "\n",
+    "If you wish to keep the container around after AutoGen is finished using it for whatever reason (e.g. to inspect the container), then you can set the `auto_remove` parameter to `False` when creating the executor. `stop_container` can also be set to `False` to prevent the container from being stopped at the end of the execution.\n",
+    "\n",
+    "### Example"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "exit_code=0 output='Hello, World!\\n' code_file='coding/tmp_code_07da107bb575cc4e02b0e1d6d99cc204.py'\n"
+     ]
+    }
+   ],
+   "source": [
+    "from pathlib import Path\n",
+    "from autogen.coding import CodeBlock\n",
+    "from autogen.coding import DockerCommandLineCodeExecutor\n",
+    "\n",
+    "work_dir = Path(\"coding\")\n",
+    "work_dir.mkdir(exist_ok=True)\n",
+    "\n",
+    "with DockerCommandLineCodeExecutor(work_dir=work_dir) as executor:\n",
+    "    print(\n",
+    "        executor.execute_code_blocks(\n",
+    "            code_blocks=[\n",
+    "                CodeBlock(language=\"python\", code=\"print('Hello, World!')\"),\n",
+    "            ]\n",
+    "        )\n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Local\n",
+    "\n",
+    "````{=mdx}\n",
+    ":::danger\n",
+    "The local version will run code on your local system. Use it with caution.\n",
+    ":::\n",
+    "````\n",
+    "\n",
+    "To execute code on the host machine, as in the machine running AutoGen, the [`LocalCommandLineCodeExecutor`](/docs/reference/coding/local_commandline_code_executor#localcommandlinecodeexecutor) can be used.\n",
+    "\n",
+    "### Example"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "exit_code=0 output='\\nHello, World!\\n' code_file='coding/065b51a16ee54f3298847518f9e999d7.py'\n"
+     ]
+    }
+   ],
+   "source": [
+    "from pathlib import Path\n",
+    "from autogen.coding import CodeBlock\n",
+    "from autogen.coding import LocalCommandLineCodeExecutor\n",
+    "\n",
+    "work_dir = Path(\"coding\")\n",
+    "work_dir.mkdir(exist_ok=True)\n",
+    "\n",
+    "executor = LocalCommandLineCodeExecutor(work_dir=str(work_dir))\n",
+    "print(\n",
+    "    executor.execute_code_blocks(\n",
+    "        code_blocks=[\n",
+    "            CodeBlock(language=\"python\", code=\"print('Hello, World!')\"),\n",
+    "        ]\n",
+    "    )\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Assigning to an agent\n",
+    "\n",
+    "These executors can be used to facilitate the execution of agent written code. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from autogen import ConversableAgent\n",
+    "from autogen.coding import DockerCommandLineCodeExecutor\n",
+    "from pathlib import Path\n",
+    "\n",
+    "work_dir = Path(\"coding\")\n",
+    "work_dir.mkdir(exist_ok=True)\n",
+    "\n",
+    "executor = DockerCommandLineCodeExecutor(work_dir=work_dir)\n",
+    "\n",
+    "code_executor_agent = ConversableAgent(\n",
+    "    name=\"code_executor_agent\",\n",
+    "    llm_config=False,\n",
+    "    code_execution_config={\n",
+    "        \"executor\": executor,\n",
+    "    },\n",
+    "    human_input_mode=\"NEVER\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "When using code execution it is critical that you update the system prompt of agents you expect to write code to be able to make use of the executor. For example, for the [`DockerCommandLineCodeExecutor`](/docs/reference/coding/docker_commandline_code_executor#dockercommandlinecodeexecutor) you might set up a code writing agent like so:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# The code writer agent's system message is to instruct the LLM on how to\n",
+    "# use the command line code executor.\n",
+    "code_writer_system_message = \"\"\"\n",
+    "You have been given coding capability to solve tasks using Python code.\n",
+    "In the following cases, suggest python code (in a python coding block) or shell script (in a sh coding block) for the user to execute.\n",
+    "    1. When you need to collect info, use the code to output the info you need, for example, browse or search the web, download/read a file, print the content of a webpage or a file, get the current date/time, check the operating system. After sufficient info is printed and the task is ready to be solved based on your language skill, you can solve the task by yourself.\n",
+    "    2. When you need to perform some task with code, use the code to perform the task and output the result. Finish the task smartly.\n",
+    "Solve the task step by step if you need to. If a plan is not provided, explain your plan first. Be clear which step uses code, and which step uses your language skill.\n",
+    "When using code, you must indicate the script type in the code block. The user cannot provide any other feedback or perform any other action beyond executing the code you suggest. The user can't modify your code. So do not suggest incomplete code which requires users to modify. Don't use a code block if it's not intended to be executed by the user.\n",
+    "If you want the user to save the code in a file before executing it, put # filename: <filename> inside the code block as the first line. Don't include multiple code blocks in one response. Do not ask users to copy and paste the result. Instead, use 'print' function for the output when relevant. Check the execution result returned by the user.\n",
+    "\"\"\"\n",
+    "\n",
+    "import os\n",
+    "\n",
+    "code_writer_agent = ConversableAgent(\n",
+    "    \"code_writer\",\n",
+    "    system_message=code_writer_system_message,\n",
+    "    llm_config={\"config_list\": [{\"model\": \"gpt-4\", \"api_key\": os.environ[\"OPENAI_API_KEY\"]}]},\n",
+    "    code_execution_config=False,  # Turn off code execution for this agent.\n",
+    "    max_consecutive_auto_reply=2,\n",
+    "    human_input_mode=\"NEVER\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Then we can use these two agents to solve a problem:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[33mcode_executor_agent\u001b[0m (to code_writer):\n",
+      "\n",
+      "Write Python code to calculate the 14th Fibonacci number.\n",
+      "\n",
+      "--------------------------------------------------------------------------------\n",
+      "\u001b[33mcode_writer\u001b[0m (to code_executor_agent):\n",
+      "\n",
+      "Sure, we can calculate the Fibonacci series using a few different methods such as recursion, iterative, by using Binet's formula, or by using matrix exponentiation.\n",
+      "\n",
+      "But, since we only need to calculate the 14th number, we will simply use the iterative method as it's the most efficient for this case.\n",
+      "\n",
+      "Here is the Python code that solves the task:\n",
+      "\n",
+      "```python\n",
+      "def fibonacci(n):\n",
+      "    a, b = 0, 1\n",
+      "    for _ in range(n):\n",
+      "        a, b = b, a + b\n",
+      "    return a\n",
+      "\n",
+      "print(fibonacci(14))\n",
+      "```\n",
+      "\n",
+      "In this script, `fibonacci(n)` is a function that calculates the nth Fibonacci number. Inside the function, two variables `a` and `b` are initialised to `0` and `1` which are the first two numbers in the Fibonacci series. Then, a loop runs `n` times (14 times in your case), and in each iteration `a` is replaced with `b` and `b` is replaced with `a + b`, which generates the next number in the series. \n",
+      "\n",
+      "The function returns `a`, which is the nth Fibonacci number. The result is then printed to the console.\n",
+      "\n",
+      "--------------------------------------------------------------------------------\n",
+      "\u001b[31m\n",
+      ">>>>>>>> EXECUTING 1 CODE BLOCKS (inferred languages are [python])...\u001b[0m\n",
+      "\u001b[33mcode_executor_agent\u001b[0m (to code_writer):\n",
+      "\n",
+      "exitcode: 0 (execution succeeded)\n",
+      "Code output: 377\n",
+      "\n",
+      "\n",
+      "--------------------------------------------------------------------------------\n",
+      "\u001b[33mcode_writer\u001b[0m (to code_executor_agent):\n",
+      "\n",
+      "Great! The script has successfully computed the 14th Fibonacci number as 377. If you need to compute other Fibonacci numbers, you can simply change the argument in the `fibonacci()` function call. Please let me know if you need help with anything else.\n",
+      "\n",
+      "--------------------------------------------------------------------------------\n",
+      "\u001b[33mcode_executor_agent\u001b[0m (to code_writer):\n",
+      "\n",
+      "\n",
+      "\n",
+      "--------------------------------------------------------------------------------\n",
+      "ChatResult(chat_id=None,\n",
+      "           chat_history=[{'content': 'Write Python code to calculate the 14th '\n",
+      "                                     'Fibonacci number.',\n",
+      "                          'role': 'assistant'},\n",
+      "                         {'content': 'Sure, we can calculate the Fibonacci '\n",
+      "                                     'series using a few different methods '\n",
+      "                                     'such as recursion, iterative, by using '\n",
+      "                                     \"Binet's formula, or by using matrix \"\n",
+      "                                     'exponentiation.\\n'\n",
+      "                                     '\\n'\n",
+      "                                     'But, since we only need to calculate the '\n",
+      "                                     '14th number, we will simply use the '\n",
+      "                                     \"iterative method as it's the most \"\n",
+      "                                     'efficient for this case.\\n'\n",
+      "                                     '\\n'\n",
+      "                                     'Here is the Python code that solves the '\n",
+      "                                     'task:\\n'\n",
+      "                                     '\\n'\n",
+      "                                     '```python\\n'\n",
+      "                                     'def fibonacci(n):\\n'\n",
+      "                                     '    a, b = 0, 1\\n'\n",
+      "                                     '    for _ in range(n):\\n'\n",
+      "                                     '        a, b = b, a + b\\n'\n",
+      "                                     '    return a\\n'\n",
+      "                                     '\\n'\n",
+      "                                     'print(fibonacci(14))\\n'\n",
+      "                                     '```\\n'\n",
+      "                                     '\\n'\n",
+      "                                     'In this script, `fibonacci(n)` is a '\n",
+      "                                     'function that calculates the nth '\n",
+      "                                     'Fibonacci number. Inside the function, '\n",
+      "                                     'two variables `a` and `b` are '\n",
+      "                                     'initialised to `0` and `1` which are the '\n",
+      "                                     'first two numbers in the Fibonacci '\n",
+      "                                     'series. Then, a loop runs `n` times (14 '\n",
+      "                                     'times in your case), and in each '\n",
+      "                                     'iteration `a` is replaced with `b` and '\n",
+      "                                     '`b` is replaced with `a + b`, which '\n",
+      "                                     'generates the next number in the '\n",
+      "                                     'series. \\n'\n",
+      "                                     '\\n'\n",
+      "                                     'The function returns `a`, which is the '\n",
+      "                                     'nth Fibonacci number. The result is then '\n",
+      "                                     'printed to the console.',\n",
+      "                          'role': 'user'},\n",
+      "                         {'content': 'exitcode: 0 (execution succeeded)\\n'\n",
+      "                                     'Code output: 377\\n',\n",
+      "                          'role': 'assistant'},\n",
+      "                         {'content': 'Great! The script has successfully '\n",
+      "                                     'computed the 14th Fibonacci number as '\n",
+      "                                     '377. If you need to compute other '\n",
+      "                                     'Fibonacci numbers, you can simply change '\n",
+      "                                     'the argument in the `fibonacci()` '\n",
+      "                                     'function call. Please let me know if you '\n",
+      "                                     'need help with anything else.',\n",
+      "                          'role': 'user'},\n",
+      "                         {'content': '', 'role': 'assistant'}],\n",
+      "           summary='',\n",
+      "           cost=({'gpt-4-0613': {'completion_tokens': 302,\n",
+      "                                 'cost': 0.04842,\n",
+      "                                 'prompt_tokens': 1010,\n",
+      "                                 'total_tokens': 1312},\n",
+      "                  'total_cost': 0.04842},\n",
+      "                 {'gpt-4-0613': {'completion_tokens': 302,\n",
+      "                                 'cost': 0.04842,\n",
+      "                                 'prompt_tokens': 1010,\n",
+      "                                 'total_tokens': 1312},\n",
+      "                  'total_cost': 0.04842}),\n",
+      "           human_input=[])\n"
+     ]
+    }
+   ],
+   "source": [
+    "import pprint\n",
+    "\n",
+    "chat_result = code_executor_agent.initiate_chat(\n",
+    "    code_writer_agent, message=\"Write Python code to calculate the 14th Fibonacci number.\"\n",
+    ")\n",
+    "\n",
+    "pprint.pprint(chat_result)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Finally, stop the container. Or better yet, use a context manager so that it is stopped automatically."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "executor.stop()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "autogen",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}