From a646df2d025b29117c0e760cf5f0eb424ecd7224 Mon Sep 17 00:00:00 2001 From: Abhay Mathur Date: Fri, 23 Feb 2024 09:11:33 +0530 Subject: [PATCH 1/7] Updating code_utils.py to solve issue #1747 (#1758) * Update code_utils.py Updated the powershell command to pwsh * Update code_utils.py added a split to handle powershell in the first condition as well * Update local_commandline_code_executor.py added "pwsh" as a command option in lang variable * Update autogen/coding/local_commandline_code_executor.py Co-authored-by: Eric Zhu * Update code_utils.py * Update code_utils.py fixed formatting * Update code_utils.py defined a function to detect whether 'powershell' or 'pwsh' works and accordingly use the one that works * Update code_utils.py fixed formatting * Update and rename test_code.py to test_code_utils.py added a unit test for get_powershell_command function in code_utils.py * Update test_code_utils.py fixed formatting * Update test_code_utils.py fixed formatting * Update autogen/code_utils.py Co-authored-by: Chi Wang * solved issue #1747 * updated unit test * fixed formatting * fixed formatting * fixed formatting * fixed a bug * removed extra return None and removed redundant comments --------- Co-authored-by: Eric Zhu Co-authored-by: Chi Wang --- autogen/code_utils.py | 3 ++- test/test_code_utils.py | 22 +++++++++++++--------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/autogen/code_utils.py b/autogen/code_utils.py index 3a7c67c136d..4abd2844925 100644 --- a/autogen/code_utils.py +++ b/autogen/code_utils.py @@ -229,7 +229,8 @@ def get_powershell_command(): return "pwsh" except FileNotFoundError: - print("Neither powershell nor pwsh is installed.") + if WIN32: + logging.warning("Neither powershell nor pwsh is installed but it is a Windows OS") return None diff --git a/test/test_code_utils.py b/test/test_code_utils.py index 74efd9ddc22..be6c611661f 100644 --- a/test/test_code_utils.py +++ b/test/test_code_utils.py @@ -575,20 +575,24 @@ def test_get_powershell_command_pwsh(self, mock_subprocess_run): self.assertEqual(get_powershell_command(), "pwsh") @patch("subprocess.run") - def test_get_powershell_command_no_shell(self, mock_subprocess_run): + @patch("logging.warning") + def test_get_powershell_command_windows_no_shell(self, mock_logging_warning, mock_subprocess_run): # Set up the mock to simulate 'powershell' and 'pwsh' not found mock_subprocess_run.side_effect = [FileNotFoundError, FileNotFoundError] - with patch("sys.stdout", new=StringIO()) as fake_out: - get_powershell_command() - self.assertEqual(fake_out.getvalue().strip(), "Neither powershell nor pwsh is installed.") + with patch("autogen.code_utils.WIN32", True): + self.assertIsNone(get_powershell_command()) + mock_logging_warning.assert_called_once_with( + "Neither powershell nor pwsh is installed but it is a Windows OS" + ) @patch("subprocess.run") - def test_get_powershell_command_no_shell_no_output(self, mock_subprocess_run): - # Set up the mock to simulate 'powershell' and 'pwsh' not found without printing error message - mock_subprocess_run.side_effect = [FileNotFoundError, FileNotFoundError] - - self.assertIsNone(get_powershell_command()) + def test_get_powershell_command_no_windows_no_shell(self, mock_subprocess_run): + # Set up the mock to simulate 'powershell' and 'pwsh' not found + mock_subprocess_run.side_effect = FileNotFoundError + # Mock WIN32 to False + with patch("autogen.code_utils.WIN32", False): + self.assertIsNone(get_powershell_command()) if __name__ == "__main__": From 
ac15996f5ad88f3d87a99d2980e0cb9a785d2e4e Mon Sep 17 00:00:00 2001 From: Jack Gerrits Date: Thu, 22 Feb 2024 23:01:17 -0500 Subject: [PATCH 2/7] Add sidebar for notebooks page (#1766) --- website/sidebars.js | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/website/sidebars.js b/website/sidebars.js index 625e8263ea5..5b1b9d6896b 100644 --- a/website/sidebars.js +++ b/website/sidebars.js @@ -29,5 +29,20 @@ 'Migration-Guide' ], // pydoc-markdown auto-generated markdowns from docstrings - referenceSideBar: [require("./docs/reference/sidebar.json")] + referenceSideBar: [require("./docs/reference/sidebar.json")], + notebooksSidebar: [ + { + type: "category", + label: "Notebooks", + items: [{ + type: "autogenerated", + dirName: "notebooks", + },], + link: { + type: 'doc', + id: "notebooks" + }, + }, + + ] }; From fbc2f6e911818036ae863481231e03247a28e8a3 Mon Sep 17 00:00:00 2001 From: Jack Gerrits Date: Fri, 23 Feb 2024 14:09:59 -0500 Subject: [PATCH 3/7] Use jupyer-kernel-gateway for ipython executor (#1748) * checkpoint async based * Implement jupyter client and use jupyer gateway * update deps * address comments * add missing parenthesis * Update build.yml * CI fixes * change requirement name * debug * print stderr * dont seek * show token * mitigaton for windows bug * use hex token to avoid - in token * formatting * put back in place original while the windows bug exists * lint * Update autogen/coding/jupyter_code_executor.py * Update jupyter_code_executor.py * Update test_embedded_ipython_code_executor.py * Update setup.py * Update build.yml * fix nameerror --------- Co-authored-by: Eric Zhu --- .github/workflows/build.yml | 5 + autogen/coding/base.py | 9 + .../coding/embedded_ipython_code_executor.py | 16 +- autogen/coding/factory.py | 4 + autogen/coding/jupyter/__init__.py | 5 + autogen/coding/jupyter/base.py | 21 ++ autogen/coding/jupyter/jupyter_client.py | 200 ++++++++++++++++ .../coding/jupyter/local_jupyter_server.py | 148 ++++++++++++ autogen/coding/jupyter_code_executor.py | 222 ++++++++++++++++++ setup.py | 4 + .../test_embedded_ipython_code_executor.py | 78 +++--- 11 files changed, 673 insertions(+), 39 deletions(-) create mode 100644 autogen/coding/jupyter/__init__.py create mode 100644 autogen/coding/jupyter/base.py create mode 100644 autogen/coding/jupyter/jupyter_client.py create mode 100644 autogen/coding/jupyter/local_jupyter_server.py create mode 100644 autogen/coding/jupyter_code_executor.py diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f6df28088f5..2abbc6bd68e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -42,8 +42,13 @@ jobs: pip install -e . python -c "import autogen" pip install pytest mock + - name: Install optional dependencies for code executors + # code executors auto skip without deps, so only run for python 3.11 + if: matrix.python-version == '3.11' + run: | pip install jupyter-client ipykernel python -m ipykernel install --user --name python3 + pip install -e ".[local-jupyter-exec]" - name: Set AUTOGEN_USE_DOCKER based on OS shell: bash run: | diff --git a/autogen/coding/base.py b/autogen/coding/base.py index 936d3ee2536..0bd373157c9 100644 --- a/autogen/coding/base.py +++ b/autogen/coding/base.py @@ -92,3 +92,12 @@ def restart(self) -> None: This method is called when the agent is reset. """ ... 
# pragma: no cover + + +class IPythonCodeResult(CodeResult): + """(Experimental) A code result class for IPython code executor.""" + + output_files: List[str] = Field( + default_factory=list, + description="The list of files that the executed code blocks generated.", + ) diff --git a/autogen/coding/embedded_ipython_code_executor.py b/autogen/coding/embedded_ipython_code_executor.py index c85798f7503..a83dab23327 100644 --- a/autogen/coding/embedded_ipython_code_executor.py +++ b/autogen/coding/embedded_ipython_code_executor.py @@ -1,6 +1,7 @@ import base64 import json import os +from pathlib import Path import re import uuid from queue import Empty @@ -11,19 +12,10 @@ from pydantic import BaseModel, Field, field_validator from ..agentchat.agent import LLMAgent -from .base import CodeBlock, CodeExtractor, CodeResult +from .base import CodeBlock, CodeExtractor, IPythonCodeResult from .markdown_code_extractor import MarkdownCodeExtractor -__all__ = ("EmbeddedIPythonCodeExecutor", "IPythonCodeResult") - - -class IPythonCodeResult(CodeResult): - """(Experimental) A code result class for IPython code executor.""" - - output_files: List[str] = Field( - default_factory=list, - description="The list of files that the executed code blocks generated.", - ) +__all__ = "EmbeddedIPythonCodeExecutor" class EmbeddedIPythonCodeExecutor(BaseModel): @@ -126,6 +118,8 @@ def __init__(self, **kwargs: Any): self._kernel_client = self._kernel_manager.client() self._kernel_client.start_channels() self._timeout = self.timeout + self._kernel_name = self.kernel_name + self._output_dir = Path(self.output_dir) @property def user_capability(self) -> "EmbeddedIPythonCodeExecutor.UserCapability": diff --git a/autogen/coding/factory.py b/autogen/coding/factory.py index 953de5906dd..ceb01ca3dfa 100644 --- a/autogen/coding/factory.py +++ b/autogen/coding/factory.py @@ -37,5 +37,9 @@ def create(code_execution_config: Dict[str, Any]) -> CodeExecutor: from .local_commandline_code_executor import LocalCommandlineCodeExecutor return LocalCommandlineCodeExecutor(**code_execution_config.get("commandline-local", {})) + elif executor == "jupyter-local": + from .jupyter_code_executor import LocalJupyterCodeExecutor + + return LocalJupyterCodeExecutor(**code_execution_config.get("jupyter-local", {})) else: raise ValueError(f"Unknown code executor {executor}") diff --git a/autogen/coding/jupyter/__init__.py b/autogen/coding/jupyter/__init__.py new file mode 100644 index 00000000000..96c8cf4a65c --- /dev/null +++ b/autogen/coding/jupyter/__init__.py @@ -0,0 +1,5 @@ +from .base import JupyterConnectable, JupyterConnectionInfo +from .jupyter_client import JupyterClient +from .local_jupyter_server import LocalJupyterServer + +__all__ = ["JupyterConnectable", "JupyterConnectionInfo", "JupyterClient", "LocalJupyterServer"] diff --git a/autogen/coding/jupyter/base.py b/autogen/coding/jupyter/base.py new file mode 100644 index 00000000000..8e86897249e --- /dev/null +++ b/autogen/coding/jupyter/base.py @@ -0,0 +1,21 @@ +from dataclasses import dataclass +from typing import Optional, Protocol, runtime_checkable + + +@dataclass +class JupyterConnectionInfo: + """(Experimental)""" + + host: str + use_https: bool + port: int + token: Optional[str] + + +@runtime_checkable +class JupyterConnectable(Protocol): + """(Experimental)""" + + @property + def connection_info(self) -> JupyterConnectionInfo: + pass diff --git a/autogen/coding/jupyter/jupyter_client.py b/autogen/coding/jupyter/jupyter_client.py new file mode 100644 index 00000000000..edecc415cd1 
--- /dev/null +++ b/autogen/coding/jupyter/jupyter_client.py @@ -0,0 +1,200 @@ +from __future__ import annotations + +from dataclasses import dataclass +from types import TracebackType +from typing import Any, Dict, List, Optional, cast +import sys + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self + +import json +import uuid +import datetime +import requests + +import websocket +from websocket import WebSocket + +from .base import JupyterConnectionInfo + + +class JupyterClient: + """(Experimental) A client for communicating with a Jupyter gateway server.""" + + def __init__(self, connection_info: JupyterConnectionInfo): + self._connection_info = connection_info + + def _get_headers(self) -> Dict[str, str]: + if self._connection_info.token is None: + return {} + return {"Authorization": f"token {self._connection_info.token}"} + + def _get_api_base_url(self) -> str: + protocol = "https" if self._connection_info.use_https else "http" + return f"{protocol}://{self._connection_info.host}:{self._connection_info.port}" + + def _get_ws_base_url(self) -> str: + return f"ws://{self._connection_info.host}:{self._connection_info.port}" + + def list_kernel_specs(self) -> Dict[str, Dict[str, str]]: + response = requests.get(f"{self._get_api_base_url()}/api/kernelspecs", headers=self._get_headers()) + return cast(Dict[str, Dict[str, str]], response.json()) + + def list_kernels(self) -> List[Dict[str, str]]: + response = requests.get(f"{self._get_api_base_url()}/api/kernels", headers=self._get_headers()) + return cast(List[Dict[str, str]], response.json()) + + def start_kernel(self, kernel_spec_name: str) -> str: + """Start a new kernel. + + Args: + kernel_spec_name (str): Name of the kernel spec to start + + Returns: + str: ID of the started kernel + """ + + response = requests.post( + f"{self._get_api_base_url()}/api/kernels", + headers=self._get_headers(), + json={"name": kernel_spec_name}, + ) + return cast(str, response.json()["id"]) + + def restart_kernel(self, kernel_id: str) -> None: + response = requests.post( + f"{self._get_api_base_url()}/api/kernels/{kernel_id}/restart", headers=self._get_headers() + ) + response.raise_for_status() + + def get_kernel_client(self, kernel_id: str) -> JupyterKernelClient: + ws_url = f"{self._get_ws_base_url()}/api/kernels/{kernel_id}/channels" + ws = websocket.create_connection(ws_url, header=self._get_headers()) + return JupyterKernelClient(ws) + + +class JupyterKernelClient: + """(Experimental) A client for communicating with a Jupyter kernel.""" + + @dataclass + class ExecutionResult: + @dataclass + class DataItem: + mime_type: str + data: str + + is_ok: bool + output: str + data_items: List[DataItem] + + def __init__(self, websocket: WebSocket): + self._session_id: str = uuid.uuid4().hex + self._websocket: WebSocket = websocket + + def __enter__(self) -> Self: + return self + + def __exit__( + self, exc_type: Optional[type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType] + ) -> None: + self._websocket.close() + + def _send_message(self, *, content: Dict[str, Any], channel: str, message_type: str) -> str: + timestamp = datetime.datetime.now().isoformat() + message_id = uuid.uuid4().hex + message = { + "header": { + "username": "autogen", + "version": "5.0", + "session": self._session_id, + "msg_id": message_id, + "msg_type": message_type, + "date": timestamp, + }, + "parent_header": {}, + "channel": channel, + "content": content, + "metadata": {}, + "buffers": {}, + } + 
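+ # The dict above is a Jupyter-protocol message envelope; it is serialized to JSON, sent over the kernel's websocket channel, and its msg_id is returned so replies can be matched via parent_header.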
self._websocket.send_text(json.dumps(message)) + return message_id + + def _receive_message(self, timeout_seconds: Optional[float]) -> Optional[Dict[str, Any]]: + self._websocket.settimeout(timeout_seconds) + try: + data = self._websocket.recv() + if isinstance(data, bytes): + data = data.decode("utf-8") + return cast(Dict[str, Any], json.loads(data)) + except websocket.WebSocketTimeoutException: + return None + + def wait_for_ready(self, timeout_seconds: Optional[float] = None) -> bool: + message_id = self._send_message(content={}, channel="shell", message_type="kernel_info_request") + while True: + message = self._receive_message(timeout_seconds) + # This means we timed out with no new messages. + if message is None: + return False + if ( + message.get("parent_header", {}).get("msg_id") == message_id + and message["msg_type"] == "kernel_info_reply" + ): + return True + + def execute(self, code: str, timeout_seconds: Optional[float] = None) -> ExecutionResult: + message_id = self._send_message( + content={ + "code": code, + "silent": False, + "store_history": True, + "user_expressions": {}, + "allow_stdin": False, + "stop_on_error": True, + }, + channel="shell", + message_type="execute_request", + ) + + text_output = [] + data_output = [] + while True: + message = self._receive_message(timeout_seconds) + if message is None: + return JupyterKernelClient.ExecutionResult( + is_ok=False, output="ERROR: Timeout waiting for output from code block.", data_items=[] + ) + + # Ignore messages that are not for this execution. + if message.get("parent_header", {}).get("msg_id") != message_id: + continue + + msg_type = message["msg_type"] + content = message["content"] + if msg_type in ["execute_result", "display_data"]: + for data_type, data in content["data"].items(): + if data_type == "text/plain": + text_output.append(data) + elif data_type.startswith("image/") or data_type == "text/html": + data_output.append(self.ExecutionResult.DataItem(mime_type=data_type, data=data)) + else: + text_output.append(json.dumps(data)) + elif msg_type == "stream": + text_output.append(content["text"]) + elif msg_type == "error": + # Output is an error. 
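+ # Relay the kernel's ename, evalue, and traceback fields as the failure output.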
+ return JupyterKernelClient.ExecutionResult( + is_ok=False, + output=f"ERROR: {content['ename']}: {content['evalue']}\n{content['traceback']}", + data_items=[], + ) + if msg_type == "status" and content["execution_state"] == "idle": + break + + return JupyterKernelClient.ExecutionResult( + is_ok=True, output="\n".join([str(output) for output in text_output]), data_items=data_output + ) diff --git a/autogen/coding/jupyter/local_jupyter_server.py b/autogen/coding/jupyter/local_jupyter_server.py new file mode 100644 index 00000000000..decbb3f430e --- /dev/null +++ b/autogen/coding/jupyter/local_jupyter_server.py @@ -0,0 +1,148 @@ +from __future__ import annotations +from types import TracebackType + +from typing import Optional, Union, cast +import subprocess +import signal +import sys +import json +import secrets +import socket +import atexit + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self + +from .base import JupyterConnectable, JupyterConnectionInfo +from .jupyter_client import JupyterClient + + +def _get_free_port() -> int: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.bind(("", 0)) + return cast(int, s.getsockname()[1]) + + +class LocalJupyterServer(JupyterConnectable): + class GenerateToken: + pass + + def __init__( + self, + ip: str = "127.0.0.1", + port: Optional[int] = None, + token: Union[str, GenerateToken] = GenerateToken(), + log_file: str = "jupyter_gateway.log", + log_level: str = "INFO", + log_max_bytes: int = 1048576, + log_backup_count: int = 3, + ): + # Remove as soon as https://github.com/jupyter-server/kernel_gateway/issues/398 is fixed + if sys.platform == "win32": + raise ValueError("LocalJupyterServer is not supported on Windows due to kernelgateway bug.") + + # Check Jupyter gateway server is installed + try: + subprocess.run( + [sys.executable, "-m", "jupyter", "kernelgateway", "--version"], + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + except subprocess.CalledProcessError: + raise ValueError( + "Jupyter gateway server is not installed. Please install it with `pip install jupyter_kernel_gateway`." 
+ ) + + self.ip = ip + if port is None: + port = _get_free_port() + self.port = port + + if isinstance(token, LocalJupyterServer.GenerateToken): + token = secrets.token_hex(32) + + self.token = token + logging_config = { + "handlers": { + "file": { + "class": "logging.handlers.RotatingFileHandler", + "level": log_level, + "maxBytes": log_max_bytes, + "backupCount": log_backup_count, + "filename": log_file, + } + }, + "loggers": {"KernelGatewayApp": {"level": log_level, "handlers": ["file", "console"]}}, + } + + # Run Jupyter gateway server with detached subprocess + args = [ + sys.executable, + "-m", + "jupyter", + "kernelgateway", + "--KernelGatewayApp.ip", + ip, + "--KernelGatewayApp.port", + str(port), + "--KernelGatewayApp.auth_token", + token, + "--JupyterApp.answer_yes", + "true", + "--JupyterApp.logging_config", + json.dumps(logging_config), + "--JupyterWebsocketPersonality.list_kernels", + "true", + ] + self._subprocess = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) + + # Satisfy mypy, we know this is not None because we passed PIPE + assert self._subprocess.stderr is not None + # Read stderr until we see "is available at" or the process has exited with an error + stderr = "" + while True: + result = self._subprocess.poll() + if result is not None: + stderr += self._subprocess.stderr.read() + print(f"token=[[[[{token}]]]]") + raise ValueError(f"Jupyter gateway server failed to start with exit code: {result}. stderr:\n{stderr}") + line = self._subprocess.stderr.readline() + stderr += line + if "is available at" in line: + break + + # Poll the subprocess to check if it is still running + result = self._subprocess.poll() + if result is not None: + raise ValueError( + f"Jupyter gateway server failed to start. Please check the logs ({log_file}) for more information." + ) + + atexit.register(self.stop) + + def stop(self) -> None: + if self._subprocess.poll() is None: + if sys.platform == "win32": + self._subprocess.send_signal(signal.CTRL_C_EVENT) + else: + self._subprocess.send_signal(signal.SIGINT) + self._subprocess.wait() + + @property + def connection_info(self) -> JupyterConnectionInfo: + return JupyterConnectionInfo(host=self.ip, use_https=False, port=self.port, token=self.token) + + def get_client(self) -> JupyterClient: + return JupyterClient(self.connection_info) + + def __enter__(self) -> Self: + return self + + def __exit__( + self, exc_type: Optional[type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType] + ) -> None: + self.stop() diff --git a/autogen/coding/jupyter_code_executor.py b/autogen/coding/jupyter_code_executor.py new file mode 100644 index 00000000000..551aea18aeb --- /dev/null +++ b/autogen/coding/jupyter_code_executor.py @@ -0,0 +1,222 @@ +import base64 +import json +import os +from pathlib import Path +import re +import uuid +from typing import Any, ClassVar, List, Union + +from pydantic import Field + + +from ..agentchat.agent import LLMAgent +from .base import CodeBlock, CodeExecutor, CodeExtractor, CodeResult, IPythonCodeResult +from .markdown_code_extractor import MarkdownCodeExtractor +from .jupyter import JupyterConnectable, JupyterConnectionInfo, LocalJupyterServer, JupyterClient + +__all__ = ("JupyterCodeExecutor", "LocalJupyterCodeExecutor") + + +class JupyterCodeExecutor(CodeExecutor): + """(Experimental) A code executor class that executes code statefully using an embedded + IPython kernel managed by this class. 
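+ The kernel is reached through a Jupyter kernel gateway (via JupyterClient above), which may be local or remote.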
+ + **This will execute LLM generated code on the local machine.** + + Each execution is stateful and can access variables created from previous + executions in the same session. The kernel must be installed before using + this class. The kernel can be installed using the following command: + `python -m ipykernel install --user --name {kernel_name}` + where `kernel_name` is the name of the kernel to install. + + Args: + timeout (int): The timeout for code execution, by default 60. + kernel_name (str): The kernel name to use. Make sure it is installed. + By default, it is "python3". + output_dir (str): The directory to save output files, by default ".". + system_message_update (str): The system message update to add to the + agent that produces code. By default it is + `JupyterCodeExecutor.DEFAULT_SYSTEM_MESSAGE_UPDATE`. + """ + + DEFAULT_SYSTEM_MESSAGE_UPDATE: ClassVar[ + str + ] = """ +# IPython Coding Capability +You have been given coding capability to solve tasks using Python code in a stateful IPython kernel. +You are responsible for writing the code, and the user is responsible for executing the code. + +When you write Python code, put the code in a markdown code block with the language set to Python. +For example: +```python +x = 3 +``` +You can use the variable `x` in subsequent code blocks. +```python +print(x) +``` + +Write code incrementally and leverage the statefulness of the kernel to avoid repeating code. +Import libraries in a separate code block. +Define a function or a class in a separate code block. +Run code that produces output in a separate code block. +Run code that involves expensive operations like download, upload, and call external APIs in a separate code block. + +When your code produces an output, the output will be returned to you. +Because you have limited conversation memory, if your code creates an image, +the output will be a path to the image instead of the image itself. +""" + + class UserCapability: + """(Experimental) An AgentCapability class that gives agent ability use a stateful + IPython code executor. This capability can be added to an agent using + the `add_to_agent` method which append a system message update to the + agent's system message.""" + + def __init__(self, system_message_update: str): + self._system_message_update = system_message_update + + def add_to_agent(self, agent: LLMAgent) -> None: + """Add this capability to an agent by appending a system message + update to the agent's system message. + + **Currently we do not check for conflicts with existing content in + the agent's system message.** + + Args: + agent (LLMAgent): The agent to add the capability to. 
+ """ + agent.update_system_message(agent.system_message + self._system_message_update) + + def __init__( + self, + jupyter_server: Union[JupyterConnectable, JupyterConnectionInfo], + kernel_name: str = "python3", + timeout: int = 60, + output_dir: Union[Path, str] = Path("."), + system_message_update: str = DEFAULT_SYSTEM_MESSAGE_UPDATE, + ): + if timeout < 1: + raise ValueError("Timeout must be greater than or equal to 1.") + + if isinstance(output_dir, str): + output_dir = Path(output_dir) + + if not output_dir.exists(): + raise ValueError(f"Output directory {output_dir} does not exist.") + + if isinstance(jupyter_server, JupyterConnectable): + self._connection_info = jupyter_server.connection_info + elif isinstance(jupyter_server, JupyterConnectionInfo): + self._connection_info = jupyter_server + else: + raise ValueError("jupyter_server must be a JupyterConnectable or JupyterConnectionInfo.") + + self._jupyter_client = JupyterClient(self._connection_info) + available_kernels = self._jupyter_client.list_kernel_specs() + if kernel_name not in available_kernels["kernelspecs"]: + raise ValueError(f"Kernel {kernel_name} is not installed.") + + self._kernel_id = self._jupyter_client.start_kernel(kernel_name) + self._kernel_name = kernel_name + self._jupyter_kernel_client = self._jupyter_client.get_kernel_client(self._kernel_id) + self._timeout = timeout + self._output_dir = output_dir + self._system_message_update = system_message_update + + @property + def user_capability(self) -> "JupyterCodeExecutor.UserCapability": + """(Experimental) Export a user capability for this executor that can be added to + an agent using the `add_to_agent` method.""" + return JupyterCodeExecutor.UserCapability(self._system_message_update) + + @property + def code_extractor(self) -> CodeExtractor: + """(Experimental) Export a code extractor that can be used by an agent.""" + return MarkdownCodeExtractor() + + def execute_code_blocks(self, code_blocks: List[CodeBlock]) -> IPythonCodeResult: + """(Experimental) Execute a list of code blocks and return the result. + + This method executes a list of code blocks as cells in an IPython kernel + managed by this class. + See: https://jupyter-client.readthedocs.io/en/stable/messaging.html + for the message protocol. + + Args: + code_blocks (List[CodeBlock]): A list of code blocks to execute. + + Returns: + IPythonCodeResult: The result of the code execution. 
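+ Execution stops at the first failing code block; the returned result then has exit_code 1 and carries the error output.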
+ """ + self._jupyter_kernel_client.wait_for_ready() + outputs = [] + output_files = [] + for code_block in code_blocks: + code = self._process_code(code_block.code) + result = self._jupyter_kernel_client.execute(code, timeout_seconds=self._timeout) + if result.is_ok: + outputs.append(result.output) + for data in result.data_items: + if data.mime_type == "image/png": + path = self._save_image(data.data) + outputs.append(f"Image data saved to {path}") + output_files.append(path) + elif data.mime_type == "text/html": + path = self._save_html(data.data) + outputs.append(f"HTML data saved to {path}") + output_files.append(path) + else: + outputs.append(json.dumps(data.data)) + else: + return IPythonCodeResult( + exit_code=1, + output=f"ERROR: {result.output}", + ) + + return IPythonCodeResult( + exit_code=0, output="\n".join([str(output) for output in outputs]), output_files=output_files + ) + + def restart(self) -> None: + """(Experimental) Restart a new session.""" + self._jupyter_client.restart_kernel(self._kernel_id) + self._jupyter_kernel_client = self._jupyter_client.get_kernel_client(self._kernel_id) + + def _save_image(self, image_data_base64: str) -> str: + """Save image data to a file.""" + image_data = base64.b64decode(image_data_base64) + # Randomly generate a filename. + filename = f"{uuid.uuid4().hex}.png" + path = os.path.join(self._output_dir, filename) + with open(path, "wb") as f: + f.write(image_data) + return os.path.abspath(path) + + def _save_html(self, html_data: str) -> str: + """Save html data to a file.""" + # Randomly generate a filename. + filename = f"{uuid.uuid4().hex}.html" + path = os.path.join(self._output_dir, filename) + with open(path, "w") as f: + f.write(html_data) + return os.path.abspath(path) + + def _process_code(self, code: str) -> str: + """Process code before execution.""" + # Find lines that start with `! pip install` and make sure "-qqq" flag is added. + lines = code.split("\n") + for i, line in enumerate(lines): + # use regex to find lines that start with `! pip install` or `!pip install`. + match = re.search(r"^! 
?pip install", line) + if match is not None: + if "-qqq" not in line: + lines[i] = line.replace(match.group(0), match.group(0) + " -qqq") + return "\n".join(lines) + + +class LocalJupyterCodeExecutor(JupyterCodeExecutor): + def __init__(self, **kwargs: Any): + """Creates a LocalJupyterServer and passes it to JupyterCodeExecutor, see JupyterCodeExecutor for args""" + jupyter_server = LocalJupyterServer() + super().__init__(jupyter_server=jupyter_server, **kwargs) diff --git a/setup.py b/setup.py index a2577271cc1..768ad708cf4 100644 --- a/setup.py +++ b/setup.py @@ -55,7 +55,11 @@ "graph": ["networkx", "matplotlib"], "websurfer": ["beautifulsoup4", "markdownify", "pdfminer.six", "pathvalidate"], "redis": ["redis"], + # Dependencies for EmbeddedIPythonExecutor, to be removed once upstream bug fixed + # https://github.com/jupyter-server/kernel_gateway/issues/398 "ipython": ["jupyter-client>=8.6.0", "ipykernel>=6.29.0"], + # Dependencies for LocalJupyterExecutor + "local-jupyter-exec": ["jupyter-kernel-gateway", "websocket-client", "requests", "ipykernel"], }, classifiers=[ "Programming Language :: Python :: 3", diff --git a/test/coding/test_embedded_ipython_code_executor.py b/test/coding/test_embedded_ipython_code_executor.py index a43f103d4e2..fcd423497aa 100644 --- a/test/coding/test_embedded_ipython_code_executor.py +++ b/test/coding/test_embedded_ipython_code_executor.py @@ -1,4 +1,6 @@ import os +import sys +from pathlib import Path import tempfile from typing import Dict, Union import uuid @@ -11,50 +13,62 @@ try: from autogen.coding.embedded_ipython_code_executor import EmbeddedIPythonCodeExecutor + from autogen.coding.jupyter_code_executor import LocalJupyterCodeExecutor + + # Skip on windows due to kernelgateway bug https://github.com/jupyter-server/kernel_gateway/issues/398 + if sys.platform == "win32": + classes_to_test = [EmbeddedIPythonCodeExecutor] + else: + classes_to_test = [EmbeddedIPythonCodeExecutor, LocalJupyterCodeExecutor] skip = False skip_reason = "" except ImportError: skip = True - skip_reason = "Dependencies for EmbeddedIPythonCodeExecutor not installed." + skip_reason = "Dependencies for EmbeddedIPythonCodeExecutor or LocalJupyterCodeExecutor not installed." + classes_to_test = [] @pytest.mark.skipif(skip, reason=skip_reason) -def test_create() -> None: +@pytest.mark.parametrize("cls", classes_to_test) +def test_create(cls) -> None: config: Dict[str, Union[str, CodeExecutor]] = {"executor": "ipython-embedded"} executor = CodeExecutorFactory.create(config) assert isinstance(executor, EmbeddedIPythonCodeExecutor) - config = {"executor": EmbeddedIPythonCodeExecutor()} + config = {"executor": cls()} executor = CodeExecutorFactory.create(config) assert executor is config["executor"] @pytest.mark.skipif(skip, reason=skip_reason) -def test_init() -> None: - executor = EmbeddedIPythonCodeExecutor(timeout=10, kernel_name="python3", output_dir=".") - assert executor.timeout == 10 and executor.kernel_name == "python3" and executor.output_dir == "." +@pytest.mark.parametrize("cls", classes_to_test) +def test_init(cls) -> None: + executor = cls(timeout=10, kernel_name="python3", output_dir=".") + assert executor._timeout == 10 and executor._kernel_name == "python3" and executor._output_dir == Path(".") # Try invalid output directory. 
with pytest.raises(ValueError, match="Output directory .* does not exist."): - executor = EmbeddedIPythonCodeExecutor(timeout=111, kernel_name="python3", output_dir="/invalid/directory") + executor = cls(timeout=111, kernel_name="python3", output_dir="/invalid/directory") # Try invalid kernel name. with pytest.raises(ValueError, match="Kernel .* is not installed."): - executor = EmbeddedIPythonCodeExecutor(timeout=111, kernel_name="invalid_kernel_name", output_dir=".") + executor = cls(timeout=111, kernel_name="invalid_kernel_name", output_dir=".") @pytest.mark.skipif(skip, reason=skip_reason) -def test_execute_code_single_code_block() -> None: - executor = EmbeddedIPythonCodeExecutor() +@pytest.mark.parametrize("cls", classes_to_test) +def test_execute_code_single_code_block(cls) -> None: + executor = cls() code_blocks = [CodeBlock(code="import sys\nprint('hello world!')", language="python")] code_result = executor.execute_code_blocks(code_blocks) assert code_result.exit_code == 0 and "hello world!" in code_result.output @pytest.mark.skipif(skip, reason=skip_reason) -def test_execute_code_multiple_code_blocks() -> None: - executor = EmbeddedIPythonCodeExecutor() +@pytest.mark.parametrize("cls", classes_to_test) +def test_execute_code_multiple_code_blocks(cls) -> None: + executor = cls() code_blocks = [ CodeBlock(code="import sys\na = 123 + 123\n", language="python"), CodeBlock(code="print(a)", language="python"), @@ -75,8 +89,9 @@ def test_function(a, b): @pytest.mark.skipif(skip, reason=skip_reason) -def test_execute_code_bash_script() -> None: - executor = EmbeddedIPythonCodeExecutor() +@pytest.mark.parametrize("cls", classes_to_test) +def test_execute_code_bash_script(cls) -> None: + executor = cls() # Test bash script. code_blocks = [CodeBlock(code='!echo "hello world!"', language="bash")] code_result = executor.execute_code_blocks(code_blocks) @@ -84,16 +99,18 @@ def test_execute_code_bash_script() -> None: @pytest.mark.skipif(skip, reason=skip_reason) -def test_timeout() -> None: - executor = EmbeddedIPythonCodeExecutor(timeout=1) +@pytest.mark.parametrize("cls", classes_to_test) +def test_timeout(cls) -> None: + executor = cls(timeout=1) code_blocks = [CodeBlock(code="import time; time.sleep(10); print('hello world!')", language="python")] code_result = executor.execute_code_blocks(code_blocks) assert code_result.exit_code and "Timeout" in code_result.output @pytest.mark.skipif(skip, reason=skip_reason) -def test_silent_pip_install() -> None: - executor = EmbeddedIPythonCodeExecutor(timeout=600) +@pytest.mark.parametrize("cls", classes_to_test) +def test_silent_pip_install(cls) -> None: + executor = cls(timeout=600) code_blocks = [CodeBlock(code="!pip install matplotlib numpy", language="python")] code_result = executor.execute_code_blocks(code_blocks) assert code_result.exit_code == 0 and code_result.output.strip() == "" @@ -105,8 +122,9 @@ def test_silent_pip_install() -> None: @pytest.mark.skipif(skip, reason=skip_reason) -def test_restart() -> None: - executor = EmbeddedIPythonCodeExecutor() +@pytest.mark.parametrize("cls", classes_to_test) +def test_restart(cls) -> None: + executor = cls() code_blocks = [CodeBlock(code="x = 123", language="python")] code_result = executor.execute_code_blocks(code_blocks) assert code_result.exit_code == 0 and code_result.output.strip() == "" @@ -118,9 +136,10 @@ def test_restart() -> None: @pytest.mark.skipif(skip, reason=skip_reason) -def test_save_image() -> None: +@pytest.mark.parametrize("cls", classes_to_test) +def test_save_image(cls) -> 
None: with tempfile.TemporaryDirectory() as temp_dir: - executor = EmbeddedIPythonCodeExecutor(output_dir=temp_dir) + executor = cls(output_dir=temp_dir) # Install matplotlib. code_blocks = [CodeBlock(code="!pip install matplotlib", language="python")] code_result = executor.execute_code_blocks(code_blocks) @@ -137,9 +156,10 @@ def test_save_image() -> None: @pytest.mark.skipif(skip, reason=skip_reason) -def test_save_html() -> None: +@pytest.mark.parametrize("cls", classes_to_test) +def test_save_html(cls) -> None: with tempfile.TemporaryDirectory() as temp_dir: - executor = EmbeddedIPythonCodeExecutor(output_dir=temp_dir) + executor = cls(output_dir=temp_dir) # Test saving html. code_blocks = [ CodeBlock(code="from IPython.display import HTML\nHTML('
<div>Hello, world!</div>
')", language="python") @@ -152,7 +172,8 @@ def test_save_html() -> None: @pytest.mark.skipif(skip, reason=skip_reason) @pytest.mark.skipif(skip_openai, reason="openai not installed OR requested to skip") -def test_conversable_agent_capability() -> None: +@pytest.mark.parametrize("cls", classes_to_test) +def test_conversable_agent_capability(cls) -> None: KEY_LOC = "notebook" OAI_CONFIG_LIST = "OAI_CONFIG_LIST" config_list = config_list_from_json( @@ -171,7 +192,7 @@ def test_conversable_agent_capability() -> None: llm_config=llm_config, code_execution_config=False, ) - executor = EmbeddedIPythonCodeExecutor() + executor = cls() executor.user_capability.add_to_agent(agent) # Test updated system prompt. @@ -193,11 +214,12 @@ def test_conversable_agent_capability() -> None: @pytest.mark.skipif(skip, reason=skip_reason) -def test_conversable_agent_code_execution() -> None: +@pytest.mark.parametrize("cls", classes_to_test) +def test_conversable_agent_code_execution(cls) -> None: agent = ConversableAgent( "user_proxy", llm_config=False, - code_execution_config={"executor": "ipython-embedded"}, + code_execution_config={"executor": cls()}, ) msg = """ Run this code: From fb2b412c4a2c63538a9d9a169a303b3951d17267 Mon Sep 17 00:00:00 2001 From: Ian Date: Sat, 24 Feb 2024 11:56:39 +0800 Subject: [PATCH 4/7] Handle azure_deployment Parameter Issue in GPTAssistantAgent to Maintain Compatibility with OpenAIWrapper (#1721) * support getting model from both llm config and config list * address comments * address commentsd --------- Co-authored-by: Chi Wang --- .../agentchat/contrib/gpt_assistant_agent.py | 30 ++++++++++++------- test/agentchat/contrib/test_gpt_assistant.py | 21 +++++++++++-- 2 files changed, 38 insertions(+), 13 deletions(-) diff --git a/autogen/agentchat/contrib/gpt_assistant_agent.py b/autogen/agentchat/contrib/gpt_assistant_agent.py index e5916781cd6..a6ab3372f87 100644 --- a/autogen/agentchat/contrib/gpt_assistant_agent.py +++ b/autogen/agentchat/contrib/gpt_assistant_agent.py @@ -3,6 +3,7 @@ import json import time import logging +import copy from autogen import OpenAIWrapper from autogen.oai.openai_utils import retrieve_assistants_by_name @@ -52,12 +53,26 @@ def __init__( - verbose (bool): If set to True, enables more detailed output from the assistant thread. - Other kwargs: Except verbose, others are passed directly to ConversableAgent. 
""" + + self._verbose = kwargs.pop("verbose", False) + super().__init__( + name=name, system_message=instructions, human_input_mode="NEVER", llm_config=llm_config, **kwargs + ) + + if llm_config is False: + raise ValueError("llm_config=False is not supported for GPTAssistantAgent.") + # Use AutoGen OpenAIWrapper to create a client - openai_client_cfg = None - model_name = "gpt-4-1106-preview" - if llm_config and llm_config.get("config_list") is not None and len(llm_config["config_list"]) > 0: - openai_client_cfg = llm_config["config_list"][0].copy() - model_name = openai_client_cfg.pop("model", "gpt-4-1106-preview") + model_name = "gpt-4-0125-preview" + openai_client_cfg = copy.deepcopy(llm_config) + # GPTAssistantAgent's azure_deployment param may cause NotFoundError (404) in client.beta.assistants.list() + # See: https://github.com/microsoft/autogen/pull/1721 + if openai_client_cfg.get("config_list") is not None and len(openai_client_cfg["config_list"]) > 0: + model_name = openai_client_cfg["config_list"][0].pop("model", "gpt-4-0125-preview") + else: + model_name = openai_client_cfg.pop("model", "gpt-4-0125-preview") + + logger.warning("OpenAI client config of GPTAssistantAgent(%s) - model: %s", name, model_name) oai_wrapper = OpenAIWrapper(**openai_client_cfg) if len(oai_wrapper._clients) > 1: @@ -143,11 +158,6 @@ def __init__( # Tools are specified but overwrite_tools is False; do not update the assistant's tools logger.warning("overwrite_tools is False. Using existing tools from assistant API.") - self._verbose = kwargs.pop("verbose", False) - super().__init__( - name=name, system_message=instructions, human_input_mode="NEVER", llm_config=llm_config, **kwargs - ) - # lazily create threads self._openai_threads = {} self._unread_index = defaultdict(int) diff --git a/test/agentchat/contrib/test_gpt_assistant.py b/test/agentchat/contrib/test_gpt_assistant.py index 5419c99492a..23e865c4a9f 100644 --- a/test/agentchat/contrib/test_gpt_assistant.py +++ b/test/agentchat/contrib/test_gpt_assistant.py @@ -17,7 +17,21 @@ if not skip: openai_config_list = autogen.config_list_from_json( - OAI_CONFIG_LIST, file_location=KEY_LOC, filter_dict={"api_type": ["openai"]} + OAI_CONFIG_LIST, + file_location=KEY_LOC, + # The Retrieval tool requires at least gpt-3.5-turbo-1106 (newer versions are supported) or gpt-4-turbo-preview models. + # https://platform.openai.com/docs/models/overview + filter_dict={ + "api_type": ["openai"], + "model": [ + "gpt-4-turbo-preview", + "gpt-4-0125-preview", + "gpt-4-1106-preview", + "gpt-3.5-turbo", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-1106", + ], + }, ) aoai_config_list = autogen.config_list_from_json( OAI_CONFIG_LIST, @@ -41,7 +55,8 @@ def test_config_list() -> None: ) def test_gpt_assistant_chat() -> None: for gpt_config in [openai_config_list, aoai_config_list]: - _test_gpt_assistant_chat(gpt_config) + _test_gpt_assistant_chat({"config_list": gpt_config}) + _test_gpt_assistant_chat(gpt_config[0]) def _test_gpt_assistant_chat(gpt_config) -> None: @@ -68,7 +83,7 @@ def ask_ossinsight(question: str) -> str: name = f"For test_gpt_assistant_chat {uuid.uuid4()}" analyst = GPTAssistantAgent( name=name, - llm_config={"tools": [{"type": "function", "function": ossinsight_api_schema}], "config_list": gpt_config}, + llm_config={"tools": [{"type": "function", "function": ossinsight_api_schema}], **gpt_config}, instructions="Hello, Open Source Project Analyst. 
You'll conduct comprehensive evaluations of open source projects or organizations on the GitHub platform", ) try: From 477598afffc0542c653342c9a64fcad0df39e06b Mon Sep 17 00:00:00 2001 From: afourney Date: Sat, 24 Feb 2024 08:50:46 -0800 Subject: [PATCH 5/7] Groupchat send introductions (#961) * Allow the GroupChatManager to send introductions. * Fixed function name. * Added test cases for sending introductions. * Trying to sort out why remote pytest is failing. * Fixed broken plugin behavior. * Update autogen/agentchat/groupchat.py Co-authored-by: Chi Wang * Updated as per Chi's suggestions. --------- Co-authored-by: Chi Wang Co-authored-by: Eric Zhu --- autogen/agentchat/groupchat.py | 34 +++++++++- test/agentchat/test_groupchat.py | 107 ++++++++++++++++++++++++++++++- 2 files changed, 139 insertions(+), 2 deletions(-) diff --git a/autogen/agentchat/groupchat.py b/autogen/agentchat/groupchat.py index 1b5905e8d1b..5997b093a36 100644 --- a/autogen/agentchat/groupchat.py +++ b/autogen/agentchat/groupchat.py @@ -58,7 +58,8 @@ class GroupChat: Must be supplied if `allowed_or_disallowed_speaker_transitions` is not None. - enable_clear_history: enable possibility to clear history of messages for agents manually by providing "clear history" phrase in user prompt. This is experimental feature. - See description of `GroupChatManager.clear_agents_history` function for more info. + See description of GroupChatManager.clear_agents_history function for more info. + - send_introductions: send a round of introductions at the start of the group chat, so agents know who they can speak to (default: False) """ agents: List[Agent] @@ -71,6 +72,7 @@ class GroupChat: allowed_or_disallowed_speaker_transitions: Optional[Dict] = None speaker_transitions_type: Optional[str] = None enable_clear_history: Optional[bool] = False + send_introductions: Optional[bool] = False _VALID_SPEAKER_SELECTION_METHODS = ["auto", "manual", "random", "round_robin"] _VALID_SPEAKER_TRANSITIONS_TYPE = ["allowed", "disallowed", None] @@ -229,6 +231,16 @@ def select_speaker_prompt(self, agents: Optional[List[Agent]] = None) -> str: agents = self.agents return f"Read the above conversation. Then select the next role from {[agent.name for agent in agents]} to play. Only return the role." + def introductions_msg(self, agents: Optional[List[Agent]] = None) -> str: + """Return the system message for selecting the next speaker. This is always the *first* message in the context.""" + if agents is None: + agents = self.agents + + return f"""Hello everyone. We have assembled a great team today to answer questions and solve tasks. 
In attendance are: + +{self._participant_roles(agents)} +""" + def manual_select_speaker(self, agents: Optional[List[Agent]] = None) -> Union[Agent, None]: """Manually select the next speaker.""" if agents is None: @@ -535,6 +547,16 @@ def run_chat( message = messages[-1] speaker = sender groupchat = config + send_introductions = getattr(groupchat, "send_introductions", False) + + if send_introductions: + # Broadcast the intro + intro = groupchat.introductions_msg() + for agent in groupchat.agents: + self.send(intro, agent, request_reply=False, silent=True) + # NOTE: We do not also append to groupchat.messages, + # since groupchat handles its own introductions + if self.client_cache is not None: for a in groupchat.agents: a.previous_cache = a.client_cache @@ -598,6 +620,16 @@ async def a_run_chat( message = messages[-1] speaker = sender groupchat = config + send_introductions = getattr(groupchat, "send_introductions", False) + + if send_introductions: + # Broadcast the intro + intro = groupchat.introductions_msg() + for agent in groupchat.agents: + self.a_send(intro, agent, request_reply=False, silent=True) + # NOTE: We do not also append to groupchat.messages, + # since groupchat handles its own introductions + if self.client_cache is not None: for a in groupchat.agents: a.previous_cache = a.client_cache diff --git a/test/agentchat/test_groupchat.py b/test/agentchat/test_groupchat.py index 8bb55fc0f66..51a4906c09c 100644 --- a/test/agentchat/test_groupchat.py +++ b/test/agentchat/test_groupchat.py @@ -448,6 +448,110 @@ def test_next_agent(): assert groupchat.next_agent(agent4, [agent1, agent2, agent3]) == agent1 +def test_send_intros(): + agent1 = autogen.ConversableAgent( + "alice", + description="The first agent.", + max_consecutive_auto_reply=10, + human_input_mode="NEVER", + llm_config=False, + default_auto_reply="This is alice speaking. TERMINATE", + ) + agent2 = autogen.ConversableAgent( + "bob", + description="The second agent.", + max_consecutive_auto_reply=10, + human_input_mode="NEVER", + llm_config=False, + default_auto_reply="This is bob speaking. TERMINATE", + ) + agent3 = autogen.ConversableAgent( + "sam", + description="The third agent.", + max_consecutive_auto_reply=10, + human_input_mode="NEVER", + llm_config=False, + default_auto_reply="This is sam speaking. TERMINATE", + ) + agent4 = autogen.ConversableAgent( + "sally", + description="The fourth agent.", + max_consecutive_auto_reply=10, + human_input_mode="NEVER", + llm_config=False, + default_auto_reply="This is sally speaking. TERMINATE", + ) + + # Test empty is_termination_msg function + groupchat = autogen.GroupChat( + agents=[agent1, agent2, agent3], + messages=[], + speaker_selection_method="round_robin", + max_round=10, + send_introductions=True, + ) + + intro = groupchat.introductions_msg() + assert "The first agent." in intro + assert "The second agent." in intro + assert "The third agent." in intro + assert "The fourth agent." not in intro + + intro = groupchat.introductions_msg([agent1, agent2, agent4]) + assert "The first agent." in intro + assert "The second agent." in intro + assert "The third agent." not in intro + assert "The fourth agent." 
in intro + + groupchat = autogen.GroupChat( + agents=[agent1, agent2, agent3], + messages=[], + speaker_selection_method="round_robin", + max_round=10, + send_introductions=True, + ) + + group_chat_manager = autogen.GroupChatManager( + groupchat=groupchat, + llm_config=False, + is_termination_msg=lambda x: x.get("content", "").rstrip().find("TERMINATE") >= 0, + ) + + group_chat_manager.initiate_chat(group_chat_manager, message="The initiating message.") + for a in [agent1, agent2, agent3]: + messages = agent1.chat_messages[group_chat_manager] + assert len(messages) == 3 + assert "The first agent." in messages[0]["content"] + assert "The second agent." in messages[0]["content"] + assert "The third agent." in messages[0]["content"] + assert "The initiating message." == messages[1]["content"] + assert messages[2]["content"] == agent1._default_auto_reply + + # Reset and start again + agent1.reset() + agent2.reset() + agent3.reset() + agent4.reset() + + # Check the default (no introductions) + groupchat2 = autogen.GroupChat( + agents=[agent1, agent2, agent3], messages=[], speaker_selection_method="round_robin", max_round=10 + ) + + group_chat_manager2 = autogen.GroupChatManager( + groupchat=groupchat2, + llm_config=False, + is_termination_msg=lambda x: x.get("content", "").rstrip().find("TERMINATE") >= 0, + ) + + group_chat_manager2.initiate_chat(group_chat_manager2, message="The initiating message.") + for a in [agent1, agent2, agent3]: + messages = agent1.chat_messages[group_chat_manager2] + assert len(messages) == 2 + assert "The initiating message." == messages[0]["content"] + assert messages[1]["content"] == agent1._default_auto_reply + + def test_selection_helpers(): agent1 = autogen.ConversableAgent( "alice", @@ -814,6 +918,7 @@ def chat(gc_manager: autogen.GroupChatManager): # test_agent_mentions() # test_termination() # test_next_agent() + test_send_intros() # test_invalid_allow_repeat_speaker() # test_graceful_exit_before_max_round() - test_clear_agents_history() + # test_clear_agents_history() From 085bf6cf3d1f848877ab24ac3aeaf5f37cbc9d46 Mon Sep 17 00:00:00 2001 From: afourney Date: Sat, 24 Feb 2024 10:12:57 -0800 Subject: [PATCH 6/7] Version 0.0.2 of Autogenbench (#1548) * Prints the version of AutoGenBench from the command line, closing i1458 * Added autogenbench version to timestamp.txt * Attempting to fix formatting. * Add a gitignore for autogenbench * Generalize to read all template dirs from Templates * AutoGenBench logs telemetry when available. * Remove spaces if present from template names. * Bump version. * Fixed formatting. * Allow native warning to be skipped. Mount autogen repo in Docker if it can be found (experimental). * Native execution now occurs in a venv. * Bump version. * Fixed a prompt escaping bug evident in GAIA task '6f37996b-2ac7-44b0-8e68-6d28256631b4' * Updated all scenarios to use template discovery. * Update with main version of runtime_logging. 
--------- Co-authored-by: gagb --- samples/tools/autogenbench/.gitignore | 3 + .../tools/autogenbench/autogenbench/cli.py | 11 ++ .../autogenbench/autogenbench/run_cmd.py | 120 +++++++++++++++--- .../autogenbench/template/testbed_utils.py | 17 +++ .../autogenbench/autogenbench/version.py | 2 +- samples/tools/autogenbench/pyproject.toml | 5 + .../scenarios/AutoGPT/Scripts/init_tasks.py | 8 +- .../autogenbench/scenarios/GAIA/MANIFEST.json | 4 +- .../scenarios/GAIA/Scripts/init_tasks.py | 13 +- .../GAIA/Templates/BasicTwoAgents/prompt.txt | 1 + .../GAIA/Templates/BasicTwoAgents/scenario.py | 8 +- .../GAIA/Templates/SocietyOfMind/prompt.txt | 1 + .../GAIA/Templates/SocietyOfMind/scenario.py | 15 ++- .../scenarios/HumanEval/Scripts/init_tasks.py | 33 +++-- .../scenarios/MATH/Scripts/init_tasks.py | 12 +- 15 files changed, 201 insertions(+), 52 deletions(-) create mode 100644 samples/tools/autogenbench/.gitignore create mode 100644 samples/tools/autogenbench/scenarios/GAIA/Templates/BasicTwoAgents/prompt.txt create mode 100644 samples/tools/autogenbench/scenarios/GAIA/Templates/SocietyOfMind/prompt.txt diff --git a/samples/tools/autogenbench/.gitignore b/samples/tools/autogenbench/.gitignore new file mode 100644 index 00000000000..2eccb6f6c69 --- /dev/null +++ b/samples/tools/autogenbench/.gitignore @@ -0,0 +1,3 @@ +scenarios/*/Downloads +scenarios/*/Tasks +*/Results diff --git a/samples/tools/autogenbench/autogenbench/cli.py b/samples/tools/autogenbench/autogenbench/cli.py index dd0ebd70ea7..6b27a8aeba4 100644 --- a/samples/tools/autogenbench/autogenbench/cli.py +++ b/samples/tools/autogenbench/autogenbench/cli.py @@ -1,4 +1,5 @@ import sys +from .version import __version__ from .run_cmd import run_cli from .clone_cmd import clone_cli from .tabulate_cmd import tabulate_cli @@ -9,6 +10,7 @@ def main(args=None): args = sys.argv[:] # Shallow copy invocation_cmd = "autogenbench" + version_string = f"AutoGenBench version {__version__}" commands = [ { @@ -26,6 +28,11 @@ def main(args=None): "description": "tabulate the results of a previous run", "function": tabulate_cli, }, + { + "command": "--version", + "description": f"print the version of {invocation_cmd}", + "function": lambda _args: print(f"{version_string}"), + }, {"command": "--help", "description": "print this message", "function": None}, ] @@ -40,6 +47,8 @@ def main(args=None): commands_details += f" {padded_cmd}: {c['description']}\n" usage_text = f""" +{version_string} + usage: {invocation_cmd} COMMAND ARGS Where, COMMAND is one of: {commands_list} @@ -49,6 +58,8 @@ def main(args=None): """.strip() help_text = f""" +{version_string} + usage: {invocation_cmd} COMMAND ARGS {invocation_cmd} is a tool for running and managing AutoGen benchmark scenarios. A typically session might resemble: diff --git a/samples/tools/autogenbench/autogenbench/run_cmd.py b/samples/tools/autogenbench/autogenbench/run_cmd.py index c29f064d56e..d4f6d3face3 100644 --- a/samples/tools/autogenbench/autogenbench/run_cmd.py +++ b/samples/tools/autogenbench/autogenbench/run_cmd.py @@ -11,6 +11,7 @@ import random from autogen import config_list_from_json from autogen.oai.openai_utils import filter_config +from .version import __version__ # Figure out where everything is SCRIPT_PATH = os.path.realpath(__file__) @@ -247,17 +248,25 @@ def get_scenario_env(config_list, env_file=DEFAULT_ENV_FILE): Returns: A dictionary of keys and values that need to be added to the system environment. 
""" env = dict() + + # Populate with commonly needed keys + openai_api_key = os.environ.get("OPENAI_API_KEY") + if openai_api_key is not None and len(openai_api_key.strip()) > 0: + env["OPENAI_API_KEY"] = openai_api_key + + bing_api_key = os.environ.get("BING_API_KEY") + if bing_api_key is not None and len(bing_api_key.strip()) > 0: + env["BING_API_KEY"] = bing_api_key + + # Update with any values from the ENV.json file if os.path.isfile(env_file): with open(env_file, "rt") as fh: - env = json.loads(fh.read()) + env.update(json.loads(fh.read())) + # Include the config_list that we are using config_list_json = json.dumps(config_list) env["OAI_CONFIG_LIST"] = config_list_json - openai_api_key = os.environ.get("OPENAI_API_KEY") - if openai_api_key is not None and len(openai_api_key.strip()) > 0: - env["OPENAI_API_KEY"] = openai_api_key - return env @@ -286,6 +295,12 @@ def run_scenario_natively(work_dir, env, timeout=TASK_TIMEOUT): f"""# echo RUN.SH STARTING !#!# export AUTOGEN_TESTBED_SETTING="Native" +echo "autogenbench version: {__version__}" > timestamp.txt + +# Create and activate the virtual environment +# This is called in a subprocess, and will not impact the parent +{sys.executable} -m venv .autogenbench_venv +. .autogenbench_venv/bin/activate # Run the global init script if it exists if [ -f global_init.sh ] ; then @@ -298,6 +313,7 @@ def run_scenario_natively(work_dir, env, timeout=TASK_TIMEOUT): fi # Run the scenario +pip install -r requirements.txt echo SCENARIO.PY STARTING !#!# timeout --preserve-status --kill-after {timeout + 30}s {timeout}s python scenario.py EXIT_CODE=$? @@ -312,6 +328,10 @@ def run_scenario_natively(work_dir, env, timeout=TASK_TIMEOUT): rm -Rf .cache fi +if [ -d __pycache__ ] ; then + rm -Rf __pycache__ +fi + # Run the scenario finalize script if it exists if [ -f scenario_finalize.sh ] ; then . ./scenario_finalize.sh @@ -322,6 +342,12 @@ def run_scenario_natively(work_dir, env, timeout=TASK_TIMEOUT): . ./global_finalize.sh fi +# We don't need to deactivate the venv because it's +# contained in the subprocess; but we should clean it up +if [ -d .autogenbench_venv ] ; then + rm -Rf .autogenbench_venv +fi + echo RUN.SH COMPLETE !#!# """ ) @@ -387,7 +413,9 @@ def run_scenario_in_docker(work_dir, env, timeout=TASK_TIMEOUT, docker_image=Non f"""# echo RUN.SH STARTING !#!# export AUTOGEN_TESTBED_SETTING="Docker" + umask 000 +echo "autogenbench version: {__version__}" > timestamp.txt # Run the global init script if it exists if [ -f global_init.sh ] ; then @@ -415,6 +443,10 @@ def run_scenario_in_docker(work_dir, env, timeout=TASK_TIMEOUT, docker_image=Non rm -Rf .cache fi +if [ -d __pycache__ ] ; then + rm -Rf __pycache__ +fi + # Run the scenario finalize script if it exists if [ -f scenario_finalize.sh ] ; then . 
./scenario_finalize.sh @@ -429,18 +461,31 @@ def run_scenario_in_docker(work_dir, env, timeout=TASK_TIMEOUT, docker_image=Non """ ) - print("\n\n" + work_dir + "\n===================================================================") + # Figure out what folders to mount + volumes = {str(pathlib.Path(work_dir).absolute()): {"bind": "/workspace", "mode": "rw"}} + + # Add the autogen repo if we can find it + autogen_repo_base = os.environ.get("AUTOGENBENCH_REPO_BASE") + if autogen_repo_base is None: + autogen_repo_base = find_autogen_repo(os.getcwd()) + elif not os.path.isdir(autogen_repo_base): + raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), autogen_repo_base) + + if autogen_repo_base is not None: + volumes[str(pathlib.Path(autogen_repo_base).absolute())] = {"bind": "/autogen", "mode": "rw"} + + print("Mounting:") + for k in volumes: + bind = volumes[k]["bind"] + mode = volumes[k]["mode"].upper() + if bind == "/workspace": + k = os.path.relpath(k) + print(f"[{mode}]\t'{k}' => '{bind}'") + print("===================================================================") # Create and run the container - abs_path = str(pathlib.Path(work_dir).absolute()) container = client.containers.run( - image, - command=["sh", "run.sh"], - working_dir="/workspace", - environment=env, - detach=True, - # get absolute path to the working directory - volumes={abs_path: {"bind": "/workspace", "mode": "rw"}}, + image, command=["sh", "run.sh"], working_dir="/workspace", environment=env, detach=True, volumes=volumes ) # Read the logs in a streaming fashion. Keep an eye on the time to make sure we don't need to stop. @@ -485,6 +530,34 @@ def build_default_docker_image(docker_client, image_tag): sys.stdout.write(segment["stream"]) +def find_autogen_repo(path): + """ + Utility for identifying if the path is a subdirectory of the autogen repo. + + Returns: the path to the root of the autogen repo if one is found, otherwise None + """ + + # Normalize the path (we expect a directory) + path = os.path.abspath(path) + if os.path.isfile(path): + path = os.path.dirname(path) + + while True: + test_path = os.path.join(path, "autogen", "agentchat", "conversable_agent.py") # We found autogen + if os.path.isfile(test_path): + return path + + # Stop if we hit the root + parent_dir = os.path.abspath(os.path.join(path, os.pardir)) + if parent_dir == path: + break + + # Keep searching + path = parent_dir + + return None + + def run_cli(args): invocation_cmd = args[0] args = args[1:] @@ -581,12 +654,23 @@ def run_cli(args): if parsed_args.requirements is not None: sys.exit("--requirements is not compatible with --native. Exiting.") - choice = input( - 'WARNING: Running natively, without Docker, not only poses the usual risks of executing arbitrary AI generated code on your machine, it also makes it impossible to ensure that each test starts from a known and consistent set of initial conditions. For example, if the agents spend time debugging and installing Python libraries to solve the task, then those libraries will be available to all other runs. In other words, earlier runs can influence later runs, leading to many confounds in testing.\n\nAre you absolutely sure you want to continue with native execution? Type "Yes" exactly, and in full, to proceed: ' + sys.stderr.write( + "WARNING: Running natively, without Docker, not only poses the usual risks of executing arbitrary AI generated code on your machine, it also makes it impossible to ensure that each test starts from a known and consistent set of initial conditions. 
For example, if the agents spend time debugging and installing Python libraries to solve the task, then those libraries will be available to all other runs. In other words, earlier runs can influence later runs, leading to many confounds in testing.\n\n" ) - if choice.strip().lower() != "yes": - sys.exit("Received '" + choice + "'. Exiting.") + # Does an environment variable override the prompt? + allow_native = os.environ.get("AUTOGENBENCH_ALLOW_NATIVE") + if allow_native is None or allow_native == "": + choice = input( + 'Are you absolutely sure you want to continue with native execution? Type "Yes" exactly, and in full, to proceed: ' + ) + if choice.strip().lower() != "yes": + sys.exit("Received '" + choice + "'. Exiting.") + elif allow_native.strip().lower() != "yes": + sys.exit(f"Exiting because AUTOGENBENCH_ALLOW_NATIVE is '{allow_native}'\n") + else: + sys.stderr.write(f"Continuing because AUTOGENBENCH_ALLOW_NATIVE is '{allow_native}'\n") + time.sleep(0.75) # Pause very briefly so the message isn't lost in the noise # Parse the subsample subsample = None diff --git a/samples/tools/autogenbench/autogenbench/template/testbed_utils.py b/samples/tools/autogenbench/autogenbench/template/testbed_utils.py index bb435c5536c..a9a95615831 100644 --- a/samples/tools/autogenbench/autogenbench/template/testbed_utils.py +++ b/samples/tools/autogenbench/autogenbench/template/testbed_utils.py @@ -6,6 +6,15 @@ AUTOGEN_VERSION = packaging.version.parse(autogen.__version__) +# Try importing the runtime_logging module (only available in some branches) +LOGGING_ENABLED = False +try: + import autogen.runtime_logging + + LOGGING_ENABLED = True +except ImportError: + pass + def default_llm_config(config_list, timeout=180): """Return a default config list with a given timeout, and with caching disabled. @@ -57,6 +66,10 @@ def init(): if AUTOGEN_VERSION < packaging.version.parse("0.2.0b1"): autogen.Completion.start_logging(compact=False) + # Start logging + if LOGGING_ENABLED: + autogen.runtime_logging.start(config={"dbname": "telemetry.db"}) + def finalize(agents): """Helper function to finalize logging in a testbed scenario. 
@@ -89,3 +102,7 @@ def messages_to_json(agent):
         with open(os.path.join(script_dir, "completion_log.json"), "wt") as fh:
             fh.write(json.dumps(autogen.Completion.logged_history, indent=4))
         autogen.Completion.stop_logging()
+
+    # Stop logging
+    if LOGGING_ENABLED:
+        autogen.runtime_logging.stop()
diff --git a/samples/tools/autogenbench/autogenbench/version.py b/samples/tools/autogenbench/autogenbench/version.py
index f102a9cadfa..5f0b332cb55 100644
--- a/samples/tools/autogenbench/autogenbench/version.py
+++ b/samples/tools/autogenbench/autogenbench/version.py
@@ -1 +1 @@
-__version__ = "0.0.1"
+__version__ = "0.0.2a4"
diff --git a/samples/tools/autogenbench/pyproject.toml b/samples/tools/autogenbench/pyproject.toml
index 339217691d9..8cabc4b55e6 100644
--- a/samples/tools/autogenbench/pyproject.toml
+++ b/samples/tools/autogenbench/pyproject.toml
@@ -47,3 +47,8 @@ exclude = ["*.tests*"]

 [project.scripts]
 autogenbench = "autogenbench.cli:main"
+
+[tool.black]
+# https://github.com/psf/black
+line-length = 120
+exclude = "(.eggs|.git|.hg|.mypy_cache|.venv|_build|buck-out|build|dist)"
diff --git a/samples/tools/autogenbench/scenarios/AutoGPT/Scripts/init_tasks.py b/samples/tools/autogenbench/scenarios/AutoGPT/Scripts/init_tasks.py
index 00a6d15ef77..2f5ba5f40e8 100644
--- a/samples/tools/autogenbench/scenarios/AutoGPT/Scripts/init_tasks.py
+++ b/samples/tools/autogenbench/scenarios/AutoGPT/Scripts/init_tasks.py
@@ -8,6 +8,7 @@ import sys
 import glob
 import base64
+import re
 from huggingface_hub import snapshot_download

 SCRIPT_PATH = os.path.realpath(__file__)
@@ -88,7 +89,12 @@ def create_jsonl(name, template):
 ###############################################################################
 def main():
-    templates = {"two_agents": os.path.join(TEMPLATES_DIR, "TwoAgents")}
+    # list all directories in the Templates directory
+    # and populate a dictionary with the name and path
+    templates = {}
+    for entry in os.scandir(TEMPLATES_DIR):
+        if entry.is_dir():
+            templates[re.sub(r"\s", "", entry.name)] = entry.path

     # Add coding directories if needed (these are usually empty and left out of the repo)
     for template in templates.values():
diff --git a/samples/tools/autogenbench/scenarios/GAIA/MANIFEST.json b/samples/tools/autogenbench/scenarios/GAIA/MANIFEST.json
index 807ec57bdc3..02c829f25c4 100644
--- a/samples/tools/autogenbench/scenarios/GAIA/MANIFEST.json
+++ b/samples/tools/autogenbench/scenarios/GAIA/MANIFEST.json
@@ -4,9 +4,11 @@
     "Scripts/init_tasks.py": "Scripts/init_tasks.py",
     "Scripts/custom_tabulate.py": "Scripts/custom_tabulate.py",
     "Templates/BasicTwoAgents/expected_answer.txt": "Templates/BasicTwoAgents/expected_answer.txt",
+    "Templates/BasicTwoAgents/prompt.txt": "Templates/BasicTwoAgents/prompt.txt",
     "Templates/BasicTwoAgents/scenario.py": "Templates/BasicTwoAgents/scenario.py",
-    "Templates/SocietyOfMind/scenario.py": "Templates/SocietyOfMind/scenario.py",
     "Templates/SocietyOfMind/expected_answer.txt": "Templates/SocietyOfMind/expected_answer.txt",
+    "Templates/SocietyOfMind/prompt.txt": "Templates/SocietyOfMind/prompt.txt",
+    "Templates/SocietyOfMind/scenario.py": "Templates/SocietyOfMind/scenario.py",
     "Templates/SocietyOfMind/requirements.txt": "Templates/SocietyOfMind/requirements.txt"
   }
 }
diff --git a/samples/tools/autogenbench/scenarios/GAIA/Scripts/init_tasks.py b/samples/tools/autogenbench/scenarios/GAIA/Scripts/init_tasks.py
index 3ff483af181..61e2864a253 100644
--- a/samples/tools/autogenbench/scenarios/GAIA/Scripts/init_tasks.py
+++ b/samples/tools/autogenbench/scenarios/GAIA/Scripts/init_tasks.py
@@ -6,6 +6,7 @@ import json
 import os
 import sys
+import re
 from huggingface_hub import snapshot_download

 SCRIPT_PATH = os.path.realpath(__file__)
@@ -60,9 +61,9 @@ def create_jsonl(name, tasks, files_dir, template):
             "substitutions": {
                 "scenario.py": {
                     "__FILE_NAME__": task["file_name"],
-                    "__PROMPT__": task["Question"],
                 },
                 "expected_answer.txt": {"__EXPECTED_ANSWER__": task["Final answer"]},
+                "prompt.txt": {"__PROMPT__": task["Question"]},
             },
         }
@@ -97,10 +98,12 @@ def main():

         gaia_test_tasks[data["Level"] - 1].append(data)

-    templates = {
-        "two_agents": os.path.join(TEMPLATES_DIR, "BasicTwoAgents"),
-        "soc": os.path.join(TEMPLATES_DIR, "SocietyOfMind"),
-    }
+    # list all directories in the Templates directory
+    # and populate a dictionary with the name and path
+    templates = {}
+    for entry in os.scandir(TEMPLATES_DIR):
+        if entry.is_dir():
+            templates[re.sub(r"\s", "", entry.name)] = entry.path

     # Add coding directories if needed (these are usually empty and left out of the repo)
     for template in templates.values():
diff --git a/samples/tools/autogenbench/scenarios/GAIA/Templates/BasicTwoAgents/prompt.txt b/samples/tools/autogenbench/scenarios/GAIA/Templates/BasicTwoAgents/prompt.txt
new file mode 100644
index 00000000000..482f50dca31
--- /dev/null
+++ b/samples/tools/autogenbench/scenarios/GAIA/Templates/BasicTwoAgents/prompt.txt
@@ -0,0 +1 @@
+__PROMPT__
diff --git a/samples/tools/autogenbench/scenarios/GAIA/Templates/BasicTwoAgents/scenario.py b/samples/tools/autogenbench/scenarios/GAIA/Templates/BasicTwoAgents/scenario.py
index 5ca7b0a2814..3f3f53f18f6 100644
--- a/samples/tools/autogenbench/scenarios/GAIA/Templates/BasicTwoAgents/scenario.py
+++ b/samples/tools/autogenbench/scenarios/GAIA/Templates/BasicTwoAgents/scenario.py
@@ -7,6 +7,10 @@
 testbed_utils.init()
 ##############################

+# Read the prompt
+PROMPT = ""
+with open("prompt.txt", "rt") as fh:
+    PROMPT = fh.read().strip()

 GAIA_SYSTEM_MESSAGE = (
     "You are a helpful AI assistant, and today's date is "
@@ -48,9 +52,7 @@
 )

 filename = "__FILE_NAME__".strip()
-question = """
-__PROMPT__
-""".strip()
+question = PROMPT

 if len(filename) > 0:
     question = f"Consider the file '{filename}', which can be read from the current working directory. If you need to read or write it, output python code in a code block (```python) to do so. {question}"
diff --git a/samples/tools/autogenbench/scenarios/GAIA/Templates/SocietyOfMind/prompt.txt b/samples/tools/autogenbench/scenarios/GAIA/Templates/SocietyOfMind/prompt.txt
new file mode 100644
index 00000000000..482f50dca31
--- /dev/null
+++ b/samples/tools/autogenbench/scenarios/GAIA/Templates/SocietyOfMind/prompt.txt
@@ -0,0 +1 @@
+__PROMPT__
diff --git a/samples/tools/autogenbench/scenarios/GAIA/Templates/SocietyOfMind/scenario.py b/samples/tools/autogenbench/scenarios/GAIA/Templates/SocietyOfMind/scenario.py
index 129c898e47f..bacd22e096c 100644
--- a/samples/tools/autogenbench/scenarios/GAIA/Templates/SocietyOfMind/scenario.py
+++ b/samples/tools/autogenbench/scenarios/GAIA/Templates/SocietyOfMind/scenario.py
@@ -15,6 +15,11 @@
 testbed_utils.init()
 ##############################

+# Read the prompt
+PROMPT = ""
+with open("prompt.txt", "rt") as fh:
+    PROMPT = fh.read().strip()
+
 config_list = autogen.config_list_from_json(
     "OAI_CONFIG_LIST",
     filter_dict={"model": ["gpt-4"]},
@@ -46,9 +51,9 @@ def response_preparer(inner_messages):
     messages = [
         {
             "role": "user",
-            "content": """Earlier you were asked the following:
+            "content": f"""Earlier you were asked the following:

-__PROMPT__
+{PROMPT}

 Your team then worked diligently to address that request. Here is a transcript of that conversation:""",
         }
@@ -69,10 +74,10 @@ def response_preparer(inner_messages):
     messages.append(
         {
             "role": "user",
-            "content": """
+            "content": f"""
 Read the above conversation and output a FINAL ANSWER to the question. The question is repeated here for convenience:

-__PROMPT__
+{PROMPT}

 To output the final answer, use the following template: FINAL ANSWER: [YOUR FINAL ANSWER]
 YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
@@ -140,7 +145,7 @@ def response_preparer(inner_messages):
     question = f"""
Below I will pose a question to you that I would like you to answer. You should begin by listing all the relevant facts necessary to derive an answer, then fill in those facts from memory where possible, including specific names, numbers and statistics. You are Ken Jennings-level with trivia, and Mensa-level with puzzles, so there should be a deep well to draw from. After listing the facts, begin to solve the question in earnest. Here is the question:

-{filename_prompt}__PROMPT__
+{filename_prompt}{PROMPT}
""".strip()

     groupchat = GroupChatModerator(
diff --git a/samples/tools/autogenbench/scenarios/HumanEval/Scripts/init_tasks.py b/samples/tools/autogenbench/scenarios/HumanEval/Scripts/init_tasks.py
index 799ac7b170c..04480f5d2a9 100644
--- a/samples/tools/autogenbench/scenarios/HumanEval/Scripts/init_tasks.py
+++ b/samples/tools/autogenbench/scenarios/HumanEval/Scripts/init_tasks.py
@@ -8,6 +8,7 @@ import io
 import json
 import os
+import re
 import base64

 URL = "https://github.com/openai/human-eval/raw/master/data/HumanEval.jsonl.gz"
@@ -16,7 +17,13 @@
 SCRIPT_NAME = os.path.basename(SCRIPT_PATH)
 SCRIPT_DIR = os.path.dirname(SCRIPT_PATH)

+SCENARIO_DIR = os.path.realpath(os.path.join(SCRIPT_DIR, os.path.pardir))
+TEMPLATES_DIR = os.path.join(SCENARIO_DIR, "Templates")
+TASKS_DIR = os.path.join(SCENARIO_DIR, "Tasks")
+
 # A selected subset of HumanEval problems to work with during development
+
+# Deprecated 2/5/2024 -- Use subsample instead
 REDUCED_SET = [
     "HumanEval/2",
     "HumanEval/26",
@@ -73,19 +80,17 @@ def create_jsonl(name, tasks, template):
     """Creates a JSONL scenario file with a given name, list of HumanEval tasks, and template path."""

     # Create a task directory if it doesn't exist
-    scenario_dir = os.path.realpath(os.path.join(SCRIPT_DIR, os.path.pardir))
-    task_dir = os.path.join(scenario_dir, "Tasks")
-    if not os.path.isdir(task_dir):
-        os.mkdir(task_dir)
+    if not os.path.isdir(TASKS_DIR):
+        os.mkdir(TASKS_DIR)

     # Create the jsonl file
-    with open(os.path.join(task_dir, name + ".jsonl"), "wt") as fh:
+    with open(os.path.join(TASKS_DIR, name + ".jsonl"), "wt") as fh:
         for task in tasks:
             print(f"Converting: [{name}] {task['task_id']}")

             record = {
                 "id": task["task_id"].replace("/", "_"),
-                "template": os.path.join(os.path.pardir, template),
+                "template": template,
                 "substitutions": {
                     "scenario.py": {
                         "__ENTRY_POINT__": task["entry_point"],
@@ -102,19 +107,19 @@ def create_jsonl(name, tasks, template):
 ###############################################################################
 def main():
     human_eval = download_human_eval()
-    reduced_human_eval = [t for t in human_eval if t["task_id"] in REDUCED_SET]
+    # Deprecated: reduced_human_eval = [t for t in human_eval if t["task_id"] in REDUCED_SET]

-    templates = {
-        "two_agents": "Templates/TwoAgents",
-        # "gc3_distractor": "Templates/GroupChatThreeAgents_Distractor",
-        # "gc3_guardrails": "Templates/GroupChatThreeAgents_Guardrails",
-        # "gc4": "Templates/GroupChatFourAgents",
-    }
+    # list all directories in the Templates directory
+    # and populate a dictionary with the name and path
+    templates = {}
+    for entry in os.scandir(TEMPLATES_DIR):
+        if entry.is_dir():
+            templates[re.sub(r"\s", "", entry.name)] = entry.path

     # Create the various combinations of [models] x [templates]
     for t in templates.items():
         create_jsonl(f"human_eval_{t[0]}", human_eval, t[1])
-        create_jsonl(f"r_human_eval_{t[0]}", reduced_human_eval, t[1])
+        # Deprecated: create_jsonl(f"r_human_eval_{t[0]}", reduced_human_eval, t[1])


 if __name__ == "__main__" and __package__ is None:
diff --git a/samples/tools/autogenbench/scenarios/MATH/Scripts/init_tasks.py b/samples/tools/autogenbench/scenarios/MATH/Scripts/init_tasks.py
index 16545c8e5d0..8b2d07995e2 100644
--- a/samples/tools/autogenbench/scenarios/MATH/Scripts/init_tasks.py
+++ b/samples/tools/autogenbench/scenarios/MATH/Scripts/init_tasks.py
@@ -8,6 +8,7 @@ import io
 import json
 import os
+import re
 import sys

 URL = "https://people.eecs.berkeley.edu/~hendrycks/MATH.tar"
@@ -91,7 +92,7 @@ def create_jsonl(name, problems, template):
             record = {
                 "id": task_id,
-                "template": os.path.join(os.path.pardir, template),
+                "template": template,
                 "substitutions": {
                     "prompt.txt": {"__PROMPT__": data["problem"]},
                     "expected_answer.txt": {"__ANSWER__": data["solution"]},
@@ -105,9 +106,12 @@
 def main():
     problems = download_math()

-    templates = {
-        "two_agents": "Templates/TwoAgents",
-    }
+    # list all directories in the Templates directory
+    # and populate a dictionary with the name and path
+    templates = {}
+    for entry in os.scandir(TEMPLATES_DIR):
+        if entry.is_dir():
+            templates[re.sub(r"\s", "", entry.name)] = entry.path

     for t in templates.items():
         create_jsonl(f"math_{t[0]}", problems, t[1])

From 8ec1c3e0b3aeb130eab21e682f2c4fe393706eb6 Mon Sep 17 00:00:00 2001
From: Chi Wang
Date: Sun, 25 Feb 2024 07:57:27 -0800
Subject: [PATCH 7/7] process message before send (#1783)

* process message before send

* rename

---
 .../contrib/capabilities/context_handling.py |  2 +-
 .../contrib/capabilities/teachability.py     |  4 +--
 autogen/agentchat/conversable_agent.py       | 33 ++++++++++++++-----
 test/agentchat/test_conversable_agent.py     | 21 +++++++++++-
 4 files changed, 47 insertions(+), 13 deletions(-)

diff --git a/autogen/agentchat/contrib/capabilities/context_handling.py b/autogen/agentchat/contrib/capabilities/context_handling.py
index d12af605164..ebbc00e1097 100644
--- a/autogen/agentchat/contrib/capabilities/context_handling.py
+++ b/autogen/agentchat/contrib/capabilities/context_handling.py
@@ -46,7 +46,7 @@ def add_to_agent(self, agent: ConversableAgent):
         """
         Adds TransformChatHistory capability to the given agent.
         """
-        agent.register_hook(hookable_method="process_all_messages", hook=self._transform_messages)
+        agent.register_hook(hookable_method="process_all_messages_before_reply", hook=self._transform_messages)

     def _transform_messages(self, messages: List[Dict]) -> List[Dict]:
         """
diff --git a/autogen/agentchat/contrib/capabilities/teachability.py b/autogen/agentchat/contrib/capabilities/teachability.py
index c5c959da8d8..e90612fa53b 100644
--- a/autogen/agentchat/contrib/capabilities/teachability.py
+++ b/autogen/agentchat/contrib/capabilities/teachability.py
@@ -61,7 +61,7 @@ def add_to_agent(self, agent: ConversableAgent):
         self.teachable_agent = agent

         # Register a hook for processing the last message.
-        agent.register_hook(hookable_method="process_last_message", hook=self.process_last_message)
+        agent.register_hook(hookable_method="process_last_received_message", hook=self.process_last_received_message)

         # Was an llm_config passed to the constructor?
         if self.llm_config is None:
@@ -82,7 +82,7 @@ def prepopulate_db(self):
         """Adds a few arbitrary memos to the DB."""
         self.memo_store.prepopulate()

-    def process_last_message(self, text):
+    def process_last_received_message(self, text):
         """
         Appends any relevant memos to the message text, and stores any apparent teachings in new memos.
         Uses TextAnalyzerAgent to make decisions about memo storage and retrieval.
diff --git a/autogen/agentchat/conversable_agent.py b/autogen/agentchat/conversable_agent.py
index a3726678a45..b31c8ce786d 100644
--- a/autogen/agentchat/conversable_agent.py
+++ b/autogen/agentchat/conversable_agent.py
@@ -223,7 +223,11 @@ def __init__(
         # Registered hooks are kept in lists, indexed by hookable method, to be called in their order of registration.
         # New hookable methods should be added to this list as required to support new agent capabilities.
-        self.hook_lists = {"process_last_message": [], "process_all_messages": []}
+        self.hook_lists = {
+            "process_last_received_message": [],
+            "process_all_messages_before_reply": [],
+            "process_message_before_send": [],
+        }

     @property
     def name(self) -> str:
@@ -467,6 +471,15 @@ def _append_oai_message(self, message: Union[Dict, str], role, conversation_id:
             self._oai_messages[conversation_id].append(oai_message)
         return True

+    def _process_message_before_send(
+        self, message: Union[Dict, str], recipient: Agent, silent: bool
+    ) -> Union[Dict, str]:
+        """Process the message before sending it to the recipient."""
+        hook_list = self.hook_lists["process_message_before_send"]
+        for hook in hook_list:
+            message = hook(message, recipient, silent)
+        return message
+
     def send(
         self,
         message: Union[Dict, str],
@@ -509,6 +522,7 @@ def send(
         Returns:
             ChatResult: a ChatResult object.
         """
+        message = self._process_message_before_send(message, recipient, silent)
         # When the agent composes and sends the message, the role of the message is "assistant"
         # unless it's "function".
         valid = self._append_oai_message(message, "assistant", recipient)
@@ -561,6 +575,7 @@ async def a_send(
         Returns:
             ChatResult: a ChatResult object.
         """
+        message = self._process_message_before_send(message, recipient, silent)
         # When the agent composes and sends the message, the role of the message is "assistant"
         # unless it's "function".
         valid = self._append_oai_message(message, "assistant", recipient)
@@ -1634,11 +1649,11 @@ def generate_reply(

         # Call the hookable method that gives registered hooks a chance to process all messages.
         # Message modifications do not affect the incoming messages or self._oai_messages.
-        messages = self.process_all_messages(messages)
+        messages = self.process_all_messages_before_reply(messages)

         # Call the hookable method that gives registered hooks a chance to process the last message.
         # Message modifications do not affect the incoming messages or self._oai_messages.
-        messages = self.process_last_message(messages)
+        messages = self.process_last_received_message(messages)

         for reply_func_tuple in self._reply_func_list:
             reply_func = reply_func_tuple["reply_func"]
@@ -1695,11 +1710,11 @@ async def a_generate_reply(

         # Call the hookable method that gives registered hooks a chance to process all messages.
         # Message modifications do not affect the incoming messages or self._oai_messages.
-        messages = self.process_all_messages(messages)
+        messages = self.process_all_messages_before_reply(messages)

         # Call the hookable method that gives registered hooks a chance to process the last message.
         # Message modifications do not affect the incoming messages or self._oai_messages.
-        messages = self.process_last_message(messages)
+        messages = self.process_last_received_message(messages)

         for reply_func_tuple in self._reply_func_list:
             reply_func = reply_func_tuple["reply_func"]
@@ -2333,11 +2348,11 @@ def register_hook(self, hookable_method: str, hook: Callable):
         assert hook not in hook_list, f"{hook} is already registered as a hook."
         hook_list.append(hook)

-    def process_all_messages(self, messages: List[Dict]) -> List[Dict]:
+    def process_all_messages_before_reply(self, messages: List[Dict]) -> List[Dict]:
         """
         Calls any registered capability hooks to process all messages, potentially modifying the messages.
""" - hook_list = self.hook_lists["process_all_messages"] + hook_list = self.hook_lists["process_all_messages_before_reply"] # If no hooks are registered, or if there are no messages to process, return the original message list. if len(hook_list) == 0 or messages is None: return messages @@ -2348,14 +2363,14 @@ def process_all_messages(self, messages: List[Dict]) -> List[Dict]: processed_messages = hook(processed_messages) return processed_messages - def process_last_message(self, messages): + def process_last_received_message(self, messages): """ Calls any registered capability hooks to use and potentially modify the text of the last message, as long as the last message is not a function call or exit command. """ # If any required condition is not met, return the original message list. - hook_list = self.hook_lists["process_last_message"] + hook_list = self.hook_lists["process_last_received_message"] if len(hook_list) == 0: return messages # No hooks registered. if messages is None: diff --git a/test/agentchat/test_conversable_agent.py b/test/agentchat/test_conversable_agent.py index b71a6341c87..2a5eaf5f5bb 100644 --- a/test/agentchat/test_conversable_agent.py +++ b/test/agentchat/test_conversable_agent.py @@ -1074,6 +1074,24 @@ def test_max_turn(): assert len(res.chat_history) <= 6 +def test_process_before_send(): + print_mock = unittest.mock.MagicMock() + + def send_to_frontend(message, recipient, silent): + if not silent: + print(f"Message sent to {recipient.name}: {message}") + print_mock(message=message) + return message + + dummy_agent_1 = ConversableAgent(name="dummy_agent_1", llm_config=False, human_input_mode="NEVER") + dummy_agent_2 = ConversableAgent(name="dummy_agent_2", llm_config=False, human_input_mode="NEVER") + dummy_agent_1.register_hook("process_message_before_send", send_to_frontend) + dummy_agent_1.send("hello", dummy_agent_2) + print_mock.assert_called_once_with(message="hello") + dummy_agent_1.send("silent hello", dummy_agent_2, silent=True) + print_mock.assert_called_once_with(message="hello") + + if __name__ == "__main__": # test_trigger() # test_context() @@ -1081,4 +1099,5 @@ def test_max_turn(): # test_generate_code_execution_reply() # test_conversable_agent() # test_no_llm_config() - test_max_turn() + # test_max_turn() + test_process_before_send()