diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index c35078e60af..434226b3e88 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -22,6 +22,12 @@ body: 3. ... 4. See error placeholder: How can we replicate the issue? + - type: textarea + id: modelused + attributes: + label: Model Used + description: A description of the model that was used when the error was encountered + placeholder: gpt-4, mistral-7B etc - type: textarea id: expected_behavior attributes: diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8ff768fe53b..aa6311b7fbd 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -47,9 +47,8 @@ jobs: # code executors auto skip without deps, so only run for python 3.11 if: matrix.python-version == '3.11' run: | - pip install jupyter-client ipykernel + pip install -e ".[jupyter-executor]" python -m ipykernel install --user --name python3 - pip install -e ".[local-jupyter-exec]" - name: Set AUTOGEN_USE_DOCKER based on OS shell: bash run: | diff --git a/.github/workflows/deploy-website.yml b/.github/workflows/deploy-website.yml index 7198a311de7..c9c7deede62 100644 --- a/.github/workflows/deploy-website.yml +++ b/.github/workflows/deploy-website.yml @@ -52,7 +52,7 @@ jobs: quarto render . - name: Process notebooks run: | - python process_notebooks.py + python process_notebooks.py render - name: Test Build run: | if [ -e yarn.lock ]; then @@ -98,7 +98,7 @@ jobs: quarto render . - name: Process notebooks run: | - python process_notebooks.py + python process_notebooks.py render - name: Build website run: | if [ -e yarn.lock ]; then diff --git a/.github/workflows/openai.yml b/.github/workflows/openai.yml index 975045f4ce1..be2840c2dc6 100644 --- a/.github/workflows/openai.yml +++ b/.github/workflows/openai.yml @@ -14,7 +14,7 @@ on: - "notebook/agentchat_groupchat_finite_state_machine.ipynb" - ".github/workflows/openai.yml" permissions: {} - # actions: read + # actions: read # checks: read # contents: read # deployments: read diff --git a/autogen/agentchat/groupchat.py b/autogen/agentchat/groupchat.py index 5997b093a36..fbb7b88afee 100644 --- a/autogen/agentchat/groupchat.py +++ b/autogen/agentchat/groupchat.py @@ -626,7 +626,7 @@ async def a_run_chat( # Broadcast the intro intro = groupchat.introductions_msg() for agent in groupchat.agents: - self.a_send(intro, agent, request_reply=False, silent=True) + await self.a_send(intro, agent, request_reply=False, silent=True) # NOTE: We do not also append to groupchat.messages, # since groupchat handles its own introductions diff --git a/autogen/coding/__init__.py b/autogen/coding/__init__.py index 7c223401d58..cf75d11436b 100644 --- a/autogen/coding/__init__.py +++ b/autogen/coding/__init__.py @@ -2,4 +2,11 @@ from .factory import CodeExecutorFactory from .markdown_code_extractor import MarkdownCodeExtractor -__all__ = ("CodeBlock", "CodeResult", "CodeExtractor", "CodeExecutor", "CodeExecutorFactory", "MarkdownCodeExtractor") +__all__ = ( + "CodeBlock", + "CodeResult", + "CodeExtractor", + "CodeExecutor", + "CodeExecutorFactory", + "MarkdownCodeExtractor", +) diff --git a/autogen/coding/factory.py b/autogen/coding/factory.py index ceb01ca3dfa..e4ff09c568f 100644 --- a/autogen/coding/factory.py +++ b/autogen/coding/factory.py @@ -30,16 +30,12 @@ def create(code_execution_config: Dict[str, Any]) -> CodeExecutor: # If the executor is already an instance of CodeExecutor, return it. 
            return executor

        if executor == "ipython-embedded":
-            from .embedded_ipython_code_executor import EmbeddedIPythonCodeExecutor
+            from .jupyter.embedded_ipython_code_executor import EmbeddedIPythonCodeExecutor

             return EmbeddedIPythonCodeExecutor(**code_execution_config.get("ipython-embedded", {}))
         elif executor == "commandline-local":
             from .local_commandline_code_executor import LocalCommandlineCodeExecutor

             return LocalCommandlineCodeExecutor(**code_execution_config.get("commandline-local", {}))
-        elif executor == "jupyter-local":
-            from .jupyter_code_executor import LocalJupyterCodeExecutor
-
-            return LocalJupyterCodeExecutor(**code_execution_config.get("jupyter-local", {}))
         else:
             raise ValueError(f"Unknown code executor {executor}")
diff --git a/autogen/coding/jupyter/__init__.py b/autogen/coding/jupyter/__init__.py
index 96c8cf4a65c..5c1a9607f56 100644
--- a/autogen/coding/jupyter/__init__.py
+++ b/autogen/coding/jupyter/__init__.py
@@ -1,5 +1,16 @@
 from .base import JupyterConnectable, JupyterConnectionInfo
 from .jupyter_client import JupyterClient
 from .local_jupyter_server import LocalJupyterServer
+from .docker_jupyter_server import DockerJupyterServer
+from .embedded_ipython_code_executor import EmbeddedIPythonCodeExecutor
+from .jupyter_code_executor import JupyterCodeExecutor

-__all__ = ["JupyterConnectable", "JupyterConnectionInfo", "JupyterClient", "LocalJupyterServer"]
+__all__ = [
+    "JupyterConnectable",
+    "JupyterConnectionInfo",
+    "JupyterClient",
+    "LocalJupyterServer",
+    "DockerJupyterServer",
+    "EmbeddedIPythonCodeExecutor",
+    "JupyterCodeExecutor",
+]
diff --git a/autogen/coding/jupyter/docker_jupyter_server.py b/autogen/coding/jupyter/docker_jupyter_server.py
new file mode 100644
index 00000000000..5288d295cd7
--- /dev/null
+++ b/autogen/coding/jupyter/docker_jupyter_server.py
@@ -0,0 +1,167 @@
+from __future__ import annotations
+
+from pathlib import Path
+import sys
+from time import sleep
+from types import TracebackType
+import uuid
+from typing import Dict, Optional, Union
+import docker
+import secrets
+import io
+import atexit
+import logging
+
+if sys.version_info >= (3, 11):
+    from typing import Self
+else:
+    from typing_extensions import Self
+
+
+from .jupyter_client import JupyterClient
+from .base import JupyterConnectable, JupyterConnectionInfo
+
+
+def _wait_for_ready(container: docker.Container, timeout: int = 60, stop_time: float = 0.1) -> None:
+    elapsed_time = 0.0
+    while container.status != "running" and elapsed_time < timeout:
+        sleep(stop_time)
+        elapsed_time += stop_time
+        container.reload()
+        continue
+    if container.status != "running":
+        raise ValueError("Container failed to start")
+
+
+class DockerJupyterServer(JupyterConnectable):
+    DEFAULT_DOCKERFILE = """FROM quay.io/jupyter/docker-stacks-foundation
+
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+USER ${NB_UID}
+RUN mamba install --yes jupyter_kernel_gateway ipykernel && \
+    mamba clean --all -f -y && \
+    fix-permissions "${CONDA_DIR}" && \
+    fix-permissions "/home/${NB_USER}"
+
+ENV TOKEN="UNSET"
+CMD python -m jupyter kernelgateway --KernelGatewayApp.ip=0.0.0.0 \
+    --KernelGatewayApp.port=8888 \
+    --KernelGatewayApp.auth_token="${TOKEN}" \
+    --JupyterApp.answer_yes=true \
+    --JupyterWebsocketPersonality.list_kernels=true
+
+EXPOSE 8888
+
+WORKDIR "${HOME}"
+"""
+
+    class GenerateToken:
+        pass
+
+    def __init__(
+        self,
+        *,
+        custom_image_name: Optional[str] = None,
+        container_name: Optional[str] = None,
+        auto_remove: bool = True,
+        stop_container: bool = True,
+        docker_env: Dict[str, str] =
{}, + token: Union[str, GenerateToken] = GenerateToken(), + ): + """Start a Jupyter kernel gateway server in a Docker container. + + Args: + custom_image_name (Optional[str], optional): Custom image to use. If this is None, + then the bundled image will be built and used. The default image is based on + quay.io/jupyter/docker-stacks-foundation and extended to include jupyter_kernel_gateway + container_name (Optional[str], optional): Name of the container to start. + A name will be generated if None. + auto_remove (bool, optional): If true the Docker container will be deleted + when it is stopped. + stop_container (bool, optional): If true the container will be stopped, + either by program exit or using the context manager + docker_env (Dict[str, str], optional): Extra environment variables to pass + to the running Docker container. + token (Union[str, GenerateToken], optional): Token to use for authentication. + If GenerateToken is used, a random token will be generated. Empty string + will be unauthenticated. + """ + if container_name is None: + container_name = f"autogen-jupyterkernelgateway-{uuid.uuid4()}" + + client = docker.from_env() + if custom_image_name is None: + image_name = "autogen-jupyterkernelgateway" + # Make sure the image exists + try: + client.images.get(image_name) + except docker.errors.ImageNotFound: + # Build the image + # Get this script directory + here = Path(__file__).parent + dockerfile = io.BytesIO(self.DEFAULT_DOCKERFILE.encode("utf-8")) + logging.info(f"Image {image_name} not found. Building it now.") + client.images.build(path=here, fileobj=dockerfile, tag=image_name) + logging.info(f"Image {image_name} built successfully.") + else: + image_name = custom_image_name + # Check if the image exists + try: + client.images.get(image_name) + except docker.errors.ImageNotFound: + raise ValueError(f"Custom image {image_name} does not exist") + + if isinstance(token, DockerJupyterServer.GenerateToken): + self._token = secrets.token_hex(32) + else: + self._token = token + + # Run the container + env = {"TOKEN": self._token} + env.update(docker_env) + container = client.containers.run( + image_name, + detach=True, + auto_remove=auto_remove, + environment=env, + publish_all_ports=True, + name=container_name, + ) + _wait_for_ready(container) + container_ports = container.ports + self._port = int(container_ports["8888/tcp"][0]["HostPort"]) + self._container_id = container.id + + def cleanup(): + try: + inner_container = client.containers.get(container.id) + inner_container.stop() + except docker.errors.NotFound: + pass + + atexit.unregister(cleanup) + + if stop_container: + atexit.register(cleanup) + + self._cleanup_func = cleanup + self._stop_container = stop_container + + @property + def connection_info(self) -> JupyterConnectionInfo: + return JupyterConnectionInfo(host="127.0.0.1", use_https=False, port=self._port, token=self._token) + + def stop(self): + self._cleanup_func() + + def get_client(self) -> JupyterClient: + return JupyterClient(self.connection_info) + + def __enter__(self) -> Self: + return self + + def __exit__( + self, exc_type: Optional[type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType] + ) -> None: + self.stop() diff --git a/autogen/coding/embedded_ipython_code_executor.py b/autogen/coding/jupyter/embedded_ipython_code_executor.py similarity index 98% rename from autogen/coding/embedded_ipython_code_executor.py rename to autogen/coding/jupyter/embedded_ipython_code_executor.py index a83dab23327..7758c0a2be2 100644 
--- a/autogen/coding/embedded_ipython_code_executor.py +++ b/autogen/coding/jupyter/embedded_ipython_code_executor.py @@ -11,9 +11,9 @@ from jupyter_client.kernelspec import KernelSpecManager from pydantic import BaseModel, Field, field_validator -from ..agentchat.agent import LLMAgent -from .base import CodeBlock, CodeExtractor, IPythonCodeResult -from .markdown_code_extractor import MarkdownCodeExtractor +from ...agentchat.agent import LLMAgent +from ..base import CodeBlock, CodeExtractor, IPythonCodeResult +from ..markdown_code_extractor import MarkdownCodeExtractor __all__ = "EmbeddedIPythonCodeExecutor" diff --git a/autogen/coding/jupyter/jupyter_client.py b/autogen/coding/jupyter/jupyter_client.py index edecc415cd1..459add85b5f 100644 --- a/autogen/coding/jupyter/jupyter_client.py +++ b/autogen/coding/jupyter/jupyter_client.py @@ -14,6 +14,7 @@ import uuid import datetime import requests +from requests.adapters import HTTPAdapter, Retry import websocket from websocket import WebSocket @@ -26,6 +27,9 @@ class JupyterClient: def __init__(self, connection_info: JupyterConnectionInfo): self._connection_info = connection_info + self._session = requests.Session() + retries = Retry(total=5, backoff_factor=0.1) + self._session.mount("http://", HTTPAdapter(max_retries=retries)) def _get_headers(self) -> Dict[str, str]: if self._connection_info.token is None: @@ -40,11 +44,11 @@ def _get_ws_base_url(self) -> str: return f"ws://{self._connection_info.host}:{self._connection_info.port}" def list_kernel_specs(self) -> Dict[str, Dict[str, str]]: - response = requests.get(f"{self._get_api_base_url()}/api/kernelspecs", headers=self._get_headers()) + response = self._session.get(f"{self._get_api_base_url()}/api/kernelspecs", headers=self._get_headers()) return cast(Dict[str, Dict[str, str]], response.json()) def list_kernels(self) -> List[Dict[str, str]]: - response = requests.get(f"{self._get_api_base_url()}/api/kernels", headers=self._get_headers()) + response = self._session.get(f"{self._get_api_base_url()}/api/kernels", headers=self._get_headers()) return cast(List[Dict[str, str]], response.json()) def start_kernel(self, kernel_spec_name: str) -> str: @@ -57,15 +61,21 @@ def start_kernel(self, kernel_spec_name: str) -> str: str: ID of the started kernel """ - response = requests.post( + response = self._session.post( f"{self._get_api_base_url()}/api/kernels", headers=self._get_headers(), json={"name": kernel_spec_name}, ) return cast(str, response.json()["id"]) + def delete_kernel(self, kernel_id: str) -> None: + response = self._session.delete( + f"{self._get_api_base_url()}/api/kernels/{kernel_id}", headers=self._get_headers() + ) + response.raise_for_status() + def restart_kernel(self, kernel_id: str) -> None: - response = requests.post( + response = self._session.post( f"{self._get_api_base_url()}/api/kernels/{kernel_id}/restart", headers=self._get_headers() ) response.raise_for_status() @@ -100,6 +110,9 @@ def __enter__(self) -> Self: def __exit__( self, exc_type: Optional[type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType] ) -> None: + self.stop() + + def stop(self) -> None: self._websocket.close() def _send_message(self, *, content: Dict[str, Any], channel: str, message_type: str) -> str: diff --git a/autogen/coding/jupyter_code_executor.py b/autogen/coding/jupyter/jupyter_code_executor.py similarity index 91% rename from autogen/coding/jupyter_code_executor.py rename to autogen/coding/jupyter/jupyter_code_executor.py index 551aea18aeb..5e190d5f1b9 
100644 --- a/autogen/coding/jupyter_code_executor.py +++ b/autogen/coding/jupyter/jupyter_code_executor.py @@ -3,18 +3,22 @@ import os from pathlib import Path import re +from types import TracebackType import uuid -from typing import Any, ClassVar, List, Union +from typing import Any, ClassVar, List, Optional, Union +import sys -from pydantic import Field +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self -from ..agentchat.agent import LLMAgent -from .base import CodeBlock, CodeExecutor, CodeExtractor, CodeResult, IPythonCodeResult -from .markdown_code_extractor import MarkdownCodeExtractor -from .jupyter import JupyterConnectable, JupyterConnectionInfo, LocalJupyterServer, JupyterClient - -__all__ = ("JupyterCodeExecutor", "LocalJupyterCodeExecutor") +from ...agentchat.agent import LLMAgent +from ..base import CodeBlock, CodeExecutor, CodeExtractor, IPythonCodeResult +from ..markdown_code_extractor import MarkdownCodeExtractor +from .base import JupyterConnectable, JupyterConnectionInfo +from .jupyter_client import JupyterClient class JupyterCodeExecutor(CodeExecutor): @@ -214,9 +218,14 @@ def _process_code(self, code: str) -> str: lines[i] = line.replace(match.group(0), match.group(0) + " -qqq") return "\n".join(lines) + def stop(self) -> None: + """Stop the kernel.""" + self._jupyter_client.delete_kernel(self._kernel_id) + + def __enter__(self) -> Self: + return self -class LocalJupyterCodeExecutor(JupyterCodeExecutor): - def __init__(self, **kwargs: Any): - """Creates a LocalJupyterServer and passes it to JupyterCodeExecutor, see JupyterCodeExecutor for args""" - jupyter_server = LocalJupyterServer() - super().__init__(jupyter_server=jupyter_server, **kwargs) + def __exit__( + self, exc_type: Optional[type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType] + ) -> None: + self.stop() diff --git a/autogen/version.py b/autogen/version.py index ddc77a88056..699eb888d3a 100644 --- a/autogen/version.py +++ b/autogen/version.py @@ -1 +1 @@ -__version__ = "0.2.15" +__version__ = "0.2.16" diff --git a/notebook/agentchat_RetrieveChat.ipynb b/notebook/agentchat_RetrieveChat.ipynb index b8cd70ec48f..6c965d0e7ee 100644 --- a/notebook/agentchat_RetrieveChat.ipynb +++ b/notebook/agentchat_RetrieveChat.ipynb @@ -3036,7 +3036,8 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.12" - } +}, +"test_skip": "Requires interactive usage" }, "nbformat": 4, "nbformat_minor": 4 diff --git a/notebook/contributing.md b/notebook/contributing.md index 4fb78b0964b..4234bce6dcd 100644 --- a/notebook/contributing.md +++ b/notebook/contributing.md @@ -74,3 +74,37 @@ Learn more about configuring LLMs for agents [here](/docs/llm_configuration). ::: ```` `````` + +## Testing + +Notebooks can be tested by running: + +```sh +python website/process_notebooks.py test +``` + +This will automatically scan for all notebooks in the notebook/ and website/ dirs. + +To test a specific notebook pass its path: + +```sh +python website/process_notebooks.py test notebook/agentchat_logging.ipynb +``` + +Options: +- `--timeout` - timeout for a single notebook +- `--exit-on-first-fail` - stop executing further notebooks after the first one fails + +### Skip tests + +If a notebook needs to be skipped then add to the notebook metadata: +```json +{ + "...": "...", + "metadata": { + "test_skip": "REASON" + } +} +``` + +Note: Notebook metadata can be edited by opening the notebook in a text editor (Or "Open With..." 
-> "Text Editor" in VSCode) diff --git a/setup.py b/setup.py index 2b8e1da16cb..130fd91f382 100644 --- a/setup.py +++ b/setup.py @@ -55,11 +55,13 @@ "graph": ["networkx", "matplotlib"], "websurfer": ["beautifulsoup4", "markdownify", "pdfminer.six", "pathvalidate", "selenium"], "redis": ["redis"], - # Dependencies for EmbeddedIPythonExecutor, to be removed once upstream bug fixed - # https://github.com/jupyter-server/kernel_gateway/issues/398 - "ipython": ["jupyter-client>=8.6.0", "ipykernel>=6.29.0"], - # Dependencies for LocalJupyterExecutor - "local-jupyter-exec": ["jupyter-kernel-gateway", "websocket-client", "requests", "ipykernel"], + "jupyter-executor": [ + "jupyter-kernel-gateway", + "websocket-client", + "requests", + "jupyter-client>=8.6.0", + "ipykernel>=6.29.0", + ], }, classifiers=[ "Programming Language :: Python :: 3", diff --git a/test/coding/test_embedded_ipython_code_executor.py b/test/coding/test_embedded_ipython_code_executor.py index fcd423497aa..75d827fdf51 100644 --- a/test/coding/test_embedded_ipython_code_executor.py +++ b/test/coding/test_embedded_ipython_code_executor.py @@ -9,11 +9,25 @@ from autogen.coding.base import CodeBlock, CodeExecutor from autogen.coding.factory import CodeExecutorFactory from autogen.oai.openai_utils import config_list_from_json -from conftest import MOCK_OPEN_AI_API_KEY, skip_openai # noqa: E402 +from conftest import MOCK_OPEN_AI_API_KEY, skip_openai, skip_docker # noqa: E402 try: - from autogen.coding.embedded_ipython_code_executor import EmbeddedIPythonCodeExecutor - from autogen.coding.jupyter_code_executor import LocalJupyterCodeExecutor + from autogen.coding.jupyter import ( + DockerJupyterServer, + EmbeddedIPythonCodeExecutor, + JupyterCodeExecutor, + LocalJupyterServer, + ) + + class DockerJupyterExecutor(JupyterCodeExecutor): + def __init__(self, **kwargs): + jupyter_server = DockerJupyterServer() + super().__init__(jupyter_server=jupyter_server, **kwargs) + + class LocalJupyterCodeExecutor(JupyterCodeExecutor): + def __init__(self, **kwargs): + jupyter_server = LocalJupyterServer() + super().__init__(jupyter_server=jupyter_server, **kwargs) # Skip on windows due to kernelgateway bug https://github.com/jupyter-server/kernel_gateway/issues/398 if sys.platform == "win32": @@ -21,21 +35,27 @@ else: classes_to_test = [EmbeddedIPythonCodeExecutor, LocalJupyterCodeExecutor] + if not skip_docker: + classes_to_test.append(DockerJupyterExecutor) + skip = False skip_reason = "" -except ImportError: +except ImportError as e: skip = True - skip_reason = "Dependencies for EmbeddedIPythonCodeExecutor or LocalJupyterCodeExecutor not installed." + skip_reason = "Dependencies for EmbeddedIPythonCodeExecutor or LocalJupyterCodeExecutor not installed. 
" + e.msg classes_to_test = [] @pytest.mark.skipif(skip, reason=skip_reason) -@pytest.mark.parametrize("cls", classes_to_test) -def test_create(cls) -> None: +def test_create_dict() -> None: config: Dict[str, Union[str, CodeExecutor]] = {"executor": "ipython-embedded"} executor = CodeExecutorFactory.create(config) assert isinstance(executor, EmbeddedIPythonCodeExecutor) + +@pytest.mark.skipif(skip, reason=skip_reason) +@pytest.mark.parametrize("cls", classes_to_test) +def test_create(cls) -> None: config = {"executor": cls()} executor = CodeExecutorFactory.create(config) assert executor is config["executor"] diff --git a/website/README.md b/website/README.md index fdc4e5162ef..fa451489eda 100644 --- a/website/README.md +++ b/website/README.md @@ -33,8 +33,7 @@ Navigate to the `website` folder and run: ```console pydoc-markdown -quarto render ./docs -python ./process_notebooks.py +python ./process_notebooks.py render yarn start ``` diff --git a/website/build_website.sh b/website/build_website.sh index 9295b090611..e4d6441be12 100755 --- a/website/build_website.sh +++ b/website/build_website.sh @@ -28,11 +28,8 @@ fi # Generate documentation using pydoc-markdown pydoc-markdown -# Render the website using Quarto -quarto render ./docs - # Process notebooks using a Python script -python ./process_notebooks.py +python ./process_notebooks.py render # Start the website using yarn yarn start diff --git a/website/docs/Contribute.md b/website/docs/Contribute.md index 398584e1bfc..c2daf7e9a7a 100644 --- a/website/docs/Contribute.md +++ b/website/docs/Contribute.md @@ -175,6 +175,8 @@ Tests for the `autogen.agentchat.contrib` module may be skipped automatically if required dependencies are not installed. Please consult the documentation for each contrib module to see what dependencies are required. +See [here](https://github.com/microsoft/autogen/blob/main/notebook/contributing.md#testing) for how to run notebook tests. + #### Skip flags for tests - `--skip-openai` for skipping tests that require access to OpenAI services. 
@@ -216,11 +218,11 @@ Then:

 ```console
 npm install --global yarn # skip if you use the dev container we provided
-pip install pydoc-markdown # skip if you use the dev container we provided
+pip install pydoc-markdown pyyaml termcolor # skip if you use the dev container we provided
 cd website
 yarn install --frozen-lockfile --ignore-engines
 pydoc-markdown
-quarto render ./docs
+python process_notebooks.py render
 yarn start
 ```
@@ -245,7 +247,7 @@ Once at the CLI in Docker run the following commands:
 ```console
 cd website
 yarn install --frozen-lockfile --ignore-engines
 pydoc-markdown
-quarto render ./docs
+python process_notebooks.py render
 yarn start --host 0.0.0.0 --port 3000
 ```
diff --git a/website/process_notebooks.py b/website/process_notebooks.py
index b2fe473826a..835a69a3565 100644
--- a/website/process_notebooks.py
+++ b/website/process_notebooks.py
@@ -1,11 +1,24 @@
+#!/usr/bin/env python
+
+from __future__ import annotations
+import signal
 import sys
 from pathlib import Path
 import subprocess
 import argparse
 import shutil
 import json
+import tempfile
+import threading
+import time
 import typing
 import concurrent.futures
+import os
+
+from typing import Optional, Tuple, Union
+from dataclasses import dataclass
+
+from multiprocessing import current_process

 try:
     import yaml
@@ -13,6 +26,27 @@
     print("pyyaml not found.\n\nPlease install pyyaml:\n\tpip install pyyaml\n")
     sys.exit(1)

+try:
+    import nbclient
+    from nbclient.client import (
+        CellExecutionError,
+        CellTimeoutError,
+        NotebookClient,
+    )
+except ImportError:
+    if current_process().name == "MainProcess":
+        print("nbclient not found.\n\nPlease install nbclient:\n\tpip install nbclient\n")
+        print("test won't work without nbclient")
+
+try:
+    import nbformat
+    from nbformat import NotebookNode
+except ImportError:
+    if current_process().name == "MainProcess":
+        print("nbformat not found.\n\nPlease install nbformat:\n\tpip install nbformat\n")
+        print("test won't work without nbformat")
+
+
 try:
     from termcolor import colored
 except ImportError:
@@ -28,7 +62,7 @@ def __init__(self, returncode: int, stdout: str, stderr: str):
         self.stderr = stderr


-def check_quarto_bin(quarto_bin: str = "quarto"):
+def check_quarto_bin(quarto_bin: str = "quarto") -> None:
     """Check if quarto is installed."""
     try:
         subprocess.check_output([quarto_bin, "--version"])
@@ -72,6 +106,17 @@ def extract_yaml_from_notebook(notebook: Path) -> typing.Optional[typing.Dict]:

 def skip_reason_or_none_if_ok(notebook: Path) -> typing.Optional[str]:
     """Return a reason to skip the notebook, or None if it should not be skipped."""
+
+    if notebook.suffix != ".ipynb":
+        return "not a notebook"
+
+    if not notebook.exists():
+        return "file does not exist"
+
+    # Extra checks for notebooks in the notebook directory
+    if "notebook" not in notebook.parts:
+        return None
+
     with open(notebook, "r", encoding="utf-8") as f:
         content = f.read()
@@ -121,56 +166,166 @@
     return None


-def process_notebook(src_notebook: Path, dest_dir: Path, quarto_bin: str, dry_run: bool) -> str:
+def process_notebook(src_notebook: Path, website_dir: Path, notebook_dir: Path, quarto_bin: str, dry_run: bool) -> str:
     """Process a single notebook."""
-    reason_or_none = skip_reason_or_none_if_ok(src_notebook)
-    if reason_or_none:
-        return colored(f"Skipping {src_notebook.name}, reason: {reason_or_none}", "yellow")
-    target_mdx_file = dest_dir / f"{src_notebook.stem}.mdx"
-    intermediate_notebook = dest_dir / f"{src_notebook.stem}.ipynb"
+
in_notebook_dir = "notebook" in src_notebook.parts + + if in_notebook_dir: + relative_notebook = src_notebook.relative_to(notebook_dir) + dest_dir = notebooks_target_dir(website_directory=website_dir) + target_mdx_file = dest_dir / relative_notebook.with_suffix(".mdx") + intermediate_notebook = dest_dir / relative_notebook + + # If the intermediate_notebook already exists, check if it is newer than the source file + if target_mdx_file.exists(): + if target_mdx_file.stat().st_mtime > src_notebook.stat().st_mtime: + return colored(f"Skipping {src_notebook.name}, as target file is newer", "blue") + + if dry_run: + return colored(f"Would process {src_notebook.name}", "green") + + # Copy notebook to target dir + # The reason we copy the notebook is that quarto does not support rendering from a different directory + shutil.copy(src_notebook, intermediate_notebook) + + # Check if another file has to be copied too + # Solely added for the purpose of agent_library_example.json + front_matter = extract_yaml_from_notebook(src_notebook) + # Should not be none at this point as we have already done the same checks as in extract_yaml_from_notebook + assert front_matter is not None, f"Front matter is None for {src_notebook.name}" + if "extra_files_to_copy" in front_matter: + for file in front_matter["extra_files_to_copy"]: + shutil.copy(src_notebook.parent / file, dest_dir / file) + + # Capture output + result = subprocess.run( + [quarto_bin, "render", intermediate_notebook], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True + ) + if result.returncode != 0: + return ( + colored(f"Failed to render {intermediate_notebook}", "red") + + f"\n{result.stderr}" + + f"\n{result.stdout}" + ) - # If the intermediate_notebook already exists, check if it is newer than the source file - if target_mdx_file.exists(): - if target_mdx_file.stat().st_mtime > src_notebook.stat().st_mtime: - return colored(f"Skipping {src_notebook.name}, as target file is newer", "blue") + # Unlink intermediate files + intermediate_notebook.unlink() - if dry_run: - return colored(f"Would process {src_notebook.name}", "green") + if "extra_files_to_copy" in front_matter: + for file in front_matter["extra_files_to_copy"]: + (dest_dir / file).unlink() - # Copy notebook to target dir - # The reason we copy the notebook is that quarto does not support rendering from a different directory - shutil.copy(src_notebook, intermediate_notebook) + # Post process the file + post_process_mdx(target_mdx_file) + else: + target_mdx_file = src_notebook.with_suffix(".mdx") - # Check if another file has to be copied too - # Solely added for the purpose of agent_library_example.json - front_matter = extract_yaml_from_notebook(src_notebook) - # Should not be none at this point as we have already done the same checks as in extract_yaml_from_notebook - assert front_matter is not None, f"Front matter is None for {src_notebook.name}" - if "extra_files_to_copy" in front_matter: - for file in front_matter["extra_files_to_copy"]: - shutil.copy(src_notebook.parent / file, dest_dir / file) - - # Capture output - result = subprocess.run( - [quarto_bin, "render", intermediate_notebook], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True - ) - if result.returncode != 0: - return colored(f"Failed to render {intermediate_notebook}", "red") + f"\n{result.stderr}" + f"\n{result.stdout}" - - # Unlink intermediate files - intermediate_notebook.unlink() + # If the intermediate_notebook already exists, check if it is newer than the source file + if 
target_mdx_file.exists(): + if target_mdx_file.stat().st_mtime > src_notebook.stat().st_mtime: + return colored(f"Skipping {src_notebook.name}, as target file is newer", "blue") - if "extra_files_to_copy" in front_matter: - for file in front_matter["extra_files_to_copy"]: - (dest_dir / file).unlink() + if dry_run: + return colored(f"Would process {src_notebook.name}", "green") - # Post process the file - post_process_mdx(target_mdx_file) + result = subprocess.run( + [quarto_bin, "render", src_notebook], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True + ) + if result.returncode != 0: + return colored(f"Failed to render {src_notebook}", "red") + f"\n{result.stderr}" + f"\n{result.stdout}" return colored(f"Processed {src_notebook.name}", "green") +# Notebook execution based on nbmake: https://github.com/treebeardtech/nbmakes +@dataclass +class NotebookError: + error_name: str + error_value: Optional[str] + traceback: str + cell_source: str + + +@dataclass +class NotebookSkip: + reason: str + + +NB_VERSION = 4 + + +def test_notebook(notebook_path: Path, timeout: int = 300) -> Tuple[Path, Optional[Union[NotebookError, NotebookSkip]]]: + nb = nbformat.read(str(notebook_path), NB_VERSION) + + allow_errors = False + if "execution" in nb.metadata: + if "timeout" in nb.metadata.execution: + timeout = nb.metadata.execution.timeout + if "allow_errors" in nb.metadata.execution: + allow_errors = nb.metadata.execution.allow_errors + + if "test_skip" in nb.metadata: + return notebook_path, NotebookSkip(reason=nb.metadata.test_skip) + + try: + c = NotebookClient( + nb, + timeout=timeout, + allow_errors=allow_errors, + record_timing=True, + ) + os.environ["PYDEVD_DISABLE_FILE_VALIDATION"] = "1" + os.environ["TOKENIZERS_PARALLELISM"] = "false" + with tempfile.TemporaryDirectory() as tempdir: + c.execute(cwd=tempdir) + except CellExecutionError: + error = get_error_info(nb) + assert error is not None + return notebook_path, error + except CellTimeoutError: + error = get_timeout_info(nb) + assert error is not None + return notebook_path, error + + return notebook_path, None + + +# Find the first code cell which did not complete. 
+def get_timeout_info( + nb: NotebookNode, +) -> Optional[NotebookError]: + for i, cell in enumerate(nb.cells): + if cell.cell_type != "code": + continue + if "shell.execute_reply" not in cell.metadata.execution: + return NotebookError( + error_name="timeout", + error_value="", + traceback="", + cell_source="".join(cell["source"]), + ) + + return None + + +def get_error_info(nb: NotebookNode) -> Optional[NotebookError]: + for cell in nb["cells"]: # get LAST error + if cell["cell_type"] != "code": + continue + errors = [output for output in cell["outputs"] if output["output_type"] == "error" or "ename" in output] + + if errors: + traceback = "\n".join(errors[0].get("traceback", "")) + return NotebookError( + error_name=errors[0].get("ename", ""), + error_value=errors[0].get("evalue", ""), + traceback=traceback, + cell_source="".join(cell["source"]), + ) + return None + + # rendered_notebook is the final mdx file def post_process_mdx(rendered_mdx: Path) -> None: notebook_name = f"{rendered_mdx.stem}.ipynb" @@ -234,9 +389,32 @@ def path(path_str: str) -> Path: return Path(path_str) -def main(): +def collect_notebooks(notebook_directory: Path, website_directory: Path) -> typing.List[Path]: + notebooks = list(notebook_directory.glob("*.ipynb")) + notebooks.extend(list(website_directory.glob("docs/**/*.ipynb"))) + return notebooks + + +def start_thread_to_terminate_when_parent_process_dies(ppid: int): + pid = os.getpid() + + def f() -> None: + while True: + try: + os.kill(ppid, 0) + except OSError: + os.kill(pid, signal.SIGTERM) + time.sleep(1) + + thread = threading.Thread(target=f, daemon=True) + thread.start() + + +def main() -> None: script_dir = Path(__file__).parent.absolute() parser = argparse.ArgumentParser() + subparsers = parser.add_subparsers(dest="subcommand") + parser.add_argument( "--notebook-directory", type=path, @@ -246,29 +424,95 @@ def main(): parser.add_argument( "--website-directory", type=path, help="Root directory of docusarus website", default=script_dir ) - parser.add_argument("--quarto-bin", help="Path to quarto binary", default="quarto") - parser.add_argument("--dry-run", help="Don't render", action="store_true") parser.add_argument("--workers", help="Number of workers to use", type=int, default=-1) - args = parser.parse_args() + render_parser = subparsers.add_parser("render") + render_parser.add_argument("--quarto-bin", help="Path to quarto binary", default="quarto") + render_parser.add_argument("--dry-run", help="Don't render", action="store_true") + render_parser.add_argument("notebooks", type=path, nargs="*", default=None) + + test_parser = subparsers.add_parser("test") + test_parser.add_argument("--timeout", help="Timeout for each notebook", type=int, default=60) + test_parser.add_argument("--exit-on-first-fail", "-e", help="Exit after first test fail", action="store_true") + test_parser.add_argument("notebooks", type=path, nargs="*", default=None) + args = parser.parse_args() if args.workers == -1: args.workers = None - check_quarto_bin(args.quarto_bin) - - if not notebooks_target_dir(args.website_directory).exists(): - notebooks_target_dir(args.website_directory).mkdir(parents=True) + if args.subcommand is None: + print("No subcommand specified") + sys.exit(1) - with concurrent.futures.ProcessPoolExecutor(max_workers=args.workers) as executor: - futures = [ - executor.submit( - process_notebook, f, notebooks_target_dir(args.website_directory), args.quarto_bin, args.dry_run - ) - for f in args.notebook_directory.glob("*.ipynb") - ] - for future in 
concurrent.futures.as_completed(futures): - print(future.result()) + if args.notebooks: + collected_notebooks = args.notebooks + else: + collected_notebooks = collect_notebooks(args.notebook_directory, args.website_directory) + + filtered_notebooks = [] + for notebook in collected_notebooks: + reason = skip_reason_or_none_if_ok(notebook) + if reason: + print(f"{colored('[Skip]', 'yellow')} {colored(notebook.name, 'blue')}: {reason}") + else: + filtered_notebooks.append(notebook) + + print(f"Processing {len(filtered_notebooks)} notebook{'s' if len(filtered_notebooks) != 1 else ''}...") + + if args.subcommand == "test": + failure = False + with concurrent.futures.ProcessPoolExecutor( + max_workers=args.workers, + initializer=start_thread_to_terminate_when_parent_process_dies, + initargs=(os.getpid(),), + ) as executor: + futures = [executor.submit(test_notebook, f, args.timeout) for f in filtered_notebooks] + for future in concurrent.futures.as_completed(futures): + notebook, optional_error_or_skip = future.result() + if isinstance(optional_error_or_skip, NotebookError): + if optional_error_or_skip.error_name == "timeout": + print( + f"{colored('[Error]', 'red')} {colored(notebook.name, 'blue')}: {optional_error_or_skip.error_name}" + ) + + else: + print("-" * 80) + print( + f"{colored('[Error]', 'red')} {colored(notebook.name, 'blue')}: {optional_error_or_skip.error_name} - {optional_error_or_skip.error_value}" + ) + print(optional_error_or_skip.traceback) + print("-" * 80) + if args.exit_on_first_fail: + sys.exit(1) + failure = True + elif isinstance(optional_error_or_skip, NotebookSkip): + print( + f"{colored('[Skip]', 'yellow')} {colored(notebook.name, 'blue')}: {optional_error_or_skip.reason}" + ) + else: + print(f"{colored('[OK]', 'green')} {colored(notebook.name, 'blue')}") + + if failure: + sys.exit(1) + + elif args.subcommand == "render": + check_quarto_bin(args.quarto_bin) + + if not notebooks_target_dir(args.website_directory).exists(): + notebooks_target_dir(args.website_directory).mkdir(parents=True) + + with concurrent.futures.ProcessPoolExecutor(max_workers=args.workers) as executor: + futures = [ + executor.submit( + process_notebook, f, args.website_directory, args.notebook_directory, args.quarto_bin, args.dry_run + ) + for f in filtered_notebooks + ] + for future in concurrent.futures.as_completed(futures): + print(future.result()) + else: + print("Unknown subcommand") + sys.exit(1) if __name__ == "__main__": diff --git a/website/src/data/gallery.json b/website/src/data/gallery.json index 5511c4bd8c5..3a02b58f1d5 100644 --- a/website/src/data/gallery.json +++ b/website/src/data/gallery.json @@ -1,4 +1,11 @@ [ + { + "title": "Function Generator & Validator", + "link": "https://github.com/abhaymathur21/TensionCode", + "description": "A platform where user-required code is generated and simultaneously validated against sample data by AutoGen.", + "image": "TensionCode.png", + "tags": ["app", "ui"] + }, { "title": "Autogen Robot", "link": "https://github.com/AaronWard/generative-ai-workbook/tree/main/personal_projects/19.autogen-robot", diff --git a/website/static/img/gallery/TensionCode.png b/website/static/img/gallery/TensionCode.png new file mode 100644 index 00000000000..da6135a55fa Binary files /dev/null and b/website/static/img/gallery/TensionCode.png differ
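Taken together, this patch moves all Jupyter-based execution under `autogen.coding.jupyter`, adds a Docker-backed kernel gateway server, and gives both the servers and `JupyterCodeExecutor` explicit lifecycles (`stop()` plus context-manager support). A minimal usage sketch of the resulting API follows; it is illustrative only and not part of the patch, and it assumes the package is installed with the new `jupyter-executor` extra (`pip install -e ".[jupyter-executor]"`) and that a Docker daemon is available:

```python
# Illustrative sketch, not part of this patch. Assumes the `jupyter-executor`
# extra is installed and a Docker daemon is running.
from autogen.coding.base import CodeBlock
from autogen.coding.jupyter import DockerJupyterServer, JupyterCodeExecutor

# DockerJupyterServer builds the bundled jupyter_kernel_gateway image on first
# use and starts a container; leaving the `with` block stops the container.
with DockerJupyterServer() as server:
    # JupyterCodeExecutor starts a kernel through the server's JupyterClient;
    # on exit it removes the kernel via the new JupyterClient.delete_kernel().
    with JupyterCodeExecutor(jupyter_server=server) as executor:
        result = executor.execute_code_blocks(
            [CodeBlock(language="python", code="print(6 * 7)")]
        )
        print(result.exit_code, result.output)
```

Since `jupyter_code_executor.py` imports both `JupyterConnectable` and `JupyterConnectionInfo`, the same executor should work equally against a `LocalJupyterServer` or a pre-existing kernel gateway described by a `JupyterConnectionInfo`, though only the Docker path is exercised in the new tests above.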