diff --git a/examples/conversation_with_stablediffusion_model/README.md b/examples/conversation_with_stablediffusion_model/README.md
new file mode 100644
index 000000000..59e3a7270
--- /dev/null
+++ b/examples/conversation_with_stablediffusion_model/README.md
@@ -0,0 +1,129 @@
+# Conversation with Stable-diffusion model
+
+This example will show
+
+- How to use Stable Diffusion models in AgentScope.
+
+In this example, you can interact in a conversational format to generate images.
+Once the image is generated, the agent will respond with the local file path where the image is saved.
+
+## Minimum Hardware Requirements
+
+- **GPU**: NVIDIA GPU with at least 6.9GB of VRAM
+- **CPU**: Modern multi-core CPU (e.g., Intel i5 or AMD Ryzen 5)
+- **RAM**: Minimum 8GB
+- **Storage**: At least 10GB of available hard drive space
+
+## How to Run
+
+You need to satisfy the following requirements to run this example:
+
+### Step 0: Install Stable Diffusion Web UI and AgentScope
+
+- Install Stable Diffusion Web UI by following the instructions at [AUTOMATIC1111/stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui).
+- Install the latest version of AgentScope by
+ ```bash
+ git clone https://github.com/modelscope/agentscope.git
+ cd agentscope
+ pip install -e .
+ ```
+
+### Step 1: Download the required checkpoints
+
+Before starting the Stable Diffusion Web UI, you need to download at least one model to ensure normal operation.
+Download the model to the `stable-diffusion-webui/models/Stable-diffusion` directory.
+
+### Step 2: Launch the Stable Diffusion Web UI
+
+We've provided a convenient shell script to quickly start the Stable Diffusion Web UI:
+`scripts/stable_diffusion_webui/sd_setup.sh`
+
+Activate the virtual environment first. Then run the following command in your terminal, replacing `YOUR-SD-WEBUI-PATH` with the actual path to your Stable Diffusion Web UI directory:
+
+```bash
+bash scripts/stable_diffusion_webui/sd_setup.sh -s YOUR-SD-WEBUI-PATH
+```
+
+If you choose to start it on your own, you need to launch the Stable Diffusion Web UI with the following arguments: `--api --port=7862`. For more detailed instructions on starting the WebUI, refer to the [AUTOMATIC1111/stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) repository.
+
+### Step 3: Running the Example
+
+Run the example and input your prompt.
+
+```bash
+python conversation_with_stablediffusion_model.py
+```
+
+## Customization Options
+
+### `model_config` Example:
+
+```json
+{
+ "model_type": "sd_txt2img",
+ "config_name": "sd",
+ "options": {
+ "sd_model_checkpoint": "Anything-V3.0-pruned",
+ "sd_lora": "add_detail",
+ "CLIP_stop_at_last_layers": 2
+ },
+ "generate_args": {
+ "steps": 50,
+ "n_iter": 1,
+ "override_settings": {
+ "CLIP_stop_at_last_layers": 3
+ }
+ }
+}
+```
+
+### Parameter Explanation:
+
+- `options`: Global configuration that directly affects the WebUI settings.
+- `generate_args`: Controls parameters for individual image generation requests, such as `steps` (number of sampling steps) and `n_iter` (number of iterations).
+ - `override_settings`: Overrides WebUI settings for a single request, taking precedence over `options`.
+
+Notes:
+
+- `override_settings` only affects the current request, while changes made to `options` persist.
+- Both parameters can set the same options, but `override_settings` has a higher priority.
+
+As shown in the example, the final image will be generated with the following settings:
+
+- steps: 50
+- n_iter: 1
+- sd_model_checkpoint: Anything-V3.0-pruned
+- sd_lora: add_detail
+- CLIP_stop_at_last_layers: 3
+
+However, the web UI will always display the following settings:
+
+- sd_model_checkpoint: Anything-V3.0-pruned
+- sd_lora: add_detail
+- CLIP_stop_at_last_layers: 2
+
+### Available Parameter Lists:
+
+If you've successfully enabled the Stable Diffusion Web UI API, you should be able to access its documentation at http://127.0.0.1:7862/docs (or whatever URL you're using + /docs).
+
+- `generate_args`: {url}/docs#/default/text2imgapi_sdapi_v1_txt2img_post
+- `options` and `override_settings`: {url}/docs#/default/get_config_sdapi_v1_options_get
+
+For this project, the "options" parameter will be posted to the /sdapi/v1/options API endpoint,
+and the "generate_args" parameter will be posted to the /sdapi/v1/txt2img API endpoint.
+You can refer to https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/API for a more complete parameter reference.
+
+## A Running Example
+
+- Conversation history with Stable Diffusion Web UI.
+ ```bash
+ User input: Horses on Mars
+ User: Horses on Mars
+ Assistant: Image saved to path\agentscope\runs\run_20240920-142208_rqsvhh\file\image_20240920-142522_HTF38X.png
+ User input: boy eating ice-cream
+ User: boy eating ice-cream
+ Assistant: Image saved to path\agentscope\runs\run_20240920-142208_rqsvhh\file\image_20240920-142559_2xGtUs.png
+ ```
+- Image
+
+
\ No newline at end of file
diff --git a/examples/conversation_with_stablediffusion_model/conversation_with_stablediffusion_model.py b/examples/conversation_with_stablediffusion_model/conversation_with_stablediffusion_model.py
new file mode 100644
index 000000000..9a185f9c8
--- /dev/null
+++ b/examples/conversation_with_stablediffusion_model/conversation_with_stablediffusion_model.py
@@ -0,0 +1,48 @@
+# -*- coding: utf-8 -*-
+"""conversation between user and stable-diffusion agent."""
+import agentscope
+from agentscope.agents import DialogAgent
+from agentscope.agents.user_agent import UserAgent
+
+
+def main() -> None:
+ """A basic conversation demo"""
+
+ agentscope.init(
+ model_configs=[
+ {
+ "model_type": "sd_txt2img",
+ "config_name": "sd",
+ "options": {
+ "sd_model_checkpoint": "xxxxxx",
+ "CLIP_stop_at_last_layers": 2,
+ },
+ "generate_args": {
+ "steps": 50,
+ "n_iter": 1,
+ },
+ },
+ ],
+ project="txt2img-Agent Conversation",
+ save_api_invoke=True,
+ )
+
+ # Init two agents
+ dialog_agent = DialogAgent(
+ name="Assistant",
+ sys_prompt="dreamy", # replace by your image style prompts
+ model_config_name="sd", # replace by your model config name
+ )
+ user_agent = UserAgent()
+
+ # start the conversation between user and assistant
+ msg = None
+ while True:
+ msg = user_agent(msg)
+ if msg.content == "exit":
+ break
+ msg = dialog_agent(msg)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/scripts/stable_diffusion_webui/model_config.json b/scripts/stable_diffusion_webui/model_config.json
new file mode 100644
index 000000000..823ea406e
--- /dev/null
+++ b/scripts/stable_diffusion_webui/model_config.json
@@ -0,0 +1,14 @@
+{
+    "model_type": "sd_txt2img",
+    "config_name": "stable_diffusion_txt2img",
+    "host": "127.0.0.1",
+    "options": {
+        "sd_model_checkpoint": "Anything-V3.0-pruned",
+        "sd_lora": "add_detail",
+        "CLIP_stop_at_last_layers": 2
+    },
+    "generate_args": {
+        "steps": 50,
+        "n_iter": 1
+    }
+}
\ No newline at end of file
diff --git a/scripts/stable_diffusion_webui/sd_setup.sh b/scripts/stable_diffusion_webui/sd_setup.sh
new file mode 100644
index 000000000..ce71baba8
--- /dev/null
+++ b/scripts/stable_diffusion_webui/sd_setup.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+# Launch the Stable Diffusion Web UI with its API enabled.
+# Usage: sd_setup.sh -s <path-to-stable-diffusion-webui> [-p <port>]
+
+# stable_diffusion_webui_path="YOUR_PATH_TO_STABLE_DIFFUSION_WEBUI"
+
+port=7862
+
+while getopts ":p:s:" opt
+do
+    # shellcheck disable=SC2220
+    case $opt in
+        p) port="$OPTARG";;
+        s) stable_diffusion_webui_path="$OPTARG"
+        ;;
+    esac
+done
+
+stable_diffusion_webui_path=${stable_diffusion_webui_path%/}
+launch_py_path="$stable_diffusion_webui_path/launch.py"
+
+# Check if the launch.py script exists
+if [[ ! -f "$launch_py_path" ]]; then
+    echo "The launch.py script was not found at $launch_py_path."
+    echo "Please ensure you have specified the correct path to your Stable Diffusion WebUI using the -s option."
+    echo "Example: ./sd_setup.sh -s /path/to/your/stable-diffusion-webui"
+    echo "Alternatively, you can set the path directly in the script."
+    exit 1
+fi
+
+cd "$stable_diffusion_webui_path" || exit 1
+
+python ./launch.py --api --port="$port"
diff --git a/setup.py b/setup.py
index b9bee5039..6b65f3c42 100644
--- a/setup.py
+++ b/setup.py
@@ -95,6 +95,7 @@
extra_litellm_requires = ["litellm"]
extra_zhipuai_requires = ["zhipuai"]
extra_ollama_requires = ["ollama>=0.1.7"]
+extra_sd_webuiapi_requires = ["webuiapi"]
extra_anthropic_requires = ["anthropic"]
# Full requires
@@ -108,6 +109,7 @@
+ extra_litellm_requires
+ extra_zhipuai_requires
+ extra_ollama_requires
+ + extra_sd_webuiapi_requires
+ extra_anthropic_requires
)
@@ -147,6 +149,7 @@
"litellm": extra_litellm_requires,
"zhipuai": extra_zhipuai_requires,
"gemini": extra_gemini_requires,
+ "stablediffusion": extra_sd_webuiapi_requires,
"anthropic": extra_anthropic_requires,
# For service functions
"service": extra_service_requires,
diff --git a/src/agentscope/models/__init__.py b/src/agentscope/models/__init__.py
index 9aad55d9a..fafa0a746 100644
--- a/src/agentscope/models/__init__.py
+++ b/src/agentscope/models/__init__.py
@@ -39,6 +39,9 @@
YiChatWrapper,
)
from .anthropic_model import AnthropicChatWrapper
+from .stablediffusion_model import (
+ StableDiffusionImageSynthesisWrapper,
+)
_BUILD_IN_MODEL_WRAPPERS = [
"PostAPIChatWrapper",
@@ -59,6 +62,7 @@
"LiteLLMChatWrapper",
"YiChatWrapper",
"AnthropicChatWrapper",
+ "StableDiffusionImageSynthesisWrapper",
]
__all__ = [
@@ -83,5 +87,6 @@
"ZhipuAIEmbeddingWrapper",
"LiteLLMChatWrapper",
"YiChatWrapper",
+ "StableDiffusionImageSynthesisWrapper",
"AnthropicChatWrapper",
]
diff --git a/src/agentscope/models/stablediffusion_model.py b/src/agentscope/models/stablediffusion_model.py
new file mode 100644
index 000000000..e1948e38a
--- /dev/null
+++ b/src/agentscope/models/stablediffusion_model.py
@@ -0,0 +1,223 @@
+# -*- coding: utf-8 -*-
+"""Model wrapper for stable diffusion models."""
+from abc import ABC
+from typing import Any, Union, Sequence
+
+try:
+ import webuiapi
+except ImportError:
+ webuiapi = None
+
+from . import ModelWrapperBase, ModelResponse
+from ..message import Msg
+from ..manager import FileManager
+from ..utils.common import _convert_to_str
+
+
+class StableDiffusionWrapperBase(ModelWrapperBase, ABC):
+ """The base class for stable-diffusion model wrappers.
+
+ To use SD-webui API, please
+ 1. First download stable-diffusion-webui from
+ https://github.com/AUTOMATIC1111/stable-diffusion-webui and
+ install it
+ 2. Move your checkpoint to 'models/Stable-diffusion' folder
+ 3. Start launch.py with the '--api --port=7862' parameter
+ 4. Install the 'webuiapi' package by 'pip install webuiapi'
+ After that, you can use the SD-webui API and
+ query the available parameters on the http://localhost:7862/docs page
+ """
+
+ model_type: str = "stable_diffusion"
+
+ def __init__(
+ self,
+ config_name: str,
+ generate_args: dict = None,
+ options: dict = None,
+ host: str = "127.0.0.1",
+ port: int = 7862,
+ **kwargs: Any,
+ ) -> None:
+ """
+ Initializes the SD-webui API client.
+
+ Args:
+ config_name (`str`):
+ The name of the model config.
+ generate_args (`dict`, default `None`):
+ The extra keyword arguments used in SD api generation,
+ e.g. `{"steps": 50}`.
+ options (`dict`, default `None`):
+ The keyword arguments to change the sd-webui settings
+ such as model or CLIP skip, this changes will persist.
+ e.g. `{"sd_model_checkpoint": "Anything-V3.0-pruned"}`.
+ host (`str`, default `"127.0.0.1"`):
+ The host of the stable-diffusion webui server.
+ port (`int`, default `7862`):
+ The port of the stable-diffusion webui server.
+ """
+ # Initialize the SD-webui API
+ self.api = webuiapi.WebUIApi(host=host, port=port, **kwargs)
+ self.generate_args = generate_args or {}
+
+ # Set options if provided
+ if options:
+ self.api.set_options(options)
+
+ # Get the default model name from the web-options
+ model_name = (
+ self.api.get_options()["sd_model_checkpoint"].split("[")[0].strip()
+ )
+ # Update the model name
+ if self.generate_args.get("override_settings"):
+ model_name = generate_args["override_settings"].get(
+ "sd_model_checkpoint",
+ model_name,
+ )
+
+ super().__init__(config_name=config_name, model_name=model_name)
+
+
+class StableDiffusionImageSynthesisWrapper(StableDiffusionWrapperBase):
+ """Stable Diffusion Text-to-Image (txt2img) API Wrapper"""
+
+ model_type: str = "sd_txt2img"
+
+ def __call__(
+ self,
+ prompt: str,
+ save_local: bool = True,
+ **kwargs: Any,
+ ) -> ModelResponse:
+ """
+ Args:
+ prompt (`str`):
+ The prompt string to generate images from.
+ save_local (`bool`, default `True`):
+ Whether to save the generated images locally.
+ **kwargs (`Any`):
+ The keyword arguments to SD-webui txt2img API, e.g.
+ `n_iter`, `steps`, `seed`, `width`, etc. Please refer to
+ https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/API
+ or http://localhost:7862/docs
+ for more detailed arguments.
+ Returns:
+ `ModelResponse`:
+ A list of image local urls in image_urls field and the
+ raw response in raw field.
+ """
+ # step1: prepare keyword arguments
+ payload = {
+ "prompt": prompt,
+ **kwargs,
+ **self.generate_args,
+ }
+
+ # step2: forward to generate response
+ response = self.api.txt2img(**payload)
+
+ # step3: save model invocation and update monitor
+ self._save_model_invocation_and_update_monitor(
+ payload=payload,
+ response=response.json,
+ )
+
+ # step4: parse the response
+ PIL_images = response.images
+
+ file_manager = FileManager.get_instance()
+ if save_local:
+ # Save images
+ image_urls = [file_manager.save_image(_) for _ in PIL_images]
+ text = "Image saved to " + "\n".join(image_urls)
+ else:
+ image_urls = PIL_images
+ text = "" # Just a placeholder
+
+ return ModelResponse(
+ text=text,
+ image_urls=image_urls,
+ raw=response.json,
+ )
+
+ def _save_model_invocation_and_update_monitor(
+ self,
+ payload: dict,
+ response: dict,
+ ) -> None:
+ """Save the model invocation and update the monitor accordingly.
+
+ Args:
+ kwargs (`dict`):
+ The keyword arguments to the DashScope chat API.
+ response (`dict`):
+ The response object returned by the DashScope chat API.
+ """
+ self._save_model_invocation(
+ arguments=payload,
+ response=response,
+ )
+
+ session_parameters = response["parameters"]
+ size = f"{session_parameters['width']}*{session_parameters['height']}"
+ image_count = (
+ session_parameters["batch_size"] * session_parameters["n_iter"]
+ )
+
+ self.monitor.update_image_tokens(
+ model_name=self.model_name,
+ image_count=image_count,
+ resolution=size,
+ )
+
+ def format(self, *args: Union[Msg, Sequence[Msg]]) -> str:
+ # This is a temporary implementation to focus on the prompt
+ # on single-turn image generation by preserving only the system prompt
+ # and the last user message. This logic might change in the future
+ # to support more complex conversational scenarios
+ if len(args) == 0:
+ raise ValueError(
+ "At least one message should be provided. An empty message "
+ "list is not allowed.",
+ )
+
+ # Parse all information into a list of messages
+ input_msgs = []
+ for _ in args:
+ if _ is None:
+ continue
+ if isinstance(_, Msg):
+ input_msgs.append(_)
+ elif isinstance(_, list) and all(isinstance(__, Msg) for __ in _):
+ input_msgs.extend(_)
+ else:
+ raise TypeError(
+ f"The input should be a Msg object or a list "
+ f"of Msg objects, got {type(_)}.",
+ )
+
+ # record user message history as a list of strings
+ user_messages = []
+ sys_prompt = None
+ for i, unit in enumerate(input_msgs):
+ if i == 0 and unit.role == "system":
+ # if system prompt is available, place it at the beginning
+ sys_prompt = _convert_to_str(unit.content)
+ elif unit.role == "user":
+ # Merge user messages into a conversation history prompt
+ user_messages.append(_convert_to_str(unit.content))
+ else:
+ continue
+
+ content_components = []
+ # Add system prompt at the beginning if provided
+ if sys_prompt:
+ content_components.append(sys_prompt)
+ # Add the last user message if the user messages is not empty
+ if len(user_messages) > 0:
+ content_components.append(user_messages[-1])
+
+ prompt = ",".join(content_components)
+
+ return prompt
diff --git a/src/agentscope/service/__init__.py b/src/agentscope/service/__init__.py
index 6d2fd1342..35f45f582 100644
--- a/src/agentscope/service/__init__.py
+++ b/src/agentscope/service/__init__.py
@@ -45,6 +45,7 @@
openai_edit_image,
openai_create_image_variation,
)
+from .multi_modality.stablediffusion_services import sd_text_to_image
from .service_response import ServiceResponse
from .service_toolkit import ServiceToolkit
@@ -120,6 +121,7 @@ def get_help() -> None:
"openai_image_to_text",
"openai_edit_image",
"openai_create_image_variation",
+ "sd_text_to_image",
"tripadvisor_search",
"tripadvisor_search_location_photos",
"tripadvisor_search_location_details",
diff --git a/src/agentscope/service/multi_modality/stablediffusion_services.py b/src/agentscope/service/multi_modality/stablediffusion_services.py
new file mode 100644
index 000000000..4547aa115
--- /dev/null
+++ b/src/agentscope/service/multi_modality/stablediffusion_services.py
@@ -0,0 +1,122 @@
+# -*- coding: utf-8 -*-
+"""Use StableDiffusion-webui API to generate images
+"""
+import os
+from typing import Optional
+
+from ...models import StableDiffusionImageSynthesisWrapper
+
+from ...manager import FileManager
+from ..service_response import (
+ ServiceResponse,
+ ServiceExecStatus,
+)
+from ...utils.common import (
+ _get_timestamp,
+ _generate_random_code,
+)
+from ...constants import _DEFAULT_IMAGE_NAME
+
+
+def sd_text_to_image(
+ prompt: str,
+ n_iter: int = 1,
+ width: int = 1024,
+ height: int = 1024,
+ options: dict = None,
+ baseurl: str = None,
+ save_dir: Optional[str] = None,
+) -> ServiceResponse:
+ """Generate image(s) based on the given prompt, and return image url(s).
+
+ Args:
+ prompt (`str`):
+ The text prompt to generate image.
+ n (`int`, defaults to `1`):
+ The number of images to generate.
+ width (`int`, defaults to `1024`):
+ Width of the image.
+ height (`int`, defaults to `1024`):
+ Height of the image.
+ options (`dict`, defaults to `None`):
+ The options to override the sd-webui default settings.
+ If not specified, will use the default settings.
+ baseurl (`str`, defaults to `None`):
+ The base url of the sd-webui.
+ save_dir (`Optional[str]`, defaults to 'None'):
+ The directory to save the generated images. If not specified,
+ will return the web urls.
+
+ Returns:
+ ServiceResponse:
+ A dictionary with two variables: `status` and`content`.
+ If `status` is ServiceExecStatus.SUCCESS,
+ the `content` is a dict with key 'fig_paths" and
+ value is a list of the paths to the generated images.
+
+ Example:
+
+ .. code-block:: python
+
+ prompt = "A beautiful sunset in the mountains"
+ print(sd_text_to_image(prompt, 2))
+
+ > {
+ > 'status': 'SUCCESS',
+ > 'content': {'image_urls': ['IMAGE_URL1', 'IMAGE_URL2']}
+ > }
+
+ """
+ text2img = StableDiffusionImageSynthesisWrapper(
+ config_name="sd-text-to-image-service", # Just a placeholder
+ baseurl=baseurl,
+ )
+ try:
+ kwargs = {"n_iter": n_iter, "width": width, "height": height}
+ if options:
+ kwargs["override_settings"] = options
+
+ res = text2img(prompt=prompt, save_local=False, **kwargs)
+ images = res.image_urls
+
+ # save images to save_dir
+ if images is not None:
+ if save_dir:
+ os.makedirs(save_dir, exist_ok=True)
+ urls_local = []
+ # Obtain the image file names in the url
+ for image in images:
+ image_name = _DEFAULT_IMAGE_NAME.format(
+ _get_timestamp(
+ "%Y%m%d-%H%M%S",
+ ),
+ _generate_random_code(),
+ )
+ image_path = os.path.abspath(
+ os.path.join(save_dir, image_name),
+ )
+ # Download the image
+ image.save(image_path)
+ urls_local.append(image_path)
+ return ServiceResponse(
+ ServiceExecStatus.SUCCESS,
+ {"image_urls": urls_local},
+ )
+ else:
+ # Return the default urls
+ file_manager = FileManager.get_instance()
+ urls = [file_manager.save_image(_) for _ in images]
+ return ServiceResponse(
+ ServiceExecStatus.SUCCESS,
+ {"image_urls": urls},
+ )
+ else:
+ return ServiceResponse(
+ ServiceExecStatus.ERROR,
+ "Error: Failed to generate images",
+ )
+ except Exception as e:
+ return ServiceResponse(
+ ServiceExecStatus.ERROR,
+ str(e),
+ )
diff --git a/tests/model_test.py b/tests/model_test.py
index dafc02c79..37682cc88 100644
--- a/tests/model_test.py
+++ b/tests/model_test.py
@@ -28,6 +28,7 @@
OpenAIChatWrapper,
PostAPIChatWrapper,
AnthropicChatWrapper,
+ StableDiffusionImageSynthesisWrapper,
)
@@ -79,6 +80,7 @@ def test_build_in_model_wrapper_classes(self) -> None:
"litellm_chat": LiteLLMChatWrapper,
"yi_chat": YiChatWrapper,
"anthropic_chat": AnthropicChatWrapper,
+ "sd_txt2img": StableDiffusionImageSynthesisWrapper,
},
)