From 6015c1661824e2e59a611fe4f3248dcd40294b9d Mon Sep 17 00:00:00 2001
From: mannaandpoem <1580466765@qq.com>
Date: Mon, 5 Feb 2024 21:48:59 +0800
Subject: [PATCH 1/5] Remove get_result method and improve gpt_v_generator.py
and test_gpt_v_generator.py
---
metagpt/tools/libs/gpt_v_generator.py | 93 ++++---------------
metagpt/tools/tool_convert.py | 3 +-
.../tools/libs/test_gpt_v_generator.py | 77 ++++++++++++---
3 files changed, 88 insertions(+), 85 deletions(-)
diff --git a/metagpt/tools/libs/gpt_v_generator.py b/metagpt/tools/libs/gpt_v_generator.py
index 6953300d8..b1e8317ed 100644
--- a/metagpt/tools/libs/gpt_v_generator.py
+++ b/metagpt/tools/libs/gpt_v_generator.py
@@ -5,15 +5,13 @@
@Author : mannaandpoem
@File : gpt_v_generator.py
"""
-import base64
import os
from pathlib import Path
-import requests
-
from metagpt.const import DEFAULT_WORKSPACE_ROOT
from metagpt.tools.tool_registry import register_tool
from metagpt.tools.tool_type import ToolType
+from metagpt.utils.common import encode_image
ANALYZE_LAYOUT_PROMPT = """You are now a UI/UX, please generate layout information for this image:
@@ -43,27 +41,26 @@ class GPTvGenerator:
def __init__(self):
"""Initialize GPTvGenerator class with default values from the configuration."""
from metagpt.config2 import config
+ from metagpt.llm import LLM
- self.api_key = config.llm.api_key
- self.api_base = config.llm.base_url
- self.model = config.openai_vision_model
- self.max_tokens = config.vision_max_tokens
+ self.llm = LLM(llm_config=config.get_openai_llm())
+ self.llm.model = "gpt-4-vision-preview"
- def analyze_layout(self, image_path):
- """Analyze the layout of the given image and return the result.
+ async def analyze_layout(self, image_path: Path) -> str:
+ """Asynchronously analyze the layout of the given image and return the result.
This is a helper method to generate a layout description based on the image.
Args:
- image_path (str): Path of the image to analyze.
+ image_path (Path): Path of the image to analyze.
Returns:
str: The layout analysis result.
"""
- return self.get_result(image_path, ANALYZE_LAYOUT_PROMPT)
+ return await self.llm.aask(msg=ANALYZE_LAYOUT_PROMPT, images=[encode_image(image_path)])
- def generate_webpages(self, image_path):
- """Generate webpages including all code (HTML, CSS, and JavaScript) in one go based on the image.
+ async def generate_webpages(self, image_path: str) -> str:
+ """Asynchronously generate webpages including all code (HTML, CSS, and JavaScript) in one go based on the image.
Args:
image_path (str): The path of the image file.
@@ -71,58 +68,14 @@ def generate_webpages(self, image_path):
Returns:
str: Generated webpages content.
"""
- layout = self.analyze_layout(image_path)
+ if isinstance(image_path, str):
+ image_path = Path(image_path)
+ layout = await self.analyze_layout(image_path)
prompt = GENERATE_PROMPT + "\n\n # Context\n The layout information of the sketch image is: \n" + layout
- result = self.get_result(image_path, prompt)
- return result
-
- def get_result(self, image_path, prompt):
- """Get the result from the vision model based on the given image path and prompt.
-
- Args:
- image_path (str): Path of the image to analyze.
- prompt (str): Prompt to use for the analysis.
-
- Returns:
- str: The model's response as a string.
- """
- base64_image = self.encode_image(image_path)
- headers = {"Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}"}
- payload = {
- "model": self.model,
- "messages": [
- {
- "role": "user",
- "content": [
- {"type": "text", "text": prompt},
- {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}},
- ],
- }
- ],
- "max_tokens": self.max_tokens,
- }
- response = requests.post(f"{self.api_base}/chat/completions", headers=headers, json=payload)
-
- if response.status_code != 200:
- raise ValueError(f"Request failed with status {response.status_code}, {response.text}")
- else:
- return response.json()["choices"][0]["message"]["content"]
+ return await self.llm.aask(msg=prompt, images=[encode_image(image_path)])
@staticmethod
- def encode_image(image_path):
- """Encode the image at the given path to a base64 string.
-
- Args:
- image_path (str): Path of the image to encode.
-
- Returns:
- str: The base64 encoded string of the image.
- """
- with open(image_path, "rb") as image_file:
- return base64.b64encode(image_file.read()).decode("utf-8")
-
- @staticmethod
- def save_webpages(image_path, webpages) -> Path:
+ def save_webpages(image_path: str, webpages: str) -> Path:
"""Save webpages including all code (HTML, CSS, and JavaScript) at once.
Args:
@@ -132,35 +85,29 @@ def save_webpages(image_path, webpages) -> Path:
Returns:
Path: The path of the saved webpages.
"""
- # 在workspace目录下,创建一个名为下webpages的文件夹,用于存储html、css和js文件
+ # Create a folder called webpages in the workspace directory to store HTML, CSS, and JavaScript files
webpages_path = DEFAULT_WORKSPACE_ROOT / "webpages" / Path(image_path).stem
os.makedirs(webpages_path, exist_ok=True)
index_path = webpages_path / "index.html"
-
try:
index = webpages.split("```html")[1].split("```")[0]
- except IndexError:
- index = "No html code found in the result, please check your image and try again." + "\n" + webpages
-
- try:
+ style_path = None
if "styles.css" in index:
style_path = webpages_path / "styles.css"
elif "style.css" in index:
style_path = webpages_path / "style.css"
- else:
- style_path = None
style = webpages.split("```css")[1].split("```")[0] if style_path else ""
+ js_path = None
if "scripts.js" in index:
js_path = webpages_path / "scripts.js"
elif "script.js" in index:
js_path = webpages_path / "script.js"
- else:
- js_path = None
+
js = webpages.split("```javascript")[1].split("```")[0] if js_path else ""
except IndexError:
- raise ValueError("No css or js code found in the result, please check your image and try again.")
+ raise ValueError(f"No html or css or js code found in the result. \nWebpages: {webpages}")
try:
with open(index_path, "w", encoding="utf-8") as f:
diff --git a/metagpt/tools/tool_convert.py b/metagpt/tools/tool_convert.py
index 417a938e1..fc7cb9a15 100644
--- a/metagpt/tools/tool_convert.py
+++ b/metagpt/tools/tool_convert.py
@@ -15,7 +15,8 @@ def convert_code_to_tool_schema(obj, include: list[str] = []):
# method_doc = inspect.getdoc(method)
method_doc = get_class_method_docstring(obj, name)
if method_doc:
- schema["methods"][name] = docstring_to_schema(method_doc)
+ function_type = "function" if not inspect.iscoroutinefunction(method) else "async_function"
+ schema["methods"][name] = {"type": function_type, **docstring_to_schema(method_doc)}
elif inspect.isfunction(obj):
schema = {
diff --git a/tests/metagpt/tools/libs/test_gpt_v_generator.py b/tests/metagpt/tools/libs/test_gpt_v_generator.py
index d686d38ba..1b8b756e1 100644
--- a/tests/metagpt/tools/libs/test_gpt_v_generator.py
+++ b/tests/metagpt/tools/libs/test_gpt_v_generator.py
@@ -5,36 +5,91 @@
@Author : mannaandpoem
@File : test_gpt_v_generator.py
"""
+from pathlib import Path
+
import pytest
from metagpt import logs
+from metagpt.const import METAGPT_ROOT
from metagpt.tools.libs.gpt_v_generator import GPTvGenerator
@pytest.fixture
-def mock_webpages(mocker):
+def mock_webpage_filename_with_styles_and_scripts(mocker):
mock_data = """```html\n\n
-\n\n```\n
-```css\n.class { ... }\n```\n
-```javascript\nfunction() { ... }\n```\n"""
- mocker.patch("metagpt.tools.libs.gpt_v_generator.GPTvGenerator.generate_webpages", return_value=mock_data)
+\n