geekan · geekan · Feb 6, 2024 · Feb 5, 2024 · Feb 5, 2024 · Feb 5, 2024
diff --git a/examples/crawl_webpage.py b/examples/crawl_webpage.py
@@ -10,7 +10,7 @@
 
 async def main():
     prompt = """Get data from `paperlist` table in https://papercopilot.com/statistics/iclr-statistics/iclr-2024-statistics/,
-    and save it to a csv file. paper title must include `multiagent` or `large language model`. *notice: print key data*"""
+    and save it to a csv file. paper title must include `multiagent` or `large language model`. *notice: print key variables*"""
     ci = CodeInterpreter(goal=prompt, use_tools=True)
 
     await ci.run(prompt)

diff --git a/metagpt/config2.py b/metagpt/config2.py
@@ -75,8 +75,6 @@ class Config(CLIParams, YamlModel):
     iflytek_api_key: str = ""
     azure_tts_subscription_key: str = ""
     azure_tts_region: str = ""
-    openai_vision_model: str = "gpt-4-vision-preview"
-    vision_max_tokens: int = 4096
 
     @classmethod
     def from_home(cls, path):

diff --git a/metagpt/tools/libs/gpt_v_generator.py b/metagpt/tools/libs/gpt_v_generator.py
@@ -5,15 +5,13 @@
 @Author  : mannaandpoem
 @File    : gpt_v_generator.py
 """
-import base64
 import os
 from pathlib import Path
 
-import requests
-
 from metagpt.const import DEFAULT_WORKSPACE_ROOT
 from metagpt.tools.tool_registry import register_tool
 from metagpt.tools.tool_type import ToolType
+from metagpt.utils.common import encode_image
 
 ANALYZE_LAYOUT_PROMPT = """You are now a UI/UX, please generate layout information for this image:
 
@@ -43,86 +41,41 @@ class GPTvGenerator:
     def __init__(self):
         """Initialize GPTvGenerator class with default values from the configuration."""
         from metagpt.config2 import config
+        from metagpt.llm import LLM
 
-        self.api_key = config.llm.api_key
-        self.api_base = config.llm.base_url
-        self.model = config.openai_vision_model
-        self.max_tokens = config.vision_max_tokens
+        self.llm = LLM(llm_config=config.get_openai_llm())
+        self.llm.model = "gpt-4-vision-preview"
 
-    def analyze_layout(self, image_path):
-        """Analyze the layout of the given image and return the result.
+    async def analyze_layout(self, image_path: Path) -> str:
+        """Asynchronously analyze the layout of the given image and return the result.
 
         This is a helper method to generate a layout description based on the image.
 
         Args:
-            image_path (str): Path of the image to analyze.
+            image_path (Path): Path of the image to analyze.
 
         Returns:
             str: The layout analysis result.
         """
-        return self.get_result(image_path, ANALYZE_LAYOUT_PROMPT)
+        return await self.llm.aask(msg=ANALYZE_LAYOUT_PROMPT, images=[encode_image(image_path)])
 
-    def generate_webpages(self, image_path):
-        """Generate webpages including all code (HTML, CSS, and JavaScript) in one go based on the image.
+    async def generate_webpages(self, image_path: str) -> str:
+        """Asynchronously generate webpages including all code (HTML, CSS, and JavaScript) in one go based on the image.
 
         Args:
             image_path (str): The path of the image file.
 
         Returns:
             str: Generated webpages content.
         """
-        layout = self.analyze_layout(image_path)
+        if isinstance(image_path, str):
+            image_path = Path(image_path)
+        layout = await self.analyze_layout(image_path)
         prompt = GENERATE_PROMPT + "\n\n # Context\n The layout information of the sketch image is: \n" + layout
-        result = self.get_result(image_path, prompt)
-        return result
-
-    def get_result(self, image_path, prompt):
-        """Get the result from the vision model based on the given image path and prompt.
-
-        Args:
-            image_path (str): Path of the image to analyze.
-            prompt (str): Prompt to use for the analysis.
-
-        Returns:
-            str: The model's response as a string.
-        """
-        base64_image = self.encode_image(image_path)
-        headers = {"Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}"}
-        payload = {
-            "model": self.model,
-            "messages": [
-                {
-                    "role": "user",
-                    "content": [
-                        {"type": "text", "text": prompt},
-                        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}},
-                    ],
-                }
-            ],
-            "max_tokens": self.max_tokens,
-        }
-        response = requests.post(f"{self.api_base}/chat/completions", headers=headers, json=payload)
-
-        if response.status_code != 200:
-            raise ValueError(f"Request failed with status {response.status_code}, {response.text}")
-        else:
-            return response.json()["choices"][0]["message"]["content"]
+        return await self.llm.aask(msg=prompt, images=[encode_image(image_path)])
 
     @staticmethod
-    def encode_image(image_path):
-        """Encode the image at the given path to a base64 string.
-
-        Args:
-            image_path (str): Path of the image to encode.
-
-        Returns:
-            str: The base64 encoded string of the image.
-        """
-        with open(image_path, "rb") as image_file:
-            return base64.b64encode(image_file.read()).decode("utf-8")
-
-    @staticmethod
-    def save_webpages(image_path, webpages) -> Path:
+    def save_webpages(image_path: str, webpages: str) -> Path:
         """Save webpages including all code (HTML, CSS, and JavaScript) at once.
 
         Args:
@@ -132,35 +85,29 @@ def save_webpages(image_path, webpages) -> Path:
         Returns:
             Path: The path of the saved webpages.
         """
-        # 在workspace目录下，创建一个名为下webpages的文件夹，用于存储html、css和js文件
+        # Create a folder called webpages in the workspace directory to store HTML, CSS, and JavaScript files
         webpages_path = DEFAULT_WORKSPACE_ROOT / "webpages" / Path(image_path).stem
         os.makedirs(webpages_path, exist_ok=True)
 
         index_path = webpages_path / "index.html"
-
         try:
             index = webpages.split("```html")[1].split("```")[0]
-        except IndexError:
-            index = "No html code found in the result, please check your image and try again." + "\n" + webpages
-
-        try:
+            style_path = None
             if "styles.css" in index:
                 style_path = webpages_path / "styles.css"
             elif "style.css" in index:
                 style_path = webpages_path / "style.css"
-            else:
-                style_path = None
             style = webpages.split("```css")[1].split("```")[0] if style_path else ""
 
+            js_path = None
             if "scripts.js" in index:
                 js_path = webpages_path / "scripts.js"
             elif "script.js" in index:
                 js_path = webpages_path / "script.js"
-            else:
-                js_path = None
+
             js = webpages.split("```javascript")[1].split("```")[0] if js_path else ""
         except IndexError:
-            raise ValueError("No css or js code found in the result, please check your image and try again.")
+            raise ValueError(f"No html or css or js code found in the result. \nWebpages: {webpages}")
 
         try:
             with open(index_path, "w", encoding="utf-8") as f:

diff --git a/metagpt/tools/libs/web_scraping.py b/metagpt/tools/libs/web_scraping.py
@@ -4,19 +4,18 @@
 
 
 @register_tool(tool_type=ToolType.WEBSCRAPING.type_name)
-async def scrape_web_playwright(url, *urls):
+async def scrape_web_playwright(url):
     """
-    Scrape and save the HTML structure and inner text content of a web page using Playwright.
+    Asynchronously scrape and save the HTML structure and inner text content of a web page using Playwright.
 
     Args:
         url (str): The main URL to fetch inner text from.
-        *urls (str): Additional URLs to fetch inner text from.
 
     Returns:
-        (dict): The inner text content and html structure of the web page, key are : 'inner_text', 'html'.
+        dict: The inner text content and HTML structure of the web page; keys are 'inner_text' and 'html'.
     """
     # Create a PlaywrightWrapper instance for the Chromium browser
-    web = await PlaywrightWrapper().run(url, *urls)
+    web = await PlaywrightWrapper().run(url)
 
     # Return the inner text content of the web page
     return {"inner_text": web.inner_text.strip(), "html": web.html.strip()}
diff --git a/metagpt/tools/tool_convert.py b/metagpt/tools/tool_convert.py
@@ -15,7 +15,8 @@ def convert_code_to_tool_schema(obj, include: list[str] = []):
             # method_doc = inspect.getdoc(method)
             method_doc = get_class_method_docstring(obj, name)
             if method_doc:
-                schema["methods"][name] = docstring_to_schema(method_doc)
+                function_type = "function" if not inspect.iscoroutinefunction(method) else "async_function"
+                schema["methods"][name] = {"type": function_type, **docstring_to_schema(method_doc)}
 
     elif inspect.isfunction(obj):
         schema = {

diff --git a/tests/metagpt/tools/libs/test_gpt_v_generator.py b/tests/metagpt/tools/libs/test_gpt_v_generator.py
@@ -5,36 +5,81 @@
 @Author  : mannaandpoem
 @File    : test_gpt_v_generator.py
 """
+from pathlib import Path
+
 import pytest
 
 from metagpt import logs
+from metagpt.const import METAGPT_ROOT
 from metagpt.tools.libs.gpt_v_generator import GPTvGenerator
 
 
 @pytest.fixture
-def mock_webpages(mocker):
+def mock_webpage_filename_with_styles_and_scripts(mocker):
     mock_data = """```html\n<html>\n<script src="scripts.js"></script>
-<link rel="stylesheet" href="styles.css(">\n</html>\n```\n
-```css\n.class { ... }\n```\n
-```javascript\nfunction() { ... }\n```\n"""
-    mocker.patch("metagpt.tools.libs.gpt_v_generator.GPTvGenerator.generate_webpages", return_value=mock_data)
+<link rel="stylesheet" href="styles.css">\n</html>\n```\n
+```css\n/* styles.css */\n```\n
+```javascript\n// scripts.js\n```\n"""
+    mocker.patch("metagpt.provider.base_llm.BaseLLM.aask", return_value=mock_data)
+    return mocker
+
+
+@pytest.fixture
+def mock_webpage_filename_with_style_and_script(mocker):
+    mock_data = """```html\n<html>\n<script src="script.js"></script>
+<link rel="stylesheet" href="style.css">\n</html>\n```\n
+```css\n/* style.css */\n```\n
+```javascript\n// script.js\n```\n"""
+    mocker.patch("metagpt.provider.base_llm.BaseLLM.aask", return_value=mock_data)
     return mocker
 
 
-def test_vision_generate_webpages(mock_webpages):
-    image_path = "image.png"
+@pytest.fixture
+def mock_image_layout(mocker):
+    image_layout = "The layout information of the sketch image is ..."
+    mocker.patch("metagpt.provider.base_llm.BaseLLM.aask", return_value=image_layout)
+    return mocker
+
+
+@pytest.fixture
+def image_path():
+    return f"{METAGPT_ROOT}/docs/resources/workspace/content_rec_sys/resources/competitive_analysis.png"
+
+
+@pytest.mark.asyncio
+async def test_generate_webpages(mock_webpage_filename_with_styles_and_scripts, image_path):
     generator = GPTvGenerator()
-    rsp = generator.generate_webpages(image_path=image_path)
+    rsp = await generator.generate_webpages(image_path=image_path)
     logs.logger.info(rsp)
     assert "html" in rsp
     assert "css" in rsp
     assert "javascript" in rsp
 
 
-def test_save_webpages(mock_webpages):
-    image_path = "image.png"
+@pytest.mark.asyncio
+async def test_save_webpages_with_styles_and_scripts(mock_webpage_filename_with_styles_and_scripts, image_path):
     generator = GPTvGenerator()
-    webpages = generator.generate_webpages(image_path)
+    webpages = await generator.generate_webpages(image_path)
     webpages_dir = generator.save_webpages(image_path=image_path, webpages=webpages)
     logs.logger.info(webpages_dir)
     assert webpages_dir.exists()
+
+
+@pytest.mark.asyncio
+async def test_save_webpages_with_style_and_script(mock_webpage_filename_with_style_and_script, image_path):
+    generator = GPTvGenerator()
+    webpages = await generator.generate_webpages(image_path)
+    webpages_dir = generator.save_webpages(image_path=image_path, webpages=webpages)
+    logs.logger.info(webpages_dir)
+    assert webpages_dir.exists()
+
+
+@pytest.mark.asyncio
+async def test_analyze_layout(mock_image_layout, image_path):
+    layout = await GPTvGenerator().analyze_layout(Path(image_path))
+    logs.logger.info(layout)
+    assert layout
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-s"])
diff --git a/tests/metagpt/utils/test_save_code.py b/tests/metagpt/utils/test_save_code.py
@@ -41,4 +41,4 @@ async def test_save_code_file_notebook():
     notebook = nbformat.read(file_path, as_version=4)
     assert len(notebook.cells) > 0, "Notebook should have at least one cell"
     first_cell_source = notebook.cells[0].source
-    assert "print('Hello, World!')" in first_cell_source, "Notebook cell content does not match"
+    assert "print" in first_cell_source, "Notebook cell content does not match"