From 6015c1661824e2e59a611fe4f3248dcd40294b9d Mon Sep 17 00:00:00 2001 From: mannaandpoem <1580466765@qq.com> Date: Mon, 5 Feb 2024 21:48:59 +0800 Subject: [PATCH 1/5] remove get_result method and improve gpt_v_generator.py and test_gpt_v_generator.py. --- metagpt/tools/libs/gpt_v_generator.py | 93 ++++--------------- metagpt/tools/tool_convert.py | 3 +- .../tools/libs/test_gpt_v_generator.py | 77 ++++++++++++--- 3 files changed, 88 insertions(+), 85 deletions(-) diff --git a/metagpt/tools/libs/gpt_v_generator.py b/metagpt/tools/libs/gpt_v_generator.py index 6953300d8..b1e8317ed 100644 --- a/metagpt/tools/libs/gpt_v_generator.py +++ b/metagpt/tools/libs/gpt_v_generator.py @@ -5,15 +5,13 @@ @Author : mannaandpoem @File : gpt_v_generator.py """ -import base64 import os from pathlib import Path -import requests - from metagpt.const import DEFAULT_WORKSPACE_ROOT from metagpt.tools.tool_registry import register_tool from metagpt.tools.tool_type import ToolType +from metagpt.utils.common import encode_image ANALYZE_LAYOUT_PROMPT = """You are now a UI/UX, please generate layout information for this image: @@ -43,27 +41,26 @@ class GPTvGenerator: def __init__(self): """Initialize GPTvGenerator class with default values from the configuration.""" from metagpt.config2 import config + from metagpt.llm import LLM - self.api_key = config.llm.api_key - self.api_base = config.llm.base_url - self.model = config.openai_vision_model - self.max_tokens = config.vision_max_tokens + self.llm = LLM(llm_config=config.get_openai_llm()) + self.llm.model = "gpt-4-vision-preview" - def analyze_layout(self, image_path): - """Analyze the layout of the given image and return the result. + async def analyze_layout(self, image_path: Path) -> str: + """Asynchronously analyze the layout of the given image and return the result. This is a helper method to generate a layout description based on the image. Args: - image_path (str): Path of the image to analyze. + image_path (Path): Path of the image to analyze. Returns: str: The layout analysis result. """ - return self.get_result(image_path, ANALYZE_LAYOUT_PROMPT) + return await self.llm.aask(msg=ANALYZE_LAYOUT_PROMPT, images=[encode_image(image_path)]) - def generate_webpages(self, image_path): - """Generate webpages including all code (HTML, CSS, and JavaScript) in one go based on the image. + async def generate_webpages(self, image_path: str) -> str: + """Asynchronously generate webpages including all code (HTML, CSS, and JavaScript) in one go based on the image. Args: image_path (str): The path of the image file. @@ -71,58 +68,14 @@ def generate_webpages(self, image_path): Returns: str: Generated webpages content. """ - layout = self.analyze_layout(image_path) + if isinstance(image_path, str): + image_path = Path(image_path) + layout = await self.analyze_layout(image_path) prompt = GENERATE_PROMPT + "\n\n # Context\n The layout information of the sketch image is: \n" + layout - result = self.get_result(image_path, prompt) - return result - - def get_result(self, image_path, prompt): - """Get the result from the vision model based on the given image path and prompt. - - Args: - image_path (str): Path of the image to analyze. - prompt (str): Prompt to use for the analysis. - - Returns: - str: The model's response as a string. - """ - base64_image = self.encode_image(image_path) - headers = {"Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}"} - payload = { - "model": self.model, - "messages": [ - { - "role": "user", - "content": [ - {"type": "text", "text": prompt}, - {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}, - ], - } - ], - "max_tokens": self.max_tokens, - } - response = requests.post(f"{self.api_base}/chat/completions", headers=headers, json=payload) - - if response.status_code != 200: - raise ValueError(f"Request failed with status {response.status_code}, {response.text}") - else: - return response.json()["choices"][0]["message"]["content"] + return await self.llm.aask(msg=prompt, images=[encode_image(image_path)]) @staticmethod - def encode_image(image_path): - """Encode the image at the given path to a base64 string. - - Args: - image_path (str): Path of the image to encode. - - Returns: - str: The base64 encoded string of the image. - """ - with open(image_path, "rb") as image_file: - return base64.b64encode(image_file.read()).decode("utf-8") - - @staticmethod - def save_webpages(image_path, webpages) -> Path: + def save_webpages(image_path: str, webpages: str) -> Path: """Save webpages including all code (HTML, CSS, and JavaScript) at once. Args: @@ -132,35 +85,29 @@ def save_webpages(image_path, webpages) -> Path: Returns: Path: The path of the saved webpages. """ - # 在workspace目录下,创建一个名为下webpages的文件夹,用于存储html、css和js文件 + # Create a folder called webpages in the workspace directory to store HTML, CSS, and JavaScript files webpages_path = DEFAULT_WORKSPACE_ROOT / "webpages" / Path(image_path).stem os.makedirs(webpages_path, exist_ok=True) index_path = webpages_path / "index.html" - try: index = webpages.split("```html")[1].split("```")[0] - except IndexError: - index = "No html code found in the result, please check your image and try again." + "\n" + webpages - - try: + style_path = None if "styles.css" in index: style_path = webpages_path / "styles.css" elif "style.css" in index: style_path = webpages_path / "style.css" - else: - style_path = None style = webpages.split("```css")[1].split("```")[0] if style_path else "" + js_path = None if "scripts.js" in index: js_path = webpages_path / "scripts.js" elif "script.js" in index: js_path = webpages_path / "script.js" - else: - js_path = None + js = webpages.split("```javascript")[1].split("```")[0] if js_path else "" except IndexError: - raise ValueError("No css or js code found in the result, please check your image and try again.") + raise ValueError(f"No html or css or js code found in the result. \nWebpages: {webpages}") try: with open(index_path, "w", encoding="utf-8") as f: diff --git a/metagpt/tools/tool_convert.py b/metagpt/tools/tool_convert.py index 417a938e1..fc7cb9a15 100644 --- a/metagpt/tools/tool_convert.py +++ b/metagpt/tools/tool_convert.py @@ -15,7 +15,8 @@ def convert_code_to_tool_schema(obj, include: list[str] = []): # method_doc = inspect.getdoc(method) method_doc = get_class_method_docstring(obj, name) if method_doc: - schema["methods"][name] = docstring_to_schema(method_doc) + function_type = "function" if not inspect.iscoroutinefunction(method) else "async_function" + schema["methods"][name] = {"type": function_type, **docstring_to_schema(method_doc)} elif inspect.isfunction(obj): schema = { diff --git a/tests/metagpt/tools/libs/test_gpt_v_generator.py b/tests/metagpt/tools/libs/test_gpt_v_generator.py index d686d38ba..1b8b756e1 100644 --- a/tests/metagpt/tools/libs/test_gpt_v_generator.py +++ b/tests/metagpt/tools/libs/test_gpt_v_generator.py @@ -5,36 +5,91 @@ @Author : mannaandpoem @File : test_gpt_v_generator.py """ +from pathlib import Path + import pytest from metagpt import logs +from metagpt.const import METAGPT_ROOT from metagpt.tools.libs.gpt_v_generator import GPTvGenerator @pytest.fixture -def mock_webpages(mocker): +def mock_webpage_filename_with_styles_and_scripts(mocker): mock_data = """```html\n\n -\n\n```\n -```css\n.class { ... }\n```\n -```javascript\nfunction() { ... }\n```\n""" - mocker.patch("metagpt.tools.libs.gpt_v_generator.GPTvGenerator.generate_webpages", return_value=mock_data) +\n\n```\n +```css\n/* styles.css */\n```\n +```javascript\n// scripts.js\n```\n""" + mocker.patch("metagpt.provider.base_llm.BaseLLM.aask", return_value=mock_data) + return mocker + + +@pytest.fixture +def mock_webpage_filename_with_style_and_script(mocker): + mock_data = """```html\n\n +\n\n```\n +```css\n/* style.css */\n```\n +```javascript\n// script.js\n```\n""" + mocker.patch("metagpt.provider.base_llm.BaseLLM.aask", return_value=mock_data) return mocker -def test_vision_generate_webpages(mock_webpages): - image_path = "image.png" +@pytest.fixture +def mock_image_layout(mocker): + image_layout = "The layout information of the sketch image is ..." + mocker.patch("metagpt.provider.base_llm.BaseLLM.aask", return_value=image_layout) + return mocker + + +@pytest.fixture +def image_path(): + return f"{METAGPT_ROOT}/docs/resources/workspace/content_rec_sys/resources/competitive_analysis.png" + + +@pytest.mark.asyncio +async def test_generate_webpages_with_suffix_s(mock_webpage_filename_with_styles_and_scripts, image_path): generator = GPTvGenerator() - rsp = generator.generate_webpages(image_path=image_path) + rsp = await generator.generate_webpages(image_path=image_path) logs.logger.info(rsp) assert "html" in rsp assert "css" in rsp assert "javascript" in rsp -def test_save_webpages(mock_webpages): - image_path = "image.png" +@pytest.mark.asyncio +async def test_generate_webpages_without_suffix_s(mock_webpage_filename_with_style_and_script, image_path): generator = GPTvGenerator() - webpages = generator.generate_webpages(image_path) + rsp = await generator.generate_webpages(image_path=image_path) + logs.logger.info(rsp) + assert "html" in rsp + assert "css" in rsp + assert "javascript" in rsp + + +@pytest.mark.asyncio +async def test_save_webpages_with_suffix_s(mock_webpage_filename_with_styles_and_scripts, image_path): + generator = GPTvGenerator() + webpages = await generator.generate_webpages(image_path) + webpages_dir = generator.save_webpages(image_path=image_path, webpages=webpages) + logs.logger.info(webpages_dir) + assert webpages_dir.exists() + + +@pytest.mark.asyncio +async def test_save_webpages_without_suffix_s(mock_webpage_filename_with_style_and_script, image_path): + generator = GPTvGenerator() + webpages = await generator.generate_webpages(image_path) webpages_dir = generator.save_webpages(image_path=image_path, webpages=webpages) logs.logger.info(webpages_dir) assert webpages_dir.exists() + + +@pytest.mark.asyncio +async def test_analyze_layout(mock_image_layout, image_path): + layout = await GPTvGenerator().analyze_layout(Path(image_path)) + logs.logger.info(layout) + assert layout + + +if __name__ == "__main__": + pytest.main([__file__, "-s"]) From 5abde78767cf3861e74be1fce3dc1f4cd1fd8c93 Mon Sep 17 00:00:00 2001 From: mannaandpoem <1580466765@qq.com> Date: Mon, 5 Feb 2024 21:54:09 +0800 Subject: [PATCH 2/5] remove get_result method and improve gpt_v_generator.py and test_gpt_v_generator.py. --- tests/metagpt/tools/libs/test_gpt_v_generator.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/metagpt/tools/libs/test_gpt_v_generator.py b/tests/metagpt/tools/libs/test_gpt_v_generator.py index 1b8b756e1..76ada8622 100644 --- a/tests/metagpt/tools/libs/test_gpt_v_generator.py +++ b/tests/metagpt/tools/libs/test_gpt_v_generator.py @@ -47,7 +47,7 @@ def image_path(): @pytest.mark.asyncio -async def test_generate_webpages_with_suffix_s(mock_webpage_filename_with_styles_and_scripts, image_path): +async def test_generate_webpages_with_styles_and_scripts(mock_webpage_filename_with_styles_and_scripts, image_path): generator = GPTvGenerator() rsp = await generator.generate_webpages(image_path=image_path) logs.logger.info(rsp) @@ -57,7 +57,7 @@ async def test_generate_webpages_with_suffix_s(mock_webpage_filename_with_styles @pytest.mark.asyncio -async def test_generate_webpages_without_suffix_s(mock_webpage_filename_with_style_and_script, image_path): +async def test_generate_webpages_with_style_and_script(mock_webpage_filename_with_style_and_script, image_path): generator = GPTvGenerator() rsp = await generator.generate_webpages(image_path=image_path) logs.logger.info(rsp) @@ -67,7 +67,7 @@ async def test_generate_webpages_without_suffix_s(mock_webpage_filename_with_sty @pytest.mark.asyncio -async def test_save_webpages_with_suffix_s(mock_webpage_filename_with_styles_and_scripts, image_path): +async def test_save_webpages_with_styles_and_scripts(mock_webpage_filename_with_styles_and_scripts, image_path): generator = GPTvGenerator() webpages = await generator.generate_webpages(image_path) webpages_dir = generator.save_webpages(image_path=image_path, webpages=webpages) @@ -76,7 +76,7 @@ async def test_save_webpages_with_suffix_s(mock_webpage_filename_with_styles_and @pytest.mark.asyncio -async def test_save_webpages_without_suffix_s(mock_webpage_filename_with_style_and_script, image_path): +async def test_save_webpages_with_style_and_script(mock_webpage_filename_with_style_and_script, image_path): generator = GPTvGenerator() webpages = await generator.generate_webpages(image_path) webpages_dir = generator.save_webpages(image_path=image_path, webpages=webpages) From 9b72370cbebda21d0ad120ef0f42bc1199cb7922 Mon Sep 17 00:00:00 2001 From: yzlin Date: Mon, 5 Feb 2024 22:15:47 +0800 Subject: [PATCH 3/5] update webscraping tool --- examples/crawl_webpage.py | 2 +- metagpt/tools/libs/web_scraping.py | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/examples/crawl_webpage.py b/examples/crawl_webpage.py index 7dcbf7993..2db9e407b 100644 --- a/examples/crawl_webpage.py +++ b/examples/crawl_webpage.py @@ -10,7 +10,7 @@ async def main(): prompt = """Get data from `paperlist` table in https://papercopilot.com/statistics/iclr-statistics/iclr-2024-statistics/, - and save it to a csv file. paper title must include `multiagent` or `large language model`. *notice: print key data*""" + and save it to a csv file. paper title must include `multiagent` or `large language model`. *notice: print key variables*""" ci = CodeInterpreter(goal=prompt, use_tools=True) await ci.run(prompt) diff --git a/metagpt/tools/libs/web_scraping.py b/metagpt/tools/libs/web_scraping.py index 6fd3b9435..d01e69d09 100644 --- a/metagpt/tools/libs/web_scraping.py +++ b/metagpt/tools/libs/web_scraping.py @@ -4,19 +4,18 @@ @register_tool(tool_type=ToolType.WEBSCRAPING.type_name) -async def scrape_web_playwright(url, *urls): +async def scrape_web_playwright(url): """ - Scrape and save the HTML structure and inner text content of a web page using Playwright. + Asynchronously Scrape and save the HTML structure and inner text content of a web page using Playwright. Args: url (str): The main URL to fetch inner text from. - *urls (str): Additional URLs to fetch inner text from. Returns: - (dict): The inner text content and html structure of the web page, key are : 'inner_text', 'html'. + dict: The inner text content and html structure of the web page, keys are 'inner_text', 'html'. """ # Create a PlaywrightWrapper instance for the Chromium browser - web = await PlaywrightWrapper().run(url, *urls) + web = await PlaywrightWrapper().run(url) # Return the inner text content of the web page return {"inner_text": web.inner_text.strip(), "html": web.html.strip()} From 675b96b0f5c39ce008c00f278b7eb5b5dc9ca501 Mon Sep 17 00:00:00 2001 From: mannaandpoem <1580466765@qq.com> Date: Tue, 6 Feb 2024 09:07:29 +0800 Subject: [PATCH 4/5] remove attribute openai_vision_model and vision_max_tokens and method test_generate_webpages_with_style_and_script --- metagpt/config2.py | 2 -- tests/metagpt/tools/libs/test_gpt_v_generator.py | 12 +----------- 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/metagpt/config2.py b/metagpt/config2.py index d983a43c3..bc6af18c6 100644 --- a/metagpt/config2.py +++ b/metagpt/config2.py @@ -75,8 +75,6 @@ class Config(CLIParams, YamlModel): iflytek_api_key: str = "" azure_tts_subscription_key: str = "" azure_tts_region: str = "" - openai_vision_model: str = "gpt-4-vision-preview" - vision_max_tokens: int = 4096 @classmethod def from_home(cls, path): diff --git a/tests/metagpt/tools/libs/test_gpt_v_generator.py b/tests/metagpt/tools/libs/test_gpt_v_generator.py index 76ada8622..907006765 100644 --- a/tests/metagpt/tools/libs/test_gpt_v_generator.py +++ b/tests/metagpt/tools/libs/test_gpt_v_generator.py @@ -47,17 +47,7 @@ def image_path(): @pytest.mark.asyncio -async def test_generate_webpages_with_styles_and_scripts(mock_webpage_filename_with_styles_and_scripts, image_path): - generator = GPTvGenerator() - rsp = await generator.generate_webpages(image_path=image_path) - logs.logger.info(rsp) - assert "html" in rsp - assert "css" in rsp - assert "javascript" in rsp - - -@pytest.mark.asyncio -async def test_generate_webpages_with_style_and_script(mock_webpage_filename_with_style_and_script, image_path): +async def test_generate_webpages(mock_webpage_filename_with_styles_and_scripts, image_path): generator = GPTvGenerator() rsp = await generator.generate_webpages(image_path=image_path) logs.logger.info(rsp) From 0fe854fa7f7145179ec726c50a8576b1503073d1 Mon Sep 17 00:00:00 2001 From: yzlin Date: Tue, 6 Feb 2024 14:13:02 +0800 Subject: [PATCH 5/5] fix save code --- tests/metagpt/utils/test_save_code.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/metagpt/utils/test_save_code.py b/tests/metagpt/utils/test_save_code.py index 57a19049b..35ad84baf 100644 --- a/tests/metagpt/utils/test_save_code.py +++ b/tests/metagpt/utils/test_save_code.py @@ -41,4 +41,4 @@ async def test_save_code_file_notebook(): notebook = nbformat.read(file_path, as_version=4) assert len(notebook.cells) > 0, "Notebook should have at least one cell" first_cell_source = notebook.cells[0].source - assert "print('Hello, World!')" in first_cell_source, "Notebook cell content does not match" + assert "print" in first_cell_source, "Notebook cell content does not match"