Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update gptv tool and save code tests #847

Merged
merged 8 commits into from
Feb 6, 2024
2 changes: 1 addition & 1 deletion examples/crawl_webpage.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

async def main():
prompt = """Get data from `paperlist` table in https://papercopilot.com/statistics/iclr-statistics/iclr-2024-statistics/,
and save it to a csv file. paper title must include `multiagent` or `large language model`. *notice: print key data*"""
and save it to a csv file. paper title must include `multiagent` or `large language model`. *notice: print key variables*"""
ci = CodeInterpreter(goal=prompt, use_tools=True)

await ci.run(prompt)
Expand Down
2 changes: 0 additions & 2 deletions metagpt/config2.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,6 @@ class Config(CLIParams, YamlModel):
iflytek_api_key: str = ""
azure_tts_subscription_key: str = ""
azure_tts_region: str = ""
openai_vision_model: str = "gpt-4-vision-preview"
vision_max_tokens: int = 4096

@classmethod
def from_home(cls, path):
Expand Down
93 changes: 20 additions & 73 deletions metagpt/tools/libs/gpt_v_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,13 @@
@Author : mannaandpoem
@File : gpt_v_generator.py
"""
import base64
import os
from pathlib import Path

import requests

from metagpt.const import DEFAULT_WORKSPACE_ROOT
from metagpt.tools.tool_registry import register_tool
from metagpt.tools.tool_type import ToolType
from metagpt.utils.common import encode_image

ANALYZE_LAYOUT_PROMPT = """You are now a UI/UX, please generate layout information for this image:

Expand Down Expand Up @@ -43,86 +41,41 @@ class GPTvGenerator:
def __init__(self):
"""Initialize GPTvGenerator class with default values from the configuration."""
from metagpt.config2 import config
from metagpt.llm import LLM

self.api_key = config.llm.api_key
self.api_base = config.llm.base_url
self.model = config.openai_vision_model
self.max_tokens = config.vision_max_tokens
self.llm = LLM(llm_config=config.get_openai_llm())
self.llm.model = "gpt-4-vision-preview"

def analyze_layout(self, image_path):
"""Analyze the layout of the given image and return the result.
async def analyze_layout(self, image_path: Path) -> str:
"""Asynchronously analyze the layout of the given image and return the result.

This is a helper method to generate a layout description based on the image.

Args:
image_path (str): Path of the image to analyze.
image_path (Path): Path of the image to analyze.

Returns:
str: The layout analysis result.
"""
return self.get_result(image_path, ANALYZE_LAYOUT_PROMPT)
return await self.llm.aask(msg=ANALYZE_LAYOUT_PROMPT, images=[encode_image(image_path)])

def generate_webpages(self, image_path):
"""Generate webpages including all code (HTML, CSS, and JavaScript) in one go based on the image.
async def generate_webpages(self, image_path: str) -> str:
"""Asynchronously generate webpages including all code (HTML, CSS, and JavaScript) in one go based on the image.

Args:
image_path (str): The path of the image file.

Returns:
str: Generated webpages content.
"""
layout = self.analyze_layout(image_path)
if isinstance(image_path, str):
image_path = Path(image_path)
layout = await self.analyze_layout(image_path)
prompt = GENERATE_PROMPT + "\n\n # Context\n The layout information of the sketch image is: \n" + layout
result = self.get_result(image_path, prompt)
return result

def get_result(self, image_path, prompt):
"""Get the result from the vision model based on the given image path and prompt.

Args:
image_path (str): Path of the image to analyze.
prompt (str): Prompt to use for the analysis.

Returns:
str: The model's response as a string.
"""
base64_image = self.encode_image(image_path)
headers = {"Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}"}
payload = {
"model": self.model,
"messages": [
{
"role": "user",
"content": [
{"type": "text", "text": prompt},
{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}},
],
}
],
"max_tokens": self.max_tokens,
}
response = requests.post(f"{self.api_base}/chat/completions", headers=headers, json=payload)

if response.status_code != 200:
raise ValueError(f"Request failed with status {response.status_code}, {response.text}")
else:
return response.json()["choices"][0]["message"]["content"]
return await self.llm.aask(msg=prompt, images=[encode_image(image_path)])

@staticmethod
def encode_image(image_path):
"""Encode the image at the given path to a base64 string.

Args:
image_path (str): Path of the image to encode.

Returns:
str: The base64 encoded string of the image.
"""
with open(image_path, "rb") as image_file:
return base64.b64encode(image_file.read()).decode("utf-8")

@staticmethod
def save_webpages(image_path, webpages) -> Path:
def save_webpages(image_path: str, webpages: str) -> Path:
"""Save webpages including all code (HTML, CSS, and JavaScript) at once.

Args:
Expand All @@ -132,35 +85,29 @@ def save_webpages(image_path, webpages) -> Path:
Returns:
Path: The path of the saved webpages.
"""
# 在workspace目录下,创建一个名为下webpages的文件夹,用于存储html、css和js文件
# Create a folder called webpages in the workspace directory to store HTML, CSS, and JavaScript files
webpages_path = DEFAULT_WORKSPACE_ROOT / "webpages" / Path(image_path).stem
os.makedirs(webpages_path, exist_ok=True)

index_path = webpages_path / "index.html"

try:
index = webpages.split("```html")[1].split("```")[0]
except IndexError:
index = "No html code found in the result, please check your image and try again." + "\n" + webpages

try:
style_path = None
if "styles.css" in index:
style_path = webpages_path / "styles.css"
elif "style.css" in index:
style_path = webpages_path / "style.css"
else:
style_path = None
style = webpages.split("```css")[1].split("```")[0] if style_path else ""

js_path = None
if "scripts.js" in index:
js_path = webpages_path / "scripts.js"
elif "script.js" in index:
js_path = webpages_path / "script.js"
else:
js_path = None

js = webpages.split("```javascript")[1].split("```")[0] if js_path else ""
except IndexError:
raise ValueError("No css or js code found in the result, please check your image and try again.")
raise ValueError(f"No html or css or js code found in the result. \nWebpages: {webpages}")

try:
with open(index_path, "w", encoding="utf-8") as f:
Expand Down
9 changes: 4 additions & 5 deletions metagpt/tools/libs/web_scraping.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,18 @@


@register_tool(tool_type=ToolType.WEBSCRAPING.type_name)
async def scrape_web_playwright(url, *urls):
async def scrape_web_playwright(url):
"""
Scrape and save the HTML structure and inner text content of a web page using Playwright.
Asynchronously scrape and save the HTML structure and inner text content of a web page using Playwright.

Args:
url (str): The main URL to fetch inner text from.
*urls (str): Additional URLs to fetch inner text from.

Returns:
(dict): The inner text content and html structure of the web page, key are : 'inner_text', 'html'.
dict: The inner text content and html structure of the web page, keys are 'inner_text', 'html'.
"""
# Create a PlaywrightWrapper instance for the Chromium browser
web = await PlaywrightWrapper().run(url, *urls)
web = await PlaywrightWrapper().run(url)

# Return the inner text content and HTML structure of the web page
return {"inner_text": web.inner_text.strip(), "html": web.html.strip()}
3 changes: 2 additions & 1 deletion metagpt/tools/tool_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ def convert_code_to_tool_schema(obj, include: list[str] = []):
# method_doc = inspect.getdoc(method)
method_doc = get_class_method_docstring(obj, name)
if method_doc:
schema["methods"][name] = docstring_to_schema(method_doc)
function_type = "function" if not inspect.iscoroutinefunction(method) else "async_function"
schema["methods"][name] = {"type": function_type, **docstring_to_schema(method_doc)}

elif inspect.isfunction(obj):
schema = {
Expand Down
67 changes: 56 additions & 11 deletions tests/metagpt/tools/libs/test_gpt_v_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,36 +5,81 @@
@Author : mannaandpoem
@File : test_gpt_v_generator.py
"""
from pathlib import Path

import pytest

from metagpt import logs
from metagpt.const import METAGPT_ROOT
from metagpt.tools.libs.gpt_v_generator import GPTvGenerator


@pytest.fixture
def mock_webpages(mocker):
def mock_webpage_filename_with_styles_and_scripts(mocker):
mock_data = """```html\n<html>\n<script src="scripts.js"></script>
<link rel="stylesheet" href="styles.css(">\n</html>\n```\n
```css\n.class { ... }\n```\n
```javascript\nfunction() { ... }\n```\n"""
mocker.patch("metagpt.tools.libs.gpt_v_generator.GPTvGenerator.generate_webpages", return_value=mock_data)
<link rel="stylesheet" href="styles.css">\n</html>\n```\n
```css\n/* styles.css */\n```\n
```javascript\n// scripts.js\n```\n"""
mocker.patch("metagpt.provider.base_llm.BaseLLM.aask", return_value=mock_data)
return mocker


@pytest.fixture
def mock_webpage_filename_with_style_and_script(mocker):
mock_data = """```html\n<html>\n<script src="script.js"></script>
<link rel="stylesheet" href="style.css">\n</html>\n```\n
```css\n/* style.css */\n```\n
```javascript\n// script.js\n```\n"""
mocker.patch("metagpt.provider.base_llm.BaseLLM.aask", return_value=mock_data)
return mocker


def test_vision_generate_webpages(mock_webpages):
image_path = "image.png"
@pytest.fixture
def mock_image_layout(mocker):
image_layout = "The layout information of the sketch image is ..."
mocker.patch("metagpt.provider.base_llm.BaseLLM.aask", return_value=image_layout)
return mocker


@pytest.fixture
def image_path():
return f"{METAGPT_ROOT}/docs/resources/workspace/content_rec_sys/resources/competitive_analysis.png"


@pytest.mark.asyncio
async def test_generate_webpages(mock_webpage_filename_with_styles_and_scripts, image_path):
generator = GPTvGenerator()
rsp = generator.generate_webpages(image_path=image_path)
rsp = await generator.generate_webpages(image_path=image_path)
logs.logger.info(rsp)
assert "html" in rsp
assert "css" in rsp
assert "javascript" in rsp


def test_save_webpages(mock_webpages):
image_path = "image.png"
@pytest.mark.asyncio
async def test_save_webpages_with_styles_and_scripts(mock_webpage_filename_with_styles_and_scripts, image_path):
generator = GPTvGenerator()
webpages = generator.generate_webpages(image_path)
webpages = await generator.generate_webpages(image_path)
webpages_dir = generator.save_webpages(image_path=image_path, webpages=webpages)
logs.logger.info(webpages_dir)
assert webpages_dir.exists()


@pytest.mark.asyncio
async def test_save_webpages_with_style_and_script(mock_webpage_filename_with_style_and_script, image_path):
generator = GPTvGenerator()
webpages = await generator.generate_webpages(image_path)
webpages_dir = generator.save_webpages(image_path=image_path, webpages=webpages)
logs.logger.info(webpages_dir)
assert webpages_dir.exists()


@pytest.mark.asyncio
async def test_analyze_layout(mock_image_layout, image_path):
layout = await GPTvGenerator().analyze_layout(Path(image_path))
logs.logger.info(layout)
assert layout


if __name__ == "__main__":
pytest.main([__file__, "-s"])
2 changes: 1 addition & 1 deletion tests/metagpt/utils/test_save_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,4 @@ async def test_save_code_file_notebook():
notebook = nbformat.read(file_path, as_version=4)
assert len(notebook.cells) > 0, "Notebook should have at least one cell"
first_cell_source = notebook.cells[0].source
assert "print('Hello, World!')" in first_cell_source, "Notebook cell content does not match"
assert "print" in first_cell_source, "Notebook cell content does not match"
Loading