diff --git a/docs/sphinx_doc/en/source/agentscope.service.web.rst b/docs/sphinx_doc/en/source/agentscope.service.web.rst new file mode 100644 index 000000000..ed7c2a2b2 --- /dev/null +++ b/docs/sphinx_doc/en/source/agentscope.service.web.rst @@ -0,0 +1,34 @@ +Web search package +======================================= + +search module +--------------------------------------------- + +.. automodule:: agentscope.service.web.search + :members: + :undoc-members: + :show-inheritance: + +arxiv module +--------------------------------------------- + +.. automodule:: agentscope.service.web.arxiv + :members: + :undoc-members: + :show-inheritance: + +download module +--------------------------------------------- + +.. automodule:: agentscope.service.web.download + :members: + :undoc-members: + :show-inheritance: + +web\_digest module +--------------------------------------------- + +.. automodule:: agentscope.service.web.web_digest + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/sphinx_doc/en/source/agentscope.service.web_search.rst b/docs/sphinx_doc/en/source/agentscope.service.web_search.rst deleted file mode 100644 index d2ef72a9a..000000000 --- a/docs/sphinx_doc/en/source/agentscope.service.web_search.rst +++ /dev/null @@ -1,18 +0,0 @@ -Web search package -======================================= - -search module ---------------------------------------------- - -.. automodule:: agentscope.service.web_search.search - :members: - :undoc-members: - :show-inheritance: - -web\_digest module ---------------------------------------------- - -.. 
automodule:: agentscope.service.web_search.web_digest - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/sphinx_doc/en/source/tutorial/204-service.md b/docs/sphinx_doc/en/source/tutorial/204-service.md index be4916684..826b1fec6 100644 --- a/docs/sphinx_doc/en/source/tutorial/204-service.md +++ b/docs/sphinx_doc/en/source/tutorial/204-service.md @@ -13,7 +13,7 @@ AgentScope and how to use them to enhance the capabilities of your agents. The following table outlines the various Service functions by type. These functions can be called using `agentscope.service.{function_name}`. | Service Scene | Service Function Name | Description | -| --------------------------- | --------------------- | -------------------------------------------------------------------------------------------------------------- | +|-----------------------------|-----------------------|----------------------------------------------------------------------------------------------------------------| | Code | `execute_python_code` | Execute a piece of Python code, optionally inside a Docker container. | | Retrieval | `retrieve_from_list` | Retrieve a specific item from a list based on given criteria. | | | `cos_sim` | Compute the cosine similarity between two different embeddings. | @@ -21,8 +21,11 @@ The following table outlines the various Service functions by type. These functi | | `query_sqlite` | Execute SQL queries on a SQLite database and return results. | | | `query_mongodb` | Perform queries or operations on a MongoDB collection. | | Text Processing | `summarization` | Summarize a piece of text using a large language model to highlight its main points. | -| Web Search | `web_search` | Perform a web search using a specified search engine (currently supports Google and Bing). | -| | `load_web` | Load and parse the web page of the specificed url (currently only supports HTML). 
| +| Web | `bing_search` | Perform bing search | +| | `google_search` | Perform google search | +| | `arxiv_search` | Perform arXiv search | +| | `download_from_url` | Download file from given URL. | +| | `load_web` | Load and parse the web page of the specified url (currently only supports HTML). | | | `digest_webpage` | Digest the content of a already loaded web page (currently only supports HTML). | | File | `create_file` | Create a new file at a specified path, optionally with initial content. | | | `delete_file` | Delete a file specified by a file path. | diff --git a/docs/sphinx_doc/zh_CN/source/agentscope.service.web.rst b/docs/sphinx_doc/zh_CN/source/agentscope.service.web.rst new file mode 100644 index 000000000..ed7c2a2b2 --- /dev/null +++ b/docs/sphinx_doc/zh_CN/source/agentscope.service.web.rst @@ -0,0 +1,34 @@ +Web search package +======================================= + +search module +--------------------------------------------- + +.. automodule:: agentscope.service.web.search + :members: + :undoc-members: + :show-inheritance: + +arxiv module +--------------------------------------------- + +.. automodule:: agentscope.service.web.arxiv + :members: + :undoc-members: + :show-inheritance: + +download module +--------------------------------------------- + +.. automodule:: agentscope.service.web.download + :members: + :undoc-members: + :show-inheritance: + +web\_digest module +--------------------------------------------- + +.. automodule:: agentscope.service.web.web_digest + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/sphinx_doc/zh_CN/source/agentscope.service.web_search.rst b/docs/sphinx_doc/zh_CN/source/agentscope.service.web_search.rst deleted file mode 100644 index d2ef72a9a..000000000 --- a/docs/sphinx_doc/zh_CN/source/agentscope.service.web_search.rst +++ /dev/null @@ -1,18 +0,0 @@ -Web search package -======================================= - -search module ---------------------------------------------- - -.. 
automodule:: agentscope.service.web_search.search - :members: - :undoc-members: - :show-inheritance: - -web\_digest module ---------------------------------------------- - -.. automodule:: agentscope.service.web_search.web_digest - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/sphinx_doc/zh_CN/source/tutorial/204-service.md b/docs/sphinx_doc/zh_CN/source/tutorial/204-service.md index 72d1bc7ae..e3938d1db 100644 --- a/docs/sphinx_doc/zh_CN/source/tutorial/204-service.md +++ b/docs/sphinx_doc/zh_CN/source/tutorial/204-service.md @@ -9,29 +9,32 @@ 下面的表格按照类型概述了各种Service函数。以下函数可以通过`agentscope.service.{函数名}`进行调用。 -| Service场景 | Service函数名称 | 描述 | -| ------------------ | --------------------- | ---------------------------------------------------------------- | -| 代码 | `execute_python_code` | 执行一段 Python 代码,可选择在 Docker
容器内部执行。 | -| 检索 | `retrieve_from_list` | 根据给定的标准从列表中检索特定项目。 | -| | `cos_sim` | 计算2个embedding的余弦相似度。 | -| SQL查询 | `query_mysql` | 在 MySQL 数据库上执行 SQL 查询并返回结果。 | -| | `query_sqlite` | 在 SQLite 数据库上执行 SQL 查询并返回结果。 | -| | `query_mongodb` | 对 MongoDB 集合执行查询或操作。 | -| 文本处理 | `summarization` | 使用大型语言模型总结一段文字以突出其主要要点。 | -| 网络搜索 | `web_search` | 使用指定的搜索引擎(当前支持 Google 和 Bing)执行网络搜索。 | -| | `load_web` | 爬取并解析指定的网页链接 (目前仅支持爬取 HTML 页面) | -| | `digest_webpage` | 对已经爬取好的网页生成摘要信息(目前仅支持 HTML 页面) | -| 文件处理 | `create_file` | 在指定路径创建一个新文件,并可选择添加初始内容。 | -| | `delete_file` | 删除由文件路径指定的文件。 | -| | `move_file` | 将文件从一个路径移动或重命名到另一个路径。 | -| | `create_directory` | 在指定路径创建一个新的目录。 | -| | `delete_directory` | 删除一个目录及其所有内容。 | -| | `move_directory` | 将目录从一个路径移动或重命名到另一个路径。 | -| | `read_text_file` | 读取并返回文本文件的内容。 | -| | `write_text_file` | 向指定路径的文件写入文本内容。 | -| | `read_json_file` | 读取并解析 JSON 文件的内容。 | -| | `write_json_file` | 将 Python 对象序列化为 JSON 并写入到文件。 | -| *更多服务即将推出* | | 正在开发更多服务功能,并将添加到 AgentScope 以进一步增强其能力。 | +| Service场景 | Service函数名称 | 描述 | +|------------|-----------------------|-----------------------------------------| +| 代码 | `execute_python_code` | 执行一段 Python 代码,可选择在 Docker
容器内部执行。 | +| 检索 | `retrieve_from_list` | 根据给定的标准从列表中检索特定项目。 | +| | `cos_sim` | 计算2个embedding的余弦相似度。 | +| SQL查询 | `query_mysql` | 在 MySQL 数据库上执行 SQL 查询并返回结果。 | +| | `query_sqlite` | 在 SQLite 数据库上执行 SQL 查询并返回结果。 | +| | `query_mongodb` | 对 MongoDB 集合执行查询或操作。 | +| 文本处理 | `summarization` | 使用大型语言模型总结一段文字以突出其主要要点。 | +| 网络 | `bing_search` | 使用bing搜索。 | +| | `google_search` | 使用google搜索。 | +| | `arxiv_search` | 使用arxiv搜索。 | +| | `download_from_url` | 从指定的 URL 下载文件。 | +| | `load_web` | 爬取并解析指定的网页链接 (目前仅支持爬取 HTML 页面) | +| | `digest_webpage` | 对已经爬取好的网页生成摘要信息(目前仅支持 HTML 页面) | +| 文件处理 | `create_file` | 在指定路径创建一个新文件,并可选择添加初始内容。 | +| | `delete_file` | 删除由文件路径指定的文件。 | +| | `move_file` | 将文件从一个路径移动或重命名到另一个路径。 | +| | `create_directory` | 在指定路径创建一个新的目录。 | +| | `delete_directory` | 删除一个目录及其所有内容。 | +| | `move_directory` | 将目录从一个路径移动或重命名到另一个路径。 | +| | `read_text_file` | 读取并返回文本文件的内容。 | +| | `write_text_file` | 向指定路径的文件写入文本内容。 | +| | `read_json_file` | 读取并解析 JSON 文件的内容。 | +| | `write_json_file` | 将 Python 对象序列化为 JSON 并写入到文件。 | +| *更多服务即将推出* | | 正在开发更多服务功能,并将添加到 AgentScope 以进一步增强其能力。 | 关于详细的参数、预期输入格式、返回类型,请参阅[API文档](https://modelscope.github.io/agentscope/)。 diff --git a/examples/distributed_search/answerer_agent.py b/examples/distributed_search/answerer_agent.py index 5e0982080..5b441957b 100644 --- a/examples/distributed_search/answerer_agent.py +++ b/examples/distributed_search/answerer_agent.py @@ -3,7 +3,7 @@ from agentscope.message import Msg from agentscope.agents import AgentBase -from agentscope.service.web_search.web_digest import load_web +from agentscope.service import load_web class AnswererAgent(AgentBase): diff --git a/examples/distributed_search/searcher_agent.py b/examples/distributed_search/searcher_agent.py index 127acf13e..63aec9e11 100644 --- a/examples/distributed_search/searcher_agent.py +++ b/examples/distributed_search/searcher_agent.py @@ -4,7 +4,7 @@ from functools import partial from agentscope.message import Msg from agentscope.agents import AgentBase -from 
agentscope.service.web_search.search import google_search, bing_search +from agentscope.service import google_search, bing_search class SearcherAgent(AgentBase): diff --git a/src/agentscope/service/__init__.py b/src/agentscope/service/__init__.py index 50e9e89a6..8eb59a564 100644 --- a/src/agentscope/service/__init__.py +++ b/src/agentscope/service/__init__.py @@ -16,15 +16,16 @@ from .sql_query.mysql import query_mysql from .sql_query.sqlite import query_sqlite from .sql_query.mongodb import query_mongodb -from .web_search.search import bing_search, google_search -from .web_search.arxiv import arxiv_search +from .web.search import bing_search, google_search +from .web.arxiv import arxiv_search from .service_response import ServiceResponse from .service_factory import ServiceFactory from .retrieval.similarity import cos_sim from .text_processing.summarization import summarization from .retrieval.retrieval_from_list import retrieve_from_list from .service_status import ServiceExecStatus -from .web_search.web_digest import digest_webpage, load_web, parse_html_to_text +from .web.web_digest import digest_webpage, load_web, parse_html_to_text +from .web.download import download_from_url def get_help() -> None: @@ -60,4 +61,5 @@ def get_help() -> None: "digest_webpage", "load_web", "parse_html_to_text", + "download_from_url", ] diff --git a/src/agentscope/service/web_search/__init__.py b/src/agentscope/service/web/__init__.py similarity index 100% rename from src/agentscope/service/web_search/__init__.py rename to src/agentscope/service/web/__init__.py diff --git a/src/agentscope/service/web_search/arxiv.py b/src/agentscope/service/web/arxiv.py similarity index 100% rename from src/agentscope/service/web_search/arxiv.py rename to src/agentscope/service/web/arxiv.py diff --git a/src/agentscope/service/web/download.py b/src/agentscope/service/web/download.py new file mode 100644 index 000000000..007423362 --- /dev/null +++ b/src/agentscope/service/web/download.py @@ -0,0 
+1,95 @@
+# -*- coding: utf-8 -*-
+"""Download file from URL."""
+import os
+
+import requests
+from tqdm import tqdm
+
+from agentscope.service.service_response import ServiceResponse
+from agentscope.service.service_status import ServiceExecStatus
+
+
+def _remove_partial_file(filepath: str) -> None:
+    """Delete a partially written download, if one was left behind."""
+    if os.path.exists(filepath):
+        os.remove(filepath)
+
+
+def download_from_url(
+    url: str,
+    filepath: str,
+    timeout: int = 120,
+    retries: int = 3,
+) -> ServiceResponse:
+    """Download file from the given url to the specified location.
+
+    Args:
+        url (`str`):
+            The URL of the file to download.
+        filepath (`str`):
+            The path to save the downloaded file.
+        timeout (`int`, defaults to `120`):
+            The timeout for the download request.
+        retries (`int`, defaults to `3`):
+            The number of additional attempts made after a failed
+            download request.
+
+    Returns:
+        `ServiceResponse`: A `ServiceResponse` object that contains execution
+        results or error message.
+    """
+
+    # Refuse to overwrite: an existing target file is reported as an error
+    if os.path.exists(filepath):
+        return ServiceResponse(
+            status=ServiceExecStatus.ERROR,
+            content=f"The file {filepath} already exists.",
+        )
+
+    chunk_size = 1024 * 32  # 32 KB
+    error_msg = ""
+
+    # One initial attempt plus up to `retries` further attempts
+    for _ in range(max(retries, 0) + 1):
+        try:
+            # Context managers release the connection, the file handle and
+            # the progress bar even when the transfer fails midway
+            with requests.Session() as session:
+                with session.get(
+                    url,
+                    stream=True,
+                    timeout=timeout,
+                ) as response:
+                    response.raise_for_status()
+
+                    file_size = int(
+                        response.headers.get("content-length", 0),
+                    )
+                    with open(filepath, "wb") as file, tqdm(
+                        total=file_size,
+                        unit="iB",
+                        unit_scale=True,
+                    ) as progress_bar:
+                        for chunk in response.iter_content(
+                            chunk_size=chunk_size,
+                        ):
+                            progress_bar.update(len(chunk))
+                            file.write(chunk)
+        except requests.exceptions.RequestException as e:
+            error_msg = str(e)
+            # Never leave an incomplete file behind, even when this was
+            # the last attempt
+            _remove_partial_file(filepath)
+        else:
+            return ServiceResponse(
+                status=ServiceExecStatus.SUCCESS,
+                content={
+                    "url": url,
+                    "saved_file_path": filepath,
+                },
+            )
+
+    return ServiceResponse(
+        status=ServiceExecStatus.ERROR,
+        content=f"Failed to download file from {url}: {error_msg}",
+    )
diff --git
a/src/agentscope/service/web_search/search.py b/src/agentscope/service/web/search.py similarity index 100% rename from src/agentscope/service/web_search/search.py rename to src/agentscope/service/web/search.py diff --git a/src/agentscope/service/web_search/web_digest.py b/src/agentscope/service/web/web_digest.py similarity index 100% rename from src/agentscope/service/web_search/web_digest.py rename to src/agentscope/service/web/web_digest.py diff --git a/tests/web_search_test.py b/tests/web_search_test.py index 0e406b1ab..06dcf2057 100644 --- a/tests/web_search_test.py +++ b/tests/web_search_test.py @@ -6,7 +6,7 @@ from agentscope.service import ServiceResponse, arxiv_search from agentscope.service import bing_search, google_search from agentscope.service.service_status import ServiceExecStatus -from agentscope.service.web_search.arxiv import _reformat_query +from agentscope.service.web.arxiv import _reformat_query class TestWebSearches(unittest.TestCase):