Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat(tools) add tavily extract tool and enhance tavily search implementation #10786

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 6 additions & 11 deletions api/core/tools/provider/builtin/tavily/tavily.yaml
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
identity:
author: Yash Parmar
author: Yash Parmar, Kalo Chin
name: tavily
label:
en_US: Tavily
zh_Hans: Tavily
pt_BR: Tavily
en_US: Tavily Search & Extract
zh_Hans: Tavily 搜索和提取
description:
en_US: Tavily
zh_Hans: Tavily
pt_BR: Tavily
en_US: A powerful AI-native search engine and web content extraction tool that provides highly relevant search results and raw content extraction from web pages.
zh_Hans: 一个强大的原生AI搜索引擎和网页内容提取工具,提供高度相关的搜索结果和网页原始内容提取。
icon: icon.png
tags:
- search
Expand All @@ -19,13 +17,10 @@ credentials_for_provider:
label:
en_US: Tavily API key
zh_Hans: Tavily API key
pt_BR: Tavily API key
placeholder:
en_US: Please input your Tavily API key
zh_Hans: 请输入你的 Tavily API key
pt_BR: Please input your Tavily API key
help:
en_US: Get your Tavily API key from Tavily
zh_Hans: 从 TavilyApi 获取您的 Tavily API key
pt_BR: Get your Tavily API key from Tavily
url: https://docs.tavily.com/docs/welcome
url: https://app.tavily.com/home
145 changes: 145 additions & 0 deletions api/core/tools/provider/builtin/tavily/tools/tavily_extract.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
from typing import Any

import requests

from core.tools.entities.tool_entities import ToolInvokeMessage
from core.tools.tool.builtin_tool import BuiltinTool

# Base URL of the Tavily API; endpoint paths such as "/extract" are appended to it.
TAVILY_API_URL = "https://api.tavily.com"


class TavilyExtract:
    """
    A class for extracting content from web pages using the Tavily Extract API.

    Args:
        api_key (str): The API key for accessing the Tavily Extract API.
        timeout (float): Seconds to wait for the HTTP request before giving up.
            Defaults to 60.

    Methods:
        extract_content: Retrieves extracted content from the Tavily Extract API.
    """

    def __init__(self, api_key: str, timeout: float = 60) -> None:
        self.api_key = api_key
        self.timeout = timeout

    def extract_content(self, params: dict[str, Any]) -> dict:
        """
        Retrieves extracted content from the Tavily Extract API.

        The caller's ``params`` dict is left untouched: the request payload is
        built as a new dict, so the API key is never written back into it.

        Args:
            params (Dict[str, Any]): The extraction parameters. Must contain
                ``urls``; may contain ``api_key`` to override the instance key.

        Returns:
            dict: The decoded JSON body of the API response.

        Raises:
            ValueError: If ``urls`` is missing or has an unsupported type.
            requests.HTTPError: If the API answers with an error status.
            requests.RequestException: On connection failures or timeouts.
        """
        payload = self._process_params(params)

        # Without a timeout, requests waits indefinitely on a stalled connection.
        response = requests.post(f"{TAVILY_API_URL}/extract", json=payload, timeout=self.timeout)
        response.raise_for_status()
        return response.json()

    def _process_params(self, params: dict[str, Any]) -> dict:
        """
        Processes and validates the extraction parameters.

        Only ``urls`` and ``api_key`` are forwarded; any other key in
        ``params`` is ignored.

        Args:
            params (Dict[str, Any]): The extraction parameters.

        Returns:
            dict: A new dict with ``urls`` (list) and ``api_key``.

        Raises:
            ValueError: If ``urls`` is missing, or is neither a string nor a
                list/tuple.
        """
        if "urls" not in params:
            raise ValueError("The 'urls' parameter is required.")

        urls = params["urls"]
        if isinstance(urls, str):
            # Commas and any whitespace (spaces, newlines) all act as separators.
            url_list = urls.replace(",", " ").split()
        elif isinstance(urls, (list, tuple)):
            # Copy so the payload never aliases the caller's sequence.
            url_list = list(urls)
        else:
            # Previously such values were silently dropped, producing a request
            # that carried no URLs at all.
            raise ValueError("The 'urls' parameter must be a string or a list of URLs.")

        return {
            "urls": url_list,
            # An explicit 'api_key' in params takes precedence over the instance key.
            "api_key": params.get("api_key", self.api_key),
        }


class TavilyExtractTool(BuiltinTool):
    """
    A tool for extracting content from web pages using Tavily Extract.
    """

    def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> ToolInvokeMessage | list[ToolInvokeMessage]:
        """
        Invokes the Tavily Extract tool with the given user ID and tool parameters.

        Args:
            user_id (str): The ID of the user invoking the tool.
            tool_parameters (Dict[str, Any]): The parameters for the Tavily Extract tool.
                The ``urls`` entry is required.

        Returns:
            ToolInvokeMessage | list[ToolInvokeMessage]: A single text message when the
                API key or URLs are missing, the request fails, or nothing was extracted;
                otherwise a JSON message with the raw response followed by a markdown
                text message.
        """
        urls = tool_parameters.get("urls", "")
        api_key = self.runtime.credentials.get("tavily_api_key")
        if not api_key:
            return self.create_text_message(
                "Tavily API key is missing. Please set the 'tavily_api_key' in credentials."
            )
        if not urls:
            return self.create_text_message("Please input at least one URL to extract.")

        tavily_extract = TavilyExtract(api_key)
        try:
            # Pass a copy so the credential can never end up in the caller's tool_parameters.
            raw_results = tavily_extract.extract_content(dict(tool_parameters))
        except (requests.RequestException, ValueError) as e:
            # RequestException covers HTTP error statuses as well as connection
            # failures and timeouts; ValueError covers rejected parameters and,
            # on older requests versions, an undecodable JSON body.
            return self.create_text_message(f"Error occurred while extracting content: {str(e)}")

        if not raw_results.get("results"):
            return self.create_text_message("No content could be extracted from the provided URLs.")

        # Always return a JSON message with all data, plus a readable text rendering.
        json_message = self.create_json_message(raw_results)
        text_message = self.create_text_message(text=self._format_results_as_text(raw_results))

        return [json_message, text_message]

    def _format_results_as_text(self, raw_results: dict) -> str:
        """
        Formats the raw extraction results into markdown text.

        Each entry of ``results`` becomes a numbered section with its URL and raw
        content; entries of ``failed_results``, if any, are listed at the end.

        Args:
            raw_results (dict): The raw extraction results.

        Returns:
            str: The formatted markdown text.
        """
        output_lines = []

        for idx, result in enumerate(raw_results.get("results", []), 1):
            url = result.get("url", "")
            raw_content = result.get("raw_content", "")

            output_lines.append(f"## Extracted Content {idx}: {url}\n")
            output_lines.append(f"**Raw Content:**\n{raw_content}\n")
            output_lines.append("---\n")

        if raw_results.get("failed_results"):
            output_lines.append("## Failed URLs:\n")
            for failed in raw_results["failed_results"]:
                url = failed.get("url", "")
                error = failed.get("error", "Unknown error")
                output_lines.append(f"- {url}: {error}\n")

        return "\n".join(output_lines)
23 changes: 23 additions & 0 deletions api/core/tools/provider/builtin/tavily/tools/tavily_extract.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Tool manifest for the Tavily Extract tool (tavily_extract).
identity:
name: tavily_extract
author: Kalo Chin
label:
en_US: Tavily Extract
zh_Hans: Tavily Extract
# 'human' texts are localized per language; 'llm' is a single English description.
description:
human:
en_US: A web extraction tool built specifically for AI agents (LLMs), delivering raw content from web pages.
zh_Hans: 专为人工智能代理 (LLM) 构建的网页提取工具,提供网页的原始内容。
llm: A tool for extracting raw content from web pages, designed for AI agents (LLMs).
parameters:
# The only parameter: a required string of URLs, declared with form 'llm'.
- name: urls
type: string
required: true
label:
en_US: URLs
zh_Hans: URLs
human_description:
en_US: A comma-separated list of URLs to extract content from.
zh_Hans: 要从中提取内容的 URL 的逗号分隔列表。
llm_description: A comma-separated list of URLs to extract content from.
form: llm
Loading