From 1e36a3da8422b87079bfb1a1c6aa250cce5917d0 Mon Sep 17 00:00:00 2001 From: TomasLiu Date: Tue, 15 Oct 2024 12:09:40 +0800 Subject: [PATCH] fix readme and manifest --- .../extension/azure_vision_python/README.md | 47 +++++++++++------ .../azure_vision_python/manifest.json | 31 ++++++++++- .../bingsearch_tool_python/README.md | 27 +++++----- .../bingsearch_tool_python/manifest.json | 52 ++++++++++++++++++- .../extension/vision_tool_python/README.md | 33 +++++++----- .../weatherapi_tool_python/README.md | 20 +++---- 6 files changed, 155 insertions(+), 55 deletions(-) diff --git a/agents/ten_packages/extension/azure_vision_python/README.md b/agents/ten_packages/extension/azure_vision_python/README.md index 525362b3..e8cb3ca4 100644 --- a/agents/ten_packages/extension/azure_vision_python/README.md +++ b/agents/ten_packages/extension/azure_vision_python/README.md @@ -1,29 +1,44 @@ # azure_vision_python - +This extension calls the Azure AI Vision service. -## Features - - +The documentation is as follows: https://learn.microsoft.com/zh-cn/azure/ai-services/computer-vision/overview -- xxx feature +## Properties -## API - -Refer to `api` definition in [manifest.json] and default values in [property.json](property.json). +- key +- endpoint - - -## Development +## Features -### Build +- Only supports a single image frame +- No customization of the analyzed features +- By default, includes `TAGS`, `CAPTION`, `READ`, `PEOPLE`, `OBJECTS` - +## API -### Unit test +Refer to `api` definition in [manifest.json](manifest.json) and default values in [property.json](property.json). 
- +Other extensions can call the `analyze_image` cmd and will get the full analysis result in the `response` property of the cmd result. The result looks like this: + +``` json +{ + "modelVersion": "2023-10-01", + "captionResult": { + "text": "a group of toys on a table", + "confidence": 0.7558467388153076 + }, + "metadata": { + "width": 320, + "height": 240 + }, + "objectsResult": {}, + "readResult": {}, + "peopleResult": {} +} +``` ## Misc - +- Video analysis +- Multi-frame analysis \ No newline at end of file diff --git a/agents/ten_packages/extension/azure_vision_python/manifest.json b/agents/ten_packages/extension/azure_vision_python/manifest.json index c6e5cf1a..d9448f45 100644 --- a/agents/ten_packages/extension/azure_vision_python/manifest.json +++ b/agents/ten_packages/extension/azure_vision_python/manifest.json @@ -19,5 +19,34 @@ "README.md" ] }, - "api": {} + "api": { + "property": { + "key": { + "type": "string" + }, + "endpoint": { + "type": "string" + } + }, + "cmd_in": [ + { + "name": "analyze_image", + "property": { + "image_data": { + "type": "buf" + } + }, + "required": [ + "image_data" + ], + "result": { + "property": { + "response": { + "type": "string" + } + } + } + } + ] + } } \ No newline at end of file diff --git a/agents/ten_packages/extension/bingsearch_tool_python/README.md b/agents/ten_packages/extension/bingsearch_tool_python/README.md index 581fdf5e..b5b20b98 100644 --- a/agents/ten_packages/extension/bingsearch_tool_python/README.md +++ b/agents/ten_packages/extension/bingsearch_tool_python/README.md @@ -1,29 +1,26 @@ # bingsearch_tool_python - +This is a tool for Bing search; the documentation link is as follows: https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/quickstarts/rest/python + +It is built using TEN Tool Call Protocol (Beta). ## Features - +It is a Bing search tool that automatically registers itself with any LLM extension. + +The tool description is as follows: -- xxx feature +*Use Bing.com to search for latest information. 
Call this function if you are not sure about the answer.* ## API Refer to `api` definition in [manifest.json] and default values in [property.json](property.json). - - -## Development - -### Build - - - -### Unit test - - +- out: `tool_register` +- in: `tool_call` ## Misc - +- use Tool Call Protocol Standard +- support async call +- apply asyncio template diff --git a/agents/ten_packages/extension/bingsearch_tool_python/manifest.json b/agents/ten_packages/extension/bingsearch_tool_python/manifest.json index 3e5a4193..47f22fdd 100644 --- a/agents/ten_packages/extension/bingsearch_tool_python/manifest.json +++ b/agents/ten_packages/extension/bingsearch_tool_python/manifest.json @@ -19,5 +19,55 @@ "README.md" ] }, - "api": {} + "api": { + "property": { + "api_key": { + "type": "string" + } + }, + "cmd_out": [ + { + "name": "tool_register", + "property": { + "name": { + "type": "string" + }, + "description": { + "type": "string" + }, + "parameters": { + "type": "string" + } + }, + "required": [ + "name", + "description", + "parameters" + ], + "result": { + "property": { + "response": { + "type": "string" + } + } + } + } + ], + "cmd_in": [ + { + "name": "tool_call", + "property": { + "name": { + "type": "string" + }, + "args": { + "type": "string" + } + }, + "required": [ + "name" + ] + } + ] + } } \ No newline at end of file diff --git a/agents/ten_packages/extension/vision_tool_python/README.md b/agents/ten_packages/extension/vision_tool_python/README.md index b00a3151..74278fb9 100644 --- a/agents/ten_packages/extension/vision_tool_python/README.md +++ b/agents/ten_packages/extension/vision_tool_python/README.md @@ -1,29 +1,36 @@ # vision_tool_python - +This is a tool that provides vision capability. Currently there are two patterns: +- use a traditional model +- use a multimodal LLM model -## Features +The pattern can be switched with the `use_llm` setting, which selects a different cmd protocol. - +The tool description is as follows: -- xxx feature +*Query to the latest frame from camera. 
The camera is always on, always use latest frame to answer user's question. Call this whenever you need to understand the input camera image like you have vision capability, for example when user asks 'What can you see?', 'Can you see me?', 'take a look.'* -## API +It is built using TEN Tool Call Protocol (Beta). -Refer to `api` definition in [manifest.json] and default values in [property.json](property.json). +## Features - +The tool accepts video frames from the RTC extension. -## Development +The tool registers itself with the LLM extension only after a video frame has been received. -### Build +The tool caches a video frame every `frequency_ms` milliseconds. - +## API -### Unit test +Refer to `api` definition in [manifest.json](manifest.json) and default values in [property.json](property.json). - +- out: `tool_register` +- in: `tool_call` +- out(`use_llm=false`): `analyze_image` +- out(`use_llm=true`): `chat_completion` ## Misc - +- Multi-frame support +- Movement detection +- Prompt Engineering \ No newline at end of file diff --git a/agents/ten_packages/extension/weatherapi_tool_python/README.md b/agents/ten_packages/extension/weatherapi_tool_python/README.md index de7c18aa..81808f90 100644 --- a/agents/ten_packages/extension/weatherapi_tool_python/README.md +++ b/agents/ten_packages/extension/weatherapi_tool_python/README.md @@ -1,21 +1,23 @@ # weatherapi_tool_python -This is the tool demo for weather query. +This is the tool for weather queries, including current weather, forecast, and historical weather lookup; the documentation link is as follows: https://www.weatherapi.com/docs/ + +It is built using TEN Tool Call Protocol (Beta). ## Features +For the free plan: - Fetch today's weather. -- Search for history weather. +- Search for historical weather within 7 days. - Forcast weather in 3 days. -## API - -Refer to `api` definition in [manifest.json] and default values in [property.json](property.json). +You can extend these limits by using another plan in your project. 
-### Out: +https://www.weatherapi.com/pricing.aspx -- `tool_register`: auto register tool to llm +## API -### In: +Refer to `api` definition in [manifest.json](manifest.json) and default values in [property.json](property.json). -- `tool_call`: sync cmd to fetch weather +- out: `tool_register` +- in: `tool_call`