From 6d839ca6a0099980d38e906236458ca0647794ce Mon Sep 17 00:00:00 2001 From: Adarshjha619 Date: Fri, 4 Aug 2023 16:12:21 +0530 Subject: [PATCH 1/6] new branch --- requirements.txt | 1 - superagi/tools/file/read_file.py | 94 ++++++++++++++++++++++++++++++-- 2 files changed, 89 insertions(+), 6 deletions(-) diff --git a/requirements.txt b/requirements.txt index 9412a9788..53df4c440 100644 --- a/requirements.txt +++ b/requirements.txt @@ -154,4 +154,3 @@ html2text==2020.1.16 duckduckgo-search==3.8.3 google-generativeai==0.1.0 unstructured==0.8.1 -beautifulsoup4==4.12.2 diff --git a/superagi/tools/file/read_file.py b/superagi/tools/file/read_file.py index d04fe41dc..5de369560 100644 --- a/superagi/tools/file/read_file.py +++ b/superagi/tools/file/read_file.py @@ -1,7 +1,77 @@ +# import os +# from typing import Type, Optional + +# from pydantic import BaseModel, Field + +# from superagi.helper.resource_helper import ResourceHelper +# from superagi.helper.s3_helper import S3Helper +# from superagi.models.agent_execution import AgentExecution +# from superagi.resource_manager.file_manager import FileManager +# from superagi.tools.base_tool import BaseTool +# from superagi.models.agent import Agent +# from superagi.types.storage_types import StorageType +# from superagi.config.config import get_config + + +# class ReadFileSchema(BaseModel): +# """Input for CopyFileTool.""" +# file_name: str = Field(..., description="Path of the file to read") + + +# class ReadFileTool(BaseTool): +# """ +# Read File tool + +# Attributes: +# name : The name. +# description : The description. +# args_schema : The args schema. +# """ +# name: str = "Read File" +# agent_id: int = None +# agent_execution_id: int = None +# args_schema: Type[BaseModel] = ReadFileSchema +# description: str = "Reads the file content in a specified location" +# resource_manager: Optional[FileManager] = None + +# def _execute(self, file_name: str): +# """ +# Execute the read file tool. + +# Args: +# file_name : The name of the file to read. + +# Returns: +# The file content and the file name +# """ +# final_path = ResourceHelper.get_agent_read_resource_path(file_name, agent=Agent.get_agent_from_id( +# session=self.toolkit_config.session, agent_id=self.agent_id), agent_execution=AgentExecution +# .get_agent_execution_from_id(session=self +# .toolkit_config.session, +# agent_execution_id=self +# .agent_execution_id)) +# if StorageType.get_storage_type(get_config("STORAGE_TYPE", StorageType.FILE.value)) == StorageType.S3: +# return S3Helper().read_from_s3(final_path) + +# if final_path is None or not os.path.exists(final_path): +# raise FileNotFoundError(f"File '{file_name}' not found.") +# directory = os.path.dirname(final_path) +# os.makedirs(directory, exist_ok=True) + +# with open(final_path, 'r') as file: +# file_content = file.read() +# max_length = len(' '.join(file_content.split(" ")[:1000])) +# return file_content[:max_length] + "\n File " + file_name + " read successfully." + + import os from typing import Type, Optional +import ebooklib +import bs4 +from bs4 import BeautifulSoup from pydantic import BaseModel, Field +from ebooklib import epub from superagi.helper.resource_helper import ResourceHelper from superagi.helper.s3_helper import S3Helper @@ -11,7 +81,7 @@ from superagi.models.agent import Agent from superagi.types.storage_types import StorageType from superagi.config.config import get_config - +from unstructured.partition.auto import partition class ReadFileSchema(BaseModel): """Input for CopyFileTool.""" @@ -57,8 +127,22 @@ def _execute(self, file_name: str): raise FileNotFoundError(f"File '{file_name}' not found.") directory = os.path.dirname(final_path) os.makedirs(directory, exist_ok=True) + + # Check if the file is an .epub file + if final_path.lower().endswith('.epub'): + # Use ebooklib to read the epub file + book = epub.read_epub(final_path) + # Get the text content from each item in the book + content = [] + for item in book.get_items_of_type(ebooklib.ITEM_DOCUMENT): + soup = BeautifulSoup(item.get_content(), 'html.parser') + content.append(soup.get_text()) + + content = "\n".join(content) + else: + elements = partition(final_path) + content = "\n\n".join([str(el) for el in elements]) + + return content + - with open(final_path, 'r') as file: - file_content = file.read() - max_length = len(' '.join(file_content.split(" ")[:1000])) - return file_content[:max_length] + "\n File " + file_name + " read successfully." \ No newline at end of file From c86b37c4e9fc817f11c6002627c0f0b120550abd Mon Sep 17 00:00:00 2001 From: Adarshjha619 Date: Mon, 7 Aug 2023 11:45:04 +0530 Subject: [PATCH 2/6] updated read file --- superagi/tools/file/read_file.py | 65 -------------------------------- tools.json | 4 +- 2 files changed, 3 insertions(+), 66 deletions(-) diff --git a/superagi/tools/file/read_file.py b/superagi/tools/file/read_file.py index 5de369560..6d2655760 100644 --- a/superagi/tools/file/read_file.py +++ b/superagi/tools/file/read_file.py @@ -1,68 +1,3 @@ -# import os -# from typing import Type, Optional - -# from pydantic import BaseModel, Field - -# from superagi.helper.resource_helper import ResourceHelper -# from superagi.helper.s3_helper import S3Helper -# from superagi.models.agent_execution import AgentExecution -# from superagi.resource_manager.file_manager import FileManager -# from superagi.tools.base_tool import BaseTool -# from superagi.models.agent import Agent -# from superagi.types.storage_types import StorageType -# from superagi.config.config import get_config - - -# class ReadFileSchema(BaseModel): -# """Input for CopyFileTool.""" -# file_name: str = Field(..., description="Path of the file to read") - - -# class ReadFileTool(BaseTool): -# """ -# Read File tool - -# Attributes: -# name : The name. -# description : The description. -# args_schema : The args schema. -# """ -# name: str = "Read File" -# agent_id: int = None -# agent_execution_id: int = None -# args_schema: Type[BaseModel] = ReadFileSchema -# description: str = "Reads the file content in a specified location" -# resource_manager: Optional[FileManager] = None - -# def _execute(self, file_name: str): -# """ -# Execute the read file tool. - -# Args: -# file_name : The name of the file to read. - -# Returns: -# The file content and the file name -# """ -# final_path = ResourceHelper.get_agent_read_resource_path(file_name, agent=Agent.get_agent_from_id( -# session=self.toolkit_config.session, agent_id=self.agent_id), agent_execution=AgentExecution -# .get_agent_execution_from_id(session=self -# .toolkit_config.session, -# agent_execution_id=self -# .agent_execution_id)) -# if StorageType.get_storage_type(get_config("STORAGE_TYPE", StorageType.FILE.value)) == StorageType.S3: -# return S3Helper().read_from_s3(final_path) - -# if final_path is None or not os.path.exists(final_path): -# raise FileNotFoundError(f"File '{file_name}' not found.") -# directory = os.path.dirname(final_path) -# os.makedirs(directory, exist_ok=True) - -# with open(final_path, 'r') as file: -# file_content = file.read() -# max_length = len(' '.join(file_content.split(" ")[:1000])) -# return file_content[:max_length] + "\n File " + file_name + " read successfully." - import os from typing import Type, Optional diff --git a/tools.json b/tools.json index 7a587e73a..4b8b1f805 100644 --- a/tools.json +++ b/tools.json @@ -1,4 +1,6 @@ { "tools": { + "DuckDuckGo": "https://github.com/TransformerOptimus/SuperAGI-Tools/tree/main/DuckDuckGo", + "notion": "https://github.com/TransformerOptimus/SuperAGI-Tools/tree/main/notion" } -} +} \ No newline at end of file From 609c1c8bf8fc027589244458328f04353d0a58eb Mon Sep 17 00:00:00 2001 From: Adarshjha619 Date: Mon, 7 Aug 2023 13:04:41 +0530 Subject: [PATCH 3/6] updaeted test for read_file --- tests/unit_tests/tools/file/test_read_file.py | 186 ++++++++++++++---- 1 file changed, 149 insertions(+), 37 deletions(-) diff --git a/tests/unit_tests/tools/file/test_read_file.py b/tests/unit_tests/tools/file/test_read_file.py index 81399c2cc..03931098d 100644 --- a/tests/unit_tests/tools/file/test_read_file.py +++ b/tests/unit_tests/tools/file/test_read_file.py @@ -1,5 +1,58 @@ +# import pytest +# from unittest.mock import patch, mock_open, MagicMock + +# from superagi.models.agent_execution import AgentExecution +# from superagi.tools.file.read_file import ReadFileTool +# from superagi.models.agent import Agent + + +# @pytest.fixture +# def read_file_tool(): +# read_file_tool = ReadFileTool() +# read_file_tool.agent_id = 1 # Set a dummy agent ID for testing. + +# yield read_file_tool + + +# def test_read_file_success(read_file_tool): +# # Mock the open function, and make it return a file object that has 'Hello, World!' as its contents. +# mock_file = mock_open(read_data='Hello, World!') +# with patch('builtins.open', mock_file), \ +# patch('os.path.exists', return_value=True), \ +# patch('os.makedirs', return_value=True), \ +# patch('superagi.helper.resource_helper.ResourceHelper.get_root_input_dir', +# return_value="/input_dir/{agent_id}/"), \ +# patch('superagi.helper.resource_helper.ResourceHelper.get_root_output_dir', +# return_value="/output_dir/{agent_id}/"), \ +# patch('superagi.models.agent.Agent.get_agent_from_id', return_value=Agent(id=1, name='TestAgent')), \ +# patch('superagi.models.agent_execution.AgentExecution.get_agent_execution_from_id', +# return_value= +# AgentExecution(id=1, name='TestExecution')): +# read_file_tool.toolkit_config.session = MagicMock() +# file_content = read_file_tool._execute('file.txt') + +# expected_content = 'Hello, World!\n File file.txt read successfully.' +# assert file_content == expected_content + + +# def test_read_file_file_not_found(read_file_tool): +# with patch('os.path.exists', return_value=False), \ +# patch('superagi.helper.resource_helper.ResourceHelper.get_root_input_dir', +# return_value="/input_dir/{agent_id}/"), \ +# patch('superagi.helper.resource_helper.ResourceHelper.get_root_output_dir', +# return_value="/output_dir/{agent_id}/"), \ +# patch('superagi.models.agent.Agent.get_agent_from_id', return_value=Agent(id=1, name='TestAgent')), \ +# patch('superagi.models.agent_execution.AgentExecution.get_agent_execution_from_id', +# return_value=AgentExecution(id=1, name='TestExecution')): +# read_file_tool.toolkit_config.session = MagicMock() +# with pytest.raises(FileNotFoundError): +# read_file_tool._execute('file.txt') + +import os import pytest -from unittest.mock import patch, mock_open, MagicMock +import tempfile +from unittest.mock import MagicMock, patch +from superagi.tools.file.read_file import ReadFileTool from superagi.models.agent_execution import AgentExecution from superagi.tools.file.read_file import ReadFileTool @@ -11,39 +64,98 @@ def read_file_tool(): read_file_tool = ReadFileTool() read_file_tool.agent_id = 1 # Set a dummy agent ID for testing. - yield read_file_tool - - -def test_read_file_success(read_file_tool): - # Mock the open function, and make it return a file object that has 'Hello, World!' as its contents. - mock_file = mock_open(read_data='Hello, World!') - with patch('builtins.open', mock_file), \ - patch('os.path.exists', return_value=True), \ - patch('os.makedirs', return_value=True), \ - patch('superagi.helper.resource_helper.ResourceHelper.get_root_input_dir', - return_value="/input_dir/{agent_id}/"), \ - patch('superagi.helper.resource_helper.ResourceHelper.get_root_output_dir', - return_value="/output_dir/{agent_id}/"), \ - patch('superagi.models.agent.Agent.get_agent_from_id', return_value=Agent(id=1, name='TestAgent')), \ - patch('superagi.models.agent_execution.AgentExecution.get_agent_execution_from_id', - return_value= - AgentExecution(id=1, name='TestExecution')): - read_file_tool.toolkit_config.session = MagicMock() - file_content = read_file_tool._execute('file.txt') - - expected_content = 'Hello, World!\n File file.txt read successfully.' - assert file_content == expected_content - - -def test_read_file_file_not_found(read_file_tool): - with patch('os.path.exists', return_value=False), \ - patch('superagi.helper.resource_helper.ResourceHelper.get_root_input_dir', - return_value="/input_dir/{agent_id}/"), \ - patch('superagi.helper.resource_helper.ResourceHelper.get_root_output_dir', - return_value="/output_dir/{agent_id}/"), \ - patch('superagi.models.agent.Agent.get_agent_from_id', return_value=Agent(id=1, name='TestAgent')), \ - patch('superagi.models.agent_execution.AgentExecution.get_agent_execution_from_id', - return_value=AgentExecution(id=1, name='TestExecution')): - read_file_tool.toolkit_config.session = MagicMock() - with pytest.raises(FileNotFoundError): - read_file_tool._execute('file.txt') +@pytest.fixture +def mock_s3_helper(): + with patch("superagi.helper.s3_helper.S3Helper") as mock_s3_helper: + yield mock_s3_helper + +@pytest.fixture +def mock_partition(): + with patch("unstructured.partition.auto.partition") as mock_partition: + yield mock_partition + +@pytest.fixture +def mock_get_agent_from_id(): + with patch("superagi.models.agent.Agent.get_agent_from_id") as mock_get_agent: + yield mock_get_agent + +@pytest.fixture +def mock_get_agent_execution_from_id(): + with patch("superagi.models.agent_execution.AgentExecution.get_agent_execution_from_id") as mock_execution: + yield mock_execution +@pytest.fixture +def mock_resource_helper(): + with patch("superagi.helper.resource_helper.ResourceHelper.get_agent_read_resource_path") as mock_resource_helper: + yield mock_resource_helper + +def test_read_file_tool(mock_os_path_exists, mock_os_makedirs, mock_get_config, mock_s3_helper, mock_partition, + mock_get_agent_from_id, mock_get_agent_execution_from_id, mock_resource_helper): + mock_os_path_exists.return_value = True + mock_partition.return_value = ["This is a file.", "This is the second line."] + mock_get_config.return_value = "FILE" + mock_get_agent_from_id.return_value = MagicMock() + mock_get_agent_execution_from_id.return_value = MagicMock() + + tool = ReadFileTool() + + with tempfile.NamedTemporaryFile('w', delete=False, suffix='.txt') as tmp: + tmp.write("This is a file.\nThis is the second line.") + tmp.seek(0) # Reset file pointer to the beginning + tmp.close() # Explicitly close the file + + mock_resource_helper.return_value = tmp.name + + try: + result = tool._execute(tmp.name) + assert isinstance(result, str) + assert "This is a file." in result + assert "This is the second line." in result + finally: + os.remove(tmp.name) # Ensure the temporary file is deleted + +def test_read_file_tool_s3(mock_os_path_exists, mock_os_makedirs, mock_get_config, mock_s3_helper, mock_partition, + mock_get_agent_from_id, mock_get_agent_execution_from_id, mock_resource_helper): + mock_os_path_exists.return_value = True + mock_get_config.return_value = "S3" # ensure this function returns "S3" + mock_get_agent_from_id.return_value = MagicMock() + mock_get_agent_execution_from_id.return_value = MagicMock() + + tool = ReadFileTool() + + with tempfile.NamedTemporaryFile('w', delete=False, suffix='.txt') as tmp: + tmp.write("This is a file.\nThis is the second line.") + tmp.seek(0) # Reset file pointer to the beginning + tmp.close() # Explicitly close the file + + mock_resource_helper.return_value = tmp.name + mock_s3_helper.return_value.read_from_s3.return_value = open(tmp.name, 'r').read() + + try: + result = tool._execute(tmp.name) + assert isinstance(result, str) + assert "This is a file." in result + assert "This is the second line." in result + finally: + os.remove(tmp.name) # Ensure the temporary file is deleted + + +def test_read_file_tool_not_found(mock_os_path_exists, mock_os_makedirs, mock_get_config, mock_s3_helper, mock_partition, + mock_get_agent_from_id, mock_get_agent_execution_from_id, mock_resource_helper): + mock_os_path_exists.return_value = False + mock_get_agent_from_id.return_value = MagicMock() + mock_get_agent_execution_from_id.return_value = MagicMock() + + tool = ReadFileTool() + + with tempfile.NamedTemporaryFile('w', delete=False, suffix='.txt') as tmp: + tmp.write("This is a file.\nThis is the second line.") + tmp.seek(0) # Reset file pointer to the beginning + tmp.close() # Explicitly close the file + + try: + with pytest.raises(FileNotFoundError): + tool._execute(tmp.name) + finally: + os.remove(tmp.name) # Ensure the temporary file is deleted + + From 83f7a608cdf426ebb1857cedda67829c134fc918 Mon Sep 17 00:00:00 2001 From: Adarshjha619 Date: Mon, 7 Aug 2023 13:14:19 +0530 Subject: [PATCH 4/6] updates read_file and test --- tests/unit_tests/tools/file/test_read_file.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/unit_tests/tools/file/test_read_file.py b/tests/unit_tests/tools/file/test_read_file.py index 03931098d..962f56ed2 100644 --- a/tests/unit_tests/tools/file/test_read_file.py +++ b/tests/unit_tests/tools/file/test_read_file.py @@ -58,6 +58,21 @@ from superagi.tools.file.read_file import ReadFileTool from superagi.models.agent import Agent +@pytest.fixture +def mock_os_path_exists(): + with patch("os.path.exists") as mock_exists: + yield mock_exists + +@pytest.fixture +def mock_os_makedirs(): + with patch("os.makedirs") as mock_makedirs: + yield mock_makedirs + +@pytest.fixture +def mock_get_config(): + with patch("superagi.config.config.get_config") as mock_get_config: + yield mock_get_config + @pytest.fixture def read_file_tool(): From cb9f9c8e59c23bf7098e06bab7b361af7dd9528d Mon Sep 17 00:00:00 2001 From: Fluder-Paradyne <121793617+Fluder-Paradyne@users.noreply.github.com> Date: Mon, 7 Aug 2023 17:52:28 +0530 Subject: [PATCH 5/6] Update tools.json --- tools.json | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tools.json b/tools.json index 4b8b1f805..7a587e73a 100644 --- a/tools.json +++ b/tools.json @@ -1,6 +1,4 @@ { "tools": { - "DuckDuckGo": "https://github.com/TransformerOptimus/SuperAGI-Tools/tree/main/DuckDuckGo", - "notion": "https://github.com/TransformerOptimus/SuperAGI-Tools/tree/main/notion" } -} \ No newline at end of file +} From aab3c17750005f735d0102440cc31d97b736d199 Mon Sep 17 00:00:00 2001 From: Fluder-Paradyne <121793617+Fluder-Paradyne@users.noreply.github.com> Date: Mon, 7 Aug 2023 17:53:47 +0530 Subject: [PATCH 6/6] Update test_read_file.py --- tests/unit_tests/tools/file/test_read_file.py | 50 ------------------- 1 file changed, 50 deletions(-) diff --git a/tests/unit_tests/tools/file/test_read_file.py b/tests/unit_tests/tools/file/test_read_file.py index 962f56ed2..46d810b03 100644 --- a/tests/unit_tests/tools/file/test_read_file.py +++ b/tests/unit_tests/tools/file/test_read_file.py @@ -1,53 +1,3 @@ -# import pytest -# from unittest.mock import patch, mock_open, MagicMock - -# from superagi.models.agent_execution import AgentExecution -# from superagi.tools.file.read_file import ReadFileTool -# from superagi.models.agent import Agent - - -# @pytest.fixture -# def read_file_tool(): -# read_file_tool = ReadFileTool() -# read_file_tool.agent_id = 1 # Set a dummy agent ID for testing. - -# yield read_file_tool - - -# def test_read_file_success(read_file_tool): -# # Mock the open function, and make it return a file object that has 'Hello, World!' as its contents. -# mock_file = mock_open(read_data='Hello, World!') -# with patch('builtins.open', mock_file), \ -# patch('os.path.exists', return_value=True), \ -# patch('os.makedirs', return_value=True), \ -# patch('superagi.helper.resource_helper.ResourceHelper.get_root_input_dir', -# return_value="/input_dir/{agent_id}/"), \ -# patch('superagi.helper.resource_helper.ResourceHelper.get_root_output_dir', -# return_value="/output_dir/{agent_id}/"), \ -# patch('superagi.models.agent.Agent.get_agent_from_id', return_value=Agent(id=1, name='TestAgent')), \ -# patch('superagi.models.agent_execution.AgentExecution.get_agent_execution_from_id', -# return_value= -# AgentExecution(id=1, name='TestExecution')): -# read_file_tool.toolkit_config.session = MagicMock() -# file_content = read_file_tool._execute('file.txt') - -# expected_content = 'Hello, World!\n File file.txt read successfully.' -# assert file_content == expected_content - - -# def test_read_file_file_not_found(read_file_tool): -# with patch('os.path.exists', return_value=False), \ -# patch('superagi.helper.resource_helper.ResourceHelper.get_root_input_dir', -# return_value="/input_dir/{agent_id}/"), \ -# patch('superagi.helper.resource_helper.ResourceHelper.get_root_output_dir', -# return_value="/output_dir/{agent_id}/"), \ -# patch('superagi.models.agent.Agent.get_agent_from_id', return_value=Agent(id=1, name='TestAgent')), \ -# patch('superagi.models.agent_execution.AgentExecution.get_agent_execution_from_id', -# return_value=AgentExecution(id=1, name='TestExecution')): -# read_file_tool.toolkit_config.session = MagicMock() -# with pytest.raises(FileNotFoundError): -# read_file_tool._execute('file.txt') - import os import pytest import tempfile