Skip to content

Commit

Permalink
Revert "PDF and DOCX support in Write File - Feature Improvement, close #548 (#928)"
Browse files Browse the repository at this point in the history

This reverts commit 8b01357.
  • Loading branch information
luciferlinx101 committed Aug 25, 2023
1 parent 8b01357 commit 1da4be6
Show file tree
Hide file tree
Showing 10 changed files with 36 additions and 254 deletions.
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ FROM python:3.10-slim-bullseye AS compile-image
WORKDIR /app

RUN apt-get update && \
apt-get install --no-install-recommends -y wget libpq-dev gcc g++ python3-dev wkhtmltopdf && \
apt-get install --no-install-recommends -y wget libpq-dev gcc g++ python3-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

Expand All @@ -24,7 +24,7 @@ FROM python:3.10-slim-bullseye AS build-image
WORKDIR /app

RUN apt-get update && \
apt-get install --no-install-recommends -y libpq-dev wkhtmltopdf && \
apt-get install --no-install-recommends -y libpq-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

Expand Down
2 changes: 0 additions & 2 deletions DockerfileCelery
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@ FROM python:3.9
WORKDIR /app

#RUN apt-get update && apt-get install --no-install-recommends -y git wget libpq-dev gcc python3-dev && pip install psycopg2
RUN apt-get update && apt-get install -y wkhtmltopdf

RUN pip install --upgrade pip

COPY requirements.txt .
Expand Down
3 changes: 0 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,3 @@ html2text==2020.1.16
duckduckgo-search==3.8.3
google-generativeai==0.1.0
unstructured==0.8.1
beautifulsoup4==4.12.2
pdfkit==1.0.0
htmldocx==0.0.6
7 changes: 2 additions & 5 deletions superagi/models/agent_execution_feed.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,12 @@ def get_last_tool_response(cls, session: Session, agent_execution_id: int, tool_
AgentExecutionFeed.role == "system").order_by(AgentExecutionFeed.created_at.desc()).all()

for agent_execution_feed in agent_execution_feeds:
if tool_name and not agent_execution_feed.feed.startswith(
f"Tool {tool_name}"
):
if tool_name and not agent_execution_feed.feed.startswith("Tool " + tool_name):
continue
if agent_execution_feed.feed.startswith("Tool"):
return agent_execution_feed.feed
return ""

@classmethod
def fetch_agent_execution_feeds(cls, session, agent_execution_id: int):
agent_execution = AgentExecution.find_by_id(session, agent_execution_id)
Expand All @@ -68,4 +66,3 @@ def fetch_agent_execution_feeds(cls, session, agent_execution_id: int):
return agent_feeds
else:
return agent_feeds[2:]

112 changes: 18 additions & 94 deletions superagi/resource_manager/file_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,11 @@
from superagi.models.agent import Agent
from superagi.models.agent_execution import AgentExecution
from superagi.types.storage_types import StorageType
import pdfkit
from htmldocx import HtmlToDocx

class UnsupportedFileTypeError(Exception):
pass

class FileNotCreatedError(Exception):
pass

class FileManager:
def __init__(self, session: Session, agent_id: int = None, agent_execution_id: int = None):
self.session = session
self.agent_id = agent_id
self.agent_execution_id = agent_execution_id

def write_binary_file(self, file_name: str, data):
if self.agent_id is not None:
final_path = ResourceHelper.get_agent_write_resource_path(file_name,
Expand All @@ -42,7 +32,6 @@ def write_binary_file(self, file_name: str, data):
return f"Binary {file_name} saved successfully"
except Exception as err:
return f"Error write_binary_file: {err}"

def write_to_s3(self, file_name, final_path):
with open(final_path, 'rb') as img:
resource = ResourceHelper.make_written_file_resource(file_name=file_name,
Expand All @@ -56,7 +45,7 @@ def write_to_s3(self, file_name, final_path):
s3_helper = S3Helper()
s3_helper.upload_file(img, path=resource.path)

def write_file(self, file_name: str, content, return_file_path: bool = False):
def write_file(self, file_name: str, content):
if self.agent_id is not None:
final_path = ResourceHelper.get_agent_write_resource_path(file_name,
agent=Agent.get_agent_from_id(self.session,
Expand All @@ -66,19 +55,25 @@ def write_file(self, file_name: str, content, return_file_path: bool = False):
self.agent_execution_id))
else:
final_path = ResourceHelper.get_resource_path(file_name)

try:
self.save_file_by_type(file_name=file_name, file_path=final_path, content=content)
with open(final_path, mode="w") as file:
file.write(content)
file.close()
self.write_to_s3(file_name, final_path)
logger.info(f"{file_name} - File written successfully")
return f"{file_name} - File written successfully"
except Exception as err:
return f"Error write_file: {err}"

logger.info(f"{file_name} - File written successfully")

if return_file_path:
return final_path
return f"{file_name} - File written successfully"

def write_csv_file(self, file_name: str, final_path: str, csv_data) -> str:
def write_csv_file(self, file_name: str, csv_data):
if self.agent_id is not None:
final_path = ResourceHelper.get_agent_write_resource_path(file_name,

Check warning on line 69 in superagi/resource_manager/file_manager.py

View check run for this annotation

Codecov / codecov/patch

superagi/resource_manager/file_manager.py#L69

Added line #L69 was not covered by tests
agent=Agent.get_agent_from_id(self.session,
self.agent_id),
agent_execution=AgentExecution
.get_agent_execution_from_id(self.session,
self.agent_execution_id))
else:
final_path = ResourceHelper.get_resource_path(file_name)

Check warning on line 76 in superagi/resource_manager/file_manager.py

View check run for this annotation

Codecov / codecov/patch

superagi/resource_manager/file_manager.py#L76

Added line #L76 was not covered by tests
try:
with open(final_path, mode="w", newline="") as file:
writer = csv.writer(file, lineterminator="\n")
Expand All @@ -87,63 +82,15 @@ def write_csv_file(self, file_name: str, final_path: str, csv_data) -> str:
logger.info(f"{file_name} - File written successfully")
return f"{file_name} - File written successfully"
except Exception as err:
raise FileNotCreatedError(f"Error write_file: {err}") from err

def write_pdf_file(self, file_name: str ,file_path: str, content):
# Saving the HTML file
html_file_path = f"{file_path[:-4]}.html"
self.write_txt_file(file_name=html_file_path.split('/')[-1], file_path=html_file_path, content=content)

# Convert HTML file to a PDF file
try:
options = {
'quiet': '',
'page-size': 'Letter',
'margin-top': '0.75in',
'margin-right': '0.75in',
'margin-bottom': '0.75in',
'margin-left': '0.75in',
'enable-local-file-access': ''
}
config = pdfkit.configuration(wkhtmltopdf = "/usr/bin/wkhtmltopdf")
pdfkit.from_file(html_file_path, file_path, options = options, configuration = config)
self.write_to_s3(file_name, file_path)
return file_path
return f"Error write_csv_file: {err}"

Check warning on line 85 in superagi/resource_manager/file_manager.py

View check run for this annotation

Codecov / codecov/patch

superagi/resource_manager/file_manager.py#L85

Added line #L85 was not covered by tests

except Exception as err:
raise FileNotCreatedError(f"Error write_file: {err}") from err

def write_docx_file(self, file_name: str ,file_path: str, content):
# Saving the HTML file
html_file_path = f"{file_path[:-4]}.html"
self.write_txt_file(file_name=html_file_path.split('/')[-1], file_path=html_file_path, content=content)

# Convert HTML file to a DOCx file
try:
new_parser = HtmlToDocx()
new_parser.parse_html_file(html_file_path, file_path)
self.write_to_s3(file_name, file_path)
return file_path
except Exception as err:
raise FileNotCreatedError(f"Error write_file: {err}") from err

def write_txt_file(self, file_name: str ,file_path: str, content) -> str:
try:
with open(file_path, mode="w") as file:
file.write(content)
file.close()
self.write_to_s3(file_name, file_path)
return file_path
except Exception as err:
raise FileNotCreatedError(f"Error write_file: {err}") from err

def get_agent_resource_path(self, file_name: str):
return ResourceHelper.get_agent_write_resource_path(file_name, agent=Agent.get_agent_from_id(self.session,
self.agent_id),
agent_execution=AgentExecution
.get_agent_execution_from_id(self.session,
self.agent_execution_id))

def read_file(self, file_name: str):
if self.agent_id is not None:
final_path = self.get_agent_resource_path(file_name)
Expand All @@ -157,7 +104,6 @@ def read_file(self, file_name: str):
return content
except Exception as err:
return f"Error while reading file {file_name}: {err}"

def get_files(self):
"""
Gets all file names generated by the CodingTool.
Expand All @@ -176,25 +122,3 @@ def get_files(self):
logger.error(f"Error while accessing files in {final_path}: {err}")
files = []
return files

def save_file_by_type(self, file_name: str, file_path: str, content):

# Extract the file type from the file_name
file_type = file_name.split('.')[-1].lower()

# Dictionary to map file types to corresponding functions
file_type_handlers = {
'txt': self.write_txt_file,
'pdf': self.write_pdf_file,
'docx': self.write_docx_file,
'doc': self.write_docx_file,
'csv': self.write_csv_file,
'html': self.write_txt_file
# NOTE: Add more file types and corresponding functions as needed, These functions should be defined
}

if file_type in file_type_handlers:
return file_type_handlers[file_type](file_name, file_path, content)
else:
raise UnsupportedFileTypeError(f"Unsupported file type: {file_type}. Cannot save the file.")

4 changes: 0 additions & 4 deletions superagi/tools/file/prompts/add_images_to_html.txt

This file was deleted.

7 changes: 0 additions & 7 deletions superagi/tools/file/prompts/content_to_html_prompt.txt

This file was deleted.

Loading

0 comments on commit 1da4be6

Please sign in to comment.