diff --git a/Dockerfile b/Dockerfile index 250f94a12..10a7d7f17 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,7 +3,7 @@ FROM python:3.10-slim-bullseye AS compile-image WORKDIR /app RUN apt-get update && \ - apt-get install --no-install-recommends -y wget libpq-dev gcc g++ python3-dev && \ + apt-get install --no-install-recommends -y wget libpq-dev gcc g++ python3-dev wkhtmltopdf && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* @@ -24,7 +24,7 @@ FROM python:3.10-slim-bullseye AS build-image WORKDIR /app RUN apt-get update && \ - apt-get install --no-install-recommends -y libpq-dev && \ + apt-get install --no-install-recommends -y libpq-dev wkhtmltopdf && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* diff --git a/DockerfileCelery b/DockerfileCelery index 682e50824..ba608dcbd 100644 --- a/DockerfileCelery +++ b/DockerfileCelery @@ -3,6 +3,8 @@ FROM python:3.9 WORKDIR /app #RUN apt-get update && apt-get install --no-install-recommends -y git wget libpq-dev gcc python3-dev && pip install psycopg2 +RUN apt-get update && apt-get install -y wkhtmltopdf + RUN pip install --upgrade pip COPY requirements.txt . 
diff --git a/requirements.txt b/requirements.txt index 65e19c2c1..be008f6fb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -155,3 +155,6 @@ html2text==2020.1.16 duckduckgo-search==3.8.3 google-generativeai==0.1.0 unstructured==0.8.1 +beautifulsoup4==4.12.2 +pdfkit==1.0.0 +htmldocx==0.0.6 diff --git a/superagi/models/agent_execution_feed.py b/superagi/models/agent_execution_feed.py index 61f449585..aa7c3a148 100644 --- a/superagi/models/agent_execution_feed.py +++ b/superagi/models/agent_execution_feed.py @@ -47,12 +47,14 @@ def get_last_tool_response(cls, session: Session, agent_execution_id: int, tool_ AgentExecutionFeed.role == "system").order_by(AgentExecutionFeed.created_at.desc()).all() for agent_execution_feed in agent_execution_feeds: - if tool_name and not agent_execution_feed.feed.startswith("Tool " + tool_name): + if tool_name and not agent_execution_feed.feed.startswith( + f"Tool {tool_name}" + ): continue if agent_execution_feed.feed.startswith("Tool"): return agent_execution_feed.feed return "" - + @classmethod def fetch_agent_execution_feeds(cls, session, agent_execution_id: int): agent_execution = AgentExecution.find_by_id(session, agent_execution_id) @@ -66,3 +68,4 @@ def fetch_agent_execution_feeds(cls, session, agent_execution_id: int): return agent_feeds else: return agent_feeds[2:] + diff --git a/superagi/resource_manager/file_manager.py b/superagi/resource_manager/file_manager.py index 4c20ba16d..dbd0e4d16 100644 --- a/superagi/resource_manager/file_manager.py +++ b/superagi/resource_manager/file_manager.py @@ -8,11 +8,21 @@ from superagi.models.agent import Agent from superagi.models.agent_execution import AgentExecution from superagi.types.storage_types import StorageType +import pdfkit +from htmldocx import HtmlToDocx + +class UnsupportedFileTypeError(Exception): + pass + +class FileNotCreatedError(Exception): + pass + class FileManager: def __init__(self, session: Session, agent_id: int = None, agent_execution_id: int = None): 
self.session = session self.agent_id = agent_id self.agent_execution_id = agent_execution_id + def write_binary_file(self, file_name: str, data): if self.agent_id is not None: final_path = ResourceHelper.get_agent_write_resource_path(file_name, @@ -32,6 +42,7 @@ def write_binary_file(self, file_name: str, data): return f"Binary {file_name} saved successfully" except Exception as err: return f"Error write_binary_file: {err}" + def write_to_s3(self, file_name, final_path): with open(final_path, 'rb') as img: resource = ResourceHelper.make_written_file_resource(file_name=file_name, @@ -45,7 +56,7 @@ def write_to_s3(self, file_name, final_path): s3_helper = S3Helper() s3_helper.upload_file(img, path=resource.path) - def write_file(self, file_name: str, content): + def write_file(self, file_name: str, content, return_file_path: bool = False): if self.agent_id is not None: final_path = ResourceHelper.get_agent_write_resource_path(file_name, agent=Agent.get_agent_from_id(self.session, @@ -55,25 +66,19 @@ def write_file(self, file_name: str, content): self.agent_execution_id)) else: final_path = ResourceHelper.get_resource_path(file_name) + try: - with open(final_path, mode="w") as file: - file.write(content) - file.close() - self.write_to_s3(file_name, final_path) - logger.info(f"{file_name} - File written successfully") - return f"{file_name} - File written successfully" + self.save_file_by_type(file_name=file_name, file_path=final_path, content=content) except Exception as err: return f"Error write_file: {err}" - def write_csv_file(self, file_name: str, csv_data): - if self.agent_id is not None: - final_path = ResourceHelper.get_agent_write_resource_path(file_name, - agent=Agent.get_agent_from_id(self.session, - self.agent_id), - agent_execution=AgentExecution - .get_agent_execution_from_id(self.session, - self.agent_execution_id)) - else: - final_path = ResourceHelper.get_resource_path(file_name) + + logger.info(f"{file_name} - File written successfully") + + if 
return_file_path: + return final_path + return f"{file_name} - File written successfully" + + def write_csv_file(self, file_name: str, final_path: str, csv_data) -> str: try: with open(final_path, mode="w", newline="") as file: writer = csv.writer(file, lineterminator="\n") @@ -82,15 +87,63 @@ def write_csv_file(self, file_name: str, csv_data): logger.info(f"{file_name} - File written successfully") return f"{file_name} - File written successfully" except Exception as err: - return f"Error write_csv_file: {err}" + raise FileNotCreatedError(f"Error write_file: {err}") from err + + def write_pdf_file(self, file_name: str ,file_path: str, content): + # Saving the HTML file + html_file_path = f"{file_path[:-4]}.html" + self.write_txt_file(file_name=html_file_path.split('/')[-1], file_path=html_file_path, content=content) + + # Convert HTML file to a PDF file + try: + options = { + 'quiet': '', + 'page-size': 'Letter', + 'margin-top': '0.75in', + 'margin-right': '0.75in', + 'margin-bottom': '0.75in', + 'margin-left': '0.75in', + 'enable-local-file-access': '' + } + config = pdfkit.configuration(wkhtmltopdf = "/usr/bin/wkhtmltopdf") + pdfkit.from_file(html_file_path, file_path, options = options, configuration = config) + self.write_to_s3(file_name, file_path) + return file_path + except Exception as err: + raise FileNotCreatedError(f"Error write_file: {err}") from err + + def write_docx_file(self, file_name: str ,file_path: str, content): + # Saving the HTML file + html_file_path = f"{file_path[:-4]}.html" + self.write_txt_file(file_name=html_file_path.split('/')[-1], file_path=html_file_path, content=content) + # Convert HTML file to a DOCx file + try: + new_parser = HtmlToDocx() + new_parser.parse_html_file(html_file_path, file_path) + self.write_to_s3(file_name, file_path) + return file_path + except Exception as err: + raise FileNotCreatedError(f"Error write_file: {err}") from err + + def write_txt_file(self, file_name: str ,file_path: str, content) -> str: + try: + 
with open(file_path, mode="w") as file: + file.write(content) + file.close() + self.write_to_s3(file_name, file_path) + return file_path + except Exception as err: + raise FileNotCreatedError(f"Error write_file: {err}") from err + def get_agent_resource_path(self, file_name: str): return ResourceHelper.get_agent_write_resource_path(file_name, agent=Agent.get_agent_from_id(self.session, self.agent_id), agent_execution=AgentExecution .get_agent_execution_from_id(self.session, self.agent_execution_id)) + def read_file(self, file_name: str): if self.agent_id is not None: final_path = self.get_agent_resource_path(file_name) @@ -104,6 +157,7 @@ def read_file(self, file_name: str): return content except Exception as err: return f"Error while reading file {file_name}: {err}" + def get_files(self): """ Gets all file names generated by the CodingTool. @@ -122,3 +176,25 @@ def get_files(self): logger.error(f"Error while accessing files in {final_path}: {err}") files = [] return files + + def save_file_by_type(self, file_name: str, file_path: str, content): + + # Extract the file type from the file_name + file_type = file_name.split('.')[-1].lower() + + # Dictionary to map file types to corresponding functions + file_type_handlers = { + 'txt': self.write_txt_file, + 'pdf': self.write_pdf_file, + 'docx': self.write_docx_file, + 'doc': self.write_docx_file, + 'csv': self.write_csv_file, + 'html': self.write_txt_file + # NOTE: Add more file types and corresponding functions as needed, These functions should be defined + } + + if file_type in file_type_handlers: + return file_type_handlers[file_type](file_name, file_path, content) + else: + raise UnsupportedFileTypeError(f"Unsupported file type: {file_type}. 
Cannot save the file.") + \ No newline at end of file diff --git a/superagi/tools/file/prompts/add_images_to_html.txt b/superagi/tools/file/prompts/add_images_to_html.txt new file mode 100644 index 000000000..57dda0a49 --- /dev/null +++ b/superagi/tools/file/prompts/add_images_to_html.txt @@ -0,0 +1,4 @@ +Now, you will be provided with a few image path locations. You will have to attach the following images in appropriate locations inside the HTML code. +Remember to maintain the elegance and styling of the User Interface generated. Make sure you attach all the images provided to you. + +The relevant paths of the images are provided below: diff --git a/superagi/tools/file/prompts/content_to_html_prompt.txt b/superagi/tools/file/prompts/content_to_html_prompt.txt new file mode 100644 index 000000000..e450f46e4 --- /dev/null +++ b/superagi/tools/file/prompts/content_to_html_prompt.txt @@ -0,0 +1,7 @@ +You are an HTML code generating AI Agent. Your task is to generate a well-formatted and well-styled HTML file for a given content. +Remember to style the HTML beautifully, for which you can add the