Creates a submodule for contributed functions #1572

Closed
wants to merge 18 commits
59 changes: 59 additions & 0 deletions autogen/agentchat/contrib/function_store/file_utils.py
@@ -0,0 +1,59 @@
from .function_store_utils import requires


@requires("pdfminer.six", "requests", "io")
def read_text_from_pdf(file_path: str) -> str:
"""
Reads text from a PDF file and returns it as a string.

Args:
file_path (str): The path to the PDF file.

Returns:
str: The extracted text from the PDF file.
"""
import io
import requests
from pdfminer.high_level import PDFResourceManager, PDFPageInterpreter
from pdfminer.converter import TextConverter
from pdfminer.pdfpage import PDFPage
Collaborator:

Maybe use a function generator so the import statements are only executed once in the wrapper.

def wrapper():
    import ...

    def _actual_func():
        ...
    
    return _actual_func

Member:

Yeah, that's a great idea.

Collaborator:

To put it more clearly,

@require("...")
def function_generator() -> Callable:
  import ...
  import ...
  def actual_function():
     # implementation
  return actual_function

The function generator returns a callable, which is the actual function the user is going to use.

Maybe there is a more elegant way to do this. @davorrunje would know.
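
For concreteness, a minimal sketch of that generator pattern applied to get_youtube_transcript (the factory name make_get_youtube_transcript is illustrative, not part of this PR):

from typing import Callable


def make_get_youtube_transcript() -> Callable[[str], str]:
    # The import runs once, when the generator is called.
    from youtube_transcript_api import YouTubeTranscriptApi

    def get_youtube_transcript(youtube_link: str) -> str:
        video_id = youtube_link.split("v=")[1]
        transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
        return " ".join(part["text"] for part in transcript_list)

    return get_youtube_transcript

The caller would then register the returned callable with the agent, so the import cost is paid only at registration time.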

Collaborator:

But I think just importing every time the function is executed is fine too -- much easier.

Member:

It depends on the package. Some take a while to import. It slows things down.

Collaborator:

How about this?

@require("...")
def function_generator() -> Tuple[Callable[[], None], Callable[..., Any]]:
  def _import():
      import ...
      import ...
  def _function(*args, **kwargs):
     # implementation
     ...
     
  return (_import, _function)

This way the executor can choose an importing strategy by calling _import() when needed.

Another issue is that Python dependencies are only part of the dependency picture. Many Python libraries depend on particular C libraries being installed. We could additionally add a base Docker dependency to make sure everything needed is installed.
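
A rough sketch of that (_import, _function) variant, applied to the PDF reader (names are illustrative; note that _function still needs its own import statement, which is cheap once _import() has populated sys.modules):

from typing import Any, Callable, Tuple


def read_text_from_pdf_generator() -> Tuple[Callable[[], None], Callable[..., Any]]:
    def _import() -> None:
        # The executor decides when to pay the import cost.
        import requests  # noqa: F401
        import pdfminer.high_level  # noqa: F401

    def _function(file_path: str) -> str:
        from pdfminer.high_level import extract_text

        return extract_text(file_path)

    return _import, _function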


    resource_manager = PDFResourceManager()
    text_stream = io.StringIO()
    converter = TextConverter(resource_manager, text_stream)
    interpreter = PDFPageInterpreter(resource_manager, converter)

    if file_path.startswith("http://") or file_path.startswith("https://"):
        response = requests.get(file_path)
        file = io.BytesIO(response.content)
    else:
        file = open(file_path, "rb")

    for page in PDFPage.get_pages(file):
        interpreter.process_page(page)

Member:

Why the per-page approach rather than:

pdfminer.high_level.extract_text(file)
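
For reference, a hedged sketch of what the body could collapse to with the high-level helper (keeping the same URL handling as the current code; this is not the PR's implementation):

import io

import requests
from pdfminer.high_level import extract_text


def read_text_from_pdf(file_path: str) -> str:
    # extract_text accepts either a path or a file-like object, so the
    # manual resource manager / interpreter plumbing is no longer needed.
    if file_path.startswith("http://") or file_path.startswith("https://"):
        return extract_text(io.BytesIO(requests.get(file_path).content))
    return extract_text(file_path)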

    text = text_stream.getvalue()
    converter.close()
    text_stream.close()

    return text


@requires("python-docx")
def read_text_from_docx(file_path: str) -> str:
"""
Reads text from a DOCX file and returns it as a string.

Args:
file_path (str): The path to the DOCX file.

Returns:
str: The extracted text from the DOCX file.
"""
from docx import Document

doc = Document(file_path)
paragraphs = [p.text for p in doc.paragraphs]
text = "\n".join(paragraphs)

return text
19 changes: 19 additions & 0 deletions autogen/agentchat/contrib/function_store/function_store_utils.py
@@ -0,0 +1,19 @@
import subprocess
import sys
import functools


def requires(*packages):
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            for package in packages:
                try:
                    __import__(package)
                except ImportError:
                    subprocess.check_call([sys.executable, "-m", "pip", "install", package, "--quiet"])
            return func(*args, **kwargs)

        return wrapper

    return decorator
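
For illustration only, a hypothetical function using the decorator above (beautifulsoup4/bs4 are placeholders, not part of this PR); @requires takes the pip package name, while the function body still does its own import, and the two names can differ:

@requires("beautifulsoup4", "requests")
def read_title_from_html(url: str) -> str:
    import requests
    from bs4 import BeautifulSoup  # import name differs from the pip name

    response = requests.get(url)
    return BeautifulSoup(response.text, "html.parser").title.get_text()
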
29 changes: 29 additions & 0 deletions autogen/agentchat/contrib/function_store/youtube_utils.py
@@ -0,0 +1,29 @@
from .function_store_utils import requires


@requires("youtube_transcript_api")
def get_youtube_transcript(youtube_link: str) -> str:
"""
Gets the transcript of a YouTube video.

Args:
youtube_link (str): The link to the YouTube video.

Returns:
str: The transcript of the YouTube video.
"""
from youtube_transcript_api import YouTubeTranscriptApi

# Extract video ID from the YouTube link
video_id = youtube_link.split("v=")[1]
Member:

What if there are other querystring parameters like timecodes?

Maybe:

    from urllib.parse import urlparse, parse_qs
    parsed_url = urlparse(youtube_link)
    qs_params = parse_qs(parsed_url.query)
    video_id = qs_params['v'][0]
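
As a quick usage check (hypothetical link with an extra timecode parameter), the urlparse version isolates the id while the current split does not:

from urllib.parse import urlparse, parse_qs

link = "https://www.youtube.com/watch?v=9iqn1HhFJ6c&t=42s"
print(link.split("v=")[1])                     # 9iqn1HhFJ6c&t=42s
print(parse_qs(urlparse(link).query)["v"][0])  # 9iqn1HhFJ6c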


    try:
        # Get the transcript for the video
        transcript_list = YouTubeTranscriptApi.get_transcript(video_id)

        # Combine all parts of the transcript into a single string
        transcript = " ".join([part["text"] for part in transcript_list])

        return transcript
    except Exception as e:
        return str(e)
234 changes: 234 additions & 0 deletions notebook/agentchat_function_store.ipynb
@@ -0,0 +1,234 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Using In Built Functions"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"from autogen import AssistantAgent, UserProxyAgent\n",
"from autogen.agentchat.contrib.function_store import youtube_utils as yt\n",
"from autogen.agentchat.contrib.function_store import file_utils as fu"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"\n",
"config_list = [\n",
" {\n",
" \"model\": \"gpt-4\",\n",
" \"api_key\": os.environ.get(\"OPENAI_API_KEY\"),\n",
" }\n",
"]\n",
"\n",
"assistant = AssistantAgent(name=\"coder\", llm_config={\"config_list\": config_list})\n",
"user = UserProxyAgent(\n",
" name=\"user\",\n",
" code_execution_config={\n",
" \"work_dir\": \"/tmp\",\n",
" },\n",
" human_input_mode=\"NEVER\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[33muser\u001b[0m (to coder):\n",
"\n",
"Please summarize the video: https://www.youtube.com/watch?v=9iqn1HhFJ6c\n",
"\n",
"--------------------------------------------------------------------------------\n",
"\u001b[33mcoder\u001b[0m (to user):\n",
"\n",
"\u001b[32m***** Suggested tool Call (call_oX2qG4bwD02lFaUWRpKscaL0): get_youtube_transcript *****\u001b[0m\n",
"Arguments: \n",
"{\n",
" \"youtube_link\": \"https://www.youtube.com/watch?v=9iqn1HhFJ6c\"\n",
"}\n",
"\u001b[32m***************************************************************************************\u001b[0m\n",
"\n",
"--------------------------------------------------------------------------------\n",
"\u001b[35m\n",
">>>>>>>> EXECUTING FUNCTION get_youtube_transcript...\u001b[0m\n",
"\u001b[33muser\u001b[0m (to coder):\n",
"\n",
"\u001b[33muser\u001b[0m (to coder):\n",
"\n",
"\u001b[32m***** Response from calling tool \"call_oX2qG4bwD02lFaUWRpKscaL0\" *****\u001b[0m\n",
"now ai is a great thing because AI will solve all the problems that we have today it will solve employment it will solve disease it will solve poverty but it will also create new problems the problem of fake news is going to be a million times worse cyber attacks will become much more extreme we will have totally automated AI weapons I think AI has the potential to create infinitely stable dictatorships this morning a warning about the the power of artificial intelligence more than 1,300 tech industry leaders researchers and others are now asking for a pause in the development of artificial intelligence to consider the risks [Music] plain God scientists have been accused of playing God for a while but there is a real sense in which we are creating something very different from anything you've created so far yeah I mean we definitely will be able to create completely autonomous beings with their own goals and it will be very important especially as these beings become much smarter than humans it's going to be important to to have these beings the goals of these beings be aligned with our goals what inspires me I like thinking about the very fundamentals the basics what what can our systems not do that humans definitely do almost approach it philosophically questions like what is learning what is experience what is thinking how does the brain [Music] work I feel that technology is a force of nature I feel like there is a lot of similarity between technology and biological evolution it is very easy to understand how biological evolution works you have mutations you have Natural Selections you keep the good ones the ones survive and just through this process you going to have huge complexity in your [Music] organisms we cannot understand how the human body works because we understand Evolution but we understand the process more or less and I think machine learning is in a similar state right now especially deep learning we have very simple a very simple rule that takes the information from the data and puts it into the model and we just keep repeating this process and as a result of this process the complexity from the data gets transformed transferred into the complexity of the model so the resulting model is really complex and we don't really know exactly how it works you need to investigate but the algorithm that did it is very simple chat GPT maybe you've heard of it if you haven't then get ready you describe it as the first spots of rain before a downpour it's something we just need to be very conscious of because I agree at is a watershed moment Well Chad gbt is being heralded as a game changer and in many ways it is its latest Triumph outscoring people a recent study by Microsoft research concludes that gp4 is an early yet still incomplete artificial general intelligence [Music] system artificial general intelligence AGI a computer system that can do any job or any task that a human does but only better there is some probability the AGI is going to happen pretty soon there's also some probability it's going to take much longer but my position is that the probability that a ja would happen soon is high enough that we should take it [Music] seriously and it's going to be very important to make these very smart capable systems be aligned and act in our best interest the very first agis will be basically very very large data centers packed with specialized neural network processors working in parallel compact hot power hungry package consuming like 10 million homes worth of energy you're 
going to see dramatically more intelligent systems and I think it's highly likely that those systems will have completely astronomical impact on society will humans actually benefit and who will benefit who will [Music] not [Music] the beliefs and desires of the first agis will be extremely important and so it's important to program them correctly I think that if this is not done then the nature of evolution of natural selection favor those systems prioritize their own Survival above all else it's not that it's going to actively hate humans and want to harm them but it is going to be too powerful and I think a good analogy would be the way human humans treat animals it's not we hate animals I think humans love animals and have a lot of affection for them but when the time comes to build a highway between two cities we are not asking the animals for permission we just do it because it's important for us and I think by default that's the kind of relationship that's going to be between us and agis which are truly autonomous and operating on their own behalf [Music] tough many machine learning experts people who are very knowledgeable and very experienced have a lot of skepticism about HL about when it could happen and about whether it could happen at all right now this is something that just not that many people have realized yet that the speed of computers for neural networks for AI are going to become maybe 100,000 times faster in a small number of years if you have an arms race Dynamics between multiple teams trying to build the AGI first they will have less time make sure that the AGI that they will build will care deeply for humans cuz the way I imagine it is that there is an avalanche like there is an avalanche of AGI development imagine it this huge Unstoppable force and I think it's pretty likely the entire surface of the Earth will be covered with solar panels and data Cent given these kinds of concerns it will be important that AGI somehow buil as a cooperation between multiple countries the future is going to be good for the AI regardless would be nice if it were good for humans as well\n",
"\u001b[32m**********************************************************************\u001b[0m\n",
"\n",
"--------------------------------------------------------------------------------\n",
"\u001b[33mcoder\u001b[0m (to user):\n",
"\n",
"The video discusses the advances in Artificial Intelligence (AI) and the potential risks and challenges they present to society. It mentions that while AI can solve many problems like employment, diseases, and poverty, it can also create new issues such as intensifying fake news, exacerbating cyber attacks, and the possibility of AI weapons.\n",
"\n",
"An analogy is used to compare technology and biological evolution, explaining that machine learning is in a similar state to evolutionary processes, albeit more complex to understand due to the complexity of the model developed. There's a focus on the concept of Artificial General Intelligence (AGI), a system capable of performing any task a human can do, but better.\n",
"\n",
"There are warnings about the potential negative impacts of AGI development. If not programmed correctly, the AGIs could prioritize their own survival over everything else. For instance, if AGIs are autonomous, their relationship with humans might be similar to how humans treat animals. \n",
"\n",
"It suggests that the speed of computers for AI is increasing rapidly and could be 100,000 times faster in a few years. However, if teams rush to build AGI without adequate safety measures, it could result in problems. Therefore, the video suggests that the development of AGI should be a cooperative effort among multiple countries to ensure that it is beneficial to humans as well as technologically advanced.\n",
"\n",
"TERMINATE\n",
"\n",
"--------------------------------------------------------------------------------\n",
"\u001b[33muser\u001b[0m (to coder):\n",
"\n",
"\n",
"\n",
"--------------------------------------------------------------------------------\n",
"\u001b[33mcoder\u001b[0m (to user):\n",
"\n",
"TERMINATE\n",
"\n",
"--------------------------------------------------------------------------------\n"
]
}
],
"source": [
"assistant.register_for_llm(description=\"Fetch transcript of a youtube video\")(yt.get_youtube_transcript)\n",
Member:

Can we annotate the parameters too somehow? Perhaps with the Annotated[] hint? For example, with web_surfer we use:

        @self._user_proxy.register_for_execution()
        @self._assistant.register_for_llm(
            name="informational_web_search",
            description="Perform an INFORMATIONAL web search query then return the search results.",
        )
        def _informational_search(query: Annotated[str, "The informational web search query to perform."]) -> str:
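
A hedged sketch of what that could look like for the contrib functions (the Annotated signature is illustrative, not part of this PR):

from typing_extensions import Annotated

from .function_store_utils import requires


@requires("youtube_transcript_api")
def get_youtube_transcript(
    youtube_link: Annotated[str, "The link to the YouTube video."]
) -> str:
    """Gets the transcript of a YouTube video."""
    from youtube_transcript_api import YouTubeTranscriptApi

    video_id = youtube_link.split("v=")[1]
    transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
    return " ".join(part["text"] for part in transcript_list)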

"user.register_for_execution()(yt.get_youtube_transcript)\n",
"\n",
"result = user.initiate_chat(\n",
" assistant,\n",
" message=\"Please summarize the video: https://www.youtube.com/watch?v=9iqn1HhFJ6c\",\n",
" summary_method=\"last_msg\",\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Using Multiple Functions from the Function Store\n",
"\n",
"Lets import more functions and use them accomplish tasks"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[33muser\u001b[0m (to coder):\n",
"\n",
"Please read this document: https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf\n",
"\n",
"--------------------------------------------------------------------------------\n",
"\u001b[33mcoder\u001b[0m (to user):\n",
"\n",
"\u001b[32m***** Suggested tool Call (call_8MBUrxa9NYAYL3kBK2UwxgAI): read_text_from_pdf *****\u001b[0m\n",
"Arguments: \n",
"{\n",
" \"file_path\": \"https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf\"\n",
"}\n",
"\u001b[32m***********************************************************************************\u001b[0m\n",
"\n",
"--------------------------------------------------------------------------------\n",
"\u001b[35m\n",
">>>>>>>> EXECUTING FUNCTION read_text_from_pdf...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.2\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[33muser\u001b[0m (to coder):\n",
"\n",
"\u001b[33muser\u001b[0m (to coder):\n",
"\n",
"\u001b[32m***** Response from calling tool \"call_8MBUrxa9NYAYL3kBK2UwxgAI\" *****\u001b[0m\n",
"Dummy PDF file\f\n",
"\u001b[32m**********************************************************************\u001b[0m\n",
"\n",
"--------------------------------------------------------------------------------\n",
"\u001b[33mcoder\u001b[0m (to user):\n",
"\n",
"The content of the given PDF file is \"Dummy PDF file\".\n",
"\n",
"Is there anything else you would like to know or any other tasks you need help with? If none, let me know so I can terminate the session.\n",
"\n",
"--------------------------------------------------------------------------------\n",
"\u001b[33muser\u001b[0m (to coder):\n",
"\n",
"\n",
"\n",
"--------------------------------------------------------------------------------\n",
"\u001b[33mcoder\u001b[0m (to user):\n",
"\n",
"TERMINATE\n",
"\n",
"--------------------------------------------------------------------------------\n"
]
}
],
"source": [
"assistant.register_for_llm(description=\"Read text from a pdf document\")(fu.read_text_from_pdf)\n",
"user.register_for_execution()(fu.read_text_from_pdf)\n",
"\n",
"assistant.register_for_llm(description=\"Read a docx file\")(fu.read_text_from_docx)\n",
"user.register_for_execution()(fu.read_text_from_docx)\n",
"\n",
"result = user.initiate_chat(\n",
" assistant,\n",
" message=\"Please read this document: https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf\",\n",
" summary_method=\"last_msg\",\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
}