diff --git a/.coverage b/.coverage new file mode 100644 index 0000000..51834ba Binary files /dev/null and b/.coverage differ diff --git a/.gitignore b/.gitignore index 326e1ae..4613b53 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,4 @@ dist build run-venv/ .mypy_cache/ +.benchmarks/ diff --git a/README.md b/README.md index bf91d21..31e5782 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Agentrun : Run AI generated code safely +# Agentrun: Run AI Generated Code Safely [![PyPI](https://img.shields.io/pypi/v/agentrun.svg)](https://pypi.org/project/agentrun/) [![Tests](https://github.com/jonathan-adly/agentrun/actions/workflows/test.yml/badge.svg)](https://github.com/jonathan-adly/agentrun/actions/workflows/test.yml) @@ -6,9 +6,25 @@ [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/jonathan-adly/agentrun/blob/main/LICENSE) [![Twitter Follow](https://img.shields.io/twitter/follow/Jonathan_Adly_?style=social)](https://twitter.com/Jonathan_Adly_) -Agentrun is a Python library that makes it a breeze to run python code safely from large language models (LLMs) with a single line of code. Built on top of docker python SDK and RestrictedPython, it provides a simple, transparent, and user-friendly API to manage isolated code exeuction. +Agentrun is a Python library that makes it easy to run Python code safely from large language models (LLMs) with a single line of code. Built on top of the Docker Python SDK and RestrictedPython, it provides a simple, transparent, and user-friendly API to manage isolated code execution. + +Agentrun automatically installs and uninstalls dependencies, limits resource consumption, checks code safety, and sets execution timeouts. It has 97% test coverage with full static typing and only two dependencies. + + +## Why? + +Giving code execution ability to LLMs is a massive upgrade. 
Consider the following user query: `what is 12345 * 54321?` or even something more ambitious like `what is the average daily move of Apple stock during the last week?`? With code execution it is possible for LLMs to answer both accurately by executing code. + +However, executing untrusted code is dangerous and full of potential footguns. For instance, without proper safeguards, an LLM might generate harmful code like this: + +```python +import os +# deletes all files and directories +os.system('rm -rf /') +``` + +This package gives code execution ability to **any LLM** in a single line of code, while preventing and guarding against dangerous code. -Agentrun automatically install and uninstall dependencies, limits resource consumption, checks code safety, and set execution timeouts. It has >97% test coverage with full static typing and only 2 dependecies. ## Key Features @@ -17,19 +33,20 @@ Agentrun automatically install and uninstall dependencies, limits resource consu - **Configurable Resource Management**: You can set how much compute resources the code can consume, with sane defaults - **Timeouts**: Set time limits on how long a script can take to run - **Dependency Management**: Complete control on what dependencies are allowed to install -- **Automatic Cleanups**: Agentrun cleans any artificats created by the code generated +- **Automatic Cleanups**: Agentrun cleans any artifacts created by the generated code. - **Comes with a REST API**: Hate setting up docker? Agentrun comes with already configured docker setup for self-hosting. -If you want to use your own docker configuration, use this package. If you want an already configured docker setup and API that is ready for self-hosting. Please see here: https://github.com/Jonathan-Adly/agentrun-api +If you want to use your own Docker configuration, install this package with pip and simply initialize Agentrun with a running Docker container. 
Additionally, you can use an already configured Docker Compose setup and API that is ready for self-hosting by cloning this repo. + +Unless you are comfortable with Docker, **we highly recommend using the REST API with the already configured Docker as a standalone service.** -**We Highly recommend using the REST API with already configured docker as a standalone service. It is available here: https://github.com/Jonathan-Adly/agentrun-api** ## Get Started in Minutes -There are two ways to use agentrun - depending on your needs. With pip if you want to use your own docker setup, or you can directly use it as a rest API as a standalone service (recommended). +There are two ways to use Agentrun, depending on your needs: with pip for your own Docker setup, or directly as a REST API as a standalone service (recommended). -1. Install Agentrun with a single command via pip (you will need to configure your own docker setup) +1. Install Agentrun with a single command via pip (you will need to configure your own Docker setup): ```bash pip install agentrun @@ -48,13 +65,13 @@ print(result) #> "Hello, world!" ``` -Worried about spinning up docker containers? No problem. +Worried about spinning up Docker containers? No problem. -2. Install the agentrun REST api from github and get going immediately +2. Clone this repository and start immediately with a standalone REST API: ```bash -git clone https://github.com/Jonathan-Adly/agentrun-api -cd agentrun-api -cp .example.env .dev.env +git clone https://github.com/Jonathan-Adly/agentrun +cd agentrun/agentrun-api +cp .env.example .env.dev docker-compose up -d --build ``` @@ -92,19 +109,27 @@ Customize | Fully | Partially | ## Usage -Now, let's see AgentRun in action with something more complicated. We will take advantage of function calling and agentrun, to have LLMs write and execute code on the fly to solve arbitrary tasks. 
You can find the full code under `examples/function_calling.py` +Now, let's see AgentRun in action with something more complicated. We will take advantage of function calling and agentrun, to have LLMs write and execute code on the fly to solve arbitrary tasks. You can find the full code under `examples/` + +First, we will install the needed packages. We are using mixtral here via groq to keep things fast and with minimal dependencies, but agentrun works with any LLM out of the box. All that's required is for the LLM to return a code snippet. -We are using the REST API as it is recommend to seperate the code execution service from the rest of our infrastructure. +> FYI: OpenAI assistant tool `code_interpreter` can execute code. Agentrun is a transparent, open-source version that can work with any LLM. -1. Install needed packages. ```bash -pip install openai requests +!pip install groq +!pip install requests +``` + +Next, we will set up a function that executes the code and returns an output. We are using the API here, so make sure to have it running before trying this. + +Here are the steps to run the API: +```bash +git clone https://github.com/Jonathan-Adly/agentrun +cd agentrun/agentrun-api +cp .env.example .env.dev +docker-compose up -d --build ``` -> We are using openai her to keep the code simple with minimal depenencies, but agentrun works with any LLM out of the box. All what's required is for the LLM to return a code snippet. -> -> FYI: OpenAI assistant tool ` code_interpreter` can execute code. Agentrun is a transparent, open-source version that can work with any LLM. -2. Setup a function that executed the code and returns an output. ```python def execute_python_code(code: str) -> str: response = requests.post("http://localhost:8000/v1/run/", json={"code": code}) @@ -112,15 +137,19 @@ def execute_python_code(code: str) -> str: return output ``` -3. Setup your LLM function calling. +Next, we will set up our LLM function calling skeleton code. We need: + +1. 
An LLM client such as Groq, OpenAI, or Anthropic (alternatively, you can use LiteLLM as a wrapper) +2. The model you will use +3. Our code execution tool - that encourages the LLM model to send us python code to execute reliably ```python -GPT_MODEL = "gpt-4-turbo-preview" +from groq import Groq +import json -# set your API key here. -os.environ["OPENAI_API_KEY"] = "Your OpenAI key here" +client = Groq(api_key ="Your API Key") -client = OpenAI() +MODEL = 'mixtral-8x7b-32768' tools = [ { @@ -143,7 +172,7 @@ tools = [ ] ``` -4. Setup a function to call your LLM of choice. +Next, we will set up a function to call our LLM of choice. ```python def chat_completion_request(messages, tools=None, tool_choice=None, model=GPT_MODEL): try: @@ -160,7 +189,8 @@ def chat_completion_request(messages, tools=None, tool_choice=None, model=GPT_MO return e ``` -5. Pass on the user query and get the answer. +Finally, we will set up a function that takes the user query and returns an answer. It uses Agentrun to execute code when the LLM determines code execution is necessary to answer the question. + ```python def get_answer(query): messages = [] @@ -192,9 +222,10 @@ def get_answer(query): return answer ``` -Example Response: +Now let's try it! +`get_answer("what's the average daily move of Apple stock in the last 3 days?")` +"The average daily movement of Apple's stock in the last 3 days is approximately $2.39." -- print (get_answer("what's the average daily move of Apple stock in the last 3 days?")) --> "The average daily movement of Apple's stock over the last 3 days was $2.39." **How did get this answer?** @@ -216,7 +247,7 @@ print(f'{average_move:.2f}') ``` That code was sent to agentrun, which outputted: -`'\r[*********************100%%**********************] 1 of 1 completed\n2.391396866861979\n'` +`'\r[*********************100%%**********************] 1 of 1 completed\n2.39'` Lastly, the output was sent to the LLM again to make human friendly. 
Giving us the final answer: $2.39 @@ -290,3 +321,8 @@ To run the tests: ```bash pytest ``` + +To run the test with coverage +```bash +pytest --cov=agentrun tests/ +``` \ No newline at end of file diff --git a/agentrun-api/.env.dev b/agentrun-api/.env.dev new file mode 100644 index 0000000..d676597 --- /dev/null +++ b/agentrun-api/.env.dev @@ -0,0 +1,2 @@ +# Container name - can change depending on your docker setup +CONTAINER_NAME="agentrun-api-python_runner-1" diff --git a/agentrun-api/.env.example b/agentrun-api/.env.example new file mode 100644 index 0000000..d676597 --- /dev/null +++ b/agentrun-api/.env.example @@ -0,0 +1,2 @@ +# Container name - can change depending on your docker setup +CONTAINER_NAME="agentrun-api-python_runner-1" diff --git a/agentrun-api/LICENSE b/agentrun-api/LICENSE new file mode 100644 index 0000000..57010ed --- /dev/null +++ b/agentrun-api/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright 2024 Jonathan Adly + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/docs/index.md b/agentrun-api/README.md similarity index 88% rename from docs/index.md rename to agentrun-api/README.md index bf91d21..b889763 100644 --- a/docs/index.md +++ b/agentrun-api/README.md @@ -1,4 +1,4 @@ -# Agentrun : Run AI generated code safely +# Agentrun-API : Run AI generated code safely [![PyPI](https://img.shields.io/pypi/v/agentrun.svg)](https://pypi.org/project/agentrun/) [![Tests](https://github.com/jonathan-adly/agentrun/actions/workflows/test.yml/badge.svg)](https://github.com/jonathan-adly/agentrun/actions/workflows/test.yml) @@ -10,6 +10,8 @@ Agentrun is a Python library that makes it a breeze to run python code safely fr Agentrun automatically install and uninstall dependencies, limits resource consumption, checks code safety, and set execution timeouts. It has >97% test coverage with full static typing and only 2 dependecies. +**This is a fastapi wrapper over the library with docker-compose already configured for you.** This where you should start, unless you are very comfortable with docker and want to use the package with your custom docker setup. 
+ ## Key Features - **Safe code execution**: Agentrun checks the generated code for dangerous elements before execution @@ -20,41 +22,30 @@ Agentrun automatically install and uninstall dependencies, limits resource consu - **Automatic Cleanups**: Agentrun cleans any artificats created by the code generated - **Comes with a REST API**: Hate setting up docker? Agentrun comes with already configured docker setup for self-hosting. +## REST API usage +```bash +git clone https://github.com/Jonathan-Adly/agentrun-api +cd agentrun-api +cp .env.example .env.dev +docker-compose up -d --build +``` + +- Open http://localhost:8000 in your browser for the API Swagger documentation. -If you want to use your own docker configuration, use this package. If you want an already configured docker setup and API that is ready for self-hosting. Please see here: https://github.com/Jonathan-Adly/agentrun-api -**We Highly recommend using the REST API with already configured docker as a standalone service. It is available here: https://github.com/Jonathan-Adly/agentrun-api** ## Get Started in Minutes There are two ways to use agentrun - depending on your needs. With pip if you want to use your own docker setup, or you can directly use it as a rest API as a standalone service (recommended). -1. Install Agentrun with a single command via pip (you will need to configure your own docker setup) +> Want to install the library via pip and set up your own docker? Visit here: https://pypi.org/project/agentrun/ -```bash -pip install agentrun -``` - -Now, let's see AgentRun in action with a simple example: - -```Python -from agentrun import AgentRun +Install the agentrun REST API from GitHub and get going immediately -runner = AgentRun(container_name="my_container") # container should be running -code_from_llm = get_code_from_llm(prompt) # "print('hello, world!')" - -result = runner.execute_code_in_container(code_from_llm) -print(result) -#> "Hello, world!" -``` - -Worried about spinning up docker containers? 
No problem. - -2. Install the agentrun REST api from github and get going immediately ```bash git clone https://github.com/Jonathan-Adly/agentrun-api cd agentrun-api -cp .example.env .dev.env +cp .env.example .env.dev docker-compose up -d --build ``` @@ -94,13 +85,13 @@ Customize | Fully | Partially | Now, let's see AgentRun in action with something more complicated. We will take advantage of function calling and agentrun, to have LLMs write and execute code on the fly to solve arbitrary tasks. You can find the full code under `examples/function_calling.py` -We are using the REST API as it is recommend to seperate the code execution service from the rest of our infrastructure. +We will be using the REST API to separate the code execution service from the rest of our infrastructure. 1. Install needed packages. ```bash pip install openai requests ``` -> We are using openai her to keep the code simple with minimal depenencies, but agentrun works with any LLM out of the box. All what's required is for the LLM to return a code snippet. +> We are using OpenAI here to keep the code simple with minimal dependencies, but agentrun works with any LLM out of the box. All that's required is for the LLM to return a code snippet. > > FYI: OpenAI assistant tool ` code_interpreter` can execute code. Agentrun is a transparent, open-source version that can work with any LLM. @@ -274,6 +265,13 @@ print(result) #> "Hello, world!" ``` +## Deployment + +For deployment in production you will need to set up Nginx or similar and SSL. Again, we highly recommend running this as a standalone API server away from your infrastructure. There is no such thing as 100% sandboxed Python code. + +A hosted version (paid) is coming soon. + + ## Development To contribute to this library, first checkout the code. 
Then create a new virtual environment: diff --git a/agentrun-api/bruno_api_docs/Health.bru b/agentrun-api/bruno_api_docs/Health.bru new file mode 100644 index 0000000..105147a --- /dev/null +++ b/agentrun-api/bruno_api_docs/Health.bru @@ -0,0 +1,11 @@ +meta { + name: Health + type: http + seq: 1 +} + +get { + url: {{scheme}}{{host}}/v1/health/ + body: none + auth: none +} diff --git a/agentrun-api/bruno_api_docs/Redirect Docs.bru b/agentrun-api/bruno_api_docs/Redirect Docs.bru new file mode 100644 index 0000000..38ebcfe --- /dev/null +++ b/agentrun-api/bruno_api_docs/Redirect Docs.bru @@ -0,0 +1,11 @@ +meta { + name: Redirect Docs + type: http + seq: 2 +} + +get { + url: {{scheme}}{{host}} + body: none + auth: none +} diff --git a/agentrun-api/bruno_api_docs/Run Code.bru b/agentrun-api/bruno_api_docs/Run Code.bru new file mode 100644 index 0000000..11af814 --- /dev/null +++ b/agentrun-api/bruno_api_docs/Run Code.bru @@ -0,0 +1,17 @@ +meta { + name: Run Code + type: http + seq: 3 +} + +post { + url: {{scheme}}{{host}}/v1/run/ + body: json + auth: none +} + +body:json { + { + "code": "print('hello, world!')" + } +} diff --git a/agentrun-api/bruno_api_docs/bruno.json b/agentrun-api/bruno_api_docs/bruno.json new file mode 100644 index 0000000..142f2ef --- /dev/null +++ b/agentrun-api/bruno_api_docs/bruno.json @@ -0,0 +1,9 @@ +{ + "version": "1", + "name": "genbox", + "type": "collection", + "ignore": [ + "node_modules", + ".git" + ] +} \ No newline at end of file diff --git a/agentrun-api/bruno_api_docs/environments/local.bru b/agentrun-api/bruno_api_docs/environments/local.bru new file mode 100644 index 0000000..f5b1bc3 --- /dev/null +++ b/agentrun-api/bruno_api_docs/environments/local.bru @@ -0,0 +1,4 @@ +vars { + scheme: http:// + host: localhost:8000 +} diff --git a/agentrun-api/docker-compose.yml b/agentrun-api/docker-compose.yml new file mode 100644 index 0000000..e403ec4 --- /dev/null +++ b/agentrun-api/docker-compose.yml @@ -0,0 +1,27 @@ +services: + api: + 
build: + context: ./ + dockerfile: docker/api/Dockerfile + command: uvicorn api.main:app --host 0.0.0.0 --port 8000 --reload + volumes: + - ./src:/code + - /var/run/docker.sock:/var/run/docker.sock + ports: + - "8000:8000" + env_file: + - ./.env.dev + + python_runner: + build: + context: ./ + dockerfile: docker/code_runner/Dockerfile + volumes: + - code_execution_volume:/code + command: ["tail", "-f", "/dev/null"] + pids_limit: 10 + security_opt: + - no-new-privileges:true + +volumes: + code_execution_volume: \ No newline at end of file diff --git a/agentrun-api/docker/api/Dockerfile b/agentrun-api/docker/api/Dockerfile new file mode 100644 index 0000000..bf3f819 --- /dev/null +++ b/agentrun-api/docker/api/Dockerfile @@ -0,0 +1,12 @@ +FROM python:3.12.2-slim-bullseye + +ENV PYTHONDONTWRITEBYTECODE=1 +ENV PYTHONUNBUFFERED=1 +ENV PIP_DISABLE_PIP_VERSION_CHECK=1 + +WORKDIR /code + +COPY ./requirements.txt /code/ +RUN pip install -r requirements.txt + +COPY ../ /code/ \ No newline at end of file diff --git a/agentrun-api/docker/code_runner/Dockerfile b/agentrun-api/docker/code_runner/Dockerfile new file mode 100644 index 0000000..ab0f035 --- /dev/null +++ b/agentrun-api/docker/code_runner/Dockerfile @@ -0,0 +1,17 @@ +FROM python:3.12.2-slim-bullseye + +# Security best practices +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PIP_NO_CACHE_DIR=off \ + PIP_DISABLE_PIP_VERSION_CHECK=on \ + PIP_DEFAULT_TIMEOUT=100 + +# Create a non-root user with its home directory +RUN useradd --create-home pythonuser +USER pythonuser +WORKDIR /code + +# Optional: Only if you have specific dependencies +# COPY --chown=appuser:appuser requirements.txt . 
+# RUN pip install --user -r requirements.txt diff --git a/agentrun-api/requirements.txt b/agentrun-api/requirements.txt new file mode 100644 index 0000000..15850d8 --- /dev/null +++ b/agentrun-api/requirements.txt @@ -0,0 +1,8 @@ +fastapi==0.110.1 +uvicorn==0.29.0 +agentrun + +# Testing +httpx==0.27.0 +pytest==8.0 +pytest-cov==5.0.0 \ No newline at end of file diff --git a/agentrun-api/src/.coverage b/agentrun-api/src/.coverage new file mode 100644 index 0000000..4eca927 Binary files /dev/null and b/agentrun-api/src/.coverage differ diff --git a/agentrun-api/src/api/__init__.py b/agentrun-api/src/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/agentrun-api/src/api/main.py b/agentrun-api/src/api/main.py new file mode 100644 index 0000000..09f76c9 --- /dev/null +++ b/agentrun-api/src/api/main.py @@ -0,0 +1,54 @@ +import asyncio +import os +from concurrent.futures import ThreadPoolExecutor + +from fastapi import FastAPI +from fastapi.middleware.cors import CORSMiddleware +from fastapi.responses import RedirectResponse +from pydantic import BaseModel + +from agentrun import AgentRun + + +class CodeSchema(BaseModel): + code: str + + +class OutputSchema(BaseModel): + output: str + + +app = FastAPI() + +# allow all origins +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + + +@app.get("/v1/health/", response_model=dict) +async def health(): + return { + "status": "ok", + } + + +@app.get("/") +async def redirect_docs(): + return RedirectResponse(url="/docs") + + +@app.post("/v1/run/", response_model=OutputSchema) +async def run_code(code_schema: CodeSchema): + runner = AgentRun( + container_name=os.environ.get("CONTAINER_NAME", "agentrun-python_runner-1"), + ) + python_code = code_schema.code + with ThreadPoolExecutor() as executor: + future = executor.submit(runner.execute_code_in_container, python_code) + output = await asyncio.wrap_future(future) + return 
OutputSchema(output=output) diff --git a/agentrun/__init__.py b/agentrun/__init__.py index fe91709..0c9534b 100644 --- a/agentrun/__init__.py +++ b/agentrun/__init__.py @@ -364,6 +364,10 @@ def execute_code_in_container(self, python_code: str) -> str: finally: if container: - self.clean_up(container, script_name, dependencies) + # run clean up in a seperate thread to avoid blocking the main thread + thread = Thread( + target=self.clean_up, args=(container, script_name, dependencies) + ) + thread.start() return output diff --git a/docs/changelog.md b/docs/changelog.md new file mode 100644 index 0000000..e45d08e --- /dev/null +++ b/docs/changelog.md @@ -0,0 +1,5 @@ +v0.1.1 +1. More documentation and examples +2. agentrun-api and agentrun combined Repo +3. Cleaning up is now on a seperate thread. Performance improvement. +4. Benchmarks tests diff --git a/examples/function_calling_groq.ipynb b/examples/function_calling_groq.ipynb new file mode 100644 index 0000000..26e95a7 --- /dev/null +++ b/examples/function_calling_groq.ipynb @@ -0,0 +1,306 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Agentrun Usage \n", + "\n", + "Now, let's see AgentRun in action with something more complicated. We will take advantage of function calling and agentrun, to have LLMs write and execute code on the fly to solve arbitrary tasks. You can find the full code under `examples/`\n", + "\n", + "We are using the REST API as it is recommend to seperate the code execution service from the rest of our infrastructure.\n", + "\n", + "First, we will install the needed packages. We are using mixtral here via groq to keep things fast and with minimal depenencies, but agentrun works with any LLM out of the box. All what's required is for the LLM to return a code snippet.\n", + "\n", + "> FYI: OpenAI assistant tool ` code_interpreter` can execute code. Agentrun is a transparent, open-source version that can work with any LLM." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: requests in ./.venv/lib/python3.12/site-packages (2.31.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in ./.venv/lib/python3.12/site-packages (from requests) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in ./.venv/lib/python3.12/site-packages (from requests) (3.7)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in ./.venv/lib/python3.12/site-packages (from requests) (2.2.1)\n", + "Requirement already satisfied: certifi>=2017.4.17 in ./.venv/lib/python3.12/site-packages (from requests) (2024.2.2)\n", + "Collecting groq\n", + " Using cached groq-0.5.0-py3-none-any.whl.metadata (12 kB)\n", + "Collecting anyio<5,>=3.5.0 (from groq)\n", + " Using cached anyio-4.3.0-py3-none-any.whl.metadata (4.6 kB)\n", + "Collecting distro<2,>=1.7.0 (from groq)\n", + " Using cached distro-1.9.0-py3-none-any.whl.metadata (6.8 kB)\n", + "Collecting httpx<1,>=0.23.0 (from groq)\n", + " Using cached httpx-0.27.0-py3-none-any.whl.metadata (7.2 kB)\n", + "Collecting pydantic<3,>=1.9.0 (from groq)\n", + " Using cached pydantic-2.7.0-py3-none-any.whl.metadata (103 kB)\n", + "Collecting sniffio (from groq)\n", + " Using cached sniffio-1.3.1-py3-none-any.whl.metadata (3.9 kB)\n", + "Collecting typing-extensions<5,>=4.7 (from groq)\n", + " Using cached typing_extensions-4.11.0-py3-none-any.whl.metadata (3.0 kB)\n", + "Requirement already satisfied: idna>=2.8 in ./.venv/lib/python3.12/site-packages (from anyio<5,>=3.5.0->groq) (3.7)\n", + "Requirement already satisfied: certifi in ./.venv/lib/python3.12/site-packages (from httpx<1,>=0.23.0->groq) (2024.2.2)\n", + "Collecting httpcore==1.* (from httpx<1,>=0.23.0->groq)\n", + " Using cached httpcore-1.0.5-py3-none-any.whl.metadata (20 kB)\n", + "Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->groq)\n", + 
" Using cached h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)\n", + "Collecting annotated-types>=0.4.0 (from pydantic<3,>=1.9.0->groq)\n", + " Using cached annotated_types-0.6.0-py3-none-any.whl.metadata (12 kB)\n", + "Collecting pydantic-core==2.18.1 (from pydantic<3,>=1.9.0->groq)\n", + " Downloading pydantic_core-2.18.1-cp312-cp312-macosx_11_0_arm64.whl.metadata (6.5 kB)\n", + "Using cached groq-0.5.0-py3-none-any.whl (75 kB)\n", + "Using cached anyio-4.3.0-py3-none-any.whl (85 kB)\n", + "Using cached distro-1.9.0-py3-none-any.whl (20 kB)\n", + "Using cached httpx-0.27.0-py3-none-any.whl (75 kB)\n", + "Using cached httpcore-1.0.5-py3-none-any.whl (77 kB)\n", + "Using cached pydantic-2.7.0-py3-none-any.whl (407 kB)\n", + "Downloading pydantic_core-2.18.1-cp312-cp312-macosx_11_0_arm64.whl (1.8 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.8/1.8 MB\u001b[0m \u001b[31m15.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m0:01\u001b[0m\n", + "\u001b[?25hUsing cached sniffio-1.3.1-py3-none-any.whl (10 kB)\n", + "Using cached typing_extensions-4.11.0-py3-none-any.whl (34 kB)\n", + "Using cached annotated_types-0.6.0-py3-none-any.whl (12 kB)\n", + "Using cached h11-0.14.0-py3-none-any.whl (58 kB)\n", + "Installing collected packages: typing-extensions, sniffio, h11, distro, annotated-types, pydantic-core, httpcore, anyio, pydantic, httpx, groq\n", + "Successfully installed annotated-types-0.6.0 anyio-4.3.0 distro-1.9.0 groq-0.5.0 h11-0.14.0 httpcore-1.0.5 httpx-0.27.0 pydantic-2.7.0 pydantic-core-2.18.1 sniffio-1.3.1 typing-extensions-4.11.0\n" + ] + } + ], + "source": [ + "# Agentrun Example\n", + "!pip install requests\n", + "!pip install groq" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we will setup a function that executed the code and returns an output. We are using the API here, so make sure to have it running before trying this. 
\n", + "\n", + "Here are the steps to run the API:\n", + "1. git clone https://github.com/Jonathan-Adly/agentrun\n", + "2. cd agentrun/agentrun-api\n", + "3. cp .env.example .env.dev\n", + "4. docker-compose up -d --build" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Hello, World!\\n'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import requests\n", + "def execute_python_code(code: str) -> str:\n", + " response = requests.post(\"http://localhost:8000/v1/run/\", json={\"code\": code})\n", + " output = response.json()[\"output\"]\n", + " return output\n", + "\n", + "execute_python_code(\"print('Hello, World!')\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we will set up our LLM function calling skeleton code. We need:\n", + "\n", + "1. An LLM client such as Groq, OpenAI, or Anthropic (alternatively, you can use LiteLLM as a wrapper)\n", + "2. The model you will use \n", + "3. Our code execution tool - that encourages the LLM model to send us Python code to execute reliably" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from groq import Groq\n", + "import json\n", + "\n", + "client = Groq(api_key =\"Your API Key\")\n", + "\n", + "MODEL = 'mixtral-8x7b-32768'\n", + "\n", + "tools = [\n", + " {\n", + " \"type\": \"function\",\n", + " \"function\": {\n", + " \"name\": \"execute_python_code\",\n", + " \"description\": \"Sends a python code snippet to the code execution environment and returns the output.
The code execution environment can automatically import any library or package by importing.\",\n", + " \"parameters\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"code\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"The code snippet to execute. Must be a valid python code. Must use print() to output the result.\",\n", + " },\n", + " },\n", + " \"required\": [\"code\"],\n", + " },\n", + " },\n", + " },\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we will set up a function to call our LLM of choice." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "def chat_completion_request(messages, tools=None, tool_choice=\"auto\", model=MODEL):\n", + " try:\n", + " response = client.chat.completions.create(\n", + " model=model,\n", + " messages=messages,\n", + " tools=tools,\n", + " tool_choice=tool_choice,\n", + " )\n", + " return response\n", + " except Exception as e:\n", + " print(\"Unable to generate ChatCompletion response\")\n", + " print(f\"Exception: {e}\")\n", + " return e" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, we will set up a function that takes the user query and returns an answer. It uses Agentrun to execute code when the LLM determines that code execution is necessary to answer the question." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "def get_answer(query):\n", + " messages = []\n", + " messages.append(\n", + " {\n", + " \"role\": \"system\",\n", + " \"content\": \"\"\"Don't make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous.\\n \n", + " Use the execute_python_code tool to run code if a question is better solved with code. You can use any package in the code snippet by simply importing.
Like `import requests` would work fine.\\n\n", + " \"\"\",\n", + " }\n", + " )\n", + " messages.append({\"role\": \"user\", \"content\": query})\n", + "\n", + " chat_response = chat_completion_request(messages, tools=tools)\n", + "\n", + " message = chat_response.choices[0].message\n", + " # tool call versus content\n", + " if message.tool_calls:\n", + " tool_call = message.tool_calls[0]\n", + " arg = json.loads(tool_call.function.arguments)[\"code\"]\n", + " print(f\"Executing code: {arg}\")\n", + " answer = execute_python_code(arg)\n", + " # Optional: call an LLM again to turn the answer to a human friendly response\n", + " query = \"Help translate the code output to a human friendly response. This was the user query: \" + query + \" The code output is: \" + answer\n", + " answer = get_answer(query)\n", + " else:\n", + " answer = message.content\n", + "\n", + " return answer" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's try it!" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Executing code: import yfinance as yf\n", + "\n", + "stock = yf.Ticker('AAPL')\n", + "historical_data = stock.history(period='3d')\n", + "\n", + "# Calculate the daily move as the percentage change from close to close\n", + "daily_moves = [(historical_data['Close'][i] - historical_data['Close'][i-1]) / historical_data['Close'][i-1] for i in range(1, len(historical_data['Close']))]\n", + "\n", + "# Calculate the average daily move\n", + "average_daily_move = sum(daily_moves) / len(daily_moves) * 100\n", + "\n", + "print(average_daily_move)\n" + ] + }, + { + "data": { + "text/plain": [ + "\"The average daily movement of Apple's stock in the last 3 days is approximately $2.60.\"" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "get_answer(\"what's the average daily move of Apple stock 
in the last 3 days?\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "run-venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/function_calling.py b/examples/function_calling_openai.py similarity index 100% rename from examples/function_calling.py rename to examples/function_calling_openai.py diff --git a/pyproject.toml b/pyproject.toml index 116d0bc..1ba3ef7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "agentrun" -version = "v0.1" +version = "v0.1.1" description = "The easiest way to run AI or user generated python code safely in a docker container" readme = "README.md" requires-python = ">=3.10" @@ -14,9 +14,16 @@ dependencies = [ ] [build-system] -requires = ["setuptools"] +requires = ["setuptools", "wheel"] build-backend = "setuptools.build_meta" +[tool.setuptools] +include-package-data = false + +[tool.setuptools.packages.find] +include = ["agentrun*"] +exclude = ["agentrun-api*", "docs*", "examples*", "tests*", ".github*"] + [project.urls] Homepage = "https://github.com/jonathan-adly/agentrun" Changelog = "https://github.com/jonathan-adly/agentrun/releases" @@ -25,4 +32,4 @@ CI = "https://github.com/jonathan-adly/agentrun/actions" [project.optional-dependencies] -test = ["pytest", "pytest-cov", "mypy"] +test = ["pytest", "pytest-cov", "pytest-benchmark", "mypy"] diff --git a/tests/test_agentrun.py b/tests/test_agentrun.py index 709259c..b9af7aa 100644 --- a/tests/test_agentrun.py +++ b/tests/test_agentrun.py @@ -232,3 +232,19 @@ def test_execute_code_in_container_with_wrong_container_name(): ) output = runner.execute_code_in_container("print('Hello, World!')") assert output == 
"Container with name wrong-container-name not found." + + +def execute_code_in_container_benchmark(docker_container): + runner = AgentRun( + container_name=docker_container.name, + ) + code = "import numpy as np\nprint(np.array([1, 2, 3]))" + output = runner.execute_code_in_container(code) + return output + + +def test_dependency_benchmark(benchmark, docker_container): + result = benchmark( + execute_code_in_container_benchmark, docker_container=docker_container + ) + assert result == "[1 2 3]\n"