From 7fae70be5ef91e55ce56f2b76c2ec8bdbc607bf8 Mon Sep 17 00:00:00 2001 From: JanPeterDatakind Date: Fri, 5 Jul 2024 14:02:49 -0400 Subject: [PATCH 01/19] Unit test for get_memory function --- .github/workflows/get_memory_test.yml | 98 +++++++++++++++++++++++++++ docker-compose-github.yml | 3 +- tests/test_cases_get_memory.json | 66 ++++++++++++++++++ tests/test_get_memory.py | 24 +++++++ 4 files changed, 189 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/get_memory_test.yml create mode 100644 tests/test_cases_get_memory.json create mode 100644 tests/test_get_memory.py diff --git a/.github/workflows/get_memory_test.yml b/.github/workflows/get_memory_test.yml new file mode 100644 index 00000000..7fbb795d --- /dev/null +++ b/.github/workflows/get_memory_test.yml @@ -0,0 +1,98 @@ +name: Recipes server tests + +on: [push] + +jobs: + test: + runs-on: ubuntu-latest + environment: "GitHub Actions 1" + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + ASSISTANTS_API_KEY: ${{ secrets.OPENAI_API_KEY }} + ASSISTANTS_API_TYPE: ${{ secrets.ASSISTANTS_API_TYPE }} + ASSISTANTS_ID: ${{ secrets.ASSISTANTS_ID }} + ASSISTANTS_BASE_URL: ${{ secrets.ASSISTANTS_BASE_URL }} + ASSISTANTS_MODEL: ${{ secrets.ASSISTANTS_MODEL }} + ASSISTANTS_BOT_NAME: ${{ secrets.ASSISTANTS_BOT_NAME }} + + POSTGRES_DATA_HOST: ${{ secrets.POSTGRES_DATA_HOST }} + POSTGRES_DATA_PORT: ${{ secrets.POSTGRES_DATA_PORT }} + POSTGRES_DATA_DB: ${{ secrets.POSTGRES_DATA_DB }} + POSTGRES_DATA_USER: ${{ secrets.POSTGRES_DATA_USER }} + POSTGRES_DATA_PASSWORD: ${{ secrets.POSTGRES_DATA_PASSWORD }} + + DATA_DB_CONN_STRING: ${{ secrets.DATA_DB_CONN_STRING }} + + POSTGRES_RECIPE_HOST: ${{ secrets.POSTGRES_RECIPE_HOST }} + POSTGRES_RECIPE_PORT: ${{ secrets.POSTGRES_RECIPE_PORT }} + POSTGRES_RECIPE_DB: ${{ secrets.POSTGRES_RECIPE_DB }} + POSTGRES_RECIPE_USER: ${{ secrets.POSTGRES_RECIPE_USER }} + POSTGRES_RECIPE_PASSWORD: ${{ secrets.POSTGRES_RECIPE_PASSWORD }} + + RECIPE_DB_CONN_STRING: "postgresql://${{ secrets.POSTGRES_RECIPE_USER }}:${{ secrets.POSTGRES_RECIPE_PASSWORD }}@${{ secrets.POSTGRES_RECIPE_HOST }}:${{ secrets.POSTGRES_RECIPE_PORT }}/${{ secrets.POSTGRES_RECIPE_DB }}" + + RECIPES_OPENAI_API_TYPE: ${{ secrets.RECIPES_OPENAI_API_TYPE }} + RECIPES_OPENAI_API_KEY: ${{ secrets.RECIPES_OPENAI_API_KEY }} + RECIPES_MODEL: ${{ secrets.RECIPES_MODEL }} + RECIPES_OPENAI_TEXT_COMPLETION_DEPLOYMENT_NAME: ${{ secrets.RECIPES_OPENAI_TEXT_COMPLETION_DEPLOYMENT_NAME }} + RECIPES_MEMORY_SIMILARITY_CUTOFF: ${{ secrets.RECIPES_MEMORY_SIMILARITY_CUTOFF }} + RECIPES_RECIPE_SIMILARITY_CUTOFF: ${{ secrets.RECIPES_RECIPE_SIMILARITY_CUTOFF }} + RECIPES_HELPER_FUNCTION_SIMILARITY_CUTOFF: ${{ secrets.RECIPES_HELPER_FUNCTION_SIMILARITY_CUTOFF }} + RECIPES_MODEL_TEMP: ${{ secrets.RECIPES_MODEL_TEMP }} + RECIPES_MODEL_MAX_TOKENS: ${{ secrets.RECIPES_MODEL_MAX_TOKENS }} + + IMAGE_HOST: ${{ secrets.IMAGE_HOST }} + RECIPE_SERVER_API: ${{ secrets.RECIPE_SERVER_API }} + + CHAINLIT_AUTH_SECRET: ${{ secrets.CHAINLIT_AUTH_SECRET }} + USER_LOGIN: ${{ secrets.USER_LOGIN }} + USER_PASSWORD: ${{ secrets.USER_PASSWORD }} + + COLUMNS: 150 + + steps: + + - name: Checkout + uses: actions/checkout@v3 + + #- name: Checkout integration tests data + # uses: actions/checkout@master + # with: + # repository: datakind/recipes-ai-test-data + # ssh-key: ${{ secrets.GITHUB_SSH_PRIVATE_KEY}} + # path: recipes-ai-test-data + + - name: Expose GitHub Runtime + uses: crazy-max/ghaction-github-runtime@v2 + + - name: Spin up DB and recipes server + run: | + env > .env + + echo 
"Installing demo data ..." + pip3 install gdown==5.2.0 + cd data && python3 download_demo_data.py && cd .. + ls data/datadb + + # TODO this should be enhanced to use a buildx bake to leverage layer caching for faster builds, or push to repo and simply have a pull for the run + # TODO docker-compose files should be refactored to use scopes instead of different versions for each environment + echo "Starting docker containers for dbs and server ..." + docker-compose -f ./docker-compose-github.yml pull + docker-compose -f ./docker-compose-github.yml up -d --build + echo "logs datadb ..." + docker-compose -f docker-compose-github.yml logs datadb + docker ps + + # TODO The promptflow docker build wasn't working in GH actions, so deploying promptflow to host for now + - name: Run tests + uses: actions/setup-python@v4 + with: + python-version: "3.11.4" + - run: | + pip3 install pytest + + - run: | + echo "Running tests ..." + pytest + + \ No newline at end of file diff --git a/docker-compose-github.yml b/docker-compose-github.yml index e0aef263..a0e71406 100644 --- a/docker-compose-github.yml +++ b/docker-compose-github.yml @@ -66,8 +66,7 @@ services: - ./templates:/app/templates - ./utils:/app/utils - ./management/skills.py:/app/recipes/skills.py - - + - ./tests:/app/tests volumes: pgdata2: \ No newline at end of file diff --git a/tests/test_cases_get_memory.json b/tests/test_cases_get_memory.json new file mode 100644 index 00000000..a7543ec8 --- /dev/null +++ b/tests/test_cases_get_memory.json @@ -0,0 +1,66 @@ +{ + "tests": [ + { + "test_case": "Test Case 1", + "test_case_description": "Test to evaluate a query that returns a text response", + "user_input": "Get all recipes", + "chat_history": "[{'role': 'user', 'content': 'Get all recipes'}]", + "generate_intent": "true", + "expected_output": { + "result": { + "type": "text", + "file": "", + "value": "| | type | intent |\n|---:|:-------|:-----------------------------------------------------------------------------------------------------------------------------------------------------|\n| 0 | recipe | plot a line chart of conflict events by month for a country using HDX data as an image |\n| 1 | recipe | plot a scatterplot of food price movements and number of fatalities by country using HDXData data, including regression line as an image |\n| 2 | recipe | plot a line chart of fatalities by month for a country using HDX data as an image |\n| 3 | recipe | plot a line chart of commodity prices monthly relative change for a country from 2008-01-01 using HDX data as an image |\n| 4 | recipe | plot a bar chart of humanitarian organizations by sector for a given region using Humanitarian Data Exchange data as an image |\n| 5 | recipe | provide a list of organizations providing food security for a region in a country |\n| 6 | recipe | provide the total population of a provided country using HDX data as text |\n| 7 | recipe | plot population pyramids by age for a country using HDX data as an image |\n| 8 | recipe | plot a scatterplot of food price movements and number of fatalities by country using HDXData data, including regression line as an image |\n| 9 | recipe | plot a map of population by admin1 or admin2 for a country using HAPI data as an image |\n| 10 | recipe | plot a line chart of commodity prices monthly relative change for a country from 2008-01-01 using HDX data as an image |\n| 11 | recipe | List organizations in top 3 states by population in provided IPC Phase in a country, using HAPI data |\n| 12 | recipe | plot a bar chart of 
humanitarian organizations by sector for a given region using Humanitarian Data Exchange data as an image |\n| 13 | recipe | provide a text summary of metadata by subnational region using HAPI data as text |\n| 14 | recipe | get all recipes |\n| 15 | recipe | plot a map of IPC phase data by admin_1 using HDX data as an image |\n| 0 | memory | plot a line chart of conflict events by month for Chad using HDX data as an image |\n| 1 | memory | plot a scatterplot of food price movements and number of fatalities in TCD from 2008-01-01 using HDXData data, including regression line as an image |\n| 2 | memory | plot a line chart of fatalities by month for Chad using HDX data as an image |\n| 3 | memory | plot a line chart of commodity prices monthly relative change for Chad from 2008-01-01 using HDX data as an image |\n| 4 | memory | plot a bar chart of humanitarian organizations in Wadi Fira by sector using Humanitarian Data Exchange data as an image |\n| 5 | memory | provide a list of organizations providing food security in Wadi Fira, Chad |\n| 6 | memory | provide the total population of Mali using HDX data as text |\n| 7 | memory | plot population pyramids by age for Chad using HDX data as an image |\n| 8 | memory | plot a scatterplot of food price movements and number of fatalities in TCD from 2008-01-01 using HDXData data, including regression line as an image |\n| 9 | memory | plot a map of population by admin1 for Haiti using HAPI data as an image |\n| 10 | memory | plot a line chart of commodity prices monthly relative change for Chad from 2008-01-01 using HDX data as an image |\n| 11 | memory | List organizations in the top 3 states by population in IPC Phase 3+ in Chad, using HAPI data |\n| 12 | memory | plot a bar chart of humanitarian organizations in Wadi Fira by sector using Humanitarian Data Exchange data as an image |\n| 13 | memory | provide a text summary of metadata for Wadi Fira using HAPI data as text |\n| 14 | memory | retrieve all recipes |\n| 15 | memory | plot a map of IPC phase 3 data by admin_1 in Chad using HDX data as an image |" + }, + "metadata": "{\"params\": {}, \"attribution\": \"\", \"data_url\": \"\", \"time_period\": {\"start\": \"\", \"end\": \"\"}}", + "memory_type": "memory", + "memory": "retrieve all recipes", + "memory_found": "true" + } + }, + { + "test_case": "Test Case 2", + "test_case_description": "Test to evaluate a query that returns a text response", + "user_input": "provide the total population of Mali using HDX data as text", + "chat_history": "[{'role': 'user', 'content': 'Get all recipes'}, {'role': 'user', 'content': 'provide the total population of Mali using HDX data as text'}]", + "generate_intent": "true", + "expected_output": { + "result": { + "type": "number", + "file": "", + "value": "17907114" + }, + "metadata": "{\"params\": {\"country_code\": \"MLI\"}, \"attribution\": \"https://data.humdata.org/dataset/ce21c7db-d8f0-40f8-adc2-452d2d2d105c\", \"data_url\": \"https://data.humdata.org/dataset/ce21c7db-d8f0-40f8-adc2-452d2d2d105c/resource/6f243ba2-4d4a-4663-a7c4-e917dbbde73a/download/mli_pop_adm0_v2.csv\", \"time_period\": {\"start\": \"2018-01-01\", \"end\": \"2018-12-31T23:59:59\"}}", + "memory_type": "memory", + "memory": "provide the total population of Mali using HDX data as text", + "memory_found": "true" + } + }, + { + "test_case": "Test Case 3", + "test_case_description": "Test to evaluate a query that returns an image response", + "user_input": "plot a bar chart of humanitarian organizations in Wadi Fira by sector using 
Humanitarian Data Exchange data as an image", + "chat_history": "[{'role': 'user', 'content': 'Get all recipes'}, {'role': 'user', 'content': 'provide the total population of Mali using HDX data as text'}, {'role': 'user', 'content': 'plot a bar chart of humanitarian organizations in Wadi Fira by sector using Humanitarian Data Exchange data as an image'}]", + "generate_intent": "true", + "expected_output": { + "result": { + "type": "image", + "file": "http://localhost:8000/public/images/memory_image_e9d3e3ee-977f-4291-a51a-7e4e3e4cd5f3.png", + "value": "" + }, + "metadata": "{\"params\": {\"region\": \"Wadi Fira\"}, \"attribution\": \"https://data.humdata.org/dataset/682c3db6-e253-430f-a3f3-305ef079e2de\", \"data_url\": \"https://data.humdata.org/dataset/682c3db6-e253-430f-a3f3-305ef079e2de/resource/1e3ba1f4-2dbc-4f46-9ce7-2274e157188e/download/3w-tcd-20240508.xlsx\", \"time_period\": {\"start\": \"2023-12-01\", \"end\": \"2024-05-31T23:59:59.999999\"}}", + "memory_type": "memory", + "memory": "plot a bar chart of humanitarian organizations in Wadi Fira by sector using Humanitarian Data Exchange data as an image", + "memory_found": "true" + } + }, + { + "test_case": "Test Case 4", + "test_case_description": "Test to evaluate a query that does not return a memory", + "user_input": "Plot the distribution of internet access in Chad", + "chat_history": "[{'role': 'user', 'content': 'Get all recipes'}, {'role': 'user', 'content': 'provide the total population of Mali using HDX data as text'}, {'role': 'user', 'content': 'plot a bar chart of humanitarian organizations in Wadi Fira by sector using Humanitarian Data Exchange data as an image'}, {'role': 'user', 'content': 'Plot the distribution of internet access in Chad'}]", + "generate_intent": "true", + "expected_output": {"result":"Sorry, no recipe or memory found","memory_found":"false"} + } + ] +} \ No newline at end of file diff --git a/tests/test_get_memory.py b/tests/test_get_memory.py new file mode 100644 index 00000000..86805da9 --- /dev/null +++ b/tests/test_get_memory.py @@ -0,0 +1,24 @@ +from utils.general import call_execute_query_api, call_get_memory_recipe_api +import json +import pytest + +#load json file into variable and print it +@pytest.fixture +def get_test_cases(): + with open('test_cases_get_memory.json') as f: + test_data = json.load(f) + return test_data + +def test_get_memory_recipe(get_test_cases): + """ + Tests the get memory recipe API endpoint. + """ + + for test in get_test_cases.get('tests', []): + user_input = test["user_input"] + chat_history = test["chat_history"] + generate_intent = test["generate_intent"] + expected_output = test["expected_output"] + response = call_get_memory_recipe_api(user_input, chat_history, generate_intent) + assert response == expected_output + From f01c1b1120dec08284ddd10fc5f4a415d5ce7cc4 Mon Sep 17 00:00:00 2001 From: JanPeterDatakind Date: Fri, 5 Jul 2024 14:38:43 -0400 Subject: [PATCH 02/19] Fixing docker compose file and workflow --- .github/workflows/get_memory_test.yml | 2 ++ docker-compose-github.yml | 1 + 2 files changed, 3 insertions(+) diff --git a/.github/workflows/get_memory_test.yml b/.github/workflows/get_memory_test.yml index 7fbb795d..73c72d4f 100644 --- a/.github/workflows/get_memory_test.yml +++ b/.github/workflows/get_memory_test.yml @@ -92,6 +92,8 @@ jobs: pip3 install pytest - run: | + cd tests/ + chmod -R 755 . echo "Running tests ..." 
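       # pytest auto-discovers tests/test_get_memory.py in this directory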
pytest diff --git a/docker-compose-github.yml b/docker-compose-github.yml index a0e71406..68ff2d37 100644 --- a/docker-compose-github.yml +++ b/docker-compose-github.yml @@ -67,6 +67,7 @@ services: - ./utils:/app/utils - ./management/skills.py:/app/recipes/skills.py - ./tests:/app/tests + - ./utils:/app/test/utils volumes: pgdata2: \ No newline at end of file From 432bcfd2a4c6c13cd23bf3abdba6c3c290b198c5 Mon Sep 17 00:00:00 2001 From: JanPeterDatakind Date: Fri, 5 Jul 2024 14:58:25 -0400 Subject: [PATCH 03/19] fixing docker compose --- docker-compose-github.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose-github.yml b/docker-compose-github.yml index 68ff2d37..d90c5ef6 100644 --- a/docker-compose-github.yml +++ b/docker-compose-github.yml @@ -67,7 +67,7 @@ services: - ./utils:/app/utils - ./management/skills.py:/app/recipes/skills.py - ./tests:/app/tests - - ./utils:/app/test/utils + - ./utils:/app/tests/utils volumes: pgdata2: \ No newline at end of file From 592ce3b3d429d18f33199841111a3af0aebd6cb4 Mon Sep 17 00:00:00 2001 From: JanPeterDatakind Date: Fri, 5 Jul 2024 15:03:25 -0400 Subject: [PATCH 04/19] more fixes --- .github/workflows/get_memory_test.yml | 4 ---- requirements.txt | 1 + 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/get_memory_test.yml b/.github/workflows/get_memory_test.yml index 73c72d4f..14d6249d 100644 --- a/.github/workflows/get_memory_test.yml +++ b/.github/workflows/get_memory_test.yml @@ -88,12 +88,8 @@ jobs: uses: actions/setup-python@v4 with: python-version: "3.11.4" - - run: | - pip3 install pytest - - run: | cd tests/ - chmod -R 755 . echo "Running tests ..." pytest diff --git a/requirements.txt b/requirements.txt index f62fb740..40e76259 100644 --- a/requirements.txt +++ b/requirements.txt @@ -28,3 +28,4 @@ robocorp-actions robocorp-truststore seaborn==0.13.2 sqlalchemy==2.0.30 +pytest==8.2.2 From 6f8996535f062915df2963a11bf366633ab91466 Mon Sep 17 00:00:00 2001 From: JanPeterDatakind Date: Fri, 5 Jul 2024 15:06:50 -0400 Subject: [PATCH 05/19] adding pytest installation back in --- .github/workflows/get_memory_test.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/get_memory_test.yml b/.github/workflows/get_memory_test.yml index 14d6249d..e3a15ce2 100644 --- a/.github/workflows/get_memory_test.yml +++ b/.github/workflows/get_memory_test.yml @@ -88,6 +88,9 @@ jobs: uses: actions/setup-python@v4 with: python-version: "3.11.4" + - run: | + pip3 install pytest + - run: | cd tests/ echo "Running tests ..." From 7e64e5062f3e3609a80e15cb239eafb10437bcef Mon Sep 17 00:00:00 2001 From: JanPeterDatakind Date: Fri, 5 Jul 2024 15:15:14 -0400 Subject: [PATCH 06/19] Fix --- .github/workflows/get_memory_test.yml | 1 + server/fastapi/Dockerfile | 1 + 2 files changed, 2 insertions(+) diff --git a/.github/workflows/get_memory_test.yml b/.github/workflows/get_memory_test.yml index e3a15ce2..a30851d4 100644 --- a/.github/workflows/get_memory_test.yml +++ b/.github/workflows/get_memory_test.yml @@ -93,6 +93,7 @@ jobs: - run: | cd tests/ + ls echo "Running tests ..." pytest diff --git a/server/fastapi/Dockerfile b/server/fastapi/Dockerfile index 660bd0ad..c1724a7f 100644 --- a/server/fastapi/Dockerfile +++ b/server/fastapi/Dockerfile @@ -14,6 +14,7 @@ COPY ./templates /app/templates COPY ./utils /app/utils COPY ./management/skills.py /app/recipes/skills.py COPY requirements.txt /app +COPY ./utils /app/tests/utils # Install any needed packages specified in requirements.txt. 
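# The COPY of ./utils into /app/tests/utils above mirrors the ./tests and ./utils
# volume mounts added to docker-compose-github.yml earlier in this series, so the
# tests resolve the utils package the same way in the image and when mounted.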
RUN pip install --upgrade pip From 6784605ed0267bc9e350d3b8587b8fde48123e6e Mon Sep 17 00:00:00 2001 From: JanPeterDatakind Date: Fri, 5 Jul 2024 15:23:08 -0400 Subject: [PATCH 07/19] temporary fix to isolate the error --- tests/utils/__init__.py | 0 tests/utils/db.py | 131 ++++++++++ tests/utils/general.py | 153 ++++++++++++ tests/utils/llm.py | 213 ++++++++++++++++ tests/utils/recipes.py | 530 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 1027 insertions(+) create mode 100644 tests/utils/__init__.py create mode 100644 tests/utils/db.py create mode 100644 tests/utils/general.py create mode 100644 tests/utils/llm.py create mode 100644 tests/utils/recipes.py diff --git a/tests/utils/__init__.py b/tests/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/utils/db.py b/tests/utils/db.py new file mode 100644 index 00000000..12e65b58 --- /dev/null +++ b/tests/utils/db.py @@ -0,0 +1,131 @@ +import os + +import pandas as pd +import psycopg2 +from dotenv import load_dotenv +from sqlalchemy import create_engine + +from utils.general import call_execute_query_api, is_running_in_docker + +load_dotenv() + + +def get_connection(instance="data"): + """ + This function gets a connection to the database + + Args: + + instance (str): The instance of the database to connect to, "recipe" or "data". Default is "data" + """ + instance = instance.upper() + + host = os.getenv(f"POSTGRES_{instance}_HOST") + port = os.getenv(f"POSTGRES_{instance}_PORT") + database = os.getenv(f"POSTGRES_{instance}_DB") + user = os.getenv(f"POSTGRES_{instance}_USER") + password = os.getenv(f"POSTGRES_{instance}_PASSWORD") + + conn = psycopg2.connect( + dbname=database, user=user, password=password, host=host, port=port + ) + return conn + + +def execute_query(query, instance="data"): + """ + Executes a SQL query and returns the result as a DataFrame. + + Parameters: + query (str): The SQL query to execute. + instance (str): The database instance to connect to. Default is "data". + + Returns: + pandas.DataFrame: The result of the query as a DataFrame + """ + + conn = get_connection(instance) + cur = conn.cursor() + + # Set to read-only mode + cur.execute("SET TRANSACTION READ ONLY;") + + print(f"Executing query: {query}") + + # Execute the query + cur.execute(query) + + # Fetch all the returned rows + rows = cur.fetchall() + + print(f"Query returned {len(rows)} rows") + + # Get column names + column_names = [desc[0] for desc in cur.description] + + # Close the cursor and connection + cur.close() + conn.close() + + # Convert rows to DataFrame + df = pd.DataFrame(rows, columns=column_names) + + return df + + +def connect_to_db(instance="recipe"): + """ + Connects to the specified database instance (RECIPE or DATA) DB and returns a connection object. + + Args: + instance (str): The name of the database instance to connect to. Defaults to "RECIPE". + + Returns: + sqlalchemy.engine.base.Engine: The connection object for the specified database instance. 
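
    Illustrative usage (a sketch; the recipe store's `recipe` table and its
    `custom_id` column are the ones queried elsewhere in this test suite):
        engine = connect_to_db("recipe")
        df = pd.read_sql("SELECT custom_id FROM recipe LIMIT 5", engine)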
+ """ + + instance = instance.upper() + + # Fallback for CLI running outside of docker + if not is_running_in_docker(): + os.environ[f"POSTGRES_{instance}_HOST"] = "localhost" + os.environ[f"POSTGRES_{instance}_PORT"] = "5433" + + host = os.getenv(f"POSTGRES_{instance}_HOST") + port = os.getenv(f"POSTGRES_{instance}_PORT") + database = os.getenv(f"POSTGRES_{instance}_DB") + user = os.getenv(f"POSTGRES_{instance}_USER") + password = os.getenv(f"POSTGRES_{instance}_PASSWORD") + conn_str = f"postgresql://{user}:{password}@{host}:{port}/{database}" + + # add an echo=True to see the SQL queries + conn = create_engine(conn_str) + return conn + + +async def get_data_info(): + """ + Get data info from the database. + + Returns: + str: The data info. + """ + + global data_info + + # run this query: select table_name, summary, columns from table_metadata + + query = """ + SELECT + table_name, + summary, + columns + FROM + table_metadata + --WHERE + -- countries is not null + """ + + data_info = await call_execute_query_api(query) + + return data_info diff --git a/tests/utils/general.py b/tests/utils/general.py new file mode 100644 index 00000000..da689fd5 --- /dev/null +++ b/tests/utils/general.py @@ -0,0 +1,153 @@ +import base64 +import json +import os +import re +import sys +import warnings + +import pandas as pd +import requests +from dotenv import load_dotenv + +load_dotenv() + +# Suppress all warnings +warnings.filterwarnings("ignore") + +execute_query_url = f"{os.getenv('RECIPE_SERVER_API')}execute_query" +get_memory_recipe_url = f"{os.getenv('RECIPE_SERVER_API')}get_memory_recipe" + +data_info = None + + +def replace_env_variables(value): + """ + Recursively replaces environment variable placeholders in a given value. + + Args: + value (dict, list, str): The value to process + + Returns: + The processed value with environment variable placeholders replaced. + + """ + if isinstance(value, dict): + return {k: replace_env_variables(v) for k, v in value.items()} + elif isinstance(value, list): + return [replace_env_variables(v) for v in value] + elif isinstance(value, str): + matches = re.findall(r"\{\{ (.+?) \}\}", value) + for match in matches: + if os.getenv(match) is None: + print(f"Environment variable {match} is not set.") + sys.exit(1) + + value = value.replace("{{ " + match + " }}", os.getenv(match)) + return value + else: + return value + + +def read_integration_config(integration_config_file): + """ + Read the APIs configuration from the integration config file. + + Args: + integration_config_file (str): The path to the integration config file. + + Returns: + apis (dict): A dictionary containing the API configurations. + field_map (dict): A dictionary containing the field mappings. + standard_names (dict): A dictionary containing the standard names. + data_node (str): The data node to use for the integration. + """ + with open(integration_config_file) as f: + print(f"Reading {integration_config_file}") + config = json.load(f) + config = replace_env_variables(config) + apis = config["openapi_interfaces"] + field_map = config["field_map"] + standard_names = config["standard_names"] + + return apis, field_map, standard_names + + +def is_running_in_docker(): + """ + Check if the code is running inside a Docker container. + + Returns: + bool: True if running inside a Docker container, False otherwise. + """ + return os.path.exists("/.dockerenv") + + +def make_api_request(url, payload): + """ + Makes an API request to the specified URL with the given payload. 
+ + Args: + url (str): The URL to make the API request to. + payload (dict): The payload to send with the API request. + + Returns: + dict: The response from the API as a dictionary. + + Raises: + requests.exceptions.RequestException: If an error occurs while making the API request. + """ + headers = {"Content-Type": "application/json"} + print(f"API URL: {url}") + print(f"API Payload: {payload}") + response = requests.post(url, headers=headers, json=payload) + print(f"API Response Status Code: {response.status_code}") + response = response.content + print(f"API Response {response}") + return response + + +def call_execute_query_api(sql): + """ + Calls the execute query action API endpoint with the given SQL query. + + Args: + sql (str): The SQL query to execute. + + Returns: + dict: The response from the API. + + """ + data = {"query": f"{sql}"} + print(f"Calling execute query API {execute_query_url} with {sql} ...") + return make_api_request(execute_query_url, data) + + +def call_get_memory_recipe_api(user_input, history, generate_intent="true"): + """ + Calls the API to get a memory recipe action. + + Args: + user_input (str): The user input. + history (str): The chat history. + generate_intent (str): Whether to generate the intent. + + + Returns: + The API response from the make_api_request function. + """ + + data = { + "user_input": f"{user_input}", + "chat_history": history, + "generate_intent": "true", + } + print(f"Calling execute query API {get_memory_recipe_url} with {data} ...") + result = make_api_request(get_memory_recipe_url, data) + + if isinstance(result, bytes): + result = result.decode("utf-8") + + print("IN API CALL", result) + result = json.loads(result) + + return result diff --git a/tests/utils/llm.py b/tests/utils/llm.py new file mode 100644 index 00000000..d2bb9dc7 --- /dev/null +++ b/tests/utils/llm.py @@ -0,0 +1,213 @@ +import base64 +import json +import os +import sys + +from dotenv import load_dotenv +from jinja2 import Environment, FileSystemLoader +from langchain.schema import HumanMessage, SystemMessage +from langchain_openai import ( + AzureChatOpenAI, + AzureOpenAIEmbeddings, + ChatOpenAI, + OpenAIEmbeddings, +) + +from utils.db import get_data_info + +load_dotenv() + +# Caps for LLM summarization of SQL output and number of rows in the output +llm_prompt_cap = 5000 +sql_rows_cap = 100 + +# Because CLI runs on host. TODO, make this more elegant. +template_dir = "../templates" +if not os.path.exists(template_dir): + template_dir = "./templates" +environment = Environment(loader=FileSystemLoader(template_dir)) +sql_prompt_template = environment.get_template("gen_sql_prompt.jinja2") + +chat = None +embedding_model = None + +data_info = None + + +def get_models(): + """ + Retrieves the embedding model and chat model based on the specified API type. + + Returns: + embedding_model: The embedding model used for text embeddings. + chat: The chat model used for generating responses. + + Raises: + SystemExit: If the specified API type is not supported. 
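
    A minimal assumed OpenAI-flavoured configuration, matching the os.getenv
    calls below (values are placeholders, not project defaults):
        RECIPES_OPENAI_API_TYPE=openai
        RECIPES_OPENAI_API_KEY=sk-...
        RECIPES_MODEL=gpt-4o
        RECIPES_MODEL_TEMP=0
        RECIPES_MODEL_MAX_TOKENS=4096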
+ """ + api_key = os.getenv("RECIPES_OPENAI_API_KEY") + base_url = os.getenv("RECIPES_BASE_URL") + api_version = os.getenv("RECIPES_OPENAI_API_VERSION") + api_type = os.getenv("RECIPES_OPENAI_API_TYPE") + completion_model = os.getenv("RECIPES_OPENAI_TEXT_COMPLETION_DEPLOYMENT_NAME") + model = os.getenv("RECIPES_MODEL") + temp = os.getenv("RECIPES_MODEL_TEMP") + max_tokens = os.getenv("RECIPES_MODEL_MAX_TOKENS") + + if api_type == "openai": + embedding_model = OpenAIEmbeddings( + api_key=api_key, + ) + chat = ChatOpenAI( + model_name=model, + api_key=api_key, + temperature=temp, + max_tokens=max_tokens, + ) + elif api_type == "azure": + embedding_model = AzureOpenAIEmbeddings( + api_key=api_key, + deployment=completion_model, + azure_endpoint=base_url, + chunk_size=16, + ) + chat = AzureChatOpenAI( + api_key=api_key, + api_version=api_version, + azure_endpoint=base_url, + model_name=model, + temperature=temp, + max_tokens=max_tokens, + ) + else: + print(f"OPENAI API type: {api_type} not supported") + sys.exit(1) + return embedding_model, chat + + +def call_llm(instructions, prompt, image=None): + """ + Call the LLM (Language Learning Model) API with the given instructions and prompt. + + Args: + instructions (str): The instructions to provide to the LLM API. + prompt (str): The prompt to provide to the LLM API. + chat (Langchain Open AI model): Chat model used for AI judging + + Returns: + dict or None: The response from the LLM API as a dictionary, or None if an error occurred. + """ + + global chat, embedding_model + if chat is None or embedding_model is None: + embedding_model, chat = get_models() + + human_message = HumanMessage(content=prompt) + + # Multimodal + if image: + if os.getenv("RECIPES_MODEL") == "gpt-4o": + print("Sending image to LLM ...") + with open(image, "rb") as image_file: + encoded_string = base64.b64encode(image_file.read()).decode() + + human_message = HumanMessage( + content=[ + {"type": "text", "text": prompt}, + { + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{encoded_string}" + }, + }, + ] + ) + else: + print("Multimodal not supported for this model") + return None + + try: + messages = [ + SystemMessage(content=instructions), + human_message, + ] + response = chat(messages) + + if hasattr(response, "content"): + response = response.content + + if "content" in response and not response.startswith("```"): + response = json.loads(response) + response = response["content"] + + # Some silly things that sometimes happen + response = response.replace(",}", "}") + + # Different models do different things when prompted for JSON. Here we try and handle this + try: + # Is it already JSON? + response = json.loads(response) + except json.decoder.JSONDecodeError: + # Did the LLM provide JSON in ```json```? 
+ if "```json" in response: + # print("LLM responded with JSON in ```json```") + response = response.split("```json")[1] + response = response.replace("\n", "").split("```")[0] + response = json.loads(response) + elif "```python" in response: + # print("LLM responded with Python in ```python```") + all_sections = response.split("```python")[1] + code = all_sections.replace("\n", "").split("```")[0] + message = all_sections.split("```")[0] + response = {} + response["code"] = code + response["message"] = message + else: + # Finally just send it back + print("LLM response unparsable, using raw results") + print(response) + response = {"content": response} + return response + + except Exception as e: + # print(response) + print("Error calling LLM: ", e) + response = None + + +async def gen_sql(input, chat_history, output): + """ + Generate SQL query based on input, chat history, and output. + + Args: + input (str): The input for generating the SQL query. + chat_history (str): The chat history used for generating the SQL query. + output (str): The output of the SQL query. + + Returns: + str: The generated SQL query. + + Raises: + None + + """ + global data_info + + if data_info is None: + data_info = await get_data_info() + + prompt = sql_prompt_template.render( + input=input, + stdout_output=output, + stderr_output="", + data_info=data_info, + chat_history=chat_history, + ) + + response = call_llm("", prompt) + + query = response["code"] + + query = query.replace(";", "") + f" \nLIMIT {sql_rows_cap};" + + return query diff --git a/tests/utils/recipes.py b/tests/utils/recipes.py new file mode 100644 index 00000000..8978d208 --- /dev/null +++ b/tests/utils/recipes.py @@ -0,0 +1,530 @@ +import ast +import base64 +import io +import json +import logging +import os +import re +import subprocess +import sys +import uuid +import warnings +from pathlib import Path +from typing import List + +import matplotlib.pyplot as plt +import numpy as np +import psycopg2 +import requests +from dotenv import load_dotenv +from jinja2 import Environment, FileSystemLoader +from langchain.docstore.document import Document +from langchain_community.vectorstores.pgvector import PGVector +from PIL import Image + +from utils.db import execute_query +from utils.llm import call_llm, get_models + +environment = Environment(loader=FileSystemLoader("./templates/")) + +db = None + +warnings.filterwarnings("ignore") + +# Get the logger for 'httpx' +httpx_logger = logging.getLogger("httpx") + +# Set the logging level to WARNING to ignore INFO and DEBUG logs +httpx_logger.setLevel(logging.WARNING) + +load_dotenv() + +recipes_work_dir = "./recipes" + +# Lower numbers are more similar +similarity_cutoff = { + "memory": os.getenv("MEMORY_SIMILARITY_CUTOFF", 0.2), + "recipe": os.getenv("RECIPE_SIMILARITY_CUTOFF", 0.3), + "helper_function": os.getenv("HELPER_FUNCTION_SIMILARITY_CUTOFF", 0.1), +} + +conn_params = { + "RECIPES_OPENAI_API_TYPE": os.getenv("RECIPES_OPENAI_API_TYPE"), + "RECIPES_OPENAI_API_KEY": os.getenv("RECIPES_OPENAI_API_KEY"), + "RECIPES_OPENAI_API_ENDPOINT": os.getenv("RECIPES_OPENAI_API_ENDPOINT"), + "RECIPES_OPENAI_API_VERSION": os.getenv("RECIPES_OPENAI_API_VERSION"), + "RECIPES_BASE_URL": os.getenv("RECIPES_BASE_URL"), + "RECIPES_MODEL": os.getenv("RECIPES_MODEL"), + "RECIPES_OPENAI_TEXT_COMPLETION_DEPLOYMENT_NAME": os.getenv( + "RECIPES_OPENAI_TEXT_COMPLETION_DEPLOYMENT_NAME" + ), + "POSTGRES_DB": os.getenv("POSTGRES_RECIPE_DB"), + "POSTGRES_USER": os.getenv("POSTGRES_RECIPE_USER"), + "POSTGRES_HOST": 
os.getenv("POSTGRES_RECIPE_HOST"), + "POSTGRES_PORT": os.getenv("POSTGRES_RECIPE_PORT"), + "OPENAI_API_KEY": os.getenv("AZURE_API_KEY"), + "POSTGRES_PASSWORD": os.getenv("POSTGRES_RECIPE_PASSWORD"), +} + + +# Stored in langchain_pg_collection and langchain_pg_embedding as this +def initialize_vector_db(): + """ + Initializes the database by creating store tables if they don't exist and returns the initialized database. + + Returns: + dict: The initialized database with store tables for each memory type. + """ + + CONNECTION_STRING = PGVector.connection_string_from_db_params( + driver=os.environ.get("POSTGRES_DRIVER", "psycopg2"), + host=os.environ.get("POSTGRES_RECIPE_HOST", "localhost"), + port=int(os.environ.get("POSTGRES_RECIPE_PORT", "5432")), + database=os.environ.get("POSTGRES_RECIPE_DB", "postgres"), + user=os.environ.get("POSTGRES_RECIPE_USER", "postgres"), + password=os.environ.get("POSTGRES_RECIPE_PASSWORD", "postgres"), + ) + + db = {} + + embedding_model, chat = get_models() + + # This will create store tables if they don't exist + for mem_type in similarity_cutoff.keys(): + COLLECTION_NAME = f"{mem_type}_embedding" + db[mem_type] = PGVector( + collection_name=COLLECTION_NAME, + connection_string=CONNECTION_STRING, + embedding_function=embedding_model, + ) + + return db + + +def add_recipe_memory(intent, metadata, mem_type="recipe", force=False): + """ + Add a new memory document to the memory store. + + Parameters: + - intent (str): The content of the memory document. + - metadata (dict): Additional metadata for the memory document. + - mem_type (str): The type of memory store to add the document to. + - db (Database): The database object representing the memory store. + - force (bool, optional): If True, force the addition of the memory document even if a similar document already exists. Default is False. + + Returns: + - id (str): The ID of the added memory document. + """ + + global db + if db is None: + db = initialize_vector_db() + + # First see if we already have something in our memory + if force is False: + result_found, result = check_recipe_memory(intent, debug=False) + if result_found is True: + if ( + result["score"] is not None + and result["score"] < similarity_cutoff[mem_type] + ): + message = f"{mem_type} already exists: {result['content']}" + response = { + "already_exists": "true", + "message": message, + "custom_id": result["metadata"]["custom_id"], + } + return response + + print(f"Adding new document to {mem_type} store ...") + data = {} + data["page_content"] = intent + + uuid_str = str(uuid.uuid4()) + metadata["custom_id"] = uuid_str + + metadata["mem_type"] = mem_type + + # print(metadata) + + new_doc = Document(page_content=intent, metadata=metadata) + print(metadata) + id = db[mem_type].add_documents([new_doc], ids=[uuid_str]) + return id + + +def check_recipe_memory(intent, debug=True): + """ + Check the memory for a given intent. + + Args: + intent (str): The intent to search for in the memory. + debug (bool, optional): If True, print debug information. Default is True. + + Returns: + dict: A dictionary containing the score, content, and metadata of the best match found in the memory. + If no match is found, the dictionary values will be None. 
+ """ + + global db + if db is None: + db = initialize_vector_db() + + # First do semantic search across memories and recipies + matches = [] + for mem_type in ["memory", "recipe"]: + if debug: + print(f"======= Checking {mem_type} for intent: {intent}") + docs = db[mem_type].similarity_search_with_score(intent, k=3) + for d in docs: + score = d[1] + content = d[0].page_content + metadata = d[0].metadata + if debug: + print("\n", f"Score: {score} ===> {content}") + + if d[1] < similarity_cutoff[mem_type]: + matches.append(d) + + r = {"score": None, "content": None, "metadata": None} + result_found = False + + # No matches, no point calling the AI + if len(matches) == 0: + return result_found, r + + # Build a list for the AI judge to review + match_list = "" + for i, d in enumerate(matches): + match_list += f"{i+1}. {d[0].page_content}\n" + + ai_memory_judge_prompt = environment.get_template("ai_memory_judge_prompt.jinja2") + prompt = ai_memory_judge_prompt.render( + user_input=intent, possible_matches=match_list + ) + print(prompt) + response = call_llm("", prompt) + print(response) + if "content" in response: + response = response["content"] + if isinstance(response, str): + response = json.loads(response) + if debug: + print("AI Judge of match: ", response, "\n") + if response["answer"].lower() == "yes": + print(" MATCH!") + match_id = response["match_id"] + d = matches[int(match_id) - 1] + score = d[1] + content = d[0].page_content + metadata = d[0].metadata + r["score"] = score + r["content"] = content + r["metadata"] = metadata + result_found = True + print(r) + return result_found, r + + +def get_memory_recipe_metadata(custom_id, mem_type): + """ + Get the metadata for a memory or recipe by Querying the database with the given custom ID. + + Args: + custom_id (str): The custom ID of the memory or recipe document to get the metadata for. + mem_type (str): The type of memory store to search in. Can be 'memory', 'recipe', or 'helper_function'. + + Returns: + dict: The table data of the memory or recipe document with the given custom ID. + """ + + # Execute a SQL query to get the table data for the given custom ID + query = f""" + SELECT + * + FROM + {mem_type} + WHERE + custom_id = '{custom_id}' + """ + print(query) + result = execute_query(query, "recipe") + + if result.shape[0] > 0: + result = result.iloc[0] + result = json.loads(result.to_json()) + return result + else: + raise ValueError( + f"No table data (memory/recipe) found for custom ID {custom_id}" + ) + + +def generate_intent_from_history(chat_history: str) -> dict: + """ + Generate the intent from the user query and chat history. + + Args: + chat_history (str): The chat history. + + Returns: + dict: The generated intent. + + """ + + # Load ninja template + generate_intent_from_history_prompt = environment.get_template( + "generate_intent_from_history_prompt.jinja2" + ) + + # Generate the intent from the chat history + prompt = generate_intent_from_history_prompt.render(chat_history=chat_history) + + intent = call_llm(instructions="", prompt=prompt) + # if not isinstance(intent, dict): + # intent = {"intent": intent} + print(f"Generated intent: {intent}") + return intent + + +def process_image(encoded_string, recipe_id): + """ + Takes a base64 encoded string of a picture, decodes it, and saves it as a PNG file. + + Args: + encoded_string (str): Base64 encoded string of the image. + recipe_id (str): The recipe ID to use in the image file name. + + Returns: + str: Full path to the saved image file. 
+ """ + + print("A visual memory was found. Processing image...") + + # Decode the base64 string + image_data = base64.b64decode(encoded_string) + + # Convert binary data to image + image = Image.open(io.BytesIO(image_data)) + + # Create the full path for saving the image + full_path = os.path.join("./recipes/public/", f"memory_image_{recipe_id}.png") + + # Save the image + image.save(full_path, "PNG") + + print("Image processed and saved successfully.") + + return full_path + + +# TODO recfactor all these functions into simpler single function. Tech debt left +# over from Robocorp rapid prototype +def process_memory_recipe_results(result: dict, table_data: dict) -> str: + """ + Processes the results of a memory recipe search and returns the response text and metadata. + + Args: + result (dict): The result of the memory recipe search. + table_data (dict): The data from the memory or recipe tables. + + Returns: + """ + + mem_type = result["metadata"]["mem_type"] + custom_id = result["metadata"]["custom_id"] + print(result) + content = result["content"] + table_data = get_memory_recipe_metadata(custom_id, mem_type) + recipe_id = table_data["custom_id"] + if "result_metadata" in table_data: + metadata = table_data["result_metadata"] + else: + metadata = table_data["sample_result_metadata"] + if metadata is None: + metadata = "" + + print(f"====> Found {mem_type}") + if table_data["result_type"] == "image": + image = table_data["result"] + process_image(image.replace("data:image/png;base64,", ""), recipe_id) + file = f"{os.getenv('IMAGE_HOST')}/memory_image_{recipe_id}.png" + result = {"type": "image", "file": file, "value": ""} + else: + # TODO 'result' in memory should be a JSON, just like sample_result + # in recipe, then this goes away + if mem_type == "recipe": + result = json.loads(table_data["result"]) + result = json.loads(result) + else: + print("Memory, skipping result json extraction") + result = json.loads(table_data["result"]) + result = result["result"] + print(result) + + print("Recipe ID: ", recipe_id, "Intent: ", content) + + return {"result": result, "metadata": metadata} + + +# TODO Absolutely needs to be converted to registered functions +def run_recipe(custom_id: str, recipe: dict, user_input, chat_history): + """ + Runs a recipe based on the result of a memory recipe search. + + Args: + custom_id (str): The custom ID of the recipe. + recipe(dict): The recipe details + user_input (str): The user input. + chat_history (str): The chat history. + + """ + + print("Attempting to run recipe...") + print(f"Recipe Custom ID: {custom_id}") + + function_code = recipe["function_code"] + + # TODO this should really use the new openapi_json field, + # but that's blank currently, will come back to it. 
+ calling_code = recipe["sample_call"] + + recipe_run_python_prompt = environment.get_template( + "recipe_run_python_prompt.jinja2" + ) + prompt = recipe_run_python_prompt.render( + recipe_code=function_code, + calling_code=calling_code, + user_input=user_input, + chat_history=chat_history, + ) + print("Calling LLM to generate new run code ...") + new_code = call_llm("", prompt, None) + print(new_code) + + result = { + "output": "", + "errors": "", + "metadata": "", + } + + if "new_calling_code" in new_code: + calling_code = new_code["new_calling_code"] + print("New calling code generated ...") + + # Combine function and calling code into a string + code = function_code + "\n\n" + "if __name__ == '__main__':\n\n" + + # Write calling code to code, indented by 4 + code += " " + calling_code.replace("\n", "\n ") + "\n" + + # Make recipes folder if it doesn't exist + if not os.path.exists(recipes_work_dir): + print(f"Creating recipes directory ... {recipes_work_dir}") + os.makedirs(recipes_work_dir) + # Copy skills.py into directory use shutil + print("Copying skills.py into directory ...") + + # Adjust .env location in code + code = code.replace("load_dotenv()", "load_dotenv('../.env')") + + # Adjust any images saved in code + code = re.sub(r"./work/(.*?\.png)", r"/app/recipes/public/\1", code) + + # Adjust path + code = ( + "import os\nimport sys\n# Add parent folder to path\nsys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))\n" + + code + ) + + recipe_path = f"{recipes_work_dir}/{custom_id}.py" + with open(recipe_path, "w") as f: + print(f"Writing recipe to file ... {recipe_path}") + f.write(code) + + os.chdir(recipes_work_dir) + cmd = f"python {custom_id}.py" + run_output = subprocess.run(cmd, shell=True, capture_output=True, text=True) + + result["output"] = run_output.stdout + result["errors"] = run_output.stderr + + # TODO this exists here as well as in cli recipes_sync.py, merge + if "OUTPUT:" in result["output"]: + result["output"] = result["output"].split("OUTPUT:")[1] + print(result["output"]) + result = json.loads(result["output"]) + print("Recipe executed successfully.") + else: + print(result["output"]) + print(result["errors"]) + result["result"] = { + "type": "text", + "file": "", + "value": "Recipe produced no output", + } + result["metadata"] = { + "params": {}, + "attribution": "", + "data_url": "", + "time_period": {"start": "", "end": ""}, + } + + print(result) + return result + + +def get_memory_recipe(user_input, chat_history, generate_intent="true") -> str: + """ + Performs a search in the memory for a given intent and returns the best match found. + + Args: + user_input (str): The user input to search for in the memory. + chat_history (str): The chat history. + generate_intent (str): A flag to indicate whether to generate the intent from the chat history. 
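
    Illustrative call, mirroring tests/test_cases_get_memory.json:
        get_memory_recipe(
            "Get all recipes",
            "[{'role': 'user', 'content': 'Get all recipes'}]",
            generate_intent="true",
        )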
+ + Returns: + str: Matched value + str: metadata + """ + + logging.info("Python HTTP trigger function processed a request.") + # Retrieve the CSV file from the request + + # Generate intent from chat history if generate_intent is true + if generate_intent is not None and generate_intent == "true": + print("********* Generating intent from chat history ...") + print("Chat history: ", chat_history) + user_input = generate_intent_from_history(chat_history) + print("Generated intent: ", user_input) + user_input = user_input["intent"] + + print("Checking my memories ...") + memory_found, result = check_recipe_memory(user_input, debug=True) + if memory_found is True: + custom_id = result["metadata"]["custom_id"] + mem_type = result["metadata"]["mem_type"] + matched_doc = result["content"] + # Get data from memory or recipe tables + table_data = get_memory_recipe_metadata(custom_id, mem_type) + if mem_type == "recipe": + result = run_recipe(custom_id, table_data, user_input, chat_history) + else: + # Take the result directly from memory + result = process_memory_recipe_results(result, table_data) + + print(result) + result["memory_type"] = mem_type + result["memory"] = matched_doc + result["memory_found"] = "true" + + result_string = json.dumps(result, indent=4) + + print(result_string) + return result_string + + result = {"result": "Sorry, no recipe or memory found", "memory_found": "false"} + result = json.dumps(result, indent=4) + print(result) + + return result From 189fa72ef2c12f0e40e947b7517f23651beb5b37 Mon Sep 17 00:00:00 2001 From: JanPeterDatakind Date: Fri, 5 Jul 2024 15:28:43 -0400 Subject: [PATCH 08/19] No need for pandas in untils.general - not used --- utils/general.py | 1 - 1 file changed, 1 deletion(-) mode change 100644 => 100755 utils/general.py diff --git a/utils/general.py b/utils/general.py old mode 100644 new mode 100755 index da689fd5..0a41fb1a --- a/utils/general.py +++ b/utils/general.py @@ -5,7 +5,6 @@ import sys import warnings -import pandas as pd import requests from dotenv import load_dotenv From 1c187e15b5e836aec2be32ef2852ce67afca4cb5 Mon Sep 17 00:00:00 2001 From: JanPeterDatakind Date: Fri, 5 Jul 2024 15:32:38 -0400 Subject: [PATCH 09/19] Deleting pandas is correct script... --- tests/utils/general.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/utils/general.py b/tests/utils/general.py index da689fd5..0a41fb1a 100644 --- a/tests/utils/general.py +++ b/tests/utils/general.py @@ -5,7 +5,6 @@ import sys import warnings -import pandas as pd import requests from dotenv import load_dotenv From 9a4c41fcbbd5d366c6e4a331efbe7741a24f4815 Mon Sep 17 00:00:00 2001 From: JanPeterDatakind Date: Fri, 5 Jul 2024 15:39:06 -0400 Subject: [PATCH 10/19] Adding more imports --- .github/workflows/get_memory_test.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/get_memory_test.yml b/.github/workflows/get_memory_test.yml index a30851d4..4177dc56 100644 --- a/.github/workflows/get_memory_test.yml +++ b/.github/workflows/get_memory_test.yml @@ -89,11 +89,12 @@ jobs: with: python-version: "3.11.4" - run: | - pip3 install pytest + pip3 install pytest==8.2.2 + pip3 install requests==2.32.3 + pip3 install python-dotenv==1.0.1 - run: | cd tests/ - ls echo "Running tests ..." 
pytest From 4a35c7707858e7014d1ec6f3f0bf048334a9bdc8 Mon Sep 17 00:00:00 2001 From: JanPeterDatakind Date: Fri, 5 Jul 2024 15:53:53 -0400 Subject: [PATCH 11/19] Revert workaround now that tests have passed --- .github/workflows/get_memory_test.yml | 1 + tests/utils/__init__.py | 0 tests/utils/db.py | 131 ------- tests/utils/general.py | 152 -------- tests/utils/llm.py | 213 ----------- tests/utils/recipes.py | 530 -------------------------- 6 files changed, 1 insertion(+), 1026 deletions(-) delete mode 100644 tests/utils/__init__.py delete mode 100644 tests/utils/db.py delete mode 100644 tests/utils/general.py delete mode 100644 tests/utils/llm.py delete mode 100644 tests/utils/recipes.py diff --git a/.github/workflows/get_memory_test.yml b/.github/workflows/get_memory_test.yml index 4177dc56..6aa9a0d5 100644 --- a/.github/workflows/get_memory_test.yml +++ b/.github/workflows/get_memory_test.yml @@ -95,6 +95,7 @@ jobs: - run: | cd tests/ + cp -r ../utils/ . echo "Running tests ..." pytest diff --git a/tests/utils/__init__.py b/tests/utils/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/utils/db.py b/tests/utils/db.py deleted file mode 100644 index 12e65b58..00000000 --- a/tests/utils/db.py +++ /dev/null @@ -1,131 +0,0 @@ -import os - -import pandas as pd -import psycopg2 -from dotenv import load_dotenv -from sqlalchemy import create_engine - -from utils.general import call_execute_query_api, is_running_in_docker - -load_dotenv() - - -def get_connection(instance="data"): - """ - This function gets a connection to the database - - Args: - - instance (str): The instance of the database to connect to, "recipe" or "data". Default is "data" - """ - instance = instance.upper() - - host = os.getenv(f"POSTGRES_{instance}_HOST") - port = os.getenv(f"POSTGRES_{instance}_PORT") - database = os.getenv(f"POSTGRES_{instance}_DB") - user = os.getenv(f"POSTGRES_{instance}_USER") - password = os.getenv(f"POSTGRES_{instance}_PASSWORD") - - conn = psycopg2.connect( - dbname=database, user=user, password=password, host=host, port=port - ) - return conn - - -def execute_query(query, instance="data"): - """ - Executes a SQL query and returns the result as a DataFrame. - - Parameters: - query (str): The SQL query to execute. - instance (str): The database instance to connect to. Default is "data". - - Returns: - pandas.DataFrame: The result of the query as a DataFrame - """ - - conn = get_connection(instance) - cur = conn.cursor() - - # Set to read-only mode - cur.execute("SET TRANSACTION READ ONLY;") - - print(f"Executing query: {query}") - - # Execute the query - cur.execute(query) - - # Fetch all the returned rows - rows = cur.fetchall() - - print(f"Query returned {len(rows)} rows") - - # Get column names - column_names = [desc[0] for desc in cur.description] - - # Close the cursor and connection - cur.close() - conn.close() - - # Convert rows to DataFrame - df = pd.DataFrame(rows, columns=column_names) - - return df - - -def connect_to_db(instance="recipe"): - """ - Connects to the specified database instance (RECIPE or DATA) DB and returns a connection object. - - Args: - instance (str): The name of the database instance to connect to. Defaults to "RECIPE". - - Returns: - sqlalchemy.engine.base.Engine: The connection object for the specified database instance. 
- """ - - instance = instance.upper() - - # Fallback for CLI running outside of docker - if not is_running_in_docker(): - os.environ[f"POSTGRES_{instance}_HOST"] = "localhost" - os.environ[f"POSTGRES_{instance}_PORT"] = "5433" - - host = os.getenv(f"POSTGRES_{instance}_HOST") - port = os.getenv(f"POSTGRES_{instance}_PORT") - database = os.getenv(f"POSTGRES_{instance}_DB") - user = os.getenv(f"POSTGRES_{instance}_USER") - password = os.getenv(f"POSTGRES_{instance}_PASSWORD") - conn_str = f"postgresql://{user}:{password}@{host}:{port}/{database}" - - # add an echo=True to see the SQL queries - conn = create_engine(conn_str) - return conn - - -async def get_data_info(): - """ - Get data info from the database. - - Returns: - str: The data info. - """ - - global data_info - - # run this query: select table_name, summary, columns from table_metadata - - query = """ - SELECT - table_name, - summary, - columns - FROM - table_metadata - --WHERE - -- countries is not null - """ - - data_info = await call_execute_query_api(query) - - return data_info diff --git a/tests/utils/general.py b/tests/utils/general.py deleted file mode 100644 index 0a41fb1a..00000000 --- a/tests/utils/general.py +++ /dev/null @@ -1,152 +0,0 @@ -import base64 -import json -import os -import re -import sys -import warnings - -import requests -from dotenv import load_dotenv - -load_dotenv() - -# Suppress all warnings -warnings.filterwarnings("ignore") - -execute_query_url = f"{os.getenv('RECIPE_SERVER_API')}execute_query" -get_memory_recipe_url = f"{os.getenv('RECIPE_SERVER_API')}get_memory_recipe" - -data_info = None - - -def replace_env_variables(value): - """ - Recursively replaces environment variable placeholders in a given value. - - Args: - value (dict, list, str): The value to process - - Returns: - The processed value with environment variable placeholders replaced. - - """ - if isinstance(value, dict): - return {k: replace_env_variables(v) for k, v in value.items()} - elif isinstance(value, list): - return [replace_env_variables(v) for v in value] - elif isinstance(value, str): - matches = re.findall(r"\{\{ (.+?) \}\}", value) - for match in matches: - if os.getenv(match) is None: - print(f"Environment variable {match} is not set.") - sys.exit(1) - - value = value.replace("{{ " + match + " }}", os.getenv(match)) - return value - else: - return value - - -def read_integration_config(integration_config_file): - """ - Read the APIs configuration from the integration config file. - - Args: - integration_config_file (str): The path to the integration config file. - - Returns: - apis (dict): A dictionary containing the API configurations. - field_map (dict): A dictionary containing the field mappings. - standard_names (dict): A dictionary containing the standard names. - data_node (str): The data node to use for the integration. - """ - with open(integration_config_file) as f: - print(f"Reading {integration_config_file}") - config = json.load(f) - config = replace_env_variables(config) - apis = config["openapi_interfaces"] - field_map = config["field_map"] - standard_names = config["standard_names"] - - return apis, field_map, standard_names - - -def is_running_in_docker(): - """ - Check if the code is running inside a Docker container. - - Returns: - bool: True if running inside a Docker container, False otherwise. - """ - return os.path.exists("/.dockerenv") - - -def make_api_request(url, payload): - """ - Makes an API request to the specified URL with the given payload. 
- - Args: - url (str): The URL to make the API request to. - payload (dict): The payload to send with the API request. - - Returns: - dict: The response from the API as a dictionary. - - Raises: - requests.exceptions.RequestException: If an error occurs while making the API request. - """ - headers = {"Content-Type": "application/json"} - print(f"API URL: {url}") - print(f"API Payload: {payload}") - response = requests.post(url, headers=headers, json=payload) - print(f"API Response Status Code: {response.status_code}") - response = response.content - print(f"API Response {response}") - return response - - -def call_execute_query_api(sql): - """ - Calls the execute query action API endpoint with the given SQL query. - - Args: - sql (str): The SQL query to execute. - - Returns: - dict: The response from the API. - - """ - data = {"query": f"{sql}"} - print(f"Calling execute query API {execute_query_url} with {sql} ...") - return make_api_request(execute_query_url, data) - - -def call_get_memory_recipe_api(user_input, history, generate_intent="true"): - """ - Calls the API to get a memory recipe action. - - Args: - user_input (str): The user input. - history (str): The chat history. - generate_intent (str): Whether to generate the intent. - - - Returns: - The API response from the make_api_request function. - """ - - data = { - "user_input": f"{user_input}", - "chat_history": history, - "generate_intent": "true", - } - print(f"Calling execute query API {get_memory_recipe_url} with {data} ...") - result = make_api_request(get_memory_recipe_url, data) - - if isinstance(result, bytes): - result = result.decode("utf-8") - - print("IN API CALL", result) - result = json.loads(result) - - return result diff --git a/tests/utils/llm.py b/tests/utils/llm.py deleted file mode 100644 index d2bb9dc7..00000000 --- a/tests/utils/llm.py +++ /dev/null @@ -1,213 +0,0 @@ -import base64 -import json -import os -import sys - -from dotenv import load_dotenv -from jinja2 import Environment, FileSystemLoader -from langchain.schema import HumanMessage, SystemMessage -from langchain_openai import ( - AzureChatOpenAI, - AzureOpenAIEmbeddings, - ChatOpenAI, - OpenAIEmbeddings, -) - -from utils.db import get_data_info - -load_dotenv() - -# Caps for LLM summarization of SQL output and number of rows in the output -llm_prompt_cap = 5000 -sql_rows_cap = 100 - -# Because CLI runs on host. TODO, make this more elegant. -template_dir = "../templates" -if not os.path.exists(template_dir): - template_dir = "./templates" -environment = Environment(loader=FileSystemLoader(template_dir)) -sql_prompt_template = environment.get_template("gen_sql_prompt.jinja2") - -chat = None -embedding_model = None - -data_info = None - - -def get_models(): - """ - Retrieves the embedding model and chat model based on the specified API type. - - Returns: - embedding_model: The embedding model used for text embeddings. - chat: The chat model used for generating responses. - - Raises: - SystemExit: If the specified API type is not supported. 
- """ - api_key = os.getenv("RECIPES_OPENAI_API_KEY") - base_url = os.getenv("RECIPES_BASE_URL") - api_version = os.getenv("RECIPES_OPENAI_API_VERSION") - api_type = os.getenv("RECIPES_OPENAI_API_TYPE") - completion_model = os.getenv("RECIPES_OPENAI_TEXT_COMPLETION_DEPLOYMENT_NAME") - model = os.getenv("RECIPES_MODEL") - temp = os.getenv("RECIPES_MODEL_TEMP") - max_tokens = os.getenv("RECIPES_MODEL_MAX_TOKENS") - - if api_type == "openai": - embedding_model = OpenAIEmbeddings( - api_key=api_key, - ) - chat = ChatOpenAI( - model_name=model, - api_key=api_key, - temperature=temp, - max_tokens=max_tokens, - ) - elif api_type == "azure": - embedding_model = AzureOpenAIEmbeddings( - api_key=api_key, - deployment=completion_model, - azure_endpoint=base_url, - chunk_size=16, - ) - chat = AzureChatOpenAI( - api_key=api_key, - api_version=api_version, - azure_endpoint=base_url, - model_name=model, - temperature=temp, - max_tokens=max_tokens, - ) - else: - print(f"OPENAI API type: {api_type} not supported") - sys.exit(1) - return embedding_model, chat - - -def call_llm(instructions, prompt, image=None): - """ - Call the LLM (Language Learning Model) API with the given instructions and prompt. - - Args: - instructions (str): The instructions to provide to the LLM API. - prompt (str): The prompt to provide to the LLM API. - chat (Langchain Open AI model): Chat model used for AI judging - - Returns: - dict or None: The response from the LLM API as a dictionary, or None if an error occurred. - """ - - global chat, embedding_model - if chat is None or embedding_model is None: - embedding_model, chat = get_models() - - human_message = HumanMessage(content=prompt) - - # Multimodal - if image: - if os.getenv("RECIPES_MODEL") == "gpt-4o": - print("Sending image to LLM ...") - with open(image, "rb") as image_file: - encoded_string = base64.b64encode(image_file.read()).decode() - - human_message = HumanMessage( - content=[ - {"type": "text", "text": prompt}, - { - "type": "image_url", - "image_url": { - "url": f"data:image/jpeg;base64,{encoded_string}" - }, - }, - ] - ) - else: - print("Multimodal not supported for this model") - return None - - try: - messages = [ - SystemMessage(content=instructions), - human_message, - ] - response = chat(messages) - - if hasattr(response, "content"): - response = response.content - - if "content" in response and not response.startswith("```"): - response = json.loads(response) - response = response["content"] - - # Some silly things that sometimes happen - response = response.replace(",}", "}") - - # Different models do different things when prompted for JSON. Here we try and handle this - try: - # Is it already JSON? - response = json.loads(response) - except json.decoder.JSONDecodeError: - # Did the LLM provide JSON in ```json```? 
- if "```json" in response: - # print("LLM responded with JSON in ```json```") - response = response.split("```json")[1] - response = response.replace("\n", "").split("```")[0] - response = json.loads(response) - elif "```python" in response: - # print("LLM responded with Python in ```python```") - all_sections = response.split("```python")[1] - code = all_sections.replace("\n", "").split("```")[0] - message = all_sections.split("```")[0] - response = {} - response["code"] = code - response["message"] = message - else: - # Finally just send it back - print("LLM response unparsable, using raw results") - print(response) - response = {"content": response} - return response - - except Exception as e: - # print(response) - print("Error calling LLM: ", e) - response = None - - -async def gen_sql(input, chat_history, output): - """ - Generate SQL query based on input, chat history, and output. - - Args: - input (str): The input for generating the SQL query. - chat_history (str): The chat history used for generating the SQL query. - output (str): The output of the SQL query. - - Returns: - str: The generated SQL query. - - Raises: - None - - """ - global data_info - - if data_info is None: - data_info = await get_data_info() - - prompt = sql_prompt_template.render( - input=input, - stdout_output=output, - stderr_output="", - data_info=data_info, - chat_history=chat_history, - ) - - response = call_llm("", prompt) - - query = response["code"] - - query = query.replace(";", "") + f" \nLIMIT {sql_rows_cap};" - - return query diff --git a/tests/utils/recipes.py b/tests/utils/recipes.py deleted file mode 100644 index 8978d208..00000000 --- a/tests/utils/recipes.py +++ /dev/null @@ -1,530 +0,0 @@ -import ast -import base64 -import io -import json -import logging -import os -import re -import subprocess -import sys -import uuid -import warnings -from pathlib import Path -from typing import List - -import matplotlib.pyplot as plt -import numpy as np -import psycopg2 -import requests -from dotenv import load_dotenv -from jinja2 import Environment, FileSystemLoader -from langchain.docstore.document import Document -from langchain_community.vectorstores.pgvector import PGVector -from PIL import Image - -from utils.db import execute_query -from utils.llm import call_llm, get_models - -environment = Environment(loader=FileSystemLoader("./templates/")) - -db = None - -warnings.filterwarnings("ignore") - -# Get the logger for 'httpx' -httpx_logger = logging.getLogger("httpx") - -# Set the logging level to WARNING to ignore INFO and DEBUG logs -httpx_logger.setLevel(logging.WARNING) - -load_dotenv() - -recipes_work_dir = "./recipes" - -# Lower numbers are more similar -similarity_cutoff = { - "memory": os.getenv("MEMORY_SIMILARITY_CUTOFF", 0.2), - "recipe": os.getenv("RECIPE_SIMILARITY_CUTOFF", 0.3), - "helper_function": os.getenv("HELPER_FUNCTION_SIMILARITY_CUTOFF", 0.1), -} - -conn_params = { - "RECIPES_OPENAI_API_TYPE": os.getenv("RECIPES_OPENAI_API_TYPE"), - "RECIPES_OPENAI_API_KEY": os.getenv("RECIPES_OPENAI_API_KEY"), - "RECIPES_OPENAI_API_ENDPOINT": os.getenv("RECIPES_OPENAI_API_ENDPOINT"), - "RECIPES_OPENAI_API_VERSION": os.getenv("RECIPES_OPENAI_API_VERSION"), - "RECIPES_BASE_URL": os.getenv("RECIPES_BASE_URL"), - "RECIPES_MODEL": os.getenv("RECIPES_MODEL"), - "RECIPES_OPENAI_TEXT_COMPLETION_DEPLOYMENT_NAME": os.getenv( - "RECIPES_OPENAI_TEXT_COMPLETION_DEPLOYMENT_NAME" - ), - "POSTGRES_DB": os.getenv("POSTGRES_RECIPE_DB"), - "POSTGRES_USER": os.getenv("POSTGRES_RECIPE_USER"), - "POSTGRES_HOST": 
os.getenv("POSTGRES_RECIPE_HOST"), - "POSTGRES_PORT": os.getenv("POSTGRES_RECIPE_PORT"), - "OPENAI_API_KEY": os.getenv("AZURE_API_KEY"), - "POSTGRES_PASSWORD": os.getenv("POSTGRES_RECIPE_PASSWORD"), -} - - -# Stored in langchain_pg_collection and langchain_pg_embedding as this -def initialize_vector_db(): - """ - Initializes the database by creating store tables if they don't exist and returns the initialized database. - - Returns: - dict: The initialized database with store tables for each memory type. - """ - - CONNECTION_STRING = PGVector.connection_string_from_db_params( - driver=os.environ.get("POSTGRES_DRIVER", "psycopg2"), - host=os.environ.get("POSTGRES_RECIPE_HOST", "localhost"), - port=int(os.environ.get("POSTGRES_RECIPE_PORT", "5432")), - database=os.environ.get("POSTGRES_RECIPE_DB", "postgres"), - user=os.environ.get("POSTGRES_RECIPE_USER", "postgres"), - password=os.environ.get("POSTGRES_RECIPE_PASSWORD", "postgres"), - ) - - db = {} - - embedding_model, chat = get_models() - - # This will create store tables if they don't exist - for mem_type in similarity_cutoff.keys(): - COLLECTION_NAME = f"{mem_type}_embedding" - db[mem_type] = PGVector( - collection_name=COLLECTION_NAME, - connection_string=CONNECTION_STRING, - embedding_function=embedding_model, - ) - - return db - - -def add_recipe_memory(intent, metadata, mem_type="recipe", force=False): - """ - Add a new memory document to the memory store. - - Parameters: - - intent (str): The content of the memory document. - - metadata (dict): Additional metadata for the memory document. - - mem_type (str): The type of memory store to add the document to. - - db (Database): The database object representing the memory store. - - force (bool, optional): If True, force the addition of the memory document even if a similar document already exists. Default is False. - - Returns: - - id (str): The ID of the added memory document. - """ - - global db - if db is None: - db = initialize_vector_db() - - # First see if we already have something in our memory - if force is False: - result_found, result = check_recipe_memory(intent, debug=False) - if result_found is True: - if ( - result["score"] is not None - and result["score"] < similarity_cutoff[mem_type] - ): - message = f"{mem_type} already exists: {result['content']}" - response = { - "already_exists": "true", - "message": message, - "custom_id": result["metadata"]["custom_id"], - } - return response - - print(f"Adding new document to {mem_type} store ...") - data = {} - data["page_content"] = intent - - uuid_str = str(uuid.uuid4()) - metadata["custom_id"] = uuid_str - - metadata["mem_type"] = mem_type - - # print(metadata) - - new_doc = Document(page_content=intent, metadata=metadata) - print(metadata) - id = db[mem_type].add_documents([new_doc], ids=[uuid_str]) - return id - - -def check_recipe_memory(intent, debug=True): - """ - Check the memory for a given intent. - - Args: - intent (str): The intent to search for in the memory. - debug (bool, optional): If True, print debug information. Default is True. - - Returns: - dict: A dictionary containing the score, content, and metadata of the best match found in the memory. - If no match is found, the dictionary values will be None. 
- """ - - global db - if db is None: - db = initialize_vector_db() - - # First do semantic search across memories and recipies - matches = [] - for mem_type in ["memory", "recipe"]: - if debug: - print(f"======= Checking {mem_type} for intent: {intent}") - docs = db[mem_type].similarity_search_with_score(intent, k=3) - for d in docs: - score = d[1] - content = d[0].page_content - metadata = d[0].metadata - if debug: - print("\n", f"Score: {score} ===> {content}") - - if d[1] < similarity_cutoff[mem_type]: - matches.append(d) - - r = {"score": None, "content": None, "metadata": None} - result_found = False - - # No matches, no point calling the AI - if len(matches) == 0: - return result_found, r - - # Build a list for the AI judge to review - match_list = "" - for i, d in enumerate(matches): - match_list += f"{i+1}. {d[0].page_content}\n" - - ai_memory_judge_prompt = environment.get_template("ai_memory_judge_prompt.jinja2") - prompt = ai_memory_judge_prompt.render( - user_input=intent, possible_matches=match_list - ) - print(prompt) - response = call_llm("", prompt) - print(response) - if "content" in response: - response = response["content"] - if isinstance(response, str): - response = json.loads(response) - if debug: - print("AI Judge of match: ", response, "\n") - if response["answer"].lower() == "yes": - print(" MATCH!") - match_id = response["match_id"] - d = matches[int(match_id) - 1] - score = d[1] - content = d[0].page_content - metadata = d[0].metadata - r["score"] = score - r["content"] = content - r["metadata"] = metadata - result_found = True - print(r) - return result_found, r - - -def get_memory_recipe_metadata(custom_id, mem_type): - """ - Get the metadata for a memory or recipe by Querying the database with the given custom ID. - - Args: - custom_id (str): The custom ID of the memory or recipe document to get the metadata for. - mem_type (str): The type of memory store to search in. Can be 'memory', 'recipe', or 'helper_function'. - - Returns: - dict: The table data of the memory or recipe document with the given custom ID. - """ - - # Execute a SQL query to get the table data for the given custom ID - query = f""" - SELECT - * - FROM - {mem_type} - WHERE - custom_id = '{custom_id}' - """ - print(query) - result = execute_query(query, "recipe") - - if result.shape[0] > 0: - result = result.iloc[0] - result = json.loads(result.to_json()) - return result - else: - raise ValueError( - f"No table data (memory/recipe) found for custom ID {custom_id}" - ) - - -def generate_intent_from_history(chat_history: str) -> dict: - """ - Generate the intent from the user query and chat history. - - Args: - chat_history (str): The chat history. - - Returns: - dict: The generated intent. - - """ - - # Load ninja template - generate_intent_from_history_prompt = environment.get_template( - "generate_intent_from_history_prompt.jinja2" - ) - - # Generate the intent from the chat history - prompt = generate_intent_from_history_prompt.render(chat_history=chat_history) - - intent = call_llm(instructions="", prompt=prompt) - # if not isinstance(intent, dict): - # intent = {"intent": intent} - print(f"Generated intent: {intent}") - return intent - - -def process_image(encoded_string, recipe_id): - """ - Takes a base64 encoded string of a picture, decodes it, and saves it as a PNG file. - - Args: - encoded_string (str): Base64 encoded string of the image. - recipe_id (str): The recipe ID to use in the image file name. - - Returns: - str: Full path to the saved image file. 
- """ - - print("A visual memory was found. Processing image...") - - # Decode the base64 string - image_data = base64.b64decode(encoded_string) - - # Convert binary data to image - image = Image.open(io.BytesIO(image_data)) - - # Create the full path for saving the image - full_path = os.path.join("./recipes/public/", f"memory_image_{recipe_id}.png") - - # Save the image - image.save(full_path, "PNG") - - print("Image processed and saved successfully.") - - return full_path - - -# TODO recfactor all these functions into simpler single function. Tech debt left -# over from Robocorp rapid prototype -def process_memory_recipe_results(result: dict, table_data: dict) -> str: - """ - Processes the results of a memory recipe search and returns the response text and metadata. - - Args: - result (dict): The result of the memory recipe search. - table_data (dict): The data from the memory or recipe tables. - - Returns: - """ - - mem_type = result["metadata"]["mem_type"] - custom_id = result["metadata"]["custom_id"] - print(result) - content = result["content"] - table_data = get_memory_recipe_metadata(custom_id, mem_type) - recipe_id = table_data["custom_id"] - if "result_metadata" in table_data: - metadata = table_data["result_metadata"] - else: - metadata = table_data["sample_result_metadata"] - if metadata is None: - metadata = "" - - print(f"====> Found {mem_type}") - if table_data["result_type"] == "image": - image = table_data["result"] - process_image(image.replace("data:image/png;base64,", ""), recipe_id) - file = f"{os.getenv('IMAGE_HOST')}/memory_image_{recipe_id}.png" - result = {"type": "image", "file": file, "value": ""} - else: - # TODO 'result' in memory should be a JSON, just like sample_result - # in recipe, then this goes away - if mem_type == "recipe": - result = json.loads(table_data["result"]) - result = json.loads(result) - else: - print("Memory, skipping result json extraction") - result = json.loads(table_data["result"]) - result = result["result"] - print(result) - - print("Recipe ID: ", recipe_id, "Intent: ", content) - - return {"result": result, "metadata": metadata} - - -# TODO Absolutely needs to be converted to registered functions -def run_recipe(custom_id: str, recipe: dict, user_input, chat_history): - """ - Runs a recipe based on the result of a memory recipe search. - - Args: - custom_id (str): The custom ID of the recipe. - recipe(dict): The recipe details - user_input (str): The user input. - chat_history (str): The chat history. - - """ - - print("Attempting to run recipe...") - print(f"Recipe Custom ID: {custom_id}") - - function_code = recipe["function_code"] - - # TODO this should really use the new openapi_json field, - # but that's blank currently, will come back to it. 
-    calling_code = recipe["sample_call"]
-
-    recipe_run_python_prompt = environment.get_template(
-        "recipe_run_python_prompt.jinja2"
-    )
-    prompt = recipe_run_python_prompt.render(
-        recipe_code=function_code,
-        calling_code=calling_code,
-        user_input=user_input,
-        chat_history=chat_history,
-    )
-    print("Calling LLM to generate new run code ...")
-    new_code = call_llm("", prompt, None)
-    print(new_code)
-
-    result = {
-        "output": "",
-        "errors": "",
-        "metadata": "",
-    }
-
-    if "new_calling_code" in new_code:
-        calling_code = new_code["new_calling_code"]
-        print("New calling code generated ...")
-
-    # Combine function and calling code into a string
-    code = function_code + "\n\n" + "if __name__ == '__main__':\n\n"
-
-    # Write calling code to code, indented by 4
-    code += "    " + calling_code.replace("\n", "\n    ") + "\n"
-
-    # Make recipes folder if it doesn't exist
-    if not os.path.exists(recipes_work_dir):
-        print(f"Creating recipes directory ... {recipes_work_dir}")
-        os.makedirs(recipes_work_dir)
-        # TODO: copy skills.py into the directory using shutil
-        print("Copying skills.py into directory ...")
-
-    # Adjust .env location in code
-    code = code.replace("load_dotenv()", "load_dotenv('../.env')")
-
-    # Adjust any images saved in code
-    code = re.sub(r"./work/(.*?\.png)", r"/app/recipes/public/\1", code)
-
-    # Adjust path
-    code = (
-        "import os\nimport sys\n# Add parent folder to path\nsys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))\n"
-        + code
-    )
-
-    recipe_path = f"{recipes_work_dir}/{custom_id}.py"
-    with open(recipe_path, "w") as f:
-        print(f"Writing recipe to file ... {recipe_path}")
-        f.write(code)
-
-    os.chdir(recipes_work_dir)
-    cmd = f"python {custom_id}.py"
-    run_output = subprocess.run(cmd, shell=True, capture_output=True, text=True)
-
-    result["output"] = run_output.stdout
-    result["errors"] = run_output.stderr
-
-    # TODO this exists here as well as in cli recipes_sync.py, merge
-    if "OUTPUT:" in result["output"]:
-        result["output"] = result["output"].split("OUTPUT:")[1]
-        print(result["output"])
-        result = json.loads(result["output"])
-        print("Recipe executed successfully.")
-    else:
-        print(result["output"])
-        print(result["errors"])
-        result["result"] = {
-            "type": "text",
-            "file": "",
-            "value": "Recipe produced no output",
-        }
-        result["metadata"] = {
-            "params": {},
-            "attribution": "",
-            "data_url": "",
-            "time_period": {"start": "", "end": ""},
-        }
-
-    print(result)
-    return result
-
-
-def get_memory_recipe(user_input, chat_history, generate_intent="true") -> str:
-    """
-    Performs a search in the memory for a given intent and returns the best match found.
-
-    Args:
-        user_input (str): The user input to search for in the memory.
-        chat_history (str): The chat history.
-        generate_intent (str): A flag to indicate whether to generate the intent from the chat history.
-
-    Returns:
-        str: Matched value
-        str: metadata
-    """
-
-    logging.info("Python HTTP trigger function processed a request.")
-
-    # Generate intent from chat history if generate_intent is true
-    if generate_intent is not None and generate_intent == "true":
-        print("********* Generating intent from chat history ...")
-        print("Chat history: ", chat_history)
-        user_input = generate_intent_from_history(chat_history)
-        print("Generated intent: ", user_input)
-        user_input = user_input["intent"]
-
-    print("Checking my memories ...")
-    memory_found, result = check_recipe_memory(user_input, debug=True)
-    if memory_found is True:
-        custom_id = result["metadata"]["custom_id"]
-        mem_type = result["metadata"]["mem_type"]
-        matched_doc = result["content"]
-        # Get data from memory or recipe tables
-        table_data = get_memory_recipe_metadata(custom_id, mem_type)
-        if mem_type == "recipe":
-            result = run_recipe(custom_id, table_data, user_input, chat_history)
-        else:
-            # Take the result directly from memory
-            result = process_memory_recipe_results(result, table_data)
-
-        print(result)
-        result["memory_type"] = mem_type
-        result["memory"] = matched_doc
-        result["memory_found"] = "true"
-
-        result_string = json.dumps(result, indent=4)
-
-        print(result_string)
-        return result_string
-
-    result = {"result": "Sorry, no recipe or memory found", "memory_found": "false"}
-    result = json.dumps(result, indent=4)
-    print(result)
-
-    return result

From c3a6c3b96af84202fe8acd237ad78859ae303a7a Mon Sep 17 00:00:00 2001
From: JanPeterDatakind
Date: Fri, 5 Jul 2024 16:02:11 -0400
Subject: [PATCH 12/19] Adjust copying of files

---
 .github/workflows/get_memory_test.yml | 3 ++-
 server/fastapi/Dockerfile             | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/get_memory_test.yml b/.github/workflows/get_memory_test.yml
index 6aa9a0d5..9cc5bb0b 100644
--- a/.github/workflows/get_memory_test.yml
+++ b/.github/workflows/get_memory_test.yml
@@ -95,7 +95,8 @@ jobs:
 
       - run: |
           cd tests/
-          cp -r ../utils/ .
+          ls
+          ls ./utils
           echo "Running tests ..."
           pytest
 
diff --git a/server/fastapi/Dockerfile b/server/fastapi/Dockerfile
index c1724a7f..be8d6055 100644
--- a/server/fastapi/Dockerfile
+++ b/server/fastapi/Dockerfile
@@ -14,7 +14,7 @@ COPY ./templates /app/templates
 COPY ./utils /app/utils
 COPY ./management/skills.py /app/recipes/skills.py
 COPY requirements.txt /app
-COPY ./utils /app/tests/utils
+COPY ../../utils /app/tests/utils
 
 # Install any needed packages specified in requirements.txt.
 RUN pip install --upgrade pip

From b524d0d12c07a24434585323c7010f438fbe125a Mon Sep 17 00:00:00 2001
From: JanPeterDatakind <117680352+JanPeterDatakind@users.noreply.github.com>
Date: Fri, 5 Jul 2024 16:05:48 -0400
Subject: [PATCH 13/19] Update get_memory_test.yml

---
 .github/workflows/get_memory_test.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/get_memory_test.yml b/.github/workflows/get_memory_test.yml
index 9cc5bb0b..07abfb28 100644
--- a/.github/workflows/get_memory_test.yml
+++ b/.github/workflows/get_memory_test.yml
@@ -96,8 +96,8 @@ jobs:
 
       - run: |
           cd tests/
           ls
-          ls ./utils
+          ls ./utils/
          echo "Running tests ..."
           pytest
-
\ No newline at end of file
+

From 5f9c8c9540a1b05693e7132c7fa992d086225b4b Mon Sep 17 00:00:00 2001
From: JanPeterDatakind
Date: Fri, 5 Jul 2024 16:19:48 -0400
Subject: [PATCH 14/19] trying to fix copying of folders

---
 .github/workflows/get_memory_test.yml | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/get_memory_test.yml b/.github/workflows/get_memory_test.yml
index 07abfb28..1b671a46 100644
--- a/.github/workflows/get_memory_test.yml
+++ b/.github/workflows/get_memory_test.yml
@@ -94,10 +94,11 @@ jobs:
           pip3 install python-dotenv==1.0.1
 
       - run: |
+          echo "Preparing folder structure"
           cd tests/
-          ls
-          ls ./utils/
+          mkdir -p ./utils
+          cp ../utils/* ./utils
           echo "Running tests ..."
           pytest
-
+
\ No newline at end of file

From ad3df89b79efc0c664744eae86c81b5d383f75c6 Mon Sep 17 00:00:00 2001
From: JanPeterDatakind
Date: Fri, 5 Jul 2024 16:38:23 -0400
Subject: [PATCH 15/19] Using docker exec for get memory test instead

---
 .github/workflows/get_memory_test.yml | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/get_memory_test.yml b/.github/workflows/get_memory_test.yml
index 1b671a46..0d01343f 100644
--- a/.github/workflows/get_memory_test.yml
+++ b/.github/workflows/get_memory_test.yml
@@ -94,11 +94,9 @@ jobs:
           pip3 install python-dotenv==1.0.1
 
       - run: |
-          echo "Preparing folder structure"
-          cd tests/
-          mkdir -p ./utils
-          cp ../utils/* ./utils
-          echo "Running tests ..."
-          pytest
-
+          echo "exec into container ..."
+          docker exec -it recipes-ai-server /bin/bash
+          cd ./app/tests
+          ls
+          pytest
\ No newline at end of file

From 83c8178db8988d4067f7c3454290b3f6e48e28d9 Mon Sep 17 00:00:00 2001
From: JanPeterDatakind
Date: Fri, 5 Jul 2024 16:45:04 -0400
Subject: [PATCH 16/19] docker exec -it doesn't work in GitHub environment

---
 .github/workflows/get_memory_test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/get_memory_test.yml b/.github/workflows/get_memory_test.yml
index 0d01343f..6c0975a5 100644
--- a/.github/workflows/get_memory_test.yml
+++ b/.github/workflows/get_memory_test.yml
@@ -95,7 +95,7 @@ jobs:
 
       - run: |
           echo "exec into container ..."
-          docker exec -it recipes-ai-server /bin/bash
+          docker exec recipes-ai-server /bin/bash
           cd ./app/tests
           ls
           pytest
\ No newline at end of file

From ee21f6d9bb28e8b846595305d09cb4c3df44eb6b Mon Sep 17 00:00:00 2001
From: JanPeterDatakind
Date: Fri, 5 Jul 2024 16:57:54 -0400
Subject: [PATCH 17/19] Streamlining exec approach

---
 .github/workflows/get_memory_test.yml | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/.github/workflows/get_memory_test.yml b/.github/workflows/get_memory_test.yml
index 6c0975a5..72c57c57 100644
--- a/.github/workflows/get_memory_test.yml
+++ b/.github/workflows/get_memory_test.yml
@@ -95,8 +95,5 @@ jobs:
 
       - run: |
          echo "exec into container ..."
-          docker exec recipes-ai-server /bin/bash
-          cd ./app/tests
-          ls
-          pytest
+          docker exec recipes-ai-server bash -c "cd tests/ && pytest"
\ No newline at end of file

From db9bb993fc14a5eed6287e0ec0fa5b7749507da2 Mon Sep 17 00:00:00 2001
From: JanPeterDatakind
Date: Fri, 5 Jul 2024 19:10:14 -0400
Subject: [PATCH 18/19] Tidying up a bit

---
 .github/workflows/get_memory_test.yml | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/.github/workflows/get_memory_test.yml b/.github/workflows/get_memory_test.yml
index 72c57c57..04a680a5 100644
--- a/.github/workflows/get_memory_test.yml
+++ b/.github/workflows/get_memory_test.yml
@@ -74,8 +74,6 @@ jobs:
           cd data && python3 download_demo_data.py && cd ..
           ls data/datadb
 
-          # TODO this should be enhanced to use a buildx bake to leverage layer caching for faster builds, or push to repo and simply have a pull for the run
-          # TODO docker-compose files should be refactored to use scopes instead of different versions for each environment
           echo "Starting docker containers for dbs and server ..."
           docker-compose -f ./docker-compose-github.yml pull
           docker-compose -f ./docker-compose-github.yml up -d --build
           echo "logs datadb ..."
           docker-compose -f docker-compose-github.yml logs datadb
           docker ps
 
-      # TODO The promptflow docker build wasn't working in GH actions, so deploying promptflow to host for now
       - name: Run tests
         uses: actions/setup-python@v4
         with:
           python-version: "3.11.4"

From 489dc4cd61a36e10695fda9e7328eed03b12bf4b Mon Sep 17 00:00:00 2001
From: JanPeterDatakind
Date: Fri, 5 Jul 2024 19:25:31 -0400
Subject: [PATCH 19/19] Testing pre-commit hooks to avoid lint fail

---
 tests/test_get_memory.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)
 mode change 100644 => 100755 tests/test_get_memory.py

diff --git a/tests/test_get_memory.py b/tests/test_get_memory.py
old mode 100644
new mode 100755
index 86805da9..94670e60
--- a/tests/test_get_memory.py
+++ b/tests/test_get_memory.py
@@ -1,24 +1,30 @@
-from utils.general import call_execute_query_api, call_get_memory_recipe_api
 import json
+
 import pytest
 
-#load json file into variable and print it
+from utils.general import call_execute_query_api, call_get_memory_recipe_api
+
+
+# load json file into variable and print it
 @pytest.fixture
 def get_test_cases():
-    with open('test_cases_get_memory.json') as f:
+    """
+    Loads test cases from test_cases_get_memory.json.
+    """
+    with open("test_cases_get_memory.json") as f:
        test_data = json.load(f)
    return test_data
 
+
 def test_get_memory_recipe(get_test_cases):
    """
    Tests the get memory recipe API endpoint.
    """
-
-    for test in get_test_cases.get('tests', []):
+
+    for test in get_test_cases.get("tests", []):
        user_input = test["user_input"]
        chat_history = test["chat_history"]
        generate_intent = test["generate_intent"]
        expected_output = test["expected_output"]
        response = call_get_memory_recipe_api(user_input, chat_history, generate_intent)
        assert response == expected_output
-
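The test added in patch 19 loops over every case inside a single test function, so the first failing case stops the remaining ones from being checked. A possible follow-up, not part of this patch series, would be to parametrize the JSON cases so pytest reports each one independently. The sketch below assumes the test_cases_get_memory.json layout shown earlier ({"tests": [...]}), the call_get_memory_recipe_api helper from tests/utils/general.py, and that pytest is invoked from the tests/ directory as in the workflow's "cd tests/ && pytest":

import json

import pytest

from utils.general import call_get_memory_recipe_api


def load_test_cases():
    # Read the JSON fixture once at collection time; assumes the current
    # working directory is tests/, matching the workflow invocation.
    with open("test_cases_get_memory.json") as f:
        return json.load(f).get("tests", [])


@pytest.mark.parametrize(
    "case", load_test_cases(), ids=lambda c: c.get("test_case", "case")
)
def test_get_memory_recipe(case):
    # Each JSON entry becomes its own test, so one failing recipe
    # lookup does not mask the results of the remaining cases.
    response = call_get_memory_recipe_api(
        case["user_input"], case["chat_history"], case["generate_intent"]
    )
    assert response == case["expected_output"]

With this variant, pytest lists "Test Case 1", "Test Case 2", and so on as separate results, which makes it easier to see which recipe intents regress when the server or the demo data changes.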