Merge pull request #44 from datakind/feat/recipe-runner
Tactical fixes for the approaching demo ...

Recipe code execution for different contexts
HDX attribution links
Update system prompt
Here is a little video demo: Data Recipes Demo

Not terribly stable, work to follow.
dividor authored Jun 7, 2024
2 parents bf78594 + bd9561e commit 59d7b4e
Showing 10 changed files with 578 additions and 325 deletions.
40 changes: 27 additions & 13 deletions actions/actions_plugins/recipe-server/actions.py
@@ -4,6 +4,7 @@
import json
import logging
import os
import re
import subprocess
import sys
from functools import lru_cache
@@ -166,24 +167,34 @@ def run_recipe(custom_id: str, recipe: dict, user_input, chat_history):
with open(recipe_path, "w") as f:
f.write(code)

cmd = f"python {recipe_path}"
os.chdir(recipes_work_dir)
cmd = f"python {custom_id}.py"
run_output = subprocess.run(cmd, shell=True, capture_output=True, text=True)

result["output"] = run_output.stdout
result["errors"] = run_output.stderr
result["attribution"] = "Data was sroued from HDX"
# result["errors"] = run_output.stderr

# Run the recipe here
# exec(recipe)
# TODO - this is terrible, just for the demo; extract JSON between "{" and "}"
# Match { }
if result["output"].find("{") != -1:
result["output"] = result["output"][result["output"].find("{") :]
result["output"] = result["output"][: result["output"].rfind("}") + 1]
print("Output: ", result["output"])
j = json.loads(result["output"].replace("'", '"'))
attribution = j["attribution"]
else:
attribution = "Data was sourced from HDX"

result["attribution"] = attribution

print("Recipe executed successfully.")
print(result)
return result
return run_output.stdout + " >> ATTRIBUTION: " + attribution
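The TODO above flags the brace-matching extraction as fragile. As a sketch of a sturdier alternative (an assumption on my part, not part of this commit, and it presumes the recipe prints genuine double-quoted JSON rather than a Python repr), json.JSONDecoder.raw_decode can pull the first well-formed JSON object out of stdout:

import json

def extract_first_json(stdout: str):
    # Try each "{" as a candidate start; raw_decode parses one complete
    # JSON value from that offset and ignores any trailing log output.
    decoder = json.JSONDecoder()
    for start, char in enumerate(stdout):
        if char == "{":
            try:
                obj, _ = decoder.raw_decode(stdout, start)
                return obj
            except json.JSONDecodeError:
                continue
    return None

run_recipe could then read the attribution from extract_first_json(result["output"]) and fall back to the default string when it returns None.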


@lru_cache(maxsize=100)
# @lru_cache(maxsize=100)
@action()
def get_memory_recipe(user_input, chat_history, generate_intent=True) -> str:
def get_memory_recipe(user_input, chat_history, generate_intent="true") -> str:
"""
Performs a search in the memory for a given intent and returns the best match found.
@@ -200,9 +211,9 @@ def get_memory_recipe(user_input, chat_history, generate_intent=True) -> str:
logging.info("Python HTTP trigger function processed a request.")
# Retrieve the CSV file from the request

generate_intent = False
generate_intent = "false"

if generate_intent is not None and generate_intent is True:
if generate_intent is not None and generate_intent == "true":
# chat history is passed from promptflow as a string representation of a list and this has to be converted back to a list for the intent generation to work!
history_list = ast.literal_eval(chat_history)
history_list.append({"inputs": {"question": user_input}})
@@ -220,12 +231,14 @@ def get_memory_recipe(user_input, chat_history, generate_intent=True) -> str:
# Get data from memory or recipe tables
table_data = get_memory_recipe_metadata(custom_id, mem_type)
if mem_type == "recipe":
# Run the recipe
result = run_recipe(custom_id, table_data, user_input, chat_history)
else:
# Take the result directly from memory
result = process_memory_recipe_results(result, table_data)
print(result)

result = re.escape(str(result))
print(result)

return str(result)

result = "Sorry, no recipe or found"
@@ -239,7 +252,8 @@ def get_memory_recipe(user_input, chat_history, generate_intent=True) -> str:
# query = "What's the total population of AFG"
# query = "what's the population of Mali"
# query = "what recipes do you have"
query = "Create a chart that demonstrates the number of organizations working in Sila within each sector"
# query = "Create a chart that demonstrates the number of organizations working in Sila within each sector"
query = "plot a map showing food security in IPC Phase 3 across regions in Chad"
# history = str(
# [
# {
1 change: 1 addition & 0 deletions actions/actions_plugins/recipe-server/package.yaml
@@ -31,3 +31,4 @@ dependencies:
- pandas=2.2.2
- seaborn=0.13.2
- geopandas=0.10.2
- hdx_python_api=6.2.4
4 changes: 2 additions & 2 deletions assistants/plugin_assistants/GPT-4o_Assistant.json

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions db/recipedb/2-demo-data-langchain.sql

Large diffs are not rendered by default.

777 changes: 483 additions & 294 deletions db/recipedb/3-demo-data-recipes.sql

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions db/recipedb/4-demo-data-memories.sql

Large diffs are not rendered by default.

26 changes: 19 additions & 7 deletions recipes-management/recipe_sync.py
@@ -1107,8 +1107,8 @@ def update_metadata_file_results(recipe_folder, result):

# See if result.stdout is a JSON file, if so extract "file"
try:
result = json.loads(str(result.stdout))
png_file = result["file"]
result_json = json.loads(str(result.stdout))
png_file = result_json["file"]
except json.JSONDecodeError:
print("Extract png file location from stdout")
png_file = re.search(r"(\w+\.png)", result.stdout).group(1)
@@ -1140,9 +1140,9 @@ def update_metadata_file_results(recipe_folder, result):
"image_validation_prompt.jinja2"
)
prompt = image_validation_prompt.render(user_input=metadata["intent"])
result = call_llm("", prompt, image=png_file_path)
if "answer" in result:
if result["answer"] == "yes":
llm_result = call_llm("", prompt, image=png_file_path)
if "answer" in llm_result:
if llm_result["answer"] == "yes":
print("Image validation passed")
else:
print(
@@ -1154,6 +1154,15 @@ def update_metadata_file_results(recipe_folder, result):
metadata["sample_result"] = result.stdout
metadata["sample_result_type"] = "text"

# Is there an attribution
if "attribution" in result.stdout:
print(result.stdout)
attribution = re.search(r"'attribution': (.*)\}", result.stdout).group(1)
attribution = attribution.replace("'", "")
metadata["sample_attribution"] = attribution
else:
metadata["sample_attribution"] = ""

with open(metadata_path, "w") as file:
json.dump(metadata, file, indent=4)
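The attribution regex a few lines up assumes stdout ends with a Python-dict-style repr and strips quotes by hand. A hedged sketch of a less brittle parse (assuming the recipe really does print a dict literal; parse_attribution is an illustrative helper, not part of this commit):

import ast

def parse_attribution(stdout: str) -> str:
    # Evaluate the trailing dict literal safely instead of regex-matching
    # quotes; literal_eval accepts Python literals only, never code.
    start = stdout.find("{")
    end = stdout.rfind("}")
    if start == -1 or end == -1:
        return ""
    try:
        payload = ast.literal_eval(stdout[start : end + 1])
    except (ValueError, SyntaxError):
        return ""
    if isinstance(payload, dict):
        return str(payload.get("attribution", ""))
    return ""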

@@ -1367,7 +1376,8 @@ def save_as_memory(recipe_folder):
source,
created_by,
updated_by,
last_updated
last_updated,
attribution
)
VALUES (
:custom_id,
@@ -1378,7 +1388,8 @@ def save_as_memory(recipe_folder):
:source,
:created_by,
:updated_by,
NOW()
NOW(),
:attribution
)
"""
)
@@ -1393,6 +1404,7 @@ def save_as_memory(recipe_folder):
"source": "Recipe sample result",
"created_by": metadata["created_by"],
"updated_by": metadata["created_by"],
"attribution": metadata["sample_attribution"],
}
conn.execute(query_template, params)

1 change: 1 addition & 0 deletions recipes-management/requirements.txt
@@ -13,4 +13,5 @@ langchain-community==0.2.1
matplotlib==3.9.0
geopandas==0.10.2
seaborn==0.13.2
hdx_python_api==6.2.4

35 changes: 35 additions & 0 deletions recipes-management/skills.py
@@ -13,6 +13,8 @@
import psycopg2
import requests
from dotenv import load_dotenv
from hdx.api.configuration import Configuration
from hdx.data.resource import Resource

# This is copied or mounted into Docker image
from utils import *
@@ -28,6 +30,39 @@
load_dotenv()


def get_hdx_dataset_url(resource_id):
"""
Retrieves the dataset URL based on the given resource ID.
Args:
resource_id (str): The ID of the resource.
Returns:
str: The dataset URL.
Raises:
Exception: If the resource cannot be fetched or the dataset ID cannot be obtained.
"""

try:
Configuration.create(hdx_site='prod', user_agent='Data Recipes AI', hdx_read_only=True)
except Exception:
print('HDX already activated')

print(resource_id)

# Fetch the resource
resource = Resource.read_from_hdx(resource_id)

# Get the dataset ID
dataset_id = resource['package_id']

# Construct the dataset URL
dataset_url = f'https://data.humdata.org/dataset/{dataset_id}'

return dataset_url
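
A usage sketch for the helper above — the resource ID here is a made-up placeholder, not one taken from this repo:

# Hypothetical HDX resource ID, purely for illustration.
url = get_hdx_dataset_url("aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee")
print(url)  # e.g. https://data.humdata.org/dataset/<package_id>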


def get_connection():
"""
This function gets a connection to the database
2 changes: 1 addition & 1 deletion utils/recipes.py
@@ -30,7 +30,7 @@
load_dotenv()

# Lower numbers are more similar
similarity_cutoff = {"memory": 0.2, "recipe": 0.3, "helper_function": 0.2}
similarity_cutoff = {"memory": 0.3, "recipe": 0.3, "helper_function": 0.2}

conn_params = {
"RECIPES_OPENAI_API_TYPE": os.getenv("RECIPES_OPENAI_API_TYPE"),
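Since lower scores mean closer matches, raising the memory cutoff from 0.2 to 0.3 lets looser matches through. A minimal sketch of how such a gate is typically applied — the function name and standalone dict below are illustrative, not code from this repo:

def passes_cutoff(distance: float, mem_type: str) -> bool:
    # Lower distance = more similar; accept matches at or below the cutoff.
    similarity_cutoff = {"memory": 0.3, "recipe": 0.3, "helper_function": 0.2}
    return distance <= similarity_cutoff[mem_type]

print(passes_cutoff(0.25, "memory"))  # True after this change; False at the old 0.2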
