Feat/pf tests v2 #60

Merged 20 commits on Jun 29, 2024

Commits
81ef861
Added promptflow standard build
dividor Jun 25, 2024
d43373f
Fixed container name
dividor Jun 26, 2024
66fa688
Interim checkin to make main loop simpler, in prep for self-tests
dividor Jun 27, 2024
f040277
Interim checkin to make main loop simpler, in prep for self-tests
dividor Jun 27, 2024
e47a0e8
Mock test harness, with Mock chainlit so we can use UI code for promp…
dividor Jun 27, 2024
8302bc1
Mock test harness, with Mock chainlit so we can use UI code for promp…
dividor Jun 27, 2024
7260074
Mock test harness, with Mock chainlit so we can use UI code for promp…
dividor Jun 27, 2024
9b97c48
Interim commit, still having thread management issues due to async ch…
dividor Jun 27, 2024
5f82705
Promptflow works partially
dividor Jun 27, 2024
177d350
Implemented workaround for async hanging thread, to call script and k…
dividor Jun 28, 2024
c423f14
Implemented workaround for async hanging thread, to call script and k…
dividor Jun 28, 2024
4c2a457
Implemented workaround for async hanging thread, to call script and k…
dividor Jun 28, 2024
ac99815
Implemented workaround for async hanging thread, to call script and k…
dividor Jun 28, 2024
6fdb85f
Added Promptflow to docker build as a dev option, ie not part of prod
dividor Jun 28, 2024
248ec0f
Adjusted AI judge prompt as part of creating unit tests. We will refi…
dividor Jun 28, 2024
009de78
Fixed bug to back-populate assistant history when in test mode
dividor Jun 28, 2024
f07f853
Fixed bug to back-populate assistant history when in test mode
dividor Jun 28, 2024
5218ed8
Had to add a dockerbuild to be able to install and mock chainlit
dividor Jun 28, 2024
56b3de3
Fixed bug to back-populate assistant history when in test mode
dividor Jun 28, 2024
4405eb0
Fixed bug to back-populate assistant history when in test mode
dividor Jun 28, 2024
23 changes: 23 additions & 0 deletions README.md
@@ -199,6 +199,29 @@ To activate:
6. Go to playground and start a new session, select the 'Recipes data Analysis' workflow
7. Ask 'What is the total population of Mali?'

# Evaluation with Prompt Flow

First, you will need to build the environment to include Prompt Flow ...

`docker compose -f docker-compose.yml -f docker-compose-dev.yml up -d --build`

Then ...

1. Install the DevContainers VSCode extension
2. Build data recipes using the `docker compose` command mentioned above
3. Open the command palette in VSCode (CMD + Shift + P on Mac; CTRL + Shift + P on Windows) and select

`Dev Containers: Attach to remote container`.

Select the promptflow container. This opens a new VSCode window; use it for the next steps.
4. Install the Prompt Flow extension in the new VSCode window
5. Open folder `/app`
6. Click on `flow.dag.yaml`
7. At the top left of the main pane, click 'Visual editor'
8. At the bottom left, under connections, configure an Azure OpenAI connection called 'azure_openai' (see the sketch after this list)
9. On the Groundedness node, select your new connection
10. You can now run the flow by clicking the play icon. See the Prompt Flow documentation for more details
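
For step 8, the connection can also be created programmatically rather than through the visual editor. Below is a minimal sketch using the Prompt Flow Python SDK; the key and endpoint are placeholders you must supply, and it assumes a promptflow release that exposes `PFClient` and `AzureOpenAIConnection`:

```python
# Minimal sketch: register an Azure OpenAI connection named 'azure_openai'.
# Placeholder values (<...>) are assumptions -- replace with your own.
from promptflow import PFClient
from promptflow.entities import AzureOpenAIConnection

pf = PFClient()
connection = AzureOpenAIConnection(
    name="azure_openai",
    api_key="<your-api-key>",
    api_base="https://<your-resource>.openai.azure.com/",
    api_type="azure",
)
pf.connections.create_or_update(connection)
print(pf.connections.get("azure_openai"))  # confirm it was stored
```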

# Deployment

We will add more details here soon; for now, here are some notes on Azure ...
21 changes: 21 additions & 0 deletions docker-compose-dev.yml
@@ -0,0 +1,21 @@
#version: "3.4"

services:
promptflow:
#image: mcr.microsoft.com/azureml/promptflow/promptflow-runtime-stable:latest
build:
context: .
dockerfile: ./flows/chainlit-ui-evaluation/Dockerfile
container_name: recipes-ai-promptflow
env_file:
- .env
volumes:
- ./flows:/app
- ./utils:/app/chainlit-ui-evaluation/utils
- ./templates:/app/chainlit-ui-evaluation/templates
- shared-data:/app/chainlit-ui-evaluation/recipes/public
- ./management/skills.py:/app/chainlit-ui-evaluation/recipes/skills.py
- ./ui/chat-chainlit-assistant/app.py:/app/chainlit-ui-evaluation/app.py
volumes:
pgdata2:
shared-data:
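
A note for orientation (not part of the diff): once built with the compose command in the README, the container should appear as `recipes-ai-promptflow` in `docker ps`, and `docker exec -it recipes-ai-promptflow bash` should open a shell inside it as an alternative to attaching VSCode.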
1 change: 0 additions & 1 deletion docker-compose.yml
@@ -140,7 +140,6 @@ services:
- ./utils:/app/utils
- ./templates:/app/templates
- ./db/recipedb:/app/db

volumes:
pgdata2:
shared-data:
6 changes: 6 additions & 0 deletions flows/chainlit-ui-evaluation/Dockerfile
@@ -0,0 +1,6 @@
FROM mcr.microsoft.com/azureml/promptflow/promptflow-runtime-stable:latest

# No need to copy the app code, we mount via docker-compose-dev.yml

RUN pip3 install --upgrade pip
RUN pip3 install chainlit==1.1.305
38 changes: 38 additions & 0 deletions flows/chainlit-ui-evaluation/aggregate_variant_results.py
@@ -0,0 +1,38 @@
from typing import List

import numpy as np
from promptflow import log_metric, tool


@tool
def aggregate_variants_results(results: List[dict]):
"""
Aggregate the results of multiple variants.

Args:
results (List[dict]): A list of dictionaries containing the results for each variant.

Returns:
dict: A dictionary containing the aggregated results, with the metric names as keys and the aggregated values as values.
"""
aggregate_results = {}
for result in results:
for name, value in result.items():
if name not in aggregate_results:
aggregate_results[name] = []
try:
float_val = float(value)
except Exception:
float_val = np.nan
aggregate_results[name].append(float_val)

for name, value in aggregate_results.items():
metric_name = name
aggregate_results[name] = np.nanmean(value)
if "pass_rate" in metric_name:
metric_name = metric_name + "(%)"
aggregate_results[name] = aggregate_results[name] * 100.0
aggregate_results[name] = round(aggregate_results[name], 2)
log_metric(metric_name, aggregate_results[name])

return aggregate_results
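
As a rough illustration (not part of the diff), calling the tool directly shows the aggregation behavior: each metric is averaged with `np.nanmean`, non-numeric values become NaN and are ignored, and any `pass_rate` metric is scaled to a percentage. The metric names below are made up for the example, and it assumes the `@tool`-decorated function can be invoked as a plain Python function (promptflow's `log_metric` may only warn outside a flow run):

```python
# Hypothetical direct call with illustrative metric names.
from aggregate_variant_results import aggregate_variants_results

results = [
    {"gpt_groundedness": "4", "pass_rate": "1"},
    {"gpt_groundedness": "5", "pass_rate": "0"},
    {"gpt_groundedness": "n/a", "pass_rate": "1"},  # non-numeric -> NaN, skipped by nanmean
]
print(aggregate_variants_results(results))
# {'gpt_groundedness': 4.5, 'pass_rate': 66.67}
```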
6 changes: 6 additions & 0 deletions flows/chainlit-ui-evaluation/azure_openai.yaml
@@ -0,0 +1,6 @@
$schema: https://azuremlschemas.azureedge.net/promptflow/latest/AzureOpenAIConnection.schema.json
name: open_ai_connection
type: azure_open_ai
api_key: "<user-input>"
api_base: "<user-input>"
api_type: "azure"
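
A usage note (not part of the diff): assuming the standard Prompt Flow CLI is available in the container, this template can be registered with `pf connection create -f azure_openai.yaml --set api_key=<key> api_base=<endpoint>`. Note that the template names the connection `open_ai_connection`, while the README steps reference one called `azure_openai`; whichever name is used must match the connection selected on the Groundedness node.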