From 29dc28e039a83362c7844b3389efbb741459d45d Mon Sep 17 00:00:00 2001
From: ZanzyTHEbar <pyr0ndet0s97@gmail.com>
Date: Tue, 18 Jun 2024 13:31:17 +0100
Subject: [PATCH 01/12] fix: dev scripts

- set up scripts to support zsh and Linux out of the box

- fix some minor bugs related to potential shell interpretation issues
---
 app/cli/dev.sh | 11 ++++++-----
 dev.sh         | 24 ++++++++++++++++++++++--
 2 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/app/cli/dev.sh b/app/cli/dev.sh
index 07a9155a..d0a767c3 100755
--- a/app/cli/dev.sh
+++ b/app/cli/dev.sh
@@ -4,8 +4,9 @@ OUT="${PLANDEX_DEV_CLI_OUT_DIR:-/usr/local/bin}"
 NAME="${PLANDEX_DEV_CLI_NAME:-plandex-dev}"
 ALIAS="${PLANDEX_DEV_CLI_ALIAS:-pdxd}"
 
-go build -o $NAME && \
-rm -f $OUT/$NAME && \
-cp $NAME $OUT/$NAME && \
-ln -sf $OUT/$NAME $OUT/$ALIAS && \
-echo built $NAME cli and added $ALIAS alias to $OUT
+# Build as the invoking user; sudo is only needed to install into "$OUT". Double quote to prevent globbing and word splitting.
+go build -o "$NAME" &&
+    sudo rm -f "$OUT"/"$NAME" &&
+    sudo cp "$NAME" "$OUT"/"$NAME" &&
+    sudo ln -sf "$OUT"/"$NAME" "$OUT"/"$ALIAS" &&
+    echo built "$NAME" cli and added "$ALIAS" alias to "$OUT"
diff --git a/dev.sh b/dev.sh
index 6e21caf3..2cd8259e 100755
--- a/dev.sh
+++ b/dev.sh
@@ -1,5 +1,24 @@
 #!/bin/bash
 
+# Detect zsh and trigger it if its the shell
+if [ -n "$ZSH_VERSION" ]; then
+  # shell is zsh
+  zsh -c "source ~/.zshrc && $*"
+fi
+
+# Detect if reflex is installed and install it if not
+if ! [ -x "$(command -v reflex)" ]; then
+  echo 'Error: reflex is not installed. Installing it now...' >&2
+
+  # Check if the $GOPATH is empty
+  if [ -z "$GOPATH" ]; then
+    echo "Error: \$GOPATH is not set. Please set it to continue..." >&2
+    exit 1
+  fi
+
+  go get -u github.com/cespare/reflex
+fi
+
 terminate() {
   pkill -f 'plandex-server' # Assuming plandex-server is the name of your process
   kill -TERM "$pid1" 2>/dev/null
@@ -8,7 +27,8 @@ terminate() {
 
 trap terminate SIGTERM SIGINT
 
-cd app
+# Incase cd fails, exit the script
+cd app || exit 1
 
 (cd cli && ./dev.sh)
 
@@ -19,4 +39,4 @@ reflex -r '^(server|shared)/.*\.(go|mod|sum)$' -s -- sh -c 'cd server && go buil
 pid2=$!
 
 wait $pid1
-wait $pid2
\ No newline at end of file
+wait $pid2

From 9c6310771dc638204334751cd1cb4043200b6319 Mon Sep 17 00:00:00 2001
From: ZanzyTHEbar <pyr0ndet0s97@gmail.com>
Date: Tue, 18 Jun 2024 22:13:08 +0100
Subject: [PATCH 02/12] build: begin make impl

- set up a barebones Makefile
- set up automation of promptfoo provider generation
- create go script for rendering promptfoo provider configs
---
 Makefile                                      | 46 +++++++++
 dev.sh => app/scripts/dev.sh                  |  8 +-
 app/scripts/render_config.go                  | 99 +++++++++++++++++++
 .../evals/promptfoo-poc/fix/config.properties |  7 ++
 .../promptfoo-poc/fix/fix.parameters.json     | 78 +++++++++++++++
 test/evals/promptfoo-poc/fix/fix.provider.yml | 82 +++++++++++++++
 .../promptfoo-poc/fix/promptfooconfig.yaml    | 93 +----------------
 .../templates/provider.template.yml           | 14 +++
 .../promptfoo-poc/verify/config.properties    |  7 ++
 .../promptfoo-poc/verify/promptfooconfig.yaml | 91 +----------------
 .../verify/verify.parameters.json             | 74 ++++++++++++++
 .../promptfoo-poc/verify/verify.provider.yml  | 73 ++++++++++++++
 12 files changed, 488 insertions(+), 184 deletions(-)
 create mode 100644 Makefile
 rename dev.sh => app/scripts/dev.sh (92%)
 create mode 100644 app/scripts/render_config.go
 create mode 100644 test/evals/promptfoo-poc/fix/config.properties
 create mode 100644 test/evals/promptfoo-poc/fix/fix.parameters.json
 create mode 100644 test/evals/promptfoo-poc/fix/fix.provider.yml
 create mode 100644 test/evals/promptfoo-poc/templates/provider.template.yml
 create mode 100644 test/evals/promptfoo-poc/verify/config.properties
 create mode 100644 test/evals/promptfoo-poc/verify/verify.parameters.json
 create mode 100644 test/evals/promptfoo-poc/verify/verify.provider.yml

diff --git a/Makefile b/Makefile
new file mode 100644
index 00000000..efcd9d29
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,46 @@
+# Go parameters
+GOCMD = go
+GOBUILD = $(GOCMD) build
+GOCLEAN = $(GOCMD) clean
+GOTEST = $(GOCMD) test
+GOGET = $(GOCMD) get
+
+# Main package name
+MAIN_PACKAGE = main
+
+# Output binary name
+BINARY_NAME = plandex
+
+# Check the PLANDEX_ENVIRONMENT environment variable, reassign the BINARY_NAME if necessary
+ifeq ($(PLANDEX_ENVIRONMENT),development)
+BINARY_NAME = plandex-dev
+endif
+
+# Run the dev watcher script (NOTE: being the first rule, this is what a bare `make` runs)
+dev:
+	@cd app/scripts && ./dev.sh
+
+# Build target
+build:
+	@$(GOBUILD) -o $(BINARY_NAME) -v $(MAIN_PACKAGE)
+
+# Clean target
+clean:
+	@$(GOCLEAN)
+	@rm -f $(BINARY_NAME)
+
+# Test target (renders the provider configs first via the render prerequisite)
+test: render
+	@$(GOTEST) -v ./...
+
+render:
+	@cd app/scripts && $(GOCMD) run render_config.go
+
+# Get dependencies
+deps:
+	$(GOGET) -v ./...
+
+# Alias for build; this is NOT Make's default goal because it is not the first rule
+default: build
+
+.PHONY: all dev default render build clean test deps
\ No newline at end of file
diff --git a/dev.sh b/app/scripts/dev.sh
similarity index 92%
rename from dev.sh
rename to app/scripts/dev.sh
index 2cd8259e..870bed11 100755
--- a/dev.sh
+++ b/app/scripts/dev.sh
@@ -3,12 +3,12 @@
 # Detect zsh and trigger it if its the shell
 if [ -n "$ZSH_VERSION" ]; then
   # shell is zsh
+  echo "Detected zsh"
   zsh -c "source ~/.zshrc && $*"
 fi
 
 # Detect if reflex is installed and install it if not
 if ! [ -x "$(command -v reflex)" ]; then
-  echo 'Error: reflex is not installed. Installing it now...' >&2
 
   # Check if the $GOPATH is empty
   if [ -z "$GOPATH" ]; then
@@ -16,6 +16,7 @@ if ! [ -x "$(command -v reflex)" ]; then
     exit 1
   fi
 
-  go get -u github.com/cespare/reflex
+  echo 'Error: reflex is not installed. Installing it now...' >&2
+  go install github.com/cespare/reflex@latest
 fi
 
@@ -27,10 +28,10 @@ terminate() {
 
 trap terminate SIGTERM SIGINT
 
-# Incase cd fails, exit the script
-cd app || exit 1
+# Run from the app directory (parent of this script's directory) so the relative cli/ and server/ paths below resolve; exit if cd fails
+cd "$(dirname "$0")/.." || exit 1
 
 (cd cli && ./dev.sh)
 
 reflex -r '^(cli|shared)/.*\.(go|mod|sum)$' -- sh -c 'cd cli && ./dev.sh' &
 pid1=$!
diff --git a/app/scripts/render_config.go b/app/scripts/render_config.go
new file mode 100644
index 00000000..e40653ff
--- /dev/null
+++ b/app/scripts/render_config.go
@@ -0,0 +1,99 @@
+package main
+
+import (
+	"encoding/json"
+	"log"
+	"os"
+	"path/filepath"
+	"strings"
+	"text/template"
+)
+
+var testDir = "../../test/evals/promptfoo-poc"
+var templFile = testDir + "/templates/" + "/provider.template.yml"
+
+func main() {
+
+	testAbsPath, _ := filepath.Abs(testDir)
+	templAbsPath, _ := filepath.Abs(templFile)
+
+	// Function to walk through directories and find required values
+	err := filepath.Walk(testAbsPath, func(path string, info os.FileInfo, err error) error {
+		if err != nil {
+			return err
+		}
+		if !info.IsDir() && filepath.Ext(path) == ".properties" {
+			dirName := filepath.Base(filepath.Dir(path))
+			outputFileName := filepath.Join(filepath.Dir(path), dirName+".provider.yml")
+
+			// Read the template file
+			templateContent, err := os.ReadFile(templAbsPath)
+			if err != nil {
+				log.Fatalf("Error reading template file: %v", err)
+			}
+
+			// Prepare variables (this example assumes properties file is a simple key=value format)
+			variables := map[string]interface{}{}
+			properties, err := os.ReadFile(path)
+			if err != nil {
+				log.Fatalf("Error reading properties file: %v", err)
+			}
+			for _, line := range strings.Split(string(properties), "\n") {
+				if len(line) > 0 {
+					parts := strings.SplitN(line, "=", 2)
+					if len(parts) == 2 {
+						key := strings.TrimSpace(parts[0])
+						value := strings.TrimSpace(parts[1])
+						if key == "nested_parameters_json" {
+							// Read the file path from the nested_properties_json key
+							propertiesJsonFile := filepath.Join(filepath.Dir(path), value)
+							jsonProperties, err := os.ReadFile(propertiesJsonFile)
+							if err != nil {
+								log.Fatalf("Error reading nested parameters JSON file: %v", err)
+							}
+							// Parse the JSON string
+							var nestedProperties map[string]interface{}
+
+							err = json.Unmarshal(jsonProperties, &nestedProperties)
+
+							if err != nil {
+								log.Fatalf("Error unmarshalling nested properties JSON: %v", err)
+							}
+
+							parameters, err := json.Marshal(nestedProperties)
+							if err != nil {
+								log.Fatalf("Error marshalling nested properties JSON: %v", err)
+							}
+
+							// Add the nested properties to the variables
+							variables["parameters"] = string(parameters)
+						} else {
+							variables[key] = value
+						}
+					}
+				}
+			}
+
+			// Parse and execute the template
+			tmpl, err := template.New("yamlTemplate").Parse(string(templateContent))
+			if err != nil {
+				log.Fatalf("Error parsing template: %v", err)
+			}
+			outputFile, err := os.Create(outputFileName)
+			if err != nil {
+				log.Fatalf("Error creating output file: %v", err)
+			}
+			defer outputFile.Close()
+
+			err = tmpl.Execute(outputFile, variables)
+			if err != nil {
+				log.Fatalf("Error executing template: %v", err)
+			}
+			log.Printf("Template rendered and saved to '%s'", outputFileName)
+		}
+		return nil
+	})
+	if err != nil {
+		log.Fatalf("Error walking the path: %v", err)
+	}
+}
diff --git a/test/evals/promptfoo-poc/fix/config.properties b/test/evals/promptfoo-poc/fix/config.properties
new file mode 100644
index 00000000..dc796603
--- /dev/null
+++ b/test/evals/promptfoo-poc/fix/config.properties
@@ -0,0 +1,7 @@
+provider_id=openai:gpt-4o
+function_name=listChangesWithLineNums
+tool_type=function
+function_param_type=object
+tool_choice_type=function
+tool_choice_function_name=listChangesWithLineNums
+nested_parameters_json=fix.parameters.json
diff --git a/test/evals/promptfoo-poc/fix/fix.parameters.json b/test/evals/promptfoo-poc/fix/fix.parameters.json
new file mode 100644
index 00000000..c98ac88d
--- /dev/null
+++ b/test/evals/promptfoo-poc/fix/fix.parameters.json
@@ -0,0 +1,78 @@
+{
+  "type": "object",
+  "properties": {
+    "comments": {
+      "type": "array",
+      "items": {
+        "type": "object",
+        "properties": {
+          "txt": {
+            "type": "string"
+          },
+          "reference": {
+            "type": "boolean"
+          }
+        },
+        "required": ["txt", "reference"]
+      }
+    },
+    "problems": {
+      "type": "string"
+    },
+    "changes": {
+      "type": "array",
+      "items": {
+        "type": "object",
+        "properties": {
+          "summary": {
+            "type": "string"
+          },
+          "hasChange": {
+            "type": "boolean"
+          },
+          "old": {
+            "type": "object",
+            "properties": {
+              "entireFile": {
+                "type": "boolean"
+              },
+              "startLineString": {
+                "type": "string"
+              },
+              "endLineString": {
+                "type": "string"
+              }
+            },
+            "required": ["startLineString", "endLineString"]
+          },
+          "startLineIncludedReasoning": {
+            "type": "string"
+          },
+          "startLineIncluded": {
+            "type": "boolean"
+          },
+          "endLineIncludedReasoning": {
+            "type": "string"
+          },
+          "endLineIncluded": {
+            "type": "boolean"
+          },
+          "new": {
+            "type": "string"
+          }
+        },
+        "required": [
+          "summary",
+          "hasChange",
+          "old",
+          "startLineIncludedReasoning",
+          "startLineIncluded",
+          "endLineIncludedReasoning",
+          "endLineIncluded",
+          "new"
+        ]
+      }
+    }
+  },
+  "required": ["comments", "problems", "changes"]
+}
diff --git a/test/evals/promptfoo-poc/fix/fix.provider.yml b/test/evals/promptfoo-poc/fix/fix.provider.yml
new file mode 100644
index 00000000..a91430f0
--- /dev/null
+++ b/test/evals/promptfoo-poc/fix/fix.provider.yml
@@ -0,0 +1,82 @@
+id: openai:gpt-4o
+config:
+  tools:
+    [
+      {
+        "type": "function",
+        "function":
+          {
+            "name": "listChangesWithLineNums",
+            "parameters":
+              {
+                "properties":
+                  {
+                    "changes":
+                      {
+                        "items":
+                          {
+                            "properties":
+                              {
+                                "endLineIncluded": { "type": "boolean" },
+                                "endLineIncludedReasoning":
+                                  { "type": "string" },
+                                "hasChange": { "type": "boolean" },
+                                "new": { "type": "string" },
+                                "old":
+                                  {
+                                    "properties":
+                                      {
+                                        "endLineString": { "type": "string" },
+                                        "entireFile": { "type": "boolean" },
+                                        "startLineString": { "type": "string" },
+                                      },
+                                    "required":
+                                      ["startLineString", "endLineString"],
+                                    "type": "object",
+                                  },
+                                "startLineIncluded": { "type": "boolean" },
+                                "startLineIncludedReasoning":
+                                  { "type": "string" },
+                                "summary": { "type": "string" },
+                              },
+                            "required":
+                              [
+                                "summary",
+                                "hasChange",
+                                "old",
+                                "startLineIncludedReasoning",
+                                "startLineIncluded",
+                                "endLineIncludedReasoning",
+                                "endLineIncluded",
+                                "new",
+                              ],
+                            "type": "object",
+                          },
+                        "type": "array",
+                      },
+                    "comments":
+                      {
+                        "items":
+                          {
+                            "properties":
+                              {
+                                "reference": { "type": "boolean" },
+                                "txt": { "type": "string" },
+                              },
+                            "required": ["txt", "reference"],
+                            "type": "object",
+                          },
+                        "type": "array",
+                      },
+                    "problems": { "type": "string" },
+                  },
+                "required": ["comments", "problems", "changes"],
+                "type": "object",
+              },
+          },
+      },
+    ]
+  tool_choice:
+    type: "function"
+    function:
+      name: "listChangesWithLineNums"
diff --git a/test/evals/promptfoo-poc/fix/promptfooconfig.yaml b/test/evals/promptfoo-poc/fix/promptfooconfig.yaml
index 12a23881..9f50157f 100644
--- a/test/evals/promptfoo-poc/fix/promptfooconfig.yaml
+++ b/test/evals/promptfoo-poc/fix/promptfooconfig.yaml
@@ -1,101 +1,12 @@
 # This configuration compares LLM output of 2 prompts x 2 GPT models across 3 test cases.
 # Learn more: https://promptfoo.dev/docs/configuration/guide
-description: 'Fixes'
+description: "Fixes"
 
 prompts:
   - file://prompt.txt
 
 providers:
-  - id: openai:gpt-4o
-    config: 
-      tools: [{
-        "type": "function",
-        "function": {
-          "name": "listChangesWithLineNums",
-          "parameters": {
-            "type": "object",
-            "properties": {
-              "comments": {
-                "type": "array",
-                "items": {
-                  "type": "object",
-                  "properties": {
-                    "txt": {
-                      "type": "string"
-                    },
-                    "reference": {
-                      "type": "boolean"
-                    }
-                  },
-                  "required": ["txt", "reference"]
-                }
-              },
-              "problems": {
-                "type": "string"
-              },
-              "changes": {
-                "type": "array",
-                "items": {
-                  "type": "object",
-                  "properties": {
-                    "summary": {
-                      "type": "string"
-                    },
-                    "hasChange": {
-                      "type": "boolean"
-                    },
-                    "old": {
-                      "type": "object",
-                      "properties": {
-                        "entireFile": {
-                          "type": "boolean"
-                        },
-                        "startLineString": {
-                          "type": "string"
-                        },
-                        "endLineString": {
-                          "type": "string"
-                        }
-                      },
-                      "required": ["startLineString", "endLineString"]
-                    },
-                    "startLineIncludedReasoning": {
-                      "type": "string"
-                    },
-                    "startLineIncluded": {
-                      "type": "boolean"
-                    },
-                    "endLineIncludedReasoning": {
-                      "type": "string"
-                    },
-                    "endLineIncluded": {
-                      "type": "boolean"
-                    },
-                    "new": {
-                      "type": "string"
-                    }
-                  },
-                  "required": [
-                    "summary",
-                    "hasChange",
-                    "old",
-                    "startLineIncludedReasoning",
-                    "startLineIncluded",
-                    "endLineIncludedReasoning",
-                    "endLineIncluded",
-                    "new"
-                  ]
-                }
-              }
-            },
-            "required": ["comments", "problems", "changes"]
-          }
-        }
-      }]
-      tool_choice: 
-        type: function
-        function: 
-          name: listChangesWithLineNums
+  - file://fix.provider.yml
 
 tests:
   - vars:
diff --git a/test/evals/promptfoo-poc/templates/provider.template.yml b/test/evals/promptfoo-poc/templates/provider.template.yml
new file mode 100644
index 00000000..0408cd23
--- /dev/null
+++ b/test/evals/promptfoo-poc/templates/provider.template.yml
@@ -0,0 +1,14 @@
+id: {{ .provider_id }}
+config:
+  tools:
+    [
+      {
+        "type": "{{ .tool_type }}",
+        "function":
+          { "name": "{{ .function_name }}", "parameters": {{ .parameters }} },
+      },
+    ]
+  tool_choice:
+    type: "{{ .tool_choice_type }}"
+    function:
+      name: "{{ .tool_choice_function_name }}"
diff --git a/test/evals/promptfoo-poc/verify/config.properties b/test/evals/promptfoo-poc/verify/config.properties
new file mode 100644
index 00000000..c43ce849
--- /dev/null
+++ b/test/evals/promptfoo-poc/verify/config.properties
@@ -0,0 +1,7 @@
+provider_id=openai:gpt-4o
+function_name=verifyOutput
+tool_type=function
+function_param_type=object
+tool_choice_type=function
+tool_choice_function_name=verifyOutput
+nested_parameters_json=verify.parameters.json
diff --git a/test/evals/promptfoo-poc/verify/promptfooconfig.yaml b/test/evals/promptfoo-poc/verify/promptfooconfig.yaml
index b5a6ddb0..f71ea646 100644
--- a/test/evals/promptfoo-poc/verify/promptfooconfig.yaml
+++ b/test/evals/promptfoo-poc/verify/promptfooconfig.yaml
@@ -1,97 +1,12 @@
 # This configuration compares LLM output of 2 prompts x 2 GPT models across 3 test cases.
 # Learn more: https://promptfoo.dev/docs/configuration/guide
-description: 'Verification'
+description: "Verification"
 
 prompts:
   - file://prompt.txt
 
 providers:
-  - id: openai:gpt-4o
-    config: 
-      tools: [{
-        "type": "function",
-        "function": {
-          "name": "verifyOutput",
-          "parameters": {
-            "type": "object",
-            "properties": {
-              "syntaxErrorsReasoning": {
-                "type": "string"
-              },
-              "hasSyntaxErrors": {
-                "type": "boolean"
-              },
-              "removed": {
-                "type": "array",
-                "items": {
-                  "type": "object",
-                  "properties": {
-                    "code": {
-                      "type": "string"
-                    },
-                    "reasoning": {
-                      "type": "string"
-                    },
-                    "correct": {
-                      "type": "boolean"
-                    }
-                  },
-                  "required": ["code", "reasoning", "correct"]
-                }
-              },
-              "removedCodeErrorsReasoning": {
-                "type": "string"
-              },
-              "hasRemovedCodeErrors": {
-                "type": "boolean"
-              },
-              "duplicationErrorsReasoning": {
-                "type": "string"
-              },
-              "hasDuplicationErrors": {
-                "type": "boolean"
-              },
-              "comments": {
-                "type": "array",
-                "items": {
-                  "type": "object",
-                  "properties": {
-                    "txt": {
-                      "type": "string"
-                    },
-                    "reference": {
-                      "type": "boolean"
-                    }
-                  },
-                  "required": ["txt", "reference"]
-                }
-              },
-              "referenceErrorsReasoning": {
-                "type": "string"
-              },
-              "hasReferenceErrors": {
-                "type": "boolean"
-              }
-            },
-            "required": [
-              "syntaxErrorsReasoning",
-              "hasSyntaxErrors",
-              "removed",
-              "removedCodeErrorsReasoning",
-              "hasRemovedCodeErrors",
-              "duplicationErrorsReasoning",
-              "hasDuplicationErrors",
-              "comments",
-              "referenceErrorsReasoning",
-              "hasReferenceErrors"
-            ]
-          }
-        }
-      }]
-      tool_choice: 
-        type: function
-        function: 
-          name: verifyOutput
+  - file://verify.provider.yml
 
 tests:
   - vars:
@@ -111,7 +26,7 @@ tests:
             args.hasDuplicationErrors ||
             args.hasReferenceErrors            
           )
-  
+
   - vars:
       preBuildState: file://tests/shared/pre_build.go
       changes: file://tests/removal/changes.md
diff --git a/test/evals/promptfoo-poc/verify/verify.parameters.json b/test/evals/promptfoo-poc/verify/verify.parameters.json
new file mode 100644
index 00000000..00b84844
--- /dev/null
+++ b/test/evals/promptfoo-poc/verify/verify.parameters.json
@@ -0,0 +1,74 @@
+{
+  "type": "object",
+  "properties": {
+    "syntaxErrorsReasoning": {
+      "type": "string"
+    },
+    "hasSyntaxErrors": {
+      "type": "boolean"
+    },
+    "removed": {
+      "type": "array",
+      "items": {
+        "type": "object",
+        "properties": {
+          "code": {
+            "type": "string"
+          },
+          "reasoning": {
+            "type": "string"
+          },
+          "correct": {
+            "type": "boolean"
+          }
+        },
+        "required": ["code", "reasoning", "correct"]
+      }
+    },
+    "removedCodeErrorsReasoning": {
+      "type": "string"
+    },
+    "hasRemovedCodeErrors": {
+      "type": "boolean"
+    },
+    "duplicationErrorsReasoning": {
+      "type": "string"
+    },
+    "hasDuplicationErrors": {
+      "type": "boolean"
+    },
+    "comments": {
+      "type": "array",
+      "items": {
+        "type": "object",
+        "properties": {
+          "txt": {
+            "type": "string"
+          },
+          "reference": {
+            "type": "boolean"
+          }
+        },
+        "required": ["txt", "reference"]
+      }
+    },
+    "referenceErrorsReasoning": {
+      "type": "string"
+    },
+    "hasReferenceErrors": {
+      "type": "boolean"
+    }
+  },
+  "required": [
+    "syntaxErrorsReasoning",
+    "hasSyntaxErrors",
+    "removed",
+    "removedCodeErrorsReasoning",
+    "hasRemovedCodeErrors",
+    "duplicationErrorsReasoning",
+    "hasDuplicationErrors",
+    "comments",
+    "referenceErrorsReasoning",
+    "hasReferenceErrors"
+  ]
+}
diff --git a/test/evals/promptfoo-poc/verify/verify.provider.yml b/test/evals/promptfoo-poc/verify/verify.provider.yml
new file mode 100644
index 00000000..0db53932
--- /dev/null
+++ b/test/evals/promptfoo-poc/verify/verify.provider.yml
@@ -0,0 +1,73 @@
+id: openai:gpt-4o
+config:
+  tools:
+    [
+      {
+        "type": "function",
+        "function":
+          {
+            "name": "verifyOutput",
+            "parameters":
+              {
+                "properties":
+                  {
+                    "comments":
+                      {
+                        "items":
+                          {
+                            "properties":
+                              {
+                                "reference": { "type": "boolean" },
+                                "txt": { "type": "string" },
+                              },
+                            "required": ["txt", "reference"],
+                            "type": "object",
+                          },
+                        "type": "array",
+                      },
+                    "duplicationErrorsReasoning": { "type": "string" },
+                    "hasDuplicationErrors": { "type": "boolean" },
+                    "hasReferenceErrors": { "type": "boolean" },
+                    "hasRemovedCodeErrors": { "type": "boolean" },
+                    "hasSyntaxErrors": { "type": "boolean" },
+                    "referenceErrorsReasoning": { "type": "string" },
+                    "removed":
+                      {
+                        "items":
+                          {
+                            "properties":
+                              {
+                                "code": { "type": "string" },
+                                "correct": { "type": "boolean" },
+                                "reasoning": { "type": "string" },
+                              },
+                            "required": ["code", "reasoning", "correct"],
+                            "type": "object",
+                          },
+                        "type": "array",
+                      },
+                    "removedCodeErrorsReasoning": { "type": "string" },
+                    "syntaxErrorsReasoning": { "type": "string" },
+                  },
+                "required":
+                  [
+                    "syntaxErrorsReasoning",
+                    "hasSyntaxErrors",
+                    "removed",
+                    "removedCodeErrorsReasoning",
+                    "hasRemovedCodeErrors",
+                    "duplicationErrorsReasoning",
+                    "hasDuplicationErrors",
+                    "comments",
+                    "referenceErrorsReasoning",
+                    "hasReferenceErrors",
+                  ],
+                "type": "object",
+              },
+          },
+      },
+    ]
+  tool_choice:
+    type: "function"
+    function:
+      name: "verifyOutput"

From cbe5f16d1cb232f4fb520e7061d77740138d5751 Mon Sep 17 00:00:00 2001
From: ZanzyTHEbar <pyr0ndet0s97@gmail.com>
Date: Tue, 18 Jun 2024 22:24:42 +0100
Subject: [PATCH 03/12] build: fix plandex env var for dev in makefile

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index efcd9d29..42e676a1 100644
--- a/Makefile
+++ b/Makefile
@@ -12,7 +12,7 @@ MAIN_PACKAGE = main
 BINARY_NAME = plandex
 
 # Check the PLANDEX_ENVIRONMENT environment variable, reassign the BINARY_NAME if necessary
-ifeq ($(PLANDEX_ENVIRONMENT),development)
+ifeq ($(PLANDEX_ENV),development)
 BINARY_NAME = plandex-dev
 endif
 

From ff5ff38c5362f5421e328b55a7363b7bcb0ec947 Mon Sep 17 00:00:00 2001
From: ZanzyTHEbar <pyr0ndet0s97@gmail.com>
Date: Tue, 18 Jun 2024 22:30:14 +0100
Subject: [PATCH 04/12] test: change properties to parameters in reference to
 the nested json doc

---
 app/scripts/render_config.go | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/app/scripts/render_config.go b/app/scripts/render_config.go
index e40653ff..0fdcf878 100644
--- a/app/scripts/render_config.go
+++ b/app/scripts/render_config.go
@@ -32,7 +32,7 @@ func main() {
 				log.Fatalf("Error reading template file: %v", err)
 			}
 
-			// Prepare variables (this example assumes properties file is a simple key=value format)
+			// Prepare variables (this assumes properties file is a simple key=value format)
 			variables := map[string]interface{}{}
 			properties, err := os.ReadFile(path)
 			if err != nil {
@@ -45,24 +45,26 @@ func main() {
 						key := strings.TrimSpace(parts[0])
 						value := strings.TrimSpace(parts[1])
 						if key == "nested_parameters_json" {
-							// Read the file path from the nested_properties_json key
-							propertiesJsonFile := filepath.Join(filepath.Dir(path), value)
-							jsonProperties, err := os.ReadFile(propertiesJsonFile)
+							// Read the file path from the nested_parameters_json key
+							parametersJsonFile := filepath.Join(filepath.Dir(path), value)
+							jsonParameters, err := os.ReadFile(parametersJsonFile)
 							if err != nil {
 								log.Fatalf("Error reading nested parameters JSON file: %v", err)
 							}
 							// Parse the JSON string
-							var nestedProperties map[string]interface{}
+							var nestedParameters map[string]interface{}
 
-							err = json.Unmarshal(jsonProperties, &nestedProperties)
+							// We marshal and unmarshal the JSON to ensure that the nested properties are properly formatted for the template, and to ensure that the data is correct json
+
+							err = json.Unmarshal(jsonParameters, &nestedParameters)
 
 							if err != nil {
-								log.Fatalf("Error unmarshalling nested properties JSON: %v", err)
+								log.Fatalf("Error unmarshalling nested parameters JSON: %v", err)
 							}
 
-							parameters, err := json.Marshal(nestedProperties)
+							parameters, err := json.Marshal(nestedParameters)
 							if err != nil {
-								log.Fatalf("Error marshalling nested properties JSON: %v", err)
+								log.Fatalf("Error marshalling nested parameters JSON: %v", err)
 							}
 
 							// Add the nested properties to the variables

From 5acd1dcfe799ca5f1152d8774d18c63754b2bc9f Mon Sep 17 00:00:00 2001
From: ZanzyTHEbar <pyr0ndet0s97@gmail.com>
Date: Tue, 18 Jun 2024 22:41:03 +0100
Subject: [PATCH 05/12] test: clean up render_config.go

---
 app/scripts/render_config.go | 79 ++++++++++++++++++++----------------
 1 file changed, 45 insertions(+), 34 deletions(-)

diff --git a/app/scripts/render_config.go b/app/scripts/render_config.go
index 0fdcf878..ad9d5737 100644
--- a/app/scripts/render_config.go
+++ b/app/scripts/render_config.go
@@ -39,41 +39,52 @@ func main() {
 				log.Fatalf("Error reading properties file: %v", err)
 			}
 			for _, line := range strings.Split(string(properties), "\n") {
-				if len(line) > 0 {
-					parts := strings.SplitN(line, "=", 2)
-					if len(parts) == 2 {
-						key := strings.TrimSpace(parts[0])
-						value := strings.TrimSpace(parts[1])
-						if key == "nested_parameters_json" {
-							// Read the file path from the nested_parameters_json key
-							parametersJsonFile := filepath.Join(filepath.Dir(path), value)
-							jsonParameters, err := os.ReadFile(parametersJsonFile)
-							if err != nil {
-								log.Fatalf("Error reading nested parameters JSON file: %v", err)
-							}
-							// Parse the JSON string
-							var nestedParameters map[string]interface{}
-
-							// We marshal and unmarshal the JSON to ensure that the nested properties are properly formatted for the template, and to ensure that the data is correct json
-
-							err = json.Unmarshal(jsonParameters, &nestedParameters)
-
-							if err != nil {
-								log.Fatalf("Error unmarshalling nested parameters JSON: %v", err)
-							}
-
-							parameters, err := json.Marshal(nestedParameters)
-							if err != nil {
-								log.Fatalf("Error marshalling nested parameters JSON: %v", err)
-							}
-
-							// Add the nested properties to the variables
-							variables["parameters"] = string(parameters)
-						} else {
-							variables[key] = value
-						}
-					}
+				if len(line) == 0 {
+					continue
 				}
+				parts := strings.SplitN(line, "=", 2)
+
+				if len(parts) > 2 {
+					log.Fatalf("Invalid line in properties file: %s", line)
+				}
+
+				if len(parts) < 2 {
+					log.Fatalf("Invalid line in properties file: %s", line)
+				}
+
+				key := strings.TrimSpace(parts[0])
+				value := strings.TrimSpace(parts[1])
+
+				if key != "nested_parameters_json" {
+					variables[key] = value
+					continue
+				}
+
+				// Read the file path from the nested_parameters_json key
+				parametersJsonFile := filepath.Join(filepath.Dir(path), value)
+				jsonParameters, err := os.ReadFile(parametersJsonFile)
+				if err != nil {
+					log.Fatalf("Error reading nested parameters JSON file: %v", err)
+				}
+				// Parse the JSON string
+				var nestedParameters map[string]interface{}
+
+				// We unmarshal and then re-marshal the JSON to ensure that the nested parameters are properly
+				// formatted for the template, and to ensure that the data is valid JSON
+
+				err = json.Unmarshal(jsonParameters, &nestedParameters)
+
+				if err != nil {
+					log.Fatalf("Error un-marshalling nested parameters JSON: %v", err)
+				}
+
+				parameters, err := json.Marshal(nestedParameters)
+				if err != nil {
+					log.Fatalf("Error marshalling nested parameters JSON: %v", err)
+				}
+
+				// Add the nested parameters to the variables
+				variables["parameters"] = string(parameters)
 			}
 
 			// Parse and execute the template

From 6411c324c6f1a61834a7410ca72dbb2a55481240 Mon Sep 17 00:00:00 2001
From: ZanzyTHEbar <pyr0ndet0s97@gmail.com>
Date: Thu, 20 Jun 2024 20:25:06 +0100
Subject: [PATCH 06/12] feat: Add promptfoo configuration files for build and
 fix

- Add gen-test script for automatically creating promptfoo eval directories and required files
- Add promptfooconfig.yml for build
- Add build.config.properties for build
- Rename config.properties to fix.config.properties for fix
- Add promptfooconfig.yaml for fix
- Update promptfoo eval tests for removal, validate, and fix
- Update Makefile with new targets and help message
---
 Makefile                                      |  20 ++-
 app/scripts/cmd/gen/gen.go                    | 116 ++++++++++++++++++
 .../provider/gen_provider.go}                 |   0
 .../build/build.config.properties             |   7 ++
 .../promptfoo-poc/build/build.parameters.json |   0
 .../promptfoo-poc/build/build.prompt.txt      |   0
 .../promptfoo-poc/build/promptfooconfig.yml   |   9 ++
 test/evals/promptfoo-poc/evals.md             |  90 ++++++++++++++
 .../fix/{tests => assets}/removal/changes.md  |   0
 .../{tests => assets}/removal/post_build.go   |   0
 .../{tests => assets}/removal/problems.txt    |   0
 .../fix/{tests => assets}/shared/pre_build.go |   0
 ...onfig.properties => fix.config.properties} |   2 +-
 .../fix/{prompt.txt => fix.prompt.txt}        |   0
 .../promptfoo-poc/fix/promptfooconfig.yaml    |  25 +---
 .../promptfoo-poc/fix/tests/fix.test.yml      |  20 +++
 .../{tests => assets}/removal/changes.md      |   0
 .../verify/{tests => assets}/removal/diff.txt |   0
 .../{tests => assets}/removal/post_build.go   |   0
 .../{tests => assets}/shared/pre_build.go     |   0
 .../verify/{tests => assets}/valid/changes.md |   0
 .../verify/{tests => assets}/valid/diff.txt   |   0
 .../{tests => assets}/valid/post_build.go     |   0
 .../promptfoo-poc/verify/promptfooconfig.yaml |  38 +-----
 .../verify/tests/removal.test.yml             |  13 ++
 .../verify/tests/validate.test.yml            |  18 +++
 ...ig.properties => verify.config.properties} |   0
 .../verify/{prompt.txt => verify.prompt.txt}  |   0
 28 files changed, 298 insertions(+), 60 deletions(-)
 create mode 100644 app/scripts/cmd/gen/gen.go
 rename app/scripts/{render_config.go => cmd/provider/gen_provider.go} (100%)
 create mode 100644 test/evals/promptfoo-poc/build/build.config.properties
 create mode 100644 test/evals/promptfoo-poc/build/build.parameters.json
 create mode 100644 test/evals/promptfoo-poc/build/build.prompt.txt
 create mode 100644 test/evals/promptfoo-poc/build/promptfooconfig.yml
 create mode 100644 test/evals/promptfoo-poc/evals.md
 rename test/evals/promptfoo-poc/fix/{tests => assets}/removal/changes.md (100%)
 rename test/evals/promptfoo-poc/fix/{tests => assets}/removal/post_build.go (100%)
 rename test/evals/promptfoo-poc/fix/{tests => assets}/removal/problems.txt (100%)
 rename test/evals/promptfoo-poc/fix/{tests => assets}/shared/pre_build.go (100%)
 rename test/evals/promptfoo-poc/fix/{config.properties => fix.config.properties} (80%)
 rename test/evals/promptfoo-poc/fix/{prompt.txt => fix.prompt.txt} (100%)
 create mode 100644 test/evals/promptfoo-poc/fix/tests/fix.test.yml
 rename test/evals/promptfoo-poc/verify/{tests => assets}/removal/changes.md (100%)
 rename test/evals/promptfoo-poc/verify/{tests => assets}/removal/diff.txt (100%)
 rename test/evals/promptfoo-poc/verify/{tests => assets}/removal/post_build.go (100%)
 rename test/evals/promptfoo-poc/verify/{tests => assets}/shared/pre_build.go (100%)
 rename test/evals/promptfoo-poc/verify/{tests => assets}/valid/changes.md (100%)
 rename test/evals/promptfoo-poc/verify/{tests => assets}/valid/diff.txt (100%)
 rename test/evals/promptfoo-poc/verify/{tests => assets}/valid/post_build.go (100%)
 create mode 100644 test/evals/promptfoo-poc/verify/tests/removal.test.yml
 create mode 100644 test/evals/promptfoo-poc/verify/tests/validate.test.yml
 rename test/evals/promptfoo-poc/verify/{config.properties => verify.config.properties} (100%)
 rename test/evals/promptfoo-poc/verify/{prompt.txt => verify.prompt.txt} (100%)

diff --git a/Makefile b/Makefile
index 42e676a1..174fbf8a 100644
--- a/Makefile
+++ b/Makefile
@@ -33,8 +33,11 @@ clean:
 test: render
 	@$(GOTEST) -v ./...
 
-render:
-	@cd app/scripts && go run render_config.go
+gen-test:
+	@$(GOCMD) run app/scripts/cmd/gen/gen.go $(filter-out $@,$(MAKECMDGOALS))
+
+gen-provider:
+	@$(GOCMD) run app/scripts/cmd/provider/gen_provider.go
 
 # Get dependencies
 deps:
@@ -43,4 +46,17 @@ deps:
 # Default target
 default: build
 
+# Usage
+help:
+	@echo "Usage:"
+	@echo "  make dev - to run the development scripts"
+	@echo "  make gen-test <directory_path> - to create a new promptfoo eval directory structure"
+	@echo "  make gen-provider - to create a new promptfoo provider file from the promptfoo diretory structure"
+	@echo "  make clean - to remove generated files and directories"
+	@echo "  make help - to display this help message"
+
+# Prevents make from interpreting the arguments as targets
+%:
+	@:
+
 .PHONY: all render build clean test deps
\ No newline at end of file
diff --git a/app/scripts/cmd/gen/gen.go b/app/scripts/cmd/gen/gen.go
new file mode 100644
index 00000000..40fbd558
--- /dev/null
+++ b/app/scripts/cmd/gen/gen.go
@@ -0,0 +1,116 @@
+package main
+
+import (
+	"fmt"
+	"log"
+	"os"
+	"path/filepath"
+	"text/template"
+)
+
+func main() {
+	if len(os.Args) < 2 {
+		log.Fatalf("Usage: %s <path/to/directory>", os.Args[0])
+	}
+
+	dirPath := os.Args[1]
+	dirName := filepath.Base(dirPath)
+
+	// Create the main directory
+	if err := os.MkdirAll(dirPath, 0755); err != nil {
+		log.Fatalf("Error creating directory: %s", err)
+	}
+
+	f, err := os.Create(fmt.Sprintf("%s/%s", dirPath, "promptfooconfig.yml"))
+	if err != nil {
+		log.Fatalf("Error creating file: %s", err)
+	}
+	f.Close()
+
+	// Create files inside the directory
+	files := []string{
+		"parameters.json",
+		"config.properties",
+		"prompt.txt",
+	}
+
+	for _, file := range files {
+		f, err := os.Create(fmt.Sprintf("%s/%s.%s", dirPath, dirName, file))
+		if err != nil {
+			log.Fatalf("Error creating file: %s", err)
+		}
+		f.Close()
+	}
+
+	// Create assets and tests directories
+	subDirs := []string{"assets", "tests"}
+
+	for _, subDir := range subDirs {
+		if err := os.Mkdir(fmt.Sprintf("%s/%s", dirPath, subDir), 0755); err != nil {
+			log.Fatalf("Error creating subdirectory: %s", err)
+		}
+	}
+
+	// Template for promptfooconfig.yml
+	ymlTemplate := `description: "{{ .Name }}"
+
+prompts:
+  - file://{{ .Name }}.prompt.txt
+
+providers:
+  - file://{{ .Name }}.provider.yml
+
+tests: tests/*.tests.yml
+`
+
+	// Populate promptfooconfig.yml
+	promptFooConfigTmpl, err := template.New("yml").Parse(ymlTemplate)
+	if err != nil {
+		log.Fatalf("Error creating template: %s", err)
+	}
+
+	// Template for config.properties
+	propertiesTemplate := `provider_id=openai:gpt-4o
+function_name=
+tool_type=function
+function_param_type=object
+tool_choice_type=function
+tool_choice_function_name=
+nested_parameters_json={{ .Name }}.parameters.json
+`
+
+	// Populate config.properties
+	configPropertiesTmpl, err := template.New("properties").Parse(propertiesTemplate)
+	if err != nil {
+		log.Fatalf("Error creating template: %s", err)
+	}
+
+	configFile, err := os.Create(fmt.Sprintf("%s/%s.%s", dirPath, dirName, "config.properties"))
+	if err != nil {
+		log.Fatalf("Error creating config.properties: %s", err)
+	}
+	defer configFile.Close()
+
+	file, err := os.Create(fmt.Sprintf("%s/promptfooconfig.yml", dirPath))
+	if err != nil {
+		log.Fatalf("Error creating promptfooconfig.yml: %s", err)
+	}
+	defer file.Close()
+
+	data := struct {
+		Name string
+	}{
+		Name: dirName,
+	}
+
+	if err := promptFooConfigTmpl.Execute(file, data); err != nil {
+		log.Fatalf("Error executing template: %s", err)
+	}
+
+	if err := configPropertiesTmpl.Execute(configFile, data); err != nil {
+		log.Fatalf("Error executing template: %s", err)
+	}
+
+	fmt.Println("Directory created successfully!")
+	fmt.Println("Please check the contents of the directory and proceed with the implementation.")
+}
diff --git a/app/scripts/render_config.go b/app/scripts/cmd/provider/gen_provider.go
similarity index 100%
rename from app/scripts/render_config.go
rename to app/scripts/cmd/provider/gen_provider.go
diff --git a/test/evals/promptfoo-poc/build/build.config.properties b/test/evals/promptfoo-poc/build/build.config.properties
new file mode 100644
index 00000000..fea41b87
--- /dev/null
+++ b/test/evals/promptfoo-poc/build/build.config.properties
@@ -0,0 +1,7 @@
+provider_id=openai:gpt-4o
+function_name=
+tool_type=function
+function_param_type=object
+tool_choice_type=function
+tool_choice_function_name=
+nested_parameters_json=build.parameters.json
diff --git a/test/evals/promptfoo-poc/build/build.parameters.json b/test/evals/promptfoo-poc/build/build.parameters.json
new file mode 100644
index 00000000..e69de29b
diff --git a/test/evals/promptfoo-poc/build/build.prompt.txt b/test/evals/promptfoo-poc/build/build.prompt.txt
new file mode 100644
index 00000000..e69de29b
diff --git a/test/evals/promptfoo-poc/build/promptfooconfig.yml b/test/evals/promptfoo-poc/build/promptfooconfig.yml
new file mode 100644
index 00000000..363f5bc5
--- /dev/null
+++ b/test/evals/promptfoo-poc/build/promptfooconfig.yml
@@ -0,0 +1,9 @@
+description: "build"
+
+prompts:
+  - file://build.prompt.txt
+
+providers:
+  - file://build.provider.yml
+
+tests: tests/*.tests.yml
diff --git a/test/evals/promptfoo-poc/evals.md b/test/evals/promptfoo-poc/evals.md
new file mode 100644
index 00000000..0ad19fa9
--- /dev/null
+++ b/test/evals/promptfoo-poc/evals.md
@@ -0,0 +1,90 @@
+# Evals
+
+Evals for plandex.
+
+## Overview
+
+`Classification:`
+
+- MCC
+- Specificity
+- Sensitivity
+- Accuracy
+
+`Regression:`
+
+- RMSE
+- R2
+- MSE
+
+## Types of Evals
+
+---
+
+### Build Prompts Evaluations
+
+1. **Syntax Check**
+   - Ensure the prompt is syntactically correct and follows the required format.
+2. **Completeness Check**
+   - Verify that all necessary components (e.g., headers, body, footers) are included.
+3. **Clarity and Precision**
+   - Evaluate if the instructions are clear and unambiguous.
+4. **Context Appropriateness**
+   - Assess if the prompt is appropriate for the intended context and audience.
+5. **Error Handling**
+   - Check if there are adequate instructions for handling potential errors.
+6. **Dependency Evaluation**
+   - Ensure all dependencies (libraries, tools) are correctly specified.
+7. **Output Validation**
+   - Define criteria to validate the expected output.
+
+### Verify Prompts Evaluations
+
+1. **Accuracy Check**
+   - Ensure the prompt's instructions lead to the correct and intended results.
+2. **Validation Criteria**
+   - Define and check the validation criteria for the outputs.
+3. **Consistency Check**
+   - Verify if the prompt maintains consistency in terminology and steps.
+4. **Logic and Flow**
+   - Evaluate the logical flow of the instructions to ensure they are coherent.
+5. **Edge Cases Handling**
+   - Assess if the prompt considers and handles edge cases effectively.
+6. **User Feedback Integration**
+   - Ensure there are provisions for incorporating user feedback.
+7. **Performance Metrics**
+   - Define and evaluate the performance metrics for the verification process.
+
+### Fix Prompts Evaluations
+
+1. **Error Identification**
+   - Ensure the prompt accurately identifies the errors to be fixed.
+2. **Correctness of Fix**
+   - Verify that the proposed fix is correct and resolves the issue.
+3. **Impact Analysis**
+   - Assess the impact of the fix on the overall system or application.
+4. **Regression Testing**
+   - Ensure that the fix does not introduce new issues or regressions.
+5. **Documentation Update**
+   - Check if the documentation is updated to reflect the fix.
+6. **Code Quality**
+   - Evaluate the quality of the code after the fix (e.g., readability, maintainability).
+7. **Performance Impact**
+   - Assess if the fix affects the performance and ensure it remains optimal.
+
+### Function Call Schemas Evaluations
+
+1. **Schema Validity**
+   - Ensure the schema is valid and conforms to the defined standards.
+2. **Parameter Consistency**
+   - Verify that the parameters are consistently defined and used.
+3. **Return Type Verification**
+   - Ensure the return types are correctly specified and handled.
+4. **Error Handling Mechanism**
+   - Assess the error handling mechanisms in the schema.
+5. **Compatibility Check**
+   - Check if the schema is compatible with different environments or systems.
+6. **Documentation Completeness**
+   - Ensure the schema is well-documented with clear explanations of each parameter and return type.
+7. **Security Considerations**
+   - Evaluate the schema for potential security vulnerabilities or risks.
diff --git a/test/evals/promptfoo-poc/fix/tests/removal/changes.md b/test/evals/promptfoo-poc/fix/assets/removal/changes.md
similarity index 100%
rename from test/evals/promptfoo-poc/fix/tests/removal/changes.md
rename to test/evals/promptfoo-poc/fix/assets/removal/changes.md
diff --git a/test/evals/promptfoo-poc/fix/tests/removal/post_build.go b/test/evals/promptfoo-poc/fix/assets/removal/post_build.go
similarity index 100%
rename from test/evals/promptfoo-poc/fix/tests/removal/post_build.go
rename to test/evals/promptfoo-poc/fix/assets/removal/post_build.go
diff --git a/test/evals/promptfoo-poc/fix/tests/removal/problems.txt b/test/evals/promptfoo-poc/fix/assets/removal/problems.txt
similarity index 100%
rename from test/evals/promptfoo-poc/fix/tests/removal/problems.txt
rename to test/evals/promptfoo-poc/fix/assets/removal/problems.txt
diff --git a/test/evals/promptfoo-poc/fix/tests/shared/pre_build.go b/test/evals/promptfoo-poc/fix/assets/shared/pre_build.go
similarity index 100%
rename from test/evals/promptfoo-poc/fix/tests/shared/pre_build.go
rename to test/evals/promptfoo-poc/fix/assets/shared/pre_build.go
diff --git a/test/evals/promptfoo-poc/fix/config.properties b/test/evals/promptfoo-poc/fix/fix.config.properties
similarity index 80%
rename from test/evals/promptfoo-poc/fix/config.properties
rename to test/evals/promptfoo-poc/fix/fix.config.properties
index dc796603..28127414 100644
--- a/test/evals/promptfoo-poc/fix/config.properties
+++ b/test/evals/promptfoo-poc/fix/fix.config.properties
@@ -4,4 +4,4 @@ tool_type=function
 function_param_type=object
 tool_choice_type=function
 tool_choice_function_name=listChangesWithLineNums
-nested_parameters_json=fix.parameters.json
+nested_parameters_json=build.parameters.json
diff --git a/test/evals/promptfoo-poc/fix/prompt.txt b/test/evals/promptfoo-poc/fix/fix.prompt.txt
similarity index 100%
rename from test/evals/promptfoo-poc/fix/prompt.txt
rename to test/evals/promptfoo-poc/fix/fix.prompt.txt
diff --git a/test/evals/promptfoo-poc/fix/promptfooconfig.yaml b/test/evals/promptfoo-poc/fix/promptfooconfig.yaml
index 9f50157f..87fbe232 100644
--- a/test/evals/promptfoo-poc/fix/promptfooconfig.yaml
+++ b/test/evals/promptfoo-poc/fix/promptfooconfig.yaml
@@ -1,30 +1,11 @@
 # This configuration compares LLM output of 2 prompts x 2 GPT models across 3 test cases.
 # Learn more: https://promptfoo.dev/docs/configuration/guide
-description: "Fixes"
+description: "fix"
 
 prompts:
-  - file://prompt.txt
+  - file://fix.prompt.txt
 
 providers:
   - file://fix.provider.yml
 
-tests:
-  - vars:
-      preBuildState: file://tests/shared/pre_build.go
-      changes: file://tests/removal/changes.md
-      problems: file://tests/removal/problems.txt
-      postBuildState: file://tests/removal/post_build.go
-    assert:
-      - type: is-json
-      - type: is-valid-openai-tools-call
-      - type: javascript
-        value: |
-          var args = JSON.parse(output[0].function.arguments)
-          return (
-            args.problems && 
-            args.changes.length > 0 &&
-            args.changes.some(
-              change => change.hasChange && 
-                        change.new.includes("var contextRmCmd = &cobra.Command{")
-            )
-          )
+tests: tests/*.test.yml
diff --git a/test/evals/promptfoo-poc/fix/tests/fix.test.yml b/test/evals/promptfoo-poc/fix/tests/fix.test.yml
new file mode 100644
index 00000000..1084185a
--- /dev/null
+++ b/test/evals/promptfoo-poc/fix/tests/fix.test.yml
@@ -0,0 +1,20 @@
+- description: "Check Fix with Line numbers"
+  vars:
+    preBuildState: file://assets/shared/pre_build.go
+    changes: file://assets/removal/changes.md
+    problems: file://assets/removal/problems.txt
+    postBuildState: file://assets/removal/post_build.go
+  assert:
+    - type: is-json
+    - type: is-valid-openai-tools-call
+    - type: javascript
+      value: |
+        var args = JSON.parse(output[0].function.arguments)
+        return (
+          args.problems && 
+          args.changes.length > 0 &&
+          args.changes.some(
+            change => change.hasChange && 
+                      change.new.includes("var contextRmCmd = &cobra.Command{")
+          )
+        )
diff --git a/test/evals/promptfoo-poc/verify/tests/removal/changes.md b/test/evals/promptfoo-poc/verify/assets/removal/changes.md
similarity index 100%
rename from test/evals/promptfoo-poc/verify/tests/removal/changes.md
rename to test/evals/promptfoo-poc/verify/assets/removal/changes.md
diff --git a/test/evals/promptfoo-poc/verify/tests/removal/diff.txt b/test/evals/promptfoo-poc/verify/assets/removal/diff.txt
similarity index 100%
rename from test/evals/promptfoo-poc/verify/tests/removal/diff.txt
rename to test/evals/promptfoo-poc/verify/assets/removal/diff.txt
diff --git a/test/evals/promptfoo-poc/verify/tests/removal/post_build.go b/test/evals/promptfoo-poc/verify/assets/removal/post_build.go
similarity index 100%
rename from test/evals/promptfoo-poc/verify/tests/removal/post_build.go
rename to test/evals/promptfoo-poc/verify/assets/removal/post_build.go
diff --git a/test/evals/promptfoo-poc/verify/tests/shared/pre_build.go b/test/evals/promptfoo-poc/verify/assets/shared/pre_build.go
similarity index 100%
rename from test/evals/promptfoo-poc/verify/tests/shared/pre_build.go
rename to test/evals/promptfoo-poc/verify/assets/shared/pre_build.go
diff --git a/test/evals/promptfoo-poc/verify/tests/valid/changes.md b/test/evals/promptfoo-poc/verify/assets/valid/changes.md
similarity index 100%
rename from test/evals/promptfoo-poc/verify/tests/valid/changes.md
rename to test/evals/promptfoo-poc/verify/assets/valid/changes.md
diff --git a/test/evals/promptfoo-poc/verify/tests/valid/diff.txt b/test/evals/promptfoo-poc/verify/assets/valid/diff.txt
similarity index 100%
rename from test/evals/promptfoo-poc/verify/tests/valid/diff.txt
rename to test/evals/promptfoo-poc/verify/assets/valid/diff.txt
diff --git a/test/evals/promptfoo-poc/verify/tests/valid/post_build.go b/test/evals/promptfoo-poc/verify/assets/valid/post_build.go
similarity index 100%
rename from test/evals/promptfoo-poc/verify/tests/valid/post_build.go
rename to test/evals/promptfoo-poc/verify/assets/valid/post_build.go
diff --git a/test/evals/promptfoo-poc/verify/promptfooconfig.yaml b/test/evals/promptfoo-poc/verify/promptfooconfig.yaml
index f71ea646..62cee381 100644
--- a/test/evals/promptfoo-poc/verify/promptfooconfig.yaml
+++ b/test/evals/promptfoo-poc/verify/promptfooconfig.yaml
@@ -1,41 +1,9 @@
 # This configuration compares LLM output of 2 prompts x 2 GPT models across 3 test cases.
 # Learn more: https://promptfoo.dev/docs/configuration/guide
-description: "Verification"
+description: "verify"
 
 prompts:
-  - file://prompt.txt
-
+  - file://verify.prompt.txt
 providers:
   - file://verify.provider.yml
-
-tests:
-  - vars:
-      preBuildState: file://tests/shared/pre_build.go
-      changes: file://tests/valid/changes.md
-      postBuildState: file://tests/valid/post_build.go
-      diffs: file://tests/valid/diff.txt
-    assert:
-      - type: is-json
-      - type: is-valid-openai-tools-call
-      - type: javascript
-        value: |
-          var args = JSON.parse(output[0].function.arguments)
-          return !(
-            args.hasSyntaxErrors ||
-            args.hasRemovedCodeErrors ||
-            args.hasDuplicationErrors ||
-            args.hasReferenceErrors            
-          )
-
-  - vars:
-      preBuildState: file://tests/shared/pre_build.go
-      changes: file://tests/removal/changes.md
-      postBuildState: file://tests/removal/post_build.go
-      diffs: file://tests/removal/diff.txt
-    assert:
-      - type: is-json
-      - type: is-valid-openai-tools-call
-      - type: javascript
-        value: |
-          var args = JSON.parse(output[0].function.arguments)
-          return args.hasRemovedCodeErrors
+tests: tests/*.test.yml
diff --git a/test/evals/promptfoo-poc/verify/tests/removal.test.yml b/test/evals/promptfoo-poc/verify/tests/removal.test.yml
new file mode 100644
index 00000000..d48fbf95
--- /dev/null
+++ b/test/evals/promptfoo-poc/verify/tests/removal.test.yml
@@ -0,0 +1,13 @@
+- description: "Removal of code errors"
+  vars:
+      preBuildState: file://assets/shared/pre_build.go
+      changes: file://assets/removal/changes.md
+      postBuildState: file://assets/removal/post_build.go
+      diffs: file://assets/removal/diff.txt
+  assert:
+    - type: is-json
+    - type: is-valid-openai-tools-call
+    - type: javascript
+      value: |
+        var args = JSON.parse(output[0].function.arguments)
+        return args.hasRemovedCodeErrors
\ No newline at end of file
diff --git a/test/evals/promptfoo-poc/verify/tests/validate.test.yml b/test/evals/promptfoo-poc/verify/tests/validate.test.yml
new file mode 100644
index 00000000..13eab397
--- /dev/null
+++ b/test/evals/promptfoo-poc/verify/tests/validate.test.yml
@@ -0,0 +1,18 @@
+- description: "Validation of the code changes"
+  vars:
+    preBuildState: file://assets/shared/pre_build.go
+    changes: file://assets/valid/changes.md
+    postBuildState: file://assets/valid/post_build.go
+    diffs: file://assets/valid/diff.txt
+  assert:
+    - type: is-json
+    - type: is-valid-openai-tools-call
+    - type: javascript
+      value: |
+        var args = JSON.parse(output[0].function.arguments)
+        return !(
+          args.hasSyntaxErrors ||
+          args.hasRemovedCodeErrors ||
+          args.hasDuplicationErrors ||
+          args.hasReferenceErrors            
+        )
diff --git a/test/evals/promptfoo-poc/verify/config.properties b/test/evals/promptfoo-poc/verify/verify.config.properties
similarity index 100%
rename from test/evals/promptfoo-poc/verify/config.properties
rename to test/evals/promptfoo-poc/verify/verify.config.properties
diff --git a/test/evals/promptfoo-poc/verify/prompt.txt b/test/evals/promptfoo-poc/verify/verify.prompt.txt
similarity index 100%
rename from test/evals/promptfoo-poc/verify/prompt.txt
rename to test/evals/promptfoo-poc/verify/verify.prompt.txt

From 616e5749ed826c84f38645fea56cd6ca1de18950 Mon Sep 17 00:00:00 2001
From: ZanzyTHEbar <pyr0ndet0s97@gmail.com>
Date: Thu, 20 Jun 2024 20:44:17 +0100
Subject: [PATCH 07/12] feat: Add gen-eval target to Makefile for creating
 promptfoo eval directories

- add readme to evals folder
---
 Makefile                           |  6 ++--
 test/evals/promptfoo-poc/README.md | 51 ++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+), 3 deletions(-)
 create mode 100644 test/evals/promptfoo-poc/README.md

diff --git a/Makefile b/Makefile
index 174fbf8a..11be13b4 100644
--- a/Makefile
+++ b/Makefile
@@ -33,8 +33,8 @@ clean:
 test: render
 	@$(GOTEST) -v ./...
 
-gen-test:
-	@$(GOCMD) run app/scripts/cmd/gen/gen.go $(filter-out $@,$(MAKECMDGOALS))
+gen-eval:
+	@$(GOCMD) run app/scripts/cmd/gen/gen.go test/evals/promptfoo-poc/$(filter-out $@,$(MAKECMDGOALS))
 
 gen-provider:
 	@$(GOCMD) run app/scripts/cmd/provider/gen_provider.go
@@ -50,7 +50,7 @@ default: build
 help:
 	@echo "Usage:"
 	@echo "  make dev - to run the development scripts"
-	@echo "  make gen-test <directory_path> - to create a new promptfoo eval directory structure"
+	@echo "  make gen-eval <directory_name> - to create a new promptfoo eval directory structure"
 	@echo "  make gen-provider - to create a new promptfoo provider file from the promptfoo diretory structure"
 	@echo "  make clean - to remove generated files and directories"
 	@echo "  make help - to display this help message"
diff --git a/test/evals/promptfoo-poc/README.md b/test/evals/promptfoo-poc/README.md
new file mode 100644
index 00000000..f08fd60b
--- /dev/null
+++ b/test/evals/promptfoo-poc/README.md
@@ -0,0 +1,51 @@
+# Test Driven Development of Prompts
+
+This directory is dedicated to the systematic development of prompts for the plandex project. The prompts are developed in a test-driven manner, where the prompt is first written in a markdown file, and then the prompt is tested by running the prompt through the various evaluations. The output of the prompt is then graded and A/B tested for various metrics (see [metrics](#metrics)). The prompt is then iteratively improved until it meets the desired metrics.
+
+We have decided to write the majority of the evals using the [promptfoo](https://promptfoo.dev/) framework, as it is robust, customizable, and easy to set up.
+
+## Usage
+
+Usage will be broken down into [run](#run-evals) and [create](#create-evals) sections:
+
+### Setup
+
+To run or create evals, you will need to have the following installed:
+
+- [Go](https://golang.org/)
+- [Promptfoo](https://promptfoo.dev/)
+
+### Run Evals
+
+To run the evaluations, you can cd into the relevant directory and use the following command:
+
+```bash
+make evals
+```
+
+Or, you can run all the evaluations by running the following command:
+
+```bash
+make evals all
+```
+
+### Create Evals
+
+To create the evaluations, you can use our `gen-*` commands. The `gen-*` commands are designed to set up the eval environment and will create the evaluations directory structure in the `test/evals/promptfoo-poc` directory. You have access to the following commands:
+
+```bash
+make gen-eval # Generates the evaluation directory structure
+make gen-provider # Generates a provider file based on the directory config files
+```
+
+> [!IMPORTANT]
+> Make sure to run the `gen-eval` command before running the `gen-provider` command.
+> You need to have the config files filled out with your details before running the `gen-provider` command.
+> Depending on the provider you use, you will need to set up an environment variable with the provider's API key.
+
+## Metrics
+
+The metrics we are currently tracking are:
+
+> [!NOTE]
+> COMING SOON

From 0753deb6a5a61b122a08fa85b16e0702e98e5105 Mon Sep 17 00:00:00 2001
From: ZanzyTHEbar <pyr0ndet0s97@gmail.com>
Date: Thu, 20 Jun 2024 20:46:38 +0100
Subject: [PATCH 08/12] feat: Implement eval command in Makefile

The eval command is not yet implemented in the Makefile. This commit adds a placeholder `eval` target with a TODO; the real implementation is on the list of priorities.
---
 Makefile | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/Makefile b/Makefile
index 11be13b4..c2e04769 100644
--- a/Makefile
+++ b/Makefile
@@ -33,12 +33,20 @@ clean:
 test: render
 	@$(GOTEST) -v ./...
 
+#### Evals and Providers ####
+
+# TODO: Implement eval command
+eval:
+	@echo "Eval command not implemented yet"
+
 gen-eval:
 	@$(GOCMD) run app/scripts/cmd/gen/gen.go test/evals/promptfoo-poc/$(filter-out $@,$(MAKECMDGOALS))
 
 gen-provider:
 	@$(GOCMD) run app/scripts/cmd/provider/gen_provider.go
 
+#### End Evals and Providers ####
+
 # Get dependencies
 deps:
 	$(GOGET) -v ./...

From 1d55db08e9bd936079f4a078e84bd5cebe378954 Mon Sep 17 00:00:00 2001
From: ZanzyTHEbar <pyr0ndet0s97@gmail.com>
Date: Thu, 20 Jun 2024 21:14:01 +0100
Subject: [PATCH 09/12] build: add build prompt file

---
 .../promptfoo-poc/build/build.prompt.txt      | 121 ++++++++++++++++++
 test/evals/promptfoo-poc/fix/fix.prompt.txt   |   2 -
 2 files changed, 121 insertions(+), 2 deletions(-)

diff --git a/test/evals/promptfoo-poc/build/build.prompt.txt b/test/evals/promptfoo-poc/build/build.prompt.txt
index e69de29b..ed7a0926 100644
--- a/test/evals/promptfoo-poc/build/build.prompt.txt
+++ b/test/evals/promptfoo-poc/build/build.prompt.txt
@@ -0,0 +1,121 @@
+You are an AI that analyzes a code file and an AI-generated plan to update the code file and produces a list of changes.
+
+[YOUR INSTRUCTIONS]
+
+Call the 'listChangesWithLineNums' function with a valid JSON object that includes the 'comments', 'problems', and 'changes' keys.
+
+You ABSOLUTELY MUST NOT generate overlapping changes. Group smaller changes together into larger changes where necessary to avoid overlap. Only generate multiple changes when you are ABSOLUTELY CERTAIN that they do not overlap--otherwise group them together into a single change. If changes are close to each other (within several lines), group them together into a single change. You MUST group changes together and make fewer, larger changes rather than many small changes, unless the changes are completely independent of each other and not close to each other in the file. You MUST NEVER generate changes that are adjacent or close to adjacent. Adjacent or closely adjacent changes MUST ALWAYS be grouped into a single larger change.
+
+Furthermore, unless doing so would require a very large change because some changes are far apart in the file, it's ideal to call the 'listChangesWithLineNums' with just a SINGLE change.
+
+Changes must be ordered in the array according to the order they appear in the file. The 'startLineString' of each 'old' property must come after the 'endLineString' of the previous 'old' property. Changes MUST NOT overlap. If a change is dependent on another change or intersects with it, group those changes together into a single change.
+
+You MUST NOT repeat changes to the same block of lines multiple times. You MUST NOT duplicate changes. It is extremely important that a given change is only applied *once*.
+
+The 'comments' key is an array of objects with two properties: 'txt' and 'reference'. 'txt' is the exact text of a code comment. 'reference' is a boolean that indicates whether the comment is a placeholder of or reference to the original code, like "// rest of the function..." or "# existing init code...", or "// rest of the main function" or "// rest of your function..." or "// Existing methods..." or "// Remaining methods" or "// Existing code..." or "// ... existing setup code ..." or "// ... existing code ..." or "// ..." or other comments which reference code from the original file. References DO NOT need to exactly match any of the previous examples. Use your judgement to determine whether each comment is a reference. If 'reference' is true, the comment is a placeholder or reference to the original code. If 'reference' is false, the comment is not a placeholder or reference to the original code.
+
+In 'comments', you must list EVERY comment included in the proposed updates. Only list *code comments* that are valid comments for the programming language being used. Do not list logging statements or any other non-comment text that is not a valid code comment. If there are no code comments in the proposed updates, 'comments' must be an empty array.
+
+If there are multiple identical comments in the proposed updates, you MUST list them *all* in the 'comments' array--list each identical comment as a separate object in the array.
+
+In the 'problems' key, you MUST explain how you will strategically generate changes in order to avoid any problems in the updated file. You should explain which changes you will make and how you will *avoid* making any overlapping or invalid changes. Consider whether any changes are close together or whether any change is potentially contained by another. If so, group those changes together into a single change.
+
+You must consider whether you will apply partial changes or replace the entire file. If the original file is long, you MUST NOT replace the entire file with a single change. Instead, you should apply changes to specific sections of the file. If the original file is short and the changes are complex, you may consider replacing the entire file with a single change.
+
+You must consider how you will avoid *incorrectly removing or overwriting code* from the original file. Explain whether any code from the original file needs to be merged with the proposed updates in order to avoid removing or overwriting code that should not be removed. It is ABSOLUTELY CRITICAL that no pre-existing code or functionality is removed or overwritten unless the plan explicitly intends for it to be removed or overwritten. New code and functionality introduced in the proposed updates MUST be *merged* with existing code and functionality in the original file. Explain how you will achieve this. 
+
+You must consider how you will avoid including any references in the updated file if any are present in the proposed updates. 
+
+You must consider how you will *avoid incorrect duplication* in making your changes. For example if a 'main' function is present in the original file and the proposed updates include update code for the 'main' function, you must ensure the changes are applied within the existing 'main' function rather than incorrectly adding a duplicate 'main' function.
+
+If the proposed updates include large sections that are identical to the original file, consider whether the changes can be made more minimal in order to only replace sections of code that are *changing*. If you are making the changes more minimal and specific, explain how you will do this without generating any overlapping changes or introducing any new problems.
+
+'changes': An array of NON-OVERLAPPING changes. Each change is an object with properties: 'summary', 'hasChange', 'old', 'startLineIncludedReasoning', 'startLineIncluded', 'endLineIncludedReasoning', 'endLineIncluded', and 'new'.
+
+Note: all line numbers that are used below are prefixed with 'pdx-', like this 'pdx-5: for i := 0; i < 10; i++ {'. This is to help you identify the line numbers in the file. You *must* include the 'pdx-' prefix in the line numbers in the 'old' property.
+
+The 'summary' property is a brief summary of the change. At the end of the summary, consider if this change will overlap with any ensuing changes. If it will, include those changes in *this* change instead. Continue the summary and include those ensuing changes that would otherwise overlap. Changes that remove code are especially likely to overlap with ensuing changes. 
+
+'summary' examples: 
+	- 'Update loop that aggregates the results to iterate 10 times instead of 5 and log the value of someVar.'
+	- 'Update the Org model to include StripeCustomerId and StripeSubscriptionId fields.'
+	- 'Add function ExecQuery to execute a query.'
+	
+'summary' that is larger to avoid overlap:
+	- 'Insert function ExecQuery after GetResults function in loop body. Update loop that aggregates the results to iterate 10 times instead of 5 and log the value of someVar. Add function ExecQuery to execute a query.'
+
+The 'hasChange' property is a boolean that indicates whether there is anything to change. If there is nothing to change, set 'hasChange' to false. If there is something to change, set 'hasChange' to true.
+
+The 'old' property is an object with 3 properties: 'entireFile', 'startLineString' and 'endLineString'.
+
+	'entireFile' is a boolean that indicates whether the **entire file** is being replaced. If 'entireFile' is true, 'startLineString' and 'endLineString' must be empty strings. If 'entireFile' is false, 'startLineString' and 'endLineString' must be valid strings that exactly match lines from the original file. If 'entireFile' is false, 'startLineString' and 'endLineString' MUST NEVER be empty strings.
+
+	'startLineString' is the **entire, exact line** where the section to be replaced begins in the original file, including the line number. Unless it's the first change, 'startLineString' ABSOLUTELY MUST begin with a line number that is HIGHER than both the 'endLineString' of the previous change and the 'startLineString' of the previous change. **The line number and line MUST EXACTLY MATCH a line from the original file.**
+	
+	If the previous change's 'endLineString' starts with 'pdx-75: ', then the current change's 'startLineString' MUST start with 'pdx-76: ' or higher. It MUST NOT be 'pdx-75: ' or lower. If the previous change's 'startLineString' starts with 'pdx-88: ' and the previous change's 'endLineString' is an empty string, then the current change's 'startLineString' MUST start with 'pdx-89: ' or higher. If the previous change's 'startLineString' starts with 'pdx-100: ' and the previous change's 'endLineString' starts with 'pdx-105: ', then the current change's 'startLineString' MUST start with 'pdx-106: ' or higher.
+	
+	'endLineString' is the **entire, exact line** where the section to be replaced ends in the original file. Pay careful attention to spaces and indentation. 'startLineString' and 'endLineString' must be *entire lines* and *not partial lines*. Even if a line is very long, you must include the entire line, including the line number and all text on the line. **The line number and line MUST EXACTLY MATCH a line from the original file.**
+	
+	**For a single line replacement, 'endLineString' MUST be an empty string.**
+
+	'endLineString' MUST ALWAYS come *after* 'startLineString' in the original file. It must start with a line number that is HIGHER than the 'startLineString' line number. If 'startLineString' starts with 'pdx-22: ', then 'endLineString' MUST either be an empty string (for a single line replacement) or start with 'pdx-23: ' or higher (for a multi-line replacement).	
+
+	If 'hasChange' is false, both 'startLineString' and 'endLineString' must be empty strings. If 'hasChange' is true, 'startLineString' and 'endLineString' must be valid strings that exactly match lines from the original file. If 'hasChange' is true, 'startLineString' and 'endLineString' MUST NEVER be empty strings.
+
+	If you are replacing the entire file, 'startLineString' MUST be the first line of the original file and 'endLineString' MUST be the last line of the original file.
+
+The 'startLineIncludedReasoning' property is a string that very briefly explains whether 'startLineString' should be included in the 'new' property. For example, if the 'startLineString' is the closing bracket of a function and you are adding another function after it, you *MUST* include the 'startLineString' in the 'new' property, or the previous function will lose its closing bracket when the change is applied. Similarly, if the 'startLineString' is a function definition and you are updating the body of the function, you *MUST* also include 'startLineString' so that the function definition is not removed. The only time 'startLineString' should not be included in 'new' is if it is a line that should be removed or replaced. Generalize the above to all types of code blocks, changes, and syntax to ensure the 'new' property will not remove or overwrite code that should not be removed or overwritten. That also includes newlines, line breaks, and indentation.
+
+'startLineIncluded' is a boolean that indicates whether 'startLineString' should be included in the 'new' property. If 'startLineIncluded' is true, 'startLineString' MUST be included in the 'new' property. If 'startLineIncluded' is false, 'startLineString' MUST not be included in the 'new' property.
+
+The 'endLineIncludedReasoning' property is a string that very briefly explains whether 'endLineString' should be included in the 'new' property. For example, if the 'endLineString' is the opening bracket of a function and you are adding another function before it, you *MUST* include the 'endLineString' in the 'new' property, or the subsequent function will lose its opening bracket when the change is applied. Similarly, if the 'endLineString' is the closing bracket of a function and you are updating the body of the function, you *MUST* also include 'endLineString' so that the closing bracket is not removed. The only time 'endLineString' should not be included in 'new' is if it is a line that should be removed or replaced. Generalize the above to all types of code blocks, changes, and syntax to ensure the 'new' property will not remove or overwrite code that should not be removed or overwritten. That also includes newlines, line breaks, and indentation.
+
+'endLineIncluded' is a boolean that indicates whether 'endLineString' should be included in the 'new' property. If 'endLineIncluded' is true, 'endLineString' MUST be included in the 'new' property. If 'endLineIncluded' is false, 'endLineString' MUST not be included in the 'new' property.
+
+The 'new' property is a string that represents the new code that will replace the old code. The new code must be valid and consistent with the intention of the plan. If the proposed update is to remove code, the 'new' property should be an empty string. Be precise about newlines, line breaks, and indentation. 'new' must include only full lines of code and *no partial lines*. Do NOT include line numbers in the 'new' property.
+
+If the proposed update includes references to the original code in comments like "// rest of the function..." or "# existing init code...", or "// rest of the main function..." or "// rest of your function..." or **any other reference to the original code,** you *MUST* ensure that the comment making the reference is *NOT* included in the 'new' property. Instead, include the **exact code** from the original file that the comment is referencing. Do not be overly strict in identifying references. If there is a comment that seems like it could plausibly be a reference and there is code in the original file that could plausibly be the code being referenced, then treat that as a reference and handle it accordingly by including the code from the original file in the 'new' property instead of the comment. YOU MUST NOT MISS ANY REFERENCES.
+
+If the 'startLineIncluded' property is true, the 'startLineString' MUST be the first line of 'new'. If the 'startLineIncluded' property is false, the 'startLineString' MUST NOT be included in 'new'. If the 'endLineIncluded' property is true, the 'endLineString' MUST be the last line of 'new'. If the 'endLineIncluded' property is false, the 'endLineString' MUST NOT be included in 'new'.
+
+If the 'hasChange' property is false, the 'new' property must be an empty string. If the 'hasChange' property is true, the 'new' property must be a valid string.
+
+If *any* change has the 'entireFile' key in the 'old' property set to true, the corresponding 'new' key MUST be the entire updated file, and there MUST only be a single change in the 'changes' array.
+
+Example change object:
+  ---
+  {
+    summary: "Fix syntax error in loop body.",
+   	old: {
+      startLineString: "pdx-5: for i := 0; i < 10; i++ { ",
+      endLineString: "pdx-7: }",
+    },
+    new: "for i := 0; i < 10; i++ {\n  execQuery()\n  }\n  }\n}",
+  }
+  ---
+
+Apply changes intelligently **in order** to avoid syntax errors, breaking code, or removing code from the original file that should not be removed. Consider the reason behind the update and make sure the result is consistent with the intention of the plan.
+
+Changes MUST be ordered based on their position in the original file. ALWAYS go from top to bottom IN ORDER when generating replacements. DO NOT EVER GENERATE AN OVERLAPPING CHANGE. If a change would fall within OR overlap a prior change in the list, SKIP that change and move on to the next one.
+
+You ABSOLUTELY MUST NOT overwrite or delete code from the original file unless the plan *clearly intends* for the code to be overwritten or removed. Do NOT replace a full section of code with only new code unless that is the clear intention of the plan. Instead, merge the original code and the proposed updates together intelligently according to the intention of the plan. 
+
+Pay *EXTREMELY close attention* to opening and closing brackets, parentheses, and braces. Never leave them unbalanced when the changes are applied. Also pay *EXTREMELY close attention* to newlines and indentation. Make sure that the indentation of the new code is consistent with the indentation of the original code, and syntactically correct.
+
+The 'listChangesWithLineNums' function MUST be called with *valid JSON*. Double quotes within json properties of the 'listChangesWithLineNums' function call parameters JSON object *must be properly escaped* with a backslash. Pay careful attention to newlines, tabs, and other special characters. The JSON object must be properly formatted and must include all required keys. **You generate perfect JSON -every- time**, no matter how many quotes or special characters are in the input. You must always call 'listChangesWithLineNums' with a valid JSON object. Don't call any other function.
+
+[END YOUR INSTRUCTIONS]
+
+
+**The current file is {{filePath}} Original state of the file:**
+
+```
+{{preBuildState}}
+```
+
+Proposed updates:
+
+{{changes}}
+
+
+Now call the 'listChangesWithLineNums' function with a valid JSON array of changes according to your instructions. You must always call 'listChangesWithLineNums' with one or more valid changes. Don't call any other function.
\ No newline at end of file
diff --git a/test/evals/promptfoo-poc/fix/fix.prompt.txt b/test/evals/promptfoo-poc/fix/fix.prompt.txt
index 89f2319c..4667177c 100644
--- a/test/evals/promptfoo-poc/fix/fix.prompt.txt
+++ b/test/evals/promptfoo-poc/fix/fix.prompt.txt
@@ -138,5 +138,3 @@ The 'listChangesWithLineNums' function MUST be called *valid JSON*. Double quote
 --
 
 Now call the 'listChangesWithLineNums' function with a valid JSON array of changes according to your instructions. You must always call 'listChangesWithLineNums' with one or more valid changes. Don't call any other function.
-
-

From 95893908c02a523a54cf1edde3a97773c3c0cc42 Mon Sep 17 00:00:00 2001
From: ZanzyTHEbar <pyr0ndet0s97@gmail.com>
Date: Thu, 20 Jun 2024 22:09:38 +0100
Subject: [PATCH 10/12] feat: Update promptfoo configuration files for build
 and fix

- Add gen-test script for automatically creating promptfoo eval directories and required files
- Add promptfooconfig.yml for build
- Add build.config.properties for build
- Rename config.properties to fix.config.properties for fix
- Add promptfooconfig.yaml for fix
- Update promptfoo eval tests for removal, validate, and fix
- Update Makefile with new targets and help message
---
 Makefile                                      |   9 +-
 app/scripts/cmd/provider/gen_provider.go      |   2 +-
 .../build/assets/build/changes.md             | 177 ++++++++++++++++++
 .../build/assets/build/post_build.go          | 123 ++++++++++++
 .../build/assets/shared/pre_build.go          |  91 +++++++++
 .../build/build.config.properties             |   4 +-
 .../promptfoo-poc/build/build.parameters.json |  78 ++++++++
 .../promptfoo-poc/build/build.provider.yml    |  82 ++++++++
 .../promptfoo-poc/build/promptfooconfig.yml   |   2 -
 .../promptfoo-poc/build/tests/build.test.yml  |  19 ++
 .../promptfoo-poc/fix/fix.config.properties   |   2 +-
 test/evals/promptfoo-poc/fix/fix.provider.yml |  70 +------
 .../promptfoo-poc/verify/verify.provider.yml  |  61 +-----
 13 files changed, 583 insertions(+), 137 deletions(-)
 create mode 100644 test/evals/promptfoo-poc/build/assets/build/changes.md
 create mode 100644 test/evals/promptfoo-poc/build/assets/build/post_build.go
 create mode 100644 test/evals/promptfoo-poc/build/assets/shared/pre_build.go
 create mode 100644 test/evals/promptfoo-poc/build/build.provider.yml
 create mode 100644 test/evals/promptfoo-poc/build/tests/build.test.yml

diff --git a/Makefile b/Makefile
index c2e04769..e9eb25a8 100644
--- a/Makefile
+++ b/Makefile
@@ -35,9 +35,12 @@ test: render
 
 #### Evals and Providers ####
 
-# TODO: Implement eval command
+# TODO: Implement eval all
 eval:
-	@echo "Eval command not implemented yet"
+	@cd test/evals/promptfoo-poc/$(filter-out $@,$(MAKECMDGOALS)) && promptfoo eval --no-cache
+
+view-eval:
+	@cd test/evals/promptfoo-poc/$(filter-out $@,$(MAKECMDGOALS)) && promptfoo view
 
 gen-eval:
 	@$(GOCMD) run app/scripts/cmd/gen/gen.go test/evals/promptfoo-poc/$(filter-out $@,$(MAKECMDGOALS))
@@ -58,6 +61,8 @@ default: build
 help:
 	@echo "Usage:"
 	@echo "  make dev - to run the development scripts"
+	@echo "  make eval <directory_name> - to run the promptfoo eval command on a specific directory"
+	@echo "  make view-eval - to view the promptfoo eval output"
 	@echo "  make gen-eval <directory_name> - to create a new promptfoo eval directory structure"
 	@echo "  make gen-provider - to create a new promptfoo provider file from the promptfoo diretory structure"
 	@echo "  make clean - to remove generated files and directories"
diff --git a/app/scripts/cmd/provider/gen_provider.go b/app/scripts/cmd/provider/gen_provider.go
index ad9d5737..e914d51f 100644
--- a/app/scripts/cmd/provider/gen_provider.go
+++ b/app/scripts/cmd/provider/gen_provider.go
@@ -9,7 +9,7 @@ import (
 	"text/template"
 )
 
-var testDir = "../../test/evals/promptfoo-poc"
+var testDir = "test/evals/promptfoo-poc"
 var templFile = testDir + "/templates/" + "/provider.template.yml"
 
 func main() {
diff --git a/test/evals/promptfoo-poc/build/assets/build/changes.md b/test/evals/promptfoo-poc/build/assets/build/changes.md
new file mode 100644
index 00000000..4e06dca6
--- /dev/null
+++ b/test/evals/promptfoo-poc/build/assets/build/changes.md
@@ -0,0 +1,177 @@
+### Subtask 1: Parse the range of indices from the command-line arguments.
+
+```
+package cmd
+
+import (
+	"fmt"
+	"path/filepath"
+	"plandex/api"
+	"plandex/auth"
+	"plandex/lib"
+	"plandex/term"
+	"strconv"
+	"strings"
+
+	"github.com/plandex/plandex/shared"
+	"github.com/spf13/cobra"
+)
+
+func parseRange(arg string) ([]int, error) {
+	var indices []int
+	parts := strings.Split(arg, "-")
+	if len(parts) == 2 {
+		start, err := strconv.Atoi(parts[0])
+		if err != nil {
+			return nil, err
+		}
+		end, err := strconv.Atoi(parts[1])
+		if err != nil {
+			return nil, err
+		}
+		for i := start; i <= end; i++ {
+			indices = append(indices, i)
+		}
+	} else {
+		index, err := strconv.Atoi(arg)
+		if err != nil {
+			return nil, err
+		}
+		indices = append(indices, index)
+	}
+	return indices, nil
+}
+```
+
+### Subtask 2: Update the logic to handle the range of indices and mark the corresponding contexts for deletion.
+
+```
+package cmd
+
+import (
+	"fmt"
+	"path/filepath"
+	"plandex/api"
+	"plandex/auth"
+	"plandex/lib"
+	"plandex/term"
+	"strconv"
+	"strings"
+
+	"github.com/plandex/plandex/shared"
+	"github.com/spf13/cobra"
+)
+
+func parseRange(arg string) ([]int, error) {
+	var indices []int
+	parts := strings.Split(arg, "-")
+	if len(parts) == 2 {
+		start, err := strconv.Atoi(parts[0])
+		if err != nil {
+			return nil, err
+		}
+		end, err := strconv.Atoi(parts[1])
+		if err != nil {
+			return nil, err
+		}
+		for i := start; i <= end; i++ {
+			indices = append(indices, i)
+		}
+	} else {
+		index, err := strconv.Atoi(arg)
+		if err != nil {
+			return nil, err
+		}
+		indices = append(indices, index)
+	}
+	return indices, nil
+}
+
+func contextRm(cmd *cobra.Command, args []string) {
+	auth.MustResolveAuthWithOrg()
+	lib.MustResolveProject()
+
+	if lib.CurrentPlanId == "" {
+		fmt.Println("🤷‍♂️ No current plan")
+		return
+	}
+
+	term.StartSpinner("")
+	contexts, err := api.Client.ListContext(lib.CurrentPlanId, lib.CurrentBranch)
+
+	if err != nil {
+		term.OutputErrorAndExit("Error retrieving context: %v", err)
+	}
+
+	deleteIds := map[string]bool{}
+
+	for _, arg := range args {
+		indices, err := parseRange(arg)
+		if err != nil {
+			term.OutputErrorAndExit("Error parsing range: %v", err)
+		}
+
+		for _, index := range indices {
+			if index > 0 && index <= len(contexts) {
+				context := contexts[index-1]
+				deleteIds[context.Id] = true
+			}
+		}
+	}
+
+	for i, context := range contexts {
+		for _, id := range args {
+			if fmt.Sprintf("%d", i+1) == id || context.Name == id || context.FilePath == id || context.Url == id {
+				deleteIds[context.Id] = true
+				break
+			} else if context.FilePath != "" {
+				// Check if id is a glob pattern
+				matched, err := filepath.Match(id, context.FilePath)
+				if err != nil {
+					term.OutputErrorAndExit("Error matching glob pattern: %v", err)
+				}
+				if matched {
+					deleteIds[context.Id] = true
+					break
+				}
+
+				// Check if id is a parent directory
+				parentDir := context.FilePath
+				for parentDir != "." && parentDir != "/" && parentDir != "" {
+					if parentDir == id {
+						deleteIds[context.Id] = true
+						break
+					}
+					parentDir = filepath.Dir(parentDir) // Move up one directory
+				}
+
+			}
+		}
+	}
+
+	if len(deleteIds) > 0 {
+		res, err := api.Client.DeleteContext(lib.CurrentPlanId, lib.CurrentBranch, shared.DeleteContextRequest{
+			Ids: deleteIds,
+		})
+		term.StopSpinner()
+
+		if err != nil {
+			term.OutputErrorAndExit("Error deleting context: %v", err)
+		}
+
+		fmt.Println("✅ " + res.Msg)
+	} else {
+		term.StopSpinner()
+		fmt.Println("🤷‍♂️ No context removed")
+	}
+}
+```
+
+### Subtask 3: Update the `contextRm` function to include the new logic for handling ranges.
+
+
+```
+func init() {
+	RootCmd.AddCommand(contextRmCmd)
+}
+```
\ No newline at end of file
diff --git a/test/evals/promptfoo-poc/build/assets/build/post_build.go b/test/evals/promptfoo-poc/build/assets/build/post_build.go
new file mode 100644
index 00000000..4f855553
--- /dev/null
+++ b/test/evals/promptfoo-poc/build/assets/build/post_build.go
@@ -0,0 +1,123 @@
+pdx-1: package cmd
+pdx-2: 
+pdx-3: import (
+pdx-4: 	"fmt"
+pdx-5: 	"path/filepath"
+pdx-6: 	"plandex/api"
+pdx-7: 	"plandex/auth"
+pdx-8: 	"plandex/lib"
+pdx-9: 	"plandex/term"
+pdx-10: 	"strconv"
+pdx-11: 	"strings"
+pdx-12: 
+pdx-13: 	"github.com/plandex/plandex/shared"
+pdx-14: 	"github.com/spf13/cobra"
+pdx-15: )
+pdx-16: 
+pdx-17: func parseRange(arg string) ([]int, error) {
+pdx-18: 	var indices []int 
+pdx-19: 	parts := strings.Split(arg, "-")
+pdx-20: 	if len(parts) == 2 {
+pdx-21: 		start, err := strconv.Atoi(parts[0])
+pdx-22: 		if err != nil {
+pdx-23: 			return nil, err
+pdx-24: 		}
+pdx-25: 		end, err := strconv.Atoi(parts[1])
+pdx-26: 		if err != nil {
+pdx-27: 			return nil, err
+pdx-28: 		}
+pdx-29: 		for i := start; i <= end; i++ {
+pdx-30: 			indices = append(indices, i)
+pdx-31: 		}
+pdx-32: 	} else {
+pdx-33: 		index, err := strconv.Atoi(arg)
+pdx-34: 		if err != nil {
+pdx-35: 			return nil, err
+pdx-36: 		}
+pdx-37: 		indices = append(indices, index)
+pdx-38: 	}
+pdx-39: 	return indices, nil
+pdx-40: }
+pdx-41: 
+pdx-42: func contextRm(cmd *cobra.Command, args []string) {
+pdx-43: 	auth.MustResolveAuthWithOrg()
+pdx-44: 	lib.MustResolveProject()
+pdx-45: 
+pdx-46: 	if lib.CurrentPlanId == "" {
+pdx-47: 		fmt.Println("🤷‍♂️ No current plan")
+pdx-48: 		return
+pdx-49: 	}
+pdx-50: 
+pdx-51: 	term.StartSpinner("")
+pdx-52: 	contexts, err := api.Client.ListContext(lib.CurrentPlanId, lib.CurrentBranch)
+pdx-53: 
+pdx-54: 	if err != nil {
+pdx-55: 		term.OutputErrorAndExit("Error retrieving context: %v", err)
+pdx-56: 	}
+pdx-57: 
+pdx-58: 	deleteIds := map[string]bool{}
+pdx-59: 
+pdx-60: 	for _, arg := range args {
+pdx-61: 		indices, err := parseRange(arg)
+pdx-62: 		if err != nil {
+pdx-63: 			term.OutputErrorAndExit("Error parsing range: %v", err)
+pdx-64: 		}
+pdx-65: 
+pdx-66: 		for _, index := range indices {
+pdx-67: 			if index > 0 && index <= len(contexts) {
+pdx-68: 				context := contexts[index-1]
+pdx-69: 				deleteIds[context.Id] = true
+pdx-70: 			}
+pdx-71: 		}
+pdx-72: 	}
+pdx-73: 
+pdx-74: 	for i, context := range contexts {
+pdx-75: 		for _, id := range args {
+pdx-76: 			if fmt.Sprintf("%d", i+1) == id || context.Name == id || context.FilePath == id || context.Url == id {
+pdx-77: 				deleteIds[context.Id] = true
+pdx-78: 				break
+pdx-79: 			} else if context.FilePath != "" {
+pdx-80: 				// Check if id is a glob pattern
+pdx-81: 				matched, err := filepath.Match(id, context.FilePath)
+pdx-82: 				if err != nil {
+pdx-83: 					term.OutputErrorAndExit("Error matching glob pattern: %v", err)
+pdx-84: 				}
+pdx-85: 				if matched {
+pdx-86: 					deleteIds[context.Id] = true
+pdx-87: 					break
+pdx-88: 				}
+pdx-89: 
+pdx-90: 				// Check if id is a parent directory
+pdx-91: 				parentDir := context.FilePath
+pdx-92: 				for parentDir != "." && parentDir != "/" && parentDir != "" {
+pdx-93: 					if parentDir == id {
+pdx-94: 						deleteIds[context.Id] = true
+pdx-95: 						break
+pdx-96: 					}
+pdx-97: 					parentDir = filepath.Dir(parentDir) // Move up one directory
+pdx-98: 				}
+pdx-99: 			}
+pdx-100: 		}
+pdx-101: 	}
+pdx-102: 
+pdx-103: 	if len(deleteIds) > 0 {
+pdx-104: 		res, err := api.Client.DeleteContext(lib.CurrentPlanId, lib.CurrentBranch, shared.DeleteContextRequest{
+pdx-105: 			Ids: deleteIds,
+pdx-106: 		})
+pdx-107: 		term.StopSpinner()
+pdx-108: 
+pdx-109: 		if err != nil {
+pdx-110: 			term.OutputErrorAndExit("Error deleting context: %v", err)
+pdx-111: 		}
+pdx-112: 
+pdx-113: 		fmt.Println("✅ " + res.Msg)
+pdx-114: 	} else {
+pdx-115: 		term.StopSpinner()
+pdx-116: 		fmt.Println("🤷‍♂️ No context removed")
+pdx-117: 	}
+pdx-118: }
+pdx-119: 
+pdx-120: func init() {
+pdx-121: 	RootCmd.AddCommand(contextRmCmd)
+pdx-122: }
+pdx-123: 
\ No newline at end of file
diff --git a/test/evals/promptfoo-poc/build/assets/shared/pre_build.go b/test/evals/promptfoo-poc/build/assets/shared/pre_build.go
new file mode 100644
index 00000000..c90eeb79
--- /dev/null
+++ b/test/evals/promptfoo-poc/build/assets/shared/pre_build.go
@@ -0,0 +1,91 @@
+package cmd
+
+import (
+	"fmt"
+	"path/filepath"
+	"plandex/api"
+	"plandex/auth"
+	"plandex/lib"
+	"plandex/term"
+
+	"github.com/plandex/plandex/shared"
+	"github.com/spf13/cobra"
+)
+
+var contextRmCmd = &cobra.Command{
+	Use:     "rm",
+	Aliases: []string{"remove", "unload"},
+	Short:   "Remove context",
+	Long:    `Remove context by index, name, or glob.`,
+	Args:    cobra.MinimumNArgs(1),
+	Run:     contextRm,
+}
+
+func contextRm(cmd *cobra.Command, args []string) {
+	auth.MustResolveAuthWithOrg()
+	lib.MustResolveProject()
+
+	if lib.CurrentPlanId == "" {
+		fmt.Println("🤷‍♂️ No current plan")
+		return
+	}
+
+	term.StartSpinner("")
+	contexts, err := api.Client.ListContext(lib.CurrentPlanId, lib.CurrentBranch)
+
+	if err != nil {
+		term.OutputErrorAndExit("Error retrieving context: %v", err)
+	}
+
+	deleteIds := map[string]bool{}
+
+	for i, context := range contexts {
+		for _, id := range args {
+			if fmt.Sprintf("%d", i+1) == id || context.Name == id || context.FilePath == id || context.Url == id {
+				deleteIds[context.Id] = true
+				break
+			} else if context.FilePath != "" {
+				// Check if id is a glob pattern
+				matched, err := filepath.Match(id, context.FilePath)
+				if err != nil {
+					term.OutputErrorAndExit("Error matching glob pattern: %v", err)
+				}
+				if matched {
+					deleteIds[context.Id] = true
+					break
+				}
+
+				// Check if id is a parent directory
+				parentDir := context.FilePath
+				for parentDir != "." && parentDir != "/" && parentDir != "" {
+					if parentDir == id {
+						deleteIds[context.Id] = true
+						break
+					}
+					parentDir = filepath.Dir(parentDir) // Move up one directory
+				}
+
+			}
+		}
+	}
+
+	if len(deleteIds) > 0 {
+		res, err := api.Client.DeleteContext(lib.CurrentPlanId, lib.CurrentBranch, shared.DeleteContextRequest{
+			Ids: deleteIds,
+		})
+		term.StopSpinner()
+
+		if err != nil {
+			term.OutputErrorAndExit("Error deleting context: %v", err)
+		}
+
+		fmt.Println("✅ " + res.Msg)
+	} else {
+		term.StopSpinner()
+		fmt.Println("🤷‍♂️ No context removed")
+	}
+}
+
+func init() {
+	RootCmd.AddCommand(contextRmCmd)
+}
diff --git a/test/evals/promptfoo-poc/build/build.config.properties b/test/evals/promptfoo-poc/build/build.config.properties
index fea41b87..28127414 100644
--- a/test/evals/promptfoo-poc/build/build.config.properties
+++ b/test/evals/promptfoo-poc/build/build.config.properties
@@ -1,7 +1,7 @@
 provider_id=openai:gpt-4o
-function_name=
+function_name=listChangesWithLineNums
 tool_type=function
 function_param_type=object
 tool_choice_type=function
-tool_choice_function_name=
+tool_choice_function_name=listChangesWithLineNums
 nested_parameters_json=build.parameters.json
diff --git a/test/evals/promptfoo-poc/build/build.parameters.json b/test/evals/promptfoo-poc/build/build.parameters.json
index e69de29b..c03bddfb 100644
--- a/test/evals/promptfoo-poc/build/build.parameters.json
+++ b/test/evals/promptfoo-poc/build/build.parameters.json
@@ -0,0 +1,78 @@
+{
+  "type": "object",
+  "properties": {
+    "comments": {
+      "type": "array",
+      "items": {
+        "type": "object",
+        "properties": {
+          "txt": {
+            "type": "string"
+          },
+          "reference": {
+            "type": "boolean"
+          }
+        },
+        "required": ["txt", "reference"]
+      }
+    },
+    "problems": {
+      "type": "string"
+    },
+    "changes": {
+      "type": "array",
+      "items": {
+        "type": "object",
+        "properties": {
+          "summary": {
+            "type": "string"
+          },
+          "hasChange": {
+            "type": "boolean"
+          },
+          "old": {
+            "type": "object",
+            "properties": {
+              "entireFile": {
+                "type": "boolean"
+              },
+              "startLineString": {
+                "type": "string"
+              },
+              "endLineString": {
+                "type": "string"
+              }
+            },
+            "required": ["startLineString", "endLineString"]
+          },
+          "startLineIncludedReasoning": {
+            "type": "string"
+          },
+          "startLineIncluded": {
+            "type": "boolean"
+          },
+          "endLineIncludedReasoning": {
+            "type": "string"
+          },
+          "endLineIncluded": {
+            "type": "boolean"
+          },
+          "new": {
+            "type": "string"
+          }
+        },
+        "required": [
+          "summary",
+          "hasChange",
+          "old",
+          "startLineIncludedReasoning",
+          "startLineIncluded",
+          "endLineIncludedReasoning",
+          "endLineIncluded",
+          "new"
+        ]
+      }
+    }
+  },
+  "required": ["comments", "problems", "changes"]
+}
\ No newline at end of file
diff --git a/test/evals/promptfoo-poc/build/build.provider.yml b/test/evals/promptfoo-poc/build/build.provider.yml
new file mode 100644
index 00000000..a91430f0
--- /dev/null
+++ b/test/evals/promptfoo-poc/build/build.provider.yml
@@ -0,0 +1,82 @@
+id: openai:gpt-4o
+config:
+  tools:
+    [
+      {
+        "type": "function",
+        "function":
+          {
+            "name": "listChangesWithLineNums",
+            "parameters":
+              {
+                "properties":
+                  {
+                    "changes":
+                      {
+                        "items":
+                          {
+                            "properties":
+                              {
+                                "endLineIncluded": { "type": "boolean" },
+                                "endLineIncludedReasoning":
+                                  { "type": "string" },
+                                "hasChange": { "type": "boolean" },
+                                "new": { "type": "string" },
+                                "old":
+                                  {
+                                    "properties":
+                                      {
+                                        "endLineString": { "type": "string" },
+                                        "entireFile": { "type": "boolean" },
+                                        "startLineString": { "type": "string" },
+                                      },
+                                    "required":
+                                      ["startLineString", "endLineString"],
+                                    "type": "object",
+                                  },
+                                "startLineIncluded": { "type": "boolean" },
+                                "startLineIncludedReasoning":
+                                  { "type": "string" },
+                                "summary": { "type": "string" },
+                              },
+                            "required":
+                              [
+                                "summary",
+                                "hasChange",
+                                "old",
+                                "startLineIncludedReasoning",
+                                "startLineIncluded",
+                                "endLineIncludedReasoning",
+                                "endLineIncluded",
+                                "new",
+                              ],
+                            "type": "object",
+                          },
+                        "type": "array",
+                      },
+                    "comments":
+                      {
+                        "items":
+                          {
+                            "properties":
+                              {
+                                "reference": { "type": "boolean" },
+                                "txt": { "type": "string" },
+                              },
+                            "required": ["txt", "reference"],
+                            "type": "object",
+                          },
+                        "type": "array",
+                      },
+                    "problems": { "type": "string" },
+                  },
+                "required": ["comments", "problems", "changes"],
+                "type": "object",
+              },
+          },
+      },
+    ]
+  tool_choice:
+    type: "function"
+    function:
+      name: "listChangesWithLineNums"
diff --git a/test/evals/promptfoo-poc/build/promptfooconfig.yml b/test/evals/promptfoo-poc/build/promptfooconfig.yml
index 363f5bc5..dbce4996 100644
--- a/test/evals/promptfoo-poc/build/promptfooconfig.yml
+++ b/test/evals/promptfoo-poc/build/promptfooconfig.yml
@@ -2,8 +2,6 @@ description: "build"
 
 prompts:
   - file://build.prompt.txt
-
 providers:
   - file://build.provider.yml
-
 tests: tests/*.tests.yml
diff --git a/test/evals/promptfoo-poc/build/tests/build.test.yml b/test/evals/promptfoo-poc/build/tests/build.test.yml
new file mode 100644
index 00000000..7e76ab57
--- /dev/null
+++ b/test/evals/promptfoo-poc/build/tests/build.test.yml
@@ -0,0 +1,19 @@
+- description: "Check Build with Line numbers"
+  vars:
+    preBuildState: file://assets/shared/pre_build.go
+    changes: file://assets/build/changes.md
+    filePath: parse.go
+    postBuildState: file://assets/build/post_build.go
+  assert:
+    - type: is-json
+    - type: is-valid-openai-tools-call
+    - type: javascript
+      value: |
+        var args = JSON.parse(output[0].function.arguments)
+        return ( 
+          args.changes.length > 0 &&
+          args.changes.some(
+            change => change.hasChange && 
+                      change.new.includes("var contextRmCmd = &cobra.Command{")
+          )
+        )
diff --git a/test/evals/promptfoo-poc/fix/fix.config.properties b/test/evals/promptfoo-poc/fix/fix.config.properties
index 28127414..dc796603 100644
--- a/test/evals/promptfoo-poc/fix/fix.config.properties
+++ b/test/evals/promptfoo-poc/fix/fix.config.properties
@@ -4,4 +4,4 @@ tool_type=function
 function_param_type=object
 tool_choice_type=function
 tool_choice_function_name=listChangesWithLineNums
-nested_parameters_json=build.parameters.json
+nested_parameters_json=fix.parameters.json
diff --git a/test/evals/promptfoo-poc/fix/fix.provider.yml b/test/evals/promptfoo-poc/fix/fix.provider.yml
index a91430f0..f760f6e8 100644
--- a/test/evals/promptfoo-poc/fix/fix.provider.yml
+++ b/test/evals/promptfoo-poc/fix/fix.provider.yml
@@ -5,75 +5,7 @@ config:
       {
         "type": "function",
         "function":
-          {
-            "name": "listChangesWithLineNums",
-            "parameters":
-              {
-                "properties":
-                  {
-                    "changes":
-                      {
-                        "items":
-                          {
-                            "properties":
-                              {
-                                "endLineIncluded": { "type": "boolean" },
-                                "endLineIncludedReasoning":
-                                  { "type": "string" },
-                                "hasChange": { "type": "boolean" },
-                                "new": { "type": "string" },
-                                "old":
-                                  {
-                                    "properties":
-                                      {
-                                        "endLineString": { "type": "string" },
-                                        "entireFile": { "type": "boolean" },
-                                        "startLineString": { "type": "string" },
-                                      },
-                                    "required":
-                                      ["startLineString", "endLineString"],
-                                    "type": "object",
-                                  },
-                                "startLineIncluded": { "type": "boolean" },
-                                "startLineIncludedReasoning":
-                                  { "type": "string" },
-                                "summary": { "type": "string" },
-                              },
-                            "required":
-                              [
-                                "summary",
-                                "hasChange",
-                                "old",
-                                "startLineIncludedReasoning",
-                                "startLineIncluded",
-                                "endLineIncludedReasoning",
-                                "endLineIncluded",
-                                "new",
-                              ],
-                            "type": "object",
-                          },
-                        "type": "array",
-                      },
-                    "comments":
-                      {
-                        "items":
-                          {
-                            "properties":
-                              {
-                                "reference": { "type": "boolean" },
-                                "txt": { "type": "string" },
-                              },
-                            "required": ["txt", "reference"],
-                            "type": "object",
-                          },
-                        "type": "array",
-                      },
-                    "problems": { "type": "string" },
-                  },
-                "required": ["comments", "problems", "changes"],
-                "type": "object",
-              },
-          },
+          { "name": "listChangesWithLineNums", "parameters": {"properties":{"changes":{"items":{"properties":{"endLineIncluded":{"type":"boolean"},"endLineIncludedReasoning":{"type":"string"},"hasChange":{"type":"boolean"},"new":{"type":"string"},"old":{"properties":{"endLineString":{"type":"string"},"entireFile":{"type":"boolean"},"startLineString":{"type":"string"}},"required":["startLineString","endLineString"],"type":"object"},"startLineIncluded":{"type":"boolean"},"startLineIncludedReasoning":{"type":"string"},"summary":{"type":"string"}},"required":["summary","hasChange","old","startLineIncludedReasoning","startLineIncluded","endLineIncludedReasoning","endLineIncluded","new"],"type":"object"},"type":"array"},"comments":{"items":{"properties":{"reference":{"type":"boolean"},"txt":{"type":"string"}},"required":["txt","reference"],"type":"object"},"type":"array"},"problems":{"type":"string"}},"required":["comments","problems","changes"],"type":"object"} },
       },
     ]
   tool_choice:
diff --git a/test/evals/promptfoo-poc/verify/verify.provider.yml b/test/evals/promptfoo-poc/verify/verify.provider.yml
index 0db53932..a23b8ad4 100644
--- a/test/evals/promptfoo-poc/verify/verify.provider.yml
+++ b/test/evals/promptfoo-poc/verify/verify.provider.yml
@@ -5,66 +5,7 @@ config:
       {
         "type": "function",
         "function":
-          {
-            "name": "verifyOutput",
-            "parameters":
-              {
-                "properties":
-                  {
-                    "comments":
-                      {
-                        "items":
-                          {
-                            "properties":
-                              {
-                                "reference": { "type": "boolean" },
-                                "txt": { "type": "string" },
-                              },
-                            "required": ["txt", "reference"],
-                            "type": "object",
-                          },
-                        "type": "array",
-                      },
-                    "duplicationErrorsReasoning": { "type": "string" },
-                    "hasDuplicationErrors": { "type": "boolean" },
-                    "hasReferenceErrors": { "type": "boolean" },
-                    "hasRemovedCodeErrors": { "type": "boolean" },
-                    "hasSyntaxErrors": { "type": "boolean" },
-                    "referenceErrorsReasoning": { "type": "string" },
-                    "removed":
-                      {
-                        "items":
-                          {
-                            "properties":
-                              {
-                                "code": { "type": "string" },
-                                "correct": { "type": "boolean" },
-                                "reasoning": { "type": "string" },
-                              },
-                            "required": ["code", "reasoning", "correct"],
-                            "type": "object",
-                          },
-                        "type": "array",
-                      },
-                    "removedCodeErrorsReasoning": { "type": "string" },
-                    "syntaxErrorsReasoning": { "type": "string" },
-                  },
-                "required":
-                  [
-                    "syntaxErrorsReasoning",
-                    "hasSyntaxErrors",
-                    "removed",
-                    "removedCodeErrorsReasoning",
-                    "hasRemovedCodeErrors",
-                    "duplicationErrorsReasoning",
-                    "hasDuplicationErrors",
-                    "comments",
-                    "referenceErrorsReasoning",
-                    "hasReferenceErrors",
-                  ],
-                "type": "object",
-              },
-          },
+          { "name": "verifyOutput", "parameters": {"properties":{"comments":{"items":{"properties":{"reference":{"type":"boolean"},"txt":{"type":"string"}},"required":["txt","reference"],"type":"object"},"type":"array"},"duplicationErrorsReasoning":{"type":"string"},"hasDuplicationErrors":{"type":"boolean"},"hasReferenceErrors":{"type":"boolean"},"hasRemovedCodeErrors":{"type":"boolean"},"hasSyntaxErrors":{"type":"boolean"},"referenceErrorsReasoning":{"type":"string"},"removed":{"items":{"properties":{"code":{"type":"string"},"correct":{"type":"boolean"},"reasoning":{"type":"string"}},"required":["code","reasoning","correct"],"type":"object"},"type":"array"},"removedCodeErrorsReasoning":{"type":"string"},"syntaxErrorsReasoning":{"type":"string"}},"required":["syntaxErrorsReasoning","hasSyntaxErrors","removed","removedCodeErrorsReasoning","hasRemovedCodeErrors","duplicationErrorsReasoning","hasDuplicationErrors","comments","referenceErrorsReasoning","hasReferenceErrors"],"type":"object"} },
       },
     ]
   tool_choice:

From c61ba48bf31eb02f5555417af0ba8b684a39591c Mon Sep 17 00:00:00 2001
From: ZanzyTHEbar <pyr0ndet0s97@gmail.com>
Date: Thu, 20 Jun 2024 22:31:16 +0100
Subject: [PATCH 11/12] Update promptfoo configuration files for build and fix

---
 app/scripts/cmd/gen/gen.go                             | 10 +++++-----
 test/evals/promptfoo-poc/build/build.prompt.txt        | 10 +++++-----
 .../{promptfooconfig.yml => promptfooconfig.yaml}      |  0
 3 files changed, 10 insertions(+), 10 deletions(-)
 rename test/evals/promptfoo-poc/build/{promptfooconfig.yml => promptfooconfig.yaml} (100%)

diff --git a/app/scripts/cmd/gen/gen.go b/app/scripts/cmd/gen/gen.go
index 40fbd558..cf82e72f 100644
--- a/app/scripts/cmd/gen/gen.go
+++ b/app/scripts/cmd/gen/gen.go
@@ -21,7 +21,7 @@ func main() {
 		log.Fatalf("Error creating directory: %s", err)
 	}
 
-	f, err := os.Create(fmt.Sprintf("%s/%s", dirPath, "promptfooconfig.yml"))
+	f, err := os.Create(fmt.Sprintf("%s/%s", dirPath, "promptfooconfig.yaml"))
 	if err != nil {
 		log.Fatalf("Error creating file: %s", err)
 	}
@@ -51,7 +51,7 @@ func main() {
 		}
 	}
 
-	// Template for promptfooconfig.yml
+	// Template for promptfooconfig.yaml
 	ymlTemplate := `description: "{{ .Name }}"
 
 prompts:
@@ -63,7 +63,7 @@ providers:
 tests: tests/*.tests.yml
 `
 
-	// Populate promptfooconfig.yml
+	// Populate promptfooconfig.yaml
 	promptFooConfigTmpl, err := template.New("yml").Parse(ymlTemplate)
 	if err != nil {
 		log.Fatalf("Error creating template: %s", err)
@@ -91,9 +91,9 @@ nested_parameters_json={{ .Name }}.parameters.json
 	}
 	defer configFile.Close()
 
-	file, err := os.Create(fmt.Sprintf("%s/promptfooconfig.yml", dirPath))
+	file, err := os.Create(fmt.Sprintf("%s/promptfooconfig.yaml", dirPath))
 	if err != nil {
-		log.Fatalf("Error creating promptfooconfig.yml: %s", err)
+		log.Fatalf("Error creating promptfooconfig.yaml: %s", err)
 	}
 	defer file.Close()
 
diff --git a/test/evals/promptfoo-poc/build/build.prompt.txt b/test/evals/promptfoo-poc/build/build.prompt.txt
index ed7a0926..9d4afc7b 100644
--- a/test/evals/promptfoo-poc/build/build.prompt.txt
+++ b/test/evals/promptfoo-poc/build/build.prompt.txt
@@ -85,12 +85,12 @@ If *any* change has the 'entireFile' key in the 'old' property set to true, the
 Example change object:
   ---
   {
-    summary: "Fix syntax error in loop body.",
-   	old: {
-      startLineString: "pdx-5: for i := 0; i < 10; i++ { ",
-      endLineString: "pdx-7: }",
+    "summary": "Fix syntax error in loop body.",
+    "old": {
+      "startLineString": "pdx-5: for i := 0; i < 10; i++ { ",
+      "endLineString": "pdx-7: }"
     },
-    new: "for i := 0; i < 10; i++ {\n  execQuery()\n  }\n  }\n}",
+    "new": "for i := 0; i < 10; i++ {\n  execQuery()\n  }\n  }\n}"
   }
   ---
 
diff --git a/test/evals/promptfoo-poc/build/promptfooconfig.yml b/test/evals/promptfoo-poc/build/promptfooconfig.yaml
similarity index 100%
rename from test/evals/promptfoo-poc/build/promptfooconfig.yml
rename to test/evals/promptfoo-poc/build/promptfooconfig.yaml

From ea7e5e81664891b5779e205844b716bc70757475 Mon Sep 17 00:00:00 2001
From: ZanzyTHEbar <pyr0ndet0s97@gmail.com>
Date: Thu, 20 Jun 2024 22:37:35 +0100
Subject: [PATCH 12/12] feat: Update promptfoo eval command in Makefile

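This commit updates the documented promptfoo eval command in the README to `make eval <name_of_eval_dir>`, so that the name of the eval directory can be specified. This allows for more flexibility when running evaluations. It also rewrites the example change object in fix.prompt.txt as valid JSON.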
---
 test/evals/promptfoo-poc/README.md          |  2 +-
 test/evals/promptfoo-poc/fix/fix.prompt.txt | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/test/evals/promptfoo-poc/README.md b/test/evals/promptfoo-poc/README.md
index f08fd60b..cc69a9c8 100644
--- a/test/evals/promptfoo-poc/README.md
+++ b/test/evals/promptfoo-poc/README.md
@@ -20,7 +20,7 @@ To run or create evals, you will need to have the following installed:
 To run the evaluations, you can cd into the relevant directory and use the following command:
 
 ```bash
-make evals
+make eval <name_of_eval_dir>
 ```
 
 Or, you can run all the evaluations by running the following command:
diff --git a/test/evals/promptfoo-poc/fix/fix.prompt.txt b/test/evals/promptfoo-poc/fix/fix.prompt.txt
index 4667177c..1ca11aa7 100644
--- a/test/evals/promptfoo-poc/fix/fix.prompt.txt
+++ b/test/evals/promptfoo-poc/fix/fix.prompt.txt
@@ -84,12 +84,12 @@ You MUST ensure the line numbers for the 'old' property correctly remove *ALL* c
   Example change object:
   ---
   {
-    summary: "Fix syntax error in loop body.",
-    old: {
-      startLineString: "pdx-5: for i := 0; i < 10; i++ { ",
-      endLineString: "pdx-7: }",
+    "summary": "Fix syntax error in loop body.",
+    "old": {
+      "startLineString": "pdx-5: for i := 0; i < 10; i++ { ",
+      "endLineString": "pdx-7: }"
     },
-    new: "for i := 0; i < 10; i++ {\n  execQuery()\n  }\n  }\n}",
+    "new": "for i := 0; i < 10; i++ {\n  execQuery()\n  }\n  }\n}"
   }
   ---