Fix incomplete stacktraces in evaluations #1842

Merged
merged 3 commits on Jul 5, 2024
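This PR swaps str(e) for traceback.format_exc() in the error paths of the evaluator service and the evaluation task, and surfaces the captured stacktrace in the web UI. str(e) yields only the exception message, often a single line such as "division by zero", while traceback.format_exc() returns the complete formatted traceback of the exception currently being handled, which is what you want stored for debugging a failed evaluation. A minimal sketch of the difference, independent of agenta's code (the evaluator function here is hypothetical):

import traceback


def hypothetical_evaluator() -> dict:
    try:
        1 / 0  # stand-in for evaluator work that raises
    except Exception as e:  # pylint: disable=broad-except
        return {
            # Old behavior: message only, no frames.
            "message": str(e),  # "division by zero"
            # New behavior: full "Traceback (most recent call last): ..." text.
            "stacktrace": traceback.format_exc(),
        }
    return {}


print(hypothetical_evaluator()["stacktrace"])

Note that traceback.format_exc() already returns a str, so the str(...) wrapper kept throughout the diff below is a no-op, presumably retained for symmetry with the old str(e) calls.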
46 changes: 32 additions & 14 deletions agenta-backend/agenta_backend/services/evaluators_service.py
@@ -1,6 +1,7 @@
 import json
 import logging
 import re
+import traceback
 from typing import Any, Dict, List, Tuple
 
 import httpx
@@ -79,7 +80,8 @@ def auto_exact_match(
             type="error",
             value=None,
             error=Error(
-                message="Error during Auto Exact Match evaluation", stacktrace=str(e)
+                message="Error during Auto Exact Match evaluation",
+                stacktrace=str(traceback.format_exc()),
             ),
         )

@@ -103,7 +105,8 @@ def auto_regex_test(
             type="error",
             value=None,
             error=Error(
-                message="Error during Auto Regex evaluation", stacktrace=str(e)
+                message="Error during Auto Regex evaluation",
+                stacktrace=str(traceback.format_exc()),
             ),
         )

@@ -186,15 +189,16 @@ def auto_webhook_test(
             value=None,
             error=Error(
                 message="Error during Auto Webhook evaluation; An HTTP error occurred",
-                stacktrace=str(e),
+                stacktrace=str(traceback.format_exc()),
             ),
         )
     except Exception as e:  # pylint: disable=broad-except
         return Result(
             type="error",
             value=None,
             error=Error(
-                message="Error during Auto Webhook evaluation", stacktrace=str(e)
+                message="Error during Auto Webhook evaluation",
+                stacktrace=str(traceback.format_exc()),
             ),
         )

@@ -224,7 +228,8 @@ def auto_custom_code_run(
             type="error",
             value=None,
             error=Error(
-                message="Error during Auto Custom Code Evaluation", stacktrace=str(e)
+                message="Error during Auto Custom Code Evaluation",
+                stacktrace=str(traceback.format_exc()),
             ),
         )

@@ -281,7 +286,10 @@ def auto_ai_critique(
         return Result(
             type="error",
             value=None,
-            error=Error(message="Error during Auto AI Critique", stacktrace=str(e)),
+            error=Error(
+                message="Error during Auto AI Critique",
+                stacktrace=str(traceback.format_exc()),
+            ),
         )


@@ -308,7 +316,8 @@ def auto_starts_with(
             type="error",
             value=None,
             error=Error(
-                message="Error during Starts With evaluation", stacktrace=str(e)
+                message="Error during Starts With evaluation",
+                stacktrace=str(traceback.format_exc()),
             ),
         )

@@ -335,7 +344,10 @@ def auto_ends_with(
         return Result(
             type="error",
             value=None,
-            error=Error(message="Error during Ends With evaluation", stacktrace=str(e)),
+            error=Error(
+                message="Error during Ends With evaluation",
+                stacktrace=str(traceback.format_exc()),
+            ),
         )


@@ -361,7 +373,10 @@ def auto_contains(
         return Result(
             type="error",
             value=None,
-            error=Error(message="Error during Contains evaluation", stacktrace=str(e)),
+            error=Error(
+                message="Error during Contains evaluation",
+                stacktrace=str(traceback.format_exc()),
+            ),
         )


@@ -391,7 +406,8 @@ def auto_contains_any(
             type="error",
             value=None,
             error=Error(
-                message="Error during Contains Any evaluation", stacktrace=str(e)
+                message="Error during Contains Any evaluation",
+                stacktrace=str(traceback.format_exc()),
             ),
         )

@@ -422,7 +438,8 @@ def auto_contains_all(
             type="error",
             value=None,
             error=Error(
-                message="Error during Contains All evaluation", stacktrace=str(e)
+                message="Error during Contains All evaluation",
+                stacktrace=str(traceback.format_exc()),
             ),
         )

@@ -452,7 +469,8 @@ def auto_contains_json(
             type="error",
             value=None,
             error=Error(
-                message="Error during Contains JSON evaluation", stacktrace=str(e)
+                message="Error during Contains JSON evaluation",
+                stacktrace=str(traceback.format_exc()),
             ),
         )

@@ -511,7 +529,7 @@ def auto_levenshtein_distance(
             value=None,
             error=Error(
                 message="Error during Levenshtein threshold evaluation",
-                stacktrace=str(e),
+                stacktrace=str(traceback.format_exc()),
             ),
         )

@@ -552,7 +570,7 @@ def auto_similarity_match(
             value=None,
             error=Error(
                 message="Error during Auto Similarity Match evaluation",
-                stacktrace=str(e),
+                stacktrace=str(traceback.format_exc()),
             ),
         )

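All of the hunks above apply the same mechanical change to one shared shape: each evaluator catches broadly and wraps the failure in an Error carried by a Result. A condensed, self-contained sketch of that error path, assuming dataclass stand-ins for agenta's Result and Error models (both hypothetical here, as is the evaluator):

import traceback
from dataclasses import dataclass
from typing import Any, Optional


@dataclass
class Error:  # hypothetical stand-in for agenta's Error model
    message: str
    stacktrace: str


@dataclass
class Result:  # hypothetical stand-in for agenta's Result model
    type: str
    value: Optional[Any]
    error: Optional[Error] = None


def auto_demo_evaluator(output: str) -> Result:
    try:
        # Stand-in for real scoring logic; raises ZeroDivisionError on "".
        return Result(type="number", value=1 / len(output))
    except Exception:  # pylint: disable=broad-except
        return Result(
            type="error",
            value=None,
            error=Error(
                message="Error during Demo evaluation",
                stacktrace=traceback.format_exc(),
            ),
        )


print(auto_demo_evaluator("").error.stacktrace)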
17 changes: 11 additions & 6 deletions agenta-backend/agenta_backend/tasks/evaluations.py
@@ -249,12 +249,14 @@ def evaluate(
             evaluators_results.append(result_object)
 
         all_correct_answers = [
-            CorrectAnswer(
-                key=ground_truth_column_name,
-                value=data_point[ground_truth_column_name],
+            (
+                CorrectAnswer(
+                    key=ground_truth_column_name,
+                    value=data_point[ground_truth_column_name],
+                )
+                if ground_truth_column_name in data_point
+                else CorrectAnswer(key=ground_truth_column_name, value="")
             )
-            if ground_truth_column_name in data_point
-            else CorrectAnswer(key=ground_truth_column_name, value="")
             for ground_truth_column_name in ground_truth_column_names
         ]
         # 4. We save the result of the eval scenario in the db
@@ -313,7 +315,10 @@ def evaluate(
                 "status": Result(
                     type="status",
                     value="EVALUATION_FAILED",
-                    error=Error(message="Evaluation Failed", stacktrace=str(e)),
+                    error=Error(
+                        message="Evaluation Failed !!!",
+                        stacktrace=str(traceback.format_exc()),
+                    ),
                 )
             },
         )
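The first evaluations.py hunk is mostly Black-style reformatting: the conditional expression inside the list comprehension gets wrapped in parentheses, and the behavior is unchanged, falling back to an empty value whenever a ground-truth column is missing from the data point. A runnable sketch of that pattern, with a hypothetical CorrectAnswer dataclass in place of the backend model:

from dataclasses import dataclass


@dataclass
class CorrectAnswer:  # hypothetical stand-in for the backend model
    key: str
    value: str


data_point = {"answer": "42"}
ground_truth_column_names = ["answer", "reference"]

all_correct_answers = [
    (
        CorrectAnswer(key=name, value=data_point[name])
        if name in data_point
        else CorrectAnswer(key=name, value="")
    )
    for name in ground_truth_column_names
]

print(all_correct_answers)
# [CorrectAnswer(key='answer', value='42'), CorrectAnswer(key='reference', value='')]

The last diff in the PR updates the web UI's StatusRenderer: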
@@ -188,14 +188,15 @@ export const StatusRenderer = React.memo(
         )
         const {label, color} = statusMapper(token)[params.data?.status.value as EvaluationStatus]
         const errorMsg = params.data?.status.error?.message
+        const errorStacktrace = params.data?.status.error?.stacktrace
 
         return (
             <Typography.Text className={classes.statusCell}>
                 <div style={{backgroundColor: color}} />
                 <span>{label}</span>
                 {errorMsg && (
                     <span style={{marginRight: 2}}>
-                        <Tooltip title={errorMsg}>
+                        <Tooltip title={errorStacktrace ? errorStacktrace : ""}>
                             <InfoCircleOutlined />
                         </Tooltip>
                     </span>
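With this change the StatusRenderer reads error?.stacktrace alongside error?.message and uses the stacktrace as the tooltip title, so hovering the info icon on a failed evaluation shows the full traceback captured by the backend rather than the one-line message. As written, the tooltip title is the empty string whenever no stacktrace is present, since the errorMsg fallback was replaced rather than chained.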