Fix incomplete stacktraces in evaluations #1842

Merged
merged 3 commits on Jul 5, 2024
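This PR swaps str(e) for traceback.format_exc() in the error paths of the evaluator service and the evaluation task, and surfaces the captured stacktrace in the web UI. str(e) yields only the exception message, often a single line such as "division by zero", while traceback.format_exc() returns the complete formatted traceback of the exception currently being handled, which is what you want stored for debugging a failed evaluation. A minimal sketch of the difference, independent of agenta's code (the evaluator function here is hypothetical):

import traceback


def hypothetical_evaluator() -> dict:
    try:
        1 / 0  # stand-in for evaluator work that raises
    except Exception as e:  # pylint: disable=broad-except
        return {
            # Old behavior: message only, no frames.
            "message": str(e),  # "division by zero"
            # New behavior: full "Traceback (most recent call last): ..." text.
            "stacktrace": traceback.format_exc(),
        }
    return {}


print(hypothetical_evaluator()["stacktrace"])

Note that traceback.format_exc() already returns a str, so the str(...) wrapper kept throughout the diff below is a no-op, presumably retained for symmetry with the old str(e) calls.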
46 changes: 32 additions & 14 deletions agenta-backend/agenta_backend/services/evaluators_service.py
@@ -1,6 +1,7 @@
 import json
 import logging
 import re
+import traceback
 from typing import Any, Dict, List, Tuple
 
 import httpx
@@ -79,7 +80,8 @@ def auto_exact_match(
             type="error",
             value=None,
             error=Error(
-                message="Error during Auto Exact Match evaluation", stacktrace=str(e)
+                message="Error during Auto Exact Match evaluation",
+                stacktrace=str(traceback.format_exc()),
             ),
         )

@@ -103,7 +105,8 @@ def auto_regex_test(
             type="error",
             value=None,
             error=Error(
-                message="Error during Auto Regex evaluation", stacktrace=str(e)
+                message="Error during Auto Regex evaluation",
+                stacktrace=str(traceback.format_exc()),
             ),
         )

@@ -186,15 +189,16 @@ def auto_webhook_test(
             value=None,
             error=Error(
                 message="Error during Auto Webhook evaluation; An HTTP error occurred",
-                stacktrace=str(e),
+                stacktrace=str(traceback.format_exc()),
             ),
         )
     except Exception as e:  # pylint: disable=broad-except
         return Result(
             type="error",
             value=None,
             error=Error(
-                message="Error during Auto Webhook evaluation", stacktrace=str(e)
+                message="Error during Auto Webhook evaluation",
+                stacktrace=str(traceback.format_exc()),
             ),
         )

@@ -224,7 +228,8 @@ def auto_custom_code_run(
             type="error",
             value=None,
             error=Error(
-                message="Error during Auto Custom Code Evaluation", stacktrace=str(e)
+                message="Error during Auto Custom Code Evaluation",
+                stacktrace=str(traceback.format_exc()),
             ),
         )

@@ -281,7 +286,10 @@ def auto_ai_critique(
         return Result(
             type="error",
             value=None,
-            error=Error(message="Error during Auto AI Critique", stacktrace=str(e)),
+            error=Error(
+                message="Error during Auto AI Critique",
+                stacktrace=str(traceback.format_exc()),
+            ),
         )


@@ -308,7 +316,8 @@ def auto_starts_with(
             type="error",
             value=None,
             error=Error(
-                message="Error during Starts With evaluation", stacktrace=str(e)
+                message="Error during Starts With evaluation",
+                stacktrace=str(traceback.format_exc()),
             ),
         )

@@ -335,7 +344,10 @@ def auto_ends_with(
         return Result(
             type="error",
             value=None,
-            error=Error(message="Error during Ends With evaluation", stacktrace=str(e)),
+            error=Error(
+                message="Error during Ends With evaluation",
+                stacktrace=str(traceback.format_exc()),
+            ),
         )


@@ -361,7 +373,10 @@ def auto_contains(
         return Result(
             type="error",
             value=None,
-            error=Error(message="Error during Contains evaluation", stacktrace=str(e)),
+            error=Error(
+                message="Error during Contains evaluation",
+                stacktrace=str(traceback.format_exc()),
+            ),
         )


@@ -391,7 +406,8 @@ def auto_contains_any(
             type="error",
             value=None,
             error=Error(
-                message="Error during Contains Any evaluation", stacktrace=str(e)
+                message="Error during Contains Any evaluation",
+                stacktrace=str(traceback.format_exc()),
             ),
         )

@@ -422,7 +438,8 @@ def auto_contains_all(
             type="error",
             value=None,
             error=Error(
-                message="Error during Contains All evaluation", stacktrace=str(e)
+                message="Error during Contains All evaluation",
+                stacktrace=str(traceback.format_exc()),
             ),
         )

@@ -452,7 +469,8 @@ def auto_contains_json(
             type="error",
             value=None,
             error=Error(
-                message="Error during Contains JSON evaluation", stacktrace=str(e)
+                message="Error during Contains JSON evaluation",
+                stacktrace=str(traceback.format_exc()),
             ),
         )

@@ -511,7 +529,7 @@ def auto_levenshtein_distance(
             value=None,
             error=Error(
                 message="Error during Levenshtein threshold evaluation",
-                stacktrace=str(e),
+                stacktrace=str(traceback.format_exc()),
             ),
         )

@@ -552,7 +570,7 @@ def auto_similarity_match(
             value=None,
             error=Error(
                 message="Error during Auto Similarity Match evaluation",
-                stacktrace=str(e),
+                stacktrace=str(traceback.format_exc()),
             ),
         )

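All of the hunks above apply the same mechanical change to one shared shape: each evaluator catches broadly and wraps the failure in an Error carried by a Result. A condensed, self-contained sketch of that error path, assuming dataclass stand-ins for agenta's Result and Error models (both hypothetical here, as is the evaluator):

import traceback
from dataclasses import dataclass
from typing import Any, Optional


@dataclass
class Error:  # hypothetical stand-in for agenta's Error model
    message: str
    stacktrace: str


@dataclass
class Result:  # hypothetical stand-in for agenta's Result model
    type: str
    value: Optional[Any]
    error: Optional[Error] = None


def auto_demo_evaluator(output: str) -> Result:
    try:
        # Stand-in for real scoring logic; raises ZeroDivisionError on "".
        return Result(type="number", value=1 / len(output))
    except Exception:  # pylint: disable=broad-except
        return Result(
            type="error",
            value=None,
            error=Error(
                message="Error during Demo evaluation",
                stacktrace=traceback.format_exc(),
            ),
        )


print(auto_demo_evaluator("").error.stacktrace)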
17 changes: 11 additions & 6 deletions agenta-backend/agenta_backend/tasks/evaluations.py
@@ -249,12 +249,14 @@ def evaluate(
             evaluators_results.append(result_object)
 
         all_correct_answers = [
-            CorrectAnswer(
-                key=ground_truth_column_name,
-                value=data_point[ground_truth_column_name],
+            (
+                CorrectAnswer(
+                    key=ground_truth_column_name,
+                    value=data_point[ground_truth_column_name],
+                )
+                if ground_truth_column_name in data_point
+                else CorrectAnswer(key=ground_truth_column_name, value="")
             )
-            if ground_truth_column_name in data_point
-            else CorrectAnswer(key=ground_truth_column_name, value="")
             for ground_truth_column_name in ground_truth_column_names
         ]
         # 4. We save the result of the eval scenario in the db
@@ -313,7 +315,10 @@ def evaluate(
                 "status": Result(
                     type="status",
                     value="EVALUATION_FAILED",
-                    error=Error(message="Evaluation Failed", stacktrace=str(e)),
+                    error=Error(
+                        message="Evaluation Failed !!!",
+                        stacktrace=str(traceback.format_exc()),
+                    ),
                 )
             },
         )
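The first evaluations.py hunk is mostly Black-style reformatting: the conditional expression inside the list comprehension gets wrapped in parentheses, and the behavior is unchanged, falling back to an empty value whenever a ground-truth column is missing from the data point. A runnable sketch of that pattern, with a hypothetical CorrectAnswer dataclass in place of the backend model:

from dataclasses import dataclass


@dataclass
class CorrectAnswer:  # hypothetical stand-in for the backend model
    key: str
    value: str


data_point = {"answer": "42"}
ground_truth_column_names = ["answer", "reference"]

all_correct_answers = [
    (
        CorrectAnswer(key=name, value=data_point[name])
        if name in data_point
        else CorrectAnswer(key=name, value="")
    )
    for name in ground_truth_column_names
]

print(all_correct_answers)
# [CorrectAnswer(key='answer', value='42'), CorrectAnswer(key='reference', value='')]

The last diff in the PR updates the web UI's StatusRenderer: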
@@ -188,14 +188,15 @@ export const StatusRenderer = React.memo(
         )
         const {label, color} = statusMapper(token)[params.data?.status.value as EvaluationStatus]
         const errorMsg = params.data?.status.error?.message
+        const errorStacktrace = params.data?.status.error?.stacktrace
 
         return (
             <Typography.Text className={classes.statusCell}>
                 <div style={{backgroundColor: color}} />
                 <span>{label}</span>
                 {errorMsg && (
                     <span style={{marginRight: 2}}>
-                        <Tooltip title={errorMsg}>
+                        <Tooltip title={errorStacktrace ? errorStacktrace : ""}>
                             <InfoCircleOutlined />
                         </Tooltip>
                     </span>
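With this change the StatusRenderer reads error?.stacktrace alongside error?.message and uses the stacktrace as the tooltip title, so hovering the info icon on a failed evaluation shows the full traceback captured by the backend rather than the one-line message. As written, the tooltip title is the empty string whenever no stacktrace is present, since the errorMsg fallback was replaced rather than chained.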