Skip to content

Commit

Permalink
adding test results telemetry
Browse files Browse the repository at this point in the history
  • Loading branch information
joaomdmoura committed Aug 10, 2024
1 parent 51ee483 commit 5e83a36
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 0 deletions.
3 changes: 3 additions & 0 deletions src/crewai/crew.py
Original file line number Diff line number Diff line change
Expand Up @@ -936,6 +936,9 @@ def test(
inputs: Optional[Dict[str, Any]] = None,
) -> None:
"""Test and evaluate the Crew with the given inputs for n iterations."""
self._test_execution_span = self._telemetry.test_execution_span(
self, inputs, openai_model_name
)
evaluator = CrewEvaluator(self, openai_model_name)

for i in range(1, n_iterations + 1):
Expand Down
53 changes: 53 additions & 0 deletions src/crewai/telemetry/telemetry.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,59 @@ def tool_usage_error(self, llm: Any):
except Exception:
pass

def individual_test_result_span(
    self, crew: Crew, quality: int, exec_time: int, model_name: str
):
    """Record the score of a single evaluated task during a test run.

    Telemetry is strictly best-effort: when collection is disabled
    (``self.ready`` is falsy) or any step of recording fails, the method
    silently returns ``None``; otherwise it returns the started span.
    """
    if not self.ready:
        return None
    try:
        span = trace.get_tracer("crewai.telemetry").start_span(
            "Crew Individual Test Result"
        )
        # All values are stringified to keep span attributes primitive.
        attributes = (
            ("crewai_version", pkg_resources.get_distribution("crewai").version),
            ("crew_key", crew.key),
            ("crew_id", str(crew.id)),
            ("quality", str(quality)),
            ("exec_time", str(exec_time)),
            ("model_name", model_name),
        )
        for attr_name, attr_value in attributes:
            self._add_attribute(span, attr_name, attr_value)
        return span
    except Exception:
        # Never let telemetry failures propagate into user code.
        pass

def test_execution_span(
self,
crew: Crew,
iterations: int,
inputs: dict[str, Any] | None,
model_name: str,
):
if self.ready:
try:
tracer = trace.get_tracer("crewai.telemetry")
span = tracer.start_span("Crew Test Execution")

self._add_attribute(
span,
"crewai_version",
pkg_resources.get_distribution("crewai").version,
)
self._add_attribute(span, "crew_key", crew.key)
self._add_attribute(span, "crew_id", str(crew.id))
self._add_attribute(span, "iterations", str(iterations))
self._add_attribute(span, "model_name", model_name)

if crew.share_crew:
self._add_attribute(
span, "inputs", json.dumps(inputs) if inputs else None
)

return span
except Exception:
pass

def crew_execution_span(self, crew: Crew, inputs: dict[str, Any] | None):
"""Records the complete execution of a crew.
This is only collected if the user has opted-in to share the crew.
Expand Down
8 changes: 8 additions & 0 deletions src/crewai/utilities/evaluators/crew_evaluator_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from crewai.agent import Agent
from crewai.task import Task
from crewai.tasks.task_output import TaskOutput
from crewai.telemetry import Telemetry


class TaskEvaluationPydanticOutput(BaseModel):
Expand All @@ -34,6 +35,7 @@ class CrewEvaluator:
def __init__(self, crew, openai_model_name: str):
    """Bind the evaluator to a crew and prepare it for a test run.

    Args:
        crew: The crew whose task outputs will be evaluated.
        openai_model_name: Name of the model used to run the evaluation.
    """
    self.openai_model_name = openai_model_name
    self.crew = crew
    # Telemetry is best-effort; instantiating it never raises for the caller.
    self._telemetry = Telemetry()
    # Hook the evaluator into the crew's tasks — see _setup_for_evaluating.
    self._setup_for_evaluating()

def _setup_for_evaluating(self) -> None:
Expand Down Expand Up @@ -155,6 +157,12 @@ def evaluate(self, task_output: TaskOutput):
evaluation_result = evaluation_task.execute_sync()

if isinstance(evaluation_result.pydantic, TaskEvaluationPydanticOutput):
self._test_result_span = self._telemetry.individual_test_result_span(
self,
evaluation_result.pydantic.quality,
current_task._execution_time,
self.openai_model_name,
)
self.tasks_scores[self.iteration].append(evaluation_result.pydantic.quality)
self.run_execution_times[self.iteration].append(
current_task._execution_time
Expand Down

0 comments on commit 5e83a36

Please sign in to comment.