
Merge remote-tracking branch 'origin/addlength'
Jon Bennion authored and Jon Bennion committed Sep 25, 2023
2 parents 1041721 + dd0236b commit d495e29
Showing 9 changed files with 462 additions and 16 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
@@ -37,7 +37,7 @@ jobs:
         run: |
           python -c "import sys; print(sys.version)"
           python -m pip install --upgrade pip
-          python -m pip install -r requirements.txt
+          python -m pip install -r requirements.txt ragas
           python -m pip install . pytest-rerunfailures pytest-asyncio
       - name: Run Unit Tests (pytest)
2 changes: 1 addition & 1 deletion deepeval/_version.py
@@ -1 +1 @@
-__version__: str = "0.16.4"
+__version__: str = "0.17.1"
8 changes: 7 additions & 1 deletion deepeval/api.py
@@ -468,7 +468,13 @@ def list_implementations(self):

     def post_test_run(self, test_run: TestRun):
         """Post a test run"""
+        try:
+            body = test_run.model_dump(by_alias=True)
+        except AttributeError:
+            # Pydantic version below 2.0
+            body = test_run.dict(by_alias=True)
+
         return self.post_request(
             endpoint="/v1/test-run",
-            body=test_run.model_dump(by_alias=True),
+            body=body,
         )
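
The try/except makes post_test_run work under both Pydantic major versions: model_dump() exists only in Pydantic 2.x, while 1.x models expose dict(). A minimal, self-contained sketch of the same fallback pattern; the TestRun model below is an illustrative stand-in, not deepeval's actual class:

from pydantic import BaseModel

class TestRun(BaseModel):  # illustrative stand-in for deepeval's TestRun
    run_name: str

test_run = TestRun(run_name="demo")
try:
    body = test_run.model_dump(by_alias=True)  # Pydantic >= 2.0
except AttributeError:
    body = test_run.dict(by_alias=True)  # Pydantic < 2.0 fallback
print(body)  # {'run_name': 'demo'}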
28 changes: 18 additions & 10 deletions deepeval/cli/test.py
@@ -78,18 +78,25 @@ def sample():
     pass


-def check_if_legit_file(test_file: str):
-    if test_file.endswith(".py"):
-        if not test_file.startswith("test_"):
-            raise ValueError(
-                "Test will not run. Please ensure the `test_` prefix."
-            )
+def check_if_legit_file(test_file_or_directory: str):
+    if os.path.isfile(test_file_or_directory):
+        if test_file_or_directory.endswith(".py"):
+            if not os.path.basename(test_file_or_directory).startswith("test_"):
+                raise ValueError(
+                    "Test will not run. Please ensure the file starts with `test_` prefix."
+                )
+    elif os.path.isdir(test_file_or_directory):
+        return
+    else:
+        raise ValueError(
+            "Provided path is neither a valid file nor a directory."
+        )


 @app.command()
 def run(
     test_file_or_directory: str,
-    verbose: bool = False,
+    verbose: bool = True,
     color: str = "yes",
     durations: int = 10,
     pdb: bool = False,
@@ -98,7 +105,8 @@
     ] = False,
 ):
     """Run a test"""
-    pytest_args = ["-k", test_file_or_directory]
+    check_if_legit_file(test_file_or_directory)
+    pytest_args = [test_file_or_directory]
     if exit_on_first_failure:
         pytest_args.insert(0, "-x")
@@ -111,9 +119,10 @@
             "--verbose" if verbose else "--quiet",
             f"--color={color}",
             f"--durations={durations}",
-            "--pdb" if pdb else "",
         ]
     )
+    if pdb:
+        pytest_args.append("--pdb")
     # Add the deepeval plugin file to pytest arguments
     pytest_args.extend(["-p", "plugins"])
@@ -122,7 +131,6 @@
         TextColumn("[progress.description]{task.description}"),
         transient=True,
     ) as progress:
-        # progress.add_task(description="Preparing tests...", total=None)
         progress.add_task(
             description="Downloading models (may take up to 2 minutes if running for the first time)...",
             total=None,
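
Two behavioral changes here: the path is now validated up front and passed to pytest as a positional argument (so pytest collects the file or directory directly) rather than through the -k keyword filter, and --pdb is appended only when requested. A minimal sketch of why the conditional append matters; the argument values are illustrative:

pdb = False

# Before: disabling pdb left an empty string in the argument list,
# which pytest would receive as a stray positional argument.
args_before = ["--verbose", "--pdb" if pdb else ""]
assert "" in args_before

# After: the flag is only present when requested.
args_after = ["--verbose"]
if pdb:
    args_after.append("--pdb")
assert "" not in args_after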
48 changes: 48 additions & 0 deletions deepeval/dataset.py
@@ -76,6 +76,54 @@ def from_csv(
     def from_test_cases(self, test_cases: list):
         self.data = test_cases

+    @classmethod
+    def from_hf_dataset(
+        cls,
+        dataset_name: str,
+        split: str,
+        query_column: str,
+        expected_output_column: str,
+        context_column: str = None,
+        output_column: str = None,
+        id_column: str = None,
+    ):
+        """
+        Load test cases from a HuggingFace dataset.
+        Args:
+            dataset_name (str): The name of the HuggingFace dataset to load.
+            split (str): The split of the dataset to load (e.g., 'train', 'test').
+            query_column (str): The column in the dataset corresponding to the query.
+            expected_output_column (str): The column in the dataset corresponding to the expected output.
+            context_column (str, optional): The column in the dataset corresponding to the context. Defaults to None.
+            output_column (str, optional): The column in the dataset corresponding to the output. Defaults to None.
+            id_column (str, optional): The column in the dataset corresponding to the ID. Defaults to None.
+        Returns:
+            EvaluationDataset: An instance of EvaluationDataset containing the loaded test cases.
+        """
+        try:
+            from datasets import load_dataset
+        except ImportError:
+            raise ImportError(
+                "The 'datasets' library is missing. Please install it using pip: pip install datasets"
+            )
+
+        hf_dataset = load_dataset(dataset_name, split=split)
+        test_cases = []
+
+        for i, row in enumerate(hf_dataset):
+            test_cases.append(
+                LLMTestCase(
+                    query=row[query_column],
+                    expected_output=row[expected_output_column],
+                    context=row[context_column] if context_column else None,
+                    output=row[output_column] if output_column else None,
+                    id=row[id_column] if id_column else None,
+                )
+            )
+        return cls(test_cases)
+
     @classmethod
     def from_json(
         cls,
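
A hedged usage sketch for the new loader, assuming the optional datasets dependency is installed; the dataset id and column names below are hypothetical and must match your dataset's actual schema:

from deepeval.dataset import EvaluationDataset

# Hypothetical dataset id and column names; substitute your own.
# Requires the optional dependency: pip install datasets
dataset = EvaluationDataset.from_hf_dataset(
    dataset_name="my-org/qa-eval-set",
    split="test",
    query_column="question",
    expected_output_column="answer",
    context_column="context",
)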
4 changes: 3 additions & 1 deletion deepeval/metrics/bias_classifier.py
@@ -25,7 +25,7 @@ def __call__(self, output, expected_output, query: Optional[str] = "-"):
         success = score >= self.minimum_score
         return score

-    def measure(self, test_case: LLMTestCase):
+    def measure(self, test_case: LLMTestCase, return_all_scores: bool = False):
         if test_case.output is None:
             raise ValueError("Required attributes for test_case cannot be None")

@@ -49,6 +49,8 @@ def measure(self, test_case: LLMTestCase):
             self.success = True

         self.score = v
+        if return_all_scores:
+            return results
         return v

     def is_successful(self):
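
With return_all_scores=True, measure() returns the classifier's full results rather than the single aggregate score. A hedged sketch, assuming the metric class in this module is named UnBiasedMetric and that the import paths follow deepeval's layout (neither is confirmed by the diff shown):

from deepeval.metrics.bias_classifier import UnBiasedMetric  # class name assumed; not shown in this diff
from deepeval.test_case import LLMTestCase

metric = UnBiasedMetric()
test_case = LLMTestCase(query="Describe the team.", output="The new hires are all great.")

score = metric.measure(test_case)  # aggregate bias score, as before
all_scores = metric.measure(test_case, return_all_scores=True)  # full classifier output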