diff --git a/polaris/hub/client.py b/polaris/hub/client.py index f3441989..649fd7d9 100644 --- a/polaris/hub/client.py +++ b/polaris/hub/client.py @@ -278,7 +278,8 @@ def login(self, overwrite: bool = False, auto_open_browser: bool = True): # ========================= def list_datasets(self, limit: int = 100, offset: int = 0) -> list[str]: - """List all available datasets on the Polaris Hub. + """List all available datasets (v1 and v2) on the Polaris Hub. + We prioritize v2 datasets over v1 datasets. Args: limit: The maximum number of datasets to return. @@ -288,17 +289,40 @@ def list_datasets(self, limit: int = 100, offset: int = 0) -> list[str]: A list of dataset names in the format `owner/dataset_name`. """ with ProgressIndicator( - start_msg="Fetching artifacts...", - success_msg="Fetched artifacts.", + start_msg="Fetching datasets...", + success_msg="Fetched datasets.", error_msg="Failed to fetch datasets.", ): - response = self._base_request_to_hub( - url="/v1/dataset", method="GET", params={"limit": limit, "offset": offset} - ) - response_data = response.json() - dataset_list = [bm["artifactId"] for bm in response_data["data"]] + # Step 1: Fetch enough v2 datasets to cover the offset and limit + v2_json_response = self._base_request_to_hub( + url="/v2/dataset", method="GET", params={"limit": limit, "offset": offset} + ).json() + v2_data = v2_json_response["data"] + v2_datasets = [dataset["artifactId"] for dataset in v2_data] + + # If v2 datasets satisfy the limit, return them + if len(v2_datasets) == limit: + return v2_datasets + + # Step 2: Calculate the remaining limit and fetch v1 datasets + remaining_limit = max(0, limit - len(v2_datasets)) + + v1_json_response = self._base_request_to_hub( + url="/v1/dataset", + method="GET", + params={ + "limit": remaining_limit, + "offset": max(0, offset - v2_json_response["metadata"]["total"]), + }, + ).json() + v1_data = v1_json_response["data"] + v1_datasets = [dataset["artifactId"] for dataset in v1_data] + + # Combine the v2 and v1 datasets + combined_datasets = v2_datasets + v1_datasets - return dataset_list + # Ensure the final combined list respects the limit + return combined_datasets def get_dataset( self, @@ -323,7 +347,7 @@ def get_dataset( error_msg="Failed to fetch dataset.", ): try: - return self._get_v1_dataset(owner, name, ArtifactSubtype.STANDARD.value, verify_checksum) + return self._get_v1_dataset(owner, name, ArtifactSubtype.STANDARD, verify_checksum) except PolarisRetrieveArtifactError: # If the v1 dataset is not found, try to load a v2 dataset return self._get_v2_dataset(owner, name) @@ -348,7 +372,7 @@ def _get_v1_dataset( """ url = ( f"/v1/dataset/{owner}/{name}" - if artifact_type == ArtifactSubtype.STANDARD.value + if artifact_type == ArtifactSubtype.STANDARD else f"/v2/competition/dataset/{owner}/{name}" ) response = self._base_request_to_hub(url=url, method="GET") @@ -408,18 +432,40 @@ def list_benchmarks(self, limit: int = 100, offset: int = 0) -> list[str]: A list of benchmark names in the format `owner/benchmark_name`. """ with ProgressIndicator( - start_msg="Fetching artifacts...", - success_msg="Fetched artifacts.", + start_msg="Fetching benchmarks...", + success_msg="Fetched benchmarks.", error_msg="Failed to fetch benchmarks.", ): - # TODO (cwognum): What to do with pagination, i.e. limit and offset? 
- response = self._base_request_to_hub( - url="/v1/benchmark", method="GET", params={"limit": limit, "offset": offset} - ) - response_data = response.json() - benchmarks_list = [f"{HubOwner(**bm['owner'])}/{bm['name']}" for bm in response_data["data"]] + # Step 1: Fetch enough v2 benchmarks to cover the offset and limit + v2_json_response = self._base_request_to_hub( + url="/v2/benchmark", method="GET", params={"limit": limit, "offset": offset} + ).json() + v2_data = v2_json_response["data"] + v2_benchmarks = [f"{HubOwner(**benchmark['owner'])}/{benchmark['name']}" for benchmark in v2_data] + + # If v2 benchmarks satisfy the limit, return them + if len(v2_benchmarks) == limit: + return v2_benchmarks + + # Step 2: Calculate the remaining limit and fetch v1 benchmarks + remaining_limit = max(0, limit - len(v2_benchmarks)) + + v1_json_response = self._base_request_to_hub( + url="/v1/benchmark", + method="GET", + params={ + "limit": remaining_limit, + "offset": max(0, offset - v2_json_response["metadata"]["total"]), + }, + ).json() + v1_data = v1_json_response["data"] + v1_benchmarks = [f"{HubOwner(**benchmark['owner'])}/{benchmark['name']}" for benchmark in v1_data] + + # Combine the v2 and v1 benchmarks + combined_benchmarks = v2_benchmarks + v1_benchmarks - return benchmarks_list + # Ensure the final combined list respects the limit + return combined_benchmarks def get_benchmark( self, @@ -582,9 +628,7 @@ def upload_dataset( ) if isinstance(dataset, DatasetV1): - self._upload_v1_dataset( - dataset, ArtifactSubtype.STANDARD.value, timeout, access, owner, if_exists - ) + self._upload_v1_dataset(dataset, ArtifactSubtype.STANDARD, timeout, access, owner, if_exists) elif isinstance(dataset, DatasetV2): self._upload_v2_dataset(dataset, timeout, access, owner, if_exists) @@ -631,7 +675,7 @@ def _upload_v1_dataset( # We do so separately for the Zarr archive and Parquet file. url = ( f"/v1/dataset/{dataset.artifact_id}" - if artifact_type == ArtifactSubtype.STANDARD.value + if artifact_type == ArtifactSubtype.STANDARD else f"/v2/competition/dataset/{dataset.owner}/{dataset.name}" ) self._base_request_to_hub( @@ -674,7 +718,7 @@ def _upload_v1_dataset( ) base_artifact_url = ( - "datasets" if artifact_type == ArtifactSubtype.STANDARD.value else "/competition/datasets" + "datasets" if artifact_type == ArtifactSubtype.STANDARD else "/competition/datasets" ) progress_indicator.update_success_msg( f"Your {artifact_type} dataset has been successfully uploaded to the Hub. 
" @@ -774,7 +818,7 @@ def upload_benchmark( """ match benchmark: case BenchmarkV1Specification(): - self._upload_v1_benchmark(benchmark, ArtifactSubtype.STANDARD.value, access, owner) + self._upload_v1_benchmark(benchmark, ArtifactSubtype.STANDARD, access, owner) case BenchmarkV2Specification(): self._upload_v2_benchmark(benchmark, access, owner) @@ -819,9 +863,7 @@ def _upload_v1_benchmark( benchmark_json["datasetArtifactId"] = benchmark.dataset.artifact_id benchmark_json["access"] = access - path_params = ( - "/v1/benchmark" if artifact_type == ArtifactSubtype.STANDARD.value else "/v2/competition" - ) + path_params = "/v1/benchmark" if artifact_type == ArtifactSubtype.STANDARD else "/v2/competition" url = f"{path_params}/{benchmark.owner}/{benchmark.name}" self._base_request_to_hub(url=url, method="PUT", json=benchmark_json) diff --git a/tests/conftest.py b/tests/conftest.py index 1c487ab7..407e936d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -374,7 +374,7 @@ def test_multi_task_benchmark_multiple_test_sets(test_dataset, regression_metric @pytest.fixture(scope="function") def test_docking_dataset(tmp_path, sdf_files, test_org_owner): # toy docking dataset - factory = DatasetFactory(tmp_path / "ligands.zarr") + factory = DatasetFactory(str(tmp_path / "ligands.zarr")) converter = SDFConverter(mol_prop_as_cols=True) factory.register_converter("sdf", converter) diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index 206b9014..c343ad84 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -198,11 +198,12 @@ def _check_for_failure(_kwargs): kwargs.pop("target_types", None) # Reset target types that matches deleted target column _check_for_failure(kwargs) + # Input columns kwargs = benchmark.model_dump() kwargs["input_cols"] = kwargs["input_cols"][1:] + ["iupac"] _check_for_failure(kwargs) - # --- Don't fail if not checksum is provided --- + # --- Don't fail if no checksum is provided --- kwargs["md5sum"] = None dataset = cls(dataset=benchmark.dataset, **kwargs) assert dataset.md5sum is not None diff --git a/tests/test_dataset.py b/tests/test_dataset.py index 80d93597..4b354c8d 100644 --- a/tests/test_dataset.py +++ b/tests/test_dataset.py @@ -88,7 +88,7 @@ def test_dataset_checksum(test_dataset): def test_dataset_from_zarr(zarr_archive, tmp_path): """Test whether loading works when the zarr archive contains a single array or multiple arrays.""" archive = zarr_archive - dataset = create_dataset_from_file(archive, tmp_path / "data") + dataset = create_dataset_from_file(archive, str(tmp_path / "data")) assert len(dataset.table) == 100 for i in range(100): @@ -115,8 +115,8 @@ def test_dataset_from_zarr_to_json_and_back(zarr_archive, tmp_path): can be saved to and loaded from json. 
""" - json_dir = tmp_path / "json" - zarr_dir = tmp_path / "zarr" + json_dir = str(tmp_path / "json") + zarr_dir = str(tmp_path / "zarr") archive = zarr_archive dataset = create_dataset_from_file(archive, zarr_dir) @@ -132,8 +132,8 @@ def test_dataset_from_zarr_to_json_and_back(zarr_archive, tmp_path): def test_dataset_caching(zarr_archive, tmp_path): """Test whether the dataset remains the same after caching.""" - original_dataset = create_dataset_from_file(zarr_archive, tmp_path / "original1") - cached_dataset = create_dataset_from_file(zarr_archive, tmp_path / "original2") + original_dataset = create_dataset_from_file(zarr_archive, str(tmp_path / "original1")) + cached_dataset = create_dataset_from_file(zarr_archive, str(tmp_path / "original2")) assert original_dataset == cached_dataset cached_dataset._cache_dir = str(tmp_path / "cached") @@ -153,7 +153,7 @@ def test_dataset_index(): def test_dataset_in_memory_optimization(zarr_archive, tmp_path): """Check if optimization makes a default Zarr archive faster.""" - dataset = create_dataset_from_file(zarr_archive, tmp_path / "dataset") + dataset = create_dataset_from_file(zarr_archive, str(tmp_path / "dataset")) subset = Subset(dataset=dataset, indices=range(100), input_cols=["A"], target_cols=["B"]) t1 = perf_counter() @@ -208,7 +208,7 @@ def test_dataset__get_item__(): def test_dataset__get_item__with_pointer_columns(zarr_archive, tmp_path): """Test the __getitem__() interface for a dataset with pointer columns (i.e. part of the data stored in Zarr).""" - dataset = create_dataset_from_file(zarr_archive, tmp_path / "data") + dataset = create_dataset_from_file(zarr_archive, str(tmp_path / "data")) root = zarr.open(zarr_archive) # Get a specific cell diff --git a/tests/test_dataset_v2.py b/tests/test_dataset_v2.py index 11b7caa7..a9c18137 100644 --- a/tests/test_dataset_v2.py +++ b/tests/test_dataset_v2.py @@ -66,7 +66,7 @@ def test_dataset_v2_load_to_memory(test_dataset_v2): def test_dataset_v2_serialization(test_dataset_v2, tmp_path): - save_dir = tmp_path / "save_dir" + save_dir = str(tmp_path / "save_dir") path = test_dataset_v2.to_json(save_dir) new_dataset = DatasetV2.from_json(path) for i in range(5): @@ -86,7 +86,7 @@ def test_dataset_v1_v2_compatibility(test_dataset, tmp_path): # We can thus also saved these same arrays to a Zarr archive df = test_dataset.table - path = tmp_path / "data/v1v2.zarr" + path = str(tmp_path / "data" / "v1v2.zarr") root = zarr.open(path, "w") root.array("smiles", data=df["smiles"].values, dtype=object, object_codec=numcodecs.VLenUTF8()) @@ -96,7 +96,7 @@ def test_dataset_v1_v2_compatibility(test_dataset, tmp_path): zarr.consolidate_metadata(path) kwargs = test_dataset.model_dump(exclude=["table", "zarr_root_path"]) - dataset = DatasetV2(**kwargs, zarr_root_path=str(path)) + dataset = DatasetV2(**kwargs, zarr_root_path=path) subset_1 = Subset(dataset=test_dataset, indices=range(5), input_cols=["smiles"], target_cols=["calc"]) subset_2 = Subset(dataset=dataset, indices=range(5), input_cols=["smiles"], target_cols=["calc"]) diff --git a/tests/test_evaluate.py b/tests/test_evaluate.py index 38823155..c3822905 100644 --- a/tests/test_evaluate.py +++ b/tests/test_evaluate.py @@ -1,4 +1,4 @@ -import os +from pathlib import Path import datamol as dm import numpy as np @@ -17,7 +17,7 @@ from polaris.utils.types import HubOwner -def test_result_to_json(tmp_path: str, test_user_owner: HubOwner): +def test_result_to_json(tmp_path: Path, test_user_owner: HubOwner): scores = pd.DataFrame( { "Test set": ["A", "A", "A", 
"A", "B", "B", "B", "B"], @@ -41,7 +41,7 @@ def test_result_to_json(tmp_path: str, test_user_owner: HubOwner): contributors=["my-user", "other-user"], ) - path = os.path.join(tmp_path, "result.json") + path = str(tmp_path / "result.json") result.to_json(path) BenchmarkResults.from_json(path) diff --git a/tests/test_factory.py b/tests/test_factory.py index 0fe6d308..01fafb5c 100644 --- a/tests/test_factory.py +++ b/tests/test_factory.py @@ -39,7 +39,7 @@ def _check_dataset(dataset, ground_truth, mol_props_as_col): def test_sdf_zarr_conversion(sdf_file, caffeine, tmp_path): """Test conversion between SDF and Zarr with utility function""" - dataset = create_dataset_from_file(sdf_file, tmp_path / "archive.zarr") + dataset = create_dataset_from_file(sdf_file, str(tmp_path / "archive.zarr")) _check_dataset(dataset, [caffeine], True) @@ -47,7 +47,7 @@ def test_sdf_zarr_conversion(sdf_file, caffeine, tmp_path): def test_factory_sdf_with_prop_as_col(sdf_file, caffeine, tmp_path, mol_props_as_col): """Test conversion between SDF and Zarr with factory pattern""" - factory = DatasetFactory(tmp_path / "archive.zarr") + factory = DatasetFactory(str(tmp_path / "archive.zarr")) converter = SDFConverter(mol_prop_as_cols=mol_props_as_col) factory.register_converter("sdf", converter) @@ -60,7 +60,7 @@ def test_factory_sdf_with_prop_as_col(sdf_file, caffeine, tmp_path, mol_props_as def test_zarr_to_zarr_conversion(zarr_archive, tmp_path): """Test conversion between Zarr and Zarr with utility function""" - dataset = create_dataset_from_file(zarr_archive, tmp_path / "archive.zarr") + dataset = create_dataset_from_file(zarr_archive, str(tmp_path / "archive.zarr")) assert len(dataset) == 100 assert len(dataset.columns) == 2 assert all(c in dataset.columns for c in ["A", "B"]) @@ -71,7 +71,7 @@ def test_zarr_to_zarr_conversion(zarr_archive, tmp_path): def test_zarr_with_factory_pattern(zarr_archive, tmp_path): """Test conversion between Zarr and Zarr with factory pattern""" - factory = DatasetFactory(tmp_path / "archive.zarr") + factory = DatasetFactory(str(tmp_path / "archive.zarr")) converter = ZarrConverter() factory.register_converter("zarr", converter) factory.add_from_file(zarr_archive) @@ -90,7 +90,7 @@ def test_zarr_with_factory_pattern(zarr_archive, tmp_path): def test_factory_pdb(pdbs_structs, pdb_paths, tmp_path): """Test conversion between PDB file and Zarr with factory pattern""" - factory = DatasetFactory(tmp_path / "pdb.zarr") + factory = DatasetFactory(str(tmp_path / "pdb.zarr")) converter = PDBConverter() factory.register_converter("pdb", converter) @@ -104,7 +104,7 @@ def test_factory_pdb(pdbs_structs, pdb_paths, tmp_path): def test_factory_pdbs(pdbs_structs, pdb_paths, tmp_path): """Test conversion between PDB files and Zarr with factory pattern""" - factory = DatasetFactory(tmp_path / "pdbs.zarr") + factory = DatasetFactory(str(tmp_path / "pdbs.zarr")) converter = PDBConverter() factory.register_converter("pdb", converter) @@ -119,7 +119,7 @@ def test_factory_pdbs(pdbs_structs, pdb_paths, tmp_path): def test_pdbs_zarr_conversion(pdbs_structs, pdb_paths, tmp_path): """Test conversion between PDBs and Zarr with utility function""" - dataset = create_dataset_from_files(pdb_paths, tmp_path / "pdbs_2.zarr", axis=0) + dataset = create_dataset_from_files(pdb_paths, str(tmp_path / "pdbs_2.zarr"), axis=0) assert dataset.table.shape[0] == len(pdb_paths) _check_pdb_dataset(dataset, pdbs_structs) @@ -128,7 +128,7 @@ def test_pdbs_zarr_conversion(pdbs_structs, pdb_paths, tmp_path): def 
test_factory_sdfs(sdf_files, caffeine, ibuprofen, tmp_path): """Test conversion between SDF and Zarr with factory pattern""" - factory = DatasetFactory(tmp_path / "sdfs.zarr") + factory = DatasetFactory(str(tmp_path / "sdfs.zarr")) converter = SDFConverter(mol_prop_as_cols=True) factory.register_converter("sdf", converter) @@ -142,7 +142,7 @@ def test_factory_sdfs(sdf_files, caffeine, ibuprofen, tmp_path): def test_factory_sdf_pdb(sdf_file, pdb_paths, caffeine, pdbs_structs, tmp_path): """Test conversion between SDF and PDB from files to Zarr with factory pattern""" - factory = DatasetFactory(tmp_path / "sdf_pdb.zarr") + factory = DatasetFactory(str(tmp_path / "sdf_pdb.zarr")) sdf_converter = SDFConverter(mol_prop_as_cols=False) factory.register_converter("sdf", sdf_converter) @@ -158,7 +158,7 @@ def test_factory_sdf_pdb(sdf_file, pdb_paths, caffeine, pdbs_structs, tmp_path): def test_factory_from_files_same_column(sdf_files, pdb_paths, tmp_path): - factory = DatasetFactory(tmp_path / "files.zarr") + factory = DatasetFactory(str(tmp_path / "files.zarr")) sdf_converter = SDFConverter(mol_prop_as_cols=False) factory.register_converter("sdf", sdf_converter) diff --git a/tests/test_zarr_checksum.py b/tests/test_zarr_checksum.py index 4815b09f..cad8c859 100644 --- a/tests/test_zarr_checksum.py +++ b/tests/test_zarr_checksum.py @@ -110,13 +110,13 @@ def test_checksum_for_zarr_archive(zarr_archive, tmp_path): # NOTE: This test was not in the original code base of the zarr-checksum package. checksum, _ = compute_zarr_checksum(zarr_archive) - path = tmp_path / "copy" + path = str(tmp_path / "copy") copytree(zarr_archive, path) - assert checksum == compute_zarr_checksum(str(path))[0] + assert checksum == compute_zarr_checksum(path)[0] root = zarr.open(path) root["A"][0:10] = 0 - assert checksum != compute_zarr_checksum(str(path))[0] + assert checksum != compute_zarr_checksum(path)[0] def test_zarr_leaf_to_checksum(zarr_archive):
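
Note on the v2-first listing introduced above: `list_datasets` and `list_benchmarks` use the same merge pattern, so the pagination behaviour is easiest to see in isolation. Below is a minimal, standalone sketch of that pattern. It assumes only what the diff shows about the Hub responses (a "data" list plus "metadata.total"); the helper name `merge_paginated`, the fake fetch functions, and the example artifact names are illustrative and not part of the client.

# Illustrative sketch of the v2-before-v1 pagination merge (not part of the patch).
# `fetch_v2` / `fetch_v1` stand in for the /v2 and /v1 endpoint calls and are assumed
# to return dicts shaped like the Hub responses in the diff:
#   {"data": [...], "metadata": {"total": N}}

def merge_paginated(fetch_v2, fetch_v1, limit=100, offset=0):
    v2_page = fetch_v2(limit=limit, offset=offset)
    v2_items = v2_page["data"]

    # If the v2 page alone fills the limit, v1 is never queried.
    if len(v2_items) == limit:
        return v2_items

    # Otherwise v2 is exhausted for this offset: shift the offset past the total
    # number of v2 items and fill the remainder of the page from v1.
    remaining = max(0, limit - len(v2_items))
    v1_offset = max(0, offset - v2_page["metadata"]["total"])
    v1_items = fetch_v1(limit=remaining, offset=v1_offset)["data"]

    return v2_items + v1_items


# Worked example (hypothetical data): 3 v2 artifacts and 5 v1 artifacts in total.
# Paging with limit=4: page 1 returns all 3 v2 items plus the first v1 item,
# page 2 (offset=4) continues from the second v1 item.
v2_all = [f"org/v2-{i}" for i in range(3)]
v1_all = [f"org/v1-{i}" for i in range(5)]
fake_v2 = lambda limit, offset: {"data": v2_all[offset:offset + limit], "metadata": {"total": len(v2_all)}}
fake_v1 = lambda limit, offset: {"data": v1_all[offset:offset + limit], "metadata": {"total": len(v1_all)}}

assert merge_paginated(fake_v2, fake_v1, limit=4, offset=0) == ["org/v2-0", "org/v2-1", "org/v2-2", "org/v1-0"]
assert merge_paginated(fake_v2, fake_v1, limit=4, offset=4) == ["org/v1-1", "org/v1-2", "org/v1-3", "org/v1-4"]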