From 65e8ac713215efe62bc8d668568aa61fd134b869 Mon Sep 17 00:00:00 2001 From: Julien St-Laurent Date: Thu, 19 Dec 2024 22:46:41 -0500 Subject: [PATCH] Move DatasetV2 out of experimental --- docs/api/dataset.md | 8 +++++++- polaris/competition/__init__.py | 12 ++++-------- polaris/dataset/__init__.py | 14 ++++++++------ polaris/{experimental => dataset}/_dataset_v2.py | 0 polaris/experimental/_benchmark_v2.py | 3 +-- polaris/hub/client.py | 9 +++------ tests/conftest.py | 3 +-- tests/test_dataset_v2.py | 4 ++-- 8 files changed, 26 insertions(+), 27 deletions(-) rename polaris/{experimental => dataset}/_dataset_v2.py (100%) diff --git a/docs/api/dataset.md b/docs/api/dataset.md index 2b3cb7c4..225d6390 100644 --- a/docs/api/dataset.md +++ b/docs/api/dataset.md @@ -4,6 +4,12 @@ --- +::: polaris.dataset.DatasetV2 + options: + filters: ["!^_"] + +--- + ::: polaris.dataset._base.BaseDataset options: filters: ["!^_"] @@ -20,4 +26,4 @@ options: filters: ["!^_"] ---- \ No newline at end of file +--- diff --git a/polaris/competition/__init__.py b/polaris/competition/__init__.py index bd4b56c0..8836f931 100644 --- a/polaris/competition/__init__.py +++ b/polaris/competition/__init__.py @@ -8,9 +8,8 @@ from polaris.benchmark import BenchmarkSpecification from polaris.benchmark._base import ColumnName -from polaris.dataset import Subset +from polaris.dataset import DatasetV2, Subset from polaris.evaluate import CompetitionPredictions -from polaris.experimental._dataset_v2 import DatasetV2 from polaris.utils.dict2html import dict2html from polaris.utils.errors import InvalidCompetitionError from polaris.utils.misc import listit @@ -26,7 +25,7 @@ class CompetitionSpecification(DatasetV2, BenchmarkSpecification): """An instance of this class represents a Polaris competition. It defines fields and functionality - that in combination with the `polaris.experimental._dataset_v2.DatasetV2` class, allow + that in combination with the `polaris.dataset.DatasetV2` class, allow users to participate in competitions hosted on Polaris Hub. Examples: @@ -70,7 +69,7 @@ class CompetitionSpecification(DatasetV2, BenchmarkSpecification): end_time: The time at which the competition stops accepting prediction submissions. n_classes: The number of classes within target columns that define a classification task. - For additional meta-data attributes, see the `polaris.experimental._dataset_v2.DatasetV2` class. + For additional meta-data attributes, see the `polaris.dataset.DatasetV2` class. """ _artifact_type = "competition" @@ -166,7 +165,6 @@ def _serialize_split(self, v: SplitType): """Convert any tuple to list to make sure it's serializable""" return listit(v) - @computed_field @property def dataset_artifact_id(self) -> str: @@ -201,9 +199,7 @@ def test_set_labels(self) -> list[str]: def n_test_datapoints(self) -> dict[str, int]: """The size of (each of) the test set(s).""" if self.n_test_sets == 1: - return { - "test": len(self.split[1]) - } + return {"test": len(self.split[1])} else: return {k: len(v) for k, v in self.split[1].items()} diff --git a/polaris/dataset/__init__.py b/polaris/dataset/__init__.py index 932b4470..8084749c 100644 --- a/polaris/dataset/__init__.py +++ b/polaris/dataset/__init__.py @@ -1,17 +1,19 @@ from polaris.dataset._column import ColumnAnnotation, KnownContentType, Modality from polaris.dataset._dataset import DatasetV1, DatasetV1 as Dataset +from polaris.dataset._dataset_v2 import DatasetV2 from polaris.dataset._factory import DatasetFactory, create_dataset_from_file, create_dataset_from_files from polaris.dataset._subset import Subset + __all__ = [ + "create_dataset_from_file", + "create_dataset_from_files", "ColumnAnnotation", "Dataset", - "Subset", - "Modality", - "KnownContentType", "DatasetFactory", - "create_dataset_from_file", - "create_dataset_from_files", "DatasetV1", - "Dataset", + "DatasetV2", + "KnownContentType", + "Modality", + "Subset", ] diff --git a/polaris/experimental/_dataset_v2.py b/polaris/dataset/_dataset_v2.py similarity index 100% rename from polaris/experimental/_dataset_v2.py rename to polaris/dataset/_dataset_v2.py diff --git a/polaris/experimental/_benchmark_v2.py b/polaris/experimental/_benchmark_v2.py index fc081021..2bcf9cb1 100644 --- a/polaris/experimental/_benchmark_v2.py +++ b/polaris/experimental/_benchmark_v2.py @@ -10,8 +10,7 @@ from polaris.benchmark import BenchmarkSpecification from polaris.benchmark._base import ColumnName -from polaris.dataset import Subset -from polaris.experimental._dataset_v2 import DatasetV2 +from polaris.dataset import DatasetV2, Subset from polaris.utils.errors import InvalidBenchmarkError diff --git a/polaris/hub/client.py b/polaris/hub/client.py index 18db62a0..ef33e0ac 100644 --- a/polaris/hub/client.py +++ b/polaris/hub/client.py @@ -22,10 +22,9 @@ SingleTaskBenchmarkSpecification, ) from polaris.competition import CompetitionSpecification -from polaris.dataset import Dataset, DatasetV1 +from polaris.dataset import Dataset, DatasetV1, DatasetV2 from polaris.evaluate import BenchmarkResults, CompetitionPredictions from polaris.experimental._benchmark_v2 import BenchmarkV2Specification -from polaris.experimental._dataset_v2 import DatasetV2 from polaris.hub.external_client import ExternalAuthClient from polaris.hub.oauth import CachedTokenAuth from polaris.hub.settings import PolarisHubSettings @@ -616,7 +615,7 @@ def _upload_v1_dataset( # Instead of directly uploading the data, we announce to the hub that we intend to upload it. # We do so separately for the Zarr archive and Parquet file. url = f"/v1/dataset/{dataset.artifact_id}" - response = self._base_request_to_hub( + self._base_request_to_hub( url=url, method="PUT", json={ @@ -850,8 +849,7 @@ def get_competition(self, artifact_id: str) -> CompetitionSpecification: """Load a competition from the Polaris Hub. Args: - owner: The owner of the competition. Can be either a user or organization from the Polaris Hub. - name: The name of the competition. + artifact_id: The artifact identifier for the competition Returns: A `CompetitionSpecification` instance, if it exists. @@ -898,7 +896,6 @@ def submit_competition_predictions( method="POST", json=prediction_payload, ) - response_data = response.json() # Log success and return submission response progress_indicator.update_success_msg( diff --git a/tests/conftest.py b/tests/conftest.py index 8bdbd35c..0c4b64cd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -12,9 +12,8 @@ SingleTaskBenchmarkSpecification, ) from polaris.competition import CompetitionSpecification -from polaris.dataset import ColumnAnnotation, DatasetFactory, DatasetV1 +from polaris.dataset import ColumnAnnotation, DatasetFactory, DatasetV1, DatasetV2 from polaris.dataset.converters import SDFConverter -from polaris.experimental._dataset_v2 import DatasetV2 from polaris.utils.types import HubOwner diff --git a/tests/test_dataset_v2.py b/tests/test_dataset_v2.py index 11b7caa7..177d5495 100644 --- a/tests/test_dataset_v2.py +++ b/tests/test_dataset_v2.py @@ -9,11 +9,11 @@ import zarr from pydantic import ValidationError -from polaris.dataset import Subset +from polaris.dataset import DatasetV2, Subset +from polaris.dataset._dataset_v2 import _INDEX_ARRAY_KEY from polaris.dataset._factory import DatasetFactory from polaris.dataset.converters._pdb import PDBConverter from polaris.dataset.zarr._manifest import generate_zarr_manifest -from polaris.experimental._dataset_v2 import _INDEX_ARRAY_KEY, DatasetV2 def test_dataset_v2_get_columns(test_dataset_v2):