diff --git a/examples/30_extended/tasks_tutorial.py b/examples/30_extended/tasks_tutorial.py
index 4befe1a07..c755d265e 100644
--- a/examples/30_extended/tasks_tutorial.py
+++ b/examples/30_extended/tasks_tutorial.py
@@ -8,6 +8,7 @@
 # License: BSD 3-Clause
 import openml
+from openml.tasks import TaskType
 import pandas as pd
 ############################################################################
@@ -30,7 +31,7 @@
 #
 # We will start by simply listing only *supervised classification* tasks:
-tasks = openml.tasks.list_tasks(task_type_id=1)
+tasks = openml.tasks.list_tasks(task_type=TaskType.SUPERVISED_CLASSIFICATION)
 ############################################################################
 # **openml.tasks.list_tasks()** returns a dictionary of dictionaries by default, which we convert
 #
@@ -45,7 +46,9 @@
 # As conversion to a pandas dataframe is a common task, we have added this functionality to the
 # OpenML-Python library which can be used by passing ``output_format='dataframe'``:
-tasks_df = openml.tasks.list_tasks(task_type_id=1, output_format="dataframe")
+tasks_df = openml.tasks.list_tasks(
+    task_type=TaskType.SUPERVISED_CLASSIFICATION, output_format="dataframe"
+)
 print(tasks_df.head())
 ############################################################################
@@ -155,7 +158,7 @@
 #
 # Creating a task requires the following input:
 #
-# * task_type_id: The task type ID, required (see below). Required.
+# * task_type: The task type (see below). Required.
 # * dataset_id: The dataset ID. Required.
 # * target_name: The name of the attribute you aim to predict. Optional.
 # * estimation_procedure_id : The ID of the estimation procedure used to create train-test
 #
@@ -186,9 +189,8 @@
 openml.config.start_using_configuration_for_example()
 try:
-    tasktypes = openml.tasks.TaskTypeEnum
     my_task = openml.tasks.create_task(
-        task_type_id=tasktypes.SUPERVISED_CLASSIFICATION,
+        task_type=TaskType.SUPERVISED_CLASSIFICATION,
         dataset_id=128,
         target_name="class",
         evaluation_measure="predictive_accuracy",
diff --git a/examples/40_paper/2015_neurips_feurer_example.py b/examples/40_paper/2015_neurips_feurer_example.py
index c68189784..733a436ad 100644
--- a/examples/40_paper/2015_neurips_feurer_example.py
+++ b/examples/40_paper/2015_neurips_feurer_example.py
@@ -58,7 +58,7 @@
 # deactivated, which also deactivated the tasks on them. More information on active or inactive
 # datasets can be found in the `online docs `_.
tasks = openml.tasks.list_tasks( - task_type_id=openml.tasks.TaskTypeEnum.SUPERVISED_CLASSIFICATION, + task_type=openml.tasks.TaskType.SUPERVISED_CLASSIFICATION, status="all", output_format="dataframe", ) diff --git a/openml/runs/functions.py b/openml/runs/functions.py index a3888d3a1..2b767eaa1 100644 --- a/openml/runs/functions.py +++ b/openml/runs/functions.py @@ -32,7 +32,7 @@ ) from .run import OpenMLRun from .trace import OpenMLRunTrace -from ..tasks import TaskTypeEnum, get_task +from ..tasks import TaskType, get_task # Avoid import cycles: https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles if TYPE_CHECKING: @@ -274,7 +274,7 @@ def run_flow_on_task( run.parameter_settings = flow.extension.obtain_parameter_values(flow) # now we need to attach the detailed evaluations - if task.task_type_id == TaskTypeEnum.LEARNING_CURVE: + if task.task_type_id == TaskType.LEARNING_CURVE: run.sample_evaluations = sample_evaluations else: run.fold_evaluations = fold_evaluations @@ -772,7 +772,7 @@ def obtain_field(xml_obj, fieldname, from_server, cast=None): if "predictions" not in files and from_server is True: task = openml.tasks.get_task(task_id) - if task.task_type_id == TaskTypeEnum.SUBGROUP_DISCOVERY: + if task.task_type_id == TaskType.SUBGROUP_DISCOVERY: raise NotImplementedError("Subgroup discovery tasks are not yet supported.") else: # JvR: actually, I am not sure whether this error should be raised. @@ -1008,7 +1008,7 @@ def __list_runs(api_call, output_format="dict"): "setup_id": int(run_["oml:setup_id"]), "flow_id": int(run_["oml:flow_id"]), "uploader": int(run_["oml:uploader"]), - "task_type": int(run_["oml:task_type_id"]), + "task_type": TaskType(int(run_["oml:task_type_id"])), "upload_time": str(run_["oml:upload_time"]), "error_message": str((run_["oml:error_message"]) or ""), } diff --git a/openml/runs/run.py b/openml/runs/run.py index b8be9c3a3..0311272b2 100644 --- a/openml/runs/run.py +++ b/openml/runs/run.py @@ -16,7 +16,7 @@ from ..flows import get_flow from ..tasks import ( get_task, - TaskTypeEnum, + TaskType, OpenMLClassificationTask, OpenMLLearningCurveTask, OpenMLClusteringTask, @@ -401,17 +401,13 @@ def get_metric_fn(self, sklearn_fn, kwargs=None): attribute_names = [att[0] for att in predictions_arff["attributes"]] if ( - task.task_type_id - in [TaskTypeEnum.SUPERVISED_CLASSIFICATION, TaskTypeEnum.LEARNING_CURVE] + task.task_type_id in [TaskType.SUPERVISED_CLASSIFICATION, TaskType.LEARNING_CURVE] and "correct" not in attribute_names ): raise ValueError('Attribute "correct" should be set for ' "classification task runs") - if ( - task.task_type_id == TaskTypeEnum.SUPERVISED_REGRESSION - and "truth" not in attribute_names - ): + if task.task_type_id == TaskType.SUPERVISED_REGRESSION and "truth" not in attribute_names: raise ValueError('Attribute "truth" should be set for ' "regression task runs") - if task.task_type_id != TaskTypeEnum.CLUSTERING and "prediction" not in attribute_names: + if task.task_type_id != TaskType.CLUSTERING and "prediction" not in attribute_names: raise ValueError('Attribute "predict" should be set for ' "supervised task runs") def _attribute_list_to_dict(attribute_list): @@ -431,11 +427,11 @@ def _attribute_list_to_dict(attribute_list): predicted_idx = attribute_dict["prediction"] # Assume supervised task if ( - task.task_type_id == TaskTypeEnum.SUPERVISED_CLASSIFICATION - or task.task_type_id == TaskTypeEnum.LEARNING_CURVE + task.task_type_id == TaskType.SUPERVISED_CLASSIFICATION + or task.task_type_id == TaskType.LEARNING_CURVE ): 
            correct_idx = attribute_dict["correct"]
-        elif task.task_type_id == TaskTypeEnum.SUPERVISED_REGRESSION:
+        elif task.task_type_id == TaskType.SUPERVISED_REGRESSION:
             correct_idx = attribute_dict["truth"]
         has_samples = False
         if "sample" in attribute_dict:
@@ -465,14 +461,14 @@ def _attribute_list_to_dict(attribute_list):
                 samp = 0  # No learning curve sample, always 0
             if task.task_type_id in [
-                TaskTypeEnum.SUPERVISED_CLASSIFICATION,
-                TaskTypeEnum.LEARNING_CURVE,
+                TaskType.SUPERVISED_CLASSIFICATION,
+                TaskType.LEARNING_CURVE,
             ]:
                 prediction = predictions_arff["attributes"][predicted_idx][1].index(
                     line[predicted_idx]
                 )
                 correct = predictions_arff["attributes"][predicted_idx][1].index(line[correct_idx])
-            elif task.task_type_id == TaskTypeEnum.SUPERVISED_REGRESSION:
+            elif task.task_type_id == TaskType.SUPERVISED_REGRESSION:
                 prediction = line[predicted_idx]
                 correct = line[correct_idx]
             if rep not in values_predict:
diff --git a/openml/tasks/__init__.py b/openml/tasks/__init__.py
index f5e046f37..cba0aa14f 100644
--- a/openml/tasks/__init__.py
+++ b/openml/tasks/__init__.py
@@ -7,7 +7,7 @@
     OpenMLRegressionTask,
     OpenMLClusteringTask,
     OpenMLLearningCurveTask,
-    TaskTypeEnum,
+    TaskType,
 )
 from .split import OpenMLSplit
 from .functions import (
@@ -29,5 +29,5 @@
     "get_tasks",
     "list_tasks",
     "OpenMLSplit",
-    "TaskTypeEnum",
+    "TaskType",
 ]
diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py
index a82ce4a12..f775f5e10 100644
--- a/openml/tasks/functions.py
+++ b/openml/tasks/functions.py
@@ -15,7 +15,7 @@
     OpenMLClassificationTask,
     OpenMLClusteringTask,
     OpenMLLearningCurveTask,
-    TaskTypeEnum,
+    TaskType,
     OpenMLRegressionTask,
     OpenMLSupervisedTask,
     OpenMLTask,
@@ -109,7 +109,7 @@ def _get_estimation_procedure_list():
             procs.append(
                 {
                     "id": int(proc_["oml:id"]),
-                    "task_type_id": int(proc_["oml:ttid"]),
+                    "task_type_id": TaskType(int(proc_["oml:ttid"])),
                     "name": proc_["oml:name"],
                     "type": proc_["oml:type"],
                 }
@@ -119,7 +119,7 @@
 def list_tasks(
-    task_type_id: Optional[int] = None,
+    task_type: Optional[TaskType] = None,
     offset: Optional[int] = None,
     size: Optional[int] = None,
     tag: Optional[str] = None,
@@ -127,14 +127,14 @@
     **kwargs
 ) -> Union[Dict, pd.DataFrame]:
     """
-    Return a number of tasks having the given tag and task_type_id
+    Return a number of tasks having the given tag and task_type
     Parameters
     ----------
-    Filter task_type_id is separated from the other filters because
-    it is used as task_type_id in the task description, but it is named
+    Filter task_type is separated from the other filters because
+    it is used as task_type in the task description, but it is named
     type when used as a filter in list tasks call.
-    task_type_id : int, optional
+    task_type : TaskType, optional
         ID of the task type as detailed `here `_.
         - Supervised classification: 1
         - Supervised regression: 2
@@ -162,12 +162,12 @@
     Returns
     -------
     dict
-        All tasks having the given task_type_id and the give tag. Every task is
+        All tasks having the given task_type and the given tag. Every task is
         represented by a dictionary containing the following information:
         task id, dataset id, task_type and status. If qualities are
         calculated for the associated dataset, some of these are also returned.
     dataframe
-        All tasks having the given task_type_id and the give tag. Every task is
+        All tasks having the given task_type and the given tag. 
Every task is represented by a row in the data frame containing the following information as columns: task id, dataset id, task_type and status. If qualities are calculated for the associated dataset, some of these are also returned. @@ -179,7 +179,7 @@ def list_tasks( return openml.utils._list_all( output_format=output_format, listing_call=_list_tasks, - task_type_id=task_type_id, + task_type=task_type, offset=offset, size=size, tag=tag, @@ -187,15 +187,15 @@ def list_tasks( ) -def _list_tasks(task_type_id=None, output_format="dict", **kwargs): +def _list_tasks(task_type=None, output_format="dict", **kwargs): """ Perform the api call to return a number of tasks having the given filters. Parameters ---------- - Filter task_type_id is separated from the other filters because - it is used as task_type_id in the task description, but it is named + Filter task_type is separated from the other filters because + it is used as task_type in the task description, but it is named type when used as a filter in list tasks call. - task_type_id : int, optional + task_type : TaskType, optional ID of the task type as detailed `here `_. - Supervised classification: 1 @@ -220,8 +220,8 @@ def _list_tasks(task_type_id=None, output_format="dict", **kwargs): dict or dataframe """ api_call = "task/list" - if task_type_id is not None: - api_call += "/type/%d" % int(task_type_id) + if task_type is not None: + api_call += "/type/%d" % task_type.value if kwargs is not None: for operator, value in kwargs.items(): if operator == "task_id": @@ -259,7 +259,7 @@ def __list_tasks(api_call, output_format="dict"): tid = int(task_["oml:task_id"]) task = { "tid": tid, - "ttid": int(task_["oml:task_type_id"]), + "ttid": TaskType(int(task_["oml:task_type_id"])), "did": int(task_["oml:did"]), "name": task_["oml:name"], "task_type": task_["oml:task_type"], @@ -417,18 +417,18 @@ def _create_task_from_xml(xml): "oml:evaluation_measure" ] - task_type_id = int(dic["oml:task_type_id"]) + task_type = TaskType(int(dic["oml:task_type_id"])) common_kwargs = { "task_id": dic["oml:task_id"], "task_type": dic["oml:task_type"], - "task_type_id": dic["oml:task_type_id"], + "task_type_id": task_type, "data_set_id": inputs["source_data"]["oml:data_set"]["oml:data_set_id"], "evaluation_measure": evaluation_measures, } - if task_type_id in ( - TaskTypeEnum.SUPERVISED_CLASSIFICATION, - TaskTypeEnum.SUPERVISED_REGRESSION, - TaskTypeEnum.LEARNING_CURVE, + if task_type in ( + TaskType.SUPERVISED_CLASSIFICATION, + TaskType.SUPERVISED_REGRESSION, + TaskType.LEARNING_CURVE, ): # Convert some more parameters for parameter in inputs["estimation_procedure"]["oml:estimation_procedure"][ @@ -448,18 +448,18 @@ def _create_task_from_xml(xml): ]["oml:data_splits_url"] cls = { - TaskTypeEnum.SUPERVISED_CLASSIFICATION: OpenMLClassificationTask, - TaskTypeEnum.SUPERVISED_REGRESSION: OpenMLRegressionTask, - TaskTypeEnum.CLUSTERING: OpenMLClusteringTask, - TaskTypeEnum.LEARNING_CURVE: OpenMLLearningCurveTask, - }.get(task_type_id) + TaskType.SUPERVISED_CLASSIFICATION: OpenMLClassificationTask, + TaskType.SUPERVISED_REGRESSION: OpenMLRegressionTask, + TaskType.CLUSTERING: OpenMLClusteringTask, + TaskType.LEARNING_CURVE: OpenMLLearningCurveTask, + }.get(task_type) if cls is None: raise NotImplementedError("Task type %s not supported." 
% common_kwargs["task_type"])
     return cls(**common_kwargs)
 def create_task(
-    task_type_id: int,
+    task_type: TaskType,
     dataset_id: int,
     estimation_procedure_id: int,
     target_name: Optional[str] = None,
@@ -480,7 +480,7 @@
     Parameters
     ----------
-    task_type_id : int
+    task_type : TaskType
         Id of the task type.
     dataset_id : int
         The id of the dataset for the task.
@@ -501,17 +501,17 @@
         OpenMLLearningCurveTask, OpenMLClusteringTask
     """
     task_cls = {
-        TaskTypeEnum.SUPERVISED_CLASSIFICATION: OpenMLClassificationTask,
-        TaskTypeEnum.SUPERVISED_REGRESSION: OpenMLRegressionTask,
-        TaskTypeEnum.CLUSTERING: OpenMLClusteringTask,
-        TaskTypeEnum.LEARNING_CURVE: OpenMLLearningCurveTask,
-    }.get(task_type_id)
+        TaskType.SUPERVISED_CLASSIFICATION: OpenMLClassificationTask,
+        TaskType.SUPERVISED_REGRESSION: OpenMLRegressionTask,
+        TaskType.CLUSTERING: OpenMLClusteringTask,
+        TaskType.LEARNING_CURVE: OpenMLLearningCurveTask,
+    }.get(task_type)
     if task_cls is None:
-        raise NotImplementedError("Task type {0:d} not supported.".format(task_type_id))
+        raise NotImplementedError("Task type {0} not supported.".format(task_type))
     else:
         return task_cls(
-            task_type_id=task_type_id,
+            task_type_id=task_type,
             task_type=None,
             data_set_id=dataset_id,
             target_name=target_name,
diff --git a/openml/tasks/task.py b/openml/tasks/task.py
index b5d95d6d1..ab54db780 100644
--- a/openml/tasks/task.py
+++ b/openml/tasks/task.py
@@ -2,6 +2,7 @@
 from abc import ABC
 from collections import OrderedDict
+from enum import Enum
 import io
 import os
 from typing import Union, Tuple, Dict, List, Optional, Any
@@ -18,12 +19,24 @@
 from ..utils import _create_cache_directory_for_id
+class TaskType(Enum):
+    SUPERVISED_CLASSIFICATION = 1
+    SUPERVISED_REGRESSION = 2
+    LEARNING_CURVE = 3
+    SUPERVISED_DATASTREAM_CLASSIFICATION = 4
+    CLUSTERING = 5
+    MACHINE_LEARNING_CHALLENGE = 6
+    SURVIVAL_ANALYSIS = 7
+    SUBGROUP_DISCOVERY = 8
+    MULTITASK_REGRESSION = 9
+
+
 class OpenMLTask(OpenMLBase):
     """OpenML Task object.
     Parameters
     ----------
-    task_type_id : int
+    task_type_id : TaskType
         Refers to the type of task.
     task_type : str
         Refers to the task.
@@ -36,7 +49,7 @@ class OpenMLTask(OpenMLBase): def __init__( self, task_id: Optional[int], - task_type_id: int, + task_type_id: TaskType, task_type: str, data_set_id: int, estimation_procedure_id: int = 1, @@ -47,7 +60,7 @@ def __init__( ): self.task_id = int(task_id) if task_id is not None else None - self.task_type_id = int(task_type_id) + self.task_type_id = task_type_id self.task_type = task_type self.dataset_id = int(data_set_id) self.evaluation_measure = evaluation_measure @@ -155,10 +168,10 @@ def _to_dict(self) -> "OrderedDict[str, OrderedDict]": task_container = OrderedDict() # type: OrderedDict[str, OrderedDict] task_dict = OrderedDict( [("@xmlns:oml", "http://openml.org/openml")] - ) # type: OrderedDict[str, Union[List, str, int]] + ) # type: OrderedDict[str, Union[List, str, TaskType]] task_container["oml:task_inputs"] = task_dict - task_dict["oml:task_type_id"] = self.task_type_id + task_dict["oml:task_type_id"] = self.task_type_id.value # having task_inputs and adding a type annotation # solves wrong warnings @@ -196,7 +209,7 @@ class OpenMLSupervisedTask(OpenMLTask, ABC): def __init__( self, - task_type_id: int, + task_type_id: TaskType, task_type: str, data_set_id: int, target_name: str, @@ -240,7 +253,11 @@ def get_X_and_y( """ dataset = self.get_dataset() - if self.task_type_id not in (1, 2, 3): + if self.task_type_id not in ( + TaskType.SUPERVISED_CLASSIFICATION, + TaskType.SUPERVISED_REGRESSION, + TaskType.LEARNING_CURVE, + ): raise NotImplementedError(self.task_type) X, y, _, _ = dataset.get_data(dataset_format=dataset_format, target=self.target_name,) return X, y @@ -286,7 +303,7 @@ class OpenMLClassificationTask(OpenMLSupervisedTask): def __init__( self, - task_type_id: int, + task_type_id: TaskType, task_type: str, data_set_id: int, target_name: str, @@ -327,7 +344,7 @@ class OpenMLRegressionTask(OpenMLSupervisedTask): def __init__( self, - task_type_id: int, + task_type_id: TaskType, task_type: str, data_set_id: int, target_name: str, @@ -366,7 +383,7 @@ class OpenMLClusteringTask(OpenMLTask): def __init__( self, - task_type_id: int, + task_type_id: TaskType, task_type: str, data_set_id: int, estimation_procedure_id: int = 17, @@ -440,7 +457,7 @@ class OpenMLLearningCurveTask(OpenMLClassificationTask): def __init__( self, - task_type_id: int, + task_type_id: TaskType, task_type: str, data_set_id: int, target_name: str, @@ -467,14 +484,3 @@ def __init__( class_labels=class_labels, cost_matrix=cost_matrix, ) - - -class TaskTypeEnum(object): - SUPERVISED_CLASSIFICATION = 1 - SUPERVISED_REGRESSION = 2 - LEARNING_CURVE = 3 - SUPERVISED_DATASTREAM_CLASSIFICATION = 4 - CLUSTERING = 5 - MACHINE_LEARNING_CHALLENGE = 6 - SURVIVAL_ANALYSIS = 7 - SUBGROUP_DISCOVERY = 8 diff --git a/openml/testing.py b/openml/testing.py index e4338effd..0b4c50972 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -17,7 +17,7 @@ from oslo_concurrency import lockutils import openml -from openml.tasks import TaskTypeEnum +from openml.tasks import TaskType import logging @@ -199,7 +199,7 @@ def _check_fold_timing_evaluations( num_repeats: int, num_folds: int, max_time_allowed: float = 60000.0, - task_type: int = TaskTypeEnum.SUPERVISED_CLASSIFICATION, + task_type: TaskType = TaskType.SUPERVISED_CLASSIFICATION, check_scores: bool = True, ): """ @@ -225,9 +225,9 @@ def _check_fold_timing_evaluations( } if check_scores: - if task_type in (TaskTypeEnum.SUPERVISED_CLASSIFICATION, TaskTypeEnum.LEARNING_CURVE): + if task_type in (TaskType.SUPERVISED_CLASSIFICATION, TaskType.LEARNING_CURVE): 
check_measures["predictive_accuracy"] = (0, 1.0) - elif task_type == TaskTypeEnum.SUPERVISED_REGRESSION: + elif task_type == TaskType.SUPERVISED_REGRESSION: check_measures["mean_absolute_error"] = (0, float("inf")) self.assertIsInstance(fold_evaluations, dict) diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index fc53ea366..dcc7b0b96 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -24,7 +24,7 @@ from openml.testing import TestBase, SimpleImputer from openml.runs.functions import _run_task_get_arffcontent, run_exists, format_prediction from openml.runs.trace import OpenMLRunTrace -from openml.tasks import TaskTypeEnum +from openml.tasks import TaskType from sklearn.naive_bayes import GaussianNB from sklearn.model_selection._search import BaseSearchCV @@ -391,7 +391,7 @@ def _run_and_upload( seed=1, metric=sklearn.metrics.accuracy_score, metric_name="predictive_accuracy", - task_type=TaskTypeEnum.SUPERVISED_CLASSIFICATION, + task_type=TaskType.SUPERVISED_CLASSIFICATION, sentinel=None, ): def determine_grid_size(param_grid): @@ -476,7 +476,7 @@ def _run_and_upload_classification( num_iterations = 5 # for base search algorithms metric = sklearn.metrics.accuracy_score # metric class metric_name = "predictive_accuracy" # openml metric name - task_type = TaskTypeEnum.SUPERVISED_CLASSIFICATION # task type + task_type = TaskType.SUPERVISED_CLASSIFICATION # task type return self._run_and_upload( clf=clf, @@ -499,7 +499,7 @@ def _run_and_upload_regression( num_iterations = 5 # for base search algorithms metric = sklearn.metrics.mean_absolute_error # metric class metric_name = "mean_absolute_error" # openml metric name - task_type = TaskTypeEnum.SUPERVISED_REGRESSION # task type + task_type = TaskType.SUPERVISED_REGRESSION # task type return self._run_and_upload( clf=clf, @@ -1098,7 +1098,7 @@ def test__run_task_get_arffcontent(self): # trace. 
SGD does not produce any self.assertIsInstance(trace, type(None)) - task_type = TaskTypeEnum.SUPERVISED_CLASSIFICATION + task_type = TaskType.SUPERVISED_CLASSIFICATION self._check_fold_timing_evaluations( fold_evaluations, num_repeats, num_folds, task_type=task_type ) diff --git a/tests/test_tasks/test_classification_task.py b/tests/test_tasks/test_classification_task.py index b19be7017..4f03f8bff 100644 --- a/tests/test_tasks/test_classification_task.py +++ b/tests/test_tasks/test_classification_task.py @@ -2,7 +2,7 @@ import numpy as np -from openml.tasks import get_task +from openml.tasks import TaskType, get_task from .test_supervised_task import OpenMLSupervisedTaskTest @@ -14,7 +14,7 @@ def setUp(self, n_levels: int = 1): super(OpenMLClassificationTaskTest, self).setUp() self.task_id = 119 - self.task_type_id = 1 + self.task_type = TaskType.SUPERVISED_CLASSIFICATION self.estimation_procedure = 1 def test_get_X_and_Y(self): @@ -30,7 +30,7 @@ def test_download_task(self): task = super(OpenMLClassificationTaskTest, self).test_download_task() self.assertEqual(task.task_id, self.task_id) - self.assertEqual(task.task_type_id, 1) + self.assertEqual(task.task_type_id, TaskType.SUPERVISED_CLASSIFICATION) self.assertEqual(task.dataset_id, 20) def test_class_labels(self): diff --git a/tests/test_tasks/test_clustering_task.py b/tests/test_tasks/test_clustering_task.py index e46369802..c5a7a3829 100644 --- a/tests/test_tasks/test_clustering_task.py +++ b/tests/test_tasks/test_clustering_task.py @@ -1,6 +1,7 @@ # License: BSD 3-Clause import openml +from openml.tasks import TaskType from openml.testing import TestBase from .test_task import OpenMLTaskTest from openml.exceptions import OpenMLServerException @@ -14,7 +15,7 @@ def setUp(self, n_levels: int = 1): super(OpenMLClusteringTaskTest, self).setUp() self.task_id = 146714 - self.task_type_id = 5 + self.task_type = TaskType.CLUSTERING self.estimation_procedure = 17 def test_get_dataset(self): @@ -28,7 +29,7 @@ def test_download_task(self): openml.config.server = self.production_server task = super(OpenMLClusteringTaskTest, self).test_download_task() self.assertEqual(task.task_id, self.task_id) - self.assertEqual(task.task_type_id, 5) + self.assertEqual(task.task_type_id, TaskType.CLUSTERING) self.assertEqual(task.dataset_id, 36) def test_upload_task(self): @@ -38,7 +39,7 @@ def test_upload_task(self): dataset_id = compatible_datasets[i % len(compatible_datasets)] # Upload a clustering task without a ground truth. 
task = openml.tasks.create_task( - task_type_id=self.task_type_id, + task_type=self.task_type, dataset_id=dataset_id, estimation_procedure_id=self.estimation_procedure, ) @@ -59,5 +60,5 @@ def test_upload_task(self): raise e else: raise ValueError( - "Could not create a valid task for task type ID {}".format(self.task_type_id) + "Could not create a valid task for task type ID {}".format(self.task_type) ) diff --git a/tests/test_tasks/test_learning_curve_task.py b/tests/test_tasks/test_learning_curve_task.py index b8e156ee6..9f0157187 100644 --- a/tests/test_tasks/test_learning_curve_task.py +++ b/tests/test_tasks/test_learning_curve_task.py @@ -2,7 +2,7 @@ import numpy as np -from openml.tasks import get_task +from openml.tasks import TaskType, get_task from .test_supervised_task import OpenMLSupervisedTaskTest @@ -14,7 +14,7 @@ def setUp(self, n_levels: int = 1): super(OpenMLLearningCurveTaskTest, self).setUp() self.task_id = 801 - self.task_type_id = 3 + self.task_type = TaskType.LEARNING_CURVE self.estimation_procedure = 13 def test_get_X_and_Y(self): @@ -30,7 +30,7 @@ def test_download_task(self): task = super(OpenMLLearningCurveTaskTest, self).test_download_task() self.assertEqual(task.task_id, self.task_id) - self.assertEqual(task.task_type_id, 3) + self.assertEqual(task.task_type_id, TaskType.LEARNING_CURVE) self.assertEqual(task.dataset_id, 20) def test_class_labels(self): diff --git a/tests/test_tasks/test_regression_task.py b/tests/test_tasks/test_regression_task.py index fbb3ff607..e751e63b5 100644 --- a/tests/test_tasks/test_regression_task.py +++ b/tests/test_tasks/test_regression_task.py @@ -2,6 +2,7 @@ import numpy as np +from openml.tasks import TaskType from .test_supervised_task import OpenMLSupervisedTaskTest @@ -13,7 +14,7 @@ def setUp(self, n_levels: int = 1): super(OpenMLRegressionTaskTest, self).setUp() self.task_id = 625 - self.task_type_id = 2 + self.task_type = TaskType.SUPERVISED_REGRESSION self.estimation_procedure = 7 def test_get_X_and_Y(self): @@ -29,5 +30,5 @@ def test_download_task(self): task = super(OpenMLRegressionTaskTest, self).test_download_task() self.assertEqual(task.task_id, self.task_id) - self.assertEqual(task.task_type_id, 2) + self.assertEqual(task.task_type_id, TaskType.SUPERVISED_REGRESSION) self.assertEqual(task.dataset_id, 105) diff --git a/tests/test_tasks/test_task.py b/tests/test_tasks/test_task.py index ae92f12ad..318785991 100644 --- a/tests/test_tasks/test_task.py +++ b/tests/test_tasks/test_task.py @@ -10,7 +10,7 @@ get_dataset, list_datasets, ) -from openml.tasks import create_task, get_task +from openml.tasks import TaskType, create_task, get_task class OpenMLTaskTest(TestBase): @@ -47,7 +47,7 @@ def test_upload_task(self): dataset_id = compatible_datasets[i % len(compatible_datasets)] # TODO consider implementing on the diff task types. task = create_task( - task_type_id=self.task_type_id, + task_type=self.task_type, dataset_id=dataset_id, target_name=self._get_random_feature(dataset_id), estimation_procedure_id=self.estimation_procedure, @@ -70,7 +70,7 @@ def test_upload_task(self): raise e else: raise ValueError( - "Could not create a valid task for task type ID {}".format(self.task_type_id) + "Could not create a valid task for task type ID {}".format(self.task_type) ) def _get_compatible_rand_dataset(self) -> List: @@ -81,13 +81,13 @@ def _get_compatible_rand_dataset(self) -> List: # depending on the task type, find either datasets # with only symbolic features or datasets with only # numerical features. 
- if self.task_type_id == 2: + if self.task_type == TaskType.SUPERVISED_REGRESSION: # regression task for dataset_id, dataset_info in active_datasets.items(): if "NumberOfSymbolicFeatures" in dataset_info: if dataset_info["NumberOfSymbolicFeatures"] == 0: compatible_datasets.append(dataset_id) - elif self.task_type_id == 5: + elif self.task_type == TaskType.CLUSTERING: # clustering task compatible_datasets = list(active_datasets.keys()) else: @@ -114,7 +114,7 @@ def _get_random_feature(self, dataset_id: int) -> str: while True: random_feature_index = randint(0, len(random_dataset.features) - 1) random_feature = random_dataset.features[random_feature_index] - if self.task_type_id == 2: + if self.task_type == TaskType.SUPERVISED_REGRESSION: if random_feature.data_type == "numeric": break else: diff --git a/tests/test_tasks/test_task_functions.py b/tests/test_tasks/test_task_functions.py index ec62c953a..5f9b65495 100644 --- a/tests/test_tasks/test_task_functions.py +++ b/tests/test_tasks/test_task_functions.py @@ -3,6 +3,7 @@ import os from unittest import mock +from openml.tasks import TaskType from openml.testing import TestBase from openml import OpenMLSplit, OpenMLTask from openml.exceptions import OpenMLCacheException @@ -45,12 +46,14 @@ def test__get_estimation_procedure_list(self): estimation_procedures = openml.tasks.functions._get_estimation_procedure_list() self.assertIsInstance(estimation_procedures, list) self.assertIsInstance(estimation_procedures[0], dict) - self.assertEqual(estimation_procedures[0]["task_type_id"], 1) + self.assertEqual( + estimation_procedures[0]["task_type_id"], TaskType.SUPERVISED_CLASSIFICATION + ) def test_list_clustering_task(self): # as shown by #383, clustering tasks can give list/dict casting problems openml.config.server = self.production_server - openml.tasks.list_tasks(task_type_id=5, size=10) + openml.tasks.list_tasks(task_type=TaskType.CLUSTERING, size=10) # the expected outcome is that it doesn't crash. No assertions. def _check_task(self, task): @@ -64,16 +67,16 @@ def _check_task(self, task): def test_list_tasks_by_type(self): num_curves_tasks = 200 # number is flexible, check server if fails - ttid = 3 - tasks = openml.tasks.list_tasks(task_type_id=ttid) + ttid = TaskType.LEARNING_CURVE + tasks = openml.tasks.list_tasks(task_type=ttid) self.assertGreaterEqual(len(tasks), num_curves_tasks) for tid in tasks: self.assertEqual(ttid, tasks[tid]["ttid"]) self._check_task(tasks[tid]) def test_list_tasks_output_format(self): - ttid = 3 - tasks = openml.tasks.list_tasks(task_type_id=ttid, output_format="dataframe") + ttid = TaskType.LEARNING_CURVE + tasks = openml.tasks.list_tasks(task_type=ttid, output_format="dataframe") self.assertIsInstance(tasks, pd.DataFrame) self.assertGreater(len(tasks), 100) @@ -109,10 +112,14 @@ def test_list_tasks_paginate(self): def test_list_tasks_per_type_paginate(self): size = 10 max = 100 - task_types = 4 - for j in range(1, task_types): + task_types = [ + TaskType.SUPERVISED_CLASSIFICATION, + TaskType.SUPERVISED_REGRESSION, + TaskType.LEARNING_CURVE, + ] + for j in task_types: for i in range(0, max, size): - tasks = openml.tasks.list_tasks(task_type_id=j, offset=i, size=size) + tasks = openml.tasks.list_tasks(task_type=j, offset=i, size=size) self.assertGreaterEqual(size, len(tasks)) for tid in tasks: self.assertEqual(j, tasks[tid]["ttid"])
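
The short sketches below are editorial additions, not part of the patch. They assume the changes above are installed and that the server configured in ``openml.config`` is reachable; concrete IDs are taken from the examples and tests in the diff where possible and are otherwise hypothetical. First, a minimal migration sketch for caller code: the integer ``task_type_id`` argument of ``list_tasks`` and ``create_task`` is replaced by a ``TaskType`` member.

# Editorial sketch (not part of the patch): migrating caller code to TaskType.
# dataset_id=128 and target_name="class" mirror the tutorial example above;
# estimation_procedure_id=1 is an assumption made only for illustration.
import openml
from openml.tasks import TaskType

# Before this change: openml.tasks.list_tasks(task_type_id=1, output_format="dataframe")
tasks_df = openml.tasks.list_tasks(
    task_type=TaskType.SUPERVISED_CLASSIFICATION, output_format="dataframe"
)
print(tasks_df.head())

# Before this change: openml.tasks.create_task(task_type_id=1, ...)
my_task = openml.tasks.create_task(
    task_type=TaskType.SUPERVISED_CLASSIFICATION,
    dataset_id=128,
    target_name="class",
    estimation_procedure_id=1,
    evaluation_measure="predictive_accuracy",
)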
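
A second sketch covers the integer round-trip the patch performs when talking to the server, and the comparison semantics behind the test updates: because ``TaskType`` is a plain ``Enum`` rather than an ``IntEnum``, its members no longer compare equal to bare integers, which is why assertions such as ``assertEqual(task.task_type_id, 1)`` were rewritten against enum members.

# Editorial sketch (not part of the patch): TaskType <-> integer round-trip.
from openml.tasks import TaskType

ttid = TaskType(int("3"))               # e.g. parsing "oml:task_type_id" from server XML
assert ttid is TaskType.LEARNING_CURVE

api_call = "task/list" + "/type/%d" % ttid.value   # as in _list_tasks
assert api_call.endswith("/type/3")

assert ttid != 3                        # plain Enum: no implicit equality with ints
assert ttid == TaskType.LEARNING_CURVE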
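
Finally, a sketch of how downstream code that branches on a task's type reads after the rename, mirroring the metric selection in the updated tests; task id 119 is borrowed from test_classification_task.py and belongs to the test server.

# Editorial sketch (not part of the patch): branching on the type of a downloaded task.
import openml
from openml.tasks import TaskType

task = openml.tasks.get_task(119)  # hypothetical here; valid on the test server used by the suite
if task.task_type_id in (TaskType.SUPERVISED_CLASSIFICATION, TaskType.LEARNING_CURVE):
    metric_name = "predictive_accuracy"
elif task.task_type_id == TaskType.SUPERVISED_REGRESSION:
    metric_name = "mean_absolute_error"
else:
    raise NotImplementedError("Task type {0} not supported.".format(task.task_type_id))
print(metric_name)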