Class to enum #958

Merged: 4 commits, Oct 5, 2020
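This PR replaces the old TaskTypeEnum constants class with a proper TaskType enum and renames the task_type_id arguments of the listing and creation functions to task_type. The enum definition itself is not among the files shown below; the following is only a sketch of the idea, in which the IDs for supervised classification (1) and supervised regression (2) are confirmed by the docstrings in this diff, while the remaining members and values are assumptions based on OpenML's published task type IDs.

from enum import Enum


class TaskType(Enum):
    # IDs 1 and 2 appear in the list_tasks docstrings below; the rest are assumed.
    SUPERVISED_CLASSIFICATION = 1
    SUPERVISED_REGRESSION = 2
    LEARNING_CURVE = 3
    SUPERVISED_DATASTREAM_CLASSIFICATION = 4
    CLUSTERING = 5
    MACHINE_LEARNING_CHALLENGE = 6
    SURVIVAL_ANALYSIS = 7
    SUBGROUP_DISCOVERY = 8


# Round trip between the server's integer IDs and the enum:
assert TaskType(1) is TaskType.SUPERVISED_CLASSIFICATION
assert TaskType.SUPERVISED_REGRESSION.value == 2

The explicit .value access in _list_tasks further down suggests a plain Enum rather than an IntEnum; the sketch follows that assumption.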
12 changes: 7 additions & 5 deletions examples/30_extended/tasks_tutorial.py
@@ -8,6 +8,7 @@
# License: BSD 3-Clause

import openml
from openml.tasks import TaskType
import pandas as pd

############################################################################
@@ -30,7 +31,7 @@
#
# We will start by simply listing only *supervised classification* tasks:

tasks = openml.tasks.list_tasks(task_type_id=1)
tasks = openml.tasks.list_tasks(task_type=TaskType.SUPERVISED_CLASSIFICATION)

############################################################################
# **openml.tasks.list_tasks()** returns a dictionary of dictionaries by default, which we convert
@@ -45,7 +46,9 @@

# As conversion to a pandas dataframe is a common task, we have added this functionality to the
# OpenML-Python library which can be used by passing ``output_format='dataframe'``:
tasks_df = openml.tasks.list_tasks(task_type_id=1, output_format="dataframe")
tasks_df = openml.tasks.list_tasks(
task_type=TaskType.SUPERVISED_CLASSIFICATION, output_format="dataframe"
)
print(tasks_df.head())

############################################################################
@@ -155,7 +158,7 @@
#
# Creating a task requires the following input:
#
# * task_type_id: The task type ID, required (see below). Required.
# * task_type: The task type ID, required (see below). Required.
# * dataset_id: The dataset ID. Required.
# * target_name: The name of the attribute you aim to predict. Optional.
# * estimation_procedure_id : The ID of the estimation procedure used to create train-test
@@ -186,9 +189,8 @@
openml.config.start_using_configuration_for_example()

try:
tasktypes = openml.tasks.TaskTypeEnum
my_task = openml.tasks.create_task(
task_type_id=tasktypes.SUPERVISED_CLASSIFICATION,
task_type=TaskType.SUPERVISED_CLASSIFICATION,
dataset_id=128,
target_name="class",
evaluation_measure="predictive_accuracy",
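To see the updated tutorial call in context, here is a small usage sketch. It assumes the listing keys built in __list_tasks further down ("tid", "did", "name", "ttid") survive as dataframe columns when output_format="dataframe" is requested.

import openml
from openml.tasks import TaskType

# Same call as in the tutorial above, but for supervised regression tasks.
tasks_df = openml.tasks.list_tasks(
    task_type=TaskType.SUPERVISED_REGRESSION, output_format="dataframe"
)

# "ttid" now holds TaskType members rather than bare integers.
print(tasks_df[["tid", "did", "name", "ttid"]].head())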
2 changes: 1 addition & 1 deletion examples/40_paper/2015_neurips_feurer_example.py
@@ -58,7 +58,7 @@
# deactivated, which also deactivated the tasks on them. More information on active or inactive
# datasets can be found in the `online docs <https://docs.openml.org/#dataset-status>`_.
tasks = openml.tasks.list_tasks(
task_type_id=openml.tasks.TaskTypeEnum.SUPERVISED_CLASSIFICATION,
task_type=openml.tasks.TaskType.SUPERVISED_CLASSIFICATION,
status="all",
output_format="dataframe",
)
8 changes: 4 additions & 4 deletions openml/runs/functions.py
@@ -32,7 +32,7 @@
)
from .run import OpenMLRun
from .trace import OpenMLRunTrace
from ..tasks import TaskTypeEnum, get_task
from ..tasks import TaskType, get_task

# Avoid import cycles: https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles
if TYPE_CHECKING:
@@ -274,7 +274,7 @@ def run_flow_on_task(
run.parameter_settings = flow.extension.obtain_parameter_values(flow)

# now we need to attach the detailed evaluations
if task.task_type_id == TaskTypeEnum.LEARNING_CURVE:
if task.task_type_id == TaskType.LEARNING_CURVE:
run.sample_evaluations = sample_evaluations
else:
run.fold_evaluations = fold_evaluations
@@ -772,7 +772,7 @@ def obtain_field(xml_obj, fieldname, from_server, cast=None):

if "predictions" not in files and from_server is True:
task = openml.tasks.get_task(task_id)
if task.task_type_id == TaskTypeEnum.SUBGROUP_DISCOVERY:
if task.task_type_id == TaskType.SUBGROUP_DISCOVERY:
raise NotImplementedError("Subgroup discovery tasks are not yet supported.")
else:
# JvR: actually, I am not sure whether this error should be raised.
@@ -1008,7 +1008,7 @@ def __list_runs(api_call, output_format="dict"):
"setup_id": int(run_["oml:setup_id"]),
"flow_id": int(run_["oml:flow_id"]),
"uploader": int(run_["oml:uploader"]),
"task_type": int(run_["oml:task_type_id"]),
"task_type": TaskType(int(run_["oml:task_type_id"])),
"upload_time": str(run_["oml:upload_time"]),
"error_message": str((run_["oml:error_message"]) or ""),
}
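The listing code above now wraps the integer ID reported by the server in the enum. A quick sketch of that round trip, using the member names assumed in the sketch near the top of this page:

from openml.tasks import TaskType

raw = int("1")          # what a field like oml:task_type_id yields after parsing
ttid = TaskType(raw)    # -> TaskType.SUPERVISED_CLASSIFICATION
assert ttid is TaskType.SUPERVISED_CLASSIFICATION
assert ttid.value == 1  # the original ID stays available via .value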
24 changes: 10 additions & 14 deletions openml/runs/run.py
@@ -16,7 +16,7 @@
from ..flows import get_flow
from ..tasks import (
get_task,
TaskTypeEnum,
TaskType,
OpenMLClassificationTask,
OpenMLLearningCurveTask,
OpenMLClusteringTask,
@@ -401,17 +401,13 @@ def get_metric_fn(self, sklearn_fn, kwargs=None):

attribute_names = [att[0] for att in predictions_arff["attributes"]]
if (
task.task_type_id
in [TaskTypeEnum.SUPERVISED_CLASSIFICATION, TaskTypeEnum.LEARNING_CURVE]
task.task_type_id in [TaskType.SUPERVISED_CLASSIFICATION, TaskType.LEARNING_CURVE]
and "correct" not in attribute_names
):
raise ValueError('Attribute "correct" should be set for ' "classification task runs")
if (
task.task_type_id == TaskTypeEnum.SUPERVISED_REGRESSION
and "truth" not in attribute_names
):
if task.task_type_id == TaskType.SUPERVISED_REGRESSION and "truth" not in attribute_names:
raise ValueError('Attribute "truth" should be set for ' "regression task runs")
if task.task_type_id != TaskTypeEnum.CLUSTERING and "prediction" not in attribute_names:
if task.task_type_id != TaskType.CLUSTERING and "prediction" not in attribute_names:
raise ValueError('Attribute "predict" should be set for ' "supervised task runs")

def _attribute_list_to_dict(attribute_list):
@@ -431,11 +427,11 @@ def _attribute_list_to_dict(attribute_list):
predicted_idx = attribute_dict["prediction"] # Assume supervised task

if (
task.task_type_id == TaskTypeEnum.SUPERVISED_CLASSIFICATION
or task.task_type_id == TaskTypeEnum.LEARNING_CURVE
task.task_type_id == TaskType.SUPERVISED_CLASSIFICATION
or task.task_type_id == TaskType.LEARNING_CURVE
):
correct_idx = attribute_dict["correct"]
elif task.task_type_id == TaskTypeEnum.SUPERVISED_REGRESSION:
elif task.task_type_id == TaskType.SUPERVISED_REGRESSION:
correct_idx = attribute_dict["truth"]
has_samples = False
if "sample" in attribute_dict:
@@ -465,14 +461,14 @@ def _attribute_list_to_dict(attribute_list):
samp = 0 # No learning curve sample, always 0

if task.task_type_id in [
TaskTypeEnum.SUPERVISED_CLASSIFICATION,
TaskTypeEnum.LEARNING_CURVE,
TaskType.SUPERVISED_CLASSIFICATION,
TaskType.LEARNING_CURVE,
]:
prediction = predictions_arff["attributes"][predicted_idx][1].index(
line[predicted_idx]
)
correct = predictions_arff["attributes"][predicted_idx][1].index(line[correct_idx])
elif task.task_type_id == TaskTypeEnum.SUPERVISED_REGRESSION:
elif task.task_type_id == TaskType.SUPERVISED_REGRESSION:
prediction = line[predicted_idx]
correct = line[correct_idx]
if rep not in values_predict:
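The branches above decide which column of the predictions file holds the ground truth. A hypothetical helper capturing the same rule (the function name is illustrative and not part of the library):

from openml.tasks import TaskType


def ground_truth_column(task_type):
    # Classification and learning-curve runs store the label in "correct",
    # regression runs in "truth"; mirrors the checks in get_metric_fn above.
    if task_type in (TaskType.SUPERVISED_CLASSIFICATION, TaskType.LEARNING_CURVE):
        return "correct"
    if task_type == TaskType.SUPERVISED_REGRESSION:
        return "truth"
    raise ValueError("no ground-truth column for %s" % task_type)


print(ground_truth_column(TaskType.SUPERVISED_REGRESSION))  # truth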
4 changes: 2 additions & 2 deletions openml/tasks/__init__.py
@@ -7,7 +7,7 @@
OpenMLRegressionTask,
OpenMLClusteringTask,
OpenMLLearningCurveTask,
TaskTypeEnum,
TaskType,
)
from .split import OpenMLSplit
from .functions import (
@@ -29,5 +29,5 @@
"get_tasks",
"list_tasks",
"OpenMLSplit",
"TaskTypeEnum",
"TaskType",
]
74 changes: 37 additions & 37 deletions openml/tasks/functions.py
@@ -15,7 +15,7 @@
OpenMLClassificationTask,
OpenMLClusteringTask,
OpenMLLearningCurveTask,
TaskTypeEnum,
TaskType,
OpenMLRegressionTask,
OpenMLSupervisedTask,
OpenMLTask,
@@ -109,7 +109,7 @@ def _get_estimation_procedure_list():
procs.append(
{
"id": int(proc_["oml:id"]),
"task_type_id": int(proc_["oml:ttid"]),
"task_type_id": TaskType(int(proc_["oml:ttid"])),
"name": proc_["oml:name"],
"type": proc_["oml:type"],
}
@@ -119,22 +119,22 @@


def list_tasks(
task_type_id: Optional[int] = None,
task_type: Optional[TaskType] = None,
offset: Optional[int] = None,
size: Optional[int] = None,
tag: Optional[str] = None,
output_format: str = "dict",
**kwargs
) -> Union[Dict, pd.DataFrame]:
"""
Return a number of tasks having the given tag and task_type_id
Return a number of tasks having the given tag and task_type

Parameters
----------
Filter task_type_id is separated from the other filters because
it is used as task_type_id in the task description, but it is named
Filter task_type is separated from the other filters because
it is used as task_type in the task description, but it is named
type when used as a filter in list tasks call.
task_type_id : int, optional
task_type : TaskType, optional
ID of the task type as detailed `here <https://www.openml.org/search?type=task_type>`_.
- Supervised classification: 1
- Supervised regression: 2
@@ -162,12 +162,12 @@ def list_tasks(
Returns
-------
dict
        All tasks having the given task_type_id and the given tag. Every task is
        All tasks having the given task_type and the given tag. Every task is
represented by a dictionary containing the following information:
task id, dataset id, task_type and status. If qualities are calculated
for the associated dataset, some of these are also returned.
dataframe
        All tasks having the given task_type_id and the given tag. Every task is
        All tasks having the given task_type and the given tag. Every task is
represented by a row in the data frame containing the following information
as columns: task id, dataset id, task_type and status. If qualities are
calculated for the associated dataset, some of these are also returned.
@@ -179,23 +179,23 @@
return openml.utils._list_all(
output_format=output_format,
listing_call=_list_tasks,
task_type_id=task_type_id,
task_type=task_type,
offset=offset,
size=size,
tag=tag,
**kwargs
)


def _list_tasks(task_type_id=None, output_format="dict", **kwargs):
def _list_tasks(task_type=None, output_format="dict", **kwargs):
"""
Perform the api call to return a number of tasks having the given filters.
Parameters
----------
Filter task_type_id is separated from the other filters because
it is used as task_type_id in the task description, but it is named
Filter task_type is separated from the other filters because
it is used as task_type in the task description, but it is named
type when used as a filter in list tasks call.
task_type_id : int, optional
task_type : TaskType, optional
ID of the task type as detailed
`here <https://www.openml.org/search?type=task_type>`_.
- Supervised classification: 1
@@ -220,8 +220,8 @@ def _list_tasks(task_type_id=None, output_format="dict", **kwargs):
dict or dataframe
"""
api_call = "task/list"
if task_type_id is not None:
api_call += "/type/%d" % int(task_type_id)
if task_type is not None:
api_call += "/type/%d" % task_type.value
if kwargs is not None:
for operator, value in kwargs.items():
if operator == "task_id":
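A sketch of the endpoint string these lines build; the keyword filters handled by the loop that follows are omitted. Because TaskType appears to be a plain Enum (hence the explicit .value), the integer the REST API expects has to be unwrapped by hand:

from openml.tasks import TaskType

api_call = "task/list"
task_type = TaskType.SUPERVISED_CLASSIFICATION
if task_type is not None:
    api_call += "/type/%d" % task_type.value
print(api_call)  # task/list/type/1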
@@ -259,7 +259,7 @@ def __list_tasks(api_call, output_format="dict"):
tid = int(task_["oml:task_id"])
task = {
"tid": tid,
"ttid": int(task_["oml:task_type_id"]),
"ttid": TaskType(int(task_["oml:task_type_id"])),
"did": int(task_["oml:did"]),
"name": task_["oml:name"],
"task_type": task_["oml:task_type"],
@@ -417,18 +417,18 @@ def _create_task_from_xml(xml):
"oml:evaluation_measure"
]

task_type_id = int(dic["oml:task_type_id"])
task_type = TaskType(int(dic["oml:task_type_id"]))
common_kwargs = {
"task_id": dic["oml:task_id"],
"task_type": dic["oml:task_type"],
"task_type_id": dic["oml:task_type_id"],
"task_type_id": task_type,
"data_set_id": inputs["source_data"]["oml:data_set"]["oml:data_set_id"],
"evaluation_measure": evaluation_measures,
}
if task_type_id in (
TaskTypeEnum.SUPERVISED_CLASSIFICATION,
TaskTypeEnum.SUPERVISED_REGRESSION,
TaskTypeEnum.LEARNING_CURVE,
if task_type in (
TaskType.SUPERVISED_CLASSIFICATION,
TaskType.SUPERVISED_REGRESSION,
TaskType.LEARNING_CURVE,
):
# Convert some more parameters
for parameter in inputs["estimation_procedure"]["oml:estimation_procedure"][
@@ -448,18 +448,18 @@
]["oml:data_splits_url"]

cls = {
TaskTypeEnum.SUPERVISED_CLASSIFICATION: OpenMLClassificationTask,
TaskTypeEnum.SUPERVISED_REGRESSION: OpenMLRegressionTask,
TaskTypeEnum.CLUSTERING: OpenMLClusteringTask,
TaskTypeEnum.LEARNING_CURVE: OpenMLLearningCurveTask,
}.get(task_type_id)
TaskType.SUPERVISED_CLASSIFICATION: OpenMLClassificationTask,
TaskType.SUPERVISED_REGRESSION: OpenMLRegressionTask,
TaskType.CLUSTERING: OpenMLClusteringTask,
TaskType.LEARNING_CURVE: OpenMLLearningCurveTask,
}.get(task_type)
if cls is None:
raise NotImplementedError("Task type %s not supported." % common_kwargs["task_type"])
return cls(**common_kwargs)
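Both _create_task_from_xml above and create_task below use the same dict-dispatch idiom: map enum members to task classes and let a missing key fall through to None, which is then reported as an unsupported task type. A stripped-down sketch, assuming the task classes are importable from openml.tasks (the __init__.py hunk above shows several of them being imported there):

from openml.tasks import TaskType, OpenMLClassificationTask, OpenMLRegressionTask

TASK_CLASSES = {
    TaskType.SUPERVISED_CLASSIFICATION: OpenMLClassificationTask,
    TaskType.SUPERVISED_REGRESSION: OpenMLRegressionTask,
}

cls = TASK_CLASSES.get(TaskType.CLUSTERING)
if cls is None:
    # create_task raises NotImplementedError in this situation.
    print("Task type %s not supported." % TaskType.CLUSTERING)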


def create_task(
task_type_id: int,
task_type: TaskType,
dataset_id: int,
estimation_procedure_id: int,
target_name: Optional[str] = None,
@@ -480,7 +480,7 @@

Parameters
----------
task_type_id : int
task_type : TaskType
Id of the task type.
dataset_id : int
The id of the dataset for the task.
@@ -501,17 +501,17 @@
OpenMLLearningCurveTask, OpenMLClusteringTask
"""
task_cls = {
TaskTypeEnum.SUPERVISED_CLASSIFICATION: OpenMLClassificationTask,
TaskTypeEnum.SUPERVISED_REGRESSION: OpenMLRegressionTask,
TaskTypeEnum.CLUSTERING: OpenMLClusteringTask,
TaskTypeEnum.LEARNING_CURVE: OpenMLLearningCurveTask,
}.get(task_type_id)
TaskType.SUPERVISED_CLASSIFICATION: OpenMLClassificationTask,
TaskType.SUPERVISED_REGRESSION: OpenMLRegressionTask,
TaskType.CLUSTERING: OpenMLClusteringTask,
TaskType.LEARNING_CURVE: OpenMLLearningCurveTask,
}.get(task_type)

if task_cls is None:
raise NotImplementedError("Task type {0:d} not supported.".format(task_type_id))
raise NotImplementedError("Task type {0:d} not supported.".format(task_type))
else:
return task_cls(
task_type_id=task_type_id,
task_type_id=task_type,
task_type=None,
data_set_id=dataset_id,
target_name=target_name,