Class to enum #958

Merged: 4 commits, Oct 5, 2020
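This PR replaces the old TaskTypeEnum constants class with a proper TaskType enum and renames the task_type_id arguments of the listing and creation functions to task_type. The enum definition itself is not among the files shown below; the following is only a sketch of the idea, in which the IDs for supervised classification (1) and supervised regression (2) are confirmed by the docstrings in this diff, while the remaining members and values are assumptions based on OpenML's published task type IDs.

from enum import Enum


class TaskType(Enum):
    # IDs 1 and 2 appear in the list_tasks docstrings below; the rest are assumed.
    SUPERVISED_CLASSIFICATION = 1
    SUPERVISED_REGRESSION = 2
    LEARNING_CURVE = 3
    SUPERVISED_DATASTREAM_CLASSIFICATION = 4
    CLUSTERING = 5
    MACHINE_LEARNING_CHALLENGE = 6
    SURVIVAL_ANALYSIS = 7
    SUBGROUP_DISCOVERY = 8


# Round trip between the server's integer IDs and the enum:
assert TaskType(1) is TaskType.SUPERVISED_CLASSIFICATION
assert TaskType.SUPERVISED_REGRESSION.value == 2

The explicit .value access in _list_tasks further down suggests a plain Enum rather than an IntEnum; the sketch follows that assumption.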
12 changes: 7 additions & 5 deletions examples/30_extended/tasks_tutorial.py
@@ -8,6 +8,7 @@
# License: BSD 3-Clause

import openml
from openml.tasks import TaskType
import pandas as pd

############################################################################
@@ -30,7 +31,7 @@
#
# We will start by simply listing only *supervised classification* tasks:

tasks = openml.tasks.list_tasks(task_type_id=1)
tasks = openml.tasks.list_tasks(task_type=TaskType.SUPERVISED_CLASSIFICATION)

############################################################################
# **openml.tasks.list_tasks()** returns a dictionary of dictionaries by default, which we convert
@@ -45,7 +46,9 @@

# As conversion to a pandas dataframe is a common task, we have added this functionality to the
# OpenML-Python library which can be used by passing ``output_format='dataframe'``:
tasks_df = openml.tasks.list_tasks(task_type_id=1, output_format="dataframe")
tasks_df = openml.tasks.list_tasks(
task_type=TaskType.SUPERVISED_CLASSIFICATION, output_format="dataframe"
)
print(tasks_df.head())

############################################################################
@@ -155,7 +158,7 @@
#
# Creating a task requires the following input:
#
# * task_type_id: The task type ID, required (see below). Required.
# * task_type: The task type ID, required (see below). Required.
# * dataset_id: The dataset ID. Required.
# * target_name: The name of the attribute you aim to predict. Optional.
# * estimation_procedure_id : The ID of the estimation procedure used to create train-test
@@ -186,9 +189,8 @@
openml.config.start_using_configuration_for_example()

try:
tasktypes = openml.tasks.TaskTypeEnum
my_task = openml.tasks.create_task(
task_type_id=tasktypes.SUPERVISED_CLASSIFICATION,
task_type=TaskType.SUPERVISED_CLASSIFICATION,
dataset_id=128,
target_name="class",
evaluation_measure="predictive_accuracy",
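To see the updated tutorial call in context, here is a small usage sketch. It assumes the listing keys built in __list_tasks further down ("tid", "did", "name", "ttid") survive as dataframe columns when output_format="dataframe" is requested.

import openml
from openml.tasks import TaskType

# Same call as in the tutorial above, but for supervised regression tasks.
tasks_df = openml.tasks.list_tasks(
    task_type=TaskType.SUPERVISED_REGRESSION, output_format="dataframe"
)

# "ttid" now holds TaskType members rather than bare integers.
print(tasks_df[["tid", "did", "name", "ttid"]].head())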
2 changes: 1 addition & 1 deletion examples/40_paper/2015_neurips_feurer_example.py
@@ -58,7 +58,7 @@
# deactivated, which also deactivated the tasks on them. More information on active or inactive
# datasets can be found in the `online docs <https://docs.openml.org/#dataset-status>`_.
tasks = openml.tasks.list_tasks(
task_type_id=openml.tasks.TaskTypeEnum.SUPERVISED_CLASSIFICATION,
task_type=openml.tasks.TaskType.SUPERVISED_CLASSIFICATION,
status="all",
output_format="dataframe",
)
8 changes: 4 additions & 4 deletions openml/runs/functions.py
@@ -32,7 +32,7 @@
)
from .run import OpenMLRun
from .trace import OpenMLRunTrace
from ..tasks import TaskTypeEnum, get_task
from ..tasks import TaskType, get_task

# Avoid import cycles: https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles
if TYPE_CHECKING:
@@ -274,7 +274,7 @@ def run_flow_on_task(
run.parameter_settings = flow.extension.obtain_parameter_values(flow)

# now we need to attach the detailed evaluations
if task.task_type_id == TaskTypeEnum.LEARNING_CURVE:
if task.task_type_id == TaskType.LEARNING_CURVE:
run.sample_evaluations = sample_evaluations
else:
run.fold_evaluations = fold_evaluations
@@ -772,7 +772,7 @@ def obtain_field(xml_obj, fieldname, from_server, cast=None):

if "predictions" not in files and from_server is True:
task = openml.tasks.get_task(task_id)
if task.task_type_id == TaskTypeEnum.SUBGROUP_DISCOVERY:
if task.task_type_id == TaskType.SUBGROUP_DISCOVERY:
raise NotImplementedError("Subgroup discovery tasks are not yet supported.")
else:
# JvR: actually, I am not sure whether this error should be raised.
@@ -1008,7 +1008,7 @@ def __list_runs(api_call, output_format="dict"):
"setup_id": int(run_["oml:setup_id"]),
"flow_id": int(run_["oml:flow_id"]),
"uploader": int(run_["oml:uploader"]),
"task_type": int(run_["oml:task_type_id"]),
"task_type": TaskType(int(run_["oml:task_type_id"])),
"upload_time": str(run_["oml:upload_time"]),
"error_message": str((run_["oml:error_message"]) or ""),
}
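The listing code above now wraps the integer ID reported by the server in the enum. A quick sketch of that round trip, using the member names assumed in the sketch near the top of this page:

from openml.tasks import TaskType

raw = int("1")          # what a field like oml:task_type_id yields after parsing
ttid = TaskType(raw)    # -> TaskType.SUPERVISED_CLASSIFICATION
assert ttid is TaskType.SUPERVISED_CLASSIFICATION
assert ttid.value == 1  # the original ID stays available via .value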
24 changes: 10 additions & 14 deletions openml/runs/run.py
@@ -16,7 +16,7 @@
from ..flows import get_flow
from ..tasks import (
get_task,
TaskTypeEnum,
TaskType,
OpenMLClassificationTask,
OpenMLLearningCurveTask,
OpenMLClusteringTask,
@@ -401,17 +401,13 @@ def get_metric_fn(self, sklearn_fn, kwargs=None):

attribute_names = [att[0] for att in predictions_arff["attributes"]]
if (
task.task_type_id
in [TaskTypeEnum.SUPERVISED_CLASSIFICATION, TaskTypeEnum.LEARNING_CURVE]
task.task_type_id in [TaskType.SUPERVISED_CLASSIFICATION, TaskType.LEARNING_CURVE]
and "correct" not in attribute_names
):
raise ValueError('Attribute "correct" should be set for ' "classification task runs")
if (
task.task_type_id == TaskTypeEnum.SUPERVISED_REGRESSION
and "truth" not in attribute_names
):
if task.task_type_id == TaskType.SUPERVISED_REGRESSION and "truth" not in attribute_names:
raise ValueError('Attribute "truth" should be set for ' "regression task runs")
if task.task_type_id != TaskTypeEnum.CLUSTERING and "prediction" not in attribute_names:
if task.task_type_id != TaskType.CLUSTERING and "prediction" not in attribute_names:
raise ValueError('Attribute "predict" should be set for ' "supervised task runs")

def _attribute_list_to_dict(attribute_list):
@@ -431,11 +427,11 @@ def _attribute_list_to_dict(attribute_list):
predicted_idx = attribute_dict["prediction"] # Assume supervised task

if (
task.task_type_id == TaskTypeEnum.SUPERVISED_CLASSIFICATION
or task.task_type_id == TaskTypeEnum.LEARNING_CURVE
task.task_type_id == TaskType.SUPERVISED_CLASSIFICATION
or task.task_type_id == TaskType.LEARNING_CURVE
):
correct_idx = attribute_dict["correct"]
elif task.task_type_id == TaskTypeEnum.SUPERVISED_REGRESSION:
elif task.task_type_id == TaskType.SUPERVISED_REGRESSION:
correct_idx = attribute_dict["truth"]
has_samples = False
if "sample" in attribute_dict:
@@ -465,14 +461,14 @@ def _attribute_list_to_dict(attribute_list):
samp = 0 # No learning curve sample, always 0

if task.task_type_id in [
TaskTypeEnum.SUPERVISED_CLASSIFICATION,
TaskTypeEnum.LEARNING_CURVE,
TaskType.SUPERVISED_CLASSIFICATION,
TaskType.LEARNING_CURVE,
]:
prediction = predictions_arff["attributes"][predicted_idx][1].index(
line[predicted_idx]
)
correct = predictions_arff["attributes"][predicted_idx][1].index(line[correct_idx])
elif task.task_type_id == TaskTypeEnum.SUPERVISED_REGRESSION:
elif task.task_type_id == TaskType.SUPERVISED_REGRESSION:
prediction = line[predicted_idx]
correct = line[correct_idx]
if rep not in values_predict:
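The branches above decide which column of the predictions file holds the ground truth. A hypothetical helper capturing the same rule (the function name is illustrative and not part of the library):

from openml.tasks import TaskType


def ground_truth_column(task_type):
    # Classification and learning-curve runs store the label in "correct",
    # regression runs in "truth"; mirrors the checks in get_metric_fn above.
    if task_type in (TaskType.SUPERVISED_CLASSIFICATION, TaskType.LEARNING_CURVE):
        return "correct"
    if task_type == TaskType.SUPERVISED_REGRESSION:
        return "truth"
    raise ValueError("no ground-truth column for %s" % task_type)


print(ground_truth_column(TaskType.SUPERVISED_REGRESSION))  # truth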
4 changes: 2 additions & 2 deletions openml/tasks/__init__.py
@@ -7,7 +7,7 @@
OpenMLRegressionTask,
OpenMLClusteringTask,
OpenMLLearningCurveTask,
TaskTypeEnum,
TaskType,
)
from .split import OpenMLSplit
from .functions import (
@@ -29,5 +29,5 @@
"get_tasks",
"list_tasks",
"OpenMLSplit",
"TaskTypeEnum",
"TaskType",
]
74 changes: 37 additions & 37 deletions openml/tasks/functions.py
@@ -15,7 +15,7 @@
OpenMLClassificationTask,
OpenMLClusteringTask,
OpenMLLearningCurveTask,
TaskTypeEnum,
TaskType,
OpenMLRegressionTask,
OpenMLSupervisedTask,
OpenMLTask,
@@ -109,7 +109,7 @@ def _get_estimation_procedure_list():
procs.append(
{
"id": int(proc_["oml:id"]),
"task_type_id": int(proc_["oml:ttid"]),
"task_type_id": TaskType(int(proc_["oml:ttid"])),
"name": proc_["oml:name"],
"type": proc_["oml:type"],
}
@@ -119,22 +119,22 @@


def list_tasks(
task_type_id: Optional[int] = None,
task_type: Optional[TaskType] = None,
offset: Optional[int] = None,
size: Optional[int] = None,
tag: Optional[str] = None,
output_format: str = "dict",
**kwargs
) -> Union[Dict, pd.DataFrame]:
"""
Return a number of tasks having the given tag and task_type_id
Return a number of tasks having the given tag and task_type

Parameters
----------
Filter task_type_id is separated from the other filters because
it is used as task_type_id in the task description, but it is named
Filter task_type is separated from the other filters because
it is used as task_type in the task description, but it is named
type when used as a filter in list tasks call.
task_type_id : int, optional
task_type : TaskType, optional
ID of the task type as detailed `here <https://www.openml.org/search?type=task_type>`_.
- Supervised classification: 1
- Supervised regression: 2
@@ -162,12 +162,12 @@ def list_tasks(
Returns
-------
dict
        All tasks having the given task_type_id and the given tag. Every task is
        All tasks having the given task_type and the given tag. Every task is
represented by a dictionary containing the following information:
task id, dataset id, task_type and status. If qualities are calculated
for the associated dataset, some of these are also returned.
dataframe
        All tasks having the given task_type_id and the given tag. Every task is
        All tasks having the given task_type and the given tag. Every task is
represented by a row in the data frame containing the following information
as columns: task id, dataset id, task_type and status. If qualities are
calculated for the associated dataset, some of these are also returned.
@@ -179,23 +179,23 @@
return openml.utils._list_all(
output_format=output_format,
listing_call=_list_tasks,
task_type_id=task_type_id,
task_type=task_type,
offset=offset,
size=size,
tag=tag,
**kwargs
)


def _list_tasks(task_type_id=None, output_format="dict", **kwargs):
def _list_tasks(task_type=None, output_format="dict", **kwargs):
"""
Perform the api call to return a number of tasks having the given filters.
Parameters
----------
Filter task_type_id is separated from the other filters because
it is used as task_type_id in the task description, but it is named
Filter task_type is separated from the other filters because
it is used as task_type in the task description, but it is named
type when used as a filter in list tasks call.
task_type_id : int, optional
task_type : TaskType, optional
ID of the task type as detailed
`here <https://www.openml.org/search?type=task_type>`_.
- Supervised classification: 1
@@ -220,8 +220,8 @@ def _list_tasks(task_type_id=None, output_format="dict", **kwargs):
dict or dataframe
"""
api_call = "task/list"
if task_type_id is not None:
api_call += "/type/%d" % int(task_type_id)
if task_type is not None:
api_call += "/type/%d" % task_type.value
if kwargs is not None:
for operator, value in kwargs.items():
if operator == "task_id":
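A sketch of the endpoint string these lines build; the keyword filters handled by the loop that follows are omitted. Because TaskType appears to be a plain Enum (hence the explicit .value), the integer the REST API expects has to be unwrapped by hand:

from openml.tasks import TaskType

api_call = "task/list"
task_type = TaskType.SUPERVISED_CLASSIFICATION
if task_type is not None:
    api_call += "/type/%d" % task_type.value
print(api_call)  # task/list/type/1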
@@ -259,7 +259,7 @@ def __list_tasks(api_call, output_format="dict"):
tid = int(task_["oml:task_id"])
task = {
"tid": tid,
"ttid": int(task_["oml:task_type_id"]),
"ttid": TaskType(int(task_["oml:task_type_id"])),
"did": int(task_["oml:did"]),
"name": task_["oml:name"],
"task_type": task_["oml:task_type"],
@@ -417,18 +417,18 @@ def _create_task_from_xml(xml):
"oml:evaluation_measure"
]

task_type_id = int(dic["oml:task_type_id"])
task_type = TaskType(int(dic["oml:task_type_id"]))
common_kwargs = {
"task_id": dic["oml:task_id"],
"task_type": dic["oml:task_type"],
"task_type_id": dic["oml:task_type_id"],
"task_type_id": task_type,
"data_set_id": inputs["source_data"]["oml:data_set"]["oml:data_set_id"],
"evaluation_measure": evaluation_measures,
}
if task_type_id in (
TaskTypeEnum.SUPERVISED_CLASSIFICATION,
TaskTypeEnum.SUPERVISED_REGRESSION,
TaskTypeEnum.LEARNING_CURVE,
if task_type in (
TaskType.SUPERVISED_CLASSIFICATION,
TaskType.SUPERVISED_REGRESSION,
TaskType.LEARNING_CURVE,
):
# Convert some more parameters
for parameter in inputs["estimation_procedure"]["oml:estimation_procedure"][
@@ -448,18 +448,18 @@
]["oml:data_splits_url"]

cls = {
TaskTypeEnum.SUPERVISED_CLASSIFICATION: OpenMLClassificationTask,
TaskTypeEnum.SUPERVISED_REGRESSION: OpenMLRegressionTask,
TaskTypeEnum.CLUSTERING: OpenMLClusteringTask,
TaskTypeEnum.LEARNING_CURVE: OpenMLLearningCurveTask,
}.get(task_type_id)
TaskType.SUPERVISED_CLASSIFICATION: OpenMLClassificationTask,
TaskType.SUPERVISED_REGRESSION: OpenMLRegressionTask,
TaskType.CLUSTERING: OpenMLClusteringTask,
TaskType.LEARNING_CURVE: OpenMLLearningCurveTask,
}.get(task_type)
if cls is None:
raise NotImplementedError("Task type %s not supported." % common_kwargs["task_type"])
return cls(**common_kwargs)
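Both _create_task_from_xml above and create_task below use the same dict-dispatch idiom: map enum members to task classes and let a missing key fall through to None, which is then reported as an unsupported task type. A stripped-down sketch, assuming the task classes are importable from openml.tasks (the __init__.py hunk above shows several of them being imported there):

from openml.tasks import TaskType, OpenMLClassificationTask, OpenMLRegressionTask

TASK_CLASSES = {
    TaskType.SUPERVISED_CLASSIFICATION: OpenMLClassificationTask,
    TaskType.SUPERVISED_REGRESSION: OpenMLRegressionTask,
}

cls = TASK_CLASSES.get(TaskType.CLUSTERING)
if cls is None:
    # create_task raises NotImplementedError in this situation.
    print("Task type %s not supported." % TaskType.CLUSTERING)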


def create_task(
task_type_id: int,
task_type: TaskType,
dataset_id: int,
estimation_procedure_id: int,
target_name: Optional[str] = None,
@@ -480,7 +480,7 @@

Parameters
----------
task_type_id : int
task_type : TaskType
Id of the task type.
dataset_id : int
The id of the dataset for the task.
@@ -501,17 +501,17 @@
OpenMLLearningCurveTask, OpenMLClusteringTask
"""
task_cls = {
TaskTypeEnum.SUPERVISED_CLASSIFICATION: OpenMLClassificationTask,
TaskTypeEnum.SUPERVISED_REGRESSION: OpenMLRegressionTask,
TaskTypeEnum.CLUSTERING: OpenMLClusteringTask,
TaskTypeEnum.LEARNING_CURVE: OpenMLLearningCurveTask,
}.get(task_type_id)
TaskType.SUPERVISED_CLASSIFICATION: OpenMLClassificationTask,
TaskType.SUPERVISED_REGRESSION: OpenMLRegressionTask,
TaskType.CLUSTERING: OpenMLClusteringTask,
TaskType.LEARNING_CURVE: OpenMLLearningCurveTask,
}.get(task_type)

if task_cls is None:
raise NotImplementedError("Task type {0:d} not supported.".format(task_type_id))
raise NotImplementedError("Task type {0:d} not supported.".format(task_type))
else:
return task_cls(
task_type_id=task_type_id,
task_type_id=task_type,
task_type=None,
data_set_id=dataset_id,
target_name=target_name,