diff --git a/.flake8 b/.flake8 index c0fe5e06f..08bb8ea10 100644 --- a/.flake8 +++ b/.flake8 @@ -1,7 +1,7 @@ [flake8] max-line-length = 100 show-source = True -select = C,E,F,W,B +select = C,E,F,W,B,T ignore = E203, E402, W503 per-file-ignores = *__init__.py:F401 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 75e53f0dd..b3a1d2aba 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,6 +19,10 @@ repos: - id: flake8 name: flake8 openml files: openml/* + additional_dependencies: + - flake8-print==3.1.4 - id: flake8 name: flake8 tests files: tests/* + additional_dependencies: + - flake8-print==3.1.4 diff --git a/.travis.yml b/.travis.yml index dcfda6d37..80f3bda42 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,14 +15,20 @@ env: - TEST_DIR=/tmp/test_dir/ - MODULE=openml matrix: - - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.21.2" TEST_DIST="true" - - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.21.2" RUN_FLAKE8="true" SKIP_TESTS="true" - - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.21.2" COVERAGE="true" DOCPUSH="true" - - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.20.2" - # Checks for older scikit-learn versions (which also don't nicely work with - # Python3.7) - - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.19.2" - - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.18.2" SCIPY_VERSION=1.2.0 + - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.21.2" RUN_FLAKE8="true" SKIP_TESTS="true" + - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.21.2" COVERAGE="true" DOCPUSH="true" + - DISTRIB="conda" PYTHON_VERSION="3.8" SKLEARN_VERSION="0.23.1" TEST_DIST="true" + - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.23.1" TEST_DIST="true" + - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.23.1" TEST_DIST="true" + - DISTRIB="conda" PYTHON_VERSION="3.8" SKLEARN_VERSION="0.22.2" TEST_DIST="true" + - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.22.2" TEST_DIST="true" + - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.22.2" TEST_DIST="true" + - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.21.2" TEST_DIST="true" + - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.20.2" + # Checks for older scikit-learn versions (which also don't nicely work with + # Python3.7) + - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.19.2" + - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.18.2" SCIPY_VERSION=1.2.0 # Travis issue # https://github.com/travis-ci/travis-ci/issues/8920 diff --git a/ci_scripts/install.sh b/ci_scripts/install.sh index 67cd1bb38..29181c5c4 100644 --- a/ci_scripts/install.sh +++ b/ci_scripts/install.sh @@ -1,5 +1,7 @@ # License: BSD 3-Clause +set -e + # Deactivate the travis-provided virtual environment and setup a # conda-based environment instead deactivate diff --git a/doc/progress.rst b/doc/progress.rst index 976c5c750..ef5ed6bae 100644 --- a/doc/progress.rst +++ b/doc/progress.rst @@ -8,7 +8,7 @@ Changelog 0.11.0 ~~~~~~ - +* ADD #929: Add data edit API * FIX #873: Fixes an issue which resulted in incorrect URLs when printing OpenML objects after switching the server. * FIX #885: Logger no longer registered by default. Added utility functions to easily register diff --git a/doc/usage.rst b/doc/usage.rst index 36c8584ff..d7ad0d523 100644 --- a/doc/usage.rst +++ b/doc/usage.rst @@ -29,6 +29,35 @@ the OpenML Python connector, followed up by a simple example. 
* `Introduction `_ +~~~~~~~~~~~~~ +Configuration +~~~~~~~~~~~~~ + +The configuration file resides in a directory ``.openml`` in the home +directory of the user and is called config. It consists of ``key = value`` pairs +which are separated by newlines. The following keys are defined: + +* apikey: + * required to access the server. The `OpenML setup `_ describes how to obtain an API key. + +* server: + * default: ``http://www.openml.org``. Alternatively, use ``test.openml.org`` for the test server. + +* cachedir: + * if not given, will default to ``~/.openml/cache`` + +* avoid_duplicate_runs: + * if set to ``True``, when ``run_flow_on_task`` or similar methods are called a lookup is performed to see if there already exists such a run on the server. If so, download those results instead. + * if not given, will default to ``True``. + +* connection_n_retries: + * number of connection retries. + * default: 2. Maximum number of retries: 20. + +* verbosity: + * 0: normal output + * 1: info output + * 2: debug output ~~~~~~~~~~~~ Key concepts diff --git a/examples/30_extended/custom_flow_tutorial.py b/examples/30_extended/custom_flow_tutorial.py new file mode 100644 index 000000000..3b918e108 --- /dev/null +++ b/examples/30_extended/custom_flow_tutorial.py @@ -0,0 +1,205 @@ +""" +================================ +Creating and Using a Custom Flow +================================ + +The most convenient way to create a flow for your machine learning workflow is to generate it +automatically as described in the `Obtain Flow IDs `_ tutorial. # noqa E501 +However, there are scenarios where this is not possible, such +as when the flow uses a framework without an extension or when the flow is described by a script. + +In those cases you can still create a custom flow by following the steps of this tutorial. +As an example we will use the flows generated for the `AutoML Benchmark `_, +and also show how to link runs to the custom flow. +""" + +#################################################################################################### + +# License: BSD 3-Clause +# .. warning:: This example uploads data. For that reason, this example +# connects to the test server at test.openml.org. This prevents the main +# server from crowding with example datasets, tasks, runs, and so on. +from collections import OrderedDict +import numpy as np + +import openml +from openml import OpenMLClassificationTask +from openml.runs.functions import format_prediction + +openml.config.start_using_configuration_for_example() + +#################################################################################################### +# 1. Defining the flow +# ==================== +# The first step is to define all the hyperparameters of your flow. +# The API pages feature a descriptions of each variable of the `OpenMLFlow `_. # noqa E501 +# Note that `external version` and `name` together uniquely identify a flow. +# +# The AutoML Benchmark runs AutoML systems across a range of tasks. +# OpenML stores Flows for each AutoML system. However, the AutoML benchmark adds +# preprocessing to the flow, so should be described in a new flow. +# +# We will break down the flow arguments into several groups, for the tutorial. +# First we will define the name and version information. +# Make sure to leave enough information so others can determine exactly which +# version of the package/script is used. Use tags so users can find your flow easily. 
+
+general = dict(
+    name="automlbenchmark_autosklearn",
+    description=(
+        "Auto-sklearn as set up by the AutoML Benchmark. "
+        "Source: https://github.com/openml/automlbenchmark/releases/tag/v0.9"
+    ),
+    external_version="amlb==0.9",
+    language="English",
+    tags=["amlb", "benchmark", "study_218"],
+    dependencies="amlb==0.9",
+)
+
+####################################################################################################
+# Next we define the flow hyperparameters. We define their name and default value in `parameters`,
+# and provide meta-data for each hyperparameter through `parameters_meta_info`.
+# Note that even though the argument is named `parameters`, it describes the hyperparameters.
+# The use of ordered dicts is required.
+
+flow_hyperparameters = dict(
+    parameters=OrderedDict(time="240", memory="32", cores="8"),
+    parameters_meta_info=OrderedDict(
+        cores=OrderedDict(description="number of available cores", data_type="int"),
+        memory=OrderedDict(description="memory in gigabytes", data_type="int"),
+        time=OrderedDict(description="time in minutes", data_type="int"),
+    ),
+)
+
+####################################################################################################
+# It is possible to build a flow which uses other flows.
+# For example, the Random Forest Classifier is a flow, but you could also construct a flow
+# which uses a Random Forest Classifier in an ML pipeline. When constructing the pipeline flow,
+# you can use the Random Forest Classifier flow as a *subflow*. This allows
+# all hyperparameters of the Random Forest Classifier flow to also be specified in your pipeline flow.
+#
+# In this example, the auto-sklearn flow is a subflow: it is executed entirely as part of this flow.
+# This allows people to specify the auto-sklearn hyperparameters used in this flow.
+# In general, using a subflow is not required.
+#
+# Note: flow 15275 is not actually the right flow on the test server,
+# but that does not matter for this demonstration.
+
+autosklearn_flow = openml.flows.get_flow(15275)  # auto-sklearn 0.5.1
+subflow = dict(components=OrderedDict(automl_tool=autosklearn_flow),)
+
+####################################################################################################
+# With all parameters of the flow defined, we can now initialize the OpenMLFlow and publish it.
+# Because we provided all the details already, we do not need to provide a `model` to the flow.
+#
+# In our case, we do not even have a model. Even when a model exists, you still need to
+# follow these steps when the model (Python object) does not have an extension from which
+# the hyperparameters can be extracted automatically.
+# So whether you have a model with no extension or no model at all, explicitly set
+# the model of the flow to `None`.
+
+autosklearn_amlb_flow = openml.flows.OpenMLFlow(
+    **general, **flow_hyperparameters, **subflow, model=None,
+)
+autosklearn_amlb_flow.publish()
+print(f"autosklearn flow created: {autosklearn_amlb_flow.flow_id}")
+
+####################################################################################################
+# 2. Using the flow
+# ====================
+# This section shows how to upload run data for your custom flow.
+# Take care to change the parameter values as well as the task id
+# to reflect the actual run.
+# Task and parameter values in the example are fictional.
+ +flow_id = autosklearn_amlb_flow.flow_id + +parameters = [ + OrderedDict([("oml:name", "cores"), ("oml:value", 4), ("oml:component", flow_id)]), + OrderedDict([("oml:name", "memory"), ("oml:value", 16), ("oml:component", flow_id)]), + OrderedDict([("oml:name", "time"), ("oml:value", 120), ("oml:component", flow_id)]), +] + +task_id = 1408 # Iris Task +task = openml.tasks.get_task(task_id) +dataset_id = task.get_dataset().dataset_id + + +#################################################################################################### +# The last bit of information for the run we need are the predicted values. +# The exact format of the predictions will depend on the task. +# +# The predictions should always be a list of lists, each list should contain: +# - the repeat number: for repeated evaluation strategies. (e.g. repeated cross-validation) +# - the fold number: for cross-validation. (what should this be for holdout?) +# - 0: this field is for backward compatibility. +# - index: the row (of the original dataset) for which the prediction was made. +# - p_1, ..., p_c: for each class the predicted probability of the sample +# belonging to that class. (no elements for regression tasks) +# Make sure the order of these elements follows the order of `task.class_labels`. +# - the predicted class/value for the sample +# - the true class/value for the sample +# +# When using openml-python extensions (such as through `run_model_on_task`), +# all of this formatting is automatic. +# Unfortunately we can not automate this procedure for custom flows, +# which means a little additional effort is required. +# +# Here we generated some random predictions in place. +# You can ignore this code, or use it to better understand the formatting of the predictions. +# +# Find the repeats/folds for this task: +n_repeats, n_folds, _ = task.get_split_dimensions() +all_test_indices = [ + (repeat, fold, index) + for repeat in range(n_repeats) + for fold in range(n_folds) + for index in task.get_train_test_split_indices(fold, repeat)[1] +] + +# random class probabilities (Iris has 150 samples and 3 classes): +r = np.random.rand(150 * n_repeats, 3) +# scale the random values so that the probabilities of each sample sum to 1: +y_proba = r / r.sum(axis=1).reshape(-1, 1) +y_pred = y_proba.argmax(axis=1) + +class_map = dict(zip(range(3), task.class_labels)) +_, y_true = task.get_X_and_y() +y_true = [class_map[y] for y in y_true] + +# We format the predictions with the utility function `format_prediction`. +# It will organize the relevant data in the expected format/order. +predictions = [] +for where, y, yp, proba in zip(all_test_indices, y_true, y_pred, y_proba): + repeat, fold, index = where + + prediction = format_prediction( + task=task, + repeat=repeat, + fold=fold, + index=index, + prediction=class_map[yp], + truth=y, + proba={c: pb for (c, pb) in zip(task.class_labels, proba)}, + ) + predictions.append(prediction) + +#################################################################################################### +# Finally we can create the OpenMLRun object and upload. +# We use the argument setup_string because the used flow was a script. 
+ +benchmark_command = f"python3 runbenchmark.py auto-sklearn medium -m aws -t 119" +my_run = openml.runs.OpenMLRun( + task_id=task_id, + flow_id=flow_id, + dataset_id=dataset_id, + parameter_settings=parameters, + setup_string=benchmark_command, + data_content=predictions, + tags=["study_218"], + description_text="Run generated by the Custom Flow tutorial.", +) +my_run.publish() +print("run created:", my_run.run_id) + +openml.config.stop_using_configuration_for_example() diff --git a/examples/30_extended/datasets_tutorial.py b/examples/30_extended/datasets_tutorial.py index d7971d0f1..e129b7718 100644 --- a/examples/30_extended/datasets_tutorial.py +++ b/examples/30_extended/datasets_tutorial.py @@ -5,12 +5,13 @@ How to list and download datasets. """ -############################################################################ +"" # License: BSD 3-Clauses import openml import pandas as pd +from openml.datasets.functions import edit_dataset, get_dataset ############################################################################ # Exercise 0 @@ -20,7 +21,7 @@ # # * Use the output_format parameter to select output type # * Default gives 'dict' (other option: 'dataframe', see below) - +# openml_list = openml.datasets.list_datasets() # returns a dict # Show a nice table with some key data properties @@ -42,9 +43,9 @@ # * Find a dataset called 'eeg_eye_state'. # * Find all datasets with more than 50 classes. datalist[datalist.NumberOfInstances > 10000].sort_values(["NumberOfInstances"]).head(n=20) -############################################################################ +"" datalist.query('name == "eeg-eye-state"') -############################################################################ +"" datalist.query("NumberOfClasses > 50") ############################################################################ @@ -108,3 +109,41 @@ alpha=0.8, cmap="plasma", ) + + +############################################################################ +# Edit a created dataset +# ================================================= +# This example uses the test server, to avoid editing a dataset on the main server. 
+openml.config.start_using_configuration_for_example() +############################################################################ +# Edit non-critical fields, allowed for all authorized users: +# description, creator, contributor, collection_date, language, citation, +# original_data_url, paper_url +desc = ( + "This data sets consists of 3 different types of irises' " + "(Setosa, Versicolour, and Virginica) petal and sepal length," + " stored in a 150x4 numpy.ndarray" +) +did = 128 +data_id = edit_dataset( + did, + description=desc, + creator="R.A.Fisher", + collection_date="1937", + citation="The use of multiple measurements in taxonomic problems", + language="English", +) +edited_dataset = get_dataset(data_id) +print(f"Edited dataset ID: {data_id}") + + +############################################################################ +# Edit critical fields, allowed only for owners of the dataset: +# default_target_attribute, row_id_attribute, ignore_attribute +# To edit critical fields of a dataset owned by you, configure the API key: +# openml.config.apikey = 'FILL_IN_OPENML_API_KEY' +data_id = edit_dataset(564, default_target_attribute="y") +print(f"Edited dataset ID: {data_id}") + +openml.config.stop_using_configuration_for_example() diff --git a/examples/30_extended/fetch_evaluations_tutorial.py b/examples/30_extended/fetch_evaluations_tutorial.py index de636e074..2823eabf3 100644 --- a/examples/30_extended/fetch_evaluations_tutorial.py +++ b/examples/30_extended/fetch_evaluations_tutorial.py @@ -63,7 +63,7 @@ metric = "predictive_accuracy" evals = openml.evaluations.list_evaluations( - function=metric, task=[task_id], output_format="dataframe" + function=metric, tasks=[task_id], output_format="dataframe" ) # Displaying the first 10 rows print(evals.head(n=10)) @@ -162,7 +162,7 @@ def plot_flow_compare(evaluations, top_n=10, metric="predictive_accuracy"): # List evaluations in descending order based on predictive_accuracy with # hyperparameters evals_setups = openml.evaluations.list_evaluations_setups( - function="predictive_accuracy", task=[31], size=100, sort_order="desc" + function="predictive_accuracy", tasks=[31], size=100, sort_order="desc" ) "" @@ -173,7 +173,7 @@ def plot_flow_compare(evaluations, top_n=10, metric="predictive_accuracy"): # with hyperparameters. parameters_in_separate_columns returns parameters in # separate columns evals_setups = openml.evaluations.list_evaluations_setups( - function="predictive_accuracy", flow=[6767], size=100, parameters_in_separate_columns=True + function="predictive_accuracy", flows=[6767], size=100, parameters_in_separate_columns=True ) "" diff --git a/examples/30_extended/plot_svm_hyperparameters_tutorial.py b/examples/30_extended/plot_svm_hyperparameters_tutorial.py index aac84bcd4..e366c56df 100644 --- a/examples/30_extended/plot_svm_hyperparameters_tutorial.py +++ b/examples/30_extended/plot_svm_hyperparameters_tutorial.py @@ -20,8 +20,8 @@ # uploaded runs (called *setup*). df = openml.evaluations.list_evaluations_setups( function="predictive_accuracy", - flow=[8353], - task=[6], + flows=[8353], + tasks=[6], output_format="dataframe", # Using this flag incorporates the hyperparameters into the returned dataframe. Otherwise, # the dataframe would contain a field ``paramaters`` containing an unparsed dictionary. 
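Editor's note: the hunks above and below migrate callers from the old singular filter arguments (``task``, ``flow``, ``setup``, ``run``, ``uploader``) to the new plural names (``tasks``, ``flows``, ``setups``, ``runs``, ``uploaders``). As a quick reference, here is a minimal usage sketch against the renamed keyword arguments; the task and flow IDs are placeholders borrowed from the examples in this diff, not recommendations.

import openml

# List predictive-accuracy evaluations, filtering with the renamed plural arguments.
evals = openml.evaluations.list_evaluations(
    function="predictive_accuracy",
    tasks=[6],      # previously `task=[6]`
    flows=[8353],   # previously `flow=[8353]`
    output_format="dataframe",
)
print(evals.head(n=10))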
diff --git a/examples/40_paper/2018_ida_strang_example.py b/examples/40_paper/2018_ida_strang_example.py
index 74c6fde5f..687d973c2 100644
--- a/examples/40_paper/2018_ida_strang_example.py
+++ b/examples/40_paper/2018_ida_strang_example.py
@@ -47,7 +47,7 @@

 # Downloads all evaluation records related to this study
 evaluations = openml.evaluations.list_evaluations(
-    measure, flow=flow_ids, study=study_id, output_format="dataframe"
+    measure, flows=flow_ids, study=study_id, output_format="dataframe"
 )
 # gives us a table with columns data_id, flow1_value, flow2_value
 evaluations = evaluations.pivot(index="data_id", columns="flow_id", values="value").dropna()
diff --git a/examples/40_paper/2018_kdd_rijn_example.py b/examples/40_paper/2018_kdd_rijn_example.py
index e5d998e35..752419ea3 100644
--- a/examples/40_paper/2018_kdd_rijn_example.py
+++ b/examples/40_paper/2018_kdd_rijn_example.py
@@ -88,8 +88,8 @@
     # note that we explicitly only include tasks from the benchmark suite that was specified (as per the for-loop)
     evals = openml.evaluations.list_evaluations_setups(
         evaluation_measure,
-        flow=[flow_id],
-        task=[task_id],
+        flows=[flow_id],
+        tasks=[task_id],
         size=limit_per_task,
         output_format="dataframe",
     )
diff --git a/examples/40_paper/2018_neurips_perrone_example.py b/examples/40_paper/2018_neurips_perrone_example.py
index 8639e0a3a..60d212116 100644
--- a/examples/40_paper/2018_neurips_perrone_example.py
+++ b/examples/40_paper/2018_neurips_perrone_example.py
@@ -91,9 +91,9 @@ def fetch_evaluations(run_full=False, flow_type="svm", metric="area_under_roc_cu
     # Fetching evaluations
     eval_df = openml.evaluations.list_evaluations_setups(
         function=metric,
-        task=task_ids,
-        flow=[flow_id],
-        uploader=[2702],
+        tasks=task_ids,
+        flows=[flow_id],
+        uploaders=[2702],
         output_format="dataframe",
         parameters_in_separate_columns=True,
     )
diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py
index 79fa82867..0f3037a74 100644
--- a/openml/datasets/functions.py
+++ b/openml/datasets/functions.py
@@ -799,6 +799,104 @@ def status_update(data_id, status):
         raise ValueError("Data id/status does not collide")


+def edit_dataset(
+    data_id,
+    description=None,
+    creator=None,
+    contributor=None,
+    collection_date=None,
+    language=None,
+    default_target_attribute=None,
+    ignore_attribute=None,
+    citation=None,
+    row_id_attribute=None,
+    original_data_url=None,
+    paper_url=None,
+) -> int:
+    """
+    Edits an OpenMLDataset.
+    Specify at least one field to edit, apart from data_id.
+    - For certain fields, a new dataset version is created: attributes, data,
+      default_target_attribute, ignore_attribute, row_id_attribute.
+
+    - For other fields, the uploader can edit the existing version.
+      No one except the uploader can edit the existing version.
+
+    Parameters
+    ----------
+    data_id : int
+        ID of the dataset.
+    description : str
+        Description of the dataset.
+    creator : str
+        The person who created the dataset.
+    contributor : str
+        People who contributed to the current version of the dataset.
+    collection_date : str
+        The date the data was originally collected, given by the uploader.
+    language : str
+        Language in which the data is represented.
+        Starts with 1 upper case letter, rest lower case, e.g. 'English'.
+    default_target_attribute : str
+        The default target attribute, if it exists.
+        Can have multiple values, comma separated.
+    ignore_attribute : str | list
+        Attributes that should be excluded in modelling,
+        such as identifiers and indexes.
+ citation : str + Reference(s) that should be cited when building on this data. + row_id_attribute : str, optional + The attribute that represents the row-id column, if present in the + dataset. If ``data`` is a dataframe and ``row_id_attribute`` is not + specified, the index of the dataframe will be used as the + ``row_id_attribute``. If the name of the index is ``None``, it will + be discarded. + + .. versionadded: 0.8 + Inference of ``row_id_attribute`` from a dataframe. + original_data_url : str, optional + For derived data, the url to the original dataset. + paper_url : str, optional + Link to a paper describing the dataset. + + + Returns + ------- + data_id of the existing edited version or the new version created and published""" + if not isinstance(data_id, int): + raise TypeError("`data_id` must be of type `int`, not {}.".format(type(data_id))) + + # compose data edit parameters as xml + form_data = {"data_id": data_id} + xml = OrderedDict() # type: 'OrderedDict[str, OrderedDict]' + xml["oml:data_edit_parameters"] = OrderedDict() + xml["oml:data_edit_parameters"]["@xmlns:oml"] = "http://openml.org/openml" + xml["oml:data_edit_parameters"]["oml:description"] = description + xml["oml:data_edit_parameters"]["oml:creator"] = creator + xml["oml:data_edit_parameters"]["oml:contributor"] = contributor + xml["oml:data_edit_parameters"]["oml:collection_date"] = collection_date + xml["oml:data_edit_parameters"]["oml:language"] = language + xml["oml:data_edit_parameters"]["oml:default_target_attribute"] = default_target_attribute + xml["oml:data_edit_parameters"]["oml:row_id_attribute"] = row_id_attribute + xml["oml:data_edit_parameters"]["oml:ignore_attribute"] = ignore_attribute + xml["oml:data_edit_parameters"]["oml:citation"] = citation + xml["oml:data_edit_parameters"]["oml:original_data_url"] = original_data_url + xml["oml:data_edit_parameters"]["oml:paper_url"] = paper_url + + # delete None inputs + for k in list(xml["oml:data_edit_parameters"]): + if not xml["oml:data_edit_parameters"][k]: + del xml["oml:data_edit_parameters"][k] + + file_elements = {"edit_parameters": ("description.xml", xmltodict.unparse(xml))} + result_xml = openml._api_calls._perform_api_call( + "data/edit", "post", data=form_data, file_elements=file_elements + ) + result = xmltodict.parse(result_xml) + data_id = result["oml:data_edit"]["oml:id"] + return int(data_id) + + def _get_dataset_description(did_cache_dir, dataset_id): """Get the dataset description as xml dictionary. 
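Editor's note: since ``edit_dataset`` is introduced above as a new public function, a short usage sketch may help; it mirrors the datasets tutorial earlier in this diff. The dataset ID (128) and the field values are illustrative only, and the call targets the test server.

import openml
from openml.datasets.functions import edit_dataset

# Point the client at the test server so the sketch does not touch production data.
openml.config.start_using_configuration_for_example()

# Edit a few non-critical fields; the ID of the edited (or newly created) version is returned.
data_id = edit_dataset(
    128,
    description="Iris flower measurements (150x4), three classes.",
    creator="R.A.Fisher",
    collection_date="1937",
    language="English",
)
print(openml.datasets.get_dataset(data_id).description)

openml.config.stop_using_configuration_for_example()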
diff --git a/openml/evaluations/functions.py b/openml/evaluations/functions.py index adaf419ef..4c17f8ce7 100644 --- a/openml/evaluations/functions.py +++ b/openml/evaluations/functions.py @@ -17,11 +17,11 @@ def list_evaluations( function: str, offset: Optional[int] = None, size: Optional[int] = None, - task: Optional[List] = None, - setup: Optional[List] = None, - flow: Optional[List] = None, - run: Optional[List] = None, - uploader: Optional[List] = None, + tasks: Optional[List[Union[str, int]]] = None, + setups: Optional[List[Union[str, int]]] = None, + flows: Optional[List[Union[str, int]]] = None, + runs: Optional[List[Union[str, int]]] = None, + uploaders: Optional[List[Union[str, int]]] = None, tag: Optional[str] = None, study: Optional[int] = None, per_fold: Optional[bool] = None, @@ -41,17 +41,18 @@ def list_evaluations( size : int, optional the maximum number of runs to show - task : list, optional - - setup: list, optional - - flow : list, optional - - run : list, optional - - uploader : list, optional - + tasks : list[int,str], optional + the list of task IDs + setups: list[int,str], optional + the list of setup IDs + flows : list[int,str], optional + the list of flow IDs + runs :list[int,str], optional + the list of run IDs + uploaders : list[int,str], optional + the list of uploader IDs tag : str, optional + filter evaluation based on given tag study : int, optional @@ -85,11 +86,11 @@ def list_evaluations( function=function, offset=offset, size=size, - task=task, - setup=setup, - flow=flow, - run=run, - uploader=uploader, + tasks=tasks, + setups=setups, + flows=flows, + runs=runs, + uploaders=uploaders, tag=tag, study=study, sort_order=sort_order, @@ -99,11 +100,11 @@ def list_evaluations( def _list_evaluations( function: str, - task: Optional[List] = None, - setup: Optional[List] = None, - flow: Optional[List] = None, - run: Optional[List] = None, - uploader: Optional[List] = None, + tasks: Optional[List] = None, + setups: Optional[List] = None, + flows: Optional[List] = None, + runs: Optional[List] = None, + uploaders: Optional[List] = None, study: Optional[int] = None, sort_order: Optional[str] = None, output_format: str = "object", @@ -120,15 +121,16 @@ def _list_evaluations( function : str the evaluation function. 
e.g., predictive_accuracy - task : list, optional - - setup: list, optional - - flow : list, optional - - run : list, optional - - uploader : list, optional + tasks : list[int,str], optional + the list of task IDs + setups: list[int,str], optional + the list of setup IDs + flows : list[int,str], optional + the list of flow IDs + runs :list[int,str], optional + the list of run IDs + uploaders : list[int,str], optional + the list of uploader IDs study : int, optional @@ -155,16 +157,16 @@ def _list_evaluations( if kwargs is not None: for operator, value in kwargs.items(): api_call += "/%s/%s" % (operator, value) - if task is not None: - api_call += "/task/%s" % ",".join([str(int(i)) for i in task]) - if setup is not None: - api_call += "/setup/%s" % ",".join([str(int(i)) for i in setup]) - if flow is not None: - api_call += "/flow/%s" % ",".join([str(int(i)) for i in flow]) - if run is not None: - api_call += "/run/%s" % ",".join([str(int(i)) for i in run]) - if uploader is not None: - api_call += "/uploader/%s" % ",".join([str(int(i)) for i in uploader]) + if tasks is not None: + api_call += "/task/%s" % ",".join([str(int(i)) for i in tasks]) + if setups is not None: + api_call += "/setup/%s" % ",".join([str(int(i)) for i in setups]) + if flows is not None: + api_call += "/flow/%s" % ",".join([str(int(i)) for i in flows]) + if runs is not None: + api_call += "/run/%s" % ",".join([str(int(i)) for i in runs]) + if uploaders is not None: + api_call += "/uploader/%s" % ",".join([str(int(i)) for i in uploaders]) if study is not None: api_call += "/study/%d" % study if sort_order is not None: @@ -276,11 +278,11 @@ def list_evaluations_setups( function: str, offset: Optional[int] = None, size: Optional[int] = None, - task: Optional[List] = None, - setup: Optional[List] = None, - flow: Optional[List] = None, - run: Optional[List] = None, - uploader: Optional[List] = None, + tasks: Optional[List] = None, + setups: Optional[List] = None, + flows: Optional[List] = None, + runs: Optional[List] = None, + uploaders: Optional[List] = None, tag: Optional[str] = None, per_fold: Optional[bool] = None, sort_order: Optional[str] = None, @@ -299,15 +301,15 @@ def list_evaluations_setups( the number of runs to skip, starting from the first size : int, optional the maximum number of runs to show - task : list[int], optional + tasks : list[int], optional the list of task IDs - setup: list[int], optional + setups: list[int], optional the list of setup IDs - flow : list[int], optional + flows : list[int], optional the list of flow IDs - run : list[int], optional + runs : list[int], optional the list of run IDs - uploader : list[int], optional + uploaders : list[int], optional the list of uploader IDs tag : str, optional filter evaluation based on given tag @@ -327,7 +329,7 @@ def list_evaluations_setups( ------- dict or dataframe with hyperparameter settings as a list of tuples. 
""" - if parameters_in_separate_columns and (flow is None or len(flow) != 1): + if parameters_in_separate_columns and (flows is None or len(flows) != 1): raise ValueError( "Can set parameters_in_separate_columns to true " "only for single flow_id" ) @@ -337,11 +339,11 @@ def list_evaluations_setups( function=function, offset=offset, size=size, - run=run, - task=task, - setup=setup, - flow=flow, - uploader=uploader, + runs=runs, + tasks=tasks, + setups=setups, + flows=flows, + uploaders=uploaders, tag=tag, per_fold=per_fold, sort_order=sort_order, @@ -359,24 +361,26 @@ def list_evaluations_setups( setup_chunks = np.array_split( ary=evals["setup_id"].unique(), indices_or_sections=((length - 1) // N) + 1 ) - setups = pd.DataFrame() - for setup in setup_chunks: - result = pd.DataFrame(openml.setups.list_setups(setup=setup, output_format="dataframe")) + setup_data = pd.DataFrame() + for setups in setup_chunks: + result = pd.DataFrame( + openml.setups.list_setups(setup=setups, output_format="dataframe") + ) result.drop("flow_id", axis=1, inplace=True) # concat resulting setup chunks into single datframe - setups = pd.concat([setups, result], ignore_index=True) + setup_data = pd.concat([setup_data, result], ignore_index=True) parameters = [] # Convert parameters of setup into list of tuples of (hyperparameter, value) - for parameter_dict in setups["parameters"]: + for parameter_dict in setup_data["parameters"]: if parameter_dict is not None: parameters.append( {param["full_name"]: param["value"] for param in parameter_dict.values()} ) else: parameters.append({}) - setups["parameters"] = parameters + setup_data["parameters"] = parameters # Merge setups with evaluations - df = pd.merge(evals, setups, on="setup_id", how="left") + df = pd.merge(evals, setup_data, on="setup_id", how="left") if parameters_in_separate_columns: df = pd.concat([df.drop("parameters", axis=1), df["parameters"].apply(pd.Series)], axis=1) diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py index af0b42144..4a3015bdc 100644 --- a/openml/extensions/sklearn/extension.py +++ b/openml/extensions/sklearn/extension.py @@ -994,12 +994,16 @@ def _get_fn_arguments_with_defaults(self, fn_name: Callable) -> Tuple[Dict, Set] a set with all parameters that do not have a default value """ # parameters with defaults are optional, all others are required. - signature = inspect.getfullargspec(fn_name) - if signature.defaults: - optional_params = dict(zip(reversed(signature.args), reversed(signature.defaults))) - else: - optional_params = dict() - required_params = {arg for arg in signature.args if arg not in optional_params} + parameters = inspect.signature(fn_name).parameters + required_params = set() + optional_params = dict() + for param in parameters.keys(): + parameter = parameters.get(param) + default_val = parameter.default # type: ignore + if default_val is inspect.Signature.empty: + required_params.add(param) + else: + optional_params[param] = default_val return optional_params, required_params def _deserialize_model( @@ -1312,7 +1316,7 @@ def _prevent_optimize_n_jobs(self, model): "Could not find attribute " "param_distributions." ) - print( + logger.warning( "Warning! Using subclass BaseSearchCV other than " "{GridSearchCV, RandomizedSearchCV}. " "Should implement param check. 
" @@ -1346,7 +1350,7 @@ def _can_measure_cputime(self, model: Any) -> bool: # check the parameters for n_jobs n_jobs_vals = SklearnExtension._get_parameter_values_recursive(model.get_params(), "n_jobs") for val in n_jobs_vals: - if val is not None and val != 1: + if val is not None and val != 1 and val != "deprecated": return False return True diff --git a/openml/runs/functions.py b/openml/runs/functions.py index b3b15d16e..a3888d3a1 100644 --- a/openml/runs/functions.py +++ b/openml/runs/functions.py @@ -4,6 +4,7 @@ import io import itertools import os +import time from typing import Any, List, Dict, Optional, Set, Tuple, Union, TYPE_CHECKING # noqa F401 import warnings @@ -250,7 +251,8 @@ def run_flow_on_task( ) data_content, trace, fold_evaluations, sample_evaluations = res - + fields = [*run_environment, time.strftime("%c"), "Created by run_flow_on_task"] + generated_description = "\n".join(fields) run = OpenMLRun( task_id=task.task_id, flow_id=flow_id, @@ -262,6 +264,7 @@ def run_flow_on_task( data_content=data_content, flow=flow, setup_string=flow.extension.create_setup_string(flow.model), + description_text=generated_description, ) if (upload_flow or avoid_duplicate_runs) and flow.flow_id is not None: @@ -478,13 +481,17 @@ def _calculate_local_measure(sklearn_fn, openml_name): for i, tst_idx in enumerate(test_indices): - arff_line = [rep_no, fold_no, sample_no, tst_idx] # type: List[Any] if task.class_labels is not None: - for j, class_label in enumerate(task.class_labels): - arff_line.append(proba_y[i][j]) - - arff_line.append(task.class_labels[pred_y[i]]) - arff_line.append(task.class_labels[test_y[i]]) + arff_line = format_prediction( + task=task, + repeat=rep_no, + fold=fold_no, + sample=sample_no, + index=tst_idx, + prediction=task.class_labels[pred_y[i]], + truth=task.class_labels[test_y[i]], + proba=dict(zip(task.class_labels, proba_y[i])), + ) else: raise ValueError("The task has no class labels") @@ -498,7 +505,15 @@ def _calculate_local_measure(sklearn_fn, openml_name): elif isinstance(task, OpenMLRegressionTask): for i in range(0, len(test_indices)): - arff_line = [rep_no, fold_no, test_indices[i], pred_y[i], test_y[i]] + arff_line = format_prediction( + task=task, + repeat=rep_no, + fold=fold_no, + index=test_indices[i], + prediction=pred_y[i], + truth=test_y[i], + ) + arff_datacontent.append(arff_line) if add_local_measures: @@ -815,7 +830,7 @@ def list_runs( study: Optional[int] = None, display_errors: bool = False, output_format: str = "dict", - **kwargs + **kwargs, ) -> Union[Dict, pd.DataFrame]: """ List all runs matching all of the given filters. @@ -887,7 +902,7 @@ def list_runs( tag=tag, study=study, display_errors=display_errors, - **kwargs + **kwargs, ) @@ -900,7 +915,7 @@ def _list_runs( study: Optional[int] = None, display_errors: bool = False, output_format: str = "dict", - **kwargs + **kwargs, ) -> Union[Dict, pd.DataFrame]: """ Perform API call `/run/list/{filters}' @@ -1004,3 +1019,63 @@ def __list_runs(api_call, output_format="dict"): runs = pd.DataFrame.from_dict(runs, orient="index") return runs + + +def format_prediction( + task: OpenMLSupervisedTask, + repeat: int, + fold: int, + index: int, + prediction: Union[str, int, float], + truth: Union[str, int, float], + sample: Optional[int] = None, + proba: Optional[Dict[str, float]] = None, +) -> List[Union[str, int, float]]: + """ Format the predictions in the specific order as required for the run results. 
+ + Parameters + ---------- + task: OpenMLSupervisedTask + Task for which to format the predictions. + repeat: int + From which repeat this predictions is made. + fold: int + From which fold this prediction is made. + index: int + For which index this prediction is made. + prediction: str, int or float + The predicted class label or value. + truth: str, int or float + The true class label or value. + sample: int, optional (default=None) + From which sample set this prediction is made. + Required only for LearningCurve tasks. + proba: Dict[str, float], optional (default=None) + For classification tasks only. + A mapping from each class label to their predicted probability. + The dictionary should contain an entry for each of the `task.class_labels`. + E.g.: {"Iris-Setosa": 0.2, "Iris-Versicolor": 0.7, "Iris-Virginica": 0.1} + + Returns + ------- + A list with elements for the prediction results of a run. + + """ + if isinstance(task, OpenMLClassificationTask): + if proba is None: + raise ValueError("`proba` is required for classification task") + if task.class_labels is None: + raise ValueError("The classification task must have class labels set") + if not set(task.class_labels) == set(proba): + raise ValueError("Each class should have a predicted probability") + if sample is None: + if isinstance(task, OpenMLLearningCurveTask): + raise ValueError("`sample` can not be none for LearningCurveTask") + else: + sample = 0 + probabilities = [proba[c] for c in task.class_labels] + return [repeat, fold, sample, index, *probabilities, truth, prediction] + elif isinstance(task, OpenMLRegressionTask): + return [repeat, fold, index, truth, prediction] + else: + raise NotImplementedError(f"Formatting for {type(task)} is not supported.") diff --git a/openml/runs/run.py b/openml/runs/run.py index a61fc4688..b8be9c3a3 100644 --- a/openml/runs/run.py +++ b/openml/runs/run.py @@ -27,14 +27,37 @@ class OpenMLRun(OpenMLBase): """OpenML Run: result of running a model on an openml dataset. - Parameters - ---------- - task_id : int - Refers to the task. - flow_id : int - Refers to the flow. - dataset_id: int - Refers to the data. + Parameters + ---------- + task_id: int + flow_id: int + dataset_id: int + setup_string: str + output_files: Dict[str, str] + A dictionary that specifies where each related file can be found. + setup_id: int + tags: List[str] + uploader: int + User ID of the uploader. + uploader_name: str + evaluations: Dict + fold_evaluations: Dict + sample_evaluations: Dict + data_content: List[List] + The predictions generated from executing this run. + trace: OpenMLRunTrace + model: object + task_type: str + task_evaluation_measure: str + flow_name: str + parameter_settings: List[OrderedDict] + predictions_url: str + task: OpenMLTask + flow: OpenMLFlow + run_id: int + description_text: str, optional + Description text to add to the predictions file. 
+ If left None, """ def __init__( @@ -62,6 +85,7 @@ def __init__( task=None, flow=None, run_id=None, + description_text=None, ): self.uploader = uploader self.uploader_name = uploader_name @@ -87,6 +111,7 @@ def __init__( self.model = model self.tags = tags self.predictions_url = predictions_url + self.description_text = description_text @property def id(self) -> Optional[int]: @@ -264,16 +289,13 @@ def _generate_arff_dict(self) -> "OrderedDict[str, Any]": if self.flow is None: self.flow = get_flow(self.flow_id) - run_environment = ( - self.flow.extension.get_version_information() - + [time.strftime("%c")] - + ["Created by run_task()"] - ) + if self.description_text is None: + self.description_text = time.strftime("%c") task = get_task(self.task_id) arff_dict = OrderedDict() # type: 'OrderedDict[str, Any]' arff_dict["data"] = self.data_content - arff_dict["description"] = "\n".join(run_environment) + arff_dict["description"] = self.description_text arff_dict["relation"] = "openml_task_{}_predictions".format(task.task_id) if isinstance(task, OpenMLLearningCurveTask): @@ -485,9 +507,9 @@ def _get_file_elements(self) -> Dict: Derived child classes should overwrite this method as necessary. The description field will be populated automatically if not provided. """ - if self.model is None: + if self.parameter_settings is None and self.model is None: raise PyOpenMLError( - "OpenMLRun obj does not contain a model. " "(This should never happen.) " + "OpenMLRun must contain a model or be initialized with parameter_settings." ) if self.flow_id is None: if self.flow is None: diff --git a/setup.py b/setup.py index f1f7a5871..476becc10 100644 --- a/setup.py +++ b/setup.py @@ -96,5 +96,6 @@ "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", ], ) diff --git a/tests/conftest.py b/tests/conftest.py index 59fa33aca..461a513fd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -40,7 +40,6 @@ # exploiting the fact that conftest.py always resides in the root directory for tests static_dir = os.path.dirname(os.path.abspath(__file__)) logger.info("static directory: {}".format(static_dir)) -print("static directory: {}".format(static_dir)) while True: if "openml" in os.listdir(static_dir): break diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index 958d28d94..5076d06c2 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -16,11 +16,17 @@ import openml from openml import OpenMLDataset -from openml.exceptions import OpenMLCacheException, OpenMLHashException, OpenMLPrivateDatasetError +from openml.exceptions import ( + OpenMLCacheException, + OpenMLHashException, + OpenMLPrivateDatasetError, + OpenMLServerException, +) from openml.testing import TestBase from openml.utils import _tag_entity, _create_cache_directory_for_id from openml.datasets.functions import ( create_dataset, + edit_dataset, attributes_arff_from_df, _get_cached_dataset, _get_cached_dataset_features, @@ -1154,7 +1160,9 @@ def test_publish_fetch_ignore_attribute(self): except Exception as e: # returned code 273: Dataset not processed yet # returned code 362: No qualities found - print("Failed to fetch dataset:{} with '{}'.".format(dataset.id, str(e))) + TestBase.logger.error( + "Failed to fetch dataset:{} with '{}'.".format(dataset.id, str(e)) + ) time.sleep(10) continue if downloaded_dataset is None: @@ -1331,3 
+1339,71 @@ def test_get_dataset_cache_format_feather(self): self.assertEqual(X.shape, (150, 5)) self.assertEqual(len(categorical), X.shape[1]) self.assertEqual(len(attribute_names), X.shape[1]) + + def test_data_edit(self): + # Case 1 + # All users can edit non-critical fields of datasets + desc = ( + "This data sets consists of 3 different types of irises' " + "(Setosa, Versicolour, and Virginica) petal and sepal length," + " stored in a 150x4 numpy.ndarray" + ) + did = 128 + result = edit_dataset( + did, + description=desc, + creator="R.A.Fisher", + collection_date="1937", + citation="The use of multiple measurements in taxonomic problems", + language="English", + ) + self.assertEqual(did, result) + edited_dataset = openml.datasets.get_dataset(did) + self.assertEqual(edited_dataset.description, desc) + + # Case 2 + # only owners (or admin) can edit all critical fields of datasets + # this is a dataset created by CI, so it is editable by this test + did = 315 + result = edit_dataset(did, default_target_attribute="col_1", ignore_attribute="col_2") + self.assertEqual(did, result) + edited_dataset = openml.datasets.get_dataset(did) + self.assertEqual(edited_dataset.ignore_attribute, ["col_2"]) + + def test_data_edit_errors(self): + # Check server exception when no field to edit is provided + self.assertRaisesRegex( + OpenMLServerException, + "Please provide atleast one field among description, creator, " + "contributor, collection_date, language, citation, " + "original_data_url, default_target_attribute, row_id_attribute, " + "ignore_attribute or paper_url to edit.", + edit_dataset, + data_id=564, + ) + # Check server exception when unknown dataset is provided + self.assertRaisesRegex( + OpenMLServerException, + "Unknown dataset", + edit_dataset, + data_id=100000, + description="xor operation dataset", + ) + # Check server exception when owner/admin edits critical features of dataset with tasks + self.assertRaisesRegex( + OpenMLServerException, + "Critical features default_target_attribute, row_id_attribute and ignore_attribute " + "can only be edited for datasets without any tasks.", + edit_dataset, + data_id=223, + default_target_attribute="y", + ) + # Check server exception when a non-owner or non-admin tries to edit critical features + self.assertRaisesRegex( + OpenMLServerException, + "Critical features default_target_attribute, row_id_attribute and ignore_attribute " + "can be edited only by the owner. 
Fork the dataset if changes are required.", + edit_dataset, + data_id=128, + default_target_attribute="y", + ) diff --git a/tests/test_evaluations/test_evaluation_functions.py b/tests/test_evaluations/test_evaluation_functions.py index 6fcaea2d4..0127309a7 100644 --- a/tests/test_evaluations/test_evaluation_functions.py +++ b/tests/test_evaluations/test_evaluation_functions.py @@ -41,7 +41,7 @@ def test_evaluation_list_filter_task(self): task_id = 7312 - evaluations = openml.evaluations.list_evaluations("predictive_accuracy", task=[task_id]) + evaluations = openml.evaluations.list_evaluations("predictive_accuracy", tasks=[task_id]) self.assertGreater(len(evaluations), 100) for run_id in evaluations.keys(): @@ -56,7 +56,7 @@ def test_evaluation_list_filter_uploader_ID_16(self): uploader_id = 16 evaluations = openml.evaluations.list_evaluations( - "predictive_accuracy", uploader=[uploader_id], output_format="dataframe" + "predictive_accuracy", uploaders=[uploader_id], output_format="dataframe" ) self.assertEqual(evaluations["uploader"].unique(), [uploader_id]) @@ -66,7 +66,7 @@ def test_evaluation_list_filter_uploader_ID_10(self): openml.config.server = self.production_server setup_id = 10 - evaluations = openml.evaluations.list_evaluations("predictive_accuracy", setup=[setup_id]) + evaluations = openml.evaluations.list_evaluations("predictive_accuracy", setups=[setup_id]) self.assertGreater(len(evaluations), 50) for run_id in evaluations.keys(): @@ -81,7 +81,7 @@ def test_evaluation_list_filter_flow(self): flow_id = 100 - evaluations = openml.evaluations.list_evaluations("predictive_accuracy", flow=[flow_id]) + evaluations = openml.evaluations.list_evaluations("predictive_accuracy", flows=[flow_id]) self.assertGreater(len(evaluations), 2) for run_id in evaluations.keys(): @@ -96,7 +96,7 @@ def test_evaluation_list_filter_run(self): run_id = 12 - evaluations = openml.evaluations.list_evaluations("predictive_accuracy", run=[run_id]) + evaluations = openml.evaluations.list_evaluations("predictive_accuracy", runs=[run_id]) self.assertEqual(len(evaluations), 1) for run_id in evaluations.keys(): @@ -132,9 +132,9 @@ def test_evaluation_list_per_fold(self): "predictive_accuracy", size=size, offset=0, - task=task_ids, - flow=flow_ids, - uploader=uploader_ids, + tasks=task_ids, + flows=flow_ids, + uploaders=uploader_ids, per_fold=True, ) @@ -149,9 +149,9 @@ def test_evaluation_list_per_fold(self): "predictive_accuracy", size=size, offset=0, - task=task_ids, - flow=flow_ids, - uploader=uploader_ids, + tasks=task_ids, + flows=flow_ids, + uploaders=uploader_ids, per_fold=False, ) for run_id in evaluations.keys(): @@ -164,11 +164,11 @@ def test_evaluation_list_sort(self): task_id = 6 # Get all evaluations of the task unsorted_eval = openml.evaluations.list_evaluations( - "predictive_accuracy", offset=0, task=[task_id] + "predictive_accuracy", offset=0, tasks=[task_id] ) # Get top 10 evaluations of the same task sorted_eval = openml.evaluations.list_evaluations( - "predictive_accuracy", size=size, offset=0, task=[task_id], sort_order="desc" + "predictive_accuracy", size=size, offset=0, tasks=[task_id], sort_order="desc" ) self.assertEqual(len(sorted_eval), size) self.assertGreater(len(unsorted_eval), 0) @@ -191,11 +191,11 @@ def test_list_evaluations_setups_filter_flow(self): openml.config.server = self.production_server flow_id = [405] size = 100 - evals = self._check_list_evaluation_setups(flow=flow_id, size=size) + evals = self._check_list_evaluation_setups(flows=flow_id, size=size) # check if 
parameters in separate columns works evals_cols = openml.evaluations.list_evaluations_setups( "predictive_accuracy", - flow=flow_id, + flows=flow_id, size=size, sort_order="desc", output_format="dataframe", @@ -209,4 +209,4 @@ def test_list_evaluations_setups_filter_task(self): openml.config.server = self.production_server task_id = [6] size = 121 - self._check_list_evaluation_setups(task=task_id, size=size) + self._check_list_evaluation_setups(tasks=task_id, size=size) diff --git a/tests/test_evaluations/test_evaluations_example.py b/tests/test_evaluations/test_evaluations_example.py index 61b6c359e..5715b570a 100644 --- a/tests/test_evaluations/test_evaluations_example.py +++ b/tests/test_evaluations/test_evaluations_example.py @@ -14,8 +14,8 @@ def test_example_python_paper(self): df = openml.evaluations.list_evaluations_setups( "predictive_accuracy", - flow=[8353], - task=[6], + flows=[8353], + tasks=[6], output_format="dataframe", parameters_in_separate_columns=True, ) # Choose an SVM flow, for example 8353, and a task. diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py index 48832b58f..acc93b024 100644 --- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py +++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py @@ -77,11 +77,14 @@ def test_serialize_model(self): criterion="entropy", max_features="auto", max_leaf_nodes=2000 ) - fixture_name = "sklearn.tree.tree.DecisionTreeClassifier" + tree_name = "tree" if LooseVersion(sklearn.__version__) < "0.22" else "_classes" + fixture_name = "sklearn.tree.{}.DecisionTreeClassifier".format(tree_name) fixture_short_name = "sklearn.DecisionTreeClassifier" # str obtained from self.extension._get_sklearn_description(model) fixture_description = "A decision tree classifier." version_fixture = "sklearn==%s\nnumpy>=1.6.1\nscipy>=0.9" % sklearn.__version__ + + presort_val = "false" if LooseVersion(sklearn.__version__) < "0.22" else '"deprecated"' # min_impurity_decrease has been introduced in 0.20 # min_impurity_split has been deprecated in 0.20 if LooseVersion(sklearn.__version__) < "0.19": @@ -114,12 +117,16 @@ def test_serialize_model(self): ("min_samples_leaf", "1"), ("min_samples_split", "2"), ("min_weight_fraction_leaf", "0.0"), - ("presort", "false"), + ("presort", presort_val), ("random_state", "null"), ("splitter", '"best"'), ) ) - structure_fixture = {"sklearn.tree.tree.DecisionTreeClassifier": []} + if LooseVersion(sklearn.__version__) >= "0.22": + fixture_parameters.update({"ccp_alpha": "0.0"}) + fixture_parameters.move_to_end("ccp_alpha", last=False) + + structure_fixture = {"sklearn.tree.{}.DecisionTreeClassifier".format(tree_name): []} serialization = self.extension.model_to_flow(model) structure = serialization.get_structure("name") @@ -161,11 +168,18 @@ def test_serialize_model_clustering(self): with mock.patch.object(self.extension, "_check_dependencies") as check_dependencies_mock: model = sklearn.cluster.KMeans() - fixture_name = "sklearn.cluster.k_means_.KMeans" + cluster_name = "k_means_" if LooseVersion(sklearn.__version__) < "0.22" else "_kmeans" + fixture_name = "sklearn.cluster.{}.KMeans".format(cluster_name) fixture_short_name = "sklearn.KMeans" # str obtained from self.extension._get_sklearn_description(model) - fixture_description = "K-Means clustering" + fixture_description = "K-Means clustering{}".format( + "" if LooseVersion(sklearn.__version__) < "0.22" else "." 
+ ) version_fixture = "sklearn==%s\nnumpy>=1.6.1\nscipy>=0.9" % sklearn.__version__ + + n_jobs_val = "null" if LooseVersion(sklearn.__version__) < "0.23" else '"deprecated"' + precomp_val = '"auto"' if LooseVersion(sklearn.__version__) < "0.23" else '"deprecated"' + # n_jobs default has changed to None in 0.20 if LooseVersion(sklearn.__version__) < "0.20": fixture_parameters = OrderedDict( @@ -192,14 +206,14 @@ def test_serialize_model_clustering(self): ("max_iter", "300"), ("n_clusters", "8"), ("n_init", "10"), - ("n_jobs", "null"), - ("precompute_distances", '"auto"'), + ("n_jobs", n_jobs_val), + ("precompute_distances", precomp_val), ("random_state", "null"), ("tol", "0.0001"), ("verbose", "0"), ) ) - fixture_structure = {"sklearn.cluster.k_means_.KMeans": []} + fixture_structure = {"sklearn.cluster.{}.KMeans".format(cluster_name): []} serialization = self.extension.model_to_flow(model) structure = serialization.get_structure("name") @@ -230,11 +244,15 @@ def test_serialize_model_with_subcomponent(self): n_estimators=100, base_estimator=sklearn.tree.DecisionTreeClassifier() ) + weight_name = "{}weight_boosting".format( + "" if LooseVersion(sklearn.__version__) < "0.22" else "_" + ) + tree_name = "tree" if LooseVersion(sklearn.__version__) < "0.22" else "_classes" fixture_name = ( - "sklearn.ensemble.weight_boosting.AdaBoostClassifier" - "(base_estimator=sklearn.tree.tree.DecisionTreeClassifier)" + "sklearn.ensemble.{}.AdaBoostClassifier" + "(base_estimator=sklearn.tree.{}.DecisionTreeClassifier)".format(weight_name, tree_name) ) - fixture_class_name = "sklearn.ensemble.weight_boosting.AdaBoostClassifier" + fixture_class_name = "sklearn.ensemble.{}.AdaBoostClassifier".format(weight_name) fixture_short_name = "sklearn.AdaBoostClassifier" # str obtained from self.extension._get_sklearn_description(model) fixture_description = ( @@ -246,13 +264,13 @@ def test_serialize_model_with_subcomponent(self): " on difficult cases.\n\nThis class implements the algorithm known " "as AdaBoost-SAMME [2]." ) - fixture_subcomponent_name = "sklearn.tree.tree.DecisionTreeClassifier" - fixture_subcomponent_class_name = "sklearn.tree.tree.DecisionTreeClassifier" + fixture_subcomponent_name = "sklearn.tree.{}.DecisionTreeClassifier".format(tree_name) + fixture_subcomponent_class_name = "sklearn.tree.{}.DecisionTreeClassifier".format(tree_name) # str obtained from self.extension._get_sklearn_description(model.base_estimator) fixture_subcomponent_description = "A decision tree classifier." 
         fixture_structure = {
             fixture_name: [],
-            "sklearn.tree.tree.DecisionTreeClassifier": ["base_estimator"],
+            "sklearn.tree.{}.DecisionTreeClassifier".format(tree_name): ["base_estimator"],
         }
 
         serialization = self.extension.model_to_flow(model)
@@ -298,10 +316,11 @@ def test_serialize_pipeline(self):
         dummy = sklearn.dummy.DummyClassifier(strategy="prior")
         model = sklearn.pipeline.Pipeline(steps=[("scaler", scaler), ("dummy", dummy)])
 
+        scaler_name = "data" if LooseVersion(sklearn.__version__) < "0.22" else "_data"
         fixture_name = (
             "sklearn.pipeline.Pipeline("
-            "scaler=sklearn.preprocessing.data.StandardScaler,"
-            "dummy=sklearn.dummy.DummyClassifier)"
+            "scaler=sklearn.preprocessing.{}.StandardScaler,"
+            "dummy=sklearn.dummy.DummyClassifier)".format(scaler_name)
         )
 
         fixture_short_name = "sklearn.Pipeline(StandardScaler,DummyClassifier)"
@@ -327,7 +346,7 @@ def test_serialize_pipeline(self):
 
         fixture_structure = {
             fixture_name: [],
-            "sklearn.preprocessing.data.StandardScaler": ["scaler"],
+            "sklearn.preprocessing.{}.StandardScaler".format(scaler_name): ["scaler"],
             "sklearn.dummy.DummyClassifier": ["dummy"],
         }
 
@@ -402,10 +421,12 @@ def test_serialize_pipeline_clustering(self):
         km = sklearn.cluster.KMeans()
         model = sklearn.pipeline.Pipeline(steps=[("scaler", scaler), ("clusterer", km)])
 
+        scaler_name = "data" if LooseVersion(sklearn.__version__) < "0.22" else "_data"
+        cluster_name = "k_means_" if LooseVersion(sklearn.__version__) < "0.22" else "_kmeans"
         fixture_name = (
             "sklearn.pipeline.Pipeline("
-            "scaler=sklearn.preprocessing.data.StandardScaler,"
-            "clusterer=sklearn.cluster.k_means_.KMeans)"
+            "scaler=sklearn.preprocessing.{}.StandardScaler,"
+            "clusterer=sklearn.cluster.{}.KMeans)".format(scaler_name, cluster_name)
         )
 
         fixture_short_name = "sklearn.Pipeline(StandardScaler,KMeans)"
@@ -430,10 +451,9 @@ def test_serialize_pipeline_clustering(self):
 
         fixture_description = self.extension._get_sklearn_description(model)
         fixture_structure = {
             fixture_name: [],
-            "sklearn.preprocessing.data.StandardScaler": ["scaler"],
-            "sklearn.cluster.k_means_.KMeans": ["clusterer"],
+            "sklearn.preprocessing.{}.StandardScaler".format(scaler_name): ["scaler"],
+            "sklearn.cluster.{}.KMeans".format(cluster_name): ["clusterer"],
         }
-
         serialization = self.extension.model_to_flow(model)
         structure = serialization.get_structure("name")
@@ -519,10 +539,12 @@ def test_serialize_column_transformer(self):
             ],
             remainder="passthrough",
         )
+
+        scaler_name = "data" if LooseVersion(sklearn.__version__) < "0.22" else "_data"
         fixture = (
             "sklearn.compose._column_transformer.ColumnTransformer("
-            "numeric=sklearn.preprocessing.data.StandardScaler,"
-            "nominal=sklearn.preprocessing._encoders.OneHotEncoder)"
+            "numeric=sklearn.preprocessing.{}.StandardScaler,"
+            "nominal=sklearn.preprocessing._encoders.OneHotEncoder)".format(scaler_name)
        )
 
         fixture_short_name = "sklearn.ColumnTransformer"
@@ -543,7 +565,7 @@ def test_serialize_column_transformer(self):
 
         fixture_structure = {
             fixture: [],
-            "sklearn.preprocessing.data.StandardScaler": ["numeric"],
+            "sklearn.preprocessing.{}.StandardScaler".format(scaler_name): ["numeric"],
             "sklearn.preprocessing._encoders.OneHotEncoder": ["nominal"],
         }
 
@@ -587,21 +609,26 @@ def test_serialize_column_transformer_pipeline(self):
         model = sklearn.pipeline.Pipeline(
             steps=[("transformer", inner), ("classifier", sklearn.tree.DecisionTreeClassifier())]
         )
+        scaler_name = "data" if LooseVersion(sklearn.__version__) < "0.22" else "_data"
+        tree_name = "tree" if LooseVersion(sklearn.__version__) < "0.22" else "_classes"
         fixture_name = (
             "sklearn.pipeline.Pipeline("
             "transformer=sklearn.compose._column_transformer."
             "ColumnTransformer("
-            "numeric=sklearn.preprocessing.data.StandardScaler,"
+            "numeric=sklearn.preprocessing.{}.StandardScaler,"
             "nominal=sklearn.preprocessing._encoders.OneHotEncoder),"
-            "classifier=sklearn.tree.tree.DecisionTreeClassifier)"
+            "classifier=sklearn.tree.{}.DecisionTreeClassifier)".format(scaler_name, tree_name)
         )
         fixture_structure = {
-            "sklearn.preprocessing.data.StandardScaler": ["transformer", "numeric"],
+            "sklearn.preprocessing.{}.StandardScaler".format(scaler_name): [
+                "transformer",
+                "numeric",
+            ],
             "sklearn.preprocessing._encoders.OneHotEncoder": ["transformer", "nominal"],
             "sklearn.compose._column_transformer.ColumnTransformer(numeric="
-            "sklearn.preprocessing.data.StandardScaler,nominal=sklearn."
-            "preprocessing._encoders.OneHotEncoder)": ["transformer"],
-            "sklearn.tree.tree.DecisionTreeClassifier": ["classifier"],
+            "sklearn.preprocessing.{}.StandardScaler,nominal=sklearn."
+            "preprocessing._encoders.OneHotEncoder)".format(scaler_name): ["transformer"],
+            "sklearn.tree.{}.DecisionTreeClassifier".format(tree_name): ["classifier"],
             fixture_name: [],
         }
 
@@ -630,6 +657,7 @@ def test_serialize_column_transformer_pipeline(self):
         structure = serialization.get_structure("name")
         self.assertEqual(serialization.name, fixture_name)
         self.assertEqual(serialization.description, fixture_description)
+        self.assertDictEqual(structure, fixture_structure)
 
         # del serialization.model
         new_model = self.extension.flow_to_model(serialization)
@@ -656,15 +684,18 @@ def test_serialize_feature_union(self):
         structure = serialization.get_structure("name")
         # OneHotEncoder was moved to _encoders module in 0.20
         module_name_encoder = "_encoders" if LooseVersion(sklearn.__version__) >= "0.20" else "data"
+        scaler_name = "data" if LooseVersion(sklearn.__version__) < "0.22" else "_data"
         fixture_name = (
             "sklearn.pipeline.FeatureUnion("
             "ohe=sklearn.preprocessing.{}.OneHotEncoder,"
-            "scaler=sklearn.preprocessing.data.StandardScaler)".format(module_name_encoder)
+            "scaler=sklearn.preprocessing.{}.StandardScaler)".format(
+                module_name_encoder, scaler_name
+            )
         )
         fixture_structure = {
             fixture_name: [],
             "sklearn.preprocessing.{}." "OneHotEncoder".format(module_name_encoder): ["ohe"],
-            "sklearn.preprocessing.data.StandardScaler": ["scaler"],
+            "sklearn.preprocessing.{}.StandardScaler".format(scaler_name): ["scaler"],
         }
         self.assertEqual(serialization.name, fixture_name)
         self.assertDictEqual(structure, fixture_structure)
@@ -728,17 +759,20 @@ def test_serialize_feature_union_switched_names(self):
         fu2_serialization = self.extension.model_to_flow(fu2)
         # OneHotEncoder was moved to _encoders module in 0.20
         module_name_encoder = "_encoders" if LooseVersion(sklearn.__version__) >= "0.20" else "data"
+        scaler_name = "data" if LooseVersion(sklearn.__version__) < "0.22" else "_data"
         self.assertEqual(
             fu1_serialization.name,
             "sklearn.pipeline.FeatureUnion("
             "ohe=sklearn.preprocessing.{}.OneHotEncoder,"
-            "scaler=sklearn.preprocessing.data.StandardScaler)".format(module_name_encoder),
+            "scaler=sklearn.preprocessing.{}.StandardScaler)".format(
+                module_name_encoder, scaler_name
+            ),
         )
         self.assertEqual(
             fu2_serialization.name,
             "sklearn.pipeline.FeatureUnion("
             "scaler=sklearn.preprocessing.{}.OneHotEncoder,"
-            "ohe=sklearn.preprocessing.data.StandardScaler)".format(module_name_encoder),
+            "ohe=sklearn.preprocessing.{}.StandardScaler)".format(module_name_encoder, scaler_name),
         )
 
     def test_serialize_complex_flow(self):
@@ -766,10 +800,15 @@ def test_serialize_complex_flow(self):
         # OneHotEncoder was moved to _encoders module in 0.20
         module_name_encoder = "_encoders" if LooseVersion(sklearn.__version__) >= "0.20" else "data"
         ohe_name = "sklearn.preprocessing.%s.OneHotEncoder" % module_name_encoder
-        scaler_name = "sklearn.preprocessing.data.StandardScaler"
-        tree_name = "sklearn.tree.tree.DecisionTreeClassifier"
-        boosting_name = (
-            "sklearn.ensemble.weight_boosting.AdaBoostClassifier" "(base_estimator=%s)" % tree_name
+        scaler_name = "sklearn.preprocessing.{}.StandardScaler".format(
+            "data" if LooseVersion(sklearn.__version__) < "0.22" else "_data"
+        )
+        tree_name = "sklearn.tree.{}.DecisionTreeClassifier".format(
+            "tree" if LooseVersion(sklearn.__version__) < "0.22" else "_classes"
+        )
+        weight_name = "weight" if LooseVersion(sklearn.__version__) < "0.22" else "_weight"
+        boosting_name = "sklearn.ensemble.{}_boosting.AdaBoostClassifier(base_estimator={})".format(
+            weight_name, tree_name
         )
         pipeline_name = "sklearn.pipeline.Pipeline(ohe=%s,scaler=%s," "boosting=%s)" % (
             ohe_name,
@@ -1195,12 +1234,24 @@ def test__get_fn_arguments_with_defaults(self):
                 (sklearn.tree.DecisionTreeClassifier.__init__, 13),
                 (sklearn.pipeline.Pipeline.__init__, 1),
             ]
-        else:
+        elif sklearn_version < "0.22":
             fns = [
                 (sklearn.ensemble.RandomForestRegressor.__init__, 16),
                 (sklearn.tree.DecisionTreeClassifier.__init__, 13),
                 (sklearn.pipeline.Pipeline.__init__, 2),
             ]
+        elif sklearn_version < "0.23":
+            fns = [
+                (sklearn.ensemble.RandomForestRegressor.__init__, 18),
+                (sklearn.tree.DecisionTreeClassifier.__init__, 14),
+                (sklearn.pipeline.Pipeline.__init__, 2),
+            ]
+        else:
+            fns = [
+                (sklearn.ensemble.RandomForestRegressor.__init__, 18),
+                (sklearn.tree.DecisionTreeClassifier.__init__, 14),
+                (sklearn.pipeline.Pipeline.__init__, 2),
+            ]
 
         for fn, num_params_with_defaults in fns:
             defaults, defaultless = self.extension._get_fn_arguments_with_defaults(fn)
@@ -1225,11 +1276,18 @@ def test_deserialize_with_defaults(self):
         pipe_orig = sklearn.pipeline.Pipeline(steps=steps)
 
         pipe_adjusted = sklearn.clone(pipe_orig)
-        params = {
-            "Imputer__strategy": "median",
-            "OneHotEncoder__sparse": False,
-            "Estimator__min_samples_leaf": 42,
-        }
+        if LooseVersion(sklearn.__version__) < "0.23":
+            params = {
+                "Imputer__strategy": "median",
+                "OneHotEncoder__sparse": False,
+                "Estimator__min_samples_leaf": 42,
+            }
+        else:
+            params = {
+                "Imputer__strategy": "mean",
+                "OneHotEncoder__sparse": True,
+                "Estimator__min_samples_leaf": 1,
+            }
         pipe_adjusted.set_params(**params)
         flow = self.extension.model_to_flow(pipe_adjusted)
         pipe_deserialized = self.extension.flow_to_model(flow, initialize_with_defaults=True)
@@ -1256,11 +1314,18 @@ def test_deserialize_adaboost_with_defaults(self):
         pipe_orig = sklearn.pipeline.Pipeline(steps=steps)
 
         pipe_adjusted = sklearn.clone(pipe_orig)
-        params = {
-            "Imputer__strategy": "median",
-            "OneHotEncoder__sparse": False,
-            "Estimator__n_estimators": 10,
-        }
+        if LooseVersion(sklearn.__version__) < "0.22":
+            params = {
+                "Imputer__strategy": "median",
+                "OneHotEncoder__sparse": False,
+                "Estimator__n_estimators": 10,
+            }
+        else:
+            params = {
+                "Imputer__strategy": "mean",
+                "OneHotEncoder__sparse": True,
+                "Estimator__n_estimators": 50,
+            }
         pipe_adjusted.set_params(**params)
         flow = self.extension.model_to_flow(pipe_adjusted)
         pipe_deserialized = self.extension.flow_to_model(flow, initialize_with_defaults=True)
@@ -1293,14 +1358,24 @@ def test_deserialize_complex_with_defaults(self):
         pipe_orig = sklearn.pipeline.Pipeline(steps=steps)
 
         pipe_adjusted = sklearn.clone(pipe_orig)
-        params = {
-            "Imputer__strategy": "median",
-            "OneHotEncoder__sparse": False,
-            "Estimator__n_estimators": 10,
-            "Estimator__base_estimator__n_estimators": 10,
-            "Estimator__base_estimator__base_estimator__learning_rate": 0.1,
-            "Estimator__base_estimator__base_estimator__loss__n_neighbors": 13,
-        }
+        if LooseVersion(sklearn.__version__) < "0.23":
+            params = {
+                "Imputer__strategy": "median",
+                "OneHotEncoder__sparse": False,
+                "Estimator__n_estimators": 10,
+                "Estimator__base_estimator__n_estimators": 10,
+                "Estimator__base_estimator__base_estimator__learning_rate": 0.1,
+                "Estimator__base_estimator__base_estimator__loss__n_neighbors": 13,
+            }
+        else:
+            params = {
+                "Imputer__strategy": "mean",
+                "OneHotEncoder__sparse": True,
+                "Estimator__n_estimators": 50,
+                "Estimator__base_estimator__n_estimators": 10,
+                "Estimator__base_estimator__base_estimator__learning_rate": 0.1,
+                "Estimator__base_estimator__base_estimator__loss__n_neighbors": 5,
+            }
         pipe_adjusted.set_params(**params)
         flow = self.extension.model_to_flow(pipe_adjusted)
         pipe_deserialized = self.extension.flow_to_model(flow, initialize_with_defaults=True)
@@ -1349,7 +1424,10 @@ def test_openml_param_name_to_sklearn(self):
     def test_obtain_parameter_values_flow_not_from_server(self):
         model = sklearn.linear_model.LogisticRegression(solver="lbfgs")
         flow = self.extension.model_to_flow(model)
-        msg = "Flow sklearn.linear_model.logistic.LogisticRegression has no " "flow_id!"
+        logistic_name = "logistic" if LooseVersion(sklearn.__version__) < "0.22" else "_logistic"
+        msg = "Flow sklearn.linear_model.{}.LogisticRegression has no flow_id!".format(
+            logistic_name
+        )
 
         with self.assertRaisesRegex(ValueError, msg):
             self.extension.obtain_parameter_values(flow)
diff --git a/tests/test_flows/test_flow.py b/tests/test_flows/test_flow.py
index 9f289870e..8d08f4eaf 100644
--- a/tests/test_flows/test_flow.py
+++ b/tests/test_flows/test_flow.py
@@ -305,15 +305,27 @@ def test_publish_error(self, api_call_mock, flow_exists_mock, get_flow_mock):
             "collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id)
         )
 
-        fixture = (
-            "The flow on the server is inconsistent with the local flow. "
-            "The server flow ID is 1. Please check manually and remove "
-            "the flow if necessary! Error is:\n"
-            "'Flow sklearn.ensemble.forest.RandomForestClassifier: "
-            "values for attribute 'name' differ: "
-            "'sklearn.ensemble.forest.RandomForestClassifier'"
-            "\nvs\n'sklearn.ensemble.forest.RandomForestClassifie'.'"
-        )
+        if LooseVersion(sklearn.__version__) < "0.22":
+            fixture = (
+                "The flow on the server is inconsistent with the local flow. "
+                "The server flow ID is 1. Please check manually and remove "
+                "the flow if necessary! Error is:\n"
+                "'Flow sklearn.ensemble.forest.RandomForestClassifier: "
+                "values for attribute 'name' differ: "
+                "'sklearn.ensemble.forest.RandomForestClassifier'"
+                "\nvs\n'sklearn.ensemble.forest.RandomForestClassifie'.'"
+            )
+        else:
+            # sklearn.ensemble.forest -> sklearn.ensemble._forest
+            fixture = (
+                "The flow on the server is inconsistent with the local flow. "
+                "The server flow ID is 1. Please check manually and remove "
+                "the flow if necessary! Error is:\n"
+                "'Flow sklearn.ensemble._forest.RandomForestClassifier: "
+                "values for attribute 'name' differ: "
+                "'sklearn.ensemble._forest.RandomForestClassifier'"
+                "\nvs\n'sklearn.ensemble._forest.RandomForestClassifie'.'"
+            )
 
         self.assertEqual(context_manager.exception.args[0], fixture)
         self.assertEqual(get_flow_mock.call_count, 2)
@@ -463,19 +475,40 @@ def test_sklearn_to_upload_to_flow(self):
 
         # OneHotEncoder was moved to _encoders module in 0.20
         module_name_encoder = "_encoders" if LooseVersion(sklearn.__version__) >= "0.20" else "data"
-        fixture_name = (
-            "%ssklearn.model_selection._search.RandomizedSearchCV("
-            "estimator=sklearn.pipeline.Pipeline("
-            "ohe=sklearn.preprocessing.%s.OneHotEncoder,"
-            "scaler=sklearn.preprocessing.data.StandardScaler,"
-            "fu=sklearn.pipeline.FeatureUnion("
-            "pca=sklearn.decomposition.truncated_svd.TruncatedSVD,"
-            "fs="
-            "sklearn.feature_selection.univariate_selection.SelectPercentile),"
-            "boosting=sklearn.ensemble.weight_boosting.AdaBoostClassifier("
-            "base_estimator=sklearn.tree.tree.DecisionTreeClassifier)))"
-            % (sentinel, module_name_encoder)
-        )
+        if LooseVersion(sklearn.__version__) < "0.22":
+            fixture_name = (
+                "%ssklearn.model_selection._search.RandomizedSearchCV("
+                "estimator=sklearn.pipeline.Pipeline("
+                "ohe=sklearn.preprocessing.%s.OneHotEncoder,"
+                "scaler=sklearn.preprocessing.data.StandardScaler,"
+                "fu=sklearn.pipeline.FeatureUnion("
+                "pca=sklearn.decomposition.truncated_svd.TruncatedSVD,"
+                "fs="
+                "sklearn.feature_selection.univariate_selection.SelectPercentile),"
+                "boosting=sklearn.ensemble.weight_boosting.AdaBoostClassifier("
+                "base_estimator=sklearn.tree.tree.DecisionTreeClassifier)))"
+                % (sentinel, module_name_encoder)
+            )
+        else:
+            # sklearn.preprocessing.data -> sklearn.preprocessing._data
+            # sklearn.decomposition.truncated_svd -> sklearn.decomposition._truncated_svd
+            # sklearn.feature_selection.univariate_selection ->
+            #     sklearn.feature_selection._univariate_selection
+            # sklearn.ensemble.weight_boosting -> sklearn.ensemble._weight_boosting
+            # sklearn.tree.tree.DecisionTree... -> sklearn.tree._classes.DecisionTree...
+            fixture_name = (
+                "%ssklearn.model_selection._search.RandomizedSearchCV("
+                "estimator=sklearn.pipeline.Pipeline("
+                "ohe=sklearn.preprocessing.%s.OneHotEncoder,"
+                "scaler=sklearn.preprocessing._data.StandardScaler,"
+                "fu=sklearn.pipeline.FeatureUnion("
+                "pca=sklearn.decomposition._truncated_svd.TruncatedSVD,"
+                "fs="
+                "sklearn.feature_selection._univariate_selection.SelectPercentile),"
+                "boosting=sklearn.ensemble._weight_boosting.AdaBoostClassifier("
+                "base_estimator=sklearn.tree._classes.DecisionTreeClassifier)))"
+                % (sentinel, module_name_encoder)
+            )
 
         self.assertEqual(new_flow.name, fixture_name)
         new_flow.model.fit(X, y)
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index 74f011b7c..fc53ea366 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -22,10 +22,7 @@
 import openml.extensions.sklearn
 from openml.testing import TestBase, SimpleImputer
 
-from openml.runs.functions import (
-    _run_task_get_arffcontent,
-    run_exists,
-)
+from openml.runs.functions import _run_task_get_arffcontent, run_exists, format_prediction
 from openml.runs.trace import OpenMLRunTrace
 from openml.tasks import TaskTypeEnum
 
@@ -199,8 +196,11 @@ def _perform_run(
         classes_without_random_state = [
             "sklearn.model_selection._search.GridSearchCV",
             "sklearn.pipeline.Pipeline",
-            "sklearn.linear_model.base.LinearRegression",
         ]
+        if LooseVersion(sklearn.__version__) < "0.22":
+            classes_without_random_state.append("sklearn.linear_model.base.LinearRegression")
+        else:
+            classes_without_random_state.append("sklearn.linear_model._base.LinearRegression")
 
         def _remove_random_state(flow):
             if "random_state" in flow.parameters:
@@ -779,10 +779,13 @@ def _test_local_evaluations(self, run):
             (sklearn.metrics.cohen_kappa_score, {"weights": None}),
             (sklearn.metrics.roc_auc_score, {}),
             (sklearn.metrics.average_precision_score, {}),
-            (sklearn.metrics.jaccard_similarity_score, {}),
             (sklearn.metrics.precision_score, {"average": "macro"}),
             (sklearn.metrics.brier_score_loss, {}),
         ]
+        if LooseVersion(sklearn.__version__) < "0.23":
+            tests.append((sklearn.metrics.jaccard_similarity_score, {}))
+        else:
+            tests.append((sklearn.metrics.jaccard_score, {}))
         for test_idx, test in enumerate(tests):
             alt_scores = run.get_metric_fn(sklearn_fn=test[0], kwargs=test[1],)
             self.assertEqual(len(alt_scores), 10)
@@ -1336,3 +1339,48 @@ def test_run_flow_on_task_downloaded_flow(self):
         run.publish()
         TestBase._mark_entity_for_removal("run", run.run_id)
         TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], run.run_id))
+
+    def test_format_prediction_non_supervised(self):
+        # non-supervised tasks don't exist on the test server
+        openml.config.server = self.production_server
+        clustering = openml.tasks.get_task(126033, download_data=False)
+        ignored_input = [0] * 5
+        with self.assertRaisesRegex(
+            NotImplementedError, r"Formatting for is not supported."
+        ):
+            format_prediction(clustering, *ignored_input)
+
+    def test_format_prediction_classification_no_probabilities(self):
+        classification = openml.tasks.get_task(self.TEST_SERVER_TASK_SIMPLE[0], download_data=False)
+        ignored_input = [0] * 5
+        with self.assertRaisesRegex(ValueError, "`proba` is required for classification task"):
+            format_prediction(classification, *ignored_input, proba=None)
+
+    def test_format_prediction_classification_incomplete_probabilities(self):
+        classification = openml.tasks.get_task(self.TEST_SERVER_TASK_SIMPLE[0], download_data=False)
+        ignored_input = [0] * 5
+        incomplete_probabilities = {c: 0.2 for c in classification.class_labels[1:]}
+        with self.assertRaisesRegex(ValueError, "Each class should have a predicted probability"):
+            format_prediction(classification, *ignored_input, proba=incomplete_probabilities)
+
+    def test_format_prediction_task_without_classlabels_set(self):
+        classification = openml.tasks.get_task(self.TEST_SERVER_TASK_SIMPLE[0], download_data=False)
+        classification.class_labels = None
+        ignored_input = [0] * 5
+        with self.assertRaisesRegex(
+            ValueError, "The classification task must have class labels set"
+        ):
+            format_prediction(classification, *ignored_input, proba={})
+
+    def test_format_prediction_task_learning_curve_sample_not_set(self):
+        learning_curve = openml.tasks.get_task(801, download_data=False)
+        probabilities = {c: 0.2 for c in learning_curve.class_labels}
+        ignored_input = [0] * 5
+        with self.assertRaisesRegex(ValueError, "`sample` can not be none for LearningCurveTask"):
+            format_prediction(learning_curve, *ignored_input, sample=None, proba=probabilities)
+
+    def test_format_prediction_task_regression(self):
+        regression = openml.tasks.get_task(self.TEST_SERVER_TASK_REGRESSION[0], download_data=False)
+        ignored_input = [0] * 5
+        res = format_prediction(regression, *ignored_input)
+        self.assertListEqual(res, [0] * 5)
diff --git a/tests/test_study/test_study_examples.py b/tests/test_study/test_study_examples.py
index 2c403aa84..14e2405f2 100644
--- a/tests/test_study/test_study_examples.py
+++ b/tests/test_study/test_study_examples.py
@@ -48,10 +48,12 @@ def test_Figure1a(self):
             clf, task, avoid_duplicate_runs=False
         )  # run classifier on splits (requires API key)
         score = run.get_metric_fn(sklearn.metrics.accuracy_score)  # print accuracy score
-        print("Data set: %s; Accuracy: %0.2f" % (task.get_dataset().name, score.mean()))
+        TestBase.logger.info(
+            "Data set: %s; Accuracy: %0.2f" % (task.get_dataset().name, score.mean())
+        )
         run.publish()  # publish the experiment on OpenML (optional)
         TestBase._mark_entity_for_removal("run", run.run_id)
         TestBase.logger.info(
             "collected from {}: {}".format(__file__.split("/")[-1], run.run_id)
         )
-        print("URL for run: %s/run/%d" % (openml.config.server, run.run_id))
+        TestBase.logger.info("URL for run: %s/run/%d" % (openml.config.server, run.run_id))
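
Note (not part of the diff): the recurring pattern in the test hunks above is to derive the expected sklearn module path from the installed version instead of hard-coding it, since scikit-learn 0.22 made many submodules private. A minimal sketch of that pattern, using module names taken directly from the hunks above (variable names here are illustrative only):

# Minimal sketch of the version-gating pattern used in the updated tests.
# scikit-learn 0.22 renamed e.g. preprocessing.data -> preprocessing._data
# and tree.tree -> tree._classes, so expected flow names are built dynamically.
from distutils.version import LooseVersion

import sklearn

scaler_module = "data" if LooseVersion(sklearn.__version__) < "0.22" else "_data"
tree_module = "tree" if LooseVersion(sklearn.__version__) < "0.22" else "_classes"

expected_scaler = "sklearn.preprocessing.{}.StandardScaler".format(scaler_module)
expected_tree = "sklearn.tree.{}.DecisionTreeClassifier".format(tree_module)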