From 12b67f1542a2f559be0593ac8e349bd873a5dff7 Mon Sep 17 00:00:00 2001
From: sahithyaravi1493 <sahithyaravi1493@gmail.com>
Date: Wed, 19 Aug 2020 09:59:38 +0200
Subject: [PATCH 1/4] change edit_api to reflect server

---
 openml/datasets/functions.py                  | 64 +----------------
 tests/test_datasets/test_dataset_functions.py | 69 +++++++++----------
 2 files changed, 37 insertions(+), 96 deletions(-)

diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py
index bda02d419..0f3037a74 100644
--- a/openml/datasets/functions.py
+++ b/openml/datasets/functions.py
@@ -806,8 +806,6 @@ def edit_dataset(
     contributor=None,
     collection_date=None,
     language=None,
-    attributes=None,
-    data=None,
     default_target_attribute=None,
     ignore_attribute=None,
     citation=None,
@@ -839,17 +837,6 @@ def edit_dataset(
       language : str
           Language in which the data is represented.
           Starts with 1 upper case letter, rest lower case, e.g. 'English'.
-      attributes : list, dict, or 'auto'
-          A list of tuples. Each tuple consists of the attribute name and type.
-          If passing a pandas DataFrame, the attributes can be automatically
-          inferred by passing ``'auto'``. Specific attributes can be manually
-          specified by a passing a dictionary where the key is the name of the
-          attribute and the value is the data type of the attribute.
-      data : ndarray, list, dataframe, coo_matrix, shape (n_samples, n_features)
-          An array that contains both the attributes and the targets. When
-          providing a dataframe, the attribute names and type can be inferred by
-          passing ``attributes='auto'``.
-          The target feature is indicated as meta-data of the dataset.
       default_target_attribute : str
           The default target attribute, if it exists.
           Can have multiple values, comma separated.
@@ -879,54 +866,6 @@ def edit_dataset(
     if not isinstance(data_id, int):
         raise TypeError("`data_id` must be of type `int`, not {}.".format(type(data_id)))
 
-    # case 1, changing these fields creates a new version of the dataset with changed field
-    if any(
-        field is not None
-        for field in [
-            data,
-            attributes,
-            default_target_attribute,
-            row_id_attribute,
-            ignore_attribute,
-        ]
-    ):
-        logger.warning("Creating a new version of dataset, cannot edit existing version")
-
-        # Get old dataset and features
-        dataset = get_dataset(data_id)
-        df, y, categorical, attribute_names = dataset.get_data(dataset_format="dataframe")
-        attributes_old = attributes_arff_from_df(df)
-
-        # Sparse data needs to be provided in a different format from dense data
-        if dataset.format == "sparse_arff":
-            df, y, categorical, attribute_names = dataset.get_data(dataset_format="array")
-            data_old = coo_matrix(df)
-        else:
-            data_old = df
-        data_new = data if data is not None else data_old
-        dataset_new = create_dataset(
-            name=dataset.name,
-            description=description or dataset.description,
-            creator=creator or dataset.creator,
-            contributor=contributor or dataset.contributor,
-            collection_date=collection_date or dataset.collection_date,
-            language=language or dataset.language,
-            licence=dataset.licence,
-            attributes=attributes or attributes_old,
-            data=data_new,
-            default_target_attribute=default_target_attribute or dataset.default_target_attribute,
-            ignore_attribute=ignore_attribute or dataset.ignore_attribute,
-            citation=citation or dataset.citation,
-            row_id_attribute=row_id_attribute or dataset.row_id_attribute,
-            original_data_url=original_data_url or dataset.original_data_url,
-            paper_url=paper_url or dataset.paper_url,
-            update_comment=dataset.update_comment,
-            version_label=dataset.version_label,
-        )
-        dataset_new.publish()
-        return dataset_new.dataset_id
-
-    # case 2, changing any of these fields will update existing dataset
     # compose data edit parameters as xml
     form_data = {"data_id": data_id}
     xml = OrderedDict()  # type: 'OrderedDict[str, OrderedDict]'
@@ -937,6 +876,9 @@ def edit_dataset(
     xml["oml:data_edit_parameters"]["oml:contributor"] = contributor
     xml["oml:data_edit_parameters"]["oml:collection_date"] = collection_date
     xml["oml:data_edit_parameters"]["oml:language"] = language
+    xml["oml:data_edit_parameters"]["oml:default_target_attribute"] = default_target_attribute
+    xml["oml:data_edit_parameters"]["oml:row_id_attribute"] = row_id_attribute
+    xml["oml:data_edit_parameters"]["oml:ignore_attribute"] = ignore_attribute
     xml["oml:data_edit_parameters"]["oml:citation"] = citation
     xml["oml:data_edit_parameters"]["oml:original_data_url"] = original_data_url
     xml["oml:data_edit_parameters"]["oml:paper_url"] = paper_url
diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py
index a3be7b2b7..957dadd7b 100644
--- a/tests/test_datasets/test_dataset_functions.py
+++ b/tests/test_datasets/test_dataset_functions.py
@@ -1341,47 +1341,34 @@ def test_get_dataset_cache_format_feather(self):
         self.assertEqual(len(attribute_names), X.shape[1])
 
     def test_data_edit(self):
-
-        # admin key for test server (only admins or owners can edit datasets).
-        # all users can edit their own datasets)
-        openml.config.apikey = "d488d8afd93b32331cf6ea9d7003d4c3"
-
-        # case 1, editing description, creator, contributor, collection_date, original_data_url,
-        # paper_url, citation, language edits existing dataset.
+        # Case 1
+        # All users can edit non-critical fields of datasets
+        desc = "xor dataset representing XOR operation"
         did = 564
         result = edit_dataset(
             did,
-            description="xor dataset represents XOR operation",
-            contributor="",
+            description=desc,
+            contributor="xxx",
             collection_date="2019-10-29 17:06:18",
             original_data_url="https://www.kaggle.com/ancientaxe/and-or-xor",
             paper_url="",
             citation="kaggle",
             language="English",
         )
-        self.assertEqual(result, did)
-
-        # case 2, editing data, attributes, default_target_attribute, row_id_attribute,
-        # ignore_attribute generates a new dataset
+        self.assertEqual(did, result)
+        edited_dataset = openml.datasets.get_dataset(did)
+        self.assertEqual(edited_dataset.description, desc)
 
-        column_names = [
-            ("input1", "REAL"),
-            ("input2", "REAL"),
-            ("y", "REAL"),
-        ]
+        # Case 2
+        # only admins or owners can edit all critical fields of datasets
+        # admin key for test server
+        openml.config.apikey = "d488d8afd93b32331cf6ea9d7003d4c3"
         desc = "xor dataset represents XOR operation"
-        result = edit_dataset(
-            564,
-            description=desc,
-            contributor="",
-            collection_date="2019-10-29 17:06:18",
-            attributes=column_names,
-            original_data_url="https://www.kaggle.com/ancientaxe/and-or-xor",
-            paper_url="",
-            citation="kaggle",
-            language="English",
-        )
-        self.assertNotEqual(did, result)
+        did = 565
+        result = edit_dataset(did, default_target_attribute="y", ignore_attribute="input1")
+        self.assertEqual(did, result)
+        edited_dataset = openml.datasets.get_dataset(did)
+        self.assertEqual(edited_dataset.ignore_attribute, ["input1"])
 
     def test_data_edit_errors(self):
 
@@ -1390,8 +1377,10 @@ def test_data_edit_errors(self):
         # Check server exception when no field to edit is provided
         self.assertRaisesRegex(
             OpenMLServerException,
-            "Please provide atleast one field among description, creator, contributor, "
-            "collection_date, language, citation, original_data_url or paper_url to edit.",
+            "Please provide atleast one field among description, creator, "
+            "contributor, collection_date, language, citation, "
+            "original_data_url, default_target_attribute, row_id_attribute, "
+            "ignore_attribute or paper_url to edit.",
             edit_dataset,
             data_id=564,
         )
@@ -1403,12 +1392,22 @@ def test_data_edit_errors(self):
             data_id=100000,
             description="xor operation dataset",
         )
-        # Check server exception when a non-owner or non-admin tries to edit existing dataset
+        # Check server exception when owner/admin edits critical features of dataset with tasks
+        self.assertRaisesRegex(
+            OpenMLServerException,
+            "Critical features default_target_attribute, row_id_attribute and ignore_attribute "
+            "can only be edited for datasets without any tasks.",
+            edit_dataset,
+            data_id=1,
+            default_target_attribute="y",
+        )
+        # Check server exception when a non-owner or non-admin tries to edit critical features
         openml.config.apikey = "5f0b74b33503e4ad4a7181a91e28719f"
         self.assertRaisesRegex(
             OpenMLServerException,
-            "Dataset is not owned by you",
+            "Critical features default_target_attribute, row_id_attribute and ignore_attribute "
+            "can be edited only by the owner. Fork the dataset if changes are required.",
             edit_dataset,
             data_id=564,
-            description="xor data",
+            default_target_attribute="y",
         )

From d1147b6e99da93eb352fe19376f34656b0363e1a Mon Sep 17 00:00:00 2001
From: sahithyaravi1493 <sahithyaravi1493@gmail.com>
Date: Thu, 27 Aug 2020 10:09:11 +0200
Subject: [PATCH 2/4] change test and example to reflect rest API changes

---
 examples/30_extended/datasets_tutorial.py     | 36 +++++++++----------
 tests/test_datasets/test_dataset_functions.py | 36 +++++++++----------
 2 files changed, 34 insertions(+), 38 deletions(-)

diff --git a/examples/30_extended/datasets_tutorial.py b/examples/30_extended/datasets_tutorial.py
index 40b35bbea..ed90424ab 100644
--- a/examples/30_extended/datasets_tutorial.py
+++ b/examples/30_extended/datasets_tutorial.py
@@ -21,7 +21,7 @@
 #
 #   * Use the output_format parameter to select output type
 #   * Default gives 'dict' (other option: 'dataframe', see below)
-
+#
 openml_list = openml.datasets.list_datasets()  # returns a dict
 
 # Show a nice table with some key data properties
@@ -117,15 +117,19 @@
 # This example uses the test server, to avoid editing a dataset on the main server.
 openml.config.start_using_configuration_for_example()
 ############################################################################
-# Changes to these field edits existing version: allowed only for dataset owner
+# Change the non-critical fields
+desc = (
+    "This data sets consists of 3 different types of irises' "
+    "(Setosa, Versicolour, and Virginica) petal and sepal length,"
+    " stored in a 150x4 numpy.ndarray"
+)
+did = 128
 data_id = edit_dataset(
-    564,
-    description="xor dataset represents XOR operation",
-    contributor="",
-    collection_date="2019-10-29 17:06:18",
-    original_data_url="https://www.kaggle.com/ancientaxe/and-or-xor",
-    paper_url="",
-    citation="kaggle",
+    did,
+    description=desc,
+    creator="R.A.Fisher",
+    collection_date="1937",
+    citation="The use of multiple measurements in taxonomic problems",
     language="English",
 )
 edited_dataset = get_dataset(data_id)
@@ -133,15 +137,11 @@
 
 
 ############################################################################
-# Changes to these fields: attributes, default_target_attribute,
-# row_id_attribute, ignore_attribute generates a new edited version: allowed for anyone
-
-new_attributes = [
-    ("x0", "REAL"),
-    ("x1", "REAL"),
-    ("y", "REAL"),
-]
-data_id = edit_dataset(564, attributes=new_attributes)
+# Changes to these fields: default_target_attribute, row_id_attribute,
+# ignore_attribute can only be performed by owner
+# To edit critical fields of a dataset owned by you, configure the API key:
+# openml.config.apikey = 'FILL_IN_OPENML_API_KEY'
+data_id = edit_dataset(564, default_target_attribute="y")
 print(f"Edited dataset ID: {data_id}")
 
 openml.config.stop_using_configuration_for_example()
diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py
index 957dadd7b..5076d06c2 100644
--- a/tests/test_datasets/test_dataset_functions.py
+++ b/tests/test_datasets/test_dataset_functions.py
@@ -1343,16 +1343,18 @@ def test_get_dataset_cache_format_feather(self):
     def test_data_edit(self):
         # Case 1
         # All users can edit non-critical fields of datasets
-        desc = "xor dataset representing XOR operation"
-        did = 564
+        desc = (
+            "This data sets consists of 3 different types of irises' "
+            "(Setosa, Versicolour, and Virginica) petal and sepal length,"
+            " stored in a 150x4 numpy.ndarray"
+        )
+        did = 128
         result = edit_dataset(
             did,
             description=desc,
-            contributor="xxx",
-            collection_date="2019-10-29 17:06:18",
-            original_data_url="https://www.kaggle.com/ancientaxe/and-or-xor",
-            paper_url="",
-            citation="kaggle",
+            creator="R.A.Fisher",
+            collection_date="1937",
+            citation="The use of multiple measurements in taxonomic problems",
             language="English",
         )
         self.assertEqual(did, result)
@@ -1360,20 +1362,15 @@ def test_data_edit(self):
         self.assertEqual(edited_dataset.description, desc)
 
         # Case 2
-        # only admins or owners can edit all critical fields of datasets
-        # admin key for test server
-        openml.config.apikey = "d488d8afd93b32331cf6ea9d7003d4c3"
-        desc = "xor dataset represents XOR operation"
-        did = 565
-        result = edit_dataset(did, default_target_attribute="y", ignore_attribute="input1")
+        # Only the owner of a dataset (or an admin) can edit its critical fields.
+        # This dataset was created by CI, so this test is allowed to edit it.
+        did = 315
+        result = edit_dataset(did, default_target_attribute="col_1", ignore_attribute="col_2")
         self.assertEqual(did, result)
         edited_dataset = openml.datasets.get_dataset(did)
-        self.assertEqual(edited_dataset.ignore_attribute, ["input1"])
+        self.assertEqual(edited_dataset.ignore_attribute, ["col_2"])
 
     def test_data_edit_errors(self):
-
-        # admin key for test server (only admins or owners can edit datasets).
-        openml.config.apikey = "d488d8afd93b32331cf6ea9d7003d4c3"
         # Check server exception when no field to edit is provided
         self.assertRaisesRegex(
             OpenMLServerException,
@@ -1398,16 +1395,15 @@ def test_data_edit_errors(self):
             "Critical features default_target_attribute, row_id_attribute and ignore_attribute "
             "can only be edited for datasets without any tasks.",
             edit_dataset,
-            data_id=1,
+            data_id=223,
             default_target_attribute="y",
         )
         # Check server exception when a non-owner or non-admin tries to edit critical features
-        openml.config.apikey = "5f0b74b33503e4ad4a7181a91e28719f"
         self.assertRaisesRegex(
             OpenMLServerException,
             "Critical features default_target_attribute, row_id_attribute and ignore_attribute "
             "can be edited only by the owner. Fork the dataset if changes are required.",
             edit_dataset,
-            data_id=564,
+            data_id=128,
             default_target_attribute="y",
         )

From 17fb46bbb77f2e56f821eb06d309004fc89072ed Mon Sep 17 00:00:00 2001
From: sahithyaravi1493 <sahithyaravi1493@gmail.com>
Date: Fri, 28 Aug 2020 12:03:18 +0200
Subject: [PATCH 3/4] tutorial comments

---
 examples/30_extended/datasets_tutorial.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/examples/30_extended/datasets_tutorial.py b/examples/30_extended/datasets_tutorial.py
index ed90424ab..e4c7e6888 100644
--- a/examples/30_extended/datasets_tutorial.py
+++ b/examples/30_extended/datasets_tutorial.py
@@ -117,7 +117,9 @@
 # This example uses the test server, to avoid editing a dataset on the main server.
 openml.config.start_using_configuration_for_example()
 ############################################################################
-# Change the non-critical fields
+# Edit non-critical fields, allowed for all authorized users:
+# description, creator, contributor, collection_date, language, citation,
+# row_id_attribute, original_data_url,paper_url
 desc = (
     "This data sets consists of 3 different types of irises' "
     "(Setosa, Versicolour, and Virginica) petal and sepal length,"
@@ -137,8 +139,8 @@
 
 
 ############################################################################
-# Changes to these fields: default_target_attribute, row_id_attribute,
-# ignore_attribute can only be performed by owner
+# Edit critical fields, allowed only for owners of the dataset:
+# default_target_attribute, row_id_attribute, ignore_attribute
 # To edit critical fields of a dataset owned by you, configure the API key:
 # openml.config.apikey = 'FILL_IN_OPENML_API_KEY'
 data_id = edit_dataset(564, default_target_attribute="y")

From cfa25133af41c256916136d9de950aa4ff283035 Mon Sep 17 00:00:00 2001
From: Sahithya Ravi <44670788+sahithyaravi1493@users.noreply.github.com>
Date: Fri, 28 Aug 2020 12:06:54 +0200
Subject: [PATCH 4/4] Update datasets_tutorial.py

---
 examples/30_extended/datasets_tutorial.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/30_extended/datasets_tutorial.py b/examples/30_extended/datasets_tutorial.py
index e4c7e6888..e129b7718 100644
--- a/examples/30_extended/datasets_tutorial.py
+++ b/examples/30_extended/datasets_tutorial.py
@@ -119,7 +119,7 @@
 ############################################################################
 # Edit non-critical fields, allowed for all authorized users:
 # description, creator, contributor, collection_date, language, citation,
-# row_id_attribute, original_data_url,paper_url
+# original_data_url, paper_url
 desc = (
     "This data sets consists of 3 different types of irises' "
     "(Setosa, Versicolour, and Virginica) petal and sepal length,"