From b9bf25d59540500de7aac1895286240828e249db Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Sat, 8 Jun 2019 13:08:58 -0400 Subject: [PATCH 01/17] Modularize default load and save argument handling --- kedro/contrib/io/azure/csv_blob.py | 8 +++---- .../io/bioinformatics/sequence_dataset.py | 13 +----------- kedro/contrib/io/pyspark/spark_data_set.py | 3 +-- kedro/contrib/io/pyspark/spark_jdbc.py | 3 +-- kedro/io/core.py | 21 ++++++++++++++++++- kedro/io/csv_local.py | 15 +++---------- kedro/io/csv_s3.py | 8 +++---- kedro/io/excel_local.py | 17 ++++----------- kedro/io/hdf_local.py | 13 +----------- kedro/io/hdf_s3.py | 14 +------------ kedro/io/json_local.py | 15 +++---------- kedro/io/parquet_local.py | 17 +++------------ kedro/io/pickle_local.py | 14 +------------ kedro/io/pickle_s3.py | 14 +------------ kedro/io/sql.py | 16 +++----------- kedro/io/text_local.py | 17 ++++----------- 16 files changed, 53 insertions(+), 155 deletions(-) diff --git a/kedro/contrib/io/azure/csv_blob.py b/kedro/contrib/io/azure/csv_blob.py index 2fdf168a51..ce6b9a5c0d 100644 --- a/kedro/contrib/io/azure/csv_blob.py +++ b/kedro/contrib/io/azure/csv_blob.py @@ -61,6 +61,8 @@ class CSVBlobDataSet(AbstractDataSet): >>> assert data.equals(reloaded) """ + DEFAULT_SAVE_ARGS = {"index": False} + def _describe(self) -> Dict[str, Any]: return dict( filepath=self._filepath, @@ -106,16 +108,12 @@ def __init__( All defaults are preserved, but "index", which is set to False. """ - default_save_args = {"index": False} - self._save_args = ( - {**default_save_args, **save_args} if save_args else default_save_args - ) - self._load_args = load_args if load_args else {} self._filepath = filepath self._container_name = container_name self._credentials = credentials if credentials else {} self._blob_to_text_args = blob_to_text_args if blob_to_text_args else {} self._blob_from_text_args = blob_from_text_args if blob_from_text_args else {} + super().__init__(load_args, save_args) def _load(self) -> pd.DataFrame: blob_service = BlockBlobService(**self._credentials) diff --git a/kedro/contrib/io/bioinformatics/sequence_dataset.py b/kedro/contrib/io/bioinformatics/sequence_dataset.py index 908f22f8ec..b85a44fc74 100644 --- a/kedro/contrib/io/bioinformatics/sequence_dataset.py +++ b/kedro/contrib/io/bioinformatics/sequence_dataset.py @@ -95,18 +95,7 @@ def __init__( """ self._filepath = filepath - default_load_args = {} - default_save_args = {} - self._load_args = ( - {**default_load_args, **load_args} - if load_args is not None - else default_load_args - ) - self._save_args = ( - {**default_save_args, **save_args} - if save_args is not None - else default_save_args - ) + super().__init__(load_args, save_args) def _load(self) -> List: return list(SeqIO.parse(self._filepath, **self._load_args)) diff --git a/kedro/contrib/io/pyspark/spark_data_set.py b/kedro/contrib/io/pyspark/spark_data_set.py index b6e18a83d1..536587c7b1 100644 --- a/kedro/contrib/io/pyspark/spark_data_set.py +++ b/kedro/contrib/io/pyspark/spark_data_set.py @@ -106,8 +106,7 @@ def __init__( self._filepath = filepath self._file_format = file_format - self._load_args = load_args if load_args is not None else {} - self._save_args = save_args if save_args is not None else {} + super().__init__(load_args, save_args) @staticmethod def _get_spark(): diff --git a/kedro/contrib/io/pyspark/spark_jdbc.py b/kedro/contrib/io/pyspark/spark_jdbc.py index a087fd3982..f95e724ef6 100644 --- a/kedro/contrib/io/pyspark/spark_jdbc.py +++ b/kedro/contrib/io/pyspark/spark_jdbc.py @@ 
-140,8 +140,7 @@ def __init__( self._url = url self._table = table - self._load_args = load_args if load_args is not None else {} - self._save_args = save_args if save_args is not None else {} + super().__init__(load_args, save_args) # Update properties in load_args and save_args with credentials. if credentials is not None: diff --git a/kedro/io/core.py b/kedro/io/core.py index 45ec91597d..7af898806a 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -37,7 +37,7 @@ from datetime import datetime, timezone from glob import iglob from pathlib import Path, PurePosixPath -from typing import Any, Dict, Type +from typing import Any, Dict, Optional, Type from warnings import warn from kedro.utils import load_obj @@ -101,6 +101,9 @@ class AbstractDataSet(abc.ABC): >>> return dict(param1=self._param1, param2=self._param2) """ + DEFAULT_LOAD_ARGS = {} + DEFAULT_SAVE_ARGS = {} + @classmethod def from_config( cls: Type, @@ -189,6 +192,22 @@ def from_config( ) return data_set + def __init__( + self, + load_args: Optional[Dict[str, Any]] = None, + save_args: Optional[Dict[str, Any]] = None, + ) -> None: + self._load_args = ( + {**self.DEFAULT_LOAD_ARGS, **load_args} + if load_args is not None + else self.DEFAULT_LOAD_ARGS + ) + self._save_args = ( + {**self.DEFAULT_SAVE_ARGS, **save_args} + if save_args is not None + else self.DEFAULT_SAVE_ARGS + ) + def load(self) -> Any: """Loads data by delegation to the provided load method. diff --git a/kedro/io/csv_local.py b/kedro/io/csv_local.py index b512156d0d..f01aa98286 100644 --- a/kedro/io/csv_local.py +++ b/kedro/io/csv_local.py @@ -61,6 +61,8 @@ class CSVLocalDataSet(AbstractDataSet, FilepathVersionMixIn): """ + DEFAULT_SAVE_ARGS = {"index": False} + def _describe(self) -> Dict[str, Any]: return dict( filepath=self._filepath, @@ -94,19 +96,8 @@ def __init__( None, the latest version will be loaded. If its ``save`` attribute is None, save version will be autogenerated. """ - default_save_args = {"index": False} - default_load_args = {} self._filepath = filepath - self._load_args = ( - {**default_load_args, **load_args} - if load_args is not None - else default_load_args - ) - self._save_args = ( - {**default_save_args, **save_args} - if save_args is not None - else default_save_args - ) + super().__init__(load_args, save_args) self._version = version def _load(self) -> pd.DataFrame: diff --git a/kedro/io/csv_s3.py b/kedro/io/csv_s3.py index 306bf79602..d7a277b6b5 100644 --- a/kedro/io/csv_s3.py +++ b/kedro/io/csv_s3.py @@ -60,6 +60,8 @@ class CSVS3DataSet(AbstractDataSet, S3PathVersionMixIn): >>> assert data.equals(reloaded) """ + DEFAULT_SAVE_ARGS = {"index": False} + def _describe(self) -> Dict[str, Any]: return dict( filepath=self._filepath, @@ -101,14 +103,10 @@ def __init__( attribute is None, save version will be autogenerated. 
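Aside: the `{**self.DEFAULT_LOAD_ARGS, **load_args}` merge in the new `AbstractDataSet.__init__` above relies on dict-unpacking precedence, where keys from the right-hand dict win. A minimal, self-contained sketch of that behaviour (toy values, not part of this patch series):

    # Right-hand keys override left-hand ones, so user-supplied arguments
    # take precedence over the class-level defaults.
    DEFAULT_SAVE_ARGS = {"index": False, "sep": ","}
    save_args = {"sep": "|"}

    merged = {**DEFAULT_SAVE_ARGS, **save_args}
    assert merged == {"index": False, "sep": "|"}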
""" - default_save_args = {"index": False} - self._save_args = ( - {**default_save_args, **save_args} if save_args else default_save_args - ) - self._load_args = load_args if load_args else {} self._filepath = filepath self._bucket_name = bucket_name self._credentials = credentials if credentials else {} + super().__init__(load_args, save_args) self._version = version self._s3 = S3FileSystem(client_kwargs=self._credentials) diff --git a/kedro/io/excel_local.py b/kedro/io/excel_local.py index c88b32b977..194cfd90a3 100644 --- a/kedro/io/excel_local.py +++ b/kedro/io/excel_local.py @@ -61,6 +61,9 @@ class ExcelLocalDataSet(AbstractDataSet, FilepathVersionMixIn): """ + DEFAULT_LOAD_ARGS = {"engine": "xlrd"} + DEFAULT_SAVE_ARGS = {"index": False} + def _describe(self) -> Dict[str, Any]: return dict( filepath=self._filepath, @@ -105,19 +108,7 @@ def __init__( """ self._filepath = filepath - default_save_args = {"index": False} - default_load_args = {"engine": "xlrd"} - - self._load_args = ( - {**default_load_args, **load_args} - if load_args is not None - else default_load_args - ) - self._save_args = ( - {**default_save_args, **save_args} - if save_args is not None - else default_save_args - ) + super().__init__(load_args, save_args) self._engine = engine self._version = version diff --git a/kedro/io/hdf_local.py b/kedro/io/hdf_local.py index 7d0d3e5be2..e074ef481c 100644 --- a/kedro/io/hdf_local.py +++ b/kedro/io/hdf_local.py @@ -92,20 +92,9 @@ def __init__( attribute is None, save version will be autogenerated. """ - default_load_args = {} - default_save_args = {} self._filepath = filepath self._key = key - self._load_args = ( - {**default_load_args, **load_args} - if load_args is not None - else default_load_args - ) - self._save_args = ( - {**default_load_args, **save_args} - if save_args is not None - else default_save_args - ) + super().__init__(load_args, save_args) self._version = version def _load(self) -> pd.DataFrame: diff --git a/kedro/io/hdf_s3.py b/kedro/io/hdf_s3.py index e9b0ae61a2..a3b453443b 100644 --- a/kedro/io/hdf_s3.py +++ b/kedro/io/hdf_s3.py @@ -40,7 +40,6 @@ HDFSTORE_DRIVER = "H5FD_CORE" -# pylint: disable=too-many-instance-attributes class HDFS3DataSet(AbstractDataSet, S3PathVersionMixIn): """``HDFS3DataSet`` loads and saves data to a S3 bucket. The underlying functionality is supported by pandas, so it supports all @@ -100,22 +99,11 @@ def __init__( attribute is None, save version will be autogenerated. """ - default_load_args = {} - default_save_args = {} self._filepath = filepath self._key = key self._bucket_name = bucket_name self._credentials = credentials if credentials else {} - self._load_args = ( - {**default_load_args, **load_args} - if load_args is not None - else default_load_args - ) - self._save_args = ( - {**default_load_args, **save_args} - if save_args is not None - else default_save_args - ) + super().__init__(load_args, save_args) self._version = version self._s3 = S3FileSystem(client_kwargs=self._credentials) diff --git a/kedro/io/json_local.py b/kedro/io/json_local.py index 809d014802..3df1dcf0a1 100644 --- a/kedro/io/json_local.py +++ b/kedro/io/json_local.py @@ -58,6 +58,8 @@ class JSONLocalDataSet(AbstractDataSet, FilepathVersionMixIn): """ + DEFAULT_SAVE_ARGS = {"indent": 4} + def _describe(self) -> Dict[str, Any]: return dict( filepath=self._filepath, @@ -90,19 +92,8 @@ def __init__( attribute is None, save version will be autogenerated. 
""" - default_save_args = {"indent": 4} - default_load_args = {} self._filepath = filepath - self._load_args = ( - {**default_load_args, **load_args} - if load_args is not None - else default_load_args - ) - self._save_args = ( - {**default_save_args, **save_args} - if save_args is not None - else default_save_args - ) + super().__init__(load_args, save_args) self._version = version def _load(self) -> Any: diff --git a/kedro/io/parquet_local.py b/kedro/io/parquet_local.py index 2b4826fcbe..6998ac44ea 100644 --- a/kedro/io/parquet_local.py +++ b/kedro/io/parquet_local.py @@ -61,6 +61,8 @@ class ParquetLocalDataSet(AbstractDataSet, FilepathVersionMixIn): >>> assert data.equals(loaded_data) """ + DEFAULT_SAVE_ARGS = {"compression": None} + def _describe(self) -> Dict[str, Any]: return dict( filepath=self._filepath, @@ -107,22 +109,9 @@ def __init__( attribute is None, save version will be autogenerated. """ - default_save_args = {"compression": None} - default_load_args = {} - self._filepath = filepath self._engine = engine - - self._load_args = ( - {**default_load_args, **load_args} - if load_args is not None - else default_load_args - ) - self._save_args = ( - {**default_save_args, **save_args} - if save_args is not None - else default_save_args - ) + super().__init__(load_args, save_args) self._version = version def _load(self) -> pd.DataFrame: diff --git a/kedro/io/pickle_local.py b/kedro/io/pickle_local.py index 58fe4f76d1..5860f15ff3 100644 --- a/kedro/io/pickle_local.py +++ b/kedro/io/pickle_local.py @@ -113,9 +113,6 @@ def __init__( ImportError: If 'backend' could not be imported. """ - default_save_args = {} - default_load_args = {} - if backend not in ["pickle", "joblib"]: raise ValueError( "backend should be one of ['pickle', 'joblib'], got %s" % backend @@ -128,16 +125,7 @@ def __init__( self._filepath = filepath self._backend = backend - self._load_args = ( - {**default_load_args, **load_args} - if load_args is not None - else default_load_args - ) - self._save_args = ( - {**default_save_args, **save_args} - if save_args is not None - else default_save_args - ) + super().__init__(load_args, save_args) self._version = version def _load(self) -> Any: diff --git a/kedro/io/pickle_s3.py b/kedro/io/pickle_s3.py index adce6efe3f..24a5f18c46 100644 --- a/kedro/io/pickle_s3.py +++ b/kedro/io/pickle_s3.py @@ -95,23 +95,11 @@ def __init__( None, the latest version will be loaded. If its ``save`` attribute is None, save version will be autogenerated. """ - default_load_args = {} - default_save_args = {} - self._filepath = filepath self._bucket_name = bucket_name self._credentials = credentials if credentials else {} + super().__init__(load_args, save_args) self._version = version - self._load_args = ( - {**default_load_args, **load_args} - if load_args is not None - else default_load_args - ) - self._save_args = ( - {**default_save_args, **save_args} - if save_args is not None - else default_save_args - ) self._s3 = S3FileSystem(client_kwargs=self._credentials) @property diff --git a/kedro/io/sql.py b/kedro/io/sql.py index 0917b4a20a..6e0a126dd5 100644 --- a/kedro/io/sql.py +++ b/kedro/io/sql.py @@ -139,6 +139,8 @@ class SQLTableDataSet(AbstractDataSet): """ + DEFAULT_SAVE_ARGS = {"index": False} + def _describe(self) -> Dict[str, Any]: load_args = self._load_args.copy() save_args = self._save_args.copy() @@ -193,19 +195,7 @@ def __init__( "provide a SQLAlchemy connection string." 
) - default_save_args = {"index": False} - default_load_args = {} - - self._load_args = ( - {**default_load_args, **load_args} - if load_args is not None - else default_load_args - ) - self._save_args = ( - {**default_save_args, **save_args} - if save_args is not None - else default_save_args - ) + super().__init__(load_args, save_args) self._load_args["table_name"] = table_name self._save_args["name"] = table_name diff --git a/kedro/io/text_local.py b/kedro/io/text_local.py index 37870b25fa..020daaceaf 100644 --- a/kedro/io/text_local.py +++ b/kedro/io/text_local.py @@ -50,6 +50,9 @@ class TextLocalDataSet(AbstractDataSet, FilepathVersionMixIn): >>> reloaded = data_set.load() """ + DEFAULT_LOAD_ARGS = {"mode": "r"} + DEFAULT_SAVE_ARGS = {"mode": "w"} + def _describe(self) -> Dict[str, Any]: return dict( filepath=self._filepath, @@ -80,20 +83,8 @@ def __init__( None, the latest version will be loaded. If its ``save`` attribute is None, save version will be autogenerated. """ - default_save_args = {"mode": "w"} - default_load_args = {"mode": "r"} - self._filepath = filepath - self._load_args = ( - {**default_load_args, **load_args} - if load_args is not None - else default_load_args - ) - self._save_args = ( - {**default_save_args, **save_args} - if save_args is not None - else default_save_args - ) + super().__init__(load_args, save_args) self._version = version def _load(self) -> str: From ba18548018e95b1d5e4075788ae2b5e2a40cf3c7 Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Sun, 9 Jun 2019 12:26:11 -0700 Subject: [PATCH 02/17] Suppress ``super-init-not-called`` pylint messages --- kedro/contrib/io/pyspark/spark_jdbc.py | 4 +++- kedro/io/lambda_data_set.py | 1 + kedro/io/memory_data_set.py | 1 + kedro/io/sql.py | 1 + tests/io/test_data_catalog.py | 1 + 5 files changed, 7 insertions(+), 1 deletion(-) diff --git a/kedro/contrib/io/pyspark/spark_jdbc.py b/kedro/contrib/io/pyspark/spark_jdbc.py index f95e724ef6..3bc16d542f 100644 --- a/kedro/contrib/io/pyspark/spark_jdbc.py +++ b/kedro/contrib/io/pyspark/spark_jdbc.py @@ -123,6 +123,7 @@ def __init__( DataSetError: When either ``url`` or ``table`` is empty. """ + # pylint: disable=super-init-not-called if not url: raise DataSetError( @@ -140,7 +141,8 @@ def __init__( self._url = url self._table = table - super().__init__(load_args, save_args) + self._load_args = load_args if load_args is not None else {} + self._save_args = save_args if save_args is not None else {} # Update properties in load_args and save_args with credentials. if credentials is not None: diff --git a/kedro/io/lambda_data_set.py b/kedro/io/lambda_data_set.py index 48f60bf030..61b2ef58d7 100644 --- a/kedro/io/lambda_data_set.py +++ b/kedro/io/lambda_data_set.py @@ -113,6 +113,7 @@ def __init__( DataSetError: If load and/or save is specified, but is not a Callable. """ + # pylint: disable=super-init-not-called if load is not None and not callable(load): raise DataSetError( diff --git a/kedro/io/memory_data_set.py b/kedro/io/memory_data_set.py index cb08139024..c57863158e 100644 --- a/kedro/io/memory_data_set.py +++ b/kedro/io/memory_data_set.py @@ -80,6 +80,7 @@ def __init__(self, data: Any = None, max_loads: int = None): method call. """ + # pylint: disable=super-init-not-called self._data = None self._max_loads = max_loads if data is not None: diff --git a/kedro/io/sql.py b/kedro/io/sql.py index 6e0a126dd5..d287209347 100644 --- a/kedro/io/sql.py +++ b/kedro/io/sql.py @@ -281,6 +281,7 @@ def __init__( DataSetError: When either ``sql`` or ``con`` parameters is emtpy. 
""" + # pylint: disable=super-init-not-called if not sql: raise DataSetError( diff --git a/tests/io/test_data_catalog.py b/tests/io/test_data_catalog.py index 06f27a50e2..a65a282ae8 100644 --- a/tests/io/test_data_catalog.py +++ b/tests/io/test_data_catalog.py @@ -93,6 +93,7 @@ def conflicting_feed_dict(): class BadDataSet(AbstractDataSet): # pragma: no cover def __init__(self, filepath): + # pylint: disable=super-init-not-called self.filepath = filepath raise Exception("Naughty!") From 41b40b27d6046d74c8ad82ede7427e7985ec0444 Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Fri, 14 Jun 2019 14:52:58 -0700 Subject: [PATCH 03/17] Copy default args to prevent accidental mutation --- kedro/io/core.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/kedro/io/core.py b/kedro/io/core.py index 7af898806a..1b453ce8ee 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -197,16 +197,12 @@ def __init__( load_args: Optional[Dict[str, Any]] = None, save_args: Optional[Dict[str, Any]] = None, ) -> None: - self._load_args = ( - {**self.DEFAULT_LOAD_ARGS, **load_args} - if load_args is not None - else self.DEFAULT_LOAD_ARGS - ) - self._save_args = ( - {**self.DEFAULT_SAVE_ARGS, **save_args} - if save_args is not None - else self.DEFAULT_SAVE_ARGS - ) + self._load_args = self.DEFAULT_LOAD_ARGS.copy() + if load_args is not None: + self._load_args.update(load_args) + self._save_args = self.DEFAULT_SAVE_ARGS.copy() + if save_args is not None: + self._save_args.update(save_args) def load(self) -> Any: """Loads data by delegation to the provided load method. From c10a654782d8b2b3f0af3982b6ecdbb42ed666a5 Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Fri, 14 Jun 2019 15:53:59 -0700 Subject: [PATCH 04/17] Restore ``super().__init__`` given default arg fix --- kedro/contrib/io/pyspark/spark_jdbc.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kedro/contrib/io/pyspark/spark_jdbc.py b/kedro/contrib/io/pyspark/spark_jdbc.py index 3bc16d542f..f95e724ef6 100644 --- a/kedro/contrib/io/pyspark/spark_jdbc.py +++ b/kedro/contrib/io/pyspark/spark_jdbc.py @@ -123,7 +123,6 @@ def __init__( DataSetError: When either ``url`` or ``table`` is empty. """ - # pylint: disable=super-init-not-called if not url: raise DataSetError( @@ -141,8 +140,7 @@ def __init__( self._url = url self._table = table - self._load_args = load_args if load_args is not None else {} - self._save_args = save_args if save_args is not None else {} + super().__init__(load_args, save_args) # Update properties in load_args and save_args with credentials. if credentials is not None: From e83502cebe42b8b270367b1a213113ea030b5900 Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Tue, 2 Jul 2019 13:54:22 -0400 Subject: [PATCH 05/17] Refactor abstract base class modification as mixin --- kedro/contrib/io/__init__.py | 2 + kedro/contrib/io/azure/csv_blob.py | 3 +- .../io/bioinformatics/sequence_dataset.py | 3 +- kedro/contrib/io/core.py | 51 +++++++++++++++++++ kedro/contrib/io/pyspark/spark_data_set.py | 3 +- kedro/contrib/io/pyspark/spark_jdbc.py | 3 +- 6 files changed, 61 insertions(+), 4 deletions(-) create mode 100644 kedro/contrib/io/core.py diff --git a/kedro/contrib/io/__init__.py b/kedro/contrib/io/__init__.py index 2aa315c599..d26777acd5 100644 --- a/kedro/contrib/io/__init__.py +++ b/kedro/contrib/io/__init__.py @@ -31,3 +31,5 @@ `kedro.io` module (e.g. additional ``AbstractDataSet``s and extensions/alternative ``DataCatalog``s. 
""" + +from .core import DefaultArgumentsMixIn # NOQA diff --git a/kedro/contrib/io/azure/csv_blob.py b/kedro/contrib/io/azure/csv_blob.py index f4640dee4d..c84c313d45 100644 --- a/kedro/contrib/io/azure/csv_blob.py +++ b/kedro/contrib/io/azure/csv_blob.py @@ -35,10 +35,11 @@ import pandas as pd from azure.storage.blob import BlockBlobService +from kedro.contrib.io import DefaultArgumentsMixIn from kedro.io import AbstractDataSet -class CSVBlobDataSet(AbstractDataSet): +class CSVBlobDataSet(DefaultArgumentsMixIn, AbstractDataSet): """``CSVBlobDataSet`` loads and saves csv files in Microsoft's Azure blob storage. It uses azure storage SDK to read and write in azure and pandas to handle the csv file locally. diff --git a/kedro/contrib/io/bioinformatics/sequence_dataset.py b/kedro/contrib/io/bioinformatics/sequence_dataset.py index fdf160c6f9..7acf777578 100644 --- a/kedro/contrib/io/bioinformatics/sequence_dataset.py +++ b/kedro/contrib/io/bioinformatics/sequence_dataset.py @@ -35,10 +35,11 @@ from Bio import SeqIO +from kedro.contrib.io import DefaultArgumentsMixIn from kedro.io import AbstractDataSet -class BioSequenceLocalDataSet(AbstractDataSet): +class BioSequenceLocalDataSet(DefaultArgumentsMixIn, AbstractDataSet): """``BioSequenceLocalDataSet`` loads and saves data to a sequence file. Example: diff --git a/kedro/contrib/io/core.py b/kedro/contrib/io/core.py new file mode 100644 index 0000000000..a417b9b3cc --- /dev/null +++ b/kedro/contrib/io/core.py @@ -0,0 +1,51 @@ +# Copyright 2018-2019 QuantumBlack Visual Analytics Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND +# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS +# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN +# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +# The QuantumBlack Visual Analytics Limited (“QuantumBlack”) name and logo +# (either separately or in combination, “QuantumBlack Trademarks”) are +# trademarks of QuantumBlack. The License does not grant you any right or +# license to the QuantumBlack Trademarks. You may not use the QuantumBlack +# Trademarks or any confusingly similar mark as a trademark for your product, +# or use the QuantumBlack Trademarks in any other manner that might cause +# confusion in the marketplace, including but not limited to in advertising, +# on websites, or on software. +# +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""This module extends the set of classes ``kedro.io.core`` provides.""" + +from typing import Any, Dict, Optional + + +# pylint: disable=too-few-public-methods +class DefaultArgumentsMixIn: + """Mixin class that helps handle default load and save arguments.""" + + DEFAULT_LOAD_ARGS = {} + DEFAULT_SAVE_ARGS = {} + + def __init__( + self, + load_args: Optional[Dict[str, Any]] = None, + save_args: Optional[Dict[str, Any]] = None, + ) -> None: + self._load_args = self.DEFAULT_LOAD_ARGS.copy() + if load_args is not None: + self._load_args.update(load_args) + self._save_args = self.DEFAULT_SAVE_ARGS.copy() + if save_args is not None: + self._save_args.update(save_args) diff --git a/kedro/contrib/io/pyspark/spark_data_set.py b/kedro/contrib/io/pyspark/spark_data_set.py index 7f36142b34..7594bf5297 100644 --- a/kedro/contrib/io/pyspark/spark_data_set.py +++ b/kedro/contrib/io/pyspark/spark_data_set.py @@ -36,10 +36,11 @@ from pyspark.sql import DataFrame, SparkSession from pyspark.sql.utils import AnalysisException +from kedro.contrib.io import DefaultArgumentsMixIn from kedro.io import AbstractDataSet -class SparkDataSet(AbstractDataSet): +class SparkDataSet(DefaultArgumentsMixIn, AbstractDataSet): """``SparkDataSet`` loads and saves Spark data frames. Example: diff --git a/kedro/contrib/io/pyspark/spark_jdbc.py b/kedro/contrib/io/pyspark/spark_jdbc.py index cb118e42be..762e7ad73c 100644 --- a/kedro/contrib/io/pyspark/spark_jdbc.py +++ b/kedro/contrib/io/pyspark/spark_jdbc.py @@ -31,12 +31,13 @@ from pyspark.sql import DataFrame, SparkSession +from kedro.contrib.io import DefaultArgumentsMixIn from kedro.io import AbstractDataSet, DataSetError __all__ = ["SparkJDBCDataSet"] -class SparkJDBCDataSet(AbstractDataSet): +class SparkJDBCDataSet(DefaultArgumentsMixIn, AbstractDataSet): """``SparkJDBCDataSet`` loads data from a database table accessible via JDBC URL url and connection properties and saves the content of a PySpark DataFrame to an external database table via JDBC. It uses From 63fda574f5488dbaa0559d4e35ca1f2a98db2e79 Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Wed, 3 Jul 2019 09:14:56 -0400 Subject: [PATCH 06/17] Homogenize default load and save argument handling --- kedro/contrib/io/azure/csv_blob.py | 2 +- .../contrib/io/bioinformatics/sequence_dataset.py | 2 +- kedro/contrib/io/pyspark/spark_data_set.py | 2 +- kedro/contrib/io/pyspark/spark_jdbc.py | 2 +- kedro/io/core.py | 14 +------------- kedro/io/csv_local.py | 8 +++++++- kedro/io/csv_s3.py | 8 +++++++- kedro/io/excel_local.py | 7 ++++++- kedro/io/hdf_local.py | 10 +++++++++- kedro/io/hdf_s3.py | 11 ++++++++++- kedro/io/json_local.py | 8 +++++++- kedro/io/lambda_data_set.py | 1 - kedro/io/memory_data_set.py | 1 - kedro/io/parquet_local.py | 8 +++++++- kedro/io/pickle_local.py | 10 +++++++++- kedro/io/pickle_s3.py | 10 +++++++++- kedro/io/sql.py | 9 +++++++-- kedro/io/text_local.py | 7 ++++++- tests/io/test_data_catalog.py | 1 - 19 files changed, 89 insertions(+), 32 deletions(-) diff --git a/kedro/contrib/io/azure/csv_blob.py b/kedro/contrib/io/azure/csv_blob.py index c84c313d45..37dca6bc7e 100644 --- a/kedro/contrib/io/azure/csv_blob.py +++ b/kedro/contrib/io/azure/csv_blob.py @@ -39,7 +39,7 @@ from kedro.io import AbstractDataSet -class CSVBlobDataSet(DefaultArgumentsMixIn, AbstractDataSet): +class CSVBlobDataSet(AbstractDataSet, DefaultArgumentsMixIn): """``CSVBlobDataSet`` loads and saves csv files in Microsoft's Azure blob storage. 
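Aside: a condensed sketch of how the ``DefaultArgumentsMixIn`` introduced in PATCH 05 composes with a data set class; the ``CSVLike`` subclass is hypothetical and only illustrates that a subclass changes the defaults by redefining the class attribute:

    from typing import Any, Dict, Optional

    class DefaultArgumentsMixIn:
        DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
        DEFAULT_SAVE_ARGS: Dict[str, Any] = {}

        def __init__(
            self,
            load_args: Optional[Dict[str, Any]] = None,
            save_args: Optional[Dict[str, Any]] = None,
        ) -> None:
            self._load_args = self.DEFAULT_LOAD_ARGS.copy()
            if load_args is not None:
                self._load_args.update(load_args)
            self._save_args = self.DEFAULT_SAVE_ARGS.copy()
            if save_args is not None:
                self._save_args.update(save_args)

    class CSVLike(DefaultArgumentsMixIn):
        DEFAULT_SAVE_ARGS = {"index": False}  # subclass-specific defaults

    ds = CSVLike(save_args={"sep": "|"})
    assert ds._save_args == {"index": False, "sep": "|"}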
It uses azure storage SDK to read and write in azure and pandas to handle the csv file locally. diff --git a/kedro/contrib/io/bioinformatics/sequence_dataset.py b/kedro/contrib/io/bioinformatics/sequence_dataset.py index 7acf777578..6f844d1e52 100644 --- a/kedro/contrib/io/bioinformatics/sequence_dataset.py +++ b/kedro/contrib/io/bioinformatics/sequence_dataset.py @@ -39,7 +39,7 @@ from kedro.io import AbstractDataSet -class BioSequenceLocalDataSet(DefaultArgumentsMixIn, AbstractDataSet): +class BioSequenceLocalDataSet(AbstractDataSet, DefaultArgumentsMixIn): """``BioSequenceLocalDataSet`` loads and saves data to a sequence file. Example: diff --git a/kedro/contrib/io/pyspark/spark_data_set.py b/kedro/contrib/io/pyspark/spark_data_set.py index 7594bf5297..39acebc4bc 100644 --- a/kedro/contrib/io/pyspark/spark_data_set.py +++ b/kedro/contrib/io/pyspark/spark_data_set.py @@ -40,7 +40,7 @@ from kedro.io import AbstractDataSet -class SparkDataSet(DefaultArgumentsMixIn, AbstractDataSet): +class SparkDataSet(AbstractDataSet, DefaultArgumentsMixIn): """``SparkDataSet`` loads and saves Spark data frames. Example: diff --git a/kedro/contrib/io/pyspark/spark_jdbc.py b/kedro/contrib/io/pyspark/spark_jdbc.py index 762e7ad73c..842568b1d4 100644 --- a/kedro/contrib/io/pyspark/spark_jdbc.py +++ b/kedro/contrib/io/pyspark/spark_jdbc.py @@ -37,7 +37,7 @@ __all__ = ["SparkJDBCDataSet"] -class SparkJDBCDataSet(DefaultArgumentsMixIn, AbstractDataSet): +class SparkJDBCDataSet(AbstractDataSet, DefaultArgumentsMixIn): """``SparkJDBCDataSet`` loads data from a database table accessible via JDBC URL url and connection properties and saves the content of a PySpark DataFrame to an external database table via JDBC. It uses diff --git a/kedro/io/core.py b/kedro/io/core.py index 282ccd067e..c0f3f42d46 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -37,7 +37,7 @@ from datetime import datetime, timezone from glob import iglob from pathlib import Path, PurePosixPath -from typing import Any, Dict, Optional, Type +from typing import Any, Dict, Type from warnings import warn from kedro.utils import load_obj @@ -192,18 +192,6 @@ def from_config( ) return data_set - def __init__( - self, - load_args: Optional[Dict[str, Any]] = None, - save_args: Optional[Dict[str, Any]] = None, - ) -> None: - self._load_args = self.DEFAULT_LOAD_ARGS.copy() - if load_args is not None: - self._load_args.update(load_args) - self._save_args = self.DEFAULT_SAVE_ARGS.copy() - if save_args is not None: - self._save_args.update(save_args) - def load(self) -> Any: """Loads data by delegation to the provided load method. diff --git a/kedro/io/csv_local.py b/kedro/io/csv_local.py index 0e8cd2ac7b..ae21c08a0e 100644 --- a/kedro/io/csv_local.py +++ b/kedro/io/csv_local.py @@ -61,6 +61,7 @@ class CSVLocalDataSet(AbstractDataSet, FilepathVersionMixIn): """ + DEFAULT_LOAD_ARGS = {} DEFAULT_SAVE_ARGS = {"index": False} def _describe(self) -> Dict[str, Any]: @@ -97,7 +98,12 @@ def __init__( attribute is None, save version will be autogenerated. 
""" self._filepath = filepath - super().__init__(load_args, save_args) + self._load_args = self.DEFAULT_LOAD_ARGS.copy() + if load_args is not None: + self._load_args.update(load_args) + self._save_args = self.DEFAULT_SAVE_ARGS.copy() + if save_args is not None: + self._save_args.update(save_args) self._version = version def _load(self) -> pd.DataFrame: diff --git a/kedro/io/csv_s3.py b/kedro/io/csv_s3.py index 81219f5ddb..1d8d7baad5 100644 --- a/kedro/io/csv_s3.py +++ b/kedro/io/csv_s3.py @@ -60,6 +60,7 @@ class CSVS3DataSet(AbstractDataSet, S3PathVersionMixIn): >>> assert data.equals(reloaded) """ + DEFAULT_LOAD_ARGS = {} DEFAULT_SAVE_ARGS = {"index": False} def _describe(self) -> Dict[str, Any]: @@ -106,7 +107,12 @@ def __init__( self._filepath = filepath self._bucket_name = bucket_name self._credentials = credentials if credentials else {} - super().__init__(load_args, save_args) + self._load_args = self.DEFAULT_LOAD_ARGS.copy() + if load_args is not None: + self._load_args.update(load_args) + self._save_args = self.DEFAULT_SAVE_ARGS.copy() + if save_args is not None: + self._save_args.update(save_args) self._version = version self._s3 = S3FileSystem(client_kwargs=self._credentials) diff --git a/kedro/io/excel_local.py b/kedro/io/excel_local.py index 22258fffa7..a3123358ad 100644 --- a/kedro/io/excel_local.py +++ b/kedro/io/excel_local.py @@ -108,8 +108,13 @@ def __init__( """ self._filepath = filepath - super().__init__(load_args, save_args) self._engine = engine + self._load_args = self.DEFAULT_LOAD_ARGS.copy() + if load_args is not None: + self._load_args.update(load_args) + self._save_args = self.DEFAULT_SAVE_ARGS.copy() + if save_args is not None: + self._save_args.update(save_args) self._version = version def _load(self) -> Union[pd.DataFrame, Dict[str, pd.DataFrame]]: diff --git a/kedro/io/hdf_local.py b/kedro/io/hdf_local.py index b7d4268db0..6be61a90bd 100644 --- a/kedro/io/hdf_local.py +++ b/kedro/io/hdf_local.py @@ -63,6 +63,9 @@ class HDFLocalDataSet(AbstractDataSet, FilepathVersionMixIn): """ + DEFAULT_LOAD_ARGS = {} + DEFAULT_SAVE_ARGS = {} + # pylint: disable=too-many-arguments def __init__( self, @@ -94,7 +97,12 @@ def __init__( """ self._filepath = filepath self._key = key - super().__init__(load_args, save_args) + self._load_args = self.DEFAULT_LOAD_ARGS.copy() + if load_args is not None: + self._load_args.update(load_args) + self._save_args = self.DEFAULT_SAVE_ARGS.copy() + if save_args is not None: + self._save_args.update(save_args) self._version = version def _load(self) -> pd.DataFrame: diff --git a/kedro/io/hdf_s3.py b/kedro/io/hdf_s3.py index 570e0736c6..3d6ef04f22 100644 --- a/kedro/io/hdf_s3.py +++ b/kedro/io/hdf_s3.py @@ -40,6 +40,7 @@ HDFSTORE_DRIVER = "H5FD_CORE" +# pylint: disable=too-many-instance-attributes class HDFS3DataSet(AbstractDataSet, S3PathVersionMixIn): """``HDFS3DataSet`` loads and saves data to a S3 bucket. 
The underlying functionality is supported by pandas, so it supports all @@ -66,6 +67,9 @@ class HDFS3DataSet(AbstractDataSet, S3PathVersionMixIn): """ + DEFAULT_LOAD_ARGS = {} + DEFAULT_SAVE_ARGS = {} + # pylint: disable=too-many-arguments def __init__( self, @@ -104,7 +108,12 @@ def __init__( self._key = key self._bucket_name = bucket_name self._credentials = credentials if credentials else {} - super().__init__(load_args, save_args) + self._load_args = self.DEFAULT_LOAD_ARGS.copy() + if load_args is not None: + self._load_args.update(load_args) + self._save_args = self.DEFAULT_SAVE_ARGS.copy() + if save_args is not None: + self._save_args.update(save_args) self._version = version self._s3 = S3FileSystem(client_kwargs=self._credentials) diff --git a/kedro/io/json_local.py b/kedro/io/json_local.py index 991a80d643..c3d31fac99 100644 --- a/kedro/io/json_local.py +++ b/kedro/io/json_local.py @@ -58,6 +58,7 @@ class JSONLocalDataSet(AbstractDataSet, FilepathVersionMixIn): """ + DEFAULT_LOAD_ARGS = {} DEFAULT_SAVE_ARGS = {"indent": 4} def _describe(self) -> Dict[str, Any]: @@ -93,7 +94,12 @@ def __init__( """ self._filepath = filepath - super().__init__(load_args, save_args) + self._load_args = self.DEFAULT_LOAD_ARGS.copy() + if load_args is not None: + self._load_args.update(load_args) + self._save_args = self.DEFAULT_SAVE_ARGS.copy() + if save_args is not None: + self._save_args.update(save_args) self._version = version def _load(self) -> Any: diff --git a/kedro/io/lambda_data_set.py b/kedro/io/lambda_data_set.py index 1435c47d70..0219ce83c9 100644 --- a/kedro/io/lambda_data_set.py +++ b/kedro/io/lambda_data_set.py @@ -121,7 +121,6 @@ def __init__( DataSetError: If a method is specified, but is not a Callable. """ - # pylint: disable=super-init-not-called for name, value in [ ("load", load), diff --git a/kedro/io/memory_data_set.py b/kedro/io/memory_data_set.py index 5d1574f051..ccfca5a7ec 100644 --- a/kedro/io/memory_data_set.py +++ b/kedro/io/memory_data_set.py @@ -74,7 +74,6 @@ def __init__(self, data: Any = None): Args: data: Python object containing the data. 
""" - # pylint: disable=super-init-not-called self._data = None if data is not None: self._save(data) diff --git a/kedro/io/parquet_local.py b/kedro/io/parquet_local.py index 8583c36dac..42a6931378 100644 --- a/kedro/io/parquet_local.py +++ b/kedro/io/parquet_local.py @@ -61,6 +61,7 @@ class ParquetLocalDataSet(AbstractDataSet, FilepathVersionMixIn): >>> assert data.equals(loaded_data) """ + DEFAULT_LOAD_ARGS = {} DEFAULT_SAVE_ARGS = {"compression": None} def _describe(self) -> Dict[str, Any]: @@ -111,7 +112,12 @@ def __init__( """ self._filepath = filepath self._engine = engine - super().__init__(load_args, save_args) + self._load_args = self.DEFAULT_LOAD_ARGS.copy() + if load_args is not None: + self._load_args.update(load_args) + self._save_args = self.DEFAULT_SAVE_ARGS.copy() + if save_args is not None: + self._save_args.update(save_args) self._version = version def _load(self) -> pd.DataFrame: diff --git a/kedro/io/pickle_local.py b/kedro/io/pickle_local.py index f5a0e79569..871f1c85b1 100644 --- a/kedro/io/pickle_local.py +++ b/kedro/io/pickle_local.py @@ -67,6 +67,9 @@ class PickleLocalDataSet(AbstractDataSet, FilepathVersionMixIn): >>> reloaded = data_set.load() """ + DEFAULT_LOAD_ARGS = {} + DEFAULT_SAVE_ARGS = {} + BACKENDS = {"pickle": pickle, "joblib": joblib} # pylint: disable=too-many-arguments @@ -125,7 +128,12 @@ def __init__( self._filepath = filepath self._backend = backend - super().__init__(load_args, save_args) + self._load_args = self.DEFAULT_LOAD_ARGS.copy() + if load_args is not None: + self._load_args.update(load_args) + self._save_args = self.DEFAULT_SAVE_ARGS.copy() + if save_args is not None: + self._save_args.update(save_args) self._version = version def _load(self) -> Any: diff --git a/kedro/io/pickle_s3.py b/kedro/io/pickle_s3.py index 56b14d59bb..9ed834c707 100644 --- a/kedro/io/pickle_s3.py +++ b/kedro/io/pickle_s3.py @@ -61,6 +61,9 @@ class PickleS3DataSet(AbstractDataSet, S3PathVersionMixIn): >>> reloaded = data_set.load() """ + DEFAULT_LOAD_ARGS = {} + DEFAULT_SAVE_ARGS = {} + # pylint: disable=too-many-arguments def __init__( self, @@ -98,7 +101,12 @@ def __init__( self._filepath = filepath self._bucket_name = bucket_name self._credentials = credentials if credentials else {} - super().__init__(load_args, save_args) + self._load_args = self.DEFAULT_LOAD_ARGS.copy() + if load_args is not None: + self._load_args.update(load_args) + self._save_args = self.DEFAULT_SAVE_ARGS.copy() + if save_args is not None: + self._save_args.update(save_args) self._version = version self._s3 = S3FileSystem(client_kwargs=self._credentials) diff --git a/kedro/io/sql.py b/kedro/io/sql.py index 5eb99ad18b..41626df6bb 100644 --- a/kedro/io/sql.py +++ b/kedro/io/sql.py @@ -139,6 +139,7 @@ class SQLTableDataSet(AbstractDataSet): """ + DEFAULT_LOAD_ARGS = {} DEFAULT_SAVE_ARGS = {"index": False} def _describe(self) -> Dict[str, Any]: @@ -195,7 +196,12 @@ def __init__( "provide a SQLAlchemy connection string." ) - super().__init__(load_args, save_args) + self._load_args = self.DEFAULT_LOAD_ARGS.copy() + if load_args is not None: + self._load_args.update(load_args) + self._save_args = self.DEFAULT_SAVE_ARGS.copy() + if save_args is not None: + self._save_args.update(save_args) self._load_args["table_name"] = table_name self._save_args["name"] = table_name @@ -281,7 +287,6 @@ def __init__( DataSetError: When either ``sql`` or ``con`` parameters is emtpy. 
""" - # pylint: disable=super-init-not-called if not sql: raise DataSetError( diff --git a/kedro/io/text_local.py b/kedro/io/text_local.py index 7c8b9f0509..218ac7c464 100644 --- a/kedro/io/text_local.py +++ b/kedro/io/text_local.py @@ -85,7 +85,12 @@ def __init__( attribute is None, save version will be autogenerated. """ self._filepath = os.path.expanduser(filepath) - super().__init__(load_args, save_args) + self._load_args = self.DEFAULT_LOAD_ARGS.copy() + if load_args is not None: + self._load_args.update(load_args) + self._save_args = self.DEFAULT_SAVE_ARGS.copy() + if save_args is not None: + self._save_args.update(save_args) self._version = version def _load(self) -> str: diff --git a/tests/io/test_data_catalog.py b/tests/io/test_data_catalog.py index d2dceed1de..07e3597f78 100644 --- a/tests/io/test_data_catalog.py +++ b/tests/io/test_data_catalog.py @@ -94,7 +94,6 @@ def conflicting_feed_dict(): class BadDataSet(AbstractDataSet): # pragma: no cover def __init__(self, filepath): - # pylint: disable=super-init-not-called self.filepath = filepath raise Exception("Naughty!") From 050577314468f4a4eed22ea96ba1f85d1b257cfd Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Wed, 3 Jul 2019 10:07:36 -0400 Subject: [PATCH 07/17] Demarcate load and save argument handling :dragon: --- kedro/io/csv_local.py | 3 +++ kedro/io/csv_s3.py | 3 +++ kedro/io/excel_local.py | 3 +++ kedro/io/hdf_local.py | 3 +++ kedro/io/hdf_s3.py | 3 +++ kedro/io/json_local.py | 3 +++ kedro/io/parquet_local.py | 3 +++ kedro/io/pickle_local.py | 3 +++ kedro/io/pickle_s3.py | 3 +++ kedro/io/sql.py | 1 + kedro/io/text_local.py | 3 +++ 11 files changed, 31 insertions(+) diff --git a/kedro/io/csv_local.py b/kedro/io/csv_local.py index ae21c08a0e..4dc19c2a5d 100644 --- a/kedro/io/csv_local.py +++ b/kedro/io/csv_local.py @@ -98,12 +98,15 @@ def __init__( attribute is None, save version will be autogenerated. 
""" self._filepath = filepath + + # Handle default load and save arguments self._load_args = self.DEFAULT_LOAD_ARGS.copy() if load_args is not None: self._load_args.update(load_args) self._save_args = self.DEFAULT_SAVE_ARGS.copy() if save_args is not None: self._save_args.update(save_args) + self._version = version def _load(self) -> pd.DataFrame: diff --git a/kedro/io/csv_s3.py b/kedro/io/csv_s3.py index 1d8d7baad5..8795704590 100644 --- a/kedro/io/csv_s3.py +++ b/kedro/io/csv_s3.py @@ -107,12 +107,15 @@ def __init__( self._filepath = filepath self._bucket_name = bucket_name self._credentials = credentials if credentials else {} + + # Handle default load and save arguments self._load_args = self.DEFAULT_LOAD_ARGS.copy() if load_args is not None: self._load_args.update(load_args) self._save_args = self.DEFAULT_SAVE_ARGS.copy() if save_args is not None: self._save_args.update(save_args) + self._version = version self._s3 = S3FileSystem(client_kwargs=self._credentials) diff --git a/kedro/io/excel_local.py b/kedro/io/excel_local.py index a3123358ad..edabc7dd00 100644 --- a/kedro/io/excel_local.py +++ b/kedro/io/excel_local.py @@ -109,12 +109,15 @@ def __init__( """ self._filepath = filepath self._engine = engine + + # Handle default load and save arguments self._load_args = self.DEFAULT_LOAD_ARGS.copy() if load_args is not None: self._load_args.update(load_args) self._save_args = self.DEFAULT_SAVE_ARGS.copy() if save_args is not None: self._save_args.update(save_args) + self._version = version def _load(self) -> Union[pd.DataFrame, Dict[str, pd.DataFrame]]: diff --git a/kedro/io/hdf_local.py b/kedro/io/hdf_local.py index 6be61a90bd..8a87c4834e 100644 --- a/kedro/io/hdf_local.py +++ b/kedro/io/hdf_local.py @@ -97,12 +97,15 @@ def __init__( """ self._filepath = filepath self._key = key + + # Handle default load and save arguments self._load_args = self.DEFAULT_LOAD_ARGS.copy() if load_args is not None: self._load_args.update(load_args) self._save_args = self.DEFAULT_SAVE_ARGS.copy() if save_args is not None: self._save_args.update(save_args) + self._version = version def _load(self) -> pd.DataFrame: diff --git a/kedro/io/hdf_s3.py b/kedro/io/hdf_s3.py index 3d6ef04f22..e4f243986b 100644 --- a/kedro/io/hdf_s3.py +++ b/kedro/io/hdf_s3.py @@ -108,12 +108,15 @@ def __init__( self._key = key self._bucket_name = bucket_name self._credentials = credentials if credentials else {} + + # Handle default load and save arguments self._load_args = self.DEFAULT_LOAD_ARGS.copy() if load_args is not None: self._load_args.update(load_args) self._save_args = self.DEFAULT_SAVE_ARGS.copy() if save_args is not None: self._save_args.update(save_args) + self._version = version self._s3 = S3FileSystem(client_kwargs=self._credentials) diff --git a/kedro/io/json_local.py b/kedro/io/json_local.py index c3d31fac99..b53e78202b 100644 --- a/kedro/io/json_local.py +++ b/kedro/io/json_local.py @@ -94,12 +94,15 @@ def __init__( """ self._filepath = filepath + + # Handle default load and save arguments self._load_args = self.DEFAULT_LOAD_ARGS.copy() if load_args is not None: self._load_args.update(load_args) self._save_args = self.DEFAULT_SAVE_ARGS.copy() if save_args is not None: self._save_args.update(save_args) + self._version = version def _load(self) -> Any: diff --git a/kedro/io/parquet_local.py b/kedro/io/parquet_local.py index 42a6931378..341e7ff004 100644 --- a/kedro/io/parquet_local.py +++ b/kedro/io/parquet_local.py @@ -112,12 +112,15 @@ def __init__( """ self._filepath = filepath self._engine = engine + + # Handle 
default load and save arguments self._load_args = self.DEFAULT_LOAD_ARGS.copy() if load_args is not None: self._load_args.update(load_args) self._save_args = self.DEFAULT_SAVE_ARGS.copy() if save_args is not None: self._save_args.update(save_args) + self._version = version def _load(self) -> pd.DataFrame: diff --git a/kedro/io/pickle_local.py b/kedro/io/pickle_local.py index 871f1c85b1..91d5ebad6d 100644 --- a/kedro/io/pickle_local.py +++ b/kedro/io/pickle_local.py @@ -128,12 +128,15 @@ def __init__( self._filepath = filepath self._backend = backend + + # Handle default load and save arguments self._load_args = self.DEFAULT_LOAD_ARGS.copy() if load_args is not None: self._load_args.update(load_args) self._save_args = self.DEFAULT_SAVE_ARGS.copy() if save_args is not None: self._save_args.update(save_args) + self._version = version def _load(self) -> Any: diff --git a/kedro/io/pickle_s3.py b/kedro/io/pickle_s3.py index 9ed834c707..12c431b7b3 100644 --- a/kedro/io/pickle_s3.py +++ b/kedro/io/pickle_s3.py @@ -101,12 +101,15 @@ def __init__( self._filepath = filepath self._bucket_name = bucket_name self._credentials = credentials if credentials else {} + + # Handle default load and save arguments self._load_args = self.DEFAULT_LOAD_ARGS.copy() if load_args is not None: self._load_args.update(load_args) self._save_args = self.DEFAULT_SAVE_ARGS.copy() if save_args is not None: self._save_args.update(save_args) + self._version = version self._s3 = S3FileSystem(client_kwargs=self._credentials) diff --git a/kedro/io/sql.py b/kedro/io/sql.py index 41626df6bb..ec351bce15 100644 --- a/kedro/io/sql.py +++ b/kedro/io/sql.py @@ -196,6 +196,7 @@ def __init__( "provide a SQLAlchemy connection string." ) + # Handle default load and save arguments self._load_args = self.DEFAULT_LOAD_ARGS.copy() if load_args is not None: self._load_args.update(load_args) diff --git a/kedro/io/text_local.py b/kedro/io/text_local.py index 218ac7c464..5798691b97 100644 --- a/kedro/io/text_local.py +++ b/kedro/io/text_local.py @@ -85,12 +85,15 @@ def __init__( attribute is None, save version will be autogenerated. 
""" self._filepath = os.path.expanduser(filepath) + + # Handle default load and save arguments self._load_args = self.DEFAULT_LOAD_ARGS.copy() if load_args is not None: self._load_args.update(load_args) self._save_args = self.DEFAULT_SAVE_ARGS.copy() if save_args is not None: self._save_args.update(save_args) + self._version = version def _load(self) -> str: From a93abf202989030bf6f63211f4fd7ffd247c8f7f Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Wed, 3 Jul 2019 11:39:02 -0400 Subject: [PATCH 08/17] Cover load and save argument handling :paw_prints: --- tests/io/test_hdf_local.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/io/test_hdf_local.py b/tests/io/test_hdf_local.py index e5659d79fe..c00a60f75c 100644 --- a/tests/io/test_hdf_local.py +++ b/tests/io/test_hdf_local.py @@ -43,6 +43,16 @@ def hdf_data_set(filepath_hdf): return HDFLocalDataSet(filepath=filepath_hdf, key="test_hdf") +@pytest.fixture +def hdf_data_set_with_args(filepath_hdf): + return HDFLocalDataSet( + filepath=filepath_hdf, + key="test_hdf", + load_args={"errors": "ignore"}, + save_args={"errors": "ignore"}, + ) + + @pytest.fixture def versioned_hdf_data_set(filepath_hdf, load_version, save_version): return HDFLocalDataSet( @@ -88,6 +98,13 @@ def test_overwrite_if_exists(self, hdf_data_set, dummy_dataframe): reloaded_df = hdf_data_set.load() assert_frame_equal(reloaded_df, dummy_dataframe.T) + def test_save_and_load_args(self, hdf_data_set_with_args, dummy_dataframe): + """Test saving and reloading the data set.""" + hdf_data_set_with_args.save(dummy_dataframe) + reloaded_df = hdf_data_set_with_args.load() + + assert_frame_equal(reloaded_df, dummy_dataframe) + class TestHDFLocalDataSetVersioned: def test_save_and_load(self, versioned_hdf_data_set, dummy_dataframe): From 4226c2eda970e6e097679e3d2562b95892974150 Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Wed, 3 Jul 2019 18:23:53 -0400 Subject: [PATCH 09/17] Add tests to cover load/save argument conditionals --- tests/io/test_hdf_s3.py | 20 ++++++++++++++++++++ tests/io/test_json_local.py | 10 ++++++++++ tests/io/test_pickle_local.py | 15 +++++++++++++++ tests/io/test_pickle_s3.py | 25 +++++++++++++++++++++++++ tests/io/test_text_local.py | 22 ++++++++++++++++++++++ 5 files changed, 92 insertions(+) diff --git a/tests/io/test_hdf_s3.py b/tests/io/test_hdf_s3.py index 0877d049e9..fd2bb1c358 100644 --- a/tests/io/test_hdf_s3.py +++ b/tests/io/test_hdf_s3.py @@ -70,6 +70,18 @@ def mocked_s3_object(mocked_s3_bucket, dummy_dataframe): return mocked_s3_bucket +@pytest.fixture +def hdf_data_set_with_args(): + return HDFS3DataSet( + filepath=FILENAME, + bucket_name=BUCKET_NAME, + credentials=AWS_CREDENTIALS, + key="test_hdf", + load_args={"title": "test_hdf"}, + save_args={"title": "test_hdf"}, + ) + + @pytest.fixture def versioned_hdf_data_set(load_version, save_version): return HDFS3DataSet( @@ -166,6 +178,14 @@ def test_overwrite_if_exists(self, hdf_data_set, dummy_dataframe): reloaded_df = hdf_data_set.load() assert_frame_equal(reloaded_df, dummy_dataframe.T) + @pytest.mark.usefixtures("mocked_s3_object") + def test_save_and_load_args(self, hdf_data_set_with_args, dummy_dataframe): + """Test saving and reloading the data set.""" + hdf_data_set_with_args.save(dummy_dataframe) + reloaded_df = hdf_data_set_with_args.load() + + assert_frame_equal(reloaded_df, dummy_dataframe) + class TestHDFS3DataSetVersioned: @pytest.mark.usefixtures("mocked_s3_object") diff --git a/tests/io/test_json_local.py b/tests/io/test_json_local.py index 
99acbf9ded..0161abdc15 100644 --- a/tests/io/test_json_local.py +++ b/tests/io/test_json_local.py @@ -45,6 +45,11 @@ def json_data_set(filepath_json): return JSONLocalDataSet(filepath=filepath_json) +@pytest.fixture +def json_data_set_with_load_args(filepath_json): + return JSONLocalDataSet(filepath=filepath_json, load_args={"parse_float": Decimal}) + + @pytest.fixture def versioned_json_data_set(filepath_json, load_version, save_version): return JSONLocalDataSet( @@ -92,6 +97,11 @@ def test_exists(self, json_data_set, json_data): json_data_set.save(json_data) assert json_data_set.exists() + def test_load_args(self, json_data_set_with_load_args): + """Test reloading the data set with load arguments specified.""" + json_data_set_with_load_args.save([1.1]) + assert json_data_set_with_load_args.load() == [Decimal("1.1")] + def test_allow_nan(self, json_data_set, filepath_json): """Strict JSON specification does not allow out of range float values, however the python implementation accepts them by default. Test both diff --git a/tests/io/test_pickle_local.py b/tests/io/test_pickle_local.py index 4b48519577..b1b063c450 100644 --- a/tests/io/test_pickle_local.py +++ b/tests/io/test_pickle_local.py @@ -45,6 +45,15 @@ def pickle_data_set(filepath_pkl, request): return PickleLocalDataSet(filepath=filepath_pkl, backend=request.param) +@pytest.fixture +def pickle_data_set_with_args(filepath_pkl): + return PickleLocalDataSet( + filepath=filepath_pkl, + load_args={"fix_imports": False}, + save_args={"fix_imports": False}, + ) + + @pytest.fixture def versioned_pickle_data_set(filepath_pkl, load_version, save_version): return PickleLocalDataSet( @@ -97,6 +106,12 @@ def test_joblib_not_installed(self, filepath_pkl, mocker): with pytest.raises(ImportError, match=pattern): PickleLocalDataSet(filepath=filepath_pkl, backend="joblib") + def test_save_and_load_args(self, pickle_data_set_with_args, dummy_dataframe): + """Test saving and reloading the data with different options.""" + pickle_data_set_with_args.save(dummy_dataframe) + reloaded_df = pickle_data_set_with_args.load() + assert_frame_equal(reloaded_df, dummy_dataframe) + class TestPickleLocalDataSetVersioned: def test_save_and_load(self, versioned_pickle_data_set, dummy_dataframe): diff --git a/tests/io/test_pickle_s3.py b/tests/io/test_pickle_s3.py index 7a6354c00c..8c3665dbb9 100644 --- a/tests/io/test_pickle_s3.py +++ b/tests/io/test_pickle_s3.py @@ -53,6 +53,17 @@ def s3_data_set(): ) +@pytest.fixture +def s3_data_set_with_args(): + return PickleS3DataSet( + filepath=FILENAME, + bucket_name=BUCKET_NAME, + credentials=AWS_CREDENTIALS, + load_args={"fix_imports": False}, + save_args={"fix_imports": False}, + ) + + @pytest.fixture def versioned_s3_data_set(load_version, save_version): return PickleS3DataSet( @@ -113,6 +124,12 @@ def test_load(self, s3_data_set): loaded_data = s3_data_set.load() assert loaded_data == DUMMY_PICKABLE_OBJECT + @pytest.mark.usefixtures("mocked_s3_object") + def test_load_args(self, s3_data_set_with_args): + """Test loading the data from S3 with options.""" + loaded_data = s3_data_set_with_args.load() + assert loaded_data == DUMMY_PICKABLE_OBJECT + @pytest.mark.parametrize( "bad_credentials", [{"aws_secret_access_key": "SECRET"}, {"aws_access_key_id": "KEY"}], @@ -171,6 +188,14 @@ def test_save(self, s3_data_set): loaded_data = s3_data_set.load() assert loaded_data == new_data + @pytest.mark.usefixtures("mocked_s3_object") + def test_save_args(self, s3_data_set_with_args): + """Test saving the data to S3 with options.""" + 
new_data = {"x": "y"} + s3_data_set_with_args.save(new_data) + loaded_data = s3_data_set_with_args.load() + assert loaded_data == new_data + def test_serializable(self, s3_data_set): ForkingPickler.dumps(s3_data_set) diff --git a/tests/io/test_text_local.py b/tests/io/test_text_local.py index 3d53537ed2..268c9c02ef 100644 --- a/tests/io/test_text_local.py +++ b/tests/io/test_text_local.py @@ -45,6 +45,16 @@ def txt_data_set(filepath_txt, request): return TextLocalDataSet(filepath=filepath_txt, **request.param) +@pytest.fixture(params=[dict()]) +def txt_data_set_with_args(filepath_txt, request): + return TextLocalDataSet( + filepath=filepath_txt, + load_args={"errors": "ignore"}, + save_args={"errors": "ignore"}, + **request.param + ) + + @pytest.fixture def versioned_txt_data_set(filepath_txt, load_version, save_version): return TextLocalDataSet( @@ -67,6 +77,12 @@ def test_should_write_to_file(self, txt_data_set, sample_text, filepath_txt): txt_data_set.save(sample_text) assert Path(filepath_txt).read_text("utf-8") == sample_text + def test_should_write_to_file_with_args( + self, txt_data_set_with_args, sample_text, filepath_txt + ): + txt_data_set_with_args.save(sample_text) + assert Path(filepath_txt).read_text("utf-8") == sample_text + def test_load_missing_txt_file(self, txt_data_set): """Check the error raised when trying to load nonexistent txt file.""" pattern = r"Failed while loading data from data set TextLocalDataSet" @@ -77,6 +93,12 @@ def test_should_read_from_file(self, txt_data_set, sample_text, filepath_txt): traditional_write(filepath_txt, sample_text) assert sample_text == txt_data_set.load() + def test_should_read_from_file_with_args( + self, txt_data_set_with_args, sample_text, filepath_txt + ): + traditional_write(filepath_txt, sample_text) + assert sample_text == txt_data_set_with_args.load() + def test_assess_if_file_exists(self, txt_data_set, sample_text, filepath_txt): assert not txt_data_set.exists() traditional_write(filepath_txt, sample_text) From a17ae9e95136f27d30746ce18b68a1bc49d58b36 Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Wed, 3 Jul 2019 18:35:42 -0400 Subject: [PATCH 10/17] Fix non-ASCII characters in legal header :pencil2: --- kedro/contrib/io/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kedro/contrib/io/core.py b/kedro/contrib/io/core.py index a417b9b3cc..3eeee1ac00 100644 --- a/kedro/contrib/io/core.py +++ b/kedro/contrib/io/core.py @@ -14,8 +14,8 @@ # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # -# The QuantumBlack Visual Analytics Limited (“QuantumBlack”) name and logo -# (either separately or in combination, “QuantumBlack Trademarks”) are +# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo +# (either separately or in combination, "QuantumBlack Trademarks") are # trademarks of QuantumBlack. The License does not grant you any right or # license to the QuantumBlack Trademarks. 
You may not use the QuantumBlack # Trademarks or any confusingly similar mark as a trademark for your product, From f7b2373c2296fdd12ed98a04e44e8621c7d2fe91 Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Sat, 6 Jul 2019 23:00:56 -0400 Subject: [PATCH 11/17] Remove load/save defaults from ``AbstractDataSet`` --- kedro/io/core.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/kedro/io/core.py b/kedro/io/core.py index c0f3f42d46..b0a347b770 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -101,9 +101,6 @@ class AbstractDataSet(abc.ABC): >>> return dict(param1=self._param1, param2=self._param2) """ - DEFAULT_LOAD_ARGS = {} - DEFAULT_SAVE_ARGS = {} - @classmethod def from_config( cls: Type, From 124d66348811446defc22b13856dd588a70db601 Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Tue, 9 Jul 2019 19:16:58 -0400 Subject: [PATCH 12/17] Call ``super().__init__`` in mix-in implementation --- kedro/contrib/io/core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/kedro/contrib/io/core.py b/kedro/contrib/io/core.py index 3eeee1ac00..dbec7c1a82 100644 --- a/kedro/contrib/io/core.py +++ b/kedro/contrib/io/core.py @@ -43,6 +43,7 @@ def __init__( load_args: Optional[Dict[str, Any]] = None, save_args: Optional[Dict[str, Any]] = None, ) -> None: + super().__init__() self._load_args = self.DEFAULT_LOAD_ARGS.copy() if load_args is not None: self._load_args.update(load_args) From d3c7153bc9c825dc4fee127f00ef0b20709744ce Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Tue, 9 Jul 2019 19:31:33 -0400 Subject: [PATCH 13/17] Fix MRO when subclassing ``DefaultArgumentsMixIn`` --- kedro/contrib/io/azure/csv_blob.py | 2 +- kedro/contrib/io/bioinformatics/sequence_dataset.py | 2 +- kedro/contrib/io/pyspark/spark_data_set.py | 2 +- kedro/contrib/io/pyspark/spark_jdbc.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/kedro/contrib/io/azure/csv_blob.py b/kedro/contrib/io/azure/csv_blob.py index 37dca6bc7e..c84c313d45 100644 --- a/kedro/contrib/io/azure/csv_blob.py +++ b/kedro/contrib/io/azure/csv_blob.py @@ -39,7 +39,7 @@ from kedro.io import AbstractDataSet -class CSVBlobDataSet(AbstractDataSet, DefaultArgumentsMixIn): +class CSVBlobDataSet(DefaultArgumentsMixIn, AbstractDataSet): """``CSVBlobDataSet`` loads and saves csv files in Microsoft's Azure blob storage. It uses azure storage SDK to read and write in azure and pandas to handle the csv file locally. diff --git a/kedro/contrib/io/bioinformatics/sequence_dataset.py b/kedro/contrib/io/bioinformatics/sequence_dataset.py index 6f844d1e52..7acf777578 100644 --- a/kedro/contrib/io/bioinformatics/sequence_dataset.py +++ b/kedro/contrib/io/bioinformatics/sequence_dataset.py @@ -39,7 +39,7 @@ from kedro.io import AbstractDataSet -class BioSequenceLocalDataSet(AbstractDataSet, DefaultArgumentsMixIn): +class BioSequenceLocalDataSet(DefaultArgumentsMixIn, AbstractDataSet): """``BioSequenceLocalDataSet`` loads and saves data to a sequence file. Example: diff --git a/kedro/contrib/io/pyspark/spark_data_set.py b/kedro/contrib/io/pyspark/spark_data_set.py index 39acebc4bc..7594bf5297 100644 --- a/kedro/contrib/io/pyspark/spark_data_set.py +++ b/kedro/contrib/io/pyspark/spark_data_set.py @@ -40,7 +40,7 @@ from kedro.io import AbstractDataSet -class SparkDataSet(AbstractDataSet, DefaultArgumentsMixIn): +class SparkDataSet(DefaultArgumentsMixIn, AbstractDataSet): """``SparkDataSet`` loads and saves Spark data frames. 
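Aside: PATCH 12 and PATCH 13 above are two halves of the same fix — the mix-in calls `super().__init__()` so that initialisation continues along the method resolution order, and listing the mix-in before ``AbstractDataSet`` guarantees the mix-in's ``__init__`` is the one found first. A toy MRO check (stand-in classes, not the real Kedro ones):

    class Base:                        # stand-in for AbstractDataSet
        pass

    class ArgsMixIn:                   # stand-in for DefaultArgumentsMixIn
        def __init__(self, load_args=None, save_args=None):
            super().__init__()         # cooperative: defers to the next class in the MRO
            self._load_args = dict(load_args or {})
            self._save_args = dict(save_args or {})

    class MyDataSet(ArgsMixIn, Base):  # mix-in first, as restored in PATCH 13
        pass

    assert MyDataSet.__mro__ == (MyDataSet, ArgsMixIn, Base, object)
    ds = MyDataSet(load_args={"mode": "r"})
    assert ds._load_args == {"mode": "r"}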
From d3c7153bc9c825dc4fee127f00ef0b20709744ce Mon Sep 17 00:00:00 2001
From: Deepyaman Datta
Date: Tue, 9 Jul 2019 19:31:33 -0400
Subject: [PATCH 13/17] Fix MRO when subclassing ``DefaultArgumentsMixIn``

---
 kedro/contrib/io/azure/csv_blob.py                  | 2 +-
 kedro/contrib/io/bioinformatics/sequence_dataset.py | 2 +-
 kedro/contrib/io/pyspark/spark_data_set.py          | 2 +-
 kedro/contrib/io/pyspark/spark_jdbc.py              | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/kedro/contrib/io/azure/csv_blob.py b/kedro/contrib/io/azure/csv_blob.py
index 37dca6bc7e..c84c313d45 100644
--- a/kedro/contrib/io/azure/csv_blob.py
+++ b/kedro/contrib/io/azure/csv_blob.py
@@ -39,7 +39,7 @@
 from kedro.io import AbstractDataSet
 
 
-class CSVBlobDataSet(AbstractDataSet, DefaultArgumentsMixIn):
+class CSVBlobDataSet(DefaultArgumentsMixIn, AbstractDataSet):
     """``CSVBlobDataSet`` loads and saves csv files in Microsoft's Azure
     blob storage. It uses azure storage SDK to read and write in azure
     and pandas to handle the csv file locally.

diff --git a/kedro/contrib/io/bioinformatics/sequence_dataset.py b/kedro/contrib/io/bioinformatics/sequence_dataset.py
index 6f844d1e52..7acf777578 100644
--- a/kedro/contrib/io/bioinformatics/sequence_dataset.py
+++ b/kedro/contrib/io/bioinformatics/sequence_dataset.py
@@ -39,7 +39,7 @@
 from kedro.io import AbstractDataSet
 
 
-class BioSequenceLocalDataSet(AbstractDataSet, DefaultArgumentsMixIn):
+class BioSequenceLocalDataSet(DefaultArgumentsMixIn, AbstractDataSet):
     """``BioSequenceLocalDataSet`` loads and saves data to a sequence file.
 
     Example:

diff --git a/kedro/contrib/io/pyspark/spark_data_set.py b/kedro/contrib/io/pyspark/spark_data_set.py
index 39acebc4bc..7594bf5297 100644
--- a/kedro/contrib/io/pyspark/spark_data_set.py
+++ b/kedro/contrib/io/pyspark/spark_data_set.py
@@ -40,7 +40,7 @@
 from kedro.io import AbstractDataSet
 
 
-class SparkDataSet(AbstractDataSet, DefaultArgumentsMixIn):
+class SparkDataSet(DefaultArgumentsMixIn, AbstractDataSet):
     """``SparkDataSet`` loads and saves Spark data frames.
 
     Example:

diff --git a/kedro/contrib/io/pyspark/spark_jdbc.py b/kedro/contrib/io/pyspark/spark_jdbc.py
index 842568b1d4..762e7ad73c 100644
--- a/kedro/contrib/io/pyspark/spark_jdbc.py
+++ b/kedro/contrib/io/pyspark/spark_jdbc.py
@@ -37,7 +37,7 @@
 __all__ = ["SparkJDBCDataSet"]
 
 
-class SparkJDBCDataSet(AbstractDataSet, DefaultArgumentsMixIn):
+class SparkJDBCDataSet(DefaultArgumentsMixIn, AbstractDataSet):
     """``SparkJDBCDataSet`` loads data from a database table accessible
     via JDBC URL url and connection properties and saves the content of
     a PySpark DataFrame to an external database table via JDBC. It uses

From cac0c78daa121b0015c27522272858d644f7781d Mon Sep 17 00:00:00 2001
From: Deepyaman Datta
Date: Tue, 9 Jul 2019 21:50:34 -0400
Subject: [PATCH 14/17] Copy default argument dicts with ``copy.deepcopy``

---
 kedro/contrib/io/core.py  | 5 +++--
 kedro/io/csv_local.py     | 5 +++--
 kedro/io/csv_s3.py        | 5 +++--
 kedro/io/excel_local.py   | 5 +++--
 kedro/io/hdf_local.py     | 5 +++--
 kedro/io/hdf_s3.py        | 5 +++--
 kedro/io/json_local.py    | 5 +++--
 kedro/io/parquet_local.py | 5 +++--
 kedro/io/pickle_local.py  | 5 +++--
 kedro/io/pickle_s3.py     | 5 +++--
 kedro/io/sql.py           | 5 +++--
 kedro/io/text_local.py    | 5 +++--
 12 files changed, 36 insertions(+), 24 deletions(-)

diff --git a/kedro/contrib/io/core.py b/kedro/contrib/io/core.py
index dbec7c1a82..ed169c4884 100644
--- a/kedro/contrib/io/core.py
+++ b/kedro/contrib/io/core.py
@@ -28,6 +28,7 @@
 
 """This module extends the set of classes ``kedro.io.core`` provides."""
 
+import copy
 from typing import Any, Dict, Optional
 
 
@@ -44,9 +45,9 @@ def __init__(
         save_args: Optional[Dict[str, Any]] = None,
     ) -> None:
         super().__init__()
-        self._load_args = self.DEFAULT_LOAD_ARGS.copy()
+        self._load_args = copy.deepcopy(self.DEFAULT_LOAD_ARGS)
         if load_args is not None:
             self._load_args.update(load_args)
-        self._save_args = self.DEFAULT_SAVE_ARGS.copy()
+        self._save_args = copy.deepcopy(self.DEFAULT_SAVE_ARGS)
         if save_args is not None:
             self._save_args.update(save_args)

diff --git a/kedro/io/csv_local.py b/kedro/io/csv_local.py
index 4dc19c2a5d..3f9f1dd247 100644
--- a/kedro/io/csv_local.py
+++ b/kedro/io/csv_local.py
@@ -30,6 +30,7 @@
 underlying functionality is supported by pandas, so it supports all
 allowed pandas options for loading and saving csv files.
 """
+import copy
 from pathlib import Path
 from typing import Any, Dict
 
@@ -100,10 +101,10 @@ def __init__(
         self._filepath = filepath
 
         # Handle default load and save arguments
-        self._load_args = self.DEFAULT_LOAD_ARGS.copy()
+        self._load_args = copy.deepcopy(self.DEFAULT_LOAD_ARGS)
         if load_args is not None:
             self._load_args.update(load_args)
-        self._save_args = self.DEFAULT_SAVE_ARGS.copy()
+        self._save_args = copy.deepcopy(self.DEFAULT_SAVE_ARGS)
         if save_args is not None:
             self._save_args.update(save_args)

diff --git a/kedro/io/csv_s3.py b/kedro/io/csv_s3.py
index 8795704590..54517af1c0 100644
--- a/kedro/io/csv_s3.py
+++ b/kedro/io/csv_s3.py
@@ -29,6 +29,7 @@
 """``CSVS3DataSet`` loads and saves data to a file in S3. It uses s3fs
 to read and write from S3 and pandas to handle the csv file.
 """
+import copy
 from typing import Any, Dict, Optional
 
 import pandas as pd
@@ -109,10 +110,10 @@ def __init__(
         self._credentials = credentials if credentials else {}
 
         # Handle default load and save arguments
-        self._load_args = self.DEFAULT_LOAD_ARGS.copy()
+        self._load_args = copy.deepcopy(self.DEFAULT_LOAD_ARGS)
         if load_args is not None:
             self._load_args.update(load_args)
-        self._save_args = self.DEFAULT_SAVE_ARGS.copy()
+        self._save_args = copy.deepcopy(self.DEFAULT_SAVE_ARGS)
         if save_args is not None:
             self._save_args.update(save_args)

diff --git a/kedro/io/excel_local.py b/kedro/io/excel_local.py
index edabc7dd00..fc4ccd2398 100644
--- a/kedro/io/excel_local.py
+++ b/kedro/io/excel_local.py
@@ -30,6 +30,7 @@
 underlying functionality is supported by pandas, so it supports all
 allowed pandas options for loading and saving Excel files.
 """
+import copy
 from pathlib import Path
 from typing import Any, Dict, Union
 
@@ -111,10 +112,10 @@ def __init__(
         self._engine = engine
 
         # Handle default load and save arguments
-        self._load_args = self.DEFAULT_LOAD_ARGS.copy()
+        self._load_args = copy.deepcopy(self.DEFAULT_LOAD_ARGS)
         if load_args is not None:
             self._load_args.update(load_args)
-        self._save_args = self.DEFAULT_SAVE_ARGS.copy()
+        self._save_args = copy.deepcopy(self.DEFAULT_SAVE_ARGS)
         if save_args is not None:
             self._save_args.update(save_args)

diff --git a/kedro/io/hdf_local.py b/kedro/io/hdf_local.py
index 8a87c4834e..1e483c49de 100644
--- a/kedro/io/hdf_local.py
+++ b/kedro/io/hdf_local.py
@@ -30,6 +30,7 @@
 underlying functionality is supported by pandas, so it supports all
 allowed pandas options for loading and saving hdf files.
 """
+import copy
 from pathlib import Path
 from typing import Any, Dict
 
@@ -99,10 +100,10 @@ def __init__(
         self._key = key
 
         # Handle default load and save arguments
-        self._load_args = self.DEFAULT_LOAD_ARGS.copy()
+        self._load_args = copy.deepcopy(self.DEFAULT_LOAD_ARGS)
         if load_args is not None:
             self._load_args.update(load_args)
-        self._save_args = self.DEFAULT_SAVE_ARGS.copy()
+        self._save_args = copy.deepcopy(self.DEFAULT_SAVE_ARGS)
         if save_args is not None:
             self._save_args.update(save_args)

diff --git a/kedro/io/hdf_s3.py b/kedro/io/hdf_s3.py
index e4f243986b..3838742330 100644
--- a/kedro/io/hdf_s3.py
+++ b/kedro/io/hdf_s3.py
@@ -30,6 +30,7 @@
 underlying functionality is supported by pandas HDFStore and PyTables, so it
 supports all allowed PyTables options for loading and saving hdf files.
 """
+import copy
 from typing import Any, Dict, Optional
 
 import pandas as pd
@@ -110,10 +111,10 @@ def __init__(
         self._credentials = credentials if credentials else {}
 
         # Handle default load and save arguments
-        self._load_args = self.DEFAULT_LOAD_ARGS.copy()
+        self._load_args = copy.deepcopy(self.DEFAULT_LOAD_ARGS)
         if load_args is not None:
             self._load_args.update(load_args)
-        self._save_args = self.DEFAULT_SAVE_ARGS.copy()
+        self._save_args = copy.deepcopy(self.DEFAULT_SAVE_ARGS)
         if save_args is not None:
             self._save_args.update(save_args)

diff --git a/kedro/io/json_local.py b/kedro/io/json_local.py
index b53e78202b..0420eec70c 100644
--- a/kedro/io/json_local.py
+++ b/kedro/io/json_local.py
@@ -29,6 +29,7 @@
 """``JSONLocalDataSet`` encodes a given object to json and saves it to a
 local file.
 """
+import copy
 import json
 from pathlib import Path
 from typing import Any, Dict
@@ -96,10 +97,10 @@ def __init__(
         self._filepath = filepath
 
         # Handle default load and save arguments
-        self._load_args = self.DEFAULT_LOAD_ARGS.copy()
+        self._load_args = copy.deepcopy(self.DEFAULT_LOAD_ARGS)
         if load_args is not None:
             self._load_args.update(load_args)
-        self._save_args = self.DEFAULT_SAVE_ARGS.copy()
+        self._save_args = copy.deepcopy(self.DEFAULT_SAVE_ARGS)
         if save_args is not None:
             self._save_args.update(save_args)

diff --git a/kedro/io/parquet_local.py b/kedro/io/parquet_local.py
index 341e7ff004..549d97c22c 100644
--- a/kedro/io/parquet_local.py
+++ b/kedro/io/parquet_local.py
@@ -36,6 +36,7 @@
 https://arrow.apache.org/docs/python/index.html
 """
 
+import copy
 from pathlib import Path
 from typing import Any, Dict
 
@@ -114,10 +115,10 @@ def __init__(
         self._engine = engine
 
         # Handle default load and save arguments
-        self._load_args = self.DEFAULT_LOAD_ARGS.copy()
+        self._load_args = copy.deepcopy(self.DEFAULT_LOAD_ARGS)
         if load_args is not None:
             self._load_args.update(load_args)
-        self._save_args = self.DEFAULT_SAVE_ARGS.copy()
+        self._save_args = copy.deepcopy(self.DEFAULT_SAVE_ARGS)
         if save_args is not None:
             self._save_args.update(save_args)

diff --git a/kedro/io/pickle_local.py b/kedro/io/pickle_local.py
index 91d5ebad6d..9783a71e88 100644
--- a/kedro/io/pickle_local.py
+++ b/kedro/io/pickle_local.py
@@ -32,6 +32,7 @@
 all allowed options for loading and saving pickle files.
 """
 
+import copy
 import pickle
 from pathlib import Path
 from typing import Any, Dict
@@ -130,10 +131,10 @@ def __init__(
         self._backend = backend
 
         # Handle default load and save arguments
-        self._load_args = self.DEFAULT_LOAD_ARGS.copy()
+        self._load_args = copy.deepcopy(self.DEFAULT_LOAD_ARGS)
         if load_args is not None:
             self._load_args.update(load_args)
-        self._save_args = self.DEFAULT_SAVE_ARGS.copy()
+        self._save_args = copy.deepcopy(self.DEFAULT_SAVE_ARGS)
         if save_args is not None:
             self._save_args.update(save_args)

diff --git a/kedro/io/pickle_s3.py b/kedro/io/pickle_s3.py
index 12c431b7b3..43210a740a 100644
--- a/kedro/io/pickle_s3.py
+++ b/kedro/io/pickle_s3.py
@@ -30,6 +30,7 @@
 The underlying functionality is supported by the ``pickle`` library, so it
 supports all allowed options for loading and saving pickle files.
 """
+import copy
 import pickle
 from typing import Any, Dict, Optional
 
@@ -103,10 +104,10 @@ def __init__(
         self._credentials = credentials if credentials else {}
 
         # Handle default load and save arguments
-        self._load_args = self.DEFAULT_LOAD_ARGS.copy()
+        self._load_args = copy.deepcopy(self.DEFAULT_LOAD_ARGS)
         if load_args is not None:
             self._load_args.update(load_args)
-        self._save_args = self.DEFAULT_SAVE_ARGS.copy()
+        self._save_args = copy.deepcopy(self.DEFAULT_SAVE_ARGS)
         if save_args is not None:
             self._save_args.update(save_args)

diff --git a/kedro/io/sql.py b/kedro/io/sql.py
index ec351bce15..07373e54d3 100644
--- a/kedro/io/sql.py
+++ b/kedro/io/sql.py
@@ -27,6 +27,7 @@
 # limitations under the License.
 
 """``SQLDataSet`` to load and save data to a SQL backend."""
+import copy
 import re
 from typing import Any, Dict, Optional
 
@@ -197,10 +198,10 @@ def __init__(
             )
 
         # Handle default load and save arguments
-        self._load_args = self.DEFAULT_LOAD_ARGS.copy()
+        self._load_args = copy.deepcopy(self.DEFAULT_LOAD_ARGS)
         if load_args is not None:
             self._load_args.update(load_args)
-        self._save_args = self.DEFAULT_SAVE_ARGS.copy()
+        self._save_args = copy.deepcopy(self.DEFAULT_SAVE_ARGS)
         if save_args is not None:
             self._save_args.update(save_args)

diff --git a/kedro/io/text_local.py b/kedro/io/text_local.py
index 5798691b97..915a3409dd 100644
--- a/kedro/io/text_local.py
+++ b/kedro/io/text_local.py
@@ -28,6 +28,7 @@
 """``TextLocalDataSet`` loads and saves data to a local text file. The data is
 accessed text data using the python open function.
 """
+import copy
 import os
 from pathlib import Path
 from typing import Any, Dict
@@ -87,10 +88,10 @@ def __init__(
         self._filepath = os.path.expanduser(filepath)
 
         # Handle default load and save arguments
-        self._load_args = self.DEFAULT_LOAD_ARGS.copy()
+        self._load_args = copy.deepcopy(self.DEFAULT_LOAD_ARGS)
         if load_args is not None:
             self._load_args.update(load_args)
-        self._save_args = self.DEFAULT_SAVE_ARGS.copy()
+        self._save_args = copy.deepcopy(self.DEFAULT_SAVE_ARGS)
         if save_args is not None:
             self._save_args.update(save_args)
From 5896daa56895ce3a17243847242fb38ee1078e63 Mon Sep 17 00:00:00 2001
From: Deepyaman Datta
Date: Wed, 10 Jul 2019 12:11:47 -0400
Subject: [PATCH 15/17] Annotate types for default load and save arguments

---
 kedro/contrib/io/core.py  | 4 ++--
 kedro/io/csv_local.py     | 4 ++--
 kedro/io/csv_s3.py        | 4 ++--
 kedro/io/hdf_local.py     | 4 ++--
 kedro/io/hdf_s3.py        | 4 ++--
 kedro/io/json_local.py    | 4 ++--
 kedro/io/parquet_local.py | 4 ++--
 kedro/io/pickle_local.py  | 4 ++--
 kedro/io/pickle_s3.py     | 4 ++--
 kedro/io/sql.py           | 4 ++--
 kedro/io/text_local.py    | 4 ++--
 11 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/kedro/contrib/io/core.py b/kedro/contrib/io/core.py
index ed169c4884..d4c5fb7c10 100644
--- a/kedro/contrib/io/core.py
+++ b/kedro/contrib/io/core.py
@@ -36,8 +36,8 @@
 class DefaultArgumentsMixIn:
     """Mixin class that helps handle default load and save arguments."""
 
-    DEFAULT_LOAD_ARGS = {}
-    DEFAULT_SAVE_ARGS = {}
+    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
 
     def __init__(
         self,

diff --git a/kedro/io/csv_local.py b/kedro/io/csv_local.py
index 9dcdeb0410..74da6bfa99 100644
--- a/kedro/io/csv_local.py
+++ b/kedro/io/csv_local.py
@@ -62,8 +62,8 @@ class CSVLocalDataSet(AbstractVersionedDataSet):
 
     """
 
-    DEFAULT_LOAD_ARGS = {}
-    DEFAULT_SAVE_ARGS = {"index": False}
+    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: Dict[str, Any] = {"index": False}
 
     def __init__(
         self,

diff --git a/kedro/io/csv_s3.py b/kedro/io/csv_s3.py
index af010c1377..3e16ac85d0 100644
--- a/kedro/io/csv_s3.py
+++ b/kedro/io/csv_s3.py
@@ -63,8 +63,8 @@ class CSVS3DataSet(AbstractVersionedDataSet):
     >>> assert data.equals(reloaded)
     """
 
-    DEFAULT_LOAD_ARGS = {}
-    DEFAULT_SAVE_ARGS = {"index": False}
+    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: Dict[str, Any] = {"index": False}
 
     # pylint: disable=too-many-arguments
     def __init__(

diff --git a/kedro/io/hdf_local.py b/kedro/io/hdf_local.py
index 568ba5e845..cf3edec161 100644
--- a/kedro/io/hdf_local.py
+++ b/kedro/io/hdf_local.py
@@ -64,8 +64,8 @@ class HDFLocalDataSet(AbstractVersionedDataSet):
 
     """
 
-    DEFAULT_LOAD_ARGS = {}
-    DEFAULT_SAVE_ARGS = {}
+    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
 
     # pylint: disable=too-many-arguments
     def __init__(

diff --git a/kedro/io/hdf_s3.py b/kedro/io/hdf_s3.py
index 3c2313129e..0a55b3b644 100644
--- a/kedro/io/hdf_s3.py
+++ b/kedro/io/hdf_s3.py
@@ -69,8 +69,8 @@ class HDFS3DataSet(AbstractVersionedDataSet):
 
     """
 
-    DEFAULT_LOAD_ARGS = {}
-    DEFAULT_SAVE_ARGS = {}
+    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
 
     # pylint: disable=too-many-arguments
     def __init__(

diff --git a/kedro/io/json_local.py b/kedro/io/json_local.py
index 214066f5a2..e2bccf0a8c 100644
--- a/kedro/io/json_local.py
+++ b/kedro/io/json_local.py
@@ -59,8 +59,8 @@ class JSONLocalDataSet(AbstractVersionedDataSet):
 
     """
 
-    DEFAULT_LOAD_ARGS = {}
-    DEFAULT_SAVE_ARGS = {"indent": 4}
+    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: Dict[str, Any] = {"indent": 4}
 
     def __init__(
         self,

diff --git a/kedro/io/parquet_local.py b/kedro/io/parquet_local.py
index 1fd154dbf8..b85bf87079 100644
--- a/kedro/io/parquet_local.py
+++ b/kedro/io/parquet_local.py
@@ -62,8 +62,8 @@ class ParquetLocalDataSet(AbstractVersionedDataSet):
     >>> assert data.equals(loaded_data)
     """
 
-    DEFAULT_LOAD_ARGS = {}
-    DEFAULT_SAVE_ARGS = {"compression": None}
+    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: Dict[str, Any] = {"compression": None}
 
     # pylint: disable=too-many-arguments
     def __init__(

diff --git a/kedro/io/pickle_local.py b/kedro/io/pickle_local.py
index 9299559d43..ba01dcbc69 100644
--- a/kedro/io/pickle_local.py
+++ b/kedro/io/pickle_local.py
@@ -68,8 +68,8 @@ class PickleLocalDataSet(AbstractVersionedDataSet):
     >>> reloaded = data_set.load()
     """
 
-    DEFAULT_LOAD_ARGS = {}
-    DEFAULT_SAVE_ARGS = {}
+    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
 
     BACKENDS = {"pickle": pickle, "joblib": joblib}

diff --git a/kedro/io/pickle_s3.py b/kedro/io/pickle_s3.py
index cc19e82c68..99e67562f0 100644
--- a/kedro/io/pickle_s3.py
+++ b/kedro/io/pickle_s3.py
@@ -65,8 +65,8 @@ class PickleS3DataSet(AbstractVersionedDataSet):
     >>> reloaded = data_set.load()
     """
 
-    DEFAULT_LOAD_ARGS = {}
-    DEFAULT_SAVE_ARGS = {}
+    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
 
     # pylint: disable=too-many-arguments
     def __init__(

diff --git a/kedro/io/sql.py b/kedro/io/sql.py
index e94576fb94..bcd82714dc 100644
--- a/kedro/io/sql.py
+++ b/kedro/io/sql.py
@@ -140,8 +140,8 @@ class SQLTableDataSet(AbstractDataSet):
 
     """
 
-    DEFAULT_LOAD_ARGS = {}
-    DEFAULT_SAVE_ARGS = {"index": False}
+    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: Dict[str, Any] = {"index": False}
 
     def _describe(self) -> Dict[str, Any]:
         load_args = self._load_args.copy()

diff --git a/kedro/io/text_local.py b/kedro/io/text_local.py
index b308d6b9f0..12e00e79fa 100644
--- a/kedro/io/text_local.py
+++ b/kedro/io/text_local.py
@@ -51,8 +51,8 @@ class TextLocalDataSet(AbstractVersionedDataSet):
     >>> reloaded = data_set.load()
     """
 
-    DEFAULT_LOAD_ARGS = {"mode": "r"}
-    DEFAULT_SAVE_ARGS = {"mode": "w"}
+    DEFAULT_LOAD_ARGS: Dict[str, Any] = {"mode": "r"}
+    DEFAULT_SAVE_ARGS: Dict[str, Any] = {"mode": "w"}
 
     def __init__(
         self,
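Two notes on the preceding pair of commits, with illustrative snippets rather than kedro's actual code. ``dict.copy()`` is shallow: the copy still aliases any nested containers inside the class-level default, so one instance mutating a nested value would corrupt the default for every other instance; ``copy.deepcopy`` severs that link. The nested key below is made up purely for demonstration:

import copy

DEFAULT_LOAD_ARGS = {"open_args": {"mode": "r"}}

shallow = DEFAULT_LOAD_ARGS.copy()
shallow["open_args"]["mode"] = "rb"
print(DEFAULT_LOAD_ARGS["open_args"]["mode"])  # "rb" -- shared default mutated

DEFAULT_LOAD_ARGS = {"open_args": {"mode": "r"}}
deep = copy.deepcopy(DEFAULT_LOAD_ARGS)
deep["open_args"]["mode"] = "rb"
print(DEFAULT_LOAD_ARGS["open_args"]["mode"])  # "r" -- default left intact

The type annotations, meanwhile, are not decorative. From a bare literal such as ``{"index": False}`` a checker like mypy infers ``Dict[str, bool]`` and would then reject legitimate overrides merged in from ``save_args``, e.g. ``{"sep": "|"}``; annotating the attribute as ``Dict[str, Any]`` keeps it permissive. A sketch with a hypothetical class:

from typing import Any, Dict


class SomeDataSet:
    # Without the annotation, mypy infers Dict[str, bool] from the literal
    # and flags later updates whose values are not bools.
    DEFAULT_SAVE_ARGS: Dict[str, Any] = {"index": False}

The next commit reverts this annotation style, and the one after reintroduces it in comment form.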
From 39317447d04e3a1428555fa246df1c3fcebe0121 Mon Sep 17 00:00:00 2001
From: Deepyaman Datta
Date: Wed, 10 Jul 2019 12:36:56 -0400
Subject: [PATCH 16/17] Revert "Annotate types for default load and save
 arguments"

This reverts commit 5896daa56895ce3a17243847242fb38ee1078e63.
---
 kedro/contrib/io/core.py  | 4 ++--
 kedro/io/csv_local.py     | 4 ++--
 kedro/io/csv_s3.py        | 4 ++--
 kedro/io/hdf_local.py     | 4 ++--
 kedro/io/hdf_s3.py        | 4 ++--
 kedro/io/json_local.py    | 4 ++--
 kedro/io/parquet_local.py | 4 ++--
 kedro/io/pickle_local.py  | 4 ++--
 kedro/io/pickle_s3.py     | 4 ++--
 kedro/io/sql.py           | 4 ++--
 kedro/io/text_local.py    | 4 ++--
 11 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/kedro/contrib/io/core.py b/kedro/contrib/io/core.py
index d4c5fb7c10..ed169c4884 100644
--- a/kedro/contrib/io/core.py
+++ b/kedro/contrib/io/core.py
@@ -36,8 +36,8 @@
 class DefaultArgumentsMixIn:
     """Mixin class that helps handle default load and save arguments."""
 
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
+    DEFAULT_LOAD_ARGS = {}
+    DEFAULT_SAVE_ARGS = {}
 
     def __init__(
         self,

diff --git a/kedro/io/csv_local.py b/kedro/io/csv_local.py
index 74da6bfa99..9dcdeb0410 100644
--- a/kedro/io/csv_local.py
+++ b/kedro/io/csv_local.py
@@ -62,8 +62,8 @@ class CSVLocalDataSet(AbstractVersionedDataSet):
 
     """
 
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {"index": False}
+    DEFAULT_LOAD_ARGS = {}
+    DEFAULT_SAVE_ARGS = {"index": False}
 
     def __init__(
         self,

diff --git a/kedro/io/csv_s3.py b/kedro/io/csv_s3.py
index 3e16ac85d0..af010c1377 100644
--- a/kedro/io/csv_s3.py
+++ b/kedro/io/csv_s3.py
@@ -63,8 +63,8 @@ class CSVS3DataSet(AbstractVersionedDataSet):
     >>> assert data.equals(reloaded)
     """
 
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {"index": False}
+    DEFAULT_LOAD_ARGS = {}
+    DEFAULT_SAVE_ARGS = {"index": False}
 
     # pylint: disable=too-many-arguments
     def __init__(

diff --git a/kedro/io/hdf_local.py b/kedro/io/hdf_local.py
index cf3edec161..568ba5e845 100644
--- a/kedro/io/hdf_local.py
+++ b/kedro/io/hdf_local.py
@@ -64,8 +64,8 @@ class HDFLocalDataSet(AbstractVersionedDataSet):
 
     """
 
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
+    DEFAULT_LOAD_ARGS = {}
+    DEFAULT_SAVE_ARGS = {}
 
     # pylint: disable=too-many-arguments
     def __init__(

diff --git a/kedro/io/hdf_s3.py b/kedro/io/hdf_s3.py
index 0a55b3b644..3c2313129e 100644
--- a/kedro/io/hdf_s3.py
+++ b/kedro/io/hdf_s3.py
@@ -69,8 +69,8 @@ class HDFS3DataSet(AbstractVersionedDataSet):
 
     """
 
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
+    DEFAULT_LOAD_ARGS = {}
+    DEFAULT_SAVE_ARGS = {}
 
     # pylint: disable=too-many-arguments
     def __init__(

diff --git a/kedro/io/json_local.py b/kedro/io/json_local.py
index e2bccf0a8c..214066f5a2 100644
--- a/kedro/io/json_local.py
+++ b/kedro/io/json_local.py
@@ -59,8 +59,8 @@ class JSONLocalDataSet(AbstractVersionedDataSet):
 
     """
 
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {"indent": 4}
+    DEFAULT_LOAD_ARGS = {}
+    DEFAULT_SAVE_ARGS = {"indent": 4}
 
     def __init__(
         self,

diff --git a/kedro/io/parquet_local.py b/kedro/io/parquet_local.py
index b85bf87079..1fd154dbf8 100644
--- a/kedro/io/parquet_local.py
+++ b/kedro/io/parquet_local.py
@@ -62,8 +62,8 @@ class ParquetLocalDataSet(AbstractVersionedDataSet):
     >>> assert data.equals(loaded_data)
     """
 
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {"compression": None}
+    DEFAULT_LOAD_ARGS = {}
+    DEFAULT_SAVE_ARGS = {"compression": None}
 
     # pylint: disable=too-many-arguments
     def __init__(

diff --git a/kedro/io/pickle_local.py b/kedro/io/pickle_local.py
index ba01dcbc69..9299559d43 100644
--- a/kedro/io/pickle_local.py
+++ b/kedro/io/pickle_local.py
@@ -68,8 +68,8 @@ class PickleLocalDataSet(AbstractVersionedDataSet):
     >>> reloaded = data_set.load()
     """
 
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
+    DEFAULT_LOAD_ARGS = {}
+    DEFAULT_SAVE_ARGS = {}
 
     BACKENDS = {"pickle": pickle, "joblib": joblib}

diff --git a/kedro/io/pickle_s3.py b/kedro/io/pickle_s3.py
index 99e67562f0..cc19e82c68 100644
--- a/kedro/io/pickle_s3.py
+++ b/kedro/io/pickle_s3.py
@@ -65,8 +65,8 @@ class PickleS3DataSet(AbstractVersionedDataSet):
     >>> reloaded = data_set.load()
     """
 
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
+    DEFAULT_LOAD_ARGS = {}
+    DEFAULT_SAVE_ARGS = {}
 
     # pylint: disable=too-many-arguments
     def __init__(

diff --git a/kedro/io/sql.py b/kedro/io/sql.py
index bcd82714dc..e94576fb94 100644
--- a/kedro/io/sql.py
+++ b/kedro/io/sql.py
@@ -140,8 +140,8 @@ class SQLTableDataSet(AbstractDataSet):
 
     """
 
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {"index": False}
+    DEFAULT_LOAD_ARGS = {}
+    DEFAULT_SAVE_ARGS = {"index": False}
 
     def _describe(self) -> Dict[str, Any]:
         load_args = self._load_args.copy()

diff --git a/kedro/io/text_local.py b/kedro/io/text_local.py
index 12e00e79fa..b308d6b9f0 100644
--- a/kedro/io/text_local.py
+++ b/kedro/io/text_local.py
@@ -51,8 +51,8 @@ class TextLocalDataSet(AbstractVersionedDataSet):
     >>> reloaded = data_set.load()
     """
 
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {"mode": "r"}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {"mode": "w"}
+    DEFAULT_LOAD_ARGS = {"mode": "r"}
+    DEFAULT_SAVE_ARGS = {"mode": "w"}
 
     def __init__(
         self,
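The revert-and-redo swaps PEP 526 variable annotations for ``# type:`` comments. The likely motivation, an assumption here rather than anything stated in the commit message, is interpreter compatibility: variable annotations are a syntax error before Python 3.6, while type comments convey the same information to checkers on older versions such as 3.5. The two spellings are equivalent to mypy, as the hypothetical class below illustrates:

from typing import Any, Dict


class SomeDataSet:
    # Python 3.6+ only:
    #     DEFAULT_SAVE_ARGS: Dict[str, Any] = {"index": False}
    # Also valid on older interpreters:
    DEFAULT_SAVE_ARGS = {"index": False}  # type: Dict[str, Any]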
From b2e4c1c401eb5f6ab5b2fb379c9242156131aa53 Mon Sep 17 00:00:00 2001
From: Deepyaman Datta
Date: Wed, 10 Jul 2019 12:44:18 -0400
Subject: [PATCH 17/17] Annotate types for default load and save arguments

---
 kedro/contrib/io/core.py  | 4 ++--
 kedro/io/csv_local.py     | 4 ++--
 kedro/io/csv_s3.py        | 4 ++--
 kedro/io/hdf_local.py     | 4 ++--
 kedro/io/hdf_s3.py        | 4 ++--
 kedro/io/json_local.py    | 4 ++--
 kedro/io/parquet_local.py | 4 ++--
 kedro/io/pickle_local.py  | 4 ++--
 kedro/io/pickle_s3.py     | 4 ++--
 kedro/io/sql.py           | 4 ++--
 kedro/io/text_local.py    | 4 ++--
 11 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/kedro/contrib/io/core.py b/kedro/contrib/io/core.py
index ed169c4884..c963db4a2f 100644
--- a/kedro/contrib/io/core.py
+++ b/kedro/contrib/io/core.py
@@ -36,8 +36,8 @@
 class DefaultArgumentsMixIn:
     """Mixin class that helps handle default load and save arguments."""
 
-    DEFAULT_LOAD_ARGS = {}
-    DEFAULT_SAVE_ARGS = {}
+    DEFAULT_LOAD_ARGS = {}  # type: Dict[str, Any]
+    DEFAULT_SAVE_ARGS = {}  # type: Dict[str, Any]
 
     def __init__(
         self,

diff --git a/kedro/io/csv_local.py b/kedro/io/csv_local.py
index 9dcdeb0410..17ad9bee94 100644
--- a/kedro/io/csv_local.py
+++ b/kedro/io/csv_local.py
@@ -62,8 +62,8 @@ class CSVLocalDataSet(AbstractVersionedDataSet):
 
     """
 
-    DEFAULT_LOAD_ARGS = {}
-    DEFAULT_SAVE_ARGS = {"index": False}
+    DEFAULT_LOAD_ARGS = {}  # type: Dict[str, Any]
+    DEFAULT_SAVE_ARGS = {"index": False}  # type: Dict[str, Any]
 
     def __init__(
         self,

diff --git a/kedro/io/csv_s3.py b/kedro/io/csv_s3.py
index af010c1377..de8043a055 100644
--- a/kedro/io/csv_s3.py
+++ b/kedro/io/csv_s3.py
@@ -63,8 +63,8 @@ class CSVS3DataSet(AbstractVersionedDataSet):
     >>> assert data.equals(reloaded)
     """
 
-    DEFAULT_LOAD_ARGS = {}
-    DEFAULT_SAVE_ARGS = {"index": False}
+    DEFAULT_LOAD_ARGS = {}  # type: Dict[str, Any]
+    DEFAULT_SAVE_ARGS = {"index": False}  # type: Dict[str, Any]
 
     # pylint: disable=too-many-arguments
     def __init__(

diff --git a/kedro/io/hdf_local.py b/kedro/io/hdf_local.py
index 568ba5e845..cb80fe5786 100644
--- a/kedro/io/hdf_local.py
+++ b/kedro/io/hdf_local.py
@@ -64,8 +64,8 @@ class HDFLocalDataSet(AbstractVersionedDataSet):
 
     """
 
-    DEFAULT_LOAD_ARGS = {}
-    DEFAULT_SAVE_ARGS = {}
+    DEFAULT_LOAD_ARGS = {}  # type: Dict[str, Any]
+    DEFAULT_SAVE_ARGS = {}  # type: Dict[str, Any]
 
     # pylint: disable=too-many-arguments
     def __init__(

diff --git a/kedro/io/hdf_s3.py b/kedro/io/hdf_s3.py
index 3c2313129e..6c83fff405 100644
--- a/kedro/io/hdf_s3.py
+++ b/kedro/io/hdf_s3.py
@@ -69,8 +69,8 @@ class HDFS3DataSet(AbstractVersionedDataSet):
 
     """
 
-    DEFAULT_LOAD_ARGS = {}
-    DEFAULT_SAVE_ARGS = {}
+    DEFAULT_LOAD_ARGS = {}  # type: Dict[str, Any]
+    DEFAULT_SAVE_ARGS = {}  # type: Dict[str, Any]
 
     # pylint: disable=too-many-arguments
     def __init__(

diff --git a/kedro/io/json_local.py b/kedro/io/json_local.py
index 214066f5a2..e6e359d94a 100644
--- a/kedro/io/json_local.py
+++ b/kedro/io/json_local.py
@@ -59,8 +59,8 @@ class JSONLocalDataSet(AbstractVersionedDataSet):
 
     """
 
-    DEFAULT_LOAD_ARGS = {}
-    DEFAULT_SAVE_ARGS = {"indent": 4}
+    DEFAULT_LOAD_ARGS = {}  # type: Dict[str, Any]
+    DEFAULT_SAVE_ARGS = {"indent": 4}  # type: Dict[str, Any]
 
     def __init__(
         self,

diff --git a/kedro/io/parquet_local.py b/kedro/io/parquet_local.py
index 1fd154dbf8..1ef3ecf86c 100644
--- a/kedro/io/parquet_local.py
+++ b/kedro/io/parquet_local.py
@@ -62,8 +62,8 @@ class ParquetLocalDataSet(AbstractVersionedDataSet):
     >>> assert data.equals(loaded_data)
     """
 
-    DEFAULT_LOAD_ARGS = {}
-    DEFAULT_SAVE_ARGS = {"compression": None}
+    DEFAULT_LOAD_ARGS = {}  # type: Dict[str, Any]
+    DEFAULT_SAVE_ARGS = {"compression": None}  # type: Dict[str, Any]
 
     # pylint: disable=too-many-arguments
     def __init__(

diff --git a/kedro/io/pickle_local.py b/kedro/io/pickle_local.py
index 9299559d43..d7ada37e93 100644
--- a/kedro/io/pickle_local.py
+++ b/kedro/io/pickle_local.py
@@ -68,8 +68,8 @@ class PickleLocalDataSet(AbstractVersionedDataSet):
     >>> reloaded = data_set.load()
     """
 
-    DEFAULT_LOAD_ARGS = {}
-    DEFAULT_SAVE_ARGS = {}
+    DEFAULT_LOAD_ARGS = {}  # type: Dict[str, Any]
+    DEFAULT_SAVE_ARGS = {}  # type: Dict[str, Any]
 
     BACKENDS = {"pickle": pickle, "joblib": joblib}

diff --git a/kedro/io/pickle_s3.py b/kedro/io/pickle_s3.py
index cc19e82c68..283a5b679c 100644
--- a/kedro/io/pickle_s3.py
+++ b/kedro/io/pickle_s3.py
@@ -65,8 +65,8 @@ class PickleS3DataSet(AbstractVersionedDataSet):
     >>> reloaded = data_set.load()
     """
 
-    DEFAULT_LOAD_ARGS = {}
-    DEFAULT_SAVE_ARGS = {}
+    DEFAULT_LOAD_ARGS = {}  # type: Dict[str, Any]
+    DEFAULT_SAVE_ARGS = {}  # type: Dict[str, Any]
 
     # pylint: disable=too-many-arguments
     def __init__(

diff --git a/kedro/io/sql.py b/kedro/io/sql.py
index e94576fb94..b4f8e0fc1a 100644
--- a/kedro/io/sql.py
+++ b/kedro/io/sql.py
@@ -140,8 +140,8 @@ class SQLTableDataSet(AbstractDataSet):
 
     """
 
-    DEFAULT_LOAD_ARGS = {}
-    DEFAULT_SAVE_ARGS = {"index": False}
+    DEFAULT_LOAD_ARGS = {}  # type: Dict[str, Any]
+    DEFAULT_SAVE_ARGS = {"index": False}  # type: Dict[str, Any]
 
     def _describe(self) -> Dict[str, Any]:
         load_args = self._load_args.copy()

diff --git a/kedro/io/text_local.py b/kedro/io/text_local.py
index b308d6b9f0..483fbcd01e 100644
--- a/kedro/io/text_local.py
+++ b/kedro/io/text_local.py
@@ -51,8 +51,8 @@ class TextLocalDataSet(AbstractVersionedDataSet):
     >>> reloaded = data_set.load()
     """
 
-    DEFAULT_LOAD_ARGS = {"mode": "r"}
-    DEFAULT_SAVE_ARGS = {"mode": "w"}
+    DEFAULT_LOAD_ARGS = {"mode": "r"}  # type: Dict[str, Any]
+    DEFAULT_SAVE_ARGS = {"mode": "w"}  # type: Dict[str, Any]
 
     def __init__(
         self,
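Taken together, the series converges on a mix-in that owns all default-argument handling while each data set only declares its defaults. A condensed usage sketch, with a stand-in base class rather than kedro's real ``AbstractDataSet``:

import copy
from typing import Any, Dict, Optional


class DefaultArgumentsMixIn:
    DEFAULT_LOAD_ARGS = {}  # type: Dict[str, Any]
    DEFAULT_SAVE_ARGS = {}  # type: Dict[str, Any]

    def __init__(self, load_args=None, save_args=None):
        # type: (Optional[Dict[str, Any]], Optional[Dict[str, Any]]) -> None
        super().__init__()
        self._load_args = copy.deepcopy(self.DEFAULT_LOAD_ARGS)
        if load_args is not None:
            self._load_args.update(load_args)
        self._save_args = copy.deepcopy(self.DEFAULT_SAVE_ARGS)
        if save_args is not None:
            self._save_args.update(save_args)


class FakeCSVDataSet(DefaultArgumentsMixIn):  # kedro would also list AbstractDataSet
    DEFAULT_SAVE_ARGS = {"index": False}  # type: Dict[str, Any]


data_set = FakeCSVDataSet(save_args={"sep": "|"})
print(data_set._save_args)  # {'index': False, 'sep': '|'} -- defaults merged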