Modularize default load and save argument handling #15

Merged 24 commits from fix/default-args into develop on Jul 23, 2019

Changes from 11 commits

Commits (24)
b9bf25d  Modularize default load and save argument handling (deepyaman, Jun 8, 2019)
ba18548  Suppress ``super-init-not-called`` pylint messages (deepyaman, Jun 9, 2019)
41b40b2  Copy default args to prevent accidental mutation (deepyaman, Jun 14, 2019)
c10a654  Restore ``super().__init__`` given default arg fix (deepyaman, Jun 14, 2019)
bf2643f  Merge branch 'develop' into fix/default-args (deepyaman, Jul 2, 2019)
e83502c  Refactor abstract base class modification as mixin (deepyaman, Jul 2, 2019)
63fda57  Homogenize default load and save argument handling (deepyaman, Jul 3, 2019)
0505773  Demarcate load and save argument handling :dragon: (deepyaman, Jul 3, 2019)
a93abf2  Cover load and save argument handling :paw_prints: (deepyaman, Jul 3, 2019)
4226c2e  Add tests to cover load/save argument conditionals (deepyaman, Jul 3, 2019)
a17ae9e  Fix non-ASCII characters in legal header :pencil2: (deepyaman, Jul 3, 2019)
f7b2373  Remove load/save defaults from ``AbstractDataSet`` (deepyaman, Jul 7, 2019)
124d663  Call ``super().__init__`` in mix-in implementation (deepyaman, Jul 9, 2019)
d3c7153  Fix MRO when subclassing ``DefaultArgumentsMixIn`` (deepyaman, Jul 9, 2019)
da10346  Merge branch 'fix/default-args' of https://github.com/deepyaman/kedro… (deepyaman, Jul 10, 2019)
cac0c78  Copy default argument dicts with ``copy.deepcopy`` (deepyaman, Jul 10, 2019)
681beb0  Merge branch 'develop' into fix/default-args (deepyaman, Jul 10, 2019)
0d31b7c  Merge branch 'develop' of https://github.com/quantumblacklabs/kedro i… (deepyaman, Jul 10, 2019)
473d725  Merge branch 'develop' into fix/default-args (deepyaman, Jul 10, 2019)
2a575d6  Merge branch 'develop' into fix/default-args (deepyaman, Jul 10, 2019)
5896daa  Annotate types for default load and save arguments (deepyaman, Jul 10, 2019)
3931744  Revert "Annotate types for default load and save arguments" (deepyaman, Jul 10, 2019)
b2e4c1c  Annotate types for default load and save arguments (deepyaman, Jul 10, 2019)
184d9f7  Merge branch 'develop' into fix/default-args (deepyaman, Jul 18, 2019)
2 changes: 2 additions & 0 deletions kedro/contrib/io/__init__.py
@@ -31,3 +31,5 @@
 `kedro.io` module (e.g. additional ``AbstractDataSet``s and
 extensions/alternative ``DataCatalog``s.
 """
+
+from .core import DefaultArgumentsMixIn  # NOQA
11 changes: 5 additions & 6 deletions kedro/contrib/io/azure/csv_blob.py
@@ -35,10 +35,11 @@
 import pandas as pd
 from azure.storage.blob import BlockBlobService
 
+from kedro.contrib.io import DefaultArgumentsMixIn
 from kedro.io import AbstractDataSet
 
 
-class CSVBlobDataSet(AbstractDataSet):
+class CSVBlobDataSet(AbstractDataSet, DefaultArgumentsMixIn):
     """``CSVBlobDataSet`` loads and saves csv files in Microsoft's Azure
     blob storage. It uses azure storage SDK to read and write in azure and
     pandas to handle the csv file locally.
@@ -61,6 +62,8 @@ class CSVBlobDataSet(AbstractDataSet):
     >>> assert data.equals(reloaded)
     """
 
+    DEFAULT_SAVE_ARGS = {"index": False}
+
     def _describe(self) -> Dict[str, Any]:
         return dict(
             filepath=self._filepath,
@@ -106,16 +109,12 @@ def __init__(
         All defaults are preserved, but "index", which is set to False.
 
         """
-        default_save_args = {"index": False}
-        self._save_args = (
-            {**default_save_args, **save_args} if save_args else default_save_args
-        )
-        self._load_args = load_args if load_args else {}
         self._filepath = filepath
         self._container_name = container_name
         self._credentials = credentials if credentials else {}
         self._blob_to_text_args = blob_to_text_args if blob_to_text_args else {}
         self._blob_from_text_args = blob_from_text_args if blob_from_text_args else {}
+        super().__init__(load_args, save_args)
 
     def _load(self) -> pd.DataFrame:
         blob_service = BlockBlobService(**self._credentials)
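For a sense of the merge semantics this introduces, a small hedged usage sketch (the file path, container name, and credential values are placeholders, not from the PR):

```python
# User-supplied save_args are layered over the class-level defaults.
data_set = CSVBlobDataSet(
    filepath="data.csv",            # placeholder
    container_name="my-container",  # placeholder
    credentials={"account_name": "placeholder", "account_key": "placeholder"},
    save_args={"sep": ";"},
)
# DEFAULT_SAVE_ARGS = {"index": False} is kept unless explicitly overridden:
assert data_set._save_args == {"index": False, "sep": ";"}
```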
16 changes: 3 additions & 13 deletions kedro/contrib/io/bioinformatics/sequence_dataset.py
@@ -35,10 +35,11 @@
 
 from Bio import SeqIO
 
+from kedro.contrib.io import DefaultArgumentsMixIn
 from kedro.io import AbstractDataSet
 
 
-class BioSequenceLocalDataSet(AbstractDataSet):
+class BioSequenceLocalDataSet(AbstractDataSet, DefaultArgumentsMixIn):
     """``BioSequenceLocalDataSet`` loads and saves data to a sequence file.
 
     Example:
@@ -95,18 +96,7 @@ def __init__(
 
         """
         self._filepath = filepath
-        default_load_args = {}
-        default_save_args = {}
-        self._load_args = (
-            {**default_load_args, **load_args}
-            if load_args is not None
-            else default_load_args
-        )
-        self._save_args = (
-            {**default_save_args, **save_args}
-            if save_args is not None
-            else default_save_args
-        )
+        super().__init__(load_args, save_args)
Reviewer: I prefer calling super at the top of the constructor, so the subclass would overwrite stuff from the parent, as a "specialisation" of the superclass.

Member Author (deepyaman): Fair argument. I just left it in the same place where default arguments were previously handled (as close to the original as I could), but that makes sense.
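To see the trade-off being discussed, a minimal standalone sketch (toy classes, not kedro code): calling super first lets the subclass specialise what the parent set, while calling it last lets the parent clobber the subclass.

```python
class Parent:
    def __init__(self):
        self.args = {"engine": "default"}


class SuperFirst(Parent):
    def __init__(self):
        super().__init__()            # parent defaults first...
        self.args["engine"] = "xlrd"  # ...then the subclass specialises


class SuperLast(Parent):
    def __init__(self):
        self.args = {"engine": "xlrd"}
        super().__init__()            # parent overwrites the subclass value


assert SuperFirst().args == {"engine": "xlrd"}
assert SuperLast().args == {"engine": "default"}  # easy to miss
```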


     def _load(self) -> List:
         return list(SeqIO.parse(self._filepath, **self._load_args))
51 changes: 51 additions & 0 deletions kedro/contrib/io/core.py
@@ -0,0 +1,51 @@
# Copyright 2018-2019 QuantumBlack Visual Analytics Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND
# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS
# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo
# (either separately or in combination, "QuantumBlack Trademarks") are
# trademarks of QuantumBlack. The License does not grant you any right or
# license to the QuantumBlack Trademarks. You may not use the QuantumBlack
# Trademarks or any confusingly similar mark as a trademark for your product,
# or use the QuantumBlack Trademarks in any other manner that might cause
# confusion in the marketplace, including but not limited to in advertising,
# on websites, or on software.
#
# See the License for the specific language governing permissions and
# limitations under the License.

"""This module extends the set of classes ``kedro.io.core`` provides."""

from typing import Any, Dict, Optional


# pylint: disable=too-few-public-methods
class DefaultArgumentsMixIn:
    """Mixin class that helps handle default load and save arguments."""

    DEFAULT_LOAD_ARGS = {}
    DEFAULT_SAVE_ARGS = {}

    def __init__(
        self,
        load_args: Optional[Dict[str, Any]] = None,
        save_args: Optional[Dict[str, Any]] = None,
    ) -> None:
        self._load_args = self.DEFAULT_LOAD_ARGS.copy()
        if load_args is not None:
            self._load_args.update(load_args)
        self._save_args = self.DEFAULT_SAVE_ARGS.copy()
        if save_args is not None:
            self._save_args.update(save_args)
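A hedged sketch of how a contrib dataset might adopt the mixin; the class name and the pandas-backed ``_load``/``_save`` bodies are illustrative, not part of this diff:

```python
from typing import Any, Dict, Optional

import pandas as pd

from kedro.contrib.io import DefaultArgumentsMixIn
from kedro.io import AbstractDataSet


class MyCSVDataSet(AbstractDataSet, DefaultArgumentsMixIn):
    """Illustrative only; mirrors the base-class order used in this PR."""

    DEFAULT_SAVE_ARGS = {"index": False}

    def __init__(
        self,
        filepath: str,
        load_args: Optional[Dict[str, Any]] = None,
        save_args: Optional[Dict[str, Any]] = None,
    ) -> None:
        self._filepath = filepath
        super().__init__(load_args, save_args)  # merges user args over defaults

    def _load(self) -> pd.DataFrame:
        return pd.read_csv(self._filepath, **self._load_args)

    def _save(self, data: pd.DataFrame) -> None:
        data.to_csv(self._filepath, **self._save_args)

    def _describe(self) -> Dict[str, Any]:
        return dict(
            filepath=self._filepath,
            load_args=self._load_args,
            save_args=self._save_args,
        )
```

With this, ``MyCSVDataSet("x.csv", save_args={"sep": ";"})._save_args`` comes out as ``{"index": False, "sep": ";"}``, which is the behaviour each dataset previously hand-rolled.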
6 changes: 3 additions & 3 deletions kedro/contrib/io/pyspark/spark_data_set.py
@@ -36,10 +36,11 @@
 from pyspark.sql import DataFrame, SparkSession
 from pyspark.sql.utils import AnalysisException
 
+from kedro.contrib.io import DefaultArgumentsMixIn
 from kedro.io import AbstractDataSet
 
 
-class SparkDataSet(AbstractDataSet):
+class SparkDataSet(AbstractDataSet, DefaultArgumentsMixIn):
     """``SparkDataSet`` loads and saves Spark data frames.
 
     Example:
@@ -106,8 +107,7 @@ def __init__(
 
         self._filepath = filepath
         self._file_format = file_format
-        self._load_args = load_args if load_args is not None else {}
-        self._save_args = save_args if save_args is not None else {}
+        super().__init__(load_args, save_args)
 
     @staticmethod
     def _get_spark():
6 changes: 3 additions & 3 deletions kedro/contrib/io/pyspark/spark_jdbc.py
@@ -31,12 +31,13 @@
 
 from pyspark.sql import DataFrame, SparkSession
 
+from kedro.contrib.io import DefaultArgumentsMixIn
 from kedro.io import AbstractDataSet, DataSetError
 
 __all__ = ["SparkJDBCDataSet"]
 
 
-class SparkJDBCDataSet(AbstractDataSet):
+class SparkJDBCDataSet(AbstractDataSet, DefaultArgumentsMixIn):
     """``SparkJDBCDataSet`` loads data from a database table accessible
     via JDBC URL url and connection properties and saves the content of
     a PySpark DataFrame to an external database table via JDBC. It uses
@@ -140,8 +141,7 @@ def __init__(
 
         self._url = url
         self._table = table
-        self._load_args = load_args if load_args is not None else {}
-        self._save_args = save_args if save_args is not None else {}
+        super().__init__(load_args, save_args)
 
         # Update properties in load_args and save_args with credentials.
         if credentials is not None:
3 changes: 3 additions & 0 deletions kedro/io/core.py
@@ -101,6 +101,9 @@ class AbstractDataSet(abc.ABC):
     >>> return dict(param1=self._param1, param2=self._param2)
     """
 
+    DEFAULT_LOAD_ARGS = {}
+    DEFAULT_SAVE_ARGS = {}
+
     @classmethod
     def from_config(
         cls: Type,
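These class-level default dicts are shared across all instances, which is what the "Copy default args to prevent accidental mutation" and ``copy.deepcopy`` commits guard against. A sketch of the failure mode, using a stand-in class rather than kedro code:

```python
class Broken:
    DEFAULT_SAVE_ARGS = {"index": False}

    def __init__(self, save_args=None):
        # Bug: aliases the class attribute instead of copying it.
        self._save_args = self.DEFAULT_SAVE_ARGS
        if save_args is not None:
            self._save_args.update(save_args)  # mutates the shared default


first = Broken(save_args={"sep": ";"})
second = Broken()
print(second._save_args)  # {'index': False, 'sep': ';'} -- polluted by `first`
```

``.copy()`` fixes this for flat dicts; it is still shallow, so nested values would remain shared, which is presumably what motivated the later switch to ``copy.deepcopy`` (commit cac0c78).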
24 changes: 12 additions & 12 deletions kedro/io/csv_local.py
@@ -61,6 +61,9 @@ class CSVLocalDataSet(AbstractDataSet, FilepathVersionMixIn):
 
     """
 
+    DEFAULT_LOAD_ARGS = {}
+    DEFAULT_SAVE_ARGS = {"index": False}
+
     def _describe(self) -> Dict[str, Any]:
         return dict(
             filepath=self._filepath,
@@ -94,19 +97,16 @@ def __init__(
             None, the latest version will be loaded. If its ``save``
             attribute is None, save version will be autogenerated.
         """
-        default_save_args = {"index": False}
-        default_load_args = {}
         self._filepath = filepath
-        self._load_args = (
-            {**default_load_args, **load_args}
-            if load_args is not None
-            else default_load_args
-        )
-        self._save_args = (
-            {**default_save_args, **save_args}
-            if save_args is not None
-            else default_save_args
-        )
+
+        # Handle default load and save arguments
+        self._load_args = self.DEFAULT_LOAD_ARGS.copy()
+        if load_args is not None:
+            self._load_args.update(load_args)
+        self._save_args = self.DEFAULT_SAVE_ARGS.copy()
+        if save_args is not None:
+            self._save_args.update(save_args)
 
         self._version = version
 
     def _load(self) -> pd.DataFrame:
17 changes: 12 additions & 5 deletions kedro/io/csv_s3.py
@@ -60,6 +60,9 @@ class CSVS3DataSet(AbstractDataSet, S3PathVersionMixIn):
     >>> assert data.equals(reloaded)
     """
 
+    DEFAULT_LOAD_ARGS = {}
+    DEFAULT_SAVE_ARGS = {"index": False}
+
     def _describe(self) -> Dict[str, Any]:
         return dict(
             filepath=self._filepath,
@@ -101,14 +104,18 @@ def __init__(
             attribute is None, save version will be autogenerated.
 
         """
-        default_save_args = {"index": False}
-        self._save_args = (
-            {**default_save_args, **save_args} if save_args else default_save_args
-        )
-        self._load_args = load_args if load_args else {}
         self._filepath = filepath
         self._bucket_name = bucket_name
         self._credentials = credentials if credentials else {}
+
+        # Handle default load and save arguments
+        self._load_args = self.DEFAULT_LOAD_ARGS.copy()
+        if load_args is not None:
+            self._load_args.update(load_args)
+        self._save_args = self.DEFAULT_SAVE_ARGS.copy()
+        if save_args is not None:
+            self._save_args.update(save_args)
 
         self._version = version
         self._s3 = S3FileSystem(client_kwargs=self._credentials)
25 changes: 12 additions & 13 deletions kedro/io/excel_local.py
@@ -61,6 +61,9 @@ class ExcelLocalDataSet(AbstractDataSet, FilepathVersionMixIn):
 
     """
 
+    DEFAULT_LOAD_ARGS = {"engine": "xlrd"}
+    DEFAULT_SAVE_ARGS = {"index": False}
+
     def _describe(self) -> Dict[str, Any]:
         return dict(
             filepath=self._filepath,
@@ -105,20 +108,16 @@ def __init__(
 
         """
         self._filepath = filepath
-        default_save_args = {"index": False}
-        default_load_args = {"engine": "xlrd"}
-
-        self._load_args = (
-            {**default_load_args, **load_args}
-            if load_args is not None
-            else default_load_args
-        )
-        self._save_args = (
-            {**default_save_args, **save_args}
-            if save_args is not None
-            else default_save_args
-        )
         self._engine = engine
+
+        # Handle default load and save arguments
+        self._load_args = self.DEFAULT_LOAD_ARGS.copy()
+        if load_args is not None:
+            self._load_args.update(load_args)
+        self._save_args = self.DEFAULT_SAVE_ARGS.copy()
+        if save_args is not None:
+            self._save_args.update(save_args)
 
         self._version = version
 
     def _load(self) -> Union[pd.DataFrame, Dict[str, pd.DataFrame]]:
24 changes: 12 additions & 12 deletions kedro/io/hdf_local.py
@@ -63,6 +63,9 @@ class HDFLocalDataSet(AbstractDataSet, FilepathVersionMixIn):
 
     """
 
+    DEFAULT_LOAD_ARGS = {}
+    DEFAULT_SAVE_ARGS = {}
+
     # pylint: disable=too-many-arguments
     def __init__(
         self,
@@ -92,20 +95,17 @@ def __init__(
             attribute is None, save version will be autogenerated.
 
         """
-        default_load_args = {}
-        default_save_args = {}
         self._filepath = filepath
         self._key = key
-        self._load_args = (
-            {**default_load_args, **load_args}
-            if load_args is not None
-            else default_load_args
-        )
-        self._save_args = (
-            {**default_load_args, **save_args}
-            if save_args is not None
-            else default_save_args
-        )
+
+        # Handle default load and save arguments
+        self._load_args = self.DEFAULT_LOAD_ARGS.copy()
+        if load_args is not None:
+            self._load_args.update(load_args)
+        self._save_args = self.DEFAULT_SAVE_ARGS.copy()
+        if save_args is not None:
+            self._save_args.update(save_args)
 
         self._version = version
 
     def _load(self) -> pd.DataFrame:
24 changes: 12 additions & 12 deletions kedro/io/hdf_s3.py
@@ -67,6 +67,9 @@ class HDFS3DataSet(AbstractDataSet, S3PathVersionMixIn):
 
     """
 
+    DEFAULT_LOAD_ARGS = {}
+    DEFAULT_SAVE_ARGS = {}
+
     # pylint: disable=too-many-arguments
     def __init__(
         self,
@@ -101,22 +104,19 @@ def __init__(
             attribute is None, save version will be autogenerated.
 
         """
-        default_load_args = {}
-        default_save_args = {}
         self._filepath = filepath
         self._key = key
         self._bucket_name = bucket_name
        self._credentials = credentials if credentials else {}
+
+        # Handle default load and save arguments
Contributor: Why is this (and every other eligible dataset in kedro.io) not inheriting from the new MixIn? (Maybe I missed something here, sorry about that!) The code feels inconsistent now (and all this duplication could go away!)

Contributor: OK thanks! @idanov, since the mix-in was introduced and solves the "wrong abstraction" problem, should we just leverage it in the core datasets as well?

Member Author (deepyaman): @idanov @tsanikgr @tolomea Let me know what you all decide as the core team here; I'm OK with it as is (a marginal improvement to core now, with the potential to move the mix-in out of contrib once it proves its value) or with pushing the mix-in to core now. If possible, I'm keen on merging to develop sooner rather than later, given the number of datasets touched. :)

Member: I think the current version is good for now. Thanks @deepyaman for accommodating all the comments!

+        self._load_args = self.DEFAULT_LOAD_ARGS.copy()
+        if load_args is not None:
+            self._load_args.update(load_args)
+        self._save_args = self.DEFAULT_SAVE_ARGS.copy()
+        if save_args is not None:
+            self._save_args.update(save_args)
 
         self._version = version
         self._s3 = S3FileSystem(client_kwargs=self._credentials)
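To make the duplication point concrete, a hedged sketch of what the suggested refactor might look like for this dataset. This is a definition-only fragment: the ``S3PathVersionMixIn`` import path and the trimmed signature are assumptions, ``_load``/``_save``/``_describe`` are omitted, and the contrib-into-core layering question the thread raises is left aside.

```python
from typing import Any, Dict, Optional

from kedro.contrib.io import DefaultArgumentsMixIn
from kedro.io import AbstractDataSet
from kedro.io.core import S3PathVersionMixIn  # assumed import path


class HDFS3DataSet(AbstractDataSet, S3PathVersionMixIn, DefaultArgumentsMixIn):
    # Class-level defaults stay exactly as in the diff above.
    DEFAULT_LOAD_ARGS = {}
    DEFAULT_SAVE_ARGS = {}

    def __init__(
        self,
        filepath: str,
        key: str,
        bucket_name: str,
        credentials: Optional[Dict[str, Any]] = None,
        load_args: Optional[Dict[str, Any]] = None,
        save_args: Optional[Dict[str, Any]] = None,
        version=None,
    ) -> None:
        self._filepath = filepath
        self._key = key
        self._bucket_name = bucket_name
        self._credentials = credentials if credentials else {}
        super().__init__(load_args, save_args)  # replaces the duplicated merge block
        self._version = version
        # The real dataset would also set up S3FileSystem and the
        # _load/_save/_describe implementations here (omitted for brevity).
```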