From b9bf25d59540500de7aac1895286240828e249db Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Sat, 8 Jun 2019 13:08:58 -0400 Subject: [PATCH 01/17] Modularize default load and save argument handling --- kedro/contrib/io/azure/csv_blob.py | 8 +++---- .../io/bioinformatics/sequence_dataset.py | 13 +----------- kedro/contrib/io/pyspark/spark_data_set.py | 3 +-- kedro/contrib/io/pyspark/spark_jdbc.py | 3 +-- kedro/io/core.py | 21 ++++++++++++++++++- kedro/io/csv_local.py | 15 +++---------- kedro/io/csv_s3.py | 8 +++---- kedro/io/excel_local.py | 17 ++++----------- kedro/io/hdf_local.py | 13 +----------- kedro/io/hdf_s3.py | 14 +------------ kedro/io/json_local.py | 15 +++---------- kedro/io/parquet_local.py | 17 +++------------ kedro/io/pickle_local.py | 14 +------------ kedro/io/pickle_s3.py | 14 +------------ kedro/io/sql.py | 16 +++----------- kedro/io/text_local.py | 17 ++++----------- 16 files changed, 53 insertions(+), 155 deletions(-) diff --git a/kedro/contrib/io/azure/csv_blob.py b/kedro/contrib/io/azure/csv_blob.py index 2fdf168a51..ce6b9a5c0d 100644 --- a/kedro/contrib/io/azure/csv_blob.py +++ b/kedro/contrib/io/azure/csv_blob.py @@ -61,6 +61,8 @@ class CSVBlobDataSet(AbstractDataSet): >>> assert data.equals(reloaded) """ + DEFAULT_SAVE_ARGS = {"index": False} + def _describe(self) -> Dict[str, Any]: return dict( filepath=self._filepath, @@ -106,16 +108,12 @@ def __init__( All defaults are preserved, but "index", which is set to False. """ - default_save_args = {"index": False} - self._save_args = ( - {**default_save_args, **save_args} if save_args else default_save_args - ) - self._load_args = load_args if load_args else {} self._filepath = filepath self._container_name = container_name self._credentials = credentials if credentials else {} self._blob_to_text_args = blob_to_text_args if blob_to_text_args else {} self._blob_from_text_args = blob_from_text_args if blob_from_text_args else {} + super().__init__(load_args, save_args) def _load(self) -> pd.DataFrame: blob_service = BlockBlobService(**self._credentials) diff --git a/kedro/contrib/io/bioinformatics/sequence_dataset.py b/kedro/contrib/io/bioinformatics/sequence_dataset.py index 908f22f8ec..b85a44fc74 100644 --- a/kedro/contrib/io/bioinformatics/sequence_dataset.py +++ b/kedro/contrib/io/bioinformatics/sequence_dataset.py @@ -95,18 +95,7 @@ def __init__( """ self._filepath = filepath - default_load_args = {} - default_save_args = {} - self._load_args = ( - {**default_load_args, **load_args} - if load_args is not None - else default_load_args - ) - self._save_args = ( - {**default_save_args, **save_args} - if save_args is not None - else default_save_args - ) + super().__init__(load_args, save_args) def _load(self) -> List: return list(SeqIO.parse(self._filepath, **self._load_args)) diff --git a/kedro/contrib/io/pyspark/spark_data_set.py b/kedro/contrib/io/pyspark/spark_data_set.py index b6e18a83d1..536587c7b1 100644 --- a/kedro/contrib/io/pyspark/spark_data_set.py +++ b/kedro/contrib/io/pyspark/spark_data_set.py @@ -106,8 +106,7 @@ def __init__( self._filepath = filepath self._file_format = file_format - self._load_args = load_args if load_args is not None else {} - self._save_args = save_args if save_args is not None else {} + super().__init__(load_args, save_args) @staticmethod def _get_spark(): diff --git a/kedro/contrib/io/pyspark/spark_jdbc.py b/kedro/contrib/io/pyspark/spark_jdbc.py index a087fd3982..f95e724ef6 100644 --- a/kedro/contrib/io/pyspark/spark_jdbc.py +++ b/kedro/contrib/io/pyspark/spark_jdbc.py @@ 
-140,8 +140,7 @@ def __init__( self._url = url self._table = table - self._load_args = load_args if load_args is not None else {} - self._save_args = save_args if save_args is not None else {} + super().__init__(load_args, save_args) # Update properties in load_args and save_args with credentials. if credentials is not None: diff --git a/kedro/io/core.py b/kedro/io/core.py index 45ec91597d..7af898806a 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -37,7 +37,7 @@ from datetime import datetime, timezone from glob import iglob from pathlib import Path, PurePosixPath -from typing import Any, Dict, Type +from typing import Any, Dict, Optional, Type from warnings import warn from kedro.utils import load_obj @@ -101,6 +101,9 @@ class AbstractDataSet(abc.ABC): >>> return dict(param1=self._param1, param2=self._param2) """ + DEFAULT_LOAD_ARGS = {} + DEFAULT_SAVE_ARGS = {} + @classmethod def from_config( cls: Type, @@ -189,6 +192,22 @@ def from_config( ) return data_set + def __init__( + self, + load_args: Optional[Dict[str, Any]] = None, + save_args: Optional[Dict[str, Any]] = None, + ) -> None: + self._load_args = ( + {**self.DEFAULT_LOAD_ARGS, **load_args} + if load_args is not None + else self.DEFAULT_LOAD_ARGS + ) + self._save_args = ( + {**self.DEFAULT_SAVE_ARGS, **save_args} + if save_args is not None + else self.DEFAULT_SAVE_ARGS + ) + def load(self) -> Any: """Loads data by delegation to the provided load method. diff --git a/kedro/io/csv_local.py b/kedro/io/csv_local.py index b512156d0d..f01aa98286 100644 --- a/kedro/io/csv_local.py +++ b/kedro/io/csv_local.py @@ -61,6 +61,8 @@ class CSVLocalDataSet(AbstractDataSet, FilepathVersionMixIn): """ + DEFAULT_SAVE_ARGS = {"index": False} + def _describe(self) -> Dict[str, Any]: return dict( filepath=self._filepath, @@ -94,19 +96,8 @@ def __init__( None, the latest version will be loaded. If its ``save`` attribute is None, save version will be autogenerated. """ - default_save_args = {"index": False} - default_load_args = {} self._filepath = filepath - self._load_args = ( - {**default_load_args, **load_args} - if load_args is not None - else default_load_args - ) - self._save_args = ( - {**default_save_args, **save_args} - if save_args is not None - else default_save_args - ) + super().__init__(load_args, save_args) self._version = version def _load(self) -> pd.DataFrame: diff --git a/kedro/io/csv_s3.py b/kedro/io/csv_s3.py index 306bf79602..d7a277b6b5 100644 --- a/kedro/io/csv_s3.py +++ b/kedro/io/csv_s3.py @@ -60,6 +60,8 @@ class CSVS3DataSet(AbstractDataSet, S3PathVersionMixIn): >>> assert data.equals(reloaded) """ + DEFAULT_SAVE_ARGS = {"index": False} + def _describe(self) -> Dict[str, Any]: return dict( filepath=self._filepath, @@ -101,14 +103,10 @@ def __init__( attribute is None, save version will be autogenerated. 
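Aside: the `{**self.DEFAULT_LOAD_ARGS, **load_args}` merge in the new `AbstractDataSet.__init__` above relies on dict-unpacking precedence, where keys from the right-hand dict win. A minimal, self-contained sketch of that behaviour (toy values, not part of this patch series):

    # Right-hand keys override left-hand ones, so user-supplied arguments
    # take precedence over the class-level defaults.
    DEFAULT_SAVE_ARGS = {"index": False, "sep": ","}
    save_args = {"sep": "|"}

    merged = {**DEFAULT_SAVE_ARGS, **save_args}
    assert merged == {"index": False, "sep": "|"}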
""" - default_save_args = {"index": False} - self._save_args = ( - {**default_save_args, **save_args} if save_args else default_save_args - ) - self._load_args = load_args if load_args else {} self._filepath = filepath self._bucket_name = bucket_name self._credentials = credentials if credentials else {} + super().__init__(load_args, save_args) self._version = version self._s3 = S3FileSystem(client_kwargs=self._credentials) diff --git a/kedro/io/excel_local.py b/kedro/io/excel_local.py index c88b32b977..194cfd90a3 100644 --- a/kedro/io/excel_local.py +++ b/kedro/io/excel_local.py @@ -61,6 +61,9 @@ class ExcelLocalDataSet(AbstractDataSet, FilepathVersionMixIn): """ + DEFAULT_LOAD_ARGS = {"engine": "xlrd"} + DEFAULT_SAVE_ARGS = {"index": False} + def _describe(self) -> Dict[str, Any]: return dict( filepath=self._filepath, @@ -105,19 +108,7 @@ def __init__( """ self._filepath = filepath - default_save_args = {"index": False} - default_load_args = {"engine": "xlrd"} - - self._load_args = ( - {**default_load_args, **load_args} - if load_args is not None - else default_load_args - ) - self._save_args = ( - {**default_save_args, **save_args} - if save_args is not None - else default_save_args - ) + super().__init__(load_args, save_args) self._engine = engine self._version = version diff --git a/kedro/io/hdf_local.py b/kedro/io/hdf_local.py index 7d0d3e5be2..e074ef481c 100644 --- a/kedro/io/hdf_local.py +++ b/kedro/io/hdf_local.py @@ -92,20 +92,9 @@ def __init__( attribute is None, save version will be autogenerated. """ - default_load_args = {} - default_save_args = {} self._filepath = filepath self._key = key - self._load_args = ( - {**default_load_args, **load_args} - if load_args is not None - else default_load_args - ) - self._save_args = ( - {**default_load_args, **save_args} - if save_args is not None - else default_save_args - ) + super().__init__(load_args, save_args) self._version = version def _load(self) -> pd.DataFrame: diff --git a/kedro/io/hdf_s3.py b/kedro/io/hdf_s3.py index e9b0ae61a2..a3b453443b 100644 --- a/kedro/io/hdf_s3.py +++ b/kedro/io/hdf_s3.py @@ -40,7 +40,6 @@ HDFSTORE_DRIVER = "H5FD_CORE" -# pylint: disable=too-many-instance-attributes class HDFS3DataSet(AbstractDataSet, S3PathVersionMixIn): """``HDFS3DataSet`` loads and saves data to a S3 bucket. The underlying functionality is supported by pandas, so it supports all @@ -100,22 +99,11 @@ def __init__( attribute is None, save version will be autogenerated. """ - default_load_args = {} - default_save_args = {} self._filepath = filepath self._key = key self._bucket_name = bucket_name self._credentials = credentials if credentials else {} - self._load_args = ( - {**default_load_args, **load_args} - if load_args is not None - else default_load_args - ) - self._save_args = ( - {**default_load_args, **save_args} - if save_args is not None - else default_save_args - ) + super().__init__(load_args, save_args) self._version = version self._s3 = S3FileSystem(client_kwargs=self._credentials) diff --git a/kedro/io/json_local.py b/kedro/io/json_local.py index 809d014802..3df1dcf0a1 100644 --- a/kedro/io/json_local.py +++ b/kedro/io/json_local.py @@ -58,6 +58,8 @@ class JSONLocalDataSet(AbstractDataSet, FilepathVersionMixIn): """ + DEFAULT_SAVE_ARGS = {"indent": 4} + def _describe(self) -> Dict[str, Any]: return dict( filepath=self._filepath, @@ -90,19 +92,8 @@ def __init__( attribute is None, save version will be autogenerated. 
""" - default_save_args = {"indent": 4} - default_load_args = {} self._filepath = filepath - self._load_args = ( - {**default_load_args, **load_args} - if load_args is not None - else default_load_args - ) - self._save_args = ( - {**default_save_args, **save_args} - if save_args is not None - else default_save_args - ) + super().__init__(load_args, save_args) self._version = version def _load(self) -> Any: diff --git a/kedro/io/parquet_local.py b/kedro/io/parquet_local.py index 2b4826fcbe..6998ac44ea 100644 --- a/kedro/io/parquet_local.py +++ b/kedro/io/parquet_local.py @@ -61,6 +61,8 @@ class ParquetLocalDataSet(AbstractDataSet, FilepathVersionMixIn): >>> assert data.equals(loaded_data) """ + DEFAULT_SAVE_ARGS = {"compression": None} + def _describe(self) -> Dict[str, Any]: return dict( filepath=self._filepath, @@ -107,22 +109,9 @@ def __init__( attribute is None, save version will be autogenerated. """ - default_save_args = {"compression": None} - default_load_args = {} - self._filepath = filepath self._engine = engine - - self._load_args = ( - {**default_load_args, **load_args} - if load_args is not None - else default_load_args - ) - self._save_args = ( - {**default_save_args, **save_args} - if save_args is not None - else default_save_args - ) + super().__init__(load_args, save_args) self._version = version def _load(self) -> pd.DataFrame: diff --git a/kedro/io/pickle_local.py b/kedro/io/pickle_local.py index 58fe4f76d1..5860f15ff3 100644 --- a/kedro/io/pickle_local.py +++ b/kedro/io/pickle_local.py @@ -113,9 +113,6 @@ def __init__( ImportError: If 'backend' could not be imported. """ - default_save_args = {} - default_load_args = {} - if backend not in ["pickle", "joblib"]: raise ValueError( "backend should be one of ['pickle', 'joblib'], got %s" % backend @@ -128,16 +125,7 @@ def __init__( self._filepath = filepath self._backend = backend - self._load_args = ( - {**default_load_args, **load_args} - if load_args is not None - else default_load_args - ) - self._save_args = ( - {**default_save_args, **save_args} - if save_args is not None - else default_save_args - ) + super().__init__(load_args, save_args) self._version = version def _load(self) -> Any: diff --git a/kedro/io/pickle_s3.py b/kedro/io/pickle_s3.py index adce6efe3f..24a5f18c46 100644 --- a/kedro/io/pickle_s3.py +++ b/kedro/io/pickle_s3.py @@ -95,23 +95,11 @@ def __init__( None, the latest version will be loaded. If its ``save`` attribute is None, save version will be autogenerated. """ - default_load_args = {} - default_save_args = {} - self._filepath = filepath self._bucket_name = bucket_name self._credentials = credentials if credentials else {} + super().__init__(load_args, save_args) self._version = version - self._load_args = ( - {**default_load_args, **load_args} - if load_args is not None - else default_load_args - ) - self._save_args = ( - {**default_save_args, **save_args} - if save_args is not None - else default_save_args - ) self._s3 = S3FileSystem(client_kwargs=self._credentials) @property diff --git a/kedro/io/sql.py b/kedro/io/sql.py index 0917b4a20a..6e0a126dd5 100644 --- a/kedro/io/sql.py +++ b/kedro/io/sql.py @@ -139,6 +139,8 @@ class SQLTableDataSet(AbstractDataSet): """ + DEFAULT_SAVE_ARGS = {"index": False} + def _describe(self) -> Dict[str, Any]: load_args = self._load_args.copy() save_args = self._save_args.copy() @@ -193,19 +195,7 @@ def __init__( "provide a SQLAlchemy connection string." 
) - default_save_args = {"index": False} - default_load_args = {} - - self._load_args = ( - {**default_load_args, **load_args} - if load_args is not None - else default_load_args - ) - self._save_args = ( - {**default_save_args, **save_args} - if save_args is not None - else default_save_args - ) + super().__init__(load_args, save_args) self._load_args["table_name"] = table_name self._save_args["name"] = table_name diff --git a/kedro/io/text_local.py b/kedro/io/text_local.py index 37870b25fa..020daaceaf 100644 --- a/kedro/io/text_local.py +++ b/kedro/io/text_local.py @@ -50,6 +50,9 @@ class TextLocalDataSet(AbstractDataSet, FilepathVersionMixIn): >>> reloaded = data_set.load() """ + DEFAULT_LOAD_ARGS = {"mode": "r"} + DEFAULT_SAVE_ARGS = {"mode": "w"} + def _describe(self) -> Dict[str, Any]: return dict( filepath=self._filepath, @@ -80,20 +83,8 @@ def __init__( None, the latest version will be loaded. If its ``save`` attribute is None, save version will be autogenerated. """ - default_save_args = {"mode": "w"} - default_load_args = {"mode": "r"} - self._filepath = filepath - self._load_args = ( - {**default_load_args, **load_args} - if load_args is not None - else default_load_args - ) - self._save_args = ( - {**default_save_args, **save_args} - if save_args is not None - else default_save_args - ) + super().__init__(load_args, save_args) self._version = version def _load(self) -> str: From ba18548018e95b1d5e4075788ae2b5e2a40cf3c7 Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Sun, 9 Jun 2019 12:26:11 -0700 Subject: [PATCH 02/17] Suppress ``super-init-not-called`` pylint messages --- kedro/contrib/io/pyspark/spark_jdbc.py | 4 +++- kedro/io/lambda_data_set.py | 1 + kedro/io/memory_data_set.py | 1 + kedro/io/sql.py | 1 + tests/io/test_data_catalog.py | 1 + 5 files changed, 7 insertions(+), 1 deletion(-) diff --git a/kedro/contrib/io/pyspark/spark_jdbc.py b/kedro/contrib/io/pyspark/spark_jdbc.py index f95e724ef6..3bc16d542f 100644 --- a/kedro/contrib/io/pyspark/spark_jdbc.py +++ b/kedro/contrib/io/pyspark/spark_jdbc.py @@ -123,6 +123,7 @@ def __init__( DataSetError: When either ``url`` or ``table`` is empty. """ + # pylint: disable=super-init-not-called if not url: raise DataSetError( @@ -140,7 +141,8 @@ def __init__( self._url = url self._table = table - super().__init__(load_args, save_args) + self._load_args = load_args if load_args is not None else {} + self._save_args = save_args if save_args is not None else {} # Update properties in load_args and save_args with credentials. if credentials is not None: diff --git a/kedro/io/lambda_data_set.py b/kedro/io/lambda_data_set.py index 48f60bf030..61b2ef58d7 100644 --- a/kedro/io/lambda_data_set.py +++ b/kedro/io/lambda_data_set.py @@ -113,6 +113,7 @@ def __init__( DataSetError: If load and/or save is specified, but is not a Callable. """ + # pylint: disable=super-init-not-called if load is not None and not callable(load): raise DataSetError( diff --git a/kedro/io/memory_data_set.py b/kedro/io/memory_data_set.py index cb08139024..c57863158e 100644 --- a/kedro/io/memory_data_set.py +++ b/kedro/io/memory_data_set.py @@ -80,6 +80,7 @@ def __init__(self, data: Any = None, max_loads: int = None): method call. """ + # pylint: disable=super-init-not-called self._data = None self._max_loads = max_loads if data is not None: diff --git a/kedro/io/sql.py b/kedro/io/sql.py index 6e0a126dd5..d287209347 100644 --- a/kedro/io/sql.py +++ b/kedro/io/sql.py @@ -281,6 +281,7 @@ def __init__( DataSetError: When either ``sql`` or ``con`` parameters is emtpy. 
""" + # pylint: disable=super-init-not-called if not sql: raise DataSetError( diff --git a/tests/io/test_data_catalog.py b/tests/io/test_data_catalog.py index 06f27a50e2..a65a282ae8 100644 --- a/tests/io/test_data_catalog.py +++ b/tests/io/test_data_catalog.py @@ -93,6 +93,7 @@ def conflicting_feed_dict(): class BadDataSet(AbstractDataSet): # pragma: no cover def __init__(self, filepath): + # pylint: disable=super-init-not-called self.filepath = filepath raise Exception("Naughty!") From 41b40b27d6046d74c8ad82ede7427e7985ec0444 Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Fri, 14 Jun 2019 14:52:58 -0700 Subject: [PATCH 03/17] Copy default args to prevent accidental mutation --- kedro/io/core.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/kedro/io/core.py b/kedro/io/core.py index 7af898806a..1b453ce8ee 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -197,16 +197,12 @@ def __init__( load_args: Optional[Dict[str, Any]] = None, save_args: Optional[Dict[str, Any]] = None, ) -> None: - self._load_args = ( - {**self.DEFAULT_LOAD_ARGS, **load_args} - if load_args is not None - else self.DEFAULT_LOAD_ARGS - ) - self._save_args = ( - {**self.DEFAULT_SAVE_ARGS, **save_args} - if save_args is not None - else self.DEFAULT_SAVE_ARGS - ) + self._load_args = self.DEFAULT_LOAD_ARGS.copy() + if load_args is not None: + self._load_args.update(load_args) + self._save_args = self.DEFAULT_SAVE_ARGS.copy() + if save_args is not None: + self._save_args.update(save_args) def load(self) -> Any: """Loads data by delegation to the provided load method. From c10a654782d8b2b3f0af3982b6ecdbb42ed666a5 Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Fri, 14 Jun 2019 15:53:59 -0700 Subject: [PATCH 04/17] Restore ``super().__init__`` given default arg fix --- kedro/contrib/io/pyspark/spark_jdbc.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kedro/contrib/io/pyspark/spark_jdbc.py b/kedro/contrib/io/pyspark/spark_jdbc.py index 3bc16d542f..f95e724ef6 100644 --- a/kedro/contrib/io/pyspark/spark_jdbc.py +++ b/kedro/contrib/io/pyspark/spark_jdbc.py @@ -123,7 +123,6 @@ def __init__( DataSetError: When either ``url`` or ``table`` is empty. """ - # pylint: disable=super-init-not-called if not url: raise DataSetError( @@ -141,8 +140,7 @@ def __init__( self._url = url self._table = table - self._load_args = load_args if load_args is not None else {} - self._save_args = save_args if save_args is not None else {} + super().__init__(load_args, save_args) # Update properties in load_args and save_args with credentials. if credentials is not None: From e83502cebe42b8b270367b1a213113ea030b5900 Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Tue, 2 Jul 2019 13:54:22 -0400 Subject: [PATCH 05/17] Refactor abstract base class modification as mixin --- kedro/contrib/io/__init__.py | 2 + kedro/contrib/io/azure/csv_blob.py | 3 +- .../io/bioinformatics/sequence_dataset.py | 3 +- kedro/contrib/io/core.py | 51 +++++++++++++++++++ kedro/contrib/io/pyspark/spark_data_set.py | 3 +- kedro/contrib/io/pyspark/spark_jdbc.py | 3 +- 6 files changed, 61 insertions(+), 4 deletions(-) create mode 100644 kedro/contrib/io/core.py diff --git a/kedro/contrib/io/__init__.py b/kedro/contrib/io/__init__.py index 2aa315c599..d26777acd5 100644 --- a/kedro/contrib/io/__init__.py +++ b/kedro/contrib/io/__init__.py @@ -31,3 +31,5 @@ `kedro.io` module (e.g. additional ``AbstractDataSet``s and extensions/alternative ``DataCatalog``s. 
""" + +from .core import DefaultArgumentsMixIn # NOQA diff --git a/kedro/contrib/io/azure/csv_blob.py b/kedro/contrib/io/azure/csv_blob.py index f4640dee4d..c84c313d45 100644 --- a/kedro/contrib/io/azure/csv_blob.py +++ b/kedro/contrib/io/azure/csv_blob.py @@ -35,10 +35,11 @@ import pandas as pd from azure.storage.blob import BlockBlobService +from kedro.contrib.io import DefaultArgumentsMixIn from kedro.io import AbstractDataSet -class CSVBlobDataSet(AbstractDataSet): +class CSVBlobDataSet(DefaultArgumentsMixIn, AbstractDataSet): """``CSVBlobDataSet`` loads and saves csv files in Microsoft's Azure blob storage. It uses azure storage SDK to read and write in azure and pandas to handle the csv file locally. diff --git a/kedro/contrib/io/bioinformatics/sequence_dataset.py b/kedro/contrib/io/bioinformatics/sequence_dataset.py index fdf160c6f9..7acf777578 100644 --- a/kedro/contrib/io/bioinformatics/sequence_dataset.py +++ b/kedro/contrib/io/bioinformatics/sequence_dataset.py @@ -35,10 +35,11 @@ from Bio import SeqIO +from kedro.contrib.io import DefaultArgumentsMixIn from kedro.io import AbstractDataSet -class BioSequenceLocalDataSet(AbstractDataSet): +class BioSequenceLocalDataSet(DefaultArgumentsMixIn, AbstractDataSet): """``BioSequenceLocalDataSet`` loads and saves data to a sequence file. Example: diff --git a/kedro/contrib/io/core.py b/kedro/contrib/io/core.py new file mode 100644 index 0000000000..a417b9b3cc --- /dev/null +++ b/kedro/contrib/io/core.py @@ -0,0 +1,51 @@ +# Copyright 2018-2019 QuantumBlack Visual Analytics Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND +# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS +# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN +# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +# The QuantumBlack Visual Analytics Limited (“QuantumBlack”) name and logo +# (either separately or in combination, “QuantumBlack Trademarks”) are +# trademarks of QuantumBlack. The License does not grant you any right or +# license to the QuantumBlack Trademarks. You may not use the QuantumBlack +# Trademarks or any confusingly similar mark as a trademark for your product, +# or use the QuantumBlack Trademarks in any other manner that might cause +# confusion in the marketplace, including but not limited to in advertising, +# on websites, or on software. +# +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""This module extends the set of classes ``kedro.io.core`` provides.""" + +from typing import Any, Dict, Optional + + +# pylint: disable=too-few-public-methods +class DefaultArgumentsMixIn: + """Mixin class that helps handle default load and save arguments.""" + + DEFAULT_LOAD_ARGS = {} + DEFAULT_SAVE_ARGS = {} + + def __init__( + self, + load_args: Optional[Dict[str, Any]] = None, + save_args: Optional[Dict[str, Any]] = None, + ) -> None: + self._load_args = self.DEFAULT_LOAD_ARGS.copy() + if load_args is not None: + self._load_args.update(load_args) + self._save_args = self.DEFAULT_SAVE_ARGS.copy() + if save_args is not None: + self._save_args.update(save_args) diff --git a/kedro/contrib/io/pyspark/spark_data_set.py b/kedro/contrib/io/pyspark/spark_data_set.py index 7f36142b34..7594bf5297 100644 --- a/kedro/contrib/io/pyspark/spark_data_set.py +++ b/kedro/contrib/io/pyspark/spark_data_set.py @@ -36,10 +36,11 @@ from pyspark.sql import DataFrame, SparkSession from pyspark.sql.utils import AnalysisException +from kedro.contrib.io import DefaultArgumentsMixIn from kedro.io import AbstractDataSet -class SparkDataSet(AbstractDataSet): +class SparkDataSet(DefaultArgumentsMixIn, AbstractDataSet): """``SparkDataSet`` loads and saves Spark data frames. Example: diff --git a/kedro/contrib/io/pyspark/spark_jdbc.py b/kedro/contrib/io/pyspark/spark_jdbc.py index cb118e42be..762e7ad73c 100644 --- a/kedro/contrib/io/pyspark/spark_jdbc.py +++ b/kedro/contrib/io/pyspark/spark_jdbc.py @@ -31,12 +31,13 @@ from pyspark.sql import DataFrame, SparkSession +from kedro.contrib.io import DefaultArgumentsMixIn from kedro.io import AbstractDataSet, DataSetError __all__ = ["SparkJDBCDataSet"] -class SparkJDBCDataSet(AbstractDataSet): +class SparkJDBCDataSet(DefaultArgumentsMixIn, AbstractDataSet): """``SparkJDBCDataSet`` loads data from a database table accessible via JDBC URL url and connection properties and saves the content of a PySpark DataFrame to an external database table via JDBC. It uses From 63fda574f5488dbaa0559d4e35ca1f2a98db2e79 Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Wed, 3 Jul 2019 09:14:56 -0400 Subject: [PATCH 06/17] Homogenize default load and save argument handling --- kedro/contrib/io/azure/csv_blob.py | 2 +- .../contrib/io/bioinformatics/sequence_dataset.py | 2 +- kedro/contrib/io/pyspark/spark_data_set.py | 2 +- kedro/contrib/io/pyspark/spark_jdbc.py | 2 +- kedro/io/core.py | 14 +------------- kedro/io/csv_local.py | 8 +++++++- kedro/io/csv_s3.py | 8 +++++++- kedro/io/excel_local.py | 7 ++++++- kedro/io/hdf_local.py | 10 +++++++++- kedro/io/hdf_s3.py | 11 ++++++++++- kedro/io/json_local.py | 8 +++++++- kedro/io/lambda_data_set.py | 1 - kedro/io/memory_data_set.py | 1 - kedro/io/parquet_local.py | 8 +++++++- kedro/io/pickle_local.py | 10 +++++++++- kedro/io/pickle_s3.py | 10 +++++++++- kedro/io/sql.py | 9 +++++++-- kedro/io/text_local.py | 7 ++++++- tests/io/test_data_catalog.py | 1 - 19 files changed, 89 insertions(+), 32 deletions(-) diff --git a/kedro/contrib/io/azure/csv_blob.py b/kedro/contrib/io/azure/csv_blob.py index c84c313d45..37dca6bc7e 100644 --- a/kedro/contrib/io/azure/csv_blob.py +++ b/kedro/contrib/io/azure/csv_blob.py @@ -39,7 +39,7 @@ from kedro.io import AbstractDataSet -class CSVBlobDataSet(DefaultArgumentsMixIn, AbstractDataSet): +class CSVBlobDataSet(AbstractDataSet, DefaultArgumentsMixIn): """``CSVBlobDataSet`` loads and saves csv files in Microsoft's Azure blob storage. 
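Aside: a condensed sketch of how the ``DefaultArgumentsMixIn`` introduced in PATCH 05 composes with a data set class; the ``CSVLike`` subclass is hypothetical and only illustrates that a subclass changes the defaults by redefining the class attribute:

    from typing import Any, Dict, Optional

    class DefaultArgumentsMixIn:
        DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
        DEFAULT_SAVE_ARGS: Dict[str, Any] = {}

        def __init__(
            self,
            load_args: Optional[Dict[str, Any]] = None,
            save_args: Optional[Dict[str, Any]] = None,
        ) -> None:
            self._load_args = self.DEFAULT_LOAD_ARGS.copy()
            if load_args is not None:
                self._load_args.update(load_args)
            self._save_args = self.DEFAULT_SAVE_ARGS.copy()
            if save_args is not None:
                self._save_args.update(save_args)

    class CSVLike(DefaultArgumentsMixIn):
        DEFAULT_SAVE_ARGS = {"index": False}  # subclass-specific defaults

    ds = CSVLike(save_args={"sep": "|"})
    assert ds._save_args == {"index": False, "sep": "|"}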
It uses azure storage SDK to read and write in azure and pandas to handle the csv file locally. diff --git a/kedro/contrib/io/bioinformatics/sequence_dataset.py b/kedro/contrib/io/bioinformatics/sequence_dataset.py index 7acf777578..6f844d1e52 100644 --- a/kedro/contrib/io/bioinformatics/sequence_dataset.py +++ b/kedro/contrib/io/bioinformatics/sequence_dataset.py @@ -39,7 +39,7 @@ from kedro.io import AbstractDataSet -class BioSequenceLocalDataSet(DefaultArgumentsMixIn, AbstractDataSet): +class BioSequenceLocalDataSet(AbstractDataSet, DefaultArgumentsMixIn): """``BioSequenceLocalDataSet`` loads and saves data to a sequence file. Example: diff --git a/kedro/contrib/io/pyspark/spark_data_set.py b/kedro/contrib/io/pyspark/spark_data_set.py index 7594bf5297..39acebc4bc 100644 --- a/kedro/contrib/io/pyspark/spark_data_set.py +++ b/kedro/contrib/io/pyspark/spark_data_set.py @@ -40,7 +40,7 @@ from kedro.io import AbstractDataSet -class SparkDataSet(DefaultArgumentsMixIn, AbstractDataSet): +class SparkDataSet(AbstractDataSet, DefaultArgumentsMixIn): """``SparkDataSet`` loads and saves Spark data frames. Example: diff --git a/kedro/contrib/io/pyspark/spark_jdbc.py b/kedro/contrib/io/pyspark/spark_jdbc.py index 762e7ad73c..842568b1d4 100644 --- a/kedro/contrib/io/pyspark/spark_jdbc.py +++ b/kedro/contrib/io/pyspark/spark_jdbc.py @@ -37,7 +37,7 @@ __all__ = ["SparkJDBCDataSet"] -class SparkJDBCDataSet(DefaultArgumentsMixIn, AbstractDataSet): +class SparkJDBCDataSet(AbstractDataSet, DefaultArgumentsMixIn): """``SparkJDBCDataSet`` loads data from a database table accessible via JDBC URL url and connection properties and saves the content of a PySpark DataFrame to an external database table via JDBC. It uses diff --git a/kedro/io/core.py b/kedro/io/core.py index 282ccd067e..c0f3f42d46 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -37,7 +37,7 @@ from datetime import datetime, timezone from glob import iglob from pathlib import Path, PurePosixPath -from typing import Any, Dict, Optional, Type +from typing import Any, Dict, Type from warnings import warn from kedro.utils import load_obj @@ -192,18 +192,6 @@ def from_config( ) return data_set - def __init__( - self, - load_args: Optional[Dict[str, Any]] = None, - save_args: Optional[Dict[str, Any]] = None, - ) -> None: - self._load_args = self.DEFAULT_LOAD_ARGS.copy() - if load_args is not None: - self._load_args.update(load_args) - self._save_args = self.DEFAULT_SAVE_ARGS.copy() - if save_args is not None: - self._save_args.update(save_args) - def load(self) -> Any: """Loads data by delegation to the provided load method. diff --git a/kedro/io/csv_local.py b/kedro/io/csv_local.py index 0e8cd2ac7b..ae21c08a0e 100644 --- a/kedro/io/csv_local.py +++ b/kedro/io/csv_local.py @@ -61,6 +61,7 @@ class CSVLocalDataSet(AbstractDataSet, FilepathVersionMixIn): """ + DEFAULT_LOAD_ARGS = {} DEFAULT_SAVE_ARGS = {"index": False} def _describe(self) -> Dict[str, Any]: @@ -97,7 +98,12 @@ def __init__( attribute is None, save version will be autogenerated. 
""" self._filepath = filepath - super().__init__(load_args, save_args) + self._load_args = self.DEFAULT_LOAD_ARGS.copy() + if load_args is not None: + self._load_args.update(load_args) + self._save_args = self.DEFAULT_SAVE_ARGS.copy() + if save_args is not None: + self._save_args.update(save_args) self._version = version def _load(self) -> pd.DataFrame: diff --git a/kedro/io/csv_s3.py b/kedro/io/csv_s3.py index 81219f5ddb..1d8d7baad5 100644 --- a/kedro/io/csv_s3.py +++ b/kedro/io/csv_s3.py @@ -60,6 +60,7 @@ class CSVS3DataSet(AbstractDataSet, S3PathVersionMixIn): >>> assert data.equals(reloaded) """ + DEFAULT_LOAD_ARGS = {} DEFAULT_SAVE_ARGS = {"index": False} def _describe(self) -> Dict[str, Any]: @@ -106,7 +107,12 @@ def __init__( self._filepath = filepath self._bucket_name = bucket_name self._credentials = credentials if credentials else {} - super().__init__(load_args, save_args) + self._load_args = self.DEFAULT_LOAD_ARGS.copy() + if load_args is not None: + self._load_args.update(load_args) + self._save_args = self.DEFAULT_SAVE_ARGS.copy() + if save_args is not None: + self._save_args.update(save_args) self._version = version self._s3 = S3FileSystem(client_kwargs=self._credentials) diff --git a/kedro/io/excel_local.py b/kedro/io/excel_local.py index 22258fffa7..a3123358ad 100644 --- a/kedro/io/excel_local.py +++ b/kedro/io/excel_local.py @@ -108,8 +108,13 @@ def __init__( """ self._filepath = filepath - super().__init__(load_args, save_args) self._engine = engine + self._load_args = self.DEFAULT_LOAD_ARGS.copy() + if load_args is not None: + self._load_args.update(load_args) + self._save_args = self.DEFAULT_SAVE_ARGS.copy() + if save_args is not None: + self._save_args.update(save_args) self._version = version def _load(self) -> Union[pd.DataFrame, Dict[str, pd.DataFrame]]: diff --git a/kedro/io/hdf_local.py b/kedro/io/hdf_local.py index b7d4268db0..6be61a90bd 100644 --- a/kedro/io/hdf_local.py +++ b/kedro/io/hdf_local.py @@ -63,6 +63,9 @@ class HDFLocalDataSet(AbstractDataSet, FilepathVersionMixIn): """ + DEFAULT_LOAD_ARGS = {} + DEFAULT_SAVE_ARGS = {} + # pylint: disable=too-many-arguments def __init__( self, @@ -94,7 +97,12 @@ def __init__( """ self._filepath = filepath self._key = key - super().__init__(load_args, save_args) + self._load_args = self.DEFAULT_LOAD_ARGS.copy() + if load_args is not None: + self._load_args.update(load_args) + self._save_args = self.DEFAULT_SAVE_ARGS.copy() + if save_args is not None: + self._save_args.update(save_args) self._version = version def _load(self) -> pd.DataFrame: diff --git a/kedro/io/hdf_s3.py b/kedro/io/hdf_s3.py index 570e0736c6..3d6ef04f22 100644 --- a/kedro/io/hdf_s3.py +++ b/kedro/io/hdf_s3.py @@ -40,6 +40,7 @@ HDFSTORE_DRIVER = "H5FD_CORE" +# pylint: disable=too-many-instance-attributes class HDFS3DataSet(AbstractDataSet, S3PathVersionMixIn): """``HDFS3DataSet`` loads and saves data to a S3 bucket. 
The underlying functionality is supported by pandas, so it supports all @@ -66,6 +67,9 @@ class HDFS3DataSet(AbstractDataSet, S3PathVersionMixIn): """ + DEFAULT_LOAD_ARGS = {} + DEFAULT_SAVE_ARGS = {} + # pylint: disable=too-many-arguments def __init__( self, @@ -104,7 +108,12 @@ def __init__( self._key = key self._bucket_name = bucket_name self._credentials = credentials if credentials else {} - super().__init__(load_args, save_args) + self._load_args = self.DEFAULT_LOAD_ARGS.copy() + if load_args is not None: + self._load_args.update(load_args) + self._save_args = self.DEFAULT_SAVE_ARGS.copy() + if save_args is not None: + self._save_args.update(save_args) self._version = version self._s3 = S3FileSystem(client_kwargs=self._credentials) diff --git a/kedro/io/json_local.py b/kedro/io/json_local.py index 991a80d643..c3d31fac99 100644 --- a/kedro/io/json_local.py +++ b/kedro/io/json_local.py @@ -58,6 +58,7 @@ class JSONLocalDataSet(AbstractDataSet, FilepathVersionMixIn): """ + DEFAULT_LOAD_ARGS = {} DEFAULT_SAVE_ARGS = {"indent": 4} def _describe(self) -> Dict[str, Any]: @@ -93,7 +94,12 @@ def __init__( """ self._filepath = filepath - super().__init__(load_args, save_args) + self._load_args = self.DEFAULT_LOAD_ARGS.copy() + if load_args is not None: + self._load_args.update(load_args) + self._save_args = self.DEFAULT_SAVE_ARGS.copy() + if save_args is not None: + self._save_args.update(save_args) self._version = version def _load(self) -> Any: diff --git a/kedro/io/lambda_data_set.py b/kedro/io/lambda_data_set.py index 1435c47d70..0219ce83c9 100644 --- a/kedro/io/lambda_data_set.py +++ b/kedro/io/lambda_data_set.py @@ -121,7 +121,6 @@ def __init__( DataSetError: If a method is specified, but is not a Callable. """ - # pylint: disable=super-init-not-called for name, value in [ ("load", load), diff --git a/kedro/io/memory_data_set.py b/kedro/io/memory_data_set.py index 5d1574f051..ccfca5a7ec 100644 --- a/kedro/io/memory_data_set.py +++ b/kedro/io/memory_data_set.py @@ -74,7 +74,6 @@ def __init__(self, data: Any = None): Args: data: Python object containing the data. 
""" - # pylint: disable=super-init-not-called self._data = None if data is not None: self._save(data) diff --git a/kedro/io/parquet_local.py b/kedro/io/parquet_local.py index 8583c36dac..42a6931378 100644 --- a/kedro/io/parquet_local.py +++ b/kedro/io/parquet_local.py @@ -61,6 +61,7 @@ class ParquetLocalDataSet(AbstractDataSet, FilepathVersionMixIn): >>> assert data.equals(loaded_data) """ + DEFAULT_LOAD_ARGS = {} DEFAULT_SAVE_ARGS = {"compression": None} def _describe(self) -> Dict[str, Any]: @@ -111,7 +112,12 @@ def __init__( """ self._filepath = filepath self._engine = engine - super().__init__(load_args, save_args) + self._load_args = self.DEFAULT_LOAD_ARGS.copy() + if load_args is not None: + self._load_args.update(load_args) + self._save_args = self.DEFAULT_SAVE_ARGS.copy() + if save_args is not None: + self._save_args.update(save_args) self._version = version def _load(self) -> pd.DataFrame: diff --git a/kedro/io/pickle_local.py b/kedro/io/pickle_local.py index f5a0e79569..871f1c85b1 100644 --- a/kedro/io/pickle_local.py +++ b/kedro/io/pickle_local.py @@ -67,6 +67,9 @@ class PickleLocalDataSet(AbstractDataSet, FilepathVersionMixIn): >>> reloaded = data_set.load() """ + DEFAULT_LOAD_ARGS = {} + DEFAULT_SAVE_ARGS = {} + BACKENDS = {"pickle": pickle, "joblib": joblib} # pylint: disable=too-many-arguments @@ -125,7 +128,12 @@ def __init__( self._filepath = filepath self._backend = backend - super().__init__(load_args, save_args) + self._load_args = self.DEFAULT_LOAD_ARGS.copy() + if load_args is not None: + self._load_args.update(load_args) + self._save_args = self.DEFAULT_SAVE_ARGS.copy() + if save_args is not None: + self._save_args.update(save_args) self._version = version def _load(self) -> Any: diff --git a/kedro/io/pickle_s3.py b/kedro/io/pickle_s3.py index 56b14d59bb..9ed834c707 100644 --- a/kedro/io/pickle_s3.py +++ b/kedro/io/pickle_s3.py @@ -61,6 +61,9 @@ class PickleS3DataSet(AbstractDataSet, S3PathVersionMixIn): >>> reloaded = data_set.load() """ + DEFAULT_LOAD_ARGS = {} + DEFAULT_SAVE_ARGS = {} + # pylint: disable=too-many-arguments def __init__( self, @@ -98,7 +101,12 @@ def __init__( self._filepath = filepath self._bucket_name = bucket_name self._credentials = credentials if credentials else {} - super().__init__(load_args, save_args) + self._load_args = self.DEFAULT_LOAD_ARGS.copy() + if load_args is not None: + self._load_args.update(load_args) + self._save_args = self.DEFAULT_SAVE_ARGS.copy() + if save_args is not None: + self._save_args.update(save_args) self._version = version self._s3 = S3FileSystem(client_kwargs=self._credentials) diff --git a/kedro/io/sql.py b/kedro/io/sql.py index 5eb99ad18b..41626df6bb 100644 --- a/kedro/io/sql.py +++ b/kedro/io/sql.py @@ -139,6 +139,7 @@ class SQLTableDataSet(AbstractDataSet): """ + DEFAULT_LOAD_ARGS = {} DEFAULT_SAVE_ARGS = {"index": False} def _describe(self) -> Dict[str, Any]: @@ -195,7 +196,12 @@ def __init__( "provide a SQLAlchemy connection string." ) - super().__init__(load_args, save_args) + self._load_args = self.DEFAULT_LOAD_ARGS.copy() + if load_args is not None: + self._load_args.update(load_args) + self._save_args = self.DEFAULT_SAVE_ARGS.copy() + if save_args is not None: + self._save_args.update(save_args) self._load_args["table_name"] = table_name self._save_args["name"] = table_name @@ -281,7 +287,6 @@ def __init__( DataSetError: When either ``sql`` or ``con`` parameters is emtpy. 
""" - # pylint: disable=super-init-not-called if not sql: raise DataSetError( diff --git a/kedro/io/text_local.py b/kedro/io/text_local.py index 7c8b9f0509..218ac7c464 100644 --- a/kedro/io/text_local.py +++ b/kedro/io/text_local.py @@ -85,7 +85,12 @@ def __init__( attribute is None, save version will be autogenerated. """ self._filepath = os.path.expanduser(filepath) - super().__init__(load_args, save_args) + self._load_args = self.DEFAULT_LOAD_ARGS.copy() + if load_args is not None: + self._load_args.update(load_args) + self._save_args = self.DEFAULT_SAVE_ARGS.copy() + if save_args is not None: + self._save_args.update(save_args) self._version = version def _load(self) -> str: diff --git a/tests/io/test_data_catalog.py b/tests/io/test_data_catalog.py index d2dceed1de..07e3597f78 100644 --- a/tests/io/test_data_catalog.py +++ b/tests/io/test_data_catalog.py @@ -94,7 +94,6 @@ def conflicting_feed_dict(): class BadDataSet(AbstractDataSet): # pragma: no cover def __init__(self, filepath): - # pylint: disable=super-init-not-called self.filepath = filepath raise Exception("Naughty!") From 050577314468f4a4eed22ea96ba1f85d1b257cfd Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Wed, 3 Jul 2019 10:07:36 -0400 Subject: [PATCH 07/17] Demarcate load and save argument handling :dragon: --- kedro/io/csv_local.py | 3 +++ kedro/io/csv_s3.py | 3 +++ kedro/io/excel_local.py | 3 +++ kedro/io/hdf_local.py | 3 +++ kedro/io/hdf_s3.py | 3 +++ kedro/io/json_local.py | 3 +++ kedro/io/parquet_local.py | 3 +++ kedro/io/pickle_local.py | 3 +++ kedro/io/pickle_s3.py | 3 +++ kedro/io/sql.py | 1 + kedro/io/text_local.py | 3 +++ 11 files changed, 31 insertions(+) diff --git a/kedro/io/csv_local.py b/kedro/io/csv_local.py index ae21c08a0e..4dc19c2a5d 100644 --- a/kedro/io/csv_local.py +++ b/kedro/io/csv_local.py @@ -98,12 +98,15 @@ def __init__( attribute is None, save version will be autogenerated. 
""" self._filepath = filepath + + # Handle default load and save arguments self._load_args = self.DEFAULT_LOAD_ARGS.copy() if load_args is not None: self._load_args.update(load_args) self._save_args = self.DEFAULT_SAVE_ARGS.copy() if save_args is not None: self._save_args.update(save_args) + self._version = version def _load(self) -> pd.DataFrame: diff --git a/kedro/io/csv_s3.py b/kedro/io/csv_s3.py index 1d8d7baad5..8795704590 100644 --- a/kedro/io/csv_s3.py +++ b/kedro/io/csv_s3.py @@ -107,12 +107,15 @@ def __init__( self._filepath = filepath self._bucket_name = bucket_name self._credentials = credentials if credentials else {} + + # Handle default load and save arguments self._load_args = self.DEFAULT_LOAD_ARGS.copy() if load_args is not None: self._load_args.update(load_args) self._save_args = self.DEFAULT_SAVE_ARGS.copy() if save_args is not None: self._save_args.update(save_args) + self._version = version self._s3 = S3FileSystem(client_kwargs=self._credentials) diff --git a/kedro/io/excel_local.py b/kedro/io/excel_local.py index a3123358ad..edabc7dd00 100644 --- a/kedro/io/excel_local.py +++ b/kedro/io/excel_local.py @@ -109,12 +109,15 @@ def __init__( """ self._filepath = filepath self._engine = engine + + # Handle default load and save arguments self._load_args = self.DEFAULT_LOAD_ARGS.copy() if load_args is not None: self._load_args.update(load_args) self._save_args = self.DEFAULT_SAVE_ARGS.copy() if save_args is not None: self._save_args.update(save_args) + self._version = version def _load(self) -> Union[pd.DataFrame, Dict[str, pd.DataFrame]]: diff --git a/kedro/io/hdf_local.py b/kedro/io/hdf_local.py index 6be61a90bd..8a87c4834e 100644 --- a/kedro/io/hdf_local.py +++ b/kedro/io/hdf_local.py @@ -97,12 +97,15 @@ def __init__( """ self._filepath = filepath self._key = key + + # Handle default load and save arguments self._load_args = self.DEFAULT_LOAD_ARGS.copy() if load_args is not None: self._load_args.update(load_args) self._save_args = self.DEFAULT_SAVE_ARGS.copy() if save_args is not None: self._save_args.update(save_args) + self._version = version def _load(self) -> pd.DataFrame: diff --git a/kedro/io/hdf_s3.py b/kedro/io/hdf_s3.py index 3d6ef04f22..e4f243986b 100644 --- a/kedro/io/hdf_s3.py +++ b/kedro/io/hdf_s3.py @@ -108,12 +108,15 @@ def __init__( self._key = key self._bucket_name = bucket_name self._credentials = credentials if credentials else {} + + # Handle default load and save arguments self._load_args = self.DEFAULT_LOAD_ARGS.copy() if load_args is not None: self._load_args.update(load_args) self._save_args = self.DEFAULT_SAVE_ARGS.copy() if save_args is not None: self._save_args.update(save_args) + self._version = version self._s3 = S3FileSystem(client_kwargs=self._credentials) diff --git a/kedro/io/json_local.py b/kedro/io/json_local.py index c3d31fac99..b53e78202b 100644 --- a/kedro/io/json_local.py +++ b/kedro/io/json_local.py @@ -94,12 +94,15 @@ def __init__( """ self._filepath = filepath + + # Handle default load and save arguments self._load_args = self.DEFAULT_LOAD_ARGS.copy() if load_args is not None: self._load_args.update(load_args) self._save_args = self.DEFAULT_SAVE_ARGS.copy() if save_args is not None: self._save_args.update(save_args) + self._version = version def _load(self) -> Any: diff --git a/kedro/io/parquet_local.py b/kedro/io/parquet_local.py index 42a6931378..341e7ff004 100644 --- a/kedro/io/parquet_local.py +++ b/kedro/io/parquet_local.py @@ -112,12 +112,15 @@ def __init__( """ self._filepath = filepath self._engine = engine + + # Handle 
default load and save arguments self._load_args = self.DEFAULT_LOAD_ARGS.copy() if load_args is not None: self._load_args.update(load_args) self._save_args = self.DEFAULT_SAVE_ARGS.copy() if save_args is not None: self._save_args.update(save_args) + self._version = version def _load(self) -> pd.DataFrame: diff --git a/kedro/io/pickle_local.py b/kedro/io/pickle_local.py index 871f1c85b1..91d5ebad6d 100644 --- a/kedro/io/pickle_local.py +++ b/kedro/io/pickle_local.py @@ -128,12 +128,15 @@ def __init__( self._filepath = filepath self._backend = backend + + # Handle default load and save arguments self._load_args = self.DEFAULT_LOAD_ARGS.copy() if load_args is not None: self._load_args.update(load_args) self._save_args = self.DEFAULT_SAVE_ARGS.copy() if save_args is not None: self._save_args.update(save_args) + self._version = version def _load(self) -> Any: diff --git a/kedro/io/pickle_s3.py b/kedro/io/pickle_s3.py index 9ed834c707..12c431b7b3 100644 --- a/kedro/io/pickle_s3.py +++ b/kedro/io/pickle_s3.py @@ -101,12 +101,15 @@ def __init__( self._filepath = filepath self._bucket_name = bucket_name self._credentials = credentials if credentials else {} + + # Handle default load and save arguments self._load_args = self.DEFAULT_LOAD_ARGS.copy() if load_args is not None: self._load_args.update(load_args) self._save_args = self.DEFAULT_SAVE_ARGS.copy() if save_args is not None: self._save_args.update(save_args) + self._version = version self._s3 = S3FileSystem(client_kwargs=self._credentials) diff --git a/kedro/io/sql.py b/kedro/io/sql.py index 41626df6bb..ec351bce15 100644 --- a/kedro/io/sql.py +++ b/kedro/io/sql.py @@ -196,6 +196,7 @@ def __init__( "provide a SQLAlchemy connection string." ) + # Handle default load and save arguments self._load_args = self.DEFAULT_LOAD_ARGS.copy() if load_args is not None: self._load_args.update(load_args) diff --git a/kedro/io/text_local.py b/kedro/io/text_local.py index 218ac7c464..5798691b97 100644 --- a/kedro/io/text_local.py +++ b/kedro/io/text_local.py @@ -85,12 +85,15 @@ def __init__( attribute is None, save version will be autogenerated. 
""" self._filepath = os.path.expanduser(filepath) + + # Handle default load and save arguments self._load_args = self.DEFAULT_LOAD_ARGS.copy() if load_args is not None: self._load_args.update(load_args) self._save_args = self.DEFAULT_SAVE_ARGS.copy() if save_args is not None: self._save_args.update(save_args) + self._version = version def _load(self) -> str: From a93abf202989030bf6f63211f4fd7ffd247c8f7f Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Wed, 3 Jul 2019 11:39:02 -0400 Subject: [PATCH 08/17] Cover load and save argument handling :paw_prints: --- tests/io/test_hdf_local.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/io/test_hdf_local.py b/tests/io/test_hdf_local.py index e5659d79fe..c00a60f75c 100644 --- a/tests/io/test_hdf_local.py +++ b/tests/io/test_hdf_local.py @@ -43,6 +43,16 @@ def hdf_data_set(filepath_hdf): return HDFLocalDataSet(filepath=filepath_hdf, key="test_hdf") +@pytest.fixture +def hdf_data_set_with_args(filepath_hdf): + return HDFLocalDataSet( + filepath=filepath_hdf, + key="test_hdf", + load_args={"errors": "ignore"}, + save_args={"errors": "ignore"}, + ) + + @pytest.fixture def versioned_hdf_data_set(filepath_hdf, load_version, save_version): return HDFLocalDataSet( @@ -88,6 +98,13 @@ def test_overwrite_if_exists(self, hdf_data_set, dummy_dataframe): reloaded_df = hdf_data_set.load() assert_frame_equal(reloaded_df, dummy_dataframe.T) + def test_save_and_load_args(self, hdf_data_set_with_args, dummy_dataframe): + """Test saving and reloading the data set.""" + hdf_data_set_with_args.save(dummy_dataframe) + reloaded_df = hdf_data_set_with_args.load() + + assert_frame_equal(reloaded_df, dummy_dataframe) + class TestHDFLocalDataSetVersioned: def test_save_and_load(self, versioned_hdf_data_set, dummy_dataframe): From 4226c2eda970e6e097679e3d2562b95892974150 Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Wed, 3 Jul 2019 18:23:53 -0400 Subject: [PATCH 09/17] Add tests to cover load/save argument conditionals --- tests/io/test_hdf_s3.py | 20 ++++++++++++++++++++ tests/io/test_json_local.py | 10 ++++++++++ tests/io/test_pickle_local.py | 15 +++++++++++++++ tests/io/test_pickle_s3.py | 25 +++++++++++++++++++++++++ tests/io/test_text_local.py | 22 ++++++++++++++++++++++ 5 files changed, 92 insertions(+) diff --git a/tests/io/test_hdf_s3.py b/tests/io/test_hdf_s3.py index 0877d049e9..fd2bb1c358 100644 --- a/tests/io/test_hdf_s3.py +++ b/tests/io/test_hdf_s3.py @@ -70,6 +70,18 @@ def mocked_s3_object(mocked_s3_bucket, dummy_dataframe): return mocked_s3_bucket +@pytest.fixture +def hdf_data_set_with_args(): + return HDFS3DataSet( + filepath=FILENAME, + bucket_name=BUCKET_NAME, + credentials=AWS_CREDENTIALS, + key="test_hdf", + load_args={"title": "test_hdf"}, + save_args={"title": "test_hdf"}, + ) + + @pytest.fixture def versioned_hdf_data_set(load_version, save_version): return HDFS3DataSet( @@ -166,6 +178,14 @@ def test_overwrite_if_exists(self, hdf_data_set, dummy_dataframe): reloaded_df = hdf_data_set.load() assert_frame_equal(reloaded_df, dummy_dataframe.T) + @pytest.mark.usefixtures("mocked_s3_object") + def test_save_and_load_args(self, hdf_data_set_with_args, dummy_dataframe): + """Test saving and reloading the data set.""" + hdf_data_set_with_args.save(dummy_dataframe) + reloaded_df = hdf_data_set_with_args.load() + + assert_frame_equal(reloaded_df, dummy_dataframe) + class TestHDFS3DataSetVersioned: @pytest.mark.usefixtures("mocked_s3_object") diff --git a/tests/io/test_json_local.py b/tests/io/test_json_local.py index 
99acbf9ded..0161abdc15 100644 --- a/tests/io/test_json_local.py +++ b/tests/io/test_json_local.py @@ -45,6 +45,11 @@ def json_data_set(filepath_json): return JSONLocalDataSet(filepath=filepath_json) +@pytest.fixture +def json_data_set_with_load_args(filepath_json): + return JSONLocalDataSet(filepath=filepath_json, load_args={"parse_float": Decimal}) + + @pytest.fixture def versioned_json_data_set(filepath_json, load_version, save_version): return JSONLocalDataSet( @@ -92,6 +97,11 @@ def test_exists(self, json_data_set, json_data): json_data_set.save(json_data) assert json_data_set.exists() + def test_load_args(self, json_data_set_with_load_args): + """Test reloading the data set with load arguments specified.""" + json_data_set_with_load_args.save([1.1]) + assert json_data_set_with_load_args.load() == [Decimal("1.1")] + def test_allow_nan(self, json_data_set, filepath_json): """Strict JSON specification does not allow out of range float values, however the python implementation accepts them by default. Test both diff --git a/tests/io/test_pickle_local.py b/tests/io/test_pickle_local.py index 4b48519577..b1b063c450 100644 --- a/tests/io/test_pickle_local.py +++ b/tests/io/test_pickle_local.py @@ -45,6 +45,15 @@ def pickle_data_set(filepath_pkl, request): return PickleLocalDataSet(filepath=filepath_pkl, backend=request.param) +@pytest.fixture +def pickle_data_set_with_args(filepath_pkl): + return PickleLocalDataSet( + filepath=filepath_pkl, + load_args={"fix_imports": False}, + save_args={"fix_imports": False}, + ) + + @pytest.fixture def versioned_pickle_data_set(filepath_pkl, load_version, save_version): return PickleLocalDataSet( @@ -97,6 +106,12 @@ def test_joblib_not_installed(self, filepath_pkl, mocker): with pytest.raises(ImportError, match=pattern): PickleLocalDataSet(filepath=filepath_pkl, backend="joblib") + def test_save_and_load_args(self, pickle_data_set_with_args, dummy_dataframe): + """Test saving and reloading the data with different options.""" + pickle_data_set_with_args.save(dummy_dataframe) + reloaded_df = pickle_data_set_with_args.load() + assert_frame_equal(reloaded_df, dummy_dataframe) + class TestPickleLocalDataSetVersioned: def test_save_and_load(self, versioned_pickle_data_set, dummy_dataframe): diff --git a/tests/io/test_pickle_s3.py b/tests/io/test_pickle_s3.py index 7a6354c00c..8c3665dbb9 100644 --- a/tests/io/test_pickle_s3.py +++ b/tests/io/test_pickle_s3.py @@ -53,6 +53,17 @@ def s3_data_set(): ) +@pytest.fixture +def s3_data_set_with_args(): + return PickleS3DataSet( + filepath=FILENAME, + bucket_name=BUCKET_NAME, + credentials=AWS_CREDENTIALS, + load_args={"fix_imports": False}, + save_args={"fix_imports": False}, + ) + + @pytest.fixture def versioned_s3_data_set(load_version, save_version): return PickleS3DataSet( @@ -113,6 +124,12 @@ def test_load(self, s3_data_set): loaded_data = s3_data_set.load() assert loaded_data == DUMMY_PICKABLE_OBJECT + @pytest.mark.usefixtures("mocked_s3_object") + def test_load_args(self, s3_data_set_with_args): + """Test loading the data from S3 with options.""" + loaded_data = s3_data_set_with_args.load() + assert loaded_data == DUMMY_PICKABLE_OBJECT + @pytest.mark.parametrize( "bad_credentials", [{"aws_secret_access_key": "SECRET"}, {"aws_access_key_id": "KEY"}], @@ -171,6 +188,14 @@ def test_save(self, s3_data_set): loaded_data = s3_data_set.load() assert loaded_data == new_data + @pytest.mark.usefixtures("mocked_s3_object") + def test_save_args(self, s3_data_set_with_args): + """Test saving the data to S3 with options.""" + 
new_data = {"x": "y"} + s3_data_set_with_args.save(new_data) + loaded_data = s3_data_set_with_args.load() + assert loaded_data == new_data + def test_serializable(self, s3_data_set): ForkingPickler.dumps(s3_data_set) diff --git a/tests/io/test_text_local.py b/tests/io/test_text_local.py index 3d53537ed2..268c9c02ef 100644 --- a/tests/io/test_text_local.py +++ b/tests/io/test_text_local.py @@ -45,6 +45,16 @@ def txt_data_set(filepath_txt, request): return TextLocalDataSet(filepath=filepath_txt, **request.param) +@pytest.fixture(params=[dict()]) +def txt_data_set_with_args(filepath_txt, request): + return TextLocalDataSet( + filepath=filepath_txt, + load_args={"errors": "ignore"}, + save_args={"errors": "ignore"}, + **request.param + ) + + @pytest.fixture def versioned_txt_data_set(filepath_txt, load_version, save_version): return TextLocalDataSet( @@ -67,6 +77,12 @@ def test_should_write_to_file(self, txt_data_set, sample_text, filepath_txt): txt_data_set.save(sample_text) assert Path(filepath_txt).read_text("utf-8") == sample_text + def test_should_write_to_file_with_args( + self, txt_data_set_with_args, sample_text, filepath_txt + ): + txt_data_set_with_args.save(sample_text) + assert Path(filepath_txt).read_text("utf-8") == sample_text + def test_load_missing_txt_file(self, txt_data_set): """Check the error raised when trying to load nonexistent txt file.""" pattern = r"Failed while loading data from data set TextLocalDataSet" @@ -77,6 +93,12 @@ def test_should_read_from_file(self, txt_data_set, sample_text, filepath_txt): traditional_write(filepath_txt, sample_text) assert sample_text == txt_data_set.load() + def test_should_read_from_file_with_args( + self, txt_data_set_with_args, sample_text, filepath_txt + ): + traditional_write(filepath_txt, sample_text) + assert sample_text == txt_data_set_with_args.load() + def test_assess_if_file_exists(self, txt_data_set, sample_text, filepath_txt): assert not txt_data_set.exists() traditional_write(filepath_txt, sample_text) From a17ae9e95136f27d30746ce18b68a1bc49d58b36 Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Wed, 3 Jul 2019 18:35:42 -0400 Subject: [PATCH 10/17] Fix non-ASCII characters in legal header :pencil2: --- kedro/contrib/io/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kedro/contrib/io/core.py b/kedro/contrib/io/core.py index a417b9b3cc..3eeee1ac00 100644 --- a/kedro/contrib/io/core.py +++ b/kedro/contrib/io/core.py @@ -14,8 +14,8 @@ # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # -# The QuantumBlack Visual Analytics Limited (“QuantumBlack”) name and logo -# (either separately or in combination, “QuantumBlack Trademarks”) are +# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo +# (either separately or in combination, "QuantumBlack Trademarks") are # trademarks of QuantumBlack. The License does not grant you any right or # license to the QuantumBlack Trademarks. 
You may not use the QuantumBlack # Trademarks or any confusingly similar mark as a trademark for your product, From f7b2373c2296fdd12ed98a04e44e8621c7d2fe91 Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Sat, 6 Jul 2019 23:00:56 -0400 Subject: [PATCH 11/17] Remove load/save defaults from ``AbstractDataSet`` --- kedro/io/core.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/kedro/io/core.py b/kedro/io/core.py index c0f3f42d46..b0a347b770 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -101,9 +101,6 @@ class AbstractDataSet(abc.ABC): >>> return dict(param1=self._param1, param2=self._param2) """ - DEFAULT_LOAD_ARGS = {} - DEFAULT_SAVE_ARGS = {} - @classmethod def from_config( cls: Type, From 124d66348811446defc22b13856dd588a70db601 Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Tue, 9 Jul 2019 19:16:58 -0400 Subject: [PATCH 12/17] Call ``super().__init__`` in mix-in implementation --- kedro/contrib/io/core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/kedro/contrib/io/core.py b/kedro/contrib/io/core.py index 3eeee1ac00..dbec7c1a82 100644 --- a/kedro/contrib/io/core.py +++ b/kedro/contrib/io/core.py @@ -43,6 +43,7 @@ def __init__( load_args: Optional[Dict[str, Any]] = None, save_args: Optional[Dict[str, Any]] = None, ) -> None: + super().__init__() self._load_args = self.DEFAULT_LOAD_ARGS.copy() if load_args is not None: self._load_args.update(load_args) From d3c7153bc9c825dc4fee127f00ef0b20709744ce Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Tue, 9 Jul 2019 19:31:33 -0400 Subject: [PATCH 13/17] Fix MRO when subclassing ``DefaultArgumentsMixIn`` --- kedro/contrib/io/azure/csv_blob.py | 2 +- kedro/contrib/io/bioinformatics/sequence_dataset.py | 2 +- kedro/contrib/io/pyspark/spark_data_set.py | 2 +- kedro/contrib/io/pyspark/spark_jdbc.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/kedro/contrib/io/azure/csv_blob.py b/kedro/contrib/io/azure/csv_blob.py index 37dca6bc7e..c84c313d45 100644 --- a/kedro/contrib/io/azure/csv_blob.py +++ b/kedro/contrib/io/azure/csv_blob.py @@ -39,7 +39,7 @@ from kedro.io import AbstractDataSet -class CSVBlobDataSet(AbstractDataSet, DefaultArgumentsMixIn): +class CSVBlobDataSet(DefaultArgumentsMixIn, AbstractDataSet): """``CSVBlobDataSet`` loads and saves csv files in Microsoft's Azure blob storage. It uses azure storage SDK to read and write in azure and pandas to handle the csv file locally. diff --git a/kedro/contrib/io/bioinformatics/sequence_dataset.py b/kedro/contrib/io/bioinformatics/sequence_dataset.py index 6f844d1e52..7acf777578 100644 --- a/kedro/contrib/io/bioinformatics/sequence_dataset.py +++ b/kedro/contrib/io/bioinformatics/sequence_dataset.py @@ -39,7 +39,7 @@ from kedro.io import AbstractDataSet -class BioSequenceLocalDataSet(AbstractDataSet, DefaultArgumentsMixIn): +class BioSequenceLocalDataSet(DefaultArgumentsMixIn, AbstractDataSet): """``BioSequenceLocalDataSet`` loads and saves data to a sequence file. Example: diff --git a/kedro/contrib/io/pyspark/spark_data_set.py b/kedro/contrib/io/pyspark/spark_data_set.py index 39acebc4bc..7594bf5297 100644 --- a/kedro/contrib/io/pyspark/spark_data_set.py +++ b/kedro/contrib/io/pyspark/spark_data_set.py @@ -40,7 +40,7 @@ from kedro.io import AbstractDataSet -class SparkDataSet(AbstractDataSet, DefaultArgumentsMixIn): +class SparkDataSet(DefaultArgumentsMixIn, AbstractDataSet): """``SparkDataSet`` loads and saves Spark data frames. 
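Aside: PATCH 12 and PATCH 13 above are two halves of the same fix — the mix-in calls `super().__init__()` so that initialisation continues along the method resolution order, and listing the mix-in before ``AbstractDataSet`` guarantees the mix-in's ``__init__`` is the one found first. A toy MRO check (stand-in classes, not the real Kedro ones):

    class Base:                        # stand-in for AbstractDataSet
        pass

    class ArgsMixIn:                   # stand-in for DefaultArgumentsMixIn
        def __init__(self, load_args=None, save_args=None):
            super().__init__()         # cooperative: defers to the next class in the MRO
            self._load_args = dict(load_args or {})
            self._save_args = dict(save_args or {})

    class MyDataSet(ArgsMixIn, Base):  # mix-in first, as restored in PATCH 13
        pass

    assert MyDataSet.__mro__ == (MyDataSet, ArgsMixIn, Base, object)
    ds = MyDataSet(load_args={"mode": "r"})
    assert ds._load_args == {"mode": "r"}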
From d3c7153bc9c825dc4fee127f00ef0b20709744ce Mon Sep 17 00:00:00 2001
From: Deepyaman Datta
Date: Tue, 9 Jul 2019 19:31:33 -0400
Subject: [PATCH 13/17] Fix MRO when subclassing ``DefaultArgumentsMixIn``

---
 kedro/contrib/io/azure/csv_blob.py                  | 2 +-
 kedro/contrib/io/bioinformatics/sequence_dataset.py | 2 +-
 kedro/contrib/io/pyspark/spark_data_set.py          | 2 +-
 kedro/contrib/io/pyspark/spark_jdbc.py              | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/kedro/contrib/io/azure/csv_blob.py b/kedro/contrib/io/azure/csv_blob.py
index 37dca6bc7e..c84c313d45 100644
--- a/kedro/contrib/io/azure/csv_blob.py
+++ b/kedro/contrib/io/azure/csv_blob.py
@@ -39,7 +39,7 @@
 from kedro.io import AbstractDataSet
 
 
-class CSVBlobDataSet(AbstractDataSet, DefaultArgumentsMixIn):
+class CSVBlobDataSet(DefaultArgumentsMixIn, AbstractDataSet):
     """``CSVBlobDataSet`` loads and saves csv files in Microsoft's Azure
     blob storage. It uses azure storage SDK to read and write in azure
     and pandas to handle the csv file locally.

diff --git a/kedro/contrib/io/bioinformatics/sequence_dataset.py b/kedro/contrib/io/bioinformatics/sequence_dataset.py
index 6f844d1e52..7acf777578 100644
--- a/kedro/contrib/io/bioinformatics/sequence_dataset.py
+++ b/kedro/contrib/io/bioinformatics/sequence_dataset.py
@@ -39,7 +39,7 @@
 from kedro.io import AbstractDataSet
 
 
-class BioSequenceLocalDataSet(AbstractDataSet, DefaultArgumentsMixIn):
+class BioSequenceLocalDataSet(DefaultArgumentsMixIn, AbstractDataSet):
     """``BioSequenceLocalDataSet`` loads and saves data to a sequence file.
 
     Example:

diff --git a/kedro/contrib/io/pyspark/spark_data_set.py b/kedro/contrib/io/pyspark/spark_data_set.py
index 39acebc4bc..7594bf5297 100644
--- a/kedro/contrib/io/pyspark/spark_data_set.py
+++ b/kedro/contrib/io/pyspark/spark_data_set.py
@@ -40,7 +40,7 @@
 from kedro.io import AbstractDataSet
 
 
-class SparkDataSet(AbstractDataSet, DefaultArgumentsMixIn):
+class SparkDataSet(DefaultArgumentsMixIn, AbstractDataSet):
     """``SparkDataSet`` loads and saves Spark data frames.
 
     Example:

diff --git a/kedro/contrib/io/pyspark/spark_jdbc.py b/kedro/contrib/io/pyspark/spark_jdbc.py
index 842568b1d4..762e7ad73c 100644
--- a/kedro/contrib/io/pyspark/spark_jdbc.py
+++ b/kedro/contrib/io/pyspark/spark_jdbc.py
@@ -37,7 +37,7 @@
 __all__ = ["SparkJDBCDataSet"]
 
 
-class SparkJDBCDataSet(AbstractDataSet, DefaultArgumentsMixIn):
+class SparkJDBCDataSet(DefaultArgumentsMixIn, AbstractDataSet):
     """``SparkJDBCDataSet`` loads data from a database table accessible
     via JDBC URL url and connection properties and saves the content of
     a PySpark DataFrame to an external database table via JDBC. It uses

From cac0c78daa121b0015c27522272858d644f7781d Mon Sep 17 00:00:00 2001
From: Deepyaman Datta
Date: Tue, 9 Jul 2019 21:50:34 -0400
Subject: [PATCH 14/17] Copy default argument dicts with ``copy.deepcopy``

---
 kedro/contrib/io/core.py  | 5 +++--
 kedro/io/csv_local.py     | 5 +++--
 kedro/io/csv_s3.py        | 5 +++--
 kedro/io/excel_local.py   | 5 +++--
 kedro/io/hdf_local.py     | 5 +++--
 kedro/io/hdf_s3.py        | 5 +++--
 kedro/io/json_local.py    | 5 +++--
 kedro/io/parquet_local.py | 5 +++--
 kedro/io/pickle_local.py  | 5 +++--
 kedro/io/pickle_s3.py     | 5 +++--
 kedro/io/sql.py           | 5 +++--
 kedro/io/text_local.py    | 5 +++--
 12 files changed, 36 insertions(+), 24 deletions(-)

diff --git a/kedro/contrib/io/core.py b/kedro/contrib/io/core.py
index dbec7c1a82..ed169c4884 100644
--- a/kedro/contrib/io/core.py
+++ b/kedro/contrib/io/core.py
@@ -28,6 +28,7 @@
 
 """This module extends the set of classes ``kedro.io.core`` provides."""
 
+import copy
 from typing import Any, Dict, Optional
 
 
@@ -44,9 +45,9 @@ def __init__(
         save_args: Optional[Dict[str, Any]] = None,
     ) -> None:
         super().__init__()
-        self._load_args = self.DEFAULT_LOAD_ARGS.copy()
+        self._load_args = copy.deepcopy(self.DEFAULT_LOAD_ARGS)
         if load_args is not None:
             self._load_args.update(load_args)
-        self._save_args = self.DEFAULT_SAVE_ARGS.copy()
+        self._save_args = copy.deepcopy(self.DEFAULT_SAVE_ARGS)
         if save_args is not None:
             self._save_args.update(save_args)

diff --git a/kedro/io/csv_local.py b/kedro/io/csv_local.py
index 4dc19c2a5d..3f9f1dd247 100644
--- a/kedro/io/csv_local.py
+++ b/kedro/io/csv_local.py
@@ -30,6 +30,7 @@
 underlying functionality is supported by pandas, so it supports all
 allowed pandas options for loading and saving csv files.
 """
+import copy
 from pathlib import Path
 from typing import Any, Dict
 
@@ -100,10 +101,10 @@ def __init__(
         self._filepath = filepath
 
         # Handle default load and save arguments
-        self._load_args = self.DEFAULT_LOAD_ARGS.copy()
+        self._load_args = copy.deepcopy(self.DEFAULT_LOAD_ARGS)
         if load_args is not None:
             self._load_args.update(load_args)
-        self._save_args = self.DEFAULT_SAVE_ARGS.copy()
+        self._save_args = copy.deepcopy(self.DEFAULT_SAVE_ARGS)
         if save_args is not None:
             self._save_args.update(save_args)

diff --git a/kedro/io/csv_s3.py b/kedro/io/csv_s3.py
index 8795704590..54517af1c0 100644
--- a/kedro/io/csv_s3.py
+++ b/kedro/io/csv_s3.py
@@ -29,6 +29,7 @@
 """``CSVS3DataSet`` loads and saves data to a file in S3. It uses s3fs
 to read and write from S3 and pandas to handle the csv file.
 """
+import copy
 from typing import Any, Dict, Optional
 
 import pandas as pd
@@ -109,10 +110,10 @@ def __init__(
         self._credentials = credentials if credentials else {}
 
         # Handle default load and save arguments
-        self._load_args = self.DEFAULT_LOAD_ARGS.copy()
+        self._load_args = copy.deepcopy(self.DEFAULT_LOAD_ARGS)
         if load_args is not None:
             self._load_args.update(load_args)
-        self._save_args = self.DEFAULT_SAVE_ARGS.copy()
+        self._save_args = copy.deepcopy(self.DEFAULT_SAVE_ARGS)
         if save_args is not None:
             self._save_args.update(save_args)

diff --git a/kedro/io/excel_local.py b/kedro/io/excel_local.py
index edabc7dd00..fc4ccd2398 100644
--- a/kedro/io/excel_local.py
+++ b/kedro/io/excel_local.py
@@ -30,6 +30,7 @@
 underlying functionality is supported by pandas, so it supports all
 allowed pandas options for loading and saving Excel files.
 """
+import copy
 from pathlib import Path
 from typing import Any, Dict, Union
 
@@ -111,10 +112,10 @@ def __init__(
         self._engine = engine
 
         # Handle default load and save arguments
-        self._load_args = self.DEFAULT_LOAD_ARGS.copy()
+        self._load_args = copy.deepcopy(self.DEFAULT_LOAD_ARGS)
         if load_args is not None:
             self._load_args.update(load_args)
-        self._save_args = self.DEFAULT_SAVE_ARGS.copy()
+        self._save_args = copy.deepcopy(self.DEFAULT_SAVE_ARGS)
         if save_args is not None:
             self._save_args.update(save_args)

diff --git a/kedro/io/hdf_local.py b/kedro/io/hdf_local.py
index 8a87c4834e..1e483c49de 100644
--- a/kedro/io/hdf_local.py
+++ b/kedro/io/hdf_local.py
@@ -30,6 +30,7 @@
 underlying functionality is supported by pandas, so it supports all
 allowed pandas options for loading and saving hdf files.
 """
+import copy
 from pathlib import Path
 from typing import Any, Dict
 
@@ -99,10 +100,10 @@ def __init__(
         self._key = key
 
         # Handle default load and save arguments
-        self._load_args = self.DEFAULT_LOAD_ARGS.copy()
+        self._load_args = copy.deepcopy(self.DEFAULT_LOAD_ARGS)
         if load_args is not None:
             self._load_args.update(load_args)
-        self._save_args = self.DEFAULT_SAVE_ARGS.copy()
+        self._save_args = copy.deepcopy(self.DEFAULT_SAVE_ARGS)
         if save_args is not None:
             self._save_args.update(save_args)

diff --git a/kedro/io/hdf_s3.py b/kedro/io/hdf_s3.py
index e4f243986b..3838742330 100644
--- a/kedro/io/hdf_s3.py
+++ b/kedro/io/hdf_s3.py
@@ -30,6 +30,7 @@
 underlying functionality is supported by pandas HDFStore and PyTables, so it
 supports all allowed PyTables options for loading and saving hdf files.
 """
+import copy
 from typing import Any, Dict, Optional
 
 import pandas as pd
@@ -110,10 +111,10 @@ def __init__(
         self._credentials = credentials if credentials else {}
 
         # Handle default load and save arguments
-        self._load_args = self.DEFAULT_LOAD_ARGS.copy()
+        self._load_args = copy.deepcopy(self.DEFAULT_LOAD_ARGS)
         if load_args is not None:
             self._load_args.update(load_args)
-        self._save_args = self.DEFAULT_SAVE_ARGS.copy()
+        self._save_args = copy.deepcopy(self.DEFAULT_SAVE_ARGS)
         if save_args is not None:
             self._save_args.update(save_args)

diff --git a/kedro/io/json_local.py b/kedro/io/json_local.py
index b53e78202b..0420eec70c 100644
--- a/kedro/io/json_local.py
+++ b/kedro/io/json_local.py
@@ -29,6 +29,7 @@
 """``JSONLocalDataSet`` encodes a given object to json and saves it to a
 local file.
 """
+import copy
 import json
 from pathlib import Path
 from typing import Any, Dict
@@ -96,10 +97,10 @@ def __init__(
         self._filepath = filepath
 
         # Handle default load and save arguments
-        self._load_args = self.DEFAULT_LOAD_ARGS.copy()
+        self._load_args = copy.deepcopy(self.DEFAULT_LOAD_ARGS)
         if load_args is not None:
             self._load_args.update(load_args)
-        self._save_args = self.DEFAULT_SAVE_ARGS.copy()
+        self._save_args = copy.deepcopy(self.DEFAULT_SAVE_ARGS)
         if save_args is not None:
             self._save_args.update(save_args)

diff --git a/kedro/io/parquet_local.py b/kedro/io/parquet_local.py
index 341e7ff004..549d97c22c 100644
--- a/kedro/io/parquet_local.py
+++ b/kedro/io/parquet_local.py
@@ -36,6 +36,7 @@
 https://arrow.apache.org/docs/python/index.html
 """
 
+import copy
 from pathlib import Path
 from typing import Any, Dict
 
@@ -114,10 +115,10 @@ def __init__(
         self._engine = engine
 
         # Handle default load and save arguments
-        self._load_args = self.DEFAULT_LOAD_ARGS.copy()
+        self._load_args = copy.deepcopy(self.DEFAULT_LOAD_ARGS)
         if load_args is not None:
             self._load_args.update(load_args)
-        self._save_args = self.DEFAULT_SAVE_ARGS.copy()
+        self._save_args = copy.deepcopy(self.DEFAULT_SAVE_ARGS)
         if save_args is not None:
             self._save_args.update(save_args)

diff --git a/kedro/io/pickle_local.py b/kedro/io/pickle_local.py
index 91d5ebad6d..9783a71e88 100644
--- a/kedro/io/pickle_local.py
+++ b/kedro/io/pickle_local.py
@@ -32,6 +32,7 @@
 all allowed options for loading and saving pickle files.
 """
 
+import copy
 import pickle
 from pathlib import Path
 from typing import Any, Dict
@@ -130,10 +131,10 @@ def __init__(
         self._backend = backend
 
         # Handle default load and save arguments
-        self._load_args = self.DEFAULT_LOAD_ARGS.copy()
+        self._load_args = copy.deepcopy(self.DEFAULT_LOAD_ARGS)
         if load_args is not None:
             self._load_args.update(load_args)
-        self._save_args = self.DEFAULT_SAVE_ARGS.copy()
+        self._save_args = copy.deepcopy(self.DEFAULT_SAVE_ARGS)
         if save_args is not None:
             self._save_args.update(save_args)

diff --git a/kedro/io/pickle_s3.py b/kedro/io/pickle_s3.py
index 12c431b7b3..43210a740a 100644
--- a/kedro/io/pickle_s3.py
+++ b/kedro/io/pickle_s3.py
@@ -30,6 +30,7 @@
 The underlying functionality is supported by the ``pickle`` library, so it
 supports all allowed options for loading and saving pickle files.
 """
+import copy
 import pickle
 from typing import Any, Dict, Optional
 
@@ -103,10 +104,10 @@ def __init__(
         self._credentials = credentials if credentials else {}
 
         # Handle default load and save arguments
-        self._load_args = self.DEFAULT_LOAD_ARGS.copy()
+        self._load_args = copy.deepcopy(self.DEFAULT_LOAD_ARGS)
         if load_args is not None:
             self._load_args.update(load_args)
-        self._save_args = self.DEFAULT_SAVE_ARGS.copy()
+        self._save_args = copy.deepcopy(self.DEFAULT_SAVE_ARGS)
         if save_args is not None:
             self._save_args.update(save_args)

diff --git a/kedro/io/sql.py b/kedro/io/sql.py
index ec351bce15..07373e54d3 100644
--- a/kedro/io/sql.py
+++ b/kedro/io/sql.py
@@ -27,6 +27,7 @@
 # limitations under the License.
 
 """``SQLDataSet`` to load and save data to a SQL backend."""
+import copy
 import re
 from typing import Any, Dict, Optional
 
@@ -197,10 +198,10 @@ def __init__(
             )
 
         # Handle default load and save arguments
-        self._load_args = self.DEFAULT_LOAD_ARGS.copy()
+        self._load_args = copy.deepcopy(self.DEFAULT_LOAD_ARGS)
         if load_args is not None:
             self._load_args.update(load_args)
-        self._save_args = self.DEFAULT_SAVE_ARGS.copy()
+        self._save_args = copy.deepcopy(self.DEFAULT_SAVE_ARGS)
         if save_args is not None:
             self._save_args.update(save_args)

diff --git a/kedro/io/text_local.py b/kedro/io/text_local.py
index 5798691b97..915a3409dd 100644
--- a/kedro/io/text_local.py
+++ b/kedro/io/text_local.py
@@ -28,6 +28,7 @@
 """``TextLocalDataSet`` loads and saves data to a local text file. The data is
 accessed text data using the python open function.
 """
+import copy
 import os
 from pathlib import Path
 from typing import Any, Dict
@@ -87,10 +88,10 @@ def __init__(
         self._filepath = os.path.expanduser(filepath)
 
         # Handle default load and save arguments
-        self._load_args = self.DEFAULT_LOAD_ARGS.copy()
+        self._load_args = copy.deepcopy(self.DEFAULT_LOAD_ARGS)
         if load_args is not None:
             self._load_args.update(load_args)
-        self._save_args = self.DEFAULT_SAVE_ARGS.copy()
+        self._save_args = copy.deepcopy(self.DEFAULT_SAVE_ARGS)
         if save_args is not None:
             self._save_args.update(save_args)
From 5896daa56895ce3a17243847242fb38ee1078e63 Mon Sep 17 00:00:00 2001
From: Deepyaman Datta
Date: Wed, 10 Jul 2019 12:11:47 -0400
Subject: [PATCH 15/17] Annotate types for default load and save arguments

---
 kedro/contrib/io/core.py  | 4 ++--
 kedro/io/csv_local.py     | 4 ++--
 kedro/io/csv_s3.py        | 4 ++--
 kedro/io/hdf_local.py     | 4 ++--
 kedro/io/hdf_s3.py        | 4 ++--
 kedro/io/json_local.py    | 4 ++--
 kedro/io/parquet_local.py | 4 ++--
 kedro/io/pickle_local.py  | 4 ++--
 kedro/io/pickle_s3.py     | 4 ++--
 kedro/io/sql.py           | 4 ++--
 kedro/io/text_local.py    | 4 ++--
 11 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/kedro/contrib/io/core.py b/kedro/contrib/io/core.py
index ed169c4884..d4c5fb7c10 100644
--- a/kedro/contrib/io/core.py
+++ b/kedro/contrib/io/core.py
@@ -36,8 +36,8 @@
 class DefaultArgumentsMixIn:
     """Mixin class that helps handle default load and save arguments."""
 
-    DEFAULT_LOAD_ARGS = {}
-    DEFAULT_SAVE_ARGS = {}
+    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
 
     def __init__(
         self,

diff --git a/kedro/io/csv_local.py b/kedro/io/csv_local.py
index 9dcdeb0410..74da6bfa99 100644
--- a/kedro/io/csv_local.py
+++ b/kedro/io/csv_local.py
@@ -62,8 +62,8 @@ class CSVLocalDataSet(AbstractVersionedDataSet):
 
     """
 
-    DEFAULT_LOAD_ARGS = {}
-    DEFAULT_SAVE_ARGS = {"index": False}
+    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: Dict[str, Any] = {"index": False}
 
     def __init__(
         self,

diff --git a/kedro/io/csv_s3.py b/kedro/io/csv_s3.py
index af010c1377..3e16ac85d0 100644
--- a/kedro/io/csv_s3.py
+++ b/kedro/io/csv_s3.py
@@ -63,8 +63,8 @@ class CSVS3DataSet(AbstractVersionedDataSet):
     >>> assert data.equals(reloaded)
     """
 
-    DEFAULT_LOAD_ARGS = {}
-    DEFAULT_SAVE_ARGS = {"index": False}
+    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: Dict[str, Any] = {"index": False}
 
     # pylint: disable=too-many-arguments
     def __init__(

diff --git a/kedro/io/hdf_local.py b/kedro/io/hdf_local.py
index 568ba5e845..cf3edec161 100644
--- a/kedro/io/hdf_local.py
+++ b/kedro/io/hdf_local.py
@@ -64,8 +64,8 @@ class HDFLocalDataSet(AbstractVersionedDataSet):
 
     """
 
-    DEFAULT_LOAD_ARGS = {}
-    DEFAULT_SAVE_ARGS = {}
+    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
 
     # pylint: disable=too-many-arguments
     def __init__(

diff --git a/kedro/io/hdf_s3.py b/kedro/io/hdf_s3.py
index 3c2313129e..0a55b3b644 100644
--- a/kedro/io/hdf_s3.py
+++ b/kedro/io/hdf_s3.py
@@ -69,8 +69,8 @@ class HDFS3DataSet(AbstractVersionedDataSet):
 
     """
 
-    DEFAULT_LOAD_ARGS = {}
-    DEFAULT_SAVE_ARGS = {}
+    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
 
     # pylint: disable=too-many-arguments
     def __init__(

diff --git a/kedro/io/json_local.py b/kedro/io/json_local.py
index 214066f5a2..e2bccf0a8c 100644
--- a/kedro/io/json_local.py
+++ b/kedro/io/json_local.py
@@ -59,8 +59,8 @@ class JSONLocalDataSet(AbstractVersionedDataSet):
 
     """
 
-    DEFAULT_LOAD_ARGS = {}
-    DEFAULT_SAVE_ARGS = {"indent": 4}
+    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: Dict[str, Any] = {"indent": 4}
 
     def __init__(
         self,

diff --git a/kedro/io/parquet_local.py b/kedro/io/parquet_local.py
index 1fd154dbf8..b85bf87079 100644
--- a/kedro/io/parquet_local.py
+++ b/kedro/io/parquet_local.py
@@ -62,8 +62,8 @@ class ParquetLocalDataSet(AbstractVersionedDataSet):
     >>> assert data.equals(loaded_data)
     """
 
-    DEFAULT_LOAD_ARGS = {}
-    DEFAULT_SAVE_ARGS = {"compression": None}
+    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: Dict[str, Any] = {"compression": None}
 
     # pylint: disable=too-many-arguments
     def __init__(

diff --git a/kedro/io/pickle_local.py b/kedro/io/pickle_local.py
index 9299559d43..ba01dcbc69 100644
--- a/kedro/io/pickle_local.py
+++ b/kedro/io/pickle_local.py
@@ -68,8 +68,8 @@ class PickleLocalDataSet(AbstractVersionedDataSet):
     >>> reloaded = data_set.load()
     """
 
-    DEFAULT_LOAD_ARGS = {}
-    DEFAULT_SAVE_ARGS = {}
+    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
 
     BACKENDS = {"pickle": pickle, "joblib": joblib}

diff --git a/kedro/io/pickle_s3.py b/kedro/io/pickle_s3.py
index cc19e82c68..99e67562f0 100644
--- a/kedro/io/pickle_s3.py
+++ b/kedro/io/pickle_s3.py
@@ -65,8 +65,8 @@ class PickleS3DataSet(AbstractVersionedDataSet):
     >>> reloaded = data_set.load()
     """
 
-    DEFAULT_LOAD_ARGS = {}
-    DEFAULT_SAVE_ARGS = {}
+    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
 
     # pylint: disable=too-many-arguments
     def __init__(

diff --git a/kedro/io/sql.py b/kedro/io/sql.py
index e94576fb94..bcd82714dc 100644
--- a/kedro/io/sql.py
+++ b/kedro/io/sql.py
@@ -140,8 +140,8 @@ class SQLTableDataSet(AbstractDataSet):
 
     """
 
-    DEFAULT_LOAD_ARGS = {}
-    DEFAULT_SAVE_ARGS = {"index": False}
+    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
+    DEFAULT_SAVE_ARGS: Dict[str, Any] = {"index": False}
 
     def _describe(self) -> Dict[str, Any]:
         load_args = self._load_args.copy()

diff --git a/kedro/io/text_local.py b/kedro/io/text_local.py
index b308d6b9f0..12e00e79fa 100644
--- a/kedro/io/text_local.py
+++ b/kedro/io/text_local.py
@@ -51,8 +51,8 @@ class TextLocalDataSet(AbstractVersionedDataSet):
     >>> reloaded = data_set.load()
     """
 
-    DEFAULT_LOAD_ARGS = {"mode": "r"}
-    DEFAULT_SAVE_ARGS = {"mode": "w"}
+    DEFAULT_LOAD_ARGS: Dict[str, Any] = {"mode": "r"}
+    DEFAULT_SAVE_ARGS: Dict[str, Any] = {"mode": "w"}
 
     def __init__(
         self,
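Two notes on the preceding pair of commits, with illustrative snippets rather than kedro's actual code. ``dict.copy()`` is shallow: the copy still aliases any nested containers inside the class-level default, so one instance mutating a nested value would corrupt the default for every other instance; ``copy.deepcopy`` severs that link. The nested key below is made up purely for demonstration:

import copy

DEFAULT_LOAD_ARGS = {"open_args": {"mode": "r"}}

shallow = DEFAULT_LOAD_ARGS.copy()
shallow["open_args"]["mode"] = "rb"
print(DEFAULT_LOAD_ARGS["open_args"]["mode"])  # "rb" -- shared default mutated

DEFAULT_LOAD_ARGS = {"open_args": {"mode": "r"}}
deep = copy.deepcopy(DEFAULT_LOAD_ARGS)
deep["open_args"]["mode"] = "rb"
print(DEFAULT_LOAD_ARGS["open_args"]["mode"])  # "r" -- default left intact

The type annotations, meanwhile, are not decorative. From a bare literal such as ``{"index": False}`` a checker like mypy infers ``Dict[str, bool]`` and would then reject legitimate overrides merged in from ``save_args``, e.g. ``{"sep": "|"}``; annotating the attribute as ``Dict[str, Any]`` keeps it permissive. A sketch with a hypothetical class:

from typing import Any, Dict


class SomeDataSet:
    # Without the annotation, mypy infers Dict[str, bool] from the literal
    # and flags later updates whose values are not bools.
    DEFAULT_SAVE_ARGS: Dict[str, Any] = {"index": False}

The next commit reverts this annotation style, and the one after reintroduces it in comment form.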
From 39317447d04e3a1428555fa246df1c3fcebe0121 Mon Sep 17 00:00:00 2001
From: Deepyaman Datta
Date: Wed, 10 Jul 2019 12:36:56 -0400
Subject: [PATCH 16/17] Revert "Annotate types for default load and save
 arguments"

This reverts commit 5896daa56895ce3a17243847242fb38ee1078e63.
---
 kedro/contrib/io/core.py  | 4 ++--
 kedro/io/csv_local.py     | 4 ++--
 kedro/io/csv_s3.py        | 4 ++--
 kedro/io/hdf_local.py     | 4 ++--
 kedro/io/hdf_s3.py        | 4 ++--
 kedro/io/json_local.py    | 4 ++--
 kedro/io/parquet_local.py | 4 ++--
 kedro/io/pickle_local.py  | 4 ++--
 kedro/io/pickle_s3.py     | 4 ++--
 kedro/io/sql.py           | 4 ++--
 kedro/io/text_local.py    | 4 ++--
 11 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/kedro/contrib/io/core.py b/kedro/contrib/io/core.py
index d4c5fb7c10..ed169c4884 100644
--- a/kedro/contrib/io/core.py
+++ b/kedro/contrib/io/core.py
@@ -36,8 +36,8 @@
 class DefaultArgumentsMixIn:
     """Mixin class that helps handle default load and save arguments."""
 
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
+    DEFAULT_LOAD_ARGS = {}
+    DEFAULT_SAVE_ARGS = {}
 
     def __init__(
         self,

diff --git a/kedro/io/csv_local.py b/kedro/io/csv_local.py
index 74da6bfa99..9dcdeb0410 100644
--- a/kedro/io/csv_local.py
+++ b/kedro/io/csv_local.py
@@ -62,8 +62,8 @@ class CSVLocalDataSet(AbstractVersionedDataSet):
 
     """
 
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {"index": False}
+    DEFAULT_LOAD_ARGS = {}
+    DEFAULT_SAVE_ARGS = {"index": False}
 
     def __init__(
         self,

diff --git a/kedro/io/csv_s3.py b/kedro/io/csv_s3.py
index 3e16ac85d0..af010c1377 100644
--- a/kedro/io/csv_s3.py
+++ b/kedro/io/csv_s3.py
@@ -63,8 +63,8 @@ class CSVS3DataSet(AbstractVersionedDataSet):
     >>> assert data.equals(reloaded)
     """
 
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {"index": False}
+    DEFAULT_LOAD_ARGS = {}
+    DEFAULT_SAVE_ARGS = {"index": False}
 
     # pylint: disable=too-many-arguments
     def __init__(

diff --git a/kedro/io/hdf_local.py b/kedro/io/hdf_local.py
index cf3edec161..568ba5e845 100644
--- a/kedro/io/hdf_local.py
+++ b/kedro/io/hdf_local.py
@@ -64,8 +64,8 @@ class HDFLocalDataSet(AbstractVersionedDataSet):
 
     """
 
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
+    DEFAULT_LOAD_ARGS = {}
+    DEFAULT_SAVE_ARGS = {}
 
     # pylint: disable=too-many-arguments
     def __init__(

diff --git a/kedro/io/hdf_s3.py b/kedro/io/hdf_s3.py
index 0a55b3b644..3c2313129e 100644
--- a/kedro/io/hdf_s3.py
+++ b/kedro/io/hdf_s3.py
@@ -69,8 +69,8 @@ class HDFS3DataSet(AbstractVersionedDataSet):
 
     """
 
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
+    DEFAULT_LOAD_ARGS = {}
+    DEFAULT_SAVE_ARGS = {}
 
     # pylint: disable=too-many-arguments
     def __init__(

diff --git a/kedro/io/json_local.py b/kedro/io/json_local.py
index e2bccf0a8c..214066f5a2 100644
--- a/kedro/io/json_local.py
+++ b/kedro/io/json_local.py
@@ -59,8 +59,8 @@ class JSONLocalDataSet(AbstractVersionedDataSet):
 
     """
 
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {"indent": 4}
+    DEFAULT_LOAD_ARGS = {}
+    DEFAULT_SAVE_ARGS = {"indent": 4}
 
     def __init__(
         self,

diff --git a/kedro/io/parquet_local.py b/kedro/io/parquet_local.py
index b85bf87079..1fd154dbf8 100644
--- a/kedro/io/parquet_local.py
+++ b/kedro/io/parquet_local.py
@@ -62,8 +62,8 @@ class ParquetLocalDataSet(AbstractVersionedDataSet):
     >>> assert data.equals(loaded_data)
     """
 
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {"compression": None}
+    DEFAULT_LOAD_ARGS = {}
+    DEFAULT_SAVE_ARGS = {"compression": None}
 
     # pylint: disable=too-many-arguments
     def __init__(

diff --git a/kedro/io/pickle_local.py b/kedro/io/pickle_local.py
index ba01dcbc69..9299559d43 100644
--- a/kedro/io/pickle_local.py
+++ b/kedro/io/pickle_local.py
@@ -68,8 +68,8 @@ class PickleLocalDataSet(AbstractVersionedDataSet):
     >>> reloaded = data_set.load()
     """
 
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
+    DEFAULT_LOAD_ARGS = {}
+    DEFAULT_SAVE_ARGS = {}
 
     BACKENDS = {"pickle": pickle, "joblib": joblib}

diff --git a/kedro/io/pickle_s3.py b/kedro/io/pickle_s3.py
index 99e67562f0..cc19e82c68 100644
--- a/kedro/io/pickle_s3.py
+++ b/kedro/io/pickle_s3.py
@@ -65,8 +65,8 @@ class PickleS3DataSet(AbstractVersionedDataSet):
     >>> reloaded = data_set.load()
     """
 
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {}
+    DEFAULT_LOAD_ARGS = {}
+    DEFAULT_SAVE_ARGS = {}
 
     # pylint: disable=too-many-arguments
     def __init__(

diff --git a/kedro/io/sql.py b/kedro/io/sql.py
index bcd82714dc..e94576fb94 100644
--- a/kedro/io/sql.py
+++ b/kedro/io/sql.py
@@ -140,8 +140,8 @@ class SQLTableDataSet(AbstractDataSet):
 
     """
 
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {"index": False}
+    DEFAULT_LOAD_ARGS = {}
+    DEFAULT_SAVE_ARGS = {"index": False}
 
     def _describe(self) -> Dict[str, Any]:
         load_args = self._load_args.copy()

diff --git a/kedro/io/text_local.py b/kedro/io/text_local.py
index 12e00e79fa..b308d6b9f0 100644
--- a/kedro/io/text_local.py
+++ b/kedro/io/text_local.py
@@ -51,8 +51,8 @@ class TextLocalDataSet(AbstractVersionedDataSet):
     >>> reloaded = data_set.load()
     """
 
-    DEFAULT_LOAD_ARGS: Dict[str, Any] = {"mode": "r"}
-    DEFAULT_SAVE_ARGS: Dict[str, Any] = {"mode": "w"}
+    DEFAULT_LOAD_ARGS = {"mode": "r"}
+    DEFAULT_SAVE_ARGS = {"mode": "w"}
 
     def __init__(
         self,
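The revert-and-redo swaps PEP 526 variable annotations for ``# type:`` comments. The likely motivation, an assumption here rather than anything stated in the commit message, is interpreter compatibility: variable annotations are a syntax error before Python 3.6, while type comments convey the same information to checkers on older versions such as 3.5. The two spellings are equivalent to mypy, as the hypothetical class below illustrates:

from typing import Any, Dict


class SomeDataSet:
    # Python 3.6+ only:
    #     DEFAULT_SAVE_ARGS: Dict[str, Any] = {"index": False}
    # Also valid on older interpreters:
    DEFAULT_SAVE_ARGS = {"index": False}  # type: Dict[str, Any]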
From b2e4c1c401eb5f6ab5b2fb379c9242156131aa53 Mon Sep 17 00:00:00 2001
From: Deepyaman Datta
Date: Wed, 10 Jul 2019 12:44:18 -0400
Subject: [PATCH 17/17] Annotate types for default load and save arguments

---
 kedro/contrib/io/core.py  | 4 ++--
 kedro/io/csv_local.py     | 4 ++--
 kedro/io/csv_s3.py        | 4 ++--
 kedro/io/hdf_local.py     | 4 ++--
 kedro/io/hdf_s3.py        | 4 ++--
 kedro/io/json_local.py    | 4 ++--
 kedro/io/parquet_local.py | 4 ++--
 kedro/io/pickle_local.py  | 4 ++--
 kedro/io/pickle_s3.py     | 4 ++--
 kedro/io/sql.py           | 4 ++--
 kedro/io/text_local.py    | 4 ++--
 11 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/kedro/contrib/io/core.py b/kedro/contrib/io/core.py
index ed169c4884..c963db4a2f 100644
--- a/kedro/contrib/io/core.py
+++ b/kedro/contrib/io/core.py
@@ -36,8 +36,8 @@
 class DefaultArgumentsMixIn:
     """Mixin class that helps handle default load and save arguments."""
 
-    DEFAULT_LOAD_ARGS = {}
-    DEFAULT_SAVE_ARGS = {}
+    DEFAULT_LOAD_ARGS = {}  # type: Dict[str, Any]
+    DEFAULT_SAVE_ARGS = {}  # type: Dict[str, Any]
 
     def __init__(
         self,

diff --git a/kedro/io/csv_local.py b/kedro/io/csv_local.py
index 9dcdeb0410..17ad9bee94 100644
--- a/kedro/io/csv_local.py
+++ b/kedro/io/csv_local.py
@@ -62,8 +62,8 @@ class CSVLocalDataSet(AbstractVersionedDataSet):
 
     """
 
-    DEFAULT_LOAD_ARGS = {}
-    DEFAULT_SAVE_ARGS = {"index": False}
+    DEFAULT_LOAD_ARGS = {}  # type: Dict[str, Any]
+    DEFAULT_SAVE_ARGS = {"index": False}  # type: Dict[str, Any]
 
     def __init__(
         self,

diff --git a/kedro/io/csv_s3.py b/kedro/io/csv_s3.py
index af010c1377..de8043a055 100644
--- a/kedro/io/csv_s3.py
+++ b/kedro/io/csv_s3.py
@@ -63,8 +63,8 @@ class CSVS3DataSet(AbstractVersionedDataSet):
     >>> assert data.equals(reloaded)
     """
 
-    DEFAULT_LOAD_ARGS = {}
-    DEFAULT_SAVE_ARGS = {"index": False}
+    DEFAULT_LOAD_ARGS = {}  # type: Dict[str, Any]
+    DEFAULT_SAVE_ARGS = {"index": False}  # type: Dict[str, Any]
 
     # pylint: disable=too-many-arguments
     def __init__(

diff --git a/kedro/io/hdf_local.py b/kedro/io/hdf_local.py
index 568ba5e845..cb80fe5786 100644
--- a/kedro/io/hdf_local.py
+++ b/kedro/io/hdf_local.py
@@ -64,8 +64,8 @@ class HDFLocalDataSet(AbstractVersionedDataSet):
 
     """
 
-    DEFAULT_LOAD_ARGS = {}
-    DEFAULT_SAVE_ARGS = {}
+    DEFAULT_LOAD_ARGS = {}  # type: Dict[str, Any]
+    DEFAULT_SAVE_ARGS = {}  # type: Dict[str, Any]
 
     # pylint: disable=too-many-arguments
     def __init__(

diff --git a/kedro/io/hdf_s3.py b/kedro/io/hdf_s3.py
index 3c2313129e..6c83fff405 100644
--- a/kedro/io/hdf_s3.py
+++ b/kedro/io/hdf_s3.py
@@ -69,8 +69,8 @@ class HDFS3DataSet(AbstractVersionedDataSet):
 
     """
 
-    DEFAULT_LOAD_ARGS = {}
-    DEFAULT_SAVE_ARGS = {}
+    DEFAULT_LOAD_ARGS = {}  # type: Dict[str, Any]
+    DEFAULT_SAVE_ARGS = {}  # type: Dict[str, Any]
 
     # pylint: disable=too-many-arguments
     def __init__(

diff --git a/kedro/io/json_local.py b/kedro/io/json_local.py
index 214066f5a2..e6e359d94a 100644
--- a/kedro/io/json_local.py
+++ b/kedro/io/json_local.py
@@ -59,8 +59,8 @@ class JSONLocalDataSet(AbstractVersionedDataSet):
 
     """
 
-    DEFAULT_LOAD_ARGS = {}
-    DEFAULT_SAVE_ARGS = {"indent": 4}
+    DEFAULT_LOAD_ARGS = {}  # type: Dict[str, Any]
+    DEFAULT_SAVE_ARGS = {"indent": 4}  # type: Dict[str, Any]
 
     def __init__(
         self,

diff --git a/kedro/io/parquet_local.py b/kedro/io/parquet_local.py
index 1fd154dbf8..1ef3ecf86c 100644
--- a/kedro/io/parquet_local.py
+++ b/kedro/io/parquet_local.py
@@ -62,8 +62,8 @@ class ParquetLocalDataSet(AbstractVersionedDataSet):
     >>> assert data.equals(loaded_data)
     """
 
-    DEFAULT_LOAD_ARGS = {}
-    DEFAULT_SAVE_ARGS = {"compression": None}
+    DEFAULT_LOAD_ARGS = {}  # type: Dict[str, Any]
+    DEFAULT_SAVE_ARGS = {"compression": None}  # type: Dict[str, Any]
 
     # pylint: disable=too-many-arguments
     def __init__(

diff --git a/kedro/io/pickle_local.py b/kedro/io/pickle_local.py
index 9299559d43..d7ada37e93 100644
--- a/kedro/io/pickle_local.py
+++ b/kedro/io/pickle_local.py
@@ -68,8 +68,8 @@ class PickleLocalDataSet(AbstractVersionedDataSet):
     >>> reloaded = data_set.load()
     """
 
-    DEFAULT_LOAD_ARGS = {}
-    DEFAULT_SAVE_ARGS = {}
+    DEFAULT_LOAD_ARGS = {}  # type: Dict[str, Any]
+    DEFAULT_SAVE_ARGS = {}  # type: Dict[str, Any]
 
     BACKENDS = {"pickle": pickle, "joblib": joblib}

diff --git a/kedro/io/pickle_s3.py b/kedro/io/pickle_s3.py
index cc19e82c68..283a5b679c 100644
--- a/kedro/io/pickle_s3.py
+++ b/kedro/io/pickle_s3.py
@@ -65,8 +65,8 @@ class PickleS3DataSet(AbstractVersionedDataSet):
     >>> reloaded = data_set.load()
     """
 
-    DEFAULT_LOAD_ARGS = {}
-    DEFAULT_SAVE_ARGS = {}
+    DEFAULT_LOAD_ARGS = {}  # type: Dict[str, Any]
+    DEFAULT_SAVE_ARGS = {}  # type: Dict[str, Any]
 
     # pylint: disable=too-many-arguments
     def __init__(

diff --git a/kedro/io/sql.py b/kedro/io/sql.py
index e94576fb94..b4f8e0fc1a 100644
--- a/kedro/io/sql.py
+++ b/kedro/io/sql.py
@@ -140,8 +140,8 @@ class SQLTableDataSet(AbstractDataSet):
 
     """
 
-    DEFAULT_LOAD_ARGS = {}
-    DEFAULT_SAVE_ARGS = {"index": False}
+    DEFAULT_LOAD_ARGS = {}  # type: Dict[str, Any]
+    DEFAULT_SAVE_ARGS = {"index": False}  # type: Dict[str, Any]
 
     def _describe(self) -> Dict[str, Any]:
         load_args = self._load_args.copy()

diff --git a/kedro/io/text_local.py b/kedro/io/text_local.py
index b308d6b9f0..483fbcd01e 100644
--- a/kedro/io/text_local.py
+++ b/kedro/io/text_local.py
@@ -51,8 +51,8 @@ class TextLocalDataSet(AbstractVersionedDataSet):
     >>> reloaded = data_set.load()
     """
 
-    DEFAULT_LOAD_ARGS = {"mode": "r"}
-    DEFAULT_SAVE_ARGS = {"mode": "w"}
+    DEFAULT_LOAD_ARGS = {"mode": "r"}  # type: Dict[str, Any]
+    DEFAULT_SAVE_ARGS = {"mode": "w"}  # type: Dict[str, Any]
 
     def __init__(
         self,
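Taken together, the series converges on a mix-in that owns all default-argument handling while each data set only declares its defaults. A condensed usage sketch, with a stand-in base class rather than kedro's real ``AbstractDataSet``:

import copy
from typing import Any, Dict, Optional


class DefaultArgumentsMixIn:
    DEFAULT_LOAD_ARGS = {}  # type: Dict[str, Any]
    DEFAULT_SAVE_ARGS = {}  # type: Dict[str, Any]

    def __init__(self, load_args=None, save_args=None):
        # type: (Optional[Dict[str, Any]], Optional[Dict[str, Any]]) -> None
        super().__init__()
        self._load_args = copy.deepcopy(self.DEFAULT_LOAD_ARGS)
        if load_args is not None:
            self._load_args.update(load_args)
        self._save_args = copy.deepcopy(self.DEFAULT_SAVE_ARGS)
        if save_args is not None:
            self._save_args.update(save_args)


class FakeCSVDataSet(DefaultArgumentsMixIn):  # kedro would also list AbstractDataSet
    DEFAULT_SAVE_ARGS = {"index": False}  # type: Dict[str, Any]


data_set = FakeCSVDataSet(save_args={"sep": "|"})
print(data_set._save_args)  # {'index': False, 'sep': '|'} -- defaults merged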