Skip to content

Commit

Permalink
fix(datasets): remove deprecation warnings (#255)
Browse files Browse the repository at this point in the history
* Unpin pytest-xdist to avoid deprecation error

Signed-off-by: Juan Luis Cano Rodríguez <juan_luis_cano@mckinsey.com>

* Remove deprecation warnings from kedro-datasets

Signed-off-by: Juan Luis Cano Rodríguez <juan_luis_cano@mckinsey.com>

* Adapt to future deprecation of Abstract*DataSet

Signed-off-by: Juan Luis Cano Rodríguez <juan_luis_cano@mckinsey.com>

* Add tests for backwards and forwards compatibility

Signed-off-by: Juan Luis Cano Rodríguez <juan_luis_cano@mckinsey.com>

* Typo

Co-authored-by: Deepyaman Datta <deepyaman.datta@utexas.edu>
Signed-off-by: Juan Luis Cano Rodríguez <juan_luis_cano@mckinsey.com>

* Make _io tests restore old values

Signed-off-by: Juan Luis Cano Rodríguez <juan_luis_cano@mckinsey.com>

* Lint

Signed-off-by: Juan Luis Cano Rodríguez <juan_luis_cano@mckinsey.com>

* Ignore faulty pylint check

See pylint-dev/pylint#8865

Signed-off-by: Juan Luis Cano Rodríguez <juan_luis_cano@mckinsey.com>

* Disable a few more faulty pylint checks

Signed-off-by: Juan Luis Cano Rodríguez <juan_luis_cano@mckinsey.com>

* Remove shim from tensorflow datasets

This needs to be addressed later on, see TODO.

Signed-off-by: Juan Luis Cano Rodríguez <juan_luis_cano@mckinsey.com>

---------

Signed-off-by: Juan Luis Cano Rodríguez <juan_luis_cano@mckinsey.com>
Signed-off-by: Deepyaman Datta <deepyaman.datta@utexas.edu>
Co-authored-by: Deepyaman Datta <deepyaman.datta@utexas.edu>
  • Loading branch information
astrojuanlu and deepyaman authored Jul 24, 2023
1 parent 8fb01ef commit 9cb6a16
Show file tree
Hide file tree
Showing 42 changed files with 274 additions and 156 deletions.
20 changes: 20 additions & 0 deletions kedro-datasets/kedro_datasets/_io.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"""
Adapter for kedro.io.core for backwards compatibility.
"""

# Each ``try`` imports the newer ``Dataset`` spelling first and falls back to
# the older ``DataSet`` spelling, aliased to the new name, so the rest of the
# package can import a single name from this module regardless of which
# kedro version is installed.

try:
    # kedro 0.18.11 onwards
    from kedro.io.core import DatasetError
except ImportError:
    # older versions
    from kedro.io.core import DataSetError as DatasetError

try:
    # kedro 0.18.12 onwards
    from kedro.io.core import AbstractDataset, AbstractVersionedDataset
except ImportError:
    # older versions
    from kedro.io.core import AbstractDataSet as AbstractDataset
    from kedro.io.core import AbstractVersionedDataSet as AbstractVersionedDataset

# Only the new-style names are re-exported from this module.
__all__ = ["AbstractDataset", "AbstractVersionedDataset", "DatasetError"]
4 changes: 3 additions & 1 deletion kedro-datasets/kedro_datasets/api/api_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@
from typing import Any, Dict, List, Tuple, Union

import requests
from kedro.io.core import AbstractDataSet, DataSetError
from requests import Session, sessions
from requests.auth import AuthBase

from .._io import AbstractDataset as AbstractDataSet
from .._io import DatasetError as DataSetError


class APIDataSet(AbstractDataSet[None, requests.Response]):
"""``APIDataSet`` loads/saves data from/to HTTP(S) APIs.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@

import fsspec
from Bio import SeqIO
from kedro.io.core import AbstractDataSet, get_filepath_str, get_protocol_and_path
from kedro.io.core import get_filepath_str, get_protocol_and_path

from .._io import AbstractDataset as AbstractDataSet


class BioSequenceDataSet(AbstractDataSet[List, List]):
Expand Down
4 changes: 3 additions & 1 deletion kedro-datasets/kedro_datasets/dask/parquet_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
import dask.dataframe as dd
import fsspec
import triad
from kedro.io.core import AbstractDataSet, get_protocol_and_path
from kedro.io.core import get_protocol_and_path

from .._io import AbstractDataset as AbstractDataSet


class ParquetDataSet(AbstractDataSet[dd.DataFrame, dd.DataFrame]):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,14 @@
from typing import Any, Dict, List, Optional, Union

import pandas as pd
from kedro.io.core import (
AbstractVersionedDataSet,
DataSetError,
Version,
VersionNotFoundError,
)
from kedro.io.core import Version, VersionNotFoundError
from pyspark.sql import DataFrame, SparkSession
from pyspark.sql.types import StructType
from pyspark.sql.utils import AnalysisException, ParseException

from .._io import AbstractVersionedDataset as AbstractVersionedDataSet
from .._io import DatasetError as DataSetError

logger = logging.getLogger(__name__)


Expand Down
11 changes: 4 additions & 7 deletions kedro-datasets/kedro_datasets/email/message_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,10 @@
from typing import Any, Dict

import fsspec
from kedro.io.core import (
AbstractVersionedDataSet,
DataSetError,
Version,
get_filepath_str,
get_protocol_and_path,
)
from kedro.io.core import Version, get_filepath_str, get_protocol_and_path

from .._io import AbstractVersionedDataset as AbstractVersionedDataSet
from .._io import DatasetError as DataSetError


class EmailMessageDataSet(AbstractVersionedDataSet[Message, Message]):
Expand Down
11 changes: 4 additions & 7 deletions kedro-datasets/kedro_datasets/geopandas/geojson_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,10 @@

import fsspec
import geopandas as gpd
from kedro.io.core import (
AbstractVersionedDataSet,
DataSetError,
Version,
get_filepath_str,
get_protocol_and_path,
)
from kedro.io.core import Version, get_filepath_str, get_protocol_and_path

from .._io import AbstractVersionedDataset as AbstractVersionedDataSet
from .._io import DatasetError as DataSetError


class GeoJSONDataSet(
Expand Down
11 changes: 4 additions & 7 deletions kedro-datasets/kedro_datasets/holoviews/holoviews_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,10 @@

import fsspec
import holoviews as hv
from kedro.io.core import (
AbstractVersionedDataSet,
DataSetError,
Version,
get_filepath_str,
get_protocol_and_path,
)
from kedro.io.core import Version, get_filepath_str, get_protocol_and_path

from .._io import AbstractVersionedDataset as AbstractVersionedDataSet
from .._io import DatasetError as DataSetError

# HoloViews to be passed in `hv.save()`
HoloViews = TypeVar("HoloViews")
Expand Down
11 changes: 4 additions & 7 deletions kedro-datasets/kedro_datasets/json/json_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,10 @@
from typing import Any, Dict

import fsspec
from kedro.io.core import (
AbstractVersionedDataSet,
DataSetError,
Version,
get_filepath_str,
get_protocol_and_path,
)
from kedro.io.core import Version, get_filepath_str, get_protocol_and_path

from .._io import AbstractVersionedDataset as AbstractVersionedDataSet
from .._io import DatasetError as DataSetError


class JSONDataSet(AbstractVersionedDataSet[Any, Any]):
Expand Down
11 changes: 4 additions & 7 deletions kedro-datasets/kedro_datasets/matplotlib/matplotlib_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,10 @@

import fsspec
import matplotlib.pyplot as plt
from kedro.io.core import (
AbstractVersionedDataSet,
DataSetError,
Version,
get_filepath_str,
get_protocol_and_path,
)
from kedro.io.core import Version, get_filepath_str, get_protocol_and_path

from .._io import AbstractVersionedDataset as AbstractVersionedDataSet
from .._io import DatasetError as DataSetError


class MatplotlibWriter(
Expand Down
9 changes: 3 additions & 6 deletions kedro-datasets/kedro_datasets/networkx/gml_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,9 @@

import fsspec
import networkx
from kedro.io.core import (
AbstractVersionedDataSet,
Version,
get_filepath_str,
get_protocol_and_path,
)
from kedro.io.core import Version, get_filepath_str, get_protocol_and_path

from .._io import AbstractVersionedDataset as AbstractVersionedDataSet


class GMLDataSet(AbstractVersionedDataSet[networkx.Graph, networkx.Graph]):
Expand Down
9 changes: 3 additions & 6 deletions kedro-datasets/kedro_datasets/networkx/graphml_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,9 @@

import fsspec
import networkx
from kedro.io.core import (
AbstractVersionedDataSet,
Version,
get_filepath_str,
get_protocol_and_path,
)
from kedro.io.core import Version, get_filepath_str, get_protocol_and_path

from .._io import AbstractVersionedDataset as AbstractVersionedDataSet


class GraphMLDataSet(AbstractVersionedDataSet[networkx.Graph, networkx.Graph]):
Expand Down
9 changes: 3 additions & 6 deletions kedro-datasets/kedro_datasets/networkx/json_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,9 @@

import fsspec
import networkx
from kedro.io.core import (
AbstractVersionedDataSet,
Version,
get_filepath_str,
get_protocol_and_path,
)
from kedro.io.core import Version, get_filepath_str, get_protocol_and_path

from .._io import AbstractVersionedDataset as AbstractVersionedDataSet


class JSONDataSet(AbstractVersionedDataSet[networkx.Graph, networkx.Graph]):
Expand Down
5 changes: 3 additions & 2 deletions kedro-datasets/kedro_datasets/pandas/csv_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,14 @@
import pandas as pd
from kedro.io.core import (
PROTOCOL_DELIMITER,
AbstractVersionedDataSet,
DataSetError,
Version,
get_filepath_str,
get_protocol_and_path,
)

from .._io import AbstractVersionedDataset as AbstractVersionedDataSet
from .._io import DatasetError as DataSetError

logger = logging.getLogger(__name__)


Expand Down
5 changes: 3 additions & 2 deletions kedro-datasets/kedro_datasets/pandas/excel_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,14 @@
import pandas as pd
from kedro.io.core import (
PROTOCOL_DELIMITER,
AbstractVersionedDataSet,
DataSetError,
Version,
get_filepath_str,
get_protocol_and_path,
)

from .._io import AbstractVersionedDataset as AbstractVersionedDataSet
from .._io import DatasetError as DataSetError

logger = logging.getLogger(__name__)


Expand Down
3 changes: 2 additions & 1 deletion kedro-datasets/kedro_datasets/pandas/feather_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,13 @@
import pandas as pd
from kedro.io.core import (
PROTOCOL_DELIMITER,
AbstractVersionedDataSet,
Version,
get_filepath_str,
get_protocol_and_path,
)

from .._io import AbstractVersionedDataset as AbstractVersionedDataSet

logger = logging.getLogger(__name__)


Expand Down
7 changes: 4 additions & 3 deletions kedro-datasets/kedro_datasets/pandas/gbq_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,14 @@
from google.cloud.exceptions import NotFound
from google.oauth2.credentials import Credentials
from kedro.io.core import (
AbstractDataSet,
DataSetError,
get_filepath_str,
get_protocol_and_path,
validate_on_forbidden_chars,
)

from .._io import AbstractDataset as AbstractDataSet
from .._io import DatasetError as DataSetError


class GBQTableDataSet(AbstractDataSet[None, pd.DataFrame]):
"""``GBQTableDataSet`` loads and saves data from/to Google BigQuery.
Expand Down Expand Up @@ -316,5 +317,5 @@ def _load(self) -> pd.DataFrame:
**load_args,
)

def _save(self, data: None) -> NoReturn:
def _save(self, data: None) -> NoReturn: # pylint: disable=no-self-use
raise DataSetError("'save' is not supported on GBQQueryDataSet")
11 changes: 4 additions & 7 deletions kedro-datasets/kedro_datasets/pandas/generic_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,10 @@

import fsspec
import pandas as pd
from kedro.io.core import (
AbstractVersionedDataSet,
DataSetError,
Version,
get_filepath_str,
get_protocol_and_path,
)
from kedro.io.core import Version, get_filepath_str, get_protocol_and_path

from .._io import AbstractVersionedDataset as AbstractVersionedDataSet
from .._io import DatasetError as DataSetError

NON_FILE_SYSTEM_TARGETS = [
"clipboard",
Expand Down
11 changes: 4 additions & 7 deletions kedro-datasets/kedro_datasets/pandas/hdf_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,10 @@

import fsspec
import pandas as pd
from kedro.io.core import (
AbstractVersionedDataSet,
DataSetError,
Version,
get_filepath_str,
get_protocol_and_path,
)
from kedro.io.core import Version, get_filepath_str, get_protocol_and_path

from .._io import AbstractVersionedDataset as AbstractVersionedDataSet
from .._io import DatasetError as DataSetError

HDFSTORE_DRIVER = "H5FD_CORE"

Expand Down
5 changes: 3 additions & 2 deletions kedro-datasets/kedro_datasets/pandas/json_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,14 @@
import pandas as pd
from kedro.io.core import (
PROTOCOL_DELIMITER,
AbstractVersionedDataSet,
DataSetError,
Version,
get_filepath_str,
get_protocol_and_path,
)

from .._io import AbstractVersionedDataset as AbstractVersionedDataSet
from .._io import DatasetError as DataSetError

logger = logging.getLogger(__name__)


Expand Down
5 changes: 3 additions & 2 deletions kedro-datasets/kedro_datasets/pandas/parquet_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,14 @@
import pandas as pd
from kedro.io.core import (
PROTOCOL_DELIMITER,
AbstractVersionedDataSet,
DataSetError,
Version,
get_filepath_str,
get_protocol_and_path,
)

from .._io import AbstractVersionedDataset as AbstractVersionedDataSet
from .._io import DatasetError as DataSetError

logger = logging.getLogger(__name__)


Expand Down
12 changes: 5 additions & 7 deletions kedro-datasets/kedro_datasets/pandas/sql_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,13 @@

import fsspec
import pandas as pd
from kedro.io.core import (
AbstractDataSet,
DataSetError,
get_filepath_str,
get_protocol_and_path,
)
from kedro.io.core import get_filepath_str, get_protocol_and_path
from sqlalchemy import create_engine, inspect
from sqlalchemy.exc import NoSuchModuleError

from .._io import AbstractDataset as AbstractDataSet
from .._io import DatasetError as DataSetError

__all__ = ["SQLTableDataSet", "SQLQueryDataSet"]

KNOWN_PIP_INSTALL = {
Expand Down Expand Up @@ -511,7 +509,7 @@ def _load(self) -> pd.DataFrame:

return pd.read_sql_query(con=engine, **load_args)

def _save(self, data: None) -> NoReturn:
def _save(self, data: None) -> NoReturn: # pylint: disable=no-self-use
raise DataSetError("'save' is not supported on SQLQueryDataSet")

# For mssql only
Expand Down
Loading

0 comments on commit 9cb6a16

Please sign in to comment.