diff --git a/kedro-datasets/pyproject.toml b/kedro-datasets/pyproject.toml index a5ba7e0d4..90b177f8d 100644 --- a/kedro-datasets/pyproject.toml +++ b/kedro-datasets/pyproject.toml @@ -17,29 +17,46 @@ dependencies = [ dynamic = ["readme", "version"] [project.optional-dependencies] +# Base dependencies +hdfs-base = ["hdfs>=2.5.8, <3.0"] +pandas-base = ["pandas>=1.3, <3.0"] +plotly-base = ["plotly>=4.8.0, <6.0"] +s3fs-base = ["s3fs>=0.3.0, <0.5"] +spark-base = ["pyspark>=2.2, <4.0"] + + +# Datasets dependencies api = ["kedro-datasets[api.APIDataSet]"] -api-apidataset = ["requests~=2.20"] +"api.APIDataSet" = ["requests~=2.20"] + biosequence = ["kedro-datasets[biosequence.BioSequenceDataSet]"] -biosequence-biosequencedataset = ["biopython~=1.73"] +"biosequence.BioSequenceDataSet" = ["biopython~=1.73"] + dask = ["kedro-datasets[dask.ParquetDataSet]"] -dask-parquetdataset = ["dask[complete]>=2021.10", "triad>=0.6.7, <1.0"] +"dask.ParquetDataSet" = ["dask[complete]>=2021.10", "triad>=0.6.7, <1.0"] + databricks = ["kedro-datasets[databricks.ManagedTableDataSet]"] -databricks-managedtabledataset = ["kedro-datasets[spark-base,pandas-base,delta-base]"] -delta-base = ["delta-spark~=1.2.1"] +"databricks.ManagedTableDataSet" = ["kedro-datasets[spark-base,pandas-base]", "delta-spark~=1.2.1"] + geopandas = ["kedro-datasets[geopandas.GeoJSONDataSet]"] -geopandas-geojsondataset = ["geopandas>=0.6.0, <1.0", "pyproj~=3.0"] -hdfs-base = ["hdfs>=2.5.8, <3.0"] +"geopandas.GeoJSONDataSet" = ["geopandas>=0.6.0, <1.0", "pyproj~=3.0"] + holoviews = ["kedro-datasets[holoviews.HoloviewsWriter]"] -holoviews-holoviewswriter = ["holoviews~=1.13.0"] +"holoviews.HoloviewsWriter" = ["holoviews~=1.13.0"] + matplotlib = ["kedro-datasets[matplotlib.MatplotlibWriter]"] -matplotlib-matplotlibwriter = ["matplotlib>=3.0.3, <4.0"] +"matplotlib.MatplotlibWriter" = ["matplotlib>=3.0.3, <4.0"] + networkx = ["kedro-datasets[networkx.NetworkXDataSet]"] -networkx-networkxdataset = ["networkx~=2.4"] +"networkx.NetworkXDataSet" = ["networkx~=2.4"] + pandas = [ """kedro-datasets[\ pandas.CSVDataSet,\ pandas.ExcelDataSet,\ + pandas.DeltaTableDataSet,\ pandas.FeatherDataSet,\ + pandas.GenericDataSet,\ pandas.GBQTableDataSet,\ pandas.GBQQueryDataSet,\ pandas.HDFDataSet,\ @@ -47,81 +64,87 @@ pandas = [ pandas.ParquetDataSet,\ pandas.SQLTableDataSet,\ pandas.SQLQueryDataSet,\ - pandas.XMLDataSet,pandas.GenericDataSet\ + pandas.XMLDataSet\ ]""" ] -pandas-base = ["pandas>=1.3, <3.0"] -pandas-csvdataset = ["kedro-datasets[pandas-base]"] -pandas-exceldataset = ["kedro-datasets[pandas-base]", "openpyxl>=3.0.6, <4.0"] -pandas-deltatabledataset= ["kedro-datasets[pandas-base]", "deltalake>=0.10.0"] -pandas-featherdataset = ["kedro-datasets[pandas-base]"] -pandas-gbqquerydataset = [ +"pandas.CSVDataSet" = ["kedro-datasets[pandas-base]"] +"pandas.ExcelDataSet" = ["kedro-datasets[pandas-base]", "openpyxl>=3.0.6, <4.0"] +"pandas.DeltaTableDataSet" = ["kedro-datasets[pandas-base]", "deltalake>=0.10.0"] +"pandas.FeatherDataSet" = ["kedro-datasets[pandas-base]"] +"pandas.GenericDataSet" = ["kedro-datasets[pandas-base]"] +"pandas.GBQTableDataSet" = [ "kedro-datasets[pandas-base]", "pandas-gbq>=0.12.0, <0.18.0" ] -pandas-gbqtabledataset = [ +"pandas.GBQQueryDataSet" = [ "kedro-datasets[pandas-base]", "pandas-gbq>=0.12.0, <0.18.0" ] -pandas-genericdataset = ["kedro-datasets[pandas-base]"] -pandas-hdfdataset = [ +"pandas.HDFDataSet" = [ "kedro-datasets[pandas-base]", "tables~=3.6.0; platform_system == 'Windows'", "tables~=3.6; platform_system != 'Windows'" ] -pandas-jsondataset = ["kedro-datasets[pandas-base]"] -pandas-parquetdataset = ["kedro-datasets[pandas-base]", "pyarrow>=6.0"] -pandas-sqlquerydataset = [ +"pandas.JSONDataSet" = ["kedro-datasets[pandas-base]"] +"pandas.ParquetDataSet" = ["kedro-datasets[pandas-base]", "pyarrow>=6.0"] +"pandas.SQLTableDataSet" = ["kedro-datasets[pandas-base]", "SQLAlchemy>=1.4, <3.0"] +"pandas.SQLQueryDataSet" = [ "kedro-datasets[pandas-base]", "SQLAlchemy>=1.4, <3.0", "pyodbc~=4.0" ] -pandas-sqltabledataset = ["kedro-datasets[pandas-base]", "SQLAlchemy>=1.4, <3.0"] -pandas-xmldataset = ["kedro-datasets[pandas-base]", "lxml~=4.6"] +"pandas.XMLDataSet" = ["kedro-datasets[pandas-base]", "lxml~=4.6"] + pickle = ["kedro-datasets[pickle.PickleDataSet]"] -pickle-pickledataset = ["compress-pickle[lz4]~=2.1.0"] +"pickle.PickleDataSet" = ["compress-pickle[lz4]~=2.1.0"] + pillow = ["kedro-datasets[pillow.ImageDataSet]"] -pillow-imagedataset = ["Pillow~=9.0"] +"pillow.ImageDataSet" = ["Pillow~=9.0"] + plotly = ["kedro-datasets[plotly.PlotlyDataSet,plotly.JSONDataSet]"] -plotly-base = ["plotly>=4.8.0, <6.0"] -plotly-jsondataset = ["kedro-datasets[plotly-base]"] -plotly-plotlydataset = ["kedro-datasets[pandas-base,plotly-base]"] +"plotly.JSONDataSet" = ["kedro-datasets[plotly-base]"] +"plotly.PlotlyDataSet" = ["kedro-datasets[pandas-base,plotly-base]"] + polars = ["kedro-datasets[polars.CSVDataSet]"] -polars-base = ["polars~=0.17.0"] -polars-csvdataset = ["kedro-datasets[polars-base]"] +"polars.CSVDataSet" = ["polars~=0.17.0"] + redis = ["kedro-datasets[redis.PickleDataSet]"] -redis-pickledataset = ["redis~=4.1"] -s3fs-base = ["s3fs>=0.3.0, <0.5"] +"redis.PickleDataSet" = ["redis~=4.1"] + snowflake = ["kedro-datasets[snowflake.SnowparkTableDataSet]"] -snowflake-snowparktabledataset = [ +"snowflake.SnowparkTableDataSet" = [ "snowflake-snowpark-python~=1.0.0; python_version == '3.8'", "pyarrow~=8.0" ] + spark = [ - "kedro-datasets[spark.SparkDataSet,spark.SparkHiveDataSet,spark.SparkJDBCDataSet,spark.DeltaTableDataSet]" + "kedro-datasets[spark.DeltaTableDataSet,spark.SparkDataSet,spark.SparkHiveDataSet,spark.SparkJDBCDataSet]" ] -spark-base = ["pyspark>=2.2, <4.0"] -spark-deltatabledataset = [ +"spark.DeltaTableDataSet" = [ "kedro-datasets[spark-base,hdfs-base,s3fs-base]", "delta-spark>=1.0, <3.0" ] -spark-sparkdataset = ["kedro-datasets[spark-base,hdfs-base,s3fs-base]"] -spark-sparkhivedataset = ["kedro-datasets[spark-base,hdfs-base,s3fs-base]"] -spark-sparkjdbcdataset = ["kedro-datasets[spark-base,hdfs-base,s3fs-base]"] +"spark.SparkDataSet" = ["kedro-datasets[spark-base,hdfs-base,s3fs-base]"] +"spark.SparkHiveDataSet" = ["kedro-datasets[spark-base,hdfs-base,s3fs-base]"] +"spark.SparkJDBCDataSet" = ["kedro-datasets[spark-base,hdfs-base,s3fs-base]"] + svmlight = ["kedro-datasets[svmlight.SVMLightDataSet]"] -svmlight-svmlightdataset = ["scikit-learn~=1.0.2", "scipy~=1.7.3"] +"svmlight.SVMLightDataSet" = ["scikit-learn~=1.0.2", "scipy~=1.7.3"] + tensorflow = ["kedro-datasets[tensorflow.TensorFlowModelDataSet]"] -tensorflow-tensorflowmodeldataset = [ +"tensorflow.TensorFlowModelDataSet" = [ # currently only TensorFlow V2 supported for saving and loading. # V1 requires HDF5 and serialises differently "tensorflow~=2.0; platform_system != 'Darwin' or platform_machine != 'arm64'", # https://developer.apple.com/metal/tensorflow-plugin/ "tensorflow-macos~=2.0; platform_system == 'Darwin' and platform_machine == 'arm64'" ] + video = ["kedro-datasets[video.VideoDataSet]"] -video-videodataset = ["opencv-python~=4.5.5.64"] +"video.VideoDataSet" = ["opencv-python~=4.5.5.64"] + yaml = ["kedro-datasets[yaml.YAMLDataSet]"] -yaml-yamldataset = ["kedro-datasets[pandas-base]", "PyYAML>=4.2, <7.0"] +"yaml.YAMLDataSet" = ["kedro-datasets[pandas-base]", "PyYAML>=4.2, <7.0"] all = [ """kedro-datasets[\