Skip to content

Commit

Permalink
fix(datasets): Correct pyproject.toml syntax for optional dependencies (
Browse files Browse the repository at this point in the history
kedro-org#302)

* Fix pyproject.toml syntax for optional dependencies

Signed-off-by: Dmitry Sorokin <dmd40in@gmail.com>

* refactor out the base dependencies

Signed-off-by: Nok <nok.lam.chan@quantumblack.com>

* add comments

Signed-off-by: Nok <nok.lam.chan@quantumblack.com>

* format pyproject.toml

Signed-off-by: Nok <nok.lam.chan@quantumblack.com>

* Reorder pandas dependencies

Signed-off-by: Nok <nok.lam.chan@quantumblack.com>

* reorder spark dependencies

Signed-off-by: Nok <nok.lam.chan@quantumblack.com>

* remove polars-base and delta-base

Signed-off-by: Nok <nok.lam.chan@quantumblack.com>

---------

Signed-off-by: Dmitry Sorokin <dmd40in@gmail.com>
Signed-off-by: Nok <nok.lam.chan@quantumblack.com>
Co-authored-by: Nok <nok.lam.chan@quantumblack.com>
Signed-off-by: Flavien Lambert <PetitLepton@users.noreply.github.com>
  • Loading branch information
2 people authored and PetitLepton committed Aug 15, 2023
1 parent 39b20a9 commit 90af832
Showing 1 changed file with 68 additions and 45 deletions.
113 changes: 68 additions & 45 deletions kedro-datasets/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,111 +17,134 @@ dependencies = [
dynamic = ["readme", "version"]

[project.optional-dependencies]
# Base dependencies
# Shared third-party pins, each defined once as its own extra so that dataset
# extras can pull them in through a self-reference such as
# `kedro-datasets[pandas-base]` instead of repeating the version range.
hdfs-base = ["hdfs>=2.5.8, <3.0"]
pandas-base = ["pandas>=1.3, <3.0"]
plotly-base = ["plotly>=4.8.0, <6.0"]
s3fs-base = ["s3fs>=0.3.0, <0.5"]
spark-base = ["pyspark>=2.2, <4.0"]


# Datasets dependencies
# Each family-level extra (`api`, `biosequence`, `dask`) holds a single
# self-reference to a per-dataset extra, and the per-dataset extra carries the
# actual requirement pins. The dotted extra name has to be a quoted TOML key:
# unquoted, the dot would be parsed as a nested-table separator.
# NOTE(review): this text is a rendered diff, so every dataset is listed twice.
# The hyphenated lower-case key looks like the removed line and the quoted key
# like its replacement (only the quoted name matches the self-reference).
api = ["kedro-datasets[api.APIDataSet]"]
api-apidataset = ["requests~=2.20"]
"api.APIDataSet" = ["requests~=2.20"]

biosequence = ["kedro-datasets[biosequence.BioSequenceDataSet]"]
biosequence-biosequencedataset = ["biopython~=1.73"]
"biosequence.BioSequenceDataSet" = ["biopython~=1.73"]

dask = ["kedro-datasets[dask.ParquetDataSet]"]
dask-parquetdataset = ["dask[complete]>=2021.10", "triad>=0.6.7, <1.0"]
"dask.ParquetDataSet" = ["dask[complete]>=2021.10", "triad>=0.6.7, <1.0"]

# ManagedTableDataSet needs the shared Spark and pandas pins plus delta-spark.
# NOTE(review): the quoted entry lists `delta-spark~=1.2.1` directly, while the
# hyphenated entry reaches the same pin through a separate `delta-base` extra.
# The commit message says `delta-base` was removed, so the two middle lines are
# presumably the deleted side of the diff.
databricks = ["kedro-datasets[databricks.ManagedTableDataSet]"]
databricks-managedtabledataset = ["kedro-datasets[spark-base,pandas-base,delta-base]"]
delta-base = ["delta-spark~=1.2.1"]
"databricks.ManagedTableDataSet" = ["kedro-datasets[spark-base,pandas-base]", "delta-spark~=1.2.1"]

# Single-dataset families: the family-level extra forwards to one per-dataset
# extra, which lists the third-party requirements.
# NOTE(review): rendered diff. Hyphenated keys appear to be the removed lines
# and the quoted keys their replacements.
geopandas = ["kedro-datasets[geopandas.GeoJSONDataSet]"]
geopandas-geojsondataset = ["geopandas>=0.6.0, <1.0", "pyproj~=3.0"]
# NOTE(review): `hdfs-base` is also defined in the base-dependencies group at
# the top of this table. A TOML table cannot define the same key twice, so this
# occurrence is presumably the removed line of the diff.
hdfs-base = ["hdfs>=2.5.8, <3.0"]
"geopandas.GeoJSONDataSet" = ["geopandas>=0.6.0, <1.0", "pyproj~=3.0"]

holoviews = ["kedro-datasets[holoviews.HoloviewsWriter]"]
holoviews-holoviewswriter = ["holoviews~=1.13.0"]
"holoviews.HoloviewsWriter" = ["holoviews~=1.13.0"]

matplotlib = ["kedro-datasets[matplotlib.MatplotlibWriter]"]
matplotlib-matplotlibwriter = ["matplotlib>=3.0.3, <4.0"]
"matplotlib.MatplotlibWriter" = ["matplotlib>=3.0.3, <4.0"]

networkx = ["kedro-datasets[networkx.NetworkXDataSet]"]
networkx-networkxdataset = ["networkx~=2.4"]
"networkx.NetworkXDataSet" = ["networkx~=2.4"]

# Family-level extra that installs every pandas dataset extra at once.
# The single array element is a multi-line basic string. Each trailing
# backslash removes the newline and any following whitespace, so the value
# collapses to one requirement of the form `kedro-datasets[a,b,c,...]`.
# NOTE(review): this is a rendered diff, so the string shows two consecutive
# `pandas.XMLDataSet` lines. The first one (which also appends
# `pandas.GenericDataSet` and has no trailing comma) looks like the removed
# line; `pandas.GenericDataSet` is listed separately further up in the string.
pandas = [
"""kedro-datasets[\
pandas.CSVDataSet,\
pandas.ExcelDataSet,\
pandas.DeltaTableDataSet,\
pandas.FeatherDataSet,\
pandas.GenericDataSet,\
pandas.GBQTableDataSet,\
pandas.GBQQueryDataSet,\
pandas.HDFDataSet,\
pandas.JSONDataSet,\
pandas.ParquetDataSet,\
pandas.SQLTableDataSet,\
pandas.SQLQueryDataSet,\
pandas.XMLDataSet,pandas.GenericDataSet\
pandas.XMLDataSet\
]"""
]
# Per-dataset pandas extras. Every entry pulls in `pandas-base`; entries that
# need an additional library (openpyxl, deltalake, pandas-gbq, tables, pyarrow,
# SQLAlchemy, pyodbc, lxml) list it after the self-reference.
# NOTE(review): this is a rendered diff. Hyphenated keys (apparently the
# removed lines) sit between the quoted keys that replace them, and some array
# bodies are shown once beneath two header lines. `pandas-base` below is also
# defined in the base-dependencies group at the top of the table; a TOML table
# cannot define a key twice, so only one of the two can exist in the real file.
pandas-base = ["pandas>=1.3, <3.0"]
pandas-csvdataset = ["kedro-datasets[pandas-base]"]
pandas-exceldataset = ["kedro-datasets[pandas-base]", "openpyxl>=3.0.6, <4.0"]
pandas-deltatabledataset= ["kedro-datasets[pandas-base]", "deltalake>=0.10.0"]
pandas-featherdataset = ["kedro-datasets[pandas-base]"]
pandas-gbqquerydataset = [
"pandas.CSVDataSet" = ["kedro-datasets[pandas-base]"]
"pandas.ExcelDataSet" = ["kedro-datasets[pandas-base]", "openpyxl>=3.0.6, <4.0"]
"pandas.DeltaTableDataSet" = ["kedro-datasets[pandas-base]", "deltalake>=0.10.0"]
"pandas.FeatherDataSet" = ["kedro-datasets[pandas-base]"]
"pandas.GenericDataSet" = ["kedro-datasets[pandas-base]"]
"pandas.GBQTableDataSet" = [
"kedro-datasets[pandas-base]",
"pandas-gbq>=0.12.0, <0.18.0"
]
pandas-gbqtabledataset = [
"pandas.GBQQueryDataSet" = [
"kedro-datasets[pandas-base]",
"pandas-gbq>=0.12.0, <0.18.0"
]
pandas-genericdataset = ["kedro-datasets[pandas-base]"]
pandas-hdfdataset = [
"pandas.HDFDataSet" = [
"kedro-datasets[pandas-base]",
# Environment markers select one `tables` pin per platform: Windows is held to
# the 3.6.x releases, every other platform accepts any 3.x release from 3.6 up.
"tables~=3.6.0; platform_system == 'Windows'",
"tables~=3.6; platform_system != 'Windows'"
]
pandas-jsondataset = ["kedro-datasets[pandas-base]"]
pandas-parquetdataset = ["kedro-datasets[pandas-base]", "pyarrow>=6.0"]
pandas-sqlquerydataset = [
"pandas.JSONDataSet" = ["kedro-datasets[pandas-base]"]
"pandas.ParquetDataSet" = ["kedro-datasets[pandas-base]", "pyarrow>=6.0"]
"pandas.SQLTableDataSet" = ["kedro-datasets[pandas-base]", "SQLAlchemy>=1.4, <3.0"]
# SQLQueryDataSet lists pyodbc in addition to the SQLAlchemy range that
# SQLTableDataSet uses.
"pandas.SQLQueryDataSet" = [
"kedro-datasets[pandas-base]",
"SQLAlchemy>=1.4, <3.0",
"pyodbc~=4.0"
]
pandas-sqltabledataset = ["kedro-datasets[pandas-base]", "SQLAlchemy>=1.4, <3.0"]
pandas-xmldataset = ["kedro-datasets[pandas-base]", "lxml~=4.6"]
"pandas.XMLDataSet" = ["kedro-datasets[pandas-base]", "lxml~=4.6"]

# `pickle` and `pillow` each forward to a single per-dataset extra.
# The pickle dataset requests compress-pickle together with its `lz4` extra.
# NOTE(review): rendered diff. Hyphenated keys appear to be the removed lines
# and the quoted keys their replacements.
pickle = ["kedro-datasets[pickle.PickleDataSet]"]
pickle-pickledataset = ["compress-pickle[lz4]~=2.1.0"]
"pickle.PickleDataSet" = ["compress-pickle[lz4]~=2.1.0"]

pillow = ["kedro-datasets[pillow.ImageDataSet]"]
pillow-imagedataset = ["Pillow~=9.0"]
"pillow.ImageDataSet" = ["Pillow~=9.0"]

# The family-level extra installs both plotly datasets. JSONDataSet needs only
# the shared plotly pin; PlotlyDataSet additionally pulls in `pandas-base`.
# NOTE(review): rendered diff. `plotly-base` and the hyphenated keys below look
# like removed lines. `plotly-base` is also defined in the base-dependencies
# group at the top of the table, and a TOML table cannot define a key twice.
plotly = ["kedro-datasets[plotly.PlotlyDataSet,plotly.JSONDataSet]"]
plotly-base = ["plotly>=4.8.0, <6.0"]
plotly-jsondataset = ["kedro-datasets[plotly-base]"]
plotly-plotlydataset = ["kedro-datasets[pandas-base,plotly-base]"]
"plotly.JSONDataSet" = ["kedro-datasets[plotly-base]"]
"plotly.PlotlyDataSet" = ["kedro-datasets[pandas-base,plotly-base]"]

# The quoted per-dataset extra pins polars directly.
# NOTE(review): the `polars-base` indirection and the hyphenated key look like
# the removed side of the diff; the commit message says `polars-base` was
# dropped.
polars = ["kedro-datasets[polars.CSVDataSet]"]
polars-base = ["polars~=0.17.0"]
polars-csvdataset = ["kedro-datasets[polars-base]"]
"polars.CSVDataSet" = ["polars~=0.17.0"]

# `redis` forwards to the single redis-backed pickle dataset extra.
# NOTE(review): rendered diff. `s3fs-base` is not referenced by the redis
# entries and is also defined in the base-dependencies group at the top of the
# table, so it is presumably a removed line, as is the hyphenated key.
redis = ["kedro-datasets[redis.PickleDataSet]"]
redis-pickledataset = ["redis~=4.1"]
s3fs-base = ["s3fs>=0.3.0, <0.5"]
"redis.PickleDataSet" = ["redis~=4.1"]

# NOTE(review): rendered diff. The array body is shown once under two header
# lines: the hyphenated key (apparently removed) and the quoted key that
# replaces it.
snowflake = ["kedro-datasets[snowflake.SnowparkTableDataSet]"]
snowflake-snowparktabledataset = [
"snowflake.SnowparkTableDataSet" = [
# The environment marker limits the snowpark requirement to Python 3.8
# interpreters; the pyarrow pin below has no marker and always applies.
"snowflake-snowpark-python~=1.0.0; python_version == '3.8'",
"pyarrow~=8.0"
]

# The family-level extra installs all four Spark dataset extras. Each of them
# pulls in the shared `spark-base`, `hdfs-base` and `s3fs-base` pins;
# DeltaTableDataSet additionally requires delta-spark.
# NOTE(review): rendered diff. The `spark` array shows two versions of the same
# element (differing only in the order of the extras) with no comma between
# them, so only one can be present in the real file. `spark-base` and the
# hyphenated keys also look like removed lines; `spark-base` is defined in the
# base-dependencies group at the top of the table as well.
spark = [
"kedro-datasets[spark.SparkDataSet,spark.SparkHiveDataSet,spark.SparkJDBCDataSet,spark.DeltaTableDataSet]"
"kedro-datasets[spark.DeltaTableDataSet,spark.SparkDataSet,spark.SparkHiveDataSet,spark.SparkJDBCDataSet]"
]
spark-base = ["pyspark>=2.2, <4.0"]
spark-deltatabledataset = [
"spark.DeltaTableDataSet" = [
"kedro-datasets[spark-base,hdfs-base,s3fs-base]",
"delta-spark>=1.0, <3.0"
]
spark-sparkdataset = ["kedro-datasets[spark-base,hdfs-base,s3fs-base]"]
spark-sparkhivedataset = ["kedro-datasets[spark-base,hdfs-base,s3fs-base]"]
spark-sparkjdbcdataset = ["kedro-datasets[spark-base,hdfs-base,s3fs-base]"]
"spark.SparkDataSet" = ["kedro-datasets[spark-base,hdfs-base,s3fs-base]"]
"spark.SparkHiveDataSet" = ["kedro-datasets[spark-base,hdfs-base,s3fs-base]"]
"spark.SparkJDBCDataSet" = ["kedro-datasets[spark-base,hdfs-base,s3fs-base]"]

# Both requirements use compatible-release pins with three components, which
# keeps scikit-learn on 1.0.x and scipy on 1.7.x.
# NOTE(review): rendered diff. The hyphenated key appears to be the removed
# line and the quoted key its replacement.
svmlight = ["kedro-datasets[svmlight.SVMLightDataSet]"]
svmlight-svmlightdataset = ["scikit-learn~=1.0.2", "scipy~=1.7.3"]
"svmlight.SVMLightDataSet" = ["scikit-learn~=1.0.2", "scipy~=1.7.3"]

# The two requirements carry complementary environment markers, so exactly one
# applies on any machine: `tensorflow-macos` on Darwin/arm64, `tensorflow`
# everywhere else.
# NOTE(review): rendered diff. The array body is shown once under two header
# lines: the hyphenated key (apparently removed) and the quoted key that
# replaces it.
tensorflow = ["kedro-datasets[tensorflow.TensorFlowModelDataSet]"]
tensorflow-tensorflowmodeldataset = [
"tensorflow.TensorFlowModelDataSet" = [
# currently only TensorFlow V2 supported for saving and loading.
# V1 requires HDF5 and serialises differently
"tensorflow~=2.0; platform_system != 'Darwin' or platform_machine != 'arm64'",
# https://developer.apple.com/metal/tensorflow-plugin/
"tensorflow-macos~=2.0; platform_system == 'Darwin' and platform_machine == 'arm64'"
]

# `video` and `yaml` each forward to a single per-dataset extra.
# The YAML dataset extra lists the shared pandas pin alongside PyYAML.
# NOTE(review): rendered diff. Hyphenated keys appear to be the removed lines
# and the quoted keys their replacements.
video = ["kedro-datasets[video.VideoDataSet]"]
video-videodataset = ["opencv-python~=4.5.5.64"]
"video.VideoDataSet" = ["opencv-python~=4.5.5.64"]

yaml = ["kedro-datasets[yaml.YAMLDataSet]"]
yaml-yamldataset = ["kedro-datasets[pandas-base]", "PyYAML>=4.2, <7.0"]
"yaml.YAMLDataSet" = ["kedro-datasets[pandas-base]", "PyYAML>=4.2, <7.0"]

all = [
"""kedro-datasets[\
Expand Down

0 comments on commit 90af832

Please sign in to comment.