From 77aaecd9d2056a4ef4c8d09aa339a9c22b352e01 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Fri, 31 May 2024 10:53:44 -0400 Subject: [PATCH] fix(deps): update dependency datafusion to v38 (#9278) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> Co-authored-by: Phillip Cloud <417981+cpcloud@users.noreply.github.com> --- ibis/backends/datafusion/__init__.py | 4 ++-- ibis/backends/datafusion/tests/test_udf.py | 7 ++++++- ibis/backends/tests/test_aggregation.py | 5 +++++ ibis/backends/tests/test_generic.py | 3 +++ poetry.lock | 14 +++++++------- pyproject.toml | 2 +- requirements-dev.txt | 2 +- 7 files changed, 25 insertions(+), 12 deletions(-) diff --git a/ibis/backends/datafusion/__init__.py b/ibis/backends/datafusion/__init__.py index 330ea46685a0..37851690ac06 100644 --- a/ibis/backends/datafusion/__init__.py +++ b/ibis/backends/datafusion/__init__.py @@ -503,8 +503,8 @@ def read_delta( ) delta_table = DeltaTable(source_table, **kwargs) - - return self.register(delta_table.to_pyarrow_dataset(), table_name=table_name) + self.con.register_dataset(table_name, delta_table.to_pyarrow_dataset()) + return self.table(table_name) def to_pyarrow_batches( self, diff --git a/ibis/backends/datafusion/tests/test_udf.py b/ibis/backends/datafusion/tests/test_udf.py index ef6fbe8c8614..f6e79624fba2 100644 --- a/ibis/backends/datafusion/tests/test_udf.py +++ b/ibis/backends/datafusion/tests/test_udf.py @@ -2,13 +2,14 @@ import pandas.testing as tm import pytest +from packaging.version import parse as vparse import ibis.expr.datatypes as dt import ibis.expr.types as ir from ibis import udf from ibis.legacy.udf.vectorized import elementwise, reduction -pytest.importorskip("datafusion") +datafusion = pytest.importorskip("datafusion") pc = pytest.importorskip("pyarrow.compute") with pytest.warns(FutureWarning, match="v9.0"): @@ -68,6 +69,10 @@ def median(a: float) -> float: assert result == con.tables.batting.G.execute().median() +@pytest.mark.xfail( + condition=vparse(datafusion.__version__) == vparse("38.0.1"), + reason="internal error about MEDIAN(G) naming", +) def test_builtin_agg_udf_filtered(con): @udf.agg.builtin def median(a: float, where: bool = True) -> float: diff --git a/ibis/backends/tests/test_aggregation.py b/ibis/backends/tests/test_aggregation.py index 79ddf65c24bc..c6d926c892a7 100644 --- a/ibis/backends/tests/test_aggregation.py +++ b/ibis/backends/tests/test_aggregation.py @@ -892,6 +892,11 @@ def test_quantile( raises=PsycoPg2InternalError, reason="function covar_pop(integer, integer) does not exist", ), + pytest.mark.xfail_version( + datafusion=["datafusion==38.0.1"], + reason="datafusion FILTER syntax seems broken", + strict=False, # passes with no filter condition + ), ], ), param( diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index 80c6f9b4ddd0..389345de868d 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -1282,6 +1282,9 @@ def test_pivot_longer(backend): assert len(res.execute()) == len(expected) +@pytest.mark.xfail_version( + datafusion=["datafusion==38.0.1"], reason="internal error about MEDIAN(G) naming" +) def test_pivot_wider(backend): diamonds = backend.diamonds expr = ( diff --git a/poetry.lock b/poetry.lock index a661a02698c9..446a3a228550 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1387,16 +1387,16 @@ test = ["pandas[test]", "pre-commit", "pytest", "pytest-cov", "pytest-rerunfailu [[package]] name = "datafusion" -version = "37.1.0" +version = "38.0.1" description = "Build and run queries against data" optional = true python-versions = ">=3.6" files = [ - {file = "datafusion-37.1.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:6304e0e24631798ce25e642e6da1f1b0518e0fa2fe85ef67b1de7c157154706c"}, - {file = "datafusion-37.1.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8888f94e27bdd2b7dbe0941b1c0b9fe6a518d6da361dc7a269544633e614be91"}, - {file = "datafusion-37.1.0-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:8cac79595d1d0201ac5c5bedbe4afe003ed8c91f584432b7bebfa232951a0c00"}, - {file = "datafusion-37.1.0-cp38-abi3-win_amd64.whl", hash = "sha256:033ad0ffd4f0ff7671bbe7f8d1118f7c5bc95b0ee4855ab59c5824807b824c1a"}, - {file = "datafusion-37.1.0.tar.gz", hash = "sha256:2fcdfbfe5cc9b6f68c5fc78c16178ac0812fa9e0a8c3fc55e9de289ef9822c22"}, + {file = "datafusion-38.0.1-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:bbff9ce713586307286688cc0cdb5fec30d542580bf701105422a159f1142e19"}, + {file = "datafusion-38.0.1-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c7eba54e866fd1f85cb83c9300f464c2327e5c54fb9f6f40ccc4bda53e8ad74"}, + {file = "datafusion-38.0.1-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:bd1ccac3c16822e0c9956e00863328de2f15eae40b096256d349e070769892cc"}, + {file = "datafusion-38.0.1-cp38-abi3-win_amd64.whl", hash = "sha256:acded6a4c892a6ec654fe426117a998aa5697b5b41fadc370bd78ecc82efd8f9"}, + {file = "datafusion-38.0.1.tar.gz", hash = "sha256:d117c1670db39e15f66a4181a5cbc44b268ba5306fe97825aa2fbda6397580f2"}, ] [package.dependencies] @@ -8089,4 +8089,4 @@ visualization = ["graphviz"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "4c204b7ba73fd9a0894e8e6ea16cd55084351654217b34c15d9d2ad450bc2e40" +content-hash = "2d0bfc10efa3b7d3ef35cdbe33234fbffb8b47f941998f297aa9271524045a74" diff --git a/pyproject.toml b/pyproject.toml index 13b48017840a..ddfb9427c6d0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,7 +60,7 @@ dask = { version = ">=2022.9.1,<2024.3.0", optional = true, extras = [ "array", "dataframe", ] } -datafusion = { version = ">=0.6,<38", optional = true } +datafusion = { version = ">=0.6,<39", optional = true } db-dtypes = { version = ">=0.3,<2", optional = true } deltalake = { version = ">=0.9.0,<1", optional = true } duckdb = { version = ">=0.8.1,<1", optional = true } diff --git a/requirements-dev.txt b/requirements-dev.txt index ef895dffc668..03a64276181a 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -48,7 +48,7 @@ crashtest==0.4.1 ; python_version >= "3.10" and python_version < "4.0" cryptography==42.0.7 ; python_version >= "3.10" and python_version < "4.0" cycler==0.12.1 ; python_version >= "3.10" and python_version < "3.13" dask[array,dataframe]==2024.2.1 ; python_version >= "3.10" and python_version < "4.0" -datafusion==37.1.0 ; python_version >= "3.10" and python_version < "4.0" +datafusion==38.0.1 ; python_version >= "3.10" and python_version < "4.0" db-dtypes==1.2.0 ; python_version >= "3.10" and python_version < "4.0" debugpy==1.8.1 ; python_version >= "3.10" and python_version < "3.13" decorator==5.1.1 ; python_version >= "3.10" and python_version < "4.0"