diff --git a/ibis/backends/sql/compilers/base.py b/ibis/backends/sql/compilers/base.py index c81ad0f22f63..2558a0e65766 100644 --- a/ibis/backends/sql/compilers/base.py +++ b/ibis/backends/sql/compilers/base.py @@ -1637,6 +1637,16 @@ def add_query_to_expr(self, *, name: str, table: ir.Table, query: str) -> str: # generate the SQL string return parsed.sql(dialect) + def _make_sample_backwards_compatible(self, *, sample, parent): + # sample was changed to be owned by the table being sampled in 25.17.0 + # + # this is a small workaround for backwards compatibility + if "this" in sample.__class__.arg_types: + sample.args["this"] = parent + else: + parent.args["sample"] = sample + return sg.select(STAR).from_(parent) + # `__init_subclass__` is uncalled for subclasses - we manually call it here to # autogenerate the base class implementations as well. diff --git a/ibis/backends/sql/compilers/duckdb.py b/ibis/backends/sql/compilers/duckdb.py index fd47a9121fe9..1481510ef37e 100644 --- a/ibis/backends/sql/compilers/duckdb.py +++ b/ibis/backends/sql/compilers/duckdb.py @@ -168,12 +168,12 @@ def visit_Sample( self, op, *, parent, fraction: float, method: str, seed: int | None, **_ ): sample = sge.TableSample( - this=parent, method="bernoulli" if method == "row" else "system", percent=sge.convert(fraction * 100.0), seed=None if seed is None else sge.convert(seed), ) - return sg.select(STAR).from_(sample) + + return self._make_sample_backwards_compatible(sample=sample, parent=parent) def visit_ArraySlice(self, op, *, arg, start, stop): arg_length = self.f.len(arg) diff --git a/ibis/backends/sql/compilers/oracle.py b/ibis/backends/sql/compilers/oracle.py index 20e74766e73e..1f73f5c38828 100644 --- a/ibis/backends/sql/compilers/oracle.py +++ b/ibis/backends/sql/compilers/oracle.py @@ -1,5 +1,7 @@ from __future__ import annotations +import string + import sqlglot as sg import sqlglot.expressions as sge import toolz @@ -490,5 +492,11 @@ def visit_DateDelta(self, op, *, left, right, part): ) return left - right + def visit_RStrip(self, op, *, arg): + return self.f.anon.rtrim(arg, string.whitespace) + + def visit_LStrip(self, op, *, arg): + return self.f.anon.ltrim(arg, string.whitespace) + compiler = OracleCompiler() diff --git a/ibis/backends/sql/compilers/pyspark.py b/ibis/backends/sql/compilers/pyspark.py index 8a1985b17b7b..8db4d1542632 100644 --- a/ibis/backends/sql/compilers/pyspark.py +++ b/ibis/backends/sql/compilers/pyspark.py @@ -334,11 +334,8 @@ def visit_Sample( raise com.UnsupportedOperationError( "PySpark backend does not support sampling with seed." ) - sample = sge.TableSample( - this=parent, - percent=sge.convert(fraction * 100.0), - ) - return sg.select(STAR).from_(sample) + sample = sge.TableSample(percent=sge.convert(int(fraction * 100.0))) + return self._make_sample_backwards_compatible(sample=sample, parent=parent) def visit_WindowBoundary(self, op, *, value, preceding): if isinstance(op.value, ops.Literal) and op.value.value == 0: diff --git a/ibis/backends/sql/compilers/snowflake.py b/ibis/backends/sql/compilers/snowflake.py index ba404ab4957a..c6e49cbba5f6 100644 --- a/ibis/backends/sql/compilers/snowflake.py +++ b/ibis/backends/sql/compilers/snowflake.py @@ -761,12 +761,11 @@ def visit_Sample( self, op, *, parent, fraction: float, method: str, seed: int | None, **_ ): sample = sge.TableSample( - this=parent, method="bernoulli" if method == "row" else "system", percent=sge.convert(fraction * 100.0), seed=None if seed is None else sge.convert(seed), ) - return sg.select(STAR).from_(sample) + return self._make_sample_backwards_compatible(sample=sample, parent=parent) def visit_ArrayMap(self, op, *, arg, param, body): return self.f.transform(arg, sge.Lambda(this=body, expressions=[param])) diff --git a/ibis/backends/sql/compilers/trino.py b/ibis/backends/sql/compilers/trino.py index 93e4b6d2cd9d..db3199830d48 100644 --- a/ibis/backends/sql/compilers/trino.py +++ b/ibis/backends/sql/compilers/trino.py @@ -108,17 +108,16 @@ def _minimize_spec(start, end, spec): def visit_Sample( self, op, *, parent, fraction: float, method: str, seed: int | None, **_ ): - if op.seed is not None: + if seed is not None: raise com.UnsupportedOperationError( "`Table.sample` with a random seed is unsupported" ) sample = sge.TableSample( - this=parent, method="bernoulli" if method == "row" else "system", percent=sge.convert(fraction * 100.0), seed=None if seed is None else sge.convert(seed), ) - return sg.select(STAR).from_(sample) + return self._make_sample_backwards_compatible(sample=sample, parent=parent) def visit_Correlation(self, op, *, left, right, how, where): if how == "sample": diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index 52e514ef8bbd..e0320aef8d6f 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -2196,6 +2196,7 @@ def test_dynamic_table_slice_with_computed_offset(backend): ), ], ) +@pytest.mark.xfail_version(pyspark=["sqlglot==25.17.0"]) def test_sample(backend, method): t = backend.functional_alltypes.filter(_.int_col >= 2) diff --git a/poetry.lock b/poetry.lock index 30fac77108bd..126158d854e4 100644 --- a/poetry.lock +++ b/poetry.lock @@ -6892,13 +6892,13 @@ sqlcipher = ["sqlcipher3_binary"] [[package]] name = "sqlglot" -version = "25.16.1" +version = "25.17.0" description = "An easily customizable SQL parser and transpiler" optional = false python-versions = ">=3.7" files = [ - {file = "sqlglot-25.16.1-py3-none-any.whl", hash = "sha256:e0511238b71304437441026eefdb8a913412bc067b3a7f224718dc3af721e6b2"}, - {file = "sqlglot-25.16.1.tar.gz", hash = "sha256:8198686415013e4726a30ab8982725eef66920ec65ee49fd72c225f8a299d2fa"}, + {file = "sqlglot-25.17.0-py3-none-any.whl", hash = "sha256:8580475f4ee27032ad00b366b8a1967f3630f119a07ed6da92653adcba7ba731"}, + {file = "sqlglot-25.17.0.tar.gz", hash = "sha256:91f3741f815a5e1d1dd157a428268af3eda43632dad56790d5c547be1c0491d0"}, ] [package.extras] @@ -7900,4 +7900,4 @@ visualization = ["graphviz"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "bc5da5a4e0a7840fd31c6bff7f515596cfbf73090520021402121f6f6aece563" +content-hash = "503416bc3a5f652fe5c0db7f0f44e66cf173c93a1e567c0b00628c550f686b90" diff --git a/pyproject.toml b/pyproject.toml index 2828bab57b91..54060b60c309 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,7 +41,7 @@ atpublic = ">=2.3,<6" parsy = ">=2,<3" python-dateutil = ">=2.8.2,<3" pytz = ">=2022.7" -sqlglot = ">=23.4,<25.17" +sqlglot = ">=23.4,<25.18" toolz = ">=0.11,<1" typing-extensions = ">=4.3.0,<5" numpy = { version = ">=1.23.2,<3", optional = true } diff --git a/requirements-dev.txt b/requirements-dev.txt index 3bf62f2ff283..cc39e597330f 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -256,7 +256,7 @@ sortedcontainers==2.4.0 ; python_version >= "3.10" and python_version < "4.0" soupsieve==2.6 ; python_version >= "3.10" and python_version < "3.13" sphobjinv==2.3.1.1 ; python_version >= "3.10" and python_version < "3.13" sqlalchemy==2.0.32 ; python_version >= "3.10" and python_version < "3.13" -sqlglot==25.16.1 ; python_version >= "3.10" and python_version < "4.0" +sqlglot==25.17.0 ; python_version >= "3.10" and python_version < "4.0" stack-data==0.6.3 ; python_version >= "3.10" and python_version < "4.0" statsmodels==0.14.2 ; python_version >= "3.10" and python_version < "3.13" tabulate==0.9.0 ; python_version >= "3.10" and python_version < "3.13"