Skip to content

Commit

Permalink
fix(snowflake): only compile sample to TABLESAMPLE on physical tables
Browse files Browse the repository at this point in the history
  • Loading branch information
jcrist committed Sep 25, 2024
1 parent 321a3b5 commit 927865e
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 19 deletions.
7 changes: 6 additions & 1 deletion ibis/backends/sql/compilers/snowflake.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,12 @@ class SnowflakeCompiler(SQLGlotCompiler):
LOWERED_OPS = {
ops.Log2: lower_log2,
ops.Log10: lower_log10,
ops.Sample: lower_sample(),
# Snowflake's TABLESAMPLE _can_ work on subqueries, but only by row and without
# a seed. This is effectively the same as `t.filter(random() <= fraction)`, and
# using TABLESAMPLE here would almost certainly have no benefit over the filter
# version in the optimized physical plan. To avoid a special case just for
# Snowflake, we only use TABLESAMPLE on physical tables.
ops.Sample: lower_sample(physical_tables_only=True),
}

UNSUPPORTED_OPS = (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,6 @@ FROM (
FROM "test" AS "t0"
WHERE
"t0"."x" > 10
) AS "t1" TABLESAMPLE system (50.0)
) AS "t1"
WHERE
UNIFORM(TO_DOUBLE(0.0), TO_DOUBLE(1.0), RANDOM()) <= 0.5
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,6 @@ FROM (
FROM "test" AS "t0"
WHERE
"t0"."x" > 10
) AS "t1" TABLESAMPLE bernoulli (50.0)
) AS "t1"
WHERE
UNIFORM(TO_DOUBLE(0.0), TO_DOUBLE(1.0), RANDOM()) <= 0.5
17 changes: 1 addition & 16 deletions ibis/backends/tests/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2125,22 +2125,7 @@ def test_dynamic_table_slice_with_computed_offset(backend):


@pytest.mark.notimpl(["druid", "risingwave"], raises=com.OperationNotDefinedError)
@pytest.mark.parametrize(
"method",
[
"row",
param(
"block",
marks=[
pytest.mark.notimpl(
["snowflake"],
raises=SnowflakeProgrammingError,
reason="SAMPLE clause on views only supports row wise sampling without seed.",
)
],
),
],
)
@pytest.mark.parametrize("method", ["row", "block"])
@pytest.mark.parametrize("subquery", [True, False], ids=["subquery", "table"])
@pytest.mark.xfail_version(pyspark=["sqlglot==25.17.0"])
def test_sample(backend, method, alltypes, subquery):
Expand Down

0 comments on commit 927865e

Please sign in to comment.