Skip to content

Commit

Permalink
fix(clickhouse): workaround EXCEPT and INTERSECT generation in sq…
Browse files Browse the repository at this point in the history
…lglot; add tpcds query 87 (#9959)

Add TPC-DS query 87 and work around a bug in sqlglot (fixed upstream in
tobymao/sqlglot#4007).
  • Loading branch information
cpcloud authored Aug 30, 2024
1 parent 4c136d8 commit 910b8f5
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 13 deletions.
25 changes: 18 additions & 7 deletions ibis/backends/sql/dialects.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from sqlglot import transforms
from sqlglot.dialects import (
TSQL,
ClickHouse,
Hive,
MySQL,
Oracle,
Expand All @@ -19,15 +18,27 @@
SQLite,
Trino,
)
from sqlglot.dialects import ClickHouse as _ClickHouse
from sqlglot.dialects.dialect import rename_func
from sqlglot.helper import find_new_name, seq_get

# Merge extra entries into the ClickHouse generator's TRANSFORMS mapping in
# place, keyed by sqlglot expression type.
# NOTE(review): presumably each rename_func(name) renders the expression as a
# call to the function `name` -- confirm against sqlglot's dialect helpers.
ClickHouse.Generator.TRANSFORMS |= {
sge.ArraySize: rename_func("length"),
sge.ArraySort: rename_func("arraySort"),
sge.LogicalAnd: rename_func("min"),
sge.LogicalOr: rename_func("max"),
}

class ClickHouse(_ClickHouse):
    """ClickHouse dialect with adjusted set-operation SQL generation."""

    class Generator(_ClickHouse.Generator):
        """Generator that always writes an explicit DISTINCT/ALL quantifier.

        Works around EXCEPT and INTERSECT generation in sqlglot (fixed
        upstream in tobymao/sqlglot#4007).
        """

        # NOTE: this updates the *parent* generator's TRANSFORMS mapping in
        # place; it does not define a separate mapping on this subclass.
        _ClickHouse.Generator.TRANSFORMS |= {
            sge.ArraySize: rename_func("length"),
            sge.ArraySort: rename_func("arraySort"),
            sge.LogicalAnd: rename_func("min"),
            sge.LogicalOr: rename_func("max"),
        }

        def except_op(self, expression: sge.Except) -> str:
            """Return the EXCEPT keyword followed by DISTINCT or ALL."""
            # A missing or falsy "distinct" arg is rendered as ALL.
            quantifier = "DISTINCT" if expression.args.get("distinct") else "ALL"
            return f"EXCEPT {quantifier}"

        def intersect_op(self, expression: sge.Intersect) -> str:
            """Return the INTERSECT keyword followed by DISTINCT or ALL."""
            # A missing or falsy "distinct" arg is rendered as ALL.
            quantifier = "DISTINCT" if expression.args.get("distinct") else "ALL"
            return f"INTERSECT {quantifier}"


class DataFusion(Postgres):
Expand Down
35 changes: 30 additions & 5 deletions ibis/backends/tests/tpc/ds/test_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -1912,11 +1912,6 @@ def test_37(item, inventory, date_dim, catalog_sales):


@tpc_test("ds")
@pytest.mark.notyet(
["clickhouse"],
raises=AssertionError,
reason="clickhouse returns an incorrect result for this query",
)
def test_38(store_sales, catalog_sales, web_sales, date_dim, customer):
dates = date_dim.filter(_.d_month_seq.between(1200, 1200 + 11))
columns = "c_last_name", "c_first_name", "d_date"
Expand Down Expand Up @@ -4358,6 +4353,36 @@ def test_86(web_sales, date_dim, item):
raise NotImplementedError()


@tpc_test("ds")
def test_87(store_sales, date_dim, customer, catalog_sales, web_sales):
    """Count the rows of ibis.difference over store, catalog and web sales.

    Each input is reduced to its distinct (c_last_name, c_first_name, d_date)
    rows for d_month_seq between 1200 and 1211 before the difference is taken.
    """

    def distinct_customer_dates(sales, date_key, customer_key):
        # Attach the date and customer dimensions to the sales table.
        with_dates = sales.join(date_dim, [(date_key, "d_date_sk")])
        with_customers = with_dates.join(
            customer, [(customer_key, "c_customer_sk")]
        )
        # Keep only the requested month-sequence window.
        in_window = with_customers.filter(_.d_month_seq.between(1200, 1200 + 11))
        projected = in_window.select(_.c_last_name, _.c_first_name, _.d_date)
        return projected.distinct()

    store = distinct_customer_dates(
        store_sales, "ss_sold_date_sk", "ss_customer_sk"
    )
    catalog = distinct_customer_dates(
        catalog_sales, "cs_sold_date_sk", "cs_bill_customer_sk"
    )
    web = distinct_customer_dates(
        web_sales, "ws_sold_date_sk", "ws_bill_customer_sk"
    )

    remaining = ibis.difference(store, catalog, web)
    return remaining.agg(num_cool=_.count())


@tpc_test("ds")
def test_89(item, store_sales, date_dim, store):
return (
Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/tests/tpc/queries/duckdb/ds/87.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
SELECT count(*)
SELECT count(*) num_cool
FROM ((SELECT DISTINCT c_last_name,
c_first_name,
d_date
Expand Down

0 comments on commit 910b8f5

Please sign in to comment.