Skip to content

Commit

Permalink
feat(bigquery): implement CountDistinctStar (#9470)
Browse files Browse the repository at this point in the history
Co-authored-by: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
  • Loading branch information
ssabdb and cpcloud authored Jul 1, 2024
1 parent d2dff68 commit 273e4bc
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 2 deletions.
20 changes: 19 additions & 1 deletion ibis/backends/bigquery/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ class BigQueryCompiler(SQLGlotCompiler):
)

UNSUPPORTED_OPS = (
ops.CountDistinctStar,
ops.DateDiff,
ops.ExtractAuthority,
ops.ExtractUserInfo,
Expand Down Expand Up @@ -645,6 +644,25 @@ def visit_CountStar(self, op, *, arg, where):
return self.f.countif(where)
return self.f.count(STAR)

def visit_CountDistinctStar(self, op, *, where, arg):
    """Compile a count of distinct rows over every column of ``op.arg``.

    Parameters
    ----------
    op
        The operation node; the column names are read from ``op.arg.schema``.
    where
        Optional compiled filter. Rows failing it contribute NULL, which
        COUNT(DISTINCT ...) does not count.
    arg
        Compiled form of ``op.arg``; unused because the columns are
        rebuilt by name from the schema.

    Returns
    -------
    A ``COUNT(DISTINCT <row key>)`` expression.
    """
    # Bigquery does not support count(distinct a,b,c) or count(distinct (a, b, c))
    # as expressions must be "groupable":
    # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#group_by_grouping_item
    #
    # Instead, convert the entire row to a single string key:
    # SELECT COUNT(DISTINCT concat(to_json_string(a), ',', to_json_string(b)))
    # to_json_string works with an array of datatypes:
    # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_encodings
    #
    # The ',' separator is required for the key to be unique per row. Numbers,
    # booleans and null are encoded without quotes, so without a delimiter the
    # rows (1, 23) and (12, 3) would both become '123' and be counted once.
    # A comma cannot appear unquoted at the top level of a JSON value, so the
    # separated key is unambiguous.
    parts = []
    for name in op.arg.schema.keys():
        if parts:
            # Fresh literal per position so no expression node is shared.
            parts.append(sge.Literal.string(","))
        parts.append(self.f.to_json_string(sg.column(name, quoted=self.quoted)))

    row = sge.Concat(expressions=parts)
    if where is not None:
        row = self.if_(where, row, NULL)
    return self.f.count(sge.Distinct(expressions=[row]))

def visit_Degrees(self, op, *, arg):
    """Compile ``op`` by delegating to ``self._pudf`` under the name "degrees".

    ``arg`` is the already-compiled operand and is forwarded unchanged.
    """
    udf_name = "degrees"
    return self._pudf(udf_name, arg)

Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/tests/test_aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -713,7 +713,7 @@ def test_arbitrary(backend, alltypes, df, filtered):
],
)
@pytest.mark.notyet(
["bigquery", "druid", "mssql", "oracle", "sqlite", "flink"],
["druid", "mssql", "oracle", "sqlite", "flink"],
raises=(
OracleDatabaseError,
com.UnsupportedOperationError,
Expand Down

0 comments on commit 273e4bc

Please sign in to comment.