Skip to content

Commit

Permalink
feat: Implement CountDistinctStar for bigquery
Browse files Browse the repository at this point in the history
  • Loading branch information
ssabdb committed Jul 1, 2024
1 parent 601b13b commit 5188ffd
Showing 1 changed file with 17 additions and 1 deletion.
18 changes: 17 additions & 1 deletion ibis/backends/bigquery/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ class BigQueryCompiler(SQLGlotCompiler):
)

UNSUPPORTED_OPS = (
ops.CountDistinctStar,
ops.DateDiff,
ops.ExtractAuthority,
ops.ExtractUserInfo,
Expand Down Expand Up @@ -645,6 +644,23 @@ def visit_CountStar(self, op, *, arg, where):
return self.f.countif(where)
return self.f.count(STAR)

def visit_CountDistinctStar(self, op, *, where, arg):
    """Compile a distinct-row count over every column of ``op.arg``.

    BigQuery does not support ``count(distinct a, b, c)`` or
    ``count(distinct (a, b, c))`` because the expressions must be
    "groupable":
    https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#group_by_grouping_item

    Instead, each row is converted to a single string and the distinct
    strings are counted:

        SELECT COUNT(DISTINCT concat(to_json_string(a), ',', to_json_string(b)))

    ``to_json_string`` accepts a wide range of datatypes, see
    https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_encodings

    Parameters
    ----------
    op
        The operation being compiled; only ``op.arg.schema`` is read, to
        obtain the column names.
    where
        Optional filter, forwarded unchanged to ``self.agg.count``.
    arg
        The already-compiled argument. Unused here because the columns are
        referenced by name from the schema instead.

    Returns
    -------
    The expression produced by ``self.agg.count`` for the distinct count.
    """
    encoded_columns = [
        self.f.to_json_string(sg.column(name, quoted=self.quoted))
        for name in op.arg.schema.keys()
    ]

    # The encoded columns must be delimited. Without a separator, different
    # rows can collapse to the same string -- e.g. the integer rows (1, 12)
    # and (11, 2) would both become "112" -- and the distinct count would be
    # too low. JSON string values are quoted and escaped, so a comma between
    # JSON-encoded values keeps the column boundaries unambiguous.
    pieces = []
    for position, column in enumerate(encoded_columns):
        if position:
            # Build a fresh literal node for every gap rather than sharing
            # one node instance between several places in the tree.
            pieces.append(sge.Literal.string(","))
        pieces.append(column)

    row = sge.Concat(expressions=pieces)
    return self.agg.count(sge.Distinct(expressions=[row]), where=where)

Check warning on line 662 in ibis/backends/bigquery/compiler.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/bigquery/compiler.py#L662

Added line #L662 was not covered by tests

def visit_Degrees(self, op, *, arg):
    """Compile a Degrees operation by delegating to ``self._pudf``.

    The helper is called with the function name ``"degrees"`` and the
    already-compiled ``arg``; whatever it returns is returned unchanged.
    ``op`` is not used.
    """
    # NOTE(review): ``_pudf`` presumably builds a call to a UDF named
    # "degrees" -- confirm against its definition elsewhere in the class.
    degrees_call = self._pudf("degrees", arg)
    return degrees_call

Expand Down

0 comments on commit 5188ffd

Please sign in to comment.