Skip to content

Commit

Permalink
feat: Implement CountDistinctStar for bigquery
Browse files Browse the repository at this point in the history
  • Loading branch information
ssabdb committed Jun 28, 2024
1 parent 601b13b commit 5d35f2d
Showing 1 changed file with 14 additions and 1 deletion.
15 changes: 14 additions & 1 deletion ibis/backends/bigquery/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ class BigQueryCompiler(SQLGlotCompiler):
)

UNSUPPORTED_OPS = (
ops.CountDistinctStar,
ops.DateDiff,
ops.ExtractAuthority,
ops.ExtractUserInfo,
Expand Down Expand Up @@ -645,6 +644,20 @@ def visit_CountStar(self, op, *, arg, where):
return self.f.countif(where)
return self.f.count(STAR)

def visit_CountDistinctStar(self, op, *, where, arg):
    """Compile a distinct count over every column of ``op.arg``.

    Each column named in ``op.arg.schema`` is encoded with
    ``TO_JSON_STRING``; the encodings are joined into a single string and
    that string is counted with ``COUNT(DISTINCT ...)``.

    Parameters
    ----------
    op
        The operation node; only ``op.arg.schema`` is read, to obtain the
        column names.
    where
        Optional filter, forwarded unchanged to ``self.agg.count``.
    arg
        Compiled form of the table argument. Unused here; the column
        references are rebuilt from the schema instead.

    Returns
    -------
    The expression produced by ``self.agg.count`` for the distinct row key.
    """
    # BigQuery does not support count(distinct a,b,c) or count(distinct (a, b, c))
    # as expressions must be "groupable":
    # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#group_by_grouping_item
    #
    # Instead, convert the entire row to a single string:
    # SELECT COUNT(DISTINCT array_to_string([to_json_string(a), to_json_string(b)], ','))
    # This works with an array of datatypes, each encoded as JSON:
    # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_encodings
    #
    # The delimiter must not be empty: JSON numbers, booleans and null are
    # not self-delimiting, so with '' the rows (1, 23) and (12, 3) would both
    # encode to "123" and be counted as one. A top-level comma cannot occur
    # inside a JSON value (strings are quoted, arrays/objects are bracketed),
    # so joining with ',' keeps distinct rows distinct.
    json_columns = [
        self.f.to_json_string(sg.column(name, quoted=self.quoted))
        for name in op.arg.schema.keys()
    ]
    row = self.f.ARRAY_TO_STRING(sge.Array(expressions=json_columns), ",")
    return self.agg.count(sge.Distinct(expressions=[row]), where=where)

Check warning on line 659 in ibis/backends/bigquery/compiler.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/bigquery/compiler.py#L659

Added line #L659 was not covered by tests

def visit_Degrees(self, op, *, arg):
    """Compile a degrees conversion by delegating to ``self._pudf``.

    ``op`` is not inspected; the already-compiled ``arg`` is passed to
    ``self._pudf`` together with the function name ``"degrees"``.
    NOTE(review): ``_pudf`` is defined outside this view -- presumably it
    emits a call to a persistent UDF; confirm against its definition.
    """
    degrees_expr = self._pudf("degrees", arg)
    return degrees_expr

Expand Down

0 comments on commit 5d35f2d

Please sign in to comment.