Skip to content

Commit

Permalink
feat(pyspark): implement Distinct for pyspark
Browse files Browse the repository at this point in the history
  • Loading branch information
cpcloud committed Apr 9, 2022
1 parent 3f48cb8 commit 4306ad9
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 2 deletions.
6 changes: 6 additions & 0 deletions ibis/backends/pyspark/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1176,6 +1176,12 @@ def compile_join(t, expr, scope, timecontext, *, how):
return left_df.join(right_df, pred_columns, how)


@compiles(ops.Distinct)
def compile_distinct(t, expr, scope, timecontext):
    """Compile an ``ops.Distinct`` expression.

    The operation's ``table`` argument is translated with ``t.translate``
    (forwarding ``scope`` and ``timecontext`` unchanged), and the result of
    calling ``.distinct()`` on the translated object is returned.

    NOTE(review): the translated object is presumably a PySpark DataFrame,
    so ``.distinct()`` would drop duplicate rows -- confirm against the
    translator.
    """
    source_table = expr.op().table
    translated = t.translate(source_table, scope, timecontext)
    return translated.distinct()


def _canonicalize_interval(t, interval, scope, timecontext, **kwargs):
"""Convert interval to integer timestamp of second
Expand Down
2 changes: 1 addition & 1 deletion ibis/tests/sql/test_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def test_table_distinct(con):

def test_column_distinct(con):
t = con.table('functional_alltypes')
- expr = t[["string_col"]].distinct()
+ expr = t[t.string_col].distinct()

result = Compiler.to_sql(expr)
expected = """SELECT DISTINCT `string_col`
Expand Down
2 changes: 1 addition & 1 deletion ibis/tests/sql/test_sqlalchemy.py
Original file line number Diff line number Diff line change
Expand Up @@ -656,7 +656,7 @@ def test_not_exists(con, not_exists):
).distinct(),
),
(
- lambda t: t[["string_col"]].distinct(),
+ lambda t: t[t.string_col].distinct(),
lambda sat: sa.select([sat.c.string_col.distinct()]),
),
(
Expand Down

0 comments on commit 4306ad9

Please sign in to comment.