diff --git a/ibis/backends/duckdb/registry.py b/ibis/backends/duckdb/registry.py
index 49b23b978e0d..a0733196c42a 100644
--- a/ibis/backends/duckdb/registry.py
+++ b/ibis/backends/duckdb/registry.py
@@ -397,6 +397,16 @@ def _array_remove(t, op):
     )
 
 
+def _hexdigest(translator, op):
+    how = op.how
+
+    arg_formatted = translator.translate(op.arg)
+    if how in ("md5", "sha256"):
+        return getattr(sa.func, how)(arg_formatted)
+    else:
+        raise NotImplementedError(how)
+
+
 operation_registry.update(
     {
         ops.Array: (
@@ -533,6 +543,7 @@ def _array_remove(t, op):
         ops.MapValues: unary(sa.func.map_values),
         ops.MapMerge: fixed_arity(sa.func.map_concat, 2),
         ops.Hash: unary(sa.func.hash),
+        ops.HexDigest: _hexdigest,
         ops.Median: reduction(sa.func.median),
         ops.First: reduction(sa.func.first),
         ops.Last: reduction(sa.func.last),
diff --git a/ibis/backends/pyspark/compiler.py b/ibis/backends/pyspark/compiler.py
index 47003a437fb4..f1d5e76b84b6 100644
--- a/ibis/backends/pyspark/compiler.py
+++ b/ibis/backends/pyspark/compiler.py
@@ -2065,6 +2065,21 @@ def compile_hash_column(t, op, **kwargs):
     return F.hash(t.translate(op.arg, **kwargs))
 
 
+@compiles(ops.HexDigest)
+def compile_hexdigest_column(t, op, **kwargs):
+    how = op.how
+    arg = t.translate(op.arg, **kwargs)
+
+    if how == "md5":
+        return F.md5(arg)
+    elif how == "sha1":
+        return F.sha1(arg)
+    elif how in ("sha256", "sha512"):
+        return F.sha2(arg, int(how[-3:]))
+    else:
+        raise NotImplementedError(how)
+
+
 @compiles(ops.ArrayZip)
 def compile_zip(t, op, **kwargs):
     return F.arrays_zip(*map(partial(t.translate, **kwargs), op.arg))