Skip to content

Commit

Permalink
fix: convert the uint64's from some backends' hash() to the desired i…
Browse files Browse the repository at this point in the history
…nt64

fixes #9135
  • Loading branch information
NickCrews authored and jcrist committed May 16, 2024
1 parent 77d6cb6 commit 900ecca
Show file tree
Hide file tree
Showing 6 changed files with 20 additions and 9 deletions.
2 changes: 1 addition & 1 deletion ibis/backends/clickhouse/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ def visit_Sign(self, op, *, arg):
return self.f.intDivOrZero(arg, self.f.abs(arg))

def visit_Hash(self, op, *, arg):
return self.f.sipHash64(arg)
return self.f.reinterpretAsInt64(self.f.sipHash64(arg))

def visit_HashBytes(self, op, *, arg, how):
supported_algorithms = {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
sipHash64("t0"."string_col") AS "Hash(string_col)"
reinterpretAsInt64(sipHash64("t0"."string_col")) AS "Hash(string_col)"
FROM "functional_alltypes" AS "t0"
9 changes: 8 additions & 1 deletion ibis/backends/duckdb/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ class DuckDBCompiler(SQLGlotCompiler):
ops.BitXor: "bit_xor",
ops.EndsWith: "suffix",
ops.ExtractIsoYear: "isoyear",
ops.Hash: "hash",
ops.IntegerRange: "range",
ops.TimestampRange: "range",
ops.MapLength: "cardinality",
Expand Down Expand Up @@ -467,6 +466,14 @@ def visit_HexDigest(self, op, *, arg, how):
else:
raise NotImplementedError(f"No available hashing function for {how}")

def visit_Hash(self, op, *, arg):
# duckdb's hash() returns a uint64, but ops.Hash is supposed to be int64
# So do HASH(x)::BITSTRING::BIGINT
raw = self.f.hash(arg)
bitstring = sg.cast(sge.convert(raw), to=sge.DataType.Type.BIT, copy=False)
int64 = sg.cast(bitstring, to=sge.DataType.Type.BIGINT, copy=False)
return int64

def visit_StringConcat(self, op, *, arg):
return reduce(lambda x, y: sge.DPipe(this=x, expression=y), arg)

Expand Down
3 changes: 2 additions & 1 deletion ibis/backends/polars/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1206,7 +1206,8 @@ def execute_union(op, **kw):

@translate.register(ops.Hash)
def execute_hash(op, **kw):
return translate(op.arg, **kw).hash()
# polars' hash() returns a uint64, but we want to return an int64
return translate(op.arg, **kw).hash().reinterpret(signed=True)


def _arg_min_max(op, func, **kw):
Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/sql/datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ def to_ibis(cls, typ: sge.DataType, nullable: bool | None = None) -> dt.DataType

@classmethod
def from_ibis(cls, dtype: dt.DataType) -> sge.DataType:
"""Convert a sqlglot type to an ibis type."""
"""Convert an Ibis dtype to an sqlglot dtype."""

if method := getattr(cls, f"_from_ibis_{dtype.name}", None):
return method(dtype)
Expand Down
11 changes: 7 additions & 4 deletions ibis/backends/tests/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1439,11 +1439,14 @@ def test_distinct_on_keep_is_none(backend, on):
"trino", # checksum returns varbinary
]
)
def test_hash_consistent(backend, alltypes):
h1 = alltypes.string_col.hash().execute(limit=10)
h2 = alltypes.string_col.hash().execute(limit=10)
def test_hash(backend, alltypes):
# check that multiple executions return the same result
h1 = alltypes.string_col.hash().execute(limit=20)
h2 = alltypes.string_col.hash().execute(limit=20)
backend.assert_series_equal(h1, h2)
assert h1.dtype in ("i8", "uint64") # polars likes returning uint64 for this
# check that the result is a signed 64-bit integer, no nulls
assert h1.dtype == "i8"
assert h1.notnull().all()


@pytest.mark.notimpl(["trino", "oracle", "exasol", "snowflake"])
Expand Down

0 comments on commit 900ecca

Please sign in to comment.