From 733fffce5050755f860a7a6e975d137149e17731 Mon Sep 17 00:00:00 2001 From: Jim Crist-Harif Date: Fri, 9 Feb 2024 14:11:15 -0600 Subject: [PATCH] feat(api): add a `uuid` function for returning a new uuid --- ibis/backends/bigquery/compiler.py | 1 + ibis/backends/clickhouse/compiler.py | 1 + ibis/backends/duckdb/compiler.py | 1 + ibis/backends/impala/compiler.py | 1 + ibis/backends/postgres/compiler.py | 1 + ibis/backends/risingwave/compiler.py | 1 + ibis/backends/snowflake/compiler.py | 1 + ibis/backends/sqlite/compiler.py | 1 + ibis/backends/sqlite/udf.py | 6 ++++++ ibis/backends/tests/test_uuid.py | 22 ++++++++++++++++++++++ ibis/backends/trino/compiler.py | 1 + ibis/expr/api.py | 20 ++++++++++++++++++++ ibis/expr/operations/generic.py | 5 +++++ ibis/tests/expr/test_uuid.py | 9 +++++++++ 14 files changed, 71 insertions(+) create mode 100644 ibis/tests/expr/test_uuid.py diff --git a/ibis/backends/bigquery/compiler.py b/ibis/backends/bigquery/compiler.py index da2c0de87bfb..da0be2d3d9e8 100644 --- a/ibis/backends/bigquery/compiler.py +++ b/ibis/backends/bigquery/compiler.py @@ -122,6 +122,7 @@ class BigQueryCompiler(SQLGlotCompiler): ops.Levenshtein: "edit_distance", ops.Modulus: "mod", ops.RandomScalar: "rand", + ops.RandomUUID: "generate_uuid", ops.RegexReplace: "regexp_replace", ops.RegexSearch: "regexp_contains", ops.Time: "time", diff --git a/ibis/backends/clickhouse/compiler.py b/ibis/backends/clickhouse/compiler.py index 3ca46e925e58..3201e11ca811 100644 --- a/ibis/backends/clickhouse/compiler.py +++ b/ibis/backends/clickhouse/compiler.py @@ -98,6 +98,7 @@ class ClickHouseCompiler(SQLGlotCompiler): ops.NullIf: "nullIf", ops.RStrip: "trimRight", ops.RandomScalar: "randCanonical", + ops.RandomUUID: "generateUUIDv4", ops.RegexReplace: "replaceRegexpAll", ops.RowNumber: "row_number", ops.StartsWith: "startsWith", diff --git a/ibis/backends/duckdb/compiler.py b/ibis/backends/duckdb/compiler.py index 208993fe8846..06ceeacf7512 100644 --- 
a/ibis/backends/duckdb/compiler.py +++ b/ibis/backends/duckdb/compiler.py @@ -51,6 +51,7 @@ class DuckDBCompiler(SQLGlotCompiler): ops.MapMerge: "map_concat", ops.MapValues: "map_values", ops.Mode: "mode", + ops.RandomUUID: "uuid", ops.TimeFromHMS: "make_time", ops.TypeOf: "typeof", ops.GeoPoint: "st_point", diff --git a/ibis/backends/impala/compiler.py b/ibis/backends/impala/compiler.py index e5831a152f00..08988386dd45 100644 --- a/ibis/backends/impala/compiler.py +++ b/ibis/backends/impala/compiler.py @@ -81,6 +81,7 @@ class ImpalaCompiler(SQLGlotCompiler): ops.Ln: "ln", ops.Log10: "log10", ops.Log2: "log2", + ops.RandomUUID: "uuid", ops.RStrip: "rtrim", ops.Strip: "trim", ops.TypeOf: "typeof", diff --git a/ibis/backends/postgres/compiler.py b/ibis/backends/postgres/compiler.py index 1284962824a3..a90e56501ea4 100644 --- a/ibis/backends/postgres/compiler.py +++ b/ibis/backends/postgres/compiler.py @@ -94,6 +94,7 @@ class PostgresCompiler(SQLGlotCompiler): ops.MapContains: "exist", ops.MapKeys: "akeys", ops.MapValues: "avals", + ops.RandomUUID: "gen_random_uuid", ops.RegexSearch: "regexp_like", ops.TimeFromHMS: "make_time", } diff --git a/ibis/backends/risingwave/compiler.py b/ibis/backends/risingwave/compiler.py index df34cda256b0..d6d20abe7278 100644 --- a/ibis/backends/risingwave/compiler.py +++ b/ibis/backends/risingwave/compiler.py @@ -24,6 +24,7 @@ class RisingwaveCompiler(PostgresCompiler): ( ops.DateFromYMD, ops.Mode, + ops.RandomUUID, *( op for op in ALL_OPERATIONS diff --git a/ibis/backends/snowflake/compiler.py b/ibis/backends/snowflake/compiler.py index a1e96943583f..814a86d50d85 100644 --- a/ibis/backends/snowflake/compiler.py +++ b/ibis/backends/snowflake/compiler.py @@ -80,6 +80,7 @@ class SnowflakeCompiler(SQLGlotCompiler): ops.Hash: "hash", ops.Median: "median", ops.Mode: "mode", + ops.RandomUUID: "uuid_string", ops.StringToTimestamp: "to_timestamp_tz", ops.TimeFromHMS: "time_from_parts", ops.TimestampFromYMDHMS: "timestamp_from_parts", diff --git 
a/ibis/backends/sqlite/compiler.py b/ibis/backends/sqlite/compiler.py index bd92b175f79a..3b18581b6ab9 100644 --- a/ibis/backends/sqlite/compiler.py +++ b/ibis/backends/sqlite/compiler.py @@ -106,6 +106,7 @@ class SQLiteCompiler(SQLGlotCompiler): ops.Mode: "_ibis_mode", ops.Time: "time", ops.Date: "date", + ops.RandomUUID: "uuid", } def _aggregate(self, funcname: str, *args, where): diff --git a/ibis/backends/sqlite/udf.py b/ibis/backends/sqlite/udf.py index d4d646dbb95f..15cfdc46b479 100644 --- a/ibis/backends/sqlite/udf.py +++ b/ibis/backends/sqlite/udf.py @@ -8,6 +8,7 @@ from collections import defaultdict from typing import Any, Callable, NamedTuple from urllib.parse import parse_qs, urlsplit +from uuid import uuid4 try: import regex as re @@ -222,6 +223,11 @@ def pi(): return math.pi +@udf(skip_if_exists=True) +def uuid(): + return str(uuid4()) + + # Additional UDFS diff --git a/ibis/backends/tests/test_uuid.py b/ibis/backends/tests/test_uuid.py index 36bf781544ce..a33102f4f1f6 100644 --- a/ibis/backends/tests/test_uuid.py +++ b/ibis/backends/tests/test_uuid.py @@ -38,3 +38,25 @@ def test_uuid_literal(con, backend): with contextlib.suppress(com.OperationNotDefinedError): assert con.execute(expr.typeof()) == UUID_BACKEND_TYPE[backend_name] + + +@pytest.mark.notimpl( + [ + "datafusion", + "druid", + "exasol", + "flink", + "mssql", + "mysql", + "oracle", + "polars", + "pyspark", + "risingwave", + ], + raises=com.OperationNotDefinedError, +) +@pytest.mark.notimpl(["pandas", "dask"], raises=ValueError) +def test_uuid_function(con): + obj = con.execute(ibis.uuid()) + assert isinstance(obj, uuid.UUID) + assert obj.version == 4 diff --git a/ibis/backends/trino/compiler.py b/ibis/backends/trino/compiler.py index e86e578a0d73..473df45f40a6 100644 --- a/ibis/backends/trino/compiler.py +++ b/ibis/backends/trino/compiler.py @@ -89,6 +89,7 @@ class TrinoCompiler(SQLGlotCompiler): ops.ExtractPath: "url_extract_path", ops.ExtractFragment: "url_extract_fragment", 
ops.ArrayPosition: "array_position", + ops.RandomUUID: "uuid", } def _aggregate(self, funcname: str, *args, where): diff --git a/ibis/expr/api.py b/ibis/expr/api.py index 9410737db0d5..4dd0727347d9 100644 --- a/ibis/expr/api.py +++ b/ibis/expr/api.py @@ -167,6 +167,7 @@ "trailing_range_window", "trailing_window", "union", + "uuid", "watermark", "where", "window", @@ -690,6 +691,25 @@ def random() -> ir.FloatingScalar: return ops.RandomScalar().to_expr() +def uuid() -> ir.UUIDScalar: + """Return a random UUID version 4 value. + + Similar to [`uuid.uuid4`](https://docs.python.org/3/library/uuid.html#uuid.uuid4) in the Python standard library. + + Examples + -------- + >>> from ibis.interactive import * + >>> ibis.uuid() # doctest: +SKIP + UUID('e57e927b-aed2-483b-9140-dc32a26cad95') + + Returns + ------- + UUIDScalar + Random UUID value expression + """ + return ops.RandomUUID().to_expr() + + @overload def timestamp( value_or_year: int | ir.IntegerValue | Deferred, diff --git a/ibis/expr/operations/generic.py b/ibis/expr/operations/generic.py index a9e8ee45417a..2fb4218cdc59 100644 --- a/ibis/expr/operations/generic.py +++ b/ibis/expr/operations/generic.py @@ -193,6 +193,11 @@ class RandomScalar(Constant): dtype = dt.float64 +@public +class RandomUUID(Constant): + dtype = dt.uuid + + @public class E(Constant): dtype = dt.float64 diff --git a/ibis/tests/expr/test_uuid.py b/ibis/tests/expr/test_uuid.py new file mode 100644 index 000000000000..12cc9907547d --- /dev/null +++ b/ibis/tests/expr/test_uuid.py @@ -0,0 +1,9 @@ +from __future__ import annotations + +import ibis + + +def test_uuid(): + u = ibis.uuid() + assert u.type().is_uuid() + assert isinstance(u.op().shape, ibis.expr.datashape.Scalar)