From 8db686ee1785ade7657cbfb98ce0d562cb77720a Mon Sep 17 00:00:00 2001 From: Nick Crews Date: Thu, 2 May 2024 11:50:30 -0800 Subject: [PATCH] feat(api): support `type` arg to ibis.null() --- ibis/backends/pandas/kernels.py | 24 +++++++++++++++--- ibis/backends/tests/test_generic.py | 39 ++++++++++++++++++++++++++--- ibis/expr/api.py | 18 +++++++++++-- ibis/expr/types/generic.py | 20 ++++++++++++--- 4 files changed, 88 insertions(+), 13 deletions(-) diff --git a/ibis/backends/pandas/kernels.py b/ibis/backends/pandas/kernels.py index bc1c9fd917b7..5af718c804f3 100644 --- a/ibis/backends/pandas/kernels.py +++ b/ibis/backends/pandas/kernels.py @@ -282,7 +282,8 @@ def arbitrary(arg): ops.ArrayCollect: lambda x: x.tolist(), } -generic = { + +_generic = { ops.Abs: abs, ops.Acos: np.arccos, ops.Add: operator.add, @@ -315,8 +316,6 @@ def arbitrary(arg): ops.IntervalFloorDivide: operator.floordiv, ops.IntervalMultiply: operator.mul, ops.IntervalSubtract: operator.sub, - ops.IsInf: np.isinf, - ops.IsNull: pd.isnull, ops.Less: operator.lt, ops.LessEqual: operator.le, ops.Ln: np.log, @@ -327,7 +326,6 @@ def arbitrary(arg): ops.Negate: lambda x: not x if isinstance(x, (bool, np.bool_)) else -x, ops.Not: lambda x: not x if isinstance(x, (bool, np.bool_)) else ~x, ops.NotEquals: operator.ne, - ops.NotNull: pd.notnull, ops.Or: operator.or_, ops.Power: operator.pow, ops.Radians: np.radians, @@ -349,6 +347,24 @@ def arbitrary(arg): ops.Log: lambda x, base: np.log(x) if base is None else np.log(x) / np.log(base), } + +def none_proof(func): + def wrapper(*args, **kwargs): + if any(map(isnull, args)): + return None + return func(*args, **kwargs) + + return wrapper + + +generic = { + **{k: none_proof(v) for k, v in _generic.items()}, + ops.IsNull: pd.isnull, + ops.NotNull: pd.notnull, + ops.IsInf: np.isinf, +} + + columnwise = { ops.Clip: lambda df: df["arg"].clip(lower=df["lower"], upper=df["upper"]), ops.IfElse: lambda df: df["true_expr"].where( diff --git 
a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index 00f7369f2f50..e0b5e8158ffd 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -53,13 +53,32 @@ @pytest.mark.notyet(["flink"], "The runtime does not support untyped `NULL` values.") def test_null_literal(con, backend): expr = ibis.null() - result = con.execute(expr) - assert pd.isna(result) + assert pd.isna(con.execute(expr)) with contextlib.suppress(com.OperationNotDefinedError): backend_name = backend.name() assert con.execute(expr.typeof()) == NULL_BACKEND_TYPES[backend_name] + with pytest.raises(AttributeError): + expr.upper() + with pytest.raises(AttributeError): + expr.cast(str).max() + assert pd.isna(con.execute(expr.cast(str).upper())) + + +@pytest.mark.broken( + "mssql", + reason="https://github.com/ibis-project/ibis/issues/9109", + raises=AssertionError, +) +def test_null_literal_typed(con, backend): + expr = ibis.null(bool) + assert pd.isna(con.execute(expr)) + assert pd.isna(con.execute(expr.negate())) + assert pd.isna(con.execute(expr.cast(str).upper())) + with pytest.raises(AttributeError): + expr.upper() + BOOLEAN_BACKEND_TYPE = { "bigquery": "BOOL", @@ -75,6 +94,19 @@ def test_null_literal(con, backend): } +def test_null_literal_typed_typeof(con, backend): + expr = ibis.null(bool) + TYPES = { + **BOOLEAN_BACKEND_TYPE, + "clickhouse": "Nullable(Bool)", + "flink": "BOOLEAN", + "sqlite": "null", # in sqlite, typeof(x) is determined by the VALUE of x at runtime, not its static type + } + + with contextlib.suppress(com.OperationNotDefinedError): + assert con.execute(expr.typeof()) == TYPES[backend.name()] + + def test_boolean_literal(con, backend): expr = ibis.literal(False, type=dt.boolean) result = con.execute(expr) @@ -82,8 +114,7 @@ def test_boolean_literal(con, backend): assert type(result) in (np.bool_, bool) with contextlib.suppress(com.OperationNotDefinedError): - backend_name = backend.name() - assert 
con.execute(expr.typeof()) == BOOLEAN_BACKEND_TYPE[backend_name] + assert con.execute(expr.typeof()) == BOOLEAN_BACKEND_TYPE[backend.name()] @pytest.mark.parametrize( diff --git a/ibis/expr/api.py b/ibis/expr/api.py index 2dd766ac2bc8..f99b99b86ac0 100644 --- a/ibis/expr/api.py +++ b/ibis/expr/api.py @@ -200,11 +200,25 @@ NA = null() """The NULL scalar. +This is an untyped NULL. If you want a typed NULL, use e.g. `ibis.null(str)`. + Examples -------- >>> import ibis ->>> my_null = ibis.NA ->>> my_null.isnull() +>>> assert ibis.NA.execute() is None +>>> ibis.NA.isnull().execute() +True + +Datatype-specific methods aren't available on `NA`: + +>>> ibis.NA.upper().execute() is None # quartodoc: +EXPECTED_FAILURE +Traceback (most recent call last): + ... +AttributeError: 'NullScalar' object has no attribute 'upper' + +Instead, use the typed `ibis.null`: + +>>> ibis.null(str).upper().execute() is None True """ diff --git a/ibis/expr/types/generic.py b/ibis/expr/types/generic.py index 367d6031a62e..51b6603b41cd 100644 --- a/ibis/expr/types/generic.py +++ b/ibis/expr/types/generic.py @@ -2227,9 +2227,23 @@ class NullColumn(Column, NullValue): @public -def null(): - """Create a NULL/NA scalar.""" - return ops.NULL.to_expr() +def null(type: dt.DataType | str | None = None) -> Value: + """Create a NULL/NA scalar. + + By default, the type will be NULLTYPE. This is castable and comparable to any type, + but lacks datatype-specific methods: + + >>> import ibis + >>> ibis.null().upper().execute() is None # quartodoc: +EXPECTED_FAILURE + Traceback (most recent call last): + ... + AttributeError: 'NullScalar' object has no attribute 'upper' + >>> ibis.null(str).upper().execute() is None + True + """ + if type is None: + type = dt.null + return ops.Literal(None, type).to_expr() @public