From 1cc67c97ef32fb0d88689a21afc6e08b5734512a Mon Sep 17 00:00:00 2001 From: Daniel Mesejo Date: Tue, 8 Aug 2023 18:45:39 +0200 Subject: [PATCH] feat(datafusion): add coalesce, nullif, ifnull, zeroifnull --- ibis/backends/datafusion/compiler.py | 26 ++++++++++++++++++++++++++ ibis/backends/tests/test_generic.py | 7 ++----- ibis/expr/types/generic.py | 2 +- 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/ibis/backends/datafusion/compiler.py b/ibis/backends/datafusion/compiler.py index c2cb4d5087d6..807c8e4b3d06 100644 --- a/ibis/backends/datafusion/compiler.py +++ b/ibis/backends/datafusion/compiler.py @@ -526,6 +526,32 @@ def null_if_zero(op, **kw): return df.functions.nullif(arg, df.literal(0)) +@translate.register(ops.Coalesce) +def coalesce(op, **kw): + args = (translate(arg, **kw) for arg in op.arg) + return df.functions.coalesce(*args) + + +@translate.register(ops.NullIf) +def nullif(op, **kw): + arg = translate(op.arg, **kw) + null_if_value = translate(op.null_if_expr, **kw) + return df.functions.nullif(arg, null_if_value) + + +@translate.register(ops.IfNull) +def if_null(op, **kw): + arg = translate(op.arg, **kw) + ifnull_expr = translate(op.ifnull_expr, **kw) + return df.functions.coalesce(arg, ifnull_expr) + + +@translate.register(ops.ZeroIfNull) +def zero_if_null(op, **kw): + arg = translate(op.arg, **kw) + return df.functions.coalesce(arg, df.literal(0)) + + @translate.register(ops.Log) def log(op, **kw): arg = translate(op.arg, **kw) diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index ff1c178088c6..46a96735383c 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -108,7 +108,6 @@ def test_boolean_literal(con, backend): param(ibis.literal(10).nullif(5), 10, id="nullif_not_null"), ], ) -@pytest.mark.notimpl(["datafusion"]) def test_scalar_fillna_nullif(con, expr, expected): if expected is None: # The exact kind of null value used differs per backend (and version). @@ -168,6 +167,7 @@ def test_isna(backend, alltypes, col, filt): "snowflake", "polars", "trino", + "datafusion", ], reason="NaN != NULL for these backends", ), @@ -175,7 +175,7 @@ def test_isna(backend, alltypes, col, filt): ), ], ) -@pytest.mark.notimpl(["datafusion", "mssql", "druid", "oracle"]) +@pytest.mark.notimpl(["mssql", "druid", "oracle"]) def test_column_fillna(backend, alltypes, value): table = alltypes.mutate(missing=ibis.literal(value).cast("float64")) pd_table = table.execute() @@ -193,7 +193,6 @@ def test_column_fillna(backend, alltypes, value): param(ibis.coalesce(ibis.NA, ibis.NA, 3.14), 3.14, id="non_null_last"), ], ) -@pytest.mark.notimpl(["datafusion"]) def test_coalesce(con, expr, expected): result = con.execute(expr.name("tmp")) @@ -670,7 +669,6 @@ def test_logical_negation_column(backend, alltypes, df, op): backend.assert_series_equal(result, expected, check_names=False) -@pytest.mark.notimpl(["datafusion"]) @pytest.mark.parametrize( ("dtype", "zero", "expected"), [("int64", 0, 1), ("float64", 0.0, 1.0)], @@ -680,7 +678,6 @@ def test_zeroifnull_literals(con, dtype, zero, expected): assert con.execute(ibis.literal(expected, type=dtype).zeroifnull()) == expected -@pytest.mark.notimpl(["datafusion"]) def test_zeroifnull_column(backend, alltypes, df): expr = alltypes.int_col.nullif(1).zeroifnull().name("tmp") result = expr.execute().astype("int32") diff --git a/ibis/expr/types/generic.py b/ibis/expr/types/generic.py index 6ac863af7141..f6f8dc0d6bdc 100644 --- a/ibis/expr/types/generic.py +++ b/ibis/expr/types/generic.py @@ -279,7 +279,7 @@ def fillna(self, fill_value: Scalar) -> Value: def nullif(self, null_if_expr: Value) -> Value: """Set values to null if they equal the values `null_if_expr`. - Commonly use to avoid divide-by-zero problems by replacing zero with + Commonly used to avoid divide-by-zero problems by replacing zero with `NULL` in the divisor. Parameters