diff --git a/ibis/backends/postgres/registry.py b/ibis/backends/postgres/registry.py index 790546fa315f..0088649acd0f 100644 --- a/ibis/backends/postgres/registry.py +++ b/ibis/backends/postgres/registry.py @@ -605,5 +605,18 @@ def translate(t, op: ops.ArgMin | ops.ArgMax) -> str: ops.Map: fixed_arity(pg.hstore, 2), ops.ArgMin: _arg_min_max(sa.asc), ops.ArgMax: _arg_min_max(sa.desc), + ops.ToJSONArray: unary( + lambda arg: sa.case( + ( + sa.func.json_typeof(arg) == "array", + sa.func.array( + sa.select( + sa.func.json_array_elements(arg).column_valued() + ).scalar_subquery() + ), + ), + else_=sa.null(), + ) + ), } ) diff --git a/ibis/backends/snowflake/registry.py b/ibis/backends/snowflake/registry.py index 28354fb69d01..56bf749421b6 100644 --- a/ibis/backends/snowflake/registry.py +++ b/ibis/backends/snowflake/registry.py @@ -296,6 +296,8 @@ def _unnest(t, op): ops.Unnest: _unnest, ops.ArgMin: reduction(sa.func.min_by), ops.ArgMax: reduction(sa.func.max_by), + ops.ToJSONArray: lambda t, op: t.translate(ops.Cast(op.arg, op.output_dtype)), + ops.ToJSONMap: lambda t, op: t.translate(ops.Cast(op.arg, op.output_dtype)), } ) diff --git a/ibis/backends/tests/test_json.py b/ibis/backends/tests/test_json.py index ebe22d33c41d..5a8d892b808e 100644 --- a/ibis/backends/tests/test_json.py +++ b/ibis/backends/tests/test_json.py @@ -5,10 +5,13 @@ import pytest from pytest import param +pytestmark = [ + pytest.mark.never(["impala"], reason="doesn't support JSON and never will"), + pytest.mark.notyet(["clickhouse"], reason="upstream is broken"), + pytest.mark.notimpl(["datafusion", "mssql"]), +] + -@pytest.mark.notimpl(["datafusion", "mssql"]) -@pytest.mark.notyet(["clickhouse"], reason="upstream is broken") -@pytest.mark.never(["impala"], reason="doesn't support JSON and never will") @pytest.mark.parametrize( ("expr_fn", "expected"), [ @@ -19,8 +22,8 @@ name="res", dtype="object", ), - id="getitem_object", marks=[pytest.mark.min_server_version(sqlite="3.38.0")], + 
id="getitem_object", ), param( lambda t: t.js[1].name("res"), @@ -38,3 +41,42 @@ def test_json_getitem(json_t, expr_fn, expected): expr = expr_fn(json_t) result = expr.execute() tm.assert_series_equal(result, expected) + + +@pytest.mark.notimpl(["dask", "duckdb", "mysql", "pandas"]) +@pytest.mark.notyet(["bigquery", "sqlite"], reason="doesn't support maps") +@pytest.mark.notyet(["postgres"], reason="only supports map") +@pytest.mark.notyet( + ["pyspark", "trino"], reason="should work but doesn't deserialize JSON" +) +def test_json_map(json_t): + expr = json_t.js.map.name("res") + result = expr.execute() + expected = pd.Series( + [ + {'a': [1, 2, 3, 4], 'b': 1}, + {'a': None, 'b': 2}, + {'a': 'foo', 'c': None}, + None, + None, + None, + ], + dtype="object", + name="res", + ) + tm.assert_series_equal(result, expected) + + +@pytest.mark.notimpl(["dask", "duckdb", "mysql", "pandas"]) +@pytest.mark.notyet(["sqlite"], reason="doesn't support arrays") +@pytest.mark.notyet( + ["pyspark", "trino"], reason="should work but doesn't deserialize JSON" +) +@pytest.mark.notyet(["bigquery"], reason="doesn't allow null in arrays") +def test_json_array(json_t): + expr = json_t.js.array.name("res") + result = expr.execute() + expected = pd.Series( + [None, None, None, None, [42, 47, 55], []], name="res", dtype="object" + ) + tm.assert_series_equal(result, expected) diff --git a/ibis/expr/operations/json.py b/ibis/expr/operations/json.py index cd1723884e88..daaa8083bcc7 100644 --- a/ibis/expr/operations/json.py +++ b/ibis/expr/operations/json.py @@ -14,3 +14,19 @@ class JSONGetItem(Value): output_dtype = dt.json output_shape = rlz.shape_like("args") + + +@public +class ToJSONArray(Value): + arg = rlz.json + + output_dtype = dt.Array(dt.json) + output_shape = rlz.shape_like("arg") + + +@public +class ToJSONMap(Value): + arg = rlz.json + + output_dtype = dt.Map(dt.string, dt.json) + output_shape = rlz.shape_like("arg") diff --git a/ibis/expr/types/json.py b/ibis/expr/types/json.py index 
12cbbd209881..640af76ce0a7 100644 --- a/ibis/expr/types/json.py +++ b/ibis/expr/types/json.py @@ -6,6 +6,7 @@ from public import public +import ibis.expr.operations as ops from ibis.expr.types import Column, Scalar, Value if TYPE_CHECKING: @@ -17,10 +18,112 @@ class JSONValue(Value): def __getitem__( self, key: str | int | ir.StringValue | ir.IntegerValue ) -> JSONValue: - import ibis.expr.operations as ops + """Access a JSON object's value or JSON array's element at `key`. + Parameters + ---------- + key + Object field name or integer array index + + Returns + ------- + JSONValue + Element located at `key` + + Examples + -------- + Construct a table with a JSON column + + >>> import json, ibis + >>> ibis.options.interactive = True + >>> rows = [{"js": json.dumps({"a": [i, 1]})} for i in range(2)] + >>> t = ibis.memtable(rows, schema=ibis.schema(js="json")) + >>> t + ┏━━━━━━━━━━━━━━━┓ + ┃ js ┃ + ┡━━━━━━━━━━━━━━━┩ + │ json │ + ├───────────────┤ + │ {'a': [0, 1]} │ + │ {'a': [1, 1]} │ + └───────────────┘ + + Extract the `"a"` field + + >>> t.js["a"] + ┏━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ JSONGetItem(js, 'a') ┃ + ┡━━━━━━━━━━━━━━━━━━━━━━┩ + │ json │ + ├──────────────────────┤ + │ [0, 1] │ + │ [1, 1] │ + └──────────────────────┘ + + Extract the first element of the JSON array at `"a"` + + >>> t.js["a"][0] + ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ JSONGetItem(JSONGetItem(js, 'a'), 0) ┃ + ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ + │ json │ + ├──────────────────────────────────────┤ + │ 0 │ + │ 1 │ + └──────────────────────────────────────┘ + + Extract a non-existent field + + >>> t.js["a"]["foo"] + ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ JSONGetItem(JSONGetItem(js, 'a'), 'foo') ┃ + ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ + │ json │ + ├──────────────────────────────────────────┤ + │ ∅ │ + │ ∅ │ + └──────────────────────────────────────────┘ + + Try to extract an array element, returns `NULL` + + >>> t.js[20] + ┏━━━━━━━━━━━━━━━━━━━━━┓ + ┃ JSONGetItem(js, 20) ┃ + 
┡━━━━━━━━━━━━━━━━━━━━━┩ + │ json │ + ├─────────────────────┤ + │ ∅ │ + │ ∅ │ + └─────────────────────┘ + """ return ops.JSONGetItem(self, key).to_expr() + @property + def map(self) -> ir.MapValue: + """Cast JSON to a map of string to JSON. + + Use this property to unlock map functionality on JSON objects. + + Returns + ------- + MapValue + Map of string to JSON + """ + return ops.ToJSONMap(self).to_expr() + + @property + def array(self) -> ir.ArrayValue: + """Cast JSON to an array of JSON. + + Use this property to unlock array functionality on JSON arrays. + + Returns + ------- + ArrayValue + Array of JSON values + """ + return ops.ToJSONArray(self).to_expr() + @public class JSONScalar(Scalar, JSONValue):