Skip to content

Commit

Permalink
feat(api): add map/array accessors for easy conversion of JSON to…
Browse files Browse the repository at this point in the history
… stronger-typed values
  • Loading branch information
cpcloud committed Feb 6, 2023
1 parent bb0ee78 commit d1e9d11
Show file tree
Hide file tree
Showing 5 changed files with 181 additions and 5 deletions.
13 changes: 13 additions & 0 deletions ibis/backends/postgres/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -605,5 +605,18 @@ def translate(t, op: ops.ArgMin | ops.ArgMax) -> str:
ops.Map: fixed_arity(pg.hstore, 2),
ops.ArgMin: _arg_min_max(sa.asc),
ops.ArgMax: _arg_min_max(sa.desc),
ops.ToJSONArray: unary(
lambda arg: sa.case(
(
sa.func.json_typeof(arg) == "array",
sa.func.array(
sa.select(
sa.func.json_array_elements(arg).column_valued()
).scalar_subquery()
),
),
else_=sa.null(),
)
),
}
)
2 changes: 2 additions & 0 deletions ibis/backends/snowflake/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,8 @@ def _unnest(t, op):
ops.Unnest: _unnest,
ops.ArgMin: reduction(sa.func.min_by),
ops.ArgMax: reduction(sa.func.max_by),
ops.ToJSONArray: lambda t, op: t.translate(ops.Cast(op.arg, op.output_dtype)),
ops.ToJSONMap: lambda t, op: t.translate(ops.Cast(op.arg, op.output_dtype)),
}
)

Expand Down
50 changes: 46 additions & 4 deletions ibis/backends/tests/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,13 @@
import pytest
from pytest import param

# Module-level pytest marks: pytest applies every mark listed in `pytestmark`
# to each test collected from this module, so backend-wide expectations are
# declared once here instead of being repeated on every test function.
pytestmark = [
    pytest.mark.never(["impala"], reason="doesn't support JSON and never will"),
    pytest.mark.notyet(["clickhouse"], reason="upstream is broken"),
    pytest.mark.notimpl(["datafusion", "mssql"]),
]


@pytest.mark.notimpl(["datafusion", "mssql"])
@pytest.mark.notyet(["clickhouse"], reason="upstream is broken")
@pytest.mark.never(["impala"], reason="doesn't support JSON and never will")
@pytest.mark.parametrize(
("expr_fn", "expected"),
[
Expand All @@ -19,8 +22,8 @@
name="res",
dtype="object",
),
id="getitem_object",
marks=[pytest.mark.min_server_version(sqlite="3.38.0")],
id="getitem_object",
),
param(
lambda t: t.js[1].name("res"),
Expand All @@ -38,3 +41,42 @@ def test_json_getitem(json_t, expr_fn, expected):
expr = expr_fn(json_t)
result = expr.execute()
tm.assert_series_equal(result, expected)


@pytest.mark.notimpl(["dask", "duckdb", "mysql", "pandas"])
@pytest.mark.notyet(["bigquery", "sqlite"], reason="doesn't support maps")
@pytest.mark.notyet(["postgres"], reason="only supports map<string, string>")
@pytest.mark.notyet(
    ["pyspark", "trino"], reason="should work but doesn't deserialize JSON"
)
def test_json_map(json_t):
    """Check that the `.map` accessor on the JSON column yields the expected rows.

    The expected series holds a dict for the first three rows and `None` for
    the remaining three.
    """
    expected_rows = [
        {"a": [1, 2, 3, 4], "b": 1},
        {"a": None, "b": 2},
        {"a": "foo", "c": None},
        None,
        None,
        None,
    ]
    expected = pd.Series(expected_rows, dtype="object", name="res")

    result = json_t.js.map.name("res").execute()

    tm.assert_series_equal(result, expected)


@pytest.mark.notimpl(["dask", "duckdb", "mysql", "pandas"])
@pytest.mark.notyet(["sqlite"], reason="doesn't support arrays")
@pytest.mark.notyet(
    ["pyspark", "trino"], reason="should work but doesn't deserialize JSON"
)
@pytest.mark.notyet(["bigquery"], reason="doesn't allow null in arrays")
def test_json_array(json_t):
    """Check that the `.array` accessor on the JSON column yields the expected rows.

    The expected series is `None` for the first four rows, followed by a
    populated list and an empty list.
    """
    expected_rows = [None, None, None, None, [42, 47, 55], []]
    expected = pd.Series(expected_rows, name="res", dtype="object")

    result = json_t.js.array.name("res").execute()

    tm.assert_series_equal(result, expected)
16 changes: 16 additions & 0 deletions ibis/expr/operations/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,19 @@ class JSONGetItem(Value):

output_dtype = dt.json
output_shape = rlz.shape_like("args")


@public
class ToJSONArray(Value):
    """Operation node for converting a JSON value into an array of JSON."""

    # The input expression, declared with the `rlz.json` rule.
    arg = rlz.json

    # The result is typed as an array whose element type is JSON.
    output_dtype = dt.Array(dt.json)
    # The result's shape is derived from `arg` via `rlz.shape_like`.
    output_shape = rlz.shape_like("arg")


@public
class ToJSONMap(Value):
    """Operation node for converting a JSON value into a map of string to JSON."""

    # The input expression, declared with the `rlz.json` rule.
    arg = rlz.json

    # The result is typed as a map with string keys and JSON values.
    output_dtype = dt.Map(dt.string, dt.json)
    # The result's shape is derived from `arg` via `rlz.shape_like`.
    output_shape = rlz.shape_like("arg")
105 changes: 104 additions & 1 deletion ibis/expr/types/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from public import public

import ibis.expr.operations as ops
from ibis.expr.types import Column, Scalar, Value

if TYPE_CHECKING:
Expand All @@ -17,10 +18,112 @@ class JSONValue(Value):
def __getitem__(
    self, key: str | int | ir.StringValue | ir.IntegerValue
) -> JSONValue:
    """Access a JSON object's value or JSON array's element at `key`.

    Parameters
    ----------
    key
        Object field name or integer array index

    Returns
    -------
    JSONValue
        Element located at `key`

    Examples
    --------
    Construct a table with a JSON column

    >>> import json, ibis
    >>> ibis.options.interactive = True
    >>> rows = [{"js": json.dumps({"a": [i, 1]})} for i in range(2)]
    >>> t = ibis.memtable(rows, schema=ibis.schema(js="json"))
    >>> t
    ┏━━━━━━━━━━━━━━━┓
    ┃ js            ┃
    ┡━━━━━━━━━━━━━━━┩
    │ json          │
    ├───────────────┤
    │ {'a': [0, 1]} │
    │ {'a': [1, 1]} │
    └───────────────┘

    Extract the `"a"` field

    >>> t.js["a"]
    ┏━━━━━━━━━━━━━━━━━━━━━━┓
    ┃ JSONGetItem(js, 'a') ┃
    ┡━━━━━━━━━━━━━━━━━━━━━━┩
    │ json                 │
    ├──────────────────────┤
    │ [0, 1]               │
    │ [1, 1]               │
    └──────────────────────┘

    Extract the first element of the JSON array at `"a"`

    >>> t.js["a"][0]
    ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
    ┃ JSONGetItem(JSONGetItem(js, 'a'), 0) ┃
    ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
    │ json                                 │
    ├──────────────────────────────────────┤
    │ 0                                    │
    │ 1                                    │
    └──────────────────────────────────────┘

    Extract a non-existent field

    >>> t.js["a"]["foo"]
    ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
    ┃ JSONGetItem(JSONGetItem(js, 'a'), 'foo') ┃
    ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
    │ json                                     │
    ├──────────────────────────────────────────┤
    │ ∅                                        │
    │ ∅                                        │
    └──────────────────────────────────────────┘

    Try to extract an array element, returns `NULL`

    >>> t.js[20]
    ┏━━━━━━━━━━━━━━━━━━━━━┓
    ┃ JSONGetItem(js, 20) ┃
    ┡━━━━━━━━━━━━━━━━━━━━━┩
    │ json                │
    ├─────────────────────┤
    │ ∅                   │
    │ ∅                   │
    └─────────────────────┘
    """
    # `ops` is the module-level `ibis.expr.operations` import; no
    # function-local import is needed, which also keeps the docstring as the
    # first statement so it is picked up as `__doc__`.
    return ops.JSONGetItem(self, key).to_expr()

@property
def map(self) -> ir.MapValue:
    """Cast JSON to a map of string to JSON.

    Use this property to unlock map functionality on JSON objects.

    Returns
    -------
    MapValue
        Map of string to JSON
    """
    to_map_op = ops.ToJSONMap(self)
    return to_map_op.to_expr()

@property
def array(self) -> ir.ArrayValue:
    """Cast JSON to an array of JSON.

    Use this property to unlock array functionality on JSON arrays.

    Returns
    -------
    ArrayValue
        Array of JSON values
    """
    to_array_op = ops.ToJSONArray(self)
    return to_array_op.to_expr()


@public
class JSONScalar(Scalar, JSONValue):
Expand Down

0 comments on commit d1e9d11

Please sign in to comment.