Skip to content

Commit

Permalink
feat(postgres): support basic jsonb types and getitem
Browse files Browse the repository at this point in the history
  • Loading branch information
cpcloud committed Jul 18, 2024
1 parent 0fbec3f commit 15b5fd1
Show file tree
Hide file tree
Showing 7 changed files with 151 additions and 36 deletions.
20 changes: 20 additions & 0 deletions ci/schema/postgres.sql
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,26 @@ INSERT INTO json_t VALUES
(13, '42'),
(14, '37.37');

DROP TABLE IF EXISTS jsonb_t CASCADE;

CREATE TABLE IF NOT EXISTS jsonb_t (rowid BIGINT, js JSONB);

INSERT INTO jsonb_t VALUES
(1, '{"a": [1,2,3,4], "b": 1}'),
(2, '{"a":null,"b":2}'),
(3, '{"a":"foo", "c":null}'),
(4, 'null'),
(5, '[42,47,55]'),
(6, '[]'),
(7, '"a"'),
(8, '""'),
(9, '"b"'),
(10, NULL),
(11, 'true'),
(12, 'false'),
(13, '42'),
(14, '37.37');

DROP TABLE IF EXISTS win CASCADE;
CREATE TABLE win (g TEXT, x BIGINT NOT NULL, y BIGINT);
INSERT INTO win VALUES
Expand Down
78 changes: 78 additions & 0 deletions ibis/backends/postgres/tests/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,20 @@

import json

import numpy as np
import pandas as pd
import pandas.testing as tm

Check warning on line 9 in ibis/backends/postgres/tests/test_json.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/postgres/tests/test_json.py#L7-L9

Added lines #L7 - L9 were not covered by tests
import pytest
from pytest import param

import ibis
import ibis.expr.datatypes as dt
import ibis.expr.types as ir

Check warning on line 15 in ibis/backends/postgres/tests/test_json.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/postgres/tests/test_json.py#L14-L15

Added lines #L14 - L15 were not covered by tests


@pytest.fixture(scope="module")

Check warning on line 18 in ibis/backends/postgres/tests/test_json.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/postgres/tests/test_json.py#L18

Added line #L18 was not covered by tests
def jsonb_t(con):
return con.table("jsonb_t")

Check warning on line 20 in ibis/backends/postgres/tests/test_json.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/postgres/tests/test_json.py#L20

Added line #L20 was not covered by tests


@pytest.mark.parametrize("data", [param({"status": True}, id="status")])
Expand All @@ -16,3 +26,71 @@ def test_json(data, alltypes):
expr = alltypes[[alltypes.id, lit]].head(1)
df = expr.execute()
assert df["tmp"].iloc[0] == data


def test_jsonb_extract_path(con):
json_t = con.table("json_t")
jsonb_t = con.table("jsonb_t")

Check warning on line 33 in ibis/backends/postgres/tests/test_json.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/postgres/tests/test_json.py#L31-L33

Added lines #L31 - L33 were not covered by tests

assert json_t.js.type() == dt.JSON(binary=False)
assert jsonb_t.js.type() == dt.JSON(binary=True)

Check warning on line 36 in ibis/backends/postgres/tests/test_json.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/postgres/tests/test_json.py#L35-L36

Added lines #L35 - L36 were not covered by tests

tm.assert_series_equal(jsonb_t.js["a"].execute(), json_t.js["a"].execute())

Check warning on line 38 in ibis/backends/postgres/tests/test_json.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/postgres/tests/test_json.py#L38

Added line #L38 was not covered by tests


def test_json_getitem_object(jsonb_t):

Check warning on line 41 in ibis/backends/postgres/tests/test_json.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/postgres/tests/test_json.py#L41

Added line #L41 was not covered by tests
expr_fn = lambda t: t.js["a"].name("res")
expected = frozenset([(1, 2, 3, 4), None, "foo"] + [None] * 3)
expr = expr_fn(jsonb_t)

Check warning on line 44 in ibis/backends/postgres/tests/test_json.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/postgres/tests/test_json.py#L43-L44

Added lines #L43 - L44 were not covered by tests
result = frozenset(
expr.execute()
.map(lambda o: tuple(o) if isinstance(o, list) else o)
.replace({np.nan: None})
)
assert result == expected

Check warning on line 50 in ibis/backends/postgres/tests/test_json.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/postgres/tests/test_json.py#L50

Added line #L50 was not covered by tests


def test_json_getitem_array(jsonb_t):

Check warning on line 53 in ibis/backends/postgres/tests/test_json.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/postgres/tests/test_json.py#L53

Added line #L53 was not covered by tests
expr_fn = lambda t: t.js[1].name("res")
expected = frozenset([None] * 4 + [47, None])
expr = expr_fn(jsonb_t)
result = frozenset(expr.execute().replace({np.nan: None}))
assert result == expected

Check warning on line 58 in ibis/backends/postgres/tests/test_json.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/postgres/tests/test_json.py#L55-L58

Added lines #L55 - L58 were not covered by tests


def test_json_array(jsonb_t):
expr = jsonb_t.mutate("rowid", res=jsonb_t.js.array).order_by("rowid")
result = expr.execute().res
expected = pd.Series(

Check warning on line 64 in ibis/backends/postgres/tests/test_json.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/postgres/tests/test_json.py#L61-L64

Added lines #L61 - L64 were not covered by tests
[None, None, None, None, [42, 47, 55], []] + [None] * 8,
name="res",
dtype="object",
)
tm.assert_series_equal(result, expected)

Check warning on line 69 in ibis/backends/postgres/tests/test_json.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/postgres/tests/test_json.py#L69

Added line #L69 was not covered by tests


@pytest.mark.parametrize(
("typ", "expected_data"),
[
("str", [None] * 6 + ["a", "", "b"] + [None] * 5),
("int", [None] * 12 + [42, None]),
("float", [None] * 12 + [42.0, 37.37]),
("bool", [None] * 10 + [True, False, None, None]),
],
ids=["str", "int", "float", "bool"],
)
@pytest.mark.parametrize(
"expr_fn", [getattr, ir.JSONValue.unwrap_as], ids=["getattr", "unwrap_as"]
)
def test_json_unwrap(jsonb_t, typ, expected_data, expr_fn):
expr = expr_fn(jsonb_t.js, typ).name("res")
result = expr.execute()
expected = pd.Series(expected_data, name="res", dtype="object")
tm.assert_series_equal(

Check warning on line 89 in ibis/backends/postgres/tests/test_json.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/postgres/tests/test_json.py#L86-L89

Added lines #L86 - L89 were not covered by tests
result.replace(np.nan, None).fillna(pd.NA).sort_values().reset_index(drop=True),
expected.replace(np.nan, None)
.fillna(pd.NA)
.sort_values()
.reset_index(drop=True),
check_dtype=False,
)
64 changes: 34 additions & 30 deletions ibis/backends/sql/compilers/postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,66 +315,70 @@ def visit_StructField(self, op, *, arg, field):
#
# but also postgres should really support anonymous structs
return self.cast(
self.f.jsonb_extract_path(self.f.to_jsonb(arg), sge.convert(f"f{idx:d}")),
op.dtype,
self.f.jsonb_extract_path(self.f.to_jsonb(arg), f"f{idx:d}"), op.dtype
)

def json_typeof(self, op, arg):
b = "b" * op.arg.dtype.binary
return self.f[f"json{b}_typeof"](arg)

Check warning on line 323 in ibis/backends/sql/compilers/postgres.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/sql/compilers/postgres.py#L322-L323

Added lines #L322 - L323 were not covered by tests

def json_extract_path_text(self, op, arg, *rest):
b = "b" * op.arg.dtype.binary
return self.f[f"json{b}_extract_path_text"](

Check warning on line 327 in ibis/backends/sql/compilers/postgres.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/sql/compilers/postgres.py#L326-L327

Added lines #L326 - L327 were not covered by tests
arg,
*rest,
# this is apparently how you pass in no additional arguments to
# a variadic function, see the "Variadic Function Resolution"
# section in
# https://www.postgresql.org/docs/current/typeconv-func.html
sge.Var(this="VARIADIC ARRAY[]::TEXT[]"),
)

def visit_UnwrapJSONString(self, op, *, arg):
return self.if_(
self.f.json_typeof(arg).eq(sge.convert("string")),
self.f.json_extract_path_text(
arg,
# this is apparently how you pass in no additional arguments to
# a variadic function, see the "Variadic Function Resolution"
# section in
# https://www.postgresql.org/docs/current/typeconv-func.html
sge.Var(this="VARIADIC ARRAY[]::TEXT[]"),
),
self.json_typeof(op, arg).eq(sge.convert("string")),
self.json_extract_path_text(op, arg),
NULL,
)

def visit_UnwrapJSONInt64(self, op, *, arg):
text = self.f.json_extract_path_text(
arg, sge.Var(this="VARIADIC ARRAY[]::TEXT[]")
)
text = self.json_extract_path_text(op, arg)

Check warning on line 345 in ibis/backends/sql/compilers/postgres.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/sql/compilers/postgres.py#L345

Added line #L345 was not covered by tests
return self.if_(
self.f.json_typeof(arg).eq(sge.convert("number")),
self.json_typeof(op, arg).eq(sge.convert("number")),
self.cast(
self.if_(self.f.regexp_like(text, r"^\d+$", "g"), text, NULL),
op.dtype,
self.if_(self.f.regexp_like(text, r"^\d+$", "g"), text, NULL), op.dtype
),
NULL,
)

def visit_UnwrapJSONFloat64(self, op, *, arg):
text = self.f.json_extract_path_text(
arg, sge.Var(this="VARIADIC ARRAY[]::TEXT[]")
)
text = self.json_extract_path_text(op, arg)

Check warning on line 355 in ibis/backends/sql/compilers/postgres.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/sql/compilers/postgres.py#L355

Added line #L355 was not covered by tests
return self.if_(
self.f.json_typeof(arg).eq(sge.convert("number")),
self.json_typeof(op, arg).eq(sge.convert("number")),
self.cast(text, op.dtype),
NULL,
)

def visit_UnwrapJSONBoolean(self, op, *, arg):
return self.if_(
self.f.json_typeof(arg).eq(sge.convert("boolean")),
self.cast(
self.f.json_extract_path_text(
arg, sge.Var(this="VARIADIC ARRAY[]::TEXT[]")
),
op.dtype,
),
self.json_typeof(op, arg).eq(sge.convert("boolean")),
self.cast(self.json_extract_path_text(op, arg), op.dtype),
NULL,
)

def visit_JSONGetItem(self, op, *, arg, index):
if op.arg.dtype.binary:
return self.f.jsonb_extract_path(arg, self.cast(index, dt.string))
return super().visit_JSONGetItem(op, arg=arg, index=index)

Check warning on line 372 in ibis/backends/sql/compilers/postgres.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/sql/compilers/postgres.py#L371-L372

Added lines #L371 - L372 were not covered by tests

def visit_StructColumn(self, op, *, names, values):
return self.f.row(*map(self.cast, values, op.dtype.types))

def visit_ToJSONArray(self, op, *, arg):
b = "b" * op.arg.dtype.binary

Check warning on line 378 in ibis/backends/sql/compilers/postgres.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/sql/compilers/postgres.py#L378

Added line #L378 was not covered by tests
return self.if_(
self.f.json_typeof(arg).eq(sge.convert("array")),
self.f.array(sg.select(STAR).from_(self.f.json_array_elements(arg))),
self.json_typeof(op, arg).eq(sge.convert("array")),
self.f.array(sg.select(STAR).from_(self.f[f"json{b}_array_elements"](arg))),
NULL,
)

Expand Down
7 changes: 5 additions & 2 deletions ibis/backends/sql/datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
typecode.INT: dt.Int32,
typecode.IPADDRESS: dt.INET,
typecode.JSON: dt.JSON,
typecode.JSONB: dt.JSON,
typecode.JSONB: partial(dt.JSON, binary=True),
typecode.LONGBLOB: dt.Binary,
typecode.LONGTEXT: dt.String,
typecode.MEDIUMBLOB: dt.Binary,
Expand Down Expand Up @@ -115,7 +115,6 @@
dt.Float64: typecode.DOUBLE,
dt.String: typecode.VARCHAR,
dt.Binary: typecode.VARBINARY,
dt.JSON: typecode.JSON,
dt.INET: typecode.INET,
dt.UUID: typecode.UUID,
dt.MACADDR: typecode.VARCHAR,
Expand Down Expand Up @@ -325,6 +324,10 @@ def _from_sqlglot_GEOGRAPHY(
srid = int(srid.this.this)
return typeclass(geotype="geography", nullable=cls.default_nullable, srid=srid)

@classmethod
def _from_ibis_JSON(cls, dtype: dt.JSON) -> sge.DataType:
return sge.DataType(this=typecode.JSONB if dtype.binary else typecode.JSON)

@classmethod
def _from_ibis_Interval(cls, dtype: dt.Interval) -> sge.DataType:
assert dtype.unit is not None, "interval unit cannot be None"
Expand Down
3 changes: 3 additions & 0 deletions ibis/expr/datatypes/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -936,6 +936,9 @@ class JSON(Variadic):
scalar = "JSONScalar"
column = "JSONColumn"

binary: bool = False
"""True if JSON is stored as binary, e.g., JSONB in PostgreSQL."""


@public
class GeoSpatial(DataType):
Expand Down
13 changes: 10 additions & 3 deletions ibis/expr/operations/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import ibis.expr.datatypes as dt
import ibis.expr.rules as rlz
from ibis.common.annotations import attribute
from ibis.expr.operations import Value


Expand All @@ -16,7 +17,7 @@ class JSONGetItem(Value):
arg: Value[dt.JSON]
index: Value[dt.String | dt.Integer]

dtype = dt.json
dtype = rlz.dtype_like("arg")
shape = rlz.shape_like("args")


Expand All @@ -26,19 +27,25 @@ class ToJSONArray(Value):

arg: Value[dt.JSON]

dtype = dt.Array(dt.json)
shape = rlz.shape_like("arg")

@attribute
def dtype(self) -> dt.DataType:
return dt.Array(self.arg.dtype)


@public
class ToJSONMap(Value):
"""Convert a value to a map of string to JSON."""

arg: Value[dt.JSON]

dtype = dt.Map(dt.string, dt.json)
shape = rlz.shape_like("arg")

@attribute
def dtype(self) -> dt.DataType:
return dt.Map(dt.string, self.arg.dtype)


@public
class UnwrapJSONString(Value):
Expand Down
2 changes: 1 addition & 1 deletion ibis/tests/strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def binary_dtype(nullable=_nullable):


def json_dtype(nullable=_nullable):
return st.builds(dt.JSON, nullable=nullable)
return st.builds(dt.JSON, binary=st.booleans(), nullable=nullable)


def inet_dtype(nullable=_nullable):
Expand Down

0 comments on commit 15b5fd1

Please sign in to comment.