From dedb3a63e9c99b53ccdcf040b3051ef114b8a218 Mon Sep 17 00:00:00 2001 From: John Bodley <4567245+john-bodley@users.noreply.github.com> Date: Thu, 3 Nov 2022 17:29:10 -0700 Subject: [PATCH] fix: JSON serializers (#22029) (cherry picked from commit 6bbf4f8718ce9054d6c4c75b532576184e1c3ef6) --- superset/models/core.py | 11 +++- superset/utils/core.py | 76 ++++++++++++++++---------- tests/integration_tests/utils_tests.py | 33 +++++++---- 3 files changed, 79 insertions(+), 41 deletions(-) diff --git a/superset/models/core.py b/superset/models/core.py index 8b937562a1d24..6e85b1655ad70 100755 --- a/superset/models/core.py +++ b/superset/models/core.py @@ -719,9 +719,14 @@ def get_pk_constraint( self, table_name: str, schema: Optional[str] = None ) -> Dict[str, Any]: pk_constraint = self.inspector.get_pk_constraint(table_name, schema) or {} - return { - key: utils.base_json_conv(value) for key, value in pk_constraint.items() - } + + def _convert(value: Any) -> Any: + try: + return utils.base_json_conv(value) + except TypeError: + return None + + return {key: _convert(value) for key, value in pk_constraint.items()} def get_foreign_keys( self, table_name: str, schema: Optional[str] = None diff --git a/superset/utils/core.py b/superset/utils/core.py index 46318dd50ea03..21f35965f4f90 100644 --- a/superset/utils/core.py +++ b/superset/utils/core.py @@ -550,9 +550,16 @@ def format_timedelta(time_delta: timedelta) -> str: return str(time_delta) -def base_json_conv( # pylint: disable=inconsistent-return-statements - obj: Any, -) -> Any: +def base_json_conv(obj: Any) -> Any: + """ + Tries to convert additional types to JSON compatible forms. 
+ + :param obj: The serializable object + :returns: The JSON compatible form + :raises TypeError: If the object cannot be serialized + :see: https://docs.python.org/3/library/json.html#encoders-and-decoders + """ + if isinstance(obj, memoryview): obj = obj.tobytes() if isinstance(obj, np.int64): @@ -575,47 +582,60 @@ def base_json_conv( # pylint: disable=inconsistent-return-statements except Exception: # pylint: disable=broad-except return "[bytes]" + raise TypeError(f"Unserializable object {obj} of type {type(obj)}") + -def json_iso_dttm_ser(obj: Any, pessimistic: bool = False) -> str: +def json_iso_dttm_ser(obj: Any, pessimistic: bool = False) -> Any: """ - json serializer that deals with dates + A JSON serializer that deals with dates by serializing them to ISO 8601. - >>> dttm = datetime(1970, 1, 1) - >>> json.dumps({'dttm': dttm}, default=json_iso_dttm_ser) - '{"dttm": "1970-01-01T00:00:00"}' + >>> json.dumps({'dttm': datetime(1970, 1, 1)}, default=json_iso_dttm_ser) + '{"dttm": "1970-01-01T00:00:00"}' + + :param obj: The serializable object + :param pessimistic: Whether to be pessimistic regarding serialization + :returns: The JSON compatible form + :raises TypeError: If the non-pessimistic object cannot be serialized """ - val = base_json_conv(obj) - if val is not None: - return val + if isinstance(obj, (datetime, date, pd.Timestamp)): - obj = obj.isoformat() - else: + return obj.isoformat() + + try: + return base_json_conv(obj) + except TypeError as ex: if pessimistic: - return "Unserializable [{}]".format(type(obj)) + return f"Unserializable [{type(obj)}]" - raise TypeError("Unserializable object {} of type {}".format(obj, type(obj))) - return obj + raise ex -def pessimistic_json_iso_dttm_ser(obj: Any) -> str: +def pessimistic_json_iso_dttm_ser(obj: Any) -> Any: """Proxy to call json_iso_dttm_ser in a pessimistic way If one of object is not serializable to json, it will still succeed""" return json_iso_dttm_ser(obj, pessimistic=True) -def 
json_int_dttm_ser(obj: Any) -> float: - """json serializer that deals with dates""" - val = base_json_conv(obj) - if val is not None: - return val +def json_int_dttm_ser(obj: Any) -> Any: + """ + A JSON serializer that deals with dates by serializing them to EPOCH. + + >>> json.dumps({'dttm': datetime(1970, 1, 1)}, default=json_int_dttm_ser) + '{"dttm": 0.0}' + + :param obj: The serializable object + :returns: The JSON compatible form + :raises TypeError: If the object cannot be serialized + """ + if isinstance(obj, (datetime, pd.Timestamp)): - obj = datetime_to_epoch(obj) - elif isinstance(obj, date): - obj = (obj - EPOCH.date()).total_seconds() * 1000 - else: - raise TypeError("Unserializable object {} of type {}".format(obj, type(obj))) - return obj + return datetime_to_epoch(obj) + + if isinstance(obj, date): + return (obj - EPOCH.date()).total_seconds() * 1000 + + return base_json_conv(obj) def json_dumps_w_dates(payload: Dict[Any, Any], sort_keys: bool = False) -> str: diff --git a/tests/integration_tests/utils_tests.py b/tests/integration_tests/utils_tests.py index da1567cab4c14..d60d46764784f 100644 --- a/tests/integration_tests/utils_tests.py +++ b/tests/integration_tests/utils_tests.py @@ -93,9 +93,10 @@ def test_json_int_dttm_ser(self): assert json_int_dttm_ser(datetime(1970, 1, 1)) == 0 assert json_int_dttm_ser(date(1970, 1, 1)) == 0 assert json_int_dttm_ser(dttm + timedelta(milliseconds=1)) == (ts + 1) + assert json_int_dttm_ser(np.int64(1)) == 1 with self.assertRaises(TypeError): - json_int_dttm_ser("this is not a date") + json_int_dttm_ser(np.datetime64()) def test_json_iso_dttm_ser(self): dttm = datetime(2020, 1, 1) @@ -104,19 +105,31 @@ def test_json_iso_dttm_ser(self): assert json_iso_dttm_ser(dttm) == dttm.isoformat() assert json_iso_dttm_ser(dt) == dt.isoformat() assert json_iso_dttm_ser(t) == t.isoformat() + assert json_iso_dttm_ser(np.int64(1)) == 1 + + assert ( + json_iso_dttm_ser(np.datetime64(), pessimistic=True) + == "Unserializable [<class 'numpy.datetime64'>]" +
) with self.assertRaises(TypeError): - json_iso_dttm_ser("this is not a date") + json_iso_dttm_ser(np.datetime64()) def test_base_json_conv(self): - assert isinstance(base_json_conv(np.bool_(1)), bool) is True - assert isinstance(base_json_conv(np.int64(1)), int) is True - assert isinstance(base_json_conv(np.array([1, 2, 3])), list) is True - assert isinstance(base_json_conv(set([1])), list) is True - assert isinstance(base_json_conv(Decimal("1.0")), float) is True - assert isinstance(base_json_conv(uuid.uuid4()), str) is True - assert isinstance(base_json_conv(time()), str) is True - assert isinstance(base_json_conv(timedelta(0)), str) is True + assert isinstance(base_json_conv(np.bool_(1)), bool) + assert isinstance(base_json_conv(np.int64(1)), int) + assert isinstance(base_json_conv(np.array([1, 2, 3])), list) + assert base_json_conv(np.array(None)) is None + assert isinstance(base_json_conv(set([1])), list) + assert isinstance(base_json_conv(Decimal("1.0")), float) + assert isinstance(base_json_conv(uuid.uuid4()), str) + assert isinstance(base_json_conv(time()), str) + assert isinstance(base_json_conv(timedelta(0)), str) + assert isinstance(base_json_conv(bytes()), str) + assert base_json_conv(bytes("", encoding="utf-16")) == "[bytes]" + + with pytest.raises(TypeError): + base_json_conv(np.datetime64()) def test_zlib_compression(self): json_str = '{"test": 1}'