Type annotations for canonicaljson #49

Merged: 5 commits, Apr 25, 2022

5 changes: 5 additions & 0 deletions .github/workflows/tests.yaml
@@ -1,6 +1,10 @@
name: Tests
on: [push, pull_request]

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
lint:
runs-on: ubuntu-latest
@@ -9,6 +13,7 @@ jobs:
toxenv:
- "pep8"
- "black"
- "mypy"

steps:
- uses: actions/checkout@v2
1 change: 1 addition & 0 deletions MANIFEST.in
@@ -3,5 +3,6 @@ include *.py
include *.md
include LICENSE
include tox.ini
include pyproject.toml
prune .travis
prune debian
97 changes: 53 additions & 44 deletions canonicaljson.py
@@ -16,9 +16,14 @@
# limitations under the License.

import platform
from typing import Optional, Type
from typing import Any, Generator, Optional, Type

frozendict_type: Optional[Type]
try:
from typing import Protocol
except ImportError: # pragma: no cover
from typing_extensions import Protocol # type: ignore[misc]

frozendict_type: Optional[Type[Any]]
try:
from frozendict import frozendict as frozendict_type
except ImportError:
@@ -27,22 +32,37 @@
__version__ = "1.6.0"


def _default(obj): # pragma: no cover
def _default(obj: object) -> object: # pragma: no cover
if type(obj) is frozendict_type:
# If frozendict is available and used, cast `obj` into a dict
return dict(obj)
return dict(obj) # type: ignore[call-overload]
raise TypeError(
"Object of type %s is not JSON serializable" % obj.__class__.__name__
)


class Encoder(Protocol): # pragma: no cover
def encode(self, data: object) -> str:
pass

def iterencode(self, data: object) -> Generator[str, None, None]:
pass

def __call__(self, *args: Any, **kwargs: Any) -> "Encoder":
pass


class JsonLibrary(Protocol):
JSONEncoder: Encoder


# Declare these in the module scope, but they get configured in
# set_json_library.
_canonical_encoder = None
_pretty_encoder = None
_canonical_encoder: Encoder = None # type: ignore[assignment]

Member:
Could you do something silly like import json as _json and then set these types to _json.JSONEncoder?

Contributor Author:
Maybe---but that might cause further pain having to wrangle with simplejson.

(I was trying to avoid touching any of the import stuff---feels a bit icky.)

Contributor Author:
Being completely honest a lot of this is overkill: the main benefit to Synapse is the existence of -> bytes and -> Generator[bytes, None, None] return types.
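
As a rough illustration of that benefit: with the new signatures, a typed caller can stream the output without casts. The write_chunks helper and the BytesIO target below are invented for this sketch and are not part of this PR or of Synapse.

import io
from typing import Iterable

from canonicaljson import encode_canonical_json, iterencode_canonical_json

def write_chunks(out: io.BufferedIOBase, chunks: Iterable[bytes]) -> None:
    # Type-checks cleanly because iterencode_canonical_json is now annotated
    # as yielding bytes rather than Any.
    for chunk in chunks:
        out.write(chunk)

buf = io.BytesIO()
write_chunks(buf, iterencode_canonical_json({"b": 2, "a": 1}))

# Concatenating the chunks reproduces the one-shot encoding.
assert buf.getvalue() == encode_canonical_json({"b": 2, "a": 1}) == b'{"a":1,"b":2}'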

_pretty_encoder: Encoder = None # type: ignore[assignment]


def set_json_library(json_lib):
def set_json_library(json_lib: JsonLibrary) -> None:
"""
Set the underlying JSON library that canonicaljson uses to json_lib.

@@ -69,55 +89,44 @@ def set_json_library(json_lib):
)


def encode_canonical_json(json_object):
"""Encodes the shortest UTF-8 JSON encoding with dictionary keys
lexicographically sorted by unicode code point.
def encode_canonical_json(data: object) -> bytes:
"""Encodes the given `data` as a UTF-8 canonical JSON bytestring.

Args:
json_object (dict): The JSON object to encode.

Returns:
bytes encoding the JSON object"""
s = _canonical_encoder.encode(json_object)
return s.encode("utf-8")


def iterencode_canonical_json(json_object):
"""Encodes the shortest UTF-8 JSON encoding with dictionary keys
This encoding is the shortest possible. Dictionary keys are
lexicographically sorted by unicode code point.
"""
s = _canonical_encoder.encode(data)
return s.encode("utf-8")

Args:
json_object (dict): The JSON object to encode.

Returns:
generator which yields bytes encoding the JSON object"""
for chunk in _canonical_encoder.iterencode(json_object):
yield chunk.encode("utf-8")
def iterencode_canonical_json(data: object) -> Generator[bytes, None, None]:
"""Iteratively encodes the given `data` as a UTF-8 canonical JSON bytestring.

This yields one or more bytestrings; concatenating them all together yields the
full encoding of `data`. Building up the encoding gradually in this way allows us to
encode large pieces of `data` without blocking other tasks.

def encode_pretty_printed_json(json_object):
This encoding is the shortest possible. Dictionary keys are
lexicographically sorted by unicode code point.
"""
Encodes the JSON object dict as human readable UTF-8 bytes.

Args:
json_object (dict): The JSON object to encode.

Returns:
bytes encoding the JSON object"""
for chunk in _canonical_encoder.iterencode(data):
yield chunk.encode("utf-8")

return _pretty_encoder.encode(json_object).encode("utf-8")

def encode_pretty_printed_json(data: object) -> bytes:
"""Encodes the given `data` as a UTF-8 human-readable JSON bytestring."""

def iterencode_pretty_printed_json(json_object):
"""Encodes the JSON object dict as human readable UTF-8 bytes.
return _pretty_encoder.encode(data).encode("utf-8")

Args:
json_object (dict): The JSON object to encode.

Returns:
generator which yields bytes encoding the JSON object"""
def iterencode_pretty_printed_json(data: object) -> Generator[bytes, None, None]:
"""Iteratively encodes the given `data` as a UTF-8 human-readable JSON bytestring.

for chunk in _pretty_encoder.iterencode(json_object):
This yields one or more bytestrings; concatenating them all together yields the
full encoding of `data`. Building up the encoding gradually in this way allows us to
encode large pieces of `data` without blocking other tasks.
"""
for chunk in _pretty_encoder.iterencode(data):
yield chunk.encode("utf-8")


@@ -132,7 +141,7 @@ def iterencode_pretty_printed_json(json_object):
#
# Note that it seems performance is on par or better using json from the
# standard library as of Python 3.7.
import simplejson as json
import simplejson as json # type: ignore[no-redef]

Member:
Arg, I wish we could figure out a way to kill this, but I don't think we can due to old Synapse versions which unconditionally install it...

Contributor Author:
Ahh, I hadn't realised that Synapse imports and uses set_json_library.

IMO the dependencies that we control (canonicaljson, signedjson, matrix-common?) should be specified in Synapse's metadata with semver bounds to avoid this sort of thing.

Member:
> IMO the dependencies that we control (canonicaljson, signedjson, matrix-common?) should be specified in Synapse's metadata with semver bounds to avoid this sort of thing.

Absolutely, but we didn't set an upper bound previously and there isn't really a way to go back and fix that (unless maybe once the Pythons from those versions are unsupported?) 🤷

# Set the JSON library to the backwards compatible version.
set_json_library(json)
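
For context on the thread above: set_json_library accepts any module-like object whose JSONEncoder attribute matches the Encoder protocol, which is why both the standard library and simplejson work. A small usage sketch follows (it assumes simplejson is installed, as setup.py requires; the assertions illustrate runtime behaviour and are not part of this PR).

import json

import simplejson

from canonicaljson import encode_canonical_json, set_json_library

# The stdlib json module provides a compatible JSONEncoder...
set_json_library(json)
assert encode_canonical_json({"b": 2, "a": 1}) == b'{"a":1,"b":2}'

# ...and so does simplejson, which canonicaljson itself selects by default
# when it is importable (as at the bottom of the module above).
set_json_library(simplejson)
assert encode_canonical_json({"b": 2, "a": 1}) == b'{"a":1,"b":2}'

This is also essentially what test_set_json exercises below, using a mock with a JSONEncoder spec.
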
6 changes: 6 additions & 0 deletions pyproject.toml
@@ -0,0 +1,6 @@
[tool.mypy]
show_error_codes = true
strict = true

files = ["."]
exclude = "setup.py"
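
Because strict = true is enabled, mypy requires complete annotations in every checked file, which is what drives the signature changes throughout this diff. A tiny illustration (not from the repository):

# Rejected under the strict settings above: parameters and return type
# are unannotated.
def implicit(data):
    return data

# Accepted: fully annotated.
def explicit(data: object) -> object:
    return data
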
2 changes: 2 additions & 0 deletions setup.py
@@ -49,6 +49,8 @@ def exec_file(path_segments, name):
# simplejson versions before 3.14.0 had a bug with some characters
# (e.g. \u2028) if ensure_ascii was set to false.
"simplejson>=3.14.0",
# typing.Protocol was only added to the stdlib in Python 3.8
"typing_extensions>=4.0.0; python_version < '3.8'",
],
extras_require={
# frozendict support can be enabled using the `canonicaljson[frozendict]` syntax
22 changes: 12 additions & 10 deletions test_canonicaljson.py
@@ -31,7 +31,7 @@


class TestCanonicalJson(unittest.TestCase):
def test_encode_canonical(self):
def test_encode_canonical(self) -> None:
self.assertEqual(encode_canonical_json({}), b"{}")

# ctrl-chars should be encoded.
@@ -68,7 +68,7 @@ def test_encode_canonical(self):
# Iteratively encoding should work.
self.assertEqual(list(iterencode_canonical_json({})), [b"{}"])

def test_ascii(self):
def test_ascii(self) -> None:
"""
Ensure the proper ASCII characters are escaped.

@@ -95,10 +95,10 @@ def test_ascii(self):
# And other characters are passed unescaped.
unescaped = [0x20, 0x21] + list(range(0x23, 0x5C)) + list(range(0x5D, 0x7E))
for c in unescaped:
c = chr(c)
self.assertEqual(encode_canonical_json(c), b'"' + c.encode("ascii") + b'"')
s = chr(c)
self.assertEqual(encode_canonical_json(s), b'"' + s.encode("ascii") + b'"')

def test_encode_pretty_printed(self):
def test_encode_pretty_printed(self) -> None:
self.assertEqual(encode_pretty_printed_json({}), b"{}")
self.assertEqual(list(iterencode_pretty_printed_json({})), [b"{}"])

@@ -112,7 +112,9 @@ def test_encode_pretty_printed(self):
frozendict_type is None,
"If `frozendict` is not available, skip test",
)
def test_frozen_dict(self):
def test_frozen_dict(self) -> None:
# For mypy's benefit:
assert frozendict_type is not None
self.assertEqual(
encode_canonical_json(frozendict_type({"a": 1})),
b'{"a":1}',
@@ -122,7 +124,7 @@ def test_frozen_dict(self):
b'{\n "a": 1\n}',
)

def test_unknown_type(self):
def test_unknown_type(self) -> None:
class Unknown(object):
pass

@@ -133,7 +135,7 @@ class Unknown(object):
with self.assertRaises(Exception):
encode_pretty_printed_json(unknown_object)

def test_invalid_float_values(self):
def test_invalid_float_values(self) -> None:
"""Infinity/-Infinity/NaN are not allowed in canonicaljson."""

with self.assertRaises(ValueError):
@@ -154,7 +156,7 @@ def test_invalid_float_values(self):
with self.assertRaises(ValueError):
encode_pretty_printed_json(nan)

def test_set_json(self):
def test_set_json(self) -> None:
"""Ensure that changing the underlying JSON implementation works."""
mock_json = mock.Mock(spec=["JSONEncoder"])
mock_json.JSONEncoder.return_value.encode.return_value = "sentinel"
@@ -163,6 +165,6 @@ def test_set_json(self):
self.assertEqual(encode_canonical_json({}), b"sentinel")
finally:
# Reset the JSON library to whatever was originally set.
from canonicaljson import json
from canonicaljson import json # type: ignore[attr-defined]

set_json_library(json)
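
The assert frozendict_type is not None added to test_frozen_dict is purely for type checking: the skipIf decorator already guards the test at runtime, but mypy cannot see that, so the assert narrows Optional[Type[Any]] to Type[Any] before the call. A minimal sketch of the same narrowing idiom, using invented names:

from typing import Optional

def shout(message: str) -> str:
    return message.upper()

def handle(message: Optional[str]) -> str:
    # Under strict mypy, passing `message` straight to shout() is rejected
    # while its type is still Optional[str]; the assert narrows it to str
    # for the rest of this scope.
    assert message is not None
    return shout(message)

print(handle("hello"))  # prints HELLO
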
12 changes: 11 additions & 1 deletion tox.ini
@@ -1,5 +1,5 @@
[tox]
envlist = packaging, pep8, py37, py38, py39, py310, pypy3
envlist = packaging, pep8, black, py37, py38, py39, py310, pypy3

[testenv]
deps =
@@ -25,4 +25,14 @@ commands = flake8 .
basepython = python3.7
deps =
black==21.9b0
# Workaround black+click incompatibility, see https://github.com/psf/black/issues/2964
click==8.0.4
commands = python -m black --check --diff .

[testenv:mypy]
deps =
mypy==0.942
types-frozendict==2.0.8
types-simplejson==3.17.5
types-setuptools==57.4.14
commands = mypy
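
With this environment in place, the type checks can be run locally with tox -e mypy, matching the new "mypy" entry added to the CI matrix in tests.yaml.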