From 16a9335cd1a8a0f6b6fe72655fbf2291caf5f53f Mon Sep 17 00:00:00 2001 From: David Robertson Date: Thu, 21 Apr 2022 14:24:17 +0100 Subject: [PATCH 1/5] Type annotations for canonicaljson --- MANIFEST.in | 1 + canonicaljson.py | 44 +++++++++++++++++++++++++++++++------------ pyproject.toml | 18 ++++++++++++++++++ setup.py | 2 ++ test_canonicaljson.py | 22 ++++++++++++---------- tox.ini | 8 ++++++++ 6 files changed, 73 insertions(+), 22 deletions(-) create mode 100644 pyproject.toml diff --git a/MANIFEST.in b/MANIFEST.in index 0ac1524..63f6c5b 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,5 +3,6 @@ include *.py include *.md include LICENSE include tox.ini +include pyproject.toml prune .travis prune debian diff --git a/canonicaljson.py b/canonicaljson.py index 7841247..c9d3eb4 100644 --- a/canonicaljson.py +++ b/canonicaljson.py @@ -16,9 +16,14 @@ # limitations under the License. import platform -from typing import Optional, Type +from typing import Any, Generator, Optional, Type -frozendict_type: Optional[Type] +try: + from typing import Protocol +except ImportError: # pragma: no cover + from typing_extensions import Protocol # type: ignore[misc] + +frozendict_type: Optional[Type[Any]] try: from frozendict import frozendict as frozendict_type except ImportError: @@ -27,22 +32,37 @@ __version__ = "1.6.0" -def _default(obj): # pragma: no cover +def _default(obj: object) -> object: # pragma: no cover if type(obj) is frozendict_type: # If frozendict is available and used, cast `obj` into a dict - return dict(obj) + return dict(obj) # type: ignore[call-overload] raise TypeError( "Object of type %s is not JSON serializable" % obj.__class__.__name__ ) +class Encoder(Protocol): # pragma: no cover + def encode(self, data: object) -> str: + pass + + def iterencode(self, data: object) -> Generator[str, None, None]: + pass + + def __call__(self, *args: Any, **kwargs: Any) -> "Encoder": + pass + + +class JsonLibrary(Protocol): + JSONEncoder: Encoder + + # Declare these in the 
module scope, but they get configured in # set_json_library. -_canonical_encoder = None -_pretty_encoder = None +_canonical_encoder: Encoder = None # type: ignore[assignment] +_pretty_encoder: Encoder = None # type: ignore[assignment] -def set_json_library(json_lib): +def set_json_library(json_lib: JsonLibrary) -> None: """ Set the underlying JSON library that canonicaljson uses to json_lib. @@ -69,7 +89,7 @@ def set_json_library(json_lib): ) -def encode_canonical_json(json_object): +def encode_canonical_json(json_object: object) -> bytes: """Encodes the shortest UTF-8 JSON encoding with dictionary keys lexicographically sorted by unicode code point. @@ -82,7 +102,7 @@ def encode_canonical_json(json_object): return s.encode("utf-8") -def iterencode_canonical_json(json_object): +def iterencode_canonical_json(json_object: object) -> Generator[bytes, None, None]: """Encodes the shortest UTF-8 JSON encoding with dictionary keys lexicographically sorted by unicode code point. @@ -95,7 +115,7 @@ def iterencode_canonical_json(json_object): yield chunk.encode("utf-8") -def encode_pretty_printed_json(json_object): +def encode_pretty_printed_json(json_object: object) -> bytes: """ Encodes the JSON object dict as human readable UTF-8 bytes. @@ -108,7 +128,7 @@ def encode_pretty_printed_json(json_object): return _pretty_encoder.encode(json_object).encode("utf-8") -def iterencode_pretty_printed_json(json_object): +def iterencode_pretty_printed_json(json_object: object) -> Generator[bytes, None, None]: """Encodes the JSON object dict as human readable UTF-8 bytes. Args: @@ -132,7 +152,7 @@ def iterencode_pretty_printed_json(json_object): # # Note that it seems performance is on par or better using json from the # standard library as of Python 3.7. - import simplejson as json + import simplejson as json # type: ignore[no-redef] # Set the JSON library to the backwards compatible version. 
set_json_library(json) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..834a677 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,18 @@ +[tool.mypy] +show_error_codes = true +strict = true + +files = ["."] +exclude = "setup.py" +#mypy_path = "stubs" + +#[[tool.mypy.overrides]] +#module = [ +# "idna", +# "netaddr", +# "prometheus_client", +# "signedjson.*", +# "sortedcontainers", +#] +#ignore_missing_imports = true + diff --git a/setup.py b/setup.py index ed59aca..fec43db 100755 --- a/setup.py +++ b/setup.py @@ -49,6 +49,8 @@ def exec_file(path_segments, name): # simplerjson versions before 3.14.0 had a bug with some characters # (e.g. \u2028) if ensure_ascii was set to false. "simplejson>=3.14.0", + # typing.Protocol was only added to the stdlib in Python 3.8 + "typing_extensions>=4.0.0; python_version < '3.8'", ], extras_require={ # frozendict support can be enabled using the `canonicaljson[frozendict]` syntax diff --git a/test_canonicaljson.py b/test_canonicaljson.py index 0e081c0..fc19922 100644 --- a/test_canonicaljson.py +++ b/test_canonicaljson.py @@ -31,7 +31,7 @@ class TestCanonicalJson(unittest.TestCase): - def test_encode_canonical(self): + def test_encode_canonical(self) -> None: self.assertEqual(encode_canonical_json({}), b"{}") # ctrl-chars should be encoded. @@ -68,7 +68,7 @@ def test_encode_canonical(self): # Iteratively encoding should work. self.assertEqual(list(iterencode_canonical_json({})), [b"{}"]) - def test_ascii(self): + def test_ascii(self) -> None: """ Ensure the proper ASCII characters are escaped. @@ -95,10 +95,10 @@ def test_ascii(self): # And other characters are passed unescaped. 
unescaped = [0x20, 0x21] + list(range(0x23, 0x5C)) + list(range(0x5D, 0x7E)) for c in unescaped: - c = chr(c) - self.assertEqual(encode_canonical_json(c), b'"' + c.encode("ascii") + b'"') + s = chr(c) + self.assertEqual(encode_canonical_json(s), b'"' + s.encode("ascii") + b'"') - def test_encode_pretty_printed(self): + def test_encode_pretty_printed(self) -> None: self.assertEqual(encode_pretty_printed_json({}), b"{}") self.assertEqual(list(iterencode_pretty_printed_json({})), [b"{}"]) @@ -112,7 +112,9 @@ def test_encode_pretty_printed(self): frozendict_type is None, "If `frozendict` is not available, skip test", ) - def test_frozen_dict(self): + def test_frozen_dict(self) -> None: + # For mypy's benefit: + assert frozendict_type is not None self.assertEqual( encode_canonical_json(frozendict_type({"a": 1})), b'{"a":1}', @@ -122,7 +124,7 @@ def test_frozen_dict(self): b'{\n "a": 1\n}', ) - def test_unknown_type(self): + def test_unknown_type(self) -> None: class Unknown(object): pass @@ -133,7 +135,7 @@ class Unknown(object): with self.assertRaises(Exception): encode_pretty_printed_json(unknown_object) - def test_invalid_float_values(self): + def test_invalid_float_values(self) -> None: """Infinity/-Infinity/NaN are not allowed in canonicaljson.""" with self.assertRaises(ValueError): @@ -154,7 +156,7 @@ def test_invalid_float_values(self): with self.assertRaises(ValueError): encode_pretty_printed_json(nan) - def test_set_json(self): + def test_set_json(self) -> None: """Ensure that changing the underlying JSON implementation works.""" mock_json = mock.Mock(spec=["JSONEncoder"]) mock_json.JSONEncoder.return_value.encode.return_value = "sentinel" @@ -163,6 +165,6 @@ def test_set_json(self): self.assertEqual(encode_canonical_json({}), b"sentinel") finally: # Reset the JSON library to whatever was originally set. 
- from canonicaljson import json + from canonicaljson import json # type: ignore[attr-defined] set_json_library(json) diff --git a/tox.ini b/tox.ini index 2895be7..c87e5f8 100644 --- a/tox.ini +++ b/tox.ini @@ -26,3 +26,11 @@ basepython = python3.7 deps = black==21.9b0 commands = python -m black --check --diff . + +[testenv:mypy] +deps = + mypy==0.942 + types-frozendict==2.0.8 + types-simplejson==3.17.5 + types-setuptools==57.4.14 +commands = mypy From 3ba11e65f3dea30007d7dc9c9d64048b48db4e52 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Thu, 21 Apr 2022 15:27:40 +0100 Subject: [PATCH 2/5] Run mypy in CI --- .github/workflows/tests.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 00de968..c4bfa98 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -1,6 +1,10 @@ name: Tests on: [push, pull_request] +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: lint: runs-on: ubuntu-latest @@ -9,6 +13,7 @@ jobs: toxenv: - "pep8" - "black" + - "mypy" steps: - uses: actions/checkout@v2 From aff958eadafa983ed72a34069d2e0682fb6bf850 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Thu, 21 Apr 2022 15:36:22 +0100 Subject: [PATCH 3/5] Fix formatting Tox wasn't running black locally grr --- canonicaljson.py | 2 +- tox.ini | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/canonicaljson.py b/canonicaljson.py index c9d3eb4..00a8dc9 100644 --- a/canonicaljson.py +++ b/canonicaljson.py @@ -59,7 +59,7 @@ class JsonLibrary(Protocol): # Declare these in the module scope, but they get configured in # set_json_library. 
_canonical_encoder: Encoder = None # type: ignore[assignment] -_pretty_encoder: Encoder = None # type: ignore[assignment] +_pretty_encoder: Encoder = None # type: ignore[assignment] def set_json_library(json_lib: JsonLibrary) -> None: diff --git a/tox.ini b/tox.ini index c87e5f8..00ae34c 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = packaging, pep8, py37, py38, py39, py310, pypy3 +envlist = packaging, pep8, black, py37, py38, py39, py310, pypy3 [testenv] deps = @@ -25,6 +25,8 @@ commands = flake8 . basepython = python3.7 deps = black==21.9b0 + # Workaround black+click incompatibility, see https://github.com/psf/black/issues/2964 + click==8.0.4 commands = python -m black --check --diff . [testenv:mypy] From 2411f2f8d5dfe8767036d1aa99eb8a4677070631 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Fri, 22 Apr 2022 12:12:57 +0100 Subject: [PATCH 4/5] Remove commented out mypy config --- pyproject.toml | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 834a677..bd1d867 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,15 +4,3 @@ strict = true files = ["."] exclude = "setup.py" -#mypy_path = "stubs" - -#[[tool.mypy.overrides]] -#module = [ -# "idna", -# "netaddr", -# "prometheus_client", -# "signedjson.*", -# "sortedcontainers", -#] -#ignore_missing_imports = true - From ba77219a287330185cf6a7650c6e410c330fe11d Mon Sep 17 00:00:00 2001 From: David Robertson Date: Fri, 22 Apr 2022 13:18:10 +0100 Subject: [PATCH 5/5] Rework docstrings The data to be encoded doesn't have to be a dictionary. 
--- canonicaljson.py | 61 ++++++++++++++++++++---------------------------- 1 file changed, 25 insertions(+), 36 deletions(-) diff --git a/canonicaljson.py b/canonicaljson.py index 00a8dc9..4f52317 100644 --- a/canonicaljson.py +++ b/canonicaljson.py @@ -89,55 +89,44 @@ def set_json_library(json_lib: JsonLibrary) -> None: ) -def encode_canonical_json(json_object: object) -> bytes: - """Encodes the shortest UTF-8 JSON encoding with dictionary keys - lexicographically sorted by unicode code point. - - Args: - json_object (dict): The JSON object to encode. +def encode_canonical_json(data: object) -> bytes: + """Encodes the given `data` as a UTF-8 canonical JSON bytestring. - Returns: - bytes encoding the JSON object""" - s = _canonical_encoder.encode(json_object) + This encoding is the shortest possible. Dictionary keys are + lexicographically sorted by unicode code point. + """ + s = _canonical_encoder.encode(data) return s.encode("utf-8") -def iterencode_canonical_json(json_object: object) -> Generator[bytes, None, None]: - """Encodes the shortest UTF-8 JSON encoding with dictionary keys - lexicographically sorted by unicode code point. - - Args: - json_object (dict): The JSON object to encode. +def iterencode_canonical_json(data: object) -> Generator[bytes, None, None]: + """Iteratively encodes the given `data` as a UTF-8 canonical JSON bytestring. - Returns: - generator which yields bytes encoding the JSON object""" - for chunk in _canonical_encoder.iterencode(json_object): - yield chunk.encode("utf-8") + This yields one or more bytestrings; concatenating them all together yields the + full encoding of `data`. Building up the encoding gradually in this way allows us to + encode large pieces of `data` without blocking other tasks. - -def encode_pretty_printed_json(json_object: object) -> bytes: + This encoding is the shortest possible. Dictionary keys are + lexicographically sorted by unicode code point. 
""" - Encodes the JSON object dict as human readable UTF-8 bytes. - - Args: - json_object (dict): The JSON object to encode. - - Returns: - bytes encoding the JSON object""" + for chunk in _canonical_encoder.iterencode(data): + yield chunk.encode("utf-8") - return _pretty_encoder.encode(json_object).encode("utf-8") +def encode_pretty_printed_json(data: object) -> bytes: + """Encodes the given `data` as a UTF-8 human-readable JSON bytestring.""" -def iterencode_pretty_printed_json(json_object: object) -> Generator[bytes, None, None]: - """Encodes the JSON object dict as human readable UTF-8 bytes. + return _pretty_encoder.encode(data).encode("utf-8") - Args: - json_object (dict): The JSON object to encode. - Returns: - generator which yields bytes encoding the JSON object""" +def iterencode_pretty_printed_json(data: object) -> Generator[bytes, None, None]: + """Iteratively encodes the given `data` as a UTF-8 human-readable JSON bytestring. - for chunk in _pretty_encoder.iterencode(json_object): + This yields one or more bytestrings; concatenating them all together yields the + full encoding of `data`. Building up the encoding gradually in this way allows us to + encode large pieces of `data` without blocking other tasks. + """ + for chunk in _pretty_encoder.iterencode(data): yield chunk.encode("utf-8")