Type annotations for canonicaljson #49

Merged: 5 commits, Apr 25, 2022

5 changes: 5 additions & 0 deletions .github/workflows/tests.yaml
@@ -1,6 +1,10 @@
name: Tests
on: [push, pull_request]

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
lint:
runs-on: ubuntu-latest
@@ -9,6 +13,7 @@ jobs:
toxenv:
- "pep8"
- "black"
- "mypy"

steps:
- uses: actions/checkout@v2
1 change: 1 addition & 0 deletions MANIFEST.in
@@ -3,5 +3,6 @@ include *.py
include *.md
include LICENSE
include tox.ini
include pyproject.toml
prune .travis
prune debian
97 changes: 53 additions & 44 deletions canonicaljson.py
@@ -16,9 +16,14 @@
# limitations under the License.

import platform
from typing import Optional, Type
from typing import Any, Generator, Optional, Type

frozendict_type: Optional[Type]
try:
from typing import Protocol
except ImportError: # pragma: no cover
from typing_extensions import Protocol # type: ignore[misc]

frozendict_type: Optional[Type[Any]]
try:
from frozendict import frozendict as frozendict_type
except ImportError:
@@ -27,22 +32,37 @@
__version__ = "1.6.0"


def _default(obj): # pragma: no cover
def _default(obj: object) -> object: # pragma: no cover
if type(obj) is frozendict_type:
# If frozendict is available and used, cast `obj` into a dict
return dict(obj)
return dict(obj) # type: ignore[call-overload]
raise TypeError(
"Object of type %s is not JSON serializable" % obj.__class__.__name__
)


class Encoder(Protocol): # pragma: no cover
def encode(self, data: object) -> str:
pass

def iterencode(self, data: object) -> Generator[str, None, None]:
pass

def __call__(self, *args: Any, **kwargs: Any) -> "Encoder":
pass


class JsonLibrary(Protocol):
JSONEncoder: Encoder


# Declare these in the module scope, but they get configured in
# set_json_library.
_canonical_encoder = None
_pretty_encoder = None
_canonical_encoder: Encoder = None # type: ignore[assignment]

Member:
Could you do something silly like import json as _json and then set these types to _json.JSONEncoder?

Contributor Author:
Maybe---but that might cause further pain having to wrangle with simplejson.

(I was trying to avoid touching any of the import stuff---feels a bit icky.)

Contributor Author:
Being completely honest a lot of this is overkill: the main benefit to Synapse is the existence of -> bytes and -> Generator[bytes, None, None] return types.
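
As a rough illustration of that benefit: with the new signatures, a typed caller can stream the output without casts. The write_chunks helper and the BytesIO target below are invented for this sketch and are not part of this PR or of Synapse.

import io
from typing import Iterable

from canonicaljson import encode_canonical_json, iterencode_canonical_json

def write_chunks(out: io.BufferedIOBase, chunks: Iterable[bytes]) -> None:
    # Type-checks cleanly because iterencode_canonical_json is now annotated
    # as yielding bytes rather than Any.
    for chunk in chunks:
        out.write(chunk)

buf = io.BytesIO()
write_chunks(buf, iterencode_canonical_json({"b": 2, "a": 1}))

# Concatenating the chunks reproduces the one-shot encoding.
assert buf.getvalue() == encode_canonical_json({"b": 2, "a": 1}) == b'{"a":1,"b":2}'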

_pretty_encoder: Encoder = None # type: ignore[assignment]


def set_json_library(json_lib):
def set_json_library(json_lib: JsonLibrary) -> None:
"""
Set the underlying JSON library that canonicaljson uses to json_lib.

@@ -69,55 +89,44 @@ def set_json_library(json_lib):
)


def encode_canonical_json(json_object):
"""Encodes the shortest UTF-8 JSON encoding with dictionary keys
lexicographically sorted by unicode code point.
def encode_canonical_json(data: object) -> bytes:
"""Encodes the given `data` as a UTF-8 canonical JSON bytestring.

Args:
json_object (dict): The JSON object to encode.

Returns:
bytes encoding the JSON object"""
s = _canonical_encoder.encode(json_object)
return s.encode("utf-8")


def iterencode_canonical_json(json_object):
"""Encodes the shortest UTF-8 JSON encoding with dictionary keys
This encoding is the shortest possible. Dictionary keys are
lexicographically sorted by unicode code point.
"""
s = _canonical_encoder.encode(data)
return s.encode("utf-8")

Args:
json_object (dict): The JSON object to encode.

Returns:
generator which yields bytes encoding the JSON object"""
for chunk in _canonical_encoder.iterencode(json_object):
yield chunk.encode("utf-8")
def iterencode_canonical_json(data: object) -> Generator[bytes, None, None]:
"""Iteratively encodes the given `data` as a UTF-8 canonical JSON bytestring.

This yields one or more bytestrings; concatenating them all together yields the
full encoding of `data`. Building up the encoding gradually in this way allows us to
encode large pieces of `data` without blocking other tasks.

def encode_pretty_printed_json(json_object):
This encoding is the shortest possible. Dictionary keys are
lexicographically sorted by unicode code point.
"""
Encodes the JSON object dict as human readable UTF-8 bytes.

Args:
json_object (dict): The JSON object to encode.

Returns:
bytes encoding the JSON object"""
for chunk in _canonical_encoder.iterencode(data):
yield chunk.encode("utf-8")

return _pretty_encoder.encode(json_object).encode("utf-8")

def encode_pretty_printed_json(data: object) -> bytes:
"""Encodes the given `data` as a UTF-8 human-readable JSON bytestring."""

def iterencode_pretty_printed_json(json_object):
"""Encodes the JSON object dict as human readable UTF-8 bytes.
return _pretty_encoder.encode(data).encode("utf-8")

Args:
json_object (dict): The JSON object to encode.

Returns:
generator which yields bytes encoding the JSON object"""
def iterencode_pretty_printed_json(data: object) -> Generator[bytes, None, None]:
"""Iteratively encodes the given `data` as a UTF-8 human-readable JSON bytestring.

for chunk in _pretty_encoder.iterencode(json_object):
This yields one or more bytestrings; concatenating them all together yields the
full encoding of `data`. Building up the encoding gradually in this way allows us to
encode large pieces of `data` without blocking other tasks.
"""
for chunk in _pretty_encoder.iterencode(data):
yield chunk.encode("utf-8")


@@ -132,7 +141,7 @@ def iterencode_pretty_printed_json(json_object):
#
# Note that it seems performance is on par or better using json from the
# standard library as of Python 3.7.
import simplejson as json
import simplejson as json # type: ignore[no-redef]

Member:
Arg, I wish we could figure out a way to kill this, but I don't think we can due to old Synapse versions which unconditionally install it...

Contributor Author:
Ahh, I hadn't realised that Synapse imports and uses set_json_library.

IMO the dependencies that we control (canonicaljson, signedjson, matrix-common?) should be specified in Synapse's metadata with semver bounds to avoid this sort of thing.

Member:
> IMO the dependencies that we control (canonicaljson, signedjson, matrix-common?) should be specified in Synapse's metadata with semver bounds to avoid this sort of thing.

Absolutely, but we didn't set an upper bound previously and there isn't really a way to go back and fix that (unless maybe once the Pythons from those versions are unsupported?) 🤷

# Set the JSON library to the backwards compatible version.
set_json_library(json)
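
For context on the thread above: set_json_library accepts any module-like object whose JSONEncoder attribute matches the Encoder protocol, which is why both the standard library and simplejson work. A small usage sketch follows (it assumes simplejson is installed, as setup.py requires; the assertions illustrate runtime behaviour and are not part of this PR).

import json

import simplejson

from canonicaljson import encode_canonical_json, set_json_library

# The stdlib json module provides a compatible JSONEncoder...
set_json_library(json)
assert encode_canonical_json({"b": 2, "a": 1}) == b'{"a":1,"b":2}'

# ...and so does simplejson, which canonicaljson itself selects by default
# when it is importable (as at the bottom of the module above).
set_json_library(simplejson)
assert encode_canonical_json({"b": 2, "a": 1}) == b'{"a":1,"b":2}'

This is also essentially what test_set_json exercises below, using a mock with a JSONEncoder spec.
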
6 changes: 6 additions & 0 deletions pyproject.toml
@@ -0,0 +1,6 @@
[tool.mypy]
show_error_codes = true
strict = true

files = ["."]
exclude = "setup.py"
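
Because strict = true is enabled, mypy requires complete annotations in every checked file, which is what drives the signature changes throughout this diff. A tiny illustration (not from the repository):

# Rejected under the strict settings above: parameters and return type
# are unannotated.
def implicit(data):
    return data

# Accepted: fully annotated.
def explicit(data: object) -> object:
    return data
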
2 changes: 2 additions & 0 deletions setup.py
@@ -49,6 +49,8 @@ def exec_file(path_segments, name):
# simplejson versions before 3.14.0 had a bug with some characters
# (e.g. \u2028) if ensure_ascii was set to false.
"simplejson>=3.14.0",
# typing.Protocol was only added to the stdlib in Python 3.8
"typing_extensions>=4.0.0; python_version < '3.8'",
],
extras_require={
# frozendict support can be enabled using the `canonicaljson[frozendict]` syntax
22 changes: 12 additions & 10 deletions test_canonicaljson.py
@@ -31,7 +31,7 @@


class TestCanonicalJson(unittest.TestCase):
def test_encode_canonical(self):
def test_encode_canonical(self) -> None:
self.assertEqual(encode_canonical_json({}), b"{}")

# ctrl-chars should be encoded.
@@ -68,7 +68,7 @@ def test_encode_canonical(self):
# Iteratively encoding should work.
self.assertEqual(list(iterencode_canonical_json({})), [b"{}"])

def test_ascii(self):
def test_ascii(self) -> None:
"""
Ensure the proper ASCII characters are escaped.

@@ -95,10 +95,10 @@ def test_ascii(self):
# And other characters are passed unescaped.
unescaped = [0x20, 0x21] + list(range(0x23, 0x5C)) + list(range(0x5D, 0x7E))
for c in unescaped:
c = chr(c)
self.assertEqual(encode_canonical_json(c), b'"' + c.encode("ascii") + b'"')
s = chr(c)
self.assertEqual(encode_canonical_json(s), b'"' + s.encode("ascii") + b'"')

def test_encode_pretty_printed(self):
def test_encode_pretty_printed(self) -> None:
self.assertEqual(encode_pretty_printed_json({}), b"{}")
self.assertEqual(list(iterencode_pretty_printed_json({})), [b"{}"])

@@ -112,7 +112,9 @@ def test_encode_pretty_printed(self):
frozendict_type is None,
"If `frozendict` is not available, skip test",
)
def test_frozen_dict(self):
def test_frozen_dict(self) -> None:
# For mypy's benefit:
assert frozendict_type is not None
self.assertEqual(
encode_canonical_json(frozendict_type({"a": 1})),
b'{"a":1}',
@@ -122,7 +124,7 @@ def test_frozen_dict(self):
b'{\n "a": 1\n}',
)

def test_unknown_type(self):
def test_unknown_type(self) -> None:
class Unknown(object):
pass

@@ -133,7 +135,7 @@ class Unknown(object):
with self.assertRaises(Exception):
encode_pretty_printed_json(unknown_object)

def test_invalid_float_values(self):
def test_invalid_float_values(self) -> None:
"""Infinity/-Infinity/NaN are not allowed in canonicaljson."""

with self.assertRaises(ValueError):
@@ -154,7 +156,7 @@ def test_invalid_float_values(self):
with self.assertRaises(ValueError):
encode_pretty_printed_json(nan)

def test_set_json(self):
def test_set_json(self) -> None:
"""Ensure that changing the underlying JSON implementation works."""
mock_json = mock.Mock(spec=["JSONEncoder"])
mock_json.JSONEncoder.return_value.encode.return_value = "sentinel"
@@ -163,6 +165,6 @@ def test_set_json(self):
self.assertEqual(encode_canonical_json({}), b"sentinel")
finally:
# Reset the JSON library to whatever was originally set.
from canonicaljson import json
from canonicaljson import json # type: ignore[attr-defined]

set_json_library(json)
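
The assert frozendict_type is not None added to test_frozen_dict is purely for type checking: the skipIf decorator already guards the test at runtime, but mypy cannot see that, so the assert narrows Optional[Type[Any]] to Type[Any] before the call. A minimal sketch of the same narrowing idiom, using invented names:

from typing import Optional

def shout(message: str) -> str:
    return message.upper()

def handle(message: Optional[str]) -> str:
    # Under strict mypy, passing `message` straight to shout() is rejected
    # while its type is still Optional[str]; the assert narrows it to str
    # for the rest of this scope.
    assert message is not None
    return shout(message)

print(handle("hello"))  # prints HELLO
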
12 changes: 11 additions & 1 deletion tox.ini
@@ -1,5 +1,5 @@
[tox]
envlist = packaging, pep8, py37, py38, py39, py310, pypy3
envlist = packaging, pep8, black, py37, py38, py39, py310, pypy3

[testenv]
deps =
@@ -25,4 +25,14 @@ commands = flake8 .
basepython = python3.7
deps =
black==21.9b0
# Workaround black+click incompatibility, see https://github.com/psf/black/issues/2964
click==8.0.4
commands = python -m black --check --diff .

[testenv:mypy]
deps =
mypy==0.942
types-frozendict==2.0.8
types-simplejson==3.17.5
types-setuptools==57.4.14
commands = mypy
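
With this environment in place, the type checks can be run locally with tox -e mypy, matching the new "mypy" entry added to the CI matrix in tests.yaml.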