From 16a9335cd1a8a0f6b6fe72655fbf2291caf5f53f Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Thu, 21 Apr 2022 14:24:17 +0100
Subject: [PATCH 1/5] Type annotations for canonicaljson

---
 MANIFEST.in           |  1 +
 canonicaljson.py      | 44 +++++++++++++++++++++++++++++++------------
 pyproject.toml        | 18 ++++++++++++++++++
 setup.py              |  2 ++
 test_canonicaljson.py | 22 ++++++++++++----------
 tox.ini               |  8 ++++++++
 6 files changed, 73 insertions(+), 22 deletions(-)
 create mode 100644 pyproject.toml

diff --git a/MANIFEST.in b/MANIFEST.in
index 0ac1524..63f6c5b 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -3,5 +3,6 @@ include *.py
 include *.md
 include LICENSE
 include tox.ini
+include pyproject.toml
 prune .travis
 prune debian
diff --git a/canonicaljson.py b/canonicaljson.py
index 7841247..c9d3eb4 100644
--- a/canonicaljson.py
+++ b/canonicaljson.py
@@ -16,9 +16,14 @@
 # limitations under the License.
 
 import platform
-from typing import Optional, Type
+from typing import Any, Generator, Optional, Type
 
-frozendict_type: Optional[Type]
+try:
+    from typing import Protocol
+except ImportError:  # pragma: no cover
+    from typing_extensions import Protocol  # type: ignore[misc]
+
+frozendict_type: Optional[Type[Any]]
 try:
     from frozendict import frozendict as frozendict_type
 except ImportError:
@@ -27,22 +32,37 @@
 __version__ = "1.6.0"
 
 
-def _default(obj):  # pragma: no cover
+def _default(obj: object) -> object:  # pragma: no cover
     if type(obj) is frozendict_type:
         # If frozendict is available and used, cast `obj` into a dict
-        return dict(obj)
+        return dict(obj)  # type: ignore[call-overload]
     raise TypeError(
         "Object of type %s is not JSON serializable" % obj.__class__.__name__
     )
 
 
+class Encoder(Protocol):  # pragma: no cover
+    def encode(self, data: object) -> str:
+        pass
+
+    def iterencode(self, data: object) -> Generator[str, None, None]:
+        pass
+
+    def __call__(self, *args: Any, **kwargs: Any) -> "Encoder":
+        pass
+
+
+class JsonLibrary(Protocol):
+    JSONEncoder: Encoder
+
+
 # Declare these in the module scope, but they get configured in
 # set_json_library.
-_canonical_encoder = None
-_pretty_encoder = None
+_canonical_encoder: Encoder = None  # type: ignore[assignment]
+_pretty_encoder: Encoder = None   # type: ignore[assignment]
 
 
-def set_json_library(json_lib):
+def set_json_library(json_lib: JsonLibrary) -> None:
     """
     Set the underlying JSON library that canonicaljson uses to json_lib.
 
@@ -69,7 +89,7 @@ def set_json_library(json_lib):
     )
 
 
-def encode_canonical_json(json_object):
+def encode_canonical_json(json_object: object) -> bytes:
     """Encodes the shortest UTF-8 JSON encoding with dictionary keys
     lexicographically sorted by unicode code point.
 
@@ -82,7 +102,7 @@ def encode_canonical_json(json_object):
     return s.encode("utf-8")
 
 
-def iterencode_canonical_json(json_object):
+def iterencode_canonical_json(json_object: object) -> Generator[bytes, None, None]:
     """Encodes the shortest UTF-8 JSON encoding with dictionary keys
     lexicographically sorted by unicode code point.
 
@@ -95,7 +115,7 @@ def iterencode_canonical_json(json_object):
         yield chunk.encode("utf-8")
 
 
-def encode_pretty_printed_json(json_object):
+def encode_pretty_printed_json(json_object: object) -> bytes:
     """
     Encodes the JSON object dict as human readable UTF-8 bytes.
 
@@ -108,7 +128,7 @@ def encode_pretty_printed_json(json_object):
     return _pretty_encoder.encode(json_object).encode("utf-8")
 
 
-def iterencode_pretty_printed_json(json_object):
+def iterencode_pretty_printed_json(json_object: object) -> Generator[bytes, None, None]:
     """Encodes the JSON object dict as human readable UTF-8 bytes.
 
     Args:
@@ -132,7 +152,7 @@ def iterencode_pretty_printed_json(json_object):
     #
     # Note that it seems performance is on par or better using json from the
     # standard library as of Python 3.7.
-    import simplejson as json
+    import simplejson as json  # type: ignore[no-redef]
 
 # Set the JSON library to the backwards compatible version.
 set_json_library(json)
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..834a677
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,18 @@
+[tool.mypy]
+show_error_codes = true
+strict = true
+
+files = ["."]
+exclude = "setup.py"
+#mypy_path = "stubs"
+
+#[[tool.mypy.overrides]]
+#module = [
+#    "idna",
+#    "netaddr",
+#    "prometheus_client",
+#    "signedjson.*",
+#    "sortedcontainers",
+#]
+#ignore_missing_imports = true
+
diff --git a/setup.py b/setup.py
index ed59aca..fec43db 100755
--- a/setup.py
+++ b/setup.py
@@ -49,6 +49,8 @@ def exec_file(path_segments, name):
         # simplerjson versions before 3.14.0 had a bug with some characters
         # (e.g. \u2028) if ensure_ascii was set to false.
         "simplejson>=3.14.0",
+        # typing.Protocol was only added to the stdlib in Python 3.8
+        "typing_extensions>=4.0.0; python_version < '3.8'",
     ],
     extras_require={
         # frozendict support can be enabled using the `canonicaljson[frozendict]` syntax
diff --git a/test_canonicaljson.py b/test_canonicaljson.py
index 0e081c0..fc19922 100644
--- a/test_canonicaljson.py
+++ b/test_canonicaljson.py
@@ -31,7 +31,7 @@
 
 
 class TestCanonicalJson(unittest.TestCase):
-    def test_encode_canonical(self):
+    def test_encode_canonical(self) -> None:
         self.assertEqual(encode_canonical_json({}), b"{}")
 
         # ctrl-chars should be encoded.
@@ -68,7 +68,7 @@ def test_encode_canonical(self):
         # Iteratively encoding should work.
         self.assertEqual(list(iterencode_canonical_json({})), [b"{}"])
 
-    def test_ascii(self):
+    def test_ascii(self) -> None:
         """
         Ensure the proper ASCII characters are escaped.
 
@@ -95,10 +95,10 @@ def test_ascii(self):
         # And other characters are passed unescaped.
         unescaped = [0x20, 0x21] + list(range(0x23, 0x5C)) + list(range(0x5D, 0x7E))
         for c in unescaped:
-            c = chr(c)
-            self.assertEqual(encode_canonical_json(c), b'"' + c.encode("ascii") + b'"')
+            s = chr(c)
+            self.assertEqual(encode_canonical_json(s), b'"' + s.encode("ascii") + b'"')
 
-    def test_encode_pretty_printed(self):
+    def test_encode_pretty_printed(self) -> None:
         self.assertEqual(encode_pretty_printed_json({}), b"{}")
         self.assertEqual(list(iterencode_pretty_printed_json({})), [b"{}"])
 
@@ -112,7 +112,9 @@ def test_encode_pretty_printed(self):
         frozendict_type is None,
         "If `frozendict` is not available, skip test",
     )
-    def test_frozen_dict(self):
+    def test_frozen_dict(self) -> None:
+        # For mypy's benefit:
+        assert frozendict_type is not None
         self.assertEqual(
             encode_canonical_json(frozendict_type({"a": 1})),
             b'{"a":1}',
@@ -122,7 +124,7 @@ def test_frozen_dict(self):
             b'{\n    "a": 1\n}',
         )
 
-    def test_unknown_type(self):
+    def test_unknown_type(self) -> None:
         class Unknown(object):
             pass
 
@@ -133,7 +135,7 @@ class Unknown(object):
         with self.assertRaises(Exception):
             encode_pretty_printed_json(unknown_object)
 
-    def test_invalid_float_values(self):
+    def test_invalid_float_values(self) -> None:
         """Infinity/-Infinity/NaN are not allowed in canonicaljson."""
 
         with self.assertRaises(ValueError):
@@ -154,7 +156,7 @@ def test_invalid_float_values(self):
         with self.assertRaises(ValueError):
             encode_pretty_printed_json(nan)
 
-    def test_set_json(self):
+    def test_set_json(self) -> None:
         """Ensure that changing the underlying JSON implementation works."""
         mock_json = mock.Mock(spec=["JSONEncoder"])
         mock_json.JSONEncoder.return_value.encode.return_value = "sentinel"
@@ -163,6 +165,6 @@ def test_set_json(self):
             self.assertEqual(encode_canonical_json({}), b"sentinel")
         finally:
             # Reset the JSON library to whatever was originally set.
-            from canonicaljson import json
+            from canonicaljson import json  # type: ignore[attr-defined]
 
             set_json_library(json)
diff --git a/tox.ini b/tox.ini
index 2895be7..c87e5f8 100644
--- a/tox.ini
+++ b/tox.ini
@@ -26,3 +26,11 @@ basepython = python3.7
 deps =
     black==21.9b0
 commands = python -m black --check --diff .
+
+[testenv:mypy]
+deps =
+    mypy==0.942
+    types-frozendict==2.0.8
+    types-simplejson==3.17.5
+    types-setuptools==57.4.14
+commands = mypy

From 3ba11e65f3dea30007d7dc9c9d64048b48db4e52 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Thu, 21 Apr 2022 15:27:40 +0100
Subject: [PATCH 2/5] Run mypy in CI

---
 .github/workflows/tests.yaml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index 00de968..c4bfa98 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -1,6 +1,10 @@
 name: Tests
 on: [push, pull_request]
 
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
 jobs:
   lint:
     runs-on: ubuntu-latest
@@ -9,6 +13,7 @@ jobs:
         toxenv:
           - "pep8"
           - "black"
+          - "mypy"
 
     steps:
       - uses: actions/checkout@v2

From aff958eadafa983ed72a34069d2e0682fb6bf850 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Thu, 21 Apr 2022 15:36:22 +0100
Subject: [PATCH 3/5] Fix formatting

Tox wasn't running black locally grr
---
 canonicaljson.py | 2 +-
 tox.ini          | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/canonicaljson.py b/canonicaljson.py
index c9d3eb4..00a8dc9 100644
--- a/canonicaljson.py
+++ b/canonicaljson.py
@@ -59,7 +59,7 @@ class JsonLibrary(Protocol):
 # Declare these in the module scope, but they get configured in
 # set_json_library.
 _canonical_encoder: Encoder = None  # type: ignore[assignment]
-_pretty_encoder: Encoder = None   # type: ignore[assignment]
+_pretty_encoder: Encoder = None  # type: ignore[assignment]
 
 
 def set_json_library(json_lib: JsonLibrary) -> None:
diff --git a/tox.ini b/tox.ini
index c87e5f8..00ae34c 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
 [tox]
-envlist = packaging, pep8, py37, py38, py39, py310, pypy3
+envlist = packaging, pep8, black, py37, py38, py39, py310, pypy3
 
 [testenv]
 deps =
@@ -25,6 +25,8 @@ commands = flake8 .
 basepython = python3.7
 deps =
     black==21.9b0
+    # Workaround black+click incompatibility, see https://github.com/psf/black/issues/2964
+    click==8.0.4
 commands = python -m black --check --diff .
 
 [testenv:mypy]

From 2411f2f8d5dfe8767036d1aa99eb8a4677070631 Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Fri, 22 Apr 2022 12:12:57 +0100
Subject: [PATCH 4/5] Remove commented out mypy config

---
 pyproject.toml | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 834a677..bd1d867 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,15 +4,3 @@ strict = true
 
 files = ["."]
 exclude = "setup.py"
-#mypy_path = "stubs"
-
-#[[tool.mypy.overrides]]
-#module = [
-#    "idna",
-#    "netaddr",
-#    "prometheus_client",
-#    "signedjson.*",
-#    "sortedcontainers",
-#]
-#ignore_missing_imports = true
-

From ba77219a287330185cf6a7650c6e410c330fe11d Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Fri, 22 Apr 2022 13:18:10 +0100
Subject: [PATCH 5/5] Rework docstrings

The data to be encoded doesn't have to be a dictionary.
---
 canonicaljson.py | 61 ++++++++++++++++++++----------------------------
 1 file changed, 25 insertions(+), 36 deletions(-)

diff --git a/canonicaljson.py b/canonicaljson.py
index 00a8dc9..4f52317 100644
--- a/canonicaljson.py
+++ b/canonicaljson.py
@@ -89,55 +89,44 @@ def set_json_library(json_lib: JsonLibrary) -> None:
     )
 
 
-def encode_canonical_json(json_object: object) -> bytes:
-    """Encodes the shortest UTF-8 JSON encoding with dictionary keys
-    lexicographically sorted by unicode code point.
-
-    Args:
-        json_object (dict): The JSON object to encode.
+def encode_canonical_json(data: object) -> bytes:
+    """Encodes the given `data` as a UTF-8 canonical JSON bytestring.
 
-    Returns:
-        bytes encoding the JSON object"""
-    s = _canonical_encoder.encode(json_object)
+    This encoding is the shortest possible. Dictionary keys are
+    lexicographically sorted by unicode code point.
+    """
+    s = _canonical_encoder.encode(data)
     return s.encode("utf-8")
 
 
-def iterencode_canonical_json(json_object: object) -> Generator[bytes, None, None]:
-    """Encodes the shortest UTF-8 JSON encoding with dictionary keys
-    lexicographically sorted by unicode code point.
-
-    Args:
-        json_object (dict): The JSON object to encode.
+def iterencode_canonical_json(data: object) -> Generator[bytes, None, None]:
+    """Iteratively encodes the given `data` as a UTF-8 canonical JSON bytestring.
 
-    Returns:
-        generator which yields bytes encoding the JSON object"""
-    for chunk in _canonical_encoder.iterencode(json_object):
-        yield chunk.encode("utf-8")
+    This yields one or more bytestrings; concatenating them all together yields the
+    full encoding of `data`. Building up the encoding gradually in this way allows us to
+    encode large pieces of `data` without blocking other tasks.
 
-
-def encode_pretty_printed_json(json_object: object) -> bytes:
+    This encoding is the shortest possible. Dictionary keys are
+    lexicographically sorted by unicode code point.
     """
-    Encodes the JSON object dict as human readable UTF-8 bytes.
-
-    Args:
-        json_object (dict): The JSON object to encode.
-
-    Returns:
-        bytes encoding the JSON object"""
+    for chunk in _canonical_encoder.iterencode(data):
+        yield chunk.encode("utf-8")
 
-    return _pretty_encoder.encode(json_object).encode("utf-8")
 
+def encode_pretty_printed_json(data: object) -> bytes:
+    """Encodes the given `data` as a UTF-8 human-readable JSON bytestring."""
 
-def iterencode_pretty_printed_json(json_object: object) -> Generator[bytes, None, None]:
-    """Encodes the JSON object dict as human readable UTF-8 bytes.
+    return _pretty_encoder.encode(data).encode("utf-8")
 
-    Args:
-        json_object (dict): The JSON object to encode.
 
-    Returns:
-        generator which yields bytes encoding the JSON object"""
+def iterencode_pretty_printed_json(data: object) -> Generator[bytes, None, None]:
+    """Iteratively encodes the given `data` as a UTF-8 human-readable JSON bytestring.
 
-    for chunk in _pretty_encoder.iterencode(json_object):
+    This yields one or more bytestrings; concatenating them all together yields the
+    full encoding of `data`. Building up the encoding gradually in this way allows us to
+    encode large pieces of `data` without blocking other tasks.
+    """
+    for chunk in _pretty_encoder.iterencode(data):
         yield chunk.encode("utf-8")