Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

mypy plugin to check @cached return types #14911

Merged
merged 44 commits into from
Oct 2, 2023
Merged
Show file tree
Hide file tree
Changes from 42 commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
eb24cdb
WIP mypy plugin to check `@cached` return types
Jan 25, 2023
d2cbac3
Whoops, Sequence is immutable
Jan 31, 2023
90631ac
WIP
Jan 31, 2023
4eb7de9
Merge remote-tracking branch 'origin/develop' into dmr/mypy-check-at-…
clokep Sep 12, 2023
ad1c28a
Update comments.
clokep Sep 12, 2023
88323dd
Simplify code due to other knowledge.
clokep Sep 12, 2023
676c858
Treat containers more similarly.
clokep Sep 12, 2023
008ef3f
cachedList wraps Mapping
clokep Sep 12, 2023
8aa4e87
Fix-up errors in tests.
clokep Sep 13, 2023
0f3c036
Ignore a few calls which purposefully (?) return mutable objects.
clokep Sep 13, 2023
9c94574
Data exfilitration is read-only and update admin APIs.
clokep Sep 13, 2023
f5fec7f
Update account_data & tags methods to be immutable.
clokep Sep 13, 2023
9a62053
FIx-up push related caching.
clokep Sep 13, 2023
cc61862
Update filtering to return immutable objects.
clokep Sep 13, 2023
a27a67f
Update relations with immutable.
clokep Sep 13, 2023
8f7f4d7
Update receipts code.
clokep Sep 13, 2023
9fdc5a1
Update e2e keys & devices.
clokep Sep 13, 2023
d8cce5b
Update appservice stuff.
clokep Sep 13, 2023
ef61f3d
Ensure current hosts is immutable.
clokep Sep 13, 2023
96faa34
Properly check attrs for frozen-ness.
clokep Sep 13, 2023
2f75929
Kick CI
clokep Sep 14, 2023
7849b26
Merge remote-tracking branch 'origin/develop' into dmr/mypy-check-at-…
clokep Sep 14, 2023
8b1b15b
Fix-up return value of get_latest_event_ids_in_room.
clokep Sep 14, 2023
451c9b1
Revert "Kick CI"
clokep Sep 14, 2023
d52e30c
Newsfragment
clokep Sep 14, 2023
47b7ba7
FIx-up sync changes.
clokep Sep 14, 2023
ee77d82
Merge remote-tracking branch 'origin/develop' into dmr/mypy-check-at-…
clokep Sep 18, 2023
0f02ad1
Merge remote-tracking branch 'origin/develop' into dmr/mypy-check-at-…
clokep Sep 19, 2023
51a1a5f
Merge remote-tracking branch 'origin/develop' into dmr/mypy-check-at-…
clokep Sep 20, 2023
07c4531
Merge branch 'develop' into dmr/mypy-check-at-cached
clokep Sep 25, 2023
745ad61
Correct context
erikjohnston Sep 29, 2023
03b0e40
Remove ignores for call-sites.
clokep Sep 29, 2023
2c07b1f
Merge remote-tracking branch 'origin/develop' into dmr/mypy-check-at-…
clokep Sep 29, 2023
460ed3c
Add ignores at definition sites.
clokep Sep 29, 2023
fbecb56
Actually check cachedList.
clokep Sep 29, 2023
dba8e72
Fix incorrect generic.
clokep Sep 29, 2023
4f06d85
Lint
clokep Sep 29, 2023
3875662
Abstract shared code.
clokep Sep 29, 2023
fb4ff5d
ServerAclEvaluator is immutable.
clokep Sep 29, 2023
06ddf65
Update comments.
clokep Sep 29, 2023
9b7ee03
Lint
clokep Sep 29, 2023
e2f599d
Update comments and remove unnecessary argument.
clokep Oct 2, 2023
da377cf
Merge remote-tracking branch 'origin/develop' into dmr/mypy-check-at-…
clokep Oct 2, 2023
a2956a6
Fix duplicate word.
clokep Oct 2, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/14911.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Improve type hints.
289 changes: 251 additions & 38 deletions scripts-dev/mypy_synapse_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,24 @@
can crop up, e.g the cache descriptors.
"""

from typing import Callable, Optional, Type
from typing import Callable, Optional, Tuple, Type, Union

import mypy.types
from mypy.erasetype import remove_instance_last_known_values
from mypy.nodes import ARG_NAMED_OPT
from mypy.plugin import MethodSigContext, Plugin
from mypy.errorcodes import ErrorCode
from mypy.nodes import ARG_NAMED_OPT, TempNode, Var
from mypy.plugin import FunctionSigContext, MethodSigContext, Plugin
from mypy.typeops import bind_self
from mypy.types import CallableType, Instance, NoneType, UnionType
from mypy.types import (
AnyType,
CallableType,
Instance,
NoneType,
TupleType,
TypeAliasType,
UninhabitedType,
UnionType,
)


class SynapsePlugin(Plugin):
Expand All @@ -36,9 +47,37 @@ def get_method_signature_hook(
)
):
return cached_function_method_signature

if fullname in (
"synapse.util.caches.descriptors._CachedFunctionDescriptor.__call__",
"synapse.util.caches.descriptors._CachedListFunctionDescriptor.__call__",
):
return check_is_cacheable_wrapper

return None


def _get_true_return_type(signature: CallableType) -> mypy.types.Type:
"""
Get the "final" return type of a callable which might return returned an Awaitable/Deferred.
clokep marked this conversation as resolved.
Show resolved Hide resolved
"""
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Aside: If we have function returning e.g. Deferred[Deferred[T]], it looks like this function doesn't loop and extract the inner T. But we shouldn't be doing that in the first place.

(Would such a type even be legitimate?)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Would such a type even be legitimate?)

I think it wouldn't make sense? Please don't do that.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure that's even really doable since a Deferred returning a Deferred will wait on the inner Deferred? https://docs.twistedmatrix.com/en/stable/core/howto/defer.html#chaining-deferreds

Anyway, that interaction always confuses me and I'm willing to not support it.

if isinstance(signature.ret_type, Instance):
# If a coroutine, unwrap the coroutine's return type.
if signature.ret_type.type.fullname == "typing.Coroutine":
return signature.ret_type.args[2]

# If an awaitable, unwrap the awaitable's final value.
elif signature.ret_type.type.fullname == "typing.Awaitable":
return signature.ret_type.args[0]

# If a Deferred, unwrap the Deferred's final value.
elif signature.ret_type.type.fullname == "twisted.internet.defer.Deferred":
return signature.ret_type.args[0]

# Otherwise, return the raw value of the function.
return signature.ret_type


def cached_function_method_signature(ctx: MethodSigContext) -> CallableType:
"""Fixes the `CachedFunction.__call__` signature to be correct.

Expand All @@ -47,16 +86,17 @@ def cached_function_method_signature(ctx: MethodSigContext) -> CallableType:
1. the `self` argument needs to be marked as "bound";
2. any `cache_context` argument should be removed;
3. an optional keyword argument `on_invalidated` should be added.
4. Wrap the return type to always be a Deferred.
"""

# First we mark this as a bound function signature.
signature = bind_self(ctx.default_signature)
# 1. Mark this as a bound function signature.
signature: CallableType = bind_self(ctx.default_signature)

# Secondly, we remove any "cache_context" args.
# 2. Remove any "cache_context" args.
#
# Note: We should be only doing this if `cache_context=True` is set, but if
# it isn't then the code will raise an exception when its called anyway, so
# its not the end of the world.
# it's not the end of the world.
Comment on lines -59 to +99
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

<3

context_arg_index = None
for idx, name in enumerate(signature.arg_names):
if name == "cache_context":
Expand All @@ -72,7 +112,7 @@ def cached_function_method_signature(ctx: MethodSigContext) -> CallableType:
arg_names.pop(context_arg_index)
arg_kinds.pop(context_arg_index)

# Third, we add an optional "on_invalidate" argument.
# 3. Add an optional "on_invalidate" argument.
#
# This is a either
# - a callable which accepts no input and returns nothing, or
Expand All @@ -94,35 +134,16 @@ def cached_function_method_signature(ctx: MethodSigContext) -> CallableType:
arg_names.append("on_invalidate")
arg_kinds.append(ARG_NAMED_OPT) # Arg is an optional kwarg.

# Finally we ensure the return type is a Deferred.
if (
isinstance(signature.ret_type, Instance)
and signature.ret_type.type.fullname == "twisted.internet.defer.Deferred"
):
# If it is already a Deferred, nothing to do.
ret_type = signature.ret_type
else:
ret_arg = None
if isinstance(signature.ret_type, Instance):
# If a coroutine, wrap the coroutine's return type in a Deferred.
if signature.ret_type.type.fullname == "typing.Coroutine":
ret_arg = signature.ret_type.args[2]

# If an awaitable, wrap the awaitable's final value in a Deferred.
elif signature.ret_type.type.fullname == "typing.Awaitable":
ret_arg = signature.ret_type.args[0]

# Otherwise, wrap the return value in a Deferred.
if ret_arg is None:
ret_arg = signature.ret_type

# This should be able to use ctx.api.named_generic_type, but that doesn't seem
# to find the correct symbol for anything more than 1 module deep.
#
# modules is not part of CheckerPluginInterface. The following is a combination
# of TypeChecker.named_generic_type and TypeChecker.lookup_typeinfo.
sym = ctx.api.modules["twisted.internet.defer"].names.get("Deferred") # type: ignore[attr-defined]
ret_type = Instance(sym.node, [remove_instance_last_known_values(ret_arg)])
# 4. Ensure the return type is a Deferred.
ret_arg = _get_true_return_type(signature)

# This should be able to use ctx.api.named_generic_type, but that doesn't seem
# to find the correct symbol for anything more than 1 module deep.
#
# modules is not part of CheckerPluginInterface. The following is a combination
# of TypeChecker.named_generic_type and TypeChecker.lookup_typeinfo.
sym = ctx.api.modules["twisted.internet.defer"].names.get("Deferred") # type: ignore[attr-defined]
ret_type = Instance(sym.node, [remove_instance_last_known_values(ret_arg)])

signature = signature.copy_modified(
arg_types=arg_types,
Expand All @@ -134,6 +155,198 @@ def cached_function_method_signature(ctx: MethodSigContext) -> CallableType:
return signature


def check_is_cacheable_wrapper(ctx: MethodSigContext) -> CallableType:
clokep marked this conversation as resolved.
Show resolved Hide resolved
"""Asserts that the signature of a method returns a value which can be cached.

Makes no changes to the provided method signature.
Comment on lines +159 to +161
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add a sentence explaining why there is a wrapper? It looks like the point is mainly to check that we have a proper CallableType(?)

OTOH if it's just "to have smaller functions" then let's leave it as is.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OTOH if it's just "to have smaller functions" then let's leave it as is.

I almost inlined it, but it seemed nice to keep them separate.

"""
# The true signature, this isn't being modified so this is what will be returned.
signature: CallableType = ctx.default_signature

if not isinstance(ctx.args[0][0], TempNode):
ctx.api.note("Cached function is not a TempNode?!", ctx.context) # type: ignore[attr-defined]
return signature

orig_sig = ctx.args[0][0].type
if not isinstance(orig_sig, CallableType):
ctx.api.fail("Cached 'function' is not a callable", ctx.context)
return signature

check_is_cacheable(orig_sig, ctx)

return signature


def check_is_cacheable(
signature: CallableType,
ctx: Union[MethodSigContext, FunctionSigContext],
) -> None:
"""
Check if a callable returns a type which can be cached.

Args:
signature: The callable to check.
ctx: The signature context, used for error reporting.
"""
# Unwrap the true return type from the cached function.
return_type = _get_true_return_type(signature)

verbose = ctx.api.options.verbosity >= 1
# TODO Technically a cachedList only needs immutable values, but forcing them
# to return Mapping instead of Dict is fine.
ok, note = is_cacheable(return_type, signature, verbose)

if ok:
message = f"function {signature.name} is @cached, returning {return_type}"
else:
message = f"function {signature.name} is @cached, but has mutable return value {return_type}"

if note:
message += f" ({note})"
message = message.replace("builtins.", "").replace("typing.", "")

if ok and note:
ctx.api.note(message, ctx.context) # type: ignore[attr-defined]
elif not ok:
ctx.api.fail(message, ctx.context, code=AT_CACHED_MUTABLE_RETURN)


# Immutable simple values.
IMMUTABLE_VALUE_TYPES = {
"builtins.bool",
"builtins.int",
"builtins.float",
"builtins.str",
"builtins.bytes",
}

# Types defined in Synapse which are known to be immutable.
IMMUTABLE_CUSTOM_TYPES = {
"synapse.synapse_rust.acl.ServerAclEvaluator",
"synapse.synapse_rust.push.FilteredPushRules",
# This is technically not immutable, but close enough.
"signedjson.types.VerifyKey",
}

# Immutable containers only if the values are also immutable.
IMMUTABLE_CONTAINER_TYPES_REQUIRING_IMMUTABLE_ELEMENTS = {
"builtins.frozenset",
"builtins.tuple",
"typing.AbstractSet",
"typing.Sequence",
"immutabledict.immutabledict",
}

MUTABLE_CONTAINER_TYPES = {
"builtins.set",
"builtins.list",
"builtins.dict",
}

AT_CACHED_MUTABLE_RETURN = ErrorCode(
"synapse-@cached-mutable",
"@cached() should have an immutable return type",
"General",
)


def is_cacheable(
rt: mypy.types.Type, signature: CallableType, verbose: bool
) -> Tuple[bool, Optional[str]]:
"""
Check if a particular type is cachable.

A type is cachable if it is immutable; for complex types this recurses to
check each type parameter.

Returns: a 2-tuple (cacheable, message).
- cachable: False means the type is definitely not cacheable;
true means anything else.
- Optional message.
"""

# This should probably be done via a TypeVisitor. Apologies to the reader!
if isinstance(rt, AnyType):
return True, ("may be mutable" if verbose else None)

elif isinstance(rt, Instance):
if (
rt.type.fullname in IMMUTABLE_VALUE_TYPES
or rt.type.fullname in IMMUTABLE_CUSTOM_TYPES
):
# "Simple" types are generally immutable.
return True, None

elif rt.type.fullname == "typing.Mapping":
# Generally mapping keys are immutable, but they only *have* to be
# hashable, which doesn't imply immutability. E.g. Mapping[K, V]
# is cachable iff K and V are cachable.
return is_cacheable(rt.args[0], signature, verbose) and is_cacheable(
rt.args[1], signature, verbose
)

elif rt.type.fullname in IMMUTABLE_CONTAINER_TYPES_REQUIRING_IMMUTABLE_ELEMENTS:
# E.g. Collection[T] is cachable iff T is cachable.
return is_cacheable(rt.args[0], signature, verbose)

elif rt.type.fullname in MUTABLE_CONTAINER_TYPES:
# Mutable containers are mutable regardless of their underlying type.
return False, None

elif "attrs" in rt.type.metadata:
# attrs classes are only cachable iff it is frozen (immutable itself)
# and all attributes are cachable.
frozen = rt.type.metadata["attrs"]["frozen"]
if frozen:
for attribute in rt.type.metadata["attrs"]["attributes"]:
attribute_name = attribute["name"]
symbol_node = rt.type.names[attribute_name].node
assert isinstance(symbol_node, Var)
assert symbol_node.type is not None
ok, note = is_cacheable(symbol_node.type, signature, verbose)
if not ok:
return False, f"non-frozen attrs property: {attribute_name}"
# All attributes were frozen.
return True, None
else:
return False, "non-frozen attrs class"

else:
# Ensure we fail for unknown types, these generally means that the
# above code is not complete.
return (
False,
f"Don't know how to handle {rt.type.fullname} return type instance",
)

elif isinstance(rt, NoneType):
# None is cachable.
return True, None

elif isinstance(rt, (TupleType, UnionType)):
# Tuples and unions are cachable iff all their items are cachable.
for item in rt.items:
ok, note = is_cacheable(item, signature, verbose)
if not ok:
return False, note
# This discards notes but that's probably fine
return True, None

elif isinstance(rt, TypeAliasType):
# For a type alias, check if the underlying real type is cachable.
return is_cacheable(mypy.types.get_proper_type(rt), signature, verbose)

elif isinstance(rt, UninhabitedType) and rt.is_noreturn:
# There is no return value, just consider it cachable. This is only used
# in tests.
return True, None

else:
# Ensure we fail for unknown types, these generally means that the
# above code is not complete.
return False, f"Don't know how to handle {type(rt).__qualname__} return type"


def plugin(version: str) -> Type[SynapsePlugin]:
# This is the entry point of the plugin, and lets us deal with the fact
# that the mypy plugin interface is *not* stable by looking at the version
Expand Down
4 changes: 2 additions & 2 deletions synapse/handlers/room_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
RequestSendFailed,
SynapseError,
)
from synapse.types import JsonDict, ThirdPartyInstanceID
from synapse.types import JsonDict, JsonMapping, ThirdPartyInstanceID
from synapse.util.caches.descriptors import _CacheContext, cached
from synapse.util.caches.response_cache import ResponseCache

Expand Down Expand Up @@ -256,7 +256,7 @@ async def generate_room_entry(
cache_context: _CacheContext,
with_alias: bool = True,
allow_private: bool = False,
) -> Optional[JsonDict]:
) -> Optional[JsonMapping]:
"""Returns the entry for a room

Args:
Expand Down
7 changes: 4 additions & 3 deletions synapse/storage/databases/main/event_push_actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ class UserPushAction(EmailPushAction):
profile_tag: str


# TODO This is used as a cached value and is mutable.
erikjohnston marked this conversation as resolved.
Show resolved Hide resolved
@attr.s(slots=True, auto_attribs=True)
class NotifCounts:
"""
Expand All @@ -193,15 +194,15 @@ class NotifCounts:
highlight_count: int = 0


@attr.s(slots=True, auto_attribs=True)
@attr.s(slots=True, frozen=True, auto_attribs=True)
class RoomNotifCounts:
"""
The per-user, per-room count of notifications. Used by sync and push.
"""

main_timeline: NotifCounts
# Map of thread ID to the notification counts.
threads: Dict[str, NotifCounts]
threads: Mapping[str, NotifCounts]

@staticmethod
def empty() -> "RoomNotifCounts":
Expand Down Expand Up @@ -483,7 +484,7 @@ def _get_unread_counts_by_room_for_user_txn(

return room_to_count

@cached(tree=True, max_entries=5000, iterable=True)
@cached(tree=True, max_entries=5000, iterable=True) # type: ignore[synapse-@cached-mutable]
async def get_unread_event_push_actions_by_room_for_user(
self,
room_id: str,
Expand Down
Loading