Skip to content

Commit

Permalink
limit the length of generics._limit_assigned_parameters (#4083)
Browse files Browse the repository at this point in the history
* limit the length of generics._limit_assigned_parameters

* switch to using _limit_cache_size for both

* add change description

* correct `_limit_cache_size` cache

* implemented LimitedDict

* try using UserDict

* try upgrading cython

* stop LimitedDict from inheriting from dict

* separate LimitedDict for typing checking :-(

* fix for __class_getitem__
  • Loading branch information
samuelcolvin authored May 18, 2022
1 parent 5a61292 commit 8846ec4
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 3 deletions.
2 changes: 2 additions & 0 deletions changes/4083-samuelcolvin.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Limit the size of `generics._generic_types_cache` and `generics._assigned_parameters`
to avoid unlimited increase in memory usage.
6 changes: 3 additions & 3 deletions pydantic/generics.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,20 +24,20 @@
from .main import BaseModel, create_model
from .types import JsonWrapper
from .typing import display_as_type, get_all_type_hints, get_args, get_origin, typing_base
from .utils import all_identical, lenient_issubclass
from .utils import LimitedDict, all_identical, lenient_issubclass

_generic_types_cache: Dict[Tuple[Type[Any], Union[Any, Tuple[Any, ...]]], Type[BaseModel]] = {}
GenericModelT = TypeVar('GenericModelT', bound='GenericModel')
TypeVarType = Any # since mypy doesn't allow the use of TypeVar as a type

Parametrization = Mapping[TypeVarType, Type[Any]]

# _generic_types_cache is held in a `LimitedDict` (size limit 1000 by default) rather than a plain dict,
# so the cache cannot grow without bound.
_generic_types_cache: LimitedDict[Tuple[Type[Any], Union[Any, Tuple[Any, ...]]], Type[BaseModel]] = LimitedDict()
# _assigned_parameters is a Mapping from parametrized version of generic models to assigned types of parametrizations
# as captured during construction of the class (not instances).
# E.g., for generic model `Model[A, B]`, when parametrized model `Model[int, str]` is created,
# `Model[int, str]: {A: int, B: str}` will be stored in `_assigned_parameters`.
# (This information is only otherwise available after creation from the class name string).
_assigned_parameters: Dict[Type[Any], Parametrization] = {}
_assigned_parameters: LimitedDict[Type[Any], Parametrization] = LimitedDict()


class GenericModel(BaseModel):
Expand Down
38 changes: 38 additions & 0 deletions pydantic/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
Iterator,
List,
Mapping,
MutableMapping,
Optional,
Set,
Tuple,
Expand Down Expand Up @@ -73,6 +74,7 @@
'ROOT_KEY',
'get_unique_discriminator_alias',
'get_discriminator_alias_and_values',
'LimitedDict',
)

ROOT_KEY = '__root__'
Expand Down Expand Up @@ -749,3 +751,39 @@ def _get_union_alias_and_all_values(
# unzip: [('alias_a', ('v1', 'v2')), ('alias_b', ('v3',))] => [('alias_a', 'alias_b'), (('v1', 'v2'), ('v3',))]
all_aliases, all_values = zip(*zipped_aliases_values)
return get_unique_discriminator_alias(all_aliases, discriminator_key), all_values


KT = TypeVar('KT')
VT = TypeVar('VT')
if TYPE_CHECKING:
    # Annoyingly, inheriting from both `MutableMapping` and `dict` breaks cython, hence this workaround:
    # type checkers see a generic mapping, while the runtime class below is a plain `dict` subclass.
    class LimitedDict(dict, MutableMapping[KT, VT]):  # type: ignore[type-arg]
        def __init__(self, size_limit: int = 1000):
            ...

else:

    class LimitedDict(dict):
        """
        Limit the size/length of a dict used for caching to avoid unlimited increase in memory usage.

        Since the dict is ordered, and we always remove elements from the beginning, this is effectively a FIFO cache.

        The limit is enforced only in `__setitem__` (i.e. on `d[key] = value`); this class overrides no other
        mutating method.

        Annoyingly, inheriting from `MutableMapping` breaks cython.
        """

        def __init__(self, size_limit: int = 1000):
            """
            Create an empty cache.

            :param size_limit: maximum number of items kept; exceeding it triggers a trim on assignment
            """
            self.size_limit = size_limit
            super().__init__()

        def __setitem__(self, __key: Any, __value: Any) -> None:
            """
            Store the item, then drop the oldest entries if the size limit has been exceeded.
            """
            super().__setitem__(__key, __value)
            if len(self) > self.size_limit:
                # remove the overflow plus a further 10% of the limit, so that a full cache
                # is not trimmed again on every single insertion
                excess = len(self) - self.size_limit + self.size_limit // 10
                # snapshot the oldest keys first: a dict must not be mutated while it is being iterated
                to_remove = list(self.keys())[:excess]
                for key in to_remove:
                    del self[key]

        def __class_getitem__(cls, *args: Any) -> Any:
            # Allows `LimitedDict[K, V]` in runtime-evaluated annotations (avoids errors with 3.7, where
            # `dict` is not subscriptable). Return the class itself rather than `None` so the subscripted
            # form still refers to a usable type.
            return cls
41 changes: 41 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from pydantic.utils import (
BUILTIN_COLLECTIONS,
ClassAttribute,
LimitedDict,
ValueItems,
all_identical,
deep_update,
Expand Down Expand Up @@ -525,3 +526,43 @@ def test_all_identical():
def test_undefined_pickle():
undefined2 = pickle.loads(pickle.dumps(Undefined))
assert undefined2 is Undefined


def test_limited_dict():
    """Fill a `LimitedDict` with a limit of 10, overflow it, and check the oldest entries are trimmed."""
    cache = LimitedDict(10)
    cache[1] = '1'
    cache[2] = '2'
    assert list(cache.items()) == [(1, '1'), (2, '2')]

    # top up to exactly the limit - nothing may be dropped yet
    for digit in '34567890':
        cache[int(digit)] = digit
    expected_full = [(int(char), char) for char in '1234567890']
    assert list(cache.items()) == expected_full

    # one item over the limit: the two oldest entries are removed, leaving 9
    cache[11] = '11'
    assert len(cache) == 9
    assert list(cache.items()) == expected_full[2:] + [(11, '11')]

    # back up to the limit without trimming, then over it once more
    cache[12] = '12'
    assert len(cache) == 10
    cache[13] = '13'
    assert len(cache) == 9

0 comments on commit 8846ec4

Please sign in to comment.