diff --git a/.github/scripts/flake8_diff.sh b/.github/scripts/flake8_diff.sh index 935d02392..ced33f722 100644 --- a/.github/scripts/flake8_diff.sh +++ b/.github/scripts/flake8_diff.sh @@ -83,7 +83,7 @@ check_files() { # that was not changed does not create failures # The github terminal is 127 characters wide git diff --unified=0 $COMMIT_RANGE -- $files | flake8 --diff --show-source \ - --max-complexity=10 --max-line-length=127 $options + --max-complexity=40 --max-line-length=127 $options fi } diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index d7cc99f6e..bfd33b097 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -44,6 +44,10 @@ jobs: python_version: 3.6 - os: macos-latest python_version: 3.7 + - os: macos-latest + # numpy triggers: RuntimeError: Polyfit sanity test emitted a + # warning + python_version: "pypy3" runs-on: ${{ matrix.os }} @@ -61,7 +65,7 @@ jobs: python -m pip install -r dev-requirements.txt python ci/install_coverage_subprocess_pth.py export - - name: Install optional typing_extensions in Python 3.6 + - name: Install supported dependencies (only tested in Python 3.6) shell: bash run: python -m pip install typing-extensions if: matrix.python_version == '3.6' @@ -106,6 +110,14 @@ jobs: python -m pip install -r dev-requirements.txt python -m pip install -e . python ci/install_coverage_subprocess_pth.py + - name: Generate old pickles (backward compat) + shell: bash + run: | + git_head=$(git rev-parse HEAD) + cp tests/generate_old_pickles.py tests/_generate_old_pickles.py + git checkout v1.4.1 + python tests/_generate_old_pickles.py + git checkout ${git_head} - name: Test with pytest run: | COVERAGE_PROCESS_START=$GITHUB_WORKSPACE/.coveragerc \ @@ -210,16 +222,19 @@ jobs: - name: Install project and dependencies run: | python -m pip install --upgrade -r dev-requirements.txt - python -m pip install setproctitle psutil ray==0.6.4 + python -m pip install setproctitle psutil ray==0.8.6 PROJECT_DIR=$(python -c "import os, ray; print(os.path.dirname(ray.__file__), flush=True)") rm $PROJECT_DIR/cloudpickle/cloudpickle.py + git clone https://github.com/ray-project/ray.git ../ray + cp -R ../ray/python/ray/tests $PROJECT_DIR/tests cp cloudpickle/cloudpickle.py $PROJECT_DIR/cloudpickle/cloudpickle.py + cp cloudpickle/cloudpickle_fast.py $PROJECT_DIR/cloudpickle/cloudpickle_fast.py - name: Test the downstream project run: | PROJECT_DIR="$(python -c "import os, ray; print(os.path.dirname(ray.__file__), flush=True)")" COVERAGE_PROCESS_START="$TRAVIS_BUILD_DIR/.coveragerc" PYTHONPATH='.:tests' pytest -r s - pytest -vl $PROJECT_DIR/tests/test_basic.py::test_simple_serialization - pytest -vl $PROJECT_DIR/tests/test_basic.py::test_complex_serialization + pytest -vl $PROJECT_DIR/tests/test_serialization.py::test_simple_serialization + pytest -vl $PROJECT_DIR/tests/test_serialization.py::test_complex_serialization pytest -vl $PROJECT_DIR/tests/test_basic.py::test_ray_recursive_objects - pytest -vl $PROJECT_DIR/tests/test_basic.py::test_serialization_final_fallback - pytest -vl $PROJECT_DIR/tests/test_recursion.py + pytest -vl $PROJECT_DIR/tests/test_serialization.py::test_serialization_final_fallback + pytest -vl $PROJECT_DIR/tests/test_basic.py::test_nested_functions diff --git a/cloudpickle/__init__.py b/cloudpickle/__init__.py index c3ecfdbc7..ae5f85e53 100644 --- a/cloudpickle/__init__.py +++ b/cloudpickle/__init__.py @@ -1,11 +1,7 @@ from __future__ import absolute_import -import sys -import pickle - -from cloudpickle.cloudpickle
import * -if sys.version_info[:2] >= (3, 8): - from cloudpickle.cloudpickle_fast import CloudPickler, dumps, dump +from cloudpickle.cloudpickle import * # noqa +from cloudpickle.cloudpickle_fast import CloudPickler, dumps, dump # noqa __version__ = '1.5.0dev0' diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 3cfc7c974..0ab6c83f8 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -42,29 +42,21 @@ """ from __future__ import print_function -import abc import builtins import dis -import io -import itertools -import logging import opcode -import operator import pickle import platform -import struct import sys import types import weakref import uuid import threading import typing -from enum import Enum +import warnings from typing import Generic, Union, Tuple, Callable -from pickle import _Pickler as Pickler from pickle import _getattribute -from io import BytesIO from importlib._bootstrap import _find_spec try: # pragma: no branch @@ -78,6 +70,17 @@ else: # pragma: no cover ClassVar = None +if sys.version_info >= (3, 8): + from types import CellType +else: + def f(): + a = 1 + + def g(): + return a + return g + CellType = type(f().__closure__[0]) + # cloudpickle is meant for inter process communication: we expect all # communicating processes to run the same Python version hence we favor @@ -471,577 +474,61 @@ def _is_parametrized_type_hint(obj): def _create_parametrized_type_hint(origin, args): return origin[args] +else: + _is_parametrized_type_hint = None + _create_parametrized_type_hint = None + + +def parametrized_type_hint_getinitargs(obj): + # The distorted type check semantics for typing constructs becomes: + # ``type(obj) is type(TypeHint)``, which means "obj is a + # parametrized TypeHint" + if type(obj) is type(Literal): # pragma: no branch + initargs = (Literal, obj.__values__) + elif type(obj) is type(Final): # pragma: no branch + initargs = (Final, obj.__type__) + elif type(obj) is type(ClassVar): + initargs = (ClassVar, obj.__type__) + elif type(obj) is type(Generic): + parameters = obj.__parameters__ + if len(obj.__parameters__) > 0: + # in early Python 3.5, __parameters__ was sometimes + # preferred to __args__ + initargs = (obj.__origin__, parameters) - -class CloudPickler(Pickler): - - dispatch = Pickler.dispatch.copy() - - def __init__(self, file, protocol=None): - if protocol is None: - protocol = DEFAULT_PROTOCOL - Pickler.__init__(self, file, protocol=protocol) - # map ids to dictionary.
used to ensure that functions can share global env - self.globals_ref = {} - - def dump(self, obj): - self.inject_addons() - try: - return Pickler.dump(self, obj) - except RuntimeError as e: - if 'recursion' in e.args[0]: - msg = """Could not pickle object as excessively deep recursion required.""" - raise pickle.PicklingError(msg) from e - else: - raise - - def save_typevar(self, obj): - self.save_reduce(*_typevar_reduce(obj), obj=obj) - - dispatch[typing.TypeVar] = save_typevar - - def save_memoryview(self, obj): - self.save(obj.tobytes()) - - dispatch[memoryview] = save_memoryview - - def save_module(self, obj): - """ - Save a module as an import - """ - if _is_importable(obj): - self.save_reduce(subimport, (obj.__name__,), obj=obj) else: - obj.__dict__.pop('__builtins__', None) - self.save_reduce(dynamic_subimport, (obj.__name__, vars(obj)), - obj=obj) - - dispatch[types.ModuleType] = save_module - - def save_codeobject(self, obj): - """ - Save a code object - """ - if hasattr(obj, "co_posonlyargcount"): # pragma: no branch - args = ( - obj.co_argcount, obj.co_posonlyargcount, - obj.co_kwonlyargcount, obj.co_nlocals, obj.co_stacksize, - obj.co_flags, obj.co_code, obj.co_consts, obj.co_names, - obj.co_varnames, obj.co_filename, obj.co_name, - obj.co_firstlineno, obj.co_lnotab, obj.co_freevars, - obj.co_cellvars - ) + initargs = (obj.__origin__, obj.__args__) + elif type(obj) is type(Union): + if sys.version_info < (3, 5, 3): # pragma: no cover + initargs = (Union, obj.__union_params__) else: - args = ( - obj.co_argcount, obj.co_kwonlyargcount, obj.co_nlocals, - obj.co_stacksize, obj.co_flags, obj.co_code, obj.co_consts, - obj.co_names, obj.co_varnames, obj.co_filename, - obj.co_name, obj.co_firstlineno, obj.co_lnotab, - obj.co_freevars, obj.co_cellvars - ) - self.save_reduce(types.CodeType, args, obj=obj) - - dispatch[types.CodeType] = save_codeobject - - def save_function(self, obj, name=None): - """ Registered with the dispatch to handle all function types. - - Determines what kind of function obj is (e.g. lambda, defined at - interactive prompt, etc) and handles the pickling appropriately. - """ - if _is_importable(obj, name=name): - return Pickler.save_global(self, obj, name=name) - elif PYPY and isinstance(obj.__code__, builtin_code_type): - return self.save_pypy_builtin_func(obj) + initargs = (Union, obj.__args__) + elif type(obj) is type(Tuple): + if sys.version_info < (3, 5, 3): # pragma: no cover + initargs = (Tuple, obj.__tuple_params__) else: - return self.save_function_tuple(obj) - - dispatch[types.FunctionType] = save_function - - def save_pypy_builtin_func(self, obj): - """Save pypy equivalent of builtin functions. - - PyPy does not have the concept of builtin-functions. Instead, - builtin-functions are simple function instances, but with a - builtin-code attribute. - Most of the time, builtin functions should be pickled by attribute. But - PyPy has flaky support for __qualname__, so some builtin functions such - as float.__new__ will be classified as dynamic. For this reason only, - we created this special routine. Because builtin-functions are not - expected to have closure or globals, there is no additional hack - (compared the one already implemented in pickle) to protect ourselves - from reference cycles. A simple (reconstructor, newargs, obj.__dict__) - tuple is save_reduced. - - Note also that PyPy improved their support for __qualname__ in v3.6, so - this routing should be removed when cloudpickle supports only PyPy 3.6 - and later. 
- """ - rv = (types.FunctionType, (obj.__code__, {}, obj.__name__, - obj.__defaults__, obj.__closure__), - obj.__dict__) - self.save_reduce(*rv, obj=obj) - - def _save_dynamic_enum(self, obj, clsdict): - """Special handling for dynamic Enum subclasses - - Use a dedicated Enum constructor (inspired by EnumMeta.__call__) as the - EnumMeta metaclass has complex initialization that makes the Enum - subclasses hold references to their own instances. - """ - members = dict((e.name, e.value) for e in obj) - - self.save_reduce( - _make_skeleton_enum, - (obj.__bases__, obj.__name__, obj.__qualname__, - members, obj.__module__, _get_or_create_tracker_id(obj), None), - obj=obj - ) - - # Cleanup the clsdict that will be passed to _rehydrate_skeleton_class: - # Those attributes are already handled by the metaclass. - for attrname in ["_generate_next_value_", "_member_names_", - "_member_map_", "_member_type_", - "_value2member_map_"]: - clsdict.pop(attrname, None) - for member in members: - clsdict.pop(member) - - def save_dynamic_class(self, obj): - """Save a class that can't be stored as module global. - - This method is used to serialize classes that are defined inside - functions, or that otherwise can't be serialized as attribute lookups - from global modules. - """ - clsdict = _extract_class_dict(obj) - clsdict.pop('__weakref__', None) - - if issubclass(type(obj), abc.ABCMeta): - # If obj is an instance of an ABCMeta subclass, dont pickle the - # cache/negative caches populated during isinstance/issubclass - # checks, but pickle the list of registered subclasses of obj. - clsdict.pop('_abc_cache', None) - clsdict.pop('_abc_negative_cache', None) - clsdict.pop('_abc_negative_cache_version', None) - registry = clsdict.pop('_abc_registry', None) - if registry is None: - # in Python3.7+, the abc caches and registered subclasses of a - # class are bundled into the single _abc_impl attribute - clsdict.pop('_abc_impl', None) - (registry, _, _, _) = abc._get_dump(obj) - - clsdict["_abc_impl"] = [subclass_weakref() - for subclass_weakref in registry] - else: - # In the above if clause, registry is a set of weakrefs -- in - # this case, registry is a WeakSet - clsdict["_abc_impl"] = [type_ for type_ in registry] - - # On PyPy, __doc__ is a readonly attribute, so we need to include it in - # the initial skeleton class. This is safe because we know that the - # doc can't participate in a cycle with the original class. - type_kwargs = {'__doc__': clsdict.pop('__doc__', None)} - - if "__slots__" in clsdict: - type_kwargs['__slots__'] = obj.__slots__ - # pickle string length optimization: member descriptors of obj are - # created automatically from obj's __slots__ attribute, no need to - # save them in obj's state - if isinstance(obj.__slots__, str): - clsdict.pop(obj.__slots__) - else: - for k in obj.__slots__: - clsdict.pop(k, None) - - # If type overrides __dict__ as a property, include it in the type - # kwargs. In Python 2, we can't set this attribute after construction. - # XXX: can this ever happen in Python 3? If so add a test. - __dict__ = clsdict.pop('__dict__', None) - if isinstance(__dict__, property): - type_kwargs['__dict__'] = __dict__ - - save = self.save - write = self.write - - # We write pickle instructions explicitly here to handle the - # possibility that the type object participates in a cycle with its own - # __dict__. We first write an empty "skeleton" version of the class and - # memoize it before writing the class' __dict__ itself. 
We then write - # instructions to "rehydrate" the skeleton class by restoring the - # attributes from the __dict__. - # - # A type can appear in a cycle with its __dict__ if an instance of the - # type appears in the type's __dict__ (which happens for the stdlib - # Enum class), or if the type defines methods that close over the name - # of the type, (which is common for Python 2-style super() calls). - - # Push the rehydration function. - save(_rehydrate_skeleton_class) - - # Mark the start of the args tuple for the rehydration function. - write(pickle.MARK) - - # Create and memoize an skeleton class with obj's name and bases. - if Enum is not None and issubclass(obj, Enum): - # Special handling of Enum subclasses - self._save_dynamic_enum(obj, clsdict) + initargs = (Tuple, obj.__args__) + elif type(obj) is type(Callable): + if sys.version_info < (3, 5, 3): # pragma: no cover + args = obj.__args__ + result = obj.__result__ + if args != Ellipsis: + if isinstance(args, tuple): + args = list(args) + else: + args = [args] else: - # "Regular" class definition: - tp = type(obj) - self.save_reduce(_make_skeleton_class, - (tp, obj.__name__, _get_bases(obj), type_kwargs, - _get_or_create_tracker_id(obj), None), - obj=obj) - - # Now save the rest of obj's __dict__. Any references to obj - # encountered while saving will point to the skeleton class. - save(clsdict) - - # Write a tuple of (skeleton_class, clsdict). - write(pickle.TUPLE) - - # Call _rehydrate_skeleton_class(skeleton_class, clsdict) - write(pickle.REDUCE) - - def save_function_tuple(self, func): - """ Pickles an actual func object. - - A func comprises: code, globals, defaults, closure, and dict. We - extract and save these, injecting reducing functions at certain points - to recreate the func object. Keep in mind that some of these pieces - can contain a ref to the func itself. Thus, a naive save on these - pieces could trigger an infinite loop of save's. To get around that, - we first create a skeleton func object using just the code (this is - safe, since this won't contain a ref to the func), and memoize it as - soon as it's created. The other stuff can then be filled in later. - """ - if is_tornado_coroutine(func): - self.save_reduce(_rebuild_tornado_coroutine, (func.__wrapped__,), - obj=func) - return - - save = self.save - write = self.write - - code, f_globals, defaults, closure_values, dct, base_globals = self.extract_func_data(func) - - save(_fill_function) # skeleton function updater - write(pickle.MARK) # beginning of tuple that _fill_function expects - - # Extract currently-imported submodules used by func. 
Storing these - # modules in a smoke _cloudpickle_subimports attribute of the object's - # state will trigger the side effect of importing these modules at - # unpickling time (which is necessary for func to work correctly once - # depickled) - submodules = _find_imported_submodules( - code, - itertools.chain(f_globals.values(), closure_values or ()), - ) - - # create a skeleton function object and memoize it - save(_make_skel_func) - save(( - code, - len(closure_values) if closure_values is not None else -1, - base_globals, - )) - write(pickle.REDUCE) - self.memoize(func) - - # save the rest of the func data needed by _fill_function - state = { - 'globals': f_globals, - 'defaults': defaults, - 'dict': dct, - 'closure_values': closure_values, - 'module': func.__module__, - 'name': func.__name__, - 'doc': func.__doc__, - '_cloudpickle_submodules': submodules - } - if hasattr(func, '__annotations__'): - state['annotations'] = func.__annotations__ - if hasattr(func, '__qualname__'): - state['qualname'] = func.__qualname__ - if hasattr(func, '__kwdefaults__'): - state['kwdefaults'] = func.__kwdefaults__ - save(state) - write(pickle.TUPLE) - write(pickle.REDUCE) # applies _fill_function on the tuple - - def extract_func_data(self, func): - """ - Turn the function into a tuple of data necessary to recreate it: - code, globals, defaults, closure_values, dict - """ - code = func.__code__ - - # extract all global ref's - func_global_refs = _extract_code_globals(code) - - # process all variables referenced by global environment - f_globals = {} - for var in func_global_refs: - if var in func.__globals__: - f_globals[var] = func.__globals__[var] - - # defaults requires no processing - defaults = func.__defaults__ - - # process closure - closure = ( - list(map(_get_cell_contents, func.__closure__)) - if func.__closure__ is not None - else None + (*args, result) = obj.__args__ + if len(args) == 1 and args[0] is Ellipsis: + args = Ellipsis + else: + args = list(args) + initargs = (Callable, (args, result)) + else: # pragma: no cover + raise pickle.PicklingError( + "Cloudpickle Error: Unknown type {}".format(type(obj)) ) - - # save the dict - dct = func.__dict__ - - # base_globals represents the future global namespace of func at - # unpickling time. Looking it up and storing it in globals_ref allow - # functions sharing the same globals at pickling time to also - # share them once unpickled, at one condition: since globals_ref is - # an attribute of a Cloudpickler instance, and that a new CloudPickler is - # created each time pickle.dump or pickle.dumps is called, functions - # also need to be saved within the same invokation of - # cloudpickle.dump/cloudpickle.dumps (for example: cloudpickle.dumps([f1, f2])). There - # is no such limitation when using Cloudpickler.dump, as long as the - # multiple invokations are bound to the same Cloudpickler. - base_globals = self.globals_ref.setdefault(id(func.__globals__), {}) - - if base_globals == {}: - # Add module attributes used to resolve relative imports - # instructions inside func. 
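The globals_ref bookkeeping described above (kept, in reduced form, in cloudpickle_fast.py) gives one observable guarantee: functions pickled within a single dumps call share one global namespace after unpickling. A minimal sketch of that guarantee, assuming the functions are defined in __main__ (a script or REPL) so that cloudpickle serializes them by value:

    import pickle

    import cloudpickle

    _counter = 0

    def bump():
        global _counter
        _counter += 1
        return _counter

    def read():
        return _counter

    # Pickled together, both functions are rebuilt around one shared
    # base_globals dict, so a write through one is visible to the other.
    new_bump, new_read = pickle.loads(cloudpickle.dumps([bump, read]))
    new_bump()
    assert new_read() == 1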
- for k in ["__package__", "__name__", "__path__", "__file__"]: - # Some built-in functions/methods such as object.__new__ have - # their __globals__ set to None in PyPy - if func.__globals__ is not None and k in func.__globals__: - base_globals[k] = func.__globals__[k] - - return (code, f_globals, defaults, closure, dct, base_globals) - - def save_getset_descriptor(self, obj): - return self.save_reduce(getattr, (obj.__objclass__, obj.__name__)) - - dispatch[types.GetSetDescriptorType] = save_getset_descriptor - - def save_global(self, obj, name=None, pack=struct.pack): - """ - Save a "global". - - The name of this method is somewhat misleading: all types get - dispatched here. - """ - if obj is type(None): - return self.save_reduce(type, (None,), obj=obj) - elif obj is type(Ellipsis): - return self.save_reduce(type, (Ellipsis,), obj=obj) - elif obj is type(NotImplemented): - return self.save_reduce(type, (NotImplemented,), obj=obj) - elif obj in _BUILTIN_TYPE_NAMES: - return self.save_reduce( - _builtin_type, (_BUILTIN_TYPE_NAMES[obj],), obj=obj) - - if sys.version_info[:2] < (3, 7) and _is_parametrized_type_hint(obj): # noqa # pragma: no branch - # Parametrized typing constructs in Python < 3.7 are not compatible - # with type checks and ``isinstance`` semantics. For this reason, - # it is easier to detect them using a duck-typing-based check - # (``_is_parametrized_type_hint``) than to populate the Pickler's - # dispatch with type-specific savers. - self._save_parametrized_type_hint(obj) - elif name is not None: - Pickler.save_global(self, obj, name=name) - elif not _is_importable(obj, name=name): - self.save_dynamic_class(obj) - else: - Pickler.save_global(self, obj, name=name) - - dispatch[type] = save_global - - def save_instancemethod(self, obj): - # Memoization rarely is ever useful due to python bounding - if obj.__self__ is None: - self.save_reduce(getattr, (obj.im_class, obj.__name__)) - else: - self.save_reduce(types.MethodType, (obj.__func__, obj.__self__), obj=obj) - - dispatch[types.MethodType] = save_instancemethod - - def save_property(self, obj): - # properties not correctly saved in python - self.save_reduce(property, (obj.fget, obj.fset, obj.fdel, obj.__doc__), - obj=obj) - - dispatch[property] = save_property - - def save_classmethod(self, obj): - orig_func = obj.__func__ - self.save_reduce(type(obj), (orig_func,), obj=obj) - - dispatch[classmethod] = save_classmethod - dispatch[staticmethod] = save_classmethod - - def save_itemgetter(self, obj): - """itemgetter serializer (needed for namedtuple support)""" - class Dummy: - def __getitem__(self, item): - return item - items = obj(Dummy()) - if not isinstance(items, tuple): - items = (items,) - return self.save_reduce(operator.itemgetter, items) - - if type(operator.itemgetter) is type: - dispatch[operator.itemgetter] = save_itemgetter - - def save_attrgetter(self, obj): - """attrgetter serializer""" - class Dummy(object): - def __init__(self, attrs, index=None): - self.attrs = attrs - self.index = index - def __getattribute__(self, item): - attrs = object.__getattribute__(self, "attrs") - index = object.__getattribute__(self, "index") - if index is None: - index = len(attrs) - attrs.append(item) - else: - attrs[index] = ".".join([attrs[index], item]) - return type(self)(attrs, index) - attrs = [] - obj(Dummy(attrs)) - return self.save_reduce(operator.attrgetter, tuple(attrs)) - - if type(operator.attrgetter) is type: - dispatch[operator.attrgetter] = save_attrgetter - - def save_file(self, obj): - """Save a file""" - - 
if not hasattr(obj, 'name') or not hasattr(obj, 'mode'): - raise pickle.PicklingError("Cannot pickle files that do not map to an actual file") - if obj is sys.stdout: - return self.save_reduce(getattr, (sys, 'stdout'), obj=obj) - if obj is sys.stderr: - return self.save_reduce(getattr, (sys, 'stderr'), obj=obj) - if obj is sys.stdin: - raise pickle.PicklingError("Cannot pickle standard input") - if obj.closed: - raise pickle.PicklingError("Cannot pickle closed files") - if hasattr(obj, 'isatty') and obj.isatty(): - raise pickle.PicklingError("Cannot pickle files that map to tty objects") - if 'r' not in obj.mode and '+' not in obj.mode: - raise pickle.PicklingError("Cannot pickle files that are not opened for reading: %s" % obj.mode) - - name = obj.name - - # TODO: also support binary mode files with io.BytesIO - retval = io.StringIO() - - try: - # Read the whole file - curloc = obj.tell() - obj.seek(0) - contents = obj.read() - obj.seek(curloc) - except IOError as e: - raise pickle.PicklingError("Cannot pickle file %s as it cannot be read" % name) from e - retval.write(contents) - retval.seek(curloc) - - retval.name = name - self.save(retval) - self.memoize(obj) - - def save_ellipsis(self, obj): - self.save_reduce(_gen_ellipsis, ()) - - def save_not_implemented(self, obj): - self.save_reduce(_gen_not_implemented, ()) - - dispatch[io.TextIOWrapper] = save_file - dispatch[type(Ellipsis)] = save_ellipsis - dispatch[type(NotImplemented)] = save_not_implemented - - def save_weakset(self, obj): - self.save_reduce(weakref.WeakSet, (list(obj),)) - - dispatch[weakref.WeakSet] = save_weakset - - def save_logger(self, obj): - self.save_reduce(logging.getLogger, (obj.name,), obj=obj) - - dispatch[logging.Logger] = save_logger - - def save_root_logger(self, obj): - self.save_reduce(logging.getLogger, (), obj=obj) - - dispatch[logging.RootLogger] = save_root_logger - - if hasattr(types, "MappingProxyType"): # pragma: no branch - def save_mappingproxy(self, obj): - self.save_reduce(types.MappingProxyType, (dict(obj),), obj=obj) - - dispatch[types.MappingProxyType] = save_mappingproxy - - """Special functions for Add-on libraries""" - def inject_addons(self): - """Plug in system. 
Register additional pickling functions if modules already loaded""" - pass - - if sys.version_info < (3, 7): # pragma: no branch - def _save_parametrized_type_hint(self, obj): - # The distorted type check sematic for typing construct becomes: - # ``type(obj) is type(TypeHint)``, which means "obj is a - # parametrized TypeHint" - if type(obj) is type(Literal): # pragma: no branch - initargs = (Literal, obj.__values__) - elif type(obj) is type(Final): # pragma: no branch - initargs = (Final, obj.__type__) - elif type(obj) is type(ClassVar): - initargs = (ClassVar, obj.__type__) - elif type(obj) is type(Generic): - parameters = obj.__parameters__ - if len(obj.__parameters__) > 0: - # in early Python 3.5, __parameters__ was sometimes - # preferred to __args__ - initargs = (obj.__origin__, parameters) - else: - initargs = (obj.__origin__, obj.__args__) - elif type(obj) is type(Union): - if sys.version_info < (3, 5, 3): # pragma: no cover - initargs = (Union, obj.__union_params__) - else: - initargs = (Union, obj.__args__) - elif type(obj) is type(Tuple): - if sys.version_info < (3, 5, 3): # pragma: no cover - initargs = (Tuple, obj.__tuple_params__) - else: - initargs = (Tuple, obj.__args__) - elif type(obj) is type(Callable): - if sys.version_info < (3, 5, 3): # pragma: no cover - args = obj.__args__ - result = obj.__result__ - if args != Ellipsis: - if isinstance(args, tuple): - args = list(args) - else: - args = [args] - else: - (*args, result) = obj.__args__ - if len(args) == 1 and args[0] is Ellipsis: - args = Ellipsis - else: - args = list(args) - initargs = (Callable, (args, result)) - else: # pragma: no cover - raise pickle.PicklingError( - "Cloudpickle Error: Unknown type {}".format(type(obj)) - ) - self.save_reduce(_create_parametrized_type_hint, initargs, obj=obj) + return initargs # Tornado support @@ -1065,40 +552,6 @@ def _rebuild_tornado_coroutine(func): return gen.coroutine(func) -# Shorthands for legacy support - -def dump(obj, file, protocol=None): - """Serialize obj as bytes streamed into file - - protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to - pickle.HIGHEST_PROTOCOL. This setting favors maximum communication speed - between processes running the same Python version. - - Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure - compatibility with older versions of Python. - """ - CloudPickler(file, protocol=protocol).dump(obj) - - -def dumps(obj, protocol=None): - """Serialize obj as a string of bytes allocated in memory - - protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to - pickle.HIGHEST_PROTOCOL. This setting favors maximum communication speed - between processes running the same Python version. - - Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure - compatibility with older versions of Python. 
- """ - file = BytesIO() - try: - cp = CloudPickler(file, protocol=protocol) - cp.dump(obj) - return file.getvalue() - finally: - file.close() - - # including pickles unloading functions in this namespace load = pickle.load loads = pickle.loads @@ -1197,7 +650,7 @@ def _fill_function(*args): if 'annotations' in state: func.__annotations__ = state['annotations'] if 'doc' in state: - func.__doc__ = state['doc'] + func.__doc__ = state['doc'] if 'name' in state: func.__name__ = state['name'] if 'module' in state: @@ -1232,11 +685,24 @@ def _make_empty_cell(): return (lambda: cell).__closure__[0] +def _make_cell(value=_empty_cell_value): + cell = _make_empty_cell() + if value is not _empty_cell_value: + cell_set(cell, value) + return cell + + def _make_skel_func(code, cell_count, base_globals=None): """ Creates a skeleton function object that contains just the provided code and the correct number of cells in func_closure. All other func attributes (e.g. func_globals) are empty. """ + # This function is deprecated and should be removed in cloudpickle 1.7 + warnings.warn( + "A pickle file created using an old (<=1.4.1) version of cloudpicke " + "is currently being loaded. This is not supported by cloudpickle and " + "will break in cloudpickle 1.7", category=UserWarning + ) # This is backward-compatibility code: for cloudpickle versions between # 0.5.4 and 0.7, base_globals could be a string or None. base_globals # should now always be a dictionary. diff --git a/cloudpickle/cloudpickle_fast.py b/cloudpickle/cloudpickle_fast.py index b285482ec..4b52a1e8e 100644 --- a/cloudpickle/cloudpickle_fast.py +++ b/cloudpickle/cloudpickle_fast.py @@ -15,54 +15,96 @@ import io import itertools import logging -import _pickle import pickle import sys +import struct import types import weakref import typing -from _pickle import Pickler +from enum import Enum +from collections import ChainMap from .cloudpickle import ( _extract_code_globals, _BUILTIN_TYPE_NAMES, DEFAULT_PROTOCOL, _find_imported_submodules, _get_cell_contents, _is_importable, - _builtin_type, Enum, _get_or_create_tracker_id, _make_skeleton_class, + _builtin_type, _get_or_create_tracker_id, _make_skeleton_class, _make_skeleton_enum, _extract_class_dict, dynamic_subimport, subimport, - _typevar_reduce, _get_bases, + _typevar_reduce, _get_bases, _make_cell, _make_empty_cell, CellType, + _is_parametrized_type_hint, PYPY, cell_set, + parametrized_type_hint_getinitargs, _create_parametrized_type_hint, + builtin_code_type + ) -load, loads = _pickle.load, _pickle.loads +if sys.version_info >= (3, 8) and not PYPY: + from _pickle import Pickler + # Shorthands similar to pickle.dump/pickle.dumps + def dump(obj, file, protocol=None, buffer_callback=None): + """Serialize obj as bytes streamed into file -# Shorthands similar to pickle.dump/pickle.dumps -def dump(obj, file, protocol=None, buffer_callback=None): - """Serialize obj as bytes streamed into file + protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to + pickle.HIGHEST_PROTOCOL. This setting favors maximum communication + speed between processes running the same Python version. - protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to - pickle.HIGHEST_PROTOCOL. This setting favors maximum communication speed - between processes running the same Python version. + Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure + compatibility with older versions of Python. 
+ """ + CloudPickler( + file, protocol=protocol, buffer_callback=buffer_callback + ).dump(obj) - Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure - compatibility with older versions of Python. - """ - CloudPickler(file, protocol=protocol, buffer_callback=buffer_callback).dump(obj) + def dumps(obj, protocol=None, buffer_callback=None): + """Serialize obj as a string of bytes allocated in memory + protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to + pickle.HIGHEST_PROTOCOL. This setting favors maximum communication + speed between processes running the same Python version. -def dumps(obj, protocol=None, buffer_callback=None): - """Serialize obj as a string of bytes allocated in memory + Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure + compatibility with older versions of Python. + """ + with io.BytesIO() as file: + cp = CloudPickler( + file, protocol=protocol, buffer_callback=buffer_callback + ) + cp.dump(obj) + return file.getvalue() + +else: + from pickle import _Pickler as Pickler + + # Shorthands similar to pickle.dump/pickle.dumps + def dump(obj, file, protocol=None): + """Serialize obj as bytes streamed into file + + protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to + pickle.HIGHEST_PROTOCOL. This setting favors maximum communication + speed between processes running the same Python version. + + Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure + compatibility with older versions of Python. + """ + CloudPickler(file, protocol=protocol).dump(obj) - protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to - pickle.HIGHEST_PROTOCOL. This setting favors maximum communication speed - between processes running the same Python version. + def dumps(obj, protocol=None): + """Serialize obj as a string of bytes allocated in memory + + protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to + pickle.HIGHEST_PROTOCOL. This setting favors maximum communication + speed between processes running the same Python version. + + Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure + compatibility with older versions of Python. + """ + with io.BytesIO() as file: + cp = CloudPickler(file, protocol=protocol) + cp.dump(obj) + return file.getvalue() - Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure - compatibility with older versions of Python. - """ - with io.BytesIO() as file: - cp = CloudPickler(file, protocol=protocol, buffer_callback=buffer_callback) - cp.dump(obj) - return file.getvalue() + +load, loads = pickle.load, pickle.loads # COLLECTION OF OBJECTS __getnewargs__-LIKE METHODS @@ -142,10 +184,22 @@ def _class_getstate(obj): # If obj is an instance of an ABCMeta subclass, dont pickle the # cache/negative caches populated during isinstance/issubclass # checks, but pickle the list of registered subclasses of obj. 
- clsdict.pop('_abc_impl', None) - (registry, _, _, _) = abc._get_dump(obj) - clsdict["_abc_impl"] = [subclass_weakref() - for subclass_weakref in registry] + clsdict.pop('_abc_cache', None) + clsdict.pop('_abc_negative_cache', None) + clsdict.pop('_abc_negative_cache_version', None) + registry = clsdict.pop('_abc_registry', None) + if registry is None: + # in Python3.7+, the abc caches and registered subclasses of a + # class are bundled into the single _abc_impl attribute + clsdict.pop('_abc_impl', None) + (registry, _, _, _) = abc._get_dump(obj) + + clsdict["_abc_impl"] = [subclass_weakref() + for subclass_weakref in registry] + else: + # In the above if clause, registry is a set of weakrefs -- in + # this case, registry is a WeakSet + clsdict["_abc_impl"] = [type_ for type_ in registry] if "__slots__" in clsdict: # pickle string length optimization: member descriptors of obj are @@ -191,14 +245,23 @@ def _enum_getstate(obj): def _code_reduce(obj): """codeobject reducer""" - args = ( - obj.co_argcount, obj.co_posonlyargcount, - obj.co_kwonlyargcount, obj.co_nlocals, obj.co_stacksize, - obj.co_flags, obj.co_code, obj.co_consts, obj.co_names, - obj.co_varnames, obj.co_filename, obj.co_name, - obj.co_firstlineno, obj.co_lnotab, obj.co_freevars, - obj.co_cellvars - ) + if hasattr(obj, "co_posonlyargcount"): # pragma: no branch + args = ( + obj.co_argcount, obj.co_posonlyargcount, + obj.co_kwonlyargcount, obj.co_nlocals, obj.co_stacksize, + obj.co_flags, obj.co_code, obj.co_consts, obj.co_names, + obj.co_varnames, obj.co_filename, obj.co_name, + obj.co_firstlineno, obj.co_lnotab, obj.co_freevars, + obj.co_cellvars + ) + else: + args = ( + obj.co_argcount, obj.co_kwonlyargcount, obj.co_nlocals, + obj.co_stacksize, obj.co_flags, obj.co_code, obj.co_consts, + obj.co_names, obj.co_varnames, obj.co_filename, + obj.co_name, obj.co_firstlineno, obj.co_lnotab, + obj.co_freevars, obj.co_cellvars + ) return types.CodeType, args @@ -207,9 +270,9 @@ def _cell_reduce(obj): try: obj.cell_contents except ValueError: # cell is empty - return types.CellType, () + return _make_empty_cell, () else: - return types.CellType, (obj.cell_contents,) + return _make_cell, (obj.cell_contents, ) def _classmethod_reduce(obj): @@ -373,7 +436,7 @@ def _function_setstate(obj, state): value = cell.cell_contents except ValueError: # cell is empty continue - obj.__closure__[i].cell_contents = value + cell_set(obj.__closure__[i], value) for k, v in slotstate.items(): setattr(obj, k, v) @@ -395,95 +458,25 @@ def _class_setstate(obj, state): class CloudPickler(Pickler): - """Fast C Pickler extension with additional reducing routines. - - CloudPickler's extensions exist into into: - - * its dispatch_table containing reducers that are called only if ALL - built-in saving functions were previously discarded. - * a special callback named "reducer_override", invoked before standard - function/class builtin-saving method (save_global), to serialize dynamic - functions - """ - - # cloudpickle's own dispatch_table, containing the additional set of - # objects (compared to the standard library pickle) that cloupickle can - # serialize. 
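The two args tuples in _code_reduce above track the CPython 3.8 signature change that introduced co_posonlyargcount. A sketch of the round-trip they enable, assuming CPython 3.8-3.10 (the CodeType constructor changed again in 3.11):

    import types

    def f(a, b=2):
        return a + b

    code = f.__code__
    args = (
        code.co_argcount, code.co_posonlyargcount, code.co_kwonlyargcount,
        code.co_nlocals, code.co_stacksize, code.co_flags, code.co_code,
        code.co_consts, code.co_names, code.co_varnames, code.co_filename,
        code.co_name, code.co_firstlineno, code.co_lnotab, code.co_freevars,
        code.co_cellvars,
    )
    clone = types.CodeType(*args)  # same argument order as _code_reduce
    g = types.FunctionType(clone, globals(), "g", f.__defaults__)
    assert g(1) == 3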
- dispatch = {} - dispatch[classmethod] = _classmethod_reduce - dispatch[io.TextIOWrapper] = _file_reduce - dispatch[logging.Logger] = _logger_reduce - dispatch[logging.RootLogger] = _root_logger_reduce - dispatch[memoryview] = _memoryview_reduce - dispatch[property] = _property_reduce - dispatch[staticmethod] = _classmethod_reduce - dispatch[types.CellType] = _cell_reduce - dispatch[types.CodeType] = _code_reduce - dispatch[types.GetSetDescriptorType] = _getset_descriptor_reduce - dispatch[types.ModuleType] = _module_reduce - dispatch[types.MethodType] = _method_reduce - dispatch[types.MappingProxyType] = _mappingproxy_reduce - dispatch[weakref.WeakSet] = _weakset_reduce - dispatch[typing.TypeVar] = _typevar_reduce - - def __init__(self, file, protocol=None, buffer_callback=None): - if protocol is None: - protocol = DEFAULT_PROTOCOL - Pickler.__init__(self, file, protocol=protocol, buffer_callback=buffer_callback) - # map functions __globals__ attribute ids, to ensure that functions - # sharing the same global namespace at pickling time also share their - # global namespace at unpickling time. - self.globals_ref = {} - - # Take into account potential custom reducers registered by external - # modules - self.dispatch_table = copyreg.dispatch_table.copy() - self.dispatch_table.update(self.dispatch) - self.proto = int(protocol) - - def reducer_override(self, obj): - """Type-agnostic reducing callback for function and classes. - - For performance reasons, subclasses of the C _pickle.Pickler class - cannot register custom reducers for functions and classes in the - dispatch_table. Reducer for such types must instead implemented in the - special reducer_override method. - - Note that method will be called for any object except a few - builtin-types (int, lists, dicts etc.), which differs from reducers in - the Pickler's dispatch_table, each of them being invoked for objects of - a specific type only. - - This property comes in handy for classes: although most classes are - instances of the ``type`` metaclass, some of them can be instances of - other custom metaclasses (such as enum.EnumMeta for example). In - particular, the metaclass will likely not be known in advance, and thus - cannot be special-cased using an entry in the dispatch_table. - reducer_override, among other things, allows us to register a reducer - that will be called for any class, independently of its type. - - - Notes: - - * reducer_override has the priority over dispatch_table-registered - reducers. - * reducer_override can be used to fix other limitations of cloudpickle - for other types that suffered from type-specific reducers, such as - Exceptions. 
See https://github.com/cloudpipe/cloudpickle/issues/248 - """ - t = type(obj) - try: - is_anyclass = issubclass(t, type) - except TypeError: # t is not a class (old Boost; see SF #502085) - is_anyclass = False - - if is_anyclass: - return _class_reduce(obj) - elif isinstance(obj, types.FunctionType): - return self._function_reduce(obj) - else: - # fallback to save_global, including the Pickler's distpatch_table - return NotImplemented + # set of reducers defined and used by cloudpickle (private) + _dispatch_table = {} + _dispatch_table[classmethod] = _classmethod_reduce + _dispatch_table[io.TextIOWrapper] = _file_reduce + _dispatch_table[logging.Logger] = _logger_reduce + _dispatch_table[logging.RootLogger] = _root_logger_reduce + _dispatch_table[memoryview] = _memoryview_reduce + _dispatch_table[property] = _property_reduce + _dispatch_table[staticmethod] = _classmethod_reduce + _dispatch_table[CellType] = _cell_reduce + _dispatch_table[types.CodeType] = _code_reduce + _dispatch_table[types.GetSetDescriptorType] = _getset_descriptor_reduce + _dispatch_table[types.ModuleType] = _module_reduce + _dispatch_table[types.MethodType] = _method_reduce + _dispatch_table[types.MappingProxyType] = _mappingproxy_reduce + _dispatch_table[weakref.WeakSet] = _weakset_reduce + _dispatch_table[typing.TypeVar] = _typevar_reduce + + dispatch_table = ChainMap(_dispatch_table, copyreg.dispatch_table) # function reducers are defined as instance methods of CloudPickler # objects, as they rely on a CloudPickler attribute (globals_ref) @@ -540,7 +533,7 @@ def _function_getnewargs(self, func): closure = None else: closure = tuple( - types.CellType() for _ in range(len(code.co_freevars))) + _make_empty_cell() for _ in range(len(code.co_freevars))) return code, base_globals, None, None, closure @@ -556,3 +549,185 @@ def dump(self, obj): raise pickle.PicklingError(msg) else: raise + + if pickle.HIGHEST_PROTOCOL >= 5: + # Implementation of the reducer_override callback, in order to + # efficiently serialize dynamic functions and classes by subclassing + # the C-implemented Pickler. + # TODO: decorrelate reducer_override (which is tied to CPython's + # implementation - would it make sense to backport it to pypy?) and + # pickle's protocol 5 (which is implementation agnostic). Currently, the + # availability of both notions coincides on CPython's pickle and the + # pickle5 backport, but it may not be the case anymore when pypy + # implements protocol 5. + def __init__(self, file, protocol=None, buffer_callback=None): + if protocol is None: + protocol = DEFAULT_PROTOCOL + Pickler.__init__( + self, file, protocol=protocol, buffer_callback=buffer_callback + ) + # map functions __globals__ attribute ids, to ensure that functions + # sharing the same global namespace at pickling time also share + # their global namespace at unpickling time. + self.globals_ref = {} + self.proto = int(protocol) + + def reducer_override(self, obj): + """Type-agnostic reducing callback for functions and classes. + + For performance reasons, subclasses of the C _pickle.Pickler class + cannot register custom reducers for functions and classes in the + dispatch_table. Reducers for such types must instead be implemented in + the special reducer_override method. + + Note that this method will be called for any object except a few + builtin-types (int, lists, dicts etc.), which differs from reducers + in the Pickler's dispatch_table, each of them being invoked for + objects of a specific type only.
+ + This property comes in handy for classes: although most classes are + instances of the ``type`` metaclass, some of them can be instances + of other custom metaclasses (such as enum.EnumMeta for example). In + particular, the metaclass will likely not be known in advance, and + thus cannot be special-cased using an entry in the dispatch_table. + reducer_override, among other things, allows us to register a + reducer that will be called for any class, independently of its + type. + + + Notes: + + * reducer_override has priority over dispatch_table-registered + reducers. + * reducer_override can be used to fix other limitations of + cloudpickle for other types that suffered from type-specific + reducers, such as Exceptions. See + https://github.com/cloudpipe/cloudpickle/issues/248 + """ + t = type(obj) + try: + is_anyclass = issubclass(t, type) + except TypeError: # t is not a class (old Boost; see SF #502085) + is_anyclass = False + + if is_anyclass: + return _class_reduce(obj) + elif isinstance(obj, types.FunctionType): + return self._function_reduce(obj) + else: + # fallback to save_global, including the Pickler's + # dispatch_table + return NotImplemented + + else: + # When reducer_override is not available, hack the pure-Python + # Pickler's types.FunctionType and type savers. Note: the type saver + # must override Pickler.save_global, because pickle.py contains a + # hard-coded call to save_global when pickling meta-classes. + dispatch = Pickler.dispatch.copy() + + def __init__(self, file, protocol=None): + if protocol is None: + protocol = DEFAULT_PROTOCOL + Pickler.__init__(self, file, protocol=protocol) + # map functions __globals__ attribute ids, to ensure that functions + # sharing the same global namespace at pickling time also share + # their global namespace at unpickling time. + self.globals_ref = {} + assert hasattr(self, 'proto') + + def _save_reduce_pickle5(self, func, args, state=None, listitems=None, + dictitems=None, state_setter=None, obj=None): + save = self.save + write = self.write + self.save_reduce( + func, args, state=None, listitems=listitems, + dictitems=dictitems, obj=obj + ) + # backport of the Python 3.8 state_setter pickle operations + save(state_setter) + save(obj) # simple BINGET opcode as obj is already memoized. + save(state) + write(pickle.TUPLE2) + # Trigger a state_setter(obj, state) function call. + write(pickle.REDUCE) + # The purpose of state_setter is to carry out an + # in-place modification of obj. We do not care about what the + # method might return, so its output is eventually removed from + # the stack. + write(pickle.POP) + + def save_global(self, obj, name=None, pack=struct.pack): + """ + Save a "global". + + The name of this method is somewhat misleading: all types get + dispatched here. + """ + if obj is type(None): # noqa + return self.save_reduce(type, (None,), obj=obj) + elif obj is type(Ellipsis): + return self.save_reduce(type, (Ellipsis,), obj=obj) + elif obj is type(NotImplemented): + return self.save_reduce(type, (NotImplemented,), obj=obj) + elif obj in _BUILTIN_TYPE_NAMES: + return self.save_reduce( + _builtin_type, (_BUILTIN_TYPE_NAMES[obj],), obj=obj) + + if sys.version_info[:2] < (3, 7) and _is_parametrized_type_hint(obj): # noqa # pragma: no branch + # Parametrized typing constructs in Python < 3.7 are not + # compatible with type checks and ``isinstance`` semantics.
For + # this reason, it is easier to detect them using a + # duck-typing-based check (``_is_parametrized_type_hint``) than + # to populate the Pickler's dispatch with type-specific savers. + self.save_reduce( + _create_parametrized_type_hint, + parametrized_type_hint_getinitargs(obj), + obj=obj + ) + elif name is not None: + Pickler.save_global(self, obj, name=name) + elif not _is_importable(obj, name=name): + self._save_reduce_pickle5(*_dynamic_class_reduce(obj), obj=obj) + else: + Pickler.save_global(self, obj, name=name) + dispatch[type] = save_global + + def save_function(self, obj, name=None): + """ Registered with the dispatch to handle all function types. + + Determines what kind of function obj is (e.g. lambda, defined at + interactive prompt, etc) and handles the pickling appropriately. + """ + if _is_importable(obj, name=name): + return Pickler.save_global(self, obj, name=name) + elif PYPY and isinstance(obj.__code__, builtin_code_type): + return self.save_pypy_builtin_func(obj) + else: + return self._save_reduce_pickle5( + *self._dynamic_function_reduce(obj), obj=obj + ) + + def save_pypy_builtin_func(self, obj): + """Save pypy equivalent of builtin functions. + PyPy does not have the concept of builtin-functions. Instead, + builtin-functions are simple function instances, but with a + builtin-code attribute. + Most of the time, builtin functions should be pickled by attribute. + But PyPy has flaky support for __qualname__, so some builtin + functions such as float.__new__ will be classified as dynamic. For + this reason only, we created this special routine. Because + builtin-functions are not expected to have closure or globals, + there is no additional hack (compared the one already implemented + in pickle) to protect ourselves from reference cycles. A simple + (reconstructor, newargs, obj.__dict__) tuple is save_reduced. Note + also that PyPy improved their support for __qualname__ in v3.6, so + this routing should be removed when cloudpickle supports only PyPy + 3.6 and later. + """ + rv = (types.FunctionType, (obj.__code__, {}, obj.__name__, + obj.__defaults__, obj.__closure__), + obj.__dict__) + self.save_reduce(*rv, obj=obj) + + dispatch[types.FunctionType] = save_function diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index b1821dba6..ad6a9c9a6 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -1308,12 +1308,8 @@ def f(): # some setup is required to allow pytest apimodules to be correctly # serializable. 
from cloudpickle import CloudPickler - if sys.version_info[:2] >= (3, 8): - from cloudpickle import cloudpickle_fast as cp_fast - CloudPickler.dispatch[ - type(py.builtin)] = cp_fast._module_reduce - else: - CloudPickler.dispatch[type(py.builtin)] = CloudPickler.save_module + from cloudpickle import cloudpickle_fast as cp_fast + CloudPickler.dispatch_table[type(py.builtin)] = cp_fast._module_reduce g = cloudpickle.loads(cloudpickle.dumps(f, protocol=self.protocol)) @@ -2248,6 +2244,24 @@ def f(a: int) -> str: f1 = pickle_depickle(f, protocol=self.protocol) assert f1.__annotations__ == f.__annotations__ + def test_always_use_up_to_date_copyreg(self): + # test that updates of copyreg.dispatch_table are taken into account by + # cloudpickle + import copyreg + try: + class MyClass: + pass + + def reduce_myclass(x): + return MyClass, (), {'custom_reduce': True} + + copyreg.dispatch_table[MyClass] = reduce_myclass + my_obj = MyClass() + depickled_myobj = pickle_depickle(my_obj, protocol=self.protocol) + assert hasattr(depickled_myobj, 'custom_reduce') + finally: + copyreg.dispatch_table.pop(MyClass) + class Protocol2CloudPickleTest(CloudPickleTest): diff --git a/tests/generate_old_pickles.py b/tests/generate_old_pickles.py new file mode 100644 index 000000000..c5a1d1c44 --- /dev/null +++ b/tests/generate_old_pickles.py @@ -0,0 +1,91 @@ +"""Scripts reproducing pickles used to test cloudpickle backward compat support + + +This file contains a few Python scripts that generate pickles of canonical +objects whose pickling is supported by cloudpickle (dynamic functions, enums, +classes, modules etc). These scripts must be run with an "old" version of +cloudpickle. When testing, the generated pickle files are depickled using the +active cloudpickle branch to make sure that cloudpickle is able to depickle old +cloudpickle files.
+""" +import sys + +from pathlib import Path +from enum import IntEnum +from types import ModuleType +from typing import TypeVar, Generic + +import cloudpickle + +PYTHON_INFO = "{}_{}{}".format( + sys.implementation.name, sys.version_info.major, sys.version_info.minor +) + +PICKLE_DIRECTORY = Path(__file__).parent / "old_pickles" / PYTHON_INFO + + +def dump_obj(obj, filename): + with open(str(PICKLE_DIRECTORY / filename), "wb") as f: + cloudpickle.dump(obj, f) + + +def nested_function_factory(): + a = 1 + + def nested_function(b): + return a + b + + return nested_function + + +if __name__ == "__main__": + PICKLE_DIRECTORY.mkdir(parents=True) + + # simple dynamic function + def simple_func(x: int, y=1): + return x + y + + dump_obj(simple_func, "simple_func.pkl") + + # simple dynamic class + class SimpleClass: + def __init__(self, attribute): + self.attribute = attribute + + dump_obj(SimpleClass, "simple_class.pkl") + + # simple dynamic module + dynamic_module = ModuleType("dynamic_module") + s = """if 1: + def f(x, y=1): + return x + y + """ + exec(s, vars(dynamic_module)) + assert dynamic_module.f(2, 1) == 3 + dump_obj(dynamic_module, "simple_module.pkl") + + # simple dynamic Enum + class DynamicEnum(IntEnum): + RED = 1 + BLUE = 2 + + dump_obj(DynamicEnum, "simple_enum.pkl") + + # complex dynanic function/classes involing various typing annotations + # supported since cloudpickle 1.4 + T = TypeVar("T") + + class MyClass(Generic[T]): + def __init__(self, attribute: T): + self.attribute = attribute + + dump_obj(MyClass, "class_with_type_hints.pkl") + + def add(x: MyClass[int], y: MyClass[int]): + return MyClass(x.attribute + y.attribute) + + dump_obj([MyClass, add], "function_with_type_hints.pkl") + + # Locally defined closure + nested_function = nested_function_factory() + dump_obj(nested_function, "nested_function.pkl") diff --git a/tests/old_pickles/cpython_35/class_with_type_hints.pkl b/tests/old_pickles/cpython_35/class_with_type_hints.pkl new file mode 100644 index 000000000..6885eca4b Binary files /dev/null and b/tests/old_pickles/cpython_35/class_with_type_hints.pkl differ diff --git a/tests/old_pickles/cpython_35/function_with_type_hints.pkl b/tests/old_pickles/cpython_35/function_with_type_hints.pkl new file mode 100644 index 000000000..554f24686 Binary files /dev/null and b/tests/old_pickles/cpython_35/function_with_type_hints.pkl differ diff --git a/tests/old_pickles/cpython_35/nested_function.pkl b/tests/old_pickles/cpython_35/nested_function.pkl new file mode 100644 index 000000000..a71d0e425 Binary files /dev/null and b/tests/old_pickles/cpython_35/nested_function.pkl differ diff --git a/tests/old_pickles/cpython_35/simple_class.pkl b/tests/old_pickles/cpython_35/simple_class.pkl new file mode 100644 index 000000000..8d3366da1 Binary files /dev/null and b/tests/old_pickles/cpython_35/simple_class.pkl differ diff --git a/tests/old_pickles/cpython_35/simple_enum.pkl b/tests/old_pickles/cpython_35/simple_enum.pkl new file mode 100644 index 000000000..ce7b35c4c Binary files /dev/null and b/tests/old_pickles/cpython_35/simple_enum.pkl differ diff --git a/tests/old_pickles/cpython_35/simple_func.pkl b/tests/old_pickles/cpython_35/simple_func.pkl new file mode 100644 index 000000000..ee6525bb3 Binary files /dev/null and b/tests/old_pickles/cpython_35/simple_func.pkl differ diff --git a/tests/old_pickles/cpython_35/simple_module.pkl b/tests/old_pickles/cpython_35/simple_module.pkl new file mode 100644 index 000000000..ddbbcb02e Binary files /dev/null and 
b/tests/old_pickles/cpython_35/simple_module.pkl differ diff --git a/tests/old_pickles/cpython_36/class_with_type_hints.pkl b/tests/old_pickles/cpython_36/class_with_type_hints.pkl new file mode 100644 index 000000000..deea20ff7 Binary files /dev/null and b/tests/old_pickles/cpython_36/class_with_type_hints.pkl differ diff --git a/tests/old_pickles/cpython_36/function_with_type_hints.pkl b/tests/old_pickles/cpython_36/function_with_type_hints.pkl new file mode 100644 index 000000000..69fe362b9 Binary files /dev/null and b/tests/old_pickles/cpython_36/function_with_type_hints.pkl differ diff --git a/tests/old_pickles/cpython_36/nested_function.pkl b/tests/old_pickles/cpython_36/nested_function.pkl new file mode 100644 index 000000000..0731174f2 Binary files /dev/null and b/tests/old_pickles/cpython_36/nested_function.pkl differ diff --git a/tests/old_pickles/cpython_36/simple_class.pkl b/tests/old_pickles/cpython_36/simple_class.pkl new file mode 100644 index 000000000..f41002166 Binary files /dev/null and b/tests/old_pickles/cpython_36/simple_class.pkl differ diff --git a/tests/old_pickles/cpython_36/simple_enum.pkl b/tests/old_pickles/cpython_36/simple_enum.pkl new file mode 100644 index 000000000..7351bda2a Binary files /dev/null and b/tests/old_pickles/cpython_36/simple_enum.pkl differ diff --git a/tests/old_pickles/cpython_36/simple_func.pkl b/tests/old_pickles/cpython_36/simple_func.pkl new file mode 100644 index 000000000..a6b82cd59 Binary files /dev/null and b/tests/old_pickles/cpython_36/simple_func.pkl differ diff --git a/tests/old_pickles/cpython_36/simple_module.pkl b/tests/old_pickles/cpython_36/simple_module.pkl new file mode 100644 index 000000000..140333553 Binary files /dev/null and b/tests/old_pickles/cpython_36/simple_module.pkl differ diff --git a/tests/old_pickles/cpython_37/class_with_type_hints.pkl b/tests/old_pickles/cpython_37/class_with_type_hints.pkl new file mode 100644 index 000000000..2716dfdd2 Binary files /dev/null and b/tests/old_pickles/cpython_37/class_with_type_hints.pkl differ diff --git a/tests/old_pickles/cpython_37/function_with_type_hints.pkl b/tests/old_pickles/cpython_37/function_with_type_hints.pkl new file mode 100644 index 000000000..867b18a89 Binary files /dev/null and b/tests/old_pickles/cpython_37/function_with_type_hints.pkl differ diff --git a/tests/old_pickles/cpython_37/nested_function.pkl b/tests/old_pickles/cpython_37/nested_function.pkl new file mode 100644 index 000000000..0731174f2 Binary files /dev/null and b/tests/old_pickles/cpython_37/nested_function.pkl differ diff --git a/tests/old_pickles/cpython_37/simple_class.pkl b/tests/old_pickles/cpython_37/simple_class.pkl new file mode 100644 index 000000000..14902639c Binary files /dev/null and b/tests/old_pickles/cpython_37/simple_class.pkl differ diff --git a/tests/old_pickles/cpython_37/simple_enum.pkl b/tests/old_pickles/cpython_37/simple_enum.pkl new file mode 100644 index 000000000..bfc26fa8f Binary files /dev/null and b/tests/old_pickles/cpython_37/simple_enum.pkl differ diff --git a/tests/old_pickles/cpython_37/simple_func.pkl b/tests/old_pickles/cpython_37/simple_func.pkl new file mode 100644 index 000000000..a6b82cd59 Binary files /dev/null and b/tests/old_pickles/cpython_37/simple_func.pkl differ diff --git a/tests/old_pickles/cpython_37/simple_module.pkl b/tests/old_pickles/cpython_37/simple_module.pkl new file mode 100644 index 000000000..140333553 Binary files /dev/null and b/tests/old_pickles/cpython_37/simple_module.pkl differ diff --git 
a/tests/old_pickles/cpython_38/class_with_type_hints.pkl b/tests/old_pickles/cpython_38/class_with_type_hints.pkl new file mode 100644 index 000000000..5981dce90 Binary files /dev/null and b/tests/old_pickles/cpython_38/class_with_type_hints.pkl differ diff --git a/tests/old_pickles/cpython_38/function_with_type_hints.pkl b/tests/old_pickles/cpython_38/function_with_type_hints.pkl new file mode 100644 index 000000000..269639184 Binary files /dev/null and b/tests/old_pickles/cpython_38/function_with_type_hints.pkl differ diff --git a/tests/old_pickles/cpython_38/nested_function.pkl b/tests/old_pickles/cpython_38/nested_function.pkl new file mode 100644 index 000000000..e495cd765 Binary files /dev/null and b/tests/old_pickles/cpython_38/nested_function.pkl differ diff --git a/tests/old_pickles/cpython_38/simple_class.pkl b/tests/old_pickles/cpython_38/simple_class.pkl new file mode 100644 index 000000000..bc22f06d8 Binary files /dev/null and b/tests/old_pickles/cpython_38/simple_class.pkl differ diff --git a/tests/old_pickles/cpython_38/simple_enum.pkl b/tests/old_pickles/cpython_38/simple_enum.pkl new file mode 100644 index 000000000..a6c630455 Binary files /dev/null and b/tests/old_pickles/cpython_38/simple_enum.pkl differ diff --git a/tests/old_pickles/cpython_38/simple_func.pkl b/tests/old_pickles/cpython_38/simple_func.pkl new file mode 100644 index 000000000..df7c83a5d Binary files /dev/null and b/tests/old_pickles/cpython_38/simple_func.pkl differ diff --git a/tests/old_pickles/cpython_38/simple_module.pkl b/tests/old_pickles/cpython_38/simple_module.pkl new file mode 100644 index 000000000..a5f850623 Binary files /dev/null and b/tests/old_pickles/cpython_38/simple_module.pkl differ diff --git a/tests/old_pickles/pypy_36/class_with_type_hints.pkl b/tests/old_pickles/pypy_36/class_with_type_hints.pkl new file mode 100644 index 000000000..3bda8c2ae Binary files /dev/null and b/tests/old_pickles/pypy_36/class_with_type_hints.pkl differ diff --git a/tests/old_pickles/pypy_36/function_with_type_hints.pkl b/tests/old_pickles/pypy_36/function_with_type_hints.pkl new file mode 100644 index 000000000..d310462a0 Binary files /dev/null and b/tests/old_pickles/pypy_36/function_with_type_hints.pkl differ diff --git a/tests/old_pickles/pypy_36/simple_class.pkl b/tests/old_pickles/pypy_36/simple_class.pkl new file mode 100644 index 000000000..202e4f795 Binary files /dev/null and b/tests/old_pickles/pypy_36/simple_class.pkl differ diff --git a/tests/old_pickles/pypy_36/simple_enum.pkl b/tests/old_pickles/pypy_36/simple_enum.pkl new file mode 100644 index 000000000..64cbbdddc Binary files /dev/null and b/tests/old_pickles/pypy_36/simple_enum.pkl differ diff --git a/tests/old_pickles/pypy_36/simple_func.pkl b/tests/old_pickles/pypy_36/simple_func.pkl new file mode 100644 index 000000000..1761a387a Binary files /dev/null and b/tests/old_pickles/pypy_36/simple_func.pkl differ diff --git a/tests/old_pickles/pypy_36/simple_module.pkl b/tests/old_pickles/pypy_36/simple_module.pkl new file mode 100644 index 000000000..140333553 Binary files /dev/null and b/tests/old_pickles/pypy_36/simple_module.pkl differ diff --git a/tests/test_backward_compat.py b/tests/test_backward_compat.py new file mode 100644 index 000000000..20977a74b --- /dev/null +++ b/tests/test_backward_compat.py @@ -0,0 +1,97 @@ +"""Limited, best-effort test suite regarding cloudpickle backward-compat. 
+ +Cloudpickle does not officially support reading pickle files +generated with an older version of cloudpickle than the one used to read +said pickles. However, this policy is not widely known among users of +libraries that rely on cloudpickle, such as mlflow, and is a source of confusion. + +As a compromise, this script makes sure cloudpickle is backward compatible for a +few canonical use cases. Cloudpickle backward-compatibility support remains a +best-effort initiative. +""" +import pickle +import sys + +import pytest + +from .generate_old_pickles import PICKLE_DIRECTORY + + +def load_obj(filename, check_deprecation_warning='auto'): + if check_deprecation_warning == 'auto': + # pickle files generated with cloudpickle_fast.py on old versions of + # cloudpickle with Python < 3.8 use non-deprecated reconstructors. + check_deprecation_warning = (sys.version_info < (3, 8)) + pickle_filepath = PICKLE_DIRECTORY / filename + if not pickle_filepath.exists(): + pytest.skip("Could not find {}".format(str(pickle_filepath))) + with open(str(pickle_filepath), "rb") as f: + if check_deprecation_warning: + msg = "A pickle file created using an old" + with pytest.warns(UserWarning, match=msg): + obj = pickle.load(f) + else: + obj = pickle.load(f) + return obj + + +def test_simple_func(): + f = load_obj("simple_func.pkl") + assert f(1) == 2 + assert f(1, 1) == 2 + assert f(2, 2) == 4 + + +def test_simple_class(): + SimpleClass = load_obj("simple_class.pkl") + c = SimpleClass(1) + assert hasattr(c, "attribute") + assert c.attribute == 1 + + # test class tracking feature + assert SimpleClass is load_obj("simple_class.pkl") + + +def test_dynamic_module(): + mod = load_obj("simple_module.pkl") + assert hasattr(mod, "f") + assert mod.f(1) == 2 + assert mod.f(1, 1) == 2 + assert mod.f(2, 2) == 4 + + +def test_simple_enum(): + enum = load_obj("simple_enum.pkl", check_deprecation_warning=False) + assert hasattr(enum, "RED") + assert enum.RED == 1 + assert enum.BLUE == 2 + + # test enum tracking feature + new_enum = load_obj("simple_enum.pkl", check_deprecation_warning=False) + assert new_enum is enum + + +def test_complex_class(): + SimpleClass = load_obj("class_with_type_hints.pkl") + c = SimpleClass(1) + assert hasattr(c, "attribute") + assert c.attribute == 1 + + # test class tracking feature + assert SimpleClass is load_obj("class_with_type_hints.pkl") + + +def test_complex_function(): + MyClass, f = load_obj("function_with_type_hints.pkl") + assert len(f.__annotations__) > 0 + + a = MyClass(1) + b = MyClass(2) + + c = f(a, b) + assert c.attribute == 3 + + +def test_nested_function(): + f = load_obj("nested_function.pkl") + assert f(41) == 42
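For reference, the cell helpers this patch leans on (_make_empty_cell, _make_cell, cell_set, and the CellType backport) reduce to two facts: a local that is assigned somewhere in a function body, even unreachably, still produces a closure cell when captured, and cell_contents became writable in Python 3.7, which is what cell_set relies on (older interpreters go through a bytecode-based fallback). A minimal sketch, assuming Python >= 3.7:

    def make_empty_cell():
        if False:
            # never runs, but makes `cell` a closure variable of the
            # lambda below, so an empty cell object is created
            cell = None  # noqa: F841
        return (lambda: cell).__closure__[0]

    c = make_empty_cell()
    try:
        c.cell_contents
        raise AssertionError("an empty cell must have no contents")
    except ValueError:
        pass  # reading an empty cell raises ValueError

    c.cell_contents = 42  # writable since Python 3.7; cell_set wraps this
    assert c.cell_contents == 42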