diff --git a/python/ray/cloudpickle/__init__.py b/python/ray/cloudpickle/__init__.py index 316e4d099b14..91c46bb768c3 100644 --- a/python/ray/cloudpickle/__init__.py +++ b/python/ray/cloudpickle/__init__.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - from ray.cloudpickle.cloudpickle_fast import * # noqa: F401, F403 -__version__ = "1.2.2.dev0" +__version__ = '1.4.1' diff --git a/python/ray/cloudpickle/cloudpickle.py b/python/ray/cloudpickle/cloudpickle.py index c92b2eac4ffc..c639daab1c87 100644 --- a/python/ray/cloudpickle/cloudpickle.py +++ b/python/ray/cloudpickle/cloudpickle.py @@ -42,8 +42,9 @@ """ from __future__ import print_function +import abc +import builtins import dis -from functools import partial import io import itertools import logging @@ -53,17 +54,30 @@ import platform import struct import sys -import traceback import types import weakref import uuid import threading +import typing +from enum import Enum + +from typing import Generic, Union, Tuple, Callable +from pickle import _Pickler as Pickler +from pickle import _getattribute +from io import BytesIO +from importlib._bootstrap import _find_spec + +try: # pragma: no branch + import typing_extensions as _typing_extensions + from typing_extensions import Literal, Final +except ImportError: + _typing_extensions = Literal = Final = None +if sys.version_info >= (3, 5, 3): + from typing import ClassVar +else: # pragma: no cover + ClassVar = None -try: - from enum import Enum -except ImportError: - Enum = None # cloudpickle is meant for inter process communication: we expect all # communicating processes to run the same Python version hence we favor @@ -84,28 +98,10 @@ # builtin-code objects only exist in pypy builtin_code_type = type(float.__new__.__code__) -if sys.version_info[0] < 3: # pragma: no branch - from pickle import Pickler - try: - from cStringIO import StringIO - except ImportError: - from StringIO import StringIO - string_types = (basestring,) # noqa - PY3 = False - PY2 = True -else: - types.ClassType = type - from pickle import _Pickler as Pickler - from io import BytesIO as StringIO - string_types = (str,) - PY3 = True - PY2 = False - from importlib._bootstrap import _find_spec - _extract_code_globals_cache = weakref.WeakKeyDictionary() -def _ensure_tracking(class_def): +def _get_or_create_tracker_id(class_def): with _DYNAMIC_CLASS_TRACKER_LOCK: class_tracker_id = _DYNAMIC_CLASS_TRACKER_BY_CLASS.get(class_def) if class_tracker_id is None: @@ -123,21 +119,6 @@ def _lookup_class_or_track(class_tracker_id, class_def): _DYNAMIC_CLASS_TRACKER_BY_CLASS[class_def] = class_tracker_id return class_def -if sys.version_info[:2] >= (3, 5): - from pickle import _getattribute -elif sys.version_info[:2] >= (3, 4): - from pickle import _getattribute as _py34_getattribute - # pickle._getattribute does not return the parent under Python 3.4 - def _getattribute(obj, name): - return _py34_getattribute(obj, name), None -else: - # pickle._getattribute is a python3 addition and enchancement of getattr, - # that can handle dotted attribute names. In cloudpickle for python2, - # handling dotted names is not needed, so we simply define _getattribute as - # a wrapper around getattr. - def _getattribute(obj, name): - return getattr(obj, name, None), None - def _whichmodule(obj, name): """Find the module an object belongs to. @@ -148,13 +129,31 @@ def _whichmodule(obj, name): - Errors arising during module introspection are ignored, as those errors are considered unwanted side effects. """ - module_name = getattr(obj, '__module__', None) + if sys.version_info[:2] < (3, 7) and isinstance(obj, typing.TypeVar): # pragma: no branch # noqa + # Workaround bug in old Python versions: prior to Python 3.7, + # T.__module__ would always be set to "typing" even when the TypeVar T + # would be defined in a different module. + # + # For such older Python versions, we ignore the __module__ attribute of + # TypeVar instances and instead exhaustively lookup those instances in + # all currently imported modules. + module_name = None + else: + module_name = getattr(obj, '__module__', None) + if module_name is not None: return module_name - # Protect the iteration by using a list copy of sys.modules against dynamic - # modules that trigger imports of other modules upon calls to getattr. - for module_name, module in list(sys.modules.items()): - if module_name == '__main__' or module is None: + # Protect the iteration by using a copy of sys.modules against dynamic + # modules that trigger imports of other modules upon calls to getattr or + # other threads importing at the same time. + for module_name, module in sys.modules.copy().items(): + # Some modules such as coverage can inject non-module objects inside + # sys.modules + if ( + module_name == '__main__' or + module is None or + not isinstance(module, types.ModuleType) + ): continue try: if _getattribute(module, name)[0] is obj: @@ -164,11 +163,18 @@ def _whichmodule(obj, name): return None -def _is_global(obj, name=None): +def _is_importable_by_name(obj, name=None): """Determine if obj can be pickled as attribute of a file-backed module""" + return _lookup_module_and_qualname(obj, name=name) is not None + + +def _lookup_module_and_qualname(obj, name=None): if name is None: name = getattr(obj, '__qualname__', None) - if name is None: + if name is None: # pragma: no cover + # This used to be needed for Python 2.7 support but is probably not + # needed anymore. However we keep the __name__ introspection in case + # users of cloudpickle rely on this old behavior for unknown reasons. name = getattr(obj, '__name__', None) module_name = _whichmodule(obj, name) @@ -176,10 +182,10 @@ def _is_global(obj, name=None): if module_name is None: # In this case, obj.__module__ is None AND obj was not found in any # imported module. obj is thus treated as dynamic. - return False + return None if module_name == "__main__": - return False + return None module = sys.modules.get(module_name, None) if module is None: @@ -188,18 +194,20 @@ def _is_global(obj, name=None): # types.ModuleType. The other possibility is that module was removed # from sys.modules after obj was created/imported. But this case is not # supported, as the standard pickle does not support it either. - return False + return None # module has been added to sys.modules, but it can still be dynamic. if _is_dynamic(module): - return False + return None try: obj2, parent = _getattribute(module, name) except AttributeError: # obj was not found inside the module it points to - return False - return obj2 is obj + return None + if obj2 is not obj: + return None + return module, name def _extract_code_globals(co): @@ -346,41 +354,23 @@ def _cell_set_factory(value): co = _cell_set_factory.__code__ - if PY2: # pragma: no branch - _cell_set_template_code = types.CodeType( - co.co_argcount, - co.co_nlocals, - co.co_stacksize, - co.co_flags, - co.co_code, - co.co_consts, - co.co_names, - co.co_varnames, - co.co_filename, - co.co_name, - co.co_firstlineno, - co.co_lnotab, - co.co_cellvars, # co_freevars is initialized with co_cellvars - (), # co_cellvars is made empty - ) - else: - _cell_set_template_code = types.CodeType( - co.co_argcount, - co.co_kwonlyargcount, # Python 3 only argument - co.co_nlocals, - co.co_stacksize, - co.co_flags, - co.co_code, - co.co_consts, - co.co_names, - co.co_varnames, - co.co_filename, - co.co_name, - co.co_firstlineno, - co.co_lnotab, - co.co_cellvars, # co_freevars is initialized with co_cellvars - (), # co_cellvars is made empty - ) + _cell_set_template_code = types.CodeType( + co.co_argcount, + co.co_kwonlyargcount, # Python 3 only argument + co.co_nlocals, + co.co_stacksize, + co.co_flags, + co.co_code, + co.co_consts, + co.co_names, + co.co_varnames, + co.co_filename, + co.co_name, + co.co_firstlineno, + co.co_lnotab, + co.co_cellvars, # co_freevars is initialized with co_cellvars + (), # co_cellvars is made empty + ) return _cell_set_template_code @@ -403,44 +393,23 @@ def _cell_set_factory(value): def _builtin_type(name): + if name == "ClassType": # pragma: no cover + # Backward compat to load pickle files generated with cloudpickle + # < 1.3 even if loading pickle files from older versions is not + # officially supported. + return type return getattr(types, name) -if sys.version_info < (3, 4): # pragma: no branch - def _walk_global_ops(code): - """ - Yield (opcode, argument number) tuples for all - global-referencing instructions in *code*. - """ - code = getattr(code, 'co_code', b'') - if PY2: # pragma: no branch - code = map(ord, code) - - n = len(code) - i = 0 - extended_arg = 0 - while i < n: - op = code[i] - i += 1 - if op >= HAVE_ARGUMENT: - oparg = code[i] + code[i + 1] * 256 + extended_arg - extended_arg = 0 - i += 2 - if op == EXTENDED_ARG: - extended_arg = oparg * 65536 - if op in GLOBAL_OPS: - yield op, oparg - -else: - def _walk_global_ops(code): - """ - Yield (opcode, argument number) tuples for all - global-referencing instructions in *code*. - """ - for instr in dis.get_instructions(code): - op = instr.opcode - if op in GLOBAL_OPS: - yield op, instr.arg +def _walk_global_ops(code): + """ + Yield (opcode, argument number) tuples for all + global-referencing instructions in *code*. + """ + for instr in dis.get_instructions(code): + op = instr.opcode + if op in GLOBAL_OPS: + yield op, instr.arg def _extract_class_dict(cls): @@ -465,6 +434,32 @@ def _extract_class_dict(cls): return clsdict +if sys.version_info[:2] < (3, 7): # pragma: no branch + def _is_parametrized_type_hint(obj): + # This is very cheap but might generate false positives. + # general typing Constructs + is_typing = getattr(obj, '__origin__', None) is not None + + # typing_extensions.Literal + is_litteral = getattr(obj, '__values__', None) is not None + + # typing_extensions.Final + is_final = getattr(obj, '__type__', None) is not None + + # typing.Union/Tuple for old Python 3.5 + is_union = getattr(obj, '__union_params__', None) is not None + is_tuple = getattr(obj, '__tuple_params__', None) is not None + is_callable = ( + getattr(obj, '__result__', None) is not None and + getattr(obj, '__args__', None) is not None + ) + return any((is_typing, is_litteral, is_final, is_union, is_tuple, + is_callable)) + + def _create_parametrized_type_hint(origin, args): + return origin[args] + + class CloudPickler(Pickler): dispatch = Pickler.dispatch.copy() @@ -487,22 +482,22 @@ def dump(self, obj): else: raise + def save_typevar(self, obj): + self.save_reduce(*_typevar_reduce(obj), obj=obj) + + dispatch[typing.TypeVar] = save_typevar + def save_memoryview(self, obj): self.save(obj.tobytes()) dispatch[memoryview] = save_memoryview - if PY2: # pragma: no branch - def save_buffer(self, obj): - self.save(str(obj)) - - dispatch[buffer] = save_buffer # noqa: F821 'buffer' was removed in Python 3 - def save_module(self, obj): """ Save a module as an import """ if _is_dynamic(obj): + obj.__dict__.pop('__builtins__', None) self.save_reduce(dynamic_subimport, (obj.__name__, vars(obj)), obj=obj) else: @@ -514,29 +509,22 @@ def save_codeobject(self, obj): """ Save a code object """ - if PY3: # pragma: no branch - if hasattr(obj, "co_posonlyargcount"): # pragma: no branch - args = ( - obj.co_argcount, obj.co_posonlyargcount, - obj.co_kwonlyargcount, obj.co_nlocals, obj.co_stacksize, - obj.co_flags, obj.co_code, obj.co_consts, obj.co_names, - obj.co_varnames, obj.co_filename, obj.co_name, - obj.co_firstlineno, obj.co_lnotab, obj.co_freevars, - obj.co_cellvars - ) - else: - args = ( - obj.co_argcount, obj.co_kwonlyargcount, obj.co_nlocals, - obj.co_stacksize, obj.co_flags, obj.co_code, obj.co_consts, - obj.co_names, obj.co_varnames, obj.co_filename, - obj.co_name, obj.co_firstlineno, obj.co_lnotab, - obj.co_freevars, obj.co_cellvars - ) + if hasattr(obj, "co_posonlyargcount"): # pragma: no branch + args = ( + obj.co_argcount, obj.co_posonlyargcount, + obj.co_kwonlyargcount, obj.co_nlocals, obj.co_stacksize, + obj.co_flags, obj.co_code, obj.co_consts, obj.co_names, + obj.co_varnames, obj.co_filename, obj.co_name, + obj.co_firstlineno, obj.co_lnotab, obj.co_freevars, + obj.co_cellvars + ) else: args = ( - obj.co_argcount, obj.co_nlocals, obj.co_stacksize, obj.co_flags, obj.co_code, - obj.co_consts, obj.co_names, obj.co_varnames, obj.co_filename, obj.co_name, - obj.co_firstlineno, obj.co_lnotab, obj.co_freevars, obj.co_cellvars + obj.co_argcount, obj.co_kwonlyargcount, obj.co_nlocals, + obj.co_stacksize, obj.co_flags, obj.co_code, obj.co_consts, + obj.co_names, obj.co_varnames, obj.co_filename, + obj.co_name, obj.co_firstlineno, obj.co_lnotab, + obj.co_freevars, obj.co_cellvars ) self.save_reduce(types.CodeType, args, obj=obj) @@ -548,7 +536,7 @@ def save_function(self, obj, name=None): Determines what kind of function obj is (e.g. lambda, defined at interactive prompt, etc) and handles the pickling appropriately. """ - if _is_global(obj, name=name): + if _is_importable_by_name(obj, name=name): return Pickler.save_global(self, obj, name=name) elif PYPY and isinstance(obj.__code__, builtin_code_type): return self.save_pypy_builtin_func(obj) @@ -590,13 +578,12 @@ def _save_dynamic_enum(self, obj, clsdict): """ members = dict((e.name, e.value) for e in obj) - # Python 2.7 with enum34 can have no qualname: - qualname = getattr(obj, "__qualname__", None) - - self.save_reduce(_make_skeleton_enum, - (obj.__bases__, obj.__name__, qualname, members, - obj.__module__, _ensure_tracking(obj), None), - obj=obj) + self.save_reduce( + _make_skeleton_enum, + (obj.__bases__, obj.__name__, obj.__qualname__, + members, obj.__module__, _get_or_create_tracker_id(obj), None), + obj=obj + ) # Cleanup the clsdict that will be passed to _rehydrate_skeleton_class: # Those attributes are already handled by the metaclass. @@ -617,26 +604,38 @@ def save_dynamic_class(self, obj): clsdict = _extract_class_dict(obj) clsdict.pop('__weakref__', None) - # For ABCMeta in python3.7+, remove _abc_impl as it is not picklable. - # This is a fix which breaks the cache but this only makes the first - # calls to issubclass slower. - if "_abc_impl" in clsdict: - import abc - (registry, _, _, _) = abc._get_dump(obj) - clsdict["_abc_impl"] = [subclass_weakref() - for subclass_weakref in registry] + if issubclass(type(obj), abc.ABCMeta): + # If obj is an instance of an ABCMeta subclass, dont pickle the + # cache/negative caches populated during isinstance/issubclass + # checks, but pickle the list of registered subclasses of obj. + clsdict.pop('_abc_cache', None) + clsdict.pop('_abc_negative_cache', None) + clsdict.pop('_abc_negative_cache_version', None) + registry = clsdict.pop('_abc_registry', None) + if registry is None: + # in Python3.7+, the abc caches and registered subclasses of a + # class are bundled into the single _abc_impl attribute + clsdict.pop('_abc_impl', None) + (registry, _, _, _) = abc._get_dump(obj) + + clsdict["_abc_impl"] = [subclass_weakref() + for subclass_weakref in registry] + else: + # In the above if clause, registry is a set of weakrefs -- in + # this case, registry is a WeakSet + clsdict["_abc_impl"] = [type_ for type_ in registry] # On PyPy, __doc__ is a readonly attribute, so we need to include it in # the initial skeleton class. This is safe because we know that the # doc can't participate in a cycle with the original class. type_kwargs = {'__doc__': clsdict.pop('__doc__', None)} - if hasattr(obj, "__slots__"): + if "__slots__" in clsdict: type_kwargs['__slots__'] = obj.__slots__ # pickle string length optimization: member descriptors of obj are # created automatically from obj's __slots__ attribute, no need to # save them in obj's state - if isinstance(obj.__slots__, string_types): + if isinstance(obj.__slots__, str): clsdict.pop(obj.__slots__) else: for k in obj.__slots__: @@ -644,6 +643,7 @@ def save_dynamic_class(self, obj): # If type overrides __dict__ as a property, include it in the type # kwargs. In Python 2, we can't set this attribute after construction. + # XXX: can this ever happen in Python 3? If so add a test. __dict__ = clsdict.pop('__dict__', None) if isinstance(__dict__, property): type_kwargs['__dict__'] = __dict__ @@ -677,8 +677,8 @@ def save_dynamic_class(self, obj): # "Regular" class definition: tp = type(obj) self.save_reduce(_make_skeleton_class, - (tp, obj.__name__, obj.__bases__, type_kwargs, - _ensure_tracking(obj), None), + (tp, obj.__name__, _get_bases(obj), type_kwargs, + _get_or_create_tracker_id(obj), None), obj=obj) # Now save the rest of obj's __dict__. Any references to obj @@ -747,7 +747,7 @@ def save_function_tuple(self, func): 'doc': func.__doc__, '_cloudpickle_submodules': submodules } - if hasattr(func, '__annotations__') and sys.version_info >= (3, 4): + if hasattr(func, '__annotations__'): state['annotations'] = func.__annotations__ if hasattr(func, '__qualname__'): state['qualname'] = func.__qualname__ @@ -809,45 +809,6 @@ def extract_func_data(self, func): return (code, f_globals, defaults, closure, dct, base_globals) - if not PY3: # pragma: no branch - # Python3 comes with native reducers that allow builtin functions and - # methods pickling as module/class attributes. The following method - # extends this for python2. - # Please note that currently, neither pickle nor cloudpickle support - # dynamically created builtin functions/method pickling. - def save_builtin_function_or_method(self, obj): - is_bound = getattr(obj, '__self__', None) is not None - if is_bound: - # obj is a bound builtin method. - rv = (getattr, (obj.__self__, obj.__name__)) - return self.save_reduce(obj=obj, *rv) - - is_unbound = hasattr(obj, '__objclass__') - if is_unbound: - # obj is an unbound builtin method (accessed from its class) - rv = (getattr, (obj.__objclass__, obj.__name__)) - return self.save_reduce(obj=obj, *rv) - - # Otherwise, obj is not a method, but a function. Fallback to - # default pickling by attribute. - return Pickler.save_global(self, obj) - - dispatch[types.BuiltinFunctionType] = save_builtin_function_or_method - - # A comprehensive summary of the various kinds of builtin methods can - # be found in PEP 579: https://www.python.org/dev/peps/pep-0579/ - classmethod_descriptor_type = type(float.__dict__['fromhex']) - wrapper_descriptor_type = type(float.__repr__) - method_wrapper_type = type(1.5.__repr__) - - dispatch[classmethod_descriptor_type] = save_builtin_function_or_method - dispatch[wrapper_descriptor_type] = save_builtin_function_or_method - dispatch[method_wrapper_type] = save_builtin_function_or_method - - if sys.version_info[:2] < (3, 4): - method_descriptor = type(str.upper) - dispatch[method_descriptor] = save_builtin_function_or_method - def save_getset_descriptor(self, obj): return self.save_reduce(getattr, (obj.__objclass__, obj.__name__)) @@ -869,81 +830,36 @@ def save_global(self, obj, name=None, pack=struct.pack): elif obj in _BUILTIN_TYPE_NAMES: return self.save_reduce( _builtin_type, (_BUILTIN_TYPE_NAMES[obj],), obj=obj) + + if sys.version_info[:2] < (3, 7) and _is_parametrized_type_hint(obj): # noqa # pragma: no branch + # Parametrized typing constructs in Python < 3.7 are not compatible + # with type checks and ``isinstance`` semantics. For this reason, + # it is easier to detect them using a duck-typing-based check + # (``_is_parametrized_type_hint``) than to populate the Pickler's + # dispatch with type-specific savers. + self._save_parametrized_type_hint(obj) elif name is not None: Pickler.save_global(self, obj, name=name) - elif not _is_global(obj, name=name): + elif not _is_importable_by_name(obj, name=name): self.save_dynamic_class(obj) else: Pickler.save_global(self, obj, name=name) dispatch[type] = save_global - dispatch[types.ClassType] = save_global def save_instancemethod(self, obj): # Memoization rarely is ever useful due to python bounding if obj.__self__ is None: self.save_reduce(getattr, (obj.im_class, obj.__name__)) else: - if PY3: # pragma: no branch - self.save_reduce(types.MethodType, (obj.__func__, obj.__self__), obj=obj) - else: - self.save_reduce( - types.MethodType, - (obj.__func__, obj.__self__, type(obj.__self__)), obj=obj) + self.save_reduce(types.MethodType, (obj.__func__, obj.__self__), obj=obj) dispatch[types.MethodType] = save_instancemethod - def save_inst(self, obj): - """Inner logic to save instance. Based off pickle.save_inst""" - cls = obj.__class__ - - # Try the dispatch table (pickle module doesn't do it) - f = self.dispatch.get(cls) - if f: - f(self, obj) # Call unbound method with explicit self - return - - memo = self.memo - write = self.write - save = self.save - - if hasattr(obj, '__getinitargs__'): - args = obj.__getinitargs__() - len(args) # XXX Assert it's a sequence - pickle._keep_alive(args, memo) - else: - args = () - - write(pickle.MARK) - - if self.bin: - save(cls) - for arg in args: - save(arg) - write(pickle.OBJ) - else: - for arg in args: - save(arg) - write(pickle.INST + cls.__module__ + '\n' + cls.__name__ + '\n') - - self.memoize(obj) - - try: - getstate = obj.__getstate__ - except AttributeError: - stuff = obj.__dict__ - else: - stuff = getstate() - pickle._keep_alive(stuff, memo) - save(stuff) - write(pickle.BUILD) - - if PY2: # pragma: no branch - dispatch[types.InstanceType] = save_inst - def save_property(self, obj): # properties not correctly saved in python - self.save_reduce(property, (obj.fget, obj.fset, obj.fdel, obj.__doc__), obj=obj) + self.save_reduce(property, (obj.fget, obj.fset, obj.fdel, obj.__doc__), + obj=obj) dispatch[property] = save_property @@ -991,10 +907,6 @@ def __getattribute__(self, item): def save_file(self, obj): """Save a file""" - try: - import StringIO as pystringIO # we can't use cStringIO as it lacks the name attribute - except ImportError: - import io as pystringIO if not hasattr(obj, 'name') or not hasattr(obj, 'mode'): raise pickle.PicklingError("Cannot pickle files that do not map to an actual file") @@ -1013,7 +925,8 @@ def save_file(self, obj): name = obj.name - retval = pystringIO.StringIO() + # TODO: also support binary mode files with io.BytesIO + retval = io.StringIO() try: # Read the whole file @@ -1036,11 +949,7 @@ def save_ellipsis(self, obj): def save_not_implemented(self, obj): self.save_reduce(_gen_not_implemented, ()) - try: # Python 2 - dispatch[file] = save_file - except NameError: # Python 3 # pragma: no branch - dispatch[io.TextIOWrapper] = save_file - + dispatch[io.TextIOWrapper] = save_file dispatch[type(Ellipsis)] = save_ellipsis dispatch[type(NotImplemented)] = save_not_implemented @@ -1070,6 +979,57 @@ def inject_addons(self): """Plug in system. Register additional pickling functions if modules already loaded""" pass + if sys.version_info < (3, 7): # pragma: no branch + def _save_parametrized_type_hint(self, obj): + # The distorted type check sematic for typing construct becomes: + # ``type(obj) is type(TypeHint)``, which means "obj is a + # parametrized TypeHint" + if type(obj) is type(Literal): # pragma: no branch + initargs = (Literal, obj.__values__) + elif type(obj) is type(Final): # pragma: no branch + initargs = (Final, obj.__type__) + elif type(obj) is type(ClassVar): + initargs = (ClassVar, obj.__type__) + elif type(obj) is type(Generic): + parameters = obj.__parameters__ + if len(obj.__parameters__) > 0: + # in early Python 3.5, __parameters__ was sometimes + # preferred to __args__ + initargs = (obj.__origin__, parameters) + else: + initargs = (obj.__origin__, obj.__args__) + elif type(obj) is type(Union): + if sys.version_info < (3, 5, 3): # pragma: no cover + initargs = (Union, obj.__union_params__) + else: + initargs = (Union, obj.__args__) + elif type(obj) is type(Tuple): + if sys.version_info < (3, 5, 3): # pragma: no cover + initargs = (Tuple, obj.__tuple_params__) + else: + initargs = (Tuple, obj.__args__) + elif type(obj) is type(Callable): + if sys.version_info < (3, 5, 3): # pragma: no cover + args = obj.__args__ + result = obj.__result__ + if args != Ellipsis: + if isinstance(args, tuple): + args = list(args) + else: + args = [args] + else: + (*args, result) = obj.__args__ + if len(args) == 1 and args[0] is Ellipsis: + args = Ellipsis + else: + args = list(args) + initargs = (Callable, (args, result)) + else: # pragma: no cover + raise pickle.PicklingError( + "Cloudpickle Error: Unknown type {}".format(type(obj)) + ) + self.save_reduce(_create_parametrized_type_hint, initargs, obj=obj) + # Tornado support @@ -1117,7 +1077,7 @@ def dumps(obj, protocol=None): Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure compatibility with older versions of Python. """ - file = StringIO() + file = BytesIO() try: cp = CloudPickler(file, protocol=protocol) cp.dump(obj) @@ -1140,6 +1100,7 @@ def subimport(name): def dynamic_subimport(name, vars): mod = types.ModuleType(name) mod.__dict__.update(vars) + mod.__dict__['__builtins__'] = builtins.__dict__ return mod @@ -1291,7 +1252,10 @@ class id will also reuse this class definition. The "extra" variable is meant to be a dict (or None) that can be used for forward compatibility shall the need arise. """ - skeleton_class = type_constructor(name, bases, type_kwargs) + skeleton_class = types.new_class( + name, bases, {'metaclass': type_constructor}, + lambda ns: ns.update(type_kwargs) + ) return _lookup_class_or_track(class_tracker_id, skeleton_class) @@ -1338,10 +1302,7 @@ class id will also reuse this enum definition. classdict[member_name] = member_value enum_class = metacls.__new__(metacls, name, bases, classdict) enum_class.__module__ = module - - # Python 2.7 compat - if qualname is not None: - enum_class.__qualname__ = qualname + enum_class.__qualname__ = qualname return _lookup_class_or_track(class_tracker_id, enum_class) @@ -1355,41 +1316,69 @@ def _is_dynamic(module): if hasattr(module, '__file__'): return False - if hasattr(module, '__spec__'): - if module.__spec__ is not None: - return False - - # In PyPy, Some built-in modules such as _codecs can have their - # __spec__ attribute set to None despite being imported. For such - # modules, the ``_find_spec`` utility of the standard library is used. - parent_name = module.__name__.rpartition('.')[0] - if parent_name: # pragma: no cover - # This code handles the case where an imported package (and not - # module) remains with __spec__ set to None. It is however untested - # as no package in the PyPy stdlib has __spec__ set to None after - # it is imported. - try: - parent = sys.modules[parent_name] - except KeyError: - msg = "parent {!r} not in sys.modules" - raise ImportError(msg.format(parent_name)) - else: - pkgpath = parent.__path__ + if module.__spec__ is not None: + return False + + # In PyPy, Some built-in modules such as _codecs can have their + # __spec__ attribute set to None despite being imported. For such + # modules, the ``_find_spec`` utility of the standard library is used. + parent_name = module.__name__.rpartition('.')[0] + if parent_name: # pragma: no cover + # This code handles the case where an imported package (and not + # module) remains with __spec__ set to None. It is however untested + # as no package in the PyPy stdlib has __spec__ set to None after + # it is imported. + try: + parent = sys.modules[parent_name] + except KeyError: + msg = "parent {!r} not in sys.modules" + raise ImportError(msg.format(parent_name)) else: - pkgpath = None - return _find_spec(module.__name__, pkgpath, module) is None + pkgpath = parent.__path__ + else: + pkgpath = None + return _find_spec(module.__name__, pkgpath, module) is None + +def _make_typevar(name, bound, constraints, covariant, contravariant, + class_tracker_id): + tv = typing.TypeVar( + name, *constraints, bound=bound, + covariant=covariant, contravariant=contravariant + ) + if class_tracker_id is not None: + return _lookup_class_or_track(class_tracker_id, tv) + else: # pragma: nocover + # Only for Python 3.5.3 compat. + return tv + + +def _decompose_typevar(obj): + try: + class_tracker_id = _get_or_create_tracker_id(obj) + except TypeError: # pragma: nocover + # TypeVar instances are not weakref-able in Python 3.5.3 + class_tracker_id = None + return ( + obj.__name__, obj.__bound__, obj.__constraints__, + obj.__covariant__, obj.__contravariant__, + class_tracker_id, + ) + + +def _typevar_reduce(obj): + # TypeVar instances have no __qualname__ hence we pass the name explicitly. + module_and_name = _lookup_module_and_qualname(obj, name=obj.__name__) + if module_and_name is None: + return (_make_typevar, _decompose_typevar(obj)) + return (getattr, module_and_name) + + +def _get_bases(typ): + if hasattr(typ, '__orig_bases__'): + # For generic types (see PEP 560) + bases_attr = '__orig_bases__' else: - # Backward compat for Python 2 - import imp - try: - path = None - for part in module.__name__.split('.'): - if path is not None: - path = [path] - f, path, description = imp.find_module(part, path) - if f is not None: - f.close() - except ImportError: - return True - return False + # For regular class objects + bases_attr = '__bases__' + return getattr(typ, bases_attr) diff --git a/python/ray/cloudpickle/cloudpickle_fast.py b/python/ray/cloudpickle/cloudpickle_fast.py index 65d142e3cf59..fd16493b8475 100644 --- a/python/ray/cloudpickle/cloudpickle_fast.py +++ b/python/ray/cloudpickle/cloudpickle_fast.py @@ -15,19 +15,17 @@ import io import itertools import logging - import sys import types import weakref - -import numpy +import typing from .cloudpickle import ( _is_dynamic, _extract_code_globals, _BUILTIN_TYPE_NAMES, DEFAULT_PROTOCOL, - _find_imported_submodules, _get_cell_contents, _is_global, _builtin_type, - Enum, _ensure_tracking, _make_skeleton_class, _make_skeleton_enum, - _extract_class_dict, string_types, dynamic_subimport, subimport, cell_set, - _make_empty_cell + _find_imported_submodules, _get_cell_contents, _is_importable_by_name, _builtin_type, + Enum, _get_or_create_tracker_id, _make_skeleton_class, _make_skeleton_enum, + _extract_class_dict, dynamic_subimport, subimport, _typevar_reduce, _get_bases, + cell_set, _make_empty_cell, ) if sys.version_info[:2] < (3, 8): @@ -40,6 +38,8 @@ from _pickle import Pickler load, loads = _pickle.load, _pickle.loads +import numpy + # Shorthands similar to pickle.dump/pickle.dumps def dump(obj, file, protocol=None, buffer_callback=None): @@ -52,8 +52,7 @@ def dump(obj, file, protocol=None, buffer_callback=None): Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure compatibility with older versions of Python. """ - CloudPickler(file, protocol=protocol, - buffer_callback=buffer_callback).dump(obj) + CloudPickler(file, protocol=protocol, buffer_callback=buffer_callback).dump(obj) def dumps(obj, protocol=None, buffer_callback=None): @@ -67,8 +66,7 @@ def dumps(obj, protocol=None, buffer_callback=None): compatibility with older versions of Python. """ with io.BytesIO() as file: - cp = CloudPickler(file, protocol=protocol, - buffer_callback=buffer_callback) + cp = CloudPickler(file, protocol=protocol, buffer_callback=buffer_callback) cp.dump(obj) return file.getvalue() @@ -78,21 +76,21 @@ def dumps(obj, protocol=None, buffer_callback=None): def _class_getnewargs(obj): type_kwargs = {} - if hasattr(obj, "__slots__"): + if "__slots__" in obj.__dict__: type_kwargs["__slots__"] = obj.__slots__ - __dict__ = obj.__dict__.get("__dict__", None) + __dict__ = obj.__dict__.get('__dict__', None) if isinstance(__dict__, property): - type_kwargs["__dict__"] = __dict__ + type_kwargs['__dict__'] = __dict__ - return (type(obj), obj.__name__, obj.__bases__, type_kwargs, - _ensure_tracking(obj), None) + return (type(obj), obj.__name__, _get_bases(obj), type_kwargs, + _get_or_create_tracker_id(obj), None) def _enum_getnewargs(obj): members = dict((e.name, e.value) for e in obj) return (obj.__bases__, obj.__name__, obj.__qualname__, members, - obj.__module__, _ensure_tracking(obj), None) + obj.__module__, _get_or_create_tracker_id(obj), None) # COLLECTION OF OBJECTS RECONSTRUCTORS @@ -118,6 +116,7 @@ def _function_getstate(func): "__defaults__": func.__defaults__, "__module__": func.__module__, "__doc__": func.__doc__, + "__closure__": func.__closure__, } f_globals_ref = _extract_code_globals(func.__code__) @@ -143,26 +142,46 @@ def _function_getstate(func): def _class_getstate(obj): clsdict = _extract_class_dict(obj) - clsdict.pop("__weakref__", None) - - # For ABCMeta in python3.7+, remove _abc_impl as it is not picklable. - # This is a fix which breaks the cache but this only makes the first - # calls to issubclass slower. - if "_abc_impl" in clsdict: - (registry, _, _, _) = abc._get_dump(obj) - clsdict["_abc_impl"] = [subclass_weakref() - for subclass_weakref in registry] - if hasattr(obj, "__slots__"): + clsdict.pop('__weakref__', None) + + if issubclass(type(obj), abc.ABCMeta): + # If obj is an instance of an ABCMeta subclass, dont pickle the + # cache/negative caches populated during isinstance/issubclass + # checks, but pickle the list of registered subclasses of obj. + clsdict.pop('_abc_cache', None) + clsdict.pop('_abc_negative_cache', None) + clsdict.pop('_abc_negative_cache_version', None) + clsdict.pop('_abc_impl', None) + registry = clsdict.pop('_abc_registry', None) + if registry is None: + # in Python3.7+, the abc caches and registered subclasses of a + # class are bundled into the single _abc_impl attribute + if hasattr(abc, '_get_dump'): + (registry, _, _, _) = abc._get_dump(obj) + clsdict["_abc_impl"] = [subclass_weakref() + for subclass_weakref in registry] + else: + # FIXME(suquark): The upstream cloudpickle cannot work in Ray + # because sometimes both '_abc_registry' and '_get_dump' does + # not exist. Some strange typing objects may cause this issue. + # Here the workaround just set "_abc_impl" to None. + clsdict["_abc_impl"] = None + else: + # In the above if clause, registry is a set of weakrefs -- in + # this case, registry is a WeakSet + clsdict["_abc_impl"] = [type_ for type_ in registry] + + if "__slots__" in clsdict: # pickle string length optimization: member descriptors of obj are # created automatically from obj's __slots__ attribute, no need to # save them in obj's state - if isinstance(obj.__slots__, string_types): + if isinstance(obj.__slots__, str): clsdict.pop(obj.__slots__) else: for k in obj.__slots__: clsdict.pop(k, None) - clsdict.pop("__dict__", None) # unpicklable property object + clsdict.pop('__dict__', None) # unpicklable property object return (clsdict, {}) @@ -304,6 +323,7 @@ def _memoryview_reduce(obj): def _module_reduce(obj): if _is_dynamic(obj): + obj.__dict__.pop('__builtins__', None) return dynamic_subimport, (obj.__name__, vars(obj)) else: return subimport, (obj.__name__,) @@ -321,6 +341,10 @@ def _root_logger_reduce(obj): return logging.getLogger, () +def _property_reduce(obj): + return property, (obj.fget, obj.fset, obj.fdel, obj.__doc__) + + def _weakset_reduce(obj): return weakref.WeakSet, (list(obj),) @@ -355,7 +379,7 @@ def _class_reduce(obj): return type, (NotImplemented,) elif obj in _BUILTIN_TYPE_NAMES: return _builtin_type, (_BUILTIN_TYPE_NAMES[obj],) - elif not _is_global(obj): + elif not _is_importable_by_name(obj): return _dynamic_class_reduce(obj) return NotImplemented @@ -377,6 +401,7 @@ def _function_setstate(obj, state): obj.__dict__.update(state) obj_globals = slotstate.pop("__globals__") + obj_closure = slotstate.pop("__closure__") # _cloudpickle_subimports is a set of submodules that must be loaded for # the pickled function to work correctly at unpickling time. Now that these # submodules are depickled (hence imported), they can be removed from the @@ -387,6 +412,14 @@ def _function_setstate(obj, state): obj.__globals__.update(obj_globals) obj.__globals__["__builtins__"] = __builtins__ + if obj_closure is not None: + for i, cell in enumerate(obj_closure): + try: + value = cell.cell_contents + except ValueError: # cell is empty + continue + cell_set(obj.__closure__[i], value) + for k, v in slotstate.items(): setattr(obj, k, v) @@ -406,11 +439,6 @@ def _class_setstate(obj, state): return obj -def _property_reduce(obj): - # Python < 3.8 only - return property, (obj.fget, obj.fset, obj.fdel, obj.__doc__) - - def _numpy_frombuffer(buffer, dtype, shape, order): # Get the _frombuffer() function for reconstruction from numpy.core.numeric import _frombuffer @@ -477,25 +505,24 @@ class CloudPickler(Pickler): dispatch[logging.Logger] = _logger_reduce dispatch[logging.RootLogger] = _root_logger_reduce dispatch[memoryview] = _memoryview_reduce + dispatch[property] = _property_reduce dispatch[staticmethod] = _classmethod_reduce + if sys.version_info[:2] >= (3, 8): + dispatch[types.CellType] = _cell_reduce + else: + dispatch[type(_make_empty_cell())] = _cell_reduce dispatch[types.CodeType] = _code_reduce dispatch[types.GetSetDescriptorType] = _getset_descriptor_reduce dispatch[types.ModuleType] = _module_reduce dispatch[types.MethodType] = _method_reduce dispatch[types.MappingProxyType] = _mappingproxy_reduce dispatch[weakref.WeakSet] = _weakset_reduce - if sys.version_info[:2] >= (3, 8): - dispatch[types.CellType] = _cell_reduce - else: - dispatch[type(_make_empty_cell())] = _cell_reduce - if sys.version_info[:2] < (3, 8): - dispatch[property] = _property_reduce + dispatch[typing.TypeVar] = _typevar_reduce def __init__(self, file, protocol=None, buffer_callback=None): if protocol is None: protocol = DEFAULT_PROTOCOL - Pickler.__init__(self, file, protocol=protocol, - buffer_callback=buffer_callback) + Pickler.__init__(self, file, protocol=protocol, buffer_callback=buffer_callback) # map functions __globals__ attribute ids, to ensure that functions # sharing the same global namespace at pickling time also share their # global namespace at unpickling time. @@ -582,7 +609,7 @@ def _function_reduce(self, obj): As opposed to cloudpickle.py, There no special handling for builtin pypy functions because cloudpickle_fast is CPython-specific. """ - if _is_global(obj): + if _is_importable_by_name(obj): return NotImplemented else: return self._dynamic_function_reduce(obj) @@ -610,7 +637,19 @@ def _function_getnewargs(self, func): if k in func.__globals__: base_globals[k] = func.__globals__[k] - return code, base_globals, None, None, func.__closure__ + # Do not bind the free variables before the function is created to + # avoid infinite recursion. + if func.__closure__ is None: + closure = None + else: + if sys.version_info[:2] >= (3, 8): + closure = tuple( + types.CellType() for _ in range(len(code.co_freevars))) + else: + closure = tuple( + _make_empty_cell() for _ in range(len(code.co_freevars))) + + return code, base_globals, None, None, closure def dump(self, obj): try: