diff --git a/python/ray/cloudpickle/__init__.py b/python/ray/cloudpickle/__init__.py index 6ab66b325da3..57a8a0b87b3a 100644 --- a/python/ray/cloudpickle/__init__.py +++ b/python/ray/cloudpickle/__init__.py @@ -2,4 +2,4 @@ from ray.cloudpickle.cloudpickle import * -__version__ = '0.5.2' +__version__ = '0.8.0.dev0' diff --git a/python/ray/cloudpickle/cloudpickle.py b/python/ray/cloudpickle/cloudpickle.py index e5aab0591f57..54d745cbcc7f 100644 --- a/python/ray/cloudpickle/cloudpickle.py +++ b/python/ray/cloudpickle/cloudpickle.py @@ -44,7 +44,7 @@ import dis from functools import partial -import imp +import importlib import io import itertools import logging @@ -57,24 +57,25 @@ import types import weakref - # cloudpickle is meant for inter process communication: we expect all # communicating processes to run the same Python version hence we favor # communication speed over compatibility: DEFAULT_PROTOCOL = pickle.HIGHEST_PROTOCOL -if sys.version < '3': +if sys.version_info[0] < 3: # pragma: no branch from pickle import Pickler try: from cStringIO import StringIO except ImportError: from StringIO import StringIO + string_types = (basestring,) # noqa PY3 = False else: types.ClassType = type from pickle import _Pickler as Pickler from io import BytesIO as StringIO + string_types = (str,) PY3 = True @@ -96,7 +97,7 @@ def _stub(value): return _stub - _cell_set_template_code = f() + _cell_set_template_code = f().__code__ This function is _only_ a LOAD_FAST(arg); STORE_DEREF, but that is invalid syntax on Python 2. If we use this function we also don't need @@ -111,7 +112,7 @@ def inner(value): # NOTE: we are marking the cell variable as a free variable intentionally # so that we simulate an inner function instead of the outer function. This # is what gives us the ``nonlocal`` behavior in a Python 2 compatible way. 
- if not PY3: + if not PY3: # pragma: no branch return types.CodeType( co.co_argcount, co.co_nlocals, @@ -163,7 +164,7 @@ def cell_set(cell, value): )(value) -#relevant opcodes +# relevant opcodes STORE_GLOBAL = opcode.opmap['STORE_GLOBAL'] DELETE_GLOBAL = opcode.opmap['DELETE_GLOBAL'] LOAD_GLOBAL = opcode.opmap['LOAD_GLOBAL'] @@ -173,7 +174,7 @@ def cell_set(cell, value): def islambda(func): - return getattr(func,'__name__') == '' + return getattr(func, '__name__') == '' _BUILTIN_TYPE_NAMES = {} @@ -212,14 +213,14 @@ def _factory(): } -if sys.version_info < (3, 4): +if sys.version_info < (3, 4): # pragma: no branch def _walk_global_ops(code): """ Yield (opcode, argument number) tuples for all global-referencing instructions in *code*. """ code = getattr(code, 'co_code', b'') - if not PY3: + if not PY3: # pragma: no branch code = map(ord, code) n = len(code) @@ -257,8 +258,6 @@ def __init__(self, file, protocol=None): if protocol is None: protocol = DEFAULT_PROTOCOL Pickler.__init__(self, file, protocol=protocol) - # set of modules to unpickle - self.modules = set() # map ids to dictionary. used to ensure that functions can share global env self.globals_ref = {} @@ -270,52 +269,37 @@ def dump(self, obj): if 'recursion' in e.args[0]: msg = """Could not pickle object as excessively deep recursion required.""" raise pickle.PicklingError(msg) + else: + raise def save_memoryview(self, obj): self.save(obj.tobytes()) + dispatch[memoryview] = save_memoryview - if not PY3: + if not PY3: # pragma: no branch def save_buffer(self, obj): self.save(str(obj)) - dispatch[buffer] = save_buffer - - def save_unsupported(self, obj): - raise pickle.PicklingError("Cannot pickle objects of type %s" % type(obj)) - dispatch[types.GeneratorType] = save_unsupported - # itertools objects do not pickle! 
- for v in itertools.__dict__.values(): - if type(v) is type: - dispatch[v] = save_unsupported + dispatch[buffer] = save_buffer # noqa: F821 'buffer' was removed in Python 3 def save_module(self, obj): """ Save a module as an import """ - mod_name = obj.__name__ - # If module is successfully found then it is not a dynamically created module - if hasattr(obj, '__file__'): - is_dynamic = False - else: - try: - _find_module(mod_name) - is_dynamic = False - except ImportError: - is_dynamic = True - - self.modules.add(obj) - if is_dynamic: - self.save_reduce(dynamic_subimport, (obj.__name__, vars(obj)), obj=obj) + if _is_dynamic(obj): + self.save_reduce(dynamic_subimport, (obj.__name__, vars(obj)), + obj=obj) else: self.save_reduce(subimport, (obj.__name__,), obj=obj) + dispatch[types.ModuleType] = save_module def save_codeobject(self, obj): """ Save a code object """ - if PY3: + if PY3: # pragma: no branch args = ( obj.co_argcount, obj.co_kwonlyargcount, obj.co_nlocals, obj.co_stacksize, obj.co_flags, obj.co_code, obj.co_consts, obj.co_names, obj.co_varnames, @@ -329,6 +313,7 @@ def save_codeobject(self, obj): obj.co_firstlineno, obj.co_lnotab, obj.co_freevars, obj.co_cellvars ) self.save_reduce(types.CodeType, args, obj=obj) + dispatch[types.CodeType] = save_codeobject def save_function(self, obj, name=None): @@ -337,7 +322,13 @@ def save_function(self, obj, name=None): Determines what kind of function obj is (e.g. lambda, defined at interactive prompt, etc) and handles the pickling appropriately. """ - if obj in _BUILTIN_TYPE_CONSTRUCTORS: + try: + should_special_case = obj in _BUILTIN_TYPE_CONSTRUCTORS + except TypeError: + # Methods of builtin types aren't hashable in python 2. 
+ should_special_case = False + + if should_special_case: # We keep a special-cased cache of built-in type constructors at # global scope, because these functions are structured very # differently in different python versions and implementations (for @@ -375,7 +366,6 @@ def save_function(self, obj, name=None): lookedup_by_name = None if themodule: - self.modules.add(themodule) if lookedup_by_name is obj: return self.save_global(obj, name) @@ -387,7 +377,7 @@ def save_function(self, obj, name=None): # So we pickle them here using save_reduce; have to do it differently # for different python versions. if not hasattr(obj, '__code__'): - if PY3: + if PY3: # pragma: no branch rv = obj.__reduce_ex__(self.proto) else: if hasattr(obj, '__self__'): @@ -420,26 +410,53 @@ def save_function(self, obj, name=None): else: write(pickle.GLOBAL + modname + '\n' + name + '\n') self.memoize(obj) + dispatch[types.FunctionType] = save_function def _save_subimports(self, code, top_level_dependencies): """ - Ensure de-pickler imports any package child-modules that - are needed by the function + Save submodules used by a function but not listed in its globals. + + In the example below: + + ``` + import concurrent.futures + import cloudpickle + + + def func(): + x = concurrent.futures.ThreadPoolExecutor + + + if __name__ == '__main__': + cloudpickle.dumps(func) + ``` + + the globals extracted by cloudpickle in the function's state include + the concurrent module, but not its submodule (here, + concurrent.futures), which is the module used by func. + + To ensure that calling the depickled function does not raise an + AttributeError, this function looks for any currently loaded submodule + that the function uses and whose parent is present in the function + globals, and saves it before saving the function. 
""" + # check if any known dependency is an imported package for x in top_level_dependencies: if isinstance(x, types.ModuleType) and hasattr(x, '__package__') and x.__package__: # check if the package has any currently loaded sub-imports prefix = x.__name__ + '.' - for name, module in sys.modules.items(): + # A concurrent thread could mutate sys.modules, + # make sure we iterate over a copy to avoid exceptions + for name in list(sys.modules): # Older versions of pytest will add a "None" module to sys.modules. if name is not None and name.startswith(prefix): # check whether the function can address the sub-module tokens = set(name[len(prefix):].split('.')) if not tokens - set(code.co_names): # ensure unpickler executes this import - self.save(module) + self.save(sys.modules[name]) # then discards the reference to it self.write(pickle.POP) @@ -454,11 +471,31 @@ def save_dynamic_class(self, obj): clsdict = dict(obj.__dict__) # copy dict proxy to a dict clsdict.pop('__weakref__', None) + # For ABCMeta in python3.7+, remove _abc_impl as it is not picklable. + # This is a fix which breaks the cache but this only makes the first + # calls to issubclass slower. + if "_abc_impl" in clsdict: + import abc + (registry, _, _, _) = abc._get_dump(obj) + clsdict["_abc_impl"] = [subclass_weakref() + for subclass_weakref in registry] + # On PyPy, __doc__ is a readonly attribute, so we need to include it in # the initial skeleton class. This is safe because we know that the # doc can't participate in a cycle with the original class. 
type_kwargs = {'__doc__': clsdict.pop('__doc__', None)} + if hasattr(obj, "__slots__"): + type_kwargs['__slots__'] = obj.__slots__ + # pickle string length optimization: member descriptors of obj are + # created automatically from obj's __slots__ attribute, no need to + # save them in obj's state + if isinstance(obj.__slots__, string_types): + clsdict.pop(obj.__slots__) + else: + for k in obj.__slots__: + clsdict.pop(k, None) + # If type overrides __dict__ as a property, include it in the type kwargs. # In Python 2, we can't set this attribute after construction. __dict__ = clsdict.pop('__dict__', None) @@ -545,9 +582,13 @@ def save_function_tuple(self, func): 'globals': f_globals, 'defaults': defaults, 'dict': dct, - 'module': func.__module__, 'closure_values': closure_values, + 'module': func.__module__, + 'name': func.__name__, + 'doc': func.__doc__, } + if hasattr(func, '__annotations__') and sys.version_info >= (3, 7): + state['annotations'] = func.__annotations__ if hasattr(func, '__qualname__'): state['qualname'] = func.__qualname__ save(state) @@ -572,8 +613,7 @@ def extract_code_globals(cls, co): # PyPy "builtin-code" object out_names = set() else: - out_names = set(names[oparg] - for op, oparg in _walk_global_ops(co)) + out_names = {names[oparg] for _, oparg in _walk_global_ops(co)} # see if nested function have any global refs if co.co_consts: @@ -614,8 +654,17 @@ def extract_func_data(self, func): # save the dict dct = func.__dict__ - base_globals = self.globals_ref.get(id(func.__globals__), {}) - self.globals_ref[id(func.__globals__)] = base_globals + # base_globals represents the future global namespace of func at + # unpickling time. 
Looking it up and storing it in globals_ref allow + functions sharing the same globals at pickling time to also + share them once unpickled, at one condition: since globals_ref is + an attribute of a Cloudpickler instance, and that a new CloudPickler is + created each time pickle.dump or pickle.dumps is called, functions + also need to be saved within the same invocation of + cloudpickle.dump/cloudpickle.dumps (for example: cloudpickle.dumps([f1, f2])). There + is no such limitation when using Cloudpickler.dump, as long as the + multiple invocations are bound to the same Cloudpickler. + base_globals = self.globals_ref.setdefault(id(func.__globals__), {}) return (code, f_globals, defaults, closure, dct, base_globals) @@ -623,6 +672,7 @@ def save_builtin_function(self, obj): if obj.__module__ == "__builtin__": return self.save_global(obj) return self.save_function(obj) + dispatch[types.BuiltinFunctionType] = save_builtin_function def save_global(self, obj, name=None, pack=struct.pack): @@ -632,6 +682,13 @@ def save_global(self, obj, name=None, pack=struct.pack): The name of this method is somewhat misleading: all types get dispatched here.
""" + if obj is type(None): + return self.save_reduce(type, (None,), obj=obj) + elif obj is type(Ellipsis): + return self.save_reduce(type, (Ellipsis,), obj=obj) + elif obj is type(NotImplemented): + return self.save_reduce(type, (NotImplemented,), obj=obj) + if obj.__module__ == "__main__": return self.save_dynamic_class(obj) @@ -657,11 +714,12 @@ def save_instancemethod(self, obj): if obj.__self__ is None: self.save_reduce(getattr, (obj.im_class, obj.__name__)) else: - if PY3: + if PY3: # pragma: no branch self.save_reduce(types.MethodType, (obj.__func__, obj.__self__), obj=obj) else: self.save_reduce(types.MethodType, (obj.__func__, obj.__self__, obj.__self__.__class__), - obj=obj) + obj=obj) + dispatch[types.MethodType] = save_instancemethod def save_inst(self, obj): @@ -709,17 +767,19 @@ def save_inst(self, obj): save(stuff) write(pickle.BUILD) - if not PY3: + if not PY3: # pragma: no branch dispatch[types.InstanceType] = save_inst def save_property(self, obj): # properties not correctly saved in python self.save_reduce(property, (obj.fget, obj.fset, obj.fdel, obj.__doc__), obj=obj) + dispatch[property] = save_property def save_classmethod(self, obj): orig_func = obj.__func__ self.save_reduce(type(obj), (orig_func,), obj=obj) + dispatch[classmethod] = save_classmethod dispatch[staticmethod] = save_classmethod @@ -730,7 +790,7 @@ def __getitem__(self, item): return item items = obj(Dummy()) if not isinstance(items, tuple): - items = (items, ) + items = (items,) return self.save_reduce(operator.itemgetter, items) if type(operator.itemgetter) is type: @@ -761,16 +821,16 @@ def __getattribute__(self, item): def save_file(self, obj): """Save a file""" try: - import StringIO as pystringIO #we can't use cStringIO as it lacks the name attribute + import StringIO as pystringIO # we can't use cStringIO as it lacks the name attribute except ImportError: import io as pystringIO - if not hasattr(obj, 'name') or not hasattr(obj, 'mode'): + if not hasattr(obj, 'name') or not 
hasattr(obj, 'mode'): raise pickle.PicklingError("Cannot pickle files that do not map to an actual file") if obj is sys.stdout: - return self.save_reduce(getattr, (sys,'stdout'), obj=obj) + return self.save_reduce(getattr, (sys, 'stdout'), obj=obj) if obj is sys.stderr: - return self.save_reduce(getattr, (sys,'stderr'), obj=obj) + return self.save_reduce(getattr, (sys, 'stderr'), obj=obj) if obj is sys.stdin: raise pickle.PicklingError("Cannot pickle standard input") if obj.closed: @@ -805,10 +865,10 @@ def save_ellipsis(self, obj): def save_not_implemented(self, obj): self.save_reduce(_gen_not_implemented, ()) - if PY3: - dispatch[io.TextIOWrapper] = save_file - else: + try: # Python 2 dispatch[file] = save_file + except NameError: # Python 3 # pragma: no branch + dispatch[io.TextIOWrapper] = save_file dispatch[type(Ellipsis)] = save_ellipsis dispatch[type(NotImplemented)] = save_not_implemented @@ -823,6 +883,17 @@ def save_logger(self, obj): dispatch[logging.Logger] = save_logger + def save_root_logger(self, obj): + self.save_reduce(logging.getLogger, (), obj=obj) + + dispatch[logging.RootLogger] = save_root_logger + + if hasattr(types, "MappingProxyType"): # pragma: no branch + def save_mappingproxy(self, obj): + self.save_reduce(types.MappingProxyType, (dict(obj),), obj=obj) + + dispatch[types.MappingProxyType] = save_mappingproxy + """Special functions for Add-on libraries""" def inject_addons(self): """Plug in system. Register additional pickling functions if modules already loaded""" @@ -896,9 +967,8 @@ def subimport(name): def dynamic_subimport(name, vars): - mod = imp.new_module(name) + mod = types.ModuleType(name) mod.__dict__.update(vars) - sys.modules[name] = mod return mod @@ -928,7 +998,7 @@ def _modules_to_main(modList): if type(modname) is str: try: mod = __import__(modname) - except Exception as e: + except Exception: sys.stderr.write('warning: could not import %s\n. 
' 'Your function may unexpectedly error due to this import failing;' 'A version mismatch is likely. Specific error was:\n' % modname) @@ -937,7 +1007,7 @@ def _modules_to_main(modList): setattr(main, mod.__name__, mod) -#object generators: +# object generators: def _genpartial(func, args, kwds): if not args: args = () @@ -945,9 +1015,11 @@ def _genpartial(func, args, kwds): kwds = {} return partial(func, *args, **kwds) + def _gen_ellipsis(): return Ellipsis + def _gen_not_implemented(): return NotImplemented @@ -1008,9 +1080,25 @@ def _fill_function(*args): else: raise ValueError('Unexpected _fill_value arguments: %r' % (args,)) + # - At pickling time, any dynamic global variable used by func is + # serialized by value (in state['globals']). + # - At unpickling time, func's __globals__ attribute is initialized by + # first retrieving an empty isolated namespace that will be shared + # with other functions pickled from the same original module + # by the same CloudPickler instance and then updated with the + # content of state['globals'] to populate the shared isolated + # namespace with all the global variables that are specifically + # referenced for this function. func.__globals__.update(state['globals']) + func.__defaults__ = state['defaults'] func.__dict__ = state['dict'] + if 'annotations' in state: + func.__annotations__ = state['annotations'] + if 'doc' in state: + func.__doc__ = state['doc'] + if 'name' in state: + func.__name__ = state['name'] if 'module' in state: func.__module__ = state['module'] if 'qualname' in state: @@ -1039,8 +1127,12 @@ def _make_skel_func(code, cell_count, base_globals=None): code and the correct number of cells in func_closure. All other func attributes (e.g. func_globals) are empty. """ - if base_globals is None: + # This is backward-compatibility code: for cloudpickle versions between + # 0.5.4 and 0.7, base_globals could be a string or None. base_globals + # should now always be a dictionary. 
+ if base_globals is None or isinstance(base_globals, str): base_globals = {} + base_globals['__builtins__'] = __builtins__ closure = ( @@ -1056,28 +1148,50 @@ def _rehydrate_skeleton_class(skeleton_class, class_dict): See CloudPickler.save_dynamic_class for more info. """ + registry = None for attrname, attr in class_dict.items(): - setattr(skeleton_class, attrname, attr) + if attrname == "_abc_impl": + registry = attr + else: + setattr(skeleton_class, attrname, attr) + if registry is not None: + for subclass in registry: + skeleton_class.register(subclass) + return skeleton_class -def _find_module(mod_name): +def _is_dynamic(module): """ - Iterate over each part instead of calling imp.find_module directly. - This function is able to find submodules (e.g. sickit.tree) + Return True if the module is a special module that cannot be imported by its + name. """ - path = None - for part in mod_name.split('.'): - if path is not None: - path = [path] - file, path, description = imp.find_module(part, path) - if file is not None: - file.close() - return path, description + # Quick check: modules that have a __file__ attribute are not dynamic modules.
+ if hasattr(module, '__file__'): + return False + + if hasattr(module, '__spec__'): + return module.__spec__ is None + else: + # Backward compat for Python 2 + import imp + try: + path = None + for part in module.__name__.split('.'): + if path is not None: + path = [path] + f, path, description = imp.find_module(part, path) + if f is not None: + f.close() + except ImportError: + return True + return False + """Constructors for 3rd party libraries Note: These can never be renamed due to client compatibility issues""" + def _getobject(modname, attribute): mod = __import__(modname, fromlist=[attribute]) return mod.__dict__[attribute] @@ -1085,7 +1199,7 @@ def _getobject(modname, attribute): """ Use copy_reg to extend global pickle definitions """ -if sys.version_info < (3, 4): +if sys.version_info < (3, 4): # pragma: no branch method_descriptor = type(str.upper) def _reduce_method_descriptor(obj):