From 260e30eef3cc55fde3ed54e25e3dd8e3c5507bc7 Mon Sep 17 00:00:00 2001 From: Leonardo Gama Date: Fri, 22 Jul 2022 14:12:36 -0300 Subject: [PATCH 1/3] move session-related code to new session module --- dill/__init__.py | 16 +- dill/_dill.py | 563 +----------------------------------- dill/session.py | 578 +++++++++++++++++++++++++++++++++++++ dill/tests/test_session.py | 2 +- docs/source/dill.rst | 14 +- 5 files changed, 607 insertions(+), 566 deletions(-) create mode 100644 dill/session.py diff --git a/dill/__init__.py b/dill/__init__.py index 028112dc..22709a03 100644 --- a/dill/__init__.py +++ b/dill/__init__.py @@ -24,13 +24,17 @@ from ._dill import ( - dump, dumps, load, loads, dump_module, load_module, load_module_asdict, - dump_session, load_session, Pickler, Unpickler, register, copy, pickle, - pickles, check, HIGHEST_PROTOCOL, DEFAULT_PROTOCOL, PicklingError, - UnpicklingError, HANDLE_FMODE, CONTENTS_FMODE, FILE_FMODE, PickleError, - PickleWarning, PicklingWarning, UnpicklingWarning, + Pickler, Unpickler, + check, copy, dump, dumps, load, loads, pickle, pickles, register, + DEFAULT_PROTOCOL, HIGHEST_PROTOCOL, CONTENTS_FMODE, FILE_FMODE, HANDLE_FMODE, + PickleError, PickleWarning, PicklingError, PicklingWarning, UnpicklingError, + UnpicklingWarning, ) -from . import source, temp, detect +from .session import ( + dump_module, load_module, load_module_asdict, + dump_session, load_session # backward compatibility +) +from . import detect, session, source, temp # get global settings from .settings import settings diff --git a/dill/_dill.py b/dill/_dill.py index 789913f3..98f6eb48 100644 --- a/dill/_dill.py +++ b/dill/_dill.py @@ -16,12 +16,11 @@ Test against CH16+ Std. Lib. ... TBD. """ __all__ = [ - 'dump', 'dumps', 'load', 'loads', 'dump_module', 'load_module', - 'load_module_asdict', 'dump_session', 'load_session', 'Pickler', 'Unpickler', - 'register', 'copy', 'pickle', 'pickles', 'check', 'HIGHEST_PROTOCOL', - 'DEFAULT_PROTOCOL', 'PicklingError', 'UnpicklingError', 'HANDLE_FMODE', - 'CONTENTS_FMODE', 'FILE_FMODE', 'PickleError', 'PickleWarning', - 'PicklingWarning', 'UnpicklingWarning' + 'Pickler','Unpickler', + 'check','copy','dump','dumps','load','loads','pickle','pickles','register', + 'DEFAULT_PROTOCOL','HIGHEST_PROTOCOL','CONTENTS_FMODE','FILE_FMODE','HANDLE_FMODE', + 'PickleError','PickleWarning','PicklingError','PicklingWarning','UnpicklingError', + 'UnpicklingWarning', ] __module__ = 'dill' @@ -30,8 +29,6 @@ from .logger import adapter as logger from .logger import trace as _trace -from typing import Optional, Union - import os import sys diff = None @@ -323,555 +320,6 @@ def loads(str, ignore=None, **kwds): ### End: Shorthands ### -### Pickle the Interpreter Session -import pathlib -import re -import tempfile -from types import SimpleNamespace - -TEMPDIR = pathlib.PurePath(tempfile.gettempdir()) - -def _module_map(): - """get map of imported modules""" - from collections import defaultdict - modmap = SimpleNamespace( - by_name=defaultdict(list), - by_id=defaultdict(list), - top_level={}, - ) - for modname, module in sys.modules.items(): - if modname in ('__main__', '__mp_main__') or not isinstance(module, ModuleType): - continue - if '.' not in modname: - modmap.top_level[id(module)] = modname - for objname, modobj in module.__dict__.items(): - modmap.by_name[objname].append((modobj, modname)) - modmap.by_id[id(modobj)].append((modobj, objname, modname)) - return modmap - -SESSION_IMPORTED_AS_TYPES = (ModuleType, TypeType, FunctionType, MethodType, BuiltinMethodType) -SESSION_IMPORTED_AS_MODULES = ('ctypes', 'typing', 'subprocess', 'threading', - r'concurrent\.futures(\.\w+)?', r'multiprocessing(\.\w+)?') -SESSION_IMPORTED_AS_MODULES = tuple(re.compile(x) for x in SESSION_IMPORTED_AS_MODULES) - -def _lookup_module(modmap, name, obj, main_module): - """lookup name or id of obj if module is imported""" - for modobj, modname in modmap.by_name[name]: - if modobj is obj and sys.modules[modname] is not main_module: - return modname, name - __module__ = getattr(obj, '__module__', None) - if isinstance(obj, SESSION_IMPORTED_AS_TYPES) or (__module__ is not None - and any(regex.fullmatch(__module__) for regex in SESSION_IMPORTED_AS_MODULES)): - for modobj, objname, modname in modmap.by_id[id(obj)]: - if sys.modules[modname] is not main_module: - return modname, objname - return None, None - -def _stash_modules(main_module): - modmap = _module_map() - newmod = ModuleType(main_module.__name__) - - imported = [] - imported_as = [] - imported_top_level = [] # keep separated for backward compatibility - original = {} - for name, obj in main_module.__dict__.items(): - if obj is main_module: - original[name] = newmod # self-reference - elif obj is main_module.__dict__: - original[name] = newmod.__dict__ - # Avoid incorrectly matching a singleton value in another package (ex.: __doc__). - elif any(obj is singleton for singleton in (None, False, True)) \ - or isinstance(obj, ModuleType) and _is_builtin_module(obj): # always saved by ref - original[name] = obj - else: - source_module, objname = _lookup_module(modmap, name, obj, main_module) - if source_module is not None: - if objname == name: - imported.append((source_module, name)) - else: - imported_as.append((source_module, objname, name)) - else: - try: - imported_top_level.append((modmap.top_level[id(obj)], name)) - except KeyError: - original[name] = obj - - if len(original) < len(main_module.__dict__): - newmod.__dict__.update(original) - newmod.__dill_imported = imported - newmod.__dill_imported_as = imported_as - newmod.__dill_imported_top_level = imported_top_level - if getattr(newmod, '__loader__', None) is None and _is_imported_module(main_module): - # Trick _is_imported_module() to force saving as an imported module. - newmod.__loader__ = True # will be discarded by save_module() - return newmod - else: - return main_module - -def _restore_modules(unpickler, main_module): - try: - for modname, name in main_module.__dict__.pop('__dill_imported'): - main_module.__dict__[name] = unpickler.find_class(modname, name) - for modname, objname, name in main_module.__dict__.pop('__dill_imported_as'): - main_module.__dict__[name] = unpickler.find_class(modname, objname) - for modname, name in main_module.__dict__.pop('__dill_imported_top_level'): - main_module.__dict__[name] = __import__(modname) - except KeyError: - pass - -#NOTE: 06/03/15 renamed main_module to main -def dump_module( - filename = str(TEMPDIR/'session.pkl'), - module: Union[ModuleType, str] = None, - refimported: bool = False, - **kwds -) -> None: - """Pickle the current state of :py:mod:`__main__` or another module to a file. - - Save the contents of :py:mod:`__main__` (e.g. from an interactive - interpreter session), an imported module, or a module-type object (e.g. - built with :py:class:`~types.ModuleType`), to a file. The pickled - module can then be restored with the function :py:func:`load_module`. - - Parameters: - filename: a path-like object or a writable stream. - module: a module object or the name of an importable module. If `None` - (the default), :py:mod:`__main__` is saved. - refimported: if `True`, all objects identified as having been imported - into the module's namespace are saved by reference. *Note:* this is - similar but independent from ``dill.settings[`byref`]``, as - ``refimported`` refers to virtually all imported objects, while - ``byref`` only affects select objects. - **kwds: extra keyword arguments passed to :py:class:`Pickler()`. - - Raises: - :py:exc:`PicklingError`: if pickling fails. - - Examples: - - - Save current interpreter session state: - - >>> import dill - >>> squared = lambda x: x*x - >>> dill.dump_module() # save state of __main__ to /tmp/session.pkl - - - Save the state of an imported/importable module: - - >>> import dill - >>> import pox - >>> pox.plus_one = lambda x: x+1 - >>> dill.dump_module('pox_session.pkl', module=pox) - - - Save the state of a non-importable, module-type object: - - >>> import dill - >>> from types import ModuleType - >>> foo = ModuleType('foo') - >>> foo.values = [1,2,3] - >>> import math - >>> foo.sin = math.sin - >>> dill.dump_module('foo_session.pkl', module=foo, refimported=True) - - - Restore the state of the saved modules: - - >>> import dill - >>> dill.load_module() - >>> squared(2) - 4 - >>> pox = dill.load_module('pox_session.pkl') - >>> pox.plus_one(1) - 2 - >>> foo = dill.load_module('foo_session.pkl') - >>> [foo.sin(x) for x in foo.values] - [0.8414709848078965, 0.9092974268256817, 0.1411200080598672] - - *Changed in version 0.3.6:* Function ``dump_session()`` was renamed to - ``dump_module()``. Parameters ``main`` and ``byref`` were renamed to - ``module`` and ``refimported``, respectively. - - Note: - Currently, ``dill.settings['byref']`` and ``dill.settings['recurse']`` - don't apply to this function.` - """ - for old_par, par in [('main', 'module'), ('byref', 'refimported')]: - if old_par in kwds: - message = "The argument %r has been renamed %r" % (old_par, par) - if old_par == 'byref': - message += " to distinguish it from dill.settings['byref']" - warnings.warn(message + ".", PendingDeprecationWarning) - if locals()[par]: # the defaults are None and False - raise TypeError("both %r and %r arguments were used" % (par, old_par)) - refimported = kwds.pop('byref', refimported) - module = kwds.pop('main', module) - - from .settings import settings - protocol = settings['protocol'] - main = module - if main is None: - main = _main_module - elif isinstance(main, str): - main = _import_module(main) - if not isinstance(main, ModuleType): - raise TypeError("%r is not a module" % main) - if hasattr(filename, 'write'): - file = filename - else: - file = open(filename, 'wb') - try: - pickler = Pickler(file, protocol, **kwds) - pickler._original_main = main - if refimported: - main = _stash_modules(main) - pickler._main = main #FIXME: dill.settings are disabled - pickler._byref = False # disable pickling by name reference - pickler._recurse = False # disable pickling recursion for globals - pickler._session = True # is best indicator of when pickling a session - pickler._first_pass = True - pickler._main_modified = main is not pickler._original_main - pickler.dump(main) - finally: - if file is not filename: # if newly opened file - file.close() - return - -# Backward compatibility. -def dump_session(filename=str(TEMPDIR/'session.pkl'), main=None, byref=False, **kwds): - warnings.warn("dump_session() has been renamed dump_module()", PendingDeprecationWarning) - dump_module(filename, module=main, refimported=byref, **kwds) -dump_session.__doc__ = dump_module.__doc__ - -class _PeekableReader: - """lightweight stream wrapper that implements peek()""" - def __init__(self, stream): - self.stream = stream - def read(self, n): - return self.stream.read(n) - def readline(self): - return self.stream.readline() - def tell(self): - return self.stream.tell() - def close(self): - return self.stream.close() - def peek(self, n): - stream = self.stream - try: - if hasattr(stream, 'flush'): stream.flush() - position = stream.tell() - stream.seek(position) # assert seek() works before reading - chunk = stream.read(n) - stream.seek(position) - return chunk - except (AttributeError, OSError): - raise NotImplementedError("stream is not peekable: %r", stream) from None - -def _make_peekable(stream): - """return stream as an object with a peek() method""" - import io - if hasattr(stream, 'peek'): - return stream - if not (hasattr(stream, 'tell') and hasattr(stream, 'seek')): - try: - return io.BufferedReader(stream) - except Exception: - pass - return _PeekableReader(stream) - -def _identify_module(file, main=None): - """identify the name of the module stored in the given file-type object""" - from pickletools import genops - UNICODE = {'UNICODE', 'BINUNICODE', 'SHORT_BINUNICODE'} - found_import = False - try: - for opcode, arg, pos in genops(file.peek(256)): - if not found_import: - if opcode.name in ('GLOBAL', 'SHORT_BINUNICODE') and \ - arg.endswith('_import_module'): - found_import = True - else: - if opcode.name in UNICODE: - return arg - else: - raise UnpicklingError("reached STOP without finding main module") - except (NotImplementedError, ValueError) as error: - # ValueError occours when the end of the chunk is reached (without a STOP). - if isinstance(error, NotImplementedError) and main is not None: - # file is not peekable, but we have main. - return None - raise UnpicklingError("unable to identify main module") from error - -def load_module( - filename = str(TEMPDIR/'session.pkl'), - module: Union[ModuleType, str] = None, - **kwds -) -> Optional[ModuleType]: - """Update the selected module (default is :py:mod:`__main__`) with - the state saved at ``filename``. - - Restore a module to the state saved with :py:func:`dump_module`. The - saved module can be :py:mod:`__main__` (e.g. an interpreter session), - an imported module, or a module-type object (e.g. created with - :py:class:`~types.ModuleType`). - - When restoring the state of a non-importable module-type object, the - current instance of this module may be passed as the argument ``main``. - Otherwise, a new instance is created with :py:class:`~types.ModuleType` - and returned. - - Parameters: - filename: a path-like object or a readable stream. - module: a module object or the name of an importable module; - the module name and kind (i.e. imported or non-imported) must - match the name and kind of the module stored at ``filename``. - **kwds: extra keyword arguments passed to :py:class:`Unpickler()`. - - Raises: - :py:exc:`UnpicklingError`: if unpickling fails. - :py:exc:`ValueError`: if the argument ``main`` and module saved - at ``filename`` are incompatible. - - Returns: - A module object, if the saved module is not :py:mod:`__main__` or - a module instance wasn't provided with the argument ``main``. - - Examples: - - - Save the state of some modules: - - >>> import dill - >>> squared = lambda x: x*x - >>> dill.dump_module() # save state of __main__ to /tmp/session.pkl - >>> - >>> import pox # an imported module - >>> pox.plus_one = lambda x: x+1 - >>> dill.dump_module('pox_session.pkl', module=pox) - >>> - >>> from types import ModuleType - >>> foo = ModuleType('foo') # a module-type object - >>> foo.values = [1,2,3] - >>> import math - >>> foo.sin = math.sin - >>> dill.dump_module('foo_session.pkl', module=foo, refimported=True) - - - Restore the state of the interpreter: - - >>> import dill - >>> dill.load_module() # updates __main__ from /tmp/session.pkl - >>> squared(2) - 4 - - - Load the saved state of an importable module: - - >>> import dill - >>> pox = dill.load_module('pox_session.pkl') - >>> pox.plus_one(1) - 2 - >>> import sys - >>> pox in sys.modules.values() - True - - - Load the saved state of a non-importable module-type object: - - >>> import dill - >>> foo = dill.load_module('foo_session.pkl') - >>> [foo.sin(x) for x in foo.values] - [0.8414709848078965, 0.9092974268256817, 0.1411200080598672] - >>> import math - >>> foo.sin is math.sin # foo.sin was saved by reference - True - >>> import sys - >>> foo in sys.modules.values() - False - - - Update the state of a non-importable module-type object: - - >>> import dill - >>> from types import ModuleType - >>> foo = ModuleType('foo') - >>> foo.values = ['a','b'] - >>> foo.sin = lambda x: x*x - >>> dill.load_module('foo_session.pkl', module=foo) - >>> [foo.sin(x) for x in foo.values] - [0.8414709848078965, 0.9092974268256817, 0.1411200080598672] - - *Changed in version 0.3.6:* Function ``load_session()`` was renamed to - ``load_module()``. Parameter ``main`` was renamed to ``module``. - - See also: - :py:func:`load_module_asdict` to load the contents of module saved - with :py:func:`dump_module` into a dictionary. - """ - if 'main' in kwds: - warnings.warn( - "The argument 'main' has been renamed 'module'.", - PendingDeprecationWarning - ) - if module is not None: - raise TypeError("both 'module' and 'main' arguments were used") - module = kwds.pop('main') - main = module - if hasattr(filename, 'read'): - file = filename - else: - file = open(filename, 'rb') - try: - file = _make_peekable(file) - #FIXME: dill.settings are disabled - unpickler = Unpickler(file, **kwds) - unpickler._session = True - - # Resolve unpickler._main - pickle_main = _identify_module(file, main) - if main is None and pickle_main is not None: - main = pickle_main - if isinstance(main, str): - if main.startswith('__runtime__.'): - # Create runtime module to load the session into. - main = ModuleType(main.partition('.')[-1]) - else: - main = _import_module(main) - if main is not None: - if not isinstance(main, ModuleType): - raise TypeError("%r is not a module" % main) - unpickler._main = main - else: - main = unpickler._main - - # Check against the pickle's main. - is_main_imported = _is_imported_module(main) - if pickle_main is not None: - is_runtime_mod = pickle_main.startswith('__runtime__.') - if is_runtime_mod: - pickle_main = pickle_main.partition('.')[-1] - error_msg = "can't update{} module{} %r with the saved state of{} module{} %r" - if is_runtime_mod and is_main_imported: - raise ValueError( - error_msg.format(" imported", "", "", "-type object") - % (main.__name__, pickle_main) - ) - if not is_runtime_mod and not is_main_imported: - raise ValueError( - error_msg.format("", "-type object", " imported", "") - % (pickle_main, main.__name__) - ) - if main.__name__ != pickle_main: - raise ValueError(error_msg.format("", "", "", "") % (main.__name__, pickle_main)) - - # This is for find_class() to be able to locate it. - if not is_main_imported: - runtime_main = '__runtime__.%s' % main.__name__ - sys.modules[runtime_main] = main - - loaded = unpickler.load() - finally: - if not hasattr(filename, 'read'): # if newly opened file - file.close() - try: - del sys.modules[runtime_main] - except (KeyError, NameError): - pass - assert loaded is main - _restore_modules(unpickler, main) - if main is _main_module or main is module: - return None - else: - return main - -# Backward compatibility. -def load_session(filename=str(TEMPDIR/'session.pkl'), main=None, **kwds): - warnings.warn("load_session() has been renamed load_module().", PendingDeprecationWarning) - load_module(filename, module=main, **kwds) -load_session.__doc__ = load_module.__doc__ - -def load_module_asdict( - filename = str(TEMPDIR/'session.pkl'), - update: bool = False, - **kwds -) -> dict: - """ - Load the contents of a saved module into a dictionary. - - ``load_module_asdict()`` is the near-equivalent of:: - - lambda filename: vars(dill.load_module(filename)).copy() - - however, does not alter the original module. Also, the path of - the loaded module is stored in the ``__session__`` attribute. - - Parameters: - filename: a path-like object or a readable stream - update: if `True`, initialize the dictionary with the current state - of the module prior to loading the state stored at filename. - **kwds: extra keyword arguments passed to :py:class:`Unpickler()` - - Raises: - :py:exc:`UnpicklingError`: if unpickling fails - - Returns: - A copy of the restored module's dictionary. - - Note: - If ``update`` is True, the corresponding module may first be imported - into the current namespace before the saved state is loaded from - filename to the dictionary. Note that any module that is imported into - the current namespace as a side-effect of using ``update`` will not be - modified by loading the saved module in filename to a dictionary. - - Example: - >>> import dill - >>> alist = [1, 2, 3] - >>> anum = 42 - >>> dill.dump_module() - >>> anum = 0 - >>> new_var = 'spam' - >>> main = dill.load_module_asdict() - >>> main['__name__'], main['__session__'] - ('__main__', '/tmp/session.pkl') - >>> main is globals() # loaded objects don't reference globals - False - >>> main['alist'] == alist - True - >>> main['alist'] is alist # was saved by value - False - >>> main['anum'] == anum # changed after the session was saved - False - >>> new_var in main # would be True if the option 'update' was set - False - """ - if 'module' in kwds: - raise TypeError("'module' is an invalid keyword argument for load_module_asdict()") - if hasattr(filename, 'read'): - file = filename - else: - file = open(filename, 'rb') - try: - file = _make_peekable(file) - main_name = _identify_module(file) - old_main = sys.modules.get(main_name) - main = ModuleType(main_name) - if update: - if old_main is None: - old_main = _import_module(main_name) - main.__dict__.update(old_main.__dict__) - else: - main.__builtins__ = __builtin__ - sys.modules[main_name] = main - load_module(file, **kwds) - finally: - if not hasattr(filename, 'read'): # if newly opened file - file.close() - try: - if old_main is None: - del sys.modules[main_name] - else: - sys.modules[main_name] = old_main - except NameError: # failed before setting old_main - pass - main.__session__ = str(filename) - return main.__dict__ - -### End: Pickle the Interpreter - class MetaCatchingDict(dict): def get(self, key, default=None): try: @@ -2457,7 +1905,6 @@ def save_capsule(pickler, obj): _incedental_reverse_typemap['PyCapsuleType'] = PyCapsuleType _reverse_typemap['PyCapsuleType'] = PyCapsuleType _incedental_types.add(PyCapsuleType) - SESSION_IMPORTED_AS_TYPES += (PyCapsuleType,) else: _testcapsule = None diff --git a/dill/session.py b/dill/session.py new file mode 100644 index 00000000..c8f79a3c --- /dev/null +++ b/dill/session.py @@ -0,0 +1,578 @@ +#!/usr/bin/env python +# +# Author: Mike McKerns (mmckerns @caltech and @uqfoundation) +# Author: Leonardo Gama (@leogama) +# Copyright (c) 2008-2015 California Institute of Technology. +# Copyright (c) 2016-2022 The Uncertainty Quantification Foundation. +# License: 3-clause BSD. The full license text is available at: +# - https://github.com/uqfoundation/dill/blob/master/LICENSE +""" +Pickle and restore the intepreter session. +""" + +__all__ = [ + 'dump_module', 'load_module', 'load_module_asdict', + 'dump_session', 'load_session' # backward compatibility +] + +import re +import sys +import warnings + +from dill import _dill, Pickler, Unpickler +from ._dill import ( + BuiltinMethodType, FunctionType, MethodType, ModuleType, TypeType, + _import_module, _is_builtin_module, _is_imported_module, _main_module, + _reverse_typemap, +) + +# Type hints. +from typing import Optional, Union + +import pathlib +import tempfile + +TEMPDIR = pathlib.PurePath(tempfile.gettempdir()) + +def _module_map(): + """get map of imported modules""" + from collections import defaultdict + from types import SimpleNamespace + modmap = SimpleNamespace( + by_name=defaultdict(list), + by_id=defaultdict(list), + top_level={}, + ) + for modname, module in sys.modules.items(): + if modname in ('__main__', '__mp_main__') or not isinstance(module, ModuleType): + continue + if '.' not in modname: + modmap.top_level[id(module)] = modname + for objname, modobj in module.__dict__.items(): + modmap.by_name[objname].append((modobj, modname)) + modmap.by_id[id(modobj)].append((modobj, objname, modname)) + return modmap + +IMPORTED_AS_TYPES = (ModuleType, TypeType, FunctionType, MethodType, BuiltinMethodType) +if 'PyCapsuleType' in _reverse_typemap: + IMPORTED_AS_TYPES += (_reverse_typemap['PyCapsuleType'],) +IMPORTED_AS_MODULES = ('ctypes', 'typing', 'subprocess', 'threading', + r'concurrent\.futures(\.\w+)?', r'multiprocessing(\.\w+)?') +IMPORTED_AS_MODULES = tuple(re.compile(x) for x in IMPORTED_AS_MODULES) + +def _lookup_module(modmap, name, obj, main_module): + """lookup name or id of obj if module is imported""" + for modobj, modname in modmap.by_name[name]: + if modobj is obj and sys.modules[modname] is not main_module: + return modname, name + __module__ = getattr(obj, '__module__', None) + if isinstance(obj, IMPORTED_AS_TYPES) or (__module__ is not None + and any(regex.fullmatch(__module__) for regex in IMPORTED_AS_MODULES)): + for modobj, objname, modname in modmap.by_id[id(obj)]: + if sys.modules[modname] is not main_module: + return modname, objname + return None, None + +def _stash_modules(main_module): + modmap = _module_map() + newmod = ModuleType(main_module.__name__) + + imported = [] + imported_as = [] + imported_top_level = [] # keep separated for backward compatibility + original = {} + for name, obj in main_module.__dict__.items(): + if obj is main_module: + original[name] = newmod # self-reference + elif obj is main_module.__dict__: + original[name] = newmod.__dict__ + # Avoid incorrectly matching a singleton value in another package (ex.: __doc__). + elif any(obj is singleton for singleton in (None, False, True)) \ + or isinstance(obj, ModuleType) and _is_builtin_module(obj): # always saved by ref + original[name] = obj + else: + source_module, objname = _lookup_module(modmap, name, obj, main_module) + if source_module is not None: + if objname == name: + imported.append((source_module, name)) + else: + imported_as.append((source_module, objname, name)) + else: + try: + imported_top_level.append((modmap.top_level[id(obj)], name)) + except KeyError: + original[name] = obj + + if len(original) < len(main_module.__dict__): + newmod.__dict__.update(original) + newmod.__dill_imported = imported + newmod.__dill_imported_as = imported_as + newmod.__dill_imported_top_level = imported_top_level + if getattr(newmod, '__loader__', None) is None and _is_imported_module(main_module): + # Trick _is_imported_module() to force saving as an imported module. + newmod.__loader__ = True # will be discarded by save_module() + return newmod + else: + return main_module + +def _restore_modules(unpickler, main_module): + try: + for modname, name in main_module.__dict__.pop('__dill_imported'): + main_module.__dict__[name] = unpickler.find_class(modname, name) + for modname, objname, name in main_module.__dict__.pop('__dill_imported_as'): + main_module.__dict__[name] = unpickler.find_class(modname, objname) + for modname, name in main_module.__dict__.pop('__dill_imported_top_level'): + main_module.__dict__[name] = __import__(modname) + except KeyError: + pass + +#NOTE: 06/03/15 renamed main_module to main +def dump_module( + filename = str(TEMPDIR/'session.pkl'), + module: Union[ModuleType, str] = None, + refimported: bool = False, + **kwds +) -> None: + """Pickle the current state of :py:mod:`__main__` or another module to a file. + + Save the contents of :py:mod:`__main__` (e.g. from an interactive + interpreter session), an imported module, or a module-type object (e.g. + built with :py:class:`~types.ModuleType`), to a file. The pickled + module can then be restored with the function :py:func:`load_module`. + + Parameters: + filename: a path-like object or a writable stream. + module: a module object or the name of an importable module. If `None` + (the default), :py:mod:`__main__` is saved. + refimported: if `True`, all objects identified as having been imported + into the module's namespace are saved by reference. *Note:* this is + similar but independent from ``dill.settings[`byref`]``, as + ``refimported`` refers to virtually all imported objects, while + ``byref`` only affects select objects. + **kwds: extra keyword arguments passed to :py:class:`Pickler()`. + + Raises: + :py:exc:`PicklingError`: if pickling fails. + + Examples: + + - Save current interpreter session state: + + >>> import dill + >>> squared = lambda x: x*x + >>> dill.dump_module() # save state of __main__ to /tmp/session.pkl + + - Save the state of an imported/importable module: + + >>> import dill + >>> import pox + >>> pox.plus_one = lambda x: x+1 + >>> dill.dump_module('pox_session.pkl', module=pox) + + - Save the state of a non-importable, module-type object: + + >>> import dill + >>> from types import ModuleType + >>> foo = ModuleType('foo') + >>> foo.values = [1,2,3] + >>> import math + >>> foo.sin = math.sin + >>> dill.dump_module('foo_session.pkl', module=foo, refimported=True) + + - Restore the state of the saved modules: + + >>> import dill + >>> dill.load_module() + >>> squared(2) + 4 + >>> pox = dill.load_module('pox_session.pkl') + >>> pox.plus_one(1) + 2 + >>> foo = dill.load_module('foo_session.pkl') + >>> [foo.sin(x) for x in foo.values] + [0.8414709848078965, 0.9092974268256817, 0.1411200080598672] + + *Changed in version 0.3.6:* Function ``dump_session()`` was renamed to + ``dump_module()``. Parameters ``main`` and ``byref`` were renamed to + ``module`` and ``refimported``, respectively. + + Note: + Currently, ``dill.settings['byref']`` and ``dill.settings['recurse']`` + don't apply to this function.` + """ + for old_par, par in [('main', 'module'), ('byref', 'refimported')]: + if old_par in kwds: + message = "The argument %r has been renamed %r" % (old_par, par) + if old_par == 'byref': + message += " to distinguish it from dill.settings['byref']" + warnings.warn(message + ".", PendingDeprecationWarning) + if locals()[par]: # the defaults are None and False + raise TypeError("both %r and %r arguments were used" % (par, old_par)) + refimported = kwds.pop('byref', refimported) + module = kwds.pop('main', module) + + from .settings import settings + protocol = settings['protocol'] + main = module + if main is None: + main = _main_module + elif isinstance(main, str): + main = _import_module(main) + if not isinstance(main, ModuleType): + raise TypeError("%r is not a module" % main) + if hasattr(filename, 'write'): + file = filename + else: + file = open(filename, 'wb') + try: + pickler = Pickler(file, protocol, **kwds) + pickler._original_main = main + if refimported: + main = _stash_modules(main) + pickler._main = main #FIXME: dill.settings are disabled + pickler._byref = False # disable pickling by name reference + pickler._recurse = False # disable pickling recursion for globals + pickler._session = True # is best indicator of when pickling a session + pickler._first_pass = True + pickler._main_modified = main is not pickler._original_main + pickler.dump(main) + finally: + if file is not filename: # if newly opened file + file.close() + return + +# Backward compatibility. +def dump_session(filename=str(TEMPDIR/'session.pkl'), main=None, byref=False, **kwds): + warnings.warn("dump_session() has been renamed dump_module()", PendingDeprecationWarning) + dump_module(filename, module=main, refimported=byref, **kwds) +dump_session.__doc__ = dump_module.__doc__ + +class _PeekableReader: + """lightweight stream wrapper that implements peek()""" + def __init__(self, stream): + self.stream = stream + def read(self, n): + return self.stream.read(n) + def readline(self): + return self.stream.readline() + def tell(self): + return self.stream.tell() + def close(self): + return self.stream.close() + def peek(self, n): + stream = self.stream + try: + if hasattr(stream, 'flush'): stream.flush() + position = stream.tell() + stream.seek(position) # assert seek() works before reading + chunk = stream.read(n) + stream.seek(position) + return chunk + except (AttributeError, OSError): + raise NotImplementedError("stream is not peekable: %r", stream) from None + +def _make_peekable(stream): + """return stream as an object with a peek() method""" + import io + if hasattr(stream, 'peek'): + return stream + if not (hasattr(stream, 'tell') and hasattr(stream, 'seek')): + try: + return io.BufferedReader(stream) + except Exception: + pass + return _PeekableReader(stream) + +def _identify_module(file, main=None): + """identify the name of the module stored in the given file-type object""" + from pickletools import genops + UNICODE = {'UNICODE', 'BINUNICODE', 'SHORT_BINUNICODE'} + found_import = False + try: + for opcode, arg, pos in genops(file.peek(256)): + if not found_import: + if opcode.name in ('GLOBAL', 'SHORT_BINUNICODE') and \ + arg.endswith('_import_module'): + found_import = True + else: + if opcode.name in UNICODE: + return arg + else: + raise UnpicklingError("reached STOP without finding main module") + except (NotImplementedError, ValueError) as error: + # ValueError occours when the end of the chunk is reached (without a STOP). + if isinstance(error, NotImplementedError) and main is not None: + # file is not peekable, but we have main. + return None + raise UnpicklingError("unable to identify main module") from error + +def load_module( + filename = str(TEMPDIR/'session.pkl'), + module: Union[ModuleType, str] = None, + **kwds +) -> Optional[ModuleType]: + """Update the selected module (default is :py:mod:`__main__`) with + the state saved at ``filename``. + + Restore a module to the state saved with :py:func:`dump_module`. The + saved module can be :py:mod:`__main__` (e.g. an interpreter session), + an imported module, or a module-type object (e.g. created with + :py:class:`~types.ModuleType`). + + When restoring the state of a non-importable module-type object, the + current instance of this module may be passed as the argument ``main``. + Otherwise, a new instance is created with :py:class:`~types.ModuleType` + and returned. + + Parameters: + filename: a path-like object or a readable stream. + module: a module object or the name of an importable module; + the module name and kind (i.e. imported or non-imported) must + match the name and kind of the module stored at ``filename``. + **kwds: extra keyword arguments passed to :py:class:`Unpickler()`. + + Raises: + :py:exc:`UnpicklingError`: if unpickling fails. + :py:exc:`ValueError`: if the argument ``main`` and module saved + at ``filename`` are incompatible. + + Returns: + A module object, if the saved module is not :py:mod:`__main__` or + a module instance wasn't provided with the argument ``main``. + + Examples: + + - Save the state of some modules: + + >>> import dill + >>> squared = lambda x: x*x + >>> dill.dump_module() # save state of __main__ to /tmp/session.pkl + >>> + >>> import pox # an imported module + >>> pox.plus_one = lambda x: x+1 + >>> dill.dump_module('pox_session.pkl', module=pox) + >>> + >>> from types import ModuleType + >>> foo = ModuleType('foo') # a module-type object + >>> foo.values = [1,2,3] + >>> import math + >>> foo.sin = math.sin + >>> dill.dump_module('foo_session.pkl', module=foo, refimported=True) + + - Restore the state of the interpreter: + + >>> import dill + >>> dill.load_module() # updates __main__ from /tmp/session.pkl + >>> squared(2) + 4 + + - Load the saved state of an importable module: + + >>> import dill + >>> pox = dill.load_module('pox_session.pkl') + >>> pox.plus_one(1) + 2 + >>> import sys + >>> pox in sys.modules.values() + True + + - Load the saved state of a non-importable module-type object: + + >>> import dill + >>> foo = dill.load_module('foo_session.pkl') + >>> [foo.sin(x) for x in foo.values] + [0.8414709848078965, 0.9092974268256817, 0.1411200080598672] + >>> import math + >>> foo.sin is math.sin # foo.sin was saved by reference + True + >>> import sys + >>> foo in sys.modules.values() + False + + - Update the state of a non-importable module-type object: + + >>> import dill + >>> from types import ModuleType + >>> foo = ModuleType('foo') + >>> foo.values = ['a','b'] + >>> foo.sin = lambda x: x*x + >>> dill.load_module('foo_session.pkl', module=foo) + >>> [foo.sin(x) for x in foo.values] + [0.8414709848078965, 0.9092974268256817, 0.1411200080598672] + + *Changed in version 0.3.6:* Function ``load_session()`` was renamed to + ``load_module()``. Parameter ``main`` was renamed to ``module``. + + See also: + :py:func:`load_module_asdict` to load the contents of module saved + with :py:func:`dump_module` into a dictionary. + """ + if 'main' in kwds: + warnings.warn( + "The argument 'main' has been renamed 'module'.", + PendingDeprecationWarning + ) + if module is not None: + raise TypeError("both 'module' and 'main' arguments were used") + module = kwds.pop('main') + main = module + if hasattr(filename, 'read'): + file = filename + else: + file = open(filename, 'rb') + try: + file = _make_peekable(file) + #FIXME: dill.settings are disabled + unpickler = Unpickler(file, **kwds) + unpickler._session = True + + # Resolve unpickler._main + pickle_main = _identify_module(file, main) + if main is None and pickle_main is not None: + main = pickle_main + if isinstance(main, str): + if main.startswith('__runtime__.'): + # Create runtime module to load the session into. + main = ModuleType(main.partition('.')[-1]) + else: + main = _import_module(main) + if main is not None: + if not isinstance(main, ModuleType): + raise TypeError("%r is not a module" % main) + unpickler._main = main + else: + main = unpickler._main + + # Check against the pickle's main. + is_main_imported = _is_imported_module(main) + if pickle_main is not None: + is_runtime_mod = pickle_main.startswith('__runtime__.') + if is_runtime_mod: + pickle_main = pickle_main.partition('.')[-1] + error_msg = "can't update{} module{} %r with the saved state of{} module{} %r" + if is_runtime_mod and is_main_imported: + raise ValueError( + error_msg.format(" imported", "", "", "-type object") + % (main.__name__, pickle_main) + ) + if not is_runtime_mod and not is_main_imported: + raise ValueError( + error_msg.format("", "-type object", " imported", "") + % (pickle_main, main.__name__) + ) + if main.__name__ != pickle_main: + raise ValueError(error_msg.format("", "", "", "") % (main.__name__, pickle_main)) + + # This is for find_class() to be able to locate it. + if not is_main_imported: + runtime_main = '__runtime__.%s' % main.__name__ + sys.modules[runtime_main] = main + + loaded = unpickler.load() + finally: + if not hasattr(filename, 'read'): # if newly opened file + file.close() + try: + del sys.modules[runtime_main] + except (KeyError, NameError): + pass + assert loaded is main + _restore_modules(unpickler, main) + if main is _main_module or main is module: + return None + else: + return main + +# Backward compatibility. +def load_session(filename=str(TEMPDIR/'session.pkl'), main=None, **kwds): + warnings.warn("load_session() has been renamed load_module().", PendingDeprecationWarning) + load_module(filename, module=main, **kwds) +load_session.__doc__ = load_module.__doc__ + +def load_module_asdict( + filename = str(TEMPDIR/'session.pkl'), + update: bool = False, + **kwds +) -> dict: + """ + Load the contents of a saved module into a dictionary. + + ``load_module_asdict()`` is the near-equivalent of:: + + lambda filename: vars(dill.load_module(filename)).copy() + + however, does not alter the original module. Also, the path of + the loaded module is stored in the ``__session__`` attribute. + + Parameters: + filename: a path-like object or a readable stream + update: if `True`, initialize the dictionary with the current state + of the module prior to loading the state stored at filename. + **kwds: extra keyword arguments passed to :py:class:`Unpickler()` + + Raises: + :py:exc:`UnpicklingError`: if unpickling fails + + Returns: + A copy of the restored module's dictionary. + + Note: + If ``update`` is True, the corresponding module may first be imported + into the current namespace before the saved state is loaded from + filename to the dictionary. Note that any module that is imported into + the current namespace as a side-effect of using ``update`` will not be + modified by loading the saved module in filename to a dictionary. + + Example: + >>> import dill + >>> alist = [1, 2, 3] + >>> anum = 42 + >>> dill.dump_module() + >>> anum = 0 + >>> new_var = 'spam' + >>> main = dill.load_module_asdict() + >>> main['__name__'], main['__session__'] + ('__main__', '/tmp/session.pkl') + >>> main is globals() # loaded objects don't reference globals + False + >>> main['alist'] == alist + True + >>> main['alist'] is alist # was saved by value + False + >>> main['anum'] == anum # changed after the session was saved + False + >>> new_var in main # would be True if the option 'update' was set + False + """ + import builtins + if 'module' in kwds: + raise TypeError("'module' is an invalid keyword argument for load_module_asdict()") + if hasattr(filename, 'read'): + file = filename + else: + file = open(filename, 'rb') + try: + file = _make_peekable(file) + main_name = _identify_module(file) + old_main = sys.modules.get(main_name) + main = ModuleType(main_name) + if update: + if old_main is None: + old_main = _import_module(main_name) + main.__dict__.update(old_main.__dict__) + else: + main.__builtins__ = builtins + sys.modules[main_name] = main + load_module(file, **kwds) + finally: + if not hasattr(filename, 'read'): # if newly opened file + file.close() + try: + if old_main is None: + del sys.modules[main_name] + else: + sys.modules[main_name] = old_main + except NameError: # failed before setting old_main + pass + main.__session__ = str(filename) + return main.__dict__ diff --git a/dill/tests/test_session.py b/dill/tests/test_session.py index 9124802c..51128916 100644 --- a/dill/tests/test_session.py +++ b/dill/tests/test_session.py @@ -197,7 +197,7 @@ def test_runtime_module(): runtime = ModuleType(modname) runtime.x = 42 - mod = dill._dill._stash_modules(runtime) + mod = dill.session._stash_modules(runtime) if mod is not runtime: print("There are objects to save by referenece that shouldn't be:", mod.__dill_imported, mod.__dill_imported_as, mod.__dill_imported_top_level, diff --git a/docs/source/dill.rst b/docs/source/dill.rst index 31d41c91..af64599c 100644 --- a/docs/source/dill.rst +++ b/docs/source/dill.rst @@ -11,7 +11,7 @@ dill module :special-members: :show-inheritance: :imported-members: - :exclude-members: dump_session, load_session +.. :exclude-members: detect module ------------- @@ -49,6 +49,18 @@ pointers module :imported-members: .. :exclude-members: +session module +--------------- + +.. automodule:: dill.session + :members: + :undoc-members: + :private-members: + :special-members: + :show-inheritance: + :imported-members: + :exclude-members: dump_session, load_session + settings module --------------- From a7d42ddc16eb15fde2f0a02006002bed78d70c40 Mon Sep 17 00:00:00 2001 From: Leonardo Gama Date: Fri, 22 Jul 2022 19:27:08 -0300 Subject: [PATCH 2/3] export objects to ._dill for compatibility with 0.3.5.1 --- dill/session.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/dill/session.py b/dill/session.py index c8f79a3c..27643b00 100644 --- a/dill/session.py +++ b/dill/session.py @@ -576,3 +576,13 @@ def load_module_asdict( pass main.__session__ = str(filename) return main.__dict__ + + +# Internal exports for backward compatibility with dill v0.3.5.1 +# Can't be placed in dill._dill because of circular import problems. +for name in ( + '_lookup_module', '_module_map', '_restore_modules', '_stash_modules', + 'dump_session', 'load_session' # backward compatibility functions +): + setattr(_dill, name, globals()[name]) +del name From 7099ee21e9175f0707afc29a1a373eb7acf60bdc Mon Sep 17 00:00:00 2001 From: Leonardo Gama Date: Fri, 22 Jul 2022 19:31:08 -0300 Subject: [PATCH 3/3] use dill._dill.__builtin__ instead of builtins for consistency --- dill/session.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/dill/session.py b/dill/session.py index 27643b00..6a037d61 100644 --- a/dill/session.py +++ b/dill/session.py @@ -23,7 +23,7 @@ from ._dill import ( BuiltinMethodType, FunctionType, MethodType, ModuleType, TypeType, _import_module, _is_builtin_module, _is_imported_module, _main_module, - _reverse_typemap, + _reverse_typemap, __builtin__, ) # Type hints. @@ -544,7 +544,6 @@ def load_module_asdict( >>> new_var in main # would be True if the option 'update' was set False """ - import builtins if 'module' in kwds: raise TypeError("'module' is an invalid keyword argument for load_module_asdict()") if hasattr(filename, 'read'): @@ -561,7 +560,7 @@ def load_module_asdict( old_main = _import_module(main_name) main.__dict__.update(old_main.__dict__) else: - main.__builtins__ = builtins + main.__builtins__ = __builtin__ sys.modules[main_name] = main load_module(file, **kwds) finally: