From 0ea5c42163b928867baed95c22f78c90a9f703d5 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Sat, 9 Feb 2019 19:12:59 +0100 Subject: [PATCH 01/28] Add support for dynamically defined enums --- cloudpickle/cloudpickle.py | 36 +++++++++++++++++++++++++++++++++++- tests/cloudpickle_test.py | 24 ++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 75c29977e..83c0e5647 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -44,7 +44,6 @@ import dis from functools import partial -import importlib import io import itertools import logging @@ -63,6 +62,12 @@ DEFAULT_PROTOCOL = pickle.HIGHEST_PROTOCOL +try: + from enum import Enum +except ImportError: + Enum = None + + if sys.version_info[0] < 3: # pragma: no branch from pickle import Pickler try: @@ -460,6 +465,25 @@ def func(): # then discards the reference to it self.write(pickle.POP) + def _save_dynamic_enum(self, obj): + """Special handling for dynamic Enum subclasses + + Use the Enum functional API as the EnumMeta metaclass has complex + initialization and and that make the Enum classes hold references to + their own instances. + """ + # XXX: shall we pass type and start kwargs? If so how to retrieve the + # correct info from obj. + elements = dict((e.name, e.value) for e in obj) + extra = { + "__doc__": obj.__doc__, + "__module__": obj.__module__, + "__qualname__": obj.__qualname__, + } + self.save_reduce(_make_dynamic_enum, + (obj.__base__, obj.__name__, elements, extra), + obj=obj) + def save_dynamic_class(self, obj): """ Save a class that can't be stored as module global. @@ -468,6 +492,9 @@ def save_dynamic_class(self, obj): functions, or that otherwise can't be serialized as attribute lookups from global modules. """ + if Enum is not None and issubclass(obj, Enum): + return self._save_dynamic_enum(obj) + clsdict = dict(obj.__dict__) # copy dict proxy to a dict clsdict.pop('__weakref__', None) @@ -1124,6 +1151,13 @@ def _rehydrate_skeleton_class(skeleton_class, class_dict): return skeleton_class +def _make_dynamic_enum(base, name, elements, extra): + cls = base(name, elements, module=extra["__module__"], + qualname=extra["__qualname__"]) + cls.__doc__ = extra["__doc__"] + return cls + + def _is_dynamic(module): """ Return True if the module is special module that cannot be imported by its diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 6ffe2ad0c..f1d58fb7a 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -1368,6 +1368,30 @@ def test_dataclass(self): pickle_depickle(DataClass, protocol=self.protocol) assert data.x == pickle_depickle(data, protocol=self.protocol).x == 42 + def test_dynamically_defined_enum(self): + enum = pytest.importorskip("enum") + + class Color(enum.Enum): + """3-element color space""" + RED = 1 + GREEN = 2 + BLUE = 3 + + green1, green2, ClonedColor = pickle_depickle( + [Color.GREEN, Color.GREEN, Color], protocol=self.protocol) + assert green1 is green2 + assert green1 is ClonedColor.GREEN + assert green1 is not ClonedColor.BLUE + + assert ClonedColor.__doc__ == Color.__doc__ + assert ClonedColor.__module__ == Color.__module__ + assert ClonedColor.__qualname__ == Color.__qualname__ + + # cloudpickle systematically creates new copies for locally defined + # classes that cannot be imported by name: + assert green1 is not Color.GREEN + assert ClonedColor is not Color + class Protocol2CloudPickleTest(CloudPickleTest): From 0c92537f0ee7df65e8058dcdbf1ce31f2fd3b169 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Sat, 9 Feb 2019 19:33:19 +0100 Subject: [PATCH 02/28] Fix Python 2 support --- cloudpickle/cloudpickle.py | 17 +++++++++-------- tests/cloudpickle_test.py | 4 +++- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 83c0e5647..dfb233f30 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -475,11 +475,11 @@ def _save_dynamic_enum(self, obj): # XXX: shall we pass type and start kwargs? If so how to retrieve the # correct info from obj. elements = dict((e.name, e.value) for e in obj) - extra = { - "__doc__": obj.__doc__, - "__module__": obj.__module__, - "__qualname__": obj.__qualname__, - } + attrnames = ["__doc__", "__module__", "__qualname__"] + extra = dict( + (attrname[2:-2], getattr(obj, attrname)) + for attrname in attrnames if hasattr(obj, attrname) + ) self.save_reduce(_make_dynamic_enum, (obj.__base__, obj.__name__, elements, extra), obj=obj) @@ -1152,9 +1152,10 @@ def _rehydrate_skeleton_class(skeleton_class, class_dict): def _make_dynamic_enum(base, name, elements, extra): - cls = base(name, elements, module=extra["__module__"], - qualname=extra["__qualname__"]) - cls.__doc__ = extra["__doc__"] + doc = extra.pop("doc", None) + cls = base(name, elements, **extra) + if doc is not None: + cls.__doc__ = doc return cls diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index f1d58fb7a..7db4cdaf1 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -1385,7 +1385,9 @@ class Color(enum.Enum): assert ClonedColor.__doc__ == Color.__doc__ assert ClonedColor.__module__ == Color.__module__ - assert ClonedColor.__qualname__ == Color.__qualname__ + + if hasattr(Color, "__qualname__"): + assert ClonedColor.__qualname__ == Color.__qualname__ # cloudpickle systematically creates new copies for locally defined # classes that cannot be imported by name: From a6025737f3a34278fd0e5499028a09b43e4c4f82 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Sat, 9 Feb 2019 20:01:26 +0100 Subject: [PATCH 03/28] Add entry to changelog --- CHANGES.md | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 160e0d68f..38b8567f6 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,15 +1,20 @@ 0.8.0 ===== -- Add support for pickling interactively defined dataclasses. - ([issue #245](https://github.com/cloudpipe/cloudpickle/pull/245)) - - Global variables referenced by functions pickled by cloudpickle are now unpickled in a new and isolated namespace scoped by the CloudPickler instance. This restores the (previously untested) behavior of cloudpickle prior to changes done in 0.5.4 for functions defined in the `__main__` module, and 0.6.0/1 for other dynamic functions. +- Add support for pickling interactively defined dataclasses. + ([issue #245](https://github.com/cloudpipe/cloudpickle/pull/245)) + +- Add support for pickling interactively defined Enum subclasses and + their instances. + ([issue #246](https://github.com/cloudpipe/cloudpickle/pull/246)) + + 0.7.0 ===== From 0af2d33a8f528d8d203d28ec9858bb29d51c82ba Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Sat, 9 Feb 2019 20:07:11 +0100 Subject: [PATCH 04/28] Enum is already supported in Python 2.7 --- cloudpickle/cloudpickle.py | 9 ++------- tests/cloudpickle_test.py | 4 ++-- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index dfb233f30..6c45372a7 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -55,6 +55,7 @@ import traceback import types import weakref +from enum import Enum # cloudpickle is meant for inter process communication: we expect all # communicating processes to run the same Python version hence we favor @@ -62,12 +63,6 @@ DEFAULT_PROTOCOL = pickle.HIGHEST_PROTOCOL -try: - from enum import Enum -except ImportError: - Enum = None - - if sys.version_info[0] < 3: # pragma: no branch from pickle import Pickler try: @@ -492,7 +487,7 @@ def save_dynamic_class(self, obj): functions, or that otherwise can't be serialized as attribute lookups from global modules. """ - if Enum is not None and issubclass(obj, Enum): + if issubclass(obj, Enum): return self._save_dynamic_enum(obj) clsdict = dict(obj.__dict__) # copy dict proxy to a dict diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 7db4cdaf1..15680a216 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -21,6 +21,7 @@ import unittest import weakref import os +from enum import Enum import pytest @@ -1369,9 +1370,8 @@ def test_dataclass(self): assert data.x == pickle_depickle(data, protocol=self.protocol).x == 42 def test_dynamically_defined_enum(self): - enum = pytest.importorskip("enum") - class Color(enum.Enum): + class Color(Enum): """3-element color space""" RED = 1 GREEN = 2 From 0c76780146c05d7374c5b8755895d7bf6cc4a66f Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Sat, 9 Feb 2019 20:42:10 +0100 Subject: [PATCH 05/28] Fix docstring --- cloudpickle/cloudpickle.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 6c45372a7..f69d602c6 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -463,9 +463,9 @@ def func(): def _save_dynamic_enum(self, obj): """Special handling for dynamic Enum subclasses - Use the Enum functional API as the EnumMeta metaclass has complex - initialization and and that make the Enum classes hold references to - their own instances. + Use the Enum functional API (inherited from EnumMeta.__call__) as the + EnumMeta metaclass has complex initialization that makes the Enum + subclasses hold references to their own instances. """ # XXX: shall we pass type and start kwargs? If so how to retrieve the # correct info from obj. From ea34466101c4ce7b82cd012c24b83a82b9fc9be1 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Sat, 9 Feb 2019 21:04:27 +0100 Subject: [PATCH 06/28] Test with IntEnum --- cloudpickle/cloudpickle.py | 5 ++--- tests/cloudpickle_test.py | 15 ++++++++++++++- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index f69d602c6..d8e449e84 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -1147,10 +1147,9 @@ def _rehydrate_skeleton_class(skeleton_class, class_dict): def _make_dynamic_enum(base, name, elements, extra): - doc = extra.pop("doc", None) + doc = extra.pop("doc") cls = base(name, elements, **extra) - if doc is not None: - cls.__doc__ = doc + cls.__doc__ = doc return cls diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 15680a216..ea0b6e41e 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -21,7 +21,7 @@ import unittest import weakref import os -from enum import Enum +from enum import Enum, IntEnum import pytest @@ -1394,6 +1394,19 @@ class Color(Enum): assert green1 is not Color.GREEN assert ClonedColor is not Color + # Try again with a IntEnum defined with the functional API + DynamicColor = IntEnum("Color", {"RED": 1, "GREEN": 2, "BLUE": 3}) + + green1, green2, ClonedDynamicColor = pickle_depickle( + [DynamicColor.GREEN, DynamicColor.GREEN, DynamicColor], + protocol=self.protocol) + + assert green1 is green2 + assert green1 is ClonedDynamicColor.GREEN + assert green1 is not ClonedDynamicColor.BLUE + assert ClonedDynamicColor.__module__ == DynamicColor.__module__ + assert ClonedDynamicColor.__doc__ == DynamicColor.__doc__ + class Protocol2CloudPickleTest(CloudPickleTest): From 6b3fd45d226b9e37e3bc0aa0ca7ea224d885d6dd Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Mon, 11 Feb 2019 12:40:30 +0100 Subject: [PATCH 07/28] Lookup enum classes from __main__ module at unpickling time --- cloudpickle/cloudpickle.py | 38 ++++++++++++++++++++++++++++---------- tests/cloudpickle_test.py | 24 +++++++++++++++++++++++- 2 files changed, 51 insertions(+), 11 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index d8e449e84..a42438915 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -470,13 +470,19 @@ def _save_dynamic_enum(self, obj): # XXX: shall we pass type and start kwargs? If so how to retrieve the # correct info from obj. elements = dict((e.name, e.value) for e in obj) - attrnames = ["__doc__", "__module__", "__qualname__"] - extra = dict( - (attrname[2:-2], getattr(obj, attrname)) - for attrname in attrnames if hasattr(obj, attrname) - ) + + if obj.__doc__ is not obj.__base__.__doc__: + doc = obj.__doc__ + else: + doc = None + + extra = {} + if hasattr(obj, "__qualname__"): + extra["qualname"] = obj.__qualname__ + self.save_reduce(_make_dynamic_enum, - (obj.__base__, obj.__name__, elements, extra), + (obj.__base__, obj.__name__, elements, doc, + obj.__module__, extra), obj=obj) def save_dynamic_class(self, obj): @@ -1146,10 +1152,22 @@ def _rehydrate_skeleton_class(skeleton_class, class_dict): return skeleton_class -def _make_dynamic_enum(base, name, elements, extra): - doc = extra.pop("doc") - cls = base(name, elements, **extra) - cls.__doc__ = doc +def _make_dynamic_enum(base, name, elements, doc, module, extra): + if module == "__main__": + # Special case: try to lookup enum from main module to make it possible + # to use physical comparison with enum instances returned by a remote + # function calls whose results are communicated back to the main Python + # process with cloudpickle. + try: + lookedup_enum = getattr(sys.modules["__main__"], name) + assert issubclass(lookedup_enum, Enum) + return lookedup_enum + except (KeyError, AttributeError): + # Fall-back to dynamic instanciation. + pass + cls = base(name, elements, module=module, **extra) + if doc is not None: + cls.__doc__ = doc return cls diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index ea0b6e41e..dd6b630c7 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -1369,7 +1369,7 @@ def test_dataclass(self): pickle_depickle(DataClass, protocol=self.protocol) assert data.x == pickle_depickle(data, protocol=self.protocol).x == 42 - def test_dynamically_defined_enum(self): + def test_locally_defined_enum(self): class Color(Enum): """3-element color space""" @@ -1407,6 +1407,28 @@ class Color(Enum): assert ClonedDynamicColor.__module__ == DynamicColor.__module__ assert ClonedDynamicColor.__doc__ == DynamicColor.__doc__ + def test_interactively_defined_enum(self): + code = """if True: + from enum import Enum + from testutils import subprocess_pickle_echo + + class Color(Enum): + RED = 1 + GREEN = 2 + + green1, green2, ClonedColor = subprocess_pickle_echo( + [Color.GREEN, Color.GREEN, Color], protocol={protocol}) + assert green1 is green2 + assert green1 is ClonedColor.GREEN + + # Check that the pickle that was return has been matched back to the + # interactively defined Color living in the __main__ module of the + # original Python process. + assert ClonedColor is Color + assert green1 is Color.GREEN + """.format(protocol=self.protocol) + assert_run_python_script(code) + class Protocol2CloudPickleTest(CloudPickleTest): From b42144af3990518ddf71483d054f7651e9f0162b Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 14 Feb 2019 09:13:05 +0100 Subject: [PATCH 08/28] Better test for interactively defined enums --- tests/cloudpickle_test.py | 40 ++++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index dd6b630c7..557a2fc49 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -1410,22 +1410,36 @@ class Color(Enum): def test_interactively_defined_enum(self): code = """if True: from enum import Enum - from testutils import subprocess_pickle_echo + from testutils import subprocess_worker - class Color(Enum): - RED = 1 - GREEN = 2 + with subprocess_worker(protocol={protocol}) as w: - green1, green2, ClonedColor = subprocess_pickle_echo( - [Color.GREEN, Color.GREEN, Color], protocol={protocol}) - assert green1 is green2 - assert green1 is ClonedColor.GREEN + class Color(Enum): + RED = 1 + GREEN = 2 + + def check_positive(x): + return Color.GREEN if x >= 0 else Color.RED + + result = w.run(check_positive, 1) + + # Check that the returned enum instance is reconciled with the + # locally defined Color enum type definition: + + assert result is Color.GREEN + + # Check that changing the defintion of the Enum class is taken + # into account on the worker for subsequent calls: + + class Color(Enum): + RED = 1 + BLUE = 2 + + def check_positive(x): + return Color.BLUE if x >= 0 else Color.RED - # Check that the pickle that was return has been matched back to the - # interactively defined Color living in the __main__ module of the - # original Python process. - assert ClonedColor is Color - assert green1 is Color.GREEN + result = w.run(check_positive, 1) + assert result is Color.BLUE """.format(protocol=self.protocol) assert_run_python_script(code) From 90ab1d78b4288d4ed6e68ac7c438e6551bf60d6b Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 14 Feb 2019 09:52:38 +0100 Subject: [PATCH 09/28] Restore changelog from master --- CHANGES.md | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 38b8567f6..b8de66eec 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,19 +1,15 @@ 0.8.0 ===== +- Add support for pickling interactively defined dataclasses. + ([issue #245](https://github.com/cloudpipe/cloudpickle/pull/245)) + - Global variables referenced by functions pickled by cloudpickle are now unpickled in a new and isolated namespace scoped by the CloudPickler instance. This restores the (previously untested) behavior of cloudpickle prior to changes done in 0.5.4 for functions defined in the `__main__` module, and 0.6.0/1 for other dynamic functions. -- Add support for pickling interactively defined dataclasses. - ([issue #245](https://github.com/cloudpipe/cloudpickle/pull/245)) - -- Add support for pickling interactively defined Enum subclasses and - their instances. - ([issue #246](https://github.com/cloudpipe/cloudpickle/pull/246)) - 0.7.0 ===== From ed34c4f9adb8a10557302039c11c1f4bee53919b Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 14 Feb 2019 15:15:49 +0100 Subject: [PATCH 10/28] Fix test under Windows --- tests/cloudpickle_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 557a2fc49..a7c6bd2bf 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -1408,7 +1408,7 @@ class Color(Enum): assert ClonedDynamicColor.__doc__ == DynamicColor.__doc__ def test_interactively_defined_enum(self): - code = """if True: + code = """if __name__ == "__main__": from enum import Enum from testutils import subprocess_worker From 658a7f7da9f52b9eb629a2d5bf5e85cfb095b635 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Fri, 15 Feb 2019 18:59:12 +0100 Subject: [PATCH 11/28] WIP: dynamic class defintion tracking --- cloudpickle/cloudpickle.py | 57 ++++++++++++++++++++++++++------------ tests/cloudpickle_test.py | 44 +++++++++++++++++++++-------- 2 files changed, 72 insertions(+), 29 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index a42438915..c129d23e9 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -55,6 +55,8 @@ import traceback import types import weakref +import uuid +import threading from enum import Enum # cloudpickle is meant for inter process communication: we expect all @@ -63,6 +65,10 @@ DEFAULT_PROTOCOL = pickle.HIGHEST_PROTOCOL +_DYNAMIC_CLASS_TRACKER_BY_CLASS = weakref.WeakKeyDictionary() +_DYNAMIC_CLASS_TRACKER_BY_ID = weakref.WeakValueDictionary() +_DYNAMIC_CLASS_TRACKER_LOCK = threading.Lock() + if sys.version_info[0] < 3: # pragma: no branch from pickle import Pickler try: @@ -79,6 +85,25 @@ PY3 = True +def _ensure_tracking(class_def): + with _DYNAMIC_CLASS_TRACKER_LOCK: + class_tracker_id = _DYNAMIC_CLASS_TRACKER_BY_CLASS.get(class_def) + if class_tracker_id is None: + class_tracker_id = uuid.uuid4().hex + _DYNAMIC_CLASS_TRACKER_BY_CLASS[class_def] = class_tracker_id + _DYNAMIC_CLASS_TRACKER_BY_ID[class_tracker_id] = class_def + return class_tracker_id + + +def _lookup_class_or_track(class_tracker_id, class_def): + if class_tracker_id is not None: + with _DYNAMIC_CLASS_TRACKER_LOCK: + class_def = _DYNAMIC_CLASS_TRACKER_BY_ID.setdefault( + class_tracker_id, class_def) + _DYNAMIC_CLASS_TRACKER_BY_CLASS[class_def] = class_tracker_id + return class_def + + def _make_cell_set_template_code(): """Get the Python compiler to emit LOAD_FAST(arg); STORE_DEREF @@ -467,6 +492,8 @@ def _save_dynamic_enum(self, obj): EnumMeta metaclass has complex initialization that makes the Enum subclasses hold references to their own instances. """ + class_tracker_id = _ensure_tracking(obj) + # XXX: shall we pass type and start kwargs? If so how to retrieve the # correct info from obj. elements = dict((e.name, e.value) for e in obj) @@ -482,7 +509,7 @@ def _save_dynamic_enum(self, obj): self.save_reduce(_make_dynamic_enum, (obj.__base__, obj.__name__, elements, doc, - obj.__module__, extra), + obj.__module__, class_tracker_id, extra), obj=obj) def save_dynamic_class(self, obj): @@ -496,7 +523,10 @@ def save_dynamic_class(self, obj): if issubclass(obj, Enum): return self._save_dynamic_enum(obj) + class_tracker_id = _ensure_tracking(obj) + clsdict = dict(obj.__dict__) # copy dict proxy to a dict + clsdict["_cloudpickle_class_tracker_id"] = class_tracker_id clsdict.pop('__weakref__', None) # For ABCMeta in python3.7+, remove _abc_impl as it is not picklable. @@ -1140,6 +1170,8 @@ def _rehydrate_skeleton_class(skeleton_class, class_dict): See CloudPickler.save_dynamic_class for more info. """ registry = None + class_tracker_id = class_dict.pop("_cloudpickle_class_tracker_id", None) + for attrname, attr in class_dict.items(): if attrname == "_abc_impl": registry = attr @@ -1149,26 +1181,15 @@ def _rehydrate_skeleton_class(skeleton_class, class_dict): for subclass in registry: skeleton_class.register(subclass) - return skeleton_class + return _lookup_class_or_track(class_tracker_id, skeleton_class) -def _make_dynamic_enum(base, name, elements, doc, module, extra): - if module == "__main__": - # Special case: try to lookup enum from main module to make it possible - # to use physical comparison with enum instances returned by a remote - # function calls whose results are communicated back to the main Python - # process with cloudpickle. - try: - lookedup_enum = getattr(sys.modules["__main__"], name) - assert issubclass(lookedup_enum, Enum) - return lookedup_enum - except (KeyError, AttributeError): - # Fall-back to dynamic instanciation. - pass - cls = base(name, elements, module=module, **extra) +def _make_dynamic_enum(base, name, elements, doc, module, class_tracker_id, + extra): + class_def = base(name, elements, module=module, **extra) if doc is not None: - cls.__doc__ = doc - return cls + class_def.__doc__ = doc + return _lookup_class_or_track(class_tracker_id, class_def) def _is_dynamic(module): diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index a7c6bd2bf..f332dc0e3 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -1198,6 +1198,32 @@ def is_in_main(name): """.format(protocol=self.protocol) assert_run_python_script(code) + def test_interactive_remote_dynamic_type_and_instances(self): + code = """if __name__ == "__main__": + from testutils import subprocess_worker + + with subprocess_worker(protocol={protocol}) as w: + + class CustomCounter: + def __init__(self): + self.count = 0 + def increment(self): + self.count += 1 + return self + + counter = CustomCounter().increment() + assert counter.count == 1 + + returned_counter = w.run(counter.increment) + assert returned_counter.count == 2, returned_counter.count + + # Check that the class definition of the returned instance was + # matched back to the original class defintion living in __main__ + assert isinstance(returned_counter, CustomCounter) + + """.format(protocol=self.protocol) + assert_run_python_script(code) + @pytest.mark.skipif(platform.python_implementation() == 'PyPy', reason="Skip PyPy because memory grows too much") def test_interactive_remote_function_calls_no_memory_leak(self): @@ -1383,16 +1409,13 @@ class Color(Enum): assert green1 is ClonedColor.GREEN assert green1 is not ClonedColor.BLUE - assert ClonedColor.__doc__ == Color.__doc__ - assert ClonedColor.__module__ == Color.__module__ - - if hasattr(Color, "__qualname__"): - assert ClonedColor.__qualname__ == Color.__qualname__ + # cloudpickle systematically tracks procenance of class definitions + # and ensure reconcylation in case of round trips: + assert green1 is Color.GREEN + assert ClonedColor is Color - # cloudpickle systematically creates new copies for locally defined - # classes that cannot be imported by name: - assert green1 is not Color.GREEN - assert ClonedColor is not Color + green3 = pickle_depickle(Color.GREEN, protocol=self.protocol) + assert green3 is Color.GREEN # Try again with a IntEnum defined with the functional API DynamicColor = IntEnum("Color", {"RED": 1, "GREEN": 2, "BLUE": 3}) @@ -1404,8 +1427,7 @@ class Color(Enum): assert green1 is green2 assert green1 is ClonedDynamicColor.GREEN assert green1 is not ClonedDynamicColor.BLUE - assert ClonedDynamicColor.__module__ == DynamicColor.__module__ - assert ClonedDynamicColor.__doc__ == DynamicColor.__doc__ + assert ClonedDynamicColor is DynamicColor def test_interactively_defined_enum(self): code = """if __name__ == "__main__": From b9adfc835559842d4c10bbf33c89901e7f3452d8 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Sat, 16 Feb 2019 16:53:17 +0100 Subject: [PATCH 12/28] Fix provenance tracking + add more tests --- cloudpickle/cloudpickle.py | 22 ++++++---- tests/cloudpickle_test.py | 83 ++++++++++++++++++++++++++++++++------ 2 files changed, 86 insertions(+), 19 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index c129d23e9..fcab57abc 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -64,7 +64,9 @@ # communication speed over compatibility: DEFAULT_PROTOCOL = pickle.HIGHEST_PROTOCOL - +# Track the provenance of reconstructed dynamic classes to make it possible +# to reconcyle instances with a singleton class definition when appropriate +# and preserve the usual "isinstance" semantics of Python objects. _DYNAMIC_CLASS_TRACKER_BY_CLASS = weakref.WeakKeyDictionary() _DYNAMIC_CLASS_TRACKER_BY_ID = weakref.WeakValueDictionary() _DYNAMIC_CLASS_TRACKER_LOCK = threading.Lock() @@ -523,10 +525,7 @@ def save_dynamic_class(self, obj): if issubclass(obj, Enum): return self._save_dynamic_enum(obj) - class_tracker_id = _ensure_tracking(obj) - clsdict = dict(obj.__dict__) # copy dict proxy to a dict - clsdict["_cloudpickle_class_tracker_id"] = class_tracker_id clsdict.pop('__weakref__', None) # For ABCMeta in python3.7+, remove _abc_impl as it is not picklable. @@ -582,8 +581,11 @@ def save_dynamic_class(self, obj): write(pickle.MARK) # Create and memoize an skeleton class with obj's name and bases. + extra = {"class_tracker_id": _ensure_tracking(obj)} tp = type(obj) - self.save_reduce(tp, (obj.__name__, obj.__bases__, type_kwargs), obj=obj) + self.save_reduce(_make_skeleton_class, + (tp, obj.__name__, obj.__bases__, type_kwargs, extra), + obj=obj) # Now save the rest of obj's __dict__. Any references to obj # encountered while saving will point to the skeleton class. @@ -1164,13 +1166,19 @@ def _make_skel_func(code, cell_count, base_globals=None): return types.FunctionType(code, base_globals, None, None, closure) +def _make_skeleton_class(type_constructor, name, bases, type_kwargs, + extra): + class_tracker_id = extra.get("class_tracker_id") + skeleton_class = type_constructor(name, bases, type_kwargs) + return _lookup_class_or_track(class_tracker_id, skeleton_class) + + def _rehydrate_skeleton_class(skeleton_class, class_dict): """Put attributes from `class_dict` back on `skeleton_class`. See CloudPickler.save_dynamic_class for more info. """ registry = None - class_tracker_id = class_dict.pop("_cloudpickle_class_tracker_id", None) for attrname, attr in class_dict.items(): if attrname == "_abc_impl": @@ -1181,7 +1189,7 @@ def _rehydrate_skeleton_class(skeleton_class, class_dict): for subclass in registry: skeleton_class.register(subclass) - return _lookup_class_or_track(class_tracker_id, skeleton_class) + return skeleton_class def _make_dynamic_enum(base, name, elements, doc, module, class_tracker_id, diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index f332dc0e3..cc9841d4c 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -925,21 +925,18 @@ def func(x): self.assertEqual(cloned.__qualname__, func.__qualname__) def test_namedtuple(self): - MyTuple = collections.namedtuple('MyTuple', ['a', 'b', 'c']) - t = MyTuple(1, 2, 3) - - depickled_t, depickled_MyTuple = pickle_depickle( - [t, MyTuple], protocol=self.protocol) - self.assertTrue(isinstance(depickled_t, depickled_MyTuple)) + t1 = MyTuple(1, 2, 3) + t2 = MyTuple(3, 2, 1) - self.assertEqual((depickled_t.a, depickled_t.b, depickled_t.c), - (1, 2, 3)) - self.assertEqual((depickled_t[0], depickled_t[1], depickled_t[2]), - (1, 2, 3)) + depickled_t1, depickled_MyTuple, depickled_t2 = pickle_depickle( + [t1, MyTuple, t2], protocol=self.protocol) - self.assertEqual(depickled_MyTuple.__name__, 'MyTuple') - self.assertTrue(issubclass(depickled_MyTuple, tuple)) + assert isinstance(depickled_t1, MyTuple) + assert depickled_t1 == t1 + assert depickled_MyTuple is MyTuple + assert isinstance(depickled_t2, MyTuple) + assert depickled_t2 == t2 def test_builtin_type__new__(self): # Functions occasionally take the __new__ of these types as default @@ -1224,6 +1221,68 @@ def increment(self): """.format(protocol=self.protocol) assert_run_python_script(code) + def test_interactive_remote_dynamic_type_and_futures(self): + """Simulate objects stored on workers to check isinstance semantics + + Such instances stored in the memory of running worker processes are + similar to dask-distributed futures for instance. + """ + code = """if __name__ == "__main__": + import cloudpickle, uuid + from testutils import subprocess_worker + + with subprocess_worker(protocol={protocol}) as w: + + class A: + '''Original class defintion''' + pass + + def store(x): + storage = getattr(cloudpickle, "_test_storage", None) + if storage is None: + storage = cloudpickle._test_storage = dict() + obj_id = uuid.uuid4().hex + storage[obj_id] = x + return obj_id + + def lookup(obj_id): + return cloudpickle._test_storage[obj_id] + + id1 = w.run(store, A()) + + # The stored object on the worker is matched to the class + # definition singleton thanks to provenance tracking: + assert w.run(lambda obj_id: isinstance(lookup(obj_id), A), id1) + + # Retrieving the object from the worker yields a local copy that + # is matched back the local class definition this instance stems + # from. + assert isinstance(w.run(lookup, id1), A) + + # Changing the local class definitions should be reflected in + # all subsequent calls. In particular the old instances do not + # map back to the new class definition, neither locally, nor on the + # worker: + + class A: + '''Modified class defintion''' + pass + + assert not w.run(lambda obj_id: isinstance(lookup(obj_id), A), id1) + retrieved1 = w.run(lookup, id1) + assert not isinstance(retrieved1, A) + assert retrieved1.__class__ is not A + assert retrieved1.__class__.__doc__ == "Original class defintion" + + # New instances on the other hand are proper instances of the new + # class definition everywhere: + + id2 = w.run(store, A()) + assert w.run(lambda obj_id: isinstance(lookup(obj_id), A), id2) + assert isinstance(w.run(lookup, id2), A) + """.format(protocol=self.protocol) + assert_run_python_script(code) + @pytest.mark.skipif(platform.python_implementation() == 'PyPy', reason="Skip PyPy because memory grows too much") def test_interactive_remote_function_calls_no_memory_leak(self): From 1979d52f134888210a1718c3b21f367876987ef2 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Sat, 16 Feb 2019 17:22:33 +0100 Subject: [PATCH 13/28] Add more tests for class provenance tracking --- tests/cloudpickle_test.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index cc9841d4c..6557813be 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -1216,8 +1216,21 @@ def increment(self): # Check that the class definition of the returned instance was # matched back to the original class defintion living in __main__ + assert isinstance(returned_counter, CustomCounter) + # Check that memoization does not break provenance tracking: + + def echo(*args): + return args + + C1, C2, c1, c2 = w.run(echo, CustomCounter, CustomCounter, + CustomCounter(), returned_counter) + assert C1 is CustomCounter + assert C2 is CustomCounter + assert isinstance(c1, CustomCounter) + assert isinstance(c2, CustomCounter) + """.format(protocol=self.protocol) assert_run_python_script(code) @@ -1476,6 +1489,7 @@ class Color(Enum): green3 = pickle_depickle(Color.GREEN, protocol=self.protocol) assert green3 is Color.GREEN + def test_locally_defined_intenum(self): # Try again with a IntEnum defined with the functional API DynamicColor = IntEnum("Color", {"RED": 1, "GREEN": 2, "BLUE": 3}) From 3876c3fcb44072de4508a4ee353866c618cd90f6 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Sat, 16 Feb 2019 17:36:23 +0100 Subject: [PATCH 14/28] Update changelog --- CHANGES.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index b8de66eec..e1b3e8d90 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,11 @@ +0.9.0 +===== + +- Track the provenance of dynamic classes and enums so as to preseve the + usual `isinstance` relationship between pickled objects and their + original class defintions. + ([issue #246](https://github.com/cloudpipe/cloudpickle/pull/246)) + 0.8.0 ===== @@ -10,7 +18,6 @@ prior to changes done in 0.5.4 for functions defined in the `__main__` module, and 0.6.0/1 for other dynamic functions. - 0.7.0 ===== From 4dba4d9f9b79b24466ad5babddda9899579a0110 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Sun, 17 Feb 2019 18:11:11 +0100 Subject: [PATCH 15/28] Make enum dependency optional --- cloudpickle/cloudpickle.py | 9 +++++++-- tests/cloudpickle_test.py | 8 +++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index fcab57abc..aa16ba14f 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -57,7 +57,12 @@ import weakref import uuid import threading -from enum import Enum + + +try: + from enum import Enum +except ImportError: + Enum = None # cloudpickle is meant for inter process communication: we expect all # communicating processes to run the same Python version hence we favor @@ -522,7 +527,7 @@ def save_dynamic_class(self, obj): functions, or that otherwise can't be serialized as attribute lookups from global modules. """ - if issubclass(obj, Enum): + if Enum is not None and issubclass(obj, Enum): return self._save_dynamic_enum(obj) clsdict = dict(obj.__dict__) # copy dict proxy to a dict diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 6557813be..84a30f88d 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -21,7 +21,6 @@ import unittest import weakref import os -from enum import Enum, IntEnum import pytest @@ -1468,8 +1467,9 @@ def test_dataclass(self): assert data.x == pickle_depickle(data, protocol=self.protocol).x == 42 def test_locally_defined_enum(self): + enum = pytest.importorskip("enum") - class Color(Enum): + class Color(enum.Enum): """3-element color space""" RED = 1 GREEN = 2 @@ -1490,8 +1490,9 @@ class Color(Enum): assert green3 is Color.GREEN def test_locally_defined_intenum(self): + enum = pytest.importorskip("enum") # Try again with a IntEnum defined with the functional API - DynamicColor = IntEnum("Color", {"RED": 1, "GREEN": 2, "BLUE": 3}) + DynamicColor = enum.IntEnum("Color", {"RED": 1, "GREEN": 2, "BLUE": 3}) green1, green2, ClonedDynamicColor = pickle_depickle( [DynamicColor.GREEN, DynamicColor.GREEN, DynamicColor], @@ -1503,6 +1504,7 @@ def test_locally_defined_intenum(self): assert ClonedDynamicColor is DynamicColor def test_interactively_defined_enum(self): + pytest.importorskip("enum") code = """if __name__ == "__main__": from enum import Enum from testutils import subprocess_worker From 90b345f99948f780d02753fb1b751dabaa0199b4 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Sun, 17 Feb 2019 22:18:23 +0100 Subject: [PATCH 16/28] Small improvements + better comments --- cloudpickle/cloudpickle.py | 24 ++++++++++++++++-------- tests/cloudpickle_test.py | 34 ++++++++++++++++++---------------- 2 files changed, 34 insertions(+), 24 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index aa16ba14f..cd74878c7 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -69,9 +69,9 @@ # communication speed over compatibility: DEFAULT_PROTOCOL = pickle.HIGHEST_PROTOCOL -# Track the provenance of reconstructed dynamic classes to make it possible -# to reconcyle instances with a singleton class definition when appropriate -# and preserve the usual "isinstance" semantics of Python objects. +# Track the provenance of reconstructed dynamic classes to make it possible to +# recontruct instances from the matching singleton class definition when +# appropriate and preserve the usual "isinstance" semantics of Python objects. _DYNAMIC_CLASS_TRACKER_BY_CLASS = weakref.WeakKeyDictionary() _DYNAMIC_CLASS_TRACKER_BY_ID = weakref.WeakValueDictionary() _DYNAMIC_CLASS_TRACKER_LOCK = threading.Lock() @@ -586,10 +586,10 @@ def save_dynamic_class(self, obj): write(pickle.MARK) # Create and memoize an skeleton class with obj's name and bases. - extra = {"class_tracker_id": _ensure_tracking(obj)} tp = type(obj) self.save_reduce(_make_skeleton_class, - (tp, obj.__name__, obj.__bases__, type_kwargs, extra), + (tp, obj.__name__, obj.__bases__, type_kwargs, + _ensure_tracking(obj), {}), obj=obj) # Now save the rest of obj's __dict__. Any references to obj @@ -1172,8 +1172,17 @@ def _make_skel_func(code, cell_count, base_globals=None): def _make_skeleton_class(type_constructor, name, bases, type_kwargs, - extra): - class_tracker_id = extra.get("class_tracker_id") + class_tracker_id, extra): + """Build dynamic class with an empty __dict__ to be filled once memoized + + If class_tracker_id is not None, try to lookup an existing class definition + matching that id. If none is found, track a newly reconstructed class + definition under that id so that other instances stemming from the same + class id will also reuse this class definition. + + The extra variable is meant to be a dict use for forward compatibility + shall the need arise. + """ skeleton_class = type_constructor(name, bases, type_kwargs) return _lookup_class_or_track(class_tracker_id, skeleton_class) @@ -1184,7 +1193,6 @@ def _rehydrate_skeleton_class(skeleton_class, class_dict): See CloudPickler.save_dynamic_class for more info. """ registry = None - for attrname, attr in class_dict.items(): if attrname == "_abc_impl": registry = attr diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 84a30f88d..827ec7f2e 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -1214,7 +1214,7 @@ def increment(self): assert returned_counter.count == 2, returned_counter.count # Check that the class definition of the returned instance was - # matched back to the original class defintion living in __main__ + # matched back to the original class definition living in __main__. assert isinstance(returned_counter, CustomCounter) @@ -1246,7 +1246,7 @@ def test_interactive_remote_dynamic_type_and_futures(self): with subprocess_worker(protocol={protocol}) as w: class A: - '''Original class defintion''' + '''Original class definition''' pass def store(x): @@ -1262,29 +1262,30 @@ def lookup(obj_id): id1 = w.run(store, A()) - # The stored object on the worker is matched to the class - # definition singleton thanks to provenance tracking: + # The stored object on the worker is matched to a singleton class + # definition thanks to provenance tracking: assert w.run(lambda obj_id: isinstance(lookup(obj_id), A), id1) # Retrieving the object from the worker yields a local copy that - # is matched back the local class definition this instance stems - # from. + # is matched back the local class definition this instance + # originally stems from. assert isinstance(w.run(lookup, id1), A) - # Changing the local class definitions should be reflected in - # all subsequent calls. In particular the old instances do not - # map back to the new class definition, neither locally, nor on the - # worker: + # Changing the local class definition should be taken into account + # in all subsequent calls. In particular the old instances on the + # worker do not map back to the new class definition, neither on + # the worker itself, nor locally on the main program when the old + # instance is retrieved: class A: - '''Modified class defintion''' + '''Updated class definition''' pass assert not w.run(lambda obj_id: isinstance(lookup(obj_id), A), id1) retrieved1 = w.run(lookup, id1) assert not isinstance(retrieved1, A) assert retrieved1.__class__ is not A - assert retrieved1.__class__.__doc__ == "Original class defintion" + assert retrieved1.__class__.__doc__ == "Original class definition" # New instances on the other hand are proper instances of the new # class definition everywhere: @@ -1324,9 +1325,10 @@ def process_data(): import gc w.run(gc.collect) - # By this time the worker process has processed worth of 100MB of - # data passed in the closures its memory size should now have - # grown by more than a few MB. + # By this time the worker process has processed 100MB worth of data + # passed in the closures. The worker memory size should not have + # grown by more than a few MB as closures are garbage collected at + # the end of each remote function call. growth = w.memsize() - reference_size assert growth < 1e7, growth @@ -1525,7 +1527,7 @@ def check_positive(x): assert result is Color.GREEN - # Check that changing the defintion of the Enum class is taken + # Check that changing the definition of the Enum class is taken # into account on the worker for subsequent calls: class Color(Enum): From e525ceababc4ff0192a60de312c1c1c94513ae67 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Tue, 19 Feb 2019 16:11:22 +0100 Subject: [PATCH 17/28] Expand stored instance test + rename some tests --- tests/cloudpickle_test.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 827ec7f2e..2bc25afc5 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -1194,7 +1194,7 @@ def is_in_main(name): """.format(protocol=self.protocol) assert_run_python_script(code) - def test_interactive_remote_dynamic_type_and_instances(self): + def test_interactive_dynamic_type_and_remote_instances(self): code = """if __name__ == "__main__": from testutils import subprocess_worker @@ -1233,7 +1233,7 @@ def echo(*args): """.format(protocol=self.protocol) assert_run_python_script(code) - def test_interactive_remote_dynamic_type_and_futures(self): + def test_interactive_dynamic_type_and_stored_remote_instances(self): """Simulate objects stored on workers to check isinstance semantics Such instances stored in the memory of running worker processes are @@ -1290,9 +1290,24 @@ class A: # New instances on the other hand are proper instances of the new # class definition everywhere: - id2 = w.run(store, A()) + a = A() + id2 = w.run(store, a) assert w.run(lambda obj_id: isinstance(lookup(obj_id), A), id2) assert isinstance(w.run(lookup, id2), A) + + # Monkeypatch the class defintion in the main process to a new + # class method: + A.echo = lambda cls, x: x + + # Calling this method on an instance will automatically update + # the remote class definition on the worker to propagate the monkey + # patch dynamically. + assert w.run(a.echo, 42) == 42 + + # The stored instance can therefore also access the new class + # method: + assert w.run(lambda obj_id: lookup(obj_id).echo(43), id2) == 43 + """.format(protocol=self.protocol) assert_run_python_script(code) From c5be1d436bf9c0c639a8c1d7b01cdc36004b8746 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 6 Mar 2019 15:34:10 +0100 Subject: [PATCH 18/28] Handle enums that inherit from a class and rehydrate methods --- cloudpickle/cloudpickle.py | 83 ++++++++++++++++++++++++-------------- tests/cloudpickle_test.py | 17 +++++--- 2 files changed, 64 insertions(+), 36 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index cd74878c7..88639a078 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -492,44 +492,47 @@ def func(): # then discards the reference to it self.write(pickle.POP) - def _save_dynamic_enum(self, obj): + def _save_dynamic_enum(self, obj, clsdict): """Special handling for dynamic Enum subclasses - Use the Enum functional API (inherited from EnumMeta.__call__) as the + Use a dedicated Enum constructor (inspired by EnumMeta.__call__) as the EnumMeta metaclass has complex initialization that makes the Enum subclasses hold references to their own instances. """ - class_tracker_id = _ensure_tracking(obj) - - # XXX: shall we pass type and start kwargs? If so how to retrieve the - # correct info from obj. - elements = dict((e.name, e.value) for e in obj) + members = dict((e.name, e.value) for e in obj) if obj.__doc__ is not obj.__base__.__doc__: doc = obj.__doc__ else: doc = None - extra = {} - if hasattr(obj, "__qualname__"): - extra["qualname"] = obj.__qualname__ + # Python 2.7 with enum34 can have no qualname: + qualname = getattr(obj, "__qualname__", None) - self.save_reduce(_make_dynamic_enum, - (obj.__base__, obj.__name__, elements, doc, - obj.__module__, class_tracker_id, extra), + # future compat: set to None for now but allow to be a dict with extra + # datastructures if needed in the future + extra = None + self.save_reduce(_make_skeleton_enum, + (obj.__bases__, obj.__name__, qualname, members, doc, + obj.__module__, _ensure_tracking(obj), extra), obj=obj) + # Cleanup the clsdict that will be passed to _rehydrate_skeleton_class: + # Those attributes are already handled by the metaclass. + for attrname in ["_generate_next_value_", "_member_names_", + "_member_map_", "_member_type_", + "_value2member_map_"]: + clsdict.pop(attrname) + for member in members: + clsdict.pop(member) + def save_dynamic_class(self, obj): - """ - Save a class that can't be stored as module global. + """Save a class that can't be stored as module global. This method is used to serialize classes that are defined inside functions, or that otherwise can't be serialized as attribute lookups from global modules. """ - if Enum is not None and issubclass(obj, Enum): - return self._save_dynamic_enum(obj) - clsdict = dict(obj.__dict__) # copy dict proxy to a dict clsdict.pop('__weakref__', None) @@ -558,8 +561,8 @@ def save_dynamic_class(self, obj): for k in obj.__slots__: clsdict.pop(k, None) - # If type overrides __dict__ as a property, include it in the type kwargs. - # In Python 2, we can't set this attribute after construction. + # If type overrides __dict__ as a property, include it in the type + # kwargs. In Python 2, we can't set this attribute after construction. __dict__ = clsdict.pop('__dict__', None) if isinstance(__dict__, property): type_kwargs['__dict__'] = __dict__ @@ -586,11 +589,16 @@ def save_dynamic_class(self, obj): write(pickle.MARK) # Create and memoize an skeleton class with obj's name and bases. - tp = type(obj) - self.save_reduce(_make_skeleton_class, - (tp, obj.__name__, obj.__bases__, type_kwargs, - _ensure_tracking(obj), {}), - obj=obj) + if Enum is not None and issubclass(obj, Enum): + # Special handling of Enum subclasses + self._save_dynamic_enum(obj, clsdict) + else: + # "Regular" class definition: + tp = type(obj) + self.save_reduce(_make_skeleton_class, + (tp, obj.__name__, obj.__bases__, type_kwargs, + _ensure_tracking(obj), {}), + obj=obj) # Now save the rest of obj's __dict__. Any references to obj # encountered while saving will point to the skeleton class. @@ -1205,12 +1213,27 @@ def _rehydrate_skeleton_class(skeleton_class, class_dict): return skeleton_class -def _make_dynamic_enum(base, name, elements, doc, module, class_tracker_id, - extra): - class_def = base(name, elements, module=module, **extra) +def _make_skeleton_enum(bases, name, qualname, members, doc, module, + class_tracker_id, extra): + # enum always inherit from their base Enum class at the last subclass + # position: + enum_base = bases[-1] + metacls = enum_base.__class__ + _, first_enum = enum_base._get_mixins_(bases) + classdict = metacls.__prepare__(name, bases) + + for member_name, member_value in members.items(): + classdict[member_name] = member_value + enum_class = metacls.__new__(metacls, name, bases, classdict) + enum_class.__module__ = module + + # Python 2.7 compat + if qualname is not None: + enum_class.__qualname__ = qualname + if doc is not None: - class_def.__doc__ = doc - return _lookup_class_or_track(class_tracker_id, class_def) + enum_class.__doc__ = doc + return _lookup_class_or_track(class_tracker_id, enum_class) def _is_dynamic(module): diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 2bc25afc5..da17aabb5 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -1486,20 +1486,25 @@ def test_dataclass(self): def test_locally_defined_enum(self): enum = pytest.importorskip("enum") - class Color(enum.Enum): + class Color(str, enum.Enum): """3-element color space""" - RED = 1 - GREEN = 2 - BLUE = 3 + RED = "1" + GREEN = "2" + BLUE = "3" + + def is_green(self): + return self is Color.GREEN green1, green2, ClonedColor = pickle_depickle( [Color.GREEN, Color.GREEN, Color], protocol=self.protocol) assert green1 is green2 assert green1 is ClonedColor.GREEN assert green1 is not ClonedColor.BLUE + assert isinstance(green1, str) + assert green1.is_green() - # cloudpickle systematically tracks procenance of class definitions - # and ensure reconcylation in case of round trips: + # cloudpickle systematically tracks provenance of class definitions + # and ensure reconciliation in case of round trips: assert green1 is Color.GREEN assert ClonedColor is Color From 5bab490a3f8f88744b7012b7cf1b51c171fa65da Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 6 Mar 2019 15:37:23 +0100 Subject: [PATCH 19/28] More complex type hierarchy --- tests/cloudpickle_test.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index da17aabb5..eb096ed5f 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -1486,7 +1486,10 @@ def test_dataclass(self): def test_locally_defined_enum(self): enum = pytest.importorskip("enum") - class Color(str, enum.Enum): + class StringEnum(str, enum.Enum): + """Enum when all members are also (and must be) strings""" + + class Color(StringEnum): """3-element color space""" RED = "1" GREEN = "2" From 37260e4927624bed3be39dd5be35c989c0df81ad Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 6 Mar 2019 15:53:05 +0100 Subject: [PATCH 20/28] Cleanup --- cloudpickle/cloudpickle.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 88639a078..de094240e 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -509,12 +509,9 @@ def _save_dynamic_enum(self, obj, clsdict): # Python 2.7 with enum34 can have no qualname: qualname = getattr(obj, "__qualname__", None) - # future compat: set to None for now but allow to be a dict with extra - # datastructures if needed in the future - extra = None self.save_reduce(_make_skeleton_enum, (obj.__bases__, obj.__name__, qualname, members, doc, - obj.__module__, _ensure_tracking(obj), extra), + obj.__module__, _ensure_tracking(obj), None), obj=obj) # Cleanup the clsdict that will be passed to _rehydrate_skeleton_class: @@ -597,7 +594,7 @@ def save_dynamic_class(self, obj): tp = type(obj) self.save_reduce(_make_skeleton_class, (tp, obj.__name__, obj.__bases__, type_kwargs, - _ensure_tracking(obj), {}), + _ensure_tracking(obj), None), obj=obj) # Now save the rest of obj's __dict__. Any references to obj @@ -1188,8 +1185,8 @@ def _make_skeleton_class(type_constructor, name, bases, type_kwargs, definition under that id so that other instances stemming from the same class id will also reuse this class definition. - The extra variable is meant to be a dict use for forward compatibility - shall the need arise. + The "extra" variable is meant to be a dict (or None) that can be used for + forward compatibility shall the need arise. """ skeleton_class = type_constructor(name, bases, type_kwargs) return _lookup_class_or_track(class_tracker_id, skeleton_class) @@ -1215,7 +1212,20 @@ def _rehydrate_skeleton_class(skeleton_class, class_dict): def _make_skeleton_enum(bases, name, qualname, members, doc, module, class_tracker_id, extra): - # enum always inherit from their base Enum class at the last subclass + """Build dynamic enum with an empty __dict__ to be filled once memoized + + The creation of the enum class is inspired by the code of + EnumMeta._create_. + + If class_tracker_id is not None, try to lookup an existing enum definition + matching that id. If none is found, track a newly reconstructed enum + definition under that id so that other instances stemming from the same + class id will also reuse this enum definition. + + The "extra" variable is meant to be a dict (or None) that can be used for + forward compatibility shall the need arise. + """ + # enums always inherit from their base Enum class at the last subclass # position: enum_base = bases[-1] metacls = enum_base.__class__ From 0235a8be9ce6d0fc9ecb1c440e22b73fafff1e56 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 6 Mar 2019 16:22:56 +0100 Subject: [PATCH 21/28] Compat with older Python versions --- cloudpickle/cloudpickle.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index de094240e..dbd267745 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -519,7 +519,7 @@ def _save_dynamic_enum(self, obj, clsdict): for attrname in ["_generate_next_value_", "_member_names_", "_member_map_", "_member_type_", "_value2member_map_"]: - clsdict.pop(attrname) + clsdict.pop(attrname, None) for member in members: clsdict.pop(member) From ed8649d16259d8e3ae20573b7712648a81642a59 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 6 Mar 2019 17:08:13 +0100 Subject: [PATCH 22/28] Filter inherited dict members --- cloudpickle/cloudpickle.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index dbd267745..1ef9f67ee 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -533,6 +533,23 @@ def save_dynamic_class(self, obj): clsdict = dict(obj.__dict__) # copy dict proxy to a dict clsdict.pop('__weakref__', None) + # Filter inherited dict entries to make the pickle more lightweight + if len(obj.__bases__) == 1: + inherited_dict = obj.__bases__[0].__dict__ + else: + inherited_dict = {} + for base in reversed(obj.__bases__): + inherited_dict.update(base.__dict__) + to_remove = [] + for name, value in clsdict.items(): + try: + if value is inherited_dict[name]: + to_remove.append(name) + except KeyError: + pass + for name in to_remove: + clsdict.pop(name) + # For ABCMeta in python3.7+, remove _abc_impl as it is not picklable. # This is a fix which breaks the cache but this only makes the first # calls to issubclass slower. From 959b82aa2161c29d4833e2d57686e9a8918228f9 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 7 Mar 2019 16:37:57 +0100 Subject: [PATCH 23/28] Filter inherited methods on Python 2 --- cloudpickle/cloudpickle.py | 66 +++++++++++++++++++++++++------------- tests/cloudpickle_test.py | 24 ++++++++++++++ 2 files changed, 68 insertions(+), 22 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 1ef9f67ee..8ca816db8 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -84,12 +84,18 @@ from StringIO import StringIO string_types = (basestring,) # noqa PY3 = False + PY2 = True + PY2_WRAPPER_DESCRIPTOR_TYPE = type(object.__init__) + PY2_METHOD_WRAPPER_TYPE = type(object.__eq__) + PY2_CLASS_DICT_BLACKLIST = (PY2_METHOD_WRAPPER_TYPE, + PY2_WRAPPER_DESCRIPTOR_TYPE) else: types.ClassType = type from pickle import _Pickler as Pickler from io import BytesIO as StringIO string_types = (str,) PY3 = True + PY2 = False def _ensure_tracking(class_def): @@ -144,7 +150,7 @@ def inner(value): # NOTE: we are marking the cell variable as a free variable intentionally # so that we simulate an inner function instead of the outer function. This # is what gives us the ``nonlocal`` behavior in a Python 2 compatible way. - if not PY3: # pragma: no branch + if PY2: # pragma: no branch return types.CodeType( co.co_argcount, co.co_nlocals, @@ -252,7 +258,7 @@ def _walk_global_ops(code): global-referencing instructions in *code*. """ code = getattr(code, 'co_code', b'') - if not PY3: # pragma: no branch + if PY2: # pragma: no branch code = map(ord, code) n = len(code) @@ -282,6 +288,39 @@ def _walk_global_ops(code): yield op, instr.arg +def _extract_class_dict(cls): + """Retrieve a copy of the dict of a class without the inherited methods""" + clsdict = dict(cls.__dict__) # copy dict proxy to a dict + if len(cls.__bases__) == 1: + inherited_dict = cls.__bases__[0].__dict__ + else: + inherited_dict = {} + for base in reversed(cls.__bases__): + inherited_dict.update(base.__dict__) + to_remove = [] + for name, value in clsdict.items(): + try: + base_value = inherited_dict[name] + if value is base_value: + to_remove.append(name) + elif PY2: + # backward compat for Python 2 + if hasattr(value, "im_func"): + if value.im_func is getattr(base_value, "im_func", None): + to_remove.append(name) + elif isinstance(value, PY2_CLASS_DICT_BLACKLIST): + # On Python 2 we have no way to pickle those specific + # methods types nor to check that they are actually + # inherited. So we assume that they are always inherited + # from builtin types. + to_remove.append(name) + except KeyError: + pass + for name in to_remove: + clsdict.pop(name) + return clsdict + + class CloudPickler(Pickler): dispatch = Pickler.dispatch.copy() @@ -309,7 +348,7 @@ def save_memoryview(self, obj): dispatch[memoryview] = save_memoryview - if not PY3: # pragma: no branch + if PY2: # pragma: no branch def save_buffer(self, obj): self.save(str(obj)) @@ -530,26 +569,9 @@ def save_dynamic_class(self, obj): functions, or that otherwise can't be serialized as attribute lookups from global modules. """ - clsdict = dict(obj.__dict__) # copy dict proxy to a dict + clsdict = _extract_class_dict(obj) clsdict.pop('__weakref__', None) - # Filter inherited dict entries to make the pickle more lightweight - if len(obj.__bases__) == 1: - inherited_dict = obj.__bases__[0].__dict__ - else: - inherited_dict = {} - for base in reversed(obj.__bases__): - inherited_dict.update(base.__dict__) - to_remove = [] - for name, value in clsdict.items(): - try: - if value is inherited_dict[name]: - to_remove.append(name) - except KeyError: - pass - for name in to_remove: - clsdict.pop(name) - # For ABCMeta in python3.7+, remove _abc_impl as it is not picklable. # This is a fix which breaks the cache but this only makes the first # calls to issubclass slower. @@ -854,7 +876,7 @@ def save_inst(self, obj): save(stuff) write(pickle.BUILD) - if not PY3: # pragma: no branch + if PY2: # pragma: no branch dispatch[types.InstanceType] = save_inst def save_property(self, obj): diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index eb096ed5f..3e54498af 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -42,6 +42,7 @@ import cloudpickle from cloudpickle.cloudpickle import _is_dynamic from cloudpickle.cloudpickle import _make_empty_cell, cell_set +from cloudpickle.cloudpickle import _extract_class_dict from .testutils import subprocess_pickle_echo from .testutils import assert_run_python_script @@ -71,6 +72,29 @@ def _escape(raw_filepath): return raw_filepath.replace("\\", r"\\\\") +def test_extract_class_dict(): + class A(int): + def method(self): + return "a" + + class B: + B_CONSTANT = 42 + + def method(self): + return "b" + + class C(A, B): + C_CONSTANT = 43 + + def method_c(self): + return "c" + + clsdict = _extract_class_dict(C) + assert sorted(clsdict.keys()) == ["C_CONSTANT", "method_c"] + assert clsdict["C_CONSTANT"] == 43 + assert clsdict["method_c"](C()) == C().method_c() + + class CloudPickleTest(unittest.TestCase): protocol = cloudpickle.DEFAULT_PROTOCOL From 4057b59a8123628faa26680cfde279dae771d483 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 7 Mar 2019 17:05:37 +0100 Subject: [PATCH 24/28] Simplify handling of dynamic enums' __doc__ --- cloudpickle/cloudpickle.py | 11 ++--------- tests/cloudpickle_test.py | 5 ++++- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 8ca816db8..b989ab7db 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -540,16 +540,11 @@ def _save_dynamic_enum(self, obj, clsdict): """ members = dict((e.name, e.value) for e in obj) - if obj.__doc__ is not obj.__base__.__doc__: - doc = obj.__doc__ - else: - doc = None - # Python 2.7 with enum34 can have no qualname: qualname = getattr(obj, "__qualname__", None) self.save_reduce(_make_skeleton_enum, - (obj.__bases__, obj.__name__, qualname, members, doc, + (obj.__bases__, obj.__name__, qualname, members, obj.__module__, _ensure_tracking(obj), None), obj=obj) @@ -1249,7 +1244,7 @@ def _rehydrate_skeleton_class(skeleton_class, class_dict): return skeleton_class -def _make_skeleton_enum(bases, name, qualname, members, doc, module, +def _make_skeleton_enum(bases, name, qualname, members, module, class_tracker_id, extra): """Build dynamic enum with an empty __dict__ to be filled once memoized @@ -1280,8 +1275,6 @@ class id will also reuse this enum definition. if qualname is not None: enum_class.__qualname__ = qualname - if doc is not None: - enum_class.__doc__ = doc return _lookup_class_or_track(class_tracker_id, enum_class) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 3e54498af..0fdff5ab6 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -74,10 +74,12 @@ def _escape(raw_filepath): def test_extract_class_dict(): class A(int): + """A docstring""" def method(self): return "a" class B: + """B docstring""" B_CONSTANT = 42 def method(self): @@ -90,8 +92,9 @@ def method_c(self): return "c" clsdict = _extract_class_dict(C) - assert sorted(clsdict.keys()) == ["C_CONSTANT", "method_c"] + assert sorted(clsdict.keys()) == ["C_CONSTANT", "__doc__", "method_c"] assert clsdict["C_CONSTANT"] == 43 + assert clsdict["__doc__"] is None assert clsdict["method_c"](C()) == C().method_c() From 7f2b464cfc72c75b21d9457d5f6d7d65b2ccfae2 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 7 Mar 2019 17:27:04 +0100 Subject: [PATCH 25/28] Phrasing --- cloudpickle/cloudpickle.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index b989ab7db..310083d05 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -1259,8 +1259,8 @@ class id will also reuse this enum definition. The "extra" variable is meant to be a dict (or None) that can be used for forward compatibility shall the need arise. """ - # enums always inherit from their base Enum class at the last subclass - # position: + # enums always inherit from their base Enum class at the last position in + # the list of base classes: enum_base = bases[-1] metacls = enum_base.__class__ _, first_enum = enum_base._get_mixins_(bases) From 5c11202b8198e15f00cbbbea224db118a0e7f2ce Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 20 Mar 2019 11:18:34 +0100 Subject: [PATCH 26/28] [ci downstream] From b5e28af549d57fcf82a6765adf9c8fbff75f0dea Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 20 Mar 2019 12:40:56 +0100 Subject: [PATCH 27/28] Cleanup old, useless code --- cloudpickle/cloudpickle.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 310083d05..a2e6e9cc1 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -1263,7 +1263,6 @@ class id will also reuse this enum definition. # the list of base classes: enum_base = bases[-1] metacls = enum_base.__class__ - _, first_enum = enum_base._get_mixins_(bases) classdict = metacls.__prepare__(name, bases) for member_name, member_value in members.items(): From 0cbd0713bde36d88b92aa6643528a19aa68049ab Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Mon, 6 May 2019 13:40:01 +0200 Subject: [PATCH 28/28] Plan provenance tracking for 1.1.0 release --- CHANGES.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 21fd9b9dc..e21ae720d 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,4 +1,4 @@ -1.0.0 +1.1.0 ===== - Track the provenance of dynamic classes and enums so as to preseve the @@ -6,6 +6,9 @@ original class defintions. ([issue #246](https://github.com/cloudpipe/cloudpickle/pull/246)) +1.0.0 +===== + - Fix a bug making functions with keyword-only arguments forget the default values of these arguments after being pickled. ([issue #264](https://github.com/cloudpipe/cloudpickle/pull/264))