From 86757939a3bc7be55f721f269c45f7708166649a Mon Sep 17 00:00:00 2001 From: Scott Sanderson Date: Sat, 5 Aug 2017 14:55:32 -0400 Subject: [PATCH 1/6] BUG: Fix bug pickling namedtuple. Fixes a crash when trying to set `__dict__` onto a type when `__dict__` is a property. --- cloudpickle/cloudpickle.py | 26 ++++++++++++-------------- tests/cloudpickle_test.py | 11 ++++++++++- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index d168fcca2..4d5b668e1 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -408,15 +408,18 @@ def save_dynamic_class(self, obj): from global modules. """ clsdict = dict(obj.__dict__) # copy dict proxy to a dict - if not isinstance(clsdict.get('__dict__', None), property): - # don't extract dict that are properties - clsdict.pop('__dict__', None) - clsdict.pop('__weakref__', None) + clsdict.pop('__weakref__', None) - # hack as __new__ is stored differently in the __dict__ - new_override = clsdict.get('__new__', None) - if new_override: - clsdict['__new__'] = obj.__new__ + # On PyPy, __doc__ is a readonly attribute, so we need to include it in + # the initial skeleton class. This is safe because we know that the + # doc can't participate in a cycle with the original class. + type_kwargs = {'__doc__': clsdict.pop('__doc__', None)} + + # If type overrides __dict__ as a property, include it in the type kwargs. + # In Python 2, we can't set this attribute after construction. + __dict__ = clsdict.pop('__dict__', None) + if isinstance(__dict__, property): + type_kwargs['__dict__'] = __dict__ save = self.save write = self.write @@ -439,17 +442,12 @@ def save_dynamic_class(self, obj): # Mark the start of the args for the rehydration function. write(pickle.MARK) - # On PyPy, __doc__ is a readonly attribute, so we need to include it in - # the initial skeleton class. This is safe because we know that the - # doc can't participate in a cycle with the original class. - doc_dict = {'__doc__': clsdict.pop('__doc__', None)} - # Create and memoize an empty class with obj's name and bases. save(type(obj)) save(( obj.__name__, obj.__bases__, - doc_dict, + type_kwargs, )) write(pickle.REDUCE) self.memoize(obj) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index db3ce57dc..c4f7150d5 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -1,7 +1,7 @@ from __future__ import division import abc - +import collections import base64 import functools import imp @@ -701,5 +701,14 @@ def test_function_module_name(self): func = lambda x: x self.assertEqual(pickle_depickle(func).__module__, func.__module__) + def test_namedtuple(self): + MyTuple = collections.namedtuple('MyTuple', ['a', 'b', 'c']) + + t = MyTuple(1, 2, 3) + depickled_t = pickle_depickle(t) + + self.assertEqual((depickled_t.a, depickled_t.b, depickled_t.c), (1, 2, 3)) + self.assertEqual(vars(t), vars(depickled_t)) + if __name__ == '__main__': unittest.main() From b3f2af93302b8e4e37f9473c8f1d26dab31a4b04 Mon Sep 17 00:00:00 2001 From: Scott Sanderson Date: Wed, 30 Aug 2017 19:01:22 -0400 Subject: [PATCH 2/6] MAINT: Fix namedtuple tests. --- tests/cloudpickle_test.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index c4f7150d5..ea8b7ba6b 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -702,13 +702,18 @@ def test_function_module_name(self): self.assertEqual(pickle_depickle(func).__module__, func.__module__) def test_namedtuple(self): - MyTuple = collections.namedtuple('MyTuple', ['a', 'b', 'c']) + MyTuple = collections.namedtuple('MyTuple', ['a', 'b', 'c']) t = MyTuple(1, 2, 3) - depickled_t = pickle_depickle(t) + + depickled_t, depickled_MyTuple = pickle_depickle([t, MyTuple]) + self.assertIsInstance(depickled_t, depickled_MyTuple) self.assertEqual((depickled_t.a, depickled_t.b, depickled_t.c), (1, 2, 3)) - self.assertEqual(vars(t), vars(depickled_t)) + self.assertEqual((depickled_t[0], depickled_t[1], depickled_t[2]), (1, 2, 3)) + + self.assertEqual(depickled_MyTuple.__name__, 'MyTuple') + self.assertTrue(issubclass(depickled_MyTuple, tuple)) if __name__ == '__main__': unittest.main() From 28e9e7ee61ca389a97e7525653a8f06649bd58f4 Mon Sep 17 00:00:00 2001 From: Scott Sanderson Date: Wed, 30 Aug 2017 19:52:34 -0400 Subject: [PATCH 3/6] MAINT: Handle builtin type __new__ attrs. --- cloudpickle/cloudpickle.py | 28 ++++++++++++++++++++++++++++ tests/cloudpickle_test.py | 8 ++++++++ 2 files changed, 36 insertions(+) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 4d5b668e1..52bddb4c0 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -180,6 +180,32 @@ def _builtin_type(name): return getattr(types, name) +def _make__new__factory(type_): + def _factory(): + return type_.__new__ + return _factory + + +# NOTE: These need to be module globals so that they're pickleable as globals. +_get_dict_new = _make__new__factory(dict) +_get_frozenset_new = _make__new__factory(frozenset) +_get_list_new = _make__new__factory(list) +_get_set_new = _make__new__factory(set) +_get_tuple_new = _make__new__factory(tuple) +_get_object_new = _make__new__factory(object) + +# Pre-defined set of builtin_function_or_method instances that can be +# serialized. +_BUILTIN_TYPE_ATTRS = { + dict.__new__: _get_dict_new, + frozenset.__new__: _get_frozenset_new, + set.__new__: _get_set_new, + list.__new__: _get_list_new, + tuple.__new__: _get_tuple_new, + object.__new__: _get_object_new, +} + + if sys.version_info < (3, 4): def _walk_global_ops(code): """ @@ -579,6 +605,8 @@ def extract_func_data(self, func): def save_builtin_function(self, obj): if obj.__module__ == "__builtin__": return self.save_global(obj) + elif obj in _BUILTIN_TYPE_ATTRS: + return self.save_reduce(_BUILTIN_TYPE_ATTRS[obj], (), obj=obj) return self.save_function(obj) dispatch[types.BuiltinFunctionType] = save_builtin_function diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index ea8b7ba6b..4ef840b5f 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -715,5 +715,13 @@ def test_namedtuple(self): self.assertEqual(depickled_MyTuple.__name__, 'MyTuple') self.assertTrue(issubclass(depickled_MyTuple, tuple)) + def test_builtin_type__new__(self): + # Functions occasionally take the __new__ of these types as default + # parameters for factories. For example, on Python 3.3, + # `tuple.__new__` is a default value for some methods of namedtuple. + for t in list, tuple, set, frozenset, dict, object: + self.assertIs(pickle_depickle(t.__new__), t.__new__) + + if __name__ == '__main__': unittest.main() From a953a5993d9d34f72367a4ecffc7d7882a0a5f3a Mon Sep 17 00:00:00 2001 From: Scott Sanderson Date: Wed, 30 Aug 2017 20:31:46 -0400 Subject: [PATCH 4/6] MAINT: assertIs doesn't exist in 2.6. --- tests/cloudpickle_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 4ef840b5f..e0ade143a 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -720,7 +720,7 @@ def test_builtin_type__new__(self): # parameters for factories. For example, on Python 3.3, # `tuple.__new__` is a default value for some methods of namedtuple. for t in list, tuple, set, frozenset, dict, object: - self.assertIs(pickle_depickle(t.__new__), t.__new__) + self.assertTrue(pickle_depickle(t.__new__) is t.__new__) if __name__ == '__main__': From 70e3a463d37c714d33c0819c470e694f4bae9a24 Mon Sep 17 00:00:00 2001 From: Scott Sanderson Date: Sun, 22 Oct 2017 17:03:35 -0400 Subject: [PATCH 5/6] BUG: Hit the builtin type cache for any function. On PyPy, built-in type constructors aren't necessarily builtin_function_or_method instances, so we need to check for them in `save_function` rather than just in `save_builtin_function`. --- cloudpickle/cloudpickle.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 52bddb4c0..8596cf8ec 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -196,7 +196,7 @@ def _factory(): # Pre-defined set of builtin_function_or_method instances that can be # serialized. -_BUILTIN_TYPE_ATTRS = { +_BUILTIN_TYPE_CONSTRUCTORS = { dict.__new__: _get_dict_new, frozenset.__new__: _get_frozenset_new, set.__new__: _get_set_new, @@ -332,6 +332,18 @@ def save_function(self, obj, name=None): Determines what kind of function obj is (e.g. lambda, defined at interactive prompt, etc) and handles the pickling appropriately. """ + if obj in _BUILTIN_TYPE_CONSTRUCTORS: + # We keep a special-cased cache of built-in type constructors at + # global scope, because these functions are structured very + # differently in different python versions and implementations (for + # example, they're instances of types.BuiltinFunctionType in + # CPython, but they're ordinary types.FunctionType instances in + # PyPy). + # + # If the function we've received is in that cache, we just + # serialize it as a lookup into the cache. + return self.save_reduce(_BUILTIN_TYPE_CONSTRUCTORS[obj], (), obj=obj) + write = self.write if name is None: @@ -358,7 +370,7 @@ def save_function(self, obj, name=None): return self.save_global(obj, name) # a builtin_function_or_method which comes in as an attribute of some - # object (e.g., object.__new__, itertools.chain.from_iterable) will end + # object (e.g., itertools.chain.from_iterable) will end # up with modname "__main__" and so end up here. But these functions # have no __code__ attribute in CPython, so the handling for # user-defined functions below will fail. @@ -605,8 +617,6 @@ def extract_func_data(self, func): def save_builtin_function(self, obj): if obj.__module__ == "__builtin__": return self.save_global(obj) - elif obj in _BUILTIN_TYPE_ATTRS: - return self.save_reduce(_BUILTIN_TYPE_ATTRS[obj], (), obj=obj) return self.save_function(obj) dispatch[types.BuiltinFunctionType] = save_builtin_function From 6fb05ca8e34eda0457a8d6ca25441ae137e5905b Mon Sep 17 00:00:00 2001 From: Scott Sanderson Date: Sun, 22 Oct 2017 17:12:13 -0400 Subject: [PATCH 6/6] TEST: assertIsInstance doesn't exist on 2.6. --- tests/cloudpickle_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index e0ade143a..06d0460a1 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -707,7 +707,7 @@ def test_namedtuple(self): t = MyTuple(1, 2, 3) depickled_t, depickled_MyTuple = pickle_depickle([t, MyTuple]) - self.assertIsInstance(depickled_t, depickled_MyTuple) + self.assertTrue(isinstance(depickled_t, depickled_MyTuple)) self.assertEqual((depickled_t.a, depickled_t.b, depickled_t.c), (1, 2, 3)) self.assertEqual((depickled_t[0], depickled_t[1], depickled_t[2]), (1, 2, 3))