Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enums #101

Closed
mrocklin opened this issue Jun 16, 2017 · 11 comments
Closed

Enums #101

mrocklin opened this issue Jun 16, 2017 · 11 comments

Comments

@mrocklin
Copy link
Contributor

This fails

import enum
class MyEnum(enum.Enum):
     SPAM = 'SPAM'

import cloudpickle
cloudpickle.dumps(MyEnum.SPAM)
---------------------------------------------------------------------------
RecursionError                            Traceback (most recent call last)
/home/mrocklin/Software/anaconda/lib/python3.6/site-packages/cloudpickle/cloudpickle.py in dump(self, obj)
    145         try:
--> 146             return Pickler.dump(self, obj)
    147         except RuntimeError as e:

/home/mrocklin/Software/anaconda/lib/python3.6/pickle.py in dump(self, obj)
    408             self.framer.start_framing()
--> 409         self.save(obj)
    410         self.write(STOP)

/home/mrocklin/Software/anaconda/lib/python3.6/pickle.py in save(self, obj, save_persistent_id)
    520         # Save the reduce() output and finally memoize the object
--> 521         self.save_reduce(obj=obj, *rv)
    522 

/home/mrocklin/Software/anaconda/lib/python3.6/site-packages/cloudpickle/cloudpickle.py in save_reduce(self, func, args, state, listitems, dictitems, obj)
    584         else:
--> 585             save(func)
    586             save(args)

/home/mrocklin/Software/anaconda/lib/python3.6/pickle.py in save(self, obj, save_persistent_id)
    489             if issc:
--> 490                 self.save_global(obj)
    491                 return

/home/mrocklin/Software/anaconda/lib/python3.6/site-packages/cloudpickle/cloudpickle.py in save_global(self, obj, name, pack)
    424 
--> 425             self.save_reduce(typ, (obj.__name__, obj.__bases__, d), obj=obj)
    426         else:

/home/mrocklin/Software/anaconda/lib/python3.6/site-packages/cloudpickle/cloudpickle.py in save_reduce(self, func, args, state, listitems, dictitems, obj)
    585             save(func)
--> 586             save(args)
    587             write(pickle.REDUCE)

/home/mrocklin/Software/anaconda/lib/python3.6/pickle.py in save(self, obj, save_persistent_id)
    475         if f is not None:
--> 476             f(self, obj) # Call unbound method with explicit self
    477             return

/home/mrocklin/Software/anaconda/lib/python3.6/pickle.py in save_tuple(self, obj)
    735             for element in obj:
--> 736                 save(element)
    737             # Subtle.  Same as in the big comment below.

/home/mrocklin/Software/anaconda/lib/python3.6/pickle.py in save(self, obj, save_persistent_id)
    475         if f is not None:
--> 476             f(self, obj) # Call unbound method with explicit self
    477             return

/home/mrocklin/Software/anaconda/lib/python3.6/pickle.py in save_dict(self, obj)
    820         self.memoize(obj)
--> 821         self._batch_setitems(obj.items())
    822 

/home/mrocklin/Software/anaconda/lib/python3.6/pickle.py in _batch_setitems(self, items)
    846                     save(k)
--> 847                     save(v)
    848                 write(SETITEMS)

/home/mrocklin/Software/anaconda/lib/python3.6/pickle.py in save(self, obj, save_persistent_id)
    520         # Save the reduce() output and finally memoize the object
--> 521         self.save_reduce(obj=obj, *rv)
    522 

/home/mrocklin/Software/anaconda/lib/python3.6/site-packages/cloudpickle/cloudpickle.py in save_reduce(self, func, args, state, listitems, dictitems, obj)
    600         if dictitems is not None:
--> 601             self._batch_setitems(dictitems)
    602 

/home/mrocklin/Software/anaconda/lib/python3.6/pickle.py in _batch_setitems(self, items)
    851                 save(k)
--> 852                 save(v)
    853                 write(SETITEM)

/home/mrocklin/Software/anaconda/lib/python3.6/pickle.py in save(self, obj, save_persistent_id)
    520         # Save the reduce() output and finally memoize the object
--> 521         self.save_reduce(obj=obj, *rv)
    522 

/home/mrocklin/Software/anaconda/lib/python3.6/site-packages/cloudpickle/cloudpickle.py in save_reduce(self, func, args, state, listitems, dictitems, obj)
    584         else:
--> 585             save(func)
    586             save(args)

/home/mrocklin/Software/anaconda/lib/python3.6/pickle.py in save(self, obj, save_persistent_id)
    489             if issc:
--> 490                 self.save_global(obj)
    491                 return

/home/mrocklin/Software/anaconda/lib/python3.6/site-packages/cloudpickle/cloudpickle.py in save_global(self, obj, name, pack)
    424 
--> 425             self.save_reduce(typ, (obj.__name__, obj.__bases__, d), obj=obj)
    426         else:

/home/mrocklin/Software/anaconda/lib/python3.6/site-packages/cloudpickle/cloudpickle.py in save_reduce(self, func, args, state, listitems, dictitems, obj)
    585             save(func)
--> 586             save(args)
    587             write(pickle.REDUCE)

/home/mrocklin/Software/anaconda/lib/python3.6/pickle.py in save(self, obj, save_persistent_id)
    475         if f is not None:
--> 476             f(self, obj) # Call unbound method with explicit self
    477             return

/home/mrocklin/Software/anaconda/lib/python3.6/pickle.py in save_tuple(self, obj)
    735             for element in obj:
--> 736                 save(element)
    737             # Subtle.  Same as in the big comment below.

/home/mrocklin/Software/anaconda/lib/python3.6/pickle.py in save(self, obj, save_persistent_id)
    475         if f is not None:
--> 476             f(self, obj) # Call unbound method with explicit self
    477             return

/home/mrocklin/Software/anaconda/lib/python3.6/pickle.py in save_dict(self, obj)
    820         self.memoize(obj)
--> 821         self._batch_setitems(obj.items())
    822 

/home/mrocklin/Software/anaconda/lib/python3.6/pickle.py in _batch_setitems(self, items)
    846                     save(k)
--> 847                     save(v)
    848                 write(SETITEMS)

/home/mrocklin/Software/anaconda/lib/python3.6/pickle.py in save(self, obj, save_persistent_id)
    475         if f is not None:
--> 476             f(self, obj) # Call unbound method with explicit self
    477             return

/home/mrocklin/Software/anaconda/lib/python3.6/pickle.py in save_dict(self, obj)
    820         self.memoize(obj)
--> 821         self._batch_setitems(obj.items())
    822 

/home/mrocklin/Software/anaconda/lib/python3.6/pickle.py in _batch_setitems(self, items)
    851                 save(k)
--> 852                 save(v)
    853                 write(SETITEM)

/home/mrocklin/Software/anaconda/lib/python3.6/pickle.py in save(self, obj, save_persistent_id)
    520         # Save the reduce() output and finally memoize the object
--> 521         self.save_reduce(obj=obj, *rv)
    522 

/home/mrocklin/Software/anaconda/lib/python3.6/site-packages/cloudpickle/cloudpickle.py in save_reduce(self, func, args, state, listitems, dictitems, obj)
    584         else:
--> 585             save(func)
    586             save(args)

/home/mrocklin/Software/anaconda/lib/python3.6/pickle.py in save(self, obj, save_persistent_id)
    489             if issc:
--> 490                 self.save_global(obj)
    491                 return

/home/mrocklin/Software/anaconda/lib/python3.6/site-packages/cloudpickle/cloudpickle.py in save_global(self, obj, name, pack)
    424 
--> 425             self.save_reduce(typ, (obj.__name__, obj.__bases__, d), obj=obj)
    426         else:

/home/mrocklin/Software/anaconda/lib/python3.6/site-packages/cloudpickle/cloudpickle.py in save_reduce(self, func, args, state, listitems, dictitems, obj)
    585             save(func)
--> 586             save(args)
    587             write(pickle.REDUCE)

/home/mrocklin/Software/anaconda/lib/python3.6/pickle.py in save(self, obj, save_persistent_id)
    475         if f is not None:
--> 476             f(self, obj) # Call unbound method with explicit self
    477             return

/home/mrocklin/Software/anaconda/lib/python3.6/pickle.py in save_tuple(self, obj)
    735             for element in obj:
--> 736                 save(element)
    737             # Subtle.  Same as in the big comment below.

/home/mrocklin/Software/anaconda/lib/python3.6/pickle.py in save(self, obj, save_persistent_id)
    475         if f is not None:
--> 476             f(self, obj) # Call unbound method with explicit self
    477             return

/home/mrocklin/Software/anaconda/lib/python3.6/pickle.py in save_dict(self, obj)
    820         self.memoize(obj)
--> 821         self._batch_setitems(obj.items())
    822 

/home/mrocklin/Software/anaconda/lib/python3.6/pickle.py in _batch_setitems(self, items)
    846                     save(k)
--> 847                     save(v)
    848                 write(SETITEMS)

... last 10 frames repeated, from the frame below ...

/home/mrocklin/Software/anaconda/lib/python3.6/pickle.py in save(self, obj, save_persistent_id)
    520         # Save the reduce() output and finally memoize the object
--> 521         self.save_reduce(obj=obj, *rv)
    522 

RecursionError: maximum recursion depth exceeded

During handling of the above exception, another exception occurred:

PicklingError                             Traceback (most recent call last)
<ipython-input-3-d64a2267ce31> in <module>()
----> 1 cloudpickle.dumps(MyEnum.SPAM)

/home/mrocklin/Software/anaconda/lib/python3.6/site-packages/cloudpickle/cloudpickle.py in dumps(obj, protocol)
    704 
    705     cp = CloudPickler(file,protocol)
--> 706     cp.dump(obj)
    707 
    708     return file.getvalue()

/home/mrocklin/Software/anaconda/lib/python3.6/site-packages/cloudpickle/cloudpickle.py in dump(self, obj)
    148             if 'recursion' in e.args[0]:
    149                 msg = """Could not pickle object as excessively deep recursion required."""
--> 150                 raise pickle.PicklingError(msg)
    151 
    152     def save_memoryview(self, obj):

PicklingError: Could not pickle object as excessively deep recursion required.

Originally reported here: dask/distributed#1178 by @AndrewPashkin

@llllllllll
Copy link
Contributor

I believe the root of this issue is that the class dict holds instances of the type itself.
A more simple reproduction case is:

In [1]: class C:
   ...:     pass
   ...: 

In [2]: C.c = C()

In [3]: cp.dumps(C)

@AndreiPashkin
Copy link

AndreiPashkin commented Jun 17, 2017

That's correct. Standard pickle can handle such cases:

In [64]: class C():
    ...:     pass
    ...: 

In [65]: C.c = C()

In [66]: import pickle

In [67]: pickle.dumps(C)
Out[67]: b'\x80\x03c__main__\nC\nq\x00.'

In [77]: assert hasattr(pickle.loads(pickle.dumps(C)), 'c')

@ssanderson
Copy link

ssanderson commented Jul 2, 2017

@mrocklin @AndrewPashkin as of #102 trying to serialize an enum no longer triggers an infinite recursion, and it works for enums defined as non-__main__ globals (that may have already been the case before). For dynamic enums, we now crash with an error in the Enum metaclass while deserializing. Assuming we want cloudpickle to support dynamic Enums, the right fix is probably to just add special case dispatches for serializing Enum and EnumMeta, since they're standard library types.

One worry I'd have about supporting dynamic enums is that it's idiomatic to use `is` to compare Enum values, which could cause unexpected results if users compare values produced by separate calls to loads. Do you have a sense of whether that would be a significant footgun in practical usage of cloudpickle?

@mrocklin
Copy link
Contributor Author

mrocklin commented Jul 2, 2017

I personally have no practical experience with Enum types. I was just pushing a Dask error upstream here.

@AndreiPashkin
Copy link

AndreiPashkin commented Jul 2, 2017

@ssanderson

it's idiomatic to use is to compare Enum values

Indeed:
https://docs.python.org/3/library/enum.html#comparisons

Do you have a sense of whether that would be significant footgun in practical usage of cloudpickle?

Would `is` fail to work only in the case of dynamic Enums?

@ssanderson
Copy link

@AndrewPashkin `is` would work sometimes. `a is b` means "a occupies the same memory as b", which means that `is` comparisons wouldn't work between enum members that were unpickled by different calls to load. For example, if you have a pickled object containing values from an Enum and you unpickle that object twice, you'll get two different copies of the Enum, which means `is` comparisons between the two copies of the Enum won't work as expected.

@chebee7i
Copy link

Is this expected soon?

@jakirkham
Copy link
Member

jakirkham commented Apr 2, 2018

Have retested the OP code with cloudpickle version 0.5.2 on Python 3 and it runs correctly without exception.

That said, it appears the round trip fails. Perhaps related to issue ( #117 ).

In [1]: import enum
   ...: class MyEnum(enum.Enum):
   ...:      SPAM = 'SPAM'
   ...: 
   ...: import cloudpickle
   ...: cloudpickle.loads(cloudpickle.dumps(MyEnum.SPAM))
   ...: 
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-1-3cc75bf49a58> in <module>()
      4 
      5 import cloudpickle
----> 6 cloudpickle.loads(cloudpickle.dumps(MyEnum.SPAM))

/zopt/conda2/envs/nanshenv3/lib/python3.6/enum.py in __new__(metacls, cls, bases, classdict)
    133         # save enum items into separate mapping so they don't get baked into
    134         # the new class
--> 135         enum_members = {k: classdict[k] for k in classdict._member_names}
    136         for name in classdict._member_names:
    137             del classdict[name]

AttributeError: 'dict' object has no attribute '_member_names'

@ogrisel
Copy link
Contributor

ogrisel commented Feb 14, 2019

For reference, I started a new similar PR in #246 but, as discussed in the comments, I don't think the current cloudpickle behavior is fine. We would need an explicit scoping mechanism to make it possible for 2 pickle strings that hold instances of the same original dynamically defined class/type to share the reconstructed type definition when loaded, while also making it possible to redefine classes with the same name in the original __main__ module and have those code changes reflected in subsequent dumps/loads on worker nodes.

@pierreglaser
Copy link
Member

Now that #246 is merged, we can close this.

@jakirkham
Copy link
Member

Confirmed as fixed.

In [1]: import enum 
   ...: class MyEnum(enum.Enum): 
   ...:      SPAM = 'SPAM' 
   ...:  
   ...: import cloudpickle 
   ...: cloudpickle.dumps(MyEnum.SPAM)                                                                                  
Out[1]: b'\x80\x04\x95\xe9\x00\x00\x00\x00\x00\x00\x00\x8c\x17cloudpickle.cloudpickle\x94\x8c\x19_rehydrate_skeleton_class\x94\x93\x94(h\x00\x8c\x13_make_skeleton_enum\x94\x93\x94(\x8c\x04enum\x94\x8c\x04Enum\x94\x93\x94\x85\x94\x8c\x06MyEnum\x94h\t}\x94\x8c\x04SPAM\x94h\x0bs\x8c\x08__main__\x94\x8c a3c0e0e72031415e9f99a4c26e525d65\x94Nt\x94R\x94}\x94(\x8c\n__module__\x94h\x0c\x8c\x07__new__\x94h\x05\x8c\x0cEnum.__new__\x94\x93\x94utRh\x0b\x85\x94R\x94.'

Thanks @pierreglaser!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

8 participants