-
Notifications
You must be signed in to change notification settings - Fork 22
/
protocol_0.go
833 lines (697 loc) · 20.3 KB
/
protocol_0.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
package stalecucumber
import (
"errors"
"fmt"
"math/big"
"strconv"
"unicode/utf16"
) //import "unicode/utf8"
/**
Opcode: INT
Push an integer or bool.
The argument is a newline-terminated decimal literal string.
The intent may have been that this always fit in a short Python int,
but INT can be generated in pickles written on a 64-bit box that
require a Python long on a 32-bit box. The difference between this
and LONG then is that INT skips a trailing 'L', and produces a short
int whenever possible.
Another difference is due to that, when bool was introduced as a
distinct type in 2.3, builtin names True and False were also added to
2.2.2, mapping to ints 1 and 0. For compatibility in both directions,
True gets pickled as INT + "I01\n", and False as INT + "I00\n".
Leading zeroes are never produced for a genuine integer. The 2.3
(and later) unpicklers special-case these and return bool instead;
earlier unpicklers ignore the leading "0" and return the int.
**
Stack before: []
Stack after: [int_or_bool]
**/
// opcode_INT reads the opcode argument with readString and pushes either a
// bool (for the exact literals "01" and "00") or the argument parsed as a
// base-10 int64. A read failure or an unparsable/out-of-range literal is
// returned as the error and nothing is pushed.
func (pm *PickleMachine) opcode_INT() error {
	literal, err := pm.readString()
	if err != nil {
		return err
	}

	// "01" and "00" are the boolean sentinels; anything else is numeric.
	switch literal {
	case "01":
		pm.push(true)
		return nil
	case "00":
		pm.push(false)
		return nil
	}

	value, err := strconv.ParseInt(literal, 10, 64)
	if err != nil {
		return err
	}
	pm.push(value)
	return nil
}
/**
Opcode: LONG
Push a long integer.
The same as INT, except that the literal ends with 'L', and always
unpickles to a Python long. There doesn't seem a real purpose to the
trailing 'L'.
Note that LONG takes time quadratic in the number of digits when
unpickling (this is simply due to the nature of decimal->binary
conversion). Proto 2 added linear-time (in C; still quadratic-time
in Python) LONG1 and LONG4 opcodes.
**
Stack before: []
Stack after: [long]
**/
// opcode_LONG reads the opcode argument with readString, requires it to end
// in 'L', and pushes the preceding integer literal as a *big.Int.
//
// The whole literal must be a valid integer: a numeric prefix followed by
// garbage (e.g. "12junkL") is rejected instead of being silently truncated.
// Base 0 is used so that base prefixes such as "0x" are still auto-detected.
//
// Returns an error when the read fails, the argument is empty, the trailing
// 'L' is missing, or the literal is not a valid integer.
func (pm *PickleMachine) opcode_LONG() error {
	str, err := pm.readString()
	if err != nil {
		return err
	}
	if len(str) == 0 {
		return fmt.Errorf("String for LONG opcode cannot be zero length")
	}

	last := str[len(str)-1]
	if last != 'L' {
		return fmt.Errorf("String for LONG opcode must end with %q not %q", "L", last)
	}

	literal := str[:len(str)-1]
	// SetString only succeeds when the entire string is consumed.
	value, ok := new(big.Int).SetString(literal, 0)
	if !ok {
		return fmt.Errorf("String for LONG opcode is not a valid integer literal: %q", literal)
	}

	pm.push(value)
	return nil
}
/**
Opcode: STRING
Push a Python string object.
The argument is a repr-style string, with bracketing quote characters,
and perhaps embedded escapes. The argument extends until the next
newline character.
**
Stack before: []
Stack after: [str]
**/
// unquoteInputs lists the quote arguments that are tried, in order, when a
// character is decoded with strconv.UnquoteChar: single quote (0x27), double
// quote (0x22), and finally 0.
var unquoteInputs = []byte{0x27, 0x22, 0x0}
// opcode_STRING reads the opcode argument with readString, checks that it is
// wrapped in a matching pair of single or double quotes, decodes the escapes
// between the quotes one character at a time, and pushes the result as a Go
// string.
//
// Returns an error when the read fails, the argument is shorter than two
// characters, the bracketing quotes do not match, or a character cannot be
// unquoted with any of the quote modes in unquoteInputs.
func (pm *PickleMachine) opcode_STRING() error {
	quoted, err := pm.readString()
	if err != nil {
		return err
	}

	// The argument must at least hold the opening and closing quote.
	if len(quoted) < 2 {
		return fmt.Errorf("For STRING opcode, argument has invalid length %d", len(quoted))
	}

	first, last := quoted[0], quoted[len(quoted)-1]
	singleQuoted := first == '\'' && last == '\''
	doubleQuoted := first == '"' && last == '"'
	if !singleQuoted && !doubleQuoted {
		return fmt.Errorf("For STRING opcode, argument has poorly formed value %q", quoted)
	}

	remaining := quoted[1 : len(quoted)-1]
	decoded := make([]rune, 0, len(remaining))
	for len(remaining) > 0 {
		var (
			r    rune
			tail string
		)
		// Try each quote mode until one accepts the next character.
		for _, quote := range unquoteInputs {
			if r, _, tail, err = strconv.UnquoteChar(remaining, quote); err == nil {
				break
			}
		}
		if err != nil {
			return fmt.Errorf("Read thus far %q. Failed to unquote character %c error:%v", string(decoded), remaining[0], err)
		}
		decoded = append(decoded, r)
		remaining = tail
	}

	pm.push(string(decoded))
	return nil
}
/**
Opcode: NONE
Push None on the stack.
**
Stack before: []
Stack after: [None]
**/
// opcode_NONE pushes a PickleNone value onto the stack. It never fails.
func (pm *PickleMachine) opcode_NONE() error {
	none := PickleNone{}
	pm.push(none)
	return nil
}
/**
Opcode: UNICODE
Push a Python Unicode string object.
The argument is a raw-unicode-escape encoding of a Unicode string,
and so may contain embedded escape sequences. The argument extends
until the next newline character.
**
Stack before: []
Stack after: [unicode]
**/
// opcode_UNICODE reads the opcode argument with readBytes, decodes it, and
// pushes the result as a Go string.
//
// Bytes above 127 are taken as a single code point each (Python's
// 'raw-unicode-escape' does not escape extended ASCII). Everything else is
// decoded with strconv.UnquoteChar, which expands escape sequences.
//
// Returns an error when the read fails or when a character cannot be
// unquoted with any of the quote modes in unquoteInputs. The error names the
// character at the failing position.
func (pm *PickleMachine) opcode_UNICODE() error {
	raw, err := pm.readBytes()
	if err != nil {
		return err
	}

	// Convert once; each step then slices the remainder instead of copying it
	// again for every unquote attempt.
	text := string(raw)
	decoded := make([]rune, 0, len(text))

	for pos := 0; pos < len(text); {
		b := text[pos]

		// Python 'raw-unicode-escape' doesn't escape extended ascii, so the
		// byte value is the code point.
		if b > 127 {
			decoded = append(decoded, utf16.Decode([]uint16{uint16(b)})...)
			pos++
			continue
		}

		// Multibyte unicode points are escaped, so use UnquoteChar to handle
		// those. Try each quote mode until one accepts the next character.
		rest := text[pos:]
		var r rune
		for _, quote := range unquoteInputs {
			var tail string
			r, _, tail, err = strconv.UnquoteChar(rest, quote)
			if err == nil {
				pos += len(rest) - len(tail)
				break
			}
		}
		if err != nil {
			// Report the character that actually failed, not the first one.
			return fmt.Errorf("Read thus far %q. Failed to unquote character %c error:%v", string(decoded), b, err)
		}
		decoded = append(decoded, r)
	}

	pm.push(string(decoded))
	return nil
}
/**
Opcode: FLOAT
Newline-terminated decimal float literal.
The argument is repr(a_float), and in general requires 17 significant
digits for roundtrip conversion to be an identity (this is so for
IEEE-754 double precision values, which is what Python float maps to
on most boxes).
In general, FLOAT cannot be used to transport infinities, NaNs, or
minus zero across boxes (or even on a single box, if the platform C
library can't read the strings it produces for such things -- Windows
is like that), but may do less damage than BINFLOAT on boxes with
greater precision or dynamic range than IEEE-754 double.
**
Stack before: []
Stack after: [float]
**/
// opcode_FLOAT reads the opcode argument with readString and pushes it as a
// float64.
//
// The whole argument must be a valid floating-point literal: strconv.ParseFloat
// rejects input such as "1.5abc" rather than parsing the numeric prefix and
// ignoring the rest. "inf", "infinity" and "nan" (any case, optionally signed)
// are accepted.
//
// Returns an error when the read fails or the literal is malformed or out of
// range for a float64.
func (pm *PickleMachine) opcode_FLOAT() error {
	str, err := pm.readString()
	if err != nil {
		return err
	}

	v, err := strconv.ParseFloat(str, 64)
	if err != nil {
		return err
	}

	pm.push(v)
	return nil
}
/**
Opcode: APPEND
Append an object to a list.
Stack before: ... pylist anyobject
Stack after: ... pylist+[anyobject]
although pylist is really extended in-place.
**
Stack before: [list, any]
Stack after: [list]
**/
// opcode_APPEND pops a value and then a list ([]interface{}) from the stack,
// appends the value to the list, and pushes the resulting slice.
//
// Returns an error when either pop fails or when the second item from the top
// is not a []interface{}. In the latter case both items have already been
// popped and nothing is pushed back.
func (pm *PickleMachine) opcode_APPEND() error {
	v, err := pm.pop()
	if err != nil {
		return err
	}

	listI, err := pm.pop()
	if err != nil {
		return err
	}

	list, ok := listI.([]interface{})
	if !ok {
		// The error must be returned: otherwise the nil slice from the failed
		// assertion would be appended to and pushed as if it were the list.
		return fmt.Errorf("Second item on top of stack must be of %T not %T", list, listI)
	}

	list = append(list, v)
	pm.push(list)
	return nil
}
/**
Opcode: LIST
Build a list out of the topmost stack slice, after markobject.
All the stack entries following the topmost markobject are placed into
a single Python list, which single list object replaces all of the
stack from the topmost markobject onward. For example,
Stack before: ... markobject 1 2 3 'abc'
Stack after: ... [1, 2, 3, 'abc']
**
Stack before: [mark, stackslice]
Stack after: [list]
**/
// opcode_LIST locates the mark with findMark, copies every stack entry above
// it into a new []interface{}, removes those entries with popAfterIndex, and
// pushes the new slice. Returns the error from findMark if no mark is found.
func (pm *PickleMachine) opcode_LIST() error {
	mark, err := pm.findMark()
	if err != nil {
		return err
	}

	// Collect everything that sits above the mark, in stack order.
	items := make([]interface{}, 0)
	for _, entry := range pm.Stack[mark+1:] {
		items = append(items, entry)
	}

	// Drop the collected entries, then replace them with the list.
	pm.popAfterIndex(mark)
	pm.push(items)
	return nil
}
/**
Opcode: TUPLE
Build a tuple out of the topmost stack slice, after markobject.
All the stack entries following the topmost markobject are placed into
a single Python tuple, which single tuple object replaces all of the
stack from the topmost markobject onward. For example,
Stack before: ... markobject 1 2 3 'abc'
Stack after: ... (1, 2, 3, 'abc')
**
Stack before: [mark, stackslice]
Stack after: [tuple]
**/
// opcode_TUPLE delegates to opcode_LIST, so whatever opcode_LIST pushes also
// represents a tuple, and any error opcode_LIST reports is returned
// unchanged.
func (pm *PickleMachine) opcode_TUPLE() error {
return pm.opcode_LIST()
}
/**
Opcode: DICT
Build a dict out of the topmost stack slice, after markobject.
All the stack entries following the topmost markobject are placed into
a single Python dict, which single dict object replaces all of the
stack from the topmost markobject onward. The stack slice alternates
key, value, key, value, .... For example,
Stack before: ... markobject 1 2 3 'abc'
Stack after: ... {1: 2, 3: 'abc'}
**
Stack before: [mark, stackslice]
Stack after: [dict]
**/
// opcode_DICT locates the mark with findMark, interprets the stack entries
// above it as alternating key, value, key, value, ..., builds a
// map[interface{}]interface{} from them, removes the entries with
// popAfterIndex, and pushes the map.
//
// Entries are paired by position, so a nil entry is stored like any other key
// or value instead of shifting the pairing of everything after it.
//
// Returns an error when no mark is found, when the number of entries above
// the mark is odd, or when building the map panics (for example on a key
// whose dynamic type is not hashable); the panic is recovered and converted
// into the returned error.
func (pm *PickleMachine) opcode_DICT() (err error) {
	// Turn a panic raised while inserting into the map into an ordinary error.
	defer func() {
		if r := recover(); r != nil {
			switch x := r.(type) {
			case string:
				err = errors.New(x)
			case error:
				err = x
			default:
				err = errors.New("Unknown panic")
			}
		}
	}()

	markIndex, err := pm.findMark()
	if err != nil {
		return err
	}

	items := pm.Stack[markIndex+1:]
	if len(items)%2 != 0 {
		return fmt.Errorf("For opcode DICT stack after mark contained an odd number of items, this is not valid")
	}

	v := make(map[interface{}]interface{}, len(items)/2)
	for i := 0; i < len(items); i += 2 {
		v[items[i]] = items[i+1]
	}

	pm.popAfterIndex(markIndex)
	pm.push(v)
	return nil
}
/**
Opcode: SETITEM
Add a key+value pair to an existing dict.
Stack before: ... pydict key value
Stack after: ... pydict
where pydict has been modified via pydict[key] = value.
**
Stack before: [dict, any, any]
Stack after: [dict]
**/
// opcode_SETITEM pops a value, a key and a dict from the stack (in that
// order), stores dict[key] = value, and pushes the dict back.
//
// Returns an error when any pop fails, when the third item from the top is
// not a map[interface{}]interface{}, or when the map assignment panics; the
// panic is recovered and converted into the returned error.
func (pm *PickleMachine) opcode_SETITEM() (err error) {
	// Convert a panic from the map assignment into the returned error.
	defer func() {
		recovered := recover()
		if recovered == nil {
			return
		}
		if msg, isString := recovered.(string); isString {
			err = errors.New(msg)
		} else if cause, isError := recovered.(error); isError {
			err = cause
		} else {
			err = errors.New("Unknown panic")
		}
	}()

	value, err := pm.pop()
	if err != nil {
		return err
	}

	key, err := pm.pop()
	if err != nil {
		return err
	}

	holder, err := pm.pop()
	if err != nil {
		return err
	}

	target, isDict := holder.(map[interface{}]interface{})
	if !isDict {
		return fmt.Errorf("For opcode SETITEM stack item 2 from top must be of type %T not %T", target, holder)
	}

	target[key] = value
	pm.push(target)
	return nil
}
/**
Opcode: POP
Discard the top stack item, shrinking the stack by one item.
**
Stack before: [any]
Stack after: []
**/
// opcode_POP removes the top stack item and discards it. Any error reported
// by pop is returned.
func (pm *PickleMachine) opcode_POP() error {
	if _, err := pm.pop(); err != nil {
		return err
	}
	return nil
}
/**
Opcode: DUP
Push the top stack item onto the stack again, duplicating it.
**
Stack before: [any]
Stack after: [any, any]
**/
// opcode_DUP is a stub: the DUP opcode is not supported, so it always returns
// ErrOpcodeNotImplemented and does not touch the stack.
func (pm *PickleMachine) opcode_DUP() error {
return ErrOpcodeNotImplemented
}
/**
Opcode: MARK
Push markobject onto the stack.
markobject is a unique object, used by other opcodes to identify a
region of the stack containing a variable number of objects for them
to work on. See markobject.doc for more detail.
**
Stack before: []
Stack after: [mark]
**/
// opcode_MARK records the stack index the mark is about to occupy in
// pm.lastMark and then pushes a PickleMark onto the stack. It never fails.
func (pm *PickleMachine) opcode_MARK() error {
	// The mark lands at the current stack length, so capture it before pushing.
	markPosition := len(pm.Stack)
	pm.lastMark = markPosition
	pm.push(PickleMark{})
	return nil
}
/**
Opcode: GET
Read an object from the memo and push it on the stack.
The index of the memo object to push is given by the newline-terminated
decimal string following. BINGET and LONG_BINGET are space-optimized
versions.
**
Stack before: []
Stack after: [any]
**/
// opcode_GET reads the opcode argument with readString, parses it as a
// decimal memo index, looks the entry up with readFromMemo, and pushes it.
//
// Returns an error when the read fails, the index is not a valid integer, or
// readFromMemo reports a failure.
func (pm *PickleMachine) opcode_GET() error {
	arg, err := pm.readString()
	if err != nil {
		return err
	}

	slot, err := strconv.Atoi(arg)
	if err != nil {
		return err
	}

	memoized, err := pm.readFromMemo(int64(slot))
	if err != nil {
		return err
	}

	//TODO test if the object we are about to push is mutable
	//if so it needs to be somehow deep copied first
	pm.push(memoized)
	return nil
}
/**
Opcode: PUT
Store the stack top into the memo. The stack is not popped.
The index of the memo location to write into is given by the newline-
terminated decimal string following. BINPUT and LONG_BINPUT are
space-optimized versions.
**
Stack before: []
Stack after: []
**/
// opcode_PUT reads the opcode argument with readString, parses it as a
// decimal memo index, and stores the current top of the stack under that
// index with storeMemo. The stack itself is left unchanged.
//
// Returns ErrStackTooSmall (before reading the argument) when the stack is
// empty, or an error when the read fails or the index is not a valid integer.
func (pm *PickleMachine) opcode_PUT() error {
	// There has to be something on the stack to memoize.
	if len(pm.Stack) == 0 {
		return ErrStackTooSmall
	}

	arg, err := pm.readString()
	if err != nil {
		return err
	}

	slot, err := strconv.Atoi(arg)
	if err != nil {
		return err
	}

	top := pm.Stack[len(pm.Stack)-1]
	pm.storeMemo(int64(slot), top)
	return nil
}
/**
Opcode: GLOBAL
Push a global object (module.attr) on the stack.
Two newline-terminated strings follow the GLOBAL opcode. The first is
taken as a module name, and the second as a class name. The class
object module.class is pushed on the stack. More accurately, the
object returned by self.find_class(module, class) is pushed on the
stack, so unpickling subclasses can override this form of lookup.
**
Stack before: []
Stack after: [any]
**/
// opcode_GLOBAL reads two arguments with readString and pushes a
// globalSentinel carrying the first as Package and the second as Name. The
// sentinel stands in for the referenced global; opcode_REDUCE consumes it.
//
// Returns the error from readString if either read fails.
func (pm *PickleMachine) opcode_GLOBAL() error {
	module, err := pm.readString()
	if err != nil {
		return err
	}

	attr, err := pm.readString()
	if err != nil {
		return err
	}

	// The sentinel represents the type; nothing is resolved at this point.
	ref := globalSentinel{Package: module, Name: attr}
	pm.push(ref)
	return nil
}
// UnreducibleValueError is returned by opcode_REDUCE when one of its two
// operands does not have the type REDUCE requires.
type UnreducibleValueError struct {
	// Value is the stack item that could not be used.
	Value interface{}
}

// Error implements the error interface. The message includes the dynamic type
// and the default formatting of the offending value.
func (e UnreducibleValueError) Error() string {
	return fmt.Sprintf("Cannot reduce (%T) %v", e.Value, e.Value)
}
/**
Opcode: REDUCE
Push an object built from a callable and an argument tuple.
The opcode is named to remind of the __reduce__() method.
Stack before: ... callable pytuple
Stack after: ... callable(*pytuple)
The callable and the argument tuple are the first two items returned
by a __reduce__ method. Applying the callable to the argtuple is
supposed to reproduce the original object, or at least get it started.
If the __reduce__ method returns a 3-tuple, the last component is an
argument to be passed to the object's __setstate__, and then the REDUCE
opcode is followed by code to create setstate's argument, and then a
BUILD opcode to apply __setstate__ to that argument.
If type(callable) is not ClassType, REDUCE complains unless the
callable has been registered with the copy_reg module's
safe_constructors dict, or the callable has a magic
'__safe_for_unpickling__' attribute with a true value. I'm not sure
why it does this, but I've sure seen this complaint often enough when
I didn't want to <wink>.
**
Stack before: [any, any]
Stack after: [any]
**/
// opcode_REDUCE pops an argument tuple and then a callable from the stack,
// asks pm.resolver to resolve the callable's package and name with those
// arguments, and pushes the result.
//
// Returns an error when either pop fails, an UnreducibleValueError when the
// callable is not a globalSentinel or the arguments are not a []interface{}
// (checked in that order), or the error reported by the resolver.
func (pm *PickleMachine) opcode_REDUCE() error {
	argTuple, err := pm.pop()
	if err != nil {
		return err
	}

	callable, err := pm.pop()
	if err != nil {
		return err
	}

	// Sentinel should have been placed on the stack by the opcode_GLOBAL function
	target, isGlobal := callable.(globalSentinel)
	if !isGlobal {
		return UnreducibleValueError{Value: callable}
	}

	// Python docs say tuple on the stack, so should always be a slice here in Golang
	args, isSlice := argTuple.([]interface{})
	if !isSlice {
		return UnreducibleValueError{Value: argTuple}
	}

	resolved, err := pm.resolver.Resolve(target.Package, target.Name, args)
	if err != nil {
		return err
	}

	pm.push(resolved)
	return nil
}
/**
Opcode: BUILD
Finish building an object, via __setstate__ or dict update.
Stack before: ... anyobject argument
Stack after: ... anyobject
where anyobject may have been mutated, as follows:
If the object has a __setstate__ method,
anyobject.__setstate__(argument)
is called.
Else the argument must be a dict, the object must have a __dict__, and
the object is updated via
anyobject.__dict__.update(argument)
This may raise RuntimeError in restricted execution mode (which
disallows access to __dict__ directly); in that case, the object
is updated instead via
for k, v in argument.items():
anyobject[k] = v
**
Stack before: [any, any]
Stack after: [any]
**/
// opcode_BUILD pops a state argument and then an instanceSentinel from the
// stack, asks pm.resolver to resolve the sentinel's package and name with the
// sentinel's arguments followed by the state argument, and pushes the result.
//
// The argument list handed to the resolver is a fresh slice. Appending
// directly to sentinel.Args could write into spare capacity of a backing
// array that other copies of the same sentinel still share.
//
// Returns an error when either pop fails, an UnbuildableValueError when the
// second item from the top is not an instanceSentinel, or the error reported
// by the resolver.
func (pm *PickleMachine) opcode_BUILD() error {
	obj, err := pm.pop()
	if err != nil {
		return err
	}

	funcName, err := pm.pop()
	if err != nil {
		return err
	}

	// Sentinel should have been placed on the stack by the opcode_INST function
	sentinel, ok := funcName.(instanceSentinel)
	if !ok {
		return UnbuildableValueError{Value: funcName}
	}

	// Copy instead of appending in place so the sentinel's slice is never
	// mutated behind the back of anything else holding it.
	args := make([]interface{}, 0, len(sentinel.Args)+1)
	args = append(args, sentinel.Args...)
	args = append(args, obj)

	result, err := pm.resolver.Resolve(sentinel.Package, sentinel.Name, args)
	if err != nil {
		return err
	}

	pm.push(result)
	return nil
}
// UnbuildableValueError is returned by opcode_BUILD when the item it expects
// to be an instanceSentinel has a different type.
type UnbuildableValueError struct {
	// Value is the stack item that could not be used.
	Value interface{}
}

// Error implements the error interface. The message includes the dynamic type
// and the default formatting of the offending value.
func (e UnbuildableValueError) Error() string {
	return fmt.Sprintf("Cannot build (%T) %v", e.Value, e.Value)
}
/**
Opcode: INST
Build a class instance.
This is the protocol 0 version of protocol 1's OBJ opcode.
INST is followed by two newline-terminated strings, giving a
module and class name, just as for the GLOBAL opcode (and see
GLOBAL for more details about that). self.find_class(module, name)
is used to get a class object.
In addition, all the objects on the stack following the topmost
markobject are gathered into a tuple and popped (along with the
topmost markobject), just as for the TUPLE opcode.
Now it gets complicated. If all of these are true:
+ The argtuple is empty (markobject was at the top of the stack
at the start).
+ It's an old-style class object (the type of the class object is
ClassType).
+ The class object does not have a __getinitargs__ attribute.
then we want to create an old-style class instance without invoking
its __init__() method (pickle has waffled on this over the years; not
calling __init__() is current wisdom). In this case, an instance of
an old-style dummy class is created, and then we try to rebind its
__class__ attribute to the desired class object. If this succeeds,
the new instance object is pushed on the stack, and we're done. In
restricted execution mode it can fail (assignment to __class__ is
disallowed), and I'm not really sure what happens then -- it looks
like the code ends up calling the class object's __init__ anyway,
via falling into the next case.
Else (the argtuple is not empty, it's not an old-style class object,
or the class object does have a __getinitargs__ attribute), the code
first insists that the class object have a __safe_for_unpickling__
attribute. Unlike as for the __safe_for_unpickling__ check in REDUCE,
it doesn't matter whether this attribute has a true or false value, it
only matters whether it exists (XXX this is a bug; cPickle
requires the attribute to be true). If __safe_for_unpickling__
doesn't exist, UnpicklingError is raised.
Else (the class object does have a __safe_for_unpickling__ attr),
the class object obtained from INST's arguments is applied to the
argtuple obtained from the stack, and the resulting instance object
is pushed on the stack.
NOTE: checks for __safe_for_unpickling__ went away in Python 2.3.
**
Stack before: [mark, stackslice]
Stack after: [any]
**/
// opcode_INST reads two arguments with readString (used as Package and Name),
// gathers every stack entry above the mark into an argument slice, removes
// those entries with popAfterIndex, and pushes an instanceSentinel holding
// the package, name and arguments. opcode_BUILD consumes the sentinel.
//
// Returns an error when either read fails or findMark finds no mark.
func (pm *PickleMachine) opcode_INST() error {
	module, err := pm.readString()
	if err != nil {
		return err
	}

	className, err := pm.readString()
	if err != nil {
		return err
	}

	mark, err := pm.findMark()
	if err != nil {
		return err
	}

	// Everything above the mark becomes the instance's argument list.
	ctorArgs := make([]interface{}, 0, 1)
	for _, entry := range pm.Stack[mark+1:] {
		ctorArgs = append(ctorArgs, entry)
	}

	//Pop the values off the stack
	pm.popAfterIndex(mark)

	// Push a sentinel object representing the type
	pm.push(instanceSentinel{Package: module, Name: className, Args: ctorArgs})
	return nil
}
/**
Opcode: STOP
Stop the unpickling machine.
Every pickle ends with this opcode. The object at the top of the stack
is popped, and that's the result of unpickling. The stack should be
empty then.
**
Stack before: [any]
Stack after: []
**/
// opcode_STOP always returns ErrOpcodeStopped and does not touch the stack.
// NOTE(review): presumably the dispatch loop treats this sentinel as normal
// termination and reads the result from the stack itself; confirm against
// the caller.
func (pm *PickleMachine) opcode_STOP() error {
return ErrOpcodeStopped
}
/**
Opcode: PERSID
Push an object identified by a persistent ID.
The pickle module doesn't define what a persistent ID means. PERSID's
argument is a newline-terminated str-style (no embedded escapes, no
bracketing quote characters) string, which *is* "the persistent ID".
The unpickler passes this string to self.persistent_load(). Whatever
object that returns is pushed on the stack. There is no implementation
of persistent_load() in Python's unpickler: it must be supplied by an
unpickler subclass.
**
Stack before: []
Stack after: [any]
**/
// opcode_PERSID is a stub: persistent IDs are not supported, so it always
// returns ErrOpcodeNotImplemented without reading an argument or touching
// the stack.
func (pm *PickleMachine) opcode_PERSID() error {
return ErrOpcodeNotImplemented
}