Skip to content

Commit

Permalink
codec: clean up, improved symbol handling and numeric overflow support
Browse files Browse the repository at this point in the history
Binc Handling and Symbols

    codec: remove AsSymbols from EncodeOption, and allow a modified version for binc only

    Currently, encDriver defines an EncodeSymbol function, that is only called
    when writing struct fields or map keys of string type. If we take out the differentiation,
    this can easily be handled by a specific handle, as it can track what the current
    containerState is.

    Instead, we remove all vestiges of Symbol support from encode.go and have it
    be something unique to binc.

    This also reduces the work that is done for all handles.

codec: remove overflow checking from Handles - manage it at the framework

    This means the following:
    - change decDriver DecodeUint and DecodeInt to just DecodeUint64 and DecodeInt64
    - add overflow check logic into checkOverflow type
    - use the overflow check logic in one-line statements across codebase

codec: fast-path: hoist conditional check out of loop

Misc cleanup
- clean up panic calls
- clean up some comments
- clean up error messages
- optimize tracking TypeInfos by reusing same array if possible
  • Loading branch information
ugorji committed Dec 12, 2017
1 parent e60f01b commit 6e9891a
Show file tree
Hide file tree
Showing 20 changed files with 5,812 additions and 4,189 deletions.
14 changes: 7 additions & 7 deletions codec/0doc.go
Original file line number Diff line number Diff line change
Expand Up @@ -225,11 +225,11 @@ package codec
// - In Go 1.10, when mid-stack inlining is enabled,
// we should use committed functions for writeXXX and readXXX calls.
// This involves uncommenting the methods for decReaderSwitch and encWriterSwitch
// and using those (decReaderSwitch and encWriterSwitch in all handles
// and using those (decReaderSwitch and encWriterSwitch) in all handles
// instead of encWriter and decReader.
// - removing conditionals used to avoid calling no-op functions via interface calls.
// esep, etc.
// It *should* make the code cleaner, and maybe more performant,
// as conditional branches are expensive.
// However, per https://groups.google.com/forum/#!topic/golang-nuts/DNELyNnTzFA ,
// there is no optimization if calling an empty function via an interface.
// The benefit is that, for the (En|De)coder over []byte, the encWriter/decReader
// will be inlined, giving a performance bump for that typical case.
// However, it will only be inlined if mid-stack inlining is enabled,
// as we call panic to raise errors, and panic currently prevents inlining.
// - Clean up comments in the codebase
// Remove all unnecesssary comments, so code is clean.
55 changes: 36 additions & 19 deletions codec/binc.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,15 @@ const (

type bincEncDriver struct {
e *Encoder
h *BincHandle
w encWriter
m map[string]uint16 // symbols
b [scratchByteArrayLen]byte
s uint16 // symbols sequencer
// encNoSeparator
encDriverNoopContainerWriter
// c containerState
encDriverTrackContainerWriter
noBuiltInTypes
// encNoSeparator
}

// func (e *bincEncDriver) IsBuiltinType(rt uintptr) bool {
Expand Down Expand Up @@ -201,13 +203,19 @@ func (e *bincEncDriver) encodeExtPreamble(xtag byte, length int) {

func (e *bincEncDriver) WriteArrayStart(length int) {
e.encLen(bincVdArray<<4, uint64(length))
e.c = containerArrayStart
}

func (e *bincEncDriver) WriteMapStart(length int) {
e.encLen(bincVdMap<<4, uint64(length))
e.c = containerMapStart
}

func (e *bincEncDriver) EncodeString(c charEncoding, v string) {
if e.c == containerMapKey && c == cUTF8 && (e.h.AsSymbols == 0 || e.h.AsSymbols == 1) {
e.EncodeSymbol(v)
return
}
l := uint64(len(v))
e.encBytesLen(c, l)
if l > 0 {
Expand Down Expand Up @@ -522,34 +530,22 @@ func (d *bincDecDriver) decCheckInteger() (ui uint64, neg bool) {
return
}

func (d *bincDecDriver) DecodeInt(bitsize uint8) (i int64) {
func (d *bincDecDriver) DecodeInt64() (i int64) {
ui, neg := d.decCheckInteger()
i, overflow := chkOvf.SignedInt(ui)
if overflow {
d.d.errorf("simple: overflow converting %v to signed integer", ui)
return
}
i = chkOvf.SignedIntV(ui)
if neg {
i = -i
}
if chkOvf.Int(i, bitsize) {
d.d.errorf("binc: overflow integer: %v for num bits: %v", i, bitsize)
return
}
d.bdRead = false
return
}

func (d *bincDecDriver) DecodeUint(bitsize uint8) (ui uint64) {
func (d *bincDecDriver) DecodeUint64() (ui uint64) {
ui, neg := d.decCheckInteger()
if neg {
d.d.errorf("Assigning negative signed value to unsigned type")
return
}
if chkOvf.Uint(ui, bitsize) {
d.d.errorf("binc: overflow integer: %v", ui)
return
}
d.bdRead = false
return
}
Expand All @@ -576,7 +572,7 @@ func (d *bincDecDriver) DecodeFloat64() (f float64) {
} else if vd == bincVdFloat {
f = d.decFloat()
} else {
f = float64(d.DecodeInt(64))
f = float64(d.DecodeInt64())
}
d.bdRead = false
return
Expand Down Expand Up @@ -932,6 +928,26 @@ type BincHandle struct {
BasicHandle
binaryEncodingType
noElemSeparators

// AsSymbols defines what should be encoded as symbols.
//
// Encoding as symbols can reduce the encoded size significantly.
//
// However, during decoding, each string to be encoded as a symbol must
// be checked to see if it has been seen before. Consequently, encoding time
// will increase if using symbols, because string comparisons has a clear cost.
//
// Values:
// - 0: default: library uses best judgement
// - 1: use symbols
// - 2: do not use symbols
AsSymbols byte

// AsSymbols: may later on introduce more options ...
// - m: map keys
// - s: struct fields
// - n: none
// - a: all: same as m, s, ...
}

// Name returns the name of the handle: binc
Expand All @@ -943,7 +959,7 @@ func (h *BincHandle) SetBytesExt(rt reflect.Type, tag uint64, ext BytesExt) (err
}

func (h *BincHandle) newEncDriver(e *Encoder) encDriver {
return &bincEncDriver{e: e, w: e.w}
return &bincEncDriver{e: e, h: h, w: e.w}
}

func (h *BincHandle) newDecDriver(d *Decoder) decDriver {
Expand All @@ -959,6 +975,7 @@ func (h *BincHandle) newDecDriver(d *Decoder) decDriver {
func (e *bincEncDriver) reset() {
e.w = e.e.w
e.s = 0
e.c = 0
e.m = nil
}

Expand Down
46 changes: 15 additions & 31 deletions codec/cbor.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,9 +193,9 @@ func (e *cborEncDriver) WriteArrayEnd() {
}
}

func (e *cborEncDriver) EncodeSymbol(v string) {
e.encStringBytesS(cborBaseString, v)
}
// func (e *cborEncDriver) EncodeSymbol(v string) {
// e.encStringBytesS(cborBaseString, v)
// }

func (e *cborEncDriver) EncodeString(c charEncoding, v string) {
e.encStringBytesS(cborBaseString, v)
Expand Down Expand Up @@ -350,41 +350,25 @@ func (d *cborDecDriver) decCheckInteger() (neg bool) {
return
}

func (d *cborDecDriver) DecodeInt(bitsize uint8) (i int64) {
func (d *cborDecDriver) DecodeInt64() (i int64) {
neg := d.decCheckInteger()
ui := d.decUint()
// check if this number can be converted to an int without overflow
var overflow bool
if neg {
if i, overflow = chkOvf.SignedInt(ui + 1); overflow {
d.d.errorf("cbor: overflow converting %v to signed integer", ui+1)
return
}
i = -i
i = -(chkOvf.SignedIntV(ui + 1))
} else {
if i, overflow = chkOvf.SignedInt(ui); overflow {
d.d.errorf("cbor: overflow converting %v to signed integer", ui)
return
}
}
if chkOvf.Int(i, bitsize) {
d.d.errorf("cbor: overflow integer: %v", i)
return
i = chkOvf.SignedIntV(ui)
}
d.bdRead = false
return
}

func (d *cborDecDriver) DecodeUint(bitsize uint8) (ui uint64) {
func (d *cborDecDriver) DecodeUint64() (ui uint64) {
if d.decCheckInteger() {
d.d.errorf("Assigning negative signed value to unsigned type")
return
}
ui = d.decUint()
if chkOvf.Uint(ui, bitsize) {
d.d.errorf("cbor: overflow integer: %v", ui)
return
}
d.bdRead = false
return
}
Expand All @@ -400,7 +384,7 @@ func (d *cborDecDriver) DecodeFloat64() (f float64) {
} else if bd == cborBdFloat64 {
f = math.Float64frombits(bigen.Uint64(d.r.readx(8)))
} else if bd >= cborBaseUint && bd < cborBaseBytes {
f = float64(d.DecodeInt(64))
f = float64(d.DecodeInt64())
} else {
d.d.errorf("Float only valid from float16/32/64: Invalid descriptor: %v", bd)
return
Expand Down Expand Up @@ -458,7 +442,7 @@ func (d *cborDecDriver) decAppendIndefiniteBytes(bs []byte) []byte {
break
}
if major := d.bd >> 5; major != cborMajorBytes && major != cborMajorText {
d.d.errorf("cbor: expect bytes or string major type in indefinite string/bytes; got: %v, byte: %v", major, d.bd)
d.d.errorf("expect bytes or string major type in indefinite string/bytes; got: %v, byte: %v", major, d.bd)
return nil
}
n := d.decLen()
Expand Down Expand Up @@ -553,12 +537,12 @@ func (d *cborDecDriver) decodeTime(xtag uint64) (t time.Time) {
f1, f2 := math.Modf(d.DecodeFloat64())
t = time.Unix(int64(f1), int64(f2*1e9))
case d.bd >= cborBaseUint && d.bd < cborBaseNegInt, d.bd >= cborBaseNegInt && d.bd < cborBaseBytes:
t = time.Unix(d.DecodeInt(64), 0)
t = time.Unix(d.DecodeInt64(), 0)
default:
d.d.errorf("cbor: time.Time can only be decoded from a number (or RFC3339 string)")
d.d.errorf("time.Time can only be decoded from a number (or RFC3339 string)")
}
default:
d.d.errorf("cbor: invalid tag for time.Time - expecting 0 or 1, got 0x%x", xtag)
d.d.errorf("invalid tag for time.Time - expecting 0 or 1, got 0x%x", xtag)
}
t = t.UTC().Round(time.Microsecond)
return
Expand Down Expand Up @@ -624,14 +608,14 @@ func (d *cborDecDriver) DecodeNaked() {
case d.bd >= cborBaseUint && d.bd < cborBaseNegInt:
if d.h.SignedInteger {
n.v = valueTypeInt
n.i = d.DecodeInt(64)
n.i = d.DecodeInt64()
} else {
n.v = valueTypeUint
n.u = d.DecodeUint(64)
n.u = d.DecodeUint64()
}
case d.bd >= cborBaseNegInt && d.bd < cborBaseBytes:
n.v = valueTypeInt
n.i = d.DecodeInt(64)
n.i = d.DecodeInt64()
case d.bd >= cborBaseBytes && d.bd < cborBaseString:
n.v = valueTypeBytes
n.l = d.DecodeBytes(nil, false)
Expand Down
39 changes: 21 additions & 18 deletions codec/codec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -346,8 +346,8 @@ func testInit() {
// pre-fill them first
bh.EncodeOptions = testEncodeOptions
bh.DecodeOptions = testDecodeOptions
// bh.InterfaceReset = true // TODO: remove
// bh.PreferArrayOverSlice = true // TODO: remove
// bh.InterfaceReset = true
// bh.PreferArrayOverSlice = true
// modify from flag'ish things
bh.InternString = testInternStr
bh.Canonical = testCanonical
Expand Down Expand Up @@ -1878,8 +1878,6 @@ func doTestLargeContainerLen(t *testing.T, h Handle) {
testUnmarshalErr(m2, bs, h, t, "-")
testDeepEqualErr(m, m2, t, "-")

// TODO: skip rest if 32-bit

// do same tests for large strings (encoded as symbols or not)
// skip if 32-bit or not using unsafe mode
if safeMode || (32<<(^uint(0)>>63)) < 64 {
Expand All @@ -1891,10 +1889,11 @@ func doTestLargeContainerLen(t *testing.T, h Handle) {
// to do this, we create a simple one-field struct,
// use use flags to switch from symbols to non-symbols

bh := h.getBasicHandle()
oldAsSymbols := bh.AsSymbols
defer func() { bh.AsSymbols = oldAsSymbols }()

hbinc, okbinc := h.(*BincHandle)
if okbinc {
oldAsSymbols := hbinc.AsSymbols
defer func() { hbinc.AsSymbols = oldAsSymbols }()
}
var out []byte = make([]byte, 0, math.MaxUint16*3/2)
var in []byte = make([]byte, math.MaxUint16*3/2)
for i := range in {
Expand All @@ -1915,7 +1914,9 @@ func doTestLargeContainerLen(t *testing.T, h Handle) {
// fmt.Printf("testcontainerlen: large string: i: %v, |%s|\n", i, s1)
m1[s1] = true

bh.AsSymbols = AsSymbolNone
if okbinc {
hbinc.AsSymbols = 2
}
out = out[:0]
e.ResetBytes(&out)
e.MustEncode(m1)
Expand All @@ -1924,15 +1925,17 @@ func doTestLargeContainerLen(t *testing.T, h Handle) {
testUnmarshalErr(m2, out, h, t, "no-symbols")
testDeepEqualErr(m1, m2, t, "no-symbols")

// now, do as symbols
bh.AsSymbols = AsSymbolAll
out = out[:0]
e.ResetBytes(&out)
e.MustEncode(m1)
// bs, _ = testMarshalErr(m1, h, t, "-")
m2 = make(map[string]bool, 1)
testUnmarshalErr(m2, out, h, t, "symbols")
testDeepEqualErr(m1, m2, t, "symbols")
if okbinc {
// now, do as symbols
hbinc.AsSymbols = 1
out = out[:0]
e.ResetBytes(&out)
e.MustEncode(m1)
// bs, _ = testMarshalErr(m1, h, t, "-")
m2 = make(map[string]bool, 1)
testUnmarshalErr(m2, out, h, t, "symbols")
testDeepEqualErr(m1, m2, t, "symbols")
}
}

}
Expand Down
Loading

0 comments on commit 6e9891a

Please sign in to comment.