cbor: tighten checks on indefinite-length byte/text strings.

Each chunk of an indefinite-length string must now have the same major type
as the enclosing string, and when ValidateUnicode is set every chunk of a
text string is checked on its own with utf8.Valid.

diff --git a/codec/cbor.go b/codec/cbor.go
index 10944487..d6834f3b 100644
--- a/codec/cbor.go
+++ b/codec/cbor.go
@@ -427,12 +427,13 @@ func (d *cborDecDriver) decLen() int {
 	return int(d.decUint())
 }
 
-func (d *cborDecDriver) decAppendIndefiniteBytes(bs []byte) []byte {
+func (d *cborDecDriver) decAppendIndefiniteBytes(bs []byte, major byte) []byte {
 	d.bdRead = false
 	for !d.CheckBreak() {
-		if major := d.bd >> 5; major != cborMajorBytes && major != cborMajorString {
-			d.d.errorf("invalid indefinite string/bytes %x (%s); got major %v, expected %v or %v",
-				d.bd, cbordesc(d.bd), major, cborMajorBytes, cborMajorString)
+		chunkMajor := d.bd >> 5
+		if chunkMajor != major {
+			d.d.errorf("malformed indefinite string/bytes %x (%s); contains chunk with major type %v, expected %v",
+				d.bd, cbordesc(d.bd), chunkMajor, major)
 		}
 		n := uint(d.decLen())
 		oldLen := uint(len(bs))
@@ -445,6 +446,9 @@ func (d *cborDecDriver) decAppendIndefiniteBytes(bs []byte) []byte {
 			bs = bs[:newLen]
 		}
 		d.d.decRd.readb(bs[oldLen:newLen])
+		if d.h.ValidateUnicode && major == cborMajorString && !utf8.Valid(bs[oldLen:newLen]) {
+			d.d.errorf("indefinite-length text string contains chunk that is not a valid utf-8 sequence: 0x%x", bs[oldLen:newLen])
+		}
 		d.bdRead = false
 	}
 	d.bdRead = false
@@ -580,9 +584,9 @@ func (d *cborDecDriver) DecodeBytes(bs []byte) (bsOut []byte) {
 		d.bdRead = false
 		if bs == nil {
 			d.d.decByteState = decByteStateReuseBuf
-			return d.decAppendIndefiniteBytes(d.d.b[:0])
+			return d.decAppendIndefiniteBytes(d.d.b[:0], d.bd>>5)
 		}
-		return d.decAppendIndefiniteBytes(bs[:0])
+		return d.decAppendIndefiniteBytes(bs[:0], d.bd>>5)
 	}
 	if d.bd == cborBdIndefiniteArray {
 		d.bdRead = false
diff --git a/codec/cbor_test.go b/codec/cbor_test.go
index 9d62f4b9..6682bd62 100644
--- a/codec/cbor_test.go
+++ b/codec/cbor_test.go
@@ -61,8 +61,8 @@ func TestCborIndefiniteLength(t *testing.T) {
 	buf.WriteByte(cborBdBreak)
 
 	buf.WriteByte(cborBdIndefiniteString)
-	e.MustEncode([]byte("two-")) // encode as bytes, to check robustness of code
-	e.MustEncode([]byte("value"))
+	e.MustEncode("two-")
+	e.MustEncode("value")
 	buf.WriteByte(cborBdBreak)
 
 	//----
@@ -92,6 +92,41 @@ func TestCborIndefiniteLength(t *testing.T) {
 	}
 }
 
+// "If any item between the indefinite-length string indicator (0b010_11111 or 0b011_11111) and the
+// "break" stop code is not a definite-length string item of the same major type, the string is not
+// well-formed."
+func TestCborIndefiniteLengthStringChunksCannotMixTypes(t *testing.T) {
+	defer testSetup(t, nil)()
+	var handle CborHandle
+
+	for _, in := range [][]byte{
+		{cborBdIndefiniteString, 0x40, cborBdBreak}, // byte string chunk in indefinite length text string
+		{cborBdIndefiniteBytes, 0x60, cborBdBreak},  // text string chunk in indefinite length byte string
+	} {
+		var out string
+		err := NewDecoderBytes(in, &handle).Decode(&out)
+		if err == nil {
+			t.Errorf("expected error but decoded 0x%x to: %q", in, out)
+		}
+	}
+}
+
+// "If any definite-length text string inside an indefinite-length text string is invalid, the
+// indefinite-length text string is invalid. Note that this implies that the UTF-8 bytes of a single
+// Unicode code point (scalar value) cannot be spread between chunks: a new chunk of a text string
+// can only be started at a code point boundary."
+func TestCborIndefiniteLengthTextStringChunksAreUTF8(t *testing.T) {
+	defer testSetup(t, nil)()
+	var handle CborHandle
+	handle.ValidateUnicode = true
+
+	var out string
+	err := NewDecoderBytes([]byte{cborBdIndefiniteString, 0x61, 0xc2, 0x61, 0xa3, cborBdBreak}, &handle).Decode(&out)
+	if err == nil {
+		t.Errorf("expected error but decoded to: %q", out)
+	}
+}
+
 type testCborGolden struct {
 	Base64 string `codec:"cbor"`
 	Hex    string `codec:"hex"`