Skip to content

Commit

Permalink
CBOR: Reject malformed and invalid indefinite-length strings (#405)
Browse files Browse the repository at this point in the history
* Test UTF-8 validation of indefinite-length CBOR text string chunks.

Per the CBOR spec, each chunk of an indefinite-length text string must be valid, or the entire
indefinite-length string is invalid.

* Test indefinite-length CBOR string chunks match parent type.

An indefinite-length text or byte string is not well-formed unless all of its chunks are of the same
major type as the indefinite-length string.

* CBOR: Reject malformed/invalid indefinite strings.

This addresses two conformance issues:

1. All chunks of an indefinite-length text string must be valid, so if UTF-8 validation is enabled
for text strings in DecodeOptions, it should be applied to each chunk.

2. All of the chunks of an indefinite-length text or byte string must have the same major type as
the indefinite-length string itself. Indefinitely-length text strings can't contain byte strings and
vice versa.
  • Loading branch information
benluddy committed Nov 28, 2023
1 parent 8286c2d commit f7f63a0
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 8 deletions.
16 changes: 10 additions & 6 deletions codec/cbor.go
Original file line number Diff line number Diff line change
Expand Up @@ -427,12 +427,13 @@ func (d *cborDecDriver) decLen() int {
return int(d.decUint())
}

func (d *cborDecDriver) decAppendIndefiniteBytes(bs []byte) []byte {
func (d *cborDecDriver) decAppendIndefiniteBytes(bs []byte, major byte) []byte {
d.bdRead = false
for !d.CheckBreak() {
if major := d.bd >> 5; major != cborMajorBytes && major != cborMajorString {
d.d.errorf("invalid indefinite string/bytes %x (%s); got major %v, expected %v or %v",
d.bd, cbordesc(d.bd), major, cborMajorBytes, cborMajorString)
chunkMajor := d.bd >> 5
if chunkMajor != major {
d.d.errorf("malformed indefinite string/bytes %x (%s); contains chunk with major type %v, expected %v",
d.bd, cbordesc(d.bd), chunkMajor, major)
}
n := uint(d.decLen())
oldLen := uint(len(bs))
Expand All @@ -445,6 +446,9 @@ func (d *cborDecDriver) decAppendIndefiniteBytes(bs []byte) []byte {
bs = bs[:newLen]
}
d.d.decRd.readb(bs[oldLen:newLen])
if d.h.ValidateUnicode && major == cborMajorString && !utf8.Valid(bs[oldLen:newLen]) {
d.d.errorf("indefinite-length text string contains chunk that is not a valid utf-8 sequence: 0x%x", bs[oldLen:newLen])
}
d.bdRead = false
}
d.bdRead = false
Expand Down Expand Up @@ -580,9 +584,9 @@ func (d *cborDecDriver) DecodeBytes(bs []byte) (bsOut []byte) {
d.bdRead = false
if bs == nil {
d.d.decByteState = decByteStateReuseBuf
return d.decAppendIndefiniteBytes(d.d.b[:0])
return d.decAppendIndefiniteBytes(d.d.b[:0], d.bd>>5)
}
return d.decAppendIndefiniteBytes(bs[:0])
return d.decAppendIndefiniteBytes(bs[:0], d.bd>>5)
}
if d.bd == cborBdIndefiniteArray {
d.bdRead = false
Expand Down
39 changes: 37 additions & 2 deletions codec/cbor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ func TestCborIndefiniteLength(t *testing.T) {
buf.WriteByte(cborBdBreak)

buf.WriteByte(cborBdIndefiniteString)
e.MustEncode([]byte("two-")) // encode as bytes, to check robustness of code
e.MustEncode([]byte("value"))
e.MustEncode("two-")
e.MustEncode("value")
buf.WriteByte(cborBdBreak)

//----
Expand Down Expand Up @@ -92,6 +92,41 @@ func TestCborIndefiniteLength(t *testing.T) {
}
}

// "If any item between the indefinite-length string indicator (0b010_11111 or 0b011_11111) and the
// "break" stop code is not a definite-length string item of the same major type, the string is not
// well-formed."
func TestCborIndefiniteLengthStringChunksCannotMixTypes(t *testing.T) {
defer testSetup(t, nil)()
var handle CborHandle

for _, in := range [][]byte{
{cborBdIndefiniteString, 0x40, cborBdBreak}, // byte string chunk in indefinite length text string
{cborBdIndefiniteBytes, 0x60, cborBdBreak}, // text string chunk in indefinite length byte string
} {
var out string
err := NewDecoderBytes(in, &handle).Decode(&out)
if err == nil {
t.Errorf("expected error but decoded 0x%x to: %q", in, out)
}
}
}

// "If any definite-length text string inside an indefinite-length text string is invalid, the
// indefinite-length text string is invalid. Note that this implies that the UTF-8 bytes of a single
// Unicode code point (scalar value) cannot be spread between chunks: a new chunk of a text string
// can only be started at a code point boundary."
func TestCborIndefiniteLengthTextStringChunksAreUTF8(t *testing.T) {
defer testSetup(t, nil)()
var handle CborHandle
handle.ValidateUnicode = true

var out string
err := NewDecoderBytes([]byte{cborBdIndefiniteString, 0x61, 0xc2, 0x61, 0xa3, cborBdBreak}, &handle).Decode(&out)
if err == nil {
t.Errorf("expected error but decoded to: %q", out)
}
}

type testCborGolden struct {
Base64 string `codec:"cbor"`
Hex string `codec:"hex"`
Expand Down

0 comments on commit f7f63a0

Please sign in to comment.