Skip to content

Commit

Permalink
Treat map keys matching the same struct field as duplicates.
Browse files Browse the repository at this point in the history
This resolves the case where two map keys could match the same struct field without being treated as
duplicate map keys. The DupMapKey documentation has been updated accordingly.

A map key that matches a previously-matched field will no longer quietly fall back to a
case-insensitive match and is now always treated as a duplicate key. Case-sensitive matching of map
key to struct field name is now a map lookup to improve the quadratic worst-case:

                                                                   │ manykeys-before.txt │         manykeys-after.txt          │
                                                                   │       sec/op        │   sec/op     vs base                │
UnmarshalMapToStruct/default_options/many_fields_one_key_per_field          24.137µ ± 4%   5.240µ ± 1%  -78.29% (p=0.000 n=10)

                                                                   │ manykeys-before.txt │        manykeys-after.txt         │
                                                                   │        B/op         │   B/op    vs base                 │
UnmarshalMapToStruct/default_options/many_fields_one_key_per_field            112.0 ± 0%   0.0 ± 0%  -100.00% (p=0.000 n=10)

                                                                   │ manykeys-before.txt │         manykeys-after.txt          │
                                                                   │      allocs/op      │ allocs/op   vs base                 │
UnmarshalMapToStruct/default_options/many_fields_one_key_per_field            1.000 ± 0%   0.000 ± 0%  -100.00% (p=0.000 n=10)

This fix cleared the way to optimize the way duplicate keys are tracked, especially for inputs that
contain no unknown fields:

                                                                               │  before.txt  │              after.txt              │
                                                                               │    sec/op    │   sec/op     vs base                │
UnmarshalMapToStruct/default_options/all_known_fields                             642.7n ± 3%   735.4n ± 0%  +14.42% (p=0.000 n=10)
UnmarshalMapToStruct/default_options/all_known_duplicate_fields                  1611.5n ± 1%   521.1n ± 1%  -67.66% (p=0.000 n=10)
UnmarshalMapToStruct/default_options/all_unknown_fields                           1.731µ ± 3%   1.174µ ± 0%  -32.18% (p=0.000 n=10)
UnmarshalMapToStruct/default_options/all_unknown_duplicate_fields                 1.726µ ± 2%   1.076µ ± 0%  -37.66% (p=0.000 n=10)
UnmarshalMapToStruct/reject_unknown/all_known_fields                              638.3n ± 1%   725.5n ± 1%  +13.66% (p=0.000 n=10)
UnmarshalMapToStruct/reject_unknown/all_known_duplicate_fields                    476.1n ± 1%   514.7n ± 0%   +8.11% (p=0.000 n=10)
UnmarshalMapToStruct/reject_unknown/all_unknown_fields                            448.1n ± 1%   398.8n ± 1%  -11.00% (p=0.000 n=10)
UnmarshalMapToStruct/reject_unknown/all_unknown_duplicate_fields                  447.9n ± 0%   399.9n ± 1%  -10.71% (p=0.000 n=10)
UnmarshalMapToStruct/reject_duplicate/all_known_fields                           1825.5n ± 0%   726.0n ± 0%  -60.23% (p=0.000 n=10)
UnmarshalMapToStruct/reject_duplicate/all_known_duplicate_fields                  744.3n ± 0%   440.8n ± 0%  -40.78% (p=0.000 n=10)
UnmarshalMapToStruct/reject_duplicate/all_unknown_fields                          2.885µ ± 1%   3.329µ ± 0%  +15.37% (p=0.000 n=10)
UnmarshalMapToStruct/reject_duplicate/all_unknown_duplicate_fields                844.8n ± 0%   738.7n ± 1%  -12.56% (p=0.000 n=10)
UnmarshalMapToStruct/reject_unknown_and_duplicate/all_known_fields               1828.5n ± 0%   723.4n ± 0%  -60.44% (p=0.000 n=10)
UnmarshalMapToStruct/reject_unknown_and_duplicate/all_known_duplicate_fields      744.3n ± 0%   436.4n ± 0%  -41.36% (p=0.000 n=10)
UnmarshalMapToStruct/reject_unknown_and_duplicate/all_unknown_fields              643.4n ± 0%   435.3n ± 0%  -32.34% (p=0.000 n=10)
UnmarshalMapToStruct/reject_unknown_and_duplicate/all_unknown_duplicate_fields    642.1n ± 0%   435.6n ± 1%  -32.15% (p=0.000 n=10)
geomean                                                                           936.7n        748.3n       -29.34%

                                                                               │ before.txt  │                after.txt                 │
                                                                               │    B/op     │    B/op      vs base                     │
UnmarshalMapToStruct/default_options/all_known_fields                             16.00 ± 0%     0.00 ± 0%  -100.00% (p=0.000 n=10)
UnmarshalMapToStruct/default_options/all_known_duplicate_fields                   16.00 ± 0%     0.00 ± 0%  -100.00% (p=0.000 n=10)
UnmarshalMapToStruct/default_options/all_unknown_fields                           16.00 ± 0%     0.00 ± 0%  -100.00% (p=0.000 n=10)
UnmarshalMapToStruct/default_options/all_unknown_duplicate_fields                 16.00 ± 0%     0.00 ± 0%  -100.00% (p=0.000 n=10)
UnmarshalMapToStruct/reject_unknown/all_known_fields                              16.00 ± 0%     0.00 ± 0%  -100.00% (p=0.000 n=10)
UnmarshalMapToStruct/reject_unknown/all_known_duplicate_fields                    24.00 ± 0%     0.00 ± 0%  -100.00% (p=0.000 n=10)
UnmarshalMapToStruct/reject_unknown/all_unknown_fields                           24.000 ± 0%    8.000 ± 0%   -66.67% (p=0.000 n=10)
UnmarshalMapToStruct/reject_unknown/all_unknown_duplicate_fields                 24.000 ± 0%    8.000 ± 0%   -66.67% (p=0.000 n=10)
UnmarshalMapToStruct/reject_duplicate/all_known_fields                            800.0 ± 0%      0.0 ± 0%  -100.00% (p=0.000 n=10)
UnmarshalMapToStruct/reject_duplicate/all_known_duplicate_fields                 648.00 ± 0%    40.00 ± 0%   -93.83% (p=0.000 n=10)
UnmarshalMapToStruct/reject_duplicate/all_unknown_fields                          800.0 ± 0%   1285.0 ± 0%   +60.62% (p=0.000 n=10)
UnmarshalMapToStruct/reject_duplicate/all_unknown_duplicate_fields                648.0 ± 0%    248.0 ± 0%   -61.73% (p=0.000 n=10)
UnmarshalMapToStruct/reject_unknown_and_duplicate/all_known_fields                800.0 ± 0%      0.0 ± 0%  -100.00% (p=0.000 n=10)
UnmarshalMapToStruct/reject_unknown_and_duplicate/all_known_duplicate_fields     648.00 ± 0%    40.00 ± 0%   -93.83% (p=0.000 n=10)
UnmarshalMapToStruct/reject_unknown_and_duplicate/all_unknown_fields             616.00 ± 0%    24.00 ± 0%   -96.10% (p=0.000 n=10)
UnmarshalMapToStruct/reject_unknown_and_duplicate/all_unknown_duplicate_fields   616.00 ± 0%    24.00 ± 0%   -96.10% (p=0.000 n=10)
geomean                                                                           113.6                     ?                       ¹ ²
¹ summaries must be >0 to compute geomean
² ratios must be >0 to compute geomean

                                                                               │ before.txt │                after.txt                │
                                                                               │ allocs/op  │ allocs/op   vs base                     │
UnmarshalMapToStruct/default_options/all_known_fields                            1.000 ± 0%   0.000 ± 0%  -100.00% (p=0.000 n=10)
UnmarshalMapToStruct/default_options/all_known_duplicate_fields                  1.000 ± 0%   0.000 ± 0%  -100.00% (p=0.000 n=10)
UnmarshalMapToStruct/default_options/all_unknown_fields                          1.000 ± 0%   0.000 ± 0%  -100.00% (p=0.000 n=10)
UnmarshalMapToStruct/default_options/all_unknown_duplicate_fields                1.000 ± 0%   0.000 ± 0%  -100.00% (p=0.000 n=10)
UnmarshalMapToStruct/reject_unknown/all_known_fields                             1.000 ± 0%   0.000 ± 0%  -100.00% (p=0.000 n=10)
UnmarshalMapToStruct/reject_unknown/all_known_duplicate_fields                   2.000 ± 0%   0.000 ± 0%  -100.00% (p=0.000 n=10)
UnmarshalMapToStruct/reject_unknown/all_unknown_fields                           2.000 ± 0%   1.000 ± 0%   -50.00% (p=0.000 n=10)
UnmarshalMapToStruct/reject_unknown/all_unknown_duplicate_fields                 2.000 ± 0%   1.000 ± 0%   -50.00% (p=0.000 n=10)
UnmarshalMapToStruct/reject_duplicate/all_known_fields                           15.00 ± 0%    0.00 ± 0%  -100.00% (p=0.000 n=10)
UnmarshalMapToStruct/reject_duplicate/all_known_duplicate_fields                 5.000 ± 0%   2.000 ± 0%   -60.00% (p=0.000 n=10)
UnmarshalMapToStruct/reject_duplicate/all_unknown_fields                         15.00 ± 0%   17.00 ± 0%   +13.33% (p=0.000 n=10)
UnmarshalMapToStruct/reject_duplicate/all_unknown_duplicate_fields               5.000 ± 0%   5.000 ± 0%         ~ (p=1.000 n=10) ¹
UnmarshalMapToStruct/reject_unknown_and_duplicate/all_known_fields               15.00 ± 0%    0.00 ± 0%  -100.00% (p=0.000 n=10)
UnmarshalMapToStruct/reject_unknown_and_duplicate/all_known_duplicate_fields     5.000 ± 0%   2.000 ± 0%   -60.00% (p=0.000 n=10)
UnmarshalMapToStruct/reject_unknown_and_duplicate/all_unknown_fields             4.000 ± 0%   2.000 ± 0%   -50.00% (p=0.000 n=10)
UnmarshalMapToStruct/reject_unknown_and_duplicate/all_unknown_duplicate_fields   4.000 ± 0%   2.000 ± 0%   -50.00% (p=0.000 n=10)
geomean                                                                          3.043                    ?                       ² ³
¹ all samples are equal
² summaries must be >0 to compute geomean
³ ratios must be >0 to compute geomean

Signed-off-by: Ben Luddy <bluddy@redhat.com>
  • Loading branch information
benluddy committed Feb 14, 2024
1 parent cd8b95e commit cbbabde
Show file tree
Hide file tree
Showing 2 changed files with 152 additions and 42 deletions.
50 changes: 44 additions & 6 deletions cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package cbor
import (
"bytes"
"errors"
"fmt"
"reflect"
"sort"
"strconv"
Expand Down Expand Up @@ -84,9 +85,25 @@ func newTypeInfo(t reflect.Type) *typeInfo {
}

type decodingStructType struct {
fields fields
err error
toArray bool
fields fields
fieldIndicesByName map[string]int
err error
toArray bool
}

// The stdlib errors.Join was introduced in Go 1.20, and we still support Go 1.17, so instead,
// here's a very basic implementation of an aggregated error.
type multierror []error

func (m multierror) Error() string {
var sb strings.Builder
for i, err := range m {
sb.WriteString(err.Error())
if i < len(m)-1 {
sb.WriteString(", ")
}
}
return sb.String()
}

func getDecodingStructType(t reflect.Type) *decodingStructType {
Expand All @@ -98,12 +115,12 @@ func getDecodingStructType(t reflect.Type) *decodingStructType {

toArray := hasToArrayOption(structOptions)

var err error
var errs []error
for i := 0; i < len(flds); i++ {
if flds[i].keyAsInt {
nameAsInt, numErr := strconv.Atoi(flds[i].name)
if numErr != nil {
err = errors.New("cbor: failed to parse field name \"" + flds[i].name + "\" to int (" + numErr.Error() + ")")
errs = append(errs, errors.New("cbor: failed to parse field name \""+flds[i].name+"\" to int ("+numErr.Error()+")"))
break
}
flds[i].nameAsInt = int64(nameAsInt)
Expand All @@ -112,7 +129,28 @@ func getDecodingStructType(t reflect.Type) *decodingStructType {
flds[i].typInfo = getTypeInfo(flds[i].typ)
}

structType := &decodingStructType{fields: flds, err: err, toArray: toArray}
fieldIndicesByName := make(map[string]int, len(flds))
for i, fld := range flds {
if _, ok := fieldIndicesByName[fld.name]; ok {
errs = append(errs, fmt.Errorf("cbor: two or more fields of %v have the same name %q", t, fld.name))
continue
}
fieldIndicesByName[fld.name] = i
}

var multi multierror
for _, each := range errs {
if each != nil {
multi = append(multi, each)
}
}

structType := &decodingStructType{
fields: flds,
fieldIndicesByName: fieldIndicesByName,
err: multi,
toArray: toArray,
}
decodingStructTypeCache.Store(t, structType)
return structType
}
Expand Down
144 changes: 108 additions & 36 deletions decode.go
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,12 @@ func (e *UnknownFieldError) Error() string {
return fmt.Sprintf("cbor: found unknown field at map element index %d", e.Index)
}

// DupMapKeyMode specifies how to enforce duplicate map key.
// DupMapKeyMode specifies how to enforce duplicate map key. Two map keys are considered duplicates if:
// 1. When decoding into a struct, both keys match the same struct field. The keys are also
// considered duplicates if neither matches any field and decoding to interface{} would produce
// equal (==) values for both keys.
// 2. When decoding into a map, both keys are equal (==) when decoded into values of the
// destination map's key type.
type DupMapKeyMode int

const (
Expand Down Expand Up @@ -1893,20 +1898,32 @@ func (d *decoder) parseMapToStruct(v reflect.Value, tInfo *typeInfo) error { //n
count := int(val)

// Keeps track of matched struct fields
foundFldIdx := make([]bool, len(structType.fields))
var foundFldIdx []bool
{
const maxStackFields = 128
if nfields := len(structType.fields); nfields <= maxStackFields {
// For structs with typical field counts, expect that this can be
// stack-allocated.
var a [maxStackFields]bool
foundFldIdx = a[:nfields]
} else {
foundFldIdx = make([]bool, len(structType.fields))
}
}

// Keeps track of CBOR map keys to detect duplicate map key
keyCount := 0
var mapKeys map[interface{}]struct{}
if d.dm.dupMapKey == DupMapKeyEnforcedAPF {
mapKeys = make(map[interface{}]struct{}, len(structType.fields))
}

errOnUnknownField := (d.dm.extraReturnErrors & ExtraDecErrorUnknownField) > 0

MapEntryLoop:
for j := 0; (hasSize && j < count) || (!hasSize && !d.foundBreak()); j++ {
var f *field
var k interface{} // Used by duplicate map key detection

// If duplicate field detection is enabled and the key at index j did not match any
// field, k will hold the map key.
var k interface{}

t := d.nextCBORType()
if t == cborTypeTextString || (t == cborTypeByteString && d.dm.fieldNameByteString == FieldNameByteStringAllowed) {
Expand All @@ -1924,30 +1941,61 @@ func (d *decoder) parseMapToStruct(v reflect.Value, tInfo *typeInfo) error { //n
keyBytes, _ = d.parseByteString()
}

keyLen := len(keyBytes)
// Find field with exact match
for i := 0; i < len(structType.fields); i++ {
// Check for exact match on field name.
if i, ok := structType.fieldIndicesByName[string(keyBytes)]; ok {
fld := structType.fields[i]
if !foundFldIdx[i] && len(fld.name) == keyLen && fld.name == string(keyBytes) {

if !foundFldIdx[i] {
f = fld
foundFldIdx[i] = true
break
} else if d.dm.dupMapKey == DupMapKeyEnforcedAPF {
err = &DupMapKeyError{fld.name, j}
d.skip() // skip value
j++
// skip the rest of the map
for ; (hasSize && j < count) || (!hasSize && !d.foundBreak()); j++ {
d.skip()
d.skip()
}
return err
} else {
// discard repeated match
d.skip()
continue MapEntryLoop
}
}

// Find field with case-insensitive match
if f == nil && d.dm.fieldNameMatching == FieldNameMatchingPreferCaseSensitive {
keyLen := len(keyBytes)
keyString := string(keyBytes)
for i := 0; i < len(structType.fields); i++ {
fld := structType.fields[i]
if !foundFldIdx[i] && len(fld.name) == keyLen && strings.EqualFold(fld.name, keyString) {
f = fld
foundFldIdx[i] = true
if len(fld.name) == keyLen && strings.EqualFold(fld.name, keyString) {
if !foundFldIdx[i] {
f = fld
foundFldIdx[i] = true
} else if d.dm.dupMapKey == DupMapKeyEnforcedAPF {
err = &DupMapKeyError{keyString, j}
d.skip() // skip value
j++
// skip the rest of the map
for ; (hasSize && j < count) || (!hasSize && !d.foundBreak()); j++ {
d.skip()
d.skip()
}
return err
} else {
// discard repeated match
d.skip()
continue MapEntryLoop
}
break
}
}
}

if d.dm.dupMapKey == DupMapKeyEnforcedAPF {
if d.dm.dupMapKey == DupMapKeyEnforcedAPF && f == nil {
k = string(keyBytes)
}
} else if t <= cborTypeNegativeInt { // uint/int
Expand Down Expand Up @@ -1975,14 +2023,30 @@ func (d *decoder) parseMapToStruct(v reflect.Value, tInfo *typeInfo) error { //n
// Find field
for i := 0; i < len(structType.fields); i++ {
fld := structType.fields[i]
if !foundFldIdx[i] && fld.keyAsInt && fld.nameAsInt == nameAsInt {
f = fld
foundFldIdx[i] = true
if fld.keyAsInt && fld.nameAsInt == nameAsInt {
if !foundFldIdx[i] {
f = fld
foundFldIdx[i] = true
} else if d.dm.dupMapKey == DupMapKeyEnforcedAPF {
err = &DupMapKeyError{nameAsInt, j}
d.skip() // skip value
j++
// skip the rest of the map
for ; (hasSize && j < count) || (!hasSize && !d.foundBreak()); j++ {
d.skip()
d.skip()
}
return err
} else {
// discard repeated match
d.skip()
continue MapEntryLoop
}
break
}
}

if d.dm.dupMapKey == DupMapKeyEnforcedAPF {
if d.dm.dupMapKey == DupMapKeyEnforcedAPF && f == nil {
k = nameAsInt
}
} else {
Expand Down Expand Up @@ -2010,23 +2074,6 @@ func (d *decoder) parseMapToStruct(v reflect.Value, tInfo *typeInfo) error { //n
}
}

if d.dm.dupMapKey == DupMapKeyEnforcedAPF {
mapKeys[k] = struct{}{}
newKeyCount := len(mapKeys)
if newKeyCount == keyCount {
err = &DupMapKeyError{k, j}
d.skip() // skip value
j++
// skip the rest of the map
for ; (hasSize && j < count) || (!hasSize && !d.foundBreak()); j++ {
d.skip()
d.skip()
}
return err
}
keyCount = newKeyCount
}

if f == nil {
if errOnUnknownField {
err = &UnknownFieldError{j}
Expand All @@ -2039,6 +2086,31 @@ func (d *decoder) parseMapToStruct(v reflect.Value, tInfo *typeInfo) error { //n
}
return err
}

// Two map keys that match the same struct field are immediately considered
// duplicates. This check detects duplicates between two map keys that do
// not match a struct field. If unknown field errors are enabled, then this
// check is never reached.
if d.dm.dupMapKey == DupMapKeyEnforcedAPF {
if mapKeys == nil {
mapKeys = make(map[interface{}]struct{}, 1)
}
mapKeys[k] = struct{}{}
newKeyCount := len(mapKeys)
if newKeyCount == keyCount {
err = &DupMapKeyError{k, j}
d.skip() // skip value
j++
// skip the rest of the map
for ; (hasSize && j < count) || (!hasSize && !d.foundBreak()); j++ {
d.skip()
d.skip()
}
return err
}
keyCount = newKeyCount
}

d.skip() // Skip value
continue
}
Expand Down

0 comments on commit cbbabde

Please sign in to comment.