diff --git a/fieldpath/serialize-pe.go b/fieldpath/serialize-pe.go index eb8464cb..24fb5f89 100644 --- a/fieldpath/serialize-pe.go +++ b/fieldpath/serialize-pe.go @@ -29,40 +29,39 @@ var ErrUnknownPathElementType = errors.New("unknown path element type") const ( // Field indicates that the content of this path element is a field's name - peField = "f" + peField byte = 'f' // Value indicates that the content of this path element is a field's value - peValue = "v" + peValue byte = 'v' // Index indicates that the content of this path element is an index in an array - peIndex = "i" + peIndex byte = 'i' // Key indicates that the content of this path element is a key value map - peKey = "k" + peKey byte = 'k' // Separator separates the type of a path element from the contents - peSeparator = ":" + peSeparator byte = ':' ) var ( - peFieldSepBytes = []byte(peField + peSeparator) - peValueSepBytes = []byte(peValue + peSeparator) - peIndexSepBytes = []byte(peIndex + peSeparator) - peKeySepBytes = []byte(peKey + peSeparator) - peSepBytes = []byte(peSeparator) + peFieldSepBytes = []byte{peField, peSeparator} + peValueSepBytes = []byte{peValue, peSeparator} + peIndexSepBytes = []byte{peIndex, peSeparator} + peKeySepBytes = []byte{peKey, peSeparator} ) // DeserializePathElement parses a serialized path element func DeserializePathElement(s string) (PathElement, error) { - b := []byte(s) + b := builder.StringToReadOnlyByteSlice(s) if len(b) < 2 { return PathElement{}, errors.New("key must be 2 characters long") } - typeSep, b := b[:2], b[2:] - if typeSep[1] != peSepBytes[0] { + typeSep0, typeSep1, b := b[0], b[1], b[2:] + if typeSep1 != peSeparator { return PathElement{}, fmt.Errorf("missing colon: %v", s) } - switch typeSep[0] { + switch typeSep0 { case peFieldSepBytes[0]: // Slice s rather than convert b, to save on // allocations. diff --git a/fieldpath/serialize.go b/fieldpath/serialize.go index f4a28de1..b220195d 100644 --- a/fieldpath/serialize.go +++ b/fieldpath/serialize.go @@ -21,7 +21,6 @@ import ( "io" "sort" - json "sigs.k8s.io/json" "sigs.k8s.io/structured-merge-diff/v4/internal/builder" ) @@ -202,31 +201,31 @@ func (s *Set) FromJSON(r io.Reader) error { return nil } -type setReader struct { - target *Set - isMember bool -} - -func (sr *setReader) UnmarshalJSON(data []byte) error { - children, isMember, err := readIterV1(data) - if err != nil { - return err - } - sr.target = children - sr.isMember = isMember - return nil -} - // returns true if this subtree is also (or only) a member of parent; s is nil // if there are no further children. func readIterV1(data []byte) (children *Set, isMember bool, err error) { - m := map[string]setReader{} + parser := builder.NewFastObjParser(data) - if err := json.UnmarshalCaseSensitivePreserveInts(data, &m); err != nil { - return nil, false, err - } + for { + rawKey, err := parser.Parse() + if err == io.EOF { + break + } else if err != nil { + return nil, false, fmt.Errorf("parsing JSON: %v", err) + } + + rawValue, err := parser.Parse() + if err == io.EOF { + return nil, false, fmt.Errorf("unexpected EOF") + } else if err != nil { + return nil, false, fmt.Errorf("parsing JSON: %v", err) + } + + k, err := builder.UnmarshalString(rawKey) + if err != nil { + return nil, false, fmt.Errorf("decoding key: %v", err) + } - for k, v := range m { if k == "." { isMember = true continue @@ -242,7 +241,12 @@ func readIterV1(data []byte) (children *Set, isMember bool, err error) { return nil, false, fmt.Errorf("parsing key as path element: %v", err) } - if v.isMember { + grandChildren, isChildMember, err := readIterV1(rawValue) + if err != nil { + return nil, false, fmt.Errorf("parsing value as set: %v", err) + } + + if isChildMember { if children == nil { children = &Set{} } @@ -252,26 +256,21 @@ func readIterV1(data []byte) (children *Set, isMember bool, err error) { *m = append(*m, pe) } - if v.target != nil { + if grandChildren != nil { if children == nil { children = &Set{} } // Append the child to the children list, we will sort it later m := &children.Children.members - *m = append(*m, setNode{pe, v.target}) + *m = append(*m, setNode{pe, grandChildren}) } } // Sort the members and children if children != nil { - sort.Slice(children.Members.members, func(i, j int) bool { - return children.Members.members[i].Less(children.Members.members[j]) - }) - - sort.Slice(children.Children.members, func(i, j int) bool { - return children.Children.members[i].pathElement.Less(children.Children.members[j].pathElement) - }) + sort.Sort(children.Members.members) + sort.Sort(children.Children.members) } if children == nil { diff --git a/internal/builder/fastobjparse.go b/internal/builder/fastobjparse.go new file mode 100644 index 00000000..86551342 --- /dev/null +++ b/internal/builder/fastobjparse.go @@ -0,0 +1,224 @@ +package builder + +import ( + gojson "encoding/json" + "fmt" + "io" + "reflect" + "runtime" + "unsafe" + + "sigs.k8s.io/json" +) + +type parserState int + +const ( + stateLookingForObj parserState = iota + stateLookingForItem + stateLookingForKeyValueSep + stateLookingForItemSep + stateLookingForValue + stateEnd +) + +type FastObjParser struct { + input []byte + pos int + + state parserState +} + +func NewFastObjParser(input []byte) FastObjParser { + return FastObjParser{ + input: input, + state: stateLookingForObj, + } +} + +var whitespace = [256]bool{ + ' ': true, + '\t': true, + '\n': true, + '\r': true, +} + +func isWhitespace(c byte) bool { + return whitespace[c] +} + +func (p *FastObjParser) getValue(startPos int) ([]byte, error) { + foundRootValue := false + isQuoted := false + isEscaped := false + level := 0 + i := startPos +Loop: + for ; i < len(p.input); i++ { + if isQuoted { + // Skip escaped character + if isEscaped { + isEscaped = false + continue + } + + switch p.input[i] { + case '\\': + isEscaped = true + case '"': + isQuoted = false + } + + continue + } + + // Skip whitespace + if isWhitespace(p.input[i]) { + continue + } + + // If we are at the top level and find the next object, we are done + if level == 0 && foundRootValue { + switch p.input[i] { + case ',', '}', ']', ':', '{', '[': + break Loop + } + } + + switch p.input[i] { + // Keep track of the nesting level + case '{': + level++ + case '}': + level-- + case '[': + level++ + case ']': + level-- + + // Start of a string + case '"': + isQuoted = true + } + + foundRootValue = true + } + + if level != 0 { + return nil, fmt.Errorf("expected '}' or ']' but reached end of input") + } + + if isQuoted { + return nil, fmt.Errorf("expected '\"' but reached end of input") + } + + if !foundRootValue { + return nil, fmt.Errorf("expected value but reached end of input") + } + + return p.input[startPos:i], nil +} + +func (p *FastObjParser) Parse() ([]byte, error) { + for { + if p.pos >= len(p.input) { + return nil, io.EOF + } + + // Skip whitespace + if isWhitespace(p.input[p.pos]) { + p.pos++ + continue + } + + switch p.state { + case stateLookingForObj: + if p.input[p.pos] != '{' { + return nil, fmt.Errorf("expected '{' at position %d", p.pos) + } + + p.state = stateLookingForItem + + case stateLookingForItem: + if p.input[p.pos] == '}' { + p.state = stateEnd + return nil, io.EOF + } + + strSlice, err := p.getValue(p.pos) + if err != nil { + return nil, err + } + + p.pos += len(strSlice) + p.state = stateLookingForKeyValueSep + return strSlice, nil + + case stateLookingForKeyValueSep: + if p.input[p.pos] != ':' { + return nil, fmt.Errorf("expected ':' at position %d", p.pos) + } + + p.state = stateLookingForValue + + case stateLookingForValue: + valueSlice, err := p.getValue(p.pos) + if err != nil { + return nil, err + } + + p.pos += len(valueSlice) + p.state = stateLookingForItemSep + return valueSlice, nil + + case stateLookingForItemSep: + if p.input[p.pos] == ',' { + p.state = stateLookingForItem + } else if p.input[p.pos] == '}' { + p.state = stateEnd + } else { + return nil, fmt.Errorf("expected ',' or '}' at position %d", p.pos) + } + + case stateEnd: + return nil, io.EOF + } + + p.pos++ + } +} + +func UnmarshalString(input []byte) (string, error) { + var v string + // No need to enable case sensitivity or int preservation here, as we are only unmarshalling strings. + if err := gojson.Unmarshal(input, (*string)(noescape(unsafe.Pointer(&v)))); err != nil { + return "", err + } + + runtime.KeepAlive(v) + + return v, nil +} + +func UnmarshalInterface(input []byte) (interface{}, error) { + var v interface{} + if err := json.UnmarshalCaseSensitivePreserveInts(input, (*interface{})(noescape(unsafe.Pointer(&v)))); err != nil { + return "", err + } + + runtime.KeepAlive(v) + + return v, nil +} + +// Create a read-only byte array from a string +func StringToReadOnlyByteSlice(s string) []byte { + // Get StringHeader from string + stringHeader := (*reflect.StringHeader)(unsafe.Pointer(&s)) + + // Construct SliceHeader with capacity equal to the length + sliceHeader := reflect.SliceHeader{Data: stringHeader.Data, Len: stringHeader.Len, Cap: stringHeader.Len} + + // Convert SliceHeader to a byte slice + return *(*[]byte)(unsafe.Pointer(&sliceHeader)) +} diff --git a/internal/builder/fastobjparse_test.go b/internal/builder/fastobjparse_test.go new file mode 100644 index 00000000..18fada0d --- /dev/null +++ b/internal/builder/fastobjparse_test.go @@ -0,0 +1,49 @@ +package builder + +import ( + "io" + "testing" +) + +func TestFastObjParse(t *testing.T) { + testCases := map[string][]string{ + `{}`: {}, + `{"a": 1, "b": {}}`: {`"a"`, `1`, `"b"`, `{}`}, + `{"a": 1, "b": 2}`: {`"a"`, `1`, `"b"`, `2`}, + `{"a": 1, "b": 2, "c": 3}`: {`"a"`, `1`, `"b"`, `2`, `"c"`, `3`}, + `{"a": "1", "b": "2", "c": "3"}`: {`"a"`, `"1"`, `"b"`, `"2"`, `"c"`, `"3"`}, + `{"a": "1", "b": {"c": 3}}`: {`"a"`, `"1"`, `"b"`, `{"c": 3}`}, + `{"a": "1", "b": {"c": []}, "d": "4"}`: {`"a"`, `"1"`, `"b"`, `{"c": []}`, `"d"`, `"4"`}, + `{"port":443,"protocol":"tcp"}`: {`"port"`, `443`, `"protocol"`, `"tcp"`}, + } + + for tc, ans := range testCases { + tc := tc + ans := ans + t.Run(tc, func(t *testing.T) { + parser := NewFastObjParser([]byte(tc)) + + results := []string{} + for { + v, err := parser.Parse() + if err == io.EOF { + break + } else if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + results = append(results, string(v)) + } + + if len(results) != len(ans) { + t.Fatalf("unexpected results: %v", results) + } + + for i := 0; i < len(results); i++ { + if results[i] != ans[i] { + t.Fatalf("unexpected results: got %v, want %v", results, ans) + } + } + }) + } +} diff --git a/value/fields.go b/value/fields.go index baa78399..0f4cae74 100644 --- a/value/fields.go +++ b/value/fields.go @@ -17,10 +17,11 @@ limitations under the License. package value import ( + "fmt" + "io" "sort" "strings" - "sigs.k8s.io/json" "sigs.k8s.io/structured-merge-diff/v4/internal/builder" ) @@ -36,14 +37,35 @@ type FieldList []Field // FieldListFromJSON is a helper function for reading a JSON document. func FieldListFromJSON(input []byte) (FieldList, error) { - v := map[string]interface{}{} - if err := json.UnmarshalCaseSensitivePreserveInts(input, &v); err != nil { - return nil, err - } + parser := builder.NewFastObjParser(input) + + var fields FieldList + for { + rawKey, err := parser.Parse() + if err == io.EOF { + break + } else if err != nil { + return nil, fmt.Errorf("parsing JSON: %v", err) + } + + rawValue, err := parser.Parse() + if err == io.EOF { + return nil, fmt.Errorf("unexpected EOF") + } else if err != nil { + return nil, fmt.Errorf("parsing JSON: %v", err) + } + + k, err := builder.UnmarshalString(rawKey) + if err != nil { + return nil, fmt.Errorf("parsing JSON: %v", err) + } + + v, err := builder.UnmarshalInterface(rawValue) + if err != nil { + return nil, fmt.Errorf("parsing JSON: %v", err) + } - fields := make(FieldList, 0, len(v)) - for k, raw := range v { - fields = append(fields, Field{Name: k, Value: NewValueInterface(raw)}) + fields = append(fields, Field{Name: k, Value: NewValueInterface(v)}) } return fields, nil diff --git a/value/value.go b/value/value.go index 359795f5..11d90c82 100644 --- a/value/value.go +++ b/value/value.go @@ -21,7 +21,6 @@ import ( "strings" "gopkg.in/yaml.v2" - "sigs.k8s.io/json" "sigs.k8s.io/structured-merge-diff/v4/internal/builder" ) @@ -77,10 +76,11 @@ type Value interface { // FromJSON is a helper function for reading a JSON document. func FromJSON(input []byte) (Value, error) { - var v interface{} - if err := json.UnmarshalCaseSensitivePreserveInts(input, &v); err != nil { + v, err := builder.UnmarshalInterface(input) + if err != nil { return nil, err } + return NewValueInterface(v), nil }