Skip to content

Commit

Permalink
Reduce number of allocations during deserialize
Browse files Browse the repository at this point in the history
  • Loading branch information
inteon committed Apr 12, 2024
1 parent 59b4860 commit 4f4fbff
Show file tree
Hide file tree
Showing 6 changed files with 349 additions and 56 deletions.
27 changes: 13 additions & 14 deletions fieldpath/serialize-pe.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,40 +29,39 @@ var ErrUnknownPathElementType = errors.New("unknown path element type")

const (
// Field indicates that the content of this path element is a field's name
peField = "f"
peField byte = 'f'

// Value indicates that the content of this path element is a field's value
peValue = "v"
peValue byte = 'v'

// Index indicates that the content of this path element is an index in an array
peIndex = "i"
peIndex byte = 'i'

// Key indicates that the content of this path element is a key value map
peKey = "k"
peKey byte = 'k'

// Separator separates the type of a path element from the contents
peSeparator = ":"
peSeparator byte = ':'
)

var (
peFieldSepBytes = []byte(peField + peSeparator)
peValueSepBytes = []byte(peValue + peSeparator)
peIndexSepBytes = []byte(peIndex + peSeparator)
peKeySepBytes = []byte(peKey + peSeparator)
peSepBytes = []byte(peSeparator)
peFieldSepBytes = []byte{peField, peSeparator}
peValueSepBytes = []byte{peValue, peSeparator}
peIndexSepBytes = []byte{peIndex, peSeparator}
peKeySepBytes = []byte{peKey, peSeparator}
)

// DeserializePathElement parses a serialized path element
func DeserializePathElement(s string) (PathElement, error) {
b := []byte(s)
b := builder.StringToReadOnlyByteSlice(s)
if len(b) < 2 {
return PathElement{}, errors.New("key must be 2 characters long")
}
typeSep, b := b[:2], b[2:]
if typeSep[1] != peSepBytes[0] {
typeSep0, typeSep1, b := b[0], b[1], b[2:]
if typeSep1 != peSeparator {
return PathElement{}, fmt.Errorf("missing colon: %v", s)
}
switch typeSep[0] {
switch typeSep0 {
case peFieldSepBytes[0]:
// Slice s rather than convert b, to save on
// allocations.
Expand Down
61 changes: 30 additions & 31 deletions fieldpath/serialize.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ import (
"io"
"sort"

json "sigs.k8s.io/json"
"sigs.k8s.io/structured-merge-diff/v4/internal/builder"
)

Expand Down Expand Up @@ -202,31 +201,31 @@ func (s *Set) FromJSON(r io.Reader) error {
return nil
}

type setReader struct {
target *Set
isMember bool
}

func (sr *setReader) UnmarshalJSON(data []byte) error {
children, isMember, err := readIterV1(data)
if err != nil {
return err
}
sr.target = children
sr.isMember = isMember
return nil
}

// returns true if this subtree is also (or only) a member of parent; s is nil
// if there are no further children.
func readIterV1(data []byte) (children *Set, isMember bool, err error) {
m := map[string]setReader{}
parser := builder.NewFastObjParser(data)

if err := json.UnmarshalCaseSensitivePreserveInts(data, &m); err != nil {
return nil, false, err
}
for {
rawKey, err := parser.Parse()
if err == io.EOF {
break
} else if err != nil {
return nil, false, fmt.Errorf("parsing JSON: %v", err)
}

rawValue, err := parser.Parse()
if err == io.EOF {
return nil, false, fmt.Errorf("unexpected EOF")
} else if err != nil {
return nil, false, fmt.Errorf("parsing JSON: %v", err)
}

k, err := builder.UnmarshalString(rawKey)
if err != nil {
return nil, false, fmt.Errorf("decoding key: %v", err)
}

for k, v := range m {
if k == "." {
isMember = true
continue
Expand All @@ -242,7 +241,12 @@ func readIterV1(data []byte) (children *Set, isMember bool, err error) {
return nil, false, fmt.Errorf("parsing key as path element: %v", err)
}

if v.isMember {
grandChildren, isChildMember, err := readIterV1(rawValue)
if err != nil {
return nil, false, fmt.Errorf("parsing value as set: %v", err)
}

if isChildMember {
if children == nil {
children = &Set{}
}
Expand All @@ -252,26 +256,21 @@ func readIterV1(data []byte) (children *Set, isMember bool, err error) {
*m = append(*m, pe)
}

if v.target != nil {
if grandChildren != nil {
if children == nil {
children = &Set{}
}

// Append the child to the children list, we will sort it later
m := &children.Children.members
*m = append(*m, setNode{pe, v.target})
*m = append(*m, setNode{pe, grandChildren})
}
}

// Sort the members and children
if children != nil {
sort.Slice(children.Members.members, func(i, j int) bool {
return children.Members.members[i].Less(children.Members.members[j])
})

sort.Slice(children.Children.members, func(i, j int) bool {
return children.Children.members[i].pathElement.Less(children.Children.members[j].pathElement)
})
sort.Sort(children.Members.members)
sort.Sort(children.Children.members)
}

if children == nil {
Expand Down
224 changes: 224 additions & 0 deletions internal/builder/fastobjparse.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
package builder

import (
gojson "encoding/json"
"fmt"
"io"
"reflect"
"runtime"
"unsafe"

"sigs.k8s.io/json"
)

// parserState identifies what FastObjParser.Parse expects to see next.
type parserState int

const (
	// stateLookingForObj: nothing consumed yet, the opening '{' is expected.
	stateLookingForObj parserState = iota
	// stateLookingForItem: a key, or the closing '}', is expected.
	stateLookingForItem
	// stateLookingForKeyValueSep: the ':' between key and value is expected.
	stateLookingForKeyValueSep
	// stateLookingForItemSep: a ',' or the closing '}' is expected.
	stateLookingForItemSep
	// stateLookingForValue: the value belonging to the last key is expected.
	stateLookingForValue
	// stateEnd: the closing '}' has been seen; Parse only returns io.EOF.
	stateEnd
)

// FastObjParser splits the top level of a JSON object into raw key and value
// slices without decoding them. The returned slices alias the input buffer.
type FastObjParser struct {
	input []byte // the complete JSON text being scanned
	pos   int    // offset of the next unread byte in input

	state parserState // what Parse expects at pos
}

// NewFastObjParser returns a parser positioned at the start of input.
func NewFastObjParser(input []byte) FastObjParser {
	return FastObjParser{
		input: input,
		state: stateLookingForObj,
	}
}

// whitespace is a lookup table of the bytes that are skipped between tokens.
var whitespace = [256]bool{
	' ':  true,
	'\t': true,
	'\n': true,
	'\r': true,
}

// isWhitespace reports whether c is a space, tab, line feed or carriage return.
func isWhitespace(c byte) bool {
	return whitespace[c]
}

// getValue returns the raw bytes of the single value (string, number, literal,
// object or array) that starts at startPos. Scanning stops at the first
// structural byte (',', '}', ']', ':', '{', '[') found at nesting level zero
// after the value has started, or at the end of the input. Whitespace between
// the value and that terminator is included in the returned slice.
//
// The scan only tracks string quoting and bracket depth; it does not check
// that '{' is closed by '}' rather than ']', nor that scalars are well formed.
// Callers are expected to decode the returned bytes with a real JSON decoder.
func (p *FastObjParser) getValue(startPos int) ([]byte, error) {
	foundRootValue := false
	isQuoted := false
	isEscaped := false
	level := 0
	i := startPos
Loop:
	for ; i < len(p.input); i++ {
		c := p.input[i]

		if isQuoted {
			switch {
			case isEscaped:
				// The byte following a backslash is taken literally.
				isEscaped = false
			case c == '\\':
				isEscaped = true
			case c == '"':
				isQuoted = false
			}
			continue
		}

		if isWhitespace(c) {
			continue
		}

		// At the top level, once a value has started, any structural byte
		// belongs to the enclosing object and terminates this value.
		if level == 0 && foundRootValue {
			switch c {
			case ',', '}', ']', ':', '{', '[':
				break Loop
			}
		}

		switch c {
		case '{', '[':
			level++
		case '}', ']':
			// Reaching here with level == 0 means the value itself begins
			// with a closing bracket. Reject it instead of letting the depth
			// go negative, which would allow input such as "][" to balance
			// back to zero and be returned as a value.
			if level == 0 {
				return nil, fmt.Errorf("unexpected '%c' at position %d", c, i)
			}
			level--
		case '"':
			isQuoted = true
		}

		foundRootValue = true
	}

	if level != 0 {
		return nil, fmt.Errorf("expected '}' or ']' but reached end of input")
	}

	if isQuoted {
		return nil, fmt.Errorf("expected '\"' but reached end of input")
	}

	if !foundRootValue {
		return nil, fmt.Errorf("expected value but reached end of input")
	}

	return p.input[startPos:i], nil
}

// Parse returns the next raw token of the object: keys and values alternate,
// starting with a key. The returned slice aliases the input and is not decoded.
//
// Parse returns io.EOF once the closing '}' of the object has been consumed
// (and on every later call). If the input ends before that closing brace,
// Parse returns io.ErrUnexpectedEOF so that truncated or empty input is not
// mistaken for a complete object. Any other malformed structure yields a
// descriptive error. Bytes following the closing '}' are not inspected.
func (p *FastObjParser) Parse() ([]byte, error) {
	for {
		if p.pos >= len(p.input) {
			if p.state == stateEnd {
				return nil, io.EOF
			}
			// Ran out of input in the middle of the object.
			return nil, io.ErrUnexpectedEOF
		}

		// Skip whitespace
		if isWhitespace(p.input[p.pos]) {
			p.pos++
			continue
		}

		switch p.state {
		case stateLookingForObj:
			if p.input[p.pos] != '{' {
				return nil, fmt.Errorf("expected '{' at position %d", p.pos)
			}

			p.state = stateLookingForItem

		case stateLookingForItem:
			if p.input[p.pos] == '}' {
				p.state = stateEnd
				return nil, io.EOF
			}

			strSlice, err := p.getValue(p.pos)
			if err != nil {
				return nil, err
			}

			p.pos += len(strSlice)
			p.state = stateLookingForKeyValueSep
			return strSlice, nil

		case stateLookingForKeyValueSep:
			if p.input[p.pos] != ':' {
				return nil, fmt.Errorf("expected ':' at position %d", p.pos)
			}

			p.state = stateLookingForValue

		case stateLookingForValue:
			valueSlice, err := p.getValue(p.pos)
			if err != nil {
				return nil, err
			}

			p.pos += len(valueSlice)
			p.state = stateLookingForItemSep
			return valueSlice, nil

		case stateLookingForItemSep:
			switch p.input[p.pos] {
			case ',':
				p.state = stateLookingForItem
			case '}':
				p.state = stateEnd
			default:
				return nil, fmt.Errorf("expected ',' or '}' at position %d", p.pos)
			}

		case stateEnd:
			return nil, io.EOF
		}

		// The structural byte handled above has been consumed.
		p.pos++
	}
}

// UnmarshalString decodes input, which must hold a JSON string, and returns
// the resulting Go string. On failure it returns the empty string together
// with the decoder's error.
//
// The standard library decoder is used because the case-sensitivity and
// integer-preservation options are irrelevant when the target is a string.
func UnmarshalString(input []byte) (string, error) {
	var decoded string

	// NOTE(review): noescape is defined elsewhere in this package; presumably
	// it hides &decoded from escape analysis so the variable is not moved to
	// the heap -- confirm against its definition.
	target := (*string)(noescape(unsafe.Pointer(&decoded)))
	err := gojson.Unmarshal(input, target)
	if err != nil {
		return "", err
	}

	// Keep decoded alive until after the decoder has written through the
	// disguised pointer.
	runtime.KeepAlive(decoded)

	return decoded, nil
}

// UnmarshalInterface decodes arbitrary JSON from input into an untyped Go
// value using the case-sensitive, integer-preserving decoder. On failure it
// returns a nil value together with the decoder's error.
func UnmarshalInterface(input []byte) (interface{}, error) {
	var v interface{}
	// NOTE(review): noescape is defined elsewhere in this package; presumably
	// it hides &v from escape analysis -- confirm against its definition.
	if err := json.UnmarshalCaseSensitivePreserveInts(input, (*interface{})(noescape(unsafe.Pointer(&v)))); err != nil {
		// Return nil rather than "", so a failed decode never hands back a
		// non-nil interface value.
		return nil, err
	}

	// Keep v alive until after the decoder has written through the disguised
	// pointer.
	runtime.KeepAlive(v)

	return v, nil
}

// StringToReadOnlyByteSlice returns a byte slice that shares memory with s
// instead of copying it. The slice has length and capacity len(s).
//
// The result aliases the string's bytes, so callers must treat it as
// read-only: writing to it would modify an immutable string.
func StringToReadOnlyByteSlice(s string) []byte {
	var b []byte

	// Only ever view real string/slice values through header pointers. A
	// reflect.SliceHeader built as a standalone struct holds Data as a bare
	// uintptr, which the garbage collector does not treat as a reference;
	// the unsafe package documents that usage as invalid.
	src := (*reflect.StringHeader)(unsafe.Pointer(&s))
	dst := (*reflect.SliceHeader)(unsafe.Pointer(&b))
	dst.Data = src.Data
	dst.Len = src.Len
	// Capacity equals the length so the slice cannot reach past the string.
	dst.Cap = src.Len

	return b
}
Loading

0 comments on commit 4f4fbff

Please sign in to comment.