Skip to content

Commit

Permalink
encoding/json: encode struct field names ahead of time
Browse files Browse the repository at this point in the history
Struct field names are static, so we can run HTMLEscape on them when
building each struct type encoder. Then, when running the struct
encoder, we can select either the original or the escaped field name to
write directly.

When the encoder is not escaping HTML, using the original string works
because neither Go struct field names nor JSON tags allow any characters
that would need to be escaped, like '"', '\\', or '\n'.

When the encoder is escaping HTML, the only difference is that '<', '>',
and '&' are allowed via JSON struct field tags, hence why we use
HTMLEscape to properly escape them.

All of the above lets us encode field names with a simple if/else and
WriteString calls, which are considerably simpler and faster than
encoding an arbitrary string.

While at it, also include the quotes and colon in these strings, to
avoid three WriteByte calls in the loop hot path.

Also added a few tests, to ensure that the behavior in these edge cases
is not broken. The output of the tests is the same if this optimization
is reverted.

name           old time/op    new time/op    delta
CodeEncoder-4    7.12ms ± 0%    6.14ms ± 0%  -13.85%  (p=0.004 n=6+5)

name           old speed      new speed      delta
CodeEncoder-4   272MB/s ± 0%   316MB/s ± 0%  +16.08%  (p=0.004 n=6+5)

name           old alloc/op   new alloc/op   delta
CodeEncoder-4    91.9kB ± 0%    93.2kB ± 0%   +1.43%  (p=0.002 n=6+6)

name           old allocs/op  new allocs/op  delta
CodeEncoder-4      0.00           0.00          ~     (all equal)

Updates #5683.

Change-Id: I6f6a340d0de4670799ce38cf95b2092822d2e3ef
Reviewed-on: https://go-review.googlesource.com/122460
Run-TryBot: Daniel Martí <mvdan@mvdan.cc>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
  • Loading branch information
mvdan committed Aug 21, 2018
1 parent e7f59f0 commit 2d3599e
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 5 deletions.
2 changes: 1 addition & 1 deletion src/encoding/json/decode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ var (
umstructXY = ustructText{unmarshalerText{"x", "y"}}

ummapType = map[unmarshalerText]bool{}
ummapXY = map[unmarshalerText]bool{unmarshalerText{"x", "y"}: true}
ummapXY = map[unmarshalerText]bool{{"x", "y"}: true}
)

// Test data structures for anonymous fields.
Expand Down
27 changes: 23 additions & 4 deletions src/encoding/json/encode.go
Original file line number Diff line number Diff line change
Expand Up @@ -641,8 +641,11 @@ func (se *structEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) {
} else {
e.WriteByte(',')
}
e.string(f.name, opts.escapeHTML)
e.WriteByte(':')
if opts.escapeHTML {
e.WriteString(f.nameEscHTML)
} else {
e.WriteString(f.nameNonEsc)
}
opts.quoted = f.quoted
se.fieldEncs[i](e, fv, opts)
}
Expand Down Expand Up @@ -1036,6 +1039,9 @@ type field struct {
nameBytes []byte // []byte(name)
equalFold func(s, t []byte) bool // bytes.EqualFold or equivalent

nameNonEsc string // `"` + name + `":`
nameEscHTML string // `"` + HTMLEscape(name) + `":`

tag bool
index []int
typ reflect.Type
Expand Down Expand Up @@ -1086,6 +1092,9 @@ func typeFields(t reflect.Type) []field {
// Fields found.
var fields []field

// Buffer to run HTMLEscape on field names.
var nameEscBuf bytes.Buffer

for len(next) > 0 {
current, next = next, current[:0]
count, nextCount = nextCount, map[reflect.Type]int{}
Expand Down Expand Up @@ -1152,14 +1161,24 @@ func typeFields(t reflect.Type) []field {
if name == "" {
name = sf.Name
}
fields = append(fields, fillField(field{
field := fillField(field{
name: name,
tag: tagged,
index: index,
typ: ft,
omitEmpty: opts.Contains("omitempty"),
quoted: quoted,
}))
})

// Build nameEscHTML and nameNonEsc ahead of time.
nameEscBuf.Reset()
nameEscBuf.WriteString(`"`)
HTMLEscape(&nameEscBuf, field.nameBytes)
nameEscBuf.WriteString(`":`)
field.nameEscHTML = nameEscBuf.String()
field.nameNonEsc = `"` + field.name + `":`

fields = append(fields, field)
if count[f.typ] > 1 {
// If there were multiple instances, add a second,
// so that the annihilation code will see a duplicate.
Expand Down
15 changes: 15 additions & 0 deletions src/encoding/json/encode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -995,3 +995,18 @@ func TestMarshalPanic(t *testing.T) {
Marshal(&marshalPanic{})
t.Error("Marshal should have panicked")
}

func TestMarshalUncommonFieldNames(t *testing.T) {
v := struct {
A0, À, int
}{}
b, err := Marshal(v)
if err != nil {
t.Fatal("Marshal:", err)
}
want := `{"A0":0,"À":0,"Aβ":0}`
got := string(b)
if got != want {
t.Fatalf("Marshal: got %s want %s", got, want)
}
}
9 changes: 9 additions & 0 deletions src/encoding/json/stream_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,10 @@ func TestEncoderIndent(t *testing.T) {
func TestEncoderSetEscapeHTML(t *testing.T) {
var c C
var ct CText
var tagStruct struct {
Valid int `json:"<>&#! "`
Invalid int `json:"\\"`
}
for _, tt := range []struct {
name string
v interface{}
Expand All @@ -102,6 +106,11 @@ func TestEncoderSetEscapeHTML(t *testing.T) {
{"c", c, `"\u003c\u0026\u003e"`, `"<&>"`},
{"ct", ct, `"\"\u003c\u0026\u003e\""`, `"\"<&>\""`},
{`"<&>"`, "<&>", `"\u003c\u0026\u003e"`, `"<&>"`},
{
"tagStruct", tagStruct,
`{"\u003c\u003e\u0026#! ":0,"Invalid":0}`,
`{"<>&#! ":0,"Invalid":0}`,
},
} {
var buf bytes.Buffer
enc := NewEncoder(&buf)
Expand Down

0 comments on commit 2d3599e

Please sign in to comment.