-
Notifications
You must be signed in to change notification settings - Fork 1.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improve string encoding by following json approach #1350
Changes from 1 commit
414a7b5
044300b
e12743b
85815f6
9165693
f974d60
c00d33c
dc239f1
ced79e2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,8 +23,10 @@ | |
import ( | ||
"encoding/base64" | ||
"math" | ||
"reflect" | ||
"time" | ||
"unicode/utf8" | ||
"unsafe" | ||
|
||
"go.uber.org/zap/buffer" | ||
"go.uber.org/zap/internal/bufferpool" | ||
|
@@ -486,67 +488,65 @@ | |
// Unlike the standard library's encoder, it doesn't attempt to protect the | ||
// user from browser vulnerabilities or JSONP-related problems. | ||
func (enc *jsonEncoder) safeAddString(s string) { | ||
for i := 0; i < len(s); { | ||
if enc.tryAddRuneSelf(s[i]) { | ||
i++ | ||
continue | ||
} | ||
r, size := utf8.DecodeRuneInString(s[i:]) | ||
if enc.tryAddRuneError(r, size) { | ||
i++ | ||
continue | ||
} | ||
enc.buf.AppendString(s[i : i+size]) | ||
i += size | ||
} | ||
enc.safeAddByteString(*(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{ | ||
Data: (*reflect.StringHeader)(unsafe.Pointer(&s)).Data, | ||
Len: len(s), | ||
Cap: len(s), | ||
}))) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Putting aside that this is decidedly Not Safe (in a function called safeAddString), starting with Go 1.20, the above isn't the best way to do an unsafe string-to-byte-slice conversion. It's better to now do: `unsafe.Slice(unsafe.StringData(s), len(s))` |
||
} | ||
|
||
// safeAddByteString is a no-alloc equivalent of safeAddString(string(s)) for s []byte. | ||
func (enc *jsonEncoder) safeAddByteString(s []byte) { | ||
start := 0 | ||
for i := 0; i < len(s); { | ||
if enc.tryAddRuneSelf(s[i]) { | ||
if s[i] < utf8.RuneSelf { | ||
if s[i] >= 0x20 && s[i] != '\\' && s[i] != '"' { | ||
i++ | ||
continue | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Very nice! This right here is the performance win. I'm in favor of such a change, but I would prefer if we could do this without the unsafe. |
||
} | ||
|
||
enc.buf.AppendByteV(s[start:i]...) | ||
|
||
switch s[i] { | ||
case '\\', '"': | ||
enc.buf.AppendByte('\\') | ||
enc.buf.AppendByte(s[i]) | ||
case '\n': | ||
enc.buf.AppendByte('\\') | ||
enc.buf.AppendByte('n') | ||
case '\r': | ||
enc.buf.AppendByte('\\') | ||
enc.buf.AppendByte('r') | ||
case '\t': | ||
enc.buf.AppendByte('\\') | ||
enc.buf.AppendByte('t') | ||
default: | ||
// Encode bytes < 0x20, except for the escape sequences above. | ||
enc.buf.AppendString(`\u00`) | ||
enc.buf.AppendByte(_hex[s[i]>>4]) | ||
enc.buf.AppendByte(_hex[s[i]&0xF]) | ||
} | ||
|
||
i++ | ||
start = i | ||
continue | ||
} | ||
|
||
enc.buf.AppendByteV(s[start:i]...) | ||
|
||
r, size := utf8.DecodeRune(s[i:]) | ||
if enc.tryAddRuneError(r, size) { | ||
i++ | ||
start = i | ||
continue | ||
} | ||
enc.buf.Write(s[i : i+size]) | ||
i += size | ||
start = i | ||
} | ||
} | ||
|
||
// tryAddRuneSelf appends b if it is a valid UTF-8 character represented in a single byte. | ||
func (enc *jsonEncoder) tryAddRuneSelf(b byte) bool { | ||
if b >= utf8.RuneSelf { | ||
return false | ||
} | ||
if b >= 0x20 && b != '\\' && b != '"' { | ||
enc.buf.AppendByte(b) | ||
return true | ||
} | ||
switch b { | ||
case '\\', '"': | ||
enc.buf.AppendByte('\\') | ||
enc.buf.AppendByte(b) | ||
case '\n': | ||
enc.buf.AppendByte('\\') | ||
enc.buf.AppendByte('n') | ||
case '\r': | ||
enc.buf.AppendByte('\\') | ||
enc.buf.AppendByte('r') | ||
case '\t': | ||
enc.buf.AppendByte('\\') | ||
enc.buf.AppendByte('t') | ||
default: | ||
// Encode bytes < 0x20, except for the escape sequences above. | ||
enc.buf.AppendString(`\u00`) | ||
enc.buf.AppendByte(_hex[b>>4]) | ||
enc.buf.AppendByte(_hex[b&0xF]) | ||
} | ||
return true | ||
// add remaining | ||
enc.buf.AppendByteV(s[start:]...) | ||
} | ||
|
||
func (enc *jsonEncoder) tryAddRuneError(r rune, size int) bool { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should probably be named `AppendBytes` and take a `[]byte`, not a vararg. (Also, the docstring is inaccurate.) (FYI, there's also `Buffer.Write`, which does the same while satisfying `io.Writer`, but there's no problem with also having this method. However, if we have both, maybe `Write` should call `AppendBytes`.)