diff --git a/.github/workflows/push-check-benchmark.yml b/.github/workflows/benchmark.yml similarity index 94% rename from .github/workflows/push-check-benchmark.yml rename to .github/workflows/benchmark.yml index 72ba8d853..199fa3a1f 100644 --- a/.github/workflows/push-check-benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -1,4 +1,4 @@ -name: Pull Request Check +name: Pull Request Benchmark on: pull_request diff --git a/.github/workflows/push-check-go-windows.yml b/.github/workflows/push-check-go-windows.yml deleted file mode 100644 index 2b2f91166..000000000 --- a/.github/workflows/push-check-go-windows.yml +++ /dev/null @@ -1,26 +0,0 @@ -name: Push Check Go Windows - -on: push - -jobs: - build: - strategy: - matrix: - go-version: [1.15.x, 1.16.x, 1.17.x, 1.18.x] - os: [windows-latest] - runs-on: ${{ matrix.os }} - steps: - - uses: actions/checkout@v2 - - - name: Set up Go - uses: actions/setup-go@v3 - with: - go-version: ${{ matrix.go-version }} - - - name: Unit Test - run: go test -v -gcflags -d=checkptr=0 -covermode atomic -coverprofile coverage.out ./... - env: - GOMAXPROCS: 4 - - - name: Generic Test - run: go test -v -gcflags -d=checkptr=0 -covermode atomic ./generic_test \ No newline at end of file diff --git a/.github/workflows/push-check-go115.yml b/.github/workflows/push-check-go115.yml deleted file mode 100644 index d4502b887..000000000 --- a/.github/workflows/push-check-go115.yml +++ /dev/null @@ -1,24 +0,0 @@ -name: Push Check Go1.15 - -on: push - -jobs: - build: - runs-on: self-hosted - steps: - - uses: actions/checkout@v2 - - - name: Set up Go - uses: actions/setup-go@v2 - with: - go-version: 1.15 - - - uses: actions/cache@v2 - with: - path: ~/go/pkg/mod - key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} - restore-keys: | - ${{ runner.os }}-go- - - - name: Unit Test - run: GOMAXPROCS=4 go test -v -gcflags=-d=checkptr=0 -covermode=atomic -coverprofile=coverage.out ./... 
diff --git a/.github/workflows/push-check-go116.yml b/.github/workflows/push-check-go116.yml deleted file mode 100644 index a962f08fb..000000000 --- a/.github/workflows/push-check-go116.yml +++ /dev/null @@ -1,24 +0,0 @@ -name: Push Check Go1.16 - -on: push - -jobs: - build: - runs-on: self-hosted - steps: - - uses: actions/checkout@v2 - - - name: Set up Go - uses: actions/setup-go@v2 - with: - go-version: 1.16 - - - uses: actions/cache@v2 - with: - path: ~/go/pkg/mod - key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} - restore-keys: | - ${{ runner.os }}-go- - - - name: Unit Test - run: GOMAXPROCS=4 go test -v -gcflags=-d=checkptr=0 -race -covermode=atomic -coverprofile=coverage.out ./... diff --git a/.github/workflows/push-check-go118.yml b/.github/workflows/push-check-linux-go118.yml similarity index 79% rename from .github/workflows/push-check-go118.yml rename to .github/workflows/push-check-linux-go118.yml index 8519ef440..2d4a5dd50 100644 --- a/.github/workflows/push-check-go118.yml +++ b/.github/workflows/push-check-linux-go118.yml @@ -21,7 +21,7 @@ jobs: ${{ runner.os }}-go- - name: Unit Test - run: GOMAXPROCS=4 go test -v -gcflags=-d=checkptr=0 -race -covermode=atomic -coverprofile=coverage.out ./... + run: GOMAXPROCS=4 go test -v -gcflags=-d=checkptr=0 -race ./... 
- name: Generic Test - run: go test -v -gcflags=-d=checkptr=0 -race -covermode=atomic ./generic_test + run: go test -v -gcflags=-d=checkptr=0 -race ./generic_test diff --git a/.github/workflows/push-check-go117.yml b/.github/workflows/push-check-linux.yml similarity index 65% rename from .github/workflows/push-check-go117.yml rename to .github/workflows/push-check-linux.yml index 0ff0782a6..de8ac972a 100644 --- a/.github/workflows/push-check-go117.yml +++ b/.github/workflows/push-check-linux.yml @@ -1,9 +1,12 @@ -name: Push Check Go1.17 +name: Push Check Linux on: push jobs: build: + strategy: + matrix: + go-version: [1.15.x, 1.16.x, 1.17.x] runs-on: self-hosted steps: - uses: actions/checkout@v2 @@ -11,7 +14,7 @@ jobs: - name: Set up Go uses: actions/setup-go@v2 with: - go-version: 1.17 + go-version: ${{ matrix.go-version }} - uses: actions/cache@v2 with: @@ -21,4 +24,4 @@ jobs: ${{ runner.os }}-go- - name: Unit Test - run: GOMAXPROCS=4 go test -v -gcflags=-d=checkptr=0 -race -covermode=atomic -coverprofile=coverage.out ./... + run: GOMAXPROCS=4 go test -v -gcflags=-d=checkptr=0 ./... diff --git a/.github/workflows/push-check-windows.yml b/.github/workflows/push-check-windows.yml new file mode 100644 index 000000000..85b4efb07 --- /dev/null +++ b/.github/workflows/push-check-windows.yml @@ -0,0 +1,26 @@ +# name: Push Check Go Windows + +# on: push + +# jobs: +# build: +# strategy: +# matrix: +# go-version: [1.15.x, 1.16.x, 1.17.x, 1.18.x] +# os: [windows-latest] +# runs-on: ${{ matrix.os }} +# steps: +# - uses: actions/checkout@v2 + +# - name: Set up Go +# uses: actions/setup-go@v3 +# with: +# go-version: ${{ matrix.go-version }} + +# - name: Unit Test +# run: go test -v -gcflags -d=checkptr=0 -covermode atomic -coverprofile coverage.out ./... 
+# env: +# GOMAXPROCS: 4 + +# - name: Generic Test +# run: go test -v -gcflags -d=checkptr=0 -covermode atomic ./generic_test \ No newline at end of file diff --git a/README.md b/README.md index 99d11bf74..07a5a0c8e 100644 --- a/README.md +++ b/README.md @@ -97,9 +97,36 @@ var data YourSchema // Marshal output, err := sonic.Marshal(&data) // Unmarshal -err := sonic.Unmarshal(output, &data) +err := sonic.Unmarshal(output, &data) ``` +### Streaming IO +Sonic supports decoding JSON from an `io.Reader` and encoding objects into an `io.Writer`, which makes it possible to handle multiple values in one stream and reduces memory consumption. +- encoder +```go +import "github.com/bytedance/sonic/encoder" + +var o1 = map[string]interface{}{ + "a": "b" +} +var o2 = 1 +var w = bytes.NewBuffer(nil) +var enc = encoder.NewStreamEncoder(w) +enc.Encode(o) +println(w1.String()) // "{\"a\":\"b\"}\n1" +``` +- decoder +```go +import "github.com/bytedance/sonic/decoder" + +var o = map[string]interface{}{} +var r = strings.NewReader(`{"a":"b"}{"1":"2"}`) +var dec = decoder.NewStreamDecoder(r) +dec.Decode(&o) +dec.Decode(&o) +fmt.Printf("%+v", o) // map[1:2 a:b] +``` + ### Use Number/Use Int64 ```go import "github.com/bytedance/sonic/decoder" diff --git a/bench.sh b/bench.sh index 7b1344d7e..d72ff5e1b 100644 --- a/bench.sh +++ b/bench.sh @@ -4,17 +4,15 @@ pwd=$(pwd) export SONIC_NO_ASYNC_GC=1 cd $pwd/encoder -go test -benchmem -run=^$ -benchtime=100000x -bench 
"^(BenchmarkEncoder_Generic_Sonic|BenchmarkEncoder_Generic_Sonic_Fast|BenchmarkEncoder_Generic_JsonIter|BenchmarkEncoder_Generic_GoJson|BenchmarkEncoder_Generic_StdLib|BenchmarkEncoder_Binding_Sonic|BenchmarkEncoder_Binding_Sonic_Fast|BenchmarkEncoder_Binding_JsonIter|BenchmarkEncoder_Binding_GoJson|BenchmarkEncoder_Binding_StdLib|BenchmarkEncoder_Parallel_Generic_Sonic|BenchmarkEncoder_Parallel_Generic_Sonic_Fast|BenchmarkEncoder_Parallel_Generic_JsonIter|BenchmarkEncoder_Parallel_Generic_GoJson|BenchmarkEncoder_Parallel_Generic_StdLib|BenchmarkEncoder_Parallel_Binding_Sonic|BenchmarkEncoder_Parallel_Binding_Sonic_Fast|BenchmarkEncoder_Parallel_Binding_JsonIter|BenchmarkEncoder_Parallel_Binding_GoJson|BenchmarkEncoder_Parallel_Binding_StdLib)$" +go test -benchmem -run=^$ -benchtime=100000x -bench "^(BenchmarkEncoder_.*)$" cd $pwd/decoder -go test -benchmem -run=^$ -benchtime=100000x -bench "^(BenchmarkDecoder_Generic_Sonic|BenchmarkDecoder_Generic_Sonic_Fast|BenchmarkDecoder_Generic_StdLib|BenchmarkDecoder_Generic_JsonIter|BenchmarkDecoder_Generic_GoJson|BenchmarkDecoder_Binding_Sonic|BenchmarkDecoder_Binding_Sonic_Fast|BenchmarkDecoder_Binding_StdLib|BenchmarkDecoder_Binding_JsonIter|BenchmarkDecoder_Binding_GoJson|BenchmarkDecoder_Parallel_Generic_Sonic|BenchmarkDecoder_Parallel_Generic_Sonic_Fast|BenchmarkDecoder_Parallel_Generic_StdLib|BenchmarkDecoder_Parallel_Generic_JsonIter|BenchmarkDecoder_Parallel_Generic_GoJson|BenchmarkDecoder_Parallel_Binding_Sonic|BenchmarkDecoder_Parallel_Binding_Sonic_Fast|BenchmarkDecoder_Parallel_Binding_StdLib|BenchmarkDecoder_Parallel_Binding_JsonIter|BenchmarkDecoder_Parallel_Binding_GoJson)$" +go test -benchmem -run=^$ -benchtime=100000x -bench "^(BenchmarkDecoder_.*)$" cd $pwd/ast -go test -benchmem -run=^$ -benchtime=100000x -bench 
"^(BenchmarkGetOne_Sonic|BenchmarkGetOne_Gjson|BenchmarkGetOne_Jsoniter|BenchmarkGetOne_Parallel_Sonic|BenchmarkGetOne_Parallel_Gjson|BenchmarkGetOne_Parallel_Jsoniter|BenchmarkSetOne_Sonic|BenchmarkSetOne_Sjson|BenchmarkSetOne_Jsoniter|BenchmarkSetOne_Parallel_Sonic|BenchmarkSetOne_Parallel_Sjson|BenchmarkSetOne_Parallel_Jsoniter)$" +go test -benchmem -run=^$ -benchtime=100000x -bench "^(BenchmarkGet.*|BenchmarkEncode.*)$" -go test -benchmem -run=^$ -benchtime=10000x -bench "^(BenchmarkParser_Sonic|BenchmarkParser_Gjson|BenchmarkParser_JsonIter|BenchmarkParser_Parallel_Sonic|BenchmarkParser_Parallel_Gjson|BenchmarkParser_Parallel_StdLib|BenchmarkParser_Parallel_JsonIter|BenchmarkParseOne_Sonic|BenchmarkParseOne_Gjson|BenchmarkParseOne_Jsoniter|BenchmarkParseOne_Parallel_Sonic|BenchmarkParseOne_Parallel_Gjson|BenchmarkParseOne_Parallel_Jsoniter|BenchmarkParseSeven_Sonic|BenchmarkParseSeven_Gjson|BenchmarkParseSeven_Jsoniter|BenchmarkParseSeven_Parallel_Sonic|BenchmarkParseSeven_Parallel_Gjson|BenchmarkParseSeven_Parallel_Jsoniter)$" - -go test -benchmem -run=^$ -benchtime=100000x -bench '^(BenchmarkEncodeRaw|BenchmarkEncodeSkip|BenchmarkEncodeLoad)$' +go test -benchmem -run=^$ -benchtime=10000x -bench "^(BenchmarkParser_.*)$" go test -benchmem -run=^$ -benchtime=10000000x -bench "^(BenchmarkNodeGetByPath|BenchmarkStructGetByPath|BenchmarkNodeIndex|BenchmarkStructIndex|BenchmarkSliceIndex|BenchmarkMapIndex|BenchmarkNodeGet|BenchmarkSliceGet|BenchmarkMapGet|BenchmarkNodeSet|BenchmarkMapSet|BenchmarkNodeSetByIndex|BenchmarkSliceSetByIndex|BenchmarkStructSetByIndex|BenchmarkNodeUnset|BenchmarkMapUnset|BenchmarkNodUnsetByIndex|BenchmarkSliceUnsetByIndex|BenchmarkNodeAdd|BenchmarkSliceAdd|BenchmarkMapAdd)$" diff --git a/decoder/decoder.go b/decoder/decoder.go index 122a35e94..d6fadda2d 100644 --- a/decoder/decoder.go +++ b/decoder/decoder.go @@ -54,6 +54,12 @@ func (self *Decoder) Pos() int { return self.i } +func (self *Decoder) Reset(s string) { + self.s = s + self.i = 
0 + // self.f = 0 +} + // Decode parses the JSON-encoded data from current position and stores the result // in the value pointed to by val. func (self *Decoder) Decode(val interface{}) error { @@ -75,9 +81,6 @@ func (self *Decoder) Decode(val interface{}) error { nb, err := decodeTypedPointer(self.s, self.i, etp, vp, sb, self.f) /* return the stack back */ - if err != nil { - resetStack(sb) - } self.i = nb freeStack(sb) diff --git a/decoder/errors.go b/decoder/errors.go index c9272a258..24d4caed2 100644 --- a/decoder/errors.go +++ b/decoder/errors.go @@ -32,6 +32,7 @@ type SyntaxError struct { Pos int Src string Code types.ParsingError + Msg string } func (self SyntaxError) Error() string { @@ -73,13 +74,20 @@ func (self SyntaxError) Description() string { return fmt.Sprintf( "Syntax error at index %d: %s\n\n\t%s\n\t%s^%s\n", self.Pos, - self.Code.Message(), + self.Message(), self.Src[p:q], strings.Repeat(".", x), strings.Repeat(".", y), ) } +func (self SyntaxError) Message() string { + if self.Msg == "" { + return self.Code.Message() + } + return self.Msg +} + func clamp_zero(v int) int { if v < 0 { return 0 diff --git a/decoder/stream.go b/decoder/stream.go new file mode 100644 index 000000000..273b4391a --- /dev/null +++ b/decoder/stream.go @@ -0,0 +1,205 @@ +/* + * Copyright 2021 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package decoder + +import ( + `bytes` + `io` + `sync` + + `github.com/bytedance/sonic/internal/native/types` +) + +var ( + defaultBufferSize uint = 1024 + growSliceFactorShift uint = 1 + minLeftBufferShift uint = 2 +) + +type StreamDecoder struct { + r io.Reader + buf []byte + scanp int + scanned int64 + err error + Decoder +} + +var bufPool = sync.Pool{ + New: func () interface{} { + return make([]byte, 0, defaultBufferSize) + }, +} + +// NewStreamDecoder adapts to encoding/json.NewDecoder API. +// +// NewStreamDecoder returns a new decoder that reads from r. +func NewStreamDecoder(r io.Reader) *StreamDecoder { + return &StreamDecoder{r : r} +} + +// Decode decodes data from the input stream into val. +// Extra bytes may be read and kept in the internal buffer; they are used by the next call. +// Any I/O error from the underlying io.Reader (except io.EOF) +// or syntax error in the data is recorded and stops subsequent decoding. +func (self *StreamDecoder) Decode(val interface{}) (err error) { + if self.err != nil { + return self.err + } + + var buf = self.buf[self.scanp:] + var p = 0 + var recycle bool + if cap(buf) == 0 { + buf = bufPool.Get().([]byte) + recycle = true + } + + var first = true + var repeat = true +read_more: + for { + l := len(buf) + realloc(&buf) + n, err := self.r.Read(buf[l:cap(buf)]) + buf = buf[:l+n] + if err != nil { + repeat = false + if err == io.EOF { + break + } + self.err = err + return err + } + if n > 0 || first { + break + } + } + first = false + + if len(buf) > 0 { + self.Decoder.Reset(string(buf)) + err = self.Decoder.Decode(val) + if err != nil { + if ee, ok := err.(SyntaxError); repeat && ok && ee.Code == types.ERR_EOF { + goto read_more + } + self.err = err + } + + p = self.Decoder.Pos() + self.scanned += int64(p) + self.scanp = 0 + } + + if len(buf) > p { + // undecoded bytes remain, so copy them into self.buf + self.buf = append(self.buf[:0], buf[p:]...) 
+ } else { + self.buf = nil + recycle = true + } + + if recycle { + buf = buf[:0] + bufPool.Put(buf) + } + return err +} + +// InputOffset returns the input stream byte offset of the current decoder position. +// The offset gives the location of the end of the most recently returned token and the beginning of the next token. +func (self *StreamDecoder) InputOffset() int64 { + return self.scanned + int64(self.scanp) +} + +// Buffered returns a reader of the data remaining in the Decoder's buffer. +// The reader is valid until the next call to Decode. +func (self *StreamDecoder) Buffered() io.Reader { + return bytes.NewReader(self.buf[self.scanp:]) +} + +// More reports whether there is another element in the +// current array or object being parsed. +func (self *StreamDecoder) More() bool { + if self.err != nil { + return false + } + c, err := self.peek() + return err == nil && c != ']' && c != '}' +} + +func (self *StreamDecoder) peek() (byte, error) { + var err error + for { + for i := self.scanp; i < len(self.buf); i++ { + c := self.buf[i] + if isSpace(c) { + continue + } + self.scanp = i + return c, nil + } + // buffer has been scanned, now report any error + if err != nil { + if err != io.EOF { + self.err = err + } + return 0, err + } + err = self.refill() + } +} + +func isSpace(c byte) bool { + return types.SPACE_MASK & (1 << c) != 0 +} + +func (self *StreamDecoder) refill() error { + // Make room to read more into the buffer. + // First slide down data already consumed. + if self.scanp > 0 { + self.scanned += int64(self.scanp) + n := copy(self.buf, self.buf[self.scanp:]) + self.buf = self.buf[:n] + self.scanp = 0 + } + + // Grow buffer if not large enough. + realloc(&self.buf) + + // Read. Delay error for next iteration (after scan). 
+ n, err := self.r.Read(self.buf[len(self.buf):cap(self.buf)]) + self.buf = self.buf[0 : len(self.buf)+n] + + return err +} + +func realloc(buf *[]byte) { + l := uint(len(*buf)) + c := uint(cap(*buf)) + if c - l <= c >> minLeftBufferShift { + e := l+(l>>minLeftBufferShift) + if e < defaultBufferSize { + e = defaultBufferSize + } + tmp := make([]byte, l, e) + copy(tmp, *buf) + *buf = tmp + } +} + diff --git a/decoder/stream_test.go b/decoder/stream_test.go new file mode 100644 index 000000000..a18fde35f --- /dev/null +++ b/decoder/stream_test.go @@ -0,0 +1,374 @@ +/* + * Copyright 2021 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package decoder + +import ( + `encoding/json` + `io` + `io/ioutil` + `strings` + `testing` + + jsoniter `github.com/json-iterator/go` + `github.com/stretchr/testify/assert` + `github.com/stretchr/testify/require` +) + +var ( + _Single_JSON = `{"aaaaa":"` + strings.Repeat("b",1024) + `"} { ` + _Double_JSON = `{"aaaaa":"` + strings.Repeat("b",1024) + `"} {"11111":"` + strings.Repeat("2",1024) + `"}` + _Triple_JSON = `{"aaaaa":"` + strings.Repeat("b",1024) + `"}{ } {"11111":"` + + strings.Repeat("2",1024)+`"} b {}` +) + +func TestDecodeSingle(t *testing.T) { + var str = _Single_JSON + + var r1 = strings.NewReader(str) + var v1 map[string]interface{} + var d1 = jsoniter.NewDecoder(r1) + var r2 = strings.NewReader(str) + var v2 map[string]interface{} + var d2 = NewStreamDecoder(r2) + + require.Equal(t, d1.More(), d2.More()) + es1 := d1.Decode(&v1) + ee1 := d2.Decode(&v2) + assert.Equal(t, es1, ee1) + assert.Equal(t, v1, v2) + // assert.Equal(t, d1.InputOffset(), d2.InputOffset()) + + require.Equal(t, d1.More(), d2.More()) + es3 := d1.Decode(&v1) + assert.NotNil(t, es3) + ee3 := d2.Decode(&v2) + assert.NotNil(t, ee3) + // assert.Equal(t, d1.InputOffset(), d2.InputOffset()) +} + +func TestDecodeMulti(t *testing.T) { + var str = _Triple_JSON + + var r1 = strings.NewReader(str) + var v1 map[string]interface{} + var d1 = jsoniter.NewDecoder(r1) + var r2 = strings.NewReader(str) + var v2 map[string]interface{} + var d2 = NewStreamDecoder(r2) + + require.Equal(t, d1.More(), d2.More()) + es1 := d1.Decode(&v1) + ee1 := d2.Decode(&v2) + assert.Equal(t, es1, ee1) + assert.Equal(t, v1, v2) + // assert.Equal(t, d1.InputOffset(), d2.InputOffset()) + + require.Equal(t, d1.More(), d2.More()) + es4 := d1.Decode(&v1) + ee4 := d2.Decode(&v2) + assert.Equal(t, es4, ee4) + assert.Equal(t, v1, v2) + // assert.Equal(t, d1.InputOffset(), d2.InputOffset()) + + require.Equal(t, d1.More(), d2.More()) + es2 := d1.Decode(&v1) + ee2 := d2.Decode(&v2) + assert.Equal(t, es2, ee2) + 
assert.Equal(t, v1, v2) + // assert.Equal(t, d1.InputOffset(), d2.InputOffset()) + // fmt.Printf("v:%#v\n", v1) + + require.Equal(t, d1.More(), d2.More()) + es3 := d1.Decode(&v1) + assert.NotNil(t, es3) + ee3 := d2.Decode(&v2) + assert.NotNil(t, ee3) + + require.Equal(t, d1.More(), d2.More()) + es5 := d1.Decode(&v1) + assert.NotNil(t, es5) + ee5 := d2.Decode(&v2) + assert.NotNil(t, ee5) +} + +type HaltReader struct { + halts map[int]bool + buf string + p int +} + +func NewHaltReader(buf string, halts map[int]bool) *HaltReader { + return &HaltReader{ + halts: halts, + buf: buf, + p: 0, + } +} + +func (self *HaltReader) Read(p []byte) (int, error) { + t := 0 + for ; t < len(p); { + if self.p >= len(self.buf) { + return t, io.EOF + } + if b, ok := self.halts[self.p]; b { + self.halts[self.p] = false + return t, nil + } else if ok { + delete(self.halts, self.p) + return 0, nil + } + p[t] = self.buf[self.p] + self.p++ + t++ + } + return t, nil +} + +func (self *HaltReader) Reset(buf string) { + self.p = 0 + self.buf = buf +} + +var testHalts = func () map[int]bool { + return map[int]bool{ + 1: true, + 10:true, + 20: true} +} + +func TestDecodeHalt(t *testing.T) { + var str = _Triple_JSON + var r1 = NewHaltReader(str, testHalts()) + var r2 = NewHaltReader(str, testHalts()) + var v1 map[string]interface{} + var v2 map[string]interface{} + var d1 = jsoniter.NewDecoder(r1) + var d2 = NewStreamDecoder(r2) + + require.Equal(t, d1.More(), d2.More()) + err1 := d1.Decode(&v1) + err2 := d2.Decode(&v2) + assert.Equal(t, err1, err2) + assert.Equal(t, v1, v2) + // assert.Equal(t, d1.InputOffset(), d2.InputOffset()) + + require.Equal(t, d1.More(), d2.More()) + es4 := d1.Decode(&v1) + ee4 := d2.Decode(&v2) + assert.Equal(t, es4, ee4) + assert.Equal(t, v1, v2) + // assert.Equal(t, d1.InputOffset(), d2.InputOffset()) + + require.Equal(t, d1.More(), d2.More()) + es2 := d1.Decode(&v1) + ee2 := d2.Decode(&v2) + assert.Equal(t, es2, ee2) + assert.Equal(t, v1, v2) + // assert.Equal(t, 
d1.InputOffset(), d2.InputOffset()) + + require.Equal(t, d1.More(), d2.More()) + es3 := d1.Decode(&v1) + assert.NotNil(t, es3) + ee3 := d2.Decode(&v2) + assert.NotNil(t, ee3) + + require.Equal(t, d1.More(), d2.More()) + es5 := d1.Decode(&v1) + assert.NotNil(t, es5) + ee5 := d2.Decode(&v2) + assert.NotNil(t, ee5) +} + +func TestBuffered(t *testing.T) { + var str = _Triple_JSON + var r1 = NewHaltReader(str, testHalts()) + var v1 map[string]interface{} + var d1 = json.NewDecoder(r1) + require.Nil(t, d1.Decode(&v1)) + var r2 = NewHaltReader(str, testHalts()) + var v2 map[string]interface{} + var d2 = NewStreamDecoder(r2) + require.Nil(t, d2.Decode(&v2)) + left1, err1 := ioutil.ReadAll(d1.Buffered()) + require.Nil(t, err1) + left2, err2 := ioutil.ReadAll(d2.Buffered()) + require.Nil(t, err2) + require.Equal(t, d1.InputOffset(), d2.InputOffset()) + min := len(left1) + if min > len(left2) { + min = len(left2) + } + require.Equal(t, left1[:min], left2[:min]) + + es4 := d1.Decode(&v1) + ee4 := d2.Decode(&v2) + assert.Equal(t, es4, ee4) + assert.Equal(t, d1.InputOffset(), d2.InputOffset()) + + es2 := d1.Decode(&v1) + ee2 := d2.Decode(&v2) + assert.Equal(t, es2, ee2) + assert.Equal(t, d1.InputOffset(), d2.InputOffset()) +} + +func TestMore(t *testing.T) { + var str = _Triple_JSON + var r2 = NewHaltReader(str, testHalts()) + var v2 map[string]interface{} + var d2 = NewStreamDecoder(r2) + var r1 = NewHaltReader(str, testHalts()) + var v1 map[string]interface{} + var d1 = jsoniter.NewDecoder(r1) + require.Nil(t, d1.Decode(&v1)) + require.Nil(t, d2.Decode(&v2)) + require.Equal(t, d1.More(), d2.More()) + + es4 := d1.Decode(&v1) + ee4 := d2.Decode(&v2) + assert.Equal(t, es4, ee4) + assert.Equal(t, v1, v2) + require.Equal(t, d1.More(), d2.More()) + + es2 := d1.Decode(&v1) + ee2 := d2.Decode(&v2) + assert.Equal(t, es2, ee2) + assert.Equal(t, v1, v2) + require.Equal(t, d1.More(), d2.More()) + + es3 := d1.Decode(&v1) + assert.NotNil(t, es3) + ee3 := d2.Decode(&v2) + assert.NotNil(t, 
ee3) + require.Equal(t, d1.More(), d2.More()) + + es5 := d1.Decode(&v1) + assert.NotNil(t, es5) + ee5 := d2.Decode(&v2) + assert.NotNil(t, ee5) + require.Equal(t, d1.More(), d2.More()) +} + +func BenchmarkDecodeStream_Std(b *testing.B) { + b.Run("single", func (b *testing.B) { + var str = _Single_JSON + for i:=0; i 0 { + n, err = enc.w.Write(out) + out = out[n:] + if err != nil { + goto free_bytes + } + } + + // according to standard library, terminate each value with a newline... + enc.w.Write([]byte{'\n'}) + } + +free_bytes: + freeBytes(out) + return err +} \ No newline at end of file diff --git a/encoder/stream_test.go b/encoder/stream_test.go new file mode 100644 index 000000000..a1fd6c299 --- /dev/null +++ b/encoder/stream_test.go @@ -0,0 +1,190 @@ +/* + * Copyright 2021 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package encoder + +import ( + `bytes` + `encoding/json` + `strings` + `testing` + + jsoniter `github.com/json-iterator/go` + `github.com/stretchr/testify/require` +) + + func TestEncodeStream(t *testing.T) { + var o = map[string]interface{}{ + "a": "<>", + "b": json.RawMessage(" [ ] "), + } + var w1 = bytes.NewBuffer(nil) + var w2 = bytes.NewBuffer(nil) + var enc1 = json.NewEncoder(w1) + var enc2 = NewStreamEncoder(w2) + enc2.SetEscapeHTML(true) + enc2.SortKeys() + enc2.SetCompactMarshaler(true) + + require.Nil(t, enc1.Encode(o)) + require.Nil(t, enc2.Encode(o)) + require.Equal(t, w1.String(), w2.String()) + + enc1.SetEscapeHTML(true) + enc2.SetEscapeHTML(true) + enc1.SetIndent("你好", "\b") + enc2.SetIndent("你好", "\b") + require.Nil(t, enc1.Encode(o)) + require.Nil(t, enc2.Encode(o)) + require.Equal(t, w1.String(), w2.String()) + + enc1.SetEscapeHTML(false) + enc2.SetEscapeHTML(false) + enc1.SetIndent("", "") + enc2.SetIndent("", "") + require.Nil(t, enc1.Encode(o)) + require.Nil(t, enc2.Encode(o)) + require.Equal(t, w1.String(), w2.String()) + } + + func BenchmarkEncodeStream_Sonic(b *testing.B) { + var o = map[string]interface{}{ + "a": `<`+strings.Repeat("1", 1024)+`>`, + "b": json.RawMessage(` [ `+strings.Repeat(" ", 1024)+` ] `), + } + + b.Run("single", func(b *testing.B){ + var w = bytes.NewBuffer(nil) + var enc = NewStreamEncoder(w) + b.ResetTimer() + for i:=0; i`, + "b": json.RawMessage(` [ `+strings.Repeat(" ", 1024)+` ] `), + } + + b.Run("single", func(b *testing.B){ + var w = bytes.NewBuffer(nil) + var enc = json.NewEncoder(w) + b.ResetTimer() + for i:=0; i`, + "b": json.RawMessage(` [ `+strings.Repeat(" ", 1024)+` ] `), + } + + b.Run("single", func(b *testing.B){ + var w = bytes.NewBuffer(nil) + var jt = jsoniter.Config{ + ValidateJsonRawMessage: true, + }.Froze() + var enc = jt.NewEncoder(w) + b.ResetTimer() + for i:=0; i