Skip to content

Commit

Permalink
feat: streaming encoding
Browse files Browse the repository at this point in the history
  • Loading branch information
tdakkota committed Jan 25, 2023
1 parent 644d74e commit 57ff135
Show file tree
Hide file tree
Showing 17 changed files with 1,192 additions and 0 deletions.
2 changes: 2 additions & 0 deletions internal/byteseq/byteseq.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
// Package byteseq provides a Byteseq type that can be used to represent a sequence of bytes.
package byteseq

import "unicode/utf8"

// Byteseq is a type constraint satisfied by the two built-in byte sequence
// representations: byte slices and strings.
type Byteseq interface {
	[]byte | string
}
Expand Down
76 changes: 76 additions & 0 deletions stream/enc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
package stream

import "io"

// Encoder writes json tokens to given writer.
//
// W is the concrete type of the destination writer. Output is collected in
// an internal buffer first; Close flushes whatever is still buffered.
type Encoder[W io.Writer] struct {
w writer[W]
indent int // count of spaces for single indentation level; zero disables indentation

// first handles state for comma and indentation writing.
//
// Every new Object or Array appends a new level to this slice, and
// the last element of this slice denotes whether the first element of
// the innermost Object or Array is still to be written.
//
// We write commas only before non-first elements of an Array or Object.
// The length of the slice is the current nesting depth, which is also
// what the indentation width is derived from.
//
// See comma, begin, end and FieldStart for implementation details.
//
// Note: probably, this can be optimized as a bit set to reduce memory
// consumption.
//
// See https://yourbasic.org/algorithms/your-basic-int/#simple-sets
first []bool
}

// NewEncoder creates an Encoder whose output goes to w.
//
// The encoder starts with an empty 512-byte internal buffer, no indentation
// and no open arrays or objects.
func NewEncoder[W io.Writer](w W) *Encoder[W] {
	enc := new(Encoder[W])
	enc.w.writer = w
	enc.w.buf = make([]byte, 0, 512)
	return enc
}

// Close pushes any data still held in the internal buffer to the underlying
// writer and returns the flush error recorded by the internal writer, if any.
func (e *Encoder[W]) Close() error {
	w := &e.w
	// The boolean result is redundant here: the outcome is read from flushErr.
	w.flush()
	return w.flushErr
}

// SetIdent sets the number of spaces written per nesting level.
//
// With n == 0 no newlines or indentation are written at all.
func (e *Encoder[W]) SetIdent(n int) {
	e.indent = n
}

// Reset prepares the encoder for reuse: the nesting state is cleared (its
// backing storage is kept) and w is handed to the internal writer's Reset.
func (e *Encoder[W]) Reset(w W) {
	e.first = e.first[:0]
	e.w.Reset(w)
}

// RawStr writes v verbatim as a single json value, preceded by a comma when
// the enclosing array or object already has an element.
//
// Returns true if writing failed.
func (e *Encoder[W]) RawStr(v string) bool {
	if e.comma() {
		return true
	}
	return e.w.writeString(v)
}

// Raw writes b verbatim as a single json value, preceded by a comma when
// the enclosing array or object already has an element.
//
// Returns true if writing failed.
func (e *Encoder[W]) Raw(b []byte) bool {
	if e.comma() {
		return true
	}
	return e.w.writeBytes(b...)
}

// writeIndent starts a new line and pads it with indent spaces for every
// currently open array or object. It writes nothing when indentation is
// disabled (indent == 0).
//
// Returns true if writing failed.
func (e *Encoder[W]) writeIndent() bool {
	switch {
	case e.indent == 0:
		return false
	case e.w.writeByte('\n'):
		return true
	}
	for remaining := len(e.first) * e.indent; remaining > 0; remaining-- {
		if e.w.writeByte(' ') {
			return true
		}
	}
	return false
}
40 changes: 40 additions & 0 deletions stream/enc_arr.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package stream

// ArrStart opens an array: it writes the separating comma if one is due,
// the '[' token, registers a new nesting level and then indents for the
// first element when indentation is enabled.
//
// Use Arr as convenience helper for writing arrays.
//
// Returns true if writing failed.
func (e *Encoder[W]) ArrStart() bool {
	if e.comma() {
		return true
	}
	if e.w.writeByte('[') {
		return true
	}
	e.begin()
	return e.writeIndent()
}

// ArrEnd closes an array: it drops the innermost nesting level, indents for
// the closing token when indentation is enabled and writes ']'.
//
// Use Arr as convenience helper for writing arrays.
//
// Returns true if writing failed.
func (e *Encoder[W]) ArrEnd() bool {
	e.end()
	if e.writeIndent() {
		return true
	}
	return e.w.writeByte(']')
}

// ArrEmpty writes an empty array ("[]") as a single value.
//
// The separator is handled exactly once, here. ArrStart performs its own
// comma handling, so calling comma and then ArrStart consumed the
// "first element" flag and then emitted a comma anyway, producing invalid
// json such as ",[]" for a first element or ",,[]" for later ones. The
// brackets are therefore written directly, which also keeps an empty array
// compact ("[]") when indentation is enabled.
//
// Returns true if writing failed.
func (e *Encoder[W]) ArrEmpty() bool {
	return e.comma() || e.w.writeString("[]")
}

// Arr writes a complete array: the opening token, whatever the callback f
// writes, and the closing token. Encoding stops at the first step that
// reports a failure.
//
// If callback is nil, writes empty array.
func (e *Encoder[W]) Arr(f func(e *Encoder[W]) bool) bool {
	if f == nil {
		return e.ArrEmpty()
	}
	if e.ArrStart() {
		return true
	}
	if f(e) {
		return true
	}
	return e.ArrEnd()
}
50 changes: 50 additions & 0 deletions stream/enc_b64.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package stream

import "github.com/segmentio/asm/base64"

// Base64 encodes data as standard base64 encoded string.
//
// Same as encoding/json, base64.StdEncoding or RFC 4648.
//
// A nil slice is written as null. Returns true if writing failed.
func (e *Encoder[W]) Base64(data []byte) bool {
	if data == nil {
		return e.Null()
	}

	if e.comma() || e.w.writeByte('"') {
		return true
	}

	encodedLen := base64.StdEncoding.EncodedLen(len(data))

	// Flush only when the encoded payload does not fit into the free space
	// that is left in the buffer.
	if len(e.w.buf)+encodedLen > cap(e.w.buf) {
		if e.w.flush() {
			return true
		}
	}

	// Always take the slice header AFTER a potential flush. A header captured
	// before it still carries the old length, so encoding through it would
	// either re-expose bytes that were already written out (duplicated
	// output) or slice past the capacity and panic.
	if buf := e.w.buf; len(buf)+encodedLen <= cap(buf) {
		// There is enough space in the buffer (possibly thanks to the
		// flush above): encode in place, without any allocation.
		start := len(buf)
		buf = buf[:start+encodedLen]
		base64.StdEncoding.Encode(buf[start:], data)
		e.w.buf = buf
	} else {
		// The payload is larger than the buffer can hold, so it is encoded
		// into a temporary slice and handed to the writer as a whole.
		//
		// StdEncoding includes padding, so we can't just split the data into chunks.
		// FIXME(tdakkota): Is there a way to avoid this allocation?
		// If we can't, we should at least use a pool.
		// Or use streaming encoder from stdlib.
		// Or remove this method.
		r := make([]byte, encodedLen)
		base64.StdEncoding.Encode(r, data)
		if e.w.writeBytes(r...) {
			return true
		}
	}

	return e.w.writeByte('"')
}
19 changes: 19 additions & 0 deletions stream/enc_bool.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package stream

// True writes the json literal true, preceded by a comma when one is due.
//
// Returns true if writing failed.
func (e *Encoder[W]) True() bool {
	if e.comma() {
		return true
	}
	return e.w.writeString("true")
}

// False writes the json literal false, preceded by a comma when one is due.
//
// Returns true if writing failed.
func (e *Encoder[W]) False() bool {
	if e.comma() {
		return true
	}
	return e.w.writeString("false")
}

// Bool writes v as the json literal true or false by delegating to True or
// False.
func (e *Encoder[W]) Bool(v bool) bool {
	if !v {
		return e.False()
	}
	return e.True()
}
33 changes: 33 additions & 0 deletions stream/enc_comma.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package stream

// begin should be called before new Array or Object.
//
// It opens a new nesting level whose first element is still pending.
func (e *Encoder[W]) begin() {
	const firstPending = true
	e.first = append(e.first, firstPending)
}

// end should be called after Array or Object.
//
// It drops the innermost nesting level; with no level open it does nothing.
func (e *Encoder[W]) end() {
	if depth := len(e.first); depth > 0 {
		e.first = e.first[:depth-1]
	}
}

// current returns the index of the innermost nesting level in first, or -1
// when no array or object is open.
func (e *Encoder[W]) current() int {
	depth := len(e.first)
	return depth - 1
}

// comma should be called before any new value.
//
// A comma, followed by indentation when it is enabled, is written:
//  1. Before every field except the first.
//  2. Before every array element except the first.
//
// Returns true if writing failed.
func (e *Encoder[W]) comma() bool {
	depth := len(e.first)
	if depth == 0 {
		// Not inside any array or object: nothing to separate.
		return false
	}
	if pending := &e.first[depth-1]; *pending {
		// First element of the innermost level: no separator, but remember
		// that every following element needs one.
		*pending = false
		return false
	}
	if e.w.writeByte(',') {
		return true
	}
	return e.writeIndent()
}
56 changes: 56 additions & 0 deletions stream/enc_float.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package stream

import (
"math"
"strconv"
)

// Float32 encodes v as a json number using 32-bit float formatting.
//
// NB: Infinities and NaN are represented as null.
func (e *Encoder[W]) Float32(v float32) bool {
	const bits = 32
	return e.Float(float64(v), bits)
}

// Float64 encodes v as a json number using 64-bit float formatting.
//
// NB: Infinities and NaN are represented as null.
func (e *Encoder[W]) Float64(v float64) bool {
	const bits = 64
	return e.Float(v, bits)
}

// Float writes v as a json number. bits is the size of the original
// floating point type (32 or 64) and selects the precision used for
// formatting.
//
// NB: Infinities and NaN are represented as null.
//
// Returns true if writing failed.
func (e *Encoder[W]) Float(v float64, bits int) bool {
	if math.IsInf(v, 0) || math.IsNaN(v) {
		// Like in ECMA:
		// NaN and Infinity regardless of sign are represented
		// as the String null.
		//
		// JSON.stringify({"foo":NaN}) -> {"foo":null}
		return e.Null() // FIXME(tdakkota): probably, we should return false here and set error
	}

	// From go std sources, strconv/ftoa.go:

	// Convert as if by ES6 number to string conversion.
	// This matches most other JSON generators.
	// See golang.org/issue/6384 and golang.org/issue/14135.
	// Like fmt %g, but the exponent cutoffs are different
	// and exponents themselves are not padded to two digits.
	format := byte('f')
	if magnitude := math.Abs(v); magnitude != 0 {
		var exponent bool
		switch bits {
		case 64:
			exponent = magnitude < 1e-6 || magnitude >= 1e21
		case 32:
			// Note: Must use float32 comparisons for underlying float32
			// value to get precise cutoffs right.
			narrow := float32(magnitude)
			exponent = narrow < 1e-6 || narrow >= 1e21
		}
		if exponent {
			format = 'e'
		}
	}

	// FIXME(tdakkota): use e.w.buf?
	out := strconv.AppendFloat(make([]byte, 0, 32), v, format, -1, bits)
	if n := len(out); format == 'e' && n >= 4 && out[n-4] == 'e' && out[n-3] == '-' && out[n-2] == '0' {
		// clean up e-09 to e-9
		out[n-2] = out[n-1]
		out = out[:n-1]
	}

	if e.comma() {
		return true
	}
	return e.w.writeBytes(out...)
}
Loading

0 comments on commit 57ff135

Please sign in to comment.