Skip to content

Commit

Permalink
Merge pull request #70 from tdakkota/fix/int-tail-handling
Browse files Browse the repository at this point in the history
fix: handle end of the number correctly
  • Loading branch information
tdakkota authored Jan 24, 2023
2 parents 75ecdde + 03c1e5e commit 797756e
Show file tree
Hide file tree
Showing 10 changed files with 822 additions and 321 deletions.
273 changes: 109 additions & 164 deletions dec_float.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@ package jx

import (
"bytes"
"io"
"math/big"
"strconv"

"github.com/go-faster/errors"
Expand All @@ -15,57 +13,35 @@ var (
)

const (
invalidCharForNumber = int8(-1)
endOfNumber = int8(-2)
dotInNumber = int8(-3)
maxFloat64 = 1<<63 - 1
dotInNumber int8 = -iota - 1
expInNumber
plusInNumber
minusInNumber
endOfNumber
invalidCharForNumber

maxFloat64 = 1<<63 - 1
)

func init() {
for i := 0; i < len(floatDigits); i++ {
floatDigits[i] = invalidCharForNumber
}
for i := int8('0'); i <= int8('9'); i++ {
floatDigits[i] = i - int8('0')
}
floatDigits[','] = endOfNumber
floatDigits[']'] = endOfNumber
floatDigits['}'] = endOfNumber
floatDigits[' '] = endOfNumber
floatDigits['\t'] = endOfNumber
floatDigits['\n'] = endOfNumber
floatDigits['.'] = dotInNumber
}

// BigFloat read big.Float
func (d *Decoder) BigFloat() (*big.Float, error) {
str, err := d.numberAppend(nil)
if err != nil {
return nil, errors.Wrap(err, "number")
}
prec := 64
if len(str) > prec {
prec = len(str)
}
val, _, err := big.ParseFloat(string(str), 10, uint(prec), big.ToZero)
if err != nil {
return nil, errors.Wrap(err, "float")
}
return val, nil
}

// BigInt read big.Int
func (d *Decoder) BigInt() (*big.Int, error) {
str, err := d.numberAppend(nil)
if err != nil {
return nil, errors.Wrap(err, "number")
}
v := big.NewInt(0)
var ok bool
if v, ok = v.SetString(string(str), 10); !ok {
return nil, errors.New("invalid")
for i := int8('0'); i <= int8('9'); i++ {
floatDigits[i] = i - int8('0')
}
return v, nil
floatDigits['.'] = dotInNumber
floatDigits['e'] = expInNumber
floatDigits['E'] = expInNumber
floatDigits['+'] = plusInNumber
floatDigits['-'] = minusInNumber
}

// Float32 reads float32 value.
Expand Down Expand Up @@ -97,19 +73,22 @@ func (d *Decoder) positiveFloat32() (float32, error) {
i++
ind := floatDigits[c]
switch ind {
case invalidCharForNumber:
return d.float32Slow()
case endOfNumber:
return 0, errors.New("empty")
case dotInNumber:
return 0, errors.New("leading dot")
case invalidCharForNumber, endOfNumber:
return 0, badToken(c, d.offset())
case dotInNumber, plusInNumber, expInNumber:
err := badToken(c, d.offset())
return 0, errors.Wrapf(err, "leading %q", c)
case minusInNumber: // minus handled by caller
err := badToken(c, d.offset())
return 0, errors.Wrap(err, "double minus")
case 0:
if i == d.tail {
return d.float32Slow()
}
c = d.buf[i]
if floatDigits[c] >= 0 {
return 0, errors.New("leading zero")
err := badToken(c, d.offset()+1)
return 0, errors.Wrap(err, "leading zero")
}
}
value := uint64(ind)
Expand All @@ -120,11 +99,11 @@ NonDecimalLoop:
ind := floatDigits[c]
switch ind {
case invalidCharForNumber:
return d.float32Slow()
return 0, badToken(c, d.offset()+i)
case endOfNumber:
d.head = i
return float32(value), nil
case dotInNumber:
case dotInNumber, expInNumber:
break NonDecimalLoop
}
if value > uint64SafeToMultiple10 {
Expand All @@ -150,8 +129,10 @@ NonDecimalLoop:
}
// too many decimal places
return d.float32Slow()
case invalidCharForNumber, dotInNumber:
case dotInNumber, expInNumber, plusInNumber, minusInNumber:
return d.float32Slow()
case invalidCharForNumber:
return 0, badToken(c, d.offset()+i)
}
decimalPlaces++
if value > uint64SafeToMultiple10 {
Expand All @@ -163,87 +144,23 @@ NonDecimalLoop:
return d.float32Slow()
}

var numberSet = [256]byte{
'+': 1,
'-': 1,
'.': 1,
'e': 1,
'E': 1,
'0': 1,
'1': 1,
'2': 1,
'3': 1,
'4': 1,
'5': 1,
'6': 1,
'7': 1,
'8': 1,
'9': 1,
}

func (d *Decoder) number() []byte {
start := d.head
buf := d.buf[d.head:d.tail]
for i, c := range buf {
if numberSet[c] == 0 {
// End of number.
d.head += i
return d.buf[start:d.head]
}
}
// Buffer is number within head:tail.
d.head = d.tail
return d.buf[start:d.tail]
}

func (d *Decoder) numberAppend(b []byte) ([]byte, error) {
for {
b = append(b, d.number()...)
if d.head != d.tail {
return b, nil
}
if err := d.read(); err != nil {
if err == io.EOF {
return b, nil
}
return b, err
}
}
}

const (
size32 = 32
size64 = 64
)

func (d *Decoder) float32Slow() (float32, error) {
v, err := d.floatSlow(size32)
if err != nil {
return 0, err
}
return float32(v), err
}

// Float64 read float64
func (d *Decoder) Float64() (float64, error) {
c, err := d.more()
if err != nil {
return 0, err
}
if floatDigits[c] >= 0 {
if c != '-' {
d.unread()
return d.positiveFloat64()
}
switch c {
case '-':
v, err := d.positiveFloat64()
if err != nil {
return 0, err
}
return -v, err
default:
return 0, badToken(c, d.offset())
v, err := d.positiveFloat64()
if err != nil {
return 0, err
}
if c == '-' {
v *= -1
}
return v, nil
}

func (d *Decoder) positiveFloat64() (float64, error) {
Expand All @@ -256,20 +173,22 @@ func (d *Decoder) positiveFloat64() (float64, error) {
i++
ind := floatDigits[c]
switch ind {
case invalidCharForNumber:
return d.float64Slow()
case endOfNumber:
return 0, errors.New("empty")
case dotInNumber:
return 0, errors.New("leading dot")
case invalidCharForNumber, endOfNumber:
return 0, badToken(c, d.offset())
case dotInNumber, plusInNumber, expInNumber:
err := badToken(c, d.offset())
return 0, errors.Wrapf(err, "leading %q", c)
case minusInNumber: // minus handled by caller
err := badToken(c, d.offset())
return 0, errors.Wrap(err, "double minus")
case 0:
if i == d.tail {
return d.float64Slow()
}
c = d.buf[i]
switch c {
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
return 0, errors.New("leading zero")
if floatDigits[c] >= 0 {
err := badToken(c, d.offset()+1)
return 0, errors.Wrap(err, "leading zero")
}
}
value := uint64(ind)
Expand All @@ -280,11 +199,11 @@ NonDecimal:
ind := floatDigits[c]
switch ind {
case invalidCharForNumber:
return d.float64Slow()
return 0, badToken(c, d.offset()+i)
case endOfNumber:
d.head = i
return float64(value), nil
case dotInNumber:
case dotInNumber, expInNumber:
break NonDecimal
}
if value > uint64SafeToMultiple10 {
Expand All @@ -310,8 +229,10 @@ NonDecimal:
}
// too many decimal places
return d.float64Slow()
case invalidCharForNumber, dotInNumber:
case dotInNumber, expInNumber, plusInNumber, minusInNumber:
return d.float64Slow()
case invalidCharForNumber:
return 0, badToken(c, d.offset()+i)
}
decimalPlaces++
// Not checking for uint64SafeToMultiple10 here because
Expand All @@ -326,46 +247,29 @@ NonDecimal:
return d.float64Slow()
}

func (d *Decoder) float64Slow() (float64, error) { return d.floatSlow(size64) }

func validateFloat(str []byte) error {
// strconv.ParseFloat is not validating `1.` or `1.e1`
if len(str) == 0 {
return errors.New("empty")
}
if str[0] == '-' {
return errors.New("double minus")
}
if len(str) >= 2 && str[0] == '0' {
switch str[1] {
case 'e', 'E', '.':
default:
return errors.New("leading zero")
}
}
dotPos := bytes.IndexByte(str, '.')
if dotPos != -1 {
if dotPos == len(str)-1 {
return errors.New("dot as last char")
}
switch str[dotPos+1] {
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
default:
return errors.New("no digit after dot")
}
func (d *Decoder) float32Slow() (float32, error) {
v, err := d.floatSlow(32)
if err != nil {
return 0, err
}
return nil
return float32(v), err
}

func (d *Decoder) float64Slow() (float64, error) { return d.floatSlow(64) }

func (d *Decoder) floatSlow(size int) (float64, error) {
var buf [32]byte
var (
buf [32]byte
offset = d.offset()
)

str, err := d.numberAppend(buf[:0])
if err != nil {
return 0, errors.Wrap(err, "number")
}
if err := validateFloat(str); err != nil {
return 0, errors.Wrap(err, "invalid")

if err := validateFloat(str, offset); err != nil {
return 0, err
}

val, err := strconv.ParseFloat(string(str), size)
Expand All @@ -375,3 +279,44 @@ func (d *Decoder) floatSlow(size int) (float64, error) {

return val, nil
}

func validateFloat(str []byte, offset int) error {
// strconv.ParseFloat is not validating `1.` or `1.e1`
if len(str) == 0 {
// FIXME(tdakkota): use io.ErrUnexpectedEOF?
return errors.New("empty")
}

switch c := str[0]; floatDigits[c] {
case dotInNumber, plusInNumber, expInNumber:
err := badToken(c, offset)
return errors.Wrapf(err, "leading %q", c)
case minusInNumber: // minus handled by caller
err := badToken(c, offset)
return errors.Wrap(err, "double minus")
case 0:
if len(str) >= 2 {
switch str[1] {
case 'e', 'E', '.':
default:
err := badToken(str[1], offset+1)
return errors.Wrap(err, "leading zero")
}
}
}

dotPos := bytes.IndexByte(str, '.')
if dotPos != -1 {
if dotPos == len(str)-1 {
// FIXME(tdakkota): use io.ErrUnexpectedEOF?
return errors.New("dot as last char")
}
switch c := str[dotPos+1]; c {
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
default:
err := badToken(c, offset+dotPos+1)
return errors.Wrap(err, "no digit after dot")
}
}
return nil
}
Loading

0 comments on commit 797756e

Please sign in to comment.