Skip to content

Commit

Permalink
Rework package sam/expr/coerce (#5086)
Browse files Browse the repository at this point in the history
Rework the coerce package so that it uses native values instead of their
byte representation.
  • Loading branch information
mattnibs authored Mar 26, 2024
1 parent 05fdd90 commit 19b2eb5
Show file tree
Hide file tree
Showing 8 changed files with 318 additions and 386 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ require (
github.com/x448/float16 v0.8.4
github.com/yuin/goldmark v1.4.13
go.uber.org/zap v1.23.0
golang.org/x/exp v0.0.0-20231006140011-7918f672742d
golang.org/x/sync v0.4.0
golang.org/x/sys v0.13.0
golang.org/x/term v0.13.0
Expand Down Expand Up @@ -67,7 +68,6 @@ require (
go.opentelemetry.io/otel v0.16.0 // indirect
go.uber.org/atomic v1.7.0 // indirect
go.uber.org/multierr v1.8.0 // indirect
golang.org/x/exp v0.0.0-20231006140011-7918f672742d // indirect
golang.org/x/mod v0.13.0 // indirect
golang.org/x/net v0.17.0 // indirect
golang.org/x/tools v0.14.0 // indirect
Expand Down
6 changes: 1 addition & 5 deletions runtime/sam/expr/agg/math.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ type mathReducer struct {
function *anymath.Function
hasval bool
math consumer
pair coerce.Pair
}

var _ Function = (*mathReducer)(nil)
Expand All @@ -47,10 +46,7 @@ func (m *mathReducer) consumeVal(val zed.Value) {
var id int
if m.math != nil {
var err error
// XXX We're not using the value coercion parts of coerce.Pair here.
// Would be better if coerce had a function that just compared types
// and returned the type to coerce to.
id, err = m.pair.Coerce(zed.NewValue(m.math.typ(), nil), val)
id, err = coerce.Promote(zed.NewValue(m.math.typ(), nil), val)
if err != nil {
// Skip invalid values.
return
Expand Down
180 changes: 42 additions & 138 deletions runtime/sam/expr/coerce/coerce.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,158 +2,59 @@ package coerce

import (
"bytes"
"errors"
"math"

"github.com/brimdata/zed"
"github.com/brimdata/zed/pkg/byteconv"
"github.com/brimdata/zed/runtime/sam/expr/result"
"github.com/brimdata/zed/zcode"
"github.com/brimdata/zed/zson"
"golang.org/x/exp/constraints"
)

var Overflow = errors.New("integer overflow: uint64 value too large for int64")
var IncompatibleTypes = errors.New("incompatible types")

// XXX Named types should probably be preserved according to the rank
// of the underlying number type.

// Pair provides a buffer to decode values into while doing comparisons
// so the same buffers can be reused on each call without zcode.Bytes buffers
// escaping to GC. This method uses the zed.AppendInt(), zed.AppendUint(),
// etc to encode zcode.Bytes as an in-place slice instead of allocating
// new slice buffers for every value created.
type Pair struct {
// a and b point to inputs that can't change
A zcode.Bytes
B zcode.Bytes
// Buffer is a scratch buffer that stays around between calls and is the
// landing place for either the a or b value if one of them needs to
// be coerced (you never need to coerce both). Then we point a or b
// at buf and let go of the other input pointer.
result.Buffer
buf2 result.Buffer
}

func (c *Pair) Equal() bool {
// bytes.Equal() returns true for nil compared to an empty-slice,
// which doesn't work for Zed null comparisons, so we explicitly check
// for the nil condition here.
if c.A == nil {
return c.B == nil
}
if c.B == nil {
return c.A == nil
}
return bytes.Equal(c.A, c.B)
}

func (c *Pair) Coerce(a, b zed.Value) (int, error) {
a, b = a.Under(), b.Under()
c.A = a.Bytes()
c.B = b.Bytes()
aid := a.Type().ID()
bid := b.Type().ID()
if aid == bid {
return aid, nil
}
if aid == zed.IDNull {
return bid, nil
}
if bid == zed.IDNull {
return aid, nil
}
if zed.IsNumber(aid) {
if !zed.IsNumber(bid) {
return 0, IncompatibleTypes
func Equal(a, b zed.Value) bool {
if a.IsNull() {
return b.IsNull()
} else if b.IsNull() {
// We know a isn't null.
return false
}
switch aid, bid := a.Type().ID(), b.Type().ID(); {
case !zed.IsNumber(aid) || !zed.IsNumber(bid):
return aid == bid && bytes.Equal(a.Bytes(), b.Bytes())
case zed.IsFloat(aid):
return a.Float() == ToNumeric[float64](b)
case zed.IsFloat(bid):
return b.Float() == ToNumeric[float64](a)
case zed.IsSigned(aid):
av := a.Int()
if zed.IsUnsigned(bid) {
return uint64(av) == b.Uint() && av >= 0
}
id, ok := c.coerceNumbers(aid, bid)
if !ok {
return 0, Overflow
return av == b.Int()
case zed.IsSigned(bid):
bv := b.Int()
if zed.IsUnsigned(aid) {
return uint64(bv) == a.Uint() && bv >= 0
}
return id, nil
}
return 0, IncompatibleTypes
}

func intToFloat(id int, b zcode.Bytes) float64 {
if zed.IsSigned(id) {
return float64(zed.DecodeInt(b))
}
return float64(zed.DecodeUint(b))
}

func (c *Pair) promoteToSigned(in zcode.Bytes) (zcode.Bytes, bool) {
v := zed.DecodeUint(in)
if v > math.MaxInt64 {
return nil, false
return bv == a.Int()
default:
return a.Uint() == b.Uint()
}
return c.Int(int64(v)), true
}

func (c *Pair) promoteToUnsigned(in zcode.Bytes) (zcode.Bytes, bool) {
v := zed.DecodeInt(in)
if v < 0 {
return nil, false
}
return c.Uint(uint64(v)), true
}

func (c *Pair) coerceNumbers(aid, bid int) (int, bool) {
if zed.IsFloat(aid) {
if aid == zed.IDFloat16 {
c.A = c.buf2.Float64(float64(zed.DecodeFloat16(c.A)))
} else if aid == zed.IDFloat32 {
c.A = c.buf2.Float64(float64(zed.DecodeFloat32(c.A)))
}
c.B = c.Float64(intToFloat(bid, c.B))
return aid, true
}
if zed.IsFloat(bid) {
if bid == zed.IDFloat16 {
c.B = c.buf2.Float64(float64(zed.DecodeFloat16(c.B)))
} else if bid == zed.IDFloat32 {
c.B = c.buf2.Float64(float64(zed.DecodeFloat32(c.B)))
}
c.A = c.Float64(intToFloat(aid, c.A))
return bid, true
}
aIsSigned := zed.IsSigned(aid)
if aIsSigned == zed.IsSigned(bid) {
// They have the same signed-ness. Promote to the wider
// type by rank and leave the zcode.Bytes as is since
// the varint encoding is the same for all the widths.
// Width increasese with type ID.
id := aid
if bid > id {
id = bid
}
return id, true
}
id := promoteInt(aid, bid)

// Otherwise, we'll promote mixed signed-ness to signed unless
// the unsigned value is greater than signed maxint, in which
// case, we report an overflow error.
var ok bool
if aIsSigned {
c.B, ok = c.promoteToSigned(c.B)
} else {
c.A, ok = c.promoteToSigned(c.A)
}
if !ok {
// We got overflow trying to turn the unsigned to signed,
// so try turning the signed into unsigned.
if aIsSigned {
c.A, ok = c.promoteToUnsigned(c.A)
} else {
c.B, ok = c.promoteToUnsigned(c.B)
}
id = zed.IDUint64
func ToNumeric[T constraints.Integer | constraints.Float](val zed.Value) T {
val = val.Under()
switch id := val.Type().ID(); {
case zed.IsUnsigned(id):
return T(val.Uint())
case zed.IsSigned(id):
return T(val.Int())
case zed.IsFloat(id):
return T(val.Float())
}
return id, ok
panic(zson.FormatValue(val))
}

func ToFloat(val zed.Value) (float64, bool) {
val = val.Under()
switch id := val.Type().ID(); {
case zed.IsUnsigned(id):
return float64(val.Uint()), true
Expand All @@ -169,6 +70,7 @@ func ToFloat(val zed.Value) (float64, bool) {
}

func ToUint(val zed.Value) (uint64, bool) {
val = val.Under()
switch id := val.Type().ID(); {
case zed.IsUnsigned(id):
return val.Uint(), true
Expand All @@ -188,6 +90,7 @@ func ToUint(val zed.Value) (uint64, bool) {
}

func ToInt(val zed.Value) (int64, bool) {
val = val.Under()
switch id := val.Type().ID(); {
case zed.IsUnsigned(id):
return int64(val.Uint()), true
Expand All @@ -204,6 +107,7 @@ func ToInt(val zed.Value) (int64, bool) {
}

func ToBool(val zed.Value) (bool, bool) {
val = val.Under()
if val.IsString() {
v, err := byteconv.ParseBool(val.Bytes())
return v, err == nil
Expand Down
85 changes: 74 additions & 11 deletions runtime/sam/expr/coerce/promote.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,83 @@
package coerce

import (
"errors"
"math"

"github.com/brimdata/zed"
)

var promote = []int{
var ErrIncompatibleTypes = errors.New("incompatible types")
var ErrOverflow = errors.New("integer overflow: uint64 value too large for int64")

func Promote(a, b zed.Value) (int, error) {
a, b = a.Under(), b.Under()
aid, bid := a.Type().ID(), b.Type().ID()
switch {
case aid == bid:
return aid, nil
case aid == zed.IDNull:
return bid, nil
case bid == zed.IDNull:
return aid, nil
case !zed.IsNumber(aid) || !zed.IsNumber(bid):
return 0, ErrIncompatibleTypes
case zed.IsFloat(aid):
if !zed.IsFloat(bid) {
bid = promoteFloat[bid]
}
case zed.IsFloat(bid):
if !zed.IsFloat(aid) {
aid = promoteFloat[aid]
}
case zed.IsSigned(aid):
if zed.IsUnsigned(bid) {
if b.Uint() > math.MaxInt64 {
return 0, ErrOverflow
}
bid = promoteInt[bid]
}
case zed.IsSigned(bid):
if zed.IsUnsigned(aid) {
if a.Uint() > math.MaxInt64 {
return 0, ErrOverflow
}
aid = promoteInt[aid]
}
}
if aid > bid {
return aid, nil
}
return bid, nil
}

var promoteFloat = []int{
zed.IDFloat16, // IDUint8 = 0
zed.IDFloat16, // IDUint16 = 1
zed.IDFloat32, // IDUint32 = 2
zed.IDFloat64, // IDUint64 = 3
zed.IDFloat128, // IDUint128 = 4
zed.IDFloat256, // IDUint256 = 5
zed.IDFloat16, // IDInt8 = 6
zed.IDFloat16, // IDInt16 = 7
zed.IDFloat32, // IDInt32 = 8
zed.IDFloat64, // IDInt64 = 9
zed.IDFloat128, // IDInt128 = 10
zed.IDFloat256, // IDInt256 = 11
zed.IDFloat64, // IDDuration = 12
zed.IDFloat64, // IDTime = 13
zed.IDFloat16, // IDFloat16 = 14
zed.IDFloat32, // IDFloat32 = 15
zed.IDFloat64, // IDFloat64 = 16
zed.IDFloat128, // IDFloat64 = 17
zed.IDFloat256, // IDFloat64 = 18
zed.IDFloat32, // IDDecimal32 = 19
zed.IDFloat64, // IDDecimal64 = 20
zed.IDFloat128, // IDDecimal128 = 21
zed.IDFloat256, // IDDecimal256 = 22
}

var promoteInt = []int{
zed.IDInt8, // IDUint8 = 0
zed.IDInt16, // IDUint16 = 1
zed.IDInt32, // IDUint32 = 2
Expand All @@ -29,13 +102,3 @@ var promote = []int{
zed.IDDecimal128, // IDDecimal128 = 21
zed.IDDecimal256, // IDDecimal256 = 22
}

// promoteInt promotes type to the largest signed type where the IDs must both
// satisfy zed.IsNumber.
func promoteInt(aid, bid int) int {
id := promote[aid]
if bid := promote[bid]; bid > id {
id = bid
}
return id
}
Loading

0 comments on commit 19b2eb5

Please sign in to comment.