Skip to content

Commit

Permalink
sql: add width_bucket builtin
Browse files Browse the repository at this point in the history
Implements the width_bucket() builtin function

Details on the Postgres implementation can be found here:
https://www.postgresql.org/docs/11/functions-math.html

Resolves #38855

Release note (sql change): add the width_bucket builtin function.
  • Loading branch information
kevinbarbour committed Aug 2, 2019
1 parent ca9184d commit d64c8e1
Show file tree
Hide file tree
Showing 4 changed files with 148 additions and 0 deletions.
8 changes: 8 additions & 0 deletions docs/generated/sql/functions.md
Original file line number Diff line number Diff line change
Expand Up @@ -637,6 +637,14 @@ has no relationship with the commit order of concurrent transactions.</p>
<tr><td><code>trunc(val: <a href="decimal.html">decimal</a>) &rarr; <a href="decimal.html">decimal</a></code></td><td><span class="funcdesc"><p>Truncates the decimal values of <code>val</code>.</p>
</span></td></tr>
<tr><td><code>trunc(val: <a href="float.html">float</a>) &rarr; <a href="float.html">float</a></code></td><td><span class="funcdesc"><p>Truncates the decimal values of <code>val</code>.</p>
</span></td></tr>
<tr><td><code>width_bucket(operand: <a href="decimal.html">decimal</a>, b1: <a href="decimal.html">decimal</a>, b2: <a href="decimal.html">decimal</a>, count: <a href="int.html">int</a>) &rarr; <a href="int.html">int</a></code></td><td><span class="funcdesc"><p>return the bucket number to which operand would be assigned in a histogram having count equal-width buckets spanning the range b1 to b2.</p>
</span></td></tr>
<tr><td><code>width_bucket(operand: <a href="float.html">float</a>, b1: <a href="float.html">float</a>, b2: <a href="float.html">float</a>, count: <a href="int.html">int</a>) &rarr; <a href="int.html">int</a></code></td><td><span class="funcdesc"><p>return the bucket number to which operand would be assigned in a histogram having count equal-width buckets spanning the range b1 to b2.</p>
</span></td></tr>
<tr><td><code>width_bucket(operand: <a href="int.html">int</a>, b1: <a href="int.html">int</a>, b2: <a href="int.html">int</a>, count: <a href="int.html">int</a>) &rarr; <a href="int.html">int</a></code></td><td><span class="funcdesc"><p>return the bucket number to which operand would be assigned in a histogram having count equal-width buckets spanning the range b1 to b2.</p>
</span></td></tr>
<tr><td><code>width_bucket(operand: anyelement, thresholds: anyelement[]) &rarr; <a href="int.html">int</a></code></td><td><span class="funcdesc"><p>return the bucket number to which operand would be assigned given an array listing the lower bounds of the buckets; returns 0 for an input less than the first lower bound; the thresholds array must be sorted, smallest first, or unexpected results will be obtained</p>
</span></td></tr></tbody>
</table>

Expand Down
25 changes: 25 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/builtin_function
Original file line number Diff line number Diff line change
Expand Up @@ -2246,3 +2246,28 @@ query BBB
SELECT pg_type_is_visible('int'::regtype), pg_type_is_visible(NULL), pg_type_is_visible(99999)
----
true NULL NULL


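# Tests for the width_bucket builtin: numeric-literal and integer bounds, an
# array of thresholds, and the operand/threshold type-mismatch error.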
query I
SELECT width_bucket(8.0, 2.0, 3.0, 5)
----
6

query I
SELECT width_bucket(5.35, 0.024, 10.06, 5)
----
3

query I
SELECT width_bucket(7, 3, 11, 5)
----
3

query I
SELECT width_bucket(now(), array['yesterday', 'today', 'tomorrow']::timestamptz[])
----
2

query error pq: width_bucket\(\): Operand and thresholds must be of the same type
SELECT width_bucket(1, array['a', 'h', 'l', 'z']);
87 changes: 87 additions & 0 deletions pkg/sql/sem/builtins/builtins.go
Original file line number Diff line number Diff line change
Expand Up @@ -2348,6 +2348,74 @@ may increase either contention or retry errors, or both.`,
}, "Truncates the decimal values of `val`."),
),

"width_bucket": makeBuiltin(defProps(),
tree.Overload{
Types: tree.ArgTypes{{"operand", types.Float}, {"b1", types.Float},
{"b2", types.Float}, {"count", types.Int}},
ReturnType: tree.FixedReturnType(types.Int),
Fn: func(ctx *tree.EvalContext, args tree.Datums) (tree.Datum, error) {
operand := float64(*args[0].(*tree.DFloat))
b1 := float64(*args[1].(*tree.DFloat))
b2 := float64(*args[2].(*tree.DFloat))
count := int(tree.MustBeDInt(args[3]))
return tree.NewDInt(tree.DInt(widthBucket(operand, b1, b2, count))), nil
},
Info: "return the bucket number to which operand would be assigned in a histogram having count " +
"equal-width buckets spanning the range b1 to b2.",
},
tree.Overload{
Types: tree.ArgTypes{{"operand", types.Decimal}, {"b1", types.Decimal},
{"b2", types.Decimal}, {"count", types.Int}},
ReturnType: tree.FixedReturnType(types.Int),
Fn: func(ctx *tree.EvalContext, args tree.Datums) (tree.Datum, error) {
operand, _ := args[0].(*tree.DDecimal).Float64()
b1, _ := args[1].(*tree.DDecimal).Float64()
b2, _ := args[2].(*tree.DDecimal).Float64()
count := int(tree.MustBeDInt(args[3]))
return tree.NewDInt(tree.DInt(widthBucket(operand, b1, b2, count))), nil
},
Info: "return the bucket number to which operand would be assigned in a histogram having count " +
"equal-width buckets spanning the range b1 to b2.",
},
tree.Overload{
Types: tree.ArgTypes{{"operand", types.Int}, {"b1", types.Int},
{"b2", types.Int}, {"count", types.Int}},
ReturnType: tree.FixedReturnType(types.Int),
Fn: func(ctx *tree.EvalContext, args tree.Datums) (tree.Datum, error) {
operand := float64(tree.MustBeDInt(args[0]))
b1 := float64(tree.MustBeDInt(args[1]))
b2 := float64(tree.MustBeDInt(args[2]))
count := int(tree.MustBeDInt(args[3]))
return tree.NewDInt(tree.DInt(widthBucket(operand, b1, b2, count))), nil
},
Info: "return the bucket number to which operand would be assigned in a histogram having count " +
"equal-width buckets spanning the range b1 to b2.",
},
tree.Overload{
Types: tree.ArgTypes{{"operand", types.Any}, {"thresholds", types.AnyArray}},
ReturnType: tree.FixedReturnType(types.Int),
Fn: func(ctx *tree.EvalContext, args tree.Datums) (tree.Datum, error) {
operand := args[0]
thresholds := tree.MustBeDArray(args[1])

if !operand.ResolvedType().Equivalent(thresholds.Array[0].ResolvedType()) {
return tree.NewDInt(0), errors.New("Operand and thresholds must be of the same type")
}

for i, v := range thresholds.Array {
if operand.Compare(ctx, v) < 0 {
return tree.NewDInt(tree.DInt(i)), nil
}
}

return tree.NewDInt(tree.DInt(thresholds.Len())), nil
},
Info: "return the bucket number to which operand would be assigned given an array listing the " +
"lower bounds of the buckets; returns 0 for an input less than the first lower bound; the " +
"thresholds array must be sorted, smallest first, or unexpected results will be obtained",
},
),

// Array functions.

"string_to_array": makeBuiltin(arrayPropsNullableArgs(),
Expand Down Expand Up @@ -4654,6 +4722,25 @@ func rpad(s string, length int, fill string) (string, error) {
return buf.String(), nil
}

// widthBucket returns the bucket number to which operand would be assigned in
// a histogram having count equal-width buckets spanning the range b1 to b2.
//
// Buckets are numbered 1 through count. The bound b1 is inclusive and b2 is
// exclusive: an operand before b1 yields 0 and an operand at or beyond b2
// yields count+1. The range may be descending (b1 > b2), in which case the
// comparisons are mirrored.
//
// The signature cannot report errors, so inputs that do not describe a
// histogram (count <= 0, b1 == b2, a NaN argument, or an infinite bound)
// yield 0 instead of performing a division whose float-to-int conversion
// would be implementation-defined.
func widthBucket(operand float64, b1 float64, b2 float64, count int) int {
	if count <= 0 || b1 == b2 || math.IsNaN(operand) ||
		math.IsNaN(b1) || math.IsNaN(b2) || math.IsInf(b1, 0) || math.IsInf(b2, 0) {
		return 0
	}

	ascending := b1 < b2
	switch {
	case ascending && operand >= b2, !ascending && operand <= b2:
		return count + 1
	case ascending && operand < b1, !ascending && operand > b1:
		return 0
	}

	// From here on operand lies in [b1, b2) (or (b2, b1] when descending).
	span, offset := b2-b1, operand-b1
	if math.IsInf(span, 0) {
		// b2-b1 overflowed although both bounds are finite; halving every
		// term first keeps the ratio intact without overflowing.
		span, offset = b2/2-b1/2, operand/2-b1/2
	}

	index := math.Floor(offset / (span / float64(count)))

	// Rounding can push the quotient up to count for an operand that is
	// strictly inside the range, and an underflowing bucket width can yield
	// NaN or Inf; clamp so the result always stays within 1..count.
	switch {
	case math.IsNaN(index) || index < 0:
		index = 0
	case index > float64(count-1):
		index = float64(count - 1)
	}
	return int(index) + 1
}

// CleanEncodingName sanitizes the string meant to represent a
// recognized encoding. This ignores any non-alphanumeric character.
//
Expand Down
28 changes: 28 additions & 0 deletions pkg/sql/sem/builtins/builtins_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -234,3 +234,31 @@ func TestLPadRPad(t *testing.T) {
}
}
}

// TestFloatWidthBucket checks widthBucket against a table of operands for
// both ascending (b1 < b2) and descending (b1 > b2) ranges, including
// operands outside the range and exactly on either bound.
func TestFloatWidthBucket(t *testing.T) {
	testCases := []struct {
		operand  float64
		b1       float64
		b2       float64
		count    int
		expected int
	}{
		{0.5, 2, 3, 5, 0},
		{8, 2, 3, 5, 6},
		{1.5, 1, 3, 2, 1},
		{5.35, 0.024, 10.06, 5, 3},
		{-3.0, -5, 5, 10, 3},
		{1, 1, 10, 2, 1},  // minimum should be inclusive
		{10, 1, 10, 2, 3}, // maximum should be exclusive
		{4, 10, 1, 4, 3},
		{11, 10, 1, 4, 0},
		{0, 10, 1, 4, 5},
	}

	for _, tc := range testCases {
		got := widthBucket(tc.operand, tc.b1, tc.b2, tc.count)
		if got != tc.expected {
			// Include the inputs so a failure identifies the offending case.
			t.Errorf("widthBucket(%v, %v, %v, %d): expected %d, found %d",
				tc.operand, tc.b1, tc.b2, tc.count, tc.expected, got)
		}
	}
}

0 comments on commit d64c8e1

Please sign in to comment.