From 48599ad32ae13cae8959eee5cf9f35b0c5f2f6c1 Mon Sep 17 00:00:00 2001
From: Kevin Barbour
Date: Fri, 2 Aug 2019 10:12:59 -0400
Subject: [PATCH] sql: add width_bucket builtin
Implements the width_bucket() builtin function.
Details on the Postgres implementation can be found here:
https://www.postgresql.org/docs/11/functions-math.html
Resolves #38855
Release note (sql change): add the width_bucket builtin function.
---
docs/generated/sql/functions.md | 6 ++
.../testdata/logic_test/builtin_function | 25 +++++++
pkg/sql/sem/builtins/builtins.go | 73 +++++++++++++++++++
pkg/sql/sem/builtins/builtins_test.go | 28 +++++++
4 files changed, 132 insertions(+)
diff --git a/docs/generated/sql/functions.md b/docs/generated/sql/functions.md
index e901220c8c7a..ae0d95210273 100644
--- a/docs/generated/sql/functions.md
+++ b/docs/generated/sql/functions.md
@@ -637,6 +637,12 @@ has no relationship with the commit order of concurrent transactions.
trunc(val: decimal) → decimal | Truncates the decimal values of val .
|
trunc(val: float) → float | Truncates the decimal values of val .
+ |
+width_bucket(operand: decimal, b1: decimal, b2: decimal, count: int) → int | return the bucket number to which operand would be assigned in a histogram having count equal-width buckets spanning the range b1 to b2.
+ |
+width_bucket(operand: int, b1: int, b2: int, count: int) → int | return the bucket number to which operand would be assigned in a histogram having count equal-width buckets spanning the range b1 to b2.
+ |
+width_bucket(operand: anyelement, thresholds: anyelement[]) → int | return the bucket number to which operand would be assigned given an array listing the lower bounds of the buckets; returns 0 for an input less than the first lower bound; the thresholds array must be sorted, smallest first, or unexpected results will be obtained
|
diff --git a/pkg/sql/logictest/testdata/logic_test/builtin_function b/pkg/sql/logictest/testdata/logic_test/builtin_function
index c07ab1e00a1c..718820c156a5 100644
--- a/pkg/sql/logictest/testdata/logic_test/builtin_function
+++ b/pkg/sql/logictest/testdata/logic_test/builtin_function
@@ -2246,3 +2246,28 @@ query BBB
SELECT pg_type_is_visible('int'::regtype), pg_type_is_visible(NULL), pg_type_is_visible(99999)
----
true NULL NULL
+
+
+# Tests for width_bucket builtin
+query I
+SELECT width_bucket(8.0, 2.0, 3.0, 5)
+----
+6
+
+query I
+SELECT width_bucket(5.35, 0.024, 10.06, 5)
+----
+3
+
+query I
+SELECT width_bucket(7, 3, 11, 5)
+----
+3
+
+query I
+SELECT width_bucket(now(), array['yesterday', 'today', 'tomorrow']::timestamptz[])
+----
+2
+
+query error pq: width_bucket\(\): Operand and thresholds must be of the same type
+SELECT width_bucket(1, array['a', 'h', 'l', 'z']);
\ No newline at end of file
diff --git a/pkg/sql/sem/builtins/builtins.go b/pkg/sql/sem/builtins/builtins.go
index 456cfd23fa2d..49883a53a183 100644
--- a/pkg/sql/sem/builtins/builtins.go
+++ b/pkg/sql/sem/builtins/builtins.go
@@ -2348,6 +2348,60 @@ may increase either contention or retry errors, or both.`,
}, "Truncates the decimal values of `val`."),
),
+	// width_bucket assigns an operand to a histogram bucket. Two overloads take
+	// explicit bounds (b1, b2) and a bucket count; the third takes a sorted array
+	// of bucket lower bounds.
+	"width_bucket": makeBuiltin(defProps(),
+		tree.Overload{
+			Types: tree.ArgTypes{{"operand", types.Decimal}, {"b1", types.Decimal},
+				{"b2", types.Decimal}, {"count", types.Int}},
+			ReturnType: tree.FixedReturnType(types.Int),
+			Fn: func(ctx *tree.EvalContext, args tree.Datums) (tree.Datum, error) {
+				// The bucket arithmetic is done in float64, so a decimal that
+				// cannot be converted is reported instead of being silently
+				// replaced by whatever value accompanies the error.
+				operand, err := args[0].(*tree.DDecimal).Float64()
+				if err != nil {
+					return nil, err
+				}
+				b1, err := args[1].(*tree.DDecimal).Float64()
+				if err != nil {
+					return nil, err
+				}
+				b2, err := args[2].(*tree.DDecimal).Float64()
+				if err != nil {
+					return nil, err
+				}
+				count := int(tree.MustBeDInt(args[3]))
+				// Both conditions below would otherwise make the bucket width
+				// zero or meaningless and lead to a division by zero.
+				if count <= 0 {
+					return nil, errors.New("count must be greater than zero")
+				}
+				if b1 == b2 {
+					return nil, errors.New("lower bound cannot equal upper bound")
+				}
+				return tree.NewDInt(tree.DInt(widthBucket(operand, b1, b2, count))), nil
+			},
+			Info: "return the bucket number to which operand would be assigned in a histogram having count " +
+				"equal-width buckets spanning the range b1 to b2.",
+		},
+		tree.Overload{
+			Types: tree.ArgTypes{{"operand", types.Int}, {"b1", types.Int},
+				{"b2", types.Int}, {"count", types.Int}},
+			ReturnType: tree.FixedReturnType(types.Int),
+			Fn: func(ctx *tree.EvalContext, args tree.Datums) (tree.Datum, error) {
+				operand := float64(tree.MustBeDInt(args[0]))
+				b1 := float64(tree.MustBeDInt(args[1]))
+				b2 := float64(tree.MustBeDInt(args[2]))
+				count := int(tree.MustBeDInt(args[3]))
+				// Same argument validation as the decimal overload: reject
+				// inputs for which no equal-width histogram exists.
+				if count <= 0 {
+					return nil, errors.New("count must be greater than zero")
+				}
+				if b1 == b2 {
+					return nil, errors.New("lower bound cannot equal upper bound")
+				}
+				return tree.NewDInt(tree.DInt(widthBucket(operand, b1, b2, count))), nil
+			},
+			Info: "return the bucket number to which operand would be assigned in a histogram having count " +
+				"equal-width buckets spanning the range b1 to b2.",
+		},
+		tree.Overload{
+			Types:      tree.ArgTypes{{"operand", types.Any}, {"thresholds", types.AnyArray}},
+			ReturnType: tree.FixedReturnType(types.Int),
+			Fn: func(ctx *tree.EvalContext, args tree.Datums) (tree.Datum, error) {
+				operand := args[0]
+				thresholds := tree.MustBeDArray(args[1])
+
+				// An empty thresholds array defines no lower bounds, so every
+				// operand falls into bucket 0. Returning early also keeps the
+				// type check below from indexing past the end of the array.
+				if thresholds.Len() == 0 {
+					return tree.NewDInt(0), nil
+				}
+
+				// The first element stands in for the element type of the array.
+				if !operand.ResolvedType().Equivalent(thresholds.Array[0].ResolvedType()) {
+					return nil, errors.New("Operand and thresholds must be of the same type")
+				}
+
+				// The bucket number is the index of the first threshold that is
+				// strictly greater than the operand; thresholds are assumed to
+				// be sorted ascending.
+				for i, v := range thresholds.Array {
+					if operand.Compare(ctx, v) < 0 {
+						return tree.NewDInt(tree.DInt(i)), nil
+					}
+				}
+
+				// The operand is at or above every threshold: last bucket.
+				return tree.NewDInt(tree.DInt(thresholds.Len())), nil
+			},
+			Info: "return the bucket number to which operand would be assigned given an array listing the " +
+				"lower bounds of the buckets; returns 0 for an input less than the first lower bound; the " +
+				"thresholds array must be sorted, smallest first, or unexpected results will be obtained",
+		},
+	),
+
// Array functions.
"string_to_array": makeBuiltin(arrayPropsNullableArgs(),
@@ -4654,6 +4708,25 @@ func rpad(s string, length int, fill string) (string, error) {
return buf.String(), nil
}
+// widthBucket returns the bucket number to which operand would be assigned in
+// a histogram having count equal-width buckets spanning the range b1 to b2.
+//
+// The range may be ascending (b1 < b2) or descending (b1 > b2). In both cases
+// b1 is inclusive and b2 is exclusive:
+//   - an operand on the far side of b1 (outside the range) yields 0;
+//   - an operand at or beyond b2 yields count + 1;
+//   - any other operand yields a bucket in [1, count].
+//
+// The function cannot report errors, so inputs for which no histogram exists
+// (count <= 0, equal, NaN or infinite bounds, or a NaN operand) yield 0 rather
+// than the result of converting NaN or an infinity to int. Callers that need
+// to surface an error for such inputs should validate before calling.
+func widthBucket(operand float64, b1 float64, b2 float64, count int) int {
+	// !(b1 < b2 || b1 > b2) is true both for equal bounds and for NaN bounds.
+	if count <= 0 || math.IsNaN(operand) || !(b1 < b2 || b1 > b2) ||
+		math.IsInf(b1, 0) || math.IsInf(b2, 0) {
+		return 0
+	}
+
+	ascending := b1 < b2
+
+	// Operand lies before the start of the range.
+	if (ascending && operand < b1) || (!ascending && operand > b1) {
+		return 0
+	}
+
+	// Operand lies at or past the end of the range. The end bound is tested
+	// explicitly (rather than left to the division below) so that it lands in
+	// the overflow bucket regardless of floating-point rounding.
+	if (ascending && operand >= b2) || (!ascending && operand <= b2) {
+		return count + 1
+	}
+
+	// For a descending range both width and the difference are negative, so
+	// their quotient is still the non-negative offset in bucket units.
+	width := (b2 - b1) / float64(count)
+	bucket := int(math.Floor((operand-b1)/width)) + 1
+
+	// The operand is known to be inside the range here; clamp to absorb any
+	// rounding error in the division.
+	if bucket < 1 {
+		bucket = 1
+	}
+	if bucket > count {
+		bucket = count
+	}
+	return bucket
+}
+
// CleanEncodingName sanitizes the string meant to represent a
// recognized encoding. This ignores any non-alphanumeric character.
//
diff --git a/pkg/sql/sem/builtins/builtins_test.go b/pkg/sql/sem/builtins/builtins_test.go
index fc6ba638c90f..cef0032ba90f 100644
--- a/pkg/sql/sem/builtins/builtins_test.go
+++ b/pkg/sql/sem/builtins/builtins_test.go
@@ -234,3 +234,31 @@ func TestLPadRPad(t *testing.T) {
}
}
}
+
+// TestFloatWidthBucket checks widthBucket against a table of operands covering
+// ascending and descending ranges, both range boundaries, and out-of-range
+// values on either side.
+func TestFloatWidthBucket(t *testing.T) {
+	testCases := []struct {
+		operand  float64
+		b1       float64
+		b2       float64
+		count    int
+		expected int
+	}{
+		{0.5, 2, 3, 5, 0},
+		{8, 2, 3, 5, 6},
+		{1.5, 1, 3, 2, 1},
+		{5.35, 0.024, 10.06, 5, 3},
+		{-3.0, -5, 5, 10, 3},
+		{1, 1, 10, 2, 1},  // minimum should be inclusive
+		{10, 1, 10, 2, 3}, // maximum should be exclusive
+		{4, 10, 1, 4, 3},
+		{11, 10, 1, 4, 0},
+		{0, 10, 1, 4, 5},
+		{1, 10, 1, 4, 5}, // maximum should be exclusive for descending ranges too
+	}
+
+	for _, tc := range testCases {
+		got := widthBucket(tc.operand, tc.b1, tc.b2, tc.count)
+		if got != tc.expected {
+			// Include the inputs so a failure identifies the offending row.
+			t.Errorf("widthBucket(%v, %v, %v, %d): expected %d, found %d",
+				tc.operand, tc.b1, tc.b2, tc.count, tc.expected, got)
+		}
+	}
+}