From 48599ad32ae13cae8959eee5cf9f35b0c5f2f6c1 Mon Sep 17 00:00:00 2001
From: Kevin Barbour <barbourkd@vcu.edu>
Date: Fri, 2 Aug 2019 10:12:59 -0400
Subject: [PATCH] sql: add width_bucket builtin

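Implements the width_bucket() builtin function. Two overloads assign a
decimal or int operand to one of count equal-width buckets spanning b1 to
b2; a third assigns an operand using a sorted array of bucket lower bounds.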

Details on the Postgres implementation can be found here:
https://www.postgresql.org/docs/11/functions-math.html

Resolves #38855

Release note (sql change): add the width_bucket builtin function.
---
 docs/generated/sql/functions.md               |  6 ++
 .../testdata/logic_test/builtin_function      | 25 +++++++
 pkg/sql/sem/builtins/builtins.go              | 73 +++++++++++++++++++
 pkg/sql/sem/builtins/builtins_test.go         | 28 +++++++
 4 files changed, 132 insertions(+)
diff --git a/docs/generated/sql/functions.md b/docs/generated/sql/functions.md
index e901220c8c7a..ae0d95210273 100644
--- a/docs/generated/sql/functions.md
+++ b/docs/generated/sql/functions.md
@@ -637,6 +637,12 @@ has no relationship with the commit order of concurrent transactions.</p>
 <tr><td><code>trunc(val: <a href="decimal.html">decimal</a>) &rarr; <a href="decimal.html">decimal</a></code></td><td><span class="funcdesc"><p>Truncates the decimal values of <code>val</code>.</p>
 </span></td></tr>
 <tr><td><code>trunc(val: <a href="float.html">float</a>) &rarr; <a href="float.html">float</a></code></td><td><span class="funcdesc"><p>Truncates the decimal values of <code>val</code>.</p>
+</span></td></tr>
+<tr><td><code>width_bucket(operand: <a href="decimal.html">decimal</a>, b1: <a href="decimal.html">decimal</a>, b2: <a href="decimal.html">decimal</a>, count: <a href="int.html">int</a>) &rarr; <a href="int.html">int</a></code></td><td><span class="funcdesc"><p>return the bucket number to which operand would be assigned in a histogram having count equal-width buckets spanning the range b1 to b2.</p>
+</span></td></tr>
+<tr><td><code>width_bucket(operand: <a href="int.html">int</a>, b1: <a href="int.html">int</a>, b2: <a href="int.html">int</a>, count: <a href="int.html">int</a>) &rarr; <a href="int.html">int</a></code></td><td><span class="funcdesc"><p>return the bucket number to which operand would be assigned in a histogram having count equal-width buckets spanning the range b1 to b2.</p>
+</span></td></tr>
+<tr><td><code>width_bucket(operand: anyelement, thresholds: anyelement[]) &rarr; <a href="int.html">int</a></code></td><td><span class="funcdesc"><p>return the bucket number to which operand would be assigned given an array listing the lower bounds of the buckets; returns 0 for an input less than the first lower bound; the thresholds array must be sorted, smallest first, or unexpected results will be obtained</p>
 </span></td></tr></tbody>
 </table>
 
diff --git a/pkg/sql/logictest/testdata/logic_test/builtin_function b/pkg/sql/logictest/testdata/logic_test/builtin_function
index c07ab1e00a1c..718820c156a5 100644
--- a/pkg/sql/logictest/testdata/logic_test/builtin_function
+++ b/pkg/sql/logictest/testdata/logic_test/builtin_function
@@ -2246,3 +2246,28 @@ query BBB
 SELECT pg_type_is_visible('int'::regtype), pg_type_is_visible(NULL), pg_type_is_visible(99999)
 ----
 true  NULL  NULL
+
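+# Tests for the width_bucket builtin: the decimal and int equal-width
+# overloads, then the thresholds-array overload and its type-mismatch error.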
+query I
+SELECT width_bucket(8.0, 2.0, 3.0, 5)
+----
+6
+
+query I
+SELECT width_bucket(5.35, 0.024, 10.06, 5)
+----
+3
+
+query I
+SELECT width_bucket(7, 3, 11, 5)
+----
+3
+
+query I
+SELECT width_bucket(now(), array['yesterday', 'today', 'tomorrow']::timestamptz[])
+----
+2
+
+query error pq: width_bucket\(\): Operand and thresholds must be of the same type
+SELECT width_bucket(1, array['a', 'h', 'l', 'z']);
\ No newline at end of file
diff --git a/pkg/sql/sem/builtins/builtins.go b/pkg/sql/sem/builtins/builtins.go
index 456cfd23fa2d..49883a53a183 100644
--- a/pkg/sql/sem/builtins/builtins.go
+++ b/pkg/sql/sem/builtins/builtins.go
@@ -2348,6 +2348,90 @@ may increase either contention or retry errors, or both.`,
 		}, "Truncates the decimal values of `val`."),
 	),
 
+	// width_bucket follows the PostgreSQL function of the same name. The first
+	// two overloads place operand in one of count equal-width buckets spanning
+	// b1 to b2; the third places it using an array of bucket lower bounds.
+	"width_bucket": makeBuiltin(defProps(),
+		tree.Overload{
+			Types: tree.ArgTypes{{"operand", types.Decimal}, {"b1", types.Decimal},
+				{"b2", types.Decimal}, {"count", types.Int}},
+			ReturnType: tree.FixedReturnType(types.Int),
+			Fn: func(ctx *tree.EvalContext, args tree.Datums) (tree.Datum, error) {
+				// The conversion errors are deliberately discarded.
+				// NOTE(review): presumably they only report a value outside
+				// the float64 range, which as +/-Inf still orders correctly
+				// against the bounds -- confirm.
+				operand, _ := args[0].(*tree.DDecimal).Float64()
+				b1, _ := args[1].(*tree.DDecimal).Float64()
+				b2, _ := args[2].(*tree.DDecimal).Float64()
+				count := int(tree.MustBeDInt(args[3]))
+				if count <= 0 {
+					return nil, errors.New("count must be greater than zero")
+				}
+				if b1 == b2 {
+					return nil, errors.New("lower bound cannot equal upper bound")
+				}
+				return tree.NewDInt(tree.DInt(widthBucket(operand, b1, b2, count))), nil
+			},
+			Info: "return the bucket number to which operand would be assigned in a histogram having count " +
+				"equal-width buckets spanning the range b1 to b2.",
+		},
+		tree.Overload{
+			Types: tree.ArgTypes{{"operand", types.Int}, {"b1", types.Int},
+				{"b2", types.Int}, {"count", types.Int}},
+			ReturnType: tree.FixedReturnType(types.Int),
+			Fn: func(ctx *tree.EvalContext, args tree.Datums) (tree.Datum, error) {
+				// Int arguments are converted to float64 so that both numeric
+				// overloads share widthBucket; this is exact only up to 2^53.
+				operand := float64(tree.MustBeDInt(args[0]))
+				b1 := float64(tree.MustBeDInt(args[1]))
+				b2 := float64(tree.MustBeDInt(args[2]))
+				count := int(tree.MustBeDInt(args[3]))
+				if count <= 0 {
+					return nil, errors.New("count must be greater than zero")
+				}
+				if b1 == b2 {
+					return nil, errors.New("lower bound cannot equal upper bound")
+				}
+				return tree.NewDInt(tree.DInt(widthBucket(operand, b1, b2, count))), nil
+			},
+			Info: "return the bucket number to which operand would be assigned in a histogram having count " +
+				"equal-width buckets spanning the range b1 to b2.",
+		},
+		tree.Overload{
+			Types:      tree.ArgTypes{{"operand", types.Any}, {"thresholds", types.AnyArray}},
+			ReturnType: tree.FixedReturnType(types.Int),
+			Fn: func(ctx *tree.EvalContext, args tree.Datums) (tree.Datum, error) {
+				operand := args[0]
+				thresholds := tree.MustBeDArray(args[1])
+
+				// An empty array has no lower bounds to compare against, so
+				// bucket 0 is reported. Returning here also keeps the element
+				// access below from indexing an empty slice.
+				if thresholds.Len() == 0 {
+					return tree.NewDInt(0), nil
+				}
+
+				if !operand.ResolvedType().Equivalent(thresholds.Array[0].ResolvedType()) {
+					return nil, errors.New("Operand and thresholds must be of the same type")
+				}
+
+				// For a sorted array, the index of the first threshold greater
+				// than operand equals the number of thresholds at or below it.
+				for i, v := range thresholds.Array {
+					if operand.Compare(ctx, v) < 0 {
+						return tree.NewDInt(tree.DInt(i)), nil
+					}
+				}
+
+				return tree.NewDInt(tree.DInt(thresholds.Len())), nil
+			},
+			Info: "return the bucket number to which operand would be assigned given an array listing the " +
+				"lower bounds of the buckets; returns 0 for an input less than the first lower bound; the " +
+				"thresholds array must be sorted, smallest first, or unexpected results will be obtained",
+		},
+	),
+
 	// Array functions.
 
 	"string_to_array": makeBuiltin(arrayPropsNullableArgs(),
@@ -4654,6 +4708,42 @@ func rpad(s string, length int, fill string) (string, error) {
 	return buf.String(), nil
 }
 
+// widthBucket returns the bucket number to which operand would be assigned in
+// a histogram having count equal-width buckets spanning the range b1 to b2.
+//
+// Buckets are numbered from 1 at b1, which is inclusive, to count at b2, which
+// is exclusive. An operand that falls before b1 yields 0 and one at or beyond
+// b2 yields count+1. b1 may be greater than b2, in which case the buckets are
+// numbered downwards from b1.
+//
+// Callers are expected to pass count > 0 and finite, distinct bounds. Input
+// with count <= 0, equal bounds or a NaN value has no meaningful bucket, and
+// for equal or NaN values the quotient computed below would be infinite or
+// NaN, whose conversion to int is implementation-defined; 0 is returned instead.
+func widthBucket(operand float64, b1 float64, b2 float64, count int) int {
+	if count <= 0 || b1 == b2 || math.IsNaN(operand) || math.IsNaN(b1) || math.IsNaN(b2) {
+		return 0
+	}
+
+	ascending := b1 < b2
+	switch {
+	case (ascending && operand < b1) || (!ascending && operand > b1):
+		return 0
+	case (ascending && operand >= b2) || (!ascending && operand <= b2):
+		// b2 is compared inclusively rather than left to the division below,
+		// whose round-off could otherwise place operand == b2 in bucket count.
+		return count + 1
+	}
+
+	width := (b2 - b1) / float64(count)
+	bucket := int(math.Floor((operand-b1)/width)) + 1
+	// Round-off can also push an operand just inside b2 one bucket too far.
+	if bucket > count {
+		bucket = count
+	}
+	return bucket
+}
+
 // CleanEncodingName sanitizes the string meant to represent a
 // recognized encoding. This ignores any non-alphanumeric character.
 //
diff --git a/pkg/sql/sem/builtins/builtins_test.go b/pkg/sql/sem/builtins/builtins_test.go
index fc6ba638c90f..cef0032ba90f 100644
--- a/pkg/sql/sem/builtins/builtins_test.go
+++ b/pkg/sql/sem/builtins/builtins_test.go
@@ -234,3 +234,40 @@ func TestLPadRPad(t *testing.T) {
 		}
 	}
 }
+
+// TestFloatWidthBucket checks widthBucket against hand-computed bucket
+// numbers, covering operands below, inside and above the range as well as
+// ranges whose first bound is greater than the second.
+func TestFloatWidthBucket(t *testing.T) {
+	testCases := []struct {
+		operand  float64
+		b1       float64
+		b2       float64
+		count    int
+		expected int
+	}{
+		{0.5, 2, 3, 5, 0},
+		{8, 2, 3, 5, 6},
+		{1.5, 1, 3, 2, 1},
+		{5.35, 0.024, 10.06, 5, 3},
+		{-3.0, -5, 5, 10, 3},
+		{1, 1, 10, 2, 1},  // minimum should be inclusive
+		{10, 1, 10, 2, 3}, // maximum should be exclusive
+		// With b1 > b2 the range is traversed downwards; b1 stays inclusive
+		// and b2 exclusive.
+		{4, 10, 1, 4, 3},
+		{11, 10, 1, 4, 0},
+		{0, 10, 1, 4, 5},
+		{10, 10, 1, 4, 1},
+		{1, 10, 1, 4, 5},
+	}
+
+	for _, tc := range testCases {
+		got := widthBucket(tc.operand, tc.b1, tc.b2, tc.count)
+		if got != tc.expected {
+			// Include the inputs so a failure identifies the offending case.
+			t.Errorf("widthBucket(%v, %v, %v, %d) = %d, expected %d",
+				tc.operand, tc.b1, tc.b2, tc.count, got, tc.expected)
+		}
+	}
+}