From 48599ad32ae13cae8959eee5cf9f35b0c5f2f6c1 Mon Sep 17 00:00:00 2001 From: Kevin Barbour Date: Fri, 2 Aug 2019 10:12:59 -0400 Subject: [PATCH] sql: add width_bucket builtin Implements the width_bucket() builtin function Details on the Postgres implementation can be found here: https://www.postgresql.org/docs/11/functions-math.html Resolves #38855 Release note (sql change): add the width_bucket builtin function. --- docs/generated/sql/functions.md | 6 ++ .../testdata/logic_test/builtin_function | 25 +++++++ pkg/sql/sem/builtins/builtins.go | 73 +++++++++++++++++++ pkg/sql/sem/builtins/builtins_test.go | 28 +++++++ 4 files changed, 132 insertions(+) diff --git a/docs/generated/sql/functions.md b/docs/generated/sql/functions.md index e901220c8c7a..ae0d95210273 100644 --- a/docs/generated/sql/functions.md +++ b/docs/generated/sql/functions.md @@ -637,6 +637,12 @@ has no relationship with the commit order of concurrent transactions.

trunc(val: decimal) → decimal

Truncates the decimal values of val.

trunc(val: float) → float

Truncates the decimal values of val.

+
+width_bucket(operand: decimal, b1: decimal, b2: decimal, count: int) → int

return the bucket number to which operand would be assigned in a histogram having count equal-width buckets spanning the range b1 to b2.

+
+width_bucket(operand: int, b1: int, b2: int, count: int) → int

return the bucket number to which operand would be assigned in a histogram having count equal-width buckets spanning the range b1 to b2.

+
+width_bucket(operand: anyelement, thresholds: anyelement[]) → int

return the bucket number to which operand would be assigned given an array listing the lower bounds of the buckets; returns 0 for an input less than the first lower bound; the thresholds array must be sorted, smallest first, or unexpected results will be obtained

diff --git a/pkg/sql/logictest/testdata/logic_test/builtin_function b/pkg/sql/logictest/testdata/logic_test/builtin_function index c07ab1e00a1c..718820c156a5 100644 --- a/pkg/sql/logictest/testdata/logic_test/builtin_function +++ b/pkg/sql/logictest/testdata/logic_test/builtin_function @@ -2246,3 +2246,28 @@ query BBB SELECT pg_type_is_visible('int'::regtype), pg_type_is_visible(NULL), pg_type_is_visible(99999) ---- true NULL NULL + + +# Tests for width_bucket builtin +query I +SELECT width_bucket(8.0, 2.0, 3.0, 5) +---- +6 + +query I +SELECT width_bucket(5.35, 0.024, 10.06, 5) +---- +3 + +query I +SELECT width_bucket(7, 3, 11, 5) +---- +3 + +query I +SELECT width_bucket(now(), array['yesterday', 'today', 'tomorrow']::timestamptz[]) +---- +2 + +query error pq: width_bucket\(\): Operand and thresholds must be of the same type +SELECT width_bucket(1, array['a', 'h', 'l', 'z']); \ No newline at end of file diff --git a/pkg/sql/sem/builtins/builtins.go b/pkg/sql/sem/builtins/builtins.go index 456cfd23fa2d..49883a53a183 100644 --- a/pkg/sql/sem/builtins/builtins.go +++ b/pkg/sql/sem/builtins/builtins.go @@ -2348,6 +2348,60 @@ may increase either contention or retry errors, or both.`, }, "Truncates the decimal values of `val`."), ), + "width_bucket": makeBuiltin(defProps(), + tree.Overload{ + Types: tree.ArgTypes{{"operand", types.Decimal}, {"b1", types.Decimal}, + {"b2", types.Decimal}, {"count", types.Int}}, + ReturnType: tree.FixedReturnType(types.Int), + Fn: func(ctx *tree.EvalContext, args tree.Datums) (tree.Datum, error) { + operand, _ := args[0].(*tree.DDecimal).Float64() + b1, _ := args[1].(*tree.DDecimal).Float64() + b2, _ := args[2].(*tree.DDecimal).Float64() + count := int(tree.MustBeDInt(args[3])) + return tree.NewDInt(tree.DInt(widthBucket(operand, b1, b2, count))), nil + }, + Info: "return the bucket number to which operand would be assigned in a histogram having count " + + "equal-width buckets spanning the range b1 to b2.", + }, + tree.Overload{ + Types: 
tree.ArgTypes{{"operand", types.Int}, {"b1", types.Int}, + {"b2", types.Int}, {"count", types.Int}}, + ReturnType: tree.FixedReturnType(types.Int), + Fn: func(ctx *tree.EvalContext, args tree.Datums) (tree.Datum, error) { + operand := float64(tree.MustBeDInt(args[0])) + b1 := float64(tree.MustBeDInt(args[1])) + b2 := float64(tree.MustBeDInt(args[2])) + count := int(tree.MustBeDInt(args[3])) + return tree.NewDInt(tree.DInt(widthBucket(operand, b1, b2, count))), nil + }, + Info: "return the bucket number to which operand would be assigned in a histogram having count " + + "equal-width buckets spanning the range b1 to b2.", + }, + tree.Overload{ + Types: tree.ArgTypes{{"operand", types.Any}, {"thresholds", types.AnyArray}}, + ReturnType: tree.FixedReturnType(types.Int), + Fn: func(ctx *tree.EvalContext, args tree.Datums) (tree.Datum, error) { + operand := args[0] + thresholds := tree.MustBeDArray(args[1]) + + if !operand.ResolvedType().Equivalent(thresholds.Array[0].ResolvedType()) { + return tree.NewDInt(0), errors.New("Operand and thresholds must be of the same type") + } + + for i, v := range thresholds.Array { + if operand.Compare(ctx, v) < 0 { + return tree.NewDInt(tree.DInt(i)), nil + } + } + + return tree.NewDInt(tree.DInt(thresholds.Len())), nil + }, + Info: "return the bucket number to which operand would be assigned given an array listing the " + + "lower bounds of the buckets; returns 0 for an input less than the first lower bound; the " + + "thresholds array must be sorted, smallest first, or unexpected results will be obtained", + }, + ), + // Array functions. 
// widthBucket returns the number of the bucket to which operand would be
// assigned in a histogram having count equal-width buckets spanning the range
// b1 to b2.
//
// Buckets are numbered 1 through count. The bound b1 is inclusive and b2 is
// exclusive: an operand before b1 yields 0 and an operand at or beyond b2
// yields count+1. The range may be descending (b1 > b2), in which case the
// comparisons are mirrored.
//
// The signature has no error return, so inputs for which no bucket is defined
// (count <= 0, b1 == b2, or a NaN argument) yield 0 instead of feeding an
// infinite or NaN quotient into a float-to-int conversion, whose result is
// implementation-dependent in Go. Callers that want to reject such inputs
// should validate them before calling.
func widthBucket(operand float64, b1 float64, b2 float64, count int) int {
	if count <= 0 || b1 == b2 || math.IsNaN(operand) || math.IsNaN(b1) || math.IsNaN(b2) {
		return 0
	}

	ascending := b1 < b2
	switch {
	case ascending && operand < b1, !ascending && operand > b1:
		return 0
	case ascending && operand >= b2, !ascending && operand <= b2:
		// Decide the exclusive bound by comparison rather than relying on the
		// division below rounding to exactly count.
		return count + 1
	}

	// Multiply before dividing so that an operand sitting exactly on a bucket
	// boundary is not pushed into the previous bucket by a pre-rounded width.
	pos := math.Floor(float64(count) * (operand - b1) / (b2 - b1))
	if math.IsNaN(pos) || pos >= float64(count) {
		// The operand is known to be inside the range here, so rounding or
		// overflow must never report the out-of-range bucket count+1.
		return count
	}
	return int(pos) + 1
}

// TestFloatWidthBucket checks widthBucket against ascending and descending
// ranges, both bounds, and inputs for which no bucket is defined. In the
// patch this test is added to builtins_test.go.
func TestFloatWidthBucket(t *testing.T) {
	testCases := []struct {
		operand  float64
		b1       float64
		b2       float64
		count    int
		expected int
	}{
		{0.5, 2, 3, 5, 0},
		{8, 2, 3, 5, 6},
		{1.5, 1, 3, 2, 1},
		{5.35, 0.024, 10.06, 5, 3},
		{-3.0, -5, 5, 10, 3},
		{1, 1, 10, 2, 1},  // minimum should be inclusive
		{10, 1, 10, 2, 3}, // maximum should be exclusive
		{4, 10, 1, 4, 3},
		{11, 10, 1, 4, 0},
		{0, 10, 1, 4, 5},
		{10, 10, 1, 4, 1}, // descending: b1 is inclusive
		{1, 10, 1, 4, 5},  // descending: b2 is exclusive
		{1, 0, 7, 49, 8},  // operand exactly on a bucket boundary
		{5, 1, 10, 0, 0},  // no buckets
		{5, 1, 10, -3, 0}, // negative bucket count
		{5, 3, 3, 4, 0},   // empty range
		{math.NaN(), 1, 10, 4, 0},
	}

	for _, tc := range testCases {
		got := widthBucket(tc.operand, tc.b1, tc.b2, tc.count)
		if got != tc.expected {
			t.Errorf("widthBucket(%v, %v, %v, %d): expected %d, found %d",
				tc.operand, tc.b1, tc.b2, tc.count, tc.expected, got)
		}
	}
}