diff --git a/pkg/sql/opt/memo/testdata/stats/groupby b/pkg/sql/opt/memo/testdata/stats/groupby index a58f6171c78d..b8964cac8062 100644 --- a/pkg/sql/opt/memo/testdata/stats/groupby +++ b/pkg/sql/opt/memo/testdata/stats/groupby @@ -526,3 +526,76 @@ project │ └── bool_or:5 [type=bool, outer=(5), constraints=(/5: [/true - /true]; tight), fd=()-->(5)] └── projections └── 1 [as="?column?":6, type=int] + +# Regression test for #74667. +exec-ddl +CREATE TABLE t74667 (col TIMETZ PRIMARY KEY) +---- + +exec-ddl +ALTER TABLE t74667 INJECT STATISTICS '[ + { + "columns": [ + "col" + ], + "created_at": "2000-01-01 00:00:00+00:00", + "distinct_count": 950814763580487611, + "histo_buckets": [ + { + "distinct_range": 0, + "num_eq": 3873172219268179689, + "num_range": 0, + "upper_bound": "00:00:00+15:59:00" + }, + { + "distinct_range": 400000, + "num_eq": 3000000000, + "num_range": 400000, + "upper_bound": "04:40:23.558699+11:08:00" + }, + { + "distinct_range": 381143202295070850, + "num_eq": 6399369578112136136, + "num_range": 381143202295070816, + "upper_bound": "06:12:15.740051+06:40:00" + } + ], + "histo_col_type": "TIMETZ", + "name": "__auto__", + "null_count": 0, + "row_count": 1188522479222658429 + } +]' +---- + +norm +SELECT count(*) +FROM t74667 +WHERE col < '03:33:05.598931+07:11:00':::TIMETZ +GROUP BY col; +---- +project + ├── columns: count:4(int!null) + ├── stats: [rows=2.00509067e+16] + └── group-by (hash) + ├── columns: col:1(timetz!null) count_rows:4(int!null) + ├── grouping columns: col:1(timetz!null) + ├── stats: [rows=2.00509067e+16, distinct(1)=2.00509067e+16, null(1)=0] + ├── key: (1) + ├── fd: (1)-->(4) + ├── select + │ ├── columns: col:1(timetz!null) + │ ├── stats: [rows=4.52141047e+17, distinct(1)=2.00509067e+16, null(1)=0] + │ │ histogram(1)= 0 4.3209e+17 44624 3.3468e+08 2.0051e+16 0 + │ │ <--- '00:00:00+15:59:00' ------- '04:40:23.558699+11:08:00' ------------ '03:33:06.598931+07:11:01' + │ ├── key: (1) + │ ├── scan t74667 + │ │ ├── columns: col:1(timetz!null) + │ │ ├── stats: [rows=1.18852248e+18, distinct(1)=9.50814764e+17, null(1)=0] + │ │ │ histogram(1)= 0 4.3209e+17 44624 3.3468e+08 4.252e+16 7.1391e+17 + │ │ │ <--- '00:00:00+15:59:00' ------- '04:40:23.558699+11:08:00' ----------- '06:12:15.740051+06:40:00' + │ │ └── key: (1) + │ └── filters + │ └── col:1 < '03:33:05.598931+07:11:00' [type=bool, outer=(1), constraints=(/1: (/NULL - /'03:33:06.598931+07:11:01']; tight)] + └── aggregations + └── count-rows [as=count_rows:4, type=int] diff --git a/pkg/sql/opt/props/BUILD.bazel b/pkg/sql/opt/props/BUILD.bazel index 303fb59b16e5..f6aead24f882 100644 --- a/pkg/sql/opt/props/BUILD.bazel +++ b/pkg/sql/opt/props/BUILD.bazel @@ -28,6 +28,7 @@ go_library( "//pkg/util/buildutil", "//pkg/util/encoding", "//pkg/util/log", + "//pkg/util/timetz", "@com_github_cockroachdb_errors//:errors", "@com_github_olekukonko_tablewriter//:tablewriter", ], diff --git a/pkg/sql/opt/props/histogram.go b/pkg/sql/opt/props/histogram.go index c6b0b26b8d3a..d20f14d2c359 100644 --- a/pkg/sql/opt/props/histogram.go +++ b/pkg/sql/opt/props/histogram.go @@ -17,6 +17,7 @@ import ( "io" "math" "sort" + "time" "github.com/cockroachdb/cockroach/pkg/sql/inverted" "github.com/cockroachdb/cockroach/pkg/sql/opt" @@ -26,6 +27,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" "github.com/cockroachdb/cockroach/pkg/sql/types" "github.com/cockroachdb/cockroach/pkg/util/encoding" + "github.com/cockroachdb/cockroach/pkg/util/timetz" "github.com/cockroachdb/errors" "github.com/olekukonko/tablewriter" ) @@ -805,12 +807,23 @@ func getRangesBeforeAndAfter( return rangeBefore, rangeAfter, true case types.TimeTZFamily: - lowerBefore := beforeLowerBound.(*tree.DTimeTZ).TimeOfDay - upperBefore := beforeUpperBound.(*tree.DTimeTZ).TimeOfDay - lowerAfter := afterLowerBound.(*tree.DTimeTZ).TimeOfDay - upperAfter := afterUpperBound.(*tree.DTimeTZ).TimeOfDay - rangeBefore = float64(upperBefore) - float64(lowerBefore) - rangeAfter = float64(upperAfter) - float64(lowerAfter) + // timeTZOffsetSecsRange is the total number of possible values for offset. + timeTZOffsetSecsRange := timetz.MaxTimeTZOffsetSecs - timetz.MinTimeTZOffsetSecs + 1 + + // Find the ranges in microseconds based on the absolute times of the range + // boundaries. + lowerBefore := beforeLowerBound.(*tree.DTimeTZ) + upperBefore := beforeUpperBound.(*tree.DTimeTZ) + lowerAfter := afterLowerBound.(*tree.DTimeTZ) + upperAfter := afterUpperBound.(*tree.DTimeTZ) + rangeBefore = float64(upperBefore.ToTime().Sub(lowerBefore.ToTime()) / time.Microsecond) + rangeAfter = float64(upperAfter.ToTime().Sub(lowerAfter.ToTime()) / time.Microsecond) + + // Account for the offset. + rangeBefore *= float64(timeTZOffsetSecsRange) + rangeAfter *= float64(timeTZOffsetSecsRange) + rangeBefore += float64(upperBefore.OffsetSecs - lowerBefore.OffsetSecs) + rangeAfter += float64(upperAfter.OffsetSecs - lowerAfter.OffsetSecs) return rangeBefore, rangeAfter, true case types.StringFamily, types.BytesFamily, types.UuidFamily, types.INetFamily: diff --git a/pkg/sql/opt/props/histogram_test.go b/pkg/sql/opt/props/histogram_test.go index 583d7e66ba59..ee94f7beafc9 100644 --- a/pkg/sql/opt/props/histogram_test.go +++ b/pkg/sql/opt/props/histogram_test.go @@ -384,7 +384,7 @@ func TestFilterBucket(t *testing.T) { t.Fatalf("for span %s expected an error", testCase.span) } if !reflect.DeepEqual(testCase.expected, actual) { - t.Fatalf("for span %s exected %v but found %v", testCase.span, testCase.expected, actual) + t.Errorf("for span %s exected %v but found %v", testCase.span, testCase.expected, actual) } } } @@ -612,17 +612,17 @@ func TestFilterBucket(t *testing.T) { }) t.Run("timetz", func(t *testing.T) { - upperBound, _, err := tree.ParseDTimeTZ(&evalCtx, "05:00:00", time.Microsecond) + upperBound1, _, err := tree.ParseDTimeTZ(&evalCtx, "05:00:00", time.Microsecond) if err != nil { t.Fatal(err) } - lowerBound, _, err := tree.ParseDTimeTZ(&evalCtx, "04:00:00", time.Microsecond) + lowerBound1, _, err := tree.ParseDTimeTZ(&evalCtx, "04:00:00", time.Microsecond) if err != nil { t.Fatal(err) } - h := &Histogram{evalCtx: &evalCtx, col: col, buckets: []cat.HistogramBucket{ - {NumEq: 0, NumRange: 0, DistinctRange: 0, UpperBound: getPrevUpperBound(lowerBound)}, - {NumEq: 1, NumRange: 62, DistinctRange: 31, UpperBound: upperBound}, + h1 := &Histogram{evalCtx: &evalCtx, col: col, buckets: []cat.HistogramBucket{ + {NumEq: 0, NumRange: 0, DistinctRange: 0, UpperBound: getPrevUpperBound(lowerBound1)}, + {NumEq: 1, NumRange: 62, DistinctRange: 31, UpperBound: upperBound1}, }} ub1, _, err := tree.ParseDTimeTZ(&evalCtx, "04:15:00", time.Microsecond) @@ -630,18 +630,70 @@ func TestFilterBucket(t *testing.T) { t.Fatal(err) } - testData := []testCase{ + testData1 := []testCase{ { span: "[/04:00:00 - /04:15:00]", expected: &cat.HistogramBucket{NumEq: 0, NumRange: 15.5, DistinctRange: 7.75, UpperBound: ub1}, }, { span: "[/04:30:00 - /05:00:00]", - expected: &cat.HistogramBucket{NumEq: 1, NumRange: 31, DistinctRange: 15.5, UpperBound: upperBound}, + expected: &cat.HistogramBucket{NumEq: 1, NumRange: 31, DistinctRange: 15.5, UpperBound: upperBound1}, + }, + { + span: "[/06:30:00+02:00:00 - /05:00:00]", + expected: &cat.HistogramBucket{NumEq: 1, NumRange: 31, DistinctRange: 15.5, UpperBound: upperBound1}, + }, + { + span: "[/08:30:00+04:00:00 - /05:00:00]", + expected: &cat.HistogramBucket{NumEq: 1, NumRange: 31, DistinctRange: 15.5, UpperBound: upperBound1}, + }, + } + + // This test distinguishes between values that have the same time but + // different offsets. + upperBound2, _, err := tree.ParseDTimeTZ(&evalCtx, "05:00:00.000001", time.Microsecond) + if err != nil { + t.Fatal(err) + } + h2 := &Histogram{evalCtx: &evalCtx, col: col, buckets: []cat.HistogramBucket{ + {NumEq: 0, NumRange: 0, DistinctRange: 0, UpperBound: upperBound1}, + {NumEq: 1, NumRange: 10000, DistinctRange: 1000, UpperBound: upperBound2}, + }} + + ub2, _, err := tree.ParseDTimeTZ(&evalCtx, "20:59:00.000001+15:59:00", time.Microsecond) + if err != nil { + t.Fatal(err) + } + + testData2 := []testCase{ + { + span: "[/05:00:00.000001 - /05:00:00.000001]", + expected: &cat.HistogramBucket{NumEq: 1, NumRange: 0, DistinctRange: 0, UpperBound: upperBound2}, + }, + { + span: "[/07:00:00.000001+02:00:00 - /05:00:00.000001]", + expected: &cat.HistogramBucket{NumEq: 1, NumRange: 625.65, DistinctRange: 62.57, UpperBound: upperBound2}, + }, + { + span: "[/09:00:00.000001+04:00:00 - /05:00:00.000001]", + expected: &cat.HistogramBucket{NumEq: 1, NumRange: 1251.3, DistinctRange: 125.13, UpperBound: upperBound2}, + }, + { + span: "[/04:59:59-00:00:01 - /20:59:00.000001+15:59:00]", + expected: &cat.HistogramBucket{NumEq: 0, NumRange: 5000, DistinctRange: 500, UpperBound: ub2}, + }, + { + span: "[/20:59:00.000001+15:59:00 - /05:00:00.000001]", + expected: &cat.HistogramBucket{NumEq: 1, NumRange: 5000, DistinctRange: 500, UpperBound: upperBound2}, + }, + { + span: "[/04:59:58-00:00:02 - /05:00:00.000001]", + expected: &cat.HistogramBucket{NumEq: 1, NumRange: 9999.91, DistinctRange: 999.99, UpperBound: upperBound2}, }, } - runTest(h, testData, types.TimeTZFamily) + runTest(h1, testData1, types.TimeTZFamily) + runTest(h2, testData2, types.TimeTZFamily) }) t.Run("string-bytes", func(t *testing.T) {