From 81e83ae906864020809858f0f4cd2a510f7bc51e Mon Sep 17 00:00:00 2001 From: Rebecca Taft Date: Mon, 17 Jan 2022 11:43:48 -0600 Subject: [PATCH 1/2] sql/sem/tree: fix Next() and Prev() for DTimeTZ Prior to this commit, the DTimeTZ functions Next() and Prev() could skip over valid values according to the ordering of DTimeTZ values in an index (which matches the ordering defined by the TimeTZ functions After() and Before()). This commit fixes these functions so that Next() now returns smallest valid DTimeTZ that is greater than the receiver, and Prev() returns the largest valid DTimeTZ that is less than the receiver. This is an important invariant that the optimizer relies on when building index constraints. Fixes #74912 Release note (bug fix): Fixed a bug that could occur when a TIMETZ column was indexed, and a query predicate constrained that column using a < or > operator with a timetz constant. If the column contained values with time zones that did not match the time zone of the timetz constant, it was possible that not all matching values could be returned by the query. Specifically, the results may not have included values within one microsecond of the predicate's absolute time. This bug was introduced when the timetz datatype was first added in 20.1. It exists on all versions of 20.1, 20.2, 21.1, and 21.2 prior to this patch. --- pkg/sql/logictest/testdata/logic_test/timetz | 35 ++++ pkg/sql/sem/tree/datum.go | 46 +++++- pkg/sql/sem/tree/datum_integration_test.go | 162 +++++++++++++++++++ pkg/util/duration/duration.go | 4 +- 4 files changed, 244 insertions(+), 3 deletions(-) diff --git a/pkg/sql/logictest/testdata/logic_test/timetz b/pkg/sql/logictest/testdata/logic_test/timetz index eebf31ab8836..91a48cc3a961 100644 --- a/pkg/sql/logictest/testdata/logic_test/timetz +++ b/pkg/sql/logictest/testdata/logic_test/timetz @@ -670,3 +670,38 @@ SELECT a FROM regression_44774 ORDER BY a statement ok DROP TABLE regression_44774 + +# Check that index scans of timetz columns do not miss values. +subtest regression_74912 + +# Create a table with two identical columns. One is indexed, and one is not. +statement ok +CREATE TABLE regression_74912 (t1 TIMETZ PRIMARY KEY, t2 TIMETZ); +INSERT INTO regression_74912 VALUES + ('05:00:00.000001', '05:00:00.000001'), + ('07:00:00.000001+02:00:00', '07:00:00.000001+02:00:00'), + ('09:00:00.000001+04:00:00', '09:00:00.000001+04:00:00'), + ('20:59:00.000001+15:59:00', '20:59:00.000001+15:59:00'); + +query I +SELECT count(*) FROM regression_74912@{NO_FULL_SCAN} WHERE t1 > '05:00:00'; +---- +4 + +query I +SELECT count(*) FROM regression_74912@{NO_FULL_SCAN} WHERE t1 < '05:00:00.000001'; +---- +3 + +query I +SELECT count(*) FROM regression_74912 WHERE t2 > '05:00:00'; +---- +4 + +query I +SELECT count(*) FROM regression_74912 WHERE t2 < '05:00:00.000001'; +---- +3 + +statement ok +DROP TABLE regression_74912 diff --git a/pkg/sql/sem/tree/datum.go b/pkg/sql/sem/tree/datum.go index 8cbb484c991d..0cba3e2c4bfa 100644 --- a/pkg/sql/sem/tree/datum.go +++ b/pkg/sql/sem/tree/datum.go @@ -2308,7 +2308,28 @@ func (d *DTimeTZ) Prev(ctx *EvalContext) (Datum, bool) { if d.IsMin(ctx) { return nil, false } - return NewDTimeTZFromOffset(d.TimeOfDay-1, d.OffsetSecs), true + // In the common case, the absolute time doesn't change, we simply decrement + // the offset by one second and increment the time of day by one second. Once + // we hit the minimum offset for the current absolute time, then we decrement + // the absolute time by one microsecond and wrap around to the highest offset + // for the new absolute time. This aligns with how Before and After are + // defined for TimeTZ. + var newTimeOfDay timeofday.TimeOfDay + var newOffsetSecs int32 + if d.OffsetSecs == timetz.MinTimeTZOffsetSecs || + d.TimeOfDay+duration.MicrosPerSec > timeofday.Max { + newTimeOfDay = d.TimeOfDay - 1 + shiftSeconds := int32((newTimeOfDay - timeofday.Min) / duration.MicrosPerSec) + if d.OffsetSecs+shiftSeconds > timetz.MaxTimeTZOffsetSecs { + shiftSeconds = timetz.MaxTimeTZOffsetSecs - d.OffsetSecs + } + newOffsetSecs = d.OffsetSecs + shiftSeconds + newTimeOfDay -= timeofday.TimeOfDay(shiftSeconds) * duration.MicrosPerSec + } else { + newTimeOfDay = d.TimeOfDay + duration.MicrosPerSec + newOffsetSecs = d.OffsetSecs - 1 + } + return NewDTimeTZFromOffset(newTimeOfDay, newOffsetSecs), true } // Next implements the Datum interface. @@ -2316,7 +2337,28 @@ func (d *DTimeTZ) Next(ctx *EvalContext) (Datum, bool) { if d.IsMax(ctx) { return nil, false } - return NewDTimeTZFromOffset(d.TimeOfDay+1, d.OffsetSecs), true + // In the common case, the absolute time doesn't change, we simply increment + // the offset by one second and decrement the time of day by one second. Once + // we hit the maximum offset for the current absolute time, then we increment + // the absolute time by one microsecond and wrap around to the lowest offset + // for the new absolute time. This aligns with how Before and After are + // defined for TimeTZ. + var newTimeOfDay timeofday.TimeOfDay + var newOffsetSecs int32 + if d.OffsetSecs == timetz.MaxTimeTZOffsetSecs || + d.TimeOfDay-duration.MicrosPerSec < timeofday.Min { + newTimeOfDay = d.TimeOfDay + 1 + shiftSeconds := int32((timeofday.Max - newTimeOfDay) / duration.MicrosPerSec) + if d.OffsetSecs-shiftSeconds < timetz.MinTimeTZOffsetSecs { + shiftSeconds = d.OffsetSecs - timetz.MinTimeTZOffsetSecs + } + newOffsetSecs = d.OffsetSecs - shiftSeconds + newTimeOfDay += timeofday.TimeOfDay(shiftSeconds) * duration.MicrosPerSec + } else { + newTimeOfDay = d.TimeOfDay - duration.MicrosPerSec + newOffsetSecs = d.OffsetSecs + 1 + } + return NewDTimeTZFromOffset(newTimeOfDay, newOffsetSecs), true } // IsMax implements the Datum interface. diff --git a/pkg/sql/sem/tree/datum_integration_test.go b/pkg/sql/sem/tree/datum_integration_test.go index 596569b5034f..4a6fc4d2b0b8 100644 --- a/pkg/sql/sem/tree/datum_integration_test.go +++ b/pkg/sql/sem/tree/datum_integration_test.go @@ -20,6 +20,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/sql/parser" + "github.com/cockroachdb/cockroach/pkg/sql/randgen" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" "github.com/cockroachdb/cockroach/pkg/sql/sessiondata" "github.com/cockroachdb/cockroach/pkg/sql/sessiondatapb" @@ -27,6 +28,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/duration" "github.com/cockroachdb/cockroach/pkg/util/leaktest" "github.com/cockroachdb/cockroach/pkg/util/log" + "github.com/cockroachdb/cockroach/pkg/util/randutil" "github.com/cockroachdb/cockroach/pkg/util/timeofday" "github.com/cockroachdb/cockroach/pkg/util/timetz" "github.com/cockroachdb/cockroach/pkg/util/timeutil" @@ -1003,6 +1005,166 @@ func TestDTimeTZ(t *testing.T) { } } +func checkTimeTZ(t *testing.T, d *tree.DTimeTZ) { + t.Helper() + if d.OffsetSecs < timetz.MinTimeTZOffsetSecs || d.OffsetSecs > timetz.MaxTimeTZOffsetSecs { + t.Fatalf("d.OffsetSecs out of range: %d", d.OffsetSecs) + } + if d.TimeOfDay < timeofday.Min || d.TimeOfDay > timeofday.Max { + t.Fatalf("d.TimeOfDay out of range: %d", d.TimeOfDay) + } +} + +func TestDTimeTZPrev(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + rng, _ := randutil.NewTestRand() + evalCtx := &tree.EvalContext{ + SessionDataStack: sessiondata.NewStack(&sessiondata.SessionData{ + Location: time.UTC, + }), + } + + // Check a few specific values. + closeToMidnight, depOnCtx, err := tree.ParseDTimeTZ(evalCtx, "23:59:59.865326-03:15:29", time.Microsecond) + require.NoError(t, err) + require.False(t, depOnCtx) + prev, ok := closeToMidnight.Prev(evalCtx) + require.True(t, ok) + require.Equal(t, "'11:16:28.865325-15:59:00'", prev.String()) + prevPrev, ok := prev.Prev(evalCtx) + require.True(t, ok) + assert.Equal(t, "'11:16:29.865325-15:58:59'", prevPrev.String()) + + maxTime, depOnCtx, err := tree.ParseDTimeTZ(evalCtx, "24:00:00-1559", time.Microsecond) + require.NoError(t, err) + require.False(t, depOnCtx) + prev, ok = maxTime.Prev(evalCtx) + require.True(t, ok) + assert.Equal(t, "'23:59:59.999999-15:59:00'", prev.String()) + + minTime, depOnCtx, err := tree.ParseDTimeTZ(evalCtx, "00:00:00+1559", time.Microsecond) + require.NoError(t, err) + require.False(t, depOnCtx) + _, ok = minTime.Prev(evalCtx) + assert.False(t, ok) + + minTimePlusOne, depOnCtx, err := tree.ParseDTimeTZ(evalCtx, "00:00:00.000001+1559", time.Microsecond) + require.NoError(t, err) + require.False(t, depOnCtx) + prev, ok = minTimePlusOne.Prev(evalCtx) + require.True(t, ok) + assert.Equal(t, minTime, prev) + + // Choose a random start time, and run Prev for 10000 iterations. + startTime := randgen.RandDatum(rng, types.TimeTZ, false /* nullOk */) + var total int + for datum, ok := startTime, true; ok && total < 10000; datum, ok = datum.Prev(evalCtx) { + total++ + + // Check that the result of calling Prev is valid. + timeTZ, ok := datum.(*tree.DTimeTZ) + require.True(t, ok) + checkTimeTZ(t, timeTZ) + + // Check that the result of calling Next on this new value is valid. + nextDatum, nextOk := timeTZ.Next(evalCtx) + if !nextOk { + assert.Equal(t, timeTZ, tree.DMaxTimeTZ) + continue + } + nextTimeTZ, ok := nextDatum.(*tree.DTimeTZ) + require.True(t, ok) + checkTimeTZ(t, nextTimeTZ) + + // Check that the two datums have the expected relationship to one another. + assert.True(t, nextTimeTZ.After(timeTZ.TimeTZ)) + assert.True(t, timeTZ.Before(nextTimeTZ.TimeTZ)) + if nextTimeTZ.OffsetSecs == timetz.MinTimeTZOffsetSecs || + nextTimeTZ.TimeOfDay+duration.MicrosPerSec > timeofday.Max { + assert.True(t, nextTimeTZ.ToTime().Sub(timeTZ.ToTime()) == time.Microsecond) + } else { + assert.True(t, nextTimeTZ.ToTime().Equal(timeTZ.ToTime())) + } + } +} + +func TestDTimeTZNext(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + rng, _ := randutil.NewTestRand() + evalCtx := &tree.EvalContext{ + SessionDataStack: sessiondata.NewStack(&sessiondata.SessionData{ + Location: time.UTC, + }), + } + + // Check a few specific values. + closeToMidnight, depOnCtx, err := tree.ParseDTimeTZ(evalCtx, "00:00:00.865326+03:15:29", time.Microsecond) + require.NoError(t, err) + require.False(t, depOnCtx) + next, ok := closeToMidnight.Next(evalCtx) + require.True(t, ok) + require.Equal(t, "'12:43:31.865327+15:59:00'", next.String()) + nextNext, ok := next.Next(evalCtx) + require.True(t, ok) + assert.Equal(t, "'12:43:30.865327+15:58:59'", nextNext.String()) + + minTime, depOnCtx, err := tree.ParseDTimeTZ(evalCtx, "00:00:00+1559", time.Microsecond) + require.NoError(t, err) + require.False(t, depOnCtx) + next, ok = minTime.Next(evalCtx) + require.True(t, ok) + assert.Equal(t, "'00:00:00.000001+15:59:00'", next.String()) + + maxTime, depOnCtx, err := tree.ParseDTimeTZ(evalCtx, "24:00:00-1559", time.Microsecond) + require.NoError(t, err) + require.False(t, depOnCtx) + _, ok = maxTime.Next(evalCtx) + assert.False(t, ok) + + maxTimeMinusOne, depOnCtx, err := tree.ParseDTimeTZ(evalCtx, "23:59:59.999999-1559", time.Microsecond) + require.NoError(t, err) + require.False(t, depOnCtx) + next, ok = maxTimeMinusOne.Next(evalCtx) + require.True(t, ok) + assert.Equal(t, maxTime, next) + + // Choose a random start time, and run Next for 10000 iterations. + startTime := randgen.RandDatum(rng, types.TimeTZ, false /* nullOk */) + var total int + for datum, ok := startTime, true; ok && total < 10000; datum, ok = datum.Next(evalCtx) { + total++ + + // Check that the result of calling Next is valid. + timeTZ, ok := datum.(*tree.DTimeTZ) + require.True(t, ok) + checkTimeTZ(t, timeTZ) + + // Check that the result of calling Prev on this new value is valid. + prevDatum, prevOk := timeTZ.Prev(evalCtx) + if !prevOk { + assert.Equal(t, timeTZ, tree.DMinTimeTZ) + continue + } + prevTimeTZ, ok := prevDatum.(*tree.DTimeTZ) + require.True(t, ok) + checkTimeTZ(t, prevTimeTZ) + + // Check that the two datums have the expected relationship to one another. + assert.True(t, prevTimeTZ.Before(timeTZ.TimeTZ)) + assert.True(t, timeTZ.After(prevTimeTZ.TimeTZ)) + if prevTimeTZ.OffsetSecs == timetz.MaxTimeTZOffsetSecs || + prevTimeTZ.TimeOfDay-duration.MicrosPerSec < timeofday.Min { + assert.True(t, timeTZ.ToTime().Sub(prevTimeTZ.ToTime()) == time.Microsecond) + } else { + assert.True(t, timeTZ.ToTime().Equal(prevTimeTZ.ToTime())) + } + } +} + func TestIsDistinctFrom(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) diff --git a/pkg/util/duration/duration.go b/pkg/util/duration/duration.go index ecdee8c3f585..48da83f97a5e 100644 --- a/pkg/util/duration/duration.go +++ b/pkg/util/duration/duration.go @@ -28,8 +28,10 @@ import ( const ( // MicrosPerMilli is the amount of microseconds in a millisecond. MicrosPerMilli = 1000 - // MillisPerSec is the amount of seconds in a millisecond. + // MillisPerSec is the amount of milliseconds in a second. MillisPerSec = 1000 + // MicrosPerSec is the amount of microseconds in a second. + MicrosPerSec = MicrosPerMilli * MillisPerSec // SecsPerMinute is the amount of seconds in a minute. SecsPerMinute = 60 // SecsPerHour is the amount of seconds in an hour. From ad7bf0441e2c26381bdea35e69637807a6e4a954 Mon Sep 17 00:00:00 2001 From: Rebecca Taft Date: Fri, 14 Jan 2022 17:31:44 -0600 Subject: [PATCH 2/2] opt: fix bug in histogram calculation for TimeTZ This commit fixes a bug in the histogram estimation code for TimeTZ that made the faulty assumption that TimeTZ values are ordered by TimeOfDay. This is incorrect since it does not take the OffsetSecs into account. As a result, it was possible to estimate that the size of a histogram bucket was negative, which caused problems in the statistics estimation code. This commit fixes the problem by taking into account both TimeOfDay and OffsetSecs when estimating the size of a bucket in a TimeTZ histogram. Fixes #74667 Release note (bug fix): Fixed an internal error, "estimated row count must be non-zero", that could occur during planning for queries over a table with a TimeTZ column. This error was due to a faulty assumption in the statistics estimation code about ordering of TimeTZ values, which has now been fixed. The error could occur when TimeTZ values used in the query had a different time zone offset than the TimeTZ values stored in the table. --- pkg/sql/opt/memo/testdata/stats/groupby | 73 +++++++++++++++++++++++++ pkg/sql/opt/props/BUILD.bazel | 1 + pkg/sql/opt/props/histogram.go | 25 +++++++-- pkg/sql/opt/props/histogram_test.go | 70 +++++++++++++++++++++--- 4 files changed, 154 insertions(+), 15 deletions(-) diff --git a/pkg/sql/opt/memo/testdata/stats/groupby b/pkg/sql/opt/memo/testdata/stats/groupby index a58f6171c78d..b8964cac8062 100644 --- a/pkg/sql/opt/memo/testdata/stats/groupby +++ b/pkg/sql/opt/memo/testdata/stats/groupby @@ -526,3 +526,76 @@ project │ └── bool_or:5 [type=bool, outer=(5), constraints=(/5: [/true - /true]; tight), fd=()-->(5)] └── projections └── 1 [as="?column?":6, type=int] + +# Regression test for #74667. +exec-ddl +CREATE TABLE t74667 (col TIMETZ PRIMARY KEY) +---- + +exec-ddl +ALTER TABLE t74667 INJECT STATISTICS '[ + { + "columns": [ + "col" + ], + "created_at": "2000-01-01 00:00:00+00:00", + "distinct_count": 950814763580487611, + "histo_buckets": [ + { + "distinct_range": 0, + "num_eq": 3873172219268179689, + "num_range": 0, + "upper_bound": "00:00:00+15:59:00" + }, + { + "distinct_range": 400000, + "num_eq": 3000000000, + "num_range": 400000, + "upper_bound": "04:40:23.558699+11:08:00" + }, + { + "distinct_range": 381143202295070850, + "num_eq": 6399369578112136136, + "num_range": 381143202295070816, + "upper_bound": "06:12:15.740051+06:40:00" + } + ], + "histo_col_type": "TIMETZ", + "name": "__auto__", + "null_count": 0, + "row_count": 1188522479222658429 + } +]' +---- + +norm +SELECT count(*) +FROM t74667 +WHERE col < '03:33:05.598931+07:11:00':::TIMETZ +GROUP BY col; +---- +project + ├── columns: count:4(int!null) + ├── stats: [rows=2.00509067e+16] + └── group-by (hash) + ├── columns: col:1(timetz!null) count_rows:4(int!null) + ├── grouping columns: col:1(timetz!null) + ├── stats: [rows=2.00509067e+16, distinct(1)=2.00509067e+16, null(1)=0] + ├── key: (1) + ├── fd: (1)-->(4) + ├── select + │ ├── columns: col:1(timetz!null) + │ ├── stats: [rows=4.52141047e+17, distinct(1)=2.00509067e+16, null(1)=0] + │ │ histogram(1)= 0 4.3209e+17 44624 3.3468e+08 2.0051e+16 0 + │ │ <--- '00:00:00+15:59:00' ------- '04:40:23.558699+11:08:00' ------------ '03:33:06.598931+07:11:01' + │ ├── key: (1) + │ ├── scan t74667 + │ │ ├── columns: col:1(timetz!null) + │ │ ├── stats: [rows=1.18852248e+18, distinct(1)=9.50814764e+17, null(1)=0] + │ │ │ histogram(1)= 0 4.3209e+17 44624 3.3468e+08 4.252e+16 7.1391e+17 + │ │ │ <--- '00:00:00+15:59:00' ------- '04:40:23.558699+11:08:00' ----------- '06:12:15.740051+06:40:00' + │ │ └── key: (1) + │ └── filters + │ └── col:1 < '03:33:05.598931+07:11:00' [type=bool, outer=(1), constraints=(/1: (/NULL - /'03:33:06.598931+07:11:01']; tight)] + └── aggregations + └── count-rows [as=count_rows:4, type=int] diff --git a/pkg/sql/opt/props/BUILD.bazel b/pkg/sql/opt/props/BUILD.bazel index 303fb59b16e5..f6aead24f882 100644 --- a/pkg/sql/opt/props/BUILD.bazel +++ b/pkg/sql/opt/props/BUILD.bazel @@ -28,6 +28,7 @@ go_library( "//pkg/util/buildutil", "//pkg/util/encoding", "//pkg/util/log", + "//pkg/util/timetz", "@com_github_cockroachdb_errors//:errors", "@com_github_olekukonko_tablewriter//:tablewriter", ], diff --git a/pkg/sql/opt/props/histogram.go b/pkg/sql/opt/props/histogram.go index c6b0b26b8d3a..d20f14d2c359 100644 --- a/pkg/sql/opt/props/histogram.go +++ b/pkg/sql/opt/props/histogram.go @@ -17,6 +17,7 @@ import ( "io" "math" "sort" + "time" "github.com/cockroachdb/cockroach/pkg/sql/inverted" "github.com/cockroachdb/cockroach/pkg/sql/opt" @@ -26,6 +27,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" "github.com/cockroachdb/cockroach/pkg/sql/types" "github.com/cockroachdb/cockroach/pkg/util/encoding" + "github.com/cockroachdb/cockroach/pkg/util/timetz" "github.com/cockroachdb/errors" "github.com/olekukonko/tablewriter" ) @@ -805,12 +807,23 @@ func getRangesBeforeAndAfter( return rangeBefore, rangeAfter, true case types.TimeTZFamily: - lowerBefore := beforeLowerBound.(*tree.DTimeTZ).TimeOfDay - upperBefore := beforeUpperBound.(*tree.DTimeTZ).TimeOfDay - lowerAfter := afterLowerBound.(*tree.DTimeTZ).TimeOfDay - upperAfter := afterUpperBound.(*tree.DTimeTZ).TimeOfDay - rangeBefore = float64(upperBefore) - float64(lowerBefore) - rangeAfter = float64(upperAfter) - float64(lowerAfter) + // timeTZOffsetSecsRange is the total number of possible values for offset. + timeTZOffsetSecsRange := timetz.MaxTimeTZOffsetSecs - timetz.MinTimeTZOffsetSecs + 1 + + // Find the ranges in microseconds based on the absolute times of the range + // boundaries. + lowerBefore := beforeLowerBound.(*tree.DTimeTZ) + upperBefore := beforeUpperBound.(*tree.DTimeTZ) + lowerAfter := afterLowerBound.(*tree.DTimeTZ) + upperAfter := afterUpperBound.(*tree.DTimeTZ) + rangeBefore = float64(upperBefore.ToTime().Sub(lowerBefore.ToTime()) / time.Microsecond) + rangeAfter = float64(upperAfter.ToTime().Sub(lowerAfter.ToTime()) / time.Microsecond) + + // Account for the offset. + rangeBefore *= float64(timeTZOffsetSecsRange) + rangeAfter *= float64(timeTZOffsetSecsRange) + rangeBefore += float64(upperBefore.OffsetSecs - lowerBefore.OffsetSecs) + rangeAfter += float64(upperAfter.OffsetSecs - lowerAfter.OffsetSecs) return rangeBefore, rangeAfter, true case types.StringFamily, types.BytesFamily, types.UuidFamily, types.INetFamily: diff --git a/pkg/sql/opt/props/histogram_test.go b/pkg/sql/opt/props/histogram_test.go index 583d7e66ba59..ee94f7beafc9 100644 --- a/pkg/sql/opt/props/histogram_test.go +++ b/pkg/sql/opt/props/histogram_test.go @@ -384,7 +384,7 @@ func TestFilterBucket(t *testing.T) { t.Fatalf("for span %s expected an error", testCase.span) } if !reflect.DeepEqual(testCase.expected, actual) { - t.Fatalf("for span %s exected %v but found %v", testCase.span, testCase.expected, actual) + t.Errorf("for span %s exected %v but found %v", testCase.span, testCase.expected, actual) } } } @@ -612,17 +612,17 @@ func TestFilterBucket(t *testing.T) { }) t.Run("timetz", func(t *testing.T) { - upperBound, _, err := tree.ParseDTimeTZ(&evalCtx, "05:00:00", time.Microsecond) + upperBound1, _, err := tree.ParseDTimeTZ(&evalCtx, "05:00:00", time.Microsecond) if err != nil { t.Fatal(err) } - lowerBound, _, err := tree.ParseDTimeTZ(&evalCtx, "04:00:00", time.Microsecond) + lowerBound1, _, err := tree.ParseDTimeTZ(&evalCtx, "04:00:00", time.Microsecond) if err != nil { t.Fatal(err) } - h := &Histogram{evalCtx: &evalCtx, col: col, buckets: []cat.HistogramBucket{ - {NumEq: 0, NumRange: 0, DistinctRange: 0, UpperBound: getPrevUpperBound(lowerBound)}, - {NumEq: 1, NumRange: 62, DistinctRange: 31, UpperBound: upperBound}, + h1 := &Histogram{evalCtx: &evalCtx, col: col, buckets: []cat.HistogramBucket{ + {NumEq: 0, NumRange: 0, DistinctRange: 0, UpperBound: getPrevUpperBound(lowerBound1)}, + {NumEq: 1, NumRange: 62, DistinctRange: 31, UpperBound: upperBound1}, }} ub1, _, err := tree.ParseDTimeTZ(&evalCtx, "04:15:00", time.Microsecond) @@ -630,18 +630,70 @@ func TestFilterBucket(t *testing.T) { t.Fatal(err) } - testData := []testCase{ + testData1 := []testCase{ { span: "[/04:00:00 - /04:15:00]", expected: &cat.HistogramBucket{NumEq: 0, NumRange: 15.5, DistinctRange: 7.75, UpperBound: ub1}, }, { span: "[/04:30:00 - /05:00:00]", - expected: &cat.HistogramBucket{NumEq: 1, NumRange: 31, DistinctRange: 15.5, UpperBound: upperBound}, + expected: &cat.HistogramBucket{NumEq: 1, NumRange: 31, DistinctRange: 15.5, UpperBound: upperBound1}, + }, + { + span: "[/06:30:00+02:00:00 - /05:00:00]", + expected: &cat.HistogramBucket{NumEq: 1, NumRange: 31, DistinctRange: 15.5, UpperBound: upperBound1}, + }, + { + span: "[/08:30:00+04:00:00 - /05:00:00]", + expected: &cat.HistogramBucket{NumEq: 1, NumRange: 31, DistinctRange: 15.5, UpperBound: upperBound1}, + }, + } + + // This test distinguishes between values that have the same time but + // different offsets. + upperBound2, _, err := tree.ParseDTimeTZ(&evalCtx, "05:00:00.000001", time.Microsecond) + if err != nil { + t.Fatal(err) + } + h2 := &Histogram{evalCtx: &evalCtx, col: col, buckets: []cat.HistogramBucket{ + {NumEq: 0, NumRange: 0, DistinctRange: 0, UpperBound: upperBound1}, + {NumEq: 1, NumRange: 10000, DistinctRange: 1000, UpperBound: upperBound2}, + }} + + ub2, _, err := tree.ParseDTimeTZ(&evalCtx, "20:59:00.000001+15:59:00", time.Microsecond) + if err != nil { + t.Fatal(err) + } + + testData2 := []testCase{ + { + span: "[/05:00:00.000001 - /05:00:00.000001]", + expected: &cat.HistogramBucket{NumEq: 1, NumRange: 0, DistinctRange: 0, UpperBound: upperBound2}, + }, + { + span: "[/07:00:00.000001+02:00:00 - /05:00:00.000001]", + expected: &cat.HistogramBucket{NumEq: 1, NumRange: 625.65, DistinctRange: 62.57, UpperBound: upperBound2}, + }, + { + span: "[/09:00:00.000001+04:00:00 - /05:00:00.000001]", + expected: &cat.HistogramBucket{NumEq: 1, NumRange: 1251.3, DistinctRange: 125.13, UpperBound: upperBound2}, + }, + { + span: "[/04:59:59-00:00:01 - /20:59:00.000001+15:59:00]", + expected: &cat.HistogramBucket{NumEq: 0, NumRange: 5000, DistinctRange: 500, UpperBound: ub2}, + }, + { + span: "[/20:59:00.000001+15:59:00 - /05:00:00.000001]", + expected: &cat.HistogramBucket{NumEq: 1, NumRange: 5000, DistinctRange: 500, UpperBound: upperBound2}, + }, + { + span: "[/04:59:58-00:00:02 - /05:00:00.000001]", + expected: &cat.HistogramBucket{NumEq: 1, NumRange: 9999.91, DistinctRange: 999.99, UpperBound: upperBound2}, }, } - runTest(h, testData, types.TimeTZFamily) + runTest(h1, testData1, types.TimeTZFamily) + runTest(h2, testData2, types.TimeTZFamily) }) t.Run("string-bytes", func(t *testing.T) {