Skip to content

Commit

Permalink
sql: move histogram decoding into the stats cache
Browse files Browse the repository at this point in the history
Currently the stats cache exposes the encoded histogram data, and the
opt catalog decodes it. The problem is that opt catalog objects are
not shared across connections so this is inefficient (especially in
memory usage).

This change moves the decoding into the stats cache. The opt catalog
objects now simply point to the data computed by the cache.

There are still inefficiencies to improve on in the future: the opt
catalog can hold on to multiple versions of tables, so we will keep
many versions of histograms "alive".

Informs cockroachdb#41206.
Informs cockroachdb#40922.

Release justification: fix for new functionality.

Release note: None
  • Loading branch information
RaduBerinde committed Sep 30, 2019
1 parent 3711de6 commit 0de36e2
Show file tree
Hide file tree
Showing 11 changed files with 237 additions and 216 deletions.
6 changes: 4 additions & 2 deletions pkg/ccl/backupccl/backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -1045,7 +1045,7 @@ func backupPlanHook(
}

statsCache := p.ExecCfg().TableStatsCache
tableStatistics := make([]*stats.TableStatistic, 0)
tableStatistics := make([]*stats.TableStatisticProto, 0)
var tables []*sqlbase.TableDescriptor
for _, desc := range targetDescs {
if dbDesc := desc.GetDatabase(); dbDesc != nil {
Expand All @@ -1064,7 +1064,9 @@ func backupPlanHook(
if err != nil {
return err
}
tableStatistics = append(tableStatistics, tableStatisticsAcc...)
for i := range tableStatisticsAcc {
tableStatistics = append(tableStatistics, &tableStatisticsAcc[i].TableStatisticProto)
}
}
}

Expand Down
152 changes: 76 additions & 76 deletions pkg/ccl/backupccl/backup.pb.go

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pkg/ccl/backupccl/backup.proto
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ message BackupDescriptor {
(gogoproto.customtype) = "github.com/cockroachdb/cockroach/pkg/util/uuid.UUID"];
repeated string partition_descriptor_filenames = 19;
repeated string locality_kvs = 20 [(gogoproto.customname) = "LocalityKVs"];
repeated sql.stats.TableStatistic statistics = 21;
repeated sql.stats.TableStatisticProto statistics = 21;
}

message BackupPartitionDescriptor{
Expand Down
6 changes: 3 additions & 3 deletions pkg/ccl/backupccl/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -1644,7 +1644,7 @@ type restoreResumer struct {
databases []*sqlbase.DatabaseDescriptor
tables []*sqlbase.TableDescriptor
exec sqlutil.InternalExecutor
latestStats []*stats.TableStatistic
latestStats []*stats.TableStatisticProto
statsRefresher *stats.Refresher
}

Expand All @@ -1656,8 +1656,8 @@ type restoreResumer struct {
// table is being restored.
func remapRelevantStatistics(
backup BackupDescriptor, tableRewrites TableRewriteMap,
) []*stats.TableStatistic {
relevantTableStatistics := make([]*stats.TableStatistic, 0, len(backup.Statistics))
) []*stats.TableStatisticProto {
relevantTableStatistics := make([]*stats.TableStatisticProto, 0, len(backup.Statistics))

for i := range backup.Statistics {
stat := backup.Statistics[i]
Expand Down
43 changes: 8 additions & 35 deletions pkg/sql/opt_catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
"github.com/cockroachdb/cockroach/pkg/sql/stats"
"github.com/cockroachdb/cockroach/pkg/util"
"github.com/cockroachdb/cockroach/pkg/util/encoding"
"github.com/cockroachdb/errors"
)

Expand Down Expand Up @@ -1001,22 +1000,14 @@ func (oi *optIndex) PartitionByListPrefixes() []tree.Datums {
}

type optTableStat struct {
createdAt time.Time
stat *stats.TableStatistic
columnOrdinals []int
rowCount uint64
distinctCount uint64
nullCount uint64
histogram []cat.HistogramBucket
}

var _ cat.TableStatistic = &optTableStat{}

func (os *optTableStat) init(tab *optTable, stat *stats.TableStatistic) (ok bool, _ error) {
os.createdAt = stat.CreatedAt
os.rowCount = stat.RowCount
os.distinctCount = stat.DistinctCount
os.nullCount = stat.NullCount

os.stat = stat
os.columnOrdinals = make([]int, len(stat.ColumnIDs))
for i, c := range stat.ColumnIDs {
var ok bool
Expand All @@ -1028,31 +1019,13 @@ func (os *optTableStat) init(tab *optTable, stat *stats.TableStatistic) (ok bool
}
}

if stat.Histogram != nil {
os.histogram = make([]cat.HistogramBucket, len(stat.Histogram.Buckets))
typ := &stat.Histogram.ColumnType
var a sqlbase.DatumAlloc
for i := range os.histogram {
bucket := &stat.Histogram.Buckets[i]
datum, _, err := sqlbase.DecodeTableKey(&a, typ, bucket.UpperBound, encoding.Ascending)
if err != nil {
return false, err
}
os.histogram[i] = cat.HistogramBucket{
NumEq: float64(bucket.NumEq),
NumRange: float64(bucket.NumRange),
DistinctRange: bucket.DistinctRange,
UpperBound: datum,
}
}
}
return true, nil
}

func (os *optTableStat) equals(other *optTableStat) bool {
// Two table statistics are considered equal if they have been created at the
// same time, on the same set of columns.
if os.createdAt != other.createdAt || len(os.columnOrdinals) != len(other.columnOrdinals) {
if os.CreatedAt() != other.CreatedAt() || len(os.columnOrdinals) != len(other.columnOrdinals) {
return false
}
for i, c := range os.columnOrdinals {
Expand All @@ -1065,7 +1038,7 @@ func (os *optTableStat) equals(other *optTableStat) bool {

// CreatedAt is part of the cat.TableStatistic interface.
func (os *optTableStat) CreatedAt() time.Time {
return os.createdAt
return os.stat.CreatedAt
}

// ColumnCount is part of the cat.TableStatistic interface.
Expand All @@ -1080,22 +1053,22 @@ func (os *optTableStat) ColumnOrdinal(i int) int {

// RowCount is part of the cat.TableStatistic interface.
func (os *optTableStat) RowCount() uint64 {
return os.rowCount
return os.stat.RowCount
}

// DistinctCount is part of the cat.TableStatistic interface.
func (os *optTableStat) DistinctCount() uint64 {
return os.distinctCount
return os.stat.DistinctCount
}

// NullCount is part of the cat.TableStatistic interface.
func (os *optTableStat) NullCount() uint64 {
return os.nullCount
return os.stat.NullCount
}

// Histogram is part of the cat.TableStatistic interface.
func (os *optTableStat) Histogram() []cat.HistogramBucket {
return os.histogram
return os.stat.Histogram
}

// optFamily is a wrapper around sqlbase.ColumnFamilyDescriptor that keeps a
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/stats/delete_stats_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ func TestDeleteOldStatsForColumns(t *testing.T) {

// The test data must be ordered by CreatedAt DESC so the calculated set of
// expected deleted stats is correct.
testData := []TableStatistic{
testData := []TableStatisticProto{
{
TableID: sqlbase.ID(100),
StatisticID: 1,
Expand Down
4 changes: 2 additions & 2 deletions pkg/sql/stats/new_stat.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ func InsertNewStats(
ctx context.Context,
executor sqlutil.InternalExecutor,
txn *client.Txn,
tableStats []*TableStatistic,
tableStats []*TableStatisticProto,
) error {
var err error
for _, statistic := range tableStats {
Expand All @@ -42,7 +42,7 @@ func InsertNewStats(
int64(statistic.RowCount),
int64(statistic.DistinctCount),
int64(statistic.NullCount),
statistic.Histogram,
statistic.HistogramData,
)
if err != nil {
return err
Expand Down
57 changes: 44 additions & 13 deletions pkg/sql/stats/stats_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,28 @@ import (
"github.com/cockroachdb/cockroach/pkg/gossip"
"github.com/cockroachdb/cockroach/pkg/internal/client"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/sql/opt/cat"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
"github.com/cockroachdb/cockroach/pkg/sql/sqlutil"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util/cache"
"github.com/cockroachdb/cockroach/pkg/util/encoding"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
"github.com/pkg/errors"
)

// A TableStatistic object holds a statistic for a particular column or group
// of columns.
type TableStatistic struct {
TableStatisticProto

// Histogram is the decoded histogram data.
Histogram []cat.HistogramBucket
}

// A TableStatisticsCache contains two underlying LRU caches:
// (1) A cache of []*TableStatistic objects, keyed by table ID.
// Each entry consists of all the statistics for different columns and
Expand Down Expand Up @@ -262,33 +273,53 @@ func parseStats(datums tree.Datums) (*TableStatistic, error) {
}

// Extract datum values.
tableStatistic := &TableStatistic{
TableID: sqlbase.ID((int32)(*datums[tableIDIndex].(*tree.DInt))),
StatisticID: (uint64)(*datums[statisticsIDIndex].(*tree.DInt)),
CreatedAt: datums[createdAtIndex].(*tree.DTimestamp).Time,
RowCount: (uint64)(*datums[rowCountIndex].(*tree.DInt)),
DistinctCount: (uint64)(*datums[distinctCountIndex].(*tree.DInt)),
NullCount: (uint64)(*datums[nullCountIndex].(*tree.DInt)),
res := &TableStatistic{
TableStatisticProto: TableStatisticProto{
TableID: sqlbase.ID((int32)(*datums[tableIDIndex].(*tree.DInt))),
StatisticID: (uint64)(*datums[statisticsIDIndex].(*tree.DInt)),
CreatedAt: datums[createdAtIndex].(*tree.DTimestamp).Time,
RowCount: (uint64)(*datums[rowCountIndex].(*tree.DInt)),
DistinctCount: (uint64)(*datums[distinctCountIndex].(*tree.DInt)),
NullCount: (uint64)(*datums[nullCountIndex].(*tree.DInt)),
},
}
columnIDs := datums[columnIDsIndex].(*tree.DArray)
tableStatistic.ColumnIDs = make([]sqlbase.ColumnID, len(columnIDs.Array))
res.ColumnIDs = make([]sqlbase.ColumnID, len(columnIDs.Array))
for i, d := range columnIDs.Array {
tableStatistic.ColumnIDs[i] = sqlbase.ColumnID((int32)(*d.(*tree.DInt)))
res.ColumnIDs[i] = sqlbase.ColumnID((int32)(*d.(*tree.DInt)))
}
if datums[nameIndex] != tree.DNull {
tableStatistic.Name = string(*datums[nameIndex].(*tree.DString))
res.Name = string(*datums[nameIndex].(*tree.DString))
}
if datums[histogramIndex] != tree.DNull {
tableStatistic.Histogram = &HistogramData{}
res.HistogramData = &HistogramData{}
if err := protoutil.Unmarshal(
[]byte(*datums[histogramIndex].(*tree.DBytes)),
tableStatistic.Histogram,
res.HistogramData,
); err != nil {
return nil, err
}

// Decode the histogram data so that it's usable by the opt catalog.
res.Histogram = make([]cat.HistogramBucket, len(res.HistogramData.Buckets))
typ := &res.HistogramData.ColumnType
var a sqlbase.DatumAlloc
for i := range res.Histogram {
bucket := &res.HistogramData.Buckets[i]
datum, _, err := sqlbase.DecodeTableKey(&a, typ, bucket.UpperBound, encoding.Ascending)
if err != nil {
return nil, err
}
res.Histogram[i] = cat.HistogramBucket{
NumEq: float64(bucket.NumEq),
NumRange: float64(bucket.NumRange),
DistinctRange: bucket.DistinctRange,
UpperBound: datum,
}
}
}

return tableStatistic, nil
return res, nil
}

// getTableStatsFromDB retrieves the statistics in system.table_statistics
Expand Down
35 changes: 25 additions & 10 deletions pkg/sql/stats/stats_cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ import (
)

func insertTableStat(
ctx context.Context, db *client.DB, ex sqlutil.InternalExecutor, stat *TableStatistic,
ctx context.Context, db *client.DB, ex sqlutil.InternalExecutor, stat *TableStatisticProto,
) error {
insertStatStmt := `
INSERT INTO system.table_statistics ("tableID", "statisticID", name, "columnIDs", "createdAt",
Expand Down Expand Up @@ -62,8 +62,8 @@ VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
if len(stat.Name) != 0 {
args[2] = stat.Name
}
if stat.Histogram != nil {
histogramBytes, err := protoutil.Marshal(stat.Histogram)
if stat.HistogramData != nil {
histogramBytes, err := protoutil.Marshal(stat.HistogramData)
if err != nil {
return err
}
Expand Down Expand Up @@ -94,7 +94,10 @@ func lookupTableStats(
}

func checkStatsForTable(
ctx context.Context, sc *TableStatisticsCache, expected []*TableStatistic, tableID sqlbase.ID,
ctx context.Context,
sc *TableStatisticsCache,
expected []*TableStatisticProto,
tableID sqlbase.ID,
) error {
// Initially the stats won't be in the cache.
if statsList, ok := lookupTableStats(ctx, sc, tableID); ok {
Expand All @@ -107,7 +110,7 @@ func checkStatsForTable(
if err != nil {
return errors.Errorf(err.Error())
}
if !reflect.DeepEqual(statsList, expected) {
if !checkStats(statsList, expected) {
return errors.Errorf("for lookup of key %d, expected stats %s, got %s", tableID, expected, statsList)
}

Expand All @@ -118,12 +121,24 @@ func checkStatsForTable(
return nil
}

func checkStats(actual []*TableStatistic, expected []*TableStatisticProto) bool {
if len(actual) == 0 && len(expected) == 0 {
// DeepEqual differentiates between nil and empty slices, we don't.
return true
}
var protoList []*TableStatisticProto
for i := range actual {
protoList = append(protoList, &actual[i].TableStatisticProto)
}
return reflect.DeepEqual(protoList, expected)
}

func initTestData(
ctx context.Context, db *client.DB, ex sqlutil.InternalExecutor,
) (map[sqlbase.ID][]*TableStatistic, error) {
) (map[sqlbase.ID][]*TableStatisticProto, error) {
// The expected stats must be ordered by TableID+, CreatedAt- so they can
// later be compared with the returned stats using reflect.DeepEqual.
expStatsList := []TableStatistic{
expStatsList := []TableStatisticProto{
{
TableID: sqlbase.ID(100),
StatisticID: 0,
Expand All @@ -133,7 +148,7 @@ func initTestData(
RowCount: 32,
DistinctCount: 30,
NullCount: 0,
Histogram: &HistogramData{ColumnType: *types.Int, Buckets: []HistogramData_Bucket{
HistogramData: &HistogramData{ColumnType: *types.Int, Buckets: []HistogramData_Bucket{
{NumEq: 3, NumRange: 30, UpperBound: encoding.EncodeVarintAscending(nil, 3000)}},
},
},
Expand Down Expand Up @@ -169,7 +184,7 @@ func initTestData(

// Insert the stats into system.table_statistics
// and store them in maps for fast retrieval.
expectedStats := make(map[sqlbase.ID][]*TableStatistic)
expectedStats := make(map[sqlbase.ID][]*TableStatisticProto)
for i := range expStatsList {
stat := &expStatsList[i]

Expand Down Expand Up @@ -284,7 +299,7 @@ func TestCacheWait(t *testing.T) {
stats, err := sc.GetTableStats(ctx, id)
if err != nil {
t.Error(err)
} else if !reflect.DeepEqual(stats, expectedStats[id]) {
} else if !checkStats(stats, expectedStats[id]) {
t.Errorf("for table %d, expected stats %s, got %s", id, expectedStats[id], stats)
}
wg.Done()
Expand Down
Loading

0 comments on commit 0de36e2

Please sign in to comment.