Skip to content

Commit

Permalink
Merge #41220 #41231
Browse files Browse the repository at this point in the history
41220: sql: move histogram decoding into the stats cache r=RaduBerinde a=RaduBerinde

Currently the stats cache exposes the encoded histogram data, and the
opt catalog decodes it. The problem is that opt catalog objects are
not shared across connections so this is inefficient (especially in
memory usage).

This change moves the decoding into the stats cache. The opt catalog
objects now simply point to the data computed by the cache.

There are still inefficiencies to improve on in the future: the opt
catalog can hold on to multiple versions of tables, so we will keep
many versions of histograms "alive".

Informs #41206.
Informs #40922.

Release justification: fix for new functionality.

Release note: None

41231: util/log: ensure that secondary loggers do not leak memory r=petermattis,nvanbenschoten a=knz

Fixes #41230.

Note: the primary cause of this issue is removed by #40993 but that PR is blocked until 19.2 is out. I'm cherry-picking the subset of those changes sufficient to solve issue #41230, here.

Prior to this patch, logging via a secondary logger would allocate a
buffer, then add it to the buffer free list of the secondary logger.

This was causing a memory leak because only the free list from the
main logger is used to allocate buffers (even in secondary loggers),
so all the now-unused buffers from secondary logs would remain unused
and accumulate, locked from Go's GC attention because they are
referenced somewhere.

Release justification: bug fix

Release note (bug fix): A memory leak was fixed that affected
secondary logging (SQL audit logs, statement execution, and RocksDB
logging).

Co-authored-by: Radu Berinde <radu@cockroachlabs.com>
Co-authored-by: Raphael 'kena' Poss <knz@thaumogen.net>
  • Loading branch information
3 people committed Oct 1, 2019
3 parents 02d9edd + 0de36e2 + ab74a33 commit 5f1b177
Show file tree
Hide file tree
Showing 14 changed files with 263 additions and 241 deletions.
6 changes: 4 additions & 2 deletions pkg/ccl/backupccl/backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -1045,7 +1045,7 @@ func backupPlanHook(
}

statsCache := p.ExecCfg().TableStatsCache
tableStatistics := make([]*stats.TableStatistic, 0)
tableStatistics := make([]*stats.TableStatisticProto, 0)
var tables []*sqlbase.TableDescriptor
for _, desc := range targetDescs {
if dbDesc := desc.GetDatabase(); dbDesc != nil {
Expand All @@ -1064,7 +1064,9 @@ func backupPlanHook(
if err != nil {
return err
}
tableStatistics = append(tableStatistics, tableStatisticsAcc...)
for i := range tableStatisticsAcc {
tableStatistics = append(tableStatistics, &tableStatisticsAcc[i].TableStatisticProto)
}
}
}

Expand Down
152 changes: 76 additions & 76 deletions pkg/ccl/backupccl/backup.pb.go

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pkg/ccl/backupccl/backup.proto
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ message BackupDescriptor {
(gogoproto.customtype) = "github.com/cockroachdb/cockroach/pkg/util/uuid.UUID"];
repeated string partition_descriptor_filenames = 19;
repeated string locality_kvs = 20 [(gogoproto.customname) = "LocalityKVs"];
repeated sql.stats.TableStatistic statistics = 21;
repeated sql.stats.TableStatisticProto statistics = 21;
}

message BackupPartitionDescriptor{
Expand Down
6 changes: 3 additions & 3 deletions pkg/ccl/backupccl/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -1644,7 +1644,7 @@ type restoreResumer struct {
databases []*sqlbase.DatabaseDescriptor
tables []*sqlbase.TableDescriptor
exec sqlutil.InternalExecutor
latestStats []*stats.TableStatistic
latestStats []*stats.TableStatisticProto
statsRefresher *stats.Refresher
}

Expand All @@ -1656,8 +1656,8 @@ type restoreResumer struct {
// table is being restored.
func remapRelevantStatistics(
backup BackupDescriptor, tableRewrites TableRewriteMap,
) []*stats.TableStatistic {
relevantTableStatistics := make([]*stats.TableStatistic, 0, len(backup.Statistics))
) []*stats.TableStatisticProto {
relevantTableStatistics := make([]*stats.TableStatisticProto, 0, len(backup.Statistics))

for i := range backup.Statistics {
stat := backup.Statistics[i]
Expand Down
43 changes: 8 additions & 35 deletions pkg/sql/opt_catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
"github.com/cockroachdb/cockroach/pkg/sql/stats"
"github.com/cockroachdb/cockroach/pkg/util"
"github.com/cockroachdb/cockroach/pkg/util/encoding"
"github.com/cockroachdb/errors"
)

Expand Down Expand Up @@ -1001,22 +1000,14 @@ func (oi *optIndex) PartitionByListPrefixes() []tree.Datums {
}

type optTableStat struct {
createdAt time.Time
stat *stats.TableStatistic
columnOrdinals []int
rowCount uint64
distinctCount uint64
nullCount uint64
histogram []cat.HistogramBucket
}

var _ cat.TableStatistic = &optTableStat{}

func (os *optTableStat) init(tab *optTable, stat *stats.TableStatistic) (ok bool, _ error) {
os.createdAt = stat.CreatedAt
os.rowCount = stat.RowCount
os.distinctCount = stat.DistinctCount
os.nullCount = stat.NullCount

os.stat = stat
os.columnOrdinals = make([]int, len(stat.ColumnIDs))
for i, c := range stat.ColumnIDs {
var ok bool
Expand All @@ -1028,31 +1019,13 @@ func (os *optTableStat) init(tab *optTable, stat *stats.TableStatistic) (ok bool
}
}

if stat.Histogram != nil {
os.histogram = make([]cat.HistogramBucket, len(stat.Histogram.Buckets))
typ := &stat.Histogram.ColumnType
var a sqlbase.DatumAlloc
for i := range os.histogram {
bucket := &stat.Histogram.Buckets[i]
datum, _, err := sqlbase.DecodeTableKey(&a, typ, bucket.UpperBound, encoding.Ascending)
if err != nil {
return false, err
}
os.histogram[i] = cat.HistogramBucket{
NumEq: float64(bucket.NumEq),
NumRange: float64(bucket.NumRange),
DistinctRange: bucket.DistinctRange,
UpperBound: datum,
}
}
}
return true, nil
}

func (os *optTableStat) equals(other *optTableStat) bool {
// Two table statistics are considered equal if they have been created at the
// same time, on the same set of columns.
if os.createdAt != other.createdAt || len(os.columnOrdinals) != len(other.columnOrdinals) {
if os.CreatedAt() != other.CreatedAt() || len(os.columnOrdinals) != len(other.columnOrdinals) {
return false
}
for i, c := range os.columnOrdinals {
Expand All @@ -1065,7 +1038,7 @@ func (os *optTableStat) equals(other *optTableStat) bool {

// CreatedAt is part of the cat.TableStatistic interface.
func (os *optTableStat) CreatedAt() time.Time {
return os.createdAt
return os.stat.CreatedAt
}

// ColumnCount is part of the cat.TableStatistic interface.
Expand All @@ -1080,22 +1053,22 @@ func (os *optTableStat) ColumnOrdinal(i int) int {

// RowCount is part of the cat.TableStatistic interface.
func (os *optTableStat) RowCount() uint64 {
return os.rowCount
return os.stat.RowCount
}

// DistinctCount is part of the cat.TableStatistic interface.
func (os *optTableStat) DistinctCount() uint64 {
return os.distinctCount
return os.stat.DistinctCount
}

// NullCount is part of the cat.TableStatistic interface.
func (os *optTableStat) NullCount() uint64 {
return os.nullCount
return os.stat.NullCount
}

// Histogram is part of the cat.TableStatistic interface.
func (os *optTableStat) Histogram() []cat.HistogramBucket {
return os.histogram
return os.stat.Histogram
}

// optFamily is a wrapper around sqlbase.ColumnFamilyDescriptor that keeps a
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/stats/delete_stats_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ func TestDeleteOldStatsForColumns(t *testing.T) {

// The test data must be ordered by CreatedAt DESC so the calculated set of
// expected deleted stats is correct.
testData := []TableStatistic{
testData := []TableStatisticProto{
{
TableID: sqlbase.ID(100),
StatisticID: 1,
Expand Down
4 changes: 2 additions & 2 deletions pkg/sql/stats/new_stat.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ func InsertNewStats(
ctx context.Context,
executor sqlutil.InternalExecutor,
txn *client.Txn,
tableStats []*TableStatistic,
tableStats []*TableStatisticProto,
) error {
var err error
for _, statistic := range tableStats {
Expand All @@ -42,7 +42,7 @@ func InsertNewStats(
int64(statistic.RowCount),
int64(statistic.DistinctCount),
int64(statistic.NullCount),
statistic.Histogram,
statistic.HistogramData,
)
if err != nil {
return err
Expand Down
57 changes: 44 additions & 13 deletions pkg/sql/stats/stats_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,28 @@ import (
"github.com/cockroachdb/cockroach/pkg/gossip"
"github.com/cockroachdb/cockroach/pkg/internal/client"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/sql/opt/cat"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
"github.com/cockroachdb/cockroach/pkg/sql/sqlutil"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util/cache"
"github.com/cockroachdb/cockroach/pkg/util/encoding"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
"github.com/pkg/errors"
)

// A TableStatistic object holds a statistic for a particular column or group
// of columns.
type TableStatistic struct {
TableStatisticProto

// Histogram is the decoded histogram data.
Histogram []cat.HistogramBucket
}

// A TableStatisticsCache contains two underlying LRU caches:
// (1) A cache of []*TableStatistic objects, keyed by table ID.
// Each entry consists of all the statistics for different columns and
Expand Down Expand Up @@ -262,33 +273,53 @@ func parseStats(datums tree.Datums) (*TableStatistic, error) {
}

// Extract datum values.
tableStatistic := &TableStatistic{
TableID: sqlbase.ID((int32)(*datums[tableIDIndex].(*tree.DInt))),
StatisticID: (uint64)(*datums[statisticsIDIndex].(*tree.DInt)),
CreatedAt: datums[createdAtIndex].(*tree.DTimestamp).Time,
RowCount: (uint64)(*datums[rowCountIndex].(*tree.DInt)),
DistinctCount: (uint64)(*datums[distinctCountIndex].(*tree.DInt)),
NullCount: (uint64)(*datums[nullCountIndex].(*tree.DInt)),
res := &TableStatistic{
TableStatisticProto: TableStatisticProto{
TableID: sqlbase.ID((int32)(*datums[tableIDIndex].(*tree.DInt))),
StatisticID: (uint64)(*datums[statisticsIDIndex].(*tree.DInt)),
CreatedAt: datums[createdAtIndex].(*tree.DTimestamp).Time,
RowCount: (uint64)(*datums[rowCountIndex].(*tree.DInt)),
DistinctCount: (uint64)(*datums[distinctCountIndex].(*tree.DInt)),
NullCount: (uint64)(*datums[nullCountIndex].(*tree.DInt)),
},
}
columnIDs := datums[columnIDsIndex].(*tree.DArray)
tableStatistic.ColumnIDs = make([]sqlbase.ColumnID, len(columnIDs.Array))
res.ColumnIDs = make([]sqlbase.ColumnID, len(columnIDs.Array))
for i, d := range columnIDs.Array {
tableStatistic.ColumnIDs[i] = sqlbase.ColumnID((int32)(*d.(*tree.DInt)))
res.ColumnIDs[i] = sqlbase.ColumnID((int32)(*d.(*tree.DInt)))
}
if datums[nameIndex] != tree.DNull {
tableStatistic.Name = string(*datums[nameIndex].(*tree.DString))
res.Name = string(*datums[nameIndex].(*tree.DString))
}
if datums[histogramIndex] != tree.DNull {
tableStatistic.Histogram = &HistogramData{}
res.HistogramData = &HistogramData{}
if err := protoutil.Unmarshal(
[]byte(*datums[histogramIndex].(*tree.DBytes)),
tableStatistic.Histogram,
res.HistogramData,
); err != nil {
return nil, err
}

// Decode the histogram data so that it's usable by the opt catalog.
res.Histogram = make([]cat.HistogramBucket, len(res.HistogramData.Buckets))
typ := &res.HistogramData.ColumnType
var a sqlbase.DatumAlloc
for i := range res.Histogram {
bucket := &res.HistogramData.Buckets[i]
datum, _, err := sqlbase.DecodeTableKey(&a, typ, bucket.UpperBound, encoding.Ascending)
if err != nil {
return nil, err
}
res.Histogram[i] = cat.HistogramBucket{
NumEq: float64(bucket.NumEq),
NumRange: float64(bucket.NumRange),
DistinctRange: bucket.DistinctRange,
UpperBound: datum,
}
}
}

return tableStatistic, nil
return res, nil
}

// getTableStatsFromDB retrieves the statistics in system.table_statistics
Expand Down
35 changes: 25 additions & 10 deletions pkg/sql/stats/stats_cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ import (
)

func insertTableStat(
ctx context.Context, db *client.DB, ex sqlutil.InternalExecutor, stat *TableStatistic,
ctx context.Context, db *client.DB, ex sqlutil.InternalExecutor, stat *TableStatisticProto,
) error {
insertStatStmt := `
INSERT INTO system.table_statistics ("tableID", "statisticID", name, "columnIDs", "createdAt",
Expand Down Expand Up @@ -62,8 +62,8 @@ VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
if len(stat.Name) != 0 {
args[2] = stat.Name
}
if stat.Histogram != nil {
histogramBytes, err := protoutil.Marshal(stat.Histogram)
if stat.HistogramData != nil {
histogramBytes, err := protoutil.Marshal(stat.HistogramData)
if err != nil {
return err
}
Expand Down Expand Up @@ -94,7 +94,10 @@ func lookupTableStats(
}

func checkStatsForTable(
ctx context.Context, sc *TableStatisticsCache, expected []*TableStatistic, tableID sqlbase.ID,
ctx context.Context,
sc *TableStatisticsCache,
expected []*TableStatisticProto,
tableID sqlbase.ID,
) error {
// Initially the stats won't be in the cache.
if statsList, ok := lookupTableStats(ctx, sc, tableID); ok {
Expand All @@ -107,7 +110,7 @@ func checkStatsForTable(
if err != nil {
return errors.Errorf(err.Error())
}
if !reflect.DeepEqual(statsList, expected) {
if !checkStats(statsList, expected) {
return errors.Errorf("for lookup of key %d, expected stats %s, got %s", tableID, expected, statsList)
}

Expand All @@ -118,12 +121,24 @@ func checkStatsForTable(
return nil
}

func checkStats(actual []*TableStatistic, expected []*TableStatisticProto) bool {
if len(actual) == 0 && len(expected) == 0 {
// DeepEqual differentiates between nil and empty slices, we don't.
return true
}
var protoList []*TableStatisticProto
for i := range actual {
protoList = append(protoList, &actual[i].TableStatisticProto)
}
return reflect.DeepEqual(protoList, expected)
}

func initTestData(
ctx context.Context, db *client.DB, ex sqlutil.InternalExecutor,
) (map[sqlbase.ID][]*TableStatistic, error) {
) (map[sqlbase.ID][]*TableStatisticProto, error) {
// The expected stats must be ordered by TableID+, CreatedAt- so they can
// later be compared with the returned stats using reflect.DeepEqual.
expStatsList := []TableStatistic{
expStatsList := []TableStatisticProto{
{
TableID: sqlbase.ID(100),
StatisticID: 0,
Expand All @@ -133,7 +148,7 @@ func initTestData(
RowCount: 32,
DistinctCount: 30,
NullCount: 0,
Histogram: &HistogramData{ColumnType: *types.Int, Buckets: []HistogramData_Bucket{
HistogramData: &HistogramData{ColumnType: *types.Int, Buckets: []HistogramData_Bucket{
{NumEq: 3, NumRange: 30, UpperBound: encoding.EncodeVarintAscending(nil, 3000)}},
},
},
Expand Down Expand Up @@ -169,7 +184,7 @@ func initTestData(

// Insert the stats into system.table_statistics
// and store them in maps for fast retrieval.
expectedStats := make(map[sqlbase.ID][]*TableStatistic)
expectedStats := make(map[sqlbase.ID][]*TableStatisticProto)
for i := range expStatsList {
stat := &expStatsList[i]

Expand Down Expand Up @@ -284,7 +299,7 @@ func TestCacheWait(t *testing.T) {
stats, err := sc.GetTableStats(ctx, id)
if err != nil {
t.Error(err)
} else if !reflect.DeepEqual(stats, expectedStats[id]) {
} else if !checkStats(stats, expectedStats[id]) {
t.Errorf("for table %d, expected stats %s, got %s", id, expectedStats[id], stats)
}
wg.Done()
Expand Down
Loading

0 comments on commit 5f1b177

Please sign in to comment.