Skip to content

Commit

Permalink
server: add a configuration to enable GC of system.rangelog
Browse files Browse the repository at this point in the history
The system.rangelog table currently grows without bound. The rate of
growth is slow (as long as there is no replica rebalancing
thrashing), but it can still become a problem in long-running
clusters.

This commit adds cluster settings to specify the TTL for rows in
system.rangelog and system.eventlog; expired rows are purged on a
fixed 10-minute interval.
By default, the TTL for system.rangelog is 30 days and the TTL for
system.eventlog is 90 days.

Fixes cockroachdb#21260.

Release note (sql change): the range log and system events logs will
automatically purge records older than 30 and 90 days, respectively.
This can be adjusted via the server.rangelog.ttl and server.eventlog.ttl
cluster settings.
  • Loading branch information
tbg committed Oct 17, 2018
1 parent 670c7d3 commit ed3042b
Show file tree
Hide file tree
Showing 6 changed files with 475 additions and 0 deletions.
2 changes: 2 additions & 0 deletions docs/generated/settings/settings.html
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,11 @@
<tr><td><code>server.clock.persist_upper_bound_interval</code></td><td>duration</td><td><code>0s</code></td><td>the interval between persisting the wall time upper bound of the clock. The clock does not generate a wall time greater than the persisted timestamp and will panic if it sees a wall time greater than this value. When cockroach starts, it waits for the wall time to catch-up till this persisted timestamp. This guarantees monotonic wall time across server restarts. Not setting this or setting a value of 0 disables this feature.</td></tr>
<tr><td><code>server.consistency_check.interval</code></td><td>duration</td><td><code>24h0m0s</code></td><td>the time between range consistency checks; set to 0 to disable consistency checking</td></tr>
<tr><td><code>server.declined_reservation_timeout</code></td><td>duration</td><td><code>1s</code></td><td>the amount of time to consider the store throttled for up-replication after a reservation was declined</td></tr>
<tr><td><code>server.eventlog.ttl</code></td><td>duration</td><td><code>2160h0m0s</code></td><td>if nonzero, event log entries older than this duration are deleted every 10m0s. Should not be lowered below 24 hours</td></tr>
<tr><td><code>server.failed_reservation_timeout</code></td><td>duration</td><td><code>5s</code></td><td>the amount of time to consider the store throttled for up-replication after a failed reservation call</td></tr>
<tr><td><code>server.heap_profile.max_profiles</code></td><td>integer</td><td><code>5</code></td><td>maximum number of profiles to be kept. Profiles with lower score are GC'ed, but latest profile is always kept</td></tr>
<tr><td><code>server.heap_profile.system_memory_threshold_fraction</code></td><td>float</td><td><code>0.85</code></td><td>fraction of system memory beyond which if Rss increases, then heap profile is triggered</td></tr>
<tr><td><code>server.rangelog.ttl</code></td><td>duration</td><td><code>720h0m0s</code></td><td>if nonzero, range log entries older than this duration are deleted every 10m0s. Should not be lowered below 24 hours</td></tr>
<tr><td><code>server.remote_debugging.mode</code></td><td>string</td><td><code>local</code></td><td>set to enable remote debugging, localhost-only or disable (any, local, off)</td></tr>
<tr><td><code>server.shutdown.drain_wait</code></td><td>duration</td><td><code>0s</code></td><td>the amount of time a server waits in an unready state before proceeding with the rest of the shutdown process</td></tr>
<tr><td><code>server.shutdown.query_wait</code></td><td>duration</td><td><code>10s</code></td><td>the server will wait for at least this amount of time for active queries to finish</td></tr>
Expand Down
2 changes: 2 additions & 0 deletions pkg/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -1680,6 +1680,8 @@ func (s *Server) Start(ctx context.Context) error {
})
}

s.startSystemLogsGC(ctx)

// Record that this node joined the cluster in the event log. Since this
// executes a SQL query, this must be done after the SQL layer is ready.
s.node.recordJoinEvent()
Expand Down
217 changes: 217 additions & 0 deletions pkg/server/server_systemlog_gc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
// Copyright 2018 The Cockroach Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the License.

package server

import (
"context"
"fmt"
"time"

"github.com/pkg/errors"

"github.com/cockroachdb/cockroach/pkg/internal/client"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/settings"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/storage"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
)

const (
	// systemLogGCPeriod is the period for running gc on systemlog tables.
	// It is also interpolated into the help text of the TTL cluster settings,
	// and the GC worker replaces it with StoreTestingKnobs.SystemLogsGCPeriod
	// when that knob is set to a nonzero value.
	systemLogGCPeriod = 10 * time.Minute
)

var (
	// rangeLogTTL is the TTL for rows in system.rangelog. If nonzero, range log
	// entries older than the TTL are periodically garbage collected.
	// Exposed as the cluster setting "server.rangelog.ttl"; defaults to 30 days.
	rangeLogTTL = settings.RegisterDurationSetting(
		"server.rangelog.ttl",
		fmt.Sprintf(
			"if nonzero, range log entries older than this duration are deleted every %s. "+
				"Should not be lowered below 24 hours",
			systemLogGCPeriod,
		),
		30*24*time.Hour, // 30 days
	)

	// eventLogTTL is the TTL for rows in system.eventlog. If nonzero, event log
	// entries older than the TTL are periodically garbage collected.
	// Exposed as the cluster setting "server.eventlog.ttl"; defaults to 90 days.
	eventLogTTL = settings.RegisterDurationSetting(
		"server.eventlog.ttl",
		fmt.Sprintf(
			"if nonzero, event log entries older than this duration are deleted every %s. "+
				"Should not be lowered below 24 hours",
			systemLogGCPeriod,
		),
		90*24*time.Hour, // 90 days
	)
)

// gcSystemLog deletes entries in the given system log table whose timestamp
// lies in [timestampLowerBound, timestampUpperBound] (both inclusive) if the
// server is the lease holder for range 1.
// The leaseholder constraint is present so that only one node in the cluster
// performs gc.
//
// The system log table is expected to have a "timestamp" column. Rows are
// deleted in batches of at most 1000, one transaction per batch, until a batch
// deletes nothing.
//
// It returns the timestampLowerBound to be used in the next iteration, the
// number of rows deleted by committed batches, and an error (if any). When the
// replica for range 1 cannot be obtained or this node does not own a valid
// lease for it, the lower bound is returned unchanged with a nil error.
func (s *Server) gcSystemLog(
	ctx context.Context, table string, timestampLowerBound, timestampUpperBound time.Time,
) (time.Time, int64, error) {
	repl, err := s.node.stores.GetReplicaForRangeID(roachpb.RangeID(1))
	if err != nil {
		// Deliberately not reported as a failure: presumably this node simply
		// holds no replica of range 1, so gc is some other node's job.
		return timestampLowerBound, 0, nil
	}

	if !repl.IsFirstRange() || !repl.OwnsValidLease(s.clock.Now()) {
		return timestampLowerBound, 0, nil
	}

	// NOTE(review): table is interpolated into the statement text; the callers
	// visible in this file only pass fixed table names.
	deleteStmt := fmt.Sprintf(
		`SELECT count(1), max(timestamp) FROM
[DELETE FROM system.%s WHERE timestamp >= $1 AND timestamp <= $2 LIMIT 1000 RETURNING timestamp]`,
		table,
	)

	var totalRowsAffected int64
	for {
		// Results of the most recent attempt of the closure below. They are
		// only folded into timestampLowerBound/totalRowsAffected once the
		// transaction has committed, so that an attempt which is rolled back
		// (and possibly retried) neither advances the lower bound past rows
		// that were not actually deleted nor inflates the row count.
		var batchRows int64
		var batchMaxTimestamp time.Time

		err := s.db.Txn(ctx, func(ctx context.Context, txn *client.Txn) error {
			// The closure may run more than once; start each attempt clean.
			batchRows = 0
			batchMaxTimestamp = time.Time{}

			row, err := s.internalExecutor.QueryRow(
				ctx,
				table+"-gc",
				txn,
				deleteStmt,
				timestampLowerBound,
				timestampUpperBound,
			)
			if err != nil {
				return err
			}
			if row == nil {
				return nil
			}

			rowCount, ok := row[0].(*tree.DInt)
			if !ok {
				return errors.Errorf("row count is of unknown type %T", row[0])
			}
			if rowCount == nil {
				return errors.New("error parsing row count")
			}
			deleted := int64(*rowCount)

			// max(timestamp) is only meaningful when at least one row was
			// deleted.
			if deleted > 0 {
				maxTimestamp, ok := row[1].(*tree.DTimestamp)
				if !ok {
					return errors.Errorf("timestamp is of unknown type %T", row[1])
				}
				if maxTimestamp == nil {
					return errors.New("error parsing timestamp")
				}
				batchMaxTimestamp = maxTimestamp.Time
			}
			batchRows = deleted
			return nil
		})
		if err != nil {
			return timestampLowerBound, totalRowsAffected, err
		}

		if batchRows == 0 {
			// Nothing left in the window; the next run can start at the
			// current upper bound.
			return timestampUpperBound, totalRowsAffected, nil
		}

		totalRowsAffected += batchRows
		// The next batch resumes at the newest deleted timestamp. The bound is
		// inclusive, so rows sharing that timestamp are still covered.
		timestampLowerBound = batchMaxTimestamp
	}
}

// systemLogGCConfig has configurations for gc of systemlog. One instance is
// kept per system log table for the lifetime of the gc worker.
type systemLogGCConfig struct {
	// ttl is the time to live for rows in systemlog table.
	ttl *settings.DurationSetting
	// timestampLowerBound is the inclusive lower bound of the timestamp range
	// scanned by the next gc run; rows older than it are not revisited.
	// It is maintained to avoid hitting tombstones during gc and is updated
	// after every gc run that completes without error.
	timestampLowerBound time.Time
}

// startSystemLogsGC launches a background worker that garbage collects
// system.rangelog and system.eventlog on every tick of a fixed-period ticker.
// Each table's TTL is read from its cluster setting on every pass; a table
// whose TTL is not positive is skipped for that pass.
func (s *Server) startSystemLogsGC(ctx context.Context) {
	gcStateByTable := map[string]*systemLogGCConfig{
		"rangelog": {
			ttl:                 rangeLogTTL,
			timestampLowerBound: timeutil.Unix(0, 0),
		},
		"eventlog": {
			ttl:                 eventLogTTL,
			timestampLowerBound: timeutil.Unix(0, 0),
		},
	}

	s.stopper.RunWorker(ctx, func(ctx context.Context) {
		// The store testing knobs may substitute a different gc period.
		tickEvery := systemLogGCPeriod
		if knobs, ok := s.cfg.TestingKnobs.Store.(*storage.StoreTestingKnobs); ok && knobs.SystemLogsGCPeriod != 0 {
			tickEvery = knobs.SystemLogsGCPeriod
		}

		ticker := time.NewTicker(tickEvery)
		defer ticker.Stop()

		for {
			// Block until either the next tick or server shutdown.
			select {
			case <-s.stopper.ShouldStop():
				return
			case <-ticker.C:
			}

			for name, cfg := range gcStateByTable {
				ttl := cfg.ttl.Get(&s.cfg.Settings.SV)
				if ttl <= 0 {
					continue
				}

				// Rows with a timestamp at or before the cutoff have outlived
				// their TTL.
				cutoff := timeutil.Unix(0, s.clock.PhysicalNow()-int64(ttl))
				nextLowerBound, deleted, err := s.gcSystemLog(
					ctx, name, cfg.timestampLowerBound, cutoff,
				)
				if err != nil {
					// Keep the previous lower bound so the same window is
					// attempted again on the next pass.
					log.Warningf(ctx, "error garbage collecting %s: %v", name, err)
					continue
				}

				cfg.timestampLowerBound = nextLowerBound
				if log.V(1) {
					log.Infof(ctx, "garbage collected %d rows from %s", deleted, name)
				}
			}

			// Notify the testing knob channel, if one is installed, that a
			// pass has completed.
			knobs, ok := s.cfg.TestingKnobs.Store.(*storage.StoreTestingKnobs)
			if !ok || knobs.SystemLogsGCGCDone == nil {
				continue
			}
			select {
			case knobs.SystemLogsGCGCDone <- struct{}{}:
			case <-s.stopper.ShouldStop():
				// Test has finished.
				return
			}
		}
	})
}
Loading

0 comments on commit ed3042b

Please sign in to comment.