Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

backport-2.1: server: add a configuration to enable GC of system.rangelog #31328

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/generated/settings/settings.html
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,11 @@
<tr><td><code>server.clock.persist_upper_bound_interval</code></td><td>duration</td><td><code>0s</code></td><td>the interval between persisting the wall time upper bound of the clock. The clock does not generate a wall time greater than the persisted timestamp and will panic if it sees a wall time greater than this value. When cockroach starts, it waits for the wall time to catch-up till this persisted timestamp. This guarantees monotonic wall time across server restarts. Not setting this or setting a value of 0 disables this feature.</td></tr>
<tr><td><code>server.consistency_check.interval</code></td><td>duration</td><td><code>24h0m0s</code></td><td>the time between range consistency checks; set to 0 to disable consistency checking</td></tr>
<tr><td><code>server.declined_reservation_timeout</code></td><td>duration</td><td><code>1s</code></td><td>the amount of time to consider the store throttled for up-replication after a reservation was declined</td></tr>
<tr><td><code>server.eventlog.ttl</code></td><td>duration</td><td><code>2160h0m0s</code></td><td>if nonzero, event log entries older than this duration are deleted every 10m0s. Should not be lowered below 24 hours</td></tr>
<tr><td><code>server.failed_reservation_timeout</code></td><td>duration</td><td><code>5s</code></td><td>the amount of time to consider the store throttled for up-replication after a failed reservation call</td></tr>
<tr><td><code>server.heap_profile.max_profiles</code></td><td>integer</td><td><code>5</code></td><td>maximum number of profiles to be kept. Profiles with lower score are GC'ed, but latest profile is always kept</td></tr>
<tr><td><code>server.heap_profile.system_memory_threshold_fraction</code></td><td>float</td><td><code>0.85</code></td><td>fraction of system memory beyond which if Rss increases, then heap profile is triggered</td></tr>
<tr><td><code>server.rangelog.ttl</code></td><td>duration</td><td><code>720h0m0s</code></td><td>if nonzero, range log entries older than this duration are deleted every 10m0s. Should not be lowered below 24 hours</td></tr>
<tr><td><code>server.remote_debugging.mode</code></td><td>string</td><td><code>local</code></td><td>set to enable remote debugging, localhost-only or disable (any, local, off)</td></tr>
<tr><td><code>server.shutdown.drain_wait</code></td><td>duration</td><td><code>0s</code></td><td>the amount of time a server waits in an unready state before proceeding with the rest of the shutdown process</td></tr>
<tr><td><code>server.shutdown.query_wait</code></td><td>duration</td><td><code>10s</code></td><td>the server will wait for at least this amount of time for active queries to finish</td></tr>
Expand Down
2 changes: 2 additions & 0 deletions pkg/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -1680,6 +1680,8 @@ func (s *Server) Start(ctx context.Context) error {
})
}

s.startSystemLogsGC(ctx)

// Record that this node joined the cluster in the event log. Since this
// executes a SQL query, this must be done after the SQL layer is ready.
s.node.recordJoinEvent()
Expand Down
217 changes: 217 additions & 0 deletions pkg/server/server_systemlog_gc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
// Copyright 2018 The Cockroach Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the License.

package server

import (
"context"
"fmt"
"time"

"github.com/pkg/errors"

"github.com/cockroachdb/cockroach/pkg/internal/client"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/settings"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/storage"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
)

const (
	// systemLogGCPeriod is the period for running gc on systemlog tables.
	// Both TTL setting descriptions below interpolate this value, so they
	// stay in sync with the actual GC cadence.
	systemLogGCPeriod = 10 * time.Minute
)

var (
	// rangeLogTTL is the TTL for rows in system.rangelog. If non zero, range log
	// entries are periodically garbage collected. Defaults to 0 (disabled).
	rangeLogTTL = settings.RegisterDurationSetting(
		"server.rangelog.ttl",
		fmt.Sprintf(
			"if nonzero, range log entries older than this duration are deleted every %s. "+
				"Should not be lowered below 24 hours",
			systemLogGCPeriod,
		),
		0, // disabled
	)

	// eventLogTTL is the TTL for rows in system.eventlog. If non zero, event log
	// entries are periodically garbage collected. Defaults to 0 (disabled).
	eventLogTTL = settings.RegisterDurationSetting(
		"server.eventlog.ttl",
		fmt.Sprintf(
			"if nonzero, event log entries older than this duration are deleted every %s. "+
				"Should not be lowered below 24 hours",
			systemLogGCPeriod,
		),
		0, // disabled
	)
)

// gcSystemLog deletes entries in the given system log table between
// timestampLowerBound and timestampUpperBound if the server is the lease holder
// for range 1.
// Leaseholder constraint is present so that only one node in the cluster
// performs gc.
// The system log table is expected to have a "timestamp" column.
// It returns the timestampLowerBound to be used in the next iteration, number
// of rows affected and error (if any).
// gcSystemLog deletes entries in the given system log table between
// timestampLowerBound and timestampUpperBound if the server is the lease holder
// for range 1.
// Leaseholder constraint is present so that only one node in the cluster
// performs gc.
// The system log table is expected to have a "timestamp" column.
// It returns the timestampLowerBound to be used in the next iteration, number
// of rows affected and error (if any).
func (s *Server) gcSystemLog(
	ctx context.Context, table string, timestampLowerBound, timestampUpperBound time.Time,
) (time.Time, int64, error) {
	var totalRowsAffected int64
	// Range 1's leaseholder acts as the single GC coordinator. If this node
	// has no replica of range 1, the error is deliberately swallowed and GC
	// becomes a no-op here — some other node will perform it.
	repl, err := s.node.stores.GetReplicaForRangeID(roachpb.RangeID(1))
	if err != nil {
		return timestampLowerBound, 0, nil
	}

	if !repl.IsFirstRange() || !repl.OwnsValidLease(s.clock.Now()) {
		return timestampLowerBound, 0, nil
	}

	// Delete in batches of at most 1000 rows per transaction to bound
	// transaction size. The wrapping SELECT yields the batch's row count and
	// the max deleted timestamp, which is used to advance the lower bound.
	deleteStmt := fmt.Sprintf(
		`SELECT count(1), max(timestamp) FROM
[DELETE FROM system.%s WHERE timestamp >= $1 AND timestamp <= $2 LIMIT 1000 RETURNING timestamp]`,
		table,
	)

	for {
		var rowsAffected int64
		err := s.db.Txn(ctx, func(ctx context.Context, txn *client.Txn) error {
			var err error
			row, err := s.internalExecutor.QueryRow(
				ctx,
				table+"-gc", // operation name for tracing/observability
				txn,
				deleteStmt,
				timestampLowerBound,
				timestampUpperBound,
			)
			if err != nil {
				return err
			}

			if row != nil {
				rowCount, ok := row[0].(*tree.DInt)
				if !ok {
					return errors.Errorf("row count is of unknown type %T", row[0])
				}
				if rowCount == nil {
					return errors.New("error parsing row count")
				}
				rowsAffected = int64(*rowCount)

				if rowsAffected > 0 {
					// Advance the lower bound past the newest row deleted in
					// this batch so subsequent scans skip the tombstones left
					// behind by earlier batches.
					maxTimestamp, ok := row[1].(*tree.DTimestamp)
					if !ok {
						return errors.Errorf("timestamp is of unknown type %T", row[1])
					}
					if maxTimestamp == nil {
						return errors.New("error parsing timestamp")
					}
					timestampLowerBound = maxTimestamp.Time
				}
			}
			return nil
		})
		totalRowsAffected += rowsAffected
		if err != nil {
			return timestampLowerBound, totalRowsAffected, err
		}

		if rowsAffected == 0 {
			// Nothing left in this window; the next run can start scanning
			// from the current upper bound.
			return timestampUpperBound, totalRowsAffected, nil
		}
	}
}

// systemLogGCConfig has configurations for gc of systemlog.
type systemLogGCConfig struct {
	// ttl is the time to live for rows in systemlog table. A zero TTL
	// disables GC for that table.
	ttl *settings.DurationSetting
	// timestampLowerBound is the timestamp below which rows are gc'ed.
	// It is maintained to avoid hitting tombstones during gc and is updated
	// after every gc run (by the worker started in startSystemLogsGC).
	timestampLowerBound time.Time
}

// startSystemLogsGC starts a worker which periodically GCs system.rangelog
// and system.eventlog.
// The TTLs for each of these logs is retrieved from cluster settings.
// startSystemLogsGC starts a worker which periodically GCs system.rangelog
// and system.eventlog.
// The TTLs for each of these logs is retrieved from cluster settings; a zero
// TTL (the default) disables GC for that table.
func (s *Server) startSystemLogsGC(ctx context.Context) {
	// Per-table GC state; timestampLowerBound is advanced after each run to
	// avoid rescanning tombstones.
	systemLogsToGC := map[string]*systemLogGCConfig{
		"rangelog": {
			ttl:                 rangeLogTTL,
			timestampLowerBound: timeutil.Unix(0, 0),
		},
		"eventlog": {
			ttl:                 eventLogTTL,
			timestampLowerBound: timeutil.Unix(0, 0),
		},
	}

	s.stopper.RunWorker(ctx, func(ctx context.Context) {
		// Testing knobs are fixed at server construction, so the type
		// assertion is hoisted out of the ticker loop instead of being
		// re-evaluated on every tick.
		storeKnobs, _ := s.cfg.TestingKnobs.Store.(*storage.StoreTestingKnobs)

		period := systemLogGCPeriod
		if storeKnobs != nil && storeKnobs.SystemLogsGCPeriod != 0 {
			period = storeKnobs.SystemLogsGCPeriod
		}

		t := time.NewTicker(period)
		defer t.Stop()

		for {
			select {
			case <-t.C:
				for table, gcConfig := range systemLogsToGC {
					ttl := gcConfig.ttl.Get(&s.cfg.Settings.SV)
					if ttl > 0 {
						// Everything older than now-ttl is eligible for GC.
						timestampUpperBound := timeutil.Unix(0, s.clock.PhysicalNow()-int64(ttl))
						newTimestampLowerBound, rowsAffected, err := s.gcSystemLog(
							ctx,
							table,
							gcConfig.timestampLowerBound,
							timestampUpperBound,
						)
						if err != nil {
							log.Warningf(
								ctx,
								"error garbage collecting %s: %v",
								table,
								err,
							)
						} else {
							gcConfig.timestampLowerBound = newTimestampLowerBound
							if log.V(1) {
								log.Infof(ctx, "garbage collected %d rows from %s", rowsAffected, table)
							}
						}
					}
				}

				// Notify tests that a full GC pass has completed.
				if storeKnobs != nil && storeKnobs.SystemLogsGCGCDone != nil {
					select {
					case storeKnobs.SystemLogsGCGCDone <- struct{}{}:
					case <-s.stopper.ShouldStop():
						// Test has finished.
						return
					}
				}
			case <-s.stopper.ShouldStop():
				return
			}
		}
	})
}
Loading