-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
server, tenant: hot ranges structured logging for all cluster types
This patch introduces periodic logging of hot ranges info (ie QPS and relevant DB objects info). It is done for both dedicated and serverless cluster types, and by default called every 4h. Stats collection can be enabled/disabled, or changed interval through cluster settings. Release note: None
- Loading branch information
Showing
16 changed files
with
567 additions
and
26 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
load("//build/bazelutil/unused_checker:unused.bzl", "get_x_data") | ||
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") | ||
|
||
go_library( | ||
name = "structlogging", | ||
srcs = ["hot_ranges_log.go"], | ||
importpath = "github.com/cockroachdb/cockroach/pkg/server/structlogging", | ||
visibility = ["//visibility:public"], | ||
deps = [ | ||
"//pkg/server/serverpb", | ||
"//pkg/settings", | ||
"//pkg/settings/cluster", | ||
"//pkg/sql", | ||
"//pkg/util/log", | ||
"//pkg/util/log/eventpb", | ||
"//pkg/util/log/logpb", | ||
"//pkg/util/log/logutil", | ||
"//pkg/util/stop", | ||
"//pkg/util/timeutil", | ||
], | ||
) | ||
|
||
go_test( | ||
name = "structlogging_test", | ||
srcs = [ | ||
"hot_ranges_log_test.go", | ||
"main_test.go", | ||
], | ||
args = ["-test.timeout=295s"], | ||
deps = [ | ||
":structlogging", | ||
"//pkg/base", | ||
"//pkg/ccl", | ||
"//pkg/kv/kvserver", | ||
"//pkg/roachpb", | ||
"//pkg/security/securityassets", | ||
"//pkg/security/securitytest", | ||
"//pkg/server", | ||
"//pkg/server/serverpb", | ||
"//pkg/testutils", | ||
"//pkg/testutils/serverutils", | ||
"//pkg/testutils/testcluster", | ||
"//pkg/util/leaktest", | ||
"//pkg/util/log", | ||
"//pkg/util/log/logcrash", | ||
"//pkg/util/log/logtestutils", | ||
"//pkg/util/randutil", | ||
"@com_github_cockroachdb_errors//:errors", | ||
"@com_github_stretchr_testify//require", | ||
], | ||
) | ||
|
||
get_x_data(name = "get_x_data") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,130 @@ | ||
// Copyright 2022 The Cockroach Authors. | ||
// | ||
// Use of this software is governed by the Business Source License | ||
// included in the file licenses/BSL.txt. | ||
// | ||
// As of the Change Date specified in that file, in accordance with | ||
// the Business Source License, use of this software will be governed | ||
// by the Apache License, Version 2.0, included in the file | ||
// licenses/APL.txt. | ||
|
||
package structlogging | ||
|
||
import ( | ||
"context" | ||
"time" | ||
|
||
"github.com/cockroachdb/cockroach/pkg/server/serverpb" | ||
"github.com/cockroachdb/cockroach/pkg/settings" | ||
"github.com/cockroachdb/cockroach/pkg/settings/cluster" | ||
"github.com/cockroachdb/cockroach/pkg/sql" | ||
"github.com/cockroachdb/cockroach/pkg/util/log" | ||
"github.com/cockroachdb/cockroach/pkg/util/log/eventpb" | ||
"github.com/cockroachdb/cockroach/pkg/util/log/logpb" | ||
"github.com/cockroachdb/cockroach/pkg/util/log/logutil" | ||
"github.com/cockroachdb/cockroach/pkg/util/stop" | ||
"github.com/cockroachdb/cockroach/pkg/util/timeutil" | ||
) | ||
|
||
// ReportTopHottestRanges limits the number of ranges to be reported per iteration | ||
const ReportTopHottestRanges = 5 | ||
|
||
var TelemetryHotRangesStatsInterval = settings.RegisterDurationSetting( | ||
settings.TenantWritable, | ||
"server.telemetry.hot_ranges_stats.interval", | ||
"the time interval to log hot ranges stats", | ||
4*time.Hour, | ||
settings.NonNegativeDuration, | ||
) | ||
|
||
var TelemetryHotRangesStatsEnabled = settings.RegisterBoolSetting( | ||
settings.TenantWritable, | ||
"server.telemetry.hot_ranges_stats.enabled", | ||
"enable/disable capturing hot ranges statistics to the telemetry logging channel", | ||
true, | ||
) | ||
|
||
var TelemetryHotRangesStatsLoggingDelay = settings.RegisterDurationSetting( | ||
settings.TenantWritable, | ||
"server.telemetry.hot_ranges_stats.logging_delay", | ||
"the time delay between emitting individual hot ranges stats logs", | ||
1*time.Second, | ||
settings.NonNegativeDuration, | ||
) | ||
|
||
// hotRangesLoggingScheduler is responsible for logging index usage stats | ||
// on a scheduled interval. | ||
type hotRangesLoggingScheduler struct { | ||
ie sql.InternalExecutor | ||
sServer serverpb.TenantStatusServer | ||
st *cluster.Settings | ||
} | ||
|
||
// StartHotRangesLoggingScheduler starts the capture index usage statistics logging scheduler. | ||
func StartHotRangesLoggingScheduler( | ||
ctx context.Context, | ||
stopper *stop.Stopper, | ||
sServer serverpb.TenantStatusServer, | ||
ie sql.InternalExecutor, | ||
st *cluster.Settings, | ||
) { | ||
scheduler := hotRangesLoggingScheduler{ | ||
ie: ie, | ||
sServer: sServer, | ||
st: st, | ||
} | ||
scheduler.start(ctx, stopper) | ||
} | ||
|
||
func (s *hotRangesLoggingScheduler) start(ctx context.Context, stopper *stop.Stopper) { | ||
_ = stopper.RunAsyncTask(ctx, "hot-ranges-stats", func(ctx context.Context) { | ||
ticker := time.NewTicker(TelemetryHotRangesStatsInterval.Get(&s.st.SV)) | ||
defer ticker.Stop() | ||
|
||
TelemetryHotRangesStatsInterval.SetOnChange(&s.st.SV, func(ctx context.Context) { | ||
ticker.Reset(TelemetryHotRangesStatsInterval.Get(&s.st.SV)) | ||
}) | ||
|
||
for { | ||
select { | ||
case <-stopper.ShouldQuiesce(): | ||
return | ||
case <-ctx.Done(): | ||
return | ||
case <-ticker.C: | ||
if !TelemetryHotRangesStatsEnabled.Get(&s.st.SV) { | ||
continue | ||
} | ||
resp, err := s.sServer.HotRangesV2(ctx, &serverpb.HotRangesRequest{PageSize: ReportTopHottestRanges}) | ||
if err != nil { | ||
log.Warningf(ctx, "failed to get hot ranges: %s", err) | ||
continue | ||
} | ||
var events []logpb.EventPayload | ||
ts := timeutil.Now().UnixNano() | ||
|
||
for _, r := range resp.Ranges { | ||
hrEvent := &eventpb.HotRangesStats{ | ||
RangeID: int64(r.RangeID), | ||
Qps: r.QPS, | ||
DatabaseName: r.DatabaseName, | ||
SchemaName: r.SchemaName, | ||
TableName: r.TableName, | ||
IndexName: r.IndexName, | ||
CPUTimePerSecond: r.CPUTimePerSecond, | ||
ReadBytesPerSecond: r.ReadBytesPerSecond, | ||
WriteBytesPerSecond: r.WriteBytesPerSecond, | ||
ReadsPerSecond: r.ReadsPerSecond, | ||
WritesPerSecond: r.WritesPerSecond, | ||
LeaseholderNodeID: int32(r.LeaseholderNodeID), | ||
CommonEventDetails: logpb.CommonEventDetails{ | ||
Timestamp: ts, | ||
}, | ||
} | ||
events = append(events, hrEvent) | ||
} | ||
logutil.LogEventsWithDelay(ctx, events, stopper, TelemetryHotRangesStatsLoggingDelay.Get(&s.st.SV)) | ||
} | ||
} | ||
}) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
// Copyright 2022 The Cockroach Authors. | ||
// | ||
// Use of this software is governed by the Business Source License | ||
// included in the file licenses/BSL.txt. | ||
// | ||
// As of the Change Date specified in that file, in accordance with | ||
// the Business Source License, use of this software will be governed | ||
// by the Apache License, Version 2.0, included in the file | ||
// licenses/APL.txt. | ||
|
||
package structlogging_test | ||
|
||
import ( | ||
"context" | ||
"math" | ||
"regexp" | ||
"testing" | ||
"time" | ||
|
||
"github.com/cockroachdb/cockroach/pkg/base" | ||
"github.com/cockroachdb/cockroach/pkg/ccl" | ||
"github.com/cockroachdb/cockroach/pkg/kv/kvserver" | ||
"github.com/cockroachdb/cockroach/pkg/roachpb" | ||
"github.com/cockroachdb/cockroach/pkg/server/serverpb" | ||
"github.com/cockroachdb/cockroach/pkg/server/structlogging" | ||
"github.com/cockroachdb/cockroach/pkg/testutils" | ||
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils" | ||
"github.com/cockroachdb/cockroach/pkg/util/leaktest" | ||
"github.com/cockroachdb/cockroach/pkg/util/log" | ||
"github.com/cockroachdb/cockroach/pkg/util/log/logcrash" | ||
"github.com/cockroachdb/cockroach/pkg/util/log/logtestutils" | ||
"github.com/cockroachdb/errors" | ||
"github.com/stretchr/testify/require" | ||
) | ||
|
||
func TestHotRangesStats(t *testing.T) { | ||
ctx := context.Background() | ||
defer leaktest.AfterTest(t)() | ||
ccl.TestingEnableEnterprise() | ||
defer ccl.TestingDisableEnterprise() | ||
sc := log.ScopeWithoutShowLogs(t) | ||
defer sc.Close(t) | ||
|
||
cleanup := logtestutils.InstallTelemetryLogFileSink(sc, t) | ||
defer cleanup() | ||
|
||
s, _, _ := serverutils.StartServer(t, base.TestServerArgs{ | ||
DefaultTestTenant: base.TestTenantDisabled, | ||
StoreSpecs: []base.StoreSpec{ | ||
base.DefaultTestStoreSpec, | ||
base.DefaultTestStoreSpec, | ||
base.DefaultTestStoreSpec, | ||
}, | ||
Knobs: base.TestingKnobs{ | ||
Store: &kvserver.StoreTestingKnobs{ | ||
DisableReplicaRebalancing: true, | ||
}, | ||
}, | ||
}) | ||
defer s.Stopper().Stop(ctx) | ||
|
||
logcrash.DiagnosticsReportingEnabled.Override(ctx, &s.ClusterSettings().SV, true) | ||
structlogging.TelemetryHotRangesStatsEnabled.Override(ctx, &s.ClusterSettings().SV, true) | ||
structlogging.TelemetryHotRangesStatsInterval.Override(ctx, &s.ClusterSettings().SV, 500*time.Millisecond) | ||
structlogging.TelemetryHotRangesStatsLoggingDelay.Override(ctx, &s.ClusterSettings().SV, 10*time.Millisecond) | ||
|
||
tenantID := roachpb.MustMakeTenantID(2) | ||
tt, err := s.StartTenant(ctx, base.TestTenantArgs{ | ||
TenantID: tenantID, | ||
Settings: s.ClusterSettings(), | ||
}) | ||
require.NoError(t, err) | ||
|
||
testutils.SucceedsSoon(t, func() error { | ||
ss := tt.TenantStatusServer().(serverpb.TenantStatusServer) | ||
resp, err := ss.HotRangesV2(ctx, &serverpb.HotRangesRequest{TenantID: tenantID.String()}) | ||
if err != nil { | ||
return err | ||
} | ||
if len(resp.Ranges) == 0 { | ||
return errors.New("waiting for hot ranges to be collected") | ||
} | ||
return nil | ||
}) | ||
|
||
testutils.SucceedsWithin(t, func() error { | ||
log.Flush() | ||
entries, err := log.FetchEntriesFromFiles( | ||
0, | ||
math.MaxInt64, | ||
10000, | ||
regexp.MustCompile(`"EventType":"hot_ranges_stats"`), | ||
log.WithMarkedSensitiveData, | ||
) | ||
if err != nil { | ||
return err | ||
} | ||
if len(entries) == 0 { | ||
return errors.New("waiting for logs") | ||
} | ||
return nil | ||
}, 5*time.Second) | ||
} |
Oops, something went wrong.