From 281aff2be47a6ff6508feb77dcdb91540dea3dde Mon Sep 17 00:00:00 2001 From: Raphael 'kena' Poss Date: Fri, 11 Nov 2022 12:09:13 +0100 Subject: [PATCH] cli,server: make `PreStart` responsible for starting diagnostics Release note: None --- pkg/base/test_server_args.go | 12 ++++++++++++ pkg/cli/context.go | 2 +- pkg/cli/demo.go | 4 +--- pkg/cli/democluster/context.go | 3 --- pkg/cli/democluster/demo_cluster.go | 10 ++-------- pkg/cli/flags.go | 3 +++ pkg/cli/start.go | 12 ------------ pkg/server/config.go | 6 ++++++ pkg/server/server.go | 11 +++++++---- pkg/server/server_controller.go | 5 ----- pkg/server/tenant.go | 9 +++++++-- pkg/server/testserver.go | 23 +++++++++++++---------- 12 files changed, 52 insertions(+), 48 deletions(-) diff --git a/pkg/base/test_server_args.go b/pkg/base/test_server_args.go index d5e95809490c..255aa519f82f 100644 --- a/pkg/base/test_server_args.go +++ b/pkg/base/test_server_args.go @@ -153,6 +153,12 @@ type TestServerArgs struct { // or if some of the functionality being tested is not accessible from // within tenants. DisableDefaultTestTenant bool + + // StartDiagnosticsReporting checks cluster.TelemetryOptOut(), and + // if not disabled starts the asynchronous goroutine that checks for + // CockroachDB upgrades and periodically reports diagnostics to + // Cockroach Labs. Should remain disabled during unit testing. + StartDiagnosticsReporting bool } // TestClusterArgs contains the parameters one can set when creating a test @@ -309,4 +315,10 @@ type TestTenantArgs struct { // heapprofiler. If empty, no heap profiles will be collected during the test. // If set, this directory should be cleaned up after the test completes. HeapProfileDirName string + + // StartDiagnosticsReporting checks cluster.TelemetryOptOut(), and + // if not disabled starts the asynchronous goroutine that checks for + // CockroachDB upgrades and periodically reports diagnostics to + // Cockroach Labs. Should remain disabled during unit testing. + StartDiagnosticsReporting bool } diff --git a/pkg/cli/context.go b/pkg/cli/context.go index f4303ea34912..9beafaf9a831 100644 --- a/pkg/cli/context.go +++ b/pkg/cli/context.go @@ -112,6 +112,7 @@ func setServerContextDefaults() { serverCfg.HeapProfileDirName = "" serverCfg.CPUProfileDirName = "" serverCfg.InflightTraceDirName = "" + serverCfg.StartDiagnosticsReporting = false // overridden in flags.go. serverCfg.AutoInitializeCluster = false serverCfg.ReadyFn = nil @@ -617,7 +618,6 @@ func setDemoContextDefaults() { demoCtx.RunWorkload = false demoCtx.Localities = nil demoCtx.GeoPartitionedReplicas = false - demoCtx.DisableTelemetry = false demoCtx.DefaultKeySize = defaultKeySize demoCtx.DefaultCALifetime = defaultCALifetime demoCtx.DefaultCertLifetime = defaultCertLifetime diff --git a/pkg/cli/demo.go b/pkg/cli/demo.go index 0ec1ba607e12..22940ee83378 100644 --- a/pkg/cli/demo.go +++ b/pkg/cli/demo.go @@ -144,8 +144,6 @@ func checkDemoConfiguration( return nil, errors.Newf("--%s cannot be used with --%s", cliflags.Global.Name, cliflags.DemoNodeLocality.Name) } - demoCtx.DisableTelemetry = cluster.TelemetryOptOut() - // Whether or not we enable enterprise feature is a combination of: // // - whether the user wants them (they can disable enterprise @@ -293,7 +291,7 @@ func runDemoInternal( // Only print details about the telemetry configuration if the // user has control over it. - if demoCtx.DisableTelemetry { + if cluster.TelemetryOptOut() { cliCtx.PrintlnUnlessEmbedded("#\n# Telemetry disabled by configuration.") } else { cliCtx.PrintlnUnlessEmbedded("#\n" + diff --git a/pkg/cli/democluster/context.go b/pkg/cli/democluster/context.go index 3314c9cf8906..de8eb5dfde2a 100644 --- a/pkg/cli/democluster/context.go +++ b/pkg/cli/democluster/context.go @@ -35,9 +35,6 @@ type Context struct { // CacheSize is the size of the storage cache for each KV server. CacheSize int64 - // DisableTelemetry requests that telemetry be disabled. - DisableTelemetry bool - // NoExampleDatabase prevents the auto-creation of a demo database // from a workload. NoExampleDatabase bool diff --git a/pkg/cli/democluster/demo_cluster.go b/pkg/cli/democluster/demo_cluster.go index 89fed623af7a..86a957078c8d 100644 --- a/pkg/cli/democluster/demo_cluster.go +++ b/pkg/cli/democluster/demo_cluster.go @@ -487,14 +487,6 @@ func (c *transientCluster) Start(ctx context.Context) (err error) { } } - // Start up the update check loop. - // We don't do this in (*server.Server).Start() because we don't want this - // overhead and possible interference in tests. - if !c.demoCtx.DisableTelemetry { - c.infoLog(ctx, "starting telemetry") - c.firstServer.StartDiagnostics(ctx) - } - return nil }(phaseCtx); err != nil { return err @@ -530,6 +522,8 @@ func (c *transientCluster) createAndAddNode( if idx == 0 { // The first node also auto-inits the cluster. args.NoAutoInitializeCluster = false + // The first node also runs diagnostics (unless disabled by cluster.TelemetryOptOut). + args.StartDiagnosticsReporting = true } serverKnobs := args.Knobs.Server.(*server.TestingKnobs) diff --git a/pkg/cli/flags.go b/pkg/cli/flags.go index 1eb47644ea76..0aed90c25e68 100644 --- a/pkg/cli/flags.go +++ b/pkg/cli/flags.go @@ -1143,6 +1143,9 @@ func extraServerFlagInit(cmd *cobra.Command) error { } serverCfg.LocalityAddresses = localityAdvertiseHosts + // Ensure that diagnostic reporting is enabled for server startup commands. + serverCfg.StartDiagnosticsReporting = true + return nil } diff --git a/pkg/cli/start.go b/pkg/cli/start.go index 4625f465b0fa..91a25a8c3752 100644 --- a/pkg/cli/start.go +++ b/pkg/cli/start.go @@ -330,11 +330,6 @@ type serverStartupInterface interface { // before the first client is accepted. PreStart(ctx context.Context) error - // StartDiagnostics starts periodic diagnostics reporting and update checking. - // NOTE: This is not called in PreStart so that it's disabled by default for - // testing. - StartDiagnostics(ctx context.Context) - // AcceptClients starts listening for incoming SQL clients over the network. AcceptClients(ctx context.Context) error @@ -705,13 +700,6 @@ func createAndStartServerAsync( // all these startup steps to fail. So we do not need to look at // the "shutdown status" in serverStatusMu any more. - // Start up the diagnostics reporting and update check loops. - // We don't do this in (*server.Server).Start() because we don't - // want this overhead and possible interference in tests. - if !cluster.TelemetryOptOut() { - s.StartDiagnostics(ctx) - } - // Run one-off cluster initialization. if err := s.RunInitialSQL(ctx, startSingleNode, "" /* adminUser */, "" /* adminPassword */); err != nil { return err diff --git a/pkg/server/config.go b/pkg/server/config.go index 967a0012f4a8..3bf1a7e0a592 100644 --- a/pkg/server/config.go +++ b/pkg/server/config.go @@ -204,6 +204,12 @@ type BaseConfig struct { // Stores is specified to enable durable key-value storage. Stores base.StoreSpecList + + // StartDiagnosticsReporting starts the asynchronous goroutine that + // checks for CockroachDB upgrades and periodically reports + // diagnostics to Cockroach Labs. + // Should remain disabled during unit testing. + StartDiagnosticsReporting bool } // MakeBaseConfig returns a BaseConfig with default values. diff --git a/pkg/server/server.go b/pkg/server/server.go index 81bf1108379f..999d6beddf95 100644 --- a/pkg/server/server.go +++ b/pkg/server/server.go @@ -1725,6 +1725,11 @@ func (s *Server) PreStart(ctx context.Context) error { return err } + // If enabled, start reporting diagnostics. + if s.sqlServer.cfg.StartDiagnosticsReporting && !cluster.TelemetryOptOut() { + s.startDiagnostics(workersCtx) + } + // Enable the Obs Server. s.eventsServer.SetResourceInfo(state.clusterID, int32(state.nodeID), build.BinaryVersion()) @@ -1832,10 +1837,8 @@ func (s *Server) LogicalClusterID() uuid.UUID { return s.sqlServer.LogicalClusterID() } -// StartDiagnostics starts periodic diagnostics reporting and update checking. -// NOTE: This is not called in PreStart so that it's disabled by default for -// testing. -func (s *Server) StartDiagnostics(ctx context.Context) { +// startDiagnostics starts periodic diagnostics reporting and update checking. +func (s *Server) startDiagnostics(ctx context.Context) { s.updates.PeriodicallyCheckForUpdates(ctx, s.stopper) s.sqlServer.StartDiagnostics(ctx) } diff --git a/pkg/server/server_controller.go b/pkg/server/server_controller.go index b4f3507bc0d8..6341018460a6 100644 --- a/pkg/server/server_controller.go +++ b/pkg/server/server_controller.go @@ -266,11 +266,6 @@ func (s *Server) startInMemoryTenantServerInternal( return stopper, tenantServer, err } - // Start up the diagnostics reporting loop. - if !cluster.TelemetryOptOut() { - tenantServer.StartDiagnostics(startCtx) - } - // Show the tenant details in logs. // TODO(knz): Remove this once we can use a single listener. if err := reportTenantInfo(startCtx, baseCfg, sqlCfg); err != nil { diff --git a/pkg/server/tenant.go b/pkg/server/tenant.go index 62970dd605a6..6dacbbd1c0df 100644 --- a/pkg/server/tenant.go +++ b/pkg/server/tenant.go @@ -592,6 +592,11 @@ func (s *SQLServerWrapper) PreStart(ctx context.Context) error { return err } + // If enabled, start reporting diagnostics. + if s.sqlServer.cfg.StartDiagnosticsReporting && !cluster.TelemetryOptOut() { + s.startDiagnostics(workersCtx) + } + // Enable the Obs Server. // There is more logic here than in (*Server).PreStart() because // we care about the SQL instance ID too. @@ -692,9 +697,9 @@ func (s *SQLServerWrapper) LogicalClusterID() uuid.UUID { return s.sqlServer.LogicalClusterID() } -// StartDiagnostics begins the diagnostic loop of this tenant server. +// startDiagnostics begins the diagnostic loop of this tenant server. // Used in cli/mt_start_sql.go. -func (s *SQLServerWrapper) StartDiagnostics(ctx context.Context) { +func (s *SQLServerWrapper) startDiagnostics(ctx context.Context) { s.sqlServer.StartDiagnostics(ctx) } diff --git a/pkg/server/testserver.go b/pkg/server/testserver.go index 42c1c4e190af..3426b89a31c2 100644 --- a/pkg/server/testserver.go +++ b/pkg/server/testserver.go @@ -155,6 +155,7 @@ func makeTestConfigFromParams(params base.TestServerArgs) Config { cfg.SocketFile = params.SocketFile cfg.RetryOptions = params.RetryOptions cfg.Locality = params.Locality + cfg.StartDiagnosticsReporting = params.StartDiagnosticsReporting if params.TraceDir != "" { if err := initTraceDir(params.TraceDir); err == nil { cfg.InflightTraceDirName = params.TraceDir @@ -533,16 +534,17 @@ func (ts *TestServer) maybeStartDefaultTestTenant(ctx context.Context) error { params := base.TestTenantArgs{ // Currently, all the servers leverage the same tenant ID. We may // want to change this down the road, for more elaborate testing. - TenantID: serverutils.TestTenantID(), - MemoryPoolSize: ts.params.SQLMemoryPoolSize, - TempStorageConfig: &tempStorageConfig, - Locality: ts.params.Locality, - ExternalIODir: ts.params.ExternalIODir, - ExternalIODirConfig: ts.params.ExternalIODirConfig, - ForceInsecure: ts.Insecure(), - UseDatabase: ts.params.UseDatabase, - SSLCertsDir: ts.params.SSLCertsDir, - TestingKnobs: ts.params.Knobs, + TenantID: serverutils.TestTenantID(), + MemoryPoolSize: ts.params.SQLMemoryPoolSize, + TempStorageConfig: &tempStorageConfig, + Locality: ts.params.Locality, + ExternalIODir: ts.params.ExternalIODir, + ExternalIODirConfig: ts.params.ExternalIODirConfig, + ForceInsecure: ts.Insecure(), + UseDatabase: ts.params.UseDatabase, + SSLCertsDir: ts.params.SSLCertsDir, + TestingKnobs: ts.params.Knobs, + StartDiagnosticsReporting: ts.params.StartDiagnosticsReporting, } // Since we're creating a tenant, it doesn't make sense to pass through the @@ -848,6 +850,7 @@ func (ts *TestServer) StartTenant( baseCfg.HeapProfileDirName = params.HeapProfileDirName baseCfg.GoroutineDumpDirName = params.GoroutineDumpDirName baseCfg.ClusterName = ts.Cfg.ClusterName + baseCfg.StartDiagnosticsReporting = params.StartDiagnosticsReporting // For now, we don't support split RPC/SQL ports for secondary tenants // in test servers.