From 3cdcfe841295be199d209627aab3a008e819d6ed Mon Sep 17 00:00:00 2001 From: Renato Costa Date: Tue, 6 Feb 2024 10:14:05 -0500 Subject: [PATCH 1/4] roachprod: use `system` as default virtual cluster name This changes `DefaultStartOpts` to use `system` as `VirtualClusterName`. This simplifies the starting logic slightly, as it ensures that the field is always non-empty when starting clusters. When using `roachprod start`, the field will correctly be set to `system`; when using `roachprod start-sql`, the field will be the name of the tenant being started. Epic: none Release note: None --- pkg/roachprod/install/cockroach.go | 6 ------ pkg/roachprod/roachprod.go | 1 + 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/pkg/roachprod/install/cockroach.go b/pkg/roachprod/install/cockroach.go index 86d283b83acc..104ac26ad396 100644 --- a/pkg/roachprod/install/cockroach.go +++ b/pkg/roachprod/install/cockroach.go @@ -1038,9 +1038,6 @@ func (c *SyncedCluster) createAdminUserForSecureCluster( // TODO(renato): use the same combination once we're able to select // the virtual cluster we are connecting to in the console. var password = startOpts.VirtualClusterName - if startOpts.VirtualClusterName == "" { - password = DefaultPassword - } stmts := strings.Join([]string{ fmt.Sprintf("CREATE USER IF NOT EXISTS %s WITH LOGIN PASSWORD '%s'", DefaultUser, password), @@ -1055,9 +1052,6 @@ func (c *SyncedCluster) createAdminUserForSecureCluster( if err := retryOpts.Do(ctx, func(ctx context.Context) error { // We use the first node in the virtual cluster to create the user. firstNode := c.TargetNodes()[0] - if startOpts.VirtualClusterName == "" { - startOpts.VirtualClusterName = SystemInterfaceName - } results, err := c.ExecSQL( ctx, l, Nodes{firstNode}, startOpts.VirtualClusterName, startOpts.SQLInstance, []string{ "-e", stmts, diff --git a/pkg/roachprod/roachprod.go b/pkg/roachprod/roachprod.go index 8a8e799db96e..6f4c960de0e8 100644 --- a/pkg/roachprod/roachprod.go +++ b/pkg/roachprod/roachprod.go @@ -683,6 +683,7 @@ func DefaultStartOpts() install.StartOpts { ScheduleBackupArgs: "", InitTarget: 1, SQLPort: 0, + VirtualClusterName: install.SystemInterfaceName, // TODO(DarrylWong): revert back to 0 once #117125 is addressed. AdminUIPort: config.DefaultAdminUIPort, } From 5628bf6d86332f15c9f5fa124ac4942010431f59 Mon Sep 17 00:00:00 2001 From: Renato Costa Date: Tue, 6 Feb 2024 15:41:45 -0500 Subject: [PATCH 2/4] roachprod: update `start-sql` to accept `schedule-backups` We will soon create default backup schedules for tenants as they are started as well. This flag will allow backup management to be controlled the same way as they currently are for the system tenant. Epic: none Release note: None --- pkg/cmd/roachprod/flags.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pkg/cmd/roachprod/flags.go b/pkg/cmd/roachprod/flags.go index 257e37712f2d..dedc55e82202 100644 --- a/pkg/cmd/roachprod/flags.go +++ b/pkg/cmd/roachprod/flags.go @@ -202,13 +202,6 @@ func initFlags() { "init-target", startOpts.InitTarget, "node on which to run initialization") startCmd.Flags().IntVar(&startOpts.StoreCount, "store-count", startOpts.StoreCount, "number of stores to start each node with") - startCmd.Flags().BoolVar(&startOpts.ScheduleBackups, - "schedule-backups", startOpts.ScheduleBackups, - "create a cluster backup schedule once the cluster has started (by default, "+ - "full backup hourly and incremental every 15 minutes)") - startCmd.Flags().StringVar(&startOpts.ScheduleBackupArgs, "schedule-backup-args", "", - `Recurrence and scheduled backup options specification. -Default is "RECURRING '*/15 * * * *' FULL BACKUP '@hourly' WITH SCHEDULE OPTIONS first_run = 'now'"`) startInstanceCmd.Flags().StringVarP(&storageCluster, "storage-cluster", "S", "", "storage cluster") _ = startInstanceCmd.MarkFlagRequired("storage-cluster") @@ -337,6 +330,13 @@ Default is "RECURRING '*/15 * * * *' FULL BACKUP '@hourly' WITH SCHEDULE OPTIONS } for _, cmd := range []*cobra.Command{startCmd, startInstanceCmd} { + cmd.Flags().BoolVar(&startOpts.ScheduleBackups, + "schedule-backups", startOpts.ScheduleBackups, + "create a cluster backup schedule once the cluster has started (by default, "+ + "full backup hourly and incremental every 15 minutes)") + cmd.Flags().StringVar(&startOpts.ScheduleBackupArgs, + "schedule-backup-args", startOpts.ScheduleBackupArgs, + "Recurrence and scheduled backup options specification") cmd.Flags().Int64Var(&startOpts.NumFilesLimit, "num-files-limit", startOpts.NumFilesLimit, "limit the number of files that can be created by the cockroach process") cmd.Flags().IntVar(&startOpts.SQLPort, From dde28e409269f128f92eca5a9ccce977dc12bf8f Mon Sep 17 00:00:00 2001 From: Renato Costa Date: Tue, 6 Feb 2024 09:27:03 -0500 Subject: [PATCH 3/4] roachprod: create admin user and backup schedule for virtual clusters This commit updates `roachprod start{-sql}` and unifies the set of steps performed on the cluster regardless of whether we are starting the system tenant for the first time or creating a new tenant. Specifically, tenants will also have an admin user configured and a default backup schedule is created when `--schedule-backups` is passed. Epic: none Release note: None --- pkg/roachprod/install/cluster_synced.go | 4 +- pkg/roachprod/install/cockroach.go | 153 ++++++++++++------------ pkg/roachprod/roachprod.go | 6 +- 3 files changed, 84 insertions(+), 79 deletions(-) diff --git a/pkg/roachprod/install/cluster_synced.go b/pkg/roachprod/install/cluster_synced.go index 7c818151cee2..7fd3ea47f5eb 100644 --- a/pkg/roachprod/install/cluster_synced.go +++ b/pkg/roachprod/install/cluster_synced.go @@ -2921,8 +2921,8 @@ func (c *SyncedCluster) ParallelE( // to maintain parity with auto-init behavior of `roachprod start` (when // --skip-init) is not specified. func (c *SyncedCluster) Init(ctx context.Context, l *logger.Logger, node Node) error { - if res, err := c.initializeCluster(ctx, l, node); err != nil || (res != nil && res.Err != nil) { - return errors.WithDetail(errors.CombineErrors(err, res.Err), "install.Init() failed: unable to initialize cluster.") + if err := c.initializeCluster(ctx, l, node); err != nil { + return errors.WithDetail(err, "install.Init() failed: unable to initialize cluster.") } if err := c.setClusterSettings(ctx, l, node, ""); err != nil { diff --git a/pkg/roachprod/install/cockroach.go b/pkg/roachprod/install/cockroach.go index 104ac26ad396..09fb4ad747c8 100644 --- a/pkg/roachprod/install/cockroach.go +++ b/pkg/roachprod/install/cockroach.go @@ -383,48 +383,60 @@ func (c *SyncedCluster) Start(ctx context.Context, l *logger.Logger, startOpts S } } - l.Printf("%s: starting nodes", c.Name) - // For single node non-virtual clusters, `init` can be skipped - // because during the c.StartNode call above, the - // `--start-single-node` flag will handle all of this for us. - shouldInit := startOpts.Target == StartDefault && !c.useStartSingleNode() && !startOpts.SkipInit - for _, node := range c.Nodes { - // NB: if cockroach started successfully, we ignore the output as it is - // some harmless start messaging. - res, err := c.startNode(ctx, l, node, startOpts) - if err != nil || res.Err != nil { - // If err is non-nil, then this will not be retried, but if res.Err is non-nil, it will be. - return errors.CombineErrors(err, res.Err) - } - // We reserve a few special operations (bootstrapping, and setting - // cluster settings) to the InitTarget. - if startOpts.Target == StartDefault { - if startOpts.GetInitTarget() != node || startOpts.SkipInit { + // Start cockroach processes and `init` cluster, if necessary. + if startOpts.Target != StartSharedProcessForVirtualCluster { + l.Printf("%s (%s): starting cockroach processes", c.Name, startOpts.VirtualClusterName) + // For single node non-virtual clusters, `init` can be skipped + // because during the c.StartNode call above, the + // `--start-single-node` flag will handle all of this for us. + shouldInit := startOpts.Target == StartDefault && !c.useStartSingleNode() && !startOpts.SkipInit + + for _, node := range c.Nodes { + // NB: if cockroach started successfully, we ignore the output as it is + // some harmless start messaging. + if err := c.startNode(ctx, l, node, startOpts); err != nil { + return err + } + // We reserve a few special operations (bootstrapping, and setting + // cluster settings) to the InitTarget. + if startOpts.GetInitTarget() != node { continue } - } - if shouldInit { - if res, err = c.initializeCluster(ctx, l, node); err != nil || res.Err != nil { - // If err is non-nil, then this will not be retried, but if res.Err is non-nil, it will be. - return errors.CombineErrors(err, res.Err) + + if shouldInit { + if err := c.initializeCluster(ctx, l, node); err != nil { + return err + } } } } + // If we did not skip calling `init` on the cluster, we also set up + // default cluster settings, an admin user, and a backup schedule on + // the new cluster, making it the cluster a little more realistic + // and convenient to manage. if !startOpts.SkipInit { storageCluster := c if startOpts.KVCluster != nil { storageCluster = startOpts.KVCluster } - if startOpts.Target == StartDefault { - if err = storageCluster.setClusterSettings(ctx, l, startOpts.GetInitTarget(), startOpts.VirtualClusterName); err != nil { - return err - } - storageCluster.createAdminUserForSecureCluster(ctx, l, startOpts) + // We use the `storageCluster` even if starting a virtual cluster + // because `setClusterSettings` uses SQL statements that are meant + // to run on the system tenant. + if err := storageCluster.setClusterSettings( + ctx, l, startOpts.GetInitTarget(), startOpts.VirtualClusterName, + ); err != nil { + return err + } + + c.createAdminUserForSecureCluster(ctx, l, startOpts.VirtualClusterName, startOpts.SQLInstance) - if startOpts.ScheduleBackups && shouldInit && config.CockroachDevLicense != "" { - if err := storageCluster.createFixedBackupSchedule(ctx, l, startOpts.ScheduleBackupArgs); err != nil { + if startOpts.ScheduleBackups { + if config.CockroachDevLicense == "" { + l.Printf("WARNING: no backup schedules will be created as there is no enterprise license configured") + } else { + if err := c.createFixedBackupSchedule(ctx, l, startOpts); err != nil { return err } } @@ -505,9 +517,8 @@ const ( // authentication. AuthUserCert - DefaultUser = "roach" - - DefaultPassword = SystemInterfaceName + DefaultUser = "roachprod" + DefaultPassword = "cockroachdb" ) // NodeURL constructs a postgres URL. If sharedTenantName is not empty, it will @@ -636,14 +647,10 @@ func (c *SyncedCluster) ExecSQL( func (c *SyncedCluster) startNode( ctx context.Context, l *logger.Logger, node Node, startOpts StartOpts, -) (*RunResultDetails, error) { - if startOpts.Target == StartSharedProcessForVirtualCluster { - return &RunResultDetails{}, nil - } - +) error { startCmd, err := c.generateStartCmd(ctx, l, node, startOpts) if err != nil { - return nil, err + return err } var uploadCmd string if c.IsLocal() { @@ -656,7 +663,7 @@ func (c *SyncedCluster) startNode( uploadOpts.stdin = strings.NewReader(startCmd) res, err = c.runCmdOnSingleNode(ctx, l, node, uploadCmd, uploadOpts) if err != nil || res.Err != nil { - return res, err + return errors.CombineErrors(err, res.Err) } var runScriptCmd string @@ -664,7 +671,8 @@ func (c *SyncedCluster) startNode( runScriptCmd = fmt.Sprintf(`cd %s ; `, c.localVMDir(node)) } runScriptCmd += "./cockroach.sh" - return c.runCmdOnSingleNode(ctx, l, node, runScriptCmd, defaultCmdOpts("run-start-script")) + res, err = c.runCmdOnSingleNode(ctx, l, node, runScriptCmd, defaultCmdOpts("run-start-script")) + return errors.CombineErrors(err, res.Err) } func (c *SyncedCluster) generateStartCmd( @@ -998,23 +1006,20 @@ func (c *SyncedCluster) maybeScaleMem(val int) int { return val } -func (c *SyncedCluster) initializeCluster( - ctx context.Context, l *logger.Logger, node Node, -) (*RunResultDetails, error) { +func (c *SyncedCluster) initializeCluster(ctx context.Context, l *logger.Logger, node Node) error { l.Printf("%s: initializing cluster\n", c.Name) cmd, err := c.generateInitCmd(ctx, node) if err != nil { - return nil, err + return err } res, err := c.runCmdOnSingleNode(ctx, l, node, cmd, defaultCmdOpts("init-cluster")) if res != nil { - out := strings.TrimSpace(res.CombinedOut) - if out != "" { + if out := strings.TrimSpace(res.CombinedOut); out != "" { l.Printf(out) } } - return res, err + return errors.CombineErrors(err, res.Err) } // createAdminUserForSecureCluster creates a `roach` user with admin @@ -1024,24 +1029,21 @@ func (c *SyncedCluster) initializeCluster( // variety of contexts within roachtests, and a failure to create a // user might be "expected" depending on what the test is doing. func (c *SyncedCluster) createAdminUserForSecureCluster( - ctx context.Context, l *logger.Logger, startOpts StartOpts, + ctx context.Context, l *logger.Logger, virtualClusterName string, sqlInstance int, ) { if !c.Secure { return } - // N.B.: although using the same username/password combination would - // be easier to remember, if we do it for the system interface and - // virtual clusters we would be unable to log-in to the virtual - // cluster console due to #109691. - // - // TODO(renato): use the same combination once we're able to select - // the virtual cluster we are connecting to in the console. - var password = startOpts.VirtualClusterName + // Use the same combination for username and password to allow + // people using the DB console to easily switch between tenants in + // the UI, if managing UA clusters. + const username = DefaultUser + const password = DefaultPassword stmts := strings.Join([]string{ - fmt.Sprintf("CREATE USER IF NOT EXISTS %s WITH LOGIN PASSWORD '%s'", DefaultUser, password), - fmt.Sprintf("GRANT ADMIN TO %s WITH ADMIN OPTION", DefaultUser), + fmt.Sprintf("CREATE USER IF NOT EXISTS %s WITH LOGIN PASSWORD '%s'", username, password), + fmt.Sprintf("GRANT ADMIN TO %s WITH ADMIN OPTION", username), }, "; ") // We retry a few times here because cockroach process might not be @@ -1053,7 +1055,7 @@ func (c *SyncedCluster) createAdminUserForSecureCluster( // We use the first node in the virtual cluster to create the user. firstNode := c.TargetNodes()[0] results, err := c.ExecSQL( - ctx, l, Nodes{firstNode}, startOpts.VirtualClusterName, startOpts.SQLInstance, []string{ + ctx, l, Nodes{firstNode}, virtualClusterName, sqlInstance, []string{ "-e", stmts, }) @@ -1069,8 +1071,8 @@ func (c *SyncedCluster) createAdminUserForSecureCluster( } var virtualClusterInfo string - if startOpts.VirtualClusterName != "" && startOpts.VirtualClusterName != SystemInterfaceName { - virtualClusterInfo = fmt.Sprintf(" for virtual cluster %s", startOpts.VirtualClusterName) + if virtualClusterName != SystemInterfaceName { + virtualClusterInfo = fmt.Sprintf(" for virtual cluster %s", virtualClusterName) } l.Printf("log into DB console%s with user=%s password=%s", virtualClusterInfo, DefaultUser, password) @@ -1079,7 +1081,7 @@ func (c *SyncedCluster) createAdminUserForSecureCluster( func (c *SyncedCluster) setClusterSettings( ctx context.Context, l *logger.Logger, node Node, virtualCluster string, ) error { - l.Printf("%s: setting cluster settings", c.Name) + l.Printf("%s (%s): setting cluster settings", c.Name, virtualCluster) cmd, err := c.generateClusterSettingCmd(ctx, l, node, virtualCluster) if err != nil { return err @@ -1104,7 +1106,7 @@ func (c *SyncedCluster) generateClusterSettingCmd( ctx context.Context, l *logger.Logger, node Node, virtualCluster string, ) (string, error) { if config.CockroachDevLicense == "" { - l.Printf("%s: COCKROACH_DEV_LICENSE unset: enterprise features will be unavailable\n", + l.Printf("%s: COCKROACH_DEV_LICENSE unset: enterprise features will be unavailable", c.Name) } @@ -1335,7 +1337,7 @@ func (c *SyncedCluster) shouldAdvertisePublicIP() bool { // nodelocal, and otherwise in 'gs://cockroachdb-backup-testing'. // This cmd also ensures that only one schedule will be created for the cluster. func (c *SyncedCluster) createFixedBackupSchedule( - ctx context.Context, l *logger.Logger, scheduledBackupArgs string, + ctx context.Context, l *logger.Logger, startOpts StartOpts, ) error { externalStoragePath := fmt.Sprintf("gs://%s", testutils.BackupTestingBucket()) for _, cloud := range c.Clouds() { @@ -1344,27 +1346,26 @@ func (c *SyncedCluster) createFixedBackupSchedule( return nil } } - l.Printf("%s: creating backup schedule", c.Name) + l.Printf("%s (%s): creating backup schedule", c.Name, startOpts.VirtualClusterName) auth := "AUTH=implicit" - collectionPath := fmt.Sprintf(`%s/roachprod-scheduled-backups/%s/%v?%s`, - externalStoragePath, c.Name, timeutil.Now().UnixNano(), auth) + collectionPath := fmt.Sprintf(`%s/roachprod-scheduled-backups/%s/%s/%v?%s`, + externalStoragePath, c.Name, startOpts.VirtualClusterName, timeutil.Now().UnixNano(), auth) - // Default scheduled backup runs a full backup every hour and an incremental - // every 15 minutes. - scheduleArgs := `RECURRING '*/15 * * * *' FULL BACKUP '@hourly' WITH SCHEDULE OPTIONS first_run = 'now'` - if scheduledBackupArgs != "" { - scheduleArgs = scheduledBackupArgs - } createScheduleCmd := fmt.Sprintf(`CREATE SCHEDULE IF NOT EXISTS test_only_backup FOR BACKUP INTO '%s' %s`, - collectionPath, scheduleArgs) + collectionPath, startOpts.ScheduleBackupArgs) node := c.Nodes[0] binary := cockroachNodeBinary(c, node) - port, err := c.NodePort(ctx, node, "" /* virtualClusterName */, 0 /* sqlInstance */) + port, err := c.NodePort(ctx, node, startOpts.VirtualClusterName, startOpts.SQLInstance) if err != nil { return err } - url := c.NodeURL("localhost", port, SystemInterfaceName /* virtualClusterName */, ServiceModeShared, AuthRootCert) + serviceMode := ServiceModeShared + if startOpts.Target == StartServiceForVirtualCluster { + serviceMode = ServiceModeExternal + } + + url := c.NodeURL("localhost", port, startOpts.VirtualClusterName, serviceMode, AuthRootCert) fullCmd := fmt.Sprintf(`COCKROACH_CONNECT_TIMEOUT=%d %s sql --url %s -e %q`, startSQLTimeout, binary, url, createScheduleCmd) // Instead of using `c.ExecSQL()`, use `c.runCmdOnSingleNode()`, which allows us to diff --git a/pkg/roachprod/roachprod.go b/pkg/roachprod/roachprod.go index 6f4c960de0e8..29514bf61d3a 100644 --- a/pkg/roachprod/roachprod.go +++ b/pkg/roachprod/roachprod.go @@ -671,6 +671,10 @@ func Extend(l *logger.Logger, clusterName string, lifetime time.Duration) error return nil } +// Default scheduled backup runs a full backup every hour and an incremental +// every 15 minutes. +const DefaultBackupSchedule = `RECURRING '*/15 * * * *' FULL BACKUP '@hourly' WITH SCHEDULE OPTIONS first_run = 'now'` + // DefaultStartOpts returns a StartOpts populated with default values. func DefaultStartOpts() install.StartOpts { return install.StartOpts{ @@ -680,7 +684,7 @@ func DefaultStartOpts() install.StartOpts { StoreCount: 1, VirtualClusterID: 2, ScheduleBackups: false, - ScheduleBackupArgs: "", + ScheduleBackupArgs: DefaultBackupSchedule, InitTarget: 1, SQLPort: 0, VirtualClusterName: install.SystemInterfaceName, From 3d1995ae12cdc4ceba4913cd8f0ad9dceb316cee Mon Sep 17 00:00:00 2001 From: Renato Costa Date: Wed, 7 Feb 2024 11:11:34 -0500 Subject: [PATCH 4/4] roachtest: standardize start options for UA and non UA More concretly, build `StartOpts` based on roachtest's implementation of `DefaultStartOpts`, which includes creating a default backup schedule on start. Epic: none Release note: None --- pkg/cmd/roachtest/option/options.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/cmd/roachtest/option/options.go b/pkg/cmd/roachtest/option/options.go index f99ee3db843f..6a736600f278 100644 --- a/pkg/cmd/roachtest/option/options.go +++ b/pkg/cmd/roachtest/option/options.go @@ -59,7 +59,7 @@ func DefaultStartSingleNodeOpts() StartOpts { // DefaultStartVirtualClusterOpts returns StartOpts for starting an external // process virtual cluster with the given tenant name and SQL instance. func DefaultStartVirtualClusterOpts(tenantName string, sqlInstance int) StartOpts { - startOpts := StartOpts{RoachprodOpts: roachprod.DefaultStartOpts()} + startOpts := DefaultStartOpts() startOpts.RoachprodOpts.Target = install.StartServiceForVirtualCluster startOpts.RoachprodOpts.VirtualClusterName = tenantName startOpts.RoachprodOpts.SQLInstance = sqlInstance @@ -71,7 +71,7 @@ func DefaultStartVirtualClusterOpts(tenantName string, sqlInstance int) StartOpt // DefaultStartSharedVirtualClusterOpts returns StartOpts for starting a shared // process virtual cluster with the given tenant name. func DefaultStartSharedVirtualClusterOpts(tenantName string) StartOpts { - startOpts := StartOpts{RoachprodOpts: roachprod.DefaultStartOpts()} + startOpts := DefaultStartOpts() startOpts.RoachprodOpts.Target = install.StartSharedProcessForVirtualCluster startOpts.RoachprodOpts.VirtualClusterName = tenantName return startOpts