From 3caed9e4b37a06333b0fcc8ad216e8f01cc9d48b Mon Sep 17 00:00:00 2001 From: elliebababa Date: Wed, 7 Apr 2021 13:01:36 -0700 Subject: [PATCH 1/4] cliccl/debug_backup.go: refactor backup inspection tool Previously, the CLI backup inspection tool lived in cliccl/load.go. Both the command name and its syntax were confusing for backup inspection. This patch refactors the naming and syntax of the tool and moves it under `cockroach debug`, so that it makes more sense when used to debug backups. Release note (cli change): Backup inspection used to be done via `cockroach load show ...`, which confused users with ambiguous verbs in the command chain. We refactor the tool's syntax to make it clearer and more indicative for users debugging backups. The changes are: `load show summary ` -> `debug backup show ` `load show incremental ` -> `debug backup list-incremental ` `load show backups ` -> `debug backup list-backups ` `load show data ` -> `debug backup export --table=` --- pkg/ccl/backupccl/targets.go | 9 +- pkg/ccl/cliccl/BUILD.bazel | 4 +- pkg/ccl/cliccl/{load.go => debug_backup.go} | 132 +++++++++--------- .../{load_test.go => debug_backup_test.go} | 87 ++++++------ pkg/cli/auth.go | 2 +- pkg/cli/cert.go | 2 +- pkg/cli/cli.go | 5 - pkg/cli/cli_test.go | 2 +- pkg/cli/cliflags/flags.go | 9 +- pkg/cli/format_table.go | 25 ++-- pkg/cli/gen.go | 2 +- pkg/cli/node.go | 10 +- pkg/cli/sql.go | 4 +- 13 files changed, 153 insertions(+), 140 deletions(-) rename pkg/ccl/cliccl/{load.go => debug_backup.go} (86%) rename pkg/ccl/cliccl/{load_test.go => debug_backup_test.go} (88%) diff --git a/pkg/ccl/backupccl/targets.go b/pkg/ccl/backupccl/targets.go index 0b395d77ff85..7982096de809 100644 --- a/pkg/ccl/backupccl/targets.go +++ b/pkg/ccl/backupccl/targets.go @@ -474,9 +474,12 @@ func MakeBackupTableEntry( for i, b := range backupManifests { if b.StartTime.Less(endTime) && endTime.LessEq(b.EndTime) { if endTime != b.EndTime && b.MVCCFilter != MVCCFilter_All { - return BackupTableEntry{}, errors.Newf( - "reading data for requested time requires that BACKUP was created with %q"+ - " or should specify the time to be an exact backup time, nearest backup time is %s", backupOptRevisionHistory, timeutil.Unix(0, b.EndTime.WallTime).UTC()) + errorHints := "reading data for requested time requires that BACKUP was created with %q" + + " or should specify the time to be an exact backup time, nearest backup time is %s" + return BackupTableEntry{}, errors.WithHintf( + errors.Newf("unknown read time: %s", timeutil.Unix(0, endTime.WallTime).UTC()), + errorHints, backupOptRevisionHistory, timeutil.Unix(0, b.EndTime.WallTime).UTC(), + ) } ind = i break diff --git a/pkg/ccl/cliccl/BUILD.bazel b/pkg/ccl/cliccl/BUILD.bazel index 67842ad216a6..43af066cdf13 100644 --- a/pkg/ccl/cliccl/BUILD.bazel +++ b/pkg/ccl/cliccl/BUILD.bazel @@ -5,8 +5,8 @@ go_library( srcs = [ "cliccl.go", "debug.go", + "debug_backup.go", "demo.go", - "load.go", "mtproxy.go", "start.go", ], @@ -65,7 +65,7 @@ go_test( name = "cliccl_test", size = "small", srcs = [ - "load_test.go", + "debug_backup_test.go", "main_test.go", ], embed = [":cliccl"], diff --git a/pkg/ccl/cliccl/load.go b/pkg/ccl/cliccl/debug_backup.go similarity index 86% rename from pkg/ccl/cliccl/load.go rename to pkg/ccl/cliccl/debug_backup.go index 480e35981e13..ebf504611841 100644 --- a/pkg/ccl/cliccl/load.go +++ b/pkg/ccl/cliccl/debug_backup.go @@ -18,7 +18,6 @@ import ( "os" "path/filepath" "strings" - "text/tabwriter" "time" "github.com/cockroachdb/apd/v2" @@
-54,6 +53,7 @@ import ( ) var externalIODir string +var exportTableName string var readTime string var destination string var format string @@ -61,41 +61,41 @@ var nullas string func init() { - loadShowSummaryCmd := &cobra.Command{ - Use: "summary ", - Short: "show backups summary", + showCmd := &cobra.Command{ + Use: "show ", + Short: "show backup summary", Long: "Shows summary of meta information about a SQL backup.", Args: cobra.ExactArgs(1), - RunE: cli.MaybeDecorateGRPCError(runLoadShowSummary), + RunE: cli.MaybeDecorateGRPCError(runShowCmd), } - loadShowBackupsCmd := &cobra.Command{ - Use: "backups ", - Short: "show backups in collections", - Long: "Shows full backups in a backup collections.", + listBackupsCmd := &cobra.Command{ + Use: "list-backups ", + Short: "show backups in collection", + Long: "Shows full backup paths in a backup collection.", Args: cobra.ExactArgs(1), - RunE: cli.MaybeDecorateGRPCError(runLoadShowBackups), + RunE: cli.MaybeDecorateGRPCError(runListBackupsCmd), } - loadShowIncrementalCmd := &cobra.Command{ - Use: "incremental ", + listIncrementalCmd := &cobra.Command{ + Use: "list-incremental ", Short: "show incremental backups", Long: "Shows incremental chain of a SQL backup.", Args: cobra.ExactArgs(1), - RunE: cli.MaybeDecorateGRPCError(runLoadShowIncremental), + RunE: cli.MaybeDecorateGRPCError(runListIncrementalCmd), } - loadShowDataCmd := &cobra.Command{ - Use: "data ", - Short: "show data", - Long: "Shows data of a SQL backup.", - Args: cobra.MinimumNArgs(2), - RunE: cli.MaybeDecorateGRPCError(runLoadShowData), + exportDataCmd := &cobra.Command{ + Use: "export ", + Short: "export table data from a backup", + Long: "export table data from a backup, requires specifying --table to export data from", + Args: cobra.MinimumNArgs(1), + RunE: cli.MaybeDecorateGRPCError(runExportDataCmd), } - loadShowCmds := &cobra.Command{ - Use: "show [command]", - Short: "show backups", + backupCmds := &cobra.Command{ + Use: "backup [command]", + Short: "debug backups", Long: "Shows information about a SQL backup.", Args: cobra.MinimumNArgs(1), RunE: func(cmd *cobra.Command, args []string) error { @@ -103,64 +103,61 @@ func init() { }, } - loadCmds := &cobra.Command{ - Use: "load [command]", - Short: "load backup commands", - Long: `Commands for bulk loading external files.`, - RunE: func(cmd *cobra.Command, args []string) error { - return cmd.Usage() - }, - } - - loadFlags := loadCmds.Flags() - loadFlags.StringVarP( + backupFlags := backupCmds.Flags() + backupFlags.StringVarP( &externalIODir, cliflags.ExternalIODir.Name, cliflags.ExternalIODir.Shorthand, "", /*value*/ cliflags.ExternalIODir.Usage()) - loadShowDataCmd.Flags().StringVarP( + exportDataCmd.Flags().StringVarP( + &exportTableName, + cliflags.ExportTableTarget.Name, + cliflags.ExportTableTarget.Shorthand, + "", /*value*/ + cliflags.ExportTableTarget.Usage()) + + exportDataCmd.Flags().StringVarP( &readTime, cliflags.ReadTime.Name, cliflags.ReadTime.Shorthand, "", /*value*/ cliflags.ReadTime.Usage()) - loadShowDataCmd.Flags().StringVarP( + exportDataCmd.Flags().StringVarP( &destination, cliflags.ExportDestination.Name, cliflags.ExportDestination.Shorthand, "", /*value*/ cliflags.ExportDestination.Usage()) - loadShowDataCmd.Flags().StringVarP( + exportDataCmd.Flags().StringVarP( &format, cliflags.ExportTableFormat.Name, cliflags.ExportTableFormat.Shorthand, "csv", /*value*/ cliflags.ExportTableFormat.Usage()) - loadShowDataCmd.Flags().StringVarP( + exportDataCmd.Flags().StringVarP( &nullas, 
cliflags.ExportCSVNullas.Name, cliflags.ExportCSVNullas.Shorthand, "null", /*value*/ cliflags.ExportCSVNullas.Usage()) - cli.AddCmd(loadCmds) - loadCmds.AddCommand(loadShowCmds) + cli.DebugCmd.AddCommand(backupCmds) - loadShowSubCmds := []*cobra.Command{ - loadShowSummaryCmd, - loadShowBackupsCmd, - loadShowIncrementalCmd, - loadShowDataCmd, + backupSubCmds := []*cobra.Command{ + showCmd, + listBackupsCmd, + listIncrementalCmd, + exportDataCmd, } - for _, cmd := range loadShowSubCmds { - loadShowCmds.AddCommand(cmd) - cmd.Flags().AddFlagSet(loadFlags) + for _, cmd := range backupSubCmds { + backupCmds.AddCommand(cmd) + cmd.Flags().AddFlagSet(backupFlags) } } @@ -198,7 +195,7 @@ func getManifestFromURI(ctx context.Context, path string) (backupccl.BackupManif return backupManifest, nil } -func runLoadShowSummary(cmd *cobra.Command, args []string) error { +func runShowCmd(cmd *cobra.Command, args []string) error { path := args[0] ctx := context.Background() @@ -217,7 +214,7 @@ func runLoadShowSummary(cmd *cobra.Command, args []string) error { return nil } -func runLoadShowBackups(cmd *cobra.Command, args []string) error { +func runListBackupsCmd(cmd *cobra.Command, args []string) error { path := args[0] if !strings.Contains(path, "://") { @@ -235,18 +232,17 @@ func runLoadShowBackups(cmd *cobra.Command, args []string) error { return errors.Wrapf(err, "list full backups in collection") } - if len(backupPaths) == 0 { - fmt.Println("no backups found.") - } - + cols := []string{"path"} + rows := make([][]string, 0) for _, backupPath := range backupPaths { - fmt.Println("./" + backupPath) + newRow := []string{"./" + backupPath} + rows = append(rows, newRow) } - - return nil + rowSliceIter := cli.NewRowSliceIter(rows, "l" /*align*/) + return cli.PrintQueryOutput(os.Stdout, cols, rowSliceIter) } -func runLoadShowIncremental(cmd *cobra.Command, args []string) error { +func runListIncrementalCmd(cmd *cobra.Command, args []string) error { path := args[0] if !strings.Contains(path, "://") { @@ -270,12 +266,12 @@ func runLoadShowIncremental(cmd *cobra.Command, args []string) error { return err } - w := tabwriter.NewWriter(os.Stdout, 28 /*minwidth*/, 1 /*tabwidth*/, 2 /*padding*/, ' ' /*padchar*/, 0 /*flags*/) basepath := uri.Path manifestPaths := append([]string{""}, incPaths...) 
stores := make([]cloud.ExternalStorage, len(manifestPaths)) stores[0] = store + rows := make([][]string, 0) for i := range manifestPaths { if i > 0 { @@ -296,19 +292,21 @@ func runLoadShowIncremental(cmd *cobra.Command, args []string) error { if i == 0 { startTime = "-" } - fmt.Fprintf(w, "%s %s %s\n", uri.Path, startTime, endTime) - } - - if err := w.Flush(); err != nil { - return err + newRow := []string{uri.Path, startTime, endTime} + rows = append(rows, newRow) } - return nil + cols := []string{"path", "start time", "end time"} + rowSliceIter := cli.NewRowSliceIter(rows, "lll" /*align*/) + return cli.PrintQueryOutput(os.Stdout, cols, rowSliceIter) } -func runLoadShowData(cmd *cobra.Command, args []string) error { +func runExportDataCmd(cmd *cobra.Command, args []string) error { - fullyQualifiedTableName := strings.ToLower(args[0]) - manifestPaths := args[1:] + if exportTableName == "" { + return errors.New("export data requires table name specified by --table flag") + } + fullyQualifiedTableName := strings.ToLower(exportTableName) + manifestPaths := args ctx := context.Background() manifests := make([]backupccl.BackupManifest, 0, len(manifestPaths)) diff --git a/pkg/ccl/cliccl/load_test.go b/pkg/ccl/cliccl/debug_backup_test.go similarity index 88% rename from pkg/ccl/cliccl/load_test.go rename to pkg/ccl/cliccl/debug_backup_test.go index d49fef6caca6..6bdf13fdefaf 100644 --- a/pkg/ccl/cliccl/load_test.go +++ b/pkg/ccl/cliccl/debug_backup_test.go @@ -14,7 +14,6 @@ import ( "fmt" "strings" "testing" - "text/tabwriter" "time" "github.com/cockroachdb/cockroach/pkg/base" @@ -31,7 +30,7 @@ import ( "github.com/stretchr/testify/require" ) -func TestLoadShowSummary(t *testing.T) { +func TestShowSummary(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) @@ -63,7 +62,7 @@ func TestLoadShowSummary(t *testing.T) { sqlDB.Exec(t, `BACKUP DATABASE testDB TO $1 AS OF SYSTEM TIME `+ts2.AsOfSystemTime(), backupPath) t.Run("show-summary-without-types-or-tables", func(t *testing.T) { - out, err := c.RunWithCapture(fmt.Sprintf("load show summary %s --external-io-dir=%s", dbOnlyBackupPath, dir)) + out, err := c.RunWithCapture(fmt.Sprintf("debug backup show %s --external-io-dir=%s", dbOnlyBackupPath, dir)) require.NoError(t, err) expectedOutput := fmt.Sprintf( `{ @@ -92,7 +91,7 @@ func TestLoadShowSummary(t *testing.T) { }) t.Run("show-summary-with-full-information", func(t *testing.T) { - out, err := c.RunWithCapture(fmt.Sprintf("load show summary %s --external-io-dir=%s", backupPath, dir)) + out, err := c.RunWithCapture(fmt.Sprintf("debug backup show %s --external-io-dir=%s", backupPath, dir)) require.NoError(t, err) var sstFile string @@ -149,7 +148,7 @@ func TestLoadShowSummary(t *testing.T) { }) } -func TestLoadShowBackups(t *testing.T) { +func TestListBackups(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) @@ -168,30 +167,30 @@ func TestLoadShowBackups(t *testing.T) { const backupPath = "nodelocal://0/fooFolder" ts := generateBackupTimestamps(3) - t.Run("show-backups-without-backups-in-collection", func(t *testing.T) { - out, err := c.RunWithCapture(fmt.Sprintf("load show backups %s --external-io-dir=%s", backupPath, dir)) - require.NoError(t, err) - expectedOutput := "no backups found.\n" - checkExpectedOutput(t, expectedOutput, out) - }) - sqlDB.Exec(t, fmt.Sprintf(`BACKUP DATABASE testDB INTO $1 AS OF SYSTEM TIME '%s'`, ts[0].AsOfSystemTime()), backupPath) sqlDB.Exec(t, fmt.Sprintf(`BACKUP DATABASE testDB INTO $1 AS OF SYSTEM TIME '%s'`, 
ts[1].AsOfSystemTime()), backupPath) sqlDB.Exec(t, fmt.Sprintf(`BACKUP DATABASE testDB INTO $1 AS OF SYSTEM TIME '%s'`, ts[2].AsOfSystemTime()), backupPath) t.Run("show-backups-with-backups-in-collection", func(t *testing.T) { - out, err := c.RunWithCapture(fmt.Sprintf("load show backups %s --external-io-dir=%s", backupPath, dir)) + out, err := c.RunWithCapture(fmt.Sprintf("debug backup list-backups %s --external-io-dir=%s", backupPath, dir)) require.NoError(t, err) - expectedOutput := fmt.Sprintf(".%s\n.%s\n.%s\n", - ts[0].GoTime().Format(backupccl.DateBasedIntoFolderName), - ts[1].GoTime().Format(backupccl.DateBasedIntoFolderName), - ts[2].GoTime().Format(backupccl.DateBasedIntoFolderName)) - checkExpectedOutput(t, expectedOutput, out) + var buf bytes.Buffer + rows := [][]string{ + {"." + ts[0].GoTime().Format(backupccl.DateBasedIntoFolderName)}, + {"." + ts[1].GoTime().Format(backupccl.DateBasedIntoFolderName)}, + {"." + ts[2].GoTime().Format(backupccl.DateBasedIntoFolderName)}, + } + cols := []string{"path"} + rowSliceIter := cli.NewRowSliceIter(rows, "l" /*align*/) + if err := cli.PrintQueryOutput(&buf, cols, rowSliceIter); err != nil { + t.Fatalf("TestListBackups: PrintQueryOutput: %v", err) + } + checkExpectedOutput(t, buf.String(), out) }) } -func TestLoadShowIncremental(t *testing.T) { +func TestListIncremental(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) @@ -213,23 +212,26 @@ func TestLoadShowIncremental(t *testing.T) { sqlDB.Exec(t, fmt.Sprintf(`BACKUP DATABASE testDB TO $1 AS OF SYSTEM TIME '%s'`, ts[1].AsOfSystemTime()), backupPath) sqlDB.Exec(t, fmt.Sprintf(`BACKUP DATABASE testDB TO $1 AS OF SYSTEM TIME '%s'`, ts[2].AsOfSystemTime()), backupPath) - out, err := c.RunWithCapture(fmt.Sprintf("load show incremental %s --external-io-dir=%s", backupPath, dir)) + out, err := c.RunWithCapture(fmt.Sprintf("debug backup list-incremental %s --external-io-dir=%s", backupPath, dir)) require.NoError(t, err) expectedIncFolder := ts[1].GoTime().Format(backupccl.DateBasedIncFolderName) expectedIncFolder2 := ts[2].GoTime().Format(backupccl.DateBasedIncFolderName) var buf bytes.Buffer - w := tabwriter.NewWriter(&buf, 28 /*minwidth*/, 1 /*tabwidth*/, 2 /*padding*/, ' ' /*padchar*/, 0 /*flags*/) - fmt.Fprintf(w, "/fooFolder - %s\n", ts[0].GoTime().Format(time.RFC3339)) - fmt.Fprintf(w, "/fooFolder%s %s %s\n", expectedIncFolder, ts[0].GoTime().Format(time.RFC3339), ts[1].GoTime().Format(time.RFC3339)) - fmt.Fprintf(w, "/fooFolder%s %s %s\n", expectedIncFolder2, ts[1].GoTime().Format(time.RFC3339), ts[2].GoTime().Format(time.RFC3339)) - if err := w.Flush(); err != nil { - t.Fatalf("TestLoadShowIncremental: flush: %v", err) + rows := [][]string{ + {"/fooFolder", "-", ts[0].GoTime().Format(time.RFC3339)}, + {"/fooFolder" + expectedIncFolder, ts[0].GoTime().Format(time.RFC3339), ts[1].GoTime().Format(time.RFC3339)}, + {"/fooFolder" + expectedIncFolder2, ts[1].GoTime().Format(time.RFC3339), ts[2].GoTime().Format(time.RFC3339)}, + } + cols := []string{"path", "start time", "end time"} + rowSliceIter := cli.NewRowSliceIter(rows, "lll" /*align*/) + if err := cli.PrintQueryOutput(&buf, cols, rowSliceIter); err != nil { + t.Fatalf("TestListIncremental: PrintQueryOutput: %v", err) } checkExpectedOutput(t, buf.String(), out) } -func TestLoadShowData(t *testing.T) { +func TestShowData(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) @@ -283,13 +285,18 @@ func TestLoadShowData(t *testing.T) { "testDB.testschema.fooTable", []string{backupPublicSchemaPath}, 
"ERROR: fetching entry: table testdb.testschema.footable not found\n", + }, { + "show-data-fail-without-table-specified", + "", + []string{backupPublicSchemaPath}, + "ERROR: export data requires table name specified by --table flag\n", }, } for _, tc := range testCasesOnError { t.Run(tc.name, func(t *testing.T) { - out, err := c.RunWithCapture(fmt.Sprintf("load show data %s %s --external-io-dir=%s", - tc.tableName, + out, err := c.RunWithCapture(fmt.Sprintf("debug backup export %s --table=%s --external-io-dir=%s", strings.Join(tc.backupPaths, " "), + tc.tableName, dir)) require.NoError(t, err) checkExpectedOutput(t, tc.expectedOutput, out) @@ -323,9 +330,9 @@ func TestLoadShowData(t *testing.T) { for _, tc := range testCasesDatumOutput { t.Run(tc.name, func(t *testing.T) { - out, err := c.RunWithCapture(fmt.Sprintf("load show data %s %s --external-io-dir=%s", - tc.tableName, + out, err := c.RunWithCapture(fmt.Sprintf("debug backup export %s --table=%s --external-io-dir=%s", strings.Join(tc.backupPaths, " "), + tc.tableName, dir)) require.NoError(t, err) checkExpectedOutput(t, tc.expectedDatums, out) @@ -333,7 +340,7 @@ func TestLoadShowData(t *testing.T) { } } -func TestLoadShowDataAOST(t *testing.T) { +func TestShowDataAOST(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) @@ -384,9 +391,9 @@ func TestLoadShowDataAOST(t *testing.T) { t.Run("show-data-as-of-a-uncovered-timestamp", func(t *testing.T) { tsNotCovered := hlc.Timestamp{WallTime: timeutil.Now().UnixNano()} - out, err := c.RunWithCapture(fmt.Sprintf("load show data %s %s --as-of=%s --external-io-dir=%s", - "testDB.public.fooTable", + out, err := c.RunWithCapture(fmt.Sprintf("debug backup export %s --table=%s --as-of=%s --external-io-dir=%s", backupPath, + "testDB.public.fooTable", tsNotCovered.AsOfSystemTime(), dir)) require.NoError(t, err) @@ -397,15 +404,17 @@ func TestLoadShowDataAOST(t *testing.T) { }) t.Run("show-data-as-of-non-backup-ts-should-return-error", func(t *testing.T) { - out, err := c.RunWithCapture(fmt.Sprintf("load show data %s %s --as-of=%s --external-io-dir=%s", - "testDB.public.fooTable", + out, err := c.RunWithCapture(fmt.Sprintf("debug backup export %s --table=%s --as-of=%s --external-io-dir=%s", backupPath, + "testDB.public.fooTable", ts.AsOfSystemTime(), dir)) require.NoError(t, err) expectedError := fmt.Sprintf( - "ERROR: fetching entry: reading data for requested time requires that BACKUP was created with \"revision_history\" "+ + "ERROR: fetching entry: unknown read time: %s\n"+ + "HINT: reading data for requested time requires that BACKUP was created with \"revision_history\" "+ "or should specify the time to be an exact backup time, nearest backup time is %s\n", + timeutil.Unix(0, ts.WallTime).UTC(), timeutil.Unix(0, ts1.WallTime).UTC()) checkExpectedOutput(t, expectedError, out) }) @@ -558,9 +567,9 @@ func TestLoadShowDataAOST(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - out, err := c.RunWithCapture(fmt.Sprintf("load show data %s %s --as-of=%s --external-io-dir=%s ", - tc.tableName, + out, err := c.RunWithCapture(fmt.Sprintf("debug backup export %s --table=%s --as-of=%s --external-io-dir=%s ", strings.Join(tc.backupPaths, " "), + tc.tableName, tc.asof, dir)) require.NoError(t, err) diff --git a/pkg/cli/auth.go b/pkg/cli/auth.go index 682871525367..41bffa862f1e 100644 --- a/pkg/cli/auth.go +++ b/pkg/cli/auth.go @@ -66,7 +66,7 @@ func runLogin(cmd *cobra.Command, args []string) error { rows := [][]string{ {username, fmt.Sprintf("%d", id), 
hC}, } - if err := printQueryOutput(os.Stdout, cols, newRowSliceIter(rows, "ll")); err != nil { + if err := PrintQueryOutput(os.Stdout, cols, NewRowSliceIter(rows, "ll")); err != nil { return err } diff --git a/pkg/cli/cert.go b/pkg/cli/cert.go index fd2477a19936..2344dd1781bc 100644 --- a/pkg/cli/cert.go +++ b/pkg/cli/cert.go @@ -299,7 +299,7 @@ func runListCerts(cmd *cobra.Command, args []string) error { addRow(cert, fmt.Sprintf("user: %s", user)) } - return printQueryOutput(os.Stdout, certTableHeaders, newRowSliceIter(rows, alignment)) + return PrintQueryOutput(os.Stdout, certTableHeaders, NewRowSliceIter(rows, alignment)) } var certCmds = []*cobra.Command{ diff --git a/pkg/cli/cli.go b/pkg/cli/cli.go index cc87aa1fb010..14d31154019d 100644 --- a/pkg/cli/cli.go +++ b/pkg/cli/cli.go @@ -309,11 +309,6 @@ func hasParentCmd(cmd, refParent *cobra.Command) bool { return hasParent } -// AddCmd adds a command to the cli. -func AddCmd(c *cobra.Command) { - cockroachCmd.AddCommand(c) -} - // Run ... func Run(args []string) error { cockroachCmd.SetArgs(args) diff --git a/pkg/cli/cli_test.go b/pkg/cli/cli_test.go index 4f8b25eb3550..106b780f663b 100644 --- a/pkg/cli/cli_test.go +++ b/pkg/cli/cli_test.go @@ -1018,7 +1018,7 @@ func TestRenderHTML(t *testing.T) { t.Run(name, func(t *testing.T) { var buf bytes.Buffer err := render(&tc.reporter, &buf, - cols, newRowSliceIter(rows, align), + cols, NewRowSliceIter(rows, align), nil /* completedHook */, nil /* noRowsHook */) if err != nil { t.Fatal(err) diff --git a/pkg/cli/cliflags/flags.go b/pkg/cli/cliflags/flags.go index 67338df08ec3..eff1b076ce80 100644 --- a/pkg/cli/cliflags/flags.go +++ b/pkg/cli/cliflags/flags.go @@ -1434,12 +1434,17 @@ exist. The interval is specified with a suffix of 's' for seconds, `, } + ExportTableTarget = FlagInfo{ + Name: "table", + Description: `Select the table to export data from.`, + } + ExportDestination = FlagInfo{ Name: "destination", Description: ` The destination to export data. -If the export format is readable and this flag left specified, -defaults to displays the exported data in the terminal output. +If the export format is readable and this flag left unspecified, +defaults to display the exported data in the terminal output. `, } diff --git a/pkg/cli/format_table.go b/pkg/cli/format_table.go index 137c6d5e5b3f..220ef0777eb9 100644 --- a/pkg/cli/format_table.go +++ b/pkg/cli/format_table.go @@ -29,7 +29,7 @@ import ( "github.com/olekukonko/tablewriter" ) -// rowStrIter is an iterator interface for the printQueryOutput function. It is +// rowStrIter is an iterator interface for the PrintQueryOutput function. It is // used so that results can be streamed to the row formatters as they arrive // to the CLI. type rowStrIter interface { @@ -38,16 +38,17 @@ type rowStrIter interface { Align() []int } -// rowSliceIter is an implementation of the rowStrIter interface and it is used +// RowSliceIter is an implementation of the rowStrIter interface and it is used // to wrap a slice of rows that have already been completely buffered into // memory. -type rowSliceIter struct { +type RowSliceIter struct { allRows [][]string index int align []int } -func (iter *rowSliceIter) Next() (row []string, err error) { +// Next returns next row of RowSliceIter. 
+func (iter *RowSliceIter) Next() (row []string, err error) { if iter.index >= len(iter.allRows) { return nil, io.EOF } @@ -56,11 +57,13 @@ func (iter *rowSliceIter) Next() (row []string, err error) { return row, nil } -func (iter *rowSliceIter) ToSlice() ([][]string, error) { +// ToSlice returns all rows of RowSliceIter. +func (iter *RowSliceIter) ToSlice() ([][]string, error) { return iter.allRows, nil } -func (iter *rowSliceIter) Align() []int { +// Align returns alignment setting of RowSliceIter. +func (iter *RowSliceIter) Align() []int { return iter.align } @@ -81,11 +84,11 @@ func convertAlign(align string) []int { return result } -// newRowSliceIter is an implementation of the rowStrIter interface and it is +// NewRowSliceIter is an implementation of the rowStrIter interface and it is // used when the rows have not been buffered into memory yet and we want to // stream them to the row formatters as they arrive over the network. -func newRowSliceIter(allRows [][]string, align string) *rowSliceIter { - return &rowSliceIter{ +func NewRowSliceIter(allRows [][]string, align string) *RowSliceIter { + return &RowSliceIter{ allRows: allRows, index: 0, align: convertAlign(align), @@ -639,9 +642,9 @@ func makeReporter(w io.Writer) (rowReporter, func(), error) { } } -// printQueryOutput takes a list of column names and a list of row +// PrintQueryOutput takes a list of column names and a list of row // contents writes a formatted table to 'w'. -func printQueryOutput(w io.Writer, cols []string, allRows rowStrIter) error { +func PrintQueryOutput(w io.Writer, cols []string, allRows rowStrIter) error { reporter, cleanup, err := makeReporter(w) if err != nil { return err diff --git a/pkg/cli/gen.go b/pkg/cli/gen.go index 295c1ef76b5a..22055c5c0b93 100644 --- a/pkg/cli/gen.go +++ b/pkg/cli/gen.go @@ -253,7 +253,7 @@ Output the list of cluster settings known to this binary. } cols := []string{"Setting", "Type", "Default", "Description"} return render(reporter, os.Stdout, - cols, newRowSliceIter(rows, "dddd"), nil /* completedHook */, nil /* noRowsHook*/) + cols, NewRowSliceIter(rows, "dddd"), nil /* completedHook */, nil /* noRowsHook*/) }, } diff --git a/pkg/cli/node.go b/pkg/cli/node.go index 1eb6ba9dfb8b..5c57a1a4b07a 100644 --- a/pkg/cli/node.go +++ b/pkg/cli/node.go @@ -74,7 +74,7 @@ func runLsNodes(cmd *cobra.Command, args []string) error { return err } - return printQueryOutput(os.Stdout, lsNodesColumnHeaders, newRowSliceIter(rows, "r")) + return PrintQueryOutput(os.Stdout, lsNodesColumnHeaders, NewRowSliceIter(rows, "r")) } var baseNodeColumnHeaders = []string{ @@ -129,8 +129,8 @@ func runStatusNode(cmd *cobra.Command, args []string) error { return err } - sliceIter := newRowSliceIter(rows, getStatusNodeAlignment()) - return printQueryOutput(os.Stdout, getStatusNodeHeaders(), sliceIter) + sliceIter := NewRowSliceIter(rows, getStatusNodeAlignment()) + return PrintQueryOutput(os.Stdout, getStatusNodeHeaders(), sliceIter) } func runStatusNodeInner(showDecommissioned bool, args []string) ([]string, [][]string, error) { @@ -528,8 +528,8 @@ signaling the affected nodes to participate in the cluster again. 
} func printDecommissionStatus(resp serverpb.DecommissionStatusResponse) error { - return printQueryOutput(os.Stdout, decommissionNodesColumnHeaders, - newRowSliceIter(decommissionResponseValueToRows(resp.Status), decommissionResponseAlignment())) + return PrintQueryOutput(os.Stdout, decommissionNodesColumnHeaders, + NewRowSliceIter(decommissionResponseValueToRows(resp.Status), decommissionResponseAlignment())) } func runRecommissionNode(cmd *cobra.Command, args []string) error { diff --git a/pkg/cli/sql.go b/pkg/cli/sql.go index 0606ee47b8d1..cfc8bba48116 100644 --- a/pkg/cli/sql.go +++ b/pkg/cli/sql.go @@ -448,9 +448,9 @@ func (c *cliState) handleSet(args []string, nextState, errState cliStateEnum) cl } optData = append(optData, []string{n, options[n].display(), options[n].description}) } - err := printQueryOutput(os.Stdout, + err := PrintQueryOutput(os.Stdout, []string{"Option", "Value", "Description"}, - newRowSliceIter(optData, "lll" /*align*/)) + NewRowSliceIter(optData, "lll" /*align*/)) if err != nil { panic(err) } From 4e5389a9b802b991e95c9a1d21c8743cf98be451 Mon Sep 17 00:00:00 2001 From: Nathan VanBenschoten Date: Wed, 21 Apr 2021 19:17:12 -0400 Subject: [PATCH 2/4] kv: don't clear raftRequestQueue of right-hand side of Range split This commit fixes a test flake of `TestLeaderAfterSplit` I observed in CI and which we've seen at least once in https://github.com/cockroachdb/cockroach/issues/43564#issuecomment-794543331. I bisected the flake back to a591707, but that wasn't the real source of the flakiness - the move from `multiTestContext` to `TestCluster` just changed transport mechanism between replicas and revealed an existing bug. The real issue here was that, upon applying a split, any previously established `raftRequestQueue` to the RHS replica was discarded. The effect of this is that we could see the following series of events: ``` 1. r1 is created from a split 2. r1 campaigns to establish a leader for the new range 3. r1 sends MsgPreVote msgs to r2 and r3 4. s2 and s3 both receive the messages for the uninitialized r2 and r3, respectively. 5. raftRequestQueues are established for r2 and r3, and the MsgPreVotes are added 6. the split triggers to create r2 and r3 finally fire 7. the raftRequestQueues for r2 and r3 are discarded 8. the election stalls indefinitely, because the test sets RaftElectionTimeoutTicks=1000000 ``` Of course, in real deployments, `RaftElectionTimeoutTicks` will never be set so high, so a new election will be called again after about 3 seconds. Still, this could cause unavailability immediately after a split for about 3s even in real deployments, so it seems worthwhile to fix. This commit fixes the issue by removing the logic to discard an uninitialized replica's `raftRequestQueue` upon applying a split that initializes the replica. That logic looks quite intentional, but if we look back at when it was added, we see that it wasn't entirely deliberate. The code was added in d3b0e73, which extracted everything except the call to `s.mu.replicas.Delete(int64(rangeID))` from `unlinkReplicaByRangeIDLocked`. So the change wasn't intentionally discarding the queue, it was just trying not to change the existing behavior. This change is safe and does not risk leaking the `raftRequestQueue` because we are removing from `s.mu.uninitReplicas` but will immediately call into `addReplicaInternalLocked` to add an initialized replica. 
Release note (bug fix): Fix a rare race that could lead to a 3 second stall before a Raft leader was elected on a Range immediately after it was split off from its left-hand neighbor. --- pkg/kv/kvserver/store_split.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pkg/kv/kvserver/store_split.go b/pkg/kv/kvserver/store_split.go index 4c6e9db5b192..081cf6fc3dfd 100644 --- a/pkg/kv/kvserver/store_split.go +++ b/pkg/kv/kvserver/store_split.go @@ -311,11 +311,9 @@ func (s *Store) SplitRange( if exRng != rightReplOrNil { log.Fatalf(ctx, "found unexpected uninitialized replica: %s vs %s", exRng, rightReplOrNil) } - // NB: We only remove from uninitReplicas and the replicaQueues maps here - // so that we don't leave open a window where a replica is temporarily not - // present in Store.mu.replicas. + // NB: We only remove from uninitReplicas here so that we don't leave open a + // window where a replica is temporarily not present in Store.mu.replicas. delete(s.mu.uninitReplicas, rightDesc.RangeID) - s.replicaQueues.Delete(int64(rightDesc.RangeID)) } leftRepl.setDescRaftMuLocked(ctx, newLeftDesc) From 3e4adfcd7e89abcc345e8fb3f038b60b5e2de94b Mon Sep 17 00:00:00 2001 From: Nathan VanBenschoten Date: Wed, 21 Apr 2021 19:58:16 -0400 Subject: [PATCH 3/4] kv: don't allow node liveness to regress in Gossip network In #64028, we fixed a long-standing flake in `TestLeaderAfterSplit`. However, the test had actually gotten more flaky recently, which I bisected back to df826cd. The problem we occasionally see with the test is that all three replicas of a post-split Range call an election, resulting in a hung vote. Since the test is configured with RaftElectionTimeoutTicks=1000000, a follow-up election is never called, so the test times out. After some debugging, I found that the range would occasionally split while the non-leaseholder nodes (n2 and n3) thought that the leaseholder node (n1) was not live. This meant that their call to `shouldCampaignOnWake` in the split trigger considered the RHS's epoch-based lease to be invalid (state = ERROR). So all three replicas would call an election and the test would get stuck. The offending commit introduced this new flake because of this change: https://github.com/cockroachdb/cockroach/commit/df826cdf700a79948d083827ca67967016a1a1af#diff-488a090afc4b6eaf56cd6d13b347bac67cb3313ce11c49df9ee8cd95fd73b3e8R454 Now that the call to `MaybeGossipNodeLiveness` is asynchronous on the node-liveness range, it was possible for two calls to `MaybeGossipNodeLiveness` to race, one asynchronously triggered by `leasePostApplyLocked` and one synchronously triggered by `handleReadWriteLocalEvalResult` due to a node liveness update. This allowed for the following ordering of events: ``` - async call reads liveness(nid:1 epo:0 exp:0,0) - sync call writes and then reads liveness(nid:1 epo:1 exp:1619645671.921265300,0) - sync call adds liveness(nid:1 epo:1 exp:1619645671.921265300,0) to gossip - async call adds liveness(nid:1 epo:0 exp:0,0) to gossip ``` Once this had occurred, n2 and n3 never again considered n1 live. Gossip never recovered from this state because the liveness record was never heartbeated again, due to the test's configuration of `RaftElectionTimeoutTicks=1000000`. This commit fixes the bug by ensuring that all calls to MaybeGossipNodeLiveness and MaybeGossipSystemConfig hold the raft mutex. This provides the necessary serialization to avoid data races, which was actually already documented on MaybeGossipSystemConfig.
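To make the fix easier to follow, here is a minimal, self-contained Go sketch of the locking pattern this commit adopts; the type, field, and function names below are stand-ins for illustration, not the actual kvserver code. Both the asynchronous lease trigger and the synchronous below-Raft path take the Raft mutex before reading and gossiping the record, so a stale read can no longer be published after a newer one.

```
package main

import (
	"fmt"
	"sync"
)

type replica struct {
	raftMu sync.Mutex // stand-in for the per-replica Raft mutex

	mu struct {
		sync.Mutex
		liveness string // stand-in for the node liveness record
	}
}

// gossipNodeLivenessRaftMuLocked must only be called with raftMu held; holding
// it keeps the "read the record, then publish it" sequence atomic with respect
// to any other gossip attempt.
func (r *replica) gossipNodeLivenessRaftMuLocked() {
	r.mu.Lock()
	rec := r.mu.liveness
	r.mu.Unlock()
	fmt.Println("gossiping:", rec) // stand-in for adding the record to gossip
}

func main() {
	var r replica
	r.mu.liveness = "nid:1 epo:1"

	var wg sync.WaitGroup
	wg.Add(1)
	// Asynchronous lease trigger: re-acquire raftMu inside the task, as the
	// patched leasePostApplyLocked now does, instead of gossiping lock-free.
	go func() {
		defer wg.Done()
		r.raftMu.Lock()
		defer r.raftMu.Unlock()
		r.gossipNodeLivenessRaftMuLocked()
	}()

	// Synchronous path: in the real code this runs below Raft with raftMu
	// already held, so both publishers are serialized on the same mutex.
	r.raftMu.Lock()
	r.gossipNodeLivenessRaftMuLocked()
	r.raftMu.Unlock()

	wg.Wait()
}
```

The real change additionally asserts this invariant with `r.raftMu.AssertHeld()` inside the renamed `*RaftMuLocked` helpers, as shown in the diff below.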
--- pkg/kv/kvserver/client_replica_test.go | 8 +++- pkg/kv/kvserver/replica_gossip.go | 57 +++++++++++++++----------- pkg/kv/kvserver/replica_proposal.go | 17 +++++--- 3 files changed, 51 insertions(+), 31 deletions(-) diff --git a/pkg/kv/kvserver/client_replica_test.go b/pkg/kv/kvserver/client_replica_test.go index 9e0e2dffb00f..28c7356abcf0 100644 --- a/pkg/kv/kvserver/client_replica_test.go +++ b/pkg/kv/kvserver/client_replica_test.go @@ -3211,7 +3211,9 @@ func TestStrictGCEnforcement(t *testing.T) { _, r := getFirstStoreReplica(t, s, tableKey) if _, z := r.DescAndZone(); z.GC.TTLSeconds != int32(exp) { _, sysCfg := getFirstStoreReplica(t, tc.Server(i), keys.SystemConfigSpan.Key) - require.NoError(t, sysCfg.MaybeGossipSystemConfig(ctx)) + sysCfg.RaftLock() + require.NoError(t, sysCfg.MaybeGossipSystemConfigRaftMuLocked(ctx)) + sysCfg.RaftUnlock() return errors.Errorf("expected %d, got %d", exp, z.GC.TTLSeconds) } } @@ -3225,7 +3227,9 @@ func TestStrictGCEnforcement(t *testing.T) { for i := 0; i < tc.NumServers(); i++ { s, r := getFirstStoreReplica(t, tc.Server(i), keys.SystemConfigSpan.Key) if kvserver.StrictGCEnforcement.Get(&s.ClusterSettings().SV) != val { - require.NoError(t, r.MaybeGossipSystemConfig(ctx)) + r.RaftLock() + require.NoError(t, r.MaybeGossipSystemConfigRaftMuLocked(ctx)) + r.RaftUnlock() return errors.Errorf("expected %v, got %v", val, !val) } } diff --git a/pkg/kv/kvserver/replica_gossip.go b/pkg/kv/kvserver/replica_gossip.go index 384360f2dd69..8e265d27f8b8 100644 --- a/pkg/kv/kvserver/replica_gossip.go +++ b/pkg/kv/kvserver/replica_gossip.go @@ -64,20 +64,23 @@ func (r *Replica) shouldGossip(ctx context.Context) bool { return r.OwnsValidLease(ctx, r.store.Clock().NowAsClockTimestamp()) } -// MaybeGossipSystemConfig scans the entire SystemConfig span and gossips it. -// Further calls come from the trigger on EndTxn or range lease acquisition. +// MaybeGossipSystemConfigRaftMuLocked scans the entire SystemConfig span and +// gossips it. Further calls come from the trigger on EndTxn or range lease +// acquisition. // -// Note that MaybeGossipSystemConfig gossips information only when the -// lease is actually held. The method does not request a range lease -// here since RequestLease and applyRaftCommand call the method and we -// need to avoid deadlocking in redirectOnOrAcquireLease. +// Note that MaybeGossipSystemConfigRaftMuLocked gossips information only when +// the lease is actually held. The method does not request a range lease here +// since RequestLease and applyRaftCommand call the method and we need to avoid +// deadlocking in redirectOnOrAcquireLease. // -// MaybeGossipSystemConfig must only be called from Raft commands -// (which provide the necessary serialization to avoid data races). +// MaybeGossipSystemConfigRaftMuLocked must only be called from Raft commands +// while holding the raftMu (which provide the necessary serialization to avoid +// data races). // -// TODO(nvanbenschoten,bdarnell): even though this is best effort, we -// should log louder when we continually fail to gossip system config. -func (r *Replica) MaybeGossipSystemConfig(ctx context.Context) error { +// TODO(nvanbenschoten,bdarnell): even though this is best effort, we should log +// louder when we continually fail to gossip system config. 
+func (r *Replica) MaybeGossipSystemConfigRaftMuLocked(ctx context.Context) error { + r.raftMu.AssertHeld() if r.store.Gossip() == nil { log.VEventf(ctx, 2, "not gossiping system config because gossip isn't initialized") return nil @@ -124,30 +127,36 @@ func (r *Replica) MaybeGossipSystemConfig(ctx context.Context) error { return nil } -// MaybeGossipSystemConfigIfHaveFailure is a trigger to gossip the system config -// due to an abort of a transaction keyed in the system config span. It will -// call MaybeGossipSystemConfig if failureToGossipSystemConfig is true. -func (r *Replica) MaybeGossipSystemConfigIfHaveFailure(ctx context.Context) error { +// MaybeGossipSystemConfigIfHaveFailureRaftMuLocked is a trigger to gossip the +// system config due to an abort of a transaction keyed in the system config +// span. It will call MaybeGossipSystemConfigRaftMuLocked if +// failureToGossipSystemConfig is true. +func (r *Replica) MaybeGossipSystemConfigIfHaveFailureRaftMuLocked(ctx context.Context) error { r.mu.RLock() failed := r.mu.failureToGossipSystemConfig r.mu.RUnlock() if !failed { return nil } - return r.MaybeGossipSystemConfig(ctx) + return r.MaybeGossipSystemConfigRaftMuLocked(ctx) } -// MaybeGossipNodeLiveness gossips information for all node liveness -// records stored on this range. To scan and gossip, this replica -// must hold the lease to a range which contains some or all of the -// node liveness records. After scanning the records, it checks -// against what's already in gossip and only gossips records which -// are out of date. -func (r *Replica) MaybeGossipNodeLiveness(ctx context.Context, span roachpb.Span) error { +// MaybeGossipNodeLivenessRaftMuLocked gossips information for all node liveness +// records stored on this range. To scan and gossip, this replica must hold the +// lease to a range which contains some or all of the node liveness records. +// After scanning the records, it checks against what's already in gossip and +// only gossips records which are out of date. +// +// MaybeGossipNodeLivenessRaftMuLocked must only be called from Raft commands +// while holding the raftMu (which provide the necessary serialization to avoid +// data races). +func (r *Replica) MaybeGossipNodeLivenessRaftMuLocked( + ctx context.Context, span roachpb.Span, +) error { + r.raftMu.AssertHeld() if r.store.Gossip() == nil || !r.IsInitialized() { return nil } - if !r.ContainsKeyRange(span.Key, span.EndKey) || !r.shouldGossip(ctx) { return nil } diff --git a/pkg/kv/kvserver/replica_proposal.go b/pkg/kv/kvserver/replica_proposal.go index 5bd7f506744f..7ce8818c310c 100644 --- a/pkg/kv/kvserver/replica_proposal.go +++ b/pkg/kv/kvserver/replica_proposal.go @@ -503,10 +503,17 @@ func (r *Replica) leasePostApplyLocked( // NB: run these in an async task to keep them out of the critical section // (r.mu is held here). _ = r.store.stopper.RunAsyncTask(ctx, "lease-triggers", func(ctx context.Context) { - if err := r.MaybeGossipSystemConfig(ctx); err != nil { + // Re-acquire the raftMu, as we are now in an async task. + r.raftMu.Lock() + defer r.raftMu.Unlock() + if _, err := r.IsDestroyed(); err != nil { + // Nothing to do. 
+ return + } + if err := r.MaybeGossipSystemConfigRaftMuLocked(ctx); err != nil { log.Errorf(ctx, "%v", err) } - if err := r.MaybeGossipNodeLiveness(ctx, keys.NodeLivenessSpan); err != nil { + if err := r.MaybeGossipNodeLivenessRaftMuLocked(ctx, keys.NodeLivenessSpan); err != nil { log.Errorf(ctx, "%v", err) } @@ -700,21 +707,21 @@ func (r *Replica) handleReadWriteLocalEvalResult(ctx context.Context, lResult re } if lResult.MaybeGossipSystemConfig { - if err := r.MaybeGossipSystemConfig(ctx); err != nil { + if err := r.MaybeGossipSystemConfigRaftMuLocked(ctx); err != nil { log.Errorf(ctx, "%v", err) } lResult.MaybeGossipSystemConfig = false } if lResult.MaybeGossipSystemConfigIfHaveFailure { - if err := r.MaybeGossipSystemConfigIfHaveFailure(ctx); err != nil { + if err := r.MaybeGossipSystemConfigIfHaveFailureRaftMuLocked(ctx); err != nil { log.Errorf(ctx, "%v", err) } lResult.MaybeGossipSystemConfigIfHaveFailure = false } if lResult.MaybeGossipNodeLiveness != nil { - if err := r.MaybeGossipNodeLiveness(ctx, *lResult.MaybeGossipNodeLiveness); err != nil { + if err := r.MaybeGossipNodeLivenessRaftMuLocked(ctx, *lResult.MaybeGossipNodeLiveness); err != nil { log.Errorf(ctx, "%v", err) } lResult.MaybeGossipNodeLiveness = nil From d97419369a542ce16c8b32abd98c93277f716b19 Mon Sep 17 00:00:00 2001 From: Steven Danna Date: Thu, 22 Apr 2021 16:04:34 +0100 Subject: [PATCH 4/4] build: retry register.cockroachdb.com requests Retry transient failures when getting a developer license. We may want to avoid the retries here as a forcing function to make that service more reliable, but given that we might see timeouts from non-service related local networking problems, I'd prefer not to fail the test run because of a single curl failure. Release note: None --- build/teamcity-local-roachtest.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build/teamcity-local-roachtest.sh b/build/teamcity-local-roachtest.sh index 33f0e23885f2..6cd0ea88a58a 100755 --- a/build/teamcity-local-roachtest.sh +++ b/build/teamcity-local-roachtest.sh @@ -6,7 +6,7 @@ source "$(dirname "${0}")/teamcity-support.sh" tc_start_block "Prepare environment" # Grab a testing license good for one hour. -COCKROACH_DEV_LICENSE=$(curl -f "https://register.cockroachdb.com/api/prodtest") +COCKROACH_DEV_LICENSE=$(curl --retry 3 --retry-connrefused -f "https://register.cockroachdb.com/api/prodtest") run mkdir -p artifacts maybe_ccache tc_end_block "Prepare environment"