diff --git a/pkg/cmd/roachtest/test/test_interface.go b/pkg/cmd/roachtest/test/test_interface.go index 6ccbc94adbb9..7da9b1c030d5 100644 --- a/pkg/cmd/roachtest/test/test_interface.go +++ b/pkg/cmd/roachtest/test/test_interface.go @@ -37,6 +37,7 @@ type Test interface { VersionsBinaryOverride() map[string]string Skip(args ...interface{}) Skipf(format string, args ...interface{}) + Error(args ...interface{}) Errorf(string, ...interface{}) FailNow() Fatal(args ...interface{}) diff --git a/pkg/cmd/roachtest/test_impl.go b/pkg/cmd/roachtest/test_impl.go index d3725a639a54..49d18125c021 100644 --- a/pkg/cmd/roachtest/test_impl.go +++ b/pkg/cmd/roachtest/test_impl.go @@ -11,13 +11,10 @@ package main import ( - "bytes" "context" "fmt" "io" - // For the debug http handlers. - _ "net/http/pprof" - "runtime" + "os" "strings" "time" @@ -27,6 +24,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/syncutil" "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/cockroachdb/cockroach/pkg/util/version" + "github.com/cockroachdb/errors" "github.com/petermattis/goid" ) @@ -71,23 +69,33 @@ type testImpl struct { mu struct { syncutil.RWMutex - done bool - failed bool - timeout bool // if failed == true, this indicates whether the test timed out - // cancel, if set, is called from the t.Fatal() family of functions when the - // test is being marked as failed (i.e. when the failed field above is also - // set). This is used to cancel the context passed to t.spec.Run(), so async - // test goroutines can be notified. + done bool + cancel func() failLoc struct { file string line int } - failureMsg string + + // Errors are all the errors passed to `addFailure`, in order of + // these calls. + // + // NB: the first failure is not always the relevant one due to: + // https://github.com/cockroachdb/cockroach/issues/44436 + errors []error + // If len(errors)>0, this indicates whether the test timed out + // cancel, if set, is called from the t.Fatal() family of functions when the + // test is being marked as failed (i.e. when the failed field above is also + // set). This is used to cancel the context passed to t.spec.Run(), so async + // test goroutines can be notified. + // status is a map from goroutine id to status set by that goroutine. A // special goroutine is indicated by runnerID; that one provides the test's // "main status". status map[int64]testStatus + + // TODO(test-eng): this should just be an in-mem (ring) buffer attached to + // `t.L()`. output []byte } // Map from version to path to the cockroach binary to be used when @@ -246,6 +254,23 @@ func (t *testImpl) Skipf(format string, args ...interface{}) { panic(errTestFatal) } +// This creates an error from the first arg, and adds each subsequent arg +// as error detail +func argsToErr(depth int, args ...interface{}) error { + // NB: we'd probably not allow multiple arguments here and we'd want + // the one remaining arg to be an `error`, but we are trying to be + // compatible with `(*testing.T).Fatal`. + var err error + for _, arg := range args { + if err == nil { + err = errors.NewWithDepthf(depth+1, "%v", arg) + continue + } + err = errors.WithDetailf(err, "%v", arg) + } + return err +} + // Fatal marks the test as failed, prints the args to t.L(), and calls // panic(errTestFatal). It can be called multiple times. // @@ -255,148 +280,80 @@ func (t *testImpl) Skipf(format string, args ...interface{}) { // ATTENTION: Since this calls panic(errTestFatal), it should only be called // from a test's closure. The test runner itself should never call this. func (t *testImpl) Fatal(args ...interface{}) { - t.markFailedInner("" /* format */, args...) + t.addFailure(argsToErr(1, args...)) panic(errTestFatal) } // Fatalf is like Fatal, but takes a format string. func (t *testImpl) Fatalf(format string, args ...interface{}) { - t.markFailedInner(format, args...) + t.addFailure(errors.NewWithDepthf(1, format, args...)) panic(errTestFatal) } // FailNow implements the TestingT interface. func (t *testImpl) FailNow() { - t.Fatal() + t.addFailure(errors.NewWithDepthf(1, "FailNow called")) + panic(errTestFatal) } -// Errorf implements the TestingT interface. -func (t *testImpl) Errorf(format string, args ...interface{}) { - t.markFailedInner(format, args...) +func (t *testImpl) Error(args ...interface{}) { + t.addFailure(argsToErr(1, args...)) } -func (t *testImpl) markFailedInner(format string, args ...interface{}) { - // Skip two frames: our own and the caller. - if format != "" { - t.printfAndFail(2 /* skip */, format, args...) - } else { - t.printAndFail(2 /* skip */, args...) - } +// Errorf implements the TestingT interface. +func (t *testImpl) Errorf(format string, args ...interface{}) { + t.addFailure(errors.NewWithDepthf(1, format, args...)) } -func (t *testImpl) printAndFail(skip int, args ...interface{}) { - var msg string - if len(args) == 1 { - // If we were passed only an error, then format it with "%+v" in order to - // get any stack traces. - if err, ok := args[0].(error); ok { - msg = fmt.Sprintf("%+v", err) +func formatFailure(b *strings.Builder, errs ...error) { + for i, err := range errs { + if i > 0 { + fmt.Fprintln(b) } + file, line, fn, ok := errors.GetOneLineSource(err) + if !ok { + file, line, fn = "", 0, "unknown" + } + fmt.Fprintf(b, "(%s:%d).%s: %v", file, line, fn, err) } - if msg == "" { - msg = fmt.Sprint(args...) - } - t.failWithMsg(t.decorate(skip+1, msg)) -} - -func (t *testImpl) printfAndFail(skip int, format string, args ...interface{}) { - if format == "" { - panic(fmt.Sprintf("invalid empty format. args: %s", args)) - } - t.failWithMsg(t.decorate(skip+1, fmt.Sprintf(format, args...))) } -func (t *testImpl) failWithMsg(msg string) { +func (t *testImpl) addFailure(reportErr error) { t.mu.Lock() defer t.mu.Unlock() - prefix := "" - if t.mu.failed { - prefix = "[not the first failure] " - // NB: the first failure is not always the relevant one due to: - // https://github.com/cockroachdb/cockroach/issues/44436 - // - // So we chain all failures together in the order in which we see - // them. - msg = "\n" + msg + t.mu.errors = append(t.mu.errors, reportErr) + + var b strings.Builder + formatFailure(&b, reportErr) + msg := b.String() + + t.L().Printf("test failure #%d: %s", len(t.mu.errors), msg) + // Also dump the verbose error (incl. all stack traces) to a log file, in case + // we need it. The stacks are sometimes helpful, but we don't want them in the + // main log as they are highly verbose. + { + cl, err := t.L().ChildLogger( + fmt.Sprintf("failure_%d", len(t.mu.errors)), + logger.QuietStderr, logger.QuietStdout, + ) + if err == nil { + // We don't actually log through this logger since it adds an unrelated + // file:line caller (namely ours). The error already has stack traces + // so it's better to write only it to the file to avoid confusion. + path := cl.File.Name() + cl.Close() // we just wanted the filename + _ = os.WriteFile(path, []byte(fmt.Sprintf("%+v", reportErr)), 0644) + } } - t.L().Printf("%stest failure: %s", prefix, msg) - t.mu.failed = true - t.mu.failureMsg += msg t.mu.output = append(t.mu.output, msg...) + t.mu.output = append(t.mu.output, '\n') if t.mu.cancel != nil { t.mu.cancel() } } -// Args: -// skip: The number of stack frames to exclude from the result. 0 means that -// -// the caller will be the first frame identified. 1 means the caller's caller -// will be the first, etc. -func (t *testImpl) decorate(skip int, s string) string { - // Skip two extra frames to account for this function and runtime.Callers - // itself. - var pc [50]uintptr - n := runtime.Callers(2+skip, pc[:]) - if n == 0 { - panic("zero callers found") - } - - buf := new(bytes.Buffer) - frames := runtime.CallersFrames(pc[:n]) - sep := "\t" - runnerFound := false - for { - if runnerFound { - break - } - - frame, more := frames.Next() - if !more { - break - } - if frame.Function == t.runner { - runnerFound = true - - // Handle the special case of the runner function being the caller of - // t.Fatal(). In that case, that's the line to be used for issue creation. - if t.mu.failLoc.file == "" { - t.mu.failLoc.file = frame.File - t.mu.failLoc.line = frame.Line - } - } - if !t.mu.failed && !runnerFound { - // Keep track of the highest stack frame that is lower than the t.runner - // stack frame. This is used to determine the author of that line of code - // and issue assignment. - t.mu.failLoc.file = frame.File - t.mu.failLoc.line = frame.Line - } - file := frame.File - if index := strings.LastIndexByte(file, '/'); index >= 0 { - file = file[index+1:] - } - fmt.Fprintf(buf, "%s%s:%d", sep, file, frame.Line) - sep = "," - } - buf.WriteString(": ") - - lines := strings.Split(s, "\n") - if l := len(lines); l > 1 && lines[l-1] == "" { - lines = lines[:l-1] - } - for i, line := range lines { - if i > 0 { - buf.WriteString("\n\t\t") - } - buf.WriteString(line) - } - buf.WriteByte('\n') - return buf.String() -} - func (t *testImpl) duration() time.Duration { return t.end.Sub(t.start) } @@ -404,13 +361,19 @@ func (t *testImpl) duration() time.Duration { func (t *testImpl) Failed() bool { t.mu.RLock() defer t.mu.RUnlock() - return t.mu.failed + return t.failedRLocked() +} + +func (t *testImpl) failedRLocked() bool { + return len(t.mu.errors) > 0 } func (t *testImpl) FailureMsg() string { t.mu.RLock() defer t.mu.RUnlock() - return t.mu.failureMsg + var b strings.Builder + formatFailure(&b, t.mu.errors...) + return b.String() } func (t *testImpl) ArtifactsDir() string { diff --git a/pkg/cmd/roachtest/test_runner.go b/pkg/cmd/roachtest/test_runner.go index 6bb2b33f280c..2e325dd98924 100644 --- a/pkg/cmd/roachtest/test_runner.go +++ b/pkg/cmd/roachtest/test_runner.go @@ -576,22 +576,23 @@ func (r *testRunner) runWorker( testToRun.spec.Name, clusterCreateErr) } } - // Prepare the test's logger. - logPath := "" - var artifactsDir string - var artifactsSpec string - if artifactsRootDir != "" { - escapedTestName := teamCityNameEscape(testToRun.spec.Name) - runSuffix := "run_" + strconv.Itoa(testToRun.runNum) - - artifactsDir = filepath.Join(filepath.Join(artifactsRootDir, escapedTestName), runSuffix) - logPath = filepath.Join(artifactsDir, "test.log") - - // Map artifacts/TestFoo/run_?/** => TestFoo/run_?/**, i.e. collect the artifacts - // for this test exactly as they are laid out on disk (when the time - // comes). - artifactsSpec = fmt.Sprintf("%s/%s/** => %s/%s", filepath.Join(literalArtifactsDir, escapedTestName), runSuffix, escapedTestName, runSuffix) + // Prepare the test's logger. Always set this up with real files, using a + // temp dir if necessary. This simplifies testing. + if artifactsRootDir == "" { + artifactsRootDir, _ = os.MkdirTemp("", "roachtest-logger") } + + escapedTestName := teamCityNameEscape(testToRun.spec.Name) + runSuffix := "run_" + strconv.Itoa(testToRun.runNum) + + artifactsDir := filepath.Join(filepath.Join(artifactsRootDir, escapedTestName), runSuffix) + logPath := filepath.Join(artifactsDir, "test.log") + + // Map artifacts/TestFoo/run_?/** => TestFoo/run_?/**, i.e. collect the artifacts + // for this test exactly as they are laid out on disk (when the time + // comes). + artifactsSpec := fmt.Sprintf("%s/%s/** => %s/%s", filepath.Join(literalArtifactsDir, escapedTestName), runSuffix, escapedTestName, runSuffix) + testL, err := logger.RootLogger(logPath, teeOpt) if err != nil { return err @@ -621,7 +622,7 @@ func (r *testRunner) runWorker( oldName := t.spec.Name oldOwner := t.spec.Owner // Generate failure reason and mark the test failed to preclude fetching (cluster) artifacts. - t.printAndFail(0, clusterCreateErr) + t.addFailure(clusterCreateErr) issueOutput := "test %s was skipped due to %s" issueOutput = fmt.Sprintf(issueOutput, oldName, t.FailureMsg()) // N.B. issue title is of the form "roachtest: ${t.spec.Name} failed" (see UnitTestFormatter). @@ -668,7 +669,7 @@ func (r *testRunner) runWorker( shout(ctx, l, stdout, "test returned error: %s: %s", t.Name(), err) // Mark the test as failed if it isn't already. if !t.Failed() { - t.printAndFail(0 /* skip */, err) + t.addFailure(err) } } else { msg := "test passed: %s (run %d)" @@ -805,10 +806,10 @@ func (r *testRunner) runTest( // during the post-flight checks; the test itself runs on a different // goroutine and has similar code to terminate errTestFatal. if err := recover(); err != nil && err != errTestFatal { - t.mu.Lock() - t.mu.failed = true - t.mu.output = append(t.mu.output, t.decorate(0 /* skip */, fmt.Sprint(err))...) - t.mu.Unlock() + if _, ok := err.(error); !ok { + err = errors.Newf("%v", err) + } + t.addFailure(err.(error)) } t.mu.Lock() @@ -817,9 +818,7 @@ func (r *testRunner) runTest( durationStr := fmt.Sprintf("%.2fs", t.duration().Seconds()) if t.Failed() { - t.mu.Lock() - output := fmt.Sprintf("test artifacts and logs in: %s\n", t.ArtifactsDir()) + string(t.mu.output) - t.mu.Unlock() + output := fmt.Sprintf("test artifacts and logs in: %s\n%s", t.ArtifactsDir(), t.FailureMsg()) if teamCity { shout(ctx, l, stdout, "##teamcity[testFailed name='%s' details='%s' flowId='%s']", @@ -920,6 +919,8 @@ func (r *testRunner) runTest( // produced by t.Fatal*(). if r := recover(); r != nil && r != errTestFatal { // NB: we're careful to avoid t.Fatalf here, which re-panics. + // Note that the error will be logged to a file, and the stack will + // contain the source of the panic. t.Errorf("test panicked: %v", r) } }() diff --git a/pkg/cmd/roachtest/test_test.go b/pkg/cmd/roachtest/test_test.go index 0ed208df8c98..b61659325479 100644 --- a/pkg/cmd/roachtest/test_test.go +++ b/pkg/cmd/roachtest/test_test.go @@ -15,6 +15,7 @@ import ( "context" "io" "math/rand" + "path/filepath" "regexp" "sort" "strings" @@ -184,6 +185,7 @@ func TestRunnerRun(t *testing.T) { if exp := c.expOut; exp != "" && !strings.Contains(out, exp) { t.Fatalf("'%s' not found in output:\n%s", exp, out) } + t.Log(out) }) } } @@ -253,7 +255,13 @@ func setupRunnerTest(t *testing.T, r testRegistryImpl, testFilters []string) *ru var stdout syncedBuffer var stderr syncedBuffer lopt := loggingOpt{ - l: nilLogger(), + l: func() *logger.Logger { + l, err := logger.RootLogger(filepath.Join(t.TempDir(), "test.log"), logger.NoTee) + if err != nil { + panic(err) + } + return l + }(), tee: logger.NoTee, stdout: &stdout, stderr: &stderr, diff --git a/pkg/testutils/lint/passes/fmtsafe/fmtsafe.go b/pkg/testutils/lint/passes/fmtsafe/fmtsafe.go index f55046ac48ee..f7f4029f3695 100644 --- a/pkg/testutils/lint/passes/fmtsafe/fmtsafe.go +++ b/pkg/testutils/lint/passes/fmtsafe/fmtsafe.go @@ -251,7 +251,8 @@ func checkCallExpr(pass *analysis.Pass, enclosingFnName string, call *ast.CallEx // Tip is exported for use in tests. var Tip = ` -Tip: use YourFuncf("descriptive prefix %%s", ...) or list new formatting wrappers in pkg/testutils/lint/passes/fmtsafe/functions.go.` +Tip: use YourFuncf("descriptive prefix %%s", ...) or list new formatting wrappers in pkg/testutils/lint/passes/fmtsafe/functions.go. +N.B. additional entry is required functions in the main package. See functions.go#requireConstFmt` func hasNoLintComment(pass *analysis.Pass, call *ast.CallExpr, idx int) bool { fPos, f := findContainingFile(pass, call) diff --git a/pkg/testutils/lint/passes/fmtsafe/functions.go b/pkg/testutils/lint/passes/fmtsafe/functions.go index 8b3fbb104f59..5bd4fe3ff1b0 100644 --- a/pkg/testutils/lint/passes/fmtsafe/functions.go +++ b/pkg/testutils/lint/passes/fmtsafe/functions.go @@ -32,8 +32,14 @@ var requireConstMsg = map[string]bool{ "(*github.com/cockroachdb/cockroach/pkg/sql.optPlanningCtx).log": true, } -// requireConstFmt records functions for which the string arg -// before the final ellipsis must be a constant string. +/* +requireConstFmt records functions for which the string arg +before the final ellipsis must be a constant string. + +Definitions surrounded in parentheses are functions attached to a struct. +For functions defined in the main package, a *second* entry is required +in the form (main.yourStruct).yourFuncF +*/ var requireConstFmt = map[string]bool{ // Logging things. "log.Printf": true, @@ -85,6 +91,14 @@ var requireConstFmt = map[string]bool{ "(*github.com/cockroachdb/cockroach/pkg/util/grpcutil.grpcLogger).Errorf": true, "(*github.com/cockroachdb/cockroach/pkg/util/grpcutil.grpcLogger).Fatalf": true, + // Both of these signatures need to be included for the linter to not flag + // roachtest testImpl.Errorf since it is in the main package + "(*main.testImpl).Errorf": true, + "(*github.com/cockroachdb/cockroach/pkg/cmd/roachtest.testImpl).Errorf": true, + + "(*main.testImpl).Fatalf": true, + "(*github.com/cockroachdb/cockroach/pkg/cmd/roachtest.testImpl).Fatalf": true, + "(*github.com/cockroachdb/cockroach/pkg/kv/kvserver.raftLogger).Debugf": true, "(*github.com/cockroachdb/cockroach/pkg/kv/kvserver.raftLogger).Infof": true, "(*github.com/cockroachdb/cockroach/pkg/kv/kvserver.raftLogger).Warningf": true,