roachprod-microbench: improvements to reporting

This change introduces a few small improvements to the reporting capabilities in roachprod-microbench. Missing microbenchmarks are now classified by timeouts, errors, or skipped status. These will be outputted to the log for inspection afterward. Also adds a quiet mode for reducing unnecessary output on CI builds.
srosenberg · May 10, 2023 · bed253e · bed253e
1 parent a961c9d
commit bed253e
Show file tree

Hide file tree

Showing 7 changed files with 106 additions and 43 deletions.
diff --git a/pkg/cmd/roachprod-microbench/BUILD.bazel b/pkg/cmd/roachprod-microbench/BUILD.bazel
@@ -17,6 +17,7 @@ go_library(
         "//pkg/cmd/roachprod-microbench/cluster",
         "//pkg/cmd/roachprod-microbench/google",
         "//pkg/roachprod",
+        "//pkg/roachprod/config",
         "//pkg/roachprod/logger",
         "//pkg/roachprod/ssh",
         "//pkg/roachprod/vm",

diff --git a/pkg/cmd/roachprod-microbench/benchmark.go b/pkg/cmd/roachprod-microbench/benchmark.go
@@ -37,6 +37,12 @@ type benchmarkIndexed struct {
 	index int
 }
 
+type benchmarkExtractionResult struct {
+	results [][]string
+	errors  bool
+	skipped bool
+}
+
 // prepareCluster prepares the cluster for executing microbenchmarks. It copies
 // the binaries to the remote cluster and extracts it. It also copies the lib
 // directory to the remote cluster. The number of nodes in the cluster is
@@ -130,7 +136,9 @@ func listBenchmarks(
 	errorCount := 0
 	benchmarkCounts := make(map[benchmark]int)
 	cluster.ExecuteRemoteCommands(log, *flagCluster, commands, numNodes, true, func(response cluster.RemoteResponse) {
-		fmt.Print(".")
+		if !*flagQuiet {
+			fmt.Print(".")
+		}
 		if response.Err == nil {
 			pkg := response.Metadata.(string)
 		outer:
@@ -140,7 +148,9 @@ func listBenchmarks(
 					continue
 				}
 				if !isValidBenchmarkName(benchmarkName) {
-					fmt.Println()
+					if !*flagQuiet {
+						fmt.Println()
+					}
 					l.Printf("Ignoring invalid benchmark name: %s", benchmarkName)
 					continue
 				}
@@ -154,13 +164,17 @@ func listBenchmarks(
 				benchmarkCounts[benchmarkEntry]++
 			}
 		} else {
-			fmt.Println()
+			if !*flagQuiet {
+				fmt.Println()
+			}
 			l.Errorf("Remote command = {%s}, error = {%v}, stderr output = {%s}",
 				strings.Join(response.Args, " "), response.Err, response.Stderr)
 			errorCount++
 		}
 	})
-	fmt.Println()
+	if !*flagQuiet {
+		fmt.Println()
+	}
 
 	if errorCount > 0 {
 		return nil, nil, errors.New("Failed to list benchmarks")
@@ -241,7 +255,7 @@ func executeBenchmarks(binaries, packages []string) error {
 	// Generate commands for running benchmarks.
 	commands := make([][]cluster.RemoteCommand, 0)
 	for _, bench := range benchmarks {
-		runCommand := fmt.Sprintf("./run.sh %s -test.benchmem -test.bench=^%s$ -test.run=^$",
+		runCommand := fmt.Sprintf("./run.sh %s -test.benchmem -test.bench=^%s$ -test.run=^$ -test.v",
 			strings.Join(testArgs, " "), bench.name)
 		if *flagTimeout != "" {
 			runCommand = fmt.Sprintf("timeout %s %s", *flagTimeout, runCommand)
@@ -277,22 +291,32 @@ func executeBenchmarks(binaries, packages []string) error {
 	errorCount := 0
 	logIndex := 0
 	missingBenchmarks := make(map[benchmark]int, 0)
+	failedBenchmarks := make(map[benchmark]int, 0)
+	skippedBenchmarks := make(map[benchmark]int, 0)
 	l.Printf("Found %d benchmarks, distributing and running benchmarks for %d iteration(s) across cluster %s",
 		len(benchmarks), *flagIterations, *flagCluster)
 	cluster.ExecuteRemoteCommands(muteLogger, *flagCluster, commands, numNodes, !*flagLenient, func(response cluster.RemoteResponse) {
-		fmt.Print(".")
-		benchmarkResults, containsErrors := extractBenchmarkResults(response.Stdout)
+		if !*flagQuiet {
+			fmt.Print(".")
+		}
+		extractResults := extractBenchmarkResults(response.Stdout)
 		benchmarkResponse := response.Metadata.(benchmarkIndexed)
 		report := reporters[benchmarkResponse.index]
-		for _, benchmarkResult := range benchmarkResults {
+		for _, benchmarkResult := range extractResults.results {
 			if _, writeErr := report.benchmarkOutput[benchmarkResponse.pkg].WriteString(
 				fmt.Sprintf("%s\n", strings.Join(benchmarkResult, " "))); writeErr != nil {
 				l.Errorf("Failed to write benchmark result to file - %v", writeErr)
 			}
 		}
-		if containsErrors || response.Err != nil {
-			fmt.Println()
-			err = report.writeBenchmarkErrorLogs(response, logIndex)
+		if extractResults.errors || response.Err != nil {
+			if !*flagQuiet {
+				fmt.Println()
+			}
+			tag := fmt.Sprintf("%d", logIndex)
+			if response.ExitStatus == 124 {
+				tag = fmt.Sprintf("%d-timeout", logIndex)
+			}
+			err = report.writeBenchmarkErrorLogs(response, tag)
 			if err != nil {
 				l.Errorf("Failed to write error logs - %v", err)
 			}
@@ -304,17 +328,34 @@ func executeBenchmarks(binaries, packages []string) error {
 				response.Duration.Milliseconds())); writeErr != nil {
 			l.Errorf("Failed to write analytics to file - %v", writeErr)
 		}
-		if len(benchmarkResults) == 0 {
-			missingBenchmarks[benchmarkResponse.benchmark]++
+
+		// If we didn't find any results, increment the appropriate counter.
+		if len(extractResults.results) == 0 {
+			switch {
+			case extractResults.errors:
+				failedBenchmarks[benchmarkResponse.benchmark]++
+			case extractResults.skipped:
+				skippedBenchmarks[benchmarkResponse.benchmark]++
+			default:
+				missingBenchmarks[benchmarkResponse.benchmark]++
+			}
 		}
 	})
 
-	fmt.Println()
-	l.Printf("Completed benchmarks, results located at %s for time %s", workingDir, timestamp.Format(timeFormat))
-	if len(missingBenchmarks) > 0 {
-		l.Errorf("Failed to find results for %d benchmarks", len(missingBenchmarks))
-		l.Errorf("Missing benchmarks %v", missingBenchmarks)
+	if !*flagQuiet {
+		fmt.Println()
+	}
+	for res, count := range failedBenchmarks {
+		l.Errorf("Failed benchmark: %s/%s in %d iterations", res.pkg, res.name, count)
+	}
+	for res, count := range skippedBenchmarks {
+		l.Errorf("Skipped benchmark: %s/%s in %d iterations", res.pkg, res.name, count)
 	}
+	for res, count := range missingBenchmarks {
+		l.Errorf("Missing benchmark: %s/%s in %d iterations", res.pkg, res.name, count)
+	}
+
+	l.Printf("Completed benchmarks, results located at %s for time %s", workingDir, timestamp.Format(timeFormat))
 	if errorCount != 0 {
 		if *flagLenient {
 			l.Printf("Ignoring errors in benchmark results (lenient flag was set)")
@@ -350,12 +391,12 @@ func getRegexExclusionPairs() [][]*regexp.Regexp {
 }
 
 // extractBenchmarkResults extracts the microbenchmark results generated by a
-// test binary and reports if any failures were found in the output. This method
-// makes specific assumptions regarding the format of the output, and attempts
-// to ignore any spurious output that the test binary may have logged. The
-// returned list of string arrays each represent a row of metrics as outputted
-// by the test binary.
-func extractBenchmarkResults(benchmarkOutput string) ([][]string, bool) {
+// test binary and reports if any failures or skips were found in the output.
+// This method makes specific assumptions regarding the format of the output,
+// and attempts to ignore any spurious output that the test binary may have
+// logged. The returned list of string arrays each represent a row of metrics as
+// outputted by the test binary.
+func extractBenchmarkResults(benchmarkOutput string) benchmarkExtractionResult {
 	keywords := map[string]struct{}{
 		"ns/op":     {},
 		"b/op":      {},
@@ -364,12 +405,16 @@ func extractBenchmarkResults(benchmarkOutput string) ([][]string, bool) {
 	results := make([][]string, 0)
 	buf := make([]string, 0)
 	containsErrors := false
+	skipped := false
 	var benchName string
 	for _, line := range strings.Split(benchmarkOutput, "\n") {
 		elems := strings.Fields(line)
 		for _, s := range elems {
 			if !containsErrors {
-				containsErrors = strings.Contains(s, "FAIL") || strings.Contains(s, "panic")
+				containsErrors = strings.Contains(s, "FAIL") || strings.Contains(s, "panic:")
+			}
+			if !skipped {
+				skipped = strings.Contains(s, "SKIP")
 			}
 			if strings.HasPrefix(s, "Benchmark") && len(s) > 9 {
 				benchName = s
@@ -390,5 +435,5 @@ func extractBenchmarkResults(benchmarkOutput string) ([][]string, bool) {
 			}
 		}
 	}
-	return results, containsErrors
+	return benchmarkExtractionResult{results, containsErrors, skipped}
 }
diff --git a/pkg/cmd/roachprod-microbench/benchmark_test.go b/pkg/cmd/roachprod-microbench/benchmark_test.go
@@ -25,8 +25,8 @@ func TestExtractBenchmarkResultsDataDriven(t *testing.T) {
 		if d.Cmd != "benchmark" {
 			d.Fatalf(t, "unknown command %s", d.Cmd)
 		}
-		result, fail := extractBenchmarkResults(d.Input)
-		output := fmt.Sprintf("%v %v", fail, result)
+		result := extractBenchmarkResults(d.Input)
+		output := fmt.Sprintf("%v", result)
 		return output
 	})
 }
diff --git a/pkg/cmd/roachprod-microbench/cluster/execute.go b/pkg/cmd/roachprod-microbench/cluster/execute.go
@@ -33,10 +33,11 @@ type RemoteCommand struct {
 // A Duration of -1 indicates that the command was cancelled.
 type RemoteResponse struct {
 	RemoteCommand
-	Stdout   string
-	Stderr   string
-	Err      error
-	Duration time.Duration
+	Stdout     string
+	Stderr     string
+	Err        error
+	ExitStatus int
+	Duration   time.Duration
 }
 
 func remoteWorker(
@@ -54,7 +55,7 @@ func remoteWorker(
 		for index, command := range commands {
 			if errors.Is(ctx.Err(), context.Canceled) {
 				for _, cancelCommand := range commands[index:] {
-					response <- RemoteResponse{cancelCommand, "", "", nil, -1}
+					response <- RemoteResponse{cancelCommand, "", "", nil, -1, -1}
 				}
 				break
 			}
@@ -63,12 +64,14 @@ func remoteWorker(
 				"", "", false, command.Args)
 			duration := timeutil.Since(start)
 			var stdout, stderr string
+			var exitStatus int
 			if len(runResult) > 0 {
 				stdout = runResult[0].Stdout
 				stderr = runResult[0].Stderr
+				exitStatus = runResult[0].RemoteExitStatus
 				err = errors.CombineErrors(err, runResult[0].Err)
 			}
-			response <- RemoteResponse{command, stdout, stderr, err, duration}
+			response <- RemoteResponse{command, stdout, stderr, err, exitStatus, duration}
 		}
 	}
 }

diff --git a/pkg/cmd/roachprod-microbench/main.go b/pkg/cmd/roachprod-microbench/main.go
@@ -21,6 +21,7 @@ import (
 	"time"
 
 	"github.com/cockroachdb/cockroach/pkg/cmd/roachprod-microbench/google"
+	"github.com/cockroachdb/cockroach/pkg/roachprod/config"
 	"github.com/cockroachdb/cockroach/pkg/roachprod/logger"
 	"github.com/cockroachdb/cockroach/pkg/roachprod/ssh"
 	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
@@ -46,6 +47,7 @@ var (
 	flagCopy            = flags.Bool("copy", true, "copy and extract test binaries and libraries to the target cluster")
 	flagLenient         = flags.Bool("lenient", true, "tolerate errors in the benchmark results")
 	flagAffinity        = flags.Bool("affinity", true, "run benchmarks with iterations and binaries having affinity to the same node")
+	flagQuiet           = flags.Bool("quiet", false, "suppress progress output")
 	flagIterations      = flags.Int("iterations", 1, "number of iterations to run each benchmark")
 	workingDir          string
 	testArgs            []string
@@ -118,6 +120,7 @@ func setupVars() error {
 		}
 	}
 
+	config.Quiet = *flagQuiet
 	timestamp = timeutil.Now()
 	initLogger(filepath.Join(workingDir, fmt.Sprintf("roachprod-microbench-%s.log", timestamp.Format(timeFormat))))
 	l.Printf("roachprod-microbench %s", strings.Join(os.Args, " "))
@@ -130,7 +133,6 @@ func run() error {
 	if err != nil {
 		return err
 	}
-
 	var packages []string
 	if *flagCluster != "" {
 		binaries := []string{*flagBinaries}

diff --git a/pkg/cmd/roachprod-microbench/report.go b/pkg/cmd/roachprod-microbench/report.go
@@ -90,10 +90,10 @@ func (report *Report) closeReports() {
 	}
 }
 
-func (report *Report) writeBenchmarkErrorLogs(response cluster.RemoteResponse, index int) error {
+func (report *Report) writeBenchmarkErrorLogs(response cluster.RemoteResponse, tag string) error {
 	benchmarkResponse := response.Metadata.(benchmarkIndexed)
-	stdoutLogName := fmt.Sprintf("%s-%d-stdout.log", benchmarkResponse.name, index)
-	stderrLogName := fmt.Sprintf("%s-%d-stderr.log", benchmarkResponse.name, index)
+	stdoutLogName := fmt.Sprintf("%s-%s-stdout.log", benchmarkResponse.name, tag)
+	stderrLogName := fmt.Sprintf("%s-%s-stderr.log", benchmarkResponse.name, tag)
 	l.Printf("Writing error logs for benchmark at %s, %s\n", stdoutLogName, stderrLogName)
 
 	if err := os.WriteFile(filepath.Join(report.path, stdoutLogName), []byte(response.Stdout), 0644); err != nil {

diff --git a/pkg/cmd/roachprod-microbench/testdata/benchmark b/pkg/cmd/roachprod-microbench/testdata/benchmark
@@ -8,7 +8,7 @@ BenchmarkFastIntMap/4x4-4/map-sized-24  	       1	       977.0 ns/op	       0 B/
 BenchmarkFastIntMap/4x4-4/slice-24      	       1	      1406 ns/op	      64 B/op	       3 allocs/op
 PASS
 ----
-false [[BenchmarkFastIntMap/4x4-4/fastintmap-24 1 603.0 ns/op 0 B/op 0 allocs/op] [BenchmarkFastIntMap/4x4-4/map-24 1 1039 ns/op 0 B/op 0 allocs/op] [BenchmarkFastIntMap/4x4-4/map-sized-24 1 977.0 ns/op 0 B/op 0 allocs/op] [BenchmarkFastIntMap/4x4-4/slice-24 1 1406 ns/op 64 B/op 3 allocs/op]]
+{[[BenchmarkFastIntMap/4x4-4/fastintmap-24 1 603.0 ns/op 0 B/op 0 allocs/op] [BenchmarkFastIntMap/4x4-4/map-24 1 1039 ns/op 0 B/op 0 allocs/op] [BenchmarkFastIntMap/4x4-4/map-sized-24 1 977.0 ns/op 0 B/op 0 allocs/op] [BenchmarkFastIntMap/4x4-4/slice-24 1 1406 ns/op 64 B/op 3 allocs/op]] false false}
 
 # Scattered benchmark output
 benchmark
@@ -28,7 +28,7 @@ extra log noise
 1	      1406 ns/op	      64 B/op	       3 allocs/op
 PASS
 ----
-false [[BenchmarkFastIntMap/4x4-4/fastintmap-24 1 603.0 ns/op 0 B/op 0 allocs/op] [BenchmarkFastIntMap/4x4-4/map-24 1 1039 ns/op 0 B/op 0 allocs/op] [BenchmarkFastIntMap/4x4-4/map-sized-24 1 977.0 ns/op 0 B/op 0 allocs/op] [BenchmarkFastIntMap/4x4-4/slice-24 1 1406 ns/op 64 B/op 3 allocs/op]]
+{[[BenchmarkFastIntMap/4x4-4/fastintmap-24 1 603.0 ns/op 0 B/op 0 allocs/op] [BenchmarkFastIntMap/4x4-4/map-24 1 1039 ns/op 0 B/op 0 allocs/op] [BenchmarkFastIntMap/4x4-4/map-sized-24 1 977.0 ns/op 0 B/op 0 allocs/op] [BenchmarkFastIntMap/4x4-4/slice-24 1 1406 ns/op 64 B/op 3 allocs/op]] false false}
 
 # Missing benchmark name, and missing benchmark metrics (incorrect column count)
 benchmark
@@ -40,7 +40,7 @@ BenchmarkFastIntMap/4x4-4/map-sized-24  	       1	       977.0 ns/op
 BenchmarkFastIntMap/4x4-4/slice-24      	       1	      1406 ns/op	      64 B/op	       3 allocs/op
 PASS
 ----
-false [[BenchmarkFastIntMap/4x4-4/map-24 1 1039 ns/op 0 B/op 0 allocs/op] [BenchmarkFastIntMap/4x4-4/map-sized-24 1 977.0 ns/op] [BenchmarkFastIntMap/4x4-4/slice-24 1 1406 ns/op 64 B/op 3 allocs/op]]
+{[[BenchmarkFastIntMap/4x4-4/map-24 1 1039 ns/op 0 B/op 0 allocs/op] [BenchmarkFastIntMap/4x4-4/map-sized-24 1 977.0 ns/op] [BenchmarkFastIntMap/4x4-4/slice-24 1 1406 ns/op 64 B/op 3 allocs/op]] false false}
 
 # Failed benchmark
 benchmark
@@ -49,7 +49,7 @@ cpu: Intel(R) Xeon(R) CPU @ 2.80GHz
 BenchmarkFastIntMap/4x4-4/slice-24      	       1	      1406 ns/op	      64 B/op	       3 allocs/op
 FAIL
 ----
-true [[BenchmarkFastIntMap/4x4-4/slice-24 1 1406 ns/op 64 B/op 3 allocs/op]]
+{[[BenchmarkFastIntMap/4x4-4/slice-24 1 1406 ns/op 64 B/op 3 allocs/op]] true false}
 
 # Benchmark with panic
 benchmark
@@ -58,4 +58,16 @@ cpu: Intel(R) Xeon(R) CPU @ 2.80GHz
 BenchmarkFastIntMap/4x4-4/slice-24      	       1	      1406 ns/op	      64 B/op	       3 allocs/op
 panic: something went wrong
 ----
-true [[BenchmarkFastIntMap/4x4-4/slice-24 1 1406 ns/op 64 B/op 3 allocs/op]]
+{[[BenchmarkFastIntMap/4x4-4/slice-24 1 1406 ns/op 64 B/op 3 allocs/op]] true false}
+
+# Benchmark with skipped benchmarks
+benchmark
+goos: linux
+goarch: amd64
+cpu: Intel(R) Xeon(R) CPU @ 2.80GHz
+BenchmarkIntersectsLargePolygonsAndPoints
+    binary_predicates_bench_test.go:51: [disabled under -short]
+--- SKIP: BenchmarkIntersectsLargePolygonsAndPoints
+PASS
+----
+{[] false true}