ci: Retry and detect flaky tests (#15798)

oven-sh · Dec 17, 2024 · e8b85cf · e8b85cf
1 parent d5f1f2f
commit e8b85cf
Show file tree

Hide file tree

Showing 9 changed files with 349 additions and 323 deletions.
diff --git a/scripts/runner.node.mjs b/scripts/runner.node.mjs
@@ -102,6 +102,10 @@ const { values: options, positionals: filters } = parseArgs({
       type: "string",
       default: undefined,
     },
+    ["retries"]: {
+      type: "string",
+      default: isCI ? "4" : "0", // N retries = N+1 attempts
+    },
   },
 });
 
@@ -141,51 +145,91 @@ async function runTests() {
 
   let i = 0;
   let total = vendorTotal + tests.length + 2;
-  const results = [];
+
+  const okResults = [];
+  const flakyResults = [];
+  const failedResults = [];
+  const maxAttempts = 1 + (parseInt(options["retries"]) || 0);
 
   /**
    * @param {string} title
    * @param {function} fn
    * @returns {Promise<TestResult>}
    */
   const runTest = async (title, fn) => {
-    const label = `${getAnsi("gray")}[${++i}/${total}]${getAnsi("reset")} ${title}`;
-    const result = await startGroup(label, fn);
-    results.push(result);
+    const index = ++i;
+
+    let result, failure, flaky;
+    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
+      if (attempt > 1) {
+        await new Promise(resolve => setTimeout(resolve, 5000 + Math.random() * 10_000));
+      }
+
+      result = await startGroup(
+        attempt === 1
+          ? `${getAnsi("gray")}[${index}/${total}]${getAnsi("reset")} ${title}`
+          : `${getAnsi("gray")}[${index}/${total}]${getAnsi("reset")} ${title} ${getAnsi("gray")}[attempt #${attempt}]${getAnsi("reset")}`,
+        fn,
+      );
+
+      const { ok, stdoutPreview, error } = result;
+      if (ok) {
+        if (failure) {
+          flakyResults.push(failure);
+        } else {
+          okResults.push(result);
+        }
+        break;
+      }
+
+      const color = attempt >= maxAttempts ? "red" : "yellow";
+      const label = `${getAnsi(color)}[${index}/${total}] ${title} - ${error}${getAnsi("reset")}`;
+      startGroup(label, () => {
+        process.stderr.write(stdoutPreview);
+      });
+
+      failure ||= result;
+      flaky ||= true;
+
+      if (attempt >= maxAttempts) {
+        flaky = false;
+        failedResults.push(failure);
+      }
+    }
+
+    if (!failure) {
+      return result;
+    }
 
     if (isBuildkite) {
-      const { ok, error, stdoutPreview } = result;
+      // Group flaky tests together, regardless of the title
+      const context = flaky ? "flaky" : title;
+      const style = flaky || title.startsWith("vendor") ? "warning" : "error";
+
       if (title.startsWith("vendor")) {
-        const markdown = formatTestToMarkdown({ ...result, testPath: title });
-        if (markdown) {
-          reportAnnotationToBuildKite({ label: title, content: markdown, style: "warning", priority: 5 });
+        const content = formatTestToMarkdown({ ...failure, testPath: title });
+        if (content) {
+          reportAnnotationToBuildKite({ context, label: title, content, style });
         }
       } else {
-        const markdown = formatTestToMarkdown(result);
-        if (markdown) {
-          reportAnnotationToBuildKite({ label: title, content: markdown, style: "error" });
+        const content = formatTestToMarkdown(failure);
+        if (content) {
+          reportAnnotationToBuildKite({ context, label: title, content, style });
         }
       }
-
-      if (!ok) {
-        const label = `${getAnsi("red")}[${i}/${total}] ${title} - ${error}${getAnsi("reset")}`;
-        startGroup(label, () => {
-          process.stderr.write(stdoutPreview);
-        });
-      }
     }
 
     if (isGithubAction) {
       const summaryPath = process.env["GITHUB_STEP_SUMMARY"];
       if (summaryPath) {
-        const longMarkdown = formatTestToMarkdown(result);
+        const longMarkdown = formatTestToMarkdown(failure);
         appendFileSync(summaryPath, longMarkdown);
       }
-      const shortMarkdown = formatTestToMarkdown(result, true);
+      const shortMarkdown = formatTestToMarkdown(failure, true);
       appendFileSync("comment.md", shortMarkdown);
     }
 
-    if (options["bail"] && !result.ok) {
+    if (options["bail"]) {
       process.exit(getExitCode("fail"));
     }
 
@@ -199,7 +243,7 @@ async function runTests() {
     }
   }
 
-  if (results.every(({ ok }) => ok)) {
+  if (!failedResults.length) {
     for (const testPath of tests) {
       const title = relative(cwd, join(testsPath, testPath)).replace(/\\/g, "/");
       if (title.startsWith("test/js/node/test/parallel/")) {
@@ -270,21 +314,37 @@ async function runTests() {
     }
   }
 
-  const failedTests = results.filter(({ ok }) => !ok);
   if (isGithubAction) {
-    reportOutputToGitHubAction("failing_tests_count", failedTests.length);
-    const markdown = formatTestToMarkdown(failedTests);
+    reportOutputToGitHubAction("failing_tests_count", failedResults.length);
+    const markdown = formatTestToMarkdown(failedResults);
     reportOutputToGitHubAction("failing_tests", markdown);
   }
 
-  if (!isCI) {
-    !isQuiet && console.log("-------");
-    !isQuiet && console.log("passing", results.length - failedTests.length, "/", results.length);
-    for (const { testPath } of failedTests) {
-      !isQuiet && console.log("-", testPath);
+  if (!isCI && !isQuiet) {
+    console.table({
+      "Total Tests": okResults.length + failedResults.length + flakyResults.length,
+      "Passed Tests": okResults.length,
+      "Failing Tests": failedResults.length,
+      "Flaky Tests": flakyResults.length,
+    });
+
+    if (failedResults.length) {
+      console.log(`${getAnsi("red")}Failing Tests:${getAnsi("reset")}`);
+      for (const { testPath } of failedResults) {
+        console.log(`${getAnsi("red")}- ${testPath}${getAnsi("reset")}`);
+      }
+    }
+
+    if (flakyResults.length) {
+      console.log(`${getAnsi("yellow")}Flaky Tests:${getAnsi("reset")}`);
+      for (const { testPath } of flakyResults) {
+        console.log(`${getAnsi("yellow")}- ${testPath}${getAnsi("reset")}`);
+      }
     }
   }
-  return results;
+
+  // Exclude flaky tests from the final results
+  return [...okResults, ...failedResults];
 }
 
 /**
@@ -1293,6 +1353,7 @@ function listArtifactsFromBuildKite(glob, step) {
 
 /**
  * @typedef {object} BuildkiteAnnotation
+ * @property {string} [context]
  * @property {string} label
  * @property {string} content
  * @property {"error" | "warning" | "info"} [style]
@@ -1303,10 +1364,10 @@ function listArtifactsFromBuildKite(glob, step) {
 /**
  * @param {BuildkiteAnnotation} annotation
  */
-function reportAnnotationToBuildKite({ label, content, style = "error", priority = 3, attempt = 0 }) {
+function reportAnnotationToBuildKite({ context, label, content, style = "error", priority = 3, attempt = 0 }) {
   const { error, status, signal, stderr } = spawnSync(
     "buildkite-agent",
-    ["annotate", "--append", "--style", `${style}`, "--context", `${label}`, "--priority", `${priority}`],
+    ["annotate", "--append", "--style", `${style}`, "--context", `${context || label}`, "--priority", `${priority}`],
     {
       input: content,
       stdio: ["pipe", "ignore", "pipe"],

diff --git a/test/bundler/bundler_edgecase.test.ts b/test/bundler/bundler_edgecase.test.ts
@@ -1336,7 +1336,7 @@ describe("bundler", () => {
     target: "bun",
     run: true,
     todo: isBroken && isWindows,
-    debugTimeoutScale: 5,
+    timeoutScale: 5,
   });
   itBundled("edgecase/PackageExternalDoNotBundleNodeModules", {
     files: {

diff --git a/test/bundler/bundler_plugin.test.ts b/test/bundler/bundler_plugin.test.ts
@@ -689,7 +689,7 @@ describe("bundler", () => {
         expect(resolveCount).toBe(5050);
         expect(loadCount).toBe(101);
       },
-      debugTimeoutScale: 3,
+      timeoutScale: 3,
     };
   });
   // itBundled("plugin/ManyPlugins", ({ root }) => {
@@ -832,4 +832,3 @@ describe("bundler", () => {
     };
   });
 });
-
diff --git a/test/bundler/expectBundled.ts b/test/bundler/expectBundled.ts
@@ -6,7 +6,7 @@ import { callerSourceOrigin } from "bun:jsc";
 import type { Matchers } from "bun:test";
 import * as esbuild from "esbuild";
 import { existsSync, mkdirSync, mkdtempSync, readdirSync, readFileSync, realpathSync, rmSync, writeFileSync } from "fs";
-import { bunEnv, bunExe, isDebug } from "harness";
+import { bunEnv, bunExe, isCI, isDebug } from "harness";
 import { tmpdir } from "os";
 import path from "path";
 import { SourceMapConsumer } from "source-map";
@@ -278,8 +278,6 @@ export interface BundlerTestInput {
 
   /** Multiplier for test timeout */
   timeoutScale?: number;
-  /** Multiplier for test timeout when using bun-debug. Debug builds already have a higher timeout. */
-  debugTimeoutScale?: number;
 
   /* determines whether or not anything should be passed to outfile, outdir, etc. */
   generateOutput?: boolean;
@@ -1663,8 +1661,7 @@ export function itBundled(
       id,
       () => expectBundled(id, opts as any),
       // sourcemap code is slow
-      (opts.snapshotSourceMap ? (isDebug ? Infinity : 30_000) : isDebug ? 15_000 : 5_000) *
-        ((isDebug ? opts.debugTimeoutScale : opts.timeoutScale) ?? 1),
+      isCI ? undefined : isDebug ? Infinity : (opts.snapshotSourceMap ? 30_000 : 5_000) * (opts.timeoutScale ?? 1),
     );
   }
   return ref;
@@ -1676,8 +1673,7 @@ itBundled.only = (id: string, opts: BundlerTestInput) => {
     id,
     () => expectBundled(id, opts as any),
     // sourcemap code is slow
-    (opts.snapshotSourceMap ? (isDebug ? Infinity : 30_000) : isDebug ? 15_000 : 5_000) *
-      ((isDebug ? opts.debugTimeoutScale : opts.timeoutScale) ?? 1),
+    isCI ? undefined : isDebug ? Infinity : (opts.snapshotSourceMap ? 30_000 : 5_000) * (opts.timeoutScale ?? 1),
   );
 };
 

diff --git a/test/cli/run/require-cache.test.ts b/test/cli/run/require-cache.test.ts
@@ -1,5 +1,5 @@
 import { describe, expect, test } from "bun:test";
-import { bunEnv, bunExe, isWindows, tempDirWithFiles } from "harness";
+import { bunEnv, bunExe, isBroken, isIntelMacOS, isWindows, tempDirWithFiles } from "harness";
 import { join } from "path";
 
 test("require.cache is not an empty object literal when inspected", () => {
@@ -32,7 +32,7 @@ test("require.cache does not include unevaluated modules", () => {
   expect(exitCode).toBe(0);
 });
 
-describe("files transpiled and loaded don't leak the output source code", () => {
+describe.skipIf(isBroken && isIntelMacOS)("files transpiled and loaded don't leak the output source code", () => {
   test("via require() with a lot of long export names", () => {
     let text = "";
     for (let i = 0; i < 10000; i++) {

diff --git a/test/harness.ts b/test/harness.ts
@@ -18,8 +18,10 @@ export const isWindows = process.platform === "win32";
 export const isIntelMacOS = isMacOS && process.arch === "x64";
 export const isDebug = Bun.version.includes("debug");
 export const isCI = process.env.CI !== undefined;
-export const isBuildKite = process.env.BUILDKITE === "true";
 export const libcFamily = detectLibc.familySync() as "glibc" | "musl";
+export const isMusl = isLinux && libcFamily === "musl";
+export const isGlibc = isLinux && libcFamily === "glibc";
+export const isBuildKite = process.env.BUILDKITE === "true";
 export const isVerbose = process.env.DEBUG === "1";
 
 // Use these to mark a test as flaky or broken.