Skip to content

Commit

Permalink
ci: Retry and detect flaky tests (#15798)
Browse files Browse the repository at this point in the history
  • Loading branch information
Electroid authored Dec 17, 2024
1 parent d5f1f2f commit e8b85cf
Show file tree
Hide file tree
Showing 9 changed files with 349 additions and 323 deletions.
127 changes: 94 additions & 33 deletions scripts/runner.node.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,10 @@ const { values: options, positionals: filters } = parseArgs({
type: "string",
default: undefined,
},
["retries"]: {
type: "string",
default: isCI ? "4" : "0", // N retries = N+1 attempts
},
},
});

Expand Down Expand Up @@ -141,51 +145,91 @@ async function runTests() {

let i = 0;
let total = vendorTotal + tests.length + 2;
const results = [];

const okResults = [];
const flakyResults = [];
const failedResults = [];
const maxAttempts = 1 + (parseInt(options["retries"]) || 0);

/**
* @param {string} title
* @param {function} fn
* @returns {Promise<TestResult>}
*/
const runTest = async (title, fn) => {
const label = `${getAnsi("gray")}[${++i}/${total}]${getAnsi("reset")} ${title}`;
const result = await startGroup(label, fn);
results.push(result);
const index = ++i;

let result, failure, flaky;
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
if (attempt > 1) {
await new Promise(resolve => setTimeout(resolve, 5000 + Math.random() * 10_000));
}

result = await startGroup(
attempt === 1
? `${getAnsi("gray")}[${index}/${total}]${getAnsi("reset")} ${title}`
: `${getAnsi("gray")}[${index}/${total}]${getAnsi("reset")} ${title} ${getAnsi("gray")}[attempt #${attempt}]${getAnsi("reset")}`,
fn,
);

const { ok, stdoutPreview, error } = result;
if (ok) {
if (failure) {
flakyResults.push(failure);
} else {
okResults.push(result);
}
break;
}

const color = attempt >= maxAttempts ? "red" : "yellow";
const label = `${getAnsi(color)}[${index}/${total}] ${title} - ${error}${getAnsi("reset")}`;
startGroup(label, () => {
process.stderr.write(stdoutPreview);
});

failure ||= result;
flaky ||= true;

if (attempt >= maxAttempts) {
flaky = false;
failedResults.push(failure);
}
}

if (!failure) {
return result;
}

if (isBuildkite) {
const { ok, error, stdoutPreview } = result;
// Group flaky tests together, regardless of the title
const context = flaky ? "flaky" : title;
const style = flaky || title.startsWith("vendor") ? "warning" : "error";

if (title.startsWith("vendor")) {
const markdown = formatTestToMarkdown({ ...result, testPath: title });
if (markdown) {
reportAnnotationToBuildKite({ label: title, content: markdown, style: "warning", priority: 5 });
const content = formatTestToMarkdown({ ...failure, testPath: title });
if (content) {
reportAnnotationToBuildKite({ context, label: title, content, style });
}
} else {
const markdown = formatTestToMarkdown(result);
if (markdown) {
reportAnnotationToBuildKite({ label: title, content: markdown, style: "error" });
const content = formatTestToMarkdown(failure);
if (content) {
reportAnnotationToBuildKite({ context, label: title, content, style });
}
}

if (!ok) {
const label = `${getAnsi("red")}[${i}/${total}] ${title} - ${error}${getAnsi("reset")}`;
startGroup(label, () => {
process.stderr.write(stdoutPreview);
});
}
}

if (isGithubAction) {
const summaryPath = process.env["GITHUB_STEP_SUMMARY"];
if (summaryPath) {
const longMarkdown = formatTestToMarkdown(result);
const longMarkdown = formatTestToMarkdown(failure);
appendFileSync(summaryPath, longMarkdown);
}
const shortMarkdown = formatTestToMarkdown(result, true);
const shortMarkdown = formatTestToMarkdown(failure, true);
appendFileSync("comment.md", shortMarkdown);
}

if (options["bail"] && !result.ok) {
if (options["bail"]) {
process.exit(getExitCode("fail"));
}

Expand All @@ -199,7 +243,7 @@ async function runTests() {
}
}

if (results.every(({ ok }) => ok)) {
if (!failedResults.length) {
for (const testPath of tests) {
const title = relative(cwd, join(testsPath, testPath)).replace(/\\/g, "/");
if (title.startsWith("test/js/node/test/parallel/")) {
Expand Down Expand Up @@ -270,21 +314,37 @@ async function runTests() {
}
}

const failedTests = results.filter(({ ok }) => !ok);
if (isGithubAction) {
reportOutputToGitHubAction("failing_tests_count", failedTests.length);
const markdown = formatTestToMarkdown(failedTests);
reportOutputToGitHubAction("failing_tests_count", failedResults.length);
const markdown = formatTestToMarkdown(failedResults);
reportOutputToGitHubAction("failing_tests", markdown);
}

if (!isCI) {
!isQuiet && console.log("-------");
!isQuiet && console.log("passing", results.length - failedTests.length, "/", results.length);
for (const { testPath } of failedTests) {
!isQuiet && console.log("-", testPath);
if (!isCI && !isQuiet) {
console.table({
"Total Tests": okResults.length + failedResults.length + flakyResults.length,
"Passed Tests": okResults.length,
"Failing Tests": failedResults.length,
"Flaky Tests": flakyResults.length,
});

if (failedResults.length) {
console.log(`${getAnsi("red")}Failing Tests:${getAnsi("reset")}`);
for (const { testPath } of failedResults) {
console.log(`${getAnsi("red")}- ${testPath}${getAnsi("reset")}`);
}
}

if (flakyResults.length) {
console.log(`${getAnsi("yellow")}Flaky Tests:${getAnsi("reset")}`);
for (const { testPath } of flakyResults) {
console.log(`${getAnsi("yellow")}- ${testPath}${getAnsi("reset")}`);
}
}
}
return results;

// Exclude flaky tests from the final results
return [...okResults, ...failedResults];
}

/**
Expand Down Expand Up @@ -1293,6 +1353,7 @@ function listArtifactsFromBuildKite(glob, step) {

/**
* @typedef {object} BuildkiteAnnotation
* @property {string} [context]
* @property {string} label
* @property {string} content
* @property {"error" | "warning" | "info"} [style]
Expand All @@ -1303,10 +1364,10 @@ function listArtifactsFromBuildKite(glob, step) {
/**
* @param {BuildkiteAnnotation} annotation
*/
function reportAnnotationToBuildKite({ label, content, style = "error", priority = 3, attempt = 0 }) {
function reportAnnotationToBuildKite({ context, label, content, style = "error", priority = 3, attempt = 0 }) {
const { error, status, signal, stderr } = spawnSync(
"buildkite-agent",
["annotate", "--append", "--style", `${style}`, "--context", `${label}`, "--priority", `${priority}`],
["annotate", "--append", "--style", `${style}`, "--context", `${context || label}`, "--priority", `${priority}`],
{
input: content,
stdio: ["pipe", "ignore", "pipe"],
Expand Down
2 changes: 1 addition & 1 deletion test/bundler/bundler_edgecase.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1336,7 +1336,7 @@ describe("bundler", () => {
target: "bun",
run: true,
todo: isBroken && isWindows,
debugTimeoutScale: 5,
timeoutScale: 5,
});
itBundled("edgecase/PackageExternalDoNotBundleNodeModules", {
files: {
Expand Down
3 changes: 1 addition & 2 deletions test/bundler/bundler_plugin.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -689,7 +689,7 @@ describe("bundler", () => {
expect(resolveCount).toBe(5050);
expect(loadCount).toBe(101);
},
debugTimeoutScale: 3,
timeoutScale: 3,
};
});
// itBundled("plugin/ManyPlugins", ({ root }) => {
Expand Down Expand Up @@ -832,4 +832,3 @@ describe("bundler", () => {
};
});
});

10 changes: 3 additions & 7 deletions test/bundler/expectBundled.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import { callerSourceOrigin } from "bun:jsc";
import type { Matchers } from "bun:test";
import * as esbuild from "esbuild";
import { existsSync, mkdirSync, mkdtempSync, readdirSync, readFileSync, realpathSync, rmSync, writeFileSync } from "fs";
import { bunEnv, bunExe, isDebug } from "harness";
import { bunEnv, bunExe, isCI, isDebug } from "harness";
import { tmpdir } from "os";
import path from "path";
import { SourceMapConsumer } from "source-map";
Expand Down Expand Up @@ -278,8 +278,6 @@ export interface BundlerTestInput {

/** Multiplier for test timeout */
timeoutScale?: number;
/** Multiplier for test timeout when using bun-debug. Debug builds already have a higher timeout. */
debugTimeoutScale?: number;

/* determines whether or not anything should be passed to outfile, outdir, etc. */
generateOutput?: boolean;
Expand Down Expand Up @@ -1663,8 +1661,7 @@ export function itBundled(
id,
() => expectBundled(id, opts as any),
// sourcemap code is slow
(opts.snapshotSourceMap ? (isDebug ? Infinity : 30_000) : isDebug ? 15_000 : 5_000) *
((isDebug ? opts.debugTimeoutScale : opts.timeoutScale) ?? 1),
isCI ? undefined : isDebug ? Infinity : (opts.snapshotSourceMap ? 30_000 : 5_000) * (opts.timeoutScale ?? 1),
);
}
return ref;
Expand All @@ -1676,8 +1673,7 @@ itBundled.only = (id: string, opts: BundlerTestInput) => {
id,
() => expectBundled(id, opts as any),
// sourcemap code is slow
(opts.snapshotSourceMap ? (isDebug ? Infinity : 30_000) : isDebug ? 15_000 : 5_000) *
((isDebug ? opts.debugTimeoutScale : opts.timeoutScale) ?? 1),
isCI ? undefined : isDebug ? Infinity : (opts.snapshotSourceMap ? 30_000 : 5_000) * (opts.timeoutScale ?? 1),
);
};

Expand Down
4 changes: 2 additions & 2 deletions test/cli/run/require-cache.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { describe, expect, test } from "bun:test";
import { bunEnv, bunExe, isWindows, tempDirWithFiles } from "harness";
import { bunEnv, bunExe, isBroken, isIntelMacOS, isWindows, tempDirWithFiles } from "harness";
import { join } from "path";

test("require.cache is not an empty object literal when inspected", () => {
Expand Down Expand Up @@ -32,7 +32,7 @@ test("require.cache does not include unevaluated modules", () => {
expect(exitCode).toBe(0);
});

describe("files transpiled and loaded don't leak the output source code", () => {
describe.skipIf(isBroken && isIntelMacOS)("files transpiled and loaded don't leak the output source code", () => {
test("via require() with a lot of long export names", () => {
let text = "";
for (let i = 0; i < 10000; i++) {
Expand Down
4 changes: 3 additions & 1 deletion test/harness.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@ export const isWindows = process.platform === "win32";
export const isIntelMacOS = isMacOS && process.arch === "x64";
export const isDebug = Bun.version.includes("debug");
export const isCI = process.env.CI !== undefined;
export const isBuildKite = process.env.BUILDKITE === "true";
export const libcFamily = detectLibc.familySync() as "glibc" | "musl";
export const isMusl = isLinux && libcFamily === "musl";
export const isGlibc = isLinux && libcFamily === "glibc";
export const isBuildKite = process.env.BUILDKITE === "true";
export const isVerbose = process.env.DEBUG === "1";

// Use these to mark a test as flaky or broken.
Expand Down
Loading

0 comments on commit e8b85cf

Please sign in to comment.