From 2e4bcfcb96031595e60f32af10dc87f5a2119942 Mon Sep 17 00:00:00 2001
From: Antoine du Hamel <duhamelantoine1995@gmail.com>
Date: Mon, 25 Jul 2022 14:01:41 +0200
Subject: [PATCH] tools: add more options to track flaky tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Refs: https://github.com/nodejs/node/pull/43929#issuecomment-1193104729

PR-URL: https://github.com/nodejs/node/pull/43954
Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
Reviewed-By: Tobias Nießen <tniessen@tnie.de>
Reviewed-By: Feng Yu <F3n67u@outlook.com>
---
 .github/workflows/build-tarball.yml    |  4 +--
 .github/workflows/build-windows.yml    |  2 +-
 .github/workflows/coverage-linux.yml   |  4 +--
 .github/workflows/coverage-windows.yml |  2 +-
 .github/workflows/doc.yml              |  2 +-
 .github/workflows/test-asan.yml        |  4 +--
 .github/workflows/test-internet.yml    |  2 +-
 .github/workflows/test-linux.yml       |  4 +--
 .github/workflows/test-macos.yml       |  4 +--
 tools/test.py                          | 41 ++++++++++++++++++--------
 10 files changed, 43 insertions(+), 26 deletions(-)

diff --git a/.github/workflows/build-tarball.yml b/.github/workflows/build-tarball.yml
index 4509450172f277..2ced884f35c539 100644
--- a/.github/workflows/build-tarball.yml
+++ b/.github/workflows/build-tarball.yml
@@ -29,7 +29,7 @@ concurrency:
 
 env:
   PYTHON_VERSION: '3.10'
-  FLAKY_TESTS: dontcare
+  FLAKY_TESTS: keep_retrying
 
 permissions:
   contents: read
@@ -94,4 +94,4 @@ jobs:
       - name: Test
         run: |
           cd $TAR_DIR
-          make run-ci -j2 V=1 TEST_CI_ARGS="-p dots"
+          make run-ci -j2 V=1 TEST_CI_ARGS="-p dots --measure-flakiness 9"
diff --git a/.github/workflows/build-windows.yml b/.github/workflows/build-windows.yml
index 6ef65b9c64fbea..af376d3aa5c533 100644
--- a/.github/workflows/build-windows.yml
+++ b/.github/workflows/build-windows.yml
@@ -24,7 +24,7 @@ concurrency:
 
 env:
   PYTHON_VERSION: '3.10'
-  FLAKY_TESTS: dontcare
+  FLAKY_TESTS: keep_retrying
 
 permissions:
   contents: read
diff --git a/.github/workflows/coverage-linux.yml b/.github/workflows/coverage-linux.yml
index abd69801f60045..db0a82c9e21d73 100644
--- a/.github/workflows/coverage-linux.yml
+++ b/.github/workflows/coverage-linux.yml
@@ -27,7 +27,7 @@ concurrency:
 
 env:
   PYTHON_VERSION: '3.10'
-  FLAKY_TESTS: dontcare
+  FLAKY_TESTS: keep_retrying
 
 permissions:
   contents: read
@@ -53,7 +53,7 @@ jobs:
       # TODO(bcoe): fix the couple tests that fail with the inspector enabled.
       # The cause is most likely coverage's use of the inspector.
       - name: Test
-        run: NODE_V8_COVERAGE=coverage/tmp make test-cov -j2 V=1 TEST_CI_ARGS="-p dots" || exit 0
+        run: NODE_V8_COVERAGE=coverage/tmp make test-cov -j2 V=1 TEST_CI_ARGS="-p dots --measure-flakiness 9" || exit 0
       - name: Report JS
         run: npx c8 report --check-coverage
         env:
diff --git a/.github/workflows/coverage-windows.yml b/.github/workflows/coverage-windows.yml
index d9c5bfb58d61e6..78465acb17566d 100644
--- a/.github/workflows/coverage-windows.yml
+++ b/.github/workflows/coverage-windows.yml
@@ -29,7 +29,7 @@ concurrency:
 
 env:
   PYTHON_VERSION: '3.10'
-  FLAKY_TESTS: dontcare
+  FLAKY_TESTS: keep_retrying
 
 permissions:
   contents: read
diff --git a/.github/workflows/doc.yml b/.github/workflows/doc.yml
index 72abb16ad5083c..76660343ca2f46 100644
--- a/.github/workflows/doc.yml
+++ b/.github/workflows/doc.yml
@@ -40,4 +40,4 @@ jobs:
           name: docs
           path: out/doc
       - name: Test
-        run: NODE=$(command -v node) make test-doc-ci TEST_CI_ARGS="-p actions"
+        run: NODE=$(command -v node) make test-doc-ci TEST_CI_ARGS="-p actions --measure-flakiness 9"
diff --git a/.github/workflows/test-asan.yml b/.github/workflows/test-asan.yml
index ba30449e90ba40..c8e7a09e2e4efd 100644
--- a/.github/workflows/test-asan.yml
+++ b/.github/workflows/test-asan.yml
@@ -31,7 +31,7 @@ concurrency:
 env:
   ASAN_OPTIONS: intercept_tls_get_addr=0
   PYTHON_VERSION: '3.10'
-  FLAKY_TESTS: dontcare
+  FLAKY_TESTS: keep_retrying
 
 permissions:
   contents: read
@@ -58,4 +58,4 @@ jobs:
       - name: Build
         run: make build-ci -j2 V=1
       - name: Test
-        run: make run-ci -j2 V=1 TEST_CI_ARGS="-p actions -t 300"
+        run: make run-ci -j2 V=1 TEST_CI_ARGS="-p actions -t 300 --measure-flakiness 9"
diff --git a/.github/workflows/test-internet.yml b/.github/workflows/test-internet.yml
index 297ceea987a76a..4fcb18a14f5ce9 100644
--- a/.github/workflows/test-internet.yml
+++ b/.github/workflows/test-internet.yml
@@ -22,7 +22,7 @@ concurrency:
 
 env:
   PYTHON_VERSION: '3.10'
-  FLAKY_TESTS: dontcare
+  FLAKY_TESTS: keep_retrying
 
 permissions:
   contents: read
diff --git a/.github/workflows/test-linux.yml b/.github/workflows/test-linux.yml
index 4cc09f22c25105..24196849d1bac8 100644
--- a/.github/workflows/test-linux.yml
+++ b/.github/workflows/test-linux.yml
@@ -24,7 +24,7 @@ concurrency:
 
 env:
   PYTHON_VERSION: '3.10'
-  FLAKY_TESTS: dontcare
+  FLAKY_TESTS: keep_retrying
 
 permissions:
   contents: read
@@ -46,4 +46,4 @@ jobs:
       - name: Build
         run: make build-ci -j2 V=1 CONFIG_FLAGS="--error-on-warn"
       - name: Test
-        run: make run-ci -j2 V=1 TEST_CI_ARGS="-p actions"
+        run: make run-ci -j2 V=1 TEST_CI_ARGS="-p actions --measure-flakiness 9"
diff --git a/.github/workflows/test-macos.yml b/.github/workflows/test-macos.yml
index 5f93730d666fc0..7faddc8eaedaec 100644
--- a/.github/workflows/test-macos.yml
+++ b/.github/workflows/test-macos.yml
@@ -30,7 +30,7 @@ concurrency:
 
 env:
   PYTHON_VERSION: '3.10'
-  FLAKY_TESTS: dontcare
+  FLAKY_TESTS: keep_retrying
 
 permissions:
   contents: read
@@ -60,4 +60,4 @@ jobs:
       - name: Build
         run: make build-ci -j2 V=1 CONFIG_FLAGS="--error-on-warn"
       - name: Test
-        run: make run-ci -j2 V=1 TEST_CI_ARGS="-p actions"
+        run: make run-ci -j2 V=1 TEST_CI_ARGS="-p actions --measure-flakiness 9"
diff --git a/tools/test.py b/tools/test.py
index 9a7de5ed24d706..35153057f0b9f2 100755
--- a/tools/test.py
+++ b/tools/test.py
@@ -96,10 +96,11 @@ def get_module(name, path):
 
 class ProgressIndicator(object):
 
-  def __init__(self, cases, flaky_tests_mode):
+  def __init__(self, cases, flaky_tests_mode, measure_flakiness):
     self.cases = cases
     self.serial_id = 0
     self.flaky_tests_mode = flaky_tests_mode
+    self.measure_flakiness = measure_flakiness
     self.parallel_queue = Queue(len(cases))
     self.sequential_queue = Queue(len(cases))
     for case in cases:
@@ -211,10 +212,22 @@ def RunSingle(self, parallel, thread_id):
       if output.UnexpectedOutput():
         if FLAKY in output.test.outcomes and self.flaky_tests_mode == DONTCARE:
           self.flaky_failed.append(output)
+        elif FLAKY in output.test.outcomes and self.flaky_tests_mode == KEEP_RETRYING:
+          for _ in range(99):
+            if not case.Run().UnexpectedOutput():
+              self.flaky_failed.append(output)
+              break
+          else:
+            # If after 100 tries, the test is not passing, it's not flaky.
+            self.failed.append(output)
         else:
           self.failed.append(output)
           if output.HasCrashed():
             self.crashed += 1
+          if self.measure_flakiness:
+            outputs = [case.Run() for _ in range(self.measure_flakiness)]
+            # +1s are there because the test already failed once at this point.
+            print(f" failed {len([i for i in outputs if i.UnexpectedOutput()]) + 1} out of {self.measure_flakiness + 1}")
       else:
         self.succeeded += 1
       self.remaining -= 1
@@ -436,8 +449,8 @@ def Done(self):
 
 class CompactProgressIndicator(ProgressIndicator):
 
-  def __init__(self, cases, flaky_tests_mode, templates):
-    super(CompactProgressIndicator, self).__init__(cases, flaky_tests_mode)
+  def __init__(self, cases, flaky_tests_mode, measure_flakiness, templates):
+    super(CompactProgressIndicator, self).__init__(cases, flaky_tests_mode, measure_flakiness)
     self.templates = templates
     self.last_status_length = 0
     self.start_time = time.time()
@@ -492,13 +505,13 @@ def PrintProgress(self, name):
 
 class ColorProgressIndicator(CompactProgressIndicator):
 
-  def __init__(self, cases, flaky_tests_mode):
+  def __init__(self, cases, flaky_tests_mode, measure_flakiness):
     templates = {
       'status_line': "[%(mins)02i:%(secs)02i|\033[34m%%%(remaining) 4d\033[0m|\033[32m+%(passed) 4d\033[0m|\033[31m-%(failed) 4d\033[0m]: %(test)s",
       'stdout': "\033[1m%s\033[0m",
       'stderr': "\033[31m%s\033[0m",
     }
-    super(ColorProgressIndicator, self).__init__(cases, flaky_tests_mode, templates)
+    super(ColorProgressIndicator, self).__init__(cases, flaky_tests_mode, measure_flakiness, templates)
 
   def ClearLine(self, last_line_length):
     print("\033[1K\r", end='')
@@ -506,7 +519,7 @@ def ClearLine(self, last_line_length):
 
 class MonochromeProgressIndicator(CompactProgressIndicator):
 
-  def __init__(self, cases, flaky_tests_mode):
+  def __init__(self, cases, flaky_tests_mode, measure_flakiness):
     templates = {
       'status_line': "[%(mins)02i:%(secs)02i|%%%(remaining) 4d|+%(passed) 4d|-%(failed) 4d]: %(test)s",
       'stdout': '%s',
@@ -514,7 +527,7 @@ def __init__(self, cases, flaky_tests_mode):
       'clear': lambda last_line_length: ("\r" + (" " * last_line_length) + "\r"),
       'max_length': 78
     }
-    super(MonochromeProgressIndicator, self).__init__(cases, flaky_tests_mode, templates)
+    super(MonochromeProgressIndicator, self).__init__(cases, flaky_tests_mode, measure_flakiness, templates)
 
   def ClearLine(self, last_line_length):
     print(("\r" + (" " * last_line_length) + "\r"), end='')
@@ -948,8 +961,8 @@ def GetTimeout(self, mode, section=''):
       timeout = timeout * 6
     return timeout
 
-def RunTestCases(cases_to_run, progress, tasks, flaky_tests_mode):
-  progress = PROGRESS_INDICATORS[progress](cases_to_run, flaky_tests_mode)
+def RunTestCases(cases_to_run, progress, tasks, flaky_tests_mode, measure_flakiness):
+  progress = PROGRESS_INDICATORS[progress](cases_to_run, flaky_tests_mode, measure_flakiness)
   return progress.Run(tasks)
 
 # -------------------------------------------
@@ -967,6 +980,7 @@ def RunTestCases(cases_to_run, progress, tasks, flaky_tests_mode):
 SLOW = 'slow'
 FLAKY = 'flaky'
 DONTCARE = 'dontcare'
+KEEP_RETRYING = 'keep_retrying'
 
 class Expression(object):
   pass
@@ -1355,8 +1369,11 @@ def BuildOptions():
   result.add_option("--cat", help="Print the source of the tests",
       default=False, action="store_true")
   result.add_option("--flaky-tests",
-      help="Regard tests marked as flaky (run|skip|dontcare)",
+      help="Regard tests marked as flaky (run|skip|dontcare|keep_retrying)",
       default="run")
+  result.add_option("--measure-flakiness",
+      help="When a test fails, re-run it x number of times",
+      default=0, type="int")
   result.add_option("--skip-tests",
       help="Tests that should not be executed (comma-separated)",
       default="")
@@ -1433,7 +1450,7 @@ def ProcessOptions(options):
     # -j and ignoring -J, which is the opposite of what we used to do before -J
     # became a legacy no-op.
     print('Warning: Legacy -J option is ignored. Using the -j option.')
-  if options.flaky_tests not in [RUN, SKIP, DONTCARE]:
+  if options.flaky_tests not in [RUN, SKIP, DONTCARE, KEEP_RETRYING]:
     print("Unknown flaky-tests mode %s" % options.flaky_tests)
     return False
   return True
@@ -1733,7 +1750,7 @@ def should_keep(case):
   else:
     try:
       start = time.time()
-      if RunTestCases(cases_to_run, options.progress, options.j, options.flaky_tests):
+      if RunTestCases(cases_to_run, options.progress, options.j, options.flaky_tests, options.measure_flakiness):
         result = 0
       else:
         result = 1