From 085999065a4cd98da0b170062c5a37907f43fba5 Mon Sep 17 00:00:00 2001
From: Nick Robinson <npr251@gmail.com>
Date: Wed, 21 Jun 2023 17:58:29 +0100
Subject: [PATCH] Fix and improve the reporting of errors from timeouts and
 crashes (#82)

* Test we retry on timeouts

* Refactor test-error handling to always record the error

* Record more informative error: include timeout/abort info

* Update reference tests

* Test case where we timeout then pass on retry

* fixup! Test case where we timeout then pass on retry

* fixup! Test case where we timeout then pass on retry

* Reduce time of test so worker=0 case isn't so slow

* Handle test failure case differently to worker timeout or crash

* Bump version

* fixup! Handle test failure case differently to worker timeout or crash

* fixup! fixup! Handle test failure case differently to worker timeout or crash

* Update test/integrationtests.jl - typo

* Don't use `tempdir()`
---
 Project.toml                                  |   5 +-
 src/ReTestItems.jl                            | 104 +++---
 test/integrationtests.jl                      |  36 +-
 test/junit_xml.jl                             |  18 +-
 test/references/retry_tests_report.xml        | 222 ------------
 .../references/retry_tests_report_0worker.xml | 242 +++++++++++++
 .../references/retry_tests_report_1worker.xml | 328 ++++++++++++++++++
 test/testfiles/_retry_tests.jl                |  41 ++-
 8 files changed, 704 insertions(+), 292 deletions(-)
 delete mode 100644 test/references/retry_tests_report.xml
 create mode 100644 test/references/retry_tests_report_0worker.xml
 create mode 100644 test/references/retry_tests_report_1worker.xml

diff --git a/Project.toml b/Project.toml
index a1e1c979..b3988621 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "ReTestItems"
 uuid = "817f1d60-ba6b-4fd5-9520-3cf149f6a823"
-version = "1.8.1"
+version = "1.9.0"
 
 [deps]
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
@@ -23,8 +23,9 @@ DeepDiffs = "ab62b9b5-e342-54a8-a765-a90f495de1a6"
 IOCapture = "b5f81e59-6552-4d32-b1f0-c071b021bf89"
 Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 XMLDict = "228000da-037f-5747-90a9-8195ccbf91a5"
 
 [targets]
-test = ["AutoHashEquals", "DeepDiffs", "IOCapture", "Logging", "Pkg", "Test", "XMLDict"]
+test = ["AutoHashEquals", "DeepDiffs", "IOCapture", "Logging", "Pkg", "Random", "Test", "XMLDict"]
diff --git a/src/ReTestItems.jl b/src/ReTestItems.jl
index a39a543d..efbdbb59 100644
--- a/src/ReTestItems.jl
+++ b/src/ReTestItems.jl
@@ -354,16 +354,31 @@ function start_worker(proj_name, nworker_threads, worker_init_expr, ntestitems)
     return w
 end
 
+function record_timeout!(testitem, run_number::Int, timeout_limit::Real)
+    timeout_s = round(Int, timeout_limit)
+    time_str = if timeout_s < 60
+        string(timeout_s, "s")
+    else
+        mins, secs = divrem(timeout_s, 60)
+        if iszero(secs)
+            string(mins, "m")
+        else
+            string(mins, "m", lpad(secs, 2, "0"), "s")
+        end
+    end
+    msg = "Timed out after $time_str evaluating test item $(repr(testitem.name)) (run=$run_number)"
+    record_test_error!(testitem, msg)
+end
 
-# This is only used to signal that we need to retry. We don't use Test.TestSetException as
-# that requires information that we are not going to use anyway.
-struct TestSetFailure <: Exception end
-throw_if_failed(ts) = ts.anynonpass ? throw(TestSetFailure()) : nothing
+function record_worker_terminated!(testitem, run_number::Int)
+    msg = "Worker aborted evaluating test item $(repr(testitem.name)) (run=$run_number)"
+    record_test_error!(testitem, msg)
+end
 
-function record_test_error!(testitem, ntries)
+function record_test_error!(testitem, msg)
     Test.TESTSET_PRINT_ENABLE[] = false
     ts = DefaultTestSet(testitem.name)
-    err = ErrorException("test item $(repr(testitem.name)) didn't succeed after $ntries tries")
+    err = ErrorException(msg)
     Test.record(ts, Test.Error(:nontest_error, Test.Expr(:tuple), err,
         Base.ExceptionStack([(exception=err, backtrace=Union{Ptr{Nothing}, Base.InterpreterIP}[])]),
         LineNumberNode(testitem.line, testitem.file)))
@@ -384,7 +399,7 @@ function start_and_manage_worker(
 )
     ntestitems = length(testitems.testitems)
     worker = start_worker(proj_name, nworker_threads, worker_init_expr, ntestitems)
-    nretries = 0
+    run_number = 1
     while testitem !== nothing
         ch = Channel{TestItemResult}(1)
         testitem.workerid[] = worker.pid
@@ -415,70 +430,55 @@ function start_and_manage_worker(
                 ts = testitem_result.testset
                 push!(testitem.testsets, ts)
                 push!(testitem.stats, testitem_result.stats)
-                print_errors_and_captured_logs(testitem, nretries + 1; logs)
+                print_errors_and_captured_logs(testitem, run_number; logs)
                 report_empty_testsets(testitem, ts)
                 # Run GC to free memory on the worker before next testitem.
                 @debugv 2 "Running GC on $worker"
                 remote_fetch(worker, :(GC.gc(true); GC.gc(false)))
-                # If the result isn't a pass, we throw to go to the outer try-catch
-                throw_if_failed(ts)
-                testitem = next_testitem(testitems, testitem.id[])
-                nretries = 0
+                if ts.anynonpass && (run_number != (1 + retry_limit))
+                    run_number += 1
+                    @info "Retrying $(repr(testitem.name)) on $worker. Run=$run_number."
+                else
+                    testitem = next_testitem(testitems, testitem.id[])
+                    run_number = 1
+                end
             finally
                 close(timer)
             end
         catch e
             @debugv 2 "Error" exception=e
-            if !(e isa WorkerTerminatedException || e isa TimeoutException || e isa TestSetFailure)
-                # we don't expect any other kind of error, so rethrow, which will propagate
-                # back up to the main coordinator task and throw to the user
-                rethrow()
-            end
-
-            if !(e isa TestSetFailure)
-                println(DEFAULT_STDOUT[])
-                # Explicitly show captured logs or say there weren't any in case we're about
-                # to terminte the worker
-                _print_captured_logs(DEFAULT_STDOUT[], testitem, nretries + 1)
-            end
-
+            println(DEFAULT_STDOUT[])
+            _print_captured_logs(DEFAULT_STDOUT[], testitem, run_number)
+            # Handle the exception
             if e isa TimeoutException
                 @debugv 1 "Test item $(repr(testitem.name)) timed out. Terminating worker $worker"
                 terminate!(worker)
                 wait(worker)
+                @warn "$worker timed out evaluating test item $(repr(testitem.name)) afer $timeout seconds. \
+                    Recording test error."
+                record_timeout!(testitem, run_number, timeout)
+            elseif e isa WorkerTerminatedException
+                @warn "$worker died evaluating test item $(repr(testitem.name)). \
+                    Recording test error."
+                record_worker_terminated!(testitem, run_number)
+            else
+                # We don't expect any other kind of error, so rethrow, which will propagate
+                # back up to the main coordinator task and throw to the user
+                rethrow()
             end
-            if nretries == retry_limit
-                if e isa TimeoutException
-                    @warn "$worker timed out evaluating test item $(repr(testitem.name)) afer $timeout seconds. \
-                        Recording test error, and starting a new worker."
-                    record_test_error!(testitem, nretries + 1)
-                elseif e isa WorkerTerminatedException
-                    @warn "$worker died evaluating test item $(repr(testitem.name)). \
-                        Recording test error, and starting a new worker."
-                    record_test_error!(testitem, nretries + 1)
-                else
-                    @assert e isa TestSetFailure
-                    # We already printed the error and recorded the testset.
-                end
+            # Handle retries
+            if run_number == (1 + retry_limit)
                 testitem = next_testitem(testitems, testitem.id[])
-                nretries = 0
+                run_number = 1
             else
-                nretries += 1
-                if e isa TimeoutException
-                    @warn "$worker timed out evaluating test item $(repr(testitem.name)) afer $timeout seconds. \
-                        Starting a new worker and retrying. Retry=$nretries."
-                elseif e isa WorkerTerminatedException
-                    @warn "$worker died evaluating test item $(repr(testitem.name)). \
-                        Starting a new worker and retrying. Retry=$nretries."
-                else
-                    @assert e isa TestSetFailure
-                    @warn "Test item $(repr(testitem.name)) failed. Retrying on $worker. Retry=$nretries."
-                end
+                run_number += 1
+                @info "Retrying $(repr(testitem.name)) on a new worker. Run=$run_number."
             end
-            if (e isa TimeoutException || e isa WorkerTerminatedException)
+            # The worker was terminated, so replace it unless there are no more testitems to run
+            if testitem !== nothing
                 worker = start_worker(proj_name, nworker_threads, worker_init_expr, ntestitems)
             end
-            # now we loop back around to reschedule the testitem
+            # Now loop back around to reschedule the testitem
             continue
         end
     end
diff --git a/test/integrationtests.jl b/test/integrationtests.jl
index 59038214..0da345e3 100644
--- a/test/integrationtests.jl
+++ b/test/integrationtests.jl
@@ -604,29 +604,45 @@ end
     # Test the error is as expected
     err = only(non_passes(results))
     @test err.test_type == :nontest_error
-    @test err.value == string(ErrorException("test item \"Abort\" didn't succeed after 1 tries"))
+    @test err.value == string(ErrorException("Worker aborted evaluating test item \"Abort\" (run=1)"))
 end
 
 @testset "test retrying failing testitem" begin
     file = joinpath(TEST_FILES_DIR, "_retry_tests.jl")
-    results = encased_testset(()->runtests(file; nworkers=1, retries=2))
+    # This directory must match what's set in `_retry_tests`
+    tmpdir = joinpath("/tmp", "JL_RETESTITEMS_TEST_TMPDIR")
+    # must run with `testitem_timeout < 20` for test to timeout as expected.
+    # and must run with `nworkers > 0` for retries to be supported.
+    results = encased_testset(()->runtests(file; nworkers=1, retries=2, testitem_timeout=3))
     # Test we _ran_ each test-item the expected number of times
     read_count(x) = parse(Int, read(x, String))
     # Passes on second attempt, so only need to retry once.
-    @test read_count(joinpath(tempdir(), "num_runs_1")) == 2
+    @test read_count(joinpath(tmpdir, "num_runs_1")) == 2
     # Doesn't pass til third attempt, so needs both retries.
-    @test read_count(joinpath(tempdir(), "num_runs_2")) == 3
+    @test read_count(joinpath(tmpdir, "num_runs_2")) == 3
     # Doesn't pass ever, so need all retries. This testitem set `retries=4` which is greater
     # than the `retries=2` that `runtests` set, so we should retry 4 times.
-    @test read_count(joinpath(tempdir(), "num_runs_3")) == 5
+    @test read_count(joinpath(tmpdir, "num_runs_3")) == 5
     # Doesn't pass ever, so need all retries. This testitem set `retries=1` which is less
     # than the `retries=2` that `runtests` set, so we should retry 2 times.
-    @test read_count(joinpath(tempdir(), "num_runs_4")) == 3
+    @test read_count(joinpath(tmpdir, "num_runs_4")) == 3
+    # Times out always, so should retry as many times as allowed.
+    # Since it will be a new worker for each retry, we write one file for each.
+    @test count(contains("num_runs_5"), readdir(tmpdir)) == 3
+    # Times out on first run, then passes on second attempt.
+    @test count(contains("num_runs_6"), readdir(tmpdir)) == 2
 
     # Test we _report_ the expected number of test-items
-    @test n_tests(results) == 4
-    # The first two testitems pass on retry.
-    @test n_passed(results) == 2
+    @test n_tests(results) == 6
+    # Testitems 1, 2, and 6 pass on retry.
+    @test n_passed(results) == 3
+
+    # Clear out any files created by this testset
+    foreach(readdir(tmpdir; join=true)) do tmp
+        # `force` in case it gets cleaned up between `readdir` and `rm`
+        contains(tmp, "num_runs") && rm(tmp; force=true)
+    end
+    rm(tmpdir; force=true)
 end
 
 @testset "testitem timeout" begin
@@ -641,7 +657,7 @@ end
     # Test the error is as expected
     err = only(non_passes(results))
     @test err.test_type == :nontest_error
-    @test err.value == string(ErrorException("test item \"Test item takes 60 seconds\" didn't succeed after 1 tries"))
+    @test err.value == string(ErrorException("Timed out after 4s evaluating test item \"Test item takes 60 seconds\" (run=1)"))
 end
 
 @testset "Error outside `@testitem`" begin
diff --git a/test/junit_xml.jl b/test/junit_xml.jl
index eb8d8ee2..d64c3d09 100644
--- a/test/junit_xml.jl
+++ b/test/junit_xml.jl
@@ -100,11 +100,25 @@ end
         mktempdir() do dir
             withenv("RETESTITEMS_REPORT_LOCATION" => dir, "RETESTITEMS_NWORKERS" => nworkers) do
                 try # Ignore the fact that the `_retry_tests.jl` testset has failures/errors.
-                    run(`$(Base.julia_cmd()) --project -e 'using ReTestItems; runtests("testfiles/_retry_tests.jl"; report=true, retries=2)'`)
+                    run(`$(Base.julia_cmd()) --project -e 'using ReTestItems; runtests("testfiles/_retry_tests.jl"; testitem_timeout=5, report=true, retries=2)'`)
                 catch
+                finally
+                    # running `_retry_tests` creates files which we need to clean up before
+                    # we run `_retry_tests` again, because these files store state that is
+                    # used to control the behaviour of the tests.
+                    foreach(readdir(joinpath("/tmp", "JL_RETESTITEMS_TEST_TMPDIR"); join=true)) do tmp
+                        rm(tmp; force=true)
+                    end
                 end
                 report = only(filter(endswith("xml"), readdir(dir, join=true)))
-                test_reference(joinpath(REF_DIR, "retry_tests_report.xml"), report)
+                # timeouts are not supported when running without workers, so
+                # with 0 workers the "retry tests" that are expected to timeout will pass.
+                ref = if nworkers == 0
+                    joinpath(REF_DIR, "retry_tests_report_0worker.xml")
+                else
+                    joinpath(REF_DIR, "retry_tests_report_1worker.xml")
+                end
+                test_reference(ref, report)
             end
         end
     end
diff --git a/test/references/retry_tests_report.xml b/test/references/retry_tests_report.xml
deleted file mode 100644
index d51eef63..00000000
--- a/test/references/retry_tests_report.xml
+++ /dev/null
@@ -1,222 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<testsuites timestamp="2023-05-09T17:29:21.338" time="0.561" tests="13" skipped="0" failures="10" errors="1">
-<testsuite name="test/testfiles/_retry_tests.jl" timestamp="2023-05-09T17:29:21.338" time="0.561" tests="13" skipped="0" failures="10" errors="1">
-	<testcase name="Pass on second run" timestamp="2023-05-09T17:29:21.338" time="0.093" tests="1" skipped="0" failures="1" errors="0">
-		<properties>
-		<property name="dd_tags[perf.bytes]" value="2479618"></property>
-		<property name="dd_tags[perf.allocs]" value="40557"></property>
-		<property name="dd_tags[perf.gctime]" value="0.0"></property>
-		<property name="dd_tags[perf.compile_time]" value="0.033515877"></property>
-		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
-		<property name="dd_tags[perf.eval_time]" value="0.003170539"></property>
-		</properties>
-		<error message="Test failed at test/testfiles/_retry_tests.jl:15">Captured logs for test setup "StatefulSetup" (dependency of "Pass on second run") at test/testfiles/_retry_tests.jl:1 on worker 63497
-These are the test setup logs.
-No Captured Logs for test item "Pass on second run" at test/testfiles/_retry_tests.jl:10 on worker 63497
-Error in testset "Pass on second run" on worker 63497:
-Test Failed at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:15
-  Expression: x
-		</error>
-	</testcase>
-	<testcase name="Pass on second run" timestamp="2023-05-09T17:29:21.575" time="0.002" tests="1" skipped="0" failures="0" errors="0">
-		<properties>
-		<property name="dd_tags[perf.bytes]" value="75878"></property>
-		<property name="dd_tags[perf.allocs]" value="1193"></property>
-		<property name="dd_tags[perf.gctime]" value="0.0"></property>
-		<property name="dd_tags[perf.compile_time]" value="1.25e-7"></property>
-		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
-		<property name="dd_tags[perf.eval_time]" value="0.002476959"></property>
-		</properties>
-	</testcase>
-	<testcase name="Error, then Fail, then Pass" timestamp="2023-05-09T17:29:21.628" time="0.435" tests="1" skipped="0" failures="0" errors="1">
-		<properties>
-		<property name="dd_tags[perf.bytes]" value="72685834"></property>
-		<property name="dd_tags[perf.allocs]" value="1352181"></property>
-		<property name="dd_tags[perf.gctime]" value="0.004853958"></property>
-		<property name="dd_tags[perf.compile_time]" value="0.36293654"></property>
-		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
-		<property name="dd_tags[perf.eval_time]" value="0.06754441800000005"></property>
-		</properties>
-		<error message="Error during test at test/testfiles/_retry_tests.jl:24">Captured logs for test setup "StatefulSetup" (dependency of "Error, then Fail, then Pass") at test/testfiles/_retry_tests.jl:1 on worker 63497
-These are the test setup logs.
-Captured Logs for test item "Error, then Fail, then Pass" at test/testfiles/_retry_tests.jl:18 on worker 63497
-These are the logs for run number: 1
-Error in testset "Error, then Fail, then Pass" on worker 63497:
-Error During Test at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:24
-  Test threw exception
-  Expression: not_defined_err
-  UndefVarError: not_defined_err not defined
-  Stacktrace:
-   [1] macro expansion
-     @ /Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/Test.jl:464 [inlined]
-   [2] top-level scope
-     @ /repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:24
-		</error>
-	</testcase>
-	<testcase name="Error, then Fail, then Pass" timestamp="2023-05-09T17:29:22.123" time="0.008" tests="1" skipped="0" failures="1" errors="0">
-		<properties>
-		<property name="dd_tags[perf.bytes]" value="212803"></property>
-		<property name="dd_tags[perf.allocs]" value="3670"></property>
-		<property name="dd_tags[perf.gctime]" value="0.0"></property>
-		<property name="dd_tags[perf.compile_time]" value="0.004507333"></property>
-		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
-		<property name="dd_tags[perf.eval_time]" value="0.0037332090000000004"></property>
-		</properties>
-		<error message="Test failed at test/testfiles/_retry_tests.jl:26">Captured logs for test setup "StatefulSetup" (dependency of "Error, then Fail, then Pass") at test/testfiles/_retry_tests.jl:1 on worker 63497
-These are the test setup logs.
-Captured Logs for test item "Error, then Fail, then Pass" at test/testfiles/_retry_tests.jl:18 on worker 63497
-These are the logs for run number: 2
-Error in testset "Error, then Fail, then Pass" on worker 63497:
-Test Failed at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:26
-  Expression: 2 + 2 == 5
-   Evaluated: 4 == 5
-		</error>
-	</testcase>
-	<testcase name="Error, then Fail, then Pass" timestamp="2023-05-09T17:29:22.18" time="0.003" tests="1" skipped="0" failures="0" errors="0">
-		<properties>
-		<property name="dd_tags[perf.bytes]" value="100270"></property>
-		<property name="dd_tags[perf.allocs]" value="1675"></property>
-		<property name="dd_tags[perf.gctime]" value="0.0"></property>
-		<property name="dd_tags[perf.compile_time]" value="1.66e-7"></property>
-		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
-		<property name="dd_tags[perf.eval_time]" value="0.0033688340000000002"></property>
-		</properties>
-	</testcase>
-	<testcase name="Has retries=4 and always fails" timestamp="2023-05-09T17:29:22.231" time="0.002" tests="1" skipped="0" failures="1" errors="0">
-		<properties>
-		<property name="dd_tags[perf.bytes]" value="73062"></property>
-		<property name="dd_tags[perf.allocs]" value="1137"></property>
-		<property name="dd_tags[perf.gctime]" value="0.0"></property>
-		<property name="dd_tags[perf.compile_time]" value="2.08e-7"></property>
-		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
-		<property name="dd_tags[perf.eval_time]" value="0.00243075"></property>
-		</properties>
-		<error message="Test failed at test/testfiles/_retry_tests.jl:35">Captured logs for test setup "StatefulSetup" (dependency of "Has retries=4 and always fails") at test/testfiles/_retry_tests.jl:1 on worker 63497
-These are the test setup logs.
-No Captured Logs for test item "Has retries=4 and always fails" at test/testfiles/_retry_tests.jl:32 on worker 63497
-Error in testset "Has retries=4 and always fails" on worker 63497:
-Test Failed at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:35
-  Expression: false
-		</error>
-	</testcase>
-	<testcase name="Has retries=4 and always fails" timestamp="2023-05-09T17:29:22.281" time="0.002" tests="1" skipped="0" failures="1" errors="0">
-		<properties>
-		<property name="dd_tags[perf.bytes]" value="73062"></property>
-		<property name="dd_tags[perf.allocs]" value="1137"></property>
-		<property name="dd_tags[perf.gctime]" value="0.0"></property>
-		<property name="dd_tags[perf.compile_time]" value="2.08e-7"></property>
-		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
-		<property name="dd_tags[perf.eval_time]" value="0.0022761250000000004"></property>
-		</properties>
-		<error message="Test failed at test/testfiles/_retry_tests.jl:35">Captured logs for test setup "StatefulSetup" (dependency of "Has retries=4 and always fails") at test/testfiles/_retry_tests.jl:1 on worker 63497
-These are the test setup logs.
-No Captured Logs for test item "Has retries=4 and always fails" at test/testfiles/_retry_tests.jl:32 on worker 63497
-Error in testset "Has retries=4 and always fails" on worker 63497:
-Test Failed at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:35
-  Expression: false
-		</error>
-	</testcase>
-	<testcase name="Has retries=4 and always fails" timestamp="2023-05-09T17:29:22.332" time="0.002" tests="1" skipped="0" failures="1" errors="0">
-		<properties>
-		<property name="dd_tags[perf.bytes]" value="73062"></property>
-		<property name="dd_tags[perf.allocs]" value="1137"></property>
-		<property name="dd_tags[perf.gctime]" value="0.0"></property>
-		<property name="dd_tags[perf.compile_time]" value="8.4e-8"></property>
-		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
-		<property name="dd_tags[perf.eval_time]" value="0.002157541"></property>
-		</properties>
-		<error message="Test failed at test/testfiles/_retry_tests.jl:35">Captured logs for test setup "StatefulSetup" (dependency of "Has retries=4 and always fails") at test/testfiles/_retry_tests.jl:1 on worker 63497
-These are the test setup logs.
-No Captured Logs for test item "Has retries=4 and always fails" at test/testfiles/_retry_tests.jl:32 on worker 63497
-Error in testset "Has retries=4 and always fails" on worker 63497:
-Test Failed at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:35
-  Expression: false
-		</error>
-	</testcase>
-	<testcase name="Has retries=4 and always fails" timestamp="2023-05-09T17:29:22.383" time="0.002" tests="1" skipped="0" failures="1" errors="0">
-		<properties>
-		<property name="dd_tags[perf.bytes]" value="73062"></property>
-		<property name="dd_tags[perf.allocs]" value="1137"></property>
-		<property name="dd_tags[perf.gctime]" value="0.0"></property>
-		<property name="dd_tags[perf.compile_time]" value="1.25e-7"></property>
-		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
-		<property name="dd_tags[perf.eval_time]" value="0.002307417"></property>
-		</properties>
-		<error message="Test failed at test/testfiles/_retry_tests.jl:35">Captured logs for test setup "StatefulSetup" (dependency of "Has retries=4 and always fails") at test/testfiles/_retry_tests.jl:1 on worker 63497
-These are the test setup logs.
-No Captured Logs for test item "Has retries=4 and always fails" at test/testfiles/_retry_tests.jl:32 on worker 63497
-Error in testset "Has retries=4 and always fails" on worker 63497:
-Test Failed at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:35
-  Expression: false
-		</error>
-	</testcase>
-	<testcase name="Has retries=4 and always fails" timestamp="2023-05-09T17:29:22.433" time="0.002" tests="1" skipped="0" failures="1" errors="0">
-		<properties>
-		<property name="dd_tags[perf.bytes]" value="73062"></property>
-		<property name="dd_tags[perf.allocs]" value="1137"></property>
-		<property name="dd_tags[perf.gctime]" value="0.0"></property>
-		<property name="dd_tags[perf.compile_time]" value="1.25e-7"></property>
-		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
-		<property name="dd_tags[perf.eval_time]" value="0.002202792"></property>
-		</properties>
-		<error message="Test failed at test/testfiles/_retry_tests.jl:35">Captured logs for test setup "StatefulSetup" (dependency of "Has retries=4 and always fails") at test/testfiles/_retry_tests.jl:1 on worker 63497
-These are the test setup logs.
-No Captured Logs for test item "Has retries=4 and always fails" at test/testfiles/_retry_tests.jl:32 on worker 63497
-Error in testset "Has retries=4 and always fails" on worker 63497:
-Test Failed at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:35
-  Expression: false
-		</error>
-	</testcase>
-	<testcase name="Has retries=1 and always fails" timestamp="2023-05-09T17:29:22.483" time="0.002" tests="1" skipped="0" failures="1" errors="0">
-		<properties>
-		<property name="dd_tags[perf.bytes]" value="73062"></property>
-		<property name="dd_tags[perf.allocs]" value="1137"></property>
-		<property name="dd_tags[perf.gctime]" value="0.0"></property>
-		<property name="dd_tags[perf.compile_time]" value="1.25e-7"></property>
-		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
-		<property name="dd_tags[perf.eval_time]" value="0.0021228329999999998"></property>
-		</properties>
-		<error message="Test failed at test/testfiles/_retry_tests.jl:41">Captured logs for test setup "StatefulSetup" (dependency of "Has retries=1 and always fails") at test/testfiles/_retry_tests.jl:1 on worker 63497
-These are the test setup logs.
-No Captured Logs for test item "Has retries=1 and always fails" at test/testfiles/_retry_tests.jl:38 on worker 63497
-Error in testset "Has retries=1 and always fails" on worker 63497:
-Test Failed at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:41
-  Expression: false
-		</error>
-	</testcase>
-	<testcase name="Has retries=1 and always fails" timestamp="2023-05-09T17:29:22.532" time="0.002" tests="1" skipped="0" failures="1" errors="0">
-		<properties>
-		<property name="dd_tags[perf.bytes]" value="73062"></property>
-		<property name="dd_tags[perf.allocs]" value="1137"></property>
-		<property name="dd_tags[perf.gctime]" value="0.0"></property>
-		<property name="dd_tags[perf.compile_time]" value="8.4e-8"></property>
-		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
-		<property name="dd_tags[perf.eval_time]" value="0.002159833"></property>
-		</properties>
-		<error message="Test failed at test/testfiles/_retry_tests.jl:41">Captured logs for test setup "StatefulSetup" (dependency of "Has retries=1 and always fails") at test/testfiles/_retry_tests.jl:1 on worker 63497
-These are the test setup logs.
-No Captured Logs for test item "Has retries=1 and always fails" at test/testfiles/_retry_tests.jl:38 on worker 63497
-Error in testset "Has retries=1 and always fails" on worker 63497:
-Test Failed at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:41
-  Expression: false
-		</error>
-	</testcase>
-	<testcase name="Has retries=1 and always fails" timestamp="2023-05-09T17:29:22.582" time="0.002" tests="1" skipped="0" failures="1" errors="0">
-		<properties>
-		<property name="dd_tags[perf.bytes]" value="73062"></property>
-		<property name="dd_tags[perf.allocs]" value="1137"></property>
-		<property name="dd_tags[perf.gctime]" value="0.0"></property>
-		<property name="dd_tags[perf.compile_time]" value="8.3e-8"></property>
-		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
-		<property name="dd_tags[perf.eval_time]" value="0.002272417"></property>
-		</properties>
-		<error message="Test failed at test/testfiles/_retry_tests.jl:41">Captured logs for test setup "StatefulSetup" (dependency of "Has retries=1 and always fails") at test/testfiles/_retry_tests.jl:1 on worker 63497
-These are the test setup logs.
-No Captured Logs for test item "Has retries=1 and always fails" at test/testfiles/_retry_tests.jl:38 on worker 63497
-Error in testset "Has retries=1 and always fails" on worker 63497:
-Test Failed at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:41
-  Expression: false
-		</error>
-	</testcase>
-</testsuite>
-</testsuites>
\ No newline at end of file
diff --git a/test/references/retry_tests_report_0worker.xml b/test/references/retry_tests_report_0worker.xml
new file mode 100644
index 00000000..c8d72bea
--- /dev/null
+++ b/test/references/retry_tests_report_0worker.xml
@@ -0,0 +1,242 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<testsuites timestamp="2023-06-21T16:43:38.289" time="40.666" tests="15" skipped="0" failures="10" errors="1">
+<testsuite name="test/testfiles/_retry_tests.jl" timestamp="2023-06-21T16:43:38.289" time="40.666" tests="15" skipped="0" failures="10" errors="1">
+	<testcase name="Pass on second run" timestamp="2023-06-21T16:43:38.289" time="0.157" tests="1" skipped="0" failures="1" errors="0">
+		<properties>
+		<property name="dd_tags[perf.bytes]" value="2476162"></property>
+		<property name="dd_tags[perf.allocs]" value="40391"></property>
+		<property name="dd_tags[perf.gctime]" value="0.0"></property>
+		<property name="dd_tags[perf.compile_time]" value="0.028837001"></property>
+		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
+		<property name="dd_tags[perf.eval_time]" value="0.0029076239999999975"></property>
+		</properties>
+		<error message="Test failed at test/testfiles/_retry_tests.jl:17">Captured logs for test setup "StatefulSetup" (dependency of "Pass on second run") at test/testfiles/_retry_tests.jl:1 on worker 78453
+These are the test setup logs.
+No Captured Logs for test item "Pass on second run" at test/testfiles/_retry_tests.jl:12 on worker 78453
+Error in testset "Pass on second run" on worker 78453:
+Test Failed at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:17
+  Expression: x
+		</error>
+	</testcase>
+	<testcase name="Pass on second run" timestamp="2023-06-21T16:43:38.623" time="0.002" tests="1" skipped="0" failures="0" errors="0">
+		<properties>
+		<property name="dd_tags[perf.bytes]" value="79494"></property>
+		<property name="dd_tags[perf.allocs]" value="1227"></property>
+		<property name="dd_tags[perf.gctime]" value="0.0"></property>
+		<property name="dd_tags[perf.compile_time]" value="0.0"></property>
+		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
+		<property name="dd_tags[perf.eval_time]" value="0.0022935"></property>
+		</properties>
+	</testcase>
+	<testcase name="Error, then Fail, then Pass" timestamp="2023-06-21T16:43:38.626" time="0.443" tests="1" skipped="0" failures="0" errors="1">
+		<properties>
+		<property name="dd_tags[perf.bytes]" value="72680426"></property>
+		<property name="dd_tags[perf.allocs]" value="1352076"></property>
+		<property name="dd_tags[perf.gctime]" value="0.004835375"></property>
+		<property name="dd_tags[perf.compile_time]" value="0.370915293"></property>
+		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
+		<property name="dd_tags[perf.eval_time]" value="0.06755041500000003"></property>
+		</properties>
+		<error message="Error during test at test/testfiles/_retry_tests.jl:26">Captured logs for test setup "StatefulSetup" (dependency of "Error, then Fail, then Pass") at test/testfiles/_retry_tests.jl:1 on worker 78453
+These are the test setup logs.
+Captured Logs for test item "Error, then Fail, then Pass" at test/testfiles/_retry_tests.jl:20 on worker 78453
+These are the logs for run number: 1
+Error in testset "Error, then Fail, then Pass" on worker 78453:
+Error During Test at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:26
+  Test threw exception
+  Expression: not_defined_err
+  UndefVarError: not_defined_err not defined
+  Stacktrace:
+   [1] macro expansion
+     @ /Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/Test.jl:464 [inlined]
+   [2] top-level scope
+     @ /repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:26
+		</error>
+	</testcase>
+	<testcase name="Error, then Fail, then Pass" timestamp="2023-06-21T16:43:39.129" time="0.008" tests="1" skipped="0" failures="1" errors="0">
+		<properties>
+		<property name="dd_tags[perf.bytes]" value="216419"></property>
+		<property name="dd_tags[perf.allocs]" value="3704"></property>
+		<property name="dd_tags[perf.gctime]" value="0.0"></property>
+		<property name="dd_tags[perf.compile_time]" value="0.005032125"></property>
+		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
+		<property name="dd_tags[perf.eval_time]" value="0.0034412920000000003"></property>
+		</properties>
+		<error message="Test failed at test/testfiles/_retry_tests.jl:28">Captured logs for test setup "StatefulSetup" (dependency of "Error, then Fail, then Pass") at test/testfiles/_retry_tests.jl:1 on worker 78453
+These are the test setup logs.
+Captured Logs for test item "Error, then Fail, then Pass" at test/testfiles/_retry_tests.jl:20 on worker 78453
+These are the logs for run number: 2
+Error in testset "Error, then Fail, then Pass" on worker 78453:
+Test Failed at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:28
+  Expression: 2 + 2 == 5
+   Evaluated: 4 == 5
+		</error>
+	</testcase>
+	<testcase name="Error, then Fail, then Pass" timestamp="2023-06-21T16:43:39.189" time="0.003" tests="1" skipped="0" failures="0" errors="0">
+		<properties>
+		<property name="dd_tags[perf.bytes]" value="103886"></property>
+		<property name="dd_tags[perf.allocs]" value="1709"></property>
+		<property name="dd_tags[perf.gctime]" value="0.0"></property>
+		<property name="dd_tags[perf.compile_time]" value="4.2e-8"></property>
+		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
+		<property name="dd_tags[perf.eval_time]" value="0.003226124"></property>
+		</properties>
+	</testcase>
+	<testcase name="Has retries=4 and always fails" timestamp="2023-06-21T16:43:39.192" time="0.002" tests="1" skipped="0" failures="1" errors="0">
+		<properties>
+		<property name="dd_tags[perf.bytes]" value="76678"></property>
+		<property name="dd_tags[perf.allocs]" value="1171"></property>
+		<property name="dd_tags[perf.gctime]" value="0.0"></property>
+		<property name="dd_tags[perf.compile_time]" value="1.25e-7"></property>
+		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
+		<property name="dd_tags[perf.eval_time]" value="0.0022902079999999997"></property>
+		</properties>
+		<error message="Test failed at test/testfiles/_retry_tests.jl:37">Captured logs for test setup "StatefulSetup" (dependency of "Has retries=4 and always fails") at test/testfiles/_retry_tests.jl:1 on worker 78453
+These are the test setup logs.
+No Captured Logs for test item "Has retries=4 and always fails" at test/testfiles/_retry_tests.jl:34 on worker 78453
+Error in testset "Has retries=4 and always fails" on worker 78453:
+Test Failed at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:37
+  Expression: false
+		</error>
+	</testcase>
+	<testcase name="Has retries=4 and always fails" timestamp="2023-06-21T16:43:39.244" time="0.002" tests="1" skipped="0" failures="1" errors="0">
+		<properties>
+		<property name="dd_tags[perf.bytes]" value="76678"></property>
+		<property name="dd_tags[perf.allocs]" value="1171"></property>
+		<property name="dd_tags[perf.gctime]" value="0.0"></property>
+		<property name="dd_tags[perf.compile_time]" value="0.0"></property>
+		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
+		<property name="dd_tags[perf.eval_time]" value="0.002130708"></property>
+		</properties>
+		<error message="Test failed at test/testfiles/_retry_tests.jl:37">Captured logs for test setup "StatefulSetup" (dependency of "Has retries=4 and always fails") at test/testfiles/_retry_tests.jl:1 on worker 78453
+These are the test setup logs.
+No Captured Logs for test item "Has retries=4 and always fails" at test/testfiles/_retry_tests.jl:34 on worker 78453
+Error in testset "Has retries=4 and always fails" on worker 78453:
+Test Failed at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:37
+  Expression: false
+		</error>
+	</testcase>
+	<testcase name="Has retries=4 and always fails" timestamp="2023-06-21T16:43:39.295" time="0.002" tests="1" skipped="0" failures="1" errors="0">
+		<properties>
+		<property name="dd_tags[perf.bytes]" value="76678"></property>
+		<property name="dd_tags[perf.allocs]" value="1171"></property>
+		<property name="dd_tags[perf.gctime]" value="0.0"></property>
+		<property name="dd_tags[perf.compile_time]" value="4.2e-8"></property>
+		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
+		<property name="dd_tags[perf.eval_time]" value="0.002129916"></property>
+		</properties>
+		<error message="Test failed at test/testfiles/_retry_tests.jl:37">Captured logs for test setup "StatefulSetup" (dependency of "Has retries=4 and always fails") at test/testfiles/_retry_tests.jl:1 on worker 78453
+These are the test setup logs.
+No Captured Logs for test item "Has retries=4 and always fails" at test/testfiles/_retry_tests.jl:34 on worker 78453
+Error in testset "Has retries=4 and always fails" on worker 78453:
+Test Failed at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:37
+  Expression: false
+		</error>
+	</testcase>
+	<testcase name="Has retries=4 and always fails" timestamp="2023-06-21T16:43:39.347" time="0.002" tests="1" skipped="0" failures="1" errors="0">
+		<properties>
+		<property name="dd_tags[perf.bytes]" value="76678"></property>
+		<property name="dd_tags[perf.allocs]" value="1171"></property>
+		<property name="dd_tags[perf.gctime]" value="0.0"></property>
+		<property name="dd_tags[perf.compile_time]" value="1.66e-7"></property>
+		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
+		<property name="dd_tags[perf.eval_time]" value="0.002082001"></property>
+		</properties>
+		<error message="Test failed at test/testfiles/_retry_tests.jl:37">Captured logs for test setup "StatefulSetup" (dependency of "Has retries=4 and always fails") at test/testfiles/_retry_tests.jl:1 on worker 78453
+These are the test setup logs.
+No Captured Logs for test item "Has retries=4 and always fails" at test/testfiles/_retry_tests.jl:34 on worker 78453
+Error in testset "Has retries=4 and always fails" on worker 78453:
+Test Failed at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:37
+  Expression: false
+		</error>
+	</testcase>
+	<testcase name="Has retries=4 and always fails" timestamp="2023-06-21T16:43:39.398" time="0.002" tests="1" skipped="0" failures="1" errors="0">
+		<properties>
+		<property name="dd_tags[perf.bytes]" value="76678"></property>
+		<property name="dd_tags[perf.allocs]" value="1171"></property>
+		<property name="dd_tags[perf.gctime]" value="0.0"></property>
+		<property name="dd_tags[perf.compile_time]" value="8.3e-8"></property>
+		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
+		<property name="dd_tags[perf.eval_time]" value="0.002061083"></property>
+		</properties>
+		<error message="Test failed at test/testfiles/_retry_tests.jl:37">Captured logs for test setup "StatefulSetup" (dependency of "Has retries=4 and always fails") at test/testfiles/_retry_tests.jl:1 on worker 78453
+These are the test setup logs.
+No Captured Logs for test item "Has retries=4 and always fails" at test/testfiles/_retry_tests.jl:34 on worker 78453
+Error in testset "Has retries=4 and always fails" on worker 78453:
+Test Failed at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:37
+  Expression: false
+		</error>
+	</testcase>
+	<testcase name="Has retries=1 and always fails" timestamp="2023-06-21T16:43:39.401" time="0.002" tests="1" skipped="0" failures="1" errors="0">
+		<properties>
+		<property name="dd_tags[perf.bytes]" value="76678"></property>
+		<property name="dd_tags[perf.allocs]" value="1171"></property>
+		<property name="dd_tags[perf.gctime]" value="0.0"></property>
+		<property name="dd_tags[perf.compile_time]" value="4.2e-8"></property>
+		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
+		<property name="dd_tags[perf.eval_time]" value="0.0017605"></property>
+		</properties>
+		<error message="Test failed at test/testfiles/_retry_tests.jl:43">Captured logs for test setup "StatefulSetup" (dependency of "Has retries=1 and always fails") at test/testfiles/_retry_tests.jl:1 on worker 78453
+These are the test setup logs.
+No Captured Logs for test item "Has retries=1 and always fails" at test/testfiles/_retry_tests.jl:40 on worker 78453
+Error in testset "Has retries=1 and always fails" on worker 78453:
+Test Failed at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:43
+  Expression: false
+		</error>
+	</testcase>
+	<testcase name="Has retries=1 and always fails" timestamp="2023-06-21T16:43:39.452" time="0.002" tests="1" skipped="0" failures="1" errors="0">
+		<properties>
+		<property name="dd_tags[perf.bytes]" value="76678"></property>
+		<property name="dd_tags[perf.allocs]" value="1171"></property>
+		<property name="dd_tags[perf.gctime]" value="0.0"></property>
+		<property name="dd_tags[perf.compile_time]" value="8.4e-8"></property>
+		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
+		<property name="dd_tags[perf.eval_time]" value="0.002114"></property>
+		</properties>
+		<error message="Test failed at test/testfiles/_retry_tests.jl:43">Captured logs for test setup "StatefulSetup" (dependency of "Has retries=1 and always fails") at test/testfiles/_retry_tests.jl:1 on worker 78453
+These are the test setup logs.
+No Captured Logs for test item "Has retries=1 and always fails" at test/testfiles/_retry_tests.jl:40 on worker 78453
+Error in testset "Has retries=1 and always fails" on worker 78453:
+Test Failed at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:43
+  Expression: false
+		</error>
+	</testcase>
+	<testcase name="Has retries=1 and always fails" timestamp="2023-06-21T16:43:39.503" time="0.002" tests="1" skipped="0" failures="1" errors="0">
+		<properties>
+		<property name="dd_tags[perf.bytes]" value="76678"></property>
+		<property name="dd_tags[perf.allocs]" value="1171"></property>
+		<property name="dd_tags[perf.gctime]" value="0.0"></property>
+		<property name="dd_tags[perf.compile_time]" value="4.2e-8"></property>
+		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
+		<property name="dd_tags[perf.eval_time]" value="0.002125624"></property>
+		</properties>
+		<error message="Test failed at test/testfiles/_retry_tests.jl:43">Captured logs for test setup "StatefulSetup" (dependency of "Has retries=1 and always fails") at test/testfiles/_retry_tests.jl:1 on worker 78453
+These are the test setup logs.
+No Captured Logs for test item "Has retries=1 and always fails" at test/testfiles/_retry_tests.jl:40 on worker 78453
+Error in testset "Has retries=1 and always fails" on worker 78453:
+Test Failed at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:43
+  Expression: false
+		</error>
+	</testcase>
+	<testcase name="Timeout always" timestamp="2023-06-21T16:43:39.506" time="20.011" tests="1" skipped="0" failures="0" errors="0">
+		<properties>
+		<property name="dd_tags[perf.bytes]" value="487732"></property>
+		<property name="dd_tags[perf.allocs]" value="8937"></property>
+		<property name="dd_tags[perf.gctime]" value="0.0"></property>
+		<property name="dd_tags[perf.compile_time]" value="0.006507084"></property>
+		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
+		<property name="dd_tags[perf.eval_time]" value="20.004509791"></property>
+		</properties>
+	</testcase>
+	<testcase name="Timeout first, pass after" timestamp="2023-06-21T16:43:59.518" time="20.024" tests="1" skipped="0" failures="0" errors="0">
+		<properties>
+		<property name="dd_tags[perf.bytes]" value="3374482"></property>
+		<property name="dd_tags[perf.allocs]" value="59585"></property>
+		<property name="dd_tags[perf.gctime]" value="0.0"></property>
+		<property name="dd_tags[perf.compile_time]" value="0.015807416"></property>
+		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
+		<property name="dd_tags[perf.eval_time]" value="20.008099625"></property>
+		</properties>
+	</testcase>
+</testsuite>
+</testsuites>
\ No newline at end of file
diff --git a/test/references/retry_tests_report_1worker.xml b/test/references/retry_tests_report_1worker.xml
new file mode 100644
index 00000000..d00e829b
--- /dev/null
+++ b/test/references/retry_tests_report_1worker.xml
@@ -0,0 +1,328 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<testsuites timestamp="2023-06-21T16:45:02.149" time="0.911" tests="18" skipped="0" failures="10" errors="5">
+<testsuite name="test/testfiles/_retry_tests.jl" timestamp="2023-06-21T16:45:02.149" time="0.911" tests="18" skipped="0" failures="10" errors="5">
+	<testcase name="Pass on second run" timestamp="2023-06-21T16:45:02.149" time="0.097" tests="1" skipped="0" failures="1" errors="0">
+		<properties>
+		<property name="dd_tags[perf.bytes]" value="2480386"></property>
+		<property name="dd_tags[perf.allocs]" value="40352"></property>
+		<property name="dd_tags[perf.gctime]" value="0.0"></property>
+		<property name="dd_tags[perf.compile_time]" value="0.02862379"></property>
+		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
+		<property name="dd_tags[perf.eval_time]" value="0.0027746679999999975"></property>
+		</properties>
+		<error message="Test failed at test/testfiles/_retry_tests.jl:17">Captured logs for test setup "StatefulSetup" (dependency of "Pass on second run") at test/testfiles/_retry_tests.jl:1 on worker 81105
+These are the test setup logs.
+No Captured Logs for test item "Pass on second run" at test/testfiles/_retry_tests.jl:12 on worker 81105
+Error in testset "Pass on second run" on worker 81105:
+Test Failed at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:17
+  Expression: x
+		</error>
+	</testcase>
+	<testcase name="Pass on second run" timestamp="2023-06-21T16:45:02.468" time="0.002" tests="1" skipped="0" failures="0" errors="0">
+		<properties>
+		<property name="dd_tags[perf.bytes]" value="79686"></property>
+		<property name="dd_tags[perf.allocs]" value="1232"></property>
+		<property name="dd_tags[perf.gctime]" value="0.0"></property>
+		<property name="dd_tags[perf.compile_time]" value="1.67e-7"></property>
+		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
+		<property name="dd_tags[perf.eval_time]" value="0.00225875"></property>
+		</properties>
+	</testcase>
+	<testcase name="Error, then Fail, then Pass" timestamp="2023-06-21T16:45:02.527" time="0.419" tests="1" skipped="0" failures="0" errors="1">
+		<properties>
+		<property name="dd_tags[perf.bytes]" value="72234978"></property>
+		<property name="dd_tags[perf.allocs]" value="1344089"></property>
+		<property name="dd_tags[perf.gctime]" value="0.003617292"></property>
+		<property name="dd_tags[perf.compile_time]" value="0.354842791"></property>
+		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
+		<property name="dd_tags[perf.eval_time]" value="0.060723833000000005"></property>
+		</properties>
+		<error message="Error during test at test/testfiles/_retry_tests.jl:26">Captured logs for test setup "StatefulSetup" (dependency of "Error, then Fail, then Pass") at test/testfiles/_retry_tests.jl:1 on worker 81105
+These are the test setup logs.
+Captured Logs for test item "Error, then Fail, then Pass" at test/testfiles/_retry_tests.jl:20 on worker 81105
+These are the logs for run number: 1
+Error in testset "Error, then Fail, then Pass" on worker 81105:
+Error During Test at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:26
+  Test threw exception
+  Expression: not_defined_err
+  UndefVarError: not_defined_err not defined
+  Stacktrace:
+   [1] macro expansion
+     @ /Applications/Julia-1.8.app/Contents/Resources/julia/share/julia/stdlib/v1.8/Test/src/Test.jl:464 [inlined]
+   [2] top-level scope
+     @ /repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:26
+		</error>
+	</testcase>
+	<testcase name="Error, then Fail, then Pass" timestamp="2023-06-21T16:45:03.015" time="0.008" tests="1" skipped="0" failures="1" errors="0">
+		<properties>
+		<property name="dd_tags[perf.bytes]" value="217427"></property>
+		<property name="dd_tags[perf.allocs]" value="3710"></property>
+		<property name="dd_tags[perf.gctime]" value="0.0"></property>
+		<property name="dd_tags[perf.compile_time]" value="0.005092666"></property>
+		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
+		<property name="dd_tags[perf.eval_time]" value="0.0033679170000000007"></property>
+		</properties>
+		<error message="Test failed at test/testfiles/_retry_tests.jl:28">Captured logs for test setup "StatefulSetup" (dependency of "Error, then Fail, then Pass") at test/testfiles/_retry_tests.jl:1 on worker 81105
+These are the test setup logs.
+Captured Logs for test item "Error, then Fail, then Pass" at test/testfiles/_retry_tests.jl:20 on worker 81105
+These are the logs for run number: 2
+Error in testset "Error, then Fail, then Pass" on worker 81105:
+Test Failed at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:28
+  Expression: 2 + 2 == 5
+   Evaluated: 4 == 5
+		</error>
+	</testcase>
+	<testcase name="Error, then Fail, then Pass" timestamp="2023-06-21T16:45:03.084" time="0.004" tests="1" skipped="0" failures="0" errors="0">
+		<properties>
+		<property name="dd_tags[perf.bytes]" value="104078"></property>
+		<property name="dd_tags[perf.allocs]" value="1714"></property>
+		<property name="dd_tags[perf.gctime]" value="0.0"></property>
+		<property name="dd_tags[perf.compile_time]" value="8.4e-8"></property>
+		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
+		<property name="dd_tags[perf.eval_time]" value="0.003518082"></property>
+		</properties>
+	</testcase>
+	<testcase name="Has retries=4 and always fails" timestamp="2023-06-21T16:45:03.145" time="0.002" tests="1" skipped="0" failures="1" errors="0">
+		<properties>
+		<property name="dd_tags[perf.bytes]" value="76870"></property>
+		<property name="dd_tags[perf.allocs]" value="1176"></property>
+		<property name="dd_tags[perf.gctime]" value="0.0"></property>
+		<property name="dd_tags[perf.compile_time]" value="2.5e-7"></property>
+		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
+		<property name="dd_tags[perf.eval_time]" value="0.0022587500000000003"></property>
+		</properties>
+		<error message="Test failed at test/testfiles/_retry_tests.jl:37">Captured logs for test setup "StatefulSetup" (dependency of "Has retries=4 and always fails") at test/testfiles/_retry_tests.jl:1 on worker 81105
+These are the test setup logs.
+No Captured Logs for test item "Has retries=4 and always fails" at test/testfiles/_retry_tests.jl:34 on worker 81105
+Error in testset "Has retries=4 and always fails" on worker 81105:
+Test Failed at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:37
+  Expression: false
+		</error>
+	</testcase>
+	<testcase name="Has retries=4 and always fails" timestamp="2023-06-21T16:45:03.206" time="0.002" tests="1" skipped="0" failures="1" errors="0">
+		<properties>
+		<property name="dd_tags[perf.bytes]" value="76870"></property>
+		<property name="dd_tags[perf.allocs]" value="1176"></property>
+		<property name="dd_tags[perf.gctime]" value="0.0"></property>
+		<property name="dd_tags[perf.compile_time]" value="8.4e-8"></property>
+		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
+		<property name="dd_tags[perf.eval_time]" value="0.002122999"></property>
+		</properties>
+		<error message="Test failed at test/testfiles/_retry_tests.jl:37">Captured logs for test setup "StatefulSetup" (dependency of "Has retries=4 and always fails") at test/testfiles/_retry_tests.jl:1 on worker 81105
+These are the test setup logs.
+No Captured Logs for test item "Has retries=4 and always fails" at test/testfiles/_retry_tests.jl:34 on worker 81105
+Error in testset "Has retries=4 and always fails" on worker 81105:
+Test Failed at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:37
+  Expression: false
+		</error>
+	</testcase>
+	<testcase name="Has retries=4 and always fails" timestamp="2023-06-21T16:45:03.26" time="0.002" tests="1" skipped="0" failures="1" errors="0">
+		<properties>
+		<property name="dd_tags[perf.bytes]" value="76870"></property>
+		<property name="dd_tags[perf.allocs]" value="1176"></property>
+		<property name="dd_tags[perf.gctime]" value="0.0"></property>
+		<property name="dd_tags[perf.compile_time]" value="8.3e-8"></property>
+		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
+		<property name="dd_tags[perf.eval_time]" value="0.002095625"></property>
+		</properties>
+		<error message="Test failed at test/testfiles/_retry_tests.jl:37">Captured logs for test setup "StatefulSetup" (dependency of "Has retries=4 and always fails") at test/testfiles/_retry_tests.jl:1 on worker 81105
+These are the test setup logs.
+No Captured Logs for test item "Has retries=4 and always fails" at test/testfiles/_retry_tests.jl:34 on worker 81105
+Error in testset "Has retries=4 and always fails" on worker 81105:
+Test Failed at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:37
+  Expression: false
+		</error>
+	</testcase>
+	<testcase name="Has retries=4 and always fails" timestamp="2023-06-21T16:45:03.314" time="0.002" tests="1" skipped="0" failures="1" errors="0">
+		<properties>
+		<property name="dd_tags[perf.bytes]" value="76870"></property>
+		<property name="dd_tags[perf.allocs]" value="1176"></property>
+		<property name="dd_tags[perf.gctime]" value="0.0"></property>
+		<property name="dd_tags[perf.compile_time]" value="1.66e-7"></property>
+		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
+		<property name="dd_tags[perf.eval_time]" value="0.0021430840000000004"></property>
+		</properties>
+		<error message="Test failed at test/testfiles/_retry_tests.jl:37">Captured logs for test setup "StatefulSetup" (dependency of "Has retries=4 and always fails") at test/testfiles/_retry_tests.jl:1 on worker 81105
+These are the test setup logs.
+No Captured Logs for test item "Has retries=4 and always fails" at test/testfiles/_retry_tests.jl:34 on worker 81105
+Error in testset "Has retries=4 and always fails" on worker 81105:
+Test Failed at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:37
+  Expression: false
+		</error>
+	</testcase>
+	<testcase name="Has retries=4 and always fails" timestamp="2023-06-21T16:45:03.369" time="0.002" tests="1" skipped="0" failures="1" errors="0">
+		<properties>
+		<property name="dd_tags[perf.bytes]" value="76870"></property>
+		<property name="dd_tags[perf.allocs]" value="1176"></property>
+		<property name="dd_tags[perf.gctime]" value="0.0"></property>
+		<property name="dd_tags[perf.compile_time]" value="4.1e-8"></property>
+		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
+		<property name="dd_tags[perf.eval_time]" value="0.002065292"></property>
+		</properties>
+		<error message="Test failed at test/testfiles/_retry_tests.jl:37">Captured logs for test setup "StatefulSetup" (dependency of "Has retries=4 and always fails") at test/testfiles/_retry_tests.jl:1 on worker 81105
+These are the test setup logs.
+No Captured Logs for test item "Has retries=4 and always fails" at test/testfiles/_retry_tests.jl:34 on worker 81105
+Error in testset "Has retries=4 and always fails" on worker 81105:
+Test Failed at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:37
+  Expression: false
+		</error>
+	</testcase>
+	<testcase name="Has retries=1 and always fails" timestamp="2023-06-21T16:45:03.42" time="0.002" tests="1" skipped="0" failures="1" errors="0">
+		<properties>
+		<property name="dd_tags[perf.bytes]" value="76870"></property>
+		<property name="dd_tags[perf.allocs]" value="1176"></property>
+		<property name="dd_tags[perf.gctime]" value="0.0"></property>
+		<property name="dd_tags[perf.compile_time]" value="4.2e-8"></property>
+		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
+		<property name="dd_tags[perf.eval_time]" value="0.002043875"></property>
+		</properties>
+		<error message="Test failed at test/testfiles/_retry_tests.jl:43">Captured logs for test setup "StatefulSetup" (dependency of "Has retries=1 and always fails") at test/testfiles/_retry_tests.jl:1 on worker 81105
+These are the test setup logs.
+No Captured Logs for test item "Has retries=1 and always fails" at test/testfiles/_retry_tests.jl:40 on worker 81105
+Error in testset "Has retries=1 and always fails" on worker 81105:
+Test Failed at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:43
+  Expression: false
+		</error>
+	</testcase>
+	<testcase name="Has retries=1 and always fails" timestamp="2023-06-21T16:45:03.472" time="0.002" tests="1" skipped="0" failures="1" errors="0">
+		<properties>
+		<property name="dd_tags[perf.bytes]" value="76870"></property>
+		<property name="dd_tags[perf.allocs]" value="1176"></property>
+		<property name="dd_tags[perf.gctime]" value="0.0"></property>
+		<property name="dd_tags[perf.compile_time]" value="8.4e-8"></property>
+		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
+		<property name="dd_tags[perf.eval_time]" value="0.002056583"></property>
+		</properties>
+		<error message="Test failed at test/testfiles/_retry_tests.jl:43">Captured logs for test setup "StatefulSetup" (dependency of "Has retries=1 and always fails") at test/testfiles/_retry_tests.jl:1 on worker 81105
+These are the test setup logs.
+No Captured Logs for test item "Has retries=1 and always fails" at test/testfiles/_retry_tests.jl:40 on worker 81105
+Error in testset "Has retries=1 and always fails" on worker 81105:
+Test Failed at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:43
+  Expression: false
+		</error>
+	</testcase>
+	<testcase name="Has retries=1 and always fails" timestamp="2023-06-21T16:45:03.526" time="0.002" tests="1" skipped="0" failures="1" errors="0">
+		<properties>
+		<property name="dd_tags[perf.bytes]" value="76870"></property>
+		<property name="dd_tags[perf.allocs]" value="1176"></property>
+		<property name="dd_tags[perf.gctime]" value="0.0"></property>
+		<property name="dd_tags[perf.compile_time]" value="4.2e-8"></property>
+		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
+		<property name="dd_tags[perf.eval_time]" value="0.002347583"></property>
+		</properties>
+		<error message="Test failed at test/testfiles/_retry_tests.jl:43">Captured logs for test setup "StatefulSetup" (dependency of "Has retries=1 and always fails") at test/testfiles/_retry_tests.jl:1 on worker 81105
+These are the test setup logs.
+No Captured Logs for test item "Has retries=1 and always fails" at test/testfiles/_retry_tests.jl:40 on worker 81105
+Error in testset "Has retries=1 and always fails" on worker 81105:
+Test Failed at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:43
+  Expression: false
+		</error>
+	</testcase>
+	<testcase name="Timeout always" timestamp="2023-06-21T16:45:08.784" time="0.303" tests="1" skipped="0" failures="0" errors="1">
+		<properties>
+		<property name="dd_tags[perf.bytes]" value="0"></property>
+		<property name="dd_tags[perf.allocs]" value="0"></property>
+		<property name="dd_tags[perf.gctime]" value="0.0"></property>
+		<property name="dd_tags[perf.compile_time]" value="0.0"></property>
+		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
+		<property name="dd_tags[perf.eval_time]" value="0.0"></property>
+		</properties>
+		<error message="Error during test at test/testfiles/_retry_tests.jl:54">Captured Logs for test item "Timeout always" at test/testfiles/_retry_tests.jl:54 on worker 81822
+
+signal (15): Terminated: 15
+in expression starting at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:60
+__psynch_cvwait at /usr/lib/system/libsystem_kernel.dylib (unknown line)
+unknown function (ip: 0x0)
+kevent at /usr/lib/system/libsystem_kernel.dylib (unknown line)
+unknown function (ip: 0x0)
+Allocations: 5231398 (Pool: 5229065; Big: 2333); GC: 30
+Error in testset "Timeout always" on worker 81822:
+Error During Test at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:54
+  Got exception outside of a @test
+  Timed out after 5s evaluating test item "Timeout always" (run=1)
+		</error>
+	</testcase>
+	<testcase name="Timeout always" timestamp="2023-06-21T16:45:14.846" time="0.0" tests="1" skipped="0" failures="0" errors="1">
+		<properties>
+		<property name="dd_tags[perf.bytes]" value="0"></property>
+		<property name="dd_tags[perf.allocs]" value="0"></property>
+		<property name="dd_tags[perf.gctime]" value="0.0"></property>
+		<property name="dd_tags[perf.compile_time]" value="0.0"></property>
+		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
+		<property name="dd_tags[perf.eval_time]" value="0.0"></property>
+		</properties>
+		<error message="Error during test at test/testfiles/_retry_tests.jl:54">Captured Logs for test item "Timeout always" at test/testfiles/_retry_tests.jl:54 on worker 81822
+
+signal (15): Terminated: 15
+in expression starting at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:60
+__psynch_cvwait at /usr/lib/system/libsystem_kernel.dylib (unknown line)
+unknown function (ip: 0x0)
+kevent at /usr/lib/system/libsystem_kernel.dylib (unknown line)
+unknown function (ip: 0x0)
+Allocations: 3068722 (Pool: 3067612; Big: 1110); GC: 3
+Error in testset "Timeout always" on worker 81822:
+Error During Test at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:54
+  Got exception outside of a @test
+  Timed out after 5s evaluating test item "Timeout always" (run=2)
+		</error>
+	</testcase>
+	<testcase name="Timeout always" timestamp="2023-06-21T16:45:20.618" time="0.0" tests="1" skipped="0" failures="0" errors="1">
+		<properties>
+		<property name="dd_tags[perf.bytes]" value="0"></property>
+		<property name="dd_tags[perf.allocs]" value="0"></property>
+		<property name="dd_tags[perf.gctime]" value="0.0"></property>
+		<property name="dd_tags[perf.compile_time]" value="0.0"></property>
+		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
+		<property name="dd_tags[perf.eval_time]" value="0.0"></property>
+		</properties>
+		<error message="Error during test at test/testfiles/_retry_tests.jl:54">Captured Logs for test item "Timeout always" at test/testfiles/_retry_tests.jl:54 on worker 81822
+
+signal (15): Terminated: 15
+in expression starting at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:60
+__psynch_cvwait at /usr/lib/system/libsystem_kernel.dylib (unknown line)
+unknown function (ip: 0x0)
+kevent at /usr/lib/system/libsystem_kernel.dylib (unknown line)
+unknown function (ip: 0x0)
+Allocations: 3068695 (Pool: 3067583; Big: 1112); GC: 3
+Error in testset "Timeout always" on worker 81822:
+Error During Test at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:54
+  Got exception outside of a @test
+  Timed out after 5s evaluating test item "Timeout always" (run=3)
+		</error>
+	</testcase>
+	<testcase name="Timeout first, pass after" timestamp="2023-06-21T16:45:26.413" time="0.0" tests="1" skipped="0" failures="0" errors="1">
+		<properties>
+		<property name="dd_tags[perf.bytes]" value="0"></property>
+		<property name="dd_tags[perf.allocs]" value="0"></property>
+		<property name="dd_tags[perf.gctime]" value="0.0"></property>
+		<property name="dd_tags[perf.compile_time]" value="0.0"></property>
+		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
+		<property name="dd_tags[perf.eval_time]" value="0.0"></property>
+		</properties>
+		<error message="Error during test at test/testfiles/_retry_tests.jl:64">Captured Logs for test item "Timeout first, pass after" at test/testfiles/_retry_tests.jl:64 on worker 81835
+
+signal (15): Terminated: 15
+in expression starting at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:71
+__psynch_cvwait at /usr/lib/system/libsystem_kernel.dylib (unknown line)
+unknown function (ip: 0x0)
+kevent at /usr/lib/system/libsystem_kernel.dylib (unknown line)
+unknown function (ip: 0x0)
+Allocations: 3061005 (Pool: 3059893; Big: 1112); GC: 3
+Error in testset "Timeout first, pass after" on worker 81835:
+Error During Test at /Users/nickr/repos/ReTestItems.jl/test/testfiles/_retry_tests.jl:64
+  Got exception outside of a @test
+  Timed out after 5s evaluating test item "Timeout first, pass after" (run=1)
+		</error>
+	</testcase>
+	<testcase name="Timeout first, pass after" timestamp="2023-06-21T16:45:27.43" time="0.06" tests="1" skipped="0" failures="0" errors="0">
+		<properties>
+		<property name="dd_tags[perf.bytes]" value="8002087"></property>
+		<property name="dd_tags[perf.allocs]" value="136513"></property>
+		<property name="dd_tags[perf.gctime]" value="0.0"></property>
+		<property name="dd_tags[perf.compile_time]" value="0.052138169"></property>
+		<property name="dd_tags[perf.recompile_time]" value="0.0"></property>
+		<property name="dd_tags[perf.eval_time]" value="0.003812705999999999"></property>
+		</properties>
+	</testcase>
+</testsuite>
+</testsuites>
\ No newline at end of file
diff --git a/test/testfiles/_retry_tests.jl b/test/testfiles/_retry_tests.jl
index 162de5f2..4a3d21a7 100644
--- a/test/testfiles/_retry_tests.jl
+++ b/test/testfiles/_retry_tests.jl
@@ -1,5 +1,7 @@
 @testsetup module StatefulSetup
     export NUM_RUNS_1, NUM_RUNS_2, NUM_RUNS_3, NUM_RUNS_4
+    export tmpdir
+    const tmpdir = mkpath(joinpath("/tmp", "JL_RETESTITEMS_TEST_TMPDIR"))
     const NUM_RUNS_1 = Ref{Int}(0)
     const NUM_RUNS_2 = Ref{Int}(0)
     const NUM_RUNS_3 = Ref{Int}(0)
@@ -9,7 +11,7 @@ end
 
 @testitem "Pass on second run" setup=[StatefulSetup] begin
     NUM_RUNS_1[] += 1
-    write(joinpath(tempdir(), "num_runs_1"), string(NUM_RUNS_1[]))
+    write(joinpath(tmpdir, "num_runs_1"), string(NUM_RUNS_1[]))
 
     x = NUM_RUNS_1[] == 2
     @test x
@@ -17,7 +19,7 @@ end
 
 @testitem "Error, then Fail, then Pass" setup=[StatefulSetup] begin
     NUM_RUNS_2[] += 1
-    write(joinpath(tempdir(), "num_runs_2"), string(NUM_RUNS_2[]))
+    write(joinpath(tmpdir, "num_runs_2"), string(NUM_RUNS_2[]))
     println("These are the logs for run number: ", NUM_RUNS_2[])
 
     if NUM_RUNS_2[] == 1
@@ -31,12 +33,43 @@ end
 
 @testitem "Has retries=4 and always fails" setup=[StatefulSetup] retries=4 begin
     NUM_RUNS_3[] += 1
-    write(joinpath(tempdir(), "num_runs_3"), string(NUM_RUNS_3[]))
+    write(joinpath(tmpdir, "num_runs_3"), string(NUM_RUNS_3[]))
     @test false
 end
 
 @testitem "Has retries=1 and always fails" setup=[StatefulSetup] retries=1 begin
     NUM_RUNS_4[] += 1
-    write(joinpath(tempdir(), "num_runs_4"), string(NUM_RUNS_4[]))
+    write(joinpath(tmpdir, "num_runs_4"), string(NUM_RUNS_4[]))
     @test false
 end
+
+
+# For these tests to timeout, must be run with `testitem_timeout < 20`
+# Cannot use `StatefulSetup` for testing timeouts, as it will be a new worker
+# every retry, so the `setup` will always have been re-evaluated anew.
+# Instead we write a new file for each run. We don't use `tempdir()` in case files written
+# there get cleaned up as soon as the worker dies.
+# We need to write a new file each time for our counting to be correct, so if the assertion
+# fails we need to switch to a more robust ways of creating unique filenames.
+@testitem "Timeout always" retries=1 begin
+    using Random
+    tmpdir = mkpath(joinpath("/tmp", "JL_RETESTITEMS_TEST_TMPDIR"))
+    filename = joinpath(tmpdir, "num_runs_5_" * randstring())
+    @assert !isfile(filename)
+    write(filename, "1")
+    sleep(20.0)
+    @test true
+end
+
+@testitem "Timeout first, pass after" retries=1 begin
+    using Random
+    tmpdir = mkpath(joinpath("/tmp", "JL_RETESTITEMS_TEST_TMPDIR"))
+    filename = joinpath(tmpdir, "num_runs_6_" * randstring())
+    @assert !isfile(filename)
+    is_first_run = !any(contains("num_runs_6"), readdir(tmpdir))
+    write(filename, "1")
+    if is_first_run
+        sleep(20.0)
+    end
+    @test true
+end