From fd604713e8f3ed336e96f386ca3c400a42d9df9c Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Wed, 9 Dec 2020 17:36:49 -0500 Subject: [PATCH] Try to close race condition in FileWatching tests (#38407) * Try to close race condition in FreeBSD tests We're seeing frequent test failures in the FileWatching test on FreeBSD. Here's my theory of what happens: - Both the timer callback and the poll callback execute on the same libuv loop - They each schedule their respective tasks - Whichever task gets scheduled first determines the result However, in the test, we expect that, if the poll callback ran (which we know because we know there was an event pending), then that result does actually get delivered to the toplevel task. This PR tries to close this hole by adding the following condition: If the task is no longer waiting on the file watcher (because libuv already scheduled it), then wait for the task to run to completion, independent of any timeout. I believe this should close the above race condition and hopefully fix the test. * Add another super-short timeout to try to trigger the same-tick issue (cherry picked from commit 9a8a675819892a6ec6ff58a6e3b6417673619719) --- stdlib/FileWatching/src/FileWatching.jl | 18 ++++++++++++++---- stdlib/FileWatching/test/runtests.jl | 2 +- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/stdlib/FileWatching/src/FileWatching.jl b/stdlib/FileWatching/src/FileWatching.jl index 1c8a684310fec..d929d1ebfb98d 100644 --- a/stdlib/FileWatching/src/FileWatching.jl +++ b/stdlib/FileWatching/src/FileWatching.jl @@ -254,6 +254,10 @@ mutable struct _FDWatcher end end +function iswaiting(fwd::_FDWatcher, t::Task) + return fwd.notify.waitq === t.queue +end + mutable struct FDWatcher watcher::_FDWatcher readable::Bool @@ -653,10 +657,10 @@ function poll_fd(s::Union{RawFD, Sys.iswindows() ? 
WindowsRawSocket : Union{}}, try if timeout_s >= 0 result::FDEvent = FDEvent() - timer = Timer(timeout_s) do t - notify(wt) - end - @async begin + t = @async begin + timer = Timer(timeout_s) do t + notify(wt) + end try result = wait(fdw, readable=readable, writable=writable) catch e @@ -666,6 +670,12 @@ function poll_fd(s::Union{RawFD, Sys.iswindows() ? WindowsRawSocket : Union{}}, notify(wt) end wait(wt) + # It's possible that both the timer and the poll fired on the same + # libuv loop. In that case, which event we see here first depends + # on task schedule order. If we can see that the task isn't waiting + # on the file watcher anymore, just let it finish so we can see + # the modification to `result` + iswaiting(fdw, t) || wait(t) return result else return wait(fdw, readable=readable, writable=writable) diff --git a/stdlib/FileWatching/test/runtests.jl b/stdlib/FileWatching/test/runtests.jl index deee66cf52997..345ffce07482f 100644 --- a/stdlib/FileWatching/test/runtests.jl +++ b/stdlib/FileWatching/test/runtests.jl @@ -12,7 +12,7 @@ using Base: uv_error, Experimental # Writable ends are always tested for write-ability before a write n = 20 -intvls = [2, .2, .1, .005] +intvls = [2, .2, .1, .005, .00001] pipe_fds = fill((Base.INVALID_OS_HANDLE, Base.INVALID_OS_HANDLE), n) for i in 1:n