From 786b3f793e944de036ac5ceba1effca8f461571f Mon Sep 17 00:00:00 2001 From: Aleksey Kliger Date: Mon, 23 Oct 2023 11:41:03 -0400 Subject: [PATCH 1/6] [mono] Fix deadlock in static constructor initialization If two threads (A and B) need to call the static constructors for 3 classes X, Y, Z in this order: Thread A: X, Z, Y Thread B: Y, Z, X where the cctor for X in thread A invokes the cctor for Z and for Y, and the cctor for Y in thread B invokes the cctor for Z and X, then we can get into a situation where A and B both start the cctors for X and Y (so they will be in the type_initialization_hash for those two classes) and then both will try to init Z. In that case it could be that A will be responsible for initializing Z and B will block. Then A could finish initializing Z but B may not have woken up yet (and so it will be in blocked_thread_hash waiting for Z). At that point A (who is at this point already need to init Y) may think that it can wait for B to finish initializing Y. That is we get to `mono_coop_cond_timedwait (&lock->cond, &lock->mutex, timeout_ms)` with "lock" being the lock for `Y`. But in fact thread B will not be able to complete initializing Y because it will attempt to init X next - but meanwhile we did not indicate that thread A is blocked. As a result in thread A the timed wait will eventually timeout. And in this case we need to go back to the top and now correctly detect that A is waiting for Y and B is waiting for X. (At that point there is a cctor deadlock and ECMA rules allow one of the threads to return without calling the cctor) The old code here used to do an infinite wait: while (!lock->done) mono_coop_cond_wait (&lock->cond, &lock->mutex) This cannot succeed because "lock" (in thread A it's the lock for Y) will not be signaled since B (who is supposed to init Y) will instead block on the cctor for X. Fixes https://github.com/dotnet/runtime/issues/93778 --- src/mono/mono/metadata/object.c | 53 +++++++++++++++++++++++++++++++-- 1 file changed, 51 insertions(+), 2 deletions(-) diff --git a/src/mono/mono/metadata/object.c b/src/mono/mono/metadata/object.c index 0b79c0d713b67..37de76825b82e 100644 --- a/src/mono/mono/metadata/object.c +++ b/src/mono/mono/metadata/object.c @@ -458,6 +458,7 @@ mono_runtime_class_init_full (MonoVTable *vtable, MonoError *error) * on this cond var. */ +retry_top: mono_type_initialization_lock (); /* double check... */ if (vtable->initialized) { @@ -507,10 +508,18 @@ mono_runtime_class_init_full (MonoVTable *vtable, MonoError *error) blocked = GUINT_TO_POINTER (MONO_NATIVE_THREAD_ID_TO_UINT (lock->initializing_tid)); while ((pending_lock = (TypeInitializationLock*) g_hash_table_lookup (blocked_thread_hash, blocked))) { if (mono_native_thread_id_equals (pending_lock->initializing_tid, tid)) { + if (mono_trace_is_traced (G_LOG_LEVEL_DEBUG, MONO_TRACE_TYPE) && strstr (m_class_get_image(klass)->name, "HelloWorld") != NULL) { + char* type_name = mono_type_full_name (m_class_get_byval_arg (klass)); + mono_trace (G_LOG_LEVEL_DEBUG, MONO_TRACE_TYPE, "Detected deadlock for class .cctor for %s from '%s'", type_name, m_class_get_image (klass)->name); + g_free (type_name); + } + if (!pending_lock->done) { mono_type_initialization_unlock (); goto return_true; } else { + mono_trace (G_LOG_LEVEL_DEBUG, MONO_TRACE_TYPE, "special case"); + /* the thread doing the initialization is blocked on this thread, but on a lock that has already been freed. It just hasn't got time to awake */ @@ -605,9 +614,49 @@ mono_runtime_class_init_full (MonoVTable *vtable, MonoError *error) } else { /* this just blocks until the initializing thread is done */ mono_type_init_lock (lock); - while (!lock->done) - mono_coop_cond_wait (&lock->cond, &lock->mutex); + if (!lock->done) { + int timeout_ms = 500; + int wait_result = mono_coop_cond_timedwait (&lock->cond, &lock->mutex, timeout_ms); + if (wait_result == -1) { + /* timed out - go around again from the beginning. If we got here + * from the "is_blocked = FALSE" case, above (another thread was + * blocked on the current thread, but on a lock that was already + * done but it didn't get to wake up yet), then it might still be + * the case that the current thread cannot proceed even if the other + * thread got to wake up - there might be a new deadlock. We need + * to re-evaluate. + * + * This can happen if two threads A and B need to call the cctors + * for classes X and Y but in opposite orders, and also call a cctor + * for a third class Z. (That is thread A wants to init: X, Z, Y; + * thread B wants to init: Y, Z, X.) In that case, if B is waiting + * for A to finish initializing Z, and A (the current thread ) + * already finished Z and wants to init Y. In A, control will come + * here with "lock" being Y's lock. But we will time out because B + * will see that A is responsible for initializing X and will also + * block. So A is waiting for B to finish Y and B is waiting for A + * to finish X. So the fact that A allowed B to wait for Z to + * finish didn't actually let us make progress. Thread A must go + * around to the top once more and try to init Y - and detect that + * there is now a deadlock between X and Y. + */ + mono_type_init_unlock (lock); + // clean up blocked thread hash and lock refcount. + mono_type_initialization_lock (); + g_hash_table_remove (blocked_thread_hash, GUINT_TO_POINTER (tid)); + gboolean deleted = unref_type_lock (lock); + if (deleted) + g_hash_table_remove (type_initialization_hash, vtable); + mono_type_initialization_unlock (); + goto retry_top; + } else if (wait_result == 0) { + /* success.. we were signaled that the other thread is done. Proceed */ + } else { + g_error ("unexpected return value %d from mono_coop_cond_timedwait", wait_result); + } + } mono_type_init_unlock (lock); + g_assert (lock->done); } /* Do cleanup and setting vtable->initialized inside the global lock again */ From 6fc9aa6e2555a27c3fb8c5fc9319c904ec98907c Mon Sep 17 00:00:00 2001 From: Aleksey Kliger Date: Mon, 23 Oct 2023 15:04:22 -0400 Subject: [PATCH 2/6] Add test case --- .../CircularCctorTwoThreadsThreeCC.cs | 182 ++++++++++++++++++ .../CircularCctorTwoThreadsThreeCC.csproj | 9 + 2 files changed, 191 insertions(+) create mode 100644 src/tests/Loader/classloader/TypeInitialization/CircularCctors/CircularCctorTwoThreadsThreeCC.cs create mode 100644 src/tests/Loader/classloader/TypeInitialization/CircularCctors/CircularCctorTwoThreadsThreeCC.csproj diff --git a/src/tests/Loader/classloader/TypeInitialization/CircularCctors/CircularCctorTwoThreadsThreeCC.cs b/src/tests/Loader/classloader/TypeInitialization/CircularCctors/CircularCctorTwoThreadsThreeCC.cs new file mode 100644 index 0000000000000..cd5b45ca95527 --- /dev/null +++ b/src/tests/Loader/classloader/TypeInitialization/CircularCctors/CircularCctorTwoThreadsThreeCC.cs @@ -0,0 +1,182 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Runtime.CompilerServices; +using System.Threading; + +using Xunit; + +// Regression test for https://github.com/dotnet/runtime/issues/93778 +namespace CircularCctorTwoThreadsThreeCC; + +[Flags] +public enum SlotConstants : int +{ + ZInited = 1, + YInitedFromX = 2, + XInitedFromY = 4, + + Thread1 = 1 << 16, + Thread2 = 2 << 16, +} + +/// X and Y both try to use each other, and also both use Z. +/// We expect to see exactly one thread initialize Z and +/// either X inited from Y or Y inited from X. +public class X +{ + public static X Singleton = new(); + private X() { + Z.Singleton.Ping(); + Y.Singleton?.Pong(); + } + + public void Pong() => Coordinator.Note(SlotConstants.XInitedFromY); +} + +public class Y +{ + public static Y Singleton = new(); + private Y() { + Z.Singleton.Ping(); + X.Singleton?.Pong(); + } + + public void Pong() => Coordinator.Note(SlotConstants.YInitedFromX); +} + +public class Z +{ + public static Z Singleton = new(); + + private Z() { + Coordinator.Note(SlotConstants.ZInited); + } + + public void Ping() { } + +} + +public class Coordinator +{ + [ThreadStatic] + private static SlotConstants t_threadTag; + + private static int s_NextNote; + private static readonly SlotConstants[] Notes = new SlotConstants[12]; + + private static SlotConstants DecorateWithThread(SlotConstants c) + { + return c | t_threadTag; + } + + public static void Note(SlotConstants s) { + int idx = Interlocked.Increment(ref s_NextNote); + idx--; + Notes[idx] = DecorateWithThread (s); + } + + public static Coordinator CreateThread(bool xThenY, SlotConstants threadTag) + { + return new Coordinator(xThenY, threadTag); + } + + public readonly Thread Thread; + private static readonly Barrier s_barrier = new (3); + + private Coordinator(bool xThenY, SlotConstants threadTag) + { + var t = new Thread(() => { + t_threadTag = threadTag; + // Log("started"); + NextPhase(); + // Log("racing"); + DoConstructions(xThenY); + NextPhase(); + // Log("done"); + }); + Thread = t; + t.Start(); + } + + public static void NextPhase() { s_barrier.SignalAndWait(); } + + [MethodImpl(MethodImplOptions.NoInlining)] + public static void DoConstructions(bool xThenY) + { + if (xThenY) { + XCreate(); + } else { + YCreate(); + } + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static void XCreate() + { + var _ = X.Singleton; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static void YCreate() + { + var _ = Y.Singleton; + } + + public static void Log(string msg) + { + Console.WriteLine ($"{Thread.CurrentThread.ManagedThreadId}: {msg}"); + } + + [Fact] + public static void RunTestCase() + { + var c1 = CreateThread(xThenY: true, threadTag: SlotConstants.Thread1); + var c2 = CreateThread(xThenY: false, threadTag: SlotConstants.Thread2); + // created all threads + NextPhase(); + // racing + NextPhase(); + // done + + // one second should be plenty for all these threads, but it's arbitrary + int threadJoinTimeoutMS = 1000; + var j1 = c1.Thread.Join(threadJoinTimeoutMS); + var j2 = c2.Thread.Join(threadJoinTimeoutMS); + Assert.True(j1); + Assert.True(j2); + // all joined + + // exactly one thread inited Z + Assert.Equal(1, CountNotes(SlotConstants.ZInited)); + // either X was inited or Y, not both. + Assert.Equal(1, Count2Notes(SlotConstants.XInitedFromY, SlotConstants.YInitedFromX)); + } + + private static int CountNotes(SlotConstants mask) + { + int found = 0; + foreach (var note in Notes) { + if ((note & mask) != (SlotConstants)0) { + found++; + } + } + return found; + } + + private static int Count2Notes(SlotConstants mask1, SlotConstants mask2) + { + int found = 0; + foreach (var note in Notes) { + if ((note & mask1) != (SlotConstants)0) { + found++; + } + if ((note & mask2) != (SlotConstants)0) { + found++; + } + } + return found; + } + +} diff --git a/src/tests/Loader/classloader/TypeInitialization/CircularCctors/CircularCctorTwoThreadsThreeCC.csproj b/src/tests/Loader/classloader/TypeInitialization/CircularCctors/CircularCctorTwoThreadsThreeCC.csproj new file mode 100644 index 0000000000000..c9936332a8676 --- /dev/null +++ b/src/tests/Loader/classloader/TypeInitialization/CircularCctors/CircularCctorTwoThreadsThreeCC.csproj @@ -0,0 +1,9 @@ + + + true + 0 + + + + + From 1b9276b838e73840e11b54d98911f731cc7dd693 Mon Sep 17 00:00:00 2001 From: Aleksey Kliger Date: Mon, 23 Oct 2023 15:06:20 -0400 Subject: [PATCH 3/6] remove prototyping log messages --- src/mono/mono/metadata/object.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/mono/mono/metadata/object.c b/src/mono/mono/metadata/object.c index 37de76825b82e..98fd05173c39a 100644 --- a/src/mono/mono/metadata/object.c +++ b/src/mono/mono/metadata/object.c @@ -508,7 +508,7 @@ mono_runtime_class_init_full (MonoVTable *vtable, MonoError *error) blocked = GUINT_TO_POINTER (MONO_NATIVE_THREAD_ID_TO_UINT (lock->initializing_tid)); while ((pending_lock = (TypeInitializationLock*) g_hash_table_lookup (blocked_thread_hash, blocked))) { if (mono_native_thread_id_equals (pending_lock->initializing_tid, tid)) { - if (mono_trace_is_traced (G_LOG_LEVEL_DEBUG, MONO_TRACE_TYPE) && strstr (m_class_get_image(klass)->name, "HelloWorld") != NULL) { + if (mono_trace_is_traced (G_LOG_LEVEL_DEBUG, MONO_TRACE_TYPE)) { char* type_name = mono_type_full_name (m_class_get_byval_arg (klass)); mono_trace (G_LOG_LEVEL_DEBUG, MONO_TRACE_TYPE, "Detected deadlock for class .cctor for %s from '%s'", type_name, m_class_get_image (klass)->name); g_free (type_name); @@ -518,8 +518,6 @@ mono_runtime_class_init_full (MonoVTable *vtable, MonoError *error) mono_type_initialization_unlock (); goto return_true; } else { - mono_trace (G_LOG_LEVEL_DEBUG, MONO_TRACE_TYPE, "special case"); - /* the thread doing the initialization is blocked on this thread, but on a lock that has already been freed. It just hasn't got time to awake */ From fed88d231b98e3cb36d5ae88a7ada2aeafd90bc1 Mon Sep 17 00:00:00 2001 From: Aleksey Kliger Date: Tue, 24 Oct 2023 09:41:06 -0400 Subject: [PATCH 4/6] disable mt test on wasm --- src/tests/issues.targets | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/tests/issues.targets b/src/tests/issues.targets index 5c9868e334b08..ab75e76075a03 100644 --- a/src/tests/issues.targets +++ b/src/tests/issues.targets @@ -2991,6 +2991,9 @@ https://github.com/dotnet/runtime/issues/41472 + + https://github.com/dotnet/runtime/issues/41472 + https://github.com/dotnet/runtime/issues/41472 From 9e9ecff9489ec4e6551dad8077d626532e05d25e Mon Sep 17 00:00:00 2001 From: Aleksey Kliger Date: Tue, 24 Oct 2023 13:16:59 -0400 Subject: [PATCH 5/6] better issues.target exclusion --- src/tests/issues.targets | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/tests/issues.targets b/src/tests/issues.targets index ab75e76075a03..537655fa553ce 100644 --- a/src/tests/issues.targets +++ b/src/tests/issues.targets @@ -2991,9 +2991,6 @@ https://github.com/dotnet/runtime/issues/41472 - - https://github.com/dotnet/runtime/issues/41472 - https://github.com/dotnet/runtime/issues/41472 @@ -3262,6 +3259,9 @@ System.Threading.Thread.UnsafeStart not supported + + System.Threading.Thread.ThrowIfNoThreadStart: PlatformNotSupportedException + Could not load legacy Microsoft.Diagnostics.Tools.RuntimeClient From 99b4d75c0d89faccb223a3f91baf528e60c31e29 Mon Sep 17 00:00:00 2001 From: Aleksey Kliger Date: Wed, 25 Oct 2023 08:51:42 -0400 Subject: [PATCH 6/6] code review feedback --- src/mono/mono/metadata/object.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mono/mono/metadata/object.c b/src/mono/mono/metadata/object.c index 98fd05173c39a..f9f0ebfe3ad0f 100644 --- a/src/mono/mono/metadata/object.c +++ b/src/mono/mono/metadata/object.c @@ -648,9 +648,9 @@ mono_runtime_class_init_full (MonoVTable *vtable, MonoError *error) mono_type_initialization_unlock (); goto retry_top; } else if (wait_result == 0) { - /* success.. we were signaled that the other thread is done. Proceed */ + /* Success: we were signaled that the other thread is done. Proceed */ } else { - g_error ("unexpected return value %d from mono_coop_cond_timedwait", wait_result); + g_assert_not_reached (); } } mono_type_init_unlock (lock);