From effaf28d0ca74ab9ae11fc947f524393e545af0d Mon Sep 17 00:00:00 2001 From: monojenkins Date: Sat, 17 Oct 2020 08:04:40 -0400 Subject: [PATCH] [threads] At shutdown, don't wait for native threads that aren't calling Mono (#43174) * [test] Call P/Invoke callback delegates from foreign threads Change post-detach-1.cs to also have versions that call reverse pinvokes from foreign threads that were not attached to mono. The foreign threads should not prevent GC and should not prevent Mono from shutting down using mono_manage_internal. * [threads] Don't wait for native threads that aren't calling Mono If a thread is started from native code, and it interacts with the runtime (by calling a thunk that invokes a managed method), the runtime will attach the thread - it will create a `MonoInternalThread` and add it to the list of threads hash (in threads.c). If the thread returns from the managed call, it will still be recorded by the runtime, but as long as it is not running managed code anymore, it will prevent shutdown. The problem is when we try to suspend the thread in order to abort it, `mono_thread_state_init_from_handle` will see a NULL domain (because `mono_threads_detach_coop_internal` will restore it to NULL when a managed method returns back to native code). (On systems using POSIX signals to suspend, the same check is in `mono_thread_state_init_from_sigctx`). As a result, `mono_threads_suspend_begin_async_suspend` (or `suspend_signal_handler` on POSIX) will set `suspend_can_continue` to FALSE, and `mono_thread_info_safe_suspend_and_run` will not run the suspend callback. As a result, when `mono_manage_internal` calls `abort_threads`, it will add the thread handle to the wait list, but it will not actually request the thread to abort. As a result, after `abort_threads` returns, the subsequent call to `wait_for_tids` will block until the native thread terminates (at which point the TLS destructor will notify the thread handle and wait_for_tids will unblock). This commit changes the behavior of `abort_threads` to ignore threads that do not run `async_suspend_critical` and not to add them to the wait list. As a result, if a native thread calls into managed and then returns to native code, the runtime will not wait for it. * [threads] Warn if mono_thread_manage_internal can't abort a thread Give a hint to embedders to aid debugging * rename AbortThreadData:thread_will_abort field It's used to keep track of whether the thread will eventually throw a TAE (and thus that we need to wait for it). The issue is that under full coop suspend, we treat threads in GC Safe (BLOCKING) state as if they're suspended and always execute async_abort_critical. So the field has nothing to do with whether the thread was suscessfully suspended, but rather whether it will (eventually) be aborted. * [threads] Fix async_abort_critical for full coop If the foreign external thread doesn't have any managed methods on its callstack, but it once called a native-to-managed wrapper, it will be left by mono_threads_detach_coop in GC Safe (BLOCKING) state. But under full coop, GC Safe state is considered suspended, so mono_thread_info_safe_suspend_and_run will run async_abort_critical for the thread. But the thread may never call into Mono again, in which case it will never safepoint and aknowledge the abort interruption. So set thread_will_abort to FALSE in this case, so that mono_thread_manage_internal won't try to wait for it. --- Related to an issue first identified in https://github.com/mono/mono/pull/18517 --- This supersedes mono/mono#18656 Co-authored-by: lambdageek --- src/mono/mono/metadata/threads-types.h | 3 +- src/mono/mono/metadata/threads.c | 52 +++++++++++++++---- src/mono/mono/tests/libtest.c | 69 ++++++++++++++++++------- src/mono/mono/tests/pinvoke-detach-1.cs | 34 ++++++++++-- 4 files changed, 123 insertions(+), 35 deletions(-) diff --git a/src/mono/mono/metadata/threads-types.h b/src/mono/mono/metadata/threads-types.h index ab72d65a0de83..f78570ccec8fe 100644 --- a/src/mono/mono/metadata/threads-types.h +++ b/src/mono/mono/metadata/threads-types.h @@ -294,7 +294,8 @@ MONO_PROFILER_API MonoInternalThread *mono_thread_internal_current (void); MonoInternalThreadHandle mono_thread_internal_current_handle (void); -void mono_thread_internal_abort (MonoInternalThread *thread, gboolean appdomain_unload); +gboolean +mono_thread_internal_abort (MonoInternalThread *thread, gboolean appdomain_unload); void mono_thread_internal_suspend_for_shutdown (MonoInternalThread *thread); gboolean mono_thread_internal_has_appdomain_ref (MonoInternalThread *thread, MonoDomain *domain); diff --git a/src/mono/mono/metadata/threads.c b/src/mono/mono/metadata/threads.c index 362cd30227767..fe3de4d5ccc36 100644 --- a/src/mono/mono/metadata/threads.c +++ b/src/mono/mono/metadata/threads.c @@ -212,7 +212,7 @@ static void mono_free_static_data (gpointer* static_data); static void mono_init_static_data_info (StaticDataInfo *static_data); static guint32 mono_alloc_static_data_slot (StaticDataInfo *static_data, guint32 size, guint32 align); static gboolean mono_thread_resume (MonoInternalThread* thread); -static void async_abort_internal (MonoInternalThread *thread, gboolean install_async_abort); +static gboolean async_abort_internal (MonoInternalThread *thread, gboolean install_async_abort); static void self_abort_internal (MonoError *error); static void async_suspend_internal (MonoInternalThread *thread, gboolean interrupt); static void self_suspend_internal (void); @@ -1866,7 +1866,7 @@ ves_icall_System_Threading_Thread_Thread_internal (MonoThreadObjectHandle thread internal->state &= ~ThreadState_Unstarted; - THREAD_DEBUG (g_message ("%s: Started thread ID %" G_GSIZE_FORMAT " (handle %p)", __func__, tid, thread)); + THREAD_DEBUG (g_message ("%s: Started thread ID %" G_GSIZE_FORMAT " (handle %p)", __func__, (gsize)internal->tid, internal)); UNLOCK_THREAD (internal); return TRUE; @@ -2898,15 +2898,16 @@ ves_icall_System_Threading_Thread_Abort (MonoInternalThreadHandle thread_handle, * mono_thread_internal_abort: * Request thread \p thread to be aborted. * \p thread MUST NOT be the current thread. + * \returns true if the request was successful */ -void +gboolean mono_thread_internal_abort (MonoInternalThread *thread, gboolean appdomain_unload) { g_assert (thread != mono_thread_internal_current ()); if (!request_thread_abort (thread, NULL, appdomain_unload)) - return; - async_abort_internal (thread, TRUE); + return FALSE; + return async_abort_internal (thread, TRUE); } #ifndef ENABLE_NETCORE @@ -3776,12 +3777,20 @@ abort_threads (gpointer key, gpointer value, gpointer user) if ((thread->flags & MONO_THREAD_FLAG_DONT_MANAGE)) return; - wait->handles[wait->num] = mono_threads_open_thread_handle (thread->handle); - wait->threads[wait->num] = thread; - wait->num++; - + MonoThreadHandle *handle = mono_threads_open_thread_handle (thread->handle); THREAD_DEBUG (g_print ("%s: Aborting id: %" G_GSIZE_FORMAT "\n", __func__, (gsize)thread->tid)); - mono_thread_internal_abort (thread, FALSE); + if (!mono_thread_internal_abort (thread, FALSE)) { + g_warning ("%s: Failed aborting id: %p, mono_thread_manage will ignore it\n", __func__, (void*)(intptr_t)(gsize)thread->tid); + /* close the handle, we're not going to wait for the thread to be aborted */ + mono_threads_close_thread_handle (handle); + } else { + /* commit to waiting for the thread to be aborted */ + wait->handles[wait->num] = handle; + wait->threads[wait->num] = thread; + wait->num++; + } + + } /** @@ -5615,8 +5624,11 @@ mono_thread_info_get_last_managed (MonoThreadInfo *info) } typedef struct { + /* inputs */ MonoInternalThread *thread; gboolean install_async_abort; + /* outputs */ + gboolean thread_will_abort; MonoThreadInfoInterruptToken *interrupt_token; } AbortThreadData; @@ -5629,6 +5641,8 @@ async_abort_critical (MonoThreadInfo *info, gpointer ud) gboolean protected_wrapper; gboolean running_managed; + data->thread_will_abort = TRUE; + if (mono_get_eh_callbacks ()->mono_install_handler_block_guard (mono_thread_info_get_suspend_state (info))) return MonoResumeThread; @@ -5656,11 +5670,20 @@ async_abort_critical (MonoThreadInfo *info, gpointer ud) */ data->interrupt_token = mono_thread_info_prepare_interrupt (info); + if (!ji && !info->runtime_thread) { + /* Under full cooperative suspend, if a thread is in GC Safe mode (blocking + * state), mono_thread_info_safe_suspend_and_run will treat the thread as + * suspended and run async_abort_critical. If the thread has no managed + * frames, it is some native thread that may not call Mono again anymore, so + * don't wait for it to abort. + */ + data->thread_will_abort = FALSE; + } return MonoResumeThread; } } -static void +static gboolean async_abort_internal (MonoInternalThread *thread, gboolean install_async_abort) { AbortThreadData data; @@ -5668,6 +5691,7 @@ async_abort_internal (MonoInternalThread *thread, gboolean install_async_abort) g_assert (thread != mono_thread_internal_current ()); data.thread = thread; + data.thread_will_abort = FALSE; data.install_async_abort = install_async_abort; data.interrupt_token = NULL; @@ -5675,6 +5699,12 @@ async_abort_internal (MonoInternalThread *thread, gboolean install_async_abort) if (data.interrupt_token) mono_thread_info_finish_interrupt (data.interrupt_token); /*FIXME we need to wait for interruption to complete -- figure out how much into interruption we should wait for here*/ + + /* If the thread was not suspended (async_abort_critical did not run), or the thread is + * "suspended" (BLOCKING) under full coop, and the thread was running native code without + * any managed callers, we can't be sure that it will aknowledge the abort request and + * actually abort. */ + return data.thread_will_abort; } static void diff --git a/src/mono/mono/tests/libtest.c b/src/mono/mono/tests/libtest.c index c2e3639f1418e..c9457a25020a3 100644 --- a/src/mono/mono/tests/libtest.c +++ b/src/mono/mono/tests/libtest.c @@ -8375,19 +8375,39 @@ invoke_foreign_thread (void* user_data) destroy_invoke_names (names); return NULL; } + +static void* +invoke_foreign_delegate (void *user_data) +{ + VoidVoidCallback del = (VoidVoidCallback)user_data; + for (int i = 0; i < 5; ++i) { + del (); + sleep (2); + } + return NULL; +} + #endif LIBTEST_API mono_bool STDCALL -mono_test_attach_invoke_foreign_thread (const char *assm_name, const char *name_space, const char *name, const char *meth_name) +mono_test_attach_invoke_foreign_thread (const char *assm_name, const char *name_space, const char *name, const char *meth_name, VoidVoidCallback del) { #ifndef HOST_WIN32 - struct invoke_names *names = make_invoke_names (assm_name, name_space, name, meth_name); - pthread_t t; - int res = pthread_create (&t, NULL, invoke_foreign_thread, (void*)names); - g_assert (res == 0); - pthread_join (t, NULL); - return 0; + if (!del) { + struct invoke_names *names = make_invoke_names (assm_name, name_space, name, meth_name); + pthread_t t; + int res = pthread_create (&t, NULL, invoke_foreign_thread, (void*)names); + g_assert (res == 0); + pthread_join (t, NULL); + return 0; + } else { + pthread_t t; + int res = pthread_create (&t, NULL, invoke_foreign_delegate, del); + g_assert (res == 0); + pthread_join (t, NULL); + return 0; + } #else // TODO: Win32 version of this test return 1; @@ -8396,6 +8416,8 @@ mono_test_attach_invoke_foreign_thread (const char *assm_name, const char *name_ #ifndef HOST_WIN32 struct names_and_mutex { + /* if del is NULL, use names, otherwise just call del */ + VoidVoidCallback del; struct invoke_names *names; /* mutex to coordinate test and foreign thread */ pthread_mutex_t coord_mutex; @@ -8410,7 +8432,11 @@ invoke_block_foreign_thread (void *user_data) // This thread calls into the runtime and then blocks. It should not // prevent the runtime from shutting down. struct names_and_mutex *nm = (struct names_and_mutex *)user_data; - test_invoke_by_name (nm->names); + if (!nm->del) { + test_invoke_by_name (nm->names); + } else { + nm->del (); + } pthread_mutex_lock (&nm->coord_mutex); /* signal the test thread that we called the runtime */ pthread_cond_signal (&nm->coord_cond); @@ -8422,26 +8448,31 @@ invoke_block_foreign_thread (void *user_data) #endif LIBTEST_API mono_bool STDCALL -mono_test_attach_invoke_block_foreign_thread (const char *assm_name, const char *name_space, const char *name, const char *meth_name) +mono_test_attach_invoke_block_foreign_thread (const char *assm_name, const char *name_space, const char *name, const char *meth_name, VoidVoidCallback del) { #ifndef HOST_WIN32 - struct invoke_names *names = make_invoke_names (assm_name, name_space, name, meth_name); struct names_and_mutex *nm = malloc (sizeof (struct names_and_mutex)); - nm->names = names; - pthread_mutex_init (&nm->coord_mutex, NULL); - pthread_cond_init (&nm->coord_cond, NULL); + nm->del = del; + if (!del) { + struct invoke_names *names = make_invoke_names (assm_name, name_space, name, meth_name); + nm->names = names; + } else { + nm->names = NULL; + } + pthread_mutex_init (&nm->coord_mutex, NULL); + pthread_cond_init (&nm->coord_cond, NULL); pthread_mutex_init (&nm->deadlock_mutex, NULL); pthread_mutex_lock (&nm->deadlock_mutex); // lock the mutex and never unlock it. pthread_t t; int res = pthread_create (&t, NULL, invoke_block_foreign_thread, (void*)nm); g_assert (res == 0); - /* wait for the foreign thread to finish calling the runtime before - * detaching it and returning - */ - pthread_mutex_lock (&nm->coord_mutex); - pthread_cond_wait (&nm->coord_cond, &nm->coord_mutex); - pthread_mutex_unlock (&nm->coord_mutex); + /* wait for the foreign thread to finish calling the runtime before + * detaching it and returning + */ + pthread_mutex_lock (&nm->coord_mutex); + pthread_cond_wait (&nm->coord_cond, &nm->coord_mutex); + pthread_mutex_unlock (&nm->coord_mutex); pthread_detach (t); return 0; #else diff --git a/src/mono/mono/tests/pinvoke-detach-1.cs b/src/mono/mono/tests/pinvoke-detach-1.cs index 5b475fd536014..78ef5a24250cf 100644 --- a/src/mono/mono/tests/pinvoke-detach-1.cs +++ b/src/mono/mono/tests/pinvoke-detach-1.cs @@ -28,29 +28,55 @@ private static void MethodInvokedFromNative () } [DllImport ("libtest", EntryPoint="mono_test_attach_invoke_foreign_thread")] - public static extern bool mono_test_attach_invoke_foreign_thread (string assm_name, string name_space, string class_name, string method_name); + public static extern bool mono_test_attach_invoke_foreign_thread (string assm_name, string name_space, string class_name, string method_name, VoidVoidDelegate del); public static int test_0_attach_invoke_foreign_thread () { was_called = 0; - bool skipped = mono_test_attach_invoke_foreign_thread (typeof (Tests).Assembly.Location, "", "Tests", "MethodInvokedFromNative"); + bool skipped = mono_test_attach_invoke_foreign_thread (typeof (Tests).Assembly.Location, "", "Tests", "MethodInvokedFromNative", null); + GC.Collect (); // should not hang waiting for the foreign thread return skipped || was_called == 5 ? 0 : 1; } + static int was_called_del; + + [MonoPInvokeCallback (typeof (VoidVoidDelegate))] + private static void MethodInvokedFromNative_del () + { + was_called_del++; + } + + public static int test_0_attach_invoke_foreign_thread_delegate () + { + var del = new VoidVoidDelegate (MethodInvokedFromNative_del); + was_called_del = 0; + bool skipped = mono_test_attach_invoke_foreign_thread (null, null, null, null, del); + GC.Collect (); // should not hang waiting for the foreign thread + return skipped || was_called_del == 5 ? 0 : 1; + } + [MonoPInvokeCallback (typeof (VoidVoidDelegate))] private static void MethodInvokedFromNative2 () { } [DllImport ("libtest", EntryPoint="mono_test_attach_invoke_block_foreign_thread")] - public static extern bool mono_test_attach_invoke_block_foreign_thread (string assm_name, string name_space, string class_name, string method_name); + public static extern bool mono_test_attach_invoke_block_foreign_thread (string assm_name, string name_space, string class_name, string method_name, VoidVoidDelegate del); public static int test_0_attach_invoke_block_foreign_thread () { - bool skipped = mono_test_attach_invoke_block_foreign_thread (typeof (Tests).Assembly.Location, "", "Tests", "MethodInvokedFromNative2"); + bool skipped = mono_test_attach_invoke_block_foreign_thread (typeof (Tests).Assembly.Location, "", "Tests", "MethodInvokedFromNative2", null); GC.Collect (); // should not hang waiting for the foreign thread return 0; // really we succeed if the app can shut down without hanging } + // This one fails because we haven't fully detached, so shutdown is waiting for the thread + public static int test_0_attach_invoke_block_foreign_thread_delegate () + { + var del = new VoidVoidDelegate (MethodInvokedFromNative2); + bool skipped = mono_test_attach_invoke_block_foreign_thread (null, null, null, null, del); + GC.Collect (); // should not hang waiting for the foreign thread + return 0; // really we succeed if the app can shut down without hanging + } }