Skip to content

Commit

Permalink
Fix deadlock in dlopen. NFC (#18487)
Browse files Browse the repository at this point in the history
When within dlopen itself we hold an exclusive lock so we need to
disable the automatic code synchronization during that time.

Split out from #18376
  • Loading branch information
sbc100 authored Jan 12, 2023
1 parent 207848f commit 888f452
Show file tree
Hide file tree
Showing 4 changed files with 188 additions and 58 deletions.
150 changes: 92 additions & 58 deletions system/lib/libc/dynlink.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,82 @@ void _emscripten_dlopen_js(struct dso* handle,
void __dl_vseterr(const char*, va_list);

// Ends of the linked list of loaded DSOs. New entries are appended at `tail`.
static struct dso * _Atomic head, * _Atomic tail;

#ifdef _REENTRANT
// The last entry of the DSO list that the current thread has caught up with.
static thread_local struct dso* thread_local_tail;
// Read/write lock around the DSO list: taken for writing while a library is
// being loaded, and for reading during code sync and symbol lookup.
static pthread_rwlock_t lock;

static void dlsync_locked() {
if (!thread_local_tail) {
thread_local_tail = head;
}
while (thread_local_tail->next) {
struct dso* p = thread_local_tail->next;
dbg("dlsync_locked: %s mem_addr=%p "
"mem_size=%zu table_addr=%p table_size=%zu",
p->name,
p->mem_addr,
p->mem_size,
p->table_addr,
p->table_size);
void* success = _dlopen_js(p);
if (!success) {
// If any on the libraries fails to load here then we give up.
// TODO(sbc): Ideally this would never happen and we could/should
// abort, but on the main thread (where we don't have sync xhr) its
// often not possible to syncronously load side module.
_emscripten_errf("_dlopen_js failed: %s", dlerror());
break;
}
thread_local_tail = p;
}
}

// Per-thread flag that suppresses automatic code synchronization.
// _emscripten_thread_sync_code is called from emscripten_yield which itself is
// called whenever we block on a futex. Checking this flag avoids infinite
// recursion when _emscripten_thread_sync_code takes the lock. The flag is also
// set by do_read_lock/do_write_lock and cleared again by do_unlock.
static thread_local bool skip_dlsync = false;

// Forward declaration; called by _emscripten_thread_sync_code below.
static void ensure_init();

// Brings the calling thread up to date with DSOs loaded by other threads.
// Returns immediately when code synchronization is suppressed on this thread
// (skip_dlsync is set), which also guards against re-entry.
void _emscripten_thread_sync_code() {
  if (!skip_dlsync) {
    // Suppress nested syncs for the duration of this call.
    skip_dlsync = true;
    // NOTE(review): ensure_init() can go through do_unlock(), which clears
    // skip_dlsync. Confirm the re-entry guard still holds for the rdlock below.
    ensure_init();
    if (thread_local_tail != tail) {
      dbg("emscripten_thread_sync_code: catching up %p %p", thread_local_tail, tail);
      // Readers only: we walk the list but never modify it here.
      pthread_rwlock_rdlock(&lock);
      dlsync_locked();
      pthread_rwlock_unlock(&lock);
      dbg("emscripten_thread_sync_code: done");
    }
    skip_dlsync = false;
  }
}

// Takes `lock` for reading. Automatic code sync is disabled on this thread
// before the lock is requested; do_unlock() re-enables it.
static void do_read_lock() {
skip_dlsync = true;
pthread_rwlock_rdlock(&lock);
}

// Takes `lock` for writing. Automatic code sync is disabled on this thread
// before the lock is requested; do_unlock() re-enables it.
static void do_write_lock() {
// Once we have the lock we want to avoid automatic code sync as that would
// result in a deadlock.
skip_dlsync = true;
pthread_rwlock_wrlock(&lock);
}

// Releases `lock` (taken via do_read_lock or do_write_lock) and then
// re-enables automatic code sync on this thread. The flag is cleared
// unconditionally, regardless of its value before the lock was taken.
static void do_unlock() {
pthread_rwlock_unlock(&lock);
skip_dlsync = false;
}
#else
// Without _REENTRANT the locking helpers expand to nothing.
#define do_unlock()
#define do_read_lock()
#define do_write_lock()
#endif

static void error(const char* fmt, ...) {
va_list ap;
va_start(ap, fmt);
Expand Down Expand Up @@ -80,13 +153,16 @@ static void load_library_done(struct dso* p) {
p->table_addr,
p->table_size);

#ifdef _REENTRANT
thread_local_tail = p;
#endif

// insert into linked list
p->prev = tail;
if (tail) {
tail->next = p;
}
tail = p;
thread_local_tail = p;

if (!head) {
head = p;
Expand Down Expand Up @@ -114,14 +190,14 @@ static void dlopen_js_onsuccess(struct dso* dso, struct async_data* data) {
dso->mem_addr,
dso->mem_size);
load_library_done(dso);
pthread_rwlock_unlock(&lock);
do_unlock();
data->onsuccess(data->user_data, dso);
free(data);
}

static void dlopen_js_onerror(struct dso* dso, struct async_data* data) {
dbg("dlopen_js_onerror: dso=%p", dso);
pthread_rwlock_unlock(&lock);
do_unlock();
data->onerror(data->user_data);
free(dso);
free(data);
Expand All @@ -134,7 +210,7 @@ static void ensure_init() {
return;
}
// Initialize the dso list. This happens on first run.
pthread_rwlock_wrlock(&lock);
do_write_lock();
if (!head) {
// Flags are not important since the main module is already loaded.
struct dso* p = load_library_start("__main__", RTLD_NOW|RTLD_GLOBAL);
Expand All @@ -143,7 +219,7 @@ static void ensure_init() {
load_library_done(p);
assert(head);
}
pthread_rwlock_unlock(&lock);
do_unlock();
}

void* dlopen(const char* file, int flags) {
Expand All @@ -156,7 +232,11 @@ void* dlopen(const char* file, int flags) {
struct dso* p;
int cs;
pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
pthread_rwlock_wrlock(&lock);
do_write_lock();
#ifdef _REENTRANT
// Make sure we are in sync before loading any new DSOs.
dlsync_locked();
#endif

/* Search for the name to see if it's already loaded */
for (p = head; p; p = p->next) {
Expand All @@ -180,7 +260,7 @@ void* dlopen(const char* file, int flags) {
dbg("dlopen_js: success: %p", p);
load_library_done(p);
end:
pthread_rwlock_unlock(&lock);
do_unlock();
pthread_setcancelstate(cs, 0);
return p;
}
Expand All @@ -192,10 +272,10 @@ void emscripten_dlopen(const char* filename, int flags, void* user_data,
onsuccess(user_data, head);
return;
}
pthread_rwlock_wrlock(&lock);
do_write_lock();
struct dso* p = load_library_start(filename, flags);
if (!p) {
pthread_rwlock_unlock(&lock);
do_unlock();
onerror(user_data);
return;
}
Expand All @@ -217,9 +297,10 @@ void* __dlsym(void* restrict p, const char* restrict s, void* restrict ra) {
return 0;
}
void* res;
pthread_rwlock_rdlock(&lock);
do_read_lock();
res = _dlsym_js(p, s);
pthread_rwlock_unlock(&lock);
do_unlock();
dbg("__dlsym done dso:%p res:%p", p, res);
return res;
}

Expand All @@ -232,50 +313,3 @@ int dladdr(const void* addr, Dl_info* info) {
info->dli_saddr = NULL;
return 1;
}

#ifdef _REENTRANT
void _emscripten_thread_sync_code() {
// This function is called from emscripten_yeild which itself is called
// whenever we block on a futex. We need to check to avoid infinite
// recursion when taking the lock below.
static thread_local bool syncing = false;
if (syncing) {
return;
}
syncing = true;
ensure_init();
if (thread_local_tail == tail) {
dbg("emscripten_thread_sync_code: already in sync");
goto done;
}
pthread_rwlock_rdlock(&lock);
if (!thread_local_tail) {
thread_local_tail = head;
}
while (thread_local_tail->next) {
struct dso* p = thread_local_tail->next;
dbg("emscripten_thread_sync_code: %s mem_addr=%p "
"mem_size=%zu table_addr=%p table_size=%zu",
p->name,
p->mem_addr,
p->mem_size,
p->table_addr,
p->table_size);
void* success = _dlopen_js(p);
if (!success) {
// If any on the libraries fails to load here then we give up.
// TODO(sbc): Ideally this would never happen and we could/should
// abort, but on the main thread (where we don't have sync xhr) its
// often not possible to syncronously load side module.
_emscripten_errf("emscripten_thread_sync_code failed: %s", dlerror());
break;
}
thread_local_tail = p;
}
pthread_rwlock_unlock(&lock);
dbg("emscripten_thread_sync_code done");

done:
syncing = false;
}
#endif
72 changes: 72 additions & 0 deletions test/core/pthread/test_pthread_dlopen_many.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#include <assert.h>
#include <dlfcn.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

// Number of worker threads to start. Can be overridden at compile time with
// -DNUM_THREADS=<n>.
#ifndef NUM_THREADS
#define NUM_THREADS 2
#endif

// Function-pointer types that are only referenced by the dlsym code that is
// currently commented out in thread_main.
typedef int* (*sidey_data_type)();
typedef int (*func_t)();
typedef func_t (*sidey_func_type)();

// Locked by main() before the worker threads are started; thread_main never
// takes it.
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;

// Number of worker threads that have started running. Both main() and the
// workers spin until it reaches NUM_THREADS.
_Atomic int thread_count = 0;

// Entry point for each worker thread.
//
// `arg` carries the thread's index (an integer smuggled through the pointer),
// which selects the side module "liblib<index>.so" that this thread loads.
// The thread first waits until all NUM_THREADS workers are running so that the
// dlopen calls happen concurrently. Always returns NULL; a failed dlopen trips
// the assert below.
void* thread_main(void* arg) {
  int num = (int)(intptr_t)arg;
  // %p requires a `void*` argument, so convert the thread handle explicitly.
  printf("thread_main %d %p\n", num, (void*)pthread_self());
  thread_count++;

  // busy wait until all threads are running
  while (thread_count != NUM_THREADS) {}

  char filename[255];
  // Bounded formatting: never write past the end of `filename`, and catch
  // truncation instead of silently loading the wrong file name.
  int len = snprintf(filename, sizeof filename, "liblib%d.so", num);
  assert(len > 0 && (size_t)len < sizeof filename);
  (void)len;
  printf("loading %s\n", filename);
  void* handle = dlopen(filename, RTLD_NOW|RTLD_GLOBAL);
  printf("done loading %s\n", filename);
  if (!handle) {
    // Report the loader's reason before the assert below aborts the test.
    printf("dlerror: %s\n", dlerror());
  }
  assert(handle);
  /*
   * TODO(sbc): We have a bug that new functions added to the table via dlsym
   * are not yet correctly synchronized between threads.
   * Uncommenting the code below will cause a "table out of sync" error.
   */
  /*
  sidey_data_type p_side_data_address;
  sidey_func_type p_side_func_address;
  p_side_data_address = dlsym(handle, "side_data_address");
  printf("p_side_data_address=%p\n", p_side_data_address);
  p_side_func_address = dlsym(handle, "side_func_address");
  printf("p_side_func_address=%p\n", p_side_func_address);
  */

  printf("done thread_main %d\n", num);
  return NULL;
}

// Starts NUM_THREADS workers that each dlopen their own side module, waits
// for all of them to be running, then joins them and prints "main done".
// Returns 0 on success; thread creation or join failures trip an assert.
int main() {
  // %p requires a `void*` argument, so convert the thread handle explicitly.
  printf("in main: %p\n", (void*)pthread_self());
  // NOTE(review): the mutex is never unlocked and the workers never take it;
  // confirm whether holding it is still part of what this test exercises.
  pthread_mutex_lock(&mutex);

  // start a bunch of threads while holding the lock
  pthread_t threads[NUM_THREADS];
  for (int i = 0; i < NUM_THREADS; i++) {
    // Go through intptr_t so the int-to-pointer conversion is well defined
    // regardless of pointer width.
    int rc = pthread_create(&threads[i], NULL, thread_main, (void*)(intptr_t)i);
    // Without this check a failed create would leave the busy-wait below
    // spinning forever.
    assert(rc == 0);
    (void)rc;
  }

  // busy wait until all threads are running
  while (thread_count != NUM_THREADS) {}

  for (int i = 0; i < NUM_THREADS; i++) {
    int rc = pthread_join(threads[i], NULL);
    assert(rc == 0);
    (void)rc;
  }

  printf("main done\n");
  return 0;
}
6 changes: 6 additions & 0 deletions test/core/pthread/test_pthread_dlopen_side.c
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
#include <stdio.h>
#include <stdlib.h>

typedef int (*myfunc_type)();

static int mydata[10] = { 44 };

// Exit handler registered by ctor(); prints a marker line.
static void dtor() {
puts("side module atexit ..");
}

// Marked as a constructor: prints a marker line and registers dtor() to run
// at exit.
__attribute__((constructor)) static void ctor() {
puts("side module ctor");
atexit(dtor);
}

static int myfunc() {
Expand Down
18 changes: 18 additions & 0 deletions test/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -9348,6 +9348,24 @@ def test_pthread_dlopen(self, do_yield):
'invalid index into function table',
assert_returncode=NON_ZERO)

@needs_dylink
@node_pthreads
def test_pthread_dlopen_many(self):
# Builds test_pthread_dlopen_many.c with NUM_THREADS set to `nthreads` and
# checks that the side module's ctor/atexit messages and "main done" all
# appear in the output.
nthreads = 10
self.set_setting('USE_PTHREADS')
self.emcc_args.append('-Wno-experimental')
self.build_dlfcn_lib(test_file('core/pthread/test_pthread_dlopen_side.c'))
# One copy of the side module per thread, named to match the "liblib%d.so"
# pattern that the C test formats from the thread index.
for i in range(nthreads):
shutil.copyfile('liblib.so', f'liblib{i}.so')

self.prep_dlfcn_main()
self.set_setting('EXIT_RUNTIME')
self.set_setting('PROXY_TO_PTHREAD')
# assert_all=True: every listed string is expected in the output.
self.do_runf(test_file('core/pthread/test_pthread_dlopen_many.c'),
['side module ctor', 'main done', 'side module atexit'],
emcc_args=[f'-DNUM_THREADS={nthreads}'],
assert_all=True)

@needs_dylink
@node_pthreads
def test_pthread_dlsym(self):
Expand Down

0 comments on commit 888f452

Please sign in to comment.