From 5e5567c2aaf83709a8f950fe945fda7881022b5e Mon Sep 17 00:00:00 2001 From: Harald Hoyer Date: Tue, 25 Oct 2022 15:33:26 +0200 Subject: [PATCH] feat: setup stacks and TLS in `wasi_thread_start` and cleanup in `pthread_exit()` Signed-off-by: Harald Hoyer --- Makefile | 4 + .../wasm32-wasi/posix/defined-symbols.txt | 11 +++ .../wasm32-wasi/posix/undefined-symbols.txt | 12 +-- .../wasm32-wasi/single/defined-symbols.txt | 1 + libc-bottom-half/crt/crt1-command.c | 8 ++ libc-bottom-half/sources/__wasilibc_real.c | 6 +- libc-top-half/musl/arch/wasm32/pthread_arch.h | 12 +-- libc-top-half/musl/src/env/__init_tls.c | 33 +++++++- .../musl/src/internal/pthread_impl.h | 13 ++++ .../musl/src/thread/pthread_create.c | 77 ++++++++++++++++--- libc-top-half/musl/src/thread/pthread_self.c | 4 +- 11 files changed, 151 insertions(+), 30 deletions(-) diff --git a/Makefile b/Makefile index 50519fdd4..4d881de96 100644 --- a/Makefile +++ b/Makefile @@ -190,9 +190,12 @@ LIBC_TOP_HALF_MUSL_SOURCES = \ ifeq ($(THREAD_MODEL), posix) LIBC_TOP_HALF_MUSL_SOURCES += \ $(addprefix $(LIBC_TOP_HALF_MUSL_SRC_DIR)/, \ + env/__init_tls.c \ + stdio/__lockfile.c \ thread/__lock.c \ thread/__wait.c \ thread/__timedwait.c \ + thread/default_attr.c \ thread/pthread_cleanup_push.c \ thread/pthread_cond_broadcast.c \ thread/pthread_cond_destroy.c \ @@ -233,6 +236,7 @@ LIBC_TOP_HALF_MUSL_SOURCES += \ thread/pthread_rwlockattr_init.c \ thread/pthread_rwlockattr_setpshared.c \ thread/pthread_setcancelstate.c \ + thread/pthread_self.c \ thread/pthread_testcancel.c \ thread/sem_destroy.c \ thread/sem_getvalue.c \ diff --git a/expected/wasm32-wasi/posix/defined-symbols.txt b/expected/wasm32-wasi/posix/defined-symbols.txt index fe29f30b4..646b5a3c4 100644 --- a/expected/wasm32-wasi/posix/defined-symbols.txt +++ b/expected/wasm32-wasi/posix/defined-symbols.txt @@ -23,6 +23,7 @@ __c_locale __clock __clock_gettime __clock_nanosleep +__copy_tls __cos __cosdf __cosl @@ -38,6 +39,8 @@ __ctype_tolower_loc 
__ctype_toupper_loc __cxa_atexit __cxa_finalize +__default_guardsize +__default_stacksize __des_setkey __do_cleanup_pop __do_cleanup_push @@ -87,6 +90,8 @@ __getopt_msg __gmtime_r __hwcap __inet_aton +__init_tls +__init_tp __intscan __invtrigl_R __isalnum_l @@ -144,6 +149,7 @@ __locale_lock __locale_lockptr __localtime_r __lock +__lockfile __log2_data __log2f_data __log_data @@ -265,6 +271,7 @@ __tan __tandf __tanl __testcancel +__thread_list_lock __timedwait __timedwait_cp __tl_lock @@ -288,6 +295,7 @@ __tsearch_balance __uflow __unlist_locked_file __unlock +__unlockfile __uselocale __utc __wait @@ -371,6 +379,7 @@ __wasilibc_nocwd_scandirat __wasilibc_nocwd_symlinkat __wasilibc_nocwd_utimensat __wasilibc_open_nomode +__wasilibc_pthread_self __wasilibc_register_preopened_fd __wasilibc_rename_newat __wasilibc_rename_oldat @@ -992,6 +1001,7 @@ pthread_rwlock_wrlock pthread_rwlockattr_destroy pthread_rwlockattr_init pthread_rwlockattr_setpshared +pthread_self pthread_setcancelstate pthread_testcancel pthread_timedjoin_np @@ -1182,6 +1192,7 @@ tfind tgamma tgammaf tgammal +thrd_current thrd_sleep time timegm diff --git a/expected/wasm32-wasi/posix/undefined-symbols.txt b/expected/wasm32-wasi/posix/undefined-symbols.txt index 407a6b71b..7def0a9f4 100644 --- a/expected/wasm32-wasi/posix/undefined-symbols.txt +++ b/expected/wasm32-wasi/posix/undefined-symbols.txt @@ -1,7 +1,4 @@ __addtf3 -__copy_tls -__default_guardsize -__default_stacksize __divtf3 __eqtf2 __extenddftf2 @@ -59,19 +56,18 @@ __imported_wasi_snapshot_preview1_sock_accept __imported_wasi_snapshot_preview1_sock_recv __imported_wasi_snapshot_preview1_sock_send __imported_wasi_snapshot_preview1_sock_shutdown -__imported_wasi_snapshot_preview2_thread_spawn +__imported_wasi_thread_spawn __letf2 -__lockfile __lttf2 __main_argc_argv __netf2 __stack_pointer __subtf3 -__thread_list_lock +__tls_align __tls_base +__tls_size __trunctfdf2 __trunctfsf2 -__unlockfile __unordtf2 -__wasilibc_pthread_self __wasm_call_ctors 
+__wasm_init_tls diff --git a/expected/wasm32-wasi/single/defined-symbols.txt b/expected/wasm32-wasi/single/defined-symbols.txt index f8f445707..9812feea8 100644 --- a/expected/wasm32-wasi/single/defined-symbols.txt +++ b/expected/wasm32-wasi/single/defined-symbols.txt @@ -80,6 +80,7 @@ __getopt_msg __gmtime_r __hwcap __inet_aton +__init_tls __intscan __invtrigl_R __isalnum_l diff --git a/libc-bottom-half/crt/crt1-command.c b/libc-bottom-half/crt/crt1-command.c index 6e2bcd942..ea0d16a74 100644 --- a/libc-bottom-half/crt/crt1-command.c +++ b/libc-bottom-half/crt/crt1-command.c @@ -1,3 +1,4 @@ +#include "libc.h" #include extern void __wasm_call_ctors(void); extern int __main_void(void); @@ -7,6 +8,11 @@ extern void __wasm_call_dtors(void); // that the `_start` function isn't started more than once. static volatile int started = 0; +static void dummy_0(size_t *mem) +{ +} +weak_alias(dummy_0, __init_tls); + __attribute__((export_name("_start"))) void _start(void) { // Don't allow the program to be called multiple times. @@ -18,6 +24,8 @@ void _start(void) { // The linker synthesizes this to call constructors. __wasm_call_ctors(); + __init_tls(NULL); + // Call `__main_void` which will either be the application's zero-argument // `__main_void` function or a libc routine which obtains the command-line // arguments and calls `__main_argv_argc`. 
diff --git a/libc-bottom-half/sources/__wasilibc_real.c b/libc-bottom-half/sources/__wasilibc_real.c index 2648ac9fa..855a2c6dd 100644 --- a/libc-bottom-half/sources/__wasilibc_real.c +++ b/libc-bottom-half/sources/__wasilibc_real.c @@ -660,13 +660,13 @@ __wasi_errno_t __wasi_sock_shutdown( } #ifdef _REENTRANT -int32_t __imported_wasi_snapshot_preview2_thread_spawn(int32_t arg0) __attribute__(( - __import_module__("wasi_snapshot_preview2"), +int32_t __imported_wasi_thread_spawn(int32_t arg0) __attribute__(( + __import_module__("wasi"), __import_name__("thread_spawn") )); __wasi_errno_t __wasi_thread_spawn(void* start_arg) { - int32_t ret = __imported_wasi_snapshot_preview2_thread_spawn((int32_t) start_arg); + int32_t ret = __imported_wasi_thread_spawn((int32_t) start_arg); return (uint16_t) ret; } #endif diff --git a/libc-top-half/musl/arch/wasm32/pthread_arch.h b/libc-top-half/musl/arch/wasm32/pthread_arch.h index e23eaf8f2..5e4bcdb2b 100644 --- a/libc-top-half/musl/arch/wasm32/pthread_arch.h +++ b/libc-top-half/musl/arch/wasm32/pthread_arch.h @@ -1,11 +1,11 @@ +#include + static inline uintptr_t __get_tp(void) { #if _REENTRANT - int val; - __asm__("global.get __wasilibc_pthread_self\n" - "local.set %0" - : "=r"(val)); - return val; + extern thread_local uintptr_t __wasilibc_pthread_self; + return __wasilibc_pthread_self; #else - return 0; + return 0; #endif } + diff --git a/libc-top-half/musl/src/env/__init_tls.c b/libc-top-half/musl/src/env/__init_tls.c index a93141ed3..6b0afe4e3 100644 --- a/libc-top-half/musl/src/env/__init_tls.c +++ b/libc-top-half/musl/src/env/__init_tls.c @@ -1,13 +1,19 @@ #define SYSCALL_NO_TLS 1 +#ifdef __wasilibc_unmodified_upstream #include +#endif #include +#ifdef __wasilibc_unmodified_upstream #include +#endif #include #include #include "pthread_impl.h" #include "libc.h" #include "atomic.h" +#ifdef __wasilibc_unmodified_upstream #include "syscall.h" +#endif volatile int __thread_list_lock; @@ -19,7 +25,9 @@ int __init_tp(void *p) if 
(r < 0) return -1; if (!r) libc.can_do_threads = 1; td->detach_state = DT_JOINABLE; +#ifdef __wasilibc_unmodified_upstream td->tid = __syscall(SYS_set_tid_address, &__thread_list_lock); +#endif td->locale = &libc.global_locale; td->robust_list.head = &td->robust_list.head; td->sysinfo = __sysinfo; @@ -61,16 +69,19 @@ void *__copy_tls(unsigned char *mem) mem -= (uintptr_t)mem & (libc.tls_align-1); td = (pthread_t)mem; +#ifdef __wasilibc_unmodified_upstream for (i=1, p=libc.tls_head; p; i++, p=p->next) { dtv[i] = (uintptr_t)(mem - p->offset) + DTP_OFFSET; memcpy(mem - p->offset, p->image, p->len); } +#endif #endif dtv[0] = libc.tls_cnt; td->dtv = dtv; return td; } +#ifdef __wasilibc_unmodified_upstream #if ULONG_MAX == 0xffffffff typedef Elf32_Phdr Phdr; #else @@ -78,14 +89,20 @@ typedef Elf64_Phdr Phdr; #endif extern weak hidden const size_t _DYNAMIC[]; +#endif +#ifdef __wasilibc_unmodified_upstream static void static_init_tls(size_t *aux) +#else +void __init_tls(size_t *aux) +#endif { + void *mem; +#ifdef __wasilibc_unmodified_upstream unsigned char *p; size_t n; Phdr *phdr, *tls_phdr=0; size_t base = 0; - void *mem; for (p=(void *)aux[AT_PHDR],n=aux[AT_PHNUM]; n; n--,p+=aux[AT_PHENT]) { phdr = (void *)p; @@ -121,7 +138,13 @@ static void static_init_tls(size_t *aux) main_tls.offset = main_tls.size; #endif if (main_tls.align < MIN_TLS_ALIGN) main_tls.align = MIN_TLS_ALIGN; - +#else // __wasilibc_unmodified_upstream + main_tls.size = __builtin_wasm_tls_size(); + main_tls.offset = main_tls.size; + main_tls.align = __builtin_wasm_tls_align(); + if (main_tls.size > 0) + libc.tls_cnt = 1; +#endif // __wasilibc_unmodified_upstream libc.tls_align = main_tls.align; libc.tls_size = 2*sizeof(void *) + sizeof(struct pthread) #ifdef TLS_ABOVE_TP @@ -130,6 +153,7 @@ static void static_init_tls(size_t *aux) + main_tls.size + main_tls.align + MIN_TLS_ALIGN-1 & -MIN_TLS_ALIGN; +#ifdef __wasilibc_unmodified_upstream // FIXME if (libc.tls_size > sizeof builtin_tls) { #ifndef 
SYS_mmap2 #define SYS_mmap2 SYS_mmap @@ -144,10 +168,15 @@ static void static_init_tls(size_t *aux) } else { mem = builtin_tls; } +#else // __wasilibc_unmodified_upstream + mem = builtin_tls; +#endif // __wasilibc_unmodified_upstream /* Failure to initialize thread pointer is always fatal. */ if (__init_tp(__copy_tls(mem)) < 0) a_crash(); } +#ifdef __wasilibc_unmodified_upstream weak_alias(static_init_tls, __init_tls); +#endif diff --git a/libc-top-half/musl/src/internal/pthread_impl.h b/libc-top-half/musl/src/internal/pthread_impl.h index 22e557d58..2fffc2c24 100644 --- a/libc-top-half/musl/src/internal/pthread_impl.h +++ b/libc-top-half/musl/src/internal/pthread_impl.h @@ -164,7 +164,15 @@ extern hidden void *__pthread_tsd_main[]; extern hidden volatile int __eintr_valid_flag; hidden int __clone(int (*)(void *), void *, int, void *, ...); +#ifdef __wasilibc_unmodified_upstream hidden int __set_thread_area(void *); +#else +static inline int __set_thread_area(void *p) { + extern thread_local uintptr_t __wasilibc_pthread_self; + __wasilibc_pthread_self = (uintptr_t) p; + return 0; +} +#endif #ifdef __wasilibc_unmodified_upstream /* WASI has no sigaction */ hidden int __libc_sigaction(int, const struct sigaction *, struct sigaction *); #endif @@ -213,8 +221,13 @@ extern hidden volatile int __abort_lock[1]; extern hidden unsigned __default_stacksize; extern hidden unsigned __default_guardsize; +#ifdef __wasilibc_unmodified_upstream #define DEFAULT_STACK_SIZE 131072 #define DEFAULT_GUARD_SIZE 8192 +#else +#define DEFAULT_STACK_SIZE 131072 +#define DEFAULT_GUARD_SIZE 0 +#endif #define DEFAULT_STACK_MAX (8<<20) #define DEFAULT_GUARD_MAX (1<<20) diff --git a/libc-top-half/musl/src/thread/pthread_create.c b/libc-top-half/musl/src/thread/pthread_create.c index d0168987f..5b865660b 100644 --- a/libc-top-half/musl/src/thread/pthread_create.c +++ b/libc-top-half/musl/src/thread/pthread_create.c @@ -174,6 +174,15 @@ _Noreturn void __pthread_exit(void *result) * and then exits 
without touching the stack. */ __unmapself(self->map_base, self->map_size); } +#else + if (state==DT_DETACHED && self->map_base) { + // __syscall(SYS_exit) would unlock the thread list; + // do it manually here + __tl_unlock(); + free(self->map_base); + // Can't use `exit()` here, because it is too high level + for (;;) __wasi_proc_exit(0); + } #endif /* Wake any joiner. */ @@ -185,11 +194,14 @@ _Noreturn void __pthread_exit(void *result) * it that it's no longer available. */ self->tid = 0; UNLOCK(self->killlock); - #ifdef __wasilibc_unmodified_upstream for (;;) __syscall(SYS_exit, 0); #else - for (;;) exit(0); + // __syscall(SYS_exit) would unlock the thread list; + // do it manually here + __tl_unlock(); + // Can't use `exit()` here, because it is too high level + for (;;) __wasi_proc_exit(0); #endif } @@ -248,23 +260,70 @@ static int start_c11(void *p) return 0; } #else +static inline void *get_stack_ptr() { + void *val; + __asm__("global.get __stack_pointer\n" + "local.set %0" + : "=r"(val)); + return val; +} + +static inline void set_stack_ptr(void *val) { + __asm__("local.get %0\n" + "global.set __stack_pointer" + :: "r"(val)); +} + +static _Noreturn void wasi_thread_start_2(int tid, struct start_args *args); +static _Noreturn void wasi_thread_start_3(int tid, struct start_args *args); + +// Small function, which should/must not use $__stack_pointer __attribute__((export_name("wasi_thread_start"))) -int wasi_thread_start(int tid, void *p) +_Noreturn void wasi_thread_start(int tid, struct start_args *args) { - struct start_args *args = p; + void *stack = args->thread->stack; + stack -= (uintptr_t)stack % sizeof(uintptr_t); + stack -= sizeof(struct start_args); + stack -= (uintptr_t)stack % 0x10; + + set_stack_ptr(stack); + wasi_thread_start_2(tid, args); +} + +extern void __wasm_init_tls(void *memory); + +// Keep this noinline so that wasi_thread_start stays small and does not need the stack pointer +__attribute__((noinline)) +static _Noreturn void
wasi_thread_start_2(int tid, struct start_args *args) { + // Initialize TLS + // round up + void *tls = (void *) ((uintptr_t)(args->thread->stack + __builtin_wasm_tls_align() - 1)); + tls -= (uintptr_t) tls & (__builtin_wasm_tls_align() - 1); + if (__builtin_wasm_tls_size()) { + hidden void __wasm_init_tls(void *); + __wasm_init_tls(tls); + } + wasi_thread_start_3(tid, args); +} + +// Keep this noinline so that wasi_thread_start stays small and does not need the stack pointer +// Stack and TLS are now initialized +__attribute__((noinline)) +static _Noreturn void wasi_thread_start_3(int tid, struct start_args *args) +{ + // Save the pointer to the pthread structure in the thread-local `__wasilibc_pthread_self`. + extern thread_local uintptr_t __wasilibc_pthread_self; + __wasilibc_pthread_self = (uintptr_t) args->thread; + // Set the thread ID (TID) on the pthread structure. The TID is stored // atomically since it is also stored by the parent thread; this way, // whichever thread (parent or child) reaches this point first can proceed // without waiting. atomic_store((atomic_int *) &(args->thread->tid), tid); - // Save the pointer to the pthread structure as the global `pthread_self`. - __asm__("local.set %0\n" - "global.set __wasilibc_pthread_self\n" - : "=r"(args->thread)); + // Execute the user's start function. int (*start)(void*) = (int(*)(void*)) args->start_func; __pthread_exit((void *)(uintptr_t)start(args->start_arg)); - return 0; } #endif diff --git a/libc-top-half/musl/src/thread/pthread_self.c b/libc-top-half/musl/src/thread/pthread_self.c index 197c6830b..bbb30aeca 100644 --- a/libc-top-half/musl/src/thread/pthread_self.c +++ b/libc-top-half/musl/src/thread/pthread_self.c @@ -3,9 +3,9 @@ #if !defined(__wasilibc_unmodified_upstream) && defined(__wasm__) && \ defined(_REENTRANT) -// We need some place to store the thread ID. This WebAssembly global fits the +// We need some place to store the thread ID.
This WebAssembly thread_local fits the // bill and is used by `__get_tp` elsewhere. -__asm__(".globaltype __wasilibc_pthread_self, i32\n"); +thread_local uintptr_t __wasilibc_pthread_self = 0; #endif static pthread_t __pthread_self_internal()