diff --git a/core/unix/loader.c b/core/unix/loader.c index 181a03a5af7..e7e0aca2cf1 100644 --- a/core/unix/loader.c +++ b/core/unix/loader.c @@ -723,6 +723,15 @@ privload_os_finalize(privmod_t *privmod) if ((ver[0] == '\0' || ver[0] < '2') || ver[1] != '.' || ver[2] < '3' || (ver[2] == '3' && ver[3] < '4')) return; +# ifndef X86 + /* XXX i#6611: We have privload_set_pthread_tls_fields() setting the pthread tid + * field on x86, but not other arches. Since we have the glibc version here and + * believe this to be limited to 2.37 we warn about it here. + */ + if (ver[2] == '3' && ver[3] >= '7') { + SYSLOG_INTERNAL_WARNING("glibc 2.37+ i#6611 pthread tid fix NYI for non-x86"); + } +# endif if (privmod_ld_linux == NULL) { SYSLOG_INTERNAL_WARNING("glibc 2.34+ i#5437 workaround failed: missed ld"); return; diff --git a/core/unix/loader_linux.c b/core/unix/loader_linux.c index f194e6ad00e..eb307818b87 100644 --- a/core/unix/loader_linux.c +++ b/core/unix/loader_linux.c @@ -39,6 +39,7 @@ #include "../module_shared.h" #include "os_private.h" #include "../ir/instr.h" /* SEG_GS/SEG_FS */ +#include "../ir/decode.h" #include "module.h" #include "module_private.h" #include "../heap.h" /* HEAPACCT */ @@ -283,7 +284,108 @@ privload_mod_tls_init(privmod_t *mod) } static void -privload_copy_tls_block(app_pc priv_tls_base, uint mod_idx) +privload_set_pthread_tls_fields(privmod_t *mod, app_pc priv_tls_base) +{ + /* Set pthreads fields for glibc 3.24+ where pthreads is inside libc and has + * special undocumented initialization by ld.so. + */ + if (strstr(mod->name, "libc.so") != mod->name) + return; + os_privmod_data_t *opd = (os_privmod_data_t *)mod->os_privmod_data; + /* i#6611: Find the pthread tid TLS field offset by decoding a function known + * to reference it. + */ +# define PTHREAD_TID_FUNC_NAME "pthread_mutex_consistent" + void (*tid_using_func)(bool) = (void (*)(bool))get_proc_address_from_os_data( + &opd->os_data, opd->load_delta, PTHREAD_TID_FUNC_NAME, NULL); + if (tid_using_func == NULL) + return; + LOG(GLOBAL, LOG_LOADER, 2, "%s: decoding %s to find tid offset\n", __FUNCTION__, + PTHREAD_TID_FUNC_NAME); + app_pc pc = (app_pc)tid_using_func; + instr_t instr; + dcontext_t *dcontext = get_thread_private_dcontext(); + if (dcontext == NULL) + dcontext = GLOBAL_DCONTEXT; + instr_init(dcontext, &instr); + /* This is a small function with 10-15 instructions, and we stop when we hit + * a return. We set a just-in-case upper limit of 64 to ensure we don't + * loop for too long if something goes wrong. + */ +# define MAX_INSTRS_TO_DECODE 64 + int instr_count = 0; + while (instr_count < MAX_INSTRS_TO_DECODE) { + IF_DEBUG(app_pc prev_pc = pc;) + pc = decode(dcontext, pc, &instr); + if (pc == NULL || !instr_valid(&instr)) { + SYSLOG_INTERNAL_WARNING("%s: failed to decode from %p\n", __FUNCTION__, + prev_pc); + break; + } + if (instr_is_return(&instr)) { + SYSLOG_INTERNAL_WARNING("%s: failed to find TLS offset\n", __FUNCTION__); + break; + } + long *tid_slot = NULL; +# ifdef X86 + /* We're looking for the only far ref in the function, like this: + * 8ac26: 64 8b 04 25 d0 02 00 mov %fs:0x2d0,%eax + * 8ac2d: 00 + */ + if (instr_get_opcode(&instr) == OP_mov_ld && + opnd_is_far_base_disp(instr_get_src(&instr, 0)) && + opnd_get_segment(instr_get_src(&instr, 0)) == LIB_SEG_TLS) { + int offs = opnd_get_disp(instr_get_src(&instr, 0)); + tid_slot = (long *)(priv_tls_base + offs); + } +# elif defined(AARCH64) + /* TODO i#6611: We have to decode something like this to come up with + * privlib_tls_base - 0x700 + 208 but we need a 2.37+ machine to test on: + * 13dec: d53bd042 mrs x2, tpidr_el0 + * 13df0: 52800000 mov w0, #0x0 + * 13df4: d11c0042 sub x2, x2, #0x700 + * 13df8: b940d042 ldr w2, [x2, #208] + */ + /* We have a glibc 2.37+ SYSLOG_INTERNAL_WARNING in privload_os_finalize(). */ + break; +# else + /* XXX i#6611: Not supported yet. */ + /* We have a glibc 2.37+ SYSLOG_INTERNAL_WARNING in privload_os_finalize(). */ + break; +# endif + if (tid_slot != NULL) { + long cur_tid; + thread_id_t real_tid = get_sys_thread_id(); + if (!d_r_safe_read(tid_slot, sizeof(cur_tid), &cur_tid)) { + SYSLOG_INTERNAL_WARNING("%s: failed to read tid from slot %p\n", + __FUNCTION__, tid_slot); + } else if (cur_tid == real_tid) { + LOG(GLOBAL, LOG_LOADER, 2, "%s: tid slot is already correct\n", + __FUNCTION__); + } else { + LOG(GLOBAL, LOG_LOADER, 2, "%s: writing tid " TIDFMT " to slot %p\n", + __FUNCTION__, real_tid, tid_slot); + size_t written; + if (!safe_write_ex(tid_slot, sizeof(*tid_slot), &real_tid, &written) || + written != sizeof(*tid_slot)) { + SYSLOG_INTERNAL_WARNING("%s: failed to write tid to slot %p\n", + __FUNCTION__, tid_slot); + } + } + break; + } + instr_reset(dcontext, &instr); + ++instr_count; + } + if (instr_count >= MAX_INSTRS_TO_DECODE) { + SYSLOG_INTERNAL_WARNING("%s: decoding hit max instr count before target or ret\n", + __FUNCTION__); + } + instr_free(dcontext, &instr); +} + +static void +privload_copy_tls_block(privmod_t *mod, app_pc priv_tls_base, uint mod_idx) { os_privmod_data_t *opd = tls_info.mods[mod_idx]->os_privmod_data; void *dest; @@ -306,6 +408,7 @@ privload_copy_tls_block(app_pc priv_tls_base, uint mod_idx) */ ASSERT(opd->tls_block_size >= opd->tls_image_size); memset(dest + opd->tls_image_size, 0, opd->tls_block_size - opd->tls_image_size); + privload_set_pthread_tls_fields(mod, priv_tls_base); } /* Called post-reloc. */ @@ -320,7 +423,7 @@ privload_mod_tls_primary_thread_init(privmod_t *mod) os_local_state_t *os_tls = get_os_tls(); app_pc priv_tls_base = os_tls->os_seg_info.priv_lib_tls_base; os_privmod_data_t *opd = (os_privmod_data_t *)mod->os_privmod_data; - privload_copy_tls_block(priv_tls_base, opd->tls_modid); + privload_copy_tls_block(mod, priv_tls_base, opd->tls_modid); } #endif @@ -401,7 +504,7 @@ privload_tls_init(void *app_tp) if (dynamo_initialized) { uint i; for (i = 0; i < tls_info.num_mods; i++) - privload_copy_tls_block(dr_tp, i); + privload_copy_tls_block(tls_info.mods[i], dr_tp, i); } return dr_tp;