diff --git a/elf/arch-i386.cc b/elf/arch-i386.cc
index 5c534256f1..1494d98d97 100644
--- a/elf/arch-i386.cc
+++ b/elf/arch-i386.cc
@@ -254,6 +254,34 @@ static void relax_ld_to_le(u8 *loc, ElfRel rel, u64 val) {
   }
 }
 
+static u32 relax_tlsdesc_to_ie(u8 *loc) { // loc: the two instruction bytes to inspect
+  switch ((loc[0] << 8) | loc[1]) { // key: loc[0] (opcode) in the high byte, loc[1] (ModRM) in the low byte
+  case 0x8d83: return 0x8b83; // lea 0(%ebx), %eax -> mov 0(%ebx), %eax
+  case 0x8d9b: return 0x8b9b; // lea 0(%ebx), %ebx -> mov 0(%ebx), %ebx
+  case 0x8d8b: return 0x8b8b; // lea 0(%ebx), %ecx -> mov 0(%ebx), %ecx
+  case 0x8d93: return 0x8b93; // lea 0(%ebx), %edx -> mov 0(%ebx), %edx
+  case 0x8db3: return 0x8bb3; // lea 0(%ebx), %esi -> mov 0(%ebx), %esi
+  case 0x8dbb: return 0x8bbb; // lea 0(%ebx), %edi -> mov 0(%ebx), %edi
+  case 0x8da3: return 0x8ba3; // lea 0(%ebx), %esp -> mov 0(%ebx), %esp
+  case 0x8dab: return 0x8bab; // lea 0(%ebx), %ebp -> mov 0(%ebx), %ebp
+  }
+  return 0; // none of the lea forms above matched; 0 signals "cannot rewrite"
+}
+
+static u32 relax_tlsdesc_to_le(u8 *loc) { // loc: the two instruction bytes to inspect
+  switch ((loc[0] << 8) | loc[1]) { // same key as relax_tlsdesc_to_ie; results are 0x90 (nop) then the mov opcode
+  case 0x8d83: return 0x90b8; // lea 0(%ebx), %eax -> mov $0, %eax
+  case 0x8d9b: return 0x90bb; // lea 0(%ebx), %ebx -> mov $0, %ebx
+  case 0x8d8b: return 0x90b9; // lea 0(%ebx), %ecx -> mov $0, %ecx
+  case 0x8d93: return 0x90ba; // lea 0(%ebx), %edx -> mov $0, %edx
+  case 0x8db3: return 0x90be; // lea 0(%ebx), %esi -> mov $0, %esi
+  case 0x8dbb: return 0x90bf; // lea 0(%ebx), %edi -> mov $0, %edi
+  case 0x8da3: return 0x90bc; // lea 0(%ebx), %esp -> mov $0, %esp
+  case 0x8dab: return 0x90bd; // lea 0(%ebx), %ebp -> mov $0, %ebp
+  }
+  return 0; // none of the lea forms above matched; 0 signals "cannot rewrite"
+}
+
 template <>
 void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) {
   std::span> rels = get_rels(ctx);
@@ -373,15 +401,30 @@ void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) {
       //
       //   mov $foo@TPOFF, %eax
       //   nop
+      //
+      // We allow the following alternative code sequence too because
+      // LLVM emits such code.
+      //
+      //   lea 0(%ebx), %reg
+      //       R_386_TLS_GOTDESC foo
+      //   mov %reg, %eax
+      //   call *(%eax)
+      //       R_386_TLS_DESC_CALL foo
       if (sym.has_tlsdesc(ctx)) {
         *(ul32 *)loc = sym.get_tlsdesc_addr(ctx) + A - GOT;
       } else if (sym.has_gottp(ctx)) {
-        loc[-2] = 0x8b;
-        loc[-1] = 0x83;
+        u32 insn = relax_tlsdesc_to_ie(loc - 2); // inspect the two bytes just before the 32-bit field at loc
+        if (!insn)
+          Fatal(ctx) << *this << ": illegal instruction sequence for TLSDESC";
+        loc[-2] = insn >> 8; // first replacement byte (high byte of insn)
+        loc[-1] = insn; // second replacement byte (low byte of insn)
         *(ul32 *)loc = sym.get_gottp_addr(ctx) + A - GOT;
       } else {
-        loc[-2] = 0x90;
-        loc[-1] = 0xb8;
+        u32 insn = relax_tlsdesc_to_le(loc - 2); // inspect the two bytes just before the 32-bit field at loc
+        if (!insn)
+          Fatal(ctx) << *this << ": illegal instruction sequence for TLSDESC";
+        loc[-2] = insn >> 8; // first replacement byte (high byte of insn)
+        loc[-1] = insn; // second replacement byte (low byte of insn)
         *(ul32 *)loc = S + A - ctx.tp_addr;
       }
       break;
diff --git a/elf/arch-x86-64.cc b/elf/arch-x86-64.cc
index af86adecc4..9266b95754 100644
--- a/elf/arch-x86-64.cc
+++ b/elf/arch-x86-64.cc
@@ -178,6 +178,50 @@ static u32 relax_gottpoff(u8 *loc) {
   return 0;
 }
 
+static u32 relax_tlsdesc_to_ie(u8 *loc) { // loc: the three instruction bytes to inspect
+  switch ((loc[0] << 16) | (loc[1] << 8) | loc[2]) { // key: loc[0] (REX prefix), loc[1] (opcode), loc[2] (ModRM)
+  case 0x488d05: return 0x488b05; // lea 0(%rip), %rax -> mov 0(%rip), %rax
+  case 0x488d0d: return 0x488b0d; // lea 0(%rip), %rcx -> mov 0(%rip), %rcx
+  case 0x488d15: return 0x488b15; // lea 0(%rip), %rdx -> mov 0(%rip), %rdx
+  case 0x488d1d: return 0x488b1d; // lea 0(%rip), %rbx -> mov 0(%rip), %rbx
+  case 0x488d25: return 0x488b25; // lea 0(%rip), %rsp -> mov 0(%rip), %rsp
+  case 0x488d2d: return 0x488b2d; // lea 0(%rip), %rbp -> mov 0(%rip), %rbp
+  case 0x488d35: return 0x488b35; // lea 0(%rip), %rsi -> mov 0(%rip), %rsi
+  case 0x488d3d: return 0x488b3d; // lea 0(%rip), %rdi -> mov 0(%rip), %rdi
+  case 0x4c8d05: return 0x4c8b05; // lea 0(%rip), %r8 -> mov 0(%rip), %r8
+  case 0x4c8d0d: return 0x4c8b0d; // lea 0(%rip), %r9 -> mov 0(%rip), %r9
+  case 0x4c8d15: return 0x4c8b15; // lea 0(%rip), %r10 -> mov 0(%rip), %r10
+  case 0x4c8d1d: return 0x4c8b1d; // lea 0(%rip), %r11 -> mov 0(%rip), %r11
+  case 0x4c8d25: return 0x4c8b25; // lea 0(%rip), %r12 -> mov 0(%rip), %r12
+  case 0x4c8d2d: return 0x4c8b2d; // lea 0(%rip), %r13 -> mov 0(%rip), %r13
+  case 0x4c8d35: return 0x4c8b35; // lea 0(%rip), %r14 -> mov 0(%rip), %r14
+  case 0x4c8d3d: return 0x4c8b3d; // lea 0(%rip), %r15 -> mov 0(%rip), %r15
+  }
+  return 0; // none of the lea forms above matched; 0 signals "cannot rewrite"
+}
+
+static u32 relax_tlsdesc_to_le(u8 *loc) { // loc: the three instruction bytes to inspect
+  switch ((loc[0] << 16) | (loc[1] << 8) | loc[2]) { // same key as relax_tlsdesc_to_ie
+  case 0x488d05: return 0x48c7c0; // lea 0(%rip), %rax -> mov $0, %rax
+  case 0x488d0d: return 0x48c7c1; // lea 0(%rip), %rcx -> mov $0, %rcx
+  case 0x488d15: return 0x48c7c2; // lea 0(%rip), %rdx -> mov $0, %rdx
+  case 0x488d1d: return 0x48c7c3; // lea 0(%rip), %rbx -> mov $0, %rbx
+  case 0x488d25: return 0x48c7c4; // lea 0(%rip), %rsp -> mov $0, %rsp
+  case 0x488d2d: return 0x48c7c5; // lea 0(%rip), %rbp -> mov $0, %rbp
+  case 0x488d35: return 0x48c7c6; // lea 0(%rip), %rsi -> mov $0, %rsi
+  case 0x488d3d: return 0x48c7c7; // lea 0(%rip), %rdi -> mov $0, %rdi
+  case 0x4c8d05: return 0x49c7c0; // lea 0(%rip), %r8 -> mov $0, %r8 (prefix changes 0x4c -> 0x49 for %r8-%r15)
+  case 0x4c8d0d: return 0x49c7c1; // lea 0(%rip), %r9 -> mov $0, %r9
+  case 0x4c8d15: return 0x49c7c2; // lea 0(%rip), %r10 -> mov $0, %r10
+  case 0x4c8d1d: return 0x49c7c3; // lea 0(%rip), %r11 -> mov $0, %r11
+  case 0x4c8d25: return 0x49c7c4; // lea 0(%rip), %r12 -> mov $0, %r12
+  case 0x4c8d2d: return 0x49c7c5; // lea 0(%rip), %r13 -> mov $0, %r13
+  case 0x4c8d35: return 0x49c7c6; // lea 0(%rip), %r14 -> mov $0, %r14
+  case 0x4c8d3d: return 0x49c7c7; // lea 0(%rip), %r15 -> mov $0, %r15
+  }
+  return 0; // none of the lea forms above matched; 0 signals "cannot rewrite"
+}
+
 // Rewrite a function call to __tls_get_addr to a cheaper instruction
 // sequence. We can do this when we know the thread-local variable's TP-
 // relative address at link-time.
@@ -501,19 +545,32 @@ void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) {
       //
       //   mov $foo@TPOFF, %rax
       //   nop
+      //
+      // We allow the following alternative code sequence too because
+      // LLVM emits such code.
+      //
+      //   lea 0(%rip), %reg
+      //       R_X86_64_GOTPC32_TLSDESC foo
+      //   mov %reg, %rax
+      //   call *(%rax)
+      //       R_X86_64_TLSDESC_CALL foo
       if (sym.has_tlsdesc(ctx)) {
         write32s(sym.get_tlsdesc_addr(ctx) + A - P);
       } else if (sym.has_gottp(ctx)) {
-        // mov foo@gottpoff(%rip), %rax
-        loc[-3] = 0x48;
-        loc[-2] = 0x8b;
-        loc[-1] = 0x05;
+        u32 insn = relax_tlsdesc_to_ie(loc - 3); // inspect the three bytes just before loc
+        if (!insn)
+          Fatal(ctx) << *this << ": illegal instruction sequence for TLSDESC";
+        loc[-3] = insn >> 16; // first replacement byte (bits 16-23 of insn)
+        loc[-2] = insn >> 8; // second replacement byte (bits 8-15 of insn)
+        loc[-1] = insn; // third replacement byte (bits 0-7 of insn)
         write32s(sym.get_gottp_addr(ctx) + A - P);
       } else {
-        // mov $foo@tpoff, %rax
-        loc[-3] = 0x48;
-        loc[-2] = 0xc7;
-        loc[-1] = 0xc0;
+        u32 insn = relax_tlsdesc_to_le(loc - 3); // inspect the three bytes just before loc
+        if (!insn)
+          Fatal(ctx) << *this << ": illegal instruction sequence for TLSDESC";
+        loc[-3] = insn >> 16; // first replacement byte (bits 16-23 of insn)
+        loc[-2] = insn >> 8; // second replacement byte (bits 8-15 of insn)
+        loc[-1] = insn; // third replacement byte (bits 0-7 of insn)
         write32s(S - ctx.tp_addr);
       }
       break;
diff --git a/test/elf/i386_tls-module-base.sh b/test/elf/i686_tls-module-base.sh
similarity index 100%
rename from test/elf/i386_tls-module-base.sh
rename to test/elf/i686_tls-module-base.sh
diff --git a/test/elf/i686_tlsdesc.sh b/test/elf/i686_tlsdesc.sh
new file mode 100755
index 0000000000..6363b380f9
--- /dev/null
+++ b/test/elf/i686_tlsdesc.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+. $(dirname $0)/common.inc
+
+supports_tlsdesc || skip # presumably skips when the toolchain lacks TLSDESC support; helper is defined outside this file
+
+cat <<'EOF' | $GCC -c -o $t/a.o -xassembler -
+.globl get_foo
+.type get_foo, @function
+get_foo:
+  pushl %ebx
+  call __x86.get_pc_thunk.bx
+  addl $_GLOBAL_OFFSET_TABLE_, %ebx
+  subl $8, %esp
+  leal foo@TLSDESC(%ebx), %ebx
+  movl %ebx, %eax
+  call *foo@TLSCALL(%eax)
+  movl %gs:(%eax), %eax
+  addl $8, %esp
+  popl %ebx
+  ret
+EOF
+
+cat <
+
+_Thread_local int foo;
+
+int get_foo();
+
+int main() {
+  foo = 42;
+  printf("%d\n", get_foo());
+}
+EOF
+
+$CC -B. -o $t/exe1 $t/a.o $t/b.o # executable, default linker options
+$QEMU $t/exe1 | grep -q 42 # the program must print the value stored in foo
+
+$CC -B. -o $t/exe2 $t/a.o $t/b.o -Wl,-no-relax # same link with -no-relax passed to the linker
+$QEMU $t/exe2 | grep -q 42
+
+$CC -B. -shared -o $t/c.so $t/a.o # get_foo now lives in a shared object
+$CC -B. -o $t/exe3 $t/b.o $t/c.so
+$QEMU $t/exe3 | grep -q 42
+
+$CC -B. -shared -o $t/c.so $t/a.o -Wl,-no-relax # shared-object variant with -no-relax on both links
+$CC -B. -o $t/exe4 $t/b.o $t/c.so -Wl,-no-relax
+$QEMU $t/exe4 | grep -q 42
diff --git a/test/elf/x86_64_tlsdesc.sh b/test/elf/x86_64_tlsdesc.sh
new file mode 100755
index 0000000000..9160371b95
--- /dev/null
+++ b/test/elf/x86_64_tlsdesc.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+. $(dirname $0)/common.inc
+
+supports_tlsdesc || skip # presumably skips when the toolchain lacks TLSDESC support; helper is defined outside this file
+
+cat <
+
+_Thread_local int foo;
+
+int get_foo();
+
+int main() {
+  foo = 42;
+  printf("%d\n", get_foo());
+}
+EOF
+
+$CC -B. -o $t/exe1 $t/a.o $t/b.o # executable, default linker options
+$QEMU $t/exe1 | grep -q 42 # the program must print the value stored in foo
+
+$CC -B. -o $t/exe2 $t/a.o $t/b.o -Wl,-no-relax # same link with -no-relax passed to the linker
+$QEMU $t/exe2 | grep -q 42
+
+$CC -B. -shared -o $t/c.so $t/a.o # get_foo now lives in a shared object
+$CC -B. -o $t/exe3 $t/b.o $t/c.so
+$QEMU $t/exe3 | grep -q 42
+
+$CC -B. -shared -o $t/c.so $t/a.o -Wl,-no-relax # shared-object variant with -no-relax on both links
+$CC -B. -o $t/exe4 $t/b.o $t/c.so -Wl,-no-relax
+$QEMU $t/exe4 | grep -q 42