Skip to content

Commit

Permalink
Do not emit text relocations for IFUNC symbols in PDEs
Browse files Browse the repository at this point in the history
IFUNC symbols are resolved at process startup by executing the function
that the symbol points to. This is used to select the "best" function at
runtime; for instance, the runtime may choose a faster version of memcpy
that uses SIMD instructions if they are available on the current system.

Thus, an IFUNC symbol has two addresses: the initial address (or the
resolver's address) and the resolved address, which is the return value of
the resolver.

In position-independent executables (PIEs), function pointers are loaded
from the GOT indirectly, and symbols are not directly referenced. In such
executables, the GOT slot for an IFUNC symbol initially contains the
resolver address, which is overwritten with the resolved address at
process startup. When user code takes a pointer to
an IFUNC, it always reads the resolved address from GOT.

In contrast, position-dependent executables (PDEs) may have instructions
that directly refer to an IFUNC symbol, such as movabs on x86-64. The GOT
entry for an IFUNC holds the resolved address, so any direct reference
must also produce the resolved address to maintain pointer equality.
(The C and C++ standards require two pointers to compare equal if and
only if they are taken for the same symbol.)

Previously, we emitted text relocations to modify instruction operands.
However, text relocations are undesirable and not always reliable. For
example, on ARM64, multiple instructions are used to materialize a
symbol's address, and it's not feasible to issue a dynamic relocation to
alter those instructions since the dynamic loader generally can only
modify 32-bit or 64-bit words.

In this commit, I have adopted a different strategy. An IFUNC symbol now
occupies two consecutive GOT slots in a PDE. The first slot holds the
symbol's PLT address, and the second slot holds the resolved address. The
PLT address is consistently used as the symbol's address throughout the
process, while the second slot is used only by the PLT entry to jump to
the resolved address.

This method ensures pointer equality without the need to emit text
relocations for IFUNC symbols in PDEs.
  • Loading branch information
rui314 committed Nov 2, 2023
1 parent 9516da1 commit 4cdfc7e
Show file tree
Hide file tree
Showing 16 changed files with 152 additions and 20 deletions.
2 changes: 1 addition & 1 deletion elf/arch-arm32.cc
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ void write_plt_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
// Writes the PLT stub for `sym` into `buf`.
//
// The stub is a copy of the `plt_entry` template whose 32-bit word at
// offset 12 is patched with the distance from the PLT entry to the GOT
// slot the stub jumps through, minus 12.
//
// The slot is obtained with get_got_pltgot_addr() rather than
// get_got_addr(): for an ifunc in a position-dependent executable the
// first GOT slot holds the PLT address itself, and only the following
// slot holds the resolved address.
template <>
void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
  memcpy(buf, plt_entry, sizeof(plt_entry));
  *(ul32 *)(buf + 12) = sym.get_got_pltgot_addr(ctx) - sym.get_plt_addr(ctx) - 12;
}

// ARM does not use .eh_frame for exception handling. Instead, it uses
Expand Down
2 changes: 1 addition & 1 deletion elf/arch-arm64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
0xd503'201f, // nop
};

u64 got = sym.get_got_addr(ctx);
u64 got = sym.get_got_pltgot_addr(ctx);
u64 plt = sym.get_plt_addr(ctx);

memcpy(buf, insn, sizeof(insn));
Expand Down
4 changes: 2 additions & 2 deletions elf/arch-i386.cc
Original file line number Diff line number Diff line change
Expand Up @@ -164,15 +164,15 @@ void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // (padding)
};
memcpy(buf, insn, sizeof(insn));
*(ul32 *)(buf + 6) = sym.get_got_addr(ctx) - ctx.got->shdr.sh_addr;
*(ul32 *)(buf + 6) = sym.get_got_pltgot_addr(ctx) - ctx.got->shdr.sh_addr;
} else {
static const u8 insn[] = {
0xf3, 0x0f, 0x1e, 0xfb, // endbr32
0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT
0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // (padding)
};
memcpy(buf, insn, sizeof(insn));
*(ul32 *)(buf + 6) = sym.get_got_addr(ctx);
*(ul32 *)(buf + 6) = sym.get_got_pltgot_addr(ctx);
}
}

Expand Down
2 changes: 1 addition & 1 deletion elf/arch-loongarch.cc
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ void write_plt_entry<E>(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {

template <>
void write_pltgot_entry<E>(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
u64 got = sym.get_got_addr(ctx);
u64 got = sym.get_got_pltgot_addr(ctx);
u64 plt = sym.get_plt_addr(ctx);

memcpy(buf, E::is_64 ? plt_entry_64 : plt_entry_32, E::plt_size);
Expand Down
2 changes: 1 addition & 1 deletion elf/arch-m68k.cc
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
};

memcpy(buf, insn, sizeof(insn));
*(ub32 *)(buf + 4) = sym.get_got_addr(ctx) - sym.get_plt_addr(ctx) - 2;
*(ub32 *)(buf + 4) = sym.get_got_pltgot_addr(ctx) - sym.get_plt_addr(ctx) - 2;
}

template <>
Expand Down
2 changes: 1 addition & 1 deletion elf/arch-ppc32.cc
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
memcpy(buf, plt_entry, sizeof(plt_entry));

ub32 *loc = (ub32 *)buf;
i64 offset = sym.get_got_addr(ctx) - sym.get_plt_addr(ctx) - 8;
i64 offset = sym.get_got_pltgot_addr(ctx) - sym.get_plt_addr(ctx) - 8;
loc[4] |= higha(offset);
loc[5] |= lo(offset);
}
Expand Down
2 changes: 1 addition & 1 deletion elf/arch-riscv.cc
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ void write_plt_entry<E>(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {

template <>
void write_pltgot_entry<E>(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
u64 got = sym.get_got_addr(ctx);
u64 got = sym.get_got_pltgot_addr(ctx);
u64 plt = sym.get_plt_addr(ctx);

memcpy(buf, E::is_64 ? plt_entry_64 : plt_entry_32, E::plt_size);
Expand Down
2 changes: 1 addition & 1 deletion elf/arch-s390x.cc
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
};

memcpy(buf, insn, sizeof(insn));
*(ub32 *)(buf + 2) = (sym.get_got_addr(ctx) - sym.get_plt_addr(ctx)) >> 1;
*(ub32 *)(buf + 2) = (sym.get_got_pltgot_addr(ctx) - sym.get_plt_addr(ctx)) >> 1;
}

template <>
Expand Down
4 changes: 2 additions & 2 deletions elf/arch-sh4.cc
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {

static_assert(sizeof(insn) == E::pltgot_size);
memcpy(buf, insn, sizeof(insn));
*(ul32 *)(buf + 8) = sym.get_got_addr(ctx) - ctx.got->shdr.sh_addr;
*(ul32 *)(buf + 8) = sym.get_got_pltgot_addr(ctx) - ctx.got->shdr.sh_addr;
} else {
static const u8 insn[] = {
0x01, 0xd0, // mov.l 1f, r0
Expand All @@ -181,7 +181,7 @@ void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {

static_assert(sizeof(insn) == E::pltgot_size);
memcpy(buf, insn, sizeof(insn));
*(ul32 *)(buf + 8) = sym.get_got_addr(ctx);
*(ul32 *)(buf + 8) = sym.get_got_pltgot_addr(ctx);
}
}

Expand Down
2 changes: 1 addition & 1 deletion elf/arch-sparc64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
};

memcpy(buf, entry, sizeof(entry));
*(ub64 *)(buf + 24) = sym.get_got_addr(ctx) - sym.get_plt_addr(ctx) - 4;
*(ub64 *)(buf + 24) = sym.get_got_pltgot_addr(ctx) - sym.get_plt_addr(ctx) - 4;
}

template <>
Expand Down
2 changes: 1 addition & 1 deletion elf/arch-x86-64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ void write_pltgot_entry(Context<E> &ctx, u8 *buf, Symbol<E> &sym) {
};

memcpy(buf, insn, sizeof(insn));
*(ul32 *)(buf + 2) = sym.get_got_addr(ctx) - sym.get_plt_addr(ctx) - 6;
*(ul32 *)(buf + 2) = sym.get_got_pltgot_addr(ctx) - sym.get_plt_addr(ctx) - 6;
}

template <>
Expand Down
11 changes: 6 additions & 5 deletions elf/input-sections.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
namespace mold::elf {

// Actions that relocation scanning can choose for a symbol.
// IFUNC_DYNREL requests an IRELATIVE dynamic relocation for a GNU ifunc
// symbol. Each enumerator is listed exactly once; listing one twice in
// the same enum is a redeclaration error.
typedef enum {
  NONE, ERROR, COPYREL, DYN_COPYREL, PLT, CPLT, DYN_CPLT, DYNREL,
  BASEREL, IFUNC_DYNREL,
} Action;

template <typename E>
Expand Down Expand Up @@ -203,7 +204,7 @@ static void scan_rel(Context<E> &ctx, InputSection<E> &isec, Symbol<E> &sym,
if (!isec.is_relr_reloc(ctx, rel))
isec.file.num_dynrel++;
break;
case IFUNC:
case IFUNC_DYNREL:
// Create an IRELATIVE relocation for a GNU ifunc symbol.
//
// We usually create an IRELATIVE relocation in .got for each ifunc.
Expand Down Expand Up @@ -273,7 +274,7 @@ static Action get_absrel_action(Context<E> &ctx, Symbol<E> &sym) {
template <typename E>
static Action get_dyn_absrel_action(Context<E> &ctx, Symbol<E> &sym) {
if (sym.is_ifunc())
return IFUNC;
return ctx.arg.pic ? IFUNC_DYNREL : NONE;

// This is a decision table for absolute relocations for the pointer
// size data (e.g. R_X86_64_64). Unlike the absrel_table, we can emit
Expand All @@ -291,7 +292,7 @@ static Action get_dyn_absrel_action(Context<E> &ctx, Symbol<E> &sym) {
template <typename E>
static Action get_ppc64_toc_action(Context<E> &ctx, Symbol<E> &sym) {
if (sym.is_ifunc())
return IFUNC;
return IFUNC_DYNREL;

// As a special case, we do not create copy relocations nor canonical
// PLTs for .toc sections. PPC64's .toc is a compiler-generated
Expand Down Expand Up @@ -406,7 +407,7 @@ static void apply_absrel(Context<E> &ctx, InputSection<E> &isec,
case DYNREL:
emit_abs_dynrel();
break;
case IFUNC:
case IFUNC_DYNREL:
if constexpr (supports_ifunc<E>) {
u64 addr = sym.get_addr(ctx, NO_PLT) + A;
*dynrel++ = ElfRel<E>(P, E::R_IRELATIVE, 0, addr);
Expand Down
27 changes: 27 additions & 0 deletions elf/mold.h
Original file line number Diff line number Diff line change
Expand Up @@ -1940,6 +1940,7 @@ class Symbol {
u64 get_tlsdesc_addr(Context<E> &ctx) const;
u64 get_plt_addr(Context<E> &ctx) const;
u64 get_opd_addr(Context<E> &ctx) const;
u64 get_got_pltgot_addr(Context<E> &ctx) const;

void set_got_idx(Context<E> &ctx, i32 idx);
void set_gottp_idx(Context<E> &ctx, i32 idx);
Expand Down Expand Up @@ -1973,6 +1974,7 @@ class Symbol {
bool is_relative() const { return !is_absolute(); }
bool is_local(Context<E> &ctx) const;
bool is_ifunc() const { return get_type() == STT_GNU_IFUNC; }
bool is_pde_ifunc(Context<E> &ctx) const;
bool is_remaining_undef_weak() const;

bool is_pcrel_linktime_const(Context<E> &ctx) const;
Expand Down Expand Up @@ -2574,6 +2576,25 @@ inline u64 Symbol<E>::get_opd_addr(Context<E> &ctx) const {
get_opd_idx(ctx) * PPC64OpdSection::ENTRY_SIZE;
}

// Returns the address of the GOT slot that this symbol's PLT entry
// should load its jump target from.
//
// In a position-dependent executable (PDE), an ifunc symbol owns two
// consecutive GOT slots: the first contains the symbol's PLT address
// and the second contains the resolved address. A PDE treats the
// ifunc's PLT entry as the symbol's address, akin to a canonical PLT,
// so the PLT entry has to jump through the second slot. For every
// other symbol this is simply the symbol's GOT address.
//
// This function is not used for PPC64. There, symbols are always
// accessed through the TOC regardless of the -fno-PIE setting, so that
// psABI does not need canonical PLTs either.
template <typename E>
inline u64 Symbol<E>::get_got_pltgot_addr(Context<E> &ctx) const {
  u64 slot = get_got_addr(ctx);
  if (is_pde_ifunc(ctx))
    slot += sizeof(Word<E>);
  return slot;
}

template <typename E>
inline void Symbol<E>::set_got_idx(Context<E> &ctx, i32 idx) {
assert(aux_idx != -1);
Expand Down Expand Up @@ -2702,6 +2723,12 @@ inline bool Symbol<E>::is_local(Context<E> &ctx) const {
return !is_imported && !is_exported;
}

// Returns true if this is an ifunc that uses two GOT slots, i.e. an
// ifunc symbol linked with ctx.arg.pic off on a target other than PPC64.
template <typename E>
inline bool Symbol<E>::is_pde_ifunc(Context<E> &ctx) const {
  if (!is_ifunc())
    return false;
  return !(ctx.arg.pic || is_ppc64<E>);
}

// A remaining weak undefined symbol is promoted to a dynamic symbol
// in a DSO and resolved to 0 in an executable. This function returns
// true if it's the latter.
Expand Down
23 changes: 21 additions & 2 deletions elf/output-chunks.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1082,7 +1082,14 @@ void OutputSection<E>::populate_symtab(Context<E> &ctx) {
// Registers `sym` in .got: assigns it the next free GOT index (the
// current section size in words), grows the section by the number of
// slots the symbol needs, and records the symbol in got_syms.
template <typename E>
void GotSection<E>::add_got_symbol(Context<E> &ctx, Symbol<E> *sym) {
  sym->set_got_idx(ctx, this->shdr.sh_size / sizeof(Word<E>));

  // An IFUNC symbol uses two GOT slots in a position-dependent
  // executable; every other symbol uses exactly one. The section must
  // grow by that amount only once, or a slot is wasted per symbol.
  u64 num_slots = sym->is_pde_ifunc(ctx) ? 2 : 1;
  this->shdr.sh_size += sizeof(Word<E>) * num_slots;

  got_syms.push_back(sym);
}

Expand Down Expand Up @@ -1176,7 +1183,12 @@ static std::vector<GotEntry<E>> get_got_entries(Context<E> &ctx) {
// IFUNC always needs to be fixed up by the dynamic linker.
if constexpr (supports_ifunc<E>) {
if (sym->is_ifunc()) {
add({idx, sym->get_addr(ctx, NO_PLT), E::R_IRELATIVE});
if (sym->is_pde_ifunc(ctx)) {
add({idx, sym->get_plt_addr(ctx)});
add({idx + 1, sym->get_addr(ctx, NO_PLT), E::R_IRELATIVE});
} else {
add({idx, sym->get_addr(ctx, NO_PLT), E::R_IRELATIVE});
}
continue;
}
}
Expand Down Expand Up @@ -1656,8 +1668,15 @@ ElfSym<E> to_output_esym(Context<E> &ctx, Symbol<E> &sym, u32 st_name,
esym.st_shndx = SHN_ABS;
esym.st_value = sym.get_addr(ctx);
} else if (sym.get_type() == STT_TLS) {
// TLS symbol
shndx = get_st_shndx(sym);
esym.st_value = sym.get_addr(ctx) - ctx.tls_begin;
} else if (sym.is_pde_ifunc(ctx)) {
// IFUNC symbol in PDE that uses two GOT slots
shndx = get_st_shndx(sym);
esym.st_type = STT_FUNC;
esym.st_visibility = sym.visibility;
esym.st_value = sym.get_addr(ctx);
} else {
shndx = get_st_shndx(sym);
esym.st_visibility = sym.visibility;
Expand Down
33 changes: 33 additions & 0 deletions test/elf/ifunc-address-equality-exported.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/bin/bash
# Checks that, in a non-PIE executable, the address of an ifunc symbol
# taken inside the executable is the same as the address a shared
# library obtains for that symbol.
. $(dirname $0)/common.inc

# supports_ifunc and skip come from common.inc; presumably this skips
# the test on targets without ifunc support.
supports_ifunc || skip

# a.o: position-independent code whose get_foo() returns the address of
# foo, which is defined elsewhere.
cat <<EOF | $CC -c -fPIC -o $t/a.o -xc -
typedef void Func();
void foo();
Func *get_foo() { return foo; }
EOF

# Put get_foo() into a shared library so that foo has to be resolved
# across the executable/DSO boundary.
$CC -B. -shared -o $t/b.so $t/a.o

# c.o: non-PIE code that defines foo as an ifunc (resolved to real_foo)
# and prints both the locally taken address and the one from get_foo().
cat <<EOF | $CC -c -fno-PIE -o $t/c.o -xc -
#include <stdio.h>
typedef void Func();
__attribute__((ifunc("resolve_foo"))) void foo(void);
void real_foo(void) { printf("foo "); }
Func *resolve_foo() { return real_foo; }
Func *get_foo();
int main() {
printf("%p %p\n", foo, get_foo());
foo();
printf("\n");
}
EOF

# Link a position-dependent executable against the shared library and
# require that the two printed pointers are identical.
$CC -B. -o $t/exe1 $t/c.o $t/b.so -no-pie
$QEMU $t/exe1 | grep -Eq '^(\S+) \1'
52 changes: 52 additions & 0 deletions test/elf/ifunc-address-equality.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#!/bin/bash
# Checks that, in a non-PIE executable, every way of taking the address
# of an ifunc symbol yields the same pointer, and that the link does not
# need text relocations.
. $(dirname $0)/common.inc

# supports_ifunc and skip come from common.inc; presumably this skips
# the test on targets without ifunc support.
supports_ifunc || skip

# a.o: non-PIE code defining two ifuncs, foo and bar, which resolve to
# real_foo and real_bar.
cat <<EOF | $CC -c -fno-PIE -o $t/a.o -xc -
#include <stdio.h>
typedef void Func();
__attribute__((ifunc("resolve_foo"))) void foo(void);
void real_foo(void) { printf("foo "); }
Func *resolve_foo() { return real_foo; }
__attribute__((ifunc("resolve_bar"))) void bar(void);
void real_bar(void) { printf("bar "); }
Func *resolve_bar() { return real_bar; }
EOF

# b.o: position-independent code that returns the addresses of foo and
# bar.
cat <<EOF | $CC -c -fPIC -o $t/b.o -xc -
typedef void Func();
void foo();
void bar();
Func *get_foo() { return foo; }
Func *get_bar() { return bar; }
EOF

# c.o: non-PIE code that takes the addresses of foo and bar directly and
# prints them next to the addresses returned by b.o.
cat <<EOF | $CC -c -fno-PIE -o $t/c.o -xc -
#include <stdio.h>
typedef void Func();
void foo();
void bar();
Func *get_foo();
Func *get_bar();
int main() {
printf("%p %p %p %p\n", foo, get_foo(), bar, get_bar());
foo();
bar();
printf("\n");
}
EOF

# Link everything into one position-dependent executable. The first two
# printed pointers must match each other, and so must the last two.
$CC -B. -o $t/exe1 $t/a.o $t/b.o $t/c.o -no-pie
$QEMU $t/exe1 | grep -Eq '^(\S+) \1 (\S+) \2'

# The dynamic section must not mention TEXTREL. The trailing `|| false`
# presumably turns the negated grep into a fatal failure under `set -e`
# (assumed to be enabled by common.inc - confirm).
readelf --dynamic $t/exe1 > $t/log1
! grep -q TEXTREL $t/log1 || false

0 comments on commit 4cdfc7e

Please sign in to comment.