From 14c03c4e963efa76d2e8276bde1c71256b17ec80 Mon Sep 17 00:00:00 2001
From: Joe Lawrence
Date: Mon, 4 Nov 2024 15:23:31 -0500
Subject: [PATCH] create-diff-object: Create __patchable_function_entries sections

The __mcount_loc section contains the addresses of patchable ftrace
sites, which the kernel's ftrace infrastructure uses to build a list of
traceable functions and to know where to patch to enable tracing of
them. On some kernel configurations, this section is instead called
__patchable_function_entries and is generated by the compiler. The
kernel recognises either __mcount_loc or __patchable_function_entries,
but for these configurations create __patchable_function_entries, as it
is what is expected.

The x86_64 arch is special (of course). Unlike other arches (ppc64le
and aarch64), an x86_64 kernel built with -fpatchable-function-entry
will generate nops AND create rela__patchable_function_entries entries
even for functions marked as notrace. For this arch, always create
__mcount_loc sections and rely on __fentry__ relocations to indicate
ftrace call sites.

Note: this patch is a non-arch-specific translation of original code by
Pete Swain for aarch64. At the same time, this version squashes several
follow-up commits from him and zimao. The intent is to minimize the
eventual changeset for aarch64 support now that other arches are making
use of __patchable_function_entries sections.

Signed-off-by: Joe Lawrence
---
Editor's note: illustrative sketches of the PFE mechanics (compiler
output, section-header wiring, ppc64le entry-offset decoding) follow
the diff.

 kpatch-build/create-diff-object.c | 308 +++++++++++++++++++++++++++---
 kpatch-build/kpatch-elf.c         |  41 +++-
 kpatch-build/kpatch-elf.h         |   4 +
 3 files changed, 322 insertions(+), 31 deletions(-)
 mode change 100644 => 100755 kpatch-build/kpatch-elf.c

diff --git a/kpatch-build/create-diff-object.c b/kpatch-build/create-diff-object.c
index 25710e921..55c5743c3 100644
--- a/kpatch-build/create-diff-object.c
+++ b/kpatch-build/create-diff-object.c
@@ -615,9 +615,14 @@ static void kpatch_compare_correlated_section(struct section *sec)
 		    !is_text_section(sec1)))
 			DIFF_FATAL("%s section header details differ from %s", sec1->name, sec2->name);
 
-	/* Short circuit for mcount sections, we rebuild regardless */
+	/*
+	 * Short circuit for mcount and patchable_function_entries
+	 * sections, we rebuild regardless
+	 */
 	if (!strcmp(sec->name, ".rela__mcount_loc") ||
-	    !strcmp(sec->name, "__mcount_loc")) {
+	    !strcmp(sec->name, "__mcount_loc") ||
+	    !strcmp(sec->name, ".rela__patchable_function_entries") ||
+	    !strcmp(sec->name, "__patchable_function_entries")) {
 		sec->status = SAME;
 		goto out;
 	}
@@ -3676,6 +3681,206 @@ static void kpatch_create_callbacks_objname_rela(struct kpatch_elf *kelf, char *
 	}
 }
 
+static void kpatch_set_pfe_link(struct kpatch_elf *kelf)
+{
+	struct section *sec;
+	struct rela *rela;
+
+	if (!kelf->has_pfe)
+		return;
+
+	list_for_each_entry(sec, &kelf->sections, list) {
+		if (strcmp(sec->name, "__patchable_function_entries"))
+			continue;
+
+		if (!sec->rela)
+			continue;
+
+		list_for_each_entry(rela, &sec->rela->relas, list)
+			rela->sym->sec->pfe = sec;
+	}
+}
+
+static void kpatch_create_pfe_sections(struct kpatch_elf *kelf)
+{
+	int nr, index;
+	struct section *sec, *relasec;
+	struct symbol *sym;
+	struct rela *pfe_rela;
+
+	nr = 0;
+	list_for_each_entry(sym, &kelf->symbols, list)
+		if (sym->type == STT_FUNC && sym->status != SAME &&
+		    sym->has_func_profiling)
+			nr++;
+
+	/*
+	 * We will create a separate __patchable_function_entries
+	 * section for each symbol.
+	 */
+	kelf->has_pfe = true;
+	sec = find_section_by_name(&kelf->sections, "__patchable_function_entries");
+	relasec = NULL;
+
+	/* populate sections */
+	index = 0;
+	list_for_each_entry(sym, &kelf->symbols, list) {
+		unsigned long insn_offset = 0;
+		unsigned char *insn;
+		struct symbol *section_sym;
+
+		if (sym->type != STT_FUNC || sym->status == SAME)
+			continue;
+
+		if (!sym->has_func_profiling) {
+			log_debug("function %s doesn't have a patchable function entry, no __patchable_function_entries record is needed\n",
+				  sym->name);
+			continue;
+		}
+
+		switch(kelf->arch) {
+		case PPC64:
+			/*
+			 * Assume ppc64le is built with -fpatchable-function-entry=2,
+			 * which means that both nops are after the (local) entry
+			 * point of the function.
+			 *
+			 * Example 1 - TOC setup for global entry
+			 * Disassembly of section .text.c_stop:
+			 *
+			 * 0000000000000000 <c_stop-0x8>:
+			 *      ...
+			 *                      0: R_PPC64_REL64        .TOC.-0x8
+			 *
+			 * 0000000000000008 <c_stop>:
+			 *      8: f8 ff 4c e8   ld      r2,-8(r12)
+			 *                      8: R_PPC64_ENTRY        *ABS*
+			 *      c: 14 62 42 7c   add     r2,r2,r12
+			 *     10: 00 00 00 60   nop     <<<<
+			 *     14: 00 00 00 60   nop
+			 *
+			 * Relocation section '.rela__patchable_function_entries' at offset 0x17870 contains 1 entry:
+			 *     Offset             Info             Type               Symbol's Value  Symbol's Name + Addend
+			 * 0000000000000000  0000001100000026 R_PPC64_ADDR64         0000000000000000 .text.c_stop + 10
+			 *                                                                                           ^^
+			 *
+			 * Example 2 - no TOC setup, local entry only
+			 * Disassembly of section .text.c_stop:
+			 *
+			 * 0000000000000000 <c_stop>:
+			 *      0: 00 00 00 60   nop     <<<<
+			 *      4: 00 00 00 60   nop
+			 *      8: 20 00 80 4e   blr
+			 *
+			 * Relocation section '.rela__patchable_function_entries' at offset 0x386a8 contains 1 entry:
+			 *     Offset             Info             Type               Symbol's Value  Symbol's Name + Addend
+			 * 0000000000000000  0000001800000026 R_PPC64_ADDR64         0000000000000000 .text.c_stop + 0
+			 *                                                                                           ^
+			 */
+			insn_offset = sym->sym.st_value + PPC64_LOCAL_ENTRY_OFFSET(sym->sym.st_other);
+			insn = sym->sec->data->d_buf + insn_offset;
+
+			/* verify nops */
+			if (insn[0] != 0x00 || insn[1] != 0x00 || insn[2] != 0x00 || insn[3] != 0x60 ||
+			    insn[4] != 0x00 || insn[5] != 0x00 || insn[6] != 0x00 || insn[7] != 0x60) {
+				ERROR("%s: unexpected instruction in patch section of function\n", sym->name);
+			}
+
+			break;
+		case X86_64:
+			/*
+			 * Assume x86_64 is built with -fpatchable-function-entry=16,16,
+			 * which means that all 16 nops are before the entry point of
+			 * the function. This is rather odd, but since a call to
+			 * __fentry__ is still used for the ftrace callsite, this
+			 * option is currently only used for alignment purposes (and
+			 * not for ftrace as on other arches).
+			 *
+			 * Disassembly of section .text.cmdline_proc_show:
+			 *
+			 * 0000000000000000 <__pfx_cmdline_proc_show>:
+			 *      0: 90   nop     <<<<
+			 *      1: 90   nop
+			 *      2: 90   nop
+			 *      3: 90   nop
+			 *      4: 90   nop
+			 *      5: 90   nop
+			 *      6: 90   nop
+			 *      7: 90   nop
+			 *      8: 90   nop
+			 *      9: 90   nop
+			 *      a: 90   nop
+			 *      b: 90   nop
+			 *      c: 90   nop
+			 *      d: 90   nop
+			 *      e: 90   nop
+			 *      f: 90   nop
+			 *
+			 * 0000000000000010 <cmdline_proc_show>:
+			 *     10: f3 0f 1e fa        endbr64
+			 *     14: e8 00 00 00 00     call 19 <cmdline_proc_show+0x9>
+			 *                      15: R_X86_64_PLT32      __fentry__-0x4
+			 *
+			 * Relocation section '.rela__patchable_function_entries' at offset 0x113f8 contains 1 entry:
+			 *     Offset             Info             Type               Symbol's Value  Symbol's Name + Addend
+			 * 0000000000000000  0000000700000001 R_X86_64_64            0000000000000000 .text.cmdline_proc_show + 0
+			 *                                                                                                      ^
+			 */
+			insn_offset = 0;
+			insn = sym->sec->data->d_buf + insn_offset;
+
+			/* verify nops */
+			for (int i = 0; i < 16; i++) {
+				if (insn[0] != 0x90) {
+					ERROR("%s: unexpected instruction in CONFIG_FUNCTION_PADDING_BYTES section of function\n", sym->name);
+				}
+				insn++;
+			}
+
+			/* optional endbr64 instruction */
+			if (insn[0] == 0xf3 && insn[1] == 0x0f && insn[2] == 0x1e && insn[3] == 0xfa)
+				insn += 4;
+
+			/*
+			 * We can't verify the ftrace call site here, as x86_64 adds
+			 * nops (and rela__patchable_function_entries entries)
+			 * regardless of whether a function is ftrace-able or not.
+			 * This arch still relies on the __mcount_loc section, so the
+			 * call site is verified there instead.
+			 */
+			break;
+		default:
+			ERROR("unsupported arch");
+		}
+
+		/* Allocate __patchable_function_entries for symbol */
+		sec = create_section_pair(kelf, "__patchable_function_entries", sizeof(void *), 1);
+		sec->sh.sh_flags |= SHF_WRITE | SHF_LINK_ORDER;
+		/* We will reset this sh_link in the reindex function. */
+		sec->sh.sh_link = 0;
+
+		relasec = sec->rela;
+		sym->sec->pfe = sec;
+
+		/*
+		 * Create a .rela__patchable_function_entries entry that points
+		 * at this function's patchable entry.
+		 */
+		ALLOC_LINK(pfe_rela, &relasec->relas);
+
+		/* __patchable_function_entries relocates off the section symbol */
+		section_sym = find_symbol_by_name(&kelf->symbols, sym->sec->name);
+		pfe_rela->sym = section_sym;
+		pfe_rela->type = absolute_rela_type(kelf);
+		pfe_rela->addend = insn_offset - section_sym->sym.st_value;
+		pfe_rela->offset = 0;
+
+		index++;
+	}
+
+	/* sanity check, index should equal nr */
+	if (index != nr)
+		ERROR("size mismatch in funcs sections");
+}
+
 /*
  * This function basically reimplements the functionality of the Linux
  * recordmcount script, so that patched functions can be recognized by ftrace.
@@ -3802,6 +4007,16 @@ static void kpatch_create_mcount_sections(struct kpatch_elf *kelf)
 		ERROR("size mismatch in funcs sections");
 }
 
+static void kpatch_create_ftrace_callsite_sections(struct kpatch_elf *kelf, bool create_pfe)
+{
+	if (create_pfe)
+		kpatch_create_pfe_sections(kelf);
+
+	/* x86_64 is special: it always creates a __mcount_loc section */
+	if (!create_pfe || kelf->arch == X86_64)
+		kpatch_create_mcount_sections(kelf);
+}
+
 /*
  * This function strips out symbols that were referenced by changed rela
  * sections, but the rela entries that referenced them were converted to
@@ -3945,6 +4160,32 @@ static void kpatch_no_sibling_calls_ppc64le(struct kpatch_elf *kelf)
 			sibling_call_errors);
 }
 
+static bool kpatch_symbol_has_pfe_entry(struct kpatch_elf *kelf, struct symbol *sym)
+{
+	struct section *sec;
+	struct rela *rela;
+
+	if (!kelf->has_pfe)
+		return false;
+
+	list_for_each_entry(sec, &kelf->sections, list) {
+		if (strcmp(sec->name, "__patchable_function_entries"))
+			continue;
+		if (sym->sec->pfe != sec)
+			continue;
+		if (!sec->rela)
+			continue;
+
+		list_for_each_entry(rela, &sec->rela->relas, list) {
+			if (rela->sym->sec && sym->sec == rela->sym->sec)
+				return true;
+		}
+	}
+
+	return false;
+}
+
 /* Check which functions have fentry/mcount calls; save this info for later use. */
 static void kpatch_find_func_profiling_calls(struct kpatch_elf *kelf)
 {
@@ -3952,37 +4193,51 @@ static void kpatch_find_func_profiling_calls(struct kpatch_elf *kelf)
 	struct rela *rela;
 	unsigned char *insn;
 
 	list_for_each_entry(sym, &kelf->symbols, list) {
-		if (sym->type != STT_FUNC || sym->is_pfx ||
-		    !sym->sec || !sym->sec->rela)
+		if (sym->type != STT_FUNC || sym->is_pfx || !sym->sec)
 			continue;
 
 		switch(kelf->arch) {
 		case PPC64:
-			list_for_each_entry(rela, &sym->sec->rela->relas, list) {
-				if (!strcmp(rela->sym->name, "_mcount")) {
-					sym->has_func_profiling = 1;
-					break;
+			if (kpatch_symbol_has_pfe_entry(kelf, sym)) {
+				sym->has_func_profiling = 1;
+			} else if (sym->sec->rela) {
+				list_for_each_entry(rela, &sym->sec->rela->relas, list) {
+					if (!strcmp(rela->sym->name, "_mcount")) {
+						sym->has_func_profiling = 1;
+						break;
+					}
 				}
 			}
 			break;
 		case X86_64:
-			rela = list_first_entry(&sym->sec->rela->relas, struct rela,
-						list);
-			if ((rela->type != R_X86_64_NONE &&
-			     rela->type != R_X86_64_PC32 &&
-			     rela->type != R_X86_64_PLT32) ||
-			    strcmp(rela->sym->name, "__fentry__"))
-				continue;
+			/*
+			 * x86_64 still uses __fentry__; we cannot rely on
+			 * pfe to indicate an ftrace call site.
+			 */
+			if (sym->sec->rela) {
+				rela = list_first_entry(&sym->sec->rela->relas, struct rela,
+							list);
+				if ((rela->type != R_X86_64_NONE &&
+				     rela->type != R_X86_64_PC32 &&
+				     rela->type != R_X86_64_PLT32) ||
+				    strcmp(rela->sym->name, "__fentry__"))
+					continue;
 
-			sym->has_func_profiling = 1;
+				sym->has_func_profiling = 1;
+			}
 			break;
 		case S390:
-			/* Check for compiler generated fentry nop - jgnop 0 */
-			insn = sym->sec->data->d_buf;
-			if (insn[0] == 0xc0 && insn[1] == 0x04 &&
-			    insn[2] == 0x00 && insn[3] == 0x00 &&
-			    insn[4] == 0x00 && insn[5] == 0x00)
-				sym->has_func_profiling = 1;
+			if (kpatch_symbol_has_pfe_entry(kelf, sym)) {
+				ERROR("unsupported arch");
+			} else if (sym->sec->rela) {
+				/* Check for compiler generated fentry nop - jgnop 0 */
+				insn = sym->sec->data->d_buf;
+				if (insn[0] == 0xc0 && insn[1] == 0x04 &&
+				    insn[2] == 0x00 && insn[3] == 0x00 &&
+				    insn[4] == 0x00 && insn[5] == 0x00)
+					sym->has_func_profiling = 1;
+			}
 			break;
 		default:
 			ERROR("unsupported arch");
@@ -4045,6 +4300,7 @@ int main(int argc, char *argv[])
 	struct section *relasec, *symtab;
 	char *orig_obj, *patched_obj, *parent_name;
 	char *parent_symtab, *mod_symvers, *patch_name, *output_obj;
+	bool create_pfe = false;
 
 	memset(&arguments, 0, sizeof(arguments));
 	argp_parse (&argp, argc, argv, 0, NULL, &arguments);
@@ -4067,6 +4323,12 @@ int main(int argc, char *argv[])
 
 	kelf_orig = kpatch_elf_open(orig_obj);
 	kelf_patched = kpatch_elf_open(patched_obj);
+
+	kpatch_set_pfe_link(kelf_orig);
+	kpatch_set_pfe_link(kelf_patched);
+	if (kelf_orig->has_pfe || kelf_patched->has_pfe)
+		create_pfe = true;
+
 	kpatch_find_func_profiling_calls(kelf_orig);
 	kpatch_find_func_profiling_calls(kelf_patched);
 
@@ -4146,7 +4408,7 @@ int main(int argc, char *argv[])
 		kpatch_create_callbacks_objname_rela(kelf_out, parent_name);
 	kpatch_build_strings_section_data(kelf_out);
 
-	kpatch_create_mcount_sections(kelf_out);
+	kpatch_create_ftrace_callsite_sections(kelf_out, create_pfe);
 
 	/*
 	 * At this point, the set of output sections and symbols is
diff --git a/kpatch-build/kpatch-elf.c b/kpatch-build/kpatch-elf.c
old mode 100644
new mode 100755
index 374d424cc..f8554c59d
--- a/kpatch-build/kpatch-elf.c
+++ b/kpatch-build/kpatch-elf.c
@@ -88,17 +88,29 @@ struct section *find_section_by_index(struct list_head *list, unsigned int index
 	return NULL;
 }
 
-struct section *find_section_by_name(struct list_head *list, const char *name)
+struct section *find_nth_section_by_name(struct list_head *list, int nth, const char *name)
 {
 	struct section *sec;
 
-	list_for_each_entry(sec, list, list)
-		if (!strcmp(sec->name, name))
-			return sec;
+	if (!list || !list->next || !name)
+		return NULL;
+
+	list_for_each_entry(sec, list, list) {
+		if (strcmp(sec->name, name))
+			continue;
+		if (--nth >= 0)
+			continue;
+		return sec;
+	}
 
 	return NULL;
 }
 
+struct section *find_section_by_name(struct list_head *list, const char *name)
+{
+	return find_nth_section_by_name(list, 0, name);
+}
+
 struct symbol *find_symbol_by_index(struct list_head *list, size_t index)
 {
 	struct symbol *sym;
@@ -608,6 +620,9 @@ struct kpatch_elf *kpatch_elf_open(const char *name)
 		kpatch_create_rela_list(kelf, relasec);
 	}
 
+	if (find_section_by_name(&kelf->sections, "__patchable_function_entries"))
+		kelf->has_pfe = true;
+
 	return kelf;
 }
 
@@ -644,6 +659,7 @@ void kpatch_dump_kelf(struct kpatch_elf *kelf)
 			if (sec->rela)
 				printf(", rela-> %s", sec->rela->name);
 		}
+		printf(", pfe-> [%d]", sec->pfe == NULL ? -1 : (int)sec->pfe->index);
 next:
 		printf("\n");
 	}
@@ -653,8 +669,10 @@ void kpatch_dump_kelf(struct kpatch_elf *kelf)
 		printf("sym %02d, type %d, bind %d, ndx %02d, name %s (%s)",
 		       sym->index, sym->type, sym->bind, sym->sym.st_shndx,
 		       sym->name, status_str(sym->status));
-		if (sym->sec && (sym->type == STT_FUNC || sym->type == STT_OBJECT))
+		if (sym->sec && (sym->type == STT_FUNC || sym->type == STT_OBJECT)) {
 			printf(" -> %s", sym->sec->name);
+			printf(", profiling: %d", sym->has_func_profiling);
+		}
 		printf("\n");
 	}
 }
@@ -923,6 +941,7 @@ struct section *create_section_pair(struct kpatch_elf *kelf, char *name,
 	relasec->sh.sh_type = SHT_RELA;
 	relasec->sh.sh_entsize = sizeof(GElf_Rela);
 	relasec->sh.sh_addralign = 8;
+	relasec->sh.sh_flags = SHF_INFO_LINK;
 
 	/* set text rela section pointer */
 	sec->rela = relasec;
@@ -977,11 +996,17 @@ void kpatch_reindex_elements(struct kpatch_elf *kelf)
 	index = 0;
 	list_for_each_entry(sym, &kelf->symbols, list) {
 		sym->index = index++;
-		if (sym->sec)
+		if (sym->sec) {
 			sym->sym.st_shndx = (unsigned short)sym->sec->index;
-		else if (sym->sym.st_shndx != SHN_ABS &&
-			 sym->sym.st_shndx != SHN_LIVEPATCH)
+			if (sym->sec->pfe) {
+				sym->sec->pfe->sh.sh_link = sym->sec->index;
+				if (sym->sec->pfe->rela)
+					sym->sec->pfe->rela->sh.sh_info = sym->sec->index;
+			}
+		} else if (sym->sym.st_shndx != SHN_ABS &&
+			   sym->sym.st_shndx != SHN_LIVEPATCH) {
 			sym->sym.st_shndx = SHN_UNDEF;
+		}
 	}
 }
 
diff --git a/kpatch-build/kpatch-elf.h b/kpatch-build/kpatch-elf.h
index e32209b72..e3d0685ca 100644
--- a/kpatch-build/kpatch-elf.h
+++ b/kpatch-build/kpatch-elf.h
@@ -65,6 +65,7 @@ struct section {
 			struct symbol *secsym, *sym;
 		};
 	};
+	struct section *pfe; /* per-function __patchable_function_entries */
 };
 
 enum symbol_strip {
@@ -125,6 +126,7 @@ struct kpatch_elf {
 	struct list_head strings;
 	Elf_Data *symtab_shndx;
 	int fd;
+	bool has_pfe;
};
 
 /*******************
@@ -137,6 +139,8 @@ bool is_debug_section(struct section *sec);
 struct section *find_section_by_index(struct list_head *list, unsigned int index);
 struct section *find_section_by_name(struct list_head *list, const char *name);
+struct section *find_nth_section_by_name(struct list_head *list, int nth,
+					 const char *name);
 struct symbol *find_symbol_by_index(struct list_head *list, size_t index);
 struct symbol *find_symbol_by_name(struct list_head *list, const char *name);
 struct rela *find_rela_by_offset(struct section *relasec, unsigned int offset);
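
The sketches below are editorial additions for reviewers, not part of the
applied patch. First, a minimal userspace reproduction of the compiler
behavior the commit message describes, assuming a GCC or Clang with
-fpatchable-function-entry support; the file and function names are
invented for illustration:

/*
 * pfe-demo.c -- build and inspect with:
 *
 *     cc -c -fpatchable-function-entry=2 pfe-demo.c
 *     readelf -S -r pfe-demo.o
 *
 * The compiler emits two nops at the entry of traced_fn() plus a
 * __patchable_function_entries section whose relocation points at the
 * first nop -- the same layout kpatch_create_pfe_sections() recreates
 * in the output object.
 */

/* Gets nops and a __patchable_function_entries entry. */
void traced_fn(void)
{
}

/*
 * Per-function opt-out; on most arches the kernel's notrace behaves
 * like this (x86_64 being the exception the commit message calls out:
 * there, nops and PFE entries appear even for notrace functions).
 */
__attribute__((patchable_function_entry(0, 0)))
void untraced_fn(void)
{
}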
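
Second, the kpatch_reindex_elements() hunk wires sh_link (on each PFE
section) and sh_info (on its rela section) back to the covered text
section, and create_section_pair() now sets SHF_INFO_LINK. A small
libelf sketch, assuming elfutils libelf (link with -lelf; the tool name
and output format are invented), can verify that wiring on a generated
object:

/*
 * pfe-links.c -- cc pfe-links.c -lelf -o pfe-links; ./pfe-links output.o
 *
 * For every __patchable_function_entries section, print the section
 * index its sh_link points at (the covered text section after
 * kpatch_reindex_elements()) and whether SHF_LINK_ORDER is set.
 */
#include <fcntl.h>
#include <gelf.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	Elf_Scn *scn = NULL;
	size_t shstrndx;
	Elf *elf;
	int fd;

	if (argc != 2 || elf_version(EV_CURRENT) == EV_NONE)
		return 1;

	fd = open(argv[1], O_RDONLY);
	if (fd < 0)
		return 1;

	elf = elf_begin(fd, ELF_C_READ, NULL);
	if (!elf || elf_getshdrstrndx(elf, &shstrndx))
		return 1;

	while ((scn = elf_nextscn(elf, scn)) != NULL) {
		const char *name;
		GElf_Shdr sh;

		if (!gelf_getshdr(scn, &sh))
			continue;

		name = elf_strptr(elf, shstrndx, sh.sh_name);
		if (name && !strcmp(name, "__patchable_function_entries"))
			printf("section %zu: sh_link -> %u, LINK_ORDER: %s\n",
			       elf_ndxscn(scn), sh.sh_link,
			       (sh.sh_flags & SHF_LINK_ORDER) ? "yes" : "no");
	}

	elf_end(elf);
	close(fd);
	return 0;
}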
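
Finally, the PPC64 case computes the nop offset as st_value +
PPC64_LOCAL_ENTRY_OFFSET(st_other). A self-contained sketch of that
decoding, per the ELFv2 ABI's st_other encoding (the helper name is
invented; the real macro lives in the system's ppc64 ELF headers):

#include <stdio.h>

/*
 * ELFv2 stores the distance between a function's global and local
 * entry points in the top three bits of st_other.  Encodings 0 and 1
 * mean "no separate local entry"; encoding N >= 2 means 1 << N bytes.
 * This mirrors PPC64_LOCAL_ENTRY_OFFSET() as used by the patch.
 */
static unsigned int local_entry_offset(unsigned char st_other)
{
	unsigned int enc = (st_other >> 5) & 0x7;

	return enc < 2 ? 0 : 1u << enc;
}

int main(void)
{
	/* Example 2 from the patch: no TOC setup, nops start at st_value + 0. */
	printf("%u\n", local_entry_offset(0));		/* prints 0 */

	/* Example 1: ld/add TOC setup, nops start at st_value + 8. */
	printf("%u\n", local_entry_offset(3u << 5));	/* prints 8 */

	return 0;
}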