Skip to content

Commit

Permalink
perf: add userspace frame pointer unwinder
Browse files Browse the repository at this point in the history
This change introduces the UNWIND_FRAME_POINTER option to the UnwindMode
enum, allowing users to select the frame pointer unwinder for unwinding
userspace stack traces.

github issue: #907

Change-Id: I82b612a7c534f3d3adaa1b0faf28651023429325
  • Loading branch information
simpleton committed Nov 8, 2024
1 parent e095aee commit ab3bf51
Show file tree
Hide file tree
Showing 13 changed files with 610 additions and 53 deletions.
2 changes: 2 additions & 0 deletions Android.bp
Original file line number Diff line number Diff line change
Expand Up @@ -11517,6 +11517,7 @@ filegroup {
name: "perfetto_src_profiling_perf_producer_unittests",
srcs: [
"src/profiling/perf/event_config_unittest.cc",
"src/profiling/perf/frame_pointer_unwinder_unittest.cc",
"src/profiling/perf/perf_producer_unittest.cc",
"src/profiling/perf/unwind_queue_unittest.cc",
],
Expand All @@ -11542,6 +11543,7 @@ filegroup {
filegroup {
name: "perfetto_src_profiling_perf_unwinding",
srcs: [
"src/profiling/perf/frame_pointer_unwinder.cc",
"src/profiling/perf/unwinding.cc",
],
}
Expand Down
2 changes: 2 additions & 0 deletions protos/perfetto/config/perfetto_config.proto
Original file line number Diff line number Diff line change
Expand Up @@ -1957,6 +1957,8 @@ message PerfEventConfig {
UNWIND_SKIP = 1;
// Use libunwindstack (default):
UNWIND_DWARF = 2;
// Use userspace frame pointer unwinder:
UNWIND_FRAME_POINTER = 3;
}
}

Expand Down
2 changes: 2 additions & 0 deletions protos/perfetto/config/profiling/perf_event_config.proto
Original file line number Diff line number Diff line change
Expand Up @@ -217,5 +217,7 @@ message PerfEventConfig {
UNWIND_SKIP = 1;
// Use libunwindstack (default):
UNWIND_DWARF = 2;
// Use userspace frame pointer unwinder:
UNWIND_FRAME_POINTER = 3;
}
}
2 changes: 2 additions & 0 deletions protos/perfetto/trace/perfetto_trace.proto
Original file line number Diff line number Diff line change
Expand Up @@ -1957,6 +1957,8 @@ message PerfEventConfig {
UNWIND_SKIP = 1;
// Use libunwindstack (default):
UNWIND_DWARF = 2;
// Use userspace frame pointer unwinder:
UNWIND_FRAME_POINTER = 3;
}
}

Expand Down
3 changes: 3 additions & 0 deletions src/profiling/perf/BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ source_set("unwinding") {
"../common:unwind_support",
]
sources = [
"frame_pointer_unwinder.cc",
"frame_pointer_unwinder.h",
"unwind_queue.h",
"unwinding.cc",
"unwinding.h",
Expand Down Expand Up @@ -147,6 +149,7 @@ source_set("producer_unittests") {
]
sources = [
"event_config_unittest.cc",
"frame_pointer_unwinder_unittest.cc",
"perf_producer_unittest.cc",
"unwind_queue_unittest.cc",
]
Expand Down
66 changes: 41 additions & 25 deletions src/profiling/perf/event_config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,20 @@ std::optional<PerfCounter> MakePerfCounter(
}
}

// Returns true iff |unwind_mode| is one of the UnwindMode values this build
// knows how to handle. Any other value (e.g. an enumerator added by a newer
// config schema) is reported as unsupported.
bool IsSupportedUnwindMode(
    protos::gen::PerfEventConfig::UnwindMode unwind_mode) {
  using protos::gen::PerfEventConfig;
  // Compare as a plain integer so that values outside the known enumerators
  // are handled as well (and to stay clear of -Wswitch-enum).
  const int mode = static_cast<int>(unwind_mode);
  return mode == PerfEventConfig::UNWIND_UNKNOWN ||
         mode == PerfEventConfig::UNWIND_SKIP ||
         mode == PerfEventConfig::UNWIND_DWARF ||
         mode == PerfEventConfig::UNWIND_FRAME_POINTER;
}

} // namespace

// static
Expand Down Expand Up @@ -371,32 +385,18 @@ std::optional<EventConfig> EventConfig::Create(
}

// Callstack sampling.
bool user_frames = false;
bool kernel_frames = false;
// Disable user_frames by default.
auto unwind_mode = protos::gen::PerfEventConfig::UNWIND_SKIP;

TargetFilter target_filter;
bool legacy_config = pb_config.all_cpus(); // all_cpus was mandatory before
if (pb_config.has_callstack_sampling() || legacy_config) {
user_frames = true;

// Userspace callstacks.
using protos::gen::PerfEventConfig;
switch (static_cast<int>(pb_config.callstack_sampling().user_frames())) {
case PerfEventConfig::UNWIND_UNKNOWN:
// default to true, both for backwards compatibility and because it's
// almost always what the user wants.
user_frames = true;
break;
case PerfEventConfig::UNWIND_SKIP:
user_frames = false;
break;
case PerfEventConfig::UNWIND_DWARF:
user_frames = true;
break;
default:
// enum value from the future that we don't yet know, refuse the config
// TODO(rsavitski): double-check that both pbzero and ::gen propagate
// unknown enum values.
return std::nullopt;
unwind_mode = pb_config.callstack_sampling().user_frames();
if (!IsSupportedUnwindMode(unwind_mode)) {
// enum value from the future that we don't yet know, refuse the config
return std::nullopt;
}

// Process scoping. Sharding parameter is supplied from outside as it is
Expand Down Expand Up @@ -482,7 +482,7 @@ std::optional<EventConfig> EventConfig::Create(
pe.clockid = ToClockId(pb_config.timebase().timestamp_clock());
pe.use_clockid = true;

if (user_frames) {
if (IsUserFramesEnabled(unwind_mode)) {
pe.sample_type |= PERF_SAMPLE_STACK_USER | PERF_SAMPLE_REGS_USER;
// PERF_SAMPLE_STACK_USER:
// Needs to be < ((u16)(~0u)), and have bottom 8 bits clear.
Expand Down Expand Up @@ -529,19 +529,35 @@ std::optional<EventConfig> EventConfig::Create(

return EventConfig(
raw_ds_config, pe, std::move(pe_followers), timebase_event, followers,
user_frames, kernel_frames, std::move(target_filter),
kernel_frames, unwind_mode, std::move(target_filter),
ring_buffer_pages.value(), read_tick_period_ms, samples_per_tick_limit,
remote_descriptor_timeout_ms, pb_config.unwind_state_clear_period_ms(),
max_enqueued_footprint_bytes, pb_config.target_installed_by());
}

// static
// Returns whether userspace frames are sampled for the given |unwind_mode|.
// Every mode except UNWIND_SKIP enables userspace frames.
bool EventConfig::IsUserFramesEnabled(
    const protos::gen::PerfEventConfig::UnwindMode unwind_mode) {
  using protos::gen::PerfEventConfig;
  switch (unwind_mode) {
    case PerfEventConfig::UNWIND_UNKNOWN:
      // default to true, both for backwards compatibility and because it's
      // almost always what the user wants.
    case PerfEventConfig::UNWIND_DWARF:
    case PerfEventConfig::UNWIND_FRAME_POINTER:
      return true;
    case PerfEventConfig::UNWIND_SKIP:
      return false;
  }
  // The switch covers every known enumerator, but the underlying integer can
  // still carry a value outside of them. Falling off the end of a non-void
  // function would be undefined behaviour, so conservatively report that
  // userspace frames are disabled for such values.
  return false;
}

EventConfig::EventConfig(const DataSourceConfig& raw_ds_config,
const perf_event_attr& pe_timebase,
std::vector<perf_event_attr> pe_followers,
const PerfCounter& timebase_event,
std::vector<PerfCounter> follower_events,
bool user_frames,
bool kernel_frames,
protos::gen::PerfEventConfig::UnwindMode unwind_mode,
TargetFilter target_filter,
uint32_t ring_buffer_pages,
uint32_t read_tick_period_ms,
Expand All @@ -554,8 +570,8 @@ EventConfig::EventConfig(const DataSourceConfig& raw_ds_config,
perf_event_followers_(std::move(pe_followers)),
timebase_event_(timebase_event),
follower_events_(std::move(follower_events)),
user_frames_(user_frames),
kernel_frames_(kernel_frames),
unwind_mode_(unwind_mode),
target_filter_(std::move(target_filter)),
ring_buffer_pages_(ring_buffer_pages),
read_tick_period_ms_(read_tick_period_ms),
Expand Down
19 changes: 13 additions & 6 deletions src/profiling/perf/event_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include "perfetto/tracing/core/data_source_config.h"

#include "protos/perfetto/common/perf_events.gen.h"
#include "protos/perfetto/config/profiling/perf_event_config.gen.h"

namespace perfetto {
namespace protos {
Expand Down Expand Up @@ -136,9 +137,12 @@ class EventConfig {
uint64_t max_enqueued_footprint_bytes() const {
return max_enqueued_footprint_bytes_;
}
bool sample_callstacks() const { return user_frames_ || kernel_frames_; }
bool user_frames() const { return user_frames_; }
bool sample_callstacks() const { return user_frames() || kernel_frames_; }
bool user_frames() const { return IsUserFramesEnabled(unwind_mode_); }
bool kernel_frames() const { return kernel_frames_; }
protos::gen::PerfEventConfig::UnwindMode unwind_mode() const {
return unwind_mode_;
}
const TargetFilter& filter() const { return target_filter_; }
perf_event_attr* perf_attr() const {
return const_cast<perf_event_attr*>(&perf_event_attr_);
Expand All @@ -158,13 +162,16 @@ class EventConfig {
const DataSourceConfig& raw_ds_config() const { return raw_ds_config_; }

private:
static bool IsUserFramesEnabled(
const protos::gen::PerfEventConfig::UnwindMode unwind_mode);

EventConfig(const DataSourceConfig& raw_ds_config,
const perf_event_attr& pe_timebase,
std::vector<perf_event_attr> pe_followers,
const PerfCounter& timebase_event,
std::vector<PerfCounter> follower_events,
bool user_frames,
bool kernel_frames,
protos::gen::PerfEventConfig::UnwindMode unwind_mode,
TargetFilter target_filter,
uint32_t ring_buffer_pages,
uint32_t read_tick_period_ms,
Expand All @@ -187,12 +194,12 @@ class EventConfig {
// Timebase event, which are already described by |perf_event_followers_|.
std::vector<PerfCounter> follower_events_;

// If true, include userspace frames in sampled callstacks.
const bool user_frames_;

// If true, include kernel frames in sampled callstacks.
const bool kernel_frames_;

// Userspace unwinding mode.
const protos::gen::PerfEventConfig::UnwindMode unwind_mode_;

// Parsed allow/deny-list for filtering samples.
const TargetFilter target_filter_;

Expand Down
156 changes: 156 additions & 0 deletions src/profiling/perf/frame_pointer_unwinder.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
/*
* Copyright (C) 2024 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/profiling/perf/frame_pointer_unwinder.h"

#include <cinttypes>

#include "perfetto/base/logging.h"

namespace perfetto {
namespace profiling {

void FramePointerUnwinder::Unwind() {
if (!IsArchSupported()) {
PERFETTO_ELOG("Unsupported architecture: %d", arch_);
last_error_.code = unwindstack::ErrorCode::ERROR_UNSUPPORTED;
return;
}

if (maps_ == nullptr || maps_->Total() == 0) {
PERFETTO_ELOG("No maps provided");
last_error_.code = unwindstack::ErrorCode::ERROR_INVALID_MAP;
return;
}

PERFETTO_DCHECK(stack_size_ > 0u);

frames_.reserve(max_frames_);
ClearErrors();
TryUnwind();
}

void FramePointerUnwinder::TryUnwind() {
uint64_t fp = 0;
switch (arch_) {
case unwindstack::ARCH_ARM64:
fp = reinterpret_cast<uint64_t*>(
regs_->RawData())[unwindstack::Arm64Reg::ARM64_REG_R29];
break;
case unwindstack::ARCH_X86_64:
fp = reinterpret_cast<uint64_t*>(
regs_->RawData())[unwindstack::X86_64Reg::X86_64_REG_RBP];
break;
case unwindstack::ARCH_RISCV64:
fp = reinterpret_cast<uint64_t*>(
regs_->RawData())[unwindstack::Riscv64Reg::RISCV64_REG_S0];
break;
case unwindstack::ARCH_UNKNOWN:
case unwindstack::ARCH_ARM:
case unwindstack::ARCH_X86:
// not supported
;
}
uint64_t sp = regs_->sp();
uint64_t pc = regs_->pc();
for (size_t i = 0; i < max_frames_; i++) {
if (!IsFrameValid(fp, sp))
return;

// retrive the map info and elf info
std::shared_ptr<unwindstack::MapInfo> map_info = maps_->Find(pc);
if (map_info == nullptr) {
last_error_.code = unwindstack::ErrorCode::ERROR_INVALID_MAP;
return;
}

unwindstack::FrameData frame;
frame.num = i;
frame.rel_pc = pc;
frame.pc = pc;
frame.map_info = map_info;
unwindstack::Elf* elf = map_info->GetElf(process_memory_, arch_);
if (elf != nullptr) {
uint64_t relative_pc = elf->GetRelPc(pc, map_info.get());
uint64_t pc_adjustment = GetPcAdjustment(relative_pc, elf, arch_);
frame.rel_pc = relative_pc - pc_adjustment;
frame.pc = pc - pc_adjustment;
if (!resolve_names_ ||
!elf->GetFunctionName(frame.rel_pc, &frame.function_name,
&frame.function_offset)) {
frame.function_name = "";
frame.function_offset = 0;
}
}
frames_.push_back(frame);
// move to the next frame
fp = DecodeFrame(fp, &pc, &sp);
}
}

// Decodes the frame record located at |fp|: the first 64-bit slot holds the
// caller's frame pointer and the second one the return address.
//
// On success returns the caller's frame pointer, stores the return address in
// |*next_pc| and the address just past the record (fp + 16) in |*next_sp|.
// Returns 0 if either slot cannot be read from the process memory or if
// computing the end of the record overflows.
uint64_t FramePointerUnwinder::DecodeFrame(uint64_t fp,
                                           uint64_t* next_pc,
                                           uint64_t* next_sp) {
  constexpr size_t kSlotSize = sizeof(uint64_t);

  uint64_t caller_fp;
  uint64_t return_address;
  const bool read_ok =
      process_memory_->ReadFully(fp, &caller_fp, kSlotSize) &&
      process_memory_->ReadFully(fp + kSlotSize, &return_address, kSlotSize);
  if (!read_ok)
    return 0;

  // Guard against the address arithmetic wrapping around.
  if (__builtin_add_overflow(fp, kSlotSize * 2, next_sp))
    return 0;

  *next_pc = return_address;
  return caller_fp;
}

// Sanity-checks a candidate frame pointer before it is dereferenced.
// |fp| is accepted only if it is non-zero, strictly above |sp|, satisfies the
// per-architecture alignment mask, and the two-slot frame record starting at
// |fp| ends at or below |stack_end_|.
bool FramePointerUnwinder::IsFrameValid(uint64_t fp, uint64_t sp) {
  if (fp == 0 || fp <= sp)
    return false;

  // Low bits of the frame pointer that must be clear. The mask stays at zero
  // (no alignment requirement) for the unsupported architectures.
  uint64_t alignment_mask = 0;
  if (arch_ == unwindstack::ARCH_ARM64) {
    alignment_mask = 0x1;
  } else if (arch_ == unwindstack::ARCH_X86_64) {
    alignment_mask = 0xf;
  } else if (arch_ == unwindstack::ARCH_RISCV64) {
    alignment_mask = 0x7;
  }
  if ((fp & alignment_mask) != 0)
    return false;

  // There must be room on the stack to read two values: the caller's frame
  // pointer and the return address.
  uint64_t record_end;
  if (__builtin_add_overflow(fp, sizeof(uint64_t) * 2, &record_end))
    return false;

  return record_end <= stack_end_;
}

} // namespace profiling
} // namespace perfetto
Loading

0 comments on commit ab3bf51

Please sign in to comment.