From 0cff3f5052b72f8fbb0896a98c5c315aee3cdbba Mon Sep 17 00:00:00 2001 From: danielfenner <56686285+danielfenner@users.noreply.github.com> Date: Mon, 19 Sep 2022 11:42:41 +0000 Subject: [PATCH] Tid translation for target processes running in a separate namespace. (#4195) If the target process is running in a separate namespace, thread ids (tids) observed from within the target process are different from the tids observed in the root namespace. This is a problem for events we obtain from the target process. These events come from userspace instrumentation (usi), manual instrumentation and the Vulkan layer. This PR is only about usi. Usi events are obtained from the game, received in ProducerEventProcessorHijackingFunctionEntryExitForLinuxTracing and piped back into the LinuxTracing code. Finally they are processed by UprobesUnwindingVisitor where they are used to emit FunctionCalls and to help with the call stack handling. The UprobesUnwindingVisitor needs to translate the tids from inside the UserSpaceFunction{Entry,Exit}PerfEventData events into root namespace tids before handling them. For that purpose we have the UprobesUnwindingVisitor observe the clone{3} exit tracepoints and the task_newtask tracepoint. That way we are able to keep track of new threads and their tids. Additionally, at the beginning of the capture, we also send the initial state of the tid mapping. We obtain this information from the /proc filesystem. 
--- src/LinuxTracing/KernelTracepoints.h | 6 + src/LinuxTracing/LinuxTracingUtils.cpp | 34 ++++ src/LinuxTracing/LinuxTracingUtils.h | 6 + src/LinuxTracing/LinuxTracingUtilsTest.cpp | 9 + src/LinuxTracing/PerfEvent.h | 10 +- src/LinuxTracing/PerfEventReaders.cpp | 23 +++ src/LinuxTracing/PerfEventReaders.h | 3 + src/LinuxTracing/PerfEventVisitor.h | 1 + src/LinuxTracing/TracerImpl.cpp | 48 +++++- src/LinuxTracing/TracerImpl.h | 5 + src/LinuxTracing/UprobesUnwindingVisitor.cpp | 91 +++++++++- src/LinuxTracing/UprobesUnwindingVisitor.h | 15 ++ ...ndingVisitorDynamicInstrumentationTest.cpp | 162 ++++++++++++++++++ 13 files changed, 396 insertions(+), 17 deletions(-) diff --git a/src/LinuxTracing/KernelTracepoints.h b/src/LinuxTracing/KernelTracepoints.h index fff7373f6bb..14c771ffde1 100644 --- a/src/LinuxTracing/KernelTracepoints.h +++ b/src/LinuxTracing/KernelTracepoints.h @@ -105,4 +105,10 @@ struct __attribute__((__packed__)) dma_fence_signaled_tracepoint { uint32_t seqno; }; +struct __attribute__((__packed__)) syscall_exit_tracepoint { + tracepoint_common common; + uint64_t syscall_nr; + uint64_t ret; +}; + #endif // LINUX_TRACING_KERNEL_TRACEPOINTS_H_ diff --git a/src/LinuxTracing/LinuxTracingUtils.cpp b/src/LinuxTracing/LinuxTracingUtils.cpp index b361d0a874b..245706d44b2 100644 --- a/src/LinuxTracing/LinuxTracingUtils.cpp +++ b/src/LinuxTracing/LinuxTracingUtils.cpp @@ -28,6 +28,7 @@ #include "ModuleUtils/ReadLinuxMaps.h" #include "ModuleUtils/VirtualAndAbsoluteAddresses.h" #include "OrbitBase/ExecuteCommand.h" +#include "OrbitBase/GetProcessIds.h" #include "OrbitBase/Logging.h" #include "OrbitBase/ReadFileToString.h" #include "OrbitBase/SafeStrerror.h" @@ -333,4 +334,37 @@ std::map FindFunctionsThatUprobesCannotInstrumentWithMess return function_ids_to_error_messages; } +absl::flat_hash_map RetrieveInitialTidToRootNamespaceTidMapping( + pid_t pid_in_root_namespace) { + absl::flat_hash_map tid_mapping; + for (pid_t tid_in_root_namespace : 
orbit_base::GetTidsOfProcess(pid_in_root_namespace)) { + const std::string status_file_name = absl::StrFormat("/proc/%d/status", tid_in_root_namespace); + auto reading_result = orbit_base::ReadFileToString(status_file_name); + if (reading_result.has_error()) { + // This means the thread exited before we were able to read the status file. It is fine to + // just skip this thread. + continue; + } + const std::vector lines = + absl::StrSplit(reading_result.value(), '\n', absl::SkipEmpty()); + for (std::string_view line : lines) { + if (!absl::StartsWith(line, "NSpid:")) continue; + // The line in the status file looks like this: + // NSpid: pid pid_1 ... pid_n + // where pid is the pid in the root namespace, pid_1 is the pid in the first nested namespace + // and pid_n is the pid in the innermost namespace. + const std::vector splits = + absl::StrSplit(line, absl::ByAnyChar(": \t"), absl::SkipWhitespace{}); + pid_t tid_in_target_process_namespace = 0; + if (!absl::SimpleAtoi(splits.back(), &tid_in_target_process_namespace)) { + ORBIT_ERROR("Line in %s starting with 'NSpid:' did not end with a pid. Entire line was: %s", + status_file_name, line); + break; + } + tid_mapping[tid_in_target_process_namespace] = tid_in_root_namespace; + } + } + return tid_mapping; +} + } // namespace orbit_linux_tracing diff --git a/src/LinuxTracing/LinuxTracingUtils.h b/src/LinuxTracing/LinuxTracingUtils.h index 166c5a5f7a3..859e0f25c6b 100644 --- a/src/LinuxTracing/LinuxTracingUtils.h +++ b/src/LinuxTracing/LinuxTracingUtils.h @@ -5,6 +5,7 @@ #ifndef LINUX_TRACING_LINUX_TRACING_UTILS_H_ #define LINUX_TRACING_LINUX_TRACING_UTILS_H_ +#include #include #include @@ -76,6 +77,11 @@ inline size_t GetPageSize() { const std::vector& modules, const std::vector& functions); +// Returns the map of tids in the target process namespace to the corresponding tids in the root +// namespace. 
+[[nodiscard]] absl::flat_hash_map RetrieveInitialTidToRootNamespaceTidMapping( + pid_t pid_in_root_namespace); + } // namespace orbit_linux_tracing #endif // LINUX_TRACING_LINUX_TRACING_UTILS_H_ diff --git a/src/LinuxTracing/LinuxTracingUtilsTest.cpp b/src/LinuxTracing/LinuxTracingUtilsTest.cpp index e3e2635d721..259209af057 100644 --- a/src/LinuxTracing/LinuxTracingUtilsTest.cpp +++ b/src/LinuxTracing/LinuxTracingUtilsTest.cpp @@ -405,4 +405,13 @@ TEST(FindFunctionsThatUprobesCannotInstrumentWithMessages, ModuleNotInMaps) { "not loaded by the process.")); } +TEST(RetrieveInitialTidToRootNamespaceTidMapping, TrivialMapFromTestProcess) { + const pid_t pid = orbit_base::ToNativeThreadId(orbit_base::GetCurrentProcessId()); + const auto tid_mappings = RetrieveInitialTidToRootNamespaceTidMapping(pid); + EXPECT_FALSE(tid_mappings.empty()); + for (const auto& tid_mapping : tid_mappings) { + EXPECT_EQ(tid_mapping.first, tid_mapping.second); + } +} + } // namespace orbit_linux_tracing diff --git a/src/LinuxTracing/PerfEvent.h b/src/LinuxTracing/PerfEvent.h index 744f122e542..f03d9e4b6bc 100644 --- a/src/LinuxTracing/PerfEvent.h +++ b/src/LinuxTracing/PerfEvent.h @@ -377,6 +377,14 @@ struct SchedSwitchWithStackPerfEventData { }; using SchedSwitchWithStackPerfEvent = TypedPerfEvent; +struct CloneExitPerfEventData { + // The tid of the thread invoking clone (in the root namespace). + pid_t tid; + // The return value of clone. This is the tid of the new thread (in the target process namespace). + pid_t ret_tid; +}; +using CloneExitPerfEvent = TypedPerfEvent; + // This struct holds the data we need from any of the possible perf_event_open events that we // collect. 
The top-level fields (`timestamp` and `ordered_in_file_descriptor`) are common to all // events, while each of the possible `...PerfEventData`s in the `std::variant` contains the data @@ -411,7 +419,7 @@ struct PerfEvent { TaskRenamePerfEventData, SchedSwitchPerfEventData, SchedWakeupPerfEventData, SchedSwitchWithStackPerfEventData, SchedWakeupWithStackPerfEventData, AmdgpuCsIoctlPerfEventData, AmdgpuSchedRunJobPerfEventData, - DmaFenceSignaledPerfEventData> + DmaFenceSignaledPerfEventData, CloneExitPerfEventData> data; void Accept(PerfEventVisitor* visitor) const; diff --git a/src/LinuxTracing/PerfEventReaders.cpp b/src/LinuxTracing/PerfEventReaders.cpp index cd7c94cd3c7..4d7f2e08721 100644 --- a/src/LinuxTracing/PerfEventReaders.cpp +++ b/src/LinuxTracing/PerfEventReaders.cpp @@ -685,4 +685,27 @@ ConsumeAmdgpuSchedRunJobPerfEvent(PerfEventRingBuffer* ring_buffer, header); } +[[nodiscard]] CloneExitPerfEvent ConsumeCloneExitPerfEvent(PerfEventRingBuffer* ring_buffer, + const perf_event_header& header) { + const perf_event_attr flags{ + .sample_type = PERF_SAMPLE_RAW | SAMPLE_TYPE_TID_TIME_STREAMID_CPU, + }; + + PerfRecordSample res = ConsumeRecordSample(ring_buffer, header, flags); + + syscall_exit_tracepoint sys_exit; + std::memcpy(&sys_exit, res.raw_data.get(), sizeof(syscall_exit_tracepoint)); + + ring_buffer->SkipRecord(header); + return CloneExitPerfEvent{ + .timestamp = res.time, + .ordered_stream = PerfEventOrderedStream::FileDescriptor(ring_buffer->GetFileDescriptor()), + .data = + { + .tid = static_cast(res.tid), + .ret_tid = static_cast(sys_exit.ret), + }, + }; +} + } // namespace orbit_linux_tracing diff --git a/src/LinuxTracing/PerfEventReaders.h b/src/LinuxTracing/PerfEventReaders.h index 7266b8c0973..d6081cf33bc 100644 --- a/src/LinuxTracing/PerfEventReaders.h +++ b/src/LinuxTracing/PerfEventReaders.h @@ -69,6 +69,9 @@ AmdgpuSchedRunJobPerfEvent ConsumeAmdgpuSchedRunJobPerfEvent(PerfEventRingBuffer DmaFenceSignaledPerfEvent 
ConsumeDmaFenceSignaledPerfEvent(PerfEventRingBuffer* ring_buffer, const perf_event_header& header); + +CloneExitPerfEvent ConsumeCloneExitPerfEvent(PerfEventRingBuffer* ring_buffer, + const perf_event_header& header); } // namespace orbit_linux_tracing #endif // LINUX_TRACING_PERF_EVENT_READERS_H_ diff --git a/src/LinuxTracing/PerfEventVisitor.h b/src/LinuxTracing/PerfEventVisitor.h index 7cab9d969fa..21b9c6ea17d 100644 --- a/src/LinuxTracing/PerfEventVisitor.h +++ b/src/LinuxTracing/PerfEventVisitor.h @@ -53,6 +53,7 @@ class PerfEventVisitor { const DmaFenceSignaledPerfEventData& /*event_data*/) {} virtual void Visit(uint64_t /*event_timestamp*/, const GenericTracepointPerfEventData& /*event_data*/) {} + virtual void Visit(uint64_t /*event_timestamp*/, const CloneExitPerfEventData& /*event_data*/) {} }; } // namespace orbit_linux_tracing diff --git a/src/LinuxTracing/TracerImpl.cpp b/src/LinuxTracing/TracerImpl.cpp index 0ad299c60e5..ebc0fbb2131 100644 --- a/src/LinuxTracing/TracerImpl.cpp +++ b/src/LinuxTracing/TracerImpl.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -220,6 +221,11 @@ void TracerImpl::InitUprobesEventVisitor() { &absolute_address_to_size_of_functions_to_stop_unwinding_at_); uprobes_unwinding_visitor_->SetUnwindErrorsAndDiscardedSamplesCounters( &stats_.unwind_error_count, &stats_.samples_in_uretprobes_count); + // Get the initial mapping of the tids in the target process to the corresponing tids in the + // root namespace. + absl::flat_hash_map tid_mappings = + RetrieveInitialTidToRootNamespaceTidMapping(target_pid_); + uprobes_unwinding_visitor_->SetInitialTidToRootNamespaceTidMapping(std::move(tid_mappings)); event_processor_.AddVisitor(uprobes_unwinding_visitor_.get()); } @@ -677,6 +683,20 @@ bool TracerImpl::OpenInstrumentedTracepoints(const std::vector& cpus) { return !tracepoint_event_open_errors; } +// We maintain a map of tids from the target process pid namespace to the root namespace. 
When a new +// thread is created the new tid in the root namespace is reported by the task:task_newtask which is +// already opened by OpenThreadNameTracepoints. The respective tid in the target process namespace +// is obtained from the next hit to syscalls:sys_exit_clone or syscalls:sys_exit_clone3. +bool TracerImpl::OpenCloneExitTracepoints(const std::vector& cpus) { + ORBIT_SCOPE_FUNCTION; + absl::flat_hash_map pid_mapping_tracepoint_ring_buffer_fds_per_cpu; + return OpenFileDescriptorsAndRingBuffersForAllTracepoints( + {{"syscalls", "sys_exit_clone", &sys_exit_clone_ids_}, + {"syscalls", "sys_exit_clone3", &sys_exit_clone3_ids_}}, + cpus, &tracing_fds_, CLONE_EXIT_RING_BUFFER_SIZE_KB, + &pid_mapping_tracepoint_ring_buffer_fds_per_cpu, &ring_buffers_, stack_dump_size_); +} + void TracerImpl::InitLostAndDiscardedEventVisitor() { ORBIT_SCOPE_FUNCTION; lost_and_discarded_event_visitor_ = std::make_unique(listener_); @@ -752,6 +772,11 @@ void TracerImpl::Startup() { bool perf_event_open_errors = false; std::vector perf_event_open_error_details; + if (bool opened = OpenCloneExitTracepoints(all_cpus); !opened) { + perf_event_open_error_details.emplace_back("clone exit tracepoints"); + perf_event_open_errors = true; + } + if (bool opened = OpenMmapTask(all_cpus); !opened) { perf_event_open_error_details.emplace_back("mmap events, fork and exit events"); perf_event_open_errors = true; @@ -1120,15 +1145,17 @@ uint64_t TracerImpl::ProcessSampleEventAndReturnTimestamp(const perf_event_heade bool is_amdgpu_sched_run_job_event = amdgpu_sched_run_job_ids_.contains(stream_id); bool is_dma_fence_signaled_event = dma_fence_signaled_ids_.contains(stream_id); bool is_user_instrumented_tracepoint = ids_to_tracepoint_info_.contains(stream_id); - - ORBIT_CHECK(is_uprobe + is_uprobe_with_args + is_uprobe_with_stack + is_uretprobe + - is_uretprobe_with_retval + is_stack_sample + is_callchain_sample + - is_task_newtask + is_task_rename + is_sched_switch + is_sched_wakeup + - 
is_sched_switch_with_callchain + is_sched_wakeup_with_callchain + - is_sched_switch_with_stack + is_sched_wakeup_with_stack + - is_amdgpu_cs_ioctl_event + is_amdgpu_sched_run_job_event + - is_dma_fence_signaled_event + is_user_instrumented_tracepoint <= - 1); + bool is_clone_exit_tracepoint = + sys_exit_clone_ids_.contains(stream_id) || sys_exit_clone3_ids_.contains(stream_id); + + ORBIT_CHECK( + is_uprobe + is_uprobe_with_args + is_uprobe_with_stack + is_uretprobe + + is_uretprobe_with_retval + is_stack_sample + is_callchain_sample + is_task_newtask + + is_task_rename + is_sched_switch + is_sched_wakeup + is_sched_switch_with_callchain + + is_sched_wakeup_with_callchain + is_sched_switch_with_stack + is_sched_wakeup_with_stack + + is_amdgpu_cs_ioctl_event + is_amdgpu_sched_run_job_event + is_dma_fence_signaled_event + + is_user_instrumented_tracepoint + is_clone_exit_tracepoint <= + 1); int fd = ring_buffer->GetFileDescriptor(); @@ -1410,6 +1437,9 @@ uint64_t TracerImpl::ProcessSampleEventAndReturnTimestamp(const perf_event_heade tracepoint_info->set_category(it->second.category()); listener_->OnTracepointEvent(std::move(tracepoint_event)); + } else if (is_clone_exit_tracepoint) { + CloneExitPerfEvent event = ConsumeCloneExitPerfEvent(ring_buffer, header); + DeferEvent(std::move(event)); } else { ORBIT_ERROR("PERF_EVENT_SAMPLE with unexpected stream_id: %lu", stream_id); ring_buffer->SkipRecord(header); diff --git a/src/LinuxTracing/TracerImpl.h b/src/LinuxTracing/TracerImpl.h index 2804978b0ee..f129863a98e 100644 --- a/src/LinuxTracing/TracerImpl.h +++ b/src/LinuxTracing/TracerImpl.h @@ -99,6 +99,8 @@ class TracerImpl : public Tracer { bool OpenInstrumentedTracepoints(const std::vector& cpus); + bool OpenCloneExitTracepoints(const std::vector& cpus); + void InitLostAndDiscardedEventVisitor(); [[nodiscard]] uint64_t ProcessForkEventAndReturnTimestamp(const perf_event_header& header, @@ -135,6 +137,7 @@ class TracerImpl : public Tracer { static constexpr uint64_t 
MMAP_TASK_RING_BUFFER_SIZE_KB = 64; static constexpr uint64_t SAMPLING_RING_BUFFER_SIZE_KB = 16 * 1024; static constexpr uint64_t THREAD_NAMES_RING_BUFFER_SIZE_KB = 64; + static constexpr uint64_t CLONE_EXIT_RING_BUFFER_SIZE_KB = 64; static constexpr uint64_t CONTEXT_SWITCHES_AND_THREAD_STATE_RING_BUFFER_SIZE_KB = 2 * 1024; static constexpr uint64_t CONTEXT_SWITCHES_AND_THREAD_STATE_WITH_STACKS_RING_BUFFER_SIZE_KB = 64 * 1024; @@ -191,6 +194,8 @@ class TracerImpl : public Tracer { absl::flat_hash_set amdgpu_cs_ioctl_ids_; absl::flat_hash_set amdgpu_sched_run_job_ids_; absl::flat_hash_set dma_fence_signaled_ids_; + absl::flat_hash_set sys_exit_clone_ids_; + absl::flat_hash_set sys_exit_clone3_ids_; absl::flat_hash_map ids_to_tracepoint_info_; uint64_t effective_capture_start_timestamp_ns_ = 0; diff --git a/src/LinuxTracing/UprobesUnwindingVisitor.cpp b/src/LinuxTracing/UprobesUnwindingVisitor.cpp index d1c257f3702..ab1c6602f1a 100644 --- a/src/LinuxTracing/UprobesUnwindingVisitor.cpp +++ b/src/LinuxTracing/UprobesUnwindingVisitor.cpp @@ -528,22 +528,50 @@ void UprobesUnwindingVisitor::Visit(uint64_t event_timestamp, void UprobesUnwindingVisitor::Visit(uint64_t event_timestamp, const UserSpaceFunctionEntryPerfEventData& event_data) { - function_call_manager_->ProcessFunctionEntry(event_data.tid, event_data.function_id, - event_timestamp, std::nullopt); + const auto& tid_to_root_namespace_tid_it = tid_to_root_namespace_tid_.find(event_data.tid); + if (tid_to_root_namespace_tid_it == tid_to_root_namespace_tid_.end()) { + ORBIT_ERROR_ONCE( + "Received function entry event from unknown thread with tid %d. 
Dropping this event and " + "also all subsequent events from unknown threads.", + event_data.tid); + return; + } + const pid_t tid = tid_to_root_namespace_tid_it->second; + + function_call_manager_->ProcessFunctionEntry(tid, event_data.function_id, event_timestamp, + std::nullopt); - return_address_manager_->ProcessFunctionEntry(event_data.tid, event_data.sp, - event_data.return_address); + return_address_manager_->ProcessFunctionEntry(tid, event_data.sp, event_data.return_address); } void UprobesUnwindingVisitor::Visit(uint64_t event_timestamp, const UserSpaceFunctionExitPerfEventData& event_data) { - std::optional function_call = function_call_manager_->ProcessFunctionExit( - event_data.pid, event_data.tid, event_timestamp, std::nullopt); + const auto& tid_to_root_namespace_tid_it = tid_to_root_namespace_tid_.find(event_data.tid); + if (tid_to_root_namespace_tid_it == tid_to_root_namespace_tid_.end()) { + ORBIT_ERROR_ONCE( + "Received function exit event from unknown thread with tid %d. Dropping this event and " + "also all subsequent user space dynamic instrumentation events from unknown threads.", + event_data.tid); + return; + } + const auto& pid_to_root_namespace_pid_it = tid_to_root_namespace_tid_.find(event_data.pid); + if (pid_to_root_namespace_pid_it == tid_to_root_namespace_tid_.end()) { + ORBIT_ERROR_ONCE( + "Received function exit event from unknown process with pid %d. 
Dropping this event and " + "also all subsequent user space dynamic instrumentation events from unknown processes.", + event_data.pid); + return; + } + const pid_t tid = tid_to_root_namespace_tid_it->second; + const pid_t pid = pid_to_root_namespace_pid_it->second; + + std::optional function_call = + function_call_manager_->ProcessFunctionExit(pid, tid, event_timestamp, std::nullopt); if (function_call.has_value()) { listener_->OnFunctionCall(std::move(function_call.value())); } - return_address_manager_->ProcessFunctionExit(event_data.tid); + return_address_manager_->ProcessFunctionExit(tid); } void UprobesUnwindingVisitor::Visit(uint64_t /*event_timestamp*/, @@ -759,4 +787,53 @@ void UprobesUnwindingVisitor::Visit(uint64_t event_timestamp, const MmapPerfEven listener_->OnModuleUpdate(std::move(module_update_event)); } +// We observe the task_newtask tracepoint. This gets triggered for each new thread that is created. +// It reports the tid of the parent thread and the tid of the new thread, both in the root +// namespace. We store the mapping from parent tid to new tid in +// new_task_root_namespace_parent_tid_to_root_namespace_tid_. +// Immediately after this the clone or clone3 syscall that caused the creation of the new thread +// returns. We also observe the tracepoint that tracks the return from the clone call (see below). +// The clone tracepoint also provides the pid of the parent thread (in the root namespace) and the +// return value of clone which is the tid of the new thread in the namespace of the target process. +// We use the parent tid to match these two tracepoints and by that obtain the mapping from the tid +// in the namespace of the target process to the tid in the root namespace. This mapping is stored +// in tid_to_root_namespace_tid_. 
+void UprobesUnwindingVisitor::Visit(uint64_t /*event_timestamp*/, + const TaskNewtaskPerfEventData& event_data) { + if (new_task_root_namespace_parent_tid_to_root_namespace_tid_.contains( + event_data.was_created_by_tid)) { + ORBIT_ERROR( + "Observed a task_newtask event from thread %d without matching clone exit event. This " + "should never happen.", + event_data.was_created_by_tid); + } + new_task_root_namespace_parent_tid_to_root_namespace_tid_[event_data.was_created_by_tid] = + event_data.new_tid; +} + +void UprobesUnwindingVisitor::Visit(uint64_t /*event_timestamp*/, + const CloneExitPerfEventData& event_data) { + // If the return value of clone is zero this tracepoint is hit from the execution path of the + // newly created thread. We are not interested in these events and discard them. + if (event_data.ret_tid == 0) return; + + const pid_t parent_tid = event_data.tid; + const pid_t tid_in_target_process_namespace = event_data.ret_tid; + const auto& new_task_root_namespace_parent_tid_to_root_namespace_tid_it = + new_task_root_namespace_parent_tid_to_root_namespace_tid_.find(parent_tid); + if (new_task_root_namespace_parent_tid_to_root_namespace_tid_it == + new_task_root_namespace_parent_tid_to_root_namespace_tid_.end()) { + ORBIT_ERROR( + "Observed a return from clone without previously seeing a task_newtask from the same " + "parent thread. parent_tid was %d; clone return was %d. 
We will ignore user space dynamic " + "instrumentation form this thread.", + parent_tid, tid_in_target_process_namespace); + return; + } + tid_to_root_namespace_tid_[tid_in_target_process_namespace] = + new_task_root_namespace_parent_tid_to_root_namespace_tid_it->second; + new_task_root_namespace_parent_tid_to_root_namespace_tid_.erase( + new_task_root_namespace_parent_tid_to_root_namespace_tid_it); +} + } // namespace orbit_linux_tracing diff --git a/src/LinuxTracing/UprobesUnwindingVisitor.h b/src/LinuxTracing/UprobesUnwindingVisitor.h index 7d704977edd..405293bfde6 100644 --- a/src/LinuxTracing/UprobesUnwindingVisitor.h +++ b/src/LinuxTracing/UprobesUnwindingVisitor.h @@ -81,6 +81,10 @@ class UprobesUnwindingVisitor : public PerfEventVisitor { samples_in_uretprobes_counter_ = samples_in_uretprobes_counter; } + void SetInitialTidToRootNamespaceTidMapping(absl::flat_hash_map&& tid_mappings) { + tid_to_root_namespace_tid_ = std::move(tid_mappings); + } + void Visit(uint64_t event_timestamp, const StackSamplePerfEventData& event_data) override; void Visit(uint64_t event_timestamp, const SchedWakeupWithStackPerfEventData& event_data) override; @@ -99,6 +103,8 @@ class UprobesUnwindingVisitor : public PerfEventVisitor { void Visit(uint64_t event_timestamp, const UserSpaceFunctionExitPerfEventData& event_data) override; void Visit(uint64_t event_timestamp, const MmapPerfEventData& event_data) override; + void Visit(uint64_t event_timestamp, const TaskNewtaskPerfEventData& event_data) override; + void Visit(uint64_t event_timestamp, const CloneExitPerfEventData& event_data) override; private: // This struct holds a copy of some stack data collected from the target process. @@ -146,6 +152,15 @@ class UprobesUnwindingVisitor : public PerfEventVisitor { absl::flat_hash_map> thread_id_stream_id_to_stack_slices_{}; + + // tid_to_root_namespace_tid_ holds a mapping from all tids in the target process namespace to the + // corresponding tids in the root namespace. 
+ // We obtain the initial state of this mapping at the beginning of the capture via + // SetInitialTidToRootNamespaceTidMapping. During the capture we observe task_newtask and clone{3} + // tracepoints to keep track of new threads + // (new_task_root_namespace_parent_tid_to_root_namespace_tid_ is required for this bookkeeping). + absl::flat_hash_map tid_to_root_namespace_tid_; + absl::flat_hash_map new_task_root_namespace_parent_tid_to_root_namespace_tid_; }; } // namespace orbit_linux_tracing diff --git a/src/LinuxTracing/UprobesUnwindingVisitorDynamicInstrumentationTest.cpp b/src/LinuxTracing/UprobesUnwindingVisitorDynamicInstrumentationTest.cpp index 1922f05f822..62b77d57184 100644 --- a/src/LinuxTracing/UprobesUnwindingVisitorDynamicInstrumentationTest.cpp +++ b/src/LinuxTracing/UprobesUnwindingVisitorDynamicInstrumentationTest.cpp @@ -50,6 +50,7 @@ TEST_F(UprobesUnwindingVisitorDynamicInstrumentationTest, VisitDynamicInstrumentationPerfEventsInVariousCombinationsSendsFunctionCalls) { constexpr pid_t kPid = 42; constexpr pid_t kTid = 43; + visitor_.SetInitialTidToRootNamespaceTidMapping({{kPid, kPid}, {kTid, kTid}}); constexpr uint32_t kCpu = 1; { @@ -301,4 +302,165 @@ TEST_F(UprobesUnwindingVisitorDynamicInstrumentationTest, } } +TEST_F(UprobesUnwindingVisitorDynamicInstrumentationTest, + VisitDynamicInstrumentationPerfEventsWithTidNamespaceTranslation) { + constexpr pid_t kPidTargetNamespace = 42; + constexpr pid_t kTidTargetNamespace = 43; + constexpr pid_t kPidRootNamespace = 1042; + constexpr pid_t kTidRootNamespace = 1043; + visitor_.SetInitialTidToRootNamespaceTidMapping( + {{kPidTargetNamespace, kPidRootNamespace}, {kTidTargetNamespace, kTidRootNamespace}}); + + constexpr pid_t kPidUnknown = 44; + constexpr pid_t kTidUnknown = 45; + + constexpr pid_t kTidNewTargetNamespace = 54; + constexpr pid_t kTidNewRootNamespace = 1054; + + { + UserSpaceFunctionEntryPerfEvent function_entry1{ + .timestamp = 300, + .data = + { + .pid = kPidTargetNamespace, + .tid = 
kTidTargetNamespace, + .function_id = 1, + .sp = 0x30, + .return_address = 0x02, + }, + }; + + EXPECT_CALL(return_address_manager_, ProcessFunctionEntry(kTidRootNamespace, 0x30, 0x02)) + .Times(1); + PerfEvent{function_entry1}.Accept(&visitor_); + Mock::VerifyAndClearExpectations(&return_address_manager_); + } + + { + UserSpaceFunctionExitPerfEvent function_exit1{ + .timestamp = 800, + .data = + { + .pid = kPidTargetNamespace, + .tid = kTidTargetNamespace, + }, + }; + + EXPECT_CALL(return_address_manager_, ProcessFunctionExit(kTidRootNamespace)).Times(1); + orbit_grpc_protos::FunctionCall actual_function_call; + EXPECT_CALL(listener_, OnFunctionCall).Times(1).WillOnce(SaveArg<0>(&actual_function_call)); + PerfEvent{function_exit1}.Accept(&visitor_); + Mock::VerifyAndClearExpectations(&return_address_manager_); + Mock::VerifyAndClearExpectations(&listener_); + EXPECT_EQ(actual_function_call.pid(), kPidRootNamespace); + EXPECT_EQ(actual_function_call.tid(), kTidRootNamespace); + EXPECT_EQ(actual_function_call.function_id(), 1); + EXPECT_EQ(actual_function_call.duration_ns(), 500); + EXPECT_EQ(actual_function_call.end_timestamp_ns(), 800); + EXPECT_EQ(actual_function_call.depth(), 0); + EXPECT_EQ(actual_function_call.return_value(), 0); + EXPECT_THAT(actual_function_call.registers(), ElementsAre()); + } + + { + UserSpaceFunctionEntryPerfEvent function_entry2{ + .timestamp = 900, + .data = + { + .pid = kPidUnknown, + .tid = kTidUnknown, + .function_id = 1, + .sp = 0x30, + .return_address = 0x02, + }, + }; + + EXPECT_CALL(return_address_manager_, ProcessFunctionEntry(kTidUnknown, 0x30, 0x02)).Times(0); + PerfEvent{function_entry2}.Accept(&visitor_); + Mock::VerifyAndClearExpectations(&return_address_manager_); + } + + { + UserSpaceFunctionExitPerfEvent function_exit2{ + .timestamp = 1000, + .data = + { + .pid = kPidUnknown, + .tid = kTidUnknown, + }, + }; + + EXPECT_CALL(return_address_manager_, ProcessFunctionExit(kTidUnknown)).Times(0); + EXPECT_CALL(listener_, 
OnFunctionCall).Times(0); + PerfEvent{function_exit2}.Accept(&visitor_); + Mock::VerifyAndClearExpectations(&return_address_manager_); + Mock::VerifyAndClearExpectations(&listener_); + } + + { + TaskNewtaskPerfEvent task_newtask{ + .timestamp = 1100, + .data = + { + .new_tid = kTidNewRootNamespace, + .was_created_by_tid = kTidRootNamespace, + }, + }; + PerfEvent{task_newtask}.Accept(&visitor_); + CloneExitPerfEvent clone_exit_event{ + .timestamp = 1101, + .data = + { + .tid = kTidRootNamespace, + .ret_tid = kTidNewTargetNamespace, + }, + }; + PerfEvent{clone_exit_event}.Accept(&visitor_); + } + + { + UserSpaceFunctionEntryPerfEvent function_entry3{ + .timestamp = 1300, + .data = + { + .pid = kPidTargetNamespace, + .tid = kTidNewTargetNamespace, + .function_id = 3, + .sp = 0x30, + .return_address = 0x02, + }, + }; + EXPECT_CALL(return_address_manager_, ProcessFunctionEntry(kTidNewRootNamespace, 0x30, 0x02)) + .Times(1); + PerfEvent{function_entry3}.Accept(&visitor_); + Mock::VerifyAndClearExpectations(&return_address_manager_); + } + + { + UserSpaceFunctionExitPerfEvent function_exit3{ + .timestamp = 1800, + .data = + { + .pid = kPidTargetNamespace, + .tid = kTidNewTargetNamespace, + }, + }; + + EXPECT_CALL(return_address_manager_, ProcessFunctionExit(kTidNewRootNamespace)).Times(1); + orbit_grpc_protos::FunctionCall actual_function_call; + EXPECT_CALL(listener_, OnFunctionCall).Times(1).WillOnce(SaveArg<0>(&actual_function_call)); + PerfEvent{function_exit3}.Accept(&visitor_); + Mock::VerifyAndClearExpectations(&return_address_manager_); + Mock::VerifyAndClearExpectations(&listener_); + EXPECT_EQ(actual_function_call.pid(), kPidRootNamespace); + EXPECT_EQ(actual_function_call.tid(), kTidNewRootNamespace); + EXPECT_EQ(actual_function_call.function_id(), 3); + EXPECT_EQ(actual_function_call.duration_ns(), 500); + EXPECT_EQ(actual_function_call.end_timestamp_ns(), 1800); + EXPECT_EQ(actual_function_call.depth(), 0); + EXPECT_EQ(actual_function_call.return_value(), 0); 
+ EXPECT_THAT(actual_function_call.registers(), ElementsAre()); + } +} + } // namespace orbit_linux_tracing