diff --git a/CMakeLists.txt b/CMakeLists.txt index 9beb1e69..8e8cb91b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -180,11 +180,11 @@ set(SOURCE_FILES src/monitor/tracepoint_monitor.cpp src/process_controller.cpp - src/perf/event_provider.cpp - src/perf/event.cpp + src/perf/event_resolver.cpp + src/perf/event_attr.cpp src/perf/bio/block_device.cpp - src/perf/counter/counter_provider.cpp + src/perf/event_composer.cpp src/perf/counter/group/reader.cpp src/perf/counter/userspace/reader.cpp @@ -194,7 +194,7 @@ set(SOURCE_FILES src/perf/sample/writer.cpp src/perf/time/converter.cpp src/perf/time/reader.cpp src/perf/tracepoint/writer.cpp - src/perf/tracepoint/event.cpp + src/perf/tracepoint/event_attr.cpp src/perf/syscall/writer.cpp src/time/time.cpp diff --git a/include/lo2s/build_config.hpp.in b/include/lo2s/build_config.hpp.in index b2e438df..68afb4e7 100644 --- a/include/lo2s/build_config.hpp.in +++ b/include/lo2s/build_config.hpp.in @@ -1,4 +1,4 @@ -// Predefined events which are only present in more modern kernels src/perf/event_provider.cpp +// Predefined events which are only present in more modern kernels src/perf/event_resolver.cpp #cmakedefine HAVE_PERF_EVENT_STALLED_CYCLES_FRONTEND diff --git a/include/lo2s/config.hpp b/include/lo2s/config.hpp index 53fd6f36..1a8d79c5 100644 --- a/include/lo2s/config.hpp +++ b/include/lo2s/config.hpp @@ -22,6 +22,7 @@ #pragma once #include +#include #include #include @@ -51,55 +52,57 @@ struct Config Process process; std::vector command; std::string command_line; - bool quiet; - bool drop_root; + bool quiet = false; + bool drop_root = false; std::string user = ""; // Optional features std::vector tracepoint_events; #ifdef HAVE_X86_ADAPT std::vector x86_adapt_knobs; #endif - bool use_sensors; + bool use_sensors = false; int cgroup_fd = -1; // OTF2 std::string trace_path; // perf std::size_t mmap_pages; - bool exclude_kernel; + bool exclude_kernel = false; // Instruction sampling - bool sampling; + bool sampling = false; 
std::uint64_t sampling_period; std::string sampling_event; - bool enable_cct; - bool suppress_ip; - bool disassemble; + bool enable_cct = false; + bool suppress_ip = false; + bool disassemble = false; // Interval monitors std::chrono::nanoseconds read_interval; std::chrono::nanoseconds userspace_read_interval; std::chrono::nanoseconds perf_read_interval = std::chrono::nanoseconds(0); // Metrics - bool metric_use_frequency; + bool metric_use_frequency = true; std::uint64_t metric_count; std::uint64_t metric_frequency; - // time synchronization - bool use_clockid; - bool use_pebs; - clockid_t clockid; + std::string metric_leader; + std::vector group_counters; + std::vector userspace_counters; + // time synchronization + std::optional clockid; + bool use_pebs = false; // x86_energy - bool use_x86_energy; + bool use_x86_energy = false; // block I/O - bool use_block_io; + bool use_block_io = false; // syscalls bool use_syscalls = false; std::vector syscall_filter; // NEC SX-Aurora Tsubasa - bool use_nec; + bool use_nec = false; std::chrono::microseconds nec_read_interval; std::chrono::milliseconds nec_check_interval; // Nvidia CUPTI - bool use_nvidia; + bool use_nvidia = false; std::string cuda_injectionlib_path; uint64_t nvidia_ringbuf_size; }; diff --git a/include/lo2s/perf/bio/writer.hpp b/include/lo2s/perf/bio/writer.hpp index ff259b35..fc3eb60f 100644 --- a/include/lo2s/perf/bio/writer.hpp +++ b/include/lo2s/perf/bio/writer.hpp @@ -21,6 +21,7 @@ #pragma once +#include #include #include #include @@ -155,14 +156,14 @@ class Writer } } - std::vector get_tracepoints() + std::vector get_tracepoints() { bio_queue_ = - perf::EventProvider::instance().create_tracepoint_event("block:block_bio_queue"); + perf::EventComposer::instance().create_tracepoint_event("block:block_bio_queue"); bio_issue_ = - perf::EventProvider::instance().create_tracepoint_event("block:block_rq_issue"); + perf::EventComposer::instance().create_tracepoint_event("block:block_rq_issue"); bio_complete_ = 
- perf::EventProvider::instance().create_tracepoint_event("block:block_rq_complete"); + perf::EventComposer::instance().create_tracepoint_event("block:block_rq_complete"); return { bio_queue_.value(), bio_issue_.value(), bio_complete_.value() }; } @@ -185,9 +186,9 @@ class Writer time::Converter& time_converter_; // Unavailable until get_tracepoints() is called - std::optional bio_queue_; - std::optional bio_issue_; - std::optional bio_complete_; + std::optional bio_queue_; + std::optional bio_issue_; + std::optional bio_complete_; // The unit "sector" is always 512 bit large, regardless of the actual sector size of the device static constexpr int SECTOR_SIZE = 512; diff --git a/include/lo2s/perf/counter/counter_collection.hpp b/include/lo2s/perf/counter/counter_collection.hpp index f0269ed9..a3081698 100644 --- a/include/lo2s/perf/counter/counter_collection.hpp +++ b/include/lo2s/perf/counter/counter_collection.hpp @@ -21,7 +21,7 @@ #pragma once -#include +#include #include #include @@ -34,14 +34,18 @@ namespace counter { struct CounterCollection { - std::optional leader_; - std::vector counters; + CounterCollection() : leader(std::nullopt) + { + } + + std::optional leader = std::nullopt; + std::vector counters; double get_scale(int index) const { if (index == 0) { - return leader_.value().scale(); + return leader.value().scale(); } else { @@ -49,14 +53,9 @@ struct CounterCollection } } - Event& leader() const - { - return const_cast(leader_.value()); - } - friend bool operator==(const CounterCollection& lhs, const CounterCollection& rhs) { - if (lhs.leader_.value() == rhs.leader_.value()) + if (lhs.leader.value() == rhs.leader.value()) { return lhs.counters == rhs.counters; } @@ -65,11 +64,11 @@ struct CounterCollection friend bool operator<(const CounterCollection& lhs, const CounterCollection& rhs) { - if (lhs.leader_.value() == rhs.leader_.value()) + if (lhs.leader.value() == rhs.leader.value()) { return lhs.counters < rhs.counters; } - return 
lhs.leader_.value() < rhs.leader_.value(); + return lhs.leader.value() < rhs.leader.value(); } }; diff --git a/include/lo2s/perf/counter/counter_provider.hpp b/include/lo2s/perf/counter/counter_provider.hpp deleted file mode 100644 index 662bd84c..00000000 --- a/include/lo2s/perf/counter/counter_provider.hpp +++ /dev/null @@ -1,69 +0,0 @@ -/* - * This file is part of the lo2s software. - * Linux OTF2 sampling - * - * Copyright (c) 2017, - * Technische Universitaet Dresden, Germany - * - * lo2s is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * lo2s is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with lo2s. If not, see . 
- */ - -#pragma once - -#include -#include -#include - -#include -#include - -namespace lo2s -{ -namespace perf -{ -namespace counter -{ -class CounterProvider -{ -public: - CounterProvider() - { - } - - static CounterProvider& instance() - { - static CounterProvider provider; - return provider; - } - - void initialize_group_counters(const std::string& leader, - const std::vector& counters); - void initialize_userspace_counters(const std::vector& counters); - void initialize_tracepoints(const std::vector& tracepoints); - - bool has_group_counters(ExecutionScope scope); - bool has_userspace_counters(ExecutionScope scope); - - CounterCollection collection_for(MeasurementScope scope); - std::vector get_tracepoint_event_names(); - -private: - std::optional group_leader_; - std::vector group_events_; - std::vector userspace_events_; - std::vector tracepoint_events_; -}; -} // namespace counter -} // namespace perf -} // namespace lo2s diff --git a/include/lo2s/perf/counter/group/reader.hpp b/include/lo2s/perf/counter/group/reader.hpp index 877914bb..249b6161 100644 --- a/include/lo2s/perf/counter/group/reader.hpp +++ b/include/lo2s/perf/counter/group/reader.hpp @@ -23,7 +23,7 @@ #include #include -#include +#include #include #include diff --git a/include/lo2s/perf/counter/userspace/reader.hpp b/include/lo2s/perf/counter/userspace/reader.hpp index 0b5ae22b..567ad27f 100644 --- a/include/lo2s/perf/counter/userspace/reader.hpp +++ b/include/lo2s/perf/counter/userspace/reader.hpp @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include diff --git a/include/lo2s/perf/event.hpp b/include/lo2s/perf/event_attr.hpp similarity index 50% rename from include/lo2s/perf/event.hpp rename to include/lo2s/perf/event_attr.hpp index 86b6d7ab..e5be1e74 100644 --- a/include/lo2s/perf/event.hpp +++ b/include/lo2s/perf/event_attr.hpp @@ -1,5 +1,5 @@ -/* - * This file is part of the lo2s software. + +/* This file is part of the lo2s software. 
* Linux OTF2 sampling * * Copyright (c) 2024, @@ -29,6 +29,7 @@ #include #include +#include #include #include #include @@ -59,10 +60,17 @@ enum class Availability UNIVERSAL }; -inline Availability& operator|=(Availability& a, Availability b) noexcept +enum class EventFlag { - return a = static_cast(static_cast(a) | static_cast(b)); -} + SAMPLE_ID_ALL, + COMM, + CONTEXT_SWITCH, + MMAP, + EXCLUDE_KERNEL, + TASK, + ENABLE_ON_EXEC, + DISABLED +}; class EventGuard; @@ -70,11 +78,21 @@ class EventGuard; * Base class for all Event types * contains common attributes */ -class Event + +class EventAttr { public: - Event(const std::string& name, perf_type_id type, std::uint64_t config, - std::uint64_t config1 = 0); + EventAttr(const std::string& name, perf_type_id type, std::uint64_t config, + std::uint64_t config1 = 0); + + class InvalidEvent : public std::runtime_error + { + public: + InvalidEvent(const std::string& event_description) + : std::runtime_error(std::string{ "Invalid event: " } + event_description) + { + } + }; /** * returns an opened instance of any Event object @@ -107,12 +125,7 @@ class Event return unit_; } - const perf_event_attr& attr() const - { - return attr_; - } - - perf_event_attr& mut_attr() + perf_event_attr& attr() { return attr_; } @@ -132,94 +145,196 @@ class Event unit_ = unit; } - void clock_attrs([[maybe_unused]] bool use_clockid, [[maybe_unused]] clockid_t clockid) + void set_clockid(std::optional clockid) { -#ifndef USE_HW_BREAKPOINT_COMPAT - attr_.use_clockid = use_clockid; - attr_.clockid = clockid; -#endif + if (!clockid.has_value()) + { + attr_.use_clockid = 0; + } + else + { + attr_.use_clockid = 1; + attr_.clockid = clockid.value(); + } } - void time_attrs([[maybe_unused]] uint64_t addr, bool enable_on_exec); + void set_read_format(uint64_t read_format) + { + attr_.read_format = read_format; + } - // When we poll on the fd given by perf_event_open, wakeup, when our buffer is 80% full - // Default behaviour is to wakeup on every event, 
which is horrible performance wise - void watermark(size_t mmap_pages) + void set_flags(const std::vector& flags) { - attr_.watermark = 1; - attr_.wakeup_watermark = static_cast(0.8 * mmap_pages * sysconf(_SC_PAGESIZE)); + for (const auto& flag : flags) + { + switch (flag) + { + case EventFlag::SAMPLE_ID_ALL: + attr_.sample_id_all = 1; + break; + case EventFlag::COMM: + attr_.comm = 1; + break; + case EventFlag::CONTEXT_SWITCH: + attr_.context_switch = 1; + break; + case EventFlag::MMAP: + attr_.mmap = 1; + break; + case EventFlag::EXCLUDE_KERNEL: + attr_.exclude_kernel = 1; + break; + case EventFlag::TASK: + attr_.task = 1; + break; + case EventFlag::ENABLE_ON_EXEC: + attr_.enable_on_exec = 1; + break; + case EventFlag::DISABLED: + attr_.disabled = 1; + break; + } + } + } + + bool get_flag(EventFlag flag) + { + switch (flag) + { + case EventFlag::SAMPLE_ID_ALL: + return attr_.sample_id_all == 1; + case EventFlag::COMM: + return attr_.comm == 1; + case EventFlag::CONTEXT_SWITCH: + return attr_.context_switch == 1; + case EventFlag::MMAP: + return attr_.mmap == 1; + case EventFlag::EXCLUDE_KERNEL: + return attr_.exclude_kernel == 1; + case EventFlag::TASK: + return attr_.task == 1; + case EventFlag::ENABLE_ON_EXEC: + return attr_.enable_on_exec == 1; + case EventFlag::DISABLED: + return attr_.disabled == 1; + default: + return false; + } + } + + uint64_t get_precise_ip() + { + return attr_.precise_ip; + } + + void set_precise_ip(uint64_t precise_ip) + { + if (precise_ip > 3) + { + throw std::runtime_error("precise_ip set to > 3!"); + } + attr_.precise_ip = precise_ip; + } + + void set_sample_type(uint64_t format) + { + attr_.sample_type |= format; } - friend std::ostream& operator<<(std::ostream& stream, const Event& event); + friend std::ostream& operator<<(std::ostream& stream, const EventAttr& event); - void exclude_kernel(bool exclude_kernel) + void set_watermark(uint64_t bytes) { - attr_.exclude_kernel = exclude_kernel; + attr_.watermark = 1; + 
attr_.wakeup_watermark = static_cast(bytes); } void sample_period(const int& period); void sample_freq(const uint64_t& freq); void event_attr_update(std::uint64_t value, const std::string& format); - void parse_pmu_path(const std::string& ev_name); - void parse_cpus(); const std::set& supported_cpus() const; - bool is_valid() const; bool event_is_openable(); bool is_available_in(ExecutionScope scope) const { - // per-process should always work. the counter will just not count if the process is - // scheduled on a core that is not supprted by that counter - return scope.is_thread() || cpus_.empty() || cpus_.count(scope.as_cpu()); + if (availability_ == Availability::UNAVAILABLE) + { + return false; + } + if (scope.is_thread() || scope.is_process()) + { + return availability_ != Availability::SYSTEM_MODE; + } + else + { + return availability_ != Availability::PROCESS_MODE && + (cpus_.empty() || cpus_.count(scope.as_cpu())); + } } bool degrade_precision(); - friend bool operator==(const Event& lhs, const Event& rhs) + friend bool operator==(const EventAttr& lhs, const EventAttr& rhs) { - return !memcmp(&lhs.attr_, &rhs.attr_, sizeof(struct perf_event_attr)); + return memcmp(&lhs.attr_, &rhs.attr_, sizeof(struct perf_event_attr)) == 0; } - friend bool operator<(const Event& lhs, const Event& rhs) + friend bool operator<(const EventAttr& lhs, const EventAttr& rhs) { - return memcmp(&rhs.attr_, &lhs.attr_, sizeof(struct perf_event_attr)); + return memcmp(&rhs.attr_, &lhs.attr_, sizeof(struct perf_event_attr)) < 0; } - friend bool operator>(const Event& lhs, const Event& rhs) + friend bool operator>(const EventAttr& lhs, const EventAttr& rhs) { - return memcmp(&lhs.attr_, &rhs.attr_, sizeof(struct perf_event_attr)); + return memcmp(&lhs.attr_, &rhs.attr_, sizeof(struct perf_event_attr)) > 0; } protected: void update_availability(); - void set_common_attrs(bool enable_on_exec); - struct perf_event_attr attr_; double scale_ = 1; std::string unit_ = "#"; - std::string name_ 
= ""; + std::string name_; std::set cpus_; Availability availability_ = Availability::UNAVAILABLE; +}; - std::filesystem::path pmu_path_; - std::string pmu_name_; +class SimpleEventAttr : public EventAttr +{ +public: + SimpleEventAttr(const std::string& name, perf_type_id type, std::uint64_t config, + std::uint64_t config1 = 0); + static SimpleEventAttr raw(const std::string& name); }; +#ifndef USE_HW_BREAKPOINT_COMPAT +class BreakpointEventAttr : public EventAttr +{ +public: + BreakpointEventAttr(uint64_t addr, uint64_t bp_type) + : EventAttr(std::to_string(addr), PERF_TYPE_BREAKPOINT, 0) + { + + attr_.bp_type = bp_type; + attr_.bp_addr = addr; + attr_.bp_len = HW_BREAKPOINT_LEN_8; + } +}; +#endif + /** * Contains an event parsed from sysfs * @note call on use_sampling_options() after creation to get a valid * event, otherwise the availability will be set to UNAVAILABLE */ -class SysfsEvent : public Event +class SysfsEventAttr : public EventAttr + { public: - SysfsEvent(const std::string& ev_name, bool enable_on_exec = false); - - void make_invalid(); - void use_sampling_options(const bool& use_pebs, const bool& sampling, const bool& enable_cct); + SysfsEventAttr(const std::string ev_name); }; /** @@ -229,7 +344,7 @@ class SysfsEvent : public Event class EventGuard { public: - EventGuard(Event& ev, std::variant location, int group_fd, int cgroup_fd); + EventGuard(EventAttr& ev, std::variant location, int group_fd, int cgroup_fd); EventGuard() = delete; EventGuard(const EventGuard& other) = delete; @@ -249,7 +364,7 @@ class EventGuard /** * opens child as a counter of the calling (leader) event */ - EventGuard open_child(Event child, ExecutionScope location, int cgroup_fd = -1); + EventGuard open_child(EventAttr child, ExecutionScope location, int cgroup_fd = -1); void enable(); void disable(); diff --git a/include/lo2s/perf/event_composer.hpp b/include/lo2s/perf/event_composer.hpp new file mode 100644 index 00000000..10df7e2d --- /dev/null +++ 
b/include/lo2s/perf/event_composer.hpp @@ -0,0 +1,70 @@ +/* + * This file is part of the lo2s software. + * Linux OTF2 sampling + * + * Copyright (c) 2016, + * Technische Universitaet Dresden, Germany + * + * lo2s is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * lo2s is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with lo2s. If not, see . + */ + +#pragma once + +#include +#include +#include +#include + +namespace lo2s +{ +namespace perf +{ +class EventComposer +{ + +public: + EventComposer(); + + static EventComposer& instance() + { + static EventComposer e; + return e; + } + + counter::CounterCollection counters_for(MeasurementScope scope); + + EventAttr create_time_event(uint64_t local_time); + EventAttr create_sampling_event(); + perf::tracepoint::TracepointEventAttr create_tracepoint_event(const std::string& name); + std::vector get_tracepoints(); + +private: + // When we poll on the fd given by perf_event_open, wakeup, when our buffer is 80% full + // Default behaviour is to wakeup on every event, which is horrible performance wise + void watermark(EventAttr& ev) + { + ev.set_watermark(0.8 * config().mmap_pages * sysconf(_SC_PAGESIZE)); + } + + void read_userspace_counters(); + void read_group_counters(); + + std::optional sampling_event_; + std::optional group_counters_; + std::optional userspace_counters_; + std::optional> tracepoint_events_; + bool exclude_kernel_; +}; +} // namespace perf +} // namespace lo2s diff --git a/include/lo2s/perf/event_provider.hpp b/include/lo2s/perf/event_provider.hpp 
deleted file mode 100644 index 5c2a0c57..00000000 --- a/include/lo2s/perf/event_provider.hpp +++ /dev/null @@ -1,91 +0,0 @@ -/* - * This file is part of the lo2s software. - * Linux OTF2 sampling - * - * Copyright (c) 2017, - * Technische Universitaet Dresden, Germany - * - * lo2s is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * lo2s is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with lo2s. If not, see . - */ - -#pragma once - -#include -#include -#include -#include - -#include - -namespace lo2s -{ -namespace perf -{ - -class EventProvider -{ -public: - EventProvider(); - EventProvider(const EventProvider&) = delete; - void operator=(const EventProvider&) = delete; - - static const EventProvider& instance() - { - return instance_mutable(); - } - - static Event get_event_by_name(const std::string& name); - - static bool has_event(const std::string& name); - - static std::vector get_predefined_events(); - static std::vector get_pmu_events(); - - static Event fallback_metric_leader_event(); - - static Event create_time_event(std::uint64_t local_time, bool enable_on_exec = false); - static Event create_event(const std::string& name, perf_type_id type, std::uint64_t config, - std::uint64_t config1 = 0); - static SysfsEvent create_sampling_event(bool enable_on_exec); - static SysfsEvent create_sysfs_event(const std::string& name, bool use_config = true); - static tracepoint::TracepointEvent create_tracepoint_event(const std::string& name, - bool use_config = true, - bool enable_on_exec = false); - - class 
InvalidEvent : public std::runtime_error - { - public: - InvalidEvent(const std::string& event_description) - : std::runtime_error(std::string{ "Invalid event: " } + event_description) - { - } - }; - -private: - static EventProvider& instance_mutable() - { - static EventProvider e; - return e; - } - - static void apply_config_attrs(Event& event); - static void apply_default_attrs(Event& event); - - Event cache_event(const std::string& name); - - std::unordered_map event_map_; -}; - -} // namespace perf -} // namespace lo2s diff --git a/include/lo2s/perf/event_resolver.hpp b/include/lo2s/perf/event_resolver.hpp new file mode 100644 index 00000000..f2fb5e5f --- /dev/null +++ b/include/lo2s/perf/event_resolver.hpp @@ -0,0 +1,68 @@ +/* + * This file is part of the lo2s software. + * Linux OTF2 sampling + * + * Copyright (c) 2017, + * Technische Universitaet Dresden, Germany + * + * lo2s is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * lo2s is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with lo2s. If not, see . 
+ */ + +#pragma once + +#include +#include +#include +#include + +#include + +namespace lo2s +{ +namespace perf +{ + +class EventResolver +{ +public: + EventAttr get_event_by_name(const std::string& name); + + bool has_event(const std::string& name); + + std::vector get_predefined_events(); + std::vector get_pmu_events(); + + EventAttr get_metric_leader(const std::string& metric_leader); + + std::vector get_tracepoint_event_names(); + + static EventResolver& instance() + { + static EventResolver e; + return e; + } + +private: + EventAttr fallback_metric_leader_event(); + EventResolver(); + EventResolver(const EventResolver&) = delete; + void operator=(const EventResolver&) = delete; + + EventAttr cache_event(const std::string& name); + + std::unordered_map> event_map_; +}; + +} // namespace perf +} // namespace lo2s diff --git a/include/lo2s/perf/io_reader.hpp b/include/lo2s/perf/io_reader.hpp index 6003b026..782ecdc3 100644 --- a/include/lo2s/perf/io_reader.hpp +++ b/include/lo2s/perf/io_reader.hpp @@ -22,9 +22,10 @@ #pragma once #include -#include +#include #include -#include +#include +#include #include #include @@ -61,34 +62,39 @@ struct __attribute((__packed__)) TracepointSampleType struct IoReaderIdentity { - IoReaderIdentity(std::string tracepoint_name, Cpu cpu) : cpu(cpu) + IoReaderIdentity(perf::tracepoint::TracepointEventAttr event, Cpu cpu) + : tracepoint_(event), cpu(cpu) { - tracepoint_.value() = EventProvider::instance().create_tracepoint_event(tracepoint_name); } - std::optional tracepoint_; + tracepoint::TracepointEventAttr tracepoint_; Cpu cpu; - tracepoint::TracepointEvent tracepoint() + tracepoint::TracepointEventAttr tracepoint() const { - return tracepoint_.value(); + return tracepoint_; } friend bool operator>(const IoReaderIdentity& lhs, const IoReaderIdentity& rhs) { if (lhs.cpu == rhs.cpu) { - return lhs.tracepoint_.value() > rhs.tracepoint_.value(); + return lhs.tracepoint_ > rhs.tracepoint_; } return lhs.cpu > rhs.cpu; } + friend bool 
operator==(const IoReaderIdentity& lhs, const IoReaderIdentity& rhs) + { + return lhs.cpu == rhs.cpu && lhs.tracepoint_ == rhs.tracepoint_; + } + friend bool operator<(const IoReaderIdentity& lhs, const IoReaderIdentity& rhs) { if (lhs.cpu == rhs.cpu) { - return lhs.tracepoint_.value() < rhs.tracepoint_.value(); + return lhs.tracepoint_ < rhs.tracepoint_; } return lhs.cpu < rhs.cpu; diff --git a/include/lo2s/perf/multi_reader.hpp b/include/lo2s/perf/multi_reader.hpp index 4692067a..f2d0f7a0 100644 --- a/include/lo2s/perf/multi_reader.hpp +++ b/include/lo2s/perf/multi_reader.hpp @@ -53,7 +53,7 @@ class MultiReader { for (auto tp : writer_.get_tracepoints()) { - IoReaderIdentity id(tp.name(), cpu); + IoReaderIdentity id(tp, cpu); auto reader = readers_.emplace(std::piecewise_construct, std::forward_as_tuple(id), std::forward_as_tuple(id)); fds_.emplace_back(reader.first->second.fd()); diff --git a/include/lo2s/perf/pfm.hpp b/include/lo2s/perf/pfm.hpp index b7a2a1c9..2c610601 100644 --- a/include/lo2s/perf/pfm.hpp +++ b/include/lo2s/perf/pfm.hpp @@ -21,7 +21,7 @@ #pragma once -#include +#include #include #include @@ -56,7 +56,7 @@ class PFM4 pfm_terminate(); } - std::optional pfm4_read_event(const std::string& ev_desc) const + EventAttr pfm4_read_event(const std::string& ev_desc) const { pfm_perf_encode_arg_t arg; struct perf_event_attr attr; @@ -69,22 +69,17 @@ class PFM4 if (ret != PFM_SUCCESS) { - return std::nullopt; + throw EventAttr::InvalidEvent("Coudld not read PFM event encoding!"); } - Event ev = Event(ev_desc, (perf_type_id)attr.type, attr.config, attr.config1); - - if (!ev.event_is_openable()) - { - return std::nullopt; - } + EventAttr ev = SimpleEventAttr(ev_desc, (perf_type_id)attr.type, attr.config, attr.config1); return ev; } - std::vector get_pfm4_events() const + std::vector get_pfm4_events() const { - std::vector events; + std::vector events; pfm_pmu_info_t pinfo; pfm_event_info_t info; @@ -133,32 +128,40 @@ class PFM4 else { has_umask = true; - 
auto uevent = pfm4_read_event( - fmt::format("{}:{}", full_event_name, attr_info.name)); - if (uevent) + try + { + + auto uevent = pfm4_read_event( + fmt::format("{}:{}", full_event_name, attr_info.name)); + events.emplace_back(uevent); + } + catch (EventAttr::InvalidEvent& e) { - events.emplace_back(std::move(*uevent)); } } } if (!has_umask) { - auto event = pfm4_read_event(std::string(full_event_name)); - - if (event) + try + { + auto event = pfm4_read_event(std::string(full_event_name)); + events.emplace_back(event); + } + catch (EventAttr::InvalidEvent& e) { - events.emplace_back(std::move(*event)); } } } else { - auto event = pfm4_read_event(std::string(full_event_name)); - - if (event) + try + { + auto event = pfm4_read_event(std::string(full_event_name)); + events.emplace_back(event); + } + catch (EventAttr::InvalidEvent& e) { - events.emplace_back(std::move(*event)); } } } diff --git a/include/lo2s/perf/sample/reader.hpp b/include/lo2s/perf/sample/reader.hpp index 2b818226..c16e4cd0 100644 --- a/include/lo2s/perf/sample/reader.hpp +++ b/include/lo2s/perf/sample/reader.hpp @@ -21,8 +21,9 @@ #pragma once -#include +#include #include +#include #include #include @@ -84,40 +85,18 @@ class Reader : public EventReader Log::debug() << "initializing event_reader for:" << scope.name() << ", enable_on_exec: " << enable_on_exec; - Event event = EventProvider::instance().create_sampling_event(enable_on_exec); - - do + EventAttr event = EventComposer::instance().create_sampling_event(); + if (enable_on_exec) { - try - { - event_ = event.open(scope, config().cgroup_fd); - } - catch (const std::system_error& e) - { - if (e.code().value() == EACCES && !event.attr().exclude_kernel && - perf_event_paranoid() > 1) - { - event.mut_attr().exclude_kernel = 1; - perf_warn_paranoid(); - continue; - } - - if (!event.degrade_precision()) - { - Log::error() << "perf_event_open for sampling failed: " << e.what(); - - if (event.attr().use_clockid) - { - Log::error() << "maybe the 
specified clock is unavailable?"; - } - throw_errno(); - } - } - } while (!event_.value().is_valid()); + event.set_flags({ EventFlag::ENABLE_ON_EXEC }); + } + else + { + event.set_flags({ EventFlag::DISABLED }); + } - Log::debug() << "Using precise_ip level: " << event.attr().precise_ip; + event_ = event.open(scope, config().cgroup_fd); - // Exception safe, so much wow! try { init_mmap(event_.value().get_fd()); diff --git a/include/lo2s/perf/syscall/reader.hpp b/include/lo2s/perf/syscall/reader.hpp index 5a51d15b..7a3c1db5 100644 --- a/include/lo2s/perf/syscall/reader.hpp +++ b/include/lo2s/perf/syscall/reader.hpp @@ -23,8 +23,9 @@ #include #include -#include +#include #include +#include #include #include #include @@ -69,11 +70,13 @@ class Reader : public EventReader Reader(Cpu cpu) : cpu_(cpu) { - tracepoint::TracepointEvent enter_event = - EventProvider::instance().create_tracepoint_event("raw_syscalls:sys_enter"); - tracepoint::TracepointEvent exit_event = - EventProvider::instance().create_tracepoint_event("raw_syscalls:sys_exit"); + tracepoint::TracepointEventAttr enter_event = + EventComposer::instance().create_tracepoint_event("raw_syscalls:sys_enter"); + tracepoint::TracepointEventAttr exit_event = + EventComposer::instance().create_tracepoint_event("raw_syscalls:sys_exit"); + enter_event.set_sample_type(PERF_SAMPLE_IDENTIFIER); + exit_event.set_sample_type(PERF_SAMPLE_IDENTIFIER); try { enter_ev_ = enter_event.open(cpu_, config().cgroup_fd); diff --git a/include/lo2s/perf/time/reader.hpp b/include/lo2s/perf/time/reader.hpp index 468e81cc..31f1df23 100644 --- a/include/lo2s/perf/time/reader.hpp +++ b/include/lo2s/perf/time/reader.hpp @@ -22,8 +22,8 @@ #pragma once #include -#include #include +#include #include diff --git a/include/lo2s/perf/tracepoint/event.hpp b/include/lo2s/perf/tracepoint/event_attr.hpp similarity index 88% rename from include/lo2s/perf/tracepoint/event.hpp rename to include/lo2s/perf/tracepoint/event_attr.hpp index ffc7001a..cb883707 
100644 --- a/include/lo2s/perf/tracepoint/event.hpp +++ b/include/lo2s/perf/tracepoint/event_attr.hpp @@ -20,7 +20,7 @@ */ #pragma once -#include +#include #include namespace lo2s @@ -33,7 +33,7 @@ namespace tracepoint /** * Contains an event that is addressable via name */ -class TracepointEvent : public Event +class TracepointEventAttr : public EventAttr { public: class ParseError : public std::runtime_error @@ -46,7 +46,7 @@ class TracepointEvent : public Event ParseError(const std::string& what, int error_code); }; - TracepointEvent(const std::string& name, bool enable_on_exec = false); + TracepointEventAttr(const std::string& name); void parse_format(); @@ -72,17 +72,11 @@ class TracepointEvent : public Event return id_; } - std::string name() const - { - return name_; - } - private: void parse_format_line(const std::string& line); const static std::filesystem::path base_path_; int id_; - std::string name_; std::vector fields_; }; diff --git a/include/lo2s/perf/tracepoint/reader.hpp b/include/lo2s/perf/tracepoint/reader.hpp index a743a890..831c8928 100644 --- a/include/lo2s/perf/tracepoint/reader.hpp +++ b/include/lo2s/perf/tracepoint/reader.hpp @@ -23,8 +23,9 @@ #include -#include +#include #include +#include #include #include @@ -115,8 +116,7 @@ class Reader : public EventReader RecordDynamicFormat raw_data; }; - Reader(Cpu cpu, std::string name) - : event_(EventProvider::instance().create_tracepoint_event(name)), cpu_(cpu) + Reader(Cpu cpu, perf::tracepoint::TracepointEventAttr ev) : event_(ev), cpu_(cpu) { try { @@ -158,7 +158,7 @@ class Reader : public EventReader protected: using EventReader::init_mmap; - TracepointEvent event_; + TracepointEventAttr event_; private: Cpu cpu_; diff --git a/include/lo2s/perf/tracepoint/writer.hpp b/include/lo2s/perf/tracepoint/writer.hpp index aad1a46e..ac5c8496 100644 --- a/include/lo2s/perf/tracepoint/writer.hpp +++ b/include/lo2s/perf/tracepoint/writer.hpp @@ -44,7 +44,7 @@ namespace tracepoint class Writer : public 
Reader { public: - Writer(Cpu cpu, const std::string& name, trace::Trace& trace, + Writer(Cpu cpu, perf::tracepoint::TracepointEventAttr event, trace::Trace& trace, const otf2::definition::metric_class& metric_class); Writer(const Writer& other) = delete; diff --git a/include/lo2s/platform.hpp b/include/lo2s/platform.hpp index 62d4faa0..e44eda4b 100644 --- a/include/lo2s/platform.hpp +++ b/include/lo2s/platform.hpp @@ -32,7 +32,7 @@ #include -#include +#include /* gracefully copied from https://github.com/deater/perf_event_tests/blob/master/ */ @@ -94,6 +94,6 @@ enum class Processor ARM1176 = 204, }; -std::vector get_mem_events(); +std::vector get_mem_events(); } // namespace platform } // namespace lo2s diff --git a/include/lo2s/topology.hpp b/include/lo2s/topology.hpp index 06b8e290..0d9cc97d 100644 --- a/include/lo2s/topology.hpp +++ b/include/lo2s/topology.hpp @@ -134,7 +134,5 @@ class Topology std::map cpu_to_package_; bool hypervised_ = false; - - const static std::filesystem::path base_path; }; } // namespace lo2s diff --git a/include/lo2s/trace/reg_keys.hpp b/include/lo2s/trace/reg_keys.hpp index 01734c65..c4484c89 100644 --- a/include/lo2s/trace/reg_keys.hpp +++ b/include/lo2s/trace/reg_keys.hpp @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include @@ -137,7 +137,7 @@ struct BySamplingEventName { }; -using BySamplingEvent = SimpleKeyType; +using BySamplingEvent = SimpleKeyType; struct ByCounterCollectionTag { diff --git a/include/lo2s/trace/trace.hpp b/include/lo2s/trace/trace.hpp index 2a4f7cc9..0674b718 100644 --- a/include/lo2s/trace/trace.hpp +++ b/include/lo2s/trace/trace.hpp @@ -19,7 +19,6 @@ * along with lo2s. If not, see . 
*/ #pragma once -#include "otf2xx/definition/calling_context.hpp" #include #include #include @@ -28,8 +27,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include @@ -176,7 +175,7 @@ class Trace return cpuid_metric_class_; } - otf2::definition::metric_member& get_event_metric_member(perf::Event event) + otf2::definition::metric_member& get_event_metric_member(perf::EventAttr event) { return registry_.emplace( BySamplingEvent(event), intern(event.name()), intern(event.name()), @@ -233,7 +232,7 @@ class Trace } const perf::counter::CounterCollection& counter_collection = - perf::counter::CounterProvider::instance().collection_for(scope); + perf::EventComposer::instance().counters_for(scope); if (registry_.has(ByCounterCollection(counter_collection))) { @@ -247,7 +246,7 @@ class Trace if (scope.type == MeasurementScopeType::GROUP_METRIC) { - metric_class.add_member(get_event_metric_member(counter_collection.leader())); + metric_class.add_member(get_event_metric_member(counter_collection.leader.value())); } for (const auto& counter : counter_collection.counters) @@ -275,7 +274,7 @@ class Trace } otf2::definition::metric_class& - tracepoint_metric_class(const perf::tracepoint::TracepointEvent& event); + tracepoint_metric_class(const perf::tracepoint::TracepointEventAttr& event); const otf2::definition::interrupt_generator& interrupt_generator() const { diff --git a/src/config.cpp b/src/config.cpp index 28377b5a..d69c029c 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -25,8 +25,7 @@ #include #include -#include -#include +#include #ifdef HAVE_LIBPFM #include #endif @@ -72,16 +71,11 @@ static inline void list_arguments_sorted(std::ostream& os, const std::string& de } static inline void print_availability(std::ostream& os, const std::string& description, - std::vector events) + std::vector events) { std::vector event_names; for (const auto& ev : events) { - if (!ev.is_valid()) - { - continue; - } - std::string availability = 
""; std::string cpu = ""; if (ev.availability() == perf::Availability::PROCESS_MODE) @@ -91,13 +85,32 @@ static inline void print_availability(std::ostream& os, const std::string& descr else if (ev.availability() == perf::Availability::SYSTEM_MODE) { availability = " #"; - } - if (ev.supported_cpus() != Topology::instance().cpus()) - { - const auto& cpus = ev.supported_cpus(); - cpu = - fmt::format(" [ CPUs {}-{} ]", std::min_element(cpus.begin(), cpus.end())->as_int(), - std::max_element(cpus.begin(), cpus.end())->as_int()); + if (ev.supported_cpus() != Topology::instance().cpus()) + { + const auto& cpus = ev.supported_cpus(); + std::vector cpus_int; + std::transform(cpus.begin(), cpus.end(), std::back_inserter(cpus_int), + [](Cpu cpu) { return cpu.as_int(); }); + + if (cpus_int.size() == 1) + { + cpu = fmt::format(" [ CPU {} ]", *cpus_int.begin()); + } + else + { + int min_cpu = *std::min_element(cpus_int.begin(), cpus_int.end()); + int max_cpu = *std::max_element(cpus_int.begin(), cpus_int.end()); + + if (max_cpu - min_cpu + 1 == static_cast(cpus_int.size())) + { + cpu = fmt::format(" [ CPUs {}-{} ]", min_cpu, max_cpu); + } + else + { + cpu = fmt::format(" [ CPUs {}]", fmt::join(cpus_int, ", ")); + } + } + } } event_names.push_back(ev.name() + availability + cpu); @@ -407,7 +420,7 @@ void parse_program_options(int argc, const char** argv) config.use_x86_energy = arguments.given("x86-energy"); config.use_sensors = arguments.given("sensors"); config.use_block_io = arguments.given("block-io"); - + config.tracepoint_events = arguments.get_all("tracepoint"); #ifdef HAVE_CUDA config.cuda_injectionlib_path = arguments.get("nvidia-injection-path"); #endif @@ -425,6 +438,26 @@ void parse_program_options(int argc, const char** argv) std::exit(EXIT_SUCCESS); } + if (!arguments.get_all("tracepoint").empty() || arguments.given("block-io") || + !arguments.get_all("syscall").empty()) + { + try + { + if (!std::filesystem::exists("/sys/kernel/debug/tracing")) + { + Log::error() 
<< "syscall, block-io and tracepoint recording require access to " + "/sys/kernel/debug/tracing, make sure it exists and is accessible"; + std::exit(EXIT_FAILURE); + } + } + catch (std::filesystem::filesystem_error&) + { + Log::error() << "syscall, block-io and tracepoint recording require access to " + "/sys/kernel/debug/tracing, make sure it exists and is accessible"; + std::exit(EXIT_FAILURE); + } + } + if (arguments.given("quiet") && arguments.given("verbose")) { lo2s::Log::warn() << "Cannot be quiet and verbose at the same time. Refusing to be quiet."; @@ -487,11 +520,11 @@ void parse_program_options(int argc, const char** argv) if (arguments.given("list-events")) { print_availability(std::cout, "predefined events", - perf::EventProvider::get_predefined_events()); - + perf::EventResolver::instance().get_predefined_events()); // TODO: find a better solution ? - std::vector sys_events = perf::EventProvider::get_pmu_events(); - std::vector events(sys_events.begin(), sys_events.end()); + std::vector sys_events = + perf::EventResolver::instance().get_pmu_events(); + std::vector events(sys_events.begin(), sys_events.end()); print_availability(std::cout, "Kernel PMU events", events); #ifdef HAVE_LIBPFM @@ -508,7 +541,7 @@ void parse_program_options(int argc, const char** argv) if (arguments.given("list-tracepoints")) { std::vector tracepoints = - perf::counter::CounterProvider::instance().get_tracepoint_event_names(); + perf::EventResolver::instance().get_tracepoint_event_names(); if (tracepoints.empty()) { @@ -667,7 +700,7 @@ void parse_program_options(int argc, const char** argv) perf::perf_check_disabled(); } - if (config.sampling && !perf::EventProvider::has_event(config.sampling_event)) + if (config.sampling && !perf::EventResolver::instance().has_event(config.sampling_event)) { lo2s::Log::fatal() << "requested sampling event \'" << config.sampling_event << "\' is not available!"; @@ -675,8 +708,9 @@ void parse_program_options(int argc, const char** argv) } // 
time synchronization - config.use_clockid = false; config.use_pebs = false; + config.clockid = std::nullopt; + try { std::string requested_clock_name = arguments.get("clockid"); @@ -692,7 +726,6 @@ void parse_program_options(int argc, const char** argv) lo2s::Log::debug() << "Using clock \'" << clock.name << "\'."; #ifndef USE_HW_BREAKPOINT_COMPAT - config.use_clockid = true; config.clockid = clock.id; #else if (requested_clock_name != "monotonic-raw") @@ -795,12 +828,9 @@ void parse_program_options(int argc, const char** argv) perf_group_events.emplace_back("cpu-cycles"); } - perf::counter::CounterProvider::instance().initialize_tracepoints( - arguments.get_all("tracepoint")); - perf::counter::CounterProvider::instance().initialize_group_counters( - arguments.get("metric-leader"), perf_group_events); - perf::counter::CounterProvider::instance().initialize_userspace_counters(perf_userspace_events); - + config.metric_leader = arguments.get("metric-leader"); + config.group_counters = perf_group_events; + config.userspace_counters = perf_userspace_events; config.exclude_kernel = !static_cast(arguments.given("kernel")); if (arguments.count("x86-adapt-knob")) diff --git a/src/monitor/cpu_set_monitor.cpp b/src/monitor/cpu_set_monitor.cpp index 44fb8586..cc2d8eb9 100644 --- a/src/monitor/cpu_set_monitor.cpp +++ b/src/monitor/cpu_set_monitor.cpp @@ -27,7 +27,6 @@ #include #include -#include #include diff --git a/src/monitor/main_monitor.cpp b/src/monitor/main_monitor.cpp index e2a168fe..8b5313bd 100644 --- a/src/monitor/main_monitor.cpp +++ b/src/monitor/main_monitor.cpp @@ -33,6 +33,7 @@ namespace lo2s { namespace monitor { + MainMonitor::MainMonitor() : trace_(), metrics_(trace_) { if (config().sampling) diff --git a/src/monitor/process_monitor.cpp b/src/monitor/process_monitor.cpp index f4d66047..d8551760 100644 --- a/src/monitor/process_monitor.cpp +++ b/src/monitor/process_monitor.cpp @@ -21,7 +21,6 @@ #include #include -#include #include namespace lo2s @@ -51,15 
+50,20 @@ void ProcessMonitor::insert_thread(Process process, Thread thread, std::string n process_infos_.try_emplace(process, process, spawn); } + ExecutionScope scope = ExecutionScope(thread); if (config().sampling || - perf::counter::CounterProvider::instance().has_group_counters(ExecutionScope(thread)) || - perf::counter::CounterProvider::instance().has_userspace_counters(ExecutionScope(thread))) + !perf::EventComposer::instance() + .counters_for(MeasurementScope::group_metric(scope)) + .counters.empty() || + !perf::EventComposer::instance() + .counters_for(MeasurementScope::userspace_metric(scope)) + .counters.empty()) { try { - auto inserted = threads_.emplace( - std::piecewise_construct, std::forward_as_tuple(thread), - std::forward_as_tuple(ExecutionScope(thread), *this, spawn, is_process)); + auto inserted = + threads_.emplace(std::piecewise_construct, std::forward_as_tuple(thread), + std::forward_as_tuple(scope, *this, spawn, is_process)); assert(inserted.second); // actually start thread inserted.first->second.start(); diff --git a/src/monitor/process_monitor_main.cpp b/src/monitor/process_monitor_main.cpp index 7927c801..4ffe7a1d 100644 --- a/src/monitor/process_monitor_main.cpp +++ b/src/monitor/process_monitor_main.cpp @@ -158,9 +158,9 @@ std::vector to_vector_of_c_str(const std::vector& vec) { env = { "CUDA_INJECTION64_PATH=" + config().cuda_injectionlib_path }; - if (config().use_clockid) + if (config().clockid) { - env.push_back("LO2S_CLOCKID=" + std::to_string(config().clockid)); + env.push_back("LO2S_CLOCKID=" + std::to_string(config().clockid.value())); } } #endif diff --git a/src/monitor/scope_monitor.cpp b/src/monitor/scope_monitor.cpp index af9f6af9..d2c65e68 100644 --- a/src/monitor/scope_monitor.cpp +++ b/src/monitor/scope_monitor.cpp @@ -56,14 +56,18 @@ ScopeMonitor::ScopeMonitor(ExecutionScope scope, MainMonitor& parent, bool enabl add_fd(syscall_writer_->fd()); } - if (perf::counter::CounterProvider::instance().has_group_counters(scope)) 
+ if (!perf::EventComposer::instance() + .counters_for(MeasurementScope::group_metric(scope)) + .counters.empty()) { group_counter_writer_ = std::make_unique(scope, parent.trace(), enable_on_exec); add_fd(group_counter_writer_->fd()); } - if (perf::counter::CounterProvider::instance().has_userspace_counters(scope)) + if (!perf::EventComposer::instance() + .counters_for(MeasurementScope::userspace_metric(scope)) + .counters.empty()) { userspace_counter_writer_ = std::make_unique(scope, parent.trace()); diff --git a/src/monitor/tracepoint_monitor.cpp b/src/monitor/tracepoint_monitor.cpp index 35bbc37d..882ffe15 100644 --- a/src/monitor/tracepoint_monitor.cpp +++ b/src/monitor/tracepoint_monitor.cpp @@ -36,15 +36,14 @@ namespace monitor TracepointMonitor::TracepointMonitor(trace::Trace& trace, Cpu cpu) : monitor::PollMonitor(trace, "", config().perf_read_interval), cpu_(cpu) { - perf::counter::CounterCollection tracepoint_collection = - perf::counter::CounterProvider::instance().collection_for( - MeasurementScope::tracepoint(cpu_.as_scope())); + std::vector tracepoint_events = + perf::EventComposer::instance().get_tracepoints(); - for (const auto& event : tracepoint_collection.counters) + for (const auto& event : tracepoint_events) { - auto& mc = trace.tracepoint_metric_class(event.name()); + auto& mc = trace.tracepoint_metric_class(event); std::unique_ptr writer = - std::make_unique(cpu, event.name(), trace, mc); + std::make_unique(cpu, event, trace, mc); add_fd(writer->fd()); perf_writers_.emplace(std::piecewise_construct, std::forward_as_tuple(writer->fd()), diff --git a/src/perf/counter/counter_provider.cpp b/src/perf/counter/counter_provider.cpp deleted file mode 100644 index eb6922c5..00000000 --- a/src/perf/counter/counter_provider.cpp +++ /dev/null @@ -1,246 +0,0 @@ -/* - * This file is part of the lo2s software. 
- * Linux OTF2 sampling - * - * Copyright (c) 2017, - * Technische Universitaet Dresden, Germany - * - * lo2s is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * lo2s is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with lo2s. If not, see . - */ - -#include -#include -#include -#include -#include - -#include - -namespace lo2s -{ -namespace perf -{ -namespace counter -{ -void CounterProvider::initialize_tracepoints(const std::vector& tracepoints) -{ - assert(tracepoint_events_.empty()); - - for (const auto& ev_name : tracepoints) - { - try - { - tracepoint_events_.emplace_back( - EventProvider::instance().create_tracepoint_event(ev_name, false)); - } - catch (const perf::EventProvider::InvalidEvent& e) - { - Log::warn() << "'" << ev_name - << "' does not name a known event, ignoring! (reason: " << e.what() << ")"; - } - } -} - -void CounterProvider::initialize_userspace_counters(const std::vector& counters) -{ - assert(userspace_events_.empty()); - - for (const auto& ev : counters) - { - try - { - userspace_events_.emplace_back(perf::EventProvider::get_event_by_name(ev)); - userspace_events_.back().sample_period(0); - } - catch (const perf::EventProvider::InvalidEvent& e) - { - Log::warn() << "'" << ev - << "' does not name a known event, ignoring! 
(reason: " << e.what() << ")"; - } - } -} - -void CounterProvider::initialize_group_counters(const std::string& leader, - const std::vector& counters) -{ - assert(group_events_.empty()); - - if (leader == "") - { - Log::info() << "choosing default metric-leader"; - - try - { - group_leader_ = EventProvider::get_event_by_name("cpu-clock"); - } - catch (const EventProvider::InvalidEvent& e) - { - Log::warn() << "cpu-clock isn't available, trying to use a fallback event"; - try - { - group_leader_ = EventProvider::fallback_metric_leader_event(); - } - catch (const perf::EventProvider::InvalidEvent& e) - { - Log::error() << "Failed to determine a suitable metric leader event"; - Log::error() << "Try manually specifying one with --metric-leader."; - - throw perf::EventProvider::InvalidEvent(leader); - } - } - } - else - { - try - { - group_leader_ = perf::EventProvider::get_event_by_name(leader); - } - catch (const perf::EventProvider::InvalidEvent& e) - { - Log::error() << "Metric leader " << leader << " not available."; - Log::error() << "Please choose another metric leader."; - - throw perf::EventProvider::InvalidEvent(leader); - } - } - - // DONT do group_leader_.sample_freq() here, since it requires config() to be complete - - for (const auto& ev : counters) - { - try - { - // skip event if it has already been declared as group leader - if (ev == group_leader_.value().name()) - { - Log::info() << "'" << ev - << "' has been requested as both the metric leader event and a regular " - "metric event. Will treat it as the leader."; - continue; - } - - group_events_.emplace_back(perf::EventProvider::get_event_by_name(ev)); - group_events_.back().sample_period(0); - } - catch (const perf::EventProvider::InvalidEvent& e) - { - Log::warn() << "'" << ev - << "' does not name a known event, ignoring! 
(reason: " << e.what() << ")"; - } - } -} - -CounterCollection CounterProvider::collection_for(MeasurementScope scope) -{ - assert(scope.type == MeasurementScopeType::GROUP_METRIC || - scope.type == MeasurementScopeType::USERSPACE_METRIC || - scope.type == MeasurementScopeType::TRACEPOINT); - - CounterCollection res; - if (scope.type == MeasurementScopeType::GROUP_METRIC) - { - if (group_leader_.value().is_available_in(scope.scope)) - { - res.leader() = group_leader_.value(); - for (auto& ev : group_events_) - { - if (ev.is_available_in(scope.scope)) - { - res.counters.emplace_back(std::move(ev)); - } - } - } - } - else if (scope.type == MeasurementScopeType::USERSPACE_METRIC) - { - for (auto& ev : userspace_events_) - { - if (ev.is_available_in(scope.scope)) - { - res.counters.emplace_back(std::move(ev)); - } - } - } - else - { - for (auto& ev : tracepoint_events_) - { - if (ev.is_available_in(scope.scope)) - { - res.counters.emplace_back(std::move(ev)); - } - } - } - return res; -} - -std::vector CounterProvider::get_tracepoint_event_names() -{ - try - { - std::ifstream ifs_available_events; - ifs_available_events.exceptions(std::ios::failbit | std::ios::badbit); - - ifs_available_events.open("/sys/kernel/debug/tracing/available_events"); - ifs_available_events.exceptions(std::ios::badbit); - - std::vector available; - - for (std::string tracepoint; std::getline(ifs_available_events, tracepoint);) - { - available.emplace_back(std::move(tracepoint)); - } - - return available; - } - catch (const std::ios_base::failure& e) - { - Log::debug() << "Retrieving kernel tracepoint event names failed: " << e.what(); - return {}; - } -} - -bool CounterProvider::has_group_counters(ExecutionScope scope) -{ - if (scope.is_process()) - { - return !group_events_.empty(); - } - else - { - return group_leader_.value().is_available_in(scope) && - std::any_of(group_events_.begin(), group_events_.end(), - [scope](const auto& ev) { return ev.is_available_in(scope); }); - } - return 
false; -} - -bool CounterProvider::has_userspace_counters(ExecutionScope scope) -{ - if (scope.is_process()) - { - return !userspace_events_.empty(); - } - else - { - return std::any_of(userspace_events_.begin(), userspace_events_.end(), - [scope](const auto& ev) { return ev.is_available_in(scope); }); - } - - return false; -} - -} // namespace counter -} // namespace perf -} // namespace lo2s diff --git a/src/perf/counter/group/reader.cpp b/src/perf/counter/group/reader.cpp index 39d0dd45..bd746bb3 100644 --- a/src/perf/counter/group/reader.cpp +++ b/src/perf/counter/group/reader.cpp @@ -25,8 +25,9 @@ #include #include -#include -#include +#include +#include +#include #include #include @@ -50,84 +51,30 @@ namespace group template Reader::Reader(ExecutionScope scope, bool enable_on_exec) : counter_collection_( - CounterProvider::instance().collection_for(MeasurementScope::group_metric(scope))), + EventComposer::instance().counters_for(MeasurementScope::group_metric(scope))), counter_buffer_(counter_collection_.counters.size() + 1) { - if (config().metric_use_frequency) + Log::debug() << "counter::Reader: leader event: '" << counter_collection_.leader->name() << "'"; + + if (enable_on_exec) { - counter_collection_.leader().sample_freq(config().metric_frequency); + counter_collection_.leader->set_flags({ EventFlag::ENABLE_ON_EXEC }); } else { - counter_collection_.leader().sample_period(config().metric_count); + counter_collection_.leader->set_flags({ EventFlag::DISABLED }); } - do - { - try - { - counter_leader_ = - counter_collection_.leader().open_as_group_leader(scope, config().cgroup_fd); - } - catch (const std::system_error& e) - { - // perf_try_event_open was used here before - if (counter_leader_.value().get_fd() < 0 && errno == EACCES && - !counter_collection_.leader().attr().exclude_kernel && perf_event_paranoid() > 1) - { - counter_collection_.leader().mut_attr().exclude_kernel = 1; - perf_warn_paranoid(); - - continue; - } - - if 
(!counter_leader_.value().is_valid()) - { - Log::error() << "perf_event_open for counter group leader failed"; - throw_errno(); - } - } - } while (!counter_leader_.value().is_valid()); - - Log::debug() << "counter::Reader: leader event: '" << counter_collection_.leader().name() - << "'"; + counter_leader_ = counter_collection_.leader->open(scope, config().cgroup_fd); for (auto& counter_ev : counter_collection_.counters) { - if (counter_ev.is_available_in(scope)) - { - std::optional counter = std::nullopt; - counter_ev.mut_attr().exclude_kernel = - counter_collection_.leader().attr().exclude_kernel; - - try - { - counter.value() = counter_leader_.value().open_child(counter_ev, scope); - counters_.emplace_back(std::move(counter.value())); - } - catch (const std::system_error& e) - { - if (!counter.value().is_valid()) - { - Log::error() << "failed to add counter '" << counter_ev.name() - << "': " << e.code().message(); - - if (e.code().value() == EINVAL) - { - Log::error() << "opening " << counter_collection_.counters.size() - << " counters at once might exceed the hardware limit of " - "simultaneously " - "openable counters."; - } - throw e; - } - } - } + counters_.emplace_back(counter_leader_->open_child(counter_ev, scope)); } if (!enable_on_exec) { - counter_leader_.value().enable(); + counter_leader_->enable(); } EventReader::init_mmap(counter_leader_.value().get_fd()); diff --git a/src/perf/counter/userspace/reader.cpp b/src/perf/counter/userspace/reader.cpp index c0a30141..e0c31c92 100644 --- a/src/perf/counter/userspace/reader.cpp +++ b/src/perf/counter/userspace/reader.cpp @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include @@ -46,42 +46,15 @@ namespace userspace template Reader::Reader(ExecutionScope scope) : counter_collection_( - CounterProvider::instance().collection_for(MeasurementScope::userspace_metric(scope))), + EventComposer::instance().counters_for(MeasurementScope::userspace_metric(scope))), 
counter_buffer_(counter_collection_.counters.size()), timer_fd_(timerfd_from_ns(config().userspace_read_interval)), data_(counter_collection_.counters.size()) { for (auto& event : counter_collection_.counters) { - std::optional counter = std::nullopt; - - try - { - counter.value() = event.open(scope); - counters_.emplace_back(std::move(counter.value())); - } - catch (const std::system_error& e) - { - // perf_try_event_open was used here before - if (counter.value().get_fd() < 0 && errno == EACCES && !event.attr().exclude_kernel && - perf_event_paranoid() > 1) - { - event.mut_attr().exclude_kernel = 1; - perf_warn_paranoid(); - - counter = event.open(scope); - } - - if (!counter.value().is_valid()) - { - Log::error() << "perf_event_open for counter failed"; - throw_errno(); - } - else - { - counters_.emplace_back(std::move(counter.value())); - } - } + std::optional counter = event.open(scope); + counters_.emplace_back(std::move(counter.value())); } } diff --git a/src/perf/event.cpp b/src/perf/event_attr.cpp similarity index 75% rename from src/perf/event.cpp rename to src/perf/event_attr.cpp index 3cda98c8..e6cc7d3a 100644 --- a/src/perf/event.cpp +++ b/src/perf/event_attr.cpp @@ -20,8 +20,8 @@ */ #include -#include -#include +#include +#include #include #include @@ -33,7 +33,6 @@ extern "C" { #include -#include #include } @@ -42,6 +41,23 @@ namespace lo2s namespace perf { +std::set test_cpus(EventAttr ev) +{ + std::set cpus = std::set(); + for (const auto& cpu : Topology::instance().cpus()) + { + try + { + EventGuard ev_instance = ev.open(cpu.as_scope(), -1); + cpus.emplace(cpu); + } + catch (const std::system_error& e) + { + } + } + return cpus; +} + template std::optional try_read_file(const std::string& filename) { @@ -77,7 +93,7 @@ static std::uint64_t parse_bitmask(const std::string& format) const auto len = (end + 1) - start; if (start < 0 || end > 63 || len > 64) { - throw EventProvider::InvalidEvent("invalid config mask"); + throw 
EventAttr::InvalidEvent("invalid config mask"); } /* Set `len` bits and shift them to where they should start. @@ -114,65 +130,31 @@ static constexpr std::uint64_t apply_mask(std::uint64_t value, std::uint64_t mas return res; } -Event::Event(const std::string& name, perf_type_id type, std::uint64_t config, - std::uint64_t config1) +EventAttr::EventAttr(const std::string& name, perf_type_id type, std::uint64_t config, + std::uint64_t config1) + : name_(name) { memset(&attr_, 0, sizeof(attr_)); attr_.size = sizeof(attr_); - attr_.sample_type = PERF_SAMPLE_TIME; attr_.type = type; attr_.config = config; attr_.config1 = config1; - - // Needed when scaling multiplexed events, and recognize activation phases - attr_.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING; - - try - { - parse_pmu_path(name_); - } - catch (const EventProvider::InvalidEvent&) // ignore - { - } - - parse_cpus(); - update_availability(); } -void Event::parse_pmu_path(const std::string& ev_name) -{ - static const std::regex ev_name_regex(R"(([a-z0-9-_]+)[\/:]([a-z0-9-_]+)\/?)"); - std::smatch ev_name_match; +SimpleEventAttr::SimpleEventAttr(const std::string& name, perf_type_id type, std::uint64_t config, - if (!std::regex_match(ev_name, ev_name_match, ev_name_regex)) - { - pmu_path_ = std::filesystem::path(); - throw EventProvider::InvalidEvent("invalid event description format"); - } - - name_ = ev_name_match[2]; - pmu_name_ = ev_name_match[1]; - pmu_path_ = std::filesystem::path("/sys/bus/event_source/devices") / pmu_name_; -} - -void Event::set_common_attrs(bool enable_on_exec) + std::uint64_t config1) +: EventAttr(name, type, config, config1) { - memset(&attr_, 0, sizeof(attr_)); - attr_.size = sizeof(attr_); - attr_.type = -1; - attr_.disabled = 1; - attr_.sample_period = 1; - attr_.enable_on_exec = enable_on_exec; + cpus_ = test_cpus(*this); - // Needed when scaling multiplexed events, and recognize activation phases - attr_.read_format = 
PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING; - attr_.sample_type = PERF_SAMPLE_TIME; + event_is_openable(); } -void Event::event_attr_update(std::uint64_t value, const std::string& format) +void EventAttr::event_attr_update(std::uint64_t value, const std::string& format) { // Parse config terms // @@ -186,7 +168,7 @@ void Event::event_attr_update(std::uint64_t value, const std::string& format) const auto colon = format.find_first_of(':'); if (colon == npos) { - throw EventProvider::InvalidEvent("invalid format description: missing colon"); + throw EventAttr::InvalidEvent("invalid format description: missing colon"); } const auto target_config = format.substr(0, colon); @@ -203,91 +185,36 @@ void Event::event_attr_update(std::uint64_t value, const std::string& format) } } -void Event::time_attrs([[maybe_unused]] uint64_t addr, bool enable_on_exec) -{ - set_common_attrs(enable_on_exec); - -#ifndef USE_HW_BREAKPOINT_COMPAT - attr_.type = PERF_TYPE_BREAKPOINT; - attr_.bp_type = HW_BREAKPOINT_W; - attr_.bp_addr = addr; - attr_.bp_len = HW_BREAKPOINT_LEN_8; - attr_.wakeup_events = 1; -#else - attr_.type = PERF_TYPE_HARDWARE; - attr_.config = PERF_COUNT_HW_INSTRUCTIONS; - attr_.sample_period = 100000000; - attr_.task = 1; -#endif -} - -void Event::parse_cpus() -{ - if (pmu_path_.empty()) - { - for (const auto& cpu : Topology::instance().cpus()) - { - try - { - EventGuard ev_instance = open(cpu.as_scope(), -1); - cpus_.emplace(cpu); - } - catch (const std::system_error& e) - { - } - } - - return; - } - - // If the processor is heterogenous, "cpus" contains the cores that support this PMU. If the - // PMU is an uncore PMU "cpumask" contains the cores that are logically assigned to that - // PMU. Why there need to be two seperate files instead of one, nobody knows, but simply - // parse both. 
- auto cpuids = parse_list_from_file(pmu_path_ / "cpus"); - - if (cpuids.empty()) - { - cpuids = parse_list_from_file(pmu_path_ / "cpumask"); - } - - std::transform(cpuids.begin(), cpuids.end(), std::inserter(cpus_, cpus_.end()), - [](uint32_t cpuid) { return Cpu(cpuid); }); -} - -void Event::sample_period(const int& period) +void EventAttr::sample_period(const int& period) { Log::debug() << "counter::Reader: sample_period: " << period; + attr_.freq = false; attr_.sample_period = period; } -void Event::sample_freq(const uint64_t& freq) +void EventAttr::sample_freq(const uint64_t& freq) { Log::debug() << "counter::Reader: sample_freq: " << freq; + attr_.freq = true; attr_.sample_freq = freq; } -const std::set& Event::supported_cpus() const +const std::set& EventAttr::supported_cpus() const { return cpus_; } -bool Event::is_valid() const -{ - return (availability_ != Availability::UNAVAILABLE); -} - -bool Event::event_is_openable() +bool EventAttr::event_is_openable() { update_availability(); - if (!is_valid()) + if (availability_ == Availability::UNAVAILABLE) { Log::debug() << "perf event not openable, retrying with exclude_kernel=1"; attr_.exclude_kernel = 1; update_availability(); - if (!is_valid()) + if (availability_ == Availability::UNAVAILABLE) { switch (errno) { @@ -299,23 +226,25 @@ bool Event::event_is_openable() << " not available: " << std::string(std::strerror(errno)); break; } + throw EventAttr::InvalidEvent("not available!"); + return false; } } return true; } -void Event::update_availability() +void EventAttr::update_availability() { - availability_ = Availability::UNAVAILABLE; - + bool proc = false; + bool system = false; try { EventGuard proc_ev = open(Thread(0)); if (proc_ev.get_fd() != -1) { - availability_ |= Availability::PROCESS_MODE; + proc = true; } } catch (const std::system_error& e) @@ -328,28 +257,39 @@ void Event::update_availability() if (sys_ev.get_fd() != -1) { - availability_ |= Availability::SYSTEM_MODE; + system = true; } } catch 
(const std::system_error& e) { } -} -bool Event::degrade_precision() -{ - /* reduce exactness of IP can help if the kernel does not support really exact events */ - if (attr_.precise_ip == 0) + if (proc == false && system == false) + { + availability_ = Availability::UNAVAILABLE; + } + else if (proc == true && system == false) { - return false; + availability_ = Availability::PROCESS_MODE; + } + else if (proc == false && system == true) + { + availability_ = Availability::SYSTEM_MODE; } else { - attr_.precise_ip--; - return true; + availability_ = Availability::UNIVERSAL; } } +SimpleEventAttr SimpleEventAttr::raw(const std::string& ev_name) +{ + // Do not check whether the event_is_openable because we don't know whether we are in + // system or process mode + SimpleEventAttr event(ev_name, PERF_TYPE_RAW, std::stoull(ev_name.substr(1), nullptr, 16), 0); + return event; +} + static void print_bits(std::ostream& stream, const std::string& name, std::map known_bits, uint64_t value) { @@ -382,7 +322,7 @@ static void print_bits(std::ostream& stream, const std::string& name, stream << "\t" << name << ": " << active_bits_str << "\n"; } -std::ostream& operator<<(std::ostream& stream, const Event& event) +std::ostream& operator<<(std::ostream& stream, const EventAttr& event) { stream << "{\n"; switch (event.attr_.type) @@ -590,11 +530,9 @@ std::ostream& operator<<(std::ostream& stream, const Event& event) return stream; } -SysfsEvent::SysfsEvent(const std::string& ev_name, bool enable_on_exec) -: Event(ev_name, static_cast(0), 0) +SysfsEventAttr::SysfsEventAttr(const std::string ev_name) +: EventAttr(ev_name, static_cast(0), 0) { - set_common_attrs(enable_on_exec); - // Parse event description // /* Event description format: @@ -620,17 +558,29 @@ SysfsEvent::SysfsEvent(const std::string& ev_name, bool enable_on_exec) ED_NAME, }; - parse_pmu_path(ev_name); + static const std::regex ev_name_regex(R"(([a-z0-9-_]+)[\/:]([a-z0-9-_]+)\/?)"); + std::smatch ev_name_match; + + 
std::filesystem::path pmu_path; + + if (!std::regex_match(ev_name, ev_name_match, ev_name_regex)) + { + pmu_path = std::filesystem::path(); + } + + name_ = ev_name_match[2]; + std::string pmu_name = ev_name_match[1]; + pmu_path = std::filesystem::path("/sys/bus/event_source/devices") / pmu_name; - Log::debug() << "parsing event description: pmu='" << pmu_name_ << "', event='" << name_ << "'"; + Log::debug() << "parsing event description: pmu='" << pmu_name << "', event='" << name_ << "'"; // read PMU type id - auto type = try_read_file::type>(pmu_path_ / "type"); + auto type = try_read_file::type>(pmu_path / "type"); if (!type.has_value()) { using namespace std::string_literals; - throw EventProvider::InvalidEvent("unknown PMU '"s + pmu_name_ + "'"); + throw EventAttr::InvalidEvent("unknown PMU '"s + pmu_name + "'"); } attr_.type = static_cast(type.value()); @@ -640,13 +590,12 @@ SysfsEvent::SysfsEvent(const std::string& ev_name, bool enable_on_exec) // Parse event configuration from sysfs // // read event configuration - std::filesystem::path event_path = pmu_path_ / "events" / name_; + std::filesystem::path event_path = pmu_path / "events" / name_; auto ev_cfg = try_read_file(event_path); if (!ev_cfg.has_value()) { using namespace std::string_literals; - throw EventProvider::InvalidEvent("unknown event '"s + name_ + "' for PMU '"s + pmu_name_ + - "'"); + throw EventAttr::InvalidEvent("unknown event '"s + name_ + "' for PMU '"s + pmu_name + "'"); } name_ = ev_name; @@ -670,6 +619,26 @@ SysfsEvent::SysfsEvent(const std::string& ev_name, bool enable_on_exec) EC_VALUE, }; + // If the processor is heterogenous, "cpus" contains the cores that support this PMU. If the + // PMU is an uncore PMU "cpumask" contains the cores that are logically assigned to that + // PMU. Why there need to be two seperate files instead of one, nobody knows, but simply + // parse both. 
+ auto cpuids = parse_list_from_file(pmu_path / "cpus"); + + if (cpuids.empty()) + { + cpuids = parse_list_from_file(pmu_path / "cpumask"); + } + if (cpuids.empty()) + { + cpus_ = test_cpus(*this); + } + else + { + std::transform(cpuids.begin(), cpuids.end(), std::inserter(cpus_, cpus_.end()), + [](uint32_t cpuid) { return Cpu(cpuid); }); + } + static const std::regex kv_regex(R"(([^=,]+)(?:=([^,]+))?)"); Log::debug() << "parsing event configuration: " << ev_cfg.value(); @@ -682,10 +651,10 @@ SysfsEvent::SysfsEvent(const std::string& ev_name, bool enable_on_exec) const std::string& value = (kv_match[EC_VALUE].length() != 0) ? kv_match[EC_VALUE] : default_value; - auto format = try_read_file(pmu_path_ / "format" / term); + auto format = try_read_file(pmu_path / "format" / term); if (!format.has_value()) { - throw EventProvider::InvalidEvent("cannot read event format"); + throw EventAttr::InvalidEvent("cannot read event format"); } static_assert(sizeof(std::uint64_t) >= sizeof(unsigned long), @@ -699,7 +668,7 @@ SysfsEvent::SysfsEvent(const std::string& ev_name, bool enable_on_exec) ev_cfg = kv_match.suffix(); } - Log::debug() << std::hex << std::showbase << "parsed event description: " << pmu_name_ << "/" + Log::debug() << std::hex << std::showbase << "parsed event description: " << pmu_name << "/" << name_ << "/type=" << attr_.type << ",config=" << attr_.config << ",config1=" << attr_.config1 << std::dec << std::noshowbase << "/"; @@ -708,63 +677,17 @@ SysfsEvent::SysfsEvent(const std::string& ev_name, bool enable_on_exec) if (!event_is_openable()) { - throw EventProvider::InvalidEvent( + throw EventAttr::InvalidEvent( "Event can not be opened in process- or system-monitoring-mode"); } } -void SysfsEvent::make_invalid() -{ - availability_ = Availability::UNAVAILABLE; -} - -void SysfsEvent::use_sampling_options(const bool& use_pebs, const bool& sampling, - const bool& enable_cct) -{ - if (use_pebs) - { - attr_.use_clockid = 0; - } - - if (sampling) - { - 
Log::debug() << "using sampling event \'" << name_ << "\', period: " << attr_.sample_period; - - attr_.mmap = 1; - } - else - { - // Set up a dummy event for recording calling context enter/leaves only - attr_.type = PERF_TYPE_SOFTWARE; - attr_.config = PERF_COUNT_SW_DUMMY; - } - - attr_.sample_id_all = 1; - // Generate PERF_RECORD_COMM events to trace changes to the command - // name of a task. This is used to write a meaningful name for any - // traced thread to the archive. - attr_.comm = 1; - attr_.context_switch = 1; - - // TODO see if we can remove remove tid - attr_.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_CPU; - if (enable_cct) - { - attr_.sample_type |= PERF_SAMPLE_CALLCHAIN; - } - - attr_.precise_ip = 3; - - // make event available if possible - update_availability(); -} - -EventGuard Event::open(std::variant location, int cgroup_fd) +EventGuard EventAttr::open(std::variant location, int cgroup_fd) { return EventGuard(*this, location, -1, cgroup_fd); } -EventGuard Event::open(ExecutionScope location, int cgroup_fd) +EventGuard EventAttr::open(ExecutionScope location, int cgroup_fd) { if (location.is_cpu()) { @@ -776,7 +699,7 @@ EventGuard Event::open(ExecutionScope location, int cgroup_fd) } } -EventGuard Event::open_as_group_leader(ExecutionScope location, int cgroup_fd) +EventGuard EventAttr::open_as_group_leader(ExecutionScope location, int cgroup_fd) { attr_.read_format |= PERF_FORMAT_GROUP; attr_.sample_type |= PERF_SAMPLE_READ; @@ -784,7 +707,7 @@ EventGuard Event::open_as_group_leader(ExecutionScope location, int cgroup_fd) return open(location, cgroup_fd); } -EventGuard EventGuard::open_child(Event child, ExecutionScope location, int cgroup_fd) +EventGuard EventGuard::open_child(EventAttr child, ExecutionScope location, int cgroup_fd) { if (location.is_cpu()) { @@ -796,7 +719,8 @@ EventGuard EventGuard::open_child(Event child, ExecutionScope location, int cgro } } -EventGuard::EventGuard(Event& ev, std::variant location, int 
group_fd, int cgroup_fd) +EventGuard::EventGuard(EventAttr& ev, std::variant location, int group_fd, + int cgroup_fd) : fd_(-1) { ExecutionScope scope; @@ -805,7 +729,7 @@ EventGuard::EventGuard(Event& ev, std::variant location, int group_ Log::trace() << "Opening perf event: " << ev.name() << "[" << scope.name() << ", group fd: " << group_fd << ", cgroup fd: " << cgroup_fd << "]"; Log::trace() << ev; - fd_ = perf_event_open(&ev.mut_attr(), scope, group_fd, 0, cgroup_fd); + fd_ = perf_event_open(&ev.attr(), scope, group_fd, 0, cgroup_fd); if (fd_ < 0) { diff --git a/src/perf/event_composer.cpp b/src/perf/event_composer.cpp new file mode 100644 index 00000000..9f197836 --- /dev/null +++ b/src/perf/event_composer.cpp @@ -0,0 +1,308 @@ +/* + * This file is part of the lo2s software. + * Linux OTF2 sampling + * + * Copyright (c) 2016, + * Technische Universitaet Dresden, Germany + * + * lo2s is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * lo2s is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with lo2s. If not, see . 
+ */ + +#include + +namespace lo2s +{ +namespace perf +{ + +EventComposer::EventComposer() +{ + auto test_event = EventResolver::instance().get_event_by_name("cpu-cycles"); + + std::optional guard; + do + { + try + { + guard = test_event.open(Thread(0)); + } + catch (std::system_error& e) + { + if (test_event.get_flag(EventFlag::EXCLUDE_KERNEL) && e.code().value() == EACCES && + perf_event_paranoid() > 1) + { + perf_warn_paranoid(); + exclude_kernel_ = 1; + test_event.set_flags({ EventFlag::EXCLUDE_KERNEL }); + } + else + { + throw; + } + } + } while (!guard.has_value()); +} + +EventAttr EventComposer::create_sampling_event() +{ + if (sampling_event_.has_value()) + { + return sampling_event_.value(); + } + + if (config().sampling) + { + sampling_event_ = EventResolver::instance().get_event_by_name(config().sampling_event); + Log::debug() << "using sampling event \'" << sampling_event_->name() + << "\', period: " << config().sampling_period; + + sampling_event_->sample_period(config().sampling_period); + + if (config().use_pebs) + { + sampling_event_->set_clockid(std::nullopt); + } + else + { + sampling_event_->set_clockid(config().clockid); + } + sampling_event_->set_flags({ EventFlag::MMAP }); + + sampling_event_->set_precise_ip(3); + } + else + { + EventAttr event = EventResolver::instance().get_event_by_name("dummy"); + } + + sampling_event_->set_flags( + { EventFlag::SAMPLE_ID_ALL, EventFlag::COMM, EventFlag::CONTEXT_SWITCH }); + + if (exclude_kernel_) + { + sampling_event_->set_flags({ EventFlag::EXCLUDE_KERNEL }); + } + watermark(sampling_event_.value()); + + // TODO see if we can remove remove tid + sampling_event_->set_sample_type(PERF_SAMPLE_TIME | PERF_SAMPLE_IP | PERF_SAMPLE_TID | + PERF_SAMPLE_CPU); + + if (config().enable_cct) + { + sampling_event_->set_sample_type(PERF_SAMPLE_CALLCHAIN); + } + + if (config().sampling) + { + uint64_t precise_ip = 3; + do + { + + try + { + auto guard = sampling_event_->open(Thread(0)); + return sampling_event_.value(); 
+ } + catch (...) + { + if (precise_ip == 0) + { + throw; + } + sampling_event_->set_precise_ip(--precise_ip); + } + + } while (true); + } + + return sampling_event_.value(); +} + +EventAttr EventComposer::create_time_event(uint64_t local_time [[maybe_unused]]) + +{ +#ifndef USE_HW_BREAKPOINT_COMPAT + BreakpointEventAttr ev(local_time, HW_BREAKPOINT_W); + ev.sample_period(1); + ev.set_watermark(1); + ev.set_clockid(config().clockid); + ev.set_sample_type(PERF_SAMPLE_TIME); +#else + EventAttr ev = EventResolver::instance().get_event_by_name("instructions"); + ev.sample_period(100000000); + ev.set_flags({ EventFlag::TASK }); +#endif + + ev.set_flags({ EventFlag::EXCLUDE_KERNEL, EventFlag::DISABLED }); + + return ev; +} + +std::vector EventComposer::get_tracepoints() +{ + if (tracepoint_events_.has_value()) + { + return tracepoint_events_.value(); + } + + tracepoint_events_ = std::vector(); + for (const auto& ev_name : config().tracepoint_events) + { + tracepoint_events_->emplace_back(create_tracepoint_event(ev_name)); + } + return tracepoint_events_.value(); +} + +perf::tracepoint::TracepointEventAttr +EventComposer::create_tracepoint_event(const std::string& name) +{ + auto ev = tracepoint::TracepointEventAttr(name); + watermark(ev); + ev.set_clockid(config().clockid); + ev.sample_period(1); + ev.set_sample_type(PERF_SAMPLE_RAW | PERF_SAMPLE_TIME); + ev.set_flags({ EventFlag::DISABLED }); + return ev; +} + +void EventComposer::read_userspace_counters() +{ + if (userspace_counters_.has_value()) + { + return; + } + + counter::CounterCollection res; + for (const auto& ev : config().userspace_counters) + { + try + { + res.counters.emplace_back(perf::EventResolver::instance().get_event_by_name(ev)); + } + catch (const EventAttr::InvalidEvent& e) + { + Log::warn() << "'" << ev + << "' does not name a known event, ignoring! 
(reason: " << e.what() << ")"; + } + } +} + +void EventComposer::read_group_counters() +{ + + if (group_counters_.has_value()) + { + return; + } + + counter::CounterCollection res; + res.leader = EventResolver::instance().get_metric_leader(config().metric_leader); + + res.leader->set_sample_type(PERF_SAMPLE_TIME | PERF_SAMPLE_READ); + if (config().metric_use_frequency) + { + res.leader->sample_freq(config().metric_frequency); + } + else + { + res.leader->sample_period(config().metric_count); + } + res.leader->set_clockid(config().clockid); + + res.leader->set_read_format(PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING | + PERF_FORMAT_GROUP); + + watermark(res.leader.value()); + + // DONT do group_leader_.sample_freq() here, since it requires config() to be complete + + for (const auto& ev_name : config().group_counters) + { + try + { + // skip event if it has already been declared as group leader + if (ev_name == res.leader->name()) + { + Log::info() << "'" << ev_name + << "' has been requested as both the metric leader event and a regular " + "metric event. Will treat it as the leader."; + continue; + } + + EventAttr ev = perf::EventResolver::instance().get_event_by_name(ev_name); + res.counters.emplace_back(ev); + res.counters.back().set_clockid(config().clockid); + res.counters.back().set_read_format(PERF_FORMAT_TOTAL_TIME_ENABLED | + PERF_FORMAT_TOTAL_TIME_RUNNING); + } + catch (const EventAttr::InvalidEvent& e) + { + Log::warn() << "'" << ev_name + << "' does not name a known event, ignoring! 
(reason: " << e.what() << ")"; + } + } + + group_counters_ = res; +} + +counter::CounterCollection EventComposer::counters_for(MeasurementScope scope) +{ + assert(scope.type == MeasurementScopeType::GROUP_METRIC || + scope.type == MeasurementScopeType::USERSPACE_METRIC); + + counter::CounterCollection res; + if (scope.type == MeasurementScopeType::GROUP_METRIC) + { + read_group_counters(); + if (group_counters_->leader->is_available_in(scope.scope)) + { + res.leader = group_counters_->leader.value(); + for (auto& ev : group_counters_->counters) + { + if (ev.is_available_in(scope.scope)) + { + + res.counters.emplace_back(ev); + } + else + { + Log::warn() << "Scope " << scope.scope.name() << ": skipping " << ev.name() + << ": not available!"; + } + } + } + } + else if (scope.type == MeasurementScopeType::USERSPACE_METRIC) + { + read_userspace_counters(); + for (auto& ev : userspace_counters_->counters) + { + if (ev.is_available_in(scope.scope)) + { + res.counters.emplace_back(ev); + } + else + { + Log::warn() << "Skipping " << ev.name() << " not availabe in " << scope.scope.name() + << "!"; + } + } + } + + return res; +} + +} // namespace perf +} // namespace lo2s diff --git a/src/perf/event_provider.cpp b/src/perf/event_provider.cpp deleted file mode 100644 index 5f2befaa..00000000 --- a/src/perf/event_provider.cpp +++ /dev/null @@ -1,480 +0,0 @@ -/* - * This file is part of the lo2s software. - * Linux OTF2 sampling - * - * Copyright (c) 2017, - * Technische Universitaet Dresden, Germany - * - * lo2s is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * lo2s is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with lo2s. If not, see . - */ - -#include -#include -#include -#include -#ifdef HAVE_LIBPFM -#include -#endif -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -extern "C" -{ -#include -#include -#include -} - -namespace -{ -#define PERF_EVENT(name, type, id) \ - { \ - (name), (type), (id) \ - } -#define PERF_EVENT_HW(name, id) PERF_EVENT(name, PERF_TYPE_HARDWARE, PERF_COUNT_HW_##id) -#define PERF_EVENT_SW(name, id) PERF_EVENT(name, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_##id) - -#define PERF_MAKE_CACHE_ID(id) (id) -#define PERF_MAKE_CACHE_OP_ID(id) ((id) << 8) -#define PERF_MAKE_CACHE_OP_RES_ID(id) ((id) << 16) - -template -struct string_to_id -{ - const char* name; - T id; -}; - -static string_to_id CACHE_NAME_TABLE[] = { - { "L1-dcache", PERF_COUNT_HW_CACHE_L1D }, { "L1-icache", PERF_COUNT_HW_CACHE_L1I }, - { "LLC", PERF_COUNT_HW_CACHE_LL }, { "dTLB", PERF_COUNT_HW_CACHE_DTLB }, - { "iTLB", PERF_COUNT_HW_CACHE_ITLB }, { "branch", PERF_COUNT_HW_CACHE_BPU }, -#ifdef HAVE_PERF_EVENT_CACHE_NODE - { "node", PERF_COUNT_HW_CACHE_NODE }, -#endif -}; - -struct cache_op_and_result -{ - perf_hw_cache_op_id op_id; - perf_hw_cache_op_result_id result_id; -}; - -static string_to_id CACHE_OPERATION_TABLE[] = { - { "loads", { PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS } }, - { "stores", { PERF_COUNT_HW_CACHE_OP_WRITE, PERF_COUNT_HW_CACHE_RESULT_ACCESS } }, - { "prefetches", { PERF_COUNT_HW_CACHE_OP_PREFETCH, PERF_COUNT_HW_CACHE_RESULT_ACCESS } }, - { "load-misses", { PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS } }, - { "store-misses", { PERF_COUNT_HW_CACHE_OP_WRITE, PERF_COUNT_HW_CACHE_RESULT_MISS } }, - { "prefetch-misses", { PERF_COUNT_HW_CACHE_OP_PREFETCH, PERF_COUNT_HW_CACHE_RESULT_MISS } }, -}; - -inline constexpr 
std::uint64_t make_cache_config(perf_hw_cache_id cache, perf_hw_cache_op_id op, - perf_hw_cache_op_result_id op_result) -{ - return cache | (op << 8) | (op_result << 16); -} - -template -inline constexpr std::size_t array_size(T (&)[N]) -{ - return N; -} -} // namespace - -namespace lo2s -{ -namespace perf -{ - -static void populate_event_map(std::unordered_map& map) -{ - Log::info() << "checking available events..."; - lo2s::perf::Event HW_EVENT_TABLE[] = { - PERF_EVENT_HW("cpu-cycles", CPU_CYCLES), - PERF_EVENT_HW("instructions", INSTRUCTIONS), - PERF_EVENT_HW("cache-references", CACHE_REFERENCES), - PERF_EVENT_HW("cache-misses", CACHE_MISSES), - PERF_EVENT_HW("branch-instructions", BRANCH_INSTRUCTIONS), - PERF_EVENT_HW("branch-misses", BRANCH_MISSES), - PERF_EVENT_HW("bus-cycles", BUS_CYCLES), -#ifdef HAVE_PERF_EVENT_STALLED_CYCLES_FRONTEND - PERF_EVENT_HW("stalled-cycles-frontend", STALLED_CYCLES_FRONTEND), -#endif -#ifdef HAVE_PERF_EVENT_STALLED_CYCLES_BACKEND - PERF_EVENT_HW("stalled-cycles-backend", STALLED_CYCLES_BACKEND), -#endif -#ifdef HAVE_PERF_EVENT_REF_CYCLES - PERF_EVENT_HW("ref-cycles", REF_CPU_CYCLES), -#endif - }; - lo2s::perf::Event SW_EVENT_TABLE[] = { - PERF_EVENT_SW("cpu-clock", CPU_CLOCK), - PERF_EVENT_SW("task-clock", TASK_CLOCK), - PERF_EVENT_SW("page-faults", PAGE_FAULTS), - PERF_EVENT_SW("context-switches", CONTEXT_SWITCHES), - PERF_EVENT_SW("cpu-migrations", CPU_MIGRATIONS), - PERF_EVENT_SW("minor-faults", PAGE_FAULTS_MIN), - PERF_EVENT_SW("major-faults", PAGE_FAULTS_MAJ), -#ifdef HAVE_PERF_EVENT_ALIGNMENT_FAULTS - PERF_EVENT_SW("alignment-faults", ALIGNMENT_FAULTS), -#endif -#ifdef HAVE_PERF_EVENT_EMULATION_FAULTS - PERF_EVENT_SW("emulation-faults", EMULATION_FAULTS), -#endif -#ifdef HAVE_PERF_EVENT_DUMMY - PERF_EVENT_SW("dummy", DUMMY), -#endif -#ifdef HAVE_PERF_EVENT_BPF_OUTPUT - PERF_EVENT_SW("bpf-output", BPF_OUTPUT), -#endif -#ifdef HAVE_PERF_EVENT_CGROUP_SWITCHES - PERF_EVENT_SW("cgroup-switches", CGROUP_SWITCHES), -#endif - }; - 
- map.reserve(array_size(HW_EVENT_TABLE) + array_size(SW_EVENT_TABLE) + - array_size(CACHE_NAME_TABLE) * array_size(CACHE_OPERATION_TABLE)); - for (auto& ev : HW_EVENT_TABLE) - { - Event event(ev); - map.emplace(event.name(), event); - } - - for (auto& ev : SW_EVENT_TABLE) - { - Event event(ev); - map.emplace(event.name(), event); - } - - std::stringstream name_fmt; - for (auto& cache : CACHE_NAME_TABLE) - { - for (auto& operation : CACHE_OPERATION_TABLE) - { - name_fmt.str(std::string()); - name_fmt << cache.name << '-' << operation.name; - - // don't use EventProvider::instance() here, will cause recursive init - map.emplace(name_fmt.str(), - EventProvider::create_event(name_fmt.str(), PERF_TYPE_HW_CACHE, - make_cache_config(cache.id, operation.id.op_id, - operation.id.result_id))); - } - } -} - -std::vector EventProvider::get_pmu_events() -{ - std::vector events; - - const std::filesystem::path pmu_devices("/sys/bus/event_source/devices"); - - for (const auto& pmu : std::filesystem::directory_iterator(pmu_devices)) - { - const auto pmu_path = pmu.path(); - - const std::filesystem::path event_dir(pmu_path / "events"); - - // some PMUs don't have any events, in that case event_dir doesn't exist - if (!std::filesystem::is_directory(event_dir)) - { - continue; - } - - for (const auto& event : std::filesystem::directory_iterator(event_dir)) - { - std::stringstream event_name; - - const auto event_path = event.path(); - const auto extension = event_path.extension(); - - // ignore scaling and unit information - if (extension == ".scale" || extension == ".unit") - { - continue; - } - - // use std::filesystem::path::string, otherwise the paths are formatted quoted - event_name << pmu_path.filename().string() << '/' << event_path.filename().string() - << '/'; - try - { - SysfsEvent event = EventProvider::instance().create_sysfs_event(event_name.str()); - events.emplace_back(event); - } - catch (const EventProvider::InvalidEvent& e) - { - Log::debug() << "Can not open 
event " << event_name.str() << ":" << e.what(); - } - } - } - - return events; -} - -Event EventProvider::fallback_metric_leader_event() -{ - Log::debug() << "checking for metric leader event..."; - for (auto candidate : { - "ref-cycles", - "cpu-cycles", - "bus-cycles", - }) - { - try - { - const Event ev = get_event_by_name(candidate); - Log::debug() << "found suitable metric leader event: " << candidate; - return ev; - } - catch (const InvalidEvent& e) - { - Log::debug() << "not a suitable metric leader event: " << candidate; - } - } - - throw InvalidEvent{ "no suitable metric leader event found" }; -} - -/** - * takes the name of an event, checks if it can be opened with each cpu and returns a PerfEvent - * with a set of working cpus - */ -const Event raw_read_event(const std::string& ev_name) -{ - // Do not check whether the event_is_openable because we don't know whether we are in - // system or process mode - return EventProvider::instance().create_event(ev_name, PERF_TYPE_RAW, - std::stoull(ev_name.substr(1), nullptr, 16), 0); -} - -EventProvider::EventProvider() -{ - populate_event_map(event_map_); -} - -Event EventProvider::cache_event(const std::string& name) -{ - // Format for raw events is r followed by a hexadecimal number - static const std::regex raw_regex("r[[:xdigit:]]{1,8}"); - - // save event in event map; return a reference to the inserted event to - // the caller. 
- try - { - if (regex_match(name, raw_regex)) - { - return event_map_.emplace(name, raw_read_event(name)).first->second; - } - else - { - SysfsEvent event = EventProvider::instance().create_sysfs_event(name, false); - return event_map_.emplace(name, event).first->second; - } - } - catch (const InvalidEvent& e) - { - // emplace unavailable Sampling Event - SysfsEvent event = EventProvider::instance().create_sysfs_event(name, false); - event.make_invalid(); - - event_map_.emplace(name, event); - throw e; - } -} - -/** - * takes the name of an event and looks it up in an internal event list. - * @returns The corresponding PerfEvent if it is available - * @throws InvalidEvent if the event is unavailable - */ -Event EventProvider::get_event_by_name(const std::string& name) -{ - auto& ev_map = instance().event_map_; - auto event_it = ev_map.find(name); - if (event_it != ev_map.end()) - { - if (event_it->second.is_valid()) - { - return event_it->second; - } - else - { - throw InvalidEvent("The event '" + name + "' is not available"); - } - } - else - { - return instance_mutable().cache_event(name); - } -} - -bool EventProvider::has_event(const std::string& name) -{ - auto& ev_map = instance().event_map_; - const auto event_it = ev_map.find(name); - if (event_it != ev_map.end()) - { - return (event_it->second.is_valid()); - } - else - { - try - { - instance_mutable().cache_event(name); - return true; - } - catch (const InvalidEvent&) - { - return false; - } - } -} - -std::vector EventProvider::get_predefined_events() -{ - const auto& ev_map = instance().event_map_; - - std::vector events; - events.reserve(ev_map.size()); - - for (const auto& event : ev_map) - { - if (event.second.is_valid()) - { - events.push_back(std::move(event.second)); - } - } - - return events; -} - -// returns a standard TracepointEvent, can use config() if specified, otherwise sets default values -tracepoint::TracepointEvent EventProvider::create_tracepoint_event(const std::string& name, - bool 
use_config, - bool enable_on_exec) -{ - tracepoint::TracepointEvent event(name, enable_on_exec); - event.sample_period(0); - - if (use_config) - { - apply_config_attrs(event); - } - else - { - apply_default_attrs(event); - } - - return event; -} - -// returns a Event with bp_addr set to local_time, uses config() -Event EventProvider::create_time_event(uint64_t local_time, bool enable_on_exec) -{ -#ifndef USE_HW_BREAKPOINT_COMPAT - Event event("Time", PERF_TYPE_BREAKPOINT, 0); // TODO: name for time events -#else - Event event("Time", PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS); -#endif - - event.sample_period(1); // may be overwritten by event.time_attrs - event.time_attrs(local_time, enable_on_exec); - - apply_config_attrs(event); - event.exclude_kernel(true); // overwrite config value - - return event; -} - -// returns a standard Event, uses config() if possible, else sets default values -Event EventProvider::create_event(const std::string& name, perf_type_id type, std::uint64_t config, - std::uint64_t config1) -{ - Event event(name, type, config, config1); - event.sample_period(0); - - try - { - apply_config_attrs(event); - event.exclude_kernel(true); // overwrite config value - } - catch (...) 
- { - apply_default_attrs(event); - } - - return event; -} - -// returns a SysfsEvent with sampling options enabled, uses config() -SysfsEvent EventProvider::create_sampling_event(bool enable_on_exec) -{ - SysfsEvent event(config().sampling_event, enable_on_exec); - apply_config_attrs(event); - - event.sample_period(config().sampling_period); - event.use_sampling_options(config().use_pebs, config().sampling, config().enable_cct); - - return event; -} - -// returns a standard SysfsEvent, can use config() if specified, otherwise sets default values -SysfsEvent EventProvider::create_sysfs_event(const std::string& name, bool use_config) -{ - SysfsEvent event(name); - event.sample_period(0); - - if (use_config) - { - apply_config_attrs(event); - } - else - { - apply_default_attrs(event); - } - - return event; -} - -void EventProvider::apply_config_attrs(Event& event) -{ - event.watermark(config().mmap_pages); - event.exclude_kernel(config().exclude_kernel); - event.clock_attrs(config().use_clockid, config().clockid); -} - -void EventProvider::apply_default_attrs(Event& event) -{ - event.watermark(16); // default mmap-pages value - event.exclude_kernel(true); // enabled by default - event.clock_attrs(true, CLOCK_MONOTONIC_RAW); -} - -} // namespace perf -} // namespace lo2s diff --git a/src/perf/event_resolver.cpp b/src/perf/event_resolver.cpp new file mode 100644 index 00000000..ec2597b9 --- /dev/null +++ b/src/perf/event_resolver.cpp @@ -0,0 +1,418 @@ +/* + * This file is part of the lo2s software. + * Linux OTF2 sampling + * + * Copyright (c) 2017, + * Technische Universitaet Dresden, Germany + * + * lo2s is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * lo2s is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with lo2s. If not, see . + */ + +#include +#include +#include +#include +#ifdef HAVE_LIBPFM +#include +#endif +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +extern "C" +{ +#include +#include +#include +} + +namespace +{ +#define PERF_EVENT(name, type, id) { (name), (type), (id) } +#define PERF_EVENT_SW(name, id) PERF_EVENT(name, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_##id) + +template +struct string_to_id +{ + const char* name; + T id; +}; + +static string_to_id CACHE_NAME_TABLE[] = { + { "L1-dcache", PERF_COUNT_HW_CACHE_L1D }, { "L1-icache", PERF_COUNT_HW_CACHE_L1I }, + { "LLC", PERF_COUNT_HW_CACHE_LL }, { "dTLB", PERF_COUNT_HW_CACHE_DTLB }, + { "iTLB", PERF_COUNT_HW_CACHE_ITLB }, { "branch", PERF_COUNT_HW_CACHE_BPU }, +#ifdef HAVE_PERF_EVENT_CACHE_NODE + { "node", PERF_COUNT_HW_CACHE_NODE }, +#endif +}; + +struct cache_op_and_result +{ + perf_hw_cache_op_id op_id; + perf_hw_cache_op_result_id result_id; +}; + +static string_to_id CACHE_OPERATION_TABLE[] = { + { "loads", { PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS } }, + { "stores", { PERF_COUNT_HW_CACHE_OP_WRITE, PERF_COUNT_HW_CACHE_RESULT_ACCESS } }, + { "prefetches", { PERF_COUNT_HW_CACHE_OP_PREFETCH, PERF_COUNT_HW_CACHE_RESULT_ACCESS } }, + { "load-misses", { PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_MISS } }, + { "store-misses", { PERF_COUNT_HW_CACHE_OP_WRITE, PERF_COUNT_HW_CACHE_RESULT_MISS } }, + { "prefetch-misses", { PERF_COUNT_HW_CACHE_OP_PREFETCH, PERF_COUNT_HW_CACHE_RESULT_MISS } }, +}; + +inline constexpr std::uint64_t make_cache_config(perf_hw_cache_id cache, 
perf_hw_cache_op_id op, + perf_hw_cache_op_result_id op_result) +{ + return cache | (op << 8) | (op_result << 16); +} + +template +inline constexpr std::size_t array_size(T (&)[N]) +{ + return N; +} +} // namespace + +namespace lo2s +{ +namespace perf +{ + +std::vector EventResolver::get_pmu_events() +{ + std::vector events; + + const std::filesystem::path pmu_devices("/sys/bus/event_source/devices"); + + for (const auto& pmu : std::filesystem::directory_iterator(pmu_devices)) + { + const auto pmu_path = pmu.path(); + + const std::filesystem::path event_dir(pmu_path / "events"); + + // some PMUs don't have any events, in that case event_dir doesn't exist + if (!std::filesystem::is_directory(event_dir)) + { + continue; + } + + for (const auto& event : std::filesystem::directory_iterator(event_dir)) + { + std::stringstream event_name; + + const auto event_path = event.path(); + const auto extension = event_path.extension(); + + // ignore scaling and unit information + if (extension == ".scale" || extension == ".unit") + { + continue; + } + + // use std::filesystem::path::string, otherwise the paths are formatted quoted + event_name << pmu_path.filename().string() << '/' << event_path.filename().string() + << '/'; + try + { + SysfsEventAttr event(event_name.str()); + events.emplace_back(event); + } + catch (const EventAttr::InvalidEvent& e) + { + Log::debug() << "Can not open event " << event_name.str() << ":" << e.what(); + } + } + } + + return events; +} + +std::vector EventResolver::get_tracepoint_event_names() +{ + try + { + std::ifstream ifs_available_events; + ifs_available_events.exceptions(std::ios::failbit | std::ios::badbit); + + ifs_available_events.open("/sys/kernel/debug/tracing/available_events"); + ifs_available_events.exceptions(std::ios::badbit); + + std::vector available; + + for (std::string tracepoint; std::getline(ifs_available_events, tracepoint);) + { + available.emplace_back(std::move(tracepoint)); + } + + return available; + } + catch (const 
std::ios_base::failure& e) + { + Log::debug() << "Retrieving kernel tracepoint event names failed: " << e.what(); + return {}; + } +} + +EventAttr EventResolver::fallback_metric_leader_event() +{ + Log::debug() << "checking for metric leader event..."; + for (auto candidate : { + "ref-cycles", + "cpu-cycles", + "bus-cycles", + }) + { + try + { + const EventAttr ev = get_event_by_name(candidate); + Log::debug() << "found suitable metric leader event: " << candidate; + return ev; + } + catch (const EventAttr::InvalidEvent& e) + { + Log::debug() << "not a suitable metric leader event: " << candidate; + } + } + + throw EventAttr::InvalidEvent{ "no suitable metric leader event found" }; +} + +EventResolver::EventResolver() +{ + struct predef_event + { + std::string name; + perf_type_id type; + uint64_t config; + }; + std::vector predef_events = { + { "cpu-cycles", PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES }, + { "instructions", PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS }, + { "cache-references", PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES }, + { "cache-misses", PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES }, + { "branch-instructions", PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, + { "branch-misses", PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES }, + { "bus-cycles", PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES }, +#ifdef HAVE_PERF_EVENT_STALLED_CYCLES_FRONTEND + { "stalled-cycles-frontend", PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, +#endif +#ifdef HAVE_PERF_EVENT_STALLED_CYCLES_BACKEND + { "stalled-cycles-backend", PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, +#endif +#ifdef HAVE_PERF_EVENT_REF_CYCLES + { "ref-cycles", PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES }, +#endif + { "cpu-clock", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK }, + { "task-clock", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK }, + { "page-faults", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS }, + { "context-switches", PERF_TYPE_SOFTWARE, 
PERF_COUNT_SW_CONTEXT_SWITCHES }, + { "cpu-migrations", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS }, + { "minor-faults", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN }, + { "major-faults", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ }, +#ifdef HAVE_PERF_EVENT_ALIGNMENT_FAULTS + { "alignment-faults", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS }, +#endif +#ifdef HAVE_PERF_EVENT_EMULATION_FAULTS + { "emulation-faults", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS }, +#endif +#ifdef HAVE_PERF_EVENT_DUMMY + { "dummy", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY }, +#endif +#ifdef HAVE_PERF_EVENT_BPF_OUTPUT + { "bpf-output", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT }, +#endif +#ifdef HAVE_PERF_EVENT_CGROUP_SWITCHES + { "cgroup-switches", PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CGROUP_SWITCHES }, +#endif + }; + + for (auto& predef_ev : predef_events) + { + try + { + SimpleEventAttr ev(predef_ev.name, predef_ev.type, predef_ev.config); + event_map_.emplace(predef_ev.name, ev); + } + catch (EventAttr::InvalidEvent& e) + { + continue; + } + } + std::stringstream name_fmt; + for (auto& cache : CACHE_NAME_TABLE) + { + for (auto& operation : CACHE_OPERATION_TABLE) + { + try + { + name_fmt.str(std::string()); + name_fmt << cache.name << '-' << operation.name; + + event_map_.emplace(name_fmt.str(), + SimpleEventAttr(name_fmt.str(), PERF_TYPE_HW_CACHE, + make_cache_config(cache.id, operation.id.op_id, + operation.id.result_id))); + } + catch (EventAttr::InvalidEvent& e) + { + } + } + } +} + +EventAttr EventResolver::cache_event(const std::string& name) +{ + // Format for raw events is r followed by a hexadecimal number + static const std::regex raw_regex("r[[:xdigit:]]{1,8}"); + + // save event in event map; return a copy of the inserted event to + // the caller. 
+ try + { + if (regex_match(name, raw_regex)) + { + std::optional ev = SimpleEventAttr::raw(name); + return event_map_.emplace(name, ev).first->second.value(); + } + else + { + std::optional ev = SysfsEventAttr(name); + return event_map_.emplace(name, ev).first->second.value(); + } + } + catch (const EventAttr::InvalidEvent& e) + { + event_map_.emplace(name, std::nullopt); + throw; // rethrow the original exception object instead of a copy + } +} + +/** + * takes the name of an event and looks it up in an internal event list. + * @returns The corresponding EventAttr if it is available + * @throws InvalidEvent if the event is unavailable + */ +EventAttr EventResolver::get_event_by_name(const std::string& name) +{ + auto event_it = event_map_.find(name); + if (event_it != event_map_.end()) + { + if (event_it->second.has_value()) + { + return event_it->second.value(); + } + else + { + throw EventAttr::InvalidEvent("The event '" + name + "' is not available"); + } + } + else + { + return cache_event(name); + } +} + +EventAttr EventResolver::get_metric_leader(const std::string& metric_leader) +{ + std::optional leader; + if (metric_leader == "") + { + Log::info() << "choosing default metric-leader"; + try + { + leader = EventResolver::instance().get_event_by_name("cpu-clock"); + } + catch (const EventAttr::InvalidEvent& e) + { + Log::warn() << "cpu-clock isn't available, trying to use a fallback event"; + try + { + leader = EventResolver::instance().fallback_metric_leader_event(); + } + catch (const EventAttr::InvalidEvent& e) + { + Log::error() << "Failed to determine a suitable metric leader event"; + Log::error() << "Try manually specifying one with --metric-leader."; + } + } + } + else + { + try + { + leader = perf::EventResolver::instance().get_event_by_name(metric_leader); + } + catch (const EventAttr::InvalidEvent& e) + { + Log::error() << "Metric leader " << metric_leader << " not available."; + Log::error() << "Please choose another metric leader."; + } + } + return leader.value(); +} + +bool 
EventResolver::has_event(const std::string& name) +{ + const auto event_it = event_map_.find(name); + if (event_it != event_map_.end()) + { + return (event_it->second.has_value()); + } + else + { + try + { + cache_event(name); + return true; + } + catch (const EventAttr::InvalidEvent&) + { + return false; + } + } +} + +std::vector EventResolver::get_predefined_events() +{ + std::vector events; + events.reserve(event_map_.size()); + + for (const auto& event : event_map_) + { + if (event.second.has_value()) + { + events.push_back(event.second.value()); // copy: event is a const reference + } + } + + return events; +} + +} // namespace perf +} // namespace lo2s diff --git a/src/perf/time/converter.cpp b/src/perf/time/converter.cpp index 6c641bc0..4c1af0ce 100644 --- a/src/perf/time/converter.cpp +++ b/src/perf/time/converter.cpp @@ -44,7 +44,7 @@ Converter::Converter() : offset(otf2::chrono::duration(0)) const auto time_diff = reader.local_time.time_since_epoch() - reader.perf_time.time_since_epoch(); - if (lo2s::config().use_clockid) + if (lo2s::config().clockid.has_value()) { if (time_diff < std::chrono::microseconds(-100) or time_diff > std::chrono::microseconds(0)) { diff --git a/src/perf/time/reader.cpp b/src/perf/time/reader.cpp index ba6404d2..0bf6f526 100644 --- a/src/perf/time/reader.cpp +++ b/src/perf/time/reader.cpp @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -56,8 +57,8 @@ Reader::Reader() static_assert(sizeof(local_time) == 8, "The local time object must not be a big fat " "object, or the hardware breakpoint won't work."); - Event event = - EventProvider::instance().create_time_event(reinterpret_cast(&local_time)); + EventAttr event = + EventComposer::instance().create_time_event(reinterpret_cast(&local_time)); try { diff --git a/src/perf/tracepoint/event.cpp b/src/perf/tracepoint/event_attr.cpp similarity index 84% rename from src/perf/tracepoint/event.cpp rename to src/perf/tracepoint/event_attr.cpp index 70671511..ba8e0f56 100644 --- 
a/src/perf/tracepoint/event.cpp +++ b/src/perf/tracepoint/event_attr.cpp @@ -19,7 +19,7 @@ * along with lo2s. If not, see . */ -#include +#include #include namespace lo2s @@ -29,27 +29,24 @@ namespace perf namespace tracepoint { -TracepointEvent::TracepointEvent(const std::string& name, bool enable_on_exec) -: Event(name, PERF_TYPE_TRACEPOINT, 0), name_(name) +TracepointEventAttr::TracepointEventAttr(const std::string& name) +: EventAttr(name, PERF_TYPE_TRACEPOINT, 0) { - set_common_attrs(enable_on_exec); parse_format(); // update to correct config (id_ set in parse_format()) attr_.config = id_; - attr_.sample_type |= PERF_SAMPLE_RAW | PERF_SAMPLE_IDENTIFIER; - update_availability(); } -const std::filesystem::path TracepointEvent::base_path_ = "/sys/kernel/debug/tracing/events"; +const std::filesystem::path TracepointEventAttr::base_path_ = "/sys/kernel/debug/tracing/events"; -TracepointEvent::ParseError::ParseError(const std::string& what, int error_code) +TracepointEventAttr::ParseError::ParseError(const std::string& what, int error_code) : std::runtime_error{ what + ": " + std::strerror(error_code) } { } -void TracepointEvent::parse_format() +void TracepointEventAttr::parse_format() { using namespace std::string_literals; @@ -89,7 +86,7 @@ void TracepointEvent::parse_format() } } -void TracepointEvent::parse_format_line(const std::string& line) +void TracepointEventAttr::parse_format_line(const std::string& line) { static std::regex field_regex( "^\\s+field:([^;]+);\\s+offset:(\\d+);\\s+size:(\\d+);\\s+signed:(\\d+);$"); diff --git a/src/perf/tracepoint/writer.cpp b/src/perf/tracepoint/writer.cpp index 70c1ba34..41cda75b 100644 --- a/src/perf/tracepoint/writer.cpp +++ b/src/perf/tracepoint/writer.cpp @@ -14,9 +14,9 @@ namespace perf namespace tracepoint { -Writer::Writer(Cpu cpu, const std::string& name, trace::Trace& trace_, +Writer::Writer(Cpu cpu, perf::tracepoint::TracepointEventAttr event, trace::Trace& trace_, const otf2::definition::metric_class& 
metric_class) -: Reader(cpu, name), +: Reader(cpu, event), writer_(trace_.create_metric_writer(fmt::format("tracepoint metrics for {}", cpu))), metric_instance_( trace_.metric_instance(metric_class, writer_.location(), trace_.system_tree_cpu_node(cpu))), diff --git a/src/perf/util.cpp b/src/perf/util.cpp index 4ae7755e..78405f52 100644 --- a/src/perf/util.cpp +++ b/src/perf/util.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/platform.cpp b/src/platform.cpp index 769958fd..6f892cd8 100644 --- a/src/platform.cpp +++ b/src/platform.cpp @@ -217,7 +217,7 @@ Processor detect_processor(void) return Processor::UNKNOWN; } -std::vector get_mem_events() +std::vector get_mem_events() { static auto proc = detect_processor(); switch (proc) diff --git a/src/topology.cpp b/src/topology.cpp index 144d9b65..c352a870 100644 --- a/src/topology.cpp +++ b/src/topology.cpp @@ -3,10 +3,10 @@ namespace lo2s { -const std::filesystem::path Topology::base_path = "/sys/devices/system/cpu"; void Topology::read_proc() { + const std::filesystem::path base_path = "/sys/devices/system/cpu"; auto online = parse_list_from_file(base_path / "online"); for (auto cpu_id : online) diff --git a/src/trace/trace.cpp b/src/trace/trace.cpp index be875a21..66ccc78e 100644 --- a/src/trace/trace.cpp +++ b/src/trace/trace.cpp @@ -580,7 +580,7 @@ Trace::metric_instance(const otf2::definition::metric_class& metric_class, } otf2::definition::metric_class& -Trace::tracepoint_metric_class(const perf::tracepoint::TracepointEvent& event) +Trace::tracepoint_metric_class(const perf::tracepoint::TracepointEventAttr& event) { if (!registry_.has(ByString(event.name()))) {