From be68a584abe33f8bc5c799e2208c164a9c1960a3 Mon Sep 17 00:00:00 2001 From: ito-san <57388357+ito-san@users.noreply.github.com> Date: Fri, 27 May 2022 12:49:17 +0900 Subject: [PATCH] fix(system_monitor): move top command execution to a timer (#948) * fix(system_monitor): move top command execution to a timer Signed-off-by: ito-san * removed unnecessary update method Signed-off-by: ito-san * use tier4_autoware_utils::StopWatch Signed-off-by: ito-san * Ensure thread-safe Signed-off-by: ito-san --- .../process_monitor/process_monitor.hpp | 19 ++-- system/system_monitor/package.xml | 1 + .../src/process_monitor/process_monitor.cpp | 87 ++++++++++++++----- 3 files changed, 80 insertions(+), 27 deletions(-) diff --git a/system/system_monitor/include/system_monitor/process_monitor/process_monitor.hpp b/system/system_monitor/include/system_monitor/process_monitor/process_monitor.hpp index 39bf897116b0a..802d905d6bfd5 100644 --- a/system/system_monitor/include/system_monitor/process_monitor/process_monitor.hpp +++ b/system/system_monitor/include/system_monitor/process_monitor/process_monitor.hpp @@ -27,6 +27,7 @@ #include #include +#include #include #include @@ -41,11 +42,6 @@ class ProcessMonitor : public rclcpp::Node */ explicit ProcessMonitor(const rclcpp::NodeOptions & options); - /** - * @brief Update the diagnostic state - */ - void update(); - protected: using DiagStatus = diagnostic_msgs::msg::DiagnosticStatus; @@ -110,6 +106,11 @@ class ProcessMonitor : public rclcpp::Node std::vector> * tasks, const std::string & message, const std::string & error_command, const std::string & content); + /** + * @brief timer callback to execute top command + */ + void onTimer(); + diagnostic_updater::Updater updater_; //!< @brief Updater class which advertises to /diagnostics char hostname_[HOST_NAME_MAX + 1]; //!< @brief host name @@ -118,7 +119,13 @@ class ProcessMonitor : public rclcpp::Node std::vector> load_tasks_; //!< @brief list of diagnostics tasks for high load procs std::vector> - memory_tasks_; //!< @brief list of diagnostics tasks for high memory procs + memory_tasks_; //!< @brief list of diagnostics tasks for high memory procs + rclcpp::TimerBase::SharedPtr timer_; //!< @brief timer to execute top command + std::string top_output_; //!< @brief output from top command + bool is_top_error_; //!< @brief flag if an error occurs + double elapsed_ms_; //!< @brief Execution time of top command + std::mutex mutex_; //!< @brief mutex for output from top command + rclcpp::CallbackGroup::SharedPtr timer_callback_group_; //!< @brief Callback Group }; #endif // SYSTEM_MONITOR__PROCESS_MONITOR__PROCESS_MONITOR_HPP_ diff --git a/system/system_monitor/package.xml b/system/system_monitor/package.xml index 559dc9c072592..01ef6d6f9a3a8 100644 --- a/system/system_monitor/package.xml +++ b/system/system_monitor/package.xml @@ -18,6 +18,7 @@ rclcpp rclcpp_components std_msgs + tier4_autoware_utils tier4_external_api_msgs chrony diff --git a/system/system_monitor/src/process_monitor/process_monitor.cpp b/system/system_monitor/src/process_monitor/process_monitor.cpp index 661753196128e..57e1aaf20f571 100644 --- a/system/system_monitor/src/process_monitor/process_monitor.cpp +++ b/system/system_monitor/src/process_monitor/process_monitor.cpp @@ -21,6 +21,8 @@ #include "system_monitor/system_monitor_utility.hpp" +#include + #include #include @@ -31,8 +33,11 @@ ProcessMonitor::ProcessMonitor(const rclcpp::NodeOptions & options) : Node("process_monitor", options), updater_(this), - num_of_procs_(declare_parameter("num_of_procs", 5)) + num_of_procs_(declare_parameter("num_of_procs", 5)), + is_top_error_(false) { + using namespace std::literals::chrono_literals; + int index; gethostname(hostname_, sizeof(hostname_)); @@ -50,33 +55,40 @@ ProcessMonitor::ProcessMonitor(const rclcpp::NodeOptions & options) memory_tasks_.push_back(task); updater_.add(*task); } -} -void ProcessMonitor::update() { updater_.force_update(); } + // Start timer to execute top command + timer_callback_group_ = this->create_callback_group(rclcpp::CallbackGroupType::MutuallyExclusive); + timer_ = rclcpp::create_timer( + this, get_clock(), 1s, std::bind(&ProcessMonitor::onTimer, this), timer_callback_group_); +} void ProcessMonitor::monitorProcesses(diagnostic_updater::DiagnosticStatusWrapper & stat) { - // Remember start time to measure elapsed time - const auto t_start = SystemMonitorUtility::startMeasurement(); - - bp::ipstream is_err; - bp::ipstream is_out; - std::ostringstream os; + // thread-safe read + std::string str; + bool is_top_error; + double elapsed_ms; + { + std::lock_guard lock(mutex_); + str = top_output_; + is_top_error = is_top_error_; + elapsed_ms = elapsed_ms_; + } - // Get processes - bp::child c("top -bn1 -o %CPU -w 128", bp::std_out > is_out, bp::std_err > is_err); - c.wait(); - if (c.exit_code() != 0) { - is_err >> os.rdbuf(); + if (is_top_error) { stat.summary(DiagStatus::ERROR, "top error"); - stat.add("top", os.str().c_str()); - setErrorContent(&load_tasks_, "top error", "top", os.str().c_str()); - setErrorContent(&memory_tasks_, "top error", "top", os.str().c_str()); + stat.add("top", str); + setErrorContent(&load_tasks_, "top error", "top", str); + setErrorContent(&memory_tasks_, "top error", "top", str); return; } - is_out >> os.rdbuf(); - std::string str = os.str(); + // If top still not running + if (str.empty()) { + // Send OK tentatively + stat.summary(DiagStatus::OK, "starting up"); + return; + } // Get task summary getTasksSummary(stat, str); @@ -89,8 +101,7 @@ void ProcessMonitor::monitorProcesses(diagnostic_updater::DiagnosticStatusWrappe // Get high memory processes getHighMemoryProcesses(str); - // Measure elapsed time since start time and report - SystemMonitorUtility::stopMeasurement(t_start, stat); + stat.addf("execution time", "%f ms", elapsed_ms); } void ProcessMonitor::getTasksSummary( @@ -310,5 +321,39 @@ void ProcessMonitor::setErrorContent( } } +void ProcessMonitor::onTimer() +{ + bool is_top_error = false; + + // Start to measure elapsed time + tier4_autoware_utils::StopWatch stop_watch; + stop_watch.tic("execution_time"); + + bp::ipstream is_err; + bp::ipstream is_out; + std::ostringstream os; + + // Get processes + bp::child c("top -bn1 -o %CPU -w 128", bp::std_out > is_out, bp::std_err > is_err); + c.wait(); + + if (c.exit_code() != 0) { + is_top_error = true; + is_err >> os.rdbuf(); + } else { + is_out >> os.rdbuf(); + } + + const double elapsed_ms = stop_watch.toc("execution_time"); + + // thread-safe copy + { + std::lock_guard lock(mutex_); + top_output_ = os.str(); + is_top_error_ = is_top_error; + elapsed_ms_ = elapsed_ms; + } +} + #include RCLCPP_COMPONENTS_REGISTER_NODE(ProcessMonitor)