Skip to content

Commit

Permalink
Let the icinga check command also output the error when last reload failed
Browse files Browse the repository at this point in the history

refs #7263
  • Loading branch information
Al2Klimov committed Oct 30, 2020
1 parent 2005e4b commit f0b7d7b
Show file tree
Hide file tree
Showing 6 changed files with 87 additions and 22 deletions.
1 change: 1 addition & 0 deletions doc/10-icinga-template-library.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ Name | Description
-----------------------|---------------
icinga\_min\_version | **Optional.** Required minimum Icinga 2 version, e.g. `2.8.0`. If not satisfied, the state changes to `Critical`. Release packages only.
icinga\_perfdata | **Optional.** Only yield the given performance data. E.g. `[ "*_latency", "*_execution_time" ]`
icinga\_verbose | **Optional.** If set and the last reload failed, append the reason for the failure to the check output.

### cluster <a id="itl-icinga-cluster"></a>

Expand Down
39 changes: 27 additions & 12 deletions lib/base/application.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@
#include "base/scriptglobal.hpp"
#include "base/process.hpp"
#include "base/tlsutility.hpp"
#include <algorithm>
#include <boost/algorithm/string/trim.hpp>
#include <boost/exception/errinfo_api_function.hpp>
#include <boost/exception/errinfo_errno.hpp>
#include <boost/exception/errinfo_file_name.hpp>
#include <cstring>
#include <sstream>
#include <iostream>
#include <fstream>
Expand Down Expand Up @@ -56,11 +58,12 @@ double Application::m_StartTime;
bool Application::m_ScriptDebuggerEnabled = false;

#ifdef _WIN32
double Application::m_LastReloadFailed = 0;
Application::LastFailedReload Application::m_LastReloadFailed;
std::mutex Application::m_LastReloadFailedMutex;
#else /* _WIN32 */
std::atomic<double>* Application::m_LastReloadFailed = ([]() -> std::atomic<double>* {
Application::LastFailedReload* Application::m_LastReloadFailed = ([]() -> Application::LastFailedReload* {
auto memory (mmap(
nullptr, sizeof(std::atomic<double>),
nullptr, sizeof(Application::LastFailedReload),
PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0
));
if (memory == MAP_FAILED) {
Expand All @@ -69,8 +72,11 @@ std::atomic<double>* Application::m_LastReloadFailed = ([]() -> std::atomic<doub
<< boost::errinfo_errno(errno));
}

auto lrf ((std::atomic<double>*)memory);
lrf->store(0);
auto lrf ((Application::LastFailedReload*)memory);

lrf->When.store(0);
std::fill((volatile char*)lrf->Why, (volatile char*)lrf->Why + sizeof(lrf->Why), 0);

return lrf;
})();
#endif /* _WIN32 */
Expand Down Expand Up @@ -383,7 +389,7 @@ void Application::OnShutdown()
static void ReloadProcessCallbackInternal(const ProcessResult& pr)
{
if (pr.ExitStatus != 0) {
Application::SetLastReloadFailed(Utility::GetTime());
Application::SetLastReloadFailed(Utility::GetTime(), pr.Output);
Log(LogCritical, "Application", "Found error in config: reloading aborted");
}
#ifdef _WIN32
Expand Down Expand Up @@ -1188,21 +1194,30 @@ void Application::SetScriptDebuggerEnabled(bool enabled)
m_ScriptDebuggerEnabled = enabled;
}

/**
 * Returns when and why the last reload failed.
 *
 * The result is a pair: first is the stored timestamp (When), second is the
 * stored reason text (Why). Elsewhere in this change the timestamp is reset
 * to 0 after a successful reload and callers test `first > 0`, so 0 means
 * "no failed reload recorded".
 *
 * NOTE(review): this listing appears to be a rendered diff without +/-
 * markers, so both the previous `double` variant and the new
 * `std::pair<double, String>` variant of the signature and of the return
 * statements are visible below - confirm against the real file.
 *
 * @returns Timestamp and reason of the last failed reload.
 */
double Application::GetLastReloadFailed()
std::pair<double, String> Application::GetLastReloadFailed()
{
#ifdef _WIN32
return m_LastReloadFailed;
/* When and Why are read together while holding the mutex. */
std::unique_lock<std::mutex> lock (m_LastReloadFailedMutex);
return std::pair<double, String>(m_LastReloadFailed.When, m_LastReloadFailed.Why);
#else /* _WIN32 */
return m_LastReloadFailed->load();
/* Snapshot the whole fixed-size Why buffer into a local, non-volatile array first. */
char reason[sizeof(m_LastReloadFailed->Why)];
std::copy((volatile char*)m_LastReloadFailed->Why, (volatile char*)m_LastReloadFailed->Why + sizeof(m_LastReloadFailed->Why), (char*)reason);

/*
 * NOTE(review): the String is built from a plain char pointer, so this
 * presumably reads up to the first NUL byte. That relies on the writer
 * having left a terminating NUL inside the buffer - confirm.
 */
return std::pair<double, String>(m_LastReloadFailed->When.load(), String((char*)reason));
#endif /* _WIN32 */
}

/**
 * Records when and why the last reload failed.
 *
 * Elsewhere in this change the function is called with (0, "") after a
 * successful reload to clear the record.
 *
 * NOTE(review): this listing appears to be a rendered diff without +/-
 * markers, so both the previous one-argument variant and the new
 * two-argument variant of the signature and of the stores are visible
 * below - confirm against the real file.
 *
 * @param ts Timestamp of the failed reload.
 * @param reason Text describing the failure.
 */
void Application::SetLastReloadFailed(double ts)
void Application::SetLastReloadFailed(double ts, const String& reason)
{
#ifdef _WIN32
m_LastReloadFailed = ts;
/* When and Why are replaced together while holding the mutex. */
std::unique_lock<std::mutex> lock (m_LastReloadFailedMutex);
m_LastReloadFailed = LastFailedReload{ts, reason};
#else /* _WIN32 */
m_LastReloadFailed->store(ts);
/* Zero-filled staging buffer of the same size as the Why field. */
char buf[sizeof(m_LastReloadFailed->Why)] = {0};

/*
 * Reasons longer than the buffer are truncated.
 * NOTE(review): strncpy() is given the full sizeof(buf), so a reason of
 * sizeof(buf) bytes or more leaves buf without a terminating NUL, while
 * GetLastReloadFailed() builds a String from the copied bytes - confirm
 * whether the limit should be sizeof(buf) - 1.
 */
(void)strncpy((char*)buf, reason.CStr(), sizeof(buf));
/* The reason text is written first, the timestamp afterwards. */
std::copy((char*)buf, (char*)buf + sizeof(buf), (volatile char*)m_LastReloadFailed->Why);
m_LastReloadFailed->When.store(ts);
#endif /* _WIN32 */
}

Expand Down
24 changes: 20 additions & 4 deletions lib/base/application.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
#include "base/configuration.hpp"
#include <atomic>
#include <iosfwd>
#include <mutex>
#include <utility>

namespace icinga
{
Expand Down Expand Up @@ -101,8 +103,8 @@ class Application : public ObjectImpl<Application> {
static bool GetScriptDebuggerEnabled();
static void SetScriptDebuggerEnabled(bool enabled);

static double GetLastReloadFailed();
static void SetLastReloadFailed(double ts);
static std::pair<double, String> GetLastReloadFailed();
static void SetLastReloadFailed(double ts, const String& reason);

static void DisplayInfoMessage(std::ostream& os, bool skipVersion = false);

Expand Down Expand Up @@ -138,10 +140,24 @@ class Application : public ObjectImpl<Application> {
static double m_StartTime;
static double m_MainTime;
static bool m_ScriptDebuggerEnabled;

#ifdef _WIN32
static double m_LastReloadFailed;
/**
 * Record of the last failed reload (variant used when _WIN32 is defined).
 *
 * The accessors in application.cpp read and write the single instance while
 * holding m_LastReloadFailedMutex.
 */
struct LastFailedReload
{
/* Timestamp of the failure; defaults to 0. */
double When = 0;
/* Text describing the failure. */
String Why;
};

static LastFailedReload m_LastReloadFailed;
static std::mutex m_LastReloadFailedMutex;
#else /* _WIN32 */
static std::atomic<double> *m_LastReloadFailed;
/**
 * Record of the last failed reload (variant used when _WIN32 is not defined).
 *
 * application.cpp places the single instance in an anonymous MAP_SHARED
 * mmap() region - presumably so that the processes involved in a reload
 * observe the same record; confirm.
 */
struct LastFailedReload
{
/* Timestamp of the failure; set to 0 when the mapping is initialized. */
std::atomic<double> When;
/* Fixed-size buffer for the failure text; SetLastReloadFailed() copies at most sizeof(Why) bytes into it. */
volatile char Why[4096];
};

static LastFailedReload *m_LastReloadFailed;
#endif /* _WIN32 */

#ifdef _WIN32
Expand Down
1 change: 0 additions & 1 deletion lib/base/logger.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@ class Logger : public ObjectImpl<Logger>

void ValidateSeverity(const Lazy<String>& lvalue, const ValidationUtils& utils) final;

protected:
void Start(bool runtimeCreated) override;
void Stop(bool runtimeRemoved) override;

Expand Down
24 changes: 22 additions & 2 deletions lib/cli/daemoncommand.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "base/atomic.hpp"
#include "base/defer.hpp"
#include "base/logger.hpp"
#include "base/streamlogger.hpp"
#include "base/application.hpp"
#include "base/timer.hpp"
#include "base/utility.hpp"
Expand All @@ -23,6 +24,7 @@
#include <boost/program_options.hpp>
#include <iostream>
#include <fstream>
#include <sstream>

#ifdef _WIN32
#include <windows.h>
Expand Down Expand Up @@ -233,16 +235,32 @@ int RunWorker(const std::vector<std::string>& configs, bool closeConsoleLog = fa
}
#endif /* I2_DEBUG */

std::ostringstream oss;
StreamLogger::Ptr sl = new StreamLogger();

sl->BindStream(&oss, false);
sl->Start(true);
sl->SetActive(true);

Log(LogInformation, "cli", "Loading configuration file(s).");

{
std::vector<ConfigItem::Ptr> newItems;

if (!DaemonUtility::LoadConfigFiles(configs, newItems, Configuration::ObjectsPath, Configuration::VarsPath)) {
Log(LogCritical, "cli", "Config validation failed. Re-run with 'icinga2 daemon -C' after fixing the config.");

sl->Stop(true);
sl = nullptr;
Application::SetLastReloadFailed(Utility::GetTime(), oss.str());

return EXIT_FAILURE;
}

sl->Stop(true);
sl = nullptr;
oss = decltype(oss)();

#ifndef _WIN32
Log(LogNotice, "cli")
<< "Notifying umbrella process (PID " << l_UmbrellaPid << ") about the config loading success";
Expand Down Expand Up @@ -477,6 +495,7 @@ static pid_t StartUnixWorker(const std::vector<std::string>& configs, bool close
}

(void)sigprocmask(SIG_UNBLOCK, &l_UnixWorkerSignals, nullptr);
Application::SetLastReloadFailed(Utility::GetTime(), "fork() failed");
return -1;

case 0:
Expand Down Expand Up @@ -511,11 +530,13 @@ static pid_t StartUnixWorker(const std::vector<std::string>& configs, bool close
} catch (const std::exception& ex) {
Log(LogCritical, "cli")
<< "Failed to re-initialize thread pool after forking (child): " << DiagnosticInformation(ex);
Application::SetLastReloadFailed(Utility::GetTime(), "Failed to re-initialize thread pool after forking (child)");
_exit(EXIT_FAILURE);
}

_exit(RunWorker(configs, closeConsoleLog, stderrFile));
} catch (...) {
Application::SetLastReloadFailed(Utility::GetTime(), "");
_exit(EXIT_FAILURE);
}

Expand Down Expand Up @@ -767,12 +788,11 @@ int DaemonCommand::Run(const po::variables_map& vm, const std::vector<std::strin

if (nextWorker == -1) {
Log(LogCritical, "Application", "Found error in config: reloading aborted");
Application::SetLastReloadFailed(Utility::GetTime());
} else {
Log(LogInformation, "Application")
<< "Reload done, old process shutting down. Child process with PID '" << nextWorker << "' is taking over.";

Application::SetLastReloadFailed(0);
Application::SetLastReloadFailed(0, "");
(void)kill(currentWorker, SIGTERM);

{
Expand Down
20 changes: 17 additions & 3 deletions lib/methods/icingachecktask.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ void IcingaCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckRes
auto perfdataFilter (MacroProcessor::ResolveMacros("$icinga_perfdata$", resolvers, checkable->GetLastCheckResult(),
nullptr, MacroProcessor::EscapeCallback(), resolvedMacros, useResolvedMacros));

auto verbose (MacroProcessor::ResolveMacros("$icinga_verbose$", resolvers, checkable->GetLastCheckResult(),
nullptr, MacroProcessor::EscapeCallback(), resolvedMacros, useResolvedMacros));

if (resolvedMacros && !useResolvedMacros)
return;

Expand Down Expand Up @@ -163,10 +166,17 @@ void IcingaCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckRes
". Version: " + appVersion;

/* Indicate a warning if the last reload failed. */
double lastReloadFailed = Application::GetLastReloadFailed();
auto lastReloadFailed (Application::GetLastReloadFailed());
String verboseText;

if (lastReloadFailed.first > 0) {
output += "; Last reload attempt failed at " + Utility::FormatDateTime("%Y-%m-%d %H:%M:%S %z", lastReloadFailed.first);

if (verbose.ToBool() && lastReloadFailed.second.GetLength()) {
output += " (reason: see below)";
verboseText = lastReloadFailed.second;
}

if (lastReloadFailed > 0) {
output += "; Last reload attempt failed at " + Utility::FormatDateTime("%Y-%m-%d %H:%M:%S %z", lastReloadFailed);
state =ServiceWarning;
}

Expand All @@ -192,6 +202,10 @@ void IcingaCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckRes
state = ServiceCritical;
}

if (verboseText.GetLength()) {
output += "\n\n" + verboseText;
}

String commandName = command->GetName();
if (Checkable::ExecuteCommandProcessFinishedHandler) {
double now = Utility::GetTime();
Expand Down

0 comments on commit f0b7d7b

Please sign in to comment.