Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Let the icinga check command also output the error when last reload failed #8430

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/10-icinga-template-library.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ Custom variables passed as [command parameters](03-monitoring-basics.md#command-
Name | Description
-----------------------|---------------
icinga\_min\_version | **Optional.** Required minimum Icinga 2 version, e.g. `2.8.0`. If not satisfied, the state changes to `Critical`. Release packages only.
icinga\_verbose | **Optional.** If set to true and the last reload failed, the reason for the failure is appended to the check output.

### cluster <a id="itl-icinga-cluster"></a>

Expand Down
34 changes: 19 additions & 15 deletions lib/base/application.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <boost/exception/errinfo_errno.hpp>
#include <boost/exception/errinfo_file_name.hpp>
#include <boost/stacktrace.hpp>
#include <cstring>
#include <sstream>
#include <iostream>
#include <fstream>
Expand All @@ -30,6 +31,7 @@
#ifdef _WIN32
#include <windows.h>
#else /* _WIN32 */
#include "base/shared-memory.hpp"
#include <signal.h>
#endif /* _WIN32 */

Expand Down Expand Up @@ -63,11 +65,18 @@ char **Application::m_ArgV;
double Application::m_StartTime;
bool Application::m_ScriptDebuggerEnabled = false;

/* Points to the last-reload-failed state; initialized with the storage returned by AllocLastReloadFailed(). */
Application::LastReloadFailed* Application::m_LastReloadFailed (Application::AllocLastReloadFailed());

/**
 * Provides the storage for the state of the last failed reload.
 *
 * On Windows the state is a function-local static object. On other platforms
 * it is the object held by a function-local static SharedMemory<> wrapper.
 * NOTE(review): presumably so that the umbrella and the worker processes
 * see the same data - confirm against base/shared-memory.hpp.
 *
 * @returns Pointer to the LastReloadFailed object.
 */
Application::LastReloadFailed* Application::AllocLastReloadFailed()
{
#ifdef _WIN32
/* NOTE(review): the next line looks like the removed pre-change definition left over from the diff view - confirm. */
double Application::m_LastReloadFailed = 0;
static LastReloadFailed lrf;
return &lrf;
#else /* _WIN32 */
/* NOTE(review): the next line looks like the removed pre-change definition left over from the diff view - confirm. */
SharedMemory<Application::AtomicTs> Application::m_LastReloadFailed (0);
static SharedMemory<LastReloadFailed> slrf;
return &slrf.Get();
#endif /* _WIN32 */
}

#ifdef _WIN32
static LPTOP_LEVEL_EXCEPTION_FILTER l_DefaultUnhandledExceptionFilter = nullptr;
Expand Down Expand Up @@ -379,7 +388,7 @@ void Application::OnShutdown()
static void ReloadProcessCallbackInternal(const ProcessResult& pr)
{
if (pr.ExitStatus != 0) {
Application::SetLastReloadFailed(Utility::GetTime());
Application::SetLastReloadFailed(Utility::GetTime(), pr.Output);
Log(LogCritical, "Application", "Found error in config: reloading aborted");
}
#ifdef _WIN32
Expand Down Expand Up @@ -1211,22 +1220,17 @@ void Application::SetScriptDebuggerEnabled(bool enabled)
m_ScriptDebuggerEnabled = enabled;
}

double Application::GetLastReloadFailed()
/**
 * Retrieves the state of the last failed reload.
 *
 * @returns The stored timestamp (When) paired with a copy of the stored error text (Why).
 */
std::pair<double, String> Application::GetLastReloadFailed()
{
/* NOTE(review): the #ifdef branch below looks like pre-change lines left over from the diff view - confirm. */
#ifdef _WIN32
return m_LastReloadFailed;
#else /* _WIN32 */
return m_LastReloadFailed.Get().load();
#endif /* _WIN32 */
/* Hold the mutex in shared mode while both fields are read. */
LastReloadFailed::SharedLock lock (m_LastReloadFailed->Mutex);
/* NOTE(review): String(const char*) presumably reads up to the first NUL byte, i.e. Why has to be NUL-terminated - confirm. */
return {m_LastReloadFailed->When, String(m_LastReloadFailed->Why)};
}

void Application::SetLastReloadFailed(double ts)
void Application::SetLastReloadFailed(double ts, const String& error)
{
#ifdef _WIN32
m_LastReloadFailed = ts;
#else /* _WIN32 */
m_LastReloadFailed.Get().store(ts);
#endif /* _WIN32 */
LastReloadFailed::UniqueLock lock (m_LastReloadFailed->Mutex);
m_LastReloadFailed->When = ts;
strncpy(m_LastReloadFailed->Why, error.CStr(), sizeof(m_LastReloadFailed->Why));
}

void Application::ValidateName(const Lazy<String>& lvalue, const ValidationUtils& utils)
Expand Down
37 changes: 29 additions & 8 deletions lib/base/application.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,20 @@
#define APPLICATION_H

#include "base/i2-base.hpp"
#include "base/atomic.hpp"
#include "base/application-ti.hpp"
#include "base/logger.hpp"
#include "base/configuration.hpp"
#include "base/shared-memory.hpp"
#include <iosfwd>
#include <utility>

#ifdef _WIN32
#include <mutex>
#include <shared_mutex>
#else /* _WIN32 */
#include <boost/interprocess/sync/interprocess_sharable_mutex.hpp>
#include <boost/interprocess/sync/scoped_lock.hpp>
#include <boost/interprocess/sync/sharable_lock.hpp>
#endif /* _WIN32 */

namespace icinga
{
Expand Down Expand Up @@ -102,8 +110,8 @@ class Application : public ObjectImpl<Application> {
static bool GetScriptDebuggerEnabled();
static void SetScriptDebuggerEnabled(bool enabled);

static double GetLastReloadFailed();
static void SetLastReloadFailed(double ts);
static std::pair<double, String> GetLastReloadFailed();
static void SetLastReloadFailed(double ts, const String& error);

static void DisplayInfoMessage(std::ostream& os, bool skipVersion = false);

Expand Down Expand Up @@ -139,13 +147,26 @@ class Application : public ObjectImpl<Application> {
static double m_StartTime;
static double m_MainTime;
static bool m_ScriptDebuggerEnabled;

/**
 * State of the last failed reload: when it happened (When) and why (Why),
 * together with the mutex that GetLastReloadFailed()/SetLastReloadFailed() lock while accessing them.
 *
 * NOTE(review): the m_LastReloadFailed/AtomicTs lines inside the #ifdef branches look like
 * pre-change lines left over from the diff view - confirm.
 */
struct LastReloadFailed
{
#ifdef _WIN32
static double m_LastReloadFailed;
/* Lock types used by the getter (shared) and the setter (unique). */
typedef std::shared_lock<std::shared_mutex> SharedLock;
typedef std::unique_lock<std::shared_mutex> UniqueLock;

std::shared_mutex Mutex;
#else /* _WIN32 */
typedef Atomic<double> AtomicTs;
static_assert(AtomicTs::is_always_lock_free);
static SharedMemory<AtomicTs> m_LastReloadFailed;
/* Lock types used by the getter (sharable) and the setter (scoped, i.e. exclusive). */
typedef boost::interprocess::sharable_lock<boost::interprocess::interprocess_sharable_mutex> SharedLock;
typedef boost::interprocess::scoped_lock<boost::interprocess::interprocess_sharable_mutex> UniqueLock;

boost::interprocess::interprocess_sharable_mutex Mutex;
#endif /* _WIN32 */
/* Timestamp of the last failed reload. Initially 0, and 0 is stored again after a successful reload. */
double When = 0;
/* Error text of the last failed reload, kept as a C string in a fixed-size (16 KiB) buffer. */
char Why[16 * 1024] = {0};
};

static LastReloadFailed* m_LastReloadFailed;
static LastReloadFailed* AllocLastReloadFailed();

#ifdef _WIN32
static BOOL WINAPI CtrlHandler(DWORD type);
Expand Down
1 change: 0 additions & 1 deletion lib/base/logger.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,6 @@ class Logger : public ObjectImpl<Logger>
void SetSeverity(const String& value, bool suppress_events = false, const Value& cookie = Empty) override;
void ValidateSeverity(const Lazy<String>& lvalue, const ValidationUtils& utils) final;

protected:
void Start(bool runtimeCreated) override;
void Stop(bool runtimeRemoved) override;

Expand Down
48 changes: 46 additions & 2 deletions lib/cli/daemoncommand.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "base/atomic.hpp"
#include "base/defer.hpp"
#include "base/logger.hpp"
#include "base/streamlogger.hpp"
#include "base/application.hpp"
#include "base/process.hpp"
#include "base/timer.hpp"
Expand All @@ -25,6 +26,7 @@
#include <boost/program_options.hpp>
#include <iostream>
#include <fstream>
#include <sstream>

#ifdef _WIN32
#include <windows.h>
Expand Down Expand Up @@ -222,6 +224,10 @@ static double GetDebugWorkerDelay()

static String l_ObjectsPath;

#ifndef _WIN32
/* Set by RunWorker() right after it signalled the umbrella process (SIGUSR2) that the config was loaded.
 * The catch handlers around RunWorker() in StartUnixWorker() record a reload failure only while this is still false.
 */
static bool l_WorkerLoadedConfig = false;
#endif /* _WIN32 */

/**
* Do the actual work (config loading, ...)
*
Expand All @@ -246,6 +252,13 @@ int RunWorker(const std::vector<std::string>& configs, bool closeConsoleLog = fa
}
#endif /* I2_DEBUG */

std::ostringstream oss;
StreamLogger::Ptr sl = new StreamLogger();

sl->BindStream(&oss, false);
sl->Start(true);
sl->SetActive(true);

Log(LogInformation, "cli", "Loading configuration file(s).");
NotifyStatus("Loading configuration file(s)...");

Expand All @@ -255,14 +268,24 @@ int RunWorker(const std::vector<std::string>& configs, bool closeConsoleLog = fa
if (!DaemonUtility::LoadConfigFiles(configs, newItems, l_ObjectsPath, Configuration::VarsPath)) {
Log(LogCritical, "cli", "Config validation failed. Re-run with 'icinga2 daemon -C' after fixing the config.");
NotifyStatus("Config validation failed.");

sl->Stop(true);
sl = nullptr;
Application::SetLastReloadFailed(Utility::GetTime(), oss.str());

return EXIT_FAILURE;
}

sl->Stop(true);
sl = nullptr;
oss = decltype(oss)();

#ifndef _WIN32
Log(LogNotice, "cli")
<< "Notifying umbrella process (PID " << l_UmbrellaPid << ") about the config loading success";

(void)kill(l_UmbrellaPid, SIGUSR2);
l_WorkerLoadedConfig = true;

Log(LogNotice, "cli")
<< "Waiting for the umbrella process to let us doing the actual work";
Expand Down Expand Up @@ -489,6 +512,7 @@ static pid_t StartUnixWorker(const std::vector<std::string>& configs, bool close
}

(void)sigprocmask(SIG_UNBLOCK, &l_UnixWorkerSignals, nullptr);
Application::SetLastReloadFailed(Utility::GetTime(), "fork(2) failed");
return -1;

case 0:
Expand Down Expand Up @@ -531,6 +555,12 @@ static pid_t StartUnixWorker(const std::vector<std::string>& configs, bool close
} catch (const std::exception& ex) {
Log(LogCritical, "cli")
<< "Failed to re-initialize thread pool after forking (child): " << DiagnosticInformation(ex);

Application::SetLastReloadFailed(
Utility::GetTime(),
"Failed to re-initialize thread pool after forking (child): " + DiagnosticInformation(ex)
);

_exit(EXIT_FAILURE);
}

Expand All @@ -539,14 +569,29 @@ static pid_t StartUnixWorker(const std::vector<std::string>& configs, bool close
} catch (const std::exception& ex) {
Log(LogCritical, "cli")
<< "Failed to initialize process spawn helper after forking (child): " << DiagnosticInformation(ex);

Application::SetLastReloadFailed(
Utility::GetTime(),
"Failed to initialize process spawn helper after forking (child): " + DiagnosticInformation(ex)
);

_exit(EXIT_FAILURE);
}

_exit(RunWorker(configs, closeConsoleLog, stderrFile));
} catch (const std::exception& ex) {
Log(LogCritical, "cli") << "Exception in main process: " << DiagnosticInformation(ex);

if (!l_WorkerLoadedConfig) {
Application::SetLastReloadFailed(Utility::GetTime(), "Exception in main process: " + DiagnosticInformation(ex));
}

_exit(EXIT_FAILURE);
} catch (...) {
if (!l_WorkerLoadedConfig) {
Application::SetLastReloadFailed(Utility::GetTime(), "Exception in main process");
}

_exit(EXIT_FAILURE);
}

Expand Down Expand Up @@ -813,15 +858,14 @@ int DaemonCommand::Run(const po::variables_map& vm, const std::vector<std::strin
break;
case -2:
Log(LogCritical, "Application", "Found error in config: reloading aborted");
Application::SetLastReloadFailed(Utility::GetTime());
break;
default:
Log(LogInformation, "Application")
<< "Reload done, old process shutting down. Child process with PID '" << nextWorker << "' is taking over.";

NotifyStatus("Shutting down old instance...");

Application::SetLastReloadFailed(0);
Application::SetLastReloadFailed(0, "");
(void)kill(currentWorker, SIGTERM);

{
Expand Down
20 changes: 17 additions & 3 deletions lib/methods/icingachecktask.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ void IcingaCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckRes
String icingaMinVersion = MacroProcessor::ResolveMacros("$icinga_min_version$", resolvers, checkable->GetLastCheckResult(),
&missingIcingaMinVersion, MacroProcessor::EscapeCallback(), resolvedMacros, useResolvedMacros);

auto verbose (MacroProcessor::ResolveMacros("$icinga_verbose$", resolvers, checkable->GetLastCheckResult(),
nullptr, MacroProcessor::EscapeCallback(), resolvedMacros, useResolvedMacros));

if (resolvedMacros && !useResolvedMacros)
return;

Expand Down Expand Up @@ -158,10 +161,17 @@ void IcingaCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckRes
". Version: " + appVersion;

/* Indicate a warning if the last reload failed. */
double lastReloadFailed = Application::GetLastReloadFailed();
auto lastReloadFailed (Application::GetLastReloadFailed());
String verboseText;

if (lastReloadFailed.first > 0) {
output += "; Last reload attempt failed at " + Utility::FormatDateTime("%Y-%m-%d %H:%M:%S %z", lastReloadFailed.first);

if (verbose.ToBool() && lastReloadFailed.second.GetLength()) {
output += ", see below";
verboseText = lastReloadFailed.second;
}

if (lastReloadFailed > 0) {
output += "; Last reload attempt failed at " + Utility::FormatDateTime("%Y-%m-%d %H:%M:%S %z", lastReloadFailed);
state =ServiceWarning;
}

Expand All @@ -187,6 +197,10 @@ void IcingaCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckRes
state = ServiceCritical;
}

if (verboseText.GetLength()) {
output += "\n\n" + verboseText;
}

String commandName = command->GetName();

if (Checkable::ExecuteCommandProcessFinishedHandler) {
Expand Down