Skip to content

Commit

Permalink
Merge pull request #6970 from Icinga/bugfix/perfdata-gaps
Browse files Browse the repository at this point in the history
Improve reload handling for features (metric & queue flush, activation priority)
  • Loading branch information
Michael Friedrich authored Feb 26, 2019
2 parents bee9098 + ab7a799 commit e2df115
Show file tree
Hide file tree
Showing 32 changed files with 223 additions and 88 deletions.
1 change: 1 addition & 0 deletions doc/17-language-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,7 @@ Constant | Description
--------------------|-------------------
Vars |**Read-write.** Contains a dictionary with global custom attributes. Not set by default.
NodeName |**Read-write.** Contains the cluster node name. Set to the local hostname by default.
ReloadTimeout |**Read-write.** Defines the reload timeout for child processes. Defaults to `300s`.
Environment |**Read-write.** The name of the Icinga environment. Included in the SNI host name for outbound connections. Not set by default.
RunAsUser |**Read-write.** Defines the user the Icinga 2 daemon is running as. Set in the Icinga 2 sysconfig.
RunAsGroup |**Read-write.** Defines the group the Icinga 2 daemon is running as. Set in the Icinga 2 sysconfig.
Expand Down
17 changes: 14 additions & 3 deletions lib/base/application.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,14 @@ void Application::OnConfigLoaded()

ASSERT(m_Instance == nullptr);
m_Instance = this;

String reloadTimeout;

if (ScriptGlobal::Exists("ReloadTimeout"))
reloadTimeout = ScriptGlobal::Get("ReloadTimeout");

if (!reloadTimeout.IsEmpty())
Configuration::ReloadTimeout = Convert::ToDouble(reloadTimeout);
}

/**
Expand Down Expand Up @@ -384,8 +392,6 @@ static void ReloadProcessCallback(const ProcessResult& pr)

pid_t Application::StartReloadProcess()
{
Log(LogInformation, "Application", "Got reload command: Starting new instance.");

// prepare arguments
ArrayData args;
args.push_back(GetExePath(m_ArgV[0]));
Expand All @@ -405,9 +411,14 @@ pid_t Application::StartReloadProcess()
#endif /* _WIN32 */

Process::Ptr process = new Process(Process::PrepareCommand(new Array(std::move(args))));
process->SetTimeout(300);
process->SetTimeout(Configuration::ReloadTimeout);
process->Run(&ReloadProcessCallback);

Log(LogInformation, "Application")
<< "Got reload command: Started new instance with PID '"
<< (unsigned long)(process->GetPID()) << "' (timeout is "
<< Configuration::ReloadTimeout << "s).";

return process->GetPID();
}

Expand Down
7 changes: 4 additions & 3 deletions lib/base/configobject.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
#include "base/workqueue.hpp"
#include "base/context.hpp"
#include "base/application.hpp"
#include <algorithm>
#include <fstream>
#include <boost/exception/errinfo_api_function.hpp>
#include <boost/exception/errinfo_errno.hpp>
Expand Down Expand Up @@ -601,10 +600,12 @@ void ConfigObject::RestoreObjects(const String& filename, int attributeTypes)

void ConfigObject::StopObjects()
{
auto types = Type::GetAllTypes();
std::vector<Type::Ptr> types = Type::GetAllTypes();

std::sort(types.begin(), types.end(), [](const Type::Ptr& a, const Type::Ptr& b) {
return a->GetActivationPriority() > b->GetActivationPriority();
if (a->GetActivationPriority() > b->GetActivationPriority())
return true;
return false;
});

for (const Type::Ptr& type : types) {
Expand Down
11 changes: 11 additions & 0 deletions lib/base/configuration.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ String Configuration::PidPath;
String Configuration::PkgDataDir;
String Configuration::PrefixDir;
String Configuration::ProgramData;
double Configuration::ReloadTimeout{300};
int Configuration::RLimitFiles;
int Configuration::RLimitProcesses;
int Configuration::RLimitStack;
Expand Down Expand Up @@ -223,6 +224,16 @@ void Configuration::SetProgramData(const String& val, bool suppress_events, cons
HandleUserWrite("ProgramData", &Configuration::ProgramData, val, m_ReadOnly);
}

/**
 * Returns the currently configured reload timeout.
 *
 * The value is read straight from the static Configuration::ReloadTimeout
 * member; no per-instance state is involved.
 *
 * @returns The reload timeout in seconds.
 */
double Configuration::GetReloadTimeout() const
{
return Configuration::ReloadTimeout;
}

/**
 * Sets the reload timeout.
 *
 * The write is delegated to HandleUserWrite, which receives the attribute
 * name, a pointer to the static Configuration::ReloadTimeout member, the new
 * value and the m_ReadOnly flag.
 *
 * @param val The new reload timeout in seconds.
 * @param suppress_events Not used by this implementation.
 * @param cookie Not used by this implementation.
 */
void Configuration::SetReloadTimeout(double val, bool suppress_events, const Value& cookie)
{
/* NOTE(review): presumably HandleUserWrite refuses the write while m_ReadOnly is set - confirm against its definition. */
HandleUserWrite("ReloadTimeout", &Configuration::ReloadTimeout, val, m_ReadOnly);
}

int Configuration::GetRLimitFiles() const
{
return Configuration::RLimitFiles;
Expand Down
4 changes: 4 additions & 0 deletions lib/base/configuration.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ class Configuration : public ObjectImpl<Configuration>
String GetProgramData() const override;
void SetProgramData(const String& value, bool suppress_events = false, const Value& cookie = Empty) override;

double GetReloadTimeout() const override;
void SetReloadTimeout(double value, bool suppress_events = false, const Value& cookie = Empty) override;

int GetRLimitFiles() const override;
void SetRLimitFiles(int value, bool suppress_events = false, const Value& cookie = Empty) override;

Expand Down Expand Up @@ -130,6 +133,7 @@ class Configuration : public ObjectImpl<Configuration>
static String PkgDataDir;
static String PrefixDir;
static String ProgramData;
static double ReloadTimeout;
static int RLimitFiles;
static int RLimitProcesses;
static int RLimitStack;
Expand Down
5 changes: 5 additions & 0 deletions lib/base/configuration.ti
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,11 @@ abstract class Configuration
set;
};

[config, no_storage, virtual] double ReloadTimeout {
get;
set;
};

[config, no_storage, virtual] int RLimitFiles {
get;
set;
Expand Down
35 changes: 23 additions & 12 deletions lib/checker/checkercomponent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
#include "icinga/cib.hpp"
#include "remote/apilistener.hpp"
#include "base/configtype.hpp"
#include "base/defer.hpp"
#include "base/objectlock.hpp"
#include "base/utility.hpp"
#include "base/perfdatavalue.hpp"
Expand Down Expand Up @@ -57,7 +56,6 @@ void CheckerComponent::Start(bool runtimeCreated)
Log(LogInformation, "CheckerComponent")
<< "'" << GetName() << "' started.";

m_RunningChecks.store(0);

m_Thread = std::thread(std::bind(&CheckerComponent::CheckThreadProc, this));

Expand All @@ -75,13 +73,32 @@ void CheckerComponent::Stop(bool runtimeRemoved)
m_CV.notify_all();
}

m_ResultTimer->Stop();
m_Thread.join();
double wait = 0.0;

while (GetPendingCheckables() > 0) {
Log(LogDebug, "CheckerComponent")
<< "Waiting for running checks (" << GetPendingCheckables()
<< ") to finish. Waited for " << wait << " seconds now.";

while (m_RunningChecks.load()) {
Utility::Sleep(1.0 / 60.0);
Utility::Sleep(0.1);
wait += 0.1;

/* Pick a timeout slightly shorter than the process reload timeout. */
double waitMax = Configuration::ReloadTimeout - 30;
if (waitMax <= 0)
waitMax = 1;

if (wait > waitMax) {
Log(LogWarning, "CheckerComponent")
<< "Checks running too long for " << wait
<< " seconds, hard shutdown before reload timeout: " << Configuration::ReloadTimeout << ".";
break;
}
}

m_ResultTimer->Stop();
m_Thread.join();

Log(LogInformation, "CheckerComponent")
<< "'" << GetName() << "' stopped.";

Expand Down Expand Up @@ -196,8 +213,6 @@ void CheckerComponent::CheckThreadProc()

Checkable::IncreasePendingChecks();

m_RunningChecks.fetch_add(1);

Utility::QueueAsyncCallback(std::bind(&CheckerComponent::ExecuteCheckHelper, CheckerComponent::Ptr(this), checkable));

lock.lock();
Expand All @@ -206,10 +221,6 @@ void CheckerComponent::CheckThreadProc()

void CheckerComponent::ExecuteCheckHelper(const Checkable::Ptr& checkable)
{
Defer decrementRunningChecks ([this]{
m_RunningChecks.fetch_sub(1);
});

try {
checkable->ExecuteCheck();
} catch (const std::exception& ex) {
Expand Down
3 changes: 0 additions & 3 deletions lib/checker/checkercomponent.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,11 @@
#include "base/configobject.hpp"
#include "base/timer.hpp"
#include "base/utility.hpp"
#include <atomic>
#include <boost/thread/mutex.hpp>
#include <boost/thread/condition_variable.hpp>
#include <boost/multi_index_container.hpp>
#include <boost/multi_index/ordered_index.hpp>
#include <boost/multi_index/key_extractors.hpp>
#include <cstdint>
#include <thread>

namespace icinga
Expand Down Expand Up @@ -75,7 +73,6 @@ class CheckerComponent final : public ObjectImpl<CheckerComponent>
boost::condition_variable m_CV;
bool m_Stopped{false};
std::thread m_Thread;
std::atomic<uintmax_t> m_RunningChecks;

CheckableSet m_IdleCheckables;
CheckableSet m_PendingCheckables;
Expand Down
2 changes: 1 addition & 1 deletion lib/checker/checkercomponent.ti
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ namespace icinga

class CheckerComponent : ConfigObject
{
activation_priority 100;
activation_priority 300;

[config, no_storage] int concurrent_checks {
get {{{
Expand Down
15 changes: 0 additions & 15 deletions lib/db_ido/servicedbobject.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,9 +183,6 @@ void ServiceDbObject::OnConfigUpdateHeavy()
DbObject::OnMultipleQueries(queries);

/* service dependencies */
Log(LogDebug, "ServiceDbObject")
<< "service dependencies for '" << service->GetName() << "'";

queries.clear();

DbQuery query2;
Expand Down Expand Up @@ -233,9 +230,6 @@ void ServiceDbObject::OnConfigUpdateHeavy()
DbObject::OnMultipleQueries(queries);

/* service contacts, contactgroups */
Log(LogDebug, "ServiceDbObject")
<< "service contacts: " << service->GetName();

queries.clear();

DbQuery query3;
Expand All @@ -248,9 +242,6 @@ void ServiceDbObject::OnConfigUpdateHeavy()
queries.emplace_back(std::move(query3));

for (const User::Ptr& user : CompatUtility::GetCheckableNotificationUsers(service)) {
Log(LogDebug, "ServiceDbObject")
<< "service contacts: " << user->GetName();

DbQuery query_contact;
query_contact.Table = GetType()->GetTable() + "_contacts";
query_contact.Type = DbQueryInsert;
Expand All @@ -266,9 +257,6 @@ void ServiceDbObject::OnConfigUpdateHeavy()

DbObject::OnMultipleQueries(queries);

Log(LogDebug, "ServiceDbObject")
<< "service contactgroups: " << service->GetName();

queries.clear();

DbQuery query4;
Expand All @@ -281,9 +269,6 @@ void ServiceDbObject::OnConfigUpdateHeavy()
queries.emplace_back(std::move(query4));

for (const UserGroup::Ptr& usergroup : CompatUtility::GetCheckableNotificationUserGroups(service)) {
Log(LogDebug, "ServiceDbObject")
<< "service contactgroups: " << usergroup->GetName();

DbQuery query_contact;
query_contact.Table = GetType()->GetTable() + "_contactgroups";
query_contact.Type = DbQueryInsert;
Expand Down
19 changes: 10 additions & 9 deletions lib/db_ido_mysql/idomysqlconnection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,6 @@ void IdoMysqlConnection::Resume()

void IdoMysqlConnection::Pause()
{
Log(LogInformation, "IdoMysqlConnection")
<< "'" << GetName() << "' paused.";

m_ReconnectTimer.reset();

DbConnection::Pause();
Expand All @@ -102,8 +99,12 @@ void IdoMysqlConnection::Pause()
<< "Rescheduling disconnect task.";
#endif /* I2_DEBUG */

m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::Disconnect, this), PriorityHigh);
m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::Disconnect, this), PriorityLow);
m_QueryQueue.Join();

Log(LogInformation, "IdoMysqlConnection")
<< "'" << GetName() << "' paused.";

}

void IdoMysqlConnection::ExceptionHandler(boost::exception_ptr exp)
Expand Down Expand Up @@ -175,7 +176,7 @@ void IdoMysqlConnection::ReconnectTimerHandler()
<< "Scheduling reconnect task.";
#endif /* I2_DEBUG */

m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::Reconnect, this), PriorityLow);
m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::Reconnect, this), PriorityHigh);
}

void IdoMysqlConnection::Reconnect()
Expand Down Expand Up @@ -434,9 +435,9 @@ void IdoMysqlConnection::Reconnect()
<< "Scheduling session table clear and finish connect task.";
#endif /* I2_DEBUG */

m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::ClearTablesBySession, this), PriorityLow);
m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::ClearTablesBySession, this), PriorityHigh);

m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::FinishConnect, this, startTime), PriorityLow);
m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::FinishConnect, this, startTime), PriorityHigh);
}

void IdoMysqlConnection::FinishConnect(double startTime)
Expand Down Expand Up @@ -709,7 +710,7 @@ void IdoMysqlConnection::ActivateObject(const DbObject::Ptr& dbobj)
<< "Scheduling object activation task for '" << dbobj->GetName1() << "!" << dbobj->GetName2() << "'.";
#endif /* I2_DEBUG */

m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::InternalActivateObject, this, dbobj), PriorityLow);
m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::InternalActivateObject, this, dbobj), PriorityHigh);
}

void IdoMysqlConnection::InternalActivateObject(const DbObject::Ptr& dbobj)
Expand Down Expand Up @@ -754,7 +755,7 @@ void IdoMysqlConnection::DeactivateObject(const DbObject::Ptr& dbobj)
<< "Scheduling object deactivation task for '" << dbobj->GetName1() << "!" << dbobj->GetName2() << "'.";
#endif /* I2_DEBUG */

m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::InternalDeactivateObject, this, dbobj), PriorityLow);
m_QueryQueue.Enqueue(std::bind(&IdoMysqlConnection::InternalDeactivateObject, this, dbobj), PriorityHigh);
}

void IdoMysqlConnection::InternalDeactivateObject(const DbObject::Ptr& dbobj)
Expand Down
Loading

0 comments on commit e2df115

Please sign in to comment.