From 771070549cb324280c6fe4ae88a244d82530cb32 Mon Sep 17 00:00:00 2001 From: Eric Lippmann Date: Thu, 8 Feb 2024 12:09:06 +0100 Subject: [PATCH 01/11] IoEngine: Always log coroutine exception diagnostics While analyzing a possible memory leak, we encountered several coroutine exception messages, which unfortunately do not provide any information about what exactly went wrong, as exception diagnostics were previously only logged at the notice level. --- lib/base/io-engine.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/base/io-engine.hpp b/lib/base/io-engine.hpp index 684d3ac3962..326f04fdc47 100644 --- a/lib/base/io-engine.hpp +++ b/lib/base/io-engine.hpp @@ -109,8 +109,7 @@ class IoEngine // https://github.com/boostorg/coroutine/issues/39 throw; } catch (const std::exception& ex) { - Log(LogCritical, "IoEngine", "Exception in coroutine!"); - Log(LogDebug, "IoEngine") << "Exception in coroutine: " << DiagnosticInformation(ex); + Log(LogCritical, "IoEngine") << "Exception in coroutine: " << DiagnosticInformation(ex); } catch (...) { Log(LogCritical, "IoEngine", "Exception in coroutine!"); } From 0f2478fb28447367d4b1a9b83fa8d7227e7cb480 Mon Sep 17 00:00:00 2001 From: Yonas Habteab Date: Thu, 8 Feb 2024 11:30:23 +0100 Subject: [PATCH 02/11] Drop redundant `CpuBoundWork` usages in `lib/remote` --- lib/remote/httpserverconnection.cpp | 6 ------ lib/remote/jsonrpcconnection.cpp | 6 ++---- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/lib/remote/httpserverconnection.cpp b/lib/remote/httpserverconnection.cpp index 76cfd3c5269..504e6b86906 100644 --- a/lib/remote/httpserverconnection.cpp +++ b/lib/remote/httpserverconnection.cpp @@ -240,8 +240,6 @@ bool HandleAccessControl( auto headerAllowOrigin (listener->GetAccessControlAllowOrigin()); if (headerAllowOrigin) { - CpuBoundWork allowOriginHeader (yc); - auto allowedOrigins (headerAllowOrigin->ToSet()); if (!allowedOrigins.empty()) { @@ -251,8 +249,6 @@ bool HandleAccessControl( response.set(http::field::access_control_allow_origin, origin); } - allowOriginHeader.Done(); - response.set(http::field::access_control_allow_credentials, "true"); if (request.method() == http::verb::options && !request[http::field::access_control_request_method].empty()) { @@ -537,8 +533,6 @@ void HttpServerConnection::ProcessMessages(boost::asio::yield_context yc) auto authenticatedUser (m_ApiUser); if (!authenticatedUser) { - CpuBoundWork fetchingAuthenticatedUser (yc); - authenticatedUser = ApiUser::GetByAuthHeader(std::string(request[http::field::authorization])); } diff --git a/lib/remote/jsonrpcconnection.cpp b/lib/remote/jsonrpcconnection.cpp index 3bae3cafdcc..db35ef8332b 100644 --- a/lib/remote/jsonrpcconnection.cpp +++ b/lib/remote/jsonrpcconnection.cpp @@ -81,6 +81,8 @@ void JsonRpcConnection::HandleIncomingMessages(boost::asio::yield_context yc) CpuBoundWork handleMessage (yc); MessageHandler(message); + + l_TaskStats.InsertValue(Utility::GetTime(), 1); } catch (const std::exception& ex) { Log(m_ShuttingDown ? LogDebug : LogWarning, "JsonRpcConnection") << "Error while processing JSON-RPC message for identity '" << m_Identity @@ -88,10 +90,6 @@ void JsonRpcConnection::HandleIncomingMessages(boost::asio::yield_context yc) break; } - - CpuBoundWork taskStats (yc); - - l_TaskStats.InsertValue(Utility::GetTime(), 1); } Disconnect(); From 599a54aae0567e48cfe4a1f7f38c8e60a8384760 Mon Sep 17 00:00:00 2001 From: Yonas Habteab Date: Fri, 9 Feb 2024 12:00:50 +0100 Subject: [PATCH 03/11] EventsHandler: Drop superfluous `CpuBoundWork` usage --- lib/remote/eventshandler.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lib/remote/eventshandler.cpp b/lib/remote/eventshandler.cpp index e05ef229bc6..06d5412214f 100644 --- a/lib/remote/eventshandler.cpp +++ b/lib/remote/eventshandler.cpp @@ -116,16 +116,12 @@ bool EventsHandler::HandleRequest( auto event (subscriber.GetInbox()->Shift(yc)); if (event) { - CpuBoundWork buildingResponse (yc); - String body = JsonEncode(event); boost::algorithm::replace_all(body, "\n", ""); asio::const_buffer payload (body.CStr(), body.GetLength()); - buildingResponse.Done(); - asio::async_write(stream, payload, yc); asio::async_write(stream, newLine, yc); stream.async_flush(yc); From e66f8567de0f0f90aeff83ee39b4ef02aeb21d5d Mon Sep 17 00:00:00 2001 From: Yonas Habteab Date: Fri, 9 Feb 2024 15:17:26 +0100 Subject: [PATCH 04/11] HttpServerConnection: Drop superfluous `CpuBoundWork` usage --- lib/remote/httpserverconnection.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/remote/httpserverconnection.cpp b/lib/remote/httpserverconnection.cpp index 504e6b86906..0db0ea9809a 100644 --- a/lib/remote/httpserverconnection.cpp +++ b/lib/remote/httpserverconnection.cpp @@ -102,8 +102,6 @@ void HttpServerConnection::Disconnect() auto listener (ApiListener::GetInstance()); if (listener) { - CpuBoundWork removeHttpClient (yc); - listener->RemoveHttpClient(this); } } From ed8156db28628b7c231fbaefaab41b3c371f2335 Mon Sep 17 00:00:00 2001 From: Yonas Habteab Date: Thu, 8 Feb 2024 11:15:53 +0100 Subject: [PATCH 05/11] Drop redundant `CpuBoundWork` usage in `JsonRpcConnection::Disconnect()` Although there is locking involved here, it shoudln't take too long for the thread to actually acquire it, since there aren't that many threads dealing with endpoint clients concurrently. It's just wasting pointless time trying to obtain a CPU slot. --- lib/remote/jsonrpcconnection.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/lib/remote/jsonrpcconnection.cpp b/lib/remote/jsonrpcconnection.cpp index db35ef8332b..d5a8e46cb8a 100644 --- a/lib/remote/jsonrpcconnection.cpp +++ b/lib/remote/jsonrpcconnection.cpp @@ -197,14 +197,14 @@ void JsonRpcConnection::Disconnect() Log(LogWarning, "JsonRpcConnection") << "API client disconnected for identity '" << m_Identity << "'"; - { - CpuBoundWork removeClient (yc); - - if (m_Endpoint) { - m_Endpoint->RemoveClient(this); - } else { - ApiListener::GetInstance()->RemoveAnonymousClient(this); - } + // We need to unregister the endpoint client as soon as possible not to confuse Icinga 2, + // given that Endpoint::GetConnected() is just performing a check that the endpoint's client + // cache is not empty, which could result in an already disconnected endpoint never trying to + // reconnect again. See #7444. + if (m_Endpoint) { + m_Endpoint->RemoveClient(this); + } else { + ApiListener::GetInstance()->RemoveAnonymousClient(this); } m_OutgoingMessagesQueued.Set(); From dfffb29c81fa7ea8c4a9e90a6356c236d46a6b15 Mon Sep 17 00:00:00 2001 From: Yonas Habteab Date: Fri, 9 Feb 2024 16:42:53 +0100 Subject: [PATCH 06/11] ApiListener: Reset `m_LogMessageCount` when rotating Closing and re-opening that very same log file shouldn't reset the counter, otherwise some log files may exceed the max limit per file as their offset indicator is reset each time they are re-opened. --- lib/remote/apilistener.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/remote/apilistener.cpp b/lib/remote/apilistener.cpp index 85443e21850..b71250376e6 100644 --- a/lib/remote/apilistener.cpp +++ b/lib/remote/apilistener.cpp @@ -1375,7 +1375,6 @@ void ApiListener::OpenLogFile() } m_LogFile = new StdioStream(fp.release(), true); - m_LogMessageCount = 0; SetLogMessageTimestamp(Utility::GetTime()); } @@ -1405,6 +1404,9 @@ void ApiListener::RotateLogFile() if (!Utility::PathExists(newpath)) { try { Utility::RenameFile(oldpath, newpath); + + // We're rotating the current log file, so reset the log message counter as well. + m_LogMessageCount = 0; } catch (const std::exception& ex) { Log(LogCritical, "ApiListener") << "Cannot rotate replay log file from '" << oldpath << "' to '" From 8ff7121e939a8d1866d974fa51404f3c49788adc Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Fri, 9 Feb 2024 12:27:25 +0100 Subject: [PATCH 07/11] ApiListener#ListenerCoroutineProc(): get remote endpoint ASAP for logging On incoming connection timeout we log the remote endpoint which isn't available if it was already disconnected - an exception is thrown. Get it as long as we're still connected not to lose it, nor to get an exception. --- lib/remote/apilistener.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/remote/apilistener.cpp b/lib/remote/apilistener.cpp index b71250376e6..ab4a67b69cb 100644 --- a/lib/remote/apilistener.cpp +++ b/lib/remote/apilistener.cpp @@ -513,6 +513,8 @@ void ApiListener::ListenerCoroutineProc(boost::asio::yield_context yc, const Sha server->async_accept(socket.lowest_layer(), yc); + auto remoteEndpoint (socket.lowest_layer().remote_endpoint()); + if (!crlPath.IsEmpty()) { time_t currentCreationTime = Utility::GetFileCreationTime(crlPath); @@ -531,12 +533,11 @@ void ApiListener::ListenerCoroutineProc(boost::asio::yield_context yc, const Sha auto strand (Shared::Make(io)); - IoEngine::SpawnCoroutine(*strand, [this, strand, sslConn](asio::yield_context yc) { + IoEngine::SpawnCoroutine(*strand, [this, strand, sslConn, remoteEndpoint](asio::yield_context yc) { Timeout::Ptr timeout(new Timeout(strand->context(), *strand, boost::posix_time::microseconds(int64_t(GetConnectTimeout() * 1e6)), - [sslConn](asio::yield_context yc) { + [sslConn, remoteEndpoint](asio::yield_context yc) { Log(LogWarning, "ApiListener") - << "Timeout while processing incoming connection from " - << sslConn->lowest_layer().remote_endpoint(); + << "Timeout while processing incoming connection from " << remoteEndpoint; boost::system::error_code ec; sslConn->lowest_layer().cancel(ec); From 5243241b33b71c80b33c4259c3a1a238bcfec63d Mon Sep 17 00:00:00 2001 From: Julian Brost Date: Wed, 7 Feb 2024 10:32:58 +0100 Subject: [PATCH 08/11] HttpServerConnection: use exceptions for error handling When a HTTP connection dies prematurely while the response is sent, `http::async_write()` sets the error code to something like broken pipe for example. When calling `async_flush()` afterwards, it sometimes happens that this never returns. This results in a resource leak as the coroutine isn't cleaned up. This commit makes the individual functions throw exceptions instead of silently ignoring the errors, resulting in the function terminating early and also resulting in an error being logged as well. --- lib/remote/httpserverconnection.cpp | 50 +++++++++++------------------ 1 file changed, 18 insertions(+), 32 deletions(-) diff --git a/lib/remote/httpserverconnection.cpp b/lib/remote/httpserverconnection.cpp index 0db0ea9809a..42c206feede 100644 --- a/lib/remote/httpserverconnection.cpp +++ b/lib/remote/httpserverconnection.cpp @@ -190,10 +190,8 @@ bool EnsureValidHeaders( response.set(http::field::connection, "close"); - boost::system::error_code ec; - - http::async_write(stream, response, yc[ec]); - stream.async_flush(yc[ec]); + http::async_write(stream, response, yc); + stream.async_flush(yc); return false; } @@ -215,10 +213,8 @@ void HandleExpect100( response.result(http::status::continue_); - boost::system::error_code ec; - - http::async_write(stream, response, yc[ec]); - stream.async_flush(yc[ec]); + http::async_write(stream, response, yc); + stream.async_flush(yc); } } @@ -257,10 +253,8 @@ bool HandleAccessControl( response.content_length(response.body().size()); response.set(http::field::connection, "close"); - boost::system::error_code ec; - - http::async_write(stream, response, yc[ec]); - stream.async_flush(yc[ec]); + http::async_write(stream, response, yc); + stream.async_flush(yc); return false; } @@ -288,10 +282,8 @@ bool EnsureAcceptHeader( response.content_length(response.body().size()); response.set(http::field::connection, "close"); - boost::system::error_code ec; - - http::async_write(stream, response, yc[ec]); - stream.async_flush(yc[ec]); + http::async_write(stream, response, yc); + stream.async_flush(yc); return false; } @@ -329,10 +321,8 @@ bool EnsureAuthenticatedUser( response.content_length(response.body().size()); } - boost::system::error_code ec; - - http::async_write(stream, response, yc[ec]); - stream.async_flush(yc[ec]); + http::async_write(stream, response, yc); + stream.async_flush(yc); return false; } @@ -423,8 +413,8 @@ bool EnsureValidBody( response.set(http::field::connection, "close"); - http::async_write(stream, response, yc[ec]); - stream.async_flush(yc[ec]); + http::async_write(stream, response, yc); + stream.async_flush(yc); return false; } @@ -464,10 +454,8 @@ bool ProcessRequest( HttpUtility::SendJsonError(response, nullptr, 500, "Unhandled exception" , DiagnosticInformation(ex)); - boost::system::error_code ec; - - http::async_write(stream, response, yc[ec]); - stream.async_flush(yc[ec]); + http::async_write(stream, response, yc); + stream.async_flush(yc); return true; } @@ -476,10 +464,8 @@ bool ProcessRequest( return false; } - boost::system::error_code ec; - - http::async_write(stream, response, yc[ec]); - stream.async_flush(yc[ec]); + http::async_write(stream, response, yc); + stream.async_flush(yc); return true; } @@ -574,8 +560,8 @@ void HttpServerConnection::ProcessMessages(boost::asio::yield_context yc) } } catch (const std::exception& ex) { if (!m_ShuttingDown) { - Log(LogCritical, "HttpServerConnection") - << "Unhandled exception while processing HTTP request: " << ex.what(); + Log(LogWarning, "HttpServerConnection") + << "Exception while processing HTTP request from " << m_PeerAddress << ": " << ex.what(); } } From 660b82b4f9c22e052107dcf67bad5bc3735374f8 Mon Sep 17 00:00:00 2001 From: Yonas Habteab Date: Thu, 31 Oct 2024 14:19:05 +0100 Subject: [PATCH 09/11] JsonRpcConnection: Don't read any data on shutdown When the `Desconnect()` method is called, clients are not disconnected immediately. Instead, a new coroutine is spawned using the same strand as the other coroutines. This coroutine calls `async_shutdown` on the TCP socket, which might be blocking. However, in order not to block indefintely, the `Timeout` class cancels all operations on the socket after `10` seconds. Though, the timeout does not trigger the handler immediately; it creates spawns another coroutine using the same strand as in the `JsonRpcConnection` class. This can cause unexpected delays if e.g. `HandleIncomingMessages` gets resumed before the coroutine from the timeout class. Apart from that, the coroutine for writing messages uses the same condition, making the two symmetrical. --- lib/remote/jsonrpcconnection.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/remote/jsonrpcconnection.cpp b/lib/remote/jsonrpcconnection.cpp index d5a8e46cb8a..320a0db0f5b 100644 --- a/lib/remote/jsonrpcconnection.cpp +++ b/lib/remote/jsonrpcconnection.cpp @@ -62,7 +62,7 @@ void JsonRpcConnection::HandleIncomingMessages(boost::asio::yield_context yc) { m_Stream->next_layer().SetSeen(&m_Seen); - for (;;) { + while (!m_ShuttingDown) { String message; try { From 2854c618dd24134259f44a9c158c36804ad948ee Mon Sep 17 00:00:00 2001 From: Yonas Habteab Date: Wed, 28 Aug 2024 17:34:17 +0200 Subject: [PATCH 10/11] HttpServerConnection: Drop yet another superfluous `CpuBoundWork` usage --- lib/remote/httpserverconnection.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/remote/httpserverconnection.cpp b/lib/remote/httpserverconnection.cpp index 42c206feede..9b800ea8668 100644 --- a/lib/remote/httpserverconnection.cpp +++ b/lib/remote/httpserverconnection.cpp @@ -348,8 +348,6 @@ bool EnsureValidBody( Array::Ptr permissions = authenticatedUser->GetPermissions(); if (permissions) { - CpuBoundWork evalPermissions (yc); - ObjectLock olock(permissions); for (const Value& permissionInfo : permissions) { From 4850018464dbad0c92f5cb21324117d80b6cdb85 Mon Sep 17 00:00:00 2001 From: Yonas Habteab Date: Fri, 30 Aug 2024 18:00:00 +0200 Subject: [PATCH 11/11] Don't use thread-local variable in coroutine & process final `cr` in global thread pool --- lib/methods/ifwapichecktask.cpp | 218 ++++++++++++-------------------- 1 file changed, 83 insertions(+), 135 deletions(-) diff --git a/lib/methods/ifwapichecktask.cpp b/lib/methods/ifwapichecktask.cpp index 8516d70c033..16e2f7e63e3 100644 --- a/lib/methods/ifwapichecktask.cpp +++ b/lib/methods/ifwapichecktask.cpp @@ -33,55 +33,6 @@ using namespace icinga; REGISTER_FUNCTION_NONCONST(Internal, IfwApiCheck, &IfwApiCheckTask::ScriptFunc, "checkable:cr:resolvedMacros:useResolvedMacros"); -static void ReportIfwCheckResult( - const Checkable::Ptr& checkable, const Value& cmdLine, const CheckResult::Ptr& cr, - const String& output, double start, double end, int exitcode = 3, const Array::Ptr& perfdata = nullptr -) -{ - if (Checkable::ExecuteCommandProcessFinishedHandler) { - ProcessResult pr; - pr.PID = -1; - pr.Output = perfdata ? output + " |" + String(perfdata->Join(" ")) : output; - pr.ExecutionStart = start; - pr.ExecutionEnd = end; - pr.ExitStatus = exitcode; - - Checkable::ExecuteCommandProcessFinishedHandler(cmdLine, pr); - } else { - auto splittedPerfdata (perfdata); - - if (perfdata) { - splittedPerfdata = new Array(); - ObjectLock oLock (perfdata); - - for (String pv : perfdata) { - PluginUtility::SplitPerfdata(pv)->CopyTo(splittedPerfdata); - } - } - - cr->SetOutput(output); - cr->SetPerformanceData(splittedPerfdata); - cr->SetState((ServiceState)exitcode); - cr->SetExitStatus(exitcode); - cr->SetExecutionStart(start); - cr->SetExecutionEnd(end); - cr->SetCommand(cmdLine); - - checkable->ProcessCheckResult(cr); - } -} - -static void ReportIfwCheckResult( - boost::asio::yield_context yc, const Checkable::Ptr& checkable, const Value& cmdLine, - const CheckResult::Ptr& cr, const String& output, double start -) -{ - double end = Utility::GetTime(); - CpuBoundWork cbw (yc); - - ReportIfwCheckResult(checkable, cmdLine, cr, output, start, end); -} - static const char* GetUnderstandableError(const std::exception& ex) { auto se (dynamic_cast(&ex)); @@ -93,10 +44,12 @@ static const char* GetUnderstandableError(const std::exception& ex) return ex.what(); } +// Note: If DoIfwNetIo returns due to an error, the plugin output of the specified CheckResult (cr) will always be set, +// and if it was successful, the cr exit status, plugin state and performance data (if any) will also be overridden. +// Therefore, you have to take care yourself of setting all the other necessary fields for the check result. static void DoIfwNetIo( - boost::asio::yield_context yc, const Checkable::Ptr& checkable, const Array::Ptr& cmdLine, - const CheckResult::Ptr& cr, const String& psCommand, const String& psHost, const String& san, const String& psPort, - AsioTlsStream& conn, boost::beast::http::request& req, double start + boost::asio::yield_context yc, const CheckResult::Ptr& cr, const String& psCommand, const String& psHost, const String& san, + const String& psPort, AsioTlsStream& conn, boost::beast::http::request& req ) { namespace http = boost::beast::http; @@ -107,11 +60,7 @@ static void DoIfwNetIo( try { Connect(conn.lowest_layer(), psHost, psPort, yc); } catch (const std::exception& ex) { - ReportIfwCheckResult( - yc, checkable, cmdLine, cr, - "Can't connect to IfW API on host '" + psHost + "' port '" + psPort + "': " + GetUnderstandableError(ex), - start - ); + cr->SetOutput("Can't connect to IfW API on host '" + psHost + "' port '" + psPort + "': " + GetUnderstandableError(ex)); return; } @@ -120,12 +69,7 @@ static void DoIfwNetIo( try { sslConn.async_handshake(conn.next_layer().client, yc); } catch (const std::exception& ex) { - ReportIfwCheckResult( - yc, checkable, cmdLine, cr, - "TLS handshake with IfW API on host '" + psHost + "' (SNI: '" + san - + "') port '" + psPort + "' failed: " + GetUnderstandableError(ex), - start - ); + cr->SetOutput("TLS handshake with IfW API on host '" + psHost + "' (SNI: '" + san+ "') port '" + psPort + "' failed: " + GetUnderstandableError(ex)); return; } @@ -135,15 +79,10 @@ static void DoIfwNetIo( try { cn = GetCertificateCN(cert); - } catch (const std::exception&) { - } + } catch (const std::exception&) { } - ReportIfwCheckResult( - yc, checkable, cmdLine, cr, - "Certificate validation failed for IfW API on host '" + psHost + "' (SNI: '" + san + "'; CN: " - + (cn.IsString() ? "'" + cn + "'" : "N/A") + ") port '" + psPort + "': " + sslConn.GetVerifyError(), - start - ); + cr->SetOutput("Certificate validation failed for IfW API on host '" + psHost + "' (SNI: '" + san + "'; CN: " + + (cn.IsString() ? "'" + cn + "'" : "N/A") + ") port '" + psPort + "': " + sslConn.GetVerifyError()); return; } @@ -151,87 +90,56 @@ static void DoIfwNetIo( http::async_write(conn, req, yc); conn.async_flush(yc); } catch (const std::exception& ex) { - ReportIfwCheckResult( - yc, checkable, cmdLine, cr, - "Can't send HTTP request to IfW API on host '" + psHost + "' port '" + psPort + "': " + GetUnderstandableError(ex), - start - ); + cr->SetOutput("Can't send HTTP request to IfW API on host '" + psHost + "' port '" + psPort + "': " + GetUnderstandableError(ex)); return; } try { http::async_read(conn, buf, resp, yc); } catch (const std::exception& ex) { - ReportIfwCheckResult( - yc, checkable, cmdLine, cr, - "Can't read HTTP response from IfW API on host '" + psHost + "' port '" + psPort + "': " + GetUnderstandableError(ex), - start - ); + cr->SetOutput("Can't read HTTP response from IfW API on host '" + psHost + "' port '" + psPort + "': " + GetUnderstandableError(ex)); return; } - double end = Utility::GetTime(); - { boost::system::error_code ec; sslConn.async_shutdown(yc[ec]); } - CpuBoundWork cbw (yc); Value jsonRoot; try { jsonRoot = JsonDecode(resp.body()); } catch (const std::exception& ex) { - ReportIfwCheckResult( - checkable, cmdLine, cr, - "Got bad JSON from IfW API on host '" + psHost + "' port '" + psPort + "': " + ex.what(), start, end - ); + cr->SetOutput("Got bad JSON from IfW API on host '" + psHost + "' port '" + psPort + "': " + ex.what()); return; } if (!jsonRoot.IsObjectType()) { - ReportIfwCheckResult( - checkable, cmdLine, cr, - "Got JSON, but not an object, from IfW API on host '" - + psHost + "' port '" + psPort + "': " + JsonEncode(jsonRoot), - start, end - ); + cr->SetOutput("Got JSON, but not an object, from IfW API on host '"+ psHost + "' port '" + psPort + "': " + JsonEncode(jsonRoot)); return; } Value jsonBranch; if (!Dictionary::Ptr(jsonRoot)->Get(psCommand, &jsonBranch)) { - ReportIfwCheckResult( - checkable, cmdLine, cr, - "Missing ." + psCommand + " in JSON object from IfW API on host '" - + psHost + "' port '" + psPort + "': " + JsonEncode(jsonRoot), - start, end - ); + cr->SetOutput("Missing ." + psCommand + " in JSON object from IfW API on host '" + psHost + "' port '" + psPort + "': " + JsonEncode(jsonRoot)); return; } if (!jsonBranch.IsObjectType()) { - ReportIfwCheckResult( - checkable, cmdLine, cr, - "." + psCommand + " in JSON from IfW API on host '" - + psHost + "' port '" + psPort + "' is not an object: " + JsonEncode(jsonBranch), - start, end - ); + cr->SetOutput("." + psCommand + " in JSON from IfW API on host '" + psHost + "' port '" + psPort + "' is not an object: " + JsonEncode(jsonBranch)); return; } Dictionary::Ptr result = jsonBranch; - Value exitcode; + Value rawExitcode; - if (!result->Get("exitcode", &exitcode)) { - ReportIfwCheckResult( - checkable, cmdLine, cr, + if (!result->Get("exitcode", &rawExitcode)) { + cr->SetOutput( "Missing ." + psCommand + ".exitcode in JSON object from IfW API on host '" - + psHost + "' port '" + psPort + "': " + JsonEncode(result), - start, end + + psHost + "' port '" + psPort + "': " + JsonEncode(result) ); return; } @@ -239,27 +147,25 @@ static void DoIfwNetIo( static const std::set exitcodes {ServiceOK, ServiceWarning, ServiceCritical, ServiceUnknown}; static const auto exitcodeList (Array::FromSet(exitcodes)->Join(", ")); - if (!exitcode.IsNumber() || exitcodes.find(exitcode) == exitcodes.end()) { - ReportIfwCheckResult( - checkable, cmdLine, cr, - "Got bad exitcode " + JsonEncode(exitcode) + " from IfW API on host '" + psHost + "' port '" + psPort - + "', expected one of: " + exitcodeList, - start, end + if (!rawExitcode.IsNumber() || exitcodes.find(rawExitcode) == exitcodes.end()) { + cr->SetOutput( + "Got bad exitcode " + JsonEncode(rawExitcode) + " from IfW API on host '" + psHost + "' port '" + psPort + + "', expected one of: " + exitcodeList ); return; } + auto exitcode (static_cast(rawExitcode.Get())); + auto perfdataVal (result->Get("perfdata")); Array::Ptr perfdata; try { perfdata = perfdataVal; } catch (const std::exception&) { - ReportIfwCheckResult( - checkable, cmdLine, cr, + cr->SetOutput( "Got bad perfdata " + JsonEncode(perfdataVal) + " from IfW API on host '" - + psHost + "' port '" + psPort + "', expected an array", - start, end + + psHost + "' port '" + psPort + "', expected an array" ); return; } @@ -269,18 +175,20 @@ static void DoIfwNetIo( for (auto& pv : perfdata) { if (!pv.IsString()) { - ReportIfwCheckResult( - checkable, cmdLine, cr, + cr->SetOutput( "Got bad perfdata value " + JsonEncode(perfdata) + " from IfW API on host '" - + psHost + "' port '" + psPort + "', expected an array of strings", - start, end + + psHost + "' port '" + psPort + "', expected an array of strings" ); return; } } + + cr->SetPerformanceData(PluginUtility::SplitPerfdata(perfdata->Join(" "))); } - ReportIfwCheckResult(checkable, cmdLine, cr, result->Get("checkresult"), start, end, exitcode, perfdata); + cr->SetState(exitcode); + cr->SetExitStatus(exitcode); + cr->SetOutput(result->Get("checkresult")); } void IfwApiCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckResult::Ptr& cr, @@ -344,6 +252,34 @@ void IfwApiCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckRes String username = resolveMacros("$ifw_api_username$"); String password = resolveMacros("$ifw_api_password$"); + // Use this lambda to process the final Ifw check result. Callers don't need to pass the check result + // as an argument, as the lambda already captures the `cr` and notices all the `cr` changes made across + // the code. You just need to set the necessary cr fields when appropriated and then call this closure. + std::function reportResult; + + if (auto callback = Checkable::ExecuteCommandProcessFinishedHandler; callback) { + reportResult = [cr, callback = std::move(callback)]() { + ProcessResult pr; + pr.PID = -1; + if (auto pd = cr->GetPerformanceData(); pd) { + pr.Output = cr->GetOutput() +" |" + String(pd->Join(" ")); + } else { + pr.Output = cr->GetOutput(); + } + pr.ExecutionStart = cr->GetExecutionStart(); + pr.ExecutionEnd = cr->GetExecutionEnd(); + pr.ExitStatus = cr->GetExitStatus(); + + callback(cr->GetCommand(), pr); + }; + } else { + reportResult = [checkable, cr]() { checkable->ProcessCheckResult(cr); }; + } + + // Set the default check result state and exit code to unknown for the moment! + cr->SetExitStatus(ServiceUnknown); + cr->SetState(ServiceUnknown); + Dictionary::Ptr params = new Dictionary(); if (arguments) { @@ -369,11 +305,12 @@ void IfwApiCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckRes if (kv.second.GetType() == ValueObject) { auto now (Utility::GetTime()); - ReportIfwCheckResult( - checkable, command->GetName(), cr, - "$ifw_api_arguments$ may not directly contain objects (especially functions).", now, now - ); + cr->SetCommand(command->GetName()); + cr->SetExecutionStart(now); + cr->SetExecutionEnd(now); + cr->SetOutput("$ifw_api_arguments$ may not directly contain objects (especially functions)."); + reportResult(); return; } } @@ -498,12 +435,17 @@ void IfwApiCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckRes auto& io (IoEngine::Get().GetIoContext()); auto strand (Shared::Make(io)); Shared::Ptr ctx; - double start = Utility::GetTime(); + + cr->SetExecutionStart(Utility::GetTime()); + cr->SetCommand(cmdLine); try { ctx = SetupSslContext(cert, key, ca, crl, DEFAULT_TLS_CIPHERS, DEFAULT_TLS_PROTOCOLMIN, DebugInfo()); } catch (const std::exception& ex) { - ReportIfwCheckResult(checkable, cmdLine, cr, ex.what(), start, Utility::GetTime()); + cr->SetOutput(ex.what()); + cr->SetExecutionEnd(Utility::GetTime()); + + reportResult(); return; } @@ -511,7 +453,7 @@ void IfwApiCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckRes IoEngine::SpawnCoroutine( *strand, - [strand, checkable, cmdLine, cr, psCommand, psHost, expectedSan, psPort, conn, req, start, checkTimeout](asio::yield_context yc) { + [strand, checkable, cr, psCommand, psHost, expectedSan, psPort, conn, req, checkTimeout, reportResult = std::move(reportResult)](asio::yield_context yc) { Timeout::Ptr timeout = new Timeout(strand->context(), *strand, boost::posix_time::microseconds(int64_t(checkTimeout * 1e6)), [&conn, &checkable](boost::asio::yield_context yc) { Log(LogNotice, "IfwApiCheckTask") @@ -525,7 +467,13 @@ void IfwApiCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckRes Defer cancelTimeout ([&timeout]() { timeout->Cancel(); }); - DoIfwNetIo(yc, checkable, cmdLine, cr, psCommand, psHost, expectedSan, psPort, *conn, *req, start); + DoIfwNetIo(yc, cr, psCommand, psHost, expectedSan, psPort, *conn, *req); + + cr->SetExecutionEnd(Utility::GetTime()); + + // Post the check result processing to the global pool not to block the I/O threads, + // which could affect processing important RPC messages and HTTP connections. + Utility::QueueAsyncCallback(reportResult); } ); }