forked from sonoble/sonic-buildimage
-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Warm reboot: Add support for orchagent pre-shutdown warm-restart stat…
…e check (sonic-net#562) * Add orchagent pre-warm-restart check mechanism * Add orchagent_restart_check options: --noFreeze & --skipPendingTaskCheck * Add waitTime option for response from orchagent * Fix build issue with latest master * adapt to new dvs.runcmd() signature * Move standard header before local headers
- Loading branch information
1 parent
41e61bd
commit 9fda944
Showing
8 changed files
with
342 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
#include <iostream> | ||
#include <sstream> | ||
|
||
#include <unistd.h> | ||
#include <getopt.h> | ||
|
||
#include "notificationproducer.h" | ||
#include "notificationconsumer.h" | ||
#include "select.h" | ||
#include "logger.h" | ||
|
||
|
||
void printUsage() | ||
{ | ||
SWSS_LOG_ENTER(); | ||
|
||
std::cout << "Usage: orchagent_restart_check [-s] " << std::endl; | ||
std::cout << " -n --noFreeze" << std::endl; | ||
std::cout << " Don't freeze orchagent even if check succeeded" << std::endl; | ||
std::cout << " -s --skipPendingTaskCheck" << std::endl; | ||
std::cout << " Skip pending task dependency check for orchagent" << std::endl; | ||
std::cout << " -w --waitTime" << std::endl; | ||
std::cout << " Wait time for response from orchagent, in milliseconds. Default value: 1000" << std::endl; | ||
std::cout << " -h --help:" << std::endl; | ||
std::cout << " Print out this message" << std::endl; | ||
} | ||
|
||
|
||
/* | ||
* Before stopping orchagent for warm restart, basic state check is preferred to | ||
* ensure orchagent is not in transient state, so a deterministic state may be restored after restart. | ||
* | ||
* Here is to implement orchagent_restart_check binary which may talk to orchagent and | ||
* ask it to do self-check, return "READY " signal and freeze if everything is ok, | ||
* otherwise "NOT_READY" signal should be returned. | ||
* | ||
* Optionally: | ||
* if --noFreeze option is provided, orchagent won't freeze. | ||
* if --skipPendingTaskCheck option is provided, orchagent won't use | ||
* whether there is pending task existing as state check criterion. | ||
*/ | ||
int main(int argc, char **argv) | ||
{ | ||
swss::Logger::getInstance().setMinPrio(swss::Logger::SWSS_INFO); | ||
SWSS_LOG_ENTER(); | ||
|
||
std::string skipPendingTaskCheck = "fasle"; | ||
std::string noFreeze = "fasle"; | ||
/* Default wait time is 1000 millisecond */ | ||
int waitTime = 1000; | ||
|
||
const char* const optstring = "nsw:"; | ||
while(true) | ||
{ | ||
static struct option long_options[] = | ||
{ | ||
{ "noFreeze", no_argument, 0, 'n' }, | ||
{ "skipPendingTaskCheck", no_argument, 0, 's' }, | ||
{ "waitTime", required_argument, 0, 'w' } | ||
}; | ||
|
||
int option_index = 0; | ||
|
||
int c = getopt_long(argc, argv, optstring, long_options, &option_index); | ||
|
||
if (c == -1) | ||
{ | ||
break; | ||
} | ||
|
||
switch (c) | ||
{ | ||
case 'n': | ||
SWSS_LOG_NOTICE("Won't freeze orchagent even if check succeeded"); | ||
noFreeze = "true"; | ||
break; | ||
case 's': | ||
SWSS_LOG_NOTICE("Skipping pending task check for orchagent"); | ||
skipPendingTaskCheck = "true"; | ||
break; | ||
case 'w': | ||
SWSS_LOG_NOTICE("Wait time for response from orchagent set to %s milliseconds", optarg); | ||
waitTime = atoi(optarg); | ||
break; | ||
case 'h': | ||
printUsage(); | ||
exit(EXIT_SUCCESS); | ||
|
||
case '?': | ||
SWSS_LOG_WARN("unknown option %c", optopt); | ||
printUsage(); | ||
exit(EXIT_FAILURE); | ||
|
||
default: | ||
SWSS_LOG_ERROR("getopt_long failure"); | ||
exit(EXIT_FAILURE); | ||
} | ||
} | ||
|
||
swss::DBConnector db(APPL_DB, swss::DBConnector::DEFAULT_UNIXSOCKET, 0); | ||
// Send warm restart query via "RESTARTCHECK" notification channel | ||
swss::NotificationProducer restartQuery(&db, "RESTARTCHECK"); | ||
// Will listen for the reply on "RESTARTCHECKREPLY" channel | ||
swss::NotificationConsumer restartQueryReply(&db, "RESTARTCHECKREPLY"); | ||
|
||
std::vector<swss::FieldValueTuple> values; | ||
values.emplace_back("NoFreeze", noFreeze); | ||
values.emplace_back("SkipPendingTaskCheck", skipPendingTaskCheck); | ||
std::string op = "orchagent"; | ||
SWSS_LOG_NOTICE("requested %s to do warm restart state check", op.c_str()); | ||
restartQuery.send(op, op, values); | ||
|
||
|
||
swss::Select s; | ||
s.addSelectable(&restartQueryReply); | ||
swss::Selectable *sel; | ||
std::string op_ret, data; | ||
values.clear(); | ||
int result = s.select(&sel, waitTime); | ||
if (result == swss::Select::OBJECT) | ||
{ | ||
restartQueryReply.pop(op_ret, data, values); | ||
if (data == "READY") | ||
{ | ||
SWSS_LOG_NOTICE("RESTARTCHECK success, %s is frozen and ready for warm restart", op_ret.c_str()); | ||
std::cout << "RESTARTCHECK succeeded" << std::endl; | ||
return EXIT_SUCCESS; | ||
} | ||
else | ||
{ | ||
SWSS_LOG_NOTICE("RESTARTCHECK failed, %s is not ready for warm restart with status %s", | ||
op_ret.c_str(), data.c_str()); | ||
} | ||
} | ||
else if (result == swss::Select::TIMEOUT) | ||
{ | ||
SWSS_LOG_NOTICE("RESTARTCHECK for %s timed out", op_ret.c_str()); | ||
} | ||
else | ||
{ | ||
SWSS_LOG_NOTICE("RESTARTCHECK for %s error", op_ret.c_str()); | ||
} | ||
std::cout << "RESTARTCHECK failed" << std::endl; | ||
return EXIT_FAILURE; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.