Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sonic-host-services changes for gnoi reboot #157

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
152 changes: 152 additions & 0 deletions host_modules/gnoi_reboot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
"""gNOI reboot module which performs reboot"""

import json
import logging
import threading
import time
from host_modules import host_service
from utils.run_cmd import _run_command

# Name of this host module; used for the DBus bus name of the exported
# methods and as the prefix of every log message emitted below.
MOD_NAME = 'gnoi_reboot'
# Accepted values of the "method" field of a reboot request.
# Each set holds both the numeric enum value and its string representation,
# so a request may carry either form.
REBOOTMETHOD_COLD_BOOT_VALUES = {1, "COLD"}
# NOTE(review): the warm-boot values are declared here but are not accepted by
# validate_reboot_request nor handled by execute_reboot in this module.
REBOOTMETHOD_WARM_BOOT_VALUES = {4, "WARM"}
REBOOTMETHOD_NSF_VALUES = {5, "NSF"}

# Time in seconds to wait for SONiC Host Service to be killed during reboot.
# If the service is still alive after this period the reboot is considered
# failed (10 containers * up to 20 seconds each + 60 seconds of buffer).
REBOOT_TIMEOUT = 260

# Commands handed to _run_command for each supported reboot method.
EXECUTE_COLD_REBOOT_COMMAND = "sudo reboot"
EXECUTE_NSF_REBOOT_COMMAND = "/etc/init.d/gpins-nsf-boot nsf-reboot"

# Module-level logger.
logger = logging.getLogger(__name__)


class GnoiReboot(host_service.HostModule):
    """DBus endpoint that executes the reboot and returns the reboot status.

    The reboot status is kept in ``reboot_status_flag``, a dict with the keys
    ``active`` (bool), ``when`` (integer timestamp of the request) and
    ``reason`` (message supplied with the request). Every access to the dict
    is guarded by ``self.lock`` because it is touched both by the RPC methods
    and by the thread that executes the reboot.
    """

    def __init__(self, mod_name):
        """Create the lock and the default (inactive) reboot status.

        Args:
            mod_name: module name forwarded to the HostModule constructor.
        """
        self.lock = threading.Lock()
        # reboot_status_flag is used to keep track of reboot status on host
        self.reboot_status_flag = {}
        # Populating with default value i.e., no active reboot
        self.populate_reboot_status_flag()
        super(GnoiReboot, self).__init__(mod_name)

    def populate_reboot_status_flag(self, active=False, when=0, reason=""):
        """Overwrite reboot_status_flag with the given values under the lock.

        Called without arguments it resets the status to "no active reboot".
        """
        with self.lock:
            self.reboot_status_flag["active"] = active
            self.reboot_status_flag["when"] = when
            self.reboot_status_flag["reason"] = reason

    def _try_mark_reboot_active(self, when, reason):
        """Atomically switch the status to active.

        The check and the update happen inside one critical section so that
        two concurrent issue_reboot calls cannot both start a reboot.

        Returns:
            True if the status was inactive and is now active, False if a
            reboot was already marked as ongoing (status left untouched).
        """
        with self.lock:
            if self.reboot_status_flag["active"]:
                return False
            self.reboot_status_flag["active"] = True
            self.reboot_status_flag["when"] = when
            self.reboot_status_flag["reason"] = reason
            return True

    @staticmethod
    def _method_in(rebootmethod, values):
        """Return True if rebootmethod is a member of the set ``values``.

        Unhashable inputs (e.g. a JSON list or object) cannot be looked up in
        a set and are reported as "not a member" instead of raising.
        """
        try:
            return rebootmethod in values
        except TypeError:
            return False

    def validate_reboot_request(self, reboot_request):
        """Check that a parsed reboot request can be served.

        Args:
            reboot_request: reboot request parsed from json; expected to be
                a dict.

        Returns:
            Tuple ``(0, "")`` when the request is valid, otherwise
            ``(1, <error string>)``.
        """
        # Check whether reboot method is present.
        if not isinstance(reboot_request, dict) or "method" not in reboot_request:
            return 1, "Reboot request must contain a reboot method"

        # Check whether reboot method is valid.
        rebootmethod = reboot_request["method"]
        supported = (REBOOTMETHOD_COLD_BOOT_VALUES, REBOOTMETHOD_NSF_VALUES)
        if not any(self._method_in(rebootmethod, values) for values in supported):
            return 1, "Invalid reboot method: " + str(rebootmethod)

        # Check whether delay is non-zero. delay key will not exist in
        # reboot_request if it is zero
        if "delay" in reboot_request and reboot_request["delay"] != 0:
            return 1, "Delayed reboot is not supported"
        return 0, ""

    def execute_reboot(self, rebootmethod):
        """Execute reboot and reset reboot_status_flag when reboot fails.

        Args:
            rebootmethod: one of the cold-boot or NSF method values.
        """
        if self._method_in(rebootmethod, REBOOTMETHOD_COLD_BOOT_VALUES):
            command = EXECUTE_COLD_REBOOT_COMMAND
            logger.warning("%s: Issuing cold reboot", MOD_NAME)
        elif self._method_in(rebootmethod, REBOOTMETHOD_NSF_VALUES):
            command = EXECUTE_NSF_REBOOT_COMMAND
            logger.warning("%s: Issuing NSF reboot", MOD_NAME)
        else:
            # The method may be a string, so it must be formatted with %s.
            logger.error("%s: Invalid reboot method: %s", MOD_NAME, rebootmethod)
            # No reboot was started: do not leave the status stuck in active.
            self.populate_reboot_status_flag()
            return

        rc, stdout, stderr = _run_command(command)
        if rc:
            self.populate_reboot_status_flag()
            logger.error("%s: Reboot failed execution with stdout: %s, "
                         "stderr: %s", MOD_NAME, stdout, stderr)
            return

        # Wait for REBOOT_TIMEOUT seconds for the reboot to complete. Here, we
        # expect that SONiC Host Service will be killed during this waiting
        # period if the reboot is successful. If this module is still alive
        # after the waiting period, we can conclude that the reboot has failed.
        # Each container can take up to 20 seconds to get killed. In total,
        # there are 10 containers, and adding a buffer of 1 minute brings up
        # the delay value to be 260 seconds.
        time.sleep(REBOOT_TIMEOUT)
        # Conclude that the reboot has failed if we reach this point
        self.populate_reboot_status_flag()

    @host_service.method(host_service.bus_name(MOD_NAME), in_signature='as', out_signature='is')
    def issue_reboot(self, options):
        """Issues reboot after performing the following steps sequentially:
        1. Checks that reboot_status_flag is not set
        2. Validates the reboot request
        3. Sets the reboot_status_flag
        4. Issues the reboot in a separate thread

        Args:
            options: list of strings; the first element is the json
                formatted reboot request.

        Returns:
            Tuple ``(0, <message>)`` on success, ``(1, <error string>)``
            otherwise.
        """
        logger.warning("%s: issue_reboot rpc called", MOD_NAME)
        with self.lock:
            is_reboot_ongoing = self.reboot_status_flag["active"]
        # Return without issuing the reboot if the previous reboot is ongoing
        if is_reboot_ongoing:
            return 1, "Previous reboot is ongoing"

        # Convert input json formatted reboot request into python dict.
        # reboot_request is a python dict with the following keys:
        #   method - specifies the method of reboot
        #   delay - delay to issue reboot, key exists only if it is non-zero
        #   message - reason for reboot
        #   force - either true/false, key exists only if it is true
        parse_error = "Failed to parse json formatted reboot request into python dict"
        try:
            reboot_request = json.loads(options[0])
        except (ValueError, TypeError, IndexError):
            # IndexError: empty options array; TypeError: non-string element.
            return 1, parse_error
        if not isinstance(reboot_request, dict):
            # Valid json, but not an object (e.g. a bare number or a list).
            return 1, parse_error

        # Validate reboot request
        err, errstr = self.validate_reboot_request(reboot_request)
        if err:
            return err, errstr

        # Sets reboot_status_flag to be in active state. The flag is
        # re-checked inside the same critical section that sets it, so a
        # request racing with this one cannot start a second reboot. The
        # message is optional, hence the default.
        reason = reboot_request.get("message", "")
        if not self._try_mark_reboot_active(int(time.time()), reason):
            return 1, "Previous reboot is ongoing"

        # Issue reboot in a new thread and reset the reboot_status_flag if the reboot fails
        try:
            t = threading.Thread(target=self.execute_reboot, args=(reboot_request["method"],))
            t.start()
        except RuntimeError as error:
            # No reboot is running, so later requests must not be rejected.
            self.populate_reboot_status_flag()
            return 1, "Failed to start thread to execute reboot with error: " + str(error)
        return 0, "Successfully issued reboot"

    @host_service.method(host_service.bus_name(MOD_NAME), in_signature='', out_signature='is')
    def get_reboot_status(self):
        """Returns current reboot status on host in json format.

        Returns:
            Tuple ``(0, <json string of reboot_status_flag>)``.
        """
        with self.lock:
            response_data = json.dumps(self.reboot_status_flag)
        return 0, response_data

def register():
    """Return the GnoiReboot class together with its module name."""
    module_class = GnoiReboot
    return module_class, MOD_NAME
3 changes: 2 additions & 1 deletion scripts/sonic-host-server
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import dbus.service
import dbus.mainloop.glib

from gi.repository import GObject
from host_modules import config_engine, gcu, host_service, showtech, systemd_service, file_service
from host_modules import config_engine, gcu, host_service, showtech, systemd_service, file_service, gnoi_reboot


def register_dbus():
Expand All @@ -21,6 +21,7 @@ def register_dbus():
'config': config_engine.Config('config'),
'gcu': gcu.GCU('gcu'),
'host_service': host_service.HostService('host_service'),
'gnoi_reboot': gnoi_reboot.GnoiReboot('gnoi_reboot'),
'showtech': showtech.Showtech('showtech'),
'systemd': systemd_service.SystemdService('systemd'),
'file_stat': file_service.FileService('file')
Expand Down
221 changes: 221 additions & 0 deletions tests/gnoi_reboot_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
"""Tests for gnoi_reboot."""

import imp
import sys
import os
import pytest
import datetime

if sys.version_info >= (3, 3):
from unittest import mock
else:
# Expect the 'mock' package for python 2
# https://pypi.python.org/pypi/mock
import mock

# Locate the repository root (parent of this tests directory) and put it at
# the front of sys.path so the modules under test can be imported.
test_path = os.path.dirname(os.path.abspath(__file__))
sonic_host_service_path = os.path.dirname(test_path)
host_modules_path = os.path.join(sonic_host_service_path, "host_modules")
sys.path.insert(0, sonic_host_service_path)

# Expected json strings returned by get_reboot_status for an active and an
# inactive reboot status respectively.
TEST_ACTIVE_RESPONSE_DATA = "{\"active\": true, \"when\": 1617811205, \"reason\": \"testing reboot response\"}"
TEST_INACTIVE_RESPONSE_DATA = "{\"active\": false, \"when\": 0, \"reason\": \"\"}"

# Numeric reboot method values used in the requests built by the tests.
REBOOTMETHOD_UNKNOWN_ENUM = 0
REBOOTMETHOD_COLD_BOOT_ENUM = 1
REBOOTMETHOD_NSF_ENUM = 5

# NOTE(review): TEST_TIMESTAMP and REPORT_CRITICAL_STATE_FULL_COMMAND are not
# referenced by any test visible in this file - confirm whether they can go.
TEST_TIMESTAMP = 1618942253.831912040
REPORT_CRITICAL_STATE_FULL_COMMAND = "redis-cli -n 6 HSET COMPONENT_STATE_TABLE|host state ERROR reason \"cold reboot has failed\" essential true timestamp \"2021-04-20 18:10:53\" timestamp-seconds 1618942253 timestamp-nanoseconds 831912040"

# Json formatted reboot requests passed to issue_reboot. The invalid request
# lacks the enclosing braces and therefore fails json parsing.
VALID_REBOOT_REQUEST_COLD = "{\"method\": 1, \"message\": \"test reboot request reason\"}"
VALID_REBOOT_REQUEST_NSF = "{\"method\": \"NSF\", \"message\": \"test reboot request reason\"}"
INVALID_REBOOT_REQUEST = "\"method\": 1, \"message\": \"test reboot request reason\""

# Load the modules under test directly from their source files.
# NOTE(review): the imp module is deprecated and was removed in Python 3.12;
# consider importlib - confirm against the supported interpreter versions.
imp.load_source("host_service", host_modules_path + "/host_service.py")
imp.load_source("gnoi_reboot", host_modules_path + "/gnoi_reboot.py")
from gnoi_reboot import *


class TestGnoiReboot(object):
    """Unit tests for the GnoiReboot host module.

    One GnoiReboot instance is shared by all tests; tests that depend on the
    reboot status reset it first via populate_reboot_status_flag().
    """

    @classmethod
    def setup_class(cls):
        """Build the shared module instance without running HostModule.__init__."""
        # super is patched inside gnoi_reboot so the parent constructor
        # is not executed when the instance is created.
        with mock.patch("gnoi_reboot.super"):
            cls.gnoi_reboot_module = GnoiReboot(MOD_NAME)

    def test_populate_reboot_status_flag(self):
        """Default arguments reset the status to 'no active reboot'."""
        self.gnoi_reboot_module.populate_reboot_status_flag()
        assert self.gnoi_reboot_module.reboot_status_flag["active"] is False
        assert self.gnoi_reboot_module.reboot_status_flag["when"] == 0
        assert self.gnoi_reboot_module.reboot_status_flag["reason"] == ""

    def test_validate_reboot_request_success_cold_boot_enum_method(self):
        """Cold boot given as enum value is accepted."""
        reboot_request = {"method": REBOOTMETHOD_COLD_BOOT_ENUM, "reason": "test reboot request reason"}
        result = self.gnoi_reboot_module.validate_reboot_request(reboot_request)
        assert result[0] == 0
        assert result[1] == ""

    def test_validate_reboot_request_success_cold_boot_string_method(self):
        """Cold boot given as string is accepted."""
        reboot_request = {"method": "COLD", "reason": "test reboot request reason"}
        result = self.gnoi_reboot_module.validate_reboot_request(reboot_request)
        assert result[0] == 0
        assert result[1] == ""

    def test_validate_reboot_request_success_nsf_enum_method(self):
        """NSF given as enum value is accepted."""
        reboot_request = {"method": REBOOTMETHOD_NSF_ENUM, "reason": "test reboot request reason"}
        result = self.gnoi_reboot_module.validate_reboot_request(reboot_request)
        assert result[0] == 0
        assert result[1] == ""

    # This test previously reused the name of the enum test above, which
    # silently replaced it so that the enum variant was never collected.
    def test_validate_reboot_request_success_nsf_string_method(self):
        """NSF given as string is accepted."""
        reboot_request = {"method": "NSF", "reason": "test reboot request reason"}
        result = self.gnoi_reboot_module.validate_reboot_request(reboot_request)
        assert result[0] == 0
        assert result[1] == ""

    def test_validate_reboot_request_fail_unknown_method(self):
        """An unsupported method value is rejected."""
        reboot_request = {"method": REBOOTMETHOD_UNKNOWN_ENUM, "reason": "test reboot request reason"}
        result = self.gnoi_reboot_module.validate_reboot_request(reboot_request)
        assert result[0] == 1
        assert result[1] == "Invalid reboot method: 0"

    def test_validate_reboot_request_fail_no_method(self):
        """A request without a method is rejected."""
        reboot_request = {"reason": "test reboot request reason"}
        result = self.gnoi_reboot_module.validate_reboot_request(reboot_request)
        assert result[0] == 1
        assert result[1] == "Reboot request must contain a reboot method"

    def test_validate_reboot_request_fail_delayed_reboot(self):
        """A request with a non-zero delay is rejected."""
        reboot_request = {"method": REBOOTMETHOD_COLD_BOOT_ENUM, "delay": 10, "reason": "test reboot request reason"}
        result = self.gnoi_reboot_module.validate_reboot_request(reboot_request)
        assert result[0] == 1
        assert result[1] == "Delayed reboot is not supported"

    def test_execute_reboot_success(self):
        """The command succeeds; after the timeout the status is reset."""
        with (
            mock.patch("gnoi_reboot._run_command") as mock_run_command,
            mock.patch("time.sleep") as mock_sleep,
            mock.patch("gnoi_reboot.GnoiReboot.populate_reboot_status_flag") as mock_populate_reboot_status_flag,
        ):
            mock_run_command.return_value = (0, ["stdout: execute NSF reboot"], ["stderror: execute NSF reboot"])
            self.gnoi_reboot_module.execute_reboot("NSF")
            mock_run_command.assert_called_once_with("/etc/init.d/gpins-nsf-boot nsf-reboot")
            mock_sleep.assert_called_once_with(260)
            mock_populate_reboot_status_flag.assert_called_once_with()

    def test_execute_reboot_fail_unknown_reboot(self, caplog):
        """An unknown method is logged as an error."""
        with caplog.at_level(logging.ERROR):
            self.gnoi_reboot_module.execute_reboot(-1)
            msg = "gnoi_reboot: Invalid reboot method: -1"
            assert caplog.records[0].message == msg

    def test_execute_reboot_fail_issue_reboot_command_cold_boot(self, caplog):
        """A failing cold reboot command is logged and the status is reset."""
        with (
            mock.patch("gnoi_reboot._run_command") as mock_run_command,
            mock.patch("gnoi_reboot.GnoiReboot.populate_reboot_status_flag") as mock_populate_reboot_status_flag,
            caplog.at_level(logging.ERROR),
        ):
            mock_run_command.return_value = (1, ["stdout: execute cold reboot"], ["stderror: execute cold reboot"])
            self.gnoi_reboot_module.execute_reboot(REBOOTMETHOD_COLD_BOOT_ENUM)
            msg = ("gnoi_reboot: Reboot failed execution with "
                   "stdout: ['stdout: execute cold reboot'], stderr: "
                   "['stderror: execute cold reboot']")
            assert caplog.records[0].message == msg
            mock_populate_reboot_status_flag.assert_called_once_with()

    def test_execute_reboot_fail_issue_reboot_command_nsf(self, caplog):
        """A failing NSF reboot command is logged and the status is reset."""
        with (
            mock.patch("gnoi_reboot._run_command") as mock_run_command,
            mock.patch("gnoi_reboot.GnoiReboot.populate_reboot_status_flag") as mock_populate_reboot_status_flag,
            caplog.at_level(logging.ERROR),
        ):
            mock_run_command.return_value = (1, ["stdout: execute NSF reboot"], ["stderror: execute NSF reboot"])
            self.gnoi_reboot_module.execute_reboot("NSF")
            msg = ("gnoi_reboot: Reboot failed execution with "
                   "stdout: ['stdout: execute NSF reboot'], stderr: "
                   "['stderror: execute NSF reboot']")
            assert caplog.records[0].message == msg
            mock_populate_reboot_status_flag.assert_called_once_with()

    def test_issue_reboot_success_cold_boot(self):
        """A valid cold reboot request starts the reboot thread."""
        with (
            mock.patch("threading.Thread") as mock_thread,
            mock.patch("gnoi_reboot.GnoiReboot.validate_reboot_request", return_value=(0, "")),
        ):
            self.gnoi_reboot_module.populate_reboot_status_flag()
            result = self.gnoi_reboot_module.issue_reboot([VALID_REBOOT_REQUEST_COLD])
            assert result[0] == 0
            assert result[1] == "Successfully issued reboot"
            mock_thread.assert_called_once_with(
                target=self.gnoi_reboot_module.execute_reboot,
                args=(REBOOTMETHOD_COLD_BOOT_ENUM,),
            )
            mock_thread.return_value.start.assert_called_once_with()

    def test_issue_reboot_success_nsf(self):
        """A valid NSF reboot request starts the reboot thread."""
        with (
            mock.patch("threading.Thread") as mock_thread,
            mock.patch("gnoi_reboot.GnoiReboot.validate_reboot_request", return_value=(0, "")),
        ):
            self.gnoi_reboot_module.populate_reboot_status_flag()
            result = self.gnoi_reboot_module.issue_reboot([VALID_REBOOT_REQUEST_NSF])
            assert result[0] == 0
            assert result[1] == "Successfully issued reboot"
            mock_thread.assert_called_once_with(
                target=self.gnoi_reboot_module.execute_reboot,
                args=("NSF",),
            )
            mock_thread.return_value.start.assert_called_once_with()

    def test_issue_reboot_previous_reboot_ongoing(self):
        """A request is refused while the status is active."""
        self.gnoi_reboot_module.populate_reboot_status_flag()
        self.gnoi_reboot_module.reboot_status_flag["active"] = True
        result = self.gnoi_reboot_module.issue_reboot([VALID_REBOOT_REQUEST_COLD])
        assert result[0] == 1
        assert result[1] == "Previous reboot is ongoing"

    def test_issue_reboot_bad_format_reboot_request(self):
        """A request that is not valid json is refused."""
        self.gnoi_reboot_module.populate_reboot_status_flag()
        result = self.gnoi_reboot_module.issue_reboot([INVALID_REBOOT_REQUEST])
        assert result[0] == 1
        assert result[1] == "Failed to parse json formatted reboot request into python dict"

    def test_issue_reboot_invalid_reboot_request(self):
        """The validation error is returned unchanged to the caller."""
        with mock.patch("gnoi_reboot.GnoiReboot.validate_reboot_request", return_value=(1, "failed to validate reboot request")):
            self.gnoi_reboot_module.populate_reboot_status_flag()
            result = self.gnoi_reboot_module.issue_reboot([VALID_REBOOT_REQUEST_COLD])
            assert result[0] == 1
            assert result[1] == "failed to validate reboot request"

    def raise_runtime_exception_test(self):
        """Stand-in for Thread.start that always raises RuntimeError."""
        raise RuntimeError('test raise RuntimeError exception')

    def test_issue_reboot_fail_issue_reboot_thread(self):
        """A RuntimeError while starting the thread is reported as an error."""
        with mock.patch("threading.Thread") as mock_thread:
            mock_thread.return_value.start = self.raise_runtime_exception_test
            self.gnoi_reboot_module.populate_reboot_status_flag()
            result = self.gnoi_reboot_module.issue_reboot([VALID_REBOOT_REQUEST_COLD])
            assert result[0] == 1
            assert result[1] == "Failed to start thread to execute reboot with error: test raise RuntimeError exception"

    def test_get_reboot_status_active(self):
        """An active status is returned as json."""
        self.gnoi_reboot_module.populate_reboot_status_flag(True, 1617811205, "testing reboot response")
        result = self.gnoi_reboot_module.get_reboot_status()
        assert result[0] == 0
        assert result[1] == TEST_ACTIVE_RESPONSE_DATA

    def test_get_reboot_status_inactive(self):
        """An inactive status is returned as json."""
        self.gnoi_reboot_module.populate_reboot_status_flag(False, 0, "")
        result = self.gnoi_reboot_module.get_reboot_status()
        assert result[0] == 0
        assert result[1] == TEST_INACTIVE_RESPONSE_DATA

    def test_register(self):
        """register() returns the module class and its name."""
        result = register()
        assert result[0] == GnoiReboot
        assert result[1] == MOD_NAME

    @classmethod
    def teardown_class(cls):
        """Print a marker when the test class is torn down."""
        print("TEARDOWN")
Loading
Loading