Skip to content

Commit

Permalink
sonic-host-services changes for gNOI Cold Reboot
Browse files Browse the repository at this point in the history
  • Loading branch information
rkavitha-hcl committed Nov 6, 2024
1 parent 13a5419 commit efb38de
Show file tree
Hide file tree
Showing 5 changed files with 414 additions and 2 deletions.
155 changes: 155 additions & 0 deletions host_modules/gnoi_reboot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
"""gNOI reboot module which performs reboot"""

import json
import logging
import threading
import time
from host_modules import host_service
from utils.run_cmd import _run_command

# Name of this host module: used to build the DBus bus name for the exported
# methods, returned by register(), and used as the prefix of every log message.
MOD_NAME = 'gnoi_reboot'
# Reboot method in reboot request
# Both enum and string representations are supported
# NOTE(review): the integer members presumably mirror the gNOI RebootMethod
# enum (COLD=1, WARM=4, NSF=5) -- confirm against the gNOI system.proto.
REBOOTMETHOD_COLD_BOOT_VALUES = {1, "COLD"}
# Not currently accepted by validate_reboot_request, which only checks the
# COLD and NSF sets.
REBOOTMETHOD_WARM_BOOT_VALUES = {4, "WARM"}
REBOOTMETHOD_NSF_VALUES = {5, "NSF"}

# Timeout for SONiC Host Service to be killed during reboot
# Derivation: 10 containers * up to 20 seconds each to be killed, plus a
# 60 second buffer = 260 seconds.
REBOOT_TIMEOUT = 260

# Commands handed to _run_command for each supported reboot method.
EXECUTE_COLD_REBOOT_COMMAND = "sudo reboot"
EXECUTE_NSF_REBOOT_COMMAND = "/etc/init.d/gpins-nsf-boot nsf-reboot"

# Module-level logger shared by all GnoiReboot methods.
logger = logging.getLogger(__name__)


class GnoiReboot(host_service.HostModule):
    """DBus endpoint that executes the reboot and returns the reboot status.

    The current status is kept in ``reboot_status_flag``, a dict with keys:
        active - True while a reboot issued through this module is pending
        when   - epoch seconds at which the reboot was issued (0 if none)
        reason - the "message" of the reboot request ("" if none)

    The dict is touched both by the DBus method handlers and by the worker
    thread running ``execute_reboot``, so every access goes through
    ``self.lock``.
    """

    def __init__(self, mod_name):
        """Initialise the status flag to "no active reboot" and register
        the module with the host service under ``mod_name``."""
        # Guards reboot_status_flag, which is read/written by multiple threads
        self.lock = threading.Lock()
        # reboot_status_flag is used to keep track of reboot status on host
        self.reboot_status_flag = {}
        # Populating with default value i.e., no active reboot
        self.populate_reboot_status_flag()
        super(GnoiReboot, self).__init__(mod_name)

    def populate_reboot_status_flag(self, active=False, when=0, reason=""):
        """Overwrite reboot_status_flag with the given values.

        Called without arguments it resets the flag to "no active reboot".
        """
        # "with" guarantees the lock is released even if an assignment raises
        with self.lock:
            self.reboot_status_flag["active"] = active
            self.reboot_status_flag["when"] = when
            self.reboot_status_flag["reason"] = reason

    def validate_reboot_request(self, reboot_request):
        """Check that a decoded reboot request can be honoured.

        Args:
            reboot_request: the JSON-decoded request; expected to be a dict.

        Returns:
            (0, "") when the request is valid, otherwise (1, <error string>).
        """
        # json.loads may legitimately yield a list/str/number; only a JSON
        # object can carry the keys we need.
        if not isinstance(reboot_request, dict):
            return 1, "Reboot request must be a json object"

        # Check whether reboot method is present.
        if "method" not in reboot_request:
            return 1, "Reboot request must contain a reboot method"

        # Check whether reboot method is valid. Only cold and NSF reboots are
        # supported here.
        rebootmethod = reboot_request["method"]
        try:
            valid_method = (rebootmethod in REBOOTMETHOD_COLD_BOOT_VALUES
                            or rebootmethod in REBOOTMETHOD_NSF_VALUES)
        except TypeError:
            # Unhashable value (e.g. a JSON list/object) cannot be a member
            valid_method = False
        if not valid_method:
            return 1, "Invalid reboot method: " + str(rebootmethod)

        # Check whether delay is non-zero. delay key will not exist in
        # reboot_request if it is zero
        if "delay" in reboot_request and reboot_request["delay"] != 0:
            return 1, "Delayed reboot is not supported"
        return 0, ""

    def execute_reboot(self, rebootmethod):
        """Execute reboot and reset reboot_status_flag when reboot fails.

        Runs in a worker thread started by issue_reboot. On every failure
        path the status flag is reset so that a later reboot request is not
        rejected as "ongoing".
        """
        if rebootmethod in REBOOTMETHOD_COLD_BOOT_VALUES:
            command = EXECUTE_COLD_REBOOT_COMMAND
            # Best-effort marker file: failing to write it must not abort the
            # reboot nor leave the status flag stuck in the active state.
            try:
                with open("/tmp/hostlog.txt", "w") as marker:
                    marker.write("Received reboot command ! ")
            except OSError as error:
                logger.warning("%s: Could not write /tmp/hostlog.txt: %s",
                               MOD_NAME, error)
            logger.warning("%s: Issuing cold reboot", MOD_NAME)
        elif rebootmethod in REBOOTMETHOD_NSF_VALUES:
            command = EXECUTE_NSF_REBOOT_COMMAND
            logger.warning("%s: Issuing NSF reboot", MOD_NAME)
        else:
            # %s, not %d: the method may be given as a string
            logger.error("%s: Invalid reboot method: %s", MOD_NAME, rebootmethod)
            self.populate_reboot_status_flag()
            return

        rc, stdout, stderr = _run_command(command)
        if rc:
            self.populate_reboot_status_flag()
            logger.error("%s: Reboot failed execution with stdout: %s, "
                         "stderr: %s", MOD_NAME, stdout, stderr)
            return

        # Wait for REBOOT_TIMEOUT seconds for the reboot to complete. Here, we
        # expect that SONiC Host Service will be killed during this waiting
        # period if the reboot is successful. If this module is still alive
        # after the waiting period, we can conclude that the reboot has
        # failed. Each container can take up to 20 seconds to get killed. In
        # total, there are 10 containers, and adding a buffer of 1 minute
        # brings up the delay value to be 260 seconds.
        time.sleep(REBOOT_TIMEOUT)
        # Conclude that the reboot has failed if we reach this point
        self.populate_reboot_status_flag()

    @host_service.method(host_service.bus_name(MOD_NAME), in_signature='as', out_signature='is')
    def issue_reboot(self, options):
        """Issues reboot after performing the following steps sequentially:
          1. Checks that reboot_status_flag is not set
          2. Validates the reboot request
          3. Sets the reboot_status_flag
          4. Issues the reboot in a separate thread

        Args:
            options: array of strings; options[0] is the json formatted
                reboot request.

        Returns:
            (0, success message) or (1, error message).
        """
        logger.warning("%s: issue_reboot rpc called", MOD_NAME)
        with self.lock:
            is_reboot_ongoing = self.reboot_status_flag["active"]
        # Return without issuing the reboot if the previous reboot is ongoing
        if is_reboot_ongoing:
            return 1, "Previous reboot is ongoing"

        # Convert input json formatted reboot request into python dict.
        # reboot_request is a python dict with the following keys:
        #   method - specifies the method of reboot
        #   delay - delay to issue reboot, key exists only if it is non-zero
        #   message - reason for reboot
        #   force - either true/false, key exists only if it is true
        try:
            reboot_request = json.loads(options[0])
        except (ValueError, IndexError, TypeError):
            # ValueError: malformed json; IndexError: empty options array;
            # TypeError: options[0] is not a string
            return 1, "Failed to parse json formatted reboot request into python dict"

        # Validate reboot request
        err, errstr = self.validate_reboot_request(reboot_request)
        if err:
            return err, errstr

        # Sets reboot_status_flag to be in active state. "message" is not
        # enforced by validation, so fall back to an empty reason.
        self.populate_reboot_status_flag(True, int(time.time()),
                                         reboot_request.get("message", ""))

        # Issue reboot in a new thread and reset the reboot_status_flag if the
        # reboot fails
        try:
            worker = threading.Thread(target=self.execute_reboot,
                                      args=(reboot_request["method"],))
            worker.start()
        except RuntimeError as error:
            # No reboot is in flight; clear the flag so later requests are
            # not rejected as "ongoing".
            self.populate_reboot_status_flag()
            return 1, "Failed to start thread to execute reboot with error: " + str(error)
        return 0, "Successfully issued reboot"

    @host_service.method(host_service.bus_name(MOD_NAME), in_signature='', out_signature='is')
    def get_reboot_status(self):
        """Returns current reboot status on host in json format"""
        with self.lock:
            response_data = json.dumps(self.reboot_status_flag)
        return 0, response_data

def register():
    """Return the (handler class, module name) pair for this host module."""
    registration = (GnoiReboot, MOD_NAME)
    return registration
3 changes: 2 additions & 1 deletion scripts/sonic-host-server
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import dbus.service
import dbus.mainloop.glib

from gi.repository import GObject
from host_modules import config_engine, gcu, host_service, showtech, systemd_service, file_service
from host_modules import config_engine, gcu, host_service, showtech, systemd_service, file_service, gnoi_reboot


def register_dbus():
Expand All @@ -21,6 +21,7 @@ def register_dbus():
'config': config_engine.Config('config'),
'gcu': gcu.GCU('gcu'),
'host_service': host_service.HostService('host_service'),
'gnoi_reboot': gnoi_reboot.GnoiReboot('gnoi_reboot'),
'showtech': showtech.Showtech('showtech'),
'systemd': systemd_service.SystemdService('systemd'),
'file_stat': file_service.FileService('file')
Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@
maintainer = 'Joe LeVeque',
maintainer_email = 'jolevequ@microsoft.com',
packages = [
'host_modules'
'host_modules',
'utils',
],
scripts = [
'scripts/caclmgrd',
Expand Down
Loading

0 comments on commit efb38de

Please sign in to comment.