Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sonic-host-services changes for gNOI Cold Reboot #181

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
152 changes: 152 additions & 0 deletions host_modules/reboot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
"""reboot module which performs reboot"""

import json
import logging
import threading
import time
from host_modules import host_service
from utils.run_cmd import _run_command

# Module name: used to build the DBus bus name for the exported methods,
# as the prefix of every log line, and returned by register().
MOD_NAME = 'reboot'
# Reboot method in reboot request
# Both enum and string representations are supported
REBOOTMETHOD_COLD_BOOT_VALUES = {1, "COLD"}
# NOTE(review): the WARM values are defined but not referenced by the
# validation/execution code visible in this module - confirm whether warm
# reboot is meant to be handled here.
REBOOTMETHOD_WARM_BOOT_VALUES = {4, "WARM"}
REBOOTMETHOD_NSF_VALUES = {5, "NSF"}

# Timeout for SONiC Host Service to be killed during reboot.
# Value is in seconds (it is passed to time.sleep()).
REBOOT_TIMEOUT = 260

# Shell commands handed to _run_command() for each supported reboot method.
EXECUTE_COLD_REBOOT_COMMAND = "sudo reboot"
EXECUTE_NSF_REBOOT_COMMAND = "/etc/init.d/gpins-nsf-boot nsf-reboot"

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


class Reboot(host_service.HostModule):
    """DBus endpoint that executes the reboot and returns the reboot status.

    The current status is kept in ``reboot_status_flag``, a dict with the keys
    ``active`` (bool), ``when`` (int, seconds since the epoch) and ``reason``
    (str). Every access to that dict is guarded by ``lock`` because it is
    touched both by the DBus methods and by the thread running the reboot.
    """

    def __init__(self, mod_name):
        """Initialise the status flag and register the module as ``mod_name``.

        A threading.Lock protects reboot_status_flag since it can be read
        and written by multiple threads.
        """
        self.lock = threading.Lock()
        # reboot_status_flag is used to keep track of reboot status on host
        self.reboot_status_flag = {}
        # Populating with default value i.e., no active reboot
        self.populate_reboot_status_flag()
        super(Reboot, self).__init__(mod_name)

    def populate_reboot_status_flag(self, active=False, when=0, reason=""):
        """Overwrite reboot_status_flag with the given values.

        Called without arguments it resets the flag to "no active reboot".
        """
        with self.lock:
            self.reboot_status_flag["active"] = active
            self.reboot_status_flag["when"] = when
            self.reboot_status_flag["reason"] = reason

    def validate_reboot_request(self, reboot_request):
        """Check that a parsed reboot request can be served.

        Returns a ``(code, message)`` tuple: ``(0, "")`` when the request is
        acceptable, ``(1, <error text>)`` otherwise.
        """
        # Check whether reboot method is present.
        if "method" not in reboot_request:
            return 1, "Reboot request must contain a reboot method"

        # Check whether reboot method is valid.
        rebootmethod = reboot_request["method"]
        try:
            valid_method = any(
                rebootmethod in values
                for values in (REBOOTMETHOD_COLD_BOOT_VALUES,
                               REBOOTMETHOD_NSF_VALUES)
            )
        except TypeError:
            # Unhashable JSON values (list/dict) cannot be looked up in a set
            # and can never be a valid method.
            valid_method = False
        if not valid_method:
            return 1, "Invalid reboot method: " + str(rebootmethod)

        # Check whether delay is non-zero. delay key will not exist in
        # reboot_request if it is zero.
        if "delay" in reboot_request and reboot_request["delay"] != 0:
            return 1, "Delayed reboot is not supported"
        return 0, ""

    def execute_reboot(self, rebootmethod):
        """Execute reboot and reset reboot_status_flag when reboot fails.

        Intended to run in its own thread: on success it blocks for
        REBOOT_TIMEOUT seconds waiting for the host service to be killed.
        """
        try:
            if rebootmethod in REBOOTMETHOD_COLD_BOOT_VALUES:
                command = EXECUTE_COLD_REBOOT_COMMAND
                logger.warning("%s: Issuing cold reboot", MOD_NAME)
            elif rebootmethod in REBOOTMETHOD_NSF_VALUES:
                command = EXECUTE_NSF_REBOOT_COMMAND
                logger.warning("%s: Issuing NSF reboot", MOD_NAME)
            else:
                # %s rather than %d: the method may be a string such as "WARM".
                logger.error("%s: Invalid reboot method: %s", MOD_NAME, rebootmethod)
                return

            rc, stdout, stderr = _run_command(command)
            if rc:
                logger.error("%s: Reboot failed execution with stdout: %s, "
                             "stderr: %s", MOD_NAME, stdout, stderr)
                return

            # Wait for REBOOT_TIMEOUT seconds for the reboot to complete. Here,
            # we expect that SONiC Host Service will be killed during this
            # waiting period if the reboot is successful. If this module is
            # still alive after the below waiting period, we can conclude that
            # the reboot has failed. Each container can take up to 20 seconds
            # to get killed. In total, there are 10 containers, and adding a
            # buffer of 1 minute brings up the delay value to be 260 seconds.
            time.sleep(REBOOT_TIMEOUT)
        finally:
            # Reaching this point on any path means the host was not rebooted,
            # so clear the flag; otherwise later requests would be rejected
            # with "Previous reboot is ongoing" forever.
            self.populate_reboot_status_flag()

    @host_service.method(host_service.bus_name(MOD_NAME), in_signature='as', out_signature='is')
    def issue_reboot(self, options):
        """Issues reboot after performing the following steps sequentially:
          1. Checks that reboot_status_flag is not set
          2. Validates the reboot request
          3. Sets the reboot_status_flag
          4. Issues the reboot in a separate thread

        ``options[0]`` is the json formatted reboot request. Returns a
        ``(code, message)`` tuple where code 0 means the reboot was issued.
        """
        logger.warning("%s: issue_reboot rpc called", MOD_NAME)
        with self.lock:
            is_reboot_ongoing = self.reboot_status_flag["active"]
        # Return without issuing the reboot if the previous reboot is ongoing
        if is_reboot_ongoing:
            return 1, "Previous reboot is ongoing"

        # Convert input json formatted reboot request into python dict.
        # reboot_request is a python dict with the following keys:
        #   method - specifies the method of reboot
        #   delay - delay to issue reboot, key exists only if it is non-zero
        #   message - reason for reboot
        #   force - either true/false, key exists only if it is true
        parse_error = "Failed to parse json formatted reboot request into python dict"
        try:
            reboot_request = json.loads(options[0])
        except (ValueError, TypeError, IndexError):
            # ValueError: malformed json; TypeError: payload is not text;
            # IndexError: no payload was supplied at all.
            return 1, parse_error
        if not isinstance(reboot_request, dict):
            # Valid json that is not an object (e.g. a bare number or list).
            return 1, parse_error

        # Validate reboot request
        err, errstr = self.validate_reboot_request(reboot_request)
        if err:
            return err, errstr

        # Sets reboot_status_flag to be in active state. The message is
        # optional, so fall back to an empty reason instead of raising KeyError.
        self.populate_reboot_status_flag(
            True, int(time.time()), reboot_request.get("message", ""))

        # Issue reboot in a new thread and reset the reboot_status_flag if the reboot fails
        try:
            reboot_thread = threading.Thread(target=self.execute_reboot,
                                             args=(reboot_request["method"],))
            reboot_thread.start()
        except RuntimeError as error:
            # No reboot is in flight, so do not leave the flag marked active.
            self.populate_reboot_status_flag()
            return 1, "Failed to start thread to execute reboot with error: " + str(error)
        return 0, "Successfully issued reboot"

    @host_service.method(host_service.bus_name(MOD_NAME), in_signature='', out_signature='is')
    def get_reboot_status(self):
        """Returns current reboot status on host in json format"""
        with self.lock:
            response_data = json.dumps(self.reboot_status_flag)
        return 0, response_data

def register():
    """Return the Reboot handler class and MOD_NAME as a tuple."""
    handler_class = Reboot
    return handler_class, MOD_NAME
3 changes: 2 additions & 1 deletion scripts/sonic-host-server
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import dbus.service
import dbus.mainloop.glib

from gi.repository import GObject
from host_modules import config_engine, gcu, host_service, showtech, systemd_service, file_service
from host_modules import config_engine, gcu, host_service, showtech, systemd_service, file_service, reboot


def register_dbus():
Expand All @@ -21,6 +21,7 @@ def register_dbus():
'config': config_engine.Config('config'),
'gcu': gcu.GCU('gcu'),
'host_service': host_service.HostService('host_service'),
'reboot': reboot.Reboot('reboot'),
'showtech': showtech.Showtech('showtech'),
'systemd': systemd_service.SystemdService('systemd'),
'file_stat': file_service.FileService('file')
Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@
maintainer = 'Joe LeVeque',
maintainer_email = 'jolevequ@microsoft.com',
packages = [
'host_modules'
'host_modules',
'utils',
],
scripts = [
'scripts/caclmgrd',
Expand Down
Loading
Loading