Skip to content

Commit

Permalink
[main.py]: Implement a config process level lock.
Browse files Browse the repository at this point in the history
 Changes:
 1.) Implement a class, which uses hsetnx for lock.
 2.) lock is expired within timeout period or will be released by config python click process.
 3.) After -y prompt, lock is reacquired, because timer could have expired,
     before user enters yes.
  • Loading branch information
Praveen Chaudhary committed May 4, 2021
1 parent 9492eab commit 63338bf
Show file tree
Hide file tree
Showing 2 changed files with 118 additions and 1 deletion.
109 changes: 109 additions & 0 deletions config/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,8 @@ def breakout_warnUser_extraTables(cm, final_delPorts, confirm=True):
click.secho("Below Config can not be verified, It may cause harm "\
"to the system\n {}".format(json.dumps(tables, indent=2)))
click.confirm('Do you wish to Continue?', abort=True)
# Reacquire lock, if confirmation is needed for this command
cdblock.reacquireLock()
except Exception as e:
raise Exception("Failed in breakout_warnUser_extraTables. Error: {}".format(str(e)))
return
Expand Down Expand Up @@ -647,6 +649,8 @@ def _get_sonic_generated_services(num_asic):
def _abort_if_false(ctx, param, value):
if not value:
ctx.abort()
# Reacquire lock, if confirmation is needed for this command
cdblock.reacquireLock()


def _get_disabled_services_list(config_db):
Expand Down Expand Up @@ -837,6 +841,103 @@ def cache_arp_entries():
open(restore_flag_file, 'w').close()
return success

# class for locking entire config process
class ConfigDbLock():
def __init__(self):
self.lockName = "LOCK|configDbLock"
self.timeout = 10
self.pid = os.getpid()
self.client = None
return

def acquireLock(self):
try:
# connect to db
db_kwargs = dict()
configdb = ConfigDBConnector()
configdb.connect()

self.client = configdb.get_redis_client('CONFIG_DB')
# Set lock and expire time. Process may get killed b/w set lock and
# expire call.
if self.client.hsetnx(self.lockName, "PID", self.pid):
self.client.expire(self.lockName, self.timeout)
log.log_debug(":::Lock Acquired:::")
# if lock exists but expire timer not running, run expire time and
# abort.
elif self.client.ttl(self.lockName) == -1:
self.client.expire(self.lockName, self.timeout)
click.echo(":::Can not acquire lock, Reset Timer & Abort:::");
sys.exit(1)
else:
click.echo(":::Can not acquire lock, Abort:::");
sys.exit(1)
except Exception as e:
click.echo(":::Exception: acquireLock {}:::".format(e))
sys.exit(1)
return

def reacquireLock(self):
try:
# Try to set lock first
if self.client.hsetnx(self.lockName, "PID", self.pid):
self.client.expire(self.lockName, self.timeout)
log.log_debug(":::Lock Reacquired:::")
# if lock exists, check who owns it
else:
p = self.client.pipeline(True)
# watch, we do not want to work on modified lock
p.watch(self.lockName)
# if current process holding then extend the timer
if p.hget(self.lockName, "PID") == str(self.pid):
self.client.expire(self.lockName, self.timeout)
log.log_debug(":::Lock Timer Extended:::");
p.unwatch()
return
else:
# some other process is holding the lock.
click.echo(":::Can not acquire lock LOCK PID: {} and self.pid:{}:::".\
format(p.hget(self.lockName, "PID"), self.pid))
p.unwatch()
sys.exit(1)

except Exception as e:
click.echo(":::Exception: reacquireLock {}:::".format(e))
sys.exit(1)
return

def _releaseLock(self):
try:
"""
If LOCK was never acquired, self.client should be None. This
happens with 'config ?' command
"""
if self.client == None:
return

p = self.client.pipeline(True)
# watch, we do not want to work on modified lock
p.watch(self.lockName)
# if current process holding the lock then release it.
if p.hget(self.lockName, "PID") == str(self.pid):
p.multi()
p.delete(self.lockName)
p.execute()
return
else:
# some other process is holding the lock. Do nothing.
pass
p.unwatch()
except Exception as e:
raise e
return

def __del__(self):
self._releaseLock()
return
# end of class configdblock

cdblock = ConfigDbLock()
# This is our main entrypoint - the main 'config' command
@click.group(cls=clicommon.AbbreviationGroup, context_settings=CONTEXT_SETTINGS)
@click.pass_context
Expand All @@ -863,6 +964,10 @@ def config(ctx):
if os.geteuid() != 0:
exit("Root privileges are required for this operation")

# Take lock only when config command is executed, to avoid taking lock for
# TABs and for -h. Note ? is treated as input python clicks
cdblock.acquireLock()

ctx.obj = Db()


Expand Down Expand Up @@ -943,6 +1048,8 @@ def load(filename, yes):

if not yes:
click.confirm(message, abort=True)
# Reacquire lock, if confirmation is needed for this command
cdblock.reacquireLock()

num_asic = multi_asic.get_num_asics()
cfg_files = []
Expand Down Expand Up @@ -1132,6 +1239,8 @@ def reload(db, filename, yes, load_sysinfo, no_service_restart, disable_arp_cach

if not yes:
click.confirm(message, abort=True)
# Reacquire lock, if confirmation is needed for this command
cdblock.reacquireLock()

log.log_info("'reload' executing...")

Expand Down
10 changes: 9 additions & 1 deletion config/muxcable.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,8 @@ def state(state, port):

if port is not None and port != "all":
click.confirm(('Muxcable at port {} will be changed to {} state. Continue?'.format(port, state)), abort=True)
# Reacquire lock, if confirmation is needed for this command
cdblock.reacquireLock()
logical_port_list = platform_sfputil_helper.get_logical_list()
if port not in logical_port_list:
click.echo("ERR: This is not a valid port, valid ports ({})".format(", ".join(logical_port_list)))
Expand Down Expand Up @@ -344,7 +346,7 @@ def state(state, port):

logical_port_list_per_port = logical_port_list_for_physical_port.get(physical_port, None)

""" This check is required for checking whether or not this logical port is the one which is
""" This check is required for checking whether or not this logical port is the one which is
actually mapped to physical port and by convention it is always the first port.
TODO: this should be removed with more logic to check which logical port maps to actual physical port
being used"""
Expand Down Expand Up @@ -384,6 +386,8 @@ def state(state, port):
elif port == "all" and port is not None:

click.confirm(('Muxcables at all ports will be changed to {} state. Continue?'.format(state)), abort=True)
# Reacquire lock, if confirmation is needed for this command
cdblock.reacquireLock()
logical_port_list = platform_sfputil_helper.get_logical_list()

rc = True
Expand Down Expand Up @@ -491,6 +495,8 @@ def setswitchmode(state, port):

if port is not None and port != "all":
click.confirm(('Muxcable at port {} will be changed to {} switching mode. Continue?'.format(port, state)), abort=True)
# Reacquire lock, if confirmation is needed for this command
cdblock.reacquireLock()
logical_port_list = platform_sfputil_helper.get_logical_list()
if port not in logical_port_list:
click.echo("ERR: This is not a valid port, valid ports ({})".format(", ".join(logical_port_list)))
Expand Down Expand Up @@ -563,6 +569,8 @@ def setswitchmode(state, port):
elif port == "all" and port is not None:

click.confirm(('Muxcable at port {} will be changed to {} switching mode. Continue?'.format(port, state)), abort=True)
# Reacquire lock, if confirmation is needed for this command
cdblock.reacquireLock()
logical_port_list = platform_sfputil_helper.get_logical_list()

rc = True
Expand Down

0 comments on commit 63338bf

Please sign in to comment.