Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CP-49720 Move LOCK_TYPE_RUNNING from cleanup.py to lock.py #691

Merged
merged 3 commits into from
Jun 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 1 addition & 24 deletions drivers/FileSR.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,31 +378,8 @@ def _process_replay(self, data):
index += 1

def _kickGC(self):
# don't bother if an instance already running (this is just an
# optimization to reduce the overhead of forking a new process if we
# don't have to, but the process will check the lock anyways)
lockRunning = Lock(cleanup.LOCK_TYPE_RUNNING, self.uuid)
if not lockRunning.acquireNoblock():
if cleanup.should_preempt(self.session, self.uuid):
util.SMlog("Aborting currently-running coalesce of garbage VDI")
try:
if not cleanup.abort(self.uuid, soft=True):
util.SMlog("The GC has already been scheduled to "
"re-start")
except util.CommandException as e:
if e.code != errno.ETIMEDOUT:
raise
util.SMlog('failed to abort the GC')
finally:
return
else:
util.SMlog("A GC instance already running, not kicking")
return
else:
lockRunning.release()

util.SMlog("Kicking GC")
cleanup.start_gc(self.uuid)
cleanup.start_gc(self.session, self.uuid)

def _isbind(self):
# os.path.ismount can't deal with bind mount
Expand Down
23 changes: 1 addition & 22 deletions drivers/LVHDSR.py
Original file line number Diff line number Diff line change
Expand Up @@ -1289,29 +1289,8 @@ def _prepareTestMode(self):
util.SMlog("Setting env %s" % self.ENV_VAR_VHD_TEST[self.testMode])

def _kickGC(self):
# don't bother if an instance already running (this is just an
# optimization to reduce the overhead of forking a new process if we
# don't have to, but the process will check the lock anyways)
lockRunning = Lock(cleanup.LOCK_TYPE_RUNNING, self.uuid)
if not lockRunning.acquireNoblock():
if cleanup.should_preempt(self.session, self.uuid):
util.SMlog("Aborting currently-running coalesce of garbage VDI")
try:
if not cleanup.abort(self.uuid, soft=True):
util.SMlog("The GC has already been scheduled to "
"re-start")
except util.CommandException as e:
if e.code != errno.ETIMEDOUT:
raise
util.SMlog('failed to abort the GC')
else:
util.SMlog("A GC instance already running, not kicking")
return
else:
lockRunning.release()

util.SMlog("Kicking GC")
cleanup.start_gc(self.uuid)
cleanup.start_gc(self.session, self.uuid)

def ensureCBTSpace(self):
# Ensure we have space for at least one LV
Expand Down
69 changes: 44 additions & 25 deletions drivers/cleanup.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,13 +62,12 @@
# process "lock", used simply as an indicator that a process already exists
# that is doing GC/coalesce on this SR (such a process holds the lock, and we
# check for the fact by trying the lock).
LOCK_TYPE_RUNNING = "running"
lockRunning = None
lockGCRunning = None

# process "lock" to indicate that the GC process has been activated but may not
# yet be running, stops a second process from being started.
LOCK_TYPE_GC_ACTIVE = "gc_active"
lockActive = None
lockGCActive = None

# Default coalesce error rate limit, in messages per minute. A zero value
# disables throttling, and a negative value disables error reporting.
Expand Down Expand Up @@ -2934,7 +2933,7 @@ def abortTest():


def _gcLoop(sr, dryRun=False, immediate=False):
if not lockActive.acquireNoblock():
if not lockGCActive.acquireNoblock():
Util.log("Another GC instance already active, exiting")
return

Expand Down Expand Up @@ -2971,7 +2970,7 @@ def _gcLoop(sr, dryRun=False, immediate=False):
Util.log("No work, exiting")
break

if not lockRunning.acquireNoblock():
if not lockGCRunning.acquireNoblock():
Util.log("Unable to acquire GC running lock.")
return
try:
Expand Down Expand Up @@ -3013,15 +3012,15 @@ def _gcLoop(sr, dryRun=False, immediate=False):
continue

finally:
lockRunning.release()
lockGCRunning.release()
except:
task_status = "failure"
raise
finally:
sr.xapi.set_task_status(task_status)
Util.log("GC process exiting, no work left")
_create_init_file(sr.uuid)
lockActive.release()
lockGCActive.release()


def _xapi_enabled(session, hostref):
Expand Down Expand Up @@ -3069,19 +3068,19 @@ def _abort(srUuid, soft=False):
soft: If set to True and there is a pending abort signal, the function
doesn't do anything. If set to False, a new abort signal is issued.

returns: If soft is set to False, we return True holding lockActive. If
returns: If soft is set to False, we return True holding lockGCActive. If
soft is set to False and an abort signal is pending, we return False
without holding lockActive. An exception is raised in case of error."""
without holding lockGCActive. An exception is raised in case of error."""
Util.log("=== SR %s: abort ===" % (srUuid))
init(srUuid)
if not lockActive.acquireNoblock():
if not lockGCActive.acquireNoblock():
gotLock = False
Util.log("Aborting currently-running instance (SR %s)" % srUuid)
abortFlag = IPCFlag(srUuid)
if not abortFlag.set(FLAG_TYPE_ABORT, soft):
return False
for i in range(SR.LOCK_RETRY_ATTEMPTS):
gotLock = lockActive.acquireNoblock()
gotLock = lockGCActive.acquireNoblock()
if gotLock:
break
time.sleep(SR.LOCK_RETRY_INTERVAL)
Expand All @@ -3093,12 +3092,12 @@ def _abort(srUuid, soft=False):


def init(srUuid):
global lockRunning
if not lockRunning:
lockRunning = lock.Lock(LOCK_TYPE_RUNNING, srUuid)
global lockActive
if not lockActive:
lockActive = LockActive(srUuid)
global lockGCRunning
if not lockGCRunning:
lockGCRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, srUuid)
global lockGCActive
if not lockGCActive:
lockGCActive = LockActive(srUuid)


class LockActive:
Expand Down Expand Up @@ -3166,7 +3165,7 @@ def abort(srUuid, soft=False):
"""
if _abort(srUuid, soft):
Util.log("abort: releasing the process lock")
lockActive.release()
lockGCActive.release()
return True
else:
return False
Expand Down Expand Up @@ -3206,7 +3205,27 @@ def gc(session, srUuid, inBackground, dryRun=False):
_gc(session, srUuid, dryRun, immediate=True)


def start_gc(sr_uuid):
def start_gc(session, sr_uuid):
# don't bother if an instance already running (this is just an
# optimization to reduce the overhead of forking a new process if we
# don't have to, but the process will check the lock anyways)
lockRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, sr_uuid)
if not lockRunning.acquireNoblock():
if should_preempt(session, sr_uuid):
util.SMlog("Aborting currently-running coalesce of garbage VDI")
try:
if not abort(sr_uuid, soft=True):
util.SMlog("The GC has already been scheduled to re-start")
except util.CommandException as e:
if e.code != errno.ETIMEDOUT:
raise
util.SMlog('failed to abort the GC')
else:
util.SMlog("A GC instance already running, not kicking")
return
else:
lockRunning.release()

util.SMlog(f"Starting GC file is {__file__}")
subprocess.run([__file__, '-b', '-u', sr_uuid, '-g'],
stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
Expand All @@ -3224,7 +3243,7 @@ def gc_force(session, srUuid, force=False, dryRun=False, lockSR=False):
Util.log("=== SR %s: gc_force ===" % srUuid)
init(srUuid)
sr = SR.getInstance(srUuid, session, lockSR, True)
if not lockActive.acquireNoblock():
if not lockGCActive.acquireNoblock():
abort(srUuid)
else:
Util.log("Nothing was running, clear to proceed")
Expand All @@ -3240,7 +3259,7 @@ def gc_force(session, srUuid, force=False, dryRun=False, lockSR=False):
finally:
sr.cleanup()
sr.logFilter.logState()
lockActive.release()
lockGCActive.release()


def get_state(srUuid):
Expand All @@ -3249,8 +3268,8 @@ def get_state(srUuid):
locking.
"""
init(srUuid)
if lockActive.acquireNoblock():
lockActive.release()
if lockGCActive.acquireNoblock():
lockGCActive.release()
return False
return True

Expand Down Expand Up @@ -3323,9 +3342,9 @@ def abort_optional_reenable(uuid):
ret = _abort(uuid)
input("Press enter to re-enable...")
print("GC/coalesce re-enabled")
lockRunning.release()
lockGCRunning.release()
if ret:
lockActive.release()
lockGCActive.release()


##############################################################################
Expand Down
3 changes: 1 addition & 2 deletions drivers/iscsilib.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
import lock
import glob
import tempfile
from cleanup import LOCK_TYPE_RUNNING
from configparser import RawConfigParser
import io

Expand Down Expand Up @@ -54,7 +53,7 @@ def doexec_locked(cmd):
"""Executes via util.doexec the command specified whilst holding lock"""
_lock = None
if os.path.basename(cmd[0]) == 'iscsiadm':
_lock = lock.Lock(LOCK_TYPE_RUNNING, 'iscsiadm')
_lock = lock.Lock(lock.LOCK_TYPE_ISCSIADM_RUNNING, 'iscsiadm')
_lock.acquire()
# util.SMlog("%s" % cmd)
(rc, stdout, stderr) = util.doexec(cmd)
Expand Down
3 changes: 3 additions & 0 deletions drivers/lock.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@

VERBOSE = True

# Still just called "running" for backwards compatibility
LOCK_TYPE_GC_RUNNING = "running"
LOCK_TYPE_ISCSIADM_RUNNING = "isciadm_running"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is fine but we will get a slightly odd lock name of iscsiadm.iscsiadm_running as a result.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm fine with that. If in the future we encounter a lock named newthing.iscsiadm_running we'll know where to look for the cut & paste error...


class LockException(util.SMException):
pass
Expand Down
3 changes: 1 addition & 2 deletions drivers/resetvdis.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,10 @@

def reset_sr(session, host_uuid, sr_uuid, is_sr_master):
from vhdutil import LOCK_TYPE_SR
from cleanup import LOCK_TYPE_RUNNING

cleanup.abort(sr_uuid)

gc_lock = lock.Lock(LOCK_TYPE_RUNNING, sr_uuid)
gc_lock = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, sr_uuid)
sr_lock = lock.Lock(LOCK_TYPE_SR, sr_uuid)
gc_lock.acquire()
sr_lock.acquire()
Expand Down
3 changes: 3 additions & 0 deletions tests/test_FileSR.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,9 @@ def setUp(self):
lock_patcher = mock.patch('FileSR.Lock')
self.mock_lock = lock_patcher.start()

lock_patcher_cleanup = mock.patch('cleanup.lock.Lock')
self.mock_lock_cleanup = lock_patcher_cleanup.start()

xapi_patcher = mock.patch('SR.XenAPI')
self.mock_xapi = xapi_patcher.start()
self.mock_session = mock.MagicMock()
Expand Down
Loading
Loading