Skip to content

Commit

Permalink
core: LSM: fix locking on restart
Browse files Browse the repository at this point in the history
This patch fixes an issue when the engine is restarted during LSM,
the command gets stuck.

The cause of this issue was that EngineLocks were passed to
LiveMigrateDiskCommand, and from there to CreateSnapshotCommand in
the executeCommand() stage. There the locks are released, and
reacquired later on before snapshot remove phase. If the engine was
restarted during CreateSnapshot phase - both the CreateSnapshot and
LiveMigrateDisk commands resumed, running reacquireLocks, and then
before the Snapshot Remove phase, LiveMigrateDiskCommand attempted
to reacquire locks again, getting stuck.

In this patch we override reacquireLocks method in LiveMigrateDisk
command, doing nothing, and then the locks on Disk and VM are
acquired separately before the snapshot remove phase

Bug-Url: https://bugzilla.redhat.com/2110186
  • Loading branch information
mkemel committed Sep 22, 2022
1 parent 89a8939 commit 4898813
Showing 1 changed file with 22 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@
import org.ovirt.engine.core.dao.StorageDomainDao;
import org.ovirt.engine.core.dao.StorageDomainStaticDao;
import org.ovirt.engine.core.dao.VmDao;
import org.ovirt.engine.core.utils.lock.EngineLock;
import org.ovirt.engine.core.utils.transaction.TransactionSupport;
import org.ovirt.engine.core.vdsbroker.ResourceManager;
import org.ovirt.engine.core.vdsbroker.builder.vminfo.VmInfoBuildUtils;
Expand Down Expand Up @@ -282,6 +283,8 @@ public boolean performNextOperation(int completedChildCount) {
return true;
}

EngineLock removeSnapshotLock = createEngineLockForSnapshotRemove();

if (getParameters().getLiveDiskMigrateStage() == LiveDiskMigrateStage.LIVE_MIGRATE_DISK_EXEC_COMPLETED) {
// This lock is required to prevent CreateSnapshotForVmCommand from
// running at the same time as RemoveSnapshotCommand
Expand All @@ -290,7 +293,7 @@ public boolean performNextOperation(int completedChildCount) {
// at the end of CreateSnapshotForVm command called from the executeCommand() method.
// Here the lock is acquired again and will be released when this command (LiveMigrateDisk)
// finishes.
if (!lockManager.acquireLock(getLock()).isAcquired()) {
if (!lockManager.acquireLock(removeSnapshotLock).isAcquired()) {
log.info("Failed to acquire VM lock, will retry on the next polling cycle");
return true;
}
Expand All @@ -304,6 +307,8 @@ public boolean performNextOperation(int completedChildCount) {
return true;
}

lockManager.releaseLock(removeSnapshotLock);

return false;
}

Expand Down Expand Up @@ -726,17 +731,31 @@ public AuditLogType getAuditLogTypeValue() {

@Override
protected Map<String, Pair<String, String>> getExclusiveLocks() {
return Collections.emptyMap();
}

@Override
protected Map<String, Pair<String, String>> getSharedLocks() {
return Collections.emptyMap();
}

private Map<String, Pair<String, String>> getExclusiveLocksForSnapshotRemove() {
return Collections.singletonMap(getParameters().getImageGroupID().toString(),
LockMessagesMatchUtil.makeLockingPair(LockingGroup.DISK,
getDiskIsBeingMigratedMessage(getDiskImageByDiskId(getParameters().getImageGroupID()))));
}

@Override
protected Map<String, Pair<String, String>> getSharedLocks() {
private Map<String, Pair<String, String>> getSharedLocksForSnapshotRemove() {
return Collections.singletonMap(getVmId().toString(),
LockMessagesMatchUtil.makeLockingPair(LockingGroup.VM, EngineMessage.ACTION_TYPE_FAILED_OBJECT_LOCKED));
}

private EngineLock createEngineLockForSnapshotRemove() {
return new EngineLock(
getExclusiveLocksForSnapshotRemove(),
getSharedLocksForSnapshotRemove());
}

private String getDiskIsBeingMigratedMessage(Disk disk) {
return EngineMessage.ACTION_TYPE_FAILED_DISK_IS_BEING_MIGRATED.name()
+ String.format("$DiskName %1$s", disk != null ? disk.getDiskAlias() : "");
Expand Down

0 comments on commit 4898813

Please sign in to comment.