Skip to content

Commit

Permalink
core: handle absence of lease SD when starting HA VM
Browse files Browse the repository at this point in the history
This patch looks like a hack but as we don't have enough time
to do that more cleanly for 4.5.3, we go this way in order to
address a significant issue with disaster recovery:
When registering a highly available VM on the secondary site
and trying to start it, the latter operation may fail due to
filtering hosts that didn't report a proper state for the
storage domain on which the VM lease was created.

The proposed solution is to add the VM to HaAutoStartVmsRunner
so we would keep trying to start it in that case. Specifically,
when we fail to find a host due to the filtering that is done
by VmLeasesReadyFilterPolicyUnit on an attempt to start the
VM by a request we get from outside (i.e., not internal
execution), we switch the VM to Down state with exit reason
that reflects an Error and then add it to the aforementioned
runner service.

Bug-Url: https://bugzilla.redhat.com/1974535
Signed-off-by: Arik Hadas <ahadas@redhat.com>
  • Loading branch information
ahadas committed Sep 20, 2022
1 parent b758f52 commit 28bca2b
Show file tree
Hide file tree
Showing 6 changed files with 41 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
import org.ovirt.engine.core.common.businessentities.VdsNumaNode;
import org.ovirt.engine.core.common.businessentities.VmDevice;
import org.ovirt.engine.core.common.businessentities.VmDeviceGeneralType;
import org.ovirt.engine.core.common.businessentities.VmExitStatus;
import org.ovirt.engine.core.common.businessentities.VmNumaNode;
import org.ovirt.engine.core.common.businessentities.VmPayload;
import org.ovirt.engine.core.common.businessentities.VmPool;
Expand Down Expand Up @@ -169,6 +170,8 @@ public class RunVmCommand<T extends RunVmParams> extends RunVmCommandBase<T>
private StorageDomainDao storageDomainDao;
@Inject
private ManagedBlockStorageCommandUtil managedBlockStorageCommandUtil;
@Inject
private HaAutoStartVmsRunner haAutoStartVmsRunner;

protected RunVmCommand(Guid commandId) {
super(commandId);
Expand Down Expand Up @@ -1084,7 +1087,14 @@ protected boolean validateImpl() {
return false;
}

getVmManager().setFailedSchedulingDueToLeaseSd(false);
if (!runVmValidator.canScheduleVm(getRunVdssList(), getVdsWhiteList(), getCluster())) {
if (!isInternalExecution() && getVmManager().isFailedSchedulingDueToLeaseSd()) {
getVm().setStatus(VMStatus.Down);
getVm().setExitStatus(VmExitStatus.Error);
getVmManager().update(getVm().getDynamicData());
haAutoStartVmsRunner.addVmsToRun(Collections.singletonList(getVmId()));
}
return false;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ private boolean canRunPoolVm(VM vm, List<String> messages) {
vmHandler.updateNetworkInterfacesFromDb(vm);

RunVmParams runVmParams = new RunVmParams(vm.getId());
Guid activeIsoDomainId = isoDomainListSynchronizer.findActiveISODomain(storagePoolId);
Guid activeIsoDomainId = isoDomainListSynchronizer.findActiveISODomain(vm.getStoragePoolId());

RunVmValidator validator = new RunVmValidator(vm, runVmParams, false, activeIsoDomainId, messages);
validator = Injector.injectMembers(validator);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public List<VDS> filter(SchedulingContext context, List<VDS> hosts, VM vm, PerHo
return hosts;
}

return hosts.stream()
List<VDS> filteredHosts = hosts.stream()
.filter(vds -> {
ArrayList<VDSDomainsData> domainsData = resourceManager.getVdsManager(vds.getId()).getDomains();
if (!isVmLeaseReadyForHost(domainsData, vm, vds.getName())) {
Expand All @@ -56,6 +56,12 @@ public List<VDS> filter(SchedulingContext context, List<VDS> hosts, VM vm, PerHo
}
return true;
}).collect(Collectors.toList());

if (filteredHosts.isEmpty() && !hosts.isEmpty()) {
resourceManager.getVmManager(vm.getId()).setFailedSchedulingDueToLeaseSd(true);
}

return filteredHosts;
}

private boolean isVmLeaseReadyForHost(ArrayList<VDSDomainsData> domainsData, VM vm, String vdsName) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@
import org.ovirt.engine.core.utils.MockConfigDescriptor;
import org.ovirt.engine.core.utils.MockConfigExtension;
import org.ovirt.engine.core.utils.MockedConfig;
import org.ovirt.engine.core.vdsbroker.VmManager;

@ExtendWith({MockitoExtension.class, MockConfigExtension.class})
@MockitoSettings(strictness = Strictness.LENIENT)
Expand Down Expand Up @@ -118,6 +119,9 @@ public class RunVmCommandTest extends BaseCommandTest {
@InjectMocks
VmHandler vmHandler;

@Mock
private VmManager vmManager;

private static final String ACTIVE_ISO_PREFIX =
"/rhev/data-center/mnt/some_computer/f6bccab4-e2f5-4e02-bba0-5748a7bc07b6/images/11111111-1111-1111-1111-111111111111";
private static final String INACTIVE_ISO_PREFIX = "";
Expand Down Expand Up @@ -334,6 +338,7 @@ public void setUp() {
mockSuccessfulRunVmValidator();
doNothing().when(command).initParametersForExternalNetworks(null, false);
doReturn(Collections.emptyMap()).when(command).flushPassthroughVnicToVfMap();
doReturn(vmManager).when(command).getVmManager();
mockBackend();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
import org.mockito.InjectMocks;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import org.mockito.junit.jupiter.MockitoSettings;
import org.mockito.quality.Strictness;
import org.ovirt.engine.core.bll.scheduling.SchedulingContext;
import org.ovirt.engine.core.common.businessentities.Cluster;
import org.ovirt.engine.core.common.businessentities.VDS;
Expand All @@ -24,8 +26,10 @@
import org.ovirt.engine.core.compat.Guid;
import org.ovirt.engine.core.vdsbroker.ResourceManager;
import org.ovirt.engine.core.vdsbroker.VdsManager;
import org.ovirt.engine.core.vdsbroker.VmManager;

@ExtendWith(MockitoExtension.class)
@MockitoSettings(strictness = Strictness.LENIENT)
class VmLeasesReadyFilterPolicyUnitTest {

@Mock
Expand All @@ -37,6 +41,9 @@ class VmLeasesReadyFilterPolicyUnitTest {
@Mock
VdsManager host2VdsManager;

@Mock
VmManager vmManager;

@InjectMocks
VmLeasesReadyFilterPolicyUnit unit = new VmLeasesReadyFilterPolicyUnit(null, null);

Expand Down Expand Up @@ -174,6 +181,7 @@ private ArrayList<VDSDomainsData> prepareDomainsData(boolean isSanlockInitialize
// Wires the Mockito stubs the filter unit needs: the ResourceManager mock
// resolves each test host to its own VdsManager (which in turn reports that
// host's domains data), and resolves the test VM to the VmManager mock so the
// unit can record a lease-SD scheduling failure on it.
private void setUpMocks() {
doReturn(host1VdsManager).when(resourceManager).getVdsManager(host1.getId());
doReturn(host2VdsManager).when(resourceManager).getVdsManager(host2.getId());
// Needed for setFailedSchedulingDueToLeaseSd(true) when all hosts are filtered out.
doReturn(vmManager).when(resourceManager).getVmManager(vm.getId());
doReturn(host1.getDomains()).when(host1VdsManager).getDomains();
doReturn(host2.getDomains()).when(host2VdsManager).getDomains();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ public class VmManager {
/** Locks the VM devices for changes of their dynamic properties (addresses, plugged/unplugged) */
private final Lock vmDevicesLock;

private boolean failedSchedulingDueToLeaseSd;

private Long vmDataChangedTime;
/** how long to wait for a response for power-off operation, in nanoseconds */
private long powerOffTimeout;
Expand Down Expand Up @@ -365,6 +367,14 @@ public void setLastStatusBeforeMigration(VMStatus lastStatusBeforeMigration) {
this.lastStatusBeforeMigration = lastStatusBeforeMigration;
}

/**
 * Returns whether the last scheduling attempt for this VM failed because every
 * candidate host was filtered out for not reporting a ready state for the
 * storage domain that holds the VM lease (flag raised by
 * VmLeasesReadyFilterPolicyUnit, consumed by RunVmCommand).
 */
public boolean isFailedSchedulingDueToLeaseSd() {
return failedSchedulingDueToLeaseSd;
}

/**
 * Sets the lease-SD scheduling-failure flag. RunVmCommand clears it before
 * scheduling; VmLeasesReadyFilterPolicyUnit sets it when all hosts are
 * filtered out due to the lease storage domain not being ready.
 *
 * @param failedSchedulingDueToLeaseSd true if scheduling failed solely due to the lease SD state
 */
public void setFailedSchedulingDueToLeaseSd(boolean failedSchedulingDueToLeaseSd) {
this.failedSchedulingDueToLeaseSd = failedSchedulingDueToLeaseSd;
}

public boolean isDeviceBeingHotUnlugged(Guid deviceId) {
synchronized (devicesBeingHotUnplugged) {
return devicesBeingHotUnplugged.contains(deviceId);
Expand Down

0 comments on commit 28bca2b

Please sign in to comment.