Skip to content

Commit

Permalink
Check on-disk bitmap status on StartVmBackup and on TransferDiskImage
Browse files Browse the repository at this point in the history
Sometimes a bitmap can become invalid/corrupt without oVirt noticing
this. For example if a hypervisor crashes, the active bitmap will become
invalid.
This means that the qcow2 volume does not contain the bitmap anymore,
but oVirt thinks the bitmap still exists because it's in the database.

This currently causes oVirt to fail to create an NBD server, as it
errors with 'Bitmap does not exist in ...'.

We use the QemuImageInfo to get all the on-disk bitmaps and compare
them with the bitmaps in the oVirt database. If there is any
inconsistency, we remove all the bitmaps/checkpoints.

Signed-off-by: Jean-Louis Dupond <jean-louis@dupond.be>
  • Loading branch information
dupondje authored and sandrobonazzola committed Jun 10, 2024
1 parent 43726e5 commit 4ab605c
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,10 @@
import org.ovirt.engine.core.common.FeatureSupported;
import org.ovirt.engine.core.common.VdcObjectType;
import org.ovirt.engine.core.common.action.ActionParametersBase;
import org.ovirt.engine.core.common.action.ActionParametersBase.EndProcedure;
import org.ovirt.engine.core.common.action.ActionReturnValue;
import org.ovirt.engine.core.common.action.ActionType;
import org.ovirt.engine.core.common.action.DeleteAllVmCheckpointsParameters;
import org.ovirt.engine.core.common.action.LockProperties;
import org.ovirt.engine.core.common.action.VmBackupParameters;
import org.ovirt.engine.core.common.action.VolumeBitmapCommandParameters;
Expand All @@ -58,6 +60,8 @@
import org.ovirt.engine.core.common.businessentities.storage.DiskBackupMode;
import org.ovirt.engine.core.common.businessentities.storage.DiskImage;
import org.ovirt.engine.core.common.businessentities.storage.ImageStatus;
import org.ovirt.engine.core.common.businessentities.storage.Qcow2BitmapInfo;
import org.ovirt.engine.core.common.businessentities.storage.QemuImageInfo;
import org.ovirt.engine.core.common.businessentities.storage.VmBackupType;
import org.ovirt.engine.core.common.errors.EngineException;
import org.ovirt.engine.core.common.errors.EngineMessage;
Expand Down Expand Up @@ -264,6 +268,24 @@ protected void executeCommand() {
Guid vmBackupId = createVmBackup();
log.info("Created VmBackup entity '{}'", vmBackupId);

// Set a VDS to be able to gather Qemu Image Info
if (getVds() == null) {
setHostForColdBackupOperation();
}
if (getVds().isQemuImageInfoBitmaps()) {
log.info("Checking VM checkpoint '{}' for VM '{}'", vmBackup.getFromCheckpointId(), vmBackup.getVmId());
if (!validateCheckpoint(vmBackup.getFromCheckpointId())) {
addCustomValue("backupId", vmBackupId.toString());
auditLogDirector.log(this, AuditLogType.VM_INCREMENTAL_BACKUP_FAILED_FULL_VM_BACKUP_NEEDED);
setCommandStatus(CommandStatus.FAILED);
return;
}
log.info("Previous VM checkpoint '{}' for VM '{}' is valid", vmBackup.getFromCheckpointId(), vmBackup.getVmId());
} else {
log.info("Could not check VM checkpoint '{}' for VM '{}' due to missing bitmap info support in vdsm",
vmBackup.getFromCheckpointId(), vmBackup.getVmId());
}

if (isLiveBackup()) {
log.info("Redefine previous VM checkpoints for VM '{}'", vmBackup.getVmId());
if (!redefineVmCheckpoints()) {
Expand Down Expand Up @@ -368,6 +390,45 @@ public boolean performNextOperation(int completedChildCount) {
return true;
}

/**
 * Verifies that the given checkpoint still has a matching bitmap on every disk
 * the database associates with it.
 *
 * For each disk returned by {@code vmCheckpointDao.getDisksByCheckpointId}, the
 * qemu image info is fetched through {@code imagesHandler} and its qcow2 bitmap
 * list is searched for an entry whose name equals {@code checkpointId}. On the
 * first disk where no such entry is found (including when no image info or no
 * bitmap list is returned), a forced {@code DeleteAllVmCheckpoints} internal
 * action is run for that disk and validation stops.
 *
 * NOTE(review): the comparison assumes {@code Qcow2BitmapInfo.getName()} returns
 * a type that can equal a {@code Guid} - confirm against the entity class.
 *
 * @param checkpointId id of the checkpoint to look for on disk
 * @return {@code true} if every disk carries the bitmap, {@code false} as soon
 *         as one disk does not (its checkpoints have then been scheduled for removal)
 */
private boolean validateCheckpoint(Guid checkpointId) {
    for (DiskImage disk : vmCheckpointDao.getDisksByCheckpointId(checkpointId)) {
        QemuImageInfo imageInfo = imagesHandler.getQemuImageInfoFromVdsm(
                getStoragePoolId(),
                disk.getStorageIds().get(0),
                disk.getId(),
                disk.getImageId(),
                getParameters().getVdsRunningOn(),
                !isLiveBackup());

        /* A missing image info or bitmap list counts as "bitmap not found" */
        List<Qcow2BitmapInfo> onDiskBitmaps = imageInfo == null ? null : imageInfo.getQcow2bitmaps();
        boolean bitmapFound = onDiskBitmaps != null
                && onDiskBitmaps.stream().anyMatch(bitmap -> bitmap.getName().equals(checkpointId));
        if (bitmapFound) {
            continue;
        }

        /* Database and volume disagree -> drop the checkpoints of this disk */
        log.error("Checkpoint '{}' does not exist for disk '{}'. Removing checkpoints",
                checkpointId,
                disk.getId());
        DeleteAllVmCheckpointsParameters deleteParams =
                new DeleteAllVmCheckpointsParameters(getVmId(), List.of(disk));
        deleteParams.setParentCommand(getActionType());
        deleteParams.setParentParameters(getParameters());
        deleteParams.setEndProcedure(EndProcedure.COMMAND_MANAGED);
        deleteParams.setForce(true);

        /* The result of the cleanup action is not inspected here */
        runInternalAction(ActionType.DeleteAllVmCheckpoints, deleteParams);
        return false;
    }
    return true;
}

private boolean redefineVmCheckpoints() {
VmBackupParameters parameters = new VmBackupParameters(getParameters().getVmBackup());
parameters.setParentCommand(getActionType());
Expand All @@ -380,8 +441,6 @@ private boolean redefineVmCheckpoints() {
}

private boolean startAddBitmapJobs() {
setHostForColdBackupOperation();

VmBackup vmBackup = getParameters().getVmBackup();
if (getParameters().getVdsRunningOn() == null) {
log.error("Failed to find host to run cold backup operation for VM '{}'", vmBackup.getVmId());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,11 @@
import org.ovirt.engine.core.common.AuditLogType;
import org.ovirt.engine.core.common.VdcObjectType;
import org.ovirt.engine.core.common.action.ActionParametersBase;
import org.ovirt.engine.core.common.action.ActionParametersBase.EndProcedure;
import org.ovirt.engine.core.common.action.ActionReturnValue;
import org.ovirt.engine.core.common.action.ActionType;
import org.ovirt.engine.core.common.action.AddDiskParameters;
import org.ovirt.engine.core.common.action.DeleteAllVmCheckpointsParameters;
import org.ovirt.engine.core.common.action.LockProperties;
import org.ovirt.engine.core.common.action.RemoveDiskParameters;
import org.ovirt.engine.core.common.action.TransferDiskImageParameters;
Expand All @@ -54,6 +56,8 @@
import org.ovirt.engine.core.common.businessentities.storage.ImageTransfer;
import org.ovirt.engine.core.common.businessentities.storage.ImageTransferBackend;
import org.ovirt.engine.core.common.businessentities.storage.ImageTransferPhase;
import org.ovirt.engine.core.common.businessentities.storage.Qcow2BitmapInfo;
import org.ovirt.engine.core.common.businessentities.storage.QemuImageInfo;
import org.ovirt.engine.core.common.businessentities.storage.TimeoutPolicyType;
import org.ovirt.engine.core.common.businessentities.storage.TransferType;
import org.ovirt.engine.core.common.businessentities.storage.VmBackupType;
Expand Down Expand Up @@ -259,7 +263,46 @@ private PrepareImageVDSCommandParameters getPrepareParameters(Guid vdsId) {
getDiskImage().getImageId(), true);
}

/**
 * Verifies that the bitmap for the given checkpoint is still present on the
 * volume of the given disk.
 *
 * The qemu image info is fetched through {@code imagesHandler} and its qcow2
 * bitmap list is searched for an entry whose name equals {@code checkpointId}.
 * If no such entry is found (including when no image info or no bitmap list is
 * returned), a forced {@code DeleteAllVmCheckpoints} internal action is run for
 * the disk.
 *
 * NOTE(review): the comparison assumes {@code Qcow2BitmapInfo.getName()} returns
 * a type that can equal a {@code Guid} - confirm against the entity class.
 *
 * @param image        disk whose volume is inspected
 * @param checkpointId id of the checkpoint to look for; {@code null} means there
 *                     is no source checkpoint and therefore nothing to validate
 * @return {@code true} if the bitmap exists on disk or {@code checkpointId} is
 *         {@code null}; {@code false} if the bitmap is missing (the disk's
 *         checkpoints have then been scheduled for removal)
 */
private boolean validateBitmap(DiskImage image, Guid checkpointId) {
    /*
     * Without a source checkpoint there is no bitmap to look for. Skipping the
     * check matters: equals(null) never matches, so the lookup below would
     * report the bitmap as missing and force-delete the disk's checkpoints.
     */
    if (checkpointId == null) {
        return true;
    }

    QemuImageInfo qcow2Info = imagesHandler.getQemuImageInfoFromVdsm(
            getStoragePoolId(),
            image.getStorageIds().get(0),
            image.getId(),
            image.getImageId(),
            getVdsId(),
            !isLiveBackup());

    /* A missing image info or bitmap list counts as "bitmap not found" */
    List<Qcow2BitmapInfo> bitmaps = qcow2Info == null ? null : qcow2Info.getQcow2bitmaps();
    boolean valid = bitmaps != null
            && bitmaps.stream().anyMatch(bitmap -> bitmap.getName().equals(checkpointId));
    if (valid) {
        return true;
    }

    /* Bitmap did not exist on disk -> Remove checkpoints */
    log.error("Checkpoint '{}' does not exist for disk '{}'. Removing checkpoints",
            checkpointId,
            image.getId());
    DeleteAllVmCheckpointsParameters deleteAllVmCheckpointsParameters =
            new DeleteAllVmCheckpointsParameters(getVmId(), List.of(image));
    deleteAllVmCheckpointsParameters.setParentCommand(getActionType());
    deleteAllVmCheckpointsParameters.setParentParameters(getParameters());
    deleteAllVmCheckpointsParameters.setEndProcedure(EndProcedure.COMMAND_MANAGED);
    deleteAllVmCheckpointsParameters.setForce(true);

    /* The result of the cleanup action is not inspected here */
    runInternalAction(ActionType.DeleteAllVmCheckpoints, deleteAllVmCheckpointsParameters);
    return false;
}

private Guid getBitmap() {
if (!validateBitmap(getDiskImage(), getBackup().getFromCheckpointId())) {
return null;
}

if (isHybridBackup() && getDiskImage().getBackupMode() == DiskBackupMode.Incremental) {
return getBackup().getFromCheckpointId();
}
Expand Down

0 comments on commit 4ab605c

Please sign in to comment.