Skip to content

Commit

Permalink
core: RemoveImage: handle failure in VDS command
Browse files Browse the repository at this point in the history
RemoveImageCommand used to call for DeleteImageGroup VDS command, and
perform the required DB operations without polling on the Async Task.

One issue that it led to is when disk is configured with "Wipe on
Delete", and the wipe action took more than 50 hours (default max
async task polling time) - the task was interrupted, the image is not
deleted in VDSM, but removed from DB, not having any tracking for them
in the engine.

In this patch we add async task polling to RemoveImageCommand and
handle the issue with interrupted wipe on delete.

Bug-Url: https://bugzilla.redhat.com/1836318
  • Loading branch information
mkemel committed Sep 15, 2022
1 parent 5cdce04 commit 0f21548
Show file tree
Hide file tree
Showing 2 changed files with 114 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,15 @@
import java.util.List;
import java.util.Set;

import javax.enterprise.inject.Instance;
import javax.enterprise.inject.Typed;
import javax.inject.Inject;

import org.ovirt.engine.core.bll.InternalCommandAttribute;
import org.ovirt.engine.core.bll.NonTransactiveCommandAttribute;
import org.ovirt.engine.core.bll.context.CommandContext;
import org.ovirt.engine.core.bll.storage.domain.PostDeleteActionHandler;
import org.ovirt.engine.core.bll.tasks.interfaces.CommandCallback;
import org.ovirt.engine.core.common.VdcObjectType;
import org.ovirt.engine.core.common.action.ActionType;
import org.ovirt.engine.core.common.action.RemoveImageParameters;
Expand Down Expand Up @@ -79,6 +82,11 @@ public class RemoveImageCommand<T extends RemoveImageParameters> extends BaseIma
@Inject
private VmDao vmDao;

@Inject
@Typed(RemoveImageCommandCallback.class)
private Instance<RemoveImageCommandCallback> callbackProvider;


public RemoveImageCommand(T parameters, CommandContext cmdContext) {
super(parameters, cmdContext);
}
Expand Down Expand Up @@ -120,12 +128,13 @@ protected void executeCommand() {
Guid taskId = persistAsyncTaskPlaceHolder(getParameters().getParentCommand());

VDSReturnValue vdsReturnValue = performDeleteImageVdsmOperation();
getTaskIdList().add(
createTask(taskId,
vdsReturnValue.getCreationInfo(),
getParameters().getParentCommand(),
VdcObjectType.Storage,
getStorageDomainId()));
Guid vdsmTaskId = createTask(taskId,
vdsReturnValue.getCreationInfo(),
getParameters().getParentCommand(),
VdcObjectType.Storage,
getStorageDomainId());
getTaskIdList().add(vdsmTaskId);
getReturnValue().getVdsmTaskIdList().add(vdsmTaskId);
} catch (EngineException e) {
if (e.getErrorCode() == EngineError.ImageDoesNotExistInDomainError) {
log.info("Disk '{}' doesn't exist on storage domain '{}', rolling forward",
Expand All @@ -140,10 +149,6 @@ protected void executeCommand() {
throw e;
}
}

if (!ACTIONS_NOT_REQUIRED_DB_OPERATION.contains(getParameters().getParentCommand())) {
performImageDbOperations();
}
} else {
log.warn("DiskImage is null, nothing to remove");
}
Expand Down Expand Up @@ -295,6 +300,9 @@ private void removeImageMapping() {
}

private void performImageDbOperations() {
if (ACTIONS_NOT_REQUIRED_DB_OPERATION.contains(getParameters().getParentCommand())) {
return;
}
switch (getParameters().getDbOperationScope()) {
case IMAGE:
removeImageFromDB();
Expand Down Expand Up @@ -323,4 +331,24 @@ protected VDSReturnValue performDeleteImageVdsmOperation() {
getDiskImage().isWipeAfterDelete(), getStorageDomain().getDiscardAfterDelete(),
getParameters().isForceDelete())));
}

@Override
public CommandCallback getCallback() {
return getParameters().getParentCommand() == ActionType.RemoveDisk ?
callbackProvider.get() :
null;
}

public void onSucceeded() {
performImageDbOperations();
}

public void onFailed() {
imageDao.updateStatusOfImagesByImageGroupId(getImageGroupId(), ImageStatus.ILLEGAL);
}

@Override
protected void setSucceeded(boolean value) {
super.setSucceeded(value);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
package org.ovirt.engine.core.bll.storage.disk.image;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import javax.enterprise.inject.Typed;
import javax.inject.Inject;

import org.ovirt.engine.core.bll.tasks.CommandCoordinatorUtil;
import org.ovirt.engine.core.bll.tasks.interfaces.CommandCallback;
import org.ovirt.engine.core.common.action.RemoveImageParameters;
import org.ovirt.engine.core.common.businessentities.AsyncTaskStatus;
import org.ovirt.engine.core.compat.CommandStatus;
import org.ovirt.engine.core.compat.Guid;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@Typed(RemoveImageCommandCallback.class)
public class RemoveImageCommandCallback implements CommandCallback {
private static final Logger log = LoggerFactory.getLogger(RemoveImageCommandCallback.class);

@Inject
private CommandCoordinatorUtil commandCoordinatorUtil;

@Override
public void doPolling(Guid cmdId, List<Guid> childCmdIds) {
RemoveImageCommand<RemoveImageParameters> command = getRemoveImageCommand(cmdId);
Set<Guid> taskIds = new HashSet<>(command.getReturnValue().getVdsmTaskIdList());
Map<Guid, AsyncTaskStatus> idToTaskStatusMap = commandCoordinatorUtil.pollTasks(taskIds);
for (Map.Entry<Guid, AsyncTaskStatus> idToTaskStatus : idToTaskStatusMap.entrySet()) {
Guid id = idToTaskStatus.getKey();
AsyncTaskStatus status = idToTaskStatus.getValue();
if (status.getTaskIsRunning()) {
log.info("Waiting on remove image command to complete the task (taskId = '{}')", id);
return;
}
}

List<AsyncTaskStatus> failedTasks = new ArrayList<>();
idToTaskStatusMap.forEach((id, status) -> {
if (!status.getTaskEndedSuccessfully()) {
failedTasks.add(status);
}
});
if (failedTasks.isEmpty()) {
command.setSucceeded(true);
command.setCommandStatus(CommandStatus.SUCCEEDED);
command.persistCommand(command.getParameters().getParentCommand());
log.info("Remove image command has completed successfully for image '{}' with async task '{}'.",
command.getParameters().getImageId(), taskIds);
} else {
command.setSucceeded(false);
command.setCommandStatus(CommandStatus.FAILED);
log.info("Remove image command has failed for images '{}' with async task '{}'.",
command.getParameters().getImageId(), failedTasks);
}
command.persistCommand(command.getParameters().getParentCommand());
}

@Override
public void onSucceeded(Guid cmdId, List<Guid> childCmdIds) {
getRemoveImageCommand(cmdId).onSucceeded();
}

@Override
public void onFailed(Guid cmdId, List<Guid> childCmdIds) {
getRemoveImageCommand(cmdId).onFailed();
}

private RemoveImageCommand<RemoveImageParameters> getRemoveImageCommand(Guid cmdId) {
return commandCoordinatorUtil.retrieveCommand(cmdId);
}
}

0 comments on commit 0f21548

Please sign in to comment.