diff --git a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/StateEventHandleFailure.java b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/StateEventHandleFailure.java new file mode 100644 index 000000000000..5e757c7858c2 --- /dev/null +++ b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/StateEventHandleFailure.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.dolphinscheduler.server.master.event; + +/** + * This exception represent the exception can be recovered, when we get this exception, + * we will move the event to the fail of the queue. + */ +public class StateEventHandleFailure extends Exception { + + public StateEventHandleFailure(String message) { + super(message); + } + + public StateEventHandleFailure(String message, Throwable throwable) { + super(message, throwable); + } +} diff --git a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/StateEventHandler.java b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/StateEventHandler.java index 93d4a7d4b3aa..377ea71f6231 100644 --- a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/StateEventHandler.java +++ b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/StateEventHandler.java @@ -28,9 +28,10 @@ public interface StateEventHandler { * @param stateEvent given state event. * @throws StateEventHandleException this exception means it can be recovered. * @throws StateEventHandleError this exception means it cannot be recovered, so the event need to drop. + * @throws StateEventHandleException this means it can be recovered. */ boolean handleStateEvent(WorkflowExecuteRunnable workflowExecuteRunnable, - StateEvent stateEvent) throws StateEventHandleException, StateEventHandleError; + StateEvent stateEvent) throws StateEventHandleException, StateEventHandleError, StateEventHandleFailure; StateEventType getEventType(); } diff --git a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/TaskWaitTaskGroupStateHandler.java b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/TaskWaitTaskGroupStateHandler.java index 02d08a6616dd..bb0c7b8b703c 100644 --- a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/TaskWaitTaskGroupStateHandler.java +++ b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/event/TaskWaitTaskGroupStateHandler.java @@ -31,9 +31,13 @@ public class TaskWaitTaskGroupStateHandler implements StateEventHandler { private static final Logger logger = LoggerFactory.getLogger(TaskWaitTaskGroupStateHandler.class); @Override - public boolean handleStateEvent(WorkflowExecuteRunnable workflowExecuteRunnable, StateEvent stateEvent) { + public boolean handleStateEvent(WorkflowExecuteRunnable workflowExecuteRunnable, + StateEvent stateEvent) throws StateEventHandleFailure { logger.info("Handle task instance wait task group event, taskInstanceId: {}", stateEvent.getTaskInstanceId()); - return workflowExecuteRunnable.checkForceStartAndWakeUp(stateEvent); + if (!workflowExecuteRunnable.checkForceStartAndWakeUp(stateEvent)) { + throw new StateEventHandleFailure("Task state event handle failed due to robing taskGroup resource failed"); + } + return true; } @Override diff --git a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/WorkflowExecuteRunnable.java b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/WorkflowExecuteRunnable.java index 827e618ec200..b13adeac2a86 100644 --- a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/WorkflowExecuteRunnable.java +++ b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/runner/WorkflowExecuteRunnable.java @@ -71,6 +71,7 @@ import org.apache.dolphinscheduler.server.master.event.StateEvent; import org.apache.dolphinscheduler.server.master.event.StateEventHandleError; import org.apache.dolphinscheduler.server.master.event.StateEventHandleException; +import org.apache.dolphinscheduler.server.master.event.StateEventHandleFailure; import org.apache.dolphinscheduler.server.master.event.StateEventHandler; import org.apache.dolphinscheduler.server.master.event.StateEventHandlerManager; import org.apache.dolphinscheduler.server.master.event.TaskStateEvent; @@ -315,6 +316,13 @@ public void handleEvents() { stateEvent, stateEventHandleException); ThreadUtils.sleep(Constants.SLEEP_TIME_MILLIS); + } catch (StateEventHandleFailure stateEventHandleFailure) { + logger.error("State event handle failed, will move event to the tail: {}", + stateEvent, + stateEventHandleFailure); + this.stateEvents.remove(stateEvent); + this.stateEvents.offer(stateEvent); + ThreadUtils.sleep(Constants.SLEEP_TIME_MILLIS); } catch (Exception e) { // we catch the exception here, since if the state event handle failed, the state event will still keep // in the stateEvents queue.