Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix bug in state-based replication code path #6727

Merged
merged 1 commit into from
Oct 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions service/history/ndc/workflow_state_replicator.go
Original file line number Diff line number Diff line change
Expand Up @@ -429,8 +429,8 @@ func (r *WorkflowStateReplicatorImpl) applySnapshot(
versionedTransition *repication.VersionedTransitionArtifact,
sourceClusterName string,
) error {
snapshot := versionedTransition.GetSyncWorkflowStateSnapshotAttributes().State
if snapshot == nil {
attribute := versionedTransition.GetSyncWorkflowStateSnapshotAttributes()
if attribute == nil || attribute.State == nil {
var versionHistories *history.VersionHistories
if localMutableState != nil {
versionHistories = localMutableState.GetExecutionInfo().VersionHistories
Expand All @@ -444,6 +444,7 @@ func (r *WorkflowStateReplicatorImpl) applySnapshot(
versionHistories,
)
}
snapshot := attribute.State
if localMutableState == nil {
return r.applySnapshotWhenWorkflowNotExist(ctx, namespaceID, workflowID, runID, wfCtx, releaseFn, snapshot, sourceClusterName, versionedTransition.NewRunInfo, true)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ func (e *ExecutableVerifyVersionedTransitionTask) Execute() error {
e.NamespaceID,
e.WorkflowID,
e.RunID,
e.ReplicationTask().VersionedTransition,
nil,
nil,
)
default:
Expand All @@ -137,6 +137,9 @@ func (e *ExecutableVerifyVersionedTransitionTask) Execute() error {
}

transitionHistory := ms.GetExecutionInfo().TransitionHistory
if len(transitionHistory) == 0 {
return nil
}
err = workflow.TransitionHistoryStalenessCheck(transitionHistory, e.ReplicationTask().VersionedTransition)

// case 1: VersionedTransition is up-to-date on current mutable state
Expand All @@ -155,7 +158,7 @@ func (e *ExecutableVerifyVersionedTransitionTask) Execute() error {
e.NamespaceID,
e.WorkflowID,
e.RunID,
e.ReplicationTask().VersionedTransition,
transitionHistory[len(transitionHistory)-1],
ms.GetExecutionInfo().VersionHistories,
)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -332,11 +332,12 @@ func (s *executableVerifyVersionedTransitionTaskSuite) TestExecute_CurrentBranch

mu := workflow.NewMockMutableState(s.controller)
mu.EXPECT().GetNextEventID().Return(taskNextEvent).AnyTimes()
transitionHistory := []*persistencepb.VersionedTransition{
{NamespaceFailoverVersion: 1, TransitionCount: 3},
{NamespaceFailoverVersion: 3, TransitionCount: 6},
}
mu.EXPECT().GetExecutionInfo().Return(&persistencepb.WorkflowExecutionInfo{
TransitionHistory: []*persistencepb.VersionedTransition{
{NamespaceFailoverVersion: 1, TransitionCount: 3},
{NamespaceFailoverVersion: 3, TransitionCount: 6},
},
TransitionHistory: transitionHistory,
}).AnyTimes()

s.mockGetMutableState(s.namespaceID, s.workflowID, s.runID, mu, nil)
Expand All @@ -353,6 +354,7 @@ func (s *executableVerifyVersionedTransitionTaskSuite) TestExecute_CurrentBranch

err := task.Execute()
s.IsType(&serviceerrors.SyncState{}, err)
s.Equal(transitionHistory[1], err.(*serviceerrors.SyncState).VersionedTransition)
}

func (s *executableVerifyVersionedTransitionTaskSuite) TestExecute_NonCurrentBranch_VerifySuccess() {
Expand Down
18 changes: 15 additions & 3 deletions service/history/workflow/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -741,11 +741,23 @@ func (c *ContextImpl) mergeUpdateWithNewReplicationTasks(
// so that they can be applied transactionally in the standby cluster.
// TODO: this logic should be more generic so that the first replication task
// in the new run doesn't have to be HistoryReplicationTask
newRunTask := newWorkflowSnapshot.Tasks[tasks.CategoryReplication][0].(*tasks.HistoryReplicationTask)
var newRunBranchToken []byte
var newRunID string
newRunTask := newWorkflowSnapshot.Tasks[tasks.CategoryReplication][0]
delete(newWorkflowSnapshot.Tasks, tasks.CategoryReplication)

newRunBranchToken := newRunTask.BranchToken
newRunID := newRunTask.RunID
switch task := newRunTask.(type) {
case *tasks.HistoryReplicationTask:
// Handle HistoryReplicationTask specifically
newRunBranchToken = task.BranchToken
newRunID = task.RunID
case *tasks.SyncVersionedTransitionTask:
// Handle SyncVersionedTransitionTask specifically
newRunID = task.RunID
default:
// Any other task type is unexpected here; fail with an internal error naming the type
return serviceerror.NewInternal(fmt.Sprintf("unexpected replication task type for new run task %T", newRunTask))
}
taskUpdated := false

updateTask := func(task interface{}) bool {
Expand Down
Loading