Skip to content

Commit

Permalink
fix(core): Fix ignoring crashed executions without event msgs (#7368)
Browse files Browse the repository at this point in the history
When the event logs do not contain messages for running executions, the
recovery/crash detection on startup would skip those executions. This PR fixes
that.
  • Loading branch information
flipswitchingmonkey authored and elsmr committed Oct 19, 2023
1 parent 6a9ca20 commit fd6b491
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 7 deletions.
36 changes: 30 additions & 6 deletions packages/cli/src/eventbus/MessageEventBus/MessageEventBus.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { LoggerProxy, jsonParse } from 'n8n-workflow';
import type { MessageEventBusDestinationOptions } from 'n8n-workflow';
import type { DeleteResult } from 'typeorm';
import { In } from 'typeorm';
import type {
EventMessageTypes,
EventNamesTypes,
Expand Down Expand Up @@ -132,7 +133,23 @@ export class MessageEventBus extends EventEmitter {
this.logWriter?.startLogging();
await this.send(unsentAndUnfinished.unsentMessages);

const unfinishedExecutionIds = Object.keys(unsentAndUnfinished.unfinishedExecutions);
let unfinishedExecutionIds = Object.keys(unsentAndUnfinished.unfinishedExecutions);

// if we are in queue mode, running jobs may still be running on a worker despite the main process
// crashing, so we can't just mark them as crashed
if (config.get('executions.mode') !== 'queue') {
const dbUnfinishedExecutionIds = (
await Container.get(ExecutionRepository).find({
where: {
status: In(['running', 'new', 'unknown']),
},
select: ['id'],
})
).map((e) => e.id);
unfinishedExecutionIds = Array.from(
new Set<string>([...unfinishedExecutionIds, ...dbUnfinishedExecutionIds]),
);
}

if (unfinishedExecutionIds.length > 0) {
LoggerProxy.warn(`Found unfinished executions: ${unfinishedExecutionIds.join(', ')}`);
Expand Down Expand Up @@ -160,11 +177,18 @@ export class MessageEventBus extends EventEmitter {
this.logWriter?.startRecoveryProcess();
for (const executionId of unfinishedExecutionIds) {
LoggerProxy.warn(`Attempting to recover execution ${executionId}`);
await recoverExecutionDataFromEventLogMessages(
executionId,
unsentAndUnfinished.unfinishedExecutions[executionId],
true,
);
if (!unsentAndUnfinished.unfinishedExecutions[executionId]?.length) {
LoggerProxy.debug(
`No event messages found, marking execution ${executionId} as 'crashed'`,
);
await Container.get(ExecutionRepository).markAsCrashed([executionId]);
} else {
await recoverExecutionDataFromEventLogMessages(
executionId,
unsentAndUnfinished.unfinishedExecutions[executionId],
true,
);
}
}
}
// remove the recovery process flag file
Expand Down
2 changes: 1 addition & 1 deletion packages/cli/src/eventbus/MessageEventBus/recoverEvents.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import { ExecutionRepository } from '@db/repositories';
export async function recoverExecutionDataFromEventLogMessages(
executionId: string,
messages: EventMessageTypes[],
applyToDb = true,
applyToDb: boolean,
): Promise<IRunExecutionData | undefined> {
const executionEntry = await Container.get(ExecutionRepository).findSingleExecution(executionId, {
includeData: true,
Expand Down
56 changes: 56 additions & 0 deletions packages/cli/test/integration/eventbus.ee.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ import type { MessageEventBusDestinationWebhook } from '@/eventbus/MessageEventB
import type { MessageEventBusDestinationSentry } from '@/eventbus/MessageEventBusDestination/MessageEventBusDestinationSentry.ee';
import { EventMessageAudit } from '@/eventbus/EventMessageClasses/EventMessageAudit';
import type { EventNamesTypes } from '@/eventbus/EventMessageClasses';
import { EventMessageWorkflow } from '@/eventbus/EventMessageClasses/EventMessageWorkflow';
import { EventMessageNode } from '@/eventbus/EventMessageClasses/EventMessageNode';

jest.unmock('@/eventbus/MessageEventBus/MessageEventBus');
jest.mock('axios');
Expand Down Expand Up @@ -389,3 +391,57 @@ test('DELETE /eventbus/destination delete all destinations by id', async () => {

expect(Object.keys(eventBus.destinations).length).toBe(0);
});

// These two tests are very flaky on CI because the log writer works in a worker.
// Mocking everything, on the other hand, would defeat the purpose of testing them at all... so they are skipped for now.
// eslint-disable-next-line n8n-local-rules/no-skipped-tests
test.skip('should not find unfinished executions in recovery process', async () => {
	// Event sequence for execution 509: workflow started, one node started and
	// finished, then workflow success.
	const completedRunMessages = [
		new EventMessageWorkflow({
			eventName: 'n8n.workflow.started',
			payload: { executionId: '509', isManual: false },
		}),
		new EventMessageNode({
			eventName: 'n8n.node.started',
			payload: { executionId: '509', nodeName: 'Set', workflowName: 'test' },
		}),
		new EventMessageNode({
			eventName: 'n8n.node.finished',
			payload: { executionId: '509', nodeName: 'Set', workflowName: 'test' },
		}),
		new EventMessageWorkflow({
			eventName: 'n8n.workflow.success',
			payload: { executionId: '509', success: true },
		}),
	];

	// Hand the messages to the log writer in the order listed above.
	for (const message of completedRunMessages) {
		eventBus.logWriter?.putMessage(message);
	}

	// The test expects that nothing is reported as unfinished for this sequence.
	const pendingExecutionIds = Object.keys(await eventBus.getUnfinishedExecutions());

	expect(pendingExecutionIds).toHaveLength(0);
});

// eslint-disable-next-line n8n-local-rules/no-skipped-tests
test.skip('should find unfinished executions in recovery process', async () => {
	// Title fixed: this case previously reused the "should not find..." title of the
	// preceding test, although it asserts that exactly one unfinished execution IS found.

	// Log only the opening events for execution 510: the workflow and one node start,
	// with no matching finished/success messages.
	eventBus.logWriter?.putMessage(
		new EventMessageWorkflow({
			eventName: 'n8n.workflow.started',
			payload: { executionId: '510', isManual: false },
		}),
	);
	eventBus.logWriter?.putMessage(
		new EventMessageNode({
			eventName: 'n8n.node.started',
			payload: { executionId: '510', nodeName: 'Set', workflowName: 'test' },
		}),
	);

	const unfinishedExecutions = await eventBus.getUnfinishedExecutions();

	// Execution 510 must be the one and only execution reported as unfinished.
	expect(Object.keys(unfinishedExecutions)).toHaveLength(1);
	expect(Object.keys(unfinishedExecutions)).toContain('510');
});

0 comments on commit fd6b491

Please sign in to comment.