From 7a0cd42b8ae3d5db040aaf461d39970184c46cbc Mon Sep 17 00:00:00 2001 From: hukirala <31215997+krlydm@users.noreply.github.com> Date: Thu, 17 Mar 2022 10:50:56 +0100 Subject: [PATCH 1/3] Implement multi runner process support for scale down. --- .../src/scale-runners/scale-down.test.ts | 57 ++++++++++++- .../runners/src/scale-runners/scale-down.ts | 81 +++++++++++++------ 2 files changed, 112 insertions(+), 26 deletions(-) diff --git a/modules/runners/lambdas/runners/src/scale-runners/scale-down.test.ts b/modules/runners/lambdas/runners/src/scale-runners/scale-down.test.ts index e17afd79ed..22a8aa741d 100644 --- a/modules/runners/lambdas/runners/src/scale-runners/scale-down.test.ts +++ b/modules/runners/lambdas/runners/src/scale-runners/scale-down.test.ts @@ -18,6 +18,8 @@ const mockOctokit = { listSelfHostedRunnersForOrg: jest.fn(), deleteSelfHostedRunnerFromOrg: jest.fn(), deleteSelfHostedRunnerFromRepo: jest.fn(), + getSelfHostedRunnerForOrg: jest.fn(), + getSelfHostedRunnerForRepo: jest.fn(), }, paginate: jest.fn(), }; @@ -143,6 +145,18 @@ const DEFAULT_RUNNERS_ORIGINAL = [ launchTime: moment(new Date()).toDate(), repo: `${TEST_DATA.repositoryOwner}/${TEST_DATA.repositoryName}`, }, + { + instanceId: 'i-running-112', + launchTime: moment(new Date()).subtract(25, 'minutes').toDate(), + type: 'Repo', + owner: `doe/another-repo`, + }, + { + instanceId: 'i-running-113', + launchTime: moment(new Date()).subtract(25, 'minutes').toDate(), + type: 'Org', + owner: TEST_DATA.repositoryOwner, + }, ]; const DEFAULT_REGISTERED_RUNNERS = [ @@ -170,6 +184,22 @@ const DEFAULT_REGISTERED_RUNNERS = [ id: 106, name: 'i-running-106', }, + { + id: 1121, + name: 'i-running-112-1', + }, + { + id: 1122, + name: 'i-running-112-2', + }, + { + id: 1131, + name: 'i-running-113-1', + }, + { + id: 1132, + name: 'i-running-113-2', + }, ]; describe('scaleDown', () => { @@ -216,6 +246,29 @@ describe('scaleDown', () => { } }); + mockOctokit.actions.getSelfHostedRunnerForRepo.mockImplementation((repo) => { + if (repo.runner_id === 1121) { + return { + data: { busy: true } + }; + } else { + return { + data: { busy: false } + }; + } + }); + mockOctokit.actions.getSelfHostedRunnerForOrg.mockImplementation((repo) => { + if (repo.runner_id === 1131) { + return { + data: { busy: true } + }; + } else { + return { + data: { busy: false } + }; + } + }); + const mockTerminateRunners = mocked(terminateRunner); mockTerminateRunners.mockImplementation(async () => { return; @@ -329,7 +382,7 @@ describe('scaleDown', () => { beforeEach(() => { process.env.SCALE_DOWN_CONFIG = JSON.stringify([ { - idleCount: 2, + idleCount: 3, cron: '* * * * * *', timeZone: 'Europe/Amsterdam', }, @@ -459,7 +512,7 @@ describe('scaleDown', () => { beforeEach(() => { process.env.SCALE_DOWN_CONFIG = JSON.stringify([ { - idleCount: 2, + idleCount: 3, cron: '* * * * * *', timeZone: 'Europe/Amsterdam', }, diff --git a/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts b/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts index 1c48af50e7..102d39d43b 100644 --- a/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts +++ b/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts @@ -47,6 +47,27 @@ async function getOrCreateOctokit(runner: RunnerInfo): Promise { return octokit; } +async function getGitHubRunnerBusyState(client: Octokit, ec2runner: RunnerInfo, runnerId: number): Promise { + const state = + ec2runner.type === 'Org' + ? await client.actions.getSelfHostedRunnerForOrg({ + runner_id: runnerId, + org: ec2runner.owner, + }) + : await client.actions.getSelfHostedRunnerForRepo({ + runner_id: runnerId, + owner: ec2runner.owner.split('/')[0], + repo: ec2runner.owner.split('/')[1], + }); + + logger.info( + `Runner '${ec2runner.instanceId}' - GitHub Runner ID '${runnerId}' - Busy: ${state.data.busy}`, + LogFields.print(), + ); + + return state.data.busy; +} + async function listGitHubRunners(runner: RunnerInfo): Promise { const key = runner.owner as string; const cachedRunners = githubCache.runners.get(key); @@ -86,33 +107,45 @@ function bootTimeExceeded(ec2Runner: RunnerInfo): boolean { return launchTimePlusBootTime < moment(new Date()).utc(); } -async function removeRunner(ec2runner: RunnerInfo, ghRunnerId: number): Promise { +async function removeRunner(ec2runner: RunnerInfo, ghRunnerIds: number[]): Promise { const githubAppClient = await getOrCreateOctokit(ec2runner); try { - const result = - ec2runner.type === 'Org' - ? await githubAppClient.actions.deleteSelfHostedRunnerFromOrg({ - runner_id: ghRunnerId, - org: ec2runner.owner, - }) - : await githubAppClient.actions.deleteSelfHostedRunnerFromRepo({ - runner_id: ghRunnerId, - owner: ec2runner.owner.split('/')[0], - repo: ec2runner.owner.split('/')[1], - }); - - if (result.status == 204) { - await terminateRunner(ec2runner.instanceId); - logger.info( - `AWS runner instance '${ec2runner.instanceId}' is terminated and GitHub runner is de-registered.`, - LogFields.print(), - ); + const states = await Promise.all(ghRunnerIds.map(async ghRunnerId => { + return await getGitHubRunnerBusyState(githubAppClient, ec2runner, ghRunnerId); + })); + + if (states.every((busy) => busy === false)) { + const statuses = await Promise.all(ghRunnerIds.map(async ghRunnerId => { + return (ec2runner.type === 'Org' + ? await githubAppClient.actions.deleteSelfHostedRunnerFromOrg({ + runner_id: ghRunnerId, + org: ec2runner.owner, + }) + : await githubAppClient.actions.deleteSelfHostedRunnerFromRepo({ + runner_id: ghRunnerId, + owner: ec2runner.owner.split('/')[0], + repo: ec2runner.owner.split('/')[1], + })).status; + })) + + if (statuses.every((status) => status == 204)) { + await terminateRunner(ec2runner.instanceId); + logger.info( + `AWS runner instance '${ec2runner.instanceId}' is terminated and GitHub runner is de-registered.`, + LogFields.print(), + ); + } else { + logger.error(`Failed to de-register GitHub runner: ${statuses}`, LogFields.print()); + } } else { - logger.error(`Failed to de-register GitHub runner: ${result.status}`, LogFields.print()); + logger.info( + `Runner '${ec2runner.instanceId}' cannot be de-registered, because it is still busy.`, + LogFields.print(), + ); } } catch (e) { logger.info( - `Runner '${ec2runner.instanceId}' cannot be de-registered, most likely the runner is active.`, + `Runner '${ec2runner.instanceId}' cannot be de-registered, most likely it is still busy.`, LogFields.print(), ); } @@ -133,15 +166,15 @@ async function evaluateAndRemoveRunners( ); for (const ec2Runner of ec2RunnersFiltered) { const ghRunners = await listGitHubRunners(ec2Runner); - const ghRunner = ghRunners.find((runner) => runner.name === ec2Runner.instanceId); - if (ghRunner) { + const ghRunnersFiltered = ghRunners.filter((runner: { name: string; }) => runner.name.startsWith(ec2Runner.instanceId)); + if (ghRunnersFiltered.length) { if (runnerMinimumTimeExceeded(ec2Runner)) { if (idleCounter > 0) { idleCounter--; logger.info(`Runner '${ec2Runner.instanceId}' will kept idle.`, LogFields.print()); } else { logger.info(`Runner '${ec2Runner.instanceId}' will be terminated.`, LogFields.print()); - await removeRunner(ec2Runner, ghRunner.id); + await removeRunner(ec2Runner, ghRunnersFiltered.map((runner: { id: any; }) => runner.id)); } } } else { From 3464b65160e44b6ce55407b789f78a7d42aec2b3 Mon Sep 17 00:00:00 2001 From: hukirala <31215997+krlydm@users.noreply.github.com> Date: Thu, 17 Mar 2022 11:39:37 +0100 Subject: [PATCH 2/3] Fix format and lint issues. --- .../src/scale-runners/scale-down.test.ts | 8 +- .../runners/src/scale-runners/scale-down.ts | 75 +++++++++++-------- 2 files changed, 47 insertions(+), 36 deletions(-) diff --git a/modules/runners/lambdas/runners/src/scale-runners/scale-down.test.ts b/modules/runners/lambdas/runners/src/scale-runners/scale-down.test.ts index 22a8aa741d..8ba803561f 100644 --- a/modules/runners/lambdas/runners/src/scale-runners/scale-down.test.ts +++ b/modules/runners/lambdas/runners/src/scale-runners/scale-down.test.ts @@ -249,22 +249,22 @@ describe('scaleDown', () => { mockOctokit.actions.getSelfHostedRunnerForRepo.mockImplementation((repo) => { if (repo.runner_id === 1121) { return { - data: { busy: true } + data: { busy: true }, }; } else { return { - data: { busy: false } + data: { busy: false }, }; } }); mockOctokit.actions.getSelfHostedRunnerForOrg.mockImplementation((repo) => { if (repo.runner_id === 1131) { return { - data: { busy: true } + data: { busy: true }, }; } else { return { - data: { busy: false } + data: { busy: false }, }; } }); diff --git a/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts b/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts index 102d39d43b..ec97ffe683 100644 --- a/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts +++ b/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts @@ -48,17 +48,17 @@ async function getOrCreateOctokit(runner: RunnerInfo): Promise { } async function getGitHubRunnerBusyState(client: Octokit, ec2runner: RunnerInfo, runnerId: number): Promise { - const state = - ec2runner.type === 'Org' - ? await client.actions.getSelfHostedRunnerForOrg({ - runner_id: runnerId, - org: ec2runner.owner, - }) - : await client.actions.getSelfHostedRunnerForRepo({ - runner_id: runnerId, - owner: ec2runner.owner.split('/')[0], - repo: ec2runner.owner.split('/')[1], - }); + const state = + ec2runner.type === 'Org' + ? await client.actions.getSelfHostedRunnerForOrg({ + runner_id: runnerId, + org: ec2runner.owner, + }) + : await client.actions.getSelfHostedRunnerForRepo({ + runner_id: runnerId, + owner: ec2runner.owner.split('/')[0], + repo: ec2runner.owner.split('/')[1], + }); logger.info( `Runner '${ec2runner.instanceId}' - GitHub Runner ID '${runnerId}' - Busy: ${state.data.busy}`, @@ -110,23 +110,29 @@ function bootTimeExceeded(ec2Runner: RunnerInfo): boolean { async function removeRunner(ec2runner: RunnerInfo, ghRunnerIds: number[]): Promise { const githubAppClient = await getOrCreateOctokit(ec2runner); try { - const states = await Promise.all(ghRunnerIds.map(async ghRunnerId => { - return await getGitHubRunnerBusyState(githubAppClient, ec2runner, ghRunnerId); - })); + const states = await Promise.all( + ghRunnerIds.map(async (ghRunnerId) => { + return await getGitHubRunnerBusyState(githubAppClient, ec2runner, ghRunnerId); + }), + ); if (states.every((busy) => busy === false)) { - const statuses = await Promise.all(ghRunnerIds.map(async ghRunnerId => { - return (ec2runner.type === 'Org' - ? await githubAppClient.actions.deleteSelfHostedRunnerFromOrg({ - runner_id: ghRunnerId, - org: ec2runner.owner, - }) - : await githubAppClient.actions.deleteSelfHostedRunnerFromRepo({ - runner_id: ghRunnerId, - owner: ec2runner.owner.split('/')[0], - repo: ec2runner.owner.split('/')[1], - })).status; - })) + const statuses = await Promise.all( + ghRunnerIds.map(async (ghRunnerId) => { + return ( + ec2runner.type === 'Org' + ? await githubAppClient.actions.deleteSelfHostedRunnerFromOrg({ + runner_id: ghRunnerId, + org: ec2runner.owner, + }) + : await githubAppClient.actions.deleteSelfHostedRunnerFromRepo({ + runner_id: ghRunnerId, + owner: ec2runner.owner.split('/')[0], + repo: ec2runner.owner.split('/')[1], + }) + ).status; + }), + ); if (statuses.every((status) => status == 204)) { await terminateRunner(ec2runner.instanceId); @@ -138,10 +144,10 @@ async function removeRunner(ec2runner: RunnerInfo, ghRunnerIds: number[]): Promi logger.error(`Failed to de-register GitHub runner: ${statuses}`, LogFields.print()); } } else { - logger.info( - `Runner '${ec2runner.instanceId}' cannot be de-registered, because it is still busy.`, - LogFields.print(), - ); + logger.info( + `Runner '${ec2runner.instanceId}' cannot be de-registered, because it is still busy.`, + LogFields.print(), + ); } } catch (e) { logger.info( @@ -166,7 +172,9 @@ async function evaluateAndRemoveRunners( ); for (const ec2Runner of ec2RunnersFiltered) { const ghRunners = await listGitHubRunners(ec2Runner); - const ghRunnersFiltered = ghRunners.filter((runner: { name: string; }) => runner.name.startsWith(ec2Runner.instanceId)); + const ghRunnersFiltered = ghRunners.filter((runner: { name: string }) => + runner.name.startsWith(ec2Runner.instanceId), + ); if (ghRunnersFiltered.length) { if (runnerMinimumTimeExceeded(ec2Runner)) { if (idleCounter > 0) { @@ -174,7 +182,10 @@ async function evaluateAndRemoveRunners( logger.info(`Runner '${ec2Runner.instanceId}' will kept idle.`, LogFields.print()); } else { logger.info(`Runner '${ec2Runner.instanceId}' will be terminated.`, LogFields.print()); - await removeRunner(ec2Runner, ghRunnersFiltered.map((runner: { id: any; }) => runner.id)); + await removeRunner( + ec2Runner, + ghRunnersFiltered.map((runner: { id: number }) => runner.id), + ); } } } else { From 65742ca5e5951120d5d2bdba85c1df800496d4f8 Mon Sep 17 00:00:00 2001 From: hukirala <31215997+krlydm@users.noreply.github.com> Date: Fri, 25 Mar 2022 15:46:26 +0100 Subject: [PATCH 3/3] Minor fixes. --- .../lambdas/runners/src/scale-runners/scale-down.test.ts | 3 +-- .../runners/lambdas/runners/src/scale-runners/scale-down.ts | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/runners/lambdas/runners/src/scale-runners/scale-down.test.ts b/modules/runners/lambdas/runners/src/scale-runners/scale-down.test.ts index 9102195447..8ba803561f 100644 --- a/modules/runners/lambdas/runners/src/scale-runners/scale-down.test.ts +++ b/modules/runners/lambdas/runners/src/scale-runners/scale-down.test.ts @@ -313,8 +313,7 @@ describe('scaleDown', () => { ); RUNNERS_ALL_REMOVED = DEFAULT_RUNNERS_ORG.filter( - (r) => - !r.instanceId.includes('running') && !r.instanceId.includes('registered'), + (r) => !r.instanceId.includes('running') && !r.instanceId.includes('registered'), ); DEFAULT_RUNNERS_ORPHANED = DEFAULT_RUNNERS_ORIGINAL.filter( (r) => r.instanceId.includes('orphan') && !r.instanceId.includes('not-registered'), diff --git a/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts b/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts index 65b37624b5..605221c58f 100644 --- a/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts +++ b/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts @@ -112,6 +112,7 @@ async function removeRunner(ec2runner: RunnerInfo, ghRunnerIds: number[]): Promi try { const states = await Promise.all( ghRunnerIds.map(async (ghRunnerId) => { + // Get busy state instead of using the output of listGitHubRunners(...) to minimize to race condition. return await getGitHubRunnerBusyState(githubAppClient, ec2runner, ghRunnerId); }), );