Skip to content

Commit

Permalink
feat(runners): Add support for looking up runner AMI ID from an SSM p…
Browse files Browse the repository at this point in the history
…arameter at instance launch time (philips-labs#2520)

* Add support for looking up a pre-built runner AMI ID from an SSM parameter

* Format terraform code

* Format TS code

* Add tests

* Make error message more helpful

* Fixes per comments

* Re-format terraform code

* Sync var description

* Add scale up test for overridden AMI ID

* Fix iam role policy name

* Add example
  • Loading branch information
jpalomaki authored Oct 31, 2022
1 parent 5764f5b commit 68e2381
Show file tree
Hide file tree
Showing 10 changed files with 150 additions and 10 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,8 @@ export interface GithubWorkflowEvent {
This extendible format allows to add more fields to be added if needed.
You can configure the queue by setting properties to `workflow_job_events_queue_config`

NOTE: By default, a runner AMI update requires a re-apply of this terraform config (the runner AMI ID is looked up by a terraform data source). To avoid this, you can use `ami_id_ssm_parameter_name` to have the scale-up lambda dynamically lookup the runner AMI ID from an SSM parameter at instance launch time. Said SSM parameter is managed outside of this module (e.g. by a runner AMI build workflow).

## Examples

Examples are located in the [examples](./examples) directory. The following examples are provided:
Expand Down Expand Up @@ -419,6 +421,7 @@ We welcome any improvement to the standard module to make the default as secure
|------|-------------|------|---------|:--------:|
| <a name="input_ami_filter"></a> [ami\_filter](#input\_ami\_filter) | List of maps used to create the AMI filter for the action runner AMI. By default amazon linux 2 is used. | `map(list(string))` | `null` | no |
| <a name="input_ami_owners"></a> [ami\_owners](#input\_ami\_owners) | The list of owners used to select the AMI of action runner instances. | `list(string)` | <pre>[<br> "amazon"<br>]</pre> | no |
| <a name="input_ami_id_ssm_parameter_name"></a> [ami\_id\_ssm\_parameter\_name](#input\_ami\_id\_ssm\_parameter\_name) | Optional SSM parameter that contains the runner AMI ID to launch instances from. Overrides `ami_filter`. The parameter value is managed outside of this module (e.g. in a runner AMI build workflow). This allows for AMI updates without having to re-apply this terraform config. | `string` | `null` | no |
| <a name="input_aws_partition"></a> [aws\_partition](#input\_aws\_partition) | (optiona) partition in the arn namespace to use if not 'aws' | `string` | `"aws"` | no |
| <a name="input_aws_region"></a> [aws\_region](#input\_aws\_region) | AWS region. | `string` | n/a | yes |
| <a name="input_block_device_mappings"></a> [block\_device\_mappings](#input\_block\_device\_mappings) | The EC2 instance block device configuration. Takes the following keys: `device_name`, `delete_on_termination`, `volume_type`, `volume_size`, `encrypted`, `iops`, `throughput`, `kms_key_id`, `snapshot_id`. | <pre>list(object({<br> delete_on_termination = bool<br> device_name = string<br> encrypted = bool<br> iops = number<br> kms_key_id = string<br> snapshot_id = string<br> throughput = number<br> volume_size = number<br> volume_type = string<br> }))</pre> | <pre>[<br> {<br> "delete_on_termination": true,<br> "device_name": "/dev/xvda",<br> "encrypted": true,<br> "iops": null,<br> "kms_key_id": null,<br> "snapshot_id": null,<br> "throughput": null,<br> "volume_size": 30,<br> "volume_type": "gp3"<br> }<br>]</pre> | no |
Expand Down
4 changes: 4 additions & 0 deletions examples/prebuilt/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ module "runners" {
ami_filter = { name = [var.ami_name_filter] }
ami_owners = [data.aws_caller_identity.current.account_id]

# Look up runner AMI ID from an AWS SSM parameter (overrides ami_filter at instance launch time)
# NOTE: the parameter must be managed outside of this module (e.g. in a runner AMI build workflow)
# ami_id_ssm_parameter_name = "my-runner-ami-id"

# disable binary syncer since github agent is already installed in the AMI.
enable_runner_binaries_syncer = false

Expand Down
7 changes: 4 additions & 3 deletions main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -178,9 +178,10 @@ module "runners" {
instance_max_spot_price = var.instance_max_spot_price
block_device_mappings = var.block_device_mappings

runner_architecture = var.runner_architecture
ami_filter = var.ami_filter
ami_owners = var.ami_owners
runner_architecture = var.runner_architecture
ami_filter = var.ami_filter
ami_owners = var.ami_owners
ami_id_ssm_parameter_name = var.ami_id_ssm_parameter_name

sqs_build_queue = aws_sqs_queue.queued_builds
github_app_parameters = local.github_app_parameters
Expand Down
58 changes: 56 additions & 2 deletions modules/runners/lambdas/runners/src/aws/runners.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import ScaleError from './../scale-runners/ScaleError';
import { RunnerInfo, RunnerInputParameters, createRunner, listEC2Runners, terminateRunner } from './runners';

const mockEC2 = { describeInstances: jest.fn(), createFleet: jest.fn(), terminateInstances: jest.fn() };
const mockSSM = { putParameter: jest.fn() };
const mockSSM = { putParameter: jest.fn(), getParameter: jest.fn() };
jest.mock('aws-sdk', () => ({
EC2: jest.fn().mockImplementation(() => mockEC2),
SSM: jest.fn().mockImplementation(() => mockSSM),
Expand Down Expand Up @@ -170,6 +170,8 @@ describe('terminate runner', () => {
describe('create runner', () => {
const mockCreateFleet = { promise: jest.fn() };
const mockPutParameter = { promise: jest.fn() };
const mockGetParameter = { promise: jest.fn() };

const defaultRunnerConfig: RunnerConfig = {
allocationStrategy: 'capacity-optimized',
capacityType: 'spot',
Expand All @@ -191,6 +193,8 @@ describe('create runner', () => {
Instances: [{ InstanceIds: ['i-1234'] }],
});
mockSSM.putParameter.mockImplementation(() => mockPutParameter);

mockSSM.getParameter.mockImplementation(() => mockGetParameter);
});

it('calls create fleet of 1 instance with the correct config for repo', async () => {
Expand Down Expand Up @@ -259,6 +263,21 @@ describe('create runner', () => {
await expect(createRunner(createRunnerConfig(defaultRunnerConfig))).rejects.toThrowError(Error);
expect(mockSSM.putParameter).not.toBeCalled();
});

it('uses ami id from ssm parameter when ami id ssm param is specified', async () => {
const paramValue: AWS.SSM.GetParameterResult = {
Parameter: {
Value: 'ami-123',
},
};
mockGetParameter.promise.mockReturnValue(paramValue);
await createRunner(createRunnerConfig({ ...defaultRunnerConfig, amiIdSsmParameterName: 'my-ami-id-param' }));
const expectedRequest = expectedCreateFleetRequest({ ...defaultExpectedFleetRequestValues, imageId: 'ami-123' });
expect(mockEC2.createFleet).toBeCalledWith(expectedRequest);
expect(mockSSM.getParameter).toBeCalledWith({
Name: 'my-ami-id-param',
});
});
});

describe('create runner with errors', () => {
Expand All @@ -279,6 +298,10 @@ describe('create runner with errors', () => {
const mockPutParameter = { promise: jest.fn() };

mockSSM.putParameter.mockImplementation(() => mockPutParameter);

const mockGetParameter = { promise: jest.fn() };

mockSSM.getParameter.mockImplementation(() => mockGetParameter);
});

it('test ScaleError with one error.', async () => {
Expand Down Expand Up @@ -326,6 +349,22 @@ describe('create runner with errors', () => {
expect(mockEC2.createFleet).toBeCalledWith(expectedCreateFleetRequest(defaultExpectedFleetRequestValues));
expect(mockSSM.putParameter).not.toBeCalled();
});

it('test error in ami id lookup from ssm parameter', async () => {
mockSSM.getParameter.mockImplementation(() => {
return {
promise: jest.fn().mockImplementation(() => {
throw Error('Wow, such transient');
}),
};
});

await expect(
createRunner(createRunnerConfig({ ...defaultRunnerConfig, amiIdSsmParameterName: 'my-ami-id-param' })),
).rejects.toBeInstanceOf(Error);
expect(mockEC2.createFleet).not.toBeCalled();
expect(mockSSM.putParameter).not.toBeCalled();
});
});

function createFleetMockWithErrors(errors: string[], instances?: string[]) {
Expand Down Expand Up @@ -354,6 +393,7 @@ interface RunnerConfig {
capacityType: EC2.DefaultTargetCapacityType;
allocationStrategy: EC2.AllocationStrategy;
maxSpotPrice?: string;
amiIdSsmParameterName?: string;
}

function createRunnerConfig(runnerConfig: RunnerConfig): RunnerInputParameters {
Expand All @@ -370,6 +410,7 @@ function createRunnerConfig(runnerConfig: RunnerConfig): RunnerInputParameters {
instanceAllocationStrategy: runnerConfig.allocationStrategy,
},
subnets: ['subnet-123', 'subnet-456'],
amiIdSsmParameterName: runnerConfig.amiIdSsmParameterName,
};
}

Expand All @@ -379,10 +420,11 @@ interface ExpectedFleetRequestValues {
allocationStrategy: EC2.AllocationStrategy;
maxSpotPrice?: string;
totalTargetCapacity: number;
imageId?: string;
}

function expectedCreateFleetRequest(expectedValues: ExpectedFleetRequestValues): AWS.EC2.CreateFleetRequest {
return {
const request: AWS.EC2.CreateFleetRequest = {
LaunchTemplateConfigs: [
{
LaunchTemplateSpecification: {
Expand Down Expand Up @@ -429,4 +471,16 @@ function expectedCreateFleetRequest(expectedValues: ExpectedFleetRequestValues):
},
Type: 'instant',
};

if (expectedValues.imageId) {
for (const config of request.LaunchTemplateConfigs) {
if (config.Overrides) {
for (const override of config.Overrides) {
override.ImageId = expectedValues.imageId;
}
}
}
}

return request;
}
42 changes: 37 additions & 5 deletions modules/runners/lambdas/runners/src/aws/runners.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ export interface RunnerInputParameters {
instanceAllocationStrategy: EC2.SpotAllocationStrategy;
};
numberOfRunners?: number;
amiIdSsmParameterName?: string;
}

export async function listEC2Runners(filters: ListRunnerFilters | undefined = undefined): Promise<RunnerList[]> {
Expand Down Expand Up @@ -107,17 +108,27 @@ export async function terminateRunner(instanceId: string): Promise<void> {
logger.info(`Runner ${instanceId} has been terminated.`, LogFields.print());
}

function generateFleeOverrides(
function generateFleetOverrides(
subnetIds: string[],
instancesTypes: string[],
amiId?: string,
): EC2.FleetLaunchTemplateOverridesListRequest {
type Override = {
SubnetId: string;
InstanceType: string;
ImageId?: string;
};
const result: EC2.FleetLaunchTemplateOverridesListRequest = [];
subnetIds.forEach((s) => {
instancesTypes.forEach((i) => {
result.push({
const item: Override = {
SubnetId: s,
InstanceType: i,
});
};
if (amiId) {
item.ImageId = amiId;
}
result.push(item);
});
});
return result;
Expand All @@ -127,6 +138,27 @@ export async function createRunner(runnerParameters: RunnerInputParameters): Pro
logger.debug('Runner configuration: ' + JSON.stringify(runnerParameters), LogFields.print());

const ec2 = new EC2();
const ssm = new SSM();

let amiIdOverride = undefined;

if (runnerParameters.amiIdSsmParameterName) {
logger.debug(`Looking up runner AMI ID from SSM parameter: ${runnerParameters.amiIdSsmParameterName}`);
try {
const result: AWS.SSM.GetParameterResult = await ssm
.getParameter({ Name: runnerParameters.amiIdSsmParameterName })
.promise();
amiIdOverride = result.Parameter?.Value;
} catch (e) {
logger.error(
`Failed to lookup runner AMI ID from SSM parameter: ${runnerParameters.amiIdSsmParameterName}. ` +
'Please ensure that the given parameter exists on this region and contains a valid runner AMI ID',
e,
);
throw e;
}
}

const numberOfRunners = runnerParameters.numberOfRunners ? runnerParameters.numberOfRunners : 1;

let fleet: AWS.EC2.CreateFleetResult;
Expand All @@ -140,9 +172,10 @@ export async function createRunner(runnerParameters: RunnerInputParameters): Pro
LaunchTemplateName: runnerParameters.launchTemplateName,
Version: '$Default',
},
Overrides: generateFleeOverrides(
Overrides: generateFleetOverrides(
runnerParameters.subnets,
runnerParameters.ec2instanceCriteria.instanceTypes,
amiIdOverride,
),
},
],
Expand Down Expand Up @@ -202,7 +235,6 @@ export async function createRunner(runnerParameters: RunnerInputParameters): Pro

logger.info('Created instance(s): ', instances.join(','), LogFields.print());

const ssm = new SSM();
for (const instance of instances) {
await ssm
.putParameter({
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,12 @@ describe('scaleUp with GHES', () => {
];
expect(createRunner).toBeCalledWith(expectedRunnerParams);
});

it('creates a runner with ami id override from ssm parameter', async () => {
process.env.AMI_ID_SSM_PARAMETER_NAME = 'my-ami-id-param';
await scaleUpModule.scaleUp('aws:sqs', TEST_DATA);
expect(createRunner).toBeCalledWith({ ...expectedRunnerParams, amiIdSsmParameterName: 'my-ami-id-param' });
});
});

describe('on repo level', () => {
Expand Down
3 changes: 3 additions & 0 deletions modules/runners/lambdas/runners/src/scale-runners/scale-up.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ interface CreateEC2RunnerConfig {
launchTemplateName: string;
ec2instanceCriteria: RunnerInputParameters['ec2instanceCriteria'];
numberOfRunners?: number;
amiIdSsmParameterName?: string;
}

function generateRunnerServiceConfig(githubRunnerConfig: CreateGitHubRunnerConfig, token: string) {
Expand Down Expand Up @@ -159,6 +160,7 @@ export async function scaleUp(eventSource: string, payload: ActionRequestMessage
const instanceMaxSpotPrice = process.env.INSTANCE_MAX_SPOT_PRICE;
const instanceAllocationStrategy = process.env.INSTANCE_ALLOCATION_STRATEGY || 'lowest-price'; // same as AWS default
const enableJobQueuedCheck = yn(process.env.ENABLE_JOB_QUEUED_CHECK, { default: true });
const amiIdSsmParameterName = process.env.AMI_ID_SSM_PARAMETER_NAME;

if (ephemeralEnabled && payload.eventType !== 'workflow_job') {
logger.warn(
Expand Down Expand Up @@ -222,6 +224,7 @@ export async function scaleUp(eventSource: string, payload: ActionRequestMessage
environment,
launchTemplateName,
subnets,
amiIdSsmParameterName,
},
githubInstallationClient,
);
Expand Down
23 changes: 23 additions & 0 deletions modules/runners/scale-up.tf
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ resource "aws_lambda_function" "scale_up" {
RUNNER_GROUP_NAME = var.runner_group_name
RUNNERS_MAXIMUM_COUNT = var.runners_maximum_count
SUBNET_IDS = join(",", var.subnet_ids)
AMI_ID_SSM_PARAMETER_NAME = var.ami_id_ssm_parameter_name
}
}

Expand Down Expand Up @@ -110,3 +111,25 @@ resource "aws_iam_role_policy_attachment" "scale_up_vpc_execution_role" {
role = aws_iam_role.scale_up.name
policy_arn = "arn:${var.aws_partition}:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole"
}

resource "aws_iam_role_policy" "ami_id_ssm_parameter_read" {
count = var.ami_id_ssm_parameter_name != null ? 1 : 0
name = "${var.prefix}-ami-id-ssm-parameter-read"
role = aws_iam_role.scale_up.name
policy = <<-JSON
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"ssm:GetParameter"
],
"Resource": [
"arn:aws:ssm:${var.aws_region}:${data.aws_caller_identity.current.account_id}:parameter/${trimprefix(var.ami_id_ssm_parameter_name, "/")}"
]
}
]
}
JSON
}
6 changes: 6 additions & 0 deletions modules/runners/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,12 @@ variable "ami_owners" {
default = ["amazon"]
}

variable "ami_id_ssm_parameter_name" {
description = "Externally managed SSM parameter (of data type aws:ec2:image) that contains the AMI ID to launch runner instances from. Overrides ami_filter"
type = string
default = null
}

variable "enabled_userdata" {
description = "Should the userdata script be enabled for the runner. Set this to false if you are using your own prebuilt AMI"
type = bool
Expand Down
8 changes: 8 additions & 0 deletions variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -302,11 +302,19 @@ variable "ami_filter" {
type = map(list(string))
default = null
}

variable "ami_owners" {
description = "The list of owners used to select the AMI of action runner instances."
type = list(string)
default = ["amazon"]
}

variable "ami_id_ssm_parameter_name" {
description = "Externally managed SSM parameter (of data type aws:ec2:image) that contains the AMI ID to launch runner instances from. Overrides ami_filter"
type = string
default = null
}

variable "lambda_s3_bucket" {
description = "S3 bucket from which to specify lambda functions. This is an alternative to providing local files directly."
default = null
Expand Down

0 comments on commit 68e2381

Please sign in to comment.