From ba2536bbf7bc7a98180b25d8703ef6edc25bc2b7 Mon Sep 17 00:00:00 2001 From: Nathaniel McAuliffe Date: Tue, 31 Aug 2021 08:24:45 -0400 Subject: [PATCH] feat(scale-down): Update Owner Logic (#1065) * feat(scale-down): Update Owner Logic * Update expect count * Removing org runner flag Prettier * Refactoring * Fixing resolved conflicts * Terminate legacy runners (#2) * Terminate legacy runners * Update modules/runners/lambdas/runners/src/scale-runners/scale-down.ts Co-authored-by: Gertjan Maas * Move find index to new function * Removing old comment Co-authored-by: Gertjan Maas * Update modules/runners/lambdas/runners/src/scale-runners/scale-down.ts Co-authored-by: Niek Palm * Addressing feedback * Shouldn't need to update original runner list anymore * Fixing case for legacy conversion * Update var descr * Add boot time check Co-authored-by: Gertjan Maas Co-authored-by: Niek Palm --- README.md | 161 +++-- modules/runners/README.md | 17 +- .../runners/src/scale-runners/cache.ts | 10 + .../runners/src/scale-runners/runners.test.ts | 46 +- .../runners/src/scale-runners/runners.ts | 36 +- .../src/scale-runners/scale-down.test.ts | 601 +++++++++--------- .../runners/src/scale-runners/scale-down.ts | 277 ++++---- .../src/scale-runners/scale-up.test.ts | 20 +- .../runners/src/scale-runners/scale-up.ts | 4 +- modules/runners/scale-down.tf | 2 +- modules/runners/variables.tf | 6 + variables.tf | 8 +- 12 files changed, 613 insertions(+), 575 deletions(-) create mode 100644 modules/runners/lambdas/runners/src/scale-runners/cache.ts diff --git a/README.md b/README.md index f95a9a428a..ead428f14d 100644 --- a/README.md +++ b/README.md @@ -338,100 +338,99 @@ No requirements. 
## Providers -| Name | Version | -| ------ | ------- | -| aws | n/a | -| random | n/a | +| Name | Version | +|------|---------| +| aws | n/a | +| random | n/a | ## Modules -| Name | Source | Version | -| --------------- | -------------------------------- | ------- | -| runner_binaries | ./modules/runner-binaries-syncer | | -| runners | ./modules/runners | | -| webhook | ./modules/webhook | | +| Name | Source | Version | +|------|--------|---------| +| runner_binaries | ./modules/runner-binaries-syncer | | +| runners | ./modules/runners | | +| ssm | ./modules/ssm | | +| webhook | ./modules/webhook | | ## Resources -| Name | -| ---------------------------------------------------------------------------------------------------------------------------- | -| [aws_kms_alias](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/kms_alias) | -| [aws_kms_key](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/kms_key) | +| Name | +|------| | [aws_resourcegroups_group](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/resourcegroups_group) | -| [aws_sqs_queue](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/sqs_queue) | -| [random_string](https://registry.terraform.io/providers/hashicorp/random/latest/docs/resources/string) | +| [aws_sqs_queue](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/sqs_queue) | +| [random_string](https://registry.terraform.io/providers/hashicorp/random/latest/docs/resources/string) | ## Inputs -| Name | Description | Type | Default | Required | -| ----------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | 
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------: | -| ami\_filter | List of maps used to create the AMI filter for the action runner AMI. By default amazon linux 2 is used. | `map(list(string))` | `{}` | no | -| ami\_owners | The list of owners used to select the AMI of action runner instances. | `list(string)` |
[
"amazon"
]
| no | -| aws\_region | AWS region. | `string` | n/a | yes | -| block\_device\_mappings | The EC2 instance block device configuration. Takes the following keys: `device_name`, `delete_on_termination`, `volume_type`, `volume_size`, `encrypted`, `iops` | `map(string)` | `{}` | no | -| cloudwatch\_config | (optional) Replaces the module default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details. | `string` | `null` | no | -| create\_service\_linked\_role\_spot | (optional) create the serviced linked role for spot instances that is required by the scale-up lambda. | `bool` | `false` | no | -| delay\_webhook\_event | The number of seconds the event accepted by the webhook is invisible on the queue before the scale up lambda will receive the event. | `number` | `30` | no | -| enable\_cloudwatch\_agent | Enabling the cloudwatch agent on the ec2 runner instances, the runner contains default config. Configuration can be overridden via `cloudwatch_config`. | `bool` | `true` | no | -| enable\_organization\_runners | Register runners to organization, instead of repo level | `bool` | `false` | no | -| enable\_ssm\_on\_runners | Enable to allow access the runner instances for debugging purposes via SSM. Note that this adds additional permissions to the runner instances. | `bool` | `false` | no | -| encrypt\_secrets | Encrypt secret variables for lambda's such as secrets and private keys. | `bool` | `true` | no | -| environment | A name that identifies the environment, used as prefix and for tagging. | `string` | n/a | yes | -| ghes\_url | GitHub Enterprise Server URL. Example: https://github.internal.co - DO NOT SET IF USING PUBLIC GITHUB | `string` | `null` | no | -| github\_app | GitHub app parameters, see your github app. Ensure the key is the base64-encoded `.pem` file (the output of `base64 app.private-key.pem`, not the content of `private-key.pem`). |
object({
key_base64 = string
id = string
client_id = string
client_secret = string
webhook_secret = string
})
| n/a | yes | -| idle\_config | List of time period that can be defined as cron expression to keep a minimum amount of runners active instead of scaling down to 0. By defining this list you can ensure that in time periods that match the cron expression within 5 seconds a runner is kept idle. |
list(object({
cron = string
timeZone = string
idleCount = number
}))
| `[]` | no | -| instance\_profile\_path | The path that will be added to the instance\_profile, if not set the environment name will be used. | `string` | `null` | no | -| instance\_type | [DEPRECATED] See instance\_types. | `string` | `"m5.large"` | no | -| instance\_types | List of instance types for the action runner. | `set(string)` | `null` | no | -| key\_name | Key pair name | `string` | `null` | no | -| kms\_key\_id | Custom KMS key to encrypted lambda secrets, if not provided and `encrypt_secrets` = `true` a KMS key will be created by the module. Secrets will be encrypted with a context `Environment = var.environment`. | `string` | `null` | no | -| lambda\_s3\_bucket | S3 bucket from which to specify lambda functions. This is an alternative to providing local files directly. | `any` | `null` | no | -| lambda\_security\_group\_ids | List of security group IDs associated with the Lambda function. | `list(string)` | `[]` | no | -| lambda\_subnet\_ids | List of subnets in which the action runners will be launched, the subnets needs to be subnets in the `vpc_id`. | `list(string)` | `[]` | no | -| logging\_retention\_in\_days | Specifies the number of days you want to retain log events for the lambda log group. Possible values are: 0, 1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365, 400, 545, 731, 1827, and 3653. | `number` | `180` | no | -| manage\_kms\_key | Let the module manage the KMS key. | `bool` | `true` | no | -| market\_options | Market options for the action runner instances. Setting the value to `null` let the scaler create on-demand instances instead of spot instances. | `string` | `"spot"` | no | -| minimum\_running\_time\_in\_minutes | The time an ec2 action runner should be running at minimum before terminated if non busy. 
| `number` | `5` | no | -| repository\_white\_list | List of repositories allowed to use the github app | `list(string)` | `[]` | no | -| role\_path | The path that will be added to role path for created roles, if not set the environment name will be used. | `string` | `null` | no | -| role\_permissions\_boundary | Permissions boundary that will be added to the created roles. | `string` | `null` | no | -| runner\_additional\_security\_group\_ids | (optional) List of additional security groups IDs to apply to the runner | `list(string)` | `[]` | no | -| runner\_allow\_prerelease\_binaries | Allow the runners to update to prerelease binaries. | `bool` | `false` | no | -| runner\_as\_root | Run the action runner under the root user. | `bool` | `false` | no | -| runner\_binaries\_syncer\_lambda\_timeout | Time out of the binaries sync lambda in seconds. | `number` | `300` | no | -| runner\_binaries\_syncer\_lambda\_zip | File location of the binaries sync lambda zip file. | `string` | `null` | no | -| runner\_extra\_labels | Extra labels for the runners (GitHub). Separate each label by a comma | `string` | `""` | no | -| runner\_group\_name | Name of the runner group. | `string` | `"Default"` | no | -| runner\_iam\_role\_managed\_policy\_arns | Attach AWS or customer-managed IAM policies (by ARN) to the runner IAM role | `list(string)` | `[]` | no | -| runner\_log\_files | (optional) Replaces the module default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details. |
list(object({
log_group_name = string
prefix_log_group = bool
file_path = string
log_stream_name = string
}))
|
[
{
"file_path": "/var/log/messages",
"log_group_name": "messages",
"log_stream_name": "{instance_id}",
"prefix_log_group": true
},
{
"file_path": "/var/log/user-data.log",
"log_group_name": "user_data",
"log_stream_name": "{instance_id}",
"prefix_log_group": true
},
{
"file_path": "/home/ec2-user/actions-runner/_diag/Runner_**.log",
"log_group_name": "runner",
"log_stream_name": "{instance_id}",
"prefix_log_group": true
}
]
| no | -| runners\_lambda\_s3\_key | S3 key for runners lambda function. Required if using S3 bucket to specify lambdas. | `any` | `null` | no | -| runners\_lambda\_s3\_object\_version | S3 object version for runners lambda function. Useful if S3 versioning is enabled on source bucket. | `any` | `null` | no | -| runners\_lambda\_zip | File location of the lambda zip file for scaling runners. | `string` | `null` | no | -| runners\_maximum\_count | The maximum number of runners that will be created. | `number` | `3` | no | -| runners\_scale\_down\_lambda\_timeout | Time out for the scale down lambda in seconds. | `number` | `60` | no | -| runners\_scale\_up\_lambda\_timeout | Time out for the scale up lambda in seconds. | `number` | `180` | no | -| scale\_down\_schedule\_expression | Scheduler expression to check every x for scale down. | `string` | `"cron(*/5 * * * ? *)"` | no | -| subnet\_ids | List of subnets in which the action runners will be launched, the subnets needs to be subnets in the `vpc_id`. | `list(string)` | n/a | yes | -| syncer\_lambda\_s3\_key | S3 key for syncer lambda function. Required if using S3 bucket to specify lambdas. | `any` | `null` | no | -| syncer\_lambda\_s3\_object\_version | S3 object version for syncer lambda function. Useful if S3 versioning is enabled on source bucket. | `any` | `null` | no | -| tags | Map of tags that will be added to created resources. By default resources will be tagged with name and environment. | `map(string)` | `{}` | no | -| userdata\_post\_install | Script to be ran after the GitHub Actions runner is installed on the EC2 instances | `string` | `""` | no | -| userdata\_pre\_install | Script to be ran before the GitHub Actions runner is installed on the EC2 instances | `string` | `""` | no | -| userdata\_template | Alternative user-data template, replacing the default template. By providing your own user\_data you have to take care of installing all required software, including the action runner. 
Variables userdata\_pre/post\_install are ignored. | `string` | `null` | no | -| volume\_size | Size of runner volume | `number` | `30` | no | -| vpc\_id | The VPC for security groups of the action runners. | `string` | n/a | yes | -| webhook\_lambda\_s3\_key | S3 key for webhook lambda function. Required if using S3 bucket to specify lambdas. | `any` | `null` | no | -| webhook\_lambda\_s3\_object\_version | S3 object version for webhook lambda function. Useful if S3 versioning is enabled on source bucket. | `any` | `null` | no | -| webhook\_lambda\_timeout | Time out of the webhook lambda in seconds. | `number` | `10` | no | -| webhook\_lambda\_zip | File location of the webhook lambda zip file. | `string` | `null` | no | +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| ami\_filter | List of maps used to create the AMI filter for the action runner AMI. By default amazon linux 2 is used. | `map(list(string))` | `{}` | no | +| ami\_owners | The list of owners used to select the AMI of action runner instances. | `list(string)` |
[
"amazon"
]
| no | +| aws\_region | AWS region. | `string` | n/a | yes | +| block\_device\_mappings | The EC2 instance block device configuration. Takes the following keys: `device_name`, `delete_on_termination`, `volume_type`, `volume_size`, `encrypted`, `iops` | `map(string)` | `{}` | no | +| cloudwatch\_config | (optional) Replaces the module default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details. | `string` | `null` | no | +| create\_service\_linked\_role\_spot | (optional) create the service linked role for spot instances that is required by the scale-up lambda. | `bool` | `false` | no | +| delay\_webhook\_event | The number of seconds the event accepted by the webhook is invisible on the queue before the scale up lambda will receive the event. | `number` | `30` | no | +| enable\_cloudwatch\_agent | Enabling the cloudwatch agent on the ec2 runner instances, the runner contains default config. Configuration can be overridden via `cloudwatch_config`. | `bool` | `true` | no | +| enable\_organization\_runners | Register runners to organization, instead of repo level | `bool` | `false` | no | +| enable\_ssm\_on\_runners | Enable to allow access to the runner instances for debugging purposes via SSM. Note that this adds additional permissions to the runner instances. | `bool` | `false` | no | +| environment | A name that identifies the environment, used as prefix and for tagging. | `string` | n/a | yes | +| ghes\_url | GitHub Enterprise Server URL. Example: https://github.internal.co - DO NOT SET IF USING PUBLIC GITHUB | `string` | `null` | no | +| github\_app | GitHub app parameters, see your github app. Ensure the key is the base64-encoded `.pem` file (the output of `base64 app.private-key.pem`, not the content of `private-key.pem`). |
object({
key_base64 = string
id = string
client_id = string
client_secret = string
webhook_secret = string
})
| n/a | yes | +| idle\_config | List of time period that can be defined as cron expression to keep a minimum amount of runners active instead of scaling down to 0. By defining this list you can ensure that in time periods that match the cron expression within 5 seconds a runner is kept idle. |
list(object({
cron = string
timeZone = string
idleCount = number
}))
| `[]` | no | +| instance\_profile\_path | The path that will be added to the instance\_profile, if not set the environment name will be used. | `string` | `null` | no | +| instance\_type | [DEPRECATED] See instance\_types. | `string` | `"m5.large"` | no | +| instance\_types | List of instance types for the action runner. | `set(string)` | `null` | no | +| key\_name | Key pair name | `string` | `null` | no | +| kms\_key\_arn | Optional CMK Key ARN to be used for Parameter Store. This key must be in the current account. | `string` | `null` | no | +| lambda\_s3\_bucket | S3 bucket from which to specify lambda functions. This is an alternative to providing local files directly. | `any` | `null` | no | +| lambda\_security\_group\_ids | List of security group IDs associated with the Lambda function. | `list(string)` | `[]` | no | +| lambda\_subnet\_ids | List of subnets in which the action runners will be launched, the subnets needs to be subnets in the `vpc_id`. | `list(string)` | `[]` | no | +| logging\_retention\_in\_days | Specifies the number of days you want to retain log events for the lambda log group. Possible values are: 0, 1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365, 400, 545, 731, 1827, and 3653. | `number` | `180` | no | +| market\_options | Market options for the action runner instances. Setting the value to `null` let the scaler create on-demand instances instead of spot instances. | `string` | `"spot"` | no | +| minimum\_running\_time\_in\_minutes | The time an ec2 action runner should be running at minimum before terminated if not busy. | `number` | `5` | no | +| repository\_white\_list | List of repositories allowed to use the github app | `list(string)` | `[]` | no | +| role\_path | The path that will be added to role path for created roles, if not set the environment name will be used. | `string` | `null` | no | +| role\_permissions\_boundary | Permissions boundary that will be added to the created roles. 
| `string` | `null` | no | +| runner\_additional\_security\_group\_ids | (optional) List of additional security groups IDs to apply to the runner | `list(string)` | `[]` | no | +| runner\_allow\_prerelease\_binaries | Allow the runners to update to prerelease binaries. | `bool` | `false` | no | +| runner\_as\_root | Run the action runner under the root user. | `bool` | `false` | no | +| runner\_binaries\_syncer\_lambda\_timeout | Time out of the binaries sync lambda in seconds. | `number` | `300` | no | +| runner\_binaries\_syncer\_lambda\_zip | File location of the binaries sync lambda zip file. | `string` | `null` | no | +| runner\_boot\_time\_in\_minutes | The minimum time for an EC2 runner to boot and register as a runner. | `number` | `5` | no | +| runner\_extra\_labels | Extra labels for the runners (GitHub). Separate each label by a comma | `string` | `""` | no | +| runner\_group\_name | Name of the runner group. | `string` | `"Default"` | no | +| runner\_iam\_role\_managed\_policy\_arns | Attach AWS or customer-managed IAM policies (by ARN) to the runner IAM role | `list(string)` | `[]` | no | +| runner\_log\_files | (optional) Replaces the module default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details. |
list(object({
log_group_name = string
prefix_log_group = bool
file_path = string
log_stream_name = string
}))
|
[
{
"file_path": "/var/log/messages",
"log_group_name": "messages",
"log_stream_name": "{instance_id}",
"prefix_log_group": true
},
{
"file_path": "/var/log/user-data.log",
"log_group_name": "user_data",
"log_stream_name": "{instance_id}",
"prefix_log_group": true
},
{
"file_path": "/home/ec2-user/actions-runner/_diag/Runner_**.log",
"log_group_name": "runner",
"log_stream_name": "{instance_id}",
"prefix_log_group": true
}
]
| no | +| runners\_lambda\_s3\_key | S3 key for runners lambda function. Required if using S3 bucket to specify lambdas. | `any` | `null` | no | +| runners\_lambda\_s3\_object\_version | S3 object version for runners lambda function. Useful if S3 versioning is enabled on source bucket. | `any` | `null` | no | +| runners\_lambda\_zip | File location of the lambda zip file for scaling runners. | `string` | `null` | no | +| runners\_maximum\_count | The maximum number of runners that will be created. | `number` | `3` | no | +| runners\_scale\_down\_lambda\_timeout | Time out for the scale down lambda in seconds. | `number` | `60` | no | +| runners\_scale\_up\_lambda\_timeout | Time out for the scale up lambda in seconds. | `number` | `180` | no | +| scale\_down\_schedule\_expression | Scheduler expression to check every x for scale down. | `string` | `"cron(*/5 * * * ? *)"` | no | +| subnet\_ids | List of subnets in which the action runners will be launched, the subnets needs to be subnets in the `vpc_id`. | `list(string)` | n/a | yes | +| syncer\_lambda\_s3\_key | S3 key for syncer lambda function. Required if using S3 bucket to specify lambdas. | `any` | `null` | no | +| syncer\_lambda\_s3\_object\_version | S3 object version for syncer lambda function. Useful if S3 versioning is enabled on source bucket. | `any` | `null` | no | +| tags | Map of tags that will be added to created resources. By default resources will be tagged with name and environment. | `map(string)` | `{}` | no | +| userdata\_post\_install | Script to be run after the GitHub Actions runner is installed on the EC2 instances | `string` | `""` | no | +| userdata\_pre\_install | Script to be run before the GitHub Actions runner is installed on the EC2 instances | `string` | `""` | no | +| userdata\_template | Alternative user-data template, replacing the default template. By providing your own user\_data you have to take care of installing all required software, including the action runner.
Variables userdata\_pre/post\_install are ignored. | `string` | `null` | no | +| volume\_size | Size of runner volume | `number` | `30` | no | +| vpc\_id | The VPC for security groups of the action runners. | `string` | n/a | yes | +| webhook\_lambda\_s3\_key | S3 key for webhook lambda function. Required if using S3 bucket to specify lambdas. | `any` | `null` | no | +| webhook\_lambda\_s3\_object\_version | S3 object version for webhook lambda function. Useful if S3 versioning is enabled on source bucket. | `any` | `null` | no | +| webhook\_lambda\_timeout | Time out of the webhook lambda in seconds. | `number` | `10` | no | +| webhook\_lambda\_zip | File location of the webhook lambda zip file. | `string` | `null` | no | ## Outputs -| Name | Description | -| ---------------- | ----------- | -| binaries\_syncer | n/a | -| runners | n/a | -| webhook | n/a | +| Name | Description | +|------|-------------| +| binaries\_syncer | n/a | +| runners | n/a | +| ssm\_parameters | n/a | +| webhook | n/a | ## Contribution diff --git a/modules/runners/README.md b/modules/runners/README.md index d8c4715e1d..280b5a1300 100644 --- a/modules/runners/README.md +++ b/modules/runners/README.md @@ -87,15 +87,15 @@ No Modules. | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| -| ami\_filter | List of maps used to create the AMI filter for the action runner AMI. | `map(list(string))` |
{
"name": [
"amzn2-ami-hvm-2.*-x86_64-ebs"
]
}
| no | +| ami\_filter | Map of lists used to create the AMI filter for the action runner AMI. | `map(list(string))` |
{
"name": [
"amzn2-ami-hvm-2.*-x86_64-ebs"
]
}
| no | | ami\_owners | The list of owners used to select the AMI of action runner instances. | `list(string)` |
[
"amazon"
]
| no | | aws\_region | AWS region. | `string` | n/a | yes | | block\_device\_mappings | The EC2 instance block device configuration. Takes the following keys: `device_name`, `delete_on_termination`, `volume_type`, `volume_size`, `encrypted`, `iops` | `map(string)` | `{}` | no | | cloudwatch\_config | (optional) Replaces the module default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details. | `string` | `null` | no | -| create\_service\_linked\_role\_spot | (optional) create the serviced linked role for spot instances that is required by the scale-up lambda. | `bool` | `false` | no | +| create\_service\_linked\_role\_spot | (optional) create the service linked role for spot instances that is required by the scale-up lambda. | `bool` | `false` | no | | enable\_cloudwatch\_agent | Enabling the cloudwatch agent on the ec2 runner instances, the runner contains default config. Configuration can be overridden via `cloudwatch_config`. | `bool` | `true` | no | | enable\_organization\_runners | n/a | `bool` | n/a | yes | -| enable\_ssm\_on\_runners | Enable to allow access the runner instances for debugging purposes via SSM. Note that this adds additional permissions to the runner instances. | `bool` | n/a | yes | +| enable\_ssm\_on\_runners | Enable to allow access to the runner instances for debugging purposes via SSM. Note that this adds additional permissions to the runner instances. | `bool` | n/a | yes | | environment | A name that identifies the environment, used as prefix and for tagging. | `string` | n/a | yes | | ghes\_url | GitHub Enterprise Server URL. DO NOT SET IF USING PUBLIC GITHUB | `string` | `null` | no | | github\_app\_parameters | Parameter Store for GitHub App Parameters. |
object({
key_base64 = map(string)
id = map(string)
client_id = map(string)
client_secret = map(string)
})
| n/a | yes | @@ -114,16 +114,17 @@ No Modules. | logging\_retention\_in\_days | Specifies the number of days you want to retain log events for the lambda log group. Possible values are: 0, 1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365, 400, 545, 731, 1827, and 3653. | `number` | `180` | no | | market\_options | Market options for the action runner instances. | `string` | `"spot"` | no | | minimum\_running\_time\_in\_minutes | The time an ec2 action runner should be running at minimum before terminated if non busy. | `number` | `5` | no | -| overrides | This maps provides the possibility to override some defaults. The following attributes are supported: `name_sg` overwrite the `Name` tag for all security groups created by this module. `name_runner_agent_instance` override the `Name` tag for the ec2 instance defined in the auto launch configuration. `name_docker_machine_runners` override the `Name` tag spot instances created by the runner agent. | `map(string)` |
{
"name_runner": "",
"name_sg": ""
}
| no | -| role\_path | The path that will be added to the role, if not set the environment name will be used. | `string` | `null` | no | +| overrides | This map provides the possibility to override some defaults. The following attributes are supported: `name_sg` overrides the `Name` tag for all security groups created by this module. `name_runner_agent_instance` overrides the `Name` tag for the ec2 instance defined in the auto launch configuration. `name_docker_machine_runners` overrides the `Name` tag spot instances created by the runner agent. | `map(string)` |
{
"name_runner": "",
"name_sg": ""
}
| no | +| role\_path | The path that will be added to the role; if not set, the environment name will be used. | `string` | `null` | no | | role\_permissions\_boundary | Permissions boundary that will be added to the created role for the lambda. | `string` | `null` | no | | runner\_additional\_security\_group\_ids | (optional) List of additional security groups IDs to apply to the runner | `list(string)` | `[]` | no | | runner\_architecture | The platform architecture of the runner instance\_type. | `string` | `"x64"` | no | | runner\_as\_root | Run the action runner under the root user. | `bool` | `false` | no | +| runner\_boot\_time\_in\_minutes | The minimum time for an EC2 runner to boot and register as a runner. | `number` | `5` | no | | runner\_extra\_labels | Extra labels for the runners (GitHub). Separate each label by a comma | `string` | `""` | no | | runner\_group\_name | Name of the runner group. | `string` | `"Default"` | no | | runner\_iam\_role\_managed\_policy\_arns | Attach AWS or customer-managed IAM policies (by ARN) to the runner IAM role | `list(string)` | `[]` | no | -| runner\_log\_files | (optional) List of logfiles to send to cloudwatch, will only be used if `enable_cloudwatch_agent` is set to true. Object description: `log_group_name`: Name of the log group, `prefix_log_group`: If true, the log group name will be prefixed with `/github-self-hosted-runners/`, `file_path`: path to the log file, `log_stream_name`: name of the log stream. |
list(object({
log_group_name = string
prefix_log_group = bool
file_path = string
log_stream_name = string
}))
|
[
{
"file_path": "/var/log/messages",
"log_group_name": "messages",
"log_stream_name": "{instance_id}",
"prefix_log_group": true
},
{
"file_path": "/var/log/user-data.log",
"log_group_name": "user_data",
"log_stream_name": "{instance_id}",
"prefix_log_group": true
},
{
"file_path": "/home/ec2-user/actions-runner/_diag/Runner_**.log",
"log_group_name": "runner",
"log_stream_name": "{instance_id}",
"prefix_log_group": true
}
]
| no | +| runner\_log\_files | (optional) List of logfiles to send to CloudWatch, will only be used if `enable_cloudwatch_agent` is set to true. Object description: `log_group_name`: Name of the log group, `prefix_log_group`: If true, the log group name will be prefixed with `/github-self-hosted-runners/`, `file_path`: path to the log file, `log_stream_name`: name of the log stream. |
list(object({
log_group_name = string
prefix_log_group = bool
file_path = string
log_stream_name = string
}))
|
[
{
"file_path": "/var/log/messages",
"log_group_name": "messages",
"log_stream_name": "{instance_id}",
"prefix_log_group": true
},
{
"file_path": "/var/log/user-data.log",
"log_group_name": "user_data",
"log_stream_name": "{instance_id}",
"prefix_log_group": true
},
{
"file_path": "/home/ec2-user/actions-runner/_diag/Runner_**.log",
"log_group_name": "runner",
"log_stream_name": "{instance_id}",
"prefix_log_group": true
}
]
| no | | runners\_lambda\_s3\_key | S3 key for runners lambda function. Required if using S3 bucket to specify lambdas. | `any` | `null` | no | | runners\_lambda\_s3\_object\_version | S3 object version for runners lambda function. Useful if S3 versioning is enabled on source bucket. | `any` | `null` | no | | runners\_maximum\_count | The maximum number of runners that will be created. | `number` | `3` | no | @@ -133,8 +134,8 @@ No Modules. | sqs\_build\_queue | SQS queue to consume accepted build events. |
object({
arn = string
})
| n/a | yes | | subnet\_ids | List of subnets in which the action runners will be launched, the subnets needs to be subnets in the `vpc_id`. | `list(string)` | n/a | yes | | tags | Map of tags that will be added to created resources. By default resources will be tagged with name and environment. | `map(string)` | `{}` | no | -| userdata\_post\_install | User-data script snippet to insert after GitHub acton runner install | `string` | `""` | no | -| userdata\_pre\_install | User-data script snippet to insert before GitHub acton runner install | `string` | `""` | no | +| userdata\_post\_install | User-data script snippet to insert after GitHub action runner install | `string` | `""` | no | +| userdata\_pre\_install | User-data script snippet to insert before GitHub action runner install | `string` | `""` | no | | userdata\_template | Alternative user-data template, replacing the default template. By providing your own user\_data you have to take care of installing all required software, including the action runner. Variables userdata\_pre/post\_install are ignored. | `string` | `null` | no | | volume\_size | Size of runner volume | `number` | `30` | no | | vpc\_id | The VPC for the security groups. | `string` | n/a | yes | diff --git a/modules/runners/lambdas/runners/src/scale-runners/cache.ts b/modules/runners/lambdas/runners/src/scale-runners/cache.ts new file mode 100644 index 0000000000..8c0c34a14a --- /dev/null +++ b/modules/runners/lambdas/runners/src/scale-runners/cache.ts @@ -0,0 +1,10 @@ +import { Octokit } from '@octokit/rest'; + +export type UnboxPromise = T extends Promise ? 
U : T; + +export type GhRunners = UnboxPromise>['data']['runners']; + +export class githubCache { + static clients: Map = new Map(); + static runners: Map = new Map(); +} diff --git a/modules/runners/lambdas/runners/src/scale-runners/runners.test.ts b/modules/runners/lambdas/runners/src/scale-runners/runners.test.ts index d5916e88c3..94e4aff0e2 100644 --- a/modules/runners/lambdas/runners/src/scale-runners/runners.test.ts +++ b/modules/runners/lambdas/runners/src/scale-runners/runners.test.ts @@ -1,4 +1,4 @@ -import { listRunners, createRunner, terminateRunner, RunnerInfo } from './runners'; +import { listEC2Runners, createRunner, terminateRunner, RunnerInfo } from './runners'; const mockEC2 = { describeInstances: jest.fn(), runInstances: jest.fn(), terminateInstances: jest.fn() }; const mockSSM = { putParameter: jest.fn() }; @@ -25,17 +25,17 @@ describe('list instances', () => { LaunchTime: new Date('2020-10-10T14:48:00.000+09:00'), InstanceId: 'i-1234', Tags: [ - { Key: 'Repo', Value: 'CoderToCat/hello-world' }, - { Key: 'Org', Value: 'CoderToCat' }, { Key: 'Application', Value: 'github-action-runner' }, + { Key: 'Type', Value: 'Org' }, + { Key: 'Owner', Value: 'CoderToCat' }, ], }, { LaunchTime: new Date('2020-10-11T14:48:00.000+09:00'), InstanceId: 'i-5678', Tags: [ - { Key: 'Repo', Value: REPO_NAME }, - { Key: 'Org', Value: ORG_NAME }, + { Key: 'Owner', Value: REPO_NAME }, + { Key: 'Type', Value: 'Repo' }, { Key: 'Application', Value: 'github-action-runner' }, ], }, @@ -47,51 +47,53 @@ describe('list instances', () => { }); it('returns a list of instances', async () => { - const resp = await listRunners(); + const resp = await listEC2Runners(); expect(resp.length).toBe(2); expect(resp).toContainEqual({ instanceId: 'i-1234', launchTime: new Date('2020-10-10T14:48:00.000+09:00'), - repo: 'CoderToCat/hello-world', - org: 'CoderToCat', + type: 'Org', + owner: 'CoderToCat', }); expect(resp).toContainEqual({ instanceId: 'i-5678', launchTime: new 
Date('2020-10-11T14:48:00.000+09:00'), - repo: REPO_NAME, - org: ORG_NAME, + type: 'Repo', + owner: REPO_NAME, }); }); it('calls EC2 describe instances', async () => { - await listRunners(); + await listEC2Runners(); expect(mockEC2.describeInstances).toBeCalled(); }); it('filters instances on repo name', async () => { - await listRunners({ runnerType: 'Repo', runnerOwner: REPO_NAME, environment: undefined }); + await listEC2Runners({ runnerType: 'Repo', runnerOwner: REPO_NAME, environment: undefined }); expect(mockEC2.describeInstances).toBeCalledWith({ Filters: [ { Name: 'tag:Application', Values: ['github-action-runner'] }, { Name: 'instance-state-name', Values: ['running', 'pending'] }, - { Name: 'tag:Repo', Values: [REPO_NAME] }, + { Name: 'tag:Type', Values: ['Repo'] }, + { Name: 'tag:Owner', Values: [REPO_NAME] }, ], }); }); it('filters instances on org name', async () => { - await listRunners({ runnerType: 'Org', runnerOwner: ORG_NAME, environment: undefined }); + await listEC2Runners({ runnerType: 'Org', runnerOwner: ORG_NAME, environment: undefined }); expect(mockEC2.describeInstances).toBeCalledWith({ Filters: [ { Name: 'tag:Application', Values: ['github-action-runner'] }, { Name: 'instance-state-name', Values: ['running', 'pending'] }, - { Name: 'tag:Org', Values: [ORG_NAME] }, + { Name: 'tag:Type', Values: ['Org'] }, + { Name: 'tag:Owner', Values: [ORG_NAME] }, ], }); }); - it('filters instances on org name', async () => { - await listRunners({ environment: ENVIRONMENT }); + it('filters instances on environment', async () => { + await listEC2Runners({ environment: ENVIRONMENT }); expect(mockEC2.describeInstances).toBeCalledWith({ Filters: [ { Name: 'tag:Application', Values: ['github-action-runner'] }, @@ -112,8 +114,10 @@ describe('terminate runner', () => { it('calls terminate instances with the right instance ids', async () => { const runner: RunnerInfo = { instanceId: 'instance-2', + owner: 'owner-2', + type: 'Repo', }; - await 
terminateRunner(runner); + await terminateRunner(runner.instanceId); expect(mockEC2.terminateInstances).toBeCalledWith({ InstanceIds: [runner.instanceId] }); }); @@ -156,7 +160,8 @@ describe('create runner', () => { ResourceType: 'instance', Tags: [ { Key: 'Application', Value: 'github-action-runner' }, - { Key: 'Repo', Value: REPO_NAME }, + { Key: 'Type', Value: 'Repo' }, + { Key: 'Owner', Value: REPO_NAME }, ], }, ], @@ -183,7 +188,8 @@ describe('create runner', () => { ResourceType: 'instance', Tags: [ { Key: 'Application', Value: 'github-action-runner' }, - { Key: 'Org', Value: ORG_NAME }, + { Key: 'Type', Value: 'Org' }, + { Key: 'Owner', Value: ORG_NAME }, ], }, ], diff --git a/modules/runners/lambdas/runners/src/scale-runners/runners.ts b/modules/runners/lambdas/runners/src/scale-runners/runners.ts index bbad2a498e..e3892c1dab 100644 --- a/modules/runners/lambdas/runners/src/scale-runners/runners.ts +++ b/modules/runners/lambdas/runners/src/scale-runners/runners.ts @@ -1,12 +1,21 @@ import { EC2, SSM } from 'aws-sdk'; -export interface RunnerInfo { +export interface RunnerList { instanceId: string; launchTime?: Date; + owner?: string; + type?: string; repo?: string; org?: string; } +export interface RunnerInfo { + instanceId: string; + launchTime?: Date; + owner: string; + type: string; +} + export interface ListRunnerFilters { runnerType?: 'Org' | 'Repo'; runnerOwner?: string; @@ -20,7 +29,7 @@ export interface RunnerInputParameters { runnerOwner: string; } -export async function listRunners(filters: ListRunnerFilters | undefined = undefined): Promise { +export async function listEC2Runners(filters: ListRunnerFilters | undefined = undefined): Promise { const ec2 = new EC2(); const ec2Filters = [ { Name: 'tag:Application', Values: ['github-action-runner'] }, @@ -31,11 +40,12 @@ export async function listRunners(filters: ListRunnerFilters | undefined = undef ec2Filters.push({ Name: 'tag:Environment', Values: [filters.environment] }); } if (filters.runnerType 
&& filters.runnerOwner) { - ec2Filters.push({ Name: `tag:${filters.runnerType}`, Values: [filters.runnerOwner] }); + ec2Filters.push({ Name: `tag:Type`, Values: [filters.runnerType] }); + ec2Filters.push({ Name: `tag:Owner`, Values: [filters.runnerOwner] }); } } const runningInstances = await ec2.describeInstances({ Filters: ec2Filters }).promise(); - const runners: RunnerInfo[] = []; + const runners: RunnerList[] = []; if (runningInstances.Reservations) { for (const r of runningInstances.Reservations) { if (r.Instances) { @@ -43,8 +53,10 @@ export async function listRunners(filters: ListRunnerFilters | undefined = undef runners.push({ instanceId: i.InstanceId as string, launchTime: i.LaunchTime, - repo: i.Tags?.find((e) => e.Key === 'Repo')?.Value, - org: i.Tags?.find((e) => e.Key === 'Org')?.Value, + owner: i.Tags?.find((e) => e.Key === 'Owner')?.Value as string, + type: i.Tags?.find((e) => e.Key === 'Type')?.Value as string, + repo: i.Tags?.find((e) => e.Key === 'Repo')?.Value as string, + org: i.Tags?.find((e) => e.Key === 'Org')?.Value as string, }); } } @@ -53,14 +65,14 @@ export async function listRunners(filters: ListRunnerFilters | undefined = undef return runners; } -export async function terminateRunner(runner: RunnerInfo): Promise { +export async function terminateRunner(instanceId: string): Promise { const ec2 = new EC2(); await ec2 .terminateInstances({ - InstanceIds: [runner.instanceId], + InstanceIds: [instanceId], }) .promise(); - console.debug('Runner terminated.' 
+ runner.instanceId); + console.debug(`Runner ${instanceId} has been terminated.`); } export async function createRunner(runnerParameters: RunnerInputParameters, launchTemplateName: string): Promise { @@ -99,10 +111,8 @@ function getInstanceParams( ResourceType: 'instance', Tags: [ { Key: 'Application', Value: 'github-action-runner' }, - { - Key: runnerParameters.runnerType, - Value: runnerParameters.runnerOwner, - }, + { Key: 'Type', Value: runnerParameters.runnerType }, + { Key: 'Owner', Value: runnerParameters.runnerOwner }, ], }, ], diff --git a/modules/runners/lambdas/runners/src/scale-runners/scale-down.test.ts b/modules/runners/lambdas/runners/src/scale-runners/scale-down.test.ts index 2dcc865da2..72616ee033 100644 --- a/modules/runners/lambdas/runners/src/scale-runners/scale-down.test.ts +++ b/modules/runners/lambdas/runners/src/scale-runners/scale-down.test.ts @@ -1,10 +1,11 @@ import moment from 'moment'; import { mocked } from 'ts-jest/utils'; -import { listRunners, terminateRunner } from './runners'; +import { listEC2Runners, terminateRunner, RunnerInfo, RunnerList } from './runners'; import { scaleDown } from './scale-down'; import * as ghAuth from './gh-auth'; import nock from 'nock'; import { Octokit } from '@octokit/rest'; +import { githubCache } from './cache'; const mockOctokit = { apps: { @@ -25,75 +26,124 @@ jest.mock('@octokit/rest', () => ({ jest.mock('./runners'); jest.mock('./gh-auth'); +jest.mock('./cache'); const mocktokit = Octokit as jest.MockedClass; const mockedAppAuth = mocked(ghAuth.createGithubAppAuth, true); const mockedInstallationAuth = mocked(ghAuth.createGithubInstallationAuth, true); const mockCreateClient = mocked(ghAuth.createOctoClient, true); +const mockListRunners = mocked(listEC2Runners); export interface TestData { repositoryName: string; repositoryOwner: string; } +const cleanEnv = process.env; + const environment = 'unit-test-environment'; const minimumRunningTimeInMinutes = 15; +const runnerBootTimeInMinutes = 5; 
const TEST_DATA: TestData = { repositoryName: 'hello-world', repositoryOwner: 'Codertocat', }; -const DEFAULT_RUNNERS = [ +let DEFAULT_RUNNERS: RunnerList[]; +let RUNNERS_ALL_REMOVED: RunnerInfo[]; +let DEFAULT_RUNNERS_REPO_TO_BE_REMOVED: RunnerInfo[]; +let RUNNERS_ORG_TO_BE_REMOVED_WITH_AUTO_SCALING_CONFIG: RunnerInfo[]; +let RUNNERS_REPO_WITH_AUTO_SCALING_CONFIG: RunnerInfo[]; +let RUNNERS_ORG_WITH_AUTO_SCALING_CONFIG: RunnerInfo[]; +let DEFAULT_RUNNERS_REPO: RunnerInfo[]; +let DEFAULT_RUNNERS_ORG: RunnerInfo[]; +let DEFAULT_RUNNERS_ORG_TO_BE_REMOVED: RunnerInfo[]; +let DEFAULT_RUNNERS_ORPHANED: RunnerInfo[]; +let DEFAULT_REPO_RUNNERS_ORPHANED: RunnerInfo[]; +let DEFAULT_ORG_RUNNERS_ORPHANED: RunnerInfo[]; +const DEFAULT_RUNNERS_ORIGINAL = [ { instanceId: 'i-idle-101', launchTime: moment(new Date()) .subtract(minimumRunningTimeInMinutes + 5, 'minutes') .toDate(), - repo: `${TEST_DATA.repositoryOwner}/${TEST_DATA.repositoryName}`, - org: undefined, + type: 'Repo', + owner: `${TEST_DATA.repositoryOwner}/${TEST_DATA.repositoryName}`, + }, + { + instanceId: 'i-idle-102', + launchTime: moment(new Date()) + .subtract(minimumRunningTimeInMinutes + 3, 'minutes') + .toDate(), + type: 'Org', + owner: TEST_DATA.repositoryOwner, }, { - instanceId: 'i-oldest-idle-102', + instanceId: 'i-oldest-idle-103', launchTime: moment(new Date()) .subtract(minimumRunningTimeInMinutes + 27, 'minutes') .toDate(), - repo: `${TEST_DATA.repositoryOwner}/${TEST_DATA.repositoryName}`, - org: undefined, + type: 'Repo', + owner: `${TEST_DATA.repositoryOwner}/${TEST_DATA.repositoryName}`, + }, + { + instanceId: 'i-oldest-idle-104', + launchTime: moment(new Date()) + .subtract(minimumRunningTimeInMinutes + 27, 'minutes') + .toDate(), + type: 'Org', + owner: TEST_DATA.repositoryOwner, + }, + { + instanceId: 'i-running-105', + launchTime: moment(new Date()).subtract(25, 'minutes').toDate(), + type: 'Repo', + owner: `doe/another-repo`, }, { - instanceId: 'i-running-103', + instanceId: 'i-running-106', 
launchTime: moment(new Date()).subtract(25, 'minutes').toDate(), - repo: `doe/another-repo`, - org: undefined, + type: 'Org', + owner: TEST_DATA.repositoryOwner, }, { - instanceId: 'i-orphan-104', + instanceId: 'i-orphan-107', launchTime: moment(new Date()) .subtract(minimumRunningTimeInMinutes + 5, 'minutes') .toDate(), - repo: `doe/another-repo`, - org: undefined, + type: 'Repo', + owner: `doe/another-repo`, }, { - instanceId: 'i-not-registered-105', + instanceId: 'i-not-registered-108', launchTime: moment(new Date()) .subtract(minimumRunningTimeInMinutes - 1, 'minutes') .toDate(), - repo: `doe/another-repo`, - org: undefined, + type: 'Repo', + owner: `doe/another-repo`, + }, + { + instanceId: 'i-not-registered-109', + launchTime: moment(new Date()) + .subtract(minimumRunningTimeInMinutes - 2, 'minutes') + .toDate(), + type: 'Org', + owner: TEST_DATA.repositoryOwner, + }, + { + instanceId: 'i-legacy-110', + launchTime: moment(new Date()) + .subtract(minimumRunningTimeInMinutes + 5, 'minutes') + .toDate(), + repo: `${TEST_DATA.repositoryOwner}/${TEST_DATA.repositoryName}`, + }, + { + instanceId: 'i-new-111', + launchTime: moment(new Date()).toDate(), + repo: `${TEST_DATA.repositoryOwner}/${TEST_DATA.repositoryName}`, }, ]; -const DEFAULT_RUNNERS_TO_BE_REMOVED = DEFAULT_RUNNERS.filter( - (r) => r.instanceId.includes('idle') || r.instanceId.includes('orphan'), -); - -const RUNNERS_WITH_AUTO_SCALING_CONFIG = DEFAULT_RUNNERS.filter( - (r) => r.instanceId.includes('idle') || r.instanceId.includes('running'), -); - -const RUNNERS_TO_BE_REMOVED_WITH_AUTO_SCALING_CONFIG = DEFAULT_RUNNERS.filter((r) => r.instanceId.includes('oldest')); - const DEFAULT_REGISTERED_RUNNERS = [ { id: 101, @@ -101,25 +151,43 @@ const DEFAULT_REGISTERED_RUNNERS = [ }, { id: 102, - name: 'i-oldest-idle-102', + name: 'i-idle-102', }, { id: 103, - name: 'i-running-103', + name: 'i-oldest-idle-103', + }, + { + id: 104, + name: 'i-oldest-idle-104', + }, + { + id: 105, + name: 'i-running-105', + }, + 
{ + id: 106, + name: 'i-running-106', }, ]; describe('scaleDown', () => { beforeEach(() => { + process.env = { ...cleanEnv }; process.env.GITHUB_APP_KEY_BASE64 = 'TEST_CERTIFICATE_DATA'; process.env.GITHUB_APP_ID = '1337'; process.env.GITHUB_APP_CLIENT_ID = 'TEST_CLIENT_ID'; process.env.GITHUB_APP_CLIENT_SECRET = 'TEST_CLIENT_SECRET'; process.env.RUNNERS_MAXIMUM_COUNT = '3'; + process.env.SCALE_DOWN_CONFIG = '[]'; process.env.ENVIRONMENT = environment; process.env.MINIMUM_RUNNING_TIME_IN_MINUTES = minimumRunningTimeInMinutes.toString(); + process.env.RUNNER_BOOT_TIME_IN_MINUTES = runnerBootTimeInMinutes.toString(); nock.disableNetConnect(); jest.clearAllMocks(); + jest.resetModules(); + githubCache.clients.clear(); + githubCache.runners.clear(); mockOctokit.apps.getOrgInstallation.mockImplementation(() => ({ data: { id: 'ORG', @@ -131,377 +199,300 @@ describe('scaleDown', () => { }, })); - mockOctokit.paginate.mockImplementation(() => { - return DEFAULT_REGISTERED_RUNNERS; - }); - + mockOctokit.paginate.mockResolvedValue(DEFAULT_REGISTERED_RUNNERS); mockOctokit.actions.deleteSelfHostedRunnerFromRepo.mockImplementation((repo) => { - if (repo.runner_id === 103) { + if (repo.runner_id === 105) { throw Error(); } else { return { status: 204 }; } }); mockOctokit.actions.deleteSelfHostedRunnerFromOrg.mockImplementation((repo) => { - return repo.runner_id === 103 ? 
{ status: 500 } : { status: 204 }; + if (repo.runner_id === 106) { + throw Error(); + } else { + return { status: 204 }; + } }); const mockTerminateRunners = mocked(terminateRunner); mockTerminateRunners.mockImplementation(async () => { return; }); + mockedAppAuth.mockResolvedValue({ + type: 'app', + token: 'token', + appId: 1, + expiresAt: 'some-date', + }); + mockedInstallationAuth.mockResolvedValue({ + type: 'token', + tokenType: 'installation', + token: 'token', + createdAt: 'some-date', + expiresAt: 'some-date', + permissions: {}, + repositorySelection: 'all', + }); + mockCreateClient.mockResolvedValue(new mocktokit()); + DEFAULT_RUNNERS = JSON.parse(JSON.stringify(DEFAULT_RUNNERS_ORIGINAL)); + DEFAULT_RUNNERS_REPO = DEFAULT_RUNNERS.filter((r) => r.type === 'Repo') as RunnerInfo[]; + DEFAULT_RUNNERS_ORG = DEFAULT_RUNNERS.filter((r) => r.type === 'Org') as RunnerInfo[]; + DEFAULT_RUNNERS_REPO_TO_BE_REMOVED = DEFAULT_RUNNERS_REPO.filter( + (r) => r.instanceId.includes('idle') || r.instanceId.includes('orphan'), + ); + DEFAULT_RUNNERS_ORG_TO_BE_REMOVED = DEFAULT_RUNNERS_ORG.filter( + (r) => r.instanceId.includes('idle') || r.instanceId.includes('orphan'), + ); + + RUNNERS_REPO_WITH_AUTO_SCALING_CONFIG = DEFAULT_RUNNERS_REPO.filter( + (r) => r.instanceId.includes('idle') || r.instanceId.includes('running'), + ); + + RUNNERS_ORG_WITH_AUTO_SCALING_CONFIG = DEFAULT_RUNNERS_ORG.filter( + (r) => r.instanceId.includes('idle') || r.instanceId.includes('running'), + ); + + RUNNERS_ORG_TO_BE_REMOVED_WITH_AUTO_SCALING_CONFIG = DEFAULT_RUNNERS_ORG.filter((r) => + r.instanceId.includes('oldest'), + ); + + RUNNERS_ALL_REMOVED = DEFAULT_RUNNERS_ORG.filter( + (r) => !r.instanceId.includes('running') && !r.instanceId.includes('registered'), + ); + DEFAULT_RUNNERS_ORPHANED = DEFAULT_RUNNERS_ORIGINAL.filter( + (r) => r.instanceId.includes('orphan') && !r.instanceId.includes('not-registered'), + ) as RunnerInfo[]; + DEFAULT_REPO_RUNNERS_ORPHANED = DEFAULT_RUNNERS_REPO.filter( + (r) 
=> r.instanceId.includes('orphan') && !r.instanceId.includes('not-registered'), + ); + DEFAULT_ORG_RUNNERS_ORPHANED = DEFAULT_RUNNERS_ORG.filter( + (r) => r.instanceId.includes('orphan') && !r.instanceId.includes('not-registered'), + ); }); - describe('no runners running', () => { - beforeAll(() => { - mockedAppAuth.mockResolvedValue({ - type: 'app', - token: 'token', - appId: 1, - expiresAt: 'some-date', - }); - mockedInstallationAuth.mockResolvedValue({ - type: 'token', - tokenType: 'installation', - token: 'token', - createdAt: 'some-date', - expiresAt: 'some-date', - permissions: {}, - repositorySelection: 'all', - }); - mockCreateClient.mockResolvedValue(new mocktokit()); - const mockListRunners = mocked(listRunners); - mockListRunners.mockImplementation(async () => []); - }); - - it('No runners for repo.', async () => { - process.env.ENABLE_ORGANIZATION_RUNNERS = 'false'; - process.env.SCALE_DOWN_CONFIG = '[]'; - await scaleDown(); - expect(listRunners).toBeCalledWith({ - environment: environment, - }); - expect(terminateRunner).not.toBeCalled(); - expect(mockOctokit.apps.getRepoInstallation).not.toBeCalled(); - }); - - it('No runners for org.', async () => { - process.env.ENABLE_ORGANIZATION_RUNNERS = 'true'; - await scaleDown(); - expect(listRunners).toBeCalledWith({ - environment: environment, + describe('github.com', () => { + describe('no runners running', () => { + beforeEach(() => { + mockListRunners.mockResolvedValue([]); }); - expect(terminateRunner).not.toBeCalled(); - expect(mockOctokit.apps.getRepoInstallation).not.toBeCalled(); - }); - }); - describe('on repo level', () => { - beforeAll(() => { - process.env.ENABLE_ORGANIZATION_RUNNERS = 'false'; - process.env.SCALE_DOWN_CONFIG = '[]'; - const mockListRunners = mocked(listRunners); - mockListRunners.mockImplementation(async () => { - return DEFAULT_RUNNERS; + it('No runners online', async () => { + await scaleDown(); + expect(listEC2Runners).toBeCalledWith({ + environment: environment, + }); + 
expect(terminateRunner).not; + expect(mockOctokit.apps.getRepoInstallation).not; + expect(mockOctokit.apps.getRepoInstallation).not; }); }); - it('Terminate 3 of 5 runners for repo.', async () => { + it('Terminates 3 of 5 runners owned by repos and all orphaned', async () => { + mockListRunners.mockResolvedValue(DEFAULT_RUNNERS_REPO); await scaleDown(); - expect(listRunners).toBeCalledWith({ + expect(listEC2Runners).toBeCalledWith({ environment: environment, }); expect(mockOctokit.apps.getRepoInstallation).toBeCalled(); - expect(terminateRunner).toBeCalledTimes(3); - for (const toTerminate of DEFAULT_RUNNERS_TO_BE_REMOVED) { - expect(terminateRunner).toHaveBeenCalledWith(toTerminate); - } - }); - }); - describe('on org level', () => { - beforeAll(() => { - process.env.ENABLE_ORGANIZATION_RUNNERS = 'true'; - process.env.SCALE_DOWN_CONFIG = '[]'; - const mockListRunners = mocked(listRunners); - mockListRunners.mockImplementation(async () => { - return DEFAULT_RUNNERS; - }); + expect(terminateRunner).toBeCalledTimes(4); + for (const toTerminate of DEFAULT_RUNNERS_REPO_TO_BE_REMOVED) { + expect(terminateRunner).toHaveBeenCalledWith(toTerminate.instanceId); + } + for (const toTerminate of DEFAULT_REPO_RUNNERS_ORPHANED) { + expect(terminateRunner).toHaveBeenCalledWith(toTerminate.instanceId); + } }); - it('Terminate 3 of 5 runners for org.', async () => { + it('Terminates 2 of 3 runners owned by orgs and all orphaned', async () => { + mockListRunners.mockResolvedValue(DEFAULT_RUNNERS_ORG); await scaleDown(); - expect(listRunners).toBeCalledWith({ + expect(listEC2Runners).toBeCalledWith({ environment: environment, }); expect(mockOctokit.apps.getOrgInstallation).toBeCalled(); expect(terminateRunner).toBeCalledTimes(3); - for (const toTerminate of DEFAULT_RUNNERS_TO_BE_REMOVED) { - expect(terminateRunner).toHaveBeenCalledWith(toTerminate); + for (const toTerminate of DEFAULT_RUNNERS_ORG_TO_BE_REMOVED) { + expect(terminateRunner).toHaveBeenCalledWith(toTerminate.instanceId); 
+ } + for (const toTerminate of DEFAULT_ORG_RUNNERS_ORPHANED) { + expect(terminateRunner).toHaveBeenCalledWith(toTerminate.instanceId); } }); - }); - describe('Have runners idle.', () => { - beforeAll(() => { - process.env.SCALE_DOWN_CONFIG = JSON.stringify([ - { - idleCount: 2, - cron: '* * * * * *', - timeZone: 'Europe/Amsterdam', - }, - ]); - - const mockListRunners = mocked(listRunners); - mockListRunners.mockImplementation(async () => { - return RUNNERS_WITH_AUTO_SCALING_CONFIG; + describe('With idle config', () => { + beforeEach(() => { + process.env.SCALE_DOWN_CONFIG = JSON.stringify([ + { + idleCount: 2, + cron: '* * * * * *', + timeZone: 'Europe/Amsterdam', + }, + ]); }); - }); - it('Terminate 1 of runners for org.', async () => { - process.env.ENABLE_ORGANIZATION_RUNNERS = 'true'; - await scaleDown(); + it('Terminates 1 runner owned by orgs', async () => { + mockListRunners.mockResolvedValue(RUNNERS_ORG_WITH_AUTO_SCALING_CONFIG); + await scaleDown(); - expect(listRunners).toBeCalledWith({ - environment: environment, + expect(listEC2Runners).toBeCalledWith({ + environment: environment, + }); + + expect(mockOctokit.apps.getOrgInstallation).toBeCalled(); + expect(terminateRunner).toBeCalledTimes(1); + for (const toTerminate of RUNNERS_ORG_TO_BE_REMOVED_WITH_AUTO_SCALING_CONFIG) { + expect(terminateRunner).toHaveBeenCalledWith(toTerminate.instanceId); + } }); - expect(mockOctokit.apps.getOrgInstallation).toBeCalled(); - expect(terminateRunner).toBeCalledTimes(1); - for (const toTerminate of RUNNERS_TO_BE_REMOVED_WITH_AUTO_SCALING_CONFIG) { - expect(terminateRunner).toHaveBeenCalledWith(toTerminate); - } + it('Terminates 0 runners owned by repos', async () => { + mockListRunners.mockResolvedValue(RUNNERS_REPO_WITH_AUTO_SCALING_CONFIG); + process.env.ENABLE_ORGANIZATION_RUNNERS = 'false'; + await scaleDown(); + + expect(listEC2Runners).toBeCalledWith({ + environment: environment, + }); + + expect(mockOctokit.apps.getRepoInstallation).toBeCalled(); + 
expect(terminateRunner).not.toBeCalled(); + }); }); - it('Terminate 1 of runners for repo.', async () => { - process.env.ENABLE_ORGANIZATION_RUNNERS = 'false'; + it('Terminates 6 runners amongst all owners and all orphaned', async () => { + mockListRunners.mockResolvedValue(DEFAULT_RUNNERS); await scaleDown(); - expect(listRunners).toBeCalledWith({ + expect(listEC2Runners).toBeCalledWith({ environment: environment, }); - expect(mockOctokit.apps.getRepoInstallation).toBeCalled(); - expect(terminateRunner).toBeCalledTimes(1); - for (const toTerminate of RUNNERS_TO_BE_REMOVED_WITH_AUTO_SCALING_CONFIG) { - expect(terminateRunner).toHaveBeenCalledWith(toTerminate); + expect(mockOctokit.apps.getRepoInstallation).toBeCalledTimes(2); + expect(mockOctokit.apps.getOrgInstallation).toBeCalledTimes(1); + expect(terminateRunner).toBeCalledTimes(8); + for (const toTerminate of RUNNERS_ALL_REMOVED) { + expect(terminateRunner).toHaveBeenCalledWith(toTerminate.instanceId); } - }); - }); -}); - -describe('scaleDown ghes', () => { - beforeEach(() => { - process.env.GITHUB_APP_KEY_BASE64 = 'TEST_CERTIFICATE_DATA'; - process.env.GITHUB_APP_ID = '1337'; - process.env.GITHUB_APP_CLIENT_ID = 'TEST_CLIENT_ID'; - process.env.GITHUB_APP_CLIENT_SECRET = 'TEST_CLIENT_SECRET'; - process.env.RUNNERS_MAXIMUM_COUNT = '3'; - process.env.ENVIRONMENT = environment; - process.env.MINIMUM_RUNNING_TIME_IN_MINUTES = minimumRunningTimeInMinutes.toString(); - process.env.GHES_URL = 'https://github.enterprise.something'; - jest.clearAllMocks(); - mockOctokit.apps.getOrgInstallation.mockImplementation(() => ({ - data: { - id: 'ORG', - }, - })); - mockOctokit.apps.getRepoInstallation.mockImplementation(() => ({ - data: { - id: 'REPO', - }, - })); - - mockOctokit.paginate.mockImplementation(() => { - return DEFAULT_REGISTERED_RUNNERS; - }); - - mockOctokit.actions.deleteSelfHostedRunnerFromRepo.mockImplementation((repo) => { - if (repo.runner_id === 103) { - throw Error(); - } else { - return { status: 204 }; 
+ for (const toTerminate of DEFAULT_RUNNERS_ORPHANED) { + expect(terminateRunner).toHaveBeenCalledWith(toTerminate.instanceId); } }); - mockOctokit.actions.deleteSelfHostedRunnerFromOrg.mockImplementation((repo) => { - return repo.runner_id === 103 ? { status: 500 } : { status: 204 }; - }); - - const mockTerminateRunners = mocked(terminateRunner); - mockTerminateRunners.mockImplementation(async () => { - return; - }); }); - describe('no runners running', () => { - beforeAll(() => { - const mockListRunners = mocked(listRunners); - mockListRunners.mockImplementation(async () => []); + describe('ghes', () => { + beforeEach(() => { + process.env.GHES_URL = 'https://github.enterprise.something'; }); - - it('No runners for repo.', async () => { - process.env.ENABLE_ORGANIZATION_RUNNERS = 'false'; - process.env.SCALE_DOWN_CONFIG = '[]'; - await scaleDown(); - expect(listRunners).toBeCalledWith({ - environment: environment, - }); - expect(terminateRunner).not.toBeCalled(); - expect(mockOctokit.apps.getRepoInstallation).not.toBeCalled(); - }); - - it('No runners for org.', async () => { - process.env.ENABLE_ORGANIZATION_RUNNERS = 'true'; - await scaleDown(); - expect(listRunners).toBeCalledWith({ - environment: environment, + describe('no runners running', () => { + beforeEach(() => { + mockListRunners.mockResolvedValue([]); }); - expect(terminateRunner).not.toBeCalled(); - expect(mockOctokit.apps.getRepoInstallation).not.toBeCalled(); - }); - }); - describe('on repo level', () => { - beforeAll(() => { - process.env.ENABLE_ORGANIZATION_RUNNERS = 'false'; - process.env.SCALE_DOWN_CONFIG = '[]'; - const mockListRunners = mocked(listRunners); - mockListRunners.mockImplementation(async () => { - return DEFAULT_RUNNERS; + it('No runners online', async () => { + await scaleDown(); + expect(listEC2Runners).toBeCalledWith({ + environment: environment, + }); + expect(terminateRunner).not; + expect(mockOctokit.apps.getRepoInstallation).not; + 
expect(mockOctokit.apps.getRepoInstallation).not; }); }); - it('Terminate 3 of 5 runners for repo.', async () => { + it('Terminates 3 of 5 runners owned by repos and all orphaned', async () => { + mockListRunners.mockResolvedValue(DEFAULT_RUNNERS_REPO); await scaleDown(); - expect(listRunners).toBeCalledWith({ + expect(listEC2Runners).toBeCalledWith({ environment: environment, }); expect(mockOctokit.apps.getRepoInstallation).toBeCalled(); - expect(terminateRunner).toBeCalledTimes(3); - for (const toTerminate of DEFAULT_RUNNERS_TO_BE_REMOVED) { - expect(terminateRunner).toHaveBeenCalledWith(toTerminate); + expect(terminateRunner).toBeCalledTimes(4); + for (const toTerminate of DEFAULT_RUNNERS_REPO_TO_BE_REMOVED) { + expect(terminateRunner).toHaveBeenCalledWith(toTerminate.instanceId); + } + for (const toTerminate of DEFAULT_REPO_RUNNERS_ORPHANED) { + expect(terminateRunner).toHaveBeenCalledWith(toTerminate.instanceId); } - }); - }); - - describe('on org level', () => { - beforeAll(() => { - process.env.ENABLE_ORGANIZATION_RUNNERS = 'true'; - process.env.SCALE_DOWN_CONFIG = '[]'; - const mockListRunners = mocked(listRunners); - mockListRunners.mockImplementation(async () => { - return DEFAULT_RUNNERS; - }); }); - it('Terminate 3 of 5 runners for org.', async () => { + it('Terminates 2 of 3 runners owned by orgs and all orphaned', async () => { + mockListRunners.mockResolvedValue(DEFAULT_RUNNERS_ORG); await scaleDown(); - expect(listRunners).toBeCalledWith({ + expect(listEC2Runners).toBeCalledWith({ environment: environment, }); expect(mockOctokit.apps.getOrgInstallation).toBeCalled(); expect(terminateRunner).toBeCalledTimes(3); - for (const toTerminate of DEFAULT_RUNNERS_TO_BE_REMOVED) { - expect(terminateRunner).toHaveBeenCalledWith(toTerminate); + for (const toTerminate of DEFAULT_RUNNERS_ORG_TO_BE_REMOVED) { + expect(terminateRunner).toHaveBeenCalledWith(toTerminate.instanceId); + } + for (const toTerminate of DEFAULT_ORG_RUNNERS_ORPHANED) { + 
expect(terminateRunner).toHaveBeenCalledWith(toTerminate.instanceId); } }); - }); - describe('on repo level', () => { - beforeAll(() => { - process.env.ENABLE_ORGANIZATION_RUNNERS = 'false'; - process.env.SCALE_DOWN_CONFIG = '[]'; - const mockListRunners = mocked(listRunners); - mockListRunners.mockImplementation(async () => { - return DEFAULT_RUNNERS; + describe('With idle config', () => { + beforeEach(() => { + process.env.SCALE_DOWN_CONFIG = JSON.stringify([ + { + idleCount: 2, + cron: '* * * * * *', + timeZone: 'Europe/Amsterdam', + }, + ]); }); - }); - it('Terminate 3 of 5 runners for repo.', async () => { - await scaleDown(); - expect(listRunners).toBeCalledWith({ - environment: environment, - }); + it('Terminates 1 runner owned by orgs', async () => { + mockListRunners.mockResolvedValue(RUNNERS_ORG_WITH_AUTO_SCALING_CONFIG); + await scaleDown(); - expect(mockOctokit.apps.getRepoInstallation).toBeCalled(); - expect(terminateRunner).toBeCalledTimes(3); - for (const toTerminate of DEFAULT_RUNNERS_TO_BE_REMOVED) { - expect(terminateRunner).toHaveBeenCalledWith(toTerminate); - } - }); - }); + expect(listEC2Runners).toBeCalledWith({ + environment: environment, + }); - describe('on org level', () => { - beforeAll(() => { - process.env.ENABLE_ORGANIZATION_RUNNERS = 'true'; - process.env.SCALE_DOWN_CONFIG = '[]'; - const mockListRunners = mocked(listRunners); - mockListRunners.mockImplementation(async () => { - return DEFAULT_RUNNERS; + expect(mockOctokit.apps.getOrgInstallation).toBeCalled(); + expect(terminateRunner).toBeCalledTimes(1); + for (const toTerminate of RUNNERS_ORG_TO_BE_REMOVED_WITH_AUTO_SCALING_CONFIG) { + expect(terminateRunner).toHaveBeenCalledWith(toTerminate.instanceId); + } }); - }); - it('Terminate 3 of 5 runners for org.', async () => { - await scaleDown(); - expect(listRunners).toBeCalledWith({ - environment: environment, - }); + it('Terminates 0 runners owned by repos', async () => { + 
mockListRunners.mockResolvedValue(RUNNERS_REPO_WITH_AUTO_SCALING_CONFIG); + process.env.ENABLE_ORGANIZATION_RUNNERS = 'false'; + await scaleDown(); - expect(mockOctokit.apps.getOrgInstallation).toBeCalled(); - expect(terminateRunner).toBeCalledTimes(3); - for (const toTerminate of DEFAULT_RUNNERS_TO_BE_REMOVED) { - expect(terminateRunner).toHaveBeenCalledWith(toTerminate); - } - }); - }); + expect(listEC2Runners).toBeCalledWith({ + environment: environment, + }); - describe('Have runners idle.', () => { - beforeAll(() => { - process.env.SCALE_DOWN_CONFIG = JSON.stringify([ - { - idleCount: 2, - cron: '* * * * * *', - timeZone: 'Europe/Amsterdam', - }, - ]); - - const mockListRunners = mocked(listRunners); - mockListRunners.mockImplementation(async () => { - return RUNNERS_WITH_AUTO_SCALING_CONFIG; + expect(mockOctokit.apps.getRepoInstallation).toBeCalled(); + expect(terminateRunner).not.toBeCalled(); }); }); - it('Terminate 1 of runners for org.', async () => { - process.env.ENABLE_ORGANIZATION_RUNNERS = 'true'; + it('Terminates 6 runners amongst all owners and all orphaned', async () => { + mockListRunners.mockResolvedValue(DEFAULT_RUNNERS); await scaleDown(); - expect(listRunners).toBeCalledWith({ + expect(listEC2Runners).toBeCalledWith({ environment: environment, }); - expect(mockOctokit.apps.getOrgInstallation).toBeCalled(); - expect(terminateRunner).toBeCalledTimes(1); - for (const toTerminate of RUNNERS_TO_BE_REMOVED_WITH_AUTO_SCALING_CONFIG) { - expect(terminateRunner).toHaveBeenCalledWith(toTerminate); + expect(mockOctokit.apps.getRepoInstallation).toBeCalledTimes(2); + expect(mockOctokit.apps.getOrgInstallation).toBeCalledTimes(1); + expect(terminateRunner).toBeCalledTimes(8); + for (const toTerminate of RUNNERS_ALL_REMOVED) { + expect(terminateRunner).toHaveBeenCalledWith(toTerminate.instanceId); } - }); - - it('Terminate 1 of runners for repo.', async () => { - process.env.ENABLE_ORGANIZATION_RUNNERS = 'false'; - await scaleDown(); - - 
expect(listRunners).toBeCalledWith({ - environment: environment, - }); - - expect(mockOctokit.apps.getRepoInstallation).toBeCalled(); - expect(terminateRunner).toBeCalledTimes(1); - for (const toTerminate of RUNNERS_TO_BE_REMOVED_WITH_AUTO_SCALING_CONFIG) { - expect(terminateRunner).toHaveBeenCalledWith(toTerminate); + for (const toTerminate of DEFAULT_RUNNERS_ORPHANED) { + expect(terminateRunner).toHaveBeenCalledWith(toTerminate.instanceId); } }); }); diff --git a/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts b/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts index a4a5b4b067..f05c8317e0 100644 --- a/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts +++ b/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts @@ -1,143 +1,156 @@ import { Octokit } from '@octokit/rest'; import moment from 'moment'; -import yn from 'yn'; -import { listRunners, RunnerInfo, terminateRunner } from './runners'; +import { listEC2Runners, RunnerInfo, RunnerList, terminateRunner } from './runners'; import { getIdleRunnerCount, ScalingDownConfig } from './scale-down-config'; import { createOctoClient, createGithubAppAuth, createGithubInstallationAuth } from './gh-auth'; +import { githubCache, GhRunners } from './cache'; -interface Repo { - repoName: string; - repoOwner: string; -} - -function getRepo(runner: RunnerInfo, orgLevel: boolean): Repo { - return orgLevel - ? { repoOwner: runner.org as string, repoName: '' } - : { repoOwner: runner.repo?.split('/')[0] as string, repoName: runner.repo?.split('/')[1] as string }; -} - -function createGitHubClientForRunnerFactory(): (runner: RunnerInfo, orgLevel: boolean) => Promise { - const cache: Map = new Map(); +async function getOrCreateOctokit(runner: RunnerInfo): Promise { + const key = runner.owner; + const cachedOctokit = githubCache.clients.get(key); - return async (runner: RunnerInfo, orgLevel: boolean) => { - const repo = getRepo(runner, orgLevel); - const key = orgLevel ? 
repo.repoOwner : repo.repoOwner + repo.repoName; - const cachedOctokit = cache.get(key); + if (cachedOctokit) { + console.debug(`[createGitHubClientForRunner] Cache hit for ${key}`); + return cachedOctokit; + } - if (cachedOctokit) { - console.debug(`[createGitHubClientForRunner] Cache hit for ${key}`); - return cachedOctokit; - } + console.debug(`[createGitHubClientForRunner] Cache miss for ${key}`); + const ghesBaseUrl = process.env.GHES_URL; + let ghesApiUrl = ''; + if (ghesBaseUrl) { + ghesApiUrl = `${ghesBaseUrl}/api/v3`; + } + const ghAuthPre = await createGithubAppAuth(undefined, ghesApiUrl); + const githubClientPre = await createOctoClient(ghAuthPre.token, ghesApiUrl); - console.debug(`[createGitHubClientForRunner] Cache miss for ${key}`); - const ghesBaseUrl = process.env.GHES_URL as string; - let ghesApiUrl = ''; - if (ghesBaseUrl) { - ghesApiUrl = `${ghesBaseUrl}/api/v3`; - } - const ghAuth = await createGithubAppAuth(undefined, ghesApiUrl); - const githubClient = await createOctoClient(ghAuth.token, ghesApiUrl); - const installationId = orgLevel + const installationId = + runner.type === 'Org' ? 
( - await githubClient.apps.getOrgInstallation({ - org: repo.repoOwner, + await githubClientPre.apps.getOrgInstallation({ + org: runner.owner, }) ).data.id : ( - await githubClient.apps.getRepoInstallation({ - owner: repo.repoOwner, - repo: repo.repoName, + await githubClientPre.apps.getRepoInstallation({ + owner: runner.owner.split('/')[0], + repo: runner.owner.split('/')[1], }) ).data.id; - const ghAuth2 = await createGithubInstallationAuth(installationId, ghesApiUrl); - const octokit = await createOctoClient(ghAuth2.token, ghesApiUrl); - cache.set(key, octokit); + const ghAuth = await createGithubInstallationAuth(installationId, ghesApiUrl); + const octokit = await createOctoClient(ghAuth.token, ghesApiUrl); + githubCache.clients.set(key, octokit); - return octokit; - }; + return octokit; } -/** - * Extract the inner type of a promise if any - */ -export type UnboxPromise = T extends Promise ? U : T; - -type GhRunners = UnboxPromise>['data']['runners']; - -function listGithubRunnersFactory(): ( - client: Octokit, - runner: RunnerInfo, - enableOrgLevel: boolean, -) => Promise { - const cache: Map = new Map(); - return async (client: Octokit, runner: RunnerInfo, enableOrgLevel: boolean) => { - const repo = getRepo(runner, enableOrgLevel); - const key = enableOrgLevel ? 
repo.repoOwner : repo.repoOwner + repo.repoName; - const cachedRunners = cache.get(key); - if (cachedRunners) { - console.debug(`[listGithubRunners] Cache hit for ${key}`); - return cachedRunners; - } +async function listGitHubRunners(runner: RunnerInfo): Promise { + const key = runner.owner as string; + const cachedRunners = githubCache.runners.get(key); + if (cachedRunners) { + console.debug(`[listGithubRunners] Cache hit for ${key}`); + return cachedRunners; + } - console.debug(`[listGithubRunners] Cache miss for ${key}`); - const runners = enableOrgLevel + const client = await getOrCreateOctokit(runner); + console.debug(`[listGithubRunners] Cache miss for ${key}`); + const runners = + runner.type === 'Org' ? await client.paginate(client.actions.listSelfHostedRunnersForOrg, { - org: repo.repoOwner, + org: runner.owner, }) : await client.paginate(client.actions.listSelfHostedRunnersForRepo, { - owner: repo.repoOwner, - repo: repo.repoName, + owner: runner.owner.split('/')[0], + repo: runner.owner.split('/')[1], }); - cache.set(key, runners); + githubCache.runners.set(key, runners); - return runners; - }; + return runners; } -function runnerMinimumTimeExceeded(runner: RunnerInfo, minimumRunningTimeInMinutes: string): boolean { +function runnerMinimumTimeExceeded(runner: RunnerInfo): boolean { + const minimumRunningTimeInMinutes = process.env.MINIMUM_RUNNING_TIME_IN_MINUTES; const launchTimePlusMinimum = moment(runner.launchTime).utc().add(minimumRunningTimeInMinutes, 'minutes'); const now = moment(new Date()).utc(); return launchTimePlusMinimum < now; } -async function removeRunner( - ec2runner: RunnerInfo, - ghRunnerId: number, - repo: Repo, - enableOrgLevel: boolean, - githubAppClient: Octokit, -): Promise { +function bootTimeExceeded(ec2Runner: RunnerInfo): boolean { + const runnerBootTimeInMinutes = process.env.RUNNER_BOOT_TIME_IN_MINUTES; + const launchTimePlusBootTime = moment(ec2Runner.launchTime).utc().add(runnerBootTimeInMinutes, 'minutes'); + return 
launchTimePlusBootTime < moment(new Date()).utc(); +} + +async function removeRunner(ec2runner: RunnerInfo, ghRunnerId: number): Promise { + const githubAppClient = await getOrCreateOctokit(ec2runner); try { - const result = enableOrgLevel - ? await githubAppClient.actions.deleteSelfHostedRunnerFromOrg({ - runner_id: ghRunnerId, - org: repo.repoOwner, - }) - : await githubAppClient.actions.deleteSelfHostedRunnerFromRepo({ - runner_id: ghRunnerId, - owner: repo.repoOwner, - repo: repo.repoName, - }); + const result = + ec2runner.type === 'Org' + ? await githubAppClient.actions.deleteSelfHostedRunnerFromOrg({ + runner_id: ghRunnerId, + org: ec2runner.owner, + }) + : await githubAppClient.actions.deleteSelfHostedRunnerFromRepo({ + runner_id: ghRunnerId, + owner: ec2runner.owner.split('/')[0], + repo: ec2runner.owner.split('/')[1], + }); if (result.status == 204) { - await terminateRunner(ec2runner); + await terminateRunner(ec2runner.instanceId); console.info(`AWS runner instance '${ec2runner.instanceId}' is terminated and GitHub runner is de-registered.`); + } else { + console.error(`Failed to de-register GitHub runner: ${result.status}`); } } catch (e) { console.debug(`Runner '${ec2runner.instanceId}' cannot be de-registered, most likely the runner is active.`); } } -export async function scaleDown(): Promise { - const scaleDownConfigs = JSON.parse(process.env.SCALE_DOWN_CONFIG) as [ScalingDownConfig]; - const enableOrgLevel = JSON.parse(process.env.ENABLE_ORGANIZATION_RUNNERS || 'true') as boolean; - const environment = process.env.ENVIRONMENT; - const minimumRunningTimeInMinutes = process.env.MINIMUM_RUNNING_TIME_IN_MINUTES; +async function evaluateAndRemoveRunners( + ec2Runners: RunnerInfo[], + scaleDownConfigs: ScalingDownConfig[], +): Promise { let idleCounter = getIdleRunnerCount(scaleDownConfigs); + const ownerTags = new Set(ec2Runners.map((runner) => runner.owner)); + + for (const ownerTag of ownerTags) { + const ec2RunnersFiltered = 
ec2Runners.filter((runner) => runner.owner === ownerTag); + for (const ec2Runner of ec2RunnersFiltered) { + const ghRunners = await listGitHubRunners(ec2Runner); + const ghRunner = ghRunners.find((runner) => runner.name === ec2Runner.instanceId); + if (ghRunner) { + if (runnerMinimumTimeExceeded(ec2Runner)) { + if (idleCounter > 0) { + idleCounter--; + console.debug(`Runner '${ec2Runner.instanceId}' will kept idle.`); + } else { + console.debug(`Runner '${ec2Runner.instanceId}' will be terminated.`); + await removeRunner(ec2Runner, ghRunner.id); + } + } + } else { + if (bootTimeExceeded(ec2Runner)) { + console.debug(`Runner '${ec2Runner.instanceId}' is orphaned and will be removed.`); + terminateOrphan(ec2Runner.instanceId); + } else { + console.debug(`Runner ${ec2Runner.instanceId} has not yet booted.`); + } + } + } + } +} - // list and sort runners, newest first. This ensure we keep the newest runners longer. - const runners = ( - await listRunners({ +async function terminateOrphan(instanceId: string): Promise { + try { + await terminateRunner(instanceId); + } catch (e) { + console.debug(`Orphan runner '${instanceId}' cannot be removed.`); + } +} + +async function listAndSortRunners(environment: string) { + return ( + await listEC2Runners({ environment, }) ).sort((a, b): number => { @@ -147,46 +160,42 @@ export async function scaleDown(): Promise { if (a.launchTime > b.launchTime) return -1; return 0; }); +} - if (runners.length === 0) { - console.debug(`No active runners found for environment: '${environment}'`); - return; - } +/** + * We are moving to a new strategy to find and remove runners, this function will ensure + * during migration runners tagged in the old way are removed. + */ +function filterLegacyRunners(ec2runners: RunnerList[]): RunnerInfo[] { + return ec2runners + .filter((ec2Runner) => ec2Runner.repo || ec2Runner.org) + .map((ec2Runner) => ({ + instanceId: ec2Runner.instanceId, + launchTime: ec2Runner.launchTime, + type: ec2Runner.org ? 
'Org' : 'Repo', + owner: ec2Runner.org ? (ec2Runner.org as string) : (ec2Runner.repo as string), + })); +} - const createGitHubClientForRunner = createGitHubClientForRunnerFactory(); - const listGithubRunners = listGithubRunnersFactory(); +function filterRunners(ec2runners: RunnerList[]): RunnerInfo[] { + return ec2runners.filter((ec2Runner) => ec2Runner.type) as RunnerInfo[]; +} - for (const ec2runner of runners) { - if (!runnerMinimumTimeExceeded(ec2runner, minimumRunningTimeInMinutes)) { - continue; - } +export async function scaleDown(): Promise { + const scaleDownConfigs = JSON.parse(process.env.SCALE_DOWN_CONFIG) as [ScalingDownConfig]; + const environment = process.env.ENVIRONMENT; - const githubAppClient = await createGitHubClientForRunner(ec2runner, enableOrgLevel); - - const ghRunners = await listGithubRunners(githubAppClient, ec2runner, enableOrgLevel); - let orphanEc2Runner = true; - for (const ghRunner of ghRunners) { - const runnerName = ghRunner.name as string; - if (runnerName === ec2runner.instanceId) { - orphanEc2Runner = false; - if (idleCounter > 0) { - idleCounter--; - console.debug(`Runner '${ec2runner.instanceId}' will kept idle.`); - } else { - const repo = getRepo(ec2runner, enableOrgLevel); - await removeRunner(ec2runner, ghRunner.id, repo, enableOrgLevel, githubAppClient); - } - } - } + // list and sort runners, newest first. This ensure we keep the newest runners longer. + const ec2Runners = await listAndSortRunners(environment); - // Remove orphan AWS runners. 
- if (orphanEc2Runner) { - console.info(`Runner '${ec2runner.instanceId}' is orphan, and will be removed.`); - try { - await terminateRunner(ec2runner); - } catch (e) { - console.debug(`Orphan runner '${ec2runner.instanceId}' cannot be removed.`); - } - } + if (ec2Runners.length === 0) { + console.debug(`No active runners found for environment: '${environment}'`); + return; } + const legacyRunners = filterLegacyRunners(ec2Runners); + console.log(JSON.stringify(legacyRunners)); + const runners = filterRunners(ec2Runners); + + await evaluateAndRemoveRunners(runners, scaleDownConfigs); + await evaluateAndRemoveRunners(legacyRunners, scaleDownConfigs); } diff --git a/modules/runners/lambdas/runners/src/scale-runners/scale-up.test.ts b/modules/runners/lambdas/runners/src/scale-runners/scale-up.test.ts index 9c3049b3bd..765afa0dbd 100644 --- a/modules/runners/lambdas/runners/src/scale-runners/scale-up.test.ts +++ b/modules/runners/lambdas/runners/src/scale-runners/scale-up.test.ts @@ -1,6 +1,6 @@ import { mocked } from 'ts-jest/utils'; import * as scaleUpModule from './scale-up'; -import { listRunners, createRunner, RunnerInputParameters } from './runners'; +import { listEC2Runners, createRunner, RunnerInputParameters } from './runners'; import * as ghAuth from './gh-auth'; import nock from 'nock'; import { Octokit } from '@octokit/rest'; @@ -103,13 +103,13 @@ beforeEach(() => { mockOctokit.actions.createRegistrationTokenForRepo.mockImplementation(() => mockTokenReturnValue); mockOctokit.apps.getOrgInstallation.mockImplementation(() => mockInstallationIdReturnValueOrgs); mockOctokit.apps.getRepoInstallation.mockImplementation(() => mockInstallationIdReturnValueRepos); - const mockListRunners = mocked(listRunners); + const mockListRunners = mocked(listEC2Runners); mockListRunners.mockImplementation(async () => [ { instanceId: 'i-1234', launchTime: new Date(), - repo: `${TEST_DATA.repositoryOwner}/${TEST_DATA.repositoryName}`, - org: TEST_DATA.repositoryOwner, + type: 
'Org', + owner: TEST_DATA.repositoryOwner, }, ]); }); @@ -156,7 +156,7 @@ describe('scaleUp with GHES', () => { data: { total_count: 0 }, })); await scaleUpModule.scaleUp('aws:sqs', TEST_DATA); - expect(listRunners).not.toBeCalled(); + expect(listEC2Runners).not.toBeCalled(); }); describe('on org level', () => { @@ -167,7 +167,7 @@ describe('scaleUp with GHES', () => { it('gets the current org level runners', async () => { await scaleUpModule.scaleUp('aws:sqs', TEST_DATA); - expect(listRunners).toBeCalledWith({ + expect(listEC2Runners).toBeCalledWith({ environment: 'unit-test-environment', runnerType: 'Org', runnerOwner: TEST_DATA.repositoryOwner, @@ -265,7 +265,7 @@ describe('scaleUp with GHES', () => { it('gets the current repo level runners', async () => { await scaleUpModule.scaleUp('aws:sqs', TEST_DATA); - expect(listRunners).toBeCalledWith({ + expect(listEC2Runners).toBeCalledWith({ environment: 'unit-test-environment', runnerType: 'Repo', runnerOwner: `${TEST_DATA.repositoryOwner}/${TEST_DATA.repositoryName}`, @@ -399,7 +399,7 @@ describe('scaleUp with public GH', () => { data: { status: 'completed' }, })); await scaleUpModule.scaleUp('aws:sqs', TEST_DATA); - expect(listRunners).not.toBeCalled(); + expect(listEC2Runners).not.toBeCalled(); }); describe('on org level', () => { @@ -412,7 +412,7 @@ describe('scaleUp with public GH', () => { it('gets the current org level runners', async () => { await scaleUpModule.scaleUp('aws:sqs', TEST_DATA); - expect(listRunners).toBeCalledWith({ + expect(listEC2Runners).toBeCalledWith({ environment: 'unit-test-environment', runnerType: 'Org', runnerOwner: TEST_DATA.repositoryOwner, @@ -475,7 +475,7 @@ describe('scaleUp with public GH', () => { it('gets the current repo level runners', async () => { await scaleUpModule.scaleUp('aws:sqs', TEST_DATA); - expect(listRunners).toBeCalledWith({ + expect(listEC2Runners).toBeCalledWith({ environment: 'unit-test-environment', runnerType: 'Repo', runnerOwner: 
`${TEST_DATA.repositoryOwner}/${TEST_DATA.repositoryName}`, diff --git a/modules/runners/lambdas/runners/src/scale-runners/scale-up.ts b/modules/runners/lambdas/runners/src/scale-runners/scale-up.ts index ecc577c865..63a8f95111 100644 --- a/modules/runners/lambdas/runners/src/scale-runners/scale-up.ts +++ b/modules/runners/lambdas/runners/src/scale-runners/scale-up.ts @@ -1,4 +1,4 @@ -import { listRunners, createRunner, RunnerInputParameters } from './runners'; +import { listEC2Runners, createRunner, RunnerInputParameters } from './runners'; import { createOctoClient, createGithubAppAuth, createGithubInstallationAuth } from './gh-auth'; import yn from 'yn'; import { Octokit } from '@octokit/rest'; @@ -50,7 +50,7 @@ export const scaleUp = async (eventSource: string, payload: ActionRequestMessage const isQueued = await getJobStatus(githubInstallationClient, payload); if (isQueued) { - const currentRunners = await listRunners({ + const currentRunners = await listEC2Runners({ environment, runnerType, runnerOwner, diff --git a/modules/runners/scale-down.tf b/modules/runners/scale-down.tf index b000726353..431cdc8ddb 100644 --- a/modules/runners/scale-down.tf +++ b/modules/runners/scale-down.tf @@ -14,8 +14,8 @@ resource "aws_lambda_function" "scale_down" { environment { variables = { ENVIRONMENT = var.environment - ENABLE_ORGANIZATION_RUNNERS = var.enable_organization_runners MINIMUM_RUNNING_TIME_IN_MINUTES = var.minimum_running_time_in_minutes + RUNNER_BOOT_TIME_IN_MINUTES = var.runner_boot_time_in_minutes SCALE_DOWN_CONFIG = jsonencode(var.idle_config) GHES_URL = var.ghes_url PARAMETER_GITHUB_APP_CLIENT_ID_NAME = var.github_app_parameters.client_id.name diff --git a/modules/runners/variables.tf b/modules/runners/variables.tf index 07803b87fb..bc3f6be0c0 100644 --- a/modules/runners/variables.tf +++ b/modules/runners/variables.tf @@ -135,6 +135,12 @@ variable "minimum_running_time_in_minutes" { default = 5 } +variable "runner_boot_time_in_minutes" { + description = 
"The minimum time for an EC2 runner to boot and register as a runner." + type = number + default = 5 +} + variable "runner_extra_labels" { description = "Extra labels for the runners (GitHub). Separate each label by a comma" type = string diff --git a/variables.tf b/variables.tf index 3b3eeeeda2..fca140b685 100644 --- a/variables.tf +++ b/variables.tf @@ -48,7 +48,13 @@ variable "scale_down_schedule_expression" { } variable "minimum_running_time_in_minutes" { - description = "The time an ec2 action runner should be running at minimum before terminated if non busy." + description = "The time an ec2 action runner should be running at minimum before terminated if not busy." + type = number + default = 5 +} + +variable "runner_boot_time_in_minutes" { + description = "The minimum time for an EC2 runner to boot and register as a runner." type = number default = 5 }