diff --git a/README.md b/README.md index 6e5994a305..77857ca6a0 100644 --- a/README.md +++ b/README.md @@ -155,6 +155,8 @@ Join our discord community via [this invite link](https://discord.gg/bxgXW8jJGh) | [key\_name](#input\_key\_name) | Key pair name | `string` | `null` | no | | [kms\_key\_arn](#input\_kms\_key\_arn) | Optional CMK Key ARN to be used for Parameter Store. This key must be in the current account. | `string` | `null` | no | | [lambda\_architecture](#input\_lambda\_architecture) | AWS Lambda architecture. Lambda functions using Graviton processors ('arm64') tend to have better price/performance than 'x86\_64' functions. | `string` | `"arm64"` | no | +| [lambda\_event\_source\_mapping\_batch\_size](#input\_lambda\_event\_source\_mapping\_batch\_size) | Maximum number of records to pass to the lambda function in a single batch for the event source mapping. When not set, the AWS default of 10 events will be used. | `number` | `10` | no | +| [lambda\_event\_source\_mapping\_maximum\_batching\_window\_in\_seconds](#input\_lambda\_event\_source\_mapping\_maximum\_batching\_window\_in\_seconds) | Maximum amount of time to gather records before invoking the lambda function, in seconds. AWS requires this to be greater than 0 if batch\_size is greater than 10. Defaults to 0. | `number` | `0` | no | | [lambda\_principals](#input\_lambda\_principals) | (Optional) add extra principals to the role created for execution of the lambda, e.g. for local testing. |
list(object({| `[]` | no | | [lambda\_runtime](#input\_lambda\_runtime) | AWS Lambda runtime. | `string` | `"nodejs22.x"` | no | | [lambda\_s3\_bucket](#input\_lambda\_s3\_bucket) | S3 bucket from which to specify lambda functions. This is an alternative to providing local files directly. | `string` | `null` | no | diff --git a/lambdas/functions/control-plane/src/aws/runners.test.ts b/lambdas/functions/control-plane/src/aws/runners.test.ts index c4ec328c9b..8d508fbf5e 100644 --- a/lambdas/functions/control-plane/src/aws/runners.test.ts +++ b/lambdas/functions/control-plane/src/aws/runners.test.ts @@ -419,9 +419,12 @@ describe('create runner with errors', () => { }); it('test ScaleError with multiple error.', async () => { - createFleetMockWithErrors(['UnfulfillableCapacity', 'SomeError']); + createFleetMockWithErrors(['UnfulfillableCapacity', 'MaxSpotInstanceCountExceeded', 'NotMappedError']); - await expect(createRunner(createRunnerConfig(defaultRunnerConfig))).rejects.toBeInstanceOf(ScaleError); + await expect(createRunner(createRunnerConfig(defaultRunnerConfig))).rejects.toMatchObject({ + name: 'ScaleError', + failedInstanceCount: 2, + }); expect(mockEC2Client).toHaveReceivedCommandWith( CreateFleetCommand, expectedCreateFleetRequest(defaultExpectedFleetRequestValues), @@ -521,7 +524,7 @@ describe('create runner with errors fail over to OnDemand', () => { expect(mockEC2Client).toHaveReceivedCommandTimes(CreateFleetCommand, 2); - // first call with spot failuer + // first call with spot failure expect(mockEC2Client).toHaveReceivedNthCommandWith(1, CreateFleetCommand, { ...expectedCreateFleetRequest({ ...defaultExpectedFleetRequestValues, @@ -530,7 +533,7 @@ describe('create runner with errors fail over to OnDemand', () => { }), }); - // second call with with OnDemand failback + // second call with with OnDemand fallback expect(mockEC2Client).toHaveReceivedNthCommandWith(2, CreateFleetCommand, { ...expectedCreateFleetRequest({ ...defaultExpectedFleetRequestValues, @@ -540,13 +543,13 @@ describe('create runner with errors fail over to OnDemand', () => { }); }); - it('test InsufficientInstanceCapacity no failback.', async () => { + it('test InsufficientInstanceCapacity no fallback.', async () => { await expect( createRunner(createRunnerConfig({ ...defaultRunnerConfig, onDemandFailoverOnError: [] })), ).rejects.toBeInstanceOf(Error); }); - it('test InsufficientInstanceCapacity with mutlipte instances and fallback to on demand .', async () => { + it('test InsufficientInstanceCapacity with multiple instances and fallback to on demand .', async () => { const instancesIds = ['i-123', 'i-456']; createFleetMockWithWithOnDemandFallback(['InsufficientInstanceCapacity'], instancesIds); diff --git a/lambdas/functions/control-plane/src/aws/runners.ts b/lambdas/functions/control-plane/src/aws/runners.ts index 6779dd39d2..d95dc99fa4 100644 --- a/lambdas/functions/control-plane/src/aws/runners.ts +++ b/lambdas/functions/control-plane/src/aws/runners.ts @@ -166,53 +166,62 @@ async function processFleetResult( ): Promise
type = string
identifiers = list(string)
}))
list(object({| `[]` | no | | [lambda\_runtime](#input\_lambda\_runtime) | AWS Lambda runtime. | `string` | `"nodejs22.x"` | no | | [lambda\_s3\_bucket](#input\_lambda\_s3\_bucket) | S3 bucket from which to specify lambda functions. This is an alternative to providing local files directly. | `string` | `null` | no | diff --git a/modules/multi-runner/runners.tf b/modules/multi-runner/runners.tf index 811ab36260..d58e61f6ac 100644 --- a/modules/multi-runner/runners.tf +++ b/modules/multi-runner/runners.tf @@ -58,28 +58,30 @@ module "runners" { credit_specification = each.value.runner_config.credit_specification cpu_options = each.value.runner_config.cpu_options - enable_runner_binaries_syncer = each.value.runner_config.enable_runner_binaries_syncer - lambda_s3_bucket = var.lambda_s3_bucket - runners_lambda_s3_key = var.runners_lambda_s3_key - runners_lambda_s3_object_version = var.runners_lambda_s3_object_version - lambda_runtime = var.lambda_runtime - lambda_architecture = var.lambda_architecture - lambda_zip = var.runners_lambda_zip - lambda_scale_up_memory_size = var.scale_up_lambda_memory_size - lambda_timeout_scale_up = var.runners_scale_up_lambda_timeout - lambda_scale_down_memory_size = var.scale_down_lambda_memory_size - lambda_timeout_scale_down = var.runners_scale_down_lambda_timeout - lambda_subnet_ids = var.lambda_subnet_ids - lambda_security_group_ids = var.lambda_security_group_ids - lambda_tags = var.lambda_tags - tracing_config = var.tracing_config - logging_retention_in_days = var.logging_retention_in_days - logging_kms_key_id = var.logging_kms_key_id - enable_cloudwatch_agent = each.value.runner_config.enable_cloudwatch_agent - cloudwatch_config = try(coalesce(each.value.runner_config.cloudwatch_config, var.cloudwatch_config), null) - runner_log_files = each.value.runner_config.runner_log_files - runner_group_name = each.value.runner_config.runner_group_name - runner_name_prefix = each.value.runner_config.runner_name_prefix + enable_runner_binaries_syncer = each.value.runner_config.enable_runner_binaries_syncer + lambda_s3_bucket = var.lambda_s3_bucket + runners_lambda_s3_key = var.runners_lambda_s3_key + runners_lambda_s3_object_version = var.runners_lambda_s3_object_version + lambda_runtime = var.lambda_runtime + lambda_architecture = var.lambda_architecture + lambda_zip = var.runners_lambda_zip + lambda_scale_up_memory_size = var.scale_up_lambda_memory_size + lambda_event_source_mapping_batch_size = var.lambda_event_source_mapping_batch_size + lambda_event_source_mapping_maximum_batching_window_in_seconds = var.lambda_event_source_mapping_maximum_batching_window_in_seconds + lambda_timeout_scale_up = var.runners_scale_up_lambda_timeout + lambda_scale_down_memory_size = var.scale_down_lambda_memory_size + lambda_timeout_scale_down = var.runners_scale_down_lambda_timeout + lambda_subnet_ids = var.lambda_subnet_ids + lambda_security_group_ids = var.lambda_security_group_ids + lambda_tags = var.lambda_tags + tracing_config = var.tracing_config + logging_retention_in_days = var.logging_retention_in_days + logging_kms_key_id = var.logging_kms_key_id + enable_cloudwatch_agent = each.value.runner_config.enable_cloudwatch_agent + cloudwatch_config = try(coalesce(each.value.runner_config.cloudwatch_config, var.cloudwatch_config), null) + runner_log_files = each.value.runner_config.runner_log_files + runner_group_name = each.value.runner_config.runner_group_name + runner_name_prefix = each.value.runner_config.runner_name_prefix scale_up_reserved_concurrent_executions = each.value.runner_config.scale_up_reserved_concurrent_executions diff --git a/modules/multi-runner/variables.tf b/modules/multi-runner/variables.tf index edbdb33059..2c7c7c69e5 100644 --- a/modules/multi-runner/variables.tf +++ b/modules/multi-runner/variables.tf @@ -718,3 +718,15 @@ variable "user_agent" { type = string default = "github-aws-runners" } + +variable "lambda_event_source_mapping_batch_size" { + description = "Maximum number of records to pass to the lambda function in a single batch for the event source mapping. When not set, the AWS default of 10 events will be used." + type = number + default = 10 +} + +variable "lambda_event_source_mapping_maximum_batching_window_in_seconds" { + description = "Maximum amount of time to gather records before invoking the lambda function, in seconds. AWS requires this to be greater than 0 if batch_size is greater than 10. Defaults to 0." + type = number + default = 0 +} diff --git a/modules/runners/README.md b/modules/runners/README.md index cf62c2c96a..45807f67bf 100644 --- a/modules/runners/README.md +++ b/modules/runners/README.md @@ -177,6 +177,8 @@ yarn run dist | [key\_name](#input\_key\_name) | Key pair name | `string` | `null` | no | | [kms\_key\_arn](#input\_kms\_key\_arn) | Optional CMK Key ARN to be used for Parameter Store. | `string` | `null` | no | | [lambda\_architecture](#input\_lambda\_architecture) | AWS Lambda architecture. Lambda functions using Graviton processors ('arm64') tend to have better price/performance than 'x86\_64' functions. | `string` | `"arm64"` | no | +| [lambda\_event\_source\_mapping\_batch\_size](#input\_lambda\_event\_source\_mapping\_batch\_size) | Maximum number of records to pass to the lambda function in a single batch for the event source mapping. When not set, the AWS default of 10 events will be used. | `number` | `10` | no | +| [lambda\_event\_source\_mapping\_maximum\_batching\_window\_in\_seconds](#input\_lambda\_event\_source\_mapping\_maximum\_batching\_window\_in\_seconds) | Maximum amount of time to gather records before invoking the lambda function, in seconds. AWS requires this to be greater than 0 if batch\_size is greater than 10. Defaults to 0. | `number` | `0` | no | | [lambda\_runtime](#input\_lambda\_runtime) | AWS Lambda runtime. | `string` | `"nodejs22.x"` | no | | [lambda\_s3\_bucket](#input\_lambda\_s3\_bucket) | S3 bucket from which to specify lambda functions. This is an alternative to providing local files directly. | `string` | `null` | no | | [lambda\_scale\_down\_memory\_size](#input\_lambda\_scale\_down\_memory\_size) | Memory size limit in MB for scale down lambda. | `number` | `512` | no | diff --git a/modules/runners/job-retry.tf b/modules/runners/job-retry.tf index e51c3903d4..130992667f 100644 --- a/modules/runners/job-retry.tf +++ b/modules/runners/job-retry.tf @@ -3,30 +3,32 @@ locals { job_retry_enabled = var.job_retry != null && var.job_retry.enable ? true : false job_retry = { - prefix = var.prefix - tags = local.tags - aws_partition = var.aws_partition - architecture = var.lambda_architecture - runtime = var.lambda_runtime - security_group_ids = var.lambda_security_group_ids - subnet_ids = var.lambda_subnet_ids - kms_key_arn = var.kms_key_arn - lambda_tags = var.lambda_tags - log_level = var.log_level - logging_kms_key_id = var.logging_kms_key_id - logging_retention_in_days = var.logging_retention_in_days - metrics = var.metrics - role_path = var.role_path - role_permissions_boundary = var.role_permissions_boundary - s3_bucket = var.lambda_s3_bucket - s3_key = var.runners_lambda_s3_key - s3_object_version = var.runners_lambda_s3_object_version - zip = var.lambda_zip - tracing_config = var.tracing_config - github_app_parameters = var.github_app_parameters - enable_organization_runners = var.enable_organization_runners - sqs_build_queue = var.sqs_build_queue - ghes_url = var.ghes_url + prefix = var.prefix + tags = local.tags + aws_partition = var.aws_partition + architecture = var.lambda_architecture + runtime = var.lambda_runtime + security_group_ids = var.lambda_security_group_ids + subnet_ids = var.lambda_subnet_ids + kms_key_arn = var.kms_key_arn + lambda_tags = var.lambda_tags + log_level = var.log_level + logging_kms_key_id = var.logging_kms_key_id + logging_retention_in_days = var.logging_retention_in_days + metrics = var.metrics + role_path = var.role_path + role_permissions_boundary = var.role_permissions_boundary + s3_bucket = var.lambda_s3_bucket + s3_key = var.runners_lambda_s3_key + s3_object_version = var.runners_lambda_s3_object_version + zip = var.lambda_zip + tracing_config = var.tracing_config + github_app_parameters = var.github_app_parameters + enable_organization_runners = var.enable_organization_runners + sqs_build_queue = var.sqs_build_queue + ghes_url = var.ghes_url + lambda_event_source_mapping_batch_size = var.lambda_event_source_mapping_batch_size + lambda_event_source_mapping_maximum_batching_window_in_seconds = var.lambda_event_source_mapping_maximum_batching_window_in_seconds } } diff --git a/modules/runners/job-retry/README.md b/modules/runners/job-retry/README.md index 91089a213b..4f4c80921c 100644 --- a/modules/runners/job-retry/README.md +++ b/modules/runners/job-retry/README.md @@ -42,7 +42,7 @@ The module is an inner module and used by the runner module when the opt-in feat | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| -| [config](#input\_config) | Configuration for the spot termination watcher lambda function.
type = string
identifiers = list(string)
}))
object({| n/a | yes | +| [config](#input\_config) | Configuration for the spot termination watcher lambda function.
aws_partition = optional(string, null)
architecture = optional(string, null)
enable_organization_runners = bool
environment_variables = optional(map(string), {})
ghes_url = optional(string, null)
user_agent = optional(string, null)
github_app_parameters = object({
key_base64 = map(string)
id = map(string)
})
kms_key_arn = optional(string, null)
lambda_tags = optional(map(string), {})
log_level = optional(string, null)
logging_kms_key_id = optional(string, null)
logging_retention_in_days = optional(number, null)
memory_size = optional(number, null)
metrics = optional(object({
enable = optional(bool, false)
namespace = optional(string, null)
metric = optional(object({
enable_github_app_rate_limit = optional(bool, true)
enable_job_retry = optional(bool, true)
}), {})
}), {})
prefix = optional(string, null)
principals = optional(list(object({
type = string
identifiers = list(string)
})), [])
queue_encryption = optional(object({
kms_data_key_reuse_period_seconds = optional(number, null)
kms_master_key_id = optional(string, null)
sqs_managed_sse_enabled = optional(bool, true)
}), {})
role_path = optional(string, null)
role_permissions_boundary = optional(string, null)
runtime = optional(string, null)
security_group_ids = optional(list(string), [])
subnet_ids = optional(list(string), [])
s3_bucket = optional(string, null)
s3_key = optional(string, null)
s3_object_version = optional(string, null)
sqs_build_queue = object({
url = string
arn = string
})
tags = optional(map(string), {})
timeout = optional(number, 30)
tracing_config = optional(object({
mode = optional(string, null)
capture_http_requests = optional(bool, false)
capture_error = optional(bool, false)
}), {})
zip = optional(string, null)
})
object({| n/a | yes | ## Outputs diff --git a/modules/runners/job-retry/main.tf b/modules/runners/job-retry/main.tf index 9561c7db71..612c515f8c 100644 --- a/modules/runners/job-retry/main.tf +++ b/modules/runners/job-retry/main.tf @@ -44,9 +44,10 @@ module "job_retry" { } resource "aws_lambda_event_source_mapping" "job_retry" { - event_source_arn = aws_sqs_queue.job_retry_check_queue.arn - function_name = module.job_retry.lambda.function.arn - batch_size = 1 + event_source_arn = aws_sqs_queue.job_retry_check_queue.arn + function_name = module.job_retry.lambda.function.arn + batch_size = var.config.lambda_event_source_mapping_batch_size + maximum_batching_window_in_seconds = var.config.lambda_event_source_mapping_maximum_batching_window_in_seconds } resource "aws_lambda_permission" "job_retry" { diff --git a/modules/runners/job-retry/variables.tf b/modules/runners/job-retry/variables.tf index 4a8fe19fbf..f40bec1ba7 100644 --- a/modules/runners/job-retry/variables.tf +++ b/modules/runners/job-retry/variables.tf @@ -11,6 +11,8 @@ variable "config" { 'user_agent': Optional User-Agent header for GitHub API requests. 'github_app_parameters': Parameter Store for GitHub App Parameters. 'kms_key_arn': Optional CMK Key ARN instead of using the default AWS managed key. + `lambda_event_source_mapping_batch_size`: Maximum number of records to pass to the lambda function in a single batch for the event source mapping. When not set, the AWS default will be used. + `lambda_event_source_mapping_maximum_batching_window_in_seconds`: Maximum amount of time to gather records before invoking the lambda function, in seconds. AWS requires this to be greater than 0 if batch_size is greater than 10. `lambda_principals`: Add extra principals to the role created for execution of the lambda, e.g. for local testing. `lambda_tags`: Map of tags that will be added to created resources. By default resources will be tagged with name and environment. `log_level`: Logging level for lambda logging. Valid values are 'silly', 'trace', 'debug', 'info', 'warn', 'error', 'fatal'. @@ -45,12 +47,14 @@ variable "config" { key_base64 = map(string) id = map(string) }) - kms_key_arn = optional(string, null) - lambda_tags = optional(map(string), {}) - log_level = optional(string, null) - logging_kms_key_id = optional(string, null) - logging_retention_in_days = optional(number, null) - memory_size = optional(number, null) + kms_key_arn = optional(string, null) + lambda_event_source_mapping_batch_size = optional(number, 10) + lambda_event_source_mapping_maximum_batching_window_in_seconds = optional(number, 0) + lambda_tags = optional(map(string), {}) + log_level = optional(string, null) + logging_kms_key_id = optional(string, null) + logging_retention_in_days = optional(number, null) + memory_size = optional(number, null) metrics = optional(object({ enable = optional(bool, false) namespace = optional(string, null) diff --git a/modules/runners/scale-up.tf b/modules/runners/scale-up.tf index 9230267c07..a9985aec40 100644 --- a/modules/runners/scale-up.tf +++ b/modules/runners/scale-up.tf @@ -87,9 +87,11 @@ resource "aws_cloudwatch_log_group" "scale_up" { } resource "aws_lambda_event_source_mapping" "scale_up" { - event_source_arn = var.sqs_build_queue.arn - function_name = aws_lambda_function.scale_up.arn - batch_size = 1 + event_source_arn = var.sqs_build_queue.arn + function_name = aws_lambda_function.scale_up.arn + function_response_types = ["ReportBatchItemFailures"] + batch_size = var.lambda_event_source_mapping_batch_size + maximum_batching_window_in_seconds = var.lambda_event_source_mapping_maximum_batching_window_in_seconds } resource "aws_lambda_permission" "scale_runners_lambda" { diff --git a/modules/runners/variables.tf b/modules/runners/variables.tf index a78231e7da..c71fa77898 100644 --- a/modules/runners/variables.tf +++ b/modules/runners/variables.tf @@ -770,3 +770,23 @@ variable "user_agent" { type = string default = null } + +variable "lambda_event_source_mapping_batch_size" { + description = "Maximum number of records to pass to the lambda function in a single batch for the event source mapping. When not set, the AWS default of 10 events will be used." + type = number + default = 10 + validation { + condition = var.lambda_event_source_mapping_batch_size >= 1 && var.lambda_event_source_mapping_batch_size <= 1000 + error_message = "The batch size for the lambda event source mapping must be between 1 and 1000." + } +} + +variable "lambda_event_source_mapping_maximum_batching_window_in_seconds" { + description = "Maximum amount of time to gather records before invoking the lambda function, in seconds. AWS requires this to be greater than 0 if batch_size is greater than 10. Defaults to 0." + type = number + default = 0 + validation { + condition = var.lambda_event_source_mapping_maximum_batching_window_in_seconds >= 0 && var.lambda_event_source_mapping_maximum_batching_window_in_seconds <= 300 + error_message = "Maximum batching window must be between 0 and 300 seconds." + } +} diff --git a/variables.tf b/variables.tf index f412d2a486..07deb83cf9 100644 --- a/variables.tf +++ b/variables.tf @@ -1016,3 +1016,19 @@ variable "user_agent" { type = string default = "github-aws-runners" } + +variable "lambda_event_source_mapping_batch_size" { + description = "Maximum number of records to pass to the lambda function in a single batch for the event source mapping. When not set, the AWS default of 10 events will be used." + type = number + default = 10 +} + +variable "lambda_event_source_mapping_maximum_batching_window_in_seconds" { + description = "Maximum amount of time to gather records before invoking the lambda function, in seconds. AWS requires this to be greater than 0 if batch_size is greater than 10. Defaults to 0." + type = number + default = 0 + validation { + condition = var.lambda_event_source_mapping_maximum_batching_window_in_seconds >= 0 && var.lambda_event_source_mapping_maximum_batching_window_in_seconds <= 300 + error_message = "Maximum batching window must be between 0 and 300 seconds." + } +}
aws_partition = optional(string, null)
architecture = optional(string, null)
enable_organization_runners = bool
environment_variables = optional(map(string), {})
ghes_url = optional(string, null)
user_agent = optional(string, null)
github_app_parameters = object({
key_base64 = map(string)
id = map(string)
})
kms_key_arn = optional(string, null)
lambda_event_source_mapping_batch_size = optional(number, 10)
lambda_event_source_mapping_maximum_batching_window_in_seconds = optional(number, 0)
lambda_tags = optional(map(string), {})
log_level = optional(string, null)
logging_kms_key_id = optional(string, null)
logging_retention_in_days = optional(number, null)
memory_size = optional(number, null)
metrics = optional(object({
enable = optional(bool, false)
namespace = optional(string, null)
metric = optional(object({
enable_github_app_rate_limit = optional(bool, true)
enable_job_retry = optional(bool, true)
}), {})
}), {})
prefix = optional(string, null)
principals = optional(list(object({
type = string
identifiers = list(string)
})), [])
queue_encryption = optional(object({
kms_data_key_reuse_period_seconds = optional(number, null)
kms_master_key_id = optional(string, null)
sqs_managed_sse_enabled = optional(bool, true)
}), {})
role_path = optional(string, null)
role_permissions_boundary = optional(string, null)
runtime = optional(string, null)
security_group_ids = optional(list(string), [])
subnet_ids = optional(list(string), [])
s3_bucket = optional(string, null)
s3_key = optional(string, null)
s3_object_version = optional(string, null)
sqs_build_queue = object({
url = string
arn = string
})
tags = optional(map(string), {})
timeout = optional(number, 30)
tracing_config = optional(object({
mode = optional(string, null)
capture_http_requests = optional(bool, false)
capture_error = optional(bool, false)
}), {})
zip = optional(string, null)
})