From 38df77fa10b109774b114b86b482ae82f2584738 Mon Sep 17 00:00:00 2001 From: xuetinp <113963935+xuetinp@users.noreply.github.com> Date: Wed, 21 Sep 2022 14:37:10 +0000 Subject: [PATCH] fix(core): fix rate limit errors when deploying cloudwatch log groups (#822) --- .../lib/core/lib/exporting-log-group.ts | 16 +++ .../lib/core/lib/log-group-factory.ts | 20 +++- .../lib/core/test/exporting-log-group.test.ts | 8 ++ .../lib/core/test/log-group-factory.test.ts | 98 +++++++++++++++++++ 4 files changed, 141 insertions(+), 1 deletion(-) diff --git a/packages/aws-rfdk/lib/core/lib/exporting-log-group.ts b/packages/aws-rfdk/lib/core/lib/exporting-log-group.ts index e3e3e1c24..684f15bfc 100644 --- a/packages/aws-rfdk/lib/core/lib/exporting-log-group.ts +++ b/packages/aws-rfdk/lib/core/lib/exporting-log-group.ts @@ -17,6 +17,7 @@ import { Code, Runtime, SingletonFunction, + CfnFunction, } from 'aws-cdk-lib/aws-lambda'; import { ILogGroup, @@ -24,6 +25,7 @@ import { LogRetention, RetentionDays, } from 'aws-cdk-lib/aws-logs'; +import { Stack } from 'aws-cdk-lib/core'; import { Construct } from 'constructs'; /** @@ -111,10 +113,24 @@ export class ExportingLogGroup extends Construct { threshold: 1, }); + // Define log retention retry options to reduce the risk of the rate exceed error + // as the default create log group TPS is only 5. Make sure to set the timeout of log retention function + // to be greater than total retry time. That's because if the function that is used for a custom resource + // doesn't exit properly, it'd end up in retries and may take cloud formation an hour to realize that + // the custom resource failed. const logRetention = new LogRetention(this, 'LogRetention', { logGroupName: props.logGroupName, retention: retentionInDays, + logRetentionRetryOptions: { + base: Duration.millis(200), + maxRetries: 7, + }, }); + // referenced from cdk code: https://github.com/aws/aws-cdk/blob/v2.33.0/packages/@aws-cdk/aws-logs/lib/log-retention.ts#L116 + const logRetentionFunctionConstructId = 'LogRetentionaae0aa3c5b4d4f87b02d85b201efdd8a'; + const logRetentionFunction = Stack.of(this).node.findChild(logRetentionFunctionConstructId); + const cfnFunction = logRetentionFunction.node.defaultChild as CfnFunction; + cfnFunction.addPropertyOverride('Timeout', 30); this.logGroup = LogGroup.fromLogGroupArn(scope, `${props.logGroupName}LogGroup`, logRetention.logGroupArn); this.logGroup.grant(exportLogsFunction, 'logs:CreateExportTask'); diff --git a/packages/aws-rfdk/lib/core/lib/log-group-factory.ts b/packages/aws-rfdk/lib/core/lib/log-group-factory.ts index 1d0c418ee..b44324819 100644 --- a/packages/aws-rfdk/lib/core/lib/log-group-factory.ts +++ b/packages/aws-rfdk/lib/core/lib/log-group-factory.ts @@ -3,6 +3,8 @@ * SPDX-License-Identifier: Apache-2.0 */ +import { Duration, Stack } from 'aws-cdk-lib'; +import { CfnFunction } from 'aws-cdk-lib/aws-lambda'; import { ILogGroup, LogGroup, @@ -52,7 +54,12 @@ export class LogGroupFactory { const fullLogGroupName = props?.logGroupPrefix ? `${props.logGroupPrefix}${logGroupName}` : logGroupName; const retention = props?.retention ? props.retention : LogGroupFactory.DEFAULT_LOG_RETENTION; - return props?.bucketName + // Define log retention retry options to reduce the risk of the rate exceed error + // as the default create log group TPS is only 5. Make sure to set the timeout of log retention function + // to be greater than total retry time. That's because if the function that is used for a custom resource + // doesn't exit properly, it'd end up in retries and may take cloud formation an hour to realize that + // the custom resource failed. + const logGroup = props?.bucketName ? new ExportingLogGroup(scope, logWrapperId, { bucketName: props.bucketName, logGroupName: fullLogGroupName, @@ -64,7 +71,18 @@ export class LogGroupFactory { new LogRetention(scope, logWrapperId, { logGroupName: fullLogGroupName, retention, + logRetentionRetryOptions: { + base: Duration.millis(200), + maxRetries: 7, + }, }).logGroupArn); + // referenced from cdk code: https://github.com/aws/aws-cdk/blob/v2.33.0/packages/@aws-cdk/aws-logs/lib/log-retention.ts#L116 + const logRetentionFunctionConstructId = 'LogRetentionaae0aa3c5b4d4f87b02d85b201efdd8a'; + const logRetentionFunction = Stack.of(scope).node.findChild(logRetentionFunctionConstructId); + const cfnFunction = logRetentionFunction.node.defaultChild as CfnFunction; + cfnFunction.addPropertyOverride('Timeout', 30); + + return logGroup; } /** diff --git a/packages/aws-rfdk/lib/core/test/exporting-log-group.test.ts b/packages/aws-rfdk/lib/core/test/exporting-log-group.test.ts index 58add28d8..bdf86f4ac 100644 --- a/packages/aws-rfdk/lib/core/test/exporting-log-group.test.ts +++ b/packages/aws-rfdk/lib/core/test/exporting-log-group.test.ts @@ -31,6 +31,10 @@ test('default exporting log group is created correctly', () => { ], }, LogGroupName: 'logGroup', + SdkRetry: { + maxRetries: 7, + base: 200, + }, RetentionInDays: 3, }); Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', { @@ -147,6 +151,10 @@ test('custom set retention is created correctly', () => { ], }, LogGroupName: 'logGroup', + SdkRetry: { + maxRetries: 7, + base: 200, + }, RetentionInDays: 7, }); Template.fromStack(stack).resourceCountIs('AWS::Lambda::Function', 2); diff --git a/packages/aws-rfdk/lib/core/test/log-group-factory.test.ts b/packages/aws-rfdk/lib/core/test/log-group-factory.test.ts index 97e1c1570..023c56859 100644 --- a/packages/aws-rfdk/lib/core/test/log-group-factory.test.ts +++ b/packages/aws-rfdk/lib/core/test/log-group-factory.test.ts @@ -21,8 +21,24 @@ describe('log group', () => { // THEN Template.fromStack(stack).hasResourceProperties('Custom::LogRetention', { LogGroupName: 'testLogGroup', + SdkRetry: { + maxRetries: 7, + base: 200, + }, RetentionInDays: 3, }); + + expect(Object.keys(Template.fromStack(stack).findResources('AWS::Lambda::Function', { + Properties: { + Role: { + 'Fn::GetAtt': [ + 'LogRetentionaae0aa3c5b4d4f87b02d85b201efdd8aServiceRole9741ECFB', + 'Arn', + ], + }, + Timeout: 30, + }, + })).length).toEqual(1); Template.fromStack(stack).hasResourceProperties('AWS::Lambda::Function', Match.not({ Role: { 'Fn::GetAtt': [ @@ -44,8 +60,25 @@ describe('log group', () => { // THEN Template.fromStack(stack).hasResourceProperties('Custom::LogRetention', { LogGroupName: 'prefix-testLogGroup', + SdkRetry: { + maxRetries: 7, + base: 200, + }, RetentionInDays: 3, }); + + expect(Object.keys(Template.fromStack(stack).findResources('AWS::Lambda::Function', { + Properties: { + Role: { + 'Fn::GetAtt': [ + 'LogRetentionaae0aa3c5b4d4f87b02d85b201efdd8aServiceRole9741ECFB', + 'Arn', + ], + }, + Timeout: 30, + }, + })).length).toEqual(1); + Template.fromStack(stack).hasResourceProperties('AWS::Lambda::Function', Match.not({ Role: { 'Fn::GetAtt': [ @@ -67,8 +100,25 @@ describe('log group', () => { // THEN Template.fromStack(stack).hasResourceProperties('Custom::LogRetention', { LogGroupName: 'testLogGroup', + SdkRetry: { + maxRetries: 7, + base: 200, + }, RetentionInDays: 7, }); + + expect(Object.keys(Template.fromStack(stack).findResources('AWS::Lambda::Function', { + Properties: { + Role: { + 'Fn::GetAtt': [ + 'LogRetentionaae0aa3c5b4d4f87b02d85b201efdd8aServiceRole9741ECFB', + 'Arn', + ], + }, + Timeout: 30, + }, + })).length).toEqual(1); + Template.fromStack(stack).hasResourceProperties('AWS::Lambda::Function', Match.not({ Role: { 'Fn::GetAtt': [ @@ -92,9 +142,25 @@ describe('exporting log group', () => { // THEN Template.fromStack(stack).hasResourceProperties('Custom::LogRetention', { LogGroupName: 'testLogGroup', + SdkRetry: { + maxRetries: 7, + base: 200, + }, RetentionInDays: 3, }); + expect(Object.keys(Template.fromStack(stack).findResources('AWS::Lambda::Function', { + Properties: { + Role: { + 'Fn::GetAtt': [ + 'LogRetentionaae0aa3c5b4d4f87b02d85b201efdd8aServiceRole9741ECFB', + 'Arn', + ], + }, + Timeout: 30, + }, + })).length).toEqual(1); + Template.fromStack(stack).hasResourceProperties('AWS::Lambda::Function', { Role: { 'Fn::GetAtt': [ @@ -124,9 +190,25 @@ describe('exporting log group', () => { // THEN Template.fromStack(stack).hasResourceProperties('Custom::LogRetention', { LogGroupName: 'prefix-testLogGroup', + SdkRetry: { + maxRetries: 7, + base: 200, + }, RetentionInDays: 3, }); + expect(Object.keys(Template.fromStack(stack).findResources('AWS::Lambda::Function', { + Properties: { + Role: { + 'Fn::GetAtt': [ + 'LogRetentionaae0aa3c5b4d4f87b02d85b201efdd8aServiceRole9741ECFB', + 'Arn', + ], + }, + Timeout: 30, + }, + })).length).toEqual(1); + Template.fromStack(stack).hasResourceProperties('AWS::Lambda::Function', { Role: { 'Fn::GetAtt': [ @@ -157,9 +239,25 @@ describe('exporting log group', () => { // THEN Template.fromStack(stack).hasResourceProperties('Custom::LogRetention', { LogGroupName: 'testLogGroup', + SdkRetry: { + maxRetries: 7, + base: 200, + }, RetentionInDays: 7, }); + expect(Object.keys(Template.fromStack(stack).findResources('AWS::Lambda::Function', { + Properties: { + Role: { + 'Fn::GetAtt': [ + 'LogRetentionaae0aa3c5b4d4f87b02d85b201efdd8aServiceRole9741ECFB', + 'Arn', + ], + }, + Timeout: 30, + }, + })).length).toEqual(1); + Template.fromStack(stack).hasResourceProperties('AWS::Lambda::Function', { Role: { 'Fn::GetAtt': [