Skip to content

Commit

Permalink
fix(cli): CLI hangs for 10 minutes on expired credentials (#21052)
Browse files Browse the repository at this point in the history
When using environment variable credentials (`AWS_ACCESS_KEY_ID` etc)
that were expired, the CLI would proceed to retry calls involving those
credentials because the `ExpiredToken` error is marked as `retryable:
true`.

Because we have extremely aggressive retries for most of our SDK calls
(since the CloudFormation throttling limits are low and we generate a
lot of contention on them), calls can take up to 10 minutes to run out
of retries.

Try and detect `ExpiredToken` situations sooner and error out harder
without trying to recover from them.

This PR only handles the situation where there is a Roles to assume --
this works because calls to STS have a much lower retry count, and so
it only takes a couple of seconds to run out of retries and surface
the `ExpiredToken` to the CLI, which we can then use to abort early.

This is all to work around aws/aws-sdk-js#3581

----

*By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license*
  • Loading branch information
rix0rrr authored Aug 19, 2022
1 parent 5cc0d35 commit 1e305e6
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 6 deletions.
3 changes: 1 addition & 2 deletions packages/aws-cdk/lib/api/aws-auth/awscli-compatible.ts
Original file line number Diff line number Diff line change
Expand Up @@ -344,5 +344,4 @@ async function tokenCodeFn(serialArn: string, cb: (err?: Error, token?: string)
debug('Failed to get MFA token', err);
cb(err);
}
}

}
17 changes: 15 additions & 2 deletions packages/aws-cdk/lib/api/aws-auth/sdk-provider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import { AwsCliCompatible } from './awscli-compatible';
import { cached } from './cached';
import { CredentialPlugins } from './credential-plugins';
import { Mode } from './credentials';
import { ISDK, SDK } from './sdk';
import { ISDK, SDK, isUnrecoverableAwsError } from './sdk';


// Some configuration that can only be achieved by setting
Expand Down Expand Up @@ -182,7 +182,12 @@ export class SdkProvider {
// account.
if (options?.assumeRoleArn === undefined) {
if (baseCreds.source === 'incorrectDefault') { throw new Error(fmtObtainCredentialsError(env.account, baseCreds)); }
return { sdk: new SDK(baseCreds.credentials, env.region, this.sdkOptions), didAssumeRole: false };

// Our current credentials must be valid and not expired. Confirm that before we get into doing
// actual CloudFormation calls, which might take a long time to hang.
const sdk = new SDK(baseCreds.credentials, env.region, this.sdkOptions);
await sdk.validateCredentials();
return { sdk, didAssumeRole: false };
}

// We will proceed to AssumeRole using whatever we've been given.
Expand All @@ -194,6 +199,10 @@ export class SdkProvider {
await sdk.forceCredentialRetrieval();
return { sdk, didAssumeRole: true };
} catch (e) {
if (isUnrecoverableAwsError(e)) {
throw e;
}

// AssumeRole failed. Proceed and warn *if and only if* the baseCredentials were already for the right account
// or returned from a plugin. This is to cover some current setups for people using plugins or preferring to
// feed the CLI credentials which are sufficient by themselves. Prefer to assume the correct role if we can,
Expand Down Expand Up @@ -270,6 +279,10 @@ export class SdkProvider {

return await new SDK(creds, this.defaultRegion, this.sdkOptions).currentAccount();
} catch (e) {
if (isUnrecoverableAwsError(e)) {
throw e;
}

debug('Unable to determine the default AWS account:', e);
return undefined;
}
Expand Down
44 changes: 43 additions & 1 deletion packages/aws-cdk/lib/api/aws-auth/sdk.ts
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,20 @@ export class SDK implements ISDK {
*/
private readonly cloudFormationRetryOptions = { maxRetries: 10, retryDelayOptions: { base: 1_000 } };

/**
* STS is used to check credential validity, don't do too many retries.
*/
private readonly stsRetryOptions = { maxRetries: 3, retryDelayOptions: { base: 100 } };

/**
* Whether we have proof that the credentials have not expired
*
* We need to do some manual plumbing around this because the JS SDKv2 treats `ExpiredToken`
* as retriable and we have hefty retries on CFN calls making the CLI hang for a good 15 minutes
* if the credentials have expired.
*/
private _credentialsValidated = false;

constructor(
private readonly _credentials: AWS.Credentials,
region: string,
Expand Down Expand Up @@ -202,13 +216,16 @@ export class SDK implements ISDK {
return cached(this, CURRENT_ACCOUNT_KEY, () => SDK.accountCache.fetch(this._credentials.accessKeyId, async () => {
// if we don't have one, resolve from STS and store in cache.
debug('Looking up default account ID from STS');
const result = await new AWS.STS(this.config).getCallerIdentity().promise();
const result = await new AWS.STS({ ...this.config, ...this.stsRetryOptions }).getCallerIdentity().promise();
const accountId = result.Account;
const partition = result.Arn!.split(':')[1];
if (!accountId) {
throw new Error('STS didn\'t return an account ID');
}
debug('Default account ID:', accountId);

// Save another STS call later if this one already succeeded
this._credentialsValidated = true;
return { accountId, partition };
}));
}
Expand All @@ -234,6 +251,12 @@ export class SDK implements ISDK {
try {
await this._credentials.getPromise();
} catch (e) {
if (isUnrecoverableAwsError(e)) {
throw e;
}

// Only reason this would fail is if it was an AssumRole. Otherwise,
// reading from an INI file or reading env variables is unlikely to fail.
debug(`Assuming role failed: ${e.message}`);
throw new Error([
'Could not assume role in target account',
Expand All @@ -247,6 +270,18 @@ export class SDK implements ISDK {
}
}

/**
* Make sure the the current credentials are not expired
*/
public async validateCredentials() {
if (this._credentialsValidated) {
return;
}

await new AWS.STS({ ...this.config, ...this.stsRetryOptions }).getCallerIdentity().promise();
this._credentialsValidated = true;
}

public getEndpointSuffix(region: string): string {
return regionUtil.getEndpointSuffix(region);
}
Expand Down Expand Up @@ -372,3 +407,10 @@ function allChainedExceptionMessages(e: Error | undefined) {
}
return ret.join(': ');
}

/**
* Return whether an error should not be recovered from
*/
export function isUnrecoverableAwsError(e: Error) {
return (e as any).code === 'ExpiredToken';
}
9 changes: 8 additions & 1 deletion packages/aws-cdk/test/api/fake-sts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ export class FakeSts {
_attributes: { xmlns: 'https://sts.amazonaws.com/doc/2011-06-15/' },
Error: {
Type: 'Sender',
Code: 'Error',
Code: e.code ?? 'Error',
Message: e.message,
},
RequestId: '1',
Expand Down Expand Up @@ -159,6 +159,13 @@ export class FakeSts {
private handleAssumeRole(mockRequest: MockRequest): Record<string, any> {
const identity = this.identity(mockRequest);

const failureRequested = mockRequest.parsedBody.RoleArn.match(/<FAIL:([^>]+)>/);
if (failureRequested) {
const err = new Error(`STS failing by user request: ${failureRequested[1]}`);
(err as any).code = failureRequested[1];
throw err;
}

this.assumedRoles.push({
roleArn: mockRequest.parsedBody.RoleArn,
roleSessionName: mockRequest.parsedBody.RoleSessionName,
Expand Down
16 changes: 16 additions & 0 deletions packages/aws-cdk/test/api/sdk-provider.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,22 @@ describe('with intercepted network calls', () => {
// THEN
expect((await sdk.currentAccount()).accountId).toEqual(uniq('88888'));
});

test('if AssumeRole fails because of ExpiredToken, then fail completely', async () => {
// GIVEN
prepareCreds({
fakeSts,
config: {
default: { aws_access_key_id: 'foo', $account: '88888' },
},
});
const provider = await providerFromProfile(undefined);

// WHEN - assumeRole fails with a specific error
await expect(async () => {
await provider.forEnvironment(env(uniq('88888')), Mode.ForReading, { assumeRoleArn: '<FAIL:ExpiredToken>' });
}).rejects.toThrow(/ExpiredToken/);
});
});

describe('Plugins', () => {
Expand Down

0 comments on commit 1e305e6

Please sign in to comment.