Skip to content

Commit

Permalink
refactor(core): refactor CloudFormationLang.toJSON() (#11224)
Browse files Browse the repository at this point in the history
Our previous implementation of `toJSON()` was quite hacky.

It replaced values inside the structure with objects that had a custom
`toJSON()` serializer, and then called `JSON.stringify()` on the result.

The resulting JSON would have special markers in it where the Token
values would be string-substituted back in.

It's actually easier and gives us more control to just implement
JSONification ourselves in a Token-aware recursive function.

This change has been split off from a larger, upcoming PR in order
to make the individual reviews smaller.

Incidentally also fixes #13465, as the type of encoded tokens is assumed to match
the type of the encoded value (e.g., a `string[]`-encoded token is assumed to
produce a list at deploy-time and so will not be quoted).


----

*By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license*
  • Loading branch information
rix0rrr committed Mar 10, 2021
1 parent 8dca507 commit bc1293b
Show file tree
Hide file tree
Showing 6 changed files with 645 additions and 198 deletions.
346 changes: 267 additions & 79 deletions packages/@aws-cdk/core/lib/private/cloudformation-lang.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
import { Lazy } from '../lazy';
import { Reference } from '../reference';
import { DefaultTokenResolver, IFragmentConcatenator, IPostProcessor, IResolvable, IResolveContext } from '../resolvable';
import { TokenizedStringFragments } from '../string-fragments';
import { DefaultTokenResolver, IFragmentConcatenator, IResolveContext } from '../resolvable';
import { Token } from '../token';
import { Intrinsic } from './intrinsic';
import { resolve } from './resolve';
import { INTRINSIC_KEY_PREFIX, ResolutionTypeHint, resolvedTypeHint } from './resolve';

/**
* Routines that know how to do operations at the CloudFormation document language level
Expand All @@ -24,59 +21,12 @@ export class CloudFormationLang {
* @param space Indentation to use (default: no pretty-printing)
*/
public static toJSON(obj: any, space?: number): string {
// This works in two stages:
//
// First, resolve everything. This gets rid of the lazy evaluations, evaluation
// to the real types of things (for example, would a function return a string, an
// intrinsic, or a number? We have to resolve to know).
//
// We then to through the returned result, identify things that evaluated to
// CloudFormation intrinsics, and re-wrap those in Tokens that have a
// toJSON() method returning their string representation. If we then call
// JSON.stringify() on that result, that gives us essentially the same
// string that we started with, except with the non-token characters quoted.
//
// {"field": "${TOKEN}"} --> {\"field\": \"${TOKEN}\"}
//
// A final resolve() on that string (done by the framework) will yield the string
// we're after.
//
// Resolving and wrapping are done in go using the resolver framework.
class IntrinsincWrapper extends DefaultTokenResolver {
constructor() {
super(CLOUDFORMATION_CONCAT);
}

public resolveToken(t: IResolvable, context: IResolveContext, postProcess: IPostProcessor) {
// Return References directly, so their type is maintained and the references will
// continue to work. Only while preparing, because we do need the final value of the
// token while resolving.
if (Reference.isReference(t) && context.preparing) { return wrap(t); }

// Deep-resolve and wrap. This is necessary for Lazy tokens so we can see "inside" them.
return wrap(super.resolveToken(t, context, postProcess));
}
public resolveString(fragments: TokenizedStringFragments, context: IResolveContext) {
return wrap(super.resolveString(fragments, context));
}
public resolveList(l: string[], context: IResolveContext) {
return wrap(super.resolveList(l, context));
}
}

// We need a ResolveContext to get started so return a Token
return Lazy.stringValue({
produce: (ctx: IResolveContext) =>
JSON.stringify(resolve(obj, {
preparing: ctx.preparing,
scope: ctx.scope,
resolver: new IntrinsincWrapper(),
}), undefined, space),
return Lazy.uncachedString({
// We used to do this by hooking into `JSON.stringify()` by adding in objects
// with custom `toJSON()` functions, but it's ultimately simpler just to
// reimplement the `stringify()` function from scratch.
produce: (ctx) => tokenAwareStringify(obj, space ?? 0, ctx),
});

function wrap(value: any): any {
return isIntrinsic(value) ? new JsonToken(deepQuoteStringsForJSON(value)) : value;
}
}

/**
Expand All @@ -97,44 +47,227 @@ export class CloudFormationLang {

// Otherwise return a Join intrinsic (already in the target document language to avoid taking
// circular dependencies on FnJoin & friends)
return { 'Fn::Join': ['', minimalCloudFormationJoin('', parts)] };
return fnJoinConcat(parts);
}
}

/**
* Token that also stringifies in the toJSON() operation.
* Return a CFN intrinsic mass concatting any number of CloudFormation expressions
*/
class JsonToken extends Intrinsic {
/**
* Special handler that gets called when JSON.stringify() is used.
*/
public toJSON() {
return this.toString();
}
function fnJoinConcat(parts: any[]) {
return { 'Fn::Join': ['', minimalCloudFormationJoin('', parts)] };
}

/**
* Deep escape strings for use in a JSON context
* Perform a JSON.stringify()-like operation, except aware of Tokens and CloudFormation intrincics
*
* Tokens will be resolved and if any resolve to CloudFormation intrinsics, the intrinsics
* will be lifted to the top of a giant `{ Fn::Join }` expression.
*
* If Tokens resolve to primitive types (for example, by using Lazies), we'll
* use the primitive type to determine how to encode the value into the JSON.
*
* If Tokens resolve to CloudFormation intrinsics, we'll use the type of the encoded
* value as a type hint to determine how to encode the value into the JSON. The difference
* is that we add quotes (") around strings, and don't add anything around non-strings.
*
* The following structure:
*
* { SomeAttr: resource.someAttr }
*
* Will JSONify to either:
*
* '{ "SomeAttr": "' ++ { Fn::GetAtt: [Resource, SomeAttr] } ++ '" }'
* or '{ "SomeAttr": ' ++ { Fn::GetAtt: [Resource, SomeAttr] } ++ ' }'
*
* Depending on whether `someAttr` is type-hinted to be a string or not.
*
* (Where ++ is the CloudFormation string-concat operation (`{ Fn::Join }`).
*
* -----------------------
*
* This work requires 2 features from the `resolve()` function:
*
* - INTRINSICS TYPE HINTS: intrinsics are represented by values like
* `{ Ref: 'XYZ' }`. These values can reference either a string or a list/number at
* deploy time, and from the value alone there's no way to know which. We need
* to know the type to know whether to JSONify this reference to:
*
* '{ "referencedValue": "' ++ { Ref: XYZ } ++ '"}'
* or '{ "referencedValue": ' ++ { Ref: XYZ } ++ '}'
*
* I.e., whether or not we need to enclose the reference in quotes or not.
*
* We COULD have done this by resolving one token at a time, and looking at the
* type of the encoded token we were resolving to obtain a type hint. However,
* the `resolve()` and Token system resist a level-at-a-time resolve
* operation: because of the existence of post-processors, we must have done a
* complete recursive resolution of a token before we can look at its result
* (after which any type information about the sources of nested resolved
* values is lost).
*
* To fix this, "type hints" have been added to the `resolve()` function,
* giving an idea of the type of the source value for compplex result values.
* This only works for objects (not strings and numbers) but fortunately
* we only care about the types of intrinsics, which are always complex values.
*
* Type hinting could have been added to the `IResolvable` protocol as well,
* but for now we just use the type of an encoded value as a type hint. That way
* we don't need to annotate anything more at the L1 level--we will use the type
* encodings added by construct authors at the L2 levels. L1 users can escape the
* default decision of "string" by using `Token.asList()`.
*
* - COMPLEX KEYS: since tokens can be string-encoded, we can use string-encoded tokens
* as the keys in JavaScript objects. However, after resolution, those string-encoded
* tokens could resolve to intrinsics (`{ Ref: ... }`), which CANNOT be stored in
* JavaScript objects anymore.
*
* We therefore need a protocol to store the resolved values somewhere in the JavaScript
* type model, which can be returned by `resolve()`, and interpreted by `tokenAwareStringify()`
* to produce the correct JSON.
*
* And example will quickly show the point:
*
* User writes:
* { [resource.resourceName]: 'SomeValue' }
* ------ string actually looks like ------>
* { '${Token[1234]}': 'SomeValue' }
* ------ resolve ------->
* { '$IntrinsicKey$0': [ {Ref: Resource}, 'SomeValue' ] }
* ------ tokenAwareStringify ------->
* '{ "' ++ { Ref: Resource } ++ '": "SomeValue" }'
*/
function deepQuoteStringsForJSON(x: any): any {
if (typeof x === 'string') {
// Whenever we escape a string we strip off the outermost quotes
// since we're already in a quoted context.
const stringified = JSON.stringify(x);
return stringified.substring(1, stringified.length - 1);
function tokenAwareStringify(root: any, space: number, ctx: IResolveContext) {
let indent = 0;

const ret = new Array<Segment>();

// First completely resolve the tree, then encode to JSON while respecting the type
// hints we got for the resolved intrinsics.
recurse(ctx.resolve(root, { allowIntrinsicKeys: true }));

switch (ret.length) {
case 0: return undefined;
case 1: return renderSegment(ret[0]);
default:
return fnJoinConcat(ret.map(renderSegment));
}

if (Array.isArray(x)) {
return x.map(deepQuoteStringsForJSON);
/**
* Stringify a JSON element
*/
function recurse(obj: any): void {
if (obj === undefined) { return; }

if (Token.isUnresolved(obj)) {
throw new Error('This shouldnt happen anymore');
}
if (Array.isArray(obj)) {
return renderCollection('[', ']', obj, recurse);
}
if (typeof obj === 'object' && obj != null && !(obj instanceof Date)) {
// Treat as an intrinsic if this LOOKS like a CFN intrinsic (`{ Ref: ... }`)
// AND it's the result of a token resolution. Otherwise, we just treat this
// value as a regular old JSON object (that happens to look a lot like an intrinsic).
if (isIntrinsic(obj) && resolvedTypeHint(obj)) {
return renderIntrinsic(obj);
}

return renderCollection('{', '}', definedEntries(obj), ([key, value]) => {
if (key.startsWith(INTRINSIC_KEY_PREFIX)) {
[key, value] = value;
}

recurse(key);
pushLiteral(prettyPunctuation(':'));
recurse(value);
});
}
// Otherwise we have a scalar, defer to JSON.stringify()s serialization
pushLiteral(JSON.stringify(obj));
}

if (typeof x === 'object') {
for (const key of Object.keys(x)) {
x[key] = deepQuoteStringsForJSON(x[key]);
/**
* Render an object or list
*/
function renderCollection<A>(pre: string, post: string, xs: Iterable<A>, each: (x: A) => void) {
pushLiteral(pre);
indent += space;
let atLeastOne = false;
for (const [comma, item] of sepIter(xs)) {
if (comma) { pushLiteral(','); }
pushLineBreak();
each(item);
atLeastOne = true;
}
indent -= space;
if (atLeastOne) { pushLineBreak(); }
pushLiteral(post);
}

return x;
function renderIntrinsic(intrinsic: any) {
switch (resolvedTypeHint(intrinsic)) {
case ResolutionTypeHint.STRING:
pushLiteral('"');
pushIntrinsic(deepQuoteStringLiterals(intrinsic));
pushLiteral('"');
break;

default:
pushIntrinsic(intrinsic);
break;
}
}

/**
* Push a literal onto the current segment if it's also a literal, otherwise open a new Segment
*/
function pushLiteral(lit: string) {
let last = ret[ret.length - 1];
if (last?.type !== 'literal') {
last = { type: 'literal', parts: [] };
ret.push(last);
}
last.parts.push(lit);
}

/**
* Add a new intrinsic segment
*/
function pushIntrinsic(intrinsic: any) {
ret.push({ type: 'intrinsic', intrinsic });
}

/**
* Push a line break if we are pretty-printing, otherwise don't
*/
function pushLineBreak() {
if (space > 0) {
pushLiteral(`\n${' '.repeat(indent)}`);
}
}

/**
* Add a space after the punctuation if we are pretty-printing, no space if not
*/
function prettyPunctuation(punc: string) {
return space > 0 ? `${punc} ` : punc;
}
}

/**
* A Segment is either a literal string or a CloudFormation intrinsic
*/
type Segment = { type: 'literal'; parts: string[] } | { type: 'intrinsic'; intrinsic: any };

/**
* Render a segment
*/
function renderSegment(s: Segment): NonNullable<any> {
switch (s.type) {
case 'literal': return s.parts.join('');
case 'intrinsic': return s.intrinsic;
}
}

const CLOUDFORMATION_CONCAT: IFragmentConcatenator = {
Expand Down Expand Up @@ -204,3 +337,58 @@ export function isNameOfCloudFormationIntrinsic(name: string): boolean {
// these are 'fake' intrinsics, only usable inside the parameter overrides of a CFN CodePipeline Action
return name !== 'Fn::GetArtifactAtt' && name !== 'Fn::GetParam';
}

/**
* Separated iterator
*/
function* sepIter<A>(xs: Iterable<A>): IterableIterator<[boolean, A]> {
let comma = false;
for (const item of xs) {
yield [comma, item];
comma = true;
}
}

/**
* Object.entries() but skipping undefined values
*/
function* definedEntries<A extends object>(xs: A): IterableIterator<[string, any]> {
for (const [key, value] of Object.entries(xs)) {
if (value !== undefined) {
yield [key, value];
}
}
}

/**
* Quote string literals inside an intrinsic
*
* Formally, this should only match string literals that will be interpreted as
* string literals. Fortunately, the strings that should NOT be quoted are
* Logical IDs and attribute names, which cannot contain quotes anyway. Hence,
* we can get away not caring about the distinction and just quoting everything.
*/
function deepQuoteStringLiterals(x: any): any {
if (Array.isArray(x)) {
return x.map(deepQuoteStringLiterals);
}
if (typeof x === 'object' && x != null) {
const ret: any = {};
for (const [key, value] of Object.entries(x)) {
ret[deepQuoteStringLiterals(key)] = deepQuoteStringLiterals(value);
}
return ret;
}
if (typeof x === 'string') {
return quoteString(x);
}
return x;
}

/**
* Quote the characters inside a string, for use inside toJSON
*/
function quoteString(s: string) {
s = JSON.stringify(s);
return s.substring(1, s.length - 1);
}
Loading

0 comments on commit bc1293b

Please sign in to comment.