Skip to content

Commit

Permalink
fix: support double quote string enum (#122)
Browse files Browse the repository at this point in the history
  • Loading branch information
MarshallOfSound authored Sep 23, 2024
1 parent fc8da34 commit 4c9ad8f
Show file tree
Hide file tree
Showing 2 changed files with 292 additions and 17 deletions.
129 changes: 129 additions & 0 deletions src/__tests__/markdown-helpers.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,76 @@ def fn():
expect(extractStringEnum('wassup')).toBe(null);
});

it('should error helpfully on invalid value separators', () => {
// "sometimes" is not one of the accepted separators (a comma, "and" or "or"),
// so extraction is expected to throw, naming the first offending character
// ("s") and echoing the text that was being scanned.
expect(() => extractStringEnum('Can be `x` sometimes `y'))
.toThrowErrorMatchingInlineSnapshot(`
"Unexpected separator token while extracting string enum, expected a comma or "and" or "or" but found "s"
Context: \`x\` sometimes \`y
^"
`);
});

it('should error helpfully on unterminated enum strings', () => {
// The last value opens a backtick quote that is never closed, so the input
// runs out while a value is still being read and extraction must throw.
expect(() => extractStringEnum('Can be `x` or `y')).toThrowErrorMatchingInlineSnapshot(`
"Unexpected early termination of token sequence while extracting string enum, did you forget to close a quote?
Context: \`x\` or \`y"
`);
});

describe('mixed ticks', () => {
  it('should extract an enum when mixed quotes are used', () => {
    // The first value is backtick-quoted and contains a double quote; the
    // second is double-quoted and contains a backtick.
    const extracted = extractStringEnum('Can be `x"` or "`y"')!;
    expect(extracted).not.toBe(null);
    expect(extracted).toHaveLength(2);

    const [first, second] = extracted;
    expect(first.value).toBe('x"');
    expect(second.value).toBe('`y');
  });
});

describe('deprecated wrappers', () => {
  it('should handle strikethrough deprecation wrappers', () => {
    // The second value is wrapped in markdown strikethrough (~~...~~); the
    // extracted value must not include the wrapper characters.
    const extracted = extractStringEnum('Can be `x` or ~~`y`~~')!;
    expect(extracted).not.toBe(null);
    expect(extracted).toHaveLength(2);

    const [plain, struckThrough] = extracted;
    expect(plain.value).toBe('x');
    expect(struckThrough.value).toBe('y');
  });
});

describe('lead-in descriptions', () => {
  // Each entry pairs the punctuation named in the test title with a
  // description whose value list runs straight on into further prose.
  const leadIns: [string, string][] = [
    ['comma', 'Can be `x` or `y`, where `x` implies that...'],
    ['fullstop', 'Can be `x` or `y`. The `x` value implies that...'],
    ['semicolon', 'Can be `x` or `y`; the `x` value implies that...'],
    ['hyphen', 'Can be `x` or `y` - the `x` value implies that...'],
  ];

  for (const [punctuation, description] of leadIns) {
    it(`should handle value lists that smoothly lead in to prose with a ${punctuation}`, () => {
      // Only the two values before the punctuation should be extracted; the
      // quoted `x` in the trailing prose must not be picked up.
      const extracted = extractStringEnum(description)!;
      expect(extracted).not.toBe(null);
      expect(extracted).toHaveLength(2);

      const [first, second] = extracted;
      expect(first.value).toBe('x');
      expect(second.value).toBe('y');
    });
  }
});

describe('with backticks', () => {
it('should extract an enum of the format "can be x"', () => {
const values = extractStringEnum('Can be `x`')!;
Expand Down Expand Up @@ -260,6 +330,65 @@ def fn():
});
});

// Mirrors the quoting-style suites for values wrapped in double quotes. Each
// test title names the phrase shape being exercised.
describe('with double quotes', () => {
  it('should extract an enum of the format "can be x"', () => {
    const values = extractStringEnum(`Can be "x"`)!;
    expect(values).not.toBe(null);
    expect(values).toHaveLength(1);
    expect(values[0].value).toBe('x');
  });

  it('should extract an enum of the format "can be x or y"', () => {
    const values = extractStringEnum(`Can be "x" or "y"`)!;
    expect(values).not.toBe(null);
    expect(values).toHaveLength(2);
    expect(values[0].value).toBe('x');
    expect(values[1].value).toBe('y');
  });

  it('should extract an enum of the format "can be x, y or z"', () => {
    const values = extractStringEnum(`Can be "x", "y" or "z"`)!;
    expect(values).not.toBe(null);
    expect(values).toHaveLength(3);
    expect(values[0].value).toBe('x');
    expect(values[1].value).toBe('y');
    expect(values[2].value).toBe('z');
  });

  // Same as above but with a comma before the final "or"
  it('should extract an enum of the format "can be x, y, or z"', () => {
    const values = extractStringEnum(`Can be "x", "y", or "z"`)!;
    expect(values).not.toBe(null);
    expect(values).toHaveLength(3);
    expect(values[0].value).toBe('x');
    expect(values[1].value).toBe('y');
    expect(values[2].value).toBe('z');
  });

  it('should extract an enum of the format "values include a"', () => {
    const values = extractStringEnum(`Values include "a"`)!;
    expect(values).not.toBe(null);
    expect(values).toHaveLength(1);
    expect(values[0].value).toBe('a');
  });

  it('should extract an enum of the format "values include a and b"', () => {
    const values = extractStringEnum(`Values include "a" and "b"`)!;
    expect(values).not.toBe(null);
    expect(values).toHaveLength(2);
    expect(values[0].value).toBe('a');
    expect(values[1].value).toBe('b');
  });

  it('should extract an enum of the format "values include a, b and c"', () => {
    const values = extractStringEnum(`Values include "a", "b" and "c"`)!;
    expect(values).not.toBe(null);
    expect(values).toHaveLength(3);
    expect(values[0].value).toBe('a');
    expect(values[1].value).toBe('b');
    expect(values[2].value).toBe('c');
  });
});

describe('rawTypeToTypeInformation()', () => {
it('should map a primitive types correctly', () => {
expect(rawTypeToTypeInformation('Boolean', '', null)).toMatchSnapshot();
Expand Down
180 changes: 163 additions & 17 deletions src/markdown-helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -453,28 +453,174 @@ export enum StripReturnTypeBehavior {
DO_NOT_STRIP,
}

// Comparator that orders phrases longest-first, so a phrase is always tried
// before any shorter phrase it begins with (e.g. ", or" before ",").
const byDescendingLength = (left: string, right: string): number => right.length - left.length;

// Every phrase that may legitimately sit between two enum values.
const niceSeparators = [',', 'and', 'or', ', and', ', or'].sort(byDescendingLength);
// A string enum may also accept an object; such descriptions finish with
// "or an Object", which ends the value list cleanly rather than being an error.
const niceTerminators = [', or an Object', 'or an Object'].sort(byDescendingLength);
// Annotations that can follow a value and are skipped over.
const suffixesToIgnore = ['(Deprecated)'];

/**
 * Extracts the possible values of a string enum from a prose description such
 * as "Can be `x`, `y` or `z`" or 'Values include "a" and "b"'.
 *
 * The text following the first "can be" / "value(s) include(s)" phrase
 * (case-insensitive) is scanned one character at a time. A value is whatever
 * sits between a backtick, single quote or double quote and the next
 * occurrence of that same character. Values may be wrapped in `~` characters
 * and must be separated by one of `niceSeparators`. Scanning stops at a `.`,
 * `;` or `-` that follows a value, or at any unexpected character that comes
 * after a lone comma or one of `niceTerminators`.
 *
 * @param description - The prose to scan.
 * @returns The values in order of appearance, each with an empty description.
 *   Returns null when the lead-in phrase is absent, when the very first
 *   character after it cannot start a value, or when no values were found.
 * @throws Error when a value is followed by something that is not a known
 *   separator, when a wrapping character is not closed by the same character,
 *   when an unexpected character appears where a value should start, or when
 *   the text ends inside an unclosed quote.
 */
export const extractStringEnum = (description: string): PossibleStringValue[] | null => {
  const inlineValuesLocatorPattern = /(?:can be|values? includes?) (.+)/i;
  const locatorMatch = inlineValuesLocatorPattern.exec(description);
  if (!locatorMatch) return null;

  // Everything after the lead-in phrase; this is what gets scanned and what
  // is echoed back as "Context" in error messages
  const valuesText = locatorMatch[1];
  // Characters that open (and, when seen again, close) a value
  const quoteChars = ['"', "'", '`'];
  // Characters allowed to wrap a quoted value, e.g. markdown strikethrough
  // around deprecated values
  const wrapperChars = ['~'];

  const state = {
    // Index of the NEXT character to read from `valuesText`
    position: 0,
    // What values have we found so far
    values: [] as string[],
    // The current value we are building, was found wrapped by `currentQuoter`
    currentValue: '',
    // The quote character that we encountered to start building a value
    // We won't stop adding characters to `currentValue` until the same character
    // is encountered again
    currentQuoter: null as null | string,
    // Wrapping characters seen before the current value that still need to be
    // matched by the same characters after it
    currentQuoterWrappers: [] as string[],
    // This is set to true after a value is extracted to allow us to parse out a
    // nice separator. For instance a "comma", a complete list is in `niceSeparators`
    expectingNiceSeparator: false,
    // This is set after the state machine reaches a point that _could_ be the end,
    // an invalid token when this is set to true is not a fatal error rather the
    // graceful termination of the state machine.
    couldBeDone: false,
  };

  // `state.position` has already been advanced past the character being
  // processed, so the current character lives at `position - 1`
  const matchesAhead = (candidate: string) => valuesText.startsWith(candidate, state.position - 1);

  stringEnumTokenLoop: while (state.position < valuesText.length) {
    const char = valuesText[state.position];
    state.position++;

    if (state.currentQuoter) {
      // We should never expect a separator inside a quoted value
      if (state.expectingNiceSeparator) {
        throw new Error('Impossible state encountered while extracting a string enum');
      }
      if (char === state.currentQuoter) {
        // Closing quote: the value is complete
        state.currentQuoter = null;
        state.values.push(state.currentValue);
        state.currentValue = '';
        state.expectingNiceSeparator = true;
      } else {
        state.currentValue += char;
      }
    } else {
      // Whitespace can be skipped
      if (char === ' ') {
        continue stringEnumTokenLoop;
      }

      // If we're between values we should be expecting one of the "nice"
      // separators.
      if (state.expectingNiceSeparator) {
        // Before checking for a separator we need to ensure we have unwrapped any wrapping
        // chars
        if (state.currentQuoterWrappers.length) {
          const expectedUnwrap = state.currentQuoterWrappers.pop();
          if (char !== expectedUnwrap) {
            // The context line is `Context: "` (10 characters) followed by the
            // text, and `char` sits at index `position - 1`, hence 9 + position
            throw new Error(
              `Unexpected token while extracting string enum. Expected an unwrapping token that matched "${expectedUnwrap}". But found token: ${char}\nContext: "${valuesText}"\n${' '.repeat(
                9 + state.position,
              )}^`,
            );
          }
          continue stringEnumTokenLoop;
        }

        // Punctuation that ends the value list and leads in to further prose
        if (char === '.' || char === ';' || char === '-') {
          break stringEnumTokenLoop;
        }

        // Ignorable suffixes are skipped; a separator is still expected afterwards
        for (const suffix of suffixesToIgnore) {
          if (matchesAhead(suffix)) {
            state.position += suffix.length - 1;
            continue stringEnumTokenLoop;
          }
        }

        for (const niceTerminator of niceTerminators) {
          if (matchesAhead(niceTerminator)) {
            state.position += niceTerminator.length - 1;
            state.expectingNiceSeparator = false;
            state.couldBeDone = true;
            continue stringEnumTokenLoop;
          }
        }

        for (const niceSeparator of niceSeparators) {
          if (matchesAhead(niceSeparator)) {
            state.position += niceSeparator.length - 1;
            state.expectingNiceSeparator = false;
            // A lone comma may equally be the start of trailing prose
            if (niceSeparator === ',') {
              state.couldBeDone = true;
            }
            continue stringEnumTokenLoop;
          }
        }

        // The context line is `Context: ` (9 characters, no quote) followed by
        // the text, hence 8 + position
        throw new Error(
          `Unexpected separator token while extracting string enum, expected a comma or "and" or "or" but found "${char}"\nContext: ${valuesText}\n${' '.repeat(
            8 + state.position,
          )}^`,
        );
      }

      if (quoteChars.includes(char)) {
        // Quote chars start a new value
        state.currentQuoter = char;
        // A new value has started, we no longer could be done on an invalid char
        state.couldBeDone = false;
        continue stringEnumTokenLoop;
      }
      if (wrapperChars.includes(char)) {
        // Deprecated string enum values are wrapped with strikethrough
        state.currentQuoterWrappers.push(char);
        continue stringEnumTokenLoop;
      }
      // If we are at the very start we should just assume our heuristic found something silly
      // and bail, 0 valid characters is skip-able
      if (state.position === 1) {
        return null;
      }
      // If the last thing we parsed _could_ have been a termination character
      // let's assume an invalid character here confirms that.
      if (state.couldBeDone) {
        break stringEnumTokenLoop;
      }
      // Anything else is unexpected
      throw new Error(
        `Unexpected token while extracting string enum. Token: ${char}\nContext: "${valuesText}"\n${' '.repeat(
          9 + state.position,
        )}^`,
      );
    }
  }

  // Reached the end of the description, we should check
  // if we are in a clean state (not inside a quote).
  // If so we're good, if not hard error
  if (state.currentQuoter || state.currentValue) {
    throw new Error(
      `Unexpected early termination of token sequence while extracting string enum, did you forget to close a quote?\nContext: ${valuesText}`,
    );
  }

  // No options we should just bail, can't have a string enum with 0 options
  if (!state.values.length) {
    return null;
  }

  return state.values.map(value => ({
    value,
    description: '',
  }));
};

export const extractReturnType = (
Expand Down

0 comments on commit 4c9ad8f

Please sign in to comment.