Skip to content

Commit

Permalink
Enable formatting for triple-quoted strings (#1505)
Browse files Browse the repository at this point in the history
* Add test cases for triple-quoted string

* Add a todo test case for formatting

* Enhance lexer to handle triple-quoted string

* Fix tslint issues

* Fix typos
  • Loading branch information
pokutuna authored Jun 28, 2023
1 parent e07be0a commit 6bd0aab
Show file tree
Hide file tree
Showing 4 changed files with 178 additions and 5 deletions.
24 changes: 24 additions & 0 deletions examples/formatter/definitions/triple_quoted.sqlx
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
config { type: "table" }

SELECT

'''1''' AS single_line,

"""multi
line
string
with indent"""
AS multi_line,

REGEXP_CONTAINS(
"\n abc\n ",
r'''
abc
''') AS multi_line_regex,

"""
This project is ...
"${database()}"!!
""" AS with_js

post_operations { select """1""" as inner_sql }
10 changes: 10 additions & 0 deletions sqlx/format.ts
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ function stripUnformattableText(
const placeholderId = generatePlaceholderId();
switch (part.type) {
case SyntaxTreeNodeType.SQL_LITERAL_STRING:
case SyntaxTreeNodeType.SQL_LITERAL_MULTILINE_STRING:
case SyntaxTreeNodeType.JAVASCRIPT_TEMPLATE_STRING_PLACEHOLDER: {
placeholders[placeholderId] = part;
return placeholderId;
Expand Down Expand Up @@ -242,6 +243,7 @@ function formatPlaceholderInSqlx(
const wholeLine = getWholeLineContainingPlaceholderId(placeholderId, sqlx);
const indent = " ".repeat(wholeLine.length - wholeLine.trimLeft().length);
const formattedPlaceholder = formatSqlQueryPlaceholder(placeholderSyntaxNode, indent);

// Replace the placeholder entirely if (a) it fits on one line and (b) it isn't a comment.
// Otherwise, push the replacement onto its own line.
if (
Expand All @@ -250,6 +252,12 @@ function formatPlaceholderInSqlx(
) {
return sqlx.replace(placeholderId, () => formattedPlaceholder.trim());
}

// Keep internal line breaks in multiline string.
if (placeholderSyntaxNode.type === SyntaxTreeNodeType.SQL_LITERAL_MULTILINE_STRING) {
return sqlx.replace(placeholderId, () => formattedPlaceholder.trim());
}

// Push multi-line placeholders to their own lines, if they're not already on one.
const [textBeforePlaceholder, textAfterPlaceholder] = wholeLine.split(placeholderId);
const newLines: string[] = [];
Expand All @@ -270,6 +278,8 @@ function formatSqlQueryPlaceholder(node: SyntaxTreeNode, jsIndent: string): stri
case SyntaxTreeNodeType.SQL_LITERAL_STRING:
case SyntaxTreeNodeType.SQL_COMMENT:
return formatEveryLine(node.concatenate(), line => `${jsIndent}${line.trimLeft()}`);
case SyntaxTreeNodeType.SQL_LITERAL_MULTILINE_STRING:
return `${jsIndent}${node.concatenate().trimLeft()}`;
default:
throw new Error(`Unrecognized SyntaxTreeNodeType: ${node.type}`);
}
Expand Down
98 changes: 94 additions & 4 deletions sqlx/lexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ const LEXER_STATE_NAMES = {
JS_TEMPLATE_STRING: "jsTemplateString",
INNER_SQL_BLOCK: "innerSqlBlock",
SQL_SINGLE_QUOTE_STRING: "innerSingleQuote",
SQL_DOUBLE_QUOTE_STRING: "innerDoubleQuote"
SQL_DOUBLE_QUOTE_STRING: "innerDoubleQuote",
SQL_TRIPLE_SINGLE_QUOTE_STRING: "innerTripleSingleQuote",
SQL_TRIPLE_DOUBLE_QUOTE_STRING: "innerTripleDoubleQuote"
};

const SQL_LEXER_TOKEN_NAMES = {
Expand All @@ -21,8 +23,10 @@ const SQL_LEXER_TOKEN_NAMES = {
MULTI_LINE_COMMENT: LEXER_STATE_NAMES.SQL + "_multiLineComment",
START_JS_PLACEHOLDER: LEXER_STATE_NAMES.SQL + "_startJsPlaceholder",
BACKTICK: LEXER_STATE_NAMES.SQL + "_backtick",
START_QUOTE_SINGLE: LEXER_STATE_NAMES.INNER_SQL_BLOCK + "_startQuoteSingle",
START_QUOTE_DOUBLE: LEXER_STATE_NAMES.INNER_SQL_BLOCK + "_startQuoteDouble",
START_QUOTE_SINGLE: LEXER_STATE_NAMES.SQL + "_startQuoteSingle",
START_QUOTE_DOUBLE: LEXER_STATE_NAMES.SQL + "_startQuoteDouble",
START_TRIPLE_QUOTE_SINGLE: LEXER_STATE_NAMES.SQL + "_startTripleQuoteSingle",
START_TRIPLE_QUOTE_DOUBLE: LEXER_STATE_NAMES.SQL + "_startTripleQuoteDouble",
CAPTURE_EVERYTHING_ELSE: LEXER_STATE_NAMES.SQL + "_captureEverythingElse"
};

Expand Down Expand Up @@ -54,6 +58,8 @@ const INNER_SQL_BLOCK_LEXER_TOKEN_NAMES = {
BACKTICK: LEXER_STATE_NAMES.INNER_SQL_BLOCK + "_backtick",
START_QUOTE_SINGLE: LEXER_STATE_NAMES.INNER_SQL_BLOCK + "_startQuoteSingle",
START_QUOTE_DOUBLE: LEXER_STATE_NAMES.INNER_SQL_BLOCK + "_startQuoteDouble",
START_TRIPLE_QUOTE_SINGLE: LEXER_STATE_NAMES.INNER_SQL_BLOCK + "_startTripleQuoteSingle",
START_TRIPLE_QUOTE_DOUBLE: LEXER_STATE_NAMES.INNER_SQL_BLOCK + "_startTripleQuoteDouble",
CAPTURE_EVERYTHING_ELSE: LEXER_STATE_NAMES.INNER_SQL_BLOCK + "_captureEverythingElse"
};

Expand All @@ -73,6 +79,20 @@ const SQL_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES = {
CAPTURE_EVERYTHING_ELSE: LEXER_STATE_NAMES.SQL_DOUBLE_QUOTE_STRING + "_captureEverythingElse"
};

/**
 * Token names used by the lexer state for '''-delimited SQL strings.
 * Every name is that state's name followed by a suffix identifying the rule.
 */
const SQL_TRIPLE_SINGLE_QUOTE_STRING_LEXER_TOKEN_NAMES = {
  ESCAPED_BACKSLASH: `${LEXER_STATE_NAMES.SQL_TRIPLE_SINGLE_QUOTE_STRING}_escapedBackslash`,
  START_JS_PLACEHOLDER: `${LEXER_STATE_NAMES.SQL_TRIPLE_SINGLE_QUOTE_STRING}_startJsPlaceholder`,
  CLOSE_QUOTE: `${LEXER_STATE_NAMES.SQL_TRIPLE_SINGLE_QUOTE_STRING}_closeTripleQuoteSingle`,
  CAPTURE_EVERYTHING_ELSE: `${LEXER_STATE_NAMES.SQL_TRIPLE_SINGLE_QUOTE_STRING}_captureEverythingElse`
};

/**
 * Token names used by the lexer state for """-delimited SQL strings.
 * Every name is that state's name followed by a suffix identifying the rule.
 */
const SQL_TRIPLE_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES = {
  ESCAPED_BACKSLASH: `${LEXER_STATE_NAMES.SQL_TRIPLE_DOUBLE_QUOTE_STRING}_escapedBackslash`,
  START_JS_PLACEHOLDER: `${LEXER_STATE_NAMES.SQL_TRIPLE_DOUBLE_QUOTE_STRING}_startJsPlaceholder`,
  CLOSE_QUOTE: `${LEXER_STATE_NAMES.SQL_TRIPLE_DOUBLE_QUOTE_STRING}_closeTripleQuoteDouble`,
  CAPTURE_EVERYTHING_ELSE: `${LEXER_STATE_NAMES.SQL_TRIPLE_DOUBLE_QUOTE_STRING}_captureEverythingElse`
};

const lexer = moo.states(buildSqlxLexer());

export enum SyntaxTreeNodeType {
Expand All @@ -81,6 +101,7 @@ export enum SyntaxTreeNodeType {
SQL,
SQL_COMMENT,
SQL_LITERAL_STRING,
SQL_LITERAL_MULTILINE_STRING,
SQL_STATEMENT_SEPARATOR
}

Expand All @@ -97,6 +118,8 @@ const START_TOKEN_NODE_MAPPINGS = new Map<string, SyntaxTreeNodeType>([
[SQL_LEXER_TOKEN_NAMES.START_PRE_OPERATIONS, SyntaxTreeNodeType.SQL],
[SQL_LEXER_TOKEN_NAMES.START_QUOTE_SINGLE, SyntaxTreeNodeType.SQL_LITERAL_STRING],
[SQL_LEXER_TOKEN_NAMES.START_QUOTE_DOUBLE, SyntaxTreeNodeType.SQL_LITERAL_STRING],
[SQL_LEXER_TOKEN_NAMES.START_TRIPLE_QUOTE_SINGLE, SyntaxTreeNodeType.SQL_LITERAL_MULTILINE_STRING],
[SQL_LEXER_TOKEN_NAMES.START_TRIPLE_QUOTE_DOUBLE, SyntaxTreeNodeType.SQL_LITERAL_MULTILINE_STRING],

[JS_BLOCK_LEXER_TOKEN_NAMES.START_JS_BLOCK, SyntaxTreeNodeType.JAVASCRIPT],

Expand All @@ -108,6 +131,8 @@ const START_TOKEN_NODE_MAPPINGS = new Map<string, SyntaxTreeNodeType>([
],
[INNER_SQL_BLOCK_LEXER_TOKEN_NAMES.START_QUOTE_SINGLE, SyntaxTreeNodeType.SQL_LITERAL_STRING],
[INNER_SQL_BLOCK_LEXER_TOKEN_NAMES.START_QUOTE_DOUBLE, SyntaxTreeNodeType.SQL_LITERAL_STRING],
[INNER_SQL_BLOCK_LEXER_TOKEN_NAMES.START_TRIPLE_QUOTE_SINGLE, SyntaxTreeNodeType.SQL_LITERAL_MULTILINE_STRING],
[INNER_SQL_BLOCK_LEXER_TOKEN_NAMES.START_TRIPLE_QUOTE_DOUBLE, SyntaxTreeNodeType.SQL_LITERAL_MULTILINE_STRING],

[
SQL_SINGLE_QUOTE_STRING_LEXER_TOKEN_NAMES.START_JS_PLACEHOLDER,
Expand All @@ -117,14 +142,26 @@ const START_TOKEN_NODE_MAPPINGS = new Map<string, SyntaxTreeNodeType>([
[
SQL_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES.START_JS_PLACEHOLDER,
SyntaxTreeNodeType.JAVASCRIPT_TEMPLATE_STRING_PLACEHOLDER
],

[
SQL_TRIPLE_SINGLE_QUOTE_STRING_LEXER_TOKEN_NAMES.START_JS_PLACEHOLDER,
SyntaxTreeNodeType.JAVASCRIPT_TEMPLATE_STRING_PLACEHOLDER
],

[
SQL_TRIPLE_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES.START_JS_PLACEHOLDER,
SyntaxTreeNodeType.JAVASCRIPT_TEMPLATE_STRING_PLACEHOLDER
]
]);

const CLOSE_TOKEN_TYPES = new Set<string>([
JS_BLOCK_LEXER_TOKEN_NAMES.CLOSE_BLOCK,
INNER_SQL_BLOCK_LEXER_TOKEN_NAMES.CLOSE_BLOCK,
SQL_SINGLE_QUOTE_STRING_LEXER_TOKEN_NAMES.CLOSE_QUOTE,
SQL_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES.CLOSE_QUOTE
SQL_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES.CLOSE_QUOTE,
SQL_TRIPLE_SINGLE_QUOTE_STRING_LEXER_TOKEN_NAMES.CLOSE_QUOTE,
SQL_TRIPLE_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES.CLOSE_QUOTE
]);

const WHOLE_TOKEN_NODE_MAPPINGS = new Map<string, SyntaxTreeNodeType>([
Expand Down Expand Up @@ -269,6 +306,19 @@ function buildSqlxLexer(): { [x: string]: moo.Rules } {
push: LEXER_STATE_NAMES.JS_BLOCK
};
sqlLexer[SQL_LEXER_TOKEN_NAMES.BACKTICK] = "`";

// Since single quotes (' and ") are substrings of triple quotes (''' and """), the
// triple-quote tokens must be declared first. The order in which moo tries the rules
// implicitly depends on the order in which properties are created on the rule object.
sqlLexer[SQL_LEXER_TOKEN_NAMES.START_TRIPLE_QUOTE_SINGLE] = {
match: "'''",
push: LEXER_STATE_NAMES.SQL_TRIPLE_SINGLE_QUOTE_STRING,
};
sqlLexer[SQL_LEXER_TOKEN_NAMES.START_TRIPLE_QUOTE_DOUBLE] = {
match: '"""',
push: LEXER_STATE_NAMES.SQL_TRIPLE_DOUBLE_QUOTE_STRING,
};

sqlLexer[SQL_LEXER_TOKEN_NAMES.START_QUOTE_SINGLE] = {
match: "'",
push: LEXER_STATE_NAMES.SQL_SINGLE_QUOTE_STRING
Expand Down Expand Up @@ -329,6 +379,14 @@ function buildSqlxLexer(): { [x: string]: moo.Rules } {
pop: 1
};
innerSqlBlockLexer[INNER_SQL_BLOCK_LEXER_TOKEN_NAMES.BACKTICK] = "`";
innerSqlBlockLexer[INNER_SQL_BLOCK_LEXER_TOKEN_NAMES.START_TRIPLE_QUOTE_SINGLE] = {
match: "'''",
push: LEXER_STATE_NAMES.SQL_TRIPLE_SINGLE_QUOTE_STRING
};
innerSqlBlockLexer[INNER_SQL_BLOCK_LEXER_TOKEN_NAMES.START_TRIPLE_QUOTE_DOUBLE] = {
match: '"""',
push: LEXER_STATE_NAMES.SQL_TRIPLE_DOUBLE_QUOTE_STRING
};
innerSqlBlockLexer[INNER_SQL_BLOCK_LEXER_TOKEN_NAMES.START_QUOTE_SINGLE] = {
match: "'",
push: LEXER_STATE_NAMES.SQL_SINGLE_QUOTE_STRING
Expand Down Expand Up @@ -376,13 +434,45 @@ function buildSqlxLexer(): { [x: string]: moo.Rules } {
lineBreaks: true
};

const innerTripleSingleQuoteLexer: moo.Rules = {};
innerTripleSingleQuoteLexer[SQL_TRIPLE_SINGLE_QUOTE_STRING_LEXER_TOKEN_NAMES.ESCAPED_BACKSLASH] = "\\\\";
innerTripleSingleQuoteLexer[SQL_TRIPLE_SINGLE_QUOTE_STRING_LEXER_TOKEN_NAMES.START_JS_PLACEHOLDER] = {
match: "${",
push: LEXER_STATE_NAMES.JS_BLOCK
};
innerTripleSingleQuoteLexer[SQL_TRIPLE_SINGLE_QUOTE_STRING_LEXER_TOKEN_NAMES.CLOSE_QUOTE] = {
match: "'''",
pop: 1
};
innerTripleSingleQuoteLexer[SQL_TRIPLE_SINGLE_QUOTE_STRING_LEXER_TOKEN_NAMES.CAPTURE_EVERYTHING_ELSE] = {
match: /[\s\S]+?/,
lineBreaks: true
};

const innerTripleDoubleQuoteLexer: moo.Rules = {};
innerTripleDoubleQuoteLexer[SQL_TRIPLE_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES.ESCAPED_BACKSLASH] = "\\\\";
innerTripleDoubleQuoteLexer[SQL_TRIPLE_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES.START_JS_PLACEHOLDER] = {
match: "${",
push: LEXER_STATE_NAMES.JS_BLOCK
};
innerTripleDoubleQuoteLexer[SQL_TRIPLE_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES.CLOSE_QUOTE] = {
match: '"""',
pop: 1
};
innerTripleDoubleQuoteLexer[SQL_TRIPLE_DOUBLE_QUOTE_STRING_LEXER_TOKEN_NAMES.CAPTURE_EVERYTHING_ELSE] = {
match: /[\s\S]+?/,
lineBreaks: true
};

const lexerStates: { [x: string]: moo.Rules } = {};
lexerStates[LEXER_STATE_NAMES.SQL] = sqlLexer;
lexerStates[LEXER_STATE_NAMES.JS_BLOCK] = jsBlockLexer;
lexerStates[LEXER_STATE_NAMES.JS_TEMPLATE_STRING] = jsTemplateStringLexer;
lexerStates[LEXER_STATE_NAMES.INNER_SQL_BLOCK] = innerSqlBlockLexer;
lexerStates[LEXER_STATE_NAMES.SQL_SINGLE_QUOTE_STRING] = innerSingleQuoteLexer;
lexerStates[LEXER_STATE_NAMES.SQL_DOUBLE_QUOTE_STRING] = innerDoubleQuoteLexer;
lexerStates[LEXER_STATE_NAMES.SQL_TRIPLE_SINGLE_QUOTE_STRING] = innerTripleSingleQuoteLexer;
lexerStates[LEXER_STATE_NAMES.SQL_TRIPLE_DOUBLE_QUOTE_STRING] = innerTripleDoubleQuoteLexer;

return lexerStates;
}
51 changes: 50 additions & 1 deletion tests/sqlx/format.spec.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { expect } from "chai";
import * as path from "path";

import { formatFile } from "df/sqlx/format";
import { format, formatFile } from "df/sqlx/format";
import { suite, test } from "df/testing";

suite("@dataform/sqlx", () => {
Expand Down Expand Up @@ -146,7 +146,56 @@ WHERE
n < 8
QUALIFY
MOD(ROW_NUMBER() OVER (), 2) = 0
`);
});

// Formats the examples/formatter/definitions/triple_quoted.sqlx fixture and compares the
// result with the expected text below. The fixture covers '''...''' and """...""" strings
// on one line and across several lines, a raw (r''') string passed as a function argument,
// a string containing a ${...} placeholder, and a triple-quoted string inside post_operations.
test("format triple quoted string", async () => {
expect(await formatFile(path.resolve("examples/formatter/definitions/triple_quoted.sqlx")))
.equal(`config {
type: "table"
}
SELECT
'''1''' AS single_line,
"""multi
line
string
with indent""" AS multi_line,
REGEXP_CONTAINS("\\n abc\\n ", r'''
abc
''') AS multi_line_regex,
"""
This project is ...
"\${database()}"!!
""" AS with_js
post_operations {
select
"""1""" as inner_sql
}
`);
});
});

// Pins the formatter's current output for inputs that are marked as TODO in the test data.
suite("formatter todos", () => {
// The ${...} placeholder outside a string is expected to be reformatted, while the
// placeholders inside the double-quoted and triple-quoted strings are expected to come
// back unchanged.
// NOTE(review): the "TODO_" aliases suggest that formatting inside strings is planned
// future work — confirm.
test("TODO format template string in a string", async () => {
const input = `
config {
type: "view"
}
SELECT
"ok" AS \${ "here"+ "works" },
"1 + 2 = \${ 1+2 }" AS TODO_in_string,
'''\${1 +2 }''' AS TODO_in_triple_quoted_string
`;
expect(format(input, 'sqlx')).eql(`config {
type: "view"
}
SELECT
"ok" AS \${"here" + "works"},
"1 + 2 = \${ 1+2 }" AS TODO_in_string,
'''\${1 +2 }''' AS TODO_in_triple_quoted_string
`)});
})
});

0 comments on commit 6bd0aab

Please sign in to comment.