Skip to content

Commit

Permalink
Update sql-formatter & specify SQL language according to warehouse (d…
Browse files Browse the repository at this point in the history
…ataform-co#1490)

* Update sql-formatter

* Fix placeholder to be treated as a identifier

* Pass language argument & tweak tests

* Add test cases for named arguments & QUALIFY clause

* Pass language according to warehouse from cli

* Include thrown error message while parsing dataform.json in format

* Revert using ErrorWithCause & just include message

* Move warehouse and sql language mapping

* Add user-defined type guard to handle WarehouseType

* Update format subcommand to use WarehouseType

* Bump minor version to 2.6.0

* Fix trailing ;

* Fix code format & tslint issues
  • Loading branch information
pokutuna committed Jun 9, 2023
1 parent e88b327 commit 5b6f674
Show file tree
Hide file tree
Showing 10 changed files with 133 additions and 28 deletions.
21 changes: 19 additions & 2 deletions cli/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ import {
} from "df/cli/credentials";
import { actuallyResolve, assertPathExists, compiledGraphHasErrors } from "df/cli/util";
import { createYargsCli, INamedOption } from "df/cli/yargswrapper";
import { supportsCancel, WarehouseType } from "df/core/adapters";
import { isWarehouseType, supportsCancel, WarehouseType } from "df/core/adapters";
import { targetAsReadableString } from "df/core/targets";
import { dataform } from "df/protos/ts";
import { formatFile } from "df/sqlx/format";
Expand Down Expand Up @@ -715,14 +715,31 @@ export function runCli() {
positionalOptions: [projectDirMustExistOption],
options: [trackOption],
processFn: async argv => {
const readWarehouseConfig = (): WarehouseType => {
let wh: string;
try {
const dataformJson = fs.readFileSync(path.resolve(argv[projectDirMustExistOption.name], "dataform.json"), 'utf8');
const projectConfig = JSON.parse(dataformJson);
wh = projectConfig.warehouse;
} catch (e) {
throw new Error(`Could not parse dataform.json: ${e.message}`);
}
if (!isWarehouseType(wh)) {
throw new Error("Unrecognized 'warehouse' setting in dataform.json");
}
return wh;
};
const warehouse = readWarehouseConfig();

const filenames = glob.sync("{definitions,includes}/**/*.{js,sqlx}", {
cwd: argv[projectDirMustExistOption.name]
});
const results = await Promise.all(
filenames.map(async filename => {
try {
await formatFile(path.resolve(argv[projectDirMustExistOption.name], filename), {
overwriteFile: true
overwriteFile: true,
warehouse
});
return {
filename
Expand Down
4 changes: 4 additions & 0 deletions core/adapters/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ export enum WarehouseType {
SQLDATAWAREHOUSE = "sqldatawarehouse"
}

export function isWarehouseType(input: any): input is WarehouseType {
return Object.values(WarehouseType).includes(input);
}

const CANCELLATION_SUPPORTED = [WarehouseType.BIGQUERY, WarehouseType.SQLDATAWAREHOUSE];

export function supportsCancel(warehouseType: WarehouseType) {
Expand Down
3 changes: 3 additions & 0 deletions examples/formatter/definitions/named_arguments.sqlx
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
config { type: "operations" }

SELECT MAKE_INTERVAL(1, day=>2, minute => 3)
5 changes: 5 additions & 0 deletions examples/formatter/definitions/qualify.sqlx
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
config { type: "operations" }

SELECT * FROM UNNEST([0,1,2,3,4,5,6,7,8,9]) AS n
WHERE n < 8
QUALIFY MOD(ROW_NUMBER() OVER (), 2) = 0
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@
"semver": "^7.3.8",
"snowflake-sdk": "^1.6.12",
"source-map-loader": "^0.2.0",
"sql-formatter": "^2.3.3",
"sql-formatter": "^12.2.1",
"ssh2": "^1.4.0",
"style-loader": "^1.0.0",
"tarjan-graph": "^2.0.0",
Expand Down
44 changes: 32 additions & 12 deletions sqlx/format.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import * as crypto from "crypto";
import * as fs from "fs";
import * as jsBeautify from "js-beautify";
import * as sqlFormatter from "sql-formatter";
import { promisify } from "util";

import { ErrorWithCause } from "df/common/errors/errors";
import { WarehouseType } from "df/core/adapters";
import { SyntaxTreeNode, SyntaxTreeNodeType } from "df/sqlx/lexer";
import { v4 as uuidv4 } from "uuid";

Expand All @@ -16,11 +16,26 @@ const JS_BEAUTIFY_OPTIONS: JsBeautifyOptions = {

const MAX_SQL_FORMAT_ATTEMPTS = 5;

export function format(text: string, fileExtension: string) {
const WAREHOUSE_LANGUAGE_MAP: Record<WarehouseType, sqlFormatter.SqlLanguage> = {
[WarehouseType.BIGQUERY]: "bigquery",
[WarehouseType.PRESTO]: "trino",
[WarehouseType.POSTGRES]: "postgresql",
[WarehouseType.REDSHIFT]: "redshift",
[WarehouseType.SNOWFLAKE]: "snowflake",
[WarehouseType.SQLDATAWAREHOUSE]: "transactsql"
};

const DEFAULT_WAREHOUSE_FOR_FORMATTING: WarehouseType = WarehouseType.BIGQUERY;

export function format(
text: string,
fileExtension: string,
warehouse: WarehouseType = DEFAULT_WAREHOUSE_FOR_FORMATTING
) {
try {
switch (fileExtension) {
case "sqlx":
return postProcessFormattedSqlx(formatSqlx(SyntaxTreeNode.create(text)));
return postProcessFormattedSqlx(formatSqlx(SyntaxTreeNode.create(text), "", warehouse));
case "js":
return `${formatJavaScript(text).trim()}\n`;
default:
Expand All @@ -35,12 +50,14 @@ export async function formatFile(
filename: string,
options?: {
overwriteFile?: boolean;
warehouse?: WarehouseType;
}
) {
const fileExtension = filename.split(".").slice(-1)[0];
const originalFileContent = await promisify(fs.readFile)(filename, "utf8");
const formattedText = format(originalFileContent, fileExtension);
if (formattedText !== format(formattedText, fileExtension)) {

const formattedText = format(originalFileContent, fileExtension, options?.warehouse);
if (formattedText !== format(formattedText, fileExtension, options?.warehouse)) {
throw new Error("Formatter unable to determine final formatted form.");
}

Expand All @@ -57,10 +74,11 @@ export async function formatFile(
return formattedText;
}

function formatSqlx(node: SyntaxTreeNode, indent: string = "") {
function formatSqlx(node: SyntaxTreeNode, indent: string = "", warehouse: WarehouseType) {
const { sqlxStatements, javascriptBlocks, innerSqlBlocks } = separateSqlxIntoParts(
node.children()
);
const sqlLanguage = WAREHOUSE_LANGUAGE_MAP[warehouse];

// First, format the JS blocks (including the config block).
const formattedJsCodeBlocks = javascriptBlocks.map(jsCodeBlock =>
Expand All @@ -73,7 +91,7 @@ function formatSqlx(node: SyntaxTreeNode, indent: string = "") {
[placeholderId: string]: SyntaxTreeNode | string;
} = {};
const unformattedPlaceholderSql = stripUnformattableText(sqlxStatement, placeholders).join("");
const formattedPlaceholderSql = formatSql(unformattedPlaceholderSql);
const formattedPlaceholderSql = formatSql(unformattedPlaceholderSql, sqlLanguage);
return formatEveryLine(
replacePlaceholders(formattedPlaceholderSql, placeholders),
line => `${indent}${line}`
Expand Down Expand Up @@ -101,7 +119,7 @@ function formatSqlx(node: SyntaxTreeNode, indent: string = "") {
]);

return `${upToFirstBrace}
${formatSqlx(sqlCodeBlockWithoutOuterBraces, " ")}
${formatSqlx(sqlCodeBlockWithoutOuterBraces, " ", warehouse)}
${lastBraceOnwards}`;
});

Expand Down Expand Up @@ -175,7 +193,9 @@ function stripUnformattableText(
}

function generatePlaceholderId() {
return uuidv4()
// Add a leading character to ensure that the placeholder doesn't start with a number.
// Identifiers beginning with a number cause errors when formatting.
return '_' + uuidv4()
.replace(/-/g, "")
.substring(0, 16);
}
Expand All @@ -199,11 +219,11 @@ function formatJavaScript(text: string) {
return jsBeautify.js(text, JS_BEAUTIFY_OPTIONS);
}

function formatSql(text: string) {
let formatted = sqlFormatter.format(text) as string;
function formatSql(text: string, language: sqlFormatter.SqlLanguage) {
let formatted = sqlFormatter.format(text, { language }) as string;
// Unfortunately sql-formatter does not always produce final formatted output (even on plain SQL) in a single pass.
for (let attempts = 0; attempts < MAX_SQL_FORMAT_ATTEMPTS; attempts++) {
const newFormatted = sqlFormatter.format(formatted) as string;
const newFormatted = sqlFormatter.format(formatted, { language }) as string;
if (newFormatted === formatted) {
return newFormatted;
}
Expand Down
1 change: 0 additions & 1 deletion sqlx/sql_formatter.d.ts

This file was deleted.

38 changes: 33 additions & 5 deletions tests/sqlx/format.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,13 @@ drop something
---
alter table
\${tempTable} rename to \${finalTableName}
alter table \${tempTable}
rename to \${finalTableName}
---
SELECT
SUM(IF (session_start_event, 1, 0)) AS session_index
SUM(IF(session_start_event, 1, 0)) AS session_index
`);
});

Expand All @@ -67,7 +67,7 @@ select
CAST(
REGEXP_EXTRACT("", r"^/([0-9]+)\\"\\'/.*") AS INT64
) AS id2,
IFNULL (
IFNULL(
regexp_extract('', r'\\a?query=([^&]+)&*'),
regexp_extract('', r'\\a?q=([^&]+)&*')
) AS id3,
Expand All @@ -91,7 +91,7 @@ SELECT
SELECT
SUM(IF(track.event = "event_viewed_project_with_connection", 1, 0))
FROM
UNNEST(records)
UNNEST (records)
)
) > 0 as created_project,
/* multi line
Expand All @@ -118,6 +118,34 @@ input "something" {
test("doesn't care about special string replacement characters", async () => {
expect(await formatFile(path.resolve("examples/formatter/definitions/dollar_regex.sqlx")))
.equal(`'^.*(bot|crawl|slurp|spider|archiv|spinn|sniff|seo|audit|survey|pingdom|worm|capture|(browser|screen)shots|analyz|index|thumb|check|facebook|PhantomJS|a_archiver|facebookexternalhit|BingPreview|360User-agent|semalt).*$'
`);
});

test("formats named arguments", async () => {
expect(await formatFile(path.resolve("examples/formatter/definitions/named_arguments.sqlx")))
.equal(`config {
type: "operations"
}
SELECT
MAKE_INTERVAL(1, day => 2, minute => 3)
`);
});

test("formats QUALIFY clause", async () => {
expect(await formatFile(path.resolve("examples/formatter/definitions/qualify.sqlx")))
.equal(`config {
type: "operations"
}
SELECT
*
FROM
UNNEST ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) AS n
WHERE
n < 8
QUALIFY
MOD(ROW_NUMBER() OVER (), 2) = 0
`);
});
});
Expand Down
2 changes: 1 addition & 1 deletion version.bzl
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# NOTE: If you change the format of this line, you must change the bash command
# in /scripts/publish to extract the version string correctly.
DF_VERSION = "2.5.0"
DF_VERSION = "2.6.0"
41 changes: 35 additions & 6 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -3244,6 +3244,11 @@ digest-header@^0.0.1:
dependencies:
utility "0.1.11"

discontinuous-range@1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/discontinuous-range/-/discontinuous-range-1.0.0.tgz#e38331f0844bba49b9a9cb71c771585aab1bc65a"
integrity sha512-c68LpLbO+7kP/b1Hr1qs8/BJ09F5khZGTxqxZuhzxpmwJKOgRFHJWIb9/KmqnqHhLdO55aOxFH/EGBvUQbL/RQ==

dns-packet@^1.3.2:
version "1.3.4"
resolved "https://registry.yarnpkg.com/dns-packet/-/dns-packet-1.3.4.tgz#e3455065824a2507ba886c55a89963bb107dec6f"
Expand Down Expand Up @@ -5787,7 +5792,7 @@ lodash.uniq@^4.5.0:
resolved "https://registry.yarnpkg.com/lodash.uniq/-/lodash.uniq-4.5.0.tgz#d0225373aeb652adc1bc82e4945339a842754773"
integrity "sha1-0CJTc662Uq3BvILklFM5qEJ1R3M= sha512-xfBaXQd9ryd9dlSDvnvI0lvxfLJlYAZzXomUYzLKtUeOQvOP5piqAWuGtrhWeqaXK9hhoM/iyJc5AV+XfsX3HQ=="

lodash@^4.15.0, lodash@^4.16.0, lodash@^4.17.11, lodash@^4.17.14, lodash@^4.17.15, lodash@^4.17.20, lodash@^4.17.21, lodash@^4.17.4, lodash@^4.17.5:
lodash@^4.15.0, lodash@^4.17.11, lodash@^4.17.14, lodash@^4.17.15, lodash@^4.17.20, lodash@^4.17.21, lodash@^4.17.4, lodash@^4.17.5:
version "4.17.21"
resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.21.tgz#679591c564c3bffaae8454cf0b3df370c3d6911c"
integrity "sha1-Z5WRxWTDv/quhFTPCz3zcMPWkRw= sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg=="
Expand Down Expand Up @@ -6281,6 +6286,16 @@ ncp@~2.0.0:
resolved "https://registry.yarnpkg.com/ncp/-/ncp-2.0.0.tgz#195a21d6c46e361d2fb1281ba38b91e9df7bdbb3"
integrity "sha1-GVoh1sRuNh0vsSgbo4uR6d9727M= sha512-zIdGUrPRFTUELUvr3Gmc7KZ2Sw/h1PiVM0Af/oHB6zgnV1ikqSfRk+TOufi79aHYCW3NiOXmr1BP5nWbzojLaA=="

nearley@^2.20.1:
version "2.20.1"
resolved "https://registry.yarnpkg.com/nearley/-/nearley-2.20.1.tgz#246cd33eff0d012faf197ff6774d7ac78acdd474"
integrity sha512-+Mc8UaAebFzgV+KpI5n7DasuuQCHA89dmwm7JXw3TV43ukfNQ9DnBH3Mdb2g/I4Fdxc26pwimBWvjIw0UAILSQ==
dependencies:
commander "^2.19.0"
moo "^0.5.0"
railroad-diagrams "^1.0.0"
randexp "0.4.6"

negotiator@0.6.3:
version "0.6.3"
resolved "https://registry.yarnpkg.com/negotiator/-/negotiator-0.6.3.tgz#58e323a72fedc0d6f9cd4d31fe49f51479590ccd"
Expand Down Expand Up @@ -7358,6 +7373,19 @@ raf@^3.1.0:
dependencies:
performance-now "^2.1.0"

railroad-diagrams@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/railroad-diagrams/-/railroad-diagrams-1.0.0.tgz#eb7e6267548ddedfb899c1b90e57374559cddb7e"
integrity sha512-cz93DjNeLY0idrCNOH6PviZGRN9GJhsdm9hpn1YCS879fj4W+x5IFJhhkRZcwVgMmFF7R82UA/7Oh+R8lLZg6A==

randexp@0.4.6:
version "0.4.6"
resolved "https://registry.yarnpkg.com/randexp/-/randexp-0.4.6.tgz#e986ad5e5e31dae13ddd6f7b3019aa7c87f60ca3"
integrity sha512-80WNmd9DA0tmZrw9qQa62GPPWfuXJknrmVmLcxvq4uZBdYqb1wYoKTmnlGUchvVWe0XiLupYkBoXVOxz3C8DYQ==
dependencies:
discontinuous-range "1.0.0"
ret "~0.1.10"

randombytes@^2.1.0:
version "2.1.0"
resolved "https://registry.yarnpkg.com/randombytes/-/randombytes-2.1.0.tgz#df6f84372f0270dc65cdf6291349ab7a473d4f2a"
Expand Down Expand Up @@ -8343,12 +8371,13 @@ sprintf-js@~1.0.2:
resolved "https://registry.yarnpkg.com/sprintf-js/-/sprintf-js-1.0.3.tgz#04e6926f662895354f3dd015203633b857297e2c"
integrity "sha1-BOaSb2YolTVPPdAVIDYzuFcpfiw= sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g=="

sql-formatter@^2.3.3:
version "2.3.3"
resolved "https://registry.yarnpkg.com/sql-formatter/-/sql-formatter-2.3.3.tgz#910ef484fbb988a5e510bea4161157e3b80b2f62"
integrity "sha1-kQ70hPu5iKXlEL6kFhFX47gLL2I= sha512-m6pqVXwsm9GkCHC/+gdPvNowI7PNoVTT6OZMWKwXJoP2MvfntfhcfyliIf4/QX6t+DirSJ6XDSiSS70YvZ87Lw=="
sql-formatter@^12.2.1:
version "12.2.1"
resolved "https://registry.yarnpkg.com/sql-formatter/-/sql-formatter-12.2.1.tgz#6e048fea63c06f7f10371066502e4eb163967e17"
integrity sha512-rvNfY+g+mv3vxzqdDrsLVjhe/UZohYdCqUqyTK/JI8oXxnW4ObcldlclqUoL55SWQs8azEfFf66XfPBi6tkjfQ==
dependencies:
lodash "^4.16.0"
argparse "^2.0.1"
nearley "^2.20.1"

ssh2@^1.4.0:
version "1.4.0"
Expand Down

0 comments on commit 5b6f674

Please sign in to comment.