Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update sql-formatter & specify SQL language according to warehouse #1490

Merged
merged 14 commits into from
Jun 9, 2023
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 19 additions & 2 deletions cli/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ import {
} from "df/cli/credentials";
import { actuallyResolve, assertPathExists, compiledGraphHasErrors } from "df/cli/util";
import { createYargsCli, INamedOption } from "df/cli/yargswrapper";
import { supportsCancel, WarehouseType } from "df/core/adapters";
import { supportsCancel, WarehouseType, isWarehouseType } from "df/core/adapters";
import { targetAsReadableString } from "df/core/targets";
import { dataform } from "df/protos/ts";
import { formatFile } from "df/sqlx/format";
Expand Down Expand Up @@ -715,14 +715,31 @@ export function runCli() {
positionalOptions: [projectDirMustExistOption],
options: [trackOption],
processFn: async argv => {
const readWarehouseConfig = (): WarehouseType => {
let warehouse: string;
try {
const dataformJson = fs.readFileSync(path.resolve(argv[projectDirMustExistOption.name], "dataform.json"), 'utf8');
const projectConfig = JSON.parse(dataformJson);
warehouse = projectConfig.warehouse;
} catch (e) {
throw new Error(`Could not parse dataform.json: ${e.message}`);
}
if (!isWarehouseType(warehouse)) {
throw new Error("Unrecognized 'warehouse' setting in dataform.json");
}
return warehouse;
};
const warehouse = readWarehouseConfig();
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I defined a immediate function because it is convenient for typing and scoping


const filenames = glob.sync("{definitions,includes}/**/*.{js,sqlx}", {
cwd: argv[projectDirMustExistOption.name]
});
const results = await Promise.all(
filenames.map(async filename => {
try {
await formatFile(path.resolve(argv[projectDirMustExistOption.name], filename), {
overwriteFile: true
overwriteFile: true,
warehouse
});
return {
filename
Expand Down
4 changes: 4 additions & 0 deletions core/adapters/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ export enum WarehouseType {
SQLDATAWAREHOUSE = "sqldatawarehouse"
}

export function isWarehouseType(input: any): input is WarehouseType {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

input: string?

Copy link
Contributor Author

@pokutuna pokutuna Jun 9, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this does type narrowing using user-defined type guard, it's fine to take a string, but I think it would be better to take any or unknown.

return Object.values(WarehouseType).includes(input);
}

const CANCELLATION_SUPPORTED = [WarehouseType.BIGQUERY, WarehouseType.SQLDATAWAREHOUSE];

export function supportsCancel(warehouseType: WarehouseType) {
Expand Down
3 changes: 3 additions & 0 deletions examples/formatter/definitions/named_arguments.sqlx
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
config { type: "operations" }

SELECT MAKE_INTERVAL(1, day=>2, minute => 3)
5 changes: 5 additions & 0 deletions examples/formatter/definitions/qualify.sqlx
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
config { type: "operations" }

SELECT * FROM UNNEST([0,1,2,3,4,5,6,7,8,9]) AS n
WHERE n < 8
QUALIFY MOD(ROW_NUMBER() OVER (), 2) = 0
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@
"semver": "^7.3.8",
"snowflake-sdk": "^1.6.12",
"source-map-loader": "^0.2.0",
"sql-formatter": "^2.3.3",
"sql-formatter": "^12.2.1",
"ssh2": "^1.4.0",
"style-loader": "^1.0.0",
"tarjan-graph": "^2.0.0",
Expand Down
44 changes: 32 additions & 12 deletions sqlx/format.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import * as crypto from "crypto";
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

an unused import

import * as fs from "fs";
import * as jsBeautify from "js-beautify";
import * as sqlFormatter from "sql-formatter";
import { promisify } from "util";

import { WarehouseType } from "df/core/adapters";
import { ErrorWithCause } from "df/common/errors/errors";
import { SyntaxTreeNode, SyntaxTreeNodeType } from "df/sqlx/lexer";
import { v4 as uuidv4 } from "uuid";
Expand All @@ -16,11 +16,26 @@ const JS_BEAUTIFY_OPTIONS: JsBeautifyOptions = {

const MAX_SQL_FORMAT_ATTEMPTS = 5;

export function format(text: string, fileExtension: string) {
const WAREHOUSE_LANGUAGE_MAP: Record<WarehouseType, sqlFormatter.SqlLanguage> = {
[WarehouseType.BIGQUERY]:"bigquery",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: seems unformatted

[WarehouseType.PRESTO]: "trino",
[WarehouseType.POSTGRES]: "postgresql",
[WarehouseType.REDSHIFT]: "redshift",
[WarehouseType.SNOWFLAKE]: "snowflake",
[WarehouseType.SQLDATAWAREHOUSE]: "transactsql"
};

const DEFAULT_WAREHOUSE_FOR_FORMATTING: WarehouseType = WarehouseType.BIGQUERY;

export function format(
text: string,
fileExtension: string,
warehouse: WarehouseType = DEFAULT_WAREHOUSE_FOR_FORMATTING
) {
try {
switch (fileExtension) {
case "sqlx":
return postProcessFormattedSqlx(formatSqlx(SyntaxTreeNode.create(text)));
return postProcessFormattedSqlx(formatSqlx(SyntaxTreeNode.create(text), "", warehouse));
case "js":
return `${formatJavaScript(text).trim()}\n`;
default:
Expand All @@ -35,12 +50,14 @@ export async function formatFile(
filename: string,
options?: {
overwriteFile?: boolean;
warehouse?: WarehouseType;
}
) {
const fileExtension = filename.split(".").slice(-1)[0];
const originalFileContent = await promisify(fs.readFile)(filename, "utf8");
const formattedText = format(originalFileContent, fileExtension);
if (formattedText !== format(formattedText, fileExtension)) {

const formattedText = format(originalFileContent, fileExtension, options?.warehouse);
if (formattedText !== format(formattedText, fileExtension, options?.warehouse)) {
throw new Error("Formatter unable to determine final formatted form.");
}

Expand All @@ -57,10 +74,11 @@ export async function formatFile(
return formattedText;
}

function formatSqlx(node: SyntaxTreeNode, indent: string = "") {
function formatSqlx(node: SyntaxTreeNode, indent: string = "", warehouse: WarehouseType) {
const { sqlxStatements, javascriptBlocks, innerSqlBlocks } = separateSqlxIntoParts(
node.children()
);
const sqlLanguage = WAREHOUSE_LANGUAGE_MAP[warehouse];

// First, format the JS blocks (including the config block).
const formattedJsCodeBlocks = javascriptBlocks.map(jsCodeBlock =>
Expand All @@ -73,7 +91,7 @@ function formatSqlx(node: SyntaxTreeNode, indent: string = "") {
[placeholderId: string]: SyntaxTreeNode | string;
} = {};
const unformattedPlaceholderSql = stripUnformattableText(sqlxStatement, placeholders).join("");
const formattedPlaceholderSql = formatSql(unformattedPlaceholderSql);
const formattedPlaceholderSql = formatSql(unformattedPlaceholderSql, sqlLanguage);
return formatEveryLine(
replacePlaceholders(formattedPlaceholderSql, placeholders),
line => `${indent}${line}`
Expand Down Expand Up @@ -101,7 +119,7 @@ function formatSqlx(node: SyntaxTreeNode, indent: string = "") {
]);

return `${upToFirstBrace}
${formatSqlx(sqlCodeBlockWithoutOuterBraces, " ")}
${formatSqlx(sqlCodeBlockWithoutOuterBraces, " ", warehouse)}
${lastBraceOnwards}`;
});

Expand Down Expand Up @@ -175,7 +193,9 @@ function stripUnformattableText(
}

function generatePlaceholderId() {
return uuidv4()
// Add a leading character to ensure that the placeholder doesn't start with a number.
// Identifiers beginning with a number cause errors when formatting.
return '_' + uuidv4()
.replace(/-/g, "")
.substring(0, 16);
}
Expand All @@ -199,11 +219,11 @@ function formatJavaScript(text: string) {
return jsBeautify.js(text, JS_BEAUTIFY_OPTIONS);
}

function formatSql(text: string) {
let formatted = sqlFormatter.format(text) as string;
function formatSql(text: string, language: sqlFormatter.SqlLanguage) {
let formatted = sqlFormatter.format(text, { language }) as string;
// Unfortunately sql-formatter does not always produce final formatted output (even on plain SQL) in a single pass.
for (let attempts = 0; attempts < MAX_SQL_FORMAT_ATTEMPTS; attempts++) {
const newFormatted = sqlFormatter.format(formatted) as string;
const newFormatted = sqlFormatter.format(formatted, { language }) as string;
if (newFormatted === formatted) {
return newFormatted;
}
Expand Down
1 change: 0 additions & 1 deletion sqlx/sql_formatter.d.ts

This file was deleted.

38 changes: 33 additions & 5 deletions tests/sqlx/format.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,13 @@ drop something

---

alter table
\${tempTable} rename to \${finalTableName}
alter table \${tempTable}
rename to \${finalTableName}

---

SELECT
SUM(IF (session_start_event, 1, 0)) AS session_index
SUM(IF(session_start_event, 1, 0)) AS session_index
`);
});

Expand All @@ -67,7 +67,7 @@ select
CAST(
REGEXP_EXTRACT("", r"^/([0-9]+)\\"\\'/.*") AS INT64
) AS id2,
IFNULL (
IFNULL(
regexp_extract('', r'\\a?query=([^&]+)&*'),
regexp_extract('', r'\\a?q=([^&]+)&*')
) AS id3,
Expand All @@ -91,7 +91,7 @@ SELECT
SELECT
SUM(IF(track.event = "event_viewed_project_with_connection", 1, 0))
FROM
UNNEST(records)
UNNEST (records)
)
) > 0 as created_project,
/* multi line
Expand All @@ -118,6 +118,34 @@ input "something" {
test("doesn't care about special string replacement characters", async () => {
expect(await formatFile(path.resolve("examples/formatter/definitions/dollar_regex.sqlx")))
.equal(`'^.*(bot|crawl|slurp|spider|archiv|spinn|sniff|seo|audit|survey|pingdom|worm|capture|(browser|screen)shots|analyz|index|thumb|check|facebook|PhantomJS|a_archiver|facebookexternalhit|BingPreview|360User-agent|semalt).*$'
`);
});

test("formats named arguments", async () => {
expect(await formatFile(path.resolve("examples/formatter/definitions/named_arguments.sqlx")))
.equal(`config {
type: "operations"
}

SELECT
MAKE_INTERVAL(1, day => 2, minute => 3)
`);
});

test("formats QUALIFY clause", async () => {
expect(await formatFile(path.resolve("examples/formatter/definitions/qualify.sqlx")))
.equal(`config {
type: "operations"
}

SELECT
*
FROM
UNNEST ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) AS n
WHERE
n < 8
QUALIFY
MOD(ROW_NUMBER() OVER (), 2) = 0
`);
});
});
Expand Down
2 changes: 1 addition & 1 deletion version.bzl
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# NOTE: If you change the format of this line, you must change the bash command
# in /scripts/publish to extract the version string correctly.
DF_VERSION = "2.5.0"
DF_VERSION = "2.6.0"
41 changes: 35 additions & 6 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -3244,6 +3244,11 @@ digest-header@^0.0.1:
dependencies:
utility "0.1.11"

discontinuous-range@1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/discontinuous-range/-/discontinuous-range-1.0.0.tgz#e38331f0844bba49b9a9cb71c771585aab1bc65a"
integrity sha512-c68LpLbO+7kP/b1Hr1qs8/BJ09F5khZGTxqxZuhzxpmwJKOgRFHJWIb9/KmqnqHhLdO55aOxFH/EGBvUQbL/RQ==

dns-packet@^1.3.2:
version "1.3.4"
resolved "https://registry.yarnpkg.com/dns-packet/-/dns-packet-1.3.4.tgz#e3455065824a2507ba886c55a89963bb107dec6f"
Expand Down Expand Up @@ -5787,7 +5792,7 @@ lodash.uniq@^4.5.0:
resolved "https://registry.yarnpkg.com/lodash.uniq/-/lodash.uniq-4.5.0.tgz#d0225373aeb652adc1bc82e4945339a842754773"
integrity "sha1-0CJTc662Uq3BvILklFM5qEJ1R3M= sha512-xfBaXQd9ryd9dlSDvnvI0lvxfLJlYAZzXomUYzLKtUeOQvOP5piqAWuGtrhWeqaXK9hhoM/iyJc5AV+XfsX3HQ=="

lodash@^4.15.0, lodash@^4.16.0, lodash@^4.17.11, lodash@^4.17.14, lodash@^4.17.15, lodash@^4.17.20, lodash@^4.17.21, lodash@^4.17.4, lodash@^4.17.5:
lodash@^4.15.0, lodash@^4.17.11, lodash@^4.17.14, lodash@^4.17.15, lodash@^4.17.20, lodash@^4.17.21, lodash@^4.17.4, lodash@^4.17.5:
version "4.17.21"
resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.21.tgz#679591c564c3bffaae8454cf0b3df370c3d6911c"
integrity "sha1-Z5WRxWTDv/quhFTPCz3zcMPWkRw= sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg=="
Expand Down Expand Up @@ -6281,6 +6286,16 @@ ncp@~2.0.0:
resolved "https://registry.yarnpkg.com/ncp/-/ncp-2.0.0.tgz#195a21d6c46e361d2fb1281ba38b91e9df7bdbb3"
integrity "sha1-GVoh1sRuNh0vsSgbo4uR6d9727M= sha512-zIdGUrPRFTUELUvr3Gmc7KZ2Sw/h1PiVM0Af/oHB6zgnV1ikqSfRk+TOufi79aHYCW3NiOXmr1BP5nWbzojLaA=="

nearley@^2.20.1:
version "2.20.1"
resolved "https://registry.yarnpkg.com/nearley/-/nearley-2.20.1.tgz#246cd33eff0d012faf197ff6774d7ac78acdd474"
integrity sha512-+Mc8UaAebFzgV+KpI5n7DasuuQCHA89dmwm7JXw3TV43ukfNQ9DnBH3Mdb2g/I4Fdxc26pwimBWvjIw0UAILSQ==
dependencies:
commander "^2.19.0"
moo "^0.5.0"
railroad-diagrams "^1.0.0"
randexp "0.4.6"

negotiator@0.6.3:
version "0.6.3"
resolved "https://registry.yarnpkg.com/negotiator/-/negotiator-0.6.3.tgz#58e323a72fedc0d6f9cd4d31fe49f51479590ccd"
Expand Down Expand Up @@ -7358,6 +7373,19 @@ raf@^3.1.0:
dependencies:
performance-now "^2.1.0"

railroad-diagrams@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/railroad-diagrams/-/railroad-diagrams-1.0.0.tgz#eb7e6267548ddedfb899c1b90e57374559cddb7e"
integrity sha512-cz93DjNeLY0idrCNOH6PviZGRN9GJhsdm9hpn1YCS879fj4W+x5IFJhhkRZcwVgMmFF7R82UA/7Oh+R8lLZg6A==

randexp@0.4.6:
version "0.4.6"
resolved "https://registry.yarnpkg.com/randexp/-/randexp-0.4.6.tgz#e986ad5e5e31dae13ddd6f7b3019aa7c87f60ca3"
integrity sha512-80WNmd9DA0tmZrw9qQa62GPPWfuXJknrmVmLcxvq4uZBdYqb1wYoKTmnlGUchvVWe0XiLupYkBoXVOxz3C8DYQ==
dependencies:
discontinuous-range "1.0.0"
ret "~0.1.10"

randombytes@^2.1.0:
version "2.1.0"
resolved "https://registry.yarnpkg.com/randombytes/-/randombytes-2.1.0.tgz#df6f84372f0270dc65cdf6291349ab7a473d4f2a"
Expand Down Expand Up @@ -8343,12 +8371,13 @@ sprintf-js@~1.0.2:
resolved "https://registry.yarnpkg.com/sprintf-js/-/sprintf-js-1.0.3.tgz#04e6926f662895354f3dd015203633b857297e2c"
integrity "sha1-BOaSb2YolTVPPdAVIDYzuFcpfiw= sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g=="

sql-formatter@^2.3.3:
version "2.3.3"
resolved "https://registry.yarnpkg.com/sql-formatter/-/sql-formatter-2.3.3.tgz#910ef484fbb988a5e510bea4161157e3b80b2f62"
integrity "sha1-kQ70hPu5iKXlEL6kFhFX47gLL2I= sha512-m6pqVXwsm9GkCHC/+gdPvNowI7PNoVTT6OZMWKwXJoP2MvfntfhcfyliIf4/QX6t+DirSJ6XDSiSS70YvZ87Lw=="
sql-formatter@^12.2.1:
version "12.2.1"
resolved "https://registry.yarnpkg.com/sql-formatter/-/sql-formatter-12.2.1.tgz#6e048fea63c06f7f10371066502e4eb163967e17"
integrity sha512-rvNfY+g+mv3vxzqdDrsLVjhe/UZohYdCqUqyTK/JI8oXxnW4ObcldlclqUoL55SWQs8azEfFf66XfPBi6tkjfQ==
dependencies:
lodash "^4.16.0"
argparse "^2.0.1"
nearley "^2.20.1"

ssh2@^1.4.0:
version "1.4.0"
Expand Down