Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update sql-formatter & specify SQL language according to warehouse #1490

Merged
merged 14 commits into from
Jun 9, 2023
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 18 additions & 24 deletions cli/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,10 @@ import {
} from "df/cli/credentials";
import { actuallyResolve, assertPathExists, compiledGraphHasErrors } from "df/cli/util";
import { createYargsCli, INamedOption } from "df/cli/yargswrapper";
import { supportsCancel, WarehouseType } from "df/core/adapters";
import { supportsCancel, WarehouseType, isWarehouseType } from "df/core/adapters";
import { targetAsReadableString } from "df/core/targets";
import { dataform } from "df/protos/ts";
import { formatFile, SqlLanguage } from "df/sqlx/format";
import { formatFile } from "df/sqlx/format";
import parseDuration from "parse-duration";

// NOTE(review): presumably the delay (in milliseconds) applied before triggering a
// recompile; the usage is outside this view — confirm unit and purpose.
const RECOMPILE_DELAY = 500;
Expand Down Expand Up @@ -233,15 +233,6 @@ const tableOptionName = "table";
// Resolves the credentials file location: an explicitly supplied path wins,
// otherwise the default credentials filename inside the project directory is used.
const getCredentialsPath = (projectDir: string, credentialsPath: string) => {
  if (credentialsPath) {
    return actuallyResolve(credentialsPath);
  }
  // Only build the fallback when no explicit path was given.
  return actuallyResolve(path.join(projectDir, CREDENTIALS_FILENAME));
};

// Lookup table from each Dataform warehouse type to the SQL language identifier
// handed to the SQLX formatter. Typed as Record<WarehouseType, ...> so the compiler
// requires an entry for every warehouse type.
// NOTE(review): PRESTO is mapped to the "trino" dialect — presumably the closest
// dialect the formatter offers; confirm.
const warehouseSqlLanguageMap: Record<WarehouseType, SqlLanguage> = {
[WarehouseType.BIGQUERY]:"bigquery",
[WarehouseType.PRESTO]: "trino",
[WarehouseType.POSTGRES]: "postgresql",
[WarehouseType.REDSHIFT]: "redshift",
[WarehouseType.SNOWFLAKE]: "snowflake",
[WarehouseType.SQLDATAWAREHOUSE]: "transactsql"
};

export function runCli() {
const builtYargs = createYargsCli({
commands: [
Expand Down Expand Up @@ -724,18 +715,21 @@ export function runCli() {
positionalOptions: [projectDirMustExistOption],
options: [trackOption],
processFn: async argv => {
let warehouse: string | undefined;
try {
const dataformJson = fs.readFileSync(path.resolve(argv[projectDirMustExistOption.name], "dataform.json"), 'utf8');
const projectConfig = JSON.parse(dataformJson);
warehouse = projectConfig.warehouse;
} catch {
throw new Error('Could not parse dataform.json');
}
if (!dbadapters.validWarehouses.includes(warehouse)) {
throw new Error("Unrecognized 'warehouse' setting in dataform.json");
}
const language = warehouseSqlLanguageMap[warehouse as WarehouseType];
// Reads the `warehouse` setting from the project's dataform.json and returns it
// narrowed to a known WarehouseType.
// Throws if dataform.json cannot be read or parsed, or if its `warehouse` value is
// not a recognized warehouse type.
const readWarehouseConfig = (): WarehouseType => {
  // The value comes straight out of JSON.parse, so it is untrusted until the
  // isWarehouseType guard below has narrowed it.
  let warehouse: unknown;
  try {
    const dataformJsonPath = path.resolve(argv[projectDirMustExistOption.name], "dataform.json");
    const projectConfig = JSON.parse(fs.readFileSync(dataformJsonPath, "utf8"));
    warehouse = projectConfig.warehouse;
  } catch (e) {
    // A caught value is not guaranteed to be an Error; narrow before reading
    // `.message` so the reported reason is never "undefined".
    const reason = e instanceof Error ? e.message : String(e);
    throw new Error(`Could not parse dataform.json: ${reason}`);
  }
  if (!isWarehouseType(warehouse)) {
    throw new Error("Unrecognized 'warehouse' setting in dataform.json");
  }
  return warehouse;
};
const warehouse = readWarehouseConfig();
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I defined an immediately-invoked function because it is convenient for typing and scoping.


const filenames = glob.sync("{definitions,includes}/**/*.{js,sqlx}", {
cwd: argv[projectDirMustExistOption.name]
Expand All @@ -745,7 +739,7 @@ export function runCli() {
try {
await formatFile(path.resolve(argv[projectDirMustExistOption.name], filename), {
overwriteFile: true,
language,
warehouse
});
return {
filename
Expand Down
4 changes: 4 additions & 0 deletions core/adapters/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ export enum WarehouseType {
SQLDATAWAREHOUSE = "sqldatawarehouse"
}

export function isWarehouseType(input: any): input is WarehouseType {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

input: string?

Copy link
Contributor Author

@pokutuna pokutuna Jun 9, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this does type narrowing using a user-defined type guard, it's fine to take a string, but I think it would be better to take any or unknown.

return Object.values(WarehouseType).includes(input);
}

// Warehouse types listed as supporting cancellation.
// NOTE(review): presumably consulted by supportsCancel() below, whose body is
// outside this view — confirm.
const CANCELLATION_SUPPORTED = [WarehouseType.BIGQUERY, WarehouseType.SQLDATAWAREHOUSE];

export function supportsCancel(warehouseType: WarehouseType) {
Expand Down
36 changes: 25 additions & 11 deletions sqlx/format.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import * as jsBeautify from "js-beautify";
import * as sqlFormatter from "sql-formatter";
import { promisify } from "util";

import { WarehouseType } from "df/core/adapters";
import { ErrorWithCause } from "df/common/errors/errors";
import { SyntaxTreeNode, SyntaxTreeNodeType } from "df/sqlx/lexer";
import { v4 as uuidv4 } from "uuid";
Expand All @@ -15,13 +16,26 @@ const JS_BEAUTIFY_OPTIONS: JsBeautifyOptions = {

const MAX_SQL_FORMAT_ATTEMPTS = 5;

export type SqlLanguage = sqlFormatter.SqlLanguage;
const WAREHOUSE_LANGUAGE_MAP: Record<WarehouseType, sqlFormatter.SqlLanguage> = {
[WarehouseType.BIGQUERY]:"bigquery",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: seems unformatted

[WarehouseType.PRESTO]: "trino",
[WarehouseType.POSTGRES]: "postgresql",
[WarehouseType.REDSHIFT]: "redshift",
[WarehouseType.SNOWFLAKE]: "snowflake",
[WarehouseType.SQLDATAWAREHOUSE]: "transactsql"
};

// Warehouse assumed by format() when the caller does not pass one explicitly.
const DEFAULT_WAREHOUSE_FOR_FORMATTING: WarehouseType = WarehouseType.BIGQUERY;

export function format(text: string, fileExtension: string, language: SqlLanguage) {
export function format(
text: string,
fileExtension: string,
warehouse: WarehouseType = DEFAULT_WAREHOUSE_FOR_FORMATTING
) {
try {
switch (fileExtension) {
case "sqlx":
return postProcessFormattedSqlx(formatSqlx(SyntaxTreeNode.create(text), "", language));
return postProcessFormattedSqlx(formatSqlx(SyntaxTreeNode.create(text), "", warehouse));
case "js":
return `${formatJavaScript(text).trim()}\n`;
default:
Expand All @@ -36,15 +50,14 @@ export async function formatFile(
filename: string,
options?: {
overwriteFile?: boolean;
language?: SqlLanguage;
warehouse?: WarehouseType;
}
) {
const fileExtension = filename.split(".").slice(-1)[0];
const originalFileContent = await promisify(fs.readFile)(filename, "utf8");

const language = options?.language || 'bigquery';
const formattedText = format(originalFileContent, fileExtension, language);
if (formattedText !== format(formattedText, fileExtension, language)) {
const formattedText = format(originalFileContent, fileExtension, options?.warehouse);
if (formattedText !== format(formattedText, fileExtension, options?.warehouse)) {
throw new Error("Formatter unable to determine final formatted form.");
}

Expand All @@ -61,10 +74,11 @@ export async function formatFile(
return formattedText;
}

function formatSqlx(node: SyntaxTreeNode, indent: string = "", language: SqlLanguage) {
function formatSqlx(node: SyntaxTreeNode, indent: string = "", warehouse: WarehouseType) {
const { sqlxStatements, javascriptBlocks, innerSqlBlocks } = separateSqlxIntoParts(
node.children()
);
const sqlLanguage = WAREHOUSE_LANGUAGE_MAP[warehouse];

// First, format the JS blocks (including the config block).
const formattedJsCodeBlocks = javascriptBlocks.map(jsCodeBlock =>
Expand All @@ -77,7 +91,7 @@ function formatSqlx(node: SyntaxTreeNode, indent: string = "", language: SqlLang
[placeholderId: string]: SyntaxTreeNode | string;
} = {};
const unformattedPlaceholderSql = stripUnformattableText(sqlxStatement, placeholders).join("");
const formattedPlaceholderSql = formatSql(unformattedPlaceholderSql, language);
const formattedPlaceholderSql = formatSql(unformattedPlaceholderSql, sqlLanguage);
return formatEveryLine(
replacePlaceholders(formattedPlaceholderSql, placeholders),
line => `${indent}${line}`
Expand Down Expand Up @@ -105,7 +119,7 @@ function formatSqlx(node: SyntaxTreeNode, indent: string = "", language: SqlLang
]);

return `${upToFirstBrace}
${formatSqlx(sqlCodeBlockWithoutOuterBraces, " ", language)}
${formatSqlx(sqlCodeBlockWithoutOuterBraces, " ", warehouse)}
${lastBraceOnwards}`;
});

Expand Down Expand Up @@ -205,7 +219,7 @@ function formatJavaScript(text: string) {
return jsBeautify.js(text, JS_BEAUTIFY_OPTIONS);
}

function formatSql(text: string, language: SqlLanguage) {
function formatSql(text: string, language: sqlFormatter.SqlLanguage) {
let formatted = sqlFormatter.format(text, { language }) as string;
// Unfortunately sql-formatter does not always produce final formatted output (even on plain SQL) in a single pass.
for (let attempts = 0; attempts < MAX_SQL_FORMAT_ATTEMPTS; attempts++) {
Expand Down
2 changes: 1 addition & 1 deletion version.bzl
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# NOTE: If you change the format of this line, you must change the bash command
# in /scripts/publish to extract the version string correctly.
DF_VERSION = "2.5.0"
DF_VERSION = "2.6.0"