Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added d1 export --local support #6073

Merged
merged 1 commit into from
Jun 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changeset/heavy-tomatoes-sing.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
"miniflare": minor
"wrangler": minor
---

Added D1 export support for local databases
37 changes: 37 additions & 0 deletions packages/miniflare/src/workers/d1/database.worker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import {
viewToBuffer,
} from "miniflare:shared";
import { z } from "miniflare:zod";
import { dumpSql } from "./dumpSql";

const D1ValueSchema = z.union([
z.number(),
Expand All @@ -27,6 +28,11 @@ const D1QuerySchema = z.object({
type D1Query = z.infer<typeof D1QuerySchema>;
const D1QueriesSchema = z.union([D1QuerySchema, z.array(D1QuerySchema)]);

/**
 * Marker statement that requests a SQL export of the local database instead of
 * being executed as a regular query.
 */
const D1_EXPORT_PRAGMA = "PRAGMA miniflare_d1_export(?,?,?);";

/**
 * Parameters bound to the export pragma: two numeric flags (read as booleans
 * by the export handler) followed by any number of table names.
 */
type D1ExportPragmaParams = [
  noSchema: number,
  noData: number,
  ...tables: string[],
];

/** A query batch consisting of exactly one export pragma statement. */
type D1ExportPragma = [
  { sql: typeof D1_EXPORT_PRAGMA; params: D1ExportPragmaParams },
];

// Accepted values for the `resultsFormat` query parameter of a query request.
// `.catch(...)` supplies "ARRAY_OF_OBJECTS" as the fallback when parsing the
// incoming value fails (e.g. the parameter is absent or not one of the options).
const D1ResultsFormatSchema = z
.enum(["ARRAY_OF_OBJECTS", "ROWS_AND_COLUMNS", "NONE"])
.catch("ARRAY_OF_OBJECTS");
Expand Down Expand Up @@ -201,11 +207,42 @@ export class D1DatabaseObject extends MiniflareDurableObject {
let queries = D1QueriesSchema.parse(await req.json());
if (!Array.isArray(queries)) queries = [queries];

// Special local-mode-only handlers
if (this.#isExportPragma(queries)) {
return this.#doExportData(queries);
}

const { searchParams } = new URL(req.url);
const resultsFormat = D1ResultsFormatSchema.parse(
searchParams.get("resultsFormat")
);

return Response.json(this.#txn(queries, resultsFormat));
};

/**
 * Type guard recognising the local-only export request: a batch holding a
 * single statement whose text is exactly `D1_EXPORT_PRAGMA` and that carries
 * at least two parameters.
 *
 * Only the statement text and the parameter count are checked; the element
 * types of `params` are not validated here.
 */
#isExportPragma(queries: D1Query[]): queries is D1ExportPragma {
  if (queries.length !== 1) return false;

  const only = queries[0];
  if (only.sql !== D1_EXPORT_PRAGMA) return false;

  const paramCount = only.params?.length ?? 0;
  return paramCount >= 2;
}

/**
 * Serves an export request by dumping the database to SQL statements.
 *
 * The first two pragma parameters are read as the `noSchema` and `noData`
 * flags (any truthy number enables them); every remaining parameter is a table
 * name to restrict the dump to.
 *
 * @param queries - The export pragma batch, as narrowed by `#isExportPragma`.
 * @returns A JSON response whose single result is the array of dumped SQL
 *   statements.
 */
#doExportData(queries: D1ExportPragma) {
  const [noSchema, noData, ...tables] = queries[0].params;
  const options = {
    noSchema: Boolean(noSchema),
    noData: Boolean(noData),
    tables,
  };
  // Drain the generator up front so the response body is a plain array.
  const statements = Array.from(dumpSql(this.state.storage.sql, options));
  return Response.json({
    success: true,
    results: [statements],
    meta: {},
  });
}
}
160 changes: 160 additions & 0 deletions packages/miniflare/src/workers/d1/dumpSql.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
// NOTE: this function duplicates the logic inside SQLite's shell.c.in as closely
// as possible, with any deviations noted.
import { SqlStorage } from "@cloudflare/workers-types/experimental";

/**
 * Generates a SQL dump of `db` one statement at a time, following the `.dump`
 * logic of SQLite's shell.c.in (deviations are noted inline).
 *
 * @param db - SQL storage handle the schema and rows are read from.
 * @param options - Optional filters:
 *   - `noSchema`: omit schema statements (data only).
 *   - `noData`: omit `INSERT` statements (schema only).
 *   - `tables`: names to restrict the dump to; empty or omitted means all.
 * @returns A generator of SQL statements, always starting with
 *   `PRAGMA defer_foreign_keys=TRUE;`.
 * @throws Error when a selected table is a virtual table.
 */
export function* dumpSql(
  db: SqlStorage,
  options?: {
    noSchema?: boolean;
    noData?: boolean;
    tables?: string[];
  }
): Generator<string, void, unknown> {
  yield `PRAGMA defer_foreign_keys=TRUE;`;

  // Empty set means include all tables
  const filterTables = new Set(options?.tables ?? []);
  const { noData, noSchema } = options ?? {};

  // Taken from SQLite shell.c.in https://github.com/sqlite/sqlite/blob/105c20648e1b05839fd0638686b95f2e3998abcb/src/shell.c.in#L8463-L8469
  // @ts-ignore (SqlStorageStatement needs to be callable)
  const tables_cursor = db.prepare(`
    SELECT name, type, sql
      FROM sqlite_schema AS o
    WHERE (true) AND type=='table'
      AND sql NOT NULL
    ORDER BY tbl_name='sqlite_sequence', rowid;
  `)();
  // Materialise the table list before issuing any further queries.
  const tables: any[] = Array.from(tables_cursor);

  for (const { name: table, sql } of tables) {
    if (filterTables.size > 0 && !filterTables.has(table)) continue;

    if (table === "sqlite_sequence") {
      if (!noSchema) yield `DELETE FROM sqlite_sequence;`;
    } else if (table.match(/^sqlite_stat./)) {
      // This feels like it should really appear _after_ the contents of sqlite_stat[1,4] but I'm choosing
      // to match SQLite's dump output exactly so writing it immediately like they do.
      if (!noSchema) yield `ANALYZE sqlite_schema;`;
    } else if (sql.startsWith(`CREATE VIRTUAL TABLE`)) {
      throw new Error(
        `D1 Export error: cannot export databases with Virtual Tables (fts5)`
      );
    } else if (table.startsWith("_cf_") || table.startsWith("sqlite_")) {
      continue;
    } else {
      // SQLite dump has an extremely weird behaviour where, if the table was explicitly
      // quoted i.e. "Table", then in the dump it has `IF NOT EXISTS` injected. I don't understand
      // why, but on the off chance there's a good reason to I am following suit.
      // The check is anchored to the start of the statement so that the same text
      // appearing later in the DDL (e.g. inside a default value) cannot trigger it.
      if (sql.match(/^CREATE TABLE ['"]/)) {
        if (!noSchema) yield `CREATE TABLE IF NOT EXISTS ${sql.substring(13)};`;
      } else {
        if (!noSchema) yield `${sql};`;
      }
    }

    if (noData) continue;
    // Identifiers are always quoted (with embedded quotes doubled) in the queries
    // we run ourselves, so keyword-like or oddly named tables/columns still work.
    const columns_cursor = db.exec(
      `PRAGMA table_info=${quoteIdentifier(table)}`
    );
    const columns = Array.from(columns_cursor);
    const select = `SELECT ${columns.map((c) => quoteIdentifier(c.name)).join(", ")}
      FROM ${quoteIdentifier(table)};`;
    const rows_cursor = db.exec(select);
    for (const dataRow of rows_cursor.raw()) {
      const formattedCells = dataRow.map((cell: unknown, i: number) =>
        formatDumpCell(cell, columns[i])
      );

      yield `INSERT INTO ${sqliteQuote(table)} VALUES(${formattedCells.join(",")});`;
    }
  }

  if (!noSchema) {
    // This intentionally runs after the tables have been dumped: in shell.c.in the
    // query for 'index', 'trigger' and 'view' rows follows the schema/data dump and is
    // skipped when only data was requested.
    // Taken from SQLite shell.c.in https://github.com/sqlite/sqlite/blob/105c20648e1b05839fd0638686b95f2e3998abcb/src/shell.c.in#L8473-L8478
    const rest_of_schema = db.exec(
      `SELECT name, sql FROM sqlite_schema AS o WHERE (true) AND sql NOT NULL AND type IN ('index', 'trigger', 'view') ORDER BY type COLLATE NOCASE /* DESC */;`
    );
    // 'DESC' appears in the code linked above but the observed behaviour of SQLite appears otherwise
    for (const { name, sql } of rest_of_schema) {
      if (filterTables.size > 0 && !filterTables.has(name as string)) continue;
      yield `${sql};`;
    }
  }
}

// Wraps an identifier in double quotes, doubling any embedded double quote.
function quoteIdentifier(name: unknown): string {
  return `"${String(name).replace(/"/g, '""')}"`;
}

// Renders one cell as a SQL literal, chosen by the value's runtime type rather
// than the declared column type (SQLite columns can hold values of any type).
function formatDumpCell(
  cell: unknown,
  column?: Record<string, unknown>
): string | number {
  if (cell === null) return "NULL";

  if (typeof cell === "number") {
    // `Infinity` is not a valid SQL token; an out-of-range literal reads back as infinity.
    if (cell === Infinity) return "1e999";
    if (cell === -Infinity) return "-1e999";
    return cell;
  }

  if (typeof cell === "string") return outputQuotedEscapedString(cell);

  if (cell instanceof ArrayBuffer) {
    // Every byte must contribute exactly two hex digits.
    const hex = Array.from(new Uint8Array(cell), (b) =>
      b.toString(16).padStart(2, "0")
    ).join("");
    return `X'${hex}'`;
  }

  console.log({ colType: column?.type, cellType: typeof cell, cell, column });
  return "ERROR";
}

// Ported `unused_string` from SQLite's shell.c.in: returns the first of
// `first`, `second`, `(first0)`, `(first1)`, ... that does not occur in `text`.
function unusedString(text: string, first: string, second: string): string {
  if (!text.includes(first)) return first;
  if (!text.includes(second)) return second;
  let counter = 0;
  let candidate: string;
  do {
    candidate = `(${first}${counter++})`;
  } while (text.includes(candidate));
  return candidate;
}

/**
 * Renders a value as a single-quoted SQL string literal for dump output.
 * Ported `output_quoted_escaped_string` from https://github.com/sqlite/sqlite/blob/master/src/shell.c.in#L1799-L1862
 *
 * Single quotes are doubled. Line feeds and carriage returns are swapped for
 * textual placeholders and the literal is wrapped in `replace(...)` calls that
 * restore them, so each dumped statement stays on one line. The placeholders
 * are chosen so that they do not already occur in the text; otherwise text
 * such as a literal backslash-n would be turned into a newline on restore.
 *
 * @param cell - The value to quote; it is converted with `String(...)`.
 * @returns The SQL expression that evaluates to the original text.
 */
function outputQuotedEscapedString(cell: unknown) {
  const text = String(cell);
  const hasLf = text.includes("\n");
  const hasCr = text.includes("\r");
  const lfToken = hasLf ? unusedString(text, "\\n", "\\012") : "";
  const crToken = hasCr ? unusedString(text, "\\r", "\\015") : "";

  const escaped = text.replace(/'|\n|\r/g, (ch) => {
    if (ch === "\n") return lfToken;
    if (ch === "\r") return crToken;
    return `''`;
  });

  let output = `'${escaped}'`;
  // Same nesting as SQLite: the carriage-return replace is innermost.
  if (hasCr) output = `replace(${output},'${crToken}',char(13))`;
  if (hasLf) output = `replace(${output},'${lfToken}',char(10))`;
  return output;
}

/**
 * Quotes an identifier for dump output only when it needs it.
 * Ported from quoteChar: https://github.com/sqlite/sqlite/blob/378bf82e2bc09734b8c5869f9b148efe37d29527/src/shell.c.in#L990
 *
 * A token is returned unchanged when it starts with an ASCII letter or
 * underscore, contains only ASCII letters, digits and underscores, and is not
 * in `SQLITE_KEYWORDS` (compared upper-cased). Anything else, including the
 * empty string, is wrapped in double quotes with embedded double quotes
 * doubled so the result is always a valid identifier.
 *
 * @param token - The raw identifier, e.g. a table name.
 * @returns The identifier, quoted if required.
 */
export function sqliteQuote(token: string): string {
  const isBareIdentifier = /^[a-zA-Z_]\w*$/.test(token);
  if (isBareIdentifier && !SQLITE_KEYWORDS.has(token.toUpperCase())) {
    return token;
  }
  return `"${token.replace(/"/g, '""')}"`;
}

// SQLite keywords, upper-cased. `sqliteQuote` double-quotes any identifier whose
// upper-cased form is in this set.
// List taken from `aKeywordTable` in https://github.com/sqlite/sqlite/blob/378bf82e2bc09734b8c5869f9b148efe37d29527/tool/mkkeywordhash.c#L172
// prettier-ignore
export const SQLITE_KEYWORDS = new Set([
"ABORT", "ACTION", "ADD", "AFTER", "ALL", "ALTER", "ALWAYS", "ANALYZE", "AND", "AS", "ASC",
"ATTACH", "AUTOINCREMENT", "BEFORE", "BEGIN", "BETWEEN", "BY", "CASCADE", "CASE", "CAST",
"CHECK", "COLLATE", "COLUMN", "COMMIT", "CONFLICT", "CONSTRAINT", "CREATE", "CROSS", "CURRENT",
"CURRENT_DATE", "CURRENT_TIME", "CURRENT_TIMESTAMP", "DATABASE", "DEFAULT", "DEFERRED",
"DEFERRABLE", "DELETE", "DESC", "DETACH", "DISTINCT", "DO", "DROP", "END", "EACH", "ELSE",
"ESCAPE", "EXCEPT", "EXCLUSIVE", "EXCLUDE", "EXISTS", "EXPLAIN", "FAIL", "FILTER", "FIRST",
"FOLLOWING", "FOR", "FOREIGN", "FROM", "FULL", "GENERATED", "GLOB", "GROUP", "GROUPS", "HAVING",
"IF", "IGNORE", "IMMEDIATE", "IN", "INDEX", "INDEXED", "INITIALLY", "INNER", "INSERT", "INSTEAD",
"INTERSECT", "INTO", "IS", "ISNULL", "JOIN", "KEY", "LAST", "LEFT", "LIKE", "LIMIT", "MATCH",
"MATERIALIZED", "NATURAL", "NO", "NOT", "NOTHING", "NOTNULL", "NULL", "NULLS", "OF", "OFFSET",
"ON", "OR", "ORDER", "OTHERS", "OUTER", "OVER", "PARTITION", "PLAN", "PRAGMA", "PRECEDING",
"PRIMARY", "QUERY", "RAISE", "RANGE", "RECURSIVE", "REFERENCES", "REGEXP", "REINDEX", "RELEASE",
"RENAME", "REPLACE", "RESTRICT", "RETURNING", "RIGHT", "ROLLBACK", "ROW", "ROWS", "SAVEPOINT",
"SELECT", "SET", "TABLE", "TEMP", "TEMPORARY", "THEN", "TIES", "TO", "TRANSACTION", "TRIGGER",
"UNBOUNDED", "UNION", "UNIQUE", "UPDATE", "USING", "VACUUM", "VALUES", "VIEW", "VIRTUAL", "WHEN",
"WHERE", "WINDOW", "WITH", "WITHOUT"
]);
90 changes: 80 additions & 10 deletions packages/wrangler/src/__tests__/d1/export.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { http, HttpResponse } from "msw";
import { mockAccountId, mockApiToken } from "../helpers/mock-account-id";
import { mockConsoleMethods } from "../helpers/mock-console";
import { useMockIsTTY } from "../helpers/mock-istty";
import { mockGetMemberships, mockOAuthFlow } from "../helpers/mock-oauth-flow";
import { mockGetMemberships } from "../helpers/mock-oauth-flow";
import { msw } from "../helpers/msw";
import { runInTempDir } from "../helpers/run-in-tmp";
import { runWrangler } from "../helpers/run-wrangler";
Expand All @@ -14,19 +14,90 @@ describe("execute", () => {
mockApiToken();
mockConsoleMethods();
runInTempDir();
const { mockOAuthServerCallback } = mockOAuthFlow();
const { setIsTTY } = useMockIsTTY();

it("should throw if output is missing", async () => {
await expect(runWrangler("d1 export db --local")).rejects.toThrowError(
`Missing required argument: output`
);
});

it("should reject --local mode (for now)", async () => {
await expect(
runWrangler("d1 export db --local --output /tmp/test.sql")
).rejects.toThrowError(
`Local imports/exports will be coming in a future version of Wrangler.`
it("should handle local", async () => {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should probably add tests for the edge cases like quoted names adding the "IF NOT EXISTS" as well as properly quoting things like keywords.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have all these inside the D1 codebase, but see #6073 (comment) I think it makes sense for this file to live in a separate package with a whole suite of tests (as well as a test runner that runs native SQLite for comparison like the D1 suite does).

Imo that's a follow-up task. For the moment I'm just keeping the two implementations in sync manually.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree it could be a follow-up task.

setIsTTY(false);
writeWranglerToml({
d1_databases: [
{ binding: "DATABASE", database_name: "db", database_id: "xxxx" },
],
});

// Verify the basic command works with an empty DB
await runWrangler("d1 export db --local --output test-local.sql");
expect(fs.readFileSync("test-local.sql", "utf8")).toBe(
"PRAGMA defer_foreign_keys=TRUE;"
);

// Fill with data
fs.writeFileSync(
"data.sql",
`
CREATE TABLE foo(id INTEGER PRIMARY KEY, value TEXT);
CREATE TABLE bar(id INTEGER PRIMARY KEY, value TEXT);
INSERT INTO foo (value) VALUES ('xxx'),('yyy'),('zzz');
INSERT INTO bar (value) VALUES ('aaa'),('bbb'),('ccc');
`
);
await runWrangler("d1 execute db --local --file data.sql");

// SQL output expectations
const create_foo = "CREATE TABLE foo(id INTEGER PRIMARY KEY, value TEXT);";
const create_bar = "CREATE TABLE bar(id INTEGER PRIMARY KEY, value TEXT);";
const insert_foo = [
"INSERT INTO foo VALUES(1,'xxx');",
"INSERT INTO foo VALUES(2,'yyy');",
"INSERT INTO foo VALUES(3,'zzz');",
];
const insert_bar = [
"INSERT INTO bar VALUES(1,'aaa');",
"INSERT INTO bar VALUES(2,'bbb');",
"INSERT INTO bar VALUES(3,'ccc');",
];

// Full export
await runWrangler("d1 export db --local --output test-full.sql");
expect(fs.readFileSync("test-full.sql", "utf8")).toBe(
[
"PRAGMA defer_foreign_keys=TRUE;",
create_foo,
...insert_foo,
create_bar,
...insert_bar,
].join("\n")
);

// Schema only
await runWrangler(
"d1 export db --local --output test-schema.sql --no-data"
);
expect(fs.readFileSync("test-schema.sql", "utf8")).toBe(
["PRAGMA defer_foreign_keys=TRUE;", create_foo, create_bar].join("\n")
);

// Data only
await runWrangler(
"d1 export db --local --output test-data.sql --no-schema"
);
expect(fs.readFileSync("test-data.sql", "utf8")).toBe(
["PRAGMA defer_foreign_keys=TRUE;", ...insert_foo, ...insert_bar].join(
"\n"
)
);

// Foo only
await runWrangler(
"d1 export db --local --output test-data.sql --table foo"
);
expect(fs.readFileSync("test-data.sql", "utf8")).toBe(
["PRAGMA defer_foreign_keys=TRUE;", create_foo, ...insert_foo].join("\n")
);
});

Expand All @@ -37,7 +108,6 @@ describe("execute", () => {
{ binding: "DATABASE", database_name: "db", database_id: "xxxx" },
],
});
mockOAuthServerCallback();
mockGetMemberships([
{ id: "IG-88", account: { id: "1701", name: "enterprise" } },
]);
Expand Down Expand Up @@ -102,7 +172,7 @@ describe("execute", () => {
})
);

await runWrangler("d1 export db --remote --output /tmp/test.sql");
expect(fs.readFileSync("/tmp/test.sql", "utf8")).toBe(mockSqlContent);
await runWrangler("d1 export db --remote --output test-remote.sql");
expect(fs.readFileSync("test-remote.sql", "utf8")).toBe(mockSqlContent);
});
});
Loading
Loading