From c70e1bc7930a418d7846f06a74012a46b1cb9f80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Tue, 8 Oct 2024 07:54:02 +0200 Subject: [PATCH 01/39] explicit duckdb 1.29.0; self-host core extensions; document --- docs/lib/duckdb.md | 58 +++++++++++++++++++++++++++++++++++++ docs/sql.md | 2 ++ src/client/stdlib/duckdb.js | 35 ++++++++++++++++++++++ src/libraries.ts | 29 +++++++++++++++++++ src/npm.ts | 34 +++++++++++++++++++--- src/resolvers.ts | 5 ++++ test/libraries-test.ts | 26 ++++++++++++++++- 7 files changed, 184 insertions(+), 5 deletions(-) diff --git a/docs/lib/duckdb.md b/docs/lib/duckdb.md index 7fc98451d..9f43432fc 100644 --- a/docs/lib/duckdb.md +++ b/docs/lib/duckdb.md @@ -105,3 +105,61 @@ const sql = DuckDBClient.sql({quakes: `https://earthquake.usgs.gov/earthquakes/f ```sql echo SELECT * FROM quakes ORDER BY updated DESC; ``` + +## Extensions + +DuckDB has a flexible extension mechanism that allows for dynamically loading extensions. These may extend DuckDB's functionality by providing support for additional file formats, introducing new types, and domain-specific functionality. + +### Built-in extensions + +The built-in extensions are statically linked to the default bundle. In other words, they are immediately available to use. Currently this includes "httpfs" (and others?). + +### Installing extensions + +Installing an extension, in DuckDB-wasm, references the source file or extensions repository that holds it. 
Thus, you can specify: + +```sql echo run=false +INSTALL h3 FROM community; +LOAD h3; +SELECT format('{:x}', h3_latlng_to_cell(37.77, -122.43, 9)) AS cell_id; +``` + +Beyond the official extensions repositories (with core extensions at `https://extensions.duckdb.org` and community extensions at `https://community.duckdb.org`), you can install an extension from an explicit URL: + +```sql echo run=false +INSTALL custom FROM 'https://example.com/v1.1.1/wasm_mvp/custom.wasm'; +``` + +### Self-hosted core extensions + +Framework downloads a copy of the [core extensions](https://duckdb.org/2023/12/18/duckdb-extensions-in-wasm.html), and the DuckDBClient installs them by default. This ensures that all the common extensions ("json", "inet", "spatial", etc.), are self-hosted. + +You can however override this (for example, if you need to test something against a new version of an extension), and install explicitly: + +```sql echo run=false +INSTALL json FROM core; +-- use JSON features +``` + +### Loading extensions + +Loading an extension actually downloads the build and makes its features available in subsequent queries. You can load an extension explicitly like so: + +```sql echo run=false +LOAD spatial; +SELECT ST_Area('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))'::GEOMETRY) as area; +``` + +Many of the core extensions are auto-loaded when their functions are used in a query. For example, the query below transparently loads the self hosted "json" extension: + +```sql echo run=false +SELECT bbox FROM read_json('https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/all_day.geojson'); +``` + +Similarly, this query transparently loads the self-hosted "inet" extension: + +```sql echo +SELECT '127.0.0.1'::INET AS ipv4, '2001:db8:3c4d::/48'::INET AS ipv6; +``` + +These features are tied to DuckDB wasm’s 1.29 version, and strongly dependent on its development cycle. 
diff --git a/docs/sql.md b/docs/sql.md index 05361025e..778090bc1 100644 --- a/docs/sql.md +++ b/docs/sql.md @@ -205,3 +205,5 @@ Inputs.table(await sql([`SELECT * FROM gaia WHERE source_id IN (${[source_ids]}) When interpolating values into SQL queries, be careful to avoid [SQL injection](https://en.wikipedia.org/wiki/SQL_injection) by properly escaping or sanitizing user input. The example above is safe only because `source_ids` are known to be numeric. + +For more information, see [DuckDB: extensions](./lib/duckdb#extensions). diff --git a/src/client/stdlib/duckdb.js b/src/client/stdlib/duckdb.js index 950e5bebc..9f3ff894f 100644 --- a/src/client/stdlib/duckdb.js +++ b/src/client/stdlib/duckdb.js @@ -182,6 +182,40 @@ Object.defineProperty(DuckDBClient.prototype, "dialect", { value: "duckdb" }); +async function installLocalExtensions(database) { + const repo = new URL("../../_npm/extensions.duckdb.org", import.meta.url).href; + const connection = await database.connect(); + await connection.query( + [ + // "arrow", + "autocomplete", + // "aws", + // "azure", + // "delta", + // "excel", + "fts", + // "httpfs", + // "iceberg", + "icu", + "inet", + // "jmalloc", + "json", + // "motherduck", + "parquet", + // "postgres_scanner", + "spatial", + "sqlite_scanner", + "substrait", + "tpcds", + "tpch", + "vss" + ] + .map((ext) => `INSTALL ${ext} FROM '${repo}';`) + .join("\n") + ); + // await connection.query(`SET custom_extension_repository = '${repo}';`); +} + async function insertSource(database, name, source) { source = await source; if (isFileAttachment(source)) return insertFile(database, name, source); @@ -309,6 +343,7 @@ async function createDuckDB() { const worker = await duckdb.createWorker(bundle.mainWorker); const db = new duckdb.AsyncDuckDB(logger, worker); await db.instantiate(bundle.mainModule); + await installLocalExtensions(db); return db; } diff --git a/src/libraries.ts b/src/libraries.ts index 9c2cba98e..0cddec217 100644 --- a/src/libraries.ts +++ 
b/src/libraries.ts @@ -80,6 +80,35 @@ export function getImplicitDownloads(imports: Iterable): Set { implicits.add("npm:@duckdb/duckdb-wasm/dist/duckdb-browser-mvp.worker.js"); implicits.add("npm:@duckdb/duckdb-wasm/dist/duckdb-eh.wasm"); implicits.add("npm:@duckdb/duckdb-wasm/dist/duckdb-browser-eh.worker.js"); + // Ref. https://github.com/duckdb/duckdb-wasm/releases/tag/v1.29.0 + for (const extension of [ + // "arrow", + "autocomplete", + // "aws", + // "azure", + // "delta", + // "excel", + "fts", + // "httpfs", + // "iceberg", + "icu", + "inet", + // "jmalloc", + "json", + // "motherduck", + "parquet", + // "postgres_scanner", + "spatial", + "sqlite_scanner", + "substrait", + "tpcds", + "tpch", + "vss" + ]) { + for (const platform of ["eh", "mvp"]) { + implicits.add(`https://extensions.duckdb.org/v1.1.1/wasm_${platform}/${extension}.duckdb_extension.wasm`); + } + } } if (set.has("npm:@observablehq/sqlite")) { implicits.add("npm:sql.js/dist/sql-wasm.js"); diff --git a/src/npm.ts b/src/npm.ts index df2084df8..584a31637 100644 --- a/src/npm.ts +++ b/src/npm.ts @@ -162,7 +162,7 @@ export async function getDependencyResolver( (name === "arquero" || name === "@uwdata/mosaic-core" || name === "@duckdb/duckdb-wasm") && depName === "apache-arrow" // prettier-ignore ? "latest" // force Arquero, Mosaic & DuckDB-Wasm to use the (same) latest version of Arrow : name === "@uwdata/mosaic-core" && depName === "@duckdb/duckdb-wasm" - ? "1.28.0" // force Mosaic to use the latest (stable) version of DuckDB-Wasm + ? "1.29.0" // force Mosaic to use the latest (stable) version of DuckDB-Wasm : pkg.dependencies?.[depName] ?? pkg.devDependencies?.[depName] ?? pkg.peerDependencies?.[depName] ?? @@ -248,9 +248,7 @@ async function resolveNpmVersion(root: string, {name, range}: NpmSpecifier): Pro export async function resolveNpmImport(root: string, specifier: string): Promise { const { name, - range = name === "@duckdb/duckdb-wasm" - ? 
"1.28.0" // https://github.com/duckdb/duckdb-wasm/issues/1561 - : undefined, + range = name === "@duckdb/duckdb-wasm" ? "1.29.0" : undefined, path = name === "mermaid" ? "dist/mermaid.esm.min.mjs/+esm" : name === "echarts" @@ -316,3 +314,31 @@ export function fromJsDelivrPath(path: string): string { const subpath = parts.slice(i).join("/"); // "+esm" or "lite/+esm" or "lite.js/+esm" return `/_npm/${namever}/${subpath === "+esm" ? "_esm.js" : subpath.replace(/\/\+esm$/, "._esm.js")}`; } + +const downloadRequests = new Map>(); + +/** + * Given a URL such as + * https://extensions.duckdb.org/v1.1.1/wasm_eh/parquet.duckdb_extension.wasm, + * returns the corresponding local path such as + * _npm/extensions.duckdb.org/v1.1.1/wasm_eh/parquet.duckdb_extension.wasm + */ +export async function resolveDuckDBDownload(root: string, href: string): Promise { + if (!href.startsWith("https://extensions.duckdb.org")) throw new Error(`invalid download path: ${href}`); + const path = "/_npm/" + href.slice("https://".length); + const outputPath = join(root, ".observablehq", "cache", "_npm", href.slice("https://".length)); + if (existsSync(outputPath)) return path; + let promise = downloadRequests.get(outputPath); + if (promise) return promise; // coalesce concurrent requests + promise = (async () => { + console.log(`download: ${href} ${faint("→")} ${outputPath}`); + const response = await fetch(href); + if (!response.ok) throw new Error(`unable to fetch: ${href}`); + await mkdir(dirname(outputPath), {recursive: true}); + await writeFile(outputPath, Buffer.from(await response.arrayBuffer())); + return path; + })(); + promise.catch(console.error).then(() => downloadRequests.delete(outputPath)); + downloadRequests.set(outputPath, promise); + return promise; +} diff --git a/src/resolvers.ts b/src/resolvers.ts index 3e9bd49fd..6964dbea5 100644 --- a/src/resolvers.ts +++ b/src/resolvers.ts @@ -12,6 +12,7 @@ import type {LoaderResolver} from "./loader.js"; import type {MarkdownPage} from 
"./markdown.js"; import {extractNodeSpecifier, resolveNodeImport, resolveNodeImports} from "./node.js"; import {extractNpmSpecifier, populateNpmCache, resolveNpmImport, resolveNpmImports} from "./npm.js"; +import {resolveDuckDBDownload} from "./npm.js"; import {isAssetPath, isPathImport, parseRelativeUrl, relativePath, resolveLocalPath, resolvePath} from "./path.js"; export interface Resolvers { @@ -367,6 +368,10 @@ async function resolveResolvers( const path = await resolveNpmImport(root, specifier.slice("npm:".length)); resolutions.set(specifier, path); await populateNpmCache(root, path); + } else if (specifier.startsWith("https://extensions.duckdb.org/")) { + const path = await resolveDuckDBDownload(root, specifier); + resolutions.set(specifier, path); + await populateNpmCache(root, path); } else if (!specifier.startsWith("observablehq:")) { throw new Error(`unhandled implicit download: ${specifier}`); } diff --git a/test/libraries-test.ts b/test/libraries-test.ts index 8ecbeeee5..b0a0395f8 100644 --- a/test/libraries-test.ts +++ b/test/libraries-test.ts @@ -58,7 +58,31 @@ describe("getImplicitDownloads(imports)", () => { "npm:@duckdb/duckdb-wasm/dist/duckdb-mvp.wasm", "npm:@duckdb/duckdb-wasm/dist/duckdb-browser-mvp.worker.js", "npm:@duckdb/duckdb-wasm/dist/duckdb-eh.wasm", - "npm:@duckdb/duckdb-wasm/dist/duckdb-browser-eh.worker.js" + "npm:@duckdb/duckdb-wasm/dist/duckdb-browser-eh.worker.js", + "https://extensions.duckdb.org/v1.1.1/wasm_eh/autocomplete.duckdb_extension.wasm", + "https://extensions.duckdb.org/v1.1.1/wasm_eh/fts.duckdb_extension.wasm", + "https://extensions.duckdb.org/v1.1.1/wasm_eh/icu.duckdb_extension.wasm", + "https://extensions.duckdb.org/v1.1.1/wasm_eh/inet.duckdb_extension.wasm", + "https://extensions.duckdb.org/v1.1.1/wasm_eh/json.duckdb_extension.wasm", + "https://extensions.duckdb.org/v1.1.1/wasm_eh/parquet.duckdb_extension.wasm", + "https://extensions.duckdb.org/v1.1.1/wasm_eh/spatial.duckdb_extension.wasm", + 
"https://extensions.duckdb.org/v1.1.1/wasm_eh/sqlite_scanner.duckdb_extension.wasm", + "https://extensions.duckdb.org/v1.1.1/wasm_eh/substrait.duckdb_extension.wasm", + "https://extensions.duckdb.org/v1.1.1/wasm_eh/tpcds.duckdb_extension.wasm", + "https://extensions.duckdb.org/v1.1.1/wasm_eh/tpch.duckdb_extension.wasm", + "https://extensions.duckdb.org/v1.1.1/wasm_eh/vss.duckdb_extension.wasm", + "https://extensions.duckdb.org/v1.1.1/wasm_mvp/autocomplete.duckdb_extension.wasm", + "https://extensions.duckdb.org/v1.1.1/wasm_mvp/fts.duckdb_extension.wasm", + "https://extensions.duckdb.org/v1.1.1/wasm_mvp/icu.duckdb_extension.wasm", + "https://extensions.duckdb.org/v1.1.1/wasm_mvp/inet.duckdb_extension.wasm", + "https://extensions.duckdb.org/v1.1.1/wasm_mvp/json.duckdb_extension.wasm", + "https://extensions.duckdb.org/v1.1.1/wasm_mvp/parquet.duckdb_extension.wasm", + "https://extensions.duckdb.org/v1.1.1/wasm_mvp/spatial.duckdb_extension.wasm", + "https://extensions.duckdb.org/v1.1.1/wasm_mvp/sqlite_scanner.duckdb_extension.wasm", + "https://extensions.duckdb.org/v1.1.1/wasm_mvp/substrait.duckdb_extension.wasm", + "https://extensions.duckdb.org/v1.1.1/wasm_mvp/tpcds.duckdb_extension.wasm", + "https://extensions.duckdb.org/v1.1.1/wasm_mvp/tpch.duckdb_extension.wasm", + "https://extensions.duckdb.org/v1.1.1/wasm_mvp/vss.duckdb_extension.wasm" ]) ); assert.deepStrictEqual( From 0029c8c895e24099606ac0f087d8ec0ada06e9a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Thu, 10 Oct 2024 16:56:01 +0200 Subject: [PATCH 02/39] configure which extensions are self-hosted (not quite there yet: still need to do hashing, per-extension configuration of the LOAD command, and per page configuration) --- docs/lib/duckdb.md | 33 +++++++++++---------- src/client/stdlib/duckdb.js | 35 ----------------------- src/client/stdlib/recommendedLibraries.js | 16 ++++++++++- src/config.ts | 26 ++++++++++++++++- src/libraries.ts | 34 +++------------------- src/npm.ts | 4 +-- 
src/preview.ts | 4 +-- src/render.ts | 6 +++- src/resolvers.ts | 13 +++++---- 9 files changed, 76 insertions(+), 95 deletions(-) diff --git a/docs/lib/duckdb.md b/docs/lib/duckdb.md index 9f43432fc..5e13e53d4 100644 --- a/docs/lib/duckdb.md +++ b/docs/lib/duckdb.md @@ -112,7 +112,7 @@ DuckDB has a flexible extension mechanism that allows for dynamically loading ex ### Built-in extensions -The built-in extensions are statically linked to the default bundle. In other words, they are immediately available to use. Currently this includes "httpfs" (and others?). +The built-in extensions are statically linked to the default bundle. In other words, they are immediately available to use. This case includes, for example, the "httpfs" extension. ### Installing extensions @@ -124,42 +124,41 @@ LOAD h3; SELECT format('{:x}', h3_latlng_to_cell(37.77, -122.43, 9)) AS cell_id; ``` -Beyond the official extensions repositories (with core extensions at `https://extensions.duckdb.org` and community extensions at `https://community.duckdb.org`), you can install an extension from an explicit URL: +Beyond the official extensions repositories (with `core` extensions at `https://extensions.duckdb.org` and `community` extensions at `https://community.duckdb.org`), you can install an extension from an explicit URL: ```sql echo run=false -INSTALL custom FROM 'https://example.com/v1.1.1/wasm_mvp/custom.wasm'; -``` - -### Self-hosted core extensions - -Framework downloads a copy of the [core extensions](https://duckdb.org/2023/12/18/duckdb-extensions-in-wasm.html), and the DuckDBClient installs them by default. This ensures that all the common extensions ("json", "inet", "spatial", etc.), are self-hosted. 
- -You can however override this (for example, if you need to test something against a new version of an extension), and install explicitly: - -```sql echo run=false -INSTALL json FROM core; --- use JSON features +INSTALL custom FROM 'https://example.com/'; ``` ### Loading extensions -Loading an extension actually downloads the build and makes its features available in subsequent queries. You can load an extension explicitly like so: +To activate an extension in a DuckDB instance, we have to “load” it, for example with an explicit `LOAD` statement: ```sql echo run=false LOAD spatial; SELECT ST_Area('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))'::GEOMETRY) as area; ``` -Many of the core extensions are auto-loaded when their functions are used in a query. For example, the query below transparently loads the self hosted "json" extension: +Many of the core extensions however do not need an explicit `LOAD` statement, as they get autoloaded when DuckDB detects that they are needed. For example, the query below autoloads the "json" extension: ```sql echo run=false SELECT bbox FROM read_json('https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/all_day.geojson'); ``` -Similarly, this query transparently loads the self-hosted "inet" extension: +Similarly, this query autoloads the "inet" extension: ```sql echo SELECT '127.0.0.1'::INET AS ipv4, '2001:db8:3c4d::/48'::INET AS ipv6; ``` +### Self-hosted extensions + +Framework will download and host the extensions of your choice locally. By default, only "json" and "parquet" are self-hosted, but you can add more by specifying the list in the [config](../config). The self-hosted extensions are served (currently) from the `/_npm/` directory, ensuring that you can continue to work offline and from a server you control. + +
+ +Note that if you `INSTALL` or `LOAD` an extension that is not self-hosted, DuckDB will load it from the core or community servers. At present, Framework does not know which extensions your code is using; however, you can inspect the network activity in your browser to see whether any extension is being fetched from those servers, and then add it to the list of self-hosted extensions. In the future, the preview server might be able to raise a warning if the list is incomplete. If you are interested in this feature, please upvote #issueTK. + +
+ These features are tied to DuckDB wasm’s 1.29 version, and strongly dependent on its development cycle. diff --git a/src/client/stdlib/duckdb.js b/src/client/stdlib/duckdb.js index 9f3ff894f..950e5bebc 100644 --- a/src/client/stdlib/duckdb.js +++ b/src/client/stdlib/duckdb.js @@ -182,40 +182,6 @@ Object.defineProperty(DuckDBClient.prototype, "dialect", { value: "duckdb" }); -async function installLocalExtensions(database) { - const repo = new URL("../../_npm/extensions.duckdb.org", import.meta.url).href; - const connection = await database.connect(); - await connection.query( - [ - // "arrow", - "autocomplete", - // "aws", - // "azure", - // "delta", - // "excel", - "fts", - // "httpfs", - // "iceberg", - "icu", - "inet", - // "jmalloc", - "json", - // "motherduck", - "parquet", - // "postgres_scanner", - "spatial", - "sqlite_scanner", - "substrait", - "tpcds", - "tpch", - "vss" - ] - .map((ext) => `INSTALL ${ext} FROM '${repo}';`) - .join("\n") - ); - // await connection.query(`SET custom_extension_repository = '${repo}';`); -} - async function insertSource(database, name, source) { source = await source; if (isFileAttachment(source)) return insertFile(database, name, source); @@ -343,7 +309,6 @@ async function createDuckDB() { const worker = await duckdb.createWorker(bundle.mainWorker); const db = new duckdb.AsyncDuckDB(logger, worker); await db.instantiate(bundle.mainModule); - await installLocalExtensions(db); return db; } diff --git a/src/client/stdlib/recommendedLibraries.js b/src/client/stdlib/recommendedLibraries.js index ebe5dea81..9a82e2bb9 100644 --- a/src/client/stdlib/recommendedLibraries.js +++ b/src/client/stdlib/recommendedLibraries.js @@ -16,7 +16,21 @@ export const mermaid = () => import("observablehq:stdlib/mermaid").then((mermaid export const Plot = () => import("npm:@observablehq/plot"); export const React = () => import("npm:react"); export const ReactDOM = () => import("npm:react-dom"); -export const sql = () => 
import("observablehq:stdlib/duckdb").then((duckdb) => duckdb.sql); +export const sql = () => + import("observablehq:stdlib/duckdb").then(async (duckdb) => { + const {sql} = duckdb; + const extensions = JSON.parse(document.querySelector("#observablehq-duckdb-hosted-extensions").textContent); + for (const [name, ref] of extensions) { + const x = `INSTALL ${name} FROM '${new URL(`..${ref}`, import.meta.url).href}';`; + console.warn(import.meta.url, x); + await sql([x]); + const y = `LOAD ${name};`; + console.warn(import.meta.url, y); + await sql([y]); + } + console.warn(extensions); + return sql; + }); export const SQLite = () => import("observablehq:stdlib/sqlite").then((sqlite) => sqlite.default); export const SQLiteDatabaseClient = () => import("observablehq:stdlib/sqlite").then((sqlite) => sqlite.SQLiteDatabaseClient); // prettier-ignore export const tex = () => import("observablehq:stdlib/tex").then((tex) => tex.default); diff --git a/src/config.ts b/src/config.ts index 126445a90..adbe8654e 100644 --- a/src/config.ts +++ b/src/config.ts @@ -76,6 +76,10 @@ export interface SearchConfigSpec { index?: unknown; } +export interface DuckDBConfig { + extensions: {[key: string]: string}; +} + export interface Config { root: string; // defaults to src output: string; // defaults to dist @@ -98,6 +102,7 @@ export interface Config { normalizePath: (path: string) => string; loaders: LoaderResolver; watchPath?: string; + duckdb: DuckDBConfig; } export interface ConfigSpec { @@ -125,6 +130,7 @@ export interface ConfigSpec { quotes?: unknown; cleanUrls?: unknown; markdownIt?: unknown; + duckdb?: unknown; } interface ScriptSpec { @@ -260,6 +266,7 @@ export function normalizeConfig(spec: ConfigSpec = {}, defaultRoot?: string, wat const search = spec.search == null || spec.search === false ? 
null : normalizeSearch(spec.search as any); const interpreters = normalizeInterpreters(spec.interpreters as any); const normalizePath = getPathNormalizer(spec.cleanUrls); + const duckdb = normalizeDuckDB(spec.duckdb as any); // If this path ends with a slash, then add an implicit /index to the // end of the path. Otherwise, remove the .html extension (we use clean @@ -310,7 +317,8 @@ export function normalizeConfig(spec: ConfigSpec = {}, defaultRoot?: string, wat md, normalizePath, loaders: new LoaderResolver({root, interpreters}), - watchPath + watchPath, + duckdb }; if (pages === undefined) Object.defineProperty(config, "pages", {get: () => readPages(root, md)}); if (sidebar === undefined) Object.defineProperty(config, "sidebar", {get: () => config.pages.length > 0}); @@ -488,3 +496,19 @@ export function mergeStyle( export function stringOrNull(spec: unknown): string | null { return spec == null || spec === false ? null : String(spec); } + +function normalizeDuckDB(spec: unknown): DuckDBConfig { + const extensions = spec?.["extensions"] ?? ["json", "parquet"]; + return { + extensions: Object.fromEntries( + Object.entries( + Array.isArray(extensions) + ? Object.fromEntries(extensions.map((name) => [name, true])) + : (spec as {[key: string]: string}) + ).map(([name, value]) => [ + name, + value === true ? `https://extensions.duckdb.org/v1.1.1/wasm_eh/${name}.duckdb_extension.wasm` : `${value}` + ]) + ) + }; +} diff --git a/src/libraries.ts b/src/libraries.ts index 0cddec217..db4cc21c5 100644 --- a/src/libraries.ts +++ b/src/libraries.ts @@ -1,3 +1,5 @@ +import type {DuckDBConfig} from "./config.js"; + export function getImplicitFileImports(methods: Iterable): Set { const set = setof(methods); const implicits = new Set(); @@ -72,7 +74,7 @@ export function getImplicitStylesheets(imports: Iterable): Set { * library used by FileAttachment) we manually enumerate the needed additional * downloads here. TODO Support versioned imports, too, such as "npm:leaflet@1". 
*/ -export function getImplicitDownloads(imports: Iterable): Set { +export function getImplicitDownloads(imports: Iterable, duckdb: DuckDBConfig): Set { const set = setof(imports); const implicits = new Set(); if (set.has("npm:@observablehq/duckdb")) { @@ -80,35 +82,7 @@ export function getImplicitDownloads(imports: Iterable): Set { implicits.add("npm:@duckdb/duckdb-wasm/dist/duckdb-browser-mvp.worker.js"); implicits.add("npm:@duckdb/duckdb-wasm/dist/duckdb-eh.wasm"); implicits.add("npm:@duckdb/duckdb-wasm/dist/duckdb-browser-eh.worker.js"); - // Ref. https://github.com/duckdb/duckdb-wasm/releases/tag/v1.29.0 - for (const extension of [ - // "arrow", - "autocomplete", - // "aws", - // "azure", - // "delta", - // "excel", - "fts", - // "httpfs", - // "iceberg", - "icu", - "inet", - // "jmalloc", - "json", - // "motherduck", - "parquet", - // "postgres_scanner", - "spatial", - "sqlite_scanner", - "substrait", - "tpcds", - "tpch", - "vss" - ]) { - for (const platform of ["eh", "mvp"]) { - implicits.add(`https://extensions.duckdb.org/v1.1.1/wasm_${platform}/${extension}.duckdb_extension.wasm`); - } - } + for (const [, url] of Object.entries(duckdb.extensions)) implicits.add(url); } if (set.has("npm:@observablehq/sqlite")) { implicits.add("npm:sql.js/dist/sql-wasm.js"); diff --git a/src/npm.ts b/src/npm.ts index 584a31637..f7b73ad70 100644 --- a/src/npm.ts +++ b/src/npm.ts @@ -323,8 +323,8 @@ const downloadRequests = new Map>(); * returns the corresponding local path such as * _npm/extensions.duckdb.org/v1.1.1/wasm_eh/parquet.duckdb_extension.wasm */ -export async function resolveDuckDBDownload(root: string, href: string): Promise { - if (!href.startsWith("https://extensions.duckdb.org")) throw new Error(`invalid download path: ${href}`); +export async function resolveDownload(root: string, href: string): Promise { + if (!href.startsWith("https://")) throw new Error(`invalid download path: ${href}`); const path = "/_npm/" + href.slice("https://".length); const 
outputPath = join(root, ".observablehq", "cache", "_npm", href.slice("https://".length)); if (existsSync(outputPath)) return path; diff --git a/src/preview.ts b/src/preview.ts index 42f3c931a..de7f0f02a 100644 --- a/src/preview.ts +++ b/src/preview.ts @@ -390,9 +390,9 @@ function handleWatch(socket: WebSocket, req: IncomingMessage, configPromise: Pro if (path.endsWith("/")) path += "index"; path = join(dirname(path), basename(path, ".html")); config = await configPromise; - const {root, loaders, normalizePath} = config; + const {root, loaders, normalizePath, duckdb} = config; const page = await loaders.loadPage(path, {path, ...config}); - const resolvers = await getResolvers(page, {root, path, loaders, normalizePath}); + const resolvers = await getResolvers(page, {root, path, loaders, normalizePath, duckdb}); if (resolvers.hash === initialHash) send({type: "welcome"}); else return void send({type: "reload"}); hash = resolvers.hash; diff --git a/src/render.ts b/src/render.ts index ba1dc9fdb..efb46f57a 100644 --- a/src/render.ts +++ b/src/render.ts @@ -11,6 +11,7 @@ import {findModule} from "./javascript/module.js"; import type {TranspileModuleOptions} from "./javascript/transpile.js"; import {transpileJavaScript, transpileModule} from "./javascript/transpile.js"; import type {MarkdownPage} from "./markdown.js"; +import {resolveDownload} from "./npm.js"; import type {PageLink} from "./pager.js"; import {findLink, normalizePath} from "./pager.js"; import {isAssetPath, resolvePath, resolveRelativePath} from "./path.js"; @@ -30,7 +31,7 @@ type RenderInternalOptions = export async function renderPage(page: MarkdownPage, options: RenderOptions & RenderInternalOptions): Promise { const {data, params} = page; - const {base, path, title, preview} = options; + const {base, path, title, preview, duckdb} = options; const {loaders, resolvers = await getResolvers(page, options)} = options; const {draft = false, sidebar = options.sidebar} = data; const toc = mergeToc(data.toc, 
options.toc); @@ -57,6 +58,9 @@ if (location.pathname.endsWith("/")) { `) : "" } + `) : "" } - + + +
+ +
+

test DuckDB

+
+
+ +
+ + diff --git a/test/output/build/duckdb/_npm/@duckdb/duckdb-wasm@1.29.0/cd372fb8.js b/test/output/build/duckdb/_npm/@duckdb/duckdb-wasm@1.29.0/cd372fb8.js new file mode 100644 index 000000000..e69de29bb diff --git a/test/output/build/duckdb/_npm/@duckdb/duckdb-wasm@1.29.0/dist/duckdb-browser-eh.worker.cd372fb8.js b/test/output/build/duckdb/_npm/@duckdb/duckdb-wasm@1.29.0/dist/duckdb-browser-eh.worker.cd372fb8.js new file mode 100644 index 000000000..e69de29bb diff --git a/test/output/build/duckdb/_npm/@duckdb/duckdb-wasm@1.29.0/dist/duckdb-browser-mvp.worker.cd372fb8.js b/test/output/build/duckdb/_npm/@duckdb/duckdb-wasm@1.29.0/dist/duckdb-browser-mvp.worker.cd372fb8.js new file mode 100644 index 000000000..e69de29bb diff --git a/test/output/build/duckdb/_npm/@duckdb/duckdb-wasm@1.29.0/dist/duckdb-eh.wasm b/test/output/build/duckdb/_npm/@duckdb/duckdb-wasm@1.29.0/dist/duckdb-eh.wasm new file mode 100644 index 000000000..e69de29bb diff --git a/test/output/build/duckdb/_npm/@duckdb/duckdb-wasm@1.29.0/dist/duckdb-mvp.wasm b/test/output/build/duckdb/_npm/@duckdb/duckdb-wasm@1.29.0/dist/duckdb-mvp.wasm new file mode 100644 index 000000000..e69de29bb diff --git a/test/output/build/duckdb/_npm/htl@0.3.1/cd372fb8.js b/test/output/build/duckdb/_npm/htl@0.3.1/cd372fb8.js new file mode 100644 index 000000000..e69de29bb diff --git a/test/output/build/duckdb/_npm/isoformat@0.2.1/cd372fb8.js b/test/output/build/duckdb/_npm/isoformat@0.2.1/cd372fb8.js new file mode 100644 index 000000000..e69de29bb diff --git a/test/output/build/duckdb/_observablehq/client.00000001.js b/test/output/build/duckdb/_observablehq/client.00000001.js new file mode 100644 index 000000000..e69de29bb diff --git a/test/output/build/duckdb/_observablehq/duckdb_manifest.json b/test/output/build/duckdb/_observablehq/duckdb_manifest.json new file mode 100644 index 000000000..796a34dd9 --- /dev/null +++ b/test/output/build/duckdb/_observablehq/duckdb_manifest.json @@ -0,0 +1 @@ 
+{"extensions":[["json",{"ref":"_duckdb/e3b0c442","load":false}],["parquet",{"ref":"_duckdb/e3b0c442","load":false}]]} \ No newline at end of file diff --git a/test/output/build/duckdb/_observablehq/runtime.00000002.js b/test/output/build/duckdb/_observablehq/runtime.00000002.js new file mode 100644 index 000000000..e69de29bb diff --git a/test/output/build/duckdb/_observablehq/stdlib.00000003.js b/test/output/build/duckdb/_observablehq/stdlib.00000003.js new file mode 100644 index 000000000..e69de29bb diff --git a/test/output/build/duckdb/_observablehq/stdlib/duckdb.00000005.js b/test/output/build/duckdb/_observablehq/stdlib/duckdb.00000005.js new file mode 100644 index 000000000..e69de29bb diff --git a/test/output/build/duckdb/_observablehq/stdlib/inputs.00000006.js b/test/output/build/duckdb/_observablehq/stdlib/inputs.00000006.js new file mode 100644 index 000000000..e69de29bb diff --git a/test/output/build/duckdb/_observablehq/stdlib/inputs.00000007.css b/test/output/build/duckdb/_observablehq/stdlib/inputs.00000007.css new file mode 100644 index 000000000..e69de29bb diff --git a/test/output/build/duckdb/_observablehq/theme-air,near-midnight.00000004.css b/test/output/build/duckdb/_observablehq/theme-air,near-midnight.00000004.css new file mode 100644 index 000000000..e69de29bb diff --git a/test/output/build/duckdb/index.html b/test/output/build/duckdb/index.html new file mode 100644 index 000000000..b0ceaf67b --- /dev/null +++ b/test/output/build/duckdb/index.html @@ -0,0 +1,48 @@ + + + + + + +test DuckDB + + + + + + + + + + + + + + + + + + +
+ +
+

test DuckDB

+
+
+ +
+ + From 2fb28789c8051229787c7836f13ebe60fdb161bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Fri, 11 Oct 2024 18:30:07 +0200 Subject: [PATCH 13/39] bake-in the extensions manifest --- src/build.ts | 19 +++++++++---------- src/client/stdlib/duckdb.js | 6 +++--- src/duckdb.ts | 9 +++++---- src/preview.ts | 8 +++++--- 4 files changed, 22 insertions(+), 20 deletions(-) diff --git a/src/build.ts b/src/build.ts index da31b2680..b5ed9a36b 100644 --- a/src/build.ts +++ b/src/build.ts @@ -150,6 +150,9 @@ export async function build( effects.output.write(`${faint("bundle")} ${path} ${faint("→")} `); const clientPath = getClientPath(path === "/_observablehq/client.js" ? "index.js" : path.slice("/_observablehq/".length)); // prettier-ignore const define: {[key: string]: string} = {}; + if (path === "/_observablehq/stdlib/duckdb.js") { + define["process.DUCKDB_MANIFEST"] = JSON.stringify(await duckDBManifest(duckdb, {root, log: true})); + } const contents = await rollupClient(clientPath, root, path, {minify: true, keepNames: true, define}); await prepareOutput(cachePath); await writeFile(cachePath, contents); @@ -215,20 +218,17 @@ export async function build( aliases.set(path, alias); await effects.writeFile(alias, contents); } else if (path.startsWith("/_duckdb/")) { - continue; + const name = path.slice("/_duckdb/".length, -9); + for (const p of ["eh", "mvp"]) + await effects.copyFile( + join(sourcePath, "v1.1.1", `wasm_${p}`, `${name}.duckdb_extension.wasm`), + join(path, "v1.1.1", `wasm_${p}`, `${name}.duckdb_extension.wasm`) + ); } else { await effects.copyFile(sourcePath, path); } } - // Write the DuckDB extensions manifest. 
- if (globalImports.has("/_observablehq/stdlib/duckdb.js")) { - const path = join("_observablehq", "duckdb_manifest.json"); - effects.output.write(`${faint("duckdb manifest")} `); - await effects.writeFile(path, JSON.stringify(await duckDBManifest(duckdb, {root}))); - effects.logger.log(path); - } - // Compute the hashes for global modules. By computing the hash on the file in // the cache root, this takes into consideration the resolved exact versions // of npm and node imports for transitive dependencies. @@ -478,7 +478,6 @@ export class FileBuildEffects implements BuildEffects { } } async copyFile(sourcePath: string, outputPath: string): Promise { - if (sourcePath === "test/input/build/only.duckdb/.observablehq/cache/_duckdb/e3b0c442") console.trace(); const destination = join(this.outputRoot, outputPath); this.logger.log(destination); await prepareOutput(destination); diff --git a/src/client/stdlib/duckdb.js b/src/client/stdlib/duckdb.js index 573d87c52..25344996d 100644 --- a/src/client/stdlib/duckdb.js +++ b/src/client/stdlib/duckdb.js @@ -186,9 +186,9 @@ Object.defineProperty(DuckDBClient.prototype, "dialect", { async function registerExtensions(db, {load}) { const connection = await db.connect(); try { - const {log, extensions} = await fetch(import.meta.resolve("observablehq:duckdb_manifest.json")).then((r) => - r.json() - ); + // Baked-in extensions manifest. + const {log, extensions} = process.DUCKDB_MANIFEST; + // Preview adds a DuckDBClientReport utility to the console. We don’t add it // in the public build so as not to pollute the window. 
if (log) { diff --git a/src/duckdb.ts b/src/duckdb.ts index 0c9b71e49..e66665b37 100644 --- a/src/duckdb.ts +++ b/src/duckdb.ts @@ -39,7 +39,7 @@ export async function resolveDuckDBExtension(root: string, duckdb: DuckDBConfig, const outputDir = join(cache, "duckdb", host); const files = ["eh", "mvp"].map((p) => join(outputDir, `${name}.${p}.wasm`)); if (files.every(existsSync)) { - const ref = await duckDBHash(files); + const ref = await duckDBHash(name, files); if (platforms.every((p) => existsSync(join(cache, ref, "v1.1.1", `wasm_${p}`, `${name}.duckdb_extension.wasm`)))) return ref; } @@ -57,7 +57,7 @@ export async function resolveDuckDBExtension(root: string, duckdb: DuckDBConfig, await writeFile(outputPath, Buffer.from(await response.arrayBuffer())); }) ).then(async () => { - const ref = await duckDBHash(files); + const ref = await duckDBHash(name, files); for (const [i, p] of platforms.entries()) { const targetPath = join(cache, ref, "v1.1.1", `wasm_${p}`, `${name}.duckdb_extension.wasm`); await mkdir(dirname(targetPath), {recursive: true}); @@ -70,8 +70,9 @@ export async function resolveDuckDBExtension(root: string, duckdb: DuckDBConfig, return promise; } -async function duckDBHash(files: string[]): Promise { +async function duckDBHash(name: string, files: string[]): Promise { const hash = createHash("sha256"); + hash.update(name); for (const file of files) hash.update(await readFile(file, "utf-8")); - return join("_duckdb", hash.digest("hex").slice(0, 8)); + return join("_duckdb", `${name}-${hash.digest("hex").slice(0, 8)}`); } diff --git a/src/preview.ts b/src/preview.ts index d095d04f3..08a81a76d 100644 --- a/src/preview.ts +++ b/src/preview.ts @@ -136,12 +136,14 @@ export class PreviewServer { end(req, res, await bundleStyles({theme: match.groups!.theme?.split(",") ?? 
[]}), "text/css"); } else if (pathname.startsWith("/_observablehq/") && pathname.endsWith(".js")) { const path = getClientPath(pathname.slice("/_observablehq/".length)); - end(req, res, await rollupClient(path, root, pathname), "text/javascript"); + const options = + pathname === "/_observablehq/stdlib/duckdb.js" + ? {define: {"process.DUCKDB_MANIFEST": JSON.stringify(await duckDBManifest(duckdb, {root, log: true}))}} + : {}; + end(req, res, await rollupClient(path, root, pathname, options), "text/javascript"); } else if (pathname.startsWith("/_observablehq/") && pathname.endsWith(".css")) { const path = getClientPath(pathname.slice("/_observablehq/".length)); end(req, res, await bundleStyles({path}), "text/css"); - } else if (pathname === "/_observablehq/duckdb_manifest.json") { - end(req, res, JSON.stringify(await duckDBManifest(duckdb, {root, log: true})), "application/json"); } else if (pathname.startsWith("/_node/") || pathname.startsWith("/_jsr/") || pathname.startsWith("/_duckdb/")) { send(req, pathname, {root: join(root, ".observablehq", "cache")}).pipe(res); } else if (pathname.startsWith("/_npm/")) { From bc49674f23a7569c3fef77dbc1dd1662f526d81e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Fri, 11 Oct 2024 18:34:10 +0200 Subject: [PATCH 14/39] fix test --- .../build/{duckdb.duckdb => duckdb}/index.md | 0 .../duckdb-wasm@1.29.0/dist/duckdb-mvp.wasm | 0 .../duckdb.duckdb/_npm/htl@0.3.1/cd372fb8.js | 0 .../_npm/isoformat@0.2.1/cd372fb8.js | 0 .../_observablehq/client.00000001.js | 0 .../_observablehq/duckdb_manifest.json | 1 - .../_observablehq/runtime.00000002.js | 0 .../_observablehq/stdlib.00000003.js | 0 .../_observablehq/stdlib/duckdb.00000005.js | 0 .../_observablehq/stdlib/inputs.00000006.js | 0 .../_observablehq/stdlib/inputs.00000007.css | 0 .../theme-air,near-midnight.00000004.css | 0 test/output/build/duckdb.duckdb/index.html | 48 ------------------- .../wasm_eh/json.duckdb_extension.wasm} | 0 
.../wasm_mvp/json.duckdb_extension.wasm} | 0 .../wasm_eh/parquet.duckdb_extension.wasm} | 0 .../wasm_mvp/parquet.duckdb_extension.wasm} | 0 .../duckdb/_observablehq/duckdb_manifest.json | 1 - 18 files changed, 50 deletions(-) rename test/input/build/{duckdb.duckdb => duckdb}/index.md (100%) delete mode 100644 test/output/build/duckdb.duckdb/_npm/@duckdb/duckdb-wasm@1.29.0/dist/duckdb-mvp.wasm delete mode 100644 test/output/build/duckdb.duckdb/_npm/htl@0.3.1/cd372fb8.js delete mode 100644 test/output/build/duckdb.duckdb/_npm/isoformat@0.2.1/cd372fb8.js delete mode 100644 test/output/build/duckdb.duckdb/_observablehq/client.00000001.js delete mode 100644 test/output/build/duckdb.duckdb/_observablehq/duckdb_manifest.json delete mode 100644 test/output/build/duckdb.duckdb/_observablehq/runtime.00000002.js delete mode 100644 test/output/build/duckdb.duckdb/_observablehq/stdlib.00000003.js delete mode 100644 test/output/build/duckdb.duckdb/_observablehq/stdlib/duckdb.00000005.js delete mode 100644 test/output/build/duckdb.duckdb/_observablehq/stdlib/inputs.00000006.js delete mode 100644 test/output/build/duckdb.duckdb/_observablehq/stdlib/inputs.00000007.css delete mode 100644 test/output/build/duckdb.duckdb/_observablehq/theme-air,near-midnight.00000004.css delete mode 100644 test/output/build/duckdb.duckdb/index.html rename test/output/build/{duckdb.duckdb/_npm/@duckdb/duckdb-wasm@1.29.0/cd372fb8.js => duckdb/_duckdb/json-02bd175f/v1.1.1/wasm_eh/json.duckdb_extension.wasm} (100%) rename test/output/build/{duckdb.duckdb/_npm/@duckdb/duckdb-wasm@1.29.0/dist/duckdb-browser-eh.worker.cd372fb8.js => duckdb/_duckdb/json-02bd175f/v1.1.1/wasm_mvp/json.duckdb_extension.wasm} (100%) rename test/output/build/{duckdb.duckdb/_npm/@duckdb/duckdb-wasm@1.29.0/dist/duckdb-browser-mvp.worker.cd372fb8.js => duckdb/_duckdb/parquet-37a0fe5a/v1.1.1/wasm_eh/parquet.duckdb_extension.wasm} (100%) rename test/output/build/{duckdb.duckdb/_npm/@duckdb/duckdb-wasm@1.29.0/dist/duckdb-eh.wasm => 
duckdb/_duckdb/parquet-37a0fe5a/v1.1.1/wasm_mvp/parquet.duckdb_extension.wasm} (100%) delete mode 100644 test/output/build/duckdb/_observablehq/duckdb_manifest.json diff --git a/test/input/build/duckdb.duckdb/index.md b/test/input/build/duckdb/index.md similarity index 100% rename from test/input/build/duckdb.duckdb/index.md rename to test/input/build/duckdb/index.md diff --git a/test/output/build/duckdb.duckdb/_npm/@duckdb/duckdb-wasm@1.29.0/dist/duckdb-mvp.wasm b/test/output/build/duckdb.duckdb/_npm/@duckdb/duckdb-wasm@1.29.0/dist/duckdb-mvp.wasm deleted file mode 100644 index e69de29bb..000000000 diff --git a/test/output/build/duckdb.duckdb/_npm/htl@0.3.1/cd372fb8.js b/test/output/build/duckdb.duckdb/_npm/htl@0.3.1/cd372fb8.js deleted file mode 100644 index e69de29bb..000000000 diff --git a/test/output/build/duckdb.duckdb/_npm/isoformat@0.2.1/cd372fb8.js b/test/output/build/duckdb.duckdb/_npm/isoformat@0.2.1/cd372fb8.js deleted file mode 100644 index e69de29bb..000000000 diff --git a/test/output/build/duckdb.duckdb/_observablehq/client.00000001.js b/test/output/build/duckdb.duckdb/_observablehq/client.00000001.js deleted file mode 100644 index e69de29bb..000000000 diff --git a/test/output/build/duckdb.duckdb/_observablehq/duckdb_manifest.json b/test/output/build/duckdb.duckdb/_observablehq/duckdb_manifest.json deleted file mode 100644 index 796a34dd9..000000000 --- a/test/output/build/duckdb.duckdb/_observablehq/duckdb_manifest.json +++ /dev/null @@ -1 +0,0 @@ -{"extensions":[["json",{"ref":"_duckdb/e3b0c442","load":false}],["parquet",{"ref":"_duckdb/e3b0c442","load":false}]]} \ No newline at end of file diff --git a/test/output/build/duckdb.duckdb/_observablehq/runtime.00000002.js b/test/output/build/duckdb.duckdb/_observablehq/runtime.00000002.js deleted file mode 100644 index e69de29bb..000000000 diff --git a/test/output/build/duckdb.duckdb/_observablehq/stdlib.00000003.js b/test/output/build/duckdb.duckdb/_observablehq/stdlib.00000003.js deleted file mode 
100644 index e69de29bb..000000000 diff --git a/test/output/build/duckdb.duckdb/_observablehq/stdlib/duckdb.00000005.js b/test/output/build/duckdb.duckdb/_observablehq/stdlib/duckdb.00000005.js deleted file mode 100644 index e69de29bb..000000000 diff --git a/test/output/build/duckdb.duckdb/_observablehq/stdlib/inputs.00000006.js b/test/output/build/duckdb.duckdb/_observablehq/stdlib/inputs.00000006.js deleted file mode 100644 index e69de29bb..000000000 diff --git a/test/output/build/duckdb.duckdb/_observablehq/stdlib/inputs.00000007.css b/test/output/build/duckdb.duckdb/_observablehq/stdlib/inputs.00000007.css deleted file mode 100644 index e69de29bb..000000000 diff --git a/test/output/build/duckdb.duckdb/_observablehq/theme-air,near-midnight.00000004.css b/test/output/build/duckdb.duckdb/_observablehq/theme-air,near-midnight.00000004.css deleted file mode 100644 index e69de29bb..000000000 diff --git a/test/output/build/duckdb.duckdb/index.html b/test/output/build/duckdb.duckdb/index.html deleted file mode 100644 index b0ceaf67b..000000000 --- a/test/output/build/duckdb.duckdb/index.html +++ /dev/null @@ -1,48 +0,0 @@ - - - - - - -test DuckDB - - - - - - - - - - - - - - - - - - -
- -
-

test DuckDB

-
-
- -
- - diff --git a/test/output/build/duckdb.duckdb/_npm/@duckdb/duckdb-wasm@1.29.0/cd372fb8.js b/test/output/build/duckdb/_duckdb/json-02bd175f/v1.1.1/wasm_eh/json.duckdb_extension.wasm similarity index 100% rename from test/output/build/duckdb.duckdb/_npm/@duckdb/duckdb-wasm@1.29.0/cd372fb8.js rename to test/output/build/duckdb/_duckdb/json-02bd175f/v1.1.1/wasm_eh/json.duckdb_extension.wasm diff --git a/test/output/build/duckdb.duckdb/_npm/@duckdb/duckdb-wasm@1.29.0/dist/duckdb-browser-eh.worker.cd372fb8.js b/test/output/build/duckdb/_duckdb/json-02bd175f/v1.1.1/wasm_mvp/json.duckdb_extension.wasm similarity index 100% rename from test/output/build/duckdb.duckdb/_npm/@duckdb/duckdb-wasm@1.29.0/dist/duckdb-browser-eh.worker.cd372fb8.js rename to test/output/build/duckdb/_duckdb/json-02bd175f/v1.1.1/wasm_mvp/json.duckdb_extension.wasm diff --git a/test/output/build/duckdb.duckdb/_npm/@duckdb/duckdb-wasm@1.29.0/dist/duckdb-browser-mvp.worker.cd372fb8.js b/test/output/build/duckdb/_duckdb/parquet-37a0fe5a/v1.1.1/wasm_eh/parquet.duckdb_extension.wasm similarity index 100% rename from test/output/build/duckdb.duckdb/_npm/@duckdb/duckdb-wasm@1.29.0/dist/duckdb-browser-mvp.worker.cd372fb8.js rename to test/output/build/duckdb/_duckdb/parquet-37a0fe5a/v1.1.1/wasm_eh/parquet.duckdb_extension.wasm diff --git a/test/output/build/duckdb.duckdb/_npm/@duckdb/duckdb-wasm@1.29.0/dist/duckdb-eh.wasm b/test/output/build/duckdb/_duckdb/parquet-37a0fe5a/v1.1.1/wasm_mvp/parquet.duckdb_extension.wasm similarity index 100% rename from test/output/build/duckdb.duckdb/_npm/@duckdb/duckdb-wasm@1.29.0/dist/duckdb-eh.wasm rename to test/output/build/duckdb/_duckdb/parquet-37a0fe5a/v1.1.1/wasm_mvp/parquet.duckdb_extension.wasm diff --git a/test/output/build/duckdb/_observablehq/duckdb_manifest.json b/test/output/build/duckdb/_observablehq/duckdb_manifest.json deleted file mode 100644 index 796a34dd9..000000000 --- a/test/output/build/duckdb/_observablehq/duckdb_manifest.json +++ /dev/null @@ -1 
+0,0 @@ -{"extensions":[["json",{"ref":"_duckdb/e3b0c442","load":false}],["parquet",{"ref":"_duckdb/e3b0c442","load":false}]]} \ No newline at end of file From 9a13f2a4afec00d0265615408b1c206171ff10b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Fri, 11 Oct 2024 18:39:11 +0200 Subject: [PATCH 15/39] don't activate spatial on the documentation --- docs/sql.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/sql.md b/docs/sql.md index 40497a939..16ec36dc0 100644 --- a/docs/sql.md +++ b/docs/sql.md @@ -219,9 +219,9 @@ The self-hosted extensions are immediately available in all the `sql` code block SELECT bbox FROM read_json('https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/all_day.geojson'); ``` -Likewise, with the "spatial" extension configured, you can directly run: +Likewise, with the "spatial" extension configured, you could directly run: -```sql echo +```sql echo run=false SELECT ST_Area('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))'::GEOMETRY) as area; ``` From 4a5128d29ba9485a4ea0735e4030e1dafb67dfc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Mon, 14 Oct 2024 15:46:18 +0200 Subject: [PATCH 16/39] refactor: hash individual extensions, include the list of platforms in the config (not configurable yet) --- src/build.ts | 28 ++++--- src/client/stdlib/duckdb.js | 41 +++++----- src/config.ts | 2 + src/duckdb.ts | 75 ++++++++++++------- src/libraries.ts | 6 +- src/resolvers.ts | 3 +- test/build-test.ts | 1 - test/config-test.ts | 2 + test/libraries-test.ts | 2 +- .../v1.1.1/wasm_eh/json.duckdb_extension.wasm | 0 .../wasm_eh/parquet.duckdb_extension.wasm | 0 .../wasm_mvp/json.duckdb_extension.wasm | 0 .../wasm_mvp/parquet.duckdb_extension.wasm | 0 .../v1.1.1/wasm_eh/json.duckdb_extension.wasm | 0 .../wasm_eh/parquet.duckdb_extension.wasm | 0 .../wasm_mvp/json.duckdb_extension.wasm | 0 .../wasm_mvp/parquet.duckdb_extension.wasm | 0 17 files changed, 100 insertions(+), 60 deletions(-) 
rename test/output/build/duckdb/_duckdb/{json-02bd175f => e3b0c442/extensions.duckdb.org}/v1.1.1/wasm_eh/json.duckdb_extension.wasm (100%) rename test/output/build/duckdb/_duckdb/{parquet-37a0fe5a => e3b0c442/extensions.duckdb.org}/v1.1.1/wasm_eh/parquet.duckdb_extension.wasm (100%) rename test/output/build/duckdb/_duckdb/{json-02bd175f => e3b0c442/extensions.duckdb.org}/v1.1.1/wasm_mvp/json.duckdb_extension.wasm (100%) rename test/output/build/duckdb/_duckdb/{parquet-37a0fe5a => e3b0c442/extensions.duckdb.org}/v1.1.1/wasm_mvp/parquet.duckdb_extension.wasm (100%) create mode 100644 test/output/build/duckdb/_duckdb/extensions.duckdb.org/v1.1.1/wasm_eh/json.duckdb_extension.wasm create mode 100644 test/output/build/duckdb/_duckdb/extensions.duckdb.org/v1.1.1/wasm_eh/parquet.duckdb_extension.wasm create mode 100644 test/output/build/duckdb/_duckdb/extensions.duckdb.org/v1.1.1/wasm_mvp/json.duckdb_extension.wasm create mode 100644 test/output/build/duckdb/_duckdb/extensions.duckdb.org/v1.1.1/wasm_mvp/parquet.duckdb_extension.wasm diff --git a/src/build.ts b/src/build.ts index b5ed9a36b..4db5388a3 100644 --- a/src/build.ts +++ b/src/build.ts @@ -141,6 +141,20 @@ export async function build( effects.logger.log(cachePath); } + // Copy over the DuckDB extensions and create the DuckDB manifest. + for (const path of globalImports) { + if (path.startsWith("/_duckdb/")) { + const sourcePath = join(cacheRoot, path); + effects.output.write(`${faint("build")} ${path} ${faint("→")} `); + const contents = await readFile(sourcePath); + const hash = createHash("sha256").update(contents).digest("hex").slice(0, 8); + const alias = applyHash(path, hash); + aliases.set(path, alias); + await effects.writeFile(alias, contents); + } + } + const duckdb_manifest = await duckDBManifest(duckdb, {root, log: true, aliases}); + // Generate the client bundles. 
These are initially generated into the cache // because we need to rewrite any npm and node imports to be hashed; this is // handled generally for all global imports below. @@ -151,7 +165,7 @@ export async function build( const clientPath = getClientPath(path === "/_observablehq/client.js" ? "index.js" : path.slice("/_observablehq/".length)); // prettier-ignore const define: {[key: string]: string} = {}; if (path === "/_observablehq/stdlib/duckdb.js") { - define["process.DUCKDB_MANIFEST"] = JSON.stringify(await duckDBManifest(duckdb, {root, log: true})); + define["process.DUCKDB_MANIFEST"] = JSON.stringify(duckdb_manifest); } const contents = await rollupClient(clientPath, root, path, {minify: true, keepNames: true, define}); await prepareOutput(cachePath); @@ -205,8 +219,8 @@ export async function build( } // Copy over global assets (e.g., minisearch.json, DuckDB’s WebAssembly). - // Anything in _observablehq also needs a content hash, but anything in _npm, - // _node or _duckdb does not (because they are already necessarily immutable). + // Anything in _observablehq also needs a content hash, but anything in _npm + // or _node does not (because they are already necessarily immutable). 
for (const path of globalImports) { if (path.endsWith(".js")) continue; const sourcePath = join(cacheRoot, path); @@ -217,13 +231,6 @@ export async function build( const alias = applyHash(path, hash); aliases.set(path, alias); await effects.writeFile(alias, contents); - } else if (path.startsWith("/_duckdb/")) { - const name = path.slice("/_duckdb/".length, -9); - for (const p of ["eh", "mvp"]) - await effects.copyFile( - join(sourcePath, "v1.1.1", `wasm_${p}`, `${name}.duckdb_extension.wasm`), - join(path, "v1.1.1", `wasm_${p}`, `${name}.duckdb_extension.wasm`) - ); } else { await effects.copyFile(sourcePath, path); } @@ -409,6 +416,7 @@ function validateLinks(outputs: Map): [valid: Li } function applyHash(path: string, hash: string): string { + if (path.startsWith("/_duckdb/")) return join("/_duckdb/", hash, path.slice("/_duckdb/".length)); const ext = extname(path); let name = basename(path, ext); if (path.endsWith(".js")) name = name.replace(/(^|\.)_esm$/, ""); // allow hash to replace _esm diff --git a/src/client/stdlib/duckdb.js b/src/client/stdlib/duckdb.js index 25344996d..acb24a4b5 100644 --- a/src/client/stdlib/duckdb.js +++ b/src/client/stdlib/duckdb.js @@ -29,16 +29,26 @@ import * as duckdb from "npm:@duckdb/duckdb-wasm"; // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE // POSSIBILITY OF SUCH DAMAGE. -const bundle = await duckdb.selectBundle({ - mvp: { - mainModule: import.meta.resolve("npm:@duckdb/duckdb-wasm/dist/duckdb-mvp.wasm"), - mainWorker: import.meta.resolve("npm:@duckdb/duckdb-wasm/dist/duckdb-browser-mvp.worker.js") - }, - eh: { - mainModule: import.meta.resolve("npm:@duckdb/duckdb-wasm/dist/duckdb-eh.wasm"), - mainWorker: import.meta.resolve("npm:@duckdb/duckdb-wasm/dist/duckdb-browser-eh.worker.js") - } -}); +// Baked-in manifest. 
+const manifest = process.DUCKDB_MANIFEST; + +const candidates = { + ...(manifest.bundles.includes("mvp") && { + mvp: { + mainModule: import.meta.resolve("npm:@duckdb/duckdb-wasm/dist/duckdb-mvp.wasm"), + mainWorker: import.meta.resolve("npm:@duckdb/duckdb-wasm/dist/duckdb-browser-mvp.worker.js") + } + }), + ...(manifest.bundles.includes("eh") && { + eh: { + mainModule: import.meta.resolve("npm:@duckdb/duckdb-wasm/dist/duckdb-eh.wasm"), + mainWorker: import.meta.resolve("npm:@duckdb/duckdb-wasm/dist/duckdb-browser-eh.worker.js") + } + }) +}; +const bundle = await duckdb.selectBundle(candidates); +const activeBundle = manifest.bundles.find((key) => bundle.mainModule === candidates[key].mainModule); +const extensions = manifest.extensions.filter(([, {bundle}]) => bundle === activeBundle); const logger = new duckdb.ConsoleLogger(duckdb.LogLevel.WARNING); @@ -179,19 +189,14 @@ export class DuckDBClient { } } -Object.defineProperty(DuckDBClient.prototype, "dialect", { - value: "duckdb" -}); +Object.defineProperty(DuckDBClient.prototype, "dialect", {value: "duckdb"}); async function registerExtensions(db, {load}) { const connection = await db.connect(); try { - // Baked-in extensions manifest. - const {log, extensions} = process.DUCKDB_MANIFEST; - // Preview adds a DuckDBClientReport utility to the console. We don’t add it // in the public build so as not to pollute the window. - if (log) { + if (manifest.log) { window.DuckDBClientReport = async () => { const connection = await db.connect(); try { @@ -208,7 +213,7 @@ async function registerExtensions(db, {load}) { await Promise.all( extensions.map(([name, {ref, load: l}]) => connection - .query(`INSTALL ${name} FROM '${import.meta.resolve(`../../${ref}`)}'`) + .query(`INSTALL ${name} FROM '${import.meta.resolve(`../..${ref}`)}'`) .then(() => (load ? 
load.includes(name) : l) && connection.query(`LOAD ${name}`)) ) ); diff --git a/src/config.ts b/src/config.ts index 35e691d75..71d180df8 100644 --- a/src/config.ts +++ b/src/config.ts @@ -80,6 +80,7 @@ export interface DuckDBConfig { install: string[]; load: string[]; source: {[name: string]: string}; + bundles: string[]; } export interface Config { @@ -504,6 +505,7 @@ function normalizeDuckDB(spec: unknown): DuckDBConfig { const load = spec?.["load"] ?? []; const source = new Map(Object.entries(spec?.["source"] ?? {})); return { + bundles: ["eh", "mvp"], install, load: load.filter((name: string) => install.includes(name)), source: Object.fromEntries( diff --git a/src/duckdb.ts b/src/duckdb.ts index e66665b37..be5c1efd5 100644 --- a/src/duckdb.ts +++ b/src/duckdb.ts @@ -1,24 +1,33 @@ -import {createHash} from "node:crypto"; import {existsSync} from "node:fs"; -import {copyFile, mkdir, readFile, writeFile} from "node:fs/promises"; +import {mkdir, writeFile} from "node:fs/promises"; import {dirname, join} from "node:path/posix"; +import {cross} from "d3-array"; import type {DuckDBConfig} from "./config.js"; import {faint} from "./tty.js"; const downloadRequests = new Map>(); -export async function duckDBManifest(duckdb: DuckDBConfig, {root, log}: {root: string; log?: boolean}) { +export async function duckDBManifest( + duckdb: DuckDBConfig, + {root, log, aliases}: {root: string; log?: boolean; aliases?: Map} +) { return { - log, + bundles: duckdb.bundles, extensions: await Promise.all( - duckdb.install.map(async (name) => [ - name, - { - ref: await resolveDuckDBExtension(root, duckdb, name), - load: duckdb.load.includes(name) - } - ]) - ) + cross(duckdb.bundles, duckdb.install).map(async ([p, name]) => { + let ext = await resolveDuckDBExtension(root, p, duckdb.source[name], name); + if (aliases?.has(ext)) ext = aliases.get(ext)!; + return [ + name, + { + ref: dirname(dirname(dirname(ext))), + load: duckdb.load.includes(name), + bundle: p + } + ]; + }) + ), + log }; } 
@@ -30,25 +39,33 @@ export async function duckDBManifest(duckdb: DuckDBConfig, {root, log}: {root: s * statement. The repo is structured as required by DuckDB with: * ${repo}/v1.1.1/wasm_{p}/${name}.duckdb_extension.wasm */ -export async function resolveDuckDBExtension(root: string, duckdb: DuckDBConfig, name: string): Promise { - const platforms = ["eh", "mvp"]; - const repo = duckdb.source[name]; +export async function resolveDuckDBExtension(root: string, p: string, repo: string, name: string): Promise { if (!repo.startsWith("https://")) throw new Error(`invalid repo: ${repo}`); - const {host} = new URL(repo); const cache = join(root, ".observablehq", "cache"); - const outputDir = join(cache, "duckdb", host); - const files = ["eh", "mvp"].map((p) => join(outputDir, `${name}.${p}.wasm`)); - if (files.every(existsSync)) { - const ref = await duckDBHash(name, files); - if (platforms.every((p) => existsSync(join(cache, ref, "v1.1.1", `wasm_${p}`, `${name}.duckdb_extension.wasm`)))) - return ref; - } - const key = join(outputDir, name); - let promise = downloadRequests.get(key); + const file = `${name}.duckdb_extension.wasm`; + const ref = `${repo}/v1.1.1/wasm_${p}/${file}`.slice("https://".length); + const path = join("_duckdb", ref); + const cachePath = join(cache, path); + if (existsSync(cachePath)) return `/${path}`; + let promise = downloadRequests.get(cachePath); if (promise) return promise; // coalesce concurrent requests + promise = (async () => { + const href = `https://${ref}`; + console.log(`duckdb:${href} ${faint("→")} ${cachePath}`); + const response = await fetch(href); + if (!response.ok) throw new Error(`unable to fetch: ${href}`); + await mkdir(dirname(cachePath), {recursive: true}); + await writeFile(cachePath, Buffer.from(await response.arrayBuffer())); + return `/${path}`; + })(); + promise.catch(console.error).then(() => downloadRequests.delete(cachePath)); + downloadRequests.set(cachePath, promise); + return promise; +} + +/* promise = 
Promise.all( - platforms.map(async (p) => { - const href = `${repo}/v1.1.1/wasm_${p}/${name}.duckdb_extension.wasm`; + bundles.map(async (p) => { const outputPath = join(outputDir, `${name}.${p}.wasm`); console.log(`download: ${href} ${faint("→")} ${outputPath}`); const response = await fetch(href); @@ -58,7 +75,7 @@ export async function resolveDuckDBExtension(root: string, duckdb: DuckDBConfig, }) ).then(async () => { const ref = await duckDBHash(name, files); - for (const [i, p] of platforms.entries()) { + for (const [i, p] of bundles.entries()) { const targetPath = join(cache, ref, "v1.1.1", `wasm_${p}`, `${name}.duckdb_extension.wasm`); await mkdir(dirname(targetPath), {recursive: true}); await copyFile(files[i], targetPath); @@ -76,3 +93,5 @@ async function duckDBHash(name: string, files: string[]): Promise { for (const file of files) hash.update(await readFile(file, "utf-8")); return join("_duckdb", `${name}-${hash.digest("hex").slice(0, 8)}`); } + +*/ diff --git a/src/libraries.ts b/src/libraries.ts index efd293038..4989842aa 100644 --- a/src/libraries.ts +++ b/src/libraries.ts @@ -83,7 +83,11 @@ export function getImplicitDownloads(imports: Iterable, duckdb?: DuckDBC implicits.add("npm:@duckdb/duckdb-wasm/dist/duckdb-eh.wasm"); implicits.add("npm:@duckdb/duckdb-wasm/dist/duckdb-browser-eh.worker.js"); if (!duckdb) throw new Error("Implementation error: missing duckdb configuration"); - for (const name of duckdb.install) implicits.add(`duckdb:${name}`); + for (const p of duckdb.bundles) { + for (const name of duckdb.install) { + implicits.add(`duckdb:${p},${name},${duckdb.source[name]}`); + } + } } if (set.has("npm:@observablehq/sqlite")) { implicits.add("npm:sql.js/dist/sql-wasm.js"); diff --git a/src/resolvers.ts b/src/resolvers.ts index 61c385076..ab87f6e62 100644 --- a/src/resolvers.ts +++ b/src/resolvers.ts @@ -371,7 +371,8 @@ async function resolveResolvers( resolutions.set(specifier, path); await populateNpmCache(root, path); } else if 
(specifier.startsWith("duckdb:")) { - const path = await resolveDuckDBExtension(root, duckdb, specifier.slice("duckdb:".length)); + const [p, name, repo] = specifier.slice("duckdb:".length).split(","); + const path = await resolveDuckDBExtension(root, p, repo, name); resolutions.set(specifier, path); } else if (!specifier.startsWith("observablehq:")) { throw new Error(`unhandled implicit download: ${specifier}`); diff --git a/test/build-test.ts b/test/build-test.ts index 9fc516c3e..e1ff0a392 100644 --- a/test/build-test.ts +++ b/test/build-test.ts @@ -76,7 +76,6 @@ describe("build", () => { // update the test snapshots whenever Framework’s client code changes. We // make an exception for minisearch.json because to test the content. for (const path of findFiles(join(outputDir, "_observablehq"))) { - if (path === "duckdb_manifest.json") continue; const match = /^((.+)\.[0-9a-f]{8})\.(\w+)$/.exec(path); if (!match) throw new Error(`no hash found: ${path}`); const [, key, name, ext] = match; diff --git a/test/config-test.ts b/test/config-test.ts index e9bfd7248..fcae88d4b 100644 --- a/test/config-test.ts +++ b/test/config-test.ts @@ -45,6 +45,7 @@ describe("readConfig(undefined, root)", () => { search: null, watchPath: resolve("test/input/build/config/observablehq.config.js"), duckdb: { + bundles: ["eh", "mvp"], install: ["json", "parquet"], load: [], source: {json: "https://extensions.duckdb.org", parquet: "https://extensions.duckdb.org"} @@ -78,6 +79,7 @@ describe("readConfig(undefined, root)", () => { search: null, watchPath: undefined, duckdb: { + bundles: ["eh", "mvp"], install: ["json", "parquet"], load: [], source: {json: "https://extensions.duckdb.org", parquet: "https://extensions.duckdb.org"} diff --git a/test/libraries-test.ts b/test/libraries-test.ts index 9a5b13596..acc5ecddd 100644 --- a/test/libraries-test.ts +++ b/test/libraries-test.ts @@ -53,7 +53,7 @@ describe("getImplicitStylesheets(imports)", () => { describe("getImplicitDownloads(imports)", () => 
{ it("supports known imports", () => { assert.deepStrictEqual( - getImplicitDownloads(["npm:@observablehq/duckdb"], {install: [], load: [], source: {}}), + getImplicitDownloads(["npm:@observablehq/duckdb"], {install: [], load: [], source: {}, bundles: []}), new Set([ "npm:@duckdb/duckdb-wasm/dist/duckdb-mvp.wasm", "npm:@duckdb/duckdb-wasm/dist/duckdb-browser-mvp.worker.js", diff --git a/test/output/build/duckdb/_duckdb/json-02bd175f/v1.1.1/wasm_eh/json.duckdb_extension.wasm b/test/output/build/duckdb/_duckdb/e3b0c442/extensions.duckdb.org/v1.1.1/wasm_eh/json.duckdb_extension.wasm similarity index 100% rename from test/output/build/duckdb/_duckdb/json-02bd175f/v1.1.1/wasm_eh/json.duckdb_extension.wasm rename to test/output/build/duckdb/_duckdb/e3b0c442/extensions.duckdb.org/v1.1.1/wasm_eh/json.duckdb_extension.wasm diff --git a/test/output/build/duckdb/_duckdb/parquet-37a0fe5a/v1.1.1/wasm_eh/parquet.duckdb_extension.wasm b/test/output/build/duckdb/_duckdb/e3b0c442/extensions.duckdb.org/v1.1.1/wasm_eh/parquet.duckdb_extension.wasm similarity index 100% rename from test/output/build/duckdb/_duckdb/parquet-37a0fe5a/v1.1.1/wasm_eh/parquet.duckdb_extension.wasm rename to test/output/build/duckdb/_duckdb/e3b0c442/extensions.duckdb.org/v1.1.1/wasm_eh/parquet.duckdb_extension.wasm diff --git a/test/output/build/duckdb/_duckdb/json-02bd175f/v1.1.1/wasm_mvp/json.duckdb_extension.wasm b/test/output/build/duckdb/_duckdb/e3b0c442/extensions.duckdb.org/v1.1.1/wasm_mvp/json.duckdb_extension.wasm similarity index 100% rename from test/output/build/duckdb/_duckdb/json-02bd175f/v1.1.1/wasm_mvp/json.duckdb_extension.wasm rename to test/output/build/duckdb/_duckdb/e3b0c442/extensions.duckdb.org/v1.1.1/wasm_mvp/json.duckdb_extension.wasm diff --git a/test/output/build/duckdb/_duckdb/parquet-37a0fe5a/v1.1.1/wasm_mvp/parquet.duckdb_extension.wasm b/test/output/build/duckdb/_duckdb/e3b0c442/extensions.duckdb.org/v1.1.1/wasm_mvp/parquet.duckdb_extension.wasm similarity index 100% rename 
from test/output/build/duckdb/_duckdb/parquet-37a0fe5a/v1.1.1/wasm_mvp/parquet.duckdb_extension.wasm rename to test/output/build/duckdb/_duckdb/e3b0c442/extensions.duckdb.org/v1.1.1/wasm_mvp/parquet.duckdb_extension.wasm diff --git a/test/output/build/duckdb/_duckdb/extensions.duckdb.org/v1.1.1/wasm_eh/json.duckdb_extension.wasm b/test/output/build/duckdb/_duckdb/extensions.duckdb.org/v1.1.1/wasm_eh/json.duckdb_extension.wasm new file mode 100644 index 000000000..e69de29bb diff --git a/test/output/build/duckdb/_duckdb/extensions.duckdb.org/v1.1.1/wasm_eh/parquet.duckdb_extension.wasm b/test/output/build/duckdb/_duckdb/extensions.duckdb.org/v1.1.1/wasm_eh/parquet.duckdb_extension.wasm new file mode 100644 index 000000000..e69de29bb diff --git a/test/output/build/duckdb/_duckdb/extensions.duckdb.org/v1.1.1/wasm_mvp/json.duckdb_extension.wasm b/test/output/build/duckdb/_duckdb/extensions.duckdb.org/v1.1.1/wasm_mvp/json.duckdb_extension.wasm new file mode 100644 index 000000000..e69de29bb diff --git a/test/output/build/duckdb/_duckdb/extensions.duckdb.org/v1.1.1/wasm_mvp/parquet.duckdb_extension.wasm b/test/output/build/duckdb/_duckdb/extensions.duckdb.org/v1.1.1/wasm_mvp/parquet.duckdb_extension.wasm new file mode 100644 index 000000000..e69de29bb From 13f892c281343a7348bf87ef38c0a65e5dd60785 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Mon, 14 Oct 2024 15:55:42 +0200 Subject: [PATCH 17/39] don't copy extensions twice --- src/build.ts | 4 ++-- .../v1.1.1/wasm_eh/json.duckdb_extension.wasm | 0 .../v1.1.1/wasm_eh/parquet.duckdb_extension.wasm | 0 .../v1.1.1/wasm_mvp/json.duckdb_extension.wasm | 0 .../v1.1.1/wasm_mvp/parquet.duckdb_extension.wasm | 0 .../v1.1.1/wasm_eh/json.duckdb_extension.wasm | 0 .../v1.1.1/wasm_eh/parquet.duckdb_extension.wasm | 0 .../v1.1.1/wasm_mvp/json.duckdb_extension.wasm | 0 .../v1.1.1/wasm_mvp/parquet.duckdb_extension.wasm | 0 9 files changed, 2 insertions(+), 2 deletions(-) rename 
test/output/build/duckdb/_duckdb/{e3b0c442/extensions.duckdb.org => e3b0c442-extensions.duckdb.org}/v1.1.1/wasm_eh/json.duckdb_extension.wasm (100%) rename test/output/build/duckdb/_duckdb/{e3b0c442/extensions.duckdb.org => e3b0c442-extensions.duckdb.org}/v1.1.1/wasm_eh/parquet.duckdb_extension.wasm (100%) rename test/output/build/duckdb/_duckdb/{e3b0c442/extensions.duckdb.org => e3b0c442-extensions.duckdb.org}/v1.1.1/wasm_mvp/json.duckdb_extension.wasm (100%) rename test/output/build/duckdb/_duckdb/{e3b0c442/extensions.duckdb.org => e3b0c442-extensions.duckdb.org}/v1.1.1/wasm_mvp/parquet.duckdb_extension.wasm (100%) delete mode 100644 test/output/build/duckdb/_duckdb/extensions.duckdb.org/v1.1.1/wasm_eh/json.duckdb_extension.wasm delete mode 100644 test/output/build/duckdb/_duckdb/extensions.duckdb.org/v1.1.1/wasm_eh/parquet.duckdb_extension.wasm delete mode 100644 test/output/build/duckdb/_duckdb/extensions.duckdb.org/v1.1.1/wasm_mvp/json.duckdb_extension.wasm delete mode 100644 test/output/build/duckdb/_duckdb/extensions.duckdb.org/v1.1.1/wasm_mvp/parquet.duckdb_extension.wasm diff --git a/src/build.ts b/src/build.ts index 4db5388a3..6557b7923 100644 --- a/src/build.ts +++ b/src/build.ts @@ -222,7 +222,7 @@ export async function build( // Anything in _observablehq also needs a content hash, but anything in _npm // or _node does not (because they are already necessarily immutable). 
for (const path of globalImports) { - if (path.endsWith(".js")) continue; + if (path.endsWith(".js") || path.startsWith("/_duckdb/")) continue; const sourcePath = join(cacheRoot, path); effects.output.write(`${faint("build")} ${path} ${faint("→")} `); if (path.startsWith("/_observablehq/")) { @@ -416,7 +416,7 @@ function validateLinks(outputs: Map): [valid: Li } function applyHash(path: string, hash: string): string { - if (path.startsWith("/_duckdb/")) return join("/_duckdb/", hash, path.slice("/_duckdb/".length)); + if (path.startsWith("/_duckdb/")) return join("/_duckdb/", `${hash}-${path.slice("/_duckdb/".length)}`); const ext = extname(path); let name = basename(path, ext); if (path.endsWith(".js")) name = name.replace(/(^|\.)_esm$/, ""); // allow hash to replace _esm diff --git a/test/output/build/duckdb/_duckdb/e3b0c442/extensions.duckdb.org/v1.1.1/wasm_eh/json.duckdb_extension.wasm b/test/output/build/duckdb/_duckdb/e3b0c442-extensions.duckdb.org/v1.1.1/wasm_eh/json.duckdb_extension.wasm similarity index 100% rename from test/output/build/duckdb/_duckdb/e3b0c442/extensions.duckdb.org/v1.1.1/wasm_eh/json.duckdb_extension.wasm rename to test/output/build/duckdb/_duckdb/e3b0c442-extensions.duckdb.org/v1.1.1/wasm_eh/json.duckdb_extension.wasm diff --git a/test/output/build/duckdb/_duckdb/e3b0c442/extensions.duckdb.org/v1.1.1/wasm_eh/parquet.duckdb_extension.wasm b/test/output/build/duckdb/_duckdb/e3b0c442-extensions.duckdb.org/v1.1.1/wasm_eh/parquet.duckdb_extension.wasm similarity index 100% rename from test/output/build/duckdb/_duckdb/e3b0c442/extensions.duckdb.org/v1.1.1/wasm_eh/parquet.duckdb_extension.wasm rename to test/output/build/duckdb/_duckdb/e3b0c442-extensions.duckdb.org/v1.1.1/wasm_eh/parquet.duckdb_extension.wasm diff --git a/test/output/build/duckdb/_duckdb/e3b0c442/extensions.duckdb.org/v1.1.1/wasm_mvp/json.duckdb_extension.wasm b/test/output/build/duckdb/_duckdb/e3b0c442-extensions.duckdb.org/v1.1.1/wasm_mvp/json.duckdb_extension.wasm 
similarity index 100% rename from test/output/build/duckdb/_duckdb/e3b0c442/extensions.duckdb.org/v1.1.1/wasm_mvp/json.duckdb_extension.wasm rename to test/output/build/duckdb/_duckdb/e3b0c442-extensions.duckdb.org/v1.1.1/wasm_mvp/json.duckdb_extension.wasm diff --git a/test/output/build/duckdb/_duckdb/e3b0c442/extensions.duckdb.org/v1.1.1/wasm_mvp/parquet.duckdb_extension.wasm b/test/output/build/duckdb/_duckdb/e3b0c442-extensions.duckdb.org/v1.1.1/wasm_mvp/parquet.duckdb_extension.wasm similarity index 100% rename from test/output/build/duckdb/_duckdb/e3b0c442/extensions.duckdb.org/v1.1.1/wasm_mvp/parquet.duckdb_extension.wasm rename to test/output/build/duckdb/_duckdb/e3b0c442-extensions.duckdb.org/v1.1.1/wasm_mvp/parquet.duckdb_extension.wasm diff --git a/test/output/build/duckdb/_duckdb/extensions.duckdb.org/v1.1.1/wasm_eh/json.duckdb_extension.wasm b/test/output/build/duckdb/_duckdb/extensions.duckdb.org/v1.1.1/wasm_eh/json.duckdb_extension.wasm deleted file mode 100644 index e69de29bb..000000000 diff --git a/test/output/build/duckdb/_duckdb/extensions.duckdb.org/v1.1.1/wasm_eh/parquet.duckdb_extension.wasm b/test/output/build/duckdb/_duckdb/extensions.duckdb.org/v1.1.1/wasm_eh/parquet.duckdb_extension.wasm deleted file mode 100644 index e69de29bb..000000000 diff --git a/test/output/build/duckdb/_duckdb/extensions.duckdb.org/v1.1.1/wasm_mvp/json.duckdb_extension.wasm b/test/output/build/duckdb/_duckdb/extensions.duckdb.org/v1.1.1/wasm_mvp/json.duckdb_extension.wasm deleted file mode 100644 index e69de29bb..000000000 diff --git a/test/output/build/duckdb/_duckdb/extensions.duckdb.org/v1.1.1/wasm_mvp/parquet.duckdb_extension.wasm b/test/output/build/duckdb/_duckdb/extensions.duckdb.org/v1.1.1/wasm_mvp/parquet.duckdb_extension.wasm deleted file mode 100644 index e69de29bb..000000000 From d72f0c3dd7695a6a6afdb46fae836dd6f25156c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Sun, 20 Oct 2024 20:08:50 +0200 Subject: [PATCH 18/39] Update 
src/duckdb.ts Co-authored-by: Mike Bostock --- src/duckdb.ts | 33 --------------------------------- 1 file changed, 33 deletions(-) diff --git a/src/duckdb.ts b/src/duckdb.ts index be5c1efd5..4d884b204 100644 --- a/src/duckdb.ts +++ b/src/duckdb.ts @@ -62,36 +62,3 @@ export async function resolveDuckDBExtension(root: string, p: string, repo: stri downloadRequests.set(cachePath, promise); return promise; } - -/* - promise = Promise.all( - bundles.map(async (p) => { - const outputPath = join(outputDir, `${name}.${p}.wasm`); - console.log(`download: ${href} ${faint("→")} ${outputPath}`); - const response = await fetch(href); - if (!response.ok) throw new Error(`unable to fetch: ${href}`); - await mkdir(dirname(outputPath), {recursive: true}); - await writeFile(outputPath, Buffer.from(await response.arrayBuffer())); - }) - ).then(async () => { - const ref = await duckDBHash(name, files); - for (const [i, p] of bundles.entries()) { - const targetPath = join(cache, ref, "v1.1.1", `wasm_${p}`, `${name}.duckdb_extension.wasm`); - await mkdir(dirname(targetPath), {recursive: true}); - await copyFile(files[i], targetPath); - } - return ref; - }); - promise.catch(console.error).then(() => files.forEach((file) => downloadRequests.delete(file))); - downloadRequests.set(key, promise); - return promise; -} - -async function duckDBHash(name: string, files: string[]): Promise { - const hash = createHash("sha256"); - hash.update(name); - for (const file of files) hash.update(await readFile(file, "utf-8")); - return join("_duckdb", `${name}-${hash.digest("hex").slice(0, 8)}`); -} - -*/ From d6fc0202309b0317ee77eb0c520ddebbd06f2bcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Mon, 21 Oct 2024 11:44:19 +0200 Subject: [PATCH 19/39] remove DuckDBClientReport utility --- docs/sql.md | 6 +----- src/build.ts | 2 +- src/client/stdlib/duckdb.js | 16 ---------------- src/duckdb.ts | 5 ++--- 4 files changed, 4 insertions(+), 25 deletions(-) diff --git a/docs/sql.md 
b/docs/sql.md index 16ec36dc0..8c1e78d7d 100644 --- a/docs/sql.md +++ b/docs/sql.md @@ -245,11 +245,7 @@ However, for performance and ergonomy, we strongly recommend adding all the exte
-Since some extensions are autoloading, it can be hard to tell which ones are -effectively in use. You can inspect the network tab in your browser, or (in -preview only), open your browser console and type `DuckDBClientReport()` — this -utility offers additional information, such as the path used for self-hosted -extensions. +To tell which extensions are effectively in use on a page, inspect the network tab in your browser, or run the following query: `FROM duckdb_extensions() WHERE loaded;`.
diff --git a/src/build.ts b/src/build.ts index 18b3c80e7..72a66c660 100644 --- a/src/build.ts +++ b/src/build.ts @@ -153,7 +153,7 @@ export async function build( await effects.writeFile(alias, contents); } } - const duckdb_manifest = await duckDBManifest(duckdb, {root, log: true, aliases}); + const duckdb_manifest = await duckDBManifest(duckdb, {root, aliases}); // Generate the client bundles. These are initially generated into the cache // because we need to rewrite any npm and node imports to be hashed; this is diff --git a/src/client/stdlib/duckdb.js b/src/client/stdlib/duckdb.js index acb24a4b5..914494a48 100644 --- a/src/client/stdlib/duckdb.js +++ b/src/client/stdlib/duckdb.js @@ -194,22 +194,6 @@ Object.defineProperty(DuckDBClient.prototype, "dialect", {value: "duckdb"}); async function registerExtensions(db, {load}) { const connection = await db.connect(); try { - // Preview adds a DuckDBClientReport utility to the console. We don’t add it - // in the public build so as not to pollute the window. 
- if (manifest.log) { - window.DuckDBClientReport = async () => { - const connection = await db.connect(); - try { - const refs = new Map(extensions); - const ext = await connection.query( - "SELECT extension_name, description FROM duckdb_extensions() WHERE loaded;" - ); - console.table(Array.from(ext, (e) => ({...e, ...refs.get(e.extension_name)}))); - } finally { - await connection.close(); - } - }; - } await Promise.all( extensions.map(([name, {ref, load: l}]) => connection diff --git a/src/duckdb.ts b/src/duckdb.ts index 4d884b204..73a8316e5 100644 --- a/src/duckdb.ts +++ b/src/duckdb.ts @@ -9,7 +9,7 @@ const downloadRequests = new Map>(); export async function duckDBManifest( duckdb: DuckDBConfig, - {root, log, aliases}: {root: string; log?: boolean; aliases?: Map} + {root, aliases}: {root: string; aliases?: Map} ) { return { bundles: duckdb.bundles, @@ -26,8 +26,7 @@ export async function duckDBManifest( } ]; }) - ), - log + ) }; } From 69f25a2f5f97d35ebc5832225aa2a62470c63bb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Mon, 21 Oct 2024 11:54:00 +0200 Subject: [PATCH 20/39] renames --- src/build.ts | 6 +++--- src/client/stdlib/duckdb.js | 3 ++- src/duckdb.ts | 2 +- src/preview.ts | 4 ++-- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/build.ts b/src/build.ts index 72a66c660..e3dcd3766 100644 --- a/src/build.ts +++ b/src/build.ts @@ -3,7 +3,7 @@ import {existsSync} from "node:fs"; import {copyFile, readFile, rm, stat, writeFile} from "node:fs/promises"; import {basename, dirname, extname, join} from "node:path/posix"; import type {Config} from "./config.js"; -import {duckDBManifest} from "./duckdb.js"; +import {getDuckDBManifest} from "./duckdb.js"; import {CliError} from "./error.js"; import {getClientPath, prepareOutput} from "./files.js"; import {findModule, getModuleHash, readJavaScript} from "./javascript/module.js"; @@ -153,7 +153,7 @@ export async function build( await effects.writeFile(alias, contents); } 
} - const duckdb_manifest = await duckDBManifest(duckdb, {root, aliases}); + const duckDBManifest = await getDuckDBManifest(duckdb, {root, aliases}); // Generate the client bundles. These are initially generated into the cache // because we need to rewrite any npm and node imports to be hashed; this is @@ -165,7 +165,7 @@ export async function build( const clientPath = getClientPath(path === "/_observablehq/client.js" ? "index.js" : path.slice("/_observablehq/".length)); // prettier-ignore const define: {[key: string]: string} = {}; if (path === "/_observablehq/stdlib/duckdb.js") { - define["process.DUCKDB_MANIFEST"] = JSON.stringify(duckdb_manifest); + define["DUCKDB_MANIFEST"] = JSON.stringify(duckDBManifest); } const contents = await rollupClient(clientPath, root, path, {minify: true, keepNames: true, define}); await prepareOutput(cachePath); diff --git a/src/client/stdlib/duckdb.js b/src/client/stdlib/duckdb.js index 914494a48..5158f1c52 100644 --- a/src/client/stdlib/duckdb.js +++ b/src/client/stdlib/duckdb.js @@ -30,7 +30,8 @@ import * as duckdb from "npm:@duckdb/duckdb-wasm"; // POSSIBILITY OF SUCH DAMAGE. // Baked-in manifest. 
-const manifest = process.DUCKDB_MANIFEST; +// eslint-disable-next-line no-undef +const manifest = DUCKDB_MANIFEST; const candidates = { ...(manifest.bundles.includes("mvp") && { diff --git a/src/duckdb.ts b/src/duckdb.ts index 73a8316e5..15fbfc0e3 100644 --- a/src/duckdb.ts +++ b/src/duckdb.ts @@ -7,7 +7,7 @@ import {faint} from "./tty.js"; const downloadRequests = new Map>(); -export async function duckDBManifest( +export async function getDuckDBManifest( duckdb: DuckDBConfig, {root, aliases}: {root: string; aliases?: Map} ) { diff --git a/src/preview.ts b/src/preview.ts index 08a81a76d..2dcd78736 100644 --- a/src/preview.ts +++ b/src/preview.ts @@ -16,7 +16,7 @@ import type {WebSocket} from "ws"; import {WebSocketServer} from "ws"; import type {Config} from "./config.js"; import {readConfig} from "./config.js"; -import {duckDBManifest} from "./duckdb.js"; +import {getDuckDBManifest} from "./duckdb.js"; import {enoent, isEnoent, isHttpError, isSystemError} from "./error.js"; import {getClientPath} from "./files.js"; import type {FileWatchers} from "./fileWatchers.js"; @@ -138,7 +138,7 @@ export class PreviewServer { const path = getClientPath(pathname.slice("/_observablehq/".length)); const options = pathname === "/_observablehq/stdlib/duckdb.js" - ? {define: {"process.DUCKDB_MANIFEST": JSON.stringify(await duckDBManifest(duckdb, {root, log: true}))}} + ? 
{define: {DUCKDB_MANIFEST: JSON.stringify(await getDuckDBManifest(duckdb, {root}))}} : {}; end(req, res, await rollupClient(path, root, pathname, options), "text/javascript"); } else if (pathname.startsWith("/_observablehq/") && pathname.endsWith(".css")) { From 30788e355bc4e5de08adc9a54cf9be7cf248eb33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Mon, 21 Oct 2024 11:58:27 +0200 Subject: [PATCH 21/39] p for platform --- src/duckdb.ts | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/duckdb.ts b/src/duckdb.ts index 15fbfc0e3..c902eca57 100644 --- a/src/duckdb.ts +++ b/src/duckdb.ts @@ -38,11 +38,16 @@ export async function getDuckDBManifest( * statement. The repo is structured as required by DuckDB with: * ${repo}/v1.1.1/wasm_{p}/${name}.duckdb_extension.wasm */ -export async function resolveDuckDBExtension(root: string, p: string, repo: string, name: string): Promise { +export async function resolveDuckDBExtension( + root: string, + platform: string, + repo: string, + name: string +): Promise { if (!repo.startsWith("https://")) throw new Error(`invalid repo: ${repo}`); const cache = join(root, ".observablehq", "cache"); const file = `${name}.duckdb_extension.wasm`; - const ref = `${repo}/v1.1.1/wasm_${p}/${file}`.slice("https://".length); + const ref = `${repo}/v1.1.1/wasm_${platform}/${file}`.slice("https://".length); const path = join("_duckdb", ref); const cachePath = join(cache, path); if (existsSync(cachePath)) return `/${path}`; From 710f36afb437936ffe9c5e0fb8ac6a611ddc2091 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Mon, 21 Oct 2024 12:13:03 +0200 Subject: [PATCH 22/39] centralize DUCKDBWASMVERSION and DUCKDBVERSION --- src/duckdb.ts | 5 ++++- src/npm.ts | 5 +++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/duckdb.ts b/src/duckdb.ts index c902eca57..d88fdbc3e 100644 --- a/src/duckdb.ts +++ b/src/duckdb.ts @@ -7,6 +7,9 @@ import {faint} from "./tty.js"; 
const downloadRequests = new Map>(); +export const DUCKDBWASMVERSION = "1.29.0"; +export const DUCKDBVERSION = "1.1.1"; + export async function getDuckDBManifest( duckdb: DuckDBConfig, {root, aliases}: {root: string; aliases?: Map} @@ -47,7 +50,7 @@ export async function resolveDuckDBExtension( if (!repo.startsWith("https://")) throw new Error(`invalid repo: ${repo}`); const cache = join(root, ".observablehq", "cache"); const file = `${name}.duckdb_extension.wasm`; - const ref = `${repo}/v1.1.1/wasm_${platform}/${file}`.slice("https://".length); + const ref = `${repo}/v${DUCKDBVERSION}/wasm_${platform}/${file}`.slice("https://".length); const path = join("_duckdb", ref); const cachePath = join(cache, path); if (existsSync(cachePath)) return `/${path}`; diff --git a/src/npm.ts b/src/npm.ts index cd2997ce7..2f88a07dc 100644 --- a/src/npm.ts +++ b/src/npm.ts @@ -4,6 +4,7 @@ import {dirname, extname, join} from "node:path/posix"; import type {CallExpression} from "acorn"; import {simple} from "acorn-walk"; import {maxSatisfying, rsort, satisfies, validRange} from "semver"; +import {DUCKDBWASMVERSION} from "./duckdb.js"; import {isEnoent} from "./error.js"; import type {ExportNode, ImportNode, ImportReference} from "./javascript/imports.js"; import {isImportMetaResolve, parseImports} from "./javascript/imports.js"; @@ -162,7 +163,7 @@ export async function getDependencyResolver( (name === "arquero" || name === "@uwdata/mosaic-core" || name === "@duckdb/duckdb-wasm") && depName === "apache-arrow" // prettier-ignore ? "latest" // force Arquero, Mosaic & DuckDB-Wasm to use the (same) latest version of Arrow : name === "@uwdata/mosaic-core" && depName === "@duckdb/duckdb-wasm" - ? "1.29.0" // force Mosaic to use the latest (stable) version of DuckDB-Wasm + ? DUCKDBWASMVERSION // force Mosaic to use the latest (stable) version of DuckDB-Wasm : pkg.dependencies?.[depName] ?? pkg.devDependencies?.[depName] ?? pkg.peerDependencies?.[depName] ?? 
@@ -248,7 +249,7 @@ async function resolveNpmVersion(root: string, {name, range}: NpmSpecifier): Pro export async function resolveNpmImport(root: string, specifier: string): Promise { const { name, - range = name === "@duckdb/duckdb-wasm" ? "1.29.0" : undefined, + range = name === "@duckdb/duckdb-wasm" ? DUCKDBWASMVERSION : undefined, path = name === "mermaid" ? "dist/mermaid.esm.min.mjs/+esm" : name === "echarts" From 4f58100d24c5a10a29b247e9a392e4d8af511230 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Mon, 21 Oct 2024 12:15:15 +0200 Subject: [PATCH 23/39] clearer --- src/duckdb.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/duckdb.ts b/src/duckdb.ts index d88fdbc3e..8da777df4 100644 --- a/src/duckdb.ts +++ b/src/duckdb.ts @@ -39,7 +39,7 @@ export async function getDuckDBManifest( * for every supported platform p ("eh" and "mvp"), and returns a content-hashed * reference (_duckdb/{hash}) to use in the corresponding DuckDB INSTALL * statement. 
The repo is structured as required by DuckDB with: - * ${repo}/v1.1.1/wasm_{p}/${name}.duckdb_extension.wasm + * ${repo}/v{duckdbversion}/wasm_{platform}/${name}.duckdb_extension.wasm */ export async function resolveDuckDBExtension( root: string, From a8cfdcd9cec0aa4fe6c9ce6d2f734b486f059afe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Mon, 21 Oct 2024 15:08:44 +0200 Subject: [PATCH 24/39] better config; manifest.extensions now lists individual extensions once only, with one reference per platform --- src/client/stdlib/duckdb.js | 5 ++-- src/config.ts | 47 +++++++++++++++++++++++-------------- src/duckdb.ts | 38 ++++++++++++++++++++---------- src/libraries.ts | 6 ++--- 4 files changed, 60 insertions(+), 36 deletions(-) diff --git a/src/client/stdlib/duckdb.js b/src/client/stdlib/duckdb.js index 5158f1c52..5bf5ae252 100644 --- a/src/client/stdlib/duckdb.js +++ b/src/client/stdlib/duckdb.js @@ -48,8 +48,7 @@ const candidates = { }) }; const bundle = await duckdb.selectBundle(candidates); -const activeBundle = manifest.bundles.find((key) => bundle.mainModule === candidates[key].mainModule); -const extensions = manifest.extensions.filter(([, {bundle}]) => bundle === activeBundle); +const activePlatform = manifest.bundles.find((key) => bundle.mainModule === candidates[key].mainModule); const logger = new duckdb.ConsoleLogger(duckdb.LogLevel.WARNING); @@ -196,7 +195,7 @@ async function registerExtensions(db, {load}) { const connection = await db.connect(); try { await Promise.all( - extensions.map(([name, {ref, load: l}]) => + manifest.extensions.map(([name, {[activePlatform]: ref, load: l}]) => connection .query(`INSTALL ${name} FROM '${import.meta.resolve(`../..${ref}`)}'`) .then(() => (load ? 
load.includes(name) : l) && connection.query(`LOAD ${name}`)) diff --git a/src/config.ts b/src/config.ts index 71d180df8..05cf02369 100644 --- a/src/config.ts +++ b/src/config.ts @@ -77,10 +77,8 @@ export interface SearchConfigSpec { } export interface DuckDBConfig { - install: string[]; - load: string[]; - source: {[name: string]: string}; bundles: string[]; + extensions: {[name: string]: {install?: false; load: boolean; source: string}}; } export interface Config { @@ -500,23 +498,38 @@ export function stringOrNull(spec: unknown): string | null { return spec == null || spec === false ? null : String(spec); } +function duckDBExtensionSource(source?: string): string { + return source === undefined || source === "core" + ? "https://extensions.duckdb.org" + : source === "community" + ? "https://community-extensions.duckdb.org" + : (source = String(source)).startsWith("https://") + ? source + : (() => { + throw new Error(`Unsupported DuckDB extension source ${source}`); + })(); +} + function normalizeDuckDB(spec: unknown): DuckDBConfig { - const install = spec?.["install"] ?? ["json", "parquet"]; - const load = spec?.["load"] ?? []; - const source = new Map(Object.entries(spec?.["source"] ?? {})); + const extensions = spec?.["extensions"] ?? {json: {load: false}, parquet: {load: false}}; return { bundles: ["eh", "mvp"], - install, - load: load.filter((name: string) => install.includes(name)), - source: Object.fromEntries( - install.map((name: string) => { - let href = source.get(name) ?? "core"; - if (href === "core") href = "https://extensions.duckdb.org"; - else if (href === "community") href = "https://community-extensions.duckdb.org"; - if (!href?.["startsWith"]?.("https://")) - throw new Error(`unknown source for duckdb extension ${name}: ${href}`); - return [name, href]; - }) + extensions: Object.fromEntries( + Array.from(Object.entries(extensions), ([key, config]) => { + return [ + key, + !config + ? null + : config === true + ? 
{load: true, source: duckDBExtensionSource()} + : config === false + ? {install: false} + : { + source: duckDBExtensionSource(config["source"]), + load: config["load"] === undefined ? true : Boolean(config["load"]) + } + ]; + }).filter(([, config]) => config !== null) ) }; } diff --git a/src/duckdb.ts b/src/duckdb.ts index 8da777df4..b21a6afea 100644 --- a/src/duckdb.ts +++ b/src/duckdb.ts @@ -1,7 +1,6 @@ import {existsSync} from "node:fs"; import {mkdir, writeFile} from "node:fs/promises"; import {dirname, join} from "node:path/posix"; -import {cross} from "d3-array"; import type {DuckDBConfig} from "./config.js"; import {faint} from "./tty.js"; @@ -10,6 +9,12 @@ const downloadRequests = new Map>(); export const DUCKDBWASMVERSION = "1.29.0"; export const DUCKDBVERSION = "1.1.1"; +async function getDuckDBExtension(root, platform, source, name, aliases) { + let ext = await resolveDuckDBExtension(root, platform, source, name); + if (aliases?.has(ext)) ext = aliases.get(ext)!; + return dirname(dirname(dirname(ext))); +} + export async function getDuckDBManifest( duckdb: DuckDBConfig, {root, aliases}: {root: string; aliases?: Map} @@ -17,18 +22,25 @@ export async function getDuckDBManifest( return { bundles: duckdb.bundles, extensions: await Promise.all( - cross(duckdb.bundles, duckdb.install).map(async ([p, name]) => { - let ext = await resolveDuckDBExtension(root, p, duckdb.source[name], name); - if (aliases?.has(ext)) ext = aliases.get(ext)!; - return [ - name, - { - ref: dirname(dirname(dirname(ext))), - load: duckdb.load.includes(name), - bundle: p - } - ]; - }) + Array.from(Object.entries(duckdb.extensions), ([name, {install, load, source}]) => + (async () => { + return [ + name, + { + install, + load, + ...Object.fromEntries( + await Promise.all( + duckdb.bundles.map(async (platform) => [ + platform, + await getDuckDBExtension(root, platform, source, name, aliases) + ]) + ) + ) + } + ]; + })() + ) ) }; } diff --git a/src/libraries.ts b/src/libraries.ts index 
4989842aa..a816997f5 100644 --- a/src/libraries.ts +++ b/src/libraries.ts @@ -83,9 +83,9 @@ export function getImplicitDownloads(imports: Iterable, duckdb?: DuckDBC implicits.add("npm:@duckdb/duckdb-wasm/dist/duckdb-eh.wasm"); implicits.add("npm:@duckdb/duckdb-wasm/dist/duckdb-browser-eh.worker.js"); if (!duckdb) throw new Error("Implementation error: missing duckdb configuration"); - for (const p of duckdb.bundles) { - for (const name of duckdb.install) { - implicits.add(`duckdb:${p},${name},${duckdb.source[name]}`); + for (const [name, {source}] of Object.entries(duckdb.extensions)) { + for (const platform of duckdb.bundles) { + implicits.add(`duckdb:${platform},${name},${source}`); } } } From 490d969b0c2feb2e0a6aa1c4e809faca93dae31a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Mon, 21 Oct 2024 15:34:10 +0200 Subject: [PATCH 25/39] validate extension names; centralize DUCKDBBUNDLES --- src/config.ts | 40 ++++++++++++++++++---------------------- src/duckdb.ts | 1 + 2 files changed, 19 insertions(+), 22 deletions(-) diff --git a/src/config.ts b/src/config.ts index 05cf02369..47cd72b40 100644 --- a/src/config.ts +++ b/src/config.ts @@ -8,6 +8,7 @@ import {pathToFileURL} from "node:url"; import he from "he"; import type MarkdownIt from "markdown-it"; import wrapAnsi from "wrap-ansi"; +import {DUCKDBBUNDLES} from "./duckdb.js"; import {visitFiles} from "./files.js"; import {formatIsoDate, formatLocaleDate} from "./format.js"; import type {FrontMatter} from "./frontMatter.js"; @@ -506,30 +507,25 @@ function duckDBExtensionSource(source?: string): string { : (source = String(source)).startsWith("https://") ? source : (() => { - throw new Error(`Unsupported DuckDB extension source ${source}`); + throw new Error(`unsupported DuckDB extension source ${source}`); })(); } function normalizeDuckDB(spec: unknown): DuckDBConfig { - const extensions = spec?.["extensions"] ?? 
{json: {load: false}, parquet: {load: false}}; - return { - bundles: ["eh", "mvp"], - extensions: Object.fromEntries( - Array.from(Object.entries(extensions), ([key, config]) => { - return [ - key, - !config - ? null - : config === true - ? {load: true, source: duckDBExtensionSource()} - : config === false - ? {install: false} - : { - source: duckDBExtensionSource(config["source"]), - load: config["load"] === undefined ? true : Boolean(config["load"]) - } - ]; - }).filter(([, config]) => config !== null) - ) - }; + const extensions: {[name: string]: any} = {}; + for (const [name, config] of Object.entries(spec?.["extensions"] ?? {json: {load: false}, parquet: {load: false}})) { + if (!/^\w+$/.test(name)) throw new Error(`illegal extension name ${name}`); + if (config) { + extensions[name] = + config === true + ? {load: true, source: duckDBExtensionSource()} + : config === false + ? {install: false} + : { + source: duckDBExtensionSource(config["source"]), + load: config["load"] === undefined ? 
true : Boolean(config["load"]) + }; + } + } + return {bundles: DUCKDBBUNDLES, extensions}; } diff --git a/src/duckdb.ts b/src/duckdb.ts index b21a6afea..47ff8e81d 100644 --- a/src/duckdb.ts +++ b/src/duckdb.ts @@ -8,6 +8,7 @@ const downloadRequests = new Map>(); export const DUCKDBWASMVERSION = "1.29.0"; export const DUCKDBVERSION = "1.1.1"; +export const DUCKDBBUNDLES = ["eh", "mvp"]; async function getDuckDBExtension(root, platform, source, name, aliases) { let ext = await resolveDuckDBExtension(root, platform, source, name); From aaff8f870cb18436b74f8b7c61e7f950329c95b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Mon, 21 Oct 2024 15:44:37 +0200 Subject: [PATCH 26/39] fix tests --- test/config-test.ts | 26 ++++++++++++++++++++------ test/libraries-test.ts | 2 +- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/test/config-test.ts b/test/config-test.ts index fcae88d4b..cb1a9def8 100644 --- a/test/config-test.ts +++ b/test/config-test.ts @@ -46,9 +46,16 @@ describe("readConfig(undefined, root)", () => { watchPath: resolve("test/input/build/config/observablehq.config.js"), duckdb: { bundles: ["eh", "mvp"], - install: ["json", "parquet"], - load: [], - source: {json: "https://extensions.duckdb.org", parquet: "https://extensions.duckdb.org"} + extensions: { + json: { + load: false, + source: "https://extensions.duckdb.org" + }, + parquet: { + load: false, + source: "https://extensions.duckdb.org" + } + } } }); }); @@ -80,9 +87,16 @@ describe("readConfig(undefined, root)", () => { watchPath: undefined, duckdb: { bundles: ["eh", "mvp"], - install: ["json", "parquet"], - load: [], - source: {json: "https://extensions.duckdb.org", parquet: "https://extensions.duckdb.org"} + extensions: { + json: { + load: false, + source: "https://extensions.duckdb.org" + }, + parquet: { + load: false, + source: "https://extensions.duckdb.org" + } + } } }); }); diff --git a/test/libraries-test.ts b/test/libraries-test.ts index 
acc5ecddd..4c270f37b 100644 --- a/test/libraries-test.ts +++ b/test/libraries-test.ts @@ -53,7 +53,7 @@ describe("getImplicitStylesheets(imports)", () => { describe("getImplicitDownloads(imports)", () => { it("supports known imports", () => { assert.deepStrictEqual( - getImplicitDownloads(["npm:@observablehq/duckdb"], {install: [], load: [], source: {}, bundles: []}), + getImplicitDownloads(["npm:@observablehq/duckdb"], {extensions: {}, bundles: []}), new Set([ "npm:@duckdb/duckdb-wasm/dist/duckdb-mvp.wasm", "npm:@duckdb/duckdb-wasm/dist/duckdb-browser-mvp.worker.js", From 8bd09721aa3da1207229a473f00a0c3d38442df8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Wed, 30 Oct 2024 09:26:46 +0100 Subject: [PATCH 27/39] copy edit --- docs/sql.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/sql.md b/docs/sql.md index 8c1e78d7d..fbdb82eb2 100644 --- a/docs/sql.md +++ b/docs/sql.md @@ -219,29 +219,29 @@ The self-hosted extensions are immediately available in all the `sql` code block SELECT bbox FROM read_json('https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/all_day.geojson'); ``` -Likewise, with the "spatial" extension configured, you could directly run: +Likewise, with the “spatial” extension configured, you could directly run: ```sql echo run=false SELECT ST_Area('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))'::GEOMETRY) as area; ``` -If you use an extension that is not self-hosted, DuckDB falls back to loading it directly from DuckDB’s servers. For example, this documentation does not have the "inet" extension configured for self-hosting. +If you use an extension that is not self-hosted, DuckDB falls back to loading it directly from DuckDB’s servers. For example, this documentation does not have the “inet” extension configured for self-hosting. 
```sql echo SELECT '127.0.0.1'::INET AS ipv4, '2001:db8:3c4d::/48'::INET AS ipv6; ``` -During development, you can experiment freely with extensions that are not self-hosted. For example to try out the "h3" `community` extension: +During development, you can experiment freely with extensions that are not self-hosted. For example to try out the “h3” `community` extension: ```sql echo run=false -INSTALL "h3" FROM community; -LOAD "h3"; +INSTALL h3 FROM community; +LOAD h3; SELECT format('{:x}', h3_latlng_to_cell(37.77, -122.43, 9)) AS cell_id; ``` (this returns the H3 cell [`892830828a3ffff`](https://h3geo.org/#hex=892830828a3ffff)) -However, for performance and ergonomy, we strongly recommend adding all the extensions you actually use to your site’s configuration. +For performance and ergonomy, we strongly recommend adding all the extensions you actually use to the [configuration](./config#duckdb).
From b90c22ab32df95cab13af6b5f5ccc3142fe6978b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Wed, 30 Oct 2024 09:27:36 +0100 Subject: [PATCH 28/39] support loading non-self-hosted extensions --- src/client/stdlib/duckdb.js | 4 ++-- src/config.ts | 9 +++++---- src/duckdb.ts | 10 ++++++++-- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/client/stdlib/duckdb.js b/src/client/stdlib/duckdb.js index 5bf5ae252..1f78abd9c 100644 --- a/src/client/stdlib/duckdb.js +++ b/src/client/stdlib/duckdb.js @@ -197,8 +197,8 @@ async function registerExtensions(db, {load}) { await Promise.all( manifest.extensions.map(([name, {[activePlatform]: ref, load: l}]) => connection - .query(`INSTALL ${name} FROM '${import.meta.resolve(`../..${ref}`)}'`) - .then(() => (load ? load.includes(name) : l) && connection.query(`LOAD ${name}`)) + .query(`INSTALL "${name}" FROM '${ref.startsWith("https://") ? ref : import.meta.resolve(`../..${ref}`)}'`) + .then(() => (load ? load.includes(name) : l) && connection.query(`LOAD "${name}"`)) ) ); } finally { diff --git a/src/config.ts b/src/config.ts index 47cd72b40..c2cdfb389 100644 --- a/src/config.ts +++ b/src/config.ts @@ -515,15 +515,16 @@ function normalizeDuckDB(spec: unknown): DuckDBConfig { const extensions: {[name: string]: any} = {}; for (const [name, config] of Object.entries(spec?.["extensions"] ?? {json: {load: false}, parquet: {load: false}})) { if (!/^\w+$/.test(name)) throw new Error(`illegal extension name ${name}`); - if (config) { + if (config != null) { extensions[name] = config === true - ? {load: true, source: duckDBExtensionSource()} + ? {load: true, install: true, source: duckDBExtensionSource()} : config === false - ? {install: false} + ? {load: false, install: false, source: duckDBExtensionSource()} : { source: duckDBExtensionSource(config["source"]), - load: config["load"] === undefined ? true : Boolean(config["load"]) + install: Boolean(config["install"] ?? 
true), + load: Boolean(config["load"] ?? true) }; } } diff --git a/src/duckdb.ts b/src/duckdb.ts index 47ff8e81d..3fa4c4e94 100644 --- a/src/duckdb.ts +++ b/src/duckdb.ts @@ -10,7 +10,13 @@ export const DUCKDBWASMVERSION = "1.29.0"; export const DUCKDBVERSION = "1.1.1"; export const DUCKDBBUNDLES = ["eh", "mvp"]; -async function getDuckDBExtension(root, platform, source, name, aliases) { +async function getDuckDBExtension( + root: string, + platform: string, + source: string, + name: string, + aliases?: Map +) { let ext = await resolveDuckDBExtension(root, platform, source, name); if (aliases?.has(ext)) ext = aliases.get(ext)!; return dirname(dirname(dirname(ext))); @@ -34,7 +40,7 @@ export async function getDuckDBManifest( await Promise.all( duckdb.bundles.map(async (platform) => [ platform, - await getDuckDBExtension(root, platform, source, name, aliases) + install ? await getDuckDBExtension(root, platform, source, name, aliases) : source ]) ) ) From b37be072b85713cea12578d9ed51da393a30df6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Wed, 30 Oct 2024 09:27:51 +0100 Subject: [PATCH 29/39] test duckdb config normalization & defaults --- test/config-test.ts | 107 +++++++++++++++++++++++++++++++++----------- 1 file changed, 81 insertions(+), 26 deletions(-) diff --git a/test/config-test.ts b/test/config-test.ts index cb1a9def8..4da430710 100644 --- a/test/config-test.ts +++ b/test/config-test.ts @@ -4,6 +4,22 @@ import MarkdownIt from "markdown-it"; import {normalizeConfig as config, mergeToc, readConfig, setCurrentDate} from "../src/config.js"; import {LoaderResolver} from "../src/loader.js"; +const DUCKDB_DEFAULTS = { + bundles: ["eh", "mvp"], + extensions: { + json: { + install: true, + load: false, + source: "https://extensions.duckdb.org" + }, + parquet: { + install: true, + load: false, + source: "https://extensions.duckdb.org" + } + } +}; + describe("readConfig(undefined, root)", () => { before(() => setCurrentDate(new 
Date("2024-01-10T16:00:00"))); it("imports the config file at the specified root", async () => { @@ -44,19 +60,7 @@ describe("readConfig(undefined, root)", () => { 'Built with Observable on Jan 10, 2024.', search: null, watchPath: resolve("test/input/build/config/observablehq.config.js"), - duckdb: { - bundles: ["eh", "mvp"], - extensions: { - json: { - load: false, - source: "https://extensions.duckdb.org" - }, - parquet: { - load: false, - source: "https://extensions.duckdb.org" - } - } - } + duckdb: DUCKDB_DEFAULTS }); }); it("returns the default config if no config file is found", async () => { @@ -85,19 +89,7 @@ describe("readConfig(undefined, root)", () => { 'Built with Observable on Jan 10, 2024.', search: null, watchPath: undefined, - duckdb: { - bundles: ["eh", "mvp"], - extensions: { - json: { - load: false, - source: "https://extensions.duckdb.org" - }, - parquet: { - load: false, - source: "https://extensions.duckdb.org" - } - } - } + duckdb: DUCKDB_DEFAULTS }); }); }); @@ -321,3 +313,66 @@ describe("mergeToc(spec, toc)", () => { assert.deepStrictEqual(mergeToc({}, toc), {label: "Contents", show: true}); }); }); + +describe("normalizeConfig(duckdb)", () => { + const root = ""; + it("uses the defaults", () => { + const {duckdb} = config({}, root); + assert.deepEqual(duckdb, DUCKDB_DEFAULTS); + }); + it("supports install:false and load:false", () => { + const {duckdb} = config({duckdb: {extensions: {json: {install: false, load: false}}}}, root); + assert.deepEqual(duckdb.extensions, { + json: { + install: false, + load: false, + source: "https://extensions.duckdb.org" + } + }); + }); + it("supports core, community and https:// sources", () => { + const {duckdb} = config( + { + duckdb: { + extensions: {foo: {source: "core"}, bar: {source: "community"}, baz: {source: "https://custom-domain"}} + } + }, + root + ); + assert.deepEqual(duckdb.extensions, { + foo: { + install: true, + load: true, + source: "https://extensions.duckdb.org" + }, + bar: { + install: 
true, + load: true, + source: "https://community-extensions.duckdb.org" + }, + baz: { + install: true, + load: true, + source: "https://custom-domain" + } + }); + }); + it("supports shorthand", () => { + const {duckdb} = config({duckdb: {extensions: {foo: true, bar: false}}}, root); + assert.deepEqual(duckdb.extensions, { + foo: { + install: true, + load: true, + source: "https://extensions.duckdb.org" + }, + bar: { + install: false, + load: false, + source: "https://extensions.duckdb.org" + } + }); + }); + it("rejects illegal names", () => { + assert.throws(() => config({duckdb: {extensions: {"*^/": true}}}, root)); + }); +}); From 9abaf57b515765115c4683e4cbb556a20e92eccd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Wed, 30 Oct 2024 17:35:13 +0100 Subject: [PATCH 30/39] documentation --- docs/config.md | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/docs/config.md b/docs/config.md index 53fd5e8ca..6c6a2b0ac 100644 --- a/docs/config.md +++ b/docs/config.md @@ -299,17 +299,23 @@ export default { ## duckdb -The **duckdb** option specifies the list of DuckDB [extensions](./sql#extensions) that you want to self-host and make available in the `sql` and `DuckDBClient` instances. +The **duckdb** option allows you to specify the DuckDB [extensions](./sql#extensions) that you want to self-host and make available in the `sql` and `DuckDBClient` instances. -Its **install** key is an array of the names of extensions to self-host; it defaults to `["json", "parquet"]`. The optional **load** key is an array of names of extensions to load immediately. It defaults to the empty array (since "json" and "parquet" are autoloaded, there is no reason to load them before we actually need them). Lastly, the **source** key is an object of key:value pairs representing the repo used to download each extension. The source repo for any name defaults to `core`, which points to `https://extensions.duckdb.org/`. 
You can use `core`, `community` (which points to `https://community-extensions.duckdb.org/`), or a custom URL: +Its **extensions** property is an object where keys are extension names, and values describe the **source** for the extension, and whether to **install** (self-host) it, and **load** it immediately. + +The **source** property is the reference of the repo from which to download the extension. It defaults to `core`, which points to `https://extensions.duckdb.org/`. You can use `core`, `community` (which points to `https://community-extensions.duckdb.org/`), or a custom URL, for example if you develop your own extensions. + +By default "json" and "parquet" are installed, but not loaded (since they are autoloaded, there is no reason to load them before we actually need them). If you don’t want to self-host an extension, set its **install** property to false. You will still be able to load it from its source by calling `INSTALL` and `LOAD`. + +As a shorthand, you can specify `name: true` to install and load the named extension from the "core" repository. (And `name: false` is shorthand for `{install: false, load: false}`.) 
+ +For example, a typical configuration for a geospatial data app might install and load "spatial" from "core" and "h3" from "community": ```js run=false duckdb: { - install: ["json", "spatial", "h3", "custom"], - load: ["spatial"], - source: { - h3: "community", - custom: "https://my-custom-repo.tld" + extensions: { + spatial: true, + h3: {source: "community"} } } ``` From ccc00732764858c699dbdf265f701fe89ba92e59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Wed, 30 Oct 2024 17:43:54 +0100 Subject: [PATCH 31/39] typography --- docs/config.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/config.md b/docs/config.md index 6c6a2b0ac..a0ffbfcd4 100644 --- a/docs/config.md +++ b/docs/config.md @@ -309,7 +309,7 @@ By default "json" and "parquet" are installed, but not loaded (since they are au As a shorthand, you can specify `name: true` to install and load the named extension from the "core" repository. (And `name: false` is shorthand for `{install: false, load: false}`.) -For example, a typical configuration for a geospatial data app might install and load "spatial" from "core" and "h3" from "community": +For example, a typical configuration for a geospatial data app might install and load “spatial” from `core` and “h3” from `community`: ```js run=false duckdb: { From 26c7a6ff15b2ff14c018ddd68e33cc003f6d7633 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Thu, 31 Oct 2024 07:54:28 +0100 Subject: [PATCH 32/39] doc --- docs/lib/duckdb.md | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/docs/lib/duckdb.md b/docs/lib/duckdb.md index b6e063ce6..e5fb4e1df 100644 --- a/docs/lib/duckdb.md +++ b/docs/lib/duckdb.md @@ -110,8 +110,16 @@ SELECT * FROM quakes ORDER BY updated DESC; DuckDB’s [extensions](../sql#extensions) are supported. -By default, `DuckDBClient.of` and `DuckDBClient.sql` load the (self-hosted) extensions referenced in the [configuration](../config#duckdb). 
You can pass an options object with a **load** key set to a different list if you want a different environment: +By default, `DuckDBClient.of` and `DuckDBClient.sql` load the extensions referenced in the [configuration](../config#duckdb). If you want a different environment, you can pass options listing the extensions you want to load. + +For example, pass an empty array to instantiate a DuckDBClient with no loaded extensions (even if your configuration lists several extensions): + +```js echo run=false +const simpledb = DuckDBClient.of({}, {load: []}); +``` + +Or, create a geospatial tagged template literal: ```js echo run=false -const geodb = await DuckDBClient.of({}, {load: ["spatial", "h3"]}); +const geosql = DuckDBClient.sql({}, {load: ["spatial", "h3"]}); ``` From 7704416cf65daae854784093b7d9e93d051f9a1b Mon Sep 17 00:00:00 2001 From: Mike Bostock Date: Fri, 1 Nov 2024 11:33:03 -0700 Subject: [PATCH 33/39] use view for <50MB --- src/client/stdlib/duckdb.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/client/stdlib/duckdb.js b/src/client/stdlib/duckdb.js index 1f78abd9c..508fccd63 100644 --- a/src/client/stdlib/duckdb.js +++ b/src/client/stdlib/duckdb.js @@ -282,7 +282,7 @@ async function insertFile(database, name, file, options) { }); } if (/\.parquet$/i.test(file.name)) { - const table = file.size < 10e6 ? "TABLE" : "VIEW"; // for small files, materialize the table + const table = file.size < 50e6 ? "TABLE" : "VIEW"; // for small files, materialize the table return await connection.query(`CREATE ${table} '${name}' AS SELECT * FROM parquet_scan('${file.name}')`); } if (/\.(db|ddb|duckdb)$/i.test(file.name)) { From 1dde616d2f26c6499d36d0e9e441424d946fc34f Mon Sep 17 00:00:00 2001 From: Mike Bostock Date: Fri, 1 Nov 2024 15:14:40 -0700 Subject: [PATCH 34/39] docs, shorthand, etc. 
--- docs/config.md | 42 ++++++---- docs/lib/duckdb.md | 91 ++++++++++++++++++++-- docs/sql.md | 46 +---------- package.json | 4 +- src/client/stdlib/duckdb.js | 10 +-- src/config.ts | 78 +++++++++++-------- src/duckdb.ts | 97 ++++++++++++++--------- src/npm.ts | 6 +- test/config-test.ts | 150 ++++++++++++++++++++++++++++++------ test/resolvers-test.ts | 8 +- 10 files changed, 361 insertions(+), 171 deletions(-) diff --git a/docs/config.md b/docs/config.md index 4070d02d3..cc2538be5 100644 --- a/docs/config.md +++ b/docs/config.md @@ -303,27 +303,43 @@ export default { ## duckdb -The **duckdb** option allows you to specify the DuckDB [extensions](./sql#extensions) that you want to self-host and make available in the `sql` and `DuckDBClient` instances. +The **duckdb** option configures [self-hosting](./lib/duckdb#self-hosting-of-extensions) and loading of [DuckDB extensions](./lib/duckdb#extensions) for use in [SQL code blocks](./sql) and the `sql` and `DuckDBClient` built-ins. For example, a geospatial data app might enable the [`spatial`](https://duckdb.org/docs/extensions/spatial/overview.html) and [`h3`](https://duckdb.org/community_extensions/extensions/h3.html) extensions like so: -Its **extensions** property is an object where keys are extension names, and values describe the **source** for the extension, and whether to **install** (self-host) it, and **load** it immediately. - -The **source** property is the reference of the repo from which to download the extension. It defaults to `core`, which points to `https://extensions.duckdb.org/`. You can use `core`, `community` (which points to `https://community-extensions.duckdb.org/`), or a custom URL, for example if you develop your own extensions. - -By default "json" and "parquet" are installed, but not loaded (since they are autoloaded, there is no reason to load them before we actually need them). If you don’t want to self-host an extension, set its **install** property to false. 
You will still be able to load it from its source by calling `INSTALL` and `LOAD`. +```js run=false +export default { + duckdb: { + extensions: ["spatial", "h3"] + } +}; +``` -As a shorthand, you can specify `name: true` to install and load the named extension from the "core" repository. (And `name: false` is shorthand for `{install: false, load: false}`.) +The **extensions** option can either be an array of extension names, or an object whose keys are extension names and whose values are configuration options for the given extension, including its **source** repository (defaulting to the keyword _core_ for core extensions, and otherwise _community_; can also be a custom repository URL), whether to **load** it immediately (defaulting to true, except for known extensions that support autoloading), and whether to **install** it (_i.e._ to self-host, defaulting to true). As additional shorthand, you can specify `[name]: true` to install and load the named extension from the default (_core_ or _community_) source repository, or `[name]: string` to install and load the named extension from the given source repository. -For example, a typical configuration for a geospatial data app might install and load “spatial” from `core` and “h3” from `community`: +The configuration above is equivalent to: ```js run=false -duckdb: { - extensions: { - spatial: true, - h3: {source: "community"} +export default { + duckdb: { + extensions: { + spatial: { + source: "https://extensions.duckdb.org/", + install: true, + load: true + }, + h3: { + source: "https://community-extensions.duckdb.org/", + install: true, + load: true + } + } } -} +}; ``` +The `json` and `parquet` extensions are configured (and therefore self-hosted) by default. To expressly disable self-hosting of an extension, you can set its **install** property to false, or equivalently pass null as the extension configuration object. + +For more, see [DuckDB extensions](./lib/duckdb#extensions).
+ ## markdownIt A hook for registering additional [markdown-it](https://github.com/markdown-it/markdown-it) plugins. For example, to use [markdown-it-footnote](https://github.com/markdown-it/markdown-it-footnote), first install the plugin with either `npm add markdown-it-footnote` or `yarn add markdown-it-footnote`, then register it like so: diff --git a/docs/lib/duckdb.md b/docs/lib/duckdb.md index e5fb4e1df..46b0ae140 100644 --- a/docs/lib/duckdb.md +++ b/docs/lib/duckdb.md @@ -65,7 +65,7 @@ const db2 = await DuckDBClient.of({base: FileAttachment("quakes.db")}); db2.queryRow(`SELECT COUNT() FROM base.events`) ``` -For externally-hosted data, you can create an empty `DuckDBClient` and load a table from a SQL query, say using [`read_parquet`](https://duckdb.org/docs/guides/import/parquet_import) or [`read_csv`](https://duckdb.org/docs/guides/import/csv_import). DuckDB offers many affordances to make this easier (in many cases it detects the file format and uses the correct loader automatically). +For externally-hosted data, you can create an empty `DuckDBClient` and load a table from a SQL query, say using [`read_parquet`](https://duckdb.org/docs/guides/import/parquet_import) or [`read_csv`](https://duckdb.org/docs/guides/import/csv_import). DuckDB offers many affordances to make this easier. (In many cases it detects the file format and uses the correct loader automatically.) ```js run=false const db = await DuckDBClient.of(); @@ -106,20 +106,95 @@ const sql = DuckDBClient.sql({quakes: `https://earthquake.usgs.gov/earthquakes/f SELECT * FROM quakes ORDER BY updated DESC; ``` -## Extensions +## Extensions -DuckDB’s [extensions](../sql#extensions) are supported. +[DuckDB extensions](https://duckdb.org/docs/extensions/overview.html) extend DuckDB’s functionality, adding support for additional file formats, new types, and domain-specific functions. 
For example, the [`json` extension](https://duckdb.org/docs/data/json/overview.html) provides a `read_json` method for reading JSON files: -By default, `DuckDBClient.of` and `DuckDBClient.sql` load the extensions referenced in the [configuration](../config#duckdb). If you want a different environment, you can pass options listing the extensions you want to load. +```sql echo +SELECT bbox FROM read_json('https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/all_day.geojson'); +``` + +To read a local file (or data loader), use `FileAttachment` and interpolation `${…}`: + +```sql echo +SELECT bbox FROM read_json(${FileAttachment("../quakes.json").href}); +``` + +For convenience, Framework configures the `json` and `parquet` extensions by default. Some other [core extensions](https://duckdb.org/docs/extensions/core_extensions.html) also autoload, meaning that you don’t need to explicitly enable them; however, Framework will only [self-host extensions](#self-hosting-of-extensions) if you explicitly configure them, and therefore we recommend that you always use the [**duckdb** config option](../config#duckdb) to configure DuckDB extensions. Any configured extensions will be automatically [installed and loaded](https://duckdb.org/docs/extensions/overview#explicit-install-and-load), making them available in SQL code blocks as well as the `sql` and `DuckDBClient` built-ins. + +For example, to configure the [`spatial` extension](https://duckdb.org/docs/extensions/spatial/overview.html): + +```js run=false +export default { + duckdb: { + extensions: ["spatial"] + } +}; +``` + +You can then use the `ST_Area` function to compute the area of a polygon: + +```sql echo run=false +SELECT ST_Area('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))'::GEOMETRY) as area; +``` + +To tell which extensions have been loaded, you can run the following query: + +```sql echo +FROM duckdb_extensions() WHERE loaded; +``` + +
+ +If the `duckdb_extensions()` function runs before DuckDB autoloads a core extension (such as `json`), that extension might not be included in the returned set.
+ +### Self-hosting of extensions + +As with [npm imports](../imports#self-hosting-of-npm-imports), configured DuckDB extensions are self-hosted, improving performance, stability, & security, and allowing you to develop offline. Extensions are downloaded to the DuckDB cache folder, which lives in `.observablehq/cache/_duckdb` within the source root (typically `src`). You can clear the cache and restart the preview server to re-fetch the latest versions of any DuckDB extensions. If you use an [autoloading core extension](https://duckdb.org/docs/extensions/core_extensions.html#list-of-core-extensions) that is not configured, DuckDB-Wasm [will load it](https://duckdb.org/docs/api/wasm/extensions.html#fetching-duckdb-wasm-extensions) from the default extension repository, `extensions.duckdb.org`, at runtime. + +## Configuring + +The second argument to `DuckDBClient.of` and `DuckDBClient.sql` is a [`DuckDBConfig`](https://shell.duckdb.org/docs/interfaces/index.DuckDBConfig.html) object which configures the behavior of DuckDB-Wasm. By default, Framework sets the `castBigIntToDouble` and `castTimestampToDate` query options to true. To instead use [`BigInt`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/BigInt): + +```js run=false +const bigdb = DuckDBClient.of({}, {query: {castBigIntToDouble: false}}); +``` + +By default, `DuckDBClient.of` and `DuckDBClient.sql` automatically load all [configured extensions](#extensions). To change the loaded extensions for a particular `DuckDBClient`, use the **extensions** config option.
For example, pass an empty array to instantiate a DuckDBClient with no loaded extensions (even if your configuration lists several): ```js echo run=false -const simpledb = DuckDBClient.of({}, {load: []}); +const simpledb = DuckDBClient.of({}, {extensions: []}); ``` -Or, create a geospatial tagged template literal: +Alternatively, you can configure extensions to be self-hosted but not load by default using the **duckdb** config option and the `load: false` shorthand: + +```js run=false +export default { + duckdb: { + extensions: { + spatial: false, + h3: false + } + } +}; +``` + +You can then selectively load extensions as needed like so: ```js echo run=false -const geosql = DuckDBClient.sql({}, {load: ["spatial", "h3"]}); +const geosql = DuckDBClient.sql({}, {extensions: ["spatial", "h3"]}); ``` + +In the future, we’d like to allow DuckDB to be configured globally (beyond just [extensions](#extensions)) via the [**duckdb** config option](../config#duckdb); please upvote [#1791](https://github.com/observablehq/framework/issues/1791) if you are interested in this feature. + +## Versioning + +Framework currently uses [DuckDB-Wasm 1.29.0](https://github.com/duckdb/duckdb-wasm/releases/tag/v1.29.0), which aligns with [DuckDB 1.1.1](https://github.com/duckdb/duckdb/releases/tag/v1.1.1). You can load a different version of DuckDB-Wasm by importing `npm:@duckdb/duckdb-wasm` directly, for example: + +```js run=false +import * as duckdb from "npm:@duckdb/duckdb-wasm@1.28.0"; +``` + +However, you will not be able to change the version of DuckDB-Wasm used by SQL code blocks or the `sql` or `DuckDBClient` built-ins, nor can you use Framework’s support for self-hosting extensions with a different version of DuckDB-Wasm. diff --git a/docs/sql.md b/docs/sql.md index fbdb82eb2..4748989ca 100644 --- a/docs/sql.md +++ b/docs/sql.md @@ -29,7 +29,7 @@ sql:
For performance and reliability, we recommend using local files rather than loading data from external servers at runtime. You can use a data loader to take a snapshot of a remote data during build if needed.
-You can also register tables via code (say to have sources that are defined dynamically via user input) by defining the `sql` symbol with [DuckDBClient.sql](./lib/duckdb). +You can also register tables via code (say to have sources that are defined dynamically via user input) by defining the `sql` symbol with [DuckDBClient.sql](./lib/duckdb). To register [DuckDB extensions](./lib/duckdb#extensions), use the [**duckdb** config option](./config#duckdb). ## SQL code blocks @@ -206,47 +206,3 @@ Inputs.table(await sql([`SELECT * FROM gaia WHERE source_id IN (${[source_ids]}) When interpolating values into SQL queries, be careful to avoid [SQL injection](https://en.wikipedia.org/wiki/SQL_injection) by properly escaping or sanitizing user input. The example above is safe only because `source_ids` are known to be numeric.
- -## Extensions - -DuckDB has a flexible extension mechanism that allows for dynamically loading extensions. These may extend DuckDB's functionality by providing support for additional file formats, introducing new types, and domain-specific functionality. - -Framework can download and host the extensions of your choice. By default, only "json" and "parquet" are self-hosted, but you can add more by specifying them in the [configuration](./config). The self-hosted extensions are served from the `/_duckdb/` directory with a content-hashed URL, ensuring optimal performance and allowing you to work offline and from a server you control. - -The self-hosted extensions are immediately available in all the `sql` code blocks and [DuckDBClient](./lib/duckdb) instances. For example, the query below works instantly since the "json" extension is configured: - -```sql echo -SELECT bbox FROM read_json('https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/all_day.geojson'); -``` - -Likewise, with the “spatial” extension configured, you could directly run: - -```sql echo run=false -SELECT ST_Area('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))'::GEOMETRY) as area; -``` - -If you use an extension that is not self-hosted, DuckDB falls back to loading it directly from DuckDB’s servers. For example, this documentation does not have the “inet” extension configured for self-hosting. - -```sql echo -SELECT '127.0.0.1'::INET AS ipv4, '2001:db8:3c4d::/48'::INET AS ipv6; -``` - -During development, you can experiment freely with extensions that are not self-hosted. For example to try out the “h3” `community` extension: - -```sql echo run=false -INSTALL h3 FROM community; -LOAD h3; -SELECT format('{:x}', h3_latlng_to_cell(37.77, -122.43, 9)) AS cell_id; -``` - -(this returns the H3 cell [`892830828a3ffff`](https://h3geo.org/#hex=892830828a3ffff)) - -For performance and ergonomy, we strongly recommend adding all the extensions you actually use to the [configuration](./config#duckdb). - -
- -To tell which extensions are effectively in use on a page, inspect the network tab in your browser, or run the following query: `FROM duckdb_extensions() WHERE loaded;`. - -
- -These features are tied to DuckDB wasm’s 1.29 version, and strongly dependent on its development cycle. diff --git a/package.json b/package.json index 2ef44b4a3..16584430e 100644 --- a/package.json +++ b/package.json @@ -26,8 +26,8 @@ "test": "concurrently npm:test:mocha npm:test:tsc npm:test:lint npm:test:prettier", "test:coverage": "c8 --check-coverage --lines 80 --per-file yarn test:mocha", "test:build": "rimraf test/build && cross-env npm_package_version=1.0.0-test node build.js --sourcemap --outdir=test/build \"{src,test}/**/*.{ts,js,css}\" --ignore \"test/input/**\" --ignore \"test/output/**\" --ignore \"test/preview/dashboard/**\" --ignore \"**/*.d.ts\" && cp -r templates test/build", - "test:mocha": "yarn test:build && rimraf --glob test/.observablehq/cache test/input/build/*/.observablehq/cache && cross-env OBSERVABLE_TELEMETRY_DISABLE=1 TZ=America/Los_Angeles mocha --timeout 30000 -p \"test/build/test/**/*-test.js\" && yarn test:annotate", - "test:mocha:serial": "yarn test:build && rimraf --glob test/.observablehq/cache test/input/build/*/.observablehq/cache && cross-env OBSERVABLE_TELEMETRY_DISABLE=1 TZ=America/Los_Angeles mocha --timeout 30000 \"test/build/test/**/*-test.js\" && yarn test:annotate", + "test:mocha": "yarn test:build && rimraf --glob test/.observablehq/cache test/input/build/*/.observablehq/cache && cross-env OBSERVABLE_TELEMETRY_DISABLE=1 TZ=America/Los_Angeles mocha --timeout 30000 -p \"test/build/test/**/*-test.js\"", + "test:mocha:serial": "yarn test:build && rimraf --glob test/.observablehq/cache test/input/build/*/.observablehq/cache && cross-env OBSERVABLE_TELEMETRY_DISABLE=1 TZ=America/Los_Angeles mocha --timeout 30000 \"test/build/test/**/*-test.js\"", "test:annotate": "yarn test:build && cross-env OBSERVABLE_ANNOTATE_FILES=true TZ=America/Los_Angeles mocha --timeout 30000 \"test/build/test/**/annotate.js\"", "test:lint": "eslint src test --max-warnings=0", "test:prettier": "prettier --check src test", diff --git 
a/src/client/stdlib/duckdb.js b/src/client/stdlib/duckdb.js index 508fccd63..52bdf9488 100644 --- a/src/client/stdlib/duckdb.js +++ b/src/client/stdlib/duckdb.js @@ -32,7 +32,6 @@ import * as duckdb from "npm:@duckdb/duckdb-wasm"; // Baked-in manifest. // eslint-disable-next-line no-undef const manifest = DUCKDB_MANIFEST; - const candidates = { ...(manifest.bundles.includes("mvp") && { mvp: { @@ -49,7 +48,6 @@ const candidates = { }; const bundle = await duckdb.selectBundle(candidates); const activePlatform = manifest.bundles.find((key) => bundle.mainModule === candidates[key].mainModule); - const logger = new duckdb.ConsoleLogger(duckdb.LogLevel.WARNING); let db; @@ -179,7 +177,7 @@ export class DuckDBClient { config = {...config, query: {...config.query, castBigIntToDouble: true}}; } await db.open(config); - await registerExtensions(db, config); + await registerExtensions(db, config.extensions); await Promise.all(Object.entries(sources).map(([name, source]) => insertSource(db, name, source))); return new DuckDBClient(db); } @@ -191,14 +189,14 @@ export class DuckDBClient { Object.defineProperty(DuckDBClient.prototype, "dialect", {value: "duckdb"}); -async function registerExtensions(db, {load}) { +async function registerExtensions(db, extensions = []) { const connection = await db.connect(); try { await Promise.all( - manifest.extensions.map(([name, {[activePlatform]: ref, load: l}]) => + manifest.extensions.map(([name, {[activePlatform]: ref, load}]) => connection .query(`INSTALL "${name}" FROM '${ref.startsWith("https://") ? ref : import.meta.resolve(`../..${ref}`)}'`) - .then(() => (load ? 
load.includes(name) : l) && connection.query(`LOAD "${name}"`)) + .then(() => load && extensions.includes(name) && connection.query(`LOAD "${name}"`)) ) ); } finally { diff --git a/src/config.ts b/src/config.ts index 376787da7..2357799ad 100644 --- a/src/config.ts +++ b/src/config.ts @@ -8,7 +8,7 @@ import {pathToFileURL} from "node:url"; import he from "he"; import type MarkdownIt from "markdown-it"; import wrapAnsi from "wrap-ansi"; -import {DUCKDBBUNDLES} from "./duckdb.js"; +import {DUCKDB_BUNDLES, DUCKDB_CORE_EXTENSIONS} from "./duckdb.js"; import {visitFiles} from "./files.js"; import {formatIsoDate, formatLocaleDate} from "./format.js"; import type {FrontMatter} from "./frontMatter.js"; @@ -79,7 +79,19 @@ export interface SearchConfigSpec { export interface DuckDBConfig { bundles: string[]; - extensions: {[name: string]: {install?: false; load: boolean; source: string}}; + extensions: {[name: string]: DuckDBExtensionConfig}; +} + +export interface DuckDBExtensionConfig { + source: string; + install: boolean; + load: boolean; +} + +interface DuckDBExtensionConfigSpec { + source: unknown; + install: unknown; + load: unknown; } export interface Config { @@ -510,34 +522,40 @@ export function stringOrNull(spec: unknown): string | null { return spec == null || spec === false ? null : String(spec); } -function duckDBExtensionSource(source?: string): string { - return source === undefined || source === "core" - ? "https://extensions.duckdb.org" - : source === "community" - ? "https://community-extensions.duckdb.org" - : (source = String(source)).startsWith("https://") - ? source - : (() => { - throw new Error(`unsupported DuckDB extension source ${source}`); - })(); -} - +// TODO convert array of names +// TODO configure bundles? function normalizeDuckDB(spec: unknown): DuckDBConfig { - const extensions: {[name: string]: any} = {}; - for (const [name, config] of Object.entries(spec?.["extensions"] ?? 
{json: {load: false}, parquet: {load: false}})) { - if (!/^\w+$/.test(name)) throw new Error(`illegal extension name ${name}`); - if (config != null) { - extensions[name] = - config === true - ? {load: true, install: true, source: duckDBExtensionSource()} - : config === false - ? {load: false, install: false, source: duckDBExtensionSource()} - : { - source: duckDBExtensionSource(config["source"]), - install: Boolean(config["install"] ?? true), - load: Boolean(config["load"] ?? true) - }; - } + const extensions: {[name: string]: DuckDBExtensionConfig} = {}; + let extspec: Record = spec?.["extensions"] ?? {}; + if (Array.isArray(extspec)) extspec = Object.fromEntries(extspec.map((name) => [name, {}])); + if (extspec.json === undefined) extspec = {...extspec, json: false}; + if (extspec.parquet === undefined) extspec = {...extspec, parquet: false}; + for (const name in extspec) { + if (!/^\w+$/.test(name)) throw new Error(`invalid extension: ${name}`); + const vspec = extspec[name]; + if (vspec == null) continue; + const { + source = DUCKDB_CORE_EXTENSIONS.some(([n]) => n === name) ? "core" : "community", + install = true, + load = !DUCKDB_CORE_EXTENSIONS.find(([n]) => n === name)?.[1] + } = typeof vspec === "boolean" + ? {load: vspec} + : typeof vspec === "string" + ? 
{source: vspec} + : (vspec as DuckDBExtensionConfigSpec); + extensions[name] = { + source: normalizeDuckDBSource(String(source)), + install: Boolean(install), + load: Boolean(load) + }; } - return {bundles: DUCKDBBUNDLES, extensions}; + return {bundles: DUCKDB_BUNDLES, extensions}; +} + +function normalizeDuckDBSource(source: string): string { + if (source === "core") return "https://extensions.duckdb.org/"; + if (source === "community") return "https://community-extensions.duckdb.org/"; + const url = new URL(source); + if (url.protocol !== "https:") throw new Error(`invalid source: ${source}`); + return String(url); } diff --git a/src/duckdb.ts b/src/duckdb.ts index 3fa4c4e94..e481ca9f0 100644 --- a/src/duckdb.ts +++ b/src/duckdb.ts @@ -6,21 +6,35 @@ import {faint} from "./tty.js"; const downloadRequests = new Map>(); -export const DUCKDBWASMVERSION = "1.29.0"; -export const DUCKDBVERSION = "1.1.1"; -export const DUCKDBBUNDLES = ["eh", "mvp"]; +export const DUCKDB_WASM_VERSION = "1.29.0"; +export const DUCKDB_VERSION = "1.1.1"; +export const DUCKDB_BUNDLES = ["eh", "mvp"]; -async function getDuckDBExtension( - root: string, - platform: string, - source: string, - name: string, - aliases?: Map -) { - let ext = await resolveDuckDBExtension(root, platform, source, name); - if (aliases?.has(ext)) ext = aliases.get(ext)!; - return dirname(dirname(dirname(ext))); -} +// https://duckdb.org/docs/extensions/core_extensions.html +export const DUCKDB_CORE_EXTENSIONS: [name: string, autoload: boolean][] = [ + ["arrow", false], + ["autocomplete", true], + ["aws", true], + ["azure", true], + ["delta", true], + ["excel", true], + ["fts", true], + ["httpfs", true], + ["iceberg", false], + ["icu", true], + ["inet", true], + ["jemalloc", false], + ["json", true], + ["mysql", false], + ["parquet", true], + ["postgres", true], + ["spatial", false], + ["sqlite", true], + ["substrait", false], + ["tpcds", true], + ["tpch", true], + ["vss", false] +]; export async function 
getDuckDBManifest( duckdb: DuckDBConfig, @@ -30,28 +44,38 @@ export async function getDuckDBManifest( bundles: duckdb.bundles, extensions: await Promise.all( Array.from(Object.entries(duckdb.extensions), ([name, {install, load, source}]) => - (async () => { - return [ - name, - { - install, - load, - ...Object.fromEntries( - await Promise.all( - duckdb.bundles.map(async (platform) => [ - platform, - install ? await getDuckDBExtension(root, platform, source, name, aliases) : source - ]) - ) + (async () => [ + name, + { + install, + load, + ...Object.fromEntries( + await Promise.all( + duckdb.bundles.map(async (platform) => [ + platform, + install ? await getDuckDBExtension(root, platform, source, name, aliases) : source + ]) ) - } - ]; - })() + ) + } + ])() ) ) }; } +async function getDuckDBExtension( + root: string, + platform: string, + source: string, + name: string, + aliases?: Map +) { + let ext = await resolveDuckDBExtension(root, platform, source, name); + if (aliases?.has(ext)) ext = aliases.get(ext)!; + return dirname(dirname(dirname(ext))); +} + /** * Given a duckdb configuration and an extension name such as "parquet", saves * the binary to _duckdb/{hash}/v1.1.1/wasm_{p}/parquet.duckdb_extension.wasm @@ -66,20 +90,19 @@ export async function resolveDuckDBExtension( repo: string, name: string ): Promise { - if (!repo.startsWith("https://")) throw new Error(`invalid repo: ${repo}`); const cache = join(root, ".observablehq", "cache"); const file = `${name}.duckdb_extension.wasm`; - const ref = `${repo}/v${DUCKDBVERSION}/wasm_${platform}/${file}`.slice("https://".length); - const path = join("_duckdb", ref); + const url = new URL(`v${DUCKDB_VERSION}/wasm_${platform}/${file}`, repo); + if (url.protocol !== "https:") throw new Error(`invalid repo: ${repo}`); + const path = join("_duckdb", String(url).slice("https://".length)); const cachePath = join(cache, path); if (existsSync(cachePath)) return `/${path}`; let promise = downloadRequests.get(cachePath); if 
(promise) return promise; // coalesce concurrent requests promise = (async () => { - const href = `https://${ref}`; - console.log(`duckdb:${href} ${faint("→")} ${cachePath}`); - const response = await fetch(href); - if (!response.ok) throw new Error(`unable to fetch: ${href}`); + console.log(`duckdb:${url} ${faint("→")} ${cachePath}`); + const response = await fetch(url); + if (!response.ok) throw new Error(`unable to fetch: ${url}`); await mkdir(dirname(cachePath), {recursive: true}); await writeFile(cachePath, Buffer.from(await response.arrayBuffer())); return `/${path}`; diff --git a/src/npm.ts b/src/npm.ts index e614b57b2..984080df6 100644 --- a/src/npm.ts +++ b/src/npm.ts @@ -4,7 +4,7 @@ import {dirname, extname, join} from "node:path/posix"; import type {CallExpression} from "acorn"; import {simple} from "acorn-walk"; import {maxSatisfying, rsort, satisfies, validRange} from "semver"; -import {DUCKDBWASMVERSION} from "./duckdb.js"; +import {DUCKDB_WASM_VERSION} from "./duckdb.js"; import {isEnoent} from "./error.js"; import annotate from "./javascript/annotate.js"; import type {ExportNode, ImportNode, ImportReference} from "./javascript/imports.js"; @@ -164,7 +164,7 @@ export async function getDependencyResolver( (name === "arquero" || name === "@uwdata/mosaic-core" || name === "@duckdb/duckdb-wasm") && depName === "apache-arrow" // prettier-ignore ? "latest" // force Arquero, Mosaic & DuckDB-Wasm to use the (same) latest version of Arrow : name === "@uwdata/mosaic-core" && depName === "@duckdb/duckdb-wasm" - ? DUCKDBWASMVERSION // force Mosaic to use the latest (stable) version of DuckDB-Wasm + ? DUCKDB_WASM_VERSION // force Mosaic to use the latest (stable) version of DuckDB-Wasm : pkg.dependencies?.[depName] ?? pkg.devDependencies?.[depName] ?? pkg.peerDependencies?.[depName] ?? 
@@ -250,7 +250,7 @@ async function resolveNpmVersion(root: string, {name, range}: NpmSpecifier): Pro export async function resolveNpmImport(root: string, specifier: string): Promise { const { name, - range = name === "@duckdb/duckdb-wasm" ? DUCKDBWASMVERSION : undefined, + range = name === "@duckdb/duckdb-wasm" ? DUCKDB_WASM_VERSION : undefined, path = name === "mermaid" ? "dist/mermaid.esm.min.mjs/+esm" : name === "echarts" diff --git a/test/config-test.ts b/test/config-test.ts index 9138d678b..7be173cdc 100644 --- a/test/config-test.ts +++ b/test/config-test.ts @@ -8,14 +8,14 @@ const DUCKDB_DEFAULTS = { bundles: ["eh", "mvp"], extensions: { json: { + source: "https://extensions.duckdb.org/", install: true, - load: false, - source: "https://extensions.duckdb.org" + load: false }, parquet: { + source: "https://extensions.duckdb.org/", install: true, - load: false, - source: "https://extensions.duckdb.org" + load: false } } }; @@ -470,13 +470,54 @@ describe("normalizeConfig(duckdb)", () => { const {duckdb} = config({}, root); assert.deepEqual(duckdb, DUCKDB_DEFAULTS); }); - it("supports install:false and load:false", () => { + it("supports install: false and load: false", () => { const {duckdb} = config({duckdb: {extensions: {json: {install: false, load: false}}}}, root); assert.deepEqual(duckdb.extensions, { + ...DUCKDB_DEFAULTS.extensions, json: { + source: "https://extensions.duckdb.org/", install: false, - load: false, - source: "https://extensions.duckdb.org" + load: false + } + }); + }); + it("supports null", () => { + const {duckdb} = config({duckdb: {extensions: {json: null}}}, root); + assert.deepEqual( + duckdb.extensions, + Object.fromEntries(Object.entries(DUCKDB_DEFAULTS.extensions).filter(([name]) => name !== "json")) + ); + }); + it("defaults load: false for known auto-loading extensions", () => { + const {duckdb} = config({duckdb: {extensions: {aws: {}}}}, root); + assert.deepEqual(duckdb.extensions, { + ...DUCKDB_DEFAULTS.extensions, + aws: { + 
source: "https://extensions.duckdb.org/", + install: true, + load: false + } + }); + }); + it("defaults source: core for known core extensions", () => { + const {duckdb} = config({duckdb: {extensions: {mysql: {}}}}, root); + assert.deepEqual(duckdb.extensions, { + ...DUCKDB_DEFAULTS.extensions, + mysql: { + source: "https://extensions.duckdb.org/", + install: true, + load: true + } + }); + }); + it("defaults source: community for unknown extensions", () => { + const {duckdb} = config({duckdb: {extensions: {h3: {}}}}, root); + assert.deepEqual(duckdb.extensions, { + ...DUCKDB_DEFAULTS.extensions, + h3: { + source: "https://community-extensions.duckdb.org/", + install: true, + load: true } }); }); @@ -484,45 +525,108 @@ describe("normalizeConfig(duckdb)", () => { const {duckdb} = config( { duckdb: { - extensions: {foo: {source: "core"}, bar: {source: "community"}, baz: {source: "https://custom-domain"}} + extensions: { + foo: {source: "core"}, + bar: {source: "community"}, + baz: {source: "https://custom-domain"} + } } }, root ); assert.deepEqual(duckdb.extensions, { + ...DUCKDB_DEFAULTS.extensions, foo: { + source: "https://extensions.duckdb.org/", install: true, - load: true, - source: "https://extensions.duckdb.org" + load: true }, bar: { + source: "https://community-extensions.duckdb.org/", install: true, - load: true, - source: "https://community-extensions.duckdb.org" + load: true }, baz: { + source: "https://custom-domain/", // URL normalization install: true, - load: true, - source: "https://custom-domain" + load: true } }); }); - it("supports shorthand", () => { - const {duckdb} = config({duckdb: {extensions: {foo: true, bar: false}}}, root); + it("supports source: string shorthand", () => { + const {duckdb} = config( + { + duckdb: { + extensions: { + foo: "core", + bar: "community", + baz: "https://custom-domain" + } + } + }, + root + ); assert.deepEqual(duckdb.extensions, { + ...DUCKDB_DEFAULTS.extensions, foo: { + source: "https://extensions.duckdb.org/", 
install: true, - load: true, - source: "https://extensions.duckdb.org" + load: true }, bar: { - install: false, - load: false, - source: "https://extensions.duckdb.org" + source: "https://community-extensions.duckdb.org/", + install: true, + load: true + }, + baz: { + source: "https://custom-domain/", // URL normalization + install: true, + load: true + } + }); + }); + it("supports load: boolean shorthand", () => { + const {duckdb} = config({duckdb: {extensions: {json: true, foo: true, bar: false}}}, root); + assert.deepEqual(duckdb.extensions, { + ...DUCKDB_DEFAULTS.extensions, + json: { + source: "https://extensions.duckdb.org/", + install: true, + load: true + }, + foo: { + source: "https://community-extensions.duckdb.org/", + install: true, + load: true + }, + bar: { + source: "https://community-extensions.duckdb.org/", + install: true, + load: false + } + }); + }); + it("supports sources shorthand", () => { + const {duckdb} = config({duckdb: {extensions: ["spatial", "h3"]}}, root); + assert.deepEqual(duckdb.extensions, { + ...DUCKDB_DEFAULTS.extensions, + spatial: { + source: "https://extensions.duckdb.org/", + install: true, + load: true + }, + h3: { + source: "https://community-extensions.duckdb.org/", + install: true, + load: true } }); }); - it("rejects illegal names", () => { - assert.throws(() => config({duckdb: {extensions: {"*^/": true}}}, root)); + it("rejects invalid names", () => { + assert.throws(() => config({duckdb: {extensions: {"*^/": true}}}, root), /invalid extension/i); + }); + it("rejects invalid sources", () => { + assert.throws(() => config({duckdb: {extensions: {foo: "file:///path/to/extension"}}}, root), /invalid source/i); + assert.throws(() => config({duckdb: {extensions: {foo: "notasource"}}}, root), /invalid url/i); }); }); diff --git a/test/resolvers-test.ts b/test/resolvers-test.ts index ca886e503..090d34d21 100644 --- a/test/resolvers-test.ts +++ b/test/resolvers-test.ts @@ -88,8 +88,8 @@ describe("getResolvers(page, {root, 
path})", () => { }); }); -describe("resolveLink(href) with {cleanUrls: false}", () => { - const options = getOptions({root: "test/input", path: "sub/index.html", cleanUrls: false}); +describe("resolveLink(href) with {preserveExtension: true}", () => { + const options = getOptions({root: "test/input", path: "sub/index.html", preserveExtension: true}); const page = parseMarkdown("", options); async function getResolveLink() { const resolvers = await getResolvers(page, options); @@ -163,8 +163,8 @@ describe("resolveLink(href) with {cleanUrls: false}", () => { }); }); -describe("resolveLink(href) with {cleanUrls: true}", () => { - const options = getOptions({root: "test/input", path: "sub/index.html", cleanUrls: true}); +describe("resolveLink(href) with {preserveExtension: false}", () => { + const options = getOptions({root: "test/input", path: "sub/index.html", preserveExtension: false}); const page = parseMarkdown("", options); async function getResolveLink() { const resolvers = await getResolvers(page, options); From 049196624ec36d150f074d70d785a33a2cf82fd9 Mon Sep 17 00:00:00 2001 From: Mike Bostock Date: Fri, 1 Nov 2024 15:42:37 -0700 Subject: [PATCH 35/39] annotate fixes --- package.json | 11 ++++++----- src/javascript/annotate.ts | 15 +++++---------- src/javascript/transpile.ts | 10 +++++----- src/node.ts | 4 ++-- src/npm.ts | 4 ++-- src/rollup.ts | 4 ++-- test/javascript/annotate.ts | 6 ++---- 7 files changed, 24 insertions(+), 30 deletions(-) diff --git a/package.json b/package.json index 16584430e..c4d0d96d4 100644 --- a/package.json +++ b/package.json @@ -24,11 +24,12 @@ "docs:deploy": "tsx --no-warnings=ExperimentalWarning ./src/bin/observable.ts deploy", "build": "rimraf dist && node build.js --outdir=dist --outbase=src \"src/**/*.{ts,js,css}\" --ignore \"**/*.d.ts\"", "test": "concurrently npm:test:mocha npm:test:tsc npm:test:lint npm:test:prettier", - "test:coverage": "c8 --check-coverage --lines 80 --per-file yarn test:mocha", - "test:build": "rimraf 
test/build && cross-env npm_package_version=1.0.0-test node build.js --sourcemap --outdir=test/build \"{src,test}/**/*.{ts,js,css}\" --ignore \"test/input/**\" --ignore \"test/output/**\" --ignore \"test/preview/dashboard/**\" --ignore \"**/*.d.ts\" && cp -r templates test/build", - "test:mocha": "yarn test:build && rimraf --glob test/.observablehq/cache test/input/build/*/.observablehq/cache && cross-env OBSERVABLE_TELEMETRY_DISABLE=1 TZ=America/Los_Angeles mocha --timeout 30000 -p \"test/build/test/**/*-test.js\"", - "test:mocha:serial": "yarn test:build && rimraf --glob test/.observablehq/cache test/input/build/*/.observablehq/cache && cross-env OBSERVABLE_TELEMETRY_DISABLE=1 TZ=America/Los_Angeles mocha --timeout 30000 \"test/build/test/**/*-test.js\"", - "test:annotate": "yarn test:build && cross-env OBSERVABLE_ANNOTATE_FILES=true TZ=America/Los_Angeles mocha --timeout 30000 \"test/build/test/**/annotate.js\"", + "test:coverage": "c8 --check-coverage --lines 80 --per-file yarn test:mocha:all", + "test:build": "rimraf test/build && rimraf --glob test/.observablehq/cache test/input/build/*/.observablehq/cache && cross-env npm_package_version=1.0.0-test node build.js --sourcemap --outdir=test/build \"{src,test}/**/*.{ts,js,css}\" --ignore \"test/input/**\" --ignore \"test/output/**\" --ignore \"test/preview/dashboard/**\" --ignore \"**/*.d.ts\" && cp -r templates test/build", + "test:mocha": "yarn test:mocha:serial -p", + "test:mocha:serial": "yarn test:build && cross-env OBSERVABLE_TELEMETRY_DISABLE=1 TZ=America/Los_Angeles mocha --timeout 30000 \"test/build/test/**/*-test.js\"", + "test:mocha:annotate": "yarn test:build && cross-env OBSERVABLE_ANNOTATE_FILES=true TZ=America/Los_Angeles mocha --timeout 30000 \"test/build/test/**/annotate.js\"", + "test:mocha:all": "yarn test:mocha && cross-env OBSERVABLE_ANNOTATE_FILES=true TZ=America/Los_Angeles mocha --timeout 30000 \"test/build/test/**/annotate.js\"", "test:lint": "eslint src test --max-warnings=0", 
"test:prettier": "prettier --check src test", "test:tsc": "tsc --noEmit", diff --git a/src/javascript/annotate.ts b/src/javascript/annotate.ts index 92cfe4510..2d2fb031e 100644 --- a/src/javascript/annotate.ts +++ b/src/javascript/annotate.ts @@ -1,14 +1,9 @@ import {isPathImport} from "../path.js"; -/** - * Annotate a path to a local import or file so it can be reworked server-side. - */ - const annotate = process.env["OBSERVABLE_ANNOTATE_FILES"]; -if (typeof annotate === "string" && annotate !== "true") - throw new Error(`unsupported OBSERVABLE_ANNOTATE_FILES value: ${annotate}`); -export default annotate - ? function (uri: string): string { - return `${JSON.stringify(uri)}${isPathImport(uri) ? "/* observablehq-file */" : ""}`; - } +if (annotate && annotate !== "true") throw new Error(`unsupported OBSERVABLE_ANNOTATE_FILES: ${annotate}`); + +/** Annotate a path to a local import or file so it can be reworked server-side. */ +export const annotatePath = annotate + ? (uri: string) => `${JSON.stringify(uri)}${isPathImport(uri) ? 
"/* observablehq-file */" : ""}` : JSON.stringify; diff --git a/src/javascript/transpile.ts b/src/javascript/transpile.ts index a4596753a..d68ebbb44 100644 --- a/src/javascript/transpile.ts +++ b/src/javascript/transpile.ts @@ -7,7 +7,7 @@ import {isPathImport, relativePath, resolvePath, resolveRelativePath} from "../p import {getModuleResolver} from "../resolvers.js"; import type {Params} from "../route.js"; import {Sourcemap} from "../sourcemap.js"; -import annotate from "./annotate.js"; +import {annotatePath} from "./annotate.js"; import type {FileExpression} from "./files.js"; import {findFiles} from "./files.js"; import type {ExportNode, ImportNode} from "./imports.js"; @@ -102,7 +102,7 @@ export async function transpileModule( async function rewriteImportSource(source: StringLiteral) { const specifier = getStringLiteralValue(source); - output.replaceLeft(source.start, source.end, annotate(await resolveImport(specifier))); + output.replaceLeft(source.start, source.end, annotatePath(await resolveImport(specifier))); } for (const {name, node} of findFiles(body, path, input)) { @@ -116,7 +116,7 @@ export async function transpileModule( info ? `{"name":${JSON.stringify(p)},"mimeType":${JSON.stringify( mime.getType(name) ?? undefined - )},"path":${annotate(relativePath(servePath, resolveFile(name)))},"lastModified":${JSON.stringify( + )},"path":${annotatePath(relativePath(servePath, resolveFile(name)))},"lastModified":${JSON.stringify( info.mtimeMs )},"size":${JSON.stringify(info.size)}}` : JSON.stringify(p) @@ -136,7 +136,7 @@ export async function transpileModule( if (isImportMetaResolve(node) && isStringLiteral(source)) { const value = getStringLiteralValue(source); const resolution = isPathImport(value) && !isJavaScript(value) ? 
resolveFile(value) : await resolveImport(value); - output.replaceLeft(source.start, source.end, annotate(resolution)); + output.replaceLeft(source.start, source.end, annotatePath(resolution)); } } @@ -204,7 +204,7 @@ function rewriteImportDeclarations( for (const node of declarations) { output.delete(node.start, node.end + +(output.input[node.end] === "\n")); specifiers.push(rewriteImportSpecifiers(node)); - imports.push(`import(${annotate(resolve(getStringLiteralValue(node.source as StringLiteral)))})`); + imports.push(`import(${annotatePath(resolve(getStringLiteralValue(node.source as StringLiteral)))})`); } if (declarations.length > 1) { output.insertLeft(0, `const [${specifiers.join(", ")}] = await Promise.all([${imports.join(", ")}]);\n`); diff --git a/src/node.ts b/src/node.ts index 0d8ebc7a1..bb2c340c2 100644 --- a/src/node.ts +++ b/src/node.ts @@ -13,7 +13,7 @@ import type {AstNode, OutputChunk, Plugin, ResolveIdResult} from "rollup"; import {rollup} from "rollup"; import esbuild from "rollup-plugin-esbuild"; import {prepareOutput, toOsPath} from "./files.js"; -import annotate from "./javascript/annotate.js"; +import {annotatePath} from "./javascript/annotate.js"; import type {ImportReference} from "./javascript/imports.js"; import {isJavaScript, parseImports} from "./javascript/imports.js"; import {parseNpmSpecifier, rewriteNpmImports} from "./npm.js"; @@ -87,7 +87,7 @@ function isBadCommonJs(specifier: string): boolean { } function shimCommonJs(specifier: string, require: NodeRequire): string { - return `export {${Object.keys(require(specifier))}} from ${annotate(specifier)};\n`; + return `export {${Object.keys(require(specifier))}} from ${annotatePath(specifier)};\n`; } async function bundle( diff --git a/src/npm.ts b/src/npm.ts index 984080df6..553ac00de 100644 --- a/src/npm.ts +++ b/src/npm.ts @@ -6,7 +6,7 @@ import {simple} from "acorn-walk"; import {maxSatisfying, rsort, satisfies, validRange} from "semver"; import {DUCKDB_WASM_VERSION} from 
"./duckdb.js"; import {isEnoent} from "./error.js"; -import annotate from "./javascript/annotate.js"; +import {annotatePath} from "./javascript/annotate.js"; import type {ExportNode, ImportNode, ImportReference} from "./javascript/imports.js"; import {isImportMetaResolve, parseImports} from "./javascript/imports.js"; import {parseProgram} from "./javascript/parse.js"; @@ -66,7 +66,7 @@ export function rewriteNpmImports(input: string, resolve: (s: string) => string const value = getStringLiteralValue(source); const resolved = resolve(value); if (resolved === undefined || value === resolved) return; - output.replaceLeft(source.start, source.end, annotate(resolved)); + output.replaceLeft(source.start, source.end, annotatePath(resolved)); } // TODO Preserve the source map, but download it too. diff --git a/src/rollup.ts b/src/rollup.ts index f603c4a04..6a41c5d46 100644 --- a/src/rollup.ts +++ b/src/rollup.ts @@ -6,7 +6,7 @@ import type {AstNode, OutputChunk, Plugin, ResolveIdResult} from "rollup"; import {rollup} from "rollup"; import esbuild from "rollup-plugin-esbuild"; import {getClientPath, getStylePath} from "./files.js"; -import annotate from "./javascript/annotate.js"; +import {annotatePath} from "./javascript/annotate.js"; import type {StringLiteral} from "./javascript/source.js"; import {getStringLiteralValue, isStringLiteral} from "./javascript/source.js"; import {resolveNpmImport} from "./npm.js"; @@ -178,7 +178,7 @@ function importMetaResolve(path: string, resolveImport: ImportResolver): Plugin for (const source of resolves) { const specifier = getStringLiteralValue(source); const resolution = await resolveImport(specifier); - if (resolution) output.replaceLeft(source.start, source.end, annotate(relativePath(path, resolution))); + if (resolution) output.replaceLeft(source.start, source.end, annotatePath(relativePath(path, resolution))); } return {code: String(output)}; diff --git a/test/javascript/annotate.ts b/test/javascript/annotate.ts index 
17edaa25b..8c527b37d 100644 --- a/test/javascript/annotate.ts +++ b/test/javascript/annotate.ts @@ -1,7 +1,5 @@ -/** - * This file is not suffixed with '-test'; it expects to run with an extra - * OBSERVABLE_ANNOTATE_FILES=true environment variable. - */ +// This file is not suffixed with '-test'; it expects to run with an extra +// OBSERVABLE_ANNOTATE_FILES=true environment variable. import assert from "node:assert"; import type {TranspileModuleOptions} from "../../src/javascript/transpile.js"; import {transpileModule} from "../../src/javascript/transpile.js"; From be2638575db4192e73601e844b41b71a1a659935 Mon Sep 17 00:00:00 2001 From: Mike Bostock Date: Fri, 1 Nov 2024 15:43:11 -0700 Subject: [PATCH 36/39] disable telemetry on annotate tests, too --- package.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/package.json b/package.json index c4d0d96d4..01fc3871b 100644 --- a/package.json +++ b/package.json @@ -28,8 +28,8 @@ "test:build": "rimraf test/build && rimraf --glob test/.observablehq/cache test/input/build/*/.observablehq/cache && cross-env npm_package_version=1.0.0-test node build.js --sourcemap --outdir=test/build \"{src,test}/**/*.{ts,js,css}\" --ignore \"test/input/**\" --ignore \"test/output/**\" --ignore \"test/preview/dashboard/**\" --ignore \"**/*.d.ts\" && cp -r templates test/build", "test:mocha": "yarn test:mocha:serial -p", "test:mocha:serial": "yarn test:build && cross-env OBSERVABLE_TELEMETRY_DISABLE=1 TZ=America/Los_Angeles mocha --timeout 30000 \"test/build/test/**/*-test.js\"", - "test:mocha:annotate": "yarn test:build && cross-env OBSERVABLE_ANNOTATE_FILES=true TZ=America/Los_Angeles mocha --timeout 30000 \"test/build/test/**/annotate.js\"", - "test:mocha:all": "yarn test:mocha && cross-env OBSERVABLE_ANNOTATE_FILES=true TZ=America/Los_Angeles mocha --timeout 30000 \"test/build/test/**/annotate.js\"", + "test:mocha:annotate": "yarn test:build && cross-env OBSERVABLE_TELEMETRY_DISABLE=1 
OBSERVABLE_ANNOTATE_FILES=true TZ=America/Los_Angeles mocha --timeout 30000 \"test/build/test/**/annotate.js\"", + "test:mocha:all": "yarn test:mocha && cross-env OBSERVABLE_TELEMETRY_DISABLE=1 OBSERVABLE_ANNOTATE_FILES=true TZ=America/Los_Angeles mocha --timeout 30000 \"test/build/test/**/annotate.js\"", "test:lint": "eslint src test --max-warnings=0", "test:prettier": "prettier --check src test", "test:tsc": "tsc --noEmit", From a23d3e4868e95f8e315ccc708c56a09c69b39273 Mon Sep 17 00:00:00 2001 From: Mike Bostock Date: Fri, 1 Nov 2024 16:33:38 -0700 Subject: [PATCH 37/39] tidier duckdb manifest --- docs/project-structure.md | 2 +- src/build.ts | 15 ++++++------- src/client/stdlib/duckdb.js | 12 +++++----- src/duckdb.ts | 22 +++++++++++++------ .../v1.1.1/wasm_eh/json.duckdb_extension.wasm | 0 .../wasm_mvp/json.duckdb_extension.wasm | 0 .../wasm_eh/parquet.duckdb_extension.wasm | 0 .../wasm_mvp/parquet.duckdb_extension.wasm | 0 8 files changed, 29 insertions(+), 22 deletions(-) rename test/output/build/duckdb/_duckdb/{e3b0c442-extensions.duckdb.org => json-e3b0c442}/v1.1.1/wasm_eh/json.duckdb_extension.wasm (100%) rename test/output/build/duckdb/_duckdb/{e3b0c442-extensions.duckdb.org => json-e3b0c442}/v1.1.1/wasm_mvp/json.duckdb_extension.wasm (100%) rename test/output/build/duckdb/_duckdb/{e3b0c442-extensions.duckdb.org => parquet-e3b0c442}/v1.1.1/wasm_eh/parquet.duckdb_extension.wasm (100%) rename test/output/build/duckdb/_duckdb/{e3b0c442-extensions.duckdb.org => parquet-e3b0c442}/v1.1.1/wasm_mvp/parquet.duckdb_extension.wasm (100%) diff --git a/docs/project-structure.md b/docs/project-structure.md index 3d36774ce..ef625e8dc 100644 --- a/docs/project-structure.md +++ b/docs/project-structure.md @@ -99,7 +99,7 @@ For this site, routes map to files as: /hello → dist/hello.html → src/hello.md ``` -This assumes [“clean URLs”](./config#clean-urls) as supported by most static site servers; `/hello` can also be accessed as `/hello.html`, and `/` can be accessed as 
`/index` and `/index.html`. (Some static site servers automatically redirect to clean URLs, but we recommend being consistent when linking to your site.) +This assumes [“clean URLs”](./config#preserve-extension) as supported by most static site servers; `/hello` can also be accessed as `/hello.html`, and `/` can be accessed as `/index` and `/index.html`. (Some static site servers automatically redirect to clean URLs, but we recommend being consistent when linking to your site.) Apps should always have a top-level `index.md` in the source root; this is your app’s home page, and it’s what people visit by default. diff --git a/src/build.ts b/src/build.ts index e3dcd3766..0f06635a4 100644 --- a/src/build.ts +++ b/src/build.ts @@ -141,19 +141,20 @@ export async function build( effects.logger.log(cachePath); } - // Copy over the DuckDB extensions and create the DuckDB manifest. + // Copy over the DuckDB extensions, initializing aliases that are needed to + // construct the DuckDB manifest. for (const path of globalImports) { if (path.startsWith("/_duckdb/")) { const sourcePath = join(cacheRoot, path); effects.output.write(`${faint("build")} ${path} ${faint("→")} `); const contents = await readFile(sourcePath); const hash = createHash("sha256").update(contents).digest("hex").slice(0, 8); - const alias = applyHash(path, hash); + const [, , , version, bundle, name] = path.split("/"); + const alias = join("/_duckdb/", `${basename(name, ".duckdb_extension.wasm")}-${hash}`, version, bundle, name); aliases.set(path, alias); await effects.writeFile(alias, contents); } } - const duckDBManifest = await getDuckDBManifest(duckdb, {root, aliases}); // Generate the client bundles. These are initially generated into the cache // because we need to rewrite any npm and node imports to be hashed; this is @@ -164,9 +165,7 @@ export async function build( effects.output.write(`${faint("bundle")} ${path} ${faint("→")} `); const clientPath = getClientPath(path === "/_observablehq/client.js" ? 
"index.js" : path.slice("/_observablehq/".length)); // prettier-ignore const define: {[key: string]: string} = {}; - if (path === "/_observablehq/stdlib/duckdb.js") { - define["DUCKDB_MANIFEST"] = JSON.stringify(duckDBManifest); - } + if (path === "/_observablehq/stdlib/duckdb.js") define["DUCKDB_MANIFEST"] = JSON.stringify(await getDuckDBManifest(duckdb, {root, aliases})); // prettier-ignore const contents = await rollupClient(clientPath, root, path, {minify: true, keepNames: true, define}); await prepareOutput(cachePath); await writeFile(cachePath, contents); @@ -220,7 +219,8 @@ export async function build( // Copy over global assets (e.g., minisearch.json, DuckDB’s WebAssembly). // Anything in _observablehq also needs a content hash, but anything in _npm - // or _node does not (because they are already necessarily immutable). + // or _node does not (because they are already necessarily immutable). We’re + // skipping DuckDB’s extensions because they were previously copied above. for (const path of globalImports) { if (path.endsWith(".js") || path.startsWith("/_duckdb/")) continue; const sourcePath = join(cacheRoot, path); @@ -416,7 +416,6 @@ function validateLinks(outputs: Map): [valid: Li } function applyHash(path: string, hash: string): string { - if (path.startsWith("/_duckdb/")) return join("/_duckdb/", `${hash}-${path.slice("/_duckdb/".length)}`); const ext = extname(path); let name = basename(path, ext); if (path.endsWith(".js")) name = name.replace(/(^|\.)_esm$/, ""); // allow hash to replace _esm diff --git a/src/client/stdlib/duckdb.js b/src/client/stdlib/duckdb.js index 52bdf9488..79fcb13f4 100644 --- a/src/client/stdlib/duckdb.js +++ b/src/client/stdlib/duckdb.js @@ -189,18 +189,18 @@ export class DuckDBClient { Object.defineProperty(DuckDBClient.prototype, "dialect", {value: "duckdb"}); -async function registerExtensions(db, extensions = []) { - const connection = await db.connect(); +async function registerExtensions(db, extensions) { + const con = 
await db.connect(); try { await Promise.all( manifest.extensions.map(([name, {[activePlatform]: ref, load}]) => - connection - .query(`INSTALL "${name}" FROM '${ref.startsWith("https://") ? ref : import.meta.resolve(`../..${ref}`)}'`) - .then(() => load && extensions.includes(name) && connection.query(`LOAD "${name}"`)) + con + .query(`INSTALL "${name}" FROM '${import.meta.resolve(ref)}'`) + .then(() => (extensions === undefined ? load : extensions.includes(name)) && con.query(`LOAD "${name}"`)) ) ); } finally { - await connection.close(); + await con.close(); } } diff --git a/src/duckdb.ts b/src/duckdb.ts index e481ca9f0..f9340f397 100644 --- a/src/duckdb.ts +++ b/src/duckdb.ts @@ -64,6 +64,14 @@ export async function getDuckDBManifest( }; } +/** + * Returns the extension “custom repository” location as needed for DuckDB’s + * INSTALL command. This is the relative path to which DuckDB will implicitly add + * v{version}/wasm_{platform}/{name}.duckdb_extension.wasm, assuming that the + * manifest is baked into /_observablehq/stdlib/duckdb.js. + * + * https://duckdb.org/docs/extensions/working_with_extensions#creating-a-custom-repository + */ async function getDuckDBExtension( root: string, platform: string, @@ -73,16 +81,16 @@ async function getDuckDBExtension( ) { let ext = await resolveDuckDBExtension(root, platform, source, name); if (aliases?.has(ext)) ext = aliases.get(ext)!; - return dirname(dirname(dirname(ext))); + return join("..", "..", dirname(dirname(dirname(ext)))); } /** - * Given a duckdb configuration and an extension name such as "parquet", saves - * the binary to _duckdb/{hash}/v1.1.1/wasm_{p}/parquet.duckdb_extension.wasm - * for every supported platform p ("eh" and "mvp"), and returns a content-hashed - * reference (_duckdb/{hash}) to use in the corresponding DuckDB INSTALL - * statement. 
The repo is structured as required by DuckDB with: - * ${repo}/v{duckdbversion}/wasm_{platform}/${name}.duckdb_extension.wasm + * Saves the given DuckDB extension to the .observablehq/cache/_duckdb cache, + * as {repo}/v{version}/wasm_{platform}/{name}.duckdb_extension.wasm, + * returning the serving path to the saved file in the cache (starting with + * /_duckdb). + * + * https://duckdb.org/docs/extensions/overview#installation-location */ export async function resolveDuckDBExtension( root: string, diff --git a/test/output/build/duckdb/_duckdb/e3b0c442-extensions.duckdb.org/v1.1.1/wasm_eh/json.duckdb_extension.wasm b/test/output/build/duckdb/_duckdb/json-e3b0c442/v1.1.1/wasm_eh/json.duckdb_extension.wasm similarity index 100% rename from test/output/build/duckdb/_duckdb/e3b0c442-extensions.duckdb.org/v1.1.1/wasm_eh/json.duckdb_extension.wasm rename to test/output/build/duckdb/_duckdb/json-e3b0c442/v1.1.1/wasm_eh/json.duckdb_extension.wasm diff --git a/test/output/build/duckdb/_duckdb/e3b0c442-extensions.duckdb.org/v1.1.1/wasm_mvp/json.duckdb_extension.wasm b/test/output/build/duckdb/_duckdb/json-e3b0c442/v1.1.1/wasm_mvp/json.duckdb_extension.wasm similarity index 100% rename from test/output/build/duckdb/_duckdb/e3b0c442-extensions.duckdb.org/v1.1.1/wasm_mvp/json.duckdb_extension.wasm rename to test/output/build/duckdb/_duckdb/json-e3b0c442/v1.1.1/wasm_mvp/json.duckdb_extension.wasm diff --git a/test/output/build/duckdb/_duckdb/e3b0c442-extensions.duckdb.org/v1.1.1/wasm_eh/parquet.duckdb_extension.wasm b/test/output/build/duckdb/_duckdb/parquet-e3b0c442/v1.1.1/wasm_eh/parquet.duckdb_extension.wasm similarity index 100% rename from test/output/build/duckdb/_duckdb/e3b0c442-extensions.duckdb.org/v1.1.1/wasm_eh/parquet.duckdb_extension.wasm rename to test/output/build/duckdb/_duckdb/parquet-e3b0c442/v1.1.1/wasm_eh/parquet.duckdb_extension.wasm diff --git a/test/output/build/duckdb/_duckdb/e3b0c442-extensions.duckdb.org/v1.1.1/wasm_mvp/parquet.duckdb_extension.wasm 
b/test/output/build/duckdb/_duckdb/parquet-e3b0c442/v1.1.1/wasm_mvp/parquet.duckdb_extension.wasm similarity index 100% rename from test/output/build/duckdb/_duckdb/e3b0c442-extensions.duckdb.org/v1.1.1/wasm_mvp/parquet.duckdb_extension.wasm rename to test/output/build/duckdb/_duckdb/parquet-e3b0c442/v1.1.1/wasm_mvp/parquet.duckdb_extension.wasm From 6e828c93dd2a73613480016d9af581e2ed40799c Mon Sep 17 00:00:00 2001 From: Mike Bostock Date: Fri, 1 Nov 2024 16:40:30 -0700 Subject: [PATCH 38/39] remove todo --- src/config.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/config.ts b/src/config.ts index 2357799ad..48d8d0f79 100644 --- a/src/config.ts +++ b/src/config.ts @@ -522,7 +522,6 @@ export function stringOrNull(spec: unknown): string | null { return spec == null || spec === false ? null : String(spec); } -// TODO convert array of names // TODO configure bundles? function normalizeDuckDB(spec: unknown): DuckDBConfig { const extensions: {[name: string]: DuckDBExtensionConfig} = {}; From 365dbe3346f57ca6e76fc5b340645dcc35de3e8e Mon Sep 17 00:00:00 2001 From: Mike Bostock Date: Fri, 1 Nov 2024 17:22:56 -0700 Subject: [PATCH 39/39] more robust duckdb: scheme --- docs/lib/duckdb.md | 2 +- src/duckdb.ts | 40 ++++++++----------- src/libraries.ts | 3 +- src/resolvers.ts | 5 +-- ...inputs.00000006.js => inputs.00000006.css} | 0 ...inputs.00000007.css => inputs.00000007.js} | 0 test/output/build/duckdb/index.html | 6 +-- 7 files changed, 25 insertions(+), 31 deletions(-) rename test/output/build/duckdb/_observablehq/stdlib/{inputs.00000006.js => inputs.00000006.css} (100%) rename test/output/build/duckdb/_observablehq/stdlib/{inputs.00000007.css => inputs.00000007.js} (100%) diff --git a/docs/lib/duckdb.md b/docs/lib/duckdb.md index 46b0ae140..1cdfd72a0 100644 --- a/docs/lib/duckdb.md +++ b/docs/lib/duckdb.md @@ -152,7 +152,7 @@ If the `duckdb_extensions()` function runs before DuckDB autoloads a core extens ### Self-hosting of extensions -As with [npm 
imports](../imports#self-hosting-of-npm-imports), configured DuckDB extensions are self-hosted, improving performance, stability, & security, and allowing you to develop offline. Extensions are downloaded to the DuckDB cache folder, which lives in .observablehq/cache/_duckdb within the source root (typically `src`). You can clear the cache and restart the preview server to re-fetch the latest versions of any DuckDB extensions. If you use an [autoloading core extension](https://duckdb.org/docs/extensions/core_extensions.html#list-of-core-extensions) that is not configured, DuckDB-Wasm [will load it](https://duckdb.org/docs/api/wasm/extensions.html#fetching-duckdb-wasm-extensions) from the default extension repository, `extensions.duckdb.org`, at runtime. +As with [npm imports](../imports#self-hosting-of-npm-imports), configured DuckDB extensions are self-hosted, improving performance, stability, & security, and allowing you to develop offline. Extensions are downloaded to the DuckDB cache folder, which lives in .observablehq/cache/\_duckdb within the source root (typically `src`). You can clear the cache and restart the preview server to re-fetch the latest versions of any DuckDB extensions. If you use an [autoloading core extension](https://duckdb.org/docs/extensions/core_extensions.html#list-of-core-extensions) that is not configured, DuckDB-Wasm [will load it](https://duckdb.org/docs/api/wasm/extensions.html#fetching-duckdb-wasm-extensions) from the default extension repository, `extensions.duckdb.org`, at runtime. ## Configuring diff --git a/src/duckdb.ts b/src/duckdb.ts index f9340f397..be36f286e 100644 --- a/src/duckdb.ts +++ b/src/duckdb.ts @@ -53,7 +53,9 @@ export async function getDuckDBManifest( await Promise.all( duckdb.bundles.map(async (platform) => [ platform, - install ? await getDuckDBExtension(root, platform, source, name, aliases) : source + install + ? 
await getDuckDBExtension(root, resolveDuckDBExtension(source, platform, name), aliases) + : source ]) ) ) @@ -64,6 +66,10 @@ export async function getDuckDBManifest( }; } +export function resolveDuckDBExtension(repo: string, platform: string, name: string): URL { + return new URL(`v${DUCKDB_VERSION}/wasm_${platform}/${name}.duckdb_extension.wasm`, repo); +} + /** * Returns the extension “custom repository” location as needed for DuckDB’s * INSTALL command. This is the relative path to which DuckDB will implicitly add @@ -72,43 +78,31 @@ export async function getDuckDBManifest( * * https://duckdb.org/docs/extensions/working_with_extensions#creating-a-custom-repository */ -async function getDuckDBExtension( - root: string, - platform: string, - source: string, - name: string, - aliases?: Map -) { - let ext = await resolveDuckDBExtension(root, platform, source, name); +async function getDuckDBExtension(root: string, href: string | URL, aliases?: Map) { + let ext = await cacheDuckDBExtension(root, href); if (aliases?.has(ext)) ext = aliases.get(ext)!; return join("..", "..", dirname(dirname(dirname(ext)))); } /** * Saves the given DuckDB extension to the .observablehq/cache/_duckdb cache, - * as {repo}/v{version}/wasm_{platform}/{name}.duckdb_extension.wasm, - * returning the serving path to the saved file in the cache (starting with - * /_duckdb). + * as {origin}/{path}/{name}.duckdb_extension.wasm, returning the serving path + * to the saved file in the cache (starting with /_duckdb). 
* * https://duckdb.org/docs/extensions/overview#installation-location */ -export async function resolveDuckDBExtension( - root: string, - platform: string, - repo: string, - name: string -): Promise { +export async function cacheDuckDBExtension(root: string, href: string | URL): Promise { + const url = new URL(href); + if (url.protocol !== "https:") throw new Error(`unsupported protocol: ${url.protocol}`); + const key = String(url).slice("https://".length); + const path = join("_duckdb", key); const cache = join(root, ".observablehq", "cache"); - const file = `${name}.duckdb_extension.wasm`; - const url = new URL(`v${DUCKDB_VERSION}/wasm_${platform}/${file}`, repo); - if (url.protocol !== "https:") throw new Error(`invalid repo: ${repo}`); - const path = join("_duckdb", String(url).slice("https://".length)); const cachePath = join(cache, path); if (existsSync(cachePath)) return `/${path}`; let promise = downloadRequests.get(cachePath); if (promise) return promise; // coalesce concurrent requests promise = (async () => { - console.log(`duckdb:${url} ${faint("→")} ${cachePath}`); + console.log(`duckdb:${key} ${faint("→")} ${cachePath}`); const response = await fetch(url); if (!response.ok) throw new Error(`unable to fetch: ${url}`); await mkdir(dirname(cachePath), {recursive: true}); diff --git a/src/libraries.ts b/src/libraries.ts index a816997f5..b801c34f3 100644 --- a/src/libraries.ts +++ b/src/libraries.ts @@ -1,4 +1,5 @@ import type {DuckDBConfig} from "./config.js"; +import {resolveDuckDBExtension} from "./duckdb.js"; export function getImplicitFileImports(methods: Iterable): Set { const set = setof(methods); @@ -85,7 +86,7 @@ export function getImplicitDownloads(imports: Iterable, duckdb?: DuckDBC if (!duckdb) throw new Error("Implementation error: missing duckdb configuration"); for (const [name, {source}] of Object.entries(duckdb.extensions)) { for (const platform of duckdb.bundles) { - implicits.add(`duckdb:${platform},${name},${source}`); + 
implicits.add(`duckdb:${resolveDuckDBExtension(source, platform, name)}`); } } } diff --git a/src/resolvers.ts b/src/resolvers.ts index ab87f6e62..600dfb196 100644 --- a/src/resolvers.ts +++ b/src/resolvers.ts @@ -1,7 +1,7 @@ import {createHash} from "node:crypto"; import {extname, join} from "node:path/posix"; import type {DuckDBConfig} from "./config.js"; -import {resolveDuckDBExtension} from "./duckdb.js"; +import {cacheDuckDBExtension} from "./duckdb.js"; import {findAssets} from "./html.js"; import {defaultGlobals} from "./javascript/globals.js"; import {isJavaScript} from "./javascript/imports.js"; @@ -371,8 +371,7 @@ async function resolveResolvers( resolutions.set(specifier, path); await populateNpmCache(root, path); } else if (specifier.startsWith("duckdb:")) { - const [p, name, repo] = specifier.slice("duckdb:".length).split(","); - const path = await resolveDuckDBExtension(root, p, repo, name); + const path = await cacheDuckDBExtension(root, specifier.slice("duckdb:".length)); resolutions.set(specifier, path); } else if (!specifier.startsWith("observablehq:")) { throw new Error(`unhandled implicit download: ${specifier}`); diff --git a/test/output/build/duckdb/_observablehq/stdlib/inputs.00000006.js b/test/output/build/duckdb/_observablehq/stdlib/inputs.00000006.css similarity index 100% rename from test/output/build/duckdb/_observablehq/stdlib/inputs.00000006.js rename to test/output/build/duckdb/_observablehq/stdlib/inputs.00000006.css diff --git a/test/output/build/duckdb/_observablehq/stdlib/inputs.00000007.css b/test/output/build/duckdb/_observablehq/stdlib/inputs.00000007.js similarity index 100% rename from test/output/build/duckdb/_observablehq/stdlib/inputs.00000007.css rename to test/output/build/duckdb/_observablehq/stdlib/inputs.00000007.js diff --git a/test/output/build/duckdb/index.html b/test/output/build/duckdb/index.html index b0ceaf67b..9028d12f2 100644 --- a/test/output/build/duckdb/index.html +++ b/test/output/build/duckdb/index.html 
@@ -8,15 +8,15 @@ - + - + - +