From af3a5ce22803ee295f9c6dcacebe8389a60f4b36 Mon Sep 17 00:00:00 2001 From: Mike Bostock Date: Mon, 4 Dec 2023 19:08:31 -0800 Subject: [PATCH] transitive preloads & subresource integrity --- src/javascript/imports.ts | 166 +++++++++++++++++++------ src/render.ts | 7 +- test/mocks/jsdelivr.ts | 12 +- test/output/build/imports/foo/foo.html | 2 +- 4 files changed, 145 insertions(+), 42 deletions(-) diff --git a/src/javascript/imports.ts b/src/javascript/imports.ts index 53f79d9e6..179582c57 100644 --- a/src/javascript/imports.ts +++ b/src/javascript/imports.ts @@ -2,16 +2,8 @@ import {createHash} from "node:crypto"; import {readFileSync} from "node:fs"; import {join} from "node:path"; import {Parser} from "acorn"; -import type { - CallExpression, - ExportAllDeclaration, - ExportNamedDeclaration, - Identifier, - ImportDeclaration, - ImportExpression, - Node, - Program -} from "acorn"; +import type {CallExpression, Identifier, Node, Program} from "acorn"; +import type {ExportAllDeclaration, ExportNamedDeclaration, ImportDeclaration, ImportExpression} from "acorn"; import {simple} from "acorn-walk"; import {isEnoent} from "../error.js"; import {type Feature, type ImportReference, type JavaScriptNode} from "../javascript.js"; @@ -23,6 +15,9 @@ import {findFetches, maybeAddFetch, rewriteIfLocalFetch} from "./fetches.js"; import {defaultGlobals} from "./globals.js"; import {findReferences} from "./references.js"; +type ImportNode = ImportDeclaration | ImportExpression; +type ExportNode = ExportAllDeclaration | ExportNamedDeclaration; + export interface ImportsAndFetches { imports: ImportReference[]; fetches: Feature[]; @@ -32,15 +27,15 @@ export interface ImportsAndFetches { * Finds all export declarations in the specified node. (This is used to * disallow exports within JavaScript code blocks.) */ -export function findExports(body: Node): (ExportAllDeclaration | ExportNamedDeclaration)[] { - const exports: (ExportAllDeclaration | ExportNamedDeclaration)[] = []; +export function findExports(body: Node): ExportNode[] { + const exports: ExportNode[] = []; simple(body, { ExportAllDeclaration: findExport, ExportNamedDeclaration: findExport }); - function findExport(node: ExportAllDeclaration | ExportNamedDeclaration) { + function findExport(node: ExportNode) { exports.push(node); } @@ -65,7 +60,7 @@ export function findImports(body: Node, root: string, path: string): ImportsAndF CallExpression: findFetch }); - function findImport(node) { + function findImport(node: ImportNode) { if (isStringLiteral(node.source)) { const value = getStringLiteralValue(node.source); if (isLocalImport(value, path)) { @@ -105,11 +100,12 @@ export function parseLocalImports(root: string, paths: string[]): ImportsAndFetc const imports: ImportReference[] = []; const fetches: Feature[] = []; const set = new Set(paths); + for (const path of set) { imports.push({type: "local", name: path}); try { const input = readFileSync(join(root, path), "utf-8"); - const program = Parser.parse(input, parseOptions) as Program; + const program = Parser.parse(input, parseOptions); simple( program, @@ -127,10 +123,8 @@ export function parseLocalImports(root: string, paths: string[]): ImportsAndFetc if (!isEnoent(error) && !(error instanceof SyntaxError)) throw error; } } - function findImport( - node: ImportDeclaration | ImportExpression | ExportAllDeclaration | ExportNamedDeclaration, - path: string - ) { + + function findImport(node: ImportNode | ExportNode, path: string) { if (isStringLiteral(node.source)) { const value = getStringLiteralValue(node.source); if (isLocalImport(value, path)) { @@ -141,15 +135,16 @@ export function parseLocalImports(root: string, paths: string[]): ImportsAndFetc } } } + return {imports, fetches}; } /** Rewrites import specifiers in the specified ES module source. */ export async function rewriteModule(input: string, sourcePath: string, resolver: ImportResolver): Promise { - const body = Parser.parse(input, parseOptions) as Program; + const body = Parser.parse(input, parseOptions); const references: Identifier[] = findReferences(body, defaultGlobals); const output = new Sourcemap(input); - const imports: (ImportDeclaration | ImportExpression | ExportAllDeclaration | ExportNamedDeclaration)[] = []; + const imports: (ImportNode | ExportNode)[] = []; simple(body, { ImportDeclaration: rewriteImport, @@ -161,7 +156,7 @@ export async function rewriteModule(input: string, sourcePath: string, resolver: } }); - function rewriteImport(node: ImportDeclaration | ImportExpression | ExportAllDeclaration | ExportNamedDeclaration) { + function rewriteImport(node: ImportNode | ExportNode) { imports.push(node); } @@ -267,10 +262,6 @@ export function createImportResolver(root: string, base: "." | "_import" = "."): }; } -// Like import, don’t fetch the same package more than once to ensure -// consistency; restart the server if you want to clear the cache. -const npmCache = new Map>(); - function parseNpmSpecifier(specifier: string): {name: string; range?: string; path?: string} { const parts = specifier.split("/"); const namerange = specifier.startsWith("@") ? [parts.shift()!, parts.shift()!].join("/") : parts.shift()!; @@ -286,29 +277,130 @@ function formatNpmSpecifier({name, range, path}: {name: string; range?: string; return `${name}${range ? `@${range}` : ""}${path ? `/${path}` : ""}`; } -async function resolveNpmVersion(specifier: string): Promise { - const {name, range} = parseNpmSpecifier(specifier); // ignore path - specifier = formatNpmSpecifier({name, range}); - let promise = npmCache.get(specifier); +// Like import, don’t fetch the same package more than once to ensure +// consistency; restart the server if you want to clear the cache. +const fetchCache = new Map>(); + +async function cachedFetch(href: string): Promise<{headers: Headers; body: any}> { + let promise = fetchCache.get(href); if (promise) return promise; promise = (async () => { - const search = range ? `?specifier=${range}` : ""; - const response = await fetch(`https://data.jsdelivr.com/v1/packages/npm/${name}/resolved${search}`); - if (!response.ok) throw new Error(`unable to resolve npm specifier: ${name}`); - const body = await response.json(); - return body.version; + const response = await fetch(href); + if (!response.ok) throw new Error(`unable to fetch: ${href}`); + const json = /^application\/json(;|$)/.test(response.headers.get("content-type")!); + const body = await (json ? response.json() : response.text()); + return {headers: response.headers, body}; })(); - promise.catch(() => npmCache.delete(specifier)); // try again on error - npmCache.set(specifier, promise); + promise.catch(() => fetchCache.delete(href)); // try again on error + fetchCache.set(href, promise); return promise; } +async function resolveNpmVersion(specifier: string): Promise { + const {name, range} = parseNpmSpecifier(specifier); // ignore path + specifier = formatNpmSpecifier({name, range}); + const search = range ? `?specifier=${range}` : ""; + return (await cachedFetch(`https://data.jsdelivr.com/v1/packages/npm/${name}/resolved${search}`)).body.version; +} + export async function resolveNpmImport(specifier: string): Promise { const {name, path = "+esm"} = parseNpmSpecifier(specifier); const version = await resolveNpmVersion(specifier); return `https://cdn.jsdelivr.net/npm/${name}@${version}/${path}`; } +const importsCache = new Map>>(); + +/** + * Fetches the module at the specified URL, parses it, and returns a promise to + * any transitive modules it imports (on the same host; only path-based imports + * are considered). Only static imports are considered; dynamic imports may not + * be used and hence are not preloaded. + */ +async function fetchModuleImports(href: string): Promise> { + let promise = importsCache.get(href); + if (promise) return promise; + promise = (async () => { + const {body} = await cachedFetch(href); + const imports = new Set(); + let program: Program; + try { + program = Parser.parse(body, parseOptions); + } catch (error) { + if (!isEnoent(error) && !(error instanceof SyntaxError)) throw error; + return imports; + } + simple(program, { + ImportDeclaration: findImport, + ExportAllDeclaration: findImport, + ExportNamedDeclaration: findImport + }); + function findImport(node: ImportNode | ExportNode) { + if (isStringLiteral(node.source)) { + const value = getStringLiteralValue(node.source); + if (["./", "../", "/"].some((prefix) => value.startsWith(prefix))) { + imports.add(String(new URL(value, href))); + } + } + } + return imports; + })(); + promise.catch(() => importsCache.delete(href)); // try again on error + importsCache.set(href, promise); + return promise; +} + +const integrityCache = new Map(); + +/** + * Given a set of resolved module specifiers (URLs) to preload, fetches any + * externally-hosted modules to compute the transitively-imported modules; also + * precomputes the subresource integrity hash for each fetched module. + */ +export async function resolveModulePreloads(hrefs: Set): Promise { + let resolve: () => void; + const visited = new Set(); + const queue = new Set>(); + + for (const href of hrefs) { + if (href.startsWith("https:")) { + enqueue(href); + } + } + + function enqueue(href: string) { + if (visited.has(href)) return; + visited.add(href); + const promise = (async () => { + integrityCache.set(href, await fetchModuleIntegrity(href)); + for (const i of await fetchModuleImports(href)) { + hrefs.add(i); + enqueue(i); + } + })(); + promise.finally(() => { + queue.delete(promise); + queue.size || resolve(); + }); + queue.add(promise); + } + + if (queue.size) return new Promise((y) => (resolve = y)); +} + +async function fetchModuleIntegrity(href: string): Promise { + const {body} = await cachedFetch(href); + return `sha384-${createHash("sha384").update(body).digest("base64")}`; +} + +/** + * Given a specifier (URL) that was previously resolved by + * resolveModulePreloads, returns the computed subresource integrity hash. + */ +export function resolveModuleIntegrity(href: string): string | undefined { + return integrityCache.get(href); +} + function resolveBuiltin(base: "." | "_import", path: string, specifier: string): string { return relativeUrl(join(base === "." ? "_import" : ".", path), join("_observablehq", specifier)); } diff --git a/src/render.ts b/src/render.ts index 1ab72ec35..38903b699 100644 --- a/src/render.ts +++ b/src/render.ts @@ -1,7 +1,8 @@ import {parseHTML} from "linkedom"; import {type Config, type Page, type Section, mergeToc} from "./config.js"; import {type Html, html} from "./html.js"; -import {type ImportResolver, createImportResolver} from "./javascript/imports.js"; +import type {ImportResolver} from "./javascript/imports.js"; +import {createImportResolver, resolveModuleIntegrity, resolveModulePreloads} from "./javascript/imports.js"; import type {FileReference, ImportReference, Transpile} from "./javascript.js"; import {addImplicitSpecifiers, addImplicitStylesheets} from "./libraries.js"; import {type ParseResult, parseMarkdown} from "./markdown.js"; @@ -182,6 +183,7 @@ async function renderLinks(parseResult: ParseResult, path: string, resolver: Imp const preloads = new Set(); for (const specifier of specifiers) preloads.add(await resolver(path, specifier)); if (parseResult.cells.some((cell) => cell.databases?.length)) preloads.add(relativeUrl(path, "/_observablehq/database.js")); // prettier-ignore + await resolveModulePreloads(preloads); return html`${ Array.from(stylesheets).sort().map(renderStylesheet) // }${ @@ -194,7 +196,8 @@ function renderStylesheet(href: string): Html { } function renderModulePreload(href: string): Html { - return html`\n`; + const integrity: string | undefined = resolveModuleIntegrity(href); + return html`\n`; } function renderFooter(path: string, options: Pick): Html { diff --git a/test/mocks/jsdelivr.ts b/test/mocks/jsdelivr.ts index 141db9d6f..df77b10ea 100644 --- a/test/mocks/jsdelivr.ts +++ b/test/mocks/jsdelivr.ts @@ -28,9 +28,17 @@ export function mockJsDelivr() { globalDispatcher = getGlobalDispatcher(); const agent = new MockAgent(); agent.disableNetConnect(); - const client = agent.get("https://data.jsdelivr.com"); + const dataClient = agent.get("https://data.jsdelivr.com"); for (const [name, version] of packages) { - client.intercept({path: `/v1/packages/npm/${name}/resolved`, method: "GET"}).reply(200, {version}); + dataClient + .intercept({path: `/v1/packages/npm/${name}/resolved`, method: "GET"}) + .reply(200, {version}, {headers: {"content-type": "application/json; charset=utf-8"}}); + } + const cdnClient = agent.get("https://cdn.jsdelivr.net"); + for (const [name, version] of packages) { + cdnClient + .intercept({path: `/npm/${name}@${version}/+esm`, method: "GET"}) + .reply(200, "", {headers: {"content-type": "text/javascript; charset=utf-8"}}); } setGlobalDispatcher(agent); }); diff --git a/test/output/build/imports/foo/foo.html b/test/output/build/imports/foo/foo.html index 22abcb363..7e8a89776 100644 --- a/test/output/build/imports/foo/foo.html +++ b/test/output/build/imports/foo/foo.html @@ -11,7 +11,7 @@ - +