From f56e57896743f423f2aed0eb66f2b79737f7231f Mon Sep 17 00:00:00 2001 From: Mike Bostock Date: Tue, 5 Dec 2023 07:05:22 -0800 Subject: [PATCH] transitive module preloads & subresource integrity (#317) * transitive preloads & subresource integrity * only sri if immutable cache * simplify * preload stylesheets, client.js --- src/javascript/imports.ts | 166 ++++++++++++++---- src/render.ts | 16 +- test/mocks/jsdelivr.ts | 12 +- test/output/build/404/404.html | 3 + test/output/build/archives/tar.html | 3 + test/output/build/archives/zip.html | 3 + test/output/build/config/closed/page.html | 3 + test/output/build/config/index.html | 3 + test/output/build/config/one.html | 3 + test/output/build/config/sub/two.html | 3 + test/output/build/config/toc-override.html | 3 + test/output/build/config/toc.html | 3 + test/output/build/fetches/foo.html | 3 + test/output/build/fetches/top.html | 3 + test/output/build/files/files.html | 3 + .../build/files/subsection/subfiles.html | 3 + test/output/build/imports/foo/foo.html | 5 +- test/output/build/missing-file/index.html | 3 + test/output/build/missing-import/index.html | 3 + test/output/build/multi/index.html | 3 + test/output/build/multi/subsection/index.html | 3 + test/output/build/simple-public/index.html | 3 + test/output/build/simple/simple.html | 3 + test/output/build/subtitle/index.html | 3 + 24 files changed, 215 insertions(+), 44 deletions(-) diff --git a/src/javascript/imports.ts b/src/javascript/imports.ts index 53f79d9e6..c66fd78fe 100644 --- a/src/javascript/imports.ts +++ b/src/javascript/imports.ts @@ -2,16 +2,8 @@ import {createHash} from "node:crypto"; import {readFileSync} from "node:fs"; import {join} from "node:path"; import {Parser} from "acorn"; -import type { - CallExpression, - ExportAllDeclaration, - ExportNamedDeclaration, - Identifier, - ImportDeclaration, - ImportExpression, - Node, - Program -} from "acorn"; +import type {CallExpression, Identifier, Node, Program} from "acorn"; +import type 
{ExportAllDeclaration, ExportNamedDeclaration, ImportDeclaration, ImportExpression} from "acorn"; import {simple} from "acorn-walk"; import {isEnoent} from "../error.js"; import {type Feature, type ImportReference, type JavaScriptNode} from "../javascript.js"; @@ -23,6 +15,9 @@ import {findFetches, maybeAddFetch, rewriteIfLocalFetch} from "./fetches.js"; import {defaultGlobals} from "./globals.js"; import {findReferences} from "./references.js"; +type ImportNode = ImportDeclaration | ImportExpression; +type ExportNode = ExportAllDeclaration | ExportNamedDeclaration; + export interface ImportsAndFetches { imports: ImportReference[]; fetches: Feature[]; @@ -32,15 +27,15 @@ export interface ImportsAndFetches { * Finds all export declarations in the specified node. (This is used to * disallow exports within JavaScript code blocks.) */ -export function findExports(body: Node): (ExportAllDeclaration | ExportNamedDeclaration)[] { - const exports: (ExportAllDeclaration | ExportNamedDeclaration)[] = []; +export function findExports(body: Node): ExportNode[] { + const exports: ExportNode[] = []; simple(body, { ExportAllDeclaration: findExport, ExportNamedDeclaration: findExport }); - function findExport(node: ExportAllDeclaration | ExportNamedDeclaration) { + function findExport(node: ExportNode) { exports.push(node); } @@ -65,7 +60,7 @@ export function findImports(body: Node, root: string, path: string): ImportsAndF CallExpression: findFetch }); - function findImport(node) { + function findImport(node: ImportNode) { if (isStringLiteral(node.source)) { const value = getStringLiteralValue(node.source); if (isLocalImport(value, path)) { @@ -105,11 +100,12 @@ export function parseLocalImports(root: string, paths: string[]): ImportsAndFetc const imports: ImportReference[] = []; const fetches: Feature[] = []; const set = new Set(paths); + for (const path of set) { imports.push({type: "local", name: path}); try { const input = readFileSync(join(root, path), "utf-8"); - const 
program = Parser.parse(input, parseOptions) as Program; + const program = Parser.parse(input, parseOptions); simple( program, @@ -127,10 +123,8 @@ export function parseLocalImports(root: string, paths: string[]): ImportsAndFetc if (!isEnoent(error) && !(error instanceof SyntaxError)) throw error; } } - function findImport( - node: ImportDeclaration | ImportExpression | ExportAllDeclaration | ExportNamedDeclaration, - path: string - ) { + + function findImport(node: ImportNode | ExportNode, path: string) { if (isStringLiteral(node.source)) { const value = getStringLiteralValue(node.source); if (isLocalImport(value, path)) { @@ -141,15 +135,16 @@ export function parseLocalImports(root: string, paths: string[]): ImportsAndFetc } } } + return {imports, fetches}; } /** Rewrites import specifiers in the specified ES module source. */ export async function rewriteModule(input: string, sourcePath: string, resolver: ImportResolver): Promise { - const body = Parser.parse(input, parseOptions) as Program; + const body = Parser.parse(input, parseOptions); const references: Identifier[] = findReferences(body, defaultGlobals); const output = new Sourcemap(input); - const imports: (ImportDeclaration | ImportExpression | ExportAllDeclaration | ExportNamedDeclaration)[] = []; + const imports: (ImportNode | ExportNode)[] = []; simple(body, { ImportDeclaration: rewriteImport, @@ -161,7 +156,7 @@ export async function rewriteModule(input: string, sourcePath: string, resolver: } }); - function rewriteImport(node: ImportDeclaration | ImportExpression | ExportAllDeclaration | ExportNamedDeclaration) { + function rewriteImport(node: ImportNode | ExportNode) { imports.push(node); } @@ -267,10 +262,6 @@ export function createImportResolver(root: string, base: "." | "_import" = "."): }; } -// Like import, don’t fetch the same package more than once to ensure -// consistency; restart the server if you want to clear the cache. 
-const npmCache = new Map>(); - function parseNpmSpecifier(specifier: string): {name: string; range?: string; path?: string} { const parts = specifier.split("/"); const namerange = specifier.startsWith("@") ? [parts.shift()!, parts.shift()!].join("/") : parts.shift()!; @@ -286,29 +277,130 @@ function formatNpmSpecifier({name, range, path}: {name: string; range?: string; return `${name}${range ? `@${range}` : ""}${path ? `/${path}` : ""}`; } -async function resolveNpmVersion(specifier: string): Promise { - const {name, range} = parseNpmSpecifier(specifier); // ignore path - specifier = formatNpmSpecifier({name, range}); - let promise = npmCache.get(specifier); +// Like import, don’t fetch the same package more than once to ensure +// consistency; restart the server if you want to clear the cache. +const fetchCache = new Map>(); + +async function cachedFetch(href: string): Promise<{headers: Headers; body: any}> { + let promise = fetchCache.get(href); if (promise) return promise; promise = (async () => { - const search = range ? `?specifier=${range}` : ""; - const response = await fetch(`https://data.jsdelivr.com/v1/packages/npm/${name}/resolved${search}`); - if (!response.ok) throw new Error(`unable to resolve npm specifier: ${name}`); - const body = await response.json(); - return body.version; + const response = await fetch(href); + if (!response.ok) throw new Error(`unable to fetch: ${href}`); + const json = /^application\/json(;|$)/.test(response.headers.get("content-type")!); + const body = await (json ? 
response.json() : response.text()); + return {headers: response.headers, body}; })(); - promise.catch(() => npmCache.delete(specifier)); // try again on error - npmCache.set(specifier, promise); + promise.catch(() => fetchCache.delete(href)); // try again on error + fetchCache.set(href, promise); return promise; } +async function resolveNpmVersion(specifier: string): Promise { + const {name, range} = parseNpmSpecifier(specifier); // ignore path + specifier = formatNpmSpecifier({name, range}); + const search = range ? `?specifier=${range}` : ""; + return (await cachedFetch(`https://data.jsdelivr.com/v1/packages/npm/${name}/resolved${search}`)).body.version; +} + export async function resolveNpmImport(specifier: string): Promise { const {name, path = "+esm"} = parseNpmSpecifier(specifier); const version = await resolveNpmVersion(specifier); return `https://cdn.jsdelivr.net/npm/${name}@${version}/${path}`; } +const preloadCache = new Map | undefined>>(); + +/** + * Fetches the module at the specified URL and returns a promise to any + * transitive modules it imports (on the same host; only path-based imports are + * considered), as well as its subresource integrity hash. Only static imports + * are considered, and the fetched module must have immutable public caching; + * dynamic imports may not be used and hence are not preloaded.
+ */ +async function fetchModulePreloads(href: string): Promise | undefined> { + let promise = preloadCache.get(href); + if (promise) return promise; + promise = (async () => { + const {headers, body} = await cachedFetch(href); + const cache = headers.get("cache-control")?.split(/\s*,\s*/); + if (!cache?.some((c) => c === "immutable") || !cache?.some((c) => c === "public")) return; + const imports = new Set(); + let program: Program; + try { + program = Parser.parse(body, parseOptions); + } catch (error) { + if (!isEnoent(error) && !(error instanceof SyntaxError)) throw error; + return; + } + simple(program, { + ImportDeclaration: findImport, + ExportAllDeclaration: findImport, + ExportNamedDeclaration: findImport + }); + function findImport(node: ImportNode | ExportNode) { + if (isStringLiteral(node.source)) { + const value = getStringLiteralValue(node.source); + if (["./", "../", "/"].some((prefix) => value.startsWith(prefix))) { + imports.add(String(new URL(value, href))); + } + } + } + integrityCache.set(href, `sha384-${createHash("sha384").update(body).digest("base64")}`); + return imports; + })(); + promise.catch(() => preloadCache.delete(href)); // try again on error + preloadCache.set(href, promise); + return promise; +} + +const integrityCache = new Map(); + +/** + * Given a set of resolved module specifiers (URLs) to preload, fetches any + * externally-hosted modules to compute the transitively-imported modules; also + * precomputes the subresource integrity hash for each fetched module. 
+ */ +export async function resolveModulePreloads(hrefs: Set): Promise { + let resolve: () => void; + const visited = new Set(); + const queue = new Set>(); + + for (const href of hrefs) { + if (href.startsWith("https:")) { + enqueue(href); + } + } + + function enqueue(href: string) { + if (visited.has(href)) return; + visited.add(href); + const promise = (async () => { + const imports = await fetchModulePreloads(href); + if (!imports) return; + for (const i of imports) { + hrefs.add(i); + enqueue(i); + } + })(); + promise.finally(() => { + queue.delete(promise); + queue.size || resolve(); + }); + queue.add(promise); + } + + if (queue.size) return new Promise((y) => (resolve = y)); +} + +/** + * Given a specifier (URL) that was previously resolved by + * resolveModulePreloads, returns the computed subresource integrity hash. + */ +export function resolveModuleIntegrity(href: string): string | undefined { + return integrityCache.get(href); +} + function resolveBuiltin(base: "." | "_import", path: string, specifier: string): string { return relativeUrl(join(base === "." ? 
"_import" : ".", path), join("_observablehq", specifier)); } diff --git a/src/render.ts b/src/render.ts index 1ab72ec35..ce01936b2 100644 --- a/src/render.ts +++ b/src/render.ts @@ -1,7 +1,8 @@ import {parseHTML} from "linkedom"; import {type Config, type Page, type Section, mergeToc} from "./config.js"; import {type Html, html} from "./html.js"; -import {type ImportResolver, createImportResolver} from "./javascript/imports.js"; +import type {ImportResolver} from "./javascript/imports.js"; +import {createImportResolver, resolveModuleIntegrity, resolveModulePreloads} from "./javascript/imports.js"; import type {FileReference, ImportReference, Transpile} from "./javascript.js"; import {addImplicitSpecifiers, addImplicitStylesheets} from "./libraries.js"; import {type ParseResult, parseMarkdown} from "./markdown.js"; @@ -179,10 +180,12 @@ async function renderLinks(parseResult: ParseResult, path: string, resolver: Imp const inputs = new Set(parseResult.cells.flatMap((cell) => cell.inputs ?? 
[])); addImplicitSpecifiers(specifiers, inputs); await addImplicitStylesheets(stylesheets, specifiers); - const preloads = new Set(); + const preloads = new Set([relativeUrl(path, "/_observablehq/client.js")]); for (const specifier of specifiers) preloads.add(await resolver(path, specifier)); - if (parseResult.cells.some((cell) => cell.databases?.length)) preloads.add(relativeUrl(path, "/_observablehq/database.js")); // prettier-ignore + await resolveModulePreloads(preloads); return html`${ + Array.from(stylesheets).sort().map(renderStylesheetPreload) // + }${ Array.from(stylesheets).sort().map(renderStylesheet) // }${ Array.from(preloads).sort().map(renderModulePreload) // @@ -193,8 +196,13 @@ function renderStylesheet(href: string): Html { return html`\n`; } +function renderStylesheetPreload(href: string): Html { + return html`\n`; +} + function renderModulePreload(href: string): Html { - return html`\n`; + const integrity: string | undefined = resolveModuleIntegrity(href); + return html`\n`; } function renderFooter(path: string, options: Pick): Html { diff --git a/test/mocks/jsdelivr.ts b/test/mocks/jsdelivr.ts index 141db9d6f..81f93b862 100644 --- a/test/mocks/jsdelivr.ts +++ b/test/mocks/jsdelivr.ts @@ -28,9 +28,17 @@ export function mockJsDelivr() { globalDispatcher = getGlobalDispatcher(); const agent = new MockAgent(); agent.disableNetConnect(); - const client = agent.get("https://data.jsdelivr.com"); + const dataClient = agent.get("https://data.jsdelivr.com"); for (const [name, version] of packages) { - client.intercept({path: `/v1/packages/npm/${name}/resolved`, method: "GET"}).reply(200, {version}); + dataClient + .intercept({path: `/v1/packages/npm/${name}/resolved`, method: "GET"}) + .reply(200, {version}, {headers: {"content-type": "application/json; charset=utf-8"}}); + } + const cdnClient = agent.get("https://cdn.jsdelivr.net"); + for (const [name, version] of packages) { + cdnClient + .intercept({path: `/npm/${name}@${version}/+esm`, method: 
"GET"}) + .reply(200, "", {headers: {"cache-control": "public, immutable", "content-type": "text/javascript; charset=utf-8"}}); // prettier-ignore } setGlobalDispatcher(agent); }); diff --git a/test/output/build/404/404.html b/test/output/build/404/404.html index c83e18824..3412799ac 100644 --- a/test/output/build/404/404.html +++ b/test/output/build/404/404.html @@ -4,8 +4,11 @@ Page not found + + +