From 6586877e83f61230c5435f77a3b18f8b8adc4fda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Fri, 15 Mar 2024 17:33:21 +0100 Subject: [PATCH] lastModified (#1051) * tests * document * fileAttachment knows about lastModified * build tests know about lastModified * lastModified * lastModified and content hash are now based on a data loader's cached payload * minimize diff * lowercase comment * Apply suggestions from code review Co-authored-by: Mike Bostock * fix LoaderResolver.getLastModified logic & add tests * okay this is a fake data loader, but let's keep it consistent * restore default mime-type but on render instead of in the client/stdlib * move the default mime-type back to the client; the test doesn't have it anymore * javadoc comment --------- Co-authored-by: Mike Bostock --- docs/javascript/files.md | 6 ++-- src/client/stdlib/fileAttachment.js | 13 ++++---- src/dataloader.ts | 31 ++++++++++++++--- src/render.ts | 18 ++++++---- src/resolvers.ts | 3 +- test/build-test.ts | 1 + test/dataloaders-test.ts | 33 ++++++++++++++++++- test/input/build/files/files.md | 4 +++ test/input/loader/cached.txt.sh | 1 + test/input/loader/not-cached.txt.sh | 1 + test/input/loader/simple.txt | 1 + test/output/build/archives.posix/tar.html | 14 ++++---- test/output/build/archives.posix/zip.html | 8 ++--- test/output/build/archives.win32/tar.html | 6 ++-- test/output/build/archives.win32/zip.html | 4 +-- test/output/build/fetches/foo.html | 4 +-- test/output/build/fetches/top.html | 8 ++--- test/output/build/files/files.html | 14 ++++++-- .../build/files/subsection/subfiles.html | 4 +-- test/output/build/imports/foo/foo.html | 2 +- test/output/build/multi/index.html | 4 +-- test/output/build/simple/simple.html | 2 +- 22 files changed, 129 insertions(+), 53 deletions(-) create mode 100644 test/input/loader/cached.txt.sh create mode 100644 test/input/loader/not-cached.txt.sh create mode 100644 test/input/loader/simple.txt diff --git a/docs/javascript/files.md b/docs/javascript/files.md index 1d14a19c4..86f09b2f0 100644 --- a/docs/javascript/files.md +++ b/docs/javascript/files.md @@ -10,7 +10,7 @@ Load files — whether static or generated dynamically by a [data loader](../loa import {FileAttachment} from "npm:@observablehq/stdlib"; ``` -The `FileAttachment` function takes a path and returns a file handle. This handle exposes the file’s name and [MIME type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types). +The `FileAttachment` function takes a path and returns a file handle. This handle exposes the file’s name, [MIME type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types), and last modification date as the number of milliseconds since UNIX epoch. ```js echo FileAttachment("volcano.json") @@ -32,7 +32,7 @@ volcano ### Static analysis -The `FileAttachment` function can _only_ be passed a static string literal; constructing a dynamic path such as `FileAttachment("my" + "file.csv")` is invalid syntax. Static analysis is used to invoke [data loaders](../loaders) at build time, and ensures that only referenced files are included in the generated output during build. In the future [#260](https://github.com/observablehq/framework/issues/260), it will also allow content hashes for cache breaking during deploy. +The `FileAttachment` function can _only_ be passed a static string literal; constructing a dynamic path such as `FileAttachment("my" + "file.csv")` is invalid syntax. Static analysis is used to invoke [data loaders](../loaders) at build time, and ensures that only referenced files are included in the generated output during build. This also allows a content hash in the file name for cache breaking during deploy. If you have multiple files, you can enumerate them explicitly like so: @@ -52,6 +52,8 @@ const frames = [ None of the files in `frames` above are loaded until a [content method](#supported-formats) is invoked, for example by saying `frames[0].image()`. +For missing files, `file.lastModified` is undefined. The `file.mimeType` is determined by checking the file extension against the [`mime-db` media type database](https://github.com/jshttp/mime-db); it defaults to `application/octet-stream`. + ## Supported formats `FileAttachment` supports a variety of methods for loading file contents: diff --git a/src/client/stdlib/fileAttachment.js b/src/client/stdlib/fileAttachment.js index 94156052e..7421bfb6a 100644 --- a/src/client/stdlib/fileAttachment.js +++ b/src/client/stdlib/fileAttachment.js @@ -11,8 +11,8 @@ export function FileAttachment(name, base = location.href) { const url = new URL(name, base).href; const file = files.get(url); if (!file) throw new Error(`File not found: ${name}`); - const {path, mimeType} = file; - return new FileAttachmentImpl(new URL(path, location).href, name.split("/").pop(), mimeType); + const {path, mimeType, lastModified} = file; + return new FileAttachmentImpl(new URL(path, location).href, name.split("/").pop(), mimeType, lastModified); } async function remote_fetch(file) { @@ -28,9 +28,10 @@ async function dsv(file, delimiter, {array = false, typed = false} = {}) { } export class AbstractFile { - constructor(name, mimeType = "application/octet-stream") { - Object.defineProperty(this, "name", {value: `${name}`, enumerable: true}); + constructor(name, mimeType = "application/octet-stream", lastModified) { Object.defineProperty(this, "mimeType", {value: `${mimeType}`, enumerable: true}); + Object.defineProperty(this, "name", {value: `${name}`, enumerable: true}); + if (lastModified !== undefined) Object.defineProperty(this, "lastModified", {value: Number(lastModified), enumerable: true}); // prettier-ignore } async blob() { return (await remote_fetch(this)).blob(); @@ -95,8 +96,8 @@ export class AbstractFile { } class FileAttachmentImpl extends AbstractFile { - constructor(url, name, mimeType) { - super(name, mimeType); + constructor(url, name, mimeType, lastModified) { + super(name, mimeType, lastModified); Object.defineProperty(this, "_url", {value: url}); } async url() { diff --git a/src/dataloader.ts b/src/dataloader.ts index 18e41b674..4ff211279 100644 --- a/src/dataloader.ts +++ b/src/dataloader.ts @@ -7,7 +7,7 @@ import JSZip from "jszip"; import {extract} from "tar-stream"; import {maybeStat, prepareOutput} from "./files.js"; import {FileWatchers} from "./fileWatchers.js"; -import {getFileHash} from "./javascript/module.js"; +import {getFileHash, getFileInfo} from "./javascript/module.js"; import type {Logger, Writer} from "./logger.js"; import {cyan, faint, green, red, yellow} from "./tty.js"; @@ -129,12 +129,35 @@ export class LoaderResolver { return FileWatchers.of(this, path, watchPaths, callback); } - getFileHash(path: string): string { + /** + * Get the actual path of a file. For data loaders, it is the output if + * already available (cached). In build this is always the case (unless the + * corresponding data loader fails). However in preview we return the page + * before running the data loaders (which will run on demand from the page), + * so there might be a temporary discrepancy when a cache is stale. + */ + private getFilePath(name: string): string { + let path = name; if (!existsSync(join(this.root, path))) { const loader = this.find(path); - if (loader) path = relative(this.root, loader.path); + if (loader) { + path = relative(this.root, loader.path); + if (name !== path) { + const cachePath = join(".observablehq", "cache", name); + if (existsSync(join(this.root, cachePath))) path = cachePath; + } + } } - return getFileHash(this.root, path); + return path; + } + + getFileHash(name: string): string { + return getFileHash(this.root, this.getFilePath(name)); + } + + getLastModified(name: string): number | undefined { + const entry = getFileInfo(this.root, this.getFilePath(name)); + return entry && Math.floor(entry.mtimeMs); } resolveFilePath(path: string): string { diff --git a/src/render.ts b/src/render.ts index c895f34fc..8672a8ef1 100644 --- a/src/render.ts +++ b/src/render.ts @@ -8,7 +8,7 @@ import {transpileJavaScript} from "./javascript/transpile.js"; import type {MarkdownPage} from "./markdown.js"; import type {PageLink} from "./pager.js"; import {findLink, normalizePath} from "./pager.js"; -import {relativePath} from "./path.js"; +import {relativePath, resolvePath} from "./path.js"; import type {Resolvers} from "./resolvers.js"; import {getResolvers} from "./resolvers.js"; import {rollupClient} from "./rollup.js"; @@ -25,7 +25,8 @@ type RenderInternalOptions = export async function renderPage(page: MarkdownPage, options: RenderOptions & RenderInternalOptions): Promise { const {data} = page; - const {root, md, base, path, pages, title, preview, search, resolvers = await getResolvers(page, options)} = options; + const {root, md, base, path, pages, title, preview, search} = options; + const {loaders, resolvers = await getResolvers(page, options)} = options; const {normalizeLink} = md; const sidebar = data?.sidebar !== undefined ? Boolean(data.sidebar) : options.sidebar; const toc = mergeToc(data?.toc, options.toc); @@ -63,7 +64,9 @@ import ${preview || page.code.length ? `{${preview ? "open, " : ""}define} from )};` : "" }${data?.sql ? `\nimport {registerTable} from ${JSON.stringify(resolveImport("npm:@observablehq/duckdb"))};` : ""}${ - files.size ? `\n${renderFiles(files, resolveFile)}` : "" + files.size + ? `\n${renderFiles(files, resolveFile, (name: string) => loaders.getLastModified(resolvePath(path, name)))}` + : "" }${ data?.sql ? `\n${Object.entries(data.sql) @@ -84,18 +87,19 @@ ${html.unsafe(rewriteHtml(page.html, resolvers))}${renderFooter(path, opt `); } -function renderFiles(files: Iterable, resolve: (name: string) => string): string { +function renderFiles(files: Iterable, resolve: (name: string) => string, getLastModified): string { return Array.from(files) .sort() - .map((f) => renderFile(f, resolve)) + .map((f) => renderFile(f, resolve, getLastModified)) .join(""); } -function renderFile(name: string, resolve: (name: string) => string): string { +function renderFile(name: string, resolve: (name: string) => string, getLastModified): string { return `\nregisterFile(${JSON.stringify(name)}, ${JSON.stringify({ name, mimeType: mime.getType(name) ?? undefined, - path: resolve(name) + path: resolve(name), + lastModified: getLastModified(name) })});`; } diff --git a/src/resolvers.ts b/src/resolvers.ts index bde853a5a..3e231ef63 100644 --- a/src/resolvers.ts +++ b/src/resolvers.ts @@ -114,8 +114,7 @@ export async function getResolvers( } } - // Compute the content hash. TODO In build, this needs to consider the output - // of data loaders, rather than the source of data loaders. + // Compute the content hash. for (const f of assets) hash.update(loaders.getFileHash(resolvePath(path, f))); for (const f of files) hash.update(loaders.getFileHash(resolvePath(path, f))); for (const i of localImports) hash.update(getModuleHash(root, resolvePath(path, i))); diff --git a/test/build-test.ts b/test/build-test.ts index 5271831da..e6f68c5f0 100644 --- a/test/build-test.ts +++ b/test/build-test.ts @@ -101,6 +101,7 @@ class TestEffects extends FileBuildEffects { async writeFile(outputPath: string, contents: string | Buffer): Promise { if (typeof contents === "string" && outputPath.endsWith(".html")) { contents = contents.replace(/^(\s* @@ -63,6 +70,7 @@
+