From 30a692f7f7a68f6b83e5bb886fd9f6ffad65b262 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Fri, 4 Oct 2024 20:43:52 +0000 Subject: [PATCH 1/2] =?UTF-8?q?=E2=9C=A8=20render=20csv=20and=20zip=20file?= =?UTF-8?q?=20in=20CF=20worker=20for=20grapher=20charts?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .vscode/launch.json | 2 +- functions/_common/grapherRenderer.ts | 365 +++++++++++++++++- functions/_common/readmeTools.ts | 301 +++++++++++++++ functions/grapher/[slug].ts | 49 ++- functions/package.json | 1 + functions/tsconfig.json | 3 +- .../src/IndicatorKeyData/IndicatorKeyData.tsx | 44 +-- .../core-table/src/CoreTable.ts | 11 +- .../core-table/src/OwidTable.ts | 4 +- .../grapher/src/core/LegacyToOwidTable.ts | 2 + .../types/src/domainTypes/CoreTableTypes.ts | 1 + packages/@ourworldindata/utils/src/index.ts | 1 + .../utils/src/metadataHelpers.ts | 43 +++ yarn.lock | 83 +++- 14 files changed, 846 insertions(+), 64 deletions(-) create mode 100644 functions/_common/readmeTools.ts diff --git a/.vscode/launch.json b/.vscode/launch.json index 4afec7a04eb..48b3df00747 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -140,4 +140,4 @@ "restart": true }, ] -} \ No newline at end of file +} diff --git a/functions/_common/grapherRenderer.ts b/functions/_common/grapherRenderer.ts index b7b91766eb2..1f5b52d27a9 100644 --- a/functions/_common/grapherRenderer.ts +++ b/functions/_common/grapherRenderer.ts @@ -4,10 +4,20 @@ import { excludeUndefined, GrapherInterface, R2GrapherConfigDirectory, + OwidColumnDef, + getCitationShort, + getAttributionFragmentsFromVariable, + getCitationLong, + getLastUpdatedFromVariable, + OwidTableSlugs, + getNextUpdateFromVariable, } from "@ourworldindata/utils" +import { OwidOrigin } from "@ourworldindata/types" +import { constructReadme } from "./readmeTools" import { svg2png, initialize as initializeSvg2Png } from "svg2png-wasm" import { TimeLogger } from "./timeLogger" import { 
png, StatusError } from "itty-router" +import JSZip from "jszip" import svg2png_wasm from "../../node_modules/svg2png-wasm/svg2png_wasm_bg.wasm" @@ -17,6 +27,7 @@ import LatoMedium from "../_common/fonts/LatoLatin-Medium.ttf.bin" import LatoBold from "../_common/fonts/LatoLatin-Bold.ttf.bin" import PlayfairSemiBold from "../_common/fonts/PlayfairDisplayLatin-SemiBold.ttf.bin" import { Env } from "./env.js" +import { fromPairs } from "lodash" declare global { // eslint-disable-next-line no-var @@ -275,16 +286,13 @@ export async function fetchGrapherConfig( etag: fetchResponse.headers.get("etag"), } } - -async function fetchAndRenderGrapherToSvg( - id: GrapherIdentifier, +async function initGrapher( + identifier: GrapherIdentifier, options: ImageOptions, searchParams: URLSearchParams, env: Env -): Promise { - const grapherLogger = new TimeLogger("grapher") - - const grapherConfigResponse = await fetchGrapherConfig(id, env) +): Promise { + const grapherConfigResponse = await fetchGrapherConfig(identifier, env) if (grapherConfigResponse.status === 404) { // we throw 404 errors instad of returning a 404 response so that the router @@ -304,10 +312,349 @@ async function fetchAndRenderGrapherToSvg( }) grapher.shouldIncludeDetailsInStaticExport = options.details - grapherLogger.log("grapherInit") + return grapher +} + +function assembleMetadata(grapher: Grapher, searchParams: URLSearchParams) { + const columnsToIgnore = new Set( + [ + OwidTableSlugs.entityId, + OwidTableSlugs.time, + OwidTableSlugs.entityColor, + OwidTableSlugs.entityName, + OwidTableSlugs.entityCode, + OwidTableSlugs.year, + OwidTableSlugs.day, + ].map((slug) => slug.toString()) + ) + + const columnsToGet = grapher.inputTable.columnSlugs.filter( + (col) => !columnsToIgnore.has(col) + ) + const useShortNames = searchParams.get("useColumnShortNames") === "true" + console.log("useShortNames", useShortNames) + + const columns: [ + string, + { + title: string + titleProducer: string + titleVariant: string + 
descriptionShort: string + descriptionFromProducer: string + descriptionKey: string[] + descriptionProcessing: string + shortUnit: string + unit: string + timespan: string + tolerance: number + type: string + conversionFactor: number + owidVariableId: number + catalogPath: string + sources: Partial< + Pick< + OwidOrigin, + | "attribution" + | "attributionShort" + | "description" + | "urlDownload" + | "urlMain" + > + >[] + shortName: string + }, + ][] = grapher.inputTable.getColumns(columnsToGet).map((col) => { + console.log("mapping col", col.name) + const { + descriptionShort, + descriptionKey, + descriptionProcessing, + additionalInfo, + shortUnit, + unit, + timespan, + tolerance, + type, + origins, + sourceLink, + sourceName, + owidVariableId, + shortName, + } = col.def as OwidColumnDef + const lastUpdated = getLastUpdatedFromVariable(col.def) + const nextUpdate = getNextUpdateFromVariable(col.def) + + let condensedOrigins: + | Partial< + Pick< + OwidOrigin, + | "attribution" + | "attributionShort" + | "description" + | "urlDownload" + | "urlMain" + > + >[] + | undefined = origins?.map((origin) => { + const { + attribution, + attributionShort, + description, + citationFull, + urlDownload, + urlMain, + dateAccessed, + } = origin + return { + attribution, + attributionShort, + description, + urlDownload, + urlMain, + dateAccessed, + citationFull, + } + }) + + if (!condensedOrigins || condensedOrigins.length === 0) { + condensedOrigins = [ + { + attribution: sourceName, + urlMain: sourceLink, + }, + ] + } + + const def = col.def as OwidColumnDef + + const citationShort = getCitationShort( + def.origins, + getAttributionFragmentsFromVariable(def), + def.owidProcessingLevel + ) + + const citationLong = getCitationLong( + col.titlePublicOrDisplayName, + def.origins ?? [], + col.source ?? 
{}, + getAttributionFragmentsFromVariable(def), + def.presentation?.attributionShort, + def.presentation?.titleVariant, + def.owidProcessingLevel, + undefined + ) + + const titleShort = col.titlePublicOrDisplayName.title + const attributionShort = col.titlePublicOrDisplayName.attributionShort + const titleVariant = col.titlePublicOrDisplayName.titleVariant + const attributionString = + attributionShort && titleVariant + ? `${attributionShort} – ${titleVariant}` + : attributionShort || titleVariant + const titleModifier = attributionString ? ` - ${attributionString}` : "" + const titleLong = `${col.titlePublicOrDisplayName.title}${titleModifier}` + + return [ + useShortNames ? shortName : col.name, + { + titleShort, + titleLong, + descriptionShort, + descriptionKey, + descriptionProcessing, + shortUnit, + unit, + timespan, + tolerance, + type, + conversionFactor: col.display?.conversionFactor, + owidVariableId, + shortName, + additionalInfo, + lastUpdated, + nextUpdate, + citationShort, + citationLong, + fullMetadata: `https://api.ourworldindata.org/v1/indicators/${owidVariableId}.metadata.json`, + }, + ] + }) + + const fullMetadata = { + chart: { + title: grapher.title, + subtitle: grapher.subtitle, + note: grapher.note, + xAxisLabel: grapher.xAxis.label, + yAxisLabel: grapher.yAxis.label, + citation: grapher.sourcesLine, + originalChartUrl: grapher.canonicalUrl, + selection: grapher.selectedEntityNames, + }, + columns: fromPairs(columns), + } + + return fullMetadata +} + +export async function fetchMetadataForGrapher( + identifier: GrapherIdentifier, + env: Env, + searchParams?: URLSearchParams +) { + console.log("Initializing grapher") + const grapher = await initGrapher( + identifier, + TWITTER_OPTIONS, + searchParams ?? new URLSearchParams(""), + env + ) + + await grapher.downloadLegacyDataFromOwidVariableIds() + + const fullMetadata = assembleMetadata( + grapher, + searchParams ?? 
new URLSearchParams("") + ) + + return Response.json(fullMetadata) +} + +export async function fetchZipForGrapher( + identifier: GrapherIdentifier, + env: Env, + searchParams?: URLSearchParams +) { + console.log("preparing to generate zip file") + const grapher = await initGrapher( + identifier, + TWITTER_OPTIONS, + searchParams ?? new URLSearchParams(""), + env + ) + await grapher.downloadLegacyDataFromOwidVariableIds() + ensureDownloadOfDataAllowed(grapher) + const metadata = assembleMetadata(grapher, searchParams ?? new URLSearchParams("")) + const readme = assembleReadme(grapher) + const csv = assembleCsv(grapher, searchParams ?? new URLSearchParams("")) + console.log("Fetched the parts, creating zip file") + const zip = new JSZip() + zip.file( + `${identifier.id}.metadata.json`, + JSON.stringify(metadata, undefined, 2) + ) + zip.file(`${identifier.id}.csv`, csv) + zip.file("readme.md", readme) + const content = await zip.generateAsync({ type: "arraybuffer" }) + console.log("Generated content, returning response") + return new Response(content, { + headers: { + "Content-Type": "application/zip", + }, + }) +} + +function assembleCsv(grapher: Grapher, searchParams: URLSearchParams): string { + const useShortNames = searchParams.get("useColumnShortNames") === "true" + const table = + searchParams.get("csvType") === "filtered" + ? grapher.transformedTable + : grapher.inputTable + return table.toPrettyCsv(useShortNames) +} + +export async function fetchCsvForGrapher( + identifier: GrapherIdentifier, + env: Env, + searchParams?: URLSearchParams +) { + const grapher = await initGrapher( + identifier, + TWITTER_OPTIONS, + searchParams ?? new URLSearchParams(""), + env + ) + await grapher.downloadLegacyDataFromOwidVariableIds() + console.log("checking if download is allowed") + ensureDownloadOfDataAllowed(grapher) + console.log("data download is allowed") + const csv = assembleCsv(grapher, searchParams ?? 
new URLSearchParams("")) + return new Response(csv, { + headers: { + "Content-Type": "text/csv", + }, + }) +} +function ensureDownloadOfDataAllowed(grapher: Grapher) { + if ( + grapher.inputTable.columnsAsArray.some( + (col) => (col.def as OwidColumnDef).nonRedistributable + ) + ) { + throw new StatusError( + 403, + "This chart contains non-redistributable data that we are not allowed to re-share and it therefore cannot be downloaded as a CSV." + ) + } +} + +export async function fetchReadmeForGrapher( + identifier: GrapherIdentifier, + env: Env, + searchParams?: URLSearchParams +) { + console.log("Initializing grapher") + const grapher = await initGrapher( + identifier, + TWITTER_OPTIONS, + searchParams ?? new URLSearchParams(""), + env + ) + + await grapher.downloadLegacyDataFromOwidVariableIds() + + const readme = assembleReadme(grapher) + return new Response(readme, { + headers: { + "Content-Type": "text/markdown", + }, + }) +} + +function assembleReadme(grapher: Grapher): string { + const columnsToIgnore = new Set( + [ + OwidTableSlugs.entityId, + OwidTableSlugs.time, + OwidTableSlugs.entityColor, + OwidTableSlugs.entityName, + OwidTableSlugs.entityCode, + OwidTableSlugs.year, + OwidTableSlugs.day, + ].map((slug) => slug.toString()) + ) + + const columnsToGet = grapher.inputTable.columnSlugs.filter( + (col) => !columnsToIgnore.has(col) + ) + + const columns = grapher.inputTable.getColumns(columnsToGet) + + return constructReadme(grapher, columns) +} +async function fetchAndRenderGrapherToSvg( + identifier: GrapherIdentifier, + options: ImageOptions, + searchParams: URLSearchParams, + env: Env +) { + const grapherLogger = new TimeLogger("grapher") + const grapher = await initGrapher(identifier, options, searchParams, env) + + grapherLogger.log("initGrapher") const promises = [] promises.push(grapher.downloadLegacyDataFromOwidVariableIds()) - if (options.details && grapher.detailsOrderedByReference.length) { promises.push( await 
fetch("https://ourworldindata.org/dods.json") diff --git a/functions/_common/readmeTools.ts b/functions/_common/readmeTools.ts new file mode 100644 index 00000000000..c15ff56d3d4 --- /dev/null +++ b/functions/_common/readmeTools.ts @@ -0,0 +1,301 @@ +import { + excludeUndefined, + formatSourceDate, + getAttributionFragmentsFromVariable, + getLastUpdatedFromVariable, + getNextUpdateFromVariable, + getPhraseForProcessingLevel, + OwidColumnDef, + getDateRange, + uniq, + getCitationShort, + getCitationLong, + prepareSourcesForDisplay, + uniqBy, +} from "@ourworldindata/utils" +import { CoreColumn } from "@ourworldindata/core-table" +import { Grapher } from "@ourworldindata/grapher" + +const markdownNewlineEnding = " " + +export function* getCitationLines( + def: OwidColumnDef, + col: CoreColumn +): Generator { + yield "" + yield "### How to cite this data" + yield "" + yield "#### In-line citation" + yield `If you have limited space (e.g. in data visualizations), you can use this abbreviated in-line citation:` + + markdownNewlineEnding + const attributionFragments = getAttributionFragmentsFromVariable({ + ...def, + source: { name: def.sourceName }, + }) + const citationShort = getCitationShort( + def.origins ?? [], + attributionFragments, + def.owidProcessingLevel + ) + + yield citationShort + + yield "" + + yield "#### Full citation" + const citationLong = getCitationLong( + col.titlePublicOrDisplayName, + def.origins ?? [], + col.source ?? {}, + attributionFragments, + def.presentation?.attributionShort, + def.presentation?.titleVariant, + def.owidProcessingLevel, + undefined + ) + yield citationLong +} + +export function* getDataProcessingLines( + def: OwidColumnDef +): Generator { + yield "" + yield "### How we process data at Our World In Data" + yield "" + yield `All data and visualizations on Our World in Data rely on data sourced from one or several original data providers. Preparing this original data involves several processing steps. 
Depending on the data, this can include standardizing country names and world region definitions, converting units, calculating derived indicators such as per capita measures, as well as adding or adapting metadata such as the name or the description given to an indicator.` + yield `At the link below you can find a detailed description of the structure of our data pipeline, including links to all the code used to prepare data across Our World in Data.` + yield `[Read about our data pipeline](https://docs.owid.io/projects/etl/)` + if (def.descriptionProcessing) { + yield "" + yield `#### Notes on our processing step for this indicator` + yield def.descriptionProcessing + } +} + +export function* getDescriptionLines( + def: OwidColumnDef, + attribution: string +): Generator { + const descriptionKey = def.descriptionKey + if (descriptionKey) { + yield "" + yield `### What you should know about this data` + for (const desc of descriptionKey) yield `* ${desc.trim()}` + } + + if (def.descriptionFromProducer) { + yield "" + yield `### How is this data described by its producer - ${attribution}?` + yield def.descriptionFromProducer.trim() + } + + if (def.additionalInfo) { + yield "" + yield `### Additional information about this data` + yield def.additionalInfo.trim() + } +} + +export function* getKeyDataLines( + def: OwidColumnDef, + col: CoreColumn +): Generator { + const lastUpdated = getLastUpdatedFromVariable(def) + if (lastUpdated) + yield `Last updated: ${formatSourceDate(lastUpdated, "MMMM D, YYYY")}` + + markdownNewlineEnding + + const nextUpdate = getNextUpdateFromVariable(def) + if (nextUpdate) + yield `Next update: ${formatSourceDate(nextUpdate, "MMMM YYYY")}` + + markdownNewlineEnding + + const dateRange = def.timespan ? 
getDateRange(def.timespan) : undefined + if (dateRange) yield `Date range: ${dateRange}` + markdownNewlineEnding + + const unit = def.unit + if (unit) yield `Unit: ${unit}` + markdownNewlineEnding + + const unitConversionFactor = + col.unitConversionFactor && col.unitConversionFactor !== 1 + ? col.unitConversionFactor + : undefined + if (unitConversionFactor) + yield `Unit conversion factor: ${unitConversionFactor}` + + markdownNewlineEnding +} + +export function yieldMultilineTextAsLines(line: string): string[] { + return line.split("\n").map((l) => l.trim()) +} + +export function* getSources( + def: OwidColumnDef +): Generator { + const sourcesForDisplay = uniqBy(prepareSourcesForDisplay(def), "label") + + if (sourcesForDisplay.length === 0) return + else if (sourcesForDisplay.length === 1) { + yield "" + yield "### Source" + } else { + yield "" + yield "### Sources" + } + + for (const source of sourcesForDisplay) { + yield "" + yield `#### ${source.label}` + if (source.dataPublishedBy) + yield `Data published by: ${source.dataPublishedBy.trim()}` + + markdownNewlineEnding + if (source.retrievedOn) + yield `Retrieved on: ${source.retrievedOn.trim()}` + + markdownNewlineEnding + if (source.retrievedFrom) + yield `Retrieved from: ${source.retrievedFrom.trim()}` + + markdownNewlineEnding + } +} + +export function getSource(attribution: string, def: OwidColumnDef): string { + const processingLevelPhrase = + attribution.toLowerCase() !== "our world in data" + ? getPhraseForProcessingLevel(def.owidProcessingLevel) + : undefined + const fullProcessingPhrase = processingLevelPhrase + ? ` – ${processingLevelPhrase} by Our World In Data` + : "" + const source = `${attribution}${fullProcessingPhrase}` + return source +} + +export function getAttribution(def: OwidColumnDef): string { + const producers = uniq( + excludeUndefined((def.origins ?? []).map((o) => o.producer)) + ) + + const attributionFragments = + getAttributionFragmentsFromVariable(def) ?? 
producers + const attribution = attributionFragments.join(", ") + + if (attribution === "") { + return def.sourceName + } else return attribution +} + +export function* getDescription( + def: OwidColumnDef +): Generator { + const description = def.descriptionShort || def.description + if (description) yield* yieldMultilineTextAsLines(description) +} + +export function getTitle(col: CoreColumn): string { + let title = col.titlePublicOrDisplayName.title + if ( + col.titlePublicOrDisplayName.attributionShort && + col.titlePublicOrDisplayName.titleVariant + ) + title = `${title} – ${col.titlePublicOrDisplayName.titleVariant} – ${col.titlePublicOrDisplayName.attributionShort}` + else if (col.titlePublicOrDisplayName.titleVariant) + title = `${title} – ${col.titlePublicOrDisplayName.titleVariant}` + else if (col.titlePublicOrDisplayName.attributionShort) + title = `${title} – ${col.titlePublicOrDisplayName.attributionShort}` + return title +} + +function* columnReadmeText(col: CoreColumn) { + const def = col.def as OwidColumnDef + + const title = getTitle(col) + yield "" + yield `## ${title}` + + yield* getDescription(def) + + yield* getKeyDataLines(def, col) + + yield "" + + const attribution = getAttribution(def) + + const source = getSource(attribution, def) + + yield* getCitationLines(def, col) + + yield `Source: ${source}` + + yield* getDescriptionLines(def, attribution) + + yield* getSources(def) + + yield* getDataProcessingLines(def) + yield "" +} + +export function constructReadme( + grapher: Grapher, + columns: CoreColumn[] +): string { + const isSingleColumn = columns.length === 1 + const sources = columns.flatMap((col) => [...columnReadmeText(col)]) + let readme: string + if (isSingleColumn) + readme = `# ${grapher.title} - Data package + +This data package contains the data that powers the chart ["${grapher.title}"](${grapher.originUrl}) on the Our World in Data website. 
+ +## CSV Structure + +The high level structure of the CSV file is that each row is an observation for an entity (usually a country or region) and a timepoint (usually a year). + +The first two columns in the CSV file are "Entity" and "Code". "Entity" is the name of the entity (e.g. "United States"). "Code" is the OWID internal entity code that we use if the entity is a country or region. For normal countries, this is the same as the [iso alpha-3](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3) code of the entity (e.g. "USA") - for non-standard countries like historical countries these are custom codes. + +The third column is either "Year" or "Day". If the data is annual, this is "Year" and contains only the year as an integer. If the column is "Day", the column contains a date string in the form "YYYY-MM-DD". + +The final column is the data column, which is the time series that powers the chart. If the CSV data is downloaded using the "full data" option, then the column corresponds to the time series below. If the CSV data is downloaded using the "only selected data visible in the chart" option then the data column is transformed depending on the chart type and thus the association with the time series might not be as straightforward. + +## Metadata.json structure + +The .metadata.json file contains metadata about the data package. The "chart" key contains information to recreate the chart, like the title, subtitle etc. The "columns" key contains information about each of the columns in the csv, like the unit, timespan covered, citation for the data etc. + +## About the data + +Our World in Data is almost never the original producer of the data - almost all of the data we use has been compiled by others. If you want to re-use data, it is your responsibility to ensure that you adhere to the sources' license and to credit them correctly. Please note that a single time series may have more than one source - e.g. 
when we stitch together data from different time periods by different producers or when we calculate per capita metrics using population data from a second source. + +## Detailed information about the data + +${sources.join("\n")} + + ` + else + readme = `# ${grapher.title} - Data package + +This data package contains the data that powers the chart ["${grapher.title}"](${grapher.originUrl}) on the Our World in Data website. + +## CSV Structure + +The high level structure of the CSV file is that each row is an observation for an entity (usually a country or region) and a timepoint (usually a year). + +The first two columns in the CSV file are "Entity" and "Code". "Entity" is the name of the entity (e.g. "United States"). "Code" is the OWID internal entity code that we use if the entity is a country or region. For normal countries, this is the same as the [iso alpha-3](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3) code of the entity (e.g. "USA") - for non-standard countries like historical countries these are custom codes. + +The third column is either "Year" or "Day". If the data is annual, this is "Year" and contains only the year as an integer. If the column is "Day", the column contains a date string in the form "YYYY-MM-DD". + +The remaining columns are the data columns, each of which is a time series. If the CSV data is downloaded using the "full data" option, then each column corresponds to one time series below. If the CSV data is downloaded using the "only selected data visible in the chart" option then the data columns are transformed depending on the chart type and thus the association with the time series might not be as straightforward. + +## Metadata.json structure + +The .metadata.json file contains metadata about the data package. The "chart" key contains information to recreate the chart, like the title, subtitle etc. 
The "columns" key contains information about each of the columns in the csv, like the unit, timespan covered, citation for the data etc. + +## About the data + +Our World in Data is almost never the original producer of the data - almost all of the data we use has been compiled by others. If you want to re-use data, it is your responsibility to ensure that you adhere to the sources' license and to credit them correctly. Please note that a single time series may have more than one source - e.g. when we stitch together data from different time periods by different producers or when we calculate per capita metrics using population data from a second source. + +## Detailed information about each time series + +${sources.join("\n")} + + ` + return readme +} diff --git a/functions/grapher/[slug].ts b/functions/grapher/[slug].ts index 1884316e968..88ae85a52d4 100644 --- a/functions/grapher/[slug].ts +++ b/functions/grapher/[slug].ts @@ -4,6 +4,10 @@ import { extensions, handlePageNotFound, fetchUnparsedGrapherConfig, + fetchCsvForGrapher, + fetchMetadataForGrapher, + fetchReadmeForGrapher, + fetchZipForGrapher, getRedirectForUrl, } from "../_common/grapherRenderer.js" import { IRequestStrict, Router, StatusError, error, cors } from "itty-router" @@ -12,6 +16,17 @@ import { handleThumbnailRequest } from "../_common/reusableHandlers.js" const { preflight, corsify } = cors({ allowMethods: ["GET", "OPTIONS", "HEAD"], }) +// We collect the possible extensions here so we can easily take them into account +// when handling redirects +export const extensions = { + configJson: ".config.json", + png: ".png", + svg: ".svg", + csv: ".csv", + metadata: ".metadata.json", + readme: ".readme.md", + zip: ".zip", +} const router = Router< IRequestStrict, @@ -50,6 +65,30 @@ router "svg" ) ) + .get( + `/grapher/:slug${extensions.csv}`, + async ({ params: { slug } }, { searchParams }, env) => + fetchCsvForGrapher({ type: "slug", id: slug }, env, searchParams) + ) + .get( + 
`/grapher/:slug${extensions.metadata}`, + async ({ params: { slug } }, { searchParams }, env) => + fetchMetadataForGrapher( + { type: "slug", id: slug }, + env, + searchParams + ) + ) + .get( + `/grapher/:slug${extensions.readme}`, + async ({ params: { slug } }, { searchParams }, env) => + fetchReadmeForGrapher({ type: "slug", id: slug }, env, searchParams) + ) + .get( + `/grapher/:slug${extensions.zip}`, + async ({ params: { slug } }, { searchParams }, env) => + fetchZipForGrapher({ type: "slug", id: slug }, env, searchParams) + ) .get( "/grapher/:slug", async ({ params: { slug } }, { searchParams }, env) => @@ -77,12 +116,14 @@ export const onRequest: PagesFunction = async (context) => { // if we have a redirect in the _grapherRedirects.json file. // This is done as a catch handler that checks for 404 pages // so that the common, happy path does not have to fetch the redirects file. + console.log("Handling error", e) if (e instanceof StatusError && e.status === 404) { console.log("Handling 404 for", url.pathname) const redirect = await getRedirectForUrl(env, url) return redirect || error(404, "Not found") - } - return error(500, e) + } else if (e instanceof StatusError) { + return error(e.status, e.message) + } else return error(500, e) }) } @@ -99,7 +140,9 @@ async function handleHtmlPageRequest( // { redirect: "manual" } // ) - const grapherPageResp = await env.ASSETS.fetch(url, { redirect: "manual" }) + const grapherPageResp = await env.ASSETS.fetch(env.url, { + redirect: "manual", + }) if (grapherPageResp.status === 404) { // grapherPageResp should be a static 404 HTML page. 
diff --git a/functions/package.json b/functions/package.json index 970d3a6c776..15600d6356b 100644 --- a/functions/package.json +++ b/functions/package.json @@ -4,6 +4,7 @@ "@ourworldindata/grapher": "workspace:^", "@ourworldindata/utils": "workspace:^", "itty-router": "^5.0.17", + "jszip": "^3.10.1", "stripe": "^14.20.0", "svg2png-wasm": "^1.4.1" }, diff --git a/functions/tsconfig.json b/functions/tsconfig.json index 0998a9f102a..9014defb3d5 100644 --- a/functions/tsconfig.json +++ b/functions/tsconfig.json @@ -4,6 +4,7 @@ "module": "esnext", "moduleResolution": "nodenext", "lib": ["esnext"], - "types": ["@cloudflare/workers-types"] + "types": ["@cloudflare/workers-types"], + "esModuleInterop": true } } diff --git a/packages/@ourworldindata/components/src/IndicatorKeyData/IndicatorKeyData.tsx b/packages/@ourworldindata/components/src/IndicatorKeyData/IndicatorKeyData.tsx index eae6ba63347..5853353610f 100644 --- a/packages/@ourworldindata/components/src/IndicatorKeyData/IndicatorKeyData.tsx +++ b/packages/@ourworldindata/components/src/IndicatorKeyData/IndicatorKeyData.tsx @@ -4,6 +4,7 @@ import { getPhraseForProcessingLevel, splitSourceTextIntoFragments, formatSourceDate, + getDateRange, } from "@ourworldindata/utils" import { DATAPAGE_SOURCES_AND_PROCESSING_SECTION_ID } from "../SharedDataPageConstants.js" import { SimpleMarkdownText } from "../SimpleMarkdownText.js" @@ -112,46 +113,3 @@ export const makeLinks = ({ link }: { link?: string }): React.ReactNode => { ) }) } - -const getDateRange = (dateRange: string): string | null => { - // This regex matches: - // Beginning of string - // Ignore whitespace - // a named group called start that matches: - // hyphen aka minus - // 1 or more digits - // Ignore whitespace - // hyphen aka minus OR en dash - // Ignore whitespace - // a named group called end that matches: - // hyphen aka minus - // 1 or more digits - // Ignore whitespace - // End of string - const dateRangeRegex = 
/^\s*(?<start>(-)?\d+)\s*(-|–)\s*(?<end>(-)?\d+)\s*$/ - const match = dateRange.match(dateRangeRegex) - if (match) { - const firstYearString = match.groups?.start - const lastYearString = match.groups?.end - if (!firstYearString || !lastYearString) return null - - const firstYear = parseInt(firstYearString, 10) - const lastYear = parseInt(lastYearString, 10) - let formattedFirstYear - - // if start year is before year 0, add BCE to the end - if (firstYear < 0) formattedFirstYear = `${Math.abs(firstYear)} BCE` - else formattedFirstYear = firstYear - - // if end year is before year 0, add BCE to the end or, if start year is after year 0, add CE to the end - let formattedLastYear - if (lastYear < 0) formattedLastYear = `${Math.abs(lastYear)} BCE` - else if (firstYear < 0) formattedLastYear = `${lastYear} CE` - else formattedLastYear = lastYear - - if (lastYear < 0 || firstYear < 0) - return `${formattedFirstYear} – ${formattedLastYear}` - else return `${formattedFirstYear}–${formattedLastYear}` - } - return null -} diff --git a/packages/@ourworldindata/core-table/src/CoreTable.ts b/packages/@ourworldindata/core-table/src/CoreTable.ts index b015a01ed8e..b1fefb42e71 100644 --- a/packages/@ourworldindata/core-table/src/CoreTable.ts +++ b/packages/@ourworldindata/core-table/src/CoreTable.ts @@ -36,6 +36,7 @@ import { CoreColumnDef, JsTypes, OwidTableSlugs, + OwidColumnDef, } from "@ourworldindata/types" import { AlignedTextTableOptions, @@ -903,11 +904,17 @@ export class CoreTable< return this.toDelimited("\t") } - toCsvWithColumnNames(): string { + toCsvWithColumnNames(useShortNames: boolean = false): string { const delimiter = "," const header = this.columnsAsArray - .map((col) => csvEscape(col.name)) + .map((col) => + csvEscape( + useShortNames && (col.def as OwidColumnDef).shortName + ? 
(col.def as OwidColumnDef).shortName + : col.name + ) + ) .join(delimiter) + "\n" const body = this.rows .map((row) => diff --git a/packages/@ourworldindata/core-table/src/OwidTable.ts b/packages/@ourworldindata/core-table/src/OwidTable.ts index bf3842e5615..c17d1e7b540 100644 --- a/packages/@ourworldindata/core-table/src/OwidTable.ts +++ b/packages/@ourworldindata/core-table/src/OwidTable.ts @@ -606,14 +606,14 @@ export class OwidTable extends CoreTable { } // Give our users a clean CSV of each Grapher. Assumes an Owid Table with entityName. - toPrettyCsv(): string { + toPrettyCsv(useShortNames: boolean = false): string { return this.dropColumns([ OwidTableSlugs.entityId, OwidTableSlugs.time, OwidTableSlugs.entityColor, ]) .sortBy([this.entityNameSlug]) - .toCsvWithColumnNames() + .toCsvWithColumnNames(useShortNames) } @imemo get entityNameColorIndex(): Map { diff --git a/packages/@ourworldindata/grapher/src/core/LegacyToOwidTable.ts b/packages/@ourworldindata/grapher/src/core/LegacyToOwidTable.ts index 4ebb778756d..dad8561d05c 100644 --- a/packages/@ourworldindata/grapher/src/core/LegacyToOwidTable.ts +++ b/packages/@ourworldindata/grapher/src/core/LegacyToOwidTable.ts @@ -615,6 +615,7 @@ const columnDefFromOwidVariable = ( presentation, catalogPath, updatePeriodDays, + shortName, } = variable // Without this the much used var 123 appears as "Countries Continent". We could rename in Grapher but not sure the effects of that. 
@@ -667,6 +668,7 @@ const columnDefFromOwidVariable = ( owidSchemaVersion: variable.schemaVersion, type, sort, + shortName, } } diff --git a/packages/@ourworldindata/types/src/domainTypes/CoreTableTypes.ts b/packages/@ourworldindata/types/src/domainTypes/CoreTableTypes.ts index 2df5e36c638..5fa5ba92b00 100644 --- a/packages/@ourworldindata/types/src/domainTypes/CoreTableTypes.ts +++ b/packages/@ourworldindata/types/src/domainTypes/CoreTableTypes.ts @@ -259,6 +259,7 @@ export interface OwidColumnDef extends CoreColumnDef { catalogPath?: string owidProcessingLevel?: OwidProcessingLevel owidSchemaVersion?: number + shortName?: string } export const OwidEntityNameColumnDef = { diff --git a/packages/@ourworldindata/utils/src/index.ts b/packages/@ourworldindata/utils/src/index.ts index e87f1307af3..ce77166c21b 100644 --- a/packages/@ourworldindata/utils/src/index.ts +++ b/packages/@ourworldindata/utils/src/index.ts @@ -138,6 +138,7 @@ export { splitSourceTextIntoFragments, prepareSourcesForDisplay, formatSourceDate, + getDateRange, getCitationLong, getCitationShort, grabMetadataForGdocLinkedIndicator, diff --git a/packages/@ourworldindata/utils/src/metadataHelpers.ts b/packages/@ourworldindata/utils/src/metadataHelpers.ts index 946c6a43888..6a99a3d3d2b 100644 --- a/packages/@ourworldindata/utils/src/metadataHelpers.ts +++ b/packages/@ourworldindata/utils/src/metadataHelpers.ts @@ -292,3 +292,46 @@ export function grabMetadataForGdocLinkedIndicator( ), } } + +export const getDateRange = (dateRange: string): string | null => { + // This regex matches: + // Beginning of string + // Ignore whitespace + // a named group called start that matches: + // hyphen aka minus + // 1 or more digits + // Ignore whitespace + // hyphen aka minus OR en dash + // Ignore whitespace + // a named group called end that matches: + // hyphen aka minus + // 1 or more digits + // Ignore whitespace + // End of string + const dateRangeRegex = /^\s*(?<start>(-)?\d+)\s*(-|–)\s*(?<end>(-)?\d+)\s*$/ + const match =
dateRange.match(dateRangeRegex) + if (match) { + const firstYearString = match.groups?.start + const lastYearString = match.groups?.end + if (!firstYearString || !lastYearString) return null + + const firstYear = parseInt(firstYearString, 10) + const lastYear = parseInt(lastYearString, 10) + let formattedFirstYear + + // if start year is before year 0, add BCE to the end + if (firstYear < 0) formattedFirstYear = `${Math.abs(firstYear)} BCE` + else formattedFirstYear = firstYear + + // if end year is before year 0, add BCE to the end or, if start year is before year 0, add CE to the end + let formattedLastYear + if (lastYear < 0) formattedLastYear = `${Math.abs(lastYear)} BCE` + else if (firstYear < 0) formattedLastYear = `${lastYear} CE` + else formattedLastYear = lastYear + + if (lastYear < 0 || firstYear < 0) + return `${formattedFirstYear} – ${formattedLastYear}` + else return `${formattedFirstYear}–${formattedLastYear}` + } + return null +} diff --git a/yarn.lock b/yarn.lock index 47dd646856f..ac7a3bf197d 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2938,7 +2938,7 @@ __metadata: languageName: node linkType: hard -"@npmcli/package-json@npm:5.2.0, @npmcli/package-json@npm:^5.0.0, @npmcli/package-json@npm:^5.1.0": +"@npmcli/package-json@npm:5.2.0, @npmcli/package-json@npm:^5.1.0": version: 5.2.0 resolution: "@npmcli/package-json@npm:5.2.0" dependencies: @@ -2953,6 +2953,21 @@ __metadata: languageName: node linkType: hard +"@npmcli/package-json@npm:^5.0.0": + version: 5.1.0 + resolution: "@npmcli/package-json@npm:5.1.0" + dependencies: + "@npmcli/git": "npm:^5.0.0" + glob: "npm:^10.2.2" + hosted-git-info: "npm:^7.0.0" + json-parse-even-better-errors: "npm:^3.0.0" + normalize-package-data: "npm:^6.0.0" + proc-log: "npm:^4.0.0" + semver: "npm:^7.5.3" + checksum: 10/0e5cb5eff32cf80234525160a702c91a38e4b98ab74e34e2632b43c4350dbad170bd835989cc7d6e18d24798e3242e45b60f3d5e26bd128fe1c4529931105f8e + languageName: node + linkType: hard + "@npmcli/promise-spawn@npm:^7.0.0":
version: 7.0.2 resolution: "@npmcli/promise-spawn@npm:7.0.2" @@ -8541,7 +8556,7 @@ __metadata: languageName: node linkType: hard -"dedent@npm:1.5.3, dedent@npm:^1.0.0": +"dedent@npm:1.5.3": version: 1.5.3 resolution: "dedent@npm:1.5.3" peerDependencies: @@ -8553,6 +8568,18 @@ __metadata: languageName: node linkType: hard +"dedent@npm:^1.0.0": + version: 1.5.1 + resolution: "dedent@npm:1.5.1" + peerDependencies: + babel-plugin-macros: ^3.1.0 + peerDependenciesMeta: + babel-plugin-macros: + optional: true + checksum: 10/fc00a8bc3dfb7c413a778dc40ee8151b6c6ff35159d641f36ecd839c1df5c6e0ec5f4992e658c82624a1a62aaecaffc23b9c965ceb0bbf4d698bfc16469ac27d + languageName: node + linkType: hard + "deep-extend@npm:^0.6.0": version: 0.6.0 resolution: "deep-extend@npm:0.6.0" @@ -8826,7 +8853,14 @@ __metadata: languageName: node linkType: hard -"dotenv@npm:^16.0.1, dotenv@npm:^16.0.3, dotenv@npm:^16.4.4, dotenv@npm:~16.4.5": +"dotenv@npm:^16.0.1, dotenv@npm:^16.0.3": + version: 16.3.1 + resolution: "dotenv@npm:16.3.1" + checksum: 10/dbb778237ef8750e9e3cd1473d3c8eaa9cc3600e33a75c0e36415d0fa0848197f56c3800f77924c70e7828f0b03896818cd52f785b07b9ad4d88dba73fbba83f + languageName: node + linkType: hard + +"dotenv@npm:^16.4.4, dotenv@npm:~16.4.5": version: 16.4.5 resolution: "dotenv@npm:16.4.5" checksum: 10/55a3134601115194ae0f924e54473459ed0d9fc340ae610b676e248cca45aa7c680d86365318ea964e6da4e2ea80c4514c1adab5adb43d6867fb57ff068f95c8 @@ -11583,6 +11617,13 @@ __metadata: languageName: node linkType: hard +"immediate@npm:~3.0.5": + version: 3.0.6 + resolution: "immediate@npm:3.0.6" + checksum: 10/f9b3486477555997657f70318cc8d3416159f208bec4cca3ff3442fd266bc23f50f0c9bd8547e1371a6b5e82b821ec9a7044a4f7b944798b25aa3cc6d5e63e62 + languageName: node + linkType: hard + "immutable@npm:^4.0.0, immutable@npm:^4.3.6": version: 4.3.6 resolution: "immutable@npm:4.3.6" @@ -13202,6 +13243,18 @@ __metadata: languageName: node linkType: hard +"jszip@npm:^3.10.1": + version: 3.10.1 + resolution: 
"jszip@npm:3.10.1" + dependencies: + lie: "npm:~3.3.0" + pako: "npm:~1.0.2" + readable-stream: "npm:~2.3.6" + setimmediate: "npm:^1.0.5" + checksum: 10/bfbfbb9b0a27121330ac46ab9cdb3b4812433faa9ba4a54742c87ca441e31a6194ff70ae12acefa5fe25406c432290e68003900541d948a169b23d30c34dd984 + languageName: node + linkType: hard + "just-diff-apply@npm:^5.2.0": version: 5.5.0 resolution: "just-diff-apply@npm:5.5.0" @@ -13467,6 +13520,15 @@ __metadata: languageName: node linkType: hard +"lie@npm:~3.3.0": + version: 3.3.0 + resolution: "lie@npm:3.3.0" + dependencies: + immediate: "npm:~3.0.5" + checksum: 10/f335ce67fe221af496185d7ce39c8321304adb701e122942c495f4f72dcee8803f9315ee572f5f8e8b08b9e8d7195da91b9fad776e8864746ba8b5e910adf76e + languageName: node + linkType: hard + "lilconfig@npm:3.0.0": version: 3.0.0 resolution: "lilconfig@npm:3.0.0" @@ -15269,6 +15331,7 @@ __metadata: "@ourworldindata/grapher": "workspace:^" "@ourworldindata/utils": "workspace:^" itty-router: "npm:^5.0.17" + jszip: "npm:^3.10.1" stripe: "npm:^14.20.0" svg2png-wasm: "npm:^1.4.1" languageName: unknown @@ -15444,6 +15507,13 @@ __metadata: languageName: node linkType: hard +"pako@npm:~1.0.2": + version: 1.0.11 + resolution: "pako@npm:1.0.11" + checksum: 10/1ad07210e894472685564c4d39a08717e84c2a68a70d3c1d9e657d32394ef1670e22972a433cbfe48976cb98b154ba06855dcd3fcfba77f60f1777634bec48c0 + languageName: node + linkType: hard + "papaparse@npm:^5.3.1": version: 5.3.1 resolution: "papaparse@npm:5.3.1" @@ -18044,6 +18114,13 @@ __metadata: languageName: node linkType: hard +"setimmediate@npm:^1.0.5": + version: 1.0.5 + resolution: "setimmediate@npm:1.0.5" + checksum: 10/76e3f5d7f4b581b6100ff819761f04a984fa3f3990e72a6554b57188ded53efce2d3d6c0932c10f810b7c59414f85e2ab3c11521877d1dea1ce0b56dc906f485 + languageName: node + linkType: hard + "setprototypeof@npm:1.2.0": version: 1.2.0 resolution: "setprototypeof@npm:1.2.0" From 97595f02ebf4542b1596881e89f2109ccbacca17 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: 
Thu, 10 Oct 2024 14:07:44 +0200 Subject: [PATCH 2/2] =?UTF-8?q?=F0=9F=94=A8=20incorporate=20PR=20feedback?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- functions/_common/grapherRenderer.ts | 8 +++++++- functions/_common/readmeTools.ts | 4 +++- functions/grapher/[slug].ts | 11 ----------- packages/@ourworldindata/utils/src/Util.ts | 2 ++ 4 files changed, 12 insertions(+), 13 deletions(-) diff --git a/functions/_common/grapherRenderer.ts b/functions/_common/grapherRenderer.ts index 1f5b52d27a9..5937f2a7069 100644 --- a/functions/_common/grapherRenderer.ts +++ b/functions/_common/grapherRenderer.ts @@ -54,8 +54,11 @@ export const extensions = { configJson: ".config.json", png: ".png", svg: ".svg", + csv: ".csv", + metadata: ".metadata.json", + readme: ".readme.md", + zip: ".zip", } - interface ImageOptions { pngWidth: number pngHeight: number @@ -454,6 +457,7 @@ function assembleMetadata(grapher: Grapher, searchParams: URLSearchParams) { : attributionShort || titleVariant const titleModifier = attributionString ? ` - ${attributionString}` : "" const titleLong = `${col.titlePublicOrDisplayName.title}${titleModifier}` + const dateDownloaded = new Date() return [ useShortNames ? 
shortName : col.name, @@ -477,6 +481,8 @@ function assembleMetadata(grapher: Grapher, searchParams: URLSearchParams) { citationShort, citationLong, fullMetadata: `https://api.ourworldindata.org/v1/indicators/${owidVariableId}.metadata.json`, + // date downloaded should be YYYY-MM-DD + dateDownloaded: dateDownloaded.toISOString().split("T")[0], }, ] }) diff --git a/functions/_common/readmeTools.ts b/functions/_common/readmeTools.ts index c15ff56d3d4..1c189706f49 100644 --- a/functions/_common/readmeTools.ts +++ b/functions/_common/readmeTools.ts @@ -12,6 +12,7 @@ import { getCitationLong, prepareSourcesForDisplay, uniqBy, + formatDate, } from "@ourworldindata/utils" import { CoreColumn } from "@ourworldindata/core-table" import { Grapher } from "@ourworldindata/grapher" @@ -241,10 +242,11 @@ export function constructReadme( const isSingleColumn = columns.length === 1 const sources = columns.flatMap((col) => [...columnReadmeText(col)]) let readme: string + const downloadDate = formatDate(new Date()) // formats the date as "October 10, 2024" if (isSingleColumn) readme = `# ${grapher.title} - Data package -This data package contains the data that powers the chart ["${grapher.title}"](${grapher.originUrl}) on the Our World in Data website. +This data package contains the data that powers the chart ["${grapher.title}"](${grapher.originUrl}) on the Our World in Data website. It was downloaded on ${downloadDate}. 
## CSV Structure diff --git a/functions/grapher/[slug].ts b/functions/grapher/[slug].ts index 88ae85a52d4..25bfb057566 100644 --- a/functions/grapher/[slug].ts +++ b/functions/grapher/[slug].ts @@ -16,17 +16,6 @@ import { handleThumbnailRequest } from "../_common/reusableHandlers.js" const { preflight, corsify } = cors({ allowMethods: ["GET", "OPTIONS", "HEAD"], }) -// We collect the possible extensions here so we can easily take them into account -// when handling redirects -export const extensions = { - configJson: ".config.json", - png: ".png", - svg: ".svg", - csv: ".csv", - metadata: ".metadata.json", - readme: ".readme.md", - zip: ".zip", -} const router = Router< IRequestStrict, diff --git a/packages/@ourworldindata/utils/src/Util.ts b/packages/@ourworldindata/utils/src/Util.ts index 96af7378925..05002360011 100644 --- a/packages/@ourworldindata/utils/src/Util.ts +++ b/packages/@ourworldindata/utils/src/Util.ts @@ -1315,6 +1315,8 @@ export const getIndexableKeys = Object.keys as ( obj: T ) => Array +/** Formats a date like this: "October 10, 2024" + */ export const formatDate = (date: Date): string => { return date.toLocaleDateString("en-US", { year: "numeric",