diff --git a/examples/README.md b/examples/README.md index 531a24bd3..b39311b61 100644 --- a/examples/README.md +++ b/examples/README.md @@ -56,6 +56,7 @@ - [`loader-arrow`](https://observablehq.observablehq.cloud/framework-example-loader-arrow/) - Generating Apache Arrow IPC files - [`loader-databricks`](https://observablehq.observablehq.cloud/framework-example-loader-databricks/) - Loading data from Databricks - [`loader-duckdb`](https://observablehq.observablehq.cloud/framework-example-loader-duckdb/) - Processing data with DuckDB +- [`loader-elasticsearch`](https://observablehq.observablehq.cloud/framework-example-loader-elasticsearch/) - Loading data from Elasticsearch - [`loader-github`](https://observablehq.observablehq.cloud/framework-example-loader-github/) - Loading data from GitHub - [`loader-google-analytics`](https://observablehq.observablehq.cloud/framework-example-loader-google-analytics/) - Loading data from Google Analytics - [`loader-julia-to-txt`](https://observablehq.observablehq.cloud/framework-example-loader-julia-to-txt/) - Generating TXT from Julia diff --git a/examples/loader-elasticsearch/.gitignore b/examples/loader-elasticsearch/.gitignore new file mode 100644 index 000000000..0210d7df8 --- /dev/null +++ b/examples/loader-elasticsearch/.gitignore @@ -0,0 +1,5 @@ +.DS_Store +.env +/dist/ +node_modules/ +yarn-error.log diff --git a/examples/loader-elasticsearch/README.md b/examples/loader-elasticsearch/README.md new file mode 100644 index 000000000..32b6ad124 --- /dev/null +++ b/examples/loader-elasticsearch/README.md @@ -0,0 +1,37 @@ +[Framework examples →](../) + +# Elasticsearch data loader + +View live: <https://observablehq.observablehq.cloud/framework-example-loader-elasticsearch/> + +This Observable Framework example demonstrates how to write a TypeScript data loader that runs a query on Elasticsearch using the [Elasticsearch Node.js client](https://www.elastic.co/guide/en/elasticsearch/client/javascript-api/current/index.html).
The data loader lives in [`src/data/kibana_sample_data_logs.csv.ts`](./src/data/kibana_sample_data_logs.csv.ts) and uses the helper [`src/data/es_client.ts`](./src/data/es_client.ts). + +To fully reproduce the example, you need to have a setup with both Elasticsearch and Kibana running to create the sample data. Here’s how to set up both on macOS: + +```bash +# Download and run Elasticsearch +curl -O https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-8.14.1-darwin-x86_64.tar.gz +gunzip -c elasticsearch-8.14.1-darwin-x86_64.tar.gz | tar xopf - +cd elasticsearch-8.14.1 +./bin/elasticsearch + +# Next, in another terminal tab, download and run Kibana +curl -O https://artifacts.elastic.co/downloads/kibana/kibana-8.14.1-darwin-x86_64.tar.gz +gunzip -c kibana-8.14.1-darwin-x86_64.tar.gz | tar xopf - +cd kibana-8.14.1 +./bin/kibana +``` + +The commands for both will output instructions on how to finish the setup with security enabled. Once you have both running, you can create the “Sample web logs” dataset in Kibana via this URL: http://localhost:5601/app/home#/tutorial_directory/sampleData + +Finally, create the `.env` file shown below, using the credentials for the user `elastic` that were logged when Elasticsearch started.
To get the CA fingerprint for the config, run the following command from the directory where you downloaded and extracted Elasticsearch: + +``` +openssl x509 -fingerprint -sha256 -noout -in ./elasticsearch-8.14.1/config/certs/http_ca.crt +``` + +``` +ES_NODE="https://elastic:<PASSWORD>@localhost:9200" +ES_CA_FINGERPRINT="<CA_FINGERPRINT>" +ES_UNSAFE_TLS_REJECT_UNAUTHORIZED="FALSE" +``` diff --git a/examples/loader-elasticsearch/observablehq.config.js b/examples/loader-elasticsearch/observablehq.config.js new file mode 100644 index 000000000..fb0f92431 --- /dev/null +++ b/examples/loader-elasticsearch/observablehq.config.js @@ -0,0 +1,3 @@ +export default { + root: "src" +}; diff --git a/examples/loader-elasticsearch/package.json b/examples/loader-elasticsearch/package.json new file mode 100644 index 000000000..e7e554ef1 --- /dev/null +++ b/examples/loader-elasticsearch/package.json @@ -0,0 +1,26 @@ +{ + "type": "module", + "private": true, + "scripts": { + "clean": "rimraf src/.observablehq/cache", + "build": "rimraf dist && observable build", + "dev": "observable preview", + "deploy": "observable deploy", + "observable": "observable" + }, + "dependencies": { + "@elastic/elasticsearch": "^8.14.0", + "@observablehq/framework": "latest", + "d3-dsv": "^3.0.1", + "d3-time": "^3.1.0", + "dotenv": "^16.4.5" + }, + "devDependencies": { + "@types/d3-dsv": "^3.0.7", + "@types/d3-time": "^3.0.3", + "rimraf": "^5.0.5" + }, + "engines": { + "node": ">=20" + } +} diff --git a/examples/loader-elasticsearch/src/.gitignore b/examples/loader-elasticsearch/src/.gitignore new file mode 100644 index 000000000..1235d15eb --- /dev/null +++ b/examples/loader-elasticsearch/src/.gitignore @@ -0,0 +1 @@ +/.observablehq/cache/ diff --git a/examples/loader-elasticsearch/src/data/es_client.ts b/examples/loader-elasticsearch/src/data/es_client.ts new file mode 100644 index 000000000..23ccd3bd4 --- /dev/null +++ b/examples/loader-elasticsearch/src/data/es_client.ts @@ -0,0 +1,67 @@ +import "dotenv/config"; +import { Client } from
"@elastic/elasticsearch"; + +// Have a look at the "Getting started" guide of the Elasticsearch node.js client +// to learn more about how to configure these environment variables: +// https://www.elastic.co/guide/en/elasticsearch/client/javascript-api/current/getting-started-js.html + +const { + // ES_NODE can include the username and password in the URL, e.g.: + // ES_NODE=https://<USERNAME>:<PASSWORD>@<HOST>:9200 + ES_NODE, + // As an alternative to ES_NODE when using Elastic Cloud, you can use ES_CLOUD_ID and + // set it to the Cloud ID that you can find in the cloud console of the deployment (https://cloud.elastic.co/). + ES_CLOUD_ID, + // ES_API_KEY can be used instead of username and password. + // The API key will take precedence if both are set. + ES_API_KEY, + ES_USERNAME, + ES_PASSWORD, + // the fingerprint (SHA256) of the CA certificate that is used to sign + // the certificate that the Elasticsearch node presents for TLS. + ES_CA_FINGERPRINT, + // Warning: This option should be considered an insecure workaround for local development only. + // You may wish to specify a self-signed certificate rather than disabling certificate verification. + // ES_UNSAFE_TLS_REJECT_UNAUTHORIZED can be set to FALSE to disable certificate verification. + // See https://www.elastic.co/guide/en/elasticsearch/client/javascript-api/current/client-connecting.html#auth-tls for more. + ES_UNSAFE_TLS_REJECT_UNAUTHORIZED, +} = process.env; + +if ((!ES_NODE && !ES_CLOUD_ID) || (ES_NODE && ES_CLOUD_ID)) + throw new Error( + "Either ES_NODE or ES_CLOUD_ID need to be defined, but not both.", + ); + +const esUrl = ES_NODE ? new URL(ES_NODE) : undefined; +const isHTTPS = esUrl?.protocol === "https:"; +const isLocalhost = esUrl?.hostname === "localhost"; + +export const esClient = new Client({ + ...(ES_NODE ? { node: ES_NODE } : {}), + ...(ES_CLOUD_ID ? { cloud: { id: ES_CLOUD_ID } } : {}), + ...(ES_CA_FINGERPRINT ? { caFingerprint: ES_CA_FINGERPRINT } : {}), + ...(ES_API_KEY + ?
{ + auth: { + apiKey: ES_API_KEY, + }, + } + : {}), + ...(!ES_API_KEY && ES_USERNAME && ES_PASSWORD + ? { + auth: { + username: ES_USERNAME, + password: ES_PASSWORD, + }, + } + : {}), + ...(isHTTPS && + isLocalhost && + ES_UNSAFE_TLS_REJECT_UNAUTHORIZED?.toLowerCase() === "false" + ? { + tls: { + rejectUnauthorized: false, + }, + } + : {}), +}); diff --git a/examples/loader-elasticsearch/src/data/kibana_sample_data_logs.csv b/examples/loader-elasticsearch/src/data/kibana_sample_data_logs.csv new file mode 100644 index 000000000..aff60bffe --- /dev/null +++ b/examples/loader-elasticsearch/src/data/kibana_sample_data_logs.csv @@ -0,0 +1,184 @@ +date,count,response_code +2024-06-16,230,200 +2024-06-16,12,503 +2024-06-16,7,404 +2024-06-17,209,200 +2024-06-17,12,404 +2024-06-17,10,503 +2024-06-18,210,200 +2024-06-18,16,404 +2024-06-18,4,503 +2024-06-19,220,200 +2024-06-19,11,404 +2024-06-19,5,503 +2024-06-20,211,200 +2024-06-20,15,404 +2024-06-20,4,503 +2024-06-21,210,200 +2024-06-21,12,404 +2024-06-21,8,503 +2024-06-22,211,200 +2024-06-22,10,503 +2024-06-22,8,404 +2024-06-23,219,200 +2024-06-23,9,404 +2024-06-23,3,503 +2024-06-24,211,200 +2024-06-24,12,404 +2024-06-24,7,503 +2024-06-25,208,200 +2024-06-25,11,404 +2024-06-25,11,503 +2024-06-26,222,200 +2024-06-26,7,503 +2024-06-26,1,404 +2024-06-27,209,200 +2024-06-27,13,503 +2024-06-27,8,404 +2024-06-28,203,200 +2024-06-28,18,404 +2024-06-28,9,503 +2024-06-29,216,200 +2024-06-29,8,404 +2024-06-29,6,503 +2024-06-30,214,200 +2024-06-30,10,404 +2024-06-30,6,503 +2024-07-01,212,200 +2024-07-01,12,404 +2024-07-01,6,503 +2024-07-02,215,200 +2024-07-02,12,404 +2024-07-02,3,503 +2024-07-03,212,200 +2024-07-03,11,404 +2024-07-03,6,503 +2024-07-04,210,200 +2024-07-04,14,404 +2024-07-04,7,503 +2024-07-05,211,200 +2024-07-05,14,404 +2024-07-05,5,503 +2024-07-06,196,200 +2024-07-06,32,404 +2024-07-06,2,503 +2024-07-07,205,200 +2024-07-07,17,404 +2024-07-07,8,503 +2024-07-08,204,200 +2024-07-08,14,404 +2024-07-08,12,503 
+2024-07-09,202,200 +2024-07-09,17,404 +2024-07-09,11,503 +2024-07-10,214,200 +2024-07-10,10,503 +2024-07-10,6,404 +2024-07-11,206,200 +2024-07-11,13,404 +2024-07-11,11,503 +2024-07-12,218,200 +2024-07-12,6,404 +2024-07-12,6,503 +2024-07-13,216,200 +2024-07-13,10,404 +2024-07-13,4,503 +2024-07-14,216,200 +2024-07-14,7,404 +2024-07-14,6,503 +2024-07-15,218,200 +2024-07-15,12,404 +2024-07-15,1,503 +2024-07-16,162,200 +2024-07-16,7,404 +2024-07-16,4,503 +2024-07-17,211,200 +2024-07-17,14,404 +2024-07-17,5,503 +2024-07-18,214,200 +2024-07-18,9,404 +2024-07-18,7,503 +2024-07-19,213,200 +2024-07-19,9,404 +2024-07-19,8,503 +2024-07-20,212,200 +2024-07-20,10,404 +2024-07-20,7,503 +2024-07-21,203,200 +2024-07-21,21,404 +2024-07-21,7,503 +2024-07-22,215,200 +2024-07-22,8,503 +2024-07-22,7,404 +2024-07-23,208,200 +2024-07-23,17,404 +2024-07-23,4,503 +2024-07-24,212,200 +2024-07-24,13,404 +2024-07-24,6,503 +2024-07-25,211,200 +2024-07-25,13,404 +2024-07-25,6,503 +2024-07-26,213,200 +2024-07-26,9,404 +2024-07-26,8,503 +2024-07-27,210,200 +2024-07-27,110,404 +2024-07-27,9,503 +2024-07-28,217,200 +2024-07-28,8,404 +2024-07-28,6,503 +2024-07-29,200,200 +2024-07-29,17,404 +2024-07-29,13,503 +2024-07-30,215,200 +2024-07-30,12,404 +2024-07-30,3,503 +2024-07-31,212,200 +2024-07-31,11,404 +2024-07-31,7,503 +2024-08-01,206,200 +2024-08-01,13,503 +2024-08-01,11,404 +2024-08-02,216,200 +2024-08-02,7,404 +2024-08-02,7,503 +2024-08-03,214,200 +2024-08-03,14,404 +2024-08-03,2,503 +2024-08-04,213,200 +2024-08-04,13,503 +2024-08-04,4,404 +2024-08-05,212,200 +2024-08-05,9,404 +2024-08-05,9,503 +2024-08-06,211,200 +2024-08-06,11,503 +2024-08-06,8,404 +2024-08-07,210,200 +2024-08-07,13,404 +2024-08-07,7,503 +2024-08-08,206,200 +2024-08-08,16,404 +2024-08-08,8,503 +2024-08-09,211,200 +2024-08-09,13,404 +2024-08-09,6,503 +2024-08-10,212,200 +2024-08-10,13,404 +2024-08-10,5,503 +2024-08-11,208,200 +2024-08-11,12,404 +2024-08-11,10,503 +2024-08-12,210,200 +2024-08-12,10,404 +2024-08-12,10,503 
+2024-08-13,213,200 +2024-08-13,9,503 +2024-08-13,8,404 +2024-08-14,210,200 +2024-08-14,13,404 +2024-08-14,7,503 +2024-08-15,194,200 +2024-08-15,8,404 +2024-08-15,3,503 diff --git a/examples/loader-elasticsearch/src/data/kibana_sample_data_logs.csv.ts b/examples/loader-elasticsearch/src/data/kibana_sample_data_logs.csv.ts new file mode 100644 index 000000000..1ff13918e --- /dev/null +++ b/examples/loader-elasticsearch/src/data/kibana_sample_data_logs.csv.ts @@ -0,0 +1,62 @@ +import { csvFormat } from "d3-dsv"; +import { esClient } from "./es_client.js"; + +interface AggsResponseFormat { + logs_histogram: { + buckets: Array<{ + key: number; + key_as_string: string; + doc_count: number; + response_code: { + buckets: Array<{ key: string; doc_count: number }>; + }; + }>; + }; +} + +interface LoaderOutputFormat { + date: string; + count: number; + response_code: string; +} + +const resp = await esClient.search<unknown, AggsResponseFormat>({ + index: "kibana_sample_data_logs", + size: 0, + aggs: { + logs_histogram: { + date_histogram: { + field: "@timestamp", + calendar_interval: "1d", + }, + aggs: { + response_code: { + terms: { + field: "response.keyword", + }, + }, + }, + }, + }, +}); + +process.stdout.write( + csvFormat( + // This transforms the nested response from Elasticsearch into a flat array. + resp.aggregations!.logs_histogram.buckets.reduce<Array<LoaderOutputFormat>>( + (p, c) => { + p.push( + ...c.response_code.buckets.map((d) => ({ + // Just keep the date from the full ISO string. + date: c.key_as_string.split("T")[0], + count: d.doc_count, + response_code: d.key, + })), + ); + + return p; + }, + [], + ), + ), +); diff --git a/examples/loader-elasticsearch/src/index.md b/examples/loader-elasticsearch/src/index.md new file mode 100644 index 000000000..4740f03d6 --- /dev/null +++ b/examples/loader-elasticsearch/src/index.md @@ -0,0 +1,184 @@ +# Elasticsearch data loader + +Here’s a TypeScript data loader that queries an Elasticsearch cluster.
+ +```ts +import { csvFormat } from "d3-dsv"; +import { esClient } from "./es_client.js"; + +interface AggsResponseFormat { + logs_histogram: { + buckets: Array<{ + key: number; + key_as_string: string; + doc_count: number; + response_code: { + buckets: Array<{ key: string; doc_count: number }>; + }; + }>; + }; +} + +interface LoaderOutputFormat { + date: string; + count: number; + response_code: string; +} + +const resp = await esClient.search<unknown, AggsResponseFormat>({ + index: "kibana_sample_data_logs", + size: 0, + aggs: { + logs_histogram: { + date_histogram: { + field: "@timestamp", + calendar_interval: "1d", + }, + aggs: { + response_code: { + terms: { + field: "response.keyword", + }, + }, + }, + }, + }, +}); + +process.stdout.write( + csvFormat( + // This transforms the nested response from Elasticsearch into a flat array. + resp.aggregations!.logs_histogram.buckets.reduce<Array<LoaderOutputFormat>>( + (p, c) => { + p.push( + ...c.response_code.buckets.map((d) => ({ + // Just keep the date from the full ISO string. + date: c.key_as_string.split("T")[0], + count: d.doc_count, + response_code: d.key, + })), + ); + + return p; + }, + [], + ), + ), +); +``` + +The data loader uses a helper file, `es_client.ts`, which provides a wrapper around the `@elastic/elasticsearch` package. This reduces the amount of boilerplate you need to write to issue a query. + +```ts +import "dotenv/config"; +import { Client } from "@elastic/elasticsearch"; + +// Have a look at the "Getting started" guide of the Elasticsearch node.js client +// to learn more about how to configure these environment variables: +// https://www.elastic.co/guide/en/elasticsearch/client/javascript-api/current/getting-started-js.html + +const { + // ES_NODE can include the username and password in the URL, e.g.: + // ES_NODE=https://<USERNAME>:<PASSWORD>@<HOST>:9200 + ES_NODE, + // As an alternative to ES_NODE when using Elastic Cloud, you can use ES_CLOUD_ID and + // set it to the Cloud ID that you can find in the cloud console of the deployment (https://cloud.elastic.co/).
+ ES_CLOUD_ID, + // ES_API_KEY can be used instead of username and password. + // The API key will take precedence if both are set. + ES_API_KEY, + ES_USERNAME, + ES_PASSWORD, + // the fingerprint (SHA256) of the CA certificate that is used to sign + // the certificate that the Elasticsearch node presents for TLS. + ES_CA_FINGERPRINT, + // Warning: This option should be considered an insecure workaround for local development only. + // You may wish to specify a self-signed certificate rather than disabling certificate verification. + // ES_UNSAFE_TLS_REJECT_UNAUTHORIZED can be set to FALSE to disable certificate verification. + // See https://www.elastic.co/guide/en/elasticsearch/client/javascript-api/current/client-connecting.html#auth-tls for more. + ES_UNSAFE_TLS_REJECT_UNAUTHORIZED, +} = process.env; + +if ((!ES_NODE && !ES_CLOUD_ID) || (ES_NODE && ES_CLOUD_ID)) + throw new Error( + "Either ES_NODE or ES_CLOUD_ID need to be defined, but not both.", + ); + +const esUrl = ES_NODE ? new URL(ES_NODE) : undefined; +const isHTTPS = esUrl?.protocol === "https:"; +const isLocalhost = esUrl?.hostname === "localhost"; + +export const esClient = new Client({ + ...(ES_NODE ? { node: ES_NODE } : {}), + ...(ES_CLOUD_ID ? { cloud: { id: ES_CLOUD_ID } } : {}), + ...(ES_CA_FINGERPRINT ? { caFingerprint: ES_CA_FINGERPRINT } : {}), + ...(ES_API_KEY + ? { + auth: { + apiKey: ES_API_KEY, + }, + } + : {}), + ...(!ES_API_KEY && ES_USERNAME && ES_PASSWORD + ? { + auth: { + username: ES_USERNAME, + password: ES_PASSWORD, + }, + } + : {}), + ...(isHTTPS && + isLocalhost && + ES_UNSAFE_TLS_REJECT_UNAUTHORIZED?.toLowerCase() === "false" + ? { + tls: { + rejectUnauthorized: false, + }, + } + : {}), +}); +``` + +
+ +To run this data loader, you’ll need to install `@elastic/elasticsearch`, `d3-dsv` and `dotenv` using your preferred package manager such as npm or Yarn. + +
+ +For the data loader to authenticate with your Elasticsearch cluster, you need to set the environment variables defined in the helper. If you use GitHub, you can use [secrets in GitHub Actions](https://docs.github.com/en/actions/security-guides/using-secrets-in-github-actions) to set environment variables; other platforms provide similar functionality for continuous deployment. For local development, we use the `dotenv` package, which allows environment variables to be defined in a `.env` file which lives in the project root and looks like this: + +``` +ES_NODE="https://USERNAME:PASSWORD@HOST:9200" +``` + +
+ +The `.env` file should not be committed to your source code repository; keep your credentials secret. + +
+ +The above data loader lives in `data/kibana_sample_data_logs.csv.ts`, so we can load the data as `data/kibana_sample_data_logs.csv`. The `FileAttachment.csv` method parses the file and returns a promise to an array of objects. + +```js echo +const logs = FileAttachment("./data/kibana_sample_data_logs.csv").csv({typed: true}); +``` + +The `logs` table has three columns: `date`, `count` and `response_code`. We can display the table using `Inputs.table`. + +```js echo +Inputs.table(logs) +``` + +Lastly, we can pass the table to Observable Plot to make a line chart. + +```js echo +Plot.plot({ + style: "overflow: visible;", + y: { grid: true }, + marks: [ + Plot.ruleY([0]), + Plot.line(logs, {x: "date", y: "count", stroke: "response_code", tip: true}), + Plot.text(logs, Plot.selectLast({x: "date", y: "count", z: "response_code", text: "response_code", textAnchor: "start", dx: 3})) + ] +}) +```