diff --git a/.github/semantic.yml b/.github/semantic.yml index 9ee8601b083ea..73a03dbf7e6c3 100644 --- a/.github/semantic.yml +++ b/.github/semantic.yml @@ -122,6 +122,7 @@ scopes: - datadog_agent source # Anything `datadog_agent` source related - dnstap source # Anything `dnstap` source related - docker_logs source # Anything `docker_logs` source related + - exec source # Anything `exec` source related - file source # Anything `file` source related - fluent source # Anything `fluent` source related - generator source # Anything `generator` source related diff --git a/.github/workflows/k8s_e2e.yml b/.github/workflows/k8s_e2e.yml index f275865c3c15e..19b072fbe0a4a 100644 --- a/.github/workflows/k8s_e2e.yml +++ b/.github/workflows/k8s_e2e.yml @@ -93,7 +93,7 @@ jobs: || contains(github.event.pull_request.labels.*.name, 'ci-condition: k8s e2e tests enable') || contains(github.event.pull_request.labels.*.name, 'ci-condition: k8s e2e all targets') steps: - - uses: actions/github-script@v4.1 + - uses: actions/github-script@v5 id: set-matrix with: script: | diff --git a/.github/workflows/test-harness.yml b/.github/workflows/test-harness.yml index 865eb7895de81..e5063ebe4d5e2 100644 --- a/.github/workflows/test-harness.yml +++ b/.github/workflows/test-harness.yml @@ -18,7 +18,7 @@ jobs: steps: - name: Indicate that we picked up the command with a comment reaction - uses: actions/github-script@v4.1 + uses: actions/github-script@v5 with: github-token: '${{secrets.GITHUB_TOKEN}}' script: | @@ -38,7 +38,7 @@ jobs: - name: Get Pull Request info id: pr-info - uses: actions/github-script@v4.1 + uses: actions/github-script@v5 with: script: | const pr_info_response = await github.pulls.get({ @@ -111,7 +111,7 @@ jobs: AWS_DEFAULT_REGION: '${{ secrets.TEST_HARNESS_AWS_DEFAULT_REGION }}' - name: Post a comment with the results - uses: actions/github-script@v4.1 + uses: actions/github-script@v5 with: github-token: '${{secrets.GITHUB_TOKEN}}' script: | diff --git 
a/.github/workflows/test.yml b/.github/workflows/test.yml index 4879c51d61185..b057d02dfa68d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -39,6 +39,8 @@ jobs: dependencies: ${{ steps.filter.outputs.dependencies }} internal_events: ${{ steps.filter.outputs.internal_events }} helm: ${{ steps.filter.outputs.helm }} + cue: ${{ steps.filter.outputs.cue }} + markdown: ${{ steps.filter.outputs.cue }} steps: - uses: actions/checkout@v2.3.4 - uses: dorny/paths-filter@v2 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 120000 index 0000000000000..49d1b98f97e06 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1 @@ +docs/CONTRIBUTING.md \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 32b0e5baaebda..2d389a9101640 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -971,7 +971,7 @@ checksum = "64c01c1c607d25c71bbaa67c113d6c6b36c434744b4fd66691d711b5b1bc0c8b" dependencies = [ "chrono", "chrono-tz-build", - "phf 0.10.0", + "phf", ] [[package]] @@ -981,7 +981,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db058d493fb2f65f41861bfed7e3fe6335264a9f0f92710cab5bdf01fef09069" dependencies = [ "parse-zoneinfo", - "phf 0.10.0", + "phf", "phf_codegen", ] @@ -1178,6 +1178,10 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea221b5284a47e40033bf9b66f35f984ec0ea2931eb03505246cd27a963f981b" +[[package]] +name = "core_common" +version = "0.1.0" + [[package]] name = "cpufeatures" version = "0.2.1" @@ -1773,6 +1777,18 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "212d0f5754cb6769937f4501cc0e67f4f4483c8d2c3e1e922ee9edbe4ab4c7c0" +[[package]] +name = "dns-lookup" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53ecafc952c4528d9b51a458d1a8904b81783feff9fde08ab6ed2545ff396872" +dependencies = [ + "cfg-if 1.0.0", + "libc", + "socket2 0.4.2", + "winapi 0.3.9", +] + 
[[package]] name = "dnsmsg-parser" version = "0.1.0" @@ -3070,10 +3086,6 @@ dependencies = [ "cfg-if 1.0.0", ] -[[package]] -name = "internal_event" -version = "0.1.0" - [[package]] name = "inventory" version = "0.1.10" @@ -3329,9 +3341,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.102" +version = "0.2.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2a5ac8f984bfcf3a823267e5fde638acc3325f6496633a5da6bb6eb2171e103" +checksum = "dd8f7255a17a627354f321ef0055d63b898c6fb27eff628af4d1b66b7331edf6" [[package]] name = "libflate" @@ -3765,9 +3777,9 @@ dependencies = [ [[package]] name = "mlua" -version = "0.6.3" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d3e21a11f1d1e955a1e9e766eb43e5295a066677dfc4428e296670369f429f8" +checksum = "34a8d7d9e39238b946f1baf17e710739a07dbcb8339aaf801f8a2ad1c40e4a68" dependencies = [ "bstr", "cc", @@ -3940,9 +3952,9 @@ dependencies = [ [[package]] name = "nix" -version = "0.22.1" +version = "0.22.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7555d6c7164cc913be1ce7f95cbecdabda61eb2ccd89008524af306fb7f5031" +checksum = "d3bb9a13fa32bc5aeb64150cd3f32d6cf4c748f8f8a417cce5d2eb976a8370ba" dependencies = [ "bitflags", "cc", @@ -4443,15 +4455,6 @@ dependencies = [ "indexmap", ] -[[package]] -name = "phf" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12" -dependencies = [ - "phf_shared 0.8.0", -] - [[package]] name = "phf" version = "0.10.0" @@ -4651,9 +4654,9 @@ dependencies = [ [[package]] name = "postgres-types" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "430f4131e1b7657b0cd9a2b0c3408d77c9a43a042d300b8c77f981dffcc43a2f" +checksum = "04619f94ba0cc80999f4fc7073607cb825bc739a883cb6d20900fc5e009d6b0d" dependencies = [ "bytes 
1.1.0", "chrono", @@ -4702,9 +4705,9 @@ dependencies = [ [[package]] name = "pretty_assertions" -version = "0.7.2" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cab0e7c02cf376875e9335e0ba1da535775beb5450d21e1dffca068818ed98b" +checksum = "ec0cfe1b2403f172ba0f234e500906ee0a3e493fb81092dac23ebefe129301cc" dependencies = [ "ansi_term 0.12.1", "ctor", @@ -5723,7 +5726,7 @@ dependencies = [ "libc", "log", "memchr", - "nix 0.22.1", + "nix 0.22.2", "radix_trie", "scopeguard", "smallvec", @@ -6212,9 +6215,9 @@ checksum = "c307a32c1c5c437f38c7fd45d753050587732ba8628319fbdf12a7e289ccc590" [[package]] name = "smallvec" -version = "1.6.1" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e" +checksum = "1ecab6c735a6bb4139c0caafd0cc3635748bbb3acf4550e8138122099251f309" [[package]] name = "smol" @@ -6496,6 +6499,15 @@ dependencies = [ "syn 1.0.76", ] +[[package]] +name = "substring" +version = "1.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ee6433ecef213b2e72f587ef64a2f5943e7cd16fbd82dbe8bc07486c534c86" +dependencies = [ + "autocfg", +] + [[package]] name = "subtle" version = "2.4.1" @@ -6845,9 +6857,9 @@ dependencies = [ [[package]] name = "tokio-postgres" -version = "0.7.2" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d2b1383c7e4fb9a09e292c7c6afb7da54418d53b045f1c1fac7a911411a2b8b" +checksum = "2f916ee7e52c8a74dfe4162dd73a073d0d7d4b387ea7b97a774c0c10b0776531" dependencies = [ "async-trait", "byteorder", @@ -6857,7 +6869,7 @@ dependencies = [ "log", "parking_lot", "percent-encoding", - "phf 0.8.0", + "phf", "pin-project-lite", "postgres-protocol", "postgres-types", @@ -7584,7 +7596,7 @@ dependencies = [ "metrics-util", "mlua", "mongodb", - "nix 0.22.1", + "nix 0.22.2", "no-proxy", "nom 7.0.0", "notify", @@ -7717,6 +7729,7 @@ 
dependencies = [ "buffers", "bytes 1.1.0", "chrono", + "core_common", "criterion", "db-key", "derivative", @@ -7728,7 +7741,6 @@ dependencies = [ "http", "hyper-proxy", "indexmap", - "internal_event", "lazy_static", "lookup", "metrics", @@ -7751,6 +7763,7 @@ dependencies = [ "serde_json", "shared", "snafu", + "substring", "tokio", "tokio-stream", "tokio-test", @@ -7871,6 +7884,7 @@ dependencies = [ "criterion", "csv", "datadog-search-syntax", + "dns-lookup", "grok", "hex", "hostname", @@ -8342,18 +8356,18 @@ dependencies = [ [[package]] name = "zeroize" -version = "1.4.1" +version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "377db0846015f7ae377174787dd452e1c5f5a9050bc6f954911d01f116daa0cd" +checksum = "bf68b08513768deaa790264a7fac27a58cbf2705cfcdc9448362229217d7e970" dependencies = [ "zeroize_derive", ] [[package]] name = "zeroize_derive" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2c1e130bebaeab2f23886bf9acbaca14b092408c452543c857f66399cd6dab1" +checksum = "bdff2024a851a322b08f179173ae2ba620445aef1e838f0c196820eade4ae0c7" dependencies = [ "proc-macro2 1.0.29", "quote 1.0.9", diff --git a/Cargo.toml b/Cargo.toml index 4ebc374d012da..3eeed1b12056c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -240,7 +240,7 @@ lru = { version = "0.6.6", default-features = false, optional = true } maxminddb = { version = "0.21.0", default-features = false, optional = true } md-5 = { version = "0.9", optional = true } # make sure to update the external docs when the Lua version changes -mlua = { version = "0.6.3", default-features = false, features = ["lua54", "send", "vendored"], optional = true } +mlua = { version = "0.6.4", default-features = false, features = ["lua54", "send", "vendored"], optional = true } mongodb = { version = "2.0.0", default-features = false, features = ["tokio-runtime"], optional = true } async-nats = { version = "0.10.1", default-features = false, 
optional = true } no-proxy = { version = "0.3.1", default-features = false, features = ["serialize"] } @@ -272,7 +272,7 @@ strip-ansi-escapes = { version = "0.1.1", default-features = false } structopt = { version = "0.3.23", default-features = false } syslog = { version = "5.0.0", default-features = false, optional = true } syslog_loose = { version = "0.15.0", default-features = false, optional = true } -tokio-postgres = { version = "0.7.2", default-features = false, features = ["runtime", "with-chrono-0_4"], optional = true } +tokio-postgres = { version = "0.7.3", default-features = false, features = ["runtime", "with-chrono-0_4"], optional = true } toml = { version = "0.5.8", default-features = false } typetag = { version = "0.1.7", default-features = false } twox-hash = { version = "1.6.1", default-features = false } @@ -294,7 +294,7 @@ security-framework = "2.3.1" [target.'cfg(unix)'.dependencies] atty = "0.2.14" -nix = "0.22.1" +nix = "0.22.2" [build-dependencies] prost-build = { version = "0.8", optional = true } @@ -305,10 +305,10 @@ approx = "0.5.0" assert_cmd = "2.0.1" base64 = "0.13.0" criterion = { version = "0.3.5", features = ["html_reports", "async_tokio"] } -libc = "0.2.102" +libc = "0.2.103" libz-sys = "1.1.3" matches = "0.1.9" -pretty_assertions = "0.7.2" +pretty_assertions = "1.0.0" reqwest = { version = "0.11.4", features = ["json"] } tempfile = "3.2.0" tokio = { version = "1.12.0", features = ["test-util"] } diff --git a/Makefile b/Makefile index 9e5738f837a1f..456eb3efaaab5 100644 --- a/Makefile +++ b/Makefile @@ -678,7 +678,7 @@ check-kubernetes-yaml: ## Check that the generated Kubernetes YAML configs are u ${MAYBE_ENVIRONMENT_EXEC} ./scripts/kubernetes-yaml.sh check check-events: ## Check that events satisfy patterns set in https://github.com/timberio/vector/blob/master/rfcs/2020-03-17-2064-event-driven-observability.md - ${MAYBE_ENVIRONMENT_EXEC} ./scripts/check-events.sh + ${MAYBE_ENVIRONMENT_EXEC} ./scripts/check-events ##@ Rustdoc 
build-rustdoc: ## Build Vector's Rustdocs diff --git a/README.md b/README.md index 7373e8055c45a..0b3b1a04043ac 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ pipeline that puts you in control of your observability data. [Collect][docs.sources], [transform][docs.transforms], and [route][docs.sinks] all your logs, metrics, and traces to any vendors you want today and any other vendors you may want tomorrow. Vector enables dramatic cost reduction, novel -data enrichment, and data security where you need it, not where is most +data enrichment, and data security where you need it, not where it is most convenient for your vendors. Additionally, it is open source and up to 10x faster than every alternative in the space. diff --git a/benches/metrics_bench_util/mod.rs b/benches/metrics_bench_util/mod.rs index 4f94be3e0dca9..b93f87dfd810a 100644 --- a/benches/metrics_bench_util/mod.rs +++ b/benches/metrics_bench_util/mod.rs @@ -36,7 +36,7 @@ fn disable_metrics_tracing_integration() { #[inline] fn boot() { vector::trace::init(false, false, "warn"); - vector::metrics::init().expect("metrics initialization failed"); + vector::metrics::init_test().expect("metrics initialization failed"); } #[allow(dead_code)] // condition compilation @@ -74,7 +74,7 @@ pub fn benchmark(c: &mut Criterion, mode: Mode) { boot(); let metrics_core_enabled = mode != Mode::MetricsOff; assert_eq!( - vector::metrics::get_controller().is_ok(), + vector::metrics::Controller::get().is_ok(), metrics_core_enabled, "the presence of a controller must correspond to whether metrics core is on or off" ); diff --git a/benches/metrics_snapshot.rs b/benches/metrics_snapshot.rs index 30dd3a01e22e9..b90259f6fdf03 100644 --- a/benches/metrics_snapshot.rs +++ b/benches/metrics_snapshot.rs @@ -10,7 +10,7 @@ fn benchmark(c: &mut Criterion) { &cardinality, |b, &cardinality| { let controller = prepare_metrics(cardinality); - b.iter(|| vector::metrics::capture_metrics(controller)); + b.iter(|| 
controller.capture_metrics()); }, ); } @@ -18,9 +18,9 @@ fn benchmark(c: &mut Criterion) { } fn prepare_metrics(cardinality: usize) -> &'static vector::metrics::Controller { - let _ = vector::metrics::init(); - let controller = vector::metrics::get_controller().unwrap(); - vector::metrics::reset(controller); + let _ = vector::metrics::init_test(); + let controller = vector::metrics::Controller::get().unwrap(); + controller.reset(); for idx in 0..cardinality { metrics::counter!("test", 1, "idx" => format!("{}", idx)); diff --git a/docs/specs/buffer.md b/docs/specs/buffer.md index 616b01d0511bf..258ad1d19a377 100644 --- a/docs/specs/buffer.md +++ b/docs/specs/buffer.md @@ -16,11 +16,31 @@ interpreted as described in [RFC 2119]. Vector buffers MUST be instrumented for optimal observability and monitoring. This is required to drive various interfaces that Vector users depend on to manage Vector installations in mission critical production environments. This section extends the [Instrumentation Specification]. +### Terms and Definitions + +`component_metadata` - Refers to the metadata (component id, component scope, component kind, and component type) of the component associated with the buffer. Buffer metrics MUST be tagged with all or partial `component_metadata` unless specified otherwise. In most cases, these tags are automatically added from tracing span context and do not need to be included as event properties. + ### Events -#### `EventsReceived` +#### `BufferCreated` + +*All buffers* MUST emit a `BufferCreated` event immediately upon creation. To avoid stale metrics, this event MUST be regularly emitted at an interval. 
+ +* Properties + * `max_size_bytes` - the max size of the buffer in bytes + * `max_size_events` - the max size of the buffer in number of events + * `initial_events_size` - the number of events in the buffer at creation + * `initial_bytes_size` - the byte size of the buffer at creation + * `component_metadata` - as defined in [Terms and Definitions](#terms-and-definitions) +* Metric + * MUST emit the `buffer_max_event_size` gauge (in-memory buffers) if the defined `max_size_events` value is present + * MUST emit the `buffer_max_byte_size` gauge (disk buffers) if the defined `max_size_bytes` value is present + * MUST emit the `buffer_received_events_total` counter with the defined `initial_events_size` value + * MUST emit the `buffer_received_bytes_total` counter with the defined `initial_bytes_size` value + +#### `BufferEventsReceived` -*All buffers* MUST emit an `EventsReceived` event immediately after receiving one or more Vector events. +*All buffers* MUST emit an `BufferEventsReceived` event immediately after receiving one or more Vector events. * Properties * `count` - the number of received events @@ -30,11 +50,10 @@ Vector buffers MUST be instrumented for optimal observability and monitoring. Th * MUST increment the `buffer_received_bytes_total` counter by the defined `byte_size` * MUST increment the `buffer_events` gauge by the defined `count` * MUST increment the `buffer_byte_size` gauge by the defined `byte_size` - * MUST update the `buffer_usage_percentage` gauge which measures the current buffer space utilization (number of events/bytes) over total space available (max number of events/bytes) -#### `EventsSent` +#### `BufferEventsSent` -*All buffers* MUST emit an `EventsSent` event immediately after sending one or more Vector events. +*All buffers* MUST emit an `BufferEventsSent` event immediately after sending one or more Vector events. 
* Properties * `count` - the number of sent events @@ -44,7 +63,6 @@ Vector buffers MUST be instrumented for optimal observability and monitoring. Th * MUST increment the `buffer_sent_bytes_total` counter by the defined `byte_size` * MUST decrement the `buffer_events` gauge by the defined `count` * MUST decrement the `buffer_byte_size` gauge by the defined `byte_size` - * MUST update the `buffer_usage_percentage` gauge #### `EventsDropped` diff --git a/lib/file-source/Cargo.toml b/lib/file-source/Cargo.toml index 7ef8325b08025..3d30e41134126 100644 --- a/lib/file-source/Cargo.toml +++ b/lib/file-source/Cargo.toml @@ -75,7 +75,7 @@ features = ["full"] criterion = "0.3" quickcheck = "1" tempfile = "3.1.0" -pretty_assertions = "0.7.2" +pretty_assertions = "1.0.0" [[bench]] name = "buffer" diff --git a/lib/lookup/src/lookup_buf/mod.rs b/lib/lookup/src/lookup_buf/mod.rs index 79aa6030261c9..b6eae0ddad3c0 100644 --- a/lib/lookup/src/lookup_buf/mod.rs +++ b/lib/lookup/src/lookup_buf/mod.rs @@ -5,6 +5,7 @@ use quickcheck::{Arbitrary, Gen}; use serde::de::{self, Visitor}; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::{ + borrow::Cow, collections::VecDeque, fmt::{self, Display, Formatter}, ops::{Index, IndexMut}, @@ -287,6 +288,14 @@ impl From for LookupBuf { } } +impl From> for LookupBuf { + fn from(input: Cow<'_, str>) -> Self { + let mut segments = VecDeque::with_capacity(1); + segments.push_back(SegmentBuf::from(input.as_ref())); + LookupBuf { segments } + } +} + impl From for LookupBuf { fn from(input: SegmentBuf) -> Self { let mut segments = VecDeque::with_capacity(1); diff --git a/lib/vector-core/Cargo.toml b/lib/vector-core/Cargo.toml index 53127e58ec105..612e90d52d83e 100644 --- a/lib/vector-core/Cargo.toml +++ b/lib/vector-core/Cargo.toml @@ -22,13 +22,13 @@ http = { version = "0.2.5", default-features = false } hyper-proxy = { version = "0.9.1", default-features = false, features = ["openssl-tls"] } # pinned due to 
https://github.com/tkaitchuck/aHash/issues/95 indexmap = { version = "~1.6.2", default-features = false, features = ["serde"] } -internal_event = { path = "internal-event", default-features = false } +core_common = { path = "core-common", default-features = false } lazy_static = { version = "1.4.0", default-features = false } lookup = { path = "../lookup", features = ["arbitrary"] } metrics = { version = "0.17.0", default-features = false, features = ["std"]} metrics-tracing-context = { version = "0.8.0", default-features = false } metrics-util = { version = "0.10.1", default-features = false, features = ["std"] } -mlua = { version = "0.6.3", default-features = false, features = ["lua54", "send", "vendored"], optional = true } +mlua = { version = "0.6.4", default-features = false, features = ["lua54", "send", "vendored"], optional = true } no-proxy = { version = "0.3.1", default-features = false, features = ["serialize"] } once_cell = { version = "1.8", default-features = false } pest = { version = "2.1.3", default-features = false } @@ -41,6 +41,7 @@ serde = { version = "1.0.130", default-features = false, features = ["derive"] } serde_json = { version = "1.0.68", default-features = false } shared = { path = "../shared" } snafu = { version = "0.6.10", default-features = false } +substring = { version = "1.4", default-features = false } tokio = { version = "1.12.0", default-features = false } tokio-stream = { version = "0.1", default-features = false, optional = true } tokio-util = { version = "0.6.8", default-features = false, features = ["time"] } @@ -61,7 +62,7 @@ criterion = { version = "0.3.5", features = ["html_reports"] } env-test-util = "1.0.1" quickcheck = "1.0.3" proptest = "1.0" -pretty_assertions = "0.7.2" +pretty_assertions = "1.0.0" tokio-test = "0.4.2" [features] @@ -78,3 +79,7 @@ harness = false [[bench]] name = "event" harness = false + +[[bench]] +name = "path_iter" +harness = false diff --git a/lib/vector-core/benches/path_iter.rs 
b/lib/vector-core/benches/path_iter.rs new file mode 100644 index 0000000000000..2dad569bd5371 --- /dev/null +++ b/lib/vector-core/benches/path_iter.rs @@ -0,0 +1,54 @@ +use criterion::{ + criterion_group, criterion_main, measurement::WallTime, BenchmarkGroup, Criterion, SamplingMode, +}; +use vector_core::event::PathIter; + +fn path_iter(c: &mut Criterion) { + let mut group: BenchmarkGroup = c.benchmark_group("vector_core::event::util::log"); + group.sampling_mode(SamplingMode::Auto); + + group.bench_function("PathIter (flat)", move |b| { + b.iter_with_large_drop(|| { + let iter = PathIter::new("message"); + iter.collect::>() + }) + }); + + group.bench_function("PathIter (nested)", move |b| { + b.iter_with_large_drop(|| { + let iter = PathIter::new("obj.message"); + iter.collect::>() + }) + }); + + group.bench_function("PathIter (nested array)", move |b| { + b.iter_with_large_drop(|| { + let iter = PathIter::new("obj.messages[2]"); + iter.collect::>() + }) + }); + + group.bench_function("PathIter (nested escaped)", move |b| { + b.iter_with_large_drop(|| { + let iter = PathIter::new("obj.\\messages[]\\"); + iter.collect::>() + }) + }); +} + +criterion_group!( + name = benches; + config = Criterion::default() + // degree of noise to ignore in measurements, here 1% + .noise_threshold(0.01) + // likelihood of noise registering as difference, here 5% + .significance_level(0.05) + // likelihood of capturing the true runtime, here 95% + .confidence_level(0.95) + // total number of bootstrap resamples, higher is less noisy but slower + .nresamples(100_000) + // total samples to collect within the set measurement time + .sample_size(150); + targets = path_iter +); +criterion_main!(benches); diff --git a/lib/vector-core/buffers/Cargo.toml b/lib/vector-core/buffers/Cargo.toml index d687612c1add1..c7cda20b78594 100644 --- a/lib/vector-core/buffers/Cargo.toml +++ b/lib/vector-core/buffers/Cargo.toml @@ -19,7 +19,7 @@ tracing = { version = "0.1.28", default-features = false } 
[dev-dependencies] criterion = { version = "0.3", features = ["html_reports"] } -pretty_assertions = "0.7.2" +pretty_assertions = "1.0.0" quickcheck = "1.0" tempdir = "0.3" tokio-test = "0.4.2" diff --git a/lib/vector-core/build.rs b/lib/vector-core/build.rs index cee9ff5b58447..e344294e1b3d7 100644 --- a/lib/vector-core/build.rs +++ b/lib/vector-core/build.rs @@ -1,8 +1,8 @@ fn main() { println!("cargo:rerun-if-changed=proto/event.proto"); - let mut prost_build = prost_build::Config::new(); - prost_build.btree_map(&["."]); - prost_build + let _ = prost_build::Config::new() + .btree_map(&["."]) + .bytes(&["raw_bytes"]) .compile_protos(&["proto/event.proto"], &["proto/"]) .unwrap(); } diff --git a/lib/vector-core/internal-event/Cargo.toml b/lib/vector-core/core-common/Cargo.toml similarity index 82% rename from lib/vector-core/internal-event/Cargo.toml rename to lib/vector-core/core-common/Cargo.toml index 6a6a51b884520..c2cedd62684f8 100644 --- a/lib/vector-core/internal-event/Cargo.toml +++ b/lib/vector-core/core-common/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "internal_event" +name = "core_common" version = "0.1.0" authors = ["Vector Contributors "] edition = "2018" diff --git a/lib/vector-core/src/byte_size_of.rs b/lib/vector-core/core-common/src/byte_size_of.rs similarity index 100% rename from lib/vector-core/src/byte_size_of.rs rename to lib/vector-core/core-common/src/byte_size_of.rs diff --git a/lib/vector-core/core-common/src/internal_event.rs b/lib/vector-core/core-common/src/internal_event.rs new file mode 100644 index 0000000000000..9f608a42914e1 --- /dev/null +++ b/lib/vector-core/core-common/src/internal_event.rs @@ -0,0 +1,9 @@ +pub trait InternalEvent { + fn emit_logs(&self) {} + fn emit_metrics(&self) {} +} + +pub fn emit(event: &impl InternalEvent) { + event.emit_logs(); + event.emit_metrics(); +} diff --git a/lib/vector-core/core-common/src/lib.rs b/lib/vector-core/core-common/src/lib.rs new file mode 100644 index 0000000000000..e2aa05c7999a0 
--- /dev/null +++ b/lib/vector-core/core-common/src/lib.rs @@ -0,0 +1,10 @@ +//! The Vector Core common library +//! +//! This library includes common functionality relied upon by vector-core +//! and core-related crates (e.g. buffers). + +#![deny(clippy::all)] +#![deny(clippy::pedantic)] + +pub mod byte_size_of; +pub mod internal_event; diff --git a/lib/vector-core/internal-event/src/lib.rs b/lib/vector-core/internal-event/src/lib.rs deleted file mode 100644 index f734bed59f6eb..0000000000000 --- a/lib/vector-core/internal-event/src/lib.rs +++ /dev/null @@ -1,17 +0,0 @@ -//! The Vector Core Internal Event library -//! -//! This library powers the Event-driven Observability pattern (RFC 2064) that -//! vector uses for internal instrumentation - -#![deny(clippy::all)] -#![deny(clippy::pedantic)] - -pub trait InternalEvent { - fn emit_logs(&self) {} - fn emit_metrics(&self) {} -} - -pub fn emit(event: &impl InternalEvent) { - event.emit_logs(); - event.emit_metrics(); -} diff --git a/lib/vector-core/src/event/proto.rs b/lib/vector-core/src/event/proto.rs index 047d64797f8d1..33e926b8b4b89 100644 --- a/lib/vector-core/src/event/proto.rs +++ b/lib/vector-core/src/event/proto.rs @@ -237,7 +237,7 @@ impl From for WithMetadata { fn decode_value(input: Value) -> Option { match input.kind { - Some(value::Kind::RawBytes(data)) => Some(event::Value::Bytes(data.into())), + Some(value::Kind::RawBytes(data)) => Some(event::Value::Bytes(data)), Some(value::Kind::Timestamp(ts)) => Some(event::Value::Timestamp( chrono::Utc.timestamp(ts.seconds, ts.nanos as u32), )), @@ -281,7 +281,7 @@ fn decode_array(items: Vec) -> Option { fn encode_value(value: event::Value) -> Value { Value { kind: match value { - event::Value::Bytes(b) => Some(value::Kind::RawBytes(b.to_vec())), + event::Value::Bytes(b) => Some(value::Kind::RawBytes(b)), event::Value::Timestamp(ts) => Some(value::Kind::Timestamp(prost_types::Timestamp { seconds: ts.timestamp(), nanos: ts.timestamp_subsec_nanos() as i32, diff 
--git a/lib/vector-core/src/event/util/log/contains.rs b/lib/vector-core/src/event/util/log/contains.rs index d78895b673df1..853d1f4abe807 100644 --- a/lib/vector-core/src/event/util/log/contains.rs +++ b/lib/vector-core/src/event/util/log/contains.rs @@ -6,7 +6,7 @@ pub fn contains(fields: &BTreeMap, path: &str) -> bool { let mut path_iter = PathIter::new(path); match path_iter.next() { - Some(PathComponent::Key(key)) => match fields.get(&key) { + Some(PathComponent::Key(key)) => match fields.get(key.as_ref()) { None => false, Some(value) => value_contains(value, path_iter), }, @@ -14,14 +14,14 @@ pub fn contains(fields: &BTreeMap, path: &str) -> bool { } } -fn value_contains(mut value: &Value, mut path_iter: I) -> bool +fn value_contains<'a, I>(mut value: &Value, mut path_iter: I) -> bool where - I: Iterator, + I: Iterator>, { loop { value = match (path_iter.next(), value) { (None, _) => return true, - (Some(PathComponent::Key(ref key)), Value::Map(map)) => match map.get(key) { + (Some(PathComponent::Key(key)), Value::Map(map)) => match map.get(key.as_ref()) { None => return false, Some(nested_value) => nested_value, }, diff --git a/lib/vector-core/src/event/util/log/get.rs b/lib/vector-core/src/event/util/log/get.rs index 98a9b423cccc8..d346ecb79af5c 100644 --- a/lib/vector-core/src/event/util/log/get.rs +++ b/lib/vector-core/src/event/util/log/get.rs @@ -6,7 +6,7 @@ pub fn get<'a>(fields: &'a BTreeMap, path: &str) -> Option<&'a Va let mut path_iter = PathIter::new(path); match path_iter.next() { - Some(PathComponent::Key(key)) => match fields.get(&key) { + Some(PathComponent::Key(key)) => match fields.get(key.as_ref()) { None => None, Some(value) => get_value(value, path_iter), }, @@ -15,14 +15,14 @@ pub fn get<'a>(fields: &'a BTreeMap, path: &str) -> Option<&'a Va } /// Returns a reference to a field value specified by a path iter. 
-pub fn get_value(mut value: &Value, mut path_iter: I) -> Option<&Value> +pub fn get_value<'a, I>(mut value: &Value, mut path_iter: I) -> Option<&Value> where - I: Iterator, + I: Iterator>, { loop { match (path_iter.next(), value) { (None, _) => return Some(value), - (Some(PathComponent::Key(ref key)), Value::Map(map)) => match map.get(key) { + (Some(PathComponent::Key(key)), Value::Map(map)) => match map.get(key.as_ref()) { None => return None, Some(nested_value) => { value = nested_value; diff --git a/lib/vector-core/src/event/util/log/get_mut.rs b/lib/vector-core/src/event/util/log/get_mut.rs index 08f3a1c727171..cae2304738a75 100644 --- a/lib/vector-core/src/event/util/log/get_mut.rs +++ b/lib/vector-core/src/event/util/log/get_mut.rs @@ -6,7 +6,7 @@ pub fn get_mut<'a>(fields: &'a mut BTreeMap, path: &str) -> Optio let mut path_iter = PathIter::new(path); match path_iter.next() { - Some(PathComponent::Key(key)) => match fields.get_mut(&key) { + Some(PathComponent::Key(key)) => match fields.get_mut(key.as_ref()) { None => None, Some(value) => get_mut_value(value, path_iter), }, @@ -14,14 +14,14 @@ pub fn get_mut<'a>(fields: &'a mut BTreeMap, path: &str) -> Optio } } -fn get_mut_value(mut value: &mut Value, mut path_iter: I) -> Option<&mut Value> +fn get_mut_value<'a, I>(mut value: &mut Value, mut path_iter: I) -> Option<&mut Value> where - I: Iterator, + I: Iterator>, { loop { match (path_iter.next(), value) { (None, value) => return Some(value), - (Some(PathComponent::Key(ref key)), Value::Map(map)) => match map.get_mut(key) { + (Some(PathComponent::Key(key)), Value::Map(map)) => match map.get_mut(key.as_ref()) { None => return None, Some(nested_value) => { value = nested_value; diff --git a/lib/vector-core/src/event/util/log/insert.rs b/lib/vector-core/src/event/util/log/insert.rs index b464a9da5e49d..f4d25ecd6fe62 100644 --- a/lib/vector-core/src/event/util/log/insert.rs +++ b/lib/vector-core/src/event/util/log/insert.rs @@ -14,45 +14,45 @@ pub fn 
insert_path( map_insert(fields, path.into_iter().peekable(), value) } -fn map_insert( +fn map_insert<'a, I>( fields: &mut BTreeMap, mut path_iter: Peekable, value: Value, ) -> Option where - I: Iterator, + I: Iterator>, { match (path_iter.next(), path_iter.peek()) { - (Some(PathComponent::Key(current)), None) => fields.insert(current, value), + (Some(PathComponent::Key(current)), None) => fields.insert(current.into_owned(), value), (Some(PathComponent::Key(current)), Some(PathComponent::Key(_))) => { - if let Some(Value::Map(map)) = fields.get_mut(¤t) { + if let Some(Value::Map(map)) = fields.get_mut(current.as_ref()) { map_insert(map, path_iter, value) } else { let mut map = BTreeMap::new(); map_insert(&mut map, path_iter, value); - fields.insert(current, Value::Map(map)) + fields.insert(current.into_owned(), Value::Map(map)) } } (Some(PathComponent::Key(current)), Some(&PathComponent::Index(next))) => { - if let Some(Value::Array(array)) = fields.get_mut(¤t) { + if let Some(Value::Array(array)) = fields.get_mut(current.as_ref()) { array_insert(array, path_iter, value) } else { let mut array = Vec::with_capacity(next + 1); array_insert(&mut array, path_iter, value); - fields.insert(current, Value::Array(array)) + fields.insert(current.into_owned(), Value::Array(array)) } } _ => None, } } -fn array_insert( +fn array_insert<'a, I>( values: &mut Vec, mut path_iter: Peekable, value: Value, ) -> Option where - I: Iterator, + I: Iterator>, { match (path_iter.next(), path_iter.peek()) { (Some(PathComponent::Index(current)), None) => { diff --git a/lib/vector-core/src/event/util/log/path_iter.rs b/lib/vector-core/src/event/util/log/path_iter.rs index 682bfb19722f8..191dbeaecce7c 100644 --- a/lib/vector-core/src/event/util/log/path_iter.rs +++ b/lib/vector-core/src/event/util/log/path_iter.rs @@ -1,26 +1,34 @@ -use regex::Regex; use serde::{Deserialize, Serialize}; -use std::{mem, str::Chars}; - -thread_local! 
{ - pub static FAST_RE: Regex = Regex::new(r"\A\w+(\.\w+)*\z").unwrap(); -} +use std::{borrow::Cow, mem, str::Chars}; +use substring::Substring; #[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)] -pub enum PathComponent { +pub enum PathComponent<'a> { /// For example, in `a.b[0].c[2]` the keys are "a", "b", and "c". - Key(String), + Key(Cow<'a, str>), /// For example, in `a.b[0].c[2]` the indexes are 0 and 2. Index(usize), /// Indicates that a parsing error occurred. Invalid, } +impl<'a> PathComponent<'a> { + pub fn into_static(self) -> PathComponent<'static> { + match self { + PathComponent::Key(k) => PathComponent::<'static>::Key(k.into_owned().into()), + PathComponent::Index(u) => PathComponent::<'static>::Index(u), + PathComponent::Invalid => PathComponent::Invalid, + } + } +} + /// Iterator over components of paths specified in form `a.b[0].c[2]`. pub struct PathIter<'a> { path: &'a str, chars: Chars<'a>, - state: PathIterState<'a>, + state: State, + temp: String, + pos: usize, } impl<'a> PathIter<'a> { @@ -30,18 +38,17 @@ impl<'a> PathIter<'a> { path, chars: path.chars(), state: Default::default(), + temp: String::default(), + pos: 0, } } } -/// The parsing is implemented using a state machine. The idea of using Rust -/// enums to model states is taken from [Pretty State Machine Patterns in -/// Rust](https://hoverbear.org/blog/rust-state-machine-pattern/). 
-enum PathIterState<'a> { +enum State { Start, - Fast(std::str::Split<'a, char>), - Key(String), - KeyEscape(String), // escape mode inside keys entered into after `\` character + Key(usize), + Escape, + EscapedKey, Index(usize), Dot, OpeningBracket, @@ -50,30 +57,16 @@ enum PathIterState<'a> { Invalid, } -impl PathIterState<'_> { - fn is_start(&self) -> bool { - matches!(self, Self::Start) - } -} - -impl<'a> Default for PathIterState<'a> { - fn default() -> PathIterState<'a> { - PathIterState::Start +impl Default for State { + fn default() -> State { + State::Start } } impl<'a> Iterator for PathIter<'a> { - type Item = PathComponent; + type Item = PathComponent<'a>; fn next(&mut self) -> Option { - use PathIterState::{ - ClosingBracket, Dot, End, Fast, Index, Invalid, Key, KeyEscape, OpeningBracket, Start, - }; - - if self.state.is_start() && FAST_RE.with(|re| re.is_match(self.path)) { - self.state = Fast(self.path.split('.')); - } - let mut res = None; loop { if let Some(res) = res { @@ -82,87 +75,102 @@ impl<'a> Iterator for PathIter<'a> { let c = self.chars.next(); self.state = match mem::take(&mut self.state) { - Start => match c { - Some('.') | Some('[') | Some(']') | None => Invalid, - Some('\\') => KeyEscape(String::new()), - Some(c) => Key(c.to_string()), + State::Start => match c { + Some('.') | Some('[') | Some(']') | None => State::Invalid, + Some('\\') => State::Escape, + Some(_) => State::Key(self.pos), }, - Key(mut s) => match c { - Some('.') => { - res = Some(Some(PathComponent::Key(s))); - Dot + State::Key(start) => match c { + Some('.') | Some('[') | None => { + res = Some(Some(PathComponent::Key( + self.path.substring(start, self.pos).into(), + ))); + char_to_state(c) } - Some('[') => { - res = Some(Some(PathComponent::Key(s))); - OpeningBracket + Some(']') => State::Invalid, + Some('\\') => { + self.temp.push_str(self.path.substring(start, self.pos)); + State::Escape } - Some(']') => Invalid, - Some('\\') => KeyEscape(s), - None => { - res = 
Some(Some(PathComponent::Key(s))); - End + Some(_) => State::Key(start), + }, + State::EscapedKey => match c { + Some('.') | Some('[') | None => { + res = Some(Some(PathComponent::Key( + std::mem::take(&mut self.temp).into(), + ))); + char_to_state(c) } + Some(']') => State::Invalid, + Some('\\') => State::Escape, Some(c) => { - s.push(c); - Key(s) + self.temp.push(c); + State::EscapedKey } }, - KeyEscape(mut s) => match c { + State::Escape => match c { Some(c) if c == '.' || c == '[' || c == ']' || c == '\\' => { - s.push(c); - Key(s) + self.temp.push(c); + State::EscapedKey } - _ => Invalid, + _ => State::Invalid, }, - Index(i) => match c { + State::Index(i) => match c { Some(c) if ('0'..='9').contains(&c) => { - Index(10 * i + (c as usize - '0' as usize)) + State::Index(10 * i + (c as usize - '0' as usize)) } Some(']') => { res = Some(Some(PathComponent::Index(i))); - ClosingBracket + State::ClosingBracket } - _ => Invalid, + _ => State::Invalid, }, - Dot => match c { - Some('.') | Some('[') | Some(']') | None => Invalid, - Some('\\') => KeyEscape(String::new()), - Some(c) => Key(c.to_string()), + State::Dot => match c { + Some('.') | Some('[') | Some(']') | None => State::Invalid, + Some('\\') => State::Escape, + Some(_) => State::Key(self.pos), }, - OpeningBracket => match c { - Some(c) if ('0'..='9').contains(&c) => Index(c as usize - '0' as usize), - _ => Invalid, + State::OpeningBracket => match c { + Some(c) if ('0'..='9').contains(&c) => State::Index(c as usize - '0' as usize), + _ => State::Invalid, }, - ClosingBracket => match c { - Some('.') => Dot, - Some('[') => OpeningBracket, - None => End, - _ => Invalid, + State::ClosingBracket => match c { + Some('.') | Some('[') | None => char_to_state(c), + _ => State::Invalid, }, - End => { + State::End => { res = Some(None); - End + State::End } - Invalid => { + State::Invalid => { res = Some(Some(PathComponent::Invalid)); - End - } - Fast(mut iter) => { - res = Some(iter.next().map(|s| 
PathComponent::Key(s.to_string()))); - Fast(iter) + State::End } - } + }; + self.pos += 1; } } } +#[inline] +fn char_to_state(c: Option) -> State { + match c { + Some('.') => State::Dot, + Some('[') => State::OpeningBracket, + Some(']') => State::ClosingBracket, + Some('\\') => State::Escape, + None => State::End, + _ => State::Invalid, + } +} + #[cfg(test)] mod test { use super::*; #[test] fn path_iter_elementary() { - let actual: Vec<_> = PathIter::new(&"squirrel".to_string()).collect(); + let actual: Vec<_> = PathIter::new("squirrel").collect(); let expected = vec![PathComponent::Key("squirrel".into())]; assert_eq!(actual, expected); } diff --git a/lib/vector-core/src/event/util/log/remove.rs b/lib/vector-core/src/event/util/log/remove.rs index 511ce0debbe83..46a0b57f611c8 100644 --- a/lib/vector-core/src/event/util/log/remove.rs +++ b/lib/vector-core/src/event/util/log/remove.rs @@ -43,13 +43,13 @@ fn remove_map( ) -> Option<(Value, bool)> { match path.next()? { PathComponent::Key(key) => match path.peek() { - None => fields.remove(&key).map(|v| (v, fields.is_empty())), + None => fields.remove(key.as_ref()).map(|v| (v, fields.is_empty())), Some(_) => { let (result, empty) = fields - .get_mut(&key) + .get_mut(key.as_ref()) .and_then(|value| remove_rec(value, path, prune))?; if prune && empty { - fields.remove(&key); + fields.remove(key.as_ref()); } Some((result, fields.is_empty())) } diff --git a/lib/vector-core/src/lib.rs b/lib/vector-core/src/lib.rs index a5b4752062d93..1e0fd2b1a9706 100644 --- a/lib/vector-core/src/lib.rs +++ b/lib/vector-core/src/lib.rs @@ -33,14 +33,13 @@ pub mod source; mod test_util; pub mod transform; pub use buffers; -mod byte_size_of; pub mod partition; pub mod serde; pub mod stream; pub mod time; -pub use internal_event; +pub use core_common::byte_size_of::ByteSizeOf; +pub use core_common::internal_event; -pub use byte_size_of::ByteSizeOf; use std::path::PathBuf; #[macro_use] diff --git a/lib/vector-core/src/metrics/mod.rs 
b/lib/vector-core/src/metrics/mod.rs index 552391eb2f89c..876f770c0e12e 100644 --- a/lib/vector-core/src/metrics/mod.rs +++ b/lib/vector-core/src/metrics/mod.rs @@ -10,10 +10,24 @@ use crate::metrics::label_filter::VectorLabelFilter; use crate::metrics::recorder::VectorRecorder; use metrics::Key; use metrics_tracing_context::TracingContextLayer; -use metrics_util::{layers::Layer, Generational, NotTracked, Registry}; +use metrics_util::{layers::Layer, Generational, NotTracked}; use once_cell::sync::OnceCell; +use snafu::Snafu; + +pub(self) type Registry = metrics_util::Registry>; + +type Result = std::result::Result; + +#[derive(Clone, Copy, Debug, Eq, PartialEq, Snafu)] +pub enum Error { + #[snafu(display("Recorder already initialized."))] + AlreadyInitialized, + #[snafu(display("Metrics system was not initialized."))] + NotInitialized, +} static CONTROLLER: OnceCell = OnceCell::new(); + // Cardinality counter parameters, expose the internal metrics registry // cardinality. Useful for the end users to help understand the characteristics // of their environment and how vectors acts in it. @@ -22,7 +36,7 @@ static CARDINALITY_KEY: Key = Key::from_static_name(CARDINALITY_KEY_NAME); /// Controller allows capturing metric snapshots. pub struct Controller { - registry: Arc>>, + recorder: VectorRecorder, } fn metrics_enabled() -> bool { @@ -33,26 +47,16 @@ fn tracing_context_layer_enabled() -> bool { !matches!(std::env::var("DISABLE_INTERNAL_METRICS_TRACING_INTEGRATION"), Ok(x) if x == "true") } -/// Initialize the metrics sub-system -/// -/// # Errors -/// -/// This function will error if it is called multiple times. -pub fn init() -> crate::Result<()> { +fn init(recorder: VectorRecorder) -> Result<()> { // An escape hatch to allow disabing internal metrics core. May be used for // performance reasons. This is a hidden and undocumented functionality. 
if !metrics_enabled() { metrics::set_boxed_recorder(Box::new(metrics::NoopRecorder)) - .map_err(|_| "recorder already initialized")?; + .map_err(|_| Error::AlreadyInitialized)?; info!(message = "Internal metrics core is disabled."); return Ok(()); } - //// - //// Prepare the registry - //// - let registry = Arc::new(Registry::>::untracked()); - //// //// Prepare the controller //// @@ -62,11 +66,11 @@ pub fn init() -> crate::Result<()> { // interested in these metrics can grab copies. See `capture_metrics` and // its callers for an example. let controller = Controller { - registry: registry.clone(), + recorder: recorder.clone(), }; CONTROLLER .set(controller) - .map_err(|_| "controller already initialized")?; + .map_err(|_| Error::AlreadyInitialized)?; //// //// Initialize the recorder. @@ -75,7 +79,6 @@ pub fn init() -> crate::Result<()> { // The recorder is the interface between metrics-rs and our registry. In our // case it doesn't _do_ much other than shepherd into the registry and // update the cardinality counter, see above, as needed. - let recorder = VectorRecorder::new(registry); let recorder: Box = if tracing_context_layer_enabled() { // Apply a layer to capture tracing span fields as labels. Box::new(TracingContextLayer::new(VectorLabelFilter).layer(recorder)) @@ -85,49 +88,67 @@ pub fn init() -> crate::Result<()> { // This where we combine metrics-rs and our registry. We box it to avoid // having to fiddle with statics ourselves. - metrics::set_boxed_recorder(recorder).map_err(|_| "recorder already initialized")?; - - Ok(()) + metrics::set_boxed_recorder(recorder).map_err(|_| Error::AlreadyInitialized) } -/// Clear all metrics from the registry. -pub fn reset(controller: &Controller) { - controller.registry.clear(); +/// Initialize the default metrics sub-system +/// +/// # Errors +/// +/// This function will error if it is called multiple times. 
+pub fn init_global() -> Result<()> { + init(VectorRecorder::new_global()) } -/// Get a handle to the globally registered controller, if it's initialized. +/// Initialize the thread-local metrics sub-system /// /// # Errors /// -/// This function will fail if the metrics subsystem has not been correctly -/// initialized. -pub fn get_controller() -> crate::Result<&'static Controller> { - CONTROLLER - .get() - .ok_or_else(|| "metrics system not initialized".into()) +/// This function will error if it is called multiple times. +pub fn init_test() -> Result<()> { + init(VectorRecorder::new_test()) } -/// Take a snapshot of all gathered metrics and expose them as metric -/// [`Event`](crate::event::Event)s. -pub fn capture_metrics(controller: &Controller) -> impl Iterator { - let mut metrics: Vec = Vec::new(); - controller.registry.visit(|_kind, (key, handle)| { - metrics.push(Metric::from_metric_kv(key, handle.get_inner())); - }); - - // Add alias `events_processed_total` for `events_out_total`. - for i in 0..metrics.len() { - let metric = &metrics[i]; - if metric.name() == "events_out_total" { - let alias = metric.clone().with_name("processed_events_total"); - metrics.push(alias); - } +impl Controller { + /// Clear all metrics from the registry. + pub fn reset(&self) { + self.recorder.with_registry(Registry::clear); } - let handle = Handle::Counter(Arc::new(Counter::with_count(metrics.len() as u64 + 1))); - metrics.push(Metric::from_metric_kv(&CARDINALITY_KEY, &handle)); + /// Get a handle to the globally registered controller, if it's initialized. + /// + /// # Errors + /// + /// This function will fail if the metrics subsystem has not been correctly + /// initialized. + pub fn get() -> Result<&'static Self> { + CONTROLLER.get().ok_or(Error::NotInitialized) + } + + /// Take a snapshot of all gathered metrics and expose them as metric + /// [`Event`](crate::event::Event)s. 
+ pub fn capture_metrics(&self) -> impl Iterator { + let mut metrics: Vec = Vec::new(); + self.recorder.with_registry(|registry| { + registry.visit(|_kind, (key, handle)| { + metrics.push(Metric::from_metric_kv(key, handle.get_inner())); + }); + }); + + // Add alias `events_processed_total` for `events_out_total`. + for i in 0..metrics.len() { + let metric = &metrics[i]; + if metric.name() == "events_out_total" { + let alias = metric.clone().with_name("processed_events_total"); + metrics.push(alias); + } + } + + let handle = Handle::Counter(Arc::new(Counter::with_count(metrics.len() as u64 + 1))); + metrics.push(Metric::from_metric_kv(&CARDINALITY_KEY, &handle)); - metrics.into_iter() + metrics.into_iter() + } } #[macro_export] diff --git a/lib/vector-core/src/metrics/recorder.rs b/lib/vector-core/src/metrics/recorder.rs index ea14f5c0bfcfc..d1ab01b3b7323 100644 --- a/lib/vector-core/src/metrics/recorder.rs +++ b/lib/vector-core/src/metrics/recorder.rs @@ -1,30 +1,50 @@ -use std::sync::Arc; - +use super::Registry; use crate::metrics::handle::Handle; use metrics::{GaugeValue, Key, Recorder, Unit}; -use metrics_util::{MetricKind, NotTracked, Registry}; +use metrics_util::MetricKind; +use once_cell::unsync::OnceCell; +use std::sync::Arc; + +thread_local!(static LOCAL_REGISTRY: OnceCell=OnceCell::new()); /// [`VectorRecorder`] is a [`metrics::Recorder`] implementation that's suitable /// for the advanced usage that we have in Vector. 
-pub(crate) struct VectorRecorder { - registry: Arc>>, +#[derive(Clone)] +pub(super) enum VectorRecorder { + Global(Arc), + ThreadLocal, } impl VectorRecorder { - pub fn new(registry: Arc>>) -> Self { - Self { registry } + pub(super) fn new_global() -> Self { + let registry = Arc::new(Registry::untracked()); + Self::Global(registry) + } + + pub(super) fn new_test() -> Self { + Self::with_thread_local(Registry::clear); + Self::ThreadLocal + } + + pub(super) fn with_registry(&self, doit: impl FnOnce(&Registry) -> T) -> T { + match &self { + Self::Global(registry) => doit(registry), + Self::ThreadLocal => Self::with_thread_local(doit), + } + } + + fn with_thread_local(doit: impl FnOnce(&Registry) -> T) -> T { + LOCAL_REGISTRY.with(|oc| doit(oc.get_or_init(Registry::untracked))) } } impl Recorder for VectorRecorder { fn register_counter(&self, key: &Key, _unit: Option, _description: Option<&'static str>) { - self.registry - .op(MetricKind::Counter, key, |_| {}, Handle::counter); + self.with_registry(|r| r.op(MetricKind::Counter, key, |_| {}, Handle::counter)); } fn register_gauge(&self, key: &Key, _unit: Option, _description: Option<&'static str>) { - self.registry - .op(MetricKind::Gauge, key, |_| {}, Handle::gauge); + self.with_registry(|r| r.op(MetricKind::Gauge, key, |_| {}, Handle::gauge)); } fn register_histogram( @@ -33,34 +53,39 @@ impl Recorder for VectorRecorder { _unit: Option, _description: Option<&'static str>, ) { - self.registry - .op(MetricKind::Histogram, key, |_| {}, Handle::histogram); + self.with_registry(|r| r.op(MetricKind::Histogram, key, |_| {}, Handle::histogram)); } fn increment_counter(&self, key: &Key, value: u64) { - self.registry.op( - MetricKind::Counter, - key, - |handle| handle.increment_counter(value), - Handle::counter, - ); + self.with_registry(|r| { + r.op( + MetricKind::Counter, + key, + |handle| handle.increment_counter(value), + Handle::counter, + ); + }); } fn update_gauge(&self, key: &Key, value: GaugeValue) { - 
self.registry.op( - MetricKind::Gauge, - key, - |handle| handle.update_gauge(value), - Handle::gauge, - ); + self.with_registry(|r| { + r.op( + MetricKind::Gauge, + key, + |handle| handle.update_gauge(value), + Handle::gauge, + ); + }); } fn record_histogram(&self, key: &Key, value: f64) { - self.registry.op( - MetricKind::Histogram, - key, - |handle| handle.record_histogram(value), - Handle::histogram, - ); + self.with_registry(|r| { + r.op( + MetricKind::Histogram, + key, + |handle| handle.record_histogram(value), + Handle::histogram, + ); + }); } } diff --git a/lib/vector-core/src/metrics/tests/mod.rs b/lib/vector-core/src/metrics/tests/mod.rs index 9507e3b970450..a2fc653b299ee 100644 --- a/lib/vector-core/src/metrics/tests/mod.rs +++ b/lib/vector-core/src/metrics/tests/mod.rs @@ -23,7 +23,9 @@ fn test_labels_injection() { counter!("labels_injected_total", 1); - let metric = super::capture_metrics(super::get_controller().unwrap()) + let metric = super::Controller::get() + .unwrap() + .capture_metrics() .map(|e| e.into_metric()) .find(|metric| metric.name() == "labels_injected_total") .unwrap(); @@ -46,7 +48,9 @@ fn test_cardinality_metric() { let _ = super::init(); let capture_value = || { - let metric = super::capture_metrics(super::get_controller().unwrap()) + let metric = super::Controller::get() + .unwrap() + .capture_metrics() .map(Event::into_metric) .find(|metric| metric.name() == super::CARDINALITY_KEY_NAME) .unwrap(); diff --git a/lib/vector-core/src/sink.rs b/lib/vector-core/src/sink.rs index c8a6ea8540626..a991f11e5bde9 100644 --- a/lib/vector-core/src/sink.rs +++ b/lib/vector-core/src/sink.rs @@ -15,13 +15,13 @@ impl VectorSink { /// # Errors /// /// It is unclear under what conditions this function will error. 
- pub async fn run(mut self, input: S) -> Result<(), ()> + pub async fn run(self, input: S) -> Result<(), ()> where S: Stream + Send, { match self { Self::Sink(sink) => input.map(Ok).forward(sink).await, - Self::Stream(ref mut s) => s.run(Box::pin(input)).await, + Self::Stream(s) => s.run(Box::pin(input)).await, } } @@ -48,5 +48,5 @@ impl fmt::Debug for VectorSink { #[async_trait] pub trait StreamSink { - async fn run(&mut self, input: BoxStream<'_, Event>) -> Result<(), ()>; + async fn run(self: Box, input: BoxStream<'_, Event>) -> Result<(), ()>; } diff --git a/lib/vrl/compiler/src/expression/function_call.rs b/lib/vrl/compiler/src/expression/function_call.rs index d140601561d07..11ac2567e1a8c 100644 --- a/lib/vrl/compiler/src/expression/function_call.rs +++ b/lib/vrl/compiler/src/expression/function_call.rs @@ -162,10 +162,10 @@ impl FunctionCall { }) })?; - let compile_info = FunctionCompileContext { span: call_span }; + let compile_ctx = FunctionCompileContext { span: call_span }; let mut expr = function - .compile(state, &compile_info, list) + .compile(state, &compile_ctx, list) .map_err(|error| Error::Compilation { call_span, error })?; // Asking for an infallible function to abort on error makes no sense. 
diff --git a/lib/vrl/compiler/src/state.rs b/lib/vrl/compiler/src/state.rs index be7633424b6f0..bb2fb424f27bb 100644 --- a/lib/vrl/compiler/src/state.rs +++ b/lib/vrl/compiler/src/state.rs @@ -126,6 +126,14 @@ pub struct Runtime { } impl Runtime { + pub fn is_empty(&self) -> bool { + self.variables.is_empty() + } + + pub fn clear(&mut self) { + self.variables.clear(); + } + pub fn variable(&self, ident: &Ident) -> Option<&Value> { self.variables.get(ident) } diff --git a/lib/vrl/core/src/runtime.rs b/lib/vrl/core/src/runtime.rs index 9b6946f49c628..843a00e6523cf 100644 --- a/lib/vrl/core/src/runtime.rs +++ b/lib/vrl/core/src/runtime.rs @@ -9,6 +9,7 @@ pub type RuntimeResult = Result; #[derive(Debug, Default)] pub struct Runtime { state: state::Runtime, + root_lookup: LookupBuf, } /// The error raised if the runtime is terminated. @@ -42,7 +43,18 @@ impl Error for Terminate { impl Runtime { pub fn new(state: state::Runtime) -> Self { - Self { state } + Self { + state, + root_lookup: LookupBuf::root(), + } + } + + pub fn is_empty(&self) -> bool { + self.state.is_empty() + } + + pub fn clear(&mut self) { + self.state.clear(); } /// Given the provided [`Target`], resolve the provided [`Program`] to @@ -57,7 +69,7 @@ impl Runtime { // // VRL technically supports any `Value` object as the root, but the // assumption is people are expected to use it to query objects. 
- match target.get(&LookupBuf::root()) { + match target.get(&self.root_lookup) { Ok(Some(Value::Object(_))) => {} Ok(Some(value)) => { return Err(Terminate::Error( diff --git a/lib/vrl/parser/src/parser.lalrpop b/lib/vrl/parser/src/parser.lalrpop index 4e8bb7f7164d7..b03e24ce67c04 100644 --- a/lib/vrl/parser/src/parser.lalrpop +++ b/lib/vrl/parser/src/parser.lalrpop @@ -423,10 +423,10 @@ IfStatement: IfStatement = > NonterminalNewline* > - )*> + )*> >)?> => { let mut alternative = alternative; - + alternatives.reverse(); for Node { span, mut node } in alternatives { node.alternative = alternative; let node = Node::new(span, Expr::IfStatement(Node::new(span, node))); diff --git a/lib/vrl/stdlib/Cargo.toml b/lib/vrl/stdlib/Cargo.toml index 26d5bd886c046..7d7d362ebc5a0 100644 --- a/lib/vrl/stdlib/Cargo.toml +++ b/lib/vrl/stdlib/Cargo.toml @@ -8,7 +8,7 @@ license = "MPL-2.0" [dependencies] vrl = { path = "../core" } -lookup = { path = "../../lookup" } +lookup_lib = {package = "lookup", path = "../../lookup" } datadog-search-syntax = { path = "../../datadog/search-syntax", optional = true } base64 = { version = "0.13", optional = true } @@ -16,6 +16,7 @@ bytes = { version = "1.1.0", optional = true } chrono = { version = "0.4", optional = true } cidr-utils = { version = "0.5", optional = true } csv = { version = "1.1", optional = true } +dns-lookup = { version = "1.0.8", optional = true } grok = { version = "1", optional = true } hex = { version = "0.4", optional = true } hostname = { version = "0.3", optional = true } @@ -75,6 +76,7 @@ default = [ "format_int", "format_number", "format_timestamp", + "get", "get_env_var", "get_hostname", "includes", @@ -134,8 +136,11 @@ default = [ "parse_xml", "push", "redact", + "remove", "replace", + "reverse_dns", "round", + "set", "sha1", "sha2", "sha3", @@ -145,6 +150,7 @@ default = [ "string", "strip_ansi_escape_codes", "strip_whitespace", + "tally", "tag_types_externally", "timestamp", "to_bool", @@ -191,6 +197,7 @@ floor = [] 
format_int = [] format_number = ["rust_decimal"] format_timestamp = ["chrono"] +get = [] get_env_var = [] get_hostname = ["hostname"] includes = [] @@ -250,8 +257,11 @@ parse_user_agent = ["woothee","uaparser","lazy_static"] parse_xml = ["roxmltree", "lazy_static", "regex"] push = [] redact = ["lazy_static", "regex"] +remove = ["shared/btreemap"] replace = [] +reverse_dns = ["dns-lookup"] round = [] +set = ["shared/btreemap"] sha1 = ["sha-1", "hex"] sha2 = ["sha-2", "hex"] sha3 = ["sha-3", "hex"] @@ -262,12 +272,13 @@ string = [] strip_ansi_escape_codes = ["bytes", "strip-ansi-escapes"] strip_whitespace = [] tag_types_externally = ["shared/btreemap"] +tally = [] timestamp = [] to_bool = ["shared/conversion"] to_float = ["shared/conversion"] to_int = ["shared/conversion"] -to_string = ["chrono"] to_regex = ["tracing", "regex"] +to_string = ["chrono"] to_syslog_facility = [] to_syslog_level = [] to_syslog_severity = [] diff --git a/lib/vrl/stdlib/benches/benches.rs b/lib/vrl/stdlib/benches/benches.rs index c5ede97a2e4db..a429c1ce7df36 100644 --- a/lib/vrl/stdlib/benches/benches.rs +++ b/lib/vrl/stdlib/benches/benches.rs @@ -33,6 +33,7 @@ criterion_group!( format_int, format_number, format_timestamp, + get, get_env_var, get_hostname, includes, @@ -89,8 +90,11 @@ criterion_group!( parse_xml, push, redact, + remove, replace, + reverse_dns, round, + set, sha1, sha2, sha3, @@ -99,6 +103,7 @@ criterion_group!( starts_with, strip_ansi_escape_codes, strip_whitespace, + tally, to_bool, to_float, to_int, @@ -461,6 +466,25 @@ bench_function! { } } +bench_function! 
{ + set => vrl_stdlib::Set; + + single { + args: func_args![value: value!({ "foo": "bar" }), path: vec!["baz"], data: true], + want: Ok(value!({ "foo": "bar", "baz": true })), + } + + nested { + args: func_args![value: value!({ "foo": { "bar": "baz" } }), path: vec!["foo", "bar", "qux"], data: 42], + want: Ok(value!({ "foo": { "bar": { "qux": 42 } } })), + } + + indexing { + args: func_args![value: value!([0, 42, 91]), path: vec![3], data: 1], + want: Ok(value!([0, 42, 91, 1])), + } +} + bench_function! { ip_aton => vrl_stdlib::IpAton; @@ -733,6 +757,25 @@ bench_function! { } } +bench_function! { + get => vrl_stdlib::Get; + + single { + args: func_args![value: value!({ "foo": "bar" }), path: vec!["foo"]], + want: Ok("bar"), + } + + nested { + args: func_args![value: value!({ "foo": { "bar": "baz" } }), path: vec!["foo", "bar"]], + want: Ok("baz"), + } + + indexing { + args: func_args![value: value!([0, 42, 91]), path: vec![-2]], + want: Ok(42), + } +} + bench_function! { r#match => vrl_stdlib::Match; @@ -1712,6 +1755,25 @@ bench_function! { } } +bench_function! { + remove => vrl_stdlib::Remove; + + single { + args: func_args![value: value!({ "foo": "bar", "baz": true }), path: vec!["foo"]], + want: Ok(value!({ "baz": true })), + } + + nested { + args: func_args![value: value!({ "foo": { "bar": "baz" } }), path: vec!["foo", "bar"]], + want: Ok(value!({ "foo": {} })), + } + + indexing { + args: func_args![value: value!([0, 42, 91]), path: vec![-2]], + want: Ok(vec![0, 91]), + } +} + bench_function! { replace => vrl_stdlib::Replace; @@ -1734,6 +1796,15 @@ bench_function! { } } +bench_function! { + reverse_dns => vrl_stdlib::ReverseDns; + + google { + args: func_args![value: value!("8.8.8.8")], + want: Ok(value!("dns.google")), + } +} + bench_function! { round => vrl_stdlib::Round; @@ -1905,6 +1976,18 @@ bench_function! { } } +bench_function! 
{ + tally => vrl_stdlib::Tally; + + default { + args: func_args![ + value: value!(["bar", "foo", "baz", "foo"]), + ], + want: Ok(value!({"bar": 1, "foo": 2, "baz": 1})), + } + +} + bench_function! { to_bool => vrl_stdlib::ToBool; diff --git a/lib/vrl/stdlib/src/append.rs b/lib/vrl/stdlib/src/append.rs index 3edea3d9a78df..e94fc2a9596aa 100644 --- a/lib/vrl/stdlib/src/append.rs +++ b/lib/vrl/stdlib/src/append.rs @@ -34,7 +34,7 @@ impl Function for Append { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/array.rs b/lib/vrl/stdlib/src/array.rs index 49b0d709ec1c4..e6bb8a2a60be0 100644 --- a/lib/vrl/stdlib/src/array.rs +++ b/lib/vrl/stdlib/src/array.rs @@ -36,7 +36,7 @@ impl Function for Array { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/assert.rs b/lib/vrl/stdlib/src/assert.rs index 356d7f7684462..9b4413bed38db 100644 --- a/lib/vrl/stdlib/src/assert.rs +++ b/lib/vrl/stdlib/src/assert.rs @@ -46,7 +46,7 @@ impl Function for Assert { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let condition = arguments.required("condition"); diff --git a/lib/vrl/stdlib/src/assert_eq.rs b/lib/vrl/stdlib/src/assert_eq.rs index eb4f93f288881..5b31b69a571d1 100644 --- a/lib/vrl/stdlib/src/assert_eq.rs +++ b/lib/vrl/stdlib/src/assert_eq.rs @@ -53,7 +53,7 @@ impl Function for AssertEq { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let left = arguments.required("left"); diff --git a/lib/vrl/stdlib/src/boolean.rs 
b/lib/vrl/stdlib/src/boolean.rs index fdcbb5eb186df..da97fc19c7965 100644 --- a/lib/vrl/stdlib/src/boolean.rs +++ b/lib/vrl/stdlib/src/boolean.rs @@ -36,7 +36,7 @@ impl Function for Boolean { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/ceil.rs b/lib/vrl/stdlib/src/ceil.rs index 53e2408d632ed..09feafb66db17 100644 --- a/lib/vrl/stdlib/src/ceil.rs +++ b/lib/vrl/stdlib/src/ceil.rs @@ -27,7 +27,7 @@ impl Function for Ceil { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/compact.rs b/lib/vrl/stdlib/src/compact.rs index 1238e955a0558..54a5f4bd893ea 100644 --- a/lib/vrl/stdlib/src/compact.rs +++ b/lib/vrl/stdlib/src/compact.rs @@ -68,7 +68,7 @@ impl Function for Compact { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/contains.rs b/lib/vrl/stdlib/src/contains.rs index 1a1d3c5e6b2b3..4b96ba41d7319 100644 --- a/lib/vrl/stdlib/src/contains.rs +++ b/lib/vrl/stdlib/src/contains.rs @@ -31,7 +31,7 @@ impl Function for Contains { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/decode_base64.rs b/lib/vrl/stdlib/src/decode_base64.rs index f9e561126c85f..a1e9d8e2429fd 100644 --- a/lib/vrl/stdlib/src/decode_base64.rs +++ b/lib/vrl/stdlib/src/decode_base64.rs @@ -28,7 +28,7 @@ impl Function for DecodeBase64 { fn compile( &self, _state: &state::Compiler, - _info: 
&FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/decode_percent.rs b/lib/vrl/stdlib/src/decode_percent.rs index a30b9e07dea36..2ef01c2994889 100644 --- a/lib/vrl/stdlib/src/decode_percent.rs +++ b/lib/vrl/stdlib/src/decode_percent.rs @@ -21,7 +21,7 @@ impl Function for DecodePercent { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/del.rs b/lib/vrl/stdlib/src/del.rs index b4f74dce54e63..34adc26ec06a1 100644 --- a/lib/vrl/stdlib/src/del.rs +++ b/lib/vrl/stdlib/src/del.rs @@ -52,7 +52,7 @@ impl Function for Del { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let query = arguments.required_query("target")?; diff --git a/lib/vrl/stdlib/src/downcase.rs b/lib/vrl/stdlib/src/downcase.rs index baaea177edcfe..9193a9c429649 100644 --- a/lib/vrl/stdlib/src/downcase.rs +++ b/lib/vrl/stdlib/src/downcase.rs @@ -19,7 +19,7 @@ impl Function for Downcase { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/encode_base64.rs b/lib/vrl/stdlib/src/encode_base64.rs index fc45fe5bda72a..43a41d5c26c71 100644 --- a/lib/vrl/stdlib/src/encode_base64.rs +++ b/lib/vrl/stdlib/src/encode_base64.rs @@ -33,7 +33,7 @@ impl Function for EncodeBase64 { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/encode_json.rs 
b/lib/vrl/stdlib/src/encode_json.rs index ad9aff1c6219f..090a0c76beaa5 100644 --- a/lib/vrl/stdlib/src/encode_json.rs +++ b/lib/vrl/stdlib/src/encode_json.rs @@ -19,7 +19,7 @@ impl Function for EncodeJson { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/encode_key_value.rs b/lib/vrl/stdlib/src/encode_key_value.rs index 629dde26a50ad..537c7447522e5 100644 --- a/lib/vrl/stdlib/src/encode_key_value.rs +++ b/lib/vrl/stdlib/src/encode_key_value.rs @@ -43,7 +43,7 @@ impl Function for EncodeKeyValue { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/encode_logfmt.rs b/lib/vrl/stdlib/src/encode_logfmt.rs index 854a8ae1aa53c..2661762066742 100644 --- a/lib/vrl/stdlib/src/encode_logfmt.rs +++ b/lib/vrl/stdlib/src/encode_logfmt.rs @@ -27,7 +27,7 @@ impl Function for EncodeLogfmt { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { // The encode_logfmt function is just an alias for `encode_key_value` with the following diff --git a/lib/vrl/stdlib/src/encode_percent.rs b/lib/vrl/stdlib/src/encode_percent.rs index 20b9c1f25d2c9..b2561f5088cf0 100644 --- a/lib/vrl/stdlib/src/encode_percent.rs +++ b/lib/vrl/stdlib/src/encode_percent.rs @@ -70,7 +70,7 @@ impl Function for EncodePercent { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let ascii_sets = vec![ diff --git a/lib/vrl/stdlib/src/ends_with.rs b/lib/vrl/stdlib/src/ends_with.rs index f500125a8bfa9..bb194f083c87f 100644 --- a/lib/vrl/stdlib/src/ends_with.rs 
+++ b/lib/vrl/stdlib/src/ends_with.rs @@ -31,7 +31,7 @@ impl Function for EndsWith { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/exists.rs b/lib/vrl/stdlib/src/exists.rs index 85d6111eb8592..42af4980e4ba9 100644 --- a/lib/vrl/stdlib/src/exists.rs +++ b/lib/vrl/stdlib/src/exists.rs @@ -34,7 +34,7 @@ impl Function for Exists { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let query = arguments.required_query("field")?; diff --git a/lib/vrl/stdlib/src/find.rs b/lib/vrl/stdlib/src/find.rs index 25361462bad73..5e925a7f6330e 100644 --- a/lib/vrl/stdlib/src/find.rs +++ b/lib/vrl/stdlib/src/find.rs @@ -40,7 +40,7 @@ impl Function for Find { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/flatten.rs b/lib/vrl/stdlib/src/flatten.rs index f547311bd7be0..d10ce56aadf97 100644 --- a/lib/vrl/stdlib/src/flatten.rs +++ b/lib/vrl/stdlib/src/flatten.rs @@ -35,7 +35,7 @@ impl Function for Flatten { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/float.rs b/lib/vrl/stdlib/src/float.rs index 31a7092de8a61..ebf9c25613a77 100644 --- a/lib/vrl/stdlib/src/float.rs +++ b/lib/vrl/stdlib/src/float.rs @@ -36,7 +36,7 @@ impl Function for Float { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff 
--git a/lib/vrl/stdlib/src/floor.rs b/lib/vrl/stdlib/src/floor.rs index 11256e69a2ad5..6fda62978d851 100644 --- a/lib/vrl/stdlib/src/floor.rs +++ b/lib/vrl/stdlib/src/floor.rs @@ -27,7 +27,7 @@ impl Function for Floor { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/format_int.rs b/lib/vrl/stdlib/src/format_int.rs index e810716d3d04d..81b4fd87e3923 100644 --- a/lib/vrl/stdlib/src/format_int.rs +++ b/lib/vrl/stdlib/src/format_int.rs @@ -28,7 +28,7 @@ impl Function for FormatInt { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/format_number.rs b/lib/vrl/stdlib/src/format_number.rs index 6addb6930b4e6..762e75c17ba2e 100644 --- a/lib/vrl/stdlib/src/format_number.rs +++ b/lib/vrl/stdlib/src/format_number.rs @@ -37,7 +37,7 @@ impl Function for FormatNumber { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/format_timestamp.rs b/lib/vrl/stdlib/src/format_timestamp.rs index 627acc2199a6a..d44b62d71cb46 100644 --- a/lib/vrl/stdlib/src/format_timestamp.rs +++ b/lib/vrl/stdlib/src/format_timestamp.rs @@ -28,7 +28,7 @@ impl Function for FormatTimestamp { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/get.rs b/lib/vrl/stdlib/src/get.rs new file mode 100644 index 0000000000000..55e1a7437c55f --- /dev/null +++ b/lib/vrl/stdlib/src/get.rs @@ -0,0 +1,165 @@ +use 
lookup_lib::{LookupBuf, SegmentBuf}; +use vrl::prelude::*; + +#[derive(Clone, Copy, Debug)] +pub struct Get; + +impl Function for Get { + fn identifier(&self) -> &'static str { + "get" + } + + fn parameters(&self) -> &'static [Parameter] { + &[ + Parameter { + keyword: "value", + kind: kind::OBJECT | kind::ARRAY, + required: true, + }, + Parameter { + keyword: "path", + kind: kind::ARRAY, + required: true, + }, + ] + } + + fn examples(&self) -> &'static [Example] { + &[ + Example { + title: "returns existing field", + source: r#"get!(value: {"foo": "bar"}, path: ["foo"])"#, + result: Ok(r#""bar""#), + }, + Example { + title: "returns null for unknown field", + source: r#"get!(value: {"foo": "bar"}, path: ["baz"])"#, + result: Ok("null"), + }, + Example { + title: "nested path", + source: r#"get!(value: {"foo": { "bar": true }}, path: ["foo", "bar"])"#, + result: Ok(r#"true"#), + }, + Example { + title: "indexing", + source: r#"get!(value: [92, 42], path: [0])"#, + result: Ok("92"), + }, + Example { + title: "nested indexing", + source: r#"get!(value: {"foo": { "bar": [92, 42] }}, path: ["foo", "bar", 1])"#, + result: Ok("42"), + }, + Example { + title: "external target", + source: indoc! {r#" + . = { "foo": true } + get!(value: ., path: ["foo"]) + "#}, + result: Ok("true"), + }, + Example { + title: "variable", + source: indoc! 
{r#" + var = { "foo": true } + get!(value: var, path: ["foo"]) + "#}, + result: Ok("true"), + }, + Example { + title: "missing index", + source: r#"get!(value: {"foo": { "bar": [92, 42] }}, path: ["foo", "bar", 1, -1])"#, + result: Ok("null"), + }, + Example { + title: "invalid indexing", + source: r#"get!(value: [42], path: ["foo"])"#, + result: Ok("null"), + }, + Example { + title: "invalid segment type", + source: r#"get!(value: {"foo": { "bar": [92, 42] }}, path: ["foo", true])"#, + result: Err( + r#"function call error for "get" at (0:62): path segment must be either "string" or "integer", not "boolean""#, + ), + }, + ] + } + + fn compile( + &self, + _state: &state::Compiler, + _ctx: &FunctionCompileContext, + mut arguments: ArgumentList, + ) -> Compiled { + let value = arguments.required("value"); + let path = arguments.required("path"); + + Ok(Box::new(GetFn { value, path })) + } +} + +#[derive(Debug, Clone)] +pub struct GetFn { + value: Box, + path: Box, +} + +impl Expression for GetFn { + fn resolve(&self, ctx: &mut Context) -> Resolved { + let path = match self.path.resolve(ctx)? 
{ + Value::Array(path) => { + let mut get = LookupBuf::root(); + + for segment in path { + let segment = match segment { + Value::Bytes(field) => { + SegmentBuf::Field(String::from_utf8_lossy(&field).into_owned().into()) + } + Value::Integer(index) => SegmentBuf::Index(index as isize), + value => { + return Err(format!( + r#"path segment must be either "string" or "integer", not {}"#, + value.kind() + ) + .into()) + } + }; + + get.push_back(segment) + } + + get + } + value => { + return Err(value::Error::Expected { + got: value.kind(), + expected: Kind::Array, + } + .into()) + } + }; + + Ok(self.value.resolve(ctx)?.get(&path)?.unwrap_or(Value::Null)) + } + + fn type_def(&self, _: &state::Compiler) -> TypeDef { + TypeDef::new().fallible().unknown() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + test_function![ + get => Get; + + any { + args: func_args![value: value!([42]), path: value!([0])], + want: Ok(42), + tdef: TypeDef::new().fallible(), + } + ]; +} diff --git a/lib/vrl/stdlib/src/get_env_var.rs b/lib/vrl/stdlib/src/get_env_var.rs index bffcd600a5af5..23d72fef77796 100644 --- a/lib/vrl/stdlib/src/get_env_var.rs +++ b/lib/vrl/stdlib/src/get_env_var.rs @@ -27,7 +27,7 @@ impl Function for GetEnvVar { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let name = arguments.required("name"); diff --git a/lib/vrl/stdlib/src/get_hostname.rs b/lib/vrl/stdlib/src/get_hostname.rs index a716ca72a6eb6..b11a070ce7fbc 100644 --- a/lib/vrl/stdlib/src/get_hostname.rs +++ b/lib/vrl/stdlib/src/get_hostname.rs @@ -11,7 +11,7 @@ impl Function for GetHostname { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, _: ArgumentList, ) -> Compiled { Ok(Box::new(GetHostnameFn)) diff --git a/lib/vrl/stdlib/src/includes.rs b/lib/vrl/stdlib/src/includes.rs index 84319943d51eb..cb6efc9e0c2e3 100644 --- 
a/lib/vrl/stdlib/src/includes.rs +++ b/lib/vrl/stdlib/src/includes.rs @@ -41,7 +41,7 @@ impl Function for Includes { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/integer.rs b/lib/vrl/stdlib/src/integer.rs index 58d8510c6ddfd..e1fcb87812705 100644 --- a/lib/vrl/stdlib/src/integer.rs +++ b/lib/vrl/stdlib/src/integer.rs @@ -36,7 +36,7 @@ impl Function for Integer { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/ip_aton.rs b/lib/vrl/stdlib/src/ip_aton.rs index 9465c9a0b08b6..5881eb7eb9377 100644 --- a/lib/vrl/stdlib/src/ip_aton.rs +++ b/lib/vrl/stdlib/src/ip_aton.rs @@ -29,7 +29,7 @@ impl Function for IpAton { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/ip_cidr_contains.rs b/lib/vrl/stdlib/src/ip_cidr_contains.rs index af655f73bcd43..f8296fea61bc1 100644 --- a/lib/vrl/stdlib/src/ip_cidr_contains.rs +++ b/lib/vrl/stdlib/src/ip_cidr_contains.rs @@ -56,7 +56,7 @@ impl Function for IpCidrContains { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let cidr = arguments.required("cidr"); diff --git a/lib/vrl/stdlib/src/ip_ntoa.rs b/lib/vrl/stdlib/src/ip_ntoa.rs index eb420e7bb3a6e..a6f2148a91dcb 100644 --- a/lib/vrl/stdlib/src/ip_ntoa.rs +++ b/lib/vrl/stdlib/src/ip_ntoa.rs @@ -29,7 +29,7 @@ impl Function for IpNtoa { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, 
mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/ip_subnet.rs b/lib/vrl/stdlib/src/ip_subnet.rs index 3f9ae0f2b7d1c..365acd4f8b17d 100644 --- a/lib/vrl/stdlib/src/ip_subnet.rs +++ b/lib/vrl/stdlib/src/ip_subnet.rs @@ -41,7 +41,7 @@ impl Function for IpSubnet { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/ip_to_ipv6.rs b/lib/vrl/stdlib/src/ip_to_ipv6.rs index ebad35fafb968..68a19f1dff22a 100644 --- a/lib/vrl/stdlib/src/ip_to_ipv6.rs +++ b/lib/vrl/stdlib/src/ip_to_ipv6.rs @@ -29,7 +29,7 @@ impl Function for IpToIpv6 { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/ipv6_to_ipv4.rs b/lib/vrl/stdlib/src/ipv6_to_ipv4.rs index 74f1d6e4f7cca..d3f2bc447819f 100644 --- a/lib/vrl/stdlib/src/ipv6_to_ipv4.rs +++ b/lib/vrl/stdlib/src/ipv6_to_ipv4.rs @@ -29,7 +29,7 @@ impl Function for Ipv6ToIpV4 { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/is_array.rs b/lib/vrl/stdlib/src/is_array.rs index 31bf68f9b7df2..be445da2d871e 100644 --- a/lib/vrl/stdlib/src/is_array.rs +++ b/lib/vrl/stdlib/src/is_array.rs @@ -39,7 +39,7 @@ impl Function for IsArray { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/is_boolean.rs b/lib/vrl/stdlib/src/is_boolean.rs index 80db2dcb7e8ea..8b2a62178c2db 100644 --- 
a/lib/vrl/stdlib/src/is_boolean.rs +++ b/lib/vrl/stdlib/src/is_boolean.rs @@ -39,7 +39,7 @@ impl Function for IsBoolean { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/is_float.rs b/lib/vrl/stdlib/src/is_float.rs index d7f1e62f79602..15eb8fa84d753 100644 --- a/lib/vrl/stdlib/src/is_float.rs +++ b/lib/vrl/stdlib/src/is_float.rs @@ -39,7 +39,7 @@ impl Function for IsFloat { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/is_integer.rs b/lib/vrl/stdlib/src/is_integer.rs index 9a683670cb9ca..c1d8dc2d13b3a 100644 --- a/lib/vrl/stdlib/src/is_integer.rs +++ b/lib/vrl/stdlib/src/is_integer.rs @@ -39,7 +39,7 @@ impl Function for IsInteger { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/is_null.rs b/lib/vrl/stdlib/src/is_null.rs index b73421ecaf0b0..dc6482bbab464 100644 --- a/lib/vrl/stdlib/src/is_null.rs +++ b/lib/vrl/stdlib/src/is_null.rs @@ -39,7 +39,7 @@ impl Function for IsNull { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/is_nullish.rs b/lib/vrl/stdlib/src/is_nullish.rs index 35fd5792603ff..d53679737151f 100644 --- a/lib/vrl/stdlib/src/is_nullish.rs +++ b/lib/vrl/stdlib/src/is_nullish.rs @@ -29,7 +29,7 @@ impl Function for IsNullish { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, 
mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/is_object.rs b/lib/vrl/stdlib/src/is_object.rs index 8e9ff3aa1de74..83ba55f5a3d8a 100644 --- a/lib/vrl/stdlib/src/is_object.rs +++ b/lib/vrl/stdlib/src/is_object.rs @@ -39,7 +39,7 @@ impl Function for IsObject { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/is_regex.rs b/lib/vrl/stdlib/src/is_regex.rs index 25eb58d985b2c..bea457e865504 100644 --- a/lib/vrl/stdlib/src/is_regex.rs +++ b/lib/vrl/stdlib/src/is_regex.rs @@ -39,7 +39,7 @@ impl Function for IsRegex { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/is_string.rs b/lib/vrl/stdlib/src/is_string.rs index 4aae4c5c5d4c4..fabfca5965e5d 100644 --- a/lib/vrl/stdlib/src/is_string.rs +++ b/lib/vrl/stdlib/src/is_string.rs @@ -39,7 +39,7 @@ impl Function for IsString { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/is_timestamp.rs b/lib/vrl/stdlib/src/is_timestamp.rs index 38cf84ff1fc08..ad7157d6b18c4 100644 --- a/lib/vrl/stdlib/src/is_timestamp.rs +++ b/lib/vrl/stdlib/src/is_timestamp.rs @@ -39,7 +39,7 @@ impl Function for IsTimestamp { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/join.rs b/lib/vrl/stdlib/src/join.rs index 445958f93e895..fa61b11c194db 100644 --- 
a/lib/vrl/stdlib/src/join.rs +++ b/lib/vrl/stdlib/src/join.rs @@ -27,7 +27,7 @@ impl Function for Join { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/length.rs b/lib/vrl/stdlib/src/length.rs index 5312d0d67ecde..38739abb05902 100644 --- a/lib/vrl/stdlib/src/length.rs +++ b/lib/vrl/stdlib/src/length.rs @@ -39,7 +39,7 @@ impl Function for Length { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/lib.rs b/lib/vrl/stdlib/src/lib.rs index f66ce6389c05d..4d56fe73dadb7 100644 --- a/lib/vrl/stdlib/src/lib.rs +++ b/lib/vrl/stdlib/src/lib.rs @@ -52,6 +52,8 @@ mod format_int; mod format_number; #[cfg(feature = "format_timestamp")] mod format_timestamp; +#[cfg(feature = "get")] +mod get; #[cfg(feature = "get_env_var")] mod get_env_var; #[cfg(feature = "get_hostname")] @@ -178,10 +180,16 @@ mod parse_xml; mod push; #[cfg(feature = "redact")] mod redact; +#[cfg(feature = "remove")] +mod remove; #[cfg(feature = "replace")] mod replace; +#[cfg(feature = "reverse_dns")] +mod reverse_dns; #[cfg(feature = "round")] mod round; +#[cfg(feature = "set")] +mod set; #[cfg(feature = "sha1")] mod sha1; #[cfg(feature = "sha2")] @@ -202,6 +210,8 @@ mod strip_ansi_escape_codes; mod strip_whitespace; #[cfg(feature = "tag_types_externally")] mod tag_types_externally; +#[cfg(feature = "tally")] +mod tally; #[cfg(feature = "timestamp")] mod timestamp; #[cfg(feature = "to_bool")] @@ -293,6 +303,8 @@ pub use format_int::FormatInt; pub use format_number::FormatNumber; #[cfg(feature = "format_timestamp")] pub use format_timestamp::FormatTimestamp; +#[cfg(feature = "get")] +pub use get::Get; #[cfg(feature = "get_env_var")] pub use 
get_env_var::GetEnvVar; #[cfg(feature = "get_hostname")] @@ -411,10 +423,16 @@ pub use push::Push; pub use r#match::Match; #[cfg(feature = "redact")] pub use redact::Redact; +#[cfg(feature = "remove")] +pub use remove::Remove; #[cfg(feature = "replace")] pub use replace::Replace; +#[cfg(feature = "reverse_dns")] +pub use reverse_dns::ReverseDns; #[cfg(feature = "round")] pub use round::Round; +#[cfg(feature = "set")] +pub use set::Set; #[cfg(feature = "sha2")] pub use sha2::Sha2; #[cfg(feature = "sha3")] @@ -433,6 +451,8 @@ pub use strip_ansi_escape_codes::StripAnsiEscapeCodes; pub use strip_whitespace::StripWhitespace; #[cfg(feature = "tag_types_externally")] pub use tag_types_externally::TagTypesExternally; +#[cfg(feature = "tally")] +pub use tally::Tally; #[cfg(feature = "timestamp")] pub use timestamp::Timestamp; #[cfg(feature = "to_bool")] @@ -520,6 +540,8 @@ pub fn all() -> Vec> { Box::new(FormatNumber), #[cfg(feature = "format_timestamp")] Box::new(FormatTimestamp), + #[cfg(feature = "get")] + Box::new(Get), #[cfg(feature = "get_env_var")] Box::new(GetEnvVar), #[cfg(feature = "get_hostname")] @@ -642,10 +664,16 @@ pub fn all() -> Vec> { Box::new(Push), #[cfg(feature = "redact")] Box::new(Redact), + #[cfg(feature = "remove")] + Box::new(Remove), #[cfg(feature = "replace")] Box::new(Replace), + #[cfg(feature = "reverse_dns")] + Box::new(ReverseDns), #[cfg(feature = "round")] Box::new(Round), + #[cfg(feature = "set")] + Box::new(Set), #[cfg(feature = "sha1")] Box::new(Sha1), #[cfg(feature = "sha2")] @@ -664,6 +692,8 @@ pub fn all() -> Vec> { Box::new(StripAnsiEscapeCodes), #[cfg(feature = "strip_whitespace")] Box::new(StripWhitespace), + #[cfg(feature = "tally")] + Box::new(Tally), #[cfg(feature = "tag_types_externally")] Box::new(TagTypesExternally), #[cfg(feature = "timestamp")] diff --git a/lib/vrl/stdlib/src/match.rs b/lib/vrl/stdlib/src/match.rs index 68084d1eced6c..3baf38c55fb84 100644 --- a/lib/vrl/stdlib/src/match.rs +++ b/lib/vrl/stdlib/src/match.rs @@ 
-41,7 +41,7 @@ impl Function for Match { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/match_any.rs b/lib/vrl/stdlib/src/match_any.rs index 5fdaeb9b90cbb..b24c47e37a571 100644 --- a/lib/vrl/stdlib/src/match_any.rs +++ b/lib/vrl/stdlib/src/match_any.rs @@ -42,7 +42,7 @@ impl Function for MatchAny { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/match_array.rs b/lib/vrl/stdlib/src/match_array.rs index a0ed0bcf316c5..e3b0a6d39583c 100644 --- a/lib/vrl/stdlib/src/match_array.rs +++ b/lib/vrl/stdlib/src/match_array.rs @@ -26,7 +26,7 @@ impl Function for MatchArray { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/match_datadog_query.rs b/lib/vrl/stdlib/src/match_datadog_query.rs index 3164246c38097..f60e1287e9fe4 100644 --- a/lib/vrl/stdlib/src/match_datadog_query.rs +++ b/lib/vrl/stdlib/src/match_datadog_query.rs @@ -4,7 +4,7 @@ use cached::{proc_macro::cached, SizedCache}; use datadog_search_syntax::{ normalize_fields, parse, BooleanType, Comparison, ComparisonValue, Field, QueryNode, }; -use lookup::{parser::parse_lookup, LookupBuf}; +use lookup_lib::{parser::parse_lookup, LookupBuf}; use regex::Regex; use std::borrow::Cow; @@ -44,7 +44,7 @@ impl Function for MatchDatadogQuery { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/md5.rs 
b/lib/vrl/stdlib/src/md5.rs index bcf87ef5c0efe..c9cfc4a469ab0 100644 --- a/lib/vrl/stdlib/src/md5.rs +++ b/lib/vrl/stdlib/src/md5.rs @@ -28,7 +28,7 @@ impl Function for Md5 { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/merge.rs b/lib/vrl/stdlib/src/merge.rs index 58a45fd17e1b9..365476751478c 100644 --- a/lib/vrl/stdlib/src/merge.rs +++ b/lib/vrl/stdlib/src/merge.rs @@ -41,7 +41,7 @@ impl Function for Merge { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let to = arguments.required("to"); diff --git a/lib/vrl/stdlib/src/now.rs b/lib/vrl/stdlib/src/now.rs index 74ddceb428fa0..10d0fea1ee0d0 100644 --- a/lib/vrl/stdlib/src/now.rs +++ b/lib/vrl/stdlib/src/now.rs @@ -20,7 +20,7 @@ impl Function for Now { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, _: ArgumentList, ) -> Compiled { Ok(Box::new(NowFn)) diff --git a/lib/vrl/stdlib/src/object.rs b/lib/vrl/stdlib/src/object.rs index d77dab2c49ce0..89f7fde72b17a 100644 --- a/lib/vrl/stdlib/src/object.rs +++ b/lib/vrl/stdlib/src/object.rs @@ -36,7 +36,7 @@ impl Function for Object { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/only_fields.rs b/lib/vrl/stdlib/src/only_fields.rs index 6881bd6430342..8ec9d3738baf2 100644 --- a/lib/vrl/stdlib/src/only_fields.rs +++ b/lib/vrl/stdlib/src/only_fields.rs @@ -21,7 +21,7 @@ impl Function for OnlyFields { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> 
Compiled { let mut paths = vec![]; diff --git a/lib/vrl/stdlib/src/parse_apache_log.rs b/lib/vrl/stdlib/src/parse_apache_log.rs index dc82fc47cb35e..7a92b0ada13de 100644 --- a/lib/vrl/stdlib/src/parse_apache_log.rs +++ b/lib/vrl/stdlib/src/parse_apache_log.rs @@ -33,7 +33,7 @@ impl Function for ParseApacheLog { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let variants = vec![value!("common"), value!("combined"), value!("error")]; diff --git a/lib/vrl/stdlib/src/parse_aws_alb_log.rs b/lib/vrl/stdlib/src/parse_aws_alb_log.rs index 26eae20e4971b..7d46fd0a23fde 100644 --- a/lib/vrl/stdlib/src/parse_aws_alb_log.rs +++ b/lib/vrl/stdlib/src/parse_aws_alb_log.rs @@ -30,7 +30,7 @@ impl Function for ParseAwsAlbLog { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/parse_aws_cloudwatch_log_subscription_message.rs b/lib/vrl/stdlib/src/parse_aws_cloudwatch_log_subscription_message.rs index 9c272b10cf971..7afa351a63100 100644 --- a/lib/vrl/stdlib/src/parse_aws_cloudwatch_log_subscription_message.rs +++ b/lib/vrl/stdlib/src/parse_aws_cloudwatch_log_subscription_message.rs @@ -49,7 +49,7 @@ impl Function for ParseAwsCloudWatchLogSubscriptionMessage { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/parse_aws_vpc_flow_log.rs b/lib/vrl/stdlib/src/parse_aws_vpc_flow_log.rs index e7c4ff00ad65c..2ec4d4d91e6db 100644 --- a/lib/vrl/stdlib/src/parse_aws_vpc_flow_log.rs +++ b/lib/vrl/stdlib/src/parse_aws_vpc_flow_log.rs @@ -49,7 +49,7 @@ impl Function for ParseAwsVpcFlowLog { fn compile( &self, _state: &state::Compiler, - 
_info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/parse_common_log.rs b/lib/vrl/stdlib/src/parse_common_log.rs index 414df53361797..5896e780b2c0c 100644 --- a/lib/vrl/stdlib/src/parse_common_log.rs +++ b/lib/vrl/stdlib/src/parse_common_log.rs @@ -28,7 +28,7 @@ impl Function for ParseCommonLog { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/parse_csv.rs b/lib/vrl/stdlib/src/parse_csv.rs index 0c9a578b63940..9c3c7497ea392 100644 --- a/lib/vrl/stdlib/src/parse_csv.rs +++ b/lib/vrl/stdlib/src/parse_csv.rs @@ -20,7 +20,7 @@ impl Function for ParseCsv { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/parse_duration.rs b/lib/vrl/stdlib/src/parse_duration.rs index c31ea9f733570..97f69f3a0a776 100644 --- a/lib/vrl/stdlib/src/parse_duration.rs +++ b/lib/vrl/stdlib/src/parse_duration.rs @@ -51,7 +51,7 @@ impl Function for ParseDuration { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/parse_glog.rs b/lib/vrl/stdlib/src/parse_glog.rs index d8f845ceccb4b..b9eec0d5fc6bc 100644 --- a/lib/vrl/stdlib/src/parse_glog.rs +++ b/lib/vrl/stdlib/src/parse_glog.rs @@ -47,7 +47,7 @@ impl Function for ParseGlog { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git 
a/lib/vrl/stdlib/src/parse_grok.rs b/lib/vrl/stdlib/src/parse_grok.rs index f051957068f48..34e86c75a7081 100644 --- a/lib/vrl/stdlib/src/parse_grok.rs +++ b/lib/vrl/stdlib/src/parse_grok.rs @@ -88,7 +88,7 @@ impl Function for ParseGrok { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/parse_int.rs b/lib/vrl/stdlib/src/parse_int.rs index c15515381d197..dcaf0d2e5d1a5 100644 --- a/lib/vrl/stdlib/src/parse_int.rs +++ b/lib/vrl/stdlib/src/parse_int.rs @@ -46,7 +46,7 @@ impl Function for ParseInt { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/parse_json.rs b/lib/vrl/stdlib/src/parse_json.rs index 27aedf82adbc5..fc6b5dc945e73 100644 --- a/lib/vrl/stdlib/src/parse_json.rs +++ b/lib/vrl/stdlib/src/parse_json.rs @@ -74,7 +74,7 @@ impl Function for ParseJson { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/parse_key_value.rs b/lib/vrl/stdlib/src/parse_key_value.rs index 5df5922d053c7..68ebbd9b5919c 100644 --- a/lib/vrl/stdlib/src/parse_key_value.rs +++ b/lib/vrl/stdlib/src/parse_key_value.rs @@ -80,7 +80,7 @@ impl Function for ParseKeyValue { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/parse_klog.rs b/lib/vrl/stdlib/src/parse_klog.rs index 1b176ccfd364f..3c3bbc18ed12c 100644 --- a/lib/vrl/stdlib/src/parse_klog.rs +++ b/lib/vrl/stdlib/src/parse_klog.rs @@ 
-47,7 +47,7 @@ impl Function for ParseKlog { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/parse_linux_authorization.rs b/lib/vrl/stdlib/src/parse_linux_authorization.rs index f831e1117a89b..a9e961a2e5a90 100644 --- a/lib/vrl/stdlib/src/parse_linux_authorization.rs +++ b/lib/vrl/stdlib/src/parse_linux_authorization.rs @@ -35,7 +35,7 @@ impl Function for ParseLinuxAuthorization { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/parse_logfmt.rs b/lib/vrl/stdlib/src/parse_logfmt.rs index 4a2cd4dafd2b6..13e0e507f3bc9 100644 --- a/lib/vrl/stdlib/src/parse_logfmt.rs +++ b/lib/vrl/stdlib/src/parse_logfmt.rs @@ -35,7 +35,7 @@ impl Function for ParseLogFmt { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/parse_nginx_log.rs b/lib/vrl/stdlib/src/parse_nginx_log.rs index 8d75c354325e5..989a0a623cd20 100644 --- a/lib/vrl/stdlib/src/parse_nginx_log.rs +++ b/lib/vrl/stdlib/src/parse_nginx_log.rs @@ -34,7 +34,7 @@ impl Function for ParseNginxLog { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let variants = vec![value!("combined"), value!("error")]; diff --git a/lib/vrl/stdlib/src/parse_query_string.rs b/lib/vrl/stdlib/src/parse_query_string.rs index a93188e5e62b9..848da884c97e3 100644 --- a/lib/vrl/stdlib/src/parse_query_string.rs +++ b/lib/vrl/stdlib/src/parse_query_string.rs @@ -26,7 +26,7 @@ impl Function for ParseQueryString { fn 
compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/parse_regex.rs b/lib/vrl/stdlib/src/parse_regex.rs index 92ab5c3ece9f9..5335b1a201d88 100644 --- a/lib/vrl/stdlib/src/parse_regex.rs +++ b/lib/vrl/stdlib/src/parse_regex.rs @@ -34,7 +34,7 @@ impl Function for ParseRegex { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/parse_regex_all.rs b/lib/vrl/stdlib/src/parse_regex_all.rs index 35056e3deeb88..87d1a1257631c 100644 --- a/lib/vrl/stdlib/src/parse_regex_all.rs +++ b/lib/vrl/stdlib/src/parse_regex_all.rs @@ -34,7 +34,7 @@ impl Function for ParseRegexAll { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/parse_ruby_hash.rs b/lib/vrl/stdlib/src/parse_ruby_hash.rs index cfb648380683f..ebfea8705d332 100644 --- a/lib/vrl/stdlib/src/parse_ruby_hash.rs +++ b/lib/vrl/stdlib/src/parse_ruby_hash.rs @@ -41,7 +41,7 @@ impl Function for ParseRubyHash { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/parse_syslog.rs b/lib/vrl/stdlib/src/parse_syslog.rs index ac86c4acc6c55..2dc5880dcb368 100644 --- a/lib/vrl/stdlib/src/parse_syslog.rs +++ b/lib/vrl/stdlib/src/parse_syslog.rs @@ -44,7 +44,7 @@ impl Function for ParseSyslog { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> 
Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/parse_timestamp.rs b/lib/vrl/stdlib/src/parse_timestamp.rs index d51ef6c1b3d43..06534225748b8 100644 --- a/lib/vrl/stdlib/src/parse_timestamp.rs +++ b/lib/vrl/stdlib/src/parse_timestamp.rs @@ -20,7 +20,7 @@ impl Function for ParseTimestamp { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/parse_tokens.rs b/lib/vrl/stdlib/src/parse_tokens.rs index ff98b1b6fba7b..0ec0fe14aed07 100644 --- a/lib/vrl/stdlib/src/parse_tokens.rs +++ b/lib/vrl/stdlib/src/parse_tokens.rs @@ -22,7 +22,7 @@ impl Function for ParseTokens { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/parse_url.rs b/lib/vrl/stdlib/src/parse_url.rs index 15e5c68effa19..0cf3042c76d59 100644 --- a/lib/vrl/stdlib/src/parse_url.rs +++ b/lib/vrl/stdlib/src/parse_url.rs @@ -40,7 +40,7 @@ impl Function for ParseUrl { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/parse_user_agent.rs b/lib/vrl/stdlib/src/parse_user_agent.rs index 26d897fcaf815..df212ce952860 100644 --- a/lib/vrl/stdlib/src/parse_user_agent.rs +++ b/lib/vrl/stdlib/src/parse_user_agent.rs @@ -86,7 +86,7 @@ impl Function for ParseUserAgent { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/parse_xml.rs b/lib/vrl/stdlib/src/parse_xml.rs index 
40617be894435..80096d4238c38 100644 --- a/lib/vrl/stdlib/src/parse_xml.rs +++ b/lib/vrl/stdlib/src/parse_xml.rs @@ -49,7 +49,7 @@ impl Function for ParseXml { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/push.rs b/lib/vrl/stdlib/src/push.rs index 60ecff2c7b428..aa4d2b7bc702c 100644 --- a/lib/vrl/stdlib/src/push.rs +++ b/lib/vrl/stdlib/src/push.rs @@ -41,7 +41,7 @@ impl Function for Push { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/redact.rs b/lib/vrl/stdlib/src/redact.rs index 6d33ddbd638ea..704913081ed25 100644 --- a/lib/vrl/stdlib/src/redact.rs +++ b/lib/vrl/stdlib/src/redact.rs @@ -57,7 +57,7 @@ impl Function for Redact { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/remove.rs b/lib/vrl/stdlib/src/remove.rs new file mode 100644 index 0000000000000..4ec0712c81610 --- /dev/null +++ b/lib/vrl/stdlib/src/remove.rs @@ -0,0 +1,209 @@ +use lookup_lib::{LookupBuf, SegmentBuf}; +use shared::btreemap; +use vrl::prelude::*; + +#[derive(Clone, Copy, Debug)] +pub struct Remove; + +impl Function for Remove { + fn identifier(&self) -> &'static str { + "remove" + } + + fn parameters(&self) -> &'static [Parameter] { + &[ + Parameter { + keyword: "value", + kind: kind::OBJECT | kind::ARRAY, + required: true, + }, + Parameter { + keyword: "path", + kind: kind::ARRAY, + required: true, + }, + Parameter { + keyword: "compact", + kind: kind::BOOLEAN, + required: false, + }, + ] + } + + fn examples(&self) -> &'static [Example] { + &[ + 
Example { + title: "remove existing field", + source: r#"remove!(value: {"foo": "bar"}, path: ["foo"])"#, + result: Ok("{}"), + }, + Example { + title: "remove unknown field", + source: r#"remove!(value: {"foo": "bar"}, path: ["baz"])"#, + result: Ok(r#"{ "foo": "bar" }"#), + }, + Example { + title: "nested path", + source: r#"remove!(value: {"foo": { "bar": true }}, path: ["foo", "bar"])"#, + result: Ok(r#"{ "foo": {} }"#), + }, + Example { + title: "compact object", + source: r#"remove!(value: {"foo": { "bar": true }}, path: ["foo", "bar"], compact: true)"#, + result: Ok(r#"{}"#), + }, + Example { + title: "indexing", + source: r#"remove!(value: [92, 42], path: [0])"#, + result: Ok("[42]"), + }, + Example { + title: "nested indexing", + source: r#"remove!(value: {"foo": { "bar": [92, 42] }}, path: ["foo", "bar", 1])"#, + result: Ok(r#"{ "foo": { "bar": [92] } }"#), + }, + Example { + title: "compact array", + source: r#"remove!(value: {"foo": [42], "bar": true }, path: ["foo", 0], compact: true)"#, + result: Ok(r#"{ "bar": true }"#), + }, + Example { + title: "external target", + source: indoc! {r#" + . = { "foo": true } + remove!(value: ., path: ["foo"]) + "#}, + result: Ok("{}"), + }, + Example { + title: "variable", + source: indoc! 
{r#" + var = { "foo": true } + remove!(value: var, path: ["foo"]) + "#}, + result: Ok("{}"), + }, + Example { + title: "missing index", + source: r#"remove!(value: {"foo": { "bar": [92, 42] }}, path: ["foo", "bar", 1, -1])"#, + result: Ok(r#"{ "foo": { "bar": [92, 42] } }"#), + }, + Example { + title: "invalid indexing", + source: r#"remove!(value: [42], path: ["foo"])"#, + result: Ok("[42]"), + }, + Example { + title: "invalid segment type", + source: r#"remove!(value: {"foo": { "bar": [92, 42] }}, path: ["foo", true])"#, + result: Err( + r#"function call error for "remove" at (0:65): path segment must be either "string" or "integer", not "boolean""#, + ), + }, + ] + } + + fn compile( + &self, + _state: &state::Compiler, + _ctx: &FunctionCompileContext, + mut arguments: ArgumentList, + ) -> Compiled { + let value = arguments.required("value"); + let path = arguments.required("path"); + let compact = arguments.optional("compact").unwrap_or(expr!(false)); + + Ok(Box::new(RemoveFn { + value, + path, + compact, + })) + } +} + +#[derive(Debug, Clone)] +pub struct RemoveFn { + value: Box, + path: Box, + compact: Box, +} + +impl Expression for RemoveFn { + fn resolve(&self, ctx: &mut Context) -> Resolved { + let path = match self.path.resolve(ctx)? 
{ + Value::Array(path) => { + let mut lookup = LookupBuf::root(); + + for segment in path { + let segment = match segment { + Value::Bytes(field) => { + SegmentBuf::Field(String::from_utf8_lossy(&field).into_owned().into()) + } + Value::Integer(index) => SegmentBuf::Index(index as isize), + value => { + return Err(format!( + r#"path segment must be either "string" or "integer", not {}"#, + value.kind() + ) + .into()) + } + }; + + lookup.push_back(segment) + } + + lookup + } + value => { + return Err(value::Error::Expected { + got: value.kind(), + expected: Kind::Array, + } + .into()) + } + }; + + let compact = self.compact.resolve(ctx)?.try_boolean()?; + + let mut value = self.value.resolve(ctx)?; + value.remove(&path, compact)?; + + Ok(value) + } + + fn type_def(&self, state: &state::Compiler) -> TypeDef { + let kind = self.value.type_def(state).kind(); + + let td = TypeDef::new().fallible(); + + match kind { + Kind::Array => td.array::(vec![]), + Kind::Object => td.object::<(), Kind>(btreemap! {}), + k if k.contains_array() && k.contains_object() => td + .array::(vec![]) + .add_object::<(), Kind>(btreemap! 
{}), + _ => unreachable!("compiler guaranteed"), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + test_function![ + remove => Remove; + + array { + args: func_args![value: value!([42]), path: value!([0])], + want: Ok(value!([])), + tdef: TypeDef::new().array::(vec![]).fallible(), + } + + object { + args: func_args![value: value!({ "foo": 42 }), path: value!(["foo"])], + want: Ok(value!({})), + tdef: TypeDef::new().object::<(), Kind>(btreemap!{}).fallible(), + } + ]; +} diff --git a/lib/vrl/stdlib/src/replace.rs b/lib/vrl/stdlib/src/replace.rs index 8942ae8da4eca..fe4adff06ea45 100644 --- a/lib/vrl/stdlib/src/replace.rs +++ b/lib/vrl/stdlib/src/replace.rs @@ -56,7 +56,7 @@ impl Function for Replace { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/reverse_dns.rs b/lib/vrl/stdlib/src/reverse_dns.rs new file mode 100644 index 0000000000000..1f81d3b18998e --- /dev/null +++ b/lib/vrl/stdlib/src/reverse_dns.rs @@ -0,0 +1,98 @@ +use dns_lookup::lookup_addr; +use std::net::IpAddr; + +use vrl::prelude::*; + +#[derive(Clone, Copy, Debug)] +pub struct ReverseDns; + +impl Function for ReverseDns { + fn identifier(&self) -> &'static str { + "reverse_dns" + } + + fn parameters(&self) -> &'static [Parameter] { + &[Parameter { + keyword: "value", + kind: kind::BYTES, + required: true, + }] + } + + fn examples(&self) -> &'static [Example] { + &[Example { + title: "Example", + source: r#"reverse_dns!("127.0.0.1")"#, + result: Ok("localhost"), + }] + } + + fn compile( + &self, + _state: &state::Compiler, + _ctx: &FunctionCompileContext, + mut arguments: ArgumentList, + ) -> Compiled { + let value = arguments.required("value"); + + Ok(Box::new(ReverseDnsFn { value })) + } +} + +#[derive(Debug, Clone)] +struct ReverseDnsFn { + value: Box, +} + +impl Expression for ReverseDnsFn { + fn
resolve(&self, ctx: &mut Context) -> Resolved { let ip: IpAddr = self .value .resolve(ctx)? .try_bytes_utf8_lossy()? .parse() .map_err(|err| format!("unable to parse IP address: {}", err))?; + + let host = + lookup_addr(&ip).map_err(|err| format!("unable to perform a lookup: {}", err))?; + + Ok(host.into()) + } + + fn type_def(&self, state: &state::Compiler) -> TypeDef { + self.value.type_def(state).fallible().bytes() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + test_function![ + reverse_dns => ReverseDns; + + invalid_ip { + args: func_args![value: value!("999.999.999.999")], + want: Err("unable to parse IP address: invalid IP address syntax"), + tdef: TypeDef::new().fallible().bytes(), + } + + google_ipv4 { + args: func_args![value: value!("8.8.8.8")], + want: Ok(value!("dns.google")), + tdef: TypeDef::new().fallible().bytes(), + } + + google_ipv6 { + args: func_args![value: value!("2001:4860:4860::8844")], + want: Ok(value!("dns.google")), + tdef: TypeDef::new().fallible().bytes(), + } + + invalid_type { + args: func_args![value: value!(1)], + want: Err("expected \"string\", got \"integer\""), + tdef: TypeDef::new().fallible().bytes(), + } + ]; +} diff --git a/lib/vrl/stdlib/src/round.rs b/lib/vrl/stdlib/src/round.rs index 48f196ea8148a..162ebbd5db1a9 100644 --- a/lib/vrl/stdlib/src/round.rs +++ b/lib/vrl/stdlib/src/round.rs @@ -47,7 +47,7 @@ impl Function for Round { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/set.rs b/lib/vrl/stdlib/src/set.rs new file mode 100644 index 0000000000000..e636dc4e3fda0 --- /dev/null +++ b/lib/vrl/stdlib/src/set.rs @@ -0,0 +1,183 @@ +use lookup_lib::{LookupBuf, SegmentBuf}; +use shared::btreemap; +use vrl::prelude::*; + +#[derive(Clone, Copy, Debug)] +pub struct Set; + +impl Function for Set { + fn identifier(&self) -> 
&'static str { + "set" + } + + fn parameters(&self) -> &'static [Parameter] { + &[ + Parameter { + keyword: "value", + kind: kind::OBJECT | kind::ARRAY, + required: true, + }, + Parameter { + keyword: "path", + kind: kind::ARRAY, + required: true, + }, + Parameter { + keyword: "data", + kind: kind::ANY, + required: true, + }, + ] + } + + fn examples(&self) -> &'static [Example] { + &[ + Example { + title: "set existing field", + source: r#"set!(value: {"foo": "bar"}, path: ["foo"], data: "baz")"#, + result: Ok(r#"{ "foo": "baz" }"#), + }, + Example { + title: "nested fields", + source: r#"set!(value: {}, path: ["foo", "bar"], data: "baz")"#, + result: Ok(r#"{ "foo": { "bar" : "baz" } }"#), + }, + Example { + title: "indexing", + source: r#"set!(value: [{ "foo": "bar" }], path: [0, "foo", "bar"], data: "baz")"#, + result: Ok(r#"[{ "foo": { "bar": "baz" } }]"#), + }, + Example { + title: "nested indexing", + source: r#"set!(value: {"foo": { "bar": [] }}, path: ["foo", "bar", 1], data: "baz")"#, + result: Ok(r#"{ "foo": { "bar": [null, "baz"] } }"#), + }, + Example { + title: "external target", + source: indoc! {r#" + . = { "foo": true } + set!(value: ., path: ["bar"], data: "baz") + "#}, + result: Ok(r#"{ "foo": true, "bar": "baz" }"#), + }, + Example { + title: "variable", + source: indoc! 
{r#" + var = { "foo": true } + set!(value: var, path: ["bar"], data: "baz") + "#}, + result: Ok(r#"{ "foo": true, "bar": "baz" }"#), + }, + Example { + title: "invalid indexing", + source: r#"set!(value: [], path: ["foo"], data: "baz")"#, + result: Ok(r#"{ "foo": "baz" }"#), + }, + Example { + title: "invalid segment type", + source: r#"set!({"foo": { "bar": [92, 42] }}, ["foo", true], "baz")"#, + result: Err( + r#"function call error for "set" at (0:56): path segment must be either "string" or "integer", not "boolean""#, + ), + }, + ] + } + + fn compile( + &self, + _state: &state::Compiler, + _ctx: &FunctionCompileContext, + mut arguments: ArgumentList, + ) -> Compiled { + let value = arguments.required("value"); + let path = arguments.required("path"); + let data = arguments.required("data"); + + Ok(Box::new(SetFn { value, path, data })) + } +} + +#[derive(Debug, Clone)] +pub struct SetFn { + value: Box, + path: Box, + data: Box, +} + +impl Expression for SetFn { + fn resolve(&self, ctx: &mut Context) -> Resolved { + let path = match self.path.resolve(ctx)? 
{ + Value::Array(segments) => { + let mut insert = LookupBuf::root(); + + for segment in segments { + let segment = match segment { + Value::Bytes(path) => { + SegmentBuf::Field(String::from_utf8_lossy(&path).into_owned().into()) + } + Value::Integer(index) => SegmentBuf::Index(index as isize), + value => { + return Err(format!( + r#"path segment must be either "string" or "integer", not {}"#, + value.kind() + ) + .into()) + } + }; + + insert.push_back(segment) + } + + insert + } + value => { + return Err(value::Error::Expected { + got: value.kind(), + expected: Kind::Array | Kind::Bytes, + } + .into()) + } + }; + + let mut value = self.value.resolve(ctx)?; + value.insert(&path, self.data.resolve(ctx)?)?; + + Ok(value) + } + + fn type_def(&self, state: &state::Compiler) -> TypeDef { + let kind = self.value.type_def(state).kind(); + + let td = TypeDef::new().fallible(); + + match kind { + Kind::Array => td.array::(vec![]), + Kind::Object => td.object::<(), Kind>(btreemap! {}), + k if k.contains_array() && k.contains_object() => td + .array::(vec![]) + .add_object::<(), Kind>(btreemap! 
{}), + _ => unreachable!("compiler guaranteed"), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + test_function![ + set => Set; + + array { + args: func_args![value: value!([]), path: vec![0], data: true], + want: Ok(vec![true]), + tdef: TypeDef::new().array::(vec![]).fallible(), + } + + object { + args: func_args![value: value!({}), path: vec!["foo"], data: true], + want: Ok(value!({ "foo": true })), + tdef: TypeDef::new().object::<(), Kind>(btreemap!{}).fallible(), + } + ]; +} diff --git a/lib/vrl/stdlib/src/sha1.rs b/lib/vrl/stdlib/src/sha1.rs index 72aa3d52eb674..7d11efd7ff560 100644 --- a/lib/vrl/stdlib/src/sha1.rs +++ b/lib/vrl/stdlib/src/sha1.rs @@ -28,7 +28,7 @@ impl Function for Sha1 { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/sha2.rs b/lib/vrl/stdlib/src/sha2.rs index 76910c59202c8..1557a7b80dac5 100644 --- a/lib/vrl/stdlib/src/sha2.rs +++ b/lib/vrl/stdlib/src/sha2.rs @@ -42,7 +42,7 @@ impl Function for Sha2 { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let variants = vec![ diff --git a/lib/vrl/stdlib/src/sha3.rs b/lib/vrl/stdlib/src/sha3.rs index a57a3ce371a65..e8034554a60c9 100644 --- a/lib/vrl/stdlib/src/sha3.rs +++ b/lib/vrl/stdlib/src/sha3.rs @@ -42,7 +42,7 @@ impl Function for Sha3 { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let variants = vec![ diff --git a/lib/vrl/stdlib/src/slice.rs b/lib/vrl/stdlib/src/slice.rs index 9d05e51bc2f71..91d25cf2a16ab 100644 --- a/lib/vrl/stdlib/src/slice.rs +++ b/lib/vrl/stdlib/src/slice.rs @@ -52,7 +52,7 @@ impl Function for Slice { fn compile( &self, _state: &state::Compiler, - _info: 
&FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/split.rs b/lib/vrl/stdlib/src/split.rs index 8e8191fcec98a..ecb8aabbf402a 100644 --- a/lib/vrl/stdlib/src/split.rs +++ b/lib/vrl/stdlib/src/split.rs @@ -51,7 +51,7 @@ impl Function for Split { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/starts_with.rs b/lib/vrl/stdlib/src/starts_with.rs index 6391e522782ea..0c194b94ae69d 100644 --- a/lib/vrl/stdlib/src/starts_with.rs +++ b/lib/vrl/stdlib/src/starts_with.rs @@ -51,7 +51,7 @@ impl Function for StartsWith { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/string.rs b/lib/vrl/stdlib/src/string.rs index e27e86105a878..e06c87803bd7a 100644 --- a/lib/vrl/stdlib/src/string.rs +++ b/lib/vrl/stdlib/src/string.rs @@ -36,7 +36,7 @@ impl Function for String { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/strip_ansi_escape_codes.rs b/lib/vrl/stdlib/src/strip_ansi_escape_codes.rs index 8ee7610a5ea73..55827933e2554 100644 --- a/lib/vrl/stdlib/src/strip_ansi_escape_codes.rs +++ b/lib/vrl/stdlib/src/strip_ansi_escape_codes.rs @@ -24,7 +24,7 @@ impl Function for StripAnsiEscapeCodes { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git 
a/lib/vrl/stdlib/src/strip_whitespace.rs b/lib/vrl/stdlib/src/strip_whitespace.rs index ec06e9c230d3e..7d9b7728c4184 100644 --- a/lib/vrl/stdlib/src/strip_whitespace.rs +++ b/lib/vrl/stdlib/src/strip_whitespace.rs @@ -39,7 +39,7 @@ impl Function for StripWhitespace { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/tag_types_externally.rs b/lib/vrl/stdlib/src/tag_types_externally.rs index a4c92e90896ef..b9ffb8de7d6e3 100644 --- a/lib/vrl/stdlib/src/tag_types_externally.rs +++ b/lib/vrl/stdlib/src/tag_types_externally.rs @@ -47,7 +47,7 @@ impl Function for TagTypesExternally { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/tally.rs b/lib/vrl/stdlib/src/tally.rs new file mode 100644 index 0000000000000..28c6122fd82ab --- /dev/null +++ b/lib/vrl/stdlib/src/tally.rs @@ -0,0 +1,98 @@ +use vrl::prelude::*; + +use std::collections::{BTreeMap, HashMap}; + +#[derive(Clone, Copy, Debug)] +pub struct Tally; + +impl Function for Tally { + fn identifier(&self) -> &'static str { + "tally" + } + + fn examples(&self) -> &'static [Example] { + &[Example { + title: "tally", + source: r#"tally!(["foo", "bar", "foo", "baz"])"#, + result: Ok(r#"{"foo": 2, "bar": 1, "baz": 1}"#), + }] + } + + fn compile( + &self, + _state: &state::Compiler, + _ctx: &FunctionCompileContext, + mut arguments: ArgumentList, + ) -> Compiled { + let value = arguments.required("value"); + + Ok(Box::new(TallyFn { value })) + } + + fn parameters(&self) -> &'static [Parameter] { + &[Parameter { + keyword: "value", + kind: kind::ARRAY, + required: true, + }] + } +} + +#[derive(Debug, Clone)] +pub(crate) struct TallyFn { + value: Box, +} + +impl Expression 
for TallyFn { + fn resolve(&self, ctx: &mut Context) -> Resolved { + let value = self.value.resolve(ctx)?.try_array()?; + + #[allow(clippy::mutable_key_type)] // false positive due to bytes::Bytes + let mut map: HashMap = HashMap::new(); + for value in value.into_iter() { + if let Value::Bytes(value) = value { + *map.entry(value).or_insert(0) += 1; + } else { + return Err(format!("all values must be strings, found: {:?}", value).into()); + } + } + + let map: BTreeMap<_, _> = map + .into_iter() + .map(|(k, v)| (String::from_utf8_lossy(&k).into_owned(), Value::from(v))) + .collect(); + + Ok(map.into()) + } + + fn type_def(&self, _: &state::Compiler) -> TypeDef { + TypeDef::new() + .object::<(), Kind>(map! { (): Kind::Integer }) + .fallible() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + test_function![ + tally => Tally; + + default { + args: func_args![ + value: value!(["bar", "foo", "baz", "foo"]), + ], + want: Ok(value!({"bar": 1, "foo": 2, "baz": 1})), + tdef: TypeDef::new().object::<(), Kind>(map! { (): Kind::Integer }).fallible(), + } + + non_string_values { + args: func_args![ + value: value!(["foo", [1,2,3], "123abc", 1, true, [1,2,3], "foo", true, 1]), + ], + want: Err("all values must be strings, found: Array([Integer(1), Integer(2), Integer(3)])"), + tdef: TypeDef::new().object::<(), Kind>(map! 
{ (): Kind::Integer }).fallible(), + } + ]; +} diff --git a/lib/vrl/stdlib/src/timestamp.rs b/lib/vrl/stdlib/src/timestamp.rs index 2b0930b8480fc..2a3140291cbbc 100644 --- a/lib/vrl/stdlib/src/timestamp.rs +++ b/lib/vrl/stdlib/src/timestamp.rs @@ -36,7 +36,7 @@ impl Function for Timestamp { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/to_bool.rs b/lib/vrl/stdlib/src/to_bool.rs index 2eec747dc471e..5f28d0723aa05 100644 --- a/lib/vrl/stdlib/src/to_bool.rs +++ b/lib/vrl/stdlib/src/to_bool.rs @@ -135,7 +135,7 @@ impl Function for ToBool { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/to_float.rs b/lib/vrl/stdlib/src/to_float.rs index 804f808cae756..8520ecd538081 100644 --- a/lib/vrl/stdlib/src/to_float.rs +++ b/lib/vrl/stdlib/src/to_float.rs @@ -58,10 +58,8 @@ impl Function for ToFloat { }, Example { title: "timestamp", - source: "to_float!(t'2020-01-01T00:00:00Z')", - result: Err( - r#"function call error for "to_float" at (0:34): unable to coerce "timestamp" into "float""#, - ), + source: "to_float(t'2020-01-01T00:00:00.100Z')", + result: Ok("1577836800.1"), }, Example { title: "array", @@ -90,7 +88,7 @@ impl Function for ToFloat { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); @@ -115,6 +113,7 @@ impl Expression for ToFloatFn { Integer(v) => Ok((v as f64).into()), Boolean(v) => Ok(NotNan::new(if v { 1.0 } else { 0.0 }).unwrap().into()), Null => Ok(0.0.into()), + Timestamp(v) => Ok((v.timestamp_nanos() as f64 / 1_000_000_000_f64).into()), Bytes(v) => 
Conversion::Float .convert(v) .map_err(|e| e.to_string().into()), @@ -125,9 +124,9 @@ impl Expression for ToFloatFn { fn type_def(&self, state: &state::Compiler) -> TypeDef { TypeDef::new() .with_fallibility( - self.value.type_def(state).has_kind( - Kind::Bytes | Kind::Timestamp | Kind::Array | Kind::Object | Kind::Regex, - ), + self.value + .type_def(state) + .has_kind(Kind::Bytes | Kind::Array | Kind::Object | Kind::Regex), ) .float() } @@ -136,6 +135,7 @@ impl Expression for ToFloatFn { #[cfg(test)] mod tests { use super::*; + use chrono::prelude::*; test_function![ to_float => ToFloat; @@ -151,5 +151,11 @@ mod tests { want: Ok(20.0), tdef: TypeDef::new().infallible().float(), } + + timestamp { + args: func_args![value: Utc.ymd(2014, 7, 8).and_hms_milli(9, 10, 11, 12)], + want: Ok(1404810611.012), + tdef: TypeDef::new().infallible().float(), + } ]; } diff --git a/lib/vrl/stdlib/src/to_int.rs b/lib/vrl/stdlib/src/to_int.rs index 5c28754f5da62..d8ff1ef16b79a 100644 --- a/lib/vrl/stdlib/src/to_int.rs +++ b/lib/vrl/stdlib/src/to_int.rs @@ -88,7 +88,7 @@ impl Function for ToInt { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/to_regex.rs b/lib/vrl/stdlib/src/to_regex.rs index ac0475c7458bd..852ee05dea94f 100644 --- a/lib/vrl/stdlib/src/to_regex.rs +++ b/lib/vrl/stdlib/src/to_regex.rs @@ -28,7 +28,7 @@ impl Function for ToRegex { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { warn!("`to_regex` is an expensive function that could impact throughput."); diff --git a/lib/vrl/stdlib/src/to_string.rs b/lib/vrl/stdlib/src/to_string.rs index 56fc4b6096a5c..e7ffca03488c1 100644 --- a/lib/vrl/stdlib/src/to_string.rs +++ b/lib/vrl/stdlib/src/to_string.rs @@ -80,7 +80,7 @@ impl Function for 
ToString { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/to_syslog_facility.rs b/lib/vrl/stdlib/src/to_syslog_facility.rs index d9181b74ddd69..066c95556c691 100644 --- a/lib/vrl/stdlib/src/to_syslog_facility.rs +++ b/lib/vrl/stdlib/src/to_syslog_facility.rs @@ -36,7 +36,7 @@ impl Function for ToSyslogFacility { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/to_syslog_level.rs b/lib/vrl/stdlib/src/to_syslog_level.rs index ec909982530e7..2522557fb0805 100644 --- a/lib/vrl/stdlib/src/to_syslog_level.rs +++ b/lib/vrl/stdlib/src/to_syslog_level.rs @@ -36,7 +36,7 @@ impl Function for ToSyslogLevel { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/to_syslog_severity.rs b/lib/vrl/stdlib/src/to_syslog_severity.rs index 654200908adb3..efa55f8fe6cda 100644 --- a/lib/vrl/stdlib/src/to_syslog_severity.rs +++ b/lib/vrl/stdlib/src/to_syslog_severity.rs @@ -36,7 +36,7 @@ impl Function for ToSyslogSeverity { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/to_timestamp.rs b/lib/vrl/stdlib/src/to_timestamp.rs index 16d3a14716c7a..6477affb6efce 100644 --- a/lib/vrl/stdlib/src/to_timestamp.rs +++ b/lib/vrl/stdlib/src/to_timestamp.rs @@ -95,7 +95,7 @@ impl Function for ToTimestamp { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: 
&FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/to_unix_timestamp.rs b/lib/vrl/stdlib/src/to_unix_timestamp.rs index 7efcf8075c7b8..af6155eed3db0 100644 --- a/lib/vrl/stdlib/src/to_unix_timestamp.rs +++ b/lib/vrl/stdlib/src/to_unix_timestamp.rs @@ -47,7 +47,7 @@ impl Function for ToUnixTimestamp { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/truncate.rs b/lib/vrl/stdlib/src/truncate.rs index b0ce7e981e9cb..16d77c2e613b4 100644 --- a/lib/vrl/stdlib/src/truncate.rs +++ b/lib/vrl/stdlib/src/truncate.rs @@ -51,7 +51,7 @@ impl Function for Truncate { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/unique.rs b/lib/vrl/stdlib/src/unique.rs index 7a4d619d2b57a..701ea07c4c3f0 100644 --- a/lib/vrl/stdlib/src/unique.rs +++ b/lib/vrl/stdlib/src/unique.rs @@ -21,7 +21,7 @@ impl Function for Unique { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/unnest.rs b/lib/vrl/stdlib/src/unnest.rs index 922ac8d57311e..fbfde8f6f4567 100644 --- a/lib/vrl/stdlib/src/unnest.rs +++ b/lib/vrl/stdlib/src/unnest.rs @@ -1,4 +1,4 @@ -use lookup::LookupBuf; +use lookup_lib::LookupBuf; use vrl::prelude::*; #[derive(Clone, Copy, Debug)] @@ -45,7 +45,7 @@ impl Function for Unnest { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let path = 
arguments.required_query("path")?; diff --git a/lib/vrl/stdlib/src/upcase.rs b/lib/vrl/stdlib/src/upcase.rs index 9245ea7a0b841..7a4385f09dce6 100644 --- a/lib/vrl/stdlib/src/upcase.rs +++ b/lib/vrl/stdlib/src/upcase.rs @@ -27,7 +27,7 @@ impl Function for Upcase { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, mut arguments: ArgumentList, ) -> Compiled { let value = arguments.required("value"); diff --git a/lib/vrl/stdlib/src/uuid_v4.rs b/lib/vrl/stdlib/src/uuid_v4.rs index 00c66d74f285f..4d1e8991b02d3 100644 --- a/lib/vrl/stdlib/src/uuid_v4.rs +++ b/lib/vrl/stdlib/src/uuid_v4.rs @@ -20,7 +20,7 @@ impl Function for UuidV4 { fn compile( &self, _state: &state::Compiler, - _info: &FunctionCompileContext, + _ctx: &FunctionCompileContext, _: ArgumentList, ) -> Compiled { Ok(Box::new(UuidV4Fn)) diff --git a/lib/vrl/tests/tests/expressions/if_statement/nested_if_else.vrl b/lib/vrl/tests/tests/expressions/if_statement/nested_if_else.vrl new file mode 100644 index 0000000000000..7569cc0f7eb98 --- /dev/null +++ b/lib/vrl/tests/tests/expressions/if_statement/nested_if_else.vrl @@ -0,0 +1,18 @@ +# result: ["1xx", "2xx", "3xx", "4xx", null] + +status_code = 150 +a1 = if status_code < 200 { "1xx" } else if status_code < 300 { "2xx" } else if status_code < 400 { "3xx" } else if status_code < 500 { "4xx" } + +status_code = 250 +a2 = if status_code < 200 { "1xx" } else if status_code < 300 { "2xx" } else if status_code < 400 { "3xx" } else if status_code < 500 { "4xx" } + +status_code = 350 +a3 = if status_code < 200 { "1xx" } else if status_code < 300 { "2xx" } else if status_code < 400 { "3xx" } else if status_code < 500 { "4xx" } + +status_code = 450 +a4 = if status_code < 200 { "1xx" } else if status_code < 300 { "2xx" } else if status_code < 400 { "3xx" } else if status_code < 500 { "4xx" } + +status_code = 550 +a5 = if status_code < 200 { "1xx" } else if status_code < 300 { "2xx" } else if status_code < 400 { 
"3xx" } else if status_code < 500 { "4xx" } + +[a1, a2, a3, a4, a5] diff --git a/rfcs/2021-08-29-8381-vrl-iteration-support.md b/rfcs/2021-08-29-8381-vrl-iteration-support.md new file mode 100644 index 0000000000000..839b93e4d2e28 --- /dev/null +++ b/rfcs/2021-08-29-8381-vrl-iteration-support.md @@ -0,0 +1,1088 @@ +# RFC 8381 - 2021-08-22 - VRL Iteration Support + +We add native, limited support for iteration to VRL in a way that fits the VRL +[design document][doc], to allow operators to optimally remap their data. + +## Table Of Contents + + + +* [Context](#context) +* [Cross cutting concerns](#cross-cutting-concerns) +* [Scope](#scope) + * [In scope](#in-scope) + * [Out of scope](#out-of-scope) +* [Pain](#pain) +* [Proposal](#proposal) + * [User Experience](#user-experience) + * [Example Use-Case](#example-use-case) + * [Object Iteration](#object-iteration) + * [Array Iteration](#array-iteration) + * [Implementation](#implementation) + * [Closure-support For Functions](#closure-support-for-functions) + * [Lexical Scoping](#lexical-scoping) + * [Returning Two-Element Array](#returning-two-element-array) + * [Tuple Type](#tuple-type) + * [Parser Changes](#parser-changes) + * [Compiler Changes](#compiler-changes) + * [Function Trait](#function-trait) + * [Expression Trait](#expression-trait) +* [Rationale](#rationale) +* [Drawbacks](#drawbacks) +* [Prior Art](#prior-art) +* [Alternatives](#alternatives) + * [For-Loop](#for-loop) +* [Outstanding Questions](#outstanding-questions) +* [Plan Of Attack](#plan-of-attack) +* [Future Improvements](#future-improvements) + * [Iteration Control-Flow](#iteration-control-flow) + * [Specialized Iteration Functions](#specialized-iteration-functions) + * [Schema Support](#schema-support) + * [Pipeline Operator Support](#pipeline-operator-support) + * [Dynamic Field Assignment Support](#dynamic-field-assignment-support) + + + +## Context + +* Magic `*_keys` and *_values` Remap functions [#5785][] +* feat(remap): add for-loop 
statement [#5875][] +* Remap enumerating/looping RFC [#6031][] + +[#5785]: https://github.com/timberio/vector/issues/5785 +[#5875]: https://github.com/timberio/vector/issues/5875 +[#6031]: https://github.com/timberio/vector/issues/6031 + +## Cross cutting concerns + +* New `replace_keys` Remap function [#5377][] +* New `replace_values` Remap function [#5783][] +* New `redact_values` Remap function [#5784][] +* Complex nested parsing with Remap (waninfo) [#5852][] +* enhancement(vrl): add filter_array [#7908][] + +[#5377]: https://github.com/timberio/vector/issues/5377 +[#5783]: https://github.com/timberio/vector/issues/5783 +[#5784]: https://github.com/timberio/vector/issues/5784 +[#5852]: https://github.com/timberio/vector/issues/5852 +[#7908]: https://github.com/timberio/vector/issues/7908 + +## Scope + +### In scope + +* Ability to iterate/map over objects and arrays. +* Additional language constructs to support iteration. + +### Out of scope + +* Specialized forms of iteration (reduce, filter, etc...). +* Iterating any types other than objects or arrays. +* Iteration control-flow (e.g. `break` or `return`) +* Boundless iteration (e.g. `loop`). + +## Pain + +VRL is used to remap events to their desired state. Remapping involves +manipulating existing fields, or adding new ones. + +One gap in the language right now is the possibility to _dynamically remap +fields_. That is, an event might have fields that can't be known at +compile-time, which you still want to manipulate. + +To do this, you have to be able to _iterate_ over key/value pairs of your +object, and remap them individually. This requires some form of iteration +support in the language. + +## Proposal + +### User Experience + +Operators gain access to a new `map` function that allows them to iterate over +objects or arrays, and manipulate key/value pairs. 
+ +The function takes a value, an optional "recursive" flag, and a closure to apply +to each individual key/value pair in an object or index/value pair in an array. + +There is no unbounded `loop` iterator, to avoid accidental infinite loops in +programs. Additionally, control-flow statements (e.g. `break` or `return`) to +manipulate the iteration is not supported at this time (see "[future +improvements](#future-improvements)"). Iteration always runs to completion. + +#### Example Use-Case + +Let's take a look at this function in action: + +```json +{ + "tags": { + "foo": true, + "bar": false, + "baz": "no", + "qux": [true, false], + "quux": { + "one": true, + "two": false + } + }, + "ips": [ + "180.14.129.174", + "31.73.200.120", + "82.35.219.252", + "113.58.218.2", + "32.85.172.216" + ] +} +``` + +```coffee +# Once "schema support" lands, this can be removed. +.tags = object(.tags) ?? {} +.ips = array(.ips) ?? [] + +# Recursively map all `.tags` to their new values. +# +# A copy of the object is returned, with the key/value changes applied. +.tags = map(.tags, recursive: true) { |tag, value| + # `value` can be a boolean, or any other value. We enforce it to be + # a boolean. + value = bool!(value) ?? false + + # Manipulate the field string ("tag") if the value returns `true`. + if value { + tag = "__" + upcase(tag) + } + + # Mapping an object requires you to return a two-element array, the first + # being the string to which the key is set, the second the value of the + # record. + # + # This invariant will be checked at compile-time. + [tag, value] +} + +# Map all IP addresses in `.ips`. +.ips = map(.ips) { |index, ip| + # Enforce `ip` to be a string. + ip = string(ip) ?? "unknown" + + # Mapping an array requires you to return a single value to which the + # item-under-iteration will be mapped to. 
+ { + "address": ip, + "order": index, + "private": starts_with(ip, "180.14"), + } +} +``` + +```json +{ + "tags": { + "__FOO": true, + "bar": false, + "baz": false, + "qux": false, + "quux": { + "__ONE": true, + "two": false + } + }, + "ips": [ + { "address": "180.14.129.174", "order": 0, "private": true }, + { "address": "31.73.200.120", "order": 1, "private": false }, + { "address": "82.35.219.252", "order": 2, "private": false }, + { "address": "113.58.218.2", "order": 3, "private": false }, + { "address": "32.85.172.216", "order": 4, "private": false } + ] +} +``` + +#### Object Iteration + +Let's start by looking at the function signature when iterating over an object +(the same function can be used for array iteration, which is explained down +below, but to keep the signature simple at first, we'll start with objects): + +```coffee +map(value: OBJECT, recursive: BOOLEAN) { |, | [EXPRESSION, EXPRESSION] } -> OBJECT +``` + +Let's break this down: + +* The function name is `map`. +* It takes two arguments, `value` and `recursive`. + * `value` has to be of type `object`, which is the object to be iterated over. + * `recursive` has to be of type `boolean`, determining whether to iterate over + nested objects (_not_ arrays). It defaults to `false`. +* A closure-like expression is expected as part of the function call, but after + the closing `)`. + * This takes the form of `{ |...| expression }`. + * When iterating over an object, `|...|` has to represent two variables, one + for the key, and one for the value (f.e. `|key, value|`). + * The expression has to return a 2-element `array` + * the first element is the new `key` value, the second the `value` value +* The function returns a new `object`, with the manipulated keys/values. + +Here's a simplified example on how to use the function: + +```json +{ "foo": true, "bar": false } +``` + +```coffee +. = map(.) 
{ |key, value| + key = upcase(key) + value = !value + + [key, value] +} +``` + +```json +{ "FOO": false, "BAR": true } +``` + +As an example, the shortest form to write the above example in would be: + +```coffee +. = map(.) { |k,v| [upcase(k), !v] } +``` + +The object under iteration is not mutated, instead a copy of the value is +iterated, and mutated, returning a new object or array after iteration +completes. + +#### Array Iteration + +The signature for array iteration is as follows: + +```coffee +map(value: ARRAY, recursive: BOOLEAN) { |, | EXPRESSION } -> ARRAY +``` + +This is nearly identical to the object signature, except that it takes an array, +has a variable for the index of the current item, and returns a single +expression to use as the value at the given index. Additionally, the `recursive` +flag only recurses into nested arrays, _not_ objects. + +Here's an example: + +```json +["foo", "bar"] +``` + +```coffee +. = map(.) { |index, value| + value + "_" + to_string!(index) +} +``` + +```json +["foo_0", "bar_1"] +``` + +### Implementation + +This proposal favors adding a _mapping_ function over _for-loop syntax_. That +is, the RFC proposes: + +```coffee +map(.) { |key, value| [upcase(key), value] } +``` + +over: + +```coffee +for (key, _value) in . { + key = upcase(key) +} +``` + +This choice is made both on technical merits, based on the [VRL Design +Document][doc] and for improved future capabilities. See the +"[for-loop](#for-loop)" alternative section for more details on this. + +For the chosen proposal to work, there are two separate concepts that need to +be implemented: + +* closure-support for functions +* lexical scoping + +Let's discuss these one by one, before we arrive at the final part, implementing +the `map` function that uses both concepts. 
+ +[doc]: https://github.com/timberio/vector/blob/jean/vrl-design-doc/lib/vrl/DESIGN.md + +#### Closure-support For Functions + +For iteration to land in the form proposed in this RFC, we need a way for +operators to write _what_ they want to happen to keys and/or values of objects +and arrays. + +We do this by allowing functions to expose the fact that they accept a closure +as a stand-alone argument to their function call. + +"stand-alone" means the closure comes _after_ the function call itself, e.g. +this: + +```coffee +map(.) { |k, v| + [k, v] +} +``` + +over this: + +```coffee +map(., { |k, v| + [k, v] +}) +``` + +This choice is made to make it clear that closures in VRL _can't be passed +around through variables, but are instead syntactically attached to a function +call_. + +That is, we don't want to allow this: + +```coffee +my_closure = { |k, v| [k, v] } +map(., my_closure) +``` + +There are several reasons for rejecting this functionality: + +* It allows for slow or infinite recursion, violating the "Safety and + performance over ease of use" VRL design principle. + +* It can make reading (and writing) VRL programs more complex, and code can no + longer be reasoned about by reading from top-to-bottom, violating the "design + the feature for the intended target audience" design principle. + +* We cannot allow assigning closures to event fields, requiring us to make + a distinction between assigning to a _variable_ and an _event field_, one we + haven't had to make before, and would like to avoid making. + +* In practice, we haven't seen any use-case from operators that couldn't be + solved by the current RFC proposal, but would be solved by the above syntax. + +Instead, the closure-syntax is tied to a function call, and can only be added to +functions that explicitly expose their ability to take a closure with `x` +arguments that returns `y` value. 
+ +The return type of a closure is checked at compile-time, including the +requirement in `map` for a two-element array. + +The variable names used to access the provided closure values (e.g. `|key, +value|`) are checked at compile-time to make sure you are actually using the +variables (to avoid potential variable name typo's). This behaves the same to +any other "unused variable assignment" checks happening at compile-time. + +#### Lexical Scoping + +Lexical scoping (variables being accessible within a given scope, instead of +globally) is something we've discussed before. + +Before, we decided that the complexity of adding lexical scoping wasn't worth +the investment before our first release, and we also hoped that lexical scoping +wouldn't be something that was ever needed in VRL. + +With this feature, and particular the function-closure syntax, lexical scoping +comes to top of mind again. + +The reason for that, is the following example: + +```coffee +map(.) { |key, value| + key = upcase(key) + + [key, value] +} + +key +``` + +We reference `key` outside the closure, at the last line of the program. What +should the value of `key` be in this case? + +Without lexical scoping, it would be set to the upper-case variant of the "last" +key in the event. + +With lexical scoping, it would return an "undefined variable" error at +compile-time, because the `key` variable _inside_ the closure is +lexically-scoped to that block, and remains undefined outside of the block. + +However, while the above syntax would be _new_ and thus not a breaking change, +for existing code, adding lexical scoping _would_ be a breaking change: + +```coffee +{ + foo = "baz" +} + +foo +``` + +Previously, `foo` would return `"baz"` when the program runs, but with lexical +scoping, the compiler returns an "undefined variable" compilation error instead. + +This is a breaking change, but because it results in a compilation error, there +will not be any unexpected runtime behavior for this case. 
+ +There is one additional case that _will_ result in a change in runtime behavior: + +```coffee +foo = "bar" + +{ + foo = "baz" +} + +foo +``` + +Previously, `foo` would be set to `"baz"`, while lexical scoping means `foo` +outside the block will stay at `"bar"`. While we'll treat this as a breaking +change, we consider this pattern unlikely to be present in production code, and +so we accept this change in VRL. + +In terms of exact rules, the following applies to lexical scoping in VRL: + +* A VRL program has a single "root" scope, to which any unnested code belongs. +* A new scope is created by using the block (`{ ... }`) expression. +* Nested block expressions result in nested scopes. +* Function-closures also create a new scope. +* Any variable defined in a higher-level scope is accessible in nested scopes. +* Any variable defined in a lower-level scope _cannot_ be accessed in parent + scopes. +* If a variable with the same identifier is overwritten in a lower-level scope, + higher-level scopes will keep access to the original value assigned to that + variable. + +#### Returning Two-Element Array + +We require the `map` function closure to return a two-element `array` type. +Without this requirement, mapping would work as follows: + +```coffee +map(.) { |key, _| + key = upcase(key) +} +``` + +That is, `key` would be a "special variable" inside the closure, which modifies +the actual key of the record within the object. + +This doesn't fit existing patterns in VRL. It looks as if there's a _dangling_ +variable `key` at the end that remains unused, but because we special-cased this +situation, it would instead magically update the actual key in the object after +the closure runs to completion. 
+ +This can become more difficult to reason about if/when we introduce control-flow +statements such as `break`, as you could have set `key` before calling `break`, +which would then either still mutate the actual key, or not, depending on how we +implement `break`, but either way, the program itself becomes less readable, and +operators have to read the language documentation to understand the semantic +differences between how code behaves _inside_ a function-closure and _outside_. + +Instead, the `map` function-closure is required to return a two-element array of +`[key, value]`, which the function machinery then uses to update the actual +values of the object record, e.g.: + +```coffee +map(.) { |key, value| + key = upcase(key) + + # The array return-value clearly defines the eventual key and value values. + [key, value] +} +``` + +##### Tuple Type + +Alternatively, we could introduce a new `tuple` type to define the return-type +of the closure: + +```coffee +map(.) { |key, value| + key = upcase(key) + + (key, value) +} +``` + +They would semantically be the same, given that VRL supports mixing value types +in arrays, which is usually the difference between a tuple and an array, that, +and the fact that a tuple is immutable, but an array doesn't have to be. + +Semantically, using a tuple makes more sense, but it does add an extra `Value` +type, and it does mean we have to convert that tuple to a type supported by +JSON, which will likely be an array, so the external JSON representation of +tuples remains the same, regardless of whether we use arrays or tuples inside +VRL itself. + +Since there isn't a clear benefit at this moment to using a tuple over +a two-element array, the choice is made to forgo adding the tuple type at this +moment. + +#### Parser Changes + +Because the closure syntax will be tied to function calls, we don't need to add +a new top-level node type to the abstract syntax tree (AST). 
Instead, we need to +extend the existing `FunctionCall` type to support an optional closure: + +```rust +pub struct FunctionCall { + pub ident: Node, + pub abort_on_error: bool, + pub arguments: Vec>, +} +``` + +We'll modify the type to this: + +```rust +pub struct FunctionCall { + pub ident: Ident, + pub abort_on_error: bool, + pub arguments: Vec, + pub closure: Option, +} + +pub struct FunctionClosure { + pub variables: Vec, + pub block: Block, +} +``` + +Next, we need to teach the parser to parse optional closures for function calls. + +The existing [LALRPOP][] grammar: + +```rust +FunctionCall: FunctionCall = { + > "(" + NonterminalNewline* + >?> + ")" => { /* ... */ }, +}; +``` + +Is updated to support optional closures: + +```rust +FunctionCall: FunctionCall = { + > "(" + NonterminalNewline* + >?> + ")" => { /* ... */ }, +}; + +#[inline] +FunctionClosure: FunctionClosure = { + "{" + "|" ?> "|" NonterminalNewline* + + "}" => FunctionClosure { variables, block: Block(expressions) }, +}; +``` + +This will allow the parser to unambiguously parse optional function closures, +and add them as nodes to the program AST. + +[lalrpop]: https://lalrpop.github.io/lalrpop/ + +#### Compiler Changes + +Once the parser knows how to parse function closures, the compiler needs to +interpret them. + +To start, we need to update the `FunctionCall` expression: + +```rust +pub struct FunctionCall { + expr: Box, + abort_on_error: bool, + maybe_fallible_arguments: bool, + + // new addition + closure: Option, +} + +pub struct FunctionClosure { + variables: Vec, + block: Block, +} +``` + +We also need to update `compile_function_call` (not expanded here), to translate +the AST to updated `FunctionCall` expression type. + +#### Function Trait + +The bulk of the work needs to happen in the `Function` trait: + +```rust +pub type Compiled = Result, Box>; + +pub trait Function: Sync + fmt::Debug { + /// The identifier by which the function can be called. 
+ fn identifier(&self) -> &'static str; + + /// One or more examples demonstrating usage of the function in VRL source + /// code. + fn examples(&self) -> &'static [Example]; + + /// Compile a [`Function`] into a type that can be resolved to an + /// [`Expression`]. + /// + /// This function is called at compile-time for any `Function` used in the + /// program. + /// + /// At runtime, the `Expression` returned by this function is executed and + /// resolved to its final [`Value`]. + fn compile(&self, state: &super::State, arguments: ArgumentList) -> Compiled; + + /// An optional list of parameters the function accepts. + /// + /// This list is used at compile-time to check function arity, keyword names + /// and argument type definition. + fn parameters(&self) -> &'static [Parameter] { + &[] + } +} +``` + +First, we're going to have to extend the `compile` method to take an optional +`Closure`: + +```rust +fn compile(&self, state: &super::State, arguments: ArgumentList, closure: Option) -> Compiled; +``` + +This will require us to update all currently existing function implementations, +but this is a mechanical change, as no existing functions can deal with closures +right now, so all of them will add `_closure: Option` to their method +implementation, to indicate to the reader/Rust compiler that the closure +variable is unused. + +Next, we need to have a way for the function definition to tell the compiler +a few questions: + +1. Does this function accept a closure? +2. If it does, how many variable names does it accept? +3. What type will the variables have at runtime? +4. What return type must the closure resolve to? + +To resolve these questions, function definitions must implement a new method: + +```rust +fn closure(&self) -> Option { + None +} +``` + +With `closure::Definition` defined as such: + +```rust +mod closure { + /// The definition of a function-closure block a function expects to + /// receive. 
+ struct Definition { + inputs: Vec, + } + + /// One input variant for a function-closure. + /// + /// A closure can support different variable input shapes, depending on the + /// type of a given parameter of the function. + /// + /// For example, the `map` function takes either an `Object` or an `Array` + /// for the `value` parameter, and the closure it takes either accepts + /// `|key, value|`, where "key" is always a string, or `|index, value|` where + /// "index" is always a number, depending on the parameter input type. + struct Input { + /// The parameter name upon which this closure input variant depends on. + parameter: &'static str, + + /// The value kind this closure input expects from the parameter. + kind: value::Kind, + + /// The list of variables attached to this closure input type. + variables: Vec, + + /// The return type this input variant expects the closure to have. + output: Output, + } + + /// One variable input for a closure. + /// + /// For example, in `{ |foo, bar| ... }`, `foo` and `bar` are each + /// a `ClosureVariable`. + struct Variable { + /// The value kind this variable will return when called. + kind: value::Kind, + } + + enum Output { + Array { + /// The number, and kind of elements expected. + elements: Vec, + } + + Object { + /// The field names, and value kinds expected. + fields: HashMap<&'static str, value::Kind, + } + + Scalar { + /// The expected scalar kind. + kind: value::Kind, + } + + Any, + } +} +``` + +As shown above, the default trait implementation for this new method returns +`None`, which means any function (the vast majority) that doesn't accept +a closure can forgo implementing this method, and continue to work as normal. 
+ +In the case of the `map` function, we'd implement it like so: + +```rust +fn closure(&self) -> Option { + let field = closure::Variable { kind: kind::String }; + let index = closure::Variable { kind: kind::Integer }; + let value = closure::Variable { kind: kind::Any }; + + let object = closure::Input { + parameter: "value", + kind: kind::Object, + variables: vec![field, value], + output: closure::Output::Array { + elements: vec![kind::String, kind::Any], + } + }; + + let array = closure::Input { + parameter: "value", + kind: kind::Array, + variables: vec![index, value], + output: closure::Output::Any, + }; + + Some(closure::Definition { + inputs: vec![object, array], + }) +} +``` + +With the above in place, `map` can now iterate over both objects and arrays, and +depending on which type is detected at compile-time, the closure attached to the +function call can make guarantees about which type the first variable name will +have. + +For example: + +```coffee +. = { "foo": true } +. = map(.) { |key, value| [key, value] } +``` + +```coffee +. = ["foo", true] +. = map(.) { |_index, value| value } +``` + +In the first example, because the compiler knows `map` receives an object as its +first argument, it can guarantee that `key` will be a string, and `value` of +"any" type. Additionally, it can show a compile-time error if the last +expression in the block is not an array, with two elements, and the first +element being of the string kind. + +The second example is similar, except that it accepts any return value, and +guarantees that the first variable is a number (the index of the value in the +array). + +Note that for the above to work, the compiler must know the _exact_ type +provided to (in this case) the `value` function parameter. It can't be _either +array or object_, it has to be exactly one of the two. Operators can guarantee +this by using `to_object`, etc. 
+ +#### Expression Trait + +With all of this in place, the `map` function can compile its expression given +the closure details, and run the closure multiple times to completion, doing something +like this: + +```rust +fn resolve(&self, ctx: &mut Context) -> Result { + let run = |key, value| { + // TODO: handle variable scope stack + ctx.variables.insert(key, value); + let closure_value = self.closure.resolve(self)?; + ctx.variables.remove(key); + + Ok(closure_value) + }; + + let result = match self.value.resolve(ctx)? { + Value::Object(object) => { + let mut result = BTreeMap::default(); + + for (key, value) in object.into_iter() { + let v = run(key, value)?.try_array()?; + result.insert(v[0], v[1]); + } + + result.into() + } + Value::Array(array) => { + let mut result = Vec::with_capacity(array.len()); + + for (index, value) in array.into_iter().enumerate() { + let v = run(index, value)?; + result.push(v); + } + + result.into() + } + _ => unreachable!("expected object or array"), + }; + + Ok(result) +} +``` + +This should get us most of the way towards adding function-closure support to +VRL, and using that support in the initial `map` function to do its work. + +## Rationale + +Iteration unlocks solutions to many remapping scenarios we currently don't +support. Not implementing this RFC would hold VRL back, and prevent operators +with more complex use-cases from using Vector with VRL to achieve their goals. + +By adding iteration, we unlock the capability to resolve almost all use-cases in +the future by introducing more iteration-based functions. + +## Drawbacks + +* It adds more complexity to the language. +* There are potential performance foot guns when iterating over large + collections. +* The parser and compiler have to become more complex to support this use-case. 
+ +## Prior Art + +* [Rust `Iterator` trait](https://doc.rust-lang.org/std/iter/trait.Iterator.html#) +* [Nested data structure traversal examples](https://github.com/josevalim/nested-data-structure-traversal) +* [Ruby blocks](https://www.tutorialspoint.com/ruby/ruby_blocks.htm) +* [Rust closures](https://doc.rust-lang.org/book/ch13-01-closures.html) + +## Alternatives + +### For-Loop + +A different approach to iteration is to use a built-in syntax for-loop: + +```coffee +for (key, _value) in . { + key = upcase(key) +} +``` + +The biggest strength of this approach is the simplicity of the syntax, and the +familiarity with many other languages that have for-loops. + +It's relevant to mention that this solution also still needs lexical-scoping +implemented, to avoid "leaking" the values of the `key` and `value` variables +outside of the loop. + +One problem with this approach is that recursive iteration (accessing nested +object fields) isn't possible, unless we add another special syntax (e.g. +`recursive for (.., ..) in . {}`). This adds more surface-level syntax and +removes some of its familiarity, making it a less attractive solution. + +An additional problem is that the `key` and `value` variables become "special", +in that, even though it _appears_ that they aren't used after assignment, the +`for-loop` expression would actually update the object key after each iteration +in the loop. + +While this is technically the same problem we had to solve in the function-based +solution, applying that same solution to a `for-loop` again makes it look less +like for-loops in other languages, defeating one of the strengths of this +approach: + +```coffee +for (key, value) in . { + key = upcase(key) + + (key, value) +} +``` + +A solution to the magic-variable problem would be to allow dynamic paths, and +have operators directly assign to those paths: + +```coffee +for (key, _value) in . 
{ + .[upcase(key)] = value +} +``` + +This solves one problem, but introduces another: using `.` always starts +at the root of the target. Given the following example: + +```json +{ "foo": { "bar": true } } +``` + +How would we use dynamic paths in a recursive for-loop? + +```coffee +recursive for (key, value) in . { + .[upcase(key)] = value +} +``` + +Because key is `"foo"` and then `"bar"`, you would end up with: + +```json +{ "FOO": true, "BAR": true } +``` + +Which is not the expected outcome. + +This could be solved by making `.` relative in the for-loop, but that's a major +shift from the current way VRL works, requires a new way of accessing the root +object if you can't use `.`, and goes against the rules as laid out in the +[design document][doc]. + +--- + +* What other approaches have been considered and why did you not choose them? +* How about not doing this at all? + +## Outstanding Questions + +* Do we want to introduce any form of lexical-scoping in this RFC, or keep the + status-quo for now? +* Do we want to introduce tuple-expressions or are we satisfied with using + a two-element array as the return value for `map`? +* ... + +## Plan Of Attack + +* [ ] Add lexical scoping to VRL +* [ ] Add support for parsing function-closure syntax +* [ ] Add support for compiling function-closure syntax +* [ ] Add new `map` function +* [ ] Document new functionality + +## Future Improvements + +### Iteration Control-Flow + +While likely desirable, this RFC intentionally avoids control-flow operations +inside iterators. + +They are likely to be one of the first enhancements to this feature, though: + +```coffee +. = map(.) |key, value| { + # Return default key/value pairs if the value is an object. + if is_object(value) { + return [key, value] + } + + # ... +} +``` + +### Specialized Iteration Functions + +Once this RFC is implemented, additional iteration capability can be expanded by +adding new functions to the standard library. 
+ +For example, filtering: + +```coffee +# Return a new array with "180.14.129.174" removed. +.ips = filter(.ips) |_index, ip| { + ip = string(ip) ?? "unknown" + + !starts_with(ip, "180.14") +} +``` + +Or ensuring all elements adhere to a condition: + +```coffee +# Add new `all_public` boolean field. +.all_public = all(.ips) |_index, ip| { + ip = string(ip) ?? "unknown" + + !starts_with(ip, "180.14") +} +``` + +Some additional suggestions include `flatten`, `partition`, `fold`, `any`, +`find`, `max`, `min`, etc... + +### Schema Support + +Once [schema support][] lands, writing iterators can become less verbose. + +For example, this example from the RFC: + +```coffee +.ips = array(.ips) ?? [] +.ips = filter(.ips) |_index, ip| { + ip = string(ip) ?? "unknown" + + !starts_with(ip, "180.14") +} +``` + +Can be written as follows, when applying the correct schema: + +```coffee +.ips = filter(.ips) |_, ip| !starts_with(ip, "180.14") +``` + +Because a type schema could guarantee the compiler that `.ips` is an array, with +only string items. + +### Pipeline Operator Support + +Once the [pipeline operations][] land, we can further expand the above example +as follows: + +```coffee +.private_and_public_ips = filter(.ip) |_, ip| is_ip(ip) |> partition() |_, ip| starts_with(ip, "180.14") +``` + +### Dynamic Field Assignment Support + +Once [dynamic field assignment][] lands, you can dynamically move fields as +well: + +```json +["foo", "bar", "baz"] +``` + +```coffee +for_each(.) 
|index, value| .[value] = index +``` + +```json +{ + "foo": 0, + "bar": 1, + "baz": 2 +} +``` diff --git a/rfcs/2021-09-01-8547-accept-metrics-in-datadog-agent-source.md b/rfcs/2021-09-01-8547-accept-metrics-in-datadog-agent-source.md index 83fa27ca79687..f6de6e1de6d17 100644 --- a/rfcs/2021-09-01-8547-accept-metrics-in-datadog-agent-source.md +++ b/rfcs/2021-09-01-8547-accept-metrics-in-datadog-agent-source.md @@ -4,30 +4,30 @@ Currently the `datadog_agent` [source](https://vector.dev/docs/reference/configu supports logs. This RFC suggests to extend Vector to support receiving metrics from Datadog agents and ingest those as metrics from a Vector perspective so they can be benefit from Vector capabilities. - * [Context](#context) - * [Cross cutting concerns](#cross-cutting-concerns) - * [Scope](#scope) - + [In scope](#in-scope) - + [Out of scope](#out-of-scope) - * [Pain](#pain) - * [Proposal](#proposal) - + [User Experience](#user-experience) - + [Implementation](#implementation) - * [Rationale](#rationale) - * [Drawbacks](#drawbacks) - * [Prior Art](#prior-art) - * [Alternatives](#alternatives) - + [For transport between Agents and Vector](#for-transport-between-agents-and-vector) - + [Flattening sketches](#flattening-sketches) - + [For Request routing](#for-request-routing) - * [Outstanding Questions](#outstanding-questions) - * [Plan Of Attack](#plan-of-attack) - * [Future Improvements](#future-improvements) +* [Context](#context) +* [Cross cutting concerns](#cross-cutting-concerns) +* [Scope](#scope) + * [In scope](#in-scope) + * [Out of scope](#out-of-scope) +* [Pain](#pain) +* [Proposal](#proposal) + * [User Experience](#user-experience) + * [Implementation](#implementation) +* [Rationale](#rationale) +* [Drawbacks](#drawbacks) +* [Prior Art](#prior-art) +* [Alternatives](#alternatives) + * [For transport between Agents and Vector](#for-transport-between-agents-and-vector) + * [Flattening sketches](#flattening-sketches) + * [For Request 
routing](#for-request-routing) +* [Outstanding Questions](#outstanding-questions) +* [Plan Of Attack](#plan-of-attack) +* [Future Improvements](#future-improvements) ## Context -- Vector is foreseen as a Datadog Agents aggregator, thus receiving metrics from Datadog Agents is a logical development -- Vector has support to send metrics to Datadog, thus receiving metrics from Agent is a consistent feature to add +* Vector is foreseen as a Datadog Agents aggregator, thus receiving metrics from Datadog Agents is a logical development +* Vector has support to send metrics to Datadog, thus receiving metrics from Agent is a consistent feature to add ## Cross cutting concerns @@ -45,33 +45,33 @@ better aggregation and accuracy. ### In scope -- Implement a Datadog metrics endpoint in Vector, it will match the [metrics intake +* Implement a Datadog metrics endpoint in Vector, it will match the [metrics intake API](https://docs.datadoghq.com/api/latest/metrics/) with additional route that the Agent uses -- Include support for sketches that uses protobuf. -- Ensure all Datadog metrics type are mapped to internal Vector metric type and that there is no loss of accuracy in a +* Include support for sketches that uses protobuf. +* Ensure all Datadog metrics type are mapped to internal Vector metric type and that there is no loss of accuracy in a pass through configuration. ### Out of scope -- Anything not related to metrics - - Processing API validation requests - - Processing other kind of payloads: traces, event, etc. -- Shipping sketches to Datadog in the `datadog_metrics` sinks, it is required reach a fully functional situation but +* Anything not related to metrics + * Processing API validation requests + * Processing other kind of payloads: traces, event, etc. +* Shipping sketches to Datadog in the `datadog_metrics` sinks, it is required reach a fully functional situation but this is not the goal of this RFC that focus on receiving metrics from Datadog Agents. 
## Pain -- Users cannot aggregate metrics from Datadog agents +* Users cannot aggregate metrics from Datadog agents ## Proposal ### User Experience -- Vector will support receiving Datadog Metrics sent by the official Datadog Agent through a standard source -- Metrics received will be fully supported inside Vector, all metric types will be supported -- The following metrics flow: `n*(Datadog Agents) -> Vector -> Datadog` should just work -- No foreseen backward compatibily issue (tags management may be a bit bothersome) -- New configuration settings should be consistent with existing ones +* Vector will support receiving Datadog Metrics sent by the official Datadog Agent through a standard source +* Metrics received will be fully supported inside Vector, all metric types will be supported +* The following metrics flow: `n*(Datadog Agents) -> Vector -> Datadog` should just work +* No foreseen backward compatibility issue (tags management may be a bit bothersome) +* New configuration settings should be consistent with existing ones Regarding the Datadog Agent configuration, ideally it should be only a matter of configuring `metrics_dd_url: https://vector.mycompany.tld` to forward metrics to a Vector deployement. @@ -81,11 +81,12 @@ behavior](https://github.com/DataDog/datadog-agent/blob/main/pkg/config/config.g [here](https://github.com/DataDog/datadog-agent/blob/main/pkg/forwarder/forwarder_health.go#L131-L143)). I.e. if `dd_url` contains a known pattern (i.e. it has a suffix that matches a Datadog site) some extra hostname manipulation happens.
But overal, the following paths are expected to be supported on the host behind `dd_url`: -- `/api/v1/validate` for API key validation -- `/api/v1/check_run` for check submission -- `/intake/` for events and metadata (possibly others) -- `/support/flare/` for support flare -- `/api/v1/series` & `/api/beta/sketches` for metrics submission + +* `/api/v1/validate` for API key validation +* `/api/v1/check_run` for check submission +* `/intake/` for events and metadata (possibly others) +* `/support/flare/` for support flare +* `/api/v1/series` & `/api/beta/sketches` for metrics submission Then to only ship metrics, and let other payload follow the standard path, the newly introduced Datadog Agent setting `metrics_dd_url` would have to be set to point to a Vector host, with a `datadog_agent` source enabled. And then request @@ -94,20 +95,21 @@ targeted to `/api/v1/series` & `/api/beta/sketches` would be diverted there allo ### Implementation A few details about the Datadog Agents & [Datadog metrics](https://docs.datadoghq.com/metrics/types/): -- The base structure for all metrics is named + +* The base structure for all metrics is named [`MetricSample`](https://github.com/DataDog/datadog-agent/blob/main/pkg/metrics/metric_sample.go#L81-L94) and can be of [several types](https://github.com/DataDog/datadog-agent/blob/main/pkg/metrics/metric_sample.go#L20-L31) -- Major Agent usecases: - - Metrics are send from corechecks (i.e. go code) +* Major Agent usecases: + * Metrics are send from corechecks (i.e. 
go code) [here](https://github.com/DataDog/datadog-agent/blob/main/pkg/aggregator/sender.go#L227-L252) - - Dogstatsd metrics are converted to the `MetricSample` structure + * Dogstatsd metrics are converted to the `MetricSample` structure [here](https://github.com/DataDog/datadog-agent/blob/main/pkg/dogstatsd/enrich.go#L87-L137) However Datadog Agents metrics are transformed before being sent, ultimately metrics accounts for two differents kind of payload: -- The count, gauge and rate series kind of payload, sent to `/api/v1/series` using the [JSON schema officially +* The count, gauge and rate series kind of payload, sent to `/api/v1/series` using the [JSON schema officially documented](https://docs.datadoghq.com/api/latest/metrics) with few undocumented [additional fields](https://github.com/DataDog/datadog-agent/blob/main/pkg/metrics/series.go#L45-L57), but this align very well with the existing `datadog_metrics` sinks. -- The sketches kind of payload, sent to `/api/beta/sketches` and serialized as protobuf as shown +* The sketches kind of payload, sent to `/api/beta/sketches` and serialized as protobuf as shown [here](https://github.com/DataDog/datadog-agent/blob/main/pkg/serializer/serializer.go#L315-L338) (it ultimately lands [here](https://github.com/DataDog/datadog-agent/blob/main/pkg/metrics/sketch_series.go#L103-L269)). Public `.proto` definition can be found @@ -120,20 +122,20 @@ representing it](https://github.com/timberio/vector/blob/master/lib/vector-core/ The implementation would then consist in: -- Implement a Datadog Agent change and introduce a new override (let's say `metrics_dd_url`) that would only divert +* Implement a Datadog Agent change and introduce a new override (let's say `metrics_dd_url`) that would only divert request to `/api/v1/series` & `/api/beta/sketches` to a specific endpoints. 
-- Handle the `/api/v1/series` route (based on both the offical [API](https://docs.datadoghq.com/api/latest/metrics/) and +* Handle the `/api/v1/series` route (based on both the official [API](https://docs.datadoghq.com/api/latest/metrics/) and the [Datadog Agent itself](https://github.com/DataDog/datadog-agent/blob/main/pkg/forwarder/telemetry.go#L20-L31)) to cover every metric type handled by this endpoint (count, gauge and rate) and: - - Add support for missing fields in the `datadog_metrics` sinks - - The same value but different keys tags (Datadog allows `key:foo` & `key:bar` but Vector doesn't) maybe supported + * Add support for missing fields in the `datadog_metrics` sinks + * The same value but different keys tags (Datadog allows `key:foo` & `key:bar` but Vector doesn't) may be supported later if there is demand for it (see the note below). - - Overall this is fairly straighforward -- Handle the `/api/beta/sketches` route in the `datadog_agent` source to support sketches/distribution encoded using + * Overall this is fairly straightforward +* Handle the `/api/beta/sketches` route in the `datadog_agent` source to support sketches/distribution encoded using protobuf, but once decoded those sketches will require internal support in Vector: - - Distribution metrics in the `datadog_metrics` sink would need to use sketches and the associated endpoint. This is + * Distribution metrics in the `datadog_metrics` sink would need to use sketches and the associated endpoint. This is a prerequisite to support end-to-end sketches forwarding.
- - The sketches the agent ships is based on this [paper](http://www.vldb.org/pvldb/vol12/p2195-masson.pdf) whereas + * The sketches the agent ships is based on this [paper](http://www.vldb.org/pvldb/vol12/p2195-masson.pdf) whereas Vector uses what's called a summary inside the Agent, implementing the complete DDSketch support in Vector is probably a good idea as sketches have convenient properties for wide consistent aggregation and limited error. To support smooth migration, full DDsktech (or compatible sketch) support is mandatory, as customers that emit @@ -152,9 +154,9 @@ arise, Vector internal tag representation will not be changed following this RFC ## Rationale -- Smoother Vector integration with Datadog. -- Needed for Vector to act as a complete Datadog Agent aggregator (but further work will still be required). -- Extend the Vector ecosystem, bring additional feature for distribution metrics that would enable consistent +* Smoother Vector integration with Datadog. +* Needed for Vector to act as a complete Datadog Agent aggregator (but further work will still be required). +* Extend the Vector ecosystem, bring additional feature for distribution metrics that would enable consistent aggregation. ## Drawbacks @@ -177,6 +179,7 @@ service for aggregation and it does not support sketches. ## Alternatives ### For transport between Agents and Vector + The use an alternate protocol between Datadog Agents and Vector (Like Prometheus, Statds, OpenTelemetry or Vector own protocol) could be envisioned. This would call for a significant, yet possible with the current Agent architecture, addition, those changes would mostly be located in the @@ -187,6 +190,7 @@ something that aligns well with the purpose of the Datadog Agent. This would als because of protocol conversion. 
### Flattening sketches + For sketches, we could flatten sketches and compute usual derived metrics (min/max/average/count/some percentiles) and send those as gauge/count, but it would prevent (or at least impact) existing distribution/sketches users. Moreover if instead of sketches only derived metrics are used a lot of the tagging flexiblity will be lost. By submitting tagged @@ -197,6 +201,7 @@ the implementation in Vector and remove the prerequisite of having sketches supp ### For Request routing Instead of being done in the Agent, the request routing could be implemented either: + 1. In Vector, that would receive both metric and non-metric payload, simply proxying non-metric payload directly to Datdog without further processing. 2. Or in a third party middle layer (e.g. haproxy or similare). It could leverage the [documented @@ -215,11 +220,11 @@ None ## Plan Of Attack -- [ ] Implement a new `metrics_dd_url` overrides in the Datadog Agent -- [ ] Support `/api/v1/series` route still in the `datadog_agent` source, implement complete support in the +* [ ] Implement a new `metrics_dd_url` overrides in the Datadog Agent +* [ ] Support `/api/v1/series` route still in the `datadog_agent` source, implement complete support in the `datadog_metrics` sinks for the undocumented fields, incoming tags would be stored as key only with an empty string for their value inside Vector. Validate the `Agent->Vector->Datadog` scenario for gauge, count & rate. -- [ ] Support `/api/beta/sketches` route, again in the `datadog_agent`, and validate the `Agent->Vector->Datadog` +* [ ] Support `/api/beta/sketches` route, again in the `datadog_agent`, and validate the `Agent->Vector->Datadog` scenario for sketches/distributions. 
This would also required internal sketches support in Vector along with sending sketches from the `datadog_metrics` sinks, this is not directly addressed by this RFC but it is tracked in the following issues: [#7283](https://github.com/timberio/vector/issues/7283), @@ -229,6 +234,6 @@ The later task depends on the issue [#9181](https://github.com/vectordotdev/vect ## Future Improvements -- Wider use of sketches for distribution aggregation. -- Expose some sketches function in VRL (at least merging sketches). -- Continue on processing other kind of Datadog payloads. +* Wider use of sketches for distribution aggregation. +* Expose some sketches function in VRL (at least merging sketches). +* Continue on processing other kind of Datadog payloads. diff --git a/scripts/check-events b/scripts/check-events new file mode 100755 index 0000000000000..12bde48153b14 --- /dev/null +++ b/scripts/check-events @@ -0,0 +1,189 @@ +#!/usr/bin/env ruby +# coding: utf-8 + +require 'find' + +# These members/tags are common to multiple events +BYTE_SIZE_COUNT = ['byte_size', 'count'] + +def hash_array_add(hash, key, item) + arr = hash.fetch(key, Array::new) + arr.append(item) + hash[key] = arr +end + +# A class to hold error reports and common functionality +class Event + attr_accessor :path + attr_reader :name, :reports + attr_writer :members + + def initialize(name) + @path = nil + @name = name + @reports = [] + @members = [] + @counters = {} + @logs = [] + end + + def add_counter(name, tags) + @counters[name] = tags + end + + def add_log(type, message, parameters) + @logs.append([type, message, parameters]) + end + + def valid? + @reports.clear + + # Check BytesReceived events (for sources) + if @name.end_with? 'BytesReceived' + members_must_include(['byte_size']) + counters_must_include('component_received_bytes_total', ['protocol'] + @members - ['byte_size']) + end + + # Check EventsReceived events (common) + if @name.end_with? 
'EventsReceived' + members_must_include(BYTE_SIZE_COUNT) + counters_must_include('component_received_events_total', @members - BYTE_SIZE_COUNT) + counters_must_include('component_received_event_bytes_total', @members - BYTE_SIZE_COUNT) + end + + # Check EventsSent events (common) + if @name.end_with? 'EventsSent' + members_must_include(BYTE_SIZE_COUNT) + counters_must_include('component_sent_events_total', @members - BYTE_SIZE_COUNT) + counters_must_include('component_sent_event_bytes_total', @members - BYTE_SIZE_COUNT) + end + + # Check BytesSent events (for sinks) + if @name.end_with? 'BytesSent' + members_must_include(['byte_size']) + counters_must_include('component_sent_bytes_total', ['protocol'] + @members - ['byte_size']) + end + + has_errors = @logs.one? { |type, _, _| type == 'error' } + + # Make sure Error events output an error + if has_errors or @name.end_with? 'Error' + append('Error events MUST be named "___Error".') unless @name.end_with? 'Error' + counters_must_include('component_errors_total', ['error_type', 'stage'] + @members - ['error']) + end + + # Make sure error events contain the right parameters + @logs.each do |type, message, parameters| + if type == 'error' + ['error', 'stage'].each do |parameter| + unless parameters.include? parameter + @reports.append("Error log MUST include parameter \"#{parameter}\".") + end + end + end + end + + @reports.empty? + end + + private + + def append(report) + @reports.append(report) + end + + def generic_must_contain(array, names, prefix, suffix) + names.each do |name| + unless array.include? name + @reports.append("#{prefix} MUST #{suffix} \"#{name}\".") + end + end + end + + def counters_must_include(name, required_tags) + unless @counters.include? name + @reports.append("This event MUST increment counter \"#{name}\".") + else + tags = @counters[name] + required_tags.each do |tag| + unless tags.include? 
tag + @reports.append("Counter \"#{name}\" MUST include tag \"#{tag}\".") + end + end + end + end + + def members_must_include(names) + generic_must_contain(@members, names, 'This event', 'have a member named') + end +end + +$all_events = Hash::new { |hash, key| hash[key] = Event::new(key) } + +error_count = 0 + +# Scan sources and build internal structures +Find.find('src') do |path| + if path.end_with? '.rs' + text = File.read(path) + + # Check log message texts for correct formatting. See below for the + # full regex + text.scan(/(trace|debug|info|warn|error)!\(\s*(message\s*=\s*)?"([^({)][^("]+)"/) do + |type, has_message_prefix, message| + reports = [] + reports.append('Message must start with a capital.') unless message.match(/^[[:upper:]]/) + reports.append('Message must end with a period.') unless message.match(/\.$/) + unless reports.empty? + puts "#{path}: Errors in message \"#{message}\":" + reports.each { |report| puts " #{report}" } + error_count += 1 + end + end + + if path.start_with? 'src/internal_events/' and !text.match?(/## skip check-events ##/i) + # Scan internal event structs for member names + text.scan(/[\n ]struct (\S+?)(?:<.+?>)?(?: {\n(.+?)\n\s*}|;)\n/m) do |struct_name, members| + $all_events[struct_name].path = path + if members + member_names = members.scan(/ ([A-Za-z0-9_]+): /).map { |member,| member } + $all_events[struct_name].members = member_names + end + end + + # Scan internal event implementation blocks for logs and metrics + text.scan(/^(\s*)impl(?:<.+?>)? InternalEvent for ([A-Za-z0-9_]+)(?:<.+?>)? {\n(.+?)\n\1}$/m) do |_space, event_name, block| + # Scan for counter names and tags + block.scan(/ counter!\((?:\n\s+)?"([^"]+)",(.+?)\)[;\n]/m) do |name, tags| + tags = tags.scan(/"([^"]+)" => /).map { |tag,| tag } + $all_events[event_name].add_counter(name, tags) + end + + # Scan for log outputs and their parameters + block.scan(/ + (trace|debug|info|warn|error)! # The log type + \(\s*(?:message\s*=\s*)? 
# Skip any leading "message =" bit + "([^({)][^("]+)" # The log message text + ([^;]*?) # Match the parameter list + \)(?:;|\n\s*}) # Normally would end with simply ");", but some are missing the semicolon + /mx) do |type, message, parameters| + parameters = parameters.scan(/([a-z0-9_]+) *= .|[?%]([a-z0-9_.]+)/) \ + .map { |assignment, simple| assignment or simple } + + $all_events[event_name].add_log(type, message, parameters) + end + end + end + end +end + +$all_events.each_value do |event| + unless event.valid? + puts "#{event.path}: Errors in event #{event.name}:" + event.reports.each { |report| puts " #{report}" } + error_count += 1 + end +end + +puts "#{error_count} error(s)" +exit 1 if error_count > 0 diff --git a/scripts/check-events.sh b/scripts/check-events.sh deleted file mode 100755 index 8d337114f5dc7..0000000000000 --- a/scripts/check-events.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env bash -# shellcheck disable=SC2016 -exec find src -type f -name \*.rs -exec awk ' - BEGIN { - RS = "" - FS = "\n" - error_count = 0 - } - - match($0, /(trace|debug|info|warn|error)!\(\s*(message\s*=\s*)?"([^({)][^("]+)"/, groups) { - message = groups[3] - delete errors; - if (!match(message, /^[A-Z]/)) { errors[1] = "Message must begin with a capital." } - if (!match(message, /\.$/)) { errors[2] = "Message must end with a period." } - if (length(errors)) { - print FILENAME, ": Errors:" - for (i in errors) { - print " ", errors[i] - } - print $0 - print "" - error_count++ - } - } - - END { - print error_count, "error(s)!" 
- if (error_count > 0) { - exit 1 - } - } -' {} + diff --git a/src/api/schema/metrics/filter.rs b/src/api/schema/metrics/filter.rs index 293e59f6e4a13..33a49c175ece1 100644 --- a/src/api/schema/metrics/filter.rs +++ b/src/api/schema/metrics/filter.rs @@ -4,17 +4,15 @@ use super::{ use crate::{ config::ComponentKey, event::{Metric, MetricValue}, - metrics::{capture_metrics, get_controller, Controller}, + metrics::Controller, }; use async_stream::stream; -use lazy_static::lazy_static; use std::collections::BTreeMap; use tokio::time::Duration; use tokio_stream::{Stream, StreamExt}; -lazy_static! { - static ref GLOBAL_CONTROLLER: &'static Controller = - get_controller().expect("Metrics system not initialized. Please report."); +fn get_controller() -> &'static Controller { + Controller::get().expect("Metrics system not initialized. Please report.") } /// Sums an iteratable of `&Metric`, by folding metric values. Convenience function typically @@ -141,13 +139,13 @@ impl<'a> MetricsFilter<'a> for Vec<&'a Metric> { /// Returns a stream of `Metric`s, collected at the provided millisecond interval. pub fn get_metrics(interval: i32) -> impl Stream { - let controller = get_controller().unwrap(); + let controller = get_controller(); let mut interval = tokio::time::interval(Duration::from_millis(interval as u64)); stream! { loop { interval.tick().await; - for m in capture_metrics(controller) { + for m in controller.capture_metrics() { yield m; } } @@ -155,20 +153,21 @@ pub fn get_metrics(interval: i32) -> impl Stream { } pub fn get_all_metrics(interval: i32) -> impl Stream> { - let controller = get_controller().unwrap(); + let controller = get_controller(); let mut interval = tokio::time::interval(Duration::from_millis(interval as u64)); stream! { loop { interval.tick().await; - yield capture_metrics(controller).collect() + yield controller.capture_metrics().collect() } } } /// Return Vec based on a component id tag. 
pub fn by_component_key(component_key: &ComponentKey) -> Vec { - capture_metrics(&GLOBAL_CONTROLLER) + get_controller() + .capture_metrics() .filter_map(|m| { if let Some(pipeline) = component_key.pipeline_str() { m.tag_matches("component_id", component_key.id()) diff --git a/src/app.rs b/src/app.rs index f09836e383720..4d6ec07d04442 100644 --- a/src/app.rs +++ b/src/app.rs @@ -97,7 +97,7 @@ impl Application { LogFormat::Json => true, }; - metrics::init().expect("metrics initialization failed"); + metrics::init_global().expect("metrics initialization failed"); if let Some(threads) = root_opts.threads { if threads < 1 { diff --git a/src/codecs/framers/mod.rs b/src/codecs/framers/mod.rs index 303149235eb42..ef394bf7c0877 100644 --- a/src/codecs/framers/mod.rs +++ b/src/codecs/framers/mod.rs @@ -55,22 +55,25 @@ impl TcpError for BoxedFramingError { /// Produce byte frames from a byte stream / byte message. pub trait Framer: - tokio_util::codec::Decoder + DynClone + Send + Sync + tokio_util::codec::Decoder + DynClone + Debug + Send + Sync { } /// Default implementation for `Framer`s that implement -/// `tokio_util::codec::Decoder` and `Clone`. +/// `tokio_util::codec::Decoder`. impl Framer for Decoder where - Decoder: - tokio_util::codec::Decoder + Clone + Send + Sync + Decoder: tokio_util::codec::Decoder + + Clone + + Debug + + Send + + Sync { } dyn_clone::clone_trait_object!(Framer); -/// A `Box` containing a thread-safe `Framer`. -pub type BoxedFramer = Box; +/// A `Box` containing a `Framer`. +pub type BoxedFramer = Box; /// Define options for a framer and build it from the config object. 
/// diff --git a/src/codecs/framers/newline_delimited.rs b/src/codecs/framers/newline_delimited.rs index 2b79a24c67f23..d1f2d3aaf6e21 100644 --- a/src/codecs/framers/newline_delimited.rs +++ b/src/codecs/framers/newline_delimited.rs @@ -20,6 +20,11 @@ impl NewlineDelimitedDecoderConfig { max_length: crate::serde::default_max_length(), } } + + /// Creates a `NewlineDelimitedCodec` with a maximum frame length limit. + pub const fn new_with_max_length(max_length: usize) -> Self { + Self { max_length } + } } #[typetag::serde(name = "newline_delimited")] diff --git a/src/codecs/mod.rs b/src/codecs/mod.rs index ef7794aaf78eb..ac0896284e809 100644 --- a/src/codecs/mod.rs +++ b/src/codecs/mod.rs @@ -54,7 +54,7 @@ impl TcpError for Error { } } -#[derive(Clone)] +#[derive(Debug, Clone)] /// A decoder that can decode structured events from a byte stream / byte /// messages. pub struct Decoder { diff --git a/src/codecs/parsers/mod.rs b/src/codecs/parsers/mod.rs index f06718f790147..07017ab9b9201 100644 --- a/src/codecs/parsers/mod.rs +++ b/src/codecs/parsers/mod.rs @@ -20,7 +20,7 @@ use smallvec::SmallVec; use std::fmt::Debug; /// Parse structured events from bytes. -pub trait Parser: DynClone + Send + Sync { +pub trait Parser: DynClone + Debug + Send + Sync { /// Parses structured events from bytes. /// /// It returns a `SmallVec` rather than an `Event` directly, since one byte @@ -32,8 +32,8 @@ pub trait Parser: DynClone + Send + Sync { dyn_clone::clone_trait_object!(Parser); -/// A `Box` containing a thread-safe `Parser`. -pub type BoxedParser = Box; +/// A `Box` containing a `Parser`. +pub type BoxedParser = Box; /// Define options for a parser and build it from the config object. 
/// diff --git a/src/config/mod.rs b/src/config/mod.rs index 06f2f84002884..16ddb7a318f38 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -290,7 +290,9 @@ impl SinkOuter { if self.healthcheck_uri.is_some() && self.healthcheck.uri.is_some() { warn!("Both `healthcheck.uri` and `healthcheck_uri` options are specified. Using value of `healthcheck.uri`.") } else if self.healthcheck_uri.is_some() { - warn!("`healthcheck_uri` option has been deprecated, use `healthcheck.uri` instead. ") + warn!( + "The `healthcheck_uri` option has been deprecated, use `healthcheck.uri` instead." + ) } SinkHealthcheckOptions { uri: self diff --git a/src/config/unit_test.rs b/src/config/unit_test.rs index 356a8ce746ef5..43b632d0bd32e 100644 --- a/src/config/unit_test.rs +++ b/src/config/unit_test.rs @@ -1511,4 +1511,50 @@ mod tests { vec!["Data type mismatch between foo.step1 (Metric) and foo.step2 (Log)".to_owned()] ); } + + #[tokio::test] + async fn invalid_name_in_expanded_transform() { + let config: ConfigBuilder = toml::from_str(indoc! 
{r#" + [sources.input] + type = "generator" + format = "shuffle" + lines = ["one", "two"] + count = 5 + + [transforms.foo] + inputs = ["input"] + type = "compound" + [[transforms.foo.steps]] + type = "log_to_metric" + [[transforms.foo.steps.metrics]] + type = "counter" + field = "c" + name = "sum" + namespace = "ns" + [[transforms.foo.steps]] + id = "0" + type = "log_to_metric" + [[transforms.foo.steps.metrics]] + type = "counter" + field = "c" + name = "sum" + namespace = "ns" + + [sinks.output] + type = "console" + inputs = [ "foo.0" ] + encoding = "json" + target = "stdout" + "#}) + .unwrap(); + + let err = crate::config::compiler::compile(config).err().unwrap(); + assert_eq!( + err, + vec![ + "failed to expand transform 'foo': conflicting id found while expanding transform" + .to_owned() + ] + ); + } } diff --git a/src/internal_events/apache_metrics.rs b/src/internal_events/apache_metrics.rs index eb1acd1521a8a..6a363acf53f23 100644 --- a/src/internal_events/apache_metrics.rs +++ b/src/internal_events/apache_metrics.rs @@ -1,3 +1,5 @@ +// ## skip check-events ## + use crate::sources::apache_metrics; use metrics::{counter, histogram}; use std::time::Instant; diff --git a/src/internal_events/aws_ecs_metrics.rs b/src/internal_events/aws_ecs_metrics.rs index 5adcf58ff5023..558a71bc6241b 100644 --- a/src/internal_events/aws_ecs_metrics.rs +++ b/src/internal_events/aws_ecs_metrics.rs @@ -1,3 +1,5 @@ +// ## skip check-events ## + use metrics::{counter, histogram}; use std::borrow::Cow; use std::time::Instant; diff --git a/src/internal_events/aws_kinesis_firehose.rs b/src/internal_events/aws_kinesis_firehose.rs index 7e5de1e726cc1..0e4abf6ee8c76 100644 --- a/src/internal_events/aws_kinesis_firehose.rs +++ b/src/internal_events/aws_kinesis_firehose.rs @@ -1,3 +1,5 @@ +// ## skip check-events ## + use crate::sources::aws_kinesis_firehose::Compression; use metrics::counter; use vector_core::internal_event::InternalEvent; diff --git a/src/internal_events/aws_s3.rs 
b/src/internal_events/aws_s3.rs index 26c13e6776fe0..ed069886b04dd 100644 --- a/src/internal_events/aws_s3.rs +++ b/src/internal_events/aws_s3.rs @@ -1,3 +1,5 @@ +// ## skip check-events ## + #[cfg(feature = "sources-aws_s3")] pub mod source { use crate::sources::aws_s3::sqs::ProcessingError; diff --git a/src/internal_events/azure_blob.rs b/src/internal_events/azure_blob.rs index dceb59ddb1a45..ea71af8532d2d 100644 --- a/src/internal_events/azure_blob.rs +++ b/src/internal_events/azure_blob.rs @@ -1,3 +1,5 @@ +// ## skip check-events ## + use metrics::counter; use uuid::Uuid; use vector_core::internal_event::InternalEvent; diff --git a/src/internal_events/batch.rs b/src/internal_events/batch.rs index 9d0828ba66aee..d042642e2dc81 100644 --- a/src/internal_events/batch.rs +++ b/src/internal_events/batch.rs @@ -1,3 +1,5 @@ +// ## skip check-events ## + use metrics::counter; use vector_core::internal_event::InternalEvent; diff --git a/src/internal_events/coercer.rs b/src/internal_events/coercer.rs index 0785620991878..e3618deca884a 100644 --- a/src/internal_events/coercer.rs +++ b/src/internal_events/coercer.rs @@ -1,3 +1,5 @@ +// ## skip check-events ## + use metrics::counter; use vector_core::internal_event::InternalEvent; diff --git a/src/internal_events/common.rs b/src/internal_events/common.rs index 3a90e9afda5ef..7c172518e31a8 100644 --- a/src/internal_events/common.rs +++ b/src/internal_events/common.rs @@ -1,3 +1,5 @@ +// ## skip check-events ## + use metrics::counter; use vector_core::internal_event::InternalEvent; diff --git a/src/internal_events/concat.rs b/src/internal_events/concat.rs index 1daf41b5180f5..6ad18730de52d 100644 --- a/src/internal_events/concat.rs +++ b/src/internal_events/concat.rs @@ -1,3 +1,5 @@ +// ## skip check-events ## + use metrics::counter; use vector_core::internal_event::InternalEvent; diff --git a/src/internal_events/conditions.rs b/src/internal_events/conditions.rs index 95a2b547c2c1b..9faea5693a0f1 100644 --- 
a/src/internal_events/conditions.rs +++ b/src/internal_events/conditions.rs @@ -1,3 +1,5 @@ +// ## skip check-events ## + use metrics::counter; use vector_core::internal_event::InternalEvent; diff --git a/src/internal_events/datadog_events.rs b/src/internal_events/datadog_events.rs index 6113f0e9c1c0d..42854fdb54143 100644 --- a/src/internal_events/datadog_events.rs +++ b/src/internal_events/datadog_events.rs @@ -1,3 +1,5 @@ +// ## skip check-events ## + use metrics::counter; use vector_core::internal_event::InternalEvent; diff --git a/src/internal_events/dnstap.rs b/src/internal_events/dnstap.rs index a2bbf26774800..de0b1fbee1129 100644 --- a/src/internal_events/dnstap.rs +++ b/src/internal_events/dnstap.rs @@ -1,3 +1,5 @@ +// ## skip check-events ## + use metrics::counter; use vector_core::internal_event::InternalEvent; diff --git a/src/internal_events/docker_logs.rs b/src/internal_events/docker_logs.rs index cac1dfc61529a..9d36dff6e6487 100644 --- a/src/internal_events/docker_logs.rs +++ b/src/internal_events/docker_logs.rs @@ -1,3 +1,5 @@ +// ## skip check-events ## + use bollard::errors::Error; use chrono::ParseError; use metrics::counter; diff --git a/src/internal_events/eventstoredb_metrics.rs b/src/internal_events/eventstoredb_metrics.rs index 60c32b8ddcea7..c859a14aba978 100644 --- a/src/internal_events/eventstoredb_metrics.rs +++ b/src/internal_events/eventstoredb_metrics.rs @@ -1,3 +1,5 @@ +// ## skip check-events ## + use metrics::counter; use vector_core::internal_event::InternalEvent; diff --git a/src/internal_events/exec.rs b/src/internal_events/exec.rs index c1a5668e0fecd..ee8899f7a9ae6 100644 --- a/src/internal_events/exec.rs +++ b/src/internal_events/exec.rs @@ -1,3 +1,5 @@ +// ## skip check-events ## + use metrics::{counter, histogram}; use std::time::Duration; use vector_core::internal_event::InternalEvent; diff --git a/src/internal_events/file.rs b/src/internal_events/file.rs index 3259a995c2ef3..9668279f52c8f 100644 --- 
a/src/internal_events/file.rs +++ b/src/internal_events/file.rs @@ -25,7 +25,7 @@ mod source { #[derive(Debug)] pub struct FileBytesReceived<'a> { pub byte_size: usize, - pub path: &'a str, + pub file: &'a str, } impl<'a> InternalEvent for FileBytesReceived<'a> { @@ -34,7 +34,7 @@ mod source { message = "Bytes received.", byte_size = %self.byte_size, protocol = "file", - path = %self.path, + file = %self.file, ); } @@ -42,13 +42,14 @@ mod source { counter!( "component_received_bytes_total", self.byte_size as u64, "protocol" => "file", - "file" => self.path.to_string() + "file" => self.file.to_owned() ); } } #[derive(Debug)] pub struct FileEventsReceived<'a> { + pub count: usize, pub file: &'a str, pub byte_size: usize, } @@ -56,15 +57,16 @@ mod source { impl InternalEvent for FileEventsReceived<'_> { fn emit_logs(&self) { trace!( - message = "Received one event.", - file = %self.file, - byte_size = %self.byte_size + message = "Received events.", + count = %self.count, + byte_size = %self.byte_size, + file = %self.file ); } fn emit_metrics(&self) { counter!( - "events_in_total", 1, + "events_in_total", self.count as u64, "file" => self.file.to_owned(), ); counter!( @@ -72,7 +74,7 @@ mod source { "file" => self.file.to_owned(), ); counter!( - "component_received_events_total", 1, + "component_received_events_total", self.count as u64, "file" => self.file.to_owned(), ); counter!( @@ -84,28 +86,28 @@ mod source { #[derive(Debug)] pub struct FileChecksumFailed<'a> { - pub path: &'a Path, + pub file: &'a Path, } impl<'a> InternalEvent for FileChecksumFailed<'a> { fn emit_logs(&self) { warn!( message = "Currently ignoring file too small to fingerprint.", - path = %self.path.display(), + file = %self.file.display(), ) } fn emit_metrics(&self) { counter!( "checksum_errors_total", 1, - "file" => self.path.to_string_lossy().into_owned(), + "file" => self.file.to_string_lossy().into_owned(), ); } } #[derive(Debug)] pub struct FileFingerprintReadError<'a> { - pub path: &'a 
Path, + pub file: &'a Path, pub error: Error, } @@ -113,7 +115,7 @@ mod source { fn emit_logs(&self) { error!( message = "Failed reading file for fingerprinting.", - path = %self.path.display(), + file = %self.file.display(), error_type = "read_failed", error = %self.error, stage = "receiving", @@ -123,12 +125,12 @@ mod source { fn emit_metrics(&self) { counter!( "fingerprint_read_errors_total", 1, - "file" => self.path.to_string_lossy().into_owned(), + "file" => self.file.to_string_lossy().into_owned(), ); counter!( "component_errors_total", 1, "error_type" => "read_failed", - "file" => self.path.to_string_lossy().into_owned(), + "file" => self.file.to_string_lossy().into_owned(), "stage" => "receiving", ); } @@ -136,7 +138,7 @@ mod source { #[derive(Debug)] pub struct FileDeleteError<'a> { - pub path: &'a Path, + pub file: &'a Path, pub error: Error, } @@ -144,7 +146,7 @@ mod source { fn emit_logs(&self) { warn!( message = "Failed in deleting file.", - path = %self.path.display(), + file = %self.file.display(), error = %self.error, internal_log_rate_secs = 1 ); @@ -153,12 +155,12 @@ mod source { fn emit_metrics(&self) { counter!( "file_delete_errors_total", 1, - "file" => self.path.to_string_lossy().into_owned(), + "file" => self.file.to_string_lossy().into_owned(), ); counter!( "component_errors_total", 1, "error_type" => "delete_failed", - "file" => self.path.to_string_lossy().into_owned(), + "file" => self.file.to_string_lossy().into_owned(), "stage" => "receiving" ); } @@ -166,49 +168,49 @@ mod source { #[derive(Debug)] pub struct FileDeleted<'a> { - pub path: &'a Path, + pub file: &'a Path, } impl<'a> InternalEvent for FileDeleted<'a> { fn emit_logs(&self) { info!( message = "File deleted.", - path = %self.path.display(), + file = %self.file.display(), ); } fn emit_metrics(&self) { counter!( "files_deleted_total", 1, - "file" => self.path.to_string_lossy().into_owned(), + "file" => self.file.to_string_lossy().into_owned(), ); } } #[derive(Debug)] pub struct 
FileUnwatched<'a> { - pub path: &'a Path, + pub file: &'a Path, } impl<'a> InternalEvent for FileUnwatched<'a> { fn emit_logs(&self) { info!( message = "Stopped watching file.", - path = %self.path.display(), + file = %self.file.display(), ); } fn emit_metrics(&self) { counter!( "files_unwatched_total", 1, - "file" => self.path.to_string_lossy().into_owned(), + "file" => self.file.to_string_lossy().into_owned(), ); } } #[derive(Debug)] pub struct FileWatchError<'a> { - pub path: &'a Path, + pub file: &'a Path, pub error: Error, } @@ -216,7 +218,7 @@ mod source { fn emit_logs(&self) { error!( message = "Failed to watch file.", - path = %self.path.display(), + file = %self.file.display(), error_type = "watch_failed", error = %self.error, stage = "receiving" @@ -226,12 +228,12 @@ mod source { fn emit_metrics(&self) { counter!( "file_watch_errors_total", 1, - "file" => self.path.to_string_lossy().into_owned(), + "file" => self.file.to_string_lossy().into_owned(), ); counter!( "component_errors_total", 1, "error_type" => "watch_failed", - "file" => self.path.to_string_lossy().into_owned(), + "file" => self.file.to_string_lossy().into_owned(), "stage" => "receiving" ); } @@ -239,7 +241,7 @@ mod source { #[derive(Debug)] pub struct FileResumed<'a> { - pub path: &'a Path, + pub file: &'a Path, pub file_position: u64, } @@ -247,7 +249,7 @@ mod source { fn emit_logs(&self) { info!( message = "Resuming to watch file.", - path = %self.path.display(), + file = %self.file.display(), file_position = %self.file_position ); } @@ -255,28 +257,28 @@ mod source { fn emit_metrics(&self) { counter!( "files_resumed_total", 1, - "file" => self.path.to_string_lossy().into_owned(), + "file" => self.file.to_string_lossy().into_owned(), ); } } #[derive(Debug)] pub struct FileAdded<'a> { - pub path: &'a Path, + pub file: &'a Path, } impl<'a> InternalEvent for FileAdded<'a> { fn emit_logs(&self) { info!( message = "Found new file to watch.", - path = %self.path.display(), + file = 
%self.file.display(), ); } fn emit_metrics(&self) { counter!( "files_added_total", 1, - "file" => self.path.to_string_lossy().into_owned(), + "file" => self.file.to_string_lossy().into_owned(), ); } } @@ -351,7 +353,7 @@ mod source { counter!( "component_errors_total", 1, "error_type" => "glob_failed", - "file" => self.path.to_string_lossy().into_owned(), + "path" => self.path.to_string_lossy().into_owned(), "stage" => "receiving" ); } @@ -361,39 +363,39 @@ mod source { pub struct FileSourceInternalEventsEmitter; impl FileSourceInternalEvents for FileSourceInternalEventsEmitter { - fn emit_file_added(&self, path: &Path) { - emit!(&FileAdded { path }); + fn emit_file_added(&self, file: &Path) { + emit!(&FileAdded { file }); } - fn emit_file_resumed(&self, path: &Path, file_position: u64) { + fn emit_file_resumed(&self, file: &Path, file_position: u64) { emit!(&FileResumed { - path, + file, file_position }); } - fn emit_file_watch_error(&self, path: &Path, error: Error) { - emit!(&FileWatchError { path, error }); + fn emit_file_watch_error(&self, file: &Path, error: Error) { + emit!(&FileWatchError { file, error }); } - fn emit_file_unwatched(&self, path: &Path) { - emit!(&FileUnwatched { path }); + fn emit_file_unwatched(&self, file: &Path) { + emit!(&FileUnwatched { file }); } - fn emit_file_deleted(&self, path: &Path) { - emit!(&FileDeleted { path }); + fn emit_file_deleted(&self, file: &Path) { + emit!(&FileDeleted { file }); } - fn emit_file_delete_error(&self, path: &Path, error: Error) { - emit!(&FileDeleteError { path, error }); + fn emit_file_delete_error(&self, file: &Path, error: Error) { + emit!(&FileDeleteError { file, error }); } - fn emit_file_fingerprint_read_error(&self, path: &Path, error: Error) { - emit!(&FileFingerprintReadError { path, error }); + fn emit_file_fingerprint_read_error(&self, file: &Path, error: Error) { + emit!(&FileFingerprintReadError { file, error }); } - fn emit_file_checksum_failed(&self, path: &Path) { - 
emit!(&FileChecksumFailed { path }); + fn emit_file_checksum_failed(&self, file: &Path) { + emit!(&FileChecksumFailed { file }); } fn emit_file_checkpointed(&self, count: usize, duration: Duration) { diff --git a/src/internal_events/fluent.rs b/src/internal_events/fluent.rs index b6d444de3e6cb..0eb2370e7c190 100644 --- a/src/internal_events/fluent.rs +++ b/src/internal_events/fluent.rs @@ -1,3 +1,5 @@ +// ## skip check-events ## + use crate::sources::fluent::DecodeError; use metrics::counter; use vector_core::internal_event::InternalEvent; diff --git a/src/internal_events/geoip.rs b/src/internal_events/geoip.rs index f70d5942e4cf6..9e2d2652a03c9 100644 --- a/src/internal_events/geoip.rs +++ b/src/internal_events/geoip.rs @@ -1,3 +1,5 @@ +// ## skip check-events ## + use metrics::counter; use vector_core::internal_event::InternalEvent; diff --git a/src/internal_events/http.rs b/src/internal_events/http.rs index 288ea062a7114..07396a3bac632 100644 --- a/src/internal_events/http.rs +++ b/src/internal_events/http.rs @@ -1,3 +1,5 @@ +// ## skip check-events ## + use metrics::counter; use std::error::Error; use vector_core::internal_event::InternalEvent; diff --git a/src/internal_events/http_client.rs b/src/internal_events/http_client.rs index 9504be69aa662..a772f596c29e4 100644 --- a/src/internal_events/http_client.rs +++ b/src/internal_events/http_client.rs @@ -1,3 +1,5 @@ +// ## skip check-events ## + use http::{ header::{self, HeaderMap, HeaderValue}, Request, Response, diff --git a/src/internal_events/journald.rs b/src/internal_events/journald.rs index 8429039d66551..d52a1a0fbd406 100644 --- a/src/internal_events/journald.rs +++ b/src/internal_events/journald.rs @@ -1,3 +1,5 @@ +// ## skip check-events ## + use metrics::counter; use vector_core::internal_event::InternalEvent; diff --git a/src/internal_events/kafka.rs b/src/internal_events/kafka.rs index fc85f98a869bf..4f051cb2b2053 100644 --- a/src/internal_events/kafka.rs +++ b/src/internal_events/kafka.rs @@ -1,3 
+1,5 @@ +// ## skip check-events ## + use metrics::{counter, gauge}; use vector_core::internal_event::InternalEvent; use vector_core::update_counter; diff --git a/src/internal_events/kubernetes/instrumenting_watcher.rs b/src/internal_events/kubernetes/instrumenting_watcher.rs index 88d3dbedcf467..2228245e9b8e8 100644 --- a/src/internal_events/kubernetes/instrumenting_watcher.rs +++ b/src/internal_events/kubernetes/instrumenting_watcher.rs @@ -1,3 +1,5 @@ +// ## skip check-events ## + use metrics::counter; use std::fmt::Debug; use vector_core::internal_event::InternalEvent; diff --git a/src/internal_events/log_to_metric.rs b/src/internal_events/log_to_metric.rs index 92f6307772ee5..1bf30ac9d9708 100644 --- a/src/internal_events/log_to_metric.rs +++ b/src/internal_events/log_to_metric.rs @@ -1,3 +1,5 @@ +// ## skip check-events ## + use crate::template::TemplateParseError; use metrics::counter; use std::num::ParseFloatError; diff --git a/src/internal_events/logplex.rs b/src/internal_events/logplex.rs index 9b2d8d7629d21..953ec97913551 100644 --- a/src/internal_events/logplex.rs +++ b/src/internal_events/logplex.rs @@ -1,3 +1,5 @@ +// ## skip check-events ## + use metrics::counter; use vector_core::internal_event::InternalEvent; diff --git a/src/internal_events/lua.rs b/src/internal_events/lua.rs index ba6aeb9abf1d9..4a528627387fa 100644 --- a/src/internal_events/lua.rs +++ b/src/internal_events/lua.rs @@ -1,3 +1,5 @@ +// ## skip check-events ## + use metrics::{counter, gauge}; use vector_core::internal_event::InternalEvent; diff --git a/src/internal_events/mod.rs b/src/internal_events/mod.rs index f62862bead665..358a59b3d107e 100644 --- a/src/internal_events/mod.rs +++ b/src/internal_events/mod.rs @@ -287,6 +287,16 @@ pub use self::windows::*; #[cfg(feature = "sources-mongodb_metrics")] pub use mongodb_metrics::*; +#[cfg(test)] +#[macro_export] +macro_rules! 
emit { + ($event:expr) => {{ + crate::test_util::components::record_internal_event(stringify!($event)); + vector_core::internal_event::emit($event) + }}; +} + +#[cfg(not(test))] #[macro_export] macro_rules! emit { ($event:expr) => { diff --git a/src/internal_events/mongodb_metrics.rs b/src/internal_events/mongodb_metrics.rs index 729b4721bbe5b..70cc10155e445 100644 --- a/src/internal_events/mongodb_metrics.rs +++ b/src/internal_events/mongodb_metrics.rs @@ -1,3 +1,5 @@ +// ## skip check-events ## + use metrics::{counter, histogram}; use mongodb::{bson, error::Error as MongoError}; use std::time::Instant; diff --git a/src/internal_events/nats.rs b/src/internal_events/nats.rs index 0d2120100860c..e8f8c9a445354 100644 --- a/src/internal_events/nats.rs +++ b/src/internal_events/nats.rs @@ -1,3 +1,5 @@ +// ## skip check-events ## + use metrics::counter; use std::io::Error; use vector_core::internal_event::InternalEvent; diff --git a/src/internal_events/nginx_metrics.rs b/src/internal_events/nginx_metrics.rs index 9128e4b165746..15395b4435ff4 100644 --- a/src/internal_events/nginx_metrics.rs +++ b/src/internal_events/nginx_metrics.rs @@ -1,3 +1,5 @@ +// ## skip check-events ## + use crate::sources::nginx_metrics::parser::ParseError; use metrics::{counter, histogram}; use std::time::Instant; diff --git a/src/internal_events/prometheus.rs b/src/internal_events/prometheus.rs index 35aa27113131a..e641262b3f361 100644 --- a/src/internal_events/prometheus.rs +++ b/src/internal_events/prometheus.rs @@ -1,3 +1,5 @@ +// ## skip check-events ## + use hyper::StatusCode; use metrics::{counter, histogram}; #[cfg(feature = "sources-prometheus")] diff --git a/src/internal_events/pulsar.rs b/src/internal_events/pulsar.rs index d2d255c2c4d11..a8a512aa9e8d7 100644 --- a/src/internal_events/pulsar.rs +++ b/src/internal_events/pulsar.rs @@ -1,3 +1,5 @@ +// ## skip check-events ## + use metrics::counter; use vector_core::internal_event::InternalEvent; diff --git 
a/src/internal_events/redis.rs b/src/internal_events/redis.rs index 9eb11052b6cec..8f471a8d4676e 100644 --- a/src/internal_events/redis.rs +++ b/src/internal_events/redis.rs @@ -1,3 +1,5 @@ +// ## skip check-events ## + use metrics::counter; use vector_core::internal_event::InternalEvent; diff --git a/src/internal_events/remap.rs b/src/internal_events/remap.rs index 9398d67e20e52..6fb989e79918a 100644 --- a/src/internal_events/remap.rs +++ b/src/internal_events/remap.rs @@ -1,3 +1,5 @@ +// ## skip check-events ## + use metrics::counter; use vector_core::internal_event::InternalEvent; diff --git a/src/internal_events/socket.rs b/src/internal_events/socket.rs index 0dbaecaad4f13..fdf3c7a33cd00 100644 --- a/src/internal_events/socket.rs +++ b/src/internal_events/socket.rs @@ -1,3 +1,5 @@ +// ## skip check-events ## + use metrics::counter; use vector_core::internal_event::InternalEvent; diff --git a/src/internal_events/splunk_hec.rs b/src/internal_events/splunk_hec.rs index 85ee1a20d2014..c938eddbad535 100644 --- a/src/internal_events/splunk_hec.rs +++ b/src/internal_events/splunk_hec.rs @@ -1,3 +1,5 @@ +// ## skip check-events ## + use crate::event::metric::{MetricKind, MetricValue}; use metrics::counter; use serde_json::Error; diff --git a/src/internal_events/statsd_source.rs b/src/internal_events/statsd_source.rs index 74c50bf74aa68..e2bae30d80fc8 100644 --- a/src/internal_events/statsd_source.rs +++ b/src/internal_events/statsd_source.rs @@ -1,3 +1,5 @@ +// ## skip check-events ## + use bytes::Bytes; use metrics::counter; use vector_core::internal_event::InternalEvent; diff --git a/src/internal_events/stdin.rs b/src/internal_events/stdin.rs index 5ce2b875114a2..4eb4bc9b87a97 100644 --- a/src/internal_events/stdin.rs +++ b/src/internal_events/stdin.rs @@ -1,3 +1,5 @@ +// ## skip check-events ## + use metrics::counter; use vector_core::internal_event::InternalEvent; diff --git a/src/internal_events/syslog.rs b/src/internal_events/syslog.rs index 
6848bf5d73434..5cd2d1e3d13af 100644 --- a/src/internal_events/syslog.rs +++ b/src/internal_events/syslog.rs @@ -1,3 +1,5 @@ +// ## skip check-events ## + use metrics::counter; use vector_core::internal_event::InternalEvent; diff --git a/src/internal_events/tcp.rs b/src/internal_events/tcp.rs index 5510fd84525b7..c3777da4b3dd4 100644 --- a/src/internal_events/tcp.rs +++ b/src/internal_events/tcp.rs @@ -1,3 +1,5 @@ +// ## skip check-events ## + use crate::tls::TlsError; use metrics::counter; use vector_core::internal_event::InternalEvent; diff --git a/src/internal_events/udp.rs b/src/internal_events/udp.rs index 6f7a1d8f71b84..4e91b0f9bf892 100644 --- a/src/internal_events/udp.rs +++ b/src/internal_events/udp.rs @@ -1,3 +1,5 @@ +// ## skip check-events ## + use metrics::counter; use vector_core::internal_event::InternalEvent; diff --git a/src/internal_events/unix.rs b/src/internal_events/unix.rs index dbbef24d6fea9..21da274e6a253 100644 --- a/src/internal_events/unix.rs +++ b/src/internal_events/unix.rs @@ -1,3 +1,5 @@ +// ## skip check-events ## + use metrics::counter; use std::{io::Error, path::Path}; use vector_core::internal_event::InternalEvent; diff --git a/src/internal_events/vector.rs b/src/internal_events/vector.rs index a94e13dc19c1e..822a7e0de55f4 100644 --- a/src/internal_events/vector.rs +++ b/src/internal_events/vector.rs @@ -1,3 +1,5 @@ +// ## skip check-events ## + use metrics::counter; use prost::DecodeError; use vector_core::internal_event::InternalEvent; diff --git a/src/kubernetes/state/instrumenting.rs b/src/kubernetes/state/instrumenting.rs index 24a650d7cef0c..3141b2c716591 100644 --- a/src/kubernetes/state/instrumenting.rs +++ b/src/kubernetes/state/instrumenting.rs @@ -97,7 +97,8 @@ mod tests { } fn get_metric_value(op_kind: &'static str) -> Option { - let controller = crate::metrics::get_controller().expect("failed to init metric container"); + let controller = + crate::metrics::Controller::get().expect("failed to init metric container"); 
let tags_to_lookup = Some( vec![("op_kind".to_owned(), op_kind.to_owned())] @@ -105,7 +106,8 @@ mod tests { .collect(), ); - crate::metrics::capture_metrics(controller) + controller + .capture_metrics() .find(|metric| { metric.name() == "k8s_state_ops_total" && metric.tags() == tags_to_lookup.as_ref() }) @@ -143,7 +145,7 @@ mod tests { #[tokio::test] async fn add() { trace_init(); - let _ = crate::metrics::init(); + let _ = crate::metrics::init_test(); let _guard = tests_lock().await; let (mut writer, mut events_rx, mut actions_tx) = prepare_test(); @@ -174,7 +176,7 @@ mod tests { #[tokio::test] async fn update() { trace_init(); - let _ = crate::metrics::init(); + let _ = crate::metrics::init_test(); let _guard = tests_lock().await; let (mut writer, mut events_rx, mut actions_tx) = prepare_test(); @@ -205,7 +207,7 @@ mod tests { #[tokio::test] async fn delete() { trace_init(); - let _ = crate::metrics::init(); + let _ = crate::metrics::init_test(); let _guard = tests_lock().await; let (mut writer, mut events_rx, mut actions_tx) = prepare_test(); @@ -236,7 +238,7 @@ mod tests { #[tokio::test] async fn resync() { trace_init(); - let _ = crate::metrics::init(); + let _ = crate::metrics::init_test(); let _guard = tests_lock().await; let (mut writer, mut events_rx, mut actions_tx) = prepare_test(); @@ -263,7 +265,7 @@ mod tests { #[tokio::test] async fn request_maintenance_without_maintenance() { trace_init(); - let _ = crate::metrics::init(); + let _ = crate::metrics::init_test(); let _guard = tests_lock().await; let (mut writer, _events_rx, _actions_tx) = prepare_test(); @@ -276,7 +278,7 @@ mod tests { #[tokio::test] async fn request_maintenance_with_maintenance() { trace_init(); - let _ = crate::metrics::init(); + let _ = crate::metrics::init_test(); let _guard = tests_lock().await; let (events_tx, _events_rx) = mpsc::channel(0); @@ -299,7 +301,7 @@ mod tests { #[tokio::test] async fn perform_maintenance() { trace_init(); - let _ = crate::metrics::init(); + let _ = 
crate::metrics::init_test(); let _guard = tests_lock().await; let (mut writer, mut events_rx, mut actions_tx) = prepare_test(); diff --git a/src/sinks/blackhole.rs b/src/sinks/blackhole.rs index 27c4561096661..d717458131f2f 100644 --- a/src/sinks/blackhole.rs +++ b/src/sinks/blackhole.rs @@ -91,7 +91,7 @@ impl BlackholeSink { #[async_trait] impl StreamSink for BlackholeSink { - async fn run(&mut self, input: BoxStream<'_, Event>) -> Result<(), ()> { + async fn run(mut self: Box, input: BoxStream<'_, Event>) -> Result<(), ()> { // Spin up a task that does the periodic reporting. This is decoupled from the main sink so // that rate limiting support can be added more simply without having to interleave it with // the printing. @@ -167,7 +167,7 @@ mod tests { print_interval_secs: 10, rate: None, }; - let mut sink = BlackholeSink::new(config, Acker::Null); + let sink = Box::new(BlackholeSink::new(config, Acker::Null)); let (_input_lines, events) = random_events_with_stream(100, 10, None); let _ = sink.run(Box::pin(events)).await.unwrap(); diff --git a/src/sinks/console.rs b/src/sinks/console.rs index ae3faea51b3a7..fea5b7273ff84 100644 --- a/src/sinks/console.rs +++ b/src/sinks/console.rs @@ -132,7 +132,7 @@ struct WriterSink { #[async_trait] impl StreamSink for WriterSink { - async fn run(&mut self, mut input: BoxStream<'_, Event>) -> Result<(), ()> { + async fn run(mut self: Box, mut input: BoxStream<'_, Event>) -> Result<(), ()> { while let Some(event) = input.next().await { self.acker.ack(1); if let Some(mut buf) = encode_event(event, &self.encoding) { diff --git a/src/sinks/datadog/events.rs b/src/sinks/datadog/events.rs index e01d80bc1b511..4e76b3b0a16f6 100644 --- a/src/sinks/datadog/events.rs +++ b/src/sinks/datadog/events.rs @@ -171,7 +171,7 @@ impl DatadogEventsService { "title", ] .iter() - .map(|field| vec![PathComponent::Key(field.to_string())]) + .map(|field| vec![PathComponent::Key((*field).into())]) .collect(), ), // DataDog Event API requires unix 
timestamp. diff --git a/src/sinks/datadog/logs/sink.rs b/src/sinks/datadog/logs/sink.rs index e5b3368eb8379..fcd6ee8e2b46e 100644 --- a/src/sinks/datadog/logs/sink.rs +++ b/src/sinks/datadog/logs/sink.rs @@ -105,9 +105,9 @@ impl LogSinkBuilder { pub fn build(self) -> LogSink { LogSink { default_api_key: self.default_api_key, - encoding: Some(self.encoding), - acker: Some(self.context.acker()), - service: Some(self.service), + encoding: self.encoding, + acker: self.context.acker(), + service: self.service, timeout: self.timeout.unwrap_or(BATCH_DEFAULT_TIMEOUT), compression: self.compression.unwrap_or_default(), log_schema: self.log_schema.unwrap_or_else(|| log_schema()), @@ -124,16 +124,16 @@ pub struct LogSink { /// case we batch them by this default. default_api_key: Arc, /// The ack system for this sink to vector's buffer mechanism - acker: Option, + acker: Acker, /// The API service - service: Option, + service: S, /// The encoding of payloads /// /// This struct always generates JSON payloads. However we do, technically, /// allow the user to set the encoding to a single value -- JSON -- and this /// encoding comes with rules on sanitizing the payload which must be /// applied. 
- encoding: Option>, + encoding: EncodingConfigWithDefault, /// The compression technique to use when building the request body compression: Compression, /// The total duration before a flush is forced @@ -254,26 +254,14 @@ where S::Response: AsRef + Send + 'static, S::Error: Debug + Into + Send, { - async fn run(&mut self, input: BoxStream<'_, Event>) -> Result<(), ()> { + async fn run(self: Box, input: BoxStream<'_, Event>) -> Result<(), ()> { let io_bandwidth = 64; let (io_tx, io_rx) = channel(io_bandwidth); - let service = self - .service - .take() - .expect("same sink should not be run twice"); - let acker = self - .acker - .take() - .expect("same sink should not be run twice"); - let encoding = self - .encoding - .take() - .expect("same sink should not be run twice"); + let encoding = self.encoding; let default_api_key = Arc::clone(&self.default_api_key); let compression = self.compression; let log_schema = self.log_schema; - - let io = run_io(io_rx, service, acker).in_current_span(); + let io = run_io(io_rx, self.service, self.acker).in_current_span(); let _ = tokio::spawn(io); let batcher = Batcher::new( @@ -303,12 +291,12 @@ where } } Err(error) => { - error!("Sink was unable to construct a payload body: {}", error); + error!(message = "Sink was unable to construct a payload body.", %error); return Err(()); } }, Err(error) => { - error!("Task failed to properly join: {}", error); + error!(message = "Task failed to properly join.", %error); return Err(()); } } diff --git a/src/sinks/file/mod.rs b/src/sinks/file/mod.rs index 0b5068ad89b0d..238453179394e 100644 --- a/src/sinks/file/mod.rs +++ b/src/sinks/file/mod.rs @@ -336,8 +336,10 @@ async fn write_event_to_file( #[async_trait] impl StreamSink for FileSink { - async fn run(&mut self, input: BoxStream<'_, Event>) -> Result<(), ()> { - FileSink::run(self, input).await.expect("file sink error"); + async fn run(mut self: Box, input: BoxStream<'_, Event>) -> Result<(), ()> { + FileSink::run(&mut self, input) + 
.await + .expect("file sink error"); Ok(()) } } diff --git a/src/sinks/nats.rs b/src/sinks/nats.rs index 040f104037e66..ce2847c343507 100644 --- a/src/sinks/nats.rs +++ b/src/sinks/nats.rs @@ -152,8 +152,8 @@ impl From<&NatsSinkConfig> for NatsOptions { #[async_trait] impl StreamSink for NatsSink { - async fn run(&mut self, mut input: BoxStream<'_, Event>) -> Result<(), ()> { - let nats_options: async_nats::Options = self.options.clone().into(); + async fn run(self: Box, mut input: BoxStream<'_, Event>) -> Result<(), ()> { + let nats_options: async_nats::Options = self.options.into(); let nc = nats_options.connect(&self.url).await.map_err(|_| ())?; @@ -270,7 +270,7 @@ mod integration_tests { // Publish events. let (acker, ack_counter) = Acker::new_for_testing(); - let mut sink = NatsSink::new(cnf.clone(), acker).unwrap(); + let sink = Box::new(NatsSink::new(cnf.clone(), acker).unwrap()); let num_events = 1_000; let (input, events) = random_lines_with_stream(100, num_events, None); diff --git a/src/sinks/prometheus/exporter.rs b/src/sinks/prometheus/exporter.rs index aba57d05f4d73..2eeb8ae3dc782 100644 --- a/src/sinks/prometheus/exporter.rs +++ b/src/sinks/prometheus/exporter.rs @@ -288,7 +288,7 @@ impl PrometheusExporter { #[async_trait] impl StreamSink for PrometheusExporter { - async fn run(&mut self, mut input: BoxStream<'_, Event>) -> Result<(), ()> { + async fn run(mut self: Box, mut input: BoxStream<'_, Event>) -> Result<(), ()> { self.start_server_if_needed().await; while let Some(event) = input.next().await { let item = event.into_metric(); @@ -591,7 +591,7 @@ mod tests { }; let cx = SinkContext::new_test(); - let mut sink = PrometheusExporter::new(config, cx.acker()); + let sink = Box::new(PrometheusExporter::new(config, cx.acker())); let m1 = Metric::new( "absolute", @@ -616,11 +616,13 @@ mod tests { Event::Metric(m1.clone().with_value(MetricValue::Counter { value: 40. 
})), ]; + let internal_metrics = Arc::clone(&sink.metrics); + sink.run(Box::pin(futures::stream::iter(metrics))) .await .unwrap(); - let map = &sink.metrics.read().unwrap().map; + let map = &internal_metrics.read().unwrap().map; assert_eq!( map.get_full(&MetricEntry(m1)).unwrap().1.value(), diff --git a/src/sinks/s3_common/sink.rs b/src/sinks/s3_common/sink.rs index 185c48ac9391d..17f7619fa03f0 100644 --- a/src/sinks/s3_common/sink.rs +++ b/src/sinks/s3_common/sink.rs @@ -42,10 +42,10 @@ pub struct S3Sink where R: S3RequestBuilder, { - acker: Option, - service: Option, + acker: Acker, + service: S, request_builder: R, - partitioner: Option, + partitioner: KeyPartitioner, batch_size_bytes: Option, batch_size_events: NonZeroUsize, batch_timeout: Duration, @@ -65,10 +65,10 @@ where batch_timeout: Duration, ) -> Self { Self { - acker: Some(cx.acker()), - service: Some(service), + acker: cx.acker(), + service, request_builder, - partitioner: Some(partitioner), + partitioner, batch_size_bytes, batch_size_events, batch_timeout, @@ -85,7 +85,7 @@ where S::Error: Debug + Into + Send, R: S3RequestBuilder + Send, { - async fn run(&mut self, input: BoxStream<'_, Event>) -> Result<(), ()> { + async fn run(mut self: Box, input: BoxStream<'_, Event>) -> Result<(), ()> { // All sinks do the same fundamental job: take in events, and ship them // out. Empirical testing shows that our number one priority for high // throughput needs to be servicing I/O as soon as we possibly can. In @@ -94,25 +94,13 @@ where // batching, ordering, and so on. 
let (io_tx, io_rx) = channel(64); let io_barrier = Arc::new(Barrier::new(2)); - let service = self - .service - .take() - .expect("same sink should not be run twice"); - let acker = self - .acker - .take() - .expect("same sink should not be run twice"); - let partitioner = self - .partitioner - .take() - .expect("same sink should not be run twice"); - let io = run_io(io_rx, Arc::clone(&io_barrier), service, acker).in_current_span(); + let io = run_io(io_rx, Arc::clone(&io_barrier), self.service, self.acker).in_current_span(); let _ = tokio::spawn(io); let batcher = Batcher::new( input, - partitioner, + self.partitioner, self.batch_timeout, self.batch_size_events, self.batch_size_bytes, diff --git a/src/sinks/util/adaptive_concurrency/tests.rs b/src/sinks/util/adaptive_concurrency/tests.rs index 4d143c874cf62..1c064eb0f92af 100644 --- a/src/sinks/util/adaptive_concurrency/tests.rs +++ b/src/sinks/util/adaptive_concurrency/tests.rs @@ -4,7 +4,7 @@ use super::controller::ControllerStatistics; use crate::{ config::{self, DataType, SinkConfig, SinkContext}, event::{metric::MetricValue, Event}, - metrics::{self, capture_metrics, get_controller}, + metrics::{self}, sinks::{ util::{ retries::RetryLogic, sink, BatchSettings, Concurrency, EncodedEvent, EncodedLength, @@ -103,7 +103,8 @@ struct TestParams { requests: usize, // The time interval between requests. - interval: Option, + #[serde(default = "default_interval")] + interval: f64, // The delay is the base time every request takes return. 
delay: f64, @@ -125,6 +126,10 @@ struct TestParams { concurrency: Concurrency, } +const fn default_interval() -> f64 { + 0.0 +} + const fn default_concurrency() -> Concurrency { Concurrency::Adaptive } @@ -383,7 +388,7 @@ struct TestResults { } async fn run_test(params: TestParams) -> TestResults { - let _ = metrics::init(); + let _ = metrics::init_test(); let (send_done, is_done) = oneshot::channel(); let test_config = TestConfig { @@ -409,7 +414,7 @@ async fn run_test(params: TestParams) -> TestResults { let (topology, _crash) = start_topology(config.build().unwrap(), false).await; - let controller = get_controller().unwrap(); + let controller = metrics::Controller::get().unwrap(); is_done.await.expect("Test failed to complete"); topology.stop().await; @@ -429,7 +434,8 @@ async fn run_test(params: TestParams) -> TestResults { .into_inner() .expect("Failed to unwrap controller_stats Mutex"); - let metrics = capture_metrics(controller) + let metrics = controller + .capture_metrics() .map(|metric| (metric.name().to_string(), metric)) .collect::>(); // Ensure basic statistics are captured, don't actually examine them diff --git a/src/sinks/util/encoding/config.rs b/src/sinks/util/encoding/config.rs index ab2850eb5e8c7..b715baf3cdd39 100644 --- a/src/sinks/util/encoding/config.rs +++ b/src/sinks/util/encoding/config.rs @@ -26,7 +26,7 @@ pub struct EncodingConfig { pub(crate) schema: Option, // TODO(2410): Using PathComponents here is a hack for #2407, #2410 should fix this fully. #[serde(default, skip_serializing_if = "skip_serializing_if_default")] - pub(crate) only_fields: Option>>, + pub(crate) only_fields: Option>>>, #[serde(default, skip_serializing_if = "skip_serializing_if_default")] pub(crate) except_fields: Option>, #[serde(default, skip_serializing_if = "skip_serializing_if_default")] @@ -41,7 +41,7 @@ impl EncodingConfiguration for EncodingConfig { &self.schema } // TODO(2410): Using PathComponents here is a hack for #2407, #2410 should fix this fully. 
- fn only_fields(&self) -> &Option>> { + fn only_fields(&self) -> &Option>>> { &self.only_fields } fn except_fields(&self) -> &Option> { @@ -156,7 +156,11 @@ where only_fields: inner.only_fields.map(|fields| { fields .iter() - .map(|only| PathIter::new(only).collect()) + .map(|only| { + PathIter::new(only) + .map(|component| component.into_static()) + .collect() + }) .collect() }), except_fields: inner.except_fields, diff --git a/src/sinks/util/encoding/with_default.rs b/src/sinks/util/encoding/with_default.rs index f4bc6aa9c4cc8..36c8fd4654e69 100644 --- a/src/sinks/util/encoding/with_default.rs +++ b/src/sinks/util/encoding/with_default.rs @@ -27,7 +27,7 @@ pub struct EncodingConfigWithDefault { /// Keep only the following fields of the message. (Items mutually exclusive with `except_fields`) #[serde(default, skip_serializing_if = "skip_serializing_if_default")] // TODO(2410): Using PathComponents here is a hack for #2407, #2410 should fix this fully. - pub(crate) only_fields: Option>>, + pub(crate) only_fields: Option>>>, /// Remove the following fields of the message. (Items mutually exclusive with `only_fields`) #[serde(default, skip_serializing_if = "skip_serializing_if_default")] pub(crate) except_fields: Option>, @@ -44,7 +44,7 @@ impl EncodingConfiguration for EncodingConfigWithDefa &self.schema } // TODO(2410): Using PathComponents here is a hack for #2407, #2410 should fix this fully. 
- fn only_fields(&self) -> &Option>> { + fn only_fields(&self) -> &Option>>> { &self.only_fields } fn except_fields(&self) -> &Option> { @@ -133,7 +133,11 @@ where only_fields: inner.only_fields.map(|fields| { fields .iter() - .map(|only| PathIter::new(only).collect()) + .map(|only| { + PathIter::new(only) + .map(|component| component.into_static()) + .collect() + }) .collect() }), except_fields: inner.except_fields, diff --git a/src/sinks/util/retries.rs b/src/sinks/util/retries.rs index 34a790ccab152..037b2bae22640 100644 --- a/src/sinks/util/retries.rs +++ b/src/sinks/util/retries.rs @@ -95,26 +95,27 @@ where fn retry(&self, _: &Req, result: Result<&Res, &Error>) -> Option { match result { - Ok(response) => { - if self.remaining_attempts == 0 { - error!("Retries exhausted; dropping the request."); - return None; - } - - match self.logic.should_retry_response(response) { - RetryAction::Retry(reason) => { - warn!(message = "Retrying after response.", reason = %reason); - Some(self.build_retry()) + Ok(response) => match self.logic.should_retry_response(response) { + RetryAction::Retry(reason) => { + if self.remaining_attempts == 0 { + error!( + message = "OK/retry response but retries exhausted; dropping the request.", + reason = ?reason + ); + return None; } - RetryAction::DontRetry(reason) => { - error!(message = "Not retriable; dropping the request.", reason = ?reason); - None - } + warn!(message = "Retrying after response.", reason = %reason); + Some(self.build_retry()) + } - RetryAction::Successful => None, + RetryAction::DontRetry(reason) => { + error!(message = "Not retriable; dropping the request.", reason = ?reason); + None } - } + + RetryAction::Successful => None, + }, Err(error) => { if self.remaining_attempts == 0 { error!(message = "Retries exhausted; dropping the request.", %error); diff --git a/src/sinks/util/tcp.rs b/src/sinks/util/tcp.rs index 65741fd7cc31f..591b9ba7c9a83 100644 --- a/src/sinks/util/tcp.rs +++ b/src/sinks/util/tcp.rs @@ -249,7 
+249,7 @@ impl TcpSink { #[async_trait] impl StreamSink for TcpSink { - async fn run(&mut self, input: BoxStream<'_, Event>) -> Result<(), ()> { + async fn run(self: Box, input: BoxStream<'_, Event>) -> Result<(), ()> { // We need [Peekable](https://docs.rs/futures/0.3.6/futures/stream/struct.Peekable.html) for initiating // connection only when we have something to send. let encode_event = Arc::clone(&self.encode_event); diff --git a/src/sinks/util/udp.rs b/src/sinks/util/udp.rs index fca9d81b020af..05679e79cffc7 100644 --- a/src/sinks/util/udp.rs +++ b/src/sinks/util/udp.rs @@ -247,7 +247,7 @@ impl UdpSink { #[async_trait] impl StreamSink for UdpSink { - async fn run(&mut self, input: BoxStream<'_, Event>) -> Result<(), ()> { + async fn run(self: Box, input: BoxStream<'_, Event>) -> Result<(), ()> { let mut input = input.peekable(); while Pin::new(&mut input).peek().await.is_some() { diff --git a/src/sinks/util/unix.rs b/src/sinks/util/unix.rs index 54fe6364c7311..f982022215b2a 100644 --- a/src/sinks/util/unix.rs +++ b/src/sinks/util/unix.rs @@ -133,7 +133,7 @@ impl UnixSink { #[async_trait] impl StreamSink for UnixSink { // Same as TcpSink, more details there. - async fn run(&mut self, input: BoxStream<'_, Event>) -> Result<(), ()> { + async fn run(mut self: Box, input: BoxStream<'_, Event>) -> Result<(), ()> { let encode_event = Arc::clone(&self.encode_event); let mut input = input .map(|mut event| { diff --git a/src/sources/dnstap/parser.rs b/src/sources/dnstap/parser.rs index 0e00c87d4fa5c..36d1368b41464 100644 --- a/src/sources/dnstap/parser.rs +++ b/src/sources/dnstap/parser.rs @@ -68,7 +68,7 @@ lazy_static! 
{ pub struct DnstapParser<'a> { event_schema: &'a DnstapEventSchema, - parent_key_path: Vec, + parent_key_path: Vec>, log_event: &'a mut LogEvent, } @@ -94,7 +94,7 @@ impl<'a> DnstapParser<'a> { V: Into + Debug, { let mut node_path = self.parent_key_path.clone(); - node_path.push(PathComponent::Key(key.to_string())); + node_path.push(PathComponent::Key(key.into())); self.log_event.insert_path(node_path, value) } @@ -256,16 +256,8 @@ impl<'a> DnstapParser<'a> { to_dnstap_message_type(dnstap_message_type_id), ); - let request_message_key = self - .event_schema - .dnstap_message_schema() - .request_message() - .to_string(); - let response_message_key = self - .event_schema - .dnstap_message_schema() - .response_message() - .to_string(); + let request_message_key = self.event_schema.dnstap_message_schema().request_message(); + let response_message_key = self.event_schema.dnstap_message_schema().response_message(); if let Some(query_time_sec) = dnstap_message.query_time_sec { let (time_in_nanosec, query_time_nsec) = match dnstap_message.query_time_nsec { @@ -293,7 +285,7 @@ impl<'a> DnstapParser<'a> { if dnstap_message.query_message != None { self.parent_key_path - .push(PathComponent::Key(request_message_key.clone())); + .push(PathComponent::Key(request_message_key.into())); let time_key_name = if dnstap_message_type_id <= MAX_DNSTAP_QUERY_MESSAGE_TYPE_ID { self.event_schema.dns_query_message_schema().time() @@ -350,7 +342,7 @@ impl<'a> DnstapParser<'a> { if dnstap_message.response_message != None { self.parent_key_path - .push(PathComponent::Key(response_message_key.clone())); + .push(PathComponent::Key(response_message_key.into())); let time_key_name = if dnstap_message_type_id <= MAX_DNSTAP_QUERY_MESSAGE_TYPE_ID { self.event_schema.dns_query_message_schema().time() @@ -384,11 +376,11 @@ impl<'a> DnstapParser<'a> { 1..=12 => { if let Some(query_message) = dnstap_message.query_message { let mut query_message_parser = DnsMessageParser::new(query_message); - if let 
Err(error) = self - .parse_dns_query_message(&request_message_key, &mut query_message_parser) + if let Err(error) = + self.parse_dns_query_message(request_message_key, &mut query_message_parser) { self.log_raw_dns_message( - &request_message_key, + request_message_key, query_message_parser.raw_message(), ); @@ -398,12 +390,11 @@ impl<'a> DnstapParser<'a> { if let Some(response_message) = dnstap_message.response_message { let mut response_message_parser = DnsMessageParser::new(response_message); - if let Err(error) = self.parse_dns_query_message( - &response_message_key, - &mut response_message_parser, - ) { + if let Err(error) = self + .parse_dns_query_message(response_message_key, &mut response_message_parser) + { self.log_raw_dns_message( - &response_message_key, + response_message_key, response_message_parser.raw_message(), ); @@ -416,11 +407,11 @@ impl<'a> DnstapParser<'a> { let mut update_request_message_parser = DnsMessageParser::new(update_request_message); if let Err(error) = self.parse_dns_update_message( - &request_message_key, + request_message_key, &mut update_request_message_parser, ) { self.log_raw_dns_message( - &request_message_key, + request_message_key, update_request_message_parser.raw_message(), ); @@ -432,11 +423,11 @@ impl<'a> DnstapParser<'a> { let mut update_response_message_parser = DnsMessageParser::new(update_response_message); if let Err(error) = self.parse_dns_update_message( - &response_message_key, + response_message_key, &mut update_response_message_parser, ) { self.log_raw_dns_message( - &response_message_key, + response_message_key, update_response_message_parser.raw_message(), ); @@ -468,9 +459,9 @@ impl<'a> DnstapParser<'a> { self.insert(time_precision_key, time_precision.to_string()); } - fn log_raw_dns_message(&mut self, key_prefix: &str, raw_dns_message: &[u8]) { + fn log_raw_dns_message(&mut self, key_prefix: &'static str, raw_dns_message: &[u8]) { self.parent_key_path - .push(PathComponent::Key(key_prefix.to_string())); + 
.push(PathComponent::Key(key_prefix.into())); self.insert( self.event_schema.dns_query_message_schema().raw_data(), @@ -482,13 +473,13 @@ impl<'a> DnstapParser<'a> { fn parse_dns_query_message( &mut self, - key_prefix: &str, + key_prefix: &'static str, dns_message_parser: &mut DnsMessageParser, ) -> Result<()> { let msg = dns_message_parser.parse_as_query_message()?; self.parent_key_path - .push(PathComponent::Key(key_prefix.to_string())); + .push(PathComponent::Key(key_prefix.into())); self.insert( self.event_schema.dns_query_message_schema().response_code(), @@ -546,9 +537,9 @@ impl<'a> DnstapParser<'a> { Ok(()) } - fn log_dns_query_message_header(&mut self, parent_key: &str, header: &QueryHeader) { + fn log_dns_query_message_header(&mut self, parent_key: &'static str, header: &QueryHeader) { self.parent_key_path - .push(PathComponent::Key(parent_key.to_string())); + .push(PathComponent::Key(parent_key.into())); self.insert( self.event_schema.dns_query_header_schema().id(), @@ -627,9 +618,13 @@ impl<'a> DnstapParser<'a> { self.parent_key_path.pop(); } - fn log_dns_query_message_query_section(&mut self, key_path: &str, questions: &[QueryQuestion]) { + fn log_dns_query_message_query_section( + &mut self, + key_path: &'static str, + questions: &[QueryQuestion], + ) { self.parent_key_path - .push(PathComponent::Key(key_path.to_string())); + .push(PathComponent::Key(key_path.into())); for (i, query) in questions.iter().enumerate() { self.parent_key_path.push(PathComponent::Index(i)); @@ -667,13 +662,13 @@ impl<'a> DnstapParser<'a> { fn parse_dns_update_message( &mut self, - key_prefix: &str, + key_prefix: &'static str, dns_message_parser: &mut DnsMessageParser, ) -> Result<()> { let msg = dns_message_parser.parse_as_update_message()?; self.parent_key_path - .push(PathComponent::Key(key_prefix.to_string())); + .push(PathComponent::Key(key_prefix.into())); self.insert( self.event_schema @@ -724,9 +719,9 @@ impl<'a> DnstapParser<'a> { Ok(()) } - fn 
log_dns_update_message_header(&mut self, key_prefix: &str, header: &UpdateHeader) { + fn log_dns_update_message_header(&mut self, key_prefix: &'static str, header: &UpdateHeader) { self.parent_key_path - .push(PathComponent::Key(key_prefix.to_string())); + .push(PathComponent::Key(key_prefix.into())); self.insert( self.event_schema.dns_update_header_schema().id(), @@ -775,9 +770,9 @@ impl<'a> DnstapParser<'a> { self.parent_key_path.pop(); } - fn log_dns_update_message_zone_section(&mut self, key_path: &str, zone: &ZoneInfo) { + fn log_dns_update_message_zone_section(&mut self, key_path: &'static str, zone: &ZoneInfo) { self.parent_key_path - .push(PathComponent::Key(key_path.to_string())); + .push(PathComponent::Key(key_path.into())); self.insert( self.event_schema.dns_update_zone_info_schema().zone_name(), @@ -803,9 +798,9 @@ impl<'a> DnstapParser<'a> { self.parent_key_path.pop(); } - fn log_edns(&mut self, key_prefix: &str, opt_section: &Option) { + fn log_edns(&mut self, key_prefix: &'static str, opt_section: &Option) { self.parent_key_path - .push(PathComponent::Key(key_prefix.to_string())); + .push(PathComponent::Key(key_prefix.into())); if let Some(edns) = opt_section { self.insert( @@ -843,9 +838,9 @@ impl<'a> DnstapParser<'a> { self.parent_key_path.pop(); } - fn log_edns_options(&mut self, key_path: &str, options: &[EdnsOptionEntry]) { + fn log_edns_options(&mut self, key_path: &'static str, options: &[EdnsOptionEntry]) { self.parent_key_path - .push(PathComponent::Key(key_path.to_string())); + .push(PathComponent::Key(key_path.into())); options.iter().enumerate().for_each(|(i, opt)| { self.parent_key_path.push(PathComponent::Index(i)); @@ -871,9 +866,9 @@ impl<'a> DnstapParser<'a> { ); } - fn log_dns_message_record_section(&mut self, key_path: &str, records: &[DnsRecord]) { + fn log_dns_message_record_section(&mut self, key_path: &'static str, records: &[DnsRecord]) { self.parent_key_path - .push(PathComponent::Key(key_path.to_string())); + 
.push(PathComponent::Key(key_path.into())); for (i, record) in records.iter().enumerate() { self.parent_key_path.push(PathComponent::Index(i)); diff --git a/src/sources/dnstap/schema.rs b/src/sources/dnstap/schema.rs index 68c1dad2b1c14..ea9f7d3c4c626 100644 --- a/src/sources/dnstap/schema.rs +++ b/src/sources/dnstap/schema.rs @@ -1,4 +1,4 @@ -use getset::{Getters, MutGetters, Setters}; +use getset::{CopyGetters, Getters, MutGetters, Setters}; #[derive(Getters, MutGetters, Default, Debug, Clone)] #[get = "pub"] @@ -35,8 +35,8 @@ impl DnstapEventSchema { } } -#[derive(Getters, Setters, Debug, Clone)] -#[get = "pub"] +#[derive(CopyGetters, Setters, Debug, Clone)] +#[get_copy = "pub"] pub struct DnstapRootDataSchema { server_identity: &'static str, server_version: &'static str, @@ -68,8 +68,8 @@ impl Default for DnstapRootDataSchema { } } -#[derive(Getters, Debug, Clone)] -#[get = "pub"] +#[derive(CopyGetters, Debug, Clone)] +#[get_copy = "pub"] pub struct DnstapMessageSchema { socket_family: &'static str, socket_protocol: &'static str, @@ -102,8 +102,8 @@ impl Default for DnstapMessageSchema { } } -#[derive(Getters, Debug, Clone)] -#[get = "pub"] +#[derive(CopyGetters, Debug, Clone)] +#[get_copy = "pub"] pub struct DnsMessageCommonSchema { response_code: &'static str, response: &'static str, @@ -126,8 +126,8 @@ impl Default for DnsMessageCommonSchema { } } -#[derive(Getters, Debug, Clone)] -#[get = "pub"] +#[derive(CopyGetters, Debug, Clone)] +#[get_copy = "pub"] pub struct DnsQueryMessageSchema { response_code: &'static str, response: &'static str, @@ -161,8 +161,8 @@ impl Default for DnsQueryMessageSchema { } } -#[derive(Getters, Debug, Clone)] -#[get = "pub"] +#[derive(CopyGetters, Debug, Clone)] +#[get_copy = "pub"] pub struct DnsUpdateMessageSchema { response_code: &'static str, response: &'static str, @@ -194,8 +194,8 @@ impl Default for DnsUpdateMessageSchema { } } -#[derive(Getters, Debug, Clone)] -#[get = "pub"] +#[derive(CopyGetters, Debug, Clone)] 
+#[get_copy = "pub"] pub struct DnsMessageHeaderCommonSchema { id: &'static str, opcode: &'static str, @@ -214,8 +214,8 @@ impl Default for DnsMessageHeaderCommonSchema { } } -#[derive(Getters, Debug, Clone)] -#[get = "pub"] +#[derive(CopyGetters, Debug, Clone)] +#[get_copy = "pub"] pub struct DnsQueryHeaderSchema { id: &'static str, opcode: &'static str, @@ -255,8 +255,8 @@ impl Default for DnsQueryHeaderSchema { } } -#[derive(Getters, Debug, Clone)] -#[get = "pub"] +#[derive(CopyGetters, Debug, Clone)] +#[get_copy = "pub"] pub struct DnsUpdateHeaderSchema { id: &'static str, opcode: &'static str, @@ -284,8 +284,8 @@ impl Default for DnsUpdateHeaderSchema { } } -#[derive(Getters, Debug, Clone)] -#[get = "pub"] +#[derive(CopyGetters, Debug, Clone)] +#[get_copy = "pub"] pub struct DnsMessageOptPseudoSectionSchema { extended_rcode: &'static str, version: &'static str, @@ -306,8 +306,8 @@ impl Default for DnsMessageOptPseudoSectionSchema { } } -#[derive(Getters, Debug, Clone)] -#[get = "pub"] +#[derive(CopyGetters, Debug, Clone)] +#[get_copy = "pub"] pub struct DnsMessageOptionSchema { opt_code: &'static str, opt_name: &'static str, @@ -324,8 +324,8 @@ impl Default for DnsMessageOptionSchema { } } -#[derive(Getters, Debug, Clone)] -#[get = "pub"] +#[derive(CopyGetters, Debug, Clone)] +#[get_copy = "pub"] pub struct DnsRecordSchema { name: &'static str, record_type: &'static str, @@ -350,8 +350,8 @@ impl Default for DnsRecordSchema { } } -#[derive(Getters, Debug, Clone)] -#[get = "pub"] +#[derive(CopyGetters, Debug, Clone)] +#[get_copy = "pub"] pub struct DnsQueryQuestionSchema { name: &'static str, question_type: &'static str, @@ -370,8 +370,8 @@ impl Default for DnsQueryQuestionSchema { } } -#[derive(Getters, Debug, Clone)] -#[get = "pub"] +#[derive(CopyGetters, Debug, Clone)] +#[get_copy = "pub"] pub struct DnsUpdateZoneInfoSchema { zone_name: &'static str, zone_class: &'static str, diff --git a/src/sources/docker_logs.rs b/src/sources/docker_logs.rs index 
c738c2718e2c8..fc327e2e2b6eb 100644 --- a/src/sources/docker_logs.rs +++ b/src/sources/docker_logs.rs @@ -888,7 +888,7 @@ impl ContainerLogInfo { let prefix_path = PathIter::new("label").collect::>(); for (key, value) in self.metadata.labels.iter() { let mut path = prefix_path.clone(); - path.push(PathComponent::Key(key.clone())); + path.push(PathComponent::Key(key.clone().into())); log_event.insert_path(path, value.clone()); } } diff --git a/src/sources/file.rs b/src/sources/file.rs index f2a3dfc3ae314..add6e0d741cc7 100644 --- a/src/sources/file.rs +++ b/src/sources/file.rs @@ -336,7 +336,7 @@ pub fn file_source( .map(move |mut line| { emit!(&FileBytesReceived { byte_size: line.text.len(), - path: &line.filename, + file: &line.filename, }); // transcode each line from the file's encoding charset to utf8 line.text = match encoding_decoder.as_mut() { @@ -454,6 +454,7 @@ fn create_event( file_key: &Option, ) -> Event { emit!(&FileEventsReceived { + count: 1, file: &file, byte_size: line.len(), }); @@ -482,6 +483,7 @@ mod tests { event::{EventStatus, Value}, shutdown::ShutdownSignal, sources::file, + test_util::components::{self, SOURCE_TESTS}, }; use encoding_rs::UTF_16LE; use pretty_assertions::assert_eq; @@ -1219,7 +1221,7 @@ mod tests { }; let path = dir.path().join("file"); - let received=run_file_source(&config, false, NoAcks, async { + let received = run_file_source(&config, false, NoAcks, async { let mut file = File::create(&path).unwrap(); sleep_500_millis().await; // The files must be observed at their original lengths before writing to them @@ -1646,6 +1648,8 @@ mod tests { acking_mode: AckingMode, inner: impl Future, ) -> Vec { + components::init(); + let (tx, rx) = if acking_mode == Acks { let (tx, rx) = Pipeline::new_test_finalize(EventStatus::Delivered); (tx, rx.boxed()) @@ -1668,6 +1672,7 @@ mod tests { if wait_shutdown { shutdown_done.await; } + SOURCE_TESTS.assert(&["file"]); result } diff --git a/src/sources/generator.rs b/src/sources/generator.rs 
index 4880079d6b76e..0d77abb0b1bf8 100644 --- a/src/sources/generator.rs +++ b/src/sources/generator.rs @@ -15,14 +15,18 @@ use tokio::time::{interval, Duration}; #[derive(Clone, Debug, Default, Deserialize, Serialize)] pub struct GeneratorConfig { - #[serde(alias = "batch_interval")] - interval: Option, + #[serde(alias = "batch_interval", default = "default_interval")] + interval: f64, #[serde(default = "usize::max_value")] count: usize, #[serde(flatten)] format: OutputFormat, } +const fn default_interval() -> f64 { + 1.0 +} + #[derive(Debug, PartialEq, Snafu)] pub enum GeneratorConfigError { #[snafu(display("A non-empty list of lines is required for the shuffle format"))] @@ -98,7 +102,7 @@ impl GeneratorConfig { } #[allow(dead_code)] // to make check-component-features pass - pub fn repeat(lines: Vec, count: usize, interval: Option) -> Self { + pub fn repeat(lines: Vec, count: usize, interval: f64) -> Self { Self { count, interval, @@ -110,7 +114,13 @@ impl GeneratorConfig { } async fn inner(self, mut shutdown: ShutdownSignal, mut out: Pipeline) -> Result<(), ()> { - let mut interval = self.interval.map(|i| interval(Duration::from_secs_f64(i))); + let maybe_interval: Option = if self.interval != 0.0 { + Some(self.interval) + } else { + None + }; + + let mut interval = maybe_interval.map(|i| interval(Duration::from_secs_f64(i))); for n in 0..self.count { if matches!(futures::poll!(&mut shutdown), Poll::Ready(_)) { diff --git a/src/sources/host_metrics/filesystem.rs b/src/sources/host_metrics/filesystem.rs index 7e02dc4239fb6..8696d1ce2ae4a 100644 --- a/src/sources/host_metrics/filesystem.rs +++ b/src/sources/host_metrics/filesystem.rs @@ -114,7 +114,7 @@ impl HostMetrics { .await } Err(error) => { - error!(message = "Failed to load partitions info", %error, internal_log_rate_secs = 60); + error!(message = "Failed to load partitions info.", %error, internal_log_rate_secs = 60); vec![] } } diff --git a/src/sources/internal_metrics.rs 
b/src/sources/internal_metrics.rs index 989cc9100b719..19ac702b66995 100644 --- a/src/sources/internal_metrics.rs +++ b/src/sources/internal_metrics.rs @@ -1,7 +1,6 @@ use crate::{ config::{log_schema, DataType, SourceConfig, SourceContext, SourceDescription}, metrics::Controller, - metrics::{capture_metrics, get_controller}, shutdown::ShutdownSignal, Pipeline, }; @@ -76,7 +75,7 @@ impl SourceConfig for InternalMetricsConfig { namespace, host_key, pid_key, - get_controller()?, + Controller::get()?, interval, cx.out, cx.shutdown, @@ -109,7 +108,7 @@ async fn run( let hostname = crate::get_hostname(); let pid = std::process::id().to_string(); - let metrics = capture_metrics(controller); + let metrics = controller.capture_metrics(); out.send_all(&mut stream::iter(metrics).map(|mut metric| { // A metric starts out with a default "vector" namespace, but will be overridden @@ -142,7 +141,7 @@ mod tests { metric::{Metric, MetricValue}, Event, }, - metrics::{capture_metrics, get_controller}, + metrics::Controller, Pipeline, }; use metrics::{counter, gauge, histogram}; @@ -155,7 +154,7 @@ mod tests { #[test] fn captures_internal_metrics() { - let _ = crate::metrics::init(); + let _ = crate::metrics::init_test(); // There *seems* to be a race condition here (CI was flaky), so add a slight delay. std::thread::sleep(std::time::Duration::from_millis(300)); @@ -169,12 +168,13 @@ mod tests { histogram!("quux", 8.0, "host" => "foo"); histogram!("quux", 8.1, "host" => "foo"); - let controller = get_controller().expect("no controller"); + let controller = Controller::get().expect("no controller"); // There *seems* to be a race condition here (CI was flaky), so add a slight delay. 
std::thread::sleep(std::time::Duration::from_millis(300)); - let output = capture_metrics(controller) + let output = controller + .capture_metrics() .map(|metric| (metric.name().to_string(), metric)) .collect::>(); @@ -222,7 +222,7 @@ mod tests { } async fn event_from_config(config: InternalMetricsConfig) -> Event { - let _ = crate::metrics::init(); + let _ = crate::metrics::init_test(); let (sender, mut recv) = Pipeline::new_test(); diff --git a/src/sources/kubernetes_logs/namespace_metadata_annotator.rs b/src/sources/kubernetes_logs/namespace_metadata_annotator.rs index 4ff7c0dadf9f5..89e13fb97620f 100644 --- a/src/sources/kubernetes_logs/namespace_metadata_annotator.rs +++ b/src/sources/kubernetes_logs/namespace_metadata_annotator.rs @@ -63,7 +63,7 @@ fn annotate_from_metadata(log: &mut LogEvent, fields_spec: &FieldsSpec, metadata if let Some(labels) = &metadata.labels { for (key, val) in labels.iter() { let mut path = prefix_path.clone(); - path.push(PathComponent::Key(key.clone())); + path.push(PathComponent::Key(key.clone().into())); log.insert_path(path, val.to_owned()); } } diff --git a/src/sources/kubernetes_logs/pod_metadata_annotator.rs b/src/sources/kubernetes_logs/pod_metadata_annotator.rs index e499214f54530..73f3ece9e6db4 100644 --- a/src/sources/kubernetes_logs/pod_metadata_annotator.rs +++ b/src/sources/kubernetes_logs/pod_metadata_annotator.rs @@ -138,7 +138,7 @@ fn annotate_from_metadata(log: &mut LogEvent, fields_spec: &FieldsSpec, metadata let prefix_path = PathIter::new(fields_spec.pod_labels.as_ref()).collect::>(); for (key, val) in labels.iter() { let mut path = prefix_path.clone(); - path.push(PathComponent::Key(key.clone())); + path.push(PathComponent::Key(key.clone().into())); log.insert_path(path, val.to_owned()); } } @@ -147,7 +147,7 @@ fn annotate_from_metadata(log: &mut LogEvent, fields_spec: &FieldsSpec, metadata let prefix_path = PathIter::new(fields_spec.pod_annotations.as_ref()).collect::>(); for (key, val) in annotations.iter() { 
let mut path = prefix_path.clone(); - path.push(PathComponent::Key(key.clone())); + path.push(PathComponent::Key(key.clone().into())); log.insert_path(path, val.to_owned()); } } diff --git a/src/sources/socket/mod.rs b/src/sources/socket/mod.rs index e3bcfcac73919..39724920b0c29 100644 --- a/src/sources/socket/mod.rs +++ b/src/sources/socket/mod.rs @@ -3,13 +3,12 @@ mod udp; #[cfg(unix)] mod unix; -use super::util::TcpSource; use crate::{ - codecs::{BytesParser, Decoder, NewlineDelimitedCodec}, config::{ log_schema, DataType, GenerateConfig, Resource, SourceConfig, SourceContext, SourceDescription, }, + sources::util::TcpSource, tls::MaybeTlsSettings, }; use serde::{Deserialize, Serialize}; @@ -81,9 +80,8 @@ impl SourceConfig for SocketConfig { async fn build(&self, cx: SourceContext) -> crate::Result { match self.mode.clone() { Mode::Tcp(config) => { - let tcp = tcp::RawTcpSource { - config: config.clone(), - }; + let decoder = config.decoding().build()?; + let tcp = tcp::RawTcpSource::new(config.clone(), decoder); let tls = MaybeTlsSettings::from_config(config.tls(), true)?; tcp.run( config.address(), @@ -100,12 +98,7 @@ impl SourceConfig for SocketConfig { .host_key() .clone() .unwrap_or_else(|| log_schema().host_key().to_string()); - let decoder = Decoder::new( - Box::new(NewlineDelimitedCodec::new_with_max_length( - config.max_length(), - )), - Box::new(BytesParser), - ); + let decoder = config.decoding().build()?; Ok(udp::udp( config.address(), host_key, @@ -120,12 +113,7 @@ impl SourceConfig for SocketConfig { let host_key = config .host_key .unwrap_or_else(|| log_schema().host_key().to_string()); - let decoder = Decoder::new( - Box::new(NewlineDelimitedCodec::new_with_max_length( - config.max_length, - )), - Box::new(BytesParser), - ); + let decoder = config.decoding.build()?; Ok(unix::unix_datagram( config.path, config.max_length, @@ -140,12 +128,7 @@ impl SourceConfig for SocketConfig { let host_key = config .host_key .unwrap_or_else(|| 
log_schema().host_key().to_string()); - let decoder = Decoder::new( - Box::new(NewlineDelimitedCodec::new_with_max_length( - config.max_length, - )), - Box::new(BytesParser), - ); + let decoder = config.decoding.build()?; Ok(unix::unix_stream( config.path, host_key, @@ -181,6 +164,7 @@ impl SourceConfig for SocketConfig { mod test { use super::{tcp::TcpConfig, udp::UdpConfig, SocketConfig}; use crate::{ + codecs::{DecodingConfig, NewlineDelimitedDecoderConfig}, config::{ log_schema, ComponentKey, GlobalOptions, SinkContext, SourceConfig, SourceContext, }, @@ -276,7 +260,12 @@ mod test { let addr = next_addr(); let mut config = TcpConfig::from_address(addr.into()); - config.set_max_length(10); + config.set_decoding(DecodingConfig::new( + Some(Box::new( + NewlineDelimitedDecoderConfig::new_with_max_length(10), + )), + None, + )); let server = SocketConfig::from(config) .build(SourceContext::new_test(tx)) @@ -309,7 +298,6 @@ mod test { let addr = next_addr(); let mut config = TcpConfig::from_address(addr.into()); - config.set_max_length(10); config.set_tls(Some(TlsConfig::test_config())); let server = SocketConfig::from(config) @@ -318,11 +306,7 @@ mod test { .unwrap(); tokio::spawn(server); - let lines = vec![ - "short".to_owned(), - "this is too long".to_owned(), - "more short".to_owned(), - ]; + let lines = vec!["one line".to_owned(), "another line".to_owned()]; wait_for_tcp(addr).await; send_lines_tls(addr, "localhost".into(), lines.into_iter(), None) @@ -330,12 +314,15 @@ mod test { .unwrap(); let event = rx.next().await.unwrap(); - assert_eq!(event.as_log()[log_schema().message_key()], "short".into()); + assert_eq!( + event.as_log()[log_schema().message_key()], + "one line".into() + ); let event = rx.next().await.unwrap(); assert_eq!( event.as_log()[log_schema().message_key()], - "more short".into() + "another line".into() ); } @@ -345,7 +332,6 @@ mod test { let addr = next_addr(); let mut config = TcpConfig::from_address(addr.into()); - 
config.set_max_length(10); config.set_tls(Some(TlsConfig { enabled: Some(true), options: TlsOptions { @@ -361,11 +347,7 @@ mod test { .unwrap(); tokio::spawn(server); - let lines = vec![ - "short".to_owned(), - "this is too long".to_owned(), - "more short".to_owned(), - ]; + let lines = vec!["one line".to_owned(), "another line".to_owned()]; wait_for_tcp(addr).await; send_lines_tls( @@ -380,13 +362,13 @@ mod test { let event = rx.next().await.unwrap(); assert_eq!( event.as_log()[crate::config::log_schema().message_key()], - "short".into() + "one line".into() ); let event = rx.next().await.unwrap(); assert_eq!( event.as_log()[crate::config::log_schema().message_key()], - "more short".into() + "another line".into() ); } diff --git a/src/sources/socket/tcp.rs b/src/sources/socket/tcp.rs index 54f3fa4fef168..856c74642f6f3 100644 --- a/src/sources/socket/tcp.rs +++ b/src/sources/socket/tcp.rs @@ -1,5 +1,5 @@ use crate::{ - codecs::{self, BytesParser, NewlineDelimitedCodec}, + codecs::{self, DecodingConfig}, event::Event, internal_events::{SocketEventsReceived, SocketMode}, sources::util::{SocketListenAddr, TcpSource}, @@ -17,9 +17,6 @@ pub struct TcpConfig { address: SocketListenAddr, #[get_copy = "pub"] keepalive: Option, - #[serde(default = "crate::serde::default_max_length")] - #[getset(get_copy = "pub", set = "pub")] - max_length: usize, #[serde(default = "default_shutdown_timeout_secs")] #[getset(get_copy = "pub", set = "pub")] shutdown_timeout_secs: u64, @@ -29,6 +26,9 @@ pub struct TcpConfig { tls: Option, #[get_copy = "pub"] receive_buffer_bytes: Option, + #[serde(flatten, default)] + #[getset(get = "pub", set = "pub")] + decoding: DecodingConfig, } const fn default_shutdown_timeout_secs() -> u64 { @@ -39,20 +39,20 @@ impl TcpConfig { pub const fn new( address: SocketListenAddr, keepalive: Option, - max_length: usize, shutdown_timeout_secs: u64, host_key: Option, tls: Option, receive_buffer_bytes: Option, + decoding: DecodingConfig, ) -> Self { Self { address, 
keepalive, - max_length, shutdown_timeout_secs, host_key, tls, receive_buffer_bytes, + decoding, } } @@ -60,18 +60,25 @@ impl TcpConfig { Self { address, keepalive: None, - max_length: crate::serde::default_max_length(), shutdown_timeout_secs: default_shutdown_timeout_secs(), host_key: None, tls: None, receive_buffer_bytes: None, + decoding: DecodingConfig::default(), } } } #[derive(Debug, Clone)] pub struct RawTcpSource { - pub config: TcpConfig, + config: TcpConfig, + decoder: codecs::Decoder, +} + +impl RawTcpSource { + pub const fn new(config: TcpConfig, decoder: codecs::Decoder) -> Self { + Self { config, decoder } + } } impl TcpSource for RawTcpSource { @@ -80,12 +87,7 @@ impl TcpSource for RawTcpSource { type Decoder = codecs::Decoder; fn decoder(&self) -> Self::Decoder { - codecs::Decoder::new( - Box::new(NewlineDelimitedCodec::new_with_max_length( - self.config.max_length, - )), - Box::new(BytesParser), - ) + self.decoder.clone() } fn handle_events(&self, events: &mut [Event], host: Bytes, byte_size: usize) { @@ -110,28 +112,3 @@ impl TcpSource for RawTcpSource { } } } - -#[cfg(test)] -mod test { - - #[test] - fn tcp_it_defaults_max_length() { - let with: super::TcpConfig = toml::from_str( - r#" - address = "127.0.0.1:1234" - max_length = 19 - "#, - ) - .unwrap(); - - let without: super::TcpConfig = toml::from_str( - r#" - address = "127.0.0.1:1234" - "#, - ) - .unwrap(); - - assert_eq!(with.max_length, 19); - assert_eq!(without.max_length, crate::serde::default_max_length()); - } -} diff --git a/src/sources/socket/udp.rs b/src/sources/socket/udp.rs index 915d7459acc17..fe151738e6d2b 100644 --- a/src/sources/socket/udp.rs +++ b/src/sources/socket/udp.rs @@ -1,5 +1,5 @@ use crate::{ - codecs::Decoder, + codecs::{Decoder, DecodingConfig}, event::Event, internal_events::{SocketEventsReceived, SocketMode}, shutdown::ShutdownSignal, @@ -28,6 +28,9 @@ pub struct UdpConfig { host_key: Option, #[get_copy = "pub"] receive_buffer_bytes: Option, + #[serde(flatten, 
default)] + #[get = "pub"] + decoding: DecodingConfig, } impl UdpConfig { @@ -37,6 +40,7 @@ impl UdpConfig { max_length: crate::serde::default_max_length(), host_key: None, receive_buffer_bytes: None, + decoding: Default::default(), } } } diff --git a/src/sources/socket/unix.rs b/src/sources/socket/unix.rs index 5833af7444fa5..c8542508a80e7 100644 --- a/src/sources/socket/unix.rs +++ b/src/sources/socket/unix.rs @@ -1,5 +1,5 @@ use crate::{ - codecs::Decoder, + codecs::{Decoder, DecodingConfig}, event::Event, internal_events::{SocketEventsReceived, SocketMode}, shutdown::ShutdownSignal, @@ -20,6 +20,8 @@ pub struct UnixConfig { #[serde(default = "crate::serde::default_max_length")] pub max_length: usize, pub host_key: Option, + #[serde(flatten, default)] + pub decoding: DecodingConfig, } impl UnixConfig { @@ -28,6 +30,7 @@ impl UnixConfig { path, max_length: crate::serde::default_max_length(), host_key: None, + decoding: Default::default(), } } } diff --git a/src/test_util/components.rs b/src/test_util/components.rs new file mode 100644 index 0000000000000..960ebcf2e5e2d --- /dev/null +++ b/src/test_util/components.rs @@ -0,0 +1,147 @@ +#![deny(missing_docs)] + +//! This is a framework for testing components for their compliance with +//! the component spec in `docs/specs/component.md` by capturing emitted +//! internal events and metrics, and testing that they fit the required +//! patterns. + +use crate::event::{Metric, MetricValue}; +use crate::metrics::{self, Controller}; +use lazy_static::lazy_static; +use std::cell::RefCell; +use std::collections::HashSet; +use std::env; + +thread_local!( + /// A buffer for recording internal events emitted by a single test. + static EVENTS_RECORDED: RefCell> = RefCell::new(Default::default()); +); + +/// This struct is used to describe a set of component tests. 
+pub struct ComponentTests { + /// The list of event (suffixes) that must be emitted by the component + events: &'static [&'static str], + /// The list of counter metrics (with given tags) that must be incremented + tagged_counters: &'static [&'static str], + /// The list of counter metrics (with no particular tags) that must be incremented + untagged_counters: &'static [&'static str], +} + +lazy_static! { + /// The component test specification for all sources + pub static ref SOURCE_TESTS: ComponentTests = ComponentTests { + events: &["BytesReceived", "EventsReceived", "EventsSent"], + tagged_counters: &[ + "component_received_bytes_total", + "component_received_events_total", + "component_received_event_bytes_total", + ], + untagged_counters: &[ + "component_sent_events_total", + "component_sent_event_bytes_total", + ], + }; +} + +impl ComponentTests { + /// Run the test specification, and assert that all tests passed + pub fn assert(&self, tags: &[&str]) { + let mut test = ComponentTester::new(); + test.emitted_all_events(self.events); + test.emitted_all_counters(self.tagged_counters, tags); + test.emitted_all_counters(self.untagged_counters, &[]); + if !test.errors.is_empty() { + panic!( + "Failed to assert compliance, errors:\n {}\n", + test.errors.join("\n ") + ); + } + } +} + +/// Initialize the necessary bits needed to run a component test specification. +pub fn init() { + EVENTS_RECORDED.with(|er| er.borrow_mut().clear()); + // Handle multiple initializations. + if let Err(error) = metrics::init_test() { + if error != metrics::Error::AlreadyInitialized { + panic!("Failed to initialize metrics recorder: {:?}", error); + } + } +} + +/// Record an emitted internal event. This is somewhat dumb at this +/// point, just recording the pure string value of the `emit!` call +/// parameter. At some point, making all internal events implement +/// `Debug` or `Serialize` might allow for more sophistication here, but +/// this is good enough for these tests. 
This should only be used by the +/// test `emit!` macro. The `check-events` script will test that emitted +/// events contain the right fields, etc. +pub fn record_internal_event(event: &str) { + // Remove leading '&' + // Remove trailing '{fields…}' + let event = event.strip_prefix('&').unwrap_or(event); + let event = event.find('{').map(|par| &event[..par]).unwrap_or(event); + EVENTS_RECORDED.with(|er| er.borrow_mut().insert(event.into())); +} + +/// Tests if the given metric contains all the given tag names +fn has_tags(metric: &Metric, names: &[&str]) -> bool { + metric + .tags() + .map(|tags| names.iter().all(|name| tags.contains_key(*name))) + .unwrap_or_else(|| names.is_empty()) +} + +/// Standard metrics test environment data +struct ComponentTester { + metrics: Vec, + errors: Vec, +} + +impl ComponentTester { + fn new() -> Self { + let mut metrics: Vec<_> = Controller::get().unwrap().capture_metrics().collect(); + + if env::var("DEBUG_COMPONENT_COMPLIANCE").is_ok() { + EVENTS_RECORDED.with(|events| { + for event in events.borrow().iter() { + println!("{}", event); + } + }); + metrics.sort_by(|a, b| a.name().cmp(b.name())); + for metric in &metrics { + println!("{}", metric); + } + } + + let errors = Vec::new(); + Self { metrics, errors } + } + + fn emitted_all_counters(&mut self, names: &[&str], tags: &[&str]) { + let tag_suffix = (!tags.is_empty()) + .then(|| format!("{{{}}}", tags.join(","))) + .unwrap_or_else(String::new); + for name in names { + if !self.metrics.iter().any(|m| { + matches!(m.value(), MetricValue::Counter { .. 
}) + && m.name() == *name + && has_tags(m, tags) + }) { + self.errors + .push(format!("Missing metric named {}{}", name, tag_suffix)); + } + } + } + + fn emitted_all_events(&mut self, names: &[&str]) { + for name in names { + if !EVENTS_RECORDED + .with(|events| events.borrow().iter().any(|event| event.ends_with(name))) + { + self.errors.push(format!("Missing emitted event {}", name)); + } + } + } +} diff --git a/src/test_util/mod.rs b/src/test_util/mod.rs index e1362ef148bf6..4e9350f78ae0e 100644 --- a/src/test_util/mod.rs +++ b/src/test_util/mod.rs @@ -45,6 +45,7 @@ const WAIT_FOR_SECS: u64 = 5; // The default time to wait in `wait_for` const WAIT_FOR_MIN_MILLIS: u64 = 5; // The minimum time to pause before retrying const WAIT_FOR_MAX_MILLIS: u64 = 500; // The maximum time to pause before retrying +pub mod components; pub mod stats; #[macro_export] diff --git a/src/topology/test/source_finished.rs b/src/topology/test/source_finished.rs index 958ff3d96e59e..9c7dcdb7b34a7 100644 --- a/src/topology/test/source_finished.rs +++ b/src/topology/test/source_finished.rs @@ -9,7 +9,7 @@ use tokio::time::{timeout, Duration}; #[tokio::test] async fn sources_finished() { let mut old_config = Config::builder(); - let generator = GeneratorConfig::repeat(vec!["text".to_owned()], 1, None); + let generator = GeneratorConfig::repeat(vec!["text".to_owned()], 1, 0.0); old_config.add_source("in", generator); old_config.add_sink( "out", diff --git a/src/transforms/compound.rs b/src/transforms/compound.rs index ed50d68721261..f7b15f7a94c8b 100644 --- a/src/transforms/compound.rs +++ b/src/transforms/compound.rs @@ -41,22 +41,21 @@ impl TransformConfig for CompoundConfig { fn expand( &mut self, ) -> crate::Result>, ExpandType)>> { - let steps = &self.steps; - if !steps.is_empty() { - Ok(Some(( - steps - .iter() - .enumerate() - .map(|(i, step)| { - let TransformStep { id, transform } = step; - ( - id.as_ref().cloned().unwrap_or_else(|| i.to_string()), - transform.to_owned(), - ) - }) - 
.collect(), - ExpandType::Serial, - ))) + let mut map: IndexMap> = IndexMap::new(); + for (i, step) in self.steps.iter().enumerate() { + if map + .insert( + step.id.as_ref().cloned().unwrap_or_else(|| i.to_string()), + step.transform.to_owned(), + ) + .is_some() + { + return Err("conflicting id found while expanding transform".into()); + } + } + + if !map.is_empty() { + Ok(Some((map, ExpandType::Serial))) } else { Err("must specify at least one transform".into()) } diff --git a/src/transforms/grok_parser.rs b/src/transforms/grok_parser.rs index c7850baf52284..dc22be47251fb 100644 --- a/src/transforms/grok_parser.rs +++ b/src/transforms/grok_parser.rs @@ -87,7 +87,7 @@ pub struct GrokParser { field: String, drop_field: bool, types: HashMap, - paths: HashMap>, + paths: HashMap>>, } impl Clone for GrokParser { @@ -119,7 +119,9 @@ impl FunctionTransform for GrokParser { if let Some(path) = self.paths.get(name) { event.insert_path(path.to_vec(), value); } else { - let path = PathIter::new(name).collect::>(); + let path = PathIter::new(name) + .map(|component| component.into_static()) + .collect::>(); self.paths.insert(name.to_string(), path.clone()); event.insert_path(path, value); } diff --git a/src/transforms/remap.rs b/src/transforms/remap.rs index f33a3d45fc88a..d9c8a5d8ef628 100644 --- a/src/transforms/remap.rs +++ b/src/transforms/remap.rs @@ -54,9 +54,10 @@ impl TransformConfig for RemapConfig { } } -#[derive(Clone, Debug)] +#[derive(Debug)] pub struct Remap { program: Program, + runtime: Runtime, timezone: TimeZone, drop_on_error: bool, drop_on_abort: bool, @@ -94,11 +95,29 @@ impl Remap { Ok(Remap { program, + runtime: Runtime::default(), timezone: config.timezone, drop_on_error: config.drop_on_error, drop_on_abort: config.drop_on_abort, }) } + + #[cfg(test)] + const fn runtime(&self) -> &Runtime { + &self.runtime + } +} + +impl Clone for Remap { + fn clone(&self) -> Self { + Self { + program: self.program.clone(), + runtime: Runtime::default(), + timezone: 
self.timezone, + drop_on_error: self.drop_on_error, + drop_on_abort: self.drop_on_abort, + } + } } impl FunctionTransform for Remap { @@ -122,9 +141,10 @@ impl FunctionTransform for Remap { let mut target: VrlTarget = event.into(); - let mut runtime = Runtime::default(); - - let result = runtime.resolve(&mut target, &self.program, &self.timezone); + let result = self + .runtime + .resolve(&mut target, &self.program, &self.timezone); + self.runtime.clear(); match result { Ok(_) => { @@ -223,6 +243,42 @@ mod tests { event.as_log().get(field).unwrap().to_string_lossy() } + #[test] + fn check_remap_doesnt_share_state_between_events() { + let conf = RemapConfig { + source: Some(".foo = .sentinel".to_string()), + file: None, + timezone: TimeZone::default(), + drop_on_error: true, + drop_on_abort: false, + }; + let mut tform = Remap::new(conf, &Default::default()).unwrap(); + assert!(tform.runtime().is_empty()); + + let event1 = { + let mut event1 = LogEvent::from("event1"); + event1.insert("sentinel", "bar"); + Event::from(event1) + }; + let metadata1 = event1.metadata().clone(); + let result1 = transform_one(&mut tform, event1).unwrap(); + assert_eq!(get_field_string(&result1, "message"), "event1"); + assert_eq!(get_field_string(&result1, "foo"), "bar"); + assert_eq!(result1.metadata(), &metadata1); + assert!(tform.runtime().is_empty()); + + let event2 = { + let event2 = LogEvent::from("event2"); + Event::from(event2) + }; + let metadata2 = event2.metadata().clone(); + let result2 = transform_one(&mut tform, event2).unwrap(); + assert_eq!(get_field_string(&result2, "message"), "event2"); + assert_eq!(result2.as_log().get("foo"), Some(&Value::Null)); + assert_eq!(result2.metadata(), &metadata2); + assert!(tform.runtime().is_empty()); + } + #[test] fn check_remap_adds() { let event = { diff --git a/src/transforms/route.rs b/src/transforms/route.rs index 669e1b657a24e..9de8968395aba 100644 --- a/src/transforms/route.rs +++ b/src/transforms/route.rs @@ -104,7 +104,12 @@ 
impl TransformConfig for RouteConfig { let mut map: IndexMap> = IndexMap::new(); while let Some((k, v)) = self.route.pop() { - map.insert(k.clone(), Box::new(LaneConfig { condition: v })); + if map + .insert(k.clone(), Box::new(LaneConfig { condition: v })) + .is_some() + { + return Err("duplicate route id".into()); + } } if !map.is_empty() { diff --git a/src/transforms/tokenizer.rs b/src/transforms/tokenizer.rs index 46bed561c75df..78f8cf310cfad 100644 --- a/src/transforms/tokenizer.rs +++ b/src/transforms/tokenizer.rs @@ -65,7 +65,7 @@ impl TransformConfig for TokenizerConfig { #[derive(Clone, Debug)] pub struct Tokenizer { - field_names: Vec<(String, Vec, Conversion)>, + field_names: Vec<(String, Vec>, Conversion)>, field: String, drop_field: bool, } @@ -81,7 +81,9 @@ impl Tokenizer { .into_iter() .map(|name| { let conversion = types.get(&name).unwrap_or(&Conversion::Bytes).clone(); - let path: Vec = PathIter::new(&name).collect(); + let path: Vec> = PathIter::new(name.as_str()) + .map(|component| component.into_static()) + .collect(); (name, path, conversion) }) .collect(); diff --git a/tests/api.rs b/tests/api.rs index bca761a7aec14..16593299331fd 100644 --- a/tests/api.rs +++ b/tests/api.rs @@ -77,7 +77,7 @@ mod tests { // Initialize the metrics system. 
fn init_metrics() -> oneshot::Sender<()> { vector::trace::init(true, true, "info"); - let _ = vector::metrics::init(); + let _ = vector::metrics::init_test(); let (shutdown_tx, shutdown_rx) = oneshot::channel::<()>(); tokio::spawn(async move { diff --git a/tests/behavior/transforms/remap.toml b/tests/behavior/transforms/remap.toml index 5bf06f873912c..f034f78f2c9ee 100644 --- a/tests/behavior/transforms/remap.toml +++ b/tests/behavior/transforms/remap.toml @@ -2183,3 +2183,24 @@ # .none == "foobarbaz" && # .from_split == "big__bad__booper" # ''' + +[transforms.remap_function_reverse_dns] + inputs = [] + type = "remap" + source = """ + .host = reverse_dns!(.ip) + """ +[[tests]] + name = "remap_function_reverse_dns" + [tests.input] + insert_at = "remap_function_reverse_dns" + type = "log" + [tests.input.log_fields] + ip = "127.0.0.1" + [[tests.outputs]] + extract_from = "remap_function_reverse_dns" + [[tests.outputs.conditions]] + "type" = "vrl" + source = ''' + .host == "localhost" + ''' diff --git a/tests/metrics_snapshot.rs b/tests/metrics_snapshot.rs index e4574bc35a628..cd98fa70c7454 100644 --- a/tests/metrics_snapshot.rs +++ b/tests/metrics_snapshot.rs @@ -1,16 +1,15 @@ -fn prepare_metrics(cardinality: usize) -> &'static vector::metrics::Controller { - let _ = vector::metrics::init(); - let controller = vector::metrics::get_controller().unwrap(); - vector::metrics::reset(controller); +use vector::metrics::Controller; + +fn prepare_metrics(cardinality: usize) -> &'static Controller { + let _ = vector::metrics::init_test(); + let controller = Controller::get().unwrap(); + controller.reset(); for idx in 0..cardinality { metrics::counter!("test", 1, "idx" => format!("{}", idx)); } - assert_cardinality_matches( - &vector::metrics::capture_metrics(controller), - cardinality + 1, - ); + assert_cardinality_matches(&controller.capture_metrics(), cardinality + 1); controller } @@ -23,7 +22,7 @@ fn assert_cardinality_matches(iter: &impl Iterator, cardinality: usize) { fn 
cardinality_matches() { for cardinality in &[0, 1, 10, 100, 1000, 10000] { let controller = prepare_metrics(*cardinality); - let iter = vector::metrics::capture_metrics(controller); + let iter = controller.capture_metrics(); assert_cardinality_matches(&iter, *cardinality + 1); } } diff --git a/tests/support/mod.rs b/tests/support/mod.rs index beca6d1f718ce..e8b1fca299dc6 100644 --- a/tests/support/mod.rs +++ b/tests/support/mod.rs @@ -354,7 +354,7 @@ where S: Sink + Send + std::marker::Unpin, >::Error: std::fmt::Display, { - async fn run(&mut self, mut input: BoxStream<'_, Event>) -> Result<(), ()> { + async fn run(mut self: Box, mut input: BoxStream<'_, Event>) -> Result<(), ()> { while let Some(event) = input.next().await { if let Err(error) = self.sink.send(event).await { error!(message = "Ingesting an event failed at mock sink.", %error); diff --git a/website/content/en/docs/reference/configuration/_index.md b/website/content/en/docs/reference/configuration/_index.md index f2862e04a1a52..d78656e7097db 100644 --- a/website/content/en/docs/reference/configuration/_index.md +++ b/website/content/en/docs/reference/configuration/_index.md @@ -1,15 +1,18 @@ --- title: Configuring Vector short: Configuration -weight: 3 +weight: 2 aliases: ["/docs/configuration", "/docs/setup/configuration"] --- -Vector is configured using a configuration file. This section contains a comprehensive reference of all Vector configuration options. +Vector is configured using a configuration file. This section contains a +comprehensive reference of all Vector configuration options. ## Example -The following is an example of a popular Vector configuration that ingests logs from a file and routes them to both Elasticsearch and AWS S3. Your configuration will differ based on your needs. +The following is an example of a popular Vector configuration that ingests logs +from a file and routes them to both Elasticsearch and AWS S3. Your configuration +will differ based on your needs. 
{{< tabs default="vector.toml" >}} {{< tab title="vector.toml" >}} @@ -111,63 +114,66 @@ sinks: ```json { - "data_dir": "/var/lib/vector", - "sources": { - "apache_logs": { - "type": "file", - "include": [ - "/var/log/apache2/*.log" - ], - "ignore_older": 86400 + "data_dir": "/var/lib/vector", + "sources": { + "apache_logs": { + "type": "file", + "include": [ + "/var/log/apache2/*.log" + ], + "ignore_older": 86400 + } + }, + "transforms": { + "remap": { + "inputs": [ + "apache_logs" + ], + "type": "remap", + "source": ". = parse_apache_log(.message)" + }, + "apache_sampler": { + "inputs": [ + "apache_parser" + ], + "type": "sampler", + "rate": 50 + } + }, + "sinks": { + "es_cluster": { + "inputs": [ + "apache_sampler" + ], + "type": "elasticsearch", + "host": "http://79.12.221.222:9200", + "index": "vector-%Y-%m-%d" + }, + "s3_archives": { + "inputs": [ + "apache_parser" + ], + "type": "aws_s3", + "region": "us-east-1", + "bucket": "my-log-archives", + "key_prefix": "date=%Y-%m-%d", + "compression": "gzip", + "encoding": "ndjson", + "batch": { + "max_size": 10000000 } - }, - "transforms": { - "remap": { - "inputs": [ - "apache_logs" - ], - "type": "remap", - "source": ". 
= parse_apache_log(.message)" - }, - "apache_sampler": { - "inputs": [ - "apache_parser" - ], - "type": "sampler", - "rate": 50 - } - }, - "sinks": { - "es_cluster": { - "inputs": [ - "apache_sampler" - ], - "type": "elasticsearch", - "host": "http://79.12.221.222:9200", - "index": "vector-%Y-%m-%d" - }, - "s3_archives": { - "inputs": [ - "apache_parser" - ], - "type": "aws_s3", - "region": "us-east-1", - "bucket": "my-log-archives", - "key_prefix": "date=%Y-%m-%d", - "compression": "gzip", - "encoding": "ndjson", - "batch": { - "max_size": 10000000 - } - } - } + } + } } ``` {{< /tab >}} {{< /tabs >}} -To use this configuration file, specify it with the `--config` flag when starting Vector: +To use this configuration file, specify it with the `--config` flag when +starting Vector: + +{{< tabs default="TOML" >}} {{< tab title="TOML" >}} {{< tabs default="TOML" >}} {{< tab title="TOML" >}} @@ -197,20 +203,21 @@ vector --config /etc/vector/vector.json ### Components -{{< jump "/docs/reference/configuration/sources" >}} -{{< jump "/docs/reference/configuration/transforms" >}} -{{< jump "/docs/reference/configuration/sinks" >}} +{{< jump "/docs/reference/configuration/sources" >}} {{< jump +"/docs/reference/configuration/transforms" >}} {{< jump +"/docs/reference/configuration/sinks" >}} ### Advanced -{{< jump "/docs/reference/configuration/global-options" >}} -{{< jump "/docs/reference/configuration/template-syntax" >}} +{{< jump "/docs/reference/configuration/global-options" >}} {{< jump +"/docs/reference/configuration/template-syntax" >}} ## How it works ### Environment variables -Vector interpolates environment variables within your configuration file with the following syntax: +Vector interpolates environment variables within your configuration file with +the following syntax: ```toml [transforms.add_host] @@ -231,15 +238,21 @@ option = "${ENV_VAR:-default}" #### Escaping -You can escape environment variables by prefacing them with a `$` character. 
For example `$${HOSTNAME}` or `$$HOSTNAME` is treated literally in the above environment variable example. +You can escape environment variables by prefacing them with a `$` character. For +example `$${HOSTNAME}` or `$$HOSTNAME` is treated literally in the above +environment variable example. ### Formats -Vector supports [TOML], [YAML], and [JSON] to ensure that Vector fits into your workflow. A side benefit of supporting JSON is that it enables you to use JSON-outputting data templating languages like [Jsonnet] and [Cue]. +Vector supports [TOML], [YAML], and [JSON] to ensure that Vector fits into your +workflow. A side benefit of supporting JSON is that it enables you to use +JSON-outputting data templating languages like [Jsonnet] and [Cue]. #### Location -The location of your Vector configuration file depends on your installation method. For most Linux-based systems, the file can be found at `/etc/vector/vector.toml`. +The location of your Vector configuration file depends on your installation +method. For most Linux-based systems, the file can be found at +`/etc/vector/vector.toml`. ### Multiple files @@ -257,7 +270,8 @@ vector --config /etc/vector/*.toml #### Wilcards in component IDs -Vector supports wildcards (`*`) in component IDs when building your topology. For example: +Vector supports wildcards (`*`) in component IDs when building your topology. 
+For example: ```toml [sources.app1_logs] diff --git a/website/content/en/docs/reference/configuration/tests.md b/website/content/en/docs/reference/configuration/tests.md deleted file mode 100644 index cdcbe47ab96d4..0000000000000 --- a/website/content/en/docs/reference/configuration/tests.md +++ /dev/null @@ -1,18 +0,0 @@ ---- -title: Unit testing Vector configurations -short: Unit tests -weight: 5 -aliases: ["/docs/reference/tests"] ---- - -Vector enables you to unit test [transforms] in your processing topology - -You can define unit tests in your Vector configuration file to cover a network of transforms within the topology. The intention of these tests is to improve the maintainability of configurations containing larger and more complex combinations of transforms. - -You can execute tests within a configuration file using the `test` subcommand: - -```bash -vector test /etc/vector/vector.toml -``` - -[transforms]: /docs/reference/glossary/#transform diff --git a/website/content/en/docs/reference/configuration/unit-tests.md b/website/content/en/docs/reference/configuration/unit-tests.md new file mode 100644 index 0000000000000..a13b9eae77f63 --- /dev/null +++ b/website/content/en/docs/reference/configuration/unit-tests.md @@ -0,0 +1,383 @@ +--- +title: Unit testing Vector configurations +short: Unit tests +weight: 5 +aliases: [ + "/docs/reference/tests", + "/docs/reference/configuration/tests", + "/guides/level-up/unit-testing" +] +--- + +Vector enables you to [unit test] [transforms] in your processing [pipeline]. Unit tests in Vector +work just like unit tests in most programming languages: + +1. Provide a set of [inputs](#inputs) to a transform (or to multiple transforms chained together). +1. Specify the expected [outputs](#outputs) from the changes made by the transform (or multiple + transforms). +1. Receive directly actionable feedback from any test failures. 
+ +Unit tests can serve as a useful guardrail when running in Vector in production settings where you +need to ensure that your topology doesn't exhibit unexpected behavior and generally improve the +maintainability of your Vector pipelines, particularly in larger and more complex pipelines. + +## Running unit tests + +You can execute tests within a [configuration](#configuring) file using Vector's +[`test`][vector_test] subcommand: + +```bash +vector test /etc/vector/vector.toml +``` + +You can also specify multiple configuration files to test: + +```bash +vector test /etc/vector/pipeline1.toml /etc/vector/pipeline2.toml +``` + +Glob patterns are also supported: + +```bash +vector test /etc/vector/*.toml +``` + +Specifying multiple files is useful if you want to, for example, keep your unit tests in a separate +file from your pipeline configuration. Vector always treats multiple files as a single, unified +configuration. + +## Verifying output {#verifying} + +You can use [VRL assertions][assertions] to verify that the output of the transform(s) being tested +conforms to your expectations. VRL provides two assertion functions: + +* [`assert`][assert] takes a [Boolean expression][boolean] as its first argument. If the Boolean + resolves to `false`, the test fails and Vector logs an error. +* [`assert_eq`][assert_eq] takes any two values as its first two arguments. If those two values + aren't equal, the test fails and Vector logs an error. 
+ +With both functions, you can supply a custom log message to be emitted if the assertion fails: + +```ruby +# Named argument +assert!(1 == 2, message: "the rules of arithmetic have been violated") +assert_eq!(1, 2, message: "the rules of arithmetic have been violated") + +# Positional arguments are also valid +assert!(1 == 2, "the rules of arithmetic have been violated") +assert_eq!(1, 2, "the rules of arithmetic have been violated") +``` + +{{< info title="Make your assertions infallible" >}} +We recommend making `assert` and `assert_eq` invocations in unit tests [infallible] by applying the +bang (`!`) syntax, as in `assert!(1 == 1)` rather than `assert(1 == 1)`. The `!` indicates that the +VRL program should abort if the condition fails. + +[infallible]: /docs/reference/vrl/#fallibility +{{< /info >}} + +If you use the `assert` function, you need to pass a [Boolean expression][boolean] to the function +as the first argument. Especially useful when writing Boolean expressions are the [type +functions][type], functions like [`exists`][exists], [`includes`][includes], +[`is_nullish`][is_nullish] and [`contains`][contains], and VRL [comparisons]. 
Here's an example +usage of a Boolean expression passed to an `assert` function: + +```toml +[[tests.outputs.conditions]] +type = "vrl" +source = ''' +assert!(is_string(.message) && is_timestamp(.timestamp) && !exists(.other)) +''' +``` + +In this case, the VRL program (under `source`) evaluates to a single Boolean that expresses the +following: + +* The `message` field must be a string +* The `timestamp` field must be a valid timestamp +* The `other` field must not exist + +It's also possible to break a test up into multiple `assert` or `assert_eq` statements: + +```toml +source = ''' +assert!(exists(.message), "no message field provided") +assert!(!is_nullish(.message), "message field is an empty string") +assert!(is_string(.message), "message field has an unexpected type") +assert_eq!(.message, "success", "message field had an unexpected value") +assert!(exists(.timestamp), "no timestamp provided") +assert!(is_timestamp(.timestamp), "timestamp is invalid") +assert!(!exists(.other), "extraneous other field present") +''' +``` + +You can also store the Boolean expressions in variables rather than passing the entire statement to +the `assert` function: + +```toml +source = ''' +message_field_valid = exists(.message) && + !is_nullish(.message) && + .message == "success" + +assert!(message_field_valid) +''' +``` + +## Example unit test configuration {#example} + +Below is an annotated example of a unit test suite for a transform called `add_metadata`, which +adds a unique ID and a timestamp to log events: + +```toml +# The transform being tested is a Vector Remap Language (VRL) transform that +# adds two fields to each incoming log event: a timestamp and a unique ID +[transforms.add_metadata] +type = "remap" +inputs = [] +source = ''' +.timestamp = now() +.id = uuid_v4() +''' + +# Here we begin configuring our test suite +[[tests]] +name = "Test for the add_metadata transform" + +# The inputs for the test +[[tests.inputs]] +insert_at = "add_metadata" # The transform
into which the testing event is inserted +type = "log" # The event type (either log or metric) + +# The test log event that is passed to the `add_metadata` transform +[tests.inputs.log_fields] +message = "successful transaction" +code = 200 + +# The expected outputs of the test +[[tests.outputs]] +extract_from = "add_metadata" # The transform from which the resulting event is extracted + +# The declaration of what we expect +[[tests.outputs.conditions]] +type = "vrl" +source = ''' +assert!(is_timestamp(.timestamp)) +assert!(is_string(.id)) +assert_eq!(.message, "successful transaction") +''' +``` + +This example represents a complete test of the `add_metadata` transform, including test `inputs` +and expected `outputs` drawn from a specific transform. + +{{< success title="Multiple config formats available" >}} +The unit testing example above is in TOML but Vector also supports YAML and JSON as configuration +formats. +{{< /success >}} + +## Configuring unit tests {#configuring} + +Unit tests in Vector live alongside your topology configuration. You can specify your tests in the +same config file alongside your transform definitions or split them out into a separate file. + +Unit tests are specified inside of a `tests` array. Each test requires a `name`: + +```toml +[[tests]] +name = "test 1" +# Other test config + +[[tests]] +name = "test_2" +# Other test config + +# etc. +``` + +Inside each test definition, you need to specify two things: + +* An array of `inputs` that provides [input events](#inputs) for the test. +* An array of `outputs` that provides [expected outputs](#outputs) for the test. + +Optionally, you can specify a `no_outputs_from` list of transforms that must *not* output events +in order for the test to pass.
Here's an example: + +```toml +[[tests]] +name = "skip_remove_fields" +no_outputs_from = ["remove_extraneous_fields"] +``` + +In this case, the output from some transform called `remove_extraneous_fields` is expected to be +empty; if that transform outputs one or more events, the test fails. + +### Inputs + +In the `inputs` array for the test, you have these options: + +Parameter | Type | Description +:---------|:-----|:----------- +`insert_at` | string (name of transform) | The name of the transform into which the test input is inserted. This is particularly useful when you want to test only a subset of a transform pipeline. +`value` | string (raw event value) | A raw string value to act as an input event. Use only in cases where events are raw strings and not structured objects with event fields. +`log_fields` | object | If the transform handles [log events](#logs), these are the key/value pairs that comprise the input event. +`metric` | object | If the transform handles [metric events](#metrics), these are the fields that comprise that metric. Subfields include `name`, `tags`, `kind`, and others. + +Here's an example `inputs` declaration: + +```toml +[transforms.add_metadata] +# transform config + +[[tests]] +name = "Test add_metadata transform" + +[[tests.inputs]] +insert_at = "add_metadata" + +[tests.inputs.log_fields] +message = "<102>1 2020-12-22T15:22:31.111Z vector-user.biz su 2666 ID389 - Something went wrong" +``` + +### Outputs + +In the `outputs` array of your unit testing configuration you specify two things: + +Parameter | Type | Description +:---------|:-----|:----------- +`extract_from` | string (name of transform) | The transform whose output you want to test. +`conditions` | array of objects | The [VRL conditions](#verifying) to run against the output. + +Each condition in the `conditions` array has two fields: + +Parameter | Type | Description +:---------|:-----|:----------- +`type` | string | The type of condition you're providing.
As the original `check_fields` syntax is now deprecated, [`vrl`][vrl] is currently the only valid value. +`source` | string (VRL Boolean expression) | Explained in detail [above](#verifying). + +Here's an example `outputs` declaration: + +```toml +[[tests.outputs]] +extract_from = "add_metadata" + +[[tests.outputs.conditions]] +type = "vrl" +source = ''' +assert!(is_string(.id)) +assert!(exists(.tags)) +''' +``` + +{{< danger title="`check_fields` conditions now deprecated" >}} +Vector initially provided a `check_fields` condition type that enabled you to specify Boolean +test conditions using a special configuration-based system. `check_fields` is now deprecated. We +strongly recommend converting any existing `check_fields` tests to `vrl` conditions. +{{< /danger >}} + +### Event types + +There are currently two event types that you can unit test in Vector: + +* [`log`](#logs) events +* [`metric`](#metrics) events + +#### Logs + +As explained in the section on [inputs](#inputs) above, when testing log events you can specify +either a structured event [object] or a raw [string].
+ +##### Object + +To specify a structured log event as your test input, use `log_fields`: + +```toml +[tests.inputs.log_fields] +message = "successful transaction" +code = 200 +id = "38c5b0d0-5e7e-42aa-ae86-2b642ad2d1b8" +``` + +##### Raw string value + +To specify a raw string value for a log event, use `value`: + +```toml +[[tests.inputs]] +insert_at = "add_metadata" +value = "<102>1 2020-12-22T15:22:31.111Z vector-user.biz su 2666 ID389 - Something went wrong" +``` + +#### Metrics + +You can specify the fields in a metric event to be unit tested using a `metric` object: + +```toml +[[tests.inputs]] +insert_at = "my_metric_transform" +type = "metric" + +[tests.inputs.metric] +name = "count" +kind = "absolute" +counter = { value = 1 } +``` + +Here's a full end-to-end example of unit testing a metric through a transform: + +```toml +[transforms.add_env_to_metric] +type = "remap" +inputs = [] +source = ''' +env, err = get_env_var("ENV") +if err != null { + log(err, level: "error") +} +.tags.environment = env +''' + +[[tests]] +name = "add_env_to_metric_test" + +[[tests.inputs]] +insert_at = "add_env_to_metric" +type = "metric" + +[tests.inputs.metric] +name = "website_hits" +kind = "absolute" +counter = { value = 1 } + +[[tests.outputs]] +extract_from = "add_env_to_metric" + +[[tests.outputs.conditions]] +type = "vrl" +source = ''' +assert_eq!(.name, "website_hits") +assert_eq!(.kind, "absolute") +assert_eq!(.tags.environment, "production") +''' +``` + +[assert]: /docs/reference/vrl/functions/#assert +[assert_eq]: /docs/reference/vrl/functions/#assert_eq +[assertions]: /docs/reference/vrl#assertions +[boolean]: /docs/reference/vrl/#boolean-expressions +[comparisons]: /docs/reference/vrl/expressions/#comparison +[contains]: /docs/reference/vrl/functions/#contains +[datadog_search]: https://docs.datadoghq.com/logs/explorer/search_syntax +[exists]: /docs/reference/vrl/functions/#exists +[filter]: /docs/reference/configuration/transforms/filter +[includes]:
/docs/reference/vrl/functions/#includes +[is_nullish]: /docs/reference/vrl/functions/#is_nullish +[logs]: /docs/about/under-the-hood/architecture/data-model/log +[metrics]: /docs/about/under-the-hood/architecture/data-model/metric +[pipeline]: /docs/reference/glossary/#pipeline +[remap]: /docs/reference/configuration/transforms/remap +[transforms]: /docs/reference/glossary/#transform +[type]: /docs/reference/vrl/functions/#type-functions +[unit test]: https://en.wikipedia.org/wiki/Unit_testing +[vector_test]: /docs/reference/cli#test +[vector_tests]: https://github.com/vectordotdev/vector/tree/master/tests/behavior/transforms +[vrl]: /docs/reference/vrl diff --git a/website/content/en/docs/reference/vrl/_index.md b/website/content/en/docs/reference/vrl/_index.md index a8b69adc66e05..579a59b3e4728 100644 --- a/website/content/en/docs/reference/vrl/_index.md +++ b/website/content/en/docs/reference/vrl/_index.md @@ -2,22 +2,28 @@ title: Vector Remap Language (VRL) description: A domain-specific language for modifying your observability data short: Vector Remap Language -weight: 4 +weight: 1 aliases: ["/docs/reference/remap"] --- -Vector Remap Language (VRL) is an expression-oriented language designed for transforming observability data (logs and metrics) in a [safe](#safety) and [performant](#performance) manner. It features a simple [syntax](expressions) and a rich set of built-in functions tailored specifically to observability use cases. +Vector Remap Language (VRL) is an expression-oriented language designed for +transforming observability data (logs and metrics) in a [safe](#safety) and +[performant](#performance) manner. It features a simple [syntax](expressions) +and a rich set of built-in functions tailored specifically to observability use +cases. -You can use VRL in Vector via the [`remap`][remap] transform. For a more in-depth picture, see the [announcement blog post][blog_post]. +You can use VRL in Vector via the [`remap`][remap] transform. 
For a more +in-depth picture, see the [announcement blog post][blog_post]. ## Quickstart VRL programs act on a single observability [event](#event) and can be used to: -* **Transform** observability events -* Specify **conditions** for [routing][route] and [filtering][filter] events +- **Transform** observability events +- Specify **conditions** for [routing][route] and [filtering][filter] events -Those programs are specified as part of your Vector [configuration]. Here's an example `remap` transform that contains a VRL program in the `source` field: +Those programs are specified as part of your Vector [configuration]. Here's an +example `remap` transform that contains a VRL program in the `source` field: ```toml {title="vector.toml"} [transforms.modify] @@ -29,11 +35,14 @@ source = ''' ''' ``` -This program changes the contents of each event that passes through this transform, [deleting][del] the `user_info` field and adding a [timestamp][now] to the event. +This program changes the contents of each event that passes through this +transform, [deleting][del] the `user_info` field and adding a [timestamp][now] +to the event. ### Example: parsing JSON -Let's have a look at a more complex example. Imagine that you're working with HTTP log events that look like this: +Let's have a look at a more complex example. Imagine that you're working with +HTTP log events that look like this: ```text "{\"status\":200,\"timestamp\":\"2021-03-01T19:19:24.646170Z\",\"message\":\"SUCCESS\",\"username\":\"ub40fan4life\"}" @@ -41,10 +50,10 @@ Let's have a look at a more complex example. 
Imagine that you're working with HT You want to apply these changes to each event: -* Parse the raw string into JSON -* Reformat the `time` into a UNIX timestamp -* Remove the `username` field -* Convert the `message` to lowercase +- Parse the raw string into JSON +- Reformat the `time` into a UNIX timestamp +- Remove the `username` field +- Convert the `message` to lowercase This VRL program would accomplish all of that: @@ -67,7 +76,10 @@ Finally, the resulting event: ### Example: filtering events -The JSON parsing program in the example above modifies the contents of each event. But you can also use VRL to specify conditions, which convert events into a single Boolean expression. Here's an example [`filter`][filter] transform that filters out all messages for which the `severity` field equals `"info"`: +The JSON parsing program in the example above modifies the contents of each +event. But you can also use VRL to specify conditions, which convert events into +a single Boolean expression. Here's an example [`filter`][filter] transform that +filters out all messages for which the `severity` field equals `"info"`: ```toml {title="vector.toml"} [transforms.filter_out_info] @@ -76,41 +88,49 @@ inputs = ["logs"] condition = '.severity != "info"' ``` -Conditions can also be more multifaceted. This condition would filter out all events for which the `severity` field is `"info"`, the `status_code` field is greater than or equal to 400, and the `host` field isn't set: +Conditions can also be more multifaceted. This condition would filter out all +events for which the `severity` field is `"info"`, the `status_code` field is +greater than or equal to 400, and the `host` field isn't set: ```vrl condition = '.severity != "info" && .status_code < 400 && exists(.host) ``` -{{< info title="More VRL examples" >}} -You can find more VRL examples further down [on this page](#other-examples) or in the [VRL example reference](/docs/reference/vrl/examples). 
-{{< /info >}} +{{< info title="More VRL examples" >}} You can find more VRL examples further +down [on this page](#other-examples) or in the +[VRL example reference](/docs/reference/vrl/examples). {{< /info >}} ## Reference -All language constructs are contained in the following reference pages. Use these references as you write your VRL programs: +All language constructs are contained in the following reference pages. Use +these references as you write your VRL programs: {{< pages >}} ## Learn -VRL is designed to minimize the learning curve. These resources can help you get acquainted with Vector and VRL: +VRL is designed to minimize the learning curve. These resources can help you get +acquainted with Vector and VRL: -{{< jump "/docs/setup/quickstart" >}} -{{< jump "/guides/level-up/transformation" >}} +{{< jump "/docs/setup/quickstart" >}} {{< jump "/guides/level-up/transformation" +>}} ## The goals of VRL {#goals} -VRL is built by the Vector team and its development is guided by two core goals, [safety](#safety) and [performance](#performance), without compromising on flexibility. This makes VRL ideal for critical, performance-sensitive infrastructure, like observabiity pipelines. To illustrate how we achieve these, below is a VRL feature matrix across these principles: - -Feature | Safety | Performance -:-------|:-------|:----------- -[Compilation](#compilation) | ✅ | ✅ -[Ergonomic safety](#ergonomic-safety) | ✅ | ✅ -[Fail safety](#fail-safety) | ✅ | -[Memory safety](#memory-safety) | ✅ | -[Vector and Rust native](#vector-rust-native) | ✅ | ✅ -[Statelessness](#stateless) | ✅ | ✅ +VRL is built by the Vector team and its development is guided by two core goals, +[safety](#safety) and [performance](#performance), without compromising on +flexibility. This makes VRL ideal for critical, performance-sensitive +infrastructure, like observability pipelines.
To illustrate how we achieve these, +below is a VRL feature matrix across these principles: + +| Feature | Safety | Performance | +| :-------------------------------------------- | :----- | :---------- | +| [Compilation](#compilation) | ✅ | ✅ | +| [Ergonomic safety](#ergonomic-safety) | ✅ | ✅ | +| [Fail safety](#fail-safety) | ✅ | | +| [Memory safety](#memory-safety) | ✅ | | +| [Vector and Rust native](#vector-rust-native) | ✅ | ✅ | +| [Statelessness](#stateless) | ✅ | ✅ | ## Concepts diff --git a/website/content/en/guides/level-up/troubleshooting.md b/website/content/en/guides/level-up/troubleshooting.md index ccb479292eab2..b66b23576b053 100644 --- a/website/content/en/guides/level-up/troubleshooting.md +++ b/website/content/en/guides/level-up/troubleshooting.md @@ -3,7 +3,7 @@ title: Troubleshooting description: A guide to debugging and troubleshooting Vector author_github: binarylogic domain: operations -weight: 5 +weight: 4 tags: ["troubleshooting", "level up", "guides", "guide"] --- diff --git a/website/content/en/guides/level-up/unit-testing.md b/website/content/en/guides/level-up/unit-testing.md deleted file mode 100644 index 7cd9659c6b433..0000000000000 --- a/website/content/en/guides/level-up/unit-testing.md +++ /dev/null @@ -1,221 +0,0 @@ ---- -title: Unit Testing Your Configs -description: Learn how to write and execute unit tests for your Vector configs -author_github: jeffail -domain: config -weight: 4 -tags: ["testing", "configs", "unit testing", "level up", "guides", "guide"] ---- - -{{< requirement >}} -Before you begin, this guide assumes the following: - -* You understand the [basic Vector concepts][concepts] -* You understand [how to set up a basic pipeline][pipeline] - -[concepts]: /docs/about/concepts -[pipeline]: /docs/setup/quickstart -{{< /requirement >}} - -You can define unit tests in a Vector configuration file that cover a network of -transforms within the topology. 
These tests help you develop configs containing -larger and more complex topologies and to improve their maintainability. - -The full spec can be found [here][docs.reference.configuration.tests]. This guide covers -writing and executing a unit test for the following config: - -```toml title="vector.toml" -[sources.over_tcp] - type = "socket" - mode = "tcp" - address = "0.0.0.0:9000" - -[transforms.foo] - type = "grok_parser" - inputs = ["over_tcp"] - pattern = "%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:level} %{GREEDYDATA:message}" - -[transforms.bar] - type = "add_fields" - inputs = ["foo"] - [transforms.bar.fields] - new_field = "this is a static value" - -[transforms.baz] - type = "remove_fields" - inputs = ["foo"] - fields = ["level"] - -[sinks.over_http] - type = "http" - inputs = ["baz"] - uri = "http://localhost:4195/post" - encoding = "text" -``` - -In this config we: - -* Parse a log line into the fields `timestamp`, `level` and `message` with the - transform `foo`. -* Add a static string field `new_field` using the transform `bar`. -* Remove the field `level` with the transform `baz`. - -In reality, it's unlikely that a config this simple would be worth the investment -of writing unit tests. Regardless, for the purpose of this guide we've concluded -that yes, we do wish to unit test this config. - -Specifically, we need to ensure that the resulting events of our topology -(whatever comes out of the `baz` transform) always meets the following -requirements: - -* Does *not* contain the field `level`. -* Contains the field `new_field`, with a static value `this is a static value`. -* Has a `timestamp` and `message` field containing the values extracted from the - raw message of the input log. - -Otherwise our system fails and an annoying relative (uncle Cecil) moves in to -live with us indefinitely. We will do _anything_ to prevent that. - -## Input - -First we shall write a single unit test at the bottom of our config called -`check_simple_log`. 
Each test must define input events (usually just one), which -initiates the test by injecting those events into a transform of the topology: - -```toml -[[tests]] - name = "check_simple_log" - - [[tests.inputs]] - insert_at = "foo" - type = "raw" - value = "2019-11-28T12:00:00+00:00 info Sorry, I'm busy this week Cecil" -``` - -Here we've specified that our test should begin by injecting an event at the -transform `foo`. The `raw` input type creates a log with only a `message` field -and `timestamp` (set to the time of the test), where `message` is populated with -the contents of the `value` field. - -## Outputs - -This test won't run in its current state because there's nothing to check. In -order to perform checks with this unit test we define an output to inspect: - -```toml -[[tests]] - name = "check_simple_log" - - [[tests.inputs]] - insert_at = "foo" - type = "raw" - value = "2019-11-28T12:00:00+00:00 info Sorry, I'm busy this week Cecil" - - [[tests.outputs]] - extract_from = "baz" - - [[tests.outputs.conditions]] - type = "check_fields" - "level.exists" = false - "new_field.equals" = "this is a static value" - "timestamp.equals" = "2019-11-28T12:00:00+00:00" - "message.equals" = "Sorry, I'm busy this week Cecil" -``` - -We can define any number of outputs for a test, and must specify at which -transform the output events should be extracted for checking. This allows us to -check the events from different transforms in a single test. For our purposes we -only need to check the output of `baz`. - -An output can also have any number of conditions to check, and these are how we -determine whether a test has failed or succeeded. In order for the test to pass -each condition for an output must resolve to `true`. - -It's possible for a topology to result in >1 events extracted from a single -transform, in which case each condition must pass for one or more of the -extracted events in order for the test to pass. 
- -An output without any conditions cannot fail a test, and instead prints the -input and output events of a transform during the test. This is useful when -building a config as it allows us to inspect the behavior of each transform in -isolation. - -The only condition we've defined here is a `check_fields` type. This is -currently the _only_ condition type on offer, and it allows us to specify any -number of field queries (of the format `"." = ""`). - -## Executing - -With this test added to the bottom of our config we are now able to execute it. -Executing tests within a config file can be done with the `test` subcommand: - -```bash -vector test ./example.toml -``` - -Doing this results in the following output: - -```shell -vector test ./example.toml -Running ./example.toml tests -test ./example.toml: check_simple_log ... failed - -failures: - ---- ./example.toml --- - -test 'check_simple_log': - -check transform 'baz' failed conditions: - condition[0]: predicates failed: [ new_field.equals: "this is a static value" ] -payloads (events encoded as JSON): - input: {"level":"info","timestamp":"2019-11-28T12:00:00+00:00","message":"Sorry, I'm busy this week Cecil"} - output: {"timestamp":"2019-11-28T12:00:00+00:00","message":"Sorry, I'm busy this week Cecil"} -``` - -Whoops! Something isn't right. Vector has told us that condition `0` (our only -condition) failed for the predicate `new_field.equals`. We also get to see a -JSON encoded representation of the input and output of the transform `baz`. -Try reviewing our config topology to see if you can spot the mistake. - -**Spoiler alert**: The problem is that transform `baz` is configured with the input -`foo`, which means `bar` is skipped in the topology! 
- -{{< info >}} -Side note: We would have also caught this particular issue with: - -```shell -vector validate --topology ./example.toml -``` - -{{< /info >}} - -The fix is easy, we simply change the input of `baz` from `foo` to `bar`: - -```diff ---- a/example.toml -+++ b/example.toml -@@ -16,7 +16,7 @@ - - [transforms.baz] - type = "remove_fields" -- inputs = ["foo"] -+ inputs = ["bar"] - fields = ["level"] -``` - -And running our test again gives us an exit status 0: - -```sh -vector test ./example.toml -Running ./example.toml tests -Test ./example.toml: check_simple_log ... passed -``` - -The test passed! Now if we configure our CI system to execute our test we can -ensure that uncle Cecil remains in Shoreditch after any future config change. -What an insufferable hipster he is. - -[docs.about.concepts]: /docs/about/concepts -[docs.reference.configuration.tests]: /docs/reference/configuration/tests -[docs.setup.quickstart]: /docs/setup/quickstart diff --git a/website/content/en/highlights/2021-10-05-0-17-upgrade-guide.md b/website/content/en/highlights/2021-10-05-0-17-upgrade-guide.md index 7bfc02a360692..174dc57356eea 100644 --- a/website/content/en/highlights/2021-10-05-0-17-upgrade-guide.md +++ b/website/content/en/highlights/2021-10-05-0-17-upgrade-guide.md @@ -49,3 +49,13 @@ which also writes to stdout by default. Following some discussion in that stdout can be processed separately. If you were previously depending on Vector's logs appearing in stdout, you should now look for them in stderr. + +### The `generator` source now has a default `interval` setting + +Previously, the [`generator`][generator] source had no default `interval`, which meant that if you +started Vector without setting an `interval`, the `generator` would output batches of test events as +fast as it can. In version 0.17.0, the default for `interval` is now `1.0`, which means that Vector +outputs one batch per second. 
To specify no delay between batches you now need to explicitly set +`interval` to `0.0`. + +[generator]: /docs/reference/configuration/sources/generator diff --git a/website/cue/reference/cli.cue b/website/cue/reference/cli.cue index f9357c680fe3c..0b15106b9df1d 100644 --- a/website/cue/reference/cli.cue +++ b/website/cue/reference/cli.cue @@ -251,7 +251,7 @@ cli: { description: """ Run Vector config unit tests, then exit. This command is experimental and therefore subject to change. For guidance on how to write unit tests check - out the [unit testing documentation](\(urls.vector_unit_testing)). + out the [unit testing documentation](\(urls.vector_unit_tests)). """ options: { diff --git a/website/cue/reference/components/sinks/datadog_archives.cue b/website/cue/reference/components/sinks/datadog_archives.cue index 4ce3ddc85f005..2c95912c50fc8 100644 --- a/website/cue/reference/components/sinks/datadog_archives.cue +++ b/website/cue/reference/components/sinks/datadog_archives.cue @@ -168,17 +168,18 @@ components: sinks: datadog_archives: { title: "Event format/pre-processing" body: """ Within the gzipped JSON file, each event’s content is formatted as follows: + ```json { - "_id": "123456789abcdefg", - "date": "2018-05-15T14:31:16.003Z", - "host": "i-12345abced6789efg", - "source": "source_name", - "service": "service_name", - "status": "status_level", - "message": "2018-05-15T14:31:16.003Z INFO rid='acb-123' status=403 method=PUT", - "attributes": { "rid": "abc-123", "http": { "status_code": 403, "method": "PUT" } }, - "tags": [ "env:prod", "team:acme" ] + "_id": "123456789abcdefg", + "date": "2018-05-15T14:31:16.003Z", + "host": "i-12345abced6789efg", + "source": "source_name", + "service": "service_name", + "status": "status_level", + "message": "2018-05-15T14:31:16.003Z INFO rid='acb-123' status=403 method=PUT", + "attributes": { "rid": "abc-123", "http": { "status_code": 403, "method": "PUT" } }, + "tags": [ "env:prod", "team:acme" ] } ``` diff --git
a/website/cue/reference/components/sinks/logdna.cue b/website/cue/reference/components/sinks/logdna.cue index d47e693d57d9f..d9c08e7ed0171 100644 --- a/website/cue/reference/components/sinks/logdna.cue +++ b/website/cue/reference/components/sinks/logdna.cue @@ -160,8 +160,8 @@ components: sinks: logdna: { component_sent_bytes_total: components.sources.internal_metrics.output.metrics.component_sent_bytes_total component_sent_events_total: components.sources.internal_metrics.output.metrics.component_sent_events_total component_sent_event_bytes_total: components.sources.internal_metrics.output.metrics.component_sent_event_bytes_total - events_discarded_total: components.sources.internal_metrics.output.metrics.events_discarded_total + events_discarded_total: components.sources.internal_metrics.output.metrics.events_discarded_total events_out_total: components.sources.internal_metrics.output.metrics.events_out_total - processing_errors_total: components.sources.internal_metrics.output.metrics.processing_errors_total + processing_errors_total: components.sources.internal_metrics.output.metrics.processing_errors_total } } diff --git a/website/cue/reference/components/sinks/loki.cue b/website/cue/reference/components/sinks/loki.cue index 3a5d62eff0f8d..27001884e5061 100644 --- a/website/cue/reference/components/sinks/loki.cue +++ b/website/cue/reference/components/sinks/loki.cue @@ -205,10 +205,10 @@ components: sinks: loki: { component_sent_bytes_total: components.sources.internal_metrics.output.metrics.component_sent_bytes_total component_sent_events_total: components.sources.internal_metrics.output.metrics.component_sent_events_total component_sent_event_bytes_total: components.sources.internal_metrics.output.metrics.component_sent_event_bytes_total - events_discarded_total: components.sources.internal_metrics.output.metrics.events_discarded_total + events_discarded_total: components.sources.internal_metrics.output.metrics.events_discarded_total events_out_total: 
components.sources.internal_metrics.output.metrics.events_out_total - processed_bytes_total: components.sources.internal_metrics.output.metrics.processed_bytes_total - processing_errors_total: components.sources.internal_metrics.output.metrics.processing_errors_total - streams_total: components.sources.internal_metrics.output.metrics.streams_total + processed_bytes_total: components.sources.internal_metrics.output.metrics.processed_bytes_total + processing_errors_total: components.sources.internal_metrics.output.metrics.processing_errors_total + streams_total: components.sources.internal_metrics.output.metrics.streams_total } } diff --git a/website/cue/reference/components/sinks/splunk_hec_logs.cue b/website/cue/reference/components/sinks/splunk_hec_logs.cue index aa32314cb09e3..79c898106b8c5 100644 --- a/website/cue/reference/components/sinks/splunk_hec_logs.cue +++ b/website/cue/reference/components/sinks/splunk_hec_logs.cue @@ -171,12 +171,12 @@ components: sinks: splunk_hec_logs: { component_sent_bytes_total: components.sources.internal_metrics.output.metrics.component_sent_bytes_total component_sent_events_total: components.sources.internal_metrics.output.metrics.component_sent_events_total component_sent_event_bytes_total: components.sources.internal_metrics.output.metrics.component_sent_event_bytes_total - encode_errors_total: components.sources.internal_metrics.output.metrics.encode_errors_total + encode_errors_total: components.sources.internal_metrics.output.metrics.encode_errors_total events_out_total: components.sources.internal_metrics.output.metrics.events_out_total - http_request_errors_total: components.sources.internal_metrics.output.metrics.http_request_errors_total - processing_errors_total: components.sources.internal_metrics.output.metrics.processing_errors_total - processed_bytes_total: components.sources.internal_metrics.output.metrics.processed_bytes_total - processed_events_total: 
components.sources.internal_metrics.output.metrics.processed_events_total - requests_received_total: components.sources.internal_metrics.output.metrics.requests_received_total + http_request_errors_total: components.sources.internal_metrics.output.metrics.http_request_errors_total + processing_errors_total: components.sources.internal_metrics.output.metrics.processing_errors_total + processed_bytes_total: components.sources.internal_metrics.output.metrics.processed_bytes_total + processed_events_total: components.sources.internal_metrics.output.metrics.processed_events_total + requests_received_total: components.sources.internal_metrics.output.metrics.requests_received_total } } diff --git a/website/cue/reference/components/sources/generator.cue b/website/cue/reference/components/sources/generator.cue index 2af53ed980cb7..8a499c7539460 100644 --- a/website/cue/reference/components/sources/generator.cue +++ b/website/cue/reference/components/sources/generator.cue @@ -60,12 +60,16 @@ components: sources: generator: { } } interval: { - common: false - description: "The amount of time, in seconds, to pause between each batch of output lines. If not set, there is no delay." - required: false + common: false + description: """ + The amount of time, in seconds, to pause between each batch of output lines. The + default is one batch per second. In order to remove the delay and output batches as + quickly as possible, set `interval` to `0.0`. 
+ """ + required: false warnings: [] type: float: { - default: null + default: 1.0 examples: [1.0, 0.1, 0.01] } } diff --git a/website/cue/reference/components/sources/socket.cue b/website/cue/reference/components/sources/socket.cue index 0907f4a294170..f7aaf3be23574 100644 --- a/website/cue/reference/components/sources/socket.cue +++ b/website/cue/reference/components/sources/socket.cue @@ -86,9 +86,10 @@ components: sources: socket: { } } max_length: { - common: true - description: "The maximum bytes size of incoming messages before they are discarded." - required: false + common: true + description: "The maximum bytes size of incoming messages before they are discarded." + relevant_when: "mode = `unix_datagram`" + required: false warnings: [] type: uint: { default: 102400 diff --git a/website/cue/reference/remap/concepts/assertions.cue b/website/cue/reference/remap/concepts/assertions.cue new file mode 100644 index 0000000000000..16f563680fd03 --- /dev/null +++ b/website/cue/reference/remap/concepts/assertions.cue @@ -0,0 +1,16 @@ +remap: concepts: assertions: { + title: "Assertions" + description: """ + VRL offers two functions that you can use to assert that VRL values conform to your + expectations: [`assert`](\(urls.vrl_functions)/#assert) and + [`assert_eq`](\(urls.vrl_functions)/#assert_eq). `assert` aborts the VRL program and logs an + error if the provided [Boolean expression](#boolean-expressions) evaluates to `false`, while + `assert_eq` fails logs an error if the provided values aren't equal. Both functions also + enable you to provide custom log messages to be emitted upon failure. + + When running Vector, assertions can be useful in situations where you need to be notified + when any observability event fails a condition. When writing [unit + tests](\(urls.vector_unit_tests)), assertions can provide granular insight into which + test conditions have failed and why. 
+ """ +} diff --git a/website/cue/reference/remap/functions.cue b/website/cue/reference/remap/functions.cue index 37ee54344b908..fc1b01f571df5 100644 --- a/website/cue/reference/remap/functions.cue +++ b/website/cue/reference/remap/functions.cue @@ -5,7 +5,6 @@ remap: { name: string description: string required: bool - multiple: bool | *false default?: bool | string | int | [string, ...string] type: [remap.#Type, ...remap.#Type] enum?: #Enum @@ -27,7 +26,7 @@ remap: { examples?: [remap.#Example, ...remap.#Example] } - #FunctionCategory: "Array" | "Codec" | "Coerce" | "Convert" | "Debug" | "Enumerate" | "Event" | "Hash" | "IP" | "Number" | "Object" | "Parse" | "Random" | "String" | "System" | "Timestamp" | "Type" + #FunctionCategory: "Array" | "Codec" | "Coerce" | "Convert" | "Debug" | "Enumerate" | "Path" | "Hash" | "IP" | "Number" | "Object" | "Parse" | "Random" | "String" | "System" | "Timestamp" | "Type" // A helper array for generating docs. At some point, we should generate this from the // #FunctionCategory enum if CUE adds support for that. @@ -38,7 +37,7 @@ remap: { "Convert", "Debug", "Enumerate", - "Event", + "Path", "Hash", "IP", "Number", diff --git a/website/cue/reference/remap/functions/assert.cue b/website/cue/reference/remap/functions/assert.cue index bbe0368f4806f..ba6c505784d3c 100644 --- a/website/cue/reference/remap/functions/assert.cue +++ b/website/cue/reference/remap/functions/assert.cue @@ -22,9 +22,13 @@ remap: functions: assert: { type: ["boolean"] }, { - name: "message" - description: "The failure message that's reported if `condition` evaluates to `false`." - required: true + name: "message" + description: """ + An optional custom error message. If the equality assertion fails, `message` is + appended to the default message prefix. See the [examples](#assert-examples) below + for a sample fully formed log message. 
+ """ + required: false type: ["string"] }, ] diff --git a/website/cue/reference/remap/functions/assert_eq.cue b/website/cue/reference/remap/functions/assert_eq.cue index 1bc858664662c..544d8256e15da 100644 --- a/website/cue/reference/remap/functions/assert_eq.cue +++ b/website/cue/reference/remap/functions/assert_eq.cue @@ -33,8 +33,8 @@ remap: functions: assert_eq: { name: "message" description: """ An optional custom error message. If the equality assertion fails, `message` is - appended to the default message prefix. See the examples below for a sample fully - formed log message. + appended to the default message prefix. See the [examples](#assert_eq-examples) + below for a sample fully formed log message. """ required: false type: ["string"] diff --git a/website/cue/reference/remap/functions/del.cue b/website/cue/reference/remap/functions/del.cue index 72fb7576b82ba..3b1359203c926 100644 --- a/website/cue/reference/remap/functions/del.cue +++ b/website/cue/reference/remap/functions/del.cue @@ -1,9 +1,11 @@ package metadata remap: functions: del: { - category: "Event" + category: "Path" description: """ - Removes the field specified by the `path` from the current event object. + Removes the field specified by the static `path` from the target. + + For dynamic path deletion, see the `remove` function. """ arguments: [ diff --git a/website/cue/reference/remap/functions/exists.cue b/website/cue/reference/remap/functions/exists.cue index 33fdf12d9a726..528adf2aa8f82 100644 --- a/website/cue/reference/remap/functions/exists.cue +++ b/website/cue/reference/remap/functions/exists.cue @@ -1,9 +1,14 @@ package metadata remap: functions: exists: { - category: "Event" + category: "Path" description: """ - Checks whether the `path` exists for the current event. + Checks whether the `path` exists for the target. 
+ + This function allows you to distinguish between a missing path + and a path with a `null` value, something a regular path lookup + such as `.foo` would not allow, since that always returns `null` + if the path doesn't exist. """ arguments: [ @@ -11,7 +16,6 @@ remap: functions: exists: { name: "path" description: "The path of the field to check." required: true - multiple: false type: ["path"] }, ] diff --git a/website/cue/reference/remap/functions/get.cue b/website/cue/reference/remap/functions/get.cue new file mode 100644 index 0000000000000..991139e92c801 --- /dev/null +++ b/website/cue/reference/remap/functions/get.cue @@ -0,0 +1,57 @@ +package metadata + +remap: functions: get: { + category: "Path" + description: """ + Dynamically get the value of a given path. + + When you know the path you want to look up, you should use + static paths such as `.foo.bar[1]` to get the value of that + path. However, when you don't know the path names in advance, + you can use this dynamic get function to get at the requested + value. + """ + + arguments: [ + { + name: "value" + description: "The object or array to query." + required: true + type: ["object", "array"] + }, + { + name: "path" + description: "An array of path segments to look up the value for."
+ required: true + type: ["array"] + }, + ] + internal_failure_reasons: [ + #"path segment must be either "string" or "integer""#, + ] + return: types: ["any"] + + examples: [ + { + title: "single-segment top-level field" + source: #""" + get!(value: { "foo": "bar" }, path: ["foo"]) + """# + return: "bar" + }, + { + title: "multi-segment nested field" + source: #""" + get!(value: { "foo": { "bar": "baz" } }, path: ["foo", "bar"]) + """# + return: "baz" + }, + { + title: "array indexing" + source: #""" + get!(value: ["foo", "bar", "baz"], path: [-2]) + """# + return: "bar" + }, + ] +} diff --git a/website/cue/reference/remap/functions/remove.cue b/website/cue/reference/remap/functions/remove.cue new file mode 100644 index 0000000000000..363b3aec08310 --- /dev/null +++ b/website/cue/reference/remap/functions/remove.cue @@ -0,0 +1,73 @@ +package metadata + +remap: functions: remove: { + category: "Path" + description: """ + Dynamically remove the value for a given path. + + When you know the path you want to remove, you should use + the `del` function and static paths such as `del(.foo.bar[1])` + to remove the value at that path. The `del` function returns the + deleted value, and is more performant than this function. + However, when you don't know the path names in advance, you can + use this dynamic remove function to remove the value at the + provided path. + """ + + arguments: [ + { + name: "value" + description: "The object or array to remove data from." + required: true + type: ["object", "array"] + }, + { + name: "path" + description: "An array of path segments to remove the value at." + required: true + type: ["array"] + }, + { + name: "compact" + description: "Whether — after deletion — empty objects or arrays should be removed." 
+ required: false + default: false + type: ["boolean"] + }, + ] + internal_failure_reasons: [ + #"path segment must be either "string" or "integer""#, + ] + return: types: ["object", "array"] + + examples: [ + { + title: "single-segment top-level field" + source: #""" + remove!(value: { "foo": "bar" }, path: ["foo"]) + """# + return: {} + }, + { + title: "multi-segment nested field" + source: #""" + remove!(value: { "foo": { "bar": "baz" } }, path: ["foo", "bar"]) + """# + return: foo: {} + }, + { + title: "array indexing" + source: #""" + remove!(value: ["foo", "bar", "baz"], path: [-2]) + """# + return: ["foo", "baz"] + }, + { + title: "compaction" + source: #""" + remove!(value: { "foo": { "bar": [42], "baz": true } }, path: ["foo", "bar", 0], compact: true) + """# + return: foo: baz: true + }, + ] +} diff --git a/website/cue/reference/remap/functions/set.cue b/website/cue/reference/remap/functions/set.cue new file mode 100644 index 0000000000000..0a28fc1cba175 --- /dev/null +++ b/website/cue/reference/remap/functions/set.cue @@ -0,0 +1,63 @@ +package metadata + +remap: functions: set: { + category: "Path" + description: """ + Dynamically insert data into the path of a given object or array. + + When you know the path you want to assign a value to, you should + use static path assignments such as `.foo.bar[1] = true` for + improved performance and readability. However, when you don't + know the path names in advance, you can use this dynamic + insertion function to insert the data into the object or array. + """ + + arguments: [ + { + name: "value" + description: "The object or array to insert data into." + required: true + type: ["object", "array"] + }, + { + name: "path" + description: "An array of path segments to insert the value to." + required: true + type: ["array"] + }, + { + name: "data" + description: "The data to be inserted." 
+ required: true + type: ["any"] + }, + ] + internal_failure_reasons: [ + #"path segment must be either "string" or "integer""#, + ] + return: types: ["object", "array"] + + examples: [ + { + title: "single-segment top-level field" + source: #""" + set!(value: { "foo": "bar" }, path: ["foo"], data: "baz") + """# + return: foo: "baz" + }, + { + title: "multi-segment nested field" + source: #""" + set!(value: { "foo": { "bar": "baz" } }, path: ["foo", "bar"], data: "qux") + """# + return: foo: bar: "qux" + }, + { + title: "array" + source: #""" + set!(value: ["foo", "bar", "baz"], path: [-2], data: 42) + """# + return: ["foo", 42, "baz"] + }, + ] +} diff --git a/website/cue/reference/remap/functions/to_float.cue b/website/cue/reference/remap/functions/to_float.cue index 0d91a1761d5d1..402e8674befb2 100644 --- a/website/cue/reference/remap/functions/to_float.cue +++ b/website/cue/reference/remap/functions/to_float.cue @@ -13,7 +13,7 @@ remap: functions: to_float: { The value to convert to a float. Must be convertible to a float, otherwise an error is raised. 
""" required: true - type: ["float", "integer", "boolean", "string"] + type: ["integer", "float", "boolean", "string", "timestamp"] }, ] internal_failure_reasons: [ @@ -22,8 +22,11 @@ remap: functions: to_float: { return: { types: ["float"] rules: [ + "If `value` is a float, it will be returned as-is.", + "If `value` is an integer, it will be returned as as a float.", "If `value` is a string, it must be the string representation of an float or else an error is raised.", "If `value` is a boolean, `0.0` is returned for `false` and `1.0` is returned for `true`.", + "If `value` is a timestamp, a [Unix timestamp](\(urls.unix_timestamp)) with fractional seconds is returned.", ] } @@ -35,5 +38,12 @@ remap: functions: to_float: { """ return: 3.145 }, + { + title: "Coerce to a float (timestamp)" + source: """ + to_float(t'2020-12-30T22:20:53.824727Z') + """ + return: 1609366853.824727 + }, ] } diff --git a/website/cue/reference/urls.cue b/website/cue/reference/urls.cue index 86fa1dfb1c1e1..512f342ccecdb 100644 --- a/website/cue/reference/urls.cue +++ b/website/cue/reference/urls.cue @@ -574,7 +574,7 @@ urls: { vector_transform_aws_cloudwatch_logs_subscription_parser: "/docs/reference/configuration/transforms/aws_cloudwatch_logs_subscription_parser" vector_transforms: "/docs/reference/configuration/transforms" vector_twitter: "https://twitter.com/vectordotdev" - vector_unit_testing: "/guides/level-up/unit-testing/" + vector_unit_tests: "\(vector_configuration)/unit-tests" vector_version_branches: "\(vector_repo)/branches/all?query=v" vrl_announcement: "/blog/vector-remap-language" vrl_error_handling: "\(vrl_errors_reference)#handling" diff --git a/website/layouts/partials/admonition.html b/website/layouts/partials/admonition.html index 031c8e4d5d6f1..f4b2c59462c00 100644 --- a/website/layouts/partials/admonition.html +++ b/website/layouts/partials/admonition.html @@ -45,7 +45,7 @@
{{ with .title }} - {{ . }} + {{ . | markdownify }} {{ end }} diff --git a/website/layouts/partials/data.html b/website/layouts/partials/data.html index 45a471ed31380..08df04d577d82 100644 --- a/website/layouts/partials/data.html +++ b/website/layouts/partials/data.html @@ -617,7 +617,7 @@

{{ if $infallible }} {{ partial "badge.html" (dict "word" "infallible" "color" "blue") }} {{ else }} - {{ partial "badge.html" (dict "word" "fallible" "color" "red") }} + {{ partial "badge.html" (dict "word" "fallible" "color" "yellow") }} {{ end }} @@ -1772,6 +1772,13 @@

{{ . }} {{ end }} + + {{ if .required }} + yes + {{ else }} + no + {{ end }} + {{ end }} @@ -1793,6 +1800,9 @@

Default + + Required? + diff --git a/website/layouts/partials/navbar.html b/website/layouts/partials/navbar.html index b4223f86b239f..a40eb72c4b1be 100644 --- a/website/layouts/partials/navbar.html +++ b/website/layouts/partials/navbar.html @@ -45,38 +45,51 @@

+ +
{{/* Dark/light mode toggler */}} -
+
{{ partial "navbar/mode-toggler.html" . }}
- {{/* Search bar */}} - {{ partial "navbar/search-bar.html" }} +
+ - {{/* Social icons */}} - + {{/* Targeted by the global site search in assets/js/search.tsx */}} + +
+ + {{/* Social icons */}} + - {{/* Mobile dropdown toggle */}} -
- + {{/* Heroicon: outline/x */}} + + +
diff --git a/website/layouts/partials/navbar/mode-toggler.html b/website/layouts/partials/navbar/mode-toggler.html index 05ff266157803..96303e9381702 100644 --- a/website/layouts/partials/navbar/mode-toggler.html +++ b/website/layouts/partials/navbar/mode-toggler.html @@ -1,21 +1,15 @@ - \ No newline at end of file diff --git a/website/layouts/partials/navbar/search-bar.html b/website/layouts/partials/navbar/search-bar.html deleted file mode 100644 index 2888a75202e25..0000000000000 --- a/website/layouts/partials/navbar/search-bar.html +++ /dev/null @@ -1,9 +0,0 @@ -
- - - {{/* Targeted by the global site search in assets/js/search.tsx */}} - -
- \ No newline at end of file