From eb9916a339819e29339ea48ac1cfe56ed33383b1 Mon Sep 17 00:00:00 2001 From: Toby Lawrence Date: Tue, 31 May 2022 16:00:33 -0400 Subject: [PATCH 01/12] wip commit Signed-off-by: Toby Lawrence --- Cargo.lock | 52 +++++++------ Cargo.toml | 2 + lib/vector-config-common/Cargo.toml | 2 +- lib/vector-config-macros/Cargo.toml | 4 +- lib/vector-config-macros/src/configurable.rs | 8 +- lib/vector-config/Cargo.toml | 4 +- lib/vector-config/src/lib.rs | 11 ++- src/sources/apache_metrics/mod.rs | 12 ++- src/sources/aws_ecs_metrics/mod.rs | 50 +++++++++++- src/sources/mod.rs | 80 ++++++++++++++++++++ 10 files changed, 181 insertions(+), 44 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7125f57c95643..a40c144c390fc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8785,6 +8785,8 @@ dependencies = [ "vector-vrl-functions", "vector_buffers", "vector_common", + "vector_config", + "vector_config_macros", "vector_core", "vrl", "vrl-cli", @@ -8814,29 +8816,6 @@ dependencies = [ "uuid 1.1.0", ] -[[package]] -name = "vector-config-common" -version = "0.1.0" -dependencies = [ - "darling 0.14.1", - "proc-macro2", - "quote", - "schemars", - "syn", -] - -[[package]] -name = "vector-config-macros" -version = "0.1.0" -dependencies = [ - "darling 0.14.1", - "proc-macro2", - "quote", - "serde_derive_internals", - "syn", - "vector-config-common", -] - [[package]] name = "vector-vrl-functions" version = "0.1.0" @@ -8918,8 +8897,31 @@ dependencies = [ "schemars", "serde", "serde_json", - "vector-config-common", - "vector-config-macros", + "vector_config_common", + "vector_config_macros", +] + +[[package]] +name = "vector_config_common" +version = "0.1.0" +dependencies = [ + "darling 0.14.1", + "proc-macro2", + "quote", + "schemars", + "syn", +] + +[[package]] +name = "vector_config_macros" +version = "0.1.0" +dependencies = [ + "darling 0.14.1", + "proc-macro2", + "quote", + "serde_derive_internals", + "syn", + "vector_config_common", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 
fd4ab53d3983b..33a57d1ef319b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -122,6 +122,8 @@ tracing-limit = { path = "lib/tracing-limit" } value = { path = "lib/value" } vector_buffers = { path = "lib/vector-buffers", default-features = false } vector_common = { path = "lib/vector-common" } +vector_config = { path = "lib/vector-config" } +vector_config_macros = { path = "lib/vector-config-macros" } vector_core = { path = "lib/vector-core", default-features = false, features = ["vrl"] } vector-api-client = { path = "lib/vector-api-client", optional = true } vector-vrl-functions = { path = "lib/vector-vrl-functions" } diff --git a/lib/vector-config-common/Cargo.toml b/lib/vector-config-common/Cargo.toml index ad102412e8b28..85842e757c004 100644 --- a/lib/vector-config-common/Cargo.toml +++ b/lib/vector-config-common/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "vector-config-common" +name = "vector_config_common" version = "0.1.0" edition = "2021" diff --git a/lib/vector-config-macros/Cargo.toml b/lib/vector-config-macros/Cargo.toml index 31ce581e55818..10eab15266abe 100644 --- a/lib/vector-config-macros/Cargo.toml +++ b/lib/vector-config-macros/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "vector-config-macros" +name = "vector_config_macros" version = "0.1.0" edition = "2021" @@ -12,4 +12,4 @@ proc-macro2 = { version = "1.0", default-features = false } quote = { version = "1.0", default-features = false } serde_derive_internals = "0.26" syn = { version = "1.0", default-features = false } -vector-config-common = { path = "../vector-config-common" } +vector_config_common = { path = "../vector-config-common" } diff --git a/lib/vector-config-macros/src/configurable.rs b/lib/vector-config-macros/src/configurable.rs index 1b6cdf3fe90ff..87c5fb7a4f1ac 100644 --- a/lib/vector-config-macros/src/configurable.rs +++ b/lib/vector-config-macros/src/configurable.rs @@ -94,7 +94,7 @@ fn build_enum_generate_schema_fn(variants: &[Variant<'_>]) -> proc_macro2::Token 
.map(generate_enum_variant_schema); quote! { - fn generate_schema(schema_gen: &mut ::schemars::gen::SchemaGenerator, overrides: ::vector_config::Metadata<#clt, Self>) -> ::schemars::schema::SchemaObject { + fn generate_schema(schema_gen: &mut ::vector_config::schemars::gen::SchemaGenerator, overrides: ::vector_config::Metadata<#clt, Self>) -> ::vector_config::schemars::schema::SchemaObject { let mut subschemas = ::std::vec::Vec::new(); let schema_metadata = Self::metadata().merge(overrides); @@ -203,7 +203,7 @@ fn build_named_struct_generate_schema_fn( .map(|field| generate_named_struct_field(container, field)); quote! { - fn generate_schema(schema_gen: &mut ::schemars::gen::SchemaGenerator, overrides: ::vector_config::Metadata<#clt, Self>) -> ::schemars::schema::SchemaObject { + fn generate_schema(schema_gen: &mut ::vector_config::schemars::gen::SchemaGenerator, overrides: ::vector_config::Metadata<#clt, Self>) -> ::vector_config::schemars::schema::SchemaObject { let mut properties = ::indexmap::IndexMap::new(); let mut required = ::std::collections::BTreeSet::new(); @@ -239,7 +239,7 @@ fn build_tuple_struct_generate_schema_fn(fields: &[Field<'_>]) -> proc_macro2::T .map(generate_tuple_struct_field); quote! { - fn generate_schema(schema_gen: &mut ::schemars::gen::SchemaGenerator, overrides: ::vector_config::Metadata<#clt, Self>) -> ::schemars::schema::SchemaObject { + fn generate_schema(schema_gen: &mut ::vector_config::schemars::gen::SchemaGenerator, overrides: ::vector_config::Metadata<#clt, Self>) -> ::vector_config::schemars::schema::SchemaObject { let mut subschemas = ::std::collections::Vec::new(); let metadata = Self::metadata().merge(overrides); @@ -271,7 +271,7 @@ fn build_newtype_struct_generate_schema_fn(fields: &[Field<'_>]) -> proc_macro2: let field_schema = mapped_fields.remove(0); quote! 
{ - fn generate_schema(schema_gen: &mut ::schemars::gen::SchemaGenerator, overrides: ::vector_config::Metadata<#clt, Self>) -> ::schemars::schema::SchemaObject { + fn generate_schema(schema_gen: &mut ::vector_config::schemars::gen::SchemaGenerator, overrides: ::vector_config::Metadata<#clt, Self>) -> ::vector_config::schemars::schema::SchemaObject { let metadata = Self::metadata().merge(overrides); #field_schema diff --git a/lib/vector-config/Cargo.toml b/lib/vector-config/Cargo.toml index 6173670e29c95..6f4f6195deff9 100644 --- a/lib/vector-config/Cargo.toml +++ b/lib/vector-config/Cargo.toml @@ -11,5 +11,5 @@ num-traits = { version = "0.2.15", default-features = false } schemars = { version = "0.8.10", default-features = true, features = ["preserve_order"] } serde = { version = "1.0", default-features = false } serde_json = { version = "1.0.81", default-features = false } -vector-config-common = { path = "../vector-config-common" } -vector-config-macros = { path = "../vector-config-macros" } +vector_config_common = { path = "../vector-config-common" } +vector_config_macros = { path = "../vector-config-macros" } diff --git a/lib/vector-config/src/lib.rs b/lib/vector-config/src/lib.rs index bd061eb3cec0c..03ce792962cc0 100644 --- a/lib/vector-config/src/lib.rs +++ b/lib/vector-config/src/lib.rs @@ -31,17 +31,21 @@ use core::fmt; use core::marker::PhantomData; use num_traits::{Bounded, ToPrimitive}; -use schemars::{gen::SchemaGenerator, schema::SchemaObject}; use serde::{Deserialize, Serialize}; pub mod schema; +// Re-export of `schemars` to make the imports simpler in `vector`. +pub mod schemars { + pub use schemars::{gen, schema}; +} + mod stdlib; // Re-export of the `#[configurable_component]` and `#[derive(Configurable)]` proc macros. pub use vector_config_macros::*; -// Re-export of both `Format` and `Validation` from `vetor_config_common`. +// Re-export of both `Format` and `Validation` from `vector_config_common`. 
// // The crate exists so that both `vector_config_macros` and `vector_config` can import the types and work with them // natively, but from a codegen and usage perspective, it's much cleaner to export everything needed to use @@ -49,6 +53,7 @@ pub use vector_config_macros::*; pub mod validation { pub use vector_config_common::validation::*; } + #[derive(Clone)] pub struct Metadata<'de, T: Configurable<'de>> { title: Option<&'static str>, @@ -301,7 +306,7 @@ where } /// Generates the schema for this value. - fn generate_schema(gen: &mut SchemaGenerator, overrides: Metadata<'de, Self>) -> SchemaObject; + fn generate_schema(gen: &mut schemars::gen::SchemaGenerator, overrides: Metadata<'de, Self>) -> schemars::schema::SchemaObject; } #[doc(hidden)] diff --git a/src/sources/apache_metrics/mod.rs b/src/sources/apache_metrics/mod.rs index 5d2c3a2321407..e17df1816afb4 100644 --- a/src/sources/apache_metrics/mod.rs +++ b/src/sources/apache_metrics/mod.rs @@ -8,9 +8,9 @@ use chrono::Utc; use futures::{stream, FutureExt, StreamExt, TryFutureExt}; use http::uri::Scheme; use hyper::{Body, Request}; -use serde::{Deserialize, Serialize}; use snafu::ResultExt; use tokio_stream::wrappers::IntervalStream; +use vector_config::configurable_component; use vector_core::ByteSizeOf; use crate::{ @@ -31,11 +31,17 @@ mod parser; pub use parser::ParseError; -#[derive(Deserialize, Serialize, Clone, Debug)] -struct ApacheMetricsConfig { +#[configurable_component(source)] +#[derive(Clone, Debug)] +pub struct ApacheMetricsConfig { + /// The list of `mod_status` endpoints to scrape metrics from. endpoints: Vec<String>, + /// The interval between scrapes, in seconds. #[serde(default = "default_scrape_interval_secs")] scrape_interval_secs: u64, + /// The namespace of the metric. + /// + /// Disabled if empty.
#[serde(default = "default_namespace")] namespace: String, } diff --git a/src/sources/aws_ecs_metrics/mod.rs b/src/sources/aws_ecs_metrics/mod.rs index 6903abe9d9471..0f0a561c2c6e0 100644 --- a/src/sources/aws_ecs_metrics/mod.rs +++ b/src/sources/aws_ecs_metrics/mod.rs @@ -2,9 +2,9 @@ use std::{env, time::Instant}; use futures::StreamExt; use hyper::{Body, Client, Request}; -use serde::{Deserialize, Serialize}; use tokio::time; use tokio_stream::wrappers::IntervalStream; +use vector_config::configurable_component; use vector_core::ByteSizeOf; use crate::{ @@ -19,23 +19,65 @@ use crate::{ mod parser; -#[derive(Deserialize, Serialize, Clone, Debug)] + +/// Version of the AWS ECS task metadata endpoint to use. +/// +/// More information about the different versions can be found +/// [here](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-metadata-endpoint.html). +#[configurable_component] +#[derive(Clone, Debug)] #[serde(rename_all = "lowercase")] pub enum Version { + /// Version 2. + /// + /// More information about version 2 of the task metadata endpoint can be found + /// [here](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-metadata-endpoint-v2.html). V2, + /// Version 3. + /// + /// More information about version 3 of the task metadata endpoint can be found + /// [here](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-metadata-endpoint-v3.html). V3, + /// Version 4. + /// + /// More information about version 4 of the task metadata endpoint can be found + /// [here](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-metadata-endpoint-v4.html). V4, } -#[derive(Deserialize, Serialize, Clone, Debug)] + +#[configurable_component(source)] +#[derive(Clone, Debug)] #[serde(deny_unknown_fields)] -struct AwsEcsMetricsSourceConfig { +pub struct AwsEcsMetricsSourceConfig { + /// Base URI of the task metadata endpoint.
+ /// + /// If empty, the URI will be automatically discovered based on the latest version detected. + /// + /// By default: + /// - The version 2 endpoint base URI is `169.254.170.2/v2/`. + /// - The version 3 endpoint base URI is stored in the environment variable `ECS_CONTAINER_METADATA_URI`. + /// - The version 4 endpoint base URI is stored in the environment variable `ECS_CONTAINER_METADATA_URI_V4`. #[serde(default = "default_endpoint")] endpoint: String, + /// The version of the task metadata endpoint to use. + /// + /// If empty, the version is automatically discovered based on environment variables. + /// + /// By default: + /// - Version 4 is used if the environment variable `ECS_CONTAINER_METADATA_URI_V4` is defined. + /// - Version 3 is used if the environment variable `ECS_CONTAINER_METADATA_URI_V4` is not defined, but the + /// environment variable `ECS_CONTAINER_METADATA_URI` _is_ defined. + /// - Version 2 is used if neither of the environment variables `ECS_CONTAINER_METADATA_URI_V4` or + /// `ECS_CONTAINER_METADATA_URI` are defined. #[serde(default = "default_version")] version: Version, + /// The interval between scrapes, in seconds. #[serde(default = "default_scrape_interval_secs")] scrape_interval_secs: u64, + /// The namespace of the metric. + /// + /// Disabled if empty. #[serde(default = "default_namespace")] namespace: String, } diff --git a/src/sources/mod.rs b/src/sources/mod.rs index 39f039089ee02..f1ebdc434dc90 100644 --- a/src/sources/mod.rs +++ b/src/sources/mod.rs @@ -73,6 +73,7 @@ pub mod vector; pub(crate) mod util; +use vector_config::configurable_component; pub use vector_core::source::Source; /// Common build errors @@ -81,3 +82,82 @@ enum BuildError { #[snafu(display("URI parse error: {}", source))] UriParseError { source: ::http::uri::InvalidUri }, } + +#[configurable_component] +#[derive(Clone)] +pub enum Sources { + /// Apache HTTP Server (HTTPD) Metrics. 
+ #[cfg(feature = "sources-apache_metrics")] + ApacheMetrics(#[configurable(derived)] apache_metrics::ApacheMetricsConfig), + /// AWS ECS Metrics. + #[cfg(feature = "sources-aws_ecs_metrics")] + AwsEcsMetrics(#[configurable(derived)] aws_ecs_metrics::AwsEcsMetricsSourceConfig), + /*#[cfg(feature = "sources-aws_kinesis_firehose")] + AwsKinesisFirehose(#[configurable(derived)] aws_kinesis_firehose::AwsKinesisFirehoseConfig), + #[cfg(feature = "sources-aws_s3")] + AwsS3(#[configurable(derived)] aws_s3::AwsS3Config), + #[cfg(feature = "sources-aws_sqs")] + AwsSqs(#[configurable(derived)] aws_sqs::AwsSqsConfig), + #[cfg(feature = "sources-datadog_agent")] + DatadogAgent(#[configurable(derived)] datadog::agent::DatadogAgentConfig), + #[cfg(feature = "sources-demo_logs")] + DemoLogs(#[configurable(derived)] demo_logs::DemoLogsConfig), + #[cfg(all(unix, feature = "sources-dnstap"))] + Dnstap(#[configurable(derived)] dnstap::DnstapConfig), + #[cfg(feature = "sources-docker_logs")] + DockerLogs(#[configurable(derived)] docker_logs::DockerLogsConfig), + #[cfg(feature = "sources-eventstoredb_metrics")] + EventstoreDbMetrics(#[configurable(derived)] eventstoredb_metrics::EventStoreDbConfig), + #[cfg(feature = "sources-exec")] + Exec(#[configurable(derived)] exec::ExecConfig), + #[cfg(feature = "sources-file")] + File(#[configurable(derived)] file::FileConfig), + #[cfg(feature = "sources-fluent")] + Fluent(#[configurable(derived)] fluent::FluentConfig), + #[cfg(feature = "sources-gcp_pubsub")] + GcpPubsub(#[configurable(derived)] gcp_pubsub::PubsubConfig), + #[cfg(feature = "sources-heroku_logs")] + HerokuLogs(#[configurable(derived)] heroku_logs::LogplexConfig), + #[cfg(feature = "sources-host_metrics")] + HostMetrics(#[configurable(derived)] host_metrics::HostMetricsConfig), + #[cfg(feature = "sources-http")] + Http(#[configurable(derived)] http::SimpleHttpConfig), + #[cfg(feature = "sources-internal_logs")] + InternalLogs(#[configurable(derived)] 
internal_logs::InternalLogsConfig), + #[cfg(feature = "sources-internal_metrics")] + InternalMetrics(#[configurable(derived)] internal_metrics::InternalMetricsConfig), + #[cfg(all(unix, feature = "sources-journald"))] + Journald(#[configurable(derived)] journald::JournaldConfig), + #[cfg(all(feature = "sources-kafka", feature = "rdkafka"))] + Kafka(#[configurable(derived)] kafka::KafkaSourceConfig), + #[cfg(feature = "sources-kubernetes_logs")] + KubernetesLogs(#[configurable(derived)] kubernetes_logs::Config), + #[cfg(all(feature = "sources-logstash"))] + Logstash(#[configurable(derived)] logstash::LogstashConfig), + #[cfg(feature = "sources-mongodb_metrics")] + MongodbMetrics(#[configurable(derived)] mongodb_metrics::MongoDbMetricsConfig), + #[cfg(all(feature = "sources-nats"))] + Nats(#[configurable(derived)] nats::NatsSourceConfig), + #[cfg(feature = "sources-nginx_metrics")] + NginxMetrics(#[configurable(derived)] nginx_metrics::NginxMetricsConfig), + #[cfg(feature = "sources-postgresql_metrics")] + PostgresqlMetrics(#[configurable(derived)] postgresql_metrics::PostgresqlMetricsConfig), + #[cfg(feature = "sources-prometheus")] + PrometheusScrape(#[configurable(derived)] prometheus::scrape::PrometheusScrapeConfig), + #[cfg(feature = "sources-prometheus")] + PrometheusRemoteWrite(#[configurable(derived)] prometheus::remote_write::PrometheusRemoteWriteConfig), + #[cfg(feature = "sources-redis")] + Redis(#[configurable(derived)] redis::RedisSourceConfig), + #[cfg(feature = "sources-socket")] + Socket(#[configurable(derived)] socket::SocketConfig), + #[cfg(feature = "sources-splunk_hec")] + SplunkHec(#[configurable(derived)] splunk_hec::SplunkConfig), + #[cfg(feature = "sources-statsd")] + Statsd(#[configurable(derived)] statsd::StatsdConfig), + #[cfg(feature = "sources-stdin")] + Stdin(#[configurable(derived)] stdin::StdinConfig), + #[cfg(feature = "sources-syslog")] + Syslog(#[configurable(derived)] syslog::SyslogConfig), + #[cfg(feature = "sources-vector")] + 
Vector(#[configurable(derived)] vector::VectorConfig),*/ +} From 53c003a35d2a6c2f9d4707f5a5d34c10d2f2b669 Mon Sep 17 00:00:00 2001 From: Toby Lawrence Date: Thu, 2 Jun 2022 16:55:41 -0400 Subject: [PATCH 02/12] initial commit with most sources fully instrumented with configurable bits Signed-off-by: Toby Lawrence --- Cargo.lock | 2 + lib/codecs/Cargo.toml | 1 + .../decoding/framing/character_delimited.rs | 3 +- .../src/decoding/framing/newline_delimited.rs | 3 +- .../src/decoding/framing/octet_counting.rs | 4 +- lib/codecs/src/decoding/mod.rs | 5 +- lib/vector-config-macros/src/ast/container.rs | 20 ++-- lib/vector-config-macros/src/ast/field.rs | 28 +++-- lib/vector-config-macros/src/ast/util.rs | 3 +- lib/vector-config-macros/src/configurable.rs | 104 ++++++++++++----- .../src/configurable_component.rs | 1 + lib/vector-config-macros/src/lib.rs | 3 +- lib/vector-config/src/lib.rs | 63 ++++++++-- lib/vector-config/src/schema.rs | 35 +++++- lib/vector-config/src/stdlib.rs | 55 ++++++++- lib/vector-config/tests/basic.rs | 26 +++++ lib/vector-core/Cargo.toml | 1 + lib/vector-core/src/config/mod.rs | 5 +- src/aws/auth.rs | 24 +++- src/aws/region.rs | 6 +- src/http.rs | 31 ++++- src/line_agg.rs | 53 +++++---- src/sources/apache_metrics/mod.rs | 3 + src/sources/aws_ecs_metrics/mod.rs | 17 +-- src/sources/aws_kinesis_firehose/mod.rs | 46 +++++++- src/sources/aws_s3/mod.rs | 48 ++++++-- src/sources/aws_s3/sqs.rs | 39 ++++++- src/sources/aws_sqs/config.rs | 41 ++++++- src/sources/aws_sqs/mod.rs | 2 +- src/sources/datadog/agent/mod.rs | 48 ++++++-- src/sources/demo_logs.rs | 44 ++++++- src/sources/dnstap/mod.rs | 45 +++++++- src/sources/docker_logs.rs | 71 +++++++++++- src/sources/eventstoredb_metrics/mod.rs | 15 ++- src/sources/file.rs | 42 ++++++- src/sources/fluent/mod.rs | 24 +++- src/sources/gcp_pubsub.rs | 15 ++- src/sources/heroku_logs.rs | 19 ++- src/sources/http.rs | 61 +++++++++- src/sources/internal_logs.rs | 18 ++- src/sources/internal_metrics.rs | 36 ++++-- 
src/sources/journald.rs | 22 +++- src/sources/kafka.rs | 27 ++++- src/sources/logstash.rs | 21 +++- src/sources/mod.rs | 109 ++++++++++++++++-- src/sources/mongodb_metrics/mod.rs | 20 +++- src/sources/nats.rs | 18 ++- src/sources/nginx_metrics/mod.rs | 25 +++- src/sources/postgresql_metrics.rs | 15 ++- src/sources/prometheus/mod.rs | 3 + src/sources/prometheus/remote_write.rs | 2 +- src/sources/prometheus/scrape.rs | 2 +- src/sources/socket/mod.rs | 19 ++- src/sources/socket/tcp.rs | 41 ++++++- src/sources/socket/udp.rs | 33 +++++- src/sources/socket/unix.rs | 29 ++++- src/sources/splunk_hec/acknowledgements.rs | 31 ++++- src/sources/splunk_hec/mod.rs | 44 +++++-- src/sources/statsd/mod.rs | 39 ++++++- src/sources/statsd/unix.rs | 9 +- src/sources/stdin.rs | 22 +++- src/sources/syslog.rs | 54 ++++++++- src/sources/util/multiline_config.rs | 10 +- src/sources/util/tcp/mod.rs | 14 ++- src/sources/vector/mod.rs | 33 ++++-- src/sources/vector/v1.rs | 22 +++- src/sources/vector/v2.rs | 17 ++- src/tcp.rs | 12 +- src/tls/settings.rs | 39 ++++++- 69 files changed, 1539 insertions(+), 303 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 44732672003ea..199ea849eb2fd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1686,6 +1686,7 @@ dependencies = [ "tracing 0.1.34", "value", "vector_common", + "vector_config", "vector_core", ] @@ -8991,6 +8992,7 @@ dependencies = [ "value", "vector_buffers", "vector_common", + "vector_config", "vrl", ] diff --git a/lib/codecs/Cargo.toml b/lib/codecs/Cargo.toml index 9ed2b3ace4a10..8d659b64d3d82 100644 --- a/lib/codecs/Cargo.toml +++ b/lib/codecs/Cargo.toml @@ -20,6 +20,7 @@ tokio-util = { version = "0.7", default-features = false, features = ["codec"] } tracing = { version = "0.1", default-features = false } value = { path = "../value", default-features = false } vector_common = { path = "../vector-common", default-features = false } +vector_config = { path = "../vector-config", default-features = false } vector_core = { path = 
"../vector-core", default-features = false } [dev-dependencies] diff --git a/lib/codecs/src/decoding/framing/character_delimited.rs b/lib/codecs/src/decoding/framing/character_delimited.rs index e1a65fb6ae0bd..941d2e23e68d2 100644 --- a/lib/codecs/src/decoding/framing/character_delimited.rs +++ b/lib/codecs/src/decoding/framing/character_delimited.rs @@ -3,6 +3,7 @@ use memchr::memchr; use serde::{Deserialize, Serialize}; use tokio_util::codec::Decoder; use tracing::{trace, warn}; +use vector_config::Configurable; use super::BoxedFramingError; @@ -28,7 +29,7 @@ impl CharacterDelimitedDecoderConfig { } /// Options for building a `CharacterDelimitedDecoder`. -#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)] +#[derive(Clone, Configurable, Debug, Deserialize, PartialEq, Serialize)] pub struct CharacterDelimitedDecoderOptions { /// The character that delimits byte sequences. #[serde(with = "vector_core::serde::ascii_char")] diff --git a/lib/codecs/src/decoding/framing/newline_delimited.rs b/lib/codecs/src/decoding/framing/newline_delimited.rs index 52198f592f23c..4d2ae63ea560d 100644 --- a/lib/codecs/src/decoding/framing/newline_delimited.rs +++ b/lib/codecs/src/decoding/framing/newline_delimited.rs @@ -2,6 +2,7 @@ use bytes::{Bytes, BytesMut}; use derivative::Derivative; use serde::{Deserialize, Serialize}; use tokio_util::codec::Decoder; +use vector_config::Configurable; use super::{BoxedFramingError, CharacterDelimitedDecoder}; @@ -17,7 +18,7 @@ pub struct NewlineDelimitedDecoderConfig { } /// Options for building a `NewlineDelimitedDecoder`. -#[derive(Debug, Clone, Derivative, Deserialize, Serialize, PartialEq)] +#[derive(Clone, Configurable, Debug, Derivative, Deserialize, PartialEq, Serialize)] #[derivative(Default)] pub struct NewlineDelimitedDecoderOptions { /// The maximum length of the byte buffer. 
diff --git a/lib/codecs/src/decoding/framing/octet_counting.rs b/lib/codecs/src/decoding/framing/octet_counting.rs index 29cfc80e233da..d8e39d04d908a 100644 --- a/lib/codecs/src/decoding/framing/octet_counting.rs +++ b/lib/codecs/src/decoding/framing/octet_counting.rs @@ -5,6 +5,7 @@ use derivative::Derivative; use serde::{Deserialize, Serialize}; use tokio_util::codec::{LinesCodec, LinesCodecError}; use tracing::trace; +use vector_config::Configurable; use super::BoxedFramingError; @@ -31,9 +32,10 @@ impl OctetCountingDecoderConfig { } /// Options for building a `OctetCountingDecoder`. -#[derive(Debug, Clone, Derivative, Deserialize, Serialize, PartialEq)] +#[derive(Clone, Configurable, Debug, Derivative, Deserialize, PartialEq, Serialize)] #[derivative(Default)] pub struct OctetCountingDecoderOptions { + /// The maximum length of the byte buffer. #[serde(skip_serializing_if = "vector_core::serde::skip_serializing_if_default")] max_length: Option<usize>, } diff --git a/lib/codecs/src/decoding/mod.rs b/lib/codecs/src/decoding/mod.rs index 74c3a01cabd5e..200b108d1d724 100644 --- a/lib/codecs/src/decoding/mod.rs +++ b/lib/codecs/src/decoding/mod.rs @@ -25,6 +25,7 @@ pub use framing::{ }; use serde::{Deserialize, Serialize}; use smallvec::SmallVec; +use vector_config::Configurable; use vector_core::{config::DataType, event::Event, schema}; /// An error that occurred while decoding structured events from a byte stream / @@ -68,7 +69,7 @@ impl StreamDecodingError for Error { // Unfortunately, copying options of the nested enum variants is necessary // since `serde` doesn't allow `flatten`ing these: // https://github.com/serde-rs/serde/issues/1402. -#[derive(Debug, Clone, Deserialize, Serialize)] +#[derive(Clone, Configurable, Debug, Deserialize, Serialize)] #[serde(tag = "method", rename_all = "snake_case")] pub enum FramingConfig { /// Configures the `BytesDecoder`.
@@ -216,7 +217,7 @@ impl tokio_util::codec::Decoder for Framer { // Unfortunately, copying options of the nested enum variants is necessary // since `serde` doesn't allow `flatten`ing these: // https://github.com/serde-rs/serde/issues/1402. -#[derive(Debug, Clone, Deserialize, Serialize)] +#[derive(Clone, Configurable, Debug, Deserialize, Serialize)] #[serde(tag = "codec", rename_all = "snake_case")] pub enum DeserializerConfig { /// Configures the `BytesDeserializer`. diff --git a/lib/vector-config-macros/src/ast/container.rs b/lib/vector-config-macros/src/ast/container.rs index d2cfda2916767..48e7c00aa84ac 100644 --- a/lib/vector-config-macros/src/ast/container.rs +++ b/lib/vector-config-macros/src/ast/container.rs @@ -15,6 +15,7 @@ const ERR_NO_ENUM_NEWTYPE_INTERNAL_TAG: &str = "newtype variants (i.e. `enum Som const ERR_NO_ENUM_VARIANT_DESCRIPTION: &str = "enum variants must have a description i.e. `/// This is a description` or `#[configurable(description = \"This is a description...\")]`"; const ERR_ENUM_UNTAGGED_DUPLICATES: &str = "enum variants must be unique in style/shape when in untagged mode i.e. there cannot be multiple unit variants, or tuple variants with the same fields, etc"; const ERR_NO_UNIT_STRUCTS: &str = "unit structs are not supported by `Configurable`"; +const ERR_MISSING_DESC: &str = "all structs/enums must have a description i.e. `/// This is a description` or `#[configurable(description = \"This is a description...\")]`"; pub struct Container<'a> { original: &'a DeriveInput, @@ -70,16 +71,6 @@ impl<'a> Container<'a> { ); } - // We don't support internal tag for newtype variants, because `serde` doesn't support it. - if variant.style() == Style::Newtype - && matches!(variant.tagging(), Tagging::Internal { .. }) - { - accumulator.push( - darling::Error::custom(ERR_NO_ENUM_NEWTYPE_INTERNAL_TAG) - .with_span(variant), - ); - } - // All variants must have a description. No derived/transparent mode. 
if variant.description().is_none() { accumulator.push( @@ -127,6 +118,15 @@ impl<'a> Container<'a> { }, }; + // All containers must have a description: no ifs, ands, or buts. + // + // The compile-time errors are a bit too inscrutable otherwise, and inscrutable errors are not very + // helpful when using procedural macros. + if attrs.description.is_none() { + accumulator + .push(darling::Error::custom(ERR_MISSING_DESC).with_span(&serde.ident)); + } + let original = input; let name = serde.attrs.name().deserialize_name(); let default_value = get_serde_default_value(serde.attrs.default()); diff --git a/lib/vector-config-macros/src/ast/field.rs b/lib/vector-config-macros/src/ast/field.rs index 22763202b00b9..b7b097117b657 100644 --- a/lib/vector-config-macros/src/ast/field.rs +++ b/lib/vector-config-macros/src/ast/field.rs @@ -69,6 +69,10 @@ impl<'a> Field<'a> { pub fn visible(&self) -> bool { self.attrs.visible } + + pub fn flatten(&self) -> bool { + self.attrs.flatten + } } #[derive(Debug, FromAttributes)] @@ -82,6 +86,8 @@ struct Attributes { deprecated: bool, #[darling(skip)] visible: bool, + #[darling(skip)] + flatten: bool, #[darling(multiple)] validation: Vec, } @@ -94,6 +100,7 @@ impl Attributes { ) -> darling::Result { // Derive any of the necessary fields from the `serde` side of things. self.visible = !field.attrs.skip_deserializing() || !field.attrs.skip_serializing(); + self.flatten = field.attrs.flatten(); // Parse any forwarded attributes that `darling` left us. self.deprecated = forwarded_attrs @@ -107,21 +114,24 @@ impl Attributes { self.title = self.title.or(doc_title); self.description = self.description.or(doc_description); - // Make sure that if we weren't able to derive a description from the attributes on this - // field, that they used the `derived` flag, which implies forwarding the description from - // the underlying type of the field when the field type's schema is being finalized. 
+ // Make sure that if we weren't able to derive a description from the attributes on this field, that they used + // the `derived` flag, which implies forwarding the description from the underlying type of the field when the + // field type's schema is being finalized. + // + // The goal with doing so here is to be able to raise a compile-time error that points the user towards setting + // an explicit description unless they opt to derive it from the underlying type, which won't be _rare_, but is + // the only way for us to surface such a contextual error, as procedural macros can't dive into the given `T` to + // know if it has a description or not. // - // The goal with doing so here is to be able to raise a compile-time error that points the - // user towards setting an explicit description unless they opt to derive it from the - // underlying type, which won't be _rare_, but is the only way for us to surface such a - // contextual error, as procedural macros can't dive into the given `T` to know if it has a - // description or not. + // If a field is flattened, that's also another form of derivation so we don't require a description in that + // scenario either. 
if self.description.is_none() && !self.derived.is_present() && !self.transparent.is_present() && self.visible + && !self.flatten { - return Err(err_field_missing_description(field.original)); + return Err(err_field_missing_description(&field.original.ident)); } Ok(self) diff --git a/lib/vector-config-macros/src/ast/util.rs b/lib/vector-config-macros/src/ast/util.rs index 52702568cfe83..de6c83004f39d 100644 --- a/lib/vector-config-macros/src/ast/util.rs +++ b/lib/vector-config-macros/src/ast/util.rs @@ -1,4 +1,5 @@ use darling::error::Accumulator; +use quote::spanned::Spanned; use serde_derive_internals::{attr as serde_attr, Ctxt}; use syn::{Attribute, ExprPath, Lit, Meta, MetaNameValue}; @@ -77,7 +78,7 @@ pub fn get_default_exprpath() -> ExprPath { .expect("expression path for default should never be invalid") } -pub fn err_field_missing_description(field: &syn::Field) -> darling::Error { +pub fn err_field_missing_description<T: Spanned>(field: &T) -> darling::Error { darling::Error::custom(ERR_FIELD_MISSING_DESCRIPTION).with_span(field) } diff --git a/lib/vector-config-macros/src/configurable.rs b/lib/vector-config-macros/src/configurable.rs index 87c5fb7a4f1ac..b3a21438c1d77 100644 --- a/lib/vector-config-macros/src/configurable.rs +++ b/lib/vector-config-macros/src/configurable.rs @@ -150,32 +150,46 @@ fn generate_named_struct_field( let field_schema = generate_struct_field(field); - // If there is no default value specified for either the field itself, or the container the - // field is a part of, then we consider it required unless the field type itself is inherently - // optional, such as being `Option`. - let maybe_field_required = - if container.default_value().is_none() && field.default_value().is_none() { - Some(quote!
{ - if !#field_as_configurable::is_optional() { - if !required.insert(#field_key.to_string()) { - panic!(#field_already_contained); + // If the field is flattened, we store it into a different list of flattened subschemas vs adding it directly as a + // field via `properties`/`required`. + // + // If any flattened subschemas are present when we generate the struct schema overall, we do the merging of those at + // the end. + let integrate_field = if field.flatten() { + quote! { + flattened_subschemas.push(subschema); + } + } else { + // If there is no default value specified for either the field itself, or the container the + // field is a part of, then we consider it required unless the field type itself is inherently + // optional, such as being `Option`. + let maybe_field_required = + if container.default_value().is_none() && field.default_value().is_none() { + Some(quote! { + if !#field_as_configurable::is_optional() { + if !required.insert(#field_key.to_string()) { + panic!(#field_already_contained); + } } - } - }) - } else { - None - }; - - quote! { - { - #field_schema + }) + } else { + None + }; + quote! { if let Some(_) = properties.insert(#field_key.to_string(), subschema) { panic!(#field_already_contained); } #maybe_field_required } + }; + + quote! { + { + #field_schema + #integrate_field + } } } @@ -204,24 +218,25 @@ fn build_named_struct_generate_schema_fn( quote! 
{ fn generate_schema(schema_gen: &mut ::vector_config::schemars::gen::SchemaGenerator, overrides: ::vector_config::Metadata<#clt, Self>) -> ::vector_config::schemars::schema::SchemaObject { - let mut properties = ::indexmap::IndexMap::new(); + let mut properties = ::vector_config::indexmap::IndexMap::new(); let mut required = ::std::collections::BTreeSet::new(); + let mut flattened_subschemas = ::std::vec::Vec::new(); let metadata = Self::metadata().merge(overrides); #(#mapped_fields)* - // TODO: We need to figure out if we actually use `#[serde(flatten)]` anywhere in order - // to capture not-specifically-named fields i.e. collecting all remaining/unknown fields - // in a hashmap. - // - // That usage would drive `additional_properties` but I can we can currently ignore it - // until we hit our first struct that needs it. let additional_properties = None; let mut schema = ::vector_config::schema::generate_struct_schema( properties, required, additional_properties, ); + + // If we have any flattened subschemas, deal with them now. + if !flattened_subschemas.is_empty() { + ::vector_config::schema::convert_to_flattened_schema(&mut schema, flattened_subschemas); + } + ::vector_config::schema::finalize_schema(schema_gen, &mut schema, metadata); schema @@ -476,7 +491,7 @@ fn generate_enum_struct_named_variant_schema( let mapped_fields = variant.fields().iter().map(generate_named_enum_field); quote! { - let mut properties = ::indexmap::IndexMap::new(); + let mut properties = ::vector_config::indexmap::IndexMap::new(); let mut required = ::std::collections::BTreeSet::new(); #(#mapped_fields)* @@ -554,7 +569,7 @@ fn generate_enum_variant_schema(variant: &Variant<'_>) -> proc_macro2::TokenStre quote! 
{ #variant_schema - let mut wrapper_properties = ::indexmap::IndexMap::new(); + let mut wrapper_properties = ::vector_config::indexmap::IndexMap::new(); let mut wrapper_required = ::std::collections::BTreeSet::new(); wrapper_properties.insert(#variant_name.to_string(), subschema); @@ -610,9 +625,34 @@ fn generate_enum_variant_schema(variant: &Variant<'_>) -> proc_macro2::TokenStre generate_enum_variant_subschema(variant, variant_schema) } Style::Tuple => panic!("tuple variants should be rejected during AST parsing"), - Style::Newtype => panic!( - "newtype variants in internal tagging mode should be rejected during AST parsing" - ), + Style::Newtype => { + // We have to delegate viability to `serde`, essentially, because using internal tagging for a newtype + // variant is only possible when the inner field is a struct or map, and we can't access that type of + // information here, which is why `serde` does it at compile-time. + + // As such, we generate the schema for the single field, like we would normally do for a newtype + // variant, and then we follow the struct flattening logic where we layer on our tag field schema on the + // schema of the wrapped field... and since it has to be a struct or map to be valid for `serde`, that + // means it will also be an object schema in both cases, which means our flatteneing logic will be + // correct if the caller is doing The Right Thing (tm). + let wrapped_variant_schema = generate_enum_newtype_struct_variant_schema(variant); + + let variant_schema = quote! 
{ + let mut subschema = { + let tag_schema = ::vector_config::schema::generate_internal_tagged_variant_schema(#tag.to_string(), #variant_name.to_string()); + let mut flattened_subschemas = ::std::vec::Vec::new(); + flattened_subschemas.push(tag_schema); + + #wrapped_variant_schema + + ::vector_config::schema::convert_to_flattened_schema(&mut subschema, flattened_subschemas); + + subschema + }; + }; + + generate_enum_variant_subschema(variant, variant_schema) + } Style::Unit => { // Internally-tagged unit variants are basically just a play on externally-tagged // struct variants. @@ -620,7 +660,7 @@ fn generate_enum_variant_schema(variant: &Variant<'_>) -> proc_macro2::TokenStre let variant_schema = quote! { #variant_schema - let mut wrapper_properties = ::indexmap::IndexMap::new(); + let mut wrapper_properties = ::vector_config::indexmap::IndexMap::new(); let mut wrapper_required = ::std::collections::BTreeSet::new(); wrapper_properties.insert(#tag.to_string(), subschema); @@ -673,7 +713,7 @@ fn generate_enum_variant_schema(variant: &Variant<'_>) -> proc_macro2::TokenStre quote! { { - let mut wrapper_properties = ::indexmap::IndexMap::new(); + let mut wrapper_properties = ::vector_config::indexmap::IndexMap::new(); let mut wrapper_required = ::std::collections::BTreeSet::new(); #tag_schema diff --git a/lib/vector-config-macros/src/configurable_component.rs b/lib/vector-config-macros/src/configurable_component.rs index e94ca8ffea9e5..48207c268efc7 100644 --- a/lib/vector-config-macros/src/configurable_component.rs +++ b/lib/vector-config-macros/src/configurable_component.rs @@ -46,6 +46,7 @@ pub fn configurable_component_impl(args: TokenStream, item: TokenStream) -> Toke let input = parse_macro_input!(item as DeriveInput); let derived = quote! 
{ #[derive(::vector_config_macros::Configurable, ::serde::Serialize, ::serde::Deserialize)] + #[serde(deny_unknown_fields)] #component_type #input }; diff --git a/lib/vector-config-macros/src/lib.rs b/lib/vector-config-macros/src/lib.rs index dc89a734bea54..eddcf31d050b1 100644 --- a/lib/vector-config-macros/src/lib.rs +++ b/lib/vector-config-macros/src/lib.rs @@ -7,12 +7,13 @@ mod ast; mod configurable; mod configurable_component; -/// Allows the given struct/enum to be used as a type within the Vector configuration. +/// Designates a type as being part of a Vector configuration. #[proc_macro_attribute] pub fn configurable_component(args: TokenStream, item: TokenStream) -> TokenStream { configurable_component::configurable_component_impl(args, item) } +/// A helpful lil derive. #[proc_macro_derive(Configurable, attributes(configurable))] pub fn derive_configurable(input: TokenStream) -> TokenStream { configurable::derive_configurable_impl(input) diff --git a/lib/vector-config/src/lib.rs b/lib/vector-config/src/lib.rs index 03ce792962cc0..c7cba5dc1e46f 100644 --- a/lib/vector-config/src/lib.rs +++ b/lib/vector-config/src/lib.rs @@ -15,10 +15,6 @@ // of that get generated in terms of what ends up in the schema? Do we even have fields with lifetime bounds in any of // our configuration types in `vector`? :thinking: // -// TODO: We don't support `#[serde(flatten)]` either for collecting unknown fields or for flattening a field into its -// parent struct. However, per #12341, we might actually not want to allow using `flatten` for collecting unknown -// fields, at least, which would make implementing flatten support for merging structs a bit easier. -// // TODO: Is there a way that we could attempt to brute force detect the types of fields being used with a validation to // give a compile-time error when validators are used incorrectly? 
For example, we throw a runtime error if you use a // negative `min` range bound on an unsigned integer field, but it's a bit opaque and hard to decipher. Could we simply @@ -26,7 +22,51 @@ // `u64`, etc -- and then throw a compile-error from the macro? We would still end up throwing an error at runtime if // our heuristic to detect unsigned integers failed, but we might be able to give a meaningful error closer to the // problem, which would be much better. - +// +// TODO: If we want to deny unknown fields on structs, JSON Schema supports that by setting `additionalProperties` to +// `false` on a schema, which turns it into a "closed" schema. However, this is at odds with types used in enums, which +// is all of our component configuration types. This is because applying `additionalProperties` to the configuration +// type's schema itself would consider something like an internal enum tag (i.e. `"type": "aws_s3"`) as an additional +// property, even if `type` was already accounted for in another subschema that was validated against. +// +// JSON Schema draft 2019-09 has a solution for this -- `unevaluatedProperties` -- which forces the validator to track +// what properties have been "accounted" for, so far, during subschema validation during things like validating against +// all subschemas in `allOf`. +// +// Essentially, we should force all structs to generate a schema that sets `additionalProperties` to `false`, but if it +// gets used in a way that will place it into `allOf` (which is the case for internally tagged enum variants aka all +// component configuration types) then we need to update the schema codegen to unset that field, and re-apply it as +// `unevaluatedProperties` on the schema which is using `allOf`. 
+//
+// Logically, this makes sense because we're only creating a new wrapper schema B around some schema A such that we can
+// use it as a tagged enum variant, so rules like "no additional properties" should apply to the wrapper, since schema A
+// and B should effectively represent the same exact thing.
+//
+// TODO: We may want to simply switch from using `description` as the baseline descriptive field to using `title`.
+// While, by itself, I think `description` makes a little more sense than `title`, it makes it hard to do split-location
+// documentation.
+//
+// For example, it would be nice to have helper types (i.e. `BatchConfig`, `MultilineConfig`, etc) define their own
+// titles, and then allow other structs that have their types as fields specify a description. This would be very useful
+// in cases where fields are optional, such that you want the field's title to be the title of the underlying type (e.g.
+// "Multi-line parsing configuration.") but you want the field's description to say something like "If not specified,
+// then multiline parsing is disabled". Including that description on `MultilineConfig` itself is kind of weird because
+// it forces that on everyone else using it, where, in some cases, it may not be optional at all.
+//
+// TODO: It's not clear what happens if we schemafy identically-named types i.e. `some::mod::Foo` and
+// `another::mod::Foo` since we use the type's ident, not its full path, to generate its referencable name. This is good
+// because the full names would be ugly as hell and offer little value but it means that we need all types, including
+// transitive types, to have unique names... and we can't check this at develop-time, only compile-time. :thinkies:
+//
+// TODO: When we implement `Configurable` for all the `NonZero*` types, we should make sure they have default metadata
+// that specifies a validation of having to be a minimum of 1.
+//
+// TODO: We need to figure out how to handle aliases. 
Looking previously, it seemed like we might need to do some very
+// ugly combinatorial explosion stuff to define a schema per permutation of all aliased fields in a config. We might be
+// able to get away with using a combination of `allOf` and `oneOf` where we define a subschema for the non-aliased
+// fields, and then a subschema using `oneOf` for each aliased field -- allowing it to match any of the possible field
+// names for that specific field -- and then combine them all with `allOf`, which keeps the schema as compact as
+// possible, I think, short of a new version of the specification coming out that adds native alias support for properties.
 
 use core::fmt;
 use core::marker::PhantomData;
@@ -35,9 +75,13 @@ use serde::{Deserialize, Serialize};
 
 pub mod schema;
 
-// Re-export of `schemars` to make the imports simpler in `vector`.
+// Re-export of the various public dependencies required by the generated code to simplify the import requirements for
+// crates actually using the macros/derives.
+pub mod indexmap {
+    pub use indexmap::*;
+}
 pub mod schemars {
-    pub use schemars::{gen, schema};
+    pub use schemars::*;
 }
 
 mod stdlib;
@@ -306,7 +350,10 @@ where
 }
 
 /// Generates the schema for this value. 
- fn generate_schema(gen: &mut schemars::gen::SchemaGenerator, overrides: Metadata<'de, Self>) -> schemars::schema::SchemaObject; + fn generate_schema( + gen: &mut schemars::gen::SchemaGenerator, + overrides: Metadata<'de, Self>, + ) -> schemars::schema::SchemaObject; } #[doc(hidden)] diff --git a/lib/vector-config/src/schema.rs b/lib/vector-config/src/schema.rs index 3a496b43bdc83..5d36e0c544eea 100644 --- a/lib/vector-config/src/schema.rs +++ b/lib/vector-config/src/schema.rs @@ -1,4 +1,4 @@ -use std::collections::BTreeSet; +use std::{collections::BTreeSet, mem}; use indexmap::IndexMap; use num_traits::{Bounded, ToPrimitive}; @@ -63,9 +63,6 @@ pub fn apply_metadata<'de, T>(schema: &mut SchemaObject, metadata: Metadata<'de, where T: Configurable<'de>, { - // TODO: apply validations here depending on the instance type(s) in the schema, and figure out how to split, or if - // we need to split, whether we apply validations to the referencable type and/or the actual mutable schema ref - // Figure out if we're applying metadata to a schema reference or the actual schema itself. // Some things only makes sense to add to the reference (like a default value to use), while // some things only make sense to add to the schema itself (like custom metadata, validation, @@ -115,6 +112,26 @@ where schema.metadata = Some(Box::new(schema_metadata)); } +pub fn convert_to_flattened_schema(primary: &mut SchemaObject, mut subschemas: Vec) { + // Now we need to extract our object validation portion into a new schema object, add it to the list of subschemas, + // and then update the primary schema to use `allOf`. It is not valid to "extend" a schema via `allOf`, hence why we + // have to extract the primary schema object validation first. + + // First, we replace the primary schema with an empty schema, because we need to push it the actual primary schema + // into the list of `allOf` schemas. 
This is due to the fact that it's not valid to "extend" a schema using `allOf`, + // so everything has to be in there. + let primary_subschema = mem::replace(primary, SchemaObject::default()); + subschemas.insert(0, primary_subschema); + + let all_of_schemas = subschemas.into_iter().map(Schema::Object).collect(); + + // Now update the primary schema to use `allOf` to bring everything together. + primary.subschemas = Some(Box::new(SubschemaValidation { + all_of: Some(all_of_schemas), + ..Default::default() + })); +} + pub fn generate_null_schema() -> SchemaObject { SchemaObject { instance_type: Some(InstanceType::Null.into()), @@ -321,6 +338,16 @@ pub fn generate_const_string_schema(value: String) -> SchemaObject { } } +pub fn generate_internal_tagged_variant_schema(tag: String, value: String) -> SchemaObject { + let mut properties = IndexMap::new(); + properties.insert(tag.clone(), generate_const_string_schema(value)); + + let mut required = BTreeSet::new(); + required.insert(tag); + + generate_struct_schema(properties, required, None) +} + pub fn generate_root_schema<'de, T>() -> RootSchema where T: Configurable<'de>, diff --git a/lib/vector-config/src/stdlib.rs b/lib/vector-config/src/stdlib.rs index eef2244a21db6..62a38be402daf 100644 --- a/lib/vector-config/src/stdlib.rs +++ b/lib/vector-config/src/stdlib.rs @@ -1,6 +1,7 @@ -use std::{collections::HashMap, net::SocketAddr}; +use std::{collections::HashMap, net::SocketAddr, path::PathBuf}; use schemars::{gen::SchemaGenerator, schema::SchemaObject}; +use vector_config_common::validation::Validation; use crate::{ schema::{ @@ -26,6 +27,17 @@ where true } + fn metadata() -> Metadata<'de, Self> { + // We clone the default metadata of the wrapped type because otherwise this "level" of the schema would + // effective sever the link between things like the description of `T` itself and what we show for a field of + // type `Option`. 
+ // + // To wit, this allows callers to use `#[configurable(derived)]` on a field of `Option` so long as `T` has a + // description, and both the optional field and the schema for `T` will get the description... but the + // description for the optional field can still be overridden independently, etc. + T::metadata().map_default_value(|default| Some(default)) + } + fn generate_schema(gen: &mut SchemaGenerator, overrides: Metadata<'de, Self>) -> SchemaObject { let inner_metadata = overrides.clone().flatten_default(); let mut schema = generate_optional_schema(gen, inner_metadata); @@ -118,7 +130,13 @@ where T: Configurable<'de>, { fn generate_schema(gen: &mut SchemaGenerator, overrides: Metadata<'de, Self>) -> SchemaObject { - let element_metadata = T::metadata(); + // We set `T` to be "transparent", which means that during schema finalization, we will relax the rules we + // enforce, such as needing a description, knowing that they'll be enforced on the field using `HashMap` itself, where carrying that description forward to `V` might literally make no sense, such as when `V` is + // a primitive type like an integer or string. + let mut element_metadata = T::metadata(); + element_metadata.set_transparent(); + let mut schema = generate_array_schema(gen, element_metadata); finalize_schema(gen, &mut schema, overrides); schema @@ -154,6 +172,7 @@ where } } +// Additional types that do not map directly to scalars. 
impl<'de> Configurable<'de> for SocketAddr { fn referencable_name() -> Option<&'static str> { Some("SocketAddr") @@ -172,3 +191,35 @@ impl<'de> Configurable<'de> for SocketAddr { schema } } + +impl<'de> Configurable<'de> for PathBuf { + fn referencable_name() -> Option<&'static str> { + Some("PathBuf") + } + + fn description() -> Option<&'static str> { + Some("A file path.") + } + + fn metadata() -> Metadata<'de, Self> { + let mut metadata = Metadata::default(); + if let Some(description) = Self::description() { + metadata.set_description(description); + } + + // Taken from + // https://stackoverflow.com/questions/44289075/regular-expression-to-validate-windows-and-linux-path-with-extension + // and manually checked against common Linux and Windows paths. It's probably not 100% correct, but it + // definitely covers the most basic cases. + const PATH_REGEX: &str = r#"(\/.*|[a-zA-Z]:\\(?:([^<>:"\/\\|?*]*[^<>:"\/\\|?*.]\\|..\\)*([^<>:"\/\\|?*]*[^<>:"\/\\|?*.]\\?|..\\))?)"#; + metadata.add_validation(Validation::Pattern(PATH_REGEX.to_string())); + + metadata + } + + fn generate_schema(gen: &mut SchemaGenerator, overrides: Metadata<'de, Self>) -> SchemaObject { + let mut schema = generate_string_schema(); + finalize_schema(gen, &mut schema, overrides); + schema + } +} diff --git a/lib/vector-config/tests/basic.rs b/lib/vector-config/tests/basic.rs index 8cedc2b2d173c..a01e7bea3ceed 100644 --- a/lib/vector-config/tests/basic.rs +++ b/lib/vector-config/tests/basic.rs @@ -7,6 +7,7 @@ use std::{ collections::HashMap, net::{Ipv4Addr, SocketAddr, SocketAddrV4}, + path::PathBuf, }; use serde::{de, Deserialize, Deserializer}; @@ -64,6 +65,26 @@ pub enum Encoding { ), } +/// Enableable TLS configuration. +#[derive(Clone)] +#[configurable_component] +pub struct TlsEnablableConfig { + /// Whether or not TLS is enabled. + pub enabled: bool, + #[serde(flatten)] + pub options: TlsConfig, +} + +/// TLS configuration. 
+#[derive(Clone)] +#[configurable_component] +pub struct TlsConfig { + /// Certificate file. + pub crt_file: Option, + /// Private key file. + pub key_file: Option, +} + /// A listening address that can optionally support being passed in by systemd. #[derive(Clone, Copy, Debug, PartialEq)] #[configurable_component] @@ -168,6 +189,9 @@ pub struct AdvancedSinkConfig { #[deprecated] #[serde(default = "default_advanced_sink_encoding")] encoding: Encoding, + /// Overridden TLS description. + #[configurable(derived)] + tls: Option, /// The tags to apply to each event. tags: HashMap, } @@ -191,6 +215,7 @@ fn default_advanced_sink_endpoint() -> String { /// Collection of various sources available in Vector. #[derive(Clone)] #[configurable_component] +#[serde(tag = "type")] pub enum SourceConfig { /// Simple source. Simple(#[configurable(derived)] SimpleSourceConfig), @@ -199,6 +224,7 @@ pub enum SourceConfig { /// Collection of various sinks available in Vector. #[derive(Clone)] #[configurable_component] +#[serde(tag = "type")] pub enum SinkConfig { /// Simple sink. 
Simple(#[configurable(derived)] SimpleSinkConfig), diff --git a/lib/vector-core/Cargo.toml b/lib/vector-core/Cargo.toml index f411771fe28d8..caf2fac4a1531 100644 --- a/lib/vector-core/Cargo.toml +++ b/lib/vector-core/Cargo.toml @@ -54,6 +54,7 @@ url = { version = "2", default-features = false } value = { path = "../value", default-features = false, features = ["lua", "toml", "json", "api"] } vector_buffers = { path = "../vector-buffers", default-features = false } vector_common = { path = "../vector-common" } +vector_config = { path = "../vector-config" } # Rename to "vrl" once we use a release with stable `-Z namespaced-features`: # https://doc.rust-lang.org/cargo/reference/unstable.html#namespaced-features vrl-lib = { package = "vrl", path = "../vrl/vrl", optional = true } diff --git a/lib/vector-core/src/config/mod.rs b/lib/vector-core/src/config/mod.rs index f5f59cbe0e050..5eca4a8e509f1 100644 --- a/lib/vector-core/src/config/mod.rs +++ b/lib/vector-core/src/config/mod.rs @@ -11,6 +11,7 @@ pub mod proxy; pub use global_options::GlobalOptions; pub use id::ComponentKey; pub use log_schema::{init_log_schema, log_schema, LogSchema}; +use vector_config::Configurable; use crate::schema; @@ -141,8 +142,10 @@ impl Output { } } -#[derive(Clone, Copy, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] +/// Acknowledgement configuration. +#[derive(Clone, Configurable, Copy, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] pub struct AcknowledgementsConfig { + /// Whether or not acknowledgements should be enabled. 
enabled: Option, } diff --git a/src/aws/auth.rs b/src/aws/auth.rs index 251e7e2978b2e..a77b3f572c47c 100644 --- a/src/aws/auth.rs +++ b/src/aws/auth.rs @@ -5,33 +5,47 @@ use aws_config::{ }; use aws_types::{credentials::SharedCredentialsProvider, region::Region, Credentials}; use serde::{Deserialize, Serialize}; +use vector_config::Configurable; // matches default load timeout from the SDK as of 0.10.1, but lets us confidently document the // default rather than relying on the SDK default to not change const DEFAULT_LOAD_TIMEOUT: Duration = Duration::from_secs(5); -/// Configuration for configuring authentication strategy for AWS. -#[derive(Serialize, Deserialize, Clone, Debug, Derivative)] +/// Configuration of the authentication strategy for interacting with AWS services. +#[derive(Clone, Configurable, Debug, Derivative, Deserialize, Serialize)] #[derivative(Default)] #[serde(untagged)] #[serde(deny_unknown_fields)] pub enum AwsAuthentication { + /// Authenticate using a fixed access key and secret pair. Static { + /// The AWS access key ID. access_key_id: String, + /// The AWS secret access key. secret_access_key: String, }, + /// Authenticate using credentials stored in a file. + /// + /// Optionally, specifies a credentials profile to use. File { + /// Path to the credentials file. credentials_file: String, + /// The credentials profile to use. profile: Option, }, + /// Assumes the given role ARN. Role { + /// The ARN of the role to assume. assume_role: String, + /// Timeout for assuming the role, in seconds. load_timeout_secs: Option, }, - // Default variant is used instead of Option since even for - // None we need to build `AwsCredentialsProvider`. + /// Default authentication strategy which tries a variety of substrategies in a chained fallback fashion. #[derivative(Default)] - Default { load_timeout_secs: Option }, + Default { + /// Timeout for successfully loading credentials, in seconds. 
+ load_timeout_secs: Option, + }, } impl AwsAuthentication { diff --git a/src/aws/region.rs b/src/aws/region.rs index d162e508b4f0b..9828c2a88708c 100644 --- a/src/aws/region.rs +++ b/src/aws/region.rs @@ -4,11 +4,15 @@ use aws_smithy_http::endpoint::Endpoint; use aws_types::region::Region; use http::Uri; use serde::{Deserialize, Serialize}; +use vector_config::Configurable; -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)] +/// The region/endpoint configuration for interacting with an AWS service. +#[derive(Clone, Configurable, Debug, Default, Deserialize, PartialEq, Serialize)] #[serde(default)] pub struct RegionOrEndpoint { + /// The AWS region to use. pub region: Option, + /// The API endpoint of the service. pub endpoint: Option, } diff --git a/src/http.rs b/src/http.rs index 9112f16947b3f..506bf41fcd862 100644 --- a/src/http.rs +++ b/src/http.rs @@ -13,10 +13,10 @@ use hyper::{ }; use hyper_openssl::HttpsConnector; use hyper_proxy::ProxyConnector; -use serde::{Deserialize, Serialize}; use snafu::{ResultExt, Snafu}; use tower::Service; use tracing::Instrument; +use vector_config::configurable_component; use crate::{ config::ProxyConfig, @@ -209,11 +209,32 @@ impl fmt::Debug for HttpClient { } } -#[derive(Deserialize, Serialize, Clone, Debug, PartialEq, Eq)] -#[serde(deny_unknown_fields, rename_all = "snake_case", tag = "strategy")] +/// Authentication strategy for requests. +/// +/// HTTP authentication should almost always be used with HTTPS only, as the authentication credentials are passed as an +/// HTTP header without any additional encryption beyond what is provided by the transport itself. +#[configurable_component] +#[derive(Clone, Debug, Eq, PartialEq)] +#[serde(rename_all = "snake_case", tag = "strategy")] pub enum Auth { - Basic { user: String, password: String }, - Bearer { token: String }, + /// Basic authentication. + /// + /// The username and password are concatenated and encoded via base64. 
+ Basic { + /// The username to send. + user: String, + + /// The password to send. + password: String, + }, + + /// Bearer authentication. + /// + /// A bearer token (OAuth2, JWT, etc) is passed as-is. + Bearer { + /// The bearer token to send. + token: String, + }, } pub trait MaybeAuth: Sized { diff --git a/src/line_agg.rs b/src/line_agg.rs index 8876f072c6bd3..4b8226d35e35c 100644 --- a/src/line_agg.rs +++ b/src/line_agg.rs @@ -16,51 +16,54 @@ use pin_project::pin_project; use regex::bytes::Regex; use serde::{Deserialize, Serialize}; use tokio_util::time::delay_queue::{DelayQueue, Key}; +use vector_config::Configurable; /// The mode of operation of the line aggregator. -#[derive(Debug, Hash, Clone, Copy, PartialEq, Deserialize, Serialize)] +#[derive(Clone, Configurable, Copy, Debug, Hash, Deserialize, PartialEq, Serialize)] #[serde(rename_all = "snake_case")] pub enum Mode { /// All consecutive lines matching this pattern are included in the group. - /// The first line (the line that matched the start pattern) does not need - /// to match the `ContinueThrough` pattern. - /// This is useful in cases such as a Java stack trace, where some indicator - /// in the line (such as leading whitespace) indicates that it is an - /// extension of the proceeding line. + /// + /// The first line (the line that matched the start pattern) does not need to match the `ContinueThrough` pattern. + /// + /// This is useful in cases such as a Java stack trace, where some indicator in the line (such as leading + /// whitespace) indicates that it is an extension of the proceeding line. ContinueThrough, - /// All consecutive lines matching this pattern, plus one additional line, - /// are included in the group. - /// This is useful in cases where a log message ends with a continuation - /// marker, such as a backslash, indicating that the following line is part - /// of the same message. 
+ /// All consecutive lines matching this pattern, plus one additional line, are included in the group. + /// + /// This is useful in cases where a log message ends with a continuation marker, such as a backslash, indicating + /// that the following line is part of the same message. ContinuePast, - /// All consecutive lines not matching this pattern are included in the - /// group. - /// This is useful where a log line contains a marker indicating that it - /// begins a new message. + /// All consecutive lines not matching this pattern are included in the group. + /// + /// This is useful where a log line contains a marker indicating that it begins a new message. HaltBefore, - /// All consecutive lines, up to and including the first line matching this - /// pattern, are included in the group. - /// This is useful where a log line ends with a termination marker, such as - /// a semicolon. + /// All consecutive lines, up to and including the first line matching this pattern, are included in the group. + /// + /// This is useful where a log line ends with a termination marker, such as a semicolon. HaltWith, } /// Configuration parameters of the line aggregator. #[derive(Debug, Clone)] pub struct Config { - /// Start pattern to look for as a beginning of the message. + /// The regular expression pattern for detecting the beginning of the message. pub start_pattern: Regex, - /// Condition pattern to look for. Exact behavior is configured via `mode`. + /// The regular expression pattern used for evaluating whether the current line should be aggregated or if + /// aggregation should stop. + /// + /// Configured in tandem with `mode` to define the overall aggregation behavior. pub condition_pattern: Regex, - /// Mode of operation, specifies how the condition pattern is interpreted. + /// The mode of aggregation. + /// + /// Configured in tandem with `condition_pattern` to define the overall aggregation behavior. 
pub mode: Mode, - /// The maximum time to wait for the continuation. Once this timeout is - /// reached, the buffered message is guaranteed to be flushed, even if - /// incomplete. + /// The maximum time to wait for subsequent lines to be received and evaluated for aggregation. + /// + /// Once this timeout is reached, the buffered message is guaranteed to be flushed, even if incomplete. pub timeout: Duration, } diff --git a/src/sources/apache_metrics/mod.rs b/src/sources/apache_metrics/mod.rs index e17df1816afb4..fcf61a028d967 100644 --- a/src/sources/apache_metrics/mod.rs +++ b/src/sources/apache_metrics/mod.rs @@ -31,14 +31,17 @@ mod parser; pub use parser::ParseError; +/// Configuration for the `apache_metrics` source. #[configurable_component(source)] #[derive(Clone, Debug)] pub struct ApacheMetricsConfig { /// The list of `mod_status` endpoints to scrape metrics from. endpoints: Vec, + /// The interval between scrapes, in seconds. #[serde(default = "default_scrape_interval_secs")] scrape_interval_secs: u64, + /// The namespace of the metric. /// /// Disabled if empty. diff --git a/src/sources/aws_ecs_metrics/mod.rs b/src/sources/aws_ecs_metrics/mod.rs index 0f0a561c2c6e0..85e4eacb9cac4 100644 --- a/src/sources/aws_ecs_metrics/mod.rs +++ b/src/sources/aws_ecs_metrics/mod.rs @@ -19,7 +19,6 @@ use crate::{ mod parser; - /// Version of the AWS ECS task metadata endpoint to use. /// /// More information about the different versions can be found @@ -45,10 +44,9 @@ pub enum Version { V4, } - +/// Configuration for the `aws_ecs_metrics` source. #[configurable_component(source)] #[derive(Clone, Debug)] -#[serde(deny_unknown_fields)] pub struct AwsEcsMetricsSourceConfig { /// Base URI of the task metadata endpoint. /// @@ -56,14 +54,15 @@ pub struct AwsEcsMetricsSourceConfig { /// /// By default: /// - The version 2 endpoint base URI is `169.254.170.2/v2/`. - /// - The version 3 endpoint base URI is stored in the environment variable `ECS_CONTAINER_METADATA_URI`. 
- /// - The version 4 endpoint base URI is stored in the environment variable `ECS_CONTAINER_METADATA_URI_V4`. + /// - The version 3 endpoint base URI is stored in the environment variable `ECS_CONTAINER_METADATA_URI`. + /// - The version 4 endpoint base URI is stored in the environment variable `ECS_CONTAINER_METADATA_URI_V4`. #[serde(default = "default_endpoint")] endpoint: String, + /// The version of the task metadata endpoint to use. /// /// If empty, the version is automatically discovered based on environment variables. - /// + /// /// By default: /// - Version 4 is used if the environment variable `ECS_CONTAINER_METADATA_URI_V4` is defined. /// - Version 3 is used if the environment variable `ECS_CONTAINER_METADATA_URI_V4` is not defined, but the @@ -72,11 +71,13 @@ pub struct AwsEcsMetricsSourceConfig { /// `ECS_CONTAINER_METADATA_URI` are defined. #[serde(default = "default_version")] version: Version, - /// The interval between scrapes, in seconds. + + /// The interval between scrapes, in seconds. #[serde(default = "default_scrape_interval_secs")] scrape_interval_secs: u64, + /// The namespace of the metric. - /// + /// /// Disabled if empty. #[serde(default = "default_namespace")] namespace: String, diff --git a/src/sources/aws_kinesis_firehose/mod.rs b/src/sources/aws_kinesis_firehose/mod.rs index 585ff4ffff7a4..ee6b543831227 100644 --- a/src/sources/aws_kinesis_firehose/mod.rs +++ b/src/sources/aws_kinesis_firehose/mod.rs @@ -2,8 +2,8 @@ use std::{fmt, net::SocketAddr}; use codecs::decoding::{DeserializerConfig, FramingConfig}; use futures::FutureExt; -use serde::{Deserialize, Serialize}; use tracing::Span; +use vector_config::configurable_component; use warp::Filter; use crate::{ @@ -21,27 +21,65 @@ mod filters; mod handlers; mod models; -#[derive(Deserialize, Serialize, Debug, Clone)] +/// Configuration for the `aws_kinesis_firehose` source. 
+#[configurable_component(source)] +#[derive(Clone, Debug)] pub struct AwsKinesisFirehoseConfig { + /// The address to listen for connections on. address: SocketAddr, + + /// An optional access key to authenticate requests against. + /// + /// AWS Kinesis Firehose can be configured to pass along a user-configurable access key with each request. If + /// configured, `access_key` should be set to the same value. Otherwise, all requests will be allowed. access_key: Option, - tls: Option, + + /// The compression scheme to use for decompressing records within the Firehose message. + /// + /// Some services, like AWS CloudWatch Logs, will [compress the events with + /// gzip](\(urls.aws_cloudwatch_logs_firehose)), before sending them AWS Kinesis Firehose. This option can be used + /// to automatically decompress them before forwarding them to the next component. + /// + /// Note that this is different from [Content encoding option](\(urls.aws_kinesis_firehose_http_protocol)) of the + /// Firehose HTTP endpoint destination. That option controls the content encoding of the entire HTTP request. record_compression: Option, + + #[configurable(derived)] + tls: Option, + + #[configurable(derived)] #[serde(default = "default_framing_message_based")] framing: FramingConfig, + + #[configurable(derived)] #[serde(default = "default_decoding")] decoding: DeserializerConfig, + + #[configurable(derived)] #[serde(default, deserialize_with = "bool_or_struct")] acknowledgements: AcknowledgementsConfig, } -#[derive(Derivative, Copy, Clone, Debug, Deserialize, Serialize, PartialEq)] +/// Compression scheme for records in a Firehose message. +#[configurable_component] +#[derive(Clone, Copy, Debug, Derivative, PartialEq)] #[serde(rename_all = "lowercase")] #[derivative(Default)] pub enum Compression { + /// Automatically attempt to determine the compression scheme. 
+ /// + /// Vector will try to determine the compression scheme of the object by looking at its file signature, also known + /// as [magic bytes](\(urls.magic_bytes)). + /// + /// Given that determining the encoding using magic bytes is not a perfect check, if the record fails to decompress + /// with the discovered format, the record will be forwarded as-is. Thus, if you know the records will always be + /// gzip encoded (for example if they are coming from AWS CloudWatch Logs) then you should prefer to set `gzip` here + /// to have Vector reject any records that are not gzipped. #[derivative(Default)] Auto, + /// Uncompressed. None, + /// GZIP. Gzip, } diff --git a/src/sources/aws_s3/mod.rs b/src/sources/aws_s3/mod.rs index bb882f1011518..491a958731499 100644 --- a/src/sources/aws_s3/mod.rs +++ b/src/sources/aws_s3/mod.rs @@ -5,9 +5,9 @@ use async_compression::tokio::bufread; use aws_sdk_s3::types::ByteStream; use futures::stream; use futures::{stream::StreamExt, TryStreamExt}; -use serde::{Deserialize, Serialize}; use snafu::Snafu; use tokio_util::io::StreamReader; +use vector_config::configurable_component; use super::util::MultilineConfig; use crate::aws::create_client; @@ -27,47 +27,81 @@ use crate::{ pub mod sqs; -#[derive(Derivative, Copy, Clone, Debug, Deserialize, Serialize, PartialEq)] +/// Compression scheme for objects retrieved from S3. +#[configurable_component] +#[derive(Clone, Copy, Debug, Derivative, PartialEq)] #[serde(rename_all = "lowercase")] #[derivative(Default)] pub enum Compression { + /// Automatically attempt to determine the compression scheme. + /// + /// Vector will try to determine the compression scheme of the object from its: `Content-Encoding` and + /// `Content-Type` metadata, as well as the key suffix (e.g. `.gz`). + /// + /// It will fall back to 'none' if the compression scheme cannot be determined. #[derivative(Default)] Auto, + /// Uncompressed. None, + /// GZIP. Gzip, + /// ZSTD.
Zstd, } -#[derive(Derivative, Copy, Clone, Debug, Deserialize, Serialize)] +/// Strategies for consuming objects from S3. +#[configurable_component] +#[derive(Clone, Copy, Debug, Derivative)] #[serde(rename_all = "lowercase")] #[derivative(Default)] enum Strategy { + /// Consumes objects by processing bucket notification events sent to an [AWS SQS queue](\(urls.aws_sqs)). #[derivative(Default)] Sqs, } -#[derive(Clone, Debug, Default, Deserialize, Serialize)] -#[serde(default, deny_unknown_fields)] -struct AwsS3Config { +/// Configuration for the `aws_s3` source. +// TODO: The `Default` impl here makes the configuration schema output look pretty weird, especially because all the +// usage of optionals means we're spewing out a ton of `"foo": null` stuff in the default value, and that's not helpful +// when there's required fields. +// +// Maybe showing defaults at all, when there are required properties, doesn't actually make sense? :thinkies: +#[configurable_component(source)] +#[derive(Clone, Debug, Default)] +#[serde(default)] +pub struct AwsS3Config { #[serde(flatten)] region: RegionOrEndpoint, + /// The compression scheme used for decompressing objects retrieved from S3. compression: Compression, + /// The strategy to use to consume objects from S3. strategy: Strategy, + /// Configuration options for SQS. + /// + /// Only relevant when `strategy = "sqs"`. sqs: Option, - // Deprecated name. Moved to auth. + /// The ARN of an [IAM role](\(urls.aws_iam_role)) to assume at startup. + #[deprecated] assume_role: Option, + + #[configurable(derived)] #[serde(default)] auth: AwsAuthentication, + /// Multiline parsing configuration. + /// + /// If not specified, multiline parsing is disabled. 
multiline: Option, + #[configurable(derived)] #[serde(default, deserialize_with = "bool_or_struct")] acknowledgements: AcknowledgementsConfig, + #[configurable(derived)] tls_options: Option, } diff --git a/src/sources/aws_s3/sqs.rs b/src/sources/aws_s3/sqs.rs index 43570015b81cb..27beaa0cfe7fe 100644 --- a/src/sources/aws_s3/sqs.rs +++ b/src/sources/aws_s3/sqs.rs @@ -18,6 +18,7 @@ use snafu::{ResultExt, Snafu}; use tokio::{pin, select}; use tokio_util::codec::FramedRead; use tracing::Instrument; +use vector_config::configurable_component; use vector_core::ByteSizeOf; use crate::tls::TlsConfig; @@ -39,31 +40,57 @@ use lookup::path; static SUPPORTED_S3S_EVENT_VERSION: Lazy = Lazy::new(|| semver::VersionReq::parse("~2").unwrap()); -#[derive(Derivative, Clone, Debug, Deserialize, Serialize)] +/// SQS configuration options. +// +// TODO: It seems awfully likely that we could re-use the existing configuration type for the `aws_sqs` source in some +// way, given the near 100% overlap in configurable values. +#[configurable_component] +#[derive(Clone, Debug, Derivative)] #[derivative(Default)] -#[serde(deny_unknown_fields)] pub(super) struct Config { + /// The URL of the SQS queue to poll for bucket notifications. pub(super) queue_url: String, - // restricted to u32 for safe conversion to i64 later + /// How long to wait while polling the queue for new messages, in seconds. + /// + /// Generally should not be changed unless instructed to do so, as if messages are available, they will always be + /// consumed, regardless of the value of `poll_secs`. + // NOTE: We restrict this to u32 for safe conversion to i64 later. #[serde(default = "default_poll_secs")] #[derivative(Default(value = "default_poll_secs()"))] pub(super) poll_secs: u32, - // restricted to u32 for safe conversion to i64 later + /// The visibility timeout to use for messages, in seconds. + /// + /// This controls how long a message is left unavailable after Vector receives it.
 If Vector receives a message, and + /// takes longer than `visibility_timeout_secs` to process and delete the message from the queue, it will be made available again for another consumer. + /// + /// This can happen if, for example, Vector crashes between consuming a message and deleting it. + // NOTE: We restrict this to u32 for safe conversion to i64 later. #[serde(default = "default_visibility_timeout_secs")] #[derivative(Default(value = "default_visibility_timeout_secs()"))] pub(super) visibility_timeout_secs: u32, + /// Whether to delete the message once Vector processes it. + /// + /// It can be useful to set this to `false` to debug or during initial Vector setup. #[serde(default = "default_true")] #[derivative(Default(value = "default_true()"))] pub(super) delete_message: bool, - // number of tasks spawned for running the SQS/S3 receive loop + /// Number of concurrent tasks to create for polling the queue for messages. + /// + /// Defaults to the number of available CPUs on the system. + /// + /// Should not typically need to be changed, but it can sometimes be beneficial to raise this value when there is a + /// high rate of messages being pushed into the queue and the objects being fetched are small. In these cases, + /// Vector may not fully utilize system resources without fetching more messages per second, as the SQS message + /// consumption rate affects the S3 object retrieval rate.
 #[serde(default = "default_client_concurrency")] #[derivative(Default(value = "default_client_concurrency()"))] pub(super) client_concurrency: u32, + #[configurable(derived)] #[serde(default)] #[derivative(Default)] pub(super) tls_options: Option, @@ -693,7 +720,7 @@ pub struct S3Bucket { pub name: String, } -#[derive(Clone, Debug, Deserialize, Serialize, PartialEq)] +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] #[serde(rename_all = "camelCase")] pub struct S3Object { // S3ObjectKeys are URL encoded diff --git a/src/sources/aws_sqs/config.rs b/src/sources/aws_sqs/config.rs index ef6b7cc95b4bd..9df1321b5826b 100644 --- a/src/sources/aws_sqs/config.rs +++ b/src/sources/aws_sqs/config.rs @@ -1,7 +1,7 @@ use std::cmp; use codecs::decoding::{DeserializerConfig, FramingConfig}; -use serde::{Deserialize, Serialize}; +use vector_config::configurable_component; use crate::aws::create_client; use crate::codecs::DecodingConfig; @@ -14,43 +14,76 @@ use crate::{ sources::aws_sqs::source::SqsSource, }; -#[derive(Deserialize, Serialize, Derivative, Debug, Clone)] +/// Configuration for the `aws_sqs` source. +#[configurable_component(source)] +#[derive(Clone, Debug, Derivative)] #[derivative(Default)] -#[serde(deny_unknown_fields)] pub struct AwsSqsConfig { #[serde(flatten)] pub region: RegionOrEndpoint, + + #[configurable(derived)] #[serde(default)] pub auth: AwsAuthentication, + /// The URL of the SQS queue to poll for messages. pub queue_url: String, + /// How long to wait while polling the queue for new messages, in seconds. + /// + /// Generally should not be changed unless instructed to do so, as if messages are available, they will always be + /// consumed, regardless of the value of `poll_secs`. + // NOTE: We restrict this to u32 for safe conversion to i64 later. #[serde(default = "default_poll_secs")] #[derivative(Default(value = "default_poll_secs()"))] pub poll_secs: u32, + /// The visibility timeout to use for messages, in seconds.
+ /// + /// This controls how long a message is left unavailable after Vector receives it. If Vector receives a message, and + /// takes longer than `visibility_timeout_secs` to process and delete the message from the queue, it will be made available again for another consumer. + /// + /// This can happen if, for example, Vector crashes between consuming a message and deleting it. + // NOTE: We restrict this to u32 for safe conversion to i64 later. // restricted to u32 for safe conversion to i64 later #[serde(default = "default_visibility_timeout_secs")] #[derivative(Default(value = "default_visibility_timeout_secs()"))] pub(super) visibility_timeout_secs: u32, + /// Whether to delete the message once Vector processes it. + /// + /// It can be useful to set this to `false` to debug or during initial Vector setup. #[serde(default = "default_true")] #[derivative(Default(value = "default_true()"))] pub(super) delete_message: bool, - // number of concurrent tasks spawned for receiving/processing SQS messages + /// Number of concurrent tasks to create for polling the queue for messages. + /// + /// Defaults to the number of available CPUs on the system. + /// + /// Should not typically need to be changed, but it can sometimes be beneficial to raise this value when there is a + /// high rate of messages being pushed into the queue and the messages being fetched are small. In these cases, + /// Vector may not fully utilize system resources without fetching more messages per second, as it spends more time + /// fetching the messages than processing them.
#[serde(default = "default_client_concurrency")] #[derivative(Default(value = "default_client_concurrency()"))] pub client_concurrency: u32, + #[configurable(derived)] #[serde(default = "default_framing_message_based")] #[derivative(Default(value = "default_framing_message_based()"))] pub framing: FramingConfig, + + #[configurable(derived)] #[serde(default = "default_decoding")] #[derivative(Default(value = "default_decoding()"))] pub decoding: DeserializerConfig, + + #[configurable(derived)] #[serde(default, deserialize_with = "bool_or_struct")] pub acknowledgements: AcknowledgementsConfig, + + #[configurable(derived)] pub tls: Option, } diff --git a/src/sources/aws_sqs/mod.rs b/src/sources/aws_sqs/mod.rs index d0935b68c5fd5..9674a3d5a1e21 100644 --- a/src/sources/aws_sqs/mod.rs +++ b/src/sources/aws_sqs/mod.rs @@ -2,7 +2,7 @@ mod config; mod integration_tests; mod source; -use config::AwsSqsConfig; +pub use config::AwsSqsConfig; use crate::config::SourceDescription; diff --git a/src/sources/datadog/agent/mod.rs b/src/sources/datadog/agent/mod.rs index bb61b20aedb9c..c8d727c93d4ec 100644 --- a/src/sources/datadog/agent/mod.rs +++ b/src/sources/datadog/agent/mod.rs @@ -20,6 +20,7 @@ use serde::{Deserialize, Serialize}; use snafu::Snafu; use tracing::Span; use value::Kind; +use vector_config::configurable_component; use vector_core::event::{BatchNotifier, BatchStatus}; use warp::{filters::BoxedFilter, reject::Rejection, reply::Response, Filter, Reply}; @@ -42,26 +43,53 @@ pub const LOGS: &str = "logs"; pub const METRICS: &str = "metrics"; pub const TRACES: &str = "traces"; -#[derive(Deserialize, Serialize, Debug, Clone)] -struct DatadogAgentConfig { +/// Configuration for the `datadog_agent` source. +#[configurable_component(source)] +#[derive(Clone, Debug)] +pub struct DatadogAgentConfig { + /// The address to accept connections on. + /// + /// The address _must_ include a port. 
 address: SocketAddr, - tls: Option, + + /// When incoming events contain a Datadog API key, if this setting is set to `true` the key will be kept in the event + /// metadata and will be used if the event is sent to a Datadog sink. #[serde(default = "crate::serde::default_true")] store_api_key: bool, - #[serde(default = "default_framing_message_based")] - framing: FramingConfig, - #[serde(default = "default_decoding")] - decoding: DeserializerConfig, - #[serde(default, deserialize_with = "bool_or_struct")] - acknowledgements: AcknowledgementsConfig, + + /// If this setting is set to `true`, logs won't be accepted by the component. #[serde(default = "crate::serde::default_false")] disable_logs: bool, + + /// If this setting is set to `true`, metrics won't be accepted by the component. #[serde(default = "crate::serde::default_false")] disable_metrics: bool, + + /// If this setting is set to `true`, traces won't be accepted by the component. #[serde(default = "crate::serde::default_false")] disable_traces: bool, + + /// If this setting is set to `true` logs, metrics and traces will be sent to different outputs. + /// + /// For a source component named `agent` the received logs, metrics, and traces can then be accessed by specifying + /// `agent.logs`, `agent.metrics`, and `agent.traces`, respectively, as the input to another component.
#[serde(default = "crate::serde::default_false")] multiple_outputs: bool, + + #[configurable(derived)] + tls: Option, + + #[configurable(derived)] + #[serde(default = "default_framing_message_based")] + framing: FramingConfig, + + #[configurable(derived)] + #[serde(default = "default_decoding")] + decoding: DeserializerConfig, + + #[configurable(derived)] + #[serde(default, deserialize_with = "bool_or_struct")] + acknowledgements: AcknowledgementsConfig, } impl GenerateConfig for DatadogAgentConfig { @@ -408,7 +436,7 @@ fn handle_decode_error(encoding: &str, error: impl std::error::Error) -> ErrorMe } // https://github.com/DataDog/datadog-agent/blob/a33248c2bc125920a9577af1e16f12298875a4ad/pkg/logs/processor/json.go#L23-L49 -#[derive(Deserialize, Clone, Serialize, Debug)] +#[derive(Clone, Debug, Deserialize, Serialize)] #[serde(deny_unknown_fields)] struct LogMsg { pub message: Bytes, diff --git a/src/sources/demo_logs.rs b/src/sources/demo_logs.rs index ea7e812e5c20a..dd26127a92510 100644 --- a/src/sources/demo_logs.rs +++ b/src/sources/demo_logs.rs @@ -9,10 +9,10 @@ use codecs::{ use fakedata::logs::*; use futures::StreamExt; use rand::seq::SliceRandom; -use serde::{Deserialize, Serialize}; use snafu::Snafu; use tokio::time::{self, Duration}; use tokio_util::codec::FramedRead; +use vector_config::configurable_component; use vector_core::ByteSizeOf; use crate::{ @@ -24,19 +24,34 @@ use crate::{ SourceSender, }; -#[derive(Clone, Debug, Derivative, Deserialize, Serialize)] +/// Configuration for the `demo_logs` source. +#[configurable_component(source)] +#[derive(Clone, Debug, Derivative)] #[derivative(Default)] #[serde(default)] pub struct DemoLogsConfig { + /// The amount of time, in seconds, to pause between each batch of output lines. + /// + /// The default is one batch per second. In order to remove the delay and output batches as quickly as possible, set + /// `interval` to `0.0`. 
#[serde(alias = "batch_interval")] #[derivative(Default(value = "default_interval()"))] pub interval: f64, + + /// The total number of lines to output. + /// + /// By default, the source continuously prints logs (infinitely). #[derivative(Default(value = "default_count()"))] pub count: usize, + #[serde(flatten)] pub format: OutputFormat, + + #[configurable(derived)] #[derivative(Default(value = "default_framing_message_based()"))] pub framing: FramingConfig, + + #[configurable(derived)] #[derivative(Default(value = "default_decoding()"))] pub decoding: DeserializerConfig, } @@ -55,21 +70,36 @@ pub enum DemoLogsConfigError { ShuffleDemoLogsItemsEmpty, } -#[derive(Clone, Debug, Derivative, Deserialize, Serialize)] +/// Output format configuration. +#[configurable_component] +#[derive(Clone, Debug, Derivative)] #[derivative(Default)] #[serde(tag = "format", rename_all = "snake_case")] pub enum OutputFormat { + /// Lines are chosen at random from the list specified using `lines`. Shuffle { + /// If `true`, each output line starts with an increasing sequence number, beginning with 0. #[serde(default)] sequence: bool, + /// The list of lines to output. lines: Vec, }, + + /// Randomly generated logs in [Apache common](\(urls.apache_common)) format. ApacheCommon, + + /// Randomly generated logs in [Apache error](\(urls.apache_error)) format. ApacheError, + + /// Randomly generated logs in Syslog format ([RFC 5424](\(urls.syslog_5424))). #[serde(alias = "rfc5424")] Syslog, + + /// Randomly generated logs in Syslog format ([RFC 3164](\(urls.syslog_3164))). #[serde(alias = "rfc3164")] BsdSyslog, + + /// Randomly generated HTTP server logs in [JSON](\(urls.json)) format. #[derivative(Default)] Json, } @@ -236,9 +266,13 @@ impl SourceConfig for DemoLogsConfig { } } +/// Configuration for the `generator` source. 
// Add a compatibility alias to avoid breaking existing configs -#[derive(Deserialize, Serialize, Debug, Clone)] -struct DemoLogsCompatConfig(DemoLogsConfig); +// +// TODO: Is this old enough now that we could actually remove it? +#[configurable_component(source)] +#[derive(Clone, Debug)] +struct DemoLogsCompatConfig(#[configurable(transparent)] DemoLogsConfig); #[async_trait::async_trait] #[typetag::serde(name = "generator")] diff --git a/src/sources/dnstap/mod.rs b/src/sources/dnstap/mod.rs index abfcd6a3074a7..d99d754c11c0f 100644 --- a/src/sources/dnstap/mod.rs +++ b/src/sources/dnstap/mod.rs @@ -1,7 +1,7 @@ use std::path::PathBuf; use bytes::Bytes; -use serde::{Deserialize, Serialize}; +use vector_config::configurable_component; use vector_core::ByteSizeOf; use super::util::framestream::{build_framestream_unix_source, FrameHandler}; @@ -19,17 +19,54 @@ pub mod schema; use dnsmsg_parser::{dns_message, dns_message_parser}; pub use schema::DnstapEventSchema; -#[derive(Deserialize, Serialize, Debug)] +/// Configuration for the `dnstap` source. +#[configurable_component(source)] +#[derive(Clone, Debug)] pub struct DnstapConfig { + /// Maximum length, in bytes, that a frame can be. #[serde(default = "default_max_frame_length")] pub max_frame_length: usize, + + /// Overrides the name of the log field used to add the source path to each event. + /// + /// The value will be the socket path itself. + /// + /// By default, the [global `host_key` option](\(urls.vector_configuration)/global-options#log_schema.host_key) is + /// used. pub host_key: Option, + + /// Absolute path to the socket file to read DNSTAP data from. + /// + /// The DNS server must be configured to send its DNSTAP data to this socket file. The socket file will be created, + /// if it doesn't already exist, when the source first starts. pub socket_path: PathBuf, + + /// Whether or not to skip parsing/decoding of DNSTAP frames. + /// + /// If set to `true`, frames will not be parsed/decoded. 
 The raw frame data will be set as a field on the event + /// (called `rawData`) and encoded as a base64 string. raw_data_only: Option, + + /// Whether or not to concurrently process DNSTAP frames. pub multithreaded: Option, + + /// Maximum number of frames that can be processed concurrently. pub max_frame_handling_tasks: Option, - pub(self) socket_file_mode: Option, + + /// Unix file mode bits to be applied to the unix socket file as its designated file permissions. + /// + /// Note that the file mode value can be specified in any numeric format supported by your configuration + /// language, but it is most intuitive to use an octal number. pub socket_file_mode: Option, + + /// The size, in bytes, of the receive buffer used for the socket. + /// + /// This should not typically need to be changed. pub socket_receive_buffer_size: Option, + + /// The size, in bytes, of the send buffer used for the socket. + /// + /// This should not typically need to be changed. pub socket_send_buffer_size: Option, } @@ -54,7 +91,7 @@ impl DnstapConfig { impl Default for DnstapConfig { fn default() -> Self { Self { - host_key: Some("host".to_string()), + host_key: None, max_frame_length: default_max_frame_length(), socket_path: PathBuf::from("/run/bind/dnstap.sock"), raw_data_only: None, diff --git a/src/sources/docker_logs.rs b/src/sources/docker_logs.rs index 8f483120cb8d0..88eb404257935 100644 --- a/src/sources/docker_logs.rs +++ b/src/sources/docker_logs.rs @@ -16,6 +16,7 @@ use lookup::lookup_v2::{parse_path, OwnedSegment}; use once_cell::sync::Lazy; use serde::{Deserialize, Serialize}; use tokio::sync::mpsc; +use vector_config::configurable_component; use vector_core::ByteSizeOf; use super::util::MultilineConfig; @@ -46,21 +47,81 @@ static STDERR: Lazy = Lazy::new(|| "stderr".into()); static STDOUT: Lazy = Lazy::new(|| "stdout".into()); static CONSOLE: Lazy = Lazy::new(|| "console".into()); -#[derive(Deserialize, Serialize, Debug, Clone)] -#[serde(deny_unknown_fields,
default)] -pub(super) struct DockerLogsConfig { +/// Configuration for the `docker_logs` source. +#[configurable_component(source)] +#[derive(Clone, Debug)] +#[serde(default)] +pub struct DockerLogsConfig { + /// Overrides the name of the log field used to add the current hostname to each event. + /// + /// The value will be the current hostname for wherever Vector is running. + /// + /// By default, the [global `host_key` option](\(urls.vector_configuration)/global-options#log_schema.host_key) is used. #[serde(default = "host_key")] host_key: String, + + /// The Docker host to connect to. + /// + /// Use an HTTPS URL to enable TLS encryption. + /// + /// If absent, Vector will try to use `DOCKER_HOST` environment variable. If `DOCKER_HOST` is also absent, Vector will use default Docker local socket (`/var/run/docker.sock` on Unix platforms, `//./pipe/docker_engine` on Windows). docker_host: Option, - tls: Option, + + /// A list of container IDs or names of containers to exclude from log collection. + /// + /// Matching is prefix first, so specifying a value of `foo` would match any container named `foo` as well as any + /// container whose name started with `foo`. This applies equally whether matching container IDs or names. + /// + /// By default, the source will collect logs for all containers. If `exclude_containers` is configured, any + /// container that matches a configured exclusion will be excluded even if it is also included via + /// `include_containers`, so care should be taken when utilizing prefix matches as they cannot be overridden by a + /// corresponding entry in `include_containers` e.g. excluding `foo` by attempting to include `foo-specific-id`. + /// + /// This can be used in conjunction with `include_containers`. exclude_containers: Option>, // Starts with actually, not exclude + + /// A list of container IDs or names of containers to include in log collection. 
+ /// + /// Matching is prefix first, so specifying a value of `foo` would match any container named `foo` as well as any + /// container whose name started with `foo`. This applies equally whether matching container IDs or names. + /// + /// By default, the source will collect logs for all containers. If `include_containers` is configured, only + /// containers that match a configured inclusion and are also not excluded will be matched. + /// + /// This can be used in conjunction with `exclude_containers`. include_containers: Option>, // Starts with actually, not include + + /// A list of container object labels to match against when filtering running containers. + /// + /// Labels should follow the syntax described in the [Docker object labels](https://docs.docker.com/config/labels-custom-metadata/) documentation. include_labels: Option>, + + /// A list of image names to match against. + /// + /// If not provided, all images will be included. include_images: Option>, + + /// Overrides the name of the log field used to mark an event as partial. + /// + /// If `auto_partial_merge` is disabled, partial events will be emitted with a log field, controlled by this + /// configuration value, set, indicating that the event is not complete. + /// + /// By default, `"_partial"` is used. partial_event_marker_field: Option, + + /// Whether or not to automatically merge partial events. auto_partial_merge: bool, - multiline: Option, + + /// The amount of time, in seconds, to wait before retrying after an error. retry_backoff_secs: u64, + + /// Multiline parsing configuration. + /// + /// If not specified, multiline parsing is disabled.
+ multiline: Option, + + #[configurable(derived)] + tls: Option, } impl Default for DockerLogsConfig { diff --git a/src/sources/eventstoredb_metrics/mod.rs b/src/sources/eventstoredb_metrics/mod.rs index dd89d46a3cac2..7a875a28270fd 100644 --- a/src/sources/eventstoredb_metrics/mod.rs +++ b/src/sources/eventstoredb_metrics/mod.rs @@ -3,8 +3,8 @@ use std::time::Duration; use futures::{FutureExt, StreamExt}; use http::Uri; use hyper::{Body, Request}; -use serde::{Deserialize, Serialize}; use tokio_stream::wrappers::IntervalStream; +use vector_config::configurable_component; use vector_core::ByteSizeOf; use self::types::Stats; @@ -20,12 +20,21 @@ use crate::{ pub mod types; -#[derive(Deserialize, Serialize, Clone, Debug, Default)] -struct EventStoreDbConfig { +/// Configuration for the `eventstoredb_metrics` source. +#[configurable_component(source)] +#[derive(Clone, Debug, Default)] +pub struct EventStoreDbConfig { + /// Endpoints to scrape stats from. #[serde(default = "default_endpoint")] endpoint: String, + + /// The interval between scrapes, in seconds. #[serde(default = "default_scrape_interval_secs")] scrape_interval_secs: u64, + + /// Overrides the default namespace for the metrics emitted by the source. + /// + /// By default, `eventstoredb` is used. 
default_namespace: Option, } diff --git a/src/sources/file.rs b/src/sources/file.rs index 8e5b04b361179..1582b5fca2a5e 100644 --- a/src/sources/file.rs +++ b/src/sources/file.rs @@ -8,10 +8,10 @@ use file_source::{ }; use futures::{FutureExt, Stream, StreamExt, TryFutureExt}; use regex::bytes::Regex; -use serde::{Deserialize, Serialize}; use snafu::{ResultExt, Snafu}; use tokio::{sync::oneshot, task::spawn_blocking}; use tracing::{Instrument, Span}; +use vector_config::configurable_component; use super::util::{finalizer::OrderedFinalizer, EncodingConfig, MultilineConfig}; use crate::{ @@ -59,42 +59,72 @@ enum BuildError { }, } -#[derive(Deserialize, Serialize, Debug, PartialEq)] -#[serde(deny_unknown_fields, default)] +/// Configuration for the `file` source. +#[configurable_component(source)] +#[derive(Clone, Debug, PartialEq)] +#[serde(default)] pub struct FileConfig { pub include: Vec, + pub exclude: Vec, + pub file_key: Option, + pub start_at_beginning: Option, + pub ignore_checkpoints: Option, + + #[configurable(derived)] pub read_from: Option, + // Deprecated name #[serde(alias = "ignore_older")] pub ignore_older_secs: Option, + #[serde(default = "default_max_line_bytes")] pub max_line_bytes: usize, + pub host_key: Option, + pub data_dir: Option, + #[serde(alias = "glob_minimum_cooldown")] pub glob_minimum_cooldown_ms: u64, + // Deprecated name #[serde(alias = "fingerprinting")] fingerprint: FingerprintConfig, + pub ignore_not_found: bool, + pub message_start_indicator: Option, + pub multi_line_timeout: u64, // millis + + /// Multiline parsing configuration. + /// + /// If not specified, multiline parsing is disabled. 
pub multiline: Option, + pub max_read_bytes: usize, + pub oldest_first: bool, + #[serde(alias = "remove_after")] pub remove_after_secs: Option, + pub line_delimiter: String, + pub encoding: Option, + + #[configurable(derived)] #[serde(default, deserialize_with = "bool_or_struct")] acknowledgements: AcknowledgementsConfig, } -#[derive(Deserialize, Serialize, Clone, Debug, PartialEq)] +/// Fingerprinting schemes. +#[configurable_component] +#[derive(Clone, Debug, PartialEq)] #[serde(tag = "strategy", rename_all = "snake_case")] pub enum FingerprintConfig { Checksum { @@ -109,7 +139,9 @@ pub enum FingerprintConfig { DevInode, } -#[derive(Serialize, Deserialize, Copy, Clone, Debug, PartialEq)] +/// File position to use when reading a new file. +#[configurable_component] +#[derive(Copy, Clone, Debug, PartialEq)] #[serde(rename_all = "snake_case")] pub enum ReadFromConfig { Beginning, diff --git a/src/sources/fluent/mod.rs b/src/sources/fluent/mod.rs index 301d645ab50ac..78ecb70c9644d 100644 --- a/src/sources/fluent/mod.rs +++ b/src/sources/fluent/mod.rs @@ -6,9 +6,10 @@ use codecs::StreamDecodingError; use flate2::read::MultiGzDecoder; use lookup::path; use rmp_serde::{decode, Deserializer}; -use serde::{Deserialize, Serialize}; +use serde::Deserialize; use smallvec::{smallvec, SmallVec}; use tokio_util::codec::Decoder; +use vector_config::configurable_component; use super::util::{SocketListenAddr, TcpSource, TcpSourceAck, TcpSourceAcker}; use crate::{ @@ -26,15 +27,30 @@ use crate::{ mod message; use self::message::{FluentEntry, FluentMessage, FluentRecord, FluentTag, FluentTimestamp}; -#[derive(Deserialize, Serialize, Debug)] +/// Configuration for the `fluent` source. +#[configurable_component(source)] +#[derive(Clone, Debug)] pub struct FluentConfig { + /// The address to listen for connections on. address: SocketListenAddr, - tls: Option, + + /// The maximum number of TCP connections that will be allowed at any given time. 
+ connection_limit: Option, + + #[configurable(derived)] keepalive: Option, + + /// The size, in bytes, of the receive buffer used for each connection. + /// + /// This should not typically need to be changed. receive_buffer_bytes: Option, + + #[configurable(derived)] + tls: Option, + + #[configurable(derived)] #[serde(default, deserialize_with = "bool_or_struct")] acknowledgements: AcknowledgementsConfig, - connection_limit: Option, } inventory::submit! { diff --git a/src/sources/gcp_pubsub.rs b/src/sources/gcp_pubsub.rs index d0916d6067980..7eca230d59966 100644 --- a/src/sources/gcp_pubsub.rs +++ b/src/sources/gcp_pubsub.rs @@ -6,7 +6,6 @@ use derivative::Derivative; use futures::{stream, Stream, StreamExt, TryFutureExt}; use http::uri::{InvalidUri, Scheme, Uri}; use once_cell::sync::Lazy; -use serde::{Deserialize, Serialize}; use snafu::{ResultExt, Snafu}; use tokio::sync::Mutex; use tonic::{ @@ -14,6 +13,7 @@ use tonic::{ transport::{Certificate, Channel, ClientTlsConfig, Endpoint, Identity}, Code, Request, Status, }; +use vector_config::configurable_component; use vector_core::ByteSizeOf; use crate::{ @@ -93,19 +93,24 @@ pub(crate) enum PubsubError { static CLIENT_ID: Lazy = Lazy::new(|| uuid::Uuid::new_v4().to_string()); -#[derive(Deserialize, Serialize, Derivative, Debug, Clone)] +/// Configuration for the `gcp_pubsub` source.
+#[configurable_component(source)] +#[derive(Clone, Debug, Derivative)] #[derivative(Default)] -#[serde(deny_unknown_fields)] pub struct PubsubConfig { pub project: String, + pub subscription: String, + pub endpoint: Option, #[serde(default)] pub skip_authentication: bool, + #[serde(flatten)] pub auth: GcpAuthConfig, + #[configurable(derived)] pub tls: Option, #[serde(default = "default_ack_deadline")] @@ -114,13 +119,17 @@ pub struct PubsubConfig { #[serde(default = "default_retry_delay")] pub retry_delay_seconds: f64, + #[configurable(derived)] #[serde(default = "default_framing_message_based")] #[derivative(Default(value = "default_framing_message_based()"))] pub framing: FramingConfig, + + #[configurable(derived)] #[serde(default = "default_decoding")] #[derivative(Default(value = "default_decoding()"))] pub decoding: DeserializerConfig, + #[configurable(derived)] #[serde(default, deserialize_with = "bool_or_struct")] pub acknowledgements: AcknowledgementsConfig, } diff --git a/src/sources/heroku_logs.rs b/src/sources/heroku_logs.rs index c866b38238948..8f79574a45f0c 100644 --- a/src/sources/heroku_logs.rs +++ b/src/sources/heroku_logs.rs @@ -11,9 +11,9 @@ use codecs::{ decoding::{DeserializerConfig, FramingConfig}, StreamDecodingError, }; -use serde::{Deserialize, Serialize}; use smallvec::SmallVec; use tokio_util::codec::Decoder as _; +use vector_config::configurable_component; use warp::http::{HeaderMap, StatusCode}; use crate::{ @@ -31,17 +31,30 @@ use crate::{ }; use lookup::path; -#[derive(Deserialize, Serialize, Debug, Clone)] -pub(crate) struct LogplexConfig { +/// Configuration for `heroku_logs` source. +#[configurable_component(source)] +#[derive(Clone, Debug)] +pub struct LogplexConfig { + /// The address to listen for connections on. 
address: SocketAddr, + #[serde(default)] query_parameters: Vec, + + #[configurable(derived)] tls: Option, + auth: Option, + + #[configurable(derived)] #[serde(default = "default_framing_message_based")] framing: FramingConfig, + + #[configurable(derived)] #[serde(default = "default_decoding")] decoding: DeserializerConfig, + + #[configurable(derived)] #[serde(default, deserialize_with = "bool_or_struct")] acknowledgements: AcknowledgementsConfig, } diff --git a/src/sources/http.rs b/src/sources/http.rs index dd085603afdd8..e69eb8f4a541b 100644 --- a/src/sources/http.rs +++ b/src/sources/http.rs @@ -9,8 +9,8 @@ use codecs::{ }; use http::StatusCode; use lookup::path; -use serde::{Deserialize, Serialize}; use tokio_util::codec::Decoder as _; +use vector_config::configurable_component; use warp::http::{HeaderMap, HeaderValue}; use crate::{ @@ -27,40 +27,91 @@ use crate::{ tls::TlsEnableableConfig, }; -#[derive(Clone, Copy, Debug, Derivative, Deserialize, Serialize)] +/// HTTP method. +#[configurable_component] +#[derive(Clone, Copy, Debug, Derivative)] #[derivative(Default)] #[serde(rename_all = "UPPERCASE")] pub enum HttpMethod { + /// HTTP HEAD method. Head, + + /// HTTP GET method. Get, + + /// HTTP POST method. #[derivative(Default)] Post, + + /// HTTP Put method. Put, + + /// HTTP PATCH method. Patch, + + /// HTTP DELETE method. Delete, } -#[derive(Deserialize, Serialize, Debug, Clone)] -pub(super) struct SimpleHttpConfig { +/// Configuration for the `http` source. +#[configurable_component(source)] +#[derive(Clone, Debug)] +pub struct SimpleHttpConfig { + /// The address to listen for connections on. address: SocketAddr, + + /// The expected encoding of received data. + /// + /// Note that for `json` and `ndjson` encodings, the fields of the JSON objects are output as separate fields. #[serde(default)] encoding: Option, + + /// A list of HTTP headers to include in the log event. 
+ /// + /// These will override any values included in the JSON payload with conflicting names. #[serde(default)] headers: Vec, + + /// A list of URL query parameters to include in the log event. + /// + /// These will override any values included in the body with conflicting names. #[serde(default)] query_parameters: Vec, - tls: Option, + auth: Option, + + /// Whether or not to treat the configured `path` as an absolute path. + /// + /// If set to `true`, only requests using the exact URL path specified in `path` will be accepted. Otherwise, + /// requests sent to a URL path that starts with the value of `path` will be accepted. + /// + /// With `strict_path` set to `false` and `path` set to `""`, the configured HTTP source will accept requests from + /// any URL path. #[serde(default = "crate::serde::default_true")] strict_path: bool, + + /// The URL path on which log event POST requests shall be sent. #[serde(default = "default_path")] path: String, + + /// The event key in which the requested URL path used to send the request will be stored. #[serde(default = "default_path_key")] path_key: String, + + /// Specifies the action of the HTTP request. 
#[serde(default)] method: HttpMethod, + + #[configurable(derived)] + tls: Option, + + #[configurable(derived)] framing: Option, + + #[configurable(derived)] decoding: Option, + + #[configurable(derived)] #[serde(default, deserialize_with = "bool_or_struct")] acknowledgements: AcknowledgementsConfig, } diff --git a/src/sources/internal_logs.rs b/src/sources/internal_logs.rs index 4445da5759007..315eb2f78baba 100644 --- a/src/sources/internal_logs.rs +++ b/src/sources/internal_logs.rs @@ -1,7 +1,7 @@ use bytes::Bytes; use chrono::Utc; use futures::{stream, StreamExt}; -use serde::{Deserialize, Serialize}; +use vector_config::configurable_component; use vector_core::ByteSizeOf; use crate::{ @@ -13,10 +13,22 @@ use crate::{ SourceSender, }; -#[derive(Clone, Debug, Default, Deserialize, Serialize)] -#[serde(deny_unknown_fields)] +/// Configuration for the `internal_logs` source. +#[configurable_component(source)] +#[derive(Clone, Debug, Default)] pub struct InternalLogsConfig { + /// Overrides the name of the log field used to add the current hostname to each event. + /// + /// The value will be the current hostname for wherever Vector is running. + /// + /// By default, the [global `host_key` option](\(urls.vector_configuration)/global-options#log_schema.host_key) is used. pub host_key: Option, + + /// Overrides the name of the log field used to add the current process ID to each event. + /// + /// The value will be the current process ID for Vector itself. + /// + /// By default, `"pid"` is used. 
pub pid_key: Option, } diff --git a/src/sources/internal_metrics.rs b/src/sources/internal_metrics.rs index 0ad7a7a36085c..2e190a9acc937 100644 --- a/src/sources/internal_metrics.rs +++ b/src/sources/internal_metrics.rs @@ -1,7 +1,7 @@ use futures::StreamExt; -use serde::{Deserialize, Serialize}; use tokio::time; use tokio_stream::wrappers::IntervalStream; +use vector_config::configurable_component; use vector_core::ByteSizeOf; use crate::{ @@ -12,13 +12,22 @@ use crate::{ SourceSender, }; -#[derive(Deserialize, Serialize, Debug, Clone, Derivative)] +/// Configuration for the `internal_metrics` source. +#[configurable_component(source)] +#[derive(Clone, Debug, Derivative)] #[derivative(Default)] -#[serde(deny_unknown_fields, default)] +#[serde(default)] pub struct InternalMetricsConfig { + /// The interval between metric gathering, in seconds. #[derivative(Default(value = "2.0"))] pub scrape_interval_secs: f64, + + #[configurable(derived)] pub tags: TagsConfig, + + /// Overrides the default namespace for the metrics emitted by the source. + /// + /// By default, `vector` is used. pub namespace: Option, } @@ -29,12 +38,25 @@ impl InternalMetricsConfig { } } -#[derive(Deserialize, Serialize, Debug, Clone, Derivative)] +/// Tag configuration for the `internal_metrics` source. +#[configurable_component] +#[derive(Clone, Debug, Derivative)] #[derivative(Default)] -#[serde(deny_unknown_fields, default)] +#[serde(default)] pub struct TagsConfig { - host_key: Option, - pid_key: Option, + /// Sets the name of the tag to use to add the current hostname to each metric. + /// + /// The value will be the current hostname for wherever Vector is running. + /// + /// By default, this is not set and the tag will not be automatically added. + pub host_key: Option, + + /// Sets the name of the tag to use to add the current process ID to each metric. + /// + /// The value will be the current process ID for Vector itself. 
+ /// + /// By default, this is not set and the tag will not be automatically added. + pub pid_key: Option, } inventory::submit! { diff --git a/src/sources/journald.rs b/src/sources/journald.rs index fa45565e11dac..d21ff398ea3e7 100644 --- a/src/sources/journald.rs +++ b/src/sources/journald.rs @@ -18,7 +18,6 @@ use nix::{ unistd::Pid, }; use once_cell::sync::Lazy; -use serde::{Deserialize, Serialize}; use serde_json::{Error as JsonError, Value as JsonValue}; use snafu::{ResultExt, Snafu}; use tokio::{ @@ -29,6 +28,7 @@ use tokio::{ time::sleep, }; use tokio_util::codec::FramedRead; +use vector_config::configurable_component; use vector_core::ByteSizeOf; use crate::{ @@ -80,24 +80,40 @@ enum BuildError { type Matches = HashMap>; -#[derive(Deserialize, Serialize, Debug, Default)] -#[serde(deny_unknown_fields, default)] +/// Configuration for the `journald` source. +#[configurable_component(source)] +#[derive(Clone, Debug, Default)] +#[serde(default)] pub struct JournaldConfig { pub since_now: Option, + pub current_boot_only: Option, + pub units: Vec, + pub include_units: Vec, + pub exclude_units: Vec, + pub include_matches: Matches, + pub exclude_matches: Matches, + pub data_dir: Option, + pub batch_size: Option, + pub journalctl_path: Option, + pub journal_directory: Option, + + #[configurable(derived)] #[serde(default, deserialize_with = "bool_or_struct")] acknowledgements: AcknowledgementsConfig, + /// Deprecated #[serde(default)] + #[deprecated] remap_priority: bool, } diff --git a/src/sources/kafka.rs b/src/sources/kafka.rs index b569568bd925f..34abc8f8f3093 100644 --- a/src/sources/kafka.rs +++ b/src/sources/kafka.rs @@ -17,9 +17,9 @@ use rdkafka::{ consumer::{Consumer, StreamConsumer}, message::{BorrowedMessage, Headers, Message}, }; -use serde::{Deserialize, Serialize}; use snafu::{ResultExt, Snafu}; use tokio_util::codec::FramedRead; +use vector_config::configurable_component; use vector_core::ByteSizeOf; use super::util::finalizer::OrderedFinalizer; @@ 
-48,42 +48,63 @@ enum BuildError { KafkaSubscribeError { source: rdkafka::error::KafkaError }, } -#[derive(Clone, Debug, Derivative, Deserialize, Serialize)] +/// Configuration for the `kafka` source. +#[configurable_component(source)] +#[derive(Clone, Debug, Derivative)] #[derivative(Default)] -#[serde(deny_unknown_fields)] pub struct KafkaSourceConfig { bootstrap_servers: String, + topics: Vec, + group_id: String, + #[serde(default = "default_auto_offset_reset")] auto_offset_reset: String, + #[serde(default = "default_session_timeout_ms")] session_timeout_ms: u64, + #[serde(default = "default_socket_timeout_ms")] socket_timeout_ms: u64, + #[serde(default = "default_fetch_wait_max_ms")] fetch_wait_max_ms: u64, + #[serde(default = "default_commit_interval_ms")] commit_interval_ms: u64, + #[serde(default = "default_key_field")] key_field: String, + #[serde(default = "default_topic_key")] topic_key: String, + #[serde(default = "default_partition_key")] partition_key: String, + #[serde(default = "default_offset_key")] offset_key: String, + #[serde(default = "default_headers_key")] headers_key: String, + librdkafka_options: Option>, + #[serde(flatten)] auth: KafkaAuthConfig, + + #[configurable(derived)] #[serde(default = "default_framing_message_based")] #[derivative(Default(value = "default_framing_message_based()"))] framing: FramingConfig, + + #[configurable(derived)] #[serde(default = "default_decoding")] #[derivative(Default(value = "default_decoding()"))] decoding: DeserializerConfig, + + #[configurable(derived)] #[serde(default, deserialize_with = "bool_or_struct")] acknowledgements: AcknowledgementsConfig, } diff --git a/src/sources/logstash.rs b/src/sources/logstash.rs index cc91193d07f52..4195036119e1b 100644 --- a/src/sources/logstash.rs +++ b/src/sources/logstash.rs @@ -9,10 +9,10 @@ use bytes::{Buf, Bytes, BytesMut}; use codecs::StreamDecodingError; use flate2::read::ZlibDecoder; use lookup::path; -use serde::{Deserialize, Serialize}; use 
smallvec::{smallvec, SmallVec}; use snafu::{ResultExt, Snafu}; use tokio_util::codec::Decoder; +use vector_config::configurable_component; use super::util::{SocketListenAddr, TcpSource, TcpSourceAck, TcpSourceAcker}; use crate::{ @@ -27,15 +27,30 @@ use crate::{ types, }; -#[derive(Deserialize, Serialize, Debug)] +/// Configuration for the `logstash` source. +#[configurable_component(source)] +#[derive(Clone, Debug)] pub struct LogstashConfig { + /// The address to listen for connections on. address: SocketListenAddr, + + #[configurable(derived)] keepalive: Option, + + #[configurable(derived)] tls: Option, + + /// The size, in bytes, of the receive buffer used for each connection. + /// + /// This should not typically needed to be changed. receive_buffer_bytes: Option, + + /// The maximum number of TCP connections that will be allowed at any given time. + connection_limit: Option, + + #[configurable(derived)] #[serde(default, deserialize_with = "bool_or_struct")] acknowledgements: AcknowledgementsConfig, - connection_limit: Option, } inventory::submit! { diff --git a/src/sources/mod.rs b/src/sources/mod.rs index f1ebdc434dc90..e92a79aea84f0 100644 --- a/src/sources/mod.rs +++ b/src/sources/mod.rs @@ -1,3 +1,4 @@ +use serde::{Deserialize, Serialize}; use snafu::Snafu; #[cfg(feature = "sources-apache_metrics")] @@ -73,7 +74,7 @@ pub mod vector; pub(crate) mod util; -use vector_config::configurable_component; +use vector_config::Configurable; pub use vector_core::source::Source; /// Common build errors @@ -83,81 +84,175 @@ enum BuildError { UriParseError { source: ::http::uri::InvalidUri }, } -#[configurable_component] -#[derive(Clone)] +/// Configurable sources in Vector. +#[derive(Clone, Configurable, Deserialize, Serialize)] +#[serde(tag = "type", rename_all = "snake_case")] pub enum Sources { /// Apache HTTP Server (HTTPD) Metrics. 
#[cfg(feature = "sources-apache_metrics")] ApacheMetrics(#[configurable(derived)] apache_metrics::ApacheMetricsConfig), + /// AWS ECS Metrics. #[cfg(feature = "sources-aws_ecs_metrics")] AwsEcsMetrics(#[configurable(derived)] aws_ecs_metrics::AwsEcsMetricsSourceConfig), - /*#[cfg(feature = "sources-aws_kinesis_firehose")] + + /// AWS Kinesis Firehose. + #[cfg(feature = "sources-aws_kinesis_firehose")] AwsKinesisFirehose(#[configurable(derived)] aws_kinesis_firehose::AwsKinesisFirehoseConfig), + + /// AWS S3. #[cfg(feature = "sources-aws_s3")] AwsS3(#[configurable(derived)] aws_s3::AwsS3Config), + + /// AWS SQS. #[cfg(feature = "sources-aws_sqs")] AwsSqs(#[configurable(derived)] aws_sqs::AwsSqsConfig), + + /// Datadog Agent. #[cfg(feature = "sources-datadog_agent")] DatadogAgent(#[configurable(derived)] datadog::agent::DatadogAgentConfig), + + /// Demo logs. #[cfg(feature = "sources-demo_logs")] DemoLogs(#[configurable(derived)] demo_logs::DemoLogsConfig), + + /// DNSTAP. #[cfg(all(unix, feature = "sources-dnstap"))] Dnstap(#[configurable(derived)] dnstap::DnstapConfig), + + /// Docker Logs. #[cfg(feature = "sources-docker_logs")] DockerLogs(#[configurable(derived)] docker_logs::DockerLogsConfig), + + /// EventStoreDB Metrics. #[cfg(feature = "sources-eventstoredb_metrics")] EventstoreDbMetrics(#[configurable(derived)] eventstoredb_metrics::EventStoreDbConfig), + + /// Exec. #[cfg(feature = "sources-exec")] Exec(#[configurable(derived)] exec::ExecConfig), + + /// File. #[cfg(feature = "sources-file")] File(#[configurable(derived)] file::FileConfig), + + /// Fluent. #[cfg(feature = "sources-fluent")] Fluent(#[configurable(derived)] fluent::FluentConfig), + + /// GCP Pub/Sub. #[cfg(feature = "sources-gcp_pubsub")] GcpPubsub(#[configurable(derived)] gcp_pubsub::PubsubConfig), + + /// Heroku Logs. #[cfg(feature = "sources-heroku_logs")] HerokuLogs(#[configurable(derived)] heroku_logs::LogplexConfig), + + /// Host Metrics. 
#[cfg(feature = "sources-host_metrics")] HostMetrics(#[configurable(derived)] host_metrics::HostMetricsConfig), + + /// HTTP. #[cfg(feature = "sources-http")] Http(#[configurable(derived)] http::SimpleHttpConfig), + + /// Internal Logs. #[cfg(feature = "sources-internal_logs")] InternalLogs(#[configurable(derived)] internal_logs::InternalLogsConfig), + + /// Internal Metrics. #[cfg(feature = "sources-internal_metrics")] InternalMetrics(#[configurable(derived)] internal_metrics::InternalMetricsConfig), + + /// Journald. #[cfg(all(unix, feature = "sources-journald"))] Journald(#[configurable(derived)] journald::JournaldConfig), + + /// Kafka. #[cfg(all(feature = "sources-kafka", feature = "rdkafka"))] Kafka(#[configurable(derived)] kafka::KafkaSourceConfig), + + /// Kubernetes Logs. #[cfg(feature = "sources-kubernetes_logs")] KubernetesLogs(#[configurable(derived)] kubernetes_logs::Config), + + /// Logstash. #[cfg(all(feature = "sources-logstash"))] Logstash(#[configurable(derived)] logstash::LogstashConfig), + + /// MongoDB Metrics. #[cfg(feature = "sources-mongodb_metrics")] MongodbMetrics(#[configurable(derived)] mongodb_metrics::MongoDbMetricsConfig), + + /// NATS. #[cfg(all(feature = "sources-nats"))] Nats(#[configurable(derived)] nats::NatsSourceConfig), + + /// NGINX Metrics. #[cfg(feature = "sources-nginx_metrics")] NginxMetrics(#[configurable(derived)] nginx_metrics::NginxMetricsConfig), + + /// PostgreSQL Metrics. #[cfg(feature = "sources-postgresql_metrics")] PostgresqlMetrics(#[configurable(derived)] postgresql_metrics::PostgresqlMetricsConfig), + + /// Prometheus Scrape. #[cfg(feature = "sources-prometheus")] - PrometheusScrape(#[configurable(derived)] prometheus::scrape::PrometheusScrapeConfig), + PrometheusScrape(#[configurable(derived)] prometheus::PrometheusScrapeConfig), + + /// Prometheus Remote Write. 
#[cfg(feature = "sources-prometheus")] - PrometheusRemoteWrite(#[configurable(derived)] prometheus::remote_write::PrometheusRemoteWriteConfig), + PrometheusRemoteWrite(#[configurable(derived)] prometheus::PrometheusRemoteWriteConfig), + + /// Redis. #[cfg(feature = "sources-redis")] Redis(#[configurable(derived)] redis::RedisSourceConfig), + + /// Socket. #[cfg(feature = "sources-socket")] Socket(#[configurable(derived)] socket::SocketConfig), + + /// Splunk HEC. #[cfg(feature = "sources-splunk_hec")] SplunkHec(#[configurable(derived)] splunk_hec::SplunkConfig), + + /// Statsd. #[cfg(feature = "sources-statsd")] Statsd(#[configurable(derived)] statsd::StatsdConfig), + + /// Stdin. #[cfg(feature = "sources-stdin")] Stdin(#[configurable(derived)] stdin::StdinConfig), + + /// Syslog. #[cfg(feature = "sources-syslog")] Syslog(#[configurable(derived)] syslog::SyslogConfig), + + /// Vector. #[cfg(feature = "sources-vector")] - Vector(#[configurable(derived)] vector::VectorConfig),*/ + Vector(#[configurable(derived)] vector::VectorConfig), +} + +#[cfg(test)] +mod tests { + use vector_config::{configurable_component, schema::generate_root_schema}; + + use crate::sources::Sources; + + /// Top-level Vector configuration. (mock) + #[configurable_component] + #[derive(Clone)] + struct MockRootConfig { + /// All configured sources. 
+ sources: Vec, + } + + #[test] + fn vector_config() { + let root_schema = generate_root_schema::(); + let json = serde_json::to_string_pretty(&root_schema) + .expect("rendering root schema to JSON should not fail"); + + println!("{}", json); + } } diff --git a/src/sources/mongodb_metrics/mod.rs b/src/sources/mongodb_metrics/mod.rs index b2b7630a167d1..c63c7dae51d76 100644 --- a/src/sources/mongodb_metrics/mod.rs +++ b/src/sources/mongodb_metrics/mod.rs @@ -11,10 +11,10 @@ use mongodb::{ options::ClientOptions, Client, }; -use serde::{Deserialize, Serialize}; use snafu::{ResultExt, Snafu}; use tokio::time; use tokio_stream::wrappers::IntervalStream; +use vector_config::configurable_component; use vector_core::ByteSizeOf; use crate::{ @@ -72,12 +72,24 @@ enum CollectError { Bson(bson::de::Error), } -#[derive(Deserialize, Serialize, Clone, Debug, Default)] -#[serde(deny_unknown_fields)] -struct MongoDbMetricsConfig { +/// Configuration for the `mongodb_metrics` source. +#[configurable_component(source)] +#[derive(Clone, Debug, Default)] +pub struct MongoDbMetricsConfig { + /// A list of MongoDB instances to scrape. + /// + /// Each endpoint must be in the [Connection String URI Format](https://www.mongodb.com/docs/manual/reference/connection-string/). endpoints: Vec, + + /// The interval between scrapes, in seconds. #[serde(default = "default_scrape_interval_secs")] scrape_interval_secs: u64, + + /// Overrides the default namespace for the metrics emitted by the source. + /// + /// If set to an empty string, no namespace is added to the metrics. + /// + /// By default, `mongodb` is used. 
#[serde(default = "default_namespace")] namespace: String, } diff --git a/src/sources/nats.rs b/src/sources/nats.rs index bba0a81b295a9..6969f42a5a5d8 100644 --- a/src/sources/nats.rs +++ b/src/sources/nats.rs @@ -2,9 +2,9 @@ use bytes::Bytes; use chrono::Utc; use codecs::decoding::{DeserializerConfig, FramingConfig, StreamDecodingError}; use futures::{pin_mut, stream, Stream, StreamExt}; -use serde::{Deserialize, Serialize}; use snafu::{ResultExt, Snafu}; use tokio_util::codec::FramedRead; +use vector_config::configurable_component; use vector_core::ByteSizeOf; use crate::{ @@ -29,20 +29,30 @@ enum BuildError { Subscribe { source: std::io::Error }, } -#[derive(Clone, Debug, Derivative, Deserialize, Serialize)] +#[configurable_component(source)] +#[derive(Clone, Debug, Derivative)] #[derivative(Default)] -#[serde(deny_unknown_fields)] -struct NatsSourceConfig { +pub struct NatsSourceConfig { url: String, + #[serde(alias = "name")] connection_name: String, + subject: String, + queue: Option, + + #[configurable(derived)] tls: Option, + auth: Option, + + #[configurable(derived)] #[serde(default = "default_framing_message_based")] #[derivative(Default(value = "default_framing_message_based()"))] framing: FramingConfig, + + #[configurable(derived)] #[serde(default = "default_decoding")] #[derivative(Default(value = "default_decoding()"))] decoding: DeserializerConfig, diff --git a/src/sources/nginx_metrics/mod.rs b/src/sources/nginx_metrics/mod.rs index 80cbe5c709caa..ef5654e9d04bc 100644 --- a/src/sources/nginx_metrics/mod.rs +++ b/src/sources/nginx_metrics/mod.rs @@ -5,10 +5,10 @@ use chrono::Utc; use futures::{future::join_all, StreamExt, TryFutureExt}; use http::{Request, StatusCode}; use hyper::{body::to_bytes as body_to_bytes, Body, Uri}; -use serde::{Deserialize, Serialize}; use snafu::{ResultExt, Snafu}; use tokio::time; use tokio_stream::wrappers::IntervalStream; +use vector_config::configurable_component; use vector_core::ByteSizeOf; use crate::{ @@ -53,15 
+53,32 @@ enum NginxError { InvalidResponseStatus { status: StatusCode }, } -#[derive(Deserialize, Serialize, Clone, Debug, Default)] -#[serde(deny_unknown_fields)] -struct NginxMetricsConfig { +/// Configuration for the `nginx_metrics` source. +#[configurable_component(source)] +#[derive(Clone, Debug, Default)] +pub struct NginxMetricsConfig { + /// A list of NGINX instances to scrape. + /// + /// Each endpoint must be a valid HTTP/HTTPS URI pointing to an NGINX instance that has the + /// `ngx_http_stub_status_module` module enabled. endpoints: Vec, + + /// The interval between scrapes, in seconds. #[serde(default = "default_scrape_interval_secs")] scrape_interval_secs: u64, + + /// Overrides the default namespace for the metrics emitted by the source. + /// + /// If set to an empty string, no namespace is added to the metrics. + /// + /// By default, `nginx` is used. #[serde(default = "default_namespace")] namespace: String, + + #[configurable(derived)] tls: Option, + + #[configurable(derived)] auth: Option, } diff --git a/src/sources/postgresql_metrics.rs b/src/sources/postgresql_metrics.rs index 4ea1f25f0adcb..461fea769c423 100644 --- a/src/sources/postgresql_metrics.rs +++ b/src/sources/postgresql_metrics.rs @@ -15,7 +15,6 @@ use openssl::{ ssl::{SslConnector, SslMethod}, }; use postgres_openssl::MakeTlsConnector; -use serde::{Deserialize, Serialize}; use snafu::{ResultExt, Snafu}; use tokio::time; use tokio_postgres::{ @@ -24,6 +23,7 @@ use tokio_postgres::{ Client, Config, Error as PgError, NoTls, Row, }; use tokio_stream::wrappers::IntervalStream; +use vector_config::configurable_component; use vector_core::ByteSizeOf; use crate::{ @@ -100,14 +100,21 @@ struct PostgresqlMetricsTlsConfig { ca_file: PathBuf, } -#[derive(Deserialize, Serialize, Clone, Debug)] -#[serde(default, deny_unknown_fields)] -struct PostgresqlMetricsConfig { +/// Configuration for the `postgresql_metrics` source. 
+#[configurable_component(source)] +#[derive(Clone, Debug)] +#[serde(default)] +pub struct PostgresqlMetricsConfig { endpoints: Vec, + include_databases: Option>, + exclude_databases: Option>, + scrape_interval_secs: u64, + namespace: String, + tls: Option, } diff --git a/src/sources/prometheus/mod.rs b/src/sources/prometheus/mod.rs index 56a5bfedade18..e9950e3392f38 100644 --- a/src/sources/prometheus/mod.rs +++ b/src/sources/prometheus/mod.rs @@ -1,3 +1,6 @@ pub(crate) mod parser; mod remote_write; mod scrape; + +pub use remote_write::PrometheusRemoteWriteConfig; +pub use scrape::PrometheusScrapeConfig; diff --git a/src/sources/prometheus/remote_write.rs b/src/sources/prometheus/remote_write.rs index 80c3933bef4e8..a6b3871fc6bad 100644 --- a/src/sources/prometheus/remote_write.rs +++ b/src/sources/prometheus/remote_write.rs @@ -26,7 +26,7 @@ use crate::{ const SOURCE_NAME: &str = "prometheus_remote_write"; #[derive(Clone, Debug, Deserialize, Serialize)] -struct PrometheusRemoteWriteConfig { +pub struct PrometheusRemoteWriteConfig { address: SocketAddr, tls: Option, diff --git a/src/sources/prometheus/scrape.rs b/src/sources/prometheus/scrape.rs index 871dadc08f313..bed0bd2393e51 100644 --- a/src/sources/prometheus/scrape.rs +++ b/src/sources/prometheus/scrape.rs @@ -42,7 +42,7 @@ enum ConfigError { } #[derive(Deserialize, Serialize, Clone, Debug)] -struct PrometheusScrapeConfig { +pub struct PrometheusScrapeConfig { // Deprecated name #[serde(alias = "hosts")] endpoints: Vec, diff --git a/src/sources/socket/mod.rs b/src/sources/socket/mod.rs index fd530a73f6620..d4ce736ddff71 100644 --- a/src/sources/socket/mod.rs +++ b/src/sources/socket/mod.rs @@ -4,7 +4,7 @@ mod udp; mod unix; use codecs::NewlineDelimitedDecoderConfig; -use serde::{Deserialize, Serialize}; +use vector_config::configurable_component; #[cfg(unix)] use crate::serde::default_framing_message_based; @@ -18,21 +18,30 @@ use crate::{ tls::MaybeTlsSettings, }; -#[derive(Deserialize, Serialize, Debug, 
Clone)] -// TODO: add back when https://github.com/serde-rs/serde/issues/1358 is addressed -// #[serde(deny_unknown_fields)] +/// Configuration for the `socket` source. +#[configurable_component(source)] +#[derive(Clone, Debug)] pub struct SocketConfig { #[serde(flatten)] pub mode: Mode, } -#[derive(Deserialize, Serialize, Debug, Clone)] +/// Listening mode for the `socket` source. +#[configurable_component] +#[derive(Clone, Debug)] #[serde(tag = "mode", rename_all = "snake_case")] pub enum Mode { + /// Listen on TCP. Tcp(tcp::TcpConfig), + + /// Listen on UDP. Udp(udp::UdpConfig), + + /// Listen on UDS, in datagram mode. (Unix domain socket) #[cfg(unix)] UnixDatagram(unix::UnixConfig), + + /// Listen on UDS, in stream mode. (Unix domain socket) #[cfg(unix)] #[serde(alias = "unix")] UnixStream(unix::UnixConfig), diff --git a/src/sources/socket/tcp.rs b/src/sources/socket/tcp.rs index 06308fb0ef128..a3559689324e2 100644 --- a/src/sources/socket/tcp.rs +++ b/src/sources/socket/tcp.rs @@ -1,8 +1,8 @@ use bytes::Bytes; use chrono::Utc; use codecs::decoding::{DeserializerConfig, FramingConfig}; -use serde::{Deserialize, Serialize}; use smallvec::SmallVec; +use vector_config::configurable_component; use crate::{ codecs::Decoder, @@ -14,21 +14,56 @@ use crate::{ tls::TlsEnableableConfig, }; -#[derive(Deserialize, Serialize, Debug, Clone)] +/// TCP configuration for the `socket` source. +#[configurable_component] +#[derive(Clone, Debug)] pub struct TcpConfig { + /// The address to listen for connections on. address: SocketListenAddr, + + #[configurable(derived)] keepalive: Option, + + /// The maximum buffer size, in bytes, of incoming messages. + /// + /// Messages larger than this are truncated. max_length: Option, + + /// The timeout, in seconds, before a connection is forcefully closed during shutdown. #[serde(default = "default_shutdown_timeout_secs")] shutdown_timeout_secs: u64, + + /// Overrides the name of the log field used to add the peer host to each event. 
+ /// + /// The value will be the peer host's address, including the port i.e. `1.2.3.4:9000`. + /// + /// By default, the [global `host_key` option](\(urls.vector_configuration)/global-options#log_schema.host_key) is used. host_key: Option, + + /// Overrides the name of the log field used to add the peer host's port to each event. + /// + /// The value will be the peer host's port i.e. `9000`. + /// + /// By default, `"port"` is used. port_key: Option, + + #[configurable(derived)] tls: Option, + + /// The size, in bytes, of the receive buffer used for each connection. + /// + /// This should not typically needed to be changed. receive_buffer_bytes: Option, + + /// The maximum number of TCP connections that will be allowed at any given time. + pub connection_limit: Option, + + #[configurable(derived)] framing: Option, + + #[configurable(derived)] #[serde(default = "default_decoding")] decoding: DeserializerConfig, - pub connection_limit: Option, } const fn default_shutdown_timeout_secs() -> u64 { diff --git a/src/sources/socket/udp.rs b/src/sources/socket/udp.rs index cd500a3796cfc..2ac3a34096912 100644 --- a/src/sources/socket/udp.rs +++ b/src/sources/socket/udp.rs @@ -7,9 +7,9 @@ use codecs::{ StreamDecodingError, }; use futures::StreamExt; -use serde::{Deserialize, Serialize}; use tokio::net::UdpSocket; use tokio_util::codec::FramedRead; +use vector_config::configurable_component; use vector_core::ByteSizeOf; use crate::{ @@ -25,18 +25,43 @@ use crate::{ udp, SourceSender, }; -/// UDP processes messages per packet, where messages are separated by newline. -#[derive(Deserialize, Serialize, Debug, Clone)] -#[serde(deny_unknown_fields)] +/// UDP configuration for the `socket` source. +#[configurable_component] +#[derive(Clone, Debug)] pub struct UdpConfig { + /// The address to listen for messages on. address: SocketAddr, + + /// The maximum buffer size, in bytes, of incoming messages. + /// + /// Messages larger than this are truncated. 
#[serde(default = "crate::serde::default_max_length")] pub(super) max_length: usize, + + /// Overrides the name of the log field used to add the peer host to each event. + /// + /// The value will be the peer host's address, including the port i.e. `1.2.3.4:9000`. + /// + /// By default, the [global `host_key` option](\(urls.vector_configuration)/global-options#log_schema.host_key) is used. host_key: Option, + + /// Overrides the name of the log field used to add the peer host's port to each event. + /// + /// The value will be the peer host's port i.e. `9000`. + /// + /// By default, `"port"` is used. port_key: Option, + + /// The size, in bytes, of the receive buffer used for the listening socket. + /// + /// This should not typically needed to be changed. receive_buffer_bytes: Option, + + #[configurable(derived)] #[serde(default = "default_framing_message_based")] pub(super) framing: FramingConfig, + + #[configurable(derived)] #[serde(default = "default_decoding")] decoding: DeserializerConfig, } diff --git a/src/sources/socket/unix.rs b/src/sources/socket/unix.rs index bff29f195609c..7fd94c792c7fe 100644 --- a/src/sources/socket/unix.rs +++ b/src/sources/socket/unix.rs @@ -3,7 +3,7 @@ use std::path::PathBuf; use bytes::Bytes; use chrono::Utc; use codecs::decoding::{DeserializerConfig, FramingConfig}; -use serde::{Deserialize, Serialize}; +use vector_config::configurable_component; use crate::{ codecs::Decoder, @@ -18,15 +18,38 @@ use crate::{ SourceSender, }; -#[derive(Deserialize, Serialize, Debug, Clone)] -#[serde(deny_unknown_fields)] +/// Unix domain socket configuration for the `socket` source. +#[configurable_component] +#[derive(Clone, Debug)] pub struct UnixConfig { + /// The Unix socket path. + /// + /// This should be an absolute path. pub path: PathBuf, + + /// Unix file mode bits to be applied to the unix socket file as its designated file permissions. 
+ /// + /// Note that the file mode value can be specified in any numeric format supported by your configuration + /// language, but it is most intuitive to use an octal number. pub socket_file_mode: Option, + + /// The maximum buffer size, in bytes, of incoming messages. + /// + /// Messages larger than this are truncated. pub max_length: Option, + + /// Overrides the name of the log field used to add the peer host to each event. + /// + /// The value will be the socket path itself. + /// + /// By default, the [global `host_key` option](\(urls.vector_configuration)/global-options#log_schema.host_key) is used.pub host_key: Option, pub host_key: Option, + + #[configurable(derived)] #[serde(default)] pub framing: Option, + + #[configurable(derived)] #[serde(default = "default_decoding")] pub decoding: DeserializerConfig, } diff --git a/src/sources/splunk_hec/acknowledgements.rs b/src/sources/splunk_hec/acknowledgements.rs index 3d3fc7f9c2d42..3fc73bb16d334 100644 --- a/src/sources/splunk_hec/acknowledgements.rs +++ b/src/sources/splunk_hec/acknowledgements.rs @@ -12,6 +12,7 @@ use futures::StreamExt; use roaring::RoaringTreemap; use serde::{Deserialize, Serialize}; use tokio::time::interval; +use vector_config::configurable_component; use vector_core::event::BatchStatusReceiver; use warp::Rejection; @@ -21,15 +22,43 @@ use crate::{ sources::util::finalizer::UnorderedFinalizer, }; -#[derive(Deserialize, Serialize, Debug, Clone)] +/// Acknowledgement configuration for the `splunk_hec` source. +#[configurable_component] +#[derive(Clone, Debug)] #[serde(default)] pub struct HecAcknowledgementsConfig { #[serde(flatten)] pub inner: AcknowledgementsConfig, + + /// The maximum number of ack statuses pending query across all channels. + /// + /// Equivalent to the `max_number_of_acked_requests_pending_query` Splunk HEC setting. + /// + /// Minimum of `1`. pub max_pending_acks: NonZeroU64, + + /// The maximum number of Splunk HEC channels clients can use with this source. 
+ /// + /// Minimum of `1`. pub max_number_of_ack_channels: NonZeroU64, + + /// The maximum number of ack statuses pending query for a single channel. + /// + /// Equivalent to the `max_number_of_acked_requests_pending_query_per_ack_channel` Splunk HEC setting. + /// + /// Minimum of `1`. pub max_pending_acks_per_channel: NonZeroU64, + + /// Whether or not to remove channels after idling for `max_idle_time` seconds. + /// + /// A channel is idling if it is not used for sending data or querying ack statuses. pub ack_idle_cleanup: bool, + + /// The amount of time, in seconds, a channel is allowed to idle before removal. + /// + /// Channels can potentially idle for longer than this setting but clients should not rely on such behavior. + /// + /// Minimum of `1`. pub max_idle_time: NonZeroU64, } diff --git a/src/sources/splunk_hec/mod.rs b/src/sources/splunk_hec/mod.rs index 4301aef96e3f0..858cff96fa0da 100644 --- a/src/sources/splunk_hec/mod.rs +++ b/src/sources/splunk_hec/mod.rs @@ -10,10 +10,11 @@ use chrono::{DateTime, TimeZone, Utc}; use flate2::read::MultiGzDecoder; use futures::FutureExt; use http::StatusCode; -use serde::{Deserialize, Serialize}; +use serde::Serialize; use serde_json::{de::Read as JsonRead, Deserializer, Value as JsonValue}; use snafu::Snafu; use tracing::Span; +use vector_config::configurable_component; use vector_core::{event::BatchNotifier, ByteSizeOf}; use warp::{filters::BoxedFilter, path, reject::Rejection, reply::Response, Filter, Reply}; @@ -47,23 +48,46 @@ pub const INDEX: &str = "splunk_index"; pub const SOURCE: &str = "splunk_source"; pub const SOURCETYPE: &str = "splunk_sourcetype"; -/// Accepts HTTP requests. -#[derive(Deserialize, Serialize, Debug, Clone)] -#[serde(deny_unknown_fields, default)] +/// Configuration for the `splunk_hec` source. 
+#[configurable_component(source)] +#[derive(Clone, Debug)] +#[serde(default)] pub struct SplunkConfig { - /// Local address on which to listen + /// The address to listen for connections on. + /// + /// The address _must_ include a port. #[serde(default = "default_socket_address")] pub address: SocketAddr, - /// Splunk HEC token. Deprecated - use `valid_tokens` instead + + /// Optional authorization token. + /// + /// If supplied, incoming requests must supply this token in the `Authorization` header, just as a client would if + /// it was communicating with the Splunk HEC endpoint directly. + /// + /// If _not_ supplied, the `Authorization` header will be ignored and requests will not be authenticated. + #[deprecated] token: Option, - /// A list of tokens to accept. Omit this to accept any token + + /// Optional list of valid authorization tokens. + /// + /// If supplied, incoming requests must supply one of these tokens in the `Authorization` header, just as a client + /// would if it was communicating with the Splunk HEC endpoint directly. + /// + /// If _not_ supplied, the `Authorization` header will be ignored and requests will not be authenticated. valid_tokens: Option>, + + /// Whether or not to forward the Splunk HEC authentication token with events. + /// + /// If set to `true`, when incoming requests contain a Splunk HEC token, the token used will kept in the + /// event metadata and be preferentially used if the event is sent to a Splunk HEC sink. + store_hec_token: bool, + + #[configurable(derived)] tls: Option, - /// Splunk HEC indexer acknowledgement settings + + #[configurable(derived)] #[serde(deserialize_with = "bool_or_struct")] acknowledgements: HecAcknowledgementsConfig, - /// Splunk HEC token passthrough - store_hec_token: bool, } inventory::submit! 
{ diff --git a/src/sources/statsd/mod.rs b/src/sources/statsd/mod.rs index 9ce4fc0a59f23..996faae0f7adb 100644 --- a/src/sources/statsd/mod.rs +++ b/src/sources/statsd/mod.rs @@ -6,10 +6,10 @@ use codecs::{ NewlineDelimitedDecoder, }; use futures::{StreamExt, TryFutureExt}; -use serde::{Deserialize, Serialize}; use smallvec::{smallvec, SmallVec}; use tokio::net::UdpSocket; use tokio_util::udp::UdpFramed; +use vector_config::configurable_component; use vector_core::ByteSizeOf; use self::parser::ParseError; @@ -38,18 +38,32 @@ use parser::parse; #[cfg(unix)] use unix::{statsd_unix, UnixConfig}; -#[derive(Deserialize, Serialize, Debug, Clone)] +/// Configuration for the `statsd` source. +#[configurable_component(source)] +#[derive(Clone, Debug)] #[serde(tag = "mode", rename_all = "snake_case")] -enum StatsdConfig { +pub enum StatsdConfig { + /// Listen on TCP. Tcp(TcpConfig), + + /// Listen on UDP. Udp(UdpConfig), + + /// Listen on UDS. (Unix domain socket) #[cfg(unix)] Unix(UnixConfig), } -#[derive(Deserialize, Serialize, Debug, Clone)] +/// UDP configuration for the `statsd` source. +#[configurable_component] +#[derive(Clone, Debug)] pub struct UdpConfig { + /// The address to listen for messages on. address: SocketAddr, + + /// The size, in bytes, of the receive buffer used for each connection. + /// + /// This should not typically needed to be changed. receive_buffer_bytes: Option, } @@ -62,15 +76,30 @@ impl UdpConfig { } } -#[derive(Deserialize, Serialize, Debug, Clone)] +/// TCP configuration for the `statsd` source. +#[configurable_component] +#[derive(Clone, Debug)] struct TcpConfig { + /// The address to listen for connections on. address: SocketListenAddr, + + #[configurable(derived)] keepalive: Option, + + #[configurable(derived)] #[serde(default)] tls: Option, + + /// The timeout before a connection is forcefully closed during shutdown. 
#[serde(default = "default_shutdown_timeout_secs")] shutdown_timeout_secs: u64, + + /// The size, in bytes, of the receive buffer used for each connection. + /// + /// This should not typically needed to be changed. receive_buffer_bytes: Option, + + /// The maximum number of TCP connections that will be allowed at any given time. connection_limit: Option, } diff --git a/src/sources/statsd/unix.rs b/src/sources/statsd/unix.rs index 6eb543a4b2ac0..33d00a4f88d9c 100644 --- a/src/sources/statsd/unix.rs +++ b/src/sources/statsd/unix.rs @@ -4,7 +4,7 @@ use codecs::{ decoding::{Deserializer, Framer}, NewlineDelimitedDecoder, }; -use serde::{Deserialize, Serialize}; +use vector_config::configurable_component; use super::StatsdDeserializer; use crate::{ @@ -14,8 +14,13 @@ use crate::{ SourceSender, }; -#[derive(Deserialize, Serialize, Debug, Clone)] +/// Unix domain socket configuration for the `statsd` source. +#[configurable_component] +#[derive(Clone, Debug)] pub struct UnixConfig { + /// The Unix socket path. + /// + /// This should be an absolute path. pub path: PathBuf, } diff --git a/src/sources/stdin.rs b/src/sources/stdin.rs index f1809272faa1f..3fb78cc8c87d2 100644 --- a/src/sources/stdin.rs +++ b/src/sources/stdin.rs @@ -8,8 +8,8 @@ use codecs::{ StreamDecodingError, }; use futures::{channel::mpsc, executor, SinkExt, StreamExt}; -use serde::{Deserialize, Serialize}; use tokio_util::{codec::FramedRead, io::StreamReader}; +use vector_config::configurable_component; use vector_core::ByteSizeOf; use crate::{ @@ -20,14 +20,28 @@ use crate::{ shutdown::ShutdownSignal, SourceSender, }; - -#[derive(Deserialize, Serialize, Debug, Clone)] -#[serde(deny_unknown_fields, default)] +/// Configuration for the `stdin` source. +#[configurable_component(source)] +#[derive(Clone, Debug)] +#[serde(default)] pub struct StdinConfig { + /// The maximum buffer size, in bytes, of incoming messages. + /// + /// Messages larger than this are truncated. 
#[serde(default = "crate::serde::default_max_length")] pub max_length: usize, + + /// Overrides the name of the log field used to add the current hostname to each event. + /// + /// The value will be the current hostname for wherever Vector is running. + /// + /// By default, the [global `host_key` option](\(urls.vector_configuration)/global-options#log_schema.host_key) is used. pub host_key: Option, + + #[configurable(derived)] pub framing: Option, + + #[configurable(derived)] #[serde(default = "default_decoding")] pub decoding: DeserializerConfig, } diff --git a/src/sources/syslog.rs b/src/sources/syslog.rs index 7e1267acfedaf..4e8a75651cd34 100644 --- a/src/sources/syslog.rs +++ b/src/sources/syslog.rs @@ -9,10 +9,10 @@ use codecs::{ BytesDecoder, OctetCountingDecoder, SyslogDeserializer, }; use futures::StreamExt; -use serde::{Deserialize, Serialize}; use smallvec::SmallVec; use tokio::net::UdpSocket; use tokio_util::udp::UdpFramed; +use vector_config::configurable_component; use crate::codecs::Decoder; #[cfg(unix)] @@ -31,35 +31,76 @@ use crate::{ udp, SourceSender, }; -#[derive(Deserialize, Serialize, Debug)] -// TODO: add back when serde-rs/serde#1358 is addressed -// #[serde(deny_unknown_fields)] +/// Configuration for the `syslog` source. +#[configurable_component(source)] +#[derive(Clone, Debug)] pub struct SyslogConfig { #[serde(flatten)] mode: Mode, + + /// The maximum buffer size of incoming messages, in bytes. + /// + /// Messages larger than this are truncated. #[serde(default = "crate::serde::default_max_length")] max_length: usize, - /// The host key of the log. (This differs from `hostname`) + + /// Overrides the name of the log field used to add the peer host to each event. + /// + /// If using TCP or UDP, the value will be the peer host's address, including the port i.e. `1.2.3.4:9000`. If using + /// UDS, the value will be the socket path itself. 
+ /// + /// By default, the [global `host_key` option](\(urls.vector_configuration)/global-options#log_schema.host_key) is used. host_key: Option, } -#[derive(Deserialize, Serialize, Debug, Clone)] +/// Listener mode for the `syslog` source. +#[configurable_component] +#[derive(Clone, Debug)] #[serde(tag = "mode", rename_all = "snake_case")] pub enum Mode { + /// Listen on TCP. Tcp { + /// The address to listen for connections on. address: SocketListenAddr, + + #[configurable(derived)] keepalive: Option, + + #[configurable(derived)] tls: Option, + + /// The size, in bytes, of the receive buffer used for each connection. + /// + /// This should not typically needed to be changed. receive_buffer_bytes: Option, + + /// The maximum number of TCP connections that will be allowed at any given time. connection_limit: Option, }, + + /// Listen on UDP. Udp { + /// The address to listen for messages on. address: SocketAddr, + + /// The size, in bytes, of the receive buffer used for the listening socket. + /// + /// This should not typically needed to be changed. receive_buffer_bytes: Option, }, + + /// Listen on UDS. (Unix domain socket) #[cfg(unix)] Unix { + /// The Unix socket path. + /// + /// This should be an absolute path. path: PathBuf, + + /// Unix file mode bits to be applied to the unix socket file as its designated file permissions. + /// + /// Note that the file mode value can be specified in any numeric format supported by your configuration + /// language, but it is most intuitive to use an octal number. 
socket_file_mode: Option, }, } @@ -323,6 +364,7 @@ mod test { use chrono::prelude::*; use codecs::decoding::format::Deserializer; use rand::{thread_rng, Rng}; + use serde::Deserialize; use tokio::time::{sleep, Duration, Instant}; use tokio_util::codec::BytesCodec; use value::Value; diff --git a/src/sources/util/multiline_config.rs b/src/sources/util/multiline_config.rs index db7896d0fe955..c12f3005ffeb0 100644 --- a/src/sources/util/multiline_config.rs +++ b/src/sources/util/multiline_config.rs @@ -3,15 +3,23 @@ use std::{convert::TryFrom, time::Duration}; use regex::bytes::Regex; use serde::{Deserialize, Serialize}; use snafu::{ResultExt, Snafu}; +use vector_config::Configurable; use crate::line_agg; -#[derive(Debug, Clone, PartialEq, Deserialize, Serialize)] +/// Multi-line parsing configuration. +#[derive(Clone, Configurable, Debug, Deserialize, PartialEq, Serialize)] #[serde(deny_unknown_fields)] pub struct MultilineConfig { + /// Start regex pattern to look for as a beginning of the message. pub start_pattern: String, + /// Condition regex pattern to look for. Exact behavior is configured via `mode`. pub condition_pattern: String, + /// Mode of operation, specifies how `condition_pattern` is interpreted. pub mode: line_agg::Mode, + /// The maximum time to wait for the continuation, in milliseconds. + /// + /// Once this timeout is reached, the buffered message is guaranteed to be flushed, even if incomplete. 
pub timeout_ms: u64, } diff --git a/src/sources/util/tcp/mod.rs b/src/sources/util/tcp/mod.rs index d85baf4b471b4..2279d052cec74 100644 --- a/src/sources/util/tcp/mod.rs +++ b/src/sources/util/tcp/mod.rs @@ -7,7 +7,7 @@ use bytes::Bytes; use codecs::StreamDecodingError; use futures::{future::BoxFuture, FutureExt, StreamExt}; use listenfd::ListenFd; -use serde::{de, Deserialize, Deserializer, Serialize}; +use serde::{de, Deserialize, Deserializer}; use smallvec::SmallVec; use socket2::SockRef; use tokio::{ @@ -17,6 +17,7 @@ use tokio::{ }; use tokio_util::codec::{Decoder, FramedRead}; use tracing::Instrument; +use vector_config::configurable_component; use vector_core::ByteSizeOf; use super::AfterReadExt as _; @@ -407,12 +408,17 @@ fn close_socket(socket: &MaybeTlsIncomingStream) -> bool { } } -#[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)] +/// A listening address that can be given directly or be managed via `systemd` socket activation. +#[configurable_component] +#[derive(Clone, Copy, Debug, PartialEq)] #[serde(untagged)] pub enum SocketListenAddr { - SocketAddr(SocketAddr), + /// An IPv4/IPv6 address and port. + SocketAddr(#[configurable(derived)] SocketAddr), + + /// A file descriptor identifier that is given from, and managed by, the socket activation feature of `systemd`. 
#[serde(deserialize_with = "parse_systemd_fd")] - SystemdFd(usize), + SystemdFd(#[configurable(transparent)] usize), } impl fmt::Display for SocketListenAddr { diff --git a/src/sources/vector/mod.rs b/src/sources/vector/mod.rs index 9e7962e23b62f..894e2da3e19cd 100644 --- a/src/sources/vector/mod.rs +++ b/src/sources/vector/mod.rs @@ -1,44 +1,61 @@ pub mod v1; pub mod v2; -use serde::{Deserialize, Serialize}; +use vector_config::configurable_component; use crate::config::{ GenerateConfig, Output, Resource, SourceConfig, SourceContext, SourceDescription, }; -#[derive(Serialize, Deserialize, Debug, Clone)] +/// Marker type for the version one of the configuration for the `vector` source. +#[configurable_component] +#[derive(Clone, Debug)] enum V1 { + /// Marker value for version one. #[serde(rename = "1")] V1, } -#[derive(Serialize, Deserialize, Debug, Clone)] -#[serde(deny_unknown_fields)] +/// Configuration for version two of the `vector` source. +#[configurable_component] +#[derive(Clone, Debug)] pub struct VectorConfigV1 { + /// Version of the configuration. version: V1, + #[serde(flatten)] config: v1::VectorConfig, } -#[derive(Serialize, Deserialize, Debug, Clone)] +/// Marker type for the version two of the configuration for the `vector` source. +#[configurable_component] +#[derive(Clone, Debug)] enum V2 { + /// Marker value for version two. #[serde(rename = "2")] V2, } -#[derive(Serialize, Deserialize, Debug, Clone)] -#[serde(deny_unknown_fields)] +/// Configuration for version two of the `vector` source. +#[configurable_component] +#[derive(Clone, Debug)] pub struct VectorConfigV2 { + /// Version of the configuration. version: Option, + #[serde(flatten)] config: v2::VectorConfig, } -#[derive(Serialize, Deserialize, Debug, Clone)] +/// Configurable for the `vector` source. +#[configurable_component(source)] +#[derive(Clone, Debug)] #[serde(untagged)] pub enum VectorConfig { + /// Configuration for version one. 
V1(VectorConfigV1), + + /// Configuration for version two. V2(VectorConfigV2), } diff --git a/src/sources/vector/v1.rs b/src/sources/vector/v1.rs index ef565d88e9ddf..1a4f248040b34 100644 --- a/src/sources/vector/v1.rs +++ b/src/sources/vector/v1.rs @@ -4,8 +4,8 @@ use codecs::{ LengthDelimitedDecoder, }; use prost::Message; -use serde::{Deserialize, Serialize}; use smallvec::{smallvec, SmallVec}; +use vector_config::configurable_component; use vector_core::ByteSizeOf; use crate::{ @@ -21,15 +21,29 @@ use crate::{ tls::{MaybeTlsSettings, TlsEnableableConfig}, }; -#[derive(Deserialize, Serialize, Debug, Clone)] -#[serde(deny_unknown_fields)] +/// Configuration for version one of the `vector` source. +#[configurable_component] +#[derive(Clone, Debug)] pub(crate) struct VectorConfig { + /// The address to listen for connections on. + /// + /// It _must_ include a port. address: SocketListenAddr, + + #[configurable(derived)] keepalive: Option, + + /// The timeout, in seconds, before a connection is forcefully closed during shutdown. #[serde(default = "default_shutdown_timeout_secs")] shutdown_timeout_secs: u64, - tls: Option, + + /// The size, in bytes, of the receive buffer used for each connection. + /// + /// This should not typically needed to be changed. 
receive_buffer_bytes: Option, + + #[configurable(derived)] + tls: Option, } const fn default_shutdown_timeout_secs() -> u64 { diff --git a/src/sources/vector/v2.rs b/src/sources/vector/v2.rs index a1ce623d1edbd..cab56bdf67fd1 100644 --- a/src/sources/vector/v2.rs +++ b/src/sources/vector/v2.rs @@ -1,12 +1,12 @@ use std::net::SocketAddr; use futures::TryFutureExt; -use serde::{Deserialize, Serialize}; use tokio::net::TcpStream; use tonic::{ transport::{server::Connected, Certificate}, Request, Response, Status, }; +use vector_config::configurable_component; use vector_core::{ event::{BatchNotifier, BatchStatus, BatchStatusReceiver, Event}, ByteSizeOf, @@ -87,14 +87,25 @@ async fn handle_batch_status(receiver: Option) -> Result<() BatchStatus::Delivered => Ok(()), } } -#[derive(Deserialize, Serialize, Debug, Clone)] -#[serde(deny_unknown_fields)] + +/// Configuration for version two of the `vector` source. +#[configurable_component] +#[derive(Clone, Debug)] pub struct VectorConfig { + /// The address to listen for connections on. + /// + /// It _must_ include a port. pub address: SocketAddr, + + /// The timeout, in seconds, before a connection is forcefully closed during shutdown. #[serde(default = "default_shutdown_timeout_secs")] pub shutdown_timeout_secs: u64, + + #[configurable(derived)] #[serde(default)] tls: Option, + + #[configurable(derived)] #[serde(default, deserialize_with = "bool_or_struct")] acknowledgements: AcknowledgementsConfig, } diff --git a/src/tcp.rs b/src/tcp.rs index 8c225a69bf36c..49e54bc121145 100644 --- a/src/tcp.rs +++ b/src/tcp.rs @@ -1,14 +1,12 @@ -use serde::{Deserialize, Serialize}; use socket2::SockRef; use tokio::net::TcpStream; +use vector_config::configurable_component; -/// Configuration for keepalive probes in a TCP stream. 
-/// -/// This config's properties map to TCP keepalive properties in Tokio: -/// -#[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)] -#[serde(deny_unknown_fields)] +/// TCP keepalive settings for socket-based components. +#[configurable_component] +#[derive(Clone, Copy, Debug, PartialEq)] pub struct TcpKeepaliveConfig { + /// The time to wait, in seconds, before starting to send TCP keepalive probes on an idle connection. pub(crate) time_secs: Option, } diff --git a/src/tls/settings.rs b/src/tls/settings.rs index 2530f33be40ea..bd113174a8d18 100644 --- a/src/tls/settings.rs +++ b/src/tls/settings.rs @@ -14,6 +14,7 @@ use openssl::{ }; use serde::{Deserialize, Serialize}; use snafu::ResultExt; +use vector_config::Configurable; use super::{ AddCertToStoreSnafu, AddExtraChainCertSnafu, CaStackPushSnafu, DerExportSnafu, @@ -31,8 +32,13 @@ pub const TEST_PEM_CRT_PATH: &str = "tests/data/localhost.crt"; #[cfg(test)] pub const TEST_PEM_KEY_PATH: &str = "tests/data/localhost.key"; -#[derive(Clone, Debug, Default, Deserialize, Serialize)] +/// Configures the TLS options for incoming/outgoing connections. +#[derive(Clone, Configurable, Debug, Default, Deserialize, Serialize)] pub struct TlsEnableableConfig { + /// Whether or not to require TLS for incoming/outgoing connections. + /// + /// When enabled and used for incoming connections, an identity certificate is also required. See `tls.crt_file` for + /// more information. pub enabled: Option, #[serde(flatten)] pub options: TlsConfig, @@ -55,18 +61,45 @@ impl TlsEnableableConfig { } } -/// Standard TLS options -#[derive(Clone, Debug, Default, Deserialize, Serialize)] +/// Standard TLS options. +#[derive(Clone, Configurable, Debug, Default, Deserialize, Serialize)] #[serde(deny_unknown_fields)] pub struct TlsConfig { + /// Whether or not to require a valid identity certificate from the peer host. 
+ /// + /// For outgoing connections, this implies that the certificate must be valid according to its chain of trust, up to + /// the root certificate authority configured on the host Vector is running on. + /// + /// For incoming connections, this implies that the peer host must present a valid client certificate that is also + /// valid according to its chain of trust. pub verify_certificate: Option, + /// Whether or not to verify the remote host's TLS certificate is valid for the hostname Vector connected to. + /// + /// Only relevant for outgoing connections. + /// + /// Do NOT set this to `false` unless you understand the risks of not verifying the remote hostname. pub verify_hostname: Option, + /// Absolute path to an additional CA certificate file. + /// + /// The certficate must be in the DER or PEM (X.509) format. Additionally, the certificate can be provided as an inline string in PEM format. #[serde(alias = "ca_path")] pub ca_file: Option, + /// Absolute path to a certificate file used to identify this server. + /// + /// The certificate must be in DER, PEM (X.509), or PKCS#12 format. Additionally, the certificate can be provided as + /// an inline string in PEM format. + /// + /// If this is set, and is not a PKCS#12 archive, `key_file` must also be set. #[serde(alias = "crt_path")] pub crt_file: Option, + /// Absolute path to a private key file used to identify this server. + /// + /// The key must be in DER or PEM (PKCS#8) format. Additionally, the key can be provided as an inline string in PEM format. #[serde(alias = "key_path")] pub key_file: Option, + /// Passphrase used to unlock the encrypted key file. + /// + /// This has no effect unless `key_file` is set. 
pub key_pass: Option, } From 6b350af24f5fcb9639318d35db6f7d7e9be16ca0 Mon Sep 17 00:00:00 2001 From: Toby Lawrence Date: Fri, 3 Jun 2022 15:45:35 -0400 Subject: [PATCH 03/12] moar progress Signed-off-by: Toby Lawrence --- Cargo.lock | 1 + lib/vector-config/Cargo.toml | 1 + lib/vector-config/src/external.rs | 28 ++++++++++ lib/vector-config/src/lib.rs | 13 ++++- lib/vector-config/src/schema.rs | 51 +++++++++++++++++-- lib/vector-config/src/stdlib.rs | 79 ++++++++++++++++++++++++++--- src/gcp.rs | 29 ++++++++++- src/nats.rs | 53 +++++++++++++++---- src/sources/demo_logs.rs | 2 +- src/sources/exec/mod.rs | 52 +++++++++++++++---- src/sources/file.rs | 79 +++++++++++++++++++++++++++-- src/sources/fluent/mod.rs | 1 + src/sources/gcp_pubsub.rs | 12 ++++- src/sources/heroku_logs.rs | 11 +++- src/sources/host_metrics/mod.rs | 14 ++++- src/sources/http.rs | 1 + src/sources/journald.rs | 33 +++++++++++- src/sources/mod.rs | 14 +++-- src/sources/nats.rs | 10 ++++ src/sources/postgresql_metrics.rs | 32 +++++++++++- src/sources/util/body_decoding.rs | 13 ++++- src/sources/util/encoding_config.rs | 18 +++++-- src/sources/util/http/auth.rs | 11 ++-- 23 files changed, 497 insertions(+), 61 deletions(-) create mode 100644 lib/vector-config/src/external.rs diff --git a/Cargo.lock b/Cargo.lock index 199ea849eb2fd..97fd8f3ef9842 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8895,6 +8895,7 @@ dependencies = [ name = "vector_config" version = "0.1.0" dependencies = [ + "encoding_rs", "indexmap", "num-traits", "schemars", diff --git a/lib/vector-config/Cargo.toml b/lib/vector-config/Cargo.toml index 6f4f6195deff9..25907f8dd7808 100644 --- a/lib/vector-config/Cargo.toml +++ b/lib/vector-config/Cargo.toml @@ -6,6 +6,7 @@ edition = "2021" publish = false [dependencies] +encoding_rs = { version = "0.8", default-features = false, features = ["serde"] } indexmap = { version = "1.8", default-features = false } num-traits = { version = "0.2.15", default-features = false } schemars = { version = 
"0.8.10", default-features = true, features = ["preserve_order"] } diff --git a/lib/vector-config/src/external.rs b/lib/vector-config/src/external.rs new file mode 100644 index 0000000000000..55144dc46bb5e --- /dev/null +++ b/lib/vector-config/src/external.rs @@ -0,0 +1,28 @@ +use schemars::{gen::SchemaGenerator, schema::SchemaObject}; + +use crate::{ + schema::{finalize_schema, generate_string_schema}, + Configurable, Metadata, +}; + +impl<'de> Configurable<'de> for &'static encoding_rs::Encoding { + // TODO: At some point, we might want to override the metadata to define a validation pattern that only matches + // valid character set encodings... but that would be a very large array of strings, and technically the Encoding + // Standard standard is a living standard, so... :thinkies: + + fn referencable_name() -> Option<&'static str> { + Some("Encoding") + } + + fn description() -> Option<&'static str> { + Some( + "An encoding as defined in the [Encoding Standard](https://encoding.spec.whatwg.org/).", + ) + } + + fn generate_schema(gen: &mut SchemaGenerator, overrides: Metadata<'de, Self>) -> SchemaObject { + let mut schema = generate_string_schema(); + finalize_schema(gen, &mut schema, overrides); + schema + } +} diff --git a/lib/vector-config/src/lib.rs b/lib/vector-config/src/lib.rs index c7cba5dc1e46f..8251f2784cbb1 100644 --- a/lib/vector-config/src/lib.rs +++ b/lib/vector-config/src/lib.rs @@ -66,7 +66,17 @@ // able to get away with using a combination of `allOf` and `oneOf` where we define a subschema for the non-aliased // fields, and then a subschema using `oneOf`for each aliased field -- allowing it to match any of the possible field // names for that specific field -- and then combine them all with `allOf`, which keeps the schema as compact as -// possible, I think, short of a new version of the specification coming out that adds native alias support for properties. 
+// possible, I think, short of a new version of the specification coming out that adds native alias support for +// properties. +// +// TODO: Add support for defining metadata on fields, since each field is defined as a schema unto itself, so we can +// stash metadata in the extensions for each field the same as we do for structs. +// +// TODO: Add support for single value metadata entries, in addition to key/value, such that for things like field metadata, we +// can essentially define flags i.e. `docs:templateable` as a metadata value for marking a field as working with +// Vector's template syntax, since doing `templateable = true` is weird given that we never otherwise specifically +// disable it. In other words, we want a way to define feature flags in metadata. + use core::fmt; use core::marker::PhantomData; @@ -84,6 +94,7 @@ pub mod schemars { pub use schemars::*; } +mod external; mod stdlib; // Re-export of the `#[configurable_component]` and `#[derive(Configurable)]` proc macros. 
diff --git a/lib/vector-config/src/schema.rs b/lib/vector-config/src/schema.rs index 5d36e0c544eea..5b60d2eb1bfd1 100644 --- a/lib/vector-config/src/schema.rs +++ b/lib/vector-config/src/schema.rs @@ -1,7 +1,7 @@ use std::{collections::BTreeSet, mem}; use indexmap::IndexMap; -use num_traits::{Bounded, ToPrimitive}; +use num_traits::{Bounded, ToPrimitive, Zero}; use schemars::{ gen::{SchemaGenerator, SchemaSettings}, schema::{ @@ -9,7 +9,7 @@ use schemars::{ SchemaObject, SingleOrVec, SubschemaValidation, }, }; -use serde_json::{Map, Value}; +use serde_json::{Map, Number, Value}; use vector_config_common::num::{NUMERIC_ENFORCED_LOWER_BOUND, NUMERIC_ENFORCED_UPPER_BOUND}; use crate::{Configurable, Metadata}; @@ -153,9 +153,9 @@ pub fn generate_string_schema() -> SchemaObject { } } -pub fn generate_number_schema<'de, N>() -> SchemaObject +pub fn generate_number_schema<'de, N>(nonzero: bool) -> SchemaObject where - N: Configurable<'de> + Bounded + ToPrimitive, + N: Configurable<'de> + Bounded + ToPrimitive + Zero, { // Calculate the minimum/maximum for the given `N`, respecting the 2^53 limit we put on each of those values. let (minimum, maximum) = { @@ -184,7 +184,7 @@ where }; // We always set the minimum/maximum bound to the mechanical limits - SchemaObject { + let mut schema = SchemaObject { instance_type: Some(InstanceType::Number.into()), number: Some(Box::new(NumberValidation { minimum: Some(minimum), @@ -192,7 +192,27 @@ where ..Default::default() })), ..Default::default() + }; + + // If the actual numeric type we're generating the schema for is a nonzero variant, we add an additional `not` + // subschema validation. 
+ if nonzero { + let zero_num_unsigned = N::zero().to_u64().map(Into::into); + let zero_num_floating = N::zero().to_f64().and_then(Number::from_f64); + let zero_num = zero_num_unsigned + .or(zero_num_floating) + .expect("No usable integer type should be unrepresentable by both `u64` and `f64`."); + + schema.subschemas = Some(Box::new(SubschemaValidation { + not: Some(Box::new(Schema::Object(SchemaObject { + const_value: Some(Value::Number(zero_num)), + ..Default::default() + }))), + ..Default::default() + })); } + + schema } pub fn generate_array_schema<'de, T>( @@ -215,6 +235,27 @@ where } } +pub fn generate_set_schema<'de, T>( + gen: &mut SchemaGenerator, + metadata: Metadata<'de, T>, +) -> SchemaObject +where + T: Configurable<'de>, +{ + // We generate the schema for `T` itself, and then apply any of `T`'s metadata to the given schema. + let element_schema = T::generate_schema(gen, metadata); + + SchemaObject { + instance_type: Some(InstanceType::Array.into()), + array: Some(Box::new(ArrayValidation { + items: Some(SingleOrVec::Single(Box::new(element_schema.into()))), + unique_items: Some(true), + ..Default::default() + })), + ..Default::default() + } +} + pub fn generate_map_schema<'de, V>( gen: &mut SchemaGenerator, metadata: Metadata<'de, V>, diff --git a/lib/vector-config/src/stdlib.rs b/lib/vector-config/src/stdlib.rs index 62a38be402daf..d5107913271dd 100644 --- a/lib/vector-config/src/stdlib.rs +++ b/lib/vector-config/src/stdlib.rs @@ -1,4 +1,12 @@ -use std::{collections::HashMap, net::SocketAddr, path::PathBuf}; +use std::{ + collections::{HashMap, HashSet}, + net::SocketAddr, + num::{ + NonZeroI16, NonZeroI32, NonZeroI64, NonZeroI8, NonZeroU16, NonZeroU32, NonZeroU64, + NonZeroU8, + }, + path::PathBuf, +}; use schemars::{gen::SchemaGenerator, schema::SchemaObject}; use vector_config_common::validation::Validation; @@ -6,7 +14,8 @@ use vector_config_common::validation::Validation; use crate::{ schema::{ finalize_schema, generate_array_schema, 
generate_bool_schema, generate_map_schema, - generate_number_schema, generate_optional_schema, generate_string_schema, + generate_number_schema, generate_optional_schema, generate_set_schema, + generate_string_schema, }, Configurable, Metadata, }; @@ -70,7 +79,7 @@ macro_rules! impl_configuable_unsigned { impl<'de> Configurable<'de> for $ty { fn generate_schema(gen: &mut SchemaGenerator, overrides: Metadata<'de, Self>) -> SchemaObject { $crate::__ensure_numeric_validation_bounds::(&overrides); - let mut schema = generate_number_schema::(); + let mut schema = generate_number_schema::(false); finalize_schema(gen, &mut schema, overrides); schema } @@ -85,7 +94,39 @@ macro_rules! impl_configuable_signed { impl<'de> Configurable<'de> for $ty { fn generate_schema(gen: &mut SchemaGenerator, overrides: Metadata<'de, Self>) -> SchemaObject { $crate::__ensure_numeric_validation_bounds::(&overrides); - let mut schema = generate_number_schema::(); + let mut schema = generate_number_schema::(false); + finalize_schema(gen, &mut schema, overrides); + schema + } + } + )+ + }; +} + +macro_rules! impl_configuable_nonzero_unsigned { + ($($aty:ty => $ity:ty),+) => { + $( + impl<'de> Configurable<'de> for $aty { + fn generate_schema(gen: &mut SchemaGenerator, overrides: Metadata<'de, Self>) -> SchemaObject { + let overrides = overrides.map_default_value(|n| n.get()); + $crate::__ensure_numeric_validation_bounds::<$ity>(&overrides); + let mut schema = generate_number_schema::<$ity>(true); + finalize_schema(gen, &mut schema, overrides); + schema + } + } + )+ + }; +} + +macro_rules! 
impl_configuable_nonzero_signed { + ($($aty:ty => $ity:ty),+) => { + $( + impl<'de> Configurable<'de> for $aty { + fn generate_schema(gen: &mut SchemaGenerator, overrides: Metadata<'de, Self>) -> SchemaObject { + let overrides = overrides.map_default_value(|n| n.get()); + $crate::__ensure_numeric_validation_bounds::<$ity>(&overrides); + let mut schema = generate_number_schema::<$ity>(true); finalize_schema(gen, &mut schema, overrides); schema } @@ -96,11 +137,13 @@ macro_rules! impl_configuable_signed { impl_configuable_unsigned!(u8, u16, u32, u64); impl_configuable_signed!(i8, i16, i32, i64); +impl_configuable_nonzero_unsigned!(NonZeroU8 => u8, NonZeroU16 => u16, NonZeroU32 => u32, NonZeroU64 => u64); +impl_configuable_nonzero_signed!(NonZeroI8 => i8, NonZeroI16 => i16, NonZeroI32 => i32, NonZeroI64 => i64); impl<'de> Configurable<'de> for usize { fn generate_schema(gen: &mut SchemaGenerator, overrides: Metadata<'de, Self>) -> SchemaObject { crate::__ensure_numeric_validation_bounds::(&overrides); - let mut schema = generate_number_schema::(); + let mut schema = generate_number_schema::(false); finalize_schema(gen, &mut schema, overrides); schema } @@ -109,7 +152,7 @@ impl<'de> Configurable<'de> for usize { impl<'de> Configurable<'de> for f64 { fn generate_schema(gen: &mut SchemaGenerator, overrides: Metadata<'de, Self>) -> SchemaObject { crate::__ensure_numeric_validation_bounds::(&overrides); - let mut schema = generate_number_schema::(); + let mut schema = generate_number_schema::(false); finalize_schema(gen, &mut schema, overrides); schema } @@ -118,7 +161,7 @@ impl<'de> Configurable<'de> for f64 { impl<'de> Configurable<'de> for f32 { fn generate_schema(gen: &mut SchemaGenerator, overrides: Metadata<'de, Self>) -> SchemaObject { crate::__ensure_numeric_validation_bounds::(&overrides); - let mut schema = generate_number_schema::(); + let mut schema = generate_number_schema::(false); finalize_schema(gen, &mut schema, overrides); schema } @@ -172,6 +215,28 @@ 
where } } +impl<'de, V> Configurable<'de> for HashSet +where + V: Configurable<'de> + Eq + std::hash::Hash, +{ + fn generate_schema(gen: &mut SchemaGenerator, overrides: Metadata<'de, Self>) -> SchemaObject { + // We explicitly do not pass anything from the override metadata, because there's nothing to reasonably pass: if + // `V` is referencable, using the description for `HashSet` likely makes no sense, nor would a default make + // sense, and so on. + // + // We do, however, set `V` to be "transparent", which means that during schema finalization, we will relax the + // rules we enforce, such as needing a description, knowing that they'll be enforced on the field using + // `HashSet` itself, where carrying that description forward to `V` might literally make no sense, such as + // when `V` is a primitive type like an integer or string. + let mut value_metadata = V::metadata(); + value_metadata.set_transparent(); + + let mut schema = generate_set_schema(gen, value_metadata); + finalize_schema(gen, &mut schema, overrides); + schema + } +} + // Additional types that do not map directly to scalars. impl<'de> Configurable<'de> for SocketAddr { fn referencable_name() -> Option<&'static str> { diff --git a/src/gcp.rs b/src/gcp.rs index 80ee0e0ca8461..574c8db9d070c 100644 --- a/src/gcp.rs +++ b/src/gcp.rs @@ -10,11 +10,11 @@ use goauth::{ }; use hyper::header::AUTHORIZATION; use once_cell::sync::Lazy; -use serde::{Deserialize, Serialize}; use smpl_jwt::Jwt; use snafu::{ResultExt, Snafu}; use tokio::time::Instant; use tokio_stream::wrappers::IntervalStream; +use vector_config::configurable_component; use crate::{config::ProxyConfig, http::HttpClient, http::HttpError}; @@ -52,9 +52,34 @@ pub enum GcpError { BuildHttpClient { source: HttpError }, } -#[derive(Clone, Debug, Default, Deserialize, Serialize)] +/// Authentication configuration for GCP services. 
+// TODO: We're duplicating the "either this or that" verbiage for each field because this struct gets flattened into the +// component config types, which means all that's carried over are the fields, not the type itself. +// +// Seems like we really should have it as a nested field -- i.e. `auth.api_key` -- which is a closer fit to how we do +// similar things in configuration (TLS, framing, decoding, etc.). Doing so would let us embed the type itself, and +// hoist up the common documentation bits to the docs for the type rather than the fields. +#[configurable_component] +#[derive(Clone, Debug, Default)] pub struct GcpAuthConfig { + /// An API key. ([documentation](https://cloud.google.com/docs/authentication/api-keys)) + /// + /// Either an API key, or a path to a service account credentials JSON file can be specified. + /// + /// If both are unset, Vector checks the `GOOGLE_APPLICATION_CREDENTIALS` environment variable for a filename. If no + /// filename is named, Vector will attempt to fetch an instance service account for the compute instance the program is + /// running on. If Vector is not running on a GCE instance, then you must define either an API key or service account + /// credentials JSON file. pub api_key: Option, + + /// Path to a service account credentials JSON file. ([documentation](https://cloud.google.com/docs/authentication/production#manually)) + /// + /// Either an API key, or a path to a service account credentials JSON file can be specified. + /// + /// If both are unset, Vector checks the `GOOGLE_APPLICATION_CREDENTIALS` environment variable for a filename. If no + /// filename is named, Vector will attempt to fetch an instance service account for the compute instance the program is + /// running on. If Vector is not running on a GCE instance, then you must define either an API key or service account + /// credentials JSON file. 
pub credentials_path: Option, } diff --git a/src/nats.rs b/src/nats.rs index f06ff1ba6962a..e2ea1f2de4b4c 100644 --- a/src/nats.rs +++ b/src/nats.rs @@ -1,6 +1,6 @@ use nkeys::error::Error as NKeysError; -use serde::{Deserialize, Serialize}; use snafu::{ResultExt, Snafu}; +use vector_config::configurable_component; use crate::tls::TlsEnableableConfig; @@ -14,19 +14,36 @@ pub enum NatsConfigError { TlsMissingCert, } -#[derive(Clone, Debug, Deserialize, Serialize)] +/// Configuration for how Vector should authenticate to NATS. +#[configurable_component] +#[derive(Clone, Debug)] #[serde(rename_all = "snake_case", tag = "strategy")] pub(crate) enum NatsAuthConfig { + /// Username and password authentication. + /// ([documentation](https://docs.nats.io/running-a-nats-service/configuration/securing_nats/auth_intro/username_password)) UserPassword { + #[configurable(derived)] user_password: NatsAuthUserPassword, }, + + /// Token authentication. + /// ([documentation](https://docs.nats.io/running-a-nats-service/configuration/securing_nats/auth_intro/tokens)) Token { + #[configurable(derived)] token: NatsAuthToken, }, + + /// Credentials file authentication. + /// ([documentation](https://docs.nats.io/running-a-nats-service/configuration/securing_nats/auth_intro/jwt)) CredentialsFile { + #[configurable(derived)] credentials_file: NatsAuthCredentialsFile, }, + + /// NKey authentication. + /// ([documentation](https://docs.nats.io/running-a-nats-service/configuration/securing_nats/auth_intro/nkey_auth)) Nkey { + #[configurable(derived)] nkey: NatsAuthNKey, }, } @@ -44,29 +61,45 @@ impl std::fmt::Display for NatsAuthConfig { } } -#[derive(Clone, Debug, Deserialize, Serialize)] -#[serde(deny_unknown_fields)] +/// Username and password configuration. +#[configurable_component] +#[derive(Clone, Debug)] pub(crate) struct NatsAuthUserPassword { + /// Username. pub(crate) user: String, + + /// Password. 
pub(crate) password: String, } -#[derive(Clone, Debug, Deserialize, Serialize)] -#[serde(deny_unknown_fields)] +/// Token configuration. +#[configurable_component] +#[derive(Clone, Debug)] pub(crate) struct NatsAuthToken { + /// Token. pub(crate) value: String, } -#[derive(Clone, Debug, Deserialize, Serialize)] -#[serde(deny_unknown_fields)] +/// Credentials file configuration. +#[configurable_component] +#[derive(Clone, Debug)] pub(crate) struct NatsAuthCredentialsFile { + /// Path to credentials file. pub(crate) path: String, } -#[derive(Clone, Debug, Deserialize, Serialize)] -#[serde(deny_unknown_fields)] +/// NKeys configuration. +#[configurable_component] +#[derive(Clone, Debug)] pub(crate) struct NatsAuthNKey { + /// User. + /// + /// This is equivalent to a public key. pub(crate) nkey: String, + + /// Seed. + /// + /// This is equivalent to a private key. pub(crate) seed: String, } diff --git a/src/sources/demo_logs.rs b/src/sources/demo_logs.rs index dd26127a92510..3d4f9866467c7 100644 --- a/src/sources/demo_logs.rs +++ b/src/sources/demo_logs.rs @@ -272,7 +272,7 @@ impl SourceConfig for DemoLogsConfig { // TODO: Is this old enough now that we could actually remove it? 
#[configurable_component(source)] #[derive(Clone, Debug)] -struct DemoLogsCompatConfig(#[configurable(transparent)] DemoLogsConfig); +pub struct DemoLogsCompatConfig(#[configurable(transparent)] DemoLogsConfig); #[async_trait::async_trait] #[typetag::serde(name = "generator")] diff --git a/src/sources/exec/mod.rs b/src/sources/exec/mod.rs index de396e98e92c9..fe451596f5553 100644 --- a/src/sources/exec/mod.rs +++ b/src/sources/exec/mod.rs @@ -11,7 +11,6 @@ use codecs::{ StreamDecodingError, }; use futures::{FutureExt, StreamExt}; -use serde::{Deserialize, Serialize}; use smallvec::SmallVec; use snafu::Snafu; use tokio::{ @@ -22,6 +21,7 @@ use tokio::{ }; use tokio_stream::wrappers::IntervalStream; use tokio_util::codec::FramedRead; +use vector_config::configurable_component; use vector_core::ByteSizeOf; use crate::{ @@ -41,44 +41,74 @@ use lookup::path; pub mod sized_bytes_codec; -#[derive(Deserialize, Serialize, Debug, Clone)] -#[serde(default, deny_unknown_fields)] +/// Configuration for the `exec` source. +#[configurable_component] +#[derive(Clone, Debug)] +#[serde(default)] pub struct ExecConfig { + #[configurable(derived)] pub mode: Mode, + + #[configurable(derived)] pub scheduled: Option, + + #[configurable(derived)] pub streaming: Option, + + /// The command to be run, plus any arguments required. pub command: Vec, + + /// The directory in which to run the command. pub working_directory: Option, + + /// Whether or not the output from stderr should be included when generating events. #[serde(default = "default_include_stderr")] pub include_stderr: bool, + + /// The maximum buffer size allowed before a log event will be generated. 
#[serde(default = "default_maximum_buffer_size")] pub maximum_buffer_size_bytes: usize, + + #[configurable(derived)] framing: Option, + + #[configurable(derived)] #[serde(default = "default_decoding")] decoding: DeserializerConfig, } -// TODO: Would be nice to combine the scheduled and streaming config with the mode enum once -// this serde ticket has been addressed (https://github.com/serde-rs/serde/issues/2013) -#[derive(Deserialize, Serialize, Debug, Clone, Copy)] -#[serde(rename_all = "snake_case", deny_unknown_fields)] +/// Mode of operation for running the command. +#[configurable_component] +#[derive(Clone, Copy, Debug)] +#[serde(rename_all = "snake_case")] pub enum Mode { + /// The command is run on a schedule. Scheduled, + + /// The command is run until it exits, potentially being restarted. Streaming, } -#[derive(Deserialize, Serialize, Debug, Clone)] -#[serde(deny_unknown_fields)] +/// Configuration options for scheduled commands. +#[configurable_component] +#[derive(Clone, Debug)] pub struct ScheduledConfig { + /// The interval, in seconds, between scheduled command runs. + /// + /// If the command takes longer than `exec_interval_secs` to run, it will be killed. #[serde(default = "default_exec_interval_secs")] exec_interval_secs: u64, } -#[derive(Deserialize, Serialize, Debug, Clone)] -#[serde(deny_unknown_fields)] +/// Configuration options for streaming commands. +#[configurable_component] +#[derive(Clone, Debug)] pub struct StreamingConfig { + /// Whether or not the command should be rerun if the command exits. #[serde(default = "default_respawn_on_exit")] respawn_on_exit: bool, + + /// The amount of time, in seconds, that Vector will wait before rerunning a streaming command that exited. 
#[serde(default = "default_respawn_interval_secs")] respawn_interval_secs: u64, } diff --git a/src/sources/file.rs b/src/sources/file.rs index 1582b5fca2a5e..a0fe4f46174f7 100644 --- a/src/sources/file.rs +++ b/src/sources/file.rs @@ -64,57 +64,105 @@ enum BuildError { #[derive(Clone, Debug, PartialEq)] #[serde(default)] pub struct FileConfig { + /// Array of file patterns to include. [Globbing](https://vector.dev/docs/reference/configuration/sources/file/#globbing) is supported. pub include: Vec, + /// Array of file patterns to exclude. [Globbing](https://vector.dev/docs/reference/configuration/sources/file/#globbing) is supported. + /// + /// Takes precedence over the `include` option. pub exclude: Vec, + /// Overrides the name of the log field used to add the file path to each event. + /// + /// The value will be the full path to the file where the event was read from. + /// + /// By default, `file` is used. pub file_key: Option, + /// Whether or not to start reading from the beginning of a new file. + /// + /// DEPRECATED: This is a deprecated option -- replaced by `ignore_checkpoints`/`read_from` -- and should be removed. + #[deprecated] pub start_at_beginning: Option, + /// Whether or not to ignore existing checkpoints when determining where to start reading a file. + /// + /// Checkpoints are still written normally. pub ignore_checkpoints: Option, #[configurable(derived)] pub read_from: Option, - // Deprecated name + /// Ignore files with a data modification date older than the specified number of seconds. #[serde(alias = "ignore_older")] pub ignore_older_secs: Option, + /// The maximum number of bytes a line can contain before being discarded. + /// + /// This protects against malformed lines or tailing incorrect files. #[serde(default = "default_max_line_bytes")] pub max_line_bytes: usize, + /// Overrides the name of the log field used to add the current hostname to each event. 
+ /// + /// The value will be the current hostname for wherever Vector is running. + /// + /// By default, the [global `host_key` option](\(urls.vector_configuration)/global-options#log_schema.host_key) is used. pub host_key: Option, + /// The directory used to persist file checkpoint positions. + /// + /// By default, the global `data_dir` option is used. Please make sure the user Vector is running as has write permissions to this directory. pub data_dir: Option, + /// Delay between file discovery calls, in milliseconds. + /// + /// This controls the interval at which Vector searches for files. Higher values result in greater chances of some short-lived files being missed between searches, but lower values increase the performance impact of file discovery. #[serde(alias = "glob_minimum_cooldown")] pub glob_minimum_cooldown_ms: u64, - // Deprecated name + #[configurable(derived)] #[serde(alias = "fingerprinting")] fingerprint: FingerprintConfig, + /// Ignore missing files when fingerprinting. + /// + /// This may be useful when used with source directories containing dangling symlinks. pub ignore_not_found: bool, + /// String value used to identify the start of a multi-line message. + /// + /// DEPRECATED: This is a deprecated option -- replaced by `multiline` -- and should be removed. + #[deprecated] pub message_start_indicator: Option, - pub multi_line_timeout: u64, // millis + /// How long to wait for more data when aggregating a multi-line message, in milliseconds. + /// + /// DEPRECATED: This is a deprecated option -- replaced by `multiline` -- and should be removed. + #[deprecated] + pub multi_line_timeout: u64, /// Multiline parsing configuration. /// /// If not specified, multiline parsing is disabled. pub multiline: Option, + /// An approximate limit on the amount of data read from a single file at a given time. 
pub max_read_bytes: usize, + /// Instead of balancing read capacity fairly across all watched files, prioritize draining the oldest files before moving on to read data from younger files. pub oldest_first: bool, + /// Timeout from reaching `EOF` after which file will be removed from filesystem, unless new data is written in the meantime. + /// + /// If not specified, files will not be removed. #[serde(alias = "remove_after")] pub remove_after_secs: Option, + /// String sequence used to separate one file line from another. pub line_delimiter: String, + #[configurable(derived)] pub encoding: Option, #[configurable(derived)] @@ -122,19 +170,37 @@ pub struct FileConfig { acknowledgements: AcknowledgementsConfig, } -/// Fingerprinting schemes. +/// Configuration for how files should be identified. +/// +/// This is important for `checkpointing` when file rotation is used. #[configurable_component] #[derive(Clone, Debug, PartialEq)] #[serde(tag = "strategy", rename_all = "snake_case")] pub enum FingerprintConfig { + /// Read lines from the beginning of the file and compute a checksum over them. Checksum { - // Deprecated name + /// Maximum number of bytes to use, from the lines that are read, for generating the checksum. + /// + /// TODO: Should we properly expose this in the documentation? There could definitely be value in allowing more + /// bytes to be used for the checksum generation, but we should commit to exposing it rather than hiding it. #[serde(alias = "fingerprint_bytes")] bytes: Option, + + /// The number of bytes to skip ahead (or ignore) when reading the data used for generating the checksum. + /// + /// This can be helpful if all files share a common header that should be skipped. ignored_header_bytes: usize, + + /// The number of lines to read for generating the checksum. + /// + /// If your files share a common header that is not always a fixed size, + /// + /// If the file has less than this amount of lines, it won’t be read at all. 
#[serde(default = "default_lines")] lines: usize, }, + + /// Use the [device and inode](https://en.wikipedia.org/wiki/Inode) as the identifier. #[serde(rename = "device_and_inode")] DevInode, } @@ -144,7 +210,10 @@ pub enum FingerprintConfig { #[derive(Copy, Clone, Debug, PartialEq)] #[serde(rename_all = "snake_case")] pub enum ReadFromConfig { + /// Read from the beginning of the file. Beginning, + + /// Start reading from the current end of the file. End, } diff --git a/src/sources/fluent/mod.rs b/src/sources/fluent/mod.rs index 78ecb70c9644d..5d4287002dd98 100644 --- a/src/sources/fluent/mod.rs +++ b/src/sources/fluent/mod.rs @@ -470,6 +470,7 @@ mod tests { use bytes::BytesMut; use chrono::{DateTime, Utc}; use rmp_serde::Serializer; + use serde::Serialize; use std::collections::BTreeMap; use tokio::{ io::{AsyncReadExt, AsyncWriteExt}, diff --git a/src/sources/gcp_pubsub.rs b/src/sources/gcp_pubsub.rs index 7eca230d59966..67f7d04425764 100644 --- a/src/sources/gcp_pubsub.rs +++ b/src/sources/gcp_pubsub.rs @@ -98,13 +98,19 @@ static CLIENT_ID: Lazy = Lazy::new(|| uuid::Uuid::new_v4().to_string()); #[derive(Clone, Debug, Derivative)] #[derivative(Default)] pub struct PubsubConfig { + /// The project name from which to pull logs. pub project: String, + /// The subscription within the project which is configured to receive logs. pub subscription: String, + /// The endpoint from which to pull data. pub endpoint: Option, - #[serde(default)] + /// Whether or not to load authentication credentials. + /// + /// Only used for tests. + #[serde(skip, default)] pub skip_authentication: bool, #[serde(flatten)] @@ -113,9 +119,13 @@ pub struct PubsubConfig { #[configurable(derived)] pub tls: Option, + /// The acknowledgement deadline, in seconds, to use for this stream. + /// + /// Messages that are not acknowledged when this deadline expires may be retransmitted. 
#[serde(default = "default_ack_deadline")] pub ack_deadline_seconds: i32, + /// The amount of time, in seconds, to wait between retry attempts after an error. #[serde(default = "default_retry_delay")] pub retry_delay_seconds: f64, diff --git a/src/sources/heroku_logs.rs b/src/sources/heroku_logs.rs index 8f79574a45f0c..1458a75c342f4 100644 --- a/src/sources/heroku_logs.rs +++ b/src/sources/heroku_logs.rs @@ -38,12 +38,16 @@ pub struct LogplexConfig { /// The address to listen for connections on. address: SocketAddr, + /// A list of URL query parameters to include in the log event. + /// + /// These will override any values included in the body with conflicting names. #[serde(default)] query_parameters: Vec, #[configurable(derived)] tls: Option, + #[configurable(derived)] auth: Option, #[configurable(derived)] @@ -141,8 +145,11 @@ impl SourceConfig for LogplexConfig { } // Add a compatibility alias to avoid breaking existing configs -#[derive(Deserialize, Serialize, Debug, Clone)] -pub struct LogplexCompatConfig(LogplexConfig); + +/// Configuration for the `logplex` source. +#[configurable_component(source)] +#[derive(Clone, Debug)] +pub struct LogplexCompatConfig(#[configurable(transparent)] LogplexConfig); #[async_trait::async_trait] #[typetag::serde(name = "logplex")] diff --git a/src/sources/host_metrics/mod.rs b/src/sources/host_metrics/mod.rs index d08ec286b65bc..7d6b9d4898718 100644 --- a/src/sources/host_metrics/mod.rs +++ b/src/sources/host_metrics/mod.rs @@ -12,6 +12,7 @@ use serde::{ }; use tokio::time; use tokio_stream::wrappers::IntervalStream; +use vector_config::configurable_component; use vector_core::ByteSizeOf; use crate::{ @@ -65,23 +66,32 @@ impl From> for Namespace { } } -#[derive(Clone, Debug, Default, Deserialize, Serialize)] -#[serde(deny_unknown_fields)] +/// Configuration for the `host_metrics` source. 
+#[configurable_component(source)] +#[derive(Clone, Debug, Default)] pub struct HostMetricsConfig { + /// The interval between metric gathering, in seconds. #[serde(default = "default_scrape_interval")] pub scrape_interval_secs: f64, pub collectors: Option>, + + /// Overrides the default namespace for the metrics emitted by the source. + /// + /// By default, `host` is used. #[serde(default)] pub namespace: Namespace, #[cfg(target_os = "linux")] #[serde(default)] pub(crate) cgroups: cgroups::CGroupsConfig, + #[serde(default)] pub disk: disk::DiskConfig, + #[serde(default)] pub filesystem: filesystem::FilesystemConfig, + #[serde(default)] pub network: network::NetworkConfig, } diff --git a/src/sources/http.rs b/src/sources/http.rs index e69eb8f4a541b..c8ca1f3c9d9f4 100644 --- a/src/sources/http.rs +++ b/src/sources/http.rs @@ -78,6 +78,7 @@ pub struct SimpleHttpConfig { #[serde(default)] query_parameters: Vec, + #[configurable(derived)] auth: Option, /// Whether or not to treat the configured `path` as an absolute path. diff --git a/src/sources/journald.rs b/src/sources/journald.rs index d21ff398ea3e7..42e4893634548 100644 --- a/src/sources/journald.rs +++ b/src/sources/journald.rs @@ -85,33 +85,64 @@ type Matches = HashMap>; #[derive(Clone, Debug, Default)] #[serde(default)] pub struct JournaldConfig { + /// Whether or not to only include future entries. pub since_now: Option, + /// Whether or not to only include entries from the current boot. pub current_boot_only: Option, + /// The list of unit names to monitor. + /// + /// If empty or not present, all units are accepted. Unit names lacking a "." will have ".service" appended to make them a valid service unit name. + // TODO: Why isn't this just an alias on `include_units`? + #[deprecated] pub units: Vec, + /// The list of unit names to monitor. + /// + /// If empty or not present, all units are accepted. Unit names lacking a "." will have ".service" appended to make them a valid service unit name. 
pub include_units: Vec, + /// The list of unit names to exclude from monitoring. + /// + /// Unit names lacking a "." will have ".service" appended to make them a valid service unit name. pub exclude_units: Vec, + /// This list contains sets of field/value pairs to monitor. + /// + /// If empty or not present, all journal fields are accepted. If `include_units` is specified, it will be merged into this list. pub include_matches: Matches, + /// This list contains sets of field/value pairs that, if any are present in a journal entry, will cause the entry to be excluded from this source. + /// + /// If `exclude_units` is specified, it will be merged into this list. pub exclude_matches: Matches, + /// The directory used to persist file checkpoint positions. + /// + /// By default, the global `data_dir` option is used. Please make sure the user Vector is running as has write permissions to this directory. pub data_dir: Option, + /// The `systemd` journal is read in batches, and a checkpoint is set at the end of each batch. This option limits the size of the batch. pub batch_size: Option, + /// The full path of the `journalctl` executable. + /// + /// If not set, Vector will search the path for `journalctl`. pub journalctl_path: Option, + /// The full path of the journal directory. + /// + /// If not set, `journalctl` will use the default system journal paths. pub journal_directory: Option, #[configurable(derived)] #[serde(default, deserialize_with = "bool_or_struct")] acknowledgements: AcknowledgementsConfig, - /// Deprecated + /// Whether or not to remap the `PRIORITY` field from an integer to string value. + /// + /// Has no effect unless the value of the field is already an integer. 
#[serde(default)] #[deprecated] remap_priority: bool, diff --git a/src/sources/mod.rs b/src/sources/mod.rs index e92a79aea84f0..101ef8cf79073 100644 --- a/src/sources/mod.rs +++ b/src/sources/mod.rs @@ -1,4 +1,3 @@ -use serde::{Deserialize, Serialize}; use snafu::Snafu; #[cfg(feature = "sources-apache_metrics")] @@ -74,7 +73,7 @@ pub mod vector; pub(crate) mod util; -use vector_config::Configurable; +use vector_config::configurable_component; pub use vector_core::source::Source; /// Common build errors @@ -85,7 +84,8 @@ enum BuildError { } /// Configurable sources in Vector. -#[derive(Clone, Configurable, Deserialize, Serialize)] +#[configurable_component] +#[derive(Clone, Debug)] #[serde(tag = "type", rename_all = "snake_case")] pub enum Sources { /// Apache HTTP Server (HTTPD) Metrics. @@ -144,6 +144,10 @@ pub enum Sources { #[cfg(feature = "sources-gcp_pubsub")] GcpPubsub(#[configurable(derived)] gcp_pubsub::PubsubConfig), + /// Generator. + #[cfg(feature = "sources-demo_logs")] + Generator(#[configurable(derived)] demo_logs::DemoLogsCompatConfig), + /// Heroku Logs. #[cfg(feature = "sources-heroku_logs")] HerokuLogs(#[configurable(derived)] heroku_logs::LogplexConfig), @@ -176,6 +180,10 @@ pub enum Sources { #[cfg(feature = "sources-kubernetes_logs")] KubernetesLogs(#[configurable(derived)] kubernetes_logs::Config), + /// Heroku Logs. + #[cfg(feature = "sources-heroku_logs")] + Logplex(#[configurable(derived)] heroku_logs::LogplexCompatConfig), + /// Logstash. #[cfg(all(feature = "sources-logstash"))] Logstash(#[configurable(derived)] logstash::LogstashConfig), diff --git a/src/sources/nats.rs b/src/sources/nats.rs index 6969f42a5a5d8..8ec15e53adbc5 100644 --- a/src/sources/nats.rs +++ b/src/sources/nats.rs @@ -29,22 +29,32 @@ enum BuildError { Subscribe { source: std::io::Error }, } +/// Configuration for the `nats` source. 
#[configurable_component(source)] #[derive(Clone, Debug, Derivative)] #[derivative(Default)] pub struct NatsSourceConfig { + /// The NATS URL to connect to. + /// + /// The URL must take the form of `nats://server:port`. url: String, + /// A name assigned to the NATS connection. #[serde(alias = "name")] connection_name: String, + /// The NATS subject to publish messages to. + // TODO: We will eventually be able to add metadata on a per-field basis, such that we can add metadata for marking + // this field as being capable of using Vector's templating syntax. subject: String, + /// NATS Queue Group to join. queue: Option, #[configurable(derived)] tls: Option, + #[configurable(derived)] auth: Option, #[configurable(derived)] diff --git a/src/sources/postgresql_metrics.rs b/src/sources/postgresql_metrics.rs index 461fea769c423..c8714d1c8fab6 100644 --- a/src/sources/postgresql_metrics.rs +++ b/src/sources/postgresql_metrics.rs @@ -94,9 +94,13 @@ enum CollectError { QueryError { source: PgError }, } -#[derive(Deserialize, Serialize, Clone, Debug)] -#[serde(deny_unknown_fields)] +/// TLS configuration for connecting to PostgreSQL. +#[configurable_component] +#[derive(Clone, Debug)] struct PostgresqlMetricsTlsConfig { + /// Absolute path to an additional CA certificate file. + /// + /// The certificate must be in the DER or PEM (X.509) format. ca_file: PathBuf, } @@ -105,16 +109,40 @@ struct PostgresqlMetricsTlsConfig { #[derive(Clone, Debug)] #[serde(default)] pub struct PostgresqlMetricsConfig { + /// A list of PostgreSQL instances to scrape. + /// + /// Each endpoint must be in the [Connection URI + /// format](https://www.postgresql.org/docs/current/libpq-connect.html#id-1.7.3.8.3.6). endpoints: Vec, + /// A list of databases to match (by using [POSIX Regular + /// Expressions](https://www.postgresql.org/docs/current/functions-matching.html#FUNCTIONS-POSIX-REGEXP)) against + /// the `datname` column for which you want to collect metrics from. 
+ /// + /// If not set, metrics are collected from all databases. Specifying `""` will include metrics where `datname` is + /// `NULL`. + /// + /// This can be used in conjunction with `exclude_databases`. include_databases: Option>, + /// A list of databases to match (by using [POSIX Regular + /// Expressions](https://www.postgresql.org/docs/current/functions-matching.html#FUNCTIONS-POSIX-REGEXP)) against + /// the `datname` column for which you don’t want to collect metrics from. + /// + /// Specifying `""` will include metrics where `datname` is `NULL`. + /// + /// This can be used in conjunction with `include_databases`. exclude_databases: Option>, + /// The interval between scrapes, in seconds. scrape_interval_secs: u64, + /// Overrides the default namespace for the metrics emitted by the source. + /// + /// By default, `postgresql` is used. namespace: String, + #[configurable(derived)] tls: Option, } diff --git a/src/sources/util/body_decoding.rs b/src/sources/util/body_decoding.rs index 1027c68b8f949..ba90e3ef0efb3 100644 --- a/src/sources/util/body_decoding.rs +++ b/src/sources/util/body_decoding.rs @@ -1,12 +1,21 @@ -use serde::{Deserialize, Serialize}; +use vector_config::configurable_component; -#[derive(Deserialize, Serialize, Debug, Eq, PartialEq, Clone, Derivative, Copy)] +/// Content encoding. +#[configurable_component] +#[derive(Clone, Copy, Debug, Derivative, Eq, PartialEq)] #[serde(rename_all = "snake_case")] #[derivative(Default)] pub enum Encoding { + /// Plaintext. #[derivative(Default)] Text, + + /// Newline-delimited JSON. Ndjson, + + /// JSON. Json, + + /// Binary. 
Binary, } diff --git a/src/sources/util/encoding_config.rs b/src/sources/util/encoding_config.rs index 93bfb8fecac4f..5d6b673090c24 100644 --- a/src/sources/util/encoding_config.rs +++ b/src/sources/util/encoding_config.rs @@ -1,7 +1,19 @@ -use serde::{Deserialize, Serialize}; +use vector_config::configurable_component; -#[derive(Debug, Clone, PartialEq, Deserialize, Serialize)] -#[serde(deny_unknown_fields)] +/// Character set encoding. +#[configurable_component] +#[derive(Clone, Debug, PartialEq)] pub struct EncodingConfig { + /// Encoding of the source messages. + /// + /// Takes one of the encoding [label strings](https://encoding.spec.whatwg.org/#concept-encoding-get) defined as + /// part of the [Encoding Standard](https://encoding.spec.whatwg.org/). + /// + /// When set, the messages are transcoded from the specified encoding to UTF-8, which is the encoding that Vector + /// assumes internally for string-like data. You should enable this transcoding operation if you need your data to + /// be in UTF-8 for further processing. At the time of transcoding, any malformed sequences (that can’t be mapped to + /// UTF-8) will be replaced with the Unicode [REPLACEMENT + /// CHARACTER](https://en.wikipedia.org/wiki/Specials_(Unicode_block)#Replacement_character) and warnings will be + /// logged. pub charset: &'static encoding_rs::Encoding, } diff --git a/src/sources/util/http/auth.rs b/src/sources/util/http/auth.rs index f60dac6cd22b0..491a10c4602aa 100644 --- a/src/sources/util/http/auth.rs +++ b/src/sources/util/http/auth.rs @@ -1,7 +1,7 @@ use std::convert::TryFrom; use headers::{Authorization, HeaderMapExt}; -use serde::{Deserialize, Serialize}; +use vector_config::configurable_component; use warp::http::HeaderMap; #[cfg(any( @@ -10,9 +10,14 @@ use warp::http::HeaderMap; ))] use super::error::ErrorMessage; -#[derive(Deserialize, Serialize, Debug, Clone)] +/// HTTP Basic authentication configuration. 
+#[configurable_component] +#[derive(Clone, Debug)] pub struct HttpSourceAuthConfig { + /// The username for basic authentication. pub username: String, + + /// The password for basic authentication. pub password: String, } @@ -40,7 +45,7 @@ impl TryFrom> for HttpSourceAuth { } } -#[derive(Debug, Clone)] +#[derive(Clone, Debug)] pub struct HttpSourceAuth { #[allow(unused)] // triggered by cargo-hack pub(self) token: Option, From d7d066e6a53f21f74394b80a9bd8a0f4c625dcef Mon Sep 17 00:00:00 2001 From: Toby Lawrence Date: Mon, 6 Jun 2022 16:04:17 -0400 Subject: [PATCH 04/12] more or less all sources set at this point Signed-off-by: Toby Lawrence --- Cargo.lock | 1 + lib/vector-common/Cargo.toml | 1 + lib/vector-common/src/datetime.rs | 21 +++ lib/vector-config-macros/src/ast/field.rs | 2 +- lib/vector-config-macros/src/configurable.rs | 2 +- lib/vector-config/Cargo.toml | 2 +- lib/vector-config/src/external.rs | 2 +- lib/vector-config/src/lib.rs | 30 +++-- lib/vector-config/src/num.rs | 126 ++++++++++++++++++ lib/vector-config/src/schema.rs | 55 ++------ lib/vector-config/src/stdlib.rs | 90 ++----------- lib/vector-config/tests/basic.rs | 19 +-- src/config/enterprise.rs | 4 +- src/docker.rs | 16 ++- src/kafka.rs | 30 +++-- src/sources/host_metrics/cgroups.rs | 15 ++- src/sources/host_metrics/disk.rs | 7 +- src/sources/host_metrics/filesystem.rs | 11 +- src/sources/host_metrics/mod.rs | 91 +++++++++---- src/sources/host_metrics/network.rs | 7 +- src/sources/kafka.rs | 45 +++++++ src/sources/kubernetes_logs/mod.rs | 32 ++--- .../namespace_metadata_annotator.rs | 9 +- .../node_metadata_annotator.rs | 9 +- .../kubernetes_logs/pod_metadata_annotator.rs | 31 ++++- src/sources/prometheus/remote_write.rs | 12 +- src/sources/prometheus/scrape.rs | 38 +++++- src/sources/redis/mod.rs | 47 ++++++- src/sources/socket/mod.rs | 8 +- src/sources/statsd/mod.rs | 8 +- src/sources/vector/mod.rs | 4 +- 31 files changed, 531 insertions(+), 244 deletions(-) create mode 100644 
lib/vector-config/src/num.rs diff --git a/Cargo.lock b/Cargo.lock index 97fd8f3ef9842..f501502668ac6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8889,6 +8889,7 @@ dependencies = [ "snafu", "tracing 0.1.34", "value", + "vector_config", ] [[package]] diff --git a/lib/vector-common/Cargo.toml b/lib/vector-common/Cargo.toml index ac2d03c0d6ab5..39606582a50ea 100644 --- a/lib/vector-common/Cargo.toml +++ b/lib/vector-common/Cargo.toml @@ -56,3 +56,4 @@ smallvec = { version = "1", default-features = false } snafu = { version = "0.7", optional = true } tracing = { version = "0.1.34", default-features = false } value = { path = "../value", features = ["json"] } +vector_config = { path = "../vector-config" } diff --git a/lib/vector-common/src/datetime.rs b/lib/vector-common/src/datetime.rs index 23ad4b7960261..577ee0fb9af52 100644 --- a/lib/vector-common/src/datetime.rs +++ b/lib/vector-common/src/datetime.rs @@ -3,6 +3,11 @@ use std::fmt::Debug; use chrono::{DateTime, Local, ParseError, TimeZone as _, Utc}; use chrono_tz::Tz; use derivative::Derivative; +use vector_config::{ + schema::{finalize_schema, generate_string_schema}, + schemars::{gen::SchemaGenerator, schema::SchemaObject}, + Configurable, Metadata, +}; #[derive(Clone, Copy, Debug, Derivative, Eq, PartialEq)] #[derivative(Default)] @@ -82,3 +87,19 @@ pub mod ser_de { } } } + +impl<'de> Configurable<'de> for TimeZone { + fn referencable_name() -> Option<&'static str> { + Some("vector_common::TimeZone") + } + + fn description() -> Option<&'static str> { + Some("Strongly-typed list of timezones as defined in the `tz` database.") + } + + fn generate_schema(gen: &mut SchemaGenerator, overrides: Metadata<'de, Self>) -> SchemaObject { + let mut schema = generate_string_schema(); + finalize_schema(gen, &mut schema, overrides); + schema + } +} diff --git a/lib/vector-config-macros/src/ast/field.rs b/lib/vector-config-macros/src/ast/field.rs index b7b097117b657..dd01cdb0b89e9 100644 --- 
a/lib/vector-config-macros/src/ast/field.rs +++ b/lib/vector-config-macros/src/ast/field.rs @@ -131,7 +131,7 @@ impl Attributes { && self.visible && !self.flatten { - return Err(err_field_missing_description(&field.original.ident)); + return Err(err_field_missing_description(&field.original)); } Ok(self) diff --git a/lib/vector-config-macros/src/configurable.rs b/lib/vector-config-macros/src/configurable.rs index b3a21438c1d77..b4b9f6eacc0d1 100644 --- a/lib/vector-config-macros/src/configurable.rs +++ b/lib/vector-config-macros/src/configurable.rs @@ -58,7 +58,7 @@ pub fn derive_configurable_impl(input: TokenStream) -> TokenStream { #[allow(unused_qualifications)] impl #impl_generics ::vector_config::Configurable<#clt> for #name #ty_generics #where_clause { fn referencable_name() -> Option<&'static str> { - Some(#ref_name) + Some(std::concat!(std::module_path!(), "::", #ref_name)) } #metadata_fn diff --git a/lib/vector-config/Cargo.toml b/lib/vector-config/Cargo.toml index 25907f8dd7808..db2728f7ff7c9 100644 --- a/lib/vector-config/Cargo.toml +++ b/lib/vector-config/Cargo.toml @@ -6,7 +6,7 @@ edition = "2021" publish = false [dependencies] -encoding_rs = { version = "0.8", default-features = false, features = ["serde"] } +encoding_rs = { version = "0.8", default-features = false, features = ["alloc", "serde"] } indexmap = { version = "1.8", default-features = false } num-traits = { version = "0.2.15", default-features = false } schemars = { version = "0.8.10", default-features = true, features = ["preserve_order"] } diff --git a/lib/vector-config/src/external.rs b/lib/vector-config/src/external.rs index 55144dc46bb5e..eae0409403957 100644 --- a/lib/vector-config/src/external.rs +++ b/lib/vector-config/src/external.rs @@ -11,7 +11,7 @@ impl<'de> Configurable<'de> for &'static encoding_rs::Encoding { // Standard standard is a living standard, so... 
:thinkies: fn referencable_name() -> Option<&'static str> { - Some("Encoding") + Some("encoding_rs::Encoding") } fn description() -> Option<&'static str> { diff --git a/lib/vector-config/src/lib.rs b/lib/vector-config/src/lib.rs index 8251f2784cbb1..abf8c37335630 100644 --- a/lib/vector-config/src/lib.rs +++ b/lib/vector-config/src/lib.rs @@ -53,13 +53,18 @@ // then multiline parsing is disabled". Including that description on `MultilineConfig` itself is kind of weird because // it forces that on everyone else using it, where, in some cases, it may not be optional at all. // -// TODO: It's not clear what happens if we schemafy identically-named types i.e. `some::mod::Foo` and -// `another::mod::Foo` since we use the type's ident, not its full path, to generate its referencable name. This is good -// because the full names would be ugly as hell and offer little value but it means that we need all types, including -// transitive types, to have unique names... and we can't check this at develop-time, only compile-time. :thinkies: +// TODO: Right now, we're manually generating a referencable name where it makes sense by appending the module path to +// the ident for structs/enums, and by crafting the name by hand for anything like stdlib impls, or impls on external +// types. // -// TODO: When we implement `Configurable` for all the `NonZero*` types, we should make sure they have default metadata -// that specifies a validation of having to be a minimum of 1. +// We do this because technically `std::any::type_name` says that it doesn't provide a stable interface for getting the +// fully-qualified path of a type, which we would need (in general, regardless of whether or not we used that function) +// because we don't want definition types totally changing name between compiler versions, etc. +// +// This is obviously also tricky from a re-export standpoint i.e. 
what is the referencable name of a type that uses the +// derive macros for `Configurable` but is exporter somewhere entirely different? The path would refer to the source nol +// matter what, as it's based on how `std::module_path!()` works. Technically speaking, that's still correct from a "we +// shouldn't create duplicate schemas for T" standpoint, but could manifest as a non-obvious divergence. // // TODO: We need to figure out how to handle aliases. Looking previously, it seemed like we might need to do some very // ugly combinatorial explosion stuff to define a schema per perumtation of all aliased fields in a config. We might be @@ -80,7 +85,7 @@ use core::fmt; use core::marker::PhantomData; -use num_traits::{Bounded, ToPrimitive}; +use num::ConfigurableNumber; use serde::{Deserialize, Serialize}; pub mod schema; @@ -95,6 +100,7 @@ pub mod schemars { } mod external; +mod num; mod stdlib; // Re-export of the `#[configurable_component]` and `#[derive(Configurable)]` proc macros. @@ -370,7 +376,7 @@ where #[doc(hidden)] pub fn __ensure_numeric_validation_bounds<'de, N>(metadata: &Metadata<'de, N>) where - N: Configurable<'de> + Bounded + ToPrimitive, + N: Configurable<'de> + ConfigurableNumber, { // In `Validation::ensure_conformance`, we do some checks on any supplied numeric bounds to try and ensure they're // no larger than the largest f64 value where integer/floasting-point conversions are still lossless. What we @@ -381,12 +387,8 @@ where // We simply check the given metadata for any numeric validation bounds, and ensure they do not exceed the // mechanical limits of the given numeric type `N`. If they do, we panic, which is not as friendly as a contextual // compile-time error emitted from the `Configurable` derive macro... but we're working with what we've got here. 
- let mechanical_min_bound = N::min_value() - .to_f64() - .expect("`Configurable` does not support numbers larger than an f64 representation"); - let mechanical_max_bound = N::max_value() - .to_f64() - .expect("`Configurable` does not support numbers larger than an f64 representation"); + let mechanical_min_bound = N::get_enforced_min_bound(); + let mechanical_max_bound = N::get_enforced_max_bound(); for validation in metadata.validations() { if let validation::Validation::Range { minimum, maximum } = validation { diff --git a/lib/vector-config/src/num.rs b/lib/vector-config/src/num.rs new file mode 100644 index 0000000000000..47de6f5f6ad6e --- /dev/null +++ b/lib/vector-config/src/num.rs @@ -0,0 +1,126 @@ +use std::num::{ + NonZeroI16, NonZeroI32, NonZeroI64, NonZeroI8, NonZeroU16, NonZeroU32, NonZeroU64, NonZeroU8, +}; + +use num_traits::{Bounded, One, ToPrimitive, Zero}; +use serde_json::Number; +use vector_config_common::num::{NUMERIC_ENFORCED_LOWER_BOUND, NUMERIC_ENFORCED_UPPER_BOUND}; + +/// A numeric type that can be represented correctly in a JSON Schema document. +/// +/// `N` must be an integral numeric type i.e. `f64`, `u8`, `i32`, and so on. The numeric type is parameterized in this +/// way to allow generating the schema for wrapper types such as `NonZeroU64`, where the overall type must be +/// represented as `NonZeroU64` but the integeral numeric type that we're constraining against is `u64`. +pub trait ConfigurableNumber { + /// The integral numeric type. + /// + /// We parameterize the "integral" numeric type in this way to allow generating the schema for wrapper types such as + /// `NonZeroU64`, where the overall type must be represented as `NonZeroU64` but the integeral numeric type that + /// we're constraining against is `u64`. + type Numeric: Bounded + ToPrimitive + Zero + One; + + /// Whether or not this numeric type disallows nonzero values. 
+ fn is_nonzero() -> bool { + false + } + + /// Whether or not a generated schema for this numeric type must explicitly disallow zero values. + /// + /// In some cases, such as `NonZero*` types from `std::num`, a numeric type may not support zero values for reasons + /// of correctness and/or optimization. In some cases, we can simply adjust the normal minimum/maximum bounds in the + /// schema to encode this. In other cases, such as signed versions like `NonZeroI64`, zero is a discrete value + /// within the minimum and maximum bounds and must be excluded explicitly. + fn requires_nonzero_exclusion() -> bool { + false + } + + /// Gets the JSON encoded version of the zero value for the integral numeric type. + fn get_encoded_zero_value() -> Number { + let zero_num_unsigned = Self::Numeric::zero().to_u64().map(Into::into); + let zero_num_floating = Self::Numeric::zero().to_f64().and_then(Number::from_f64); + zero_num_unsigned + .or(zero_num_floating) + .expect("No usable integer type should be unrepresentable by both `u64` and `f64`.") + } + + /// Gets the minimum bound for this numeric type, limited by the representable range in JSON Schema. + fn get_enforced_min_bound() -> f64 { + let mechanical_minimum = match (Self::is_nonzero(), Self::requires_nonzero_exclusion()) { + // If the number is not a nonzero type, or it is a nonzero type, but needs an exclusion, we simply return + // its true mechanical minimum bound. For nonzero types, this is because we can only enforce the nonzero + // constraint through a negative schema bound, not through its normal minimum/maximum bounds validation. + (false, _) | (true, true) => Self::Numeric::min_value(), + // If the number is a nonzero type, but does not need an exclusion, its minimum bound is always 1. 
+ (true, false) => Self::Numeric::one(), + }; + + let enforced_minimum = NUMERIC_ENFORCED_LOWER_BOUND; + let mechanical_minimum = mechanical_minimum + .to_f64() + .expect("`Configurable` does not support numbers larger than an `f64` representation"); + + if mechanical_minimum < enforced_minimum { + enforced_minimum + } else { + mechanical_minimum + } + } + + /// Gets the maximum bound for this numeric type, limited by the representable range in JSON Schema. + fn get_enforced_max_bound() -> f64 { + let enforced_maximum = NUMERIC_ENFORCED_UPPER_BOUND; + let mechanical_maximum = Self::Numeric::max_value() + .to_f64() + .expect("`Configurable` does not support numbers larger than an `f64` representation"); + + if mechanical_maximum > enforced_maximum { + enforced_maximum + } else { + mechanical_maximum + } + } +} + +macro_rules! impl_configuable_number { + ($($ty:ty),+) => { + $( + impl ConfigurableNumber for $ty { + type Numeric = $ty; + } + )+ + }; +} + +macro_rules! impl_configuable_number_nonzero { + ($($aty:ty => $ity:ty),+) => { + $( + impl ConfigurableNumber for $aty { + type Numeric = $ity; + + fn is_nonzero() -> bool { + true + } + } + )+ + }; + + (with_exclusion, $($aty:ty => $ity:ty),+) => { + $( + impl ConfigurableNumber for $aty { + type Numeric = $ity; + + fn is_nonzero() -> bool { + true + } + + fn requires_nonzero_exclusion() -> bool { + true + } + } + )+ + }; +} + +impl_configuable_number!(u8, u16, u32, u64, usize, i8, i16, i32, i64, isize, f32, f64); +impl_configuable_number_nonzero!(NonZeroU8 => u8, NonZeroU16 => u16, NonZeroU32 => u32, NonZeroU64 => u64); +impl_configuable_number_nonzero!(with_exclusion, NonZeroI8 => i8, NonZeroI16 => i16, NonZeroI32 => i32, NonZeroI64 => i64); diff --git a/lib/vector-config/src/schema.rs b/lib/vector-config/src/schema.rs index 5b60d2eb1bfd1..36e742ea95a2a 100644 --- a/lib/vector-config/src/schema.rs +++ b/lib/vector-config/src/schema.rs @@ -1,7 +1,6 @@ use std::{collections::BTreeSet, mem}; use indexmap::IndexMap; 
-use num_traits::{Bounded, ToPrimitive, Zero}; use schemars::{ gen::{SchemaGenerator, SchemaSettings}, schema::{ @@ -9,10 +8,9 @@ use schemars::{ SchemaObject, SingleOrVec, SubschemaValidation, }, }; -use serde_json::{Map, Number, Value}; -use vector_config_common::num::{NUMERIC_ENFORCED_LOWER_BOUND, NUMERIC_ENFORCED_UPPER_BOUND}; +use serde_json::{Map, Value}; -use crate::{Configurable, Metadata}; +use crate::{num::ConfigurableNumber, Configurable, Metadata}; /// Finalizes the schema by ensuring all metadata is applied and registering it in the generator. /// @@ -153,37 +151,15 @@ pub fn generate_string_schema() -> SchemaObject { } } -pub fn generate_number_schema<'de, N>(nonzero: bool) -> SchemaObject +pub fn generate_number_schema<'de, N>() -> SchemaObject where - N: Configurable<'de> + Bounded + ToPrimitive + Zero, + N: Configurable<'de> + ConfigurableNumber, { - // Calculate the minimum/maximum for the given `N`, respecting the 2^53 limit we put on each of those values. - let (minimum, maximum) = { - let enforced_minimum = NUMERIC_ENFORCED_LOWER_BOUND; - let enforced_maximum = NUMERIC_ENFORCED_UPPER_BOUND; - let mechanical_minimum = N::min_value() - .to_f64() - .expect("`Configurable` does not support numbers larger than an f64 representation"); - let mechanical_maximum = N::max_value() - .to_f64() - .expect("`Configurable` does not support numbers larger than an f64 representation"); - - let calculated_minimum = if mechanical_minimum < enforced_minimum { - enforced_minimum - } else { - mechanical_minimum - }; - - let calculated_maximum = if mechanical_maximum > enforced_maximum { - enforced_maximum - } else { - mechanical_maximum - }; - - (calculated_minimum, calculated_maximum) - }; + let minimum = N::get_enforced_min_bound(); + let maximum = N::get_enforced_max_bound(); - // We always set the minimum/maximum bound to the mechanical limits + // We always set the minimum/maximum bound to the mechanical limits. 
Any additional constraining as part of field + // validators will overwrite these limits. let mut schema = SchemaObject { instance_type: Some(InstanceType::Number.into()), number: Some(Box::new(NumberValidation { @@ -194,18 +170,13 @@ where ..Default::default() }; - // If the actual numeric type we're generating the schema for is a nonzero variant, we add an additional `not` - // subschema validation. - if nonzero { - let zero_num_unsigned = N::zero().to_u64().map(Into::into); - let zero_num_floating = N::zero().to_f64().and_then(Number::from_f64); - let zero_num = zero_num_unsigned - .or(zero_num_floating) - .expect("No usable integer type should be unrepresentable by both `u64` and `f64`."); - + // If the actual numeric type we're generating the schema for is a nonzero variant, and its constrain can't be + // represently solely by the normal minimum/maximum bounds, we explicitly add an exclusion for the appropriate zero + // value of the given numeric type. + if N::requires_nonzero_exclusion() { schema.subschemas = Some(Box::new(SubschemaValidation { not: Some(Box::new(Schema::Object(SchemaObject { - const_value: Some(Value::Number(zero_num)), + const_value: Some(Value::Number(N::get_encoded_zero_value())), ..Default::default() }))), ..Default::default() diff --git a/lib/vector-config/src/stdlib.rs b/lib/vector-config/src/stdlib.rs index d5107913271dd..21dff507fe066 100644 --- a/lib/vector-config/src/stdlib.rs +++ b/lib/vector-config/src/stdlib.rs @@ -73,13 +73,13 @@ impl<'de> Configurable<'de> for String { } // Numbers. -macro_rules! impl_configuable_unsigned { +macro_rules! 
impl_configuable_numeric { ($($ty:ty),+) => { $( impl<'de> Configurable<'de> for $ty { fn generate_schema(gen: &mut SchemaGenerator, overrides: Metadata<'de, Self>) -> SchemaObject { $crate::__ensure_numeric_validation_bounds::(&overrides); - let mut schema = generate_number_schema::(false); + let mut schema = generate_number_schema::(); finalize_schema(gen, &mut schema, overrides); schema } @@ -88,84 +88,10 @@ macro_rules! impl_configuable_unsigned { }; } -macro_rules! impl_configuable_signed { - ($($ty:ty),+) => { - $( - impl<'de> Configurable<'de> for $ty { - fn generate_schema(gen: &mut SchemaGenerator, overrides: Metadata<'de, Self>) -> SchemaObject { - $crate::__ensure_numeric_validation_bounds::(&overrides); - let mut schema = generate_number_schema::(false); - finalize_schema(gen, &mut schema, overrides); - schema - } - } - )+ - }; -} - -macro_rules! impl_configuable_nonzero_unsigned { - ($($aty:ty => $ity:ty),+) => { - $( - impl<'de> Configurable<'de> for $aty { - fn generate_schema(gen: &mut SchemaGenerator, overrides: Metadata<'de, Self>) -> SchemaObject { - let overrides = overrides.map_default_value(|n| n.get()); - $crate::__ensure_numeric_validation_bounds::<$ity>(&overrides); - let mut schema = generate_number_schema::<$ity>(true); - finalize_schema(gen, &mut schema, overrides); - schema - } - } - )+ - }; -} - -macro_rules! 
impl_configuable_nonzero_signed { - ($($aty:ty => $ity:ty),+) => { - $( - impl<'de> Configurable<'de> for $aty { - fn generate_schema(gen: &mut SchemaGenerator, overrides: Metadata<'de, Self>) -> SchemaObject { - let overrides = overrides.map_default_value(|n| n.get()); - $crate::__ensure_numeric_validation_bounds::<$ity>(&overrides); - let mut schema = generate_number_schema::<$ity>(true); - finalize_schema(gen, &mut schema, overrides); - schema - } - } - )+ - }; -} - -impl_configuable_unsigned!(u8, u16, u32, u64); -impl_configuable_signed!(i8, i16, i32, i64); -impl_configuable_nonzero_unsigned!(NonZeroU8 => u8, NonZeroU16 => u16, NonZeroU32 => u32, NonZeroU64 => u64); -impl_configuable_nonzero_signed!(NonZeroI8 => i8, NonZeroI16 => i16, NonZeroI32 => i32, NonZeroI64 => i64); - -impl<'de> Configurable<'de> for usize { - fn generate_schema(gen: &mut SchemaGenerator, overrides: Metadata<'de, Self>) -> SchemaObject { - crate::__ensure_numeric_validation_bounds::(&overrides); - let mut schema = generate_number_schema::(false); - finalize_schema(gen, &mut schema, overrides); - schema - } -} - -impl<'de> Configurable<'de> for f64 { - fn generate_schema(gen: &mut SchemaGenerator, overrides: Metadata<'de, Self>) -> SchemaObject { - crate::__ensure_numeric_validation_bounds::(&overrides); - let mut schema = generate_number_schema::(false); - finalize_schema(gen, &mut schema, overrides); - schema - } -} - -impl<'de> Configurable<'de> for f32 { - fn generate_schema(gen: &mut SchemaGenerator, overrides: Metadata<'de, Self>) -> SchemaObject { - crate::__ensure_numeric_validation_bounds::(&overrides); - let mut schema = generate_number_schema::(false); - finalize_schema(gen, &mut schema, overrides); - schema - } -} +impl_configuable_numeric!( + u8, u16, u32, u64, usize, i8, i16, i32, i64, isize, f32, f64, NonZeroU8, NonZeroU16, + NonZeroU32, NonZeroU64, NonZeroI8, NonZeroI16, NonZeroI32, NonZeroI64 +); // Arrays and maps. 
impl<'de, T> Configurable<'de> for Vec @@ -240,7 +166,7 @@ where // Additional types that do not map directly to scalars. impl<'de> Configurable<'de> for SocketAddr { fn referencable_name() -> Option<&'static str> { - Some("SocketAddr") + Some("stdlib::SocketAddr") } fn description() -> Option<&'static str> { @@ -259,7 +185,7 @@ impl<'de> Configurable<'de> for SocketAddr { impl<'de> Configurable<'de> for PathBuf { fn referencable_name() -> Option<&'static str> { - Some("PathBuf") + Some("stdlib::PathBuf") } fn description() -> Option<&'static str> { diff --git a/lib/vector-config/tests/basic.rs b/lib/vector-config/tests/basic.rs index a01e7bea3ceed..ad45de2689b04 100644 --- a/lib/vector-config/tests/basic.rs +++ b/lib/vector-config/tests/basic.rs @@ -7,6 +7,7 @@ use std::{ collections::HashMap, net::{Ipv4Addr, SocketAddr, SocketAddrV4}, + num::NonZeroU64, path::PathBuf, }; @@ -25,9 +26,9 @@ pub struct SpecialDuration(#[configurable(transparent)] u64); pub struct BatchConfig { /// The maximum number of events in a batch before it is flushed. #[configurable(validation(range(max = 100000)))] - max_events: Option, + max_events: Option, /// The maximum number of bytes in a batch before it is flushed. - max_bytes: Option, + max_bytes: Option, /// The maximum amount of time a batch can exist before it is flushed. 
timeout: Option, } @@ -35,7 +36,7 @@ pub struct BatchConfig { impl Default for BatchConfig { fn default() -> Self { Self { - max_events: Some(1000), + max_events: Some(NonZeroU64::new(1000).expect("must be nonzero")), max_bytes: None, timeout: Some(SpecialDuration(10)), } @@ -152,10 +153,10 @@ pub struct SimpleSinkConfig { meaningless_field: String, } -const fn default_simple_sink_batch() -> BatchConfig { +fn default_simple_sink_batch() -> BatchConfig { BatchConfig { - max_events: Some(10000), - max_bytes: Some(16_000_000), + max_events: Some(NonZeroU64::new(10000).expect("must be nonzero")), + max_bytes: Some(NonZeroU64::new(16_000_000).expect("must be nonzero")), timeout: Some(SpecialDuration(5)), } } @@ -196,10 +197,10 @@ pub struct AdvancedSinkConfig { tags: HashMap, } -const fn default_advanced_sink_batch() -> BatchConfig { +fn default_advanced_sink_batch() -> BatchConfig { BatchConfig { - max_events: Some(5678), - max_bytes: Some(36_000_000), + max_events: Some(NonZeroU64::new(5678).expect("must be nonzero")), + max_bytes: Some(NonZeroU64::new(36_000_000).expect("must be nonzero")), timeout: Some(SpecialDuration(15)), } } diff --git a/src/config/enterprise.rs b/src/config/enterprise.rs index 429c2c63f0062..2bec989dab3b5 100644 --- a/src/config/enterprise.rs +++ b/src/config/enterprise.rs @@ -29,7 +29,7 @@ use crate::{ util::retries::ExponentialBackoff, }, sources::{ - host_metrics::{self, HostMetricsConfig}, + host_metrics::HostMetricsConfig, internal_logs::InternalLogsConfig, internal_metrics::InternalMetricsConfig, }, @@ -476,7 +476,7 @@ fn setup_metrics_reporting( // Create internal sources for host and internal metrics. We're using distinct sources here and // not attempting to reuse existing ones, to configure according to enterprise requirements. 
let host_metrics = HostMetricsConfig { - namespace: host_metrics::Namespace::from(Some("pipelines".to_owned())), + namespace: Some("pipelines".to_owned()), scrape_interval_secs: datadog.reporting_interval_secs, ..Default::default() }; diff --git a/src/docker.rs b/src/docker.rs index 6c86428a94b82..5beb81597fa80 100644 --- a/src/docker.rs +++ b/src/docker.rs @@ -9,8 +9,8 @@ use bollard::{ }; use futures::StreamExt; use http::uri::Uri; -use serde::{Deserialize, Serialize}; use snafu::Snafu; +use vector_config::configurable_component; // From bollard source. const DEFAULT_TIMEOUT: u64 = 120; @@ -21,11 +21,21 @@ pub enum Error { NoHost, } -#[derive(Deserialize, Serialize, Debug, Clone)] -#[serde(deny_unknown_fields)] +/// TLS options to connect to the Docker daemon. +/// +/// Only relevant when connecting to Docker via an HTTPS URL. +/// +/// If not configured, Vector will try to use environment variable `DOCKER_CERT_PATH` and then` DOCKER_CONFIG`. If both environment variables are absent, Vector will try to read certificates in `~/.docker/`. +#[configurable_component] +#[derive(Clone, Debug)] pub struct DockerTlsConfig { + /// Path to CA certificate file. ca_file: PathBuf, + + /// Path to TLS certificate file. crt_file: PathBuf, + + /// Path to TLS key file. 
key_file: PathBuf, } diff --git a/src/kafka.rs b/src/kafka.rs index fb62469ec460d..f08e79be4096d 100644 --- a/src/kafka.rs +++ b/src/kafka.rs @@ -3,8 +3,9 @@ use std::path::{Path, PathBuf}; use rdkafka::{consumer::ConsumerContext, ClientConfig, ClientContext, Statistics}; use serde::{Deserialize, Serialize}; use snafu::Snafu; +use vector_config::configurable_component; -use crate::{internal_events::KafkaStatisticsReceived, tls::TlsConfig}; +use crate::{internal_events::KafkaStatisticsReceived, tls::TlsEnableableConfig}; #[derive(Debug, Snafu)] enum KafkaError { @@ -24,25 +25,32 @@ pub(crate) enum KafkaCompression { Zstd, } -#[derive(Clone, Debug, Default, Deserialize, Serialize)] +/// Kafka authentication configuration. +#[configurable_component] +#[derive(Clone, Debug, Default)] pub(crate) struct KafkaAuthConfig { + #[configurable(derived)] pub(crate) sasl: Option, - pub(crate) tls: Option, + + #[configurable(derived)] + pub(crate) tls: Option, } -#[derive(Clone, Debug, Default, Deserialize, Serialize)] +/// /// Options for SASL/SCRAM authentication support. +#[configurable_component] +#[derive(Clone, Debug, Default)] pub(crate) struct KafkaSaslConfig { + /// Enable SASL/SCRAM authentication to the remote (not supported on Windows at this time). pub(crate) enabled: Option, + + /// The Kafka SASL/SCRAM authentication username. pub(crate) username: Option, + + /// The Kafka SASL/SCRAM authentication password. pub(crate) password: Option, - pub(crate) mechanism: Option, -} -#[derive(Clone, Debug, Default, Deserialize, Serialize)] -pub(crate) struct KafkaTlsConfig { - pub(crate) enabled: Option, - #[serde(flatten)] - pub(crate) options: TlsConfig, + /// The Kafka SASL/SCRAM mechanisms. 
+ pub(crate) mechanism: Option, } impl KafkaAuthConfig { diff --git a/src/sources/host_metrics/cgroups.rs b/src/sources/host_metrics/cgroups.rs index 76b62218b794c..845cc990c393c 100644 --- a/src/sources/host_metrics/cgroups.rs +++ b/src/sources/host_metrics/cgroups.rs @@ -7,26 +7,37 @@ use std::{ use chrono::{DateTime, Utc}; use futures::future::BoxFuture; -use serde::{Deserialize, Serialize}; use snafu::{ResultExt, Snafu}; use tokio::{ fs::{self, File}, io::AsyncReadExt, }; use vector_common::btreemap; +use vector_config::configurable_component; use super::{filter_result_sync, FilterList, HostMetrics}; use crate::event::metric::Metric; const MICROSECONDS: f64 = 1.0 / 1_000_000.0; -#[derive(Clone, Debug, Derivative, Deserialize, Serialize)] +/// Options for the “cgroups” (controller groups) metrics collector. +/// +/// This collector is only available on Linux systems, and only supports either version 2 or hybrid cgroups. +#[configurable_component] +#[derive(Clone, Debug, Derivative)] #[derivative(Default)] #[serde(default)] pub(crate) struct CGroupsConfig { + /// The number of levels of the cgroups hierarchy for which to report metrics. + /// + /// A value of `1` means just the root or named cgroup. #[derivative(Default(value = "100"))] levels: usize, + + /// The base cgroup name to provide metrics for. pub(super) base: Option, + + /// Lists of group name patterns to include or exclude. 
groups: FilterList, } diff --git a/src/sources/host_metrics/disk.rs b/src/sources/host_metrics/disk.rs index aa12135fa29ff..00973506dafd5 100644 --- a/src/sources/host_metrics/disk.rs +++ b/src/sources/host_metrics/disk.rs @@ -1,14 +1,17 @@ use chrono::Utc; use futures::{stream, StreamExt}; use heim::units::information::byte; -use serde::{Deserialize, Serialize}; use vector_common::btreemap; +use vector_config::configurable_component; use super::{filter_result, FilterList, HostMetrics}; use crate::event::metric::Metric; -#[derive(Clone, Debug, Default, Deserialize, Serialize)] +/// Options for the “disk” metrics collector. +#[configurable_component] +#[derive(Clone, Debug, Default)] pub struct DiskConfig { + /// Lists of device name patterns to include or exclude. #[serde(default)] devices: FilterList, } diff --git a/src/sources/host_metrics/filesystem.rs b/src/sources/host_metrics/filesystem.rs index eaa0d09763a0e..b90d3ac8856f4 100644 --- a/src/sources/host_metrics/filesystem.rs +++ b/src/sources/host_metrics/filesystem.rs @@ -3,18 +3,25 @@ use futures::{stream, StreamExt}; use heim::units::information::byte; #[cfg(not(target_os = "windows"))] use heim::units::ratio::ratio; -use serde::{Deserialize, Serialize}; use vector_common::btreemap; +use vector_config::configurable_component; use super::{filter_result, FilterList, HostMetrics}; use crate::event::metric::Metric; -#[derive(Clone, Debug, Default, Deserialize, Serialize)] +/// Options for the “filesystem” metrics collector. +#[configurable_component] +#[derive(Clone, Debug, Default)] pub struct FilesystemConfig { + /// Lists of device name patterns to include or exclude. #[serde(default)] devices: FilterList, + + /// Lists of filesystem name patterns to include or exclude. #[serde(default)] filesystems: FilterList, + + /// Lists of mount point path patterns to include or exclude. 
#[serde(default)] mountpoints: FilterList, } diff --git a/src/sources/host_metrics/mod.rs b/src/sources/host_metrics/mod.rs index 7d6b9d4898718..b2121a419bb3b 100644 --- a/src/sources/host_metrics/mod.rs +++ b/src/sources/host_metrics/mod.rs @@ -12,7 +12,12 @@ use serde::{ }; use tokio::time; use tokio_stream::wrappers::IntervalStream; -use vector_config::configurable_component; +use vector_config::{ + configurable_component, + schema::{finalize_schema, generate_string_schema}, + schemars::{gen::SchemaGenerator, schema::SchemaObject}, + Configurable, Metadata, +}; use vector_core::ByteSizeOf; use crate::{ @@ -31,67 +36,83 @@ mod filesystem; mod memory; mod network; -#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)] +/// Collector types. +#[configurable_component] +#[derive(Clone, Copy, Debug, Eq, PartialEq)] #[serde(rename_all = "lowercase")] pub enum Collector { + /// CGroups. #[cfg(target_os = "linux")] CGroups, + + /// CPU. Cpu, + + /// Disk. Disk, + + /// Filesystem. Filesystem, + + /// Load average. Load, + + /// Host. Host, + + /// Memory. Memory, + + /// Network. Network, } -#[derive(Clone, Debug, Default, Deserialize, Serialize)] +/// Filtering configuration. +#[configurable_component] +#[derive(Clone, Debug, Default)] pub(self) struct FilterList { + /// Any patterns which should be included. includes: Option>, - excludes: Option>, -} - -#[derive(Clone, Debug, Deserialize, Serialize)] -pub struct Namespace(Option); - -impl Default for Namespace { - fn default() -> Self { - Self(Some("host".into())) - } -} -impl From> for Namespace { - fn from(s: Option) -> Self { - Namespace(s) - } + /// Any patterns which should be excluded. + excludes: Option>, } /// Configuration for the `host_metrics` source. #[configurable_component(source)] -#[derive(Clone, Debug, Default)] +#[derive(Clone, Debug, Derivative)] +#[derivative(Default)] pub struct HostMetricsConfig { /// The interval between metric gathering, in seconds. 
#[serde(default = "default_scrape_interval")] pub scrape_interval_secs: f64, + /// The list of host metric collector services to use. + /// + /// Defaults to all collectors. pub collectors: Option>, /// Overrides the default namespace for the metrics emitted by the source. /// /// By default, `host` is used. #[serde(default)] - pub namespace: Namespace, + #[derivative(Default(value = "default_namespace()"))] + pub namespace: Option, #[cfg(target_os = "linux")] + #[configurable(derived)] #[serde(default)] pub(crate) cgroups: cgroups::CGroupsConfig, + #[configurable(derived)] #[serde(default)] pub disk: disk::DiskConfig, + #[configurable(derived)] #[serde(default)] pub filesystem: filesystem::FilesystemConfig, + #[configurable(derived)] #[serde(default)] pub network: network::NetworkConfig, } @@ -100,6 +121,10 @@ const fn default_scrape_interval() -> f64 { 15.0 } +fn default_namespace() -> Option { + Some(String::from("host")) +} + inventory::submit! { SourceDescription::new::("host_metrics") } @@ -113,7 +138,7 @@ impl SourceConfig for HostMetricsConfig { init_roots(); let mut config = self.clone(); - config.namespace.0 = config.namespace.0.filter(|namespace| !namespace.is_empty()); + config.namespace = config.namespace.filter(|namespace| !namespace.is_empty()); Ok(Box::pin(config.run(cx.out, cx.shutdown))) } @@ -314,7 +339,7 @@ impl HostMetrics { tags: BTreeMap, ) -> Metric { Metric::new(name, MetricKind::Absolute, MetricValue::Counter { value }) - .with_namespace(self.config.namespace.0.clone()) + .with_namespace(self.config.namespace.clone()) .with_tags(Some(tags)) .with_timestamp(Some(timestamp)) } @@ -327,7 +352,7 @@ impl HostMetrics { tags: BTreeMap, ) -> Metric { Metric::new(name, MetricKind::Absolute, MetricValue::Gauge { value }) - .with_namespace(self.config.namespace.0.clone()) + .with_namespace(self.config.namespace.clone()) .with_tags(Some(tags)) .with_timestamp(Some(timestamp)) } @@ -475,6 +500,24 @@ impl Serialize for PatternWrapper { } } +// NOTE: We 
have to do a manual implementation of `Configurable` because `configurable_component` derives +// `Serialize`/`Deserialize` automatically, which we can't do here since they're already implemented by hand here. +impl<'de> Configurable<'de> for PatternWrapper { + fn referencable_name() -> Option<&'static str> { + Some("glob::PatternWrapper") + } + + fn description() -> Option<&'static str> { + Some("A compiled Unix shell style pattern.") + } + + fn generate_schema(gen: &mut SchemaGenerator, overrides: Metadata<'de, Self>) -> SchemaObject { + let mut schema = generate_string_schema(); + finalize_schema(gen, &mut schema, overrides); + schema + } +} + #[cfg(test)] pub(self) mod tests { use std::{collections::HashSet, future::Future}; @@ -595,7 +638,7 @@ pub(self) mod tests { #[tokio::test] async fn uses_custom_namespace() { let metrics = HostMetrics::new(HostMetricsConfig { - namespace: Namespace(Some("other".into())), + namespace: Some("other".into()), ..Default::default() }) .capture_metrics() diff --git a/src/sources/host_metrics/network.rs b/src/sources/host_metrics/network.rs index dc16b62933e0b..61dd0fe729e38 100644 --- a/src/sources/host_metrics/network.rs +++ b/src/sources/host_metrics/network.rs @@ -7,13 +7,16 @@ use heim::net::os::linux::IoCountersExt; #[cfg(target_os = "windows")] use heim::net::os::windows::IoCountersExt; use heim::units::information::byte; -use serde::{Deserialize, Serialize}; +use vector_config::configurable_component; use super::{filter_result, FilterList, HostMetrics}; use crate::event::metric::Metric; -#[derive(Clone, Debug, Default, Deserialize, Serialize)] +/// Options for the “network” metrics collector. +#[configurable_component] +#[derive(Clone, Debug, Default)] pub struct NetworkConfig { + /// Lists of device name patterns to include or exclude. 
#[serde(default)] devices: FilterList, } diff --git a/src/sources/kafka.rs b/src/sources/kafka.rs index 34abc8f8f3093..e606602a7258e 100644 --- a/src/sources/kafka.rs +++ b/src/sources/kafka.rs @@ -53,42 +53,87 @@ enum BuildError { #[derive(Clone, Debug, Derivative)] #[derivative(Default)] pub struct KafkaSourceConfig { + /// A comma-separated list of Kafka bootstrap servers. + /// + /// These are the servers in a Kafka cluster that a client should use to "bootstrap" its connection to the cluster, + /// allowing discovering all other hosts in the cluster. + /// + /// Must be in the form of `host:port`, and comma-separated. bootstrap_servers: String, + /// The Kafka topics names to read events from. + /// + /// Regular expression syntax is supported if the topic begins with `^`. topics: Vec, + /// The consumer group name to be used to consume events from Kafka. group_id: String, + /// If offsets for consumer group do not exist, set them using this strategy. + /// + /// See the [librdkafka documentation](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md) for the `auto.offset.reset` option for further clarification. #[serde(default = "default_auto_offset_reset")] auto_offset_reset: String, + /// The Kafka session timeout, in milliseconds. #[serde(default = "default_session_timeout_ms")] session_timeout_ms: u64, + /// Timeout for network requests, in milliseconds. #[serde(default = "default_socket_timeout_ms")] socket_timeout_ms: u64, + /// Maximum time the broker may wait to fill the response, in milliseconds. #[serde(default = "default_fetch_wait_max_ms")] fetch_wait_max_ms: u64, + /// The frequency that the consumer offsets are committed (written) to offset storage, in milliseconds. #[serde(default = "default_commit_interval_ms")] commit_interval_ms: u64, + /// Overrides the name of the log field used to add the message key to each event. + /// + /// The value will be the message key of the Kafka message itself. 
+ /// + /// By default, `"message_key"` is used. #[serde(default = "default_key_field")] key_field: String, + /// Overrides the name of the log field used to add the topic to each event. + /// + /// The value will be the topic from which the Kafka message was consumed from. + /// + /// By default, `"topic"` is used. #[serde(default = "default_topic_key")] topic_key: String, + /// Overrides the name of the log field used to add the partition to each event. + /// + /// The value will be the partition from which the Kafka message was consumed from. + /// + /// By default, `"partition"` is used. #[serde(default = "default_partition_key")] partition_key: String, + /// Overrides the name of the log field used to add the offset to each event. + /// + /// The value will be the offset of the Kafka message itself. + /// + /// By default, `"offset"` is used. #[serde(default = "default_offset_key")] offset_key: String, + /// Overrides the name of the log field used to add the headers to each event. + /// + /// The value will be the headers of the Kafka message itself. + /// + /// By default, `"headers"` is used. #[serde(default = "default_headers_key")] headers_key: String, + /// Advanced options set directly on the underlying `librdkafka` client. + /// + /// See the [librdkafka documentation](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md) for details. 
librdkafka_options: Option>, #[serde(flatten)] diff --git a/src/sources/kubernetes_logs/mod.rs b/src/sources/kubernetes_logs/mod.rs index b0617c555dac6..0756ecc3c6d2c 100644 --- a/src/sources/kubernetes_logs/mod.rs +++ b/src/sources/kubernetes_logs/mod.rs @@ -24,8 +24,8 @@ use kube::{ }, Client, Config as ClientConfig, }; -use serde::{Deserialize, Serialize}; use vector_common::TimeZone; +use vector_config::configurable_component; use vector_core::ByteSizeOf; use crate::{ @@ -72,40 +72,40 @@ const FILE_KEY: &str = "file"; const SELF_NODE_NAME_ENV_KEY: &str = "VECTOR_SELF_NODE_NAME"; /// Configuration for the `kubernetes_logs` source. -#[derive(Deserialize, Serialize, Debug, Clone)] -#[serde(deny_unknown_fields, default)] +#[configurable_component(source)] +#[derive(Clone, Debug)] +#[serde(default)] pub struct Config { - /// Specifies the label selector to filter `Pod`s with, to be used in - /// addition to the built-in `vector.dev/exclude` filter. + /// Specifies the label selector to filter `Pod`s with, to be used in addition to the built-in `vector.dev/exclude` filter. extra_label_selector: String, - /// Specifies the label selector to filter `Namespace`s with, to be used in - /// addition to the built-in `vector.dev/exclude` filter. + /// Specifies the label selector to filter `Namespace`s with, to be used in addition to the built-in `vector.dev/exclude` filter. extra_namespace_label_selector: String, /// The `name` of the Kubernetes `Node` that Vector runs at. - /// Required to filter the `Pod`s to only include the ones with the log - /// files accessible locally. + /// + /// Configured to use an environment var by default, to be evaluated to a value provided by Kubernetes at `Pod` deploy time. self_node_name: String, - /// Specifies the field selector to filter `Pod`s with, to be used in - /// addition to the built-in `Node` filter. + /// Specifies the field selector to filter `Pod`s with, to be used in addition to the built-in `Node` filter. 
extra_field_selector: String, - /// Automatically merge partial events. + /// Whether or not to automatically merge partial events. auto_partial_merge: bool, - /// Override global data_dir + /// The directory used to persist file checkpoint positions. + /// + /// By default, the global `data_dir` option is used. Please make sure the user Vector is running as has write permissions to this directory. data_dir: Option, - /// Specifies the field names for Pod metadata annotation. + #[configurable(derived)] #[serde(alias = "annotation_fields")] pod_annotation_fields: pod_metadata_annotator::FieldsSpec, - /// Specifies the field names for Namespace metadata annotation. + #[configurable(derived)] namespace_annotation_fields: namespace_metadata_annotator::FieldsSpec, - /// Specifies the field names for Node metadata annotation. + #[configurable(derived)] node_annotation_fields: node_metadata_annotator::FieldsSpec, /// A list of glob patterns to exclude from reading the files. diff --git a/src/sources/kubernetes_logs/namespace_metadata_annotator.rs b/src/sources/kubernetes_logs/namespace_metadata_annotator.rs index c1d63b77cd399..4875ba2d630c8 100644 --- a/src/sources/kubernetes_logs/namespace_metadata_annotator.rs +++ b/src/sources/kubernetes_logs/namespace_metadata_annotator.rs @@ -5,13 +5,16 @@ use k8s_openapi::{api::core::v1::Namespace, apimachinery::pkg::apis::meta::v1::ObjectMeta}; use kube::runtime::reflector::{store::Store, ObjectRef}; use lookup::lookup_v2::{parse_path, OwnedSegment}; -use serde::{Deserialize, Serialize}; +use vector_config::configurable_component; use crate::event::{Event, LogEvent}; -#[derive(Deserialize, Serialize, Debug, Clone)] -#[serde(deny_unknown_fields, default)] +/// Configuration for how the events are annotated with Namespace metadata. +#[configurable_component] +#[derive(Clone, Debug)] +#[serde(default)] pub struct FieldsSpec { + /// Event field for Namespace labels. 
pub namespace_labels: String, } diff --git a/src/sources/kubernetes_logs/node_metadata_annotator.rs b/src/sources/kubernetes_logs/node_metadata_annotator.rs index 19d75615fb7a9..4ecc4fc3b4397 100644 --- a/src/sources/kubernetes_logs/node_metadata_annotator.rs +++ b/src/sources/kubernetes_logs/node_metadata_annotator.rs @@ -5,13 +5,16 @@ use k8s_openapi::{api::core::v1::Node, apimachinery::pkg::apis::meta::v1::ObjectMeta}; use kube::runtime::reflector::{store::Store, ObjectRef}; use lookup::lookup_v2::{parse_path, OwnedSegment}; -use serde::{Deserialize, Serialize}; +use vector_config::configurable_component; use crate::event::{Event, LogEvent}; -#[derive(Deserialize, Serialize, Debug, Clone)] -#[serde(deny_unknown_fields, default)] +/// Configuration for how the events are annotated with Node metadata. +#[configurable_component] +#[derive(Clone, Debug)] +#[serde(default)] pub struct FieldsSpec { + /// Event field for Node labels. pub node_labels: String, } diff --git a/src/sources/kubernetes_logs/pod_metadata_annotator.rs b/src/sources/kubernetes_logs/pod_metadata_annotator.rs index a660acaee68b1..836445576355f 100644 --- a/src/sources/kubernetes_logs/pod_metadata_annotator.rs +++ b/src/sources/kubernetes_logs/pod_metadata_annotator.rs @@ -8,25 +8,50 @@ use k8s_openapi::{ }; use kube::runtime::reflector::{store::Store, ObjectRef}; use lookup::lookup_v2::{parse_path, OwnedSegment}; -use serde::{Deserialize, Serialize}; +use vector_config::configurable_component; use super::path_helpers::{parse_log_file_path, LogFileInfo}; use crate::event::{Event, LogEvent}; -#[derive(Deserialize, Serialize, Debug, Clone)] -#[serde(deny_unknown_fields, default)] +/// Configuration for how the events are annotated with `Pod` metadata. +#[configurable_component] +#[derive(Clone, Debug)] +#[serde(default)] pub struct FieldsSpec { + /// Event field for Pod name. pub pod_name: String, + + /// Event field for Pod namespace. pub pod_namespace: String, + + /// Event field for Pod uid. 
pub pod_uid: String, + + /// Event field for Pod IPv4 address. pub pod_ip: String, + + /// Event field for Pod IPv4 and IPv6 addresses. pub pod_ips: String, + + /// Event field for Pod labels. pub pod_labels: String, + + /// Event field for Pod annotations. pub pod_annotations: String, + + /// Event field for Pod node_name. pub pod_node_name: String, + + /// Event field for Pod owner reference. pub pod_owner: String, + + /// Event field for container name. pub container_name: String, + + /// Event field for container ID. pub container_id: String, + + /// Event field for container image. pub container_image: String, } diff --git a/src/sources/prometheus/remote_write.rs b/src/sources/prometheus/remote_write.rs index a6b3871fc6bad..1ac5ecb90c631 100644 --- a/src/sources/prometheus/remote_write.rs +++ b/src/sources/prometheus/remote_write.rs @@ -3,7 +3,7 @@ use std::{collections::HashMap, net::SocketAddr}; use bytes::Bytes; use prometheus_parser::proto; use prost::Message; -use serde::{Deserialize, Serialize}; +use vector_config::configurable_component; use warp::http::{HeaderMap, StatusCode}; use super::parser; @@ -25,14 +25,22 @@ use crate::{ const SOURCE_NAME: &str = "prometheus_remote_write"; -#[derive(Clone, Debug, Deserialize, Serialize)] +/// Configuration for the `prometheus_remote_write` source. +#[configurable_component(source)] +#[derive(Clone, Debug)] pub struct PrometheusRemoteWriteConfig { + /// The address to accept connections on. + /// + /// The address _must_ include a port. 
address: SocketAddr, + #[configurable(derived)] tls: Option, + #[configurable(derived)] auth: Option, + #[configurable(derived)] #[serde(default, deserialize_with = "bool_or_struct")] acknowledgements: AcknowledgementsConfig, } diff --git a/src/sources/prometheus/scrape.rs b/src/sources/prometheus/scrape.rs index bed0bd2393e51..86370fd858371 100644 --- a/src/sources/prometheus/scrape.rs +++ b/src/sources/prometheus/scrape.rs @@ -9,6 +9,7 @@ use hyper::{Body, Request}; use serde::{Deserialize, Serialize}; use snafu::{ResultExt, Snafu}; use tokio_stream::wrappers::IntervalStream; +use vector_config::configurable_component; use vector_core::ByteSizeOf; use super::parser; @@ -41,19 +42,52 @@ enum ConfigError { BothEndpointsAndHosts, } -#[derive(Deserialize, Serialize, Clone, Debug)] +/// Configuration for the `prometheus_scrape` source. +#[configurable_component(source)] +#[derive(Clone, Debug)] pub struct PrometheusScrapeConfig { - // Deprecated name + /// Endpoints to scrape metrics from. #[serde(alias = "hosts")] endpoints: Vec, + + /// The interval between scrapes, in seconds. #[serde(default = "default_scrape_interval_secs")] scrape_interval_secs: u64, + + /// Overrides the name of the tag used to add the instance to each metric. + /// + /// The tag value will be the host/port of the scraped instance. + /// + /// By default, `"instance"` is used. instance_tag: Option, + + /// Overrides the name of the tag used to add the endpoint to each metric. + /// + /// The tag value will be the endpoint of the scraped instance. + /// + /// By default, `"endpoint"` is used. endpoint_tag: Option, + + /// Controls how tag conflicts are handled if the scraped source has tags that Vector would add. + /// + /// If `true`, Vector will not add the new tag if the scraped metric has the tag already. If `false`, Vector will + /// rename the conflicting tag by prepending `exported_` to the name. + /// + /// This matches Prometheus’ `honor_labels` configuration. 
#[serde(default = "crate::serde::default_false")] honor_labels: bool, + + /// Custom parameters for the scrape request query string. + /// + /// One or more values for the same parameter key can be provided. The parameters provided in this option are + /// appended to any parameters manually provided in the `endpoints` option. This option is especially useful when + /// scraping the `/federate` endpoint. query: Option>>, + + #[configurable(derived)] tls: Option, + + #[configurable(derived)] auth: Option, } diff --git a/src/sources/redis/mod.rs b/src/sources/redis/mod.rs index 32dbbb339e8ac..4d20268fe70e9 100644 --- a/src/sources/redis/mod.rs +++ b/src/sources/redis/mod.rs @@ -5,9 +5,9 @@ use codecs::{ StreamDecodingError, }; use futures::StreamExt; -use serde::{Deserialize, Serialize}; use snafu::{ResultExt, Snafu}; use tokio_util::codec::FramedRead; +use vector_config::configurable_component; use vector_core::ByteSizeOf; use crate::{ @@ -28,27 +28,42 @@ enum BuildError { Client { source: redis::RedisError }, } -#[derive(Copy, Clone, Debug, Derivative, Deserialize, Serialize)] +/// Data type to use for reading messages from Redis. +#[configurable_component] +#[derive(Copy, Clone, Debug, Derivative)] #[derivative(Default)] #[serde(rename_all = "lowercase")] pub enum DataTypeConfig { + /// The `list` data type. #[derivative(Default)] List, + + /// The `channel` data type. + /// + /// This is based on Redis' Pub/Sub capabilities. Channel, } -#[derive(Copy, Clone, Debug, Default, Derivative, Deserialize, Serialize, Eq, PartialEq)] +/// Options for the Redis `list` data type. +#[configurable_component] +#[derive(Copy, Clone, Debug, Default, Derivative, Eq, PartialEq)] #[serde(rename_all = "lowercase")] pub struct ListOption { + #[configurable(derived)] method: Method, } -#[derive(Clone, Copy, Debug, Derivative, Deserialize, Serialize, Eq, PartialEq)] +/// Method for getting events from the `list` data type. 
+#[configurable_component] +#[derive(Clone, Copy, Debug, Derivative, Eq, PartialEq)] #[derivative(Default)] #[serde(rename_all = "lowercase")] pub enum Method { + /// Pop messages from the head of the list. #[derivative(Default)] Lpop, + + /// Pop messages from the tail of the list. Rpop, } @@ -71,18 +86,38 @@ impl From<&redis::ConnectionInfo> for ConnectionInfo { } } -#[derive(Clone, Debug, Derivative, Deserialize, Serialize)] -#[serde(deny_unknown_fields)] +/// Configuration for the `redis` source. +#[configurable_component(source)] +#[derive(Clone, Debug, Derivative)] pub struct RedisSourceConfig { + /// The Redis data type (`list` or `channel`) to use. #[serde(default)] data_type: DataTypeConfig, + + #[configurable(derived)] list: Option, + + /// The Redis URL to connect to. + /// + /// The URL must take the form of `protocol://server:port/db` where the `protocol` can either be `redis` or `rediss` for connections secured via TLS. url: String, + + /// The Redis key to read messages from. key: String, + + /// Sets the name of the log field to use to add the key to each event. + /// + /// The value will be the Redis key that the event was read from. + /// + /// By default, this is not set and the field will not be automatically added. redis_key: Option, + + #[configurable(derived)] #[serde(default = "default_framing_message_based")] #[derivative(Default(value = "default_framing_message_based()"))] framing: FramingConfig, + + #[configurable(derived)] #[serde(default = "default_decoding")] #[derivative(Default(value = "default_decoding()"))] decoding: DeserializerConfig, diff --git a/src/sources/socket/mod.rs b/src/sources/socket/mod.rs index d4ce736ddff71..342e47715ee51 100644 --- a/src/sources/socket/mod.rs +++ b/src/sources/socket/mod.rs @@ -32,19 +32,19 @@ pub struct SocketConfig { #[serde(tag = "mode", rename_all = "snake_case")] pub enum Mode { /// Listen on TCP. - Tcp(tcp::TcpConfig), + Tcp(#[configurable(derived)] tcp::TcpConfig), /// Listen on UDP. 
- Udp(udp::UdpConfig), + Udp(#[configurable(derived)] udp::UdpConfig), /// Listen on UDS, in datagram mode. (Unix domain socket) #[cfg(unix)] - UnixDatagram(unix::UnixConfig), + UnixDatagram(#[configurable(derived)] unix::UnixConfig), /// Listen on UDS, in stream mode. (Unix domain socket) #[cfg(unix)] #[serde(alias = "unix")] - UnixStream(unix::UnixConfig), + UnixStream(#[configurable(derived)] unix::UnixConfig), } impl SocketConfig { diff --git a/src/sources/statsd/mod.rs b/src/sources/statsd/mod.rs index 996faae0f7adb..50e530e794236 100644 --- a/src/sources/statsd/mod.rs +++ b/src/sources/statsd/mod.rs @@ -44,14 +44,14 @@ use unix::{statsd_unix, UnixConfig}; #[serde(tag = "mode", rename_all = "snake_case")] pub enum StatsdConfig { /// Listen on TCP. - Tcp(TcpConfig), + Tcp(#[configurable(derived)] TcpConfig), /// Listen on UDP. - Udp(UdpConfig), + Udp(#[configurable(derived)] UdpConfig), /// Listen on UDS. (Unix domain socket) #[cfg(unix)] - Unix(UnixConfig), + Unix(#[configurable(derived)] UnixConfig), } /// UDP configuration for the `statsd` source. @@ -79,7 +79,7 @@ impl UdpConfig { /// TCP configuration for the `statsd` source. #[configurable_component] #[derive(Clone, Debug)] -struct TcpConfig { +pub struct TcpConfig { /// The address to listen for connections on. address: SocketListenAddr, diff --git a/src/sources/vector/mod.rs b/src/sources/vector/mod.rs index 894e2da3e19cd..ea3c9b440860e 100644 --- a/src/sources/vector/mod.rs +++ b/src/sources/vector/mod.rs @@ -53,10 +53,10 @@ pub struct VectorConfigV2 { #[serde(untagged)] pub enum VectorConfig { /// Configuration for version one. - V1(VectorConfigV1), + V1(#[configurable(derived)] VectorConfigV1), /// Configuration for version two. - V2(VectorConfigV2), + V2(#[configurable(derived)] VectorConfigV2), } inventory::submit! 
{ From 8303d1e4efe8e24063eb26b57b8485b3ea6b7e47 Mon Sep 17 00:00:00 2001 From: Toby Lawrence Date: Mon, 6 Jun 2022 17:00:30 -0400 Subject: [PATCH 05/12] fix some lints, switch to custom helper for deprecated items to avoid polluting cargo output Signed-off-by: Toby Lawrence --- Cargo.lock | 1 + lib/vector-config-macros/src/ast/container.rs | 16 +++++++--------- lib/vector-config-macros/src/ast/field.rs | 10 ++-------- lib/vector-config-macros/src/ast/variant.rs | 12 +++--------- lib/vector-config-macros/src/configurable.rs | 4 +--- lib/vector-config/src/schema.rs | 2 +- lib/vector-config/tests/basic.rs | 3 +-- lib/vector-core/Cargo.toml | 1 + lib/vector-core/src/config/mod.rs | 6 +++--- src/config/enterprise.rs | 3 +-- src/sinks/kafka/tests.rs | 10 +++++----- src/sources/file.rs | 6 +++--- src/sources/journald.rs | 4 ++-- src/sources/prometheus/remote_write.rs | 2 +- src/sources/prometheus/scrape.rs | 4 ++-- src/sources/redis/mod.rs | 4 ++-- src/sources/splunk_hec/mod.rs | 2 +- 17 files changed, 37 insertions(+), 53 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f501502668ac6..5a467bb163fec 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8995,6 +8995,7 @@ dependencies = [ "vector_buffers", "vector_common", "vector_config", + "vector_config_macros", "vrl", ] diff --git a/lib/vector-config-macros/src/ast/container.rs b/lib/vector-config-macros/src/ast/container.rs index 48e7c00aa84ac..863dd2e2aa88e 100644 --- a/lib/vector-config-macros/src/ast/container.rs +++ b/lib/vector-config-macros/src/ast/container.rs @@ -1,4 +1,8 @@ -use darling::{error::Accumulator, util::path_to_string, FromAttributes, FromMeta}; +use darling::{ + error::Accumulator, + util::{path_to_string, Flag}, + FromAttributes, FromMeta, +}; use serde_derive_internals::{ast as serde_ast, Ctxt, Derive}; use syn::{DeriveInput, ExprPath, Generics, Ident, NestedMeta}; @@ -172,7 +176,7 @@ impl<'a> Container<'a> { } pub fn deprecated(&self) -> bool { - self.attrs.deprecated + 
self.attrs.deprecated.is_present() } pub fn metadata(&self) -> impl Iterator { @@ -189,19 +193,13 @@ impl<'a> Container<'a> { struct Attributes { title: Option, description: Option, - #[darling(skip)] - deprecated: bool, + deprecated: Flag, #[darling(multiple)] metadata: Vec, } impl Attributes { fn finalize(mut self, forwarded_attrs: &[syn::Attribute]) -> darling::Result { - // Parse any forwarded attributes that `darling` left us. - self.deprecated = forwarded_attrs - .iter() - .any(|a| a.path.is_ident("deprecated")); - // We additionally attempt to extract a title/description from the forwarded doc attributes, if they exist. // Whether we extract both a title and description, or just description, is documented in more detail in // `try_extract_doc_title_description` itself. diff --git a/lib/vector-config-macros/src/ast/field.rs b/lib/vector-config-macros/src/ast/field.rs index dd01cdb0b89e9..a4843f7a2d6c2 100644 --- a/lib/vector-config-macros/src/ast/field.rs +++ b/lib/vector-config-macros/src/ast/field.rs @@ -59,7 +59,7 @@ impl<'a> Field<'a> { } pub fn deprecated(&self) -> bool { - self.attrs.deprecated + self.attrs.deprecated.is_present() } pub fn validation(&self) -> &[Validation] { @@ -82,8 +82,7 @@ struct Attributes { description: Option, derived: Flag, transparent: Flag, - #[darling(skip)] - deprecated: bool, + deprecated: Flag, #[darling(skip)] visible: bool, #[darling(skip)] @@ -102,11 +101,6 @@ impl Attributes { self.visible = !field.attrs.skip_deserializing() || !field.attrs.skip_serializing(); self.flatten = field.attrs.flatten(); - // Parse any forwarded attributes that `darling` left us. - self.deprecated = forwarded_attrs - .iter() - .any(|a| a.path.is_ident("deprecated")); - // We additionally attempt to extract a title/description from the forwarded doc attributes, if they exist. // Whether we extract both a title and description, or just description, is documented in more detail in // `try_extract_doc_title_description` itself. 
diff --git a/lib/vector-config-macros/src/ast/variant.rs b/lib/vector-config-macros/src/ast/variant.rs index a77ec62ce3b83..998dbe4d07ed3 100644 --- a/lib/vector-config-macros/src/ast/variant.rs +++ b/lib/vector-config-macros/src/ast/variant.rs @@ -1,4 +1,4 @@ -use darling::{error::Accumulator, FromAttributes}; +use darling::{error::Accumulator, util::Flag, FromAttributes}; use serde_derive_internals::ast as serde_ast; use syn::spanned::Spanned; @@ -71,7 +71,7 @@ impl<'a> Variant<'a> { } pub fn deprecated(&self) -> bool { - self.attrs.deprecated + self.attrs.deprecated.is_present() } pub fn visible(&self) -> bool { @@ -90,8 +90,7 @@ impl<'a> Spanned for Variant<'a> { struct Attributes { title: Option, description: Option, - #[darling(skip)] - deprecated: bool, + deprecated: Flag, #[darling(skip)] visible: bool, } @@ -105,11 +104,6 @@ impl Attributes { // Derive any of the necessary fields from the `serde` side of things. self.visible = !variant.attrs.skip_deserializing() || !variant.attrs.skip_serializing(); - // Parse any forwarded attributes that `darling` left us. - self.deprecated = forwarded_attrs - .iter() - .any(|a| a.path.is_ident("deprecated")); - // We additionally attempt to extract a title/description from the forwarded doc attributes, if they exist. // Whether we extract both a title and description, or just description, is documented in more detail in // `try_extract_doc_title_description` itself. diff --git a/lib/vector-config-macros/src/configurable.rs b/lib/vector-config-macros/src/configurable.rs index b4b9f6eacc0d1..6741b68786683 100644 --- a/lib/vector-config-macros/src/configurable.rs +++ b/lib/vector-config-macros/src/configurable.rs @@ -167,9 +167,7 @@ fn generate_named_struct_field( if container.default_value().is_none() && field.default_value().is_none() { Some(quote! 
{ if !#field_as_configurable::is_optional() { - if !required.insert(#field_key.to_string()) { - panic!(#field_already_contained); - } + assert!(required.insert(#field_key.to_string()), #field_already_contained); } }) } else { diff --git a/lib/vector-config/src/schema.rs b/lib/vector-config/src/schema.rs index 36e742ea95a2a..4748afcea83bf 100644 --- a/lib/vector-config/src/schema.rs +++ b/lib/vector-config/src/schema.rs @@ -118,7 +118,7 @@ pub fn convert_to_flattened_schema(primary: &mut SchemaObject, mut subschemas: V // First, we replace the primary schema with an empty schema, because we need to push it the actual primary schema // into the list of `allOf` schemas. This is due to the fact that it's not valid to "extend" a schema using `allOf`, // so everything has to be in there. - let primary_subschema = mem::replace(primary, SchemaObject::default()); + let primary_subschema = mem::take(primary); subschemas.insert(0, primary_subschema); let all_of_schemas = subschemas.into_iter().map(Schema::Object).collect(); diff --git a/lib/vector-config/tests/basic.rs b/lib/vector-config/tests/basic.rs index ad45de2689b04..45c893d56b543 100644 --- a/lib/vector-config/tests/basic.rs +++ b/lib/vector-config/tests/basic.rs @@ -186,8 +186,7 @@ pub struct AdvancedSinkConfig { #[configurable(derived)] #[serde(default = "default_advanced_sink_batch")] batch: BatchConfig, - #[configurable(derived)] - #[deprecated] + #[configurable(deprecated, derived)] #[serde(default = "default_advanced_sink_encoding")] encoding: Encoding, /// Overridden TLS description. 
diff --git a/lib/vector-core/Cargo.toml b/lib/vector-core/Cargo.toml index caf2fac4a1531..68620f7c44ddb 100644 --- a/lib/vector-core/Cargo.toml +++ b/lib/vector-core/Cargo.toml @@ -55,6 +55,7 @@ value = { path = "../value", default-features = false, features = ["lua", "toml" vector_buffers = { path = "../vector-buffers", default-features = false } vector_common = { path = "../vector-common" } vector_config = { path = "../vector-config" } +vector_config_macros = { path = "../vector-config-macros" } # Rename to "vrl" once we use a release with stable `-Z namespaced-features`: # https://doc.rust-lang.org/cargo/reference/unstable.html#namespaced-features vrl-lib = { package = "vrl", path = "../vrl/vrl", optional = true } diff --git a/lib/vector-core/src/config/mod.rs b/lib/vector-core/src/config/mod.rs index 5eca4a8e509f1..85cfd2bc30f02 100644 --- a/lib/vector-core/src/config/mod.rs +++ b/lib/vector-core/src/config/mod.rs @@ -1,7 +1,6 @@ use std::{fmt, num::NonZeroUsize}; use bitmask_enum::bitmask; -use serde::{Deserialize, Serialize}; mod global_options; mod id; @@ -11,7 +10,7 @@ pub mod proxy; pub use global_options::GlobalOptions; pub use id::ComponentKey; pub use log_schema::{init_log_schema, log_schema, LogSchema}; -use vector_config::Configurable; +use vector_config::configurable_component; use crate::schema; @@ -143,7 +142,8 @@ impl Output { } /// Acknowledgement configuration. -#[derive(Clone, Configurable, Copy, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] +#[configurable_component] +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] pub struct AcknowledgementsConfig { /// Whether or not acknowledgements should be enabled. 
enabled: Option, diff --git a/src/config/enterprise.rs b/src/config/enterprise.rs index 2bec989dab3b5..4d1f70e936eb1 100644 --- a/src/config/enterprise.rs +++ b/src/config/enterprise.rs @@ -29,8 +29,7 @@ use crate::{ util::retries::ExponentialBackoff, }, sources::{ - host_metrics::HostMetricsConfig, - internal_logs::InternalLogsConfig, + host_metrics::HostMetricsConfig, internal_logs::InternalLogsConfig, internal_metrics::InternalMetricsConfig, }, transforms::remap::RemapConfig, diff --git a/src/sinks/kafka/tests.rs b/src/sinks/kafka/tests.rs index c41fc9f038ee0..d61b2abf9db18 100644 --- a/src/sinks/kafka/tests.rs +++ b/src/sinks/kafka/tests.rs @@ -24,7 +24,7 @@ mod integration_test { use crate::{ event::Value, - kafka::{KafkaAuthConfig, KafkaCompression, KafkaSaslConfig, KafkaTlsConfig}, + kafka::{KafkaAuthConfig, KafkaCompression, KafkaSaslConfig}, sinks::{ kafka::{ config::{KafkaRole, KafkaSinkConfig}, @@ -41,7 +41,7 @@ mod integration_test { components::{run_and_assert_sink_compliance, SINK_TAGS}, random_lines_with_stream, random_string, wait_for, }, - tls::TlsConfig, + tls::{TlsConfig, TlsEnableableConfig}, }; fn kafka_host() -> String { @@ -206,7 +206,7 @@ mod integration_test { kafka_happy_path( kafka_address(9092), None, - Some(KafkaTlsConfig { + Some(TlsEnableableConfig { enabled: Some(true), options: TlsConfig::test_config(), }), @@ -221,7 +221,7 @@ mod integration_test { kafka_happy_path( kafka_address(9092), None, - Some(KafkaTlsConfig { + Some(TlsEnableableConfig { enabled: Some(true), options: TlsConfig::test_config(), }), @@ -250,7 +250,7 @@ mod integration_test { async fn kafka_happy_path( server: String, sasl: Option, - tls: Option, + tls: Option, compression: KafkaCompression, ) { let topic = format!("test-{}", random_string(10)); diff --git a/src/sources/file.rs b/src/sources/file.rs index a0fe4f46174f7..77a897972b539 100644 --- a/src/sources/file.rs +++ b/src/sources/file.rs @@ -82,7 +82,7 @@ pub struct FileConfig { /// Whether or not to start 
reading from the beginning of a new file. /// /// DEPRECATED: This is a deprecated option -- replaced by `ignore_checkpoints`/`read_from` -- and should be removed. - #[deprecated] + #[configurable(deprecated)] pub start_at_beginning: Option, /// Whether or not to ignore existing checkpoints when determining where to start reading a file. @@ -133,13 +133,13 @@ pub struct FileConfig { /// String value used to identify the start of a multi-line message. /// /// DEPRECATED: This is a deprecated option -- replaced by `multiline` -- and should be removed. - #[deprecated] + #[configurable(deprecated)] pub message_start_indicator: Option, /// How long to wait for more data when aggregating a multi-line message, in milliseconds. /// /// DEPRECATED: This is a deprecated option -- replaced by `multiline` -- and should be removed. - #[deprecated] + #[configurable(deprecated)] pub multi_line_timeout: u64, /// Multiline parsing configuration. diff --git a/src/sources/journald.rs b/src/sources/journald.rs index 42e4893634548..1a5d549772957 100644 --- a/src/sources/journald.rs +++ b/src/sources/journald.rs @@ -95,7 +95,7 @@ pub struct JournaldConfig { /// /// If empty or not present, all units are accepted. Unit names lacking a "." will have ".service" appended to make them a valid service unit name. // TODO: Why isn't this just an alias on `include_units`? - #[deprecated] + #[configurable(deprecated)] pub units: Vec, /// The list of unit names to monitor. @@ -144,7 +144,7 @@ pub struct JournaldConfig { /// /// Has no effect unless the value of the field is already an integer. 
#[serde(default)] - #[deprecated] + #[configurable(deprecated)] remap_priority: bool, } diff --git a/src/sources/prometheus/remote_write.rs b/src/sources/prometheus/remote_write.rs index 1ac5ecb90c631..13518130bbdbc 100644 --- a/src/sources/prometheus/remote_write.rs +++ b/src/sources/prometheus/remote_write.rs @@ -30,7 +30,7 @@ const SOURCE_NAME: &str = "prometheus_remote_write"; #[derive(Clone, Debug)] pub struct PrometheusRemoteWriteConfig { /// The address to accept connections on. - /// + /// /// The address _must_ include a port. address: SocketAddr, diff --git a/src/sources/prometheus/scrape.rs b/src/sources/prometheus/scrape.rs index 86370fd858371..ad14b5d225e7d 100644 --- a/src/sources/prometheus/scrape.rs +++ b/src/sources/prometheus/scrape.rs @@ -69,10 +69,10 @@ pub struct PrometheusScrapeConfig { endpoint_tag: Option, /// Controls how tag conflicts are handled if the scraped source has tags that Vector would add. - /// + /// /// If `true`, Vector will not add the new tag if the scraped metric has the tag already. If `false`, Vector will /// rename the conflicting tag by prepending `exported_` to the name. - /// + /// /// This matches Prometheus’ `honor_labels` configuration. #[serde(default = "crate::serde::default_false")] honor_labels: bool, diff --git a/src/sources/redis/mod.rs b/src/sources/redis/mod.rs index 4d20268fe70e9..aaa68fe4aca9c 100644 --- a/src/sources/redis/mod.rs +++ b/src/sources/redis/mod.rs @@ -39,7 +39,7 @@ pub enum DataTypeConfig { List, /// The `channel` data type. - /// + /// /// This is based on Redis' Pub/Sub capabilities. Channel, } @@ -98,7 +98,7 @@ pub struct RedisSourceConfig { list: Option, /// The Redis URL to connect to. - /// + /// /// The URL must take the form of `protocol://server:port/db` where the `protocol` can either be `redis` or `rediss` for connections secured via TLS. 
url: String, diff --git a/src/sources/splunk_hec/mod.rs b/src/sources/splunk_hec/mod.rs index 858cff96fa0da..621d13e91f360 100644 --- a/src/sources/splunk_hec/mod.rs +++ b/src/sources/splunk_hec/mod.rs @@ -65,7 +65,7 @@ pub struct SplunkConfig { /// it was communicating with the Splunk HEC endpoint directly. /// /// If _not_ supplied, the `Authorization` header will be ignored and requests will not be authenticated. - #[deprecated] + #[configurable(deprecated)] token: Option, /// Optional list of valid authorization tokens. From b7e8f59bc60830151c6e12551884e668f75f29d6 Mon Sep 17 00:00:00 2001 From: Toby Lawrence Date: Mon, 6 Jun 2022 17:15:50 -0400 Subject: [PATCH 06/12] remove the deny unknown fields bit from configurable_component Signed-off-by: Toby Lawrence --- lib/vector-config-macros/src/configurable_component.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/vector-config-macros/src/configurable_component.rs b/lib/vector-config-macros/src/configurable_component.rs index 48207c268efc7..e94ca8ffea9e5 100644 --- a/lib/vector-config-macros/src/configurable_component.rs +++ b/lib/vector-config-macros/src/configurable_component.rs @@ -46,7 +46,6 @@ pub fn configurable_component_impl(args: TokenStream, item: TokenStream) -> Toke let input = parse_macro_input!(item as DeriveInput); let derived = quote! 
{ #[derive(::vector_config_macros::Configurable, ::serde::Serialize, ::serde::Deserialize)] - #[serde(deny_unknown_fields)] #component_type #input }; From 3f2dd480c235b7e813016093e726adb6e91333ef Mon Sep 17 00:00:00 2001 From: Toby Lawrence Date: Mon, 6 Jun 2022 17:23:36 -0400 Subject: [PATCH 07/12] missed some direct usages of the Configurable derive Signed-off-by: Toby Lawrence --- Cargo.lock | 1 + lib/codecs/Cargo.toml | 1 + lib/codecs/src/decoding/framing/character_delimited.rs | 5 +++-- lib/codecs/src/decoding/framing/newline_delimited.rs | 5 +++-- lib/codecs/src/decoding/framing/octet_counting.rs | 5 +++-- lib/codecs/src/decoding/mod.rs | 9 +++++---- 6 files changed, 16 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5a467bb163fec..1f755f75533ab 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1687,6 +1687,7 @@ dependencies = [ "value", "vector_common", "vector_config", + "vector_config_macros", "vector_core", ] diff --git a/lib/codecs/Cargo.toml b/lib/codecs/Cargo.toml index 8d659b64d3d82..cdd09fb3c96ac 100644 --- a/lib/codecs/Cargo.toml +++ b/lib/codecs/Cargo.toml @@ -21,6 +21,7 @@ tracing = { version = "0.1", default-features = false } value = { path = "../value", default-features = false } vector_common = { path = "../vector-common", default-features = false } vector_config = { path = "../vector-config", default-features = false } +vector_config_macros = { path = "../vector-config-macros", default-features = false } vector_core = { path = "../vector-core", default-features = false } [dev-dependencies] diff --git a/lib/codecs/src/decoding/framing/character_delimited.rs b/lib/codecs/src/decoding/framing/character_delimited.rs index 941d2e23e68d2..5eb86205a876c 100644 --- a/lib/codecs/src/decoding/framing/character_delimited.rs +++ b/lib/codecs/src/decoding/framing/character_delimited.rs @@ -3,7 +3,7 @@ use memchr::memchr; use serde::{Deserialize, Serialize}; use tokio_util::codec::Decoder; use tracing::{trace, warn}; -use 
vector_config::Configurable; +use vector_config::configurable_component; use super::BoxedFramingError; @@ -29,7 +29,8 @@ impl CharacterDelimitedDecoderConfig { } /// Options for building a `CharacterDelimitedDecoder`. -#[derive(Clone, Configurable, Debug, Deserialize, PartialEq, Serialize)] +#[configurable_component] +#[derive(Clone, Debug, PartialEq)] pub struct CharacterDelimitedDecoderOptions { /// The character that delimits byte sequences. #[serde(with = "vector_core::serde::ascii_char")] diff --git a/lib/codecs/src/decoding/framing/newline_delimited.rs b/lib/codecs/src/decoding/framing/newline_delimited.rs index 4d2ae63ea560d..a7c5513733627 100644 --- a/lib/codecs/src/decoding/framing/newline_delimited.rs +++ b/lib/codecs/src/decoding/framing/newline_delimited.rs @@ -2,7 +2,7 @@ use bytes::{Bytes, BytesMut}; use derivative::Derivative; use serde::{Deserialize, Serialize}; use tokio_util::codec::Decoder; -use vector_config::Configurable; +use vector_config::configurable_component; use super::{BoxedFramingError, CharacterDelimitedDecoder}; @@ -18,7 +18,8 @@ pub struct NewlineDelimitedDecoderConfig { } /// Options for building a `NewlineDelimitedDecoder`. -#[derive(Clone, Configurable, Debug, Derivative, Deserialize, PartialEq, Serialize)] +#[configurable_component] +#[derive(Clone, Debug, Derivative, PartialEq)] #[derivative(Default)] pub struct NewlineDelimitedDecoderOptions { /// The maximum length of the byte buffer. 
diff --git a/lib/codecs/src/decoding/framing/octet_counting.rs b/lib/codecs/src/decoding/framing/octet_counting.rs index d8e39d04d908a..88011fd843188 100644 --- a/lib/codecs/src/decoding/framing/octet_counting.rs +++ b/lib/codecs/src/decoding/framing/octet_counting.rs @@ -5,7 +5,7 @@ use derivative::Derivative; use serde::{Deserialize, Serialize}; use tokio_util::codec::{LinesCodec, LinesCodecError}; use tracing::trace; -use vector_config::Configurable; +use vector_config::configurable_component; use super::BoxedFramingError; @@ -32,7 +32,8 @@ impl OctetCountingDecoderConfig { } /// Options for building a `OctetCountingDecoder`. -#[derive(Clone, Configurable, Debug, Derivative, Deserialize, PartialEq, Serialize)] +#[configurable_component] +#[derive(Clone, Debug, Derivative, PartialEq)] #[derivative(Default)] pub struct OctetCountingDecoderOptions { /// The maximum length of the byte buffer. diff --git a/lib/codecs/src/decoding/mod.rs b/lib/codecs/src/decoding/mod.rs index 200b108d1d724..6eae17823dadc 100644 --- a/lib/codecs/src/decoding/mod.rs +++ b/lib/codecs/src/decoding/mod.rs @@ -23,9 +23,8 @@ pub use framing::{ NewlineDelimitedDecoderConfig, NewlineDelimitedDecoderOptions, OctetCountingDecoder, OctetCountingDecoderConfig, OctetCountingDecoderOptions, }; -use serde::{Deserialize, Serialize}; use smallvec::SmallVec; -use vector_config::Configurable; +use vector_config::configurable_component; use vector_core::{config::DataType, event::Event, schema}; /// An error that occurred while decoding structured events from a byte stream / @@ -69,7 +68,8 @@ impl StreamDecodingError for Error { // Unfortunately, copying options of the nested enum variants is necessary // since `serde` doesn't allow `flatten`ing these: // https://github.com/serde-rs/serde/issues/1402. 
-#[derive(Clone, Configurable, Debug, Deserialize, Serialize)] +#[configurable_component] +#[derive(Clone, Debug)] #[serde(tag = "method", rename_all = "snake_case")] pub enum FramingConfig { /// Configures the `BytesDecoder`. @@ -217,7 +217,8 @@ impl tokio_util::codec::Decoder for Framer { // Unfortunately, copying options of the nested enum variants is necessary // since `serde` doesn't allow `flatten`ing these: // https://github.com/serde-rs/serde/issues/1402. -#[derive(Clone, Configurable, Debug, Deserialize, Serialize)] +#[configurable_component] +#[derive(Clone, Debug)] #[serde(tag = "codec", rename_all = "snake_case")] pub enum DeserializerConfig { /// Configures the `BytesDeserializer`. From 0f1a740b643b8e3aa938d6110ad00b1d47060f68 Mon Sep 17 00:00:00 2001 From: Toby Lawrence Date: Tue, 7 Jun 2022 13:34:42 -0400 Subject: [PATCH 08/12] bring back deny_unknown_fields + other small tweaks Signed-off-by: Toby Lawrence --- lib/vector-config-macros/src/ast/container.rs | 1 - lib/vector-config-macros/src/lib.rs | 31 +++++++++++++++++-- lib/vector-config/src/num.rs | 4 --- lib/vector-config/src/schema.rs | 2 +- lib/vector-core/src/config/mod.rs | 4 +-- src/aws/auth.rs | 11 ++++--- src/aws/region.rs | 9 +++--- src/docker.rs | 9 +++--- src/gcp.rs | 2 +- src/http.rs | 4 +-- src/kafka.rs | 18 ++++++++--- src/line_agg.rs | 30 +++++++++--------- src/nats.rs | 10 ++++-- src/sources/aws_ecs_metrics/mod.rs | 1 + src/sources/aws_s3/mod.rs | 6 ++-- src/sources/aws_s3/sqs.rs | 1 + src/sources/aws_sqs/config.rs | 1 + src/sources/docker_logs.rs | 10 +++--- src/sources/exec/mod.rs | 6 ++-- src/sources/file.rs | 6 ++-- src/sources/gcp_pubsub.rs | 3 +- src/sources/host_metrics/mod.rs | 1 + src/sources/internal_logs.rs | 1 + src/sources/internal_metrics.rs | 4 +-- src/sources/journald.rs | 16 +++++----- src/sources/kafka.rs | 1 + src/sources/kubernetes_logs/mod.rs | 2 +- .../namespace_metadata_annotator.rs | 2 +- .../node_metadata_annotator.rs | 2 +- 
.../kubernetes_logs/pod_metadata_annotator.rs | 2 +- src/sources/mongodb_metrics/mod.rs | 1 + src/sources/nats.rs | 1 + src/sources/nginx_metrics/mod.rs | 1 + src/sources/postgresql_metrics.rs | 5 +-- src/sources/redis/mod.rs | 3 +- src/sources/socket/udp.rs | 1 + src/sources/socket/unix.rs | 1 + src/sources/splunk_hec/mod.rs | 2 +- src/sources/stdin.rs | 2 +- src/sources/util/encoding_config.rs | 1 + src/sources/util/multiline_config.rs | 23 +++++++++----- src/sources/vector/mod.rs | 2 ++ src/sources/vector/v1.rs | 1 + 43 files changed, 155 insertions(+), 89 deletions(-) diff --git a/lib/vector-config-macros/src/ast/container.rs b/lib/vector-config-macros/src/ast/container.rs index 863dd2e2aa88e..d70f302c5d3f3 100644 --- a/lib/vector-config-macros/src/ast/container.rs +++ b/lib/vector-config-macros/src/ast/container.rs @@ -15,7 +15,6 @@ use super::{ }; const ERR_NO_ENUM_TUPLES: &str = "enum variants cannot be tuples (multiple unnamed fields)"; -const ERR_NO_ENUM_NEWTYPE_INTERNAL_TAG: &str = "newtype variants (i.e. `enum SomeEnum { SomeVariant(T) }`) cannot be used with tag-only mode as the type inside may or may not support embedding the tag field"; const ERR_NO_ENUM_VARIANT_DESCRIPTION: &str = "enum variants must have a description i.e. `/// This is a description` or `#[configurable(description = \"This is a description...\")]`"; const ERR_ENUM_UNTAGGED_DUPLICATES: &str = "enum variants must be unique in style/shape when in untagged mode i.e. there cannot be multiple unit variants, or tuple variants with the same fields, etc"; const ERR_NO_UNIT_STRUCTS: &str = "unit structs are not supported by `Configurable`"; diff --git a/lib/vector-config-macros/src/lib.rs b/lib/vector-config-macros/src/lib.rs index eddcf31d050b1..e8f30d9936fdb 100644 --- a/lib/vector-config-macros/src/lib.rs +++ b/lib/vector-config-macros/src/lib.rs @@ -1,6 +1,3 @@ -// TODO: Remove this once we add validation since that's the only piece of dead code in this crate at the moment. 
-#![allow(dead_code)] - use proc_macro::TokenStream; mod ast; @@ -8,6 +5,34 @@ mod configurable; mod configurable_component; /// Designates a type as being part of a Vector configuration. +/// +/// This will automatically derive the [`Configurable`][vector_config::Configurable] trait for the given struct/enum, as +/// well as ensuring that serialization/deserialization (via `serde`) is derived. +/// +/// ### Examples +/// +/// In its most basic form, this attribute macro can be used to simply derive the aforementioned traits, making it using +/// in any other type also deriving `Configurable`: +/// +/// ```norun +/// #[configurable_component] +/// pub struct Something { +/// ... +/// } +/// ``` +/// +/// Additionally, callers can specify the component type, when being used directly on the top-level configuration object +/// for a component by specifying the component type (`source`, `transform`, or `sink`) as the sole parameter: +/// +/// ```norun +/// #[configurable_component(source)] +/// pub struct KafkaSourceConfig { +/// ... +/// } +/// ``` +/// +/// This adds special metadata to the generated schema for that type indicating that it represents the configuration of +/// a component of the specified type. #[proc_macro_attribute] pub fn configurable_component(args: TokenStream, item: TokenStream) -> TokenStream { configurable_component::configurable_component_impl(args, item) diff --git a/lib/vector-config/src/num.rs b/lib/vector-config/src/num.rs index 47de6f5f6ad6e..cd32eb3e19e97 100644 --- a/lib/vector-config/src/num.rs +++ b/lib/vector-config/src/num.rs @@ -7,10 +7,6 @@ use serde_json::Number; use vector_config_common::num::{NUMERIC_ENFORCED_LOWER_BOUND, NUMERIC_ENFORCED_UPPER_BOUND}; /// A numeric type that can be represented correctly in a JSON Schema document. -/// -/// `N` must be an integral numeric type i.e. `f64`, `u8`, `i32`, and so on. 
The numeric type is parameterized in this -/// way to allow generating the schema for wrapper types such as `NonZeroU64`, where the overall type must be -/// represented as `NonZeroU64` but the integeral numeric type that we're constraining against is `u64`. pub trait ConfigurableNumber { /// The integral numeric type. /// diff --git a/lib/vector-config/src/schema.rs b/lib/vector-config/src/schema.rs index 4748afcea83bf..c042a29b2aad8 100644 --- a/lib/vector-config/src/schema.rs +++ b/lib/vector-config/src/schema.rs @@ -170,7 +170,7 @@ where ..Default::default() }; - // If the actual numeric type we're generating the schema for is a nonzero variant, and its constrain can't be + // If the actual numeric type we're generating the schema for is a nonzero variant, and its constraint can't be // represently solely by the normal minimum/maximum bounds, we explicitly add an exclusion for the appropriate zero // value of the given numeric type. if N::requires_nonzero_exclusion() { diff --git a/lib/vector-core/src/config/mod.rs b/lib/vector-core/src/config/mod.rs index 85cfd2bc30f02..26954b9a91f27 100644 --- a/lib/vector-core/src/config/mod.rs +++ b/lib/vector-core/src/config/mod.rs @@ -141,11 +141,11 @@ impl Output { } } -/// Acknowledgement configuration. +/// Configuration of acknowledgement behavior. #[configurable_component] #[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] pub struct AcknowledgementsConfig { - /// Whether or not acknowledgements should be enabled. + /// Enables end-to-end acknowledgements. enabled: Option, } diff --git a/src/aws/auth.rs b/src/aws/auth.rs index a77b3f572c47c..dad957a7cee75 100644 --- a/src/aws/auth.rs +++ b/src/aws/auth.rs @@ -24,26 +24,29 @@ pub enum AwsAuthentication { /// The AWS secret access key. secret_access_key: String, }, + /// Authenticate using credentials stored in a file. /// - /// Optionally, specifies a credentials profile to use. + /// Additionally, the specific credential profile to use can be set. 
File { /// Path to the credentials file. credentials_file: String, /// The credentials profile to use. profile: Option, }, - /// Assumes the given role ARN. + + /// Assume the given role ARN. Role { /// The ARN of the role to assume. assume_role: String, /// Timeout for assuming the role, in seconds. load_timeout_secs: Option, }, - /// Default authentication strategy which tries a variety of substrategies in a chained fallback fashion. + + /// Default authentication strategy which tries a variety of substrategies in a one-after-the-other fashion. #[derivative(Default)] Default { - /// Timeout for successfully loading credentials, in seconds. + /// Timeout for successfully loading any credentials, in seconds. load_timeout_secs: Option, }, } diff --git a/src/aws/region.rs b/src/aws/region.rs index 9828c2a88708c..9337e1ba6e5d4 100644 --- a/src/aws/region.rs +++ b/src/aws/region.rs @@ -3,15 +3,16 @@ use std::str::FromStr; use aws_smithy_http::endpoint::Endpoint; use aws_types::region::Region; use http::Uri; -use serde::{Deserialize, Serialize}; -use vector_config::Configurable; +use vector_config::configurable_component; -/// The region/endpoint configuration for interacting with an AWS service. -#[derive(Clone, Configurable, Debug, Default, Deserialize, PartialEq, Serialize)] +/// Configuration of the region/endpoint to use when interacting with an AWS service. +#[configurable_component] +#[derive(Clone, Debug, Default, PartialEq)] #[serde(default)] pub struct RegionOrEndpoint { /// The AWS region to use. pub region: Option, + /// The API endpoint of the service. pub endpoint: Option, } diff --git a/src/docker.rs b/src/docker.rs index 5beb81597fa80..3fb6c07cf5fc8 100644 --- a/src/docker.rs +++ b/src/docker.rs @@ -21,21 +21,22 @@ pub enum Error { NoHost, } -/// TLS options to connect to the Docker daemon. +/// Configuration of TLS when connecting to the Docker daemon. /// /// Only relevant when connecting to Docker via an HTTPS URL. 
/// /// If not configured, Vector will try to use environment variable `DOCKER_CERT_PATH` and then` DOCKER_CONFIG`. If both environment variables are absent, Vector will try to read certificates in `~/.docker/`. #[configurable_component] #[derive(Clone, Debug)] +#[serde(deny_unknown_fields)] pub struct DockerTlsConfig { - /// Path to CA certificate file. + /// Path to the CA certificate file. ca_file: PathBuf, - /// Path to TLS certificate file. + /// Path to the TLS certificate file. crt_file: PathBuf, - /// Path to TLS key file. + /// Path to the TLS key file. key_file: PathBuf, } diff --git a/src/gcp.rs b/src/gcp.rs index 574c8db9d070c..79723ab918421 100644 --- a/src/gcp.rs +++ b/src/gcp.rs @@ -52,7 +52,7 @@ pub enum GcpError { BuildHttpClient { source: HttpError }, } -/// Authentication configuration for GCP services. +/// Configuration of the authentication strategy for interacting with GCP services. // TODO: We're duplicating the "either this or that" verbiage for each field because this struct gets flattened into the // component config types, which means all that's carried over are the fields, not the type itself. // diff --git a/src/http.rs b/src/http.rs index 506bf41fcd862..01c4bc1ac4cbb 100644 --- a/src/http.rs +++ b/src/http.rs @@ -209,13 +209,13 @@ impl fmt::Debug for HttpClient { } } -/// Authentication strategy for requests. +/// Configuration of the authentication strategy for HTTP requests. /// /// HTTP authentication should almost always be used with HTTPS only, as the authentication credentials are passed as an /// HTTP header without any additional encryption beyond what is provided by the transport itself. #[configurable_component] #[derive(Clone, Debug, Eq, PartialEq)] -#[serde(rename_all = "snake_case", tag = "strategy")] +#[serde(deny_unknown_fields, rename_all = "snake_case", tag = "strategy")] pub enum Auth { /// Basic authentication. 
/// diff --git a/src/kafka.rs b/src/kafka.rs index f08e79be4096d..9943d33fa68f5 100644 --- a/src/kafka.rs +++ b/src/kafka.rs @@ -36,20 +36,28 @@ pub(crate) struct KafkaAuthConfig { pub(crate) tls: Option, } -/// /// Options for SASL/SCRAM authentication support. +/// Configuration for SASL authentication when interacting with Kafka. #[configurable_component] #[derive(Clone, Debug, Default)] pub(crate) struct KafkaSaslConfig { - /// Enable SASL/SCRAM authentication to the remote (not supported on Windows at this time). + /// Enables SASL authentication. + /// + /// Only `PLAIN` and `SCRAM`-based mechanisms are supported when configuring SASL authentication via `sasl.*`. For + /// other mechanisms, `librdkafka_options.*` must be used directly to configure other `librdkafka`-specific values + /// i.e. `sasl.kerberos.*` and so on. + /// + /// See the [librdkafka documentation](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md) for details. + /// + /// SASL authentication is not supported on Windows. pub(crate) enabled: Option, - /// The Kafka SASL/SCRAM authentication username. + /// The SASL username. pub(crate) username: Option, - /// The Kafka SASL/SCRAM authentication password. + /// The SASL password. pub(crate) password: Option, - /// The Kafka SASL/SCRAM mechanisms. + /// The SASL mechanism to use. pub(crate) mechanism: Option, } diff --git a/src/line_agg.rs b/src/line_agg.rs index 4b8226d35e35c..f47f96b74c1f3 100644 --- a/src/line_agg.rs +++ b/src/line_agg.rs @@ -14,12 +14,12 @@ use bytes::{Bytes, BytesMut}; use futures::{Stream, StreamExt}; use pin_project::pin_project; use regex::bytes::Regex; -use serde::{Deserialize, Serialize}; use tokio_util::time::delay_queue::{DelayQueue, Key}; -use vector_config::Configurable; +use vector_config::configurable_component; -/// The mode of operation of the line aggregator. -#[derive(Clone, Configurable, Copy, Debug, Hash, Deserialize, PartialEq, Serialize)] +/// Mode of operation of the line aggregator. 
+#[configurable_component] +#[derive(Clone, Copy, Debug, Hash, PartialEq)] #[serde(rename_all = "snake_case")] pub enum Mode { /// All consecutive lines matching this pattern are included in the group. @@ -30,7 +30,7 @@ pub enum Mode { /// whitespace) indicates that it is an extension of the proceeding line. ContinueThrough, - /// All consecutive lines matching this pattern, plus one additional line, are included in the group. + /// All consecutive lines matching this pattern, plus one additional line, are included in the group. /// /// This is useful in cases where a log message ends with a continuation marker, such as a backslash, indicating /// that the following line is part of the same message. @@ -47,21 +47,23 @@ pub enum Mode { HaltWith, } -/// Configuration parameters of the line aggregator. -#[derive(Debug, Clone)] +/// Configuration of multi-line aggregation. +#[derive(Clone, Debug)] pub struct Config { - /// The regular expression pattern for detecting the beginning of the message. + /// Regular expression pattern that is used to match the start of a new message. pub start_pattern: Regex, - /// The regular expression pattern used for evaluating whether the current line should be aggregated or if - /// aggregation should stop. + + /// Regular expression pattern that is used to determine whether or not more lines should be read. /// - /// Configured in tandem with `mode` to define the overall aggregation behavior. + /// This setting must be configured in conjunction with `mode`. pub condition_pattern: Regex, - /// The mode of aggregation. + + /// Aggregation mode. /// - /// Configured in tandem with `condition_pattern` to define the overall aggregation behavior. + /// This setting must be configured in conjunction with `condition_pattern`. pub mode: Mode, - /// The maximum time to wait for subsequent lines to be received and evaluated for aggregation. + + /// The maximum amount of time to wait for the next additional line, in milliseconds. 
/// /// Once this timeout is reached, the buffered message is guaranteed to be flushed, even if incomplete. pub timeout: Duration, diff --git a/src/nats.rs b/src/nats.rs index e2ea1f2de4b4c..3954ef03eb028 100644 --- a/src/nats.rs +++ b/src/nats.rs @@ -14,7 +14,7 @@ pub enum NatsConfigError { TlsMissingCert, } -/// Configuration for how Vector should authenticate to NATS. +/// Configuration of the authentication strategy when interacting with NATS. #[configurable_component] #[derive(Clone, Debug)] #[serde(rename_all = "snake_case", tag = "strategy")] @@ -64,6 +64,7 @@ impl std::fmt::Display for NatsAuthConfig { /// Username and password configuration. #[configurable_component] #[derive(Clone, Debug)] +#[serde(deny_unknown_fields)] pub(crate) struct NatsAuthUserPassword { /// Username. pub(crate) user: String, @@ -75,6 +76,7 @@ pub(crate) struct NatsAuthUserPassword { /// Token configuration. #[configurable_component] #[derive(Clone, Debug)] +#[serde(deny_unknown_fields)] pub(crate) struct NatsAuthToken { /// Token. pub(crate) value: String, @@ -83,6 +85,7 @@ pub(crate) struct NatsAuthToken { /// Credentials file configuration. #[configurable_component] #[derive(Clone, Debug)] +#[serde(deny_unknown_fields)] pub(crate) struct NatsAuthCredentialsFile { /// Path to credentials file. pub(crate) path: String, @@ -91,15 +94,16 @@ pub(crate) struct NatsAuthCredentialsFile { /// NKeys configuration. #[configurable_component] #[derive(Clone, Debug)] +#[serde(deny_unknown_fields)] pub(crate) struct NatsAuthNKey { /// User. /// - /// This is equivalent to a public key. + /// Conceptually, this is equivalent to a public key. pub(crate) nkey: String, /// Seed. /// - /// This is equivalent to a private key. + /// Conceptually, this is equivalent to a private key. 
pub(crate) seed: String, } diff --git a/src/sources/aws_ecs_metrics/mod.rs b/src/sources/aws_ecs_metrics/mod.rs index 85e4eacb9cac4..45cb35f99c0eb 100644 --- a/src/sources/aws_ecs_metrics/mod.rs +++ b/src/sources/aws_ecs_metrics/mod.rs @@ -47,6 +47,7 @@ pub enum Version { /// Configuration for the `aws_ecs_metrics` source. #[configurable_component(source)] #[derive(Clone, Debug)] +#[serde(deny_unknown_fields)] pub struct AwsEcsMetricsSourceConfig { /// Base URI of the task metadata endpoint. /// diff --git a/src/sources/aws_s3/mod.rs b/src/sources/aws_s3/mod.rs index 491a958731499..dfc5da73d0d07 100644 --- a/src/sources/aws_s3/mod.rs +++ b/src/sources/aws_s3/mod.rs @@ -68,7 +68,7 @@ enum Strategy { // Maybe showing defaults at all, when there are required properties, doesn't actually make sense? :thinkies: #[configurable_component(source)] #[derive(Clone, Debug, Default)] -#[serde(default)] +#[serde(default, deny_unknown_fields)] pub struct AwsS3Config { #[serde(flatten)] region: RegionOrEndpoint, @@ -92,9 +92,9 @@ pub struct AwsS3Config { #[serde(default)] auth: AwsAuthentication, - /// Multiline parsing configuration. + /// Multiline aggregation configuration. /// - /// If not specified, multiline parsing is disabled. + /// If not specified, multiline aggregation is disabled. multiline: Option, #[configurable(derived)] diff --git a/src/sources/aws_s3/sqs.rs b/src/sources/aws_s3/sqs.rs index 27beaa0cfe7fe..609bd53725253 100644 --- a/src/sources/aws_s3/sqs.rs +++ b/src/sources/aws_s3/sqs.rs @@ -47,6 +47,7 @@ static SUPPORTED_S3S_EVENT_VERSION: Lazy = #[configurable_component] #[derive(Clone, Debug, Derivative)] #[derivative(Default)] +#[serde(deny_unknown_fields)] pub(super) struct Config { /// The URL of the SQS queue to poll for bucket notifications. 
pub(super) queue_url: String, diff --git a/src/sources/aws_sqs/config.rs b/src/sources/aws_sqs/config.rs index 9df1321b5826b..9f8b7ffb793b1 100644 --- a/src/sources/aws_sqs/config.rs +++ b/src/sources/aws_sqs/config.rs @@ -18,6 +18,7 @@ use crate::{ #[configurable_component(source)] #[derive(Clone, Debug, Derivative)] #[derivative(Default)] +#[serde(deny_unknown_fields)] pub struct AwsSqsConfig { #[serde(flatten)] pub region: RegionOrEndpoint, diff --git a/src/sources/docker_logs.rs b/src/sources/docker_logs.rs index 88eb404257935..109afc1241e21 100644 --- a/src/sources/docker_logs.rs +++ b/src/sources/docker_logs.rs @@ -50,7 +50,7 @@ static CONSOLE: Lazy = Lazy::new(|| "console".into()); /// Configuration for the `docker_logs` source. #[configurable_component(source)] #[derive(Clone, Debug)] -#[serde(default)] +#[serde(deny_unknown_fields, default)] pub struct DockerLogsConfig { /// Overrides the name of the log field used to add the current hostname to each event. /// @@ -60,7 +60,7 @@ pub struct DockerLogsConfig { #[serde(default = "host_key")] host_key: String, - /// The Docker host to connect to. + /// Docker host to connect to. /// /// Use an HTTPS URL to enable TLS encryption. /// @@ -109,15 +109,15 @@ pub struct DockerLogsConfig { /// By default, `"_partial"` is used. partial_event_marker_field: Option, - /// Whether or not to automatically merge partial events. + /// Enables automatic merging of partial events. auto_partial_merge: bool, /// The amount of time, in seconds, to wait before retrying after an error. retry_backoff_secs: u64, - /// Multiline parsing configuration. + /// Multiline aggregation configuration. /// - /// If not specified, multiline parsing is disabled. + /// If not specified, multiline aggregation is disabled. 
multiline: Option, #[configurable(derived)] diff --git a/src/sources/exec/mod.rs b/src/sources/exec/mod.rs index fe451596f5553..efa8dd75e968f 100644 --- a/src/sources/exec/mod.rs +++ b/src/sources/exec/mod.rs @@ -44,7 +44,7 @@ pub mod sized_bytes_codec; /// Configuration for the `exec` source. #[configurable_component] #[derive(Clone, Debug)] -#[serde(default)] +#[serde(default, deny_unknown_fields)] pub struct ExecConfig { #[configurable(derived)] pub mode: Mode, @@ -80,7 +80,7 @@ pub struct ExecConfig { /// Mode of operation for running the command. #[configurable_component] #[derive(Clone, Copy, Debug)] -#[serde(rename_all = "snake_case")] +#[serde(rename_all = "snake_case", deny_unknown_fields)] pub enum Mode { /// The command is run on a schedule. Scheduled, @@ -92,6 +92,7 @@ pub enum Mode { /// Configuration options for scheduled commands. #[configurable_component] #[derive(Clone, Debug)] +#[serde(deny_unknown_fields)] pub struct ScheduledConfig { /// The interval, in seconds, between scheduled command runs. /// @@ -103,6 +104,7 @@ pub struct ScheduledConfig { /// Configuration options for streaming commands. #[configurable_component] #[derive(Clone, Debug)] +#[serde(deny_unknown_fields)] pub struct StreamingConfig { /// Whether or not the command should be rerun if the command exits. #[serde(default = "default_respawn_on_exit")] diff --git a/src/sources/file.rs b/src/sources/file.rs index 77a897972b539..e2a56b5485a81 100644 --- a/src/sources/file.rs +++ b/src/sources/file.rs @@ -62,7 +62,7 @@ enum BuildError { /// Configuration for the `file` source. #[configurable_component(source)] #[derive(Clone, Debug, PartialEq)] -#[serde(default)] +#[serde(deny_unknown_fields, default)] pub struct FileConfig { /// Array of file patterns to include. [Globbing](https://vector.dev/docs/reference/configuration/sources/file/#globbing) is supported. 
pub include: Vec, @@ -142,9 +142,9 @@ pub struct FileConfig { #[configurable(deprecated)] pub multi_line_timeout: u64, - /// Multiline parsing configuration. + /// Multiline aggregation configuration. /// - /// If not specified, multiline parsing is disabled. + /// If not specified, multiline aggregation is disabled. pub multiline: Option, /// An approximate limit on the amount of data read from a single file at a given time. diff --git a/src/sources/gcp_pubsub.rs b/src/sources/gcp_pubsub.rs index 67f7d04425764..fdc540c93cf91 100644 --- a/src/sources/gcp_pubsub.rs +++ b/src/sources/gcp_pubsub.rs @@ -97,6 +97,7 @@ static CLIENT_ID: Lazy = Lazy::new(|| uuid::Uuid::new_v4().to_string()); #[configurable_component(source)] #[derive(Clone, Debug, Derivative)] #[derivative(Default)] +#[serde(deny_unknown_fields)] pub struct PubsubConfig { /// The project name from which to pull logs. pub project: String, @@ -107,7 +108,7 @@ pub struct PubsubConfig { /// The endpoint from which to pull data. pub endpoint: Option, - /// Whether or not to load authentication credentials. + /// Disables the loading of authentication credentials. /// /// Only used for tests. #[serde(skip, default)] diff --git a/src/sources/host_metrics/mod.rs b/src/sources/host_metrics/mod.rs index b2121a419bb3b..a23ae3fcd845a 100644 --- a/src/sources/host_metrics/mod.rs +++ b/src/sources/host_metrics/mod.rs @@ -82,6 +82,7 @@ pub(self) struct FilterList { #[configurable_component(source)] #[derive(Clone, Debug, Derivative)] #[derivative(Default)] +#[serde(deny_unknown_fields)] pub struct HostMetricsConfig { /// The interval between metric gathering, in seconds. #[serde(default = "default_scrape_interval")] diff --git a/src/sources/internal_logs.rs b/src/sources/internal_logs.rs index 315eb2f78baba..3c559bdece85f 100644 --- a/src/sources/internal_logs.rs +++ b/src/sources/internal_logs.rs @@ -16,6 +16,7 @@ use crate::{ /// Configuration for the `internal_logs` source. 
#[configurable_component(source)] #[derive(Clone, Debug, Default)] +#[serde(deny_unknown_fields)] pub struct InternalLogsConfig { /// Overrides the name of the log field used to add the current hostname to each event. /// diff --git a/src/sources/internal_metrics.rs b/src/sources/internal_metrics.rs index 2e190a9acc937..cb425c012a8c7 100644 --- a/src/sources/internal_metrics.rs +++ b/src/sources/internal_metrics.rs @@ -16,7 +16,7 @@ use crate::{ /// Configuration for the `internal_metrics` source. #[configurable_component(source)] #[derive(Clone, Debug, Derivative)] #[derivative(Default)] -#[serde(default)] +#[serde(deny_unknown_fields, default)] pub struct InternalMetricsConfig { /// The interval between metric gathering, in seconds. #[derivative(Default(value = "2.0"))] @@ -42,7 +42,7 @@ impl InternalMetricsConfig { #[configurable_component] #[derive(Clone, Debug, Derivative)] #[derivative(Default)] -#[serde(default)] +#[serde(deny_unknown_fields, default)] pub struct TagsConfig { /// Sets the name of the tag to use to add the current hostname to each metric. /// diff --git a/src/sources/journald.rs b/src/sources/journald.rs index 1a5d549772957..e0ae16e2d4666 100644 --- a/src/sources/journald.rs +++ b/src/sources/journald.rs @@ -83,12 +83,12 @@ type Matches = HashMap>; /// Configuration for the `journald` source. #[configurable_component(source)] #[derive(Clone, Debug, Default)] -#[serde(default)] +#[serde(deny_unknown_fields, default)] pub struct JournaldConfig { - /// Whether or not to only include future entries. + /// Only include entries that are appended to the journal after Vector starts reading it. pub since_now: Option, - /// Whether or not to only include entries from the current boot. + /// Only include entries that occurred after the current boot of the system. pub current_boot_only: Option, /// The list of unit names to monitor. @@ -98,22 +98,22 @@ pub struct JournaldConfig { #[configurable(deprecated)] pub units: Vec, - /// The list of unit names to monitor. + /// A list of unit names to monitor.
/// /// If empty or not present, all units are accepted. Unit names lacking a "." will have ".service" appended to make them a valid service unit name. pub include_units: Vec, - /// The list of unit names to exclude from monitoring. + /// A list of unit names to exclude from monitoring. /// /// Unit names lacking a "." will have ".service" appended to make them a valid service unit name. pub exclude_units: Vec, - /// This list contains sets of field/value pairs to monitor. + /// A list of sets of field/value pairs to monitor. /// /// If empty or not present, all journal fields are accepted. If `include_units` is specified, it will be merged into this list. pub include_matches: Matches, - /// This list contains sets of field/value pairs that, if any are present in a journal entry, will cause the entry to be excluded from this source. + /// A list of sets of field/value pairs that, if any are present in a journal entry, will cause the entry to be excluded from this source. /// /// If `exclude_units` is specified, it will be merged into this list. pub exclude_matches: Matches, @@ -140,7 +140,7 @@ pub struct JournaldConfig { #[serde(default, deserialize_with = "bool_or_struct")] acknowledgements: AcknowledgementsConfig, - /// Whether or not to remap the `PRIORITY` field from an integer to string value. + /// Enables remapping the `PRIORITY` field from an integer to string value. /// /// Has no effect unless the value of the field is already an integer. #[serde(default)] diff --git a/src/sources/kafka.rs b/src/sources/kafka.rs index e606602a7258e..4f4197bfa48e7 100644 --- a/src/sources/kafka.rs +++ b/src/sources/kafka.rs @@ -52,6 +52,7 @@ enum BuildError { #[configurable_component(source)] #[derive(Clone, Debug, Derivative)] #[derivative(Default)] +#[serde(deny_unknown_fields)] pub struct KafkaSourceConfig { /// A comma-separated list of Kafka bootstrap servers. 
/// diff --git a/src/sources/kubernetes_logs/mod.rs b/src/sources/kubernetes_logs/mod.rs index 0756ecc3c6d2c..64abb2c8b1c28 100644 --- a/src/sources/kubernetes_logs/mod.rs +++ b/src/sources/kubernetes_logs/mod.rs @@ -74,7 +74,7 @@ const SELF_NODE_NAME_ENV_KEY: &str = "VECTOR_SELF_NODE_NAME"; /// Configuration for the `kubernetes_logs` source. #[configurable_component(source)] #[derive(Clone, Debug)] -#[serde(default)] +#[serde(deny_unknown_fields, default)] pub struct Config { /// Specifies the label selector to filter `Pod`s with, to be used in addition to the built-in `vector.dev/exclude` filter. extra_label_selector: String, diff --git a/src/sources/kubernetes_logs/namespace_metadata_annotator.rs b/src/sources/kubernetes_logs/namespace_metadata_annotator.rs index 4875ba2d630c8..f604e2f9b9e9b 100644 --- a/src/sources/kubernetes_logs/namespace_metadata_annotator.rs +++ b/src/sources/kubernetes_logs/namespace_metadata_annotator.rs @@ -12,7 +12,7 @@ use crate::event::{Event, LogEvent}; /// Configuration for how the events are annotated with Namespace metadata. #[configurable_component] #[derive(Clone, Debug)] -#[serde(default)] +#[serde(deny_unknown_fields, default)] pub struct FieldsSpec { /// Event field for Namespace labels. pub namespace_labels: String, diff --git a/src/sources/kubernetes_logs/node_metadata_annotator.rs b/src/sources/kubernetes_logs/node_metadata_annotator.rs index 4ecc4fc3b4397..c1a7edb5f2c12 100644 --- a/src/sources/kubernetes_logs/node_metadata_annotator.rs +++ b/src/sources/kubernetes_logs/node_metadata_annotator.rs @@ -12,7 +12,7 @@ use crate::event::{Event, LogEvent}; /// Configuration for how the events are annotated with Node metadata. #[configurable_component] #[derive(Clone, Debug)] -#[serde(default)] +#[serde(deny_unknown_fields, default)] pub struct FieldsSpec { /// Event field for Node labels. 
pub node_labels: String, diff --git a/src/sources/kubernetes_logs/pod_metadata_annotator.rs b/src/sources/kubernetes_logs/pod_metadata_annotator.rs index 836445576355f..c93f978f5d2d6 100644 --- a/src/sources/kubernetes_logs/pod_metadata_annotator.rs +++ b/src/sources/kubernetes_logs/pod_metadata_annotator.rs @@ -16,7 +16,7 @@ use crate::event::{Event, LogEvent}; /// Configuration for how the events are annotated with `Pod` metadata. #[configurable_component] #[derive(Clone, Debug)] -#[serde(default)] +#[serde(deny_unknown_fields, default)] pub struct FieldsSpec { /// Event field for Pod name. pub pod_name: String, diff --git a/src/sources/mongodb_metrics/mod.rs b/src/sources/mongodb_metrics/mod.rs index c63c7dae51d76..b8ed490e045e7 100644 --- a/src/sources/mongodb_metrics/mod.rs +++ b/src/sources/mongodb_metrics/mod.rs @@ -75,6 +75,7 @@ enum CollectError { /// Configuration for the `mongodb_metrics` source. #[configurable_component(source)] #[derive(Clone, Debug, Default)] +#[serde(deny_unknown_fields)] pub struct MongoDbMetricsConfig { /// A list of MongoDB instances to scrape. /// diff --git a/src/sources/nats.rs b/src/sources/nats.rs index 8ec15e53adbc5..6a48ed7d39156 100644 --- a/src/sources/nats.rs +++ b/src/sources/nats.rs @@ -33,6 +33,7 @@ enum BuildError { #[configurable_component(source)] #[derive(Clone, Debug, Derivative)] #[derivative(Default)] +#[serde(deny_unknown_fields)] pub struct NatsSourceConfig { /// The NATS URL to connect to. /// diff --git a/src/sources/nginx_metrics/mod.rs b/src/sources/nginx_metrics/mod.rs index ef5654e9d04bc..0f10cba7a9e77 100644 --- a/src/sources/nginx_metrics/mod.rs +++ b/src/sources/nginx_metrics/mod.rs @@ -56,6 +56,7 @@ enum NginxError { /// Configuration for the `nginx_metrics` source. #[configurable_component(source)] #[derive(Clone, Debug, Default)] +#[serde(deny_unknown_fields)] pub struct NginxMetricsConfig { /// A list of NGINX instances to scrape. 
/// diff --git a/src/sources/postgresql_metrics.rs b/src/sources/postgresql_metrics.rs index c8714d1c8fab6..7890d7c524753 100644 --- a/src/sources/postgresql_metrics.rs +++ b/src/sources/postgresql_metrics.rs @@ -94,9 +94,10 @@ enum CollectError { QueryError { source: PgError }, } -/// TLS configuration for connecting to PostgreSQL. +/// Configuration of TLS when connecting to PostgreSQL. #[configurable_component] #[derive(Clone, Debug)] +#[serde(deny_unknown_fields)] struct PostgresqlMetricsTlsConfig { /// Absolute path to an additional CA certificate file. /// @@ -107,7 +108,7 @@ struct PostgresqlMetricsTlsConfig { /// Configuration for the `postgresql_metrics` source. #[configurable_component(source)] #[derive(Clone, Debug)] -#[serde(default)] +#[serde(default, deny_unknown_fields)] pub struct PostgresqlMetricsConfig { /// A list of PostgreSQL instances to scrape. /// diff --git a/src/sources/redis/mod.rs b/src/sources/redis/mod.rs index aaa68fe4aca9c..79c72a8158f2e 100644 --- a/src/sources/redis/mod.rs +++ b/src/sources/redis/mod.rs @@ -47,7 +47,7 @@ pub enum DataTypeConfig { /// Options for the Redis `list` data type. #[configurable_component] #[derive(Copy, Clone, Debug, Default, Derivative, Eq, PartialEq)] -#[serde(rename_all = "lowercase")] +#[serde(deny_unknown_fields, rename_all = "lowercase")] pub struct ListOption { #[configurable(derived)] method: Method, @@ -89,6 +89,7 @@ impl From<&redis::ConnectionInfo> for ConnectionInfo { /// Configuration for the `redis` source. #[configurable_component(source)] #[derive(Clone, Debug, Derivative)] +#[serde(deny_unknown_fields)] pub struct RedisSourceConfig { /// The Redis data type (`list` or `channel`) to use. #[serde(default)] diff --git a/src/sources/socket/udp.rs b/src/sources/socket/udp.rs index 2ac3a34096912..ef07c4da65061 100644 --- a/src/sources/socket/udp.rs +++ b/src/sources/socket/udp.rs @@ -28,6 +28,7 @@ use crate::{ /// UDP configuration for the `socket` source. 
#[configurable_component] #[derive(Clone, Debug)] +#[serde(deny_unknown_fields)] pub struct UdpConfig { /// The address to listen for messages on. address: SocketAddr, diff --git a/src/sources/socket/unix.rs b/src/sources/socket/unix.rs index 7fd94c792c7fe..4d4b877d5ba73 100644 --- a/src/sources/socket/unix.rs +++ b/src/sources/socket/unix.rs @@ -21,6 +21,7 @@ use crate::{ /// Unix domain socket configuration for the `socket` source. #[configurable_component] #[derive(Clone, Debug)] +#[serde(deny_unknown_fields)] pub struct UnixConfig { /// The Unix socket path. /// diff --git a/src/sources/splunk_hec/mod.rs b/src/sources/splunk_hec/mod.rs index 621d13e91f360..5e558dc41ab94 100644 --- a/src/sources/splunk_hec/mod.rs +++ b/src/sources/splunk_hec/mod.rs @@ -51,7 +51,7 @@ pub const SOURCETYPE: &str = "splunk_sourcetype"; /// Configuration for the `splunk_hec` source. #[configurable_component(source)] #[derive(Clone, Debug)] -#[serde(default)] +#[serde(deny_unknown_fields, default)] pub struct SplunkConfig { /// The address to listen for connections on. /// diff --git a/src/sources/stdin.rs b/src/sources/stdin.rs index 3fb78cc8c87d2..98a02498e08ea 100644 --- a/src/sources/stdin.rs +++ b/src/sources/stdin.rs @@ -23,7 +23,7 @@ use crate::{ /// Configuration for the `stdin` source. #[configurable_component(source)] #[derive(Clone, Debug)] -#[serde(default)] +#[serde(deny_unknown_fields, default)] pub struct StdinConfig { /// The maximum buffer size, in bytes, of incoming messages. /// diff --git a/src/sources/util/encoding_config.rs b/src/sources/util/encoding_config.rs index 5d6b673090c24..c959e9de9a75c 100644 --- a/src/sources/util/encoding_config.rs +++ b/src/sources/util/encoding_config.rs @@ -3,6 +3,7 @@ use vector_config::configurable_component; /// Character set encoding. #[configurable_component] #[derive(Clone, Debug, PartialEq)] +#[serde(deny_unknown_fields)] pub struct EncodingConfig { /// Encoding of the source messages. 
/// diff --git a/src/sources/util/multiline_config.rs b/src/sources/util/multiline_config.rs index c12f3005ffeb0..d4c31a266a64a 100644 --- a/src/sources/util/multiline_config.rs +++ b/src/sources/util/multiline_config.rs @@ -1,23 +1,30 @@ use std::{convert::TryFrom, time::Duration}; use regex::bytes::Regex; -use serde::{Deserialize, Serialize}; use snafu::{ResultExt, Snafu}; -use vector_config::Configurable; +use vector_config::configurable_component; use crate::line_agg; -/// Multi-line parsing configuration. -#[derive(Clone, Configurable, Debug, Deserialize, PartialEq, Serialize)] +/// Configuration of multi-line aggregation. +#[configurable_component] +#[derive(Clone, Debug, PartialEq)] #[serde(deny_unknown_fields)] pub struct MultilineConfig { - /// Start regex pattern to look for as a beginning of the message. + /// Regular expression pattern that is used to match the start of a new message. pub start_pattern: String, - /// Condition regex pattern to look for. Exact behavior is configured via `mode`. + + /// Regular expression pattern that is used to determine whether or not more lines should be read. + /// + /// This setting must be configured in conjunction with `mode`. pub condition_pattern: String, - /// Mode of operation, specifies how `condition_pattern` is interpreted. + + /// Aggregation mode. + /// + /// This setting must be configured in conjunction with `condition_pattern`. pub mode: line_agg::Mode, - /// The maximum time to wait for the continuation, in milliseconds. + + /// The maximum amount of time to wait for the next additional line, in milliseconds. /// /// Once this timeout is reached, the buffered message is guaranteed to be flushed, even if incomplete. pub timeout_ms: u64, diff --git a/src/sources/vector/mod.rs b/src/sources/vector/mod.rs index ea3c9b440860e..0528367f7562b 100644 --- a/src/sources/vector/mod.rs +++ b/src/sources/vector/mod.rs @@ -19,6 +19,7 @@ enum V1 { /// Configuration for version two of the `vector` source. 
#[configurable_component] #[derive(Clone, Debug)] +#[serde(deny_unknown_fields)] pub struct VectorConfigV1 { /// Version of the configuration. version: V1, @@ -39,6 +40,7 @@ enum V2 { /// Configuration for version two of the `vector` source. #[configurable_component] #[derive(Clone, Debug)] +#[serde(deny_unknown_fields)] pub struct VectorConfigV2 { /// Version of the configuration. version: Option, diff --git a/src/sources/vector/v1.rs b/src/sources/vector/v1.rs index 1a4f248040b34..ec0bf082eb19f 100644 --- a/src/sources/vector/v1.rs +++ b/src/sources/vector/v1.rs @@ -24,6 +24,7 @@ use crate::{ /// Configuration for version one of the `vector` source. #[configurable_component] #[derive(Clone, Debug)] +#[serde(deny_unknown_fields)] pub(crate) struct VectorConfig { /// The address to listen for connections on. /// From ac723646b5418b23b9713f79f17528e4baf77b46 Mon Sep 17 00:00:00 2001 From: Toby Lawrence Date: Tue, 7 Jun 2022 14:12:29 -0400 Subject: [PATCH 09/12] more tweaks, consistency, etc Signed-off-by: Toby Lawrence --- src/aws/auth.rs | 13 ++++++++----- src/sources/vector/v2.rs | 1 + src/tcp.rs | 1 + src/tls/settings.rs | 32 ++++++++++++++++++++++---------- 4 files changed, 32 insertions(+), 15 deletions(-) diff --git a/src/aws/auth.rs b/src/aws/auth.rs index dad957a7cee75..2e504deff27ec 100644 --- a/src/aws/auth.rs +++ b/src/aws/auth.rs @@ -4,23 +4,23 @@ use aws_config::{ default_provider::credentials::DefaultCredentialsChain, sts::AssumeRoleProviderBuilder, }; use aws_types::{credentials::SharedCredentialsProvider, region::Region, Credentials}; -use serde::{Deserialize, Serialize}; -use vector_config::Configurable; +use vector_config::configurable_component; // matches default load timeout from the SDK as of 0.10.1, but lets us confidently document the // default rather than relying on the SDK default to not change const DEFAULT_LOAD_TIMEOUT: Duration = Duration::from_secs(5); /// Configuration of the authentication strategy for interacting with AWS services. 
-#[derive(Clone, Configurable, Debug, Derivative, Deserialize, Serialize)] +#[configurable_component] +#[derive(Clone, Debug, Derivative)] #[derivative(Default)] -#[serde(untagged)] -#[serde(deny_unknown_fields)] +#[serde(deny_unknown_fields, untagged)] pub enum AwsAuthentication { /// Authenticate using a fixed access key and secret pair. Static { /// The AWS access key ID. access_key_id: String, + /// The AWS secret access key. secret_access_key: String, }, @@ -31,6 +31,7 @@ pub enum AwsAuthentication { File { /// Path to the credentials file. credentials_file: String, + /// The credentials profile to use. profile: Option, }, @@ -39,6 +40,7 @@ pub enum AwsAuthentication { Role { /// The ARN of the role to assume. assume_role: String, + /// Timeout for assuming the role, in seconds. load_timeout_secs: Option, }, @@ -111,6 +113,7 @@ async fn default_credentials_provider( #[cfg(test)] mod tests { use super::*; + use serde::{Deserialize, Serialize}; #[derive(Serialize, Deserialize, Clone, Debug)] struct ComponentConfig { diff --git a/src/sources/vector/v2.rs b/src/sources/vector/v2.rs index cab56bdf67fd1..557b24801eeda 100644 --- a/src/sources/vector/v2.rs +++ b/src/sources/vector/v2.rs @@ -91,6 +91,7 @@ async fn handle_batch_status(receiver: Option) -> Result<() /// Configuration for version two of the `vector` source. #[configurable_component] #[derive(Clone, Debug)] +#[serde(deny_unknown_fields)] pub struct VectorConfig { /// The address to listen for connections on. /// diff --git a/src/tcp.rs b/src/tcp.rs index 49e54bc121145..c2f5b3d3ce97d 100644 --- a/src/tcp.rs +++ b/src/tcp.rs @@ -5,6 +5,7 @@ use vector_config::configurable_component; /// TCP keepalive settings for socket-based components. #[configurable_component] #[derive(Clone, Copy, Debug, PartialEq)] +#[serde(deny_unknown_fields)] pub struct TcpKeepaliveConfig { /// The time to wait, in seconds, before starting to send TCP keepalive probes on an idle connection. 
pub(crate) time_secs: Option, diff --git a/src/tls/settings.rs b/src/tls/settings.rs index bd113174a8d18..07dc19e8bb9d8 100644 --- a/src/tls/settings.rs +++ b/src/tls/settings.rs @@ -12,9 +12,8 @@ use openssl::{ stack::Stack, x509::{store::X509StoreBuilder, X509}, }; -use serde::{Deserialize, Serialize}; use snafu::ResultExt; -use vector_config::Configurable; +use vector_config::configurable_component; use super::{ AddCertToStoreSnafu, AddExtraChainCertSnafu, CaStackPushSnafu, DerExportSnafu, @@ -33,7 +32,8 @@ pub const TEST_PEM_CRT_PATH: &str = "tests/data/localhost.crt"; pub const TEST_PEM_KEY_PATH: &str = "tests/data/localhost.key"; /// Configures the TLS options for incoming/outgoing connections. -#[derive(Clone, Configurable, Debug, Default, Deserialize, Serialize)] +#[configurable_component] +#[derive(Clone, Debug, Default)] pub struct TlsEnableableConfig { /// Whether or not to require TLS for incoming/outgoing connections. /// @@ -62,28 +62,38 @@ impl TlsEnableableConfig { } /// Standard TLS options. -#[derive(Clone, Configurable, Debug, Default, Deserialize, Serialize)] +#[configurable_component] +#[derive(Clone, Debug, Default)] #[serde(deny_unknown_fields)] pub struct TlsConfig { - /// Whether or not to require a valid identity certificate from the peer host. + /// Enables certificate verification. /// - /// For outgoing connections, this implies that the certificate must be valid according to its chain of trust, up to - /// the root certificate authority configured on the host Vector is running on. + /// If enabled, certificates must be valid in terms of not being expired, as well as being issued by a trusted + /// issuer. This verification operates in a hierarchical manner, checking that not only the leaf certificate (the + /// certificate presented by the client/server) is valid, but also that the issuer of that certificate is valid, and + /// so on until reaching a root certificate. 
/// - /// For incoming connections, this implies that the peer host must present a valid client certificate that is also - /// valid according to its chain of trust. + /// Relevant for both incoming and outgoing connections. + /// + /// Do NOT set this to `false` unless you understand the risks of not verifying the validity of certificates. pub verify_certificate: Option, - /// Whether or not to verify the remote host's TLS certificate is valid for the hostname Vector connected to. + + /// Enables hostname verification. + /// + /// If enabled, the hostname used to connect to the remote host must be present in the TLS certificate presented by + /// the remote host, either as the Common Name or as an entry in the Subject Alternative Name extension. /// /// Only relevant for outgoing connections. /// /// Do NOT set this to `false` unless you understand the risks of not verifying the remote hostname. pub verify_hostname: Option, + /// Absolute path to an additional CA certificate file. /// /// The certficate must be in the DER or PEM (X.509) format. Additionally, the certificate can be provided as an inline string in PEM format. #[serde(alias = "ca_path")] pub ca_file: Option, + /// Absolute path to a certificate file used to identify this server. /// /// The certificate must be in DER, PEM (X.509), or PKCS#12 format. Additionally, the certificate can be provided as @@ -92,11 +102,13 @@ pub struct TlsConfig { /// If this is set, and is not a PKCS#12 archive, `key_file` must also be set. #[serde(alias = "crt_path")] pub crt_file: Option, + /// Absolute path to a private key file used to identify this server. /// /// The key must be in DER or PEM (PKCS#8) format. Additionally, the key can be provided as an inline string in PEM format. #[serde(alias = "key_path")] pub key_file: Option, + /// Passphrase used to unlock the encrypted key file. /// /// This has no effect unless `key_file` is set. 
From 8cfdacda5bad00acf973b20712bf807a46e038af Mon Sep 17 00:00:00 2001 From: Toby Lawrence Date: Thu, 9 Jun 2022 11:46:08 -0400 Subject: [PATCH 10/12] address some PR feedback Signed-off-by: Toby Lawrence --- lib/vector-config-macros/src/lib.rs | 5 ++++- lib/vector-config/src/lib.rs | 8 ++++++++ src/sources/dnstap/mod.rs | 2 +- src/sources/docker_logs.rs | 2 +- src/sources/file.rs | 2 +- src/sources/internal_logs.rs | 2 +- src/sources/socket/tcp.rs | 2 +- src/sources/socket/udp.rs | 2 +- src/sources/socket/unix.rs | 2 +- src/sources/stdin.rs | 2 +- src/sources/syslog.rs | 2 +- 11 files changed, 21 insertions(+), 10 deletions(-) diff --git a/lib/vector-config-macros/src/lib.rs b/lib/vector-config-macros/src/lib.rs index e8f30d9936fdb..936047172a335 100644 --- a/lib/vector-config-macros/src/lib.rs +++ b/lib/vector-config-macros/src/lib.rs @@ -38,7 +38,10 @@ pub fn configurable_component(args: TokenStream, item: TokenStream) -> TokenStre configurable_component::configurable_component_impl(args, item) } -/// A helpful lil derive. +/// Generates an implementation of `Configurable` trait for the given container. +/// +/// In general, `#[configurable_component]` should be preferred as it ensures the other necessary derives/trait +/// implementations are provided, and offers other features related to describing specific configuration types, etc. #[proc_macro_derive(Configurable, attributes(configurable))] pub fn derive_configurable(input: TokenStream) -> TokenStream { configurable::derive_configurable_impl(input) diff --git a/lib/vector-config/src/lib.rs b/lib/vector-config/src/lib.rs index abf8c37335630..f009d2b6e358b 100644 --- a/lib/vector-config/src/lib.rs +++ b/lib/vector-config/src/lib.rs @@ -331,6 +331,14 @@ impl<'de, T: Configurable<'de>> fmt::Debug for Metadata<'de, T> { } } +/// A type that can be represented in a Vector configuration. 
+/// +/// In Vector, we want to be able to generate a schema for our configuration such that we can have a Rust-agnostic +/// definition of exactly what is configurable, what values are allowed, what bounds exist, and so on and so forth. +/// +/// `Configurable` provides the machinery to allow describing and encoding the shape of a type, recursively, so that by +/// instrumenting all transitive types of the configuration, the schema can be discovered by generating the schema from +/// some root type. pub trait Configurable<'de>: Serialize + Deserialize<'de> + Sized where Self: Clone, diff --git a/src/sources/dnstap/mod.rs b/src/sources/dnstap/mod.rs index d99d754c11c0f..70be58775c91c 100644 --- a/src/sources/dnstap/mod.rs +++ b/src/sources/dnstap/mod.rs @@ -31,7 +31,7 @@ pub struct DnstapConfig { /// /// The value will be the socket path itself. /// - /// By default, the [global `host_key` option](\(urls.vector_configuration)/global-options#log_schema.host_key) is + /// By default, the [global `host_key` option](https://vector.dev/docs/reference/configuration//global-options#log_schema.host_key) is /// used. pub host_key: Option, diff --git a/src/sources/docker_logs.rs b/src/sources/docker_logs.rs index 109afc1241e21..693f7394549ae 100644 --- a/src/sources/docker_logs.rs +++ b/src/sources/docker_logs.rs @@ -56,7 +56,7 @@ pub struct DockerLogsConfig { /// /// The value will be the current hostname for wherever Vector is running. /// - /// By default, the [global `host_key` option](\(urls.vector_configuration)/global-options#log_schema.host_key) is used. + /// By default, the [global `host_key` option](https://vector.dev/docs/reference/configuration//global-options#log_schema.host_key) is used. 
#[serde(default = "host_key")] host_key: String, diff --git a/src/sources/file.rs b/src/sources/file.rs index e2a56b5485a81..6294921df1038 100644 --- a/src/sources/file.rs +++ b/src/sources/file.rs @@ -107,7 +107,7 @@ pub struct FileConfig { /// /// The value will be the current hostname for wherever Vector is running. /// - /// By default, the [global `host_key` option](\(urls.vector_configuration)/global-options#log_schema.host_key) is used. + /// By default, the [global `host_key` option](https://vector.dev/docs/reference/configuration//global-options#log_schema.host_key) is used. pub host_key: Option, /// The directory used to persist file checkpoint positions. diff --git a/src/sources/internal_logs.rs b/src/sources/internal_logs.rs index 3c559bdece85f..9b39c4a463018 100644 --- a/src/sources/internal_logs.rs +++ b/src/sources/internal_logs.rs @@ -22,7 +22,7 @@ pub struct InternalLogsConfig { /// /// The value will be the current hostname for wherever Vector is running. /// - /// By default, the [global `host_key` option](\(urls.vector_configuration)/global-options#log_schema.host_key) is used. + /// By default, the [global `host_key` option](https://vector.dev/docs/reference/configuration//global-options#log_schema.host_key) is used. pub host_key: Option, /// Overrides the name of the log field used to add the current process ID to each event. diff --git a/src/sources/socket/tcp.rs b/src/sources/socket/tcp.rs index a3559689324e2..ce0e2badd1d42 100644 --- a/src/sources/socket/tcp.rs +++ b/src/sources/socket/tcp.rs @@ -37,7 +37,7 @@ pub struct TcpConfig { /// /// The value will be the peer host's address, including the port i.e. `1.2.3.4:9000`. /// - /// By default, the [global `host_key` option](\(urls.vector_configuration)/global-options#log_schema.host_key) is used. + /// By default, the [global `host_key` option](https://vector.dev/docs/reference/configuration//global-options#log_schema.host_key) is used. 
     host_key: Option,

     /// Overrides the name of the log field used to add the peer host's port to each event.
diff --git a/src/sources/socket/udp.rs b/src/sources/socket/udp.rs
index ef07c4da65061..a33b31de4641c 100644
--- a/src/sources/socket/udp.rs
+++ b/src/sources/socket/udp.rs
@@ -43,7 +43,7 @@ pub struct UdpConfig {
     ///
     /// The value will be the peer host's address, including the port i.e. `1.2.3.4:9000`.
     ///
-    /// By default, the [global `host_key` option](\(urls.vector_configuration)/global-options#log_schema.host_key) is used.
+    /// By default, the [global `host_key` option](https://vector.dev/docs/reference/configuration//global-options#log_schema.host_key) is used.
     host_key: Option,

     /// Overrides the name of the log field used to add the peer host's port to each event.
diff --git a/src/sources/socket/unix.rs b/src/sources/socket/unix.rs
index 4d4b877d5ba73..bb4ff2f11e9cc 100644
--- a/src/sources/socket/unix.rs
+++ b/src/sources/socket/unix.rs
@@ -43,7 +43,7 @@ pub struct UnixConfig {
     ///
     /// The value will be the socket path itself.
     ///
-    /// By default, the [global `host_key` option](\(urls.vector_configuration)/global-options#log_schema.host_key) is used.pub host_key: Option,
+    /// By default, the [global `host_key` option](https://vector.dev/docs/reference/configuration//global-options#log_schema.host_key) is used.
     pub host_key: Option,

     #[configurable(derived)]
diff --git a/src/sources/stdin.rs b/src/sources/stdin.rs
index 98a02498e08ea..e07b54feb372c 100644
--- a/src/sources/stdin.rs
+++ b/src/sources/stdin.rs
@@ -35,7 +35,7 @@ pub struct StdinConfig {
     ///
     /// The value will be the current hostname for wherever Vector is running.
     ///
-    /// By default, the [global `host_key` option](\(urls.vector_configuration)/global-options#log_schema.host_key) is used.
+    /// By default, the [global `host_key` option](https://vector.dev/docs/reference/configuration//global-options#log_schema.host_key) is used.
pub host_key: Option, #[configurable(derived)] diff --git a/src/sources/syslog.rs b/src/sources/syslog.rs index 4e8a75651cd34..fbccd91884055 100644 --- a/src/sources/syslog.rs +++ b/src/sources/syslog.rs @@ -49,7 +49,7 @@ pub struct SyslogConfig { /// If using TCP or UDP, the value will be the peer host's address, including the port i.e. `1.2.3.4:9000`. If using /// UDS, the value will be the socket path itself. /// - /// By default, the [global `host_key` option](\(urls.vector_configuration)/global-options#log_schema.host_key) is used. + /// By default, the [global `host_key` option](https://vector.dev/docs/reference/configuration//global-options#log_schema.host_key) is used. host_key: Option, } From ea87206bcbbc235841c5e77b1638bca4b2efe8bf Mon Sep 17 00:00:00 2001 From: Toby Lawrence Date: Thu, 9 Jun 2022 11:52:41 -0400 Subject: [PATCH 11/12] ignore config schema test since it writes a ton of stuff to console Signed-off-by: Toby Lawrence --- src/sources/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/sources/mod.rs b/src/sources/mod.rs index 101ef8cf79073..72084ec534c26 100644 --- a/src/sources/mod.rs +++ b/src/sources/mod.rs @@ -256,6 +256,7 @@ mod tests { } #[test] + #[ignore] fn vector_config() { let root_schema = generate_root_schema::(); let json = serde_json::to_string_pretty(&root_schema) From a8ad767afde34d18ef4c1beee48eb6ab90e30149 Mon Sep 17 00:00:00 2001 From: Toby Lawrence Date: Thu, 9 Jun 2022 14:37:40 -0400 Subject: [PATCH 12/12] fix missing thing Signed-off-by: Toby Lawrence --- src/sources/file.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/sources/file.rs b/src/sources/file.rs index 39dbc7a002b4d..5f1c5c6b41656 100644 --- a/src/sources/file.rs +++ b/src/sources/file.rs @@ -653,6 +653,7 @@ mod tests { use encoding_rs::UTF_16LE; use pretty_assertions::assert_eq; + use serde::Deserialize; use tempfile::{tempdir, TempDir}; use tokio::time::{sleep, timeout, Duration};