From 74ae15eb53715925a53b68bff1c067c6d53344fa Mon Sep 17 00:00:00 2001
From: Stephen Wakely
Date: Mon, 17 Apr 2023 22:08:57 +0100
Subject: [PATCH] chore: Revert transform definitions (#17146)

* Revert "chore(topology): Transform outputs hash table of OutputId -> Definition (#17059)"

This reverts commit 1bdb24d04329aabb7212942b08f503e910ed60ff.

* Revert "chore(topology): split `build_pieces` into smaller functions (#17037)"

This reverts commit 6e6f1eb590146ce57e699ff6fc7922314abc892a.

* Revert "enhancement(topology): Update transforms to handle multiple definitions (#16793)"

This reverts commit e19f4fca320d817fd58ce2bdf429ec3384a3eacb.

Signed-off-by: Stephen Wakely
---
 benches/remap.rs | 20 +-
 benches/transform/route.rs | 6 +-
 lib/vector-core/src/config/mod.rs | 220 +---
 lib/vector-core/src/schema/definition.rs | 100 +-
 lib/vector-core/src/transform/mod.rs | 6 +-
 src/api/schema/components/mod.rs | 4 +-
 src/config/compiler.rs | 5 +-
 src/config/graph.rs | 78 +-
 src/config/id.rs | 29 -
 src/config/mod.rs | 4 +-
 src/config/source.rs | 9 +-
 src/config/transform.rs | 17 +-
 src/config/unit_test/mod.rs | 14 +-
 src/config/unit_test/unit_test_components.rs | 21 +-
 src/config/validation.rs | 10 +-
 src/source_sender/mod.rs | 4 +-
 src/sources/amqp.rs | 29 +-
 src/sources/apache_metrics/mod.rs | 6 +-
 src/sources/aws_ecs_metrics/mod.rs | 6 +-
 src/sources/aws_kinesis_firehose/mod.rs | 10 +-
 src/sources/aws_s3/mod.rs | 8 +-
 src/sources/aws_sqs/config.rs | 9 +-
 src/sources/aws_sqs/source.rs | 20 +-
 src/sources/datadog_agent/metrics.rs | 32 +-
 src/sources/datadog_agent/mod.rs | 27 +-
 src/sources/datadog_agent/tests.rs | 330 +++---
 src/sources/demo_logs.rs | 9 +-
 src/sources/dnstap/mod.rs | 6 +-
 src/sources/docker_logs/mod.rs | 6 +-
 src/sources/docker_logs/tests.rs | 148 ++-
 src/sources/eventstoredb_metrics/mod.rs | 6 +-
 src/sources/exec/mod.rs | 9 +-
 src/sources/file.rs | 107 +-
 .../file_descriptors/file_descriptor.rs | 4 +-
 src/sources/file_descriptors/mod.rs | 11 +-
 src/sources/file_descriptors/stdin.rs | 4 +-
 src/sources/fluent/mod.rs | 28 +-
 src/sources/gcp_pubsub.rs | 26 +-
 src/sources/heroku_logs.rs | 31 +-
 src/sources/host_metrics/mod.rs | 6 +-
 src/sources/http_client/client.rs | 9 +-
 src/sources/http_server.rs | 33 +-
 src/sources/internal_logs.rs | 26 +-
 src/sources/internal_metrics.rs | 6 +-
 src/sources/journald.rs | 34 +-
 src/sources/kafka.rs | 148 ++-
 src/sources/kubernetes_logs/mod.rs | 324 +++---
 src/sources/logstash.rs | 29 +-
 src/sources/mongodb_metrics/mod.rs | 6 +-
 src/sources/nats.rs | 29 +-
 src/sources/nginx_metrics/mod.rs | 6 +-
 src/sources/opentelemetry/mod.rs | 10 +-
 src/sources/opentelemetry/tests.rs | 24 +-
 src/sources/postgresql_metrics.rs | 6 +-
 src/sources/prometheus/remote_write.rs | 6 +-
 src/sources/prometheus/scrape.rs | 6 +-
 src/sources/redis/mod.rs | 9 +-
 src/sources/socket/mod.rs | 10 +-
 src/sources/splunk_hec/mod.rs | 26 +-
 src/sources/statsd/mod.rs | 6 +-
 src/sources/syslog.rs | 28 +-
 src/sources/vector/mod.rs | 28 +-
 src/test_util/mock/sources/backpressure.rs | 10 +-
 src/test_util/mock/sources/basic.rs | 11 +-
 src/test_util/mock/sources/error.rs | 10 +-
 src/test_util/mock/sources/panic.rs | 10 +-
 src/test_util/mock/sources/tripwire.rs | 10 +-
 src/test_util/mock/transforms/basic.rs | 18 +-
 src/test_util/mock/transforms/noop.rs | 18 +-
 src/topology/builder.rs | 986 +++++++++---------
 src/topology/schema.rs | 601 ++++++-----
 src/transforms/aggregate.rs | 10 +-
 src/transforms/aws_ec2_metadata.rs | 32 +-
 src/transforms/dedupe.rs | 17 +-
 src/transforms/filter.rs | 19 +-
 src/transforms/log_to_metric.rs | 14 +-
 src/transforms/lua/mod.rs | 12 +-
 src/transforms/lua/v1/mod.rs | 17 +-
 src/transforms/lua/v2/mod.rs | 20 +-
 src/transforms/metric_to_log.rs | 12 +-
 src/transforms/reduce/mod.rs | 179 ++--
 src/transforms/remap.rs | 236 ++---
 src/transforms/route.rs | 41 +-
 src/transforms/sample.rs | 20 +-
 .../tag_cardinality_limit/config.rs | 12 +-
 src/transforms/throttle.rs | 16 +-
 86 files changed, 2005 insertions(+), 2560 deletions(-)

diff --git a/benches/remap.rs b/benches/remap.rs
index d3c2c6930a144..7d666d4041c11 100644
--- a/benches/remap.rs
+++ b/benches/remap.rs
@@ -4,7 +4,7 @@ use chrono::{DateTime, Utc};
 use criterion::{criterion_group, criterion_main, BatchSize, Criterion};
 use indexmap::IndexMap;
 use vector::{
-    config::{DataType, TransformOutput},
+    config::{DataType, Output},
     event::{Event, LogEvent, Value},
     transforms::{
         remap::{Remap, RemapConfig},
@@ -27,10 +27,8 @@ fn benchmark_remap(c: &mut Criterion) {
     let mut group = c.benchmark_group("remap");

     let add_fields_runner = |tform: &mut Box, event: Event| {
-        let mut outputs = TransformOutputsBuf::new_with_capacity(
-            vec![TransformOutput::new(DataType::all(), vec![])],
-            1,
-        );
+        let mut outputs =
+            TransformOutputsBuf::new_with_capacity(vec![Output::default(DataType::all())], 1);
         tform.transform(event, &mut outputs);
         let result = outputs.take_primary();
         let output_1 = result.first().unwrap().as_log();
@@ -79,10 +77,8 @@ fn benchmark_remap(c: &mut Criterion) {
     });

     let json_parser_runner = |tform: &mut Box, event: Event| {
-        let mut outputs = TransformOutputsBuf::new_with_capacity(
-            vec![TransformOutput::new(DataType::all(), vec![])],
-            1,
-        );
+        let mut outputs =
+            TransformOutputsBuf::new_with_capacity(vec![Output::default(DataType::all())], 1);
         tform.transform(event, &mut outputs);
         let result = outputs.take_primary();
         let output_1 = result.first().unwrap().as_log();
@@ -133,10 +129,8 @@ fn benchmark_remap(c: &mut Criterion) {
     let coerce_runner =
         |tform: &mut Box, event: Event, timestamp: DateTime| {
-            let mut outputs = TransformOutputsBuf::new_with_capacity(
-                vec![TransformOutput::new(DataType::all(), vec![])],
-                1,
-            );
+            let mut outputs =
+                TransformOutputsBuf::new_with_capacity(vec![Output::default(DataType::all())], 1);
             tform.transform(event, &mut outputs);
             let result = outputs.take_primary();
             let output_1 = result.first().unwrap().as_log();

diff --git a/benches/transform/route.rs b/benches/transform/route.rs
index f1ed85f163634..0e23695b8f58a 100644
--- a/benches/transform/route.rs
+++ b/benches/transform/route.rs
@@ -12,7 +12,7 @@ use vector::transforms::{
     TransformOutputsBuf,
 };
 use vector_core::{
-    config::{DataType, TransformOutput},
+    config::{DataType, Output},
     event::{Event, EventContainer, EventMetadata, LogEvent},
     transform::{SyncTransform, TransformContext},
 };
@@ -54,10 +54,10 @@ fn route(c: &mut Criterion) {
         "bba", "bbca", "dba", "bea", "fba", "gba", "hba", "iba", "jba", "bka", "bal", "bma",
         "bna", "boa", "bpa", "bqa", "bra", "bsa", "bta", "bua", "bva", "bwa", "xba", "aby",
         "zba",
     ] {
-        outputs.push(TransformOutput {
+        outputs.push(Output {
             port: Some(String::from(name)),
             ty: DataType::Log,
-            log_schema_definitions: Vec::new(),
+            log_schema_definition: None,
         });
     }
     let output_buffer: TransformOutputsBuf = TransformOutputsBuf::new_with_capacity(outputs, 10);

diff --git a/lib/vector-core/src/config/mod.rs b/lib/vector-core/src/config/mod.rs
index 07774b73f40e3..7350ff80d916b 100644
--- a/lib/vector-core/src/config/mod.rs
+++ b/lib/vector-core/src/config/mod.rs
@@ -100,119 +100,42 @@ impl Input {
 }

 #[derive(Debug, Clone, PartialEq)]
-pub struct SourceOutput {
+pub struct Output {
     pub port: Option,
     pub ty: DataType,
     // NOTE: schema definitions are only implemented/supported for log-type events. There is no
     // inherent blocker to support other types as well, but it'll require additional work to add
     // the relevant schemas, and store them separately in this type.
-    pub schema_definition: Option,
-}
-
-impl SourceOutput {
-    /// Create a `SourceOutput` of the given data type that contains a single output `Definition`.
-    /// Designed for use in log sources.
     ///
+    /// The `None` variant of a schema definition has two distinct meanings for a source component
+    /// versus a transform component:
     ///
-    /// # Panics
-    ///
-    /// Panics if `ty` does not contain [`DataType::Log`].
-    #[must_use]
-    pub fn new_logs(ty: DataType, schema_definition: schema::Definition) -> Self {
-        assert!(ty.contains(DataType::Log));
-
-        Self {
-            port: None,
-            ty,
-            schema_definition: Some(schema_definition),
-        }
-    }
-
-    /// Create a `SourceOutput` of the given data type that contains no output `Definition`s.
-    /// Designed for use in metrics sources.
+    /// For *sources*, a `None` schema is identical to a `Some(Definition::source_default())`.
     ///
-    /// Sets the datatype to be [`DataType::Metric`].
-    #[must_use]
-    pub fn new_metrics() -> Self {
-        Self {
-            port: None,
-            ty: DataType::Metric,
-            schema_definition: None,
-        }
-    }
+    /// For a *transform*, a schema [`schema::Definition`] is required if `DataType` is Log.
+    pub log_schema_definition: Option,
+}

-    /// Create a `SourceOutput` of the given data type that contains no output `Definition`s.
-    /// Designed for use in trace sources.
+impl Output {
+    /// Create a default `Output` of the given data type.
     ///
-    /// Sets the datatype to be [`DataType::Trace`].
-    #[must_use]
-    pub fn new_traces() -> Self {
+    /// A default output is one without a port identifier (i.e. not a named output) and the default
+    /// output consumers will receive if they declare the component itself as an input.
+    pub fn default(ty: DataType) -> Self {
         Self {
             port: None,
-            ty: DataType::Trace,
-            schema_definition: None,
+            ty,
+            log_schema_definition: None,
         }
     }

-    /// Return the schema [`schema::Definition`] from this output.
-    ///
-    /// Takes a `schema_enabled` flag to determine if the full definition including the fields
-    /// and associated types should be returned, or if a simple definition should be returned.
-    /// A simple definition is just the default for the namespace. For the Vector namespace the
-    /// meanings are included.
-    /// Schema enabled is set in the users configuration.
+    /// Set the schema definition for this `Output`.
     #[must_use]
-    pub fn schema_definition(&self, schema_enabled: bool) -> Option {
-        self.schema_definition.as_ref().map(|definition| {
-            if schema_enabled {
-                definition.clone()
-            } else {
-                let mut new_definition =
-                    schema::Definition::default_for_namespace(definition.log_namespaces());
-
-                if definition.log_namespaces().contains(&LogNamespace::Vector) {
-                    new_definition.add_meanings(definition.meanings());
-                }
-
-                new_definition
-            }
-        })
-    }
-}
-
-impl SourceOutput {
-    /// Set the port name for this `SourceOutput`.
- #[must_use] - pub fn with_port(mut self, name: impl Into) -> Self { - self.port = Some(name.into()); + pub fn with_schema_definition(mut self, schema_definition: schema::Definition) -> Self { + self.log_schema_definition = Some(schema_definition); self } -} - -#[derive(Debug, Clone, PartialEq)] -pub struct TransformOutput { - pub port: Option, - pub ty: DataType, - - /// For *transforms* if `Datatype` is [`DataType::Log`], if schema is - /// enabled, at least one definition should be output. If the transform - /// has multiple connected sources, it is possible to have multiple output - /// definitions - one for each input. - pub log_schema_definitions: Vec, -} - -impl TransformOutput { - /// Create a `TransformOutput` of the given data type that contains multiple [`schema::Definition`]s. - /// Designed for use in transforms. - #[must_use] - pub fn new(ty: DataType, schema_definitions: Vec) -> Self { - Self { - port: None, - ty, - log_schema_definitions: schema_definitions, - } - } /// Set the port name for this `Output`. #[must_use] @@ -504,12 +427,10 @@ impl LogNamespace { #[cfg(test)] mod test { - use super::*; + use crate::config::{init_log_schema, LogNamespace, LogSchema}; use crate::event::LogEvent; use chrono::Utc; - use lookup::{event_path, owned_value_path, OwnedTargetPath}; - use value::Kind; - use vector_common::btreemap; + use lookup::event_path; #[test] fn test_insert_standard_vector_source_metadata() { @@ -525,107 +446,4 @@ mod test { assert!(event.get(event_path!("a", "b", "c", "d")).is_some()); } - - #[test] - fn test_source_definitions_legacy() { - let definition = schema::Definition::empty_legacy_namespace() - .with_event_field(&owned_value_path!("zork"), Kind::bytes(), Some("zork")) - .with_event_field(&owned_value_path!("nork"), Kind::integer(), None); - let output = SourceOutput::new_logs(DataType::Log, definition); - - let valid_event = LogEvent::from(Value::from(btreemap! { - "zork" => "norknoog", - "nork" => 32 - })) - .into(); - - let invalid_event = LogEvent::from(Value::from(btreemap! { - "nork" => 32 - })) - .into(); - - // Get a definition with schema enabled. - let new_definition = output.schema_definition(true).unwrap(); - - // Meanings should still exist. - assert_eq!( - Some(&OwnedTargetPath::event(owned_value_path!("zork"))), - new_definition.meaning_path("zork") - ); - - // Events should have the schema validated. - new_definition.assert_valid_for_event(&valid_event); - new_definition.assert_invalid_for_event(&invalid_event); - - // There should be the default legacy definition without schemas enabled. - assert_eq!( - Some(schema::Definition::default_legacy_namespace()), - output.schema_definition(false) - ); - } - - #[test] - fn test_source_definitons_vector() { - let definition = schema::Definition::default_for_namespace(&[LogNamespace::Vector].into()) - .with_metadata_field( - &owned_value_path!("vector", "zork"), - Kind::integer(), - Some("zork"), - ) - .with_event_field(&owned_value_path!("nork"), Kind::integer(), None); - - let output = SourceOutput::new_logs(DataType::Log, definition); - - let mut valid_event = LogEvent::from(Value::from(btreemap! { - "nork" => 32 - })); - - valid_event - .metadata_mut() - .value_mut() - .insert(path!("vector").concat("zork"), 32); - - let valid_event = valid_event.into(); - - let mut invalid_event = LogEvent::from(Value::from(btreemap! 
{ - "nork" => 32 - })); - - invalid_event - .metadata_mut() - .value_mut() - .insert(path!("vector").concat("zork"), "noog"); - - let invalid_event = invalid_event.into(); - - // Get a definition with schema enabled. - let new_definition = output.schema_definition(true).unwrap(); - - // Meanings should still exist. - assert_eq!( - Some(&OwnedTargetPath::metadata(owned_value_path!( - "vector", "zork" - ))), - new_definition.meaning_path("zork") - ); - - // Events should have the schema validated. - new_definition.assert_valid_for_event(&valid_event); - new_definition.assert_invalid_for_event(&invalid_event); - - // Get a definition without schema enabled. - let new_definition = output.schema_definition(false).unwrap(); - - // Meanings should still exist. - assert_eq!( - Some(&OwnedTargetPath::metadata(owned_value_path!( - "vector", "zork" - ))), - new_definition.meaning_path("zork") - ); - - // Events should not have the schema validated. - new_definition.assert_valid_for_event(&valid_event); - new_definition.assert_valid_for_event(&invalid_event); - } } diff --git a/lib/vector-core/src/schema/definition.rs b/lib/vector-core/src/schema/definition.rs index 0442337bba094..fd6ea4565d9af 100644 --- a/lib/vector-core/src/schema/definition.rs +++ b/lib/vector-core/src/schema/definition.rs @@ -87,7 +87,7 @@ impl Definition { ) -> Self { Self { event_kind, - metadata_kind: Kind::object(Collection::any()), + metadata_kind: Kind::object(Collection::empty()), meaning: BTreeMap::default(), log_namespaces: log_namespaces.into(), } @@ -373,55 +373,25 @@ impl Definition { /// This method panics if the provided path points to an unknown location in the collection. #[must_use] pub fn with_meaning(mut self, target_path: OwnedTargetPath, meaning: &str) -> Self { - self.add_meaning(target_path, meaning); - self - } - - /// Adds the meaning pointing to the given path to our list of meanings. - /// - /// # Panics - /// - /// This method panics if the provided path points to an unknown location in the collection. - fn add_meaning(&mut self, target_path: OwnedTargetPath, meaning: &str) { - self.try_with_meaning(target_path, meaning) - .unwrap_or_else(|err| panic!("{}", err)); - } - - /// Register a semantic meaning for the definition. - /// - /// # Errors - /// - /// Returns an error if the provided path points to an unknown location in the collection. - pub fn try_with_meaning( - &mut self, - target_path: OwnedTargetPath, - meaning: &str, - ) -> Result<(), &'static str> { + // Ensure the path exists in the collection. match target_path.prefix { - PathPrefix::Event - if !self - .event_kind + PathPrefix::Event => assert!( + self.event_kind .at_path(&target_path.path) - .contains_any_defined() => - { - Err("meaning must point to a valid path") - } - - PathPrefix::Metadata - if !self - .metadata_kind + .contains_any_defined(), + "meaning must point to a valid path" + ), + PathPrefix::Metadata => assert!( + self.metadata_kind .at_path(&target_path.path) - .contains_any_defined() => - { - Err("meaning must point to a valid path") - } + .contains_any_defined(), + "meaning must point to a valid path" + ), + }; - _ => { - self.meaning - .insert(meaning.to_owned(), MeaningPointer::Valid(target_path)); - Ok(()) - } - } + self.meaning + .insert(meaning.to_owned(), MeaningPointer::Valid(target_path)); + self } /// Set the kind for all unknown fields. @@ -481,21 +451,6 @@ impl Definition { }) } - /// Adds the meanings provided by an iterator over the given meanings. 
- /// - /// # Panics - /// - /// This method panics if the provided path from any of the incoming meanings point to - /// an unknown location in the collection. - pub fn add_meanings<'a>( - &'a mut self, - meanings: impl Iterator, - ) { - for (meaning, path) in meanings { - self.add_meaning(path.clone(), meaning); - } - } - pub fn event_kind(&self) -> &Kind { &self.event_kind } @@ -526,7 +481,6 @@ mod test_utils { /// Checks that the schema definition is _valid_ for the given event. /// /// # Errors - /// /// If the definition is not valid, debug info will be returned. pub fn is_valid_for_event(&self, event: &Event) -> Result<(), String> { if let Some(log) = event.maybe_as_log() { @@ -568,27 +522,13 @@ mod test_utils { } /// Asserts that the schema definition is _valid_ for the given event. - /// /// # Panics - /// /// If the definition is not valid for the event. pub fn assert_valid_for_event(&self, event: &Event) { if let Err(err) = self.is_valid_for_event(event) { panic!("Schema definition assertion failed: {err}"); } } - - /// Asserts that the schema definition is _invalid_ for the given event. - /// - /// # Panics - /// - /// If the definition is valid for the event. - pub fn assert_invalid_for_event(&self, event: &Event) { - assert!( - self.is_valid_for_event(event).is_err(), - "Schema definition assertion should not be valid" - ); - } } } @@ -792,7 +732,7 @@ mod tests { "foo".into(), Kind::boolean().or_undefined(), )])), - metadata_kind: Kind::object(Collection::any()), + metadata_kind: Kind::object(Collection::empty()), meaning: [( "foo_meaning".to_owned(), MeaningPointer::Valid(parse_target_path("foo").unwrap()), @@ -816,7 +756,7 @@ mod tests { Kind::regex().or_null().or_undefined(), )])), )])), - metadata_kind: Kind::object(Collection::any()), + metadata_kind: Kind::object(Collection::empty()), meaning: [( "foobar".to_owned(), MeaningPointer::Valid(parse_target_path(".foo.bar").unwrap()), @@ -837,7 +777,7 @@ mod tests { "foo".into(), Kind::boolean().or_undefined(), )])), - metadata_kind: Kind::object(Collection::any()), + metadata_kind: Kind::object(Collection::empty()), meaning: BTreeMap::default(), log_namespaces: BTreeSet::new(), }, @@ -855,7 +795,7 @@ mod tests { fn test_unknown_fields() { let want = Definition { event_kind: Kind::object(Collection::from_unknown(Kind::bytes().or_integer())), - metadata_kind: Kind::object(Collection::any()), + metadata_kind: Kind::object(Collection::empty()), meaning: BTreeMap::default(), log_namespaces: BTreeSet::new(), }; diff --git a/lib/vector-core/src/transform/mod.rs b/lib/vector-core/src/transform/mod.rs index 372724c42a912..cc44eba2bdec5 100644 --- a/lib/vector-core/src/transform/mod.rs +++ b/lib/vector-core/src/transform/mod.rs @@ -222,14 +222,14 @@ struct TransformOutput { } pub struct TransformOutputs { - outputs_spec: Vec, + outputs_spec: Vec, primary_output: Option, named_outputs: HashMap, } impl TransformOutputs { pub fn new( - outputs_in: Vec, + outputs_in: Vec, ) -> (Self, HashMap, fanout::ControlChannel>) { let outputs_spec = outputs_in.clone(); let mut primary_output = None; @@ -319,7 +319,7 @@ pub struct TransformOutputsBuf { } impl TransformOutputsBuf { - pub fn new_with_capacity(outputs_in: Vec, capacity: usize) -> Self { + pub fn new_with_capacity(outputs_in: Vec, capacity: usize) -> Self { let mut primary_buffer = None; let mut named_buffers = HashMap::new(); diff --git a/src/api/schema/components/mod.rs b/src/api/schema/components/mod.rs index 171bdecc0bfe3..d6ba5ade90874 100644 --- a/src/api/schema/components/mod.rs 
+++ b/src/api/schema/components/mod.rs @@ -14,7 +14,7 @@ use tokio_stream::{wrappers::BroadcastStream, Stream, StreamExt}; use vector_config::NamedComponent; use vector_core::internal_event::DEFAULT_OUTPUT; -use crate::topology::schema::possible_definitions; +use crate::topology::schema::merged_definition; use crate::{ api::schema::{ components::state::component_by_component_key, @@ -294,7 +294,7 @@ pub fn update_config(config: &Config) { outputs: transform .inner .outputs( - &possible_definitions(&transform.inputs, config, &mut cache), + &merged_definition(&transform.inputs, config, &mut cache), config.schema.log_namespace(), ) .into_iter() diff --git a/src/config/compiler.rs b/src/config/compiler.rs index f4170505bf557..8ec372c2187c4 100644 --- a/src/config/compiler.rs +++ b/src/config/compiler.rs @@ -138,10 +138,7 @@ pub(crate) fn expand_globs(config: &mut ConfigBuilder) { }) .chain(config.transforms.iter().flat_map(|(key, t)| { t.inner - .outputs( - &[(key.into(), schema::Definition::any())], - config.schema.log_namespace(), - ) + .outputs(&schema::Definition::any(), config.schema.log_namespace()) .into_iter() .map(|output| OutputId { component: key.clone(), diff --git a/src/config/graph.rs b/src/config/graph.rs index 3c1e27439a78d..2beab867fbdd8 100644 --- a/src/config/graph.rs +++ b/src/config/graph.rs @@ -2,18 +2,18 @@ use indexmap::{set::IndexSet, IndexMap}; use std::collections::{HashMap, HashSet, VecDeque}; use super::{ - schema, ComponentKey, DataType, OutputId, SinkConfig, SinkOuter, SourceOuter, SourceOutput, - TransformOuter, TransformOutput, + schema, ComponentKey, DataType, Output, OutputId, SinkConfig, SinkOuter, SourceOuter, + TransformOuter, }; #[derive(Debug, Clone)] pub enum Node { Source { - outputs: Vec, + outputs: Vec, }, Transform { in_ty: DataType, - outputs: Vec, + outputs: Vec, }, Sink { ty: DataType, @@ -76,10 +76,9 @@ impl Graph { id.clone(), Node::Transform { in_ty: transform.inner.input().data_type(), - outputs: transform.inner.outputs( - &[(id.into(), schema::Definition::any())], - schema.log_namespace(), - ), + outputs: transform + .inner + .outputs(&schema::Definition::any(), schema.log_namespace()), }, ); } @@ -167,12 +166,7 @@ impl Graph { /// have inputs. fn get_output_type(&self, id: &OutputId) -> DataType { match &self.nodes[&id.component] { - Node::Source { outputs } => outputs - .iter() - .find(|output| output.port == id.port) - .map(|output| output.ty) - .expect("output didn't exist"), - Node::Transform { outputs, .. } => outputs + Node::Source { outputs } | Node::Transform { outputs, .. } => outputs .iter() .find(|output| output.port == id.port) .map(|output| output.ty) @@ -263,14 +257,7 @@ impl Graph { .iter() .flat_map(|(key, node)| match node { Node::Sink { .. } => vec![], - Node::Source { outputs } => outputs - .iter() - .map(|output| OutputId { - component: key.clone(), - port: output.port.clone(), - }) - .collect(), - Node::Transform { outputs, .. } => outputs + Node::Source { outputs } | Node::Transform { outputs, .. 
} => outputs .iter() .map(|output| OutputId { component: key.clone(), @@ -366,7 +353,6 @@ impl Graph { #[cfg(test)] mod test { use similar_asserts::assert_eq; - use vector_core::schema::Definition; use super::*; @@ -375,11 +361,7 @@ mod test { self.nodes.insert( id.into(), Node::Source { - outputs: vec![match ty { - DataType::Metric => SourceOutput::new_metrics(), - DataType::Trace => SourceOutput::new_traces(), - _ => SourceOutput::new_logs(ty, Definition::any()), - }], + outputs: vec![Output::default(ty)], }, ); } @@ -397,10 +379,7 @@ mod test { id.clone(), Node::Transform { in_ty, - outputs: vec![TransformOutput::new( - out_ty, - vec![Definition::default_legacy_namespace()], - )], + outputs: vec![Output::default(out_ty)], }, ); for from in inputs { @@ -414,10 +393,9 @@ mod test { fn add_transform_output(&mut self, id: &str, name: &str, ty: DataType) { let id = id.into(); match self.nodes.get_mut(&id) { - Some(Node::Transform { outputs, .. }) => outputs.push( - TransformOutput::new(ty, vec![Definition::default_legacy_namespace()]) - .with_port(name), - ), + Some(Node::Transform { outputs, .. }) => { + outputs.push(Output::default(ty).with_port(name)) + } _ => panic!("invalid transform"), } } @@ -635,13 +613,13 @@ mod test { graph.nodes.insert( ComponentKey::from("foo.bar"), Node::Source { - outputs: vec![SourceOutput::new_logs(DataType::all(), Definition::any())], + outputs: vec![Output::default(DataType::all())], }, ); graph.nodes.insert( ComponentKey::from("foo.bar"), Node::Source { - outputs: vec![SourceOutput::new_logs(DataType::all(), Definition::any())], + outputs: vec![Output::default(DataType::all())], }, ); graph.nodes.insert( @@ -649,15 +627,8 @@ mod test { Node::Transform { in_ty: DataType::all(), outputs: vec![ - TransformOutput::new( - DataType::all(), - vec![Definition::default_legacy_namespace()], - ), - TransformOutput::new( - DataType::all(), - vec![Definition::default_legacy_namespace()], - ) - .with_port("bar"), + Output::default(DataType::all()), + Output::default(DataType::all()).with_port("bar"), ], }, ); @@ -666,7 +637,7 @@ mod test { graph.nodes.insert( ComponentKey::from("baz.errors"), Node::Source { - outputs: vec![SourceOutput::new_logs(DataType::all(), Definition::any())], + outputs: vec![Output::default(DataType::all())], }, ); graph.nodes.insert( @@ -674,15 +645,8 @@ mod test { Node::Transform { in_ty: DataType::all(), outputs: vec![ - TransformOutput::new( - DataType::all(), - vec![Definition::default_legacy_namespace()], - ), - TransformOutput::new( - DataType::all(), - vec![Definition::default_legacy_namespace()], - ) - .with_port("errors"), + Output::default(DataType::all()), + Output::default(DataType::all()).with_port("errors"), ], }, ); diff --git a/src/config/id.rs b/src/config/id.rs index caa561b68352d..57c9cd33178bc 100644 --- a/src/config/id.rs +++ b/src/config/id.rs @@ -3,8 +3,6 @@ use std::{fmt, ops::Deref}; use vector_config::configurable_component; pub use vector_core::config::ComponentKey; -use super::schema; - /// A list of upstream [source][sources] or [transform][transforms] IDs. /// /// Wildcards (`*`) are supported. @@ -108,33 +106,6 @@ pub struct OutputId { pub port: Option, } -impl OutputId { - /// Some situations, for example when validating a config file requires running the - /// transforms::output function to retrieve the outputs, but we don't have an - /// `OutputId` from a source. This gives us an `OutputId` that we can use. 
- /// - /// TODO: This is not a pleasant solution, but would require some significant refactoring - /// to the topology code to avoid. - pub fn dummy() -> Self { - Self { - component: "dummy".into(), - port: None, - } - } - - /// Given a list of [`schema::Definition`]s, returns a [`Vec`] of tuples of - /// this `OutputId` with each `Definition`. - pub fn with_definitions( - &self, - definitions: impl IntoIterator, - ) -> Vec<(OutputId, schema::Definition)> { - definitions - .into_iter() - .map(|definition| (self.clone(), definition)) - .collect() - } -} - impl fmt::Display for OutputId { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match &self.port { diff --git a/src/config/mod.rs b/src/config/mod.rs index 1cd6b5aff96af..bd9f6c283eaf1 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -11,8 +11,8 @@ use indexmap::IndexMap; pub use vector_config::component::{GenerateConfig, SinkDescription, TransformDescription}; use vector_config::configurable_component; pub use vector_core::config::{ - AcknowledgementsConfig, DataType, GlobalOptions, Input, LogNamespace, - SourceAcknowledgementsConfig, SourceOutput, TransformOutput, + AcknowledgementsConfig, DataType, GlobalOptions, Input, LogNamespace, Output, + SourceAcknowledgementsConfig, }; use crate::{conditions, event::Metric, secrets::SecretBackends, serde::OneOrMany}; diff --git a/src/config/source.rs b/src/config/source.rs index 1353c18c05dc4..366fb8553c66b 100644 --- a/src/config/source.rs +++ b/src/config/source.rs @@ -10,8 +10,7 @@ use vector_config_common::attributes::CustomAttribute; use vector_config_common::schema::{SchemaGenerator, SchemaObject}; use vector_core::{ config::{ - AcknowledgementsConfig, GlobalOptions, LogNamespace, SourceAcknowledgementsConfig, - SourceOutput, + AcknowledgementsConfig, GlobalOptions, LogNamespace, Output, SourceAcknowledgementsConfig, }, source::Source, }; @@ -90,7 +89,7 @@ pub trait SourceConfig: DynClone + NamedComponent + core::fmt::Debug + Send + Sy async fn build(&self, cx: SourceContext) -> crate::Result; /// Gets the list of outputs exposed by this source. - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec; + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec; /// Gets the list of resources, if any, used by this source. /// @@ -131,8 +130,8 @@ pub struct SourceContext { /// Tracks the schema IDs assigned to schemas exposed by the source. /// - /// Given a source can expose multiple [`SourceOutput`] channels, the ID is tied to the identifier of - /// that `SourceOutput`. + /// Given a source can expose multiple [`Output`] channels, the ID is tied to the identifier of + /// that `Output`. pub schema_definitions: HashMap, schema::Definition>, } diff --git a/src/config/transform.rs b/src/config/transform.rs index f3271c3422e7b..54a5996d144a9 100644 --- a/src/config/transform.rs +++ b/src/config/transform.rs @@ -11,13 +11,12 @@ use vector_config::{ }; use vector_config_common::attributes::CustomAttribute; use vector_core::{ - config::{GlobalOptions, Input, LogNamespace, TransformOutput}, + config::{GlobalOptions, Input, LogNamespace, Output}, schema, transform::Transform, }; use super::schema::Options as SchemaOptions; -use super::OutputId; use super::{id::Inputs, ComponentKey}; pub type BoxedTransform = Box; @@ -109,9 +108,9 @@ pub struct TransformContext { /// Tracks the schema IDs assigned to schemas exposed by the transform. 
/// - /// Given a transform can expose multiple [`TransformOutput`] channels, the ID is tied to the identifier of - /// that `TransformOutput`. - pub schema_definitions: HashMap, Vec>, + /// Given a transform can expose multiple [`Output`] channels, the ID is tied to the identifier of + /// that `Output`. + pub schema_definitions: HashMap, schema::Definition>, /// The schema definition created by merging all inputs of the transform. /// @@ -129,7 +128,7 @@ impl Default for TransformContext { key: Default::default(), globals: Default::default(), enrichment_tables: Default::default(), - schema_definitions: HashMap::from([(None, vec![schema::Definition::any()])]), + schema_definitions: HashMap::from([(None, schema::Definition::any())]), merged_schema_definition: schema::Definition::any(), schema: SchemaOptions::default(), } @@ -148,7 +147,7 @@ impl TransformContext { } #[cfg(any(test, feature = "test"))] - pub fn new_test(schema_definitions: HashMap, Vec>) -> Self { + pub fn new_test(schema_definitions: HashMap, schema::Definition>) -> Self { Self { schema_definitions, ..Default::default() @@ -191,9 +190,9 @@ pub trait TransformConfig: DynClone + NamedComponent + core::fmt::Debug + Send + /// of events flowing through the transform. fn outputs( &self, - input_definitions: &[(OutputId, schema::Definition)], + merged_definition: &schema::Definition, global_log_namespace: LogNamespace, - ) -> Vec; + ) -> Vec; /// Validates that the configuration of the transform is valid. /// diff --git a/src/config/unit_test/mod.rs b/src/config/unit_test/mod.rs index 273b5fc70b849..7eac5cbc8b1ed 100644 --- a/src/config/unit_test/mod.rs +++ b/src/config/unit_test/mod.rs @@ -15,6 +15,8 @@ use tokio::sync::{ Mutex, }; use uuid::Uuid; +use value::Kind; +use vector_core::config::LogNamespace; pub use self::unit_test_components::{ UnitTestSinkCheck, UnitTestSinkConfig, UnitTestSinkResult, UnitTestSourceConfig, @@ -28,7 +30,7 @@ use crate::{ TestDefinition, TestInput, TestInputValue, TestOutput, }, event::{Event, LogEvent, Value}, - signal, + schema, signal, topology::{ self, builder::{self, Pieces}, @@ -188,7 +190,7 @@ impl UnitTestBuildMetadata { .flat_map(|(key, transform)| { transform .inner - .outputs(&[], builder.schema.log_namespace()) + .outputs(&schema::Definition::any(), builder.schema.log_namespace()) .into_iter() .map(|output| OutputId { component: key.clone(), @@ -459,7 +461,13 @@ fn get_loose_end_outputs_sink(config: &ConfigBuilder) -> Option Vec { - vec![SourceOutput::new_logs( - DataType::all(), - schema::Definition::default_legacy_namespace(), - )] + fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { + vec![Output::default(DataType::all())] } fn can_acknowledge(&self) -> bool { @@ -103,11 +97,8 @@ impl SourceConfig for UnitTestStreamSourceConfig { })) } - fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { - vec![SourceOutput::new_logs( - DataType::all(), - schema::Definition::default_legacy_namespace(), - )] + fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { + vec![Output::default(DataType::all())] } fn can_acknowledge(&self) -> bool { diff --git a/src/config/validation.rs b/src/config/validation.rs index 2c971f602fed3..dbb25377d9654 100644 --- a/src/config/validation.rs +++ b/src/config/validation.rs @@ -1,4 +1,5 @@ -use crate::{config::schema, topology::schema::possible_definitions}; +use crate::config::schema; +use crate::topology::schema::merged_definition; use futures_util::{stream, FutureExt, StreamExt, TryFutureExt, TryStreamExt}; use 
heim::{disk::Partition, units::information::byte}; use indexmap::IndexMap; @@ -171,10 +172,7 @@ pub fn check_outputs(config: &ConfigBuilder) -> Result<(), Vec> { if transform .inner - .outputs( - &[(OutputId::dummy(), definition)], - config.schema.log_namespace(), - ) + .outputs(&definition, config.schema.log_namespace()) .iter() .map(|output| output.port.as_deref().unwrap_or("")) .any(|name| name == DEFAULT_OUTPUT) @@ -345,7 +343,7 @@ pub fn warnings(config: &Config) -> Vec { transform .inner .outputs( - &possible_definitions(&transform.inputs, config, &mut cache), + &merged_definition(&transform.inputs, config, &mut cache), config.schema.log_namespace(), ) .iter() diff --git a/src/source_sender/mod.rs b/src/source_sender/mod.rs index 30ee2cf2b781c..5cf013e06e886 100644 --- a/src/source_sender/mod.rs +++ b/src/source_sender/mod.rs @@ -9,7 +9,7 @@ use vector_buffers::topology::channel::{self, LimitedReceiver, LimitedSender}; #[cfg(test)] use vector_core::event::{into_event_stream, EventStatus}; use vector_core::{ - config::{log_schema, SourceOutput}, + config::{log_schema, Output}, event::{array, Event, EventArray, EventContainer, EventRef}, internal_event::{ self, CountByteSize, EventsSent, InternalEventHandle as _, Registered, DEFAULT_OUTPUT, @@ -48,7 +48,7 @@ impl Builder { } } - pub fn add_source_output(&mut self, output: SourceOutput) -> LimitedReceiver { + pub fn add_output(&mut self, output: Output) -> LimitedReceiver { let lag_time = self.lag_time.clone(); match output.port { None => { diff --git a/src/sources/amqp.rs b/src/sources/amqp.rs index 157742b8733e2..cda43a446ade3 100644 --- a/src/sources/amqp.rs +++ b/src/sources/amqp.rs @@ -3,7 +3,7 @@ use crate::{ amqp::AmqpConfig, codecs::{Decoder, DecodingConfig}, - config::{SourceConfig, SourceContext, SourceOutput}, + config::{Output, SourceConfig, SourceContext}, event::{BatchNotifier, BatchStatus}, internal_events::{ source::{AmqpAckError, AmqpBytesReceived, AmqpEventError, AmqpRejectError}, @@ -142,7 +142,7 @@ impl SourceConfig for AmqpSourceConfig { amqp_source(self, cx.shutdown, cx.out, log_namespace, acknowledgements).await } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let log_namespace = global_log_namespace.merge(self.log_namespace); let schema_definition = self .decoding @@ -180,10 +180,7 @@ impl SourceConfig for AmqpSourceConfig { None, ); - vec![SourceOutput::new_logs( - self.decoding.output_type(), - schema_definition, - )] + vec![Output::default(self.decoding.output_type()).with_schema_definition(schema_definition)] } fn can_acknowledge(&self) -> bool { @@ -525,10 +522,10 @@ pub mod test { ..Default::default() }; - let definition = config - .outputs(LogNamespace::Vector) - .remove(0) - .schema_definition(true); + let definition = config.outputs(LogNamespace::Vector)[0] + .clone() + .log_schema_definition + .unwrap(); let expected_definition = Definition::new_with_default_metadata(Kind::bytes(), [LogNamespace::Vector]) @@ -552,17 +549,17 @@ pub mod test { .with_metadata_field(&owned_value_path!("amqp", "exchange"), Kind::bytes(), None) .with_metadata_field(&owned_value_path!("amqp", "offset"), Kind::integer(), None); - assert_eq!(definition, Some(expected_definition)); + assert_eq!(definition, expected_definition); } #[test] fn output_schema_definition_legacy_namespace() { let config = AmqpSourceConfig::default(); - let definition = config - .outputs(LogNamespace::Legacy) - .remove(0) - .schema_definition(true); + let definition = 
config.outputs(LogNamespace::Legacy)[0] + .clone() + .log_schema_definition + .unwrap(); let expected_definition = Definition::new_with_default_metadata( Kind::object(Collection::empty()), @@ -579,7 +576,7 @@ pub mod test { .with_event_field(&owned_value_path!("exchange"), Kind::bytes(), None) .with_event_field(&owned_value_path!("offset"), Kind::integer(), None); - assert_eq!(definition, Some(expected_definition)); + assert_eq!(definition, expected_definition); } } diff --git a/src/sources/apache_metrics/mod.rs b/src/sources/apache_metrics/mod.rs index 1a207624c8cb5..dd58f7bc8a223 100644 --- a/src/sources/apache_metrics/mod.rs +++ b/src/sources/apache_metrics/mod.rs @@ -14,7 +14,7 @@ use vector_config::configurable_component; use vector_core::{metric_tags, EstimatedJsonEncodedSizeOf}; use crate::{ - config::{GenerateConfig, ProxyConfig, SourceConfig, SourceContext, SourceOutput}, + config::{self, GenerateConfig, Output, ProxyConfig, SourceConfig, SourceContext}, event::metric::{Metric, MetricKind, MetricValue}, http::HttpClient, internal_events::{ @@ -93,8 +93,8 @@ impl SourceConfig for ApacheMetricsConfig { )) } - fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { - vec![SourceOutput::new_metrics()] + fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { + vec![Output::default(config::DataType::Metric)] } fn can_acknowledge(&self) -> bool { diff --git a/src/sources/aws_ecs_metrics/mod.rs b/src/sources/aws_ecs_metrics/mod.rs index e3db0a38c7619..ab474f64663ca 100644 --- a/src/sources/aws_ecs_metrics/mod.rs +++ b/src/sources/aws_ecs_metrics/mod.rs @@ -10,7 +10,7 @@ use vector_config::configurable_component; use vector_core::{config::LogNamespace, EstimatedJsonEncodedSizeOf}; use crate::{ - config::{GenerateConfig, SourceConfig, SourceContext, SourceOutput}, + config::{self, GenerateConfig, Output, SourceConfig, SourceContext}, internal_events::{ AwsEcsMetricsEventsReceived, AwsEcsMetricsHttpError, AwsEcsMetricsParseError, AwsEcsMetricsResponseError, RequestCompleted, StreamClosedError, @@ -159,8 +159,8 @@ impl SourceConfig for AwsEcsMetricsSourceConfig { ))) } - fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { - vec![SourceOutput::new_metrics()] + fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { + vec![Output::default(config::DataType::Metric)] } fn can_acknowledge(&self) -> bool { diff --git a/src/sources/aws_kinesis_firehose/mod.rs b/src/sources/aws_kinesis_firehose/mod.rs index 198f14104d308..20dd3d14eb3b6 100644 --- a/src/sources/aws_kinesis_firehose/mod.rs +++ b/src/sources/aws_kinesis_firehose/mod.rs @@ -13,8 +13,7 @@ use warp::Filter; use crate::{ codecs::DecodingConfig, config::{ - GenerateConfig, Resource, SourceAcknowledgementsConfig, SourceConfig, SourceContext, - SourceOutput, + GenerateConfig, Output, Resource, SourceAcknowledgementsConfig, SourceConfig, SourceContext, }, serde::{bool_or_struct, default_decoding, default_framing_message_based}, tls::{MaybeTlsSettings, TlsEnableableConfig}, @@ -184,7 +183,7 @@ impl SourceConfig for AwsKinesisFirehoseConfig { })) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let schema_definition = self .decoding .schema_definition(global_log_namespace.merge(self.log_namespace)) @@ -204,10 +203,7 @@ impl SourceConfig for AwsKinesisFirehoseConfig { None, ); - vec![SourceOutput::new_logs( - self.decoding.output_type(), - schema_definition, - )] + 
vec![Output::default(self.decoding.output_type()).with_schema_definition(schema_definition)] } fn resources(&self) -> Vec { diff --git a/src/sources/aws_s3/mod.rs b/src/sources/aws_s3/mod.rs index 9085a5b26ce94..ecbc83f582787 100644 --- a/src/sources/aws_s3/mod.rs +++ b/src/sources/aws_s3/mod.rs @@ -15,9 +15,7 @@ use super::util::MultilineConfig; use crate::{ aws::{auth::AwsAuthentication, create_client, RegionOrEndpoint}, common::{s3::S3ClientBuilder, sqs::SqsClientBuilder}, - config::{ - ProxyConfig, SourceAcknowledgementsConfig, SourceConfig, SourceContext, SourceOutput, - }, + config::{Output, ProxyConfig, SourceAcknowledgementsConfig, SourceConfig, SourceContext}, line_agg, serde::bool_or_struct, tls::TlsConfig, @@ -140,7 +138,7 @@ impl SourceConfig for AwsS3Config { } } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let log_namespace = global_log_namespace.merge(self.log_namespace); let mut schema_definition = BytesDeserializerConfig .schema_definition(log_namespace) @@ -187,7 +185,7 @@ impl SourceConfig for AwsS3Config { schema_definition = schema_definition.unknown_fields(Kind::bytes()); } - vec![SourceOutput::new_logs(DataType::Log, schema_definition)] + vec![Output::default(DataType::Log).with_schema_definition(schema_definition)] } fn can_acknowledge(&self) -> bool { diff --git a/src/sources/aws_sqs/config.rs b/src/sources/aws_sqs/config.rs index b165bc16bdee2..8d746033c062a 100644 --- a/src/sources/aws_sqs/config.rs +++ b/src/sources/aws_sqs/config.rs @@ -12,7 +12,7 @@ use crate::common::sqs::SqsClientBuilder; use crate::tls::TlsConfig; use crate::{ aws::{auth::AwsAuthentication, region::RegionOrEndpoint}, - config::{SourceAcknowledgementsConfig, SourceConfig, SourceContext, SourceOutput}, + config::{Output, SourceAcknowledgementsConfig, SourceConfig, SourceContext}, serde::{bool_or_struct, default_decoding, default_framing_message_based}, sources::aws_sqs::source::SqsSource, }; @@ -131,7 +131,7 @@ impl SourceConfig for AwsSqsConfig { )) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let schema_definition = self .decoding .schema_definition(global_log_namespace.merge(self.log_namespace)) @@ -144,10 +144,7 @@ impl SourceConfig for AwsSqsConfig { Some("timestamp"), ); - vec![SourceOutput::new_logs( - self.decoding.output_type(), - schema_definition, - )] + vec![Output::default(self.decoding.output_type()).with_schema_definition(schema_definition)] } fn can_acknowledge(&self) -> bool { diff --git a/src/sources/aws_sqs/source.rs b/src/sources/aws_sqs/source.rs index 3a17e8c801fcb..8f02fd3028e0f 100644 --- a/src/sources/aws_sqs/source.rs +++ b/src/sources/aws_sqs/source.rs @@ -233,10 +233,10 @@ mod tests { log_namespace: Some(true), ..Default::default() }; - let definitions = config - .outputs(LogNamespace::Vector) - .remove(0) - .schema_definition(true); + let definition = config.outputs(LogNamespace::Vector)[0] + .clone() + .log_schema_definition + .unwrap(); let message = "test"; let now = Utc::now(); @@ -276,7 +276,7 @@ mod tests { .to_string_lossy(), now.to_rfc3339_opts(SecondsFormat::AutoSi, true) ); - definitions.unwrap().assert_valid_for_event(&events[0]); + definition.assert_valid_for_event(&events[0]); } #[tokio::test] @@ -285,10 +285,10 @@ mod tests { log_namespace: None, ..Default::default() }; - let definitions = config - .outputs(LogNamespace::Legacy) - .remove(0) - .schema_definition(true); + let 
definition = config.outputs(LogNamespace::Legacy)[0] + .clone() + .log_schema_definition + .unwrap(); let message = "test"; let now = Utc::now(); @@ -329,7 +329,7 @@ mod tests { .to_string_lossy(), now.to_rfc3339_opts(SecondsFormat::AutoSi, true) ); - definitions.unwrap().assert_valid_for_event(&events[0]); + definition.assert_valid_for_event(&events[0]); } #[test] diff --git a/src/sources/datadog_agent/metrics.rs b/src/sources/datadog_agent/metrics.rs index 0a02e2a11dc95..ea6d7538319d0 100644 --- a/src/sources/datadog_agent/metrics.rs +++ b/src/sources/datadog_agent/metrics.rs @@ -81,6 +81,7 @@ fn sketches_service( api_token, query_params.dd_api_key, ), + &source.metrics_schema_definition, &source.events_received, ) }); @@ -119,9 +120,7 @@ fn series_v1_service( api_token, query_params.dd_api_key, ), - // Currently metrics do not have schemas defined, so for now we just pass a - // default one. - &Arc::new(schema::Definition::default_legacy_namespace()), + &source.metrics_schema_definition, &source.events_received, ) }); @@ -160,6 +159,7 @@ fn series_v2_service( api_token, query_params.dd_api_key, ), + &source.metrics_schema_definition, &source.events_received, ) }); @@ -172,6 +172,7 @@ fn series_v2_service( fn decode_datadog_sketches( body: Bytes, api_key: Option>, + schema_definition: &Arc, events_received: &Registered, ) -> Result, ErrorMessage> { if body.is_empty() { @@ -183,7 +184,7 @@ fn decode_datadog_sketches( return Ok(Vec::new()); } - let metrics = decode_ddsketch(body, &api_key).map_err(|error| { + let metrics = decode_ddsketch(body, &api_key, schema_definition).map_err(|error| { ErrorMessage::new( StatusCode::UNPROCESSABLE_ENTITY, format!("Error decoding Datadog sketch: {:?}", error), @@ -201,6 +202,7 @@ fn decode_datadog_sketches( fn decode_datadog_series_v2( body: Bytes, api_key: Option>, + schema_definition: &Arc, events_received: &Registered, ) -> Result, ErrorMessage> { if body.is_empty() { @@ -212,12 +214,14 @@ fn decode_datadog_series_v2( return Ok(Vec::new()); } - let metrics = decode_ddseries_v2(body, &api_key, events_received).map_err(|error| { - ErrorMessage::new( - StatusCode::UNPROCESSABLE_ENTITY, - format!("Error decoding Datadog sketch: {:?}", error), - ) - })?; + let metrics = decode_ddseries_v2(body, &api_key, schema_definition, events_received).map_err( + |error| { + ErrorMessage::new( + StatusCode::UNPROCESSABLE_ENTITY, + format!("Error decoding Datadog sketch: {:?}", error), + ) + }, + )?; events_received.emit(CountByteSize( metrics.len(), @@ -230,6 +234,7 @@ fn decode_datadog_series_v2( pub(crate) fn decode_ddseries_v2( frame: Bytes, api_key: &Option>, + schema_definition: &Arc, events_received: &Registered, ) -> crate::Result> { let payload = MetricPayload::decode(frame)?; @@ -331,6 +336,9 @@ pub(crate) fn decode_ddseries_v2( if let Some(k) = &api_key { metric.metadata_mut().set_datadog_api_key(Arc::clone(k)); } + metric + .metadata_mut() + .set_schema_definition(schema_definition); metric.into() }) @@ -494,6 +502,7 @@ fn namespace_name_from_dd_metric(dd_metric_name: &str) -> (Option<&str>, &str) { pub(crate) fn decode_ddsketch( frame: Bytes, api_key: &Option>, + schema_definition: &Arc, ) -> crate::Result> { let payload = SketchPayload::decode(frame)?; // payload.metadata is always empty for payload coming from dd agents @@ -535,6 +544,9 @@ pub(crate) fn decode_ddsketch( metric.metadata_mut().set_datadog_api_key(Arc::clone(k)); } + metric + .metadata_mut() + .set_schema_definition(schema_definition); metric.into() }) }) diff --git 
a/src/sources/datadog_agent/mod.rs b/src/sources/datadog_agent/mod.rs index c92d8856e87b1..5132e12c50918 100644 --- a/src/sources/datadog_agent/mod.rs +++ b/src/sources/datadog_agent/mod.rs @@ -40,8 +40,8 @@ use warp::{filters::BoxedFilter, reject::Rejection, reply::Response, Filter, Rep use crate::{ codecs::{Decoder, DecodingConfig}, config::{ - log_schema, DataType, GenerateConfig, Resource, SourceAcknowledgementsConfig, SourceConfig, - SourceContext, SourceOutput, + log_schema, DataType, GenerateConfig, Output, Resource, SourceAcknowledgementsConfig, + SourceConfig, SourceContext, }, event::Event, internal_events::{HttpBytesReceived, HttpDecompressError, StreamClosedError}, @@ -154,6 +154,13 @@ impl SourceConfig for DatadogAgentConfig { .expect("registered log schema required") .clone(); + let metrics_schema_definition = cx + .schema_definitions + .get(&Some(METRICS.to_owned())) + .or_else(|| cx.schema_definitions.get(&None)) + .expect("registered metrics schema required") + .clone(); + let decoder = DecodingConfig::new(self.framing.clone(), self.decoding.clone(), log_namespace).build(); @@ -163,6 +170,7 @@ impl SourceConfig for DatadogAgentConfig { decoder, tls.http_protocol_name(), logs_schema_definition, + metrics_schema_definition, log_namespace, ); let listener = tls.bind(&self.address).await?; @@ -197,7 +205,7 @@ impl SourceConfig for DatadogAgentConfig { })) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let definition = self .decoding .schema_definition(global_log_namespace.merge(self.log_namespace)) @@ -247,12 +255,14 @@ impl SourceConfig for DatadogAgentConfig { if self.multiple_outputs { vec![ - SourceOutput::new_logs(DataType::Log, definition).with_port(LOGS), - SourceOutput::new_metrics().with_port(METRICS), - SourceOutput::new_traces().with_port(TRACES), + Output::default(DataType::Metric).with_port(METRICS), + Output::default(DataType::Log) + .with_schema_definition(definition) + .with_port(LOGS), + Output::default(DataType::Trace).with_port(TRACES), ] } else { - vec![SourceOutput::new_logs(DataType::all(), definition)] + vec![Output::default(DataType::all()).with_schema_definition(definition)] } } @@ -289,6 +299,7 @@ pub(crate) struct DatadogAgentSource { pub(crate) decoder: Decoder, protocol: &'static str, logs_schema_definition: Arc, + metrics_schema_definition: Arc, events_received: Registered, } @@ -325,6 +336,7 @@ impl DatadogAgentSource { decoder: Decoder, protocol: &'static str, logs_schema_definition: schema::Definition, + metrics_schema_definition: schema::Definition, log_namespace: LogNamespace, ) -> Self { Self { @@ -338,6 +350,7 @@ impl DatadogAgentSource { decoder, protocol, logs_schema_definition: Arc::new(logs_schema_definition), + metrics_schema_definition: Arc::new(metrics_schema_definition), log_namespace, events_received: register!(EventsReceived), } diff --git a/src/sources/datadog_agent/tests.rs b/src/sources/datadog_agent/tests.rs index d1d629c119e01..e71cbde6c0fc4 100644 --- a/src/sources/datadog_agent/tests.rs +++ b/src/sources/datadog_agent/tests.rs @@ -57,6 +57,14 @@ fn test_logs_schema_definition() -> schema::Definition { ) } +fn test_metrics_schema_definition() -> schema::Definition { + schema::Definition::empty_legacy_namespace().with_event_field( + &owned_value_path!("a schema tag"), + Kind::boolean().or_null(), + Some("tag"), + ) +} + impl Arbitrary for LogMsg { fn arbitrary(g: &mut Gen) -> Self { LogMsg { @@ -93,6 +101,7 @@ fn test_decode_log_body() { 
decoder, "http", test_logs_schema_definition(), + test_metrics_schema_definition(), LogNamespace::Legacy, ); @@ -164,8 +173,10 @@ async fn source( address, store_api_key, acknowledgements, multiple_outputs )) .unwrap(); - let schema_definitions = - HashMap::from([(Some(LOGS.to_owned()), test_logs_schema_definition())]); + let schema_definitions = HashMap::from([ + (Some(LOGS.to_owned()), test_logs_schema_definition()), + (Some(METRICS.to_owned()), test_metrics_schema_definition()), + ]); let context = SourceContext::new_test(sender, Some(schema_definitions)); tokio::spawn(async move { config.build(context).await.unwrap().await.unwrap(); @@ -924,6 +935,13 @@ async fn decode_series_endpoint_v1() { &events[3].metadata().datadog_api_key().as_ref().unwrap()[..], "12345678abcdefgh12345678abcdefgh" ); + + for event in events { + assert_eq!( + event.metadata().schema_definition(), + &test_metrics_schema_definition() + ); + } } }) .await; @@ -1026,6 +1044,13 @@ async fn decode_sketches() { &events[0].metadata().datadog_api_key().as_ref().unwrap()[..], "12345678abcdefgh12345678abcdefgh" ); + + for event in events { + assert_eq!( + event.metadata().schema_definition(), + &test_metrics_schema_definition() + ); + } } }) .await; @@ -1380,6 +1405,10 @@ async fn split_outputs() { &event.metadata().datadog_api_key().as_ref().unwrap()[..], "abcdefgh12345678abcdefgh12345678" ); + assert_eq!( + event.metadata().schema_definition(), + &test_metrics_schema_definition() + ); } { @@ -1831,7 +1860,7 @@ fn test_config_outputs() { let mut outputs = config .outputs(LogNamespace::Legacy) .into_iter() - .map(|output| (output.port.clone(), output.schema_definition(true))) + .map(|output| (output.port, output.log_schema_definition)) .collect::>(); for (name, want) in want { @@ -2050,6 +2079,13 @@ async fn decode_series_endpoint_v2() { &events[3].metadata().datadog_api_key().as_ref().unwrap()[..], "12345678abcdefgh12345678abcdefgh" ); + + for event in events { + assert_eq!( + event.metadata().schema_definition(), + &test_metrics_schema_definition() + ); + } } }) .await; @@ -2062,55 +2098,54 @@ fn test_output_schema_definition_json_vector_namespace() { decoding.codec = "json" "#}) .unwrap() - .outputs(LogNamespace::Vector) - .remove(0) - .schema_definition(true); + .outputs(LogNamespace::Vector)[0] + .clone() + .log_schema_definition + .unwrap(); assert_eq!( definition, - Some( - Definition::new_with_default_metadata(Kind::json(), [LogNamespace::Vector]) - .with_metadata_field( - &owned_value_path!("datadog_agent", "ddsource"), - Kind::bytes(), - Some("source") - ) - .with_metadata_field( - &owned_value_path!("datadog_agent", "ddtags"), - Kind::bytes(), - Some("tags") - ) - .with_metadata_field( - &owned_value_path!("datadog_agent", "hostname"), - Kind::bytes(), - Some("host") - ) - .with_metadata_field( - &owned_value_path!("datadog_agent", "service"), - Kind::bytes(), - Some("service") - ) - .with_metadata_field( - &owned_value_path!("datadog_agent", "status"), - Kind::bytes(), - Some("severity") - ) - .with_metadata_field( - &owned_value_path!("datadog_agent", "timestamp"), - Kind::timestamp(), - Some("timestamp") - ) - .with_metadata_field( - &owned_value_path!("vector", "ingest_timestamp"), - Kind::timestamp(), - None - ) - .with_metadata_field( - &owned_value_path!("vector", "source_type"), - Kind::bytes(), - None - ) - ) + Definition::new_with_default_metadata(Kind::json(), [LogNamespace::Vector]) + .with_metadata_field( + &owned_value_path!("datadog_agent", "ddsource"), + Kind::bytes(), + Some("source") + ) + 
.with_metadata_field( + &owned_value_path!("datadog_agent", "ddtags"), + Kind::bytes(), + Some("tags") + ) + .with_metadata_field( + &owned_value_path!("datadog_agent", "hostname"), + Kind::bytes(), + Some("host") + ) + .with_metadata_field( + &owned_value_path!("datadog_agent", "service"), + Kind::bytes(), + Some("service") + ) + .with_metadata_field( + &owned_value_path!("datadog_agent", "status"), + Kind::bytes(), + Some("severity") + ) + .with_metadata_field( + &owned_value_path!("datadog_agent", "timestamp"), + Kind::timestamp(), + Some("timestamp") + ) + .with_metadata_field( + &owned_value_path!("vector", "ingest_timestamp"), + Kind::timestamp(), + None + ) + .with_metadata_field( + &owned_value_path!("vector", "source_type"), + Kind::bytes(), + None + ) ) } @@ -2121,56 +2156,55 @@ fn test_output_schema_definition_bytes_vector_namespace() { decoding.codec = "bytes" "#}) .unwrap() - .outputs(LogNamespace::Vector) - .remove(0) - .schema_definition(true); + .outputs(LogNamespace::Vector)[0] + .clone() + .log_schema_definition + .unwrap(); assert_eq!( definition, - Some( - Definition::new_with_default_metadata(Kind::bytes(), [LogNamespace::Vector]) - .with_metadata_field( - &owned_value_path!("datadog_agent", "ddsource"), - Kind::bytes(), - Some("source") - ) - .with_metadata_field( - &owned_value_path!("datadog_agent", "ddtags"), - Kind::bytes(), - Some("tags") - ) - .with_metadata_field( - &owned_value_path!("datadog_agent", "hostname"), - Kind::bytes(), - Some("host") - ) - .with_metadata_field( - &owned_value_path!("datadog_agent", "service"), - Kind::bytes(), - Some("service") - ) - .with_metadata_field( - &owned_value_path!("datadog_agent", "status"), - Kind::bytes(), - Some("severity") - ) - .with_metadata_field( - &owned_value_path!("datadog_agent", "timestamp"), - Kind::timestamp(), - Some("timestamp") - ) - .with_metadata_field( - &owned_value_path!("vector", "ingest_timestamp"), - Kind::timestamp(), - None - ) - .with_metadata_field( - &owned_value_path!("vector", "source_type"), - Kind::bytes(), - None - ) - .with_meaning(OwnedTargetPath::event_root(), "message") - ) + Definition::new_with_default_metadata(Kind::bytes(), [LogNamespace::Vector]) + .with_metadata_field( + &owned_value_path!("datadog_agent", "ddsource"), + Kind::bytes(), + Some("source") + ) + .with_metadata_field( + &owned_value_path!("datadog_agent", "ddtags"), + Kind::bytes(), + Some("tags") + ) + .with_metadata_field( + &owned_value_path!("datadog_agent", "hostname"), + Kind::bytes(), + Some("host") + ) + .with_metadata_field( + &owned_value_path!("datadog_agent", "service"), + Kind::bytes(), + Some("service") + ) + .with_metadata_field( + &owned_value_path!("datadog_agent", "status"), + Kind::bytes(), + Some("severity") + ) + .with_metadata_field( + &owned_value_path!("datadog_agent", "timestamp"), + Kind::timestamp(), + Some("timestamp") + ) + .with_metadata_field( + &owned_value_path!("vector", "ingest_timestamp"), + Kind::timestamp(), + None + ) + .with_metadata_field( + &owned_value_path!("vector", "source_type"), + Kind::bytes(), + None + ) + .with_meaning(OwnedTargetPath::event_root(), "message") ) } @@ -2181,26 +2215,25 @@ fn test_output_schema_definition_json_legacy_namespace() { decoding.codec = "json" "#}) .unwrap() - .outputs(LogNamespace::Legacy) - .remove(0) - .schema_definition(true); + .outputs(LogNamespace::Legacy)[0] + .clone() + .log_schema_definition + .unwrap(); assert_eq!( definition, - Some( - Definition::new_with_default_metadata(Kind::json(), [LogNamespace::Legacy]) - 
.with_event_field( - &owned_value_path!("timestamp"), - Kind::json().or_timestamp(), - None - ) - .with_event_field(&owned_value_path!("ddsource"), Kind::json(), None) - .with_event_field(&owned_value_path!("ddtags"), Kind::json(), None) - .with_event_field(&owned_value_path!("hostname"), Kind::json(), None) - .with_event_field(&owned_value_path!("service"), Kind::json(), None) - .with_event_field(&owned_value_path!("source_type"), Kind::json(), None) - .with_event_field(&owned_value_path!("status"), Kind::json(), None) - ) + Definition::new_with_default_metadata(Kind::json(), [LogNamespace::Legacy]) + .with_event_field( + &owned_value_path!("timestamp"), + Kind::json().or_timestamp(), + None + ) + .with_event_field(&owned_value_path!("ddsource"), Kind::json(), None) + .with_event_field(&owned_value_path!("ddtags"), Kind::json(), None) + .with_event_field(&owned_value_path!("hostname"), Kind::json(), None) + .with_event_field(&owned_value_path!("service"), Kind::json(), None) + .with_event_field(&owned_value_path!("source_type"), Kind::json(), None) + .with_event_field(&owned_value_path!("status"), Kind::json(), None) ) } @@ -2211,45 +2244,44 @@ fn test_output_schema_definition_bytes_legacy_namespace() { decoding.codec = "bytes" "#}) .unwrap() - .outputs(LogNamespace::Legacy) - .remove(0) - .schema_definition(true); + .outputs(LogNamespace::Legacy)[0] + .clone() + .log_schema_definition + .unwrap(); assert_eq!( definition, - Some( - Definition::new_with_default_metadata( - Kind::object(Collection::empty()), - [LogNamespace::Legacy] - ) - .with_event_field( - &owned_value_path!("ddsource"), - Kind::bytes(), - Some("source") - ) - .with_event_field(&owned_value_path!("ddtags"), Kind::bytes(), Some("tags")) - .with_event_field(&owned_value_path!("hostname"), Kind::bytes(), Some("host")) - .with_event_field( - &owned_value_path!("message"), - Kind::bytes(), - Some("message") - ) - .with_event_field( - &owned_value_path!("service"), - Kind::bytes(), - Some("service") - ) - .with_event_field(&owned_value_path!("source_type"), Kind::bytes(), None) - .with_event_field( - &owned_value_path!("status"), - Kind::bytes(), - Some("severity") - ) - .with_event_field( - &owned_value_path!("timestamp"), - Kind::timestamp(), - Some("timestamp") - ) + Definition::new_with_default_metadata( + Kind::object(Collection::empty()), + [LogNamespace::Legacy] + ) + .with_event_field( + &owned_value_path!("ddsource"), + Kind::bytes(), + Some("source") + ) + .with_event_field(&owned_value_path!("ddtags"), Kind::bytes(), Some("tags")) + .with_event_field(&owned_value_path!("hostname"), Kind::bytes(), Some("host")) + .with_event_field( + &owned_value_path!("message"), + Kind::bytes(), + Some("message") + ) + .with_event_field( + &owned_value_path!("service"), + Kind::bytes(), + Some("service") + ) + .with_event_field(&owned_value_path!("source_type"), Kind::bytes(), None) + .with_event_field( + &owned_value_path!("status"), + Kind::bytes(), + Some("severity") + ) + .with_event_field( + &owned_value_path!("timestamp"), + Kind::timestamp(), + Some("timestamp") ) ) } diff --git a/src/sources/demo_logs.rs b/src/sources/demo_logs.rs index 9f0b6a0a098f7..c6aa841e6f751 100644 --- a/src/sources/demo_logs.rs +++ b/src/sources/demo_logs.rs @@ -19,7 +19,7 @@ use vector_core::{config::LogNamespace, EstimatedJsonEncodedSizeOf}; use crate::{ codecs::{Decoder, DecodingConfig}, - config::{SourceConfig, SourceContext, SourceOutput}, + config::{Output, SourceConfig, SourceContext}, internal_events::{DemoLogsEventProcessed, EventsReceived, 
StreamClosedError}, serde::{default_decoding, default_framing_message_based}, shutdown::ShutdownSignal, @@ -292,7 +292,7 @@ impl SourceConfig for DemoLogsConfig { ))) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec<SourceOutput> { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec<Output> { // There is a global and per-source `log_namespace` config. The source config overrides the global setting, // and is merged here. let log_namespace = global_log_namespace.merge(self.log_namespace); @@ -302,10 +302,7 @@ impl SourceConfig for DemoLogsConfig { .schema_definition(log_namespace) .with_standard_vector_source_metadata(); - vec![SourceOutput::new_logs( - self.decoding.output_type(), - schema_definition, - )] + vec![Output::default(self.decoding.output_type()).with_schema_definition(schema_definition)] } fn can_acknowledge(&self) -> bool { diff --git a/src/sources/dnstap/mod.rs b/src/sources/dnstap/mod.rs index cb61a2876d76b..3c105f9473fe7 100644 --- a/src/sources/dnstap/mod.rs +++ b/src/sources/dnstap/mod.rs @@ -11,7 +11,7 @@ use vector_config::configurable_component; use super::util::framestream::{build_framestream_unix_source, FrameHandler}; use crate::{ - config::{log_schema, DataType, SourceConfig, SourceContext, SourceOutput}, + config::{log_schema, DataType, Output, SourceConfig, SourceContext}, event::{Event, LogEvent}, internal_events::DnstapParseError, Result, @@ -182,12 +182,12 @@ impl SourceConfig for DnstapConfig { build_framestream_unix_source(frame_handler, cx.shutdown, cx.out) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec<SourceOutput> { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec<Output> { let log_namespace = global_log_namespace.merge(self.log_namespace); let schema_definition = self .schema_definition(log_namespace) .with_standard_vector_source_metadata(); - vec![SourceOutput::new_logs(DataType::Log, schema_definition)] + vec![Output::default(DataType::Log).with_schema_definition(schema_definition)] } fn can_acknowledge(&self) -> bool { diff --git a/src/sources/docker_logs/mod.rs b/src/sources/docker_logs/mod.rs index 62abfc1ef52ff..3323a9dc7ac42 100644 --- a/src/sources/docker_logs/mod.rs +++ b/src/sources/docker_logs/mod.rs @@ -30,7 +30,7 @@ use vector_core::config::{LegacyKey, LogNamespace}; use super::util::MultilineConfig; use crate::{ - config::{log_schema, DataType, SourceConfig, SourceContext, SourceOutput}, + config::{log_schema, DataType, Output, SourceConfig, SourceContext}, docker::{docker, DockerTlsConfig}, event::{self, merge_state::LogEventMergeState, EstimatedJsonEncodedSizeOf, LogEvent, Value}, internal_events::{ @@ -272,7 +272,7 @@ impl SourceConfig for DockerLogsConfig { })) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec<SourceOutput> { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec<Output> { let host_key = self.host_key.clone().path.map(LegacyKey::Overwrite); let schema_definition = BytesDeserializerConfig @@ -351,7 +351,7 @@ impl SourceConfig for DockerLogsConfig { None, ); - vec![SourceOutput::new_logs(DataType::Log, schema_definition)] + vec![Output::default(DataType::Log).with_schema_definition(schema_definition)] } fn can_acknowledge(&self) -> bool { diff --git a/src/sources/docker_logs/tests.rs b/src/sources/docker_logs/tests.rs index 04c9a05c37bb0..0f8cd0981c069 100644 --- a/src/sources/docker_logs/tests.rs +++ b/src/sources/docker_logs/tests.rs @@ -293,12 +293,13 @@ mod integration_tests { #[tokio::test] async fn container_with_tty_vector_namespace() { trace_init(); - let schema_definitions = 
DockerLogsConfig::default() + let schema_definition = DockerLogsConfig::default() .outputs(LogNamespace::Vector) .first() .unwrap() - .schema_definition - .clone(); + .log_schema_definition + .clone() + .unwrap(); assert_source_compliance(&SOURCE_TAGS, async { let message = "log container_with_tty"; @@ -312,9 +313,7 @@ mod integration_tests { let events = collect_n(out, 1).await; container_remove(&id, &docker).await; - schema_definitions - .unwrap() - .assert_valid_for_event(&events[0]); + schema_definition.assert_valid_for_event(&events[0]); assert_eq!(events[0].as_log().get(".").unwrap(), &vrl::value!(message)); }) .await; @@ -323,12 +322,13 @@ mod integration_tests { #[tokio::test] async fn container_with_tty_legacy_namespace() { trace_init(); - let schema_definitions = DockerLogsConfig::default() + let schema_definition = DockerLogsConfig::default() .outputs(LogNamespace::Legacy) .first() .unwrap() - .schema_definition - .clone(); + .log_schema_definition + .clone() + .unwrap(); assert_source_compliance(&SOURCE_TAGS, async { let message = "log container_with_tty"; @@ -342,9 +342,7 @@ mod integration_tests { let events = collect_n(out, 1).await; container_remove(&id, &docker).await; - schema_definitions - .unwrap() - .assert_valid_for_event(&events[0]); + schema_definition.assert_valid_for_event(&events[0]); assert_eq!( events[0].as_log()[log_schema().message_key()], message.into() @@ -356,12 +354,13 @@ mod integration_tests { #[tokio::test] async fn newly_started_vector_namespace() { trace_init(); - let schema_definitions = DockerLogsConfig::default() + let schema_definition = DockerLogsConfig::default() .outputs(LogNamespace::Vector) .first() .unwrap() - .schema_definition - .clone(); + .log_schema_definition + .clone() + .unwrap(); assert_source_compliance(&SOURCE_TAGS, async { let message = "9"; @@ -376,9 +375,7 @@ mod integration_tests { let events = collect_n(out, 1).await; container_remove(&id, &docker).await; - schema_definitions - .unwrap() - .assert_valid_for_event(&events[0]); + schema_definition.assert_valid_for_event(&events[0]); let log = events[0].as_log(); let meta = log.metadata().value(); @@ -416,12 +413,13 @@ mod integration_tests { #[tokio::test] async fn newly_started_legacy_namespace() { trace_init(); - let schema_definitions = DockerLogsConfig::default() + let schema_definition = DockerLogsConfig::default() .outputs(LogNamespace::Legacy) .first() .unwrap() - .schema_definition - .clone(); + .log_schema_definition + .clone() + .unwrap(); assert_source_compliance(&SOURCE_TAGS, async { let message = "9"; @@ -436,9 +434,7 @@ mod integration_tests { let events = collect_n(out, 1).await; container_remove(&id, &docker).await; - schema_definitions - .unwrap() - .assert_valid_for_event(&events[0]); + schema_definition.assert_valid_for_event(&events[0]); let log = events[0].as_log(); assert_eq!(log[log_schema().message_key()], message.into()); assert_eq!(log[CONTAINER], id.into()); @@ -457,12 +453,13 @@ mod integration_tests { #[tokio::test] async fn restart_legacy_namespace() { trace_init(); - let schema_definitions = DockerLogsConfig::default() + let schema_definition = DockerLogsConfig::default() .outputs(LogNamespace::Legacy) .first() .unwrap() - .schema_definition - .clone(); + .log_schema_definition + .clone() + .unwrap(); assert_source_compliance(&SOURCE_TAGS, async { let message = "10"; @@ -476,14 +473,12 @@ mod integration_tests { let events = collect_n(out, 2).await; container_remove(&id, &docker).await; - let definition = schema_definitions.unwrap(); - - 
definition.assert_valid_for_event(&events[0]); + schema_definition.assert_valid_for_event(&events[0]); assert_eq!( events[0].as_log()[log_schema().message_key()], message.into() ); - definition.assert_valid_for_event(&events[1]); + schema_definition.assert_valid_for_event(&events[1]); assert_eq!( events[1].as_log()[log_schema().message_key()], message.into() @@ -495,12 +490,13 @@ mod integration_tests { #[tokio::test] async fn include_containers_legacy_namespace() { trace_init(); - let schema_definitions = DockerLogsConfig::default() + let schema_definition = DockerLogsConfig::default() .outputs(LogNamespace::Legacy) .first() .unwrap() - .schema_definition - .clone(); + .log_schema_definition + .clone() + .unwrap(); assert_source_compliance(&SOURCE_TAGS, async { let message = "11"; @@ -517,9 +513,7 @@ mod integration_tests { container_remove(&id0, &docker).await; container_remove(&id1, &docker).await; - schema_definitions - .unwrap() - .assert_valid_for_event(&events[0]); + schema_definition.assert_valid_for_event(&events[0]); assert_eq!( events[0].as_log()[log_schema().message_key()], message.into() @@ -531,12 +525,13 @@ mod integration_tests { #[tokio::test] async fn exclude_containers_legacy_namespace() { trace_init(); - let schema_definitions = DockerLogsConfig::default() + let schema_definition = DockerLogsConfig::default() .outputs(LogNamespace::Legacy) .first() .unwrap() - .schema_definition - .clone(); + .log_schema_definition + .clone() + .unwrap(); assert_source_compliance(&SOURCE_TAGS, async { let will_be_read = "12"; @@ -566,15 +561,13 @@ mod integration_tests { assert_eq!(events.len(), 2); - let definition = schema_definitions.unwrap(); - - definition.assert_valid_for_event(&events[0]); + schema_definition.assert_valid_for_event(&events[0]); assert_eq!( events[0].as_log()[log_schema().message_key()], will_be_read.into() ); - definition.assert_valid_for_event(&events[1]); + schema_definition.assert_valid_for_event(&events[1]); assert_eq!( events[1].as_log()[log_schema().message_key()], will_be_read.into() @@ -586,12 +579,13 @@ mod integration_tests { #[tokio::test] async fn include_labels_legacy_namespace() { trace_init(); - let schema_definitions = DockerLogsConfig::default() + let schema_definition = DockerLogsConfig::default() .outputs(LogNamespace::Legacy) .first() .unwrap() - .schema_definition - .clone(); + .log_schema_definition + .clone() + .unwrap(); assert_source_compliance(&SOURCE_TAGS, async { let message = "13"; @@ -609,9 +603,7 @@ mod integration_tests { container_remove(&id0, &docker).await; container_remove(&id1, &docker).await; - schema_definitions - .unwrap() - .assert_valid_for_event(&events[0]); + schema_definition.assert_valid_for_event(&events[0]); assert_eq!( events[0].as_log()[log_schema().message_key()], message.into() @@ -623,12 +615,13 @@ mod integration_tests { #[tokio::test] async fn currently_running_legacy_namespace() { trace_init(); - let schema_definitions = DockerLogsConfig::default() + let schema_definition = DockerLogsConfig::default() .outputs(LogNamespace::Legacy) .first() .unwrap() - .schema_definition - .clone(); + .log_schema_definition + .clone() + .unwrap(); assert_source_compliance(&SOURCE_TAGS, async { let message = "14"; @@ -643,9 +636,7 @@ mod integration_tests { let _ = container_kill(&id, &docker).await; container_remove(&id, &docker).await; - schema_definitions - .unwrap() - .assert_valid_for_event(&events[0]); + schema_definition.assert_valid_for_event(&events[0]); let log = events[0].as_log(); 
assert_eq!(log[log_schema().message_key()], message.into()); assert_eq!(log[CONTAINER], id.into()); @@ -664,12 +655,13 @@ mod integration_tests { #[tokio::test] async fn include_image_legacy_namespace() { trace_init(); - let schema_definitions = DockerLogsConfig::default() + let schema_definition = DockerLogsConfig::default() .outputs(LogNamespace::Legacy) .first() .unwrap() - .schema_definition - .clone(); + .log_schema_definition + .clone() + .unwrap(); assert_source_compliance(&SOURCE_TAGS, async { let message = "15"; @@ -688,9 +680,7 @@ mod integration_tests { let events = collect_n(out, 1).await; container_remove(&id, &docker).await; - schema_definitions - .unwrap() - .assert_valid_for_event(&events[0]); + schema_definition.assert_valid_for_event(&events[0]); assert_eq!( events[0].as_log()[log_schema().message_key()], message.into() @@ -758,12 +748,13 @@ mod integration_tests { #[tokio::test] async fn flat_labels_legacy_namespace() { trace_init(); - let schema_definitions = DockerLogsConfig::default() + let schema_definition = DockerLogsConfig::default() .outputs(LogNamespace::Legacy) .first() .unwrap() - .schema_definition - .clone(); + .log_schema_definition + .clone() + .unwrap(); assert_source_compliance(&SOURCE_TAGS, async { let message = "18"; @@ -778,9 +769,7 @@ mod integration_tests { let _ = container_kill(&id, &docker).await; container_remove(&id, &docker).await; - schema_definitions - .unwrap() - .assert_valid_for_event(&events[0]); + schema_definition.assert_valid_for_event(&events[0]); let log = events[0].as_log(); assert_eq!(log[log_schema().message_key()], message.into()); assert_eq!(log[CONTAINER], id.into()); @@ -805,12 +794,13 @@ mod integration_tests { #[tokio::test] async fn log_longer_than_16kb_legacy_namespace() { trace_init(); - let schema_definitions = DockerLogsConfig::default() + let schema_definition = DockerLogsConfig::default() .outputs(LogNamespace::Legacy) .first() .unwrap() - .schema_definition - .clone(); + .log_schema_definition + .clone() + .unwrap(); assert_source_compliance(&SOURCE_TAGS, async { let mut message = String::with_capacity(20 * 1024); @@ -827,9 +817,7 @@ mod integration_tests { let events = collect_n(out, 1).await; container_remove(&id, &docker).await; - schema_definitions - .unwrap() - .assert_valid_for_event(&events[0]); + schema_definition.assert_valid_for_event(&events[0]); let log = events[0].as_log(); assert_eq!(log[log_schema().message_key()], message.into()); }) @@ -840,11 +828,11 @@ mod integration_tests { async fn merge_multiline_vector_namespace() { assert_source_compliance(&SOURCE_TAGS, async { trace_init(); - let schema_definitions = DockerLogsConfig::default() + let schema_definition = DockerLogsConfig::default() .outputs(LogNamespace::Vector) .first() .unwrap() - .schema_definition + .log_schema_definition .clone() .unwrap(); @@ -893,7 +881,7 @@ mod integration_tests { let actual_messages = events .into_iter() .map(|event| { - schema_definitions.assert_valid_for_event(&event); + schema_definition.assert_valid_for_event(&event); event .into_log() @@ -912,11 +900,11 @@ mod integration_tests { async fn merge_multiline_legacy_namespace() { assert_source_compliance(&SOURCE_TAGS, async { trace_init(); - let schema_definitions = DockerLogsConfig::default() + let schema_definition = DockerLogsConfig::default() .outputs(LogNamespace::Legacy) .first() .unwrap() - .schema_definition + .log_schema_definition .clone() .unwrap(); @@ -964,7 +952,7 @@ mod integration_tests { let actual_messages = events .into_iter() .map(|event| { - 
schema_definitions.assert_valid_for_event(&event); + schema_definition.assert_valid_for_event(&event); event .into_log() diff --git a/src/sources/eventstoredb_metrics/mod.rs b/src/sources/eventstoredb_metrics/mod.rs index 57017950f56be..c46cb9cecb048 100644 --- a/src/sources/eventstoredb_metrics/mod.rs +++ b/src/sources/eventstoredb_metrics/mod.rs @@ -14,7 +14,7 @@ use vector_core::EstimatedJsonEncodedSizeOf; use self::types::Stats; use crate::{ - config::{SourceConfig, SourceContext, SourceOutput}, + config::{self, Output, SourceConfig, SourceContext}, http::HttpClient, internal_events::{ EventStoreDbMetricsHttpError, EventStoreDbStatsParsingError, EventsReceived, @@ -72,8 +72,8 @@ impl SourceConfig for EventStoreDbConfig { ) } - fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { - vec![SourceOutput::new_metrics()] + fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { + vec![Output::default(config::DataType::Metric)] } fn can_acknowledge(&self) -> bool { diff --git a/src/sources/exec/mod.rs b/src/sources/exec/mod.rs index 0bb58985c73f7..3facbb16a7dad 100644 --- a/src/sources/exec/mod.rs +++ b/src/sources/exec/mod.rs @@ -27,7 +27,7 @@ use vector_core::{config::LegacyKey, EstimatedJsonEncodedSizeOf}; use crate::{ codecs::{Decoder, DecodingConfig}, - config::{SourceConfig, SourceContext, SourceOutput}, + config::{Output, SourceConfig, SourceContext}, event::Event, internal_events::{ ExecChannelClosedError, ExecCommandExecuted, ExecEventsReceived, ExecFailedError, @@ -266,7 +266,7 @@ impl SourceConfig for ExecConfig { } } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let log_namespace = global_log_namespace.merge(Some(self.log_namespace.unwrap_or(false))); let schema_definition = self @@ -304,10 +304,7 @@ impl SourceConfig for ExecConfig { None, ); - vec![SourceOutput::new_logs( - self.decoding.output_type(), - schema_definition, - )] + vec![Output::default(self.decoding.output_type()).with_schema_definition(schema_definition)] } fn can_acknowledge(&self) -> bool { diff --git a/src/sources/file.rs b/src/sources/file.rs index b3644a6aec5d1..a70bb8a52f8cc 100644 --- a/src/sources/file.rs +++ b/src/sources/file.rs @@ -24,8 +24,7 @@ use vector_core::config::{LegacyKey, LogNamespace}; use super::util::{EncodingConfig, MultilineConfig}; use crate::{ config::{ - log_schema, DataType, SourceAcknowledgementsConfig, SourceConfig, SourceContext, - SourceOutput, + log_schema, DataType, Output, SourceAcknowledgementsConfig, SourceConfig, SourceContext, }, encoding_transcode::{Decoder, Encoder}, event::{BatchNotifier, BatchStatus, LogEvent}, @@ -432,7 +431,7 @@ impl SourceConfig for FileConfig { )) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let file_key = self.file_key.clone().path.map(LegacyKey::Overwrite); let host_key = self.host_key.clone().path.map(LegacyKey::Overwrite); @@ -467,7 +466,7 @@ impl SourceConfig for FileConfig { None, ); - vec![SourceOutput::new_logs(DataType::Log, schema_definition)] + vec![Output::default(DataType::Log).with_schema_definition(schema_definition)] } fn can_acknowledge(&self) -> bool { @@ -944,70 +943,62 @@ mod tests { #[test] fn output_schema_definition_vector_namespace() { - let definitions = FileConfig::default() - .outputs(LogNamespace::Vector) - .remove(0) - .schema_definition(true); + let definition = FileConfig::default().outputs(LogNamespace::Vector)[0] + .clone() + 
.log_schema_definition + .unwrap(); assert_eq!( - definitions, - Some( - Definition::new_with_default_metadata(Kind::bytes(), [LogNamespace::Vector]) - .with_meaning(OwnedTargetPath::event_root(), "message") - .with_metadata_field( - &owned_value_path!("vector", "source_type"), - Kind::bytes(), - None - ) - .with_metadata_field( - &owned_value_path!("vector", "ingest_timestamp"), - Kind::timestamp(), - None - ) - .with_metadata_field( - &owned_value_path!("file", "host"), - Kind::bytes().or_undefined(), - Some("host") - ) - .with_metadata_field( - &owned_value_path!("file", "offset"), - Kind::integer(), - None - ) - .with_metadata_field(&owned_value_path!("file", "path"), Kind::bytes(), None) - ) + definition, + Definition::new_with_default_metadata(Kind::bytes(), [LogNamespace::Vector]) + .with_meaning(OwnedTargetPath::event_root(), "message") + .with_metadata_field( + &owned_value_path!("vector", "source_type"), + Kind::bytes(), + None + ) + .with_metadata_field( + &owned_value_path!("vector", "ingest_timestamp"), + Kind::timestamp(), + None + ) + .with_metadata_field( + &owned_value_path!("file", "host"), + Kind::bytes().or_undefined(), + Some("host") + ) + .with_metadata_field(&owned_value_path!("file", "offset"), Kind::integer(), None) + .with_metadata_field(&owned_value_path!("file", "path"), Kind::bytes(), None) ) } #[test] fn output_schema_definition_legacy_namespace() { - let definitions = FileConfig::default() - .outputs(LogNamespace::Legacy) - .remove(0) - .schema_definition(true); + let definition = FileConfig::default().outputs(LogNamespace::Legacy)[0] + .clone() + .log_schema_definition + .unwrap(); assert_eq!( - definitions, - Some( - Definition::new_with_default_metadata( - Kind::object(Collection::empty()), - [LogNamespace::Legacy] - ) - .with_event_field( - &owned_value_path!("message"), - Kind::bytes(), - Some("message") - ) - .with_event_field(&owned_value_path!("source_type"), Kind::bytes(), None) - .with_event_field(&owned_value_path!("timestamp"), Kind::timestamp(), None) - .with_event_field( - &owned_value_path!("host"), - Kind::bytes().or_undefined(), - Some("host") - ) - .with_event_field(&owned_value_path!("offset"), Kind::undefined(), None) - .with_event_field(&owned_value_path!("file"), Kind::bytes(), None) + definition, + Definition::new_with_default_metadata( + Kind::object(Collection::empty()), + [LogNamespace::Legacy] + ) + .with_event_field( + &owned_value_path!("message"), + Kind::bytes(), + Some("message") + ) + .with_event_field(&owned_value_path!("source_type"), Kind::bytes(), None) + .with_event_field(&owned_value_path!("timestamp"), Kind::timestamp(), None) + .with_event_field( + &owned_value_path!("host"), + Kind::bytes().or_undefined(), + Some("host") ) + .with_event_field(&owned_value_path!("offset"), Kind::undefined(), None) + .with_event_field(&owned_value_path!("file"), Kind::bytes(), None) ) } diff --git a/src/sources/file_descriptors/file_descriptor.rs b/src/sources/file_descriptors/file_descriptor.rs index 008ac2b43773a..95b8d2be97a68 100644 --- a/src/sources/file_descriptors/file_descriptor.rs +++ b/src/sources/file_descriptors/file_descriptor.rs @@ -8,7 +8,7 @@ use vector_config::configurable_component; use vector_core::config::LogNamespace; use crate::{ - config::{GenerateConfig, Resource, SourceConfig, SourceContext, SourceOutput}, + config::{GenerateConfig, Output, Resource, SourceConfig, SourceContext}, serde::default_decoding, }; /// Configuration for the `file_descriptor` source. 
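
The file.rs tests above illustrate the access pattern this revert restores on the test side: rather than calling the removed `schema_definition(true)` helper (which returned an `Option<Definition>`), tests index the first output and read its `log_schema_definition` field directly. A minimal sketch of that pattern, using stand-in types rather than vector-core's real `Output` and `Definition`:

// Stand-in types; the real `Output` and `Definition` live in vector-core.
#[derive(Clone, Debug, PartialEq)]
struct Definition(&'static str);

#[derive(Clone, Debug)]
struct Output {
    log_schema_definition: Option<Definition>,
}

fn outputs() -> Vec<Output> {
    vec![Output { log_schema_definition: Some(Definition("bytes schema")) }]
}

fn main() {
    // Restored pattern: index the first output, clone it, read the field...
    let definition = outputs()[0].clone().log_schema_definition.unwrap();
    assert_eq!(definition, Definition("bytes schema"));
    // ...instead of the removed `outputs(..).remove(0).schema_definition(true)` call.
}
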
@@ -83,7 +83,7 @@ impl SourceConfig for FileDescriptorSourceConfig { self.source(pipe, cx.shutdown, cx.out, log_namespace) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec<SourceOutput> { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec<Output> { let log_namespace = global_log_namespace.merge(self.log_namespace); outputs(log_namespace, &self.host_key, &self.decoding, Self::NAME) diff --git a/src/sources/file_descriptors/mod.rs b/src/sources/file_descriptors/mod.rs index 68c8df41f60db..0eb514d756c9e 100644 --- a/src/sources/file_descriptors/mod.rs +++ b/src/sources/file_descriptors/mod.rs @@ -19,14 +19,14 @@ use vector_common::internal_event::{ }; use vector_config::NamedComponent; use vector_core::{ - config::{LegacyKey, LogNamespace}, + config::{LegacyKey, LogNamespace, Output}, event::Event, EstimatedJsonEncodedSizeOf, }; use crate::{ codecs::{Decoder, DecodingConfig}, - config::{log_schema, SourceOutput}, + config::log_schema, internal_events::{EventsReceived, FileDescriptorReadError, StreamClosedError}, shutdown::ShutdownSignal, SourceSender, @@ -210,7 +210,7 @@ fn outputs( host_key: &Option<OptionalValuePath>, decoding: &DeserializerConfig, source_name: &'static str, -) -> Vec<SourceOutput> { +) -> Vec<Output> { let legacy_host_key = Some(LegacyKey::InsertIfEmpty( host_key.clone().and_then(|k| k.path).unwrap_or_else(|| { parse_value_path(log_schema().host_key()).expect("log_schema.host_key to be valid path") }), )); @@ -228,8 +228,5 @@ ) .with_standard_vector_source_metadata(); - vec![SourceOutput::new_logs( - decoding.output_type(), - schema_definition, - )] + vec![Output::default(decoding.output_type()).with_schema_definition(schema_definition)] } diff --git a/src/sources/file_descriptors/stdin.rs b/src/sources/file_descriptors/stdin.rs index c3ed61d901e63..f63cc4a6ac857 100644 --- a/src/sources/file_descriptors/stdin.rs +++ b/src/sources/file_descriptors/stdin.rs @@ -6,7 +6,7 @@ use vector_config::configurable_component; use vector_core::config::LogNamespace; use crate::{ - config::{Resource, SourceConfig, SourceContext, SourceOutput}, + config::{Output, Resource, SourceConfig, SourceContext}, serde::default_decoding, }; @@ -90,7 +90,7 @@ impl SourceConfig for StdinConfig { ) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec<SourceOutput> { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec<Output> { let log_namespace = global_log_namespace.merge(self.log_namespace); outputs(log_namespace, &self.host_key, &self.decoding, Self::NAME) diff --git a/src/sources/fluent/mod.rs b/src/sources/fluent/mod.rs index eadf8abd556e3..f7662f076070c 100644 --- a/src/sources/fluent/mod.rs +++ b/src/sources/fluent/mod.rs @@ -22,8 +22,8 @@ use vector_core::schema::Definition; use super::util::net::{SocketListenAddr, TcpSource, TcpSourceAck, TcpSourceAcker}; use crate::{ config::{ - log_schema, DataType, GenerateConfig, Resource, SourceAcknowledgementsConfig, SourceConfig, - SourceContext, SourceOutput, + log_schema, DataType, GenerateConfig, Output, Resource, SourceAcknowledgementsConfig, + SourceConfig, SourceContext, }, event::{Event, LogEvent}, internal_events::{FluentMessageDecodeError, FluentMessageReceived}, @@ -114,11 +114,11 @@ impl SourceConfig for FluentConfig { ) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec<SourceOutput> { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec<Output> { let log_namespace = global_log_namespace.merge(self.log_namespace); let schema_definition = self.schema_definition(log_namespace); - vec![SourceOutput::new_logs(DataType::Log, schema_definition)] + 
vec![Output::default(DataType::Log).with_schema_definition(schema_definition)] } fn resources(&self) -> Vec { @@ -960,10 +960,10 @@ mod tests { log_namespace: Some(true), }; - let definitions = config - .outputs(LogNamespace::Vector) - .remove(0) - .schema_definition(true); + let definition = config.outputs(LogNamespace::Vector)[0] + .clone() + .log_schema_definition + .unwrap(); let expected_definition = Definition::new_with_default_metadata(Kind::bytes(), [LogNamespace::Vector]) @@ -1000,7 +1000,7 @@ mod tests { None, ); - assert_eq!(definitions, Some(expected_definition)) + assert_eq!(definition, expected_definition) } #[test] @@ -1015,10 +1015,10 @@ mod tests { log_namespace: None, }; - let definitions = config - .outputs(LogNamespace::Legacy) - .remove(0) - .schema_definition(true); + let definition = config.outputs(LogNamespace::Legacy)[0] + .clone() + .log_schema_definition + .unwrap(); let expected_definition = Definition::new_with_default_metadata( Kind::object(Collection::empty()), @@ -1035,7 +1035,7 @@ mod tests { .with_event_field(&owned_value_path!("host"), Kind::bytes(), Some("host")) .unknown_fields(Kind::bytes()); - assert_eq!(definitions, Some(expected_definition)) + assert_eq!(definition, expected_definition) } } diff --git a/src/sources/gcp_pubsub.rs b/src/sources/gcp_pubsub.rs index 3cd5836089fbe..423dfbfe0a972 100644 --- a/src/sources/gcp_pubsub.rs +++ b/src/sources/gcp_pubsub.rs @@ -30,7 +30,7 @@ use vector_core::config::{LegacyKey, LogNamespace}; use crate::{ codecs::{Decoder, DecodingConfig}, - config::{DataType, SourceAcknowledgementsConfig, SourceConfig, SourceContext, SourceOutput}, + config::{DataType, Output, SourceAcknowledgementsConfig, SourceConfig, SourceContext}, event::{BatchNotifier, BatchStatus, Event, MaybeAsLogMut, Value}, gcp::{GcpAuthConfig, GcpAuthenticator, Scope, PUBSUB_URL}, internal_events::{ @@ -326,7 +326,7 @@ impl SourceConfig for PubsubConfig { Ok(Box::pin(source)) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let log_namespace = global_log_namespace.merge(self.log_namespace); let schema_definition = self .decoding @@ -354,7 +354,7 @@ impl SourceConfig for PubsubConfig { None, ); - vec![SourceOutput::new_logs(DataType::Log, schema_definition)] + vec![Output::default(DataType::Log).with_schema_definition(schema_definition)] } fn can_acknowledge(&self) -> bool { @@ -756,10 +756,10 @@ mod tests { ..Default::default() }; - let definitions = config - .outputs(LogNamespace::Vector) - .remove(0) - .schema_definition(true); + let definition = config.outputs(LogNamespace::Vector)[0] + .clone() + .log_schema_definition + .unwrap(); let expected_definition = Definition::new_with_default_metadata(Kind::bytes(), [LogNamespace::Vector]) @@ -790,17 +790,17 @@ mod tests { None, ); - assert_eq!(definitions, Some(expected_definition)); + assert_eq!(definition, expected_definition); } #[test] fn output_schema_definition_legacy_namespace() { let config = PubsubConfig::default(); - let definitions = config - .outputs(LogNamespace::Legacy) - .remove(0) - .schema_definition(true); + let definition = config.outputs(LogNamespace::Legacy)[0] + .clone() + .log_schema_definition + .unwrap(); let expected_definition = Definition::new_with_default_metadata( Kind::object(Collection::empty()), @@ -824,7 +824,7 @@ mod tests { ) .with_event_field(&owned_value_path!("message_id"), Kind::bytes(), None); - assert_eq!(definitions, Some(expected_definition)); + assert_eq!(definition, 
expected_definition); } } diff --git a/src/sources/heroku_logs.rs b/src/sources/heroku_logs.rs index 787b3db9db9c0..be9671b803026 100644 --- a/src/sources/heroku_logs.rs +++ b/src/sources/heroku_logs.rs @@ -26,8 +26,8 @@ use vector_core::{ use crate::{ codecs::{Decoder, DecodingConfig}, config::{ - log_schema, GenerateConfig, Resource, SourceAcknowledgementsConfig, SourceConfig, - SourceContext, SourceOutput, + log_schema, GenerateConfig, Output, Resource, SourceAcknowledgementsConfig, SourceConfig, + SourceContext, }, event::{Event, LogEvent}, internal_events::{HerokuLogplexRequestReadError, HerokuLogplexRequestReceived}, @@ -182,14 +182,11 @@ impl SourceConfig for LogplexConfig { ) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { // There is a global and per-source `log_namespace` config. // The source config overrides the global setting and is merged here. let schema_def = self.schema_definition(global_log_namespace.merge(self.log_namespace)); - vec![SourceOutput::new_logs( - self.decoding.output_type(), - schema_def, - )] + vec![Output::default(self.decoding.output_type()).with_schema_definition(schema_def)] } fn resources(&self) -> Vec { @@ -663,10 +660,10 @@ mod tests { ..Default::default() }; - let definitions = config - .outputs(LogNamespace::Vector) - .remove(0) - .schema_definition(true); + let definition = config.outputs(LogNamespace::Vector)[0] + .clone() + .log_schema_definition + .unwrap(); let expected_definition = Definition::new_with_default_metadata(Kind::bytes(), [LogNamespace::Vector]) @@ -707,17 +704,17 @@ mod tests { None, ); - assert_eq!(definitions, Some(expected_definition)) + assert_eq!(definition, expected_definition) } #[test] fn output_schema_definition_legacy_namespace() { let config = LogplexConfig::default(); - let definitions = config - .outputs(LogNamespace::Legacy) - .remove(0) - .schema_definition(true); + let definition = config.outputs(LogNamespace::Legacy)[0] + .clone() + .log_schema_definition + .unwrap(); let expected_definition = Definition::new_with_default_metadata( Kind::object(Collection::empty()), @@ -735,6 +732,6 @@ mod tests { .with_event_field(&owned_value_path!("proc_id"), Kind::bytes(), None) .unknown_fields(Kind::bytes()); - assert_eq!(definitions, Some(expected_definition)) + assert_eq!(definition, expected_definition) } } diff --git a/src/sources/host_metrics/mod.rs b/src/sources/host_metrics/mod.rs index d75ab4d0420f9..332a5b5a80e2b 100644 --- a/src/sources/host_metrics/mod.rs +++ b/src/sources/host_metrics/mod.rs @@ -20,7 +20,7 @@ use vector_core::config::LogNamespace; use vector_core::EstimatedJsonEncodedSizeOf; use crate::{ - config::{SourceConfig, SourceContext, SourceOutput}, + config::{DataType, Output, SourceConfig, SourceContext}, event::metric::{Metric, MetricKind, MetricTags, MetricValue}, internal_events::{EventsReceived, HostMetricsScrapeDetailError, StreamClosedError}, shutdown::ShutdownSignal, @@ -267,8 +267,8 @@ impl SourceConfig for HostMetricsConfig { Ok(Box::pin(config.run(cx.out, cx.shutdown))) } - fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { - vec![SourceOutput::new_metrics()] + fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { + vec![Output::default(DataType::Metric)] } fn can_acknowledge(&self) -> bool { diff --git a/src/sources/http_client/client.rs b/src/sources/http_client/client.rs index 61c7a6c572687..2eea222563a68 100644 --- a/src/sources/http_client/client.rs +++ 
b/src/sources/http_client/client.rs @@ -33,7 +33,7 @@ use codecs::{ }; use vector_config::configurable_component; use vector_core::{ - config::{log_schema, LogNamespace, SourceOutput}, + config::{log_schema, LogNamespace, Output}, event::Event, }; @@ -206,7 +206,7 @@ impl SourceConfig for HttpClientConfig { Ok(call(inputs, context, cx.out, self.method).boxed()) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec<SourceOutput> { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec<Output> { // There is a global and per-source `log_namespace` config. The source config overrides the global setting, // and is merged here. let log_namespace = global_log_namespace.merge(self.log_namespace); @@ -216,10 +216,7 @@ impl SourceConfig for HttpClientConfig { .schema_definition(log_namespace) .with_standard_vector_source_metadata(); - vec![SourceOutput::new_logs( - self.decoding.output_type(), - schema_definition, - )] + vec![Output::default(self.decoding.output_type()).with_schema_definition(schema_definition)] } fn can_acknowledge(&self) -> bool { diff --git a/src/sources/http_server.rs b/src/sources/http_server.rs index eb0b7a33a41c6..0d416392e9876 100644 --- a/src/sources/http_server.rs +++ b/src/sources/http_server.rs @@ -23,8 +23,7 @@ use crate::{ codecs::{Decoder, DecodingConfig}, components::validation::*, config::{ - GenerateConfig, Resource, SourceAcknowledgementsConfig, SourceConfig, SourceContext, - SourceOutput, + GenerateConfig, Output, Resource, SourceAcknowledgementsConfig, SourceConfig, SourceContext, }, event::{Event, Value}, register_validatable_component, @@ -55,7 +54,7 @@ impl SourceConfig for HttpConfig { self.0.build(cx).await } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec<SourceOutput> { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec<Output> { self.0.outputs(global_log_namespace) } @@ -336,20 +335,20 @@ impl SourceConfig for SimpleHttpConfig { ) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec<SourceOutput> { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec<Output> { // There is a global and per-source `log_namespace` config. // The source config overrides the global setting and is merged here. 
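
The comment above describes the merge rule repeated in every source in this patch: the per-source `log_namespace` option, when set, wins over the global one. A minimal sketch of that behaviour (stand-in enum; the real `merge` lives on `vector_core::config::LogNamespace`):

#[derive(Clone, Copy, Debug, PartialEq)]
enum LogNamespace { Legacy, Vector }

impl LogNamespace {
    // The per-source override, when present, takes precedence over `self`.
    fn merge(self, per_source: Option<LogNamespace>) -> LogNamespace {
        per_source.unwrap_or(self)
    }
}

fn main() {
    let global = LogNamespace::Legacy;
    assert_eq!(global.merge(None), LogNamespace::Legacy);
    assert_eq!(global.merge(Some(LogNamespace::Vector)), LogNamespace::Vector);
}
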
let log_namespace = global_log_namespace.merge(self.log_namespace); let schema_definition = self.schema_definition(log_namespace); - vec![SourceOutput::new_logs( + vec![Output::default( self.decoding .as_ref() .map(|d| d.output_type()) .unwrap_or(DataType::Log), - schema_definition, - )] + ) + .with_schema_definition(schema_definition)] } fn resources(&self) -> Vec { @@ -1316,10 +1315,10 @@ mod tests { ..Default::default() }; - let definitions = config - .outputs(LogNamespace::Vector) - .remove(0) - .schema_definition(true); + let definition = config.outputs(LogNamespace::Vector)[0] + .clone() + .log_schema_definition + .unwrap(); let expected_definition = Definition::new_with_default_metadata(Kind::bytes(), [LogNamespace::Vector]) @@ -1350,17 +1349,17 @@ mod tests { None, ); - assert_eq!(definitions, Some(expected_definition)) + assert_eq!(definition, expected_definition) } #[test] fn output_schema_definition_legacy_namespace() { let config = SimpleHttpConfig::default(); - let definitions = config - .outputs(LogNamespace::Legacy) - .remove(0) - .schema_definition(true); + let definition = config.outputs(LogNamespace::Legacy)[0] + .clone() + .log_schema_definition + .unwrap(); let expected_definition = Definition::new_with_default_metadata( Kind::object(Collection::empty()), @@ -1376,7 +1375,7 @@ mod tests { .with_event_field(&owned_value_path!("path"), Kind::bytes(), None) .unknown_fields(Kind::bytes()); - assert_eq!(definitions, Some(expected_definition)) + assert_eq!(definition, expected_definition) } #[test] diff --git a/src/sources/internal_logs.rs b/src/sources/internal_logs.rs index bc69a34079328..524b16237d683 100644 --- a/src/sources/internal_logs.rs +++ b/src/sources/internal_logs.rs @@ -12,7 +12,7 @@ use vector_core::{ }; use crate::{ - config::{DataType, SourceConfig, SourceContext, SourceOutput}, + config::{DataType, Output, SourceConfig, SourceContext}, event::{EstimatedJsonEncodedSizeOf, Event}, internal_events::{InternalLogsBytesReceived, InternalLogsEventsReceived, StreamClosedError}, shutdown::ShutdownSignal, @@ -121,11 +121,11 @@ impl SourceConfig for InternalLogsConfig { ))) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let schema_definition = self.schema_definition(global_log_namespace.merge(self.log_namespace)); - vec![SourceOutput::new_logs(DataType::Log, schema_definition)] + vec![Output::default(DataType::Log).with_schema_definition(schema_definition)] } fn can_acknowledge(&self) -> bool { @@ -339,10 +339,10 @@ mod tests { fn output_schema_definition_vector_namespace() { let config = InternalLogsConfig::default(); - let definitions = config - .outputs(LogNamespace::Vector) - .remove(0) - .schema_definition(true); + let definition = config.outputs(LogNamespace::Vector)[0] + .clone() + .log_schema_definition + .unwrap(); let expected_definition = Definition::new_with_default_metadata(Kind::bytes(), [LogNamespace::Vector]) @@ -368,7 +368,7 @@ mod tests { Some("host"), ); - assert_eq!(definitions, Some(expected_definition)) + assert_eq!(definition, expected_definition) } #[test] @@ -379,10 +379,10 @@ mod tests { config.pid_key = OptionalValuePath::from(owned_value_path!(pid_key)); - let definitions = config - .outputs(LogNamespace::Legacy) - .remove(0) - .schema_definition(true); + let definition = config.outputs(LogNamespace::Legacy)[0] + .clone() + .log_schema_definition + .unwrap(); let expected_definition = Definition::new_with_default_metadata( Kind::object(Collection::empty()), 
@@ -402,6 +402,6 @@ mod tests { Some("host"), ); - assert_eq!(definitions, Some(expected_definition)) + assert_eq!(definition, expected_definition) } } diff --git a/src/sources/internal_metrics.rs b/src/sources/internal_metrics.rs index bddf04240673d..5b4822e7cdc02 100644 --- a/src/sources/internal_metrics.rs +++ b/src/sources/internal_metrics.rs @@ -10,7 +10,7 @@ use vector_core::config::LogNamespace; use vector_core::EstimatedJsonEncodedSizeOf; use crate::{ - config::{log_schema, SourceConfig, SourceContext, SourceOutput}, + config::{log_schema, DataType, Output, SourceConfig, SourceContext}, internal_events::{EventsReceived, InternalMetricsBytesReceived, StreamClosedError}, metrics::Controller, shutdown::ShutdownSignal, @@ -136,8 +136,8 @@ impl SourceConfig for InternalMetricsConfig { )) } - fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { - vec![SourceOutput::new_metrics()] + fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { + vec![Output::default(DataType::Metric)] } fn can_acknowledge(&self) -> bool { diff --git a/src/sources/journald.rs b/src/sources/journald.rs index 558bb19a62356..61288dbf33584 100644 --- a/src/sources/journald.rs +++ b/src/sources/journald.rs @@ -44,8 +44,7 @@ use vector_core::{ use crate::{ config::{ - log_schema, DataType, SourceAcknowledgementsConfig, SourceConfig, SourceContext, - SourceOutput, + log_schema, DataType, Output, SourceAcknowledgementsConfig, SourceConfig, SourceContext, }, event::{BatchNotifier, BatchStatus, BatchStatusReceiver, LogEvent}, internal_events::{ @@ -364,11 +363,11 @@ impl SourceConfig for JournaldConfig { )) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let schema_definition = self.schema_definition(global_log_namespace.merge(self.log_namespace)); - vec![SourceOutput::new_logs(DataType::Log, schema_definition)] + vec![Output::default(DataType::Log).with_schema_definition(schema_definition)] } fn can_acknowledge(&self) -> bool { @@ -1466,10 +1465,10 @@ mod tests { ..Default::default() }; - let definitions = config - .outputs(LogNamespace::Vector) - .remove(0) - .schema_definition(true); + let definition = config.outputs(LogNamespace::Vector)[0] + .clone() + .log_schema_definition + .unwrap(); let expected_definition = Definition::new_with_default_metadata(Kind::bytes().or_null(), [LogNamespace::Vector]) @@ -1499,17 +1498,17 @@ mod tests { Some("host"), ); - assert_eq!(definitions, Some(expected_definition)) + assert_eq!(definition, expected_definition) } #[test] fn output_schema_definition_legacy_namespace() { let config = JournaldConfig::default(); - let definitions = config - .outputs(LogNamespace::Legacy) - .remove(0) - .schema_definition(true); + let definition = config.outputs(LogNamespace::Legacy)[0] + .clone() + .log_schema_definition + .unwrap(); let expected_definition = Definition::new_with_default_metadata( Kind::object(Collection::empty()), @@ -1524,7 +1523,7 @@ mod tests { ) .unknown_fields(Kind::bytes()); - assert_eq!(definitions, Some(expected_definition)) + assert_eq!(definition, expected_definition) } fn matches_schema(config: &JournaldConfig, namespace: LogNamespace) { @@ -1559,9 +1558,12 @@ mod tests { event.as_mut_log().insert("timestamp", chrono::Utc::now()); - let definitions = config.outputs(namespace).remove(0).schema_definition(true); + let definition = config.outputs(namespace)[0] + .clone() + .log_schema_definition + .unwrap(); - definitions.unwrap().assert_valid_for_event(&event); + 
definition.assert_valid_for_event(&event) } #[test] diff --git a/src/sources/kafka.rs b/src/sources/kafka.rs index fa3d551527d27..e8d4f1e4e56d1 100644 --- a/src/sources/kafka.rs +++ b/src/sources/kafka.rs @@ -35,8 +35,7 @@ use vector_core::{ use crate::{ codecs::{Decoder, DecodingConfig}, config::{ - log_schema, LogSchema, SourceAcknowledgementsConfig, SourceConfig, SourceContext, - SourceOutput, + log_schema, LogSchema, Output, SourceAcknowledgementsConfig, SourceConfig, SourceContext, }, event::{BatchNotifier, BatchStatus, Event, Value}, internal_events::{ @@ -305,7 +304,7 @@ impl SourceConfig for KafkaSourceConfig { ))) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let log_namespace = global_log_namespace.merge(self.log_namespace); let keys = self.keys(); @@ -356,10 +355,7 @@ impl SourceConfig for KafkaSourceConfig { None, ); - vec![SourceOutput::new_logs( - self.decoding.output_type(), - schema_definition, - )] + vec![Output::default(self.decoding.output_type()).with_schema_definition(schema_definition)] } fn can_acknowledge(&self) -> bool { @@ -793,85 +789,83 @@ mod test { #[test] fn test_output_schema_definition_vector_namespace() { - let definitions = make_config("topic", "group", LogNamespace::Vector) - .outputs(LogNamespace::Vector) - .remove(0) - .schema_definition(true); + let definition = make_config("topic", "group", LogNamespace::Vector) + .outputs(LogNamespace::Vector)[0] + .clone() + .log_schema_definition + .unwrap(); assert_eq!( - definitions, - Some( - Definition::new_with_default_metadata(Kind::bytes(), [LogNamespace::Vector]) - .with_meaning(OwnedTargetPath::event_root(), "message") - .with_metadata_field( - &owned_value_path!("kafka", "timestamp"), - Kind::timestamp(), - Some("timestamp") - ) - .with_metadata_field( - &owned_value_path!("kafka", "message_key"), - Kind::bytes(), - None - ) - .with_metadata_field(&owned_value_path!("kafka", "topic"), Kind::bytes(), None) - .with_metadata_field( - &owned_value_path!("kafka", "partition"), - Kind::bytes(), - None - ) - .with_metadata_field(&owned_value_path!("kafka", "offset"), Kind::bytes(), None) - .with_metadata_field( - &owned_value_path!("kafka", "headers"), - Kind::object(Collection::empty().with_unknown(Kind::bytes())), - None - ) - .with_metadata_field( - &owned_value_path!("vector", "ingest_timestamp"), - Kind::timestamp(), - None - ) - .with_metadata_field( - &owned_value_path!("vector", "source_type"), - Kind::bytes(), - None - ) - ) + definition, + Definition::new_with_default_metadata(Kind::bytes(), [LogNamespace::Vector]) + .with_meaning(OwnedTargetPath::event_root(), "message") + .with_metadata_field( + &owned_value_path!("kafka", "timestamp"), + Kind::timestamp(), + Some("timestamp") + ) + .with_metadata_field( + &owned_value_path!("kafka", "message_key"), + Kind::bytes(), + None + ) + .with_metadata_field(&owned_value_path!("kafka", "topic"), Kind::bytes(), None) + .with_metadata_field( + &owned_value_path!("kafka", "partition"), + Kind::bytes(), + None + ) + .with_metadata_field(&owned_value_path!("kafka", "offset"), Kind::bytes(), None) + .with_metadata_field( + &owned_value_path!("kafka", "headers"), + Kind::object(Collection::empty().with_unknown(Kind::bytes())), + None + ) + .with_metadata_field( + &owned_value_path!("vector", "ingest_timestamp"), + Kind::timestamp(), + None + ) + .with_metadata_field( + &owned_value_path!("vector", "source_type"), + Kind::bytes(), + None + ) ) } #[test] fn 
test_output_schema_definition_legacy_namespace() { - let definitions = make_config("topic", "group", LogNamespace::Legacy) - .outputs(LogNamespace::Legacy) - .remove(0) - .schema_definition(true); + let definition = make_config("topic", "group", LogNamespace::Legacy) + .outputs(LogNamespace::Legacy)[0] + .clone() + .log_schema_definition + .unwrap(); assert_eq!( - definitions, - Some( - Definition::new_with_default_metadata(Kind::json(), [LogNamespace::Legacy]) - .unknown_fields(Kind::undefined()) - .with_event_field( - &owned_value_path!("message"), - Kind::bytes(), - Some("message") - ) - .with_event_field( - &owned_value_path!("timestamp"), - Kind::timestamp(), - Some("timestamp") - ) - .with_event_field(&owned_value_path!("message_key"), Kind::bytes(), None) - .with_event_field(&owned_value_path!("topic"), Kind::bytes(), None) - .with_event_field(&owned_value_path!("partition"), Kind::bytes(), None) - .with_event_field(&owned_value_path!("offset"), Kind::bytes(), None) - .with_event_field( - &owned_value_path!("headers"), - Kind::object(Collection::empty().with_unknown(Kind::bytes())), - None - ) - .with_event_field(&owned_value_path!("source_type"), Kind::bytes(), None) - ) + definition, + Definition::new_with_default_metadata(Kind::json(), [LogNamespace::Legacy]) + .unknown_fields(Kind::undefined()) + .with_event_field( + &owned_value_path!("message"), + Kind::bytes(), + Some("message") + ) + .with_event_field( + &owned_value_path!("timestamp"), + Kind::timestamp(), + Some("timestamp") + ) + .with_event_field(&owned_value_path!("message_key"), Kind::bytes(), None) + .with_event_field(&owned_value_path!("topic"), Kind::bytes(), None) + .with_event_field(&owned_value_path!("partition"), Kind::bytes(), None) + .with_event_field(&owned_value_path!("offset"), Kind::bytes(), None) + .with_event_field( + &owned_value_path!("headers"), + Kind::object(Collection::empty().with_unknown(Kind::bytes())), + None + ) + .with_event_field(&owned_value_path!("source_type"), Kind::bytes(), None) ) } diff --git a/src/sources/kubernetes_logs/mod.rs b/src/sources/kubernetes_logs/mod.rs index 69f833c99d177..316e28f209b1f 100644 --- a/src/sources/kubernetes_logs/mod.rs +++ b/src/sources/kubernetes_logs/mod.rs @@ -42,8 +42,8 @@ use vector_core::{ use crate::{ config::{ - log_schema, ComponentKey, DataType, GenerateConfig, GlobalOptions, SourceConfig, - SourceContext, SourceOutput, + log_schema, ComponentKey, DataType, GenerateConfig, GlobalOptions, Output, SourceConfig, + SourceContext, }, event::Event, internal_events::{ @@ -295,7 +295,7 @@ impl SourceConfig for Config { )) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let log_namespace = global_log_namespace.merge(self.log_namespace); let schema_definition = BytesDeserializerConfig .schema_definition(log_namespace) @@ -491,7 +491,7 @@ impl SourceConfig for Config { ) .with_standard_vector_source_metadata(); - vec![SourceOutput::new_logs(DataType::Log, schema_definition)] + vec![Output::default(DataType::Log).with_schema_definition(schema_definition)] } fn can_acknowledge(&self) -> bool { @@ -1164,223 +1164,213 @@ mod tests { #[test] fn test_output_schema_definition_vector_namespace() { - let definitions = toml::from_str::("") + let definition = toml::from_str::("") .unwrap() - .outputs(LogNamespace::Vector) - .remove(0) - .schema_definition(true); - - assert_eq!( - definitions, - Some( - Definition::new_with_default_metadata(Kind::bytes(), [LogNamespace::Vector]) - 
.with_metadata_field( - &owned_value_path!("kubernetes_logs", "file"), - Kind::bytes(), - None - ) - .with_metadata_field( - &owned_value_path!("kubernetes_logs", "container_id"), - Kind::bytes().or_undefined(), - None - ) - .with_metadata_field( - &owned_value_path!("kubernetes_logs", "container_image"), - Kind::bytes().or_undefined(), - None - ) - .with_metadata_field( - &owned_value_path!("kubernetes_logs", "container_name"), - Kind::bytes().or_undefined(), - None - ) - .with_metadata_field( - &owned_value_path!("kubernetes_logs", "namespace_labels"), - Kind::object(Collection::empty().with_unknown(Kind::bytes())) - .or_undefined(), - None - ) - .with_metadata_field( - &owned_value_path!("kubernetes_logs", "node_labels"), - Kind::object(Collection::empty().with_unknown(Kind::bytes())) - .or_undefined(), - None - ) - .with_metadata_field( - &owned_value_path!("kubernetes_logs", "pod_annotations"), - Kind::object(Collection::empty().with_unknown(Kind::bytes())) - .or_undefined(), - None - ) - .with_metadata_field( - &owned_value_path!("kubernetes_logs", "pod_ip"), - Kind::bytes().or_undefined(), - None - ) - .with_metadata_field( - &owned_value_path!("kubernetes_logs", "pod_ips"), - Kind::array(Collection::empty().with_unknown(Kind::bytes())).or_undefined(), - None - ) - .with_metadata_field( - &owned_value_path!("kubernetes_logs", "pod_labels"), - Kind::object(Collection::empty().with_unknown(Kind::bytes())) - .or_undefined(), - None - ) - .with_metadata_field( - &owned_value_path!("kubernetes_logs", "pod_name"), - Kind::bytes().or_undefined(), - None - ) - .with_metadata_field( - &owned_value_path!("kubernetes_logs", "pod_namespace"), - Kind::bytes().or_undefined(), - None - ) - .with_metadata_field( - &owned_value_path!("kubernetes_logs", "pod_node_name"), - Kind::bytes().or_undefined(), - None - ) - .with_metadata_field( - &owned_value_path!("kubernetes_logs", "pod_owner"), - Kind::bytes().or_undefined(), - None - ) - .with_metadata_field( - &owned_value_path!("kubernetes_logs", "pod_uid"), - Kind::bytes().or_undefined(), - None - ) - .with_metadata_field( - &owned_value_path!("kubernetes_logs", "stream"), - Kind::bytes(), - None - ) - .with_metadata_field( - &owned_value_path!("kubernetes_logs", "timestamp"), - Kind::timestamp(), - Some("timestamp") - ) - .with_metadata_field( - &owned_value_path!("vector", "source_type"), - Kind::bytes(), - None - ) - .with_metadata_field( - &owned_value_path!("vector", "ingest_timestamp"), - Kind::timestamp(), - None - ) - .with_meaning(OwnedTargetPath::event_root(), "message") - ) - ) - } - - #[test] - fn test_output_schema_definition_legacy_namespace() { - let definitions = toml::from_str::("") - .unwrap() - .outputs(LogNamespace::Legacy) - .remove(0) - .schema_definition(true); + .outputs(LogNamespace::Vector)[0] + .clone() + .log_schema_definition + .unwrap(); assert_eq!( - definitions, - Some( - Definition::new_with_default_metadata( - Kind::object(Collection::empty()), - [LogNamespace::Legacy] - ) - .with_event_field(&owned_value_path!("file"), Kind::bytes(), None) - .with_event_field( - &owned_value_path!("message"), + definition, + Definition::new_with_default_metadata(Kind::bytes(), [LogNamespace::Vector]) + .with_metadata_field( + &owned_value_path!("kubernetes_logs", "file"), Kind::bytes(), - Some("message") + None ) - .with_event_field( - &owned_value_path!("kubernetes", "container_id"), + .with_metadata_field( + &owned_value_path!("kubernetes_logs", "container_id"), Kind::bytes().or_undefined(), None ) - .with_event_field( - 
&owned_value_path!("kubernetes", "container_image"), + .with_metadata_field( + &owned_value_path!("kubernetes_logs", "container_image"), Kind::bytes().or_undefined(), None ) - .with_event_field( - &owned_value_path!("kubernetes", "container_name"), + .with_metadata_field( + &owned_value_path!("kubernetes_logs", "container_name"), Kind::bytes().or_undefined(), None ) - .with_event_field( - &owned_value_path!("kubernetes", "namespace_labels"), + .with_metadata_field( + &owned_value_path!("kubernetes_logs", "namespace_labels"), Kind::object(Collection::empty().with_unknown(Kind::bytes())).or_undefined(), None ) - .with_event_field( - &owned_value_path!("kubernetes", "node_labels"), + .with_metadata_field( + &owned_value_path!("kubernetes_logs", "node_labels"), Kind::object(Collection::empty().with_unknown(Kind::bytes())).or_undefined(), None ) - .with_event_field( - &owned_value_path!("kubernetes", "pod_annotations"), + .with_metadata_field( + &owned_value_path!("kubernetes_logs", "pod_annotations"), Kind::object(Collection::empty().with_unknown(Kind::bytes())).or_undefined(), None ) - .with_event_field( - &owned_value_path!("kubernetes", "pod_ip"), + .with_metadata_field( + &owned_value_path!("kubernetes_logs", "pod_ip"), Kind::bytes().or_undefined(), None ) - .with_event_field( - &owned_value_path!("kubernetes", "pod_ips"), + .with_metadata_field( + &owned_value_path!("kubernetes_logs", "pod_ips"), Kind::array(Collection::empty().with_unknown(Kind::bytes())).or_undefined(), None ) - .with_event_field( - &owned_value_path!("kubernetes", "pod_labels"), + .with_metadata_field( + &owned_value_path!("kubernetes_logs", "pod_labels"), Kind::object(Collection::empty().with_unknown(Kind::bytes())).or_undefined(), None ) - .with_event_field( - &owned_value_path!("kubernetes", "pod_name"), + .with_metadata_field( + &owned_value_path!("kubernetes_logs", "pod_name"), Kind::bytes().or_undefined(), None ) - .with_event_field( - &owned_value_path!("kubernetes", "pod_namespace"), + .with_metadata_field( + &owned_value_path!("kubernetes_logs", "pod_namespace"), Kind::bytes().or_undefined(), None ) - .with_event_field( - &owned_value_path!("kubernetes", "pod_node_name"), + .with_metadata_field( + &owned_value_path!("kubernetes_logs", "pod_node_name"), Kind::bytes().or_undefined(), None ) - .with_event_field( - &owned_value_path!("kubernetes", "pod_owner"), + .with_metadata_field( + &owned_value_path!("kubernetes_logs", "pod_owner"), Kind::bytes().or_undefined(), None ) - .with_event_field( - &owned_value_path!("kubernetes", "pod_uid"), + .with_metadata_field( + &owned_value_path!("kubernetes_logs", "pod_uid"), Kind::bytes().or_undefined(), None ) - .with_event_field(&owned_value_path!("stream"), Kind::bytes(), None) - .with_event_field( - &owned_value_path!("timestamp"), + .with_metadata_field( + &owned_value_path!("kubernetes_logs", "stream"), + Kind::bytes(), + None + ) + .with_metadata_field( + &owned_value_path!("kubernetes_logs", "timestamp"), Kind::timestamp(), Some("timestamp") ) - .with_event_field( - &owned_value_path!("source_type"), + .with_metadata_field( + &owned_value_path!("vector", "source_type"), Kind::bytes(), None ) + .with_metadata_field( + &owned_value_path!("vector", "ingest_timestamp"), + Kind::timestamp(), + None + ) + .with_meaning(OwnedTargetPath::event_root(), "message") + ) + } + + #[test] + fn test_output_schema_definition_legacy_namespace() { + let definition = toml::from_str::("") + .unwrap() + .outputs(LogNamespace::Legacy)[0] + .clone() + .log_schema_definition + .unwrap(); + 
+ assert_eq!( + definition, + Definition::new_with_default_metadata( + Kind::object(Collection::empty()), + [LogNamespace::Legacy] + ) + .with_event_field(&owned_value_path!("file"), Kind::bytes(), None) + .with_event_field( + &owned_value_path!("message"), + Kind::bytes(), + Some("message") + ) + .with_event_field( + &owned_value_path!("kubernetes", "container_id"), + Kind::bytes().or_undefined(), + None + ) + .with_event_field( + &owned_value_path!("kubernetes", "container_image"), + Kind::bytes().or_undefined(), + None + ) + .with_event_field( + &owned_value_path!("kubernetes", "container_name"), + Kind::bytes().or_undefined(), + None + ) + .with_event_field( + &owned_value_path!("kubernetes", "namespace_labels"), + Kind::object(Collection::empty().with_unknown(Kind::bytes())).or_undefined(), + None + ) + .with_event_field( + &owned_value_path!("kubernetes", "node_labels"), + Kind::object(Collection::empty().with_unknown(Kind::bytes())).or_undefined(), + None + ) + .with_event_field( + &owned_value_path!("kubernetes", "pod_annotations"), + Kind::object(Collection::empty().with_unknown(Kind::bytes())).or_undefined(), + None + ) + .with_event_field( + &owned_value_path!("kubernetes", "pod_ip"), + Kind::bytes().or_undefined(), + None + ) + .with_event_field( + &owned_value_path!("kubernetes", "pod_ips"), + Kind::array(Collection::empty().with_unknown(Kind::bytes())).or_undefined(), + None + ) + .with_event_field( + &owned_value_path!("kubernetes", "pod_labels"), + Kind::object(Collection::empty().with_unknown(Kind::bytes())).or_undefined(), + None + ) + .with_event_field( + &owned_value_path!("kubernetes", "pod_name"), + Kind::bytes().or_undefined(), + None + ) + .with_event_field( + &owned_value_path!("kubernetes", "pod_namespace"), + Kind::bytes().or_undefined(), + None + ) + .with_event_field( + &owned_value_path!("kubernetes", "pod_node_name"), + Kind::bytes().or_undefined(), + None + ) + .with_event_field( + &owned_value_path!("kubernetes", "pod_owner"), + Kind::bytes().or_undefined(), + None + ) + .with_event_field( + &owned_value_path!("kubernetes", "pod_uid"), + Kind::bytes().or_undefined(), + None + ) + .with_event_field(&owned_value_path!("stream"), Kind::bytes(), None) + .with_event_field( + &owned_value_path!("timestamp"), + Kind::timestamp(), + Some("timestamp") ) + .with_event_field(&owned_value_path!("source_type"), Kind::bytes(), None) ) } } diff --git a/src/sources/logstash.rs b/src/sources/logstash.rs index c8287a0d01a84..f1f090206956a 100644 --- a/src/sources/logstash.rs +++ b/src/sources/logstash.rs @@ -25,8 +25,8 @@ use vector_core::{ use super::util::net::{SocketListenAddr, TcpSource, TcpSourceAck, TcpSourceAcker}; use crate::{ config::{ - log_schema, DataType, GenerateConfig, Resource, SourceAcknowledgementsConfig, SourceConfig, - SourceContext, SourceOutput, + log_schema, DataType, GenerateConfig, Output, Resource, SourceAcknowledgementsConfig, + SourceConfig, SourceContext, }, event::{Event, LogEvent, Value}, serde::bool_or_struct, @@ -167,11 +167,10 @@ impl SourceConfig for LogstashConfig { ) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { // There is a global and per-source `log_namespace` config. // The source config overrides the global setting and is merged here. 
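// The merge rule referenced in the comment above, sketched: the per-source
// `log_namespace` option (an `Option<bool>`) overrides the global setting when
// set, and the reverted API then hangs the schema definition off the single
// default output; `schema_definition` is this config's own helper.
let log_namespace = global_log_namespace.merge(self.log_namespace);
vec![Output::default(DataType::Log)
    .with_schema_definition(self.schema_definition(log_namespace))]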
- vec![SourceOutput::new_logs( - DataType::Log, + vec![Output::default(DataType::Log).with_schema_definition( self.schema_definition(global_log_namespace.merge(self.log_namespace)), )] } @@ -791,10 +790,10 @@ mod test { ..Default::default() }; - let definitions = config - .outputs(LogNamespace::Vector) - .remove(0) - .schema_definition(true); + let definition = config.outputs(LogNamespace::Vector)[0] + .clone() + .log_schema_definition + .unwrap(); let expected_definition = Definition::new_with_default_metadata(Kind::bytes(), [LogNamespace::Vector]) @@ -825,17 +824,17 @@ mod test { None, ); - assert_eq!(definitions, Some(expected_definition)) + assert_eq!(definition, expected_definition) } #[test] fn output_schema_definition_legacy_namespace() { let config = LogstashConfig::default(); - let definitions = config - .outputs(LogNamespace::Legacy) - .remove(0) - .schema_definition(true); + let definition = config.outputs(LogNamespace::Legacy)[0] + .clone() + .log_schema_definition + .unwrap(); let expected_definition = Definition::new_with_default_metadata( Kind::object(Collection::empty()), @@ -850,7 +849,7 @@ mod test { .with_event_field(&owned_value_path!("timestamp"), Kind::timestamp(), None) .with_event_field(&owned_value_path!("host"), Kind::bytes(), Some("host")); - assert_eq!(definitions, Some(expected_definition)) + assert_eq!(definition, expected_definition) } } diff --git a/src/sources/mongodb_metrics/mod.rs b/src/sources/mongodb_metrics/mod.rs index b68356444d5dd..30f3f84ec4fb9 100644 --- a/src/sources/mongodb_metrics/mod.rs +++ b/src/sources/mongodb_metrics/mod.rs @@ -19,7 +19,7 @@ use vector_config::configurable_component; use vector_core::{metric_tags, ByteSizeOf, EstimatedJsonEncodedSizeOf}; use crate::{ - config::{SourceConfig, SourceContext, SourceOutput}, + config::{self, Output, SourceConfig, SourceContext}, event::metric::{Metric, MetricKind, MetricTags, MetricValue}, internal_events::{ CollectionCompleted, EndpointBytesReceived, MongoDbMetricsBsonParseError, @@ -156,8 +156,8 @@ impl SourceConfig for MongoDbMetricsConfig { })) } - fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { - vec![SourceOutput::new_metrics()] + fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { + vec![Output::default(config::DataType::Metric)] } fn can_acknowledge(&self) -> bool { diff --git a/src/sources/nats.rs b/src/sources/nats.rs index faef941a4c7f5..be96449831199 100644 --- a/src/sources/nats.rs +++ b/src/sources/nats.rs @@ -16,7 +16,7 @@ use vector_core::{ use crate::{ codecs::{Decoder, DecodingConfig}, - config::{GenerateConfig, SourceConfig, SourceContext, SourceOutput}, + config::{GenerateConfig, Output, SourceConfig, SourceContext}, event::Event, internal_events::StreamClosedError, nats::{from_tls_auth_config, NatsAuthConfig, NatsConfigError}, @@ -135,7 +135,7 @@ impl SourceConfig for NatsSourceConfig { ))) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let log_namespace = global_log_namespace.merge(self.log_namespace); let legacy_subject_key_field = self .subject_key_field @@ -154,10 +154,7 @@ impl SourceConfig for NatsSourceConfig { None, ); - vec![SourceOutput::new_logs( - self.decoding.output_type(), - schema_definition, - )] + vec![Output::default(self.decoding.output_type()).with_schema_definition(schema_definition)] } fn can_acknowledge(&self) -> bool { @@ -293,10 +290,10 @@ mod tests { ..Default::default() }; - let definitions = config - .outputs(LogNamespace::Vector) - 
.remove(0) - .schema_definition(true); + let definition = config.outputs(LogNamespace::Vector)[0] + .clone() + .log_schema_definition + .unwrap(); let expected_definition = Definition::new_with_default_metadata(Kind::bytes(), [LogNamespace::Vector]) @@ -313,7 +310,7 @@ mod tests { ) .with_metadata_field(&owned_value_path!("nats", "subject"), Kind::bytes(), None); - assert_eq!(definitions, Some(expected_definition)); + assert_eq!(definition, expected_definition); } #[test] @@ -322,10 +319,10 @@ mod tests { subject_key_field: default_subject_key_field(), ..Default::default() }; - let definitions = config - .outputs(LogNamespace::Legacy) - .remove(0) - .schema_definition(true); + let definition = config.outputs(LogNamespace::Legacy)[0] + .clone() + .log_schema_definition + .unwrap(); let expected_definition = Definition::new_with_default_metadata( Kind::object(Collection::empty()), @@ -340,7 +337,7 @@ mod tests { .with_event_field(&owned_value_path!("source_type"), Kind::bytes(), None) .with_event_field(&owned_value_path!("subject"), Kind::bytes(), None); - assert_eq!(definitions, Some(expected_definition)); + assert_eq!(definition, expected_definition); } } diff --git a/src/sources/nginx_metrics/mod.rs b/src/sources/nginx_metrics/mod.rs index 0ea02fffd83c1..8d9cdb8b6e3de 100644 --- a/src/sources/nginx_metrics/mod.rs +++ b/src/sources/nginx_metrics/mod.rs @@ -16,7 +16,7 @@ use vector_config::configurable_component; use vector_core::{metric_tags, EstimatedJsonEncodedSizeOf}; use crate::{ - config::{SourceConfig, SourceContext, SourceOutput}, + config::{DataType, Output, SourceConfig, SourceContext}, event::metric::{Metric, MetricKind, MetricTags, MetricValue}, http::{Auth, HttpClient}, internal_events::{ @@ -144,8 +144,8 @@ impl SourceConfig for NginxMetricsConfig { })) } - fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { - vec![SourceOutput::new_metrics()] + fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { + vec![Output::default(DataType::Metric)] } fn can_acknowledge(&self) -> bool { diff --git a/src/sources/opentelemetry/mod.rs b/src/sources/opentelemetry/mod.rs index 275721872bf1d..dd2f2cc6add96 100644 --- a/src/sources/opentelemetry/mod.rs +++ b/src/sources/opentelemetry/mod.rs @@ -32,8 +32,8 @@ use self::{ }; use crate::{ config::{ - DataType, GenerateConfig, Resource, SourceAcknowledgementsConfig, SourceConfig, - SourceContext, SourceOutput, + DataType, GenerateConfig, Output, Resource, SourceAcknowledgementsConfig, SourceConfig, + SourceContext, }, serde::bool_or_struct, sources::{util::grpc::run_grpc_server, Source}, @@ -167,7 +167,7 @@ impl SourceConfig for OpentelemetryConfig { // TODO: appropriately handle "severity" meaning across both "severity_text" and "severity_number", // as both are optional and can be converted to/from. 
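// For contrast with the log sources in this patch: every metrics-only source
// it touches (mongodb_metrics, nginx_metrics, postgresql_metrics, prometheus,
// statsd) collapses to the same shape after the revert, namely a single
// default `Metric` output with no schema definition attached.
fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec<Output> {
    vec![Output::default(DataType::Metric)]
}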
- fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let log_namespace = global_log_namespace.merge(self.log_namespace); let schema_definition = Definition::new_with_default_metadata(Kind::any(), [log_namespace]) .with_source_metadata( @@ -255,7 +255,9 @@ impl SourceConfig for OpentelemetryConfig { } }; - vec![SourceOutput::new_logs(DataType::Log, schema_definition).with_port(LOGS)] + vec![Output::default(DataType::Log) + .with_port(LOGS) + .with_schema_definition(schema_definition)] } fn resources(&self) -> Vec { diff --git a/src/sources/opentelemetry/tests.rs b/src/sources/opentelemetry/tests.rs index c2eef23095339..23419aaed5beb 100644 --- a/src/sources/opentelemetry/tests.rs +++ b/src/sources/opentelemetry/tests.rs @@ -48,10 +48,13 @@ async fn receive_grpc_logs_vector_namespace() { acknowledgements: Default::default(), log_namespace: Some(true), }; - let schema_definitions = source + let schema_definition = source .outputs(LogNamespace::Vector) - .remove(0) - .schema_definition(true); + .first() + .unwrap() + .log_schema_definition + .clone() + .unwrap(); let (sender, logs_output, _) = new_source(EventStatus::Delivered); let server = source @@ -108,7 +111,7 @@ async fn receive_grpc_logs_vector_namespace() { // we just send one, so only one output assert_eq!(output.len(), 1); let event = output.pop().unwrap(); - schema_definitions.unwrap().assert_valid_for_event(&event); + schema_definition.assert_valid_for_event(&event); assert_eq!(event.as_log().get(".").unwrap(), &vrl::value!("log body")); @@ -185,10 +188,13 @@ async fn receive_grpc_logs_legacy_namespace() { acknowledgements: Default::default(), log_namespace: Default::default(), }; - let schema_definitions = source + let schema_definition = source .outputs(LogNamespace::Legacy) - .remove(0) - .schema_definition(true); + .first() + .unwrap() + .log_schema_definition + .clone() + .unwrap(); let (sender, logs_output, _) = new_source(EventStatus::Delivered); let server = source @@ -245,9 +251,7 @@ async fn receive_grpc_logs_legacy_namespace() { // we just send one, so only one output assert_eq!(output.len(), 1); let actual_event = output.pop().unwrap(); - schema_definitions - .unwrap() - .assert_valid_for_event(&actual_event); + schema_definition.assert_valid_for_event(&actual_event); let expect_vec = vec_into_btmap(vec![ ( "attributes", diff --git a/src/sources/postgresql_metrics.rs b/src/sources/postgresql_metrics.rs index b7415c4217831..f7de5ff1fecbb 100644 --- a/src/sources/postgresql_metrics.rs +++ b/src/sources/postgresql_metrics.rs @@ -31,7 +31,7 @@ use vector_core::config::LogNamespace; use vector_core::{metric_tags, ByteSizeOf, EstimatedJsonEncodedSizeOf}; use crate::{ - config::{SourceConfig, SourceContext, SourceOutput}, + config::{DataType, Output, SourceConfig, SourceContext}, event::metric::{Metric, MetricKind, MetricTags, MetricValue}, internal_events::{ CollectionCompleted, EndpointBytesReceived, EventsReceived, PostgresqlMetricsCollectError, @@ -233,8 +233,8 @@ impl SourceConfig for PostgresqlMetricsConfig { })) } - fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { - vec![SourceOutput::new_metrics()] + fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { + vec![Output::default(DataType::Metric)] } fn can_acknowledge(&self) -> bool { diff --git a/src/sources/prometheus/remote_write.rs b/src/sources/prometheus/remote_write.rs index 3b27770ba995b..fce4e781848b5 100644 --- a/src/sources/prometheus/remote_write.rs +++ 
b/src/sources/prometheus/remote_write.rs @@ -10,7 +10,7 @@ use warp::http::{HeaderMap, StatusCode}; use super::parser; use crate::{ config::{ - GenerateConfig, SourceAcknowledgementsConfig, SourceConfig, SourceContext, SourceOutput, + self, GenerateConfig, Output, SourceAcknowledgementsConfig, SourceConfig, SourceContext, }, event::Event, internal_events::PrometheusRemoteWriteParseError, @@ -88,8 +88,8 @@ impl SourceConfig for PrometheusRemoteWriteConfig { ) } - fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { - vec![SourceOutput::new_metrics()] + fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { + vec![Output::default(config::DataType::Metric)] } fn can_acknowledge(&self) -> bool { diff --git a/src/sources/prometheus/scrape.rs b/src/sources/prometheus/scrape.rs index 7deb49eb7085b..67a4cfd042055 100644 --- a/src/sources/prometheus/scrape.rs +++ b/src/sources/prometheus/scrape.rs @@ -12,7 +12,7 @@ use vector_core::{config::LogNamespace, event::Event}; use super::parser; use crate::sources::util::http::HttpMethod; use crate::{ - config::{GenerateConfig, SourceConfig, SourceContext, SourceOutput}, + config::{self, GenerateConfig, Output, SourceConfig, SourceContext}, http::Auth, internal_events::PrometheusParseError, sources::{ @@ -156,8 +156,8 @@ impl SourceConfig for PrometheusScrapeConfig { Ok(call(inputs, builder, cx.out, HttpMethod::Get).boxed()) } - fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { - vec![SourceOutput::new_metrics()] + fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { + vec![Output::default(config::DataType::Metric)] } fn can_acknowledge(&self) -> bool { diff --git a/src/sources/redis/mod.rs b/src/sources/redis/mod.rs index e2134e78ba9ee..e45b042927f61 100644 --- a/src/sources/redis/mod.rs +++ b/src/sources/redis/mod.rs @@ -20,7 +20,7 @@ use vector_core::{ use crate::{ codecs::{Decoder, DecodingConfig}, - config::{log_schema, GenerateConfig, SourceConfig, SourceContext, SourceOutput}, + config::{log_schema, GenerateConfig, Output, SourceConfig, SourceContext}, event::Event, internal_events::{EventsReceived, StreamClosedError}, serde::{default_decoding, default_framing_message_based}, @@ -195,7 +195,7 @@ impl SourceConfig for RedisSourceConfig { } } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let log_namespace = global_log_namespace.merge(self.log_namespace); let redis_key_path = self @@ -216,10 +216,7 @@ impl SourceConfig for RedisSourceConfig { ) .with_standard_vector_source_metadata(); - vec![SourceOutput::new_logs( - self.decoding.output_type(), - schema_definition, - )] + vec![Output::default(self.decoding.output_type()).with_schema_definition(schema_definition)] } fn can_acknowledge(&self) -> bool { diff --git a/src/sources/socket/mod.rs b/src/sources/socket/mod.rs index c21a84b30da7a..e66f76778e805 100644 --- a/src/sources/socket/mod.rs +++ b/src/sources/socket/mod.rs @@ -13,7 +13,7 @@ use vector_core::config::{log_schema, LegacyKey, LogNamespace}; use crate::serde::default_framing_message_based; use crate::{ codecs::DecodingConfig, - config::{GenerateConfig, Resource, SourceConfig, SourceContext, SourceOutput}, + config::{GenerateConfig, Output, Resource, SourceConfig, SourceContext}, sources::util::net::TcpSource, tls::MaybeTlsSettings, }; @@ -217,7 +217,7 @@ impl SourceConfig for SocketConfig { } } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> 
Vec { let log_namespace = global_log_namespace.merge(Some(self.log_namespace())); let schema_definition = self @@ -309,10 +309,8 @@ impl SourceConfig for SocketConfig { } }; - vec![SourceOutput::new_logs( - self.decoding().output_type(), - schema_definition, - )] + vec![Output::default(self.decoding().output_type()) + .with_schema_definition(schema_definition)] } fn resources(&self) -> Vec { diff --git a/src/sources/splunk_hec/mod.rs b/src/sources/splunk_hec/mod.rs index f8d7d0e208429..b239921ce5b9e 100644 --- a/src/sources/splunk_hec/mod.rs +++ b/src/sources/splunk_hec/mod.rs @@ -34,7 +34,7 @@ use self::{ splunk_response::{HecResponse, HecResponseMetadata, HecStatusCode}, }; use crate::{ - config::{log_schema, DataType, Resource, SourceConfig, SourceContext, SourceOutput}, + config::{log_schema, DataType, Output, Resource, SourceConfig, SourceContext}, event::{Event, LogEvent, Value}, internal_events::{ EventsReceived, HttpBytesReceived, SplunkHecRequestBodyInvalidError, SplunkHecRequestError, @@ -175,7 +175,7 @@ impl SourceConfig for SplunkConfig { })) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let log_namespace = global_log_namespace.merge(self.log_namespace); let schema_definition = match log_namespace { @@ -237,7 +237,7 @@ impl SourceConfig for SplunkConfig { None, ); - vec![SourceOutput::new_logs(DataType::Log, schema_definition)] + vec![Output::default(DataType::Log).with_schema_definition(schema_definition)] } fn resources(&self) -> Vec { @@ -2443,10 +2443,10 @@ mod tests { ..Default::default() }; - let definition = config - .outputs(LogNamespace::Vector) - .remove(0) - .schema_definition(true); + let definition = config.outputs(LogNamespace::Vector)[0] + .clone() + .log_schema_definition + .unwrap(); let expected_definition = Definition::new_with_default_metadata( Kind::object(Collection::empty()).or_bytes(), @@ -2488,16 +2488,16 @@ mod tests { None, ); - assert_eq!(definition, Some(expected_definition)); + assert_eq!(definition, expected_definition); } #[test] fn output_schema_definition_legacy_namespace() { let config = SplunkConfig::default(); - let definitions = config - .outputs(LogNamespace::Legacy) - .remove(0) - .schema_definition(true); + let definition = config.outputs(LogNamespace::Legacy)[0] + .clone() + .log_schema_definition + .unwrap(); let expected_definition = Definition::new_with_default_metadata( Kind::object(Collection::empty()), @@ -2523,6 +2523,6 @@ mod tests { .with_event_field(&owned_value_path!("splunk_sourcetype"), Kind::bytes(), None) .with_event_field(&owned_value_path!("timestamp"), Kind::timestamp(), None); - assert_eq!(definitions, Some(expected_definition)); + assert_eq!(definition, expected_definition); } } diff --git a/src/sources/statsd/mod.rs b/src/sources/statsd/mod.rs index 8fcb93e176e4b..a0332adcfeebe 100644 --- a/src/sources/statsd/mod.rs +++ b/src/sources/statsd/mod.rs @@ -21,7 +21,7 @@ use self::parser::ParseError; use super::util::net::{try_bind_udp_socket, SocketListenAddr, TcpNullAcker, TcpSource}; use crate::{ codecs::Decoder, - config::{GenerateConfig, Resource, SourceConfig, SourceContext, SourceOutput}, + config::{self, GenerateConfig, Output, Resource, SourceConfig, SourceContext}, event::Event, internal_events::{ EventsReceived, SocketBindError, SocketBytesReceived, SocketMode, SocketReceiveError, @@ -175,8 +175,8 @@ impl SourceConfig for StatsdConfig { } } - fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { - 
vec![SourceOutput::new_metrics()] + fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { + vec![Output::default(config::DataType::Metric)] } fn resources(&self) -> Vec { diff --git a/src/sources/syslog.rs b/src/sources/syslog.rs index 11d0d14c5e38d..7d70d94df8937 100644 --- a/src/sources/syslog.rs +++ b/src/sources/syslog.rs @@ -23,9 +23,7 @@ use vector_core::config::{LegacyKey, LogNamespace}; use crate::sources::util::build_unix_stream_source; use crate::{ codecs::Decoder, - config::{ - log_schema, DataType, GenerateConfig, Resource, SourceConfig, SourceContext, SourceOutput, - }, + config::{log_schema, DataType, GenerateConfig, Output, Resource, SourceConfig, SourceContext}, event::Event, internal_events::StreamClosedError, internal_events::{SocketBindError, SocketMode, SocketReceiveError}, @@ -240,13 +238,13 @@ impl SourceConfig for SyslogConfig { } } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let log_namespace = global_log_namespace.merge(self.log_namespace); let schema_definition = SyslogDeserializerConfig::from_source(SyslogConfig::NAME) .schema_definition(log_namespace) .with_standard_vector_source_metadata(); - vec![SourceOutput::new_logs(DataType::Log, schema_definition)] + vec![Output::default(DataType::Log).with_schema_definition(schema_definition)] } fn resources(&self) -> Vec { @@ -498,10 +496,10 @@ mod test { ..Default::default() }; - let definitions = config - .outputs(LogNamespace::Vector) - .remove(0) - .schema_definition(true); + let definition = config.outputs(LogNamespace::Vector)[0] + .clone() + .log_schema_definition + .unwrap(); let expected_definition = Definition::new_with_default_metadata(Kind::bytes(), [LogNamespace::Vector]) @@ -574,17 +572,17 @@ mod test { None, ); - assert_eq!(definitions, Some(expected_definition)); + assert_eq!(definition, expected_definition); } #[test] fn output_schema_definition_legacy_namespace() { let config = SyslogConfig::default(); - let definitions = config - .outputs(LogNamespace::Legacy) - .remove(0) - .schema_definition(true); + let definition = config.outputs(LogNamespace::Legacy)[0] + .clone() + .log_schema_definition + .unwrap(); let expected_definition = Definition::new_with_default_metadata( Kind::object(Collection::empty()), @@ -643,7 +641,7 @@ mod test { .unknown_fields(Kind::object(Collection::from_unknown(Kind::bytes()))) .with_standard_vector_source_metadata(); - assert_eq!(definitions, Some(expected_definition)); + assert_eq!(definition, expected_definition); } #[test] diff --git a/src/sources/vector/mod.rs b/src/sources/vector/mod.rs index 7f148fff81c47..e2ab2807a6263 100644 --- a/src/sources/vector/mod.rs +++ b/src/sources/vector/mod.rs @@ -15,8 +15,8 @@ use vector_core::{ use crate::{ config::{ - DataType, GenerateConfig, Resource, SourceAcknowledgementsConfig, SourceConfig, - SourceContext, SourceOutput, + DataType, GenerateConfig, Output, Resource, SourceAcknowledgementsConfig, SourceConfig, + SourceContext, }, internal_events::{EventsReceived, StreamClosedError}, proto::vector as proto, @@ -191,14 +191,14 @@ impl SourceConfig for VectorConfig { Ok(Box::pin(source)) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let log_namespace = global_log_namespace.merge(self.log_namespace); let schema_definition = NativeDeserializerConfig .schema_definition(log_namespace) .with_standard_vector_source_metadata(); - 
vec![SourceOutput::new_logs(DataType::all(), schema_definition)] + vec![Output::default(DataType::all()).with_schema_definition(schema_definition)] } fn resources(&self) -> Vec { @@ -229,10 +229,10 @@ mod test { fn output_schema_definition_vector_namespace() { let config = VectorConfig::default(); - let definitions = config - .outputs(LogNamespace::Vector) - .remove(0) - .schema_definition(true); + let definition = config.outputs(LogNamespace::Vector)[0] + .clone() + .log_schema_definition + .unwrap(); let expected_definition = Definition::new_with_default_metadata(Kind::any(), [LogNamespace::Vector]) @@ -247,17 +247,17 @@ mod test { None, ); - assert_eq!(definitions, Some(expected_definition)) + assert_eq!(definition, expected_definition) } #[test] fn output_schema_definition_legacy_namespace() { let config = VectorConfig::default(); - let definitions = config - .outputs(LogNamespace::Legacy) - .remove(0) - .schema_definition(true); + let definition = config.outputs(LogNamespace::Legacy)[0] + .clone() + .log_schema_definition + .unwrap(); let expected_definition = Definition::new_with_default_metadata( Kind::object(Collection::empty()), @@ -266,7 +266,7 @@ mod test { .with_event_field(&owned_value_path!("source_type"), Kind::bytes(), None) .with_event_field(&owned_value_path!("timestamp"), Kind::timestamp(), None); - assert_eq!(definitions, Some(expected_definition)) + assert_eq!(definition, expected_definition) } } diff --git a/src/test_util/mock/sources/backpressure.rs b/src/test_util/mock/sources/backpressure.rs index 146a3b6d2a828..59d3b898bfaca 100644 --- a/src/test_util/mock/sources/backpressure.rs +++ b/src/test_util/mock/sources/backpressure.rs @@ -9,10 +9,9 @@ use vector_config::configurable_component; use vector_core::{ config::LogNamespace, event::{Event, LogEvent}, - schema::Definition, }; use vector_core::{ - config::{DataType, SourceOutput}, + config::{DataType, Output}, source::Source, }; @@ -63,11 +62,8 @@ impl SourceConfig for BackpressureSourceConfig { .boxed()) } - fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { - vec![SourceOutput::new_logs( - DataType::all(), - Definition::default_legacy_namespace(), - )] + fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { + vec![Output::default(DataType::all())] } fn can_acknowledge(&self) -> bool { diff --git a/src/test_util/mock/sources/basic.rs b/src/test_util/mock/sources/basic.rs index 7ffeb6d3d0da2..e9d7e15ff611f 100644 --- a/src/test_util/mock/sources/basic.rs +++ b/src/test_util/mock/sources/basic.rs @@ -6,9 +6,9 @@ use std::sync::{ use async_trait::async_trait; use vector_buffers::topology::channel::{limited, LimitedReceiver}; use vector_config::configurable_component; -use vector_core::{config::LogNamespace, schema::Definition}; +use vector_core::config::LogNamespace; use vector_core::{ - config::{DataType, SourceOutput}, + config::{DataType, Output}, event::{EventArray, EventContainer}, source::Source, }; @@ -132,11 +132,8 @@ impl SourceConfig for BasicSourceConfig { })) } - fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { - vec![SourceOutput::new_logs( - self.data_type.unwrap(), - Definition::default_legacy_namespace(), - )] + fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { + vec![Output::default(self.data_type.unwrap())] } fn can_acknowledge(&self) -> bool { diff --git a/src/test_util/mock/sources/error.rs b/src/test_util/mock/sources/error.rs index 360f15632c1e4..4a84e6c12ad7b 100644 --- a/src/test_util/mock/sources/error.rs +++ 
b/src/test_util/mock/sources/error.rs @@ -2,9 +2,8 @@ use async_trait::async_trait; use futures_util::{future::err, FutureExt}; use vector_config::configurable_component; use vector_core::config::LogNamespace; -use vector_core::schema::Definition; use vector_core::{ - config::{DataType, SourceOutput}, + config::{DataType, Output}, source::Source, }; @@ -27,11 +26,8 @@ impl SourceConfig for ErrorSourceConfig { Ok(err(()).boxed()) } - fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { - vec![SourceOutput::new_logs( - DataType::Log, - Definition::default_legacy_namespace(), - )] + fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { + vec![Output::default(DataType::Log)] } fn can_acknowledge(&self) -> bool { diff --git a/src/test_util/mock/sources/panic.rs b/src/test_util/mock/sources/panic.rs index 65f895f9eaf6d..c65e0ab099b19 100644 --- a/src/test_util/mock/sources/panic.rs +++ b/src/test_util/mock/sources/panic.rs @@ -1,9 +1,8 @@ use async_trait::async_trait; use vector_config::configurable_component; use vector_core::config::LogNamespace; -use vector_core::schema::Definition; use vector_core::{ - config::{DataType, SourceOutput}, + config::{DataType, Output}, source::Source, }; @@ -26,11 +25,8 @@ impl SourceConfig for PanicSourceConfig { Ok(Box::pin(async { panic!() })) } - fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { - vec![SourceOutput::new_logs( - DataType::Log, - Definition::default_legacy_namespace(), - )] + fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { + vec![Output::default(DataType::Log)] } fn can_acknowledge(&self) -> bool { diff --git a/src/test_util/mock/sources/tripwire.rs b/src/test_util/mock/sources/tripwire.rs index 5a8d91a143a62..f24e2d18899c3 100644 --- a/src/test_util/mock/sources/tripwire.rs +++ b/src/test_util/mock/sources/tripwire.rs @@ -5,9 +5,8 @@ use futures_util::{future, FutureExt}; use stream_cancel::{Trigger, Tripwire}; use vector_config::configurable_component; use vector_core::config::LogNamespace; -use vector_core::schema::Definition; use vector_core::{ - config::{DataType, SourceOutput}, + config::{DataType, Output}, source::Source, }; @@ -66,11 +65,8 @@ impl SourceConfig for TripwireSourceConfig { )) } - fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { - vec![SourceOutput::new_logs( - DataType::Log, - Definition::default_legacy_namespace(), - )] + fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { + vec![Output::default(DataType::Log)] } fn can_acknowledge(&self) -> bool { diff --git a/src/test_util/mock/transforms/basic.rs b/src/test_util/mock/transforms/basic.rs index ce8673c408994..e2643bec025b2 100644 --- a/src/test_util/mock/transforms/basic.rs +++ b/src/test_util/mock/transforms/basic.rs @@ -5,7 +5,7 @@ use value::Value; use vector_config::configurable_component; use vector_core::config::LogNamespace; use vector_core::{ - config::{DataType, Input, TransformOutput}, + config::{DataType, Input, Output}, event::{ metric::{MetricData, Sample}, Event, MetricValue, @@ -14,7 +14,7 @@ use vector_core::{ transform::{FunctionTransform, OutputBuffer, Transform}, }; -use crate::config::{OutputId, TransformConfig, TransformContext}; +use crate::config::{TransformConfig, TransformContext}; /// Configuration for the `test_basic` transform. 
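// The common shape the revert gives the mock sources above (backpressure,
// basic, error, panic, tripwire): outputs no longer bundle a schema
// `Definition`; when one is absent, the topology builder later falls back to
// `Definition::default_legacy_namespace` (see `build_pieces` below).
fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec<Output> {
    vec![Output::default(DataType::Log)]
}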
#[configurable_component(transform("test_basic", "Test (basic)"))] @@ -49,18 +49,8 @@ impl TransformConfig for BasicTransformConfig { Input::all() } - fn outputs( - &self, - definitions: &[(OutputId, schema::Definition)], - _: LogNamespace, - ) -> Vec { - vec![TransformOutput::new( - DataType::all(), - definitions - .iter() - .map(|(_output, definition)| definition.clone()) - .collect(), - )] + fn outputs(&self, _: &schema::Definition, _: LogNamespace) -> Vec { + vec![Output::default(DataType::all())] } } diff --git a/src/test_util/mock/transforms/noop.rs b/src/test_util/mock/transforms/noop.rs index b6712e4eec21f..f94bf7a1bceee 100644 --- a/src/test_util/mock/transforms/noop.rs +++ b/src/test_util/mock/transforms/noop.rs @@ -5,13 +5,13 @@ use futures_util::Stream; use vector_config::configurable_component; use vector_core::config::LogNamespace; use vector_core::{ - config::{DataType, Input, TransformOutput}, + config::{DataType, Input, Output}, event::{Event, EventContainer}, schema::Definition, transform::{FunctionTransform, OutputBuffer, TaskTransform, Transform}, }; -use crate::config::{GenerateConfig, OutputId, TransformConfig, TransformContext}; +use crate::config::{GenerateConfig, TransformConfig, TransformContext}; use super::TransformType; @@ -39,18 +39,8 @@ impl TransformConfig for NoopTransformConfig { Input::all() } - fn outputs( - &self, - definitions: &[(OutputId, Definition)], - _: LogNamespace, - ) -> Vec { - vec![TransformOutput::new( - DataType::all(), - definitions - .iter() - .map(|(_output, definition)| definition.clone()) - .collect(), - )] + fn outputs(&self, _: &Definition, _: LogNamespace) -> Vec { + vec![Output::default(DataType::all())] } async fn build(&self, _: &TransformContext) -> crate::Result { diff --git a/src/topology/builder.rs b/src/topology/builder.rs index 75957944d53fa..4c2c31531a9ca 100644 --- a/src/topology/builder.rs +++ b/src/topology/builder.rs @@ -12,7 +12,7 @@ use once_cell::sync::Lazy; use stream_cancel::{StreamExt as StreamCancelExt, Trigger, Tripwire}; use tokio::{ select, - sync::{mpsc::UnboundedSender, oneshot}, + sync::oneshot, time::{timeout, Duration}, }; use tracing::Instrument; @@ -41,8 +41,8 @@ use super::{ }; use crate::{ config::{ - ComponentKey, DataType, EnrichmentTableConfig, Input, Inputs, OutputId, ProxyConfig, - SinkConfig, SinkContext, SourceContext, TransformContext, TransformOuter, TransformOutput, + ComponentKey, DataType, EnrichmentTableConfig, Input, Inputs, Output, OutputId, + ProxyConfig, SinkConfig, SinkContext, SourceContext, TransformContext, TransformOuter, }, event::{EventArray, EventContainer}, internal_events::EventsReceived, @@ -71,582 +71,528 @@ static TRANSFORM_CONCURRENCY_LIMIT: Lazy = Lazy::new(|| { .unwrap_or_else(crate::num_threads) }); -/// Builds only the new pieces, and doesn't check their topology. 
-pub async fn build_pieces( - config: &super::Config, - diff: &ConfigDiff, - buffers: HashMap, -) -> Result> { - Builder::new(config, diff, buffers).build().await -} - -struct Builder<'a> { +pub(self) async fn load_enrichment_tables<'a>( config: &'a super::Config, diff: &'a ConfigDiff, - shutdown_coordinator: SourceShutdownCoordinator, - errors: Vec, - outputs: HashMap>, - tasks: HashMap, - buffers: HashMap, - inputs: HashMap, Inputs)>, - healthchecks: HashMap, - detach_triggers: HashMap, -} - -impl<'a> Builder<'a> { - fn new( - config: &'a super::Config, - diff: &'a ConfigDiff, - buffers: HashMap, - ) -> Self { - Self { - config, - diff, - buffers, - shutdown_coordinator: SourceShutdownCoordinator::default(), - errors: vec![], - outputs: HashMap::new(), - tasks: HashMap::new(), - inputs: HashMap::new(), - healthchecks: HashMap::new(), - detach_triggers: HashMap::new(), - } - } - - /// Builds the new pieces of the topology found in `self.diff`. - async fn build(mut self) -> Result> { - let enrichment_tables = self.load_enrichment_tables().await; - let source_tasks = self.build_sources().await; - self.build_transforms(enrichment_tables).await; - self.build_sinks().await; - - // We should have all the data for the enrichment tables loaded now, so switch them over to - // readonly. - enrichment_tables.finish_load(); - - if self.errors.is_empty() { - Ok(Pieces { - inputs: self.inputs, - outputs: Self::finalize_outputs(self.outputs), - tasks: self.tasks, - source_tasks, - healthchecks: self.healthchecks, - shutdown_coordinator: self.shutdown_coordinator, - detach_triggers: self.detach_triggers, - }) - } else { - Err(self.errors) - } - } - - fn finalize_outputs( - outputs: HashMap>, - ) -> HashMap, UnboundedSender>> - { - let mut finalized_outputs = HashMap::new(); - for (id, output) in outputs { - let entry = finalized_outputs - .entry(id.component) - .or_insert_with(HashMap::new); - entry.insert(id.port, output); - } +) -> (&'static enrichment::TableRegistry, Vec) { + let mut enrichment_tables = HashMap::new(); + + let mut errors = vec![]; + + // Build enrichment tables + 'tables: for (name, table) in config.enrichment_tables.iter() { + let table_name = name.to_string(); + if ENRICHMENT_TABLES.needs_reload(&table_name) { + let indexes = if !diff.enrichment_tables.is_added(name) { + // If this is an existing enrichment table, we need to store the indexes to reapply + // them again post load. + Some(ENRICHMENT_TABLES.index_fields(&table_name)) + } else { + None + }; - finalized_outputs - } + let mut table = match table.inner.build(&config.global).await { + Ok(table) => table, + Err(error) => { + errors.push(format!("Enrichment Table \"{}\": {}", name, error)); + continue; + } + }; - /// Loads, or reloads the enrichment tables. - /// The tables are stored in the `ENRICHMENT_TABLES` global variable. - async fn load_enrichment_tables(&mut self) -> &'static enrichment::TableRegistry { - let mut enrichment_tables = HashMap::new(); - - // Build enrichment tables - 'tables: for (name, table) in self.config.enrichment_tables.iter() { - let table_name = name.to_string(); - if ENRICHMENT_TABLES.needs_reload(&table_name) { - let indexes = if !self.diff.enrichment_tables.is_added(name) { - // If this is an existing enrichment table, we need to store the indexes to reapply - // them again post load. 
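// The reload behaviour described above, sketched with hypothetical helpers
// (`rebuild_table`, `reapply_indexes`, `reloaded`): an index failure abandons
// only that table's reload via the labeled `continue 'tables`, so the
// previously loaded data keeps serving.
'tables: for (name, table_cfg) in tables {
    let mut table = rebuild_table(table_cfg).await?;
    if reapply_indexes(&mut table).is_err() {
        continue 'tables; // keep the old data for this table
    }
    reloaded.insert(name, table);
}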
- Some(ENRICHMENT_TABLES.index_fields(&table_name)) - } else { - None - }; - - let mut table = match table.inner.build(&self.config.global).await { - Ok(table) => table, - Err(error) => { - self.errors - .push(format!("Enrichment Table \"{}\": {}", name, error)); - continue; - } - }; - - if let Some(indexes) = indexes { - for (case, index) in indexes { - match table - .add_index(case, &index.iter().map(|s| s.as_ref()).collect::>()) - { - Ok(_) => (), - Err(error) => { - // If there is an error adding an index we do not want to use the reloaded - // data, the previously loaded data will still need to be used. - // Just report the error and continue. - error!(message = "Unable to add index to reloaded enrichment table.", + if let Some(indexes) = indexes { + for (case, index) in indexes { + match table + .add_index(case, &index.iter().map(|s| s.as_ref()).collect::>()) + { + Ok(_) => (), + Err(error) => { + // If there is an error adding an index we do not want to use the reloaded + // data, the previously loaded data will still need to be used. + // Just report the error and continue. + error!(message = "Unable to add index to reloaded enrichment table.", table = ?name.to_string(), %error); - continue 'tables; - } + continue 'tables; } } } - - enrichment_tables.insert(table_name, table); } - } - - ENRICHMENT_TABLES.load(enrichment_tables); - &ENRICHMENT_TABLES + enrichment_tables.insert(table_name, table); + } } - async fn build_sources(&mut self) -> HashMap { - let mut source_tasks = HashMap::new(); - - for (key, source) in self - .config - .sources() - .filter(|(key, _)| self.diff.sources.contains_new(key)) - { - debug!(component = %key, "Building new source."); - - let typetag = source.inner.get_component_name(); - let source_outputs = source.inner.outputs(self.config.schema.log_namespace()); - - let span = error_span!( - "source", - component_kind = "source", - component_id = %key.id(), - component_type = %source.inner.get_component_name(), - // maintained for compatibility - component_name = %key.id(), - ); - let _entered_span = span.enter(); - - let task_name = format!( - ">> {} ({}, pump) >>", - source.inner.get_component_name(), - key.id() - ); + ENRICHMENT_TABLES.load(enrichment_tables); - let mut builder = SourceSender::builder().with_buffer(*SOURCE_SENDER_BUFFER_SIZE); - let mut pumps = Vec::new(); - let mut controls = HashMap::new(); - let mut schema_definitions = HashMap::with_capacity(source_outputs.len()); + (&ENRICHMENT_TABLES, errors) +} - for output in source_outputs.into_iter() { - let mut rx = builder.add_source_output(output.clone()); +pub struct Pieces { + pub(super) inputs: HashMap, Inputs)>, + pub(crate) outputs: HashMap, fanout::ControlChannel>>, + pub(super) tasks: HashMap, + pub(crate) source_tasks: HashMap, + pub(super) healthchecks: HashMap, + pub(crate) shutdown_coordinator: SourceShutdownCoordinator, + pub(crate) detach_triggers: HashMap, +} - let (mut fanout, control) = Fanout::new(); - let pump = async move { - debug!("Source pump starting."); +/// Builds only the new pieces, and doesn't check their topology. 
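// The restored entry point's contract, given the `Pieces` struct above:
// success yields the fully wired inputs, outputs, and tasks, while failure
// yields every build error collected along the way rather than just the
// first; the caller-side handling here is only a sketch.
match build_pieces(&config, &diff, buffers).await {
    Ok(pieces) => { /* hand pieces.tasks and pieces.source_tasks to the runner */ }
    Err(errors) => {
        for error in errors {
            error!(message = "Failed to build topology.", %error);
        }
    }
}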
+pub async fn build_pieces( + config: &super::Config, + diff: &ConfigDiff, + mut buffers: HashMap, +) -> Result> { + let mut inputs = HashMap::new(); + let mut outputs = HashMap::new(); + let mut tasks = HashMap::new(); + let mut source_tasks = HashMap::new(); + let mut healthchecks = HashMap::new(); + let mut shutdown_coordinator = SourceShutdownCoordinator::default(); + let mut detach_triggers = HashMap::new(); - while let Some(array) = rx.next().await { - fanout.send(array).await.map_err(|e| { - debug!("Source pump finished with an error."); - TaskError::wrapped(e) - })?; - } + let mut errors = vec![]; - debug!("Source pump finished normally."); - Ok(TaskOutput::Source) - }; - - pumps.push(pump.instrument(span.clone())); - controls.insert( - OutputId { - component: key.clone(), - port: output.port.clone(), - }, - control, - ); - - let port = output.port.clone(); - if let Some(definition) = output.schema_definition(self.config.schema.enabled) { - schema_definitions.insert(port, definition); - } - } + let (enrichment_tables, enrichment_errors) = load_enrichment_tables(config, diff).await; + errors.extend(enrichment_errors); - let (pump_error_tx, mut pump_error_rx) = oneshot::channel(); + // Build sources + for (key, source) in config + .sources() + .filter(|(key, _)| diff.sources.contains_new(key)) + { + debug!(component = %key, "Building new source."); + + let typetag = source.inner.get_component_name(); + let source_outputs = source.inner.outputs(config.schema.log_namespace()); + + let span = error_span!( + "source", + component_kind = "source", + component_id = %key.id(), + component_type = %source.inner.get_component_name(), + // maintained for compatibility + component_name = %key.id(), + ); + let _entered_span = span.enter(); + + let task_name = format!( + ">> {} ({}, pump) >>", + source.inner.get_component_name(), + key.id() + ); + + let mut builder = SourceSender::builder().with_buffer(*SOURCE_SENDER_BUFFER_SIZE); + let mut pumps = Vec::new(); + let mut controls = HashMap::new(); + let mut schema_definitions = HashMap::with_capacity(source_outputs.len()); + + for output in source_outputs { + let mut rx = builder.add_output(output.clone()); + + let (mut fanout, control) = Fanout::new(); let pump = async move { - debug!("Source pump supervisor starting."); - - // Spawn all of the per-output pumps and then await their completion. - // - // If any of the pumps complete with an error, or panic/are cancelled, we return - // immediately. - let mut handles = FuturesUnordered::new(); - for pump in pumps { - handles.push(spawn_named(pump, task_name.as_ref())); - } + debug!("Source pump starting."); - let mut had_pump_error = false; - while let Some(output) = handles.try_next().await? { - if let Err(e) = output { - // Immediately send the error to the source's wrapper future, but ignore any - // errors during the send, since nested errors wouldn't make any sense here. 
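// The per-output pump pattern restored above, in miniature: each declared
// output gets a receiver from the `SourceSender` builder plus a `Fanout`, and
// the fanout's control channel is registered under its `OutputId`.
let mut rx = builder.add_output(output.clone());
let (mut fanout, control) = Fanout::new();
let pump = async move {
    while let Some(array) = rx.next().await {
        fanout.send(array).await.map_err(TaskError::wrapped)?;
    }
    Ok(TaskOutput::Source)
};
controls.insert(
    OutputId { component: key.clone(), port: output.port.clone() },
    control,
);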
- let _ = pump_error_tx.send(e); - had_pump_error = true; - break; - } + while let Some(array) = rx.next().await { + fanout.send(array).await.map_err(|e| { + debug!("Source pump finished with an error."); + TaskError::wrapped(e) + })?; } - if had_pump_error { - debug!("Source pump supervisor task finished with an error."); - } else { - debug!("Source pump supervisor task finished normally."); - } + debug!("Source pump finished normally."); Ok(TaskOutput::Source) }; - let pump = Task::new(key.clone(), typetag, pump); - - let pipeline = builder.build(); - - let (shutdown_signal, force_shutdown_tripwire) = - self.shutdown_coordinator.register_source(key); - - let context = SourceContext { - key: key.clone(), - globals: self.config.global.clone(), - shutdown: shutdown_signal, - out: pipeline, - proxy: ProxyConfig::merge_with_env(&self.config.global.proxy, &source.proxy), - acknowledgements: source.sink_acknowledgements, - schema_definitions, - schema: self.config.schema, - }; - let source = source.inner.build(context).await; - let server = match source { - Err(error) => { - self.errors.push(format!("Source \"{}\": {}", key, error)); - continue; - } - Ok(server) => server, - }; - // Build a wrapper future that drives the actual source future, but returns early if we've - // been signalled to forcefully shutdown, or if the source pump encounters an error. - // - // The forceful shutdown will only resolve if the source itself doesn't shutdown gracefully - // within the alloted time window. This can occur normally for certain sources, like stdin, - // where the I/O is blocking (in a separate thread) and won't wake up to check if it's time - // to shutdown unless some input is given. - let server = async move { - debug!("Source starting."); - - let mut result = select! { - biased; - - // We've been told that we must forcefully shut down. - _ = force_shutdown_tripwire => Ok(()), - - // The source pump encountered an error, which we're now bubbling up here to stop - // the source as well, since the source running makes no sense without the pump. - // - // We only match receiving a message, not the error of the sender being dropped, - // just to keep things simpler. - Ok(e) = &mut pump_error_rx => Err(e), - - // The source finished normally. - result = server => result.map_err(|_| TaskError::Opaque), - }; - - // Even though we already tried to receive any pump task error above, we may have exited - // on the source itself returning an error due to task scheduling, where the pump task - // encountered an error, sent it over the oneshot, but we were polling the source - // already and hit an error trying to send to the now-shutdown pump task. - // - // Since the error from the source is opaque at the moment (i.e. `()`), we try a final - // time to see if the pump task encountered an error, using _that_ instead if so, to - // propagate the true error that caused the source to have to stop. 
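// Both halves of the pump-error handshake, sketched: the supervisor pushes
// the first pump failure into a oneshot channel (ignoring send errors, which
// would only nest failures), and the source wrapper drains it one last time
// after the source resolves, preferring the pump's specific error over the
// opaque `()`; `task_error` stands in for the error taken from the failed pump.
let (pump_error_tx, mut pump_error_rx) = oneshot::channel();
// Supervisor side:
let _ = pump_error_tx.send(task_error);
// Wrapper side, after the source future resolves:
if let Ok(e) = pump_error_rx.try_recv() {
    result = Err(e);
}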
- if let Ok(e) = pump_error_rx.try_recv() { - result = Err(e); - } + pumps.push(pump.instrument(span.clone())); + controls.insert( + OutputId { + component: key.clone(), + port: output.port.clone(), + }, + control, + ); - match result { - Ok(()) => { - debug!("Source finished normally."); - Ok(TaskOutput::Source) - } - Err(e) => { - debug!("Source finished with an error."); - Err(e) - } - } - }; - let server = Task::new(key.clone(), typetag, server); + let schema_definition = output + .log_schema_definition + .unwrap_or_else(schema::Definition::default_legacy_namespace); - self.outputs.extend(controls); - self.tasks.insert(key.clone(), pump); - source_tasks.insert(key.clone(), server); + schema_definitions.insert(output.port, schema_definition); } - source_tasks - } + let (pump_error_tx, mut pump_error_rx) = oneshot::channel(); + let pump = async move { + debug!("Source pump supervisor starting."); - async fn build_transforms(&mut self, enrichment_tables: &enrichment::TableRegistry) { - let mut definition_cache = HashMap::default(); + // Spawn all of the per-output pumps and then await their completion. + // + // If any of the pumps complete with an error, or panic/are cancelled, we return + // immediately. + let mut handles = FuturesUnordered::new(); + for pump in pumps { + handles.push(spawn_named(pump, task_name.as_ref())); + } - for (key, transform) in self - .config - .transforms() - .filter(|(key, _)| self.diff.transforms.contains_new(key)) - { - debug!(component = %key, "Building new transform."); + let mut had_pump_error = false; + while let Some(output) = handles.try_next().await? { + if let Err(e) = output { + // Immediately send the error to the source's wrapper future, but ignore any + // errors during the send, since nested errors wouldn't make any sense here. + let _ = pump_error_tx.send(e); + had_pump_error = true; + break; + } + } - let input_definitions = - schema::input_definitions(&transform.inputs, self.config, &mut definition_cache); + if had_pump_error { + debug!("Source pump supervisor task finished with an error."); + } else { + debug!("Source pump supervisor task finished normally."); + } + Ok(TaskOutput::Source) + }; + let pump = Task::new(key.clone(), typetag, pump); + + let pipeline = builder.build(); + + let (shutdown_signal, force_shutdown_tripwire) = shutdown_coordinator.register_source(key); + + let context = SourceContext { + key: key.clone(), + globals: config.global.clone(), + shutdown: shutdown_signal, + out: pipeline, + proxy: ProxyConfig::merge_with_env(&config.global.proxy, &source.proxy), + acknowledgements: source.sink_acknowledgements, + schema_definitions, + schema: config.schema, + }; + let source = source.inner.build(context).await; + let server = match source { + Err(error) => { + errors.push(format!("Source \"{}\": {}", key, error)); + continue; + } + Ok(server) => server, + }; + + // Build a wrapper future that drives the actual source future, but returns early if we've + // been signalled to forcefully shutdown, or if the source pump encounters an error. + // + // The forceful shutdown will only resolve if the source itself doesn't shutdown gracefully + // within the alloted time window. This can occur normally for certain sources, like stdin, + // where the I/O is blocking (in a separate thread) and won't wake up to check if it's time + // to shutdown unless some input is given. + let server = async move { + debug!("Source starting."); + + let mut result = select! 
{ + biased; - let merged_definition: Definition = input_definitions - .iter() - .map(|(_output_id, definition)| definition.clone()) - .reduce(Definition::merge) - // We may not have any definitions if all the inputs are from metrics sources. - .unwrap_or_else(Definition::any); + // We've been told that we must forcefully shut down. + _ = force_shutdown_tripwire => Ok(()), - let span = error_span!( - "transform", - component_kind = "transform", - component_id = %key.id(), - component_type = %transform.inner.get_component_name(), - // maintained for compatibility - component_name = %key.id(), - ); + // The source pump encountered an error, which we're now bubbling up here to stop + // the source as well, since the source running makes no sense without the pump. + // + // We only match receiving a message, not the error of the sender being dropped, + // just to keep things simpler. + Ok(e) = &mut pump_error_rx => Err(e), - // Create a map of the outputs to the list of possible definitions from those outputs. - let schema_definitions = transform - .inner - .outputs(&input_definitions, self.config.schema.log_namespace()) - .into_iter() - .map(|output| (output.port, output.log_schema_definitions)) - .collect::>(); - - let context = TransformContext { - key: Some(key.clone()), - globals: self.config.global.clone(), - enrichment_tables: enrichment_tables.clone(), - schema_definitions, - merged_schema_definition: merged_definition.clone(), - schema: self.config.schema, + // The source finished normally. + result = server => result.map_err(|_| TaskError::Opaque), }; - let node = TransformNode::from_parts( - key.clone(), - transform, - &input_definitions, - self.config.schema.log_namespace(), - ); + // Even though we already tried to receive any pump task error above, we may have exited + // on the source itself returning an error due to task scheduling, where the pump task + // encountered an error, sent it over the oneshot, but we were polling the source + // already and hit an error trying to send to the now-shutdown pump task. + // + // Since the error from the source is opaque at the moment (i.e. `()`), we try a final + // time to see if the pump task encountered an error, using _that_ instead if so, to + // propagate the true error that caused the source to have to stop. 
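// The `select!` above is `biased`, so its branches are polled in declaration
// order on every wakeup: a forced shutdown wins over a pump error, which in
// turn wins over normal source completion.
let mut result = select! {
    biased;
    _ = force_shutdown_tripwire => Ok(()),
    Ok(e) = &mut pump_error_rx => Err(e),
    result = server => result.map_err(|_| TaskError::Opaque),
};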
+ if let Ok(e) = pump_error_rx.try_recv() { + result = Err(e); + } - let transform = match transform - .inner - .build(&context) - .instrument(span.clone()) - .await - { - Err(error) => { - self.errors - .push(format!("Transform \"{}\": {}", key, error)); - continue; + match result { + Ok(()) => { + debug!("Source finished normally."); + Ok(TaskOutput::Source) } - Ok(transform) => transform, - }; - - let (input_tx, input_rx) = - TopologyBuilder::standalone_memory(TOPOLOGY_BUFFER_SIZE, WhenFull::Block).await; + Err(e) => { + debug!("Source finished with an error."); + Err(e) + } + } + }; + let server = Task::new(key.clone(), typetag, server); - self.inputs - .insert(key.clone(), (input_tx, node.inputs.clone())); + outputs.extend(controls); + tasks.insert(key.clone(), pump); + source_tasks.insert(key.clone(), server); + } - let (transform_task, transform_outputs) = { - let _span = span.enter(); - build_transform(transform, node, input_rx) - }; + let mut definition_cache = HashMap::default(); - self.outputs.extend(transform_outputs); - self.tasks.insert(key.clone(), transform_task); + // Build transforms + for (key, transform) in config + .transforms() + .filter(|(key, _)| diff.transforms.contains_new(key)) + { + debug!(component = %key, "Building new transform."); + + let mut schema_definitions = HashMap::new(); + let merged_definition = + schema::merged_definition(&transform.inputs, config, &mut definition_cache); + + let span = error_span!( + "transform", + component_kind = "transform", + component_id = %key.id(), + component_type = %transform.inner.get_component_name(), + // maintained for compatibility + component_name = %key.id(), + ); + + for output in transform + .inner + .outputs(&merged_definition, config.schema.log_namespace()) + { + let definition = output + .log_schema_definition + .unwrap_or_else(|| merged_definition.clone()); + schema_definitions.insert(output.port, definition); } - } - async fn build_sinks(&mut self) { - for (key, sink) in self - .config - .sinks() - .filter(|(key, _)| self.diff.sinks.contains_new(key)) + let context = TransformContext { + key: Some(key.clone()), + globals: config.global.clone(), + enrichment_tables: enrichment_tables.clone(), + schema_definitions, + merged_schema_definition: merged_definition.clone(), + schema: config.schema, + }; + + let node = TransformNode::from_parts( + key.clone(), + transform, + &merged_definition, + config.schema.log_namespace(), + ); + + let transform = match transform + .inner + .build(&context) + .instrument(span.clone()) + .await { - debug!(component = %key, "Building new sink."); + Err(error) => { + errors.push(format!("Transform \"{}\": {}", key, error)); + continue; + } + Ok(transform) => transform, + }; - let sink_inputs = &sink.inputs; - let healthcheck = sink.healthcheck(); - let enable_healthcheck = healthcheck.enabled && self.config.healthchecks.enabled; + let (input_tx, input_rx) = + TopologyBuilder::standalone_memory(TOPOLOGY_BUFFER_SIZE, WhenFull::Block).await; - let typetag = sink.inner.get_component_name(); - let input_type = sink.inner.input().data_type(); + inputs.insert(key.clone(), (input_tx, node.inputs.clone())); - // At this point, we've validated that all transforms are valid, including any - // transform that mutates the schema provided by their sources. We can now validate the - // schema expectations of each individual sink. 
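// The per-port fallback wired for transforms above: each declared output
// either carries its own definition or inherits the merged definition of the
// transform's inputs.
for output in transform
    .inner
    .outputs(&merged_definition, config.schema.log_namespace())
{
    let definition = output
        .log_schema_definition
        .unwrap_or_else(|| merged_definition.clone());
    schema_definitions.insert(output.port, definition);
}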
- if let Err(mut err) = schema::validate_sink_expectations(key, sink, self.config) { - self.errors.append(&mut err); - }; + let (transform_task, transform_outputs) = { + let _span = span.enter(); + build_transform(transform, node, input_rx) + }; - let (tx, rx) = if let Some(buffer) = self.buffers.remove(key) { - buffer - } else { - let buffer_type = match sink.buffer.stages().first().expect("cant ever be empty") { - BufferType::Memory { .. } => "memory", - BufferType::DiskV2 { .. } => "disk", - }; - let buffer_span = error_span!( - "sink", - component_kind = "sink", - component_id = %key.id(), - component_type = typetag, - component_name = %key.id(), - buffer_type, - ); - let buffer = sink - .buffer - .build( - self.config.global.data_dir.clone(), - key.to_string(), - buffer_span, - ) - .await; - match buffer { - Err(error) => { - self.errors.push(format!("Sink \"{}\": {}", key, error)); - continue; - } - Ok((tx, rx)) => (tx, Arc::new(Mutex::new(Some(rx.into_stream())))), - } - }; + outputs.extend(transform_outputs); + tasks.insert(key.clone(), transform_task); + } - let cx = SinkContext { - healthcheck, - globals: self.config.global.clone(), - proxy: ProxyConfig::merge_with_env(&self.config.global.proxy, sink.proxy()), - schema: self.config.schema, - }; + // Build sinks + for (key, sink) in config + .sinks() + .filter(|(key, _)| diff.sinks.contains_new(key)) + { + debug!(component = %key, "Building new sink."); + + let sink_inputs = &sink.inputs; + let healthcheck = sink.healthcheck(); + let enable_healthcheck = healthcheck.enabled && config.healthchecks.enabled; + + let typetag = sink.inner.get_component_name(); + let input_type = sink.inner.input().data_type(); + + // At this point, we've validated that all transforms are valid, including any + // transform that mutates the schema provided by their sources. We can now validate the + // schema expectations of each individual sink. + if let Err(mut err) = schema::validate_sink_expectations(key, sink, config) { + errors.append(&mut err); + }; - let (sink, healthcheck) = match sink.inner.build(cx).await { + let (tx, rx) = if let Some(buffer) = buffers.remove(key) { + buffer + } else { + let buffer_type = match sink.buffer.stages().first().expect("cant ever be empty") { + BufferType::Memory { .. } => "memory", + BufferType::DiskV2 { .. } => "disk", + }; + let buffer_span = error_span!( + "sink", + component_kind = "sink", + component_id = %key.id(), + component_type = typetag, + component_name = %key.id(), + buffer_type, + ); + let buffer = sink + .buffer + .build(config.global.data_dir.clone(), key.to_string(), buffer_span) + .await; + match buffer { Err(error) => { - self.errors.push(format!("Sink \"{}\": {}", key, error)); + errors.push(format!("Sink \"{}\": {}", key, error)); continue; } - Ok(built) => built, - }; + Ok((tx, rx)) => (tx, Arc::new(Mutex::new(Some(rx.into_stream())))), + } + }; + + let cx = SinkContext { + healthcheck, + globals: config.global.clone(), + proxy: ProxyConfig::merge_with_env(&config.global.proxy, sink.proxy()), + schema: config.schema, + }; + + let (sink, healthcheck) = match sink.inner.build(cx).await { + Err(error) => { + errors.push(format!("Sink \"{}\": {}", key, error)); + continue; + } + Ok(built) => built, + }; + + let (trigger, tripwire) = Tripwire::new(); + + let sink = async move { + debug!("Sink starting."); + + // Why is this Arc>> needed you ask. 
+
+        let component_key = key.clone();
+        let healthcheck_task = async move {
+            if enable_healthcheck {
+                let duration = Duration::from_secs(10);
+                timeout(duration, healthcheck)
+                    .map(|result| match result {
+                        Ok(Ok(_)) => {
+                            info!("Healthcheck passed.");
+                            Ok(TaskOutput::Healthcheck)
+                        }
+                        Ok(Err(error)) => {
+                            error!(
+                                msg = "Healthcheck failed.",
+                                %error,
+                                component_kind = "sink",
+                                component_type = typetag,
+                                component_id = %component_key.id(),
+                                // maintained for compatibility
+                                component_name = %component_key.id(),
+                            );
+                            Err(TaskError::wrapped(error))
+                        }
+                        Err(e) => {
+                            error!(
+                                msg = "Healthcheck timed out.",
+                                component_kind = "sink",
+                                component_type = typetag,
+                                component_id = %component_key.id(),
+                                // maintained for compatibility
+                                component_name = %component_key.id(),
+                            );
+                            Err(TaskError::wrapped(Box::new(e)))
+                        }
+                    })
+                    .await
+            } else {
+                info!("Healthcheck disabled.");
+                Ok(TaskOutput::Healthcheck)
+            }
+        };
 
-            let (trigger, tripwire) = Tripwire::new();
-
-            let sink = async move {
-                debug!("Sink starting.");
-
-                // Why is this Arc<Mutex<Option<_>>> needed you ask.
-                // In case when this function build_pieces errors
-                // this future won't be run so this rx won't be taken
-                // which will enable us to reuse rx to rebuild
-                // old configuration by passing this Arc<Mutex<Option<_>>>
-                // yet again.
- let rx = rx - .lock() - .unwrap() - .take() - .expect("Task started but input has been taken."); - - let mut rx = wrap(rx); - - let events_received = register!(EventsReceived); - sink.run( - rx.by_ref() - .filter(|events: &EventArray| ready(filter_events_type(events, input_type))) - .inspect(|events| { - events_received.emit(CountByteSize( - events.len(), - events.estimated_json_encoded_size_of(), - )) - }) - .take_until_if(tripwire), - ) - .await - .map(|_| { - debug!("Sink finished normally."); - TaskOutput::Sink(rx) - }) - .map_err(|_| { - debug!("Sink finished with an error."); - TaskError::Opaque - }) - }; + let healthcheck_task = Task::new(key.clone(), typetag, healthcheck_task); - let task = Task::new(key.clone(), typetag, sink); - - let component_key = key.clone(); - let healthcheck_task = async move { - if enable_healthcheck { - let duration = Duration::from_secs(10); - timeout(duration, healthcheck) - .map(|result| match result { - Ok(Ok(_)) => { - info!("Healthcheck passed."); - Ok(TaskOutput::Healthcheck) - } - Ok(Err(error)) => { - error!( - msg = "Healthcheck failed.", - %error, - component_kind = "sink", - component_type = typetag, - component_id = %component_key.id(), - // maintained for compatibility - component_name = %component_key.id(), - ); - Err(TaskError::wrapped(error)) - } - Err(e) => { - error!( - msg = "Healthcheck timed out.", - component_kind = "sink", - component_type = typetag, - component_id = %component_key.id(), - // maintained for compatibility - component_name = %component_key.id(), - ); - Err(TaskError::wrapped(Box::new(e))) - } - }) - .await - } else { - info!("Healthcheck disabled."); - Ok(TaskOutput::Healthcheck) - } - }; + inputs.insert(key.clone(), (tx, sink_inputs.clone())); + healthchecks.insert(key.clone(), healthcheck_task); + tasks.insert(key.clone(), task); + detach_triggers.insert(key.clone(), trigger); + } - let healthcheck_task = Task::new(key.clone(), typetag, healthcheck_task); + // We should have all the data for the enrichment tables loaded now, so switch them over to + // readonly. 
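The comment above describes a load-then-freeze lifecycle; a rough standalone sketch of the idea, using plain standard-library types rather than the actual enrichment-table API:

    use std::{collections::HashMap, sync::Arc};

    fn main() {
        // Writable while the topology is being built...
        let mut tables: HashMap<String, Vec<u32>> = HashMap::new();
        tables.insert("users".into(), vec![1, 2, 3]);

        // ...then frozen behind an Arc so every component shares a read-only view.
        let tables: Arc<HashMap<String, Vec<u32>>> = Arc::new(tables);
        assert_eq!(tables.get("users").map(Vec::len), Some(3));
    }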
+    enrichment_tables.finish_load();
 
-        self.inputs.insert(key.clone(), (tx, sink_inputs.clone()));
-        self.healthchecks.insert(key.clone(), healthcheck_task);
-        self.tasks.insert(key.clone(), task);
-        self.detach_triggers.insert(key.clone(), trigger);
-    }
+    let mut finalized_outputs = HashMap::new();
+    for (id, output) in outputs {
+        let entry = finalized_outputs
+            .entry(id.component)
+            .or_insert_with(HashMap::new);
+        entry.insert(id.port, output);
     }
-}
 
-pub struct Pieces {
-    pub(super) inputs: HashMap<ComponentKey, (BufferSender<EventArray>, Inputs)>,
-    pub(crate) outputs: HashMap<ComponentKey, HashMap<Option<String>, fanout::ControlChannel>>,
-    pub(super) tasks: HashMap<ComponentKey, Task>,
-    pub(crate) source_tasks: HashMap<ComponentKey, Task>,
-    pub(super) healthchecks: HashMap<ComponentKey, Task>,
-    pub(crate) shutdown_coordinator: SourceShutdownCoordinator,
-    pub(crate) detach_triggers: HashMap<ComponentKey, Trigger>,
+    if errors.is_empty() {
+        let pieces = Pieces {
+            inputs,
+            outputs: finalized_outputs,
+            tasks,
+            source_tasks,
+            healthchecks,
+            shutdown_coordinator,
+            detach_triggers,
+        };
+
+        Ok(pieces)
+    } else {
+        Err(errors)
+    }
 }
 
 const fn filter_events_type(events: &EventArray, data_type: DataType) -> bool {
@@ -663,7 +609,7 @@ struct TransformNode {
     typetag: &'static str,
     inputs: Inputs,
     input_details: Input,
-    outputs: Vec<TransformOutput>,
+    outputs: Vec<Output>,
     enable_concurrency: bool,
 }
 
@@ -671,7 +617,7 @@ impl TransformNode {
     pub fn from_parts(
         key: ComponentKey,
         transform: &TransformOuter<OutputId>,
-        schema_definition: &[(OutputId, Definition)],
+        schema_definition: &Definition,
         global_log_namespace: LogNamespace,
     ) -> Self {
         Self {
diff --git a/src/topology/schema.rs b/src/topology/schema.rs
index 088967ec5b211..2b8f733e35c99 100644
--- a/src/topology/schema.rs
+++ b/src/topology/schema.rs
@@ -1,25 +1,44 @@
 use std::collections::HashMap;
-
-use vector_core::config::SourceOutput;
+use value::Kind;
+use vector_core::config::Output;
 
 pub(super) use crate::schema::Definition;
 use crate::{
-    config::{ComponentKey, Config, OutputId, SinkConfig, SinkOuter, TransformOutput},
+    config::{ComponentKey, Config, OutputId, SinkConfig, SinkOuter},
     topology,
 };
 
-/// The cache is used whilst building up the topology.
-/// TODO: Describe more, especially why we have a bool in the key.
-type Cache = HashMap<(bool, Vec<OutputId>), Vec<(OutputId, Definition)>>;
-
-pub fn possible_definitions(
+/// Create a new [`Definition`] by recursively merging all provided inputs into a given component.
+///
+/// Recursion happens when one of the component's inputs references a transform that has no
+/// definition output of its own; in that case, the definition output becomes the merged output
+/// of that transform's inputs.
+///
+/// For example:
+///
+/// Source 1 [Definition 1] ->
+/// Source 2 [Definition 2] -> Transform 1 [] -> [Definition 1 & 2]
+/// Source 3 [Definition 3] -> Transform 2 [Definition 4] -> [Definition 4] -> Sink
+///
+/// When asking for the merged definition feeding into `Sink`, `Transform 1` returns no schema
+/// definition of its own. In this case the `merged_definition` method recurses further back
+/// towards `Source 1` and `Source 2`, merging the two into a new definition
+/// (marked as `[Definition 1 & 2]` above).
+///
+/// It then asks for the definition of `Transform 2`, which *does* define its own definition,
+/// named `Definition 4`, which overrides `Definition 3` feeding into `Transform 2`. In this case,
+/// the `Sink` is only interested in `Definition 4`, and ignores `Definition 3`.
+///
+/// Finally, the merged definition (named `Definition 1 & 2`) and `Definition 4` are merged
+/// together to produce the new `Definition` returned by this method.
+pub fn merged_definition(
     inputs: &[OutputId],
     config: &dyn ComponentContainer,
-    cache: &mut Cache,
-) -> Vec<(OutputId, Definition)> {
+    cache: &mut HashMap<(bool, Vec<OutputId>), Definition>,
+) -> Definition {
     if inputs.is_empty() {
-        return vec![];
+        return Definition::default_legacy_namespace();
     }
 
     // Try to get the definition from the cache.
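The removed TODO above asked why the cache key carries a bool: it is `config.schema_enabled()`, so results computed with schema support enabled never shadow the defaults computed with it disabled. A sketch of the shape of that key, with `String`s standing in for `OutputId`s and `&str`s for definitions:

    use std::collections::HashMap;

    fn main() {
        let mut cache: HashMap<(bool, Vec<String>), &str> = HashMap::new();

        // The same inputs produce two distinct entries, one per schema setting.
        cache.insert((true, vec!["source-1".into()]), "merged schema definition");
        cache.insert((false, vec!["source-1".into()]), "default namespace definition");

        assert_ne!(
            cache.get(&(true, vec!["source-1".into()])),
            cache.get(&(false, vec!["source-1".into()])),
        );
    }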
@@ -27,49 +46,66 @@ pub fn possible_definitions(
         return definition.clone();
     }
 
-    let mut definitions = Vec::new();
+    let mut definition = Definition::new(Kind::never(), Kind::never(), []);
 
     for input in inputs {
         let key = &input.component;
 
         // If the input is a source, the output is merged into the top-level schema.
+        // Not all sources contain a schema yet, in which case they use a default.
         if let Ok(maybe_output) = config.source_output_for_port(key, &input.port) {
-            let mut source_definition = input.with_definitions(
-                maybe_output
-                    .unwrap_or_else(|| {
-                        unreachable!(
-                            "source output mis-configured - output for port {:?} missing",
-                            &input.port
-                        )
-                    })
-                    .schema_definition(config.schema_enabled()),
-            );
-
-            definitions.append(&mut source_definition);
+            let source_definition = maybe_output
+                .unwrap_or_else(|| {
+                    unreachable!(
+                        "source output mis-configured - output for port {:?} missing",
+                        &input.port
+                    )
+                })
+                .log_schema_definition
+                .clone()
+                // Schemas must be implemented for components that support the "Vector" namespace, so since
+                // one doesn't exist here, we can assume it's using the default "legacy" namespace schema definition
+                .unwrap_or_else(Definition::default_legacy_namespace);
+
+            if config.schema_enabled() {
+                definition = definition.merge(source_definition);
+            } else {
+                definition = definition.merge(Definition::default_for_namespace(
+                    source_definition.log_namespaces(),
+                ));
+            }
         }
 
-        // If the input is a transform, the output is merged into the top-level schema
+        // Not all transforms contain a schema yet. If that's the case, it's assumed
+        // that the transform doesn't modify the event schema, so it is passed through as-is (recursively).
         if let Some(inputs) = config.transform_inputs(key) {
-            let input_definitions = possible_definitions(inputs, config, cache);
-
-            let mut transform_definition = input.with_definitions(
-                config
-                    .transform_output_for_port(key, &input.port, &input_definitions)
-                    .expect("transform must exist - already found inputs")
-                    .unwrap_or_else(|| {
-                        unreachable!(
-                            "transform output mis-configured - output for port {:?} missing",
-                            &input.port
-                        )
-                    })
-                    .log_schema_definitions,
-            );
-
-            definitions.append(&mut transform_definition);
+            let merged_definition = merged_definition(inputs, config, cache);
+
+            let transform_definition = config
+                .transform_output_for_port(key, &input.port, &merged_definition)
+                .expect("transform must exist - already found inputs")
+                .unwrap_or_else(|| {
+                    unreachable!(
+                        "transform output mis-configured - output for port {:?} missing",
+                        &input.port
+                    )
+                })
+                .log_schema_definition
+                .clone()
+                .unwrap_or(merged_definition);
+
+            if config.schema_enabled() {
+                definition = definition.merge(transform_definition);
+            } else {
+                // Schemas must be implemented for components that support the "Vector" namespace, so since
+                // one doesn't exist here, we can assume it's using the default "legacy" namespace schema definition
+                definition = definition.merge(Definition::default_for_namespace(
+                    transform_definition.log_namespaces(),
+                ));
+            }
         }
     }
-
-    definitions
+    definition
 }
 
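As a concrete illustration of the merge, here is a sketch in the spirit of the `test_merged_definition` cases below (the field name is made up and the `vector_core::schema::Definition` import path is assumed; `merge` unions the kind of each field across inputs):

    use lookup::owned_value_path;
    use value::Kind;
    use vector_core::schema::Definition;

    fn main() {
        // Two upstream sources disagree about the type of `foo`.
        let source_1 = Definition::empty_legacy_namespace().with_event_field(
            &owned_value_path!("foo"),
            Kind::integer().or_bytes(),
            None,
        );
        let source_2 = Definition::empty_legacy_namespace().with_event_field(
            &owned_value_path!("foo"),
            Kind::timestamp(),
            None,
        );

        // Downstream of both, `foo` may be an integer, bytes, or a timestamp.
        let merged = source_1.merge(source_2);
        assert_eq!(
            merged,
            Definition::empty_legacy_namespace().with_event_field(
                &owned_value_path!("foo"),
                Kind::integer().or_bytes().or_timestamp(),
                None,
            )
        );
    }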
 /// Get a list of definitions from individual pipelines feeding into a component.
@@ -88,14 +124,14 @@ pub fn possible_definitions(
 pub(super) fn expanded_definitions(
     inputs: &[OutputId],
     config: &dyn ComponentContainer,
-    cache: &mut Cache,
-) -> Vec<(OutputId, Definition)> {
+    cache: &mut HashMap<(bool, Vec<OutputId>), Vec<Definition>>,
+) -> Vec<Definition> {
     // Try to get the definition from the cache.
     if let Some(definitions) = cache.get(&(config.schema_enabled(), inputs.to_vec())) {
         return definitions.clone();
     }
 
-    let mut definitions: Vec<(OutputId, Definition)> = vec![];
+    let mut definitions = vec![];
     let mut merged_cache = HashMap::default();
 
     for input in inputs {
@@ -110,38 +146,40 @@ pub(super) fn expanded_definitions(
         // After getting the source matching to the given input, we need to further narrow the
         // actual output of the source feeding into this input, and then get the definition
         // belonging to that output.
-        let mut source_definitions =
-            outputs
-                .into_iter()
-                .find_map(|output| {
-                    if output.port == input.port {
-                        Some(input.with_definitions(
-                            output.schema_definition(config.schema_enabled()),
-                        ))
-                    } else {
-                        None
-                    }
-                })
-                .unwrap_or_else(|| {
-                    // If we find no match, it means the topology is misconfigured. This is a fatal
-                    // error, but other parts of the topology builder deal with this state.
-                    unreachable!("source output mis-configured")
-                });
-
-        definitions.append(&mut source_definitions);
+        let source_definition = outputs
+            .iter()
+            .find_map(|output| {
+                if output.port == input.port {
+                    Some(
+                        output
+                            .log_schema_definition
+                            .clone()
+                            .unwrap_or_else(Definition::default_legacy_namespace),
+                    )
+                } else {
+                    None
+                }
+            })
+            .unwrap_or_else(|| {
+                // If we find no match, it means the topology is misconfigured. This is a fatal
+                // error, but other parts of the topology builder deal with this state.
+ unreachable!("source output mis-configured") + }); + + definitions.push(source_definition); // A transform can receive from multiple inputs, and each input needs to be expanded to // a new pipeline. } else if let Some(inputs) = config.transform_inputs(key) { - let input_definitions = possible_definitions(inputs, config, &mut merged_cache); + let merged_definition = merged_definition(inputs, config, &mut merged_cache); - let mut transform_definition = config - .transform_outputs(key, &input_definitions) + let maybe_transform_definition = config + .transform_outputs(key, &merged_definition) .expect("already found inputs") .iter() .find_map(|output| { if output.port == input.port { - Some(input.with_definitions(output.log_schema_definitions.clone())) + Some(output.log_schema_definition.clone()) } else { None } @@ -150,9 +188,24 @@ pub(super) fn expanded_definitions( // error, but other parts of the topology builder deal with this state. .expect("transform output misconfigured"); - // Append whatever number of additional pipelines we created to the existing - // pipeline definitions. - definitions.append(&mut transform_definition); + // We need to iterate over the individual inputs of a transform, as we are expected to + // expand each input into its own pipeline. + for input in inputs { + let mut expanded_definitions = match &maybe_transform_definition { + // If the transform defines its own schema definition, we no longer care about + // any upstream definitions, and use the transform definition instead. + Some(transform_definition) => vec![transform_definition.clone()], + + // If the transform does not define its own schema definition, we need to + // recursively call this function in case upstream components expand into + // multiple pipelines. + None => expanded_definitions(&[input.clone()], config, cache), + }; + + // Append whatever number of additional pipelines we created to the existing + // pipeline definitions. + definitions.append(&mut expanded_definitions); + } } } @@ -164,66 +217,6 @@ pub(super) fn expanded_definitions( definitions } -/// Returns a list of definitions from the given inputs. -pub(crate) fn input_definitions( - inputs: &[OutputId], - config: &Config, - cache: &mut Cache, -) -> Vec<(OutputId, Definition)> { - if inputs.is_empty() { - return vec![]; - } - - if let Some(definitions) = cache.get(&(config.schema_enabled(), inputs.to_vec())) { - return definitions.clone(); - } - - let mut definitions = Vec::new(); - - for input in inputs { - let key = &input.component; - - // If the input is a source we retrieve the definitions from the source - // (there should only be one) and add it to the return. - if let Ok(maybe_output) = config.source_output_for_port(key, &input.port) { - let mut source_definitions = input.with_definitions( - maybe_output - .unwrap_or_else(|| { - unreachable!( - "source output mis-configured - output for port {:?} missing", - &input.port - ) - }) - .schema_definition(config.schema_enabled()), - ); - - definitions.append(&mut source_definitions); - } - - // If the input is a transform we recurse to the upstream components to retrieve - // their definitions and pass it through the transform to get the new definitions. 
- if let Some(inputs) = config.transform_inputs(key) { - let transform_definitions = input_definitions(inputs, config, cache); - let mut transform_definitions = input.with_definitions( - config - .transform_output_for_port(key, &input.port, &transform_definitions) - .expect("transform must exist") - .unwrap_or_else(|| { - unreachable!( - "transform output mis-configured - output for port {:?} missing", - &input.port - ) - }) - .log_schema_definitions, - ); - - definitions.append(&mut transform_definitions); - } - } - - definitions -} - pub(super) fn validate_sink_expectations( key: &ComponentKey, sink: &SinkOuter, @@ -241,7 +234,7 @@ pub(super) fn validate_sink_expectations( let definitions = expanded_definitions(&sink.inputs, config, &mut cache); // Validate each individual definition against the sink requirement. - for (_output, definition) in definitions { + for definition in definitions { if let Err(err) = requirement.validate(&definition, config.schema.validation) { errors.append( &mut err @@ -264,15 +257,15 @@ pub(super) fn validate_sink_expectations( pub trait ComponentContainer { fn schema_enabled(&self) -> bool; - fn source_outputs(&self, key: &ComponentKey) -> Option>; + fn source_outputs(&self, key: &ComponentKey) -> Option>; fn transform_inputs(&self, key: &ComponentKey) -> Option<&[OutputId]>; fn transform_outputs( &self, key: &ComponentKey, - input_definitions: &[(OutputId, Definition)], - ) -> Option>; + merged_definition: &Definition, + ) -> Option>; /// Gets the transform output for the given port. /// @@ -283,9 +276,9 @@ pub trait ComponentContainer { &self, key: &ComponentKey, port: &Option, - input_definitions: &[(OutputId, Definition)], - ) -> Result, ()> { - if let Some(outputs) = self.transform_outputs(key, input_definitions) { + merged_definition: &Definition, + ) -> Result, ()> { + if let Some(outputs) = self.transform_outputs(key, merged_definition) { Ok(get_output_for_port(outputs, port)) } else { Err(()) @@ -301,26 +294,16 @@ pub trait ComponentContainer { &self, key: &ComponentKey, port: &Option, - ) -> Result, ()> { + ) -> Result, ()> { if let Some(outputs) = self.source_outputs(key) { - Ok(get_source_output_for_port(outputs, port)) + Ok(get_output_for_port(outputs, port)) } else { Err(()) } } } -fn get_output_for_port( - outputs: Vec, - port: &Option, -) -> Option { - outputs.into_iter().find(|output| &output.port == port) -} - -fn get_source_output_for_port( - outputs: Vec, - port: &Option, -) -> Option { +fn get_output_for_port(outputs: Vec, port: &Option) -> Option { outputs.into_iter().find(|output| &output.port == port) } @@ -329,7 +312,7 @@ impl ComponentContainer for Config { self.schema.enabled } - fn source_outputs(&self, key: &ComponentKey) -> Option> { + fn source_outputs(&self, key: &ComponentKey) -> Option> { self.source(key) .map(|source| source.inner.outputs(self.schema.log_namespace())) } @@ -341,12 +324,12 @@ impl ComponentContainer for Config { fn transform_outputs( &self, key: &ComponentKey, - input_definitions: &[(OutputId, Definition)], - ) -> Option> { + merged_definition: &Definition, + ) -> Option> { self.transform(key).map(|source| { source .inner - .outputs(input_definitions, self.schema.log_namespace()) + .outputs(merged_definition, self.schema.log_namespace()) }) } } @@ -356,20 +339,145 @@ mod tests { use std::collections::HashMap; use indexmap::IndexMap; + use lookup::lookup_v2::parse_target_path; use lookup::owned_value_path; use similar_asserts::assert_eq; use value::Kind; - use vector_core::config::{DataType, SourceOutput, 
TransformOutput}; + use vector_core::config::{DataType, Output}; use super::*; + #[test] + fn test_merged_definition() { + struct TestCase { + inputs: Vec<(&'static str, Option)>, + sources: IndexMap<&'static str, Vec>, + transforms: IndexMap<&'static str, Vec>, + want: Definition, + } + + impl ComponentContainer for TestCase { + fn schema_enabled(&self) -> bool { + true + } + + fn source_outputs(&self, key: &ComponentKey) -> Option> { + self.sources.get(key.id()).cloned() + } + + fn transform_inputs(&self, _key: &ComponentKey) -> Option<&[OutputId]> { + None + } + + fn transform_outputs( + &self, + key: &ComponentKey, + _merged_definition: &Definition, + ) -> Option> { + self.transforms.get(key.id()).cloned() + } + } + + for (title, case) in HashMap::from([ + ( + "no inputs", + TestCase { + inputs: vec![], + sources: IndexMap::default(), + transforms: IndexMap::default(), + want: Definition::default_legacy_namespace(), + }, + ), + ( + "single input, source with empty schema", + TestCase { + inputs: vec![("foo", None)], + sources: IndexMap::from([("foo", vec![Output::default(DataType::all())])]), + transforms: IndexMap::default(), + want: Definition::default_legacy_namespace(), + }, + ), + ( + "single input, source with schema", + TestCase { + inputs: vec![("source-foo", None)], + sources: IndexMap::from([( + "source-foo", + vec![Output::default(DataType::all()).with_schema_definition( + Definition::empty_legacy_namespace().with_event_field( + &owned_value_path!("foo"), + Kind::integer().or_bytes(), + Some("foo bar"), + ), + )], + )]), + transforms: IndexMap::default(), + want: Definition::empty_legacy_namespace().with_event_field( + &owned_value_path!("foo"), + Kind::integer().or_bytes(), + Some("foo bar"), + ), + }, + ), + ( + "multiple inputs, sources with schema", + TestCase { + inputs: vec![("source-foo", None), ("source-bar", None)], + sources: IndexMap::from([ + ( + "source-foo", + vec![Output::default(DataType::all()).with_schema_definition( + Definition::empty_legacy_namespace().with_event_field( + &owned_value_path!("foo"), + Kind::integer().or_bytes(), + Some("foo bar"), + ), + )], + ), + ( + "source-bar", + vec![Output::default(DataType::all()).with_schema_definition( + Definition::empty_legacy_namespace().with_event_field( + &owned_value_path!("foo"), + Kind::timestamp(), + Some("baz qux"), + ), + )], + ), + ]), + transforms: IndexMap::default(), + want: Definition::empty_legacy_namespace() + .with_event_field( + &owned_value_path!("foo"), + Kind::integer().or_bytes().or_timestamp(), + Some("foo bar"), + ) + .with_meaning(parse_target_path("foo").unwrap(), "baz qux"), + }, + ), + ]) { + let inputs = case + .inputs + .iter() + .cloned() + .map(|(key, port)| OutputId { + component: key.into(), + port, + }) + .collect::>(); + + let got = merged_definition(&inputs, &case, &mut HashMap::default()); + assert_eq!(got, case.want, "{}", title); + } + } + #[test] fn test_expanded_definition() { struct TestCase { inputs: Vec<(&'static str, Option)>, - sources: IndexMap<&'static str, Vec>, - transforms: IndexMap<&'static str, (Vec, Vec)>, - want: Vec<(OutputId, Definition)>, + sources: IndexMap<&'static str, Vec>, + transforms: IndexMap<&'static str, (Vec, Vec)>, + want: Vec, } impl ComponentContainer for TestCase { @@ -377,7 +485,7 @@ mod tests { true } - fn source_outputs(&self, key: &ComponentKey) -> Option> { + fn source_outputs(&self, key: &ComponentKey) -> Option> { self.sources.get(key.id()).cloned() } @@ -388,8 +496,8 @@ mod tests { fn transform_outputs( &self, key: &ComponentKey, 
- _input_definitions: &[(OutputId, Definition)], - ) -> Option> { + _merged_definition: &Definition, + ) -> Option> { self.transforms.get(key.id()).cloned().map(|v| v.1) } } @@ -408,15 +516,9 @@ mod tests { "single input, source with default schema", TestCase { inputs: vec![("foo", None)], - sources: IndexMap::from([( - "foo", - vec![SourceOutput::new_logs( - DataType::all(), - Definition::default_legacy_namespace(), - )], - )]), + sources: IndexMap::from([("foo", vec![Output::default(DataType::all())])]), transforms: IndexMap::default(), - want: vec![("foo".into(), Definition::default_legacy_namespace())], + want: vec![Definition::default_legacy_namespace()], }, ), ( @@ -425,8 +527,7 @@ mod tests { inputs: vec![("source-foo", None)], sources: IndexMap::from([( "source-foo", - vec![SourceOutput::new_logs( - DataType::all(), + vec![Output::default(DataType::all()).with_schema_definition( Definition::empty_legacy_namespace().with_event_field( &owned_value_path!("foo"), Kind::integer().or_bytes(), @@ -435,13 +536,10 @@ mod tests { )], )]), transforms: IndexMap::default(), - want: vec![( - "source-foo".into(), - Definition::empty_legacy_namespace().with_event_field( - &owned_value_path!("foo"), - Kind::integer().or_bytes(), - Some("foo bar"), - ), + want: vec![Definition::empty_legacy_namespace().with_event_field( + &owned_value_path!("foo"), + Kind::integer().or_bytes(), + Some("foo bar"), )], }, ), @@ -452,8 +550,7 @@ mod tests { sources: IndexMap::from([ ( "source-foo", - vec![SourceOutput::new_logs( - DataType::all(), + vec![Output::default(DataType::all()).with_schema_definition( Definition::empty_legacy_namespace().with_event_field( &owned_value_path!("foo"), Kind::integer().or_bytes(), @@ -463,8 +560,7 @@ mod tests { ), ( "source-bar", - vec![SourceOutput::new_logs( - DataType::all(), + vec![Output::default(DataType::all()).with_schema_definition( Definition::empty_legacy_namespace().with_event_field( &owned_value_path!("foo"), Kind::timestamp(), @@ -475,21 +571,15 @@ mod tests { ]), transforms: IndexMap::default(), want: vec![ - ( - "source-foo".into(), - Definition::empty_legacy_namespace().with_event_field( - &owned_value_path!("foo"), - Kind::integer().or_bytes(), - Some("foo bar"), - ), + Definition::empty_legacy_namespace().with_event_field( + &owned_value_path!("foo"), + Kind::integer().or_bytes(), + Some("foo bar"), ), - ( - "source-bar".into(), - Definition::empty_legacy_namespace().with_event_field( - &owned_value_path!("foo"), - Kind::timestamp(), - Some("baz qux"), - ), + Definition::empty_legacy_namespace().with_event_field( + &owned_value_path!("foo"), + Kind::timestamp(), + Some("baz qux"), ), ], }, @@ -501,8 +591,7 @@ mod tests { sources: IndexMap::from([ ( "source-foo", - vec![SourceOutput::new_logs( - DataType::all(), + vec![Output::default(DataType::all()).with_schema_definition( Definition::empty_legacy_namespace().with_event_field( &owned_value_path!("foo"), Kind::boolean(), @@ -512,8 +601,7 @@ mod tests { ), ( "source-bar", - vec![SourceOutput::new_logs( - DataType::all(), + vec![Output::default(DataType::all()).with_schema_definition( Definition::empty_legacy_namespace().with_event_field( &owned_value_path!("bar"), Kind::integer(), @@ -526,32 +614,25 @@ mod tests { "transform-baz", ( vec![OutputId::from("source-foo")], - vec![TransformOutput::new( - DataType::all(), - vec![Definition::empty_legacy_namespace().with_event_field( + vec![Output::default(DataType::all()).with_schema_definition( + Definition::empty_legacy_namespace().with_event_field( 
&owned_value_path!("baz"), Kind::regex(), Some("baz"), - )], + ), )], ), )]), want: vec![ - ( - "source-bar".into(), - Definition::empty_legacy_namespace().with_event_field( - &owned_value_path!("bar"), - Kind::integer(), - Some("bar"), - ), + Definition::empty_legacy_namespace().with_event_field( + &owned_value_path!("bar"), + Kind::integer(), + Some("bar"), ), - ( - "transform-baz".into(), - Definition::empty_legacy_namespace().with_event_field( - &owned_value_path!("baz"), - Kind::regex(), - Some("baz"), - ), + Definition::empty_legacy_namespace().with_event_field( + &owned_value_path!("baz"), + Kind::regex(), + Some("baz"), ), ], }, @@ -571,8 +652,7 @@ mod tests { sources: IndexMap::from([ ( "Source 1", - vec![SourceOutput::new_logs( - DataType::all(), + vec![Output::default(DataType::all()).with_schema_definition( Definition::empty_legacy_namespace().with_event_field( &owned_value_path!("source-1"), Kind::boolean(), @@ -582,8 +662,7 @@ mod tests { ), ( "Source 2", - vec![SourceOutput::new_logs( - DataType::all(), + vec![Output::default(DataType::all()).with_schema_definition( Definition::empty_legacy_namespace().with_event_field( &owned_value_path!("source-2"), Kind::integer(), @@ -597,13 +676,12 @@ mod tests { "Transform 1", ( vec![OutputId::from("Source 1")], - vec![TransformOutput::new( - DataType::all(), - vec![Definition::empty_legacy_namespace().with_event_field( + vec![Output::default(DataType::all()).with_schema_definition( + Definition::empty_legacy_namespace().with_event_field( &owned_value_path!("transform-1"), Kind::regex(), None, - )], + ), )], ), ), @@ -611,13 +689,12 @@ mod tests { "Transform 2", ( vec![OutputId::from("Source 2")], - vec![TransformOutput::new( - DataType::all(), - vec![Definition::empty_legacy_namespace().with_event_field( + vec![Output::default(DataType::all()).with_schema_definition( + Definition::empty_legacy_namespace().with_event_field( &owned_value_path!("transform-2"), Kind::float().or_null(), Some("transform-2"), - )], + ), )], ), ), @@ -625,13 +702,12 @@ mod tests { "Transform 3", ( vec![OutputId::from("Source 2")], - vec![TransformOutput::new( - DataType::all(), - vec![Definition::empty_legacy_namespace().with_event_field( + vec![Output::default(DataType::all()).with_schema_definition( + Definition::empty_legacy_namespace().with_event_field( &owned_value_path!("transform-3"), Kind::integer(), Some("transform-3"), - )], + ), )], ), ), @@ -639,13 +715,12 @@ mod tests { "Transform 4", ( vec![OutputId::from("Source 2")], - vec![TransformOutput::new( - DataType::all(), - vec![Definition::empty_legacy_namespace().with_event_field( + vec![Output::default(DataType::all()).with_schema_definition( + Definition::empty_legacy_namespace().with_event_field( &owned_value_path!("transform-4"), Kind::timestamp().or_bytes(), Some("transform-4"), - )], + ), )], ), ), @@ -653,44 +728,40 @@ mod tests { "Transform 5", ( vec![OutputId::from("Transform 3"), OutputId::from("Transform 4")], - vec![TransformOutput::new( - DataType::all(), - vec![Definition::empty_legacy_namespace().with_event_field( + vec![Output::default(DataType::all()).with_schema_definition( + Definition::empty_legacy_namespace().with_event_field( &owned_value_path!("transform-5"), Kind::boolean(), Some("transform-5"), - )], + ), )], ), ), ]), want: vec![ // Pipeline 1 - ( - "Transform 1".into(), - Definition::empty_legacy_namespace().with_event_field( - &owned_value_path!("transform-1"), - Kind::regex(), - None, - ), + Definition::empty_legacy_namespace().with_event_field( + 
&owned_value_path!("transform-1"), + Kind::regex(), + None, ), // Pipeline 2 - ( - "Transform 2".into(), - Definition::empty_legacy_namespace().with_event_field( - &owned_value_path!("transform-2"), - Kind::float().or_null(), - Some("transform-2"), - ), + Definition::empty_legacy_namespace().with_event_field( + &owned_value_path!("transform-2"), + Kind::float().or_null(), + Some("transform-2"), ), // Pipeline 3 - ( - "Transform 5".into(), - Definition::empty_legacy_namespace().with_event_field( - &owned_value_path!("transform-5"), - Kind::boolean(), - Some("transform-5"), - ), + Definition::empty_legacy_namespace().with_event_field( + &owned_value_path!("transform-5"), + Kind::boolean(), + Some("transform-5"), + ), + // Pipeline 4 + Definition::empty_legacy_namespace().with_event_field( + &owned_value_path!("transform-5"), + Kind::boolean(), + Some("transform-5"), ), ], }, diff --git a/src/transforms/aggregate.rs b/src/transforms/aggregate.rs index fdeb73eaa00a8..31673bfd6bd71 100644 --- a/src/transforms/aggregate.rs +++ b/src/transforms/aggregate.rs @@ -10,7 +10,7 @@ use vector_config::configurable_component; use vector_core::config::LogNamespace; use crate::{ - config::{DataType, Input, OutputId, TransformConfig, TransformContext, TransformOutput}, + config::{DataType, Input, Output, TransformConfig, TransformContext}, event::{metric, Event, EventMetadata}, internal_events::{AggregateEventRecorded, AggregateFlushed, AggregateUpdateFailed}, schema, @@ -46,12 +46,8 @@ impl TransformConfig for AggregateConfig { Input::metric() } - fn outputs( - &self, - _: &[(OutputId, schema::Definition)], - _: LogNamespace, - ) -> Vec { - vec![TransformOutput::new(DataType::Metric, vec![])] + fn outputs(&self, _: &schema::Definition, _: LogNamespace) -> Vec { + vec![Output::default(DataType::Metric)] } } diff --git a/src/transforms/aws_ec2_metadata.rs b/src/transforms/aws_ec2_metadata.rs index 560e82ea917be..698c92c8d5268 100644 --- a/src/transforms/aws_ec2_metadata.rs +++ b/src/transforms/aws_ec2_metadata.rs @@ -18,9 +18,8 @@ use value::Kind; use vector_config::configurable_component; use vector_core::config::LogNamespace; -use crate::config::OutputId; use crate::{ - config::{DataType, Input, ProxyConfig, TransformConfig, TransformContext, TransformOutput}, + config::{DataType, Input, Output, ProxyConfig, TransformConfig, TransformContext}, event::Event, http::HttpClient, internal_events::{AwsEc2MetadataRefreshError, AwsEc2MetadataRefreshSuccessful}, @@ -244,11 +243,7 @@ impl TransformConfig for Ec2Metadata { Input::new(DataType::Metric | DataType::Log) } - fn outputs( - &self, - input_definitions: &[(OutputId, schema::Definition)], - _: LogNamespace, - ) -> Vec { + fn outputs(&self, merged_definition: &schema::Definition, _: LogNamespace) -> Vec { let added_keys = Keys::new(self.namespace.clone()); let paths = [ @@ -268,24 +263,15 @@ impl TransformConfig for Ec2Metadata { &added_keys.tags_key.log_path, ]; - let schema_definition = input_definitions - .iter() - .map(|(_output, definition)| { - let mut schema_definition = definition.clone(); + let mut schema_definition = merged_definition.clone(); - for path in paths { - schema_definition = - schema_definition.with_field(path, Kind::bytes().or_undefined(), None); - } - - schema_definition - }) - .collect(); + for path in paths { + schema_definition = + schema_definition.with_field(path, Kind::bytes().or_undefined(), None); + } - vec![TransformOutput::new( - DataType::Metric | DataType::Log, - schema_definition, - )] + vec![Output::default(DataType::Metric 
| DataType::Log) + .with_schema_definition(schema_definition)] } } diff --git a/src/transforms/dedupe.rs b/src/transforms/dedupe.rs index c0fe99f0204cd..9a16cb9c6d824 100644 --- a/src/transforms/dedupe.rs +++ b/src/transforms/dedupe.rs @@ -8,8 +8,7 @@ use vector_core::config::LogNamespace; use crate::{ config::{ - log_schema, DataType, GenerateConfig, Input, OutputId, TransformConfig, TransformContext, - TransformOutput, + log_schema, DataType, GenerateConfig, Input, Output, TransformConfig, TransformContext, }, event::{Event, Value}, internal_events::DedupeEventsDropped, @@ -153,18 +152,8 @@ impl TransformConfig for DedupeConfig { Input::log() } - fn outputs( - &self, - input_definitions: &[(OutputId, schema::Definition)], - _: LogNamespace, - ) -> Vec { - vec![TransformOutput::new( - DataType::Log, - input_definitions - .iter() - .map(|(_output, definition)| definition.clone()) - .collect(), - )] + fn outputs(&self, merged_definition: &schema::Definition, _: LogNamespace) -> Vec { + vec![Output::default(DataType::Log).with_schema_definition(merged_definition.clone())] } } diff --git a/src/transforms/filter.rs b/src/transforms/filter.rs index f0bed3c180bc6..212b38622f61c 100644 --- a/src/transforms/filter.rs +++ b/src/transforms/filter.rs @@ -4,10 +4,7 @@ use vector_core::config::LogNamespace; use crate::{ conditions::{AnyCondition, Condition}, - config::{ - DataType, GenerateConfig, Input, OutputId, TransformConfig, TransformContext, - TransformOutput, - }, + config::{DataType, GenerateConfig, Input, Output, TransformConfig, TransformContext}, event::Event, internal_events::FilterEventsDropped, schema, @@ -51,18 +48,8 @@ impl TransformConfig for FilterConfig { Input::all() } - fn outputs( - &self, - input_definitions: &[(OutputId, schema::Definition)], - _: LogNamespace, - ) -> Vec { - vec![TransformOutput::new( - DataType::all(), - input_definitions - .iter() - .map(|(_output, definition)| definition.clone()) - .collect(), - )] + fn outputs(&self, merged_definition: &schema::Definition, _: LogNamespace) -> Vec { + vec![Output::default(DataType::all()).with_schema_definition(merged_definition.clone())] } fn enable_concurrency(&self) -> bool { diff --git a/src/transforms/log_to_metric.rs b/src/transforms/log_to_metric.rs index 86e2eb9eb30e6..40f4fb6ba44c0 100644 --- a/src/transforms/log_to_metric.rs +++ b/src/transforms/log_to_metric.rs @@ -6,10 +6,7 @@ use vector_config::configurable_component; use vector_core::config::LogNamespace; use crate::{ - config::{ - DataType, GenerateConfig, Input, OutputId, TransformConfig, TransformContext, - TransformOutput, - }, + config::{DataType, GenerateConfig, Input, Output, TransformConfig, TransformContext}, event::{ metric::{Metric, MetricKind, MetricTags, MetricValue, StatisticKind, TagValue}, Event, Value, @@ -157,13 +154,8 @@ impl TransformConfig for LogToMetricConfig { Input::log() } - fn outputs( - &self, - _: &[(OutputId, schema::Definition)], - _: LogNamespace, - ) -> Vec { - // Converting the log to a metric means we lose all incoming `Definition`s. 
- vec![TransformOutput::new(DataType::Metric, Vec::new())] + fn outputs(&self, _: &schema::Definition, _: LogNamespace) -> Vec { + vec![Output::default(DataType::Metric)] } fn enable_concurrency(&self) -> bool { diff --git a/src/transforms/lua/mod.rs b/src/transforms/lua/mod.rs index ab43bc911b948..6953e9f7c30db 100644 --- a/src/transforms/lua/mod.rs +++ b/src/transforms/lua/mod.rs @@ -5,7 +5,7 @@ use vector_config::configurable_component; use vector_core::config::LogNamespace; use crate::{ - config::{GenerateConfig, Input, OutputId, TransformConfig, TransformContext, TransformOutput}, + config::{GenerateConfig, Input, Output, TransformConfig, TransformContext}, schema, transforms::Transform, }; @@ -103,14 +103,10 @@ impl TransformConfig for LuaConfig { } } - fn outputs( - &self, - input_definitions: &[(OutputId, schema::Definition)], - _: LogNamespace, - ) -> Vec { + fn outputs(&self, merged_definition: &schema::Definition, _: LogNamespace) -> Vec { match self { - LuaConfig::V1(v1) => v1.config.outputs(input_definitions), - LuaConfig::V2(v2) => v2.config.outputs(input_definitions), + LuaConfig::V1(v1) => v1.config.outputs(merged_definition), + LuaConfig::V2(v2) => v2.config.outputs(merged_definition), } } } diff --git a/src/transforms/lua/v1/mod.rs b/src/transforms/lua/v1/mod.rs index efab62a686dcd..73a165970c44e 100644 --- a/src/transforms/lua/v1/mod.rs +++ b/src/transforms/lua/v1/mod.rs @@ -5,10 +5,9 @@ use ordered_float::NotNan; use snafu::{ResultExt, Snafu}; use vector_config::configurable_component; -use crate::config::OutputId; use crate::schema::Definition; use crate::{ - config::{DataType, Input, TransformOutput}, + config::{DataType, Input, Output}, event::{Event, Value}, internal_events::{LuaGcTriggered, LuaScriptError}, schema, @@ -48,19 +47,11 @@ impl LuaConfig { Input::log() } - pub fn outputs( - &self, - input_definitions: &[(OutputId, schema::Definition)], - ) -> Vec { + pub fn outputs(&self, merged_definition: &schema::Definition) -> Vec { // Lua causes the type definition to be reset - let namespaces = input_definitions - .iter() - .flat_map(|(_output, definition)| definition.log_namespaces().clone()) - .collect(); - - let definition = Definition::default_for_namespace(&namespaces); + let definition = Definition::default_for_namespace(merged_definition.log_namespaces()); - vec![TransformOutput::new(DataType::Log, vec![definition])] + vec![Output::default(DataType::Log).with_schema_definition(definition)] } } diff --git a/src/transforms/lua/v2/mod.rs b/src/transforms/lua/v2/mod.rs index 88bf5fd2086f4..13f9d76e503ac 100644 --- a/src/transforms/lua/v2/mod.rs +++ b/src/transforms/lua/v2/mod.rs @@ -7,11 +7,10 @@ use vector_config::configurable_component; pub use vector_core::event::lua; use vector_core::transform::runtime_transform::{RuntimeTransform, Timer}; -use crate::config::OutputId; use crate::event::lua::event::LuaEvent; use crate::schema::Definition; use crate::{ - config::{self, DataType, Input, TransformOutput, CONFIG_PATHS}, + config::{self, DataType, Input, Output, CONFIG_PATHS}, event::Event, internal_events::{LuaBuildError, LuaGcTriggered}, schema, @@ -178,22 +177,11 @@ impl LuaConfig { Input::new(DataType::Metric | DataType::Log) } - pub fn outputs( - &self, - input_definitions: &[(OutputId, schema::Definition)], - ) -> Vec { + pub fn outputs(&self, merged_definition: &schema::Definition) -> Vec { // Lua causes the type definition to be reset - let namespaces = input_definitions - .iter() - .flat_map(|(_output, definition)| definition.log_namespaces().clone()) - 
.collect(); - - let definition = Definition::default_for_namespace(&namespaces); + let definition = Definition::default_for_namespace(merged_definition.log_namespaces()); - vec![TransformOutput::new( - DataType::Metric | DataType::Log, - vec![definition], - )] + vec![Output::default(DataType::Metric | DataType::Log).with_schema_definition(definition)] } } diff --git a/src/transforms/metric_to_log.rs b/src/transforms/metric_to_log.rs index 155f32ae48c7b..f3d7e8a0e95e0 100644 --- a/src/transforms/metric_to_log.rs +++ b/src/transforms/metric_to_log.rs @@ -11,11 +11,9 @@ use vector_config::configurable_component; use vector_core::config::LogNamespace; use vrl::prelude::BTreeMap; -use crate::config::OutputId; use crate::{ config::{ - log_schema, DataType, GenerateConfig, Input, TransformConfig, TransformContext, - TransformOutput, + log_schema, DataType, GenerateConfig, Input, Output, TransformConfig, TransformContext, }, event::{self, Event, LogEvent, Metric}, internal_events::MetricToLogSerializeError, @@ -92,11 +90,7 @@ impl TransformConfig for MetricToLogConfig { Input::metric() } - fn outputs( - &self, - _: &[(OutputId, Definition)], - global_log_namespace: LogNamespace, - ) -> Vec { + fn outputs(&self, _: &Definition, global_log_namespace: LogNamespace) -> Vec { let log_namespace = global_log_namespace.merge(self.log_namespace); let mut schema_definition = Definition::default_for_namespace(&BTreeSet::from([log_namespace])) @@ -229,7 +223,7 @@ impl TransformConfig for MetricToLogConfig { } } - vec![TransformOutput::new(DataType::Log, vec![schema_definition])] + vec![Output::default(DataType::Log).with_schema_definition(schema_definition)] } fn enable_concurrency(&self) -> bool { diff --git a/src/transforms/reduce/mod.rs b/src/transforms/reduce/mod.rs index 618f44fcbb1f2..ce3f41afe6f83 100644 --- a/src/transforms/reduce/mod.rs +++ b/src/transforms/reduce/mod.rs @@ -14,10 +14,9 @@ use lookup::PathPrefix; use serde_with::serde_as; use vector_config::configurable_component; -use crate::config::OutputId; use crate::{ conditions::{AnyCondition, Condition}, - config::{DataType, Input, TransformConfig, TransformContext, TransformOutput}, + config::{DataType, Input, Output, TransformConfig, TransformContext}, event::{discriminant::Discriminant, Event, EventMetadata, LogEvent}, internal_events::ReduceStaleEventFlushed, schema, @@ -125,102 +124,91 @@ impl TransformConfig for ReduceConfig { Input::log() } - fn outputs( - &self, - input_definitions: &[(OutputId, schema::Definition)], - _: LogNamespace, - ) -> Vec { - let mut output_definitions = Vec::new(); - - for (_output, input) in input_definitions { - let mut schema_definition = input.clone(); - - for (key, merge_strategy) in self.merge_strategies.iter() { - let key = if let Ok(key) = parse_target_path(key) { - key - } else { - continue; - }; - - let input_kind = match key.prefix { - PathPrefix::Event => schema_definition.event_kind().at_path(&key.path), - PathPrefix::Metadata => schema_definition.metadata_kind().at_path(&key.path), - }; - - let new_kind = match merge_strategy { - MergeStrategy::Discard | MergeStrategy::Retain => { - /* does not change the type */ - input_kind.clone() + fn outputs(&self, input: &schema::Definition, _: LogNamespace) -> Vec { + let mut schema_definition = input.clone(); + + for (key, merge_strategy) in self.merge_strategies.iter() { + let key = if let Ok(key) = parse_target_path(key) { + key + } else { + continue; + }; + + let input_kind = match key.prefix { + PathPrefix::Event => 
schema_definition.event_kind().at_path(&key.path), + PathPrefix::Metadata => schema_definition.metadata_kind().at_path(&key.path), + }; + + let new_kind = match merge_strategy { + MergeStrategy::Discard | MergeStrategy::Retain => { + /* does not change the type */ + input_kind.clone() + } + MergeStrategy::Sum | MergeStrategy::Max | MergeStrategy::Min => { + // only keeps integer / float values + match (input_kind.contains_integer(), input_kind.contains_float()) { + (true, true) => Kind::float().or_integer(), + (true, false) => Kind::integer(), + (false, true) => Kind::float(), + (false, false) => Kind::undefined(), } - MergeStrategy::Sum | MergeStrategy::Max | MergeStrategy::Min => { - // only keeps integer / float values - match (input_kind.contains_integer(), input_kind.contains_float()) { - (true, true) => Kind::float().or_integer(), - (true, false) => Kind::integer(), - (false, true) => Kind::float(), - (false, false) => Kind::undefined(), - } + } + MergeStrategy::Array => { + let unknown_kind = input_kind.clone(); + Kind::array(Collection::empty().with_unknown(unknown_kind)) + } + MergeStrategy::Concat => { + let mut new_kind = Kind::never(); + + if input_kind.contains_bytes() { + new_kind.add_bytes(); } - MergeStrategy::Array => { - let unknown_kind = input_kind.clone(); - Kind::array(Collection::empty().with_unknown(unknown_kind)) + if let Some(array) = input_kind.as_array() { + // array elements can be either any type that the field can be, or any + // element of the array + let array_elements = array.reduced_kind().union(input_kind.without_array()); + new_kind.add_array(Collection::empty().with_unknown(array_elements)); } - MergeStrategy::Concat => { - let mut new_kind = Kind::never(); - - if input_kind.contains_bytes() { - new_kind.add_bytes(); - } - if let Some(array) = input_kind.as_array() { - // array elements can be either any type that the field can be, or any - // element of the array - let array_elements = - array.reduced_kind().union(input_kind.without_array()); - new_kind.add_array(Collection::empty().with_unknown(array_elements)); - } - new_kind + new_kind + } + MergeStrategy::ConcatNewline | MergeStrategy::ConcatRaw => { + // can only produce bytes (or undefined) + if input_kind.contains_bytes() { + Kind::bytes() + } else { + Kind::undefined() } - MergeStrategy::ConcatNewline | MergeStrategy::ConcatRaw => { - // can only produce bytes (or undefined) - if input_kind.contains_bytes() { - Kind::bytes() - } else { - Kind::undefined() - } + } + MergeStrategy::ShortestArray | MergeStrategy::LongestArray => { + if let Some(array) = input_kind.as_array() { + Kind::array(array.clone()) + } else { + Kind::undefined() } - MergeStrategy::ShortestArray | MergeStrategy::LongestArray => { - if let Some(array) = input_kind.as_array() { - Kind::array(array.clone()) - } else { - Kind::undefined() - } + } + MergeStrategy::FlatUnique => { + let mut array_elements = input_kind.without_array().without_object(); + if let Some(array) = input_kind.as_array() { + array_elements = array_elements.union(array.reduced_kind()); } - MergeStrategy::FlatUnique => { - let mut array_elements = input_kind.without_array().without_object(); - if let Some(array) = input_kind.as_array() { - array_elements = array_elements.union(array.reduced_kind()); - } - if let Some(object) = input_kind.as_object() { - array_elements = array_elements.union(object.reduced_kind()); - } - Kind::array(Collection::empty().with_unknown(array_elements)) + if let Some(object) = input_kind.as_object() { + array_elements = 
array_elements.union(object.reduced_kind()); } - }; - - // all of the merge strategies are optional. They won't produce a value unless a value actually exists - let new_kind = if input_kind.contains_undefined() { - new_kind.or_undefined() - } else { - new_kind - }; + Kind::array(Collection::empty().with_unknown(array_elements)) + } + }; - schema_definition = schema_definition.with_field(&key, new_kind, None); - } + // all of the merge strategies are optional. They won't produce a value unless a value actually exists + let new_kind = if input_kind.contains_undefined() { + new_kind.or_undefined() + } else { + new_kind + }; - output_definitions.push(schema_definition); + schema_definition = schema_definition.with_field(&key, new_kind, None); } - vec![TransformOutput::new(DataType::Log, output_definitions)] + vec![Output::default(DataType::Log).with_schema_definition(schema_definition)] } } @@ -514,12 +502,13 @@ group_by = [ "request_id" ] Kind::bytes().or_undefined(), None, ); - let schema_definitions = reduce_config - .outputs(&[("test".into(), input_definition)], LogNamespace::Legacy) + let schema_definition = reduce_config + .outputs(&input_definition, LogNamespace::Legacy) .first() .unwrap() - .log_schema_definitions - .clone(); + .log_schema_definition + .clone() + .unwrap(); let (tx, rx) = mpsc::channel(1); let (topology, mut out) = create_topology(ReceiverStream::new(rx), reduce_config).await; @@ -557,18 +546,14 @@ group_by = [ "request_id" ] assert_eq!(output_1["message"], "test message 1".into()); assert_eq!(output_1["counter"], Value::from(8)); assert_eq!(output_1.metadata(), &metadata_1); - schema_definitions - .iter() - .for_each(|definition| definition.assert_valid_for_event(&output_1.clone().into())); + schema_definition.assert_valid_for_event(&output_1.into()); let output_2 = out.recv().await.unwrap().into_log(); assert_eq!(output_2["message"], "test message 2".into()); assert_eq!(output_2["extra_field"], "value1".into()); assert_eq!(output_2["counter"], Value::from(7)); assert_eq!(output_2.metadata(), &metadata_2); - schema_definitions - .iter() - .for_each(|definition| definition.assert_valid_for_event(&output_2.clone().into())); + schema_definition.assert_valid_for_event(&output_2.into()); drop(tx); topology.stop().await; diff --git a/src/transforms/remap.rs b/src/transforms/remap.rs index b1b35823175dd..1083634beeb6b 100644 --- a/src/transforms/remap.rs +++ b/src/transforms/remap.rs @@ -24,11 +24,9 @@ use vrl::{ CompileConfig, Program, Runtime, Terminate, VrlRuntime, }; -use crate::config::OutputId; use crate::{ config::{ - log_schema, ComponentKey, DataType, Input, TransformConfig, TransformContext, - TransformOutput, + log_schema, ComponentKey, DataType, Input, Output, TransformConfig, TransformContext, }, event::{Event, TargetEvents, VrlTarget}, internal_events::{RemapMappingAbort, RemapMappingError}, @@ -223,131 +221,92 @@ impl TransformConfig for RemapConfig { Input::all() } - fn outputs( - &self, - input_definitions: &[(OutputId, schema::Definition)], - _: LogNamespace, - ) -> Vec { - let merged_definition: Definition = input_definitions - .iter() - .map(|(_output, definition)| definition.clone()) - .reduce(Definition::merge) - .unwrap_or_else(Definition::any); - + fn outputs(&self, input_definition: &schema::Definition, _: LogNamespace) -> Vec { // We need to compile the VRL program in order to know the schema definition output of this // transform. We ignore any compilation errors, as those are caught by the transform build // step. 
- let compiled = self - .compile_vrl_program(enrichment::TableRegistry::default(), merged_definition) + let default_definition = self + .compile_vrl_program( + enrichment::TableRegistry::default(), + input_definition.clone(), + ) .map(|(program, _, _, external_context)| { - ( - program.final_type_state(), - external_context - .get_custom::() - .cloned() - .expect("context exists") - .0, - ) + // Apply any semantic meanings set in the VRL program + let meaning = external_context + .get_custom::() + .cloned() + .expect("context exists") + .0; + + let state = program.final_type_state(); + + let mut new_type_def = Definition::new_with_default_metadata( + state.external.target_kind().clone(), + input_definition.log_namespaces().clone(), + ); + for (id, path) in meaning { + // currently only event paths are supported + new_type_def = new_type_def.with_meaning(OwnedTargetPath::event(path), &id); + } + new_type_def }) - .map_err(|_| ()); - - let mut dropped_definitions = Vec::new(); - let mut default_definitions = Vec::new(); - - for (_output_id, input_definition) in input_definitions { - let default_definition = compiled - .clone() - .map(|(state, meaning)| { - let mut new_type_def = Definition::new( - state.external.target_kind().clone(), - state.external.metadata_kind().clone(), - input_definition.log_namespaces().clone(), - ); - - for (id, path) in input_definition.meanings() { - // Attempt to copy over the meanings from the input definition. - // The function will fail if the meaning that now points to a field that no longer exists, - // this is fine since we will no longer want that meaning in the output definition. - let _ = new_type_def.try_with_meaning(path.clone(), id); - } - - // Apply any semantic meanings set in the VRL program - for (id, path) in meaning { - // currently only event paths are supported - new_type_def = new_type_def.with_meaning(OwnedTargetPath::event(path), &id); - } - new_type_def - }) - .unwrap_or_else(|_| { - Definition::new_with_default_metadata( - // The program failed to compile, so it can "never" return a value - Kind::never(), - input_definition.log_namespaces().clone(), - ) - }); - - // When a message is dropped and re-routed, we keep the original event, but also annotate - // it with additional metadata. - let mut dropped_definition = Definition::new_with_default_metadata( - Kind::never(), - input_definition.log_namespaces().clone(), - ); + .unwrap_or_else(|_| { + Definition::new_with_default_metadata( + // The program failed to compile, so it can "never" return a value + Kind::never(), + input_definition.log_namespaces().clone(), + ) + }); - if input_definition - .log_namespaces() - .contains(&LogNamespace::Legacy) - { - dropped_definition = - dropped_definition.merge(input_definition.clone().with_event_field( - &parse_value_path(log_schema().metadata_key()).expect("valid metadata key"), - Kind::object(BTreeMap::from([ - ("reason".into(), Kind::bytes()), - ("message".into(), Kind::bytes()), - ("component_id".into(), Kind::bytes()), - ("component_type".into(), Kind::bytes()), - ("component_kind".into(), Kind::bytes()), - ])), - Some("metadata"), - )); - } + // When a message is dropped and re-routed, we keep the original event, but also annotate + // it with additional metadata. 
+ let mut dropped_definition = Definition::new_with_default_metadata( + Kind::never(), + input_definition.log_namespaces().clone(), + ); - if input_definition - .log_namespaces() - .contains(&LogNamespace::Vector) - { - dropped_definition = dropped_definition.merge( - input_definition - .clone() - .with_metadata_field(&owned_value_path!("reason"), Kind::bytes(), None) - .with_metadata_field(&owned_value_path!("message"), Kind::bytes(), None) - .with_metadata_field( - &owned_value_path!("component_id"), - Kind::bytes(), - None, - ) - .with_metadata_field( - &owned_value_path!("component_type"), - Kind::bytes(), - None, - ) - .with_metadata_field( - &owned_value_path!("component_kind"), - Kind::bytes(), - None, - ), - ); - } + if input_definition + .log_namespaces() + .contains(&LogNamespace::Legacy) + { + dropped_definition = + dropped_definition.merge(input_definition.clone().with_event_field( + &parse_value_path(log_schema().metadata_key()).expect("valid metadata key"), + Kind::object(BTreeMap::from([ + ("reason".into(), Kind::bytes()), + ("message".into(), Kind::bytes()), + ("component_id".into(), Kind::bytes()), + ("component_type".into(), Kind::bytes()), + ("component_kind".into(), Kind::bytes()), + ])), + Some("metadata"), + )); + } - default_definitions.push(default_definition); - dropped_definitions.push(dropped_definition); + if input_definition + .log_namespaces() + .contains(&LogNamespace::Vector) + { + dropped_definition = dropped_definition.merge( + input_definition + .clone() + .with_metadata_field(&owned_value_path!("reason"), Kind::bytes(), None) + .with_metadata_field(&owned_value_path!("message"), Kind::bytes(), None) + .with_metadata_field(&owned_value_path!("component_id"), Kind::bytes(), None) + .with_metadata_field(&owned_value_path!("component_type"), Kind::bytes(), None) + .with_metadata_field(&owned_value_path!("component_kind"), Kind::bytes(), None), + ); } - let default_output = TransformOutput::new(DataType::all(), default_definitions); + let default_output = + Output::default(DataType::all()).with_schema_definition(default_definition); if self.reroute_dropped { vec![ default_output, - TransformOutput::new(DataType::all(), dropped_definitions).with_port(DROPPED), + Output::default(DataType::all()) + .with_schema_definition(dropped_definition) + .with_port(DROPPED), ] } else { vec![default_output] @@ -442,21 +401,14 @@ where .schema_definitions .get(&None) .expect("default schema required") - // TODO we can now have multiple possible definitions. - // This is going to need to be updated to store these possible definitions and then - // choose the correct one based on the input the event has come from. 
- .get(0)
- .cloned()
- .unwrap_or_else(Definition::any);
+ .clone();
 let dropped_schema_definition = context
 .schema_definitions
 .get(&Some(DROPPED.to_owned()))
 .or_else(|| context.schema_definitions.get(&None))
 .expect("dropped schema required")
- .get(0)
- .cloned()
- .unwrap_or_else(Definition::any);
+ .clone();
 Ok(Remap {
 component_key: context.key.clone(),
@@ -702,11 +654,8 @@ mod tests {
 fn remap(config: RemapConfig) -> Result<Remap<AstRunner>> {
 let schema_definitions = HashMap::from([
- (None, vec![test_default_schema_definition()]),
- (
- Some(DROPPED.to_owned()),
- vec![test_dropped_schema_definition()],
- ),
+ (None, test_default_schema_definition()),
+ (Some(DROPPED.to_owned()), test_dropped_schema_definition()),
 ]);
 Remap::new_ast(config, &TransformContext::new_test(schema_definitions))
@@ -1176,11 +1125,8 @@ mod tests {
 ..Default::default()
 };
 let schema_definitions = HashMap::from([
- (None, vec![test_default_schema_definition()]),
- (
- Some(DROPPED.to_owned()),
- vec![test_dropped_schema_definition()],
- ),
+ (None, test_default_schema_definition()),
+ (Some(DROPPED.to_owned()), test_dropped_schema_definition()),
 ]);
 let context = TransformContext {
 key: Some(ComponentKey::from("remapper")),
@@ -1437,19 +1383,13 @@ mod tests {
 assert_eq!(
 conf.outputs(
- &[(
- "test".into(),
- schema::Definition::new_with_default_metadata(
- Kind::any_object(),
- [LogNamespace::Legacy]
- )
- )],
+ &schema::Definition::new_with_default_metadata(
+ Kind::any_object(),
+ [LogNamespace::Legacy]
+ ),
 LogNamespace::Legacy
 ),
- vec![TransformOutput::new(
- DataType::all(),
- vec![schema_definition]
- )]
+ vec![Output::default(DataType::all()).with_schema_definition(schema_definition)]
 );
 let context = TransformContext {
@@ -1514,8 +1454,8 @@
 fn collect_outputs(ft: &mut dyn SyncTransform, event: Event) -> CollectedOuput {
 let mut outputs = TransformOutputsBuf::new_with_capacity(
 vec![
- TransformOutput::new(DataType::all(), vec![]),
- TransformOutput::new(DataType::all(), vec![]).with_port(DROPPED),
+ Output::default(DataType::all()),
+ Output::default(DataType::all()).with_port(DROPPED),
 ],
 1,
 );
@@ -1541,8 +1481,8 @@
 ) -> std::result::Result<Event, Event> {
 let mut outputs = TransformOutputsBuf::new_with_capacity(
 vec![
- TransformOutput::new(DataType::all(), vec![]),
- TransformOutput::new(DataType::all(), vec![]).with_port(DROPPED),
+ Output::default(DataType::all()),
+ Output::default(DataType::all()).with_port(DROPPED),
 ],
 1,
 );
diff --git a/src/transforms/route.rs b/src/transforms/route.rs
index 1a456daff6b3d..b776264f41e2b 100644
--- a/src/transforms/route.rs
+++ b/src/transforms/route.rs
@@ -5,10 +5,7 @@ use vector_core::transform::SyncTransform;
 use crate::{
 conditions::{AnyCondition, Condition},
- config::{
- DataType, GenerateConfig, Input, OutputId, TransformConfig, TransformContext,
- TransformOutput,
- },
+ config::{DataType, GenerateConfig, Input, Output, TransformConfig, TransformContext},
 event::Event,
 schema,
 transforms::Transform,
@@ -104,34 +101,20 @@ impl TransformConfig for RouteConfig {
 }
 }
- fn outputs(
- &self,
- input_definitions: &[(OutputId, schema::Definition)],
- _: LogNamespace,
- ) -> Vec<TransformOutput> {
- let mut result: Vec<TransformOutput> = self
+ fn outputs(&self, merged_definition: &schema::Definition, _: LogNamespace) -> Vec<Output> {
+ let mut result: Vec<Output> = self
 .route
 .keys()
 .map(|output_name| {
- TransformOutput::new(
- DataType::all(),
- input_definitions
- .iter()
- .map(|(_output, definition)| definition.clone())
- .collect(),
- )
- .with_port(output_name)
+ Output::default(DataType::all())
+ .with_schema_definition(merged_definition.clone())
+ .with_port(output_name)
 })
 .collect();
 result.push(
- TransformOutput::new(
- DataType::all(),
- input_definitions
- .iter()
- .map(|(_output, definition)| definition.clone())
- .collect(),
- )
- .with_port(UNMATCHED_ROUTE),
+ Output::default(DataType::all())
+ .with_schema_definition(merged_definition.clone())
+ .with_port(UNMATCHED_ROUTE),
 );
 result
 }
@@ -201,7 +184,7 @@ mod test {
 output_names
 .iter()
 .map(|output_name| {
- TransformOutput::new(DataType::all(), vec![]).with_port(output_name.to_owned())
+ Output::default(DataType::all()).with_port(output_name.to_owned())
 })
 .collect(),
 1,
@@ -242,7 +225,7 @@
 output_names
 .iter()
 .map(|output_name| {
- TransformOutput::new(DataType::all(), vec![]).with_port(output_name.to_owned())
+ Output::default(DataType::all()).with_port(output_name.to_owned())
 })
 .collect(),
 1,
@@ -282,7 +265,7 @@
 output_names
 .iter()
 .map(|output_name| {
- TransformOutput::new(DataType::all(), vec![]).with_port(output_name.to_owned())
+ Output::default(DataType::all()).with_port(output_name.to_owned())
 })
 .collect(),
 1,
diff --git a/src/transforms/sample.rs b/src/transforms/sample.rs
index 40e16e2e653d9..45e7ad5a4c5d0 100644
--- a/src/transforms/sample.rs
+++ b/src/transforms/sample.rs
@@ -3,10 +3,7 @@ use vector_core::config::LogNamespace;
 use crate::{
 conditions::{AnyCondition, Condition},
- config::{
- DataType, GenerateConfig, Input, OutputId, TransformConfig, TransformContext,
- TransformOutput,
- },
+ config::{DataType, GenerateConfig, Input, Output, TransformConfig, TransformContext},
 event::Event,
 internal_events::SampleEventDiscarded,
 schema,
@@ -69,18 +66,9 @@ impl TransformConfig for SampleConfig {
 Input::new(DataType::Log | DataType::Trace)
 }
- fn outputs(
- &self,
- input_definitions: &[(OutputId, schema::Definition)],
- _: LogNamespace,
- ) -> Vec<TransformOutput> {
- vec![TransformOutput::new(
- DataType::Log | DataType::Trace,
- input_definitions
- .iter()
- .map(|(_output, definition)| definition.clone())
- .collect(),
- )]
+ fn outputs(&self, merged_definition: &schema::Definition, _: LogNamespace) -> Vec<Output> {
+ vec![Output::default(DataType::Log | DataType::Trace)
+ .with_schema_definition(merged_definition.clone())]
 }
 }
diff --git a/src/transforms/tag_cardinality_limit/config.rs b/src/transforms/tag_cardinality_limit/config.rs
index 6a83d70d98ff3..ba38fc0e1dc96 100644
--- a/src/transforms/tag_cardinality_limit/config.rs
+++ b/src/transforms/tag_cardinality_limit/config.rs
@@ -1,6 +1,4 @@
-use crate::config::{
- DataType, GenerateConfig, Input, OutputId, TransformConfig, TransformContext, TransformOutput,
-};
+use crate::config::{DataType, GenerateConfig, Input, Output, TransformConfig, TransformContext};
 use crate::schema;
 use crate::transforms::tag_cardinality_limit::TagCardinalityLimit;
 use crate::transforms::Transform;
@@ -110,11 +108,7 @@ impl TransformConfig for TagCardinalityLimitConfig {
 Input::metric()
 }
- fn outputs(
- &self,
- _: &[(OutputId, schema::Definition)],
- _: LogNamespace,
- ) -> Vec<TransformOutput> {
- vec![TransformOutput::new(DataType::Metric, vec![])]
+ fn outputs(&self, _: &schema::Definition, _: LogNamespace) -> Vec<Output> {
+ vec![Output::default(DataType::Metric)]
 }
 }
diff --git a/src/transforms/throttle.rs b/src/transforms/throttle.rs
index bdc61383db18b..5d27672ef9ea2 100644
--- a/src/transforms/throttle.rs
+++ b/src/transforms/throttle.rs
@@ -10,7 +10,7 @@ use vector_core::config::LogNamespace;
 use crate::{
 conditions::{AnyCondition, Condition},
- config::{DataType, Input, OutputId, TransformConfig, TransformContext, TransformOutput},
+ config::{DataType, Input, Output, TransformConfig, TransformContext},
 event::Event,
 internal_events::{TemplateRenderingError, ThrottleEventDiscarded},
 schema,
@@ -59,19 +59,9 @@ impl TransformConfig for ThrottleConfig {
 Input::log()
 }
- fn outputs(
- &self,
- input_definitions: &[(OutputId, schema::Definition)],
- _: LogNamespace,
- ) -> Vec<TransformOutput> {
+ fn outputs(&self, merged_definition: &schema::Definition, _: LogNamespace) -> Vec<Output> {
 // The event is not modified, so the definition is passed through as-is
- vec![TransformOutput::new(
- DataType::Log,
- input_definitions
- .iter()
- .map(|(_output, definition)| definition.clone())
- .collect(),
- )]
+ vec![Output::default(DataType::Log).with_schema_definition(merged_definition.clone())]
 }
}
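A note on the shape this revert restores: `TransformConfig::outputs` once again receives the single merged upstream `schema::Definition`, and each `Output` carries at most one schema definition instead of a `Vec` of per-input definitions. Below is a minimal sketch of the restored pass-through pattern, mirroring the throttle and sample hunks above. This is illustrative only: the free function stands in for the trait method, and the imports are the paths this patch itself uses.

    use vector_core::config::{DataType, LogNamespace, Output};
    use vector_core::schema;

    // Pass-through: the transform leaves events untouched, so the merged
    // upstream definition is attached to the single default output as-is.
    fn outputs(merged_definition: &schema::Definition, _: LogNamespace) -> Vec<Output> {
        vec![Output::default(DataType::Log).with_schema_definition(merged_definition.clone())]
    }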
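The reverted tests likewise go back to driving transforms through a `TransformOutputsBuf` built with one default port plus a named dropped-events port, as in the `collect_outputs` helper above. A sketch under stated assumptions: the `DROPPED` constant here is a local stand-in for the remap module's private constant, the transform and event come from the caller, and `OutputBuffer` is assumed to be the type returned by `take_primary`.

    use vector::transforms::TransformOutputsBuf;
    use vector_core::config::{DataType, Output};
    use vector_core::event::Event;
    use vector_core::transform::{OutputBuffer, SyncTransform};

    // Stand-in for the private constant used by the remap hunks above.
    const DROPPED: &str = "dropped";

    // Run one event through a transform wired with a default port and a
    // dropped-events port, returning whatever landed on the default port.
    fn run_once(tform: &mut dyn SyncTransform, event: Event) -> OutputBuffer {
        let mut outputs = TransformOutputsBuf::new_with_capacity(
            vec![
                Output::default(DataType::all()),
                Output::default(DataType::all()).with_port(DROPPED),
            ],
            1,
        );
        tform.transform(event, &mut outputs);
        outputs.take_primary() // the DROPPED port is drained analogously
    }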