From 960635387235ea270d748038a3a0ddd615813f29 Mon Sep 17 00:00:00 2001 From: Bruce Guenter Date: Thu, 15 Jun 2023 14:00:58 -0600 Subject: [PATCH] chore(config): Make config schema output ordered (#17694) Key/value maps in the config schema currently use an `IndexMap`. This provides for stable ordering of the output between runs. Unfortunately, due to the use of initializers via `inventory`, that ordering is dependent on the order in which those initializers are run, which is in turn dependent on the choices the linker (and link-time optimization) makes when building Vector. The result is that config schemas can be very hard to compare between builds due to large blocks of it moving around in the output file. This change changes this type to a `BTreeMap`, which is fully ordered internally. This will result in a consistent ordering of maps, hopefully producing config schema output that is more consistent. --- Cargo.lock | 1 - lib/vector-config-common/Cargo.toml | 1 - lib/vector-config-common/src/schema/mod.rs | 4 +++- .../src/schema/visitors/inline_single.rs | 12 ++---------- lib/vector-config/src/schema/visitors/merge.rs | 4 ++-- lib/vector-config/src/schema/visitors/unevaluated.rs | 4 ++-- 6 files changed, 9 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index aa907ba7203e4..8a3b47031e3b4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9408,7 +9408,6 @@ version = "0.1.0" dependencies = [ "convert_case 0.6.0", "darling 0.13.4", - "indexmap", "once_cell", "proc-macro2 1.0.60", "quote 1.0.28", diff --git a/lib/vector-config-common/Cargo.toml b/lib/vector-config-common/Cargo.toml index 9e29eb538f98a..0d4ddaed219a0 100644 --- a/lib/vector-config-common/Cargo.toml +++ b/lib/vector-config-common/Cargo.toml @@ -7,7 +7,6 @@ license = "MPL-2.0" [dependencies] convert_case = { version = "0.6", default-features = false } darling = { version = "0.13", default-features = false, features = ["suggestions"] } -indexmap = { version = "1.9", default-features = false, features = ["serde"] } once_cell = { version = "1", default-features = false, features = ["std"] } proc-macro2 = { version = "1.0", default-features = false } serde = { version = "1.0", default-features = false, features = ["derive"] } diff --git a/lib/vector-config-common/src/schema/mod.rs b/lib/vector-config-common/src/schema/mod.rs index 2f4c6876de35e..763cdd217e8fc 100644 --- a/lib/vector-config-common/src/schema/mod.rs +++ b/lib/vector-config-common/src/schema/mod.rs @@ -8,7 +8,9 @@ pub mod visit; pub(crate) const DEFINITIONS_PREFIX: &str = "#/definitions/"; -pub type Map = indexmap::IndexMap; +// We have chosen the `BTree*` types here instead of hash tables to provide for a consistent +// ordering of the output elements between runs and changes to the configuration. +pub type Map = std::collections::BTreeMap; pub type Set = std::collections::BTreeSet; pub use self::gen::{SchemaGenerator, SchemaSettings}; diff --git a/lib/vector-config/src/schema/visitors/inline_single.rs b/lib/vector-config/src/schema/visitors/inline_single.rs index 68347e36937dd..20035118b4470 100644 --- a/lib/vector-config/src/schema/visitors/inline_single.rs +++ b/lib/vector-config/src/schema/visitors/inline_single.rs @@ -43,17 +43,11 @@ impl Visitor for InlineSingleUseReferencesVisitor { occurrence_visitor.visit_root_schema(root); let occurrence_map = occurrence_visitor.into_occurrences(); - let eligible_to_inline = occurrence_map + self.eligible_to_inline = occurrence_map .into_iter() // Filter out any schemas which have more than one occurrence, as naturally, we're // trying to inline single-use schema references. :) - .filter_map(|(def_name, occurrences)| { - if occurrences == 1 { - Some(def_name) - } else { - None - } - }) + .filter_map(|(def_name, occurrences)| (occurrences == 1).then_some(def_name)) // However, we'll also filter out some specific schema definitions which are only // referenced once, specifically: component base types and component types themselves. // @@ -72,8 +66,6 @@ impl Visitor for InlineSingleUseReferencesVisitor { .map(|s| s.as_ref().to_string()) .collect::>(); - self.eligible_to_inline = eligible_to_inline; - // Now run our own visitor logic, which will use the inline eligibility to determine if a // schema reference in a being-visited schema should be replaced inline with the original // referenced schema, in turn removing the schema definition. diff --git a/lib/vector-config/src/schema/visitors/merge.rs b/lib/vector-config/src/schema/visitors/merge.rs index 741b74157e137..a05478caef168 100644 --- a/lib/vector-config/src/schema/visitors/merge.rs +++ b/lib/vector-config/src/schema/visitors/merge.rs @@ -95,7 +95,7 @@ impl Mergeable for serde_json::Map { impl Mergeable for Map where - K: std::hash::Hash + Eq + Clone, + K: Clone + Eq + Ord, V: Clone + Mergeable, { fn merge(&mut self, other: &Self) { @@ -261,7 +261,7 @@ where fn merge_map(destination: &mut Map, source: &Map) where - K: std::hash::Hash + Eq + Clone, + K: Clone + Eq + Ord, V: Clone + Mergeable, { destination.merge(source); diff --git a/lib/vector-config/src/schema/visitors/unevaluated.rs b/lib/vector-config/src/schema/visitors/unevaluated.rs index 1619465cde24a..105c3dbfcaac4 100644 --- a/lib/vector-config/src/schema/visitors/unevaluated.rs +++ b/lib/vector-config/src/schema/visitors/unevaluated.rs @@ -361,9 +361,9 @@ fn is_markable_schema(definitions: &Map, schema: &SchemaObject) .as_ref() .and_then(|reference| { let reference = get_cleaned_schema_reference(reference); - definitions.get_full(reference) + definitions.get_key_value(reference) }) - .and_then(|(_, name, schema)| schema.as_object().map(|schema| (name, schema))) + .and_then(|(name, schema)| schema.as_object().map(|schema| (name, schema))) .map_or(false, |(name, schema)| { debug!( "Following schema reference '{}' for subschema markability.",