From d334ee1b24f5b950be403b4bcfdf0ee79c4af684 Mon Sep 17 00:00:00 2001 From: Toby Lawrence Date: Fri, 31 Mar 2023 16:45:04 -0400 Subject: [PATCH 1/5] chore(config): improve config schema output with more precise unevaluatedProperties + ref flattening --- Cargo.lock | 2 + lib/vector-config-common/Cargo.toml | 1 + .../src/schema/json_schema.rs | 59 +- lib/vector-config-common/src/schema/visit.rs | 26 +- lib/vector-config/Cargo.toml | 1 + lib/vector-config/src/schema/helpers.rs | 6 +- .../src/schema/visitors/inline_single.rs | 357 +++++++++++ .../src/schema/visitors/merge.rs | 291 +++++++++ lib/vector-config/src/schema/visitors/mod.rs | 8 + .../src/schema/visitors/scoped_visit.rs | 180 ++++++ lib/vector-config/src/schema/visitors/test.rs | 15 + .../src/schema/visitors/unevaluated.rs | 602 ++++++++++++++++-- scripts/generate-component-docs.rb | 13 +- .../sinks/base/gcp_stackdriver_logs.cue | 8 +- .../sinks/base/gcp_stackdriver_metrics.cue | 8 +- .../reference/components/sinks/base/http.cue | 8 +- 16 files changed, 1518 insertions(+), 67 deletions(-) create mode 100644 lib/vector-config/src/schema/visitors/inline_single.rs create mode 100644 lib/vector-config/src/schema/visitors/merge.rs create mode 100644 lib/vector-config/src/schema/visitors/scoped_visit.rs create mode 100644 lib/vector-config/src/schema/visitors/test.rs diff --git a/Cargo.lock b/Cargo.lock index 74aa0649f0ef8..feff9355c6a5b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9675,6 +9675,7 @@ dependencies = [ "serde_with 2.3.1", "snafu", "toml 0.7.3", + "tracing 0.1.37", "url", "vector-config-common", "vector-config-macros", @@ -9693,6 +9694,7 @@ dependencies = [ "serde", "serde_json", "syn 1.0.109", + "tracing 0.1.37", ] [[package]] diff --git a/lib/vector-config-common/Cargo.toml b/lib/vector-config-common/Cargo.toml index fc9025f49af8d..d8eac2cfd0485 100644 --- a/lib/vector-config-common/Cargo.toml +++ b/lib/vector-config-common/Cargo.toml @@ -11,4 +11,5 @@ proc-macro2 = { version = "1.0", 
default-features = false } serde = { version = "1.0", default-features = false, features = ["derive"] } serde_json = { version = "1.0", default-features = false, features = ["std"] } syn = { version = "1.0", features = ["full", "extra-traits", "visit-mut", "visit"] } +tracing = { version = "0.1.34", default-features = false } quote = { version = "1.0", default-features = false } diff --git a/lib/vector-config-common/src/schema/json_schema.rs b/lib/vector-config-common/src/schema/json_schema.rs index d1ef4a0078ee2..e2b32c31efd18 100644 --- a/lib/vector-config-common/src/schema/json_schema.rs +++ b/lib/vector-config-common/src/schema/json_schema.rs @@ -1,8 +1,8 @@ use serde::{Deserialize, Serialize}; use serde_json::Value; -use std::ops::Deref; +use std::{iter, ops::Deref}; -use super::{Map, Set}; +use super::{Map, Set, DEFINITIONS_PREFIX}; /// A JSON Schema. #[allow(clippy::large_enum_variant)] @@ -40,6 +40,26 @@ impl Schema { } } + /// Gets a reference to the inner schema object if this schema is a JSON Schema object. + /// + /// Otherwise, `None` is returned. + pub fn as_object(&self) -> Option<&SchemaObject> { + match self { + Schema::Object(schema) => Some(schema), + _ => None, + } + } + + /// Gets a mutable reference to the inner schema object if this schema is a JSON Schema object. + /// + /// Otherwise, `None` is returned. + pub fn as_object_mut(&mut self) -> Option<&mut SchemaObject> { + match self { + Schema::Object(schema) => Some(schema), + _ => None, + } + } + /// Converts the given schema (if it is a boolean schema) into an equivalent schema object. /// /// If the given schema is already a schema object, this has no effect. 
@@ -539,6 +559,30 @@ pub enum SingleOrVec { Vec(Vec), } +impl Extend for SingleOrVec { + fn extend>(&mut self, iter: I) { + match self { + Self::Single(item) => { + *self = Self::Vec(iter::once(*item.clone()).chain(iter.into_iter()).collect()); + } + Self::Vec(items) => items.extend(iter), + } + } +} + +impl<'a, T> IntoIterator for &'a SingleOrVec { + type Item = &'a T; + + type IntoIter = std::slice::Iter<'a, T>; + + fn into_iter(self) -> Self::IntoIter { + match self { + SingleOrVec::Single(item) => std::slice::from_ref(item.as_ref()).iter(), + SingleOrVec::Vec(items) => items.as_slice().iter(), + } + } +} + impl From for SingleOrVec { fn from(single: T) -> Self { SingleOrVec::Single(Box::new(single)) @@ -567,3 +611,14 @@ fn is_none_or_default_true(field: &Option>) -> bool { Some(value) => matches!(value.as_ref(), Schema::Bool(true)), } } + +pub fn get_cleaned_schema_reference(schema_ref: &str) -> &str { + if let Some(cleaned) = schema_ref.strip_prefix(DEFINITIONS_PREFIX) { + cleaned + } else { + panic!( + "Tried to clean schema reference that does not start with the definition prefix: {}", + schema_ref + ); + } +} diff --git a/lib/vector-config-common/src/schema/visit.rs b/lib/vector-config-common/src/schema/visit.rs index 04fdfbd4ac77b..242bed34331e8 100644 --- a/lib/vector-config-common/src/schema/visit.rs +++ b/lib/vector-config-common/src/schema/visit.rs @@ -1,4 +1,6 @@ -use super::{Map, RootSchema, Schema, SchemaObject, SingleOrVec, DEFINITIONS_PREFIX}; +use tracing::debug; + +use super::{get_cleaned_schema_reference, Map, RootSchema, Schema, SchemaObject, SingleOrVec}; /// Trait used to recursively modify a constructed schema and its subschemas. 
pub trait Visitor: std::fmt::Debug { @@ -51,9 +53,15 @@ pub fn visit_schema_object( schema: &mut SchemaObject, ) { if schema.reference.is_some() { - with_resolved_schema_reference(definitions, schema, |defs, referenced_schema| { - v.visit_schema(defs, referenced_schema); - }) + with_resolved_schema_reference( + definitions, + schema, + |defs, schema_ref, referenced_schema| { + debug!(referent = schema_ref, "Visiting schema reference."); + + v.visit_schema(defs, referenced_schema); + }, + ) } if let Some(sub) = &mut schema.subschemas { @@ -133,17 +141,17 @@ pub fn with_resolved_schema_reference( schema: &mut SchemaObject, f: F, ) where - F: FnOnce(&mut Map, &mut Schema), + F: FnOnce(&mut Map, &str, &mut Schema), { if let Some(reference) = schema.reference.as_ref() { - let schema_def_key = reference.replace(DEFINITIONS_PREFIX, ""); + let schema_def_key = get_cleaned_schema_reference(reference); let mut referenced_schema = definitions - .get(&schema_def_key) + .get(schema_def_key) .cloned() .expect("schema reference should exist"); - f(definitions, &mut referenced_schema); + f(definitions, schema_def_key, &mut referenced_schema); - definitions.insert(schema_def_key, referenced_schema); + definitions.insert(schema_def_key.to_string(), referenced_schema); } } diff --git a/lib/vector-config/Cargo.toml b/lib/vector-config/Cargo.toml index 80d855f05866b..98d22a68f836f 100644 --- a/lib/vector-config/Cargo.toml +++ b/lib/vector-config/Cargo.toml @@ -23,6 +23,7 @@ serde_json = { version = "1.0", default-features = false, features = ["std"] } serde_with = { version = "2.3.1", default-features = false, features = ["std"] } snafu = { version = "0.7.4", default-features = false } toml = { version = "0.7.3", default-features = false } +tracing = { version = "0.1.34", default-features = false } url = { version = "2.3.1", default-features = false, features = ["serde"] } vrl-compiler = { package = "vrl-compiler", git = "https://github.com/vectordotdev/vrl", rev = "v0.2.0" } vrl-core 
= { package = "vrl-core", git = "https://github.com/vectordotdev/vrl", rev = "v0.2.0", default-features = false, features = ["serde"] } diff --git a/lib/vector-config/src/schema/helpers.rs b/lib/vector-config/src/schema/helpers.rs index 17458d7941ae5..c1baffd0731a3 100644 --- a/lib/vector-config/src/schema/helpers.rs +++ b/lib/vector-config/src/schema/helpers.rs @@ -12,7 +12,7 @@ use crate::{ num::ConfigurableNumber, Configurable, ConfigurableRef, GenerateError, Metadata, ToValue, }; -use super::visitors::DisallowedUnevaluatedPropertiesVisitor; +use super::visitors::{DisallowedUnevaluatedPropertiesVisitor, InlineSingleUseReferencesVisitor}; /// Applies metadata to the given schema. /// @@ -480,7 +480,9 @@ pub fn generate_internal_tagged_variant_schema( } pub fn default_schema_settings() -> SchemaSettings { - SchemaSettings::new().with_visitor(DisallowedUnevaluatedPropertiesVisitor::from_settings) + SchemaSettings::new() + .with_visitor(InlineSingleUseReferencesVisitor::from_settings) + .with_visitor(DisallowedUnevaluatedPropertiesVisitor::from_settings) } pub fn generate_root_schema() -> Result diff --git a/lib/vector-config/src/schema/visitors/inline_single.rs b/lib/vector-config/src/schema/visitors/inline_single.rs new file mode 100644 index 0000000000000..68347e36937dd --- /dev/null +++ b/lib/vector-config/src/schema/visitors/inline_single.rs @@ -0,0 +1,357 @@ +use std::collections::{HashMap, HashSet}; + +use serde_json::Value; +use tracing::debug; +use vector_config_common::schema::{visit::Visitor, *}; + +use crate::schema::visitors::merge::Mergeable; + +use super::scoped_visit::{ + visit_schema_object_scoped, SchemaReference, SchemaScopeStack, ScopedVisitor, +}; + +/// A visitor that inlines schema references where the referenced schema is only referenced once. 
+/// +/// In many cases, the schema generation will produce schema definitions where either generics or +/// flattening are involved, which leads to schema definitions that may only be referenced by one +/// other schema definition, and so on. +/// +/// This is suboptimal due to the "pointer chasing" involved to resolve those schema references, +/// when there's no reason to inherently have a schema be defined such that it can be referenced. +/// +/// This visitor collects a list of all schema references, and for any schemas which are referenced +/// only once, will replace those references by inlining the referenced schema directly, and +/// deleting the schema definition from the root definitions. +#[derive(Debug, Default)] +pub struct InlineSingleUseReferencesVisitor { + eligible_to_inline: HashSet, +} + +impl InlineSingleUseReferencesVisitor { + pub fn from_settings(_: &SchemaSettings) -> Self { + Self { + eligible_to_inline: HashSet::new(), + } + } +} + +impl Visitor for InlineSingleUseReferencesVisitor { + fn visit_root_schema(&mut self, root: &mut RootSchema) { + // Build a map of schema references and the number of times they're referenced through the + // entire schema, by visiting the root schema in a recursive fashion, using a helper visitor. + let mut occurrence_visitor = OccurrenceVisitor::default(); + occurrence_visitor.visit_root_schema(root); + let occurrence_map = occurrence_visitor.into_occurrences(); + + let eligible_to_inline = occurrence_map + .into_iter() + // Filter out any schemas which have more than one occurrence, as naturally, we're + // trying to inline single-use schema references. :) + .filter_map(|(def_name, occurrences)| { + if occurrences == 1 { + Some(def_name) + } else { + None + } + }) + // However, we'll also filter out some specific schema definitions which are only + // referenced once, specifically: component base types and component types themselves. 
+ // + // We do this as a lot of the tooling that parses the schema to generate documentation, + // and the like, depends on these schemas existing in the top-level definitions for easy + // lookup. + .filter(|def_name| { + let schema = root + .definitions + .get(def_name.as_ref()) + .and_then(Schema::as_object) + .expect("schema definition must exist"); + + is_inlineable_schema(def_name.as_ref(), schema) + }) + .map(|s| s.as_ref().to_string()) + .collect::>(); + + self.eligible_to_inline = eligible_to_inline; + + // Now run our own visitor logic, which will use the inline eligibility to determine if a + // schema reference in a being-visited schema should be replaced inline with the original + // referenced schema, in turn removing the schema definition. + visit::visit_root_schema(self, root); + + // Now remove all of the definitions for schemas that were eligible for inlining. + for schema_def_name in self.eligible_to_inline.drain() { + debug!( + referent = schema_def_name, + "Removing schema definition from root schema." + ); + + root.definitions + .remove(&schema_def_name) + .expect("referenced schema must exist in definitions"); + } + } + + fn visit_schema_object( + &mut self, + definitions: &mut Map, + schema: &mut SchemaObject, + ) { + // Recursively visit this schema first. + visit::visit_schema_object(self, definitions, schema); + + // If this schema has a schema reference, see if it's in our inline eligibility map. If so, + // we remove the referenced schema from the definitions, and then merge it into the current + // schema, after removing the `$ref` field. 
+ if let Some(schema_ref) = schema.reference.as_ref().cloned() { + let schema_ref = get_cleaned_schema_reference(&schema_ref); + if self.eligible_to_inline.contains(schema_ref) { + let referenced_schema = definitions + .get(schema_ref) + .expect("referenced schema must exist in definitions"); + + if let Schema::Object(referenced_schema) = referenced_schema { + debug!( + referent = schema_ref, + "Inlining eligible schema reference into current schema." + ); + + schema.reference = None; + schema.merge(referenced_schema); + } + } + } + } +} + +fn is_inlineable_schema(definition_name: &str, schema: &SchemaObject) -> bool { + static DISALLOWED_SCHEMAS: &[&str] = &[ + "vector::sources::Sources", + "vector::transforms::Transforms", + "vector::sinks::Sinks", + ]; + + // We want to avoid inlining all of the relevant top-level types used for defining components: + // the "outer" types (i.e. `SinkOuter`), the enum/collection types (i.e. the big `Sources` + // enum), and the component configuration types themselves (i.e. `AmqpSinkConfig`). + // + // There's nothing _technically_ wrong with doing so, but it would break downstream consumers of + // the schema that parse it in order to extract the individual components and other + // component-specific metadata. 
+ let is_component_base = get_schema_metadata_attr(schema, "docs::component_base_type").is_some(); + let is_component = get_schema_metadata_attr(schema, "docs::component_type").is_some(); + + let is_allowed_schema = !DISALLOWED_SCHEMAS.contains(&definition_name); + + !is_component_base && !is_component && is_allowed_schema +} + +#[derive(Debug, Default)] +struct OccurrenceVisitor { + scope_stack: SchemaScopeStack, + occurrence_map: HashMap>, +} + +impl OccurrenceVisitor { + fn into_occurrences(self) -> HashMap { + self.occurrence_map + .into_iter() + .map(|(k, v)| (k, v.len())) + .collect() + } +} + +impl Visitor for OccurrenceVisitor { + fn visit_schema_object( + &mut self, + definitions: &mut Map, + schema: &mut SchemaObject, + ) { + visit_schema_object_scoped(self, definitions, schema); + + if let Some(current_schema_ref) = schema.reference.as_ref() { + // Track the named "parent" schema for the schema we're currently visiting so that if we + // visit this schema again, we don't double count any schema references that it has. The + // "parent" schema is simply the closest ancestor schema that was itself a schema + // reference, or "Root", which represents the oldest schema ancestor in the document. + // + // This lets us couple with scenarios where schema A references schema B, and is the + // only actual direct schema reference to schema B, but due to multiple schemas + // referencing schema A, would otherwise lead to both A and B being visited multiple + // times. 
+ let current_parent_schema_ref = self.get_current_schema_scope().clone(); + let current_schema_ref = get_cleaned_schema_reference(current_schema_ref); + + let occurrences = self + .occurrence_map + .entry(current_schema_ref.into()) + .or_insert_with(HashSet::new); + occurrences.insert(current_parent_schema_ref); + } + } +} + +impl ScopedVisitor for OccurrenceVisitor { + fn push_schema_scope>(&mut self, scope: S) { + self.scope_stack.push(scope.into()); + } + + fn pop_schema_scope(&mut self) { + self.scope_stack.pop().expect("stack was empty during pop"); + } + + fn get_current_schema_scope(&self) -> &SchemaReference { + self.scope_stack.current().unwrap_or(&SchemaReference::Root) + } +} + +fn get_schema_metadata_attr<'a>(schema: &'a SchemaObject, key: &str) -> Option<&'a Value> { + schema + .extensions + .get("_metadata") + .and_then(|metadata| metadata.get(key)) +} + +#[cfg(test)] +mod tests { + use serde_json::json; + use vector_config_common::schema::visit::Visitor; + + use crate::schema::visitors::test::{as_schema, assert_schemas_eq}; + + use super::InlineSingleUseReferencesVisitor; + + #[test] + fn no_refs() { + let mut actual_schema = as_schema(json!({ + "type": "object", + "properties": { + "a": { "type": "string" } + } + })); + + let expected_schema = actual_schema.clone(); + + let mut visitor = InlineSingleUseReferencesVisitor::default(); + visitor.visit_root_schema(&mut actual_schema); + + assert_schemas_eq(expected_schema, actual_schema); + } + + #[test] + fn single_ref_single_usage() { + let mut actual_schema = as_schema(json!({ + "$ref": "#/definitions/simple", + "definitions": { + "simple": { + "type": "object", + "properties": { + "a": { "type": "string" } + } + } + } + })); + + let mut visitor = InlineSingleUseReferencesVisitor::default(); + visitor.visit_root_schema(&mut actual_schema); + + let expected_schema = as_schema(json!({ + "type": "object", + "properties": { + "a": { "type": "string" } + }, + })); + + assert_schemas_eq(expected_schema, 
actual_schema); + } + + // TODO(tobz): These two tests are ignored because the inliner currently works off of schema + // reference scopes, so two object properties within the same schema don't count as two + // instances of a schema being referenced. + // + // We need to refactor schema scopes to be piecemeal extensible more in the way of how + // `jsonschema` does it rather than an actual stack.... the current approach is good enough for + // now, but not optimal in the way that it could be. + // + // These are here for when I improve the situation after getting this merged. + #[test] + #[ignore] + fn single_ref_multiple_usages() { + let mut actual_schema = as_schema(json!({ + "definitions": { + "simple": { + "type": "object", + "properties": { + "a": { "type": "string" } + } + } + }, + "type": "object", + "properties": { + "a": { "$ref": "#/definitions/simple" }, + "b": { "$ref": "#/definitions/simple" } + } + })); + + let expected_schema = actual_schema.clone(); + + let mut visitor = InlineSingleUseReferencesVisitor::default(); + visitor.visit_root_schema(&mut actual_schema); + + assert_schemas_eq(expected_schema, actual_schema); + } + + #[test] + #[ignore] + fn multiple_refs_mixed_usages() { + let mut actual_schema = as_schema(json!({ + "definitions": { + "simple": { + "type": "object", + "properties": { + "a": { "type": "string" } + } + }, + "advanced": { + "type": "object", + "properties": { + "b": { "type": "string" } + } + } + }, + "type": "object", + "properties": { + "a": { "$ref": "#/definitions/simple" }, + "b": { "$ref": "#/definitions/simple" }, + "c": { "$ref": "#/definitions/advanced" }, + } + })); + + let mut visitor = InlineSingleUseReferencesVisitor::default(); + visitor.visit_root_schema(&mut actual_schema); + + let expected_schema = as_schema(json!({ + "definitions": { + "simple": { + "type": "object", + "properties": { + "a": { "type": "string" } + } + } + }, + "type": "object", + "properties": { + "a": { "$ref": "#/definitions/simple" }, + 
"b": { "$ref": "#/definitions/simple" }, + "c": { + "type": "object", + "properties": { + "b": { "type": "string" } + } + } + } + })); + + assert_schemas_eq(expected_schema, actual_schema); + } +} diff --git a/lib/vector-config/src/schema/visitors/merge.rs b/lib/vector-config/src/schema/visitors/merge.rs new file mode 100644 index 0000000000000..741b74157e137 --- /dev/null +++ b/lib/vector-config/src/schema/visitors/merge.rs @@ -0,0 +1,291 @@ +#![allow(clippy::borrowed_box)] + +use std::mem::discriminant; + +use serde_json::Value; +use vector_config_common::schema::*; + +/// A type that can be merged with itself. +pub trait Mergeable { + fn merge(&mut self, other: &Self); +} + +impl Mergeable for SchemaObject { + fn merge(&mut self, other: &Self) { + // The logic is pretty straightforward: we merge `other` into `self`, with `self` having the + // higher precedence. + // + // Additionally, we only merge logical schema chunks: if the destination schema has object + // properties defined, and the source schema has some object properties that don't exist in + // the destination, they will be merged, but if there is any overlap, then the object + // properties in the destination would remain untouched. This merging logic applies to all + // map-based types. + // + // For standalone fields, such as title or description, the destination always has higher + // precedence. For optional fields, whichever version (destination or source) is present + // will win, except for when both are present, then the individual fields within the + // optional type will be merged according to the normal precedence rules. 
+ merge_optional(&mut self.reference, other.reference.as_ref()); + merge_schema_metadata(&mut self.metadata, other.metadata.as_ref()); + merge_schema_instance_type(&mut self.instance_type, other.instance_type.as_ref()); + merge_schema_format(&mut self.format, other.format.as_ref()); + merge_schema_enum_values(&mut self.enum_values, other.enum_values.as_ref()); + merge_schema_const_value(&mut self.const_value, other.const_value.as_ref()); + merge_schema_subschemas(&mut self.subschemas, other.subschemas.as_ref()); + merge_schema_number_validation(&mut self.number, other.number.as_ref()); + merge_schema_string_validation(&mut self.string, other.string.as_ref()); + merge_schema_array_validation(&mut self.array, other.array.as_ref()); + merge_schema_object_validation(&mut self.object, other.object.as_ref()); + merge_schema_extensions(&mut self.extensions, &other.extensions); + } +} + +impl Mergeable for Value { + fn merge(&mut self, other: &Self) { + // We do a check here for ensuring both value discriminants are the same type. This is + // specific to `Value` but we should never really be merging identical keys together that + // have differing value types, as that is indicative of a weird overlap in keys between + // different schemas. + // + // We _may_ need to relax this in practice/in the future, but it's a solid invariant to + // enforce for the time being. + if discriminant(self) != discriminant(other) { + panic!("Tried to merge two `Value` types together with differing types!\n\nSelf: {:?}\n\nOther: {:?}", self, other); + } + + match (self, other) { + // Maps get merged recursively. + (Value::Object(self_map), Value::Object(other_map)) => { + self_map.merge(other_map); + } + // Arrays get merged together indiscriminately. + (Value::Array(self_array), Value::Array(other_array)) => { + self_array.extend(other_array.iter().cloned()); + } + // We don't merge any other value types together. 
+ _ => {} + } + } +} + +impl Mergeable for Schema { + fn merge(&mut self, other: &Self) { + match (self, other) { + // We don't merge schemas together if either of them is a boolean schema. + (Schema::Bool(_), _) | (_, Schema::Bool(_)) => {} + (Schema::Object(self_schema), Schema::Object(other_schema)) => { + self_schema.merge(other_schema); + } + } + } +} + +impl Mergeable for serde_json::Map { + fn merge(&mut self, other: &Self) { + for (key, value) in other { + match self.get_mut(key) { + None => { + self.insert(key.clone(), value.clone()); + } + Some(existing) => existing.merge(value), + } + } + } +} + +impl Mergeable for Map +where + K: std::hash::Hash + Eq + Clone, + V: Clone + Mergeable, +{ + fn merge(&mut self, other: &Self) { + for (key, value) in other { + match self.get_mut(key) { + None => { + self.insert(key.clone(), value.clone()); + } + Some(existing) => existing.merge(value), + } + } + } +} + +fn merge_schema_metadata(destination: &mut Option>, source: Option<&Box>) { + merge_optional_with(destination, source, |existing, new| { + merge_optional(&mut existing.id, new.id.as_ref()); + merge_optional(&mut existing.title, new.title.as_ref()); + merge_optional(&mut existing.description, new.description.as_ref()); + merge_optional(&mut existing.default, new.default.as_ref()); + merge_bool(&mut existing.deprecated, new.deprecated); + merge_bool(&mut existing.read_only, new.read_only); + merge_bool(&mut existing.write_only, new.write_only); + merge_collection(&mut existing.examples, &new.examples); + }); +} + +fn merge_schema_instance_type( + destination: &mut Option>, + source: Option<&SingleOrVec>, +) { + merge_optional_with(destination, source, |existing, new| { + let mut deduped = existing + .into_iter() + .chain(new.into_iter()) + .cloned() + .collect::>(); + deduped.dedup(); + + *existing = deduped.into(); + }); +} + +fn merge_schema_format(destination: &mut Option, source: Option<&String>) { + merge_optional(destination, source); +} + +fn 
merge_schema_enum_values(destination: &mut Option>, source: Option<&Vec>) { + merge_optional_with(destination, source, merge_collection); +} + +fn merge_schema_const_value(destination: &mut Option, source: Option<&Value>) { + merge_optional(destination, source); +} + +fn merge_schema_subschemas( + destination: &mut Option>, + source: Option<&Box>, +) { + merge_optional_with(destination, source, |existing, new| { + merge_optional_with(&mut existing.all_of, new.all_of.as_ref(), merge_collection); + merge_optional_with(&mut existing.any_of, new.any_of.as_ref(), merge_collection); + merge_optional_with(&mut existing.one_of, new.one_of.as_ref(), merge_collection); + merge_optional(&mut existing.if_schema, new.if_schema.as_ref()); + merge_optional(&mut existing.then_schema, new.then_schema.as_ref()); + merge_optional(&mut existing.else_schema, new.else_schema.as_ref()); + merge_optional(&mut existing.not, new.not.as_ref()); + }); +} + +fn merge_schema_number_validation( + destination: &mut Option>, + source: Option<&Box>, +) { + merge_optional_with(destination, source, |existing, new| { + merge_optional(&mut existing.multiple_of, new.multiple_of.as_ref()); + merge_optional(&mut existing.maximum, new.maximum.as_ref()); + merge_optional( + &mut existing.exclusive_maximum, + new.exclusive_minimum.as_ref(), + ); + merge_optional(&mut existing.minimum, new.minimum.as_ref()); + merge_optional( + &mut existing.exclusive_minimum, + new.exclusive_minimum.as_ref(), + ); + }); +} + +fn merge_schema_string_validation( + destination: &mut Option>, + source: Option<&Box>, +) { + merge_optional_with(destination, source, |existing, new| { + merge_optional(&mut existing.max_length, new.max_length.as_ref()); + merge_optional(&mut existing.min_length, new.min_length.as_ref()); + merge_optional(&mut existing.pattern, new.pattern.as_ref()); + }); +} + +fn merge_schema_array_validation( + destination: &mut Option>, + source: Option<&Box>, +) { + merge_optional_with(destination, source, 
|existing, new| { + merge_optional_with(&mut existing.items, new.items.as_ref(), merge_collection); + merge_optional( + &mut existing.additional_items, + new.additional_items.as_ref(), + ); + merge_optional( + &mut existing.unevaluated_items, + new.unevaluated_items.as_ref(), + ); + merge_optional(&mut existing.max_items, new.max_items.as_ref()); + merge_optional(&mut existing.min_items, new.min_items.as_ref()); + merge_optional(&mut existing.unique_items, new.unique_items.as_ref()); + merge_optional(&mut existing.contains, new.contains.as_ref()); + }); +} + +fn merge_schema_object_validation( + destination: &mut Option>, + source: Option<&Box>, +) { + merge_optional_with(destination, source, |existing, new| { + merge_optional(&mut existing.max_properties, new.max_properties.as_ref()); + merge_optional(&mut existing.min_properties, new.min_properties.as_ref()); + merge_collection(&mut existing.required, &new.required); + merge_map(&mut existing.properties, &new.properties); + merge_map(&mut existing.pattern_properties, &new.pattern_properties); + merge_optional( + &mut existing.additional_properties, + new.additional_properties.as_ref(), + ); + merge_optional( + &mut existing.unevaluated_properties, + new.unevaluated_properties.as_ref(), + ); + merge_optional(&mut existing.property_names, new.property_names.as_ref()); + }); +} + +fn merge_schema_extensions(destination: &mut Map, source: &Map) { + merge_map(destination, source); +} + +fn merge_bool(destination: &mut bool, source: bool) { + // We only treat `true` as a merge-worthy value. 
+ if source { + *destination = true; + } +} + +fn merge_collection<'a, E, I, T>(destination: &mut E, source: I) +where + E: Extend, + I: IntoIterator, + T: Clone + 'a, +{ + destination.extend(source.into_iter().cloned()); +} + +fn merge_map(destination: &mut Map, source: &Map) +where + K: std::hash::Hash + Eq + Clone, + V: Clone + Mergeable, +{ + destination.merge(source); +} + +fn merge_optional(destination: &mut Option, source: Option<&T>) { + merge_optional_with(destination, source, |_, _| {}); +} + +fn merge_optional_with<'a, T, F>(destination: &'a mut Option, source: Option<&'a T>, f: F) +where + T: Clone, + F: Fn(&'a mut T, &'a T), +{ + match destination { + // If the destination is empty, we use whatever we have in `source`. Otherwise, we leave + // `destination` as-is. + None => *destination = source.cloned(), + // If `destination` isn't empty, and neither is `source`, then pass them both to `f` to + // let it handle the actual merge logic. + Some(destination) => { + if let Some(source) = source { + f(destination, source); + } + } + } +} diff --git a/lib/vector-config/src/schema/visitors/mod.rs b/lib/vector-config/src/schema/visitors/mod.rs index d9f94a9909549..59802e0ebe10b 100644 --- a/lib/vector-config/src/schema/visitors/mod.rs +++ b/lib/vector-config/src/schema/visitors/mod.rs @@ -1,2 +1,10 @@ +mod inline_single; +pub mod merge; +pub mod scoped_visit; mod unevaluated; + +#[cfg(test)] +pub(self) mod test; + +pub use self::inline_single::InlineSingleUseReferencesVisitor; pub use self::unevaluated::DisallowedUnevaluatedPropertiesVisitor; diff --git a/lib/vector-config/src/schema/visitors/scoped_visit.rs b/lib/vector-config/src/schema/visitors/scoped_visit.rs new file mode 100644 index 0000000000000..937a4ee4920dd --- /dev/null +++ b/lib/vector-config/src/schema/visitors/scoped_visit.rs @@ -0,0 +1,180 @@ +use std::collections::VecDeque; + +use vector_config_common::schema::{visit::Visitor, *}; + +/// A schema reference which can refer to either a schema 
definition or the root schema itself. +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub enum SchemaReference { + /// A defined schema. + Definition(String), + + /// The root schema itself. + Root, +} + +impl std::fmt::Display for SchemaReference { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Definition(name) => write!(f, "{}", name), + Self::Root => write!(f, ""), + } + } +} + +impl<'a, T> From<&'a T> for SchemaReference +where + T: AsRef + ?Sized, +{ + fn from(value: &'a T) -> Self { + Self::Definition(value.as_ref().to_string()) + } +} + +impl AsRef for SchemaReference { + fn as_ref(&self) -> &str { + match self { + Self::Definition(name) => name.as_str(), + Self::Root => "", + } + } +} + +/// The schema scope stack is used to understand where in the visiting of a root schema the visitor +/// currently is. All visiting will inherently start at the root, and continue to exist in the root +/// scope until a schema reference is resolved, and the visitor visits the resolved schema. Once +/// that happens, the resolved schema scope is pushed onto the stack, such that the scope is now the +/// schema reference. This can happen multiple times as subsequent schema references are resolved. +/// Once the visitor recurses back out of the resolved schema, it is popped from the stack. 
+#[derive(Debug, Default)] +pub struct SchemaScopeStack { + stack: VecDeque, +} + +impl SchemaScopeStack { + pub fn push>(&mut self, scope: S) { + self.stack.push_front(scope.into()); + } + + pub fn pop(&mut self) -> Option { + self.stack.pop_front() + } + + pub fn current(&self) -> Option<&SchemaReference> { + self.stack.front() + } +} + +pub trait ScopedVisitor: Visitor { + fn push_schema_scope>(&mut self, scope: S); + + fn pop_schema_scope(&mut self); + + fn get_current_schema_scope(&self) -> &SchemaReference; +} + +pub fn visit_schema_object_scoped( + sv: &mut SV, + definitions: &mut Map, + schema: &mut SchemaObject, +) { + if let Some(reference) = schema.reference.as_ref() { + let schema_def_key = get_cleaned_schema_reference(reference); + let mut referenced_schema = definitions + .get(schema_def_key) + .cloned() + .expect("schema reference should exist"); + + if let Schema::Object(referenced_schema) = &mut referenced_schema { + sv.push_schema_scope(schema_def_key); + + sv.visit_schema_object(definitions, referenced_schema); + + sv.pop_schema_scope(); + } + + definitions.insert(schema_def_key.to_string(), referenced_schema); + } + + if let Some(sub) = &mut schema.subschemas { + visit_vec_scoped(sv, definitions, &mut sub.all_of); + visit_vec_scoped(sv, definitions, &mut sub.any_of); + visit_vec_scoped(sv, definitions, &mut sub.one_of); + visit_box_scoped(sv, definitions, &mut sub.not); + visit_box_scoped(sv, definitions, &mut sub.if_schema); + visit_box_scoped(sv, definitions, &mut sub.then_schema); + visit_box_scoped(sv, definitions, &mut sub.else_schema); + } + + if let Some(arr) = &mut schema.array { + visit_single_or_vec_scoped(sv, definitions, &mut arr.items); + visit_box_scoped(sv, definitions, &mut arr.additional_items); + visit_box_scoped(sv, definitions, &mut arr.contains); + } + + if let Some(obj) = &mut schema.object { + visit_map_values_scoped(sv, definitions, &mut obj.properties); + visit_map_values_scoped(sv, definitions, &mut 
obj.pattern_properties); + visit_box_scoped(sv, definitions, &mut obj.additional_properties); + visit_box_scoped(sv, definitions, &mut obj.property_names); + } +} + +fn visit_box_scoped( + sv: &mut SV, + definitions: &mut Map, + target: &mut Option>, +) { + if let Some(s) = target { + if let Schema::Object(s) = s.as_mut() { + sv.visit_schema_object(definitions, s); + } + } +} + +fn visit_vec_scoped( + sv: &mut SV, + definitions: &mut Map, + target: &mut Option>, +) { + if let Some(vec) = target { + for s in vec { + if let Schema::Object(s) = s { + sv.visit_schema_object(definitions, s); + } + } + } +} + +fn visit_map_values_scoped( + sv: &mut SV, + definitions: &mut Map, + target: &mut Map, +) { + for s in target.values_mut() { + if let Schema::Object(s) = s { + sv.visit_schema_object(definitions, s); + } + } +} + +fn visit_single_or_vec_scoped( + sv: &mut SV, + definitions: &mut Map, + target: &mut Option>, +) { + match target { + None => {} + Some(SingleOrVec::Single(s)) => { + if let Schema::Object(s) = s.as_mut() { + sv.visit_schema_object(definitions, s); + } + } + Some(SingleOrVec::Vec(vec)) => { + for s in vec { + if let Schema::Object(s) = s { + sv.visit_schema_object(definitions, s); + } + } + } + } +} diff --git a/lib/vector-config/src/schema/visitors/test.rs b/lib/vector-config/src/schema/visitors/test.rs new file mode 100644 index 0000000000000..1f1793cb8c071 --- /dev/null +++ b/lib/vector-config/src/schema/visitors/test.rs @@ -0,0 +1,15 @@ +use pretty_assertions::assert_eq; +use serde_json::Value; +use vector_config_common::schema::RootSchema; + +pub fn as_schema(value: Value) -> RootSchema { + serde_json::from_value(value).expect("should not fail to deserialize schema") +} + +#[track_caller] +pub fn assert_schemas_eq(expected: RootSchema, actual: RootSchema) { + let expected_json = serde_json::to_string_pretty(&expected).expect("should not fail"); + let actual_json = serde_json::to_string_pretty(&actual).expect("should not fail"); + + 
assert_eq!(expected_json, actual_json); +} diff --git a/lib/vector-config/src/schema/visitors/unevaluated.rs b/lib/vector-config/src/schema/visitors/unevaluated.rs index 1db74cc483b28..4aa34dcfbb2b8 100644 --- a/lib/vector-config/src/schema/visitors/unevaluated.rs +++ b/lib/vector-config/src/schema/visitors/unevaluated.rs @@ -1,10 +1,26 @@ -//use indexmap::IndexMap; +#![allow(warnings)] + +use std::{ + collections::{HashMap, HashSet}, + convert::identity, + mem::discriminant, +}; + +use serde_json::Value; +use tracing::{debug, error}; use vector_config_common::schema::{ visit::{visit_schema_object, with_resolved_schema_reference, Visitor}, - InstanceType, Map, Schema, SchemaObject, SchemaSettings, SingleOrVec, + *, +}; + +use crate::schema::visitors::merge::Mergeable; + +use super::scoped_visit::{ + visit_schema_object_scoped, SchemaReference, SchemaScopeStack, ScopedVisitor, }; -/// A visitor that marks schemas as disallowing unknown properties via `unevaluatedProperties`. +/// A visitor that marks schemas as closed by disallowing unknown properties via +/// `unevaluatedProperties`. /// /// This is the equivalent of `serde`'s `deny_unknown_fields` attribute: instead of only validating /// the properties specified in the schema, and ignoring any properties present in the JSON @@ -15,23 +31,128 @@ use vector_config_common::schema::{ /// with advanced subschema validation, such as `oneOf` or `allOf`, as `unevaluatedProperties` /// cannot simply be applied to any and all schemas indiscriminately. 
#[derive(Debug, Default)] -pub struct DisallowedUnevaluatedPropertiesVisitor; +pub struct DisallowedUnevaluatedPropertiesVisitor { + scope_stack: SchemaScopeStack, + eligible_to_flatten: HashMap>, +} impl DisallowedUnevaluatedPropertiesVisitor { pub fn from_settings(_: &SchemaSettings) -> Self { - Self + Self { + scope_stack: SchemaScopeStack::default(), + eligible_to_flatten: HashMap::new(), + } } } impl Visitor for DisallowedUnevaluatedPropertiesVisitor { + fn visit_root_schema(&mut self, root: &mut RootSchema) { + // Glossary: + // + // mark(ed): setting or having `unevaluatedProperties` set to `Some(Schema::Bool(false))` + // unmark(ed): unsetting or having `unevaluatedProperties` set to `None` + + // For all definitions, visit _just_ the defined schema (no recursing) and build a map of + // child definitions -> (mark eligibility, [(parent definition, would_unmark)]), such that we know exactly + // which schemas refer to any given schema definition and if they would lead to the child + // schema being marked as `unevaluatedProperties: false`. + // + // We would filter out any child definitions that aren't eligible to be marked. For the + // remaining child schemas, we group the parent definitions by `would_unmark`, which + // indicates whether or not the given parent definition would cause the child definition to + // be unmarked. + // + // As an example, we would expect a parent schema referring to a child schema via `allOf` to + // unmark the child schema, while a parent schema referring to a child schema within a + // specific property to not unmark the child schema. + // + // With the grouped parent definitions, take the smaller of the two groups. This represents + // the set of parent schemas that we will indicate as needing to use a flattened version of + // the child schema when we execute our primary visit logic. 
+ + let mut eligible_to_flatten = HashMap::new(); + let flattening_eligibility_map = build_closed_schema_flatten_eligibility_mappings(root); + for (child_schema_ref, metadata) in flattening_eligibility_map { + let would_unmark = metadata + .referrers + .iter() + .filter(|r| r.would_unmark) + .map(|r| r.referent.clone()) + .collect::>(); + let would_not_unmark = metadata + .referrers + .iter() + .filter(|r| !r.would_unmark) + .map(|r| r.referent.clone()) + .collect::>(); + + if would_not_unmark.len() > would_unmark.len() { + eligible_to_flatten.insert(child_schema_ref.to_string(), would_unmark); + } else { + eligible_to_flatten.insert(child_schema_ref.to_string(), would_not_unmark); + } + } + + debug!( + "Found {} referents eligible for flattening: {:?}", + eligible_to_flatten.len(), + eligible_to_flatten, + ); + + self.eligible_to_flatten = eligible_to_flatten; + + visit::visit_root_schema(self, root); + } + fn visit_schema_object( &mut self, definitions: &mut Map, schema: &mut SchemaObject, ) { + // If this schema has a schema reference, check our flattening eligibility map to figure out + // if we need to merge it in. + // + // When a given schema reference (the actual target of `$ref`) is eligible for flattening in + // a given schema (what we're currently visiting) then it means that this schema would, + // based on its composition, lead to the schema reference either being marked or unmarked. + // + // We flatten the schema reference into this schema to avoid that from occurring, and we do + // so based on whichever group of referrers -- the schemas which reference the particular + // target schema -- is smaller, such that we do the minimum number of flattenings per target + // schema, to keep the schema as small as we reasonably can. 
+ if let Some(reference) = schema.reference.as_ref() { + let current_parent_schema_ref = self.get_current_schema_scope(); + + if let Some(referrers) = self.eligible_to_flatten.get(reference) { + if referrers.contains(¤t_parent_schema_ref) { + let current_schema_ref = get_cleaned_schema_reference(reference); + let referenced_schema = definitions + .get(current_schema_ref) + .expect("schema definition must exist"); + + debug!( + referent = current_schema_ref, + referrer = current_parent_schema_ref.as_ref(), + "Found eligible referent/referrer mapping." + ); + + if let Schema::Object(referenced_schema) = referenced_schema { + debug!( + referent = current_schema_ref, + referrer = current_parent_schema_ref.as_ref(), + "Flattening referent into referrer." + ); + + schema.reference = None; + schema.merge(&referenced_schema); + } + } + } + } + // Visit the schema object first so that we recurse the overall schema in a depth-first - // fashion, applying the unevaluated properties change from the bottom up. - visit_schema_object(self, definitions, schema); + // fashion, marking eligible object schemas as closed. + visit_schema_object_scoped(self, definitions, schema); // Next, see if this schema has any subschema validation: `allOf`, `oneOf`, or `anyOf`. // @@ -42,39 +163,378 @@ impl Visitor for DisallowedUnevaluatedPropertiesVisitor { // which is why we remove that from the subschemas themselves but essentially hoist it up // to the level of the `allOf`/`oneOf`/`anyOf`, where it can apply the correct behavior. let mut had_relevant_subschemas = false; - if let Some(subschemas) = get_subschema_validators(schema) { - had_relevant_subschemas = true; - + if let Some(subschema) = schema.subschemas.as_mut() { + let mut subschemas = get_object_subschemas_from_parent_mut(subschema.as_mut()); for subschema in subschemas { - // If the schema is an object schema, we'll unset `unevaluatedProperties` directly. 
- // If it isn't an object schema, we'll see if the subschema is actually a schema - // reference, and if so, we'll make sure to unset `unevaluatedProperties` on the - // resolved schema reference itself. - // - // Like the top-level schema reference logic, this ensures the schema definition is - // updated for subsequent resolution. - if let Some(object) = subschema.object.as_mut() { - object.unevaluated_properties = Some(Box::new(Schema::Bool(true))); - } else { - with_resolved_schema_reference(definitions, subschema, |_, resolved| { - if let Schema::Object(schema) = resolved { - if let Some(object) = schema.object.as_mut() { - object.unevaluated_properties = Some(Box::new(Schema::Bool(true))); - } - } - }); - } + had_relevant_subschemas = true; + + unmark_or_flatten_schema(definitions, subschema); } } // If we encountered any subschema validation, or if this schema itself is an object schema, // mark the schema as closed by setting `unevaluatedProperties` to `false`. - if had_relevant_subschemas || is_object_schema(schema.instance_type.as_ref()) { + if had_relevant_subschemas || is_object_schema(schema) { mark_schema_closed(schema); } } } +impl ScopedVisitor for DisallowedUnevaluatedPropertiesVisitor { + fn push_schema_scope>(&mut self, scope: S) { + self.scope_stack.push(scope.into()); + } + + fn pop_schema_scope(&mut self) { + self.scope_stack.pop().expect("stack was empty during pop"); + } + + fn get_current_schema_scope(&self) -> &SchemaReference { + self.scope_stack + .current() + .unwrap_or_else(|| &SchemaReference::Root) + } +} + +fn is_non_object_optional_schema( + definitions: &mut Map, + schema: &mut SchemaObject, +) -> bool { + // Only match schemas that have subschema validation in the form of `oneOf`, and nothing else. 
+ let subschemas = match schema.subschemas.as_ref() { + None => return false, + Some(subschema) => { + let has_non_one_of = subschema.all_of.is_some() || subschema.any_of.is_some(); + match subschema.one_of.as_ref() { + Some(one_of) if !has_non_one_of => one_of, + _ => return false, + } + } + }; + + // There can only be two subschemas, one of which is simply a `null` schema. + if subschemas.len() != 2 { + return false; + } + + let maybe_non_null_schema_idx = subschemas[0] + .as_object() + .map(is_null_schema) + .and_then(|x| x.then(|| 1)) + .or_else(|| { + subschemas[1] + .as_object() + .map(is_null_schema) + .and_then(|x| x.then(|| 0)) + }); + + let non_null_schema = match maybe_non_null_schema_idx { + Some(idx) => &subschemas[idx], + None => return false, + }; + + // Check if the non-null schema is an object schema or not. + non_null_schema + .as_object() + .map(|schema| !is_object_schema(schema)) + .unwrap_or(true) +} + +fn unmark_or_flatten_schema(definitions: &mut Map, schema: &mut SchemaObject) { + // If the schema is an object schema, we'll unset `unevaluatedProperties` directly. + // If it isn't an object schema, we'll see if the subschema is actually a schema + // reference, and if so, we'll make sure to unset `unevaluatedProperties` on the + // resolved schema reference itself. + // + // Like the top-level schema reference logic, this ensures the schema definition is + // updated for subsequent resolution. + if let Some(object) = schema.object.as_mut() { + debug!("Unmarked object subschema directly."); + + object.unevaluated_properties = Some(Box::new(Schema::Bool(true))); + } else { + with_resolved_schema_reference(definitions, schema, |_, schema_ref, resolved| { + if let Schema::Object(resolved) = resolved { + if let Some(object) = resolved.object.as_mut() { + debug!( + referent = schema_ref, + "Unmarked subschema by traversing schema reference." 
+ ); + + object.unevaluated_properties = Some(Box::new(Schema::Bool(true))); + } + } + }); + } +} + +/// A referent schema that carries the chance of being unmarking by its referrer. +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +struct MarkableReferent { + // Whether or not the referent would be unmarked by the referrer. + would_unmark: bool, + + /// The referent schema. + referent: SchemaReference, +} + +impl MarkableReferent { + fn would_unmark>(referent: R) -> Self { + Self { + would_unmark: true, + referent: referent.into(), + } + } + + fn would_not_unmark>(referent: R) -> Self { + Self { + would_unmark: false, + referent: referent.into(), + } + } + + fn with_new_referent>(&self, new_referent: R) -> Self { + Self { + would_unmark: self.would_unmark, + referent: new_referent.into(), + } + } +} + +#[derive(Debug)] +struct ChildMetadata { + /// Whether or not this schema, in isolation, should be marked. + should_mark: bool, + + /// The set of referring schemas. + referrers: HashSet, +} + +impl ChildMetadata { + fn from_schema(schema: &SchemaObject) -> Self { + Self { + should_mark: is_markable_schema(schema), + referrers: HashSet::new(), + } + } +} + +fn build_closed_schema_flatten_eligibility_mappings( + root_schema: &RootSchema, +) -> HashMap { + // Iterate over all definitions, and once more for the root schema, and generate a map of parent + // schema -> (would_unmark, child schema). + let mut parent_to_child = HashMap::new(); + for (definition_name, definition) in &root_schema.definitions { + // We only care about full-fledged schemas, not boolean schemas. + let parent_schema = match definition { + Schema::Bool(_) => continue, + Schema::Object(schema) => schema, + }; + + // If a schema itself would not be considered markable, then we don't need to consider the + // eligibility between parent/child since there's nothing to drive the "now unmark the child + // schemas" logic. 
+ if !is_markable_schema(parent_schema) { + continue; + } + + // Collect all referents for this definition, which includes both property-based referents + // and subschema-based referents. Property-based referents are not required to be unmarked, + // while subschema-based referents must be unmarked. + let mut referents = HashSet::new(); + get_referents(parent_schema, &mut referents); + + // Store the parent/child mapping. + parent_to_child.insert(SchemaReference::from(definition_name), referents); + } + + // Collect the referents from the root schema. + let mut root_referents = HashSet::new(); + get_referents(&root_schema.schema, &mut root_referents); + parent_to_child.insert(SchemaReference::Root, root_referents); + + // Now we build a reverse map, going from child -> parent. We'll iterate over every child + // referent, for every parent/child entry, calcuating the set of referrers, and if they would + // require unmarking the child. + let mut child_to_parent = HashMap::new(); + for (parent_schema_ref, child_referents) in parent_to_child { + for child_referent in child_referents { + let entry = child_to_parent + .entry(child_referent.referent.as_ref().to_string()) + .or_insert_with(|| { + let child_schema = + get_schema_from_reference(root_schema, &child_referent.referent); + ChildMetadata::from_schema(child_schema) + }); + + // Transform the child referent into a parent referent, which preserves the "would + // unmark" value but now points to the parent instead, and add it to the list of + // _referrers_ for the child. + entry + .referrers + .insert(child_referent.with_new_referent(parent_schema_ref.clone())); + } + } + + /* + // Now filter out entries that are not be eligible for flattening. + child_to_parent + .into_iter() + // Filter out all mappings that don't involve markable schemas. 
+ .filter(|(_, metadata)| metadata.should_mark) + // Filter out all mappings where all referrers have the same marking behavior for the + // referrent, as there's nothing to differentiate between for flattening vs not flattening, + // so long + .filter(|(_, metadata)| { + !(metadata.referrers.iter().all(|r| r.would_unmark) + || metadata.referrers.iter().all(|r| !r.would_unmark)) + }) + .collect()*/ + + child_to_parent +} + +fn get_schema_from_reference<'a>( + root_schema: &'a RootSchema, + schema_ref: &SchemaReference, +) -> &'a SchemaObject { + match schema_ref { + SchemaReference::Root => &root_schema.schema, + SchemaReference::Definition(name) => { + let definition_name = get_cleaned_schema_reference(name.as_str()); + match root_schema.definitions.get(definition_name) { + None => panic!("Schema definition '{}' does not exist!", definition_name), + Some(schema) => schema + .as_object() + .expect("referenced schemas should never be boolean schemas"), + } + } + } +} + +/// Determines whether a schema is eligible to be marked. +fn is_markable_schema(schema: &SchemaObject) -> bool { + // If the schema is an object schema, and does not have`additionalProperties` set, it can be + // marked, as marking a schema with both `unevaluatedProperties`/`additionalProperties` would + // otherwise be a logical inconsistency. + let has_additional_properties = schema + .object + .as_ref() + .and_then(|object| object.additional_properties.as_ref()) + .map(|schema| matches!(schema.as_ref(), Schema::Object(_))) + .unwrap_or(false); + + if is_object_schema(schema) && !has_additional_properties { + return true; + } + + // If the schema uses subschema validation -- specifically: `allOf`, `oneOf`, or `anyOf` -- then + // it should be marked, so long as one of the subschemas is actually an object schema. + // + // If we're dealing with something like a `oneOf` for `Option`, we'll have two + // subschemas: { "type": "null" } and { "$ref": "#/definitions/T" }. 
If the schema for `T` is, + // say, just a scalar schema, instead of an object schema... then it wouldn't be marked, and in + // turn, we wouldn't need to mark the schema for `Option`: there's no properties at all. + if let Some(subschema) = schema.subschemas.as_ref() { + let mut subschemas = get_object_subschemas_from_parent(subschema); + + let has_object_subschema = subschemas.any(is_object_schema); + if has_object_subschema { + return true; + } + } + + false +} + +/// Collects all referents from the given parent schema, and inserts them to `referents`. +/// +/// Property schemas from `properties`, `patternProperties`, and `additionalProperties` are checked. +/// Any such referents in a property schema are do not need to be unmarked as the "chain" between +/// parent/child is broken implicitly by the property-level scoping of the value they would be given +/// to validate. +/// +/// Subschemas from `allOf`, `oneOf`, and `anyOf` are also checked. As subschema validation implies +/// that each subschema will be given the same value to validate, even if the subschema only +/// represents a slice of the parent schema, there is a link between parent/child that requires the +/// child to be unmarked so that the parent can be marked to enforce `unevaluatedProperties` at the +/// correct scope. +/// +/// This function will recurse a schema object entirely, in terms of property schemas and +/// subschemas, but will not recurse through schema references. +fn get_referents(parent_schema: &SchemaObject, referents: &mut HashSet) { + if let Some(parent_object) = parent_schema.object.as_ref() { + // For both `properties` and `patternProperties`, collect the schema reference, if any, from + // all property schemas. 
+ for (_, property_schema) in parent_object + .properties + .iter() + .chain(parent_object.pattern_properties.iter()) + { + if let Some(child_schema) = property_schema.as_object() { + if let Some(child_schema_ref) = child_schema.reference.as_ref() { + referents.insert(MarkableReferent::would_not_unmark(child_schema_ref)); + } else { + get_referents(child_schema, referents); + } + } + } + + // For `additionalProperties`, if present and defined as a schema object, collect the schema + // reference if one is set. + if let Some(additional_properties) = parent_object.additional_properties.as_ref() { + if let Some(child_schema) = additional_properties.as_ref().as_object() { + if let Some(child_schema_ref) = child_schema.reference.as_ref() { + referents.insert(MarkableReferent::would_not_unmark(child_schema_ref)); + } else { + get_referents(child_schema, referents); + } + } + } + } + + if let Some(subschema) = parent_schema.subschemas.as_ref() { + // For `allOf`, `oneOf`, and `anyOf`, collect the schema reference, if any, from their + // respective subschemas. 
+ for subschema in get_object_subschemas_from_parent(subschema) { + if let Some(child_schema_ref) = subschema.reference.as_ref() { + referents.insert(MarkableReferent::would_unmark(child_schema_ref)); + } else { + get_referents(subschema, referents); + } + } + } +} + +fn get_object_subschemas_from_parent( + subschema: &SubschemaValidation, +) -> impl Iterator { + [ + subschema.all_of.as_ref(), + subschema.one_of.as_ref(), + subschema.any_of.as_ref(), + ] + .into_iter() + .filter_map(identity) + .flat_map(identity) + .filter_map(Schema::as_object) +} + +fn get_object_subschemas_from_parent_mut( + subschema: &mut SubschemaValidation, +) -> impl Iterator { + [ + subschema.all_of.as_mut(), + subschema.one_of.as_mut(), + subschema.any_of.as_mut(), + ] + .into_iter() + .filter_map(identity) + .flat_map(identity) + .filter_map(Schema::as_object_mut) +} + fn mark_schema_closed(schema: &mut SchemaObject) { // Make sure this schema doesn't also have `additionalProperties` set to a non-boolean schema, // as it would be a logical inconsistency to then also set `unevaluatedProperties` to `false`. 
@@ -110,16 +570,28 @@ fn mark_schema_closed(schema: &mut SchemaObject) { schema.object().unevaluated_properties = Some(Box::new(Schema::Bool(false))); } -fn is_object_schema(instance_type: Option<&SingleOrVec>) -> bool { - match instance_type { +fn schema_type_matches( + schema: &SchemaObject, + instance_type: InstanceType, + allow_multiple: bool, +) -> bool { + match schema.instance_type.as_ref() { Some(sov) => match sov { - SingleOrVec::Single(inner) => inner.as_ref() == &InstanceType::Object, - SingleOrVec::Vec(inner) => inner.contains(&InstanceType::Object), + SingleOrVec::Single(inner) => inner.as_ref() == &instance_type, + SingleOrVec::Vec(inner) => inner.contains(&instance_type) && allow_multiple, }, None => false, } } +fn is_object_schema(schema: &SchemaObject) -> bool { + schema_type_matches(schema, InstanceType::Object, true) +} + +fn is_null_schema(schema: &SchemaObject) -> bool { + schema_type_matches(schema, InstanceType::Null, false) +} + fn get_subschema_validators(schema: &mut SchemaObject) -> Option> { let mut validators = vec![]; @@ -159,22 +631,12 @@ fn get_subschema_validators(schema: &mut SchemaObject) -> Option RootSchema { - serde_json::from_value(value).expect("should not fail to deserialize schema") - } + use serde_json::json; + use vector_config_common::schema::visit::Visitor; - fn assert_schemas_eq(expected: RootSchema, actual: RootSchema) { - let expected_json = serde_json::to_string_pretty(&expected).expect("should not fail"); - let actual_json = serde_json::to_string_pretty(&actual).expect("should not fail"); + use crate::schema::visitors::test::{as_schema, assert_schemas_eq}; - assert_eq!(expected_json, actual_json); - } + use super::DisallowedUnevaluatedPropertiesVisitor; #[test] fn basic_object_schema() { @@ -465,4 +927,50 @@ mod tests { assert_schemas_eq(expected_schema, actual_schema); } + + #[test] + fn conflicting_schema_usages_get_duplicated_and_flattened() { + let mut actual_schema = as_schema(json!({ + "type": "object", + 
"properties": { + "acks": { "$ref": "#/definitions/acks" }, + "custom_acks": { "$ref": "#/definitions/custom_acks" } + }, + "definitions": { + "custom_acks": { + "allOf": [{ "type": "object", "properties": { "ack_count": { "type": "number" } } }, + { "$ref": "#/definitions/acks" }] + }, + "acks": { "type": "object", "properties": { "enabled": { "type": "boolean" } } } + } + })); + + let mut visitor = DisallowedUnevaluatedPropertiesVisitor::default(); + visitor.visit_root_schema(&mut actual_schema); + + let expected_schema = as_schema(json!({ + "type": "object", + "properties": { + "acks": { "$ref": "#/definitions/acks" }, + "custom_acks": { "$ref": "#/definitions/custom_acks" } + }, + "definitions": { + "custom_acks": { + "allOf": [ + { "type": "object", "properties": { "ack_count": { "type": "number" } } }, + { "type": "object", "properties": { "enabled": { "type": "boolean" } } } + ], + "unevaluatedProperties": false + }, + "acks": { + "type": "object", + "properties": { "enabled": { "type": "boolean" } }, + "unevaluatedProperties": false + } + }, + "unevaluatedProperties": false + })); + + assert_schemas_eq(expected_schema, actual_schema); + } } diff --git a/scripts/generate-component-docs.rb b/scripts/generate-component-docs.rb index b9a7e98381696..ed47670470c6f 100755 --- a/scripts/generate-component-docs.rb +++ b/scripts/generate-component-docs.rb @@ -603,6 +603,17 @@ def expand_schema_references(root_schema, unexpanded_schema) } end + if !schema['anyOf'].nil? 
+ schema['anyOf'] = schema['anyOf'].map { |subschema| + new_subschema = expand_schema_references(root_schema, subschema) + if new_subschema != subschema + expanded = true + end + + new_subschema + } + end + if !expanded break end @@ -914,10 +925,10 @@ def resolve_bare_schema(root_schema, schema) @logger.error "Relevant schema: #{JSON.pretty_generate(schema)}" exit 1 end - additional_properties['description'] = singular_description resolved_additional_properties = resolve_schema(root_schema, additional_properties) resolved_additional_properties['required'] = true + resolved_additional_properties['description'] = singular_description options.push(['*', resolved_additional_properties]) end diff --git a/website/cue/reference/components/sinks/base/gcp_stackdriver_logs.cue b/website/cue/reference/components/sinks/base/gcp_stackdriver_logs.cue index 6f73a93dd3cd8..646e9d3ec69a2 100644 --- a/website/cue/reference/components/sinks/base/gcp_stackdriver_logs.cue +++ b/website/cue/reference/components/sinks/base/gcp_stackdriver_logs.cue @@ -314,8 +314,12 @@ base: components: sinks: gcp_stackdriver_logs: configuration: { } } resource: { - description: "The monitored resource to associate the logs with." - required: true + description: """ + A monitored resource. + + The monitored resource to associate the logs with. + """ + required: true type: object: { examples: [{ instanceId: "Twilight" diff --git a/website/cue/reference/components/sinks/base/gcp_stackdriver_metrics.cue b/website/cue/reference/components/sinks/base/gcp_stackdriver_metrics.cue index f0edf5c8474a2..6f58d57ca46a9 100644 --- a/website/cue/reference/components/sinks/base/gcp_stackdriver_metrics.cue +++ b/website/cue/reference/components/sinks/base/gcp_stackdriver_metrics.cue @@ -256,8 +256,12 @@ base: components: sinks: gcp_stackdriver_metrics: configuration: { } } resource: { - description: "The monitored resource to associate the metrics with." - required: true + description: """ + A monitored resource. 
+ + The monitored resource to associate the metrics with. + """ + required: true type: object: { examples: [{ instanceId: "Twilight" diff --git a/website/cue/reference/components/sinks/base/http.cue b/website/cue/reference/components/sinks/base/http.cue index edc96ae57ba11..7eefce772c920 100644 --- a/website/cue/reference/components/sinks/base/http.cue +++ b/website/cue/reference/components/sinks/base/http.cue @@ -310,8 +310,12 @@ base: components: sinks: http: configuration: { } } method: { - description: "The HTTP method to use when making the request." - required: false + description: """ + HTTP method. + + The HTTP method to use when making the request. + """ + required: false type: string: { default: "post" enum: { From cd6d7164e21ea0008133f258a44ca6724e6491f6 Mon Sep 17 00:00:00 2001 From: Toby Lawrence Date: Fri, 7 Apr 2023 13:12:14 -0400 Subject: [PATCH 2/5] fix flattening --- .../src/schema/visitors/unevaluated.rs | 242 +++++++++++++----- 1 file changed, 172 insertions(+), 70 deletions(-) diff --git a/lib/vector-config/src/schema/visitors/unevaluated.rs b/lib/vector-config/src/schema/visitors/unevaluated.rs index 4aa34dcfbb2b8..3569fa757e0a7 100644 --- a/lib/vector-config/src/schema/visitors/unevaluated.rs +++ b/lib/vector-config/src/schema/visitors/unevaluated.rs @@ -47,51 +47,7 @@ impl DisallowedUnevaluatedPropertiesVisitor { impl Visitor for DisallowedUnevaluatedPropertiesVisitor { fn visit_root_schema(&mut self, root: &mut RootSchema) { - // Glossary: - // - // mark(ed): setting or having `unevaluatedProperties` set to `Some(Schema::Bool(false))` - // unmark(ed): unsetting or having `unevaluatedProperties` set to `None` - - // For all definitions, visit _just_ the defined schema (no recursing) and build a map of - // child definitions -> (mark eligibility, [(parent definition, would_unmark)]), such that we know exactly - // which schemas refer to any given schema definition and if they would lead to the child - // schema being marked as 
`unevaluatedProperties: false`. - // - // We would filter out any child definitions that aren't eligible to be marked. For the - // remaining child schemas, we group the parent definitions by `would_unmark`, which - // indicates whether or not the given parent definition would cause the child definition to - // be unmarked. - // - // As an example, we would expect a parent schema referring to a child schema via `allOf` to - // unmark the child schema, while a parent schema referring to a child schema within a - // specific property to not unmark the child schema. - // - // With the grouped parent definitions, take the smaller of the two groups. This represents - // the set of parent schemas that we will indicate as needing to use a flattened version of - // the child schema when we execute our primary visit logic. - - let mut eligible_to_flatten = HashMap::new(); - let flattening_eligibility_map = build_closed_schema_flatten_eligibility_mappings(root); - for (child_schema_ref, metadata) in flattening_eligibility_map { - let would_unmark = metadata - .referrers - .iter() - .filter(|r| r.would_unmark) - .map(|r| r.referent.clone()) - .collect::>(); - let would_not_unmark = metadata - .referrers - .iter() - .filter(|r| !r.would_unmark) - .map(|r| r.referent.clone()) - .collect::>(); - - if would_not_unmark.len() > would_unmark.len() { - eligible_to_flatten.insert(child_schema_ref.to_string(), would_unmark); - } else { - eligible_to_flatten.insert(child_schema_ref.to_string(), would_not_unmark); - } - } + let mut eligible_to_flatten = build_closed_schema_flatten_eligibility_mappings(root); debug!( "Found {} referents eligible for flattening: {:?}", @@ -311,9 +267,9 @@ struct ChildMetadata { } impl ChildMetadata { - fn from_schema(schema: &SchemaObject) -> Self { + fn from_schema(definitions: &Map, schema: &SchemaObject) -> Self { Self { - should_mark: is_markable_schema(schema), + should_mark: is_markable_schema(definitions, schema), referrers: HashSet::new(), } } @@ 
-321,7 +277,24 @@ impl ChildMetadata { fn build_closed_schema_flatten_eligibility_mappings( root_schema: &RootSchema, -) -> HashMap { +) -> HashMap> { + // For all definitions, visit _just_ the defined schema (no recursing) and build a map of child + // definitions -> (mark eligibility, [(parent definition, would_unmark)]), such that we know + // exactly which schemas refer to any given schema definition and if they would lead to the + // child schema being marked as `unevaluatedProperties: false`. + // + // We would filter out any child definitions that aren't eligible to be marked. For the + // remaining child schemas, we group the parent definitions by `would_unmark`, which indicates + // whether or not the given parent definition would cause the child definition to be unmarked. + // + // As an example, we would expect a parent schema referring to a child schema via `allOf` to + // unmark the child schema, while a parent schema referring to a child schema within a specific + // property to not unmark the child schema. + // + // With the grouped parent definitions, take the smaller of the two groups. This represents the + // set of parent schemas that we will indicate as needing to use a flattened version of the + // child schema when we execute our primary visit logic. + // Iterate over all definitions, and once more for the root schema, and generate a map of parent // schema -> (would_unmark, child schema). let mut parent_to_child = HashMap::new(); @@ -335,7 +308,8 @@ fn build_closed_schema_flatten_eligibility_mappings( // If a schema itself would not be considered markable, then we don't need to consider the // eligibility between parent/child since there's nothing to drive the "now unmark the child // schemas" logic. 
- if !is_markable_schema(parent_schema) { + if !is_markable_schema(&root_schema.definitions, parent_schema) { + debug!("Schema definition '{}' not markable.", definition_name); continue; } @@ -365,7 +339,7 @@ fn build_closed_schema_flatten_eligibility_mappings( .or_insert_with(|| { let child_schema = get_schema_from_reference(root_schema, &child_referent.referent); - ChildMetadata::from_schema(child_schema) + ChildMetadata::from_schema(&root_schema.definitions, child_schema) }); // Transform the child referent into a parent referent, which preserves the "would @@ -377,22 +351,34 @@ fn build_closed_schema_flatten_eligibility_mappings( } } - /* - // Now filter out entries that are not be eligible for flattening. - child_to_parent - .into_iter() - // Filter out all mappings that don't involve markable schemas. - .filter(|(_, metadata)| metadata.should_mark) - // Filter out all mappings where all referrers have the same marking behavior for the - // referrent, as there's nothing to differentiate between for flattening vs not flattening, - // so long - .filter(|(_, metadata)| { - !(metadata.referrers.iter().all(|r| r.would_unmark) - || metadata.referrers.iter().all(|r| !r.would_unmark)) - }) - .collect()*/ - - child_to_parent + let mut eligible_to_flatten = HashMap::new(); + for (child_schema_ref, metadata) in child_to_parent { + // Don't flatten schemas which has less than two referrers. 
+ if metadata.referrers.len() < 2 { + continue; + } + + let would_unmark = metadata + .referrers + .iter() + .filter(|r| r.would_unmark) + .map(|r| r.referent.clone()) + .collect::>(); + let would_not_unmark = metadata + .referrers + .iter() + .filter(|r| !r.would_unmark) + .map(|r| r.referent.clone()) + .collect::>(); + + if would_not_unmark.len() >= would_unmark.len() { + eligible_to_flatten.insert(child_schema_ref.to_string(), would_unmark); + } else { + eligible_to_flatten.insert(child_schema_ref.to_string(), would_not_unmark); + } + } + + eligible_to_flatten } fn get_schema_from_reference<'a>( @@ -414,7 +400,7 @@ fn get_schema_from_reference<'a>( } /// Determines whether a schema is eligible to be marked. -fn is_markable_schema(schema: &SchemaObject) -> bool { +fn is_markable_schema(definitions: &Map, schema: &SchemaObject) -> bool { // If the schema is an object schema, and does not have`additionalProperties` set, it can be // marked, as marking a schema with both `unevaluatedProperties`/`additionalProperties` would // otherwise be a logical inconsistency. @@ -436,11 +422,28 @@ fn is_markable_schema(schema: &SchemaObject) -> bool { // subschemas: { "type": "null" } and { "$ref": "#/definitions/T" }. If the schema for `T` is, // say, just a scalar schema, instead of an object schema... then it wouldn't be marked, and in // turn, we wouldn't need to mark the schema for `Option`: there's no properties at all. + // + // We'll follow schema references in subschemas up to one level deep trying to figure this out. 
if let Some(subschema) = schema.subschemas.as_ref() { - let mut subschemas = get_object_subschemas_from_parent(subschema); + let mut subschemas = get_object_subschemas_from_parent(subschema).collect::>(); - let has_object_subschema = subschemas.any(is_object_schema); - if has_object_subschema { + let has_object_subschema = subschemas.iter().any(|schema| is_object_schema(*schema)); + let has_referenced_object_subschema = subschemas + .iter() + .map(|subschema| { + subschema + .reference + .as_ref() + .and_then(|reference| { + let reference = get_cleaned_schema_reference(&reference); + definitions.get(reference) + }) + .and_then(Schema::as_object) + .map_or(false, is_object_schema) + }) + .any(identity); + + if has_object_subschema || has_referenced_object_subschema { return true; } } @@ -973,4 +976,103 @@ mod tests { assert_schemas_eq(expected_schema, actual_schema); } + + #[test] + fn multiple_mark_unmark_references_flattened_efficiently() { + // This tests that if, for example, one schema reference would be marked and unmarked by + // multiple referrers, the referrers we choose to flatten the reference on are in the + // smaller group (i.e. we do as few flattenings as possible). 
+ + let mut actual_schema = as_schema(json!({ + "type": "object", + "properties": { + "a": { "$ref": "#/definitions/a" }, + "b": { "$ref": "#/definitions/b" }, + "c": { "$ref": "#/definitions/c" }, + "one": { "$ref": "#/definitions/one" }, + "two": { "$ref": "#/definitions/two" } + }, + "definitions": { + "one": { + "allOf": [{ "$ref": "#/definitions/c" }] + }, + "two": { + "allOf": [{ "$ref": "#/definitions/b" }, { "$ref": "#/definitions/c" }] + }, + "a": { + "type": "object", + "properties": { "a": { "type": "boolean" } } + }, + "b": { + "type": "object", + "properties": { "b": { "type": "boolean" } } + }, + "c": { + "type": "object", + "properties": { "c": { "type": "boolean" } } + } + } + })); + + let mut visitor = DisallowedUnevaluatedPropertiesVisitor::default(); + visitor.visit_root_schema(&mut actual_schema); + + // Expectations: + // - Schema A is only referenced in an object property, so it's marked normally. + // - Schema B is referenced twice -- once as an object property and once in a subschema -- + // so since we prioritize flattening usages that would unmark a schema when the + // would-unmark/would-not-unmark counts are equal, schema B is only flattened for the + // subschema usage. + // - Schema C is referenced three times -- once as an object property and twice in a + // subschema -- so since there's more would-unmark usages than would-not-unmark usages, we + // flatten the smallest group of usages, which is the would-not-unmark group aka object + // properties. 
+ let expected_schema = as_schema(json!({ + "type": "object", + "properties": { + "a": { "$ref": "#/definitions/a" }, + "b": { "$ref": "#/definitions/b" }, + "c": { + "type": "object", + "properties": { "c": { "type": "boolean" } }, + "unevaluatedProperties": false + }, + "one": { "$ref": "#/definitions/one" }, + "two": { "$ref": "#/definitions/two" } + }, + "definitions": { + "one": { + "allOf": [{ "$ref": "#/definitions/c" }], + "unevaluatedProperties": false + }, + "two": { + "allOf": [ + { + "type": "object", + "properties": { "b": { "type": "boolean" } } + }, + { "$ref": "#/definitions/c" } + ], + "unevaluatedProperties": false + }, + "a": { + "type": "object", + "properties": { "a": { "type": "boolean" } }, + "unevaluatedProperties": false + }, + "b": { + "type": "object", + "properties": { "b": { "type": "boolean" } }, + "unevaluatedProperties": false + }, + "c": { + "type": "object", + "properties": { "c": { "type": "boolean" } } + } + }, + "unevaluatedProperties": false + })); + + assert_schemas_eq(expected_schema, actual_schema); + } } From 1a5b202c7bb4da2554365ea3085c2e466d918835 Mon Sep 17 00:00:00 2001 From: Toby Lawrence Date: Fri, 7 Apr 2023 13:57:34 -0400 Subject: [PATCH 3/5] spelling error --- lib/vector-config/src/schema/visitors/unevaluated.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/vector-config/src/schema/visitors/unevaluated.rs b/lib/vector-config/src/schema/visitors/unevaluated.rs index 3569fa757e0a7..520e6982d1863 100644 --- a/lib/vector-config/src/schema/visitors/unevaluated.rs +++ b/lib/vector-config/src/schema/visitors/unevaluated.rs @@ -329,7 +329,7 @@ fn build_closed_schema_flatten_eligibility_mappings( parent_to_child.insert(SchemaReference::Root, root_referents); // Now we build a reverse map, going from child -> parent. 
We'll iterate over every child - // referent, for every parent/child entry, calcuating the set of referrers, and if they would + // referent, for every parent/child entry, calculating the set of referrers, and if they would // require unmarking the child. let mut child_to_parent = HashMap::new(); for (parent_schema_ref, child_referents) in parent_to_child { From 310846b7be87c11ae335ccb916cc9fd911d6d9c5 Mon Sep 17 00:00:00 2001 From: Toby Lawrence Date: Mon, 10 Apr 2023 17:19:27 -0400 Subject: [PATCH 4/5] bunch of cleanup related to removing allowance for warnings --- .../src/schema/visitors/unevaluated.rs | 177 +++--------------- 1 file changed, 21 insertions(+), 156 deletions(-) diff --git a/lib/vector-config/src/schema/visitors/unevaluated.rs b/lib/vector-config/src/schema/visitors/unevaluated.rs index 520e6982d1863..cfab8822df5be 100644 --- a/lib/vector-config/src/schema/visitors/unevaluated.rs +++ b/lib/vector-config/src/schema/visitors/unevaluated.rs @@ -1,15 +1,11 @@ -#![allow(warnings)] - use std::{ collections::{HashMap, HashSet}, convert::identity, - mem::discriminant, }; -use serde_json::Value; -use tracing::{debug, error}; +use tracing::debug; use vector_config_common::schema::{ - visit::{visit_schema_object, with_resolved_schema_reference, Visitor}, + visit::{with_resolved_schema_reference, Visitor}, *, }; @@ -47,7 +43,7 @@ impl DisallowedUnevaluatedPropertiesVisitor { impl Visitor for DisallowedUnevaluatedPropertiesVisitor { fn visit_root_schema(&mut self, root: &mut RootSchema) { - let mut eligible_to_flatten = build_closed_schema_flatten_eligibility_mappings(root); + let eligible_to_flatten = build_closed_schema_flatten_eligibility_mappings(root); debug!( "Found {} referents eligible for flattening: {:?}", @@ -80,7 +76,7 @@ impl Visitor for DisallowedUnevaluatedPropertiesVisitor { let current_parent_schema_ref = self.get_current_schema_scope(); if let Some(referrers) = self.eligible_to_flatten.get(reference) { - if 
referrers.contains(&current_parent_schema_ref) {
+            if referrers.contains(current_parent_schema_ref) {
                 let current_schema_ref = get_cleaned_schema_reference(reference);
                 let referenced_schema = definitions
                     .get(current_schema_ref)
@@ -100,7 +96,7 @@ impl Visitor for DisallowedUnevaluatedPropertiesVisitor {
                 );
 
                 schema.reference = None;
-                schema.merge(&referenced_schema);
+                schema.merge(referenced_schema);
             }
         }
     }
@@ -120,7 +116,7 @@ impl Visitor for DisallowedUnevaluatedPropertiesVisitor {
         // to the level of the `allOf`/`oneOf`/`anyOf`, where it can apply the correct behavior.
         let mut had_relevant_subschemas = false;
         if let Some(subschema) = schema.subschemas.as_mut() {
-            let mut subschemas = get_object_subschemas_from_parent_mut(subschema.as_mut());
+            let subschemas = get_object_subschemas_from_parent_mut(subschema.as_mut());
 
             for subschema in subschemas {
                 had_relevant_subschemas = true;
@@ -146,54 +142,8 @@ impl ScopedVisitor for DisallowedUnevaluatedPropertiesVisitor {
     }
 
     fn get_current_schema_scope(&self) -> &SchemaReference {
-        self.scope_stack
-            .current()
-            .unwrap_or_else(|| &SchemaReference::Root)
-    }
-}
-
-fn is_non_object_optional_schema(
-    definitions: &mut Map<String, Schema>,
-    schema: &mut SchemaObject,
-) -> bool {
-    // Only match schemas that have subschema validation in the form of `oneOf`, and nothing else.
-    let subschemas = match schema.subschemas.as_ref() {
-        None => return false,
-        Some(subschema) => {
-            let has_non_one_of = subschema.all_of.is_some() || subschema.any_of.is_some();
-            match subschema.one_of.as_ref() {
-                Some(one_of) if !has_non_one_of => one_of,
-                _ => return false,
-            }
-        }
-    };
-
-    // There can only be two subschemas, one of which is simply a `null` schema.
- if subschemas.len() != 2 { - return false; + self.scope_stack.current().unwrap_or(&SchemaReference::Root) } - - let maybe_non_null_schema_idx = subschemas[0] - .as_object() - .map(is_null_schema) - .and_then(|x| x.then(|| 1)) - .or_else(|| { - subschemas[1] - .as_object() - .map(is_null_schema) - .and_then(|x| x.then(|| 0)) - }); - - let non_null_schema = match maybe_non_null_schema_idx { - Some(idx) => &subschemas[idx], - None => return false, - }; - - // Check if the non-null schema is an object schema or not. - non_null_schema - .as_object() - .map(|schema| !is_object_schema(schema)) - .unwrap_or(true) } fn unmark_or_flatten_schema(definitions: &mut Map, schema: &mut SchemaObject) { @@ -257,24 +207,6 @@ impl MarkableReferent { } } -#[derive(Debug)] -struct ChildMetadata { - /// Whether or not this schema, in isolation, should be marked. - should_mark: bool, - - /// The set of referring schemas. - referrers: HashSet, -} - -impl ChildMetadata { - fn from_schema(definitions: &Map, schema: &SchemaObject) -> Self { - Self { - should_mark: is_markable_schema(definitions, schema), - referrers: HashSet::new(), - } - } -} - fn build_closed_schema_flatten_eligibility_mappings( root_schema: &RootSchema, ) -> HashMap> { @@ -336,36 +268,28 @@ fn build_closed_schema_flatten_eligibility_mappings( for child_referent in child_referents { let entry = child_to_parent .entry(child_referent.referent.as_ref().to_string()) - .or_insert_with(|| { - let child_schema = - get_schema_from_reference(root_schema, &child_referent.referent); - ChildMetadata::from_schema(&root_schema.definitions, child_schema) - }); + .or_insert_with(HashSet::new); // Transform the child referent into a parent referent, which preserves the "would // unmark" value but now points to the parent instead, and add it to the list of // _referrers_ for the child. 
- entry - .referrers - .insert(child_referent.with_new_referent(parent_schema_ref.clone())); + entry.insert(child_referent.with_new_referent(parent_schema_ref.clone())); } } let mut eligible_to_flatten = HashMap::new(); - for (child_schema_ref, metadata) in child_to_parent { - // Don't flatten schemas which has less than two referrers. - if metadata.referrers.len() < 2 { + for (child_schema_ref, referrers) in child_to_parent { + // Don't flatten schemas which have less than two referrers. + if referrers.len() < 2 { continue; } - let would_unmark = metadata - .referrers + let would_unmark = referrers .iter() .filter(|r| r.would_unmark) .map(|r| r.referent.clone()) .collect::>(); - let would_not_unmark = metadata - .referrers + let would_not_unmark = referrers .iter() .filter(|r| !r.would_unmark) .map(|r| r.referent.clone()) @@ -381,24 +305,6 @@ fn build_closed_schema_flatten_eligibility_mappings( eligible_to_flatten } -fn get_schema_from_reference<'a>( - root_schema: &'a RootSchema, - schema_ref: &SchemaReference, -) -> &'a SchemaObject { - match schema_ref { - SchemaReference::Root => &root_schema.schema, - SchemaReference::Definition(name) => { - let definition_name = get_cleaned_schema_reference(name.as_str()); - match root_schema.definitions.get(definition_name) { - None => panic!("Schema definition '{}' does not exist!", definition_name), - Some(schema) => schema - .as_object() - .expect("referenced schemas should never be boolean schemas"), - } - } - } -} - /// Determines whether a schema is eligible to be marked. fn is_markable_schema(definitions: &Map, schema: &SchemaObject) -> bool { // If the schema is an object schema, and does not have`additionalProperties` set, it can be @@ -425,9 +331,9 @@ fn is_markable_schema(definitions: &Map, schema: &SchemaObject) // // We'll follow schema references in subschemas up to one level deep trying to figure this out. 
if let Some(subschema) = schema.subschemas.as_ref() { - let mut subschemas = get_object_subschemas_from_parent(subschema).collect::>(); + let subschemas = get_object_subschemas_from_parent(subschema).collect::>(); - let has_object_subschema = subschemas.iter().any(|schema| is_object_schema(*schema)); + let has_object_subschema = subschemas.iter().any(|schema| is_object_schema(schema)); let has_referenced_object_subschema = subschemas .iter() .map(|subschema| { @@ -435,7 +341,7 @@ fn is_markable_schema(definitions: &Map, schema: &SchemaObject) .reference .as_ref() .and_then(|reference| { - let reference = get_cleaned_schema_reference(&reference); + let reference = get_cleaned_schema_reference(reference); definitions.get(reference) }) .and_then(Schema::as_object) @@ -519,8 +425,8 @@ fn get_object_subschemas_from_parent( subschema.any_of.as_ref(), ] .into_iter() - .filter_map(identity) - .flat_map(identity) + .flatten() + .flatten() .filter_map(Schema::as_object) } @@ -533,8 +439,8 @@ fn get_object_subschemas_from_parent_mut( subschema.any_of.as_mut(), ] .into_iter() - .filter_map(identity) - .flat_map(identity) + .flatten() + .flatten() .filter_map(Schema::as_object_mut) } @@ -591,47 +497,6 @@ fn is_object_schema(schema: &SchemaObject) -> bool { schema_type_matches(schema, InstanceType::Object, true) } -fn is_null_schema(schema: &SchemaObject) -> bool { - schema_type_matches(schema, InstanceType::Null, false) -} - -fn get_subschema_validators(schema: &mut SchemaObject) -> Option> { - let mut validators = vec![]; - - // Grab any subschemas for `allOf`/`oneOf`/`anyOf`, if present. - // - // There are other advanced validation mechanisms such as `if`/`then`/`else, but we explicitly - // don't handle them here as we don't currently use them in Vector's configuration schema. 
- if let Some(subschemas) = schema.subschemas.as_mut() { - if let Some(all_of) = subschemas.all_of.as_mut() { - validators.extend(all_of.iter_mut().filter_map(|validator| match validator { - Schema::Object(inner) => Some(inner), - _ => None, - })); - } - - if let Some(one_of) = subschemas.one_of.as_mut() { - validators.extend(one_of.iter_mut().filter_map(|validator| match validator { - Schema::Object(inner) => Some(inner), - _ => None, - })); - } - - if let Some(any_of) = subschemas.any_of.as_mut() { - validators.extend(any_of.iter_mut().filter_map(|validator| match validator { - Schema::Object(inner) => Some(inner), - _ => None, - })); - } - } - - if validators.is_empty() { - None - } else { - Some(validators) - } -} - #[cfg(test)] mod tests { use serde_json::json; From a6b4fc10de8d93e51cad90adf3c4951172207d56 Mon Sep 17 00:00:00 2001 From: Toby Lawrence Date: Mon, 10 Apr 2023 17:25:09 -0400 Subject: [PATCH 5/5] unwind overzealous spelling file update --- .github/actions/spelling/expect.txt | 222 ++++++++++++++++++++++++++++ 1 file changed, 222 insertions(+) diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt index cd51390470244..476be7788f0ba 100644 --- a/.github/actions/spelling/expect.txt +++ b/.github/actions/spelling/expect.txt @@ -4,26 +4,37 @@ abcdefghijklmnopqrstuvwxyzand abced abortable acb +Acho ack'ing acking Acq +addrof +addtnl AEAD agentpayload aimd akx allowerased +alphanum +AMF amka +ampersat amping amqps amz amzn +anaana anchore androideabi andy +annanas ansicpg +ansix anumber anycondition +anymap anypb +ANZ apievent apipodspec apk @@ -34,12 +45,16 @@ aqf architecting archivable ARNOTAREALIDD +Arrowz arshiyasolei asdf asdfasdf +asis ASMS +aspiegel assertverify Asterix +Astring asynk atag atx @@ -62,14 +77,20 @@ autospawning autotools avro awscli +awseb awsec awslabs +aww axum Aziz azureresourceid babim +Baca +Bada badunit bak +baq +barbaz barfoo barieom baseof @@ -83,6 +104,7 @@ bcdea benchgen benchmarker 
benefritz +Berita bev bfb bgwriter @@ -91,6 +113,7 @@ Bincode bindgen bizbaz bla +blabla blabop blafoo blem @@ -98,14 +121,19 @@ blkio Bloblang Blockfi blockmanager +bloob blpop blt +bmobile +bononos bonzai +bools boop booper booperbot bopbla boringssl +boto bottlenecked bpower brackethighlighter @@ -175,6 +203,7 @@ cloudwatchlogs cmark CMK cnf +Coc CODEOWNERS colddb coldline @@ -182,11 +211,13 @@ commonmark comms compactarr compactmap +compiter componenterror componenteventsdropped componenteventsreceived componenteventssent composability +compvalue concating concats condrestart @@ -194,6 +225,7 @@ configkey configmap confl confy +Conkeror consigliere CONTEUDO cooldown @@ -216,21 +248,28 @@ cryptsoft csb cstats csvlog +cta Cthink +cubot customise customizability customtype cwl Dailywarehousing +damerau +darp daschl dashmap dataflows datafuselabs +Datanyze datasources datastream Datelike +dateref datid datname +davidhuie dbkind dbreader DBserver @@ -257,19 +296,23 @@ defattr defaultauthdb deff defghijklmnopqrstuvwxyzand +definit delaycompress deliverystream demoing +derp descriptorpb deser deserializations desync +DEUC developerguide devno DHAVE dhclient diffing diffs +DIGNO disintermediate distrib dld @@ -278,13 +321,17 @@ DNi dnsmsg docsearch docsrs +doesnotcrash dogcats Dogsketch dogsketches dogstatsd domhandler Doop +doot downcasted +DQUOTE +droid droptest dsl dstat @@ -301,11 +348,13 @@ eabihf eay ebfcee edenhill +Edg edns eeyun efg efgh Elhage +Eluga emca EMON Emph @@ -313,6 +362,7 @@ emptypb emt Enableable encodable +Encryptor endef endler enduml @@ -322,6 +372,7 @@ enumdecl enumflags ENVARS envsubst +EOI EOIG EOL'ed Erfxl @@ -329,11 +380,13 @@ Err'ing errorf Errorsfor esb +ESPN esque etheus etl ETNUV etsy +EUA eur eventarray evented @@ -343,7 +396,10 @@ eventstoredb eventstreamsink evictingpages evmap +evntslog +EVO evt +EWENE ewma examplegroup exitcodes @@ -356,17 +412,20 @@ extrepo Failable fakedata falco +fals fanatid fanouts fastcc fastmod fbarbar fbaro 
+FBAV fbf fcharset fcontext feeney festeburg +FFar ffebda ffeef fffffffff @@ -377,6 +436,7 @@ filecontents filterlist finalizable fingerprinter +firetv fizzaxbbuzz fizzbuzz fkn @@ -386,7 +446,11 @@ Flatbuffers flate flatmaps flattenings +fleep flork +florp +FME +fnclosure fng fnil Fomichev @@ -398,6 +462,7 @@ FOOBARy foobaz foobazbar foobla +foofoo foometric fooo foos @@ -416,16 +481,20 @@ fuchsnj fullhuman futs fuzzcheck +fuzzer fwfiles +FXPDEUC GAPI gaugehistogram gaugor GC'ing gcra +gdx geh genericify genproto genrsa +Genx geoffleyland geolite getelementptr @@ -470,6 +539,7 @@ hashring hashset hashsum hba +Hbb hdrhistogram headchunk hec @@ -479,6 +549,7 @@ heka hereregexp herestring hexdump +Hfoo highlighters histo hname @@ -505,6 +576,7 @@ iamanapikey iamasplunkhectoken iana iarna +Ideos idhack IDML ified @@ -518,6 +590,7 @@ incrementalize indexmap indicatif indoc +Inettv influxdata ingesters ingestor @@ -526,15 +599,19 @@ initech Instrumentable interpolatedstring interpretervm +intex invalidauth invokefunction invtrapezium +inzone +IOA ioapiset ionicon iostat iouring iowait IPORHOST +iru isainfo isdbgrid isp @@ -562,6 +639,7 @@ jstype jsvs jszwedko jtype +JUC juchiast karppila kartar @@ -571,6 +649,7 @@ keybase keyclock keyid keypair +keystream keyxxxxx khvzak kib @@ -586,11 +665,13 @@ labelmap lalrpop Lamport landingpad +lastitem lastword ldd leebenson leveldb lfd +lfoo libclang LIBGNUTLS liblogging @@ -601,6 +682,7 @@ linting listenfd litstr llen +Lme lnt lntable lntd @@ -610,6 +692,7 @@ logbar logdna logevents logfmt +logid lognamespace lognamespacing logplex @@ -619,11 +702,20 @@ logsense logsev logseverity logtypes +loguid +lookaround +lookupbufs losslessly lpop lpush +LPW +LQuery +LRule +LSQRBRACKET +lstr Luc luciofranco +luckystar lucperkins lukesteensen macports @@ -634,9 +726,11 @@ Makefiles markdownify markdownlintrc marketo +matchall maxdepth maxed maxes +maxint maxs maxwritten maybeanothertest @@ -667,11 +761,14 @@ minioadmin miniodat minwindef mio 
+mirall misordering +Miui mkcert mkto mlua mmdb +mmdd Mmm moby mockwatchlogs @@ -679,10 +776,13 @@ modulesloaddir mooper moosh Mooshing +morefield moretags +morevalue mortems motivatingly MOZGIII +mpms mre msgpack mskv @@ -692,6 +792,8 @@ msv multiarch multievents multitenant +multiterm +multitermlookahead munging musleabihf muslueabihf @@ -709,10 +811,12 @@ mymachine mypod mytable myvalue +MZX nacked nacks Namazu namespacefoo +nananaman nananana nanosecs nats @@ -722,14 +826,19 @@ nbase ndarray ndjson nearline +nestedkey +NETTV neuronull newcerts newrelix nextest +nfield nfox ngx nightlies nindent +ning +nink nkey nmeta noack @@ -743,6 +852,7 @@ nomac NONALPHANUM nonbare noncycles +nonk nonsending nonstring noog @@ -753,6 +863,8 @@ nopqrstuvwxyz norc norecurse noreplace +norg +norgle norknoog norknork nosync @@ -769,26 +881,40 @@ nowin npipe NQTP nresamples +nullandnull nullishness numbackends +numericstart oap +Obar +Obigo OKD omfwd omitempty +ond +Onefootball oneline oneof onezone +onik +onk onlyfields onlyone ooba oobar ook +oopsie opcounters openstring opinsights oplog +opples +OPR optimizable +Optimus +organisations orgid +originsicname ostype otel otelcol @@ -798,8 +924,10 @@ otlp otlphttp ouicompat outputspatterns +outzone overaligned overalignment +overwritable owo oyaml pablosichert @@ -812,7 +940,9 @@ passthrough patchelf pathbuf pathgen +peci peekable +peeker PEMS pgmajfault PII @@ -824,6 +954,7 @@ plork pnh podspec Ponge +ponk portpicker POSINT postinst @@ -839,7 +970,9 @@ prerot presetdir pretrunc prettydiff +prettytable primaryfont +printstd probot processname procid @@ -852,14 +985,23 @@ protoc protofbuf protosizer Prt +PRTG psv +PTST publickey purgecss pyld +QFn +QGIS +qmobile qqq +qstr +queryroot +QUESTIONMARK quickcheck quix quuux +quuz qux quz qwe @@ -868,6 +1010,7 @@ rande RANDFILE rawconfig rawstring +rbaz rdkafka rdr readnone @@ -879,15 +1022,18 @@ referenceable regexes regexset reinstantiate +Rekonq reloadable remapper remotehost 
reorganisation reparse +replacen replacepkgs replicaset replset reqwest +rer rereleased reserialize resharding @@ -895,6 +1041,7 @@ resourcemanager respawn restorecon retryable +rhv rkyv rmem rmi @@ -903,10 +1050,18 @@ rmpv rndc rngs rolledback +rootquery +roxmltree rpd rpush +rquery +RRRRRRRLLLLLLLLLLLLLLLLLLLLLLLL +RRule +rsplitn +RSQRBRACKET rstrings RTTs +rulenum runc runhcs rusoto @@ -919,11 +1074,13 @@ Rustinomicon rustls RUSTSEC rustup +rustyline rxi rxmsg rxs ryangjchandler ryu +saanich sadf samehost samenet @@ -931,10 +1088,12 @@ samerole sameuser sandboxed sandboxing +sby sccache schemaless schemars schoen +schucks scl sda sdata @@ -942,13 +1101,16 @@ SDID seahash secfrac Seedable +segmentbuf semanage sematext SEO +sequencenum serie serverlogs serviceaccount servicebus +Seznam sfixed sfrag sghall @@ -956,6 +1118,8 @@ shane sharedstatedir Shenzhen shiplift +shning +shnoog shortcode shortstat should've @@ -974,6 +1138,7 @@ sizecache Sizefor skaffold skinparam +SKIPDATA skywalking slashin slf @@ -1003,14 +1168,18 @@ spencergilbert splitn SPOF spog +spork springframework srcport SREs +SResult sret SRPMS ssekms ssn sspi +SSSZ +SSSZZ sstrings stabilises stackdrive @@ -1028,12 +1197,14 @@ strat strconv streamsink strng +strp structfield subchunks suberr subfolders subfooter sublimelinter +sublocation subsec substrategies subtagline @@ -1046,6 +1217,7 @@ supertrait suser sustainability svalue +Swiftfox Sya sysfs sysinit @@ -1055,6 +1227,7 @@ systemid Syu Szwedko tablesmap +tac tagfoo tagline tagset @@ -1064,11 +1237,18 @@ Takeaways targetgroup tarpit tcmalloc +tdkind +Techvision +tecno +Teleca +Telechips telecom +Telesys templatable templateable templating terabytes +termcolor terraform tes testevent @@ -1083,8 +1263,11 @@ thaweddb thicc Thimphu thinkies +Thisais thiserror thisisastring +thonk +thot threatmanager throughputs thrpt @@ -1103,6 +1286,7 @@ tobz tocbot todos tokio +Tolino Tomola tonydanza toolbars @@ -1110,30 +1294,41 @@ toolchains TOOLSDIRECTORY 
toolset toor +topbuzz topdir topojson toproto +torvec +Toughpad Toxiproxy Tpng Trauring Treemap Trello +Treo trialled triggerable tripwires Trivago trivy Troutwine +tru TRUSTSTORE TSDB Tsvg turbofish +Twitterbot twox txmsg txs +Tygron typechecked typetag +tzs uap +uaparser +uas +UCWEB udm UIDs uieao @@ -1143,7 +1338,9 @@ unacked undertagline underutilized underutilizing +Unescaping unevictable +ungrokkable unioning unitdir unmark @@ -1152,6 +1349,7 @@ unnests unstructuredlogentries unsync untuple +uol upgradable urql usecase @@ -1159,6 +1357,7 @@ userinfo userlands usermod userpass +ustr uucp UVY uwtable @@ -1166,6 +1365,7 @@ valfoo validpaths Varda vdev +Vdroid VECTORCFG vectordir vectordotdev @@ -1173,15 +1373,19 @@ vectorized vendored veryyyyyyy viceversa +VIERA viewkind visualising +VLC VMs VNQ volumeconfig vrl +VRR vts vvo vvv +VVVVVVVVRRRRRRRRRRRRRRRRR VYa wahhh waitsforfullbatch @@ -1192,11 +1396,16 @@ waninfo wasmtime watchexec watchlogs +Waterfox wayfor webgraphviz webservers websites +webviews weee +weekyear +Wellco +Weltbild wemustcontinuetodiscard wensite whoopsie @@ -1205,6 +1414,7 @@ willreturn winioctl wiredtiger wiremock +with'quote wix wixobj WIXUI @@ -1221,15 +1431,20 @@ writablelib writeback wrongpass wrongsecret +wronly wtcache wtime wtimeouts wtr wurstmeister wwang +xaablabla xact +xbar xcatsy Xcg +XENENE +Xiao xlarge xpack xscale @@ -1238,6 +1453,9 @@ XUtil xvf XVXv xxs +xxxxxxx +xxxxxxxxxxxxxxx +xxxxxxxxxxxxxxxxxxxxxxx xzy YAMLs YBv @@ -1247,15 +1465,19 @@ yippeee yolo YRjhx ytt +zalgo zam +Zania ZDfz zerk zibble zieme +Zii zirp zoob zoobub zoog +zook zoop zork zorp