From fcedae48711b2f29343ce7440898040f6aed9b23 Mon Sep 17 00:00:00 2001 From: Gus Wynn Date: Fri, 4 Feb 2022 13:43:10 -0800 Subject: [PATCH] Plumb librdkafka log level through storage layer Supersedes #10875. Fixes #10441. Co-authored-by: Nikhil Benesch --- Cargo.lock | 2 + clippy.toml | 2 + src/compute/src/bin/computed.rs | 7 +- src/compute/src/compute_state.rs | 4 + src/compute/src/server.rs | 12 +- src/compute/src/sink/kafka.rs | 27 +- src/coord/src/catalog.rs | 4 +- src/coord/src/catalog/config.rs | 5 - src/coord/src/coord.rs | 29 +- src/coord/src/sink_connector.rs | 10 +- src/dataflow-types/Cargo.toml | 3 +- src/dataflow-types/src/types.rs | 324 ++++++++++-------- src/kafka-util/src/bin/kgen.rs | 3 +- src/kafka-util/src/client.rs | 56 ++- src/materialized/src/bin/materialized/main.rs | 9 +- src/materialized/src/lib.rs | 16 +- src/materialized/tests/util.rs | 5 +- src/ore/src/lib.rs | 1 + src/ore/src/tracing.rs | 17 +- src/sql/src/catalog.rs | 5 +- src/sql/src/kafka_util.rs | 5 +- src/sql/src/normalize.rs | 2 +- src/sql/src/pure.rs | 24 +- src/sql/src/query_model/test/catalog.rs | 3 +- src/sqllogictest/Cargo.toml | 1 + src/sqllogictest/src/runner.rs | 3 +- src/storage/src/render/mod.rs | 1 - src/storage/src/render/sources.rs | 9 +- src/storage/src/server.rs | 9 +- src/storage/src/source/kafka.rs | 42 +-- src/storage/src/source/kinesis.rs | 25 +- src/storage/src/source/mod.rs | 16 +- src/storage/src/source/pubnub.rs | 10 +- src/storage/src/source/s3.rs | 30 +- src/storage/src/storage_state.rs | 6 +- src/storaged/src/main.rs | 8 +- src/testdrive/src/action.rs | 4 +- test/test-util/src/kafka/kafka_client.rs | 7 +- 38 files changed, 429 insertions(+), 317 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 31e92085ee21b..fe9d6e8541f18 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3286,6 +3286,7 @@ dependencies = [ "tokio-stream", "tokio-util 0.7.2", "tracing", + "tracing-subscriber", "url", "uuid", ] @@ -3978,6 +3979,7 @@ dependencies = [ "tokio", "tokio-postgres", "tower-http", + "tracing", "uuid", "walkdir", ] diff --git a/clippy.toml b/clippy.toml index 6956b005b8d5c..541e4d90597e4 100644 --- a/clippy.toml +++ b/clippy.toml @@ -8,4 +8,6 @@ disallowed-methods = [ { path = "tokio::runtime::Handle::spawn_blocking", reason = "use the spawn wrappers in `mz_ore::task` instead" }, { path = "tokio::runtime::Runtime::spawn", reason = "use the spawn wrappers in `mz_ore::task` instead" }, { path = "tokio::runtime::Runtime::spawn_blocking", reason = "use the spawn wrappers in `mz_ore::task` instead" }, + # Use the wrapper that sets the log level correctly + { path = "rdkafka::config::ClientConfig::new", reason = "use the `client::create_new_client_config` wrapper in `kafka_util` instead" }, ] diff --git a/src/compute/src/bin/computed.rs b/src/compute/src/bin/computed.rs index 0a8f78a02ac4c..ad4b08fd897b8 100644 --- a/src/compute/src/bin/computed.rs +++ b/src/compute/src/bin/computed.rs @@ -17,7 +17,6 @@ use futures::sink::SinkExt; use futures::stream::TryStreamExt; use http::HeaderMap; use mz_build_info::{build_info, BuildInfo}; -use mz_dataflow_types::sources::AwsExternalId; use serde::de::DeserializeOwned; use serde::ser::Serialize; use tokio::net::TcpListener; @@ -27,6 +26,7 @@ use tracing_subscriber::filter::Targets; use mz_dataflow_types::client::{ComputeClient, GenericClient}; use mz_dataflow_types::reconciliation::command::ComputeCommandReconcile; +use mz_dataflow_types::ConnectorContext; use mz_ore::metrics::MetricsRegistry; use mz_ore::now::SYSTEM_TIME; @@ -254,10 +254,7 @@ async fn 
run(args: Args) -> Result<(), anyhow::Error> { experimental_mode: false, metrics_registry: MetricsRegistry::new(), now: SYSTEM_TIME.clone(), - aws_external_id: args - .aws_external_id - .map(AwsExternalId::ISwearThisCameFromACliArgOrEnvVariable) - .unwrap_or(AwsExternalId::NotProvided), + connector_context: ConnectorContext::from_cli_args(&args.log_filter, args.aws_external_id), }; let serve_config = ServeConfig { diff --git a/src/compute/src/compute_state.rs b/src/compute/src/compute_state.rs index d06c8e086f358..ee62914784721 100644 --- a/src/compute/src/compute_state.rs +++ b/src/compute/src/compute_state.rs @@ -26,6 +26,7 @@ use tokio::sync::mpsc; use mz_dataflow_types::client::{ComputeCommand, ComputeResponse}; use mz_dataflow_types::logging::LoggingConfig; +use mz_dataflow_types::ConnectorContext; use mz_dataflow_types::{DataflowError, PeekResponse, TailResponse}; use mz_repr::{Diff, GlobalId, Row, Timestamp}; use mz_storage::boundary::ComputeReplay; @@ -63,6 +64,9 @@ pub struct ComputeState { pub sink_metrics: SinkBaseMetrics, /// The logger, from Timely's logging framework, if logs are enabled. pub materialized_logger: Option, + /// Configuration for sink connectors. + // TODO: remove when sinks move to storage. + pub connector_context: ConnectorContext, } /// A wrapper around [ComputeState] with a live timely worker and response channel. diff --git a/src/compute/src/server.rs b/src/compute/src/server.rs index fdee2dd252022..b34fac761454e 100644 --- a/src/compute/src/server.rs +++ b/src/compute/src/server.rs @@ -20,7 +20,7 @@ use timely::worker::Worker as TimelyWorker; use tokio::sync::mpsc; use mz_dataflow_types::client::{ComputeCommand, ComputeResponse, LocalClient, LocalComputeClient}; -use mz_dataflow_types::sources::AwsExternalId; +use mz_dataflow_types::ConnectorContext; use mz_ore::metrics::MetricsRegistry; use mz_ore::now::NowFn; use mz_storage::boundary::ComputeReplay; @@ -42,8 +42,9 @@ pub struct Config { pub now: NowFn, /// Metrics registry through which dataflow metrics will be reported. pub metrics_registry: MetricsRegistry, - /// An external ID to use for all AWS AssumeRole operations. - pub aws_external_id: AwsExternalId, + /// Configuration for sink connectors. + // TODO: remove when sinks move to storage. + pub connector_context: ConnectorContext, } /// A handle to a running dataflow server. @@ -106,6 +107,7 @@ pub fn serve_boundary CR + Send + Sync + 'sta compute_boundary, compute_response_tx, metrics_bundle: metrics_bundle.clone(), + connector_context: config.connector_context.clone(), } .run() }) @@ -143,6 +145,9 @@ where compute_response_tx: mpsc::UnboundedSender, /// Metrics bundle. metrics_bundle: (SinkBaseMetrics, TraceMetrics), + /// Configuration for sink connectors. + // TODO: remove when sinks move to storage. 
+ pub connector_context: ConnectorContext, } impl<'w, A, CR> Worker<'w, A, CR> @@ -201,6 +206,7 @@ where reported_frontiers: HashMap::new(), sink_metrics: self.metrics_bundle.0.clone(), materialized_logger: None, + connector_context: self.connector_context.clone(), }); } ComputeCommand::DropInstance => { diff --git a/src/compute/src/sink/kafka.rs b/src/compute/src/sink/kafka.rs index 83dfa87a83c99..d1282de46452f 100644 --- a/src/compute/src/sink/kafka.rs +++ b/src/compute/src/sink/kafka.rs @@ -51,7 +51,7 @@ use mz_interchange::avro::{ self, get_debezium_transaction_schema, AvroEncoder, AvroSchemaGenerator, }; use mz_interchange::encode::Encode; -use mz_kafka_util::client::MzClientContext; +use mz_kafka_util::client::{create_new_client_config, MzClientContext}; use mz_ore::cast::CastFrom; use mz_ore::collections::CollectionExt; use mz_ore::metrics::{CounterVecExt, DeleteOnDropCounter, DeleteOnDropGauge, GaugeVecExt}; @@ -137,6 +137,7 @@ where sink.as_of.clone(), Rc::clone(&shared_frontier), &compute_state.sink_metrics.kafka, + compute_state.connector_context.librdkafka_log_level, ); compute_state @@ -434,9 +435,11 @@ impl KafkaSinkState { activator: Activator, write_frontier: Rc>>, metrics: &KafkaBaseMetrics, + librdkafka_log_level: tracing::Level, ) -> Self { - let config = Self::create_producer_config(&connector); - let consistency_client_config = Self::create_consistency_client_config(&connector); + let config = Self::create_producer_config(&connector, librdkafka_log_level); + let consistency_client_config = + Self::create_consistency_client_config(&connector, librdkafka_log_level); let metrics = Arc::new(SinkMetrics::new( metrics, @@ -486,8 +489,11 @@ impl KafkaSinkState { } } - fn create_producer_config(connector: &KafkaSinkConnector) -> ClientConfig { - let mut config = ClientConfig::new(); + fn create_producer_config( + connector: &KafkaSinkConnector, + librdkafka_log_level: tracing::Level, + ) -> ClientConfig { + let mut config = create_new_client_config(librdkafka_log_level); config.set("bootstrap.servers", &connector.addrs.to_string()); // Ensure that messages are sinked in order and without duplicates. 
Note that @@ -535,8 +541,11 @@ impl KafkaSinkState { config } - fn create_consistency_client_config(connector: &KafkaSinkConnector) -> ClientConfig { - let mut config = ClientConfig::new(); + fn create_consistency_client_config( + connector: &KafkaSinkConnector, + librdkafka_log_level: tracing::Level, + ) -> ClientConfig { + let mut config = create_new_client_config(librdkafka_log_level); config.set("bootstrap.servers", &connector.addrs.to_string()); for (k, v) in connector.config_options.iter() { // We explicitly reject `statistics.interval.ms` here so that we don't @@ -1023,6 +1032,7 @@ fn kafka( as_of: SinkAsOf, write_frontier: Rc>>, metrics: &KafkaBaseMetrics, + librdkafka_log_level: tracing::Level, ) -> Rc where G: Scope, @@ -1089,6 +1099,7 @@ where shared_gate_ts, write_frontier, metrics, + librdkafka_log_level, ) } @@ -1112,6 +1123,7 @@ pub fn produce_to_kafka( shared_gate_ts: Rc>>, write_frontier: Rc>>, metrics: &KafkaBaseMetrics, + librdkafka_log_level: tracing::Level, ) -> Rc where G: Scope, @@ -1131,6 +1143,7 @@ where activator, write_frontier, metrics, + librdkafka_log_level, ); let mut vector = Vec::new(); diff --git a/src/coord/src/catalog.rs b/src/coord/src/catalog.rs index c5759c9f9afa6..d599b80bf4ed1 100644 --- a/src/coord/src/catalog.rs +++ b/src/coord/src/catalog.rs @@ -31,7 +31,7 @@ use mz_dataflow_types::client::{ use mz_dataflow_types::logging::LoggingConfig as DataflowLoggingConfig; use mz_dataflow_types::sinks::{SinkConnector, SinkConnectorBuilder, SinkEnvelope}; use mz_dataflow_types::sources::{ - AwsExternalId, ConnectorInner, ExternalSourceConnector, SourceConnector, Timeline, + ConnectorInner, ExternalSourceConnector, SourceConnector, Timeline, }; use mz_expr::{ExprHumanizer, MirScalarExpr, OptimizedMirRelationExpr}; use mz_ore::collections::CollectionExt; @@ -1377,7 +1377,6 @@ impl Catalog { cluster_id: config.storage.cluster_id(), session_id: Uuid::new_v4(), build_info: config.build_info, - aws_external_id: config.aws_external_id.clone(), timestamp_frequency: config.timestamp_frequency, now: config.now.clone(), }, @@ -1917,7 +1916,6 @@ impl Catalog { storage, experimental_mode, build_info: &DUMMY_BUILD_INFO, - aws_external_id: AwsExternalId::NotProvided, timestamp_frequency: Duration::from_secs(1), now, skip_migrations: true, diff --git a/src/coord/src/catalog/config.rs b/src/coord/src/catalog/config.rs index 4b78f21de8a81..1327766f6f50c 100644 --- a/src/coord/src/catalog/config.rs +++ b/src/coord/src/catalog/config.rs @@ -10,7 +10,6 @@ use std::time::Duration; use mz_build_info::BuildInfo; -use mz_dataflow_types::sources::AwsExternalId; use mz_ore::metrics::MetricsRegistry; use crate::catalog::storage; @@ -24,10 +23,6 @@ pub struct Config<'a, S> { pub experimental_mode: Option, /// Information about this build of Materialize. pub build_info: &'static BuildInfo, - /// An [External ID][] to use for all AWS AssumeRole operations. - /// - /// [External ID]: https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_create_for-user_externalid.html - pub aws_external_id: AwsExternalId, /// Timestamp frequency to use for CREATE SOURCE pub timestamp_frequency: Duration, /// Function to generate wall clock now; can be mocked. 
diff --git a/src/coord/src/coord.rs b/src/coord/src/coord.rs index 38a6dcb080aeb..bcc791699f472 100644 --- a/src/coord/src/coord.rs +++ b/src/coord/src/coord.rs @@ -99,10 +99,10 @@ use mz_dataflow_types::client::{ }; use mz_dataflow_types::sinks::{SinkAsOf, SinkConnector, SinkDesc, TailSinkConnector}; use mz_dataflow_types::sources::{ - AwsExternalId, ExternalSourceConnector, PostgresSourceConnector, SourceConnector, Timeline, + ExternalSourceConnector, PostgresSourceConnector, SourceConnector, Timeline, }; use mz_dataflow_types::{ - BuildDesc, DataflowDesc, DataflowDescription, IndexDesc, PeekResponse, Update, + BuildDesc, ConnectorContext, DataflowDesc, DataflowDescription, IndexDesc, PeekResponse, Update, }; use mz_expr::{ permutation_for_arrangement, CollectionPlan, ExprHumanizer, MirRelationExpr, MirScalarExpr, @@ -233,12 +233,12 @@ pub struct Config { pub logical_compaction_window: Option, pub experimental_mode: bool, pub build_info: &'static BuildInfo, - pub aws_external_id: AwsExternalId, pub metrics_registry: MetricsRegistry, pub now: NowFn, pub secrets_controller: Box, pub availability_zones: Vec, pub replica_sizes: ClusterReplicaSizeMap, + pub connector_context: ConnectorContext, } struct PendingPeek { @@ -363,6 +363,9 @@ pub struct Coordinator { replica_sizes: ClusterReplicaSizeMap, /// Valid availability zones for replicas. availability_zones: Vec, + + /// Extra context to pass through to connector creation. + connector_context: ConnectorContext, } /// Metadata about an active connection. @@ -627,9 +630,13 @@ impl Coordinator { panic!("sink already initialized during catalog boot") } }; - let connector = sink_connector::build(builder.clone(), entry.id()) - .await - .with_context(|| format!("recreating sink {}", entry.name()))?; + let connector = sink_connector::build( + builder.clone(), + entry.id(), + self.connector_context.clone(), + ) + .await + .with_context(|| format!("recreating sink {}", entry.name()))?; self.handle_sink_connector_ready( entry.id(), entry.oid(), @@ -1410,8 +1417,8 @@ impl Coordinator { match mz_sql::connectors::populate_connectors(stmt, &catalog, &mut vec![]) { Ok(stmt) => mz_sql::pure::purify_create_source( self.now(), - self.catalog.config().aws_external_id.clone(), stmt, + self.connector_context.clone(), ), Err(e) => return tx.send(Err(e.into()), session), }; @@ -2355,6 +2362,7 @@ impl Coordinator { // main coordinator thread when the future completes. 
let connector_builder = sink.connector_builder; let internal_cmd_tx = self.internal_cmd_tx.clone(); + let connector_context = self.connector_context.clone(); task::spawn( || format!("sink_connector_ready:{}", sink.from), async move { @@ -2364,7 +2372,8 @@ impl Coordinator { tx, id, oid, - result: sink_connector::build(connector_builder, id).await, + result: sink_connector::build(connector_builder, id, connector_context) + .await, compute_instance, })) .expect("sending to internal_cmd_tx cannot fail"); @@ -4711,12 +4720,12 @@ pub async fn serve( logical_compaction_window, experimental_mode, build_info, - aws_external_id, metrics_registry, now, secrets_controller, replica_sizes, availability_zones, + connector_context, }: Config, ) -> Result<(Handle, Client), CoordError> { let (cmd_tx, cmd_rx) = mpsc::unbounded_channel(); @@ -4726,7 +4735,6 @@ pub async fn serve( storage, experimental_mode: Some(experimental_mode), build_info, - aws_external_id, timestamp_frequency, now: now.clone(), skip_migrations: false, @@ -4766,6 +4774,7 @@ pub async fn serve( secrets_controller, replica_sizes, availability_zones, + connector_context, }; let bootstrap = handle.block_on(coord.bootstrap(builtin_table_updates)); let ok = bootstrap.is_ok(); diff --git a/src/coord/src/sink_connector.rs b/src/coord/src/sink_connector.rs index c57dfd0db10de..cb60e37810622 100644 --- a/src/coord/src/sink_connector.rs +++ b/src/coord/src/sink_connector.rs @@ -11,14 +11,14 @@ use std::time::Duration; use anyhow::{anyhow, Context}; use rdkafka::admin::{AdminClient, AdminOptions, NewTopic, ResourceSpecifier, TopicReplication}; -use rdkafka::config::ClientConfig; use mz_dataflow_types::sinks::{ KafkaSinkConnector, KafkaSinkConnectorBuilder, KafkaSinkConnectorRetention, KafkaSinkConsistencyConnector, PersistSinkConnector, PersistSinkConnectorBuilder, PublishedSchemaInfo, SinkConnector, SinkConnectorBuilder, }; -use mz_kafka_util::client::MzClientContext; +use mz_dataflow_types::ConnectorContext; +use mz_kafka_util::client::{create_new_client_config, MzClientContext}; use mz_ore::collections::CollectionExt; use mz_repr::GlobalId; @@ -27,9 +27,10 @@ use crate::error::CoordError; pub async fn build( builder: SinkConnectorBuilder, id: GlobalId, + connector_context: ConnectorContext, ) -> Result { match builder { - SinkConnectorBuilder::Kafka(k) => build_kafka(k, id).await, + SinkConnectorBuilder::Kafka(k) => build_kafka(k, id, connector_context).await, SinkConnectorBuilder::Persist(p) => build_persist_sink(p, id), } } @@ -204,6 +205,7 @@ async fn publish_kafka_schemas( async fn build_kafka( builder: KafkaSinkConnectorBuilder, id: GlobalId, + connector_context: ConnectorContext, ) -> Result { let maybe_append_nonce = { let reuse_topic = builder.reuse_topic; @@ -219,7 +221,7 @@ async fn build_kafka( let topic = maybe_append_nonce(&builder.topic_prefix); // Create Kafka topic - let mut config = ClientConfig::new(); + let mut config = create_new_client_config(connector_context.librdkafka_log_level); config.set("bootstrap.servers", &builder.broker_addrs.to_string()); for (k, v) in builder.config_options.iter() { // Explicitly reject the statistics interval option here because its not diff --git a/src/dataflow-types/Cargo.toml b/src/dataflow-types/Cargo.toml index f1e81fa5834e3..5528921dde08e 100644 --- a/src/dataflow-types/Cargo.toml +++ b/src/dataflow-types/Cargo.toml @@ -28,7 +28,7 @@ mz-ccsr = { path = "../ccsr" } mz-expr = { path = "../expr" } mz-interchange = { path = "../interchange" } mz-kafka-util = { path = "../kafka-util" } -mz-ore 
= { path = "../ore" } +mz-ore = { path = "../ore", features = ["tracing_"] } mz-orchestrator = { path = "../orchestrator" } mz-persist = { path = "../persist" } mz-persist-client = { path = "../persist-client" } @@ -47,6 +47,7 @@ tokio-serde = { version = "0.8.0", features = ["bincode"] } tokio-stream = " 0.1.8" tokio-util = { version = "0.7.2", features = ["codec"] } tracing = "0.1.34" +tracing-subscriber = "0.3.11" url = { version = "2.2.2", features = ["serde"] } uuid = { version = "0.8.2", features = ["serde", "v4"] } proptest = { git = "https://github.com/MaterializeInc/proptest.git", default-features = false, features = ["std"]} diff --git a/src/dataflow-types/src/types.rs b/src/dataflow-types/src/types.rs index 90732a69461de..2978eed7c8b58 100644 --- a/src/dataflow-types/src/types.rs +++ b/src/dataflow-types/src/types.rs @@ -28,6 +28,7 @@ use mz_repr::proto::{any_uuid, FromProtoIfSome, ProtoRepr, TryFromProtoError, Tr use mz_repr::{Diff, GlobalId, RelationType, Row}; use crate::client::controller::storage::CollectionMetadata; +use crate::types::aws::AwsExternalId; use crate::types::sinks::SinkDesc; use crate::types::sources::SourceDesc; use crate::Plan; @@ -486,6 +487,183 @@ impl Arbitrary for DataflowDescription, + /// The AWS role to assume. + pub role: Option, + /// The custom AWS endpoint to use, if any. + pub endpoint: Option, + } + + /// AWS credentials for a source or sink. + #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] + pub enum AwsCredentials { + /// Look for credentials using the [default credentials chain][credchain] + /// + /// [credchain]: aws_config::default_provider::credentials::DefaultCredentialsChain + Default, + /// Load credentials using the given named profile + Profile { profile_name: String }, + /// Use the enclosed static credentials + Static { + access_key_id: String, + secret_access_key: String, + session_token: Option, + }, + } + + /// A role for Materialize to assume when performing AWS API calls. + #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] + pub struct AwsAssumeRole { + /// The Amazon Resource Name of the role to assume. + pub arn: String, + } + + impl AwsConfig { + /// Loads the AWS SDK configuration object from the environment, then + /// applies the overrides from this object. 
+ pub async fn load(&self, external_id: Option<&AwsExternalId>) -> aws_types::SdkConfig { + use aws_config::default_provider::credentials::DefaultCredentialsChain; + use aws_config::default_provider::region::DefaultRegionChain; + use aws_config::sts::AssumeRoleProvider; + use aws_smithy_http::endpoint::Endpoint; + use aws_types::credentials::SharedCredentialsProvider; + use aws_types::region::Region; + + let region = match &self.region { + Some(region) => Some(Region::new(region.clone())), + _ => { + let mut rc = DefaultRegionChain::builder(); + if let AwsCredentials::Profile { profile_name } = &self.credentials { + rc = rc.profile_name(profile_name); + } + rc.build().region().await + } + }; + + let mut cred_provider = match &self.credentials { + AwsCredentials::Default => SharedCredentialsProvider::new( + DefaultCredentialsChain::builder() + .region(region.clone()) + .build() + .await, + ), + AwsCredentials::Profile { profile_name } => SharedCredentialsProvider::new( + DefaultCredentialsChain::builder() + .profile_name(profile_name) + .region(region.clone()) + .build() + .await, + ), + AwsCredentials::Static { + access_key_id, + secret_access_key, + session_token, + } => SharedCredentialsProvider::new(aws_types::Credentials::from_keys( + access_key_id, + secret_access_key, + session_token.clone(), + )), + }; + + if let Some(AwsAssumeRole { arn }) = &self.role { + let mut role = AssumeRoleProvider::builder(arn).session_name("materialized"); + // This affects which region to perform STS on, not where + // anything else happens. + if let Some(region) = ®ion { + role = role.region(region.clone()); + } + if let Some(external_id) = external_id { + role = role.external_id(&external_id.0); + } + cred_provider = SharedCredentialsProvider::new(role.build(cred_provider)); + } + + let mut loader = aws_config::from_env() + .region(region) + .credentials_provider(cred_provider); + if let Some(endpoint) = &self.endpoint { + loader = loader.endpoint_resolver(Endpoint::immutable(endpoint.0.clone())); + } + loader.load().await + } + } +} + +/// Extra context to pass through when instantiating a connector for a source or +/// sink. +/// +/// Should be kept cheaply cloneable. +#[derive(Debug, Clone)] +pub struct ConnectorContext { + /// The level for librdkafka's logs. + pub librdkafka_log_level: tracing::Level, + /// An external ID to use for all AWS AssumeRole operations. + pub aws_external_id: Option, +} + +impl ConnectorContext { + /// Constructs a new connector context from command line arguments. + /// + /// **WARNING:** it is critical for security that the `aws_external_id` be + /// provided by the operator of the Materialize service (i.e., via a CLI + /// argument or environment variable) and not the end user of Materialize + /// (e.g., via a configuration option in a SQL statement). See + /// [`AwsExternalId`] for details. + pub fn from_cli_args( + filter: &tracing_subscriber::filter::Targets, + aws_external_id: Option, + ) -> ConnectorContext { + ConnectorContext { + librdkafka_log_level: mz_ore::tracing::target_level(filter, "librdkafka"), + aws_external_id: aws_external_id.map(AwsExternalId), + } + } +} + +impl Default for ConnectorContext { + fn default() -> ConnectorContext { + ConnectorContext { + librdkafka_log_level: tracing::Level::INFO, + aws_external_id: None, + } + } +} + impl DataflowDescription { /// Creates a new dataflow description with a human-readable name. 
pub fn new(name: String) -> Self { @@ -848,7 +1026,6 @@ pub mod sources { use chrono::NaiveDateTime; use differential_dataflow::lattice::Lattice; use globset::Glob; - use http::Uri; use mz_persist_client::read::ReadHandle; use mz_persist_client::{PersistLocation, ShardId}; use mz_persist_types::Codec64; @@ -862,6 +1039,7 @@ pub mod sources { use mz_repr::proto::TryFromProtoError; use mz_repr::{ColumnType, GlobalId, RelationDesc, RelationType, Row, ScalarType}; + use crate::aws::AwsConfig; use crate::postgres_source::PostgresSourceDetails; use crate::DataflowError; @@ -2029,150 +2207,6 @@ pub mod sources { SqsNotifications { queue: String }, } - /// A wrapper for [`Uri`] that implements [`Serialize`] and `Deserialize`. - #[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] - pub struct SerdeUri(#[serde(with = "http_serde::uri")] pub Uri); - - /// AWS configuration overrides for a source or sink. - /// - /// This is a distinct type from any of the configuration types built into the - /// AWS SDK so that we can implement `Serialize` and `Deserialize`. - #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] - pub struct AwsConfig { - /// AWS Credentials, or where to find them - pub credentials: AwsCredentials, - /// The AWS region to use. - /// - /// Uses the default region (looking at env vars, config files, etc) if not provided. - pub region: Option, - /// The AWS role to assume. - pub role: Option, - /// The custom AWS endpoint to use, if any. - pub endpoint: Option, - } - - /// AWS credentials for a source or sink. - #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] - pub enum AwsCredentials { - /// Look for credentials using the [default credentials chain][credchain] - /// - /// [credchain]: aws_config::default_provider::credentials::DefaultCredentialsChain - Default, - /// Load credentials using the given named profile - Profile { profile_name: String }, - /// Use the enclosed static credentials - Static { - access_key_id: String, - secret_access_key: String, - session_token: Option, - }, - } - - /// A role for Materialize to assume when performing AWS API calls. - #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] - pub struct AwsAssumeRole { - /// The Amazon Resource Name of the role to assume. - pub arn: String, - } - - /// An external ID to use for all AWS AssumeRole operations. - /// - /// Note that it is critical for security that this ID can **not** be provided by users running - /// in Materialize Cloud, it must be provided by Materialize. Currently this guarantee is - /// satisfied by only making this accessible from the CLI, which users do not have access to. - /// - /// - #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] - pub enum AwsExternalId { - NotProvided, - ISwearThisCameFromACliArgOrEnvVariable(String), - } - - impl Default for AwsExternalId { - fn default() -> Self { - AwsExternalId::NotProvided - } - } - - impl AwsExternalId { - fn get(&self) -> Option<&str> { - match self { - AwsExternalId::NotProvided => None, - AwsExternalId::ISwearThisCameFromACliArgOrEnvVariable(v) => Some(v), - } - } - } - - impl AwsConfig { - /// Loads the AWS SDK configuration object from the environment, then - /// applies the overrides from this object. 
- pub async fn load(&self, external_id: AwsExternalId) -> aws_types::SdkConfig { - use aws_config::default_provider::credentials::DefaultCredentialsChain; - use aws_config::default_provider::region::DefaultRegionChain; - use aws_config::sts::AssumeRoleProvider; - use aws_smithy_http::endpoint::Endpoint; - use aws_types::credentials::SharedCredentialsProvider; - use aws_types::region::Region; - - let region = match &self.region { - Some(region) => Some(Region::new(region.clone())), - _ => { - let mut rc = DefaultRegionChain::builder(); - if let AwsCredentials::Profile { profile_name } = &self.credentials { - rc = rc.profile_name(profile_name); - } - rc.build().region().await - } - }; - - let mut cred_provider = match &self.credentials { - AwsCredentials::Default => SharedCredentialsProvider::new( - DefaultCredentialsChain::builder() - .region(region.clone()) - .build() - .await, - ), - AwsCredentials::Profile { profile_name } => SharedCredentialsProvider::new( - DefaultCredentialsChain::builder() - .profile_name(profile_name) - .region(region.clone()) - .build() - .await, - ), - AwsCredentials::Static { - access_key_id, - secret_access_key, - session_token, - } => SharedCredentialsProvider::new(aws_types::Credentials::from_keys( - access_key_id, - secret_access_key, - session_token.clone(), - )), - }; - - if let Some(AwsAssumeRole { arn }) = &self.role { - let mut role = AssumeRoleProvider::builder(arn).session_name("materialized"); - // This affects which region to perform STS on, not where - // anything else happens. - if let Some(region) = ®ion { - role = role.region(region.clone()); - } - if let Some(external_id) = external_id.get() { - role = role.external_id(external_id); - } - cred_provider = SharedCredentialsProvider::new(role.build(cred_provider)); - } - - let mut loader = aws_config::from_env() - .region(region) - .credentials_provider(cred_provider); - if let Some(endpoint) = &self.endpoint { - loader = loader.endpoint_resolver(Endpoint::immutable(endpoint.0.clone())); - } - loader.load().await - } - } - #[derive(Debug, Clone)] pub struct SourceData(pub Result); diff --git a/src/kafka-util/src/bin/kgen.rs b/src/kafka-util/src/bin/kgen.rs index e2d785dfb7fad..d81c6eb2f2533 100644 --- a/src/kafka-util/src/bin/kgen.rs +++ b/src/kafka-util/src/bin/kgen.rs @@ -26,7 +26,6 @@ use rdkafka::error::KafkaError; use rdkafka::producer::{BaseRecord, Producer, ThreadedProducer}; use rdkafka::types::RDKafkaErrorCode; use rdkafka::util::Timeout; -use rdkafka::ClientConfig; use serde_json::Map; use url::Url; @@ -695,7 +694,7 @@ async fn main() -> anyhow::Result<()> { let mut key_gen = key_gen.clone(); let mut value_gen = value_gen.clone(); let producer: ThreadedProducer = - ClientConfig::new() + mz_kafka_util::client::create_new_client_config_simple() .set("bootstrap.servers", args.bootstrap_server.to_string()) .create_with_context(mz_kafka_util::client::MzClientContext) .unwrap(); diff --git a/src/kafka-util/src/client.rs b/src/kafka-util/src/client.rs index 96726df203e2d..40e8c55b7d6d7 100644 --- a/src/kafka-util/src/client.rs +++ b/src/kafka-util/src/client.rs @@ -14,10 +14,11 @@ use std::time::Duration; use anyhow::bail; use mz_ore::collections::CollectionExt; use rdkafka::client::Client; +use rdkafka::config::{ClientConfig, RDKafkaLogLevel}; use rdkafka::consumer::ConsumerContext; use rdkafka::producer::{DefaultProducerContext, DeliveryResult, ProducerContext}; use rdkafka::ClientContext; -use tracing::{debug, error, info, warn}; +use tracing::{debug, error, info, warn, Level}; /// A 
`ClientContext` implementation that uses `tracing` instead of `log` macros. /// @@ -90,3 +91,56 @@ pub fn get_partitions( Ok(meta_topic.partitions().iter().map(|x| x.id()).collect()) } + +/// A simpler version of [`create_new_client_config`] that defaults +/// the `log_level` to `INFO` and should only be used in tests. +pub fn create_new_client_config_simple() -> ClientConfig { + create_new_client_config(tracing::Level::INFO) +} + +/// Build a new [`rdkafka`] [`ClientConfig`] with its `log_level` set correctly +/// based on the passed through [`tracing::Level`]. This level should be +/// determined for `target: "librdkafka"`. +pub fn create_new_client_config(tracing_level: Level) -> ClientConfig { + #[allow(clippy::disallowed_methods)] + let mut config = ClientConfig::new(); + + let level = if tracing_level >= Level::DEBUG { + RDKafkaLogLevel::Debug + } else if tracing_level >= Level::INFO { + RDKafkaLogLevel::Info + } else if tracing_level >= Level::WARN { + RDKafkaLogLevel::Warning + } else { + RDKafkaLogLevel::Error + }; + // WARNING WARNING WARNING + // + // For whatever reason, if you change this `target` to something else, this + // log line might break. I (guswynn) did some extensive investigation with + // the tracing folks, and we learned that this edge case only happens with + // 1. a different target + // 2. only this file (so far as I can tell) + // 3. only in certain subscriber combinations + // 4. only if the `tracing-log` feature is on. + // + // Our conclusion was that one of our dependencies is doing something + // problematic with `log`. + // + // For now, this works, and prints a nice log line exactly when we want it. + // + // TODO(guswynn): when we can remove `tracing-log`, remove this warning + tracing::debug!(target: "librdkafka", level = ?level, "Determined log level for librdkafka"); + config.set_log_level(level); + + // Patch the librdkafka debug log system into the Rust `log` ecosystem. This + // is a very simple integration at the moment; enabling `debug`-level logs + // for the `librdkafka` target enables the full firehouse of librdkafka + // debug logs. We may want to investigate finer-grained control. + if tracing_level >= Level::DEBUG { + tracing::debug!(target: "librdkafka", "Enabling debug logs for rdkafka"); + config.set("debug", "all"); + } + + config +} diff --git a/src/materialized/src/bin/materialized/main.rs b/src/materialized/src/bin/materialized/main.rs index 5c8dc13bbba02..cedea51a06ec8 100644 --- a/src/materialized/src/bin/materialized/main.rs +++ b/src/materialized/src/bin/materialized/main.rs @@ -49,7 +49,7 @@ use uuid::Uuid; use materialized::{ OrchestratorBackend, OrchestratorConfig, SecretsControllerConfig, TlsConfig, TlsMode, }; -use mz_dataflow_types::sources::AwsExternalId; +use mz_dataflow_types::ConnectorContext; use mz_frontegg_auth::{FronteggAuthentication, FronteggConfig}; use mz_orchestrator_kubernetes::{KubernetesImagePullPolicy, KubernetesOrchestratorConfig}; use mz_orchestrator_process::ProcessOrchestratorConfig; @@ -472,7 +472,7 @@ fn run(args: Args) -> Result<(), anyhow::Error> { // handled by the custom panic handler. 
let metrics_registry = MetricsRegistry::new(); runtime.block_on(mz_ore::tracing::configure(mz_ore::tracing::TracingConfig { - log_filter: args.log_filter, + log_filter: args.log_filter.clone(), opentelemetry_endpoint: args.opentelemetry_endpoint, opentelemetry_headers: args .opentelemetry_header @@ -747,14 +747,11 @@ max log level: {max_log_level}", orchestrator, secrets_controller: Some(secrets_controller), experimental_mode: args.experimental, - aws_external_id: args - .aws_external_id - .map(AwsExternalId::ISwearThisCameFromACliArgOrEnvVariable) - .unwrap_or(AwsExternalId::NotProvided), metrics_registry, now: SYSTEM_TIME.clone(), replica_sizes, availability_zones: args.availability_zone, + connector_context: ConnectorContext::from_cli_args(&args.log_filter, args.aws_external_id), }))?; eprintln!( diff --git a/src/materialized/src/lib.rs b/src/materialized/src/lib.rs index 0eb1e8a880e60..e4dd1d0820403 100644 --- a/src/materialized/src/lib.rs +++ b/src/materialized/src/lib.rs @@ -28,7 +28,7 @@ use futures::StreamExt; use mz_build_info::{build_info, BuildInfo}; use mz_dataflow_types::client::controller::ClusterReplicaSizeMap; use mz_dataflow_types::client::RemoteClient; -use mz_dataflow_types::sources::AwsExternalId; +use mz_dataflow_types::ConnectorContext; use mz_frontegg_auth::FronteggAuthentication; use mz_orchestrator::{Orchestrator, ServiceConfig, ServicePort}; use mz_orchestrator_kubernetes::{KubernetesOrchestrator, KubernetesOrchestratorConfig}; @@ -98,6 +98,12 @@ pub struct Config { /// stash instead of sqlite from the `data_directory`. pub catalog_postgres_stash: Option, + // === Connector options. === + /// Configuration for source and sink connectors created by the storage + /// layer. This can include configuration for external + /// sources. + pub connector_context: ConnectorContext, + // === Platform options. === /// Configuration of service orchestration. pub orchestrator: OrchestratorConfig, @@ -106,12 +112,6 @@ pub struct Config { /// Optional configuration for a secrets controller. pub secrets_controller: Option, - // === AWS options. === - /// An [external ID] to be supplied to all AWS AssumeRole operations. - /// - /// [external id]: https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_create_for-user_externalid.html - pub aws_external_id: AwsExternalId, - // === Mode switches. === /// Whether to permit usage of experimental features. pub experimental_mode: bool, @@ -396,12 +396,12 @@ async fn serve_stash( logical_compaction_window: config.logical_compaction_window, experimental_mode: config.experimental_mode, build_info: &BUILD_INFO, - aws_external_id: config.aws_external_id.clone(), metrics_registry: config.metrics_registry.clone(), now: config.now, secrets_controller, replica_sizes: config.replica_sizes.clone(), availability_zones: config.availability_zones.clone(), + connector_context: config.connector_context, }) .await?; diff --git a/src/materialized/tests/util.rs b/src/materialized/tests/util.rs index 3eb9f941b424e..23437dc932f3e 100644 --- a/src/materialized/tests/util.rs +++ b/src/materialized/tests/util.rs @@ -25,7 +25,6 @@ use tokio::runtime::Runtime; use tower_http::cors::AllowOrigin; use materialized::{OrchestratorBackend, OrchestratorConfig, TlsMode}; -use mz_dataflow_types::sources::AwsExternalId; use mz_frontegg_auth::FronteggAuthentication; use mz_orchestrator_process::ProcessOrchestratorConfig; use mz_ore::id_gen::PortAllocator; @@ -44,7 +43,6 @@ lazy_static! 
{ #[derive(Clone)] pub struct Config { data_directory: Option, - aws_external_id: AwsExternalId, logging_granularity: Option, tls: Option, frontegg: Option, @@ -58,7 +56,6 @@ impl Default for Config { fn default() -> Config { Config { data_directory: None, - aws_external_id: AwsExternalId::NotProvided, logging_granularity: Some(Duration::from_secs(1)), tls: None, frontegg: None, @@ -164,7 +161,6 @@ pub fn start_server(config: Config) -> Result { linger: false, }, secrets_controller: None, - aws_external_id: config.aws_external_id, listen_addr: SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), 0), tls: config.tls, frontegg: config.frontegg, @@ -175,6 +171,7 @@ pub fn start_server(config: Config) -> Result { cors_allowed_origin: AllowOrigin::list([]), replica_sizes: Default::default(), availability_zones: Default::default(), + connector_context: Default::default(), }))?; let server = Server { inner, diff --git a/src/ore/src/lib.rs b/src/ore/src/lib.rs index ae0ac4816e1f2..6cbe6e1d3efdf 100644 --- a/src/ore/src/lib.rs +++ b/src/ore/src/lib.rs @@ -54,6 +54,7 @@ pub mod now; pub mod option; pub mod panic; pub mod path; +#[cfg_attr(nightly_doc_features, doc(cfg(feature = "process")))] pub mod result; #[cfg_attr(nightly_doc_features, doc(cfg(feature = "network")))] #[cfg(feature = "network")] diff --git a/src/ore/src/tracing.rs b/src/ore/src/tracing.rs index c174aa6d6ad7e..ff174516d3d4d 100644 --- a/src/ore/src/tracing.rs +++ b/src/ore/src/tracing.rs @@ -19,7 +19,7 @@ use opentelemetry::sdk::{trace, Resource}; use opentelemetry::KeyValue; use tonic::metadata::MetadataMap; use tonic::transport::Endpoint; -use tracing::{Event, Subscriber}; +use tracing::{Event, Level, Subscriber}; use tracing_subscriber::filter::{LevelFilter, Targets}; use tracing_subscriber::fmt::format::{format, Format, Writer}; use tracing_subscriber::fmt::{self, FmtContext, FormatEvent, FormatFields}; @@ -147,6 +147,21 @@ pub async fn configure(config: TracingConfig) -> Result<(), anyhow::Error> { Ok(()) } +/// Returns the level of a specific target from a [`Targets`]. +pub fn target_level(targets: &Targets, target: &str) -> Level { + if targets.would_enable(target, &Level::TRACE) { + Level::TRACE + } else if targets.would_enable(target, &Level::DEBUG) { + Level::DEBUG + } else if targets.would_enable(target, &Level::INFO) { + Level::INFO + } else if targets.would_enable(target, &Level::WARN) { + Level::WARN + } else { + Level::ERROR + } +} + /// A wrapper around a `tracing_subscriber` `Format` that /// prepends a subprocess name to the event logs #[derive(Debug)] diff --git a/src/sql/src/catalog.rs b/src/sql/src/catalog.rs index 5a9c1eeef68e9..6c22a7a525631 100644 --- a/src/sql/src/catalog.rs +++ b/src/sql/src/catalog.rs @@ -19,10 +19,10 @@ use std::time::{Duration, Instant}; use chrono::{DateTime, Utc, MIN_DATETIME}; use lazy_static::lazy_static; -use mz_dataflow_types::sources::{AwsExternalId, SourceConnector}; use mz_build_info::{BuildInfo, DUMMY_BUILD_INFO}; use mz_dataflow_types::client::ComputeInstanceId; +use mz_dataflow_types::sources::SourceConnector; use mz_expr::{DummyHumanizer, ExprHumanizer, MirScalarExpr}; use mz_ore::now::{EpochMillis, NowFn, NOW_ZERO}; use mz_repr::{ColumnName, GlobalId, RelationDesc, ScalarType}; @@ -207,8 +207,6 @@ pub struct CatalogConfig { pub experimental_mode: bool, /// Information about this build of Materialize. pub build_info: &'static BuildInfo, - /// An external ID to be supplied to all AWS AssumeRole operations. 
- pub aws_external_id: AwsExternalId, /// Default timestamp frequency for CREATE SOURCE pub timestamp_frequency: Duration, /// Function that returns a wall clock now time; can safely be mocked to return @@ -602,7 +600,6 @@ lazy_static! { session_id: Uuid::from_u128(0), experimental_mode: true, build_info: &DUMMY_BUILD_INFO, - aws_external_id: AwsExternalId::NotProvided, timestamp_frequency: Duration::from_secs(1), now: NOW_ZERO.clone(), }; diff --git a/src/sql/src/kafka_util.rs b/src/sql/src/kafka_util.rs index 6adea2f343caf..0a951b99cc99a 100644 --- a/src/sql/src/kafka_util.rs +++ b/src/sql/src/kafka_util.rs @@ -17,7 +17,7 @@ use std::sync::{Arc, Mutex}; use anyhow::{anyhow, bail}; -use mz_kafka_util::client::MzClientContext; +use mz_kafka_util::client::{create_new_client_config, MzClientContext}; use mz_ore::task; use rdkafka::client::ClientContext; use rdkafka::consumer::{BaseConsumer, Consumer, ConsumerContext}; @@ -204,8 +204,9 @@ pub async fn create_consumer( broker: &str, topic: &str, options: &BTreeMap, + librdkafka_log_level: tracing::Level, ) -> Result>, anyhow::Error> { - let mut config = rdkafka::ClientConfig::new(); + let mut config = create_new_client_config(librdkafka_log_level); config.set("bootstrap.servers", broker); for (k, v) in options { config.set(k, v); diff --git a/src/sql/src/normalize.rs b/src/sql/src/normalize.rs index b0fa0bb0eb43f..fda836cc8ebd5 100644 --- a/src/sql/src/normalize.rs +++ b/src/sql/src/normalize.rs @@ -19,7 +19,7 @@ use std::collections::BTreeMap; use anyhow::{bail, Context}; use itertools::Itertools; -use mz_dataflow_types::sources::{AwsAssumeRole, AwsConfig, AwsCredentials, SerdeUri}; +use mz_dataflow_types::aws::{AwsAssumeRole, AwsConfig, AwsCredentials, SerdeUri}; use mz_repr::ColumnName; use mz_sql_parser::ast::display::AstDisplay; use mz_sql_parser::ast::visit_mut::{self, VisitMut}; diff --git a/src/sql/src/pure.rs b/src/sql/src/pure.rs index 440ae99cc9258..cdb5c635bfa12 100644 --- a/src/sql/src/pure.rs +++ b/src/sql/src/pure.rs @@ -27,8 +27,9 @@ use tracing::info; use uuid::Uuid; use mz_ccsr::{Client, GetBySubjectError}; +use mz_dataflow_types::aws::{AwsConfig, AwsExternalId}; use mz_dataflow_types::postgres_source::PostgresSourceDetails; -use mz_dataflow_types::sources::{AwsConfig, AwsExternalId}; +use mz_dataflow_types::ConnectorContext; use mz_repr::strconv; @@ -52,8 +53,8 @@ use crate::normalize; /// locking access to the catalog for an unbounded amount of time. pub async fn purify_create_source( now: u64, - aws_external_id: AwsExternalId, mut stmt: CreateSourceStatement, + connector_context: ConnectorContext, ) -> Result, anyhow::Error> { let CreateSourceStatement { connector, @@ -102,9 +103,14 @@ pub async fn purify_create_source( // Verify that the provided security options are valid and then test them. kafka_util::extract_config(&mut with_options_map)? }; - let consumer = kafka_util::create_consumer(&broker, &topic, &config_options) - .await - .map_err(|e| anyhow!("Failed to create and connect Kafka consumer: {}", e))?; + let consumer = kafka_util::create_consumer( + &broker, + &topic, + &config_options, + connector_context.librdkafka_log_level, + ) + .await + .map_err(|e| anyhow!("Failed to create and connect Kafka consumer: {}", e))?; // Translate `kafka_time_offset` to `start_offset`. match kafka_util::lookup_start_offsets( @@ -137,7 +143,8 @@ pub async fn purify_create_source( } CreateSourceConnector::S3 { .. 
} => { let aws_config = normalize::aws_config(&mut with_options_map, None)?; - validate_aws_credentials(&aws_config, aws_external_id).await?; + validate_aws_credentials(&aws_config, connector_context.aws_external_id.as_ref()) + .await?; } CreateSourceConnector::Kinesis { arn } => { let region = arn @@ -147,7 +154,8 @@ pub async fn purify_create_source( .ok_or_else(|| anyhow!("Provided ARN does not include an AWS region"))?; let aws_config = normalize::aws_config(&mut with_options_map, Some(region.into()))?; - validate_aws_credentials(&aws_config, aws_external_id).await?; + validate_aws_credentials(&aws_config, connector_context.aws_external_id.as_ref()) + .await?; } CreateSourceConnector::Postgres { conn, @@ -504,7 +512,7 @@ async fn compile_proto( /// whether the specified AWS configuration is valid. async fn validate_aws_credentials( config: &AwsConfig, - external_id: AwsExternalId, + external_id: Option<&AwsExternalId>, ) -> Result<(), anyhow::Error> { let config = config.load(external_id).await; let sts_client = aws_sdk_sts::Client::new(&config); diff --git a/src/sql/src/query_model/test/catalog.rs b/src/sql/src/query_model/test/catalog.rs index b11b75339e001..5daa15a1ecfe0 100644 --- a/src/sql/src/query_model/test/catalog.rs +++ b/src/sql/src/query_model/test/catalog.rs @@ -23,7 +23,7 @@ use crate::DEFAULT_SCHEMA; use chrono::MIN_DATETIME; use lazy_static::lazy_static; use mz_build_info::DUMMY_BUILD_INFO; -use mz_dataflow_types::sources::{AwsExternalId, SourceConnector}; +use mz_dataflow_types::sources::SourceConnector; use mz_expr::{DummyHumanizer, ExprHumanizer, MirScalarExpr}; use mz_lowertest::*; use mz_ore::now::{EpochMillis, NOW_ZERO}; @@ -42,7 +42,6 @@ lazy_static! { session_id: Uuid::from_u128(0), experimental_mode: false, build_info: &DUMMY_BUILD_INFO, - aws_external_id: AwsExternalId::NotProvided, timestamp_frequency: Duration::from_secs(1), now: NOW_ZERO.clone(), }; diff --git a/src/sqllogictest/Cargo.toml b/src/sqllogictest/Cargo.toml index 6f78c7a8e0531..354da8691ffd0 100644 --- a/src/sqllogictest/Cargo.toml +++ b/src/sqllogictest/Cargo.toml @@ -30,6 +30,7 @@ serde_json = "1.0.81" tempfile = "3.2.0" time = "0.3.9" tokio = "1.18.2" +tracing = "0.1.34" tokio-postgres = { git = "https://github.com/MaterializeInc/rust-postgres", branch = "mz-0.7.2", features = ["with-chrono-0_4", "with-uuid-0_8", "with-serde_json-1"] } tower-http = { version = "0.3.3", features = ["cors"] } uuid = "0.8.2" diff --git a/src/sqllogictest/src/runner.rs b/src/sqllogictest/src/runner.rs index 69dcc92401d3e..d567eecf60105 100644 --- a/src/sqllogictest/src/runner.rs +++ b/src/sqllogictest/src/runner.rs @@ -57,7 +57,6 @@ use tower_http::cors::AllowOrigin; use uuid::Uuid; use materialized::{OrchestratorBackend, OrchestratorConfig}; -use mz_dataflow_types::sources::AwsExternalId; use mz_orchestrator_process::ProcessOrchestratorConfig; use mz_ore::id_gen::PortAllocator; use mz_ore::metrics::MetricsRegistry; @@ -579,7 +578,6 @@ impl Runner { linger: false, }, secrets_controller: None, - aws_external_id: AwsExternalId::NotProvided, listen_addr: SocketAddr::new(IpAddr::V4(Ipv4Addr::LOCALHOST), 0), tls: None, frontegg: None, @@ -590,6 +588,7 @@ impl Runner { now: SYSTEM_TIME.clone(), replica_sizes: Default::default(), availability_zones: Default::default(), + connector_context: Default::default(), }; let server = materialized::serve(mz_config).await?; let client = connect(&server).await; diff --git a/src/storage/src/render/mod.rs b/src/storage/src/render/mod.rs index 3a95934865958..e4944ceb33a1a 100644 --- 
a/src/storage/src/render/mod.rs +++ b/src/storage/src/render/mod.rs @@ -138,7 +138,6 @@ pub fn build_storage_dataflow( ) { let worker_logging = timely_worker.log_register().get("timely"); let name = format!("Source dataflow: {debug_name}"); - timely_worker.dataflow_core(&name, worker_logging, Box::new(()), |_, scope| { // The scope.clone() occurs to allow import in the region. // We build a region here to establish a pattern of a scope inside the dataflow, diff --git a/src/storage/src/render/sources.rs b/src/storage/src/render/sources.rs index 8e9fd3e790a92..9776b1c29a34f 100644 --- a/src/storage/src/render/sources.rs +++ b/src/storage/src/render/sources.rs @@ -271,7 +271,6 @@ where base_metrics: &storage_state.source_metrics, storage_metadata, as_of: as_of_frontier.clone(), - aws_external_id: storage_state.aws_external_id.clone(), }; // Pubnub and Postgres are `SimpleSource`s, so they produce _raw_ sources @@ -307,7 +306,7 @@ where let ((ok, err), cap) = source::create_raw_source::<_, KafkaSourceReader>( base_source_config, &connector, - storage_state.aws_external_id.clone(), + storage_state.connector_context.clone(), ); ((SourceType::Delimited(ok), err), cap) } @@ -318,7 +317,7 @@ where >( base_source_config, &connector, - storage_state.aws_external_id.clone(), + storage_state.connector_context.clone(), ); ((SourceType::Delimited(ok), err), cap) } @@ -326,7 +325,7 @@ where let ((ok, err), cap) = source::create_raw_source::<_, S3SourceReader>( base_source_config, &connector, - storage_state.aws_external_id.clone(), + storage_state.connector_context.clone(), ); ((SourceType::ByteStream(ok), err), cap) } @@ -335,7 +334,7 @@ where let ((ok, err), cap) = source::create_raw_source::<_, PubNubSourceReader>( base_source_config, &connector, - storage_state.aws_external_id.clone(), + storage_state.connector_context.clone(), ); ((SourceType::Row(ok), err), cap) } diff --git a/src/storage/src/server.rs b/src/storage/src/server.rs index e7031ff8bbf4f..120f0a4f84220 100644 --- a/src/storage/src/server.rs +++ b/src/storage/src/server.rs @@ -21,7 +21,7 @@ use timely::worker::Worker as TimelyWorker; use tokio::sync::mpsc; use mz_dataflow_types::client::{LocalClient, LocalStorageClient, StorageCommand, StorageResponse}; -use mz_dataflow_types::sources::AwsExternalId; +use mz_dataflow_types::ConnectorContext; use mz_ore::metrics::MetricsRegistry; use mz_ore::now::NowFn; @@ -44,8 +44,8 @@ pub struct Config { pub now: NowFn, /// Metrics registry through which dataflow metrics will be reported. pub metrics_registry: MetricsRegistry, - /// An external ID to use for all AWS AssumeRole operations. - pub aws_external_id: AwsExternalId, + /// Configuration for source and sink connectors. + pub connector_context: ConnectorContext, } /// A handle to a running dataflow server. 
@@ -104,7 +104,6 @@ pub fn serve_boundary SC + Send + Sync + 'st let tokio_executor = tokio::runtime::Handle::current(); let now = config.now; - let aws_external_id = config.aws_external_id.clone(); let worker_guards = timely::execute::execute(config.timely_config, move |timely_worker| { let timely_worker_index = timely_worker.index(); @@ -137,9 +136,9 @@ pub fn serve_boundary SC + Send + Sync + 'st last_bindings_feedback: Instant::now(), now: now.clone(), source_metrics, - aws_external_id: aws_external_id.clone(), timely_worker_index, timely_worker_peers, + connector_context: config.connector_context.clone(), }, storage_boundary, storage_response_tx, diff --git a/src/storage/src/source/kafka.rs b/src/storage/src/source/kafka.rs index f58bf73d51a84..1e0eb7259407f 100644 --- a/src/storage/src/source/kafka.rs +++ b/src/storage/src/source/kafka.rs @@ -20,22 +20,22 @@ use rdkafka::statistics::Statistics; use rdkafka::topic_partition_list::Offset; use rdkafka::{ClientConfig, ClientContext, Message, TopicPartitionList}; use timely::scheduling::activate::SyncActivator; -use tracing::{debug, error, info, warn}; +use tracing::{error, info, warn}; use uuid::Uuid; use mz_dataflow_types::sources::{ - encoding::SourceDataEncoding, AwsExternalId, ExternalSourceConnector, KafkaOffset, - KafkaSourceConnector, MzOffset, + encoding::SourceDataEncoding, ExternalSourceConnector, KafkaOffset, KafkaSourceConnector, + MzOffset, }; +use mz_dataflow_types::ConnectorContext; use mz_expr::PartitionId; -use mz_kafka_util::{client::MzClientContext, KafkaAddrs}; +use mz_kafka_util::{client::create_new_client_config, client::MzClientContext, KafkaAddrs}; use mz_ore::thread::{JoinHandleExt, UnparkOnDropHandle}; use mz_repr::{adt::jsonb::Jsonb, GlobalId}; use crate::source::{NextMessage, SourceMessage, SourceReader, SourceReaderError}; use self::metrics::KafkaPartitionMetrics; -use super::metrics::SourceBaseMetrics; mod metrics; @@ -85,16 +85,15 @@ impl SourceReader for KafkaSourceReader { worker_count: usize, consumer_activator: SyncActivator, connector: ExternalSourceConnector, - _: AwsExternalId, restored_offsets: Vec<(PartitionId, Option)>, _: SourceDataEncoding, - base_metrics: SourceBaseMetrics, + metrics: crate::source::metrics::SourceBaseMetrics, + connector_context: ConnectorContext, ) -> Result { let kc = match connector { ExternalSourceConnector::Kafka(kc) => kc, _ => unreachable!(), }; - let KafkaSourceConnector { addrs, config_options, @@ -109,6 +108,7 @@ impl SourceReader for KafkaSourceReader { group_id_prefix, cluster_id, &config_options, + connector_context.librdkafka_log_level, ); let (stats_tx, stats_rx) = crossbeam_channel::unbounded(); let consumer: BaseConsumer = kafka_config @@ -184,12 +184,7 @@ impl SourceReader for KafkaSourceReader { partition_info, include_headers: kc.include_headers.is_some(), _metadata_thread_handle: metadata_thread_handle, - partition_metrics: KafkaPartitionMetrics::new( - base_metrics, - partition_ids, - topic, - source_id, - ), + partition_metrics: KafkaPartitionMetrics::new(metrics, partition_ids, topic, source_id), }) } @@ -507,8 +502,9 @@ fn create_kafka_config( group_id_prefix: Option, cluster_id: Uuid, config_options: &BTreeMap, + librdkafka_log_level: tracing::Level, ) -> ClientConfig { - let mut kafka_config = ClientConfig::new(); + let mut kafka_config = create_new_client_config(librdkafka_log_level); // Broker configuration. 
kafka_config.set("bootstrap.servers", &addrs.to_string()); @@ -555,16 +551,6 @@ fn create_kafka_config( ), ); - // Patch the librdkafka debug log system into the Rust `log` ecosystem. - // This is a very simple integration at the moment; enabling `debug`-level - // logs for the `librdkafka` target enables the full firehouse of librdkafka - // debug logs. We may want to investigate finer-grained control. - // TODO(guswynn): replace this when https://github.com/tokio-rs/tracing/pull/1821 is merged - if log::log_enabled!(target: "librdkafka", log::Level::Debug) { - debug!("Enabling 'debug' for rdkafka"); - kafka_config.set("debug", "all"); - } - // Set additional configuration operations from the user. While these look // arbitrary, other layers of the system tightly control which configuration // options are allowable. @@ -708,9 +694,11 @@ mod tests { use std::time::Duration; use rdkafka::consumer::{BaseConsumer, Consumer}; - use rdkafka::{ClientConfig, Message, Offset, TopicPartitionList}; + use rdkafka::{Message, Offset, TopicPartitionList}; use uuid::Uuid; + use mz_kafka_util::client::create_new_client_config_simple; + // Splitting off a partition queue with an `Offset` that is not `Offset::Beginning` seems to // lead to a race condition where sometimes we receive messages from polling the main consumer // instead of on the partition queue. This can be surfaced by running the test in a loop (in @@ -728,7 +716,7 @@ mod tests { let topic_name = "queue-test"; let pid = 0; - let mut kafka_config = ClientConfig::new(); + let mut kafka_config = create_new_client_config_simple(); kafka_config.set("bootstrap.servers", "localhost:9092".to_string()); kafka_config.set("enable.auto.commit", "false"); kafka_config.set("group.id", Uuid::new_v4().to_string()); diff --git a/src/storage/src/source/kinesis.rs b/src/storage/src/source/kinesis.rs index 19e7779c5faad..cf35089f0b556 100644 --- a/src/storage/src/source/kinesis.rs +++ b/src/storage/src/source/kinesis.rs @@ -21,16 +21,15 @@ use prometheus::core::AtomicI64; use timely::scheduling::SyncActivator; use tracing::error; -use mz_dataflow_types::sources::{ - encoding::SourceDataEncoding, AwsExternalId, ExternalSourceConnector, KinesisSourceConnector, - MzOffset, -}; -use mz_dataflow_types::SourceErrorDetails; +use mz_dataflow_types::aws::AwsExternalId; +use mz_dataflow_types::sources::encoding::SourceDataEncoding; +use mz_dataflow_types::sources::{ExternalSourceConnector, KinesisSourceConnector, MzOffset}; +use mz_dataflow_types::{ConnectorContext, SourceErrorDetails}; use mz_expr::PartitionId; use mz_ore::metrics::{DeleteOnDropGauge, GaugeVecExt}; use mz_repr::GlobalId; -use crate::source::metrics::{KinesisMetrics, SourceBaseMetrics}; +use crate::source::metrics::KinesisMetrics; use crate::source::{NextMessage, SourceMessage, SourceReader, SourceReaderError}; /// To read all data from a Kinesis stream, we need to continually update @@ -128,17 +127,21 @@ impl SourceReader for KinesisSourceReader { _worker_count: usize, _consumer_activator: SyncActivator, connector: ExternalSourceConnector, - aws_external_id: AwsExternalId, _restored_offsets: Vec<(PartitionId, Option)>, _encoding: SourceDataEncoding, - base_metrics: SourceBaseMetrics, + metrics: crate::source::metrics::SourceBaseMetrics, + connector_context: ConnectorContext, ) -> Result { let kc = match connector { ExternalSourceConnector::Kinesis(kc) => kc, _ => unreachable!(), }; - let state = block_on(create_state(&base_metrics.kinesis, kc, aws_external_id)); + let state = block_on(create_state( + 
&metrics.kinesis, + kc, + connector_context.aws_external_id.as_ref(), + )); match state { Ok((kinesis_client, stream_name, shard_set, shard_queue)) => Ok(KinesisSourceReader { kinesis_client, @@ -148,7 +151,7 @@ impl SourceReader for KinesisSourceReader { shard_set, stream_name, processed_message_count: 0, - base_metrics: base_metrics.kinesis, + base_metrics: metrics.kinesis, }), Err(e) => Err(anyhow!("{}", e)), } @@ -263,7 +266,7 @@ impl SourceReader for KinesisSourceReader { async fn create_state( base_metrics: &KinesisMetrics, c: KinesisSourceConnector, - aws_external_id: AwsExternalId, + aws_external_id: Option<&AwsExternalId>, ) -> Result< ( KinesisClient, diff --git a/src/storage/src/source/mod.rs b/src/storage/src/source/mod.rs index eae7731869785..6ee3a7347fc0b 100644 --- a/src/storage/src/source/mod.rs +++ b/src/storage/src/source/mod.rs @@ -55,8 +55,8 @@ use tracing::error; use mz_avro::types::Value; use mz_dataflow_types::sources::encoding::SourceDataEncoding; -use mz_dataflow_types::sources::{AwsExternalId, ExternalSourceConnector, MzOffset}; -use mz_dataflow_types::{DecodeError, SourceError, SourceErrorDetails}; +use mz_dataflow_types::sources::{ExternalSourceConnector, MzOffset}; +use mz_dataflow_types::{ConnectorContext, DecodeError, SourceError, SourceErrorDetails}; use mz_expr::PartitionId; use mz_ore::cast::CastFrom; use mz_ore::metrics::{CounterVecExt, DeleteOnDropCounter, DeleteOnDropGauge, GaugeVecExt}; @@ -118,8 +118,6 @@ pub struct RawSourceCreationConfig<'a, G> { pub now: NowFn, /// The metrics & registry that each source instantiates. pub base_metrics: &'a SourceBaseMetrics, - /// An external ID to use for all AWS AssumeRole operations. - pub aws_external_id: AwsExternalId, /// Storage Metadata pub storage_metadata: CollectionMetadata, /// As Of @@ -168,10 +166,10 @@ where worker_count: usize, consumer_activator: SyncActivator, connector: ExternalSourceConnector, - aws_external_id: AwsExternalId, restored_offsets: Vec<(PartitionId, Option)>, encoding: SourceDataEncoding, metrics: crate::source::metrics::SourceBaseMetrics, + connector_context: ConnectorContext, ) -> Result { S::new( source_name, @@ -180,10 +178,10 @@ where worker_count, consumer_activator, connector, - aws_external_id, restored_offsets, encoding, metrics, + connector_context, ) .map(Self) } @@ -388,10 +386,10 @@ pub trait SourceReader { worker_count: usize, consumer_activator: SyncActivator, connector: ExternalSourceConnector, - aws_external_id: AwsExternalId, restored_offsets: Vec<(PartitionId, Option)>, encoding: SourceDataEncoding, metrics: crate::source::metrics::SourceBaseMetrics, + connector_context: ConnectorContext, ) -> Result where Self: Sized; @@ -910,7 +908,7 @@ where pub fn create_raw_source( config: RawSourceCreationConfig, source_connector: &ExternalSourceConnector, - aws_external_id: AwsExternalId, + connector_context: ConnectorContext, ) -> ( ( timely::dataflow::Stream>, @@ -971,10 +969,10 @@ where worker_count, sync_activator, source_connector.clone(), - aws_external_id.clone(), start_offsets, encoding, base_metrics, + connector_context.clone(), ); let source_stream = match source_reader { Ok(s) => s.into_stream(timestamp_frequency).fuse(), diff --git a/src/storage/src/source/pubnub.rs b/src/storage/src/source/pubnub.rs index 3bb2441b57b77..2fc115743dde3 100644 --- a/src/storage/src/source/pubnub.rs +++ b/src/storage/src/source/pubnub.rs @@ -17,13 +17,11 @@ use pubnub_hyper::{Builder, DefaultRuntime, DefaultTransport, PubNub}; use timely::scheduling::SyncActivator; use 
tracing::info; -use mz_dataflow_types::sources::{ - encoding::SourceDataEncoding, AwsExternalId, ExternalSourceConnector, MzOffset, -}; +use mz_dataflow_types::sources::{encoding::SourceDataEncoding, ExternalSourceConnector, MzOffset}; +use mz_dataflow_types::ConnectorContext; use mz_expr::PartitionId; use mz_repr::{Datum, GlobalId, Row}; -use super::metrics::SourceBaseMetrics; use crate::source::{SourceMessage, SourceReader, SourceReaderError}; /// Information required to sync data from PubNub @@ -45,10 +43,10 @@ impl SourceReader for PubNubSourceReader { _worker_count: usize, _consumer_activator: SyncActivator, connector: ExternalSourceConnector, - _aws_external_id: AwsExternalId, _restored_offsets: Vec<(PartitionId, Option)>, _encoding: SourceDataEncoding, - _metrics: SourceBaseMetrics, + _: crate::source::metrics::SourceBaseMetrics, + _: ConnectorContext, ) -> Result { let pubnub_conn = match connector { ExternalSourceConnector::PubNub(pubnub_conn) => pubnub_conn, diff --git a/src/storage/src/source/s3.rs b/src/storage/src/source/s3.rs index a896d7faede05..2200416959ec8 100644 --- a/src/storage/src/source/s3.rs +++ b/src/storage/src/source/s3.rs @@ -47,10 +47,10 @@ use tokio::time::{self, Duration}; use tokio_util::io::{ReaderStream, StreamReader}; use tracing::{debug, error, trace, warn}; -use mz_dataflow_types::sources::{ - encoding::SourceDataEncoding, AwsConfig, AwsExternalId, Compression, ExternalSourceConnector, - MzOffset, S3KeySource, -}; +use mz_dataflow_types::aws::{AwsConfig, AwsExternalId}; +use mz_dataflow_types::sources::encoding::SourceDataEncoding; +use mz_dataflow_types::sources::{Compression, ExternalSourceConnector, MzOffset, S3KeySource}; +use mz_dataflow_types::ConnectorContext; use mz_expr::PartitionId; use mz_ore::retry::{Retry, RetryReader}; use mz_ore::task; @@ -126,12 +126,12 @@ async fn download_objects_task( tx: Sender>, mut shutdown_rx: tokio::sync::watch::Receiver, aws_config: AwsConfig, - aws_external_id: AwsExternalId, + aws_external_id: Option, activator: SyncActivator, compression: Compression, metrics: SourceBaseMetrics, ) { - let config = aws_config.load(aws_external_id).await; + let config = aws_config.load(aws_external_id.as_ref()).await; let client = aws_sdk_s3::Client::new(&config); struct BucketInfo { @@ -239,11 +239,11 @@ async fn scan_bucket_task( source_id: String, glob: Option, aws_config: AwsConfig, - aws_external_id: AwsExternalId, + aws_external_id: Option, tx: Sender>, base_metrics: SourceBaseMetrics, ) { - let config = aws_config.load(aws_external_id).await; + let config = aws_config.load(aws_external_id.as_ref()).await; let client = aws_sdk_s3::Client::new(&config); let glob = glob.as_ref(); @@ -352,7 +352,7 @@ async fn read_sqs_task( glob: Option, queue: String, aws_config: AwsConfig, - aws_external_id: AwsExternalId, + aws_external_id: Option, tx: Sender>, mut shutdown_rx: tokio::sync::watch::Receiver, base_metrics: SourceBaseMetrics, @@ -362,7 +362,7 @@ async fn read_sqs_task( source_id, queue, ); - let config = aws_config.load(aws_external_id).await; + let config = aws_config.load(aws_external_id.as_ref()).await; let client = aws_sdk_sqs::Client::new(&config); let glob = glob.as_ref(); @@ -778,10 +778,10 @@ impl SourceReader for S3SourceReader { _worker_count: usize, consumer_activator: SyncActivator, connector: ExternalSourceConnector, - aws_external_id: AwsExternalId, _restored_offsets: Vec<(PartitionId, Option)>, _encoding: SourceDataEncoding, - metrics: SourceBaseMetrics, + metrics: crate::source::metrics::SourceBaseMetrics, + 
connector_context: ConnectorContext, ) -> Result { let s3_conn = match connector { ExternalSourceConnector::S3(s3_conn) => s3_conn, @@ -805,7 +805,7 @@ impl SourceReader for S3SourceReader { dataflow_tx, shutdown_rx.clone(), s3_conn.aws.clone(), - aws_external_id.clone(), + connector_context.aws_external_id.clone(), consumer_activator, s3_conn.compression, metrics.clone(), @@ -827,7 +827,7 @@ impl SourceReader for S3SourceReader { source_id.to_string(), glob.clone(), s3_conn.aws.clone(), - aws_external_id.clone(), + connector_context.aws_external_id.clone(), keys_tx.clone(), metrics.clone(), ), @@ -845,7 +845,7 @@ impl SourceReader for S3SourceReader { glob.clone(), queue, s3_conn.aws.clone(), - aws_external_id.clone(), + connector_context.aws_external_id.clone(), keys_tx.clone(), shutdown_rx.clone(), metrics.clone(), diff --git a/src/storage/src/storage_state.rs b/src/storage/src/storage_state.rs index 00d0aa80fa707..3b7bc284790cb 100644 --- a/src/storage/src/storage_state.rs +++ b/src/storage/src/storage_state.rs @@ -25,8 +25,8 @@ use timely::worker::Worker as TimelyWorker; use tokio::sync::mpsc; use mz_dataflow_types::client::{RenderSourcesCommand, StorageCommand, StorageResponse}; -use mz_dataflow_types::sources::AwsExternalId; use mz_dataflow_types::sources::{ExternalSourceConnector, SourceConnector}; +use mz_dataflow_types::ConnectorContext; use mz_ore::now::NowFn; use mz_repr::{Diff, GlobalId, Row, Timestamp}; @@ -75,12 +75,12 @@ pub struct StorageState { pub now: NowFn, /// Metrics for the source-specific side of dataflows. pub source_metrics: SourceBaseMetrics, - /// An external ID to use for all AWS AssumeRole operations. - pub aws_external_id: AwsExternalId, /// Index of the associated timely dataflow worker. pub timely_worker_index: usize, /// Peers in the associated timely dataflow worker. pub timely_worker_peers: usize, + /// Configuration for source and sink connectors. + pub connector_context: ConnectorContext, } /// State about a single table. diff --git a/src/storaged/src/main.rs b/src/storaged/src/main.rs index 5805bea4f3708..468d03227fb06 100644 --- a/src/storaged/src/main.rs +++ b/src/storaged/src/main.rs @@ -7,6 +7,7 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. 
+use std::env; use std::fmt; use std::path::PathBuf; use std::process; @@ -25,7 +26,7 @@ use tracing_subscriber::filter::Targets; use mz_build_info::{build_info, BuildInfo}; use mz_dataflow_types::client::{GenericClient, StorageClient}; -use mz_dataflow_types::sources::AwsExternalId; +use mz_dataflow_types::ConnectorContext; use mz_ore::metrics::MetricsRegistry; use mz_ore::now::SYSTEM_TIME; use mz_pid_file::PidFile; @@ -178,10 +179,7 @@ async fn run(args: Args) -> Result<(), anyhow::Error> { experimental_mode: false, metrics_registry: MetricsRegistry::new(), now: SYSTEM_TIME.clone(), - aws_external_id: args - .aws_external_id - .map(AwsExternalId::ISwearThisCameFromACliArgOrEnvVariable) - .unwrap_or(AwsExternalId::NotProvided), + connector_context: ConnectorContext::from_cli_args(&args.log_filter, args.aws_external_id), }; let serve_config = ServeConfig { diff --git a/src/testdrive/src/action.rs b/src/testdrive/src/action.rs index 59f505b567484..d5a751569be99 100644 --- a/src/testdrive/src/action.rs +++ b/src/testdrive/src/action.rs @@ -28,7 +28,7 @@ use itertools::Itertools; use lazy_static::lazy_static; use mz_coord::catalog::{Catalog, ConnCatalog}; use mz_coord::session::Session; -use mz_kafka_util::client::MzClientContext; +use mz_kafka_util::client::{create_new_client_config_simple, MzClientContext}; use mz_ore::now::NOW_ZERO; use rand::Rng; use rdkafka::ClientConfig; @@ -795,7 +795,7 @@ pub async fn create_state( use rdkafka::admin::{AdminClient, AdminOptions}; use rdkafka::producer::FutureProducer; - let mut kafka_config = ClientConfig::new(); + let mut kafka_config = create_new_client_config_simple(); kafka_config.set("bootstrap.servers", &config.kafka_addr); kafka_config.set("group.id", "materialize-testdrive"); kafka_config.set("auto.offset.reset", "earliest"); diff --git a/test/test-util/src/kafka/kafka_client.rs b/test/test-util/src/kafka/kafka_client.rs index 4f0756211bcc5..34e41716e846f 100644 --- a/test/test-util/src/kafka/kafka_client.rs +++ b/test/test-util/src/kafka/kafka_client.rs @@ -13,11 +13,10 @@ use std::time::Duration; use anyhow::Context; use rdkafka::admin::{AdminClient, AdminOptions, NewTopic, TopicReplication}; -use rdkafka::config::ClientConfig; use rdkafka::error::KafkaError; use rdkafka::producer::{DeliveryFuture, FutureProducer, FutureRecord}; -use mz_kafka_util::client::MzClientContext; +use mz_kafka_util::client::{create_new_client_config_simple, MzClientContext}; pub struct KafkaClient { producer: FutureProducer, @@ -30,7 +29,7 @@ impl KafkaClient { group_id: &str, configs: &[(&str, &str)], ) -> Result { - let mut config = ClientConfig::new(); + let mut config = create_new_client_config_simple(); config.set("bootstrap.servers", kafka_url); config.set("group.id", group_id); for (key, val) in configs { @@ -53,7 +52,7 @@ impl KafkaClient { configs: &[(&str, &str)], timeout: Option, ) -> Result<(), anyhow::Error> { - let mut config = ClientConfig::new(); + let mut config = create_new_client_config_simple(); config.set("bootstrap.servers", &self.kafka_url); let client = config
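
Note on the `create_new_client_config` / `create_new_client_config_simple` helpers called throughout the hunks above: they live in src/kafka-util/src/client.rs, whose hunk is not included in this portion of the patch. As a rough orientation only, a wrapper with the shape implied by the call sites could look like the sketch below; the mapping from `tracing::Level` to `RDKafkaLogLevel` and the `INFO` default in the `_simple` variant are assumptions, not the verbatim implementation.

// Sketch only (assumed implementation), reconstructed from the call sites in
// this patch; the real helpers live in src/kafka-util/src/client.rs.
use rdkafka::config::{ClientConfig, RDKafkaLogLevel};
use tracing::Level;

/// Build a `ClientConfig` whose librdkafka log level matches the given
/// `tracing` level, so librdkafka output respects `--log-filter`.
pub fn create_new_client_config(tracing_level: Level) -> ClientConfig {
    // The clippy.toml change in this patch disallows calling
    // `ClientConfig::new` directly; the wrapper is the sanctioned caller.
    #[allow(clippy::disallowed_methods)]
    let mut config = ClientConfig::new();

    // Map the tracing level onto librdkafka's syslog-style levels. This
    // particular mapping is an assumption made for the sketch.
    let level = if tracing_level == Level::TRACE || tracing_level == Level::DEBUG {
        RDKafkaLogLevel::Debug
    } else if tracing_level == Level::INFO {
        RDKafkaLogLevel::Info
    } else if tracing_level == Level::WARN {
        RDKafkaLogLevel::Warning
    } else {
        RDKafkaLogLevel::Error
    };
    config.set_log_level(level);
    config
}

/// Convenience wrapper for tests and tools that do not thread a log level
/// through (e.g. testdrive and test-util above); assumes a default of INFO.
pub fn create_new_client_config_simple() -> ClientConfig {
    create_new_client_config(Level::INFO)
}

At the call sites above (for example `create_kafka_config` in src/storage/src/source/kafka.rs), the level passed to the wrapper comes from `connector_context.librdkafka_log_level`.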
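
Similarly, `ConnectorContext` itself is defined in src/dataflow-types/src/types.rs, outside this portion of the patch. The sketch below is reconstructed purely from the usage sites above (`librdkafka_log_level`, `aws_external_id.as_ref()`/`.clone()`, `from_cli_args(&args.log_filter, args.aws_external_id)`). The real `from_cli_args` presumably derives the librdkafka level from the `librdkafka` directive of the `--log-filter` `Targets`; that lookup is elided here, and the constructor shown is a hypothetical simplification that takes the level directly.

// Sketch only: approximates the ConnectorContext introduced by this patch,
// reconstructed from its usage sites in the hunks above.
use mz_dataflow_types::aws::AwsExternalId; // path taken from the imports above
use tracing::Level;

/// Shared configuration handed to source and sink connectors.
#[derive(Debug, Clone)]
pub struct ConnectorContext {
    /// The level at which librdkafka log output is forwarded to `tracing`.
    pub librdkafka_log_level: Level,
    /// An external ID to use for all AWS AssumeRole operations, if provided.
    pub aws_external_id: Option<AwsExternalId>,
}

impl ConnectorContext {
    /// Simplified stand-in for `from_cli_args`: the real constructor takes the
    /// `--log-filter` `Targets` and the raw `--aws-external-id` argument; the
    /// conversion of both is elided in this sketch.
    pub fn new(librdkafka_log_level: Level, aws_external_id: Option<AwsExternalId>) -> Self {
        ConnectorContext {
            librdkafka_log_level,
            aws_external_id,
        }
    }
}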