-
Notifications
You must be signed in to change notification settings - Fork 453
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improve internal opentelemetry logging #2128
Changes from 37 commits
7da284e
573994d
1bfd464
90470c3
5f1b990
47abb6d
46ce8af
bbfc03c
e5260d9
a79b636
197b12a
91e56ea
fa7148e
a78357c
a3fba36
54e0755
4c8cb42
5c4bab9
5e95e00
da3716d
62b9f83
591c45a
b6e2327
2714508
b0c129c
83763b1
e325316
19822b1
6b98c16
d248750
f9e3d2d
d4c1eb7
d5120a2
c103467
b7470b1
30f7a4c
adc3fd2
3c4d20c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -217,14 +217,26 @@ mod tests { | |
use opentelemetry_sdk::trace; | ||
use opentelemetry_sdk::trace::{Sampler, TracerProvider}; | ||
use tracing::error; | ||
use tracing_subscriber::layer::SubscriberExt; | ||
use tracing_subscriber::prelude::__tracing_subscriber_SubscriberExt; | ||
use tracing_subscriber::Layer; | ||
|
||
pub fn attributes_contains(log_record: &LogRecord, key: &Key, value: &AnyValue) -> bool { | ||
log_record | ||
.attributes_iter() | ||
.any(|(k, v)| k == key && v == value) | ||
} | ||
|
||
fn create_tracing_subscriber( | ||
_exporter: InMemoryLogsExporter, | ||
logger_provider: &LoggerProvider, | ||
) -> impl tracing::Subscriber { | ||
let level_filter = tracing_subscriber::filter::LevelFilter::WARN; // Capture WARN and ERROR levels | ||
let layer = | ||
layer::OpenTelemetryTracingBridge::new(logger_provider).with_filter(level_filter); // No filter based on target, only based on log level | ||
|
||
tracing_subscriber::registry().with(layer) | ||
} | ||
|
||
// cargo test --features=testing | ||
#[test] | ||
fn tracing_appender_standalone() { | ||
|
@@ -234,8 +246,7 @@ mod tests { | |
.with_simple_exporter(exporter.clone()) | ||
.build(); | ||
|
||
let layer = layer::OpenTelemetryTracingBridge::new(&logger_provider); | ||
let subscriber = tracing_subscriber::registry().with(layer); | ||
let subscriber = create_tracing_subscriber(exporter.clone(), &logger_provider); | ||
|
||
// avoiding setting tracing subscriber as global as that does not | ||
// play well with unit tests. | ||
|
@@ -315,8 +326,7 @@ mod tests { | |
.with_simple_exporter(exporter.clone()) | ||
.build(); | ||
|
||
let layer = layer::OpenTelemetryTracingBridge::new(&logger_provider); | ||
let subscriber = tracing_subscriber::registry().with(layer); | ||
let subscriber = create_tracing_subscriber(exporter.clone(), &logger_provider); | ||
|
||
// avoiding setting tracing subscriber as global as that does not | ||
// play well with unit tests. | ||
|
@@ -427,16 +437,15 @@ mod tests { | |
.with_simple_exporter(exporter.clone()) | ||
.build(); | ||
|
||
let layer = layer::OpenTelemetryTracingBridge::new(&logger_provider); | ||
let subscriber = tracing_subscriber::registry().with(layer); | ||
let subscriber = create_tracing_subscriber(exporter.clone(), &logger_provider); | ||
|
||
// avoiding setting tracing subscriber as global as that does not | ||
// play well with unit tests. | ||
let _guard = tracing::subscriber::set_default(subscriber); | ||
drop(tracing_log::LogTracer::init()); | ||
|
||
// Act | ||
log::error!("log from log crate"); | ||
log::error!(target: "my-system", "log from log crate"); | ||
logger_provider.force_flush(); | ||
|
||
// Assert TODO: move to helper methods | ||
|
@@ -493,8 +502,7 @@ mod tests { | |
.with_simple_exporter(exporter.clone()) | ||
.build(); | ||
|
||
let layer = layer::OpenTelemetryTracingBridge::new(&logger_provider); | ||
let subscriber = tracing_subscriber::registry().with(layer); | ||
let subscriber = create_tracing_subscriber(exporter.clone(), &logger_provider); | ||
|
||
// avoiding setting tracing subscriber as global as that does not | ||
// play well with unit tests. | ||
|
@@ -513,7 +521,7 @@ mod tests { | |
let span_id = cx.span().span_context().span_id(); | ||
|
||
// logging is done inside span context. | ||
log::error!("log from log crate"); | ||
log::error!(target: "my-system", "log from log crate"); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we need these changes? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, we can remove the target; will follow up in subsequent cleanups. |
||
(trace_id, span_id) | ||
}); | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,8 +14,9 @@ | |
use opentelemetry::{ | ||
global, | ||
logs::{LogError, LogResult}, | ||
InstrumentationLibrary, | ||
otel_error, otel_warn, InstrumentationLibrary, | ||
}; | ||
|
||
use std::sync::atomic::AtomicBool; | ||
use std::{cmp::min, env, sync::Mutex}; | ||
use std::{ | ||
|
@@ -98,16 +99,12 @@ | |
fn emit(&self, record: &mut LogRecord, instrumentation: &InstrumentationLibrary) { | ||
// noop after shutdown | ||
if self.is_shutdown.load(std::sync::atomic::Ordering::Relaxed) { | ||
otel_warn!( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This could continue to be the hot path even after shutdown, so it might not be a great idea to keep logging warnings for this. Let's add a TODO to revisit this and decide if we should remove the logging here or make it less severe. |
||
name: "simple_log_processor_emit_after_shutdown" | ||
); | ||
return; | ||
} | ||
|
||
#[cfg(feature = "experimental-internal-logs")] | ||
tracing::debug!( | ||
name: "simple_log_processor_emit", | ||
target: "opentelemetry-sdk", | ||
event_name = record.event_name | ||
); | ||
|
||
let result = self | ||
.exporter | ||
.lock() | ||
|
@@ -117,6 +114,10 @@ | |
futures_executor::block_on(exporter.export(LogBatch::new(log_tuple))) | ||
}); | ||
if let Err(err) = result { | ||
otel_error!( | ||
name: "simple_log_processor_emit_error", | ||
error = format!("{:?}", err) | ||
); | ||
global::handle_error(err); | ||
} | ||
} | ||
|
@@ -132,6 +133,9 @@ | |
exporter.shutdown(); | ||
Ok(()) | ||
} else { | ||
otel_error!( | ||
name: "simple_log_processor_shutdown_error" | ||
); | ||
Err(LogError::Other( | ||
"simple logprocessor mutex poison during shutdown".into(), | ||
)) | ||
|
@@ -167,6 +171,10 @@ | |
))); | ||
|
||
if let Err(err) = result { | ||
otel_error!( | ||
name: "batch_log_processor_emit_error", | ||
error = format!("{:?}", err) | ||
); | ||
global::handle_error(LogError::Other(err.into())); | ||
} | ||
} | ||
|
@@ -224,13 +232,6 @@ | |
// Log has finished, add to buffer of pending logs. | ||
BatchMessage::ExportLog(log) => { | ||
logs.push(log); | ||
#[cfg(feature = "experimental-internal-logs")] | ||
tracing::debug!( | ||
name: "batch_log_processor_record_count", | ||
target: "opentelemetry-sdk", | ||
current_batch_size = logs.len() | ||
); | ||
|
||
if logs.len() == config.max_export_batch_size { | ||
let result = export_with_timeout( | ||
config.max_export_timeout, | ||
|
@@ -241,6 +242,10 @@ | |
.await; | ||
|
||
if let Err(err) = result { | ||
otel_error!( | ||
name: "batch_log_processor_export_error", | ||
error = format!("{:?}", err) | ||
); | ||
global::handle_error(err); | ||
} | ||
} | ||
|
@@ -261,8 +266,18 @@ | |
"failed to send flush result: {:?}", | ||
result | ||
))); | ||
otel_error!( | ||
name: "batch_log_processor_flush_error", | ||
error = format!("{:?}", result), | ||
message = "Failed to send flush result" | ||
); | ||
} | ||
} else if let Err(err) = result { | ||
otel_error!( | ||
name: "batch_log_processor_flush_error", | ||
error = format!("{:?}", err), | ||
message = "Flush failed" | ||
); | ||
global::handle_error(err); | ||
} | ||
} | ||
|
@@ -279,6 +294,11 @@ | |
exporter.shutdown(); | ||
|
||
if let Err(result) = ch.send(result) { | ||
otel_error!( | ||
name: "batch_log_processor_shutdown_error", | ||
error = format!("{:?}", result), | ||
message = "Failed to send shutdown result" | ||
); | ||
global::handle_error(LogError::from(format!( | ||
"failed to send batch processor shutdown result: {:?}", | ||
result | ||
|
@@ -295,7 +315,6 @@ | |
} | ||
} | ||
})); | ||
|
||
// Return batch processor with link to worker | ||
BatchLogProcessor { message_sender } | ||
} | ||
|
@@ -338,7 +357,13 @@ | |
pin_mut!(timeout); | ||
match future::select(export, timeout).await { | ||
Either::Left((export_res, _)) => export_res, | ||
Either::Right((_, _)) => ExportResult::Err(LogError::ExportTimedOut(time_out)), | ||
Either::Right((_, _)) => { | ||
otel_error!( | ||
name: "export_with_timeout_timeout", | ||
timeout_duration = time_out.as_millis() | ||
); | ||
ExportResult::Err(LogError::ExportTimedOut(time_out)) | ||
} | ||
} | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,7 +17,7 @@ pub(crate) use aggregate::{AggregateBuilder, ComputeAggregation, Measure}; | |
pub(crate) use exponential_histogram::{EXPO_MAX_SCALE, EXPO_MIN_SCALE}; | ||
use once_cell::sync::Lazy; | ||
use opentelemetry::metrics::MetricsError; | ||
use opentelemetry::{global, KeyValue}; | ||
use opentelemetry::{global, otel_warn, KeyValue}; | ||
|
||
use crate::metrics::AttributeSet; | ||
|
||
|
@@ -147,6 +147,9 @@ impl<AU: AtomicallyUpdate<T>, T: Number, O: Operation> ValueMap<AU, T, O> { | |
O::update_tracker(&new_tracker, measurement, index); | ||
trackers.insert(STREAM_OVERFLOW_ATTRIBUTES.clone(), Arc::new(new_tracker)); | ||
global::handle_error(MetricsError::Other("Warning: Maximum data points for metric stream exceeded. Entry added to overflow. Subsequent overflows to same metric until next collect will not be logged.".into())); | ||
otel_warn!( name: "ValueMap.measure", | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These are user-actionable logs, so they should use an end-user-friendly name like "MetricCardinalityLimitHit" or similar. |
||
message = "Warning: Maximum data points for metric stream exceeded. Entry added to overflow. Subsequent overflows to same metric until next collect will not be logged." | ||
lalitb marked this conversation as resolved.
Show resolved
Hide resolved
|
||
); | ||
} | ||
} | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is the exporter required to be passed?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Forgot to remove it as part of updating the method. Will clean up in a subsequent PR.