-
Notifications
You must be signed in to change notification settings - Fork 490
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Global Log handler cleanup - Logs SDK #2184
Changes from 16 commits
d7fc754
f6e5cb9
76bafef
3006c16
d63957d
96e652a
a4a812f
886a583
0d5bf14
a8d772e
c25923d
b8819c4
0bd341b
4979249
f1182ff
40f7a5c
f695605
e99195b
a566aac
8d88392
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,8 @@ | ||
use super::{BatchLogProcessor, LogProcessor, LogRecord, SimpleLogProcessor, TraceContext}; | ||
use crate::{export::logs::LogExporter, runtime::RuntimeChannel, Resource}; | ||
use opentelemetry::otel_warn; | ||
use opentelemetry::{ | ||
global, | ||
logs::{LogError, LogResult}, | ||
otel_debug, | ||
trace::TraceContextExt, | ||
Context, InstrumentationLibrary, | ||
}; | ||
|
@@ -24,15 +23,14 @@ | |
inner: Arc::new(LoggerProviderInner { | ||
processors: Vec::new(), | ||
resource: Resource::empty(), | ||
is_shutdown: AtomicBool::new(true), | ||
}), | ||
is_shutdown: Arc::new(AtomicBool::new(true)), | ||
}); | ||
|
||
#[derive(Debug, Clone)] | ||
/// Creator for `Logger` instances. | ||
pub struct LoggerProvider { | ||
inner: Arc<LoggerProviderInner>, | ||
is_shutdown: Arc<AtomicBool>, | ||
} | ||
|
||
/// Default logger name if empty string is provided. | ||
|
@@ -73,7 +71,7 @@ | |
|
||
fn library_logger(&self, library: Arc<InstrumentationLibrary>) -> Self::Logger { | ||
// If the provider is shutdown, new logger will refer a no-op logger provider. | ||
if self.is_shutdown.load(Ordering::Relaxed) { | ||
if self.inner.is_shutdown.load(Ordering::Relaxed) { | ||
return Logger::new(library, NOOP_LOGGER_PROVIDER.clone()); | ||
} | ||
Logger::new(library, self.clone()) | ||
|
@@ -105,6 +103,7 @@ | |
/// Shuts down this `LoggerProvider` | ||
pub fn shutdown(&self) -> LogResult<()> { | ||
if self | ||
.inner | ||
.is_shutdown | ||
.compare_exchange(false, true, Ordering::SeqCst, Ordering::SeqCst) | ||
.is_ok() | ||
|
@@ -114,24 +113,36 @@ | |
let mut errs = vec![]; | ||
for processor in &self.inner.processors { | ||
if let Err(err) = processor.shutdown() { | ||
otel_warn!( | ||
name: "logger_provider_shutdown_error", | ||
error = format!("{:?}", err) | ||
); | ||
match err { | ||
// Specific handling for mutex poisoning | ||
LogError::MutexPoisoned(_) => { | ||
otel_debug!( | ||
name: "LoggerProvider.Shutdown.MutexPoisoned", | ||
); | ||
} | ||
_ => { | ||
otel_debug!( | ||
name: "LoggerProvider.Shutdown.Error", | ||
error = format!("{err}") | ||
); | ||
} | ||
} | ||
errs.push(err); | ||
} | ||
} | ||
|
||
if errs.is_empty() { | ||
Ok(()) | ||
} else { | ||
// consolidate errors from all the processors - not all may be user errors | ||
Err(LogError::Other(format!("{errs:?}").into())) | ||
} | ||
} else { | ||
otel_warn!( | ||
name: "logger_provider_already_shutdown" | ||
let error = LogError::AlreadyShutdown("LoggerProvider".to_string()); | ||
otel_debug!( | ||
name: "LoggerProvider.Shutdown.AlreadyShutdown", | ||
); | ||
Err(LogError::Other("logger provider already shut down".into())) | ||
Err(error) | ||
} | ||
} | ||
} | ||
|
@@ -140,14 +151,38 @@ | |
struct LoggerProviderInner { | ||
processors: Vec<Box<dyn LogProcessor>>, | ||
resource: Resource, | ||
is_shutdown: AtomicBool, | ||
} | ||
|
||
impl Drop for LoggerProviderInner { | ||
fn drop(&mut self) { | ||
for processor in &mut self.processors { | ||
if let Err(err) = processor.shutdown() { | ||
global::handle_error(err); | ||
if self | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree with this change, but please make it own separate PR, so we can keep this PR strictly for replacing global error handler with internal logger. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This was done in response to the comment - #2184 (comment). Should we keep it in this PR with separate changelog entry? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I always prefer short, very focused PRs! |
||
.is_shutdown | ||
.compare_exchange(false, true, Ordering::SeqCst, Ordering::SeqCst) | ||
.is_ok() | ||
{ | ||
for processor in &mut self.processors { | ||
if let Err(err) = processor.shutdown() { | ||
match err { | ||
// Specific handling for mutex poisoning | ||
LogError::MutexPoisoned(_) => { | ||
otel_debug!( | ||
name: "LoggerProvider.Drop.ShutdownMutexPoisoned", | ||
); | ||
} | ||
_ => { | ||
otel_debug!( | ||
name: "LoggerProvider.Drop.ShutdownError", | ||
error = format!("{err}") | ||
); | ||
} | ||
} | ||
} | ||
} | ||
} else { | ||
otel_debug!( | ||
name: "LoggerProvider.Drop.AlreadyShutdown", | ||
); | ||
} | ||
} | ||
} | ||
|
@@ -202,8 +237,8 @@ | |
inner: Arc::new(LoggerProviderInner { | ||
processors: self.processors, | ||
resource, | ||
is_shutdown: AtomicBool::new(false), | ||
}), | ||
is_shutdown: Arc::new(AtomicBool::new(false)), | ||
}; | ||
|
||
// invoke set_resource on all the processors | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,9 +12,8 @@ | |
#[cfg(feature = "logs_level_enabled")] | ||
use opentelemetry::logs::Severity; | ||
use opentelemetry::{ | ||
global, | ||
logs::{LogError, LogResult}, | ||
otel_error, otel_warn, InstrumentationLibrary, | ||
otel_debug, otel_error, otel_warn, InstrumentationLibrary, | ||
}; | ||
|
||
use std::sync::atomic::AtomicBool; | ||
|
@@ -99,26 +98,36 @@ | |
fn emit(&self, record: &mut LogRecord, instrumentation: &InstrumentationLibrary) { | ||
// noop after shutdown | ||
if self.is_shutdown.load(std::sync::atomic::Ordering::Relaxed) { | ||
// this is a warning, as the user is trying to log after the processor has been shutdown | ||
otel_warn!( | ||
name: "simple_log_processor_emit_after_shutdown" | ||
name: "SimpleLogProcessor.Emit.ProcessorShutdown", | ||
); | ||
return; | ||
} | ||
|
||
let result = self | ||
.exporter | ||
.lock() | ||
.map_err(|_| LogError::Other("simple logprocessor mutex poison".into())) | ||
.map_err(|_| LogError::MutexPoisoned("SimpleLogProcessor".into())) | ||
.and_then(|mut exporter| { | ||
let log_tuple = &[(record as &LogRecord, instrumentation)]; | ||
futures_executor::block_on(exporter.export(LogBatch::new(log_tuple))) | ||
}); | ||
if let Err(err) = result { | ||
otel_error!( | ||
name: "simple_log_processor_emit_error", | ||
error = format!("{:?}", err) | ||
); | ||
global::handle_error(err); | ||
// Handle errors with specific static names | ||
match result { | ||
Err(LogError::MutexPoisoned(_)) => { | ||
// logging as debug as this is not a user error | ||
otel_debug!( | ||
name: "SimpleLogProcessor.Emit.MutexPoisoning", | ||
); | ||
} | ||
Err(err) => { | ||
otel_error!( | ||
name: "SimpleLogProcessor.Emit.ExportError", | ||
error = format!("{}",err) | ||
); | ||
} | ||
_ => {} | ||
} | ||
} | ||
|
||
|
@@ -133,12 +142,7 @@ | |
exporter.shutdown(); | ||
Ok(()) | ||
} else { | ||
otel_error!( | ||
name: "simple_log_processor_shutdown_error" | ||
); | ||
Err(LogError::Other( | ||
"simple logprocessor mutex poison during shutdown".into(), | ||
)) | ||
Err(LogError::MutexPoisoned("SimpleLogProcessor".into())) | ||
} | ||
} | ||
|
||
|
@@ -170,12 +174,12 @@ | |
instrumentation.clone(), | ||
))); | ||
|
||
// TODO - Implement throttling to prevent error flooding when the queue is full or closed. | ||
if let Err(err) = result { | ||
otel_error!( | ||
name: "batch_log_processor_emit_error", | ||
error = format!("{:?}", err) | ||
name: "BatchLogProcessor.Export.Error", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. will this be triggered when channel is full? If yes, we need to rethink this, as this can spam the log output. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, this error only triggers when channel is full or closed. We need to add some throttling or logic to prevent flooding - have added the TODO for now, as we need common strategy for such flooding. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agree we need a common strategy, but lets remove the error log from here. It'll flood as-is when buffer is full. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should we remove altogether, or make it otel_debug for now - with comment to change it to otel_error once throttling is ready. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. either of them are fine with me, though I slightly prefer removing altogether, as I don't know if we can ship a throttling solution for next release. |
||
error = format!("{}", err) | ||
); | ||
global::handle_error(LogError::Other(err.into())); | ||
} | ||
} | ||
|
||
|
@@ -243,10 +247,9 @@ | |
|
||
if let Err(err) = result { | ||
otel_error!( | ||
name: "batch_log_processor_export_error", | ||
error = format!("{:?}", err) | ||
name: "BatchLogProcessor.Export.Error", | ||
error = format!("{}", err) | ||
); | ||
global::handle_error(err); | ||
} | ||
} | ||
} | ||
|
@@ -261,24 +264,12 @@ | |
.await; | ||
|
||
if let Some(channel) = res_channel { | ||
if let Err(result) = channel.send(result) { | ||
global::handle_error(LogError::from(format!( | ||
"failed to send flush result: {:?}", | ||
result | ||
))); | ||
otel_error!( | ||
name: "batch_log_processor_flush_error", | ||
error = format!("{:?}", result), | ||
message = "Failed to send flush result" | ||
if let Err(send_error) = channel.send(result) { | ||
otel_debug!( | ||
name: "BatchLogProcessor.Flush.SendResultError", | ||
cijothomas marked this conversation as resolved.
Show resolved
Hide resolved
|
||
error = format!("{:?}", send_error), | ||
); | ||
} | ||
} else if let Err(err) = result { | ||
otel_error!( | ||
name: "batch_log_processor_flush_error", | ||
error = format!("{:?}", err), | ||
message = "Flush failed" | ||
); | ||
global::handle_error(err); | ||
} | ||
} | ||
// Stream has terminated or processor is shutdown, return to finish execution. | ||
|
@@ -293,21 +284,14 @@ | |
|
||
exporter.shutdown(); | ||
|
||
if let Err(result) = ch.send(result) { | ||
otel_error!( | ||
name: "batch_log_processor_shutdown_error", | ||
error = format!("{:?}", result), | ||
message = "Failed to send shutdown result" | ||
if let Err(send_error) = ch.send(result) { | ||
otel_debug!( | ||
name: "BatchLogProcessor.Shutdown.SendResultError", | ||
error = format!("{:?}", send_error), | ||
); | ||
global::handle_error(LogError::from(format!( | ||
"failed to send batch processor shutdown result: {:?}", | ||
result | ||
))); | ||
} | ||
|
||
break; | ||
} | ||
|
||
// propagate the resource | ||
BatchMessage::SetResource(resource) => { | ||
exporter.set_resource(&resource); | ||
|
@@ -357,13 +341,7 @@ | |
pin_mut!(timeout); | ||
match future::select(export, timeout).await { | ||
Either::Left((export_res, _)) => export_res, | ||
Either::Right((_, _)) => { | ||
otel_error!( | ||
name: "export_with_timeout_timeout", | ||
timeout_duration = time_out.as_millis() | ||
); | ||
ExportResult::Err(LogError::ExportTimedOut(time_out)) | ||
} | ||
Either::Right((_, _)) => ExportResult::Err(LogError::ExportTimedOut(time_out)), | ||
} | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -50,11 +50,30 @@ macro_rules! otel_warn { | |
{ | ||
tracing::warn!(name: $name, target: env!("CARGO_PKG_NAME"), ""); | ||
} | ||
#[cfg(not(feature = "internal-logs"))] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agree with this change, but for future - please make them into own separate PR to make it easier to review. |
||
{ | ||
#[allow(unused_variables)] | ||
{ | ||
|
||
} | ||
} | ||
}; | ||
(name: $name:expr, $($key:ident = $value:expr),+ $(,)?) => { | ||
#[cfg(feature = "internal-logs")] | ||
{ | ||
tracing::warn!(name: $name, target: env!("CARGO_PKG_NAME"), $($key = $value),+, ""); | ||
tracing::warn!(name: $name, | ||
target: env!("CARGO_PKG_NAME"), | ||
$($key = { | ||
$value | ||
}),+, | ||
"" | ||
) | ||
} | ||
#[cfg(not(feature = "internal-logs"))] | ||
{ | ||
{ | ||
let _ = ($name, $($value),+); | ||
} | ||
} | ||
}; | ||
} | ||
|
@@ -104,11 +123,31 @@ macro_rules! otel_error { | |
{ | ||
tracing::error!(name: $name, target: env!("CARGO_PKG_NAME"), ""); | ||
} | ||
#[cfg(not(feature = "internal-logs"))] | ||
{ | ||
#[allow(unused_variables)] | ||
{ | ||
|
||
} | ||
} | ||
}; | ||
(name: $name:expr, $($key:ident = $value:expr),+ $(,)?) => { | ||
#[cfg(feature = "internal-logs")] | ||
{ | ||
tracing::error!(name: $name, target: env!("CARGO_PKG_NAME"), $($key = $value),+, ""); | ||
tracing::error!(name: $name, | ||
target: env!("CARGO_PKG_NAME"), | ||
$($key = { | ||
$value | ||
}),+, | ||
"" | ||
) | ||
} | ||
#[cfg(not(feature = "internal-logs"))] | ||
{ | ||
{ | ||
let _ = ($name, $($value),+); | ||
cijothomas marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
} | ||
} | ||
}; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this looks redundant, as returned Result already contains this information. If we need to keep this, then maybe keep it at debug/info level.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So if the error is returned as Result to the user, we decide not to log it even if this could be actionable ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
not to log or log at a very low severity.
Do you see any reason why this should be logged, given Result already carries this information? "Internal logging" is more for things that are otherwise hard to figure out, right?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I prefer logging at a lower severity (debug?) for internal debugging. When we request logs from the customer at the debug level, they should include everything necessary for troubleshooting, even if the customer disregards the shutdown result.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That is a good point! Agree with debug! here. It won't cause noise in normal operations, but when we ask users to report bug/issues, we can ask to collect all logs, including debug, so we'll have enough to investigate.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Changed this to debug now.