Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve internal opentelemetry logging #2128

Merged
merged 38 commits into from
Oct 4, 2024
Merged
Show file tree
Hide file tree
Changes from 37 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
7da284e
initial commit
lalitb Sep 18, 2024
573994d
instrument internal code
lalitb Sep 19, 2024
1bfd464
instrument..
lalitb Sep 19, 2024
90470c3
instrument..
lalitb Sep 19, 2024
5f1b990
update example
lalitb Sep 19, 2024
47abb6d
Merge branch 'main' into log-handler-using-tracing
lalitb Sep 19, 2024
46ce8af
update example
lalitb Sep 19, 2024
bbfc03c
Merge branch 'log-handler-using-tracing' of github.com:lalitb/opentel…
lalitb Sep 19, 2024
e5260d9
changes for filtering
lalitb Sep 19, 2024
a79b636
Merge branch 'main' into log-handler-using-tracing
TommyCpp Sep 21, 2024
197b12a
use structured logging
lalitb Sep 23, 2024
91e56ea
review comments
lalitb Sep 27, 2024
fa7148e
update self diagnostic
lalitb Sep 27, 2024
a78357c
allow internal events in pipeline
lalitb Sep 27, 2024
a3fba36
Merge branch 'main' into log-handler-using-tracing
lalitb Sep 27, 2024
54e0755
fix macros
lalitb Oct 1, 2024
4c8cb42
revert appender-tracing example
lalitb Oct 1, 2024
5c4bab9
Merge branch 'main' into log-handler-using-tracing
lalitb Oct 1, 2024
5e95e00
update instrument
lalitb Oct 2, 2024
da3716d
cont..
lalitb Oct 2, 2024
62b9f83
lint fix
lalitb Oct 2, 2024
591c45a
fix macro to dump operation name as name keyt
lalitb Oct 2, 2024
b6e2327
Merge branch 'main' into log-handler-using-tracing
lalitb Oct 2, 2024
2714508
fix unit test for appender-tracing
lalitb Oct 3, 2024
b0c129c
Merge branch 'log-handler-using-tracing' of github.com:lalitb/opentel…
lalitb Oct 3, 2024
83763b1
fix unit tests, and doc test
lalitb Oct 3, 2024
e325316
add macros under feature flag
lalitb Oct 3, 2024
19822b1
make feature default
lalitb Oct 3, 2024
6b98c16
fix build
lalitb Oct 3, 2024
d248750
fix lint
lalitb Oct 3, 2024
f9e3d2d
lint
lalitb Oct 3, 2024
d4c1eb7
use eprintln for warn and error if internal-logs flag is disabled
lalitb Oct 3, 2024
d5120a2
review comments
lalitb Oct 3, 2024
c103467
remove tracing from opentelemetry crate, and use name as metadata
lalitb Oct 3, 2024
b7470b1
fix
lalitb Oct 3, 2024
30f7a4c
remove tracing from otel dev dep
lalitb Oct 3, 2024
adc3fd2
remove debug
lalitb Oct 3, 2024
3c4d20c
Update opentelemetry-sdk/src/metrics/internal/mod.rs
lalitb Oct 3, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions examples/self-diagnostics/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@ publish = false

[dependencies]
opentelemetry = { path = "../../opentelemetry" }
opentelemetry_sdk = { path = "../../opentelemetry-sdk", features = ["rt-tokio", "experimental-internal-logs"]}
opentelemetry_sdk = { path = "../../opentelemetry-sdk", features = ["rt-tokio"]}
opentelemetry-stdout = { path = "../../opentelemetry-stdout"}
opentelemetry-appender-tracing = { path = "../../opentelemetry-appender-tracing"}
tokio = { workspace = true, features = ["full"] }
tracing = { workspace = true, features = ["std"]}
tracing-core = { workspace = true }
tracing-subscriber = { version = "0.3.18", features = ["env-filter","registry", "std"]}
opentelemetry-otlp = { path = "../../opentelemetry-otlp", features = ["http-proto", "reqwest-client", "logs", "experimental-internal-logs"] }
opentelemetry-otlp = { path = "../../opentelemetry-otlp", features = ["http-proto", "reqwest-client", "logs"] }
once_cell ={ version = "1.19.0"}
ctrlc = "3.4"
1 change: 1 addition & 0 deletions opentelemetry-appender-tracing/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ tracing-subscriber = { workspace = true, features = ["registry", "std"] }
log = { workspace = true }
opentelemetry-stdout = { path = "../opentelemetry-stdout", features = ["logs"] }
opentelemetry_sdk = { path = "../opentelemetry-sdk", features = ["logs", "testing"] }
tracing-subscriber = { workspace = true, features = ["registry", "std", "env-filter"] }
tracing-log = "0.2"
async-trait = { workspace = true }
criterion = { workspace = true }
Expand Down
30 changes: 19 additions & 11 deletions opentelemetry-appender-tracing/src/layer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -217,14 +217,26 @@ mod tests {
use opentelemetry_sdk::trace;
use opentelemetry_sdk::trace::{Sampler, TracerProvider};
use tracing::error;
use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::prelude::__tracing_subscriber_SubscriberExt;
use tracing_subscriber::Layer;

pub fn attributes_contains(log_record: &LogRecord, key: &Key, value: &AnyValue) -> bool {
log_record
.attributes_iter()
.any(|(k, v)| k == key && v == value)
}

fn create_tracing_subscriber(
_exporter: InMemoryLogsExporter,
logger_provider: &LoggerProvider,
) -> impl tracing::Subscriber {
let level_filter = tracing_subscriber::filter::LevelFilter::WARN; // Capture WARN and ERROR levels
let layer =
layer::OpenTelemetryTracingBridge::new(logger_provider).with_filter(level_filter); // No filter based on target, only based on log level

tracing_subscriber::registry().with(layer)
}

// cargo test --features=testing
#[test]
fn tracing_appender_standalone() {
Expand All @@ -234,8 +246,7 @@ mod tests {
.with_simple_exporter(exporter.clone())
.build();

let layer = layer::OpenTelemetryTracingBridge::new(&logger_provider);
let subscriber = tracing_subscriber::registry().with(layer);
let subscriber = create_tracing_subscriber(exporter.clone(), &logger_provider);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is exporter required to be passed?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Forgot to remove as part of updating the method. Will cleanup in subsequent PR.


// avoiding setting tracing subscriber as global as that does not
// play well with unit tests.
Expand Down Expand Up @@ -315,8 +326,7 @@ mod tests {
.with_simple_exporter(exporter.clone())
.build();

let layer = layer::OpenTelemetryTracingBridge::new(&logger_provider);
let subscriber = tracing_subscriber::registry().with(layer);
let subscriber = create_tracing_subscriber(exporter.clone(), &logger_provider);

// avoiding setting tracing subscriber as global as that does not
// play well with unit tests.
Expand Down Expand Up @@ -427,16 +437,15 @@ mod tests {
.with_simple_exporter(exporter.clone())
.build();

let layer = layer::OpenTelemetryTracingBridge::new(&logger_provider);
let subscriber = tracing_subscriber::registry().with(layer);
let subscriber = create_tracing_subscriber(exporter.clone(), &logger_provider);

// avoiding setting tracing subscriber as global as that does not
// play well with unit tests.
let _guard = tracing::subscriber::set_default(subscriber);
drop(tracing_log::LogTracer::init());

// Act
log::error!("log from log crate");
log::error!(target: "my-system", "log from log crate");
logger_provider.force_flush();

// Assert TODO: move to helper methods
Expand Down Expand Up @@ -493,8 +502,7 @@ mod tests {
.with_simple_exporter(exporter.clone())
.build();

let layer = layer::OpenTelemetryTracingBridge::new(&logger_provider);
let subscriber = tracing_subscriber::registry().with(layer);
let subscriber = create_tracing_subscriber(exporter.clone(), &logger_provider);

// avoiding setting tracing subscriber as global as that does not
// play well with unit tests.
Expand All @@ -513,7 +521,7 @@ mod tests {
let span_id = cx.span().span_context().span_id();

// logging is done inside span context.
log::error!("log from log crate");
log::error!(target: "my-system", "log from log crate");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we need these changes?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes we can remove the target, will follow up in subsequent cleanups.

(trace_id, span_id)
});

Expand Down
4 changes: 2 additions & 2 deletions opentelemetry-otlp/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,12 @@ trace = ["opentelemetry/trace", "opentelemetry_sdk/trace", "opentelemetry-proto/
metrics = ["opentelemetry/metrics", "opentelemetry_sdk/metrics", "opentelemetry-proto/metrics"]
logs = ["opentelemetry/logs", "opentelemetry_sdk/logs", "opentelemetry-proto/logs"]
populate-logs-event-name = ["opentelemetry-proto/populate-logs-event-name"]
experimental-internal-logs = ["tracing"]
internal-logs = ["tracing"]

# add ons
serialize = ["serde", "serde_json"]

default = ["grpc-tonic", "trace", "metrics", "logs"]
default = ["grpc-tonic", "trace", "metrics", "logs", "internal-logs"]

# grpc using tonic
grpc-tonic = ["tonic", "prost", "http", "tokio", "opentelemetry-proto/gen-tonic"]
Expand Down
24 changes: 1 addition & 23 deletions opentelemetry-otlp/src/metric.rs
Original file line number Diff line number Diff line change
Expand Up @@ -294,29 +294,7 @@
#[async_trait]
impl PushMetricsExporter for MetricsExporter {
async fn export(&self, metrics: &mut ResourceMetrics) -> Result<()> {
#[cfg(feature = "experimental-internal-logs")]
tracing::debug!(
name = "export_metrics",
target = "opentelemetry-otlp",
metrics_count = metrics
.scope_metrics
.iter()
.map(|scope| scope.metrics.len())
.sum::<usize>(),
status = "started"
);
let result = self.client.export(metrics).await;
#[cfg(feature = "experimental-internal-logs")]
tracing::debug!(
name = "export_metrics",
target = "opentelemetry-otlp",
status = if result.is_ok() {
"completed"
} else {
"failed"
}
);
result
self.client.export(metrics).await

Check warning on line 297 in opentelemetry-otlp/src/metric.rs

View check run for this annotation

Codecov / codecov/patch

opentelemetry-otlp/src/metric.rs#L297

Added line #L297 was not covered by tests
}

async fn force_flush(&self) -> Result<()> {
Expand Down
4 changes: 2 additions & 2 deletions opentelemetry-sdk/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ temp-env = { workspace = true }
pprof = { version = "0.13", features = ["flamegraph", "criterion"] }

[features]
default = ["trace", "metrics", "logs"]
default = ["trace", "metrics", "logs", "internal-logs"]
trace = ["opentelemetry/trace", "rand", "async-trait", "percent-encoding"]
jaeger_remote_sampler = ["trace", "opentelemetry-http", "http", "serde", "serde_json", "url"]
logs = ["opentelemetry/logs", "async-trait", "serde_json"]
Expand All @@ -52,7 +52,7 @@ testing = ["opentelemetry/testing", "trace", "metrics", "logs", "rt-async-std",
rt-tokio = ["tokio", "tokio-stream"]
rt-tokio-current-thread = ["tokio", "tokio-stream"]
rt-async-std = ["async-std"]
experimental-internal-logs = ["tracing"]
internal-logs = ["tracing"]

[[bench]]
name = "context"
Expand Down
9 changes: 8 additions & 1 deletion opentelemetry-sdk/src/logs/log_emitter.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use super::{BatchLogProcessor, LogProcessor, LogRecord, SimpleLogProcessor, TraceContext};
use crate::{export::logs::LogExporter, runtime::RuntimeChannel, Resource};
use opentelemetry::otel_warn;
use opentelemetry::{
global,
logs::{LogError, LogResult},
Expand Down Expand Up @@ -49,7 +50,6 @@
attributes: Option<Vec<opentelemetry::KeyValue>>,
) -> Logger {
let name = name.into();

let component_name = if name.is_empty() {
Cow::Borrowed(DEFAULT_COMPONENT_NAME)
} else {
Expand Down Expand Up @@ -114,6 +114,10 @@
let mut errs = vec![];
for processor in &self.inner.processors {
if let Err(err) = processor.shutdown() {
otel_warn!(

Check warning on line 117 in opentelemetry-sdk/src/logs/log_emitter.rs

View check run for this annotation

Codecov / codecov/patch

opentelemetry-sdk/src/logs/log_emitter.rs#L117

Added line #L117 was not covered by tests
name: "logger_provider_shutdown_error",
error = format!("{:?}", err)

Check warning on line 119 in opentelemetry-sdk/src/logs/log_emitter.rs

View check run for this annotation

Codecov / codecov/patch

opentelemetry-sdk/src/logs/log_emitter.rs#L119

Added line #L119 was not covered by tests
);
errs.push(err);
}
}
Expand All @@ -124,6 +128,9 @@
Err(LogError::Other(format!("{errs:?}").into()))
}
} else {
otel_warn!(
name: "logger_provider_already_shutdown"
);
Err(LogError::Other("logger provider already shut down".into()))
}
}
Expand Down
59 changes: 42 additions & 17 deletions opentelemetry-sdk/src/logs/log_processor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@
use opentelemetry::{
global,
logs::{LogError, LogResult},
InstrumentationLibrary,
otel_error, otel_warn, InstrumentationLibrary,
};

use std::sync::atomic::AtomicBool;
use std::{cmp::min, env, sync::Mutex};
use std::{
Expand Down Expand Up @@ -98,16 +99,12 @@
fn emit(&self, record: &mut LogRecord, instrumentation: &InstrumentationLibrary) {
// noop after shutdown
if self.is_shutdown.load(std::sync::atomic::Ordering::Relaxed) {
otel_warn!(
Copy link
Contributor

@utpilla utpilla Oct 3, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could continue to be the hot path even after shutdown so it might not be a great idea to keep logging warnings for this. Let's a TODO to revisit this and decide if we should remove the logging here or make it less severe.

name: "simple_log_processor_emit_after_shutdown"
);
return;
}

#[cfg(feature = "experimental-internal-logs")]
tracing::debug!(
name: "simple_log_processor_emit",
target: "opentelemetry-sdk",
event_name = record.event_name
);

let result = self
.exporter
.lock()
Expand All @@ -117,6 +114,10 @@
futures_executor::block_on(exporter.export(LogBatch::new(log_tuple)))
});
if let Err(err) = result {
otel_error!(

Check warning on line 117 in opentelemetry-sdk/src/logs/log_processor.rs

View check run for this annotation

Codecov / codecov/patch

opentelemetry-sdk/src/logs/log_processor.rs#L117

Added line #L117 was not covered by tests
name: "simple_log_processor_emit_error",
error = format!("{:?}", err)

Check warning on line 119 in opentelemetry-sdk/src/logs/log_processor.rs

View check run for this annotation

Codecov / codecov/patch

opentelemetry-sdk/src/logs/log_processor.rs#L119

Added line #L119 was not covered by tests
);
global::handle_error(err);
}
}
Expand All @@ -132,6 +133,9 @@
exporter.shutdown();
Ok(())
} else {
otel_error!(
name: "simple_log_processor_shutdown_error"
);

Check warning on line 138 in opentelemetry-sdk/src/logs/log_processor.rs

View check run for this annotation

Codecov / codecov/patch

opentelemetry-sdk/src/logs/log_processor.rs#L136-L138

Added lines #L136 - L138 were not covered by tests
Err(LogError::Other(
"simple logprocessor mutex poison during shutdown".into(),
))
Expand Down Expand Up @@ -167,6 +171,10 @@
)));

if let Err(err) = result {
otel_error!(
name: "batch_log_processor_emit_error",
error = format!("{:?}", err)

Check warning on line 176 in opentelemetry-sdk/src/logs/log_processor.rs

View check run for this annotation

Codecov / codecov/patch

opentelemetry-sdk/src/logs/log_processor.rs#L176

Added line #L176 was not covered by tests
);
global::handle_error(LogError::Other(err.into()));
}
}
Expand Down Expand Up @@ -224,13 +232,6 @@
// Log has finished, add to buffer of pending logs.
BatchMessage::ExportLog(log) => {
logs.push(log);
#[cfg(feature = "experimental-internal-logs")]
tracing::debug!(
name: "batch_log_processor_record_count",
target: "opentelemetry-sdk",
current_batch_size = logs.len()
);

if logs.len() == config.max_export_batch_size {
let result = export_with_timeout(
config.max_export_timeout,
Expand All @@ -241,6 +242,10 @@
.await;

if let Err(err) = result {
otel_error!(

Check warning on line 245 in opentelemetry-sdk/src/logs/log_processor.rs

View check run for this annotation

Codecov / codecov/patch

opentelemetry-sdk/src/logs/log_processor.rs#L245

Added line #L245 was not covered by tests
name: "batch_log_processor_export_error",
error = format!("{:?}", err)

Check warning on line 247 in opentelemetry-sdk/src/logs/log_processor.rs

View check run for this annotation

Codecov / codecov/patch

opentelemetry-sdk/src/logs/log_processor.rs#L247

Added line #L247 was not covered by tests
);
global::handle_error(err);
}
}
Expand All @@ -261,8 +266,18 @@
"failed to send flush result: {:?}",
result
)));
otel_error!(

Check warning on line 269 in opentelemetry-sdk/src/logs/log_processor.rs

View check run for this annotation

Codecov / codecov/patch

opentelemetry-sdk/src/logs/log_processor.rs#L269

Added line #L269 was not covered by tests
name: "batch_log_processor_flush_error",
error = format!("{:?}", result),

Check warning on line 271 in opentelemetry-sdk/src/logs/log_processor.rs

View check run for this annotation

Codecov / codecov/patch

opentelemetry-sdk/src/logs/log_processor.rs#L271

Added line #L271 was not covered by tests
message = "Failed to send flush result"
);
}
} else if let Err(err) = result {
otel_error!(

Check warning on line 276 in opentelemetry-sdk/src/logs/log_processor.rs

View check run for this annotation

Codecov / codecov/patch

opentelemetry-sdk/src/logs/log_processor.rs#L276

Added line #L276 was not covered by tests
name: "batch_log_processor_flush_error",
error = format!("{:?}", err),

Check warning on line 278 in opentelemetry-sdk/src/logs/log_processor.rs

View check run for this annotation

Codecov / codecov/patch

opentelemetry-sdk/src/logs/log_processor.rs#L278

Added line #L278 was not covered by tests
message = "Flush failed"
);
global::handle_error(err);
}
}
Expand All @@ -279,6 +294,11 @@
exporter.shutdown();

if let Err(result) = ch.send(result) {
otel_error!(

Check warning on line 297 in opentelemetry-sdk/src/logs/log_processor.rs

View check run for this annotation

Codecov / codecov/patch

opentelemetry-sdk/src/logs/log_processor.rs#L297

Added line #L297 was not covered by tests
name: "batch_log_processor_shutdown_error",
error = format!("{:?}", result),

Check warning on line 299 in opentelemetry-sdk/src/logs/log_processor.rs

View check run for this annotation

Codecov / codecov/patch

opentelemetry-sdk/src/logs/log_processor.rs#L299

Added line #L299 was not covered by tests
message = "Failed to send shutdown result"
);
global::handle_error(LogError::from(format!(
"failed to send batch processor shutdown result: {:?}",
result
Expand All @@ -295,7 +315,6 @@
}
}
}));

// Return batch processor with link to worker
BatchLogProcessor { message_sender }
}
Expand Down Expand Up @@ -338,7 +357,13 @@
pin_mut!(timeout);
match future::select(export, timeout).await {
Either::Left((export_res, _)) => export_res,
Either::Right((_, _)) => ExportResult::Err(LogError::ExportTimedOut(time_out)),
Either::Right((_, _)) => {
otel_error!(

Check warning on line 361 in opentelemetry-sdk/src/logs/log_processor.rs

View check run for this annotation

Codecov / codecov/patch

opentelemetry-sdk/src/logs/log_processor.rs#L361

Added line #L361 was not covered by tests
name: "export_with_timeout_timeout",
timeout_duration = time_out.as_millis()

Check warning on line 363 in opentelemetry-sdk/src/logs/log_processor.rs

View check run for this annotation

Codecov / codecov/patch

opentelemetry-sdk/src/logs/log_processor.rs#L363

Added line #L363 was not covered by tests
);
ExportResult::Err(LogError::ExportTimedOut(time_out))

Check warning on line 365 in opentelemetry-sdk/src/logs/log_processor.rs

View check run for this annotation

Codecov / codecov/patch

opentelemetry-sdk/src/logs/log_processor.rs#L365

Added line #L365 was not covered by tests
}
}
}

Expand Down
5 changes: 4 additions & 1 deletion opentelemetry-sdk/src/metrics/internal/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ pub(crate) use aggregate::{AggregateBuilder, ComputeAggregation, Measure};
pub(crate) use exponential_histogram::{EXPO_MAX_SCALE, EXPO_MIN_SCALE};
use once_cell::sync::Lazy;
use opentelemetry::metrics::MetricsError;
use opentelemetry::{global, KeyValue};
use opentelemetry::{global, otel_warn, KeyValue};

use crate::metrics::AttributeSet;

Expand Down Expand Up @@ -147,6 +147,9 @@ impl<AU: AtomicallyUpdate<T>, T: Number, O: Operation> ValueMap<AU, T, O> {
O::update_tracker(&new_tracker, measurement, index);
trackers.insert(STREAM_OVERFLOW_ATTRIBUTES.clone(), Arc::new(new_tracker));
global::handle_error(MetricsError::Other("Warning: Maximum data points for metric stream exceeded. Entry added to overflow. Subsequent overflows to same metric until next collect will not be logged.".into()));
otel_warn!( name: "ValueMap.measure",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are user actionable logs, so it should use a end user friendly name like "MetricCarinalityLimitHit"/similar,.
Good to follow up separately on all logs.

message = "Warning: Maximum data points for metric stream exceeded. Entry added to overflow. Subsequent overflows to same metric until next collect will not be logged."
lalitb marked this conversation as resolved.
Show resolved Hide resolved
);
}
}
}
Expand Down
Loading
Loading