From 30a317df22ad1987340eb2a962a98975075fc0ff Mon Sep 17 00:00:00 2001
From: Mike Goldsmith
Date: Thu, 12 Sep 2024 13:05:30 +0100
Subject: [PATCH 1/3] feat: Improve log messages to be more informative

---
 cmd/refinery/main.go     |  2 +-
 collect/collect.go       | 11 ++++++-----
 collect/stressRelief.go  |  2 +-
 metrics/otel_metrics.go  | 10 +++++-----
 sharder/deterministic.go |  2 +-
 5 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/cmd/refinery/main.go b/cmd/refinery/main.go
index 24b45b3da6..d708c7915a 100644
--- a/cmd/refinery/main.go
+++ b/cmd/refinery/main.go
@@ -334,5 +334,5 @@ func main() {
 	// unregister ourselves before we go
 	close(done)
 	time.Sleep(100 * time.Millisecond)
-	a.Logger.Error().Logf("Caught signal \"%s\"", sig)
+	a.Logger.Error().WithField("signal", sig).Logf("Caught OS signal")
 }
diff --git a/collect/collect.go b/collect/collect.go
index fcd6430015..a1f001a79d 100644
--- a/collect/collect.go
+++ b/collect/collect.go
@@ -30,7 +30,7 @@ import (
 	"github.com/sirupsen/logrus"
 )
 
-var ErrWouldBlock = errors.New("not adding span, channel buffer is full")
+var ErrWouldBlock = errors.New("not adding span, channel buffer is full. Span will not be processed and will be lost.")
 var CollectorHealthKey = "collector"
 
 type Collector interface {
@@ -270,7 +270,7 @@ func (i *InMemCollector) checkAlloc() {
 		WithField("num_traces_sent", len(tracesSent)).
 		WithField("datasize_sent", totalDataSizeSent).
 		WithField("new_trace_count", i.cache.GetCacheCapacity()).
-		Logf("evicting large traces early due to memory overage")
+		Logf("evicting large traces early due to memory overage. Evicted traces will be processed and if selected by sampling, sent to Honeycomb.")
 
 	// Manually GC here - without this we can easily end up evicting more than we
 	// need to, since total alloc won't be updated until after a GC pass.
@@ -796,7 +796,7 @@ func (i *InMemCollector) send(trace *types.Trace, sendReason string) {
 	// if we're supposed to drop this trace, and dry run mode is not enabled, then we're done.
 	if !shouldSend && !i.Config.GetIsDryRun() {
 		i.Metrics.Increment("trace_send_dropped")
-		i.Logger.Info().WithFields(logFields).Logf("Dropping trace because of sampling")
+		i.Logger.Info().WithFields(logFields).Logf("Dropping trace because of sampling decision")
 		return
 	}
 	i.Metrics.Increment("trace_send_kept")
@@ -805,9 +805,10 @@ func (i *InMemCollector) send(trace *types.Trace, sendReason string) {
 
 	// ok, we're not dropping this trace; send all the spans
 	if i.Config.GetIsDryRun() && !shouldSend {
-		i.Logger.Info().WithFields(logFields).Logf("Trace would have been dropped, but dry run mode is enabled")
+		i.Logger.Info().WithFields(logFields).Logf("Trace would have been dropped, but sending because dry run mode is enabled")
+	} else {
+		i.Logger.Info().WithFields(logFields).Logf("Sending trace")
 	}
-	i.Logger.Info().WithFields(logFields).Logf("Sending trace")
 	for _, sp := range trace.GetSpans() {
 		if i.Config.GetAddRuleReasonToTrace() {
 			sp.Data["meta.refinery.reason"] = reason
diff --git a/collect/stressRelief.go b/collect/stressRelief.go
index 4e77caa265..893d43b7ba 100644
--- a/collect/stressRelief.go
+++ b/collect/stressRelief.go
@@ -255,7 +255,7 @@ func (s *StressRelief) UpdateFromConfig(cfg config.StressReliefConfig) {
 		s.mode = Always
 	default: // validation shouldn't let this happen but we'll be safe...
 		s.mode = Never
-		s.Logger.Error().Logf("StressRelief mode is '%s' which shouldn't happen", cfg.Mode)
+		s.Logger.Error().WithString("mode", cfg.Mode).Logf("Invalid StressRelief mode")
 	}
 	s.Logger.Debug().WithField("mode", s.mode).Logf("setting StressRelief mode")
 
diff --git a/metrics/otel_metrics.go b/metrics/otel_metrics.go
index 1fc12899ae..7688e1ecf0 100644
--- a/metrics/otel_metrics.go
+++ b/metrics/otel_metrics.go
@@ -189,7 +189,7 @@ func (o *OTelMetrics) Register(name string, metricType string) {
 	case "counter":
 		ctr, err := o.meter.Int64Counter(name)
 		if err != nil {
-			o.Logger.Error().WithString("msg", "failed to create counter").WithString("name", name)
+			o.Logger.Error().WithString("name", name).Logf("failed to create counter")
 			return
 		}
 		o.counters[name] = ctr
@@ -210,26 +210,26 @@ func (o *OTelMetrics) Register(name string, metricType string) {
 			metric.WithFloat64Callback(f),
 		)
 		if err != nil {
-			o.Logger.Error().WithString("msg", "failed to create gauge").WithString("name", name)
+			o.Logger.Error().WithString("name", name).Logf("failed to create gauge")
 			return
 		}
 		o.gauges[name] = g
 	case "histogram":
 		h, err := o.meter.Float64Histogram(name)
 		if err != nil {
-			o.Logger.Error().WithString("msg", "failed to create histogram").WithString("name", name)
+			o.Logger.Error().WithString("name", name).Logf("failed to create histogram")
 			return
 		}
 		o.histograms[name] = h
 	case "updown":
 		ud, err := o.meter.Int64UpDownCounter(name)
 		if err != nil {
-			o.Logger.Error().WithString("msg", "failed to create updown").WithString("name", name)
+			o.Logger.Error().WithString("name", name).Logf("failed to create updown counter")
 			return
 		}
 		o.updowns[name] = ud
 	default:
-		o.Logger.Error().WithString("msg", "unknown metric type").WithString("type", metricType)
+		o.Logger.Error().WithString("type", metricType).Logf("unknown metric type")
 		return
 	}
 }
diff --git a/sharder/deterministic.go b/sharder/deterministic.go
index a1ae132748..48912d153f 100644
--- a/sharder/deterministic.go
+++ b/sharder/deterministic.go
@@ -133,7 +133,7 @@ func (d *DeterministicSharder) Start() error {
 		time.Sleep(5 * time.Second)
 	}
 
-	d.Logger.Error().WithFields(map[string]interface{}{"peers": d.peers, "self": self}).Logf("list of current peers")
+	d.Logger.Error().WithFields(map[string]interface{}{"peers": d.peers, "self": self}).Logf("failed to find self in the peer list")
 
 	return errors.New("failed to find self in the peer list")
 }
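
The send() change in collect.go above is a behavior fix as well as a wording fix: before this patch, the "Sending trace" line was logged unconditionally after the dry-run check, so a trace that sampling had voted to drop produced two log lines in dry run mode. The new if/else emits exactly one decision message per trace. Below is a minimal, self-contained sketch of that control flow; the logSendDecision helper, its boolean parameters, and the use of fmt in place of Refinery's Logger are illustrative stand-ins, not the project's actual code.

package main

import "fmt"

// logSendDecision mirrors the branching introduced in send(): exactly one
// of the two messages is emitted for any combination of the two flags.
func logSendDecision(shouldSend, dryRun bool) {
	if dryRun && !shouldSend {
		fmt.Println("Trace would have been dropped, but sending because dry run mode is enabled")
	} else {
		fmt.Println("Sending trace")
	}
}

func main() {
	logSendDecision(false, true) // dry run mode rescues a drop decision
	logSendDecision(true, true)  // dry run mode, but the sampler kept it anyway
	logSendDecision(true, false) // the normal keep-and-send path
}
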
From 23f42b82419a12dadef58949a51c2a9137934a7a Mon Sep 17 00:00:00 2001
From: Mike Goldsmith
Date: Fri, 13 Sep 2024 10:20:05 +0100
Subject: [PATCH 2/3] Apply suggestions from code review

Co-authored-by: Kent Quirk
---
 collect/collect.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/collect/collect.go b/collect/collect.go
index a1f001a79d..0d14f044c8 100644
--- a/collect/collect.go
+++ b/collect/collect.go
@@ -30,7 +30,7 @@ import (
 	"github.com/sirupsen/logrus"
 )
 
-var ErrWouldBlock = errors.New("not adding span, channel buffer is full. Span will not be processed and will be lost.")
+var ErrWouldBlock = errors.New("Dropping span as channel buffer is full. Span will not be processed and will be lost.")
 var CollectorHealthKey = "collector"
 
 type Collector interface {
@@ -270,7 +270,7 @@ func (i *InMemCollector) checkAlloc() {
 		WithField("num_traces_sent", len(tracesSent)).
 		WithField("datasize_sent", totalDataSizeSent).
 		WithField("new_trace_count", i.cache.GetCacheCapacity()).
-		Logf("evicting large traces early due to memory overage. Evicted traces will be processed and if selected by sampling, sent to Honeycomb.")
+		Logf("Making some trace decisions early due to memory overrun.")
 
 	// Manually GC here - without this we can easily end up evicting more than we
 	// need to, since total alloc won't be updated until after a GC pass.

From c50a4ca093fa8937ec59ae810d34a29c151b1fc0 Mon Sep 17 00:00:00 2001
From: Mike Goldsmith
Date: Mon, 16 Sep 2024 17:08:17 +0100
Subject: [PATCH 3/3] make memory overrun message warn

---
 collect/collect.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/collect/collect.go b/collect/collect.go
index 8aac5367bf..0b98ce3a0d 100644
--- a/collect/collect.go
+++ b/collect/collect.go
@@ -265,7 +265,7 @@ func (i *InMemCollector) checkAlloc() {
 	i.cache.RemoveTraces(tracesSent)
 
 	// Treat any MaxAlloc overage as an error so we know it's happening
-	i.Logger.Error().
+	i.Logger.Warn().
 		WithField("cache_size", cap).
 		WithField("alloc", mem.Alloc).
 		WithField("num_traces_sent", len(tracesSent)).
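
Taken together, the series converges on one logging convention: a constant, human-readable message string, with every variable value carried as a structured field. Below is a minimal sketch of the before and after shapes, using logrus directly (collect.go already imports it); Refinery's own calls go through its Logger wrapper, so this illustrates the pattern rather than the project's API.

package main

import log "github.com/sirupsen/logrus"

func main() {
	sig := "terminated"

	// Before: the variable is interpolated into the message, so every
	// distinct signal yields a distinct msg value, which is hard to
	// group or query in a log store.
	log.Errorf("Caught signal \"%s\"", sig)

	// After: constant message, variable as a field. With the default
	// text formatter this renders roughly as:
	//   level=error msg="Caught OS signal" signal=terminated
	log.WithField("signal", sig).Error("Caught OS signal")
}
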