From d19cb01a6ce4ec375c5181e3f9bf32ffe4c36e76 Mon Sep 17 00:00:00 2001 From: Oliver Townsend <133903322+ogtownsend@users.noreply.github.com> Date: Thu, 22 Feb 2024 07:37:22 -0800 Subject: [PATCH] Add metric for num logs in buffer and missed logs (#11852) * Add metric for num logs in buffer * Set metrics in buffer, remove ticker * Add missed logs metric * Remove logsInBuffer var * Add more metrics * Change to inc and dec --- .../ocr2keeper/evmregistry/v21/active_list.go | 8 +++- .../evmregistry/v21/logprovider/buffer.go | 3 ++ .../evmregistry/v21/logprovider/provider.go | 2 + .../evmregistry/v21/logprovider/recoverer.go | 8 +++- .../evmregistry/v21/prommetrics/metrics.go | 38 +++++++++++++++++++ 5 files changed, 56 insertions(+), 3 deletions(-) create mode 100644 core/services/ocr2/plugins/ocr2keeper/evmregistry/v21/prommetrics/metrics.go diff --git a/core/services/ocr2/plugins/ocr2keeper/evmregistry/v21/active_list.go b/core/services/ocr2/plugins/ocr2keeper/evmregistry/v21/active_list.go index 55c01939cb8..27c13f079b2 100644 --- a/core/services/ocr2/plugins/ocr2keeper/evmregistry/v21/active_list.go +++ b/core/services/ocr2/plugins/ocr2keeper/evmregistry/v21/active_list.go @@ -9,6 +9,7 @@ import ( ocr2keepers "github.com/smartcontractkit/chainlink-common/pkg/types/automation" "github.com/smartcontractkit/chainlink/v2/core/services/ocr2/plugins/ocr2keeper/evmregistry/v21/core" + "github.com/smartcontractkit/chainlink/v2/core/services/ocr2/plugins/ocr2keeper/evmregistry/v21/prommetrics" ) // ActiveUpkeepList is a list to manage active upkeep IDs @@ -49,9 +50,10 @@ func (al *activeList) Reset(ids ...*big.Int) { for _, id := range ids { al.items[id.String()] = true } + prommetrics.AutomationActiveUpkeeps.Set(float64(len(al.items))) } -// Add adds new entries to the list +// Add adds new entries to the list. Returns the number of items added func (al *activeList) Add(ids ...*big.Int) int { al.lock.Lock() defer al.lock.Unlock() @@ -63,10 +65,11 @@ func (al *activeList) Add(ids ...*big.Int) int { al.items[key] = true } } + prommetrics.AutomationActiveUpkeeps.Set(float64(len(al.items))) return count } -// Remove removes entries from the list +// Remove removes entries from the list. Returns the number of items removed func (al *activeList) Remove(ids ...*big.Int) int { al.lock.Lock() defer al.lock.Unlock() @@ -79,6 +82,7 @@ func (al *activeList) Remove(ids ...*big.Int) int { delete(al.items, key) } } + prommetrics.AutomationActiveUpkeeps.Set(float64(len(al.items))) return count } diff --git a/core/services/ocr2/plugins/ocr2keeper/evmregistry/v21/logprovider/buffer.go b/core/services/ocr2/plugins/ocr2keeper/evmregistry/v21/logprovider/buffer.go index 9f11a1fca01..6418d683869 100644 --- a/core/services/ocr2/plugins/ocr2keeper/evmregistry/v21/logprovider/buffer.go +++ b/core/services/ocr2/plugins/ocr2keeper/evmregistry/v21/logprovider/buffer.go @@ -12,6 +12,7 @@ import ( "github.com/smartcontractkit/chainlink/v2/core/chains/evm/logpoller" "github.com/smartcontractkit/chainlink/v2/core/logger" + "github.com/smartcontractkit/chainlink/v2/core/services/ocr2/plugins/ocr2keeper/evmregistry/v21/prommetrics" ) var ( @@ -230,6 +231,7 @@ func (b *logEventBuffer) enqueue(id *big.Int, logs ...logpoller.Log) int { } if added > 0 { lggr.Debugw("Added logs to buffer", "addedLogs", added, "dropped", dropped, "latestBlock", latestBlock) + prommetrics.AutomationLogsInLogBuffer.Add(float64(added - dropped)) } return added - dropped @@ -331,6 +333,7 @@ func (b *logEventBuffer) dequeueRange(start, end int64, upkeepLimit, totalLimit if len(results) > 0 { b.lggr.Debugw("Dequeued logs", "results", len(results), "start", start, "end", end) + prommetrics.AutomationLogsInLogBuffer.Sub(float64(len(results))) } return results diff --git a/core/services/ocr2/plugins/ocr2keeper/evmregistry/v21/logprovider/provider.go b/core/services/ocr2/plugins/ocr2keeper/evmregistry/v21/logprovider/provider.go index d1360faaf6d..e06593a9109 100644 --- a/core/services/ocr2/plugins/ocr2keeper/evmregistry/v21/logprovider/provider.go +++ b/core/services/ocr2/plugins/ocr2keeper/evmregistry/v21/logprovider/provider.go @@ -24,6 +24,7 @@ import ( "github.com/smartcontractkit/chainlink/v2/core/gethwrappers/generated/automation_utils_2_1" "github.com/smartcontractkit/chainlink/v2/core/logger" "github.com/smartcontractkit/chainlink/v2/core/services/ocr2/plugins/ocr2keeper/evmregistry/v21/core" + "github.com/smartcontractkit/chainlink/v2/core/services/ocr2/plugins/ocr2keeper/evmregistry/v21/prommetrics" "github.com/smartcontractkit/chainlink/v2/core/services/pg" "github.com/smartcontractkit/chainlink/v2/core/utils" ) @@ -162,6 +163,7 @@ func (p *logEventProvider) GetLatestPayloads(ctx context.Context) ([]ocr2keepers if err != nil { return nil, fmt.Errorf("%w: %s", ErrHeadNotAvailable, err) } + prommetrics.AutomationLogProviderLatestBlock.Set(float64(latest.BlockNumber)) start := latest.BlockNumber - p.opts.LookbackBlocks if start <= 0 { start = 1 diff --git a/core/services/ocr2/plugins/ocr2keeper/evmregistry/v21/logprovider/recoverer.go b/core/services/ocr2/plugins/ocr2keeper/evmregistry/v21/logprovider/recoverer.go index 13b8bb17245..2eef5db17d9 100644 --- a/core/services/ocr2/plugins/ocr2keeper/evmregistry/v21/logprovider/recoverer.go +++ b/core/services/ocr2/plugins/ocr2keeper/evmregistry/v21/logprovider/recoverer.go @@ -27,6 +27,7 @@ import ( "github.com/smartcontractkit/chainlink/v2/core/chains/evm/logpoller" "github.com/smartcontractkit/chainlink/v2/core/logger" "github.com/smartcontractkit/chainlink/v2/core/services/ocr2/plugins/ocr2keeper/evmregistry/v21/core" + "github.com/smartcontractkit/chainlink/v2/core/services/ocr2/plugins/ocr2keeper/evmregistry/v21/prommetrics" "github.com/smartcontractkit/chainlink/v2/core/services/pg" "github.com/smartcontractkit/chainlink/v2/core/utils" ) @@ -305,7 +306,7 @@ func (r *logRecoverer) GetRecoveryProposals(ctx context.Context) ([]ocr2keepers. var results, pending []ocr2keepers.UpkeepPayload for _, payload := range r.pending { if allLogsCounter >= MaxProposals { - // we have enough proposals, pushed the rest are pushed back to pending + // we have enough proposals, the rest are pushed back to pending pending = append(pending, payload) continue } @@ -321,6 +322,7 @@ func (r *logRecoverer) GetRecoveryProposals(ctx context.Context) ([]ocr2keepers. } r.pending = pending + prommetrics.AutomationRecovererPendingPayloads.Set(float64(len(r.pending))) r.lggr.Debugf("found %d recoverable payloads", len(results)) @@ -417,6 +419,7 @@ func (r *logRecoverer) recoverFilter(ctx context.Context, f upkeepFilter, startB added, alreadyPending, ok := r.populatePending(f, filteredLogs) if added > 0 { r.lggr.Debugw("found missed logs", "added", added, "alreadyPending", alreadyPending, "upkeepID", f.upkeepID) + prommetrics.AutomationRecovererMissedLogs.Add(float64(added)) } if !ok { r.lggr.Debugw("failed to add all logs to pending", "upkeepID", f.upkeepID) @@ -673,6 +676,7 @@ func (r *logRecoverer) addPending(payload ocr2keepers.UpkeepPayload) error { } if !exist { r.pending = append(pending, payload) + prommetrics.AutomationRecovererPendingPayloads.Inc() } return nil } @@ -684,6 +688,8 @@ func (r *logRecoverer) removePending(workID string) { for _, p := range r.pending { if p.WorkID != workID { updated = append(updated, p) + } else { + prommetrics.AutomationRecovererPendingPayloads.Dec() } } r.pending = updated diff --git a/core/services/ocr2/plugins/ocr2keeper/evmregistry/v21/prommetrics/metrics.go b/core/services/ocr2/plugins/ocr2keeper/evmregistry/v21/prommetrics/metrics.go new file mode 100644 index 00000000000..cebbac59884 --- /dev/null +++ b/core/services/ocr2/plugins/ocr2keeper/evmregistry/v21/prommetrics/metrics.go @@ -0,0 +1,38 @@ +package prommetrics + +import ( + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" +) + +// AutomationNamespace is the namespace for all Automation related metrics +const AutomationLogTriggerNamespace = "automation_log_trigger" + +// Automation metrics +var ( + AutomationLogsInLogBuffer = promauto.NewGauge(prometheus.GaugeOpts{ + Namespace: AutomationLogTriggerNamespace, + Name: "num_logs_in_log_buffer", + Help: "The total number of logs currently being stored in the log buffer", + }) + AutomationRecovererMissedLogs = promauto.NewCounter(prometheus.CounterOpts{ + Namespace: AutomationLogTriggerNamespace, + Name: "num_recoverer_missed_logs", + Help: "How many valid log triggers were identified as being missed by the recoverer", + }) + AutomationRecovererPendingPayloads = promauto.NewGauge(prometheus.GaugeOpts{ + Namespace: AutomationLogTriggerNamespace, + Name: "num_recoverer_pending_payloads", + Help: "How many log trigger payloads are currently pending in the recoverer", + }) + AutomationActiveUpkeeps = promauto.NewGauge(prometheus.GaugeOpts{ + Namespace: AutomationLogTriggerNamespace, + Name: "num_active_upkeeps", + Help: "How many log trigger upkeeps are currently active", + }) + AutomationLogProviderLatestBlock = promauto.NewGauge(prometheus.GaugeOpts{ + Namespace: AutomationLogTriggerNamespace, + Name: "log_provider_latest_block", + Help: "The latest block number the log provider has seen", + }) +)