Skip to content

Commit

Permalink
fix(promtail): prevent panic due to duplicate metric registration aft…
Browse files Browse the repository at this point in the history
…er reloaded (#10798)

**What this PR does / why we need it**:
Prevent Promtail panicking after getting reloaded.

**Which issue(s) this PR fixes**:
Fixes #10796 

**Special notes for your reviewer**:

**Checklist**
- [x] Reviewed the
[`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md)
guide (**required**)
- [ ] Documentation added
- [ ] Tests updated
- [x] `CHANGELOG.md` updated
- [ ] If the change is worth mentioning in the release notes, add
`add-to-release-notes` label
- [ ] Changes that require user attention or interaction to upgrade are
documented in `docs/sources/setup/upgrade/_index.md`
- [ ] For Helm chart changes bump the Helm chart version in
`production/helm/loki/Chart.yaml` and update
`production/helm/loki/CHANGELOG.md` and
`production/helm/loki/README.md`. [Example
PR](d10549e)

---------

Co-authored-by: Michel Hollands <42814411+MichelHollands@users.noreply.github.com>
  • Loading branch information
hainenber and MichelHollands authored Oct 12, 2023
1 parent 48087e0 commit 47e2c58
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 7 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
##### Fixes

* [10631](https://github.com/grafana/loki/pull/10631) **thampiotr**: Fix race condition in cleaning up metrics when stopping to tail files.
* [10798](https://github.com/grafana/loki/pull/10798) **hainenber**: Fix agent panicking after reloaded due to duplicate metric collector registration.

#### LogCLI

Expand Down
6 changes: 6 additions & 0 deletions clients/pkg/promtail/targets/lokipush/pushtarget.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,12 @@ func (t *PushTarget) run() error {
serverCfg := &t.config.Server
serverCfg.Log = util_log.InitLogger(serverCfg, prometheus.NewRegistry(), true, false)

// Set new registry for upcoming metric server
// If not, it'll likely panic when the tool gets reloaded.
if t.config.Server.Registerer == nil {
t.config.Server.Registerer = prometheus.NewRegistry()
}

srv, err := server.New(t.config.Server)
if err != nil {
return err
Expand Down
50 changes: 43 additions & 7 deletions clients/pkg/promtail/wal/watcher_metrics.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
package wal

import "github.com/prometheus/client_golang/prometheus"
import (
"errors"

"github.com/prometheus/client_golang/prometheus"
)

type WatcherMetrics struct {
recordsRead *prometheus.CounterVec
Expand Down Expand Up @@ -69,13 +73,45 @@ func NewWatcherMetrics(reg prometheus.Registerer) *WatcherMetrics {
),
}

// Collectors will be re-registered to registry if it's got reloaded
// Reuse the old collectors instead of panicking out.
if reg != nil {
reg.MustRegister(m.recordsRead)
reg.MustRegister(m.recordDecodeFails)
reg.MustRegister(m.droppedWriteNotifications)
reg.MustRegister(m.segmentRead)
reg.MustRegister(m.currentSegment)
reg.MustRegister(m.watchersRunning)
if err := reg.Register(m.recordsRead); err != nil {
are := &prometheus.AlreadyRegisteredError{}
if errors.As(err, are) {
m.recordsRead = are.ExistingCollector.(*prometheus.CounterVec)
}
}
if err := reg.Register(m.recordDecodeFails); err != nil {
are := &prometheus.AlreadyRegisteredError{}
if errors.As(err, are) {
m.recordDecodeFails = are.ExistingCollector.(*prometheus.CounterVec)
}
}
if err := reg.Register(m.droppedWriteNotifications); err != nil {
are := &prometheus.AlreadyRegisteredError{}
if errors.As(err, are) {
m.droppedWriteNotifications = are.ExistingCollector.(*prometheus.CounterVec)
}
}
if err := reg.Register(m.segmentRead); err != nil {
are := &prometheus.AlreadyRegisteredError{}
if errors.As(err, are) {
m.segmentRead = are.ExistingCollector.(*prometheus.CounterVec)
}
}
if err := reg.Register(m.currentSegment); err != nil {
are := &prometheus.AlreadyRegisteredError{}
if errors.As(err, are) {
m.currentSegment = are.ExistingCollector.(*prometheus.GaugeVec)
}
}
if err := reg.Register(m.watchersRunning); err != nil {
are := &prometheus.AlreadyRegisteredError{}
if errors.As(err, are) {
m.watchersRunning = are.ExistingCollector.(*prometheus.GaugeVec)
}
}
}

return m
Expand Down

0 comments on commit 47e2c58

Please sign in to comment.