Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add metric for bytes logged in WAL and Checkpoints #2497

Merged
merged 1 commit into from
Apr 22, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
* [ENHANCEMENT] Allow 1w (where w denotes week) and 1y (where y denotes year) when setting `-store.cache-lookups-older-than` and `-store.max-look-back-period`. #2454
* [ENHANCEMENT] Optimize index queries for matchers using "a|b|c"-type regex. #2446 #2475
* [ENHANCEMENT] Added per tenant metrics for queries and chunks and bytes read from chunk store: #2463
* [ENHANCEMENT] Experimental WAL: New metrics `cortex_ingester_wal_logged_bytes_total` and `cortex_ingester_checkpoint_logged_bytes_total` added to track total bytes logged to disk for WAL and checkpoints. #2497
* `cortex_chunk_store_fetched_chunks_total` and `cortex_chunk_store_fetched_chunk_bytes_total`
* `cortex_query_frontend_queries_total` (per tenant queries counted by the frontend)
* [ENHANCEMENT] query-frontend now also logs the POST data of long queries. #2481
Expand Down
27 changes: 21 additions & 6 deletions pkg/ingester/wal.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,13 @@ type walWrapper struct {
checkpointMtx sync.Mutex

// Checkpoint and WAL metrics.
checkpointDeleteFail prometheus.Counter
checkpointDeleteTotal prometheus.Counter
checkpointCreationFail prometheus.Counter
checkpointCreationTotal prometheus.Counter
checkpointDuration prometheus.Summary
checkpointDeleteFail prometheus.Counter
checkpointDeleteTotal prometheus.Counter
checkpointCreationFail prometheus.Counter
checkpointCreationTotal prometheus.Counter
checkpointDuration prometheus.Summary
checkpointLoggedBytesTotal prometheus.Counter
walLoggedBytesTotal prometheus.Counter
}

// newWAL creates a WAL object. If the WAL is disabled, then the returned WAL is a no-op WAL.
Expand Down Expand Up @@ -124,6 +126,14 @@ func newWAL(cfg WALConfig, userStatesFunc func() map[string]*userState, register
Help: "Time taken to create a checkpoint.",
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
})
w.checkpointLoggedBytesTotal = promauto.With(registerer).NewCounter(prometheus.CounterOpts{
Name: "cortex_ingester_checkpoint_logged_bytes_total",
Help: "Total number of bytes written to disk for checkpointing.",
})
w.walLoggedBytesTotal = promauto.With(registerer).NewCounter(prometheus.CounterOpts{
Name: "cortex_ingester_wal_logged_bytes_total",
Help: "Total number of bytes written to disk for WAL records.",
})

w.wait.Add(1)
go w.run()
Expand All @@ -148,6 +158,7 @@ func (w *walWrapper) Log(record *Record) error {
if err != nil {
return err
}
w.walLoggedBytesTotal.Add(float64(len(buf)))
return w.wal.Log(buf)
}
}
Expand Down Expand Up @@ -401,7 +412,11 @@ func (w *walWrapper) checkpointSeries(cp *wal.WAL, userID string, fp model.Finge
return wireChunks, err
}

return wireChunks, cp.Log(buf)
err = cp.Log(buf)
if err == nil {
w.checkpointLoggedBytesTotal.Add(float64(len(buf)))
}
return wireChunks, err
}

type walRecoveryParameters struct {
Expand Down