diff --git a/CHANGELOG.md b/CHANGELOG.md index 2d1c9ca6b64..5f6694f94ea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,8 +3,12 @@ * [CHANGE] From path.Join to filepath.Join [#338](https://github.com/grafana/tempo/pull/338) * [CHANGE] Upgrade Cortex from v1.3.0 to v.1.4.0 [#341](https://github.com/grafana/tempo/pull/341) * [CHANGE] Compact more than 2 blocks at a time [#348](https://github.com/grafana/tempo/pull/348) +* [CHANGE] Remove tempodb_compaction_duration_seconds metric. [#360](https://github.com/grafana/tempo/pull/360) * [ENHANCEMENT] Add tempodb_compaction_objects_combined metric. [#339](https://github.com/grafana/tempo/pull/339) * [ENHANCEMENT] Added OpenMetrics exemplar support. [#359](https://github.com/grafana/tempo/pull/359) +* [ENHANCEMENT] Add tempodb_compaction_objects_written metric. [#360](https://github.com/grafana/tempo/pull/360) +* [ENHANCEMENT] Add tempodb_compaction_bytes_written metric. [#360](https://github.com/grafana/tempo/pull/360) +* [ENHANCEMENT] Add tempodb_compaction_blocks_total metric. [#360](https://github.com/grafana/tempo/pull/360) * [BUGFIX] Frequent errors logged by compactor regarding meta not found [#327](https://github.com/grafana/tempo/pull/327) * [BUGFIX] Fix distributors panicking on rollout [#343](https://github.com/grafana/tempo/pull/343) diff --git a/operations/tempo-mixin/tempo-operational.json b/operations/tempo-mixin/tempo-operational.json index 7378fc95611..64c771d1e31 100644 --- a/operations/tempo-mixin/tempo-operational.json +++ b/operations/tempo-mixin/tempo-operational.json @@ -1353,115 +1353,6 @@ "y": 7 }, "hiddenSeries": false, - "id": 52, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": false, - "linewidth": 1, - "nullPointMode": "connected", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.2.1", - "pointradius": 2, - "points": true, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(.99, sum(rate(tempodb_compaction_duration_seconds_bucket{job=~\"$namespace/compactor\"}[$__rate_interval])) by (le,level))", - "interval": "", - "legendFormat": ".99-{{level}}", - "refId": "A" - }, - { - "expr": "histogram_quantile(.9, sum(rate(tempodb_compaction_duration_seconds_bucket{job=~\"$namespace/compactor\"}[$__rate_interval])) by (le,level))", - "hide": true, - "interval": "", - "legendFormat": ".9-{{level}}", - "refId": "B" - }, - { - "expr": "histogram_quantile(.5, sum(rate(tempodb_compaction_duration_seconds_bucket{job=~\"$namespace/compactor\"}[$__rate_interval])) by (le,level))", - "hide": true, - "interval": "", - "legendFormat": ".5-{{level}}", - "refId": "C" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "compaction duration", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds", - "fieldConfig": { - "defaults": { - "custom": {} - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 5, - "w": 3, - "x": 18, - "y": 7 - }, - "hiddenSeries": false, "id": 53, "legend": { "avg": false, @@ -1559,7 +1450,7 @@ "gridPos": { "h": 5, "w": 3, - "x": 21, + "x": 18, "y": 7 }, "hiddenSeries": false, @@ -4522,8 +4413,8 @@ "fill": 1, "fillGradient": 0, "gridPos": { - "h": 8, - "w": 7, + "h": 6, + "w": 4, "x": 0, "y": 77 }, @@ -4555,7 +4446,7 @@ "steppedLine": false, "targets": [ { - "expr": "increase(tempodb_compaction_objects_combined_total{job=~\"$namespace/compactor\"}[$__rate_interval])", + "expr": "increase(tempodb_compaction_objects_combined_total{cluster=\"$cluster\",job=~\"$namespace/compactor\"}[$__rate_interval])", "interval": "", "legendFormat": "", "refId": "A" @@ -4603,6 +4494,292 @@ "align": false, "alignLevel": null } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$ds", + "description": "", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 4, + "x": 4, + "y": 77 + }, + "hiddenSeries": false, + "id": 85, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.3.0-beta1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(tempodb_compaction_objects_written{cluster=\"$cluster\",job=\"$namespace/compactor\"}[$__rate_interval])) by (level)", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Objects Written / s", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 4, + "x": 8, + "y": 77 + }, + "hiddenSeries": false, + "id": 88, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.4.0-7095pre", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(tempodb_compaction_bytes_written{cluster=\"$cluster\",job=\"$namespace/compactor\"}[$__rate_interval])) by (level)", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Bytes Written / s", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$ds", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 4, + "x": 12, + "y": 77 + }, + "hiddenSeries": false, + "id": 86, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.3.0-beta1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(tempodb_compaction_blocks_total{cluster=\"$cluster\",job=\"$namespace/compactor\"}[5m])) by (level)", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Blocks Compacted", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } } ], "refresh": "30s", diff --git a/tempodb/compactor.go b/tempodb/compactor.go index 369fe20798f..814f4244257 100644 --- a/tempodb/compactor.go +++ b/tempodb/compactor.go @@ -21,11 +21,20 @@ import ( ) var ( - metricCompactionDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{ + metricCompactionBlocks = promauto.NewCounterVec(prometheus.CounterOpts{ Namespace: "tempodb", - Name: "compaction_duration_seconds", - Help: "Records the amount of time to compact a set of blocks.", - Buckets: prometheus.ExponentialBuckets(30, 2, 10), + Name: "compaction_blocks_total", + Help: "Total number of blocks compacted.", + }, []string{"level"}) + metricCompactionObjectsWritten = promauto.NewCounterVec(prometheus.CounterOpts{ + Namespace: "tempodb", + Name: "compaction_objects_written", + Help: "Total number of objects written to backend during compaction.", + }, []string{"level"}) + metricCompactionBytesWritten = promauto.NewCounterVec(prometheus.CounterOpts{ + Namespace: "tempodb", + Name: "compaction_bytes_written", + Help: "Total number of bytes written to backend during compaction.", }, []string{"level"}) metricCompactionErrors = promauto.NewCounter(prometheus.CounterOpts{ Namespace: "tempodb", @@ -108,12 +117,11 @@ func (rw *readerWriter) compact(blockMetas []*encoding.BlockMeta, tenantID strin } compactionLevel := compactionLevelForBlocks(blockMetas) + compactionLevelLabel := strconv.Itoa(int(compactionLevel)) nextCompactionLevel := compactionLevel + 1 - start := time.Now() defer func() { level.Info(rw.logger).Log("msg", "compaction complete") - metricCompactionDuration.WithLabelValues(strconv.Itoa(int(compactionLevel))).Observe(time.Since(start).Seconds()) }() var err error @@ -219,6 +227,8 @@ func (rw *readerWriter) compact(blockMetas []*encoding.BlockMeta, tenantID strin // mark old blocks compacted so they don't show up in polling markCompacted(rw, tenantID, blockMetas, newCompactedBlocks) + metricCompactionBlocks.WithLabelValues(compactionLevelLabel).Add(float64(len(blockMetas))) + return nil } @@ -227,6 +237,11 @@ func appendBlock(rw *readerWriter, tracker backend.AppendTracker, block *wal.Com if err != nil { return nil, err } + + compactionLevelLabel := strconv.Itoa(int(block.BlockMeta().CompactionLevel - 1)) + metricCompactionObjectsWritten.WithLabelValues(compactionLevelLabel).Add(float64(block.CurrentBufferedObjects())) + metricCompactionBytesWritten.WithLabelValues(compactionLevelLabel).Add(float64(block.CurrentBufferLength())) + block.ResetBuffer() return tracker, nil diff --git a/tempodb/compactor_test.go b/tempodb/compactor_test.go index f0f0a4bc8b9..1c45265aabc 100644 --- a/tempodb/compactor_test.go +++ b/tempodb/compactor_test.go @@ -94,7 +94,7 @@ func TestCompaction(t *testing.T) { _, err = rand.Read(id) assert.NoError(t, err, "unexpected creating random id") - req := test.MakeRequest(i*10, id) + req := test.MakeRequest(10, id) reqs = append(reqs, req) ids = append(ids, id) @@ -260,6 +260,14 @@ func GetCounterValue(metric prometheus.Counter) (float64, error) { return m.Counter.GetValue(), nil } +func GetCounterVecValue(metric *prometheus.CounterVec, label string) (float64, error) { + var m = &dto.Metric{} + if err := metric.WithLabelValues(label).Write(m); err != nil { + return 0, err + } + return m.Counter.GetValue(), nil +} + func TestCompactionUpdatesBlocklist(t *testing.T) { tempDir, err := ioutil.TempDir("/tmp", "") defer os.RemoveAll(tempDir) @@ -322,6 +330,77 @@ func TestCompactionUpdatesBlocklist(t *testing.T) { } } +func TestCompactionMetrics(t *testing.T) { + tempDir, err := ioutil.TempDir("/tmp", "") + defer os.RemoveAll(tempDir) + assert.NoError(t, err, "unexpected error creating temp dir") + + r, w, c, err := New(&Config{ + Backend: "local", + Pool: &pool.Config{ + MaxWorkers: 10, + QueueDepth: 100, + }, + Local: &local.Config{ + Path: path.Join(tempDir, "traces"), + }, + WAL: &wal.Config{ + Filepath: path.Join(tempDir, "wal"), + IndexDownsample: rand.Int()%20 + 1, + BloomFP: .01, + }, + BlocklistPoll: 0, + }, log.NewNopLogger()) + assert.NoError(t, err) + + c.EnableCompaction(&CompactorConfig{ + ChunkSizeBytes: 10, + MaxCompactionRange: 24 * time.Hour, + BlockRetention: 0, + CompactedBlockRetention: 0, + }, &mockSharder{}) + + // Cut x blocks with y records each + blockCount := 5 + recordCount := 1 + cutTestBlocks(t, w, blockCount, recordCount) + + rw := r.(*readerWriter) + rw.pollBlocklist() + + // Get starting metrics + processedStart, err := GetCounterVecValue(metricCompactionObjectsWritten, "0") + assert.NoError(t, err) + + blocksStart, err := GetCounterVecValue(metricCompactionBlocks, "0") + assert.NoError(t, err) + + bytesStart, err := GetCounterVecValue(metricCompactionBytesWritten, "0") + assert.NoError(t, err) + + // compact everything + err = rw.compact(rw.blocklist(testTenantID), testTenantID) + assert.NoError(t, err) + + // Check metric + processedEnd, err := GetCounterVecValue(metricCompactionObjectsWritten, "0") + assert.NoError(t, err) + assert.Equal(t, float64(blockCount*recordCount), processedEnd-processedStart) + + blocksEnd, err := GetCounterVecValue(metricCompactionBlocks, "0") + assert.NoError(t, err) + assert.Equal(t, float64(blockCount), blocksEnd-blocksStart) + + bytesEnd, err := GetCounterVecValue(metricCompactionBytesWritten, "0") + assert.NoError(t, err) + bytesPerRecord := + 4 /* total length */ + + 4 /* id length */ + + 16 /* id */ + + 3 /* test record length */ + assert.Equal(t, float64(blockCount*recordCount*bytesPerRecord), bytesEnd-bytesStart) +} + func cutTestBlocks(t *testing.T, w Writer, blockCount int, recordCount int) { wal := w.WAL() for i := 0; i < blockCount; i++ { diff --git a/tempodb/wal/compactor_block.go b/tempodb/wal/compactor_block.go index bf0d9a02963..ef8a211c713 100644 --- a/tempodb/wal/compactor_block.go +++ b/tempodb/wal/compactor_block.go @@ -17,8 +17,9 @@ type CompactorBlock struct { bloom *bloom.ShardedBloomFilter - appendBuffer *bytes.Buffer - appender encoding.Appender + bufferedObjects int + appendBuffer *bytes.Buffer + appender encoding.Appender } func newCompactorBlock(id uuid.UUID, tenantID string, bloomFP float64, indexDownsample int, metas []*encoding.BlockMeta, filepath string, estimatedObjects int) (*CompactorBlock, error) { @@ -56,6 +57,7 @@ func (c *CompactorBlock) Write(id encoding.ID, object []byte) error { if err != nil { return err } + c.bufferedObjects++ c.meta.ObjectAdded(id) c.bloom.Add(id) return nil @@ -69,8 +71,13 @@ func (c *CompactorBlock) CurrentBufferLength() int { return c.appendBuffer.Len() } +func (c *CompactorBlock) CurrentBufferedObjects() int { + return c.bufferedObjects +} + func (c *CompactorBlock) ResetBuffer() { c.appendBuffer.Reset() + c.bufferedObjects = 0 } func (c *CompactorBlock) Length() int { diff --git a/tempodb/wal/compactor_block_test.go b/tempodb/wal/compactor_block_test.go index c34aa4f1da0..b3b37cc35a0 100644 --- a/tempodb/wal/compactor_block_test.go +++ b/tempodb/wal/compactor_block_test.go @@ -95,5 +95,7 @@ func TestCompactorBlockWrite(t *testing.T) { } records := cb.Records() - assert.Equal(t, math.Ceil(float64(numObjects)/3), float64(len(records))) + assert.Equal(t, math.Ceil(float64(numObjects)/float64(walCfg.IndexDownsample)), float64(len(records))) + + assert.Equal(t, numObjects, cb.CurrentBufferedObjects()) }