diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8cd998b51c2..80cd3cf26a0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,7 +1,11 @@
 ## main / unreleased
 
+* [BUGFIX] Update port spec for GCS docker-compose example [#869](https://github.com/grafana/tempo/pull/869) (@zalegrala)
+* [BUGFIX] Cortex upgrade to fix an issue where unhealthy compactors can't be forgotten [#878](https://github.com/grafana/tempo/pull/878) (@joe-elliott)
+* [ENHANCEMENT] Added "query blocks" cli option. [#876](https://github.com/grafana/tempo/pull/876) (@joe-elliott)
 * [ENHANCEMENT] Make s3 backend readError logic more robust [#905](https://github.com/grafana/tempo/pull/905) (@wei840222)
 * [ENHANCEMENT] Include additional detail when searching for traces [#916](https://github.com/grafana/tempo/pull/916) (@zalegrala)
+* [ENHANCEMENT] Add `gen index` and `gen bloom` commands to tempo-cli. [#903](https://github.com/grafana/tempo/pull/903) (@annanay25)
 
 ## v1.1.0 / 2021-08-26
 
diff --git a/cmd/tempo-cli/cmd-gen-bloom.go b/cmd/tempo-cli/cmd-gen-bloom.go
new file mode 100644
index 00000000000..6e40889be26
--- /dev/null
+++ b/cmd/tempo-cli/cmd-gen-bloom.go
@@ -0,0 +1,163 @@
+package main
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"io"
+	"os"
+	"strconv"
+
+	"github.com/google/uuid"
+	willf_bloom "github.com/willf/bloom"
+
+	"github.com/grafana/tempo/tempodb/backend"
+	"github.com/grafana/tempo/tempodb/encoding"
+	"github.com/grafana/tempo/tempodb/encoding/common"
+)
+
+type bloomCmd struct {
+	TenantID       string  `arg:"" help:"tenant-id within the bucket"`
+	BlockID        string  `arg:"" help:"block ID to list"`
+	BloomFP        float64 `arg:"" help:"bloom filter false positive rate (use prod settings!)"`
+	BloomShardSize int     `arg:"" help:"bloom filter shard size (use prod settings!)"`
+	backendOptions
+}
+
+type forEachRecord func(id common.ID) error
+
+func ReplayBlockAndDoForEachRecord(meta *backend.BlockMeta, filepath string, forEach forEachRecord) error {
+	v, err := encoding.FromVersion(meta.Version)
+	if err != nil {
+		return err
+	}
+
+	// replay file to extract records
+	f, err := os.OpenFile(filepath, os.O_RDONLY, 0644)
+	if err != nil {
+		return err
+	}
+
+	dataReader, err := v.NewDataReader(backend.NewContextReaderWithAllReader(f), meta.Encoding)
+	if err != nil {
+		return fmt.Errorf("error creating data reader: %w", err)
+	}
+	defer dataReader.Close()
+
+	var buffer []byte
+	objectRW := v.NewObjectReaderWriter()
+	for {
+		buffer, _, err := dataReader.NextPage(buffer)
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			return fmt.Errorf("error reading page from datareader: %w", err)
+		}
+
+		iter := encoding.NewIterator(bytes.NewReader(buffer), objectRW)
+		var iterErr error
+		for {
+			var id common.ID
+			id, _, iterErr = iter.Next(context.TODO())
+			if iterErr != nil {
+				break
+			}
+			err := forEach(id)
+			if err != nil {
+				return fmt.Errorf("error adding to bloom filter: %w", err)
+			}
+		}
+
+		if iterErr != io.EOF {
+			return iterErr
+		}
+	}
+
+	return nil
+}
+
+func (cmd *bloomCmd) Run(ctx *globalOptions) error {
+	blockID, err := uuid.Parse(cmd.BlockID)
+	if err != nil {
+		return err
+	}
+
+	r, w, _, err := loadBackend(&cmd.backendOptions, ctx)
+	if err != nil {
+		return err
+	}
+
+	meta, err := r.BlockMeta(context.TODO(), blockID, cmd.TenantID)
+	if err != nil {
+		return err
+	}
+
+	// replay file and add records to bloom filter
+	bloom := common.NewBloom(cmd.BloomFP, uint(cmd.BloomShardSize), uint(meta.TotalObjects))
+	if bloom.GetShardCount() != int(meta.BloomShardCount) {
+		err := fmt.Errorf("shards in generated bloom filter do not match block meta, please use prod settings for bloom shard size and FP")
+		fmt.Println(err.Error())
+		return err
+	}
+
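+	// addToBloom is invoked for every trace ID replayed from the block's data
+	// file; each ID is added to the freshly created sharded bloom filter.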
+	addToBloom := func(id common.ID) error {
+		bloom.Add(id)
+		return nil
+	}
+
+	err = ReplayBlockAndDoForEachRecord(meta, cmd.backendOptions.Bucket+cmd.TenantID+"/"+cmd.BlockID+"/"+dataFilename, addToBloom)
+	if err != nil {
+		fmt.Println("error replaying block", err)
+		return err
+	}
+
+	// write to the local backend
+	bloomBytes, err := bloom.Marshal()
+	if err != nil {
+		fmt.Println("error marshalling bloom filter")
+		return err
+	}
+
+	for i := 0; i < len(bloomBytes); i++ {
+		err = w.Write(context.TODO(), bloomFilePrefix+strconv.Itoa(i), blockID, cmd.TenantID, bloomBytes[i], false)
+		if err != nil {
+			fmt.Println("error writing bloom filter to backend", err)
+			return err
+		}
+	}
+
+	fmt.Println("bloom written to backend successfully")
+
+	// verify generated bloom
+	shardedBloomFilter := make([]*willf_bloom.BloomFilter, meta.BloomShardCount)
+	for i := 0; i < int(meta.BloomShardCount); i++ {
+		bloomBytes, err := r.Read(context.TODO(), bloomFilePrefix+strconv.Itoa(i), blockID, cmd.TenantID, false)
+		if err != nil {
+			fmt.Println("error reading bloom from backend")
+			return nil
+		}
+		shardedBloomFilter[i] = &willf_bloom.BloomFilter{}
+		_, err = shardedBloomFilter[i].ReadFrom(bytes.NewReader(bloomBytes))
+		if err != nil {
+			fmt.Println("error parsing bloom")
+			return nil
+		}
+	}
+
+	testBloom := func(id common.ID) error {
+		key := common.ShardKeyForTraceID(id, int(meta.BloomShardCount))
+		if !shardedBloomFilter[key].Test(id) {
+			return fmt.Errorf("id not added to bloom, filter is likely corrupt")
+		}
+		return nil
+	}
+	err = ReplayBlockAndDoForEachRecord(meta, cmd.backendOptions.Bucket+cmd.TenantID+"/"+cmd.BlockID+"/"+dataFilename, testBloom)
+	if err != nil {
+		fmt.Println("error replaying block", err)
+		return err
+	}
+
+	fmt.Println("bloom filter verified")
+	return nil
+}
diff --git a/cmd/tempo-cli/cmd-gen-index.go b/cmd/tempo-cli/cmd-gen-index.go
new file mode 100644
index 00000000000..f36c4ad5f41
--- /dev/null
+++ b/cmd/tempo-cli/cmd-gen-index.go
@@ -0,0 +1,195 @@
+package main
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"io"
+	"os"
+
+	"github.com/google/uuid"
+
+	"github.com/grafana/tempo/tempodb/backend"
+	"github.com/grafana/tempo/tempodb/encoding"
+	"github.com/grafana/tempo/tempodb/encoding/common"
+)
+
+type indexCmd struct {
+	TenantID string `arg:"" help:"tenant-id within the bucket"`
+	BlockID  string `arg:"" help:"block ID to list"`
+	backendOptions
+}
+
+func ReplayBlockAndGetRecords(meta *backend.BlockMeta, filepath string) ([]common.Record, error, error) {
+	v, err := encoding.FromVersion(meta.Version)
+	if err != nil {
+		return nil, nil, err
+	}
+
+	var replayError error
+	// replay file to extract records
+	f, err := os.OpenFile(filepath, os.O_RDONLY, 0644)
+	if err != nil {
+		return nil, nil, err
+	}
+
+	dataReader, err := v.NewDataReader(backend.NewContextReaderWithAllReader(f), meta.Encoding)
+	if err != nil {
+		return nil, nil, err
+	}
+	defer dataReader.Close()
+
+	var buffer []byte
+	var records []common.Record
+	objectRW := v.NewObjectReaderWriter()
+	currentOffset := uint64(0)
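+	// Each page in the data file becomes one index record: the record ID is the
+	// last trace ID iterated from the page, and Start/Length hold the page's
+	// byte offset and length within the data file.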
+	for {
+		buffer, pageLen, err := dataReader.NextPage(buffer)
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			replayError = err
+			break
+		}
+
+		iter := encoding.NewIterator(bytes.NewReader(buffer), objectRW)
+		var lastID common.ID
+		var iterErr error
+		for {
+			var id common.ID
+			id, _, iterErr = iter.Next(context.TODO())
+			if iterErr != nil {
+				break
+			}
+			lastID = id
+		}
+
+		if iterErr != io.EOF {
+			replayError = iterErr
+			break
+		}
+
+		// make a copy so we don't hold onto the iterator buffer
+		recordID := append([]byte(nil), lastID...)
+		records = append(records, common.Record{
+			ID:     recordID,
+			Start:  currentOffset,
+			Length: pageLen,
+		})
+		currentOffset += uint64(pageLen)
+	}
+
+	return records, replayError, nil
+}
+
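+// VerifyIndex walks every record in the index and reads the corresponding
+// page from the data file; any read failure indicates that the index and the
+// data file do not line up.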
+func VerifyIndex(indexReader common.IndexReader, dataReader common.DataReader) error {
+	for i := 0; ; i++ {
+		record, err := indexReader.At(context.TODO(), i)
+		if err != nil {
+			return err
+		}
+
+		if record == nil {
+			break
+		}
+
+		// read data file at record position
+		_, _, err = dataReader.Read(context.TODO(), []common.Record{*record}, nil)
+		if err != nil {
+			fmt.Println("index/data is corrupt, record/data mismatch")
+			return err
+		}
+	}
+	return nil
+}
+
+func (cmd *indexCmd) Run(ctx *globalOptions) error {
+	blockID, err := uuid.Parse(cmd.BlockID)
+	if err != nil {
+		return err
+	}
+
+	r, w, _, err := loadBackend(&cmd.backendOptions, ctx)
+	if err != nil {
+		return err
+	}
+
+	meta, err := r.BlockMeta(context.TODO(), blockID, cmd.TenantID)
+	if err != nil {
+		return err
+	}
+
+	// replay file to extract records
+	records, replayError, err := ReplayBlockAndGetRecords(meta, cmd.backendOptions.Bucket+cmd.TenantID+"/"+cmd.BlockID+"/"+dataFilename)
+	if replayError != nil {
+		fmt.Println("error replaying block. data file likely corrupt", replayError)
+		return replayError
+	}
+	if err != nil {
+		fmt.Println("error accessing data/meta file")
+		return err
+	}
+
+	// write using IndexWriter
+	v, err := encoding.FromVersion(meta.Version)
+	if err != nil {
+		fmt.Println("error creating versioned encoding", err)
+		return err
+	}
+
+	indexWriter := v.NewIndexWriter(int(meta.IndexPageSize))
+	indexBytes, err := indexWriter.Write(records)
+	if err != nil {
+		fmt.Println("error writing records to indexWriter", err)
+		return err
+	}
+
+	// write to the local backend
+	err = w.Write(context.TODO(), "index", blockID, cmd.TenantID, indexBytes, false)
+	if err != nil {
+		fmt.Println("error writing index to backend", err)
+		return err
+	}
+
+	fmt.Println("index written to backend successfully")
+
+	// verify generated index
+
+	// get index file with records
+	indexFilePath := cmd.backendOptions.Bucket + cmd.TenantID + "/" + cmd.BlockID + "/" + indexFilename
+	indexFile, err := os.OpenFile(indexFilePath, os.O_RDONLY, 0644)
+	if err != nil {
+		fmt.Println("error opening index file")
+		return err
+	}
+
+	indexReader, err := v.NewIndexReader(backend.NewContextReaderWithAllReader(indexFile), int(meta.IndexPageSize), len(records))
+	if err != nil {
+		fmt.Println("error reading index file")
+		return err
+	}
+
+	// data reader
+	dataFilePath := cmd.backendOptions.Bucket + cmd.TenantID + "/" + cmd.BlockID + "/" + dataFilename
+	dataFile, err := os.OpenFile(dataFilePath, os.O_RDONLY, 0644)
+	if err != nil {
+		fmt.Println("error opening data file")
+		return err
+	}
+
+	dataReader, err := v.NewDataReader(backend.NewContextReaderWithAllReader(dataFile), meta.Encoding)
+	if err != nil {
+		fmt.Println("error reading data file")
+		return err
+	}
+	defer dataReader.Close()
+
+	err = VerifyIndex(indexReader, dataReader)
+	if err != nil {
+		return err
+	}
+
+	fmt.Println("index verified!")
+	return nil
+}
diff --git a/cmd/tempo-cli/cmd-list-block.go b/cmd/tempo-cli/cmd-list-block.go
index 7c0c676f2bf..36b3427fa09 100644
--- a/cmd/tempo-cli/cmd-list-block.go
+++ b/cmd/tempo-cli/cmd-list-block.go
@@ -25,7 +25,7 @@ type listBlockCmd struct {
 }
 
 func (cmd *listBlockCmd) Run(ctx *globalOptions) error {
-	r, c, err := loadBackend(&cmd.backendOptions, ctx)
+	r, _, c, err := loadBackend(&cmd.backendOptions, ctx)
 	if err != nil {
 		return err
 	}
diff --git a/cmd/tempo-cli/cmd-list-blocks.go b/cmd/tempo-cli/cmd-list-blocks.go
index 5eb16fabe6b..b3a6d5a01b0 100644
--- a/cmd/tempo-cli/cmd-list-blocks.go
+++ b/cmd/tempo-cli/cmd-list-blocks.go
@@ -17,7 +17,7 @@ type listBlocksCmd struct {
 }
 
 func (l *listBlocksCmd) Run(ctx *globalOptions) error {
-	r, c, err := loadBackend(&l.backendOptions, ctx)
+	r, _, c, err := loadBackend(&l.backendOptions, ctx)
 	if err != nil {
 		return err
 	}
diff --git a/cmd/tempo-cli/cmd-list-cachesummary.go b/cmd/tempo-cli/cmd-list-cachesummary.go
index c9495bc243a..1ddc5f98259 100644
--- a/cmd/tempo-cli/cmd-list-cachesummary.go
+++ b/cmd/tempo-cli/cmd-list-cachesummary.go
@@ -14,7 +14,7 @@ type listCacheSummaryCmd struct {
 }
 
 func (l *listCacheSummaryCmd) Run(ctx *globalOptions) error {
-	r, c, err := loadBackend(&l.backendOptions, ctx)
+	r, _, c, err := loadBackend(&l.backendOptions, ctx)
 	if err != nil {
 		return err
 	}
diff --git a/cmd/tempo-cli/cmd-list-compactionsummary.go b/cmd/tempo-cli/cmd-list-compactionsummary.go
index 9733c96e847..34f459d0ddb 100644
--- a/cmd/tempo-cli/cmd-list-compactionsummary.go
+++ b/cmd/tempo-cli/cmd-list-compactionsummary.go
@@ -17,7 +17,7 @@ type listCompactionSummaryCmd struct {
 }
 
 func (l *listCompactionSummaryCmd) Run(ctx *globalOptions) error {
-	r, c, err := loadBackend(&l.backendOptions, ctx)
+	r, _, c, err := loadBackend(&l.backendOptions, ctx)
 	if err != nil {
 		return err
 	}
diff --git a/cmd/tempo-cli/cmd-list-index.go b/cmd/tempo-cli/cmd-list-index.go
index faf4f2189bc..f607492a346 100644
--- a/cmd/tempo-cli/cmd-list-index.go
+++ b/cmd/tempo-cli/cmd-list-index.go
@@ -23,7 +23,7 @@ func (cmd *listIndexCmd) Run(ctx *globalOptions) error {
 		return err
 	}
 
-	r, _, err := loadBackend(&cmd.backendOptions, ctx)
+	r, _, _, err := loadBackend(&cmd.backendOptions, ctx)
 	if err != nil {
 		return err
 	}
diff --git a/cmd/tempo-cli/cmd-query-blocks.go b/cmd/tempo-cli/cmd-query-blocks.go
index 75f9188a625..b14f5657372 100644
--- a/cmd/tempo-cli/cmd-query-blocks.go
+++ b/cmd/tempo-cli/cmd-query-blocks.go
@@ -30,7 +30,7 @@ type queryBlocksCmd struct {
 }
 
 func (cmd *queryBlocksCmd) Run(ctx *globalOptions) error {
-	r, c, err := loadBackend(&cmd.backendOptions, ctx)
+	r, _, c, err := loadBackend(&cmd.backendOptions, ctx)
 	if err != nil {
 		return err
 	}
diff --git a/cmd/tempo-cli/cmd-view-index.go b/cmd/tempo-cli/cmd-view-index.go
index d05cf0688de..40c8fe3d07d 100644
--- a/cmd/tempo-cli/cmd-view-index.go
+++ b/cmd/tempo-cli/cmd-view-index.go
@@ -22,7 +22,7 @@ func (cmd *viewIndexCmd) Run(ctx *globalOptions) error {
 		return err
 	}
 
-	r, _, err := loadBackend(&cmd.backendOptions, ctx)
+	r, _, _, err := loadBackend(&cmd.backendOptions, ctx)
 	if err != nil {
 		return err
 	}
diff --git a/cmd/tempo-cli/main.go b/cmd/tempo-cli/main.go
index f31c3c5c75f..cdd9db91a14 100644
--- a/cmd/tempo-cli/main.go
+++ b/cmd/tempo-cli/main.go
@@ -16,13 +16,19 @@ import (
 	"github.com/grafana/tempo/tempodb/backend/s3"
 )
 
+const (
+	dataFilename    = "data"
+	indexFilename   = "index"
+	bloomFilePrefix = "bloom-"
+)
+
 type globalOptions struct {
 	ConfigFile string `type:"path" short:"c" help:"Path to tempo config file"`
 }
 
 type backendOptions struct {
 	Backend string `help:"backend to connect to (s3/gcs/local/azure), optional, overrides backend in config file" enum:",s3,gcs,local,azure"`
-	Bucket  string `help:"bucket to scan, optional, overrides bucket in config file"`
+	Bucket  string `help:"bucket (or path on local backend) to scan, optional, overrides bucket in config file"`
 
 	S3Endpoint string `name:"s3-endpoint" help:"s3 endpoint (s3.dualstack.us-east-2.amazonaws.com), optional, overrides endpoint in config file"`
 	S3User     string `name:"s3-user" help:"s3 username, optional, overrides username in config file"`
@@ -44,6 +50,11 @@ var cli struct {
 		Index viewIndexCmd `cmd:"" help:"View contents of block index"`
 	} `cmd:""`
 
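+	// gen commands rebuild a block's derived files (index, bloom filter)
+	// from its data file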
+	Gen struct {
+		Index indexCmd `cmd:"" help:"Generate index for a block"`
+		Bloom bloomCmd `cmd:"" help:"Generate bloom for a block"`
+	} `cmd:""`
+
 	Query struct {
 		API    queryCmd       `cmd:"" help:"query tempo http api"`
 		Blocks queryBlocksCmd `cmd:"" help:"query for a traceid directly from backend blocks"`
@@ -61,7 +72,7 @@ func main() {
 	ctx.FatalIfErrorf(err)
 }
 
-func loadBackend(b *backendOptions, g *globalOptions) (backend.Reader, backend.Compactor, error) {
+func loadBackend(b *backendOptions, g *globalOptions) (backend.Reader, backend.Writer, backend.Compactor, error) {
 	// Defaults
 	cfg := app.Config{}
 	cfg.RegisterFlagsAndApplyDefaults("", &flag.FlagSet{})
@@ -70,12 +81,12 @@ func loadBackend(b *backendOptions, g *globalOptions) (backend.Reader, backend.C
 	if g.ConfigFile != "" {
 		buff, err := ioutil.ReadFile(g.ConfigFile)
 		if err != nil {
-			return nil, nil, fmt.Errorf("failed to read configFile %s: %w", g.ConfigFile, err)
+			return nil, nil, nil, fmt.Errorf("failed to read configFile %s: %w", g.ConfigFile, err)
 		}
 
 		err = yaml.UnmarshalStrict(buff, &cfg)
 		if err != nil {
-			return nil, nil, fmt.Errorf("failed to parse configFile %s: %w", g.ConfigFile, err)
+			return nil, nil, nil, fmt.Errorf("failed to parse configFile %s: %w", g.ConfigFile, err)
 		}
 	}
 
@@ -97,24 +108,25 @@ func loadBackend(b *backendOptions, g *globalOptions) (backend.Reader, backend.C
 
 	var err error
 	var r backend.RawReader
+	var w backend.RawWriter
 	var c backend.Compactor
 
 	switch cfg.StorageConfig.Trace.Backend {
 	case "local":
-		r, _, c, err = local.New(cfg.StorageConfig.Trace.Local)
+		r, w, c, err = local.New(cfg.StorageConfig.Trace.Local)
 	case "gcs":
-		r, _, c, err = gcs.New(cfg.StorageConfig.Trace.GCS)
+		r, w, c, err = gcs.New(cfg.StorageConfig.Trace.GCS)
 	case "s3":
-		r, _, c, err = s3.New(cfg.StorageConfig.Trace.S3)
+		r, w, c, err = s3.New(cfg.StorageConfig.Trace.S3)
 	case "azure":
-		r, _, c, err = azure.New(cfg.StorageConfig.Trace.Azure)
+		r, w, c, err = azure.New(cfg.StorageConfig.Trace.Azure)
 	default:
 		err = fmt.Errorf("unknown backend %s", cfg.StorageConfig.Trace.Backend)
 	}
 
 	if err != nil {
-		return nil, nil, err
+		return nil, nil, nil, err
 	}
 
-	return backend.NewReader(r), c, nil
+	return backend.NewReader(r), backend.NewWriter(w), c, nil
 }
diff --git a/cmd/tempo-cli/test-data/single-tenant/b18beca6-4d7f-4464-9f72-f343e688a4a0/bloom-0-copy b/cmd/tempo-cli/test-data/single-tenant/b18beca6-4d7f-4464-9f72-f343e688a4a0/bloom-0-copy
new file mode 100644
index 00000000000..b3e41b466a5
Binary files /dev/null and b/cmd/tempo-cli/test-data/single-tenant/b18beca6-4d7f-4464-9f72-f343e688a4a0/bloom-0-copy differ
diff --git a/cmd/tempo-cli/test-data/single-tenant/b18beca6-4d7f-4464-9f72-f343e688a4a0/data b/cmd/tempo-cli/test-data/single-tenant/b18beca6-4d7f-4464-9f72-f343e688a4a0/data
new file mode 100644
index 00000000000..a56fd498e1e
Binary files /dev/null and b/cmd/tempo-cli/test-data/single-tenant/b18beca6-4d7f-4464-9f72-f343e688a4a0/data differ
diff --git a/cmd/tempo-cli/test-data/single-tenant/b18beca6-4d7f-4464-9f72-f343e688a4a0/index-copy 
b/cmd/tempo-cli/test-data/single-tenant/b18beca6-4d7f-4464-9f72-f343e688a4a0/index-copy new file mode 100644 index 00000000000..406614748e0 Binary files /dev/null and b/cmd/tempo-cli/test-data/single-tenant/b18beca6-4d7f-4464-9f72-f343e688a4a0/index-copy differ diff --git a/cmd/tempo-cli/test-data/single-tenant/b18beca6-4d7f-4464-9f72-f343e688a4a0/meta.json b/cmd/tempo-cli/test-data/single-tenant/b18beca6-4d7f-4464-9f72-f343e688a4a0/meta.json new file mode 100644 index 00000000000..08209831522 --- /dev/null +++ b/cmd/tempo-cli/test-data/single-tenant/b18beca6-4d7f-4464-9f72-f343e688a4a0/meta.json @@ -0,0 +1 @@ +{"format":"v2","blockID":"b18beca6-4d7f-4464-9f72-f343e688a4a0","minID":"AAAAAAAAAAAAsSEqOi2DZA==","maxID":"AAAAAAAAAAD/7CsmwNUYYA==","tenantID":"single-tenant","startTime":"2021-08-18T11:42:35.8352083Z","endTime":"2021-08-18T11:44:35.7427324Z","totalObjects":621,"size":462536,"compactionLevel":0,"encoding":"zstd","indexPageSize":256000,"totalRecords":611,"dataEncoding":"v1","bloomShards":1} \ No newline at end of file diff --git a/docs/tempo/dev-internals/Tempo Encoding.drawio b/docs/tempo/dev-internals/Tempo Encoding.drawio new file mode 100644 index 00000000000..380f55c1086 --- /dev/null +++ b/docs/tempo/dev-internals/Tempo Encoding.drawio @@ -0,0 +1 @@ +7V1bd9u4Ef4tfdBp9iE6AHh/tJ249Z6kya7bpts3SoQkJhSpUpRt7a8vQIA3gKRoSyRIneTBoUACojAz31wwA8y0u+3L32J3t/kceTiYIeC9zLQPM4R0y9DIf7TlyFqQnrWsY99jbbBoePT/xLwR8NaD7+F95cEkioLE31Ubl1EY4mVSaXPjOHquPraKguq37tw1lhoel24gt37zvWTDWjUNgOLG37G/3vCvJrccdmfrZk/zR/cb14ueS03ax5l2F0dRwq62L3c4oNOXTQzrd99wN3+zGIdJlw7Rrw/W4r8vPz79ahy/W/f3O/0fy/eGxV8uOWY/GXtkBvjHKE420ToK3eBj0XobR4fQw3RYQD4Vz3yKoh1phKTxO06SIyene0gi0rRJtgG/i1/85D+0+9zgn/4o3fnwwkdOPxyzD2ESH0ud6Mc/yveKbumnrB/7ffRHNc4bb9pHh3iJWyYrY0A3XuOk5Tkrpy4RDBxtMXkf0i/GgZv4T9X3cDmDrvPnChKSC07F11DUngpFFVDGVkkZ22ADP7nBgX+Vv90FeEt+Lvn6KNzXUu6TuyCoWpltN/DXIblekp44Jg1POE58glo3/MbW9zxGWLz3/3QX6Xh0zneRHybpDzNuZ8aHnAp0APxSB6m8c4FjZfq0cKA8y3z492Bu6YDjDtcL73X2sTMh+Ohf6c8pPRKtVnvCACKl8pd4O/GgDWqpM0K5mhBSQss5UyIFRshYOKMW5y9bA9Uh2JvxXj1wiyTpN7sdJqwQz5AZkBm5XdCrNb1690CFeOUu8S8Sh1X553njJ/hx56ZT/0wsriqvNEqyRMFG2dQdozJvps7n7bkwfmAGBJuS3WOCZhKdBZoQOErlriR1hQyekrtmqTNksUs/fcWxTyaMgvmlZRF1lUWg1G5Bksg8JjF2t364vg2i5Q9ZcOSWf258okPBgj0P0g+HPeED6jWkPERHJBfR4jtRbfT2Ko5ogxvSP6mM8m9jPVw6nLv8Qdqzgefk4tEPyZzX3qaC4e6zIX3Chi8zdEdFCLO3oUNv6N/bw2qFY+wV0KBY+i1B+mEu6s8V30cWfwj6kn/rFUiatXj+UzOPZC3kbWqeKzVxZnLzrwN7aq4d8zZ6N8TPZXYy3S0lS7jY70qD7neUF9ioKVcx+u/i6Mn3Uu703MT9FhP6xvPS+5X7Nb22YpaBsMoyuaItcQwCdRyD+uIY3ZygoYZUWWp2R+2AVOoGR/acOKwvjrXEnpzPxJi21WeyEafVeJ0kWyKTrONOa/EaLF5IwwDWtK8oc/I7oxkLyjENnLLIwQ9SrKbK2ouew71LvW6vC1pz9T0lRDac8SEylGZl/IiszHU2utrr57rO59EU/aRpDzRVGqDUtZ807YGmSt1q25R08lUGnRnvthlQtmNWg85w7OYUlM3e5LijruO/ieVDaIe9j+Ey8vxwzYwfFkZUboI4glNo1YQR8ojhMFFER60J8pYoIqwAW4FzA0CbNQl1BeXY0JO8SDNADAQZgsltGhK/Dxw1d9SaZxPj965BELUmN5Td6ycZ1gbgd2CNjd+Nnx7mK9jd6cjuSmN+xhTDuKMnKdNUl19v10xn7pT+CQtJGuy2/n4Tx+6x9Bj3HBq/1zGr6/wQGEISnPSiRnsPcsFe4rKWvDmZrKwxLEp0ZmfnXAP0QlwHTnCd6bT36InrrOnlLAlWIBrSDOwMohdJWjqf65wTXIfO72Fy16ZfPrWnmIVsjR4dS2nmSvnUauc65FitHXpiugySS+5UdQmwxI7m/w40Tf92FYXJ+33KUDfkAYh2L+S/lLxpHtA6Zcr3yyiIYvZIvF68Q7pNn0qzgJDuFNcG+KUYu8g7wbNyVtEqircuna5yOhMmHtzxeYNj/JfG5clrCOtyaGiL6zpQE5KJeexjxIFdR2K9JzlgM0CgFlRFVb0jD4FSQxmWFMFU0z1BR+2gON8TQhl/ryyphzNzo/iBuWaiqqecZY6PF7uQTDaWlfPaLN30D8uE3GA3T7tNldqMp8uyREv6cLiMtmydK4kJIu5r8uh9OgFehPfhX+nVxn3KUnQrWT9ZTi4gKprmCB2TTTrwXfoiRV5vmu1ZvASO1afni7GLEeTnIrULDaMK03U0yDuA+LnI3GCZ28K6rFgVw95LispJAxmWaOKLBkHP9TWOnNVwZboDNQTfC7vX0JFTpcLoVYds9hb5C6f1xn3mCEWrGUuZn/EqjUWqMGI/STAruig7UMNb1YJON+oyMPUhrWpNrkeTpST0bmjBPJ
WIwN3v/eUUIPe0Odw1WAIbVkZKRDNqaJa1nbuAYmh6ywKKKervrkgNkVaFaksfFqk1JLHe42G3C/wcrbkNSPA7zmSXoObKX18HkGvtYEGAXEf2mZn92ViwSmttrqEyT9ladcQeAxxAaSrpBdz4IfNPjK6ppOZFlrtqlqf01tVbR1y9bUCfy3GPLoHG9hAkfl7YSeTcTaj/JtoIBEWoW5k+zbw3WgziyQ9mQxQ1oOITt2Jd5zWgERfMlpCEbkG9iiJjNyuhodYHnX7MsDMAQaNBmw2UEyQnRHMpva0V0SFyP8WVfduWrf1hY+im2jKON4lD1T+As+F072CLqedFy7OVmUnF2ZQlEHWm6tn5cGdSVY5j0fdF4Cu1Wu79+pLZZz8dGj+ReTm4QbolwYL7ULlX9duB6Iu0t1DlehVmjNlAt8KrghBV7ZjxL60YcnyswgeDazdLKGyA6vcDMieT6X1BOMtms//MtDMLcuQKhWqx4XWgj3kypAMcOLFaw4zF5FrDDH3AQ2OB4QBIpFlzAYsMUINFGWANgkXTSa+9JBbpHbHIbNjSZBgssiajJ0awSNKdpkpThqzJ+EAqaGPpKmnjqN2m8Url7ez6ljOTMSej4KZE1Mus4rw2LZ+WvgkGlH4iL1+H7T36Scw35cWfh8b1nnd+8+a5/VulCIgOsubUGKWDbnVhTbGWRlmgMAvAjdzygVCpOTux9fTORFVrMtUEfwugGxzLoC1kj0DDVB3sg4qjfW9iewHMBlzLMrvu26N6LUuTGD9N41xGNFiY5BtrszVdYe/sNJ8fe4cd3/Wq0TRIIjIJmzRy9ewT/mVdyXcs/LAmmj7JSCQ8EYokXGsL2ahodlYksvrix9ruPaZ6WGg8gIDetrGRNaQm7IoIahcOTHknrw91Ij68gS8eJAKtuqizM6RStCe4S2ezdW+AOSj/g5I49J38ZHbe/Qs25M6diaWSn03cyGGTtG3Z5b6ychr7VNqjJu6bPfq0R1NebP1M015npQNQFCKnhgTktA3VyGn93ITpFbhoTWRRSE6a6eQeUK5nhwRE3EtolJZJIp518hQ+4uCb54FcngQ7t8xqp/5gz5J11e94GcVUW6kMn0jb3qgPn1jXt/f3KXNdM6tE0E1r7mgQaLpuI8vICq16iGnoE9Qtyrawcrra3Irzc7MskpIE/auUY3sXbQmmXI3K4EzcpjNsYE5tsyVHtpO/spowpfrCHJ++gECOwzyEa7xPauKl+427o5fLw4Ly64kZWzCk+7TIG/Ld0r4cksAPMW/33PjHFzKMn6SwMwdGtRGlrZfaT0X0UbSu+c12bzSAcnC8GWaugQhIM5vCH+qogGoikm7i/o5dNZn4UHSnDeVlZlCfnsmj8IwB1DnMqNbkyaI0DSZPZa8vkFcXsb+yl52f4UserhysS3drwfLzvFqb7+5ylaVL+mkjS7er62vjL13K2VsATHaorQrAtLTxAeb0ctJVAua5O/cM5SPKBlsZMDNfo6GIb6IYdrICyjINwVHUx49hcij5ge4Vqc7q04X80jGA2PTOZVEIYpkSGjuI6TLrl0GsxoyT060Kqy7bdIdttYqnF2ZuY/1Wy80WAtCjX0XOOVQEPXWWm22Nz3JDP0HvFaAHpwF6NTXP1xzdZ0zcCl/IGb+nWbOZ6ZfFdzIlzExTh1uGMzrcsqZnrCk8RC/bq/Q0biHF+2xMscZrAmTV1NZF1OyVy1JbFCMbAkJpkN51gaZHi2x6pckqY2mdT/Tu6XxQXa/meUsHxFfuour4nTe7NlDbPrY2MNq+pecca2jJm1eWrE3RnW7dAK4m4a22/6wlxWGiNmwDQpcCj3o20W/l2SwZQMCuPjewlU3afPs+gWzXsOJvC2vZCNScYzPwgr89xdxwZSZVlnQ99kpTWw5r8sKW9JyBbzef0nfZBe6V1LlwPm4FSEuvxijReXA5RNWnTEamAlk2ghKjWBfPTlSftOTIK5EyV5845qUrSsjzMsxBKcIRaCjPV3m9sXhqpL4NQkeuZPjgx0Ts040y2SlHbkorWvG+IsqaHQBLLDuJrISPk/5wiioudurt5dIzxNmvy7ysqw5D/YmPbKCPz/zKyAAuQwdTJEPN/kV2izT3QAW5aIWeo7wh351WaQX4CdNRtpF3CKYvCEhQI4XfURaEujMdexQEOflVgYIVs4KRJU+MVTMv4gFXF5wXOceNsuZjEh+WREXclzbIBWUzcxl502fUPH6R06NueaMOK97AqORjHNFT0QuFS1hs85lMJH3i/w== \ No newline at end of file diff --git a/docs/tempo/dev-internals/Tempo Encoding.png b/docs/tempo/dev-internals/Tempo Encoding.png new file mode 100644 index 00000000000..d5c2ad37b33 Binary files /dev/null and b/docs/tempo/dev-internals/Tempo Encoding.png differ diff --git a/docs/tempo/website/operations/tempo_cli.md b/docs/tempo/website/operations/tempo_cli.md index a9be44aa1f9..2a1d2871fab 100644 --- a/docs/tempo/website/operations/tempo_cli.md +++ b/docs/tempo/website/operations/tempo_cli.md @@ -199,3 +199,39 @@ Arguments: ```bash tempo-cli view index -c ./tempo.yaml single-tenant ca314fba-efec-4852-ba3f-8d2b0bbf69f1 ``` + +## Generate Bloom Filter + +To generate the bloom filter for a block if the files were deleted/corrupted. + +**Note:** ensure that the block is in a local backend in the expected directory hierarchy, i.e. `path / tenant / blocks`. + +Arguments: +- `tenant-id` The tenant ID. Use `single-tenant` for single tenant setups. +- `block-id` The block ID as UUID string. +- `bloom-fp` The false positive to be used for the bloom filter. +- `bloom-shard-size` The shard size to be used for the bloom filter. 
+
+**Example:**
+```bash
+tempo-cli gen bloom --backend=local --bucket=./cmd/tempo-cli/test-data/ single-tenant b18beca6-4d7f-4464-9f72-f343e688a4a0 0.05 100000
+```
+
+The bloom filter shards will be written to the expected locations (`bloom-0`, `bloom-1`, ...) in the block folder.
+
+## Generate Index
+
+To generate the index for a block if the file was deleted or corrupted.
+
+**Note:** ensure that the block is in a local backend in the expected directory hierarchy, i.e. `path / tenant / blocks`.
+
+Arguments:
+- `tenant-id` The tenant ID. Use `single-tenant` for single tenant setups.
+- `block-id` The block ID as UUID string.
+
+**Example:**
+```bash
+tempo-cli gen index --backend=local --bucket=./cmd/tempo-cli/test-data/ single-tenant b18beca6-4d7f-4464-9f72-f343e688a4a0
+```
+
+The index will be written to the expected location (the `index` file) in the block folder.
diff --git a/docs/tempo/website/troubleshooting/bad-blocks.md b/docs/tempo/website/troubleshooting/bad-blocks.md
index ee068c45f88..4bb50a25ce3 100644
--- a/docs/tempo/website/troubleshooting/bad-blocks.md
+++ b/docs/tempo/website/troubleshooting/bad-blocks.md
@@ -15,10 +15,22 @@ This might indicate that there is a bad (corrupted) block in the backend.
 
 A block can get corrupted if the ingester crashed while flushing the block to the backend.
 
+## Fixing bad blocks
+
+At the moment, a backend block can only be fixed if the index or bloom-filter is corrupt or deleted.
+
+To fix such a block, first download it to a machine where you can run `tempo-cli`.
+
+Next, run the `tempo-cli` `gen index` or `gen bloom` command, depending on which file is corrupt or deleted.
+The command creates a fresh index or bloom-filter from the data file and writes it to the expected location in the block folder.
+To read all the options for this command, check the [cli docs](../../operations/tempo_cli).
+
+Finally, upload the generated index or bloom-filter to the object store backend, under the folder for the block.
+
 ## Removing bad blocks
 
-At this moment it's not possible to repair a bad block.
-The only solution is to remove the block, which can result in some loss of data.
+If the steps above reveal that the data file itself is corrupt, the only remaining solution is to delete
+the block, which can result in some loss of data.
 
 The mechanism to remove a block from the backend is backend-specific,
 but the block to remove will be at: