This repository has been archived by the owner on Apr 2, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 168
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Batching helps achieve better ingest performance, especially when traces are sent one by one (which is the case for the Jaeger collector). This commit also adds async support for traces, meaning that the client doesn't need to wait for the DB write. This increases ingest performance at a small risk of data loss. A new CLI flag, `tracing.async-acks`, is added.
- Loading branch information
1 parent
9c1653a
commit a7546c1
Showing
19 changed files
with
569 additions
and
176 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,214 @@ | ||
package trace | ||
|
||
import ( | ||
"context" | ||
"runtime" | ||
"sync" | ||
"time" | ||
|
||
"github.com/timescale/promscale/pkg/log" | ||
"go.opentelemetry.io/collector/pdata/ptrace" | ||
) | ||
|
||
// Default tuning values for the trace batcher.
const (
	// defaultReqBufferSize is the capacity of the incoming request queue.
	// Tweaking it does not result in a significant perf impact.
	defaultReqBufferSize = 10000
	// defaultBatchSize is the span count at which a batch is considered full.
	// It is a lower bound: we might produce bigger batches sometimes.
	defaultBatchSize = 5000
	// defaultBatchTimeout was picked arbitrarily. We don't want the client to
	// wait too long.
	defaultBatchTimeout = 200 * time.Millisecond
	// defaultMaxBufferedBatches was picked arbitrarily. We want to avoid
	// waiting on batches.
	defaultMaxBufferedBatches = 200
)
|
||
// defaultBatchWorkers is the default number of batcher goroutines. We only
// take half of the CPUs so the other half can be used for writers. The value
// is clamped to at least one: on a single-CPU host NumCPU()/2 is zero, and
// the batcher constructor panics when asked for zero batchers.
var defaultBatchWorkers = func() int {
	if half := runtime.NumCPU() / 2; half > 0 {
		return half
	}
	return 1
}()
|
||
// We batch individual InsertTracesReq | ||
type Batch struct { | ||
traces ptrace.Traces | ||
spanCount int | ||
reqStatus []chan error | ||
} | ||
|
||
func NewBatch() *Batch { | ||
return &Batch{ | ||
traces: ptrace.NewTraces(), | ||
spanCount: 0, | ||
reqStatus: make([]chan error, 0, defaultBatchSize), | ||
} | ||
} | ||
|
||
func (tb *Batch) add(in InsertTracesReq) { | ||
inSpans := in.payload.SpanCount() | ||
if inSpans == 0 { | ||
return | ||
} | ||
in.payload.ResourceSpans().MoveAndAppendTo(tb.traces.ResourceSpans()) | ||
tb.spanCount += inSpans | ||
tb.reqStatus = append(tb.reqStatus, in.err) | ||
} | ||
|
||
func (tb *Batch) isFull() bool { | ||
return tb.spanCount >= defaultBatchSize | ||
} | ||
|
||
func (tb *Batch) isEmpty() bool { | ||
return tb.spanCount == 0 | ||
} | ||
|
||
// Batcher batches trace requests and sends batches to batch writer | ||
// This is done to achieve better ingest performance especially b/c | ||
// Jaeger collector sends traces one by one | ||
type Batcher struct { | ||
in chan InsertTracesReq | ||
stop chan struct{} | ||
batchers int | ||
batchWriter *batchWriter | ||
once sync.Once | ||
bufferedBatches chan Batch | ||
wg sync.WaitGroup | ||
timeout time.Duration | ||
} | ||
|
||
type WriteTraces func(context.Context, ptrace.Traces) error | ||
|
||
func NewBatcher(batchers, writers int, writer Writer) *Batcher { | ||
return newBatcherWithTimeout(batchers, writers, writer, defaultBatchTimeout) | ||
} | ||
|
||
func newBatcherWithTimeout(batchers, writers int, writer Writer, timeout time.Duration) *Batcher { | ||
if batchers == 0 || writers == 0 { | ||
panic("number of batchers and writeres must be greater then zero") | ||
} | ||
bufferedBatches := make(chan Batch, defaultMaxBufferedBatches) | ||
return &Batcher{ | ||
batchWriter: newBatchWriter(writers, writer, bufferedBatches), | ||
in: make(chan InsertTracesReq, defaultReqBufferSize), | ||
stop: make(chan struct{}, 1), | ||
batchers: batchers, | ||
bufferedBatches: bufferedBatches, | ||
timeout: timeout, | ||
} | ||
} | ||
|
||
func (b *Batcher) Run() { | ||
for i := 0; i < b.batchers; i++ { | ||
b.wg.Add(1) | ||
go func() { | ||
defer b.wg.Done() | ||
b.batch() | ||
}() | ||
} | ||
b.batchWriter.run() | ||
} | ||
|
||
func (b *Batcher) batch() { | ||
ticker := time.NewTicker(b.timeout) | ||
batch := NewBatch() | ||
flushBatch := func(batch *Batch) *Batch { | ||
batchCp := *batch | ||
b.bufferedBatches <- batchCp | ||
return NewBatch() | ||
} | ||
for { | ||
select { | ||
case <-b.stop: | ||
ticker.Stop() | ||
if !batch.isEmpty() { | ||
flushBatch(batch) | ||
} | ||
return | ||
case item := <-b.in: | ||
if batch.isFull() { | ||
batch = flushBatch(batch) | ||
ticker.Reset(b.timeout) | ||
} | ||
batch.add(item) | ||
case <-ticker.C: | ||
if !batch.isEmpty() { | ||
batch = flushBatch(batch) | ||
} | ||
} | ||
} | ||
} | ||
|
||
func (b *Batcher) Stop() { | ||
b.once.Do(func() { | ||
b.stop <- struct{}{} | ||
close(b.stop) | ||
b.wg.Wait() | ||
close(b.bufferedBatches) | ||
b.batchWriter.stop() | ||
}) | ||
} | ||
|
||
// batchWriter writes batches using writer | ||
type batchWriter struct { | ||
batches chan Batch | ||
writers int | ||
writer Writer | ||
stopCh chan struct{} | ||
once sync.Once | ||
wg sync.WaitGroup | ||
} | ||
|
||
func newBatchWriter(writers int, writer Writer, batches chan Batch) *batchWriter { | ||
return &batchWriter{ | ||
writer: writer, | ||
writers: writers, | ||
batches: batches, | ||
stopCh: make(chan struct{}, 1), | ||
} | ||
} | ||
|
||
func (bw *batchWriter) run() { | ||
for i := 0; i < bw.writers; i++ { | ||
bw.wg.Add(1) | ||
go func() { | ||
defer bw.wg.Done() | ||
for { | ||
select { | ||
case b, ok := <-bw.batches: | ||
if !ok { | ||
return | ||
} | ||
bw.flush(b) | ||
case <-bw.stopCh: | ||
timeoutCtx, cancel := context.WithTimeout(context.Background(), time.Second*30) | ||
defer cancel() | ||
bw.drainBuffer(timeoutCtx) | ||
return | ||
} | ||
} | ||
}() | ||
} | ||
} | ||
|
||
func (bw *batchWriter) flush(b Batch) { | ||
if b.spanCount != 0 { | ||
err := bw.writer.InsertTraces(context.Background(), b.traces) | ||
for _, req := range b.reqStatus { | ||
req <- err | ||
} | ||
} | ||
|
||
} | ||
|
||
func (bw *batchWriter) drainBuffer(ctx context.Context) { | ||
for { | ||
select { | ||
case b, ok := <-bw.batches: | ||
if !ok { | ||
return | ||
} | ||
bw.flush(b) | ||
case <-ctx.Done(): | ||
log.Warn("msg", "Forced batchWriter shutdown due to timeout. Some batches migth not be written.") | ||
return | ||
} | ||
} | ||
} | ||
|
||
func (bw *batchWriter) stop() { | ||
bw.once.Do(func() { | ||
bw.stopCh <- struct{}{} | ||
close(bw.stopCh) | ||
bw.wg.Wait() | ||
}) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
package trace | ||
|
||
import ( | ||
"context" | ||
"testing" | ||
"time" | ||
|
||
"github.com/timescale/promscale/pkg/tests/testdata" | ||
|
||
"go.opentelemetry.io/collector/pdata/ptrace" | ||
) | ||
|
||
type noopWriter struct { | ||
callBack func(t ptrace.Traces) | ||
} | ||
|
||
func (nw *noopWriter) InsertTraces(ctx context.Context, traces ptrace.Traces) error { | ||
nw.callBack(traces) | ||
return nil | ||
} | ||
|
||
func (nw *noopWriter) Close() {} | ||
|
||
func TestTraceBatcherBatching(t *testing.T) { | ||
traces := testdata.GenerateTestTraces(4) | ||
batchChecker := func(traces ptrace.Traces) { | ||
if traces.SpanCount() <= 250 { // one trace has 250 spans | ||
t.Errorf("wrong batch size. got: %v", traces.SpanCount()) | ||
} | ||
} | ||
batcher := newBatcherWithTimeout(1, 1, &noopWriter{callBack: batchChecker}, time.Hour) // we set long enough batch timeout | ||
batcher.Run() | ||
for _, t := range traces { | ||
batcher.in <- InsertTracesReq{payload: t, err: make(chan error, 1)} | ||
} | ||
batcher.Stop() | ||
} | ||
|
||
func TestTraceBatcherTimeout(t *testing.T) { | ||
traces := testdata.GenerateTestTraces(4) | ||
flushCounter := 0 | ||
batchChecker := func(traces ptrace.Traces) { | ||
flushCounter++ | ||
if traces.SpanCount() != 250 { // one trace has 250 spans meaning there is no batching on size | ||
t.Errorf("wrong batch size. got: %v", traces.SpanCount()) | ||
} | ||
} | ||
batcher := NewBatcher(1, 1, &noopWriter{callBack: batchChecker}) | ||
batcher.Run() | ||
for _, t := range traces { | ||
batcher.in <- InsertTracesReq{payload: t, err: make(chan error, 1)} | ||
time.Sleep(350 * time.Millisecond) // to make sure batch timeout is reached | ||
} | ||
batcher.Stop() | ||
if flushCounter != 4 { | ||
t.Errorf("wrong number of batch flushes. got: %v", flushCounter) | ||
} | ||
} |
Oops, something went wrong.