Preallocate byte slices from prom pool (#679)
* Preallocate byte slices from prom pool

Signed-off-by: Annanay <annanayagarwal@gmail.com>

* make fmt

Signed-off-by: Annanay <annanayagarwal@gmail.com>

* Add tests around prealloc

Signed-off-by: Annanay <annanayagarwal@gmail.com>

* Add CHANGELOG, comment out test and add reason

Signed-off-by: Annanay <annanayagarwal@gmail.com>

* Adjust pool bucket sizes

Signed-off-by: Annanay <annanayagarwal@gmail.com>
annanay25 authored Apr 30, 2021
1 parent 6ccd6d8 commit 30d2497
Showing 10 changed files with 232 additions and 54 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -2,6 +2,7 @@

* [ENHANCEMENT] Improve WAL Replay by not rebuilding the WAL. [#668](https://github.com/grafana/tempo/pull/668)
* [ENHANCEMENT] Add config option to disable write extension to the ingesters. [#677](https://github.com/grafana/tempo/pull/677)
* [ENHANCEMENT] Preallocate byte slices on ingester request unmarshal. [#679](https://github.com/grafana/tempo/pull/679)

## v0.7.0

18 changes: 11 additions & 7 deletions integration/microservices/README.md
@@ -1,13 +1,15 @@
# tempo-load-test
This repo aims to make it easier to measure and analyze tempo performance in micro-services mode. There are already many examples for running tempo under load, but they use the single-binary approach and are not representative of what is occurring in larger installations. Here tempo is run with separate containers for distributor and ingesters, and replication factor = 2, meaning that the distributor will mirror all incoming traces to 2 ingesters.
This repo aims to make it easier to measure and analyze tempo performance in micro-services mode.
There are already many examples for running tempo under load, but they use the single-binary approach and are not representative of what is occurring in larger installations.
Here tempo is run with separate containers for distributor and ingesters, and replication factor = 3, meaning that the distributor will mirror all incoming traces to 3 ingesters.

![dashboard](/dashboard.png)
![dashboard](./dashboard.png)

# What this repo contains
1. Tempo in micro-services mode
1. 1x distributor
1. 2x ingesters
1. ReplicationFactor=2 meaning that the distributor mirrors incoming traces
1. 3x ingesters
1. ReplicationFactor=3 meaning that the distributor mirrors incoming traces
1. S3/Min.IO virtual storage
1. Dashboard and metrics using
1. Prometheus
@@ -28,10 +30,12 @@ This repo is expected to be used in conjunction with tempo development in a rapid
*Repeat steps 1-2 to see how code changes affect performance.*

## Controlling load
The synthetic-load-generator is included and configured to issue 1000 spans/s per instance. By default 2 instances are ran which will issue 2000 spans/s. Change the `scale:` value in `docker-compose.yaml` to increase or decrease the load as desired.
The synthetic-load-generator is included and configured to issue 1000 spans/s per instance.
To increase load, use the `--scale` flag as shown below:

1. Edit docker-compose.yaml
1. Run `docker-compose up -d` to dynamically add or remove load generator instances.
```
docker-compose up -d --scale synthetic-load-generator=4
```

# Key Metrics
As tempo is designed to be very horizontally scalable, the key metrics are _per volume unit_, i.e. spans / s / cpu core.
3 changes: 2 additions & 1 deletion integration/microservices/prometheus.yaml
@@ -18,4 +18,5 @@ scrape_configs:
- targets:
- distributor:3100
- ingester-0:3100
- ingester-1:3100
- ingester-1:3100
- ingester-2:3100
2 changes: 1 addition & 1 deletion integration/microservices/tempo.yaml
@@ -21,7 +21,7 @@ memberlist:
- ingester-2:7946

overrides:
ingestion_burst_size: 100000
ingestion_burst_size_bytes: 10_000_000
max_traces_per_user: 1000000

server:
4 changes: 2 additions & 2 deletions modules/distributor/distributor.go
@@ -273,11 +273,11 @@ func (d *Distributor) sendToIngestersViaBytes(ctx context.Context, userID string
localCtx = user.InjectOrgID(localCtx, userID)

req := tempopb.PushBytesRequest{
Requests: make([][]byte, len(indexes)),
Requests: make([]tempopb.PreallocRequest, len(indexes)),
}

for i, j := range indexes {
req.Requests[i] = rawRequests[j][0:]
req.Requests[i].Request = rawRequests[j][0:]
}

c, err := d.pool.GetClientFor(ingester.Addr)
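A minimal sketch of the pattern this hunk introduces: already-marshalled trace bytes are wrapped in `tempopb.PreallocRequest` values rather than stored in a plain `[][]byte`. The `rawRequests` and `indexes` parameters and the import path below are illustrative assumptions, not the commit's actual code.

```go
package sketch

import "github.com/grafana/tempo/pkg/tempopb"

// buildPushBytesRequest wraps pre-marshalled trace bytes in PreallocRequest
// values, mirroring the change to sendToIngestersViaBytes above.
func buildPushBytesRequest(rawRequests [][]byte, indexes []int) *tempopb.PushBytesRequest {
	req := &tempopb.PushBytesRequest{
		Requests: make([]tempopb.PreallocRequest, len(indexes)),
	}
	for i, j := range indexes {
		// Only the slice header is assigned here; the payload is not copied.
		req.Requests[i].Request = rawRequests[j]
	}
	return req
}
```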
6 changes: 5 additions & 1 deletion modules/ingester/ingester.go
@@ -182,7 +182,7 @@ func (i *Ingester) PushBytes(ctx context.Context, req *tempopb.PushBytesRequest)
// Unmarshal and push each request
for _, v := range req.Requests {
r := tempopb.PushRequest{}
err := r.Unmarshal(v)
err := r.Unmarshal(v.Request)
if err != nil {
return nil, err
}
@@ -192,6 +192,10 @@ func (i *Ingester) PushBytes(ctx context.Context, req *tempopb.PushBytesRequest)
return nil, err
}
}

// Reuse request instead of handing over to GC
tempopb.ReuseRequest(req)

return &tempopb.PushResponse{}, nil
}

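A condensed sketch of the receive path this hunk changes: each pooled byte slice is unmarshalled into its own `PushRequest`, and once every sub-request has been pushed the slices are handed back to the pool via `ReuseRequest`. The `push` callback and the simplified error handling are stand-ins for the real ingester internals, not the commit's code.

```go
package sketch

import (
	"context"

	"github.com/grafana/tempo/pkg/tempopb"
)

// pushBytes is a stripped-down stand-in for Ingester.PushBytes.
func pushBytes(ctx context.Context, req *tempopb.PushBytesRequest,
	push func(context.Context, *tempopb.PushRequest) error) error {
	for _, v := range req.Requests {
		r := tempopb.PushRequest{}
		// The generated Unmarshal copies what it needs out of v.Request,
		// so the pooled slice can be recycled afterwards.
		if err := r.Unmarshal(v.Request); err != nil {
			return err
		}
		if err := push(ctx, &r); err != nil {
			return err
		}
	}
	// Reuse the request's byte slices instead of handing them to the GC.
	tempopb.ReuseRequest(req)
	return nil
}
```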
46 changes: 46 additions & 0 deletions pkg/tempopb/prealloc.go
@@ -0,0 +1,46 @@
package tempopb

import (
"github.com/prometheus/prometheus/pkg/pool"
)

var (
// buckets: [500B, 1KB, 2KB, 4KB, 8KB, 16KB] (factor 2, from 500 to 16,000 bytes)
bytePool = pool.New(500, 16_000, 2, func(size int) interface{} { return make([]byte, 0, size) })
)

// PreallocRequest is a wrapper around a repeated bytes field that preallocates its slice from bytePool on Unmarshal.
type PreallocRequest struct {
Request []byte
}

// Unmarshal implements proto.Message.
func (r *PreallocRequest) Unmarshal(dAtA []byte) error {
r.Request = bytePool.Get(len(dAtA)).([]byte)
r.Request = r.Request[:len(dAtA)]
copy(r.Request, dAtA)
return nil
}

// MarshalTo implements proto.Marshaller.
// returned int is not used
func (r *PreallocRequest) MarshalTo(dAtA []byte) (int, error) {
copy(dAtA[:], r.Request[:])
return len(r.Request), nil
}

// Size implements proto.Sizer.
func (r *PreallocRequest) Size() (n int) {
if r == nil {
return 0
}
return len(r.Request)
}

// ReuseRequest puts the byte slice back into bytePool for reuse.
func ReuseRequest(req *PushBytesRequest) {
for i := range req.Requests {
// We want to preserve the underlying allocated memory; [:0] retains the cap() of the slice while resetting its length to zero
bytePool.Put(req.Requests[i].Request[:0])
}
}
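A short usage sketch of the round trip through the pool defined above: `Unmarshal` draws a slice from `bytePool` and copies the payload into it, and `ReuseRequest` returns the slices once the request has been consumed. The import path and payload here are illustrative assumptions.

```go
package main

import (
	"fmt"

	"github.com/grafana/tempo/pkg/tempopb"
)

func main() {
	payload := []byte("marshalled trace bytes") // illustrative payload

	// Unmarshal pulls a pooled slice with cap >= len(payload) and copies into it.
	var prealloc tempopb.PreallocRequest
	if err := prealloc.Unmarshal(payload); err != nil {
		panic(err)
	}

	req := &tempopb.PushBytesRequest{
		Requests: []tempopb.PreallocRequest{prealloc},
	}
	fmt.Println(req.Requests[0].Size()) // prints 22, the payload length

	// Once the request has been fully processed, give the slices back to the pool.
	tempopb.ReuseRequest(req)
}
```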
116 changes: 116 additions & 0 deletions pkg/tempopb/prealloc_test.go
@@ -0,0 +1,116 @@
package tempopb

import (
"math/rand"
"testing"

"github.com/stretchr/testify/assert"
)

func TestUnmarshal(t *testing.T) {
var dummyData = make([]byte, 10)
rand.Read(dummyData)

preallocReq := &PreallocRequest{}
err := preallocReq.Unmarshal(dummyData)
assert.NoError(t, err)

assert.Equal(t, dummyData, preallocReq.Request)
}

func TestMarshal(t *testing.T) {
preallocReq := &PreallocRequest{
Request: make([]byte, 10),
}
rand.Read(preallocReq.Request)

var dummyData = make([]byte, 10)
_, err := preallocReq.MarshalTo(dummyData)
assert.NoError(t, err)

assert.Equal(t, preallocReq.Request, dummyData)
}

func TestSize(t *testing.T) {
preallocReq := &PreallocRequest{
Request: make([]byte, 10),
}
assert.Equal(t, 10, preallocReq.Size())
}

/* The prometheus pool pkg is a wrapper around sync.Pool
From the comments on sync.Pool pkg:
// Get selects an arbitrary item from the Pool, removes it from the
// Pool, and returns it to the caller.
// Get may choose to ignore the pool and treat it as empty.
// Callers should not assume any relation between values passed to Put and
// the values returned by Get.
And for those reasons, the test below is rendered flaky. However, it should have little impact in a production environment.
It is commented out but retained in the package as an indicator that the logic is tested.
func TestReuseRequest(t *testing.T) {
tests := []struct {
name string
donate int
request int
expectedEqual bool
}{
{
name: "same size",
donate: 1500,
request: 1500,
expectedEqual: true,
},
{
name: "larger donate - same bucket",
donate: 1600,
request: 1500,
expectedEqual: true,
},
{
name: "larger donate - different bucket",
donate: 2100,
request: 1500,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// create push requests of known size
req := MakeBytesRequestWithSize(tt.donate)
assert.Len(t, req.Requests, 1)
expectedAddr := &req.Requests[0].Request[0]
// "donate" to bytePool
ReuseRequest(req)
// unmarshal a new request
var dummyData = make([]byte, tt.request)
preallocReq := &PreallocRequest{}
assert.NoError(t, preallocReq.Unmarshal(dummyData))
actualAddr := &preallocReq.Request[0]
if tt.expectedEqual {
assert.Equal(t, expectedAddr, actualAddr)
} else {
assert.NotEqual(t, expectedAddr, actualAddr)
}
})
}
}
func MakeBytesRequestWithSize(maxBytes int) *PushBytesRequest {
reqBytes := make([]byte, maxBytes)
rand.Read(reqBytes)
return &PushBytesRequest{
Requests: []PreallocRequest{
{
Request: reqBytes,
},
},
}
}
*/
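The caveat described in the comment above can be reproduced with plain `sync.Pool` directly. The standalone snippet below (not part of the commit) can print either `true` or `false` depending on scheduling and garbage collection, which is exactly why asserting pointer identity in `TestReuseRequest` is flaky.

```go
package main

import (
	"fmt"
	"sync"
)

func main() {
	p := sync.Pool{New: func() interface{} { return make([]byte, 0, 1024) }}

	donated := p.Get().([]byte)[:1] // grow to len 1 so the backing array is addressable
	addrBefore := &donated[0]
	p.Put(donated[:0]) // donate the slice back, keeping its capacity

	// Get may return the donated slice, but the sync.Pool contract also allows
	// a fresh allocation (e.g. after a GC cycle or on a different P).
	got := p.Get().([]byte)[:1]
	fmt.Println(addrBefore == &got[0])
}
```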
87 changes: 46 additions & 41 deletions pkg/tempopb/tempo.pb.go

Some generated files are not rendered by default.

