diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc
index 8e6611da1dd..d1a162565fe 100644
--- a/CHANGELOG.next.asciidoc
+++ b/CHANGELOG.next.asciidoc
@@ -75,6 +75,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d
 - Fix bug with `monitoring.cluster_uuid` setting not always being exposed via GET /state Beats API. {issue}16732[16732] {pull}17420[17420]
 - Fix building on FreeBSD by removing build flags from `add_cloudfoundry_metadata` processor. {pull}17486[17486]
 - Do not rotate log files on startup when interval is configured and rotateonstartup is disabled. {pull}17613[17613]
+- Fix goroutine leak and Elasticsearch output file descriptor leak when output reloading is in use. {issue}10491[10491] {pull}17381[17381]
 
 *Auditbeat*
 
diff --git a/libbeat/esleg/eslegclient/connection.go b/libbeat/esleg/eslegclient/connection.go
index b591307c444..e1c20f795bd 100644
--- a/libbeat/esleg/eslegclient/connection.go
+++ b/libbeat/esleg/eslegclient/connection.go
@@ -63,11 +63,15 @@ type ConnectionSettings struct {
 	Parameters       map[string]string
 	CompressionLevel int
 	EscapeHTML       bool
-	Timeout          time.Duration
+
+	Timeout         time.Duration
+	IdleConnTimeout time.Duration
 }
 
 // NewConnection returns a new Elasticsearch client
 func NewConnection(s ConnectionSettings) (*Connection, error) {
+	s = settingsWithDefaults(s)
+
 	u, err := url.Parse(s.URL)
 	if err != nil {
 		return nil, fmt.Errorf("failed to parse elasticsearch URL: %v", err)
@@ -124,6 +128,7 @@ func NewConnection(s ConnectionSettings) (*Connection, error) {
 				DialTLS:         tlsDialer.Dial,
 				TLSClientConfig: s.TLS.ToConfig(),
 				Proxy:           proxy,
+				IdleConnTimeout: s.IdleConnTimeout,
 			},
 			Timeout: s.Timeout,
 		},
@@ -132,6 +137,15 @@ func NewConnection(s ConnectionSettings) (*Connection, error) {
 	}, nil
 }
 
+// settingsWithDefaults fills in defaults for unset fields (currently a one
+// minute IdleConnTimeout). s is a by-value copy, so the caller's is untouched.
+func settingsWithDefaults(s ConnectionSettings) ConnectionSettings {
+	if s.IdleConnTimeout == 0 {
+		s.IdleConnTimeout = time.Minute
+	}
+	return s
+}
+
 // NewClients returns a list of Elasticsearch clients based on the given
 // configuration. It accepts the same configuration parameters as the Elasticsearch
 // output, except for the output specific configuration options.  If multiple hosts
@@ -266,6 +280,7 @@ func (conn *Connection) Ping() (string, error) {
 
 // Close closes a connection.
 func (conn *Connection) Close() error {
+	conn.HTTP.CloseIdleConnections()
 	return nil
 }
 
diff --git a/libbeat/publisher/pipeline/batch.go b/libbeat/publisher/pipeline/batch.go
index 5a8903c5814..54ba2058d74 100644
--- a/libbeat/publisher/pipeline/batch.go
+++ b/libbeat/publisher/pipeline/batch.go
@@ -24,7 +24,13 @@ import (
 	"github.com/elastic/beats/v7/libbeat/publisher/queue"
 )
 
-type Batch struct {
+type Batch interface {
+	publisher.Batch
+	// reduceTTL lowers the batch's time to live; it returns true while the batch is alive.
+	reduceTTL() bool
+}
+
+type batch struct {
 	original queue.Batch
 	ctx      *batchContext
 	ttl      int
@@ -38,17 +44,17 @@ type batchContext struct {
 
 var batchPool = sync.Pool{
 	New: func() interface{} {
-		return &Batch{}
+		return &batch{}
 	},
 }
 
-func newBatch(ctx *batchContext, original queue.Batch, ttl int) *Batch {
+func newBatch(ctx *batchContext, original queue.Batch, ttl int) *batch {
 	if original == nil {
 		panic("empty batch")
 	}
 
-	b := batchPool.Get().(*Batch)
-	*b = Batch{
+	b := batchPool.Get().(*batch)
+	*b = batch{
 		original: original,
 		ctx:      ctx,
 		ttl:      ttl,
@@ -57,45 +63,47 @@ func newBatch(ctx *batchContext, original queue.Batch, ttl int) *Batch {
 	return b
 }
 
-func releaseBatch(b *Batch) {
-	*b = Batch{} // clear batch
+func releaseBatch(b *batch) {
+	*b = batch{} // clear batch
 	batchPool.Put(b)
 }
 
-func (b *Batch) Events() []publisher.Event {
+func (b *batch) Events() []publisher.Event {
 	return b.events
 }
 
-func (b *Batch) ACK() {
-	b.ctx.observer.outBatchACKed(len(b.events))
+func (b *batch) ACK() {
+	if b.ctx != nil {
+		b.ctx.observer.outBatchACKed(len(b.events))
+	}
 	b.original.ACK()
 	releaseBatch(b)
 }
 
-func (b *Batch) Drop() {
+func (b *batch) Drop() {
 	b.original.ACK()
 	releaseBatch(b)
 }
 
-func (b *Batch) Retry() {
+func (b *batch) Retry() {
 	b.ctx.retryer.retry(b)
 }
 
-func (b *Batch) Cancelled() {
+func (b *batch) Cancelled() {
 	b.ctx.retryer.cancelled(b)
 }
 
-func (b *Batch) RetryEvents(events []publisher.Event) {
+func (b *batch) RetryEvents(events []publisher.Event) {
 	b.updEvents(events)
 	b.Retry()
 }
 
-func (b *Batch) CancelledEvents(events []publisher.Event) {
+func (b *batch) CancelledEvents(events []publisher.Event) {
 	b.updEvents(events)
 	b.Cancelled()
 }
 
-func (b *Batch) updEvents(events []publisher.Event) {
+func (b *batch) updEvents(events []publisher.Event) {
 	l1 := len(b.events)
 	l2 := len(events)
 	if l1 > l2 {
@@ -105,3 +113,33 @@ func (b *Batch) updEvents(events []publisher.Event) {
 
 	b.events = events
 }
+
+// reduceTTL uses up one retry; on the last one it discards every event that is
+// not 'guaranteed'. It returns false once no events are left, true otherwise.
+func (b *batch) reduceTTL() bool {
+	// A non-positive ttl means unlimited retries: there is nothing to count down.
+	if b.ttl <= 0 {
+		return true
+	}
+
+	b.ttl--
+	if b.ttl > 0 {
+		return true
+	}
+
+	// Retry budget exhausted: keep only events with the guaranteed send flag.
+	kept := b.events[:0]
+	for _, ev := range b.events {
+		if ev.Guaranteed() {
+			kept = append(kept, ev)
+		}
+	}
+	b.events = kept
+
+	if len(kept) == 0 {
+		return false // all events have been dropped
+	}
+
+	b.ttl = -1 // the surviving guaranteed events get infinite retries
+	return true
+}
diff --git a/libbeat/publisher/pipeline/consumer.go b/libbeat/publisher/pipeline/consumer.go
index 4dd211052c2..a5c4a97e25a 100644
--- a/libbeat/publisher/pipeline/consumer.go
+++ b/libbeat/publisher/pipeline/consumer.go
@@ -138,7 +138,7 @@ func (c *eventConsumer) loop(consumer queue.Consumer) {
 
 	var (
 		out    workQueue
-		batch  *Batch
+		batch  Batch
 		paused = true
 	)
 
@@ -154,7 +154,7 @@ func (c *eventConsumer) loop(consumer queue.Consumer) {
 		}
 
 		paused = c.paused()
-		if !paused && c.out != nil && batch != nil {
+		if c.out != nil && batch != nil {
 			out = c.out.workQueue
 		} else {
 			out = nil
@@ -195,6 +195,9 @@ func (c *eventConsumer) loop(consumer queue.Consumer) {
 			handleSignal(sig)
 		case out <- batch:
 			batch = nil
+			if paused {
+				out = nil
+			}
 		}
 	}
 }
diff --git a/libbeat/publisher/pipeline/controller.go b/libbeat/publisher/pipeline/controller.go
index 05bd65338a9..837a70eab77 100644
--- a/libbeat/publisher/pipeline/controller.go
+++ b/libbeat/publisher/pipeline/controller.go
@@ -22,6 +22,7 @@ import (
 	"github.com/elastic/beats/v7/libbeat/common"
 	"github.com/elastic/beats/v7/libbeat/common/reload"
 	"github.com/elastic/beats/v7/libbeat/outputs"
+	"github.com/elastic/beats/v7/libbeat/publisher"
 	"github.com/elastic/beats/v7/libbeat/publisher/queue"
 )
 
@@ -34,7 +35,8 @@ type outputController struct {
 	monitors Monitors
 	observer outputObserver
 
-	queue queue.Queue
+	queue     queue.Queue
+	workQueue workQueue
 
 	retryer  *retryer
 	consumer *eventConsumer
@@ -50,7 +52,7 @@ type outputGroup struct {
 	timeToLive int // event lifetime
 }
 
-type workQueue chan *Batch
+type workQueue chan publisher.Batch
 
 // outputWorker instances pass events from the shared workQueue to the outputs.Client
 // instances.
@@ -62,18 +64,19 @@ func newOutputController(
 	beat beat.Info,
 	monitors Monitors,
 	observer outputObserver,
-	b queue.Queue,
+	queue queue.Queue,
 ) *outputController {
 	c := &outputController{
-		beat:     beat,
-		monitors: monitors,
-		observer: observer,
-		queue:    b,
+		beat:      beat,
+		monitors:  monitors,
+		observer:  observer,
+		queue:     queue,
+		workQueue: makeWorkQueue(),
 	}
 
 	ctx := &batchContext{}
-	c.consumer = newEventConsumer(monitors.Logger, b, ctx)
-	c.retryer = newRetryer(monitors.Logger, observer, nil, c.consumer)
+	c.consumer = newEventConsumer(monitors.Logger, queue, ctx)
+	c.retryer = newRetryer(monitors.Logger, observer, c.workQueue, c.consumer)
 	ctx.observer = observer
 	ctx.retryer = c.retryer
 
@@ -86,27 +89,26 @@ func (c *outputController) Close() error {
 	c.consumer.sigPause()
 	c.consumer.close()
 	c.retryer.close()
+	close(c.workQueue)
 
 	if c.out != nil {
 		for _, out := range c.out.outputs {
 			out.Close()
 		}
-		close(c.out.workQueue)
 	}
 
 	return nil
 }
 
 func (c *outputController) Set(outGrp outputs.Group) {
-	// create new outputGroup with shared work queue
+	// create new output group with the shared work queue
 	clients := outGrp.Clients
-	queue := makeWorkQueue()
 	worker := make([]outputWorker, len(clients))
 	for i, client := range clients {
-		worker[i] = makeClientWorker(c.observer, queue, client)
+		worker[i] = makeClientWorker(c.observer, c.workQueue, client)
 	}
 	grp := &outputGroup{
-		workQueue:  queue,
+		workQueue:  c.workQueue,
 		outputs:    worker,
 		timeToLive: outGrp.Retry + 1,
 		batchSize:  outGrp.BatchSize,
@@ -119,7 +121,6 @@ func (c *outputController) Set(outGrp outputs.Group) {
 			c.retryer.sigOutputRemoved()
 		}
 	}
-	c.retryer.updOutput(queue)
 	for range clients {
 		c.retryer.sigOutputAdded()
 	}
@@ -141,7 +142,7 @@ func (c *outputController) Set(outGrp outputs.Group) {
 }
 
 func makeWorkQueue() workQueue {
-	return workQueue(make(chan *Batch, 0))
+	return workQueue(make(chan publisher.Batch, 0))
 }
 
 // Reload the output
diff --git a/libbeat/publisher/pipeline/controller_test.go b/libbeat/publisher/pipeline/controller_test.go
new file mode 100644
index 00000000000..32bdc54109a
--- /dev/null
+++ b/libbeat/publisher/pipeline/controller_test.go
@@ -0,0 +1,114 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package pipeline
+
+import (
+	"sync"
+	"testing"
+	"testing/quick"
+	"time"
+
+	"github.com/elastic/beats/v7/libbeat/beat"
+	"github.com/elastic/beats/v7/libbeat/common/atomic"
+	"github.com/elastic/beats/v7/libbeat/logp"
+	"github.com/elastic/beats/v7/libbeat/outputs"
+	"github.com/elastic/beats/v7/libbeat/publisher"
+	"github.com/elastic/beats/v7/libbeat/publisher/queue"
+	"github.com/elastic/beats/v7/libbeat/publisher/queue/memqueue"
+	"github.com/elastic/beats/v7/libbeat/tests/resources"
+
+	"github.com/stretchr/testify/require"
+)
+
+func TestOutputReload(t *testing.T) {
+	tests := map[string]func(mockPublishFn) outputs.Client{
+		"client":         newMockClient,
+		"network_client": newMockNetworkClient,
+	}
+
+	for name, ctor := range tests {
+		t.Run(name, func(t *testing.T) {
+			seedPRNG(t)
+
+			goroutines := resources.NewGoroutinesChecker()
+			defer goroutines.Check(t)
+
+			err := quick.Check(func(q uint) bool {
+				numEventsToPublish := 15000 + (q % 500) // 15000 to 15499
+				numOutputReloads := 350 + (q % 150)     // 350 to 499
+
+				queueFactory := func(ackListener queue.ACKListener) (queue.Queue, error) {
+					return memqueue.NewQueue(
+						logp.L(),
+						memqueue.Settings{
+							ACKListener: ackListener,
+							Events:      int(numEventsToPublish),
+						}), nil
+				}
+
+				var publishedCount atomic.Uint
+				countingPublishFn := func(batch publisher.Batch) error {
+					publishedCount.Add(uint(len(batch.Events())))
+					return nil
+				}
+
+				pipeline, err := New(
+					beat.Info{},
+					Monitors{},
+					queueFactory,
+					outputs.Group{},
+					Settings{},
+				)
+				require.NoError(t, err)
+				defer pipeline.Close()
+
+				pipelineClient, err := pipeline.Connect()
+				require.NoError(t, err)
+				defer pipelineClient.Close()
+
+				var wg sync.WaitGroup
+				wg.Add(1)
+				go func() {
+					for i := uint(0); i < numEventsToPublish; i++ {
+						pipelineClient.Publish(beat.Event{})
+					}
+					wg.Done()
+				}()
+
+				for i := uint(0); i < numOutputReloads; i++ {
+					outputClient := ctor(countingPublishFn)
+					out := outputs.Group{
+						Clients: []outputs.Client{outputClient},
+					}
+					pipeline.output.Set(out)
+				}
+
+				wg.Wait()
+
+				timeout := 20 * time.Second
+				return waitUntilTrue(timeout, func() bool {
+					return uint(numEventsToPublish) == publishedCount.Load()
+				})
+			}, &quick.Config{MaxCount: 25})
+
+			if err != nil {
+				t.Error(err)
+			}
+		})
+	}
+}
diff --git a/libbeat/publisher/pipeline/output.go b/libbeat/publisher/pipeline/output.go
index 02ec2975db6..fa2ce73a28c 100644
--- a/libbeat/publisher/pipeline/output.go
+++ b/libbeat/publisher/pipeline/output.go
@@ -18,25 +18,27 @@
 package pipeline
 
 import (
-	"github.com/elastic/beats/v7/libbeat/common/atomic"
 	"github.com/elastic/beats/v7/libbeat/logp"
 	"github.com/elastic/beats/v7/libbeat/outputs"
 )
 
-// clientWorker manages output client of type outputs.Client, not supporting reconnect.
-type clientWorker struct {
+type worker struct {
+	id       uint
 	observer outputObserver
 	qu       workQueue
-	client   outputs.Client
-	closed   atomic.Bool
+	done     chan struct{}
+}
+
+// clientWorker manages output client of type outputs.Client, not supporting reconnect.
+type clientWorker struct {
+	worker
+	client outputs.Client
 }
 
 // netClientWorker manages reconnectable output clients of type outputs.NetworkClient.
 type netClientWorker struct {
-	observer outputObserver
-	qu       workQueue
-	client   outputs.NetworkClient
-	closed   atomic.Bool
+	worker
+	client outputs.NetworkClient
 
 	batchSize  int
 	batchSizer func() int
@@ -44,96 +46,114 @@ type netClientWorker struct {
 }
 
 func makeClientWorker(observer outputObserver, qu workQueue, client outputs.Client) outputWorker {
+	w := worker{
+		observer: observer,
+		qu:       qu,
+		done:     make(chan struct{}),
+	}
+
+	var c interface {
+		outputWorker
+		run()
+	}
+
 	if nc, ok := client.(outputs.NetworkClient); ok {
-		c := &netClientWorker{
-			observer: observer,
-			qu:       qu,
-			client:   nc,
-			logger:   logp.NewLogger("publisher_pipeline_output"),
+		c = &netClientWorker{
+			worker: w,
+			client: nc,
+			logger: logp.NewLogger("publisher_pipeline_output"),
 		}
-		go c.run()
-		return c
+	} else {
+		c = &clientWorker{worker: w, client: client}
 	}
-	c := &clientWorker{observer: observer, qu: qu, client: client}
+
 	go c.run()
 	return c
 }
 
+func (w *worker) close() {
+	close(w.done) // unblocks the select in run(); a second call would panic
+}
+
 func (w *clientWorker) Close() error {
-	w.closed.Store(true)
+	w.worker.close()
 	return w.client.Close()
 }
 
 func (w *clientWorker) run() {
-	for !w.closed.Load() {
-		for batch := range w.qu {
-			if w.closed.Load() {
-				if batch != nil {
-					batch.Cancelled()
-				}
-				return
-			}
+	for {
+		// We wait for either the worker to be closed or for there to be a batch of
+		// events to publish.
+		select {
+
+		case <-w.done:
+			return
 
-			w.observer.outBatchSend(len(batch.events))
+		case batch := <-w.qu:
+			if batch == nil {
+				continue
+			}
+			w.observer.outBatchSend(len(batch.Events()))
 
 			if err := w.client.Publish(batch); err != nil {
-				break
+				return
 			}
 		}
 	}
 }
 
 func (w *netClientWorker) Close() error {
-	w.closed.Store(true)
+	w.worker.close()
 	return w.client.Close()
 }
 
 func (w *netClientWorker) run() {
-	for !w.closed.Load() {
-		reconnectAttempts := 0
-
-		// start initial connect loop from first batch, but return
-		// batch to pipeline for other outputs to catch up while we're trying to connect
-		for batch := range w.qu {
-			batch.Cancelled()
+	var (
+		connected         = false
+		reconnectAttempts = 0
+	)
 
-			if w.closed.Load() {
-				w.logger.Infof("Closed connection to %v", w.client)
-				return
-			}
+	for {
+		// We wait for either the worker to be closed or for there to be a batch of
+		// events to publish.
+		select {
 
-			if reconnectAttempts > 0 {
-				w.logger.Infof("Attempting to reconnect to %v with %d reconnect attempt(s)", w.client, reconnectAttempts)
-			} else {
-				w.logger.Infof("Connecting to %v", w.client)
-			}
+		case <-w.done:
+			return
 
-			err := w.client.Connect()
-			if err != nil {
-				w.logger.Errorf("Failed to connect to %v: %v", w.client, err)
-				reconnectAttempts++
+		case batch := <-w.qu:
+			if batch == nil {
 				continue
 			}
 
-			w.logger.Infof("Connection to %v established", w.client)
-			reconnectAttempts = 0
-			break
-		}
+			// Try to (re)connect so we can publish batch
+			if !connected {
+				// Return batch to other output workers while we try to (re)connect
+				batch.Cancelled()
 
-		// send loop
-		for batch := range w.qu {
-			if w.closed.Load() {
-				if batch != nil {
-					batch.Cancelled()
+				if reconnectAttempts == 0 {
+					w.logger.Infof("Connecting to %v", w.client)
+				} else {
+					w.logger.Infof("Attempting to reconnect to %v with %d reconnect attempt(s)", w.client, reconnectAttempts)
 				}
-				return
+
+				err := w.client.Connect()
+				connected = err == nil
+				if connected {
+					w.logger.Infof("Connection to %v established", w.client)
+					reconnectAttempts = 0
+				} else {
+					w.logger.Errorf("Failed to connect to %v: %v", w.client, err)
+					reconnectAttempts++
+				}
+
+				continue
 			}
 
-			err := w.client.Publish(batch)
-			if err != nil {
+			if err := w.client.Publish(batch); err != nil {
 				w.logger.Errorf("Failed to publish events: %v", err)
 				// on error return to connect loop
-				break
+				connected = false
 			}
 		}
 	}
diff --git a/libbeat/publisher/pipeline/output_test.go b/libbeat/publisher/pipeline/output_test.go
index d89c166ee15..5f471ddf396 100644
--- a/libbeat/publisher/pipeline/output_test.go
+++ b/libbeat/publisher/pipeline/output_test.go
@@ -18,9 +18,7 @@
 package pipeline
 
 import (
-	"flag"
 	"math"
-	"math/rand"
 	"sync"
 	"testing"
 	"testing/quick"
@@ -32,11 +30,6 @@ import (
 	"github.com/elastic/beats/v7/libbeat/logp"
 	"github.com/elastic/beats/v7/libbeat/outputs"
 	"github.com/elastic/beats/v7/libbeat/publisher"
-	"github.com/elastic/beats/v7/libbeat/publisher/queue"
-)
-
-var (
-	SeedFlag = flag.Int64("seed", 0, "Randomization seed")
 )
 
 func TestMakeClientWorker(t *testing.T) {
@@ -51,6 +44,11 @@ func TestMakeClientWorker(t *testing.T) {
 
 			err := quick.Check(func(i uint) bool {
 				numBatches := 300 + (i % 100) // between 300 and 399
+				numEvents := atomic.MakeUint(0)
+
+				wqu := makeWorkQueue()
+				retryer := newRetryer(logp.NewLogger("test"), nilObserver, wqu, nil)
+				defer retryer.close()
 
 				var published atomic.Uint
 				publishFn := func(batch publisher.Batch) error {
@@ -58,13 +56,13 @@ func TestMakeClientWorker(t *testing.T) {
 					return nil
 				}
 
-				wqu := makeWorkQueue()
 				client := ctor(publishFn)
-				makeClientWorker(nilObserver, wqu, client)
 
-				numEvents := atomic.MakeUint(0)
-				for batchIdx := uint(0); batchIdx <= numBatches; batchIdx++ {
-					batch := randomBatch(50, 150, wqu)
+				worker := makeClientWorker(nilObserver, wqu, client)
+				defer worker.Close()
+
+				for i := uint(0); i < numBatches; i++ {
+					batch := randomBatch(50, 150).withRetryer(retryer)
 					numEvents.Add(uint(len(batch.Events())))
 					wqu <- batch
 				}
@@ -85,13 +83,14 @@ func TestMakeClientWorker(t *testing.T) {
 	}
 }
 
-func TestMakeClientWorkerAndClose(t *testing.T) {
+func TestReplaceClientWorker(t *testing.T) {
 	tests := map[string]func(mockPublishFn) outputs.Client{
 		"client":         newMockClient,
 		"network_client": newMockNetworkClient,
 	}
 
 	const minEventsInBatch = 50
+	const maxEventsInBatch = 150
 
 	for name, ctor := range tests {
 		t.Run(name, func(t *testing.T) {
@@ -101,21 +100,28 @@ func TestMakeClientWorkerAndClose(t *testing.T) {
 				numBatches := 1000 + (i % 100) // between 1000 and 1099
 
 				wqu := makeWorkQueue()
-				numEvents := atomic.MakeUint(0)
+				retryer := newRetryer(logp.NewLogger("test"), nilObserver, wqu, nil)
+				defer retryer.close()
+
+				var batches []publisher.Batch
+				var numEvents int
+				for i := uint(0); i < numBatches; i++ {
+					batch := randomBatch(minEventsInBatch, maxEventsInBatch).withRetryer(retryer)
+					numEvents += batch.Len()
+					batches = append(batches, batch)
+				}
 
 				var wg sync.WaitGroup
 				wg.Add(1)
 				go func() {
 					defer wg.Done()
-					for batchIdx := uint(0); batchIdx <= numBatches; batchIdx++ {
-						batch := randomBatch(minEventsInBatch, 150, wqu)
-						numEvents.Add(uint(len(batch.Events())))
+					for _, batch := range batches {
 						wqu <- batch
 					}
 				}()
 
 				// Publish at least 1 batch worth of events but no more than 20% events
-				publishLimit := uint(math.Max(minEventsInBatch, float64(numEvents.Load())*0.2))
+				publishLimit := uint(math.Max(minEventsInBatch, float64(numEvents)*0.2))
 
 				var publishedFirst atomic.Uint
 				blockCtrl := make(chan struct{})
@@ -145,6 +151,7 @@ func TestMakeClientWorkerAndClose(t *testing.T) {
 				// Close worker before all batches have had time to be published
 				err := worker.Close()
 				require.NoError(t, err)
+
 				close(blockCtrl)
 
 				// Start new worker to drain work queue
@@ -161,7 +168,7 @@ func TestMakeClientWorkerAndClose(t *testing.T) {
 				// Make sure that all events have eventually been published
 				timeout = 20 * time.Second
 				return waitUntilTrue(timeout, func() bool {
-					return numEvents.Load() == publishedFirst.Load()+publishedLater.Load()
+					return numEvents == int(publishedFirst.Load()+publishedLater.Load())
 				})
 			}, &quick.Config{MaxCount: 25})
 
@@ -171,91 +178,3 @@ func TestMakeClientWorkerAndClose(t *testing.T) {
 		})
 	}
 }
-
-type mockPublishFn func(publisher.Batch) error
-
-func newMockClient(publishFn mockPublishFn) outputs.Client {
-	return &mockClient{publishFn: publishFn}
-}
-
-type mockClient struct {
-	publishFn mockPublishFn
-}
-
-func (c *mockClient) String() string { return "mock_client" }
-func (c *mockClient) Close() error   { return nil }
-func (c *mockClient) Publish(batch publisher.Batch) error {
-	return c.publishFn(batch)
-}
-
-func newMockNetworkClient(publishFn mockPublishFn) outputs.Client {
-	return &mockNetworkClient{newMockClient(publishFn)}
-}
-
-type mockNetworkClient struct {
-	outputs.Client
-}
-
-func (c *mockNetworkClient) Connect() error { return nil }
-
-type mockQueue struct{}
-
-func (q mockQueue) Close() error                                     { return nil }
-func (q mockQueue) BufferConfig() queue.BufferConfig                 { return queue.BufferConfig{} }
-func (q mockQueue) Producer(cfg queue.ProducerConfig) queue.Producer { return mockProducer{} }
-func (q mockQueue) Consumer() queue.Consumer                         { return mockConsumer{} }
-
-type mockProducer struct{}
-
-func (p mockProducer) Publish(event publisher.Event) bool    { return true }
-func (p mockProducer) TryPublish(event publisher.Event) bool { return true }
-func (p mockProducer) Cancel() int                           { return 0 }
-
-type mockConsumer struct{}
-
-func (c mockConsumer) Get(eventCount int) (queue.Batch, error) { return &Batch{}, nil }
-func (c mockConsumer) Close() error                            { return nil }
-
-func randomBatch(min, max int, wqu workQueue) *Batch {
-	numEvents := randIntBetween(min, max)
-	events := make([]publisher.Event, numEvents)
-
-	consumer := newEventConsumer(logp.L(), mockQueue{}, &batchContext{})
-	retryer := newRetryer(logp.L(), nilObserver, wqu, consumer)
-
-	batch := Batch{
-		events: events,
-		ctx: &batchContext{
-			observer: nilObserver,
-			retryer:  retryer,
-		},
-	}
-
-	return &batch
-}
-
-// randIntBetween returns a random integer in [min, max)
-func randIntBetween(min, max int) int {
-	return rand.Intn(max-min) + min
-}
-
-func seedPRNG(t *testing.T) {
-	seed := *SeedFlag
-	if seed == 0 {
-		seed = time.Now().UnixNano()
-	}
-
-	t.Logf("reproduce test with `go test ... -seed %v`", seed)
-	rand.Seed(seed)
-}
-
-func waitUntilTrue(duration time.Duration, fn func() bool) bool {
-	end := time.Now().Add(duration)
-	for time.Now().Before(end) {
-		if fn() {
-			return true
-		}
-		time.Sleep(1 * time.Millisecond)
-	}
-	return false
-}
diff --git a/libbeat/publisher/pipeline/retry.go b/libbeat/publisher/pipeline/retry.go
index a65a7d227c8..0d724e80278 100644
--- a/libbeat/publisher/pipeline/retry.go
+++ b/libbeat/publisher/pipeline/retry.go
@@ -36,7 +36,7 @@ type retryer struct {
 
 	done chan struct{}
 
-	consumer *eventConsumer
+	consumer interruptor
 
 	sig        chan retryerSignal
 	out        workQueue
@@ -44,6 +44,11 @@ type retryer struct {
 	doneWaiter sync.WaitGroup
 }
 
+type interruptor interface {
+	sigWait()   // sent when the retryer decides the consumer should be blocked
+	sigUnWait() // sent when the retryer decides the consumer may continue
+}
+
 type retryQueue chan batchEvent
 
 type retryerSignal struct {
@@ -53,7 +58,7 @@ type retryerSignal struct {
 
 type batchEvent struct {
 	tag   retryerBatchTag
-	batch *Batch
+	batch Batch
 }
 
 type retryerEventTag uint8
@@ -75,7 +80,7 @@ func newRetryer(
 	log *logp.Logger,
 	observer outputObserver,
 	out workQueue,
-	c *eventConsumer,
+	c interruptor,
 ) *retryer {
 	r := &retryer{
 		logger:     log,
@@ -106,18 +111,11 @@ func (r *retryer) sigOutputRemoved() {
 	r.sig <- retryerSignal{tag: sigRetryerOutputRemoved}
 }
 
-func (r *retryer) updOutput(ch workQueue) {
-	r.sig <- retryerSignal{
-		tag:     sigRetryerUpdateOutput,
-		channel: ch,
-	}
-}
-
-func (r *retryer) retry(b *Batch) {
+func (r *retryer) retry(b Batch) {
 	r.in <- batchEvent{tag: retryBatch, batch: b}
 }
 
-func (r *retryer) cancelled(b *Batch) {
+func (r *retryer) cancelled(b Batch) {
 	r.in <- batchEvent{tag: cancelledBatch, batch: b}
 }
 
@@ -127,9 +125,9 @@ func (r *retryer) loop() {
 		out             workQueue
 		consumerBlocked bool
 
-		active     *Batch
+		active     Batch
 		activeSize int
-		buffer     []*Batch
+		buffer     []Batch
 		numOutputs int
 
 		log = r.logger
@@ -144,21 +142,22 @@ func (r *retryer) loop() {
 				countFailed  int
 				countDropped int
 				batch        = evt.batch
-				countRetry   = len(batch.events)
+				countRetry   = len(batch.Events())
+				alive        = true
 			)
 
 			if evt.tag == retryBatch {
-				countFailed = len(batch.events)
+				countFailed = len(batch.Events())
 				r.observer.eventsFailed(countFailed)
 
-				decBatch(batch)
+				alive = batch.reduceTTL()
 
-				countRetry = len(batch.events)
+				countRetry = len(batch.Events())
 				countDropped = countFailed - countRetry
 				r.observer.eventsDropped(countDropped)
 			}
 
-			if len(batch.events) == 0 {
+			if !alive {
 				log.Info("Drop batch")
 				batch.Drop()
 			} else {
@@ -166,14 +165,9 @@ func (r *retryer) loop() {
 				buffer = append(buffer, batch)
 				out = r.out
 				active = buffer[0]
-				activeSize = len(active.events)
+				activeSize = len(active.Events())
 				if !consumerBlocked {
-					consumerBlocked = blockConsumer(numOutputs, len(buffer))
-					if consumerBlocked {
-						log.Info("retryer: send wait signal to consumer")
-						r.consumer.sigWait()
-						log.Info("  done")
-					}
+					consumerBlocked = r.checkConsumerBlock(numOutputs, len(buffer))
 				}
 			}
 
@@ -187,51 +181,53 @@ func (r *retryer) loop() {
 				out = nil
 			} else {
 				active = buffer[0]
-				activeSize = len(active.events)
+				activeSize = len(active.Events())
 			}
 
 			if consumerBlocked {
-				consumerBlocked = blockConsumer(numOutputs, len(buffer))
-				if !consumerBlocked {
-					log.Info("retryer: send unwait-signal to consumer")
-					r.consumer.sigUnWait()
-					log.Info("  done")
-				}
+				consumerBlocked = r.checkConsumerBlock(numOutputs, len(buffer))
 			}
 
 		case sig := <-r.sig:
 			switch sig.tag {
-			case sigRetryerUpdateOutput:
-				r.out = sig.channel
 			case sigRetryerOutputAdded:
 				numOutputs++
+				if consumerBlocked {
+					consumerBlocked = r.checkConsumerBlock(numOutputs, len(buffer))
+				}
 			case sigRetryerOutputRemoved:
 				numOutputs--
+				if !consumerBlocked {
+					consumerBlocked = r.checkConsumerBlock(numOutputs, len(buffer))
+				}
 			}
 		}
 	}
 }
 
-func blockConsumer(numOutputs, numBatches int) bool {
-	return numBatches/3 >= numOutputs
-}
-
-func decBatch(batch *Batch) {
-	if batch.ttl <= 0 {
-		return
+func (r *retryer) checkConsumerBlock(numOutputs, numBatches int) bool {
+	consumerBlocked := blockConsumer(numOutputs, numBatches)
+	if r.consumer == nil {
+		return consumerBlocked
 	}
 
-	batch.ttl--
-	if batch.ttl > 0 {
-		return
-	}
-
-	// filter for evens with guaranteed send flags
-	events := batch.events[:0]
-	for _, event := range batch.events {
-		if event.Guaranteed() {
-			events = append(events, event)
+	if consumerBlocked {
+		r.logger.Info("retryer: send wait signal to consumer")
+		if r.consumer != nil {
+			r.consumer.sigWait()
+		}
+		r.logger.Info("  done")
+	} else {
+		r.logger.Info("retryer: send unwait signal to consumer")
+		if r.consumer != nil {
+			r.consumer.sigUnWait()
 		}
+		r.logger.Info("  done")
 	}
-	batch.events = events
+
+	return consumerBlocked
+}
+
+func blockConsumer(numOutputs, numBatches int) bool {
+	return numBatches/3 >= numOutputs
 }
diff --git a/libbeat/publisher/pipeline/testing.go b/libbeat/publisher/pipeline/testing.go
new file mode 100644
index 00000000000..1d5c2b908ff
--- /dev/null
+++ b/libbeat/publisher/pipeline/testing.go
@@ -0,0 +1,176 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package pipeline
+
+import (
+	"flag"
+	"math/rand"
+	"sync"
+	"testing"
+	"time"
+
+	"github.com/elastic/beats/v7/libbeat/outputs"
+	"github.com/elastic/beats/v7/libbeat/publisher"
+	"github.com/elastic/beats/v7/libbeat/publisher/queue"
+)
+
+var (
+	SeedFlag = flag.Int64("seed", 0, "Randomization seed")
+)
+
+type mockPublishFn func(publisher.Batch) error
+
+func newMockClient(publishFn mockPublishFn) outputs.Client {
+	return &mockClient{publishFn: publishFn}
+}
+
+type mockClient struct {
+	publishFn mockPublishFn
+}
+
+func (c *mockClient) String() string { return "mock_client" }
+func (c *mockClient) Close() error   { return nil }
+func (c *mockClient) Publish(batch publisher.Batch) error {
+	return c.publishFn(batch)
+}
+
+func newMockNetworkClient(publishFn mockPublishFn) outputs.Client {
+	return &mockNetworkClient{newMockClient(publishFn)}
+}
+
+type mockNetworkClient struct {
+	outputs.Client
+}
+
+func (c *mockNetworkClient) Connect() error { return nil }
+
+type mockQueue struct{}
+
+func (q mockQueue) Close() error                                     { return nil }
+func (q mockQueue) BufferConfig() queue.BufferConfig                 { return queue.BufferConfig{} }
+func (q mockQueue) Producer(cfg queue.ProducerConfig) queue.Producer { return mockProducer{} }
+func (q mockQueue) Consumer() queue.Consumer                         { return mockConsumer{} }
+
+type mockProducer struct{}
+
+func (p mockProducer) Publish(event publisher.Event) bool    { return true }
+func (p mockProducer) TryPublish(event publisher.Event) bool { return true }
+func (p mockProducer) Cancel() int                           { return 0 }
+
+type mockConsumer struct{}
+
+func (c mockConsumer) Get(eventCount int) (queue.Batch, error) { return &batch{}, nil }
+func (c mockConsumer) Close() error                            { return nil }
+
+type mockBatch struct {
+	mu     sync.Mutex
+	events []publisher.Event
+
+	onEvents    func()
+	onACK       func()
+	onDrop      func()
+	onRetry     func()
+	onCancelled func()
+	onReduceTTL func() bool
+}
+
+func (b *mockBatch) Events() []publisher.Event {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+	signalFn(b.onEvents)
+	return b.events
+}
+
+func (b *mockBatch) ACK()                                 { signalFn(b.onACK) }
+func (b *mockBatch) Drop()                                { signalFn(b.onDrop) }
+func (b *mockBatch) Retry()                               { signalFn(b.onRetry) }
+func (b *mockBatch) Cancelled()                           { signalFn(b.onCancelled) }
+func (b *mockBatch) RetryEvents(events []publisher.Event) { b.updateEvents(events); signalFn(b.onRetry) }
+
+// reduceTTL defers to the onReduceTTL hook; without one the batch stays alive.
+func (b *mockBatch) reduceTTL() bool {
+	if hook := b.onReduceTTL; hook != nil {
+		return hook()
+	}
+	return true
+}
+
+func (b *mockBatch) CancelledEvents(events []publisher.Event) {
+	b.updateEvents(events)
+	signalFn(b.onCancelled)
+}
+
+func (b *mockBatch) updateEvents(events []publisher.Event) {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+	b.events = events
+}
+
+func (b *mockBatch) Len() int {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+	return len(b.events)
+}
+
+// withRetryer returns a copy of b whose Retry and Cancelled calls hand the
+// copy itself back to r, so a redelivered batch keeps working hooks.
+func (b *mockBatch) withRetryer(r *retryer) *mockBatch {
+	w := &mockBatch{events: b.events, onACK: b.onACK, onDrop: b.onDrop, onReduceTTL: b.onReduceTTL}
+	// The closures must reference w, not b: b has no retry/cancel hooks, so a
+	// batch re-queued as b would be silently lost on its next Retry/Cancelled.
+	w.onRetry = func() { r.retry(w) }
+	w.onCancelled = func() { r.cancelled(w) }
+	return w
+}
+
+func signalFn(fn func()) {
+	if fn != nil {
+		fn()
+	}
+}
+
+// randomBatch builds a mockBatch holding a random number (min <= n < max) of zero-value events.
+func randomBatch(min, max int) *mockBatch {
+	n := randIntBetween(min, max)
+	return &mockBatch{events: make([]publisher.Event, n)}
+}
+
+// randIntBetween returns a pseudo-random int in [min, max); it panics if max <= min.
+func randIntBetween(min, max int) int {
+	return min + rand.Intn(max-min)
+}
+
+// seedPRNG seeds math/rand with -seed (or the current time if 0) and logs the seed.
+func seedPRNG(t *testing.T) {
+	s := *SeedFlag
+	if s == 0 {
+		s = time.Now().UnixNano()
+	}
+	t.Logf("reproduce test with `go test ... -seed %v`", s)
+	rand.Seed(s)
+}
+
+// waitUntilTrue polls fn, sleeping 10ms between attempts, and returns true as
+// soon as fn does; it returns false once duration has elapsed without success.
+func waitUntilTrue(duration time.Duration, fn func() bool) bool {
+	for deadline := time.Now().Add(duration); time.Now().Before(deadline); time.Sleep(10 * time.Millisecond) {
+		if fn() {
+			return true
+		}
+	}
+	return false
+}