Skip to content

Commit

Permalink
Add backoff package and fix Consul CPU usage (#635)
Browse files Browse the repository at this point in the history
* Add backoff package

Justification for jitter and growth factor:
https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/.

Add backoff to the Consul instancer loop.

Fixes go-kit/kit#627.

* Revert "Add backoff package"

This reverts commit 924501ae1fcfadaa27593e9c019283412c513928.

* Get rid of external package and update exponential

* Add instancer backoff

* Fix old exponential name

* Add doc comment

* Fixup & respond to review
  • Loading branch information
nicot authored and peterbourgon committed Apr 2, 2018
1 parent cf74be6 commit c72562b
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 2 deletions.
6 changes: 6 additions & 0 deletions sd/consul/instancer.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@ package consul
import (
"fmt"
"io"
"time"

consul "github.com/hashicorp/consul/api"

"github.com/go-kit/kit/log"
"github.com/go-kit/kit/sd"
"github.com/go-kit/kit/sd/internal/instance"
"github.com/go-kit/kit/util/conn"
)

const defaultIndex = 0
Expand Down Expand Up @@ -59,6 +61,7 @@ func (s *Instancer) loop(lastIndex uint64) {
var (
instances []string
err error
d time.Duration = 10 * time.Millisecond
)
for {
instances, lastIndex, err = s.getInstances(lastIndex, s.quitc)
Expand All @@ -67,9 +70,12 @@ func (s *Instancer) loop(lastIndex uint64) {
return // stopped via quitc
case err != nil:
s.logger.Log("err", err)
time.Sleep(d)
d = conn.Exponential(d)
s.cache.Update(sd.Event{Err: err})
default:
s.cache.Update(sd.Event{Instances: instances})
d = 10 * time.Millisecond
}
}
}
Expand Down
11 changes: 9 additions & 2 deletions util/conn/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package conn

import (
"errors"
"math/rand"
"net"
"time"

Expand Down Expand Up @@ -103,7 +104,7 @@ func (m *Manager) loop() {
case conn = <-connc:
if conn == nil {
// didn't work
backoff = exponential(backoff) // wait longer
backoff = Exponential(backoff) // wait longer
reconnectc = m.after(backoff) // try again
} else {
// worked!
Expand Down Expand Up @@ -132,12 +133,18 @@ func dial(d Dialer, network, address string, logger log.Logger) net.Conn {
return conn
}

func exponential(d time.Duration) time.Duration {
// Exponential takes a duration and returns another one that is twice as long,
// +/- 50%, capped at one minute. In other words, the result falls in [d, 3d)
// and is then clamped to time.Minute. It is used to provide backoff for
// operations that may fail and should avoid thundering herds. See
// https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/
// for the rationale.
func Exponential(d time.Duration) time.Duration {
	// Every d at or above the cap yields the cap (the smallest jitter factor
	// is 0.5, and 2*d*0.5 == d), so return early. This also keeps d*2 from
	// overflowing int64 for very large inputs, which would otherwise wrap to
	// a negative duration and slip past the cap below.
	if d >= time.Minute {
		return time.Minute
	}
	d *= 2
	jitter := rand.Float64() + 0.5 // in [0.5, 1.5)
	d = time.Duration(int64(float64(d.Nanoseconds()) * jitter))
	if d > time.Minute {
		d = time.Minute
	}
	return d
}

// ErrConnectionUnavailable is returned by the Manager's Write method when the
Expand Down

0 comments on commit c72562b

Please sign in to comment.