From c72562b117760ab3916dc73f0ac9e13dd11aec03 Mon Sep 17 00:00:00 2001 From: Nico Tonozzi Date: Sun, 1 Apr 2018 19:41:32 -0600 Subject: [PATCH] Add backoff package and fix Consul CPU usage (#635) * Add backoff package Justification for jitter and growth factor: https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/. Add backoff to the Consul instancer loop. Fixes https://github.com/go-kit/kit/issues/627. * Revert "Add backoff package" This reverts commit 924501ae1fcfadaa27593e9c019283412c513928. * Get rid of external package and update exponential * Add instancer backoff * Fix old exponential name * Add doc comment * Fixup & respond to review --- sd/consul/instancer.go | 6 ++++++ util/conn/manager.go | 11 +++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/sd/consul/instancer.go b/sd/consul/instancer.go index 38b18f0..6eb87a4 100644 --- a/sd/consul/instancer.go +++ b/sd/consul/instancer.go @@ -3,12 +3,14 @@ package consul import ( "fmt" "io" + "time" consul "github.com/hashicorp/consul/api" "github.com/go-kit/kit/log" "github.com/go-kit/kit/sd" "github.com/go-kit/kit/sd/internal/instance" + "github.com/go-kit/kit/util/conn" ) const defaultIndex = 0 @@ -59,6 +61,7 @@ func (s *Instancer) loop(lastIndex uint64) { var ( instances []string err error + d time.Duration = 10 * time.Millisecond ) for { instances, lastIndex, err = s.getInstances(lastIndex, s.quitc) @@ -67,9 +70,12 @@ func (s *Instancer) loop(lastIndex uint64) { return // stopped via quitc case err != nil: s.logger.Log("err", err) + time.Sleep(d) + d = conn.Exponential(d) s.cache.Update(sd.Event{Err: err}) default: s.cache.Update(sd.Event{Instances: instances}) + d = 10 * time.Millisecond } } } diff --git a/util/conn/manager.go b/util/conn/manager.go index 0b7db62..725cbbc 100644 --- a/util/conn/manager.go +++ b/util/conn/manager.go @@ -2,6 +2,7 @@ package conn import ( "errors" + "math/rand" "net" "time" @@ -103,7 +104,7 @@ func (m *Manager) loop() { case conn = <-connc: if conn == nil { // didn't work - backoff = exponential(backoff) // wait longer + backoff = Exponential(backoff) // wait longer reconnectc = m.after(backoff) // try again } else { // worked! @@ -132,12 +133,18 @@ func dial(d Dialer, network, address string, logger log.Logger) net.Conn { return conn } -func exponential(d time.Duration) time.Duration { +// Exponential takes a duration and returns another one that is twice as long, +/- 50%. It is +// used to provide backoff for operations that may fail and should avoid thundering herds. +// See https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/ for rationale +func Exponential(d time.Duration) time.Duration { d *= 2 + jitter := rand.Float64() + 0.5 + d = time.Duration(int64(float64(d.Nanoseconds()) * jitter)) if d > time.Minute { d = time.Minute } return d + } // ErrConnectionUnavailable is returned by the Manager's Write method when the