-
Notifications
You must be signed in to change notification settings - Fork 8
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Balancing #107
Balancing #107
Changes from 8 commits
646852c
7ca93a6
67e3852
5631bc0
bc6098e
ca8f512
c0a686d
2f13508
452efe9
ded1321
3847aea
64b4e9a
5d65ae6
58d540c
470741f
7e5e6ec
723a025
73be064
7ce573a
5f53768
f54769a
d76597c
1b94974
8478a75
22537e1
a27c1b0
b7c8c25
458d581
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -117,7 +117,7 @@ func (meter *CallMeter) Calls() float64 { | |
|
||
// CallsInLastPeriod returns number of calls in last duration | ||
func (meter *CallMeter) CallsInLastPeriod(period time.Duration) float64 { | ||
lastPeriodSeries := meter.histogram.pickLastSeries(period) | ||
lastPeriodSeries := meter.histogram.PickLastSeries(period) | ||
sum := float64(0) | ||
now := meter.now() | ||
for _, series := range lastPeriodSeries { | ||
|
@@ -146,7 +146,7 @@ func (meter *CallMeter) SetActive(active bool) { | |
|
||
// TimeSpent returns float64 repesentation of time spent in execution | ||
func (meter *CallMeter) TimeSpent() float64 { | ||
allSeries := meter.histogram.pickLastSeries(meter.resolution) | ||
allSeries := meter.histogram.PickLastSeries(meter.resolution) | ||
sum := float64(0) | ||
now := meter.now() | ||
|
||
|
@@ -170,14 +170,12 @@ func (series *dataSeries) Add(value float64, dateTime time.Time) { | |
series.data = append(series.data, &timeValue{dateTime, value}) | ||
} | ||
|
||
func (series *dataSeries) ValueRangeFun(timeStart, timeEnd time.Time, fun func(*timeValue)) []float64 { | ||
dataRange := []float64{} | ||
func (series *dataSeries) ValueRangeFun(timeStart, timeEnd time.Time, fun func(*timeValue)) { | ||
for _, timeVal := range series.data { | ||
if (timeStart == timeVal.date || timeStart.Before(timeVal.date)) && timeEnd.After(timeVal.date) { | ||
fun(timeVal) | ||
} | ||
} | ||
return dataRange | ||
} | ||
|
||
func (series *dataSeries) ValueRange(timeStart, timeEnd time.Time) []float64 { | ||
|
@@ -226,7 +224,8 @@ func (h *histogram) pickSeries(at time.Time) *dataSeries { | |
return h.data[idx] | ||
} | ||
|
||
func (h *histogram) pickLastSeries(period time.Duration) []*dataSeries { | ||
// PickLastSeries returns slice of dataSeries tracking at least given period of time | ||
func (h *histogram) PickLastSeries(period time.Duration) []*dataSeries { | ||
h.mx.Lock() | ||
defer h.mx.Unlock() | ||
if period > h.retention { | ||
|
@@ -294,7 +293,7 @@ func newBreaker(retention int, callTimeLimit time.Duration, | |
closeDelay, maxDelay time.Duration) Breaker { | ||
return &NodeBreaker{ | ||
timeData: newLenLimitCounter(retention), | ||
successData: newLenLimitCounter(retention), | ||
failures: newLenLimitCounter(retention), | ||
rate: errorRate, | ||
callTimeLimit: callTimeLimit, | ||
timeLimitPercentile: timeLimitPercentile, | ||
|
@@ -310,25 +309,25 @@ type NodeBreaker struct { | |
callTimeLimit time.Duration | ||
timeLimitPercentile float64 | ||
timeData *lengthDelimitedCounter | ||
successData *lengthDelimitedCounter | ||
failures *lengthDelimitedCounter | ||
now func() time.Time | ||
closeDelay time.Duration | ||
maxDelay time.Duration | ||
state *openStateTracker | ||
} | ||
|
||
// Record collects call data and returns bool if breaker should be open | ||
// Record collects call data and returns bool if breaker should be opened | ||
func (breaker *NodeBreaker) Record(duration time.Duration, success bool) bool { | ||
breaker.timeData.Add(float64(duration)) | ||
successValue := float64(1) | ||
failValue := float64(1) | ||
if success { | ||
successValue = float64(0) | ||
failValue = float64(0) | ||
} | ||
breaker.successData.Add(successValue) | ||
breaker.failures.Add(failValue) | ||
return breaker.ShouldOpen() | ||
} | ||
|
||
// ShouldOpen checks if breaker should be open | ||
// ShouldOpen checks if breaker should be opened | ||
func (breaker *NodeBreaker) ShouldOpen() bool { | ||
exceeded := breaker.limitsExceeded() | ||
if breaker.state != nil { | ||
|
@@ -368,7 +367,7 @@ func (breaker *NodeBreaker) limitsExceeded() bool { | |
percentile := breaker.timeData.Percentile(breaker.timeLimitPercentile) | ||
if percentile > float64(breaker.callTimeLimit) { | ||
breaker.openBreaker() | ||
log.Debugf("Breaker: time percentile exceeded %f", percentile) | ||
log.Debugf("Breaker: time percentile exceeded %f / %f", percentile, float64(breaker.callTimeLimit)) | ||
return true | ||
} | ||
return false | ||
|
@@ -384,12 +383,12 @@ func (breaker *NodeBreaker) openBreaker() { | |
|
||
func (breaker *NodeBreaker) reset() { | ||
breaker.timeData.Reset() | ||
breaker.successData.Reset() | ||
breaker.failures.Reset() | ||
} | ||
|
||
func (breaker *NodeBreaker) errorRate() float64 { | ||
sum := breaker.successData.Sum() | ||
count := float64(len(breaker.successData.values)) | ||
sum := breaker.failures.Sum() | ||
count := float64(len(breaker.failures.values)) | ||
return sum / count | ||
} | ||
|
||
|
@@ -522,24 +521,28 @@ func (ms *MeasuredStorage) RoundTrip(req *http.Request) (*http.Response, error) | |
log.Debugf("MeasuredStorage %s: Got request id %s\n", ms.Name, reqID) | ||
resp, err := ms.RoundTripper.RoundTrip(req) | ||
duration := time.Since(start) | ||
success := backend.IsSuccessful(resp, err) | ||
success := backendSuccess(resp, err) | ||
open := ms.Breaker.Record(duration, success) | ||
log.Debugf("MeasuredStorage %s: Request %s took %s was successful: %t, opened breaker %t\n", ms.Name, reqID, duration, success, open) | ||
log.Debugf("s %s: Request %s took %s was successful: %t, opened breaker %t\n", ms.Name, reqID, duration, success, open) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You replaced `MeasuredStorage` with `s`. |
||
|
||
ms.Node.UpdateTimeSpent(duration) | ||
ms.Node.SetActive(!open) | ||
raportMetrics(ms.RoundTripper, start, open) | ||
reportMetrics(ms.RoundTripper, start, open) | ||
return resp, err | ||
} | ||
|
||
func backendSuccess(response *http.Response, err error) bool { | ||
return err == nil && response != nil && response.StatusCode < 500 | ||
} | ||
|
||
// IsActive checks Breaker status propagates it to Node compound | ||
func (ms *MeasuredStorage) IsActive() bool { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't like the fact that the method's name hides what it's actually doing, so you have to read the doc comment to know the whole truth. |
||
isActive := !ms.Breaker.ShouldOpen() | ||
ms.Node.SetActive(isActive) | ||
return ms.Node.IsActive() | ||
} | ||
|
||
func raportMetrics(rt http.RoundTripper, since time.Time, open bool) { | ||
func reportMetrics(rt http.RoundTripper, since time.Time, open bool) { | ||
if b, ok := rt.(*backend.Backend); ok { | ||
prefix := fmt.Sprintf("reqs.backend.%s.balancer", b.Name) | ||
metrics.UpdateSince(prefix+".duration", since) | ||
|
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
This file was deleted.
This file was deleted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Won't this be too expensive to calculate?
It happens every time we want to know the node's weight, which is basically every call. If there are a lot of requests per second, then on each call we have to traverse every series in the histogram of every active node just to elect a node.
We could simply keep a running aggregate instead of recalculating it on every call.