Skip to content

Commit

Permalink
add a histogram util (koordinator-sh#1365)
Browse files Browse the repository at this point in the history
Signed-off-by: Fansong Zeng <fanster.z@gmail.com>
  • Loading branch information
hormes committed Jun 8, 2023
1 parent ff48357 commit b8e22c7
Show file tree
Hide file tree
Showing 9 changed files with 1,208 additions and 0 deletions.
1 change: 1 addition & 0 deletions .licenseignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@ test/e2e/testing-manifests
test/e2e/e2e.go
test/e2e/e2e_test.go
test/e2e/suites.go
pkg/util/histogram
139 changes: 139 additions & 0 deletions pkg/util/histogram/decaying_histogram.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
/*
Copyright 2023 The Koordinator Authors.
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package histogram

import (
"fmt"
"math"
"time"
)

// maxDecayExponent bounds the exponent of the decay factor. Once a sample's
// timestamp lies more than maxDecayExponent half lives past the reference
// timestamp, the histogram is renormalized by shifting the reference
// timestamp forward.
var maxDecayExponent = 100

// decayingHistogram is a histogram that gives newer samples a higher weight
// than older ones, gradually decaying ("forgetting") past samples. Before a
// sample is handed to the embedded histogram its weight is multiplied by
// 2^((sampleTime - referenceTimestamp) / halfLife), so relative to a newer
// sample it loses half of its weight ("importance") with each halfLife period.
//
// Since only relative (and not absolute) weights of samples matter, the
// referenceTimestamp can be shifted at any time, which is equivalent to
// multiplying all weights by a constant. In practice the referenceTimestamp is
// shifted forward whenever the exponent would exceed maxDecayExponent, to
// avoid floating point overflow.
type decayingHistogram struct {
// Embedded plain histogram that stores the already-scaled sample weights.
histogram
// Decay half life period.
halfLife time.Duration
// Reference time for determining the relative age of samples.
// shiftReferenceTimestamp rounds it to a multiple of halfLife; a value
// restored by LoadFromCheckpoint is stored as-is.
referenceTimestamp time.Time
}

// NewDecayingHistogram builds a decaying histogram on top of a plain histogram
// created from options. halfLife is the period after which a sample's relative
// weight is halved. The reference timestamp starts at the zero time.Time and
// is moved forward on demand as samples are added.
func NewDecayingHistogram(options HistogramOptions, halfLife time.Duration) Histogram {
	base := NewHistogram(options).(*histogram)
	decaying := decayingHistogram{halfLife: halfLife}
	decaying.histogram = *base
	return &decaying
}

// Percentile returns the given percentile as computed by the embedded
// histogram over the currently stored (decayed) weights.
func (h *decayingHistogram) Percentile(percentile float64) float64 {
	base := &h.histogram
	return base.Percentile(percentile)
}

// AddSample adds a sample whose weight is first scaled by the decay factor for
// its timestamp. Computing the decay factor may renormalize the histogram
// (shift the reference timestamp), which is why it happens before the sample
// is handed to the embedded histogram.
func (h *decayingHistogram) AddSample(value float64, weight float64, time time.Time) {
	factor := h.decayFactor(time)
	decayedWeight := weight * factor
	h.histogram.AddSample(value, decayedWeight, time)
}

// SubtractSample removes a sample whose weight is first scaled by the decay
// factor for its timestamp, mirroring AddSample. Computing the decay factor
// may shift the reference timestamp before the subtraction is applied.
func (h *decayingHistogram) SubtractSample(value float64, weight float64, time time.Time) {
	factor := h.decayFactor(time)
	decayedWeight := weight * factor
	h.histogram.SubtractSample(value, decayedWeight, time)
}

// Merge folds the samples of other into h. other must be a *decayingHistogram
// with the same half life; a different half life panics, and a different
// concrete type fails the type assertion.
//
// Both histograms must share a reference timestamp before their weights can be
// summed, so whichever has the older reference timestamp is shifted to the
// newer one. Note that this can rescale other in place.
func (h *decayingHistogram) Merge(other Histogram) {
	src := other.(*decayingHistogram)
	if src.halfLife != h.halfLife {
		panic("can't merge decaying histograms with different half life periods")
	}
	switch mine, theirs := h.referenceTimestamp, src.referenceTimestamp; {
	case mine.Before(theirs):
		h.shiftReferenceTimestamp(theirs)
	case theirs.Before(mine):
		src.shiftReferenceTimestamp(mine)
	}
	h.histogram.Merge(&src.histogram)
}

// Equals reports whether other is a *decayingHistogram with the same half
// life, the same reference instant and an equal underlying histogram.
//
// The reference timestamps are compared with time.Time.Equal rather than ==.
// The == operator also compares the Location and the monotonic clock reading,
// so two histograms anchored at the same instant (for example one restored
// from a checkpoint carrying a different time zone) would otherwise be
// reported as different.
func (h *decayingHistogram) Equals(other Histogram) bool {
	h2, typesMatch := other.(*decayingHistogram)
	if !typesMatch {
		return false
	}
	return h.halfLife == h2.halfLife &&
		h.referenceTimestamp.Equal(h2.referenceTimestamp) &&
		h.histogram.Equals(&h2.histogram)
}

// IsEmpty reports whether the embedded histogram considers itself empty.
func (h *decayingHistogram) IsEmpty() bool {
	base := &h.histogram
	return base.IsEmpty()
}

// String renders the reference timestamp and half life on the first line,
// followed by the textual form of the embedded histogram.
func (h *decayingHistogram) String() string {
	reference, halfLife := h.referenceTimestamp, h.halfLife
	buckets := h.histogram.String()
	return fmt.Sprintf("referenceTimestamp: %v, halfLife: %v\n%s", reference, halfLife, buckets)
}

// shiftReferenceTimestamp moves the reference timestamp to
// newreferenceTimestamp (rounded to a multiple of halfLife) and rescales every
// stored weight so that relative weights are unchanged.
func (h *decayingHistogram) shiftReferenceTimestamp(newreferenceTimestamp time.Time) {
	// Keep the reference on a half-life boundary so the shift is a whole
	// number of half lives and the scale factor is an exact power of two.
	aligned := newreferenceTimestamp.Round(h.halfLife)
	elapsed := h.referenceTimestamp.Sub(aligned)
	halfLives := round(float64(elapsed) / float64(h.halfLife))
	// Moving the reference forward yields a negative exponent, i.e. all
	// weights shrink by 2^halfLives.
	factor := math.Ldexp(1., halfLives)
	h.histogram.scale(factor)
	h.referenceTimestamp = aligned
}

// decayFactor returns the multiplier applied to the weight of a sample taken
// at timestamp: 2^((timestamp - referenceTimestamp) / halfLife).
//
// If timestamp lies more than maxDecayExponent half lives after the reference
// timestamp, the histogram is first renormalized by shifting the reference
// timestamp to timestamp, so the returned factor stays bounded.
func (h *decayingHistogram) decayFactor(timestamp time.Time) float64 {
	// Longest distance from the reference before the exponent grows too large.
	window := time.Duration(int64(h.halfLife) * int64(maxDecayExponent))
	if limit := h.referenceTimestamp.Add(window); timestamp.After(limit) {
		h.shiftReferenceTimestamp(timestamp)
	}
	elapsedHalfLives := float64(timestamp.Sub(h.referenceTimestamp)) / float64(h.halfLife)
	return math.Exp2(elapsedHalfLives)
}

// SaveToChekpoint serializes the embedded histogram into a checkpoint and, on
// success, records the current reference timestamp in it. If the embedded
// histogram fails, its checkpoint value and error are returned unchanged.
func (h *decayingHistogram) SaveToChekpoint() (*HistogramCheckpoint, error) {
	cp, err := h.histogram.SaveToChekpoint()
	if err == nil {
		cp.ReferenceTimestamp = h.referenceTimestamp
	}
	return cp, err
}

// LoadFromCheckpoint restores the embedded histogram from checkpoint and then
// adopts the checkpoint's reference timestamp as-is. If the embedded histogram
// rejects the checkpoint, the error is returned and the reference timestamp is
// left untouched.
func (h *decayingHistogram) LoadFromCheckpoint(checkpoint *HistogramCheckpoint) error {
	if err := h.histogram.LoadFromCheckpoint(checkpoint); err != nil {
		return err
	}
	h.referenceTimestamp = checkpoint.ReferenceTimestamp
	return nil
}

// round converts x to the nearest integer. Halfway cases are rounded towards
// positive infinity (round(-0.5) == 0), which differs from math.Round.
func round(x float64) int {
	shifted := x + 0.5
	return int(math.Floor(shifted))
}
170 changes: 170 additions & 0 deletions pkg/util/histogram/decaying_histogram_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
/*
Copyright 2023 The Koordinator Authors.
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package histogram

import (
"testing"
"time"

"github.com/stretchr/testify/assert"
)

// startTime is an arbitrary fixed timestamp used as the base time of the
// samples in these tests.
var startTime = time.Unix(1234567890, 0)

// TestPercentilesEmptyDecayingHistogram verifies that Percentile() returns 0.0
// on an empty decaying histogram for any percentile, including values outside
// the [0, 1] range.
func TestPercentilesEmptyDecayingHistogram(t *testing.T) {
	h := NewDecayingHistogram(testHistogramOptions, time.Hour)
	for _, p := range []float64{-0.5, 0.0, 0.5, 1.0, 1.5} {
		assert.Equal(t, 0.0, h.Percentile(p))
	}
}

// TestSimpleDecay verifies that a sample with a large weight is almost
// entirely (but not 100%) decayed after a sufficient amount of time elapses.
func TestSimpleDecay(t *testing.T) {
	h := NewDecayingHistogram(testHistogramOptions, time.Hour)

	// Old sample with a very large weight.
	h.AddSample(2, 1000, startTime)

	// New sample 20 half life periods later. Its relative weight is expected
	// to be 2^20 * 0.001 > 1000 times larger than that of the old sample.
	later := startTime.Add(20 * time.Hour)
	h.AddSample(1, 1, later)

	assert.InEpsilon(t, 2, h.Percentile(0.999), valueEpsilon)
	assert.InEpsilon(t, 3, h.Percentile(1.0), valueEpsilon)
}

// TestLongtermDecay verifies that the decaying histogram behaves correctly
// after the decay factor grows by more than 2^maxDecayExponent.
func TestLongtermDecay(t *testing.T) {
	h := NewDecayingHistogram(testHistogramOptions, time.Hour)

	// Old sample with weight 1.
	h.AddSample(2, 1, startTime)

	// New sample far enough in the future that the relative decay factor of
	// the two samples exceeds 2^maxDecayExponent.
	muchLater := startTime.Add(101 * time.Hour)
	h.AddSample(1, 1, muchLater)

	// Only the new sample is expected to show up in the top percentile.
	assert.InEpsilon(t, 2, h.Percentile(1.0), valueEpsilon)
}

// TestDecayingHistogramPercentiles verifies specific percentile values on an
// example decaying histogram with 4 samples added at different timestamps.
func TestDecayingHistogramPercentiles(t *testing.T) {
	h := NewDecayingHistogram(testHistogramOptions, time.Hour)

	// Four samples with both value and weight equal to 1, 2, 3, 4, each one
	// half life period after the previous one.
	for i := 1; i <= 4; i++ {
		v := float64(i)
		h.AddSample(v, v, startTime.Add(time.Duration(i-1)*time.Hour))
	}

	// The expected distribution is:
	// bucket = [1..2], weight = 1 * 2^(-3), percentiles ~  0% ...   2%
	// bucket = [2..3], weight = 2 * 2^(-2), percentiles ~  3% ...  10%
	// bucket = [3..4], weight = 3 * 2^(-1), percentiles ~ 11% ...  34%
	// bucket = [4..5], weight = 4 * 2^(-0), percentiles ~ 35% ... 100%
	cases := []struct {
		percentile float64
		expected   int
	}{
		{0.00, 2},
		{0.02, 2},
		{0.03, 3},
		{0.10, 3},
		{0.11, 4},
		{0.34, 4},
		{0.35, 5},
		{1.00, 5},
	}
	for _, tc := range cases {
		assert.InEpsilon(t, tc.expected, h.Percentile(tc.percentile), valueEpsilon)
	}
}

// TestNoDecay verifies that the decaying histogram behaves the same way as a
// regular histogram if the time is fixed and no decaying happens.
func TestNoDecay(t *testing.T) {
	h := NewDecayingHistogram(testHistogramOptions, time.Hour)

	// All samples share one timestamp, so their weights stay 1, 2, 3, 4.
	for i := 1; i <= 4; i++ {
		v := float64(i)
		h.AddSample(v, v, startTime)
	}

	cases := []struct {
		percentile float64
		expected   int
	}{
		{0.0, 2},
		{0.1, 2},
		{0.2, 3},
		{0.3, 3},
		{0.4, 4},
		{0.5, 4},
		{0.6, 4},
		{0.7, 5},
		{0.8, 5},
		{0.9, 5},
		{1.0, 5},
	}
	for _, tc := range cases {
		assert.InEpsilon(t, tc.expected, h.Percentile(tc.percentile), valueEpsilon)
	}
}

// Verifies that Merge() works as expected on two sample decaying histograms.
func TestDecayingHistogramMerge(t *testing.T) {
h1 := NewDecayingHistogram(testHistogramOptions, time.Hour)
h1.AddSample(1, 1, startTime)
h1.AddSample(2, 1, startTime.Add(time.Hour))

h2 := NewDecayingHistogram(testHistogramOptions, time.Hour)
h2.AddSample(2, 1, startTime.Add(time.Hour*2))
h2.AddSample(3, 1, startTime.Add(time.Hour))

expected := NewDecayingHistogram(testHistogramOptions, time.Hour)
expected.AddSample(2, 1, startTime.Add(time.Hour*2))
expected.AddSample(2, 1, startTime.Add(time.Hour))
expected.AddSample(3, 1, startTime.Add(time.Hour))
expected.AddSample(1, 1, startTime)

h1.Merge(h2)
assert.True(t, h1.Equals(expected))
}

func TestDecayingHistogramSaveToCheckpoint(t *testing.T) {
d := &decayingHistogram{
histogram: *NewHistogram(testHistogramOptions).(*histogram),
halfLife: time.Hour,
referenceTimestamp: time.Time{},
}
d.AddSample(2, 1, startTime.Add(time.Hour*100))
assert.NotEqual(t, d.referenceTimestamp, time.Time{})

checkpoint, err := d.SaveToChekpoint()
assert.NoError(t, err)
assert.Equal(t, checkpoint.ReferenceTimestamp, d.referenceTimestamp)
// Just check that buckets are not empty, actual testing of bucketing
// belongs to Histogram
assert.NotEmpty(t, checkpoint.BucketWeights)
assert.NotZero(t, checkpoint.TotalWeight)
}

// TestDecayingHistogramLoadFromCheckpoint verifies that loading a checkpoint
// populates the embedded histogram and restores the reference timestamp.
func TestDecayingHistogramLoadFromCheckpoint(t *testing.T) {
	// time.UTC is used directly instead of time.LoadLocation("UTC"), which
	// returns an error that would otherwise have to be checked.
	timestamp := time.Date(2018, time.January, 2, 3, 4, 5, 0, time.UTC)

	checkpoint := HistogramCheckpoint{
		TotalWeight: 6.0,
		BucketWeights: map[int]uint32{
			0: 1,
		},
		ReferenceTimestamp: timestamp,
	}
	d := &decayingHistogram{
		histogram:          *NewHistogram(testHistogramOptions).(*histogram),
		halfLife:           time.Hour,
		referenceTimestamp: time.Time{},
	}
	err := d.LoadFromCheckpoint(&checkpoint)
	assert.NoError(t, err)

	assert.False(t, d.histogram.IsEmpty())
	assert.Equal(t, timestamp, d.referenceTimestamp)
}
Loading

0 comments on commit b8e22c7

Please sign in to comment.