diff --git a/prometheus/counter.go b/prometheus/counter.go index 4ce84e7a8..18aaefdb6 100644 --- a/prometheus/counter.go +++ b/prometheus/counter.go @@ -106,7 +106,11 @@ type counter struct { // in the struct to guarantee alignment for atomic operations. // http://golang.org/pkg/sync/atomic/#pkg-note-BUG valBits uint64 - valInt uint64 + + // change is used to record the numbers which will lose if it adds with float64 directly, + // due to the rounding error of IEEE754 representation. + // we aggregate the values until it's large enough to conquer the rounding error + change uint64 selfCollector desc *Desc @@ -119,6 +123,20 @@ type counter struct { now func() time.Time } +// addWithRoundingErrorChecking adds the base and addend, +// and returns the result and checks whether the addend is too small that causes a rounding error. +// +// note that if a part of addend is added on base but the left causes a rounding error, +// we don't respect this case +func addWithRoundingErrorChecking(base, addend float64) (float64, bool) { + if addend == 0 { + return base, false + } + + sum := base + addend + return sum, sum == base +} + func (c *counter) Desc() *Desc { return c.desc } @@ -128,15 +146,42 @@ func (c *counter) Add(v float64) { panic(errors.New("counter cannot decrease in value")) } - ival := uint64(v) - if float64(ival) == v { - atomic.AddUint64(&c.valInt, ival) - return - } - for { oldBits := atomic.LoadUint64(&c.valBits) - newBits := math.Float64bits(math.Float64frombits(oldBits) + v) + + u := uint64(v) + + // not the full value of v will be added into the float as a part of them might have rounding + // error as well, we don't respect this case(we don't have a proper way to handle it). + newF, hasRoundingErr := addWithRoundingErrorChecking(math.Float64frombits(oldBits), v) + if hasRoundingErr && v == float64(u) { + // we believe the float64(uint64(v)) is equal to v if it's an integer + // because it causes a rounding error, + // it doesn't equal only when v is a quite large number or it's a float + + oldChange := atomic.LoadUint64(&c.change) + newF, isChangeSmall := addWithRoundingErrorChecking(math.Float64frombits(oldBits), float64(oldChange+u)) + + if isChangeSmall { + if atomic.CompareAndSwapUint64(&c.change, oldChange, oldChange+u) { + return + } + continue + } + newBits := math.Float64bits(newF) + if atomic.CompareAndSwapUint64(&c.valBits, oldBits, newBits) { + // todo: here we might lose some small values, but it's acceptable + // otherwise we have no way to avoid this using atomic + // mutex might be too heavy for our case here + atomic.StoreUint64(&c.change, 0) + return + } + // fail to update the change as another go routine changes it + continue + } + + // we have no idea about the precision losing for float number + newBits := math.Float64bits(newF) if atomic.CompareAndSwapUint64(&c.valBits, oldBits, newBits) { return } @@ -149,12 +194,17 @@ func (c *counter) AddWithExemplar(v float64, e Labels) { } func (c *counter) Inc() { - atomic.AddUint64(&c.valInt, 1) + // to keep the efficiency, we still increase the change, + // and ignore the rare overflow case as only Inc and float with rounding error will use it + atomic.AddUint64(&c.change, 1) } func (c *counter) get() float64 { fval := math.Float64frombits(atomic.LoadUint64(&c.valBits)) - ival := atomic.LoadUint64(&c.valInt) + ival := atomic.LoadUint64(&c.change) + // it's tolerated to lose precision for float during collection + // as this is unavoidable. + // what the client could do is try to keep those data for the future when losing precision return fval + float64(ival) } diff --git a/prometheus/counter_test.go b/prometheus/counter_test.go index 3686199b5..90e3f01f9 100644 --- a/prometheus/counter_test.go +++ b/prometheus/counter_test.go @@ -38,22 +38,22 @@ func TestCounterAdd(t *testing.T) { if expected, got := 0.0, math.Float64frombits(counter.valBits); expected != got { t.Errorf("Expected %f, got %f.", expected, got) } - if expected, got := uint64(1), counter.valInt; expected != got { + if expected, got := uint64(1), counter.change; expected != got { t.Errorf("Expected %d, got %d.", expected, got) } counter.Add(42) - if expected, got := 0.0, math.Float64frombits(counter.valBits); expected != got { + if expected, got := 42.0, math.Float64frombits(counter.valBits); expected != got { t.Errorf("Expected %f, got %f.", expected, got) } - if expected, got := uint64(43), counter.valInt; expected != got { + if expected, got := uint64(1), counter.change; expected != got { t.Errorf("Expected %d, got %d.", expected, got) } counter.Add(24.42) - if expected, got := 24.42, math.Float64frombits(counter.valBits); expected != got { + if expected, got := 42+24.42, math.Float64frombits(counter.valBits); expected != got { t.Errorf("Expected %f, got %f.", expected, got) } - if expected, got := uint64(43), counter.valInt; expected != got { + if expected, got := uint64(1), counter.change; expected != got { t.Errorf("Expected %d, got %d.", expected, got) } @@ -79,6 +79,114 @@ func TestCounterAdd(t *testing.T) { } } +func TestCounterOverflowAdd(t *testing.T) { + now := time.Now() + + counter := NewCounter(CounterOpts{ + Name: "test", + Help: "test help", + ConstLabels: Labels{"a": "1", "b": "2"}, + now: func() time.Time { return now }, + }).(*counter) + counter.change = math.MaxUint64 + counter.Inc() + + // this is expected due to the overflow + if expected, got := uint64(0), counter.change; expected != got { + t.Errorf("Expected %d, got %d.", expected, got) + } + + m := &dto.Metric{} + counter.Write(m) + + expected := &dto.Metric{ + Label: []*dto.LabelPair{ + {Name: proto.String("a"), Value: proto.String("1")}, + {Name: proto.String("b"), Value: proto.String("2")}, + }, + Counter: &dto.Counter{ + Value: proto.Float64(0), + CreatedTimestamp: timestamppb.New(now), + }, + } + if !proto.Equal(expected, m) { + t.Errorf("expected %q, got %q", expected, m) + } +} + +func TestCounterPrecisionLosingAdd(t *testing.T) { + now := time.Now() + + counter := NewCounter(CounterOpts{ + Name: "test", + Help: "test help", + ConstLabels: Labels{"a": "1", "b": "2"}, + now: func() time.Time { return now }, + }).(*counter) + + counter.Add(1 << 53) + if expected, got := float64(1<<53), math.Float64frombits(counter.valBits); expected != got { + t.Errorf("Expected %f, got %f.", expected, got) + } + + counter.Inc() + if expected, got := uint64(1), counter.change; expected != got { + t.Errorf("Expected %d, got %d.", expected, got) + } + + // along with the value 1 inside change, the sum could be added into the float without losing precision + counter.Add(1) + if expected, got := float64(1<<53+2), math.Float64frombits(counter.valBits); expected != got { + t.Errorf("Expected %f, got %f.", expected, got) + } + // the change is reset to 0 + if expected, got := uint64(0), counter.change; expected != got { + t.Errorf("Expected %d, got %d.", expected, got) + } + + // the value will flip with 2 + counter.Add(1) + if expected, got := float64(1<<53+4), math.Float64frombits(counter.valBits); expected != got { + t.Errorf("Expected %f, got %f.", expected, got) + } + if expected, got := uint64(0), counter.change; expected != got { + t.Errorf("Expected %d, got %d.", expected, got) + } + + counter.Add(1) + if expected, got := float64(1<<53+4), math.Float64frombits(counter.valBits); expected != got { + t.Errorf("Expected %f, got %f.", expected, got) + } + if expected, got := uint64(1), counter.change; expected != got { + t.Errorf("Expected %d, got %d.", expected, got) + } + + counter.Add(1) + if expected, got := float64(1<<53+6), math.Float64frombits(counter.valBits); expected != got { + t.Errorf("Expected %f, got %f.", expected, got) + } + if expected, got := uint64(0), counter.change; expected != got { + t.Errorf("Expected %d, got %d.", expected, got) + } + + m := &dto.Metric{} + counter.Write(m) + + expected := &dto.Metric{ + Label: []*dto.LabelPair{ + {Name: proto.String("a"), Value: proto.String("1")}, + {Name: proto.String("b"), Value: proto.String("2")}, + }, + Counter: &dto.Counter{ + Value: proto.Float64(1<<53 + 6), + CreatedTimestamp: timestamppb.New(now), + }, + } + if !proto.Equal(expected, m) { + t.Errorf("expected %q, got %q", expected, m) + } +} + func decreaseCounter(c *counter) (err error) { defer func() { if e := recover(); e != nil { @@ -157,7 +265,7 @@ func TestCounterAddInf(t *testing.T) { if expected, got := 0.0, math.Float64frombits(counter.valBits); expected != got { t.Errorf("Expected %f, got %f.", expected, got) } - if expected, got := uint64(1), counter.valInt; expected != got { + if expected, got := uint64(1), counter.change; expected != got { t.Errorf("Expected %d, got %d.", expected, got) } @@ -165,7 +273,7 @@ func TestCounterAddInf(t *testing.T) { if expected, got := math.Inf(1), math.Float64frombits(counter.valBits); expected != got { t.Errorf("valBits expected %f, got %f.", expected, got) } - if expected, got := uint64(1), counter.valInt; expected != got { + if expected, got := uint64(1), counter.change; expected != got { t.Errorf("valInts expected %d, got %d.", expected, got) } @@ -173,7 +281,7 @@ func TestCounterAddInf(t *testing.T) { if expected, got := math.Inf(1), math.Float64frombits(counter.valBits); expected != got { t.Errorf("Expected %f, got %f.", expected, got) } - if expected, got := uint64(2), counter.valInt; expected != got { + if expected, got := uint64(2), counter.change; expected != got { t.Errorf("Expected %d, got %d.", expected, got) } @@ -207,7 +315,7 @@ func TestCounterAddLarge(t *testing.T) { if expected, got := large, math.Float64frombits(counter.valBits); expected != got { t.Errorf("valBits expected %f, got %f.", expected, got) } - if expected, got := uint64(0), counter.valInt; expected != got { + if expected, got := uint64(0), counter.change; expected != got { t.Errorf("valInts expected %d, got %d.", expected, got) } @@ -240,7 +348,7 @@ func TestCounterAddSmall(t *testing.T) { if expected, got := small, math.Float64frombits(counter.valBits); expected != got { t.Errorf("valBits expected %f, got %f.", expected, got) } - if expected, got := uint64(0), counter.valInt; expected != got { + if expected, got := uint64(0), counter.change; expected != got { t.Errorf("valInts expected %d, got %d.", expected, got) } @@ -386,3 +494,91 @@ func expectCTsForMetricVecValues(t testing.TB, vec *MetricVec, typ dto.MetricTyp } } } + +func Test_hasRoundingError(t *testing.T) { + // for reviewing, could use this online tool to convert decimal + // https://baseconvert.com/ieee-754-floating-point + tests := []struct { + name string + base float64 + delta float64 + wantRoundingError bool + wantNumber float64 + }{ + { + name: "no rounding error", + base: 0, + delta: 1, + wantRoundingError: false, + wantNumber: 1, + }, + { + name: "no rounding error", + base: 1<<53 - 1, + delta: 1, + wantRoundingError: false, + wantNumber: 1 << 53, + }, + { + name: "rounding error", + base: 1 << 53, + delta: 1, + wantRoundingError: true, + wantNumber: 1 << 53, + }, + { + name: "no rounding error", + base: 1 << 53, + delta: 2, + wantRoundingError: true, + wantNumber: 1<<53 + 2, + }, + { + name: "no rounding error", + base: 1<<53 + 2, + delta: 1, + wantRoundingError: false, + // there is a precision losing + wantNumber: 1<<53 + 4, + }, + { + name: "rounding error", + base: 1 << 54, + delta: 1, + wantRoundingError: true, + wantNumber: 1 << 54, + }, + { + name: "rounding error", + base: 1 << 54, + delta: 3, + wantRoundingError: false, + wantNumber: 18014398509481988, + }, + { + name: "rounding error", + base: 1 << 64, + delta: 1000, + wantRoundingError: true, + wantNumber: 1 << 64, + }, + { + name: "no rounding error", + base: 1 << 64, + delta: 10000, + wantRoundingError: false, + wantNumber: 18446744073709559808, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + expectNumber, hasRoundingErr := addWithRoundingErrorChecking(tt.base, tt.delta) + if expectNumber != tt.wantNumber { + t.Errorf("addWithRoundingErrorChecking() = %f, wantRoundingError %f", expectNumber, tt.wantNumber) + } + if hasRoundingErr != tt.wantRoundingError { + t.Errorf("addWithRoundingErrorChecking() = %v, wantRoundingError %v", hasRoundingErr, tt.wantRoundingError) + } + }) + } +}