Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add early termination metrics case by case #3093

Merged
merged 8 commits into from
Jun 13, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
137 changes: 133 additions & 4 deletions snow/consensus/snowman/poll/early_term_no_traversal.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,137 @@
package poll

import (
"errors"
"fmt"
"time"

"github.com/prometheus/client_golang/prometheus"

"github.com/ava-labs/avalanchego/ids"
"github.com/ava-labs/avalanchego/utils/bag"
)

var (
errPollDurationVectorMetrics = errors.New("failed to register poll_duration vector metrics")
errPollCountVectorMetrics = errors.New("failed to register poll_count vector metrics")

terminationReason = "reason"
exhaustedReason = "exhausted"
earlyFailReason = "early_fail"
earlyAlphaPrefReason = "early_alpha_pref"
earlyAlphaConfReason = "early_alpha_conf"

exhaustedLabel = prometheus.Labels{
terminationReason: exhaustedReason,
}
earlyFailLabel = prometheus.Labels{
terminationReason: earlyFailReason,
}
earlyAlphaPrefLabel = prometheus.Labels{
terminationReason: earlyAlphaPrefReason,
}
earlyAlphaConfLabel = prometheus.Labels{
terminationReason: earlyAlphaConfReason,
}
)

type earlyTermNoTraversalMetrics struct {
durPolls *prometheus.GaugeVec
durExhaustedPolls prometheus.Gauge
durEarlyFailPolls prometheus.Gauge
durEarlyAlphaPrefPolls prometheus.Gauge
durEarlyAlphaConfPolls prometheus.Gauge

countPolls *prometheus.CounterVec
countExhaustedPolls prometheus.Counter
countEarlyFailPolls prometheus.Counter
countEarlyAlphaPrefPolls prometheus.Counter
countEarlyAlphaConfPolls prometheus.Counter
StephenButtolph marked this conversation as resolved.
Show resolved Hide resolved
}

func newEarlyTermNoTraversalMetrics(reg prometheus.Registerer) (*earlyTermNoTraversalMetrics, error) {
pollCountVec := prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "poll_count",
Help: "Total # of terminated polls by reason",
}, []string{terminationReason})
if err := reg.Register(pollCountVec); err != nil {
return nil, fmt.Errorf("%w: %w", errPollCountVectorMetrics, err)
}
durPollsVec := prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "poll_duration",
Help: "time (in ns) polls took to complete by reason",
}, []string{terminationReason})
if err := reg.Register(durPollsVec); err != nil {
return nil, fmt.Errorf("%w: %w", errPollDurationVectorMetrics, err)
}

return &earlyTermNoTraversalMetrics{
durPolls: durPollsVec,
durExhaustedPolls: durPollsVec.With(exhaustedLabel),
durEarlyFailPolls: durPollsVec.With(earlyFailLabel),
durEarlyAlphaPrefPolls: durPollsVec.With(earlyAlphaPrefLabel),
durEarlyAlphaConfPolls: durPollsVec.With(earlyAlphaConfLabel),
countPolls: pollCountVec,
countExhaustedPolls: pollCountVec.With(exhaustedLabel),
countEarlyFailPolls: pollCountVec.With(earlyFailLabel),
countEarlyAlphaPrefPolls: pollCountVec.With(earlyAlphaPrefLabel),
countEarlyAlphaConfPolls: pollCountVec.With(earlyAlphaConfLabel),
}, nil
}

func (m *earlyTermNoTraversalMetrics) observeExhausted(duration time.Duration) {
m.durExhaustedPolls.Add(float64(duration.Nanoseconds()))
m.countExhaustedPolls.Inc()
}

func (m *earlyTermNoTraversalMetrics) observeEarlyFail(duration time.Duration) {
m.durEarlyFailPolls.Add(float64(duration.Nanoseconds()))
m.countEarlyFailPolls.Inc()
}

func (m *earlyTermNoTraversalMetrics) observeEarlyAlphaPref(duration time.Duration) {
m.durEarlyAlphaPrefPolls.Add(float64(duration.Nanoseconds()))
m.countEarlyAlphaPrefPolls.Inc()
}

func (m *earlyTermNoTraversalMetrics) observeEarlyAlphaConf(duration time.Duration) {
m.durEarlyAlphaConfPolls.Add(float64(duration.Nanoseconds()))
m.countEarlyAlphaConfPolls.Inc()
}

type earlyTermNoTraversalFactory struct {
alphaPreference int
alphaConfidence int

metrics *earlyTermNoTraversalMetrics
}

// NewEarlyTermNoTraversalFactory returns a factory that returns polls with
// early termination, without doing DAG traversals
func NewEarlyTermNoTraversalFactory(alphaPreference int, alphaConfidence int) Factory {
func NewEarlyTermNoTraversalFactory(
alphaPreference int,
alphaConfidence int,
reg prometheus.Registerer,
) (Factory, error) {
metrics, err := newEarlyTermNoTraversalMetrics(reg)
if err != nil {
return nil, err
}

return &earlyTermNoTraversalFactory{
alphaPreference: alphaPreference,
alphaConfidence: alphaConfidence,
}
metrics: metrics,
}, nil
}

func (f *earlyTermNoTraversalFactory) New(vdrs bag.Bag[ids.NodeID]) Poll {
return &earlyTermNoTraversalPoll{
polled: vdrs,
alphaPreference: f.alphaPreference,
alphaConfidence: f.alphaConfidence,
metrics: f.metrics,
start: time.Now(),
}
}

Expand All @@ -40,6 +146,10 @@ type earlyTermNoTraversalPoll struct {
polled bag.Bag[ids.NodeID]
alphaPreference int
alphaConfidence int

metrics *earlyTermNoTraversalMetrics
start time.Time
finished bool
}

// Vote registers a response for this poll
Expand Down Expand Up @@ -67,20 +177,39 @@ func (p *earlyTermNoTraversalPoll) Drop(vdr ids.NodeID) {
// transitive voting.
// 4. A single element has achieved an alphaConfidence majority.
func (p *earlyTermNoTraversalPoll) Finished() bool {
if p.finished {
return true
}

remaining := p.polled.Len()
if remaining == 0 {
p.finished = true
p.metrics.observeExhausted(time.Since(p.start))
return true // Case 1
}

received := p.votes.Len()
maxPossibleVotes := received + remaining
if maxPossibleVotes < p.alphaPreference {
p.finished = true
p.metrics.observeEarlyFail(time.Since(p.start))
return true // Case 2
}

_, freq := p.votes.Mode()
return freq >= p.alphaPreference && maxPossibleVotes < p.alphaConfidence || // Case 3
freq >= p.alphaConfidence // Case 4
if freq >= p.alphaPreference && maxPossibleVotes < p.alphaConfidence {
marun marked this conversation as resolved.
Show resolved Hide resolved
p.finished = true
p.metrics.observeEarlyAlphaPref(time.Since(p.start))
return true // Case 3
}

if freq >= p.alphaConfidence {
p.finished = true
p.metrics.observeEarlyAlphaConf(time.Since(p.start))
return true // Case 4
}

return false
}

// Result returns the result of this poll
Expand Down
35 changes: 24 additions & 11 deletions snow/consensus/snowman/poll/early_term_no_traversal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,25 @@ package poll
import (
"testing"

"github.com/prometheus/client_golang/prometheus"
"github.com/stretchr/testify/require"

"github.com/ava-labs/avalanchego/utils/bag"
)

func newEarlyTermNoTraversalTestFactory(require *require.Assertions, alpha int) Factory {
factory, err := NewEarlyTermNoTraversalFactory(alpha, alpha, prometheus.NewRegistry())
require.NoError(err)
return factory
}

func TestEarlyTermNoTraversalResults(t *testing.T) {
require := require.New(t)

vdrs := bag.Of(vdr1) // k = 1
alpha := 1

factory := NewEarlyTermNoTraversalFactory(alpha, alpha)
factory := newEarlyTermNoTraversalTestFactory(require, alpha)
poll := factory.New(vdrs)

poll.Vote(vdr1, blkID1)
Expand All @@ -31,10 +38,12 @@ func TestEarlyTermNoTraversalResults(t *testing.T) {
}

func TestEarlyTermNoTraversalString(t *testing.T) {
require := require.New(t)

vdrs := bag.Of(vdr1, vdr2) // k = 2
alpha := 2

factory := NewEarlyTermNoTraversalFactory(alpha, alpha)
factory := newEarlyTermNoTraversalTestFactory(require, alpha)
poll := factory.New(vdrs)

poll.Vote(vdr1, blkID1)
Expand All @@ -43,7 +52,7 @@ func TestEarlyTermNoTraversalString(t *testing.T) {
NodeID-BaMPFdqMUQ46BV8iRcwbVfsam55kMqcp: 1
received Bag[ids.ID]: (Size = 1)
SYXsAycDPUu4z2ZksJD5fh5nTDcH3vCFHnpcVye5XuJ2jArg: 1`
require.Equal(t, expected, poll.String())
require.Equal(expected, poll.String())
}

func TestEarlyTermNoTraversalDropsDuplicatedVotes(t *testing.T) {
Expand All @@ -52,7 +61,7 @@ func TestEarlyTermNoTraversalDropsDuplicatedVotes(t *testing.T) {
vdrs := bag.Of(vdr1, vdr2) // k = 2
alpha := 2

factory := NewEarlyTermNoTraversalFactory(alpha, alpha)
factory := newEarlyTermNoTraversalTestFactory(require, alpha)
poll := factory.New(vdrs)

poll.Vote(vdr1, blkID1)
Expand All @@ -72,7 +81,7 @@ func TestEarlyTermNoTraversalTerminatesEarlyWithoutAlphaPreference(t *testing.T)
vdrs := bag.Of(vdr1, vdr2, vdr3) // k = 3
alpha := 2

factory := NewEarlyTermNoTraversalFactory(alpha, alpha)
factory := newEarlyTermNoTraversalTestFactory(require, alpha)
poll := factory.New(vdrs)

poll.Drop(vdr1)
Expand All @@ -90,7 +99,8 @@ func TestEarlyTermNoTraversalTerminatesEarlyWithAlphaPreference(t *testing.T) {
alphaPreference := 3
alphaConfidence := 5

factory := NewEarlyTermNoTraversalFactory(alphaPreference, alphaConfidence)
factory, err := NewEarlyTermNoTraversalFactory(alphaPreference, alphaConfidence, prometheus.NewRegistry())
require.NoError(err)
poll := factory.New(vdrs)

poll.Vote(vdr1, blkID1)
Expand All @@ -114,7 +124,8 @@ func TestEarlyTermNoTraversalTerminatesEarlyWithAlphaConfidence(t *testing.T) {
alphaPreference := 3
alphaConfidence := 3

factory := NewEarlyTermNoTraversalFactory(alphaPreference, alphaConfidence)
factory, err := NewEarlyTermNoTraversalFactory(alphaPreference, alphaConfidence, prometheus.NewRegistry())
require.NoError(err)
poll := factory.New(vdrs)

poll.Vote(vdr1, blkID1)
Expand All @@ -138,7 +149,7 @@ func TestEarlyTermNoTraversalForSharedAncestor(t *testing.T) {
vdrs := bag.Of(vdr1, vdr2, vdr3, vdr4) // k = 4
alpha := 4

factory := NewEarlyTermNoTraversalFactory(alpha, alpha)
factory := newEarlyTermNoTraversalTestFactory(require, alpha)
poll := factory.New(vdrs)

poll.Vote(vdr1, blkID2)
Expand All @@ -160,7 +171,7 @@ func TestEarlyTermNoTraversalWithWeightedResponses(t *testing.T) {
vdrs := bag.Of(vdr1, vdr2, vdr2) // k = 3
alpha := 2

factory := NewEarlyTermNoTraversalFactory(alpha, alpha)
factory := newEarlyTermNoTraversalTestFactory(require, alpha)
poll := factory.New(vdrs)

poll.Vote(vdr2, blkID1)
Expand All @@ -174,12 +185,14 @@ func TestEarlyTermNoTraversalWithWeightedResponses(t *testing.T) {
}

func TestEarlyTermNoTraversalDropWithWeightedResponses(t *testing.T) {
require := require.New(t)

vdrs := bag.Of(vdr1, vdr2, vdr2) // k = 3
alpha := 2

factory := NewEarlyTermNoTraversalFactory(alpha, alpha)
factory := newEarlyTermNoTraversalTestFactory(require, alpha)
poll := factory.New(vdrs)

poll.Drop(vdr2)
require.True(t, poll.Finished())
require.True(poll.Finished())
}
18 changes: 10 additions & 8 deletions snow/consensus/snowman/poll/set_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ var (
func TestNewSetErrorOnPollsMetrics(t *testing.T) {
require := require.New(t)

factory := NewEarlyTermNoTraversalFactory(1, 1)
alpha := 1
factory := newEarlyTermNoTraversalTestFactory(require, alpha)
log := logging.NoLog{}
registerer := prometheus.NewRegistry()

Expand All @@ -45,7 +46,8 @@ func TestNewSetErrorOnPollsMetrics(t *testing.T) {
func TestNewSetErrorOnPollDurationMetrics(t *testing.T) {
require := require.New(t)

factory := NewEarlyTermNoTraversalFactory(1, 1)
alpha := 1
factory := newEarlyTermNoTraversalTestFactory(require, alpha)
log := logging.NoLog{}
registerer := prometheus.NewRegistry()

Expand All @@ -63,7 +65,7 @@ func TestCreateAndFinishPollOutOfOrder_NewerFinishesFirst(t *testing.T) {
vdrs := []ids.NodeID{vdr1, vdr2, vdr3} // k = 3
alpha := 3

factory := NewEarlyTermNoTraversalFactory(alpha, alpha)
factory := newEarlyTermNoTraversalTestFactory(require, alpha)
log := logging.NoLog{}
registerer := prometheus.NewRegistry()
s, err := NewSet(factory, log, registerer)
Expand Down Expand Up @@ -99,7 +101,7 @@ func TestCreateAndFinishPollOutOfOrder_OlderFinishesFirst(t *testing.T) {
vdrs := []ids.NodeID{vdr1, vdr2, vdr3} // k = 3
alpha := 3

factory := NewEarlyTermNoTraversalFactory(alpha, alpha)
factory := newEarlyTermNoTraversalTestFactory(require, alpha)
log := logging.NoLog{}
registerer := prometheus.NewRegistry()
s, err := NewSet(factory, log, registerer)
Expand Down Expand Up @@ -135,7 +137,7 @@ func TestCreateAndFinishPollOutOfOrder_UnfinishedPollsGaps(t *testing.T) {
vdrs := []ids.NodeID{vdr1, vdr2, vdr3} // k = 3
alpha := 3

factory := NewEarlyTermNoTraversalFactory(alpha, alpha)
factory := newEarlyTermNoTraversalTestFactory(require, alpha)
log := logging.NoLog{}
registerer := prometheus.NewRegistry()
s, err := NewSet(factory, log, registerer)
Expand Down Expand Up @@ -179,7 +181,7 @@ func TestCreateAndFinishSuccessfulPoll(t *testing.T) {
vdrs := bag.Of(vdr1, vdr2) // k = 2
alpha := 2

factory := NewEarlyTermNoTraversalFactory(alpha, alpha)
factory := newEarlyTermNoTraversalTestFactory(require, alpha)
log := logging.NoLog{}
registerer := prometheus.NewRegistry()
s, err := NewSet(factory, log, registerer)
Expand Down Expand Up @@ -211,7 +213,7 @@ func TestCreateAndFinishFailedPoll(t *testing.T) {
vdrs := bag.Of(vdr1, vdr2) // k = 2
alpha := 1

factory := NewEarlyTermNoTraversalFactory(alpha, alpha)
factory := newEarlyTermNoTraversalTestFactory(require, alpha)
log := logging.NoLog{}
registerer := prometheus.NewRegistry()
s, err := NewSet(factory, log, registerer)
Expand Down Expand Up @@ -240,7 +242,7 @@ func TestSetString(t *testing.T) {
vdrs := bag.Of(vdr1) // k = 1
alpha := 1

factory := NewEarlyTermNoTraversalFactory(alpha, alpha)
factory := newEarlyTermNoTraversalTestFactory(require, alpha)
log := logging.NoLog{}
registerer := prometheus.NewRegistry()
s, err := NewSet(factory, log, registerer)
Expand Down
Loading
Loading