Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add more consensus metrics on rejected proposals (backport #1216) #1221

Merged
merged 1 commit into from
Feb 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 40 additions & 11 deletions consensus/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ const (
type Metrics struct {
// Height of the chain.
Height metrics.Gauge
// StartHeight is the chain height at which metrics collection started.
StartHeight metrics.Gauge

// ValidatorLastSignedHeight of a validator.
ValidatorLastSignedHeight metrics.Gauge
Expand Down Expand Up @@ -90,6 +92,12 @@ type Metrics struct {
// timestamp and the timestamp of the latest prevote in a round where 100%
// of the voting power on the network issued prevotes.
FullPrevoteMessageDelay metrics.Gauge

// ApplicationRejectedProposals is the number of proposals that were rejected by the application.
ApplicationRejectedProposals metrics.Counter

// TimedOutProposals is the number of proposals that were not received in time.
TimedOutProposals metrics.Counter
}

// PrometheusMetrics returns Metrics built using the Prometheus client library.
Expand All @@ -107,6 +115,12 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics {
Name: "height",
Help: "Height of the chain.",
}, labels).With(labelsAndValues...),
StartHeight: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
Name: "start_height",
Help: "Height that metrics began",
}, labels).With(labelsAndValues...),
Rounds: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
Expand Down Expand Up @@ -249,13 +263,26 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics {
Help: "Difference in seconds between the proposal timestamp and the timestamp " +
"of the latest prevote that achieved 100% of the voting power in the prevote step.",
}, labels).With(labelsAndValues...),
ApplicationRejectedProposals: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
Name: "application_rejected_proposals",
Help: "Number of proposals rejected by the application",
}, labels).With(labelsAndValues...),
TimedOutProposals: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
Name: "timed_out_proposals",
Help: "Number of proposals that failed to be received in time",
}, labels).With(labelsAndValues...),
}
}

// NopMetrics returns no-op Metrics.
func NopMetrics() *Metrics {
return &Metrics{
Height: discard.NewGauge(),
Height: discard.NewGauge(),
StartHeight: discard.NewGauge(),

ValidatorLastSignedHeight: discard.NewGauge(),

Expand All @@ -274,16 +301,18 @@ func NopMetrics() *Metrics {
BlockIntervalSeconds: discard.NewHistogram(),
BlockTimeSeconds: discard.NewGauge(),

NumTxs: discard.NewGauge(),
BlockSizeBytes: discard.NewGauge(),
TotalTxs: discard.NewGauge(),
CommittedHeight: discard.NewGauge(),
FastSyncing: discard.NewGauge(),
StateSyncing: discard.NewGauge(),
BlockParts: discard.NewCounter(),
BlockGossipPartsReceived: discard.NewCounter(),
QuorumPrevoteMessageDelay: discard.NewGauge(),
FullPrevoteMessageDelay: discard.NewGauge(),
NumTxs: discard.NewGauge(),
BlockSizeBytes: discard.NewGauge(),
TotalTxs: discard.NewGauge(),
CommittedHeight: discard.NewGauge(),
FastSyncing: discard.NewGauge(),
StateSyncing: discard.NewGauge(),
BlockParts: discard.NewCounter(),
BlockGossipPartsReceived: discard.NewCounter(),
QuorumPrevoteMessageDelay: discard.NewGauge(),
FullPrevoteMessageDelay: discard.NewGauge(),
ApplicationRejectedProposals: discard.NewCounter(),
TimedOutProposals: discard.NewCounter(),
}
}

Expand Down
5 changes: 5 additions & 0 deletions consensus/state.go
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,8 @@ func (cs *State) OnStart() error {
}
}

cs.metrics.StartHeight.Set(float64(cs.Height))

// we need the timeoutRoutine for replay so
// we don't block on the tick chan.
// NOTE: we will get a build up of garbage go routines
Expand Down Expand Up @@ -1287,6 +1289,7 @@ func (cs *State) defaultDoPrevote(height int64, round int32) {
// If ProposalBlock is nil, prevote nil.
if cs.ProposalBlock == nil {
logger.Debug("prevote step: ProposalBlock is nil")
cs.metrics.TimedOutProposals.Add(1)
cs.signAddVote(cmtproto.PrevoteType, nil, types.PartSetHeader{})
return
}
Expand All @@ -1303,12 +1306,14 @@ func (cs *State) defaultDoPrevote(height int64, round int32) {
stateMachineValidBlock, err := cs.blockExec.ProcessProposal(cs.ProposalBlock)
if err != nil {
cs.Logger.Error("state machine returned an error when trying to process proposal block", "err", err)
return
}

// Vote nil if application invalidated the block
if !stateMachineValidBlock {
// The app says we must vote nil
logger.Error("prevote step: the application deems this block to be mustVoteNil", "err", err)
cs.metrics.ApplicationRejectedProposals.Add(1)
cs.signAddVote(cmtproto.PrevoteType, nil, types.PartSetHeader{})
return
}
Expand Down
Loading