Skip to content

Commit

Permalink
feat: add more consensus metrics on rejected proposals (backport #1216)…
Browse files Browse the repository at this point in the history
… (#1221)
  • Loading branch information
mergify[bot] committed Feb 12, 2024
1 parent 0246db1 commit c81cc45
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 11 deletions.
51 changes: 40 additions & 11 deletions consensus/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ const (
type Metrics struct {
// Height of the chain.
Height metrics.Gauge
// The height at which the metrics started.
StartHeight metrics.Gauge

// ValidatorLastSignedHeight of a validator.
ValidatorLastSignedHeight metrics.Gauge
Expand Down Expand Up @@ -90,6 +92,12 @@ type Metrics struct {
// timestamp and the timestamp of the latest prevote in a round where 100%
// of the voting power on the network issued prevotes.
FullPrevoteMessageDelay metrics.Gauge

// The number of proposals that were rejected by the application.
ApplicationRejectedProposals metrics.Counter

// The number of proposals that failed to be received in time.
TimedOutProposals metrics.Counter
}

// PrometheusMetrics returns Metrics built using the Prometheus client library.
Expand All @@ -107,6 +115,12 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics {
Name: "height",
Help: "Height of the chain.",
}, labels).With(labelsAndValues...),
StartHeight: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
Name: "start_height",
Help: "Height that metrics began",
}, labels).With(labelsAndValues...),
Rounds: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
Expand Down Expand Up @@ -249,13 +263,26 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics {
Help: "Difference in seconds between the proposal timestamp and the timestamp " +
"of the latest prevote that achieved 100% of the voting power in the prevote step.",
}, labels).With(labelsAndValues...),
ApplicationRejectedProposals: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
Name: "application_rejected_proposals",
Help: "Number of proposals rejected by the application",
}, labels).With(labelsAndValues...),
TimedOutProposals: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
Name: "timed_out_proposals",
Help: "Number of proposals that failed to be received in time",
}, labels).With(labelsAndValues...),
}
}

// NopMetrics returns no-op Metrics.
func NopMetrics() *Metrics {
return &Metrics{
Height: discard.NewGauge(),
Height: discard.NewGauge(),
StartHeight: discard.NewGauge(),

ValidatorLastSignedHeight: discard.NewGauge(),

Expand All @@ -274,16 +301,18 @@ func NopMetrics() *Metrics {
BlockIntervalSeconds: discard.NewHistogram(),
BlockTimeSeconds: discard.NewGauge(),

NumTxs: discard.NewGauge(),
BlockSizeBytes: discard.NewGauge(),
TotalTxs: discard.NewGauge(),
CommittedHeight: discard.NewGauge(),
FastSyncing: discard.NewGauge(),
StateSyncing: discard.NewGauge(),
BlockParts: discard.NewCounter(),
BlockGossipPartsReceived: discard.NewCounter(),
QuorumPrevoteMessageDelay: discard.NewGauge(),
FullPrevoteMessageDelay: discard.NewGauge(),
NumTxs: discard.NewGauge(),
BlockSizeBytes: discard.NewGauge(),
TotalTxs: discard.NewGauge(),
CommittedHeight: discard.NewGauge(),
FastSyncing: discard.NewGauge(),
StateSyncing: discard.NewGauge(),
BlockParts: discard.NewCounter(),
BlockGossipPartsReceived: discard.NewCounter(),
QuorumPrevoteMessageDelay: discard.NewGauge(),
FullPrevoteMessageDelay: discard.NewGauge(),
ApplicationRejectedProposals: discard.NewCounter(),
TimedOutProposals: discard.NewCounter(),
}
}

Expand Down
5 changes: 5 additions & 0 deletions consensus/state.go
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,8 @@ func (cs *State) OnStart() error {
}
}

cs.metrics.StartHeight.Set(float64(cs.Height))

// we need the timeoutRoutine for replay so
// we don't block on the tick chan.
// NOTE: we will get a build up of garbage go routines
Expand Down Expand Up @@ -1287,6 +1289,7 @@ func (cs *State) defaultDoPrevote(height int64, round int32) {
// If ProposalBlock is nil, prevote nil.
if cs.ProposalBlock == nil {
logger.Debug("prevote step: ProposalBlock is nil")
cs.metrics.TimedOutProposals.Add(1)
cs.signAddVote(cmtproto.PrevoteType, nil, types.PartSetHeader{})
return
}
Expand All @@ -1303,12 +1306,14 @@ func (cs *State) defaultDoPrevote(height int64, round int32) {
stateMachineValidBlock, err := cs.blockExec.ProcessProposal(cs.ProposalBlock)
if err != nil {
cs.Logger.Error("state machine returned an error when trying to process proposal block", "err", err)
return
}

// Vote nil if application invalidated the block
if !stateMachineValidBlock {
// The app says we must vote nil
logger.Error("prevote step: the application deems this block to be mustVoteNil", "err", err)
cs.metrics.ApplicationRejectedProposals.Add(1)
cs.signAddVote(cmtproto.PrevoteType, nil, types.PartSetHeader{})
return
}
Expand Down

0 comments on commit c81cc45

Please sign in to comment.