Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Prometheus metrics for unrelayed-packets and unrelayed-acknowledgments #1356

Merged
merged 3 commits into from
Dec 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions docs/advanced_usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,18 @@ Exported metrics:

| **Exported Metric** | **Description** | **Type** |
|:---------------------------------------------: |:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |:--------: |
| cosmos_relayer_observed_packets_total | The total number of observed packets | Counter |
| cosmos_relayer_relayed_packets_total | The total number of relayed packets | Counter |
| cosmos_relayer_observed_packets_total | The total number of observed packets | Counter |
| cosmos_relayer_relayed_packets_total | The total number of relayed packets | Counter |
| cosmos_relayer_chain_latest_height | The current height of the chain | Gauge |
| cosmos_relayer_wallet_balance | The current balance for the relayer's wallet | Gauge |
| cosmos_relayer_fees_spent | The amount of fees spent from the relayer's wallet | Gauge |
| cosmos_relayer_tx_failure | <br>The total number of tx failures broken up into categories:<br> - "packet messages are redundant"<br> - "insufficient funds"<br> - "invalid coins"<br> - "out of gas"<br><br><br>"Tx Failure" is the catch-all bucket | Counter |
| cosmos_relayer_block_query_errors_total | The total number of block query failures. The failures are separated into two categories:<br> - "RPC Client"<br> - "IBC Header" | Counter |
| cosmos_relayer_client_expiration_seconds | Seconds until the client expires | Gauge |
| cosmos_relayer_client_trusting_period_seconds | The trusting period (in seconds) of the client | Gauge |
| cosmos_relayer_tx_failure | <br>The total number of tx failures broken up into categories:<br> - "packet messages are redundant"<br> - "insufficient funds"<br> - "invalid coins"<br> - "out of gas"<br><br><br>"Tx Failure" is the catch-all bucket | Counter |
| cosmos_relayer_block_query_errors_total | The total number of block query failures. The failures are separated into two categories:<br> - "RPC Client"<br> - "IBC Header" | Counter |
| cosmos_relayer_client_expiration_seconds | Seconds until the client expires | Gauge |
| cosmos_relayer_client_trusting_period_seconds | The trusting period (in seconds) of the client | Gauge |
| cosmos_relayer_unrelayed_packets | Current number of unrelayed packet sequences on a specific path and channel. This is updated after each flush (default is 5 min) | Gauge |
| cosmos_relayer_unrelayed_acks | Current number of unrelayed acknowledgement sequences on a specific path and channel. This is updated after each flush (default is 5 min) | Gauge |




Expand Down
19 changes: 19 additions & 0 deletions relayer/processor/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ type PrometheusMetrics struct {
BlockQueryFailure *prometheus.CounterVec
ClientExpiration *prometheus.GaugeVec
ClientTrustingPeriod *prometheus.GaugeVec
UnrelayedPackets *prometheus.GaugeVec
UnrelayedAcks *prometheus.GaugeVec
}

func (m *PrometheusMetrics) AddPacketsObserved(pathName, chain, channel, port, eventType string, count int) {
Expand Down Expand Up @@ -56,6 +58,14 @@ func (m *PrometheusMetrics) IncTxFailure(pathName, chain, errDesc string) {
m.TxFailureError.WithLabelValues(pathName, chain, errDesc).Inc()
}

// SetUnrelayedPackets overwrites the UnrelayedPackets gauge for the series
// identified by the given path, source/destination chain and
// source/destination channel with the supplied count.
//
// The label values are passed to the gauge vector positionally, in the order
// pathName, srcChain, destChain, srcChannel, destChannel.
func (m *PrometheusMetrics) SetUnrelayedPackets(pathName, srcChain, destChain, srcChannel, destChannel string, unrelayedPackets int) {
	// Resolve the labelled series first, then store the count as a float64,
	// which is the value type the gauge accepts.
	series := m.UnrelayedPackets.WithLabelValues(pathName, srcChain, destChain, srcChannel, destChannel)
	count := float64(unrelayedPackets)
	series.Set(count)
}

// SetUnrelayedAcks overwrites the UnrelayedAcks gauge for the series
// identified by the given path, source/destination chain and
// source/destination channel with the supplied count.
//
// The label values are passed to the gauge vector positionally, in the order
// pathName, srcChain, destChain, srcChannel, destChannel.
func (m *PrometheusMetrics) SetUnrelayedAcks(pathName, srcChain, destChain, srcChannel, destChannel string, unrelayedAcks int) {
	// The count parameter is lowerCamel (it was previously capitalised) so it
	// follows Go naming for locals and matches SetUnrelayedPackets.
	m.UnrelayedAcks.WithLabelValues(pathName, srcChain, destChain, srcChannel, destChannel).Set(float64(unrelayedAcks))
}

func NewPrometheusMetrics() *PrometheusMetrics {
packetLabels := []string{"path_name", "chain", "channel", "port", "type"}
heightLabels := []string{"chain"}
Expand All @@ -64,6 +74,7 @@ func NewPrometheusMetrics() *PrometheusMetrics {
walletLabels := []string{"chain", "gas_price", "key", "address", "denom"}
clientExpirationLables := []string{"path_name", "chain", "client_id", "trusting_period"}
clientTrustingPeriodLables := []string{"path_name", "chain", "client_id"}
unrelayedSeqsLabels := []string{"path_name", "src_chain", "dest_chain", "src_channel", "dest_channel"}
registry := prometheus.NewRegistry()
registerer := promauto.With(registry)
return &PrometheusMetrics{
Expand Down Expand Up @@ -104,5 +115,13 @@ func NewPrometheusMetrics() *PrometheusMetrics {
Name: "cosmos_relayer_client_trusting_period_seconds",
Help: "The trusting period (in seconds) of the client",
}, clientTrustingPeriodLables),
UnrelayedPackets: registerer.NewGaugeVec(prometheus.GaugeOpts{
Name: "cosmos_relayer_unrelayed_packets",
Help: "Current number of unrelayed packets on both the source and destination chains for a specific path and channel",
}, unrelayedSeqsLabels),
UnrelayedAcks: registerer.NewGaugeVec(prometheus.GaugeOpts{
Name: "cosmos_relayer_unrelayed_acks",
Help: "Current number of unrelayed acknowledgements on both the source and destination chains for a specific path and channel",
}, unrelayedSeqsLabels),
}
}
12 changes: 12 additions & 0 deletions relayer/processor/path_processor_internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -1204,6 +1204,10 @@ func (pp *PathProcessor) queuePendingRecvAndAcks(

if len(seqs) == 0 {
src.log.Debug("Nothing to flush", zap.String("channel", k.ChannelID), zap.String("port", k.PortID))
if pp.metrics != nil {
pp.metrics.SetUnrelayedPackets(pp.pathEnd1.info.PathName, src.info.ChainID, dst.info.ChainID, k.ChannelID, k.CounterpartyChannelID, 0)
pp.metrics.SetUnrelayedAcks(pp.pathEnd1.info.PathName, src.info.ChainID, dst.info.ChainID, k.ChannelID, k.CounterpartyChannelID, 0)
}
return nil, nil
}

Expand All @@ -1214,6 +1218,10 @@ func (pp *PathProcessor) queuePendingRecvAndAcks(
return nil, err
}

if pp.metrics != nil {
pp.metrics.SetUnrelayedPackets(pp.pathEnd1.info.PathName, src.info.ChainID, dst.info.ChainID, k.ChannelID, k.CounterpartyChannelID, len(unrecv))
}

dstHeight := int64(dst.latestBlock.Height)

var order chantypes.Order
Expand Down Expand Up @@ -1327,6 +1335,10 @@ SeqLoop:
unacked = append(unacked, seq)
}

if pp.metrics != nil {
pp.metrics.SetUnrelayedAcks(pp.pathEnd1.info.PathName, src.info.ChainID, dst.info.ChainID, k.ChannelID, k.CounterpartyChannelID, len(unacked))
}

for i, seq := range unacked {
dstMu.Lock()
ck := k.Counterparty()
Expand Down
Loading