Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Configurable range for histogram buckets for latency metrics #3382

Merged
merged 16 commits into from
Jun 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
- Add two new configurations for the telemetry `buckets`:
  - `latency_submitted` used to specify the range and number of
    buckets for the `tx_latency_submitted` metric.
  - `latency_confirmed` used to specify the range and number of
    buckets for the `tx_latency_confirmed` metric.
  ([#3366](https://github.com/informalsystems/hermes/issues/3366))
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,19 @@ host = '127.0.0.1'
# by the telemetry service. Default: 3001
port = 3001

[telemetry.buckets]
# Specify the range of the 10 histogram buckets in ms for the `tx_latency_submitted` metric.
# Default: { start = 500, end = 20000, buckets = 10 }
# The default will give the following buckets:
# [500, 2450, 4400, 6350, 8300, 10250, 12200, 14150, 16100, 18050, 20000]
# latency_submitted = { start = 500, end = 20000, buckets = 10 }

# Specify the range of the 10 histogram buckets in ms for the `tx_latency_confirmed` metric.
# Default: { start = 1000, end = 30000, buckets = 10 }
# The default will give the following buckets:
# [1000, 3900, 6800, 9700, 12600, 15500, 18400, 21300, 24200, 27100, 30000]
# latency_confirmed = { start = 1000, end = 30000, buckets = 10 }


# A chains section includes parameters related to a chain and the full node to which
# the relayer can send transactions and queries.
Expand Down
7 changes: 6 additions & 1 deletion crates/relayer-cli/src/commands/start.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,12 @@ fn spawn_telemetry_server(config: &Config) {

let _span = tracing::error_span!("telemetry").entered();

let state = ibc_telemetry::global();
let state = ibc_telemetry::init(
config.telemetry.buckets.latency_submitted.range.clone(),
config.telemetry.buckets.latency_submitted.buckets,
config.telemetry.buckets.latency_confirmed.range.clone(),
config.telemetry.buckets.latency_confirmed.buckets,
);
let telemetry = config.telemetry.clone();

if !telemetry.enabled {
Expand Down
98 changes: 98 additions & 0 deletions crates/relayer/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ use std::{
fs,
fs::File,
io::Write,
ops::Range,
path::{Path, PathBuf},
};
use tendermint::block::Height as BlockHeight;
Expand Down Expand Up @@ -204,6 +205,26 @@ pub mod default {
/// Default upper bound on the size of a decoded gRPC message: 32 MiB
/// (33554432 bytes).
pub fn max_grpc_decoding_size() -> Byte {
    // 32 * 1024 * 1024 == 33554432
    Byte::from_bytes(32 * 1024 * 1024)
}

pub fn latency_submitted() -> HistogramConfig {
HistogramConfig {
range: Range {
start: 500,
end: 20000,
},
buckets: 10,
}
}

pub fn latency_confirmed() -> HistogramConfig {
HistogramConfig {
range: Range {
start: 1000,
end: 30000,
},
buckets: 10,
}
}
}

#[derive(Clone, Debug, Default, Deserialize, Serialize)]
Expand Down Expand Up @@ -385,6 +406,60 @@ pub struct TelemetryConfig {
pub enabled: bool,
pub host: String,
pub port: u16,
#[serde(default = "HistogramBuckets::default")]
pub buckets: HistogramBuckets,
}

/// Histogram bucket settings for the telemetry latency metrics.
///
/// Each field may be omitted when deserializing; a missing field is filled
/// in by the matching function of the `default` module.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct HistogramBuckets {
/// Range and number of buckets for the `tx_latency_submitted` metric.
#[serde(default = "default::latency_submitted")]
pub latency_submitted: HistogramConfig,
/// Range and number of buckets for the `tx_latency_confirmed` metric.
#[serde(default = "default::latency_confirmed")]
pub latency_confirmed: HistogramConfig,
}

impl Default for HistogramBuckets {
fn default() -> Self {
Self {
latency_submitted: default::latency_submitted(),
latency_confirmed: default::latency_confirmed(),
}
}
}

/// Range and bucket count describing a single histogram.
///
/// Deserialization goes through [`HistogramRangeUnchecked`] (see the
/// `try_from` attribute), so the range check performed by that conversion
/// runs on every value parsed from the configuration.
#[derive(Clone, Debug, Deserialize, Serialize)]
#[serde(try_from = "HistogramRangeUnchecked")]
pub struct HistogramConfig {
/// Bounds of the histogram. Flattened so that `start` and `end` are
/// serialized next to `buckets` instead of inside a nested table.
// NOTE(review): the sample `config.toml` describes these values as
// milliseconds; nothing in this type enforces a unit.
#[serde(flatten)]
pub range: Range<u64>,
/// Number of buckets the range is divided into.
pub buckets: u64,
}

/// Validating conversion from the raw, deserialized histogram settings.
impl TryFrom<HistogramRangeUnchecked> for HistogramConfig {
    type Error = String;

    /// Checks the raw settings and turns them into a [`HistogramConfig`].
    ///
    /// # Errors
    ///
    /// Returns a human-readable message when:
    /// - `start` is greater than `end` (an empty range with
    ///   `start == end` is still accepted), or
    /// - `buckets` is zero.
    fn try_from(value: HistogramRangeUnchecked) -> Result<Self, Self::Error> {
        if value.start > value.end {
            return Err(format!(
                "histogram range min `{}` must be smaller or equal than max `{}`",
                value.start, value.end
            ));
        }

        // The bucket boundaries are an even split of the range by `buckets`
        // (see the layouts documented in `config.toml`), so a zero count
        // cannot describe a histogram. Reject it while parsing the
        // configuration instead of letting it reach the code that builds
        // the boundaries.
        if value.buckets == 0 {
            return Err(format!(
                "histogram buckets count `{}` must be greater than 0",
                value.buckets
            ));
        }

        Ok(Self {
            range: Range {
                start: value.start,
                end: value.end,
            },
            buckets: value.buckets,
        })
    }
}

/// Raw histogram settings exactly as they appear in the configuration,
/// before any validation.
///
/// [`HistogramConfig`] is deserialized through this type and its `TryFrom`
/// conversion.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct HistogramRangeUnchecked {
// Lower bound of the histogram range.
start: u64,
// Upper bound of the histogram range.
end: u64,
// Number of buckets requested for the range.
buckets: u64,
}

/// Default values for the telemetry configuration.
Expand All @@ -396,6 +471,7 @@ impl Default for TelemetryConfig {
enabled: false,
host: "127.0.0.1".to_string(),
port: 3001,
buckets: HistogramBuckets::default(),
}
}
}
Expand Down Expand Up @@ -661,6 +737,28 @@ mod tests {
dbg!(config);
}

/// A configuration whose `[telemetry.buckets]` ranges are well-formed
/// must load successfully.
#[test]
fn parse_valid_telemetry() {
    let fixture = concat!(
        env!("CARGO_MANIFEST_DIR"),
        "/tests/config/fixtures/relayer_conf_example_valid_telemetry.toml"
    );

    let loaded = load(fixture);
    let config = loaded.expect("could not parse config");

    dbg!(config);
}

/// A configuration whose histogram range has `start` greater than `end`
/// must be rejected when loading.
#[test]
fn parse_invalid_telemetry() {
    let fixture = concat!(
        env!("CARGO_MANIFEST_DIR"),
        "/tests/config/fixtures/relayer_conf_example_invalid_telemetry.toml"
    );

    let outcome = load(fixture);

    assert!(outcome.is_err());
}

#[test]
fn serialize_valid_config() {
let path = concat!(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
[global]
log_level = 'error'

[mode]

[mode.clients]
enabled = true
refresh = true
misbehaviour = true

[mode.connections]
enabled = false

[mode.channels]
enabled = false

[mode.packets]
enabled = true
clear_interval = 100
clear_on_start = true
tx_confirmation = true

[telemetry]
enabled = true
host = '127.0.0.1'
port = 3001

[telemetry.buckets]
latency_submitted = { start = 5000, end = 1000, buckets = 10 } # start can't be greater than end
latency_confirmed = { start = 5000, end = 10000, buckets = 10 }

[[chains]]
id = 'chain_A'
rpc_addr = 'http://127.0.0.1:26657'
grpc_addr = 'http://127.0.0.1:9090'
event_source = { mode = 'push', url = 'ws://localhost:26657/websocket' }
rpc_timeout = '10s'
account_prefix = 'cosmos'
key_name = 'testkey'
store_prefix = 'ibc'
max_gas = 200000
gas_price = { price = 0.001, denom = 'stake' }
max_msg_num = 4
max_tx_size = 1048576
clock_drift = '5s'
trusting_period = '14days'
trust_threshold = { numerator = '1', denominator = '3' }
address_type = { derivation = 'cosmos' }

[chains.packet_filter]
policy = 'allow'
list = [
['ica*', '*'],
['transfer', 'channel-0'],
]

[[chains]]
id = 'chain_B'
rpc_addr = 'http://127.0.0.1:26557'
grpc_addr = 'http://127.0.0.1:9090'
event_source = { mode = 'push', url = 'ws://localhost:26557/websocket' }
rpc_timeout = '10s'
account_prefix = 'cosmos'
key_name = 'testkey'
store_prefix = 'ibc'
gas_price = { price = 0.001, denom = 'stake' }
clock_drift = '5s'
trusting_period = '14days'
trust_threshold = { numerator = '1', denominator = '3' }
address_type = { derivation = 'ethermint', proto_type = { pk_type = '/injective.crypto.v1beta1.ethsecp256k1.PubKey' } }
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
[global]
log_level = 'error'

[mode]

[mode.clients]
enabled = true
refresh = true
misbehaviour = true

[mode.connections]
enabled = false

[mode.channels]
enabled = false

[mode.packets]
enabled = true
clear_interval = 100
clear_on_start = true
tx_confirmation = true

[telemetry]
enabled = true
host = '127.0.0.1'
port = 3001

[telemetry.buckets]
latency_submitted = { start = 5000, end = 10000, buckets = 10 }
latency_confirmed = { start = 5000, end = 10000, buckets = 10 }

[[chains]]
id = 'chain_A'
rpc_addr = 'http://127.0.0.1:26657'
grpc_addr = 'http://127.0.0.1:9090'
event_source = { mode = 'push', url = 'ws://localhost:26657/websocket' }
rpc_timeout = '10s'
account_prefix = 'cosmos'
key_name = 'testkey'
store_prefix = 'ibc'
max_gas = 200000
gas_price = { price = 0.001, denom = 'stake' }
max_msg_num = 4
max_tx_size = 1048576
clock_drift = '5s'
trusting_period = '14days'
trust_threshold = { numerator = '1', denominator = '3' }
address_type = { derivation = 'cosmos' }

[chains.packet_filter]
policy = 'allow'
list = [
['ica*', '*'],
['transfer', 'channel-0'],
]

[[chains]]
id = 'chain_B'
rpc_addr = 'http://127.0.0.1:26557'
grpc_addr = 'http://127.0.0.1:9090'
event_source = { mode = 'push', url = 'ws://localhost:26557/websocket' }
rpc_timeout = '10s'
account_prefix = 'cosmos'
key_name = 'testkey'
store_prefix = 'ibc'
gas_price = { price = 0.001, denom = 'stake' }
clock_drift = '5s'
trusting_period = '14days'
trust_threshold = { numerator = '1', denominator = '3' }
address_type = { derivation = 'ethermint', proto_type = { pk_type = '/injective.crypto.v1beta1.ethsecp256k1.PubKey' } }
1 change: 1 addition & 0 deletions crates/telemetry/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ serde_json = "1.0.94"
serde = "1.0.164"
axum = "0.6.18"
tokio = "1.26.0"
tracing = "0.1.36"

[dependencies.tendermint]
version = "0.32.0"
Expand Down
Loading