From 26f2728a7b7fae58463733c63afd883bc6b3c1ec Mon Sep 17 00:00:00 2001 From: Andy Nogueira Date: Thu, 13 May 2021 16:29:52 -0400 Subject: [PATCH 01/26] Initial telemetry support implementation (#868) --- Cargo.lock | 66 +++++++++++++++++++++++ relayer/Cargo.toml | 16 ++++++ relayer/src/config.rs | 6 +++ relayer/src/lib.rs | 4 ++ relayer/src/supervisor.rs | 17 ++++++ relayer/src/telemetry.rs | 2 + relayer/src/telemetry/relayer_state.rs | 7 +++ relayer/src/telemetry/service.rs | 75 ++++++++++++++++++++++++++ 8 files changed, 193 insertions(+) create mode 100644 relayer/src/telemetry.rs create mode 100644 relayer/src/telemetry/relayer_state.rs create mode 100644 relayer/src/telemetry/service.rs diff --git a/Cargo.lock b/Cargo.lock index c51e4bed20..074cd0bd09 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -560,6 +560,16 @@ dependencies = [ "syn", ] +[[package]] +name = "dashmap" +version = "4.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e77a43b28d0668df09411cb0bc9a8c2adc40f9a048afe863e05fd43251e8e39c" +dependencies = [ + "cfg-if 1.0.0", + "num_cpus", +] + [[package]] name = "der" version = "0.3.3" @@ -1244,10 +1254,15 @@ dependencies = [ "hdpath", "hex", "humantime-serde", + "hyper", "ibc", "ibc-proto", "itertools 0.10.0", "k256", + "lazy_static", + "opentelemetry", + "opentelemetry-prometheus", + "prometheus", "prost", "prost-types", "retry", @@ -1684,6 +1699,36 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "opentelemetry" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "492848ff47f11b7f9de0443b404e2c5775f695e1af6b7076ca25f999581d547a" +dependencies = [ + "async-trait", + "crossbeam-channel 0.5.1", + "dashmap", + "fnv", + "futures", + "js-sys", + "lazy_static", + "percent-encoding", + "pin-project", + "rand 0.8.3", + "thiserror", +] + +[[package]] +name = "opentelemetry-prometheus" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f41760047df46012aaf2bb87fec0efed4d97f7a6af6825858c3b4d9438dadb94" +dependencies = [ + "opentelemetry", + "prometheus", + "protobuf", +] + [[package]] name = "os_str_bytes" version = "2.4.0" @@ -1838,6 +1883,21 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "prometheus" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5986aa8d62380092d2f50f8b1cdba9cb9b6731ffd4b25b51fd126b6c3e05b99c" +dependencies = [ + "cfg-if 1.0.0", + "fnv", + "lazy_static", + "memchr", + "parking_lot", + "protobuf", + "thiserror", +] + [[package]] name = "prost" version = "0.7.0" @@ -1871,6 +1931,12 @@ dependencies = [ "prost", ] +[[package]] +name = "protobuf" +version = "2.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45604fc7a88158e7d514d8e22e14ac746081e7a70d7690074dd0029ee37458d6" + [[package]] name = "quote" version = "1.0.9" diff --git a/relayer/Cargo.toml b/relayer/Cargo.toml index 20c26ce4d6..f0303401a2 100644 --- a/relayer/Cargo.toml +++ b/relayer/Cargo.toml @@ -72,3 +72,19 @@ ibc = { version = "0.3.0", path = "../modules", features = ["mocks"] } # Needed for generating (synthetic) light blocks. 
tendermint-testgen = { version = "=0.19.0" } + +# Dependencies needed for telemetry support +[dependencies.opentelemetry] +version = "0.14.0" + +[dependencies.opentelemetry-prometheus] +version = "0.7.0" + +[dependencies.hyper] +version = "0.14.7" + +[dependencies.lazy_static] +version = "1.4.0" + +[dependencies.prometheus] +version = "0.12.0" diff --git a/relayer/src/config.rs b/relayer/src/config.rs index 6f26e62be3..b6329690cd 100644 --- a/relayer/src/config.rs +++ b/relayer/src/config.rs @@ -87,6 +87,10 @@ pub struct GlobalConfig { /// All valid log levels, as defined in tracing: /// https://docs.rs/tracing-core/0.1.17/tracing_core/struct.Level.html pub log_level: String, + + pub telemetry_enabled: bool, + + pub telemetry_port: u16, } impl Default for GlobalConfig { @@ -94,6 +98,8 @@ impl Default for GlobalConfig { Self { strategy: Strategy::default(), log_level: "info".to_string(), + telemetry_enabled: true, + telemetry_port: 3000, } } } diff --git a/relayer/src/lib.rs b/relayer/src/lib.rs index 687695c5f5..e1a22eabf0 100644 --- a/relayer/src/lib.rs +++ b/relayer/src/lib.rs @@ -13,6 +13,9 @@ //! //! 
[Hermes]: https://docs.rs/ibc-relayer-cli/0.2.0/ +#[macro_use] +extern crate lazy_static; + pub mod chain; pub mod channel; pub mod config; @@ -28,6 +31,7 @@ pub mod object; pub mod registry; pub mod relay; pub mod supervisor; +pub mod telemetry; pub mod transfer; pub mod upgrade_chain; pub mod util; diff --git a/relayer/src/supervisor.rs b/relayer/src/supervisor.rs index 5692e48abd..85b304994d 100644 --- a/relayer/src/supervisor.rs +++ b/relayer/src/supervisor.rs @@ -29,6 +29,7 @@ use crate::{ }; mod error; +use crate::telemetry::service::TelemetryService; pub use error::Error; /// The supervisor listens for events on multiple pairs of chains, @@ -38,6 +39,7 @@ pub struct Supervisor { config: Config, registry: Registry, workers: HashMap, + telemetry: Option, } impl Supervisor { @@ -45,10 +47,25 @@ impl Supervisor { pub fn spawn(config: Config) -> Result { let registry = Registry::new(config.clone()); + // Start the telemetry service + let telemetry = match config.global.telemetry_enabled { + true => { + println!( + "TELEMETRY ENABLED ON PORT: {:?}", + config.global.telemetry_port + ); + Some(TelemetryService { + listen_port: config.global.telemetry_port, + }) + } + false => None, + }; + Ok(Self { config, registry, workers: HashMap::new(), + telemetry, }) } diff --git a/relayer/src/telemetry.rs b/relayer/src/telemetry.rs new file mode 100644 index 0000000000..29f058ecde --- /dev/null +++ b/relayer/src/telemetry.rs @@ -0,0 +1,2 @@ +pub mod relayer_state; +pub mod service; diff --git a/relayer/src/telemetry/relayer_state.rs b/relayer/src/telemetry/relayer_state.rs new file mode 100644 index 0000000000..709df64e3c --- /dev/null +++ b/relayer/src/telemetry/relayer_state.rs @@ -0,0 +1,7 @@ +use opentelemetry::metrics::BoundCounter; +use opentelemetry_prometheus::PrometheusExporter; + +pub struct RelayerState { + pub exporter: PrometheusExporter, + pub tx_counter: BoundCounter<'static, u64>, +} diff --git a/relayer/src/telemetry/service.rs 
b/relayer/src/telemetry/service.rs new file mode 100644 index 0000000000..a54c29ef81 --- /dev/null +++ b/relayer/src/telemetry/service.rs @@ -0,0 +1,75 @@ +use hyper::{ + header::CONTENT_TYPE, + service::{make_service_fn, service_fn}, + Body, Request, Response, Server, +}; + +use crate::telemetry::relayer_state::RelayerState; +use opentelemetry::{global, KeyValue}; +use prometheus::{Encoder, TextEncoder}; +use std::convert::Infallible; +use std::sync::Arc; + +lazy_static! { + static ref HANDLER_ALL: [KeyValue; 1] = [KeyValue::new("handler", "all")]; +} + +pub struct TelemetryService { + pub(crate) listen_port: u16, +} + +async fn serve_req( + _req: Request, + state: Arc, +) -> Result, hyper::Error> { + let mut buffer = vec![]; + let encoder = TextEncoder::new(); + let metric_families = state.exporter.registry().gather(); + encoder.encode(&metric_families, &mut buffer).unwrap(); + + state.tx_counter.add(1); + + let response = Response::builder() + .status(200) + .header(CONTENT_TYPE, encoder.format_type()) + .body(Body::from(buffer)) + .unwrap(); + + Ok(response) +} + +impl TelemetryService { + pub async fn run(self) -> Result> { + let exporter = opentelemetry_prometheus::exporter().init(); + + let meter = global::meter("hermes/relayer"); + let state = Arc::new(RelayerState { + exporter, + tx_counter: meter + .u64_counter("hermes.tx_count") + .with_description("Total number of transactions processed via the relayer.") + .init() + .bind(HANDLER_ALL.as_ref()), + }); + + // For every connection, we must make a `Service` to handle all + // incoming HTTP requests on said connection. + let make_svc = make_service_fn(move |_conn| { + let state = state.clone(); + // This is the `Service` that will handle the connection. + // `service_fn` is a helper to convert a function that + // returns a Response into a `Service`. 
+ async move { Ok::<_, Infallible>(service_fn(move |req| serve_req(req, state.clone()))) } + }); + + let addr = ([127, 0, 0, 1], self.listen_port).into(); + + let server = Server::bind(&addr).serve(make_svc); + + println!("Telemetry service listening on http://{}", addr); + + server.await?; + + Ok(self) + } +} From ec75bbefecde756e95e61e2af8743ee8f8fb259e Mon Sep 17 00:00:00 2001 From: Andy Nogueira Date: Wed, 19 May 2021 16:05:29 -0400 Subject: [PATCH 02/26] Refactored code for state and service. Replaced hyper with rouille (#868) --- Cargo.lock | 556 +++++++++++++++++++++++-- relayer/Cargo.toml | 23 +- relayer/src/lib.rs | 3 + relayer/src/telemetry.rs | 2 +- relayer/src/telemetry/relayer_state.rs | 7 - relayer/src/telemetry/service.rs | 89 ++-- relayer/src/telemetry/state.rs | 31 ++ 7 files changed, 585 insertions(+), 126 deletions(-) delete mode 100644 relayer/src/telemetry/relayer_state.rs create mode 100644 relayer/src/telemetry/state.rs diff --git a/Cargo.lock b/Cargo.lock index a1ad160f2b..dd1cd3f7e8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -57,6 +57,12 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "adler32" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" + [[package]] name = "aho-corasick" version = "0.7.18" @@ -100,12 +106,24 @@ version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28b2cd92db5cbd74e8e5028f7e27dd7aa3090e89e4f2a197cc7c8dfb69c7063b" +[[package]] +name = "arrayref" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544" + [[package]] name = "arrayvec" version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" +[[package]] +name = "ascii" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbf56136a5198c7b01a49e3afcbef6cf84597273d298f54432926024107b0109" + [[package]] name = "async-stream" version = "0.3.1" @@ -146,7 +164,7 @@ checksum = "e00550829ef8e2c4115250d0ee43305649b0fa95f78a32ce5b07da0b73d95c5c" dependencies = [ "futures-io", "futures-util", - "log", + "log 0.4.14", "pin-project-lite", "tokio", "tokio-rustls", @@ -165,6 +183,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "autocfg" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d49d90015b3c36167a20fe2810c5cd875ad504b39cff3d4eae7977e6b7c1cb2" + [[package]] name = "autocfg" version = "1.0.1" @@ -187,6 +211,15 @@ dependencies = [ "serde", ] +[[package]] +name = "base64" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b25d992356d2eb0ed82172f5248873db5560c4721f564b13cb5193bda5e668e" +dependencies = [ + "byteorder", +] + [[package]] name = "base64" version = "0.13.0" @@ -256,6 +289,17 @@ dependencies = [ "wyz", ] +[[package]] +name = "blake2b_simd" +version = "0.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "afa748e348ad3be8263be728124b24a24f268266f6f5d58af9d75f6a40b5c587" +dependencies = [ + "arrayref", + "arrayvec", + "constant_time_eq", +] + [[package]] name = "block-buffer" version = "0.9.0" @@ -272,6 +316,36 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8d696c370c750c948ada61c69a0ee2cbbb9c50b1019ddb86d9317157a99c2cae" +[[package]] +name = "brotli-sys" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4445dea95f4c2b41cde57cc9fee236ae4dbae88d8fcbdb4750fc1bb5d86aaecd" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "brotli2" +version = "0.3.2" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cb036c3eade309815c15ddbacec5b22c4d1f3983a774ab2eac2e3e9ea85568e" +dependencies = [ + "brotli-sys", + "libc", +] + +[[package]] +name = "buf_redux" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b953a6887648bb07a535631f2bc00fbdb2a2216f135552cb3f534ed136b9c07f" +dependencies = [ + "memchr", + "safemem", +] + [[package]] name = "bumpalo" version = "3.6.1" @@ -328,6 +402,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "chunked_transfer" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fff857943da45f546682664a79488be82e69e43c1a7a2307679ab9afb3a66d2e" + [[package]] name = "clap" version = "3.0.0-beta.2" @@ -360,6 +440,15 @@ dependencies = [ "syn", ] +[[package]] +name = "cloudabi" +version = "0.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" +dependencies = [ + "bitflags", +] + [[package]] name = "color-backtrace" version = "0.3.0" @@ -377,6 +466,12 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "279bc8fc53f788a75c7804af68237d1fce02cde1e275a886a4b320604dc2aeda" +[[package]] +name = "constant_time_eq" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" + [[package]] name = "contracts" version = "0.4.0" @@ -467,7 +562,7 @@ version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3c7c73a2d1e9fc0886a08b93e98eb643461230d5f1925e4036204d5f2e261a8" dependencies = [ - "autocfg", + "autocfg 1.0.1", "cfg-if 0.1.10", "lazy_static", ] @@ -478,7 +573,7 @@ version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"4feb231f0d4d6af81aed15928e58ecf5816aa62a2393e2c82f46973e92a9a278" dependencies = [ - "autocfg", + "autocfg 1.0.1", "cfg-if 1.0.0", "lazy_static", ] @@ -570,6 +665,17 @@ dependencies = [ "num_cpus", ] +[[package]] +name = "deflate" +version = "0.7.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "707b6a7b384888a70c8d2e8650b3e60170dfc6a67bb4aa67b6dfca57af4bedb4" +dependencies = [ + "adler32", + "byteorder", + "gzip-header", +] + [[package]] name = "der" version = "0.3.3" @@ -601,6 +707,17 @@ dependencies = [ "generic-array", ] +[[package]] +name = "dirs" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fd78930633bd1c6e35c4b42b1df7b0cbc6bc191146e512bb3bedf243fcc3901" +dependencies = [ + "libc", + "redox_users 0.3.5", + "winapi", +] + [[package]] name = "dirs-next" version = "2.0.0" @@ -618,7 +735,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" dependencies = [ "libc", - "redox_users", + "redox_users 0.4.0", "winapi", ] @@ -719,6 +836,18 @@ dependencies = [ "subtle", ] +[[package]] +name = "filetime" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d34cfa13a63ae058bfa601fe9e313bbdb3746427c1459185464ce0fcf62e1e8" +dependencies = [ + "cfg-if 1.0.0", + "libc", + "redox_syscall 0.2.8", + "winapi", +] + [[package]] name = "fnv" version = "1.0.7" @@ -760,6 +889,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "fuchsia-cprng" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" + [[package]] name = "funty" version = "1.1.0" @@ -820,7 +955,7 @@ version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4c40298486cdf52cc00cd6d6987892ba502c7656a16a4192a9992b1ccedd121" dependencies = [ - 
"autocfg", + "autocfg 1.0.1", "proc-macro-hack", "proc-macro2", "quote", @@ -845,7 +980,7 @@ version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "feb5c238d27e2bf94ffdfd27b2c29e3df4a68c4193bb6427384259e2bf191967" dependencies = [ - "autocfg", + "autocfg 1.0.1", "futures-channel", "futures-core", "futures-io", @@ -885,7 +1020,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "501466ecc8a30d1d3b7fc9229b122b2ce8ed6e9d9223f1138d4babb253e51817" dependencies = [ "typenum", - "version_check", + "version_check 0.9.3", ] [[package]] @@ -967,6 +1102,15 @@ dependencies = [ "syn", ] +[[package]] +name = "gzip-header" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0131feb3d3bb2a5a238d8a4d09f6353b7ebfdc52e77bccbf4ea6eaa751dde639" +dependencies = [ + "crc32fast", +] + [[package]] name = "h2" version = "0.3.3" @@ -1014,12 +1158,12 @@ version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0b7591fb62902706ae8e7aaff416b1b0fa2c0fd0878b46dc13baa3712d8a855" dependencies = [ - "base64", + "base64 0.13.0", "bitflags", "bytes", "headers-core", "http", - "mime", + "mime 0.3.16", "sha-1", "time", ] @@ -1178,7 +1322,7 @@ dependencies = [ "ct-logs", "futures-util", "hyper", - "log", + "log 0.4.14", "rustls", "rustls-native-certs", "tokio", @@ -1256,7 +1400,6 @@ dependencies = [ "hdpath", "hex", "humantime-serde", - "hyper", "ibc", "ibc-proto", "itertools 0.10.0", @@ -1269,6 +1412,7 @@ dependencies = [ "prost-types", "retry", "ripemd160", + "rouille", "serde", "serde_cbor", "serde_derive", @@ -1363,7 +1507,7 @@ version = "1.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "824845a0bf897a9042383849b02c1bc219c2383772efcd5c6f9766fa4b81aef3" dependencies = [ - "autocfg", + "autocfg 1.0.1", "hashbrown", ] @@ -1470,6 +1614,15 @@ dependencies = [ "scopeguard", ] +[[package]] +name = "log" +version = "0.3.9" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e19e8d5c34a3e0e2223db8e060f9e8264aeeb5c5fc64a4ee9965c062211c024b" +dependencies = [ + "log 0.4.14", +] + [[package]] name = "log" version = "0.4.14" @@ -1512,7 +1665,16 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f83fb6581e8ed1f85fd45c116db8405483899489e38406156c25eb743554361d" dependencies = [ - "autocfg", + "autocfg 1.0.1", +] + +[[package]] +name = "mime" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba626b8a6de5da682e1caa06bdb42a335aee5a84db8e5046a3e8ab17ba0a3ae0" +dependencies = [ + "log 0.3.9", ] [[package]] @@ -1521,6 +1683,18 @@ version = "0.3.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d" +[[package]] +name = "mime_guess" +version = "1.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "216929a5ee4dd316b1702eedf5e74548c123d370f47841ceaac38ca154690ca3" +dependencies = [ + "mime 0.2.6", + "phf", + "phf_codegen", + "unicase", +] + [[package]] name = "miniz_oxide" version = "0.4.4" @@ -1528,7 +1702,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a92518e98c078586bc6c934028adcca4c92a53d6a958196de835170a01d84e4b" dependencies = [ "adler", - "autocfg", + "autocfg 1.0.1", ] [[package]] @@ -1538,7 +1712,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf80d3e903b34e0bd7282b218398aec54e082c840d9baf8339e0080a0c542956" dependencies = [ "libc", - "log", + "log 0.4.14", "miow", "ntapi", "winapi", @@ -1567,6 +1741,24 @@ dependencies = [ "tracing-subscriber 0.2.18", ] +[[package]] +name = "multipart" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "136eed74cadb9edd2651ffba732b19a450316b680e4f48d6c79e905799e19d01" +dependencies = [ + "buf_redux", + 
"httparse", + "log 0.4.14", + "mime 0.2.6", + "mime_guess", + "quick-error", + "rand 0.6.5", + "safemem", + "tempfile", + "twoway", +] + [[package]] name = "native-tls" version = "0.2.7" @@ -1575,7 +1767,7 @@ checksum = "b8d96b2e1c8da3957d58100b09f102c6d9cfdfced01b7ec5a8974044bb09dbd4" dependencies = [ "lazy_static", "libc", - "log", + "log 0.4.14", "openssl", "openssl-probe", "openssl-sys", @@ -1595,7 +1787,7 @@ dependencies = [ "funty", "lexical-core", "memchr", - "version_check", + "version_check 0.9.3", ] [[package]] @@ -1624,7 +1816,7 @@ version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db" dependencies = [ - "autocfg", + "autocfg 1.0.1", "num-traits", ] @@ -1634,7 +1826,7 @@ version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" dependencies = [ - "autocfg", + "autocfg 1.0.1", ] [[package]] @@ -1694,7 +1886,7 @@ version = "0.9.63" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6b0d6fb7d80f877617dfcb014e605e2b5ab2fb0afdf27935219bb6bd984cb98" dependencies = [ - "autocfg", + "autocfg 1.0.1", "cc", "libc", "pkg-config", @@ -1766,7 +1958,7 @@ dependencies = [ "cfg-if 1.0.0", "instant", "libc", - "redox_syscall", + "redox_syscall 0.2.8", "smallvec 1.6.1", "winapi", ] @@ -1786,6 +1978,45 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" +[[package]] +name = "phf" +version = "0.7.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3da44b85f8e8dfaec21adae67f95d93244b2ecf6ad2a692320598dcc8e6dd18" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_codegen" +version = "0.7.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b03e85129e324ad4166b06b2c7491ae27fe3ec353af72e72cd1654c7225d517e" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.7.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09364cc93c159b8b06b1f4dd8a4398984503483891b0c26b867cf431fb132662" +dependencies = [ + "phf_shared", + "rand 0.6.5", +] + +[[package]] +name = "phf_shared" +version = "0.7.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "234f71a15de2288bcb7e3b6515828d22af7ec8598ee6d24c3b526fa0a80b67a0" +dependencies = [ + "siphasher", + "unicase", +] + [[package]] name = "pin-project" version = "1.0.7" @@ -1850,7 +2081,7 @@ dependencies = [ "proc-macro2", "quote", "syn", - "version_check", + "version_check 0.9.3", ] [[package]] @@ -1861,7 +2092,7 @@ checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" dependencies = [ "proc-macro2", "quote", - "version_check", + "version_check 0.9.3", ] [[package]] @@ -1939,6 +2170,12 @@ version = "2.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "45604fc7a88158e7d514d8e22e14ac746081e7a70d7690074dd0029ee37458d6" +[[package]] +name = "quick-error" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" + [[package]] name = "quote" version = "1.0.9" @@ -1960,6 +2197,25 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "643f8f41a8ebc4c5dc4515c82bb8abd397b527fc20fd681b7c011c2aee5d44fb" +[[package]] +name = "rand" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d71dacdc3c88c1fde3885a3be3fbab9f35724e6ce99467f7d9c5026132184ca" +dependencies = [ + "autocfg 0.1.7", + "libc", + "rand_chacha 0.1.1", + "rand_core 0.4.2", + "rand_hc 0.1.0", + "rand_isaac", + "rand_jitter", + "rand_os", + "rand_pcg", + 
"rand_xorshift", + "winapi", +] + [[package]] name = "rand" version = "0.7.3" @@ -1985,6 +2241,16 @@ dependencies = [ "rand_hc 0.3.0", ] +[[package]] +name = "rand_chacha" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "556d3a1ca6600bfcbab7c7c91ccb085ac7fbbcd70e008a98742e7847f4f7bcef" +dependencies = [ + "autocfg 0.1.7", + "rand_core 0.3.1", +] + [[package]] name = "rand_chacha" version = "0.2.2" @@ -2005,6 +2271,21 @@ dependencies = [ "rand_core 0.6.2", ] +[[package]] +name = "rand_core" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b" +dependencies = [ + "rand_core 0.4.2", +] + +[[package]] +name = "rand_core" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc" + [[package]] name = "rand_core" version = "0.5.1" @@ -2023,6 +2304,15 @@ dependencies = [ "getrandom 0.2.2", ] +[[package]] +name = "rand_hc" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b40677c7be09ae76218dc623efbf7b18e34bced3f38883af07bb75630a21bc4" +dependencies = [ + "rand_core 0.3.1", +] + [[package]] name = "rand_hc" version = "0.2.0" @@ -2041,6 +2331,74 @@ dependencies = [ "rand_core 0.6.2", ] +[[package]] +name = "rand_isaac" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ded997c9d5f13925be2a6fd7e66bf1872597f759fd9dd93513dd7e92e5a5ee08" +dependencies = [ + "rand_core 0.3.1", +] + +[[package]] +name = "rand_jitter" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1166d5c91dc97b88d1decc3285bb0a99ed84b05cfd0bc2341bdf2d43fc41e39b" +dependencies = [ + "libc", + "rand_core 0.4.2", + "winapi", +] + +[[package]] +name = "rand_os" +version = "0.1.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b75f676a1e053fc562eafbb47838d67c84801e38fc1ba459e8f180deabd5071" +dependencies = [ + "cloudabi", + "fuchsia-cprng", + "libc", + "rand_core 0.4.2", + "rdrand", + "winapi", +] + +[[package]] +name = "rand_pcg" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abf9b09b01790cfe0364f52bf32995ea3c39f4d2dd011eac241d2914146d0b44" +dependencies = [ + "autocfg 0.1.7", + "rand_core 0.4.2", +] + +[[package]] +name = "rand_xorshift" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbf7e9e623549b0e21f6e97cf8ecf247c1a8fd2e8a992ae265314300b2455d5c" +dependencies = [ + "rand_core 0.3.1", +] + +[[package]] +name = "rdrand" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" +dependencies = [ + "rand_core 0.3.1", +] + +[[package]] +name = "redox_syscall" +version = "0.1.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce" + [[package]] name = "redox_syscall" version = "0.2.8" @@ -2050,6 +2408,17 @@ dependencies = [ "bitflags", ] +[[package]] +name = "redox_users" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de0737333e7a9502c789a36d7c7fa6092a49895d4faa31ca5df163857ded2e9d" +dependencies = [ + "getrandom 0.1.16", + "redox_syscall 0.1.57", + "rust-argon2", +] + [[package]] name = "redox_users" version = "0.4.0" @@ -2057,7 +2426,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "528532f3d801c87aec9def2add9ca802fe569e44a544afe633765267840abe64" dependencies = [ "getrandom 0.2.2", - "redox_syscall", + "redox_syscall 0.2.8", ] [[package]] @@ -2128,6 +2497,44 @@ dependencies = [ "opaque-debug", ] +[[package]] +name = "rouille" +version = 
"3.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cfaebc11a52b7415f07e69f18f8240a0ea5eedf0dcb888c5fb7b432e7b4729b" +dependencies = [ + "base64 0.10.1", + "brotli2", + "chrono", + "deflate", + "filetime", + "multipart", + "num_cpus", + "percent-encoding", + "rand 0.7.3", + "serde", + "serde_derive", + "serde_json", + "sha1", + "term", + "threadpool", + "time", + "tiny_http", + "url", +] + +[[package]] +name = "rust-argon2" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b18820d944b33caa75a71378964ac46f58517c92b6ae5f762636247c09e78fb" +dependencies = [ + "base64 0.13.0", + "blake2b_simd", + "constant_time_eq", + "crossbeam-utils 0.8.4", +] + [[package]] name = "rustc-demangle" version = "0.1.19" @@ -2146,8 +2553,8 @@ version = "0.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "35edb675feee39aec9c99fa5ff985081995a06d594114ae14cbe797ad7b7a6d7" dependencies = [ - "base64", - "log", + "base64 0.13.0", + "log 0.4.14", "ring", "sct", "webpki", @@ -2171,6 +2578,12 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" +[[package]] +name = "safemem" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef703b7cb59335eae2eb93ceb664c0eb7ea6bf567079d843e09420219668e072" + [[package]] name = "same-file" version = "1.0.6" @@ -2370,6 +2783,12 @@ dependencies = [ "opaque-debug", ] +[[package]] +name = "sha1" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2579985fda508104f7587689507983eadd6a6e84dd35d6d115361f530916fa0d" + [[package]] name = "sha2" version = "0.9.5" @@ -2439,6 +2858,12 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cc47a29ce97772ca5c927f75bac34866b16d64e07f330c3248e2d7226623901b" +[[package]] 
+name = "siphasher" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b8de496cf83d4ed58b6be86c3a275b8602f6ffe98d3024a869e124147a9a3ac" + [[package]] name = "slab" version = "0.4.3" @@ -2457,7 +2882,7 @@ dependencies = [ "fs2", "fxhash", "libc", - "log", + "log 0.4.14", "parking_lot", ] @@ -2578,7 +3003,7 @@ dependencies = [ "cfg-if 1.0.0", "libc", "rand 0.8.3", - "redox_syscall", + "redox_syscall 0.2.8", "remove_dir_all", "winapi", ] @@ -2703,6 +3128,17 @@ dependencies = [ "tendermint", ] +[[package]] +name = "term" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edd106a334b7657c10b7c540a0106114feadeb4dc314513e97df481d5d966f42" +dependencies = [ + "byteorder", + "dirs", + "winapi", +] + [[package]] name = "termcolor" version = "1.1.2" @@ -2750,6 +3186,15 @@ dependencies = [ "once_cell", ] +[[package]] +name = "threadpool" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d050e60b33d41c19108b32cea32164033a9013fe3b46cbd4457559bfbf77afaa" +dependencies = [ + "num_cpus", +] + [[package]] name = "time" version = "0.1.43" @@ -2778,6 +3223,19 @@ dependencies = [ "zeroize", ] +[[package]] +name = "tiny_http" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce51b50006056f590c9b7c3808c3bd70f0d1101666629713866c227d6e58d39" +dependencies = [ + "ascii", + "chrono", + "chunked_transfer", + "log 0.4.14", + "url", +] + [[package]] name = "tinyvec" version = "1.2.0" @@ -2799,7 +3257,7 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "83f0c8e7c0addab50b663055baf787d0af7f413a46e6e7fb9559a4e4db7137a5" dependencies = [ - "autocfg", + "autocfg 1.0.1", "bytes", "libc", "memchr", @@ -2865,7 +3323,7 @@ dependencies = [ "bytes", "futures-core", "futures-sink", - "log", + "log 0.4.14", "pin-project-lite", "tokio", ] @@ -2887,7 +3345,7 @@ checksum = 
"2ac42cd97ac6bd2339af5bcabf105540e21e45636ec6fa6aae5e85d44db31be0" dependencies = [ "async-stream", "async-trait", - "base64", + "base64 0.13.0", "bytes", "futures-core", "futures-util", @@ -2947,7 +3405,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09adeb8c97449311ccd28a427f96fb563e7fd31aabf994189879d9da2394b89d" dependencies = [ "cfg-if 1.0.0", - "log", + "log 0.4.14", "pin-project-lite", "tracing-attributes", "tracing-core", @@ -2990,7 +3448,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6923477a48e41c1951f1999ef8bb5a3023eb723ceadafe78ffb65dc366761e3" dependencies = [ "lazy_static", - "log", + "log 0.4.14", "tracing-core", ] @@ -3055,25 +3513,43 @@ version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ada8297e8d70872fa9a551d93250a9f407beb9f37ef86494eb20012a2ff7c24" dependencies = [ - "base64", + "base64 0.13.0", "byteorder", "bytes", "http", "httparse", "input_buffer", - "log", + "log 0.4.14", "rand 0.8.3", "sha-1", "url", "utf-8", ] +[[package]] +name = "twoway" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59b11b2b5241ba34be09c3cc85a36e56e48f9888862e19cedf23336d35316ed1" +dependencies = [ + "memchr", +] + [[package]] name = "typenum" version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "879f6906492a7cd215bfa4cf595b600146ccfac0c79bcbd1f3000162af5e8b06" +[[package]] +name = "unicase" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f4765f83163b74f957c797ad9253caf97f103fb064d3999aea9568d09fc8a33" +dependencies = [ + "version_check 0.1.5", +] + [[package]] name = "unicode-bidi" version = "0.3.5" @@ -3152,6 +3628,12 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" +[[package]] +name = 
"version_check" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd" + [[package]] name = "version_check" version = "0.9.3" @@ -3184,7 +3666,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1ce8a968cb1cd110d136ff8b819a556d6fb6d919363c61534f6860c7eb172ba0" dependencies = [ - "log", + "log 0.4.14", "try-lock", ] @@ -3218,7 +3700,7 @@ checksum = "3b33f6a0694ccfea53d94db8b2ed1c3a8a4c86dd936b13b9f0a15ec4a451b900" dependencies = [ "bumpalo", "lazy_static", - "log", + "log 0.4.14", "proc-macro2", "quote", "syn", diff --git a/relayer/Cargo.toml b/relayer/Cargo.toml index f59c86cfa5..f6c8b92445 100644 --- a/relayer/Cargo.toml +++ b/relayer/Cargo.toml @@ -51,6 +51,11 @@ tonic = "0.4" dirs-next = "2.0.0" dyn-clone = "1.0.3" retry = { version = "1.2.1", default-features = false } +lazy_static = "1.4.0" +opentelemetry = "0.14.0" +opentelemetry-prometheus = "0.7.0" +prometheus = "0.12.0" +rouille = "3.1.1" [dependencies.tendermint] version = "=0.19.0" @@ -71,20 +76,4 @@ serial_test = "0.5.0" ibc = { version = "0.3.1", path = "../modules", features = ["mocks"] } # Needed for generating (synthetic) light blocks. 
-tendermint-testgen = { version = "=0.19.0" } - -# Dependencies needed for telemetry support -[dependencies.opentelemetry] -version = "0.14.0" - -[dependencies.opentelemetry-prometheus] -version = "0.7.0" - -[dependencies.hyper] -version = "0.14.7" - -[dependencies.lazy_static] -version = "1.4.0" - -[dependencies.prometheus] -version = "0.12.0" +tendermint-testgen = { version = "=0.19.0" } \ No newline at end of file diff --git a/relayer/src/lib.rs b/relayer/src/lib.rs index e1a22eabf0..313f9dcc27 100644 --- a/relayer/src/lib.rs +++ b/relayer/src/lib.rs @@ -16,6 +16,9 @@ #[macro_use] extern crate lazy_static; +#[macro_use] +extern crate rouille; + pub mod chain; pub mod channel; pub mod config; diff --git a/relayer/src/telemetry.rs b/relayer/src/telemetry.rs index 29f058ecde..23a758c564 100644 --- a/relayer/src/telemetry.rs +++ b/relayer/src/telemetry.rs @@ -1,2 +1,2 @@ -pub mod relayer_state; +pub mod state; pub mod service; diff --git a/relayer/src/telemetry/relayer_state.rs b/relayer/src/telemetry/relayer_state.rs deleted file mode 100644 index 709df64e3c..0000000000 --- a/relayer/src/telemetry/relayer_state.rs +++ /dev/null @@ -1,7 +0,0 @@ -use opentelemetry::metrics::BoundCounter; -use opentelemetry_prometheus::PrometheusExporter; - -pub struct RelayerState { - pub exporter: PrometheusExporter, - pub tx_counter: BoundCounter<'static, u64>, -} diff --git a/relayer/src/telemetry/service.rs b/relayer/src/telemetry/service.rs index a54c29ef81..dbe26616cf 100644 --- a/relayer/src/telemetry/service.rs +++ b/relayer/src/telemetry/service.rs @@ -1,75 +1,36 @@ -use hyper::{ - header::CONTENT_TYPE, - service::{make_service_fn, service_fn}, - Body, Request, Response, Server, -}; - -use crate::telemetry::relayer_state::RelayerState; -use opentelemetry::{global, KeyValue}; use prometheus::{Encoder, TextEncoder}; -use std::convert::Infallible; -use std::sync::Arc; -lazy_static! 
{ - static ref HANDLER_ALL: [KeyValue; 1] = [KeyValue::new("handler", "all")]; -} +use crate::telemetry::state::TelemetryState; pub struct TelemetryService { pub(crate) listen_port: u16, } -async fn serve_req( - _req: Request, - state: Arc, -) -> Result, hyper::Error> { - let mut buffer = vec![]; - let encoder = TextEncoder::new(); - let metric_families = state.exporter.registry().gather(); - encoder.encode(&metric_families, &mut buffer).unwrap(); - - state.tx_counter.add(1); - - let response = Response::builder() - .status(200) - .header(CONTENT_TYPE, encoder.format_type()) - .body(Body::from(buffer)) - .unwrap(); - - Ok(response) -} - impl TelemetryService { pub async fn run(self) -> Result> { - let exporter = opentelemetry_prometheus::exporter().init(); - - let meter = global::meter("hermes/relayer"); - let state = Arc::new(RelayerState { - exporter, - tx_counter: meter - .u64_counter("hermes.tx_count") - .with_description("Total number of transactions processed via the relayer.") - .init() - .bind(HANDLER_ALL.as_ref()), - }); - - // For every connection, we must make a `Service` to handle all - // incoming HTTP requests on said connection. - let make_svc = make_service_fn(move |_conn| { - let state = state.clone(); - // This is the `Service` that will handle the connection. - // `service_fn` is a helper to convert a function that - // returns a Response into a `Service`. 
- async move { Ok::<_, Infallible>(service_fn(move |req| serve_req(req, state.clone()))) } - }); - - let addr = ([127, 0, 0, 1], self.listen_port).into(); - - let server = Server::bind(&addr).serve(make_svc); - - println!("Telemetry service listening on http://{}", addr); - - server.await?; - - Ok(self) + let state = TelemetryState::init(); + + rouille::start_server(format!("localhost:{}", self.listen_port), move |request| { + router!(request, + // The prometheus endpoint + (GET) (/metrics) => { + + state.tx_counter.add(1); + + let mut buffer = vec![]; + let encoder = TextEncoder::new(); + let metric_families = state.exporter.registry().gather(); + encoder.encode(&metric_families, &mut buffer).unwrap(); + + rouille::Response::from_data(encoder.format_type().to_string(), buffer) + }, + + // Any route other than /metrics + // return an empty response with a 404 status code. + _ => { + rouille::Response::empty_404() + } + ) + }) } } diff --git a/relayer/src/telemetry/state.rs b/relayer/src/telemetry/state.rs new file mode 100644 index 0000000000..ece2e2d170 --- /dev/null +++ b/relayer/src/telemetry/state.rs @@ -0,0 +1,31 @@ +use opentelemetry::metrics::BoundCounter; +use opentelemetry_prometheus::PrometheusExporter; +use std::sync::Arc; +use opentelemetry::global; +use opentelemetry::KeyValue; + +lazy_static! 
{ + static ref HANDLER_ALL: [KeyValue; 1] = [KeyValue::new("hermes", "all")]; +} + +pub struct TelemetryState { + pub exporter: PrometheusExporter, + + // Count the number of trans + pub tx_counter: BoundCounter<'static, u64>, +} + +impl TelemetryState { + pub fn init() -> Arc { + let exporter = opentelemetry_prometheus::exporter().init(); + let meter = global::meter("hermes"); + return Arc::new(TelemetryState { + exporter, + tx_counter: meter + .u64_counter("hermes.tx_count") + .with_description("Total number of transactions processed via the relayer.") + .init() + .bind(HANDLER_ALL.as_ref()), + }); + } +} From 6954493ddd01433102f2766ba8a3007f489f6acd Mon Sep 17 00:00:00 2001 From: Andy Nogueira Date: Wed, 19 May 2021 16:49:13 -0400 Subject: [PATCH 03/26] Initial logic to include the telemetry in the Supervisor (#868) --- relayer/src/supervisor.rs | 22 ++++++++++------------ relayer/src/telemetry/service.rs | 13 +++++-------- 2 files changed, 15 insertions(+), 20 deletions(-) diff --git a/relayer/src/supervisor.rs b/relayer/src/supervisor.rs index 22e2f489a5..7c7b9db80e 100644 --- a/relayer/src/supervisor.rs +++ b/relayer/src/supervisor.rs @@ -25,12 +25,13 @@ use crate::{ }, object::{Client, Object, UnidirectionalChannelPath}, registry::Registry, + telemetry::service::TelemetryService, util::try_recv_multiple, worker::{WorkerMap, WorkerMsg}, }; mod error; -use crate::telemetry::service::TelemetryService; +use crate::telemetry::state::TelemetryState; pub use error::Error; /// The supervisor listens for events on multiple pairs of chains, @@ -41,6 +42,7 @@ pub struct Supervisor { registry: Registry, workers: WorkerMap, worker_msg_rx: Receiver, + telemetry_state: Arc, } impl Supervisor { @@ -50,17 +52,10 @@ impl Supervisor { let (worker_msg_tx, worker_msg_rx) = crossbeam_channel::unbounded(); // Start the telemetry service - let telemetry = match config.global.telemetry_enabled { - true => { - println!( - "TELEMETRY ENABLED ON PORT: {:?}", - 
config.global.telemetry_port - ); - Some(TelemetryService { - listen_port: config.global.telemetry_port, - }) - } - false => None, + let telemetry_state = TelemetryState::init(); + match config.global.telemetry_enabled { + true => TelemetryService::run(telemetry_state.clone(), config.global.telemetry_port), + false => println!("Telemetry not enabled"), }; Ok(Self { @@ -68,6 +63,7 @@ impl Supervisor { registry, workers: WorkerMap::new(worker_msg_tx), worker_msg_rx, + telemetry_state, }) } @@ -112,6 +108,8 @@ impl Supervisor { IbcEvent::SendPacket(ref packet) => { if let Ok(object) = Object::for_send_packet(packet, src_chain) { collected.per_object.entry(object).or_default().push(event); + // Increase counter + self.telemetry_state.tx_counter.add(1); } } IbcEvent::TimeoutPacket(ref packet) => { diff --git a/relayer/src/telemetry/service.rs b/relayer/src/telemetry/service.rs index dbe26616cf..99932ab4cc 100644 --- a/relayer/src/telemetry/service.rs +++ b/relayer/src/telemetry/service.rs @@ -1,16 +1,13 @@ use prometheus::{Encoder, TextEncoder}; use crate::telemetry::state::TelemetryState; +use std::sync::Arc; -pub struct TelemetryService { - pub(crate) listen_port: u16, -} +pub struct TelemetryService {} impl TelemetryService { - pub async fn run(self) -> Result> { - let state = TelemetryState::init(); - - rouille::start_server(format!("localhost:{}", self.listen_port), move |request| { + pub fn run(state: Arc, listen_port: u16) -> () { + rouille::start_server(format!("localhost:{}", listen_port), move |request| { router!(request, // The prometheus endpoint (GET) (/metrics) => { @@ -31,6 +28,6 @@ impl TelemetryService { rouille::Response::empty_404() } ) - }) + }); } } From 1781413353f620a54389e52ffae92dada0025739 Mon Sep 17 00:00:00 2001 From: Andy Nogueira Date: Sat, 22 May 2021 08:28:45 -0400 Subject: [PATCH 04/26] Refactored logic into server and service. 
Server working (#868) --- config.toml | 4 +- relayer-cli/src/commands/start_multi.rs | 2 + relayer/src/supervisor.rs | 13 ------ relayer/src/telemetry.rs | 3 +- relayer/src/telemetry/server.rs | 54 +++++++++++++++++++++++++ relayer/src/telemetry/service.rs | 44 +++++++++----------- relayer/src/telemetry/state.rs | 14 +++---- 7 files changed, 87 insertions(+), 47 deletions(-) create mode 100644 relayer/src/telemetry/server.rs diff --git a/config.toml b/config.toml index 2904ee82d5..9f0f3f2ece 100644 --- a/config.toml +++ b/config.toml @@ -1,6 +1,8 @@ [global] strategy = 'naive' -log_level = 'error' +log_level = 'debug' +telemetry_enabled = true +telemetry_port = 3002 [[chains]] id = 'ibc-0' diff --git a/relayer-cli/src/commands/start_multi.rs b/relayer-cli/src/commands/start_multi.rs index b7259eec9b..cb54d0bd11 100644 --- a/relayer-cli/src/commands/start_multi.rs +++ b/relayer-cli/src/commands/start_multi.rs @@ -4,6 +4,7 @@ use ibc_relayer::supervisor::Supervisor; use crate::conclude::Output; use crate::prelude::*; +use ibc_relayer::telemetry::server::TelemetryServer; #[derive(Clone, Command, Debug, Options)] pub struct StartMultiCmd {} @@ -11,6 +12,7 @@ pub struct StartMultiCmd {} impl Runnable for StartMultiCmd { fn run(&self) { let config = app_config(); + let telemetry_server = TelemetryServer::spawn(config.global.telemetry_port); let supervisor = Supervisor::spawn(config.clone()).expect("failed to spawn supervisor"); match supervisor.run() { Ok(()) => Output::success_msg("done").exit(), diff --git a/relayer/src/supervisor.rs b/relayer/src/supervisor.rs index 7c7b9db80e..e21145c9bb 100644 --- a/relayer/src/supervisor.rs +++ b/relayer/src/supervisor.rs @@ -25,13 +25,11 @@ use crate::{ }, object::{Client, Object, UnidirectionalChannelPath}, registry::Registry, - telemetry::service::TelemetryService, util::try_recv_multiple, worker::{WorkerMap, WorkerMsg}, }; mod error; -use crate::telemetry::state::TelemetryState; pub use error::Error; /// The supervisor listens 
for events on multiple pairs of chains, @@ -42,7 +40,6 @@ pub struct Supervisor { registry: Registry, workers: WorkerMap, worker_msg_rx: Receiver, - telemetry_state: Arc, } impl Supervisor { @@ -50,20 +47,11 @@ impl Supervisor { pub fn spawn(config: Config) -> Result { let registry = Registry::new(config.clone()); let (worker_msg_tx, worker_msg_rx) = crossbeam_channel::unbounded(); - - // Start the telemetry service - let telemetry_state = TelemetryState::init(); - match config.global.telemetry_enabled { - true => TelemetryService::run(telemetry_state.clone(), config.global.telemetry_port), - false => println!("Telemetry not enabled"), - }; - Ok(Self { config, registry, workers: WorkerMap::new(worker_msg_tx), worker_msg_rx, - telemetry_state, }) } @@ -109,7 +97,6 @@ impl Supervisor { if let Ok(object) = Object::for_send_packet(packet, src_chain) { collected.per_object.entry(object).or_default().push(event); // Increase counter - self.telemetry_state.tx_counter.add(1); } } IbcEvent::TimeoutPacket(ref packet) => { diff --git a/relayer/src/telemetry.rs b/relayer/src/telemetry.rs index 23a758c564..d9254de434 100644 --- a/relayer/src/telemetry.rs +++ b/relayer/src/telemetry.rs @@ -1,2 +1,3 @@ -pub mod state; pub mod service; +pub mod server; +pub mod state; \ No newline at end of file diff --git a/relayer/src/telemetry/server.rs b/relayer/src/telemetry/server.rs new file mode 100644 index 0000000000..bd375785fa --- /dev/null +++ b/relayer/src/telemetry/server.rs @@ -0,0 +1,54 @@ +use prometheus::{Encoder, TextEncoder}; + +use crate::telemetry::service::MetricUpdate; +use crossbeam_channel::Sender; +use tracing::info; +use crate::telemetry::state::TelemetryState; + +pub struct TelemetryServer { + pub state: TelemetryState, +} + +impl TelemetryServer { + fn new(state: TelemetryState) -> TelemetryServer { + TelemetryServer { state } + } +} + +impl TelemetryServer { + fn run(listen_port: u16) -> () { + let telemetry_state = TelemetryState::new(); + 
rouille::start_server(format!("localhost:{}", listen_port), move |request| { + router!(request, + // The prometheus endpoint + (GET) (/metrics) => { + telemetry_state.packets_relayed.add(1); + info!("metrics called on telemetry server"); + let mut buffer = vec![]; + let encoder = TextEncoder::new(); + let metric_families = telemetry_state.exporter.registry().gather(); + encoder.encode(&metric_families, &mut buffer).unwrap(); + dbg!(metric_families); + rouille::Response::from_data(encoder.format_type().to_string(), buffer) + }, + + // Any route other than /metrics + // return an empty response with a 404 status code. + _ => { + rouille::Response::empty_404() + } + ) + }); + } + + pub fn spawn(port: u16) -> Sender { + + let (tx, _rx) = crossbeam_channel::unbounded(); + //let (service, tx) = TelemetryService::new(app_state.clone()); + //let server = TelemetryServer::new(app_state.clone()); + std::thread::spawn(move || TelemetryServer::run( port)); + //std::thread::spawn(|| service.run()); + + tx + } +} \ No newline at end of file diff --git a/relayer/src/telemetry/service.rs b/relayer/src/telemetry/service.rs index 99932ab4cc..884757a406 100644 --- a/relayer/src/telemetry/service.rs +++ b/relayer/src/telemetry/service.rs @@ -1,33 +1,27 @@ -use prometheus::{Encoder, TextEncoder}; -use crate::telemetry::state::TelemetryState; use std::sync::Arc; +use crossbeam_channel::Receiver; +use crate::telemetry::state::TelemetryState; -pub struct TelemetryService {} - -impl TelemetryService { - pub fn run(state: Arc, listen_port: u16) -> () { - rouille::start_server(format!("localhost:{}", listen_port), move |request| { - router!(request, - // The prometheus endpoint - (GET) (/metrics) => { - - state.tx_counter.add(1); +pub enum MetricUpdate { + PacketsRelayed(u64) +} - let mut buffer = vec![]; - let encoder = TextEncoder::new(); - let metric_families = state.exporter.registry().gather(); - encoder.encode(&metric_families, &mut buffer).unwrap(); +pub struct TelemetryService { + 
pub state: Arc, + pub rx: Receiver +} - rouille::Response::from_data(encoder.format_type().to_string(), buffer) - }, +impl TelemetryService { + fn run(self) { + while let Ok(update) = self.rx.recv() { + self.apply_update(update); + } + } - // Any route other than /metrics - // return an empty response with a 404 status code. - _ => { - rouille::Response::empty_404() - } - ) - }); + fn apply_update(&self, update: MetricUpdate) { + match update { + MetricUpdate::PacketsRelayed(n) => self.state.packets_relayed.add(n ), + } } } diff --git a/relayer/src/telemetry/state.rs b/relayer/src/telemetry/state.rs index ece2e2d170..4fee5db59c 100644 --- a/relayer/src/telemetry/state.rs +++ b/relayer/src/telemetry/state.rs @@ -1,6 +1,5 @@ use opentelemetry::metrics::BoundCounter; use opentelemetry_prometheus::PrometheusExporter; -use std::sync::Arc; use opentelemetry::global; use opentelemetry::KeyValue; @@ -12,20 +11,21 @@ pub struct TelemetryState { pub exporter: PrometheusExporter, // Count the number of trans - pub tx_counter: BoundCounter<'static, u64>, + pub packets_relayed: BoundCounter<'static, u64>, } impl TelemetryState { - pub fn init() -> Arc { + pub fn new() -> TelemetryState { let exporter = opentelemetry_prometheus::exporter().init(); let meter = global::meter("hermes"); - return Arc::new(TelemetryState { + let telemetry_state = TelemetryState { exporter, - tx_counter: meter + packets_relayed: meter .u64_counter("hermes.tx_count") .with_description("Total number of transactions processed via the relayer.") .init() .bind(HANDLER_ALL.as_ref()), - }); + }; + telemetry_state } -} +} \ No newline at end of file From 9e7a9547de29ff14395865f6c18f1157aa63401d Mon Sep 17 00:00:00 2001 From: Andy Nogueira Date: Sat, 22 May 2021 08:59:15 -0400 Subject: [PATCH 05/26] Added new methods for state and server (#868) --- relayer/src/telemetry/server.rs | 11 ++++------- relayer/src/telemetry/state.rs | 1 + 2 files changed, 5 insertions(+), 7 deletions(-) diff --git 
a/relayer/src/telemetry/server.rs b/relayer/src/telemetry/server.rs index bd375785fa..6adca643e9 100644 --- a/relayer/src/telemetry/server.rs +++ b/relayer/src/telemetry/server.rs @@ -13,11 +13,8 @@ impl TelemetryServer { fn new(state: TelemetryState) -> TelemetryServer { TelemetryServer { state } } -} -impl TelemetryServer { - fn run(listen_port: u16) -> () { - let telemetry_state = TelemetryState::new(); + fn run(&self, telemetry_state: TelemetryState, listen_port: u16) -> () { rouille::start_server(format!("localhost:{}", listen_port), move |request| { router!(request, // The prometheus endpoint @@ -42,11 +39,11 @@ impl TelemetryServer { } pub fn spawn(port: u16) -> Sender { - + let telemetry_state = TelemetryState::new(); let (tx, _rx) = crossbeam_channel::unbounded(); //let (service, tx) = TelemetryService::new(app_state.clone()); - //let server = TelemetryServer::new(app_state.clone()); - std::thread::spawn(move || TelemetryServer::run( port)); + let server = TelemetryServer::new(telemetry_state.clone()); + std::thread::spawn(move || server.run( telemetry_state,port)); //std::thread::spawn(|| service.run()); tx diff --git a/relayer/src/telemetry/state.rs b/relayer/src/telemetry/state.rs index 4fee5db59c..96615fa9d9 100644 --- a/relayer/src/telemetry/state.rs +++ b/relayer/src/telemetry/state.rs @@ -7,6 +7,7 @@ lazy_static! 
{ static ref HANDLER_ALL: [KeyValue; 1] = [KeyValue::new("hermes", "all")]; } +#[derive(Clone)] pub struct TelemetryState { pub exporter: PrometheusExporter, From 42ee6858ac63e322f9f1650f091722d88afabd0f Mon Sep 17 00:00:00 2001 From: Andy Nogueira Date: Sat, 22 May 2021 10:11:54 -0400 Subject: [PATCH 06/26] Telemetry service logic working, recording a metric (#868) --- config.toml | 16 +++++++++++++++- relayer-cli/src/commands/start_multi.rs | 6 +++--- relayer/src/supervisor.rs | 10 ++++++++-- relayer/src/telemetry.rs | 21 ++++++++++++++++++++- relayer/src/telemetry/server.rs | 20 +++----------------- relayer/src/telemetry/service.rs | 17 +++++++++++------ relayer/src/telemetry/state.rs | 8 ++++---- 7 files changed, 64 insertions(+), 34 deletions(-) diff --git a/config.toml b/config.toml index 9f0f3f2ece..e6ec130248 100644 --- a/config.toml +++ b/config.toml @@ -2,7 +2,7 @@ strategy = 'naive' log_level = 'debug' telemetry_enabled = true -telemetry_port = 3002 +telemetry_port = 3001 [[chains]] id = 'ibc-0' @@ -38,6 +38,20 @@ fee_amount = 1000 clock_drift = '5s' trusting_period = '14days' +[[chains]] +id = 'cosmoshub-4' +rpc_addr = 'http://cosmos.decentrox.com:26657' +grpc_addr = 'http://cosmos.decentrox.com:9090' +websocket_addr = 'ws://cosmos.decentrox.com:26657/websocket' +account_prefix = 'cosmos' +key_name = 'testkey' +store_prefix = 'ibc' +gas = 3000000 +fee_denom = 'uatom' +fee_amount = 300 +clock_drift = '5s' +trusting_period = '14days' + [chains.trust_threshold] numerator = '1' denominator = '3' diff --git a/relayer-cli/src/commands/start_multi.rs b/relayer-cli/src/commands/start_multi.rs index cb54d0bd11..a61fb2c902 100644 --- a/relayer-cli/src/commands/start_multi.rs +++ b/relayer-cli/src/commands/start_multi.rs @@ -4,7 +4,7 @@ use ibc_relayer::supervisor::Supervisor; use crate::conclude::Output; use crate::prelude::*; -use ibc_relayer::telemetry::server::TelemetryServer; +use ibc_relayer::telemetry; #[derive(Clone, Command, Debug, Options)] pub struct 
StartMultiCmd {} @@ -12,8 +12,8 @@ pub struct StartMultiCmd {} impl Runnable for StartMultiCmd { fn run(&self) { let config = app_config(); - let telemetry_server = TelemetryServer::spawn(config.global.telemetry_port); - let supervisor = Supervisor::spawn(config.clone()).expect("failed to spawn supervisor"); + let telemetry = telemetry::spawn(config.global.telemetry_port); + let supervisor = Supervisor::spawn(config.clone(), telemetry).expect("failed to spawn supervisor"); match supervisor.run() { Ok(()) => Output::success_msg("done").exit(), Err(e) => Output::error(e).exit(), diff --git a/relayer/src/supervisor.rs b/relayer/src/supervisor.rs index e21145c9bb..83eaa0f167 100644 --- a/relayer/src/supervisor.rs +++ b/relayer/src/supervisor.rs @@ -2,7 +2,7 @@ use std::{collections::HashMap, sync::Arc, time::Duration}; use anomaly::BoxError; -use crossbeam_channel::Receiver; +use crossbeam_channel::{Receiver, Sender}; use itertools::Itertools; use tracing::{debug, error, trace, warn}; @@ -31,6 +31,7 @@ use crate::{ mod error; pub use error::Error; +use crate::telemetry::service::MetricUpdate; /// The supervisor listens for events on multiple pairs of chains, /// and dispatches the events it receives to the appropriate @@ -40,18 +41,21 @@ pub struct Supervisor { registry: Registry, workers: WorkerMap, worker_msg_rx: Receiver, + telemetry: Sender } impl Supervisor { /// Spawns a [`Supervisor`] which will listen for events on all the chains in the [`Config`]. 
- pub fn spawn(config: Config) -> Result { + pub fn spawn(config: Config, telemetry: Sender) -> Result { let registry = Registry::new(config.clone()); let (worker_msg_tx, worker_msg_rx) = crossbeam_channel::unbounded(); + Ok(Self { config, registry, workers: WorkerMap::new(worker_msg_tx), worker_msg_rx, + telemetry }) } @@ -133,6 +137,8 @@ impl Supervisor { .map(|c| c.id.clone()) .collect_vec(); + let _ = self.telemetry.send(MetricUpdate::RelayChainsNumber(chain_ids.len() as u64)); + for chain_id in chain_ids { let chain = match self.registry.get_or_spawn(&chain_id) { Ok(chain_handle) => chain_handle, diff --git a/relayer/src/telemetry.rs b/relayer/src/telemetry.rs index d9254de434..67f17f04c0 100644 --- a/relayer/src/telemetry.rs +++ b/relayer/src/telemetry.rs @@ -1,3 +1,22 @@ +use crate::telemetry::state::TelemetryState; +use crate::telemetry::service::TelemetryService; +use crate::telemetry::server::TelemetryServer; +use crossbeam_channel::Sender; +use crate::telemetry::service::MetricUpdate; + pub mod service; pub mod server; -pub mod state; \ No newline at end of file +pub mod state; + +pub fn spawn(port: u16) -> Sender { + let (tx, rx) = crossbeam_channel::unbounded(); + let telemetry_state = TelemetryState::new(); + let service = TelemetryService::new(telemetry_state.clone(), rx); + let server = TelemetryServer::new(telemetry_state.clone()); + + // Start the telemetry service and server + std::thread::spawn(move || server.run( telemetry_state.clone(),port)); + std::thread::spawn(move || service.run()); + + tx +} \ No newline at end of file diff --git a/relayer/src/telemetry/server.rs b/relayer/src/telemetry/server.rs index 6adca643e9..9f1ae26fc3 100644 --- a/relayer/src/telemetry/server.rs +++ b/relayer/src/telemetry/server.rs @@ -1,7 +1,4 @@ use prometheus::{Encoder, TextEncoder}; - -use crate::telemetry::service::MetricUpdate; -use crossbeam_channel::Sender; use tracing::info; use crate::telemetry::state::TelemetryState; @@ -10,16 +7,16 @@ pub struct 
TelemetryServer { } impl TelemetryServer { - fn new(state: TelemetryState) -> TelemetryServer { + pub(crate) fn new(state: TelemetryState) -> TelemetryServer { TelemetryServer { state } } - fn run(&self, telemetry_state: TelemetryState, listen_port: u16) -> () { + pub(crate) fn run(&self, telemetry_state: TelemetryState, listen_port: u16) -> () { rouille::start_server(format!("localhost:{}", listen_port), move |request| { router!(request, // The prometheus endpoint (GET) (/metrics) => { - telemetry_state.packets_relayed.add(1); + //telemetry_state.packets_relayed.add(1); info!("metrics called on telemetry server"); let mut buffer = vec![]; let encoder = TextEncoder::new(); @@ -37,15 +34,4 @@ impl TelemetryServer { ) }); } - - pub fn spawn(port: u16) -> Sender { - let telemetry_state = TelemetryState::new(); - let (tx, _rx) = crossbeam_channel::unbounded(); - //let (service, tx) = TelemetryService::new(app_state.clone()); - let server = TelemetryServer::new(telemetry_state.clone()); - std::thread::spawn(move || server.run( telemetry_state,port)); - //std::thread::spawn(|| service.run()); - - tx - } } \ No newline at end of file diff --git a/relayer/src/telemetry/service.rs b/relayer/src/telemetry/service.rs index 884757a406..d47d803f2d 100644 --- a/relayer/src/telemetry/service.rs +++ b/relayer/src/telemetry/service.rs @@ -1,19 +1,24 @@ - -use std::sync::Arc; use crossbeam_channel::Receiver; use crate::telemetry::state::TelemetryState; pub enum MetricUpdate { - PacketsRelayed(u64) + RelayChainsNumber(u64) } pub struct TelemetryService { - pub state: Arc, + pub state: TelemetryState, pub rx: Receiver } impl TelemetryService { - fn run(self) { + pub(crate) fn new(state: TelemetryState, rx: Receiver) -> Self { + Self { + state, + rx, + } + } + + pub(crate) fn run(self) { while let Ok(update) = self.rx.recv() { self.apply_update(update); } @@ -21,7 +26,7 @@ impl TelemetryService { fn apply_update(&self, update: MetricUpdate) { match update { - 
MetricUpdate::PacketsRelayed(n) => self.state.packets_relayed.add(n ), + MetricUpdate::RelayChainsNumber(n) => self.state.relay_chains_num.add(n ), } } } diff --git a/relayer/src/telemetry/state.rs b/relayer/src/telemetry/state.rs index 96615fa9d9..2fd9ef54dc 100644 --- a/relayer/src/telemetry/state.rs +++ b/relayer/src/telemetry/state.rs @@ -12,7 +12,7 @@ pub struct TelemetryState { pub exporter: PrometheusExporter, // Count the number of trans - pub packets_relayed: BoundCounter<'static, u64>, + pub relay_chains_num: BoundCounter<'static, u64>, } impl TelemetryState { @@ -21,9 +21,9 @@ impl TelemetryState { let meter = global::meter("hermes"); let telemetry_state = TelemetryState { exporter, - packets_relayed: meter - .u64_counter("hermes.tx_count") - .with_description("Total number of transactions processed via the relayer.") + relay_chains_num: meter + .u64_counter("relay_chains_num") + .with_description("Number of chains the relay is connecting to") .init() .bind(HANDLER_ALL.as_ref()), }; From 5893724cd013a5f9ac49fd52621a4bffacea837e Mon Sep 17 00:00:00 2001 From: Andy Nogueira Date: Sat, 22 May 2021 15:36:03 -0400 Subject: [PATCH 07/26] Added more metrics (#868) --- config.toml | 14 -------------- relayer/src/supervisor.rs | 14 ++++++++++---- relayer/src/telemetry/server.rs | 4 ---- relayer/src/telemetry/service.rs | 8 +++++++- relayer/src/telemetry/state.rs | 29 +++++++++++++++++++++++++++-- 5 files changed, 44 insertions(+), 25 deletions(-) diff --git a/config.toml b/config.toml index e6ec130248..84f578d101 100644 --- a/config.toml +++ b/config.toml @@ -38,20 +38,6 @@ fee_amount = 1000 clock_drift = '5s' trusting_period = '14days' -[[chains]] -id = 'cosmoshub-4' -rpc_addr = 'http://cosmos.decentrox.com:26657' -grpc_addr = 'http://cosmos.decentrox.com:9090' -websocket_addr = 'ws://cosmos.decentrox.com:26657/websocket' -account_prefix = 'cosmos' -key_name = 'testkey' -store_prefix = 'ibc' -gas = 3000000 -fee_denom = 'uatom' -fee_amount = 300 -clock_drift = 
'5s' -trusting_period = '14days' - [chains.trust_threshold] numerator = '1' denominator = '3' diff --git a/relayer/src/supervisor.rs b/relayer/src/supervisor.rs index 83eaa0f167..1bb586271e 100644 --- a/relayer/src/supervisor.rs +++ b/relayer/src/supervisor.rs @@ -110,6 +110,8 @@ impl Supervisor { } IbcEvent::WriteAcknowledgement(ref packet) => { if let Ok(object) = Object::for_write_ack(packet, src_chain) { + // TODO: Find a better place to record the telemetry metric + let _ = self.telemetry.send(MetricUpdate::AcknowledgePacket(1)); collected.per_object.entry(object).or_default().push(event); } } @@ -137,11 +139,12 @@ impl Supervisor { .map(|c| c.id.clone()) .collect_vec(); - let _ = self.telemetry.send(MetricUpdate::RelayChainsNumber(chain_ids.len() as u64)); - for chain_id in chain_ids { let chain = match self.registry.get_or_spawn(&chain_id) { - Ok(chain_handle) => chain_handle, + Ok(chain_handle) => { + let _ = self.telemetry.send(MetricUpdate::RelayChainsNumber(1)); + chain_handle + }, Err(e) => { error!("skipping workers for chain id {}. 
reason: failed to spawn chain runtime with error: {}", chain_id, e); continue; @@ -149,7 +152,10 @@ impl Supervisor { }; let channels = match chain.query_channels(req.clone()) { - Ok(channels) => channels, + Ok(channels) => { + let _ = self.telemetry.send(MetricUpdate::RelayChannelsNumber(1)); + channels + }, Err(e) => { error!("failed to query channels from {}: {}", chain_id, e); continue; diff --git a/relayer/src/telemetry/server.rs b/relayer/src/telemetry/server.rs index 9f1ae26fc3..5e6cd3f0e6 100644 --- a/relayer/src/telemetry/server.rs +++ b/relayer/src/telemetry/server.rs @@ -1,5 +1,4 @@ use prometheus::{Encoder, TextEncoder}; -use tracing::info; use crate::telemetry::state::TelemetryState; pub struct TelemetryServer { @@ -16,13 +15,10 @@ impl TelemetryServer { router!(request, // The prometheus endpoint (GET) (/metrics) => { - //telemetry_state.packets_relayed.add(1); - info!("metrics called on telemetry server"); let mut buffer = vec![]; let encoder = TextEncoder::new(); let metric_families = telemetry_state.exporter.registry().gather(); encoder.encode(&metric_families, &mut buffer).unwrap(); - dbg!(metric_families); rouille::Response::from_data(encoder.format_type().to_string(), buffer) }, diff --git a/relayer/src/telemetry/service.rs b/relayer/src/telemetry/service.rs index d47d803f2d..4109271522 100644 --- a/relayer/src/telemetry/service.rs +++ b/relayer/src/telemetry/service.rs @@ -2,7 +2,10 @@ use crossbeam_channel::Receiver; use crate::telemetry::state::TelemetryState; pub enum MetricUpdate { - RelayChainsNumber(u64) + RelayChainsNumber(u64), + RelayChannelsNumber(u64), + AcknowledgePacket(u64), + TxCount(u64), } pub struct TelemetryService { @@ -27,6 +30,9 @@ impl TelemetryService { fn apply_update(&self, update: MetricUpdate) { match update { MetricUpdate::RelayChainsNumber(n) => self.state.relay_chains_num.add(n ), + MetricUpdate::RelayChannelsNumber(n) => self.state.relay_channels_num.add(n), + MetricUpdate::AcknowledgePacket(n) => 
self.state.tx_msg_ibc_acknowledge_packet.add(n), + MetricUpdate::TxCount(n) => self.state.tx_count.add(n), } } } diff --git a/relayer/src/telemetry/state.rs b/relayer/src/telemetry/state.rs index 2fd9ef54dc..2a18da4c62 100644 --- a/relayer/src/telemetry/state.rs +++ b/relayer/src/telemetry/state.rs @@ -11,8 +11,18 @@ lazy_static! { pub struct TelemetryState { pub exporter: PrometheusExporter, - // Count the number of trans + // Number of chains the relay is connecting to pub relay_chains_num: BoundCounter<'static, u64>, + + // Number of channels the relay is connecting to + pub relay_channels_num: BoundCounter<'static, u64>, + + // Total number of IBC packets acknowledged + pub tx_msg_ibc_acknowledge_packet: BoundCounter<'static, u64>, + + // Total number of txs processed via Relay tx + pub tx_count: BoundCounter<'static, u64>, + } impl TelemetryState { @@ -22,10 +32,25 @@ impl TelemetryState { let telemetry_state = TelemetryState { exporter, relay_chains_num: meter - .u64_counter("relay_chains_num") + .u64_counter("hermes_chains_num") .with_description("Number of chains the relay is connecting to") .init() .bind(HANDLER_ALL.as_ref()), + relay_channels_num: meter + .u64_counter("hermes_channels_num") + .with_description("Number of channels the relay is connecting to") + .init() + .bind(HANDLER_ALL.as_ref()), + tx_msg_ibc_acknowledge_packet: meter + .u64_counter("hermes_tx_msg_ibc_acknowledge_packet") + .with_description("Total number of IBC packets acknowledged") + .init() + .bind(HANDLER_ALL.as_ref()), + tx_count: meter + .u64_counter("tx_count") + .with_description("Total number of txs processed via Relay tx") + .init() + .bind(HANDLER_ALL.as_ref()), }; telemetry_state } From 9d7985eab36b568b9e37e89ba4b8ddb90e15f30d Mon Sep 17 00:00:00 2001 From: Andy Nogueira Date: Sun, 23 May 2021 07:42:52 -0400 Subject: [PATCH 08/26] Added logic to disable/enable telemetry service and server (#868) --- config.toml | 2 +- relayer-cli/src/commands/start_multi.rs | 8 +++++-- 
relayer/src/supervisor.rs | 10 ++++----- relayer/src/telemetry.rs | 28 ++++++++++++++----------- relayer/src/telemetry/server.rs | 4 ++-- relayer/src/telemetry/service.rs | 11 ++++------ relayer/src/telemetry/state.rs | 7 +++---- 7 files changed, 37 insertions(+), 33 deletions(-) diff --git a/config.toml b/config.toml index 84f578d101..82c9ce6581 100644 --- a/config.toml +++ b/config.toml @@ -1,6 +1,6 @@ [global] strategy = 'naive' -log_level = 'debug' +log_level = 'info' telemetry_enabled = true telemetry_port = 3001 diff --git a/relayer-cli/src/commands/start_multi.rs b/relayer-cli/src/commands/start_multi.rs index a61fb2c902..accd764e9f 100644 --- a/relayer-cli/src/commands/start_multi.rs +++ b/relayer-cli/src/commands/start_multi.rs @@ -12,8 +12,12 @@ pub struct StartMultiCmd {} impl Runnable for StartMultiCmd { fn run(&self) { let config = app_config(); - let telemetry = telemetry::spawn(config.global.telemetry_port); - let supervisor = Supervisor::spawn(config.clone(), telemetry).expect("failed to spawn supervisor"); + let telemetry = telemetry::spawn( + config.global.telemetry_port, + config.global.telemetry_enabled, + ); + let supervisor = + Supervisor::spawn(config.clone(), telemetry).expect("failed to spawn supervisor"); match supervisor.run() { Ok(()) => Output::success_msg("done").exit(), Err(e) => Output::error(e).exit(), diff --git a/relayer/src/supervisor.rs b/relayer/src/supervisor.rs index 1bb586271e..fbefb25b76 100644 --- a/relayer/src/supervisor.rs +++ b/relayer/src/supervisor.rs @@ -30,8 +30,8 @@ use crate::{ }; mod error; -pub use error::Error; use crate::telemetry::service::MetricUpdate; +pub use error::Error; /// The supervisor listens for events on multiple pairs of chains, /// and dispatches the events it receives to the appropriate @@ -41,7 +41,7 @@ pub struct Supervisor { registry: Registry, workers: WorkerMap, worker_msg_rx: Receiver, - telemetry: Sender + telemetry: Sender, } impl Supervisor { @@ -55,7 +55,7 @@ impl Supervisor { 
registry, workers: WorkerMap::new(worker_msg_tx), worker_msg_rx, - telemetry + telemetry, }) } @@ -144,7 +144,7 @@ impl Supervisor { Ok(chain_handle) => { let _ = self.telemetry.send(MetricUpdate::RelayChainsNumber(1)); chain_handle - }, + } Err(e) => { error!("skipping workers for chain id {}. reason: failed to spawn chain runtime with error: {}", chain_id, e); continue; @@ -155,7 +155,7 @@ impl Supervisor { Ok(channels) => { let _ = self.telemetry.send(MetricUpdate::RelayChannelsNumber(1)); channels - }, + } Err(e) => { error!("failed to query channels from {}: {}", chain_id, e); continue; diff --git a/relayer/src/telemetry.rs b/relayer/src/telemetry.rs index 67f17f04c0..c620bf4ce6 100644 --- a/relayer/src/telemetry.rs +++ b/relayer/src/telemetry.rs @@ -1,22 +1,26 @@ -use crate::telemetry::state::TelemetryState; -use crate::telemetry::service::TelemetryService; use crate::telemetry::server::TelemetryServer; -use crossbeam_channel::Sender; use crate::telemetry::service::MetricUpdate; +use crate::telemetry::service::TelemetryService; +use crate::telemetry::state::TelemetryState; +use crossbeam_channel::Sender; -pub mod service; pub mod server; +pub mod service; pub mod state; -pub fn spawn(port: u16) -> Sender { +pub fn spawn(port: u16, enabled: bool) -> Sender { let (tx, rx) = crossbeam_channel::unbounded(); - let telemetry_state = TelemetryState::new(); - let service = TelemetryService::new(telemetry_state.clone(), rx); - let server = TelemetryServer::new(telemetry_state.clone()); - // Start the telemetry service and server - std::thread::spawn(move || server.run( telemetry_state.clone(),port)); - std::thread::spawn(move || service.run()); + // Only start the telemetry service and server if it is enabled in the configuration + if enabled { + let telemetry_state = TelemetryState::new(); + let service = TelemetryService::new(telemetry_state.clone(), rx); + let server = TelemetryServer::new(telemetry_state.clone()); + + // Start the telemetry service and server + 
std::thread::spawn(move || server.run(telemetry_state.clone(), port)); + std::thread::spawn(move || service.run()); + } tx -} \ No newline at end of file +} diff --git a/relayer/src/telemetry/server.rs b/relayer/src/telemetry/server.rs index 5e6cd3f0e6..dff177fdf6 100644 --- a/relayer/src/telemetry/server.rs +++ b/relayer/src/telemetry/server.rs @@ -1,5 +1,5 @@ -use prometheus::{Encoder, TextEncoder}; use crate::telemetry::state::TelemetryState; +use prometheus::{Encoder, TextEncoder}; pub struct TelemetryServer { pub state: TelemetryState, @@ -30,4 +30,4 @@ impl TelemetryServer { ) }); } -} \ No newline at end of file +} diff --git a/relayer/src/telemetry/service.rs b/relayer/src/telemetry/service.rs index 4109271522..4a9f3363b2 100644 --- a/relayer/src/telemetry/service.rs +++ b/relayer/src/telemetry/service.rs @@ -1,5 +1,5 @@ -use crossbeam_channel::Receiver; use crate::telemetry::state::TelemetryState; +use crossbeam_channel::Receiver; pub enum MetricUpdate { RelayChainsNumber(u64), @@ -10,15 +10,12 @@ pub enum MetricUpdate { pub struct TelemetryService { pub state: TelemetryState, - pub rx: Receiver + pub rx: Receiver, } impl TelemetryService { pub(crate) fn new(state: TelemetryState, rx: Receiver) -> Self { - Self { - state, - rx, - } + Self { state, rx } } pub(crate) fn run(self) { @@ -29,7 +26,7 @@ impl TelemetryService { fn apply_update(&self, update: MetricUpdate) { match update { - MetricUpdate::RelayChainsNumber(n) => self.state.relay_chains_num.add(n ), + MetricUpdate::RelayChainsNumber(n) => self.state.relay_chains_num.add(n), MetricUpdate::RelayChannelsNumber(n) => self.state.relay_channels_num.add(n), MetricUpdate::AcknowledgePacket(n) => self.state.tx_msg_ibc_acknowledge_packet.add(n), MetricUpdate::TxCount(n) => self.state.tx_count.add(n), diff --git a/relayer/src/telemetry/state.rs b/relayer/src/telemetry/state.rs index 2a18da4c62..d48f63bec3 100644 --- a/relayer/src/telemetry/state.rs +++ b/relayer/src/telemetry/state.rs @@ -1,7 +1,7 @@ -use 
opentelemetry::metrics::BoundCounter; -use opentelemetry_prometheus::PrometheusExporter; use opentelemetry::global; +use opentelemetry::metrics::BoundCounter; use opentelemetry::KeyValue; +use opentelemetry_prometheus::PrometheusExporter; lazy_static! { static ref HANDLER_ALL: [KeyValue; 1] = [KeyValue::new("hermes", "all")]; @@ -22,7 +22,6 @@ pub struct TelemetryState { // Total number of txs processed via Relay tx pub tx_count: BoundCounter<'static, u64>, - } impl TelemetryState { @@ -54,4 +53,4 @@ impl TelemetryState { }; telemetry_state } -} \ No newline at end of file +} From 2a56c9924bcf6393c8a8045ecf8fdb8cbe16e403 Mon Sep 17 00:00:00 2001 From: Andy Nogueira Date: Sun, 23 May 2021 08:38:12 -0400 Subject: [PATCH 09/26] Added more metrics to service. Hookup the packet timeout metric (#868) --- relayer/src/supervisor.rs | 7 ++-- relayer/src/telemetry.rs | 4 ++- relayer/src/telemetry/server.rs | 3 +- relayer/src/telemetry/service.rs | 18 ++++++++-- relayer/src/telemetry/state.rs | 62 +++++++++++++++++++++++++++++--- 5 files changed, 82 insertions(+), 12 deletions(-) diff --git a/relayer/src/supervisor.rs b/relayer/src/supervisor.rs index fbefb25b76..a63ec4a6a0 100644 --- a/relayer/src/supervisor.rs +++ b/relayer/src/supervisor.rs @@ -100,18 +100,19 @@ impl Supervisor { IbcEvent::SendPacket(ref packet) => { if let Ok(object) = Object::for_send_packet(packet, src_chain) { collected.per_object.entry(object).or_default().push(event); - // Increase counter } } IbcEvent::TimeoutPacket(ref packet) => { if let Ok(object) = Object::for_timeout_packet(packet, src_chain) { + // TODO: Is this the right place to record the telemetry metric ? 
+ let _ = self.telemetry.send(MetricUpdate::TimeoutPacket(1)); collected.per_object.entry(object).or_default().push(event); } } IbcEvent::WriteAcknowledgement(ref packet) => { if let Ok(object) = Object::for_write_ack(packet, src_chain) { - // TODO: Find a better place to record the telemetry metric - let _ = self.telemetry.send(MetricUpdate::AcknowledgePacket(1)); + // TODO: Is this the right place to record the telemetry metric ? + let _ = self.telemetry.send(MetricUpdate::IbcAcknowledgePacket(1)); collected.per_object.entry(object).or_default().push(event); } } diff --git a/relayer/src/telemetry.rs b/relayer/src/telemetry.rs index c620bf4ce6..d412bc6f7b 100644 --- a/relayer/src/telemetry.rs +++ b/relayer/src/telemetry.rs @@ -13,7 +13,9 @@ pub fn spawn(port: u16, enabled: bool) -> Sender { // Only start the telemetry service and server if it is enabled in the configuration if enabled { - let telemetry_state = TelemetryState::new(); + let telemetry_state = TelemetryState { + ..Default::default() + }; let service = TelemetryService::new(telemetry_state.clone(), rx); let server = TelemetryServer::new(telemetry_state.clone()); diff --git a/relayer/src/telemetry/server.rs b/relayer/src/telemetry/server.rs index dff177fdf6..2caf8fed73 100644 --- a/relayer/src/telemetry/server.rs +++ b/relayer/src/telemetry/server.rs @@ -10,7 +10,8 @@ impl TelemetryServer { TelemetryServer { state } } - pub(crate) fn run(&self, telemetry_state: TelemetryState, listen_port: u16) -> () { + #[allow(clippy::manual_strip)] + pub(crate) fn run(&self, telemetry_state: TelemetryState, listen_port: u16) { rouille::start_server(format!("localhost:{}", listen_port), move |request| { router!(request, // The prometheus endpoint diff --git a/relayer/src/telemetry/service.rs b/relayer/src/telemetry/service.rs index 4a9f3363b2..ec9d15e6f3 100644 --- a/relayer/src/telemetry/service.rs +++ b/relayer/src/telemetry/service.rs @@ -4,8 +4,14 @@ use crossbeam_channel::Receiver; pub enum MetricUpdate { 
RelayChainsNumber(u64), RelayChannelsNumber(u64), - AcknowledgePacket(u64), TxCount(u64), + TxSuccess(u64), + TxFailed(u64), + IbcAcknowledgePacket(u64), + IbcRecvPacket(u64), + IbcTransferSend(u64), + IbcTransferReceive(u64), + TimeoutPacket(u64), } pub struct TelemetryService { @@ -28,8 +34,16 @@ impl TelemetryService { match update { MetricUpdate::RelayChainsNumber(n) => self.state.relay_chains_num.add(n), MetricUpdate::RelayChannelsNumber(n) => self.state.relay_channels_num.add(n), - MetricUpdate::AcknowledgePacket(n) => self.state.tx_msg_ibc_acknowledge_packet.add(n), + MetricUpdate::IbcAcknowledgePacket(n) => { + self.state.tx_msg_ibc_acknowledge_packet.add(n) + } + MetricUpdate::IbcRecvPacket(n) => self.state.tx_msg_ibc_recv_packet.add(n), MetricUpdate::TxCount(n) => self.state.tx_count.add(n), + MetricUpdate::TxSuccess(n) => self.state.tx_successful.add(n), + MetricUpdate::TxFailed(n) => self.state.tx_failed.add(n), + MetricUpdate::IbcTransferSend(n) => self.state.ibc_transfer_send.add(n), + MetricUpdate::IbcTransferReceive(n) => self.state.ibc_transfer_receive.add(n), + MetricUpdate::TimeoutPacket(n) => self.state.ibc_timeout_packet.add(n), } } } diff --git a/relayer/src/telemetry/state.rs b/relayer/src/telemetry/state.rs index d48f63bec3..41e784024d 100644 --- a/relayer/src/telemetry/state.rs +++ b/relayer/src/telemetry/state.rs @@ -20,12 +20,30 @@ pub struct TelemetryState { // Total number of IBC packets acknowledged pub tx_msg_ibc_acknowledge_packet: BoundCounter<'static, u64>, - // Total number of txs processed via Relay tx + // Total number of txs processed via relay tx pub tx_count: BoundCounter<'static, u64>, + + // Total number of successful txs processed via relay tx + pub tx_successful: BoundCounter<'static, u64>, + + // Total number of failed txs processed via relay tx + pub tx_failed: BoundCounter<'static, u64>, + + // Total number of IBC transfers sent from a chain (source or sink) + pub ibc_transfer_send: BoundCounter<'static, u64>, + + // 
Total number of IBC transfers received to a chain (source or sink) + pub ibc_transfer_receive: BoundCounter<'static, u64>, + + // Total number of IBC packets received + pub tx_msg_ibc_recv_packet: BoundCounter<'static, u64>, + + // Total number of IBC timeout packets + pub ibc_timeout_packet: BoundCounter<'static, u64>, } -impl TelemetryState { - pub fn new() -> TelemetryState { +impl Default for TelemetryState { + fn default() -> Self { let exporter = opentelemetry_prometheus::exporter().init(); let meter = global::meter("hermes"); let telemetry_state = TelemetryState { @@ -46,8 +64,42 @@ impl TelemetryState { .init() .bind(HANDLER_ALL.as_ref()), tx_count: meter - .u64_counter("tx_count") - .with_description("Total number of txs processed via Relay tx") + .u64_counter("hermes_tx_count") + .with_description("Total number of txs processed via relay tx") + .init() + .bind(HANDLER_ALL.as_ref()), + tx_successful: meter + .u64_counter("hermes_tx_successful") + .with_description("Total number of successful txs processed via relay tx") + .init() + .bind(HANDLER_ALL.as_ref()), + tx_failed: meter + .u64_counter("hermes_tx_failed") + .with_description("Total number of failed txs processed via relay tx") + .init() + .bind(HANDLER_ALL.as_ref()), + ibc_transfer_send: meter + .u64_counter("hermes_ibc_transfer_send") + .with_description( + "Total number of IBC transfers sent from a chain (source or sink)", + ) + .init() + .bind(HANDLER_ALL.as_ref()), + ibc_transfer_receive: meter + .u64_counter("hermes_ibc_transfer_receive") + .with_description( + "Total number of IBC transfers received to a chain (source or sink)", + ) + .init() + .bind(HANDLER_ALL.as_ref()), + tx_msg_ibc_recv_packet: meter + .u64_counter("hermes_tx_msg_ibc_recv_packet") + .with_description("Total number of IBC packets received") + .init() + .bind(HANDLER_ALL.as_ref()), + ibc_timeout_packet: meter + .u64_counter("hermes_ibc_timeout_packet") + .with_description("Total number of IBC timeout packets") .init() 
.bind(HANDLER_ALL.as_ref()), }; From a178789297913ca068d711b9a03d09d22c2aec0e Mon Sep 17 00:00:00 2001 From: Romain Ruetschi Date: Wed, 26 May 2021 14:57:11 +0200 Subject: [PATCH 10/26] Move telemetry service into `ibc-telemetry` crate --- Cargo.lock | 15 ++++-- Cargo.toml | 3 +- relayer-cli/Cargo.toml | 9 ++-- relayer-cli/src/commands/start.rs | 26 ++++++---- relayer/Cargo.toml | 13 ++--- relayer/src/lib.rs | 7 --- relayer/src/supervisor.rs | 47 ++++++++++++++----- relayer/src/telemetry.rs | 28 ----------- telemetry/Cargo.toml | 13 +++++ telemetry/src/lib.rs | 47 +++++++++++++++++++ .../src/telemetry => telemetry/src}/server.rs | 4 +- .../telemetry => telemetry/src}/service.rs | 5 +- .../src/telemetry => telemetry/src}/state.rs | 11 ++--- 13 files changed, 146 insertions(+), 82 deletions(-) delete mode 100644 relayer/src/telemetry.rs create mode 100644 telemetry/Cargo.toml create mode 100644 telemetry/src/lib.rs rename {relayer/src/telemetry => telemetry/src}/server.rs (95%) rename {relayer/src/telemetry => telemetry/src}/service.rs (95%) rename {relayer/src/telemetry => telemetry/src}/state.rs (95%) diff --git a/Cargo.lock b/Cargo.lock index 5768f6d7fd..a8ca3995cd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1403,17 +1403,13 @@ dependencies = [ "humantime-serde", "ibc", "ibc-proto", + "ibc-telemetry", "itertools 0.10.0", "k256", - "lazy_static", - "opentelemetry", - "opentelemetry-prometheus", - "prometheus", "prost", "prost-types", "retry", "ripemd160", - "rouille", "serde", "serde_cbor", "serde_derive", @@ -1450,6 +1446,7 @@ dependencies = [ "ibc", "ibc-proto", "ibc-relayer", + "ibc-telemetry", "itertools 0.10.0", "once_cell", "prost", @@ -1473,6 +1470,14 @@ dependencies = [ [[package]] name = "ibc-telemetry" version = "0.1.0" +dependencies = [ + "crossbeam-channel 0.5.1", + "once_cell", + "opentelemetry", + "opentelemetry-prometheus", + "prometheus", + "rouille", +] [[package]] name = "ics23" diff --git a/Cargo.toml b/Cargo.toml index b78f2a0226..82a431efe2 
100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,7 +4,8 @@ members = [ "modules", "relayer", "relayer-cli", - "proto" + "telemetry", + "proto", ] exclude = [ diff --git a/relayer-cli/Cargo.toml b/relayer-cli/Cargo.toml index fa4ca716e2..3b81d02b09 100644 --- a/relayer-cli/Cargo.toml +++ b/relayer-cli/Cargo.toml @@ -17,12 +17,15 @@ description = """ name = "hermes" [features] +default = ["telemetry"] profiling = ["ibc-relayer/profiling"] +telemetry = ["ibc-relayer/telemetry", "ibc-telemetry"] [dependencies] -ibc = { version = "0.3.2", path = "../modules" } -ibc-relayer = { version = "0.3.2", path = "../relayer" } -ibc-proto = { version = "0.8.0", path = "../proto" } +ibc = { version = "0.3.2", path = "../modules" } +ibc-relayer = { version = "0.3.2", path = "../relayer" } +ibc-proto = { version = "0.8.0", path = "../proto" } +ibc-telemetry = { version = "0.1.0", path = "../telemetry", optional = true } anomaly = "0.2.0" gumdrop = { version = "0.7", features = ["default_expr"] } diff --git a/relayer-cli/src/commands/start.rs b/relayer-cli/src/commands/start.rs index aef70b7069..f666c37b22 100644 --- a/relayer-cli/src/commands/start.rs +++ b/relayer-cli/src/commands/start.rs @@ -1,7 +1,7 @@ use abscissa_core::{Command, Options, Runnable}; +use ibc_relayer::config::Config; use ibc_relayer::supervisor::Supervisor; -use ibc_relayer::telemetry; use crate::conclude::Output; use crate::prelude::*; @@ -13,17 +13,25 @@ impl Runnable for StartCmd { fn run(&self) { let config = app_config(); - let telemetry = telemetry::spawn( - config.global.telemetry_port, - config.global.telemetry_enabled, - ); - - let supervisor = - Supervisor::spawn(config.clone(), telemetry).expect("failed to spawn supervisor"); - + let supervisor = spawn_supervisor(config.clone()); match supervisor.run() { Ok(()) => Output::success_msg("done").exit(), Err(e) => Output::error(e).exit(), } } } + +#[cfg(feature = "telemetry")] +fn spawn_supervisor(config: Config) -> Supervisor { + let telemetry = 
ibc_telemetry::spawn( + config.global.telemetry_port, + config.global.telemetry_enabled, + ); + + Supervisor::spawn_with_telemetry(config, telemetry) +} + +#[cfg(not(feature = "telemetry"))] +fn spawn_supervisor(config: Config) -> Supervisor { + Supervisor::spawn(config) +} diff --git a/relayer/Cargo.toml b/relayer/Cargo.toml index 931b582682..c8369c04c8 100644 --- a/relayer/Cargo.toml +++ b/relayer/Cargo.toml @@ -14,10 +14,12 @@ description = """ [features] profiling = [] +telemetry = ["ibc-telemetry"] [dependencies] -ibc = { version = "0.3.2", path = "../modules" } -ibc-proto = { version = "0.8.0", path = "../proto" } +ibc = { version = "0.3.2", path = "../modules" } +ibc-proto = { version = "0.8.0", path = "../proto" } +ibc-telemetry = { version = "0.1.0", path = "../telemetry", optional = true } subtle-encoding = "0.5" anomaly = "0.2.0" @@ -53,13 +55,6 @@ dyn-clone = "1.0.3" retry = { version = "1.2.1", default-features = false } async-stream = "0.3.2" -# telemetry -lazy_static = "1.4.0" -opentelemetry = "0.14.0" -opentelemetry-prometheus = "0.7.0" -prometheus = "0.12.0" -rouille = "3.1.1" - [dependencies.tendermint] version = "=0.19.0" diff --git a/relayer/src/lib.rs b/relayer/src/lib.rs index f17858c7f1..8a0f1d53e7 100644 --- a/relayer/src/lib.rs +++ b/relayer/src/lib.rs @@ -16,12 +16,6 @@ //! //! 
[Hermes]: https://docs.rs/ibc-relayer-cli/0.2.0/ -#[macro_use] -extern crate lazy_static; - -#[macro_use] -extern crate rouille; - pub mod chain; pub mod channel; pub mod config; @@ -36,7 +30,6 @@ pub mod macros; pub mod object; pub mod registry; pub mod supervisor; -pub mod telemetry; pub mod transfer; pub mod upgrade_chain; pub mod util; diff --git a/relayer/src/supervisor.rs b/relayer/src/supervisor.rs index de03aaa58a..82b6804d3f 100644 --- a/relayer/src/supervisor.rs +++ b/relayer/src/supervisor.rs @@ -2,7 +2,7 @@ use std::{collections::HashMap, sync::Arc, time::Duration}; use anomaly::BoxError; -use crossbeam_channel::{Receiver, Sender}; +use crossbeam_channel::Receiver; use itertools::Itertools; use tracing::{debug, error, trace, warn}; @@ -16,6 +16,9 @@ use ibc::{ use ibc_proto::ibc::core::channel::v1::QueryChannelsRequest; +#[cfg(feature = "telemetry")] +use ibc_telemetry::service::MetricUpdate; + use crate::{ chain::{counterparty::channel_connection_client, handle::ChainHandle}, config::Config, @@ -30,9 +33,18 @@ use crate::{ }; mod error; -use crate::telemetry::service::MetricUpdate; pub use error::Error; +#[cfg(feature = "telemetry")] +use ibc_telemetry::TelemetryHandle; + +macro_rules! metric { + ($t:expr, $e:expr) => { + #[cfg(feature = "telemetry")] + $t.send($e); + }; +} + /// The supervisor listens for events on multiple pairs of chains, /// and dispatches the events it receives to the appropriate /// worker, based on the [`Object`] associated with each event. @@ -41,22 +53,35 @@ pub struct Supervisor { registry: Registry, workers: WorkerMap, worker_msg_rx: Receiver, - telemetry: Sender, + + #[cfg(feature = "telemetry")] + telemetry: TelemetryHandle, } impl Supervisor { /// Spawns a [`Supervisor`] which will listen for events on all the chains in the [`Config`]. 
- pub fn spawn(config: Config, telemetry: Sender) -> Result { + pub fn spawn(config: Config) -> Self { let registry = Registry::new(config.clone()); let (worker_msg_tx, worker_msg_rx) = crossbeam_channel::unbounded(); - Ok(Self { + Self { config, registry, workers: WorkerMap::new(worker_msg_tx), worker_msg_rx, - telemetry, - }) + + #[cfg(feature = "telemetry")] + telemetry: TelemetryHandle::noop(), + } + } + + #[cfg(feature = "telemetry")] + /// Spawns a [`Supervisor`] which will listen for events on all the chains in the [`Config`], + /// with telemetry enabled. + pub fn spawn_with_telemetry(config: Config, telemetry: TelemetryHandle) -> Self { + let mut supervisor = Self::spawn(config); + supervisor.telemetry = telemetry; + supervisor } /// Collect the events we are interested in from an [`EventBatch`], @@ -105,14 +130,14 @@ impl Supervisor { IbcEvent::TimeoutPacket(ref packet) => { if let Ok(object) = Object::for_timeout_packet(packet, src_chain) { // TODO: Is this the right place to record the telemetry metric ? - let _ = self.telemetry.send(MetricUpdate::TimeoutPacket(1)); + metric!(self.telemetry, MetricUpdate::TimeoutPacket(1)); collected.per_object.entry(object).or_default().push(event); } } IbcEvent::WriteAcknowledgement(ref packet) => { if let Ok(object) = Object::for_write_ack(packet, src_chain) { // TODO: Is this the right place to record the telemetry metric ? 
- let _ = self.telemetry.send(MetricUpdate::IbcAcknowledgePacket(1)); + metric!(self.telemetry, MetricUpdate::IbcAcknowledgePacket(1)); collected.per_object.entry(object).or_default().push(event); } } @@ -143,7 +168,7 @@ impl Supervisor { for chain_id in chain_ids { let chain = match self.registry.get_or_spawn(&chain_id) { Ok(chain_handle) => { - let _ = self.telemetry.send(MetricUpdate::RelayChainsNumber(1)); + metric!(self.telemetry, MetricUpdate::RelayChainsNumber(1)); chain_handle } Err(e) => { @@ -154,7 +179,7 @@ impl Supervisor { let channels = match chain.query_channels(req.clone()) { Ok(channels) => { - let _ = self.telemetry.send(MetricUpdate::RelayChannelsNumber(1)); + metric!(self.telemetry, MetricUpdate::RelayChannelsNumber(1)); channels } Err(e) => { diff --git a/relayer/src/telemetry.rs b/relayer/src/telemetry.rs deleted file mode 100644 index d412bc6f7b..0000000000 --- a/relayer/src/telemetry.rs +++ /dev/null @@ -1,28 +0,0 @@ -use crate::telemetry::server::TelemetryServer; -use crate::telemetry::service::MetricUpdate; -use crate::telemetry::service::TelemetryService; -use crate::telemetry::state::TelemetryState; -use crossbeam_channel::Sender; - -pub mod server; -pub mod service; -pub mod state; - -pub fn spawn(port: u16, enabled: bool) -> Sender { - let (tx, rx) = crossbeam_channel::unbounded(); - - // Only start the telemetry service and server if it is enabled in the configuration - if enabled { - let telemetry_state = TelemetryState { - ..Default::default() - }; - let service = TelemetryService::new(telemetry_state.clone(), rx); - let server = TelemetryServer::new(telemetry_state.clone()); - - // Start the telemetry service and server - std::thread::spawn(move || server.run(telemetry_state.clone(), port)); - std::thread::spawn(move || service.run()); - } - - tx -} diff --git a/telemetry/Cargo.toml b/telemetry/Cargo.toml new file mode 100644 index 0000000000..5181370a09 --- /dev/null +++ b/telemetry/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = 
"ibc-telemetry" +version = "0.1.0" +authors = ["Informal Systems "] +edition = "2018" + +[dependencies] +crossbeam-channel = "0.5.1" +once_cell = "1.7.2" +opentelemetry = "0.14.0" +opentelemetry-prometheus = "0.7.0" +prometheus = "0.12.0" +rouille = "3.1.1" diff --git a/telemetry/src/lib.rs b/telemetry/src/lib.rs new file mode 100644 index 0000000000..8edb3d1df4 --- /dev/null +++ b/telemetry/src/lib.rs @@ -0,0 +1,47 @@ +pub mod server; +pub mod service; +pub mod state; + +use crossbeam_channel::Sender; + +use crate::{ + server::TelemetryServer, + service::{MetricUpdate, TelemetryService}, + state::TelemetryState, +}; + +pub struct TelemetryHandle { + tx: Option>, +} + +impl TelemetryHandle { + pub fn noop() -> Self { + Self { tx: None } + } + + pub fn send(&self, update: MetricUpdate) { + if let Some(ref tx) = self.tx { + let _ = tx.send(update); + } + } +} + +pub fn spawn(port: u16, enabled: bool) -> TelemetryHandle { + let (tx, rx) = crossbeam_channel::unbounded(); + + // Only start the telemetry service and server if it is enabled in the configuration + if !enabled { + return TelemetryHandle::noop(); + } + + let telemetry_state = TelemetryState::default(); + + let service = TelemetryService::new(telemetry_state.clone(), rx); + let server = TelemetryServer::new(telemetry_state.clone()); + + // Start the telemetry service and server + std::thread::spawn(move || server.run(telemetry_state.clone(), port)); + std::thread::spawn(move || service.run()); + + TelemetryHandle { tx: Some(tx) } +} diff --git a/relayer/src/telemetry/server.rs b/telemetry/src/server.rs similarity index 95% rename from relayer/src/telemetry/server.rs rename to telemetry/src/server.rs index 2caf8fed73..896e73979b 100644 --- a/relayer/src/telemetry/server.rs +++ b/telemetry/src/server.rs @@ -1,5 +1,7 @@ -use crate::telemetry::state::TelemetryState; use prometheus::{Encoder, TextEncoder}; +use rouille::router; + +use crate::state::TelemetryState; pub struct TelemetryServer { pub state: 
TelemetryState, diff --git a/relayer/src/telemetry/service.rs b/telemetry/src/service.rs similarity index 95% rename from relayer/src/telemetry/service.rs rename to telemetry/src/service.rs index ec9d15e6f3..ac563f1ec3 100644 --- a/relayer/src/telemetry/service.rs +++ b/telemetry/src/service.rs @@ -1,6 +1,8 @@ -use crate::telemetry::state::TelemetryState; use crossbeam_channel::Receiver; +use crate::state::TelemetryState; + +#[derive(Debug)] pub enum MetricUpdate { RelayChainsNumber(u64), RelayChannelsNumber(u64), @@ -14,6 +16,7 @@ pub enum MetricUpdate { TimeoutPacket(u64), } +#[derive(Debug)] pub struct TelemetryService { pub state: TelemetryState, pub rx: Receiver, diff --git a/relayer/src/telemetry/state.rs b/telemetry/src/state.rs similarity index 95% rename from relayer/src/telemetry/state.rs rename to telemetry/src/state.rs index 41e784024d..3129a0edb6 100644 --- a/relayer/src/telemetry/state.rs +++ b/telemetry/src/state.rs @@ -1,13 +1,10 @@ -use opentelemetry::global; -use opentelemetry::metrics::BoundCounter; -use opentelemetry::KeyValue; +use once_cell::sync::Lazy; +use opentelemetry::{global, metrics::BoundCounter, KeyValue}; use opentelemetry_prometheus::PrometheusExporter; -lazy_static! 
{ - static ref HANDLER_ALL: [KeyValue; 1] = [KeyValue::new("hermes", "all")]; -} +static HANDLER_ALL: Lazy<[KeyValue; 1]> = Lazy::new(|| [KeyValue::new("hermes", "all")]); -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct TelemetryState { pub exporter: PrometheusExporter, From 4861504bd3bb6184117f1014452c54959fe1f59b Mon Sep 17 00:00:00 2001 From: Romain Ruetschi Date: Wed, 26 May 2021 15:33:29 +0200 Subject: [PATCH 11/26] Move `metric!` macro into its own module --- relayer/src/lib.rs | 2 ++ relayer/src/supervisor.rs | 8 +------- relayer/src/telemetry.rs | 7 +++++++ 3 files changed, 10 insertions(+), 7 deletions(-) create mode 100644 relayer/src/telemetry.rs diff --git a/relayer/src/lib.rs b/relayer/src/lib.rs index 8a0f1d53e7..79f416be15 100644 --- a/relayer/src/lib.rs +++ b/relayer/src/lib.rs @@ -34,3 +34,5 @@ pub mod transfer; pub mod upgrade_chain; pub mod util; pub mod worker; + +mod telemetry; diff --git a/relayer/src/supervisor.rs b/relayer/src/supervisor.rs index 82b6804d3f..ec67ed6121 100644 --- a/relayer/src/supervisor.rs +++ b/relayer/src/supervisor.rs @@ -26,6 +26,7 @@ use crate::{ self, monitor::{EventBatch, UnwrapOrClone}, }, + metric, object::{Client, Object, UnidirectionalChannelPath}, registry::Registry, util::try_recv_multiple, @@ -38,13 +39,6 @@ pub use error::Error; #[cfg(feature = "telemetry")] use ibc_telemetry::TelemetryHandle; -macro_rules! metric { - ($t:expr, $e:expr) => { - #[cfg(feature = "telemetry")] - $t.send($e); - }; -} - /// The supervisor listens for events on multiple pairs of chains, /// and dispatches the events it receives to the appropriate /// worker, based on the [`Object`] associated with each event. diff --git a/relayer/src/telemetry.rs b/relayer/src/telemetry.rs new file mode 100644 index 0000000000..c82078c96b --- /dev/null +++ b/relayer/src/telemetry.rs @@ -0,0 +1,7 @@ +#[macro_export] +macro_rules! 
metric { + ($t:expr, $e:expr) => { + #[cfg(feature = "telemetry")] + $t.send($e); + }; +} From 2a190d8c9433a38527e9345d531ca66f3e8b8d00 Mon Sep 17 00:00:00 2001 From: Romain Ruetschi Date: Wed, 26 May 2021 15:33:56 +0200 Subject: [PATCH 12/26] Move telemetry config under `[telemetry]` section --- config.toml | 6 ++++-- relayer-cli/src/commands/start.rs | 5 +---- relayer/src/config.rs | 23 +++++++++++++++++------ 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/config.toml b/config.toml index 82c9ce6581..e0f2cb773c 100644 --- a/config.toml +++ b/config.toml @@ -1,8 +1,10 @@ [global] strategy = 'naive' log_level = 'info' -telemetry_enabled = true -telemetry_port = 3001 + +[telemetry] +enabled = true +port = 3001 [[chains]] id = 'ibc-0' diff --git a/relayer-cli/src/commands/start.rs b/relayer-cli/src/commands/start.rs index f666c37b22..bff32ca8e6 100644 --- a/relayer-cli/src/commands/start.rs +++ b/relayer-cli/src/commands/start.rs @@ -23,10 +23,7 @@ impl Runnable for StartCmd { #[cfg(feature = "telemetry")] fn spawn_supervisor(config: Config) -> Supervisor { - let telemetry = ibc_telemetry::spawn( - config.global.telemetry_port, - config.global.telemetry_enabled, - ); + let telemetry = ibc_telemetry::spawn(config.telemetry.port, config.telemetry.enabled); Supervisor::spawn_with_telemetry(config, telemetry) } diff --git a/relayer/src/config.rs b/relayer/src/config.rs index 072765a3d8..a887c9b981 100644 --- a/relayer/src/config.rs +++ b/relayer/src/config.rs @@ -39,6 +39,8 @@ pub mod default { #[derive(Clone, Debug, Default, Deserialize, Serialize)] pub struct Config { pub global: GlobalConfig, + #[serde(default)] + pub telemetry: TelemetryConfig, #[serde(default = "Vec::new", skip_serializing_if = "Vec::is_empty")] pub chains: Vec, #[serde(skip_serializing_if = "Option::is_none")] @@ -88,10 +90,6 @@ pub struct GlobalConfig { /// All valid log levels, as defined in tracing: /// https://docs.rs/tracing-core/0.1.17/tracing_core/struct.Level.html pub 
log_level: String, - - pub telemetry_enabled: bool, - - pub telemetry_port: u16, } impl Default for GlobalConfig { @@ -99,8 +97,21 @@ impl Default for GlobalConfig { Self { strategy: Strategy::default(), log_level: "info".to_string(), - telemetry_enabled: true, - telemetry_port: 3000, + } + } +} + +#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct TelemetryConfig { + pub enabled: bool, + pub port: u16, +} + +impl Default for TelemetryConfig { + fn default() -> Self { + Self { + enabled: true, + port: 3000, } } } From 85ff90276b0d0293d7aa60509b8e3443a9eaa86d Mon Sep 17 00:00:00 2001 From: Romain Ruetschi Date: Wed, 26 May 2021 15:35:29 +0200 Subject: [PATCH 13/26] Disable telemetry by default, fix port to 3001 --- relayer/src/config.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/relayer/src/config.rs b/relayer/src/config.rs index a887c9b981..0454d4944a 100644 --- a/relayer/src/config.rs +++ b/relayer/src/config.rs @@ -110,8 +110,8 @@ pub struct TelemetryConfig { impl Default for TelemetryConfig { fn default() -> Self { Self { - enabled: true, - port: 3000, + enabled: false, + port: 3001, } } } From c2f2fa98c9b889273ea4aff3acb77627a9cf67bc Mon Sep 17 00:00:00 2001 From: Romain Ruetschi Date: Wed, 26 May 2021 16:17:29 +0200 Subject: [PATCH 14/26] Try to fix libm.so error --- ci/relayer.Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/relayer.Dockerfile b/ci/relayer.Dockerfile index 58f055350b..74b6402218 100644 --- a/ci/relayer.Dockerfile +++ b/ci/relayer.Dockerfile @@ -7,7 +7,7 @@ LABEL maintainer="hello@informal.systems" ARG RELEASE # Add Python 3 -RUN apt-get update -y && apt-get install python3 -y && apt-get install python3-toml -y +RUN apt-get update -y && apt-get install libc6-dev python3 python3-toml -y # Copy relayer executable COPY ./hermes /usr/bin/hermes From cf98b0dde6fb47578db81a7381653dabd81a622d Mon Sep 17 00:00:00 2001 From: Romain Ruetschi Date: Wed, 26 May 2021 16:31:59 +0200 Subject: 
[PATCH 15/26] Wrap telemetry state in Arc and simplify server a little --- telemetry/src/lib.rs | 9 ++++---- telemetry/src/server.rs | 50 +++++++++++++++++----------------------- telemetry/src/service.rs | 15 ++++++++---- 3 files changed, 36 insertions(+), 38 deletions(-) diff --git a/telemetry/src/lib.rs b/telemetry/src/lib.rs index 8edb3d1df4..e3e5a384a7 100644 --- a/telemetry/src/lib.rs +++ b/telemetry/src/lib.rs @@ -2,10 +2,11 @@ pub mod server; pub mod service; pub mod state; +use std::sync::Arc; + use crossbeam_channel::Sender; use crate::{ - server::TelemetryServer, service::{MetricUpdate, TelemetryService}, state::TelemetryState, }; @@ -34,13 +35,11 @@ pub fn spawn(port: u16, enabled: bool) -> TelemetryHandle { return TelemetryHandle::noop(); } - let telemetry_state = TelemetryState::default(); - + let telemetry_state = Arc::new(TelemetryState::default()); let service = TelemetryService::new(telemetry_state.clone(), rx); - let server = TelemetryServer::new(telemetry_state.clone()); // Start the telemetry service and server - std::thread::spawn(move || server.run(telemetry_state.clone(), port)); + std::thread::spawn(move || server::run(telemetry_state, port)); std::thread::spawn(move || service.run()); TelemetryHandle { tx: Some(tx) } diff --git a/telemetry/src/server.rs b/telemetry/src/server.rs index 896e73979b..e614220434 100644 --- a/telemetry/src/server.rs +++ b/telemetry/src/server.rs @@ -1,36 +1,28 @@ +use std::sync::Arc; + use prometheus::{Encoder, TextEncoder}; use rouille::router; use crate::state::TelemetryState; -pub struct TelemetryServer { - pub state: TelemetryState, -} - -impl TelemetryServer { - pub(crate) fn new(state: TelemetryState) -> TelemetryServer { - TelemetryServer { state } - } - - #[allow(clippy::manual_strip)] - pub(crate) fn run(&self, telemetry_state: TelemetryState, listen_port: u16) { - rouille::start_server(format!("localhost:{}", listen_port), move |request| { - router!(request, - // The prometheus endpoint - (GET) 
(/metrics) => { - let mut buffer = vec![]; - let encoder = TextEncoder::new(); - let metric_families = telemetry_state.exporter.registry().gather(); - encoder.encode(&metric_families, &mut buffer).unwrap(); - rouille::Response::from_data(encoder.format_type().to_string(), buffer) - }, +#[allow(clippy::manual_strip)] +pub fn run(telemetry_state: Arc, port: u16) { + rouille::start_server(("localhost", port), move |request| { + router!(request, + // The prometheus endpoint + (GET) (/metrics) => { + let mut buffer = vec![]; + let encoder = TextEncoder::new(); + let metric_families = telemetry_state.exporter.registry().gather(); + encoder.encode(&metric_families, &mut buffer).unwrap(); + rouille::Response::from_data(encoder.format_type().to_string(), buffer) + }, - // Any route other than /metrics - // return an empty response with a 404 status code. - _ => { - rouille::Response::empty_404() - } - ) - }); - } + // Any route other than /metrics + // return an empty response with a 404 status code. 
+ _ => { + rouille::Response::empty_404() + } + ) + }); } diff --git a/telemetry/src/service.rs b/telemetry/src/service.rs index ac563f1ec3..b59b2f7f24 100644 --- a/telemetry/src/service.rs +++ b/telemetry/src/service.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use crossbeam_channel::Receiver; use crate::state::TelemetryState; @@ -16,18 +18,23 @@ pub enum MetricUpdate { TimeoutPacket(u64), } +pub fn run(telemetry_state: Arc, rx: Receiver) { + let service = TelemetryService::new(telemetry_state, rx); + service.run() +} + #[derive(Debug)] pub struct TelemetryService { - pub state: TelemetryState, - pub rx: Receiver, + state: Arc, + rx: Receiver, } impl TelemetryService { - pub(crate) fn new(state: TelemetryState, rx: Receiver) -> Self { + pub fn new(state: Arc, rx: Receiver) -> Self { Self { state, rx } } - pub(crate) fn run(self) { + pub fn run(self) { while let Ok(update) = self.rx.recv() { self.apply_update(update); } From 7dd967fed3c00ceed19dc3acd15c1a20f9e8cf89 Mon Sep 17 00:00:00 2001 From: Romain Ruetschi Date: Wed, 26 May 2021 16:49:42 +0200 Subject: [PATCH 16/26] Simplify server a bit more --- telemetry/src/server.rs | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/telemetry/src/server.rs b/telemetry/src/server.rs index e614220434..c9d32aa7fc 100644 --- a/telemetry/src/server.rs +++ b/telemetry/src/server.rs @@ -1,28 +1,41 @@ use std::sync::Arc; use prometheus::{Encoder, TextEncoder}; -use rouille::router; +use rouille::Request; use crate::state::TelemetryState; -#[allow(clippy::manual_strip)] +enum Route { + Metrics, + Other, +} + +impl Route { + fn from_request(request: &Request) -> Route { + if request.url() == "/metrics" { + Route::Metrics + } else { + Route::Other + } + } +} + pub fn run(telemetry_state: Arc, port: u16) { rouille::start_server(("localhost", port), move |request| { - router!(request, + match Route::from_request(request) { // The prometheus endpoint - (GET) (/metrics) => { + 
Route::Metrics => { let mut buffer = vec![]; let encoder = TextEncoder::new(); let metric_families = telemetry_state.exporter.registry().gather(); encoder.encode(&metric_families, &mut buffer).unwrap(); - rouille::Response::from_data(encoder.format_type().to_string(), buffer) - }, - // Any route other than /metrics - // return an empty response with a 404 status code. - _ => { - rouille::Response::empty_404() + rouille::Response::from_data(encoder.format_type().to_string(), buffer) } - ) - }); + + // Any other route + // Return an empty response with a 404 status code. + Route::Other => rouille::Response::empty_404(), + } + }) } From 4201f92afaab1a0463080cd296234bc086803667 Mon Sep 17 00:00:00 2001 From: Romain Ruetschi Date: Thu, 27 May 2021 10:04:29 +0200 Subject: [PATCH 17/26] Fix glibc version mismatch between CI and Docker image --- .github/workflows/e2e.yaml | 2 +- ci/relayer.Dockerfile | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index 37c80bbc3a..d85b742fa1 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -22,7 +22,7 @@ on: jobs: test-end-to-end: - runs-on: ubuntu-latest + runs-on: ubuntu-18.04 steps: - uses: actions/checkout@v2 - uses: actions-rs/toolchain@v1 diff --git a/ci/relayer.Dockerfile b/ci/relayer.Dockerfile index 74b6402218..80c810b6b7 100644 --- a/ci/relayer.Dockerfile +++ b/ci/relayer.Dockerfile @@ -1,13 +1,13 @@ ##################################################### #### Relayer image #### ##################################################### -FROM rust:slim +FROM ubuntu:18.04 LABEL maintainer="hello@informal.systems" ARG RELEASE # Add Python 3 -RUN apt-get update -y && apt-get install libc6-dev python3 python3-toml -y +RUN apt-get update -y && apt-get install python3 python3-toml -y # Copy relayer executable COPY ./hermes /usr/bin/hermes From 0dbf86aab1e36231e4e701dfdb8b080e57619ff1 Mon Sep 17 00:00:00 2001 From: Romain Ruetschi Date: 
Thu, 27 May 2021 12:11:03 +0200 Subject: [PATCH 18/26] Push telemetry handle down into workers --- modules/src/events.rs | 6 +-- relayer-cli/src/commands/start.rs | 6 +-- relayer/src/chain/handle.rs | 48 +++++++++++++------- relayer/src/link.rs | 69 ++++++++++++++++++----------- relayer/src/supervisor.rs | 35 ++++----------- relayer/src/telemetry.rs | 37 +++++++++++++++- relayer/src/worker.rs | 13 ++++-- relayer/src/worker/channel.rs | 20 +++++++-- relayer/src/worker/client.rs | 15 ++++++- relayer/src/worker/map.rs | 23 ++++++++-- relayer/src/worker/uni_chan_path.rs | 5 +++ telemetry/src/lib.rs | 13 +++--- telemetry/src/metric.rs | 14 ++++++ telemetry/src/service.rs | 41 ++++++----------- telemetry/src/state.rs | 14 ++++-- 15 files changed, 235 insertions(+), 124 deletions(-) create mode 100644 telemetry/src/metric.rs diff --git a/modules/src/events.rs b/modules/src/events.rs index 3277469251..ee09d360cd 100644 --- a/modules/src/events.rs +++ b/modules/src/events.rs @@ -66,11 +66,11 @@ pub enum IbcEvent { } /// For use in debug messages -pub struct VecIbcEvents(pub Vec); -impl fmt::Display for VecIbcEvents { +pub struct PrettyEvents<'a>(pub &'a [IbcEvent]); +impl<'a> fmt::Display for PrettyEvents<'a> { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { writeln!(f, "events:")?; - for v in &self.0 { + for v in self.0 { writeln!(f, "\t{}", v)?; } Ok(()) diff --git a/relayer-cli/src/commands/start.rs b/relayer-cli/src/commands/start.rs index bff32ca8e6..2f480645d2 100644 --- a/relayer-cli/src/commands/start.rs +++ b/relayer-cli/src/commands/start.rs @@ -24,11 +24,11 @@ impl Runnable for StartCmd { #[cfg(feature = "telemetry")] fn spawn_supervisor(config: Config) -> Supervisor { let telemetry = ibc_telemetry::spawn(config.telemetry.port, config.telemetry.enabled); - - Supervisor::spawn_with_telemetry(config, telemetry) + Supervisor::spawn(config, telemetry) } #[cfg(not(feature = "telemetry"))] fn spawn_supervisor(config: Config) -> Supervisor { - 
Supervisor::spawn(config) + let telemetry = ibc_relayer::telemetry::TelemetryDisabled; + Supervisor::spawn(config, telemetry) } diff --git a/relayer/src/chain/handle.rs b/relayer/src/chain/handle.rs index a8acf44fac..7e8d17562b 100644 --- a/relayer/src/chain/handle.rs +++ b/relayer/src/chain/handle.rs @@ -1,37 +1,44 @@ -use std::fmt::Debug; -use std::sync::Arc; +use std::{ + fmt::{self, Debug}, + sync::Arc, +}; use crossbeam_channel as channel; use dyn_clone::DynClone; use serde::{Serialize, Serializer}; -use ibc::ics02_client::client_consensus::{AnyConsensusState, AnyConsensusStateWithHeight}; -use ibc::ics02_client::client_state::AnyClientState; -use ibc::ics02_client::events::UpdateClient; -use ibc::ics02_client::misbehaviour::AnyMisbehaviour; -use ibc::ics04_channel::channel::IdentifiedChannelEnd; -use ibc::query::QueryTxRequest; use ibc::{ events::IbcEvent, - ics02_client::header::AnyHeader, + ics02_client::{ + client_consensus::{AnyConsensusState, AnyConsensusStateWithHeight}, + client_state::AnyClientState, + events::UpdateClient, + header::AnyHeader, + misbehaviour::AnyMisbehaviour, + }, ics03_connection::{connection::ConnectionEnd, version::Version}, ics04_channel::{ - channel::ChannelEnd, + channel::{ChannelEnd, IdentifiedChannelEnd}, packet::{PacketMsgType, Sequence}, }, ics23_commitment::commitment::CommitmentPrefix, ics24_host::identifier::{ChainId, ChannelId, ClientId, ConnectionId, PortId}, proofs::Proofs, + query::QueryTxRequest, signer::Signer, Height, }; -use ibc_proto::ibc::core::channel::v1::{ - PacketState, QueryChannelsRequest, QueryConnectionChannelsRequest, - QueryNextSequenceReceiveRequest, QueryPacketAcknowledgementsRequest, - QueryPacketCommitmentsRequest, QueryUnreceivedAcksRequest, QueryUnreceivedPacketsRequest, + +use ibc_proto::ibc::core::{ + channel::v1::{ + PacketState, QueryChannelsRequest, QueryConnectionChannelsRequest, + QueryNextSequenceReceiveRequest, QueryPacketAcknowledgementsRequest, + QueryPacketCommitmentsRequest, 
QueryUnreceivedAcksRequest, QueryUnreceivedPacketsRequest, + }, + client::v1::QueryConsensusStatesRequest, + commitment::v1::MerkleProof, }; -use ibc_proto::ibc::core::client::v1::QueryConsensusStatesRequest; -use ibc_proto::ibc::core::commitment::v1::MerkleProof; + pub use prod::ProdChainHandle; use crate::{ @@ -60,6 +67,15 @@ impl ChainHandlePair { } } +impl Debug for ChainHandlePair { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("ChainHandlePair") + .field("a", &self.a.id()) + .field("b", &self.b.id()) + .finish() + } +} + pub type Subscription = channel::Receiver>>; pub type ReplyTo = channel::Sender>; diff --git a/relayer/src/link.rs b/relayer/src/link.rs index 9b164ac97e..86d03915bb 100644 --- a/relayer/src/link.rs +++ b/relayer/src/link.rs @@ -9,10 +9,9 @@ use prost_types::Any; use thiserror::Error; use tracing::{debug, error, info, trace, warn}; -use ibc::events::VecIbcEvents; use ibc::{ downcast, - events::{IbcEvent, IbcEventType}, + events::{IbcEvent, IbcEventType, PrettyEvents}, ics03_connection::connection::State as ConnectionState, ics04_channel::{ channel::{ChannelEnd, Order, QueryPacketEventDataRequest, State as ChannelState}, @@ -590,7 +589,7 @@ impl RelayPath { fn relay_from_operational_data( &mut self, initial_od: OperationalData, - ) -> Result, LinkError> { + ) -> Result { // We will operate on potentially different operational data if the initial one fails. let mut odata = initial_od; @@ -607,16 +606,17 @@ impl RelayPath { // Consume the operational data by attempting to send its messages match self.send_from_operational_data(odata.clone()) { - Ok(events) => { + Ok(summary) => { // Done with this op. 
data info!("[{}] success", self); - return Ok(events); + + return Ok(summary); } Err(LinkError::SendError(ev)) => { // This error means we can retry error!("[{}] error {}", self, ev); match self.regenerate_operational_data(odata.clone()) { - None => return Ok(vec![]), // Nothing to retry + None => return Ok(RelaySummary::empty()), // Nothing to retry Some(new_od) => odata = new_od, } } @@ -626,7 +626,8 @@ impl RelayPath { } } } - Ok(vec![]) + + Ok(RelaySummary::empty()) } /// Helper for managing retries of the `relay_from_operational_data` method. @@ -712,10 +713,10 @@ impl RelayPath { fn send_from_operational_data( &mut self, odata: OperationalData, - ) -> Result, LinkError> { + ) -> Result { if odata.batch.is_empty() { error!("[{}] ignoring empty operational data!", self); - return Ok(vec![]); + return Ok(RelaySummary::empty()); } let target = match odata.target { @@ -726,7 +727,7 @@ impl RelayPath { let msgs = odata.assemble_msgs(self)?; let tx_events = target.send_msgs(msgs)?; - info!("[{}] result {}\n", self, VecIbcEvents(tx_events.clone())); + info!("[{}] result {}\n", self, PrettyEvents(&tx_events)); let ev = tx_events .clone() @@ -735,7 +736,7 @@ impl RelayPath { match ev { Some(ev) => Err(LinkError::SendError(Box::new(ev))), - None => Ok(tx_events), + None => Ok(RelaySummary::from_events(tx_events)), } } @@ -822,11 +823,7 @@ impl RelayPath { ); let dst_tx_events = self.dst_chain().send_msgs(dst_update)?; - info!( - "[{}] result {}\n", - self, - VecIbcEvents(dst_tx_events.clone()) - ); + info!("[{}] result {}\n", self, PrettyEvents(&dst_tx_events)); dst_err_ev = dst_tx_events .into_iter() @@ -867,11 +864,7 @@ impl RelayPath { ); let src_tx_events = self.src_chain().send_msgs(src_update)?; - info!( - "[{}] result {}\n", - self, - VecIbcEvents(src_tx_events.clone()) - ); + info!("[{}] result {}\n", self, PrettyEvents(&src_tx_events)); src_err_ev = src_tx_events .into_iter() @@ -1280,17 +1273,20 @@ impl RelayPath { /// Checks if there are any operational data 
items ready, and if so performs the relaying /// of corresponding packets to the target chain. - pub fn execute_schedule(&mut self) -> Result<(), LinkError> { + pub fn execute_schedule(&mut self) -> Result { let (src_ods, dst_ods) = self.try_fetch_scheduled_operational_data(); + + let mut summary = RelaySummary::empty(); + for od in dst_ods { - self.relay_from_operational_data(od)?; + summary.extend(self.relay_from_operational_data(od)?); } for od in src_ods { - self.relay_from_operational_data(od)?; + summary.extend(self.relay_from_operational_data(od)?); } - Ok(()) + Ok(summary) } /// Refreshes the scheduled batches. @@ -1680,7 +1676,7 @@ impl Link { // Block waiting for all of the scheduled data (until `None` is returned) while let Some(odata) = self.a_to_b.fetch_scheduled_operational_data() { let mut last_res = self.a_to_b.relay_from_operational_data(odata)?; - results.append(&mut last_res); + results.append(&mut last_res.events); } Ok(results) @@ -1694,9 +1690,28 @@ impl Link { // Block waiting for all of the scheduled data while let Some(odata) = self.a_to_b.fetch_scheduled_operational_data() { let mut last_res = self.a_to_b.relay_from_operational_data(odata)?; - results.append(&mut last_res); + results.append(&mut last_res.events); } Ok(results) } } + +#[derive(Clone, Debug)] +pub struct RelaySummary { + events: Vec, +} + +impl RelaySummary { + pub fn empty() -> Self { + Self { events: vec![] } + } + + pub fn from_events(events: Vec) -> Self { + Self { events } + } + + pub fn extend(&mut self, other: RelaySummary) { + self.events.extend(other.events) + } +} diff --git a/relayer/src/supervisor.rs b/relayer/src/supervisor.rs index afdd967f31..b2720bd5ea 100644 --- a/relayer/src/supervisor.rs +++ b/relayer/src/supervisor.rs @@ -13,9 +13,6 @@ use ibc::{ use ibc_proto::ibc::core::channel::v1::QueryChannelsRequest; -#[cfg(feature = "telemetry")] -use ibc_telemetry::service::MetricUpdate; - use crate::{ chain::{counterparty::channel_connection_client, 
handle::ChainHandle}, config::Config, @@ -26,6 +23,7 @@ use crate::{ metric, object::{Channel, Client, Object, UnidirectionalChannelPath}, registry::Registry, + telemetry::TelemetryHandle, util::try_recv_multiple, worker::{WorkerMap, WorkerMsg}, }; @@ -35,9 +33,6 @@ use crate::chain::counterparty::channel_state_on_destination; use crate::config::Strategy; pub use error::Error; -#[cfg(feature = "telemetry")] -use ibc_telemetry::TelemetryHandle; - /// The supervisor listens for events on multiple pairs of chains, /// and dispatches the events it receives to the appropriate /// worker, based on the [`Object`] associated with each event. @@ -46,37 +41,25 @@ pub struct Supervisor { registry: Registry, workers: WorkerMap, worker_msg_rx: Receiver, - - #[cfg(feature = "telemetry")] telemetry: TelemetryHandle, } impl Supervisor { /// Spawns a [`Supervisor`] which will listen for events on all the chains in the [`Config`]. - pub fn spawn(config: Config) -> Self { + pub fn spawn(config: Config, telemetry: TelemetryHandle) -> Self { let registry = Registry::new(config.clone()); let (worker_msg_tx, worker_msg_rx) = crossbeam_channel::unbounded(); + let workers = WorkerMap::new(worker_msg_tx, telemetry.clone()); Self { config, registry, - workers: WorkerMap::new(worker_msg_tx), + workers, worker_msg_rx, - - #[cfg(feature = "telemetry")] - telemetry: TelemetryHandle::noop(), + telemetry, } } - #[cfg(feature = "telemetry")] - /// Spawns a [`Supervisor`] which will listen for events on all the chains in the [`Config`], - /// with telemetry enabled. 
- pub fn spawn_with_telemetry(config: Config, telemetry: TelemetryHandle) -> Self { - let mut supervisor = Self::spawn(config); - supervisor.telemetry = telemetry; - supervisor - } - fn handshake_enabled(&self) -> bool { self.config.global.strategy == Strategy::HandshakeAndPackets } @@ -165,14 +148,14 @@ impl Supervisor { IbcEvent::TimeoutPacket(ref packet) => { if let Ok(object) = Object::for_timeout_packet(packet, src_chain) { // TODO: Is this the right place to record the telemetry metric ? - metric!(self.telemetry, MetricUpdate::TimeoutPacket(1)); + metric!(self.telemetry, TimeoutPacket(1)); collected.per_object.entry(object).or_default().push(event); } } IbcEvent::WriteAcknowledgement(ref packet) => { if let Ok(object) = Object::for_write_ack(packet, src_chain) { // TODO: Is this the right place to record the telemetry metric ? - metric!(self.telemetry, MetricUpdate::IbcAcknowledgePacket(1)); + metric!(self.telemetry, IbcAcknowledgePacket(1)); collected.per_object.entry(object).or_default().push(event); } } @@ -203,7 +186,7 @@ impl Supervisor { for chain_id in chain_ids { let chain = match self.registry.get_or_spawn(&chain_id) { Ok(chain_handle) => { - metric!(self.telemetry, MetricUpdate::RelayChainsNumber(1)); + metric!(self.telemetry, RelayChainsNumber(1)); chain_handle } Err(e) => { @@ -214,7 +197,7 @@ impl Supervisor { let channels = match chain.query_channels(req.clone()) { Ok(channels) => { - metric!(self.telemetry, MetricUpdate::RelayChannelsNumber(1)); + metric!(self.telemetry, RelayChannelsNumber(1)); channels } Err(e) => { diff --git a/relayer/src/telemetry.rs b/relayer/src/telemetry.rs index c82078c96b..6345bbca9b 100644 --- a/relayer/src/telemetry.rs +++ b/relayer/src/telemetry.rs @@ -1,7 +1,42 @@ +// If the `telemetry` feature is enabled, re-export the `ibc-telemetry` handle. +#[cfg(feature = "telemetry")] +pub type TelemetryHandle = ibc_telemetry::TelemetryHandle; + +// Otherwise, define and export a dummy type.
+#[cfg(not(feature = "telemetry"))] +#[derive(Clone, Debug)] +pub struct TelemetryDisabled; + +#[cfg(not(feature = "telemetry"))] +pub type TelemetryHandle = TelemetryDisabled; + +/// A macro to send metric updates via a telemetry handle, +/// only if the `telemetry` feature is enabled. +/// Otherwise, it compiles to a no-op which still +/// references the given field to avoid dead_code +/// warnings. +/// +/// ## Note +/// The macro imports `ibc_telemetry::MetricUpdate` into scope and all its variants. +/// +/// ## Example +/// +/// ```rust,ignore +/// metric!(self.telemetry, TxCount(1)); +/// ``` #[macro_export] macro_rules! metric { ($t:expr, $e:expr) => { #[cfg(feature = "telemetry")] - $t.send($e); + #[allow(unused_imports)] + { + use ibc_telemetry::{MetricUpdate, MetricUpdate::*}; + $t.send($e); + } + + #[cfg(not(feature = "telemetry"))] + { + let _ = &$t; + } }; } diff --git a/relayer/src/worker.rs b/relayer/src/worker.rs index a2ec1c95b0..babe9c3980 100644 --- a/relayer/src/worker.rs +++ b/relayer/src/worker.rs @@ -3,7 +3,7 @@ use std::fmt; use crossbeam_channel::Sender; use tracing::{debug, error, info}; -use crate::{chain::handle::ChainHandlePair, object::Object}; +use crate::{chain::handle::ChainHandlePair, object::Object, telemetry::TelemetryHandle}; pub mod retry_strategy; @@ -49,6 +49,7 @@ impl Worker { chains: ChainHandlePair, object: Object, msg_tx: Sender, + telemetry: TelemetryHandle, ) -> WorkerHandle { let (cmd_tx, cmd_rx) = crossbeam_channel::unbounded(); @@ -61,10 +62,14 @@ impl Worker { ); let worker = match object { - Object::Client(client) => Self::Client(ClientWorker::new(client, chains, cmd_rx)), - Object::Channel(channel) => Self::Channel(ChannelWorker::new(channel, chains, cmd_rx)), + Object::Client(client) => { + Self::Client(ClientWorker::new(client, chains, cmd_rx, telemetry)) + } + Object::Channel(channel) => { + Self::Channel(ChannelWorker::new(channel, chains, cmd_rx, telemetry)) + } Object::UnidirectionalChannelPath(path) => { - 
Self::UniChanPath(UniChanPathWorker::new(path, chains, cmd_rx)) + Self::UniChanPath(UniChanPathWorker::new(path, chains, cmd_rx, telemetry)) } }; diff --git a/relayer/src/worker/channel.rs b/relayer/src/worker/channel.rs index 3714845a53..c8be257ea4 100644 --- a/relayer/src/worker/channel.rs +++ b/relayer/src/worker/channel.rs @@ -5,6 +5,7 @@ use crossbeam_channel::Receiver; use tracing::{debug, warn}; use crate::channel::Channel as RelayChannel; +use crate::telemetry::TelemetryHandle; use crate::{ chain::handle::ChainHandlePair, object::Channel, util::retry::retry_with_index, worker::retry_strategy, @@ -16,14 +17,24 @@ pub struct ChannelWorker { channel: Channel, chains: ChainHandlePair, cmd_rx: Receiver, + + // no metrics for this worker yet + #[allow(dead_code)] + telemetry: TelemetryHandle, } impl ChannelWorker { - pub fn new(channel: Channel, chains: ChainHandlePair, cmd_rx: Receiver) -> Self { + pub fn new( + channel: Channel, + chains: ChainHandlePair, + cmd_rx: Receiver, + telemetry: TelemetryHandle, + ) -> Self { Self { channel, chains, cmd_rx, + telemetry, } } @@ -32,8 +43,6 @@ impl ChannelWorker { let a_chain = self.chains.a.clone(); let b_chain = self.chains.b.clone(); - let mut handshake_channel; - // Flag that indicates if the worker should actively resume handshake. // Set on start or when event based handshake fails. let mut resume_handshake = true; @@ -48,13 +57,15 @@ impl ChannelWorker { // process the last event, the one with highest "rank". 
let last_event = batch.events.last(); debug!("channel worker starts processing {:#?}", last_event); + match last_event { Some(event) => { - handshake_channel = RelayChannel::restore_from_event( + let mut handshake_channel = RelayChannel::restore_from_event( a_chain.clone(), b_chain.clone(), event.clone(), )?; + retry_with_index( retry_strategy::worker_default_strategy(), |index| handshake_channel.step_event(event.clone(), index), @@ -70,6 +81,7 @@ impl ChannelWorker { if !resume_handshake { continue; } + debug!( "channel worker starts processing block event at {:#?}", current_height diff --git a/relayer/src/worker/client.rs b/relayer/src/worker/client.rs index 7dd801d8e4..7113feb525 100644 --- a/relayer/src/worker/client.rs +++ b/relayer/src/worker/client.rs @@ -9,7 +9,9 @@ use ibc::{events::IbcEvent, ics02_client::events::UpdateClient}; use crate::{ chain::handle::ChainHandlePair, foreign_client::{ForeignClient, ForeignClientError, MisbehaviourResults}, + metric, object::Client, + telemetry::TelemetryHandle, }; use super::WorkerCmd; @@ -18,14 +20,21 @@ pub struct ClientWorker { client: Client, chains: ChainHandlePair, cmd_rx: Receiver, + telemetry: TelemetryHandle, } impl ClientWorker { - pub fn new(client: Client, chains: ChainHandlePair, cmd_rx: Receiver) -> Self { + pub fn new( + client: Client, + chains: ChainHandlePair, + cmd_rx: Receiver, + telemetry: TelemetryHandle, + ) -> Self { Self { client, chains, cmd_rx, + telemetry, } } @@ -72,7 +81,9 @@ impl ClientWorker { // Run misbehaviour. 
If evidence submitted the loop will exit in next // iteration with frozen client - self.detect_misbehaviour(&client, Some(update)); + if self.detect_misbehaviour(&client, Some(update)) { + metric!(self.telemetry, IbcClientMisbehaviour(1)); + } } } } diff --git a/relayer/src/worker/map.rs b/relayer/src/worker/map.rs index d9ce4ac892..fbdada974b 100644 --- a/relayer/src/worker/map.rs +++ b/relayer/src/worker/map.rs @@ -1,11 +1,13 @@ use std::collections::HashMap; use crossbeam_channel::Sender; + use ibc::ics24_host::identifier::ChainId; use crate::{ chain::handle::{ChainHandle, ChainHandlePair}, object::Object, + telemetry::TelemetryHandle, }; use super::{Worker, WorkerHandle, WorkerMsg}; @@ -15,15 +17,17 @@ use super::{Worker, WorkerHandle, WorkerMsg}; pub struct WorkerMap { workers: HashMap, msg_tx: Sender, + telemetry: TelemetryHandle, } impl WorkerMap { /// Create a new worker map, which will spawn workers with /// the given channel for sending messages back to the [`Supervisor`]. - pub fn new(msg_tx: Sender) -> Self { + pub fn new(msg_tx: Sender, telemetry: TelemetryHandle) -> Self { Self { workers: HashMap::new(), msg_tx, + telemetry, } } @@ -72,9 +76,22 @@ impl WorkerMap { if self.workers.contains_key(&object) { &self.workers[&object] } else { - let handles = ChainHandlePair { a: src, b: dst }; - let worker = Worker::spawn(handles, object.clone(), self.msg_tx.clone()); + let worker = self.spawn_worker(src, dst, &object); self.workers.entry(object).or_insert(worker) } } + + fn spawn_worker( + &mut self, + src: Box, + dst: Box, + object: &Object, + ) -> WorkerHandle { + Worker::spawn( + ChainHandlePair { a: src, b: dst }, + object.clone(), + self.msg_tx.clone(), + self.telemetry.clone(), + ) + } } diff --git a/relayer/src/worker/uni_chan_path.rs b/relayer/src/worker/uni_chan_path.rs index 6a5243edf2..446245e771 100644 --- a/relayer/src/worker/uni_chan_path.rs +++ b/relayer/src/worker/uni_chan_path.rs @@ -8,16 +8,19 @@ use crate::{ chain::handle::ChainHandlePair, 
link::{Link, LinkParameters}, object::UnidirectionalChannelPath, + telemetry::TelemetryHandle, util::retry::{retry_with_index, RetryResult}, worker::retry_strategy, }; use super::WorkerCmd; +#[derive(Debug)] pub struct UniChanPathWorker { path: UnidirectionalChannelPath, chains: ChainHandlePair, cmd_rx: Receiver, + telemetry: TelemetryHandle, } impl UniChanPathWorker { @@ -25,11 +28,13 @@ impl UniChanPathWorker { path: UnidirectionalChannelPath, chains: ChainHandlePair, cmd_rx: Receiver, + telemetry: TelemetryHandle, ) -> Self { Self { path, chains, cmd_rx, + telemetry, } } diff --git a/telemetry/src/lib.rs b/telemetry/src/lib.rs index e3e5a384a7..b2c964db95 100644 --- a/telemetry/src/lib.rs +++ b/telemetry/src/lib.rs @@ -1,16 +1,19 @@ pub mod server; + pub mod service; +pub use service::TelemetryService; + pub mod state; +pub use state::TelemetryState; + +pub mod metric; +pub use metric::MetricUpdate; use std::sync::Arc; use crossbeam_channel::Sender; -use crate::{ - service::{MetricUpdate, TelemetryService}, - state::TelemetryState, -}; - +#[derive(Clone, Debug)] pub struct TelemetryHandle { tx: Option>, } diff --git a/telemetry/src/metric.rs b/telemetry/src/metric.rs new file mode 100644 index 0000000000..705cc4b36c --- /dev/null +++ b/telemetry/src/metric.rs @@ -0,0 +1,14 @@ +#[derive(Debug)] +pub enum MetricUpdate { + RelayChainsNumber(u64), + RelayChannelsNumber(u64), + TxCount(u64), + TxSuccess(u64), + TxFailed(u64), + IbcAcknowledgePacket(u64), + IbcRecvPacket(u64), + IbcTransferSend(u64), + IbcTransferReceive(u64), + TimeoutPacket(u64), + IbcClientMisbehaviour(u64), +} diff --git a/telemetry/src/service.rs b/telemetry/src/service.rs index b59b2f7f24..944a2e256a 100644 --- a/telemetry/src/service.rs +++ b/telemetry/src/service.rs @@ -2,21 +2,7 @@ use std::sync::Arc; use crossbeam_channel::Receiver; -use crate::state::TelemetryState; - -#[derive(Debug)] -pub enum MetricUpdate { - RelayChainsNumber(u64), - RelayChannelsNumber(u64), - TxCount(u64), - 
TxSuccess(u64), - TxFailed(u64), - IbcAcknowledgePacket(u64), - IbcRecvPacket(u64), - IbcTransferSend(u64), - IbcTransferReceive(u64), - TimeoutPacket(u64), -} +use crate::{MetricUpdate, TelemetryState}; pub fn run(telemetry_state: Arc, rx: Receiver) { let service = TelemetryService::new(telemetry_state, rx); @@ -41,19 +27,20 @@ impl TelemetryService { } fn apply_update(&self, update: MetricUpdate) { + use MetricUpdate::*; + match update { - MetricUpdate::RelayChainsNumber(n) => self.state.relay_chains_num.add(n), - MetricUpdate::RelayChannelsNumber(n) => self.state.relay_channels_num.add(n), - MetricUpdate::IbcAcknowledgePacket(n) => { - self.state.tx_msg_ibc_acknowledge_packet.add(n) - } - MetricUpdate::IbcRecvPacket(n) => self.state.tx_msg_ibc_recv_packet.add(n), - MetricUpdate::TxCount(n) => self.state.tx_count.add(n), - MetricUpdate::TxSuccess(n) => self.state.tx_successful.add(n), - MetricUpdate::TxFailed(n) => self.state.tx_failed.add(n), - MetricUpdate::IbcTransferSend(n) => self.state.ibc_transfer_send.add(n), - MetricUpdate::IbcTransferReceive(n) => self.state.ibc_transfer_receive.add(n), - MetricUpdate::TimeoutPacket(n) => self.state.ibc_timeout_packet.add(n), + TxCount(n) => self.state.tx_count.add(n), + TxSuccess(n) => self.state.tx_successful.add(n), + TxFailed(n) => self.state.tx_failed.add(n), + RelayChainsNumber(n) => self.state.relay_chains_num.add(n), + RelayChannelsNumber(n) => self.state.relay_channels_num.add(n), + TimeoutPacket(n) => self.state.ibc_timeout_packet.add(n), + IbcAcknowledgePacket(n) => self.state.tx_msg_ibc_acknowledge_packet.add(n), + IbcRecvPacket(n) => self.state.tx_msg_ibc_recv_packet.add(n), + IbcTransferSend(n) => self.state.ibc_transfer_send.add(n), + IbcTransferReceive(n) => self.state.ibc_transfer_receive.add(n), + IbcClientMisbehaviour(n) => self.state.ibc_client_misbehaviour.add(n), } } } diff --git a/telemetry/src/state.rs b/telemetry/src/state.rs index 3129a0edb6..b3cef5dff6 100644 --- a/telemetry/src/state.rs +++ 
b/telemetry/src/state.rs @@ -37,13 +37,17 @@ pub struct TelemetryState { // Total number of IBC timeout packets pub ibc_timeout_packet: BoundCounter<'static, u64>, + + // Total number of client misbehaviours + pub ibc_client_misbehaviour: BoundCounter<'static, u64>, } impl Default for TelemetryState { fn default() -> Self { let exporter = opentelemetry_prometheus::exporter().init(); let meter = global::meter("hermes"); - let telemetry_state = TelemetryState { + + Self { exporter, relay_chains_num: meter .u64_counter("hermes_chains_num") @@ -99,7 +103,11 @@ impl Default for TelemetryState { .with_description("Total number of IBC timeout packets") .init() .bind(HANDLER_ALL.as_ref()), - }; - telemetry_state + ibc_client_misbehaviour: meter + .u64_counter("hermes_ibc_client_misbehaviour") + .with_description("Total number of client misbehaviours") + .init() + .bind(HANDLER_ALL.as_ref()), + } } } From 02b9044d6d1eec6d832b62b0ed8e27202df1a7a6 Mon Sep 17 00:00:00 2001 From: Romain Ruetschi Date: Thu, 27 May 2021 16:43:24 +0200 Subject: [PATCH 19/26] Implement `workers`, `ibc_client_misbehaviours` and `receive_packets` metrics --- Cargo.lock | 1 + relayer-cli/src/commands/start.rs | 4 + relayer/src/link.rs | 4 +- relayer/src/supervisor.rs | 17 +-- relayer/src/telemetry.rs | 2 +- relayer/src/worker/client.rs | 8 +- relayer/src/worker/map.rs | 14 +++ relayer/src/worker/uni_chan_path.rs | 59 +++++++---- telemetry/Cargo.toml | 4 +- telemetry/src/metric.rs | 50 +++++++-- telemetry/src/service.rs | 18 ++-- telemetry/src/state.rs | 156 ++++++++++++---------------- 12 files changed, 192 insertions(+), 145 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a8ca3995cd..ce42c4660a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1472,6 +1472,7 @@ name = "ibc-telemetry" version = "0.1.0" dependencies = [ "crossbeam-channel 0.5.1", + "ibc", "once_cell", "opentelemetry", "opentelemetry-prometheus", diff --git a/relayer-cli/src/commands/start.rs b/relayer-cli/src/commands/start.rs index 
2f480645d2..e2fc50ceed 100644 --- a/relayer-cli/src/commands/start.rs +++ b/relayer-cli/src/commands/start.rs @@ -29,6 +29,10 @@ fn spawn_supervisor(config: Config) -> Supervisor { #[cfg(not(feature = "telemetry"))] fn spawn_supervisor(config: Config) -> Supervisor { + if config.telemetry.enabled { + warn!("telemetry enabled in the config but Hermes was built without telemetry support"); + } + let telemetry = ibc_relayer::telemetry::TelemetryDisabled; Supervisor::spawn(config, telemetry) } diff --git a/relayer/src/link.rs b/relayer/src/link.rs index 86d03915bb..685451c7c1 100644 --- a/relayer/src/link.rs +++ b/relayer/src/link.rs @@ -1699,7 +1699,9 @@ impl Link { #[derive(Clone, Debug)] pub struct RelaySummary { - events: Vec, + pub events: Vec, + // errors: todo!(), + // timings: todo!(), } impl RelaySummary { diff --git a/relayer/src/supervisor.rs b/relayer/src/supervisor.rs index b2720bd5ea..18595ad5a3 100644 --- a/relayer/src/supervisor.rs +++ b/relayer/src/supervisor.rs @@ -20,7 +20,6 @@ use crate::{ self, monitor::{EventBatch, UnwrapOrClone}, }, - metric, object::{Channel, Client, Object, UnidirectionalChannelPath}, registry::Registry, telemetry::TelemetryHandle, @@ -41,6 +40,8 @@ pub struct Supervisor { registry: Registry, workers: WorkerMap, worker_msg_rx: Receiver, + + #[allow(dead_code)] telemetry: TelemetryHandle, } @@ -147,15 +148,11 @@ impl Supervisor { } IbcEvent::TimeoutPacket(ref packet) => { if let Ok(object) = Object::for_timeout_packet(packet, src_chain) { - // TODO: Is this the right place to record the telemetry metric ? - metric!(self.telemetry, TimeoutPacket(1)); collected.per_object.entry(object).or_default().push(event); } } IbcEvent::WriteAcknowledgement(ref packet) => { if let Ok(object) = Object::for_write_ack(packet, src_chain) { - // TODO: Is this the right place to record the telemetry metric ? 
- metric!(self.telemetry, IbcAcknowledgePacket(1)); collected.per_object.entry(object).or_default().push(event); } } @@ -185,10 +182,7 @@ impl Supervisor { for chain_id in chain_ids { let chain = match self.registry.get_or_spawn(&chain_id) { - Ok(chain_handle) => { - metric!(self.telemetry, RelayChainsNumber(1)); - chain_handle - } + Ok(chain_handle) => chain_handle, Err(e) => { error!("skipping workers for chain id {}. reason: failed to spawn chain runtime with error: {}", chain_id, e); continue; @@ -196,10 +190,7 @@ impl Supervisor { }; let channels = match chain.query_channels(req.clone()) { - Ok(channels) => { - metric!(self.telemetry, RelayChannelsNumber(1)); - channels - } + Ok(channels) => channels, Err(e) => { error!("failed to query channels from {}: {}", chain_id, e); continue; diff --git a/relayer/src/telemetry.rs b/relayer/src/telemetry.rs index 6345bbca9b..3c2cebc3d1 100644 --- a/relayer/src/telemetry.rs +++ b/relayer/src/telemetry.rs @@ -30,7 +30,7 @@ macro_rules! metric { #[cfg(feature = "telemetry")] #[allow(unused_imports)] { - use ibc_telemetry::{MetricUpdate, MetricUpdate::*}; + use ibc_telemetry::{metric::Op, MetricUpdate, MetricUpdate::*}; $t.send($e); } diff --git a/relayer/src/worker/client.rs b/relayer/src/worker/client.rs index 7113feb525..585e498cfc 100644 --- a/relayer/src/worker/client.rs +++ b/relayer/src/worker/client.rs @@ -82,7 +82,13 @@ impl ClientWorker { // Run misbehaviour. 
If evidence submitted the loop will exit in next // iteration with frozen client if self.detect_misbehaviour(&client, Some(update)) { - metric!(self.telemetry, IbcClientMisbehaviour(1)); + metric!( + self.telemetry, + IbcClientMisbehaviour( + self.client.dst_chain_id.clone(), + self.client.dst_client_id.clone() + ) + ); } } } diff --git a/relayer/src/worker/map.rs b/relayer/src/worker/map.rs index fbdada974b..587c25a04e 100644 --- a/relayer/src/worker/map.rs +++ b/relayer/src/worker/map.rs @@ -3,9 +3,11 @@ use std::collections::HashMap; use crossbeam_channel::Sender; use ibc::ics24_host::identifier::ChainId; +use ibc_telemetry::metric::WorkerType; use crate::{ chain::handle::{ChainHandle, ChainHandlePair}, + metric, object::Object, telemetry::TelemetryHandle, }; @@ -40,6 +42,7 @@ impl WorkerMap { /// the map and wait for its thread to terminate. pub fn remove_stopped(&mut self, object: &Object) -> bool { if let Some(handle) = self.workers.remove(object) { + metric!(self.telemetry, Worker(metric_type(object), Op::Sub(1))); let _ = handle.join(); true } else { @@ -87,6 +90,8 @@ impl WorkerMap { dst: Box, object: &Object, ) -> WorkerHandle { + metric!(self.telemetry, Worker(metric_type(object), Op::Add(1))); + Worker::spawn( ChainHandlePair { a: src, b: dst }, object.clone(), @@ -95,3 +100,12 @@ impl WorkerMap { ) } } + +#[cfg(feature = "telemetry")] +fn metric_type(o: &Object) -> WorkerType { + match o { + Object::Client(_) => WorkerType::Client, + Object::Channel(_) => WorkerType::Channel, + Object::UnidirectionalChannelPath(_) => WorkerType::Packet, + } +} diff --git a/relayer/src/worker/uni_chan_path.rs b/relayer/src/worker/uni_chan_path.rs index 446245e771..e6dc1cabdd 100644 --- a/relayer/src/worker/uni_chan_path.rs +++ b/relayer/src/worker/uni_chan_path.rs @@ -2,11 +2,13 @@ use std::{thread, time::Duration}; use anomaly::BoxError; use crossbeam_channel::Receiver; +use ibc::events::IbcEvent; use tracing::{error, warn}; use crate::{ chain::handle::ChainHandlePair, 
- link::{Link, LinkParameters}, + link::{Link, LinkParameters, RelaySummary}, + metric, object::UnidirectionalChannelPath, telemetry::TelemetryHandle, util::retry::{retry_with_index, RetryResult}, @@ -40,14 +42,12 @@ impl UniChanPathWorker { /// Run the event loop for events associated with a [`UnidirectionalChannelPath`]. pub fn run(self) -> Result<(), BoxError> { - let rx = self.cmd_rx; - let mut link = Link::new_from_opts( self.chains.a.clone(), self.chains.b.clone(), LinkParameters { - src_port_id: self.path.src_port_id, - src_channel_id: self.path.src_channel_id, + src_port_id: self.path.src_port_id.clone(), + src_channel_id: self.path.src_channel_id.clone(), }, )?; @@ -61,20 +61,26 @@ impl UniChanPathWorker { thread::sleep(Duration::from_millis(200)); let result = retry_with_index(retry_strategy::worker_default_strategy(), |index| { - Self::step(rx.try_recv().ok(), &mut link, index) + Self::step(self.cmd_rx.try_recv().ok(), &mut link, index) }); - if let Err(retries) = result { - return Err(format!( - "UnidirectionalChannelPath worker failed after {} retries", - retries - ) - .into()); + match result { + Ok(summary) => { + metric!(self.telemetry, self.receive_packet_metric(&summary)); + } + + Err(retries) => { + return Err(format!( + "UnidirectionalChannelPath worker failed after {} retries", + retries + ) + .into()); + } } } } - fn step(cmd: Option, link: &mut Link, index: u64) -> RetryResult<(), u64> { + fn step(cmd: Option, link: &mut Link, index: u64) -> RetryResult { if let Some(cmd) = cmd { let result = match cmd { WorkerCmd::IbcEvents { batch } => { @@ -98,12 +104,13 @@ impl UniChanPathWorker { .refresh_schedule() .and_then(|_| link.a_to_b.execute_schedule()); - if let Err(e) = result { - error!("{}", e); - return RetryResult::Retry(index); + match result { + Ok(summary) => RetryResult::Ok(summary), + Err(e) => { + error!("{}", e); + RetryResult::Retry(index) + } } - - RetryResult::Ok(()) } /// Get a reference to the uni chan path worker's chains. 
@@ -115,4 +122,20 @@ impl UniChanPathWorker { pub fn object(&self) -> &UnidirectionalChannelPath { &self.path } + + #[cfg(feature = "telemetry")] + fn receive_packet_metric(&self, summary: &RelaySummary) -> ibc_telemetry::MetricUpdate { + let count = summary + .events + .iter() + .filter(|e| matches!(e, IbcEvent::WriteAcknowledgement(_))) + .count(); + + ibc_telemetry::MetricUpdate::ReceivePacket( + self.path.src_chain_id.clone(), + self.path.src_channel_id.clone(), + self.path.src_port_id.clone(), + count as u64, + ) + } } diff --git a/telemetry/Cargo.toml b/telemetry/Cargo.toml index 5181370a09..a6992458c8 100644 --- a/telemetry/Cargo.toml +++ b/telemetry/Cargo.toml @@ -5,8 +5,10 @@ authors = ["Informal Systems "] edition = "2018" [dependencies] +ibc = { version = "0.3.2", path = "../modules" } + crossbeam-channel = "0.5.1" -once_cell = "1.7.2" +once_cell = "1.7.2" opentelemetry = "0.14.0" opentelemetry-prometheus = "0.7.0" prometheus = "0.12.0" diff --git a/telemetry/src/metric.rs b/telemetry/src/metric.rs index 705cc4b36c..5b1351ed3e 100644 --- a/telemetry/src/metric.rs +++ b/telemetry/src/metric.rs @@ -1,14 +1,42 @@ +use std::fmt; + +use ibc::ics24_host::identifier::{ChainId, ChannelId, ClientId, PortId}; + #[derive(Debug)] pub enum MetricUpdate { - RelayChainsNumber(u64), - RelayChannelsNumber(u64), - TxCount(u64), - TxSuccess(u64), - TxFailed(u64), - IbcAcknowledgePacket(u64), - IbcRecvPacket(u64), - IbcTransferSend(u64), - IbcTransferReceive(u64), - TimeoutPacket(u64), - IbcClientMisbehaviour(u64), + Worker(WorkerType, Op), + IbcClientMisbehaviour(ChainId, ClientId), + ReceivePacket(ChainId, ChannelId, PortId, u64), +} + +#[derive(Copy, Clone, Debug)] +pub enum WorkerType { + Client, + Channel, + Packet, +} + +impl fmt::Display for WorkerType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Client => write!(f, "client"), + Self::Channel => write!(f, "channel"), + Self::Packet => write!(f, "packet"), + } + } +} + 
+#[derive(Copy, Clone, Debug)] +pub enum Op { + Add(i64), + Sub(i64), +} + +impl Op { + pub fn to_i64(&self) -> i64 { + match self { + Self::Add(n) => *n, + Self::Sub(n) => -n, + } + } } diff --git a/telemetry/src/service.rs b/telemetry/src/service.rs index 944a2e256a..769741e474 100644 --- a/telemetry/src/service.rs +++ b/telemetry/src/service.rs @@ -30,17 +30,13 @@ impl TelemetryService { use MetricUpdate::*; match update { - TxCount(n) => self.state.tx_count.add(n), - TxSuccess(n) => self.state.tx_successful.add(n), - TxFailed(n) => self.state.tx_failed.add(n), - RelayChainsNumber(n) => self.state.relay_chains_num.add(n), - RelayChannelsNumber(n) => self.state.relay_channels_num.add(n), - TimeoutPacket(n) => self.state.ibc_timeout_packet.add(n), - IbcAcknowledgePacket(n) => self.state.tx_msg_ibc_acknowledge_packet.add(n), - IbcRecvPacket(n) => self.state.tx_msg_ibc_recv_packet.add(n), - IbcTransferSend(n) => self.state.ibc_transfer_send.add(n), - IbcTransferReceive(n) => self.state.ibc_transfer_receive.add(n), - IbcClientMisbehaviour(n) => self.state.ibc_client_misbehaviour.add(n), + Worker(worker_type, op) => self.state.worker(worker_type, op), + IbcClientMisbehaviour(chain, client) => { + self.state.ibc_client_misbehaviour(&chain, &client) + } + ReceivePacket(chain, channel, port, count) => { + self.state.receive_packets(&chain, &channel, &port, count) + } } } } diff --git a/telemetry/src/state.rs b/telemetry/src/state.rs index b3cef5dff6..c5f0a5f84e 100644 --- a/telemetry/src/state.rs +++ b/telemetry/src/state.rs @@ -1,47 +1,71 @@ -use once_cell::sync::Lazy; -use opentelemetry::{global, metrics::BoundCounter, KeyValue}; +use opentelemetry::{ + global, + metrics::{Counter, UpDownCounter}, + KeyValue, +}; use opentelemetry_prometheus::PrometheusExporter; -static HANDLER_ALL: Lazy<[KeyValue; 1]> = Lazy::new(|| [KeyValue::new("hermes", "all")]); +use ibc::ics24_host::identifier::{ChainId, ChannelId, ClientId, PortId}; -#[derive(Clone, Debug)] +use 
crate::metric::{Op, WorkerType}; + +#[derive(Debug)] pub struct TelemetryState { pub exporter: PrometheusExporter, - // Number of chains the relay is connecting to - pub relay_chains_num: BoundCounter<'static, u64>, - - // Number of channels the relay is connecting to - pub relay_channels_num: BoundCounter<'static, u64>, - - // Total number of IBC packets acknowledged - pub tx_msg_ibc_acknowledge_packet: BoundCounter<'static, u64>, + /// Number of workers per object + pub workers: UpDownCounter, - // Total number of txs processed via relay tx - pub tx_count: BoundCounter<'static, u64>, + /// Number of client misbehaviours per client + pub ibc_client_misbehaviours: Counter, - // Total number of successful txs processed via relay tx - pub tx_successful: BoundCounter<'static, u64>, - - // Total number of failed txs processed via relay tx - pub tx_failed: BoundCounter<'static, u64>, + /// Number of receive packets relayed, per channel + pub receive_packets: Counter, +} - // Total number of IBC transfers sent from a chain (source or sink) - pub ibc_transfer_send: BoundCounter<'static, u64>, +impl TelemetryState { + pub fn worker(&self, worker_type: WorkerType, op: Op) { + let labels = &[KeyValue::new("type", worker_type.to_string())]; + self.workers.add(op.to_i64(), labels); + } - // Total number of IBC transfers received to a chain (source or sink) - pub ibc_transfer_receive: BoundCounter<'static, u64>, + pub fn ibc_client_misbehaviour(&self, chain: &ChainId, client: &ClientId) { + let labels = &[ + KeyValue::new("chain", chain.to_string()), + KeyValue::new("client", client.to_string()), + ]; - // Total number of IBC packets received - pub tx_msg_ibc_recv_packet: BoundCounter<'static, u64>, + self.ibc_client_misbehaviours.add(1, labels); + } - // Total number of IBC timeout packets - pub ibc_timeout_packet: BoundCounter<'static, u64>, + pub fn receive_packets( + &self, + src_chain: &ChainId, + src_channel: &ChannelId, + src_port: &PortId, + count: u64, + ) { + let 
labels = &[ + KeyValue::new("src_chain", src_chain.to_string()), + KeyValue::new("src_channel", src_channel.to_string()), + KeyValue::new("src_port", src_port.to_string()), + ]; - // Total number of client misbehaviours - pub ibc_client_misbehaviour: BoundCounter<'static, u64>, + self.receive_packets.add(count, labels); + } } +// Supervisor: +// - [x] number of workers per type (gauge) +// +// Client: +// - [x] misbehaviors per client (counter) +// - [ ] updates per client (counter) √ +// +// Packet: +// - [ ] write acknowledgment events per object, wo/ destination (counter) √ +// => `receive_packets` + impl Default for TelemetryState { fn default() -> Self { let exporter = opentelemetry_prometheus::exporter().init(); @@ -49,65 +73,21 @@ impl Default for TelemetryState { Self { exporter, - relay_chains_num: meter - .u64_counter("hermes_chains_num") - .with_description("Number of chains the relay is connecting to") - .init() - .bind(HANDLER_ALL.as_ref()), - relay_channels_num: meter - .u64_counter("hermes_channels_num") - .with_description("Number of channels the relay is connecting to") - .init() - .bind(HANDLER_ALL.as_ref()), - tx_msg_ibc_acknowledge_packet: meter - .u64_counter("hermes_tx_msg_ibc_acknowledge_packet") - .with_description("Total number of IBC packets acknowledged") - .init() - .bind(HANDLER_ALL.as_ref()), - tx_count: meter - .u64_counter("hermes_tx_count") - .with_description("Total number of txs processed via relay tx") - .init() - .bind(HANDLER_ALL.as_ref()), - tx_successful: meter - .u64_counter("hermes_tx_successful") - .with_description("Total number of successful txs processed via relay tx") - .init() - .bind(HANDLER_ALL.as_ref()), - tx_failed: meter - .u64_counter("hermes_tx_failed") - .with_description("Total number of failed txs processed via relay tx") - .init() - .bind(HANDLER_ALL.as_ref()), - ibc_transfer_send: meter - .u64_counter("hermes_ibc_transfer_send") - .with_description( - "Total number of IBC transfers sent from a chain 
(source or sink)", - ) - .init() - .bind(HANDLER_ALL.as_ref()), - ibc_transfer_receive: meter - .u64_counter("hermes_ibc_transfer_receive") - .with_description( - "Total number of IBC transfers received to a chain (source or sink)", - ) - .init() - .bind(HANDLER_ALL.as_ref()), - tx_msg_ibc_recv_packet: meter - .u64_counter("hermes_tx_msg_ibc_recv_packet") - .with_description("Total number of IBC packets received") - .init() - .bind(HANDLER_ALL.as_ref()), - ibc_timeout_packet: meter - .u64_counter("hermes_ibc_timeout_packet") - .with_description("Total number of IBC timeout packets") - .init() - .bind(HANDLER_ALL.as_ref()), - ibc_client_misbehaviour: meter - .u64_counter("hermes_ibc_client_misbehaviour") - .with_description("Total number of client misbehaviours") - .init() - .bind(HANDLER_ALL.as_ref()), + + workers: meter + .i64_up_down_counter("workers") + .with_description("Number of workers per object") + .init(), + + ibc_client_misbehaviours: meter + .u64_counter("ibc_client_misbehaviours") + .with_description("Number of misbehaviours detected per client") + .init(), + + receive_packets: meter + .u64_counter("receive_packets") + .with_description("Number of receive packets relayed per channel") + .init(), } } } From 80fdcdfdeb89aea8704741ca472d6701c2f288b2 Mon Sep 17 00:00:00 2001 From: Romain Ruetschi Date: Thu, 27 May 2021 17:05:09 +0200 Subject: [PATCH 20/26] Add `ibc_client_update` metric --- relayer/src/worker/client.rs | 20 ++++++++++++++---- relayer/src/worker/uni_chan_path.rs | 2 +- telemetry/src/metric.rs | 3 ++- telemetry/src/service.rs | 12 ++++++++--- telemetry/src/state.rs | 32 +++++++++++++++++------------ 5 files changed, 47 insertions(+), 22 deletions(-) diff --git a/relayer/src/worker/client.rs b/relayer/src/worker/client.rs index 585e498cfc..f9717bb6ff 100644 --- a/relayer/src/worker/client.rs +++ b/relayer/src/worker/client.rs @@ -63,10 +63,22 @@ impl ClientWorker { thread::sleep(Duration::from_millis(600)); // Run client refresh, exit only if 
expired or frozen - if let Err(e @ ForeignClientError::ExpiredOrFrozen(..)) = client.refresh() { - error!("failed to refresh client '{}': {}", client, e); - continue; - } + match client.refresh() { + Ok(Some(_)) => { + metric!( + self.telemetry, + IbcClientUpdate( + self.client.dst_chain_id.clone(), + self.client.dst_client_id.clone() + ) + ); + } + Err(e @ ForeignClientError::ExpiredOrFrozen(..)) => { + error!("failed to refresh client '{}': {}", client, e); + continue; + } + _ => (), + }; if skip_misbehaviour { continue; diff --git a/relayer/src/worker/uni_chan_path.rs b/relayer/src/worker/uni_chan_path.rs index e6dc1cabdd..773d81de50 100644 --- a/relayer/src/worker/uni_chan_path.rs +++ b/relayer/src/worker/uni_chan_path.rs @@ -131,7 +131,7 @@ impl UniChanPathWorker { .filter(|e| matches!(e, IbcEvent::WriteAcknowledgement(_))) .count(); - ibc_telemetry::MetricUpdate::ReceivePacket( + ibc_telemetry::MetricUpdate::IbcReceivePacket( self.path.src_chain_id.clone(), self.path.src_channel_id.clone(), self.path.src_port_id.clone(), diff --git a/telemetry/src/metric.rs b/telemetry/src/metric.rs index 5b1351ed3e..04e778ec48 100644 --- a/telemetry/src/metric.rs +++ b/telemetry/src/metric.rs @@ -6,7 +6,8 @@ use ibc::ics24_host::identifier::{ChainId, ChannelId, ClientId, PortId}; pub enum MetricUpdate { Worker(WorkerType, Op), IbcClientMisbehaviour(ChainId, ClientId), - ReceivePacket(ChainId, ChannelId, PortId, u64), + IbcClientUpdate(ChainId, ClientId), + IbcReceivePacket(ChainId, ChannelId, PortId, u64), } #[derive(Copy, Clone, Debug)] diff --git a/telemetry/src/service.rs b/telemetry/src/service.rs index 769741e474..26e0c93445 100644 --- a/telemetry/src/service.rs +++ b/telemetry/src/service.rs @@ -30,12 +30,18 @@ impl TelemetryService { use MetricUpdate::*; match update { - Worker(worker_type, op) => self.state.worker(worker_type, op), + Worker(worker_type, op) => { + self.state.worker(worker_type, op); + } + IbcClientUpdate(chain, client) => { + 
self.state.ibc_client_update(&chain, &client); + } IbcClientMisbehaviour(chain, client) => { self.state.ibc_client_misbehaviour(&chain, &client) } - ReceivePacket(chain, channel, port, count) => { - self.state.receive_packets(&chain, &channel, &port, count) + IbcReceivePacket(chain, channel, port, count) => { + self.state + .ibc_receive_packets(&chain, &channel, &port, count); } } } diff --git a/telemetry/src/state.rs b/telemetry/src/state.rs index c5f0a5f84e..8b81c76dbc 100644 --- a/telemetry/src/state.rs +++ b/telemetry/src/state.rs @@ -16,6 +16,9 @@ pub struct TelemetryState { /// Number of workers per object pub workers: UpDownCounter, + /// Number of client updates per client + pub ibc_client_updates: Counter, + /// Number of client misbehaviours per client pub ibc_client_misbehaviours: Counter, @@ -29,6 +32,15 @@ impl TelemetryState { self.workers.add(op.to_i64(), labels); } + pub fn ibc_client_update(&self, chain: &ChainId, client: &ClientId) { + let labels = &[ + KeyValue::new("chain", chain.to_string()), + KeyValue::new("client", client.to_string()), + ]; + + self.ibc_client_updates.add(1, labels); + } + pub fn ibc_client_misbehaviour(&self, chain: &ChainId, client: &ClientId) { let labels = &[ KeyValue::new("chain", chain.to_string()), @@ -38,7 +50,7 @@ impl TelemetryState { self.ibc_client_misbehaviours.add(1, labels); } - pub fn receive_packets( + pub fn ibc_receive_packets( &self, src_chain: &ChainId, src_channel: &ChannelId, @@ -55,17 +67,6 @@ impl TelemetryState { } } -// Supervisor: -// - [x] number of workers per type (gauge) -// -// Client: -// - [x] misbehaviors per client (counter) -// - [ ] updates per client (counter) √ -// -// Packet: -// - [ ] write acknowledgment events per object, wo/ destination (counter) √ -// => `receive_packets` - impl Default for TelemetryState { fn default() -> Self { let exporter = opentelemetry_prometheus::exporter().init(); @@ -79,13 +80,18 @@ impl Default for TelemetryState { .with_description("Number of workers 
per object") .init(), + ibc_client_updates: meter + .u64_counter("ibc_client_updates") + .with_description("Number of client updates performed per client") + .init(), + ibc_client_misbehaviours: meter .u64_counter("ibc_client_misbehaviours") .with_description("Number of misbehaviours detected per client") .init(), receive_packets: meter - .u64_counter("receive_packets") + .u64_counter("ibc_receive_packets") .with_description("Number of receive packets relayed per channel") .init(), } From 56a76c20d48574b43563bea2b4d7faec90a23dbb Mon Sep 17 00:00:00 2001 From: Romain Ruetschi Date: Fri, 28 May 2021 11:54:00 +0200 Subject: [PATCH 21/26] Remove need for telemetry service by passing around the telemetry state --- relayer-cli/src/commands/start.rs | 14 ++++++-- relayer/src/lib.rs | 3 +- relayer/src/supervisor.rs | 6 ++-- relayer/src/telemetry.rs | 17 ++++------ relayer/src/worker.rs | 4 +-- relayer/src/worker/channel.rs | 7 ++-- relayer/src/worker/client.rs | 32 +++++++++--------- relayer/src/worker/map.rs | 20 +++++------ relayer/src/worker/uni_chan_path.rs | 24 +++++++------- telemetry/src/lib.rs | 47 ++++---------------------- telemetry/src/metric.rs | 43 ------------------------ telemetry/src/server.rs | 2 +- telemetry/src/service.rs | 48 --------------------------- telemetry/src/state.rs | 51 ++++++++++++++++++++++------- 14 files changed, 110 insertions(+), 208 deletions(-) delete mode 100644 telemetry/src/metric.rs delete mode 100644 telemetry/src/service.rs diff --git a/relayer-cli/src/commands/start.rs b/relayer-cli/src/commands/start.rs index e2fc50ceed..8190b776d6 100644 --- a/relayer-cli/src/commands/start.rs +++ b/relayer-cli/src/commands/start.rs @@ -23,14 +23,22 @@ impl Runnable for StartCmd { #[cfg(feature = "telemetry")] fn spawn_supervisor(config: Config) -> Supervisor { - let telemetry = ibc_telemetry::spawn(config.telemetry.port, config.telemetry.enabled); - Supervisor::spawn(config, telemetry) + let state = ibc_telemetry::new_state(); + + if 
config.telemetry.enabled { + ibc_telemetry::spawn(config.telemetry.port, state.clone()); + } + + Supervisor::spawn(config, state) } #[cfg(not(feature = "telemetry"))] fn spawn_supervisor(config: Config) -> Supervisor { if config.telemetry.enabled { - warn!("telemetry enabled in the config but Hermes was built without telemetry support"); + warn!( + "telemetry enabled in the config but Hermes was built without telemetry support, \ + build Hermes with --features=telemetry to enable telemetry support." + ); } let telemetry = ibc_relayer::telemetry::TelemetryDisabled; diff --git a/relayer/src/lib.rs b/relayer/src/lib.rs index 79f416be15..a6be011169 100644 --- a/relayer/src/lib.rs +++ b/relayer/src/lib.rs @@ -30,9 +30,8 @@ pub mod macros; pub mod object; pub mod registry; pub mod supervisor; +pub mod telemetry; pub mod transfer; pub mod upgrade_chain; pub mod util; pub mod worker; - -mod telemetry; diff --git a/relayer/src/supervisor.rs b/relayer/src/supervisor.rs index 18595ad5a3..561977a83d 100644 --- a/relayer/src/supervisor.rs +++ b/relayer/src/supervisor.rs @@ -22,7 +22,7 @@ use crate::{ }, object::{Channel, Client, Object, UnidirectionalChannelPath}, registry::Registry, - telemetry::TelemetryHandle, + telemetry::Telemetry, util::try_recv_multiple, worker::{WorkerMap, WorkerMsg}, }; @@ -42,12 +42,12 @@ pub struct Supervisor { worker_msg_rx: Receiver, #[allow(dead_code)] - telemetry: TelemetryHandle, + telemetry: Telemetry, } impl Supervisor { /// Spawns a [`Supervisor`] which will listen for events on all the chains in the [`Config`]. 
- pub fn spawn(config: Config, telemetry: TelemetryHandle) -> Self { + pub fn spawn(config: Config, telemetry: Telemetry) -> Self { let registry = Registry::new(config.clone()); let (worker_msg_tx, worker_msg_rx) = crossbeam_channel::unbounded(); let workers = WorkerMap::new(worker_msg_tx, telemetry.clone()); diff --git a/relayer/src/telemetry.rs b/relayer/src/telemetry.rs index 3c2cebc3d1..abda4e7bf4 100644 --- a/relayer/src/telemetry.rs +++ b/relayer/src/telemetry.rs @@ -1,6 +1,6 @@ -// If the `telemetry` feature is enableb, re-export the `ibc-telemetry` handle. +// If the `telemetry` feature is enabled, re-export the `ibc-telemetry` state. #[cfg(feature = "telemetry")] -pub type TelemetryHandle = ibc_telemetry::TelemetryHandle; +pub type Telemetry = std::sync::Arc; // Otherwise, define and export a dummy type. #[cfg(not(feature = "telemetry"))] @@ -8,7 +8,7 @@ pub type TelemetryHandle = ibc_telemetry::TelemetryHandle; pub struct TelemetryDisabled; #[cfg(not(feature = "telemetry"))] -pub type TelemetryHandle = TelemetryDisabled; +pub type Telemetry = TelemetryDisabled; /// A macro to send metric updates via a telemetry handle, /// only if the `telemetry` feature is enabled. @@ -26,17 +26,12 @@ pub type TelemetryHandle = TelemetryDisabled; /// ``` #[macro_export] macro_rules! 
metric { - ($t:expr, $e:expr) => { + ($e:expr) => { #[cfg(feature = "telemetry")] #[allow(unused_imports)] { - use ibc_telemetry::{metric::Op, MetricUpdate, MetricUpdate::*}; - $t.send($e); - } - - #[cfg(not(feature = "telemetry"))] - { - let _ = &$t; + use ibc_telemetry::state::WorkerType; + $e; } }; } diff --git a/relayer/src/worker.rs b/relayer/src/worker.rs index babe9c3980..c99cef81af 100644 --- a/relayer/src/worker.rs +++ b/relayer/src/worker.rs @@ -3,7 +3,7 @@ use std::fmt; use crossbeam_channel::Sender; use tracing::{debug, error, info}; -use crate::{chain::handle::ChainHandlePair, object::Object, telemetry::TelemetryHandle}; +use crate::{chain::handle::ChainHandlePair, object::Object, telemetry::Telemetry}; pub mod retry_strategy; @@ -49,7 +49,7 @@ impl Worker { chains: ChainHandlePair, object: Object, msg_tx: Sender, - telemetry: TelemetryHandle, + telemetry: Telemetry, ) -> WorkerHandle { let (cmd_tx, cmd_rx) = crossbeam_channel::unbounded(); diff --git a/relayer/src/worker/channel.rs b/relayer/src/worker/channel.rs index c8be257ea4..78c6ee1f17 100644 --- a/relayer/src/worker/channel.rs +++ b/relayer/src/worker/channel.rs @@ -5,7 +5,7 @@ use crossbeam_channel::Receiver; use tracing::{debug, warn}; use crate::channel::Channel as RelayChannel; -use crate::telemetry::TelemetryHandle; +use crate::telemetry::Telemetry; use crate::{ chain::handle::ChainHandlePair, object::Channel, util::retry::retry_with_index, worker::retry_strategy, @@ -18,9 +18,8 @@ pub struct ChannelWorker { chains: ChainHandlePair, cmd_rx: Receiver, - // no metrics for this worker yet #[allow(dead_code)] - telemetry: TelemetryHandle, + telemetry: Telemetry, } impl ChannelWorker { @@ -28,7 +27,7 @@ impl ChannelWorker { channel: Channel, chains: ChainHandlePair, cmd_rx: Receiver, - telemetry: TelemetryHandle, + telemetry: Telemetry, ) -> Self { Self { channel, diff --git a/relayer/src/worker/client.rs b/relayer/src/worker/client.rs index f9717bb6ff..de23a9f15a 100644 --- 
a/relayer/src/worker/client.rs +++ b/relayer/src/worker/client.rs @@ -11,7 +11,7 @@ use crate::{ foreign_client::{ForeignClient, ForeignClientError, MisbehaviourResults}, metric, object::Client, - telemetry::TelemetryHandle, + telemetry::Telemetry, }; use super::WorkerCmd; @@ -20,7 +20,9 @@ pub struct ClientWorker { client: Client, chains: ChainHandlePair, cmd_rx: Receiver, - telemetry: TelemetryHandle, + + #[allow(dead_code)] + telemetry: Telemetry, } impl ClientWorker { @@ -28,7 +30,7 @@ impl ClientWorker { client: Client, chains: ChainHandlePair, cmd_rx: Receiver, - telemetry: TelemetryHandle, + telemetry: Telemetry, ) -> Self { Self { client, @@ -65,13 +67,11 @@ impl ClientWorker { // Run client refresh, exit only if expired or frozen match client.refresh() { Ok(Some(_)) => { - metric!( - self.telemetry, - IbcClientUpdate( - self.client.dst_chain_id.clone(), - self.client.dst_client_id.clone() - ) - ); + metric!(self.telemetry.ibc_client_update( + &self.client.dst_chain_id, + &self.client.dst_client_id, + 1 + )); } Err(e @ ForeignClientError::ExpiredOrFrozen(..)) => { error!("failed to refresh client '{}': {}", client, e); @@ -94,13 +94,11 @@ impl ClientWorker { // Run misbehaviour. 
If evidence submitted the loop will exit in next // iteration with frozen client if self.detect_misbehaviour(&client, Some(update)) { - metric!( - self.telemetry, - IbcClientMisbehaviour( - self.client.dst_chain_id.clone(), - self.client.dst_client_id.clone() - ) - ); + metric!(self.telemetry.ibc_client_misbehaviour( + &self.client.dst_chain_id, + &self.client.dst_client_id, + 1 + )); } } } diff --git a/relayer/src/worker/map.rs b/relayer/src/worker/map.rs index 587c25a04e..36ce67e86c 100644 --- a/relayer/src/worker/map.rs +++ b/relayer/src/worker/map.rs @@ -3,13 +3,12 @@ use std::collections::HashMap; use crossbeam_channel::Sender; use ibc::ics24_host::identifier::ChainId; -use ibc_telemetry::metric::WorkerType; use crate::{ chain::handle::{ChainHandle, ChainHandlePair}, metric, object::Object, - telemetry::TelemetryHandle, + telemetry::Telemetry, }; use super::{Worker, WorkerHandle, WorkerMsg}; @@ -19,13 +18,13 @@ use super::{Worker, WorkerHandle, WorkerMsg}; pub struct WorkerMap { workers: HashMap, msg_tx: Sender, - telemetry: TelemetryHandle, + telemetry: Telemetry, } impl WorkerMap { /// Create a new worker map, which will spawn workers with /// the given channel for sending messages back to the [`Supervisor`]. - pub fn new(msg_tx: Sender, telemetry: TelemetryHandle) -> Self { + pub fn new(msg_tx: Sender, telemetry: Telemetry) -> Self { Self { workers: HashMap::new(), msg_tx, @@ -42,7 +41,7 @@ impl WorkerMap { /// the map and wait for its thread to terminate. 
pub fn remove_stopped(&mut self, object: &Object) -> bool { if let Some(handle) = self.workers.remove(object) { - metric!(self.telemetry, Worker(metric_type(object), Op::Sub(1))); + metric!(self.telemetry.worker(metric_type(object), -1)); let _ = handle.join(); true } else { @@ -90,7 +89,7 @@ impl WorkerMap { dst: Box, object: &Object, ) -> WorkerHandle { - metric!(self.telemetry, Worker(metric_type(object), Op::Add(1))); + metric!(self.telemetry.worker(metric_type(object), 1)); Worker::spawn( ChainHandlePair { a: src, b: dst }, @@ -102,10 +101,11 @@ impl WorkerMap { } #[cfg(feature = "telemetry")] -fn metric_type(o: &Object) -> WorkerType { +fn metric_type(o: &Object) -> ibc_telemetry::state::WorkerType { + use ibc_telemetry::state::WorkerType::*; match o { - Object::Client(_) => WorkerType::Client, - Object::Channel(_) => WorkerType::Channel, - Object::UnidirectionalChannelPath(_) => WorkerType::Packet, + Object::Client(_) => Client, + Object::Channel(_) => Channel, + Object::UnidirectionalChannelPath(_) => Packet, } } diff --git a/relayer/src/worker/uni_chan_path.rs b/relayer/src/worker/uni_chan_path.rs index 773d81de50..cd407eaaca 100644 --- a/relayer/src/worker/uni_chan_path.rs +++ b/relayer/src/worker/uni_chan_path.rs @@ -2,7 +2,6 @@ use std::{thread, time::Duration}; use anomaly::BoxError; use crossbeam_channel::Receiver; -use ibc::events::IbcEvent; use tracing::{error, warn}; use crate::{ @@ -10,7 +9,7 @@ use crate::{ link::{Link, LinkParameters, RelaySummary}, metric, object::UnidirectionalChannelPath, - telemetry::TelemetryHandle, + telemetry::Telemetry, util::retry::{retry_with_index, RetryResult}, worker::retry_strategy, }; @@ -22,7 +21,7 @@ pub struct UniChanPathWorker { path: UnidirectionalChannelPath, chains: ChainHandlePair, cmd_rx: Receiver, - telemetry: TelemetryHandle, + telemetry: Telemetry, } impl UniChanPathWorker { @@ -30,7 +29,7 @@ impl UniChanPathWorker { path: UnidirectionalChannelPath, chains: ChainHandlePair, cmd_rx: Receiver, - 
telemetry: TelemetryHandle, + telemetry: Telemetry, ) -> Self { Self { path, @@ -66,7 +65,8 @@ impl UniChanPathWorker { match result { Ok(summary) => { - metric!(self.telemetry, self.receive_packet_metric(&summary)); + metric!(self.receive_packet_metric(&summary)); + let _ = summary; } Err(retries) => { @@ -124,17 +124,19 @@ impl UniChanPathWorker { } #[cfg(feature = "telemetry")] - fn receive_packet_metric(&self, summary: &RelaySummary) -> ibc_telemetry::MetricUpdate { + fn receive_packet_metric(&self, summary: &RelaySummary) { + use ibc::events::IbcEvent::WriteAcknowledgement; + let count = summary .events .iter() - .filter(|e| matches!(e, IbcEvent::WriteAcknowledgement(_))) + .filter(|e| matches!(e, WriteAcknowledgement(_))) .count(); - ibc_telemetry::MetricUpdate::IbcReceivePacket( - self.path.src_chain_id.clone(), - self.path.src_channel_id.clone(), - self.path.src_port_id.clone(), + self.telemetry.ibc_receive_packets( + &self.path.src_chain_id, + &self.path.src_channel_id, + &self.path.src_port_id, count as u64, ) } diff --git a/telemetry/src/lib.rs b/telemetry/src/lib.rs index b2c964db95..446d566761 100644 --- a/telemetry/src/lib.rs +++ b/telemetry/src/lib.rs @@ -1,49 +1,14 @@ pub mod server; - -pub mod service; -pub use service::TelemetryService; - pub mod state; -pub use state::TelemetryState; - -pub mod metric; -pub use metric::MetricUpdate; - -use std::sync::Arc; - -use crossbeam_channel::Sender; -#[derive(Clone, Debug)] -pub struct TelemetryHandle { - tx: Option>, -} +use std::{sync::Arc, thread::JoinHandle}; -impl TelemetryHandle { - pub fn noop() -> Self { - Self { tx: None } - } +pub use crate::state::TelemetryState; - pub fn send(&self, update: MetricUpdate) { - if let Some(ref tx) = self.tx { - let _ = tx.send(update); - } - } +pub fn new_state() -> Arc { + Arc::new(TelemetryState::default()) } -pub fn spawn(port: u16, enabled: bool) -> TelemetryHandle { - let (tx, rx) = crossbeam_channel::unbounded(); - - // Only start the telemetry service and 
server if it is enabled in the configuration - if !enabled { - return TelemetryHandle::noop(); - } - - let telemetry_state = Arc::new(TelemetryState::default()); - let service = TelemetryService::new(telemetry_state.clone(), rx); - - // Start the telemetry service and server - std::thread::spawn(move || server::run(telemetry_state, port)); - std::thread::spawn(move || service.run()); - - TelemetryHandle { tx: Some(tx) } +pub fn spawn(port: u16, state: Arc) -> JoinHandle<()> { + std::thread::spawn(move || server::run(state, port)) } diff --git a/telemetry/src/metric.rs b/telemetry/src/metric.rs deleted file mode 100644 index 04e778ec48..0000000000 --- a/telemetry/src/metric.rs +++ /dev/null @@ -1,43 +0,0 @@ -use std::fmt; - -use ibc::ics24_host::identifier::{ChainId, ChannelId, ClientId, PortId}; - -#[derive(Debug)] -pub enum MetricUpdate { - Worker(WorkerType, Op), - IbcClientMisbehaviour(ChainId, ClientId), - IbcClientUpdate(ChainId, ClientId), - IbcReceivePacket(ChainId, ChannelId, PortId, u64), -} - -#[derive(Copy, Clone, Debug)] -pub enum WorkerType { - Client, - Channel, - Packet, -} - -impl fmt::Display for WorkerType { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::Client => write!(f, "client"), - Self::Channel => write!(f, "channel"), - Self::Packet => write!(f, "packet"), - } - } -} - -#[derive(Copy, Clone, Debug)] -pub enum Op { - Add(i64), - Sub(i64), -} - -impl Op { - pub fn to_i64(&self) -> i64 { - match self { - Self::Add(n) => *n, - Self::Sub(n) => -n, - } - } -} diff --git a/telemetry/src/server.rs b/telemetry/src/server.rs index c9d32aa7fc..a0fc153e9d 100644 --- a/telemetry/src/server.rs +++ b/telemetry/src/server.rs @@ -27,7 +27,7 @@ pub fn run(telemetry_state: Arc, port: u16) { Route::Metrics => { let mut buffer = vec![]; let encoder = TextEncoder::new(); - let metric_families = telemetry_state.exporter.registry().gather(); + let metric_families = telemetry_state.gather(); encoder.encode(&metric_families, &mut 
buffer).unwrap(); rouille::Response::from_data(encoder.format_type().to_string(), buffer) diff --git a/telemetry/src/service.rs b/telemetry/src/service.rs deleted file mode 100644 index 26e0c93445..0000000000 --- a/telemetry/src/service.rs +++ /dev/null @@ -1,48 +0,0 @@ -use std::sync::Arc; - -use crossbeam_channel::Receiver; - -use crate::{MetricUpdate, TelemetryState}; - -pub fn run(telemetry_state: Arc, rx: Receiver) { - let service = TelemetryService::new(telemetry_state, rx); - service.run() -} - -#[derive(Debug)] -pub struct TelemetryService { - state: Arc, - rx: Receiver, -} - -impl TelemetryService { - pub fn new(state: Arc, rx: Receiver) -> Self { - Self { state, rx } - } - - pub fn run(self) { - while let Ok(update) = self.rx.recv() { - self.apply_update(update); - } - } - - fn apply_update(&self, update: MetricUpdate) { - use MetricUpdate::*; - - match update { - Worker(worker_type, op) => { - self.state.worker(worker_type, op); - } - IbcClientUpdate(chain, client) => { - self.state.ibc_client_update(&chain, &client); - } - IbcClientMisbehaviour(chain, client) => { - self.state.ibc_client_misbehaviour(&chain, &client) - } - IbcReceivePacket(chain, channel, port, count) => { - self.state - .ibc_receive_packets(&chain, &channel, &port, count); - } - } - } -} diff --git a/telemetry/src/state.rs b/telemetry/src/state.rs index 8b81c76dbc..9c833db826 100644 --- a/telemetry/src/state.rs +++ b/telemetry/src/state.rs @@ -1,3 +1,5 @@ +use std::fmt; + use opentelemetry::{ global, metrics::{Counter, UpDownCounter}, @@ -6,50 +8,75 @@ use opentelemetry::{ use opentelemetry_prometheus::PrometheusExporter; use ibc::ics24_host::identifier::{ChainId, ChannelId, ClientId, PortId}; +use prometheus::proto::MetricFamily; + +#[derive(Copy, Clone, Debug)] +pub enum WorkerType { + Client, + Channel, + Packet, +} -use crate::metric::{Op, WorkerType}; +impl fmt::Display for WorkerType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Client => 
write!(f, "client"), + Self::Channel => write!(f, "channel"), + Self::Packet => write!(f, "packet"), + } + } +} #[derive(Debug)] pub struct TelemetryState { - pub exporter: PrometheusExporter, + exporter: PrometheusExporter, /// Number of workers per object - pub workers: UpDownCounter, + workers: UpDownCounter, /// Number of client updates per client - pub ibc_client_updates: Counter, + ibc_client_updates: Counter, /// Number of client misbehaviours per client - pub ibc_client_misbehaviours: Counter, + ibc_client_misbehaviours: Counter, /// Number of receive packets relayed, per channel - pub receive_packets: Counter, + receive_packets: Counter, } impl TelemetryState { - pub fn worker(&self, worker_type: WorkerType, op: Op) { + /// Gather the metrics for export + pub fn gather(&self) -> Vec { + self.exporter.registry().gather() + } + + /// Update the number of workers per object + pub fn worker(&self, worker_type: WorkerType, count: i64) { let labels = &[KeyValue::new("type", worker_type.to_string())]; - self.workers.add(op.to_i64(), labels); + self.workers.add(count, labels); } - pub fn ibc_client_update(&self, chain: &ChainId, client: &ClientId) { + /// Update the number of client updates per client + pub fn ibc_client_update(&self, chain: &ChainId, client: &ClientId, count: u64) { let labels = &[ KeyValue::new("chain", chain.to_string()), KeyValue::new("client", client.to_string()), ]; - self.ibc_client_updates.add(1, labels); + self.ibc_client_updates.add(count, labels); } - pub fn ibc_client_misbehaviour(&self, chain: &ChainId, client: &ClientId) { + /// Number of client misbehaviours per client + pub fn ibc_client_misbehaviour(&self, chain: &ChainId, client: &ClientId, count: u64) { let labels = &[ KeyValue::new("chain", chain.to_string()), KeyValue::new("client", client.to_string()), ]; - self.ibc_client_misbehaviours.add(1, labels); + self.ibc_client_misbehaviours.add(count, labels); } + /// Number of receive packets relayed, per channel pub fn 
ibc_receive_packets( &self, src_chain: &ChainId, From ffccfb2824799a8cec0eff27eaea1ef52ddcf4f0 Mon Sep 17 00:00:00 2001 From: Anca Zamfir Date: Fri, 28 May 2021 16:01:02 +0200 Subject: [PATCH 22/26] Add ack and timeout metrics --- relayer/src/worker/uni_chan_path.rs | 47 ++++++++++++++++++++++++++-- telemetry/src/state.rs | 48 +++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+), 3 deletions(-) diff --git a/relayer/src/worker/uni_chan_path.rs b/relayer/src/worker/uni_chan_path.rs index cd407eaaca..59868ad980 100644 --- a/relayer/src/worker/uni_chan_path.rs +++ b/relayer/src/worker/uni_chan_path.rs @@ -65,8 +65,7 @@ impl UniChanPathWorker { match result { Ok(summary) => { - metric!(self.receive_packet_metric(&summary)); - let _ = summary; + self.packet_metrics(&summary); } Err(retries) => { @@ -124,7 +123,14 @@ impl UniChanPathWorker { } #[cfg(feature = "telemetry")] - fn receive_packet_metric(&self, summary: &RelaySummary) { + fn packet_metrics(&self, summary: &RelaySummary) { + metric!(self.receive_packet_metrics(&summary)); + metric!(self.acknowledgment_metrics(&summary)); + metric!(self.timeout_metrics(&summary)); + } + + #[cfg(feature = "telemetry")] + fn receive_packet_metrics(&self, summary: &RelaySummary) { use ibc::events::IbcEvent::WriteAcknowledgement; let count = summary @@ -140,4 +146,39 @@ impl UniChanPathWorker { count as u64, ) } + + #[cfg(feature = "telemetry")] + fn acknowledgment_metrics(&self, summary: &RelaySummary) { + use ibc::events::IbcEvent::AcknowledgePacket; + + let count = summary + .events + .iter() + .filter(|e| matches!(e, AcknowledgePacket(_))) + .count(); + + self.telemetry.ibc_acknowledgment_packets( + &self.path.src_chain_id, + &self.path.src_channel_id, + &self.path.src_port_id, + count as u64, + ) + } + + #[cfg(feature = "telemetry")] + fn timeout_metrics(&self, summary: &RelaySummary) { + use ibc::events::IbcEvent::TimeoutPacket; + let count = summary + .events + .iter() + .filter(|e| matches!(e, TimeoutPacket(_))) + 
.count(); + + self.telemetry.ibc_timeout_packets( + &self.path.src_chain_id, + &self.path.src_channel_id, + &self.path.src_port_id, + count as u64, + ) + } } diff --git a/telemetry/src/state.rs b/telemetry/src/state.rs index 9c833db826..b5d9471659 100644 --- a/telemetry/src/state.rs +++ b/telemetry/src/state.rs @@ -42,6 +42,12 @@ pub struct TelemetryState { /// Number of receive packets relayed, per channel receive_packets: Counter, + + /// Number of acknowledgment packets relayed, per channel + acknowledgment_packets: Counter, + + /// Number of timeout packets relayed, per channel + timeout_packets: Counter, } impl TelemetryState { @@ -92,6 +98,38 @@ impl TelemetryState { self.receive_packets.add(count, labels); } + + pub fn ibc_acknowledgment_packets( + &self, + src_chain: &ChainId, + src_channel: &ChannelId, + src_port: &PortId, + count: u64, + ) { + let labels = &[ + KeyValue::new("src_chain", src_chain.to_string()), + KeyValue::new("src_channel", src_channel.to_string()), + KeyValue::new("src_port", src_port.to_string()), + ]; + + self.acknowledgment_packets.add(count, labels); + } + + pub fn ibc_timeout_packets( + &self, + src_chain: &ChainId, + src_channel: &ChannelId, + src_port: &PortId, + count: u64, + ) { + let labels = &[ + KeyValue::new("src_chain", src_chain.to_string()), + KeyValue::new("src_channel", src_channel.to_string()), + KeyValue::new("src_port", src_port.to_string()), + ]; + + self.timeout_packets.add(count, labels); + } } impl Default for TelemetryState { @@ -121,6 +159,16 @@ impl Default for TelemetryState { .u64_counter("ibc_receive_packets") .with_description("Number of receive packets relayed per channel") .init(), + + acknowledgment_packets: meter + .u64_counter("ibc_acknowledgment_packets") + .with_description("Number of acknowledgment packets relayed per channel") + .init(), + + timeout_packets: meter + .u64_counter("ibc_timeout_packets") + .with_description("Number of timeout packets relayed per channel") + .init(), } } } From 
4cf8e16e8287d6114b0610ac24025c5b9fdf54cb Mon Sep 17 00:00:00 2001 From: Anca Zamfir Date: Fri, 28 May 2021 16:17:09 +0200 Subject: [PATCH 23/26] Fix compilation when telemetry feature is not included --- relayer/src/worker/uni_chan_path.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/relayer/src/worker/uni_chan_path.rs b/relayer/src/worker/uni_chan_path.rs index 59868ad980..96e482e581 100644 --- a/relayer/src/worker/uni_chan_path.rs +++ b/relayer/src/worker/uni_chan_path.rs @@ -122,11 +122,11 @@ impl UniChanPathWorker { &self.path } - #[cfg(feature = "telemetry")] fn packet_metrics(&self, summary: &RelaySummary) { metric!(self.receive_packet_metrics(&summary)); metric!(self.acknowledgment_metrics(&summary)); metric!(self.timeout_metrics(&summary)); + let _ = summary; } #[cfg(feature = "telemetry")] From c4af7bbd49eea764da0da83d0a86efbedb055336 Mon Sep 17 00:00:00 2001 From: Adi Seredinschi Date: Mon, 31 May 2021 14:01:23 +0300 Subject: [PATCH 24/26] FMT --- relayer/src/chain/handle.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/relayer/src/chain/handle.rs b/relayer/src/chain/handle.rs index 8001a3589c..7dd282c9f9 100644 --- a/relayer/src/chain/handle.rs +++ b/relayer/src/chain/handle.rs @@ -31,14 +31,14 @@ use ibc::{ use ibc_proto::ibc::core::{ channel::v1::{ - PacketState, QueryChannelsRequest, QueryConnectionChannelsRequest, - QueryNextSequenceReceiveRequest, QueryPacketAcknowledgementsRequest, - QueryPacketCommitmentsRequest, QueryUnreceivedAcksRequest, QueryUnreceivedPacketsRequest, - QueryChannelClientStateRequest, + PacketState, QueryChannelClientStateRequest, QueryChannelsRequest, + QueryConnectionChannelsRequest, QueryNextSequenceReceiveRequest, + QueryPacketAcknowledgementsRequest, QueryPacketCommitmentsRequest, + QueryUnreceivedAcksRequest, QueryUnreceivedPacketsRequest, }, - client::v1::{QueryConsensusStatesRequest, QueryClientStatesRequest}, - connection::v1::QueryClientConnectionsRequest, + 
client::v1::{QueryClientStatesRequest, QueryConsensusStatesRequest}, commitment::v1::MerkleProof, + connection::v1::QueryClientConnectionsRequest, }; pub use prod::ProdChainHandle; From 0d8c926edd5f2623738aef7646134cafc70b4423 Mon Sep 17 00:00:00 2001 From: Romain Ruetschi Date: Tue, 1 Jun 2021 10:46:51 +0200 Subject: [PATCH 25/26] Rename metric! macro to telemetry! --- relayer/src/telemetry.rs | 13 +++++-------- relayer/src/worker/client.rs | 26 +++++++++++++++----------- relayer/src/worker/map.rs | 6 +++--- relayer/src/worker/uni_chan_path.rs | 14 +++++++------- 4 files changed, 30 insertions(+), 29 deletions(-) diff --git a/relayer/src/telemetry.rs b/relayer/src/telemetry.rs index abda4e7bf4..fd64b665a0 100644 --- a/relayer/src/telemetry.rs +++ b/relayer/src/telemetry.rs @@ -12,25 +12,22 @@ pub type Telemetry = TelemetryDisabled; /// A macro to send metric updates via a telemetry handle, /// only if the `telemetry` feature is enabled. -/// Otherwise, it compiles to a no-op which still -/// references the given field to avoid dead_code -/// warnings. +/// Otherwise, it compiles to a no-op. /// /// ## Note -/// The macro imports `ibc_telemetry::MetricUpdate` into scope and all its variants. +/// Equivalent to `#[cfg(feature = "telemetry")]`, but +/// should be preferred over the latter. /// /// ## Example /// /// ```rust,ignore -/// metric!(self.telemetry, TxCount(1)); +/// telemetry!(self.telemetry.tx_count(1)); /// ``` #[macro_export] -macro_rules! metric { +macro_rules! 
telemetry { ($e:expr) => { #[cfg(feature = "telemetry")] - #[allow(unused_imports)] { - use ibc_telemetry::state::WorkerType; $e; } }; diff --git a/relayer/src/worker/client.rs b/relayer/src/worker/client.rs index de23a9f15a..c9bdcef421 100644 --- a/relayer/src/worker/client.rs +++ b/relayer/src/worker/client.rs @@ -9,8 +9,8 @@ use ibc::{events::IbcEvent, ics02_client::events::UpdateClient}; use crate::{ chain::handle::ChainHandlePair, foreign_client::{ForeignClient, ForeignClientError, MisbehaviourResults}, - metric, object::Client, + telemetry, telemetry::Telemetry, }; @@ -67,11 +67,13 @@ impl ClientWorker { // Run client refresh, exit only if expired or frozen match client.refresh() { Ok(Some(_)) => { - metric!(self.telemetry.ibc_client_update( - &self.client.dst_chain_id, - &self.client.dst_client_id, - 1 - )); + telemetry! { + self.telemetry.ibc_client_update( + &self.client.dst_chain_id, + &self.client.dst_client_id, + 1 + ) + }; } Err(e @ ForeignClientError::ExpiredOrFrozen(..)) => { error!("failed to refresh client '{}': {}", client, e); @@ -94,11 +96,13 @@ impl ClientWorker { // Run misbehaviour. If evidence submitted the loop will exit in next // iteration with frozen client if self.detect_misbehaviour(&client, Some(update)) { - metric!(self.telemetry.ibc_client_misbehaviour( - &self.client.dst_chain_id, - &self.client.dst_client_id, - 1 - )); + telemetry! { + self.telemetry.ibc_client_misbehaviour( + &self.client.dst_chain_id, + &self.client.dst_client_id, + 1 + ) + }; } } } diff --git a/relayer/src/worker/map.rs b/relayer/src/worker/map.rs index 36ce67e86c..64538abac2 100644 --- a/relayer/src/worker/map.rs +++ b/relayer/src/worker/map.rs @@ -6,8 +6,8 @@ use ibc::ics24_host::identifier::ChainId; use crate::{ chain::handle::{ChainHandle, ChainHandlePair}, - metric, object::Object, + telemetry, telemetry::Telemetry, }; @@ -41,7 +41,7 @@ impl WorkerMap { /// the map and wait for its thread to terminate. 
pub fn remove_stopped(&mut self, object: &Object) -> bool { if let Some(handle) = self.workers.remove(object) { - metric!(self.telemetry.worker(metric_type(object), -1)); + telemetry!(self.telemetry.worker(metric_type(object), -1)); let _ = handle.join(); true } else { @@ -89,7 +89,7 @@ impl WorkerMap { dst: Box, object: &Object, ) -> WorkerHandle { - metric!(self.telemetry.worker(metric_type(object), 1)); + telemetry!(self.telemetry.worker(metric_type(object), 1)); Worker::spawn( ChainHandlePair { a: src, b: dst }, diff --git a/relayer/src/worker/uni_chan_path.rs b/relayer/src/worker/uni_chan_path.rs index 96e482e581..2a8b8540cc 100644 --- a/relayer/src/worker/uni_chan_path.rs +++ b/relayer/src/worker/uni_chan_path.rs @@ -7,8 +7,8 @@ use tracing::{error, warn}; use crate::{ chain::handle::ChainHandlePair, link::{Link, LinkParameters, RelaySummary}, - metric, object::UnidirectionalChannelPath, + telemetry, telemetry::Telemetry, util::retry::{retry_with_index, RetryResult}, worker::retry_strategy, @@ -64,8 +64,8 @@ impl UniChanPathWorker { }); match result { - Ok(summary) => { - self.packet_metrics(&summary); + Ok(_summary) => { + telemetry!(self.packet_metrics(&_summary)); } Err(retries) => { @@ -122,11 +122,11 @@ impl UniChanPathWorker { &self.path } + #[cfg(feature = "telemetry")] fn packet_metrics(&self, summary: &RelaySummary) { - metric!(self.receive_packet_metrics(&summary)); - metric!(self.acknowledgment_metrics(&summary)); - metric!(self.timeout_metrics(&summary)); - let _ = summary; + self.receive_packet_metrics(&summary); + self.acknowledgment_metrics(&summary); + self.timeout_metrics(&summary); } #[cfg(feature = "telemetry")] From 49640195c0eb710d7c93d2fa4dd66d5d493549cc Mon Sep 17 00:00:00 2001 From: Romain Ruetschi Date: Tue, 1 Jun 2021 10:48:34 +0200 Subject: [PATCH 26/26] Add `clippy --no-default-features` to CI --- .github/workflows/rust.yml | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/.github/workflows/rust.yml 
b/.github/workflows/rust.yml index 5ad00d20b9..1976c07649 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -42,7 +42,7 @@ jobs: command: fmt args: --all -- --check - clippy_check: + clippy_all_features: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 @@ -56,6 +56,20 @@ jobs: token: ${{ secrets.GITHUB_TOKEN }} args: --all-features --all-targets + clippy_no_default_features: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + toolchain: stable + components: clippy + override: true + - uses: actions-rs/clippy-check@v1 + with: + token: ${{ secrets.GITHUB_TOKEN }} + args: --no-default-features --all-targets + test-stable: runs-on: ubuntu-latest steps: