From 2cd9062a304e4f0b8991a47332ed1225f75b4789 Mon Sep 17 00:00:00 2001 From: Andy Pliszka Date: Fri, 25 Oct 2024 13:19:59 -0400 Subject: [PATCH] feat: add separate flag for metrics (#1504) * feat: adds metrics server address to config and sets localhost defaults * uses metrics-listen-addr for metrics server * sets default debug server api-listen-addr to 127.0.0.1:5183 * sets default metrics server metrics-listen-addr to 127.0.0.1:9100 * refactor: separates debug and metrics server * moves metrics to separate server * feat: changes the default metrics port to 5184 * refactor: removes address flags for debug and metrics server * removes address flags so config file is the source of truth for addresses * adds tests for flags to enable debug and metrics servers * adds tests for config file settings for addresses for debug and metrics servers * adjusts log messages * refactor: renames api-listen-addr to debug-listen-addr * feat: restores cli flags to set addresses for debug and metrics servers * restores address flags * renames `--debug-addr` to `--debug-listen-addr` to be consistent with config file setting `debug-listen-addr:` * renames `--metrics-addr` to `--metrics-listen-addr` to be consistent with config file setting `metrics-listen-addr:` * updates docs * feat: improves error reporting * chore: code cleanup * feat: rollbacks --debug-addr flag * feat: deprecates api-listen-addr * fix: closes body * fix: reformat log messages to follow styleguide --- README.md | 2 +- cmd/config.go | 30 +- cmd/config_test.go | 64 ++++ cmd/flags.go | 48 ++- cmd/start.go | 139 ++++++-- cmd/start_test.go | 399 +++++++++++++++++++++++ docs/advanced_usage.md | 6 +- docs/troubleshooting.md | 6 +- examples/config_EXAMPLE.yaml | 3 +- interchaintest/relayer.go | 2 +- internal/relaydebug/debugserver.go | 12 +- internal/relayermetrics/metricsserver.go | 43 +++ internal/relayertest/system.go | 13 + 13 files changed, 699 insertions(+), 68 deletions(-) create mode 100644 cmd/config_test.go create mode 100644 cmd/start_test.go create mode 100644 internal/relayermetrics/metricsserver.go diff --git a/README.md b/README.md index 3c9d2e5df..88e296a93 100644 --- a/README.md +++ b/README.md @@ -193,7 +193,7 @@ Additional information on how IBC works can be found [here](https://ibc.cosmos.n $ rly start ``` - >When running multiple instances of `rly start`, you will need to use the `--debug-addr` flag and provide an address:port. You can also pass an empty string `''` to turn off this feature or pass `localhost:0` to randomly select a port. + >When running multiple instances of `rly start`, you will need to use the `--enable-debug-server` and `--debug-listen-addr` flag and provide an address:port. You can also pass an empty string `''` to turn off this feature or pass `localhost:0` to randomly select a port. --- [[TROUBLESHOOTING](docs/troubleshooting.md)] diff --git a/cmd/config.go b/cmd/config.go index 253580507..a211c85a9 100644 --- a/cmd/config.go +++ b/cmd/config.go @@ -488,24 +488,28 @@ func DefaultConfig(memo string) *Config { // GlobalConfig describes any global relayer settings type GlobalConfig struct { - APIListenPort string `yaml:"api-listen-addr" json:"api-listen-addr"` - Timeout string `yaml:"timeout" json:"timeout"` - Memo string `yaml:"memo" json:"memo"` - LightCacheSize int `yaml:"light-cache-size" json:"light-cache-size"` - LogLevel string `yaml:"log-level" json:"log-level"` - ICS20MemoLimit int `yaml:"ics20-memo-limit" json:"ics20-memo-limit"` - MaxReceiverSize int `yaml:"max-receiver-size" json:"max-receiver-size"` + ApiListenPort string `yaml:"api-listen-addr,omitempty" json:"api-listen-addr,omitempty"` + DebugListenPort string `yaml:"debug-listen-addr" json:"debug-listen-addr"` + MetricsListenPort string `yaml:"metrics-listen-addr" json:"metrics-listen-addr"` + Timeout string `yaml:"timeout" json:"timeout"` + Memo string `yaml:"memo" json:"memo"` + LightCacheSize int `yaml:"light-cache-size" json:"light-cache-size"` + LogLevel string `yaml:"log-level" json:"log-level"` + ICS20MemoLimit int `yaml:"ics20-memo-limit" json:"ics20-memo-limit"` + MaxReceiverSize int `yaml:"max-receiver-size" json:"max-receiver-size"` } // newDefaultGlobalConfig returns a global config with defaults set func newDefaultGlobalConfig(memo string) GlobalConfig { return GlobalConfig{ - APIListenPort: ":5183", - Timeout: "10s", - LightCacheSize: 20, - Memo: memo, - LogLevel: "info", - MaxReceiverSize: 150, + ApiListenPort: "", + DebugListenPort: "127.0.0.1:5183", + MetricsListenPort: "127.0.0.1:5184", + Timeout: "10s", + LightCacheSize: 20, + Memo: memo, + LogLevel: "info", + MaxReceiverSize: 150, } } diff --git a/cmd/config_test.go b/cmd/config_test.go new file mode 100644 index 000000000..226ee3aa3 --- /dev/null +++ b/cmd/config_test.go @@ -0,0 +1,64 @@ +package cmd_test + +import ( + "fmt" + "os" + "testing" + + "github.com/cosmos/relayer/v2/cmd" + "github.com/cosmos/relayer/v2/internal/relayertest" + "github.com/cosmos/relayer/v2/relayer/chains/cosmos" + "github.com/stretchr/testify/require" +) + +func TestDefaultConfig(t *testing.T) { + t.Parallel() + + sys := relayertest.NewSystem(t) + + _ = sys.MustRun(t, "config", "init") + + sys.MustAddChain(t, "testChain", cmd.ProviderConfigWrapper{ + Type: "cosmos", + Value: cosmos.CosmosProviderConfig{ + ChainID: "testcosmos", + KeyringBackend: "test", + Timeout: "10s", + }, + }) + + tests := []struct { + setting string + wantedPresent bool + }{ + { + "debug-listen-addr: 127.0.0.1:5183", + true, + }, + { + "metrics-listen-addr: 127.0.0.1:5184", + true, + }, + { + "api-listen-addr: 127.0.0.1:5184", + false, + }, + } + + for _, tt := range tests { + t.Run(tt.setting, func(t *testing.T) { + sys := setupRelayer(t) + + configFile := fmt.Sprintf("%s/config/config.yaml", sys.HomeDir) + data, err := os.ReadFile(configFile) + require.NoError(t, err) + config := string(data) + + if tt.wantedPresent { + require.Contains(t, config, tt.setting) + } else { + require.NotContains(t, config, tt.setting) + } + }) + } +} diff --git a/cmd/flags.go b/cmd/flags.go index a0c42d3ef..00524fa8e 100644 --- a/cmd/flags.go +++ b/cmd/flags.go @@ -38,8 +38,11 @@ const ( flagDstPort = "dst-port" flagOrder = "order" flagVersion = "version" - flagEnableDebugServer = "enable-debug-server" flagDebugAddr = "debug-addr" + flagEnableDebugServer = "enable-debug-server" + flagDebugListenAddr = "debug-listen-addr" + flagEnableMetricsServer = "enable-metrics-server" + flagMetricsListenAddr = "metrics-listen-addr" flagOverwriteConfig = "overwrite" flagLimit = "limit" flagHeight = "height" @@ -422,14 +425,27 @@ func debugServerFlags(v *viper.Viper, cmd *cobra.Command) *cobra.Command { cmd.Flags().String( flagDebugAddr, "", - "address to use for debug and metrics server. By default, "+ - "will be the api-listen-addr parameter in the global config.", + "address to use for debug server. By default, "+ + "will be the debug-listen-addr parameter in the global config. "+ + "DEPRECATED: Use --debug-listen-addr flag.", ) if err := v.BindPFlag(flagDebugAddr, cmd.Flags().Lookup(flagDebugAddr)); err != nil { panic(err) } + cmd.Flags().String( + flagDebugListenAddr, + "", + "address to use for debug server. By default, "+ + "will be the debug-listen-addr parameter in the global config. "+ + "Make sure to enable debug server using --enable-debug-server flag.", + ) + + if err := v.BindPFlag(flagDebugListenAddr, cmd.Flags().Lookup(flagDebugListenAddr)); err != nil { + panic(err) + } + cmd.Flags().Bool( flagEnableDebugServer, false, @@ -443,6 +459,32 @@ func debugServerFlags(v *viper.Viper, cmd *cobra.Command) *cobra.Command { return cmd } +func metricsServerFlags(v *viper.Viper, cmd *cobra.Command) *cobra.Command { + cmd.Flags().String( + flagMetricsListenAddr, + "", + "address to use for metrics server. By default, "+ + "will be the metrics-listen-addr parameter in the global config. "+ + "Make sure to enable metrics server using --enable-metrics-server flag.", + ) + + if err := v.BindPFlag(flagMetricsListenAddr, cmd.Flags().Lookup(flagMetricsListenAddr)); err != nil { + panic(err) + } + + cmd.Flags().Bool( + flagEnableMetricsServer, + false, + "enables metrics server. By default, the metrics server is disabled due to security concerns.", + ) + + if err := v.BindPFlag(flagEnableMetricsServer, cmd.Flags().Lookup(flagEnableMetricsServer)); err != nil { + panic(err) + } + + return cmd +} + func processorFlag(v *viper.Viper, cmd *cobra.Command) *cobra.Command { cmd.Flags().StringP(flagProcessor, "p", relayer.ProcessorEvents, "which relayer processor to use") if err := v.BindPFlag(flagProcessor, cmd.Flags().Lookup(flagProcessor)); err != nil { diff --git a/cmd/start.go b/cmd/start.go index 8e1d38d5d..3d74a2637 100644 --- a/cmd/start.go +++ b/cmd/start.go @@ -24,6 +24,7 @@ import ( "strings" "github.com/cosmos/relayer/v2/internal/relaydebug" + "github.com/cosmos/relayer/v2/internal/relayermetrics" "github.com/cosmos/relayer/v2/relayer" "github.com/cosmos/relayer/v2/relayer/chains/cosmos" "github.com/cosmos/relayer/v2/relayer/processor" @@ -92,49 +93,16 @@ $ %s start demo-path2 --max-tx-size 10`, appName, appName, appName, appName)), return err } - var prometheusMetrics *processor.PrometheusMetrics - - debugAddr := a.config.Global.APIListenPort - - debugAddrFlag, err := cmd.Flags().GetString(flagDebugAddr) + err = setupDebugServer(cmd, a, err) if err != nil { return err } - if debugAddrFlag != "" { - debugAddr = debugAddrFlag - } - - flagEnableDebugServer, err := cmd.Flags().GetBool(flagEnableDebugServer) + prometheusMetrics, err := setupMetricsServer(cmd, a, err, chains) if err != nil { return err } - if flagEnableDebugServer == false || debugAddr == "" { - a.log.Info("Skipping debug server due to empty debug address flag") - } else { - a.log.Warn("SECURITY WARNING! Debug server is enabled. It should only be used for non-production deployments.") - ln, err := net.Listen("tcp", debugAddr) - if err != nil { - a.log.Error( - "Failed to listen on debug address. If you have another relayer process open, use --" + - flagDebugAddr + - " to pick a different address.", - ) - - return fmt.Errorf("failed to listen on debug address %q: %w", debugAddr, err) - } - log := a.log.With(zap.String("sys", "debughttp")) - log.Info("Debug server listening", zap.String("addr", debugAddr)) - prometheusMetrics = processor.NewPrometheusMetrics() - relaydebug.StartDebugServer(cmd.Context(), log, ln, prometheusMetrics.Registry) - for _, chain := range chains { - if ccp, ok := chain.ChainProvider.(*cosmos.CosmosProvider); ok { - ccp.SetMetrics(prometheusMetrics) - } - } - } - processorType, err := cmd.Flags().GetString(flagProcessor) if err != nil { return err @@ -195,6 +163,7 @@ $ %s start demo-path2 --max-tx-size 10`, appName, appName, appName, appName)), cmd = updateTimeFlags(a.viper, cmd) cmd = strategyFlag(a.viper, cmd) cmd = debugServerFlags(a.viper, cmd) + cmd = metricsServerFlags(a.viper, cmd) cmd = processorFlag(a.viper, cmd) cmd = initBlockFlag(a.viper, cmd) cmd = flushIntervalFlag(a.viper, cmd) @@ -202,3 +171,103 @@ $ %s start demo-path2 --max-tx-size 10`, appName, appName, appName, appName)), cmd = stuckPacketFlags(a.viper, cmd) return cmd } + +func setupMetricsServer(cmd *cobra.Command, a *appState, err error, chains map[string]*relayer.Chain) (*processor.PrometheusMetrics, error) { + var prometheusMetrics *processor.PrometheusMetrics + + metricsListenAddr := a.config.Global.MetricsListenPort + + metricsListenAddrFlag, err := cmd.Flags().GetString(flagMetricsListenAddr) + if err != nil { + return nil, err + } + + if metricsListenAddrFlag != "" { + metricsListenAddr = metricsListenAddrFlag + } + + flagEnableMetricsServer, err := cmd.Flags().GetBool(flagEnableMetricsServer) + if err != nil { + return nil, err + } + + if flagEnableMetricsServer == false { + a.log.Info("Metrics server is disabled you can enable it using --enable-metrics-server flag") + } else if metricsListenAddr == "" { + a.log.Warn("Disabled metrics server due to missing metrics-listen-addr setting in config file or --metrics-listen-addr flag") + } else { + a.log.Info("Metrics server is enabled") + ln, err := net.Listen("tcp", metricsListenAddr) + if err != nil { + a.log.Error(fmt.Sprintf("Failed to start metrics server you can change the address and port using metrics-listen-addr config settingh or --metrics-listen-flag")) + + return nil, fmt.Errorf("failed to listen on metrics address %q: %w", metricsListenAddr, err) + } + log := a.log.With(zap.String("sys", "metricshttp")) + log.Info("Metrics server listening", zap.String("addr", metricsListenAddr)) + prometheusMetrics = processor.NewPrometheusMetrics() + relayermetrics.StartMetricsServer(cmd.Context(), log, ln, prometheusMetrics.Registry) + for _, chain := range chains { + if ccp, ok := chain.ChainProvider.(*cosmos.CosmosProvider); ok { + ccp.SetMetrics(prometheusMetrics) + } + } + } + return prometheusMetrics, nil +} + +func setupDebugServer(cmd *cobra.Command, a *appState, err error) error { + debugListenAddr := a.config.Global.DebugListenPort + + if debugListenAddr == "" { + debugListenAddr = a.config.Global.ApiListenPort + if debugListenAddr != "" { + a.log.Warn("DEPRECATED: api-listen-addr config setting is deprecated use debug-listen-addr instead") + } + } + + debugAddrFlag, err := cmd.Flags().GetString(flagDebugAddr) + if err != nil { + return err + } + + debugListenAddrFlag, err := cmd.Flags().GetString(flagDebugListenAddr) + if err != nil { + return err + } + + if debugAddrFlag != "" { + debugListenAddr = debugAddrFlag + a.log.Warn("DEPRECATED: --debug-addr flag is deprecated use --enable-debug-server and --debug-listen-addr instead") + } + + if debugListenAddrFlag != "" { + debugListenAddr = debugListenAddrFlag + } + + flagEnableDebugServer, err := cmd.Flags().GetBool(flagEnableDebugServer) + if err != nil { + return err + } + + enableDebugServer := flagEnableDebugServer == true || debugAddrFlag != "" + + if enableDebugServer == false { + a.log.Info("Debug server is disabled you can enable it using --enable-debug-server flag") + } else if debugListenAddr == "" { + a.log.Warn("Disabled debug server due to missing debug-listen-addr setting in config file or --debug-listen-addr flag") + } else { + a.log.Info("Debug server is enabled") + a.log.Warn("SECURITY WARNING! Debug server should only be run with caution and proper security in place") + ln, err := net.Listen("tcp", debugListenAddr) + if err != nil { + a.log.Error(fmt.Sprintf("Failed to start debug server you can change the address and port using debug-listen-addr config settingh or --debug-listen-flag")) + + return fmt.Errorf("failed to listen on debug address %q: %w", debugListenAddr, err) + } + log := a.log.With(zap.String("sys", "debughttp")) + log.Info("Debug server listening", zap.String("addr", debugListenAddr)) + relaydebug.StartDebugServer(cmd.Context(), log, ln) + } + return nil +} diff --git a/cmd/start_test.go b/cmd/start_test.go new file mode 100644 index 000000000..e6cd30bf6 --- /dev/null +++ b/cmd/start_test.go @@ -0,0 +1,399 @@ +package cmd_test + +import ( + "errors" + "fmt" + "net" + "net/http" + "os" + "strings" + "testing" + + "github.com/cosmos/relayer/v2/cmd" + "github.com/cosmos/relayer/v2/internal/relaydebug" + "github.com/cosmos/relayer/v2/internal/relayermetrics" + "github.com/cosmos/relayer/v2/internal/relayertest" + "github.com/cosmos/relayer/v2/relayer/chains/cosmos" + "github.com/stretchr/testify/require" + "go.uber.org/zap" + "go.uber.org/zap/zaptest/observer" +) + +func TestMetricsServerFlags(t *testing.T) { + t.Parallel() + + tests := []struct { + description string + args []string + wantedPort int + wantedRunning bool + wantedMessages []string + err error + }{ + { + "should start relayer without metrics server given no flags", + []string{"start"}, + 0, + false, + []string{"Metrics server is disabled you can enable it using --enable-metrics-server flag"}, + nil, + }, + { + "should start relayer with metrics server running on default port", + []string{"start", "--enable-metrics-server"}, + relayermetrics.MetricsServerPort, + true, + []string{"Metrics server is enabled", "Metrics server listening"}, + nil, + }, + { + "should start relayer with metrics server running on custom port", + []string{"start", "--enable-metrics-server", "--metrics-listen-addr", "127.0.0.1:7778"}, + 7778, + true, + []string{"Metrics server is enabled", "Metrics server listening"}, + nil, + }, + { + "should start relayer without metrics server when metrics server is not enabled", + []string{"start", "--metrics-listen-addr", "127.0.0.1:7778"}, + 0, + false, + []string{"Metrics server is disabled you can enable it using --enable-metrics-server flag"}, + nil, + }, + { + "should not start relayer and report an error when address is not provided", + []string{"start", "--metrics-listen-addr"}, + 0, + false, + nil, + errors.New("flag needs an argument: --metrics-listen-addr"), + }, + } + + for _, tt := range tests { + t.Run(tt.description, func(t *testing.T) { + sys := setupRelayer(t) + logs, logger := setupLogger() + + result := sys.MustRunWithLogger(t, logger, tt.args...) + + if tt.err != nil { + require.Error(t, result.Err, tt.err) + } else { + require.NoError(t, result.Err) + } + + if tt.wantedRunning == true { + requireRunningMetricsServer(t, logs, tt.wantedPort) + } else { + requireDisabledMetricsServer(t, logs, tt.wantedPort) + } + requireMessages(t, logs, tt.wantedMessages) + }) + } +} + +func TestMetricsServerConfig(t *testing.T) { + t.Parallel() + + tests := []struct { + description string + args []string + newSetting string + wantedPort int + serverRunning bool + wantedMessages []string + }{ + { + "should starts relayer on custom address and port provided in config file", + []string{"start", "--enable-metrics-server"}, + "metrics-listen-addr: 127.0.0.1:6184", + 6184, + true, + []string{"Metrics server is enabled", "Metrics server listening"}, + }, + { + "should starts relayer on custom address provided via flag", + []string{"start", "--enable-metrics-server", "--metrics-listen-addr", "127.0.0.1:7184"}, + "", + 7184, + true, + []string{"Metrics server is enabled", "Metrics server listening"}, + }, + } + + for _, tt := range tests { + t.Run(tt.description, func(t *testing.T) { + sys := setupRelayer(t) + + updateConfig(t, sys, "metrics-listen-addr: 127.0.0.1:5184", tt.newSetting) + + logs, logger := setupLogger() + + sys.MustRunWithLogger(t, logger, tt.args...) + + if tt.serverRunning == true { + requireRunningMetricsServer(t, logs, tt.wantedPort) + } else { + requireDisabledMetricsServer(t, logs, tt.wantedPort) + } + requireMessages(t, logs, tt.wantedMessages) + }) + } +} + +func TestMissingMetricsListenAddr(t *testing.T) { + sys := setupRelayer(t) + + logs, logger := setupLogger() + + updateConfig(t, sys, "metrics-listen-addr: 127.0.0.1:5184", "") + + sys.MustRunWithLogger(t, logger, []string{"start", "--enable-metrics-server"}...) + + requireDisabledMetricsServer(t, logs, 0) + requireMessage(t, logs, "Disabled metrics server due to missing metrics-listen-addr setting in config file or --metrics-listen-addr flag") +} + +func TestDebugServerFlags(t *testing.T) { + t.Parallel() + + tests := []struct { + description string + args []string + wantedPort int + wantedRunning bool + wantedMessages []string + err error + }{ + { + "should start relayer without debug server", + []string{"start"}, + 0, + false, + []string{"Debug server is disabled you can enable it using --enable-debug-server flag"}, + nil, + }, + { + "should not start the relayer and report an error when address is missing", + []string{"start", "--debug-addr"}, + 0, + false, + nil, + errors.New("flag needs an argument: --debug-addr"), + }, + { + "should start relayer with debug server given --debug-addr flag and address", + []string{"start", "--debug-addr", "127.0.0.1:7777"}, + 7777, + true, + []string{ + "Debug server is enabled", "SECURITY WARNING! Debug server should only be run with caution and proper security in place", + "DEPRECATED: --debug-addr flag is deprecated use --enable-debug-server and --debug-listen-addr instead", + }, + nil, + }, + { + "should start relayer with debug server given --enable-debug-server flag", + []string{"start", "--enable-debug-server"}, + relaydebug.DebugServerPort, + true, + []string{"Debug server is enabled", "SECURITY WARNING! Debug server should only be run with caution and proper security in place"}, + nil, + }, + { + "should start relayer with debug server given --enable-debug-server flag and an address", + []string{"start", "--enable-debug-server", "--debug-listen-addr", "127.0.0.1:7779"}, + 7779, + true, + []string{"Debug server is enabled", "SECURITY WARNING! Debug server should only be run with caution and proper security in place"}, + nil, + }, + { + "should not start relayer and report an error when address is missing", + []string{"start", "--enable-debug-server", "--debug-listen-addr"}, + 0, + false, + nil, + errors.New("flag needs an argument: --debug-listen-addr"), + }, + } + + for _, tt := range tests { + t.Run(tt.description, func(t *testing.T) { + sys := setupRelayer(t) + logs, logger := setupLogger() + + result := sys.MustRunWithLogger(t, logger, tt.args...) + + if tt.err != nil { + require.Error(t, result.Err, tt.err) + } else { + require.NoError(t, result.Err) + } + + if tt.wantedRunning == true { + requireRunningDebugServer(t, logs, tt.wantedPort) + } else { + requireDisabledDebugServer(t, logs, tt.wantedPort) + } + requireMessages(t, logs, tt.wantedMessages) + }) + } +} + +func TestDebugServerConfig(t *testing.T) { + t.Parallel() + + tests := []struct { + description string + args []string + newSetting string + wantedPort int + wantedRunning bool + }{ + { + "should start debug server on custom address and port set in config file", + []string{"start", "--enable-debug-server"}, + "debug-listen-addr: 127.0.0.1:6183", + 6183, + true, + }, + { + "should start debug server on custom address and port set via flag", + []string{"start", "--enable-debug-server", "--debug-listen-addr", "127.0.0.1:7183"}, + "debug-listen-addr: 127.0.0.1:6183", + 7183, + true, + }, + { + "should start debug server on custom address and port set via deprecated flag", + []string{"start", "--enable-debug-server", "--debug-addr", "127.0.0.1:9183"}, + "debug-listen-addr: 127.0.0.1:6183", + 9183, + true, + }, + { + "should start debug server on custom address and port set via deprecated config", + []string{"start", "--enable-debug-server"}, + "api-listen-addr: 127.0.0.1:10183", + 10183, + true, + }, + } + + for _, tt := range tests { + t.Run(tt.description, func(t *testing.T) { + sys := setupRelayer(t) + + updateConfig(t, sys, "debug-listen-addr: 127.0.0.1:5183", tt.newSetting) + + logs, logger := setupLogger() + + sys.MustRunWithLogger(t, logger, tt.args...) + + if tt.wantedRunning == true { + requireRunningDebugServer(t, logs, tt.wantedPort) + } else { + requireDisabledDebugServer(t, logs, tt.wantedPort) + } + }) + } +} + +func TestMissingDebugListenAddr(t *testing.T) { + sys := setupRelayer(t) + + logs, logger := setupLogger() + + updateConfig(t, sys, "debug-listen-addr: 127.0.0.1:5183", "") + + sys.MustRunWithLogger(t, logger, []string{"start", "--enable-debug-server"}...) + + requireDisabledMetricsServer(t, logs, 0) + requireMessage(t, logs, "Disabled debug server due to missing debug-listen-addr setting in config file or --debug-listen-addr flag") +} + +func requireDisabledMetricsServer(t *testing.T, logs *observer.ObservedLogs, port int) { + conn, err := net.Dial("tcp", fmt.Sprintf("127.0.0.1:%d", port)) + if conn != nil { + defer conn.Close() + } + require.Error(t, err, "Server should be disabled") +} + +func requireRunningMetricsServer(t *testing.T, logs *observer.ObservedLogs, port int) { + conn, err := net.Dial("tcp", fmt.Sprintf("127.0.0.1:%d", port)) + if conn != nil { + defer conn.Close() + } + require.NoError(t, err, fmt.Sprintf("Metrics server should be running on port %d", port)) + res, err := http.Get(fmt.Sprintf("http://127.0.0.1:%d/metrics", port)) + require.NoError(t, err) + defer res.Body.Close() + require.Equal(t, res.StatusCode, 200) +} + +func requireDisabledDebugServer(t *testing.T, logs *observer.ObservedLogs, port int) { + conn, err := net.Dial("tcp", fmt.Sprintf("127.0.0.1:%d", port)) + if conn != nil { + defer conn.Close() + } + require.Error(t, err, "Server should be disabled") +} + +func requireRunningDebugServer(t *testing.T, logs *observer.ObservedLogs, port int) { + conn, err := net.Dial("tcp", fmt.Sprintf("127.0.0.1:%d", port)) + if conn != nil { + defer conn.Close() + } + require.NoError(t, err, fmt.Sprintf("Server should be running on port %d", port)) + res, err := http.Get(fmt.Sprintf("http://127.0.0.1:%d/debug/pprof/goroutine", port)) + require.NoError(t, err) + defer res.Body.Close() + require.Equal(t, res.StatusCode, 200) +} + +func requireMessages(t *testing.T, logs *observer.ObservedLogs, messages []string) { + for _, message := range messages { + requireMessage(t, logs, message) + } +} + +func requireMessage(t *testing.T, logs *observer.ObservedLogs, message string) { + require.Len(t, logs.FilterMessage(message).All(), 1, fmt.Sprintf("Expected message '%s' not found in logs", message), logs.All()) +} + +func setupLogger() (*observer.ObservedLogs, *zap.Logger) { + observedZapCore, observedLogs := observer.New(zap.InfoLevel) + observedLogger := zap.New(observedZapCore) + return observedLogs, observedLogger +} + +func setupRelayer(t *testing.T) *relayertest.System { + sys := relayertest.NewSystem(t) + + _ = sys.MustRun(t, "config", "init") + + sys.MustAddChain(t, "testChain", cmd.ProviderConfigWrapper{ + Type: "cosmos", + Value: cosmos.CosmosProviderConfig{ + ChainID: "testcosmos", + KeyringBackend: "test", + Timeout: "10s", + }, + }) + return sys +} + +func updateConfig(t *testing.T, sys *relayertest.System, oldSetting string, newSetting string) { + configFile := fmt.Sprintf("%s/config/config.yaml", sys.HomeDir) + data, err := os.ReadFile(configFile) + require.NoError(t, err) + + newConfig := strings.Replace(string(data), oldSetting, newSetting, 1) + + os.WriteFile(configFile, []byte(newConfig), 0644) +} diff --git a/docs/advanced_usage.md b/docs/advanced_usage.md index def6c6b96..91fc94f0b 100644 --- a/docs/advanced_usage.md +++ b/docs/advanced_usage.md @@ -4,8 +4,10 @@ **Prometheus exporter** -If you started `rly` with the default `--debug-addr` argument, -you can use `http://$IP:5183/relayer/metrics` as a target for your prometheus scraper. +If you started `rly` with `--enable-metrics-server` argument, +you can use `http://127.0.0.1:5184/relayer/metrics` as a target for your prometheus scraper. + +You can use `--metrics-listen-addr $IP:7777` to customize the address and port where the metrics server. Exported metrics: diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index 1988a16fa..af7c5ca21 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -65,11 +65,11 @@ $ rly query clients-expiration ## Inspect Go runtime debug data -If you started `rly` with the default `--debug-addr` argument, -you can open `http://localhost:7597` in your browser to explore details from the Go runtime. +If you started `rly` with `--enable-debug-server` argument, +you can open `http://127.0.0.1:5183` in your browser to explore details from the Go runtime. If you need active assistance from the Relayer development team regarding an unresponsive Relayer instance, -it will be helpful to provide the output from `http://localhost:7597/debug/pprof/goroutine?debug=2` at a minimum. +it will be helpful to provide the output from `http://127.0.0.1:7597/debug/pprof/goroutine?debug=2` at a minimum.
diff --git a/examples/config_EXAMPLE.yaml b/examples/config_EXAMPLE.yaml index 900d9d608..ff78a5145 100644 --- a/examples/config_EXAMPLE.yaml +++ b/examples/config_EXAMPLE.yaml @@ -1,5 +1,6 @@ global: - api-listen-addr: :5183 + debug-listen-addr: 127.0.0.1:5183 + metrics-listen-addr: 127.0.0.1:5184 timeout: 10s memo: "" light-cache-size: 20 diff --git a/interchaintest/relayer.go b/interchaintest/relayer.go index 0aa4b1945..2ece6b085 100644 --- a/interchaintest/relayer.go +++ b/interchaintest/relayer.go @@ -290,7 +290,7 @@ func (r *Relayer) start(ctx context.Context, remainingArgs ...string) { // Start the debug server on a random port. // It won't be reachable without introspecting the output, // but this will allow catching any possible data races around the debug server. - args := append([]string{"start", "--debug-addr", "localhost:0"}, remainingArgs...) + args := append([]string{"start", "--enable-debug-server", "--debug-listen-addr", "localhost:0", "--enable-metrics-server", "--metrics-listen-addr", "localhost:0"}, remainingArgs...) res := r.Sys().RunC(ctx, r.log(), args...) if res.Err != nil { r.errCh <- res.Err diff --git a/internal/relaydebug/debugserver.go b/internal/relaydebug/debugserver.go index d72721a71..3eed17342 100644 --- a/internal/relaydebug/debugserver.go +++ b/internal/relaydebug/debugserver.go @@ -6,16 +6,16 @@ import ( "net/http" "net/http/pprof" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promhttp" "go.uber.org/zap" ) +const DebugServerPort = 5183 + // StartDebugServer starts a debug server in a background goroutine, // accepting connections on the given listener. // Any HTTP logging will be written at info level to the given logger. // The server will be forcefully shut down when ctx finishes. -func StartDebugServer(ctx context.Context, log *zap.Logger, ln net.Listener, registry *prometheus.Registry) { +func StartDebugServer(ctx context.Context, log *zap.Logger, ln net.Listener) { // Although we could just import net/http/pprof and rely on the default global server, // we may want many instances of this in test, // and we will probably want more endpoints as time goes on, @@ -33,12 +33,6 @@ func StartDebugServer(ctx context.Context, log *zap.Logger, ln net.Listener, reg // so operators don't see a mysterious 404 page. mux.Handle("/", http.RedirectHandler("/debug/pprof", http.StatusSeeOther)) - // Serve default prometheus metrics - mux.Handle("/metrics", promhttp.Handler()) - - // Serve relayer metrics - mux.Handle("/relayer/metrics", promhttp.HandlerFor(registry, promhttp.HandlerOpts{})) - srv := &http.Server{ Handler: mux, ErrorLog: zap.NewStdLog(log), diff --git a/internal/relayermetrics/metricsserver.go b/internal/relayermetrics/metricsserver.go new file mode 100644 index 000000000..fc2183fe4 --- /dev/null +++ b/internal/relayermetrics/metricsserver.go @@ -0,0 +1,43 @@ +package relayermetrics + +import ( + "context" + "net" + "net/http" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" + "go.uber.org/zap" +) + +const MetricsServerPort = 5184 + +// StartMetricsServer starts a metrics server in a background goroutine, +// accepting connections on the given listener. +// Any HTTP logging will be written at info level to the given logger. +// The server will be forcefully shut down when ctx finishes. +func StartMetricsServer(ctx context.Context, log *zap.Logger, ln net.Listener, registry *prometheus.Registry) { + // Set up new mux identical to the default mux configuration in net/http/pprof. + mux := http.NewServeMux() + + // Serve default prometheus metrics + mux.Handle("/metrics", promhttp.Handler()) + + // Serve relayer metrics + mux.Handle("/relayer/metrics", promhttp.HandlerFor(registry, promhttp.HandlerOpts{})) + + srv := &http.Server{ + Handler: mux, + ErrorLog: zap.NewStdLog(log), + BaseContext: func(net.Listener) context.Context { + return ctx + }, + } + + go srv.Serve(ln) + + go func() { + <-ctx.Done() + srv.Close() + }() +} diff --git a/internal/relayertest/system.go b/internal/relayertest/system.go index 59daad624..88a95943b 100644 --- a/internal/relayertest/system.go +++ b/internal/relayertest/system.go @@ -106,6 +106,19 @@ func (s *System) MustRunWithInput(t *testing.T, in io.Reader, args ...string) Ru return res } +func (s *System) MustRunWithLogger(t *testing.T, logger *zap.Logger, args ...string) RunResult { + t.Helper() + + res := s.RunWithInput(logger, bytes.NewReader(nil), args...) + if res.Err != nil { + t.Logf("Error executing %v: %v", args, res.Err) + t.Logf("Stdout: %q", res.Stdout.String()) + t.Logf("Stderr: %q", res.Stderr.String()) + } + + return res +} + // MustAddChain serializes pcw to disk and calls "chains add --file". func (s *System) MustAddChain(t *testing.T, chainName string, pcw cmd.ProviderConfigWrapper) { t.Helper()