Skip to content

Commit

Permalink
Node health (#1203)
Browse files Browse the repository at this point in the history
* chore: link to SSV API docs in configs & README

* initial commit

* add node health route to ssv API

* update health route

* update health route

* update health route

* update health route

* deploy to stage

* add plaintext response

* lint

* lint

* change to good/bad

* lint

* lint

* refactor

* Revert "deploy to stage"

This reverts commit 2f54f4e.

* lint

* add inbound/outbound count for health + deploy to stage

* change ports back

* update count

* lint

* update conns

* lint

* remove connected peer count

* test blocked ports

* Revert "remove connected peer count"

This reverts commit 79e2b94.

* leave only active peers count

* Revert "test blocked ports"

This reverts commit 6fc9282.

* ci to stage

* add mutex to nodes access

* refactor: node health API (#1222)

* refactor: node health API

* added cpu_cores to healthcheck output

* fix inbound/outbound stats

* Remove CPU core reporting

---------

Co-authored-by: moshe-blox <moshe@blox.io>
Co-authored-by: Matus Kysel <matus@blox.io>
Co-authored-by: moshe-blox <89339422+moshe-blox@users.noreply.github.com>
  • Loading branch information
4 people authored Dec 21, 2023
1 parent f452a79 commit 84582f9
Show file tree
Hide file tree
Showing 10 changed files with 214 additions and 21 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ to run validators in a decentralized and trustless way.
The following documents contain instructions and information on how to get started:
* [Operator Node Installation](https://docs.ssv.network/run-a-node/operator-node/installation)
* [Developers' Guide](./docs/DEV_GUIDE.md)
* [SSV API Docs](https://bloxapp.github.io/ssv/)

## Contribution

Expand Down
129 changes: 113 additions & 16 deletions api/handlers/node.go
Original file line number Diff line number Diff line change
@@ -1,14 +1,24 @@
package handlers

import (
"context"
"encoding/json"
"errors"
"fmt"
"net/http"

"github.com/bloxapp/ssv/api"
networkpeers "github.com/bloxapp/ssv/network/peers"
"github.com/libp2p/go-libp2p/core/network"
"github.com/libp2p/go-libp2p/core/peer"
ma "github.com/multiformats/go-multiaddr"
manet "github.com/multiformats/go-multiaddr/net"

"github.com/bloxapp/ssv/api"
networkpeers "github.com/bloxapp/ssv/network/peers"
"github.com/bloxapp/ssv/nodeprobe"
)

// healthyPeerCount is the connected-peer threshold used by Health: with fewer
// connected peers than this, P2P is reported as "not enough connected peers".
const healthyPeerCount = 30

// TopicIndex provides the peer listing consumed by the Topics handler.
type TopicIndex interface {
// PeersByTopic returns a flat list of peer IDs (rendered by Topics as the
// full peer list) and a map keyed by topic name holding the peers of each topic.
PeersByTopic() ([]peer.ID, map[string][]peer.ID)
}
Expand Down Expand Up @@ -44,10 +54,43 @@ type identityJSON struct {
Version string `json:"version"`
}

// healthStatus carries the outcome of a single health check: a nil err means
// the check passed, a non-nil err describes what is wrong.
type healthStatus struct {
	err error
}

// MarshalJSON encodes the status as a JSON string: "good" when no error is
// recorded, otherwise "bad: " followed by the error message.
func (h healthStatus) MarshalJSON() ([]byte, error) {
	status := "good"
	if h.err != nil {
		status = fmt.Sprintf("bad: %s", h.err.Error())
	}
	return json.Marshal(status)
}

// healthCheckJSON is the payload rendered by the Health handler. The top-level
// fields hold one status per checked component; Advanced holds the raw P2P
// numbers those statuses are derived from.
type healthCheckJSON struct {
	P2P           healthStatus `json:"p2p"`
	BeaconNode    healthStatus `json:"beacon_node"`
	ExecutionNode healthStatus `json:"execution_node"`
	EventSyncer   healthStatus `json:"event_syncer"`
	Advanced      struct {
		Peers           int      `json:"peers"`
		InboundConns    int      `json:"inbound_conns"`
		OutboundConns   int      `json:"outbound_conns"`
		ListenAddresses []string `json:"p2p_listen_addresses"`
	} `json:"advanced"`
}

// String returns the health check as indented JSON, which lets the value be
// rendered as plain text. If marshalling fails, the returned string describes
// the marshalling error instead.
func (hc healthCheckJSON) String() string {
	encoded, err := json.MarshalIndent(hc, "", " ")
	if err == nil {
		return string(encoded)
	}
	return fmt.Sprintf("error marshalling healthCheckJSON: %s", err.Error())
}

// Node groups the dependencies of the node-related API handlers
// (Identity, Peers, Topics and Health).
type Node struct {
// PeersIndex is the peers index from the network/peers package.
PeersIndex networkpeers.Index
// TopicIndex supplies the per-topic peer listing rendered by Topics.
TopicIndex TopicIndex
// Network is the libp2p network queried for peers and listen addresses.
Network network.Network
// NodeProber performs the beacon node, execution node and event syncer
// checks reported by Health.
NodeProber *nodeprobe.Prober
}

func (h *Node) Identity(w http.ResponseWriter, r *http.Request) error {
Expand All @@ -65,6 +108,73 @@ func (h *Node) Identity(w http.ResponseWriter, r *http.Request) error {

// Peers renders the details of every peer returned by the underlying network.
func (h *Node) Peers(w http.ResponseWriter, r *http.Request) error {
	return api.Render(w, r, h.peers(h.Network.Peers()))
}

// Topics renders the full peer list reported by the topic index together with
// the peers listed under each topic.
func (h *Node) Topics(w http.ResponseWriter, r *http.Request) error {
	allPeers, byTopic := h.TopicIndex.PeersByTopic()

	resp := AllPeersAndTopicsJSON{
		AllPeers: allPeers,
		// Allocate up front so the slice is non-nil even without topics: a nil
		// slice is encoded as JSON null, an empty one as [].
		PeersByTopic: make([]topicIndexJSON, 0, len(byTopic)),
	}
	// Map iteration order is unspecified, so topics appear in no particular order.
	for topic, topicPeers := range byTopic {
		resp.PeersByTopic = append(resp.PeersByTopic, topicIndexJSON{TopicName: topic, Peers: topicPeers})
	}

	return api.Render(w, r, resp)
}

// Health renders a health report for the node: the P2P listen addresses, the
// number of connected peers and inbound/outbound connections, a P2P verdict
// derived from those numbers, and the results of probing the beacon node,
// execution node and event syncer.
//
// It returns an error (without rendering) if a listen address cannot be
// converted to a net.Addr.
func (h *Node) Health(w http.ResponseWriter, r *http.Request) error {
	var resp healthCheckJSON

	// Retrieve P2P listen addresses.
	for _, addr := range h.Network.ListenAddresses() {
		// Skip the circuit address and any address for which stripping
		// /ip4/0.0.0.0 leaves nothing.
		// NOTE(review): this looks like it filters out wildcard (0.0.0.0)
		// listeners rather than "non-IP4" addresses — confirm the intent.
		if addr.String() == "/p2p-circuit" || addr.Decapsulate(ma.StringCast("/ip4/0.0.0.0")) == nil {
			continue
		}
		netAddr, err := manet.ToNetAddr(addr)
		if err != nil {
			return fmt.Errorf("failed to convert multiaddr to net.Addr: %w", err)
		}
		resp.Advanced.ListenAddresses = append(resp.Advanced.ListenAddresses, netAddr.String())
	}

	// Count peers and connections.
	for _, p := range h.peers(h.Network.Peers()) {
		if p.Connectedness == network.Connected.String() {
			resp.Advanced.Peers++
		}
		for _, conn := range p.Connections {
			// Any connection that is not inbound is counted as outbound.
			if conn.Direction == network.DirInbound.String() {
				resp.Advanced.InboundConns++
			} else {
				resp.Advanced.OutboundConns++
			}
		}
	}

	// Report whether P2P is healthy; the zero-value status means "good".
	switch {
	case resp.Advanced.Peers == 0:
		resp.P2P = healthStatus{err: errors.New("no peers are connected")}
	case resp.Advanced.Peers < healthyPeerCount:
		resp.P2P = healthStatus{err: errors.New("not enough connected peers")}
	case resp.Advanced.InboundConns == 0:
		resp.P2P = healthStatus{err: errors.New("not enough inbound connections, port is likely not reachable")}
	}

	// Probe with the request's context (instead of a detached background
	// context) so that cancellation of the request is propagated to the probes.
	h.probeNodes(r.Context(), &resp)

	return api.Render(w, r, resp)
}

// probeNodes fills in the beacon node, execution node and event syncer
// statuses of resp using the node prober.
func (h *Node) probeNodes(ctx context.Context, resp *healthCheckJSON) {
	resp.BeaconNode = healthStatus{err: h.NodeProber.CheckBeaconNodeHealth(ctx)}
	resp.ExecutionNode = healthStatus{err: h.NodeProber.CheckExecutionNodeHealth(ctx)}
	resp.EventSyncer = healthStatus{err: h.NodeProber.CheckEventSyncerHealth(ctx)}
}

func (h *Node) peers(peers []peer.ID) []peerJSON {
resp := make([]peerJSON, len(peers))
for i, id := range peers {
resp[i] = peerJSON{
Expand All @@ -91,18 +201,5 @@ func (h *Node) Peers(w http.ResponseWriter, r *http.Request) error {
}
resp[i].Version = nodeInfo.Metadata.NodeVersion
}
return api.Render(w, r, resp)
}

func (h *Node) Topics(w http.ResponseWriter, r *http.Request) error {
allpeers, peerbytpc := h.TopicIndex.PeersByTopic()
alland := AllPeersAndTopicsJSON{}
tpcs := []topicIndexJSON{}
for topic, peerz := range peerbytpc {
tpcs = append(tpcs, topicIndexJSON{TopicName: topic, Peers: peerz})
}
alland.AllPeers = allpeers
alland.PeersByTopic = tpcs

return api.Render(w, r, alland)
return resp
}
30 changes: 28 additions & 2 deletions api/handling.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@
package api

import (
"fmt"
"net/http"

"github.com/go-chi/render"
"github.com/golang/gddo/httputil"
)

// Content types offered by Render during content negotiation.
const (
contentTypePlainText = "text/plain"
contentTypeJSON = "application/json"
)

// HandlerFunc is an HTTP handler that may return an error. Handler adapts it
// to a standard http.HandlerFunc.
type HandlerFunc func(http.ResponseWriter, *http.Request) error
Expand All @@ -22,7 +29,26 @@ func Handler(h HandlerFunc) http.HandlerFunc {
}
}

// Render negotiates the content type and renders the response, defaulting to JSON.
// Response must implement fmt.Stringer to be rendered as plain text.
func Render(w http.ResponseWriter, r *http.Request, response any) error {
render.JSON(w, r, response)
return nil
// Negotiate content type, defaulting to JSON.
contentType := httputil.NegotiateContentType(
r,
[]string{contentTypePlainText, contentTypeJSON},
contentTypeJSON,
)

switch contentType {
case contentTypePlainText:
// Try rendering as a string, otherwise fallback to JSON.
if stringer, ok := response.(fmt.Stringer); ok {
render.PlainText(w, r, stringer.String())
return nil
}
fallthrough
default:
render.JSON(w, r, response)
return nil
}
}
6 changes: 4 additions & 2 deletions api/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@ import (
"runtime"
"time"

"github.com/bloxapp/ssv/api"
"github.com/bloxapp/ssv/api/handlers"
"github.com/go-chi/chi/v5"
"github.com/go-chi/chi/v5/middleware"
"go.uber.org/zap"

"github.com/bloxapp/ssv/api"
"github.com/bloxapp/ssv/api/handlers"
)

type Server struct {
Expand Down Expand Up @@ -44,6 +45,7 @@ func (s *Server) Run() error {
router.Get("/v1/node/identity", api.Handler(s.node.Identity))
router.Get("/v1/node/peers", api.Handler(s.node.Peers))
router.Get("/v1/node/topics", api.Handler(s.node.Topics))
router.Get("/v1/node/health", api.Handler(s.node.Health))
router.Get("/v1/validators", api.Handler(s.validators.List))

s.logger.Info("Serving SSV API", zap.String("addr", s.addr))
Expand Down
1 change: 1 addition & 0 deletions cli/operator/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,7 @@ var StartNodeCmd = &cobra.Command{
PeersIndex: p2pNetwork.(p2pv1.PeersIndexProvider).PeersIndex(),
Network: p2pNetwork.(p2pv1.HostProvider).Host().Network(),
TopicIndex: p2pNetwork.(handlers.TopicIndex),
NodeProber: nodeProber,
},
&handlers.Validators{
Shares: nodeStorage.Shares(),
Expand Down
6 changes: 5 additions & 1 deletion config/config.example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,8 @@ p2p:
OperatorPrivateKey:

# This enables monitoring at the specified port, see https://github.com/bloxapp/ssv/tree/main/monitoring
MetricsAPIPort: 15000
MetricsAPIPort: 15000

# This enables the SSV API at the specified port. Refer to the documentation at https://bloxapp.github.io/ssv/
# It's recommended to keep this port private to prevent potential resource-intensive attacks.
# SSVAPIPort: 16000
4 changes: 4 additions & 0 deletions config/config.exporter.example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,7 @@ bootnode:

LocalEventsPath: # path to local events. used for running the node with custom local events
WebSocketAPIPort: 16000

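# This enables the SSV API at the specified port. Refer to the documentation at https://bloxapp.github.io/ssv/
# It's recommended to keep this port private to prevent potential resource-intensive attacks.
# When enabling it, choose a port that differs from WebSocketAPIPort above (the example below reuses 16000).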
# SSVAPIPort: 16000
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ require (
github.com/ferranbt/fastssz v0.1.3
github.com/go-chi/chi/v5 v5.0.8
github.com/go-chi/render v1.0.2
github.com/golang/gddo v0.0.0-20200528160355-8d077c1d8f4c
github.com/golang/mock v1.6.0
github.com/google/uuid v1.3.0
github.com/gorilla/websocket v1.5.0
Expand Down
Loading

0 comments on commit 84582f9

Please sign in to comment.