Skip to content

Commit

Permalink
Add separate readiness endpoints that are more focused on network rea… (
Browse files Browse the repository at this point in the history
  • Loading branch information
cmmarslender authored Nov 28, 2024
1 parent 0d7a179 commit 8299426
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 3 deletions.
1 change: 1 addition & 0 deletions cmd/serve.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ var serveCmd = &cobra.Command{
}

go h.DNSCheckLoop()
go h.FullNodeCheckLoop()

log.Fatalln(h.StartServer())
},
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module github.com/chia-network/chia-healthcheck

go 1.19
go 1.21

require (
github.com/chia-network/go-chia-libs v0.6.0
Expand Down
8 changes: 7 additions & 1 deletion internal/healthcheck/dns.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,18 @@ func (h *Healthcheck) DNSCheckLoop() {
log.Println("Received NO IPs. Not Ready!")
}()

time.Sleep(30 * time.Second)
time.Sleep(min(30*time.Second, viper.GetDuration("healthcheck-threshold")/2))
}
}

// seederHealthcheck builds the HTTP handler for the seeder service health
// endpoint. Every request is delegated to timeMetricHealthcheckHelper together
// with h.lastDNSTimeGT1, the last time a successful DNS response carried at
// least one peer.
func (h *Healthcheck) seederHealthcheck() func(http.ResponseWriter, *http.Request) {
	handler := func(resp http.ResponseWriter, req *http.Request) {
		// The timestamp is read per request, not once when the handler is built.
		timeMetricHealthcheckHelper(h.lastDNSTimeGT1, resp, req)
	}
	return handler
}

func (h *Healthcheck) seederReadiness() func(w http.ResponseWriter, r *http.Request) {
return func(w http.ResponseWriter, r *http.Request) {
timeMetricHealthcheckHelper(h.lastDNSTime, w, r)
}
Expand Down
28 changes: 28 additions & 0 deletions internal/healthcheck/fullnode.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"time"

log "github.com/sirupsen/logrus"
"github.com/spf13/viper"

"github.com/chia-network/go-chia-libs/pkg/types"
)
Expand Down Expand Up @@ -34,9 +35,36 @@ func (h *Healthcheck) fullNodeReceive(resp *types.WebsocketResponse) {
h.lastHeightTime = time.Now()
}

// FullNodeCheckLoop probes the full node's ports forever and never returns, so
// it is meant to be started in its own goroutine.
//
// On each pass it checks the full node port and then the RPC port on the
// configured "hostname". If both accept a connection, h.lastFullNodeActivity is
// set to the current time; otherwise the first closed port is logged and the
// timestamp is left untouched.
func (h *Healthcheck) FullNodeCheckLoop() {
	for {
		switch {
		case !isPortOpen(viper.GetString("hostname"), h.chiaConfig.FullNode.Port):
			log.Errorf("Full node port %d is not open", h.chiaConfig.FullNode.Port)
		case !isPortOpen(viper.GetString("hostname"), h.chiaConfig.FullNode.RPCPort):
			// Only reached when the full node port itself was open.
			log.Errorf("Full node RPC port %d is not open", h.chiaConfig.FullNode.RPCPort)
		default:
			h.lastFullNodeActivity = time.Now()
		}

		// Wait the shorter of thirty seconds and half of "healthcheck-threshold";
		// the halved threshold wins whenever the threshold is below sixty seconds.
		pause := min(30*time.Second, viper.GetDuration("healthcheck-threshold")/2)
		time.Sleep(pause)
	}
}

// fullNodeHealthcheck builds the HTTP handler for the full node health
// endpoint. Every request is delegated to timeMetricHealthcheckHelper together
// with h.lastHeightTime, the time the last block height was received.
func (h *Healthcheck) fullNodeHealthcheck() func(http.ResponseWriter, *http.Request) {
	handler := func(resp http.ResponseWriter, req *http.Request) {
		// The timestamp is read per request, not once when the handler is built.
		timeMetricHealthcheckHelper(h.lastHeightTime, resp, req)
	}
	return handler
}

// fullNodeReadiness builds the HTTP handler for the full node readiness
// endpoint. Unlike fullNodeHealthcheck, it reports on h.lastFullNodeActivity,
// which FullNodeCheckLoop refreshes whenever both the full node port and its
// RPC port accept a connection. The evaluation itself is delegated to
// timeMetricHealthcheckHelper.
func (h *Healthcheck) fullNodeReadiness() func(http.ResponseWriter, *http.Request) {
	handler := func(resp http.ResponseWriter, req *http.Request) {
		// The timestamp is read per request, not once when the handler is built.
		timeMetricHealthcheckHelper(h.lastFullNodeActivity, resp, req)
	}
	return handler
}
29 changes: 28 additions & 1 deletion internal/healthcheck/healthcheck.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@ package healthcheck

import (
"fmt"
"net"
"net/http"
"net/url"
"time"

"github.com/chia-network/go-chia-libs/pkg/config"
log "github.com/sirupsen/logrus"
"github.com/spf13/viper"

Expand All @@ -17,15 +19,21 @@ import (
type Healthcheck struct {
healthcheckPort uint16
client *rpc.Client
chiaConfig *config.ChiaConfig

// Last block height we received
lastHeight uint32

// Time we received the last block height
lastHeightTime time.Time

// Last full node activity
lastFullNodeActivity time.Time

// Last time we got a successful DNS response
lastDNSTime time.Time
// Last time we got a successful DNS response with at least one peer
lastDNSTimeGT1 time.Time

// Time we got a good response from the timelord
lastTimelordTime time.Time
Expand All @@ -41,7 +49,12 @@ func NewHealthcheck(port uint16, logLevel log.Level) (*Healthcheck, error) {

log.SetLevel(logLevel)

healthcheck.client, err = rpc.NewClient(rpc.ConnectionModeWebsocket, rpc.WithAutoConfig(), rpc.WithBaseURL(&url.URL{
chiaConfig, err := config.GetChiaConfig()
if err != nil {
return nil, err
}
healthcheck.chiaConfig = chiaConfig
healthcheck.client, err = rpc.NewClient(rpc.ConnectionModeWebsocket, rpc.WithManualConfig(*chiaConfig), rpc.WithBaseURL(&url.URL{
Scheme: "wss",
Host: viper.GetString("hostname"),
}))
Expand Down Expand Up @@ -75,8 +88,11 @@ func (h *Healthcheck) StartServer() error {
log.Printf("Starting healthcheck server on port %d", h.healthcheckPort)

http.HandleFunc("/full_node", h.fullNodeHealthcheck())
http.HandleFunc("/full_node/readiness", h.fullNodeReadiness())
http.HandleFunc("/seeder", h.seederHealthcheck())
http.HandleFunc("/seeder/readiness", h.seederReadiness())
http.HandleFunc("/timelord", h.timelordHealthcheck())
http.HandleFunc("/timelord/readiness", h.timelordHealthcheck())
return http.ListenAndServe(fmt.Sprintf(":%d", h.healthcheckPort), nil)
}

Expand Down Expand Up @@ -141,3 +157,14 @@ func timeMetricHealthcheckHelper(lastTime time.Time, w http.ResponseWriter, r *h
}
}
}

// isPortOpen reports whether a TCP connection to port on host can be
// established within five seconds.
//
// host may be a hostname, an IPv4 address, or an IPv6 literal with or without
// surrounding brackets. It returns false when the dial fails for any reason
// (port closed, host unreachable, name resolution failure, timeout).
func isPortOpen(host string, port uint16) bool {
	// Accept an already-bracketed IPv6 literal ("[::1]"); JoinHostPort adds the
	// brackets itself and would otherwise double them.
	if n := len(host); n >= 2 && host[0] == '[' && host[n-1] == ']' {
		host = host[1 : n-1]
	}

	// JoinHostPort brackets IPv6 literals; plain "%s:%d" formatting would yield
	// an unparseable address such as "::1:8444".
	address := net.JoinHostPort(host, fmt.Sprint(port))
	conn, err := net.DialTimeout("tcp", address, 5*time.Second)
	if err != nil {
		// Port is not open or the host is unreachable
		return false
	}
	// Only reachability matters; an error closing the probe connection is not actionable.
	_ = conn.Close()
	return true
}

0 comments on commit 8299426

Please sign in to comment.