diff --git a/server/etcdserver/api/etcdhttp/metrics.go b/server/etcdserver/api/etcdhttp/metrics.go index 48799e18a301..b369b01fd81a 100644 --- a/server/etcdserver/api/etcdhttp/metrics.go +++ b/server/etcdserver/api/etcdhttp/metrics.go @@ -47,7 +47,9 @@ func HandleMetricsHealth(lg *zap.Logger, mux *http.ServeMux, srv etcdserver.Serv // and its corresponding timeout. func HandleMetricsHealthForV3(lg *zap.Logger, mux *http.ServeMux, srv *etcdserver.EtcdServer) { mux.Handle(PathMetrics, promhttp.Handler()) - mux.Handle(PathHealth, NewHealthHandler(lg, func(excludedAlarms AlarmSet, serializable bool) Health { return checkV3Health(lg, srv, excludedAlarms, serializable) })) + mux.Handle(PathHealth, NewHealthHandler(lg, func(excludedAlarms AlarmSet, serializable bool) Health { + return checkV3Health(lg, srv, excludedAlarms, serializable) + })) } // HandlePrometheus registers prometheus handler on '/metrics'. @@ -65,8 +67,13 @@ func NewHealthHandler(lg *zap.Logger, hfunc func(excludedAlarms AlarmSet, Serial return } excludedAlarms := getExcludedAlarms(r) - // when the query parameter "consistency=s" is provided, then etcdserver should only check local etcd member's health - // instead of the etcd cluster's health. + // Kubernetes Probes (e.g. livenessProbe) use the "/health" endpoint to make a decision whether to restart a specific container. + // In this case, it should only check local etcd member's health instead of etcd cluster's health. When the cluster isn't healthy, + // such as no raft leader, restarting the local etcd member cannot help, and it could even make the situation worse. So the endpoint + // should provide an option to let users choose to check the etcd cluster's health or local etcd member's health. + // The default behaviour is to check the etcd cluster's health so as to be backward compatible. 
+ // When the query parameter "serializable=true" is provided, etcdserver should only do a serializable read, which means + // it checks local etcd member's health instead of the etcd cluster's health. serializableFlag := getSerializableFlag(r) h := hfunc(excludedAlarms, serializableFlag) defer func() { @@ -132,9 +139,9 @@ func getExcludedAlarms(r *http.Request) (alarms AlarmSet) { } func getSerializableFlag(r *http.Request) bool { - getConsistency, found := r.URL.Query()["consistency"] + getConsistency, found := r.URL.Query()["serializable"] if found { - if len(getConsistency) == 1 && getConsistency[0] == "s" { + if len(getConsistency) == 1 && getConsistency[0] == "true" { return true } }