Skip to content

Commit

Permalink
feat: pinger can return an exit code on failure
Browse files Browse the repository at this point in the history
This feature will be used to add e2e tests later.
  • Loading branch information
oilbeater committed Nov 9, 2020
1 parent deef10a commit aa86e40
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 37 deletions.
7 changes: 5 additions & 2 deletions pkg/pinger/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ type Configuration struct {
DaemonSetName string
Interval int
Mode string
ExitCode int
InternalDNS string
ExternalDNS string
NodeName string
Expand Down Expand Up @@ -54,6 +55,7 @@ func ParseFlags() (*Configuration, error) {
argDaemonSetName = pflag.String("ds-name", "kube-ovn-pinger", "kube-ovn-pinger daemonset name")
argInterval = pflag.Int("interval", 5, "interval seconds between consecutive pings")
argMode = pflag.String("mode", "server", "server or job Mode")
argExitCode = pflag.Int("exit-code", 0, "exit code when failure happens")
argInternalDns = pflag.String("internal-dns", "kubernetes.default", "check dns from pod")
argExternalDns = pflag.String("external-dns", "alauda.cn", "check external dns resolve from pod")
argExternalAddress = pflag.String("external-address", "114.114.114.114", "check ping connection to an external address, default empty that will disable external check")
Expand Down Expand Up @@ -101,6 +103,7 @@ func ParseFlags() (*Configuration, error) {
DaemonSetName: *argDaemonSetName,
Interval: *argInterval,
Mode: *argMode,
ExitCode: *argExitCode,
InternalDNS: *argInternalDns,
ExternalDNS: *argExternalDns,
PodIP: os.Getenv("POD_IP"),
Expand Down Expand Up @@ -136,10 +139,10 @@ func ParseFlags() (*Configuration, error) {
}
for _, arg := range ds.Spec.Template.Spec.Containers[0].Command {
arg = strings.Trim(arg, "\"")
if strings.HasPrefix(arg, "--external-address=") {
if config.ExternalAddress == "114.114.114.114" && strings.HasPrefix(arg, "--external-address=") {
config.ExternalAddress = strings.TrimPrefix(arg, "--external-address=")
}
if strings.HasPrefix(arg, "--external-dns=") {
if config.ExternalDNS == "alauda.cn" && strings.HasPrefix(arg, "--external-dns=") {
config.ExternalDNS = strings.TrimPrefix(arg, "--external-dns=")
}
}
Expand Down
20 changes: 11 additions & 9 deletions pkg/pinger/ovn.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,39 +9,40 @@ import (
"strings"
)

func checkOvs(config *Configuration) {
func checkOvs(config *Configuration) error {
output, err := exec.Command("/usr/share/openvswitch/scripts/ovs-ctl", "status").CombinedOutput()
if err != nil {
klog.Errorf("check ovs status failed %v, %s", err, string(output))
SetOvsDownMetrics(config.NodeName)
return
return err
}
klog.Infof("ovs-vswitchd and ovsdb are up")
SetOvsUpMetrics(config.NodeName)
return
return nil
}

func checkOvnController(config *Configuration) {
func checkOvnController(config *Configuration) error {
output, err := exec.Command("/usr/share/ovn/scripts/ovn-ctl", "status_controller").CombinedOutput()
if err != nil {
klog.Errorf("check ovn_controller status failed %v, %q", err, output)
SetOvnControllerDownMetrics(config.NodeName)
return
return err
}
klog.Infof("ovn_controller is up")
SetOvnControllerUpMetrics(config.NodeName)
return nil
}

func checkPortBindings(config *Configuration) {
func checkPortBindings(config *Configuration) error {
klog.Infof("start to check port binding")
ovsBindings, err := checkOvsBindings()
if err != nil {
return
return err
}

sbBindings, err := checkSBBindings(config)
if err != nil {
return
return err
}
klog.Infof("port in sb is %v", sbBindings)
misMatch := []string{}
Expand All @@ -53,11 +54,12 @@ func checkPortBindings(config *Configuration) {
if len(misMatch) > 0 {
klog.Errorf("%d port %v not exist in sb-bindings", len(misMatch), misMatch)
inconsistentPortBindingGauge.WithLabelValues(config.NodeName).Set(float64(len(misMatch)))
return fmt.Errorf("%d port %v not exist in sb-bindings", len(misMatch), misMatch)
} else {
klog.Infof("ovs and ovn-sb binding check passed")
inconsistentPortBindingGauge.WithLabelValues(config.NodeName).Set(0)
}
return
return nil
}

func checkOvsBindings() ([]string, error) {
Expand Down
92 changes: 66 additions & 26 deletions pkg/pinger/ping.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@ package pinger

import (
"context"
"fmt"
"math"
"net"
"os"
"time"

goping "github.com/oilbeater/go-ping"
Expand All @@ -14,51 +16,73 @@ import (
)

func StartPinger(config *Configuration, e *Exporter) {
errHappens := false
for {
if config.NetworkMode == "kube-ovn" {
checkOvs(config)
checkOvnController(config)
checkPortBindings(config)
if checkOvs(config) != nil ||
checkOvnController(config) != nil ||
checkPortBindings(config) != nil {
errHappens = true
}
e.ovsMetricsUpdate()
}

ping(config)
if ping(config) != nil {
errHappens = true
}
if config.Mode != "server" {
break
}
time.Sleep(time.Duration(config.Interval) * time.Second)
}
if errHappens && config.ExitCode != 0 {
os.Exit(config.ExitCode)
}
}

func ping(config *Configuration) {
checkApiServer(config)
pingNodes(config)
pingPods(config)
internalNslookup(config)
func ping(config *Configuration) error {
errHappens := false
if checkApiServer(config) != nil ||
pingNodes(config) != nil ||
pingPods(config) != nil ||
internalNslookup(config) != nil {
errHappens = true
}

if config.ExternalDNS != "" {
externalNslookup(config)
if externalNslookup(config) != nil {
errHappens = true
}
}

if config.ExternalAddress != "" {
pingExternal(config)
if pingExternal(config) != nil {
errHappens = true
}
}
if errHappens {
return fmt.Errorf("ping failed")
}
return nil
}

func pingNodes(config *Configuration) {
func pingNodes(config *Configuration) error {
klog.Infof("start to check node connectivity")
nodes, err := config.KubeClient.CoreV1().Nodes().List(metav1.ListOptions{})
if err != nil {
klog.Errorf("failed to list nodes, %v", err)
return
return err
}

var pingErr error
for _, no := range nodes.Items {
for _, addr := range no.Status.Addresses {
if addr.Type == v1.NodeInternalIP {
func(nodeIP, nodeName string) {
pinger, err := goping.NewPinger(nodeIP)
if err != nil {
klog.Errorf("failed to init pinger, %v", err)
pingErr = err
return
}
pinger.SetPrivileged(true)
Expand All @@ -70,6 +94,9 @@ func pingNodes(config *Configuration) {
stats := pinger.Statistics()
klog.Infof("ping node: %s %s, count: %d, loss count %d, average rtt %.2fms",
nodeName, nodeIP, pinger.Count, int(math.Abs(float64(stats.PacketsSent-stats.PacketsRecv))), float64(stats.AvgRtt)/float64(time.Millisecond))
if int(math.Abs(float64(stats.PacketsSent-stats.PacketsRecv))) != 0 {
pingErr = fmt.Errorf("ping failed")
}
SetNodePingMetrics(
config.NodeName,
config.HostIP,
Expand All @@ -81,27 +108,30 @@ func pingNodes(config *Configuration) {
}
}
}
return pingErr
}

func pingPods(config *Configuration) {
func pingPods(config *Configuration) error {
klog.Infof("start to check pod connectivity")
ds, err := config.KubeClient.AppsV1().DaemonSets(config.DaemonSetNamespace).Get(config.DaemonSetName, metav1.GetOptions{})
if err != nil {
klog.Errorf("failed to get peer ds: %v", err)
return
return err
}
pods, err := config.KubeClient.CoreV1().Pods(config.DaemonSetNamespace).List(metav1.ListOptions{LabelSelector: labels.Set(ds.Spec.Selector.MatchLabels).String()})
if err != nil {
klog.Errorf("failed to list peer pods: %v", err)
return
return err
}

var pingErr error
for _, pod := range pods.Items {
if pod.Status.PodIP != "" {
func(podIp, podName, nodeIP, nodeName string) {
pinger, err := goping.NewPinger(podIp)
if err != nil {
klog.Errorf("failed to init pinger, %v", err)
pingErr = err
return
}
pinger.SetPrivileged(true)
Expand All @@ -113,6 +143,9 @@ func pingPods(config *Configuration) {
stats := pinger.Statistics()
klog.Infof("ping pod: %s %s, count: %d, loss count %d, average rtt %.2fms",
podName, podIp, pinger.Count, int(math.Abs(float64(stats.PacketsSent-stats.PacketsRecv))), float64(stats.AvgRtt)/float64(time.Millisecond))
if int(math.Abs(float64(stats.PacketsSent-stats.PacketsRecv))) != 0 {
pingErr = fmt.Errorf("ping failed")
}
SetPodPingMetrics(
config.NodeName,
config.HostIP,
Expand All @@ -125,17 +158,18 @@ func pingPods(config *Configuration) {
}(pod.Status.PodIP, pod.Name, pod.Status.HostIP, pod.Spec.NodeName)
}
}
return pingErr
}

func pingExternal(config *Configuration) {
func pingExternal(config *Configuration) error {
if config.ExternalAddress == "" {
return
return nil
}
klog.Infof("start to check ping external to %s", config.ExternalAddress)
pinger, err := goping.NewPinger(config.ExternalAddress)
if err != nil {
klog.Errorf("failed to init pinger, %v", err)
return
return err
}
pinger.SetPrivileged(true)
pinger.Timeout = 5 * time.Second
Expand All @@ -153,9 +187,13 @@ func pingExternal(config *Configuration) {
config.ExternalAddress,
float64(stats.AvgRtt)/float64(time.Millisecond),
int(math.Abs(float64(stats.PacketsSent-stats.PacketsRecv))))
if int(math.Abs(float64(stats.PacketsSent-stats.PacketsRecv))) != 0 {
return fmt.Errorf("ping failed")
}
return nil
}

func internalNslookup(config *Configuration) {
func internalNslookup(config *Configuration) error {
klog.Infof("start to check dns connectivity")
t1 := time.Now()
ctx, cancel := context.WithTimeout(context.TODO(), 10*time.Second)
Expand All @@ -166,13 +204,14 @@ func internalNslookup(config *Configuration) {
if err != nil {
klog.Errorf("failed to resolve dns %s, %v", config.InternalDNS, err)
SetInternalDnsUnhealthyMetrics(config.NodeName)
return
return err
}
SetInternalDnsHealthyMetrics(config.NodeName, float64(elpased)/float64(time.Millisecond))
klog.Infof("resolve dns %s to %v in %.2fms", config.InternalDNS, addrs, float64(elpased)/float64(time.Millisecond))
return nil
}

func externalNslookup(config *Configuration) {
func externalNslookup(config *Configuration) error {
klog.Infof("start to check dns connectivity")
t1 := time.Now()
ctx, cancel := context.WithTimeout(context.TODO(), 10*time.Second)
Expand All @@ -183,23 +222,24 @@ func externalNslookup(config *Configuration) {
if err != nil {
klog.Errorf("failed to resolve dns %s, %v", config.ExternalDNS, err)
SetExternalDnsUnhealthyMetrics(config.NodeName)
return
return err
}
SetExternalDnsHealthyMetrics(config.NodeName, float64(elpased)/float64(time.Millisecond))
klog.Infof("resolve dns %s to %v in %.2fms", config.ExternalDNS, addrs, float64(elpased)/float64(time.Millisecond))
return nil
}

func checkApiServer(config *Configuration) {
func checkApiServer(config *Configuration) error {
klog.Infof("start to check apiserver connectivity")
t1 := time.Now()
_, err := config.KubeClient.Discovery().ServerVersion()
elpased := time.Since(t1)
if err != nil {
klog.Errorf("failed to connect to apiserver: %v", err)
SetApiserverUnhealthyMetrics(config.NodeName)
return
return err
}
klog.Infof("connect to apiserver success in %.2fms", float64(elpased)/float64(time.Millisecond))
SetApiserverHealthyMetrics(config.NodeName, float64(elpased)/float64(time.Millisecond))
return
return nil
}

0 comments on commit aa86e40

Please sign in to comment.