Skip to content

Commit

Permalink
Merge pull request #16848 from sosiouxme/20171012-NetworkCheck-interrupt
Browse files Browse the repository at this point in the history
Automatic merge from submit-queue (batch tested with PRs 16848, 16874).

Fix some diagnostic error handling (NetworkCheck and DiagnosticPod)

Fixes #16847

A keyboard interrupt during the NetworkCheck diagnostic now actually aborts it (giving it a chance to clean up), and the run then proceeds to the next diagnostic.

The same is done for DiagnosticPod (which previously did not catch the signal or clean up at all).
  • Loading branch information
openshift-merge-robot authored Oct 16, 2017
2 parents 7f10b2d + 4c90bb6 commit 7e74be9
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 22 deletions.
40 changes: 31 additions & 9 deletions pkg/diagnostics/client/run_diagnostics_pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@ package client
import (
"bufio"
"fmt"
"os"
"os/signal"
"regexp"
"strconv"
"syscall"
"time"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -51,11 +54,11 @@ func (d *DiagnosticPod) CanRun() (bool, error) {
// Check is part of the Diagnostic interface; it runs the actual diagnostic logic.
// It creates a result named "DiagnosticPod", hands it to runDiagnosticPod to be
// filled in, and returns that result.
func (d *DiagnosticPod) Check() types.DiagnosticResult {
r := types.NewDiagnosticResult("DiagnosticPod")
// NOTE(review): this listing is a rendered diff, so both the old call (passing a
// nil service) and the new call (result only) appear here; presumably only the
// second one remains after the change — confirm against the actual file.
d.runDiagnosticPod(nil, r)
d.runDiagnosticPod(r)
return r
}

func (d *DiagnosticPod) runDiagnosticPod(service *kapi.Service, r types.DiagnosticResult) {
func (d *DiagnosticPod) runDiagnosticPod(r types.DiagnosticResult) {
loglevel := d.Level
if loglevel > 2 {
loglevel = 2 // need to show summary at least
Expand All @@ -78,14 +81,33 @@ func (d *DiagnosticPod) runDiagnosticPod(service *kapi.Service, r types.Diagnost
r.Error("DCli2001", err, fmt.Sprintf("Creating diagnostic pod with image %s failed. Error: (%[2]T) %[2]v", imageName, err))
return
}
defer func() { // delete what we created, or notify that we couldn't
zero := int64(0)
delOpts := metav1.DeleteOptions{TypeMeta: pod.TypeMeta, GracePeriodSeconds: &zero}
if err := d.KubeClient.Core().Pods(d.Namespace).Delete(pod.ObjectMeta.Name, &delOpts); err != nil {
r.Error("DCl2002", err, fmt.Sprintf("Deleting diagnostic pod '%s' failed. Error: %s", pod.ObjectMeta.Name, fmt.Sprintf("(%T) %[1]s", err)))
}

// Jump straight to clean up if there is an interrupt/terminate signal while running diagnostic
done := make(chan bool, 1)
sig := make(chan os.Signal, 1)
signal.Notify(sig, os.Interrupt, syscall.SIGTERM)
go func() {
<-sig
r.Warn("DCli2014", nil, "Interrupt received; aborting diagnostic.")
done <- true
}()
pod, err = d.KubeClient.Core().Pods(d.Namespace).Get(pod.ObjectMeta.Name, metav1.GetOptions{}) // status is filled in post-create
go func() {
d.processDiagnosticPodResults(pod, imageName, r)
done <- true
}()

<-done
signal.Stop(sig)
// delete what we created, or notify that we couldn't
zero := int64(0)
delOpts := metav1.DeleteOptions{TypeMeta: pod.TypeMeta, GracePeriodSeconds: &zero}
if err := d.KubeClient.Core().Pods(d.Namespace).Delete(pod.ObjectMeta.Name, &delOpts); err != nil {
r.Error("DCl2002", err, fmt.Sprintf("Deleting diagnostic pod '%s' failed. Error: %s", pod.ObjectMeta.Name, fmt.Sprintf("(%T) %[1]s", err)))
}
}

func (d *DiagnosticPod) processDiagnosticPodResults(protoPod *kapi.Pod, imageName string, r types.DiagnosticResult) {
pod, err := d.KubeClient.Core().Pods(d.Namespace).Get(protoPod.ObjectMeta.Name, metav1.GetOptions{}) // status is filled in post-create
if err != nil {
r.Error("DCli2003", err, fmt.Sprintf("Retrieving the diagnostic pod definition failed. Error: (%T) %[1]v", err))
return
Expand Down
31 changes: 18 additions & 13 deletions pkg/diagnostics/network/run_pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ func (d *NetworkDiagnostic) Check() types.DiagnosticResult {
return d.res
}
if !ok {
d.res.Warn("DNet2002", nil, "Skipping network diagnostics check. Reason: Not using openshift network plugin.")
d.res.Info("DNet2002", "Skipping network diagnostics check. Reason: Not using openshift network plugin.")
return d.res
}

Expand All @@ -99,22 +99,27 @@ func (d *NetworkDiagnostic) Check() types.DiagnosticResult {
return d.res
}

d.runNetworkDiagnostic()
// Abort and clean up if there is an interrupt/terminate signal while running network diagnostics
done := make(chan bool, 1)
sig := make(chan os.Signal, 1)
signal.Notify(sig, os.Interrupt, syscall.SIGTERM)
go func() {
<-sig
d.res.Warn("DNet2014", nil, "Interrupt received; aborting network diagnostic.")
done <- true
}()
go func() {
d.runNetworkDiagnostic()
done <- true
}()
<-done
signal.Stop(sig)
d.Cleanup()

return d.res
}

func (d *NetworkDiagnostic) runNetworkDiagnostic() {
// Do clean up if there is an interrupt/terminate signal while running network diagnostics
c := make(chan os.Signal, 2)
signal.Notify(c, os.Interrupt, syscall.SIGTERM)
go func() {
<-c
d.Cleanup()
}()

defer func() {
d.Cleanup()
}()
// Setup test environment
if err := d.TestSetup(); err != nil {
d.res.Error("DNet2005", err, fmt.Sprintf("Setting up test environment for network diagnostics failed: %v", err))
Expand Down

0 comments on commit 7e74be9

Please sign in to comment.