Skip to content

Commit

Permalink
Merge pull request #26208 from vrutkovs/sandboxes-neverdeleted-network-update
Browse files Browse the repository at this point in the history

Bug 1970315: testPodSandboxCreation: skip sandbox errors for pods which were not deleted during network update
  • Loading branch information
openshift-merge-robot authored Jul 1, 2021
2 parents ca0a867 + 54660ee commit 7a95251
Showing 1 changed file with 21 additions and 3 deletions.
24 changes: 21 additions & 3 deletions pkg/synthetictests/networking.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"strings"
"time"

"github.com/openshift/origin/pkg/monitor/intervalcreation"
"github.com/openshift/origin/pkg/monitor/monitorapi"

"github.com/openshift/origin/pkg/test/ginkgo"
Expand Down Expand Up @@ -33,6 +34,10 @@ func testPodSandboxCreation(events monitorapi.Intervals) []*ginkgo.JUnitTestCase

failures := []string{}
flakes := []string{}
operatorsProgressing := intervalcreation.IntervalsFromEvents_OperatorProgressing(events, time.Time{}, time.Time{})
networkOperatorProgressing := operatorsProgressing.Filter(func(ev monitorapi.EventInterval) bool {
return ev.Locator == "clusteroperator/network" || ev.Locator == "clusteroperator/machine-config"
})
eventsForPods := getEventsByPod(events)
for _, event := range events {
if !strings.Contains(event.Message, "reason/FailedCreatePodSandBox Failed to create pod sandbox") {
Expand All @@ -48,16 +53,29 @@ func testPodSandboxCreation(events monitorapi.Intervals) []*ginkgo.JUnitTestCase
}
deletionTime := getPodDeletionTime(eventsForPods[event.Locator], event.Locator)
if deletionTime == nil {
// this indicates a failure to create the sandbox that should not happen
failures = append(failures, fmt.Sprintf("%v - never deleted - %v", event.Locator, event.Message))
// mark sandbox errors as flakes if networking is being updated
match := -1
for i := range networkOperatorProgressing {
matchesFrom := event.From.After(networkOperatorProgressing[i].From)
matchesTo := event.To.Before(networkOperatorProgressing[i].To)
if matchesFrom && matchesTo {
match = i
break
}
}
if match != -1 {
flakes = append(flakes, fmt.Sprintf("%v - never deleted - network rollout - %v", event.Locator, event.Message))
} else {
failures = append(failures, fmt.Sprintf("%v - never deleted - %v", event.Locator, event.Message))
}
} else {
timeBetweenDeleteAndFailure := event.From.Sub(*deletionTime)
switch {
case timeBetweenDeleteAndFailure < 1*time.Second:
// nothing here, one second is close enough to be ok, the kubelet and CNI just didn't know
case timeBetweenDeleteAndFailure < 5*time.Second:
// within five seconds: it ought to be long enough to know, but it's close enough to flake and not fail
flakes = append(failures, fmt.Sprintf("%v - %0.2f seconds after deletion - %v", event.Locator, timeBetweenDeleteAndFailure.Seconds(), event.Message))
flakes = append(flakes, fmt.Sprintf("%v - %0.2f seconds after deletion - %v", event.Locator, timeBetweenDeleteAndFailure.Seconds(), event.Message))
case deletionTime.Before(event.From):
// something went wrong. More than five seconds after the pod was deleted, the CNI is trying to set up pod sandboxes and can't
failures = append(failures, fmt.Sprintf("%v - %0.2f seconds after deletion - %v", event.Locator, timeBetweenDeleteAndFailure.Seconds(), event.Message))
Expand Down

0 comments on commit 7a95251

Please sign in to comment.