Skip to content

Commit

Permalink
testPodSandboxCreation: skip sandbox errors for pods which were not deleted during network update
Browse files Browse the repository at this point in the history

The "pods should successfully create sandboxes" test should mark pod events as flakes if the network is being updated. These pods eventually get created, so the test is too strict.
  • Loading branch information
vrutkovs committed Jun 8, 2021
1 parent 98086d5 commit 124818d
Showing 1 changed file with 17 additions and 2 deletions.
19 changes: 17 additions & 2 deletions pkg/synthetictests/networking.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"strings"
"time"

"github.com/openshift/origin/pkg/monitor/intervalcreation"
"github.com/openshift/origin/pkg/monitor/monitorapi"

"github.com/openshift/origin/pkg/test/ginkgo"
Expand Down Expand Up @@ -44,16 +45,30 @@ func testPodSandboxCreation(events monitorapi.Intervals) []*ginkgo.JUnitTestCase
}
deletionTime := getPodDeletionTime(eventsForPods[event.Locator], event.Locator)
if deletionTime == nil {
// mark sandboxes errors as flakes if networking is being updated
// these pods eventually get created
operatorsProgressing := intervalcreation.IntervalsFromEvents_OperatorProgressing(events, event.From, event.To)
networkProgressing := false
for _, ev := range operatorsProgressing {
if strings.Contains(ev.Message, "clusteroperator/network") || strings.Contains(ev.Message, "clusteroperator/machine-config") {
networkProgressing = true
break
}
}
// this indicates a failure to create the sandbox that should not happen
failures = append(failures, fmt.Sprintf("%v - never deleted - %v", event.Locator, event.Message))
if networkProgressing {
flakes = append(flakes, fmt.Sprintf("%v - never deleted - network rollout - %v", event.Locator, event.Message))
} else {
failures = append(failures, fmt.Sprintf("%v - never deleted - %v", event.Locator, event.Message))
}
} else {
timeBetweenDeleteAndFailure := event.From.Sub(*deletionTime)
switch {
case timeBetweenDeleteAndFailure < 1*time.Second:
// nothing here, one second is close enough to be ok, the kubelet and CNI just didn't know
case timeBetweenDeleteAndFailure < 5*time.Second:
// within five seconds, it ought to be long enough to know, but it's close enough to flake and not fail
flakes = append(failures, fmt.Sprintf("%v - %0.2f seconds after deletion - %v", event.Locator, timeBetweenDeleteAndFailure.Seconds(), event.Message))
flakes = append(flakes, fmt.Sprintf("%v - %0.2f seconds after deletion - %v", event.Locator, timeBetweenDeleteAndFailure.Seconds(), event.Message))
case deletionTime.Before(event.From):
// something went wrong. More than five seconds after the pod was deleted, the CNI is trying to set up pod sandboxes and can't
failures = append(failures, fmt.Sprintf("%v - %0.2f seconds after deletion - %v", event.Locator, timeBetweenDeleteAndFailure.Seconds(), event.Message))
Expand Down

0 comments on commit 124818d

Please sign in to comment.