Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add support to ignore volumeless pods by Resiliency #137

Merged
merged 8 commits into from
Oct 4, 2022
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,4 @@
# Sharmila Ramamoorthy (sharmilarama)

# for all files:
* @medegw01 @gallacher @tdawe @alikdell @atye @hoppea2 @coulof @shaynafinocchiaro @lj-software @sharmilarama
* @gallacher @tdawe @alikdell @atye @hoppea2 @coulof @shaynafinocchiaro @sharmilarama @arnchiequ-dell @isaiasA1
2 changes: 2 additions & 0 deletions .github/containerscan/allowedlist.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,12 @@ general:
- CVE-2021-44568
- CVE-2022-0778
- CVE-2022-2526
- CVE-2022-27664
# Disputed CVEs
- CVE-2019-1010022
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we take this opportunity to review this allowedList for CVEs?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have ubi image upgrade story, we should check in that story. I will add in acceptance criteria

- CVE-2022-26280
- CVE-2018-25032
- CVE-2022-1271
- CVE-2022-2175
- CVE-2021-44716

5 changes: 5 additions & 0 deletions cmd/podmon/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ const (
skipArrayConnectionValidation = false
driverPath = "csi-vxflexos.dellemc.com"
driverConfigParamsDefault = "resources/driver-config-params.yaml"
ignoreVolumelessPods = false
// -- Below are constants for dynamic configuration --
defaultLogLevel = log.DebugLevel
podmonArrayConnectivityPollRate = "PODMON_ARRAY_CONNECTIVITY_POLL_RATE"
Expand Down Expand Up @@ -127,6 +128,7 @@ func main() {
monitor.PodmonTaintKey = fmt.Sprintf("%s.%s", monitor.Driver.GetDriverName(), monitor.PodmonTaintKeySuffix)
monitor.SetArrayConnectivityPollRate(time.Duration(*args.arrayConnectivityPollRate) * time.Second)
monitor.ArrayConnectivityConnectionLossThreshold = *args.arrayConnectivityConnectionLossThreshold
monitor.IgnoreVolumelessPods = *args.ignoreVolumelessPods
err := K8sAPI.Connect(args.kubeconfig)
if err != nil {
log.Errorf("kubernetes connection error: %s", err)
Expand Down Expand Up @@ -211,6 +213,7 @@ type PodmonArgs struct {
driverConfigParamsFile *string // Set the location of the driver ConfigMap
driverPodLabelKey *string // driverPodLabelKey for annotating driver node pods to be watched/processed
driverPodLabelValue *string // driverPodLabelValue value for annotating driver node pods to be watched/processed
ignoreVolumelessPods *bool // Ignore volumeless pods even if those has Resiliency label
}

var args PodmonArgs
Expand All @@ -231,6 +234,7 @@ func getArgs() {
args.driverConfigParamsFile = flag.String("driver-config-params", driverConfigParamsDefault, "Full path to the YAML file containing the driver ConfigMap")
args.driverPodLabelKey = flag.String("driverPodLabelKey", driverPodLabelKey, "label key for pods or other objects to be monitored")
args.driverPodLabelValue = flag.String("driverPodLabelValue", driverPodLabelValue, "label value for pods or other objects to be monitored")
args.ignoreVolumelessPods = flag.Bool("ignoreVolumelessPods", ignoreVolumelessPods, "ingnore volumeless pods even though they have podmon label")
})

// -- For testing purposes. Re-default the values since main will be called multiple times --
Expand All @@ -247,6 +251,7 @@ func getArgs() {
*args.driverConfigParamsFile = driverConfigParamsDefault
*args.driverPodLabelKey = driverPodLabelKey
*args.driverPodLabelValue = driverPodLabelValue
*args.ignoreVolumelessPods = ignoreVolumelessPods
flag.Parse()
}

Expand Down
32 changes: 25 additions & 7 deletions internal/monitor/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ type ControllerPodInfo struct { // information controller keeps on hand about a

const notFound = "not found"
const hostNameTopologyKey = "kubernetes.io/hostname"
const arrayIDVolumeAttribute = "arrayID"
const storageSystemVolumeAttribute = "StorageSystem"
const defaultArray = "default"

// controllerModePodHandler handles controller mode functionality when a pod event happens
func (cm *PodMonitorType) controllerModePodHandler(pod *v1.Pod, eventType watch.EventType) error {
Expand Down Expand Up @@ -104,10 +107,19 @@ func (cm *PodMonitorType) controllerModePodHandler(pod *v1.Pod, eventType watch.
// If ready, we want to save the PodKeyToControllerPodInfo
// It will use these items to clean up pods if the array reports no connectivity.
if ready {
arrayIDs, err := cm.podToArrayIDs(pod)
arrayIDs, pvcCount, err := cm.podToArrayIDs(ctx, pod)
log.Infof("IgnoreVolumelessPods %t pvcCount %d", IgnoreVolumelessPods, pvcCount)
if err != nil {
log.Errorf("Could not determine pod to arrayIDs: %s", err)
} else {
// Do not keep track of Volumeless pods
if IgnoreVolumelessPods && pvcCount == 0 {
log.Infof("podKey %s ignore because Volumeless", podKey)
return nil
}
}
log.Infof("podKey %s pvcCount %d arrayIDs %v", podKey, pvcCount, arrayIDs)

podAffinityLabels := cm.getPodAffinityLabels(pod)
if len(podAffinityLabels) > 0 {
log.Infof("podKey %s podAffinityLabels %v", podKey, podAffinityLabels)
Expand Down Expand Up @@ -192,6 +204,12 @@ func (cm *PodMonitorType) controllerCleanupPod(pod *v1.Pod, node *v1.Node, reaso
return false
}

// ignoreVolumeless pod
if IgnoreVolumelessPods && len(pvlist) == 0 {
log.WithFields(fields).Infof("Ignoring volumeless pod")
return true
}

// Get the volume handles from the PVs
volIDs := make([]string, 0)
for _, pv := range pvlist {
Expand Down Expand Up @@ -366,12 +384,12 @@ func (cm *PodMonitorType) callControllerUnpublishVolume(node *v1.Node, volumeID
return err
}

// podToArrayIDs returns the array IDs used by the pod)
// TODO: multi-array
func (cm *PodMonitorType) podToArrayIDs(pod *v1.Pod) ([]string, error) {
arrayIDs := make([]string, 1)
arrayIDs[0] = "default"
return arrayIDs, nil
// podToArrayIDs returns the array IDs used by the pod, along with pvCount, and error
func (cm *PodMonitorType) podToArrayIDs(ctx context.Context, pod *v1.Pod) ([]string, int, error) {
arrayIDs := make([]string, 0)
pvlist, _ := K8sAPI.GetPersistentVolumesInPod(ctx, pod)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this return an error that is being discarded? Should it not be checked?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here we don't need, error will result in empty pvlist. We check for error where we need to, here for now not needed. When we add multi-array support, then we will need to check error and accordingly return error.

arrayIDs = append(arrayIDs, defaultArray)
return arrayIDs, len(pvlist), nil
}

// ArrayConnectivityMonitor -- periodically checks array connectivity to all the nodes using it.
Expand Down
12 changes: 12 additions & 0 deletions internal/monitor/features/controller.feature
Original file line number Diff line number Diff line change
Expand Up @@ -154,3 +154,15 @@ Feature: Controller Monitor
| "node1" | "Ready" | "none" | "Updated" | "false" | "true" |
| "node1" | "NotReady" | "GetPod" | "Updated" | "false" | "false" |
| "node1" | "NotReady" | "GetNode" | "Updated" | "false" | "false" |

@controller-mode
Scenario Outline: test IgnoreVolumelessPods
Given a controller monitor "vxflex"
And a pod for node <podnode> with <nvol> volumes condition <condition> affinity <affin>
When I call controllerModePodHandler with event "Updated"
Then the pod is cleaned <cleaned>

Examples:
| podnode | nvol | condition | affin | error | cleaned | errormsg |
| "node1" | 0 | "Ready" | "true" | "NodeNotConnected" | "false" | "none" |
| "node1" | 0 | "Ready" | "false" | "NodeConnected" | "false" | "Connected true" |
18 changes: 18 additions & 0 deletions internal/monitor/features/node.feature
Original file line number Diff line number Diff line change
Expand Up @@ -163,3 +163,21 @@ Feature: Controller Monitor
| driver | nodeName | pods | vols | devs | cleaned | unMountErr | rmDirErr | taintErr | k8apiErr | errorMsg |
| vxflex | "node1" | 1 | 1 | 1 | 1 | "none" | "none" | "none" | "none" | "none" |
| isilon | "node1" | 1 | 1 | 1 | 1 | "none" | "none" | "none" | "none" | "none" |

@node-mode
Scenario Outline: Testing monitor.nodeModeCleanupPods
Given a controller monitor <driver>
And node <nodeName> env vars set
And I have a <pods> pods for node <nodeName> with <vols> volumes <devs> devices condition ""
And the controller cleaned up <cleaned> pods for node <nodeName>
And I induce error <k8apiErr>
And I induce error <unMountErr>
And I induce error <rmDirErr>
And I induce error <taintErr>
When I call nodeModeCleanupPods for node <nodeName>
And the last log message contains <errorMsg>

Examples:
| driver | nodeName | pods | vols | devs | cleaned | unMountErr | rmDirErr | taintErr | k8apiErr | errorMsg |
| vxflex | "node1" | 1 | 0 | 0 |0 | "none" | "none" | "none" | "none" | "none" |
| vxflex | "node1" | 1 | 0 | 0 |0 | "none" | "none" | "none" | "none" | "none" |
2 changes: 2 additions & 0 deletions internal/monitor/monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ var (
dynamicConfigUpdateMutex sync.Mutex
// arrayConnectivityPollRate is the rate it polls to check array connectivity to nodes.
arrayConnectivityPollRate = ShortTimeout
//IgnoreVolumelessPods when set will keep labeled pods with no volumes from being force deleted on node or connectivity failures.
IgnoreVolumelessPods bool
)

// GetArrayConnectivityPollRate returns the array connectivity poll rate.
Expand Down
9 changes: 8 additions & 1 deletion internal/monitor/monitor_steps_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,10 @@ func (f *feature) aPodForNodeWithVolumesCondition(node string, nvolumes int, con
}

func (f *feature) aPodForNodeWithVolumesConditionAffinity(node string, nvolumes int, condition, affinity string) error {
// To test IgnoreVolumelessPods nvolumes is supplied 0 to induce it volumeless pod
if nvolumes == 0 {
IgnoreVolumelessPods = true
}
pod := f.createPod(node, nvolumes, condition, affinity)
f.pod = pod
f.k8sapiMock.AddPod(pod)
Expand All @@ -158,6 +162,10 @@ func (f *feature) aPodForNodeWithVolumesConditionAffinity(node string, nvolumes

func (f *feature) iHaveAPodsForNodeWithVolumesDevicesCondition(nPods int, nodeName string, nvolumes, ndevices int, condition string) error {
var err error
// To test IgnoreVolumelessPods nvolumes is supplied 0 to induce it volumeless pod
if nvolumes == 0 {
IgnoreVolumelessPods = true
}
f.podList = make([]*v1.Pod, nPods)
mockPaths := make([]string, nPods)
defer func() {
Expand Down Expand Up @@ -1132,7 +1140,6 @@ func MonitorTestScenarioInit(context *godog.ScenarioContext) {
context.Step(`^a controller monitor unity$`, f.aControllerMonitorUnity)
context.Step(`^a controller monitor vxflex$`, f.aControllerMonitorVxflex)
context.Step(`^a controller monitor isilon$`, f.aControllerMonitorisilon)
//context.Step(`^a pod for node "([^"]*)" with (\d+) volumes condition "([^"]*)"$`, f.aPodForNodeWithVolumesCondition)
context.Step(`^a pod for node "([^"]*)" with (\d+) volumes condition "([^"]*)"$`, f.aPodForNodeWithVolumesCondition)
context.Step(`^a pod for node "([^"]*)" with (\d+) volumes condition "([^"]*)" affinity "([^"]*)"$`, f.aPodForNodeWithVolumesConditionAffinity)
context.Step(`^I call controllerCleanupPod for node "([^"]*)"$`, f.iCallControllerCleanupPodForNode)
Expand Down
6 changes: 6 additions & 0 deletions internal/monitor/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,12 @@ func (pm *PodMonitorType) nodeModeCleanupPods(node *v1.Node) bool {

// Check containers to make sure they're not running. This uses the containerInfos map obtained above.
pod := podInfo.Pod
// Get the PVs associated with this pod.
pvlist, _ := K8sAPI.GetPersistentVolumesInPod(ctx, pod)
if IgnoreVolumelessPods && len(pvlist) == 0 {
// Ignore pod without pvcs
return true
}
for _, containerStatus := range pod.Status.ContainerStatuses {
containerID := containerStatus.ContainerID
cid := strings.Split(containerID, "//")
Expand Down