Skip to content

Commit

Permalink
Fix issue with infinite reconciling higher MTU
Browse files Browse the repository at this point in the history
When the MTU set in the SRIOV Network Node Policy is lower than the
actual MTU of the PF, it triggers the reconcile loop for the Node state
indefinitely, preventing the configuration from completing.

Signed-off-by: amaslennikov <amaslennikov@nvidia.com>
  • Loading branch information
almaslennikov committed Jun 4, 2024
1 parent 8d32f42 commit c2c7817
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 6 deletions.
2 changes: 1 addition & 1 deletion api/v1/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ func GetEswitchModeFromStatus(ifaceStatus *InterfaceExt) string {
func NeedToUpdateSriov(ifaceSpec *Interface, ifaceStatus *InterfaceExt) bool {
if ifaceSpec.Mtu > 0 {
mtu := ifaceSpec.Mtu
if mtu != ifaceStatus.Mtu {
if mtu > ifaceStatus.Mtu {
log.V(2).Info("NeedToUpdateSriov(): MTU needs update", "desired", mtu, "current", ifaceStatus.Mtu)
return true
}
Expand Down
78 changes: 73 additions & 5 deletions test/conformance/tests/test_sriov_operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -1035,12 +1035,12 @@ var _ = Describe("[sriov] operator", func() {
}, 3*time.Minute, time.Second).Should(Equal(corev1.PodRunning))
})

It("should reconcile managed VF if status changes", func() {
It("should reconcile managed VF if status is changed", func() {
originalMtu := sriovDevice.Mtu
newMtu := 1000
lowerMtu := originalMtu - 500

By("manually changing the MTU")
_, errOutput, err := runCommandOnConfigDaemon(node, "/bin/bash", "-c", fmt.Sprintf("echo %d > /sys/bus/pci/devices/%s/net/%s/mtu", newMtu, sriovDevice.PciAddress, sriovDevice.Name))
By("manually decreasing the MTU")
_, errOutput, err := runCommandOnConfigDaemon(node, "/bin/bash", "-c", fmt.Sprintf("echo %d > /sys/bus/pci/devices/%s/net/%s/mtu", lowerMtu, sriovDevice.PciAddress, sriovDevice.Name))
Expect(err).ToNot(HaveOccurred())
Expect(errOutput).To(Equal(""))

Expand All @@ -1053,7 +1053,7 @@ var _ = Describe("[sriov] operator", func() {
IgnoreExtras,
Fields{
"Name": Equal(sriovDevice.Name),
"Mtu": Equal(newMtu),
"Mtu": Equal(lowerMtu),
"PciAddress": Equal(sriovDevice.PciAddress),
"NumVfs": Equal(sriovDevice.NumVfs),
})))
Expand All @@ -1071,6 +1071,74 @@ var _ = Describe("[sriov] operator", func() {
"PciAddress": Equal(sriovDevice.PciAddress),
"NumVfs": Equal(sriovDevice.NumVfs),
})))

higherMtu := originalMtu + 500

By("manually increasing the MTU")
_, errOutput, err = runCommandOnConfigDaemon(node, "/bin/bash", "-c", fmt.Sprintf("echo %d > /sys/bus/pci/devices/%s/net/%s/mtu", higherMtu, sriovDevice.PciAddress, sriovDevice.Name))
Expect(err).ToNot(HaveOccurred())
Expect(errOutput).To(Equal(""))

By("waiting for the mtu to be updated in the status")
Eventually(func() sriovv1.InterfaceExts {
nodeState, err := clients.SriovNetworkNodeStates(operatorNamespace).Get(context.Background(), node, metav1.GetOptions{})
Expect(err).ToNot(HaveOccurred())
return nodeState.Status.Interfaces
}, 3*time.Minute, 1*time.Second).Should(ContainElement(MatchFields(
IgnoreExtras,
Fields{
"Name": Equal(sriovDevice.Name),
"Mtu": Equal(higherMtu),
"PciAddress": Equal(sriovDevice.PciAddress),
"NumVfs": Equal(sriovDevice.NumVfs),
})))

By("expecting the mtu to consistently stay at the new higher level")
Consistently(func() sriovv1.InterfaceExts {
nodeState, err := clients.SriovNetworkNodeStates(operatorNamespace).Get(context.Background(), node, metav1.GetOptions{})
Expect(err).ToNot(HaveOccurred())
return nodeState.Status.Interfaces
}, 3*time.Minute, 15*time.Second).Should(ContainElement(MatchFields(
IgnoreExtras,
Fields{
"Name": Equal(sriovDevice.Name),
"Mtu": Equal(higherMtu),
"PciAddress": Equal(sriovDevice.PciAddress),
"NumVfs": Equal(sriovDevice.NumVfs),
})))

By("manually returning the MTU to the original level")
_, errOutput, err = runCommandOnConfigDaemon(node, "/bin/bash", "-c", fmt.Sprintf("echo %d > /sys/bus/pci/devices/%s/net/%s/mtu", originalMtu, sriovDevice.PciAddress, sriovDevice.Name))
Expect(err).ToNot(HaveOccurred())
Expect(errOutput).To(Equal(""))

By("waiting for the mtu to be updated in the status")
Eventually(func() sriovv1.InterfaceExts {
nodeState, err := clients.SriovNetworkNodeStates(operatorNamespace).Get(context.Background(), node, metav1.GetOptions{})
Expect(err).ToNot(HaveOccurred())
return nodeState.Status.Interfaces
}, 3*time.Minute, 1*time.Second).Should(ContainElement(MatchFields(
IgnoreExtras,
Fields{
"Name": Equal(sriovDevice.Name),
"Mtu": Equal(originalMtu),
"PciAddress": Equal(sriovDevice.PciAddress),
"NumVfs": Equal(sriovDevice.NumVfs),
})))

By("expecting the mtu to consistently stay at the original level")
Consistently(func() sriovv1.InterfaceExts {
nodeState, err := clients.SriovNetworkNodeStates(operatorNamespace).Get(context.Background(), node, metav1.GetOptions{})
Expect(err).ToNot(HaveOccurred())
return nodeState.Status.Interfaces
}, 3*time.Minute, 15*time.Second).Should(ContainElement(MatchFields(
IgnoreExtras,
Fields{
"Name": Equal(sriovDevice.Name),
"Mtu": Equal(originalMtu),
"PciAddress": Equal(sriovDevice.PciAddress),
"NumVfs": Equal(sriovDevice.NumVfs),
})))
})
})

Expand Down

0 comments on commit c2c7817

Please sign in to comment.