Skip to content

Commit

Permalink
Merge pull request #642 from ykulazhenkov/pr-switchdev-configuration
Browse files Browse the repository at this point in the history
[switchdev 8/x] Add support for switchdev configuration to sriov pkg
  • Loading branch information
zeeke committed Mar 5, 2024
2 parents 860daa8 + d324d46 commit efea23b
Show file tree
Hide file tree
Showing 6 changed files with 332 additions and 22 deletions.
16 changes: 16 additions & 0 deletions api/v1/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,22 @@ func FindInterface(interfaces Interfaces, name string) (iface Interface, err err
return Interface{}, fmt.Errorf("unable to find interface: %v", name)
}

// GetEswitchModeFromSpec returns the eswitch mode requested in the interface spec.
// It falls back to ESwithModeLegacy when the mode is not set or when ifaceSpec is nil,
// so callers never receive an empty mode string.
func GetEswitchModeFromSpec(ifaceSpec *Interface) string {
	// a nil spec carries no requested mode; treat it the same as "not set"
	if ifaceSpec == nil || ifaceSpec.EswitchMode == "" {
		return ESwithModeLegacy
	}
	return ifaceSpec.EswitchMode
}

// GetEswitchModeFromStatus returns the eswitch mode reported in the interface status.
// It falls back to ESwithModeLegacy when the mode is not set or when ifaceStatus is nil,
// so callers never receive an empty mode string.
func GetEswitchModeFromStatus(ifaceStatus *InterfaceExt) string {
	// a nil status carries no reported mode; treat it the same as "not set"
	if ifaceStatus == nil || ifaceStatus.EswitchMode == "" {
		return ESwithModeLegacy
	}
	return ifaceStatus.EswitchMode
}

func NeedToUpdateSriov(ifaceSpec *Interface, ifaceStatus *InterfaceExt) bool {
if ifaceSpec.Mtu > 0 {
mtu := ifaceSpec.Mtu
Expand Down
64 changes: 64 additions & 0 deletions api/v1/helper_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -802,3 +802,67 @@ func TestVhostVdpaNodePolicyApply(t *testing.T) {
})
}
}

// TestGetEswitchModeFromSpec checks that GetEswitchModeFromSpec returns the mode stored
// in the spec and reports legacy when the spec leaves the mode empty.
func TestGetEswitchModeFromSpec(t *testing.T) {
	type testCase struct {
		tname          string
		spec           *v1.Interface
		expectedResult string
	}
	cases := []testCase{
		{tname: "set to legacy", spec: &v1.Interface{EswitchMode: v1.ESwithModeLegacy}, expectedResult: v1.ESwithModeLegacy},
		{tname: "set to switchdev", spec: &v1.Interface{EswitchMode: v1.ESwithModeSwitchDev}, expectedResult: v1.ESwithModeSwitchDev},
		{tname: "not set", spec: &v1.Interface{}, expectedResult: v1.ESwithModeLegacy},
	}
	for i := range cases {
		tc := cases[i]
		t.Run(tc.tname, func(t *testing.T) {
			diff := cmp.Diff(tc.expectedResult, v1.GetEswitchModeFromSpec(tc.spec))
			if diff == "" {
				return
			}
			t.Errorf("unexpected result (-want +got):\n%s", diff)
		})
	}
}

// TestGetEswitchModeFromStatus checks that GetEswitchModeFromStatus returns the mode stored
// in the status and reports legacy when the status leaves the mode empty.
func TestGetEswitchModeFromStatus(t *testing.T) {
	type testCase struct {
		tname          string
		status         *v1.InterfaceExt
		expectedResult string
	}
	cases := []testCase{
		{tname: "set to legacy", status: &v1.InterfaceExt{EswitchMode: v1.ESwithModeLegacy}, expectedResult: v1.ESwithModeLegacy},
		{tname: "set to switchdev", status: &v1.InterfaceExt{EswitchMode: v1.ESwithModeSwitchDev}, expectedResult: v1.ESwithModeSwitchDev},
		{tname: "not set", status: &v1.InterfaceExt{}, expectedResult: v1.ESwithModeLegacy},
	}
	for i := range cases {
		tc := cases[i]
		t.Run(tc.tname, func(t *testing.T) {
			diff := cmp.Diff(tc.expectedResult, v1.GetEswitchModeFromStatus(tc.status))
			if diff == "" {
				return
			}
			t.Errorf("unexpected result (-want +got):\n%s", diff)
		})
	}
}
178 changes: 160 additions & 18 deletions pkg/host/internal/sriov/sriov.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ type sriov struct {
kernelHelper types.KernelInterface
networkHelper types.NetworkInterface
udevHelper types.UdevInterface
vdpaHelper types.VdpaInterface
netlinkLib netlinkPkg.NetlinkLib
dputilsLib dputilsPkg.DPUtilsLib
}
Expand All @@ -44,12 +45,14 @@ func New(utilsHelper utils.CmdInterface,
kernelHelper types.KernelInterface,
networkHelper types.NetworkInterface,
udevHelper types.UdevInterface,
vdpaHelper types.VdpaInterface,
netlinkLib netlinkPkg.NetlinkLib,
dputilsLib dputilsPkg.DPUtilsLib) types.SriovInterface {
return &sriov{utilsHelper: utilsHelper,
kernelHelper: kernelHelper,
networkHelper: networkHelper,
udevHelper: udevHelper,
vdpaHelper: vdpaHelper,
netlinkLib: netlinkLib,
dputilsLib: dputilsLib,
}
Expand Down Expand Up @@ -77,22 +80,28 @@ func (s *sriov) SetSriovNumVfs(pciAddr string, numVfs int) error {

func (s *sriov) ResetSriovDevice(ifaceStatus sriovnetworkv1.InterfaceExt) error {
log.Log.V(2).Info("ResetSriovDevice(): reset SRIOV device", "address", ifaceStatus.PciAddress)
if err := s.SetSriovNumVfs(ifaceStatus.PciAddress, 0); err != nil {
return err
}
if ifaceStatus.LinkType == consts.LinkTypeETH {
var mtu int
eswitchMode := sriovnetworkv1.ESwithModeLegacy
is := sriovnetworkv1.InitialState.GetInterfaceStateByPciAddress(ifaceStatus.PciAddress)
if is != nil {
mtu = is.Mtu
eswitchMode = sriovnetworkv1.GetEswitchModeFromStatus(is)
} else {
mtu = 1500
}
log.Log.V(2).Info("ResetSriovDevice(): reset mtu", "value", mtu)
if err := s.networkHelper.SetNetdevMTU(ifaceStatus.PciAddress, mtu); err != nil {
return err
}
log.Log.V(2).Info("ResetSriovDevice(): reset eswitch mode and number of VFs", "mode", eswitchMode)
if err := s.setEswitchModeAndNumVFs(ifaceStatus.PciAddress, eswitchMode, 0); err != nil {
return err
}
} else if ifaceStatus.LinkType == consts.LinkTypeIB {
if err := s.SetSriovNumVfs(ifaceStatus.PciAddress, 0); err != nil {
return err
}
if err := s.networkHelper.SetNetdevMTU(ifaceStatus.PciAddress, 2048); err != nil {
return err
}
Expand Down Expand Up @@ -305,6 +314,9 @@ func (s *sriov) configSriovPFDevice(iface *sriovnetworkv1.Interface) error {
log.Log.Error(err, "configSriovPFDevice(): fail to set NumVfs for device", "device", iface.PciAddress)
return err
}
if err := s.configureHWOptionsForSwitchdev(iface); err != nil {
return err
}
err := s.addUdevRules(iface)
if err != nil {
log.Log.Error(err, "configSriovPFDevice(): fail to set add udev rules", "device", iface.PciAddress)
Expand All @@ -330,6 +342,45 @@ func (s *sriov) configSriovPFDevice(iface *sriovnetworkv1.Interface) error {
return nil
}

// configureHWOptionsForSwitchdev applies the hardware options needed by a PF whose target
// eswitch mode (taken from the spec) is switchdev. PFs with any other target mode are
// left untouched.
//
// For a switchdev PF it calls EnableHwTcOffload on the PF netdev and makes sure the devlink
// "flow_steering_mode" parameter is "smfs". If reading that parameter fails with EINVAL the
// device is treated as not supporting software flow steering and the step is skipped.
// When the parameter has to be changed and the NIC is not in legacy mode, the NIC is first
// switched to legacy mode with 0 VFs; any failure of that step is returned to the caller.
func (s *sriov) configureHWOptionsForSwitchdev(iface *sriovnetworkv1.Interface) error {
	log.Log.V(2).Info("configureHWOptionsForSwitchdev(): configure HW options for device",
		"device", iface.PciAddress)
	if sriovnetworkv1.GetEswitchModeFromSpec(iface) != sriovnetworkv1.ESwithModeSwitchDev {
		// we need to configure HW options only for PFs for which switchdev is a target mode
		return nil
	}
	if err := s.networkHelper.EnableHwTcOffload(iface.Name); err != nil {
		return err
	}
	desiredFlowSteeringMode := "smfs"
	currentFlowSteeringMode, err := s.networkHelper.GetDevlinkDeviceParam(iface.PciAddress, "flow_steering_mode")
	if err != nil {
		if errors.Is(err, syscall.EINVAL) {
			log.Log.V(2).Info("configureHWOptionsForSwitchdev(): software flow steering is not supported by the device, skip configuration",
				"device", iface.PciAddress)
			return nil
		}
		log.Log.Error(err, "configureHWOptionsForSwitchdev(): fail to read current flow steering mode for the device", "device", iface.PciAddress)
		return err
	}
	if currentFlowSteeringMode == desiredFlowSteeringMode {
		return nil
	}
	currentEswitchMode, err := s.GetNicSriovMode(iface.PciAddress)
	if err != nil {
		return err
	}
	// flow steering mode can be changed only when NIC is in legacy mode
	if currentEswitchMode != sriovnetworkv1.ESwithModeLegacy {
		// do not continue if the NIC could not be moved to legacy mode: the parameter
		// change below would be attempted in the wrong mode and the real cause would be lost
		if err := s.setEswitchModeAndNumVFs(iface.PciAddress, sriovnetworkv1.ESwithModeLegacy, 0); err != nil {
			log.Log.Error(err, "configureHWOptionsForSwitchdev(): fail to switch the device to legacy mode", "device", iface.PciAddress)
			return err
		}
	}
	if err := s.networkHelper.SetDevlinkDeviceParam(iface.PciAddress, "flow_steering_mode", desiredFlowSteeringMode); err != nil {
		log.Log.Error(err, "configureHWOptionsForSwitchdev(): fail to configure flow steering mode for the device", "device", iface.PciAddress)
		return err
	}
	return nil
}

func (s *sriov) checkExternallyManagedPF(iface *sriovnetworkv1.Interface) error {
log.Log.V(2).Info("checkExternallyManagedPF(): configure PF sriov device",
"device", iface.PciAddress)
Expand All @@ -341,6 +392,17 @@ func (s *sriov) checkExternallyManagedPF(iface *sriovnetworkv1.Interface) error
log.Log.Error(nil, errMsg)
return fmt.Errorf(errMsg)
}
currentEswitchMode, err := s.GetNicSriovMode(iface.PciAddress)
if err != nil {
return err
}
expectedEswitchMode := sriovnetworkv1.GetEswitchModeFromSpec(iface)
if currentEswitchMode != expectedEswitchMode {
errMsg := fmt.Sprintf("checkExternallyManagedPF(): requested ESwitchMode mode \"%s\" is not equal to configured \"%s\" "+
"but the policy is configured as ExternallyManaged for device %s", expectedEswitchMode, currentEswitchMode, iface.PciAddress)
log.Log.Error(nil, errMsg)
return fmt.Errorf(errMsg)
}
currentMtu := s.networkHelper.GetNetdevMTU(iface.PciAddress)
if iface.Mtu > 0 && iface.Mtu > currentMtu {
err := fmt.Errorf("checkExternallyManagedPF(): requested MTU(%d) is greater than configured MTU(%d) for device %s. cannot change MTU as policy is configured as ExternallyManaged",
Expand Down Expand Up @@ -434,7 +496,16 @@ func (s *sriov) configSriovVFDevices(iface *sriovnetworkv1.Interface) error {
if err = s.kernelHelper.UnbindDriverIfNeeded(addr, group.IsRdma); err != nil {
return err
}

// the eswitch mode has been set before this point; if the desired mode (which is also the
// current mode at this point) is legacy, the VDPA device has already disappeared automatically,
// so we don't need to check for it
if sriovnetworkv1.GetEswitchModeFromSpec(iface) == sriovnetworkv1.ESwithModeSwitchDev && group.VdpaType == "" {
if err := s.vdpaHelper.DeleteVDPADevice(addr); err != nil {
log.Log.Error(err, "configSriovVFDevices(): fail to delete VDPA device",
"device", addr)
return err
}
}
if !sriovnetworkv1.StringInArray(group.DeviceType, vars.DpdkDrivers) {
if err := s.kernelHelper.BindDefaultDriver(addr); err != nil {
log.Log.Error(err, "configSriovVFDevices(): fail to bind default driver for device", "device", addr)
Expand All @@ -447,6 +518,13 @@ func (s *sriov) configSriovVFDevices(iface *sriovnetworkv1.Interface) error {
return err
}
}
if sriovnetworkv1.GetEswitchModeFromSpec(iface) == sriovnetworkv1.ESwithModeSwitchDev && group.VdpaType != "" {
if err := s.vdpaHelper.CreateVDPADevice(addr, group.VdpaType); err != nil {
log.Log.Error(err, "configSriovVFDevices(): fail to create VDPA device",
"vdpaType", group.VdpaType, "device", addr)
return err
}
}
} else {
if err := s.kernelHelper.BindDpdkDriver(addr, group.DeviceType); err != nil {
log.Log.Error(err, "configSriovVFDevices(): fail to bind driver for device",
Expand Down Expand Up @@ -779,13 +857,12 @@ func (s *sriov) ConfigSriovDeviceVirtual(iface *sriovnetworkv1.Interface) error

func (s *sriov) GetNicSriovMode(pciAddress string) (string, error) {
log.Log.V(2).Info("GetNicSriovMode()", "device", pciAddress)

devLink, err := s.netlinkLib.DevLinkGetDeviceByName("pci", pciAddress)
if err != nil {
if errors.Is(err, syscall.ENODEV) {
// the device doesn't support devlink
return "", nil
return sriovnetworkv1.ESwithModeLegacy, nil
}
log.Log.Error(err, "GetNicSriovMode(): failed to get eswitch mode", "device", pciAddress)
return "", err
}

Expand Down Expand Up @@ -823,32 +900,97 @@ func (s *sriov) encapTypeToLinkType(encapType string) string {

// addUdevRules creates the udev rules required for the PF:
//   - a rule to disable NetworkManager for VFs - for all modes
//   - a rule to rename VF representors - only for switchdev mode
//
// The representor rule is built from the PF's physical port name and physical switch ID,
// both read through the network helper; any failure is returned to the caller.
func (s *sriov) addUdevRules(iface *sriovnetworkv1.Interface) error {
	log.Log.V(2).Info("addUdevRules(): add udev rules for device",
		"device", iface.PciAddress)
	if err := s.udevHelper.AddUdevRule(iface.PciAddress); err != nil {
		return err
	}
	// the VF representor rule is needed only when switchdev is the target mode
	if sriovnetworkv1.GetEswitchModeFromSpec(iface) == sriovnetworkv1.ESwithModeSwitchDev {
		portName, err := s.networkHelper.GetPhysPortName(iface.Name)
		if err != nil {
			return err
		}
		switchID, err := s.networkHelper.GetPhysSwitchID(iface.Name)
		if err != nil {
			return err
		}
		if err := s.udevHelper.AddVfRepresentorUdevRule(iface.PciAddress, iface.Name, switchID, portName); err != nil {
			return err
		}
	}
	return nil
}

// removeUdevRules removes all udev rules created by the operator for the PF:
// first the rule removed by RemoveUdevRule, then the VF representor rule.
// It stops at, and returns, the first error.
func (s *sriov) removeUdevRules(pciAddress string) error {
	log.Log.V(2).Info("removeUdevRules(): remove udev rules for device",
		"device", pciAddress)
	if err := s.udevHelper.RemoveUdevRule(pciAddress); err != nil {
		return err
	}
	return s.udevHelper.RemoveVfRepresentorUdevRule(pciAddress)
}

// create VFs on the PF
func (s *sriov) createVFs(iface *sriovnetworkv1.Interface) error {
expectedEswitchMode := sriovnetworkv1.GetEswitchModeFromSpec(iface)
log.Log.V(2).Info("createVFs(): configure VFs for device",
"device", iface.PciAddress, "count", iface.NumVfs)
if iface.NumVfs == s.dputilsLib.GetVFconfigured(iface.PciAddress) {
log.Log.V(2).Info("createVFs(): device is already configured",
"device", iface.PciAddress, "count", iface.NumVfs)
return nil
"device", iface.PciAddress, "count", iface.NumVfs, "mode", expectedEswitchMode)

if s.dputilsLib.GetVFconfigured(iface.PciAddress) == iface.NumVfs {
currentEswitchMode, err := s.GetNicSriovMode(iface.PciAddress)
if err != nil {
return err
}
if currentEswitchMode == expectedEswitchMode {
log.Log.V(2).Info("createVFs(): device is already configured",
"device", iface.PciAddress, "count", iface.NumVfs, "mode", expectedEswitchMode)
return nil
}
}
// TODO add support for VF creation in switchdev mode
return s.SetSriovNumVfs(iface.PciAddress, iface.NumVfs)
return s.setEswitchModeAndNumVFs(iface.PciAddress, expectedEswitchMode, iface.NumVfs)
}

// setEswitchMode switches the NIC identified by pciAddr to the given eswitch mode.
// All VFs on the PF are unbound (via unbindAllVFsOnPF) before the mode is changed.
// The error returned when the mode change fails wraps the underlying error, so callers
// can still inspect it with errors.Is / errors.As.
func (s *sriov) setEswitchMode(pciAddr, eswitchMode string) error {
	log.Log.V(2).Info("setEswitchMode(): set eswitch mode", "device", pciAddr, "mode", eswitchMode)
	if err := s.unbindAllVFsOnPF(pciAddr); err != nil {
		log.Log.Error(err, "setEswitchMode(): failed to unbind VFs", "device", pciAddr, "mode", eswitchMode)
		return err
	}
	if err := s.SetNicSriovMode(pciAddr, eswitchMode); err != nil {
		// %w keeps the original error in the chain; the rendered message is unchanged
		err = fmt.Errorf("failed to switch NIC to SRIOV %s mode: %w", eswitchMode, err)
		log.Log.Error(err, "setEswitchMode(): failed to set mode", "device", pciAddr, "mode", eswitchMode)
		return err
	}
	return nil
}

// setEswitchModeAndNumVFs brings the PF to the desired eswitch mode with numVFs VFs.
// The sequence is fixed: move the NIC to legacy mode if it is not there already,
// set the VF count, and finally switch to switchdev if that is the desired mode.
// The first failing step aborts the sequence and its error is returned.
func (s *sriov) setEswitchModeAndNumVFs(pciAddr string, desiredEswitchMode string, numVFs int) error {
	log.Log.V(2).Info("setEswitchModeAndNumVFs(): configure VFs for device",
		"device", pciAddr, "count", numVFs, "mode", desiredEswitchMode)

	activeMode, err := s.GetNicSriovMode(pciAddr)
	if err != nil {
		return err
	}

	// VFs are always created while the NIC is in legacy mode, because some drivers
	// may not support VF creation in the switchdev mode
	mustGoLegacyFirst := activeMode != sriovnetworkv1.ESwithModeLegacy
	if mustGoLegacyFirst {
		err = s.setEswitchMode(pciAddr, sriovnetworkv1.ESwithModeLegacy)
		if err != nil {
			return err
		}
	}

	err = s.SetSriovNumVfs(pciAddr, numVFs)
	if err != nil {
		return err
	}

	// legacy is already in effect at this point, so only switchdev needs a final switch
	if desiredEswitchMode != sriovnetworkv1.ESwithModeSwitchDev {
		return nil
	}
	return s.setEswitchMode(pciAddr, sriovnetworkv1.ESwithModeSwitchDev)
}

// retrieve all VFs for the PF and unbind them from a driver
Expand Down
Loading

0 comments on commit efea23b

Please sign in to comment.