Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable Vfio multifunction devices #4394

Merged
merged 3 commits into from
Nov 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
227 changes: 193 additions & 34 deletions pkg/pillar/hypervisor/kvm.go
Original file line number Diff line number Diff line change
Expand Up @@ -384,27 +384,37 @@ const qemuNetTemplate = `
{{- end}}
`

const qemuPciPassthruTemplate = `
const qemuRootPortPciPassthruTemplate = `
[device "pci.{{.PCIId}}"]
driver = "pcie-root-port"
port = "1{{.PCIId}}"
chassis = "{{.PCIId}}"
bus = "pcie.0"
multifunction = "on"
addr = "{{printf "0x%x" .PCIId}}"
`

const qemuPCIPassthruBridgeTemplate = `
[device "pcie-bridge.{{.Bus}}"]
driver = "pcie-pci-bridge"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why pcir-pci and no pcie-pcie? Shown PCIe device ans PCI has consequences and usually not a good idea

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure, because there is no need to translate. You either create a dedicated port to control BDF assignments or plug device into a PCIe switch

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried this:

[device "pci.7"]
  driver = "pcie-root-port"
  port = "17"
  chassis = "7"
  bus = "pcie.0"
  multifunction = "on"
  addr = "0x7"
  slot = "7"



[device "upstream_port.1"]
  driver = "x3130-upstream"
  bus = "pci.7"

[device "downstream_port1"]
  driver = "xio3130-downstream"
  bus = "upstream_port.1"
  slot = "71"

[device "downstream_port2"]
  driver = "xio3130-downstream"
  bus = "upstream_port.1"
  slot = "72"



#[device "pcie-bridge.7"]
#  driver = "pcie-pci-bridge"
#  bus = "pci.7"
#  addr = "0x0"

[device]
  driver = "vfio-pci"
  host = "00:0d.0"
#  bus = "pcie-bridge.7"
  bus = "downstream_port1"
  addr = "0x1"
[device]
  driver = "vfio-pci"
  host = "00:0d.2"
#  bus = "pcie-bridge.7"
  bus = "downstream_port2"
  addr = "0x2"

but I get the same error message:

qemu-system-x86_64:xen1.cfg:264: vfio 0000:00:0d.2: group 7 used in multiple address spaces

bus = "pci.{{.Bus}}"
addr = "{{printf "0x%x" .PCIId}}"
`

const qemuPciPassthruTemplate = `
[device]
driver = "vfio-pci"
host = "{{.PciShortAddr}}"
bus = "pci.{{.PCIId}}"
addr = "0x0"
bus = "{{.Bus}}"
addr = "{{.Addr}}"
{{- if .Xvga }}
x-vga = "on"
{{- end -}}
{{- if .Xopregion }}
x-igd-opregion = "on"
{{- end -}}
`

const qemuSerialTemplate = `
[chardev "charserial-usr{{.ID}}"]
backend = "serial"
Expand Down Expand Up @@ -929,12 +939,184 @@ func getFmlCustomResolution(status *types.DomainStatus, globalConfig *types.Conf
return "", fmt.Errorf("invalid fml resolution %s", fmlResolutions)
}

type pciDevicesWithBridge struct {
bridgeBus string
devs []*pciDevice
}

type multifunctionDevs map[string]*pciDevicesWithBridge // key: pci long without function number

func (md multifunctionDevs) isMultiFunction(p pciDevice) bool {
OhmSpectator marked this conversation as resolved.
Show resolved Hide resolved
pciLongWOFunc, err := p.pciLongWOFunction()
if err != nil {
return false
}
devs, found := md[pciLongWOFunc]
return found && len(devs.devs) > 1
}

func (md multifunctionDevs) index(p pciDevice) int {
pciLongWOFunc, err := p.pciLongWOFunction()
if err != nil {
logrus.Warnf("retrieving pci address without function failed: %v", err)
return -1
}
pciDevices, found := md[pciLongWOFunc]
if !found {
return -1
}

for i, dev := range pciDevices.devs {
if p.ioType == dev.ioType && p.pciLong == dev.pciLong {
return i
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We depend on the slice remaining unmodified and unsorted after it is created. If we remove an element from it, the indices can change. Currently, we don't have this behaviour, but it would be beneficial to explicitly state the requirement that the slice remains unmodified. A comment added to the slice would be a good solution.

}
}

return -1
}

func multifunctionDevGroup(pcis []pciDevice) multifunctionDevs {
mds := make(multifunctionDevs)

for i, pa := range pcis {
pciWithoutFunction, err := pa.pciLongWOFunction()
if err != nil {
logrus.Warnf("retrieving pci address without function failed: %v", err)
continue
}

_, found := mds[pciWithoutFunction]
if !found {
mds[pciWithoutFunction] = &pciDevicesWithBridge{
bridgeBus: "",
devs: []*pciDevice{},
}
}
mds[pciWithoutFunction].devs = append(mds[pciWithoutFunction].devs, &pcis[i])
}

return mds
}

func (p pciDevice) pciLongWOFunction() (string, error) {
pciLongSplit := strings.Split(p.pciLong, ".")
if len(pciLongSplit) == 0 {
return "", fmt.Errorf("could not split %s", p.pciLong)
}
pciWithoutFunction := strings.Join(pciLongSplit[0:len(pciLongSplit)-1], ".")

return pciWithoutFunction, nil
}

type pciAssignmentsTemplateFiller struct {
multifunctionDevices multifunctionDevs
file io.Writer
}

func (f *pciAssignmentsTemplateFiller) pciEBridge(pciID int, pciWOFunction string) error {
pciChildTemplateVars := struct {
PCIId int
Bus int
}{
PCIId: 0,
Bus: pciID,
}

tPCIeBridge, err := template.New("qemuPCIeBridge").Parse(qemuPCIPassthruBridgeTemplate)
if err != nil {
return fmt.Errorf("parsing qemuPCIPassthruBridgeTemplate failed: %w", err)
}
if err := tPCIeBridge.Execute(f.file, pciChildTemplateVars); err != nil {
return fmt.Errorf("can't write PCIe bridge Passthrough to config file (%w)", err)
}
f.multifunctionDevices[pciWOFunction].bridgeBus = fmt.Sprintf("pcie-bridge.%d", pciID)

return nil
}

func (f *pciAssignmentsTemplateFiller) do(file io.Writer, pciAssignments []pciDevice, pciID int) error {
if len(pciAssignments) == 0 {
return nil
}

pciPTContext := struct {
PCIId int
PciShortAddr string
Xvga bool
Xopregion bool
Bus string
Addr string
}{PCIId: pciID, PciShortAddr: "", Xvga: false, Xopregion: false}

tPCI, _ := template.New("qemuPCI").Parse(qemuPciPassthruTemplate)

pciEBridgeForMultiFuncDevCreated := make(map[string]struct{}) // key: pci long without function number
for i, pa := range pciAssignments {
pciPTContext.Xopregion = false
pciPTContext.Xvga = pa.isVGA()
pciPTContext.Bus = fmt.Sprintf("pci.%d", pciPTContext.PCIId)
pciPTContext.PciShortAddr = types.PCILongToShort(pa.pciLong)

if vendor, err := pa.vid(); err == nil {
// check for Intel vendor
if vendor == "0x8086" {
if pciPTContext.Xvga {
// we set opregion for Intel vga
// https://github.com/qemu/qemu/blob/stable-5.0/docs/igd-assign.txt#L91-L96
pciPTContext.Xopregion = true
}
}
}

pciLongWoFunc, err := pa.pciLongWOFunction()
if err != nil {
logrus.Warnf("retrieving pci address without function failed: %v", err)
continue
}
_, pciEBridgeCreated := pciEBridgeForMultiFuncDevCreated[pciLongWoFunc]
pciEBridgeForMultiFuncDevCreated[pciLongWoFunc] = struct{}{}

// for non-multifunction devices, every pci device gets a "pcie-root-port"
if !f.multifunctionDevices.isMultiFunction(pa) || !pciEBridgeCreated {
tRootPortPCI, _ := template.New("qemuRootPortPCI").Parse(qemuRootPortPciPassthruTemplate)
if err := tRootPortPCI.Execute(file, pciPTContext); err != nil {
return logError("can't write Root Port PCI Passthrough to config file (%v)", err)
}
}
if f.multifunctionDevices.isMultiFunction(pa) {
if !pciEBridgeCreated {
err := f.pciEBridge(pciPTContext.PCIId, pciLongWoFunc)
if err != nil {
logrus.Warnf("could not write template: %v, skipping", err)
}
}

pciPTContext.Bus = f.multifunctionDevices[pciLongWoFunc].bridgeBus
pciPTContext.PCIId = f.multifunctionDevices.index(pa)
if pciPTContext.PCIId < 0 {
logrus.Warn("PCIId is less than 0 - skipping")
}
pciPTContext.Addr = fmt.Sprintf("0x%x", pciPTContext.PCIId+1) // Unsupported PCI slot 0 for standard hotplug controller
OhmSpectator marked this conversation as resolved.
Show resolved Hide resolved
} else {
pciPTContext.Bus = fmt.Sprintf("pci.%d", pciPTContext.PCIId)
pciPTContext.Addr = "0x0"
}
if err := tPCI.Execute(file, pciPTContext); err != nil {
return logError("can't write PCI Passthrough to config file (%v)", err)
}
pciPTContext.PCIId = pciID + i + 1
}

return nil
}

// CreateDomConfig creates a domain config (a qemu config file,
// typically named something like xen-%d.cfg)
func (ctx KvmContext) CreateDomConfig(domainName string,
config types.DomainConfig, status types.DomainStatus,
diskStatusList []types.DiskStatus, aa *types.AssignableAdapters,
globalConfig *types.ConfigItemValueMap, swtpmCtrlSock string, file *os.File) error {

virtualizationMode := ""
bootLoaderSettingsFile, err := getOVMFSettingsFilename(domainName)
if err != nil {
Expand Down Expand Up @@ -1079,38 +1261,15 @@ func (ctx KvmContext) CreateDomConfig(domainName string,
}
}
}
if len(pciAssignments) != 0 {
pciPTContext := struct {
PCIId int
PciShortAddr string
Xvga bool
Xopregion bool
}{PCIId: netContext.PCIId, PciShortAddr: "", Xvga: false, Xopregion: false}

t, _ = template.New("qemuPciPT").Parse(qemuPciPassthruTemplate)
for _, pa := range pciAssignments {
short := types.PCILongToShort(pa.pciLong)
pciPTContext.Xvga = pa.isVGA()

if vendor, err := pa.vid(); err == nil {
// check for Intel vendor
if vendor == "0x8086" {
if pciPTContext.Xvga {
// we set opregion for Intel vga
// https://github.com/qemu/qemu/blob/stable-5.0/docs/igd-assign.txt#L91-L96
pciPTContext.Xopregion = true
}
}
}

pciPTContext.PciShortAddr = short
if err := t.Execute(file, pciPTContext); err != nil {
return logError("can't write PCI Passthrough to config file %s (%v)", file.Name(), err)
}
pciPTContext.Xvga = false
pciPTContext.Xopregion = false
pciPTContext.PCIId = pciPTContext.PCIId + 1
}
pciAssignmentsFiller := pciAssignmentsTemplateFiller{
multifunctionDevices: multifunctionDevGroup(pciAssignments),
file: file,
}

err = pciAssignmentsFiller.do(file, pciAssignments, netContext.PCIId)
OhmSpectator marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
return fmt.Errorf("writing to template file %s failed: %w", file.Name(), err)
}
if len(serialAssignments) != 0 {
serialPortContext := struct {
Expand Down
Loading
Loading