Skip to content

Commit

Permalink
Merge pull request #12135 from mihalicyn/netprio_cgroup_v2_replacement
Browse files Browse the repository at this point in the history
Add support for network device limits.priority option
  • Loading branch information
tomponline authored Sep 25, 2023
2 parents b3e17a9 + 47d6d6f commit a700a2f
Show file tree
Hide file tree
Showing 18 changed files with 246 additions and 23 deletions.
1 change: 1 addition & 0 deletions doc/.wordlist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -307,3 +307,4 @@ Zettabyte
ZFS
zpool
zpools
qdisc
5 changes: 5 additions & 0 deletions doc/api-extensions.md
Original file line number Diff line number Diff line change
Expand Up @@ -2293,3 +2293,8 @@ This introduces a syslog socket that can receive syslog formatted log messages.
## `event_lifecycle_name_and_project`

This adds the fields `Name` and `Project` to `lifecycle` events.

## `instances_nic_limits_priority`

This introduces a new per-NIC `limits.priority` option that works with both cgroup1 and cgroup2 unlike the deprecated `limits.network.priority` instance setting, which only worked with cgroup1.

1 change: 1 addition & 0 deletions doc/config_options.txt
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,7 @@ The higher the value, the less likely the instance is to be swapped to disk.
Controls how much priority to give to the instance's network requests when under load.

Specify an integer between 0 and 10.
This option is deprecated. Use the per-NIC `limits.priority` option instead.
```

```{config:option} limits.processes instance-resource-limits
Expand Down
3 changes: 3 additions & 0 deletions doc/reference/devices_nic.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ Key | Type | Default | Managed | Description
`limits.egress` | string | - | no | I/O limit in bit/s for outgoing traffic (various suffixes supported, see {ref}`instances-limit-units`)
`limits.ingress` | string | - | no | I/O limit in bit/s for incoming traffic (various suffixes supported, see {ref}`instances-limit-units`)
`limits.max` | string | - | no | I/O limit in bit/s for both incoming and outgoing traffic (same as setting both `limits.ingress` and `limits.egress`)
`limits.priority` | integer | - | The `skb->priority` value (32-bit unsigned integer) for outgoing traffic, to be used by the kernel queuing discipline (qdisc) to prioritize network packets (The effect of this value depends on the particular qdisc implementation, for example, `SKBPRIO` or `QFQ`. Consult the kernel qdisc documentation before setting this value.)
`maas.subnet.ipv4` | string | - | yes | MAAS IPv4 subnet to register the instance in
`maas.subnet.ipv6` | string | - | yes | MAAS IPv6 subnet to register the instance in
`mtu` | integer | parent MTU | yes | The MTU of the new interface
Expand Down Expand Up @@ -354,6 +355,7 @@ Key | Type | Default | Description
`limits.egress` | string | - | I/O limit in bit/s for outgoing traffic (various suffixes supported, see {ref}`instances-limit-units`)
`limits.ingress` | string | - | I/O limit in bit/s for incoming traffic (various suffixes supported, see {ref}`instances-limit-units`)
`limits.max` | string | - | I/O limit in bit/s for both incoming and outgoing traffic (same as setting both `limits.ingress` and `limits.egress`)
`limits.priority` | integer | - | The `skb->priority` value (32-bit unsigned integer) for outgoing traffic, to be used by the kernel queuing discipline (qdisc) to prioritize network packets (The effect of this value depends on the particular qdisc implementation, for example, `SKBPRIO` or `QFQ`. Consult the kernel qdisc documentation before setting this value.)
`mtu` | integer | kernel assigned | The MTU of the new interface
`name` | string | kernel assigned | The name of the interface inside the instance
`queue.tx.length` | integer | - | The transmit queue length for the NIC
Expand Down Expand Up @@ -442,6 +444,7 @@ Key | Type | Default | Description
`limits.egress` | string | - | I/O limit in bit/s for outgoing traffic (various suffixes supported, see {ref}`instances-limit-units`)
`limits.ingress` | string | - | I/O limit in bit/s for incoming traffic (various suffixes supported, see {ref}`instances-limit-units`)
`limits.max` | string | - | I/O limit in bit/s for both incoming and outgoing traffic (same as setting both `limits.ingress` and `limits.egress`)
`limits.priority` | integer | - | The `skb->priority` value (32-bit unsigned integer) for outgoing traffic, to be used by the kernel queuing discipline (qdisc) to prioritize network packets (The effect of this value depends on the particular qdisc implementation, for example, `SKBPRIO` or `QFQ`. Consult the kernel qdisc documentation before setting this value.)
`mtu` | integer | parent MTU | The MTU of the new interface
`name` | string | kernel assigned | The name of the interface inside the instance
`parent` | string | - | The name of the host device to join the instance to
Expand Down
2 changes: 1 addition & 1 deletion lxd/cgroup/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,7 @@ func (info *Info) Warnings() []cluster.Warning {
if !info.Supports(NetPrio, nil) {
warnings = append(warnings, cluster.Warning{
TypeCode: warningtype.MissingCGroupNetworkPriorityController,
LastMessage: "network priority will be ignored",
LastMessage: "per-instance network priority will be ignored. Please use per-device limits.priority instead",
})
}

Expand Down
60 changes: 49 additions & 11 deletions lxd/device/device_utils_network.go
Original file line number Diff line number Diff line change
Expand Up @@ -486,33 +486,33 @@ func networkNICRouteDelete(routeDev string, routes ...string) {
}

// networkSetupHostVethLimits applies any network rate limits to the veth device specified in the config.
func networkSetupHostVethLimits(m deviceConfig.Device) error {
func networkSetupHostVethLimits(d *deviceCommon, oldConfig deviceConfig.Device, bridged bool) error {
var err error

veth := m["host_name"]
veth := d.config["host_name"]

if veth == "" || !network.InterfaceExists(veth) {
return fmt.Errorf("Unknown or missing host side veth device %q", veth)
}

// Apply max limit
if m["limits.max"] != "" {
m["limits.ingress"] = m["limits.max"]
m["limits.egress"] = m["limits.max"]
if d.config["limits.max"] != "" {
d.config["limits.ingress"] = d.config["limits.max"]
d.config["limits.egress"] = d.config["limits.max"]
}

// Parse the values
var ingressInt int64
if m["limits.ingress"] != "" {
ingressInt, err = units.ParseBitSizeString(m["limits.ingress"])
if d.config["limits.ingress"] != "" {
ingressInt, err = units.ParseBitSizeString(d.config["limits.ingress"])
if err != nil {
return err
}
}

var egressInt int64
if m["limits.egress"] != "" {
egressInt, err = units.ParseBitSizeString(m["limits.egress"])
if d.config["limits.egress"] != "" {
egressInt, err = units.ParseBitSizeString(d.config["limits.egress"])
if err != nil {
return err
}
Expand All @@ -525,7 +525,7 @@ func networkSetupHostVethLimits(m deviceConfig.Device) error {
_ = qdisc.Delete()

// Apply new limits
if m["limits.ingress"] != "" {
if d.config["limits.ingress"] != "" {
qdiscHTB := &ip.QdiscHTB{Qdisc: ip.Qdisc{Dev: veth, Handle: "1:0", Root: true}, Default: "10"}
err := qdiscHTB.Add()
if err != nil {
Expand All @@ -545,7 +545,7 @@ func networkSetupHostVethLimits(m deviceConfig.Device) error {
}
}

if m["limits.egress"] != "" {
if d.config["limits.egress"] != "" {
qdisc = &ip.Qdisc{Dev: veth, Handle: "ffff:0", Ingress: true}
err := qdisc.Add()
if err != nil {
Expand All @@ -560,6 +560,44 @@ func networkSetupHostVethLimits(m deviceConfig.Device) error {
}
}

var networkPriority uint64
if d.config["limits.priority"] != "" {
networkPriority, err = strconv.ParseUint(d.config["limits.priority"], 10, 32)
if err != nil {
return fmt.Errorf("Failed to parse limits.priority %q: %w", d.config["limits.priority"], err)
}
}

if oldConfig != nil && oldConfig["limits.priority"] != d.config["limits.priority"] {
err = d.state.Firewall.InstanceClearNetPrio(d.inst.Project().Name, d.inst.Name(), veth)
if err != nil {
return err
}
}

if oldConfig == nil || (oldConfig != nil && oldConfig["limits.priority"] != d.config["limits.priority"]) {
if networkPriority != 0 {
if bridged && d.state.Firewall.String() == "xtables" {
return fmt.Errorf("Failed to setup instance device network priority. The xtables firewall driver does not support required functionality.")
}

err = d.state.Firewall.InstanceSetupNetPrio(d.inst.Project().Name, d.inst.Name(), veth, uint32(networkPriority))
if err != nil {
return fmt.Errorf("Failed to setup instance device network priority: %w", err)
}
}
}

return nil
}

// networkClearHostVethLimits clears any network rate limits to the veth device specified in the config.
func networkClearHostVethLimits(d *deviceCommon) error {
err := d.state.Firewall.InstanceClearNetPrio(d.inst.Project().Name, d.inst.Name(), d.config["host_name"])
if err != nil {
return err
}

return nil
}

Expand Down
1 change: 1 addition & 0 deletions lxd/device/nic.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ func nicValidationRules(requiredFields []string, optionalFields []string, instCo
"limits.ingress": validate.IsAny,
"limits.egress": validate.IsAny,
"limits.max": validate.IsAny,
"limits.priority": validate.Optional(validate.IsUint32),
"security.mac_filtering": validate.IsAny,
"security.ipv4_filtering": validate.IsAny,
"security.ipv6_filtering": validate.IsAny,
Expand Down
15 changes: 12 additions & 3 deletions lxd/device/nic_bridged.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ func (d *nicBridged) validateConfig(instConf instance.ConfigReader) error {
"limits.ingress",
"limits.egress",
"limits.max",
"limits.priority",
"ipv4.address",
"ipv6.address",
"ipv4.routes",
Expand Down Expand Up @@ -456,7 +457,7 @@ func (d *nicBridged) UpdatableFields(oldDevice Type) []string {
return []string{}
}

return []string{"limits.ingress", "limits.egress", "limits.max", "ipv4.routes", "ipv6.routes", "ipv4.routes.external", "ipv6.routes.external", "ipv4.address", "ipv6.address", "security.mac_filtering", "security.ipv4_filtering", "security.ipv6_filtering"}
return []string{"limits.ingress", "limits.egress", "limits.max", "limits.priority", "ipv4.routes", "ipv6.routes", "ipv4.routes.external", "ipv6.routes.external", "ipv4.address", "ipv6.address", "security.mac_filtering", "security.ipv4_filtering", "security.ipv6_filtering"}
}

// Add is run when a device is added to a non-snapshot instance whether or not the instance is running.
Expand Down Expand Up @@ -557,7 +558,7 @@ func (d *nicBridged) Start() (*deviceConfig.RunConfig, error) {
}

// Apply host-side limits.
err = networkSetupHostVethLimits(d.config)
err = networkSetupHostVethLimits(&d.deviceCommon, nil, true)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -741,7 +742,7 @@ func (d *nicBridged) Update(oldDevices deviceConfig.Devices, isRunning bool) err
}

// Apply host-side limits.
err = networkSetupHostVethLimits(d.config)
err = networkSetupHostVethLimits(&d.deviceCommon, oldConfig, true)
if err != nil {
return err
}
Expand Down Expand Up @@ -800,6 +801,14 @@ func (d *nicBridged) Stop() (*deviceConfig.RunConfig, error) {
return nil, err
}

// Populate device config with volatile fields (hwaddr and host_name) if needed.
networkVethFillFromVolatile(d.config, d.volatileGet())

err = networkClearHostVethLimits(&d.deviceCommon)
if err != nil {
return nil, err
}

// Setup post-stop actions.
runConf := deviceConfig.RunConfig{
PostHooks: []func() error{d.postStop},
Expand Down
15 changes: 12 additions & 3 deletions lxd/device/nic_p2p.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ func (d *nicP2P) validateConfig(instConf instance.ConfigReader) error {
"limits.ingress",
"limits.egress",
"limits.max",
"limits.priority",
"ipv4.routes",
"ipv6.routes",
"boot.priority",
Expand Down Expand Up @@ -67,7 +68,7 @@ func (d *nicP2P) UpdatableFields(oldDevice Type) []string {
return []string{}
}

return []string{"limits.ingress", "limits.egress", "limits.max", "ipv4.routes", "ipv6.routes"}
return []string{"limits.ingress", "limits.egress", "limits.max", "limits.priority", "ipv4.routes", "ipv6.routes"}
}

// Start is run when the device is added to a running instance or instance is starting up.
Expand Down Expand Up @@ -130,7 +131,7 @@ func (d *nicP2P) Start() (*deviceConfig.RunConfig, error) {
}

// Apply host-side limits.
err = networkSetupHostVethLimits(d.config)
err = networkSetupHostVethLimits(&d.deviceCommon, nil, false)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -189,7 +190,7 @@ func (d *nicP2P) Update(oldDevices deviceConfig.Devices, isRunning bool) error {
}

// Apply host-side limits.
err = networkSetupHostVethLimits(d.config)
err = networkSetupHostVethLimits(&d.deviceCommon, oldConfig, false)
if err != nil {
return err
}
Expand All @@ -199,6 +200,14 @@ func (d *nicP2P) Update(oldDevices deviceConfig.Devices, isRunning bool) error {

// Stop is run when the device is removed from the instance.
func (d *nicP2P) Stop() (*deviceConfig.RunConfig, error) {
// Populate device config with volatile fields (hwaddr and host_name) if needed.
networkVethFillFromVolatile(d.config, d.volatileGet())

err := networkClearHostVethLimits(&d.deviceCommon)
if err != nil {
return nil, err
}

runConf := deviceConfig.RunConfig{
PostHooks: []func() error{d.postStop},
}
Expand Down
15 changes: 12 additions & 3 deletions lxd/device/nic_routed.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ func (d *nicRouted) UpdatableFields(oldDevice Type) []string {
return []string{}
}

return []string{"limits.ingress", "limits.egress", "limits.max"}
return []string{"limits.ingress", "limits.egress", "limits.max", "limits.priority"}
}

// validateConfig checks the supplied config for correctness.
Expand All @@ -69,6 +69,7 @@ func (d *nicRouted) validateConfig(instConf instance.ConfigReader) error {
"limits.ingress",
"limits.egress",
"limits.max",
"limits.priority",
"ipv4.gateway",
"ipv6.gateway",
"ipv4.routes",
Expand Down Expand Up @@ -346,7 +347,7 @@ func (d *nicRouted) Start() (*deviceConfig.RunConfig, error) {
networkVethFillFromVolatile(d.config, saveData)

// Apply host-side limits.
err = networkSetupHostVethLimits(d.config)
err = networkSetupHostVethLimits(&d.deviceCommon, nil, false)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -577,7 +578,7 @@ func (d *nicRouted) Update(oldDevices deviceConfig.Devices, isRunning bool) erro
networkVethFillFromVolatile(d.config, v)

// Apply host-side limits.
err = networkSetupHostVethLimits(d.config)
err = networkSetupHostVethLimits(&d.deviceCommon, oldDevices[d.name], false)
if err != nil {
return err
}
Expand All @@ -588,6 +589,14 @@ func (d *nicRouted) Update(oldDevices deviceConfig.Devices, isRunning bool) erro

// Stop is run when the device is removed from the instance.
func (d *nicRouted) Stop() (*deviceConfig.RunConfig, error) {
// Populate device config with volatile fields (hwaddr and host_name) if needed.
networkVethFillFromVolatile(d.config, d.volatileGet())

err := networkClearHostVethLimits(&d.deviceCommon)
if err != nil {
return nil, err
}

runConf := deviceConfig.RunConfig{
PostHooks: []func() error{d.postStop},
}
Expand Down
40 changes: 39 additions & 1 deletion lxd/firewall/drivers/drivers_nftables.go
Original file line number Diff line number Diff line change
Expand Up @@ -372,11 +372,12 @@ func (d Nftables) NetworkClear(networkName string, _ bool, _ []uint) error {
"fwd", "pstrt", "in", "out", // Chains used for network operation rules.
"aclin", "aclout", "aclfwd", "acl", // Chains used by ACL rules.
"fwdprert", "fwdout", "fwdpstrt", // Chains used by Address Forward rules.
"egress", // Chains added for limits.priority option
}

// Remove chains created by network rules.
// Remove from ip and ip6 tables to ensure cleanup for instances started before we moved to inet table
err := d.removeChains([]string{"inet", "ip", "ip6"}, networkName, removeChains...)
err := d.removeChains([]string{"inet", "ip", "ip6", "netdev"}, networkName, removeChains...)
if err != nil {
return fmt.Errorf("Failed clearing nftables rules for network %q: %w", networkName, err)
}
Expand Down Expand Up @@ -667,6 +668,43 @@ func (d Nftables) InstanceClearRPFilter(projectName string, instanceName string,
return nil
}

// InstanceSetupNetPrio activates setting of skb->priority for the specified instance device on the host interface.
func (d Nftables) InstanceSetupNetPrio(projectName string, instanceName string, deviceName string, netPrio uint32) error {
deviceLabel := d.instanceDeviceLabel(projectName, instanceName, deviceName)
tplFields := map[string]any{
"namespace": nftablesNamespace,
"family": "netdev",
"chainSeparator": nftablesChainSeparator,
"deviceLabel": deviceLabel,
"deviceName": deviceName,
"netPrio": netPrio,
}

err := d.applyNftConfig(nftablesInstanceNetPrio, tplFields)
if err != nil {
return fmt.Errorf("Failed adding netprio rules for instance device %q: %w", deviceLabel, err)
}

return nil
}

// InstanceClearNetPrio removes setting of skb->priority for the specified instance device on the host interface.
func (d Nftables) InstanceClearNetPrio(projectName string, instanceName string, deviceName string) error {
if deviceName == "" {
return fmt.Errorf("Failed clearing netprio rules for instance %q in project %q: device name is empty", projectName, instanceName)
}

deviceLabel := d.instanceDeviceLabel(projectName, instanceName, deviceName)
chainLabel := fmt.Sprintf("netprio%s%s", nftablesChainSeparator, deviceLabel)

err := d.removeChains([]string{"netdev"}, chainLabel, "egress")
if err != nil {
return fmt.Errorf("Failed clearing netprio rules for instance device %q: %w", deviceLabel, err)
}

return nil
}

// NetworkApplyACLRules applies ACL rules to the existing firewall chains.
func (d Nftables) NetworkApplyACLRules(networkName string, rules []ACLRule) error {
nftRules := make([]string, 0)
Expand Down
8 changes: 8 additions & 0 deletions lxd/firewall/drivers/drivers_nftables_templates.go
Original file line number Diff line number Diff line change
Expand Up @@ -260,3 +260,11 @@ chain prert{{.chainSeparator}}{{.deviceLabel}} {
iif "{{.hostName}}" fib saddr . iif oif missing drop
}
`))

// nftablesInstanceNetPrio defines the rules to perform setting of skb->priority.
var nftablesInstanceNetPrio = template.Must(template.New("nftablesInstanceNetPrio").Parse(`
chain egress{{.chainSeparator}}netprio{{.chainSeparator}}{{.deviceLabel}} {
type filter hook egress device "{{.deviceName}}" priority 0 ;
meta priority set "{{.netPrio}}"
}
`))
Loading

0 comments on commit a700a2f

Please sign in to comment.