Skip to content

Commit

Permalink
Disable iptables for L2-only traffic when everything is allowed by ACLs
Browse files Browse the repository at this point in the history
Avoid using iptables for L2-forwaded traffic if ACLs allow everything
(without rate limiting) and flow logging is disabled. Otherwise,
unnecessary packet processing steps are added which reduce network
performance significantly.

In NFV use cases, filtering and flow logging are typically handled
by applications (VNFs), so EVE should only focus on providing efficient
virtual wiring.

Signed-off-by: Milan Lenco <milan@zededa.com>
  • Loading branch information
milan-zededa committed Nov 15, 2024
1 parent 905baf7 commit 5b4730f
Show file tree
Hide file tree
Showing 5 changed files with 358 additions and 54 deletions.
4 changes: 0 additions & 4 deletions pkg/dom0-ztools/rootfs/etc/sysctl.d/02-eve.conf
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,6 @@
# zedrouter settings
net.ipv4.ip_forward = 1
net.ipv6.conf.all.forwarding = 1
# We use ip6tables for the bridge
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-arptables = 1
# The following differs from default linuxkit/alpine of 1
net.ipv4.conf.all.rp_filter = 2
net.netfilter.nf_conntrack_acct = 1
Expand Down
88 changes: 85 additions & 3 deletions pkg/pillar/nireconciler/linux_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,17 @@ import (
// | +------------------------------------------------------------+ |
// | | Global | |
// | | | |
// | | +------------------------------+ | |
// | | | Sysctl | | |
// | | | (enable iptables for bridge) | | |
// | | +------------------------------+ | |
// | | | |
// | | +--------------------------+ +---------------------+ | |
// | | | Ports | | IPSets | | |
// | | | | | | | |
// | | | +--------------+ | | +---------+ | | |
// | | | | Port | ... | | | IPSet | ... | | |
// | | | | (external) | ... | | +---------+ | | |
// | | | | (external) | | | +---------+ | | |
// | | | +--------------+ | | | | |
// | | +--------------------------+ +---------------------+ | |
// | | | |
Expand Down Expand Up @@ -326,6 +331,7 @@ func (r *LinuxNIReconciler) getIntendedGlobalState() dg.Graph {
Description: "Global configuration",
}
intendedCfg := dg.New(graphArgs)
intendedCfg.PutItem(r.getIntendedHostSysctl(), nil)
intendedCfg.PutSubGraph(r.getIntendedPorts())
intendedCfg.PutSubGraph(r.getIntendedGlobalIPSets())
if r.withFlowlog() {
Expand All @@ -336,6 +342,80 @@ func (r *LinuxNIReconciler) getIntendedGlobalState() dg.Graph {
return intendedCfg
}

func (r *LinuxNIReconciler) getIntendedHostSysctl() linux.Sysctl {
var bridgeCallIptables, bridgeCallIp6tables bool

Check failure on line 346 in pkg/pillar/nireconciler/linux_config.go

View workflow job for this annotation

GitHub Actions / yetus

revive: var bridgeCallIp6tables should be bridgeCallIP6tables https://revive.run/r#var-naming
// If ACLs allow everything without rate-limiting and flow-logging
// is not enabled in any network instance, we can disable calling
// iptables from bridges to improve performance of L2-forwarding.
allIpv4TrafficAllowed := true
allIpv6TrafficAllowed := true
flowLoggingIsEnabled := false
for _, ni := range r.nis {
if ni.deleted {
continue
}
if ni.config.EnableFlowlog {
flowLoggingIsEnabled = true
break
}
}
for _, app := range r.apps {
if app.deleted {
continue
}
for _, adapter := range app.config.AppNetAdapterList {
var hasAllowAllRuleIPv4, hasAllowAllRuleIPv6 bool
var hasLimitRule, hasDropRule bool
for _, ace := range adapter.ACLs {
// Default action (if not specified) is to allow traffic to continue.
allowRule := true
for _, action := range ace.Actions {
if action.Drop {
hasDropRule = true
allowRule = false
}
if action.Limit {
hasLimitRule = true
allowRule = false
}
if action.PortMap {
allowRule = false
}
}
if allowRule && len(ace.Matches) == 1 && ace.Matches[0].Type == "ip" {
if _, subnet, err := net.ParseCIDR(ace.Matches[0].Value); err == nil {
ones, bits := subnet.Mask.Size()
if ones == 0 && subnet.IP.IsUnspecified() {
if bits == net.IPv4len*8 {
hasAllowAllRuleIPv4 = true
}
if bits == net.IPv6len*8 {
hasAllowAllRuleIPv6 = true
}
}
}
}
}
if hasLimitRule || hasDropRule {
allIpv4TrafficAllowed = false
allIpv6TrafficAllowed = false
}
if !hasAllowAllRuleIPv4 {
allIpv4TrafficAllowed = false
}
if !hasAllowAllRuleIPv6 {
allIpv6TrafficAllowed = false
}
}
}
bridgeCallIptables = flowLoggingIsEnabled || !allIpv4TrafficAllowed
bridgeCallIp6tables = flowLoggingIsEnabled || !allIpv6TrafficAllowed
return linux.Sysctl{
BridgeCallIptables: &bridgeCallIptables,
BridgeCallIp6tables: &bridgeCallIp6tables,
}
}

func (r *LinuxNIReconciler) getIntendedPorts() dg.Graph {
graphArgs := dg.InitArgs{
Name: PortsSG,
Expand Down Expand Up @@ -1249,11 +1329,13 @@ func (r *LinuxNIReconciler) getIntendedAppConnCfg(niID uuid.UUID,
IfName: vif.PodVIF.GuestIfName,
ItemRef: dg.Reference(linux.VIF{HostIfName: vif.hostIfName}),
}
enableDAD := false
enableARPNotify := true
intendedAppConnCfg.PutItem(linux.Sysctl{
ForApp: itemForApp,
NetIf: appVifRef,
EnableDAD: false,
EnableARPNotify: true,
EnableDAD: &enableDAD,
EnableARPNotify: &enableARPNotify,
}, nil)
// Gateways not covered by IP subnets should be routed explicitly
// using connected routes.
Expand Down
9 changes: 6 additions & 3 deletions pkg/pillar/nireconciler/linux_reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -961,7 +961,8 @@ func (r *LinuxNIReconciler) AddAppConn(ctx context.Context,
}
r.apps[appID] = appInfo
reconcileReason := fmt.Sprintf("connecting new app (%v)", appID)
// Rebuild and reconcile also global config to update the set of intended IPSets.
// Rebuild and reconcile also global config to update the set of intended IPSets
// and sysctl settings.
r.scheduleGlobalCfgRebuild(reconcileReason)
// Rebuild and reconcile config of every NI that this app is trying to connect into.
for _, vif := range vifs {
Expand Down Expand Up @@ -1006,7 +1007,8 @@ func (r *LinuxNIReconciler) UpdateAppConn(ctx context.Context,
}
r.apps[appID] = appInfo
reconcileReason := fmt.Sprintf("updating app connectivity (%v)", appID)
// Rebuild and reconcile also global config to update the set of intended IPSets.
// Rebuild and reconcile also global config to update the set of intended IPSets
// and sysctl settings.
r.scheduleGlobalCfgRebuild(reconcileReason)
// Reconcile every NI that this app is disconnecting from, connecting to,
// or just updating its connection parameters with.
Expand Down Expand Up @@ -1039,7 +1041,8 @@ func (r *LinuxNIReconciler) DelAppConn(ctx context.Context,
// Deleted from the map when removal is completed successfully (incl. async ops).
r.apps[appID].deleted = true
reconcileReason := fmt.Sprintf("disconnecting app (%v)", appID)
// Rebuild and reconcile also global config to update the set of intended IPSets.
// Rebuild and reconcile also global config to update the set of intended IPSets
// and sysctl settings.
r.scheduleGlobalCfgRebuild(reconcileReason)
// Reconcile every NI that this app is trying to disconnect from.
for _, vif := range r.apps[appID].vifs {
Expand Down
130 changes: 130 additions & 0 deletions pkg/pillar/nireconciler/linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -785,6 +785,20 @@ var (
},
RuleID: 1,
},
{
Matches: []types.ACEMatch{
{
Type: "ip",
Value: "0.0.0.0/0",
},
},
Actions: []types.ACEAction{
{
Drop: false,
},
},
RuleID: 2,
},
},
},
// Put two VIFs into the same switch NI (just to cover this scenario).
Expand Down Expand Up @@ -2974,6 +2988,122 @@ func TestCNI(test *testing.T) {
t.Expect(err).ToNot(HaveOccurred())
}

func TestHostSysctl(test *testing.T) {
t := initTest(test, false)
networkMonitor.AddOrUpdateInterface(eth0)
networkMonitor.AddOrUpdateInterface(keth0)
networkMonitor.AddOrUpdateInterface(eth1)
networkMonitor.AddOrUpdateInterface(keth1)
networkMonitor.AddOrUpdateInterface(eth3)
networkMonitor.AddOrUpdateInterface(keth3)
var routes []netmonitor.Route
routes = append(routes, eth0Routes...)
routes = append(routes, eth1Routes...)
routes = append(routes, eth3Routes...)
networkMonitor.UpdateRoutes(routes)
ctx := reconciler.MockRun(context.Background())
updatesCh := niReconciler.WatchReconcilerUpdates()
niReconciler.RunInitialReconcile(ctx)

// Create 2 local and 1 switch network instance.
var recUpdate nirec.ReconcilerUpdate
networkMonitor.AddOrUpdateInterface(ni1BridgeIf)
niStatus, err := niReconciler.AddNI(ctx, ni1Config, ni1Bridge)
t.Expect(err).ToNot(HaveOccurred())
t.Eventually(updatesCh).Should(Receive(&recUpdate))
t.Expect(recUpdate.UpdateType).To(Equal(nirec.NIReconcileStatusChanged))
t.Expect(recUpdate.NIStatus.Equal(niStatus)).To(BeTrue())

niStatus, err = niReconciler.AddNI(ctx, ni2Config, ni2Bridge)
t.Expect(err).ToNot(HaveOccurred())
t.Eventually(updatesCh).Should(Receive(&recUpdate))
t.Expect(recUpdate.UpdateType).To(Equal(nirec.NIReconcileStatusChanged))
t.Expect(recUpdate.NIStatus.Equal(niStatus)).To(BeTrue())

networkMonitor.AddOrUpdateInterface(ni5BridgeIf)
niStatus, err = niReconciler.AddNI(ctx, ni5Config, ni5Bridge)
t.Expect(err).ToNot(HaveOccurred())
t.Eventually(updatesCh).Should(Receive(&recUpdate))
t.Expect(recUpdate.UpdateType).To(Equal(nirec.NIReconcileStatusChanged))
t.Expect(recUpdate.NIStatus.Equal(niStatus)).To(BeTrue())

// Flow logging is disabled in all network instances and there are
// no apps deployed hence bridges do not need iptables.
hostSysctlRef := dg.Reference(linuxitems.Sysctl{})
t.Expect(itemDescription(hostSysctlRef)).To(
ContainSubstring("bridgeCallIptables: false"))
t.Expect(itemDescription(hostSysctlRef)).To(
ContainSubstring("bridgeCallIp6tables: false"))

// Connection application with allow-all for IPv4 + portmap rules.
// This does not require to trigger iptables from bridge, but ip6tables
// should be triggered (still we get performance improvement at least for IPv4).
appStatus, err := niReconciler.AddAppConn(ctx, app2NetConfig, app2Num, cnirpc.AppPod{}, app2VIFs)
t.Expect(err).ToNot(HaveOccurred())
t.Eventually(updatesCh).Should(Receive(&recUpdate))
t.Expect(recUpdate.UpdateType).To(Equal(nirec.AppConnReconcileStatusChanged))
t.Expect(recUpdate.AppConnStatus.Equal(appStatus)).To(BeTrue())

t.Expect(itemDescription(hostSysctlRef)).To(
ContainSubstring("bridgeCallIptables: false"))
t.Expect(itemDescription(hostSysctlRef)).To(
ContainSubstring("bridgeCallIp6tables: true"))

// Connect application with a LIMIT rule.
appStatus, err = niReconciler.AddAppConn(ctx, app1NetConfig, app1Num, cnirpc.AppPod{}, app1VIFs)
t.Expect(err).ToNot(HaveOccurred())
t.Eventually(updatesCh).Should(Receive(&recUpdate))
t.Expect(recUpdate.UpdateType).To(Equal(nirec.AppConnReconcileStatusChanged))
t.Expect(recUpdate.AppConnStatus.Equal(appStatus)).To(BeTrue())

// With LIMIT rule present, iptables should be called from the bridge.
t.Expect(itemDescription(hostSysctlRef)).To(
ContainSubstring("bridgeCallIptables: true"))
t.Expect(itemDescription(hostSysctlRef)).To(
ContainSubstring("bridgeCallIp6tables: true"))

// Remove app with the LIMIT rule. For IPv4 bridge should not call iptables anymore.
appStatus, err = niReconciler.DelAppConn(ctx, app1UUID.UUID)
t.Expect(err).ToNot(HaveOccurred())
t.Eventually(updatesCh).Should(Receive(&recUpdate))
t.Expect(recUpdate.UpdateType).To(Equal(nirec.AppConnReconcileStatusChanged))
t.Expect(recUpdate.AppConnStatus.Equal(appStatus)).To(BeTrue())

t.Expect(itemDescription(hostSysctlRef)).To(
ContainSubstring("bridgeCallIptables: false"))
t.Expect(itemDescription(hostSysctlRef)).To(
ContainSubstring("bridgeCallIp6tables: true"))

// Enable flow logging in one of the network instances.
// This enforces the use of iptables from bridge for both IPv4 and IPv6.
ni2Config.EnableFlowlog = true
_, err = niReconciler.UpdateNI(ctx, ni2Config, ni2Bridge)
t.Expect(err).ToNot(HaveOccurred())

t.Expect(itemDescription(hostSysctlRef)).To(
ContainSubstring("bridgeCallIptables: true"))
t.Expect(itemDescription(hostSysctlRef)).To(
ContainSubstring("bridgeCallIp6tables: true"))

// Delete everything.
_, err = niReconciler.DelAppConn(ctx, app2UUID.UUID)
t.Expect(err).ToNot(HaveOccurred())
_, err = niReconciler.DelNI(ctx, ni1UUID.UUID)
t.Expect(err).ToNot(HaveOccurred())
_, err = niReconciler.DelNI(ctx, ni2UUID.UUID)
t.Expect(err).ToNot(HaveOccurred())
_, err = niReconciler.DelNI(ctx, ni5UUID.UUID)
t.Expect(err).ToNot(HaveOccurred())

t.Expect(itemDescription(hostSysctlRef)).To(
ContainSubstring("bridgeCallIptables: false"))
t.Expect(itemDescription(hostSysctlRef)).To(
ContainSubstring("bridgeCallIp6tables: false"))

// Revert back to flow logging being disabled.
ni2Config.EnableFlowlog = false
}

// This test uses it own network config, not the config globally defined.
/*
+-------------------+
Expand Down
Loading

0 comments on commit 5b4730f

Please sign in to comment.