Skip to content

Commit

Permalink
Disable iptables for L2-only traffic when everything is allowed by ACLs
Browse files Browse the repository at this point in the history
Avoid using iptables for L2-forwaded traffic if ACLs allow everything
(without rate limiting) and flow logging is disabled. Otherwise,
unnecessary packet processing steps are added which reduce network
performance significantly.

In NFV use cases, filtering and flow logging are typically handled
by applications (VNFs), so EVE should only focus on providing efficient
virtual wiring.

Signed-off-by: Milan Lenco <milan@zededa.com>
  • Loading branch information
milan-zededa authored and OhmSpectator committed Nov 18, 2024
1 parent 2db86b8 commit 38ab4b7
Show file tree
Hide file tree
Showing 6 changed files with 358 additions and 54 deletions.
4 changes: 0 additions & 4 deletions pkg/dom0-ztools/rootfs/etc/sysctl.d/02-eve.conf
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,6 @@
# zedrouter settings
net.ipv4.ip_forward = 1
net.ipv6.conf.all.forwarding = 1
# We use ip6tables for the bridge
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-arptables = 1
# The following differs from default linuxkit/alpine of 1
net.ipv4.conf.all.rp_filter = 2
net.netfilter.nf_conntrack_acct = 1
Expand Down
53 changes: 53 additions & 0 deletions pkg/pillar/nireconciler/linux_acl.go
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,59 @@ func (r *LinuxNIReconciler) withFlowlog() bool {
return false
}

func (r *LinuxNIReconciler) doACLsAllowAllTraffic() (
allIpv4TrafficAllowed, allIpv6TrafficAllowed bool) {
allIpv4TrafficAllowed = true
allIpv6TrafficAllowed = true
for _, app := range r.apps {
if app.deleted {
continue
}
for _, adapter := range app.config.AppNetAdapterList {
var hasAllowAllRuleIPv4, hasAllowAllRuleIPv6 bool
for _, ace := range adapter.ACLs {
// Default action (if not specified) is to allow traffic to continue.
allowRule := true
for _, action := range ace.Actions {
if action.Drop || action.Limit {
// Let's keep this simple and not try to distinguish
// between IPv4 and IPv6 DROP/LIMIT rules.
allIpv4TrafficAllowed = false
allIpv6TrafficAllowed = false
return
}
if action.PortMap {
allowRule = false
}
}
if allowRule && len(ace.Matches) == 1 && ace.Matches[0].Type == "ip" {
if _, subnet, err := net.ParseCIDR(ace.Matches[0].Value); err == nil {
ones, bits := subnet.Mask.Size()
if ones == 0 && subnet.IP.IsUnspecified() {
if bits == net.IPv4len*8 {
hasAllowAllRuleIPv4 = true
}
if bits == net.IPv6len*8 {
hasAllowAllRuleIPv6 = true
}
}
}
}
}
if !hasAllowAllRuleIPv4 {
allIpv4TrafficAllowed = false
}
if !hasAllowAllRuleIPv6 {
allIpv6TrafficAllowed = false
}
if !allIpv4TrafficAllowed && !allIpv6TrafficAllowed {
return
}
}
}
return
}

func (r *LinuxNIReconciler) getIntendedL2FwdChain() dg.Graph {
graphArgs := dg.InitArgs{
Name: ACLChainL2FwdSG,
Expand Down
35 changes: 32 additions & 3 deletions pkg/pillar/nireconciler/linux_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,17 @@ import (
// | +------------------------------------------------------------+ |
// | | Global | |
// | | | |
// | | +------------------------------+ | |
// | | | Sysctl | | |
// | | | (enable iptables for bridge) | | |
// | | +------------------------------+ | |
// | | | |
// | | +--------------------------+ +---------------------+ | |
// | | | Ports | | IPSets | | |
// | | | | | | | |
// | | | +--------------+ | | +---------+ | | |
// | | | | Port | ... | | | IPSet | ... | | |
// | | | | (external) | ... | | +---------+ | | |
// | | | | (external) | | | +---------+ | | |
// | | | +--------------+ | | | | |
// | | +--------------------------+ +---------------------+ | |
// | | | |
Expand Down Expand Up @@ -326,6 +331,7 @@ func (r *LinuxNIReconciler) getIntendedGlobalState() dg.Graph {
Description: "Global configuration",
}
intendedCfg := dg.New(graphArgs)
intendedCfg.PutItem(r.getIntendedHostSysctl(), nil)
intendedCfg.PutSubGraph(r.getIntendedPorts())
intendedCfg.PutSubGraph(r.getIntendedGlobalIPSets())
if r.withFlowlog() {
Expand All @@ -336,6 +342,27 @@ func (r *LinuxNIReconciler) getIntendedGlobalState() dg.Graph {
return intendedCfg
}

func (r *LinuxNIReconciler) getIntendedHostSysctl() linux.Sysctl {
var bridgeCallIptables, bridgeCallIp6tables bool
if r.withFlowlog() {
// Enforce the use of iptables in order to get conntrack entry
// created for every flow, which is then used to obtain flow metadata.
bridgeCallIptables = true
bridgeCallIp6tables = true
return linux.Sysctl{
BridgeCallIptables: &bridgeCallIptables,
BridgeCallIp6tables: &bridgeCallIp6tables,
}
}
allIpv4TrafficAllowed, allIpv6TrafficAllowed := r.doACLsAllowAllTraffic()
bridgeCallIptables = !allIpv4TrafficAllowed
bridgeCallIp6tables = !allIpv6TrafficAllowed
return linux.Sysctl{
BridgeCallIptables: &bridgeCallIptables,
BridgeCallIp6tables: &bridgeCallIp6tables,
}
}

func (r *LinuxNIReconciler) getIntendedPorts() dg.Graph {
graphArgs := dg.InitArgs{
Name: PortsSG,
Expand Down Expand Up @@ -1249,11 +1276,13 @@ func (r *LinuxNIReconciler) getIntendedAppConnCfg(niID uuid.UUID,
IfName: vif.PodVIF.GuestIfName,
ItemRef: dg.Reference(linux.VIF{HostIfName: vif.hostIfName}),
}
enableDAD := false
enableARPNotify := true
intendedAppConnCfg.PutItem(linux.Sysctl{
ForApp: itemForApp,
NetIf: appVifRef,
EnableDAD: false,
EnableARPNotify: true,
EnableDAD: &enableDAD,
EnableARPNotify: &enableARPNotify,
}, nil)
// Gateways not covered by IP subnets should be routed explicitly
// using connected routes.
Expand Down
9 changes: 6 additions & 3 deletions pkg/pillar/nireconciler/linux_reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -961,7 +961,8 @@ func (r *LinuxNIReconciler) AddAppConn(ctx context.Context,
}
r.apps[appID] = appInfo
reconcileReason := fmt.Sprintf("connecting new app (%v)", appID)
// Rebuild and reconcile also global config to update the set of intended IPSets.
// Rebuild and reconcile also global config to update the set of intended IPSets
// and sysctl settings.
r.scheduleGlobalCfgRebuild(reconcileReason)
// Rebuild and reconcile config of every NI that this app is trying to connect into.
for _, vif := range vifs {
Expand Down Expand Up @@ -1006,7 +1007,8 @@ func (r *LinuxNIReconciler) UpdateAppConn(ctx context.Context,
}
r.apps[appID] = appInfo
reconcileReason := fmt.Sprintf("updating app connectivity (%v)", appID)
// Rebuild and reconcile also global config to update the set of intended IPSets.
// Rebuild and reconcile also global config to update the set of intended IPSets
// and sysctl settings.
r.scheduleGlobalCfgRebuild(reconcileReason)
// Reconcile every NI that this app is disconnecting from, connecting to,
// or just updating its connection parameters with.
Expand Down Expand Up @@ -1039,7 +1041,8 @@ func (r *LinuxNIReconciler) DelAppConn(ctx context.Context,
// Deleted from the map when removal is completed successfully (incl. async ops).
r.apps[appID].deleted = true
reconcileReason := fmt.Sprintf("disconnecting app (%v)", appID)
// Rebuild and reconcile also global config to update the set of intended IPSets.
// Rebuild and reconcile also global config to update the set of intended IPSets
// and sysctl settings.
r.scheduleGlobalCfgRebuild(reconcileReason)
// Reconcile every NI that this app is trying to disconnect from.
for _, vif := range r.apps[appID].vifs {
Expand Down
130 changes: 130 additions & 0 deletions pkg/pillar/nireconciler/linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -785,6 +785,20 @@ var (
},
RuleID: 1,
},
{
Matches: []types.ACEMatch{
{
Type: "ip",
Value: "0.0.0.0/0",
},
},
Actions: []types.ACEAction{
{
Drop: false,
},
},
RuleID: 2,
},
},
},
// Put two VIFs into the same switch NI (just to cover this scenario).
Expand Down Expand Up @@ -2974,6 +2988,122 @@ func TestCNI(test *testing.T) {
t.Expect(err).ToNot(HaveOccurred())
}

func TestHostSysctl(test *testing.T) {
t := initTest(test, false)
networkMonitor.AddOrUpdateInterface(eth0)
networkMonitor.AddOrUpdateInterface(keth0)
networkMonitor.AddOrUpdateInterface(eth1)
networkMonitor.AddOrUpdateInterface(keth1)
networkMonitor.AddOrUpdateInterface(eth3)
networkMonitor.AddOrUpdateInterface(keth3)
var routes []netmonitor.Route
routes = append(routes, eth0Routes...)
routes = append(routes, eth1Routes...)
routes = append(routes, eth3Routes...)
networkMonitor.UpdateRoutes(routes)
ctx := reconciler.MockRun(context.Background())
updatesCh := niReconciler.WatchReconcilerUpdates()
niReconciler.RunInitialReconcile(ctx)

// Create 2 local and 1 switch network instance.
var recUpdate nirec.ReconcilerUpdate
networkMonitor.AddOrUpdateInterface(ni1BridgeIf)
niStatus, err := niReconciler.AddNI(ctx, ni1Config, ni1Bridge)
t.Expect(err).ToNot(HaveOccurred())
t.Eventually(updatesCh).Should(Receive(&recUpdate))
t.Expect(recUpdate.UpdateType).To(Equal(nirec.NIReconcileStatusChanged))
t.Expect(recUpdate.NIStatus.Equal(niStatus)).To(BeTrue())

niStatus, err = niReconciler.AddNI(ctx, ni2Config, ni2Bridge)
t.Expect(err).ToNot(HaveOccurred())
t.Eventually(updatesCh).Should(Receive(&recUpdate))
t.Expect(recUpdate.UpdateType).To(Equal(nirec.NIReconcileStatusChanged))
t.Expect(recUpdate.NIStatus.Equal(niStatus)).To(BeTrue())

networkMonitor.AddOrUpdateInterface(ni5BridgeIf)
niStatus, err = niReconciler.AddNI(ctx, ni5Config, ni5Bridge)
t.Expect(err).ToNot(HaveOccurred())
t.Eventually(updatesCh).Should(Receive(&recUpdate))
t.Expect(recUpdate.UpdateType).To(Equal(nirec.NIReconcileStatusChanged))
t.Expect(recUpdate.NIStatus.Equal(niStatus)).To(BeTrue())

// Flow logging is disabled in all network instances and there are
// no apps deployed hence bridges do not need iptables.
hostSysctlRef := dg.Reference(linuxitems.Sysctl{})
t.Expect(itemDescription(hostSysctlRef)).To(
ContainSubstring("bridgeCallIptables: false"))
t.Expect(itemDescription(hostSysctlRef)).To(
ContainSubstring("bridgeCallIp6tables: false"))

// Connection application with allow-all for IPv4 + portmap rules.
// This does not require to trigger iptables from bridge, but ip6tables
// should be triggered (still we get performance improvement at least for IPv4).
appStatus, err := niReconciler.AddAppConn(ctx, app2NetConfig, app2Num, cnirpc.AppPod{}, app2VIFs)
t.Expect(err).ToNot(HaveOccurred())
t.Eventually(updatesCh).Should(Receive(&recUpdate))
t.Expect(recUpdate.UpdateType).To(Equal(nirec.AppConnReconcileStatusChanged))
t.Expect(recUpdate.AppConnStatus.Equal(appStatus)).To(BeTrue())

t.Expect(itemDescription(hostSysctlRef)).To(
ContainSubstring("bridgeCallIptables: false"))
t.Expect(itemDescription(hostSysctlRef)).To(
ContainSubstring("bridgeCallIp6tables: true"))

// Connect application with a LIMIT rule.
appStatus, err = niReconciler.AddAppConn(ctx, app1NetConfig, app1Num, cnirpc.AppPod{}, app1VIFs)
t.Expect(err).ToNot(HaveOccurred())
t.Eventually(updatesCh).Should(Receive(&recUpdate))
t.Expect(recUpdate.UpdateType).To(Equal(nirec.AppConnReconcileStatusChanged))
t.Expect(recUpdate.AppConnStatus.Equal(appStatus)).To(BeTrue())

// With LIMIT rule present, iptables should be called from the bridge.
t.Expect(itemDescription(hostSysctlRef)).To(
ContainSubstring("bridgeCallIptables: true"))
t.Expect(itemDescription(hostSysctlRef)).To(
ContainSubstring("bridgeCallIp6tables: true"))

// Remove app with the LIMIT rule. For IPv4 bridge should not call iptables anymore.
appStatus, err = niReconciler.DelAppConn(ctx, app1UUID.UUID)
t.Expect(err).ToNot(HaveOccurred())
t.Eventually(updatesCh).Should(Receive(&recUpdate))
t.Expect(recUpdate.UpdateType).To(Equal(nirec.AppConnReconcileStatusChanged))
t.Expect(recUpdate.AppConnStatus.Equal(appStatus)).To(BeTrue())

t.Expect(itemDescription(hostSysctlRef)).To(
ContainSubstring("bridgeCallIptables: false"))
t.Expect(itemDescription(hostSysctlRef)).To(
ContainSubstring("bridgeCallIp6tables: true"))

// Enable flow logging in one of the network instances.
// This enforces the use of iptables from bridge for both IPv4 and IPv6.
ni2Config.EnableFlowlog = true
_, err = niReconciler.UpdateNI(ctx, ni2Config, ni2Bridge)
t.Expect(err).ToNot(HaveOccurred())

t.Expect(itemDescription(hostSysctlRef)).To(
ContainSubstring("bridgeCallIptables: true"))
t.Expect(itemDescription(hostSysctlRef)).To(
ContainSubstring("bridgeCallIp6tables: true"))

// Delete everything.
_, err = niReconciler.DelAppConn(ctx, app2UUID.UUID)
t.Expect(err).ToNot(HaveOccurred())
_, err = niReconciler.DelNI(ctx, ni1UUID.UUID)
t.Expect(err).ToNot(HaveOccurred())
_, err = niReconciler.DelNI(ctx, ni2UUID.UUID)
t.Expect(err).ToNot(HaveOccurred())
_, err = niReconciler.DelNI(ctx, ni5UUID.UUID)
t.Expect(err).ToNot(HaveOccurred())

t.Expect(itemDescription(hostSysctlRef)).To(
ContainSubstring("bridgeCallIptables: false"))
t.Expect(itemDescription(hostSysctlRef)).To(
ContainSubstring("bridgeCallIp6tables: false"))

// Revert back to flow logging being disabled.
ni2Config.EnableFlowlog = false
}

// This test uses it own network config, not the config globally defined.
/*
+-------------------+
Expand Down
Loading

0 comments on commit 38ab4b7

Please sign in to comment.