Skip to content

Commit

Permalink
Promote feature CleanupStaleUDPSvcConntrack from Alpha to Beta
Browse files Browse the repository at this point in the history
In antrea-io#5112, due to the limitations of the Go netlink library, AntreaProxy
would unconditionally delete conntrack entries added by kube-proxy in
conntrack zone 0. AntreaProxy was supposed to only delete its own entries
in conntrack zones 65520 or 65521. To address this, a feature gate was added
to isolate the relevant code.

After the merge of antrea-io#6193, the netlink library was updated, allowing
AntreaProxy to precisely delete conntrack entries in zones 65520 or 65521.
It is now safe to enable the corresponding code by default.

Signed-off-by: Hongliang Liu <lhongliang@vmware.com>
  • Loading branch information
hongliangl committed May 29, 2024
1 parent df1655c commit 24a3f0c
Show file tree
Hide file tree
Showing 11 changed files with 590 additions and 372 deletions.
2 changes: 1 addition & 1 deletion build/charts/antrea/conf/antrea-agent.conf
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ featureGates:

# Enable support for cleaning up stale UDP Service conntrack connections in AntreaProxy. This requires AntreaProxy to
# be enabled, otherwise this flag will not take effect.
{{- include "featureGate" (dict "featureGates" .Values.featureGates "name" "CleanupStaleUDPSvcConntrack" "default" false) }}
{{- include "featureGate" (dict "featureGates" .Values.featureGates "name" "CleanupStaleUDPSvcConntrack" "default" true) }}

# Enable traceflow which provides packet tracing feature to diagnose network issue.
{{- include "featureGate" (dict "featureGates" .Values.featureGates "name" "Traceflow" "default" true) }}
Expand Down
6 changes: 3 additions & 3 deletions build/yamls/antrea-aks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3554,7 +3554,7 @@ data:
# Enable support for cleaning up stale UDP Service conntrack connections in AntreaProxy. This requires AntreaProxy to
# be enabled, otherwise this flag will not take effect.
# CleanupStaleUDPSvcConntrack: false
# CleanupStaleUDPSvcConntrack: true
# Enable traceflow which provides packet tracing feature to diagnose network issue.
# Traceflow: true
Expand Down Expand Up @@ -4920,7 +4920,7 @@ spec:
kubectl.kubernetes.io/default-container: antrea-agent
# Automatically restart Pods with a RollingUpdate if the ConfigMap changes
# See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
checksum/config: 30843b57762c91dfcffb560917191e3bc7e662c06552759bac2a173bc060b82c
checksum/config: 65986906c1bc568c9c397f6b5c14b672200ca6170f13d0ef9a984f8a9aedb006
labels:
app: antrea
component: antrea-agent
Expand Down Expand Up @@ -5158,7 +5158,7 @@ spec:
annotations:
# Automatically restart Pod if the ConfigMap changes
# See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
checksum/config: 30843b57762c91dfcffb560917191e3bc7e662c06552759bac2a173bc060b82c
checksum/config: 65986906c1bc568c9c397f6b5c14b672200ca6170f13d0ef9a984f8a9aedb006
labels:
app: antrea
component: antrea-controller
Expand Down
6 changes: 3 additions & 3 deletions build/yamls/antrea-eks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3554,7 +3554,7 @@ data:
# Enable support for cleaning up stale UDP Service conntrack connections in AntreaProxy. This requires AntreaProxy to
# be enabled, otherwise this flag will not take effect.
# CleanupStaleUDPSvcConntrack: false
# CleanupStaleUDPSvcConntrack: true
# Enable traceflow which provides packet tracing feature to diagnose network issue.
# Traceflow: true
Expand Down Expand Up @@ -4920,7 +4920,7 @@ spec:
kubectl.kubernetes.io/default-container: antrea-agent
# Automatically restart Pods with a RollingUpdate if the ConfigMap changes
# See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
checksum/config: 30843b57762c91dfcffb560917191e3bc7e662c06552759bac2a173bc060b82c
checksum/config: 65986906c1bc568c9c397f6b5c14b672200ca6170f13d0ef9a984f8a9aedb006
labels:
app: antrea
component: antrea-agent
Expand Down Expand Up @@ -5159,7 +5159,7 @@ spec:
annotations:
# Automatically restart Pod if the ConfigMap changes
# See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
checksum/config: 30843b57762c91dfcffb560917191e3bc7e662c06552759bac2a173bc060b82c
checksum/config: 65986906c1bc568c9c397f6b5c14b672200ca6170f13d0ef9a984f8a9aedb006
labels:
app: antrea
component: antrea-controller
Expand Down
6 changes: 3 additions & 3 deletions build/yamls/antrea-gke.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3554,7 +3554,7 @@ data:
# Enable support for cleaning up stale UDP Service conntrack connections in AntreaProxy. This requires AntreaProxy to
# be enabled, otherwise this flag will not take effect.
# CleanupStaleUDPSvcConntrack: false
# CleanupStaleUDPSvcConntrack: true
# Enable traceflow which provides packet tracing feature to diagnose network issue.
# Traceflow: true
Expand Down Expand Up @@ -4920,7 +4920,7 @@ spec:
kubectl.kubernetes.io/default-container: antrea-agent
# Automatically restart Pods with a RollingUpdate if the ConfigMap changes
# See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
checksum/config: d5cdb5356795c44a69c66fad1b4d67f7c00cdcbe837f3b3b50260e4d9dfd1e7e
checksum/config: a65260de329b786d7f83f3e2f02bd291e86b09001a968915d3395f827955a0d0
labels:
app: antrea
component: antrea-agent
Expand Down Expand Up @@ -5156,7 +5156,7 @@ spec:
annotations:
# Automatically restart Pod if the ConfigMap changes
# See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
checksum/config: d5cdb5356795c44a69c66fad1b4d67f7c00cdcbe837f3b3b50260e4d9dfd1e7e
checksum/config: a65260de329b786d7f83f3e2f02bd291e86b09001a968915d3395f827955a0d0
labels:
app: antrea
component: antrea-controller
Expand Down
6 changes: 3 additions & 3 deletions build/yamls/antrea-ipsec.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3567,7 +3567,7 @@ data:
# Enable support for cleaning up stale UDP Service conntrack connections in AntreaProxy. This requires AntreaProxy to
# be enabled, otherwise this flag will not take effect.
# CleanupStaleUDPSvcConntrack: false
# CleanupStaleUDPSvcConntrack: true
# Enable traceflow which provides packet tracing feature to diagnose network issue.
# Traceflow: true
Expand Down Expand Up @@ -4933,7 +4933,7 @@ spec:
kubectl.kubernetes.io/default-container: antrea-agent
# Automatically restart Pods with a RollingUpdate if the ConfigMap changes
# See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
checksum/config: 50f2864cf09e4732327b963130bd59a9fc06c560784b161c94e813c000367615
checksum/config: 778ab76bb11f42d9ce9bc0cecd3fd341b6652cbe09564a4c46b3c7b39981b852
checksum/ipsec-secret: d0eb9c52d0cd4311b6d252a951126bf9bea27ec05590bed8a394f0f792dcb2a4
labels:
app: antrea
Expand Down Expand Up @@ -5215,7 +5215,7 @@ spec:
annotations:
# Automatically restart Pod if the ConfigMap changes
# See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
checksum/config: 50f2864cf09e4732327b963130bd59a9fc06c560784b161c94e813c000367615
checksum/config: 778ab76bb11f42d9ce9bc0cecd3fd341b6652cbe09564a4c46b3c7b39981b852
labels:
app: antrea
component: antrea-controller
Expand Down
6 changes: 3 additions & 3 deletions build/yamls/antrea.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3554,7 +3554,7 @@ data:
# Enable support for cleaning up stale UDP Service conntrack connections in AntreaProxy. This requires AntreaProxy to
# be enabled, otherwise this flag will not take effect.
# CleanupStaleUDPSvcConntrack: false
# CleanupStaleUDPSvcConntrack: true
# Enable traceflow which provides packet tracing feature to diagnose network issue.
# Traceflow: true
Expand Down Expand Up @@ -4920,7 +4920,7 @@ spec:
kubectl.kubernetes.io/default-container: antrea-agent
# Automatically restart Pods with a RollingUpdate if the ConfigMap changes
# See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
checksum/config: ac3c14eed7ca0dc28bf2d659cd2c4e4a39d55278fb9a8759c30ea12eff89e518
checksum/config: 57cac567ab34af8bff4cad2f86d6560ca57bf158b3e2c2e7025ca0c65a9d77d0
labels:
app: antrea
component: antrea-agent
Expand Down Expand Up @@ -5156,7 +5156,7 @@ spec:
annotations:
# Automatically restart Pod if the ConfigMap changes
# See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
checksum/config: ac3c14eed7ca0dc28bf2d659cd2c4e4a39d55278fb9a8759c30ea12eff89e518
checksum/config: 57cac567ab34af8bff4cad2f86d6560ca57bf158b3e2c2e7025ca0c65a9d77d0
labels:
app: antrea
component: antrea-controller
Expand Down
2 changes: 1 addition & 1 deletion docs/feature-gates.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ edit the Agent configuration in the
| `AntreaProxy` | Agent | `true` | GA | v0.8 | v0.11 | v1.14 | Yes | Must be enabled for Windows. |
| `EndpointSlice` | Agent | `true` | GA | v0.13.0 | v1.11 | v1.14 | Yes | |
| `TopologyAwareHints` | Agent | `true` | Beta | v1.8 | v1.12 | N/A | Yes | |
| `CleanupStaleUDPSvcConntrack` | Agent | `false` | Alpha | v1.13 | N/A | N/A | Yes | |
| `CleanupStaleUDPSvcConntrack` | Agent | `true` | Beta | v1.13 | v2.1 | N/A | Yes | |
| `LoadBalancerModeDSR` | Agent | `false` | Alpha | v1.13 | N/A | N/A | Yes | |
| `AntreaPolicy` | Agent + Controller | `true` | Beta | v0.8 | v1.0 | N/A | No | Agent side config required from v0.9.0+. |
| `Traceflow` | Agent + Controller | `true` | Beta | v0.8 | v0.11 | N/A | Yes | |
Expand Down
20 changes: 19 additions & 1 deletion pkg/agent/proxy/proxier.go
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,10 @@ func (p *proxier) removeStaleServiceConntrackEntries(svcPortName k8sproxy.Servic
svcPort := uint16(svcInfo.Port())
nodePort := uint16(svcInfo.NodePort())
svcProto := svcInfo.OFProtocol
virutalNodePortDNATIP := agentconfig.VirtualNodePortDNATIPv4
if p.isIPv6 {
virutalNodePortDNATIP = agentconfig.VirtualNodePortDNATIPv6
}

svcIPToPort := make(map[string]uint16)
svcIPToPort[svcInfo.ClusterIP().String()] = svcPort
Expand All @@ -354,6 +358,7 @@ func (p *proxier) removeStaleServiceConntrackEntries(svcPortName k8sproxy.Servic
for _, nodeIP := range p.nodePortAddresses {
svcIPToPort[nodeIP.String()] = nodePort
}
svcIPToPort[virutalNodePortDNATIP.String()] = nodePort
}

for svcIPStr, port := range svcIPToPort {
Expand All @@ -376,6 +381,10 @@ func (p *proxier) removeStaleConntrackEntries(svcPortName k8sproxy.ServicePortNa
externalIPStrings := svcInfo.ExternalIPStrings()
pLoadBalancerIPStrings := pSvcInfo.LoadBalancerIPStrings()
loadBalancerIPStrings := svcInfo.LoadBalancerIPStrings()
virutalNodePortDNATIP := agentconfig.VirtualNodePortDNATIPv4
if p.isIPv6 {
virutalNodePortDNATIP = agentconfig.VirtualNodePortDNATIPv6
}
var svcPortChanged, svcNodePortChanged bool

staleSvcIPToPort := make(map[string]uint16)
Expand Down Expand Up @@ -404,11 +413,13 @@ func (p *proxier) removeStaleConntrackEntries(svcPortName k8sproxy.ServicePortNa
staleSvcIPToPort[ip] = pSvcPort
}
}
// If the NodePort of the Service is changed, delete the contrack entries related to the Node IPs and the Service nodePort.
// If the NodePort of the Service is changed, delete the conntrack entries related to all the Node IPs, the virtual
// IP used to DNAT external NodePort traffic on the Node, and the Service nodePort.
if pNodePort != nodePort {
for _, nodeIP := range p.nodePortAddresses {
staleSvcIPToPort[nodeIP.String()] = pNodePort
}
staleSvcIPToPort[virutalNodePortDNATIP.String()] = pNodePort
svcNodePortChanged = true
}
// Delete the conntrack entries due to the change of the Service.
Expand Down Expand Up @@ -744,6 +755,13 @@ func (p *proxier) installServices() {
pSvcInfo.ExternalPolicyLocal() != svcInfo.ExternalPolicyLocal() ||
pSvcInfo.InternalPolicyLocal() != svcInfo.InternalPolicyLocal()
if p.cleanupStaleUDPSvcConntrack && needClearConntrackEntries(pSvcInfo.OFProtocol) {
// We clean the UDP conntrack entries when Service IPs or ports are changed. However, we DO NOT clean
// the UDP conntrack entries related to remote Endpoints that are still referenced by the Service but
// are no longer selectable Endpoints for the corresponding Service IPs (for externalTrafficPolicy,
// these IPs are loadBalancerIPs, externalIPs and NodeIPs; for internalTrafficPolicy, these IPs are
// clusterIPs) when externalTrafficPolicy or internalTrafficPolicy is changed from Cluster to Local.
// Consequently, the connections, which are supposed to select local Endpoints, will continue to send
// packets to remote Endpoints due to the existing UDP conntrack entries until they time out.
needCleanupStaleUDPServiceConntrack = svcInfo.Port() != pSvcInfo.Port() ||
svcInfo.ClusterIP().String() != pSvcInfo.ClusterIP().String() ||
needUpdateServiceExternalAddresses
Expand Down
Loading

0 comments on commit 24a3f0c

Please sign in to comment.