From a6d597a81193296a862f1de65650390376507386 Mon Sep 17 00:00:00 2001 From: anamehra <54692434+anamehra@users.noreply.github.com> Date: Wed, 29 Mar 2023 09:53:32 -0700 Subject: [PATCH] chassis-packet: resolve the missing static routes (#14230) arp_update should resolve the missing arp/ndp static route entries. Added code to check for missing entries and try ping to resolve the missing entry. Why I did it Fixes #14179 chassis-packet: missing arp entries for static routes causing high orchagent cpu usage It is observed that some sonic-mgmt test case calls sonic-clear arp, which clears the static arp entries as well. Orchagent or arp_update process does not try to resolve the missing arp entries after clear. How I did it arp_update should resolve the missing arp/ndp static route entries. Added code to check for missing entries and try ping if any found to resolve it. How to verify it After boot or config reload, check ipv4 and ipv4 neigh entries to make sure all static route entries are present manual validation: Use sonic-clear arp and sonic-clear ndp to clear all neighbor entries run arp_update Check for neigh entries. All entries should be present. Signed-off-by: anamehra --- files/build_templates/arp_update_vars.j2 | 3 ++- files/scripts/arp_update | 27 +++++++++++++++++------- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/files/build_templates/arp_update_vars.j2 b/files/build_templates/arp_update_vars.j2 index 91992e781ac0..9f4ba4a42050 100644 --- a/files/build_templates/arp_update_vars.j2 +++ b/files/build_templates/arp_update_vars.j2 @@ -4,5 +4,6 @@ "pc_interface" : "{% for (name, prefix) in PORTCHANNEL_INTERFACE|pfx_filter %}{% if prefix|ipv6 %}{{ name }} {% endif %}{% endfor %}", "vlan_sub_interface": "{% for (name, prefix) in VLAN_SUB_INTERFACE|pfx_filter %}{% if prefix|ipv6 %}{{ name }} {% endif %}{% endfor %}", "vlan" : "{% if VLAN %}{{ VLAN.keys() | join(' ') }}{% endif %}", - "static_route_nexthops": "{% if STATIC_ROUTE %}{% for static_route_prefix, static_route_attr in STATIC_ROUTE.items() %}{%- if static_route_prefix -%}{{ static_route_attr['nexthop'].split(',') | join(' ') | lower + " " }}{%- endif -%}{% endfor %}{% endif %}" + "static_route_nexthops": "{% if STATIC_ROUTE %}{% for static_route_prefix, static_route_attr in STATIC_ROUTE.items() %}{%- if static_route_prefix -%}{{ static_route_attr['nexthop'].split(',') | join(' ') | lower + " " }}{%- endif -%}{% endfor %}{% endif %}", + "static_route_ifnames": "{% if STATIC_ROUTE %}{% for static_route_prefix, static_route_attr in STATIC_ROUTE.items() %}{%- if static_route_prefix -%}{{ static_route_attr['ifname'].split(',') | join(' ') + " " }}{%- endif -%}{% endfor %}{% endif %}" } diff --git a/files/scripts/arp_update b/files/scripts/arp_update index b686e1a93e24..2725f034668b 100755 --- a/files/scripts/arp_update +++ b/files/scripts/arp_update @@ -14,13 +14,16 @@ while /bin/true; do ARP_UPDATE_VARS=$(sonic-cfggen -d -t ${ARP_UPDATE_VARS_FILE}) SWITCH_TYPE=$(echo $ARP_UPDATE_VARS | jq -r '.switch_type') if [[ "$SWITCH_TYPE" == "chassis-packet" ]]; then - STATIC_ROUTE_NEXTHOPS=$(echo $ARP_UPDATE_VARS | jq -r '.static_route_nexthops') + # Get array of Nexthops and ifnames. Nexthops and ifnames are mapped one to one + STATIC_ROUTE_NEXTHOPS=($(echo $ARP_UPDATE_VARS | jq -r '.static_route_nexthops')) + STATIC_ROUTE_IFNAMES=($(echo $ARP_UPDATE_VARS | jq -r '.static_route_ifnames')) # on supervisor/rp exit the script gracefully - if [[ -z "$STATIC_ROUTE_NEXTHOPS" ]]; then + if [[ -z "$STATIC_ROUTE_NEXTHOPS" ]] || [[ -z "$STATIC_ROUTE_IFNAMES" ]]; then logger "arp_update: exiting as no static route in packet based chassis" exit 0 fi - for nexthop in $STATIC_ROUTE_NEXTHOPS; do + for i in ${!STATIC_ROUTE_NEXTHOPS[@]}; do + nexthop="${STATIC_ROUTE_NEXTHOPS[i]}" if [[ $nexthop == *"."* ]]; then neigh_state=( $(ip -4 neigh show | grep -w $nexthop | tr -s ' ' | cut -d ' ' -f 3,4) ) ping_prefix=ping @@ -28,11 +31,19 @@ while /bin/true; do neigh_state=( $(ip -6 neigh show | grep -w $nexthop | tr -s ' ' | cut -d ' ' -f 3,4) ) ping_prefix=ping6 fi - - if [[ "${neigh_state[1]}" == "INCOMPLETE" ]] || [[ "${neigh_state[1]}" == "FAILED" ]]; then - pingcmd="timeout 0.2 $ping_prefix -I ${neigh_state[0]} -n -q -i 0 -c 1 -W 1 $nexthop >/dev/null" - eval $pingcmd - logger "arp_update: sttaic route nexthop not resolved, pinging $nexthop on ${neigh_state[0]}" + if [[ -z "${neigh_state}" ]] || [[ "${neigh_state[1]}" == "INCOMPLETE" ]] || [[ "${neigh_state[1]}" == "FAILED" ]]; then + interface="${STATIC_ROUTE_IFNAMES[i]}" + if [[ -z "$interface" ]]; then + # should never be here, handling just in case + logger "ERR: arp_update: missing interface entry for static route $nexthop" + interface=${neigh_state[0]} + fi + intf_up=$(ip link show $interface | grep "state UP") + if [[ -n "$intf_up" ]]; then + pingcmd="timeout 0.2 $ping_prefix -I ${interface} -n -q -i 0 -c 1 -W 1 $nexthop >/dev/null" + eval $pingcmd + logger "arp_update: static route nexthop not resolved, pinging $nexthop on ${neigh_state[0]}" + fi fi done