From ee02c26993262c96cc39d0f831f7589b10a44fd7 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 11 Jun 2019 10:19:40 +0300 Subject: [PATCH 1/7] mlxsw: spectrum: Use different seeds for ECMP and LAG hash The same hash function and seed are used for both ECMP and LAG hash. Therefore, when a LAG device is used as a nexthop device as part of an ECMP group, hash polarization can occur and all the traffic will be hashed to a single LAG slave. Fix this by using a different seed for the LAG hash. Fixes: fa73989f2697 ("mlxsw: spectrum: Use a stable ECMP/LAG seed") Signed-off-by: Ido Schimmel Reported-by: Alex Veber Tested-by: Alex Veber Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index dfe6b44baf635..23204356ad888 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -4280,13 +4280,16 @@ static void mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp) } } +#define MLXSW_SP_LAG_SEED_INIT 0xcafecafe + static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp) { char slcr_pl[MLXSW_REG_SLCR_LEN]; u32 seed; int err; - seed = jhash(mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac), 0); + seed = jhash(mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac), + MLXSW_SP_LAG_SEED_INIT); mlxsw_reg_slcr_pack(slcr_pl, MLXSW_REG_SLCR_LAG_HASH_SMAC | MLXSW_REG_SLCR_LAG_HASH_DMAC | MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE | From 83d5782681cc12b3d485a83cb34c46b2445f510c Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 11 Jun 2019 10:19:41 +0300 Subject: [PATCH 2/7] mlxsw: spectrum_router: Refresh nexthop neighbour when it becomes dead The driver tries to periodically refresh neighbours that are used to reach nexthops. This is done by periodically calling neigh_event_send(). However, if the neighbour becomes dead, there is nothing we can do to return it to a connected state and the above function call is basically a NOP. This results in the nexthop never being written to the device's adjacency table and therefore never used to forward packets. Fix this by dropping our reference from the dead neighbour and associating the nexthop with a new neigbhour which we will try to refresh. Fixes: a7ff87acd995 ("mlxsw: spectrum_router: Implement next-hop routing") Signed-off-by: Ido Schimmel Reported-by: Alex Veber Tested-by: Alex Veber Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 73 ++++++++++++++++++- 1 file changed, 70 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 1cda8a248b12e..ef554739dd54f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -2363,7 +2363,7 @@ static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work) static void mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_neigh_entry *neigh_entry, - bool removing); + bool removing, bool dead); static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding) { @@ -2507,7 +2507,8 @@ static void mlxsw_sp_router_neigh_event_work(struct work_struct *work) memcpy(neigh_entry->ha, ha, ETH_ALEN); mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected); - mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected); + mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected, + dead); if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list)) mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry); @@ -3472,13 +3473,79 @@ static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh, nh->update = 1; } +static int +mlxsw_sp_nexthop_dead_neigh_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry) +{ + struct neighbour *n, *old_n = neigh_entry->key.n; + struct mlxsw_sp_nexthop *nh; + bool entry_connected; + u8 nud_state, dead; + int err; + + nh = list_first_entry(&neigh_entry->nexthop_list, + struct mlxsw_sp_nexthop, neigh_list_node); + + n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev); + if (!n) { + n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr, + nh->rif->dev); + if (IS_ERR(n)) + return PTR_ERR(n); + neigh_event_send(n, NULL); + } + + mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry); + neigh_entry->key.n = n; + err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry); + if (err) + goto err_neigh_entry_insert; + + read_lock_bh(&n->lock); + nud_state = n->nud_state; + dead = n->dead; + read_unlock_bh(&n->lock); + entry_connected = nud_state & NUD_VALID && !dead; + + list_for_each_entry(nh, &neigh_entry->nexthop_list, + neigh_list_node) { + neigh_release(old_n); + neigh_clone(n); + __mlxsw_sp_nexthop_neigh_update(nh, !entry_connected); + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp); + } + + neigh_release(n); + + return 0; + +err_neigh_entry_insert: + neigh_entry->key.n = old_n; + mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry); + neigh_release(n); + return err; +} + static void mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_neigh_entry *neigh_entry, - bool removing) + bool removing, bool dead) { struct mlxsw_sp_nexthop *nh; + if (list_empty(&neigh_entry->nexthop_list)) + return; + + if (dead) { + int err; + + err = mlxsw_sp_nexthop_dead_neigh_replace(mlxsw_sp, + neigh_entry); + if (err) + dev_err(mlxsw_sp->bus_info->dev, "Failed to replace dead neigh\n"); + return; + } + list_for_each_entry(nh, &neigh_entry->nexthop_list, neigh_list_node) { __mlxsw_sp_nexthop_neigh_update(nh, removing); From 45a69b70f54854a043530cc0f86fc30d77bc9620 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 11 Jun 2019 10:19:42 +0300 Subject: [PATCH 3/7] selftests: mlxsw: Test nexthop offload indication Test that IPv4 and IPv6 nexthops are correctly marked with offload indication in response to neighbour events. Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../selftests/drivers/net/mlxsw/rtnetlink.sh | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh index 1c30f302a1e75..5c39e5f6a4800 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh @@ -28,6 +28,7 @@ ALL_TESTS=" vlan_interface_uppers_test bridge_extern_learn_test neigh_offload_test + nexthop_offload_test devlink_reload_test " NUM_NETIFS=2 @@ -607,6 +608,52 @@ neigh_offload_test() ip -4 address del 192.0.2.1/24 dev $swp1 } +nexthop_offload_test() +{ + # Test that IPv4 and IPv6 nexthops are marked as offloaded + RET=0 + + sysctl_set net.ipv6.conf.$swp2.keep_addr_on_down 1 + simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64 + simple_if_init $swp2 192.0.2.2/24 2001:db8:1::2/64 + setup_wait + + ip -4 route add 198.51.100.0/24 vrf v$swp1 \ + nexthop via 192.0.2.2 dev $swp1 + ip -6 route add 2001:db8:2::/64 vrf v$swp1 \ + nexthop via 2001:db8:1::2 dev $swp1 + + ip -4 route show 198.51.100.0/24 vrf v$swp1 | grep -q offload + check_err $? "ipv4 nexthop not marked as offloaded when should" + ip -6 route show 2001:db8:2::/64 vrf v$swp1 | grep -q offload + check_err $? "ipv6 nexthop not marked as offloaded when should" + + ip link set dev $swp2 down + sleep 1 + + ip -4 route show 198.51.100.0/24 vrf v$swp1 | grep -q offload + check_fail $? "ipv4 nexthop marked as offloaded when should not" + ip -6 route show 2001:db8:2::/64 vrf v$swp1 | grep -q offload + check_fail $? "ipv6 nexthop marked as offloaded when should not" + + ip link set dev $swp2 up + setup_wait + + ip -4 route show 198.51.100.0/24 vrf v$swp1 | grep -q offload + check_err $? "ipv4 nexthop not marked as offloaded after neigh add" + ip -6 route show 2001:db8:2::/64 vrf v$swp1 | grep -q offload + check_err $? "ipv6 nexthop not marked as offloaded after neigh add" + + log_test "nexthop offload indication" + + ip -6 route del 2001:db8:2::/64 vrf v$swp1 + ip -4 route del 198.51.100.0/24 vrf v$swp1 + + simple_if_fini $swp2 192.0.2.2/24 2001:db8:1::2/64 + simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64 + sysctl_restore net.ipv6.conf.$swp2.keep_addr_on_down +} + devlink_reload_test() { # Test that after executing all the above configuration tests, a From e49f9adffb28c5acbaea46ddfe2d17b1373c2473 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 11 Jun 2019 10:19:43 +0300 Subject: [PATCH 4/7] mlxsw: spectrum_flower: Fix TOS matching The TOS value was not extracted correctly. Fix it. Fixes: 87996f91f739 ("mlxsw: spectrum_flower: Add support for ip tos") Reported-by: Alexander Petrovskiy Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index 15f804453cd61..96b23c856f4de 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -247,8 +247,8 @@ static int mlxsw_sp_flower_parse_ip(struct mlxsw_sp *mlxsw_sp, match.mask->tos & 0x3); mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_DSCP, - match.key->tos >> 6, - match.mask->tos >> 6); + match.key->tos >> 2, + match.mask->tos >> 2); return 0; } From 0b0c0098348f61e864d85269614bce15f6bbfde7 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 11 Jun 2019 10:19:44 +0300 Subject: [PATCH 5/7] selftests: tc_flower: Add TOS matching test Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../selftests/net/forwarding/tc_flower.sh | 36 ++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh index 29bcfa84aec71..124803eea4a9d 100755 --- a/tools/testing/selftests/net/forwarding/tc_flower.sh +++ b/tools/testing/selftests/net/forwarding/tc_flower.sh @@ -2,7 +2,8 @@ # SPDX-License-Identifier: GPL-2.0 ALL_TESTS="match_dst_mac_test match_src_mac_test match_dst_ip_test \ - match_src_ip_test match_ip_flags_test match_pcp_test match_vlan_test" + match_src_ip_test match_ip_flags_test match_pcp_test match_vlan_test \ + match_ip_tos_test" NUM_NETIFS=2 source tc_common.sh source lib.sh @@ -276,6 +277,39 @@ match_vlan_test() log_test "VLAN match ($tcflags)" } +match_ip_tos_test() +{ + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags dst_ip 192.0.2.2 ip_tos 0x20 action drop + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags dst_ip 192.0.2.2 ip_tos 0x18 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip tos=18 -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched on a wrong filter (0x18)" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct filter (0x18)" + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip tos=20 -q + + tc_check_packets "dev $h2 ingress" 102 2 + check_fail $? "Matched on a wrong filter (0x20)" + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Did not match on correct filter (0x20)" + + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + + log_test "ip_tos match ($tcflags)" +} + setup_prepare() { h1=${NETIFS[p1]} From e891ce1dd2a54c41a3b70d822cd832735c03b892 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 11 Jun 2019 10:19:45 +0300 Subject: [PATCH 6/7] mlxsw: spectrum_buffers: Reduce pool size on Spectrum-2 Due to an issue on Spectrum-2, in front-panel ports split four ways, 2 out of 32 port buffers cannot be used. To work around this, the next FW release will mark them as unused, and will report correspondingly lower total shared buffer size. mlxsw will pick up the new value through a query to cap_total_buffer_size resource. However the initial size for shared buffer pool 0 is hard-coded and therefore needs to be updated. Thus reduce the pool size by 2.7 MiB (which corresponds to 2/32 of the total size of 42 MiB), and round down to the whole number of cells. Fixes: fe099bf682ab ("mlxsw: spectrum_buffers: Add Spectrum-2 shared buffer configuration") Signed-off-by: Petr Machata Acked-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index 8512dd49e4201..1537f70bc26d0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -437,8 +437,8 @@ static const struct mlxsw_sp_sb_pr mlxsw_sp1_sb_prs[] = { MLXSW_SP1_SB_PR_CPU_SIZE, true, false), }; -#define MLXSW_SP2_SB_PR_INGRESS_SIZE 40960000 -#define MLXSW_SP2_SB_PR_EGRESS_SIZE 40960000 +#define MLXSW_SP2_SB_PR_INGRESS_SIZE 38128752 +#define MLXSW_SP2_SB_PR_EGRESS_SIZE 38128752 #define MLXSW_SP2_SB_PR_CPU_SIZE (256 * 1000) /* Order according to mlxsw_sp2_sb_pool_dess */ From 4b14cc313f076c37b646cee06a85f0db59cf216c Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 11 Jun 2019 10:19:46 +0300 Subject: [PATCH 7/7] mlxsw: spectrum: Disallow prio-tagged packets when PVID is removed When PVID is removed from a bridge port, the Linux bridge drops both untagged and prio-tagged packets. Align mlxsw with this behavior. Fixes: 148f472da5db ("mlxsw: reg: Add the Switch Port Acceptable Frame Types register") Acked-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index e8002bfc1e8f1..7ed63ed657c71 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -997,7 +997,7 @@ static inline void mlxsw_reg_spaft_pack(char *payload, u8 local_port, MLXSW_REG_ZERO(spaft, payload); mlxsw_reg_spaft_local_port_set(payload, local_port); mlxsw_reg_spaft_allow_untagged_set(payload, allow_untagged); - mlxsw_reg_spaft_allow_prio_tagged_set(payload, true); + mlxsw_reg_spaft_allow_prio_tagged_set(payload, allow_untagged); mlxsw_reg_spaft_allow_tagged_set(payload, true); }