From df41c2cbd72dc240d906d81bebe5623086bef4c3 Mon Sep 17 00:00:00 2001 From: Thomas Cappleman Date: Fri, 9 Apr 2021 17:17:20 +0100 Subject: [PATCH] [orchagent] Add separate next hop table and orch **What I did** Added a new next hop group table to APP_DB, and orchagent support to use this as an alternative to including next hop information in the route table **Why I did it** Improves performance and occupancy usage when using the new table **How I verified it** Extended SWSS unit tests to cover new code, and ran existing tests Signed-off-by: Thomas Cappleman --- .gitignore | 4 + doc/swss-schema.md | 25 +- orchagent/Makefile.am | 1 + orchagent/fgnhgorch.cpp | 132 +-- orchagent/neighorch.cpp | 16 +- orchagent/neighorch.h | 4 +- orchagent/nexthopkey.h | 12 + orchagent/nhgorch.cpp | 1319 ++++++++++++++++++++++++++ orchagent/nhgorch.h | 242 +++++ orchagent/orchdaemon.cpp | 8 +- orchagent/orchdaemon.h | 1 + orchagent/routeorch.cpp | 1146 ++++++++++++++--------- orchagent/routeorch.h | 44 +- orchagent/saihelper.cpp | 13 +- tests/mock_tests/Makefile.am | 1 + tests/mock_tests/aclorch_ut.cpp | 2 +- tests/test_nhg.py | 1531 +++++++++++++++++++++++++++++-- 17 files changed, 3891 insertions(+), 610 deletions(-) create mode 100644 orchagent/nhgorch.cpp create mode 100644 orchagent/nhgorch.h diff --git a/.gitignore b/.gitignore index 439a0999f30..213cb489932 100644 --- a/.gitignore +++ b/.gitignore @@ -77,3 +77,7 @@ tests/mock_tests/tests.trs tests/test-suite.log tests/tests.log tests/tests.trs + +# IDE files # +############# +.vscode diff --git a/doc/swss-schema.md b/doc/swss-schema.md index 7f25803a287..c26ec445df4 100644 --- a/doc/swss-schema.md +++ b/doc/swss-schema.md @@ -159,8 +159,29 @@ and reflects the LAG ports into the redis under: `LAG_TABLE::port` ;Status: Mandatory key = ROUTE_TABLE:prefix nexthop = *prefix, ;IP addresses separated “,” (empty indicates no gateway) - intf = ifindex? PORT_TABLE.key ; zero or more separated by “,” (zero indicates no interface) - blackhole = BIT ; Set to 1 if this route is a blackhole (or null0) + ifname = ifindex? PORT_TABLE.key ; zero or more separated by “,” (zero indicates no interface) + weight = weight_list ; List of weights. + nexthop_group = string ; index within the NEXT_HOP_GROUP_TABLE, used instead of nexthop and intf fields + +--------------------------------------------- + +###### LABEL_ROUTE_TABLE +; Defines schema for MPLS label route table attributes +key = LABEL_ROUTE_TABLE:mpls_label ; MPLS label +; field = value +nexthop = STRING ; Comma-separated list of nexthops. +ifname = STRING ; Comma-separated list of interfaces. +weight = STRING ; Comma-separated list of weights. +nexthop_group = string ; index within the NEXT_HOP_GROUP_TABLE, used instead of nexthop and intf fields + +--------------------------------------------- +### NEXT_HOP_GROUP_TABLE + ;Stores a list of groups of one or more next hops + ;Status: Mandatory + key = NEXT_HOP_GROUP_TABLE:string ; arbitrary index for the next hop group + nexthop = *prefix, ;IP addresses separated “,” (empty indicates no gateway) + ifname = ifindex? PORT_TABLE.key ; zero or more separated by “,” (zero indicates no interface) + weight = weight_list ; List of weights. --------------------------------------------- ### NEIGH_TABLE diff --git a/orchagent/Makefile.am b/orchagent/Makefile.am index d1240cf9af2..43005e6d1fd 100644 --- a/orchagent/Makefile.am +++ b/orchagent/Makefile.am @@ -31,6 +31,7 @@ orchagent_SOURCES = \ orchdaemon.cpp \ orch.cpp \ notifications.cpp \ + nhgorch.cpp \ routeorch.cpp \ neighorch.cpp \ intfsorch.cpp \ diff --git a/orchagent/fgnhgorch.cpp b/orchagent/fgnhgorch.cpp index 6ca37cad819..87167ad1fa5 100644 --- a/orchagent/fgnhgorch.cpp +++ b/orchagent/fgnhgorch.cpp @@ -69,7 +69,7 @@ void FgNhgOrch::update(SubjectType type, void *cntx) { continue; } - + if (!validNextHopInNextHopGroup(nhk)) { SWSS_LOG_WARN("Failed validNextHopInNextHopGroup for nh %s ip %s", @@ -142,11 +142,11 @@ bool FgNhgOrch::bake() } /* calculateBankHashBucketStartIndices: generates the hash_bucket_indices for all banks - * and stores it in fgNhgEntry for the group. - * The function will identify the # of next-hops assigned to each bank and + * and stores it in fgNhgEntry for the group. + * The function will identify the # of next-hops assigned to each bank and * assign the total number of hash buckets for a bank, based on the proportional - * number of next-hops in the bank. - * eg: Bank0: 6 nh, Bank1: 3 nh, total buckets: 30 => + * number of next-hops in the bank. + * eg: Bank0: 6 nh, Bank1: 3 nh, total buckets: 30 => * calculateBankHashBucketStartIndices: Bank0: Bucket# 0-19, Bank1: Bucket# 20-29 */ void FgNhgOrch::calculateBankHashBucketStartIndices(FgNhgEntry *fgNhgEntry) @@ -278,7 +278,7 @@ bool FgNhgOrch::createFineGrainedNextHopGroup(FGNextHopGroupEntry &syncd_fg_rout if (platform == VS_PLATFORM_SUBSTRING) { - /* TODO: need implementation for SAI_NEXT_HOP_GROUP_ATTR_REAL_SIZE */ + /* TODO: need implementation for SAI_NEXT_HOP_GROUP_ATTR_REAL_SIZE */ fgNhgEntry->real_bucket_size = fgNhgEntry->configured_bucket_size; } else @@ -358,7 +358,7 @@ bool FgNhgOrch::validNextHopInNextHopGroup(const NextHopKey& nexthop) } fgNhgEntry = member_entry->second; } - else + else { fgNhgEntry = prefix_entry->second; } @@ -383,7 +383,7 @@ bool FgNhgOrch::validNextHopInNextHopGroup(const NextHopKey& nexthop) nhs_to_add.push_back(nexthop); nhopgroup_members_set[nexthop] = m_neighOrch->getNextHopId(nexthop); - if (!computeAndSetHashBucketChanges(syncd_fg_route_entry, fgNhgEntry, + if (!computeAndSetHashBucketChanges(syncd_fg_route_entry, fgNhgEntry, bank_member_changes, nhopgroup_members_set, route_table.first)) { SWSS_LOG_ERROR("Failed to set fine grained next hop %s", @@ -428,7 +428,7 @@ bool FgNhgOrch::invalidNextHopInNextHopGroup(const NextHopKey& nexthop) } fgNhgEntry = member_entry->second; } - else + else { fgNhgEntry = prefix_entry->second; } @@ -461,7 +461,7 @@ bool FgNhgOrch::invalidNextHopInNextHopGroup(const NextHopKey& nexthop) bank_member_changes[fgNhgEntry->next_hops[nexthop.ip_address].bank]. nhs_to_del.push_back(nexthop); - if (!computeAndSetHashBucketChanges(syncd_fg_route_entry, fgNhgEntry, + if (!computeAndSetHashBucketChanges(syncd_fg_route_entry, fgNhgEntry, bank_member_changes, nhopgroup_members_set, route_table.first)) { SWSS_LOG_ERROR("Failed to set fine grained next hop %s", @@ -483,15 +483,15 @@ bool FgNhgOrch::invalidNextHopInNextHopGroup(const NextHopKey& nexthop) /* setActiveBankHashBucketChanges: Sets hash buckets for active banks and called on a PER bank basis * This function deals with a scenario where next-hop changes occurred for the route, * and the next-hop change didn't cause an entire bank to go active/inactive. - * The function uses bank_member_changes to compute the hash buckets to modify, in order to satisfy the next-hop + * The function uses bank_member_changes to compute the hash buckets to modify, in order to satisfy the next-hop * availability for the route/neigh. * Eg: Prefix A had nhs 1, 2, 3 with 1, 2, 3, being equally distributed over hash buckets * 0-59(20 buckets per nh). If there was a nh removal of nh 2, this fn would equally redistribute hash buckets - * for nh 2 to nh 1 and nh 3. Leading to 30 hash buckets, each, for nh 1 and nh 3, and none for nh 2. + * for nh 2 to nh 1 and nh 3. Leading to 30 hash buckets, each, for nh 1 and nh 3, and none for nh 2. * Thereby achieving consistent and layered hashing. */ bool FgNhgOrch::setActiveBankHashBucketChanges(FGNextHopGroupEntry *syncd_fg_route_entry, FgNhgEntry *fgNhgEntry, - uint32_t bank, uint32_t syncd_bank, std::vector bank_member_changes, + uint32_t bank, uint32_t syncd_bank, std::vector bank_member_changes, std::map &nhopgroup_members_set, const IpPrefix &ipPrefix) { SWSS_LOG_ENTER(); @@ -506,7 +506,7 @@ bool FgNhgOrch::setActiveBankHashBucketChanges(FGNextHopGroupEntry *syncd_fg_rou HashBuckets *hash_buckets = &(bank_fgnhg_map->at(bank_member_change.nhs_to_del[del_idx])); for (uint32_t i = 0; i < hash_buckets->size(); i++) { - if (!writeHashBucketChange(syncd_fg_route_entry, hash_buckets->at(i), + if (!writeHashBucketChange(syncd_fg_route_entry, hash_buckets->at(i), nhopgroup_members_set[bank_member_change.nhs_to_add[add_idx]], ipPrefix, bank_member_change.nhs_to_add[add_idx])) { @@ -526,7 +526,7 @@ bool FgNhgOrch::setActiveBankHashBucketChanges(FGNextHopGroupEntry *syncd_fg_rou } /* Given that we resolved add + del on a bank in the above while stmt - * We will either have add OR delete left to do, and the logic below + * We will either have add OR delete left to do, and the logic below * relies on this fact */ if (del_idx < bank_member_change.nhs_to_del.size()) @@ -545,7 +545,7 @@ bool FgNhgOrch::setActiveBankHashBucketChanges(FGNextHopGroupEntry *syncd_fg_rou NextHopKey round_robin_nh = bank_member_change.active_nhs[i % bank_member_change.active_nhs.size()]; - if (!writeHashBucketChange(syncd_fg_route_entry, hash_buckets->at(i), + if (!writeHashBucketChange(syncd_fg_route_entry, hash_buckets->at(i), nhopgroup_members_set[round_robin_nh], ipPrefix, round_robin_nh)) { return false; @@ -553,18 +553,18 @@ bool FgNhgOrch::setActiveBankHashBucketChanges(FGNextHopGroupEntry *syncd_fg_rou bank_fgnhg_map->at(round_robin_nh).push_back(hash_buckets->at(i)); /* Logic below ensure that # hash buckets assigned to a nh is equalized, - * we could have used simple round robin to reassign hash buckets to - * other available nhs, but for cases where # hash buckets is not + * we could have used simple round robin to reassign hash buckets to + * other available nhs, but for cases where # hash buckets is not * divisible by # of nhs, simple round robin can make the hash bucket * distribution non-ideal, thereby nhs can attract unequal traffic */ if (num_nhs_with_one_more == 0) { if (bank_fgnhg_map->at(round_robin_nh).size() == exp_bucket_size) { - SWSS_LOG_INFO("%s reached %d, don't remove more buckets", - (bank_member_change.active_nhs[i % bank_member_change.active_nhs.size()]).to_string().c_str(), + SWSS_LOG_INFO("%s reached %d, don't remove more buckets", + (bank_member_change.active_nhs[i % bank_member_change.active_nhs.size()]).to_string().c_str(), exp_bucket_size); - bank_member_change.active_nhs.erase(bank_member_change.active_nhs.begin() + + bank_member_change.active_nhs.erase(bank_member_change.active_nhs.begin() + (i % bank_member_change.active_nhs.size())); } else if (bank_fgnhg_map->at(round_robin_nh).size() > exp_bucket_size) @@ -579,10 +579,10 @@ bool FgNhgOrch::setActiveBankHashBucketChanges(FGNextHopGroupEntry *syncd_fg_rou if (bank_fgnhg_map->at(round_robin_nh).size() == exp_bucket_size +1) { - SWSS_LOG_INFO("%s reached %d, don't remove more buckets num_nhs_with_one_more %d", - (bank_member_change.active_nhs[i %bank_member_change.active_nhs.size()]).to_string().c_str(), + SWSS_LOG_INFO("%s reached %d, don't remove more buckets num_nhs_with_one_more %d", + (bank_member_change.active_nhs[i %bank_member_change.active_nhs.size()]).to_string().c_str(), exp_bucket_size +1, num_nhs_with_one_more -1); - bank_member_change.active_nhs.erase(bank_member_change.active_nhs.begin() + + bank_member_change.active_nhs.erase(bank_member_change.active_nhs.begin() + (i % bank_member_change.active_nhs.size())); num_nhs_with_one_more--; } @@ -614,7 +614,7 @@ bool FgNhgOrch::setActiveBankHashBucketChanges(FGNextHopGroupEntry *syncd_fg_rou while(add_idx < bank_member_change.nhs_to_add.size()) { - (*bank_fgnhg_map)[bank_member_change.nhs_to_add[add_idx]] = + (*bank_fgnhg_map)[bank_member_change.nhs_to_add[add_idx]] = std::vector(); auto it = bank_member_change.active_nhs.begin(); if (num_nhs_with_eq_to_exp > 0) @@ -645,7 +645,7 @@ bool FgNhgOrch::setActiveBankHashBucketChanges(FGNextHopGroupEntry *syncd_fg_rou { uint32_t last_elem = map_entry->at((*map_entry).size() - 1); - if (!writeHashBucketChange(syncd_fg_route_entry, last_elem, + if (!writeHashBucketChange(syncd_fg_route_entry, last_elem, nhopgroup_members_set[bank_member_change.nhs_to_add[add_idx]], ipPrefix, bank_member_change.nhs_to_add[add_idx])) { @@ -656,8 +656,8 @@ bool FgNhgOrch::setActiveBankHashBucketChanges(FGNextHopGroupEntry *syncd_fg_rou (*map_entry).erase((*map_entry).end() - 1); } /* Logic below ensure that # hash buckets assigned to a nh is equalized, - * we could have used simple round robin to reassign hash buckets to - * other available nhs, but for cases where # hash buckets is not + * we could have used simple round robin to reassign hash buckets to + * other available nhs, but for cases where # hash buckets is not * divisible by # of nhs, simple round robin can make the hash bucket * distribution non-ideal, thereby nhs can attract unequal traffic */ if (num_nhs_with_one_more == 0) @@ -682,7 +682,7 @@ bool FgNhgOrch::setActiveBankHashBucketChanges(FGNextHopGroupEntry *syncd_fg_rou { if (map_entry->size() == exp_bucket_size +1) { - SWSS_LOG_INFO("%s reached %d, don't remove more buckets num_nhs_with_one_more %d", + SWSS_LOG_INFO("%s reached %d, don't remove more buckets num_nhs_with_one_more %d", it->to_string().c_str(), exp_bucket_size + 1, num_nhs_with_one_more -1); it = bank_member_change.active_nhs.erase(it); num_nhs_with_one_more--; @@ -748,7 +748,7 @@ bool FgNhgOrch::setInactiveBankToNextAvailableActiveBank(FGNextHopGroupEntry *sy if (new_bank_idx == bank_member_changes.size()) { - SWSS_LOG_NOTICE("All banks of FG next-hops are down for prefix %s", + SWSS_LOG_NOTICE("All banks of FG next-hops are down for prefix %s", ipPrefix.to_string().c_str()); /* Case where there are no active banks */ /* Note: There is no way to set a NULL OID to the now inactive next-hops @@ -771,10 +771,10 @@ bool FgNhgOrch::setInactiveBankToNextAvailableActiveBank(FGNextHopGroupEntry *sy * Eg: Lets assume prefix A had nhs 1, 2, 3, 4, 5, 6 with nhs being equally distributed over hash buckets * 0-59(10 per nh). Now there was a nh deletion of 1, 2, 3 which constituted bank 0(4, 5, 6 constituted bank 1) * This function will identify that all of bank 0's nh are down and re-assign all the hash buckets(0-29) for these nhs to - * nhs from bank 1, along with making local struct changes to track this for future route/neigh changes. + * nhs from bank 1, along with making local struct changes to track this for future route/neigh changes. */ bool FgNhgOrch::setInactiveBankHashBucketChanges(FGNextHopGroupEntry *syncd_fg_route_entry, FgNhgEntry *fgNhgEntry, - uint32_t bank,std::vector &bank_member_changes, + uint32_t bank,std::vector &bank_member_changes, std::map &nhopgroup_members_set, const IpPrefix &ipPrefix) { SWSS_LOG_ENTER(); @@ -789,7 +789,7 @@ bool FgNhgOrch::setInactiveBankHashBucketChanges(FGNextHopGroupEntry *syncd_fg_r NextHopKey bank_nh_memb = bank_member_changes[bank]. nhs_to_add[i % bank_member_changes[bank].nhs_to_add.size()]; - if (!writeHashBucketChange(syncd_fg_route_entry, i, + if (!writeHashBucketChange(syncd_fg_route_entry, i, nhopgroup_members_set[bank_nh_memb], ipPrefix, bank_nh_memb)) { return false; @@ -800,7 +800,7 @@ bool FgNhgOrch::setInactiveBankHashBucketChanges(FGNextHopGroupEntry *syncd_fg_r } syncd_fg_route_entry->inactive_to_active_map[bank] = bank; - SWSS_LOG_NOTICE("Bank# %d of FG next-hops is up for prefix %s", + SWSS_LOG_NOTICE("Bank# %d of FG next-hops is up for prefix %s", bank, ipPrefix.to_string().c_str()); } else if (bank_member_changes[bank].nhs_to_del.size() > 0) @@ -818,7 +818,7 @@ bool FgNhgOrch::setInactiveBankHashBucketChanges(FGNextHopGroupEntry *syncd_fg_r syncd_fg_route_entry->active_nexthops.erase(memb); } - SWSS_LOG_NOTICE("Bank# %d of FG next-hops is down for prefix %s", bank, + SWSS_LOG_NOTICE("Bank# %d of FG next-hops is down for prefix %s", bank, ipPrefix.to_string().c_str()); } else @@ -836,7 +836,7 @@ bool FgNhgOrch::setInactiveBankHashBucketChanges(FGNextHopGroupEntry *syncd_fg_r } else { - if (!setActiveBankHashBucketChanges(syncd_fg_route_entry, fgNhgEntry, + if (!setActiveBankHashBucketChanges(syncd_fg_route_entry, fgNhgEntry, active_bank, bank, bank_member_changes, nhopgroup_members_set, ipPrefix)) { SWSS_LOG_INFO("Failed setActiveBankHashBucketChanges"); @@ -848,8 +848,8 @@ bool FgNhgOrch::setInactiveBankHashBucketChanges(FGNextHopGroupEntry *syncd_fg_r } -bool FgNhgOrch::computeAndSetHashBucketChanges(FGNextHopGroupEntry *syncd_fg_route_entry, - FgNhgEntry *fgNhgEntry, std::vector &bank_member_changes, +bool FgNhgOrch::computeAndSetHashBucketChanges(FGNextHopGroupEntry *syncd_fg_route_entry, + FgNhgEntry *fgNhgEntry, std::vector &bank_member_changes, std::map &nhopgroup_members_set, const IpPrefix &ipPrefix) { @@ -863,10 +863,10 @@ bool FgNhgOrch::computeAndSetHashBucketChanges(FGNextHopGroupEntry *syncd_fg_rou { /* Active bank is is determined by there being active nhs on the bank OR * an edge case where all active_nhs went down(nhs_to_del > 0) BUT - * simultaneously, nhs were added(nhs_to_add > 0). + * simultaneously, nhs were added(nhs_to_add > 0). * Route this to fn which deals with active banks */ - if (!setActiveBankHashBucketChanges(syncd_fg_route_entry, fgNhgEntry, + if (!setActiveBankHashBucketChanges(syncd_fg_route_entry, fgNhgEntry, bank_idx, bank_idx, bank_member_changes, nhopgroup_members_set, ipPrefix)) { return false; @@ -874,7 +874,7 @@ bool FgNhgOrch::computeAndSetHashBucketChanges(FGNextHopGroupEntry *syncd_fg_rou } else { - if (!setInactiveBankHashBucketChanges(syncd_fg_route_entry, fgNhgEntry, + if (!setInactiveBankHashBucketChanges(syncd_fg_route_entry, fgNhgEntry, bank_idx, bank_member_changes, nhopgroup_members_set, ipPrefix)) { return false; @@ -886,7 +886,7 @@ bool FgNhgOrch::computeAndSetHashBucketChanges(FGNextHopGroupEntry *syncd_fg_rou } -bool FgNhgOrch::setNewNhgMembers(FGNextHopGroupEntry &syncd_fg_route_entry, FgNhgEntry *fgNhgEntry, +bool FgNhgOrch::setNewNhgMembers(FGNextHopGroupEntry &syncd_fg_route_entry, FgNhgEntry *fgNhgEntry, std::vector &bank_member_changes, std::map &nhopgroup_members_set, const IpPrefix &ipPrefix) { @@ -895,7 +895,7 @@ bool FgNhgOrch::setNewNhgMembers(FGNextHopGroupEntry &syncd_fg_route_entry, FgNh sai_status_t status; bool isWarmReboot = false; auto nexthopsMap = m_recoveryMap.find(ipPrefix.to_string()); - for (uint32_t i = 0; i < fgNhgEntry->hash_bucket_indices.size(); i++) + for (uint32_t i = 0; i < fgNhgEntry->hash_bucket_indices.size(); i++) { uint32_t bank = i; syncd_fg_route_entry.inactive_to_active_map[bank] = bank; @@ -916,9 +916,9 @@ bool FgNhgOrch::setNewNhgMembers(FGNextHopGroupEntry &syncd_fg_route_entry, FgNh break; } } - SWSS_LOG_NOTICE("Bank# %d of FG next-hops is down for prefix %s", + SWSS_LOG_NOTICE("Bank# %d of FG next-hops is down for prefix %s", i, ipPrefix.to_string().c_str()); - } + } if (bank_member_changes[bank].nhs_to_add.size() == 0) { @@ -980,12 +980,12 @@ bool FgNhgOrch::setNewNhgMembers(FGNextHopGroupEntry &syncd_fg_route_entry, FgNh { SWSS_LOG_ERROR("Failed to create next hop group %" PRIx64 " member %" PRIx64 ": %d", syncd_fg_route_entry.next_hop_group_id, next_hop_group_member_id, status); - + if (!removeFineGrainedNextHopGroup(&syncd_fg_route_entry)) { SWSS_LOG_ERROR("Failed to clean-up after next-hop member creation failure"); } - + return false; } @@ -1009,7 +1009,7 @@ bool FgNhgOrch::setNewNhgMembers(FGNextHopGroupEntry &syncd_fg_route_entry, FgNh bool FgNhgOrch::isRouteFineGrained(sai_object_id_t vrf_id, const IpPrefix &ipPrefix, const NextHopGroupKey &nextHops) { SWSS_LOG_ENTER(); - + if (!isFineGrainedConfigured || (vrf_id != gVirtualRouterId)) { return false; @@ -1044,7 +1044,7 @@ bool FgNhgOrch::isRouteFineGrained(sai_object_id_t vrf_id, const IpPrefix &ipPre */ if (fgNhgEntry != member_entry->second) { - SWSS_LOG_INFO("FG nh found across different FG_NH groups: %s expected %s, actual %s", + SWSS_LOG_INFO("FG nh found across different FG_NH groups: %s expected %s, actual %s", nhk.to_string().c_str(), fgNhgEntry->fg_nhg_name.c_str(), member_entry->second->fg_nhg_name.c_str()); return false; } @@ -1107,7 +1107,7 @@ bool FgNhgOrch::setFgNhg(sai_object_id_t vrf_id, const IpPrefix &ipPrefix, const break; } } - + if (m_syncdFGRouteTables.find(vrf_id) != m_syncdFGRouteTables.end() && m_syncdFGRouteTables.at(vrf_id).find(ipPrefix) != m_syncdFGRouteTables.at(vrf_id).end() && m_syncdFGRouteTables.at(vrf_id).at(ipPrefix).nhg_key == nextHops) @@ -1178,10 +1178,10 @@ bool FgNhgOrch::setFgNhg(sai_object_id_t vrf_id, const IpPrefix &ipPrefix, const nhs_to_add.push_back(nhk); next_hop_to_add = true; } - else + else { FGNextHopGroupEntry *syncd_fg_route_entry = &(syncd_fg_route_entry_it->second); - if (syncd_fg_route_entry->active_nexthops.find(nhk) == + if (syncd_fg_route_entry->active_nexthops.find(nhk) == syncd_fg_route_entry->active_nexthops.end()) { bank_member_changes[fgNhgEntry->next_hops[nhk.ip_address].bank]. @@ -1211,7 +1211,7 @@ bool FgNhgOrch::setFgNhg(sai_object_id_t vrf_id, const IpPrefix &ipPrefix, const } } - if (!computeAndSetHashBucketChanges(syncd_fg_route_entry, fgNhgEntry, bank_member_changes, + if (!computeAndSetHashBucketChanges(syncd_fg_route_entry, fgNhgEntry, bank_member_changes, nhopgroup_members_set, ipPrefix)) { return false; @@ -1244,7 +1244,7 @@ bool FgNhgOrch::setFgNhg(sai_object_id_t vrf_id, const IpPrefix &ipPrefix, const SWSS_LOG_NOTICE("Created route %s:%s", ipPrefix.to_string().c_str(), nextHops.to_string().c_str()); } - m_syncdFGRouteTables[vrf_id][ipPrefix].nhg_key = nextHops; + m_syncdFGRouteTables[vrf_id][ipPrefix].nhg_key = nextHops; for (uint32_t bank_idx = 0; bank_idx < bank_member_changes.size(); bank_idx++) { @@ -1280,7 +1280,7 @@ bool FgNhgOrch::removeFgNhg(sai_object_id_t vrf_id, const IpPrefix &ipPrefix) auto it_route_table = m_syncdFGRouteTables.find(vrf_id); if (it_route_table == m_syncdFGRouteTables.end()) { - SWSS_LOG_INFO("No route table found for %s, vrf_id 0x%" PRIx64, + SWSS_LOG_INFO("No route table found for %s, vrf_id 0x%" PRIx64, ipPrefix.to_string().c_str(), vrf_id); return true; } @@ -1360,7 +1360,7 @@ void FgNhgOrch::cleanupIpInLinkToIpMap(const string &link, const IpAddress &ip, SWSS_LOG_WARN("Unexpected case where structs are out of sync for %s", link.c_str()); return; - } + } for (auto ip_it = begin(link_entry->second); ip_it != end(link_entry->second); ip_it++) { if (*ip_it == ip) @@ -1378,7 +1378,7 @@ bool FgNhgOrch::doTaskFgNhg(const KeyOpFieldsValuesTuple & t) SWSS_LOG_ENTER(); string op = kfvOp(t); string key = kfvKey(t); - string fg_nhg_name = key; + string fg_nhg_name = key; auto fgNhg_entry = m_FgNhgs.find(fg_nhg_name); FGMatchMode match_mode = ROUTE_BASED; @@ -1412,7 +1412,7 @@ bool FgNhgOrch::doTaskFgNhg(const KeyOpFieldsValuesTuple & t) return true; } - if (fgNhg_entry != m_FgNhgs.end()) + if (fgNhg_entry != m_FgNhgs.end()) { SWSS_LOG_WARN("FG_NHG %s already exists, ignoring", fg_nhg_name.c_str()); } @@ -1422,7 +1422,7 @@ bool FgNhgOrch::doTaskFgNhg(const KeyOpFieldsValuesTuple & t) fgNhgEntry.configured_bucket_size = bucket_size; fgNhgEntry.fg_nhg_name = fg_nhg_name; fgNhgEntry.match_mode = match_mode; - SWSS_LOG_NOTICE("Added new FG_NHG entry with bucket_size %d, match_mode: %'" PRIu8, + SWSS_LOG_NOTICE("Added new FG_NHG entry with bucket_size %d, match_mode: %'" PRIu8, bucket_size, match_mode); isFineGrainedConfigured = true; m_FgNhgs[fg_nhg_name] = fgNhgEntry; @@ -1435,7 +1435,7 @@ bool FgNhgOrch::doTaskFgNhg(const KeyOpFieldsValuesTuple & t) SWSS_LOG_INFO("Received delete call for non-existent entry %s", fg_nhg_name.c_str()); } - else + else { /* Check if there are no child objects associated prior to deleting */ if (fgNhg_entry->second.prefixes.size() == 0 && fgNhg_entry->second.next_hops.size() == 0) @@ -1446,7 +1446,7 @@ bool FgNhgOrch::doTaskFgNhg(const KeyOpFieldsValuesTuple & t) } else { - SWSS_LOG_INFO("Child Prefix/Member entries are still associated with this FG_NHG %s", + SWSS_LOG_INFO("Child Prefix/Member entries are still associated with this FG_NHG %s", fg_nhg_name.c_str()); return false; } @@ -1560,7 +1560,7 @@ bool FgNhgOrch::doTaskFgNhgPrefix(const KeyOpFieldsValuesTuple & t) m_syncdFGRouteTables.at(vrf_id).find(ip_prefix) != m_syncdFGRouteTables.at(vrf_id).end()) { nhg = m_syncdFGRouteTables.at(vrf_id).at(ip_prefix).nhg_key; - } + } auto delCache = m_fgPrefixDelCache.find(ip_prefix); if (delCache == m_fgPrefixDelCache.end()) @@ -1577,7 +1577,7 @@ bool FgNhgOrch::doTaskFgNhgPrefix(const KeyOpFieldsValuesTuple & t) } } - m_fgNhgPrefixes.erase(ip_prefix); + m_fgNhgPrefixes.erase(ip_prefix); } else { @@ -1601,7 +1601,7 @@ bool FgNhgOrch::doTaskFgNhgPrefix(const KeyOpFieldsValuesTuple & t) break; } } - m_fgNhgPrefixes.erase(ip_prefix); + m_fgNhgPrefixes.erase(ip_prefix); m_routeTable.set(ip_prefix.to_string(), generateRouteTableFromNhgKey(delCache->second)); SWSS_LOG_INFO("Perform APP_DB addition with prefix %s", ip_prefix.to_string().c_str()); @@ -1768,9 +1768,9 @@ bool FgNhgOrch::doTaskFgNhgMember(const KeyOpFieldsValuesTuple & t) } return true; } - -void FgNhgOrch::doTask(Consumer& consumer) + +void FgNhgOrch::doTask(Consumer& consumer) { SWSS_LOG_ENTER(); const string & table_name = consumer.getTableName(); diff --git a/orchagent/neighorch.cpp b/orchagent/neighorch.cpp index 2ec12904ca2..c0510003efa 100644 --- a/orchagent/neighorch.cpp +++ b/orchagent/neighorch.cpp @@ -7,6 +7,7 @@ #include "directory.h" #include "muxorch.h" #include "subscriberstatetable.h" +#include "nhgorch.h" extern sai_neighbor_api_t* sai_neighbor_api; extern sai_next_hop_api_t* sai_next_hop_api; @@ -15,6 +16,7 @@ extern PortsOrch *gPortsOrch; extern sai_object_id_t gSwitchId; extern CrmOrch *gCrmOrch; extern RouteOrch *gRouteOrch; +extern NhgOrch *gNhgOrch; extern FgNhgOrch *gFgNhgOrch; extern Directory gDirectory; extern string gMySwitchType; @@ -31,7 +33,7 @@ NeighOrch::NeighOrch(DBConnector *appDb, string tableName, IntfsOrch *intfsOrch, SWSS_LOG_ENTER(); m_fdbOrch->attach(this); - + if(gMySwitchType == "voq") { //Add subscriber to process VOQ system neigh @@ -149,10 +151,12 @@ bool NeighOrch::hasNextHop(const NextHopKey &nexthop) return m_syncdNextHops.find(nexthop) != m_syncdNextHops.end(); } -bool NeighOrch::addNextHop(NextHopKey &nexthop) +bool NeighOrch::addNextHop(const NextHopKey &const_nexthop) { SWSS_LOG_ENTER(); + NextHopKey nexthop = {const_nexthop.ip_address, const_nexthop.alias}; + Port p; if (!gPortsOrch->getPort(nexthop.alias, p)) { @@ -283,6 +287,7 @@ bool NeighOrch::setNextHopFlag(const NextHopKey &nexthop, const uint32_t nh_flag { case NHFLAGS_IFDOWN: rc = gRouteOrch->invalidnexthopinNextHopGroup(nexthop, count); + rc &= gNhgOrch->invalidateNextHop(nexthop); break; default: assert(0); @@ -312,6 +317,7 @@ bool NeighOrch::clearNextHopFlag(const NextHopKey &nexthop, const uint32_t nh_fl { case NHFLAGS_IFDOWN: rc = gRouteOrch->validnexthopinNextHopGroup(nexthop, count); + rc &= gNhgOrch->validateNextHop(nexthop); break; default: assert(0); @@ -371,10 +377,12 @@ bool NeighOrch::ifChangeInformNextHop(const string &alias, bool if_up) return rc; } -bool NeighOrch::removeNextHop(NextHopKey& nexthop) +bool NeighOrch::removeNextHop(const NextHopKey& const_nexthop) { SWSS_LOG_ENTER(); + NextHopKey nexthop = {const_nexthop.ip_address, const_nexthop.alias}; + if(m_intfsOrch->isRemoteSystemPortIntf(nexthop.alias)) { //For remote system ports kernel nexthops are always on inband. Change the key @@ -874,7 +882,7 @@ bool NeighOrch::removeNeighbor(const NeighborEntry &neighborEntry, bool disable) NeighborUpdate update = { neighborEntry, MacAddress(), false }; notify(SUBJECT_TYPE_NEIGH_CHANGE, static_cast(&update)); - + if(gMySwitchType == "voq") { //Sync the neighbor to delete from the CHASSIS_APP_DB diff --git a/orchagent/neighorch.h b/orchagent/neighorch.h index fa7bb5403a6..add420ac5c8 100644 --- a/orchagent/neighorch.h +++ b/orchagent/neighorch.h @@ -48,8 +48,8 @@ class NeighOrch : public Orch, public Subject, public Observer ~NeighOrch(); bool hasNextHop(const NextHopKey&); - bool addNextHop(NextHopKey&); - bool removeNextHop(NextHopKey&); + bool addNextHop(const NextHopKey&); + bool removeNextHop(const NextHopKey&); sai_object_id_t getNextHopId(const NextHopKey&); sai_object_id_t getLocalNextHopId(const NextHopKey&); diff --git a/orchagent/nexthopkey.h b/orchagent/nexthopkey.h index 1600161ebe3..7cbfc48ee41 100644 --- a/orchagent/nexthopkey.h +++ b/orchagent/nexthopkey.h @@ -4,6 +4,7 @@ #include "ipaddress.h" #include "tokenize.h" #include "label.h" +#include "intfsorch.h" #define NH_DELIMITER '@' #define NHG_DELIMITER ',' @@ -23,6 +24,8 @@ struct NextHopKey NextHopKey(const IpAddress &ip, const std::string &alias) : ip_address(ip), alias(alias), vni(0), mac_address() {} NextHopKey(const std::string &str) { + SWSS_LOG_ENTER(); + if (str.find(NHG_DELIMITER) != string::npos) { std::string err = "Error converting " + str + " to NextHop"; @@ -32,16 +35,19 @@ struct NextHopKey std::string ip_str; if (label_delimiter != std::string::npos) { + SWSS_LOG_INFO("Labeled next hop"); label_stack = LabelStack(str.substr(0, label_delimiter)); ip_str = str.substr(label_delimiter+1); } else { + SWSS_LOG_INFO("Unlabeled next hop"); ip_str = str; } auto keys = tokenize(ip_str, NH_DELIMITER); vni = 0; mac_address = MacAddress(); + if (keys.size() == 1) { ip_address = keys[0]; @@ -62,6 +68,7 @@ struct NextHopKey throw std::invalid_argument(err); } } + NextHopKey(const std::string &str, bool overlay_nh) { if (str.find(NHG_DELIMITER) != string::npos) @@ -136,6 +143,11 @@ struct NextHopKey { return (ip_address.getV4Addr() == 0); } + + NextHopKey ipKey() const + { + return NextHopKey(ip_address, alias); + } }; #endif /* SWSS_NEXTHOPKEY_H */ diff --git a/orchagent/nhgorch.cpp b/orchagent/nhgorch.cpp new file mode 100644 index 00000000000..a7dcd2b202a --- /dev/null +++ b/orchagent/nhgorch.cpp @@ -0,0 +1,1319 @@ +#include "nhgorch.h" +#include "switchorch.h" +#include "neighorch.h" +#include "crmorch.h" +#include "routeorch.h" +#include "bulker.h" +#include "logger.h" +#include "swssnet.h" + +extern sai_object_id_t gSwitchId; + +extern PortsOrch *gPortsOrch; +extern CrmOrch *gCrmOrch; +extern NeighOrch *gNeighOrch; +extern SwitchOrch *gSwitchOrch; +extern RouteOrch *gRouteOrch; + +extern sai_next_hop_group_api_t* sai_next_hop_group_api; +extern sai_next_hop_api_t* sai_next_hop_api; +extern sai_switch_api_t* sai_switch_api; + +unsigned int NextHopGroup::m_count = 0; + +/* Default maximum number of next hop groups */ +#define DEFAULT_NUMBER_OF_ECMP_GROUPS 128 +#define DEFAULT_MAX_ECMP_GROUP_SIZE 32 + +NhgOrch::NhgOrch(DBConnector *db, string tableName) : + Orch(db, tableName) +{ + SWSS_LOG_ENTER(); + + /* Get the switch's maximum next hop group capacity. */ + sai_attribute_t attr; + attr.id = SAI_SWITCH_ATTR_NUMBER_OF_ECMP_GROUPS; + + sai_status_t status = sai_switch_api->get_switch_attribute(gSwitchId, + 1, + &attr); + + if (status != SAI_STATUS_SUCCESS) + { + SWSS_LOG_WARN("Failed to get switch attribute number of ECMP groups. \ + Use default value. rv:%d", status); + m_maxNhgCount = DEFAULT_NUMBER_OF_ECMP_GROUPS; + } + else + { + m_maxNhgCount = attr.value.s32; + + /* + * ASIC specific workaround to re-calculate maximum ECMP groups + * according to diferent ECMP mode used. + * + * On Mellanox platform, the maximum ECMP groups returned is the value + * under the condition that the ECMP group size is 1. Deviding this + * number by DEFAULT_MAX_ECMP_GROUP_SIZE gets the maximum number of + * ECMP groups when the maximum ECMP group size is 32. + */ + char *platform = getenv("platform"); + if (platform && strstr(platform, MLNX_PLATFORM_SUBSTRING)) + { + SWSS_LOG_NOTICE("Mellanox platform - divide capacity by %d", + DEFAULT_MAX_ECMP_GROUP_SIZE); + m_maxNhgCount /= DEFAULT_MAX_ECMP_GROUP_SIZE; + } + } + + /* Set switch's next hop group capacity. */ + vector fvTuple; + fvTuple.emplace_back("MAX_NEXTHOP_GROUP_COUNT", + to_string(m_maxNhgCount)); + gSwitchOrch->set_switch_capability(fvTuple); + + SWSS_LOG_NOTICE("Maximum number of ECMP groups supported is %d", + m_maxNhgCount); +} + +/* + * Purpose: Perform the operations requested by APPL_DB users. + * + * Description: Iterate over the untreated operations list and resolve them. + * The operations supported are SET and DEL. If an operation + * could not be resolved, it will either remain in the list, or be + * removed, depending on the case. + * + * Params: IN consumer - The cosumer object. + * + * Returns: Nothing. + */ +void NhgOrch::doTask(Consumer& consumer) +{ + SWSS_LOG_ENTER(); + + if (!gPortsOrch->allPortsReady()) + { + return; + } + + auto it = consumer.m_toSync.begin(); + + while (it != consumer.m_toSync.end()) + { + KeyOpFieldsValuesTuple t = it->second; + + string index = kfvKey(t); + string op = kfvOp(t); + + SWSS_LOG_INFO("Next hop group table key %s, op %s", + index.c_str(), op.c_str()); + + bool success; + const auto& nhg_it = m_syncdNextHopGroups.find(index); + + if (op == SET_COMMAND) + { + string ips; + string aliases; + + /* Get group's next hop IPs and aliases */ + for (auto i : kfvFieldsValues(t)) + { + if (fvField(i) == "nexthop") + ips = fvValue(i); + + if (fvField(i) == "ifname") + aliases = fvValue(i); + } + + /* Split ips and alaises strings into vectors of tokens. */ + vector ipv = tokenize(ips, ','); + vector alsv = tokenize(aliases, ','); + + /* Create the next hop group key. */ + string nhg_str = ipv[0] + NH_DELIMITER + alsv[0]; + + for (uint32_t i = 1; i < ipv.size(); i++) + { + nhg_str += NHG_DELIMITER + ipv[i] + NH_DELIMITER + alsv[i]; + } + + NextHopGroupKey nhg_key = NextHopGroupKey(nhg_str); + + /* If the group does not exist, create one. */ + if (nhg_it == m_syncdNextHopGroups.end()) + { + SWSS_LOG_NOTICE("Adding next hop group %s with %s", + index.c_str(), + nhg_str.c_str()); + /* + * If we'd have to create a SAI object for the group, and we + * already reached the limit, we're going to create a temporary + * group, represented by one of it's NH only until we have + * enough resources to sync the whole group. The item is going + * to be kept in the sync list so we keep trying to create the + * actual group when there are enough resources. + */ + if ((nhg_key.getSize() > 1) && + (NextHopGroup::getCount() >= m_maxNhgCount)) + { + SWSS_LOG_WARN("Next hop group count reached it's limit."); + + try + { + auto nhg = std::make_unique( + createTempNhg(nhg_key)); + SWSS_LOG_NOTICE("Adding temp next hop group with %s", + nhg->to_string().c_str()); + if (nhg->sync()) + { + SWSS_LOG_INFO("Temporary NHG successfully synced"); + m_syncdNextHopGroups.emplace(index, + NhgEntry(std::move(nhg))); + } + else + { + SWSS_LOG_WARN( + "Failed to sync temporary NHG %s with %s", + index.c_str(), + nhg_key.to_string().c_str()); + } + } + catch (const std::exception& e) + { + SWSS_LOG_WARN( + "Got exception: %s while adding temp group %s", + e.what(), + nhg_key.to_string().c_str()); + } + + /* + * We set the success to false so we keep trying to update + * this group in order to sync the whole group. + */ + success = false; + } + else + { + auto nhg = std::make_unique(nhg_key); + success = nhg->sync(); + + if (success) + { + SWSS_LOG_INFO("NHG successfully synced"); + m_syncdNextHopGroups.emplace(index, + NhgEntry(std::move(nhg))); + } + } + } + /* If the group exists, update it. */ + else + { + SWSS_LOG_NOTICE("Update next hop group %s with %s", + index.c_str(), + nhg_str.c_str()); + + const auto& nhg_ptr = nhg_it->second.nhg; + + + /* + * A NHG update should never change the SAI ID of the NHG if it + * is still referenced by some other objects, as they would not + * be notified about this change. The only exception to this + * rule is for the temporary NHGs, as the referencing objects + * will keep querying the NhgOrch for any SAI ID updates. + */ + if (!nhg_ptr->isTemp() && + ((nhg_key.getSize() == 1) || (nhg_ptr->getSize() == 1)) && + (nhg_it->second.ref_count > 0)) + { + SWSS_LOG_WARN("Next hop group %s update would change SAI " + "ID while referenced, so not performed", + index.c_str()); + success = false; + } + /* + * If the update would mandate promoting a temporary next hop + * group to a multiple next hops group and we do not have the + * resources yet, we have to skip it until we have enough + * resources. + */ + else if (nhg_ptr->isTemp() && + (nhg_key.getSize() > 1) && + (NextHopGroup::getCount() >= m_maxNhgCount)) + { + /* + * If the group was updated in such way that the previously + * chosen next hop does not represent the new group key, + * update the temporary group to choose a new next hop from + * the new key. + */ + if (!nhg_key.contains(nhg_ptr->getKey())) + { + SWSS_LOG_NOTICE("Updating temporary group %s to %s", + index.c_str(), + nhg_key.to_string().c_str()); + + try + { + /* Create the new temporary next hop group. */ + auto new_nhg = std::make_unique( + createTempNhg(nhg_key)); + + /* + * If we successfully sync the new group, update + * only the next hop group entry's pointer so we + * don't mess up the reference counter, as other + * objects may already reference it. + */ + if (new_nhg->sync()) + { + SWSS_LOG_INFO( + "Temporary NHG successfully synced"); + nhg_it->second.nhg = std::move(new_nhg); + } + else + { + SWSS_LOG_WARN( + "Failed to sync updated temp NHG %s with %s", + index.c_str(), + nhg_key.to_string().c_str()); + } + } + catch (const std::exception& e) + { + SWSS_LOG_WARN( + "Got exception: %s while adding temp group %s", + e.what(), + nhg_key.to_string().c_str()); + } + } + + /* + * Because the resources are exhausted, we have to keep + * trying to update the temporary group until we can + * promote it to a fully functional group. + */ + success = false; + } + /* Common update, when all the requirements are met. */ + else + { + success = nhg_ptr->update(nhg_key); + } + } + } + else if (op == DEL_COMMAND) + { + SWSS_LOG_NOTICE("Deleting next hop group %s", index.c_str()); + + /* If the group does not exist, do nothing. */ + if (nhg_it == m_syncdNextHopGroups.end()) + { + SWSS_LOG_WARN("Unable to find group with key %s to remove", + index.c_str()); + /* Mark the operation as successful to consume it. */ + success = true; + } + /* If the group does exist, but it's still referenced, skip. */ + else if (nhg_it->second.ref_count > 0) + { + SWSS_LOG_WARN( + "Unable to remove group %s which is referenced", + index.c_str()); + success = false; + } + /* Else, if the group is no more referenced, delete it. */ + else + { + const auto& nhg = nhg_it->second.nhg; + + success = nhg->desync(); + + if (success) + { + m_syncdNextHopGroups.erase(nhg_it); + } + } + } + else + { + SWSS_LOG_ERROR("Unknown operation type %s\n", op.c_str()); + /* Mark the operation as successful to consume it. */ + success = true; + } + + /* Depending on the operation success, consume it or skip it. */ + if (success) + { + it = consumer.m_toSync.erase(it); + } + else + { + ++it; + } + } +} + +/* + * Purpose: Validate a next hop for any groups that contains it. + * + * Description: Iterate over all next hop groups and validate the next hop in + * those who contain it. + * + * Params: IN nh_key - The next hop to validate. + * + * Returns: true, if the next hop was successfully validated in all + * containing groups; + * false, otherwise. + */ +bool NhgOrch::validateNextHop(const NextHopKey& nh_key) +{ + SWSS_LOG_ENTER(); + SWSS_LOG_INFO("Validating next hop %s", nh_key.to_string().c_str()); + + /* + * Iterate through all groups and validate the next hop in those who + * contain it. + */ + for (auto& it : m_syncdNextHopGroups) + { + auto& nhg = it.second.nhg; + + SWSS_LOG_INFO("Check if next hop in group %s", + it.first.c_str()); + + if (nhg->hasNextHop(nh_key)) + { + SWSS_LOG_INFO("Group has next hop"); + + /* + * If sync fails, exit right away, as we expect it to be due to a + * raeson for which any other future validations will fail too. + */ + if (!nhg->validateNextHop(nh_key)) + { + SWSS_LOG_ERROR("Failed to validate next hop %s in group %s", + nh_key.to_string().c_str(), + it.first.c_str()); + return false; + } + } + } + + return true; +} + +/* + * Purpose: Invalidate a next hop for any groups containing it. + * + * Description: Iterate through the next hop groups and desync the next hop + * from those that contain it. + * + * Params: IN nh_key - The next hop to invalidate. + * + * Returns: true, if the next hop was successfully invalidatedd from all + * containing groups; + * false, otherwise. + */ +bool NhgOrch::invalidateNextHop(const NextHopKey& nh_key) +{ + SWSS_LOG_ENTER(); + SWSS_LOG_INFO("Invalidating next hop %s", nh_key.to_string().c_str()); + + /* + * Iterate through all groups and invalidate the next hop from those who + * contain it. + */ + for (auto& it : m_syncdNextHopGroups) + { + auto& nhg = it.second.nhg; + + SWSS_LOG_INFO("Check if next hop is in group %s", + it.first.c_str()); + + if (nhg->hasNextHop(nh_key)) + { + SWSS_LOG_INFO("Group has next hop"); + + /* If the desync fails, exit right away. */ + if (!nhg->invalidateNextHop(nh_key)) + { + SWSS_LOG_WARN("Failed to invalidate next hop %s from group %s", + nh_key.to_string().c_str(), + it.first.c_str()); + return false; + } + } + } + + return true; +} + +/* + * Purpose: Increase the ref count for a next hop group. + * + * Description: Increment the ref count for a next hop group by 1. + * + * Params: IN index - The index of the next hop group. + * + * Returns: Nothing. + */ +void NhgOrch::incNhgRefCount(const std::string& index) +{ + SWSS_LOG_ENTER(); + + NhgEntry& nhg_entry = m_syncdNextHopGroups.at(index); + + SWSS_LOG_INFO("Increment group %s ref count from %u to %u", + index.c_str(), + nhg_entry.ref_count, + nhg_entry.ref_count + 1); + + ++nhg_entry.ref_count; +} + +/* + * Purpose: Decrease the ref count for a next hop group. + * + * Description: Decrement the ref count for a next hop group by 1. + * + * Params: IN index - The index of the next hop group. + * + * Returns: Nothing. + */ +void NhgOrch::decNhgRefCount(const std::string& index) +{ + SWSS_LOG_ENTER(); + + NhgEntry& nhg_entry = m_syncdNextHopGroups.at(index); + + /* Sanity check so we don't overflow. */ + assert(nhg_entry.ref_count > 0); + + SWSS_LOG_INFO("Decrement group %s ref count from %u to %u", + index.c_str(), + nhg_entry.ref_count, + nhg_entry.ref_count - 1); + + --nhg_entry.ref_count; +} + +/* + * Purpose: Get the next hop ID of the member. + * + * Description: Get the SAI ID of the next hop from NeighOrch. + * + * Params: None. + * + * Returns: The SAI ID of the next hop, or SAI_NULL_OBJECT_ID if the next + * hop is not valid. + */ +sai_object_id_t NextHopGroupMember::getNhId() const +{ + SWSS_LOG_ENTER(); + + sai_object_id_t nh_id = SAI_NULL_OBJECT_ID; + + if (gNeighOrch->hasNextHop(m_nh_key)) + { + SWSS_LOG_INFO("NeighOrch has next hop %s", + m_nh_key.to_string().c_str()); + nh_id = gNeighOrch->getNextHopId(m_nh_key); + } + /* + * If the next hop is labeled and the IP next hop exists, create the + * labeled one over NeighOrch as it doesn't know about these next hops. + * We don't do this in the constructor as the IP next hop may be added + * after the object is created and would never create the labeled next hop + * afterwards. + */ + else if (isLabeled() && gNeighOrch->hasNextHop(m_nh_key.ipKey())) + { + SWSS_LOG_INFO("Create labeled next hop %s", + m_nh_key.to_string().c_str()); + gNeighOrch->addNextHop(m_nh_key); + nh_id = gNeighOrch->getNextHopId(m_nh_key); + } + + return nh_id; +} + +/* + * Purpose: Move assignment operator. + * + * Description: Perform member-wise swap. + * + * Params: IN nhgm - The next hop group member to swap. + * + * Returns: Reference to this object. + */ +NextHopGroupMember& NextHopGroupMember::operator=(NextHopGroupMember&& nhgm) +{ + SWSS_LOG_ENTER(); + + std::swap(m_nh_key, nhgm.m_nh_key); + std::swap(m_gm_id, nhgm.m_gm_id); + + return *this; +} + +/* + * Purpose: Sync the group member with the given group member ID. + * + * Description: Set the group member's SAI ID to the the one given and + * increment the appropriate ref counters. + * + * Params: IN gm_id - The group member SAI ID to set. + * + * Returns: Nothing. + */ +void NextHopGroupMember::sync(sai_object_id_t gm_id) +{ + SWSS_LOG_ENTER(); + + /* The SAI ID should be updated from invalid to something valid. */ + assert((m_gm_id == SAI_NULL_OBJECT_ID) && (gm_id != SAI_NULL_OBJECT_ID)); + + m_gm_id = gm_id; + gCrmOrch->incCrmResUsedCounter(CrmResourceType::CRM_NEXTHOP_GROUP_MEMBER); + gNeighOrch->increaseNextHopRefCount(m_nh_key); +} + +/* + * Purpose: Desync the group member, resetting it's SAI ID. + * + * Description: Reset the group member's SAI ID and decrement the appropriate + * ref counters. + * + * Params: None. + * + * Returns: Nothing. + */ +void NextHopGroupMember::desync() +{ + SWSS_LOG_ENTER(); + + /* + * If the member is already desynced, exit so we don't decrement the ref + * counters more than once. + */ + if (!isSynced()) + { + SWSS_LOG_INFO("Next hop group member %s is already desynced", + m_nh_key.to_string().c_str()); + return; + } + + m_gm_id = SAI_NULL_OBJECT_ID; + gCrmOrch->decCrmResUsedCounter(CrmResourceType::CRM_NEXTHOP_GROUP_MEMBER); + gNeighOrch->decreaseNextHopRefCount(m_nh_key); +} + +/* + * Purpose: Destructor. + * + * Description: Assert the group member is desynced and remove the labeled + * next hop from NeighOrch if it is unreferenced. + * + * Params: None. + * + * Returns: Nothing. + */ +NextHopGroupMember::~NextHopGroupMember() +{ + SWSS_LOG_ENTER(); + + /* + * The group member should be desynced from it's group before destroying + * it. + */ + assert(!isSynced()); + + /* + * If the labeled next hop is unreferenced, delete it from NeighOrch as + * NhgOrch and RouteOrch are the ones controlling it's lifetime. They both + * watch over these labeled next hops, so it doesn't matter who created + * them as they're both doing the same checks before deleting a labeled + * next hop. + */ + if (isLabeled() && (gNeighOrch->getNextHopRefCount(m_nh_key) == 0)) + { + SWSS_LOG_INFO("Delete labeled next hop %s", + m_nh_key.to_string().c_str()); + gNeighOrch->removeNextHop(m_nh_key); + } +} + +/* + * Purpose: Constructor. + * + * Description: Initialize the group's members based on the next hop group key. + * + * Params: IN key - The next hop group's key. + * + * Returns: Nothing. + */ +NextHopGroup::NextHopGroup(const NextHopGroupKey& key) : + m_key(key), + m_id(SAI_NULL_OBJECT_ID), + m_is_temp(false) +{ + SWSS_LOG_ENTER(); + SWSS_LOG_INFO("Creating next hop group %s", m_key.to_string().c_str()); + + /* Parse the key and create the members. */ + for (const auto& it : m_key.getNextHops()) + { + SWSS_LOG_INFO("Adding next hop %s to the group", + it.to_string().c_str()); + m_members.emplace(it, NextHopGroupMember(it)); + } +} + +/* + * Purpose: Move constructor. + * + * Description: Initialize the members by doing member-wise move construct. + * + * Params: IN nhg - The rvalue object to initialize from. + * + * Returns: Nothing. + */ +NextHopGroup::NextHopGroup(NextHopGroup&& nhg) : + m_key(std::move(nhg.m_key)), + m_id(std::move(nhg.m_id)), + m_members(std::move(nhg.m_members)), + m_is_temp(nhg.m_is_temp) +{ + SWSS_LOG_ENTER(); + + /* Invalidate the rvalue SAI ID. */ + nhg.m_id = SAI_NULL_OBJECT_ID; +} + +/* + * Purpose: Move assignment operator. + * + * Description: Perform member-wise swap. + * + * Params: IN nhg - The rvalue object to swap with. + * + * Returns: Referene to this object. + */ +NextHopGroup& NextHopGroup::operator=(NextHopGroup&& nhg) +{ + SWSS_LOG_ENTER(); + + std::swap(m_key, nhg.m_key); + std::swap(m_id, nhg.m_id); + std::swap(m_members, nhg.m_members); + m_is_temp = nhg.m_is_temp; + + return *this; +} + +/* + * Purpose: Sync a next hop group. + * + * Description: Fill in the NHG ID. If the group contains only one NH, this ID + * will be the SAI ID of the next hop that NeighOrch owns. If it + * has more than one NH, create a group over the SAI API and then + * add it's members. + * + * Params: None. + * + * Returns: true, if the operation was successful; + * false, otherwise. + */ +bool NextHopGroup::sync() +{ + SWSS_LOG_ENTER(); + + /* If the group is already synced, exit. */ + if (isSynced()) + { + SWSS_LOG_INFO("Group %s is already synced", to_string().c_str()); + return true; + } + + /* + * If the group has only one member, the group ID will be the member's NH + * ID. + */ + if (m_members.size() == 1) + { + const NextHopGroupMember& nhgm = m_members.begin()->second; + + if (nhgm.getNhId() == SAI_NULL_OBJECT_ID) + { + SWSS_LOG_WARN("Next hop %s is not synced", + nhgm.getNhKey().to_string().c_str()); + return false; + } + else + { + m_id = nhgm.getNhId(); + } + } + /* If the key contains more than one NH, create a group. */ + else + { + /* Assert the group is not empty. */ + assert(!m_members.empty()); + + /* Create the group over SAI. */ + sai_attribute_t nhg_attr; + vector nhg_attrs; + + nhg_attr.id = SAI_NEXT_HOP_GROUP_ATTR_TYPE; + nhg_attr.value.s32 = SAI_NEXT_HOP_GROUP_TYPE_ECMP; + nhg_attrs.push_back(nhg_attr); + + sai_status_t status = sai_next_hop_group_api->create_next_hop_group( + &m_id, + gSwitchId, + (uint32_t)nhg_attrs.size(), + nhg_attrs.data()); + + /* If the operation fails, exit. */ + if (status != SAI_STATUS_SUCCESS) + { + SWSS_LOG_ERROR("Failed to create next hop group %s, rv:%d", + m_key.to_string().c_str(), status); + return false; + } + + SWSS_LOG_INFO("Next hop group %s has SAI ID %lu", + m_key.to_string().c_str(), + m_id); + + /* Increment the amount of programmed next hop groups. */ + gCrmOrch->incCrmResUsedCounter(CrmResourceType::CRM_NEXTHOP_GROUP); + + /* Increment the number of synced NHGs. */ + ++m_count; + + /* + * Try creating the next hop group's members over SAI. + */ + if (!syncMembers(m_key.getNextHops())) + { + SWSS_LOG_WARN("Failed to create next hop members of group %s", + to_string().c_str()); + return false; + } + } + + return true; +} + +/* + * Purpose: Create a temporary next hop group when resources are exhausted. + * + * Description: Choose one member to represent the group and create a group + * with only that next hop as a member. Any object referencing + * the SAI ID of a temporary group should keep querying NhgOrch in + * case the group is updated, as it's SAI ID will change at that + * point. + * + * Params: IN index - The CP index of the next hop group. + * + * Returns: The created temporary next hop group. + */ +NextHopGroup NhgOrch::createTempNhg(const NextHopGroupKey& nhg_key) +{ + SWSS_LOG_ENTER(); + SWSS_LOG_INFO("Syncing temporary group %s", nhg_key.to_string().c_str()); + + /* Get a list of all valid next hops in the group. */ + std::list valid_nhs; + + for (const auto& nh_key : nhg_key.getNextHops()) + { + /* + * Check if the IP next hop exists. We check for the IP next hop as + * the group might contain labeled NHs which we should create if their + * IP next hop does exist. + */ + if (gNeighOrch->hasNextHop(nh_key.ipKey())) + { + SWSS_LOG_INFO("Next hop %s is a candidate for temporary group %s", + nh_key.to_string().c_str(), + nhg_key.to_string().c_str()); + valid_nhs.push_back(nh_key); + } + } + + /* If there is no valid member, exit. */ + if (valid_nhs.empty()) + { + SWSS_LOG_INFO("There is no valid NH to sync temporary group %s", + nhg_key.to_string().c_str()); + throw std::logic_error("No valid NH in the key"); + } + + /* Randomly select the valid NH to represent the group. */ + auto it = valid_nhs.begin(); + advance(it, rand() % valid_nhs.size()); + + /* Create the temporary group. */ + NextHopGroup nhg(NextHopGroupKey(it->to_string())); + nhg.setTemp(true); + + return nhg; +} + +/* + * Purpose: Desync the next hop group. + * + * Description: Reset the group's SAI ID. If the group has more than one + * members, desync the members and the group. + * + * Params: None. + * + * Returns: true, if the operation was successful; + * false, otherwise + */ +bool NextHopGroup::desync() +{ + SWSS_LOG_ENTER(); + SWSS_LOG_INFO("Desyncing group %s", to_string().c_str()); + + /* If the group is already desynced, there is nothing to be done. */ + if (!isSynced()) + { + SWSS_LOG_INFO("Group %s is already desynced", + m_key.to_string().c_str()); + return true; + } + + /* + * If the group has more than one members, desync it's members, then the + * group. + */ + if (m_members.size() > 1) + { + /* Desync group's members. If we failed to desync the members, exit. */ + if (!desyncMembers(m_key.getNextHops())) + { + SWSS_LOG_ERROR("Failed to desync group %s members", + to_string().c_str()); + return false; + } + + /* Desync the group. */ + sai_status_t status = sai_next_hop_group_api-> + remove_next_hop_group(m_id); + + if (status != SAI_STATUS_SUCCESS) + { + SWSS_LOG_ERROR("Failed to remove next hop group %lu, rv: %d", + m_id, status); + return false; + } + + /* If the operation is successful, release the resources. */ + gCrmOrch->decCrmResUsedCounter( + CrmResourceType::CRM_NEXTHOP_GROUP); + --m_count; + } + + /* Reset the group ID. */ + m_id = SAI_NULL_OBJECT_ID; + + return true; +} + +/* + * Purpose: Sync the given next hop group's members over the SAI API. + * + * Description: Iterate over the given members and sync them. If the member + * is already synced, we skip it. If any of the next hops isn't + * already synced by the neighOrch, this will fail. Any next hop + * which has the neighbor interface down will be skipped. + * + * Params: IN nh_keys - The next hop keys of the members to sync. + * + * Returns: true, if the members were added succesfully; + * false, otherwise. + */ +bool NextHopGroup::syncMembers(const std::set& nh_keys) +{ + SWSS_LOG_ENTER(); + SWSS_LOG_INFO("Adding next hop group %s members", + to_string().c_str()); + + /* This method should not be called for groups with only one NH. */ + assert(m_members.size() > 1); + + ObjectBulker nextHopGroupMemberBulker( + sai_next_hop_group_api, gSwitchId); + + /* + * Iterate over the given next hops. + * If the group member is already synced, skip it. + * If any next hop is not synced, thus neighOrch doesn't have it, stop + * immediately. + * If a next hop's interface is down, skip it from being synced. + */ + std::map syncingMembers; + + for (const auto& nh_key : nh_keys) + { + SWSS_LOG_INFO("Checking if next hop %s is valid", + nh_key.to_string().c_str()); + + NextHopGroupMember& nhgm = m_members.at(nh_key); + + /* If the member is already synced, continue. */ + if (nhgm.getGmId() != SAI_NULL_OBJECT_ID) + { + SWSS_LOG_INFO("Update member"); + continue; + } + + /* + * If the next hop doesn't exist, stop from syncing the members. + */ + if (nhgm.getNhId() == SAI_NULL_OBJECT_ID) + { + SWSS_LOG_WARN("Failed to get next hop %s in group %s", + nhgm.to_string().c_str(), to_string().c_str()); + return false; + } + + /* If the neighbor's interface is down, skip from being syncd. */ + if (gNeighOrch->isNextHopFlagSet(nh_key, NHFLAGS_IFDOWN)) + { + SWSS_LOG_WARN("Skip next hop %s in group %s, interface is down", + nh_key.to_string().c_str(), to_string().c_str()); + continue; + } + + /* Create the next hop group member's attributes and fill them. */ + vector nhgm_attrs = createNhgmAttrs(nhgm); + + /* Add a bulker entry for this member. */ + nextHopGroupMemberBulker.create_entry(&syncingMembers[nh_key], + (uint32_t)nhgm_attrs.size(), + nhgm_attrs.data()); + } + + /* Flush the bulker to perform the sync. */ + nextHopGroupMemberBulker.flush(); + + /* + * Go through the synced members and increment the Crm ref count for the + * successful ones. + */ + bool success = true; + for (const auto& mbr : syncingMembers) + { + SWSS_LOG_INFO("Checking next hop member %s has a valid SAI ID", + mbr.first.to_string().c_str()); + + /* Check that the returned member ID is valid. */ + if (mbr.second == SAI_NULL_OBJECT_ID) + { + SWSS_LOG_ERROR("Failed to create next hop group %lu's member %s", + m_id, mbr.first.to_string().c_str()); + success = false; + } + else + { + m_members.at(mbr.first).sync(mbr.second); + } + } + + SWSS_LOG_INFO("Returning %d", success); + + return success; +} +/* + * Purpose: Desync the given group's members over the SAI API. + * + * Description: Go through the given members and desync them. + * + * Params: IN nh_keys - The next hop keys of the members to desync. + * + * Returns: true, if the operation was successful; + * false, otherwise + */ +bool NextHopGroup::desyncMembers(const std::set& nh_keys) +{ + SWSS_LOG_ENTER(); + SWSS_LOG_INFO("Removing members of group %s", + to_string().c_str()); + + /* This method should not be called for groups with only one NH. */ + assert(m_members.size() > 1); + + ObjectBulker nextHopGroupMemberBulker( + sai_next_hop_group_api, gSwitchId); + + /* + * Iterate through the given group members add them to be desynced. + * + * Keep track of the SAI delete statuses in case one of them returns an + * error. We assume that deletion should always succeed. If for some + * reason it doesn't, there's nothing we can do, but we'll log an error + * later. + */ + std::map statuses; + + for (const auto& nh_key : nh_keys) + { + SWSS_LOG_INFO("Desyncing member %s", nh_key.to_string().c_str()); + + const NextHopGroupMember& nhgm = m_members.at(nh_key); + + if (nhgm.isSynced()) + { + SWSS_LOG_INFO("Removing next hop group member %s", + nh_key.to_string().c_str()); + nextHopGroupMemberBulker.remove_entry(&statuses[nh_key], + nhgm.getGmId()); + } + } + + /* Flush the bulker to apply the changes. */ + nextHopGroupMemberBulker.flush(); + + /* + * Iterate over the statuses map and check if the removal was successful. + * If it was, decrement the Crm counter and reset the member's ID. If it + * wasn't, log an error message. + */ + bool success = true; + + for (const auto& status : statuses) + { + SWSS_LOG_INFO("Check member's %s status", + status.first.to_string().c_str()); + + if (status.second == SAI_STATUS_SUCCESS) + { + SWSS_LOG_INFO("Member was successfully desynced"); + m_members.at(status.first).desync(); + } + else + { + SWSS_LOG_ERROR("Could not remove next hop group member %s, rv: %d", + status.first.to_string().c_str(), status.second); + success = false; + } + } + + SWSS_LOG_INFO("Returning %d", success); + + return success; +} + +/* + * Purpose: Update the next hop group based on a new next hop group key. + * + * Description: Update the group's members by removing the members that aren't + * in the new next hop group and adding the new members. We first + * remove the missing members to avoid cases where we reached the + * ASIC group members limit. This will not update the group's SAI + * ID in any way, unless we are promoting a temporary group. + * + * Params: IN nhg_key - The new next hop group key to update to. + * + * Returns: true, if the operation was successful; + * false, otherwise. + */ +bool NextHopGroup::update(const NextHopGroupKey& nhg_key) +{ + SWSS_LOG_ENTER(); + SWSS_LOG_INFO("Update group %s with %s", + to_string().c_str(), nhg_key.to_string().c_str()); + + /* + * There are three cases where the SAI ID of the NHG will change: + * - changing a single next hop group to another single next hop group + * - changing a single next hop group to a multiple next hop group + * - changing a multiple next hop group to a single next hop group + * + * For these kind of updates, we can simply swap the existing group with + * the updated group, as we have no way of preserving the existing SAI ID. + * + * Also, we can perform the same operation if the group is not synced at + * all. + */ + if ((nhg_key.getSize() == 1) || (m_members.size() == 1) || !isSynced()) + { + SWSS_LOG_INFO("Updating group without preserving it's SAI ID"); + + bool was_synced = isSynced(); + *this = NextHopGroup(nhg_key); + + /* Sync the group only if it was synced before. */ + return (was_synced ? sync() : true); + } + /* + * If we are updating a multiple next hop group to another multiple next + * hop group, we can preserve it's SAI ID by only updating it's members. + * This way, any objects referencing the SAI ID of this object will + * continue to work. + */ + else + { + /* Update the key. */ + m_key = nhg_key; + + std::set new_nh_keys = nhg_key.getNextHops(); + std::set removed_nh_keys; + + /* Mark the members that need to be removed. */ + for (auto& mbr_it : m_members) + { + const NextHopKey& nh_key = mbr_it.first; + + /* Look for the existing member inside the new ones. */ + const auto& new_nh_key_it = new_nh_keys.find(nh_key); + + /* If the member is not found, then it needs to be removed. */ + if (new_nh_key_it == new_nh_keys.end()) + { + SWSS_LOG_INFO("Add member %s to be desynced", + nh_key.to_string().c_str()); + removed_nh_keys.insert(nh_key); + } + else + { + /* + * Erase the member from the new members list as it already + * exists. + */ + new_nh_keys.erase(new_nh_key_it); + } + } + + /* Desync the removed members. */ + if (!desyncMembers(removed_nh_keys)) + { + SWSS_LOG_WARN("Failed to desync members from group %s", + to_string().c_str()); + return false; + } + + /* Remove the desynced members. */ + for (const auto& nh_key : removed_nh_keys) + { + m_members.erase(nh_key); + } + + /* Add any new members to the group. */ + for (const auto& it : new_nh_keys) + { + m_members.emplace(it, NextHopGroupMember(it)); + } + + /* + * Sync all the members of the group. We sync all of them because + * there may be previous members that were not successfully synced + * before the update, so we must make sure we sync those as well. + */ + if (!syncMembers(m_key.getNextHops())) + { + SWSS_LOG_WARN("Failed to sync new members for group %s", + to_string().c_str()); + return false; + } + + return true; + } +} + +/* + * Purpose: Create the attributes vector for a next hop group member. + * + * Description: Create the group ID and next hop ID attributes. + * + * Params: IN nhgm - The next hop group member. + * + * Returns: The attributes vector for the given next hop. + */ +vector NextHopGroup::createNhgmAttrs( + const NextHopGroupMember& nhgm) const +{ + SWSS_LOG_ENTER(); + + vector nhgm_attrs; + sai_attribute_t nhgm_attr; + + /* Fill in the group ID. */ + nhgm_attr.id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_GROUP_ID; + nhgm_attr.value.oid = m_id; + nhgm_attrs.push_back(nhgm_attr); + + /* Fill in the next hop ID. */ + nhgm_attr.id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_ID; + nhgm_attr.value.oid = nhgm.getNhId(); + nhgm_attrs.push_back(nhgm_attr); + + return nhgm_attrs; +} + +/* + * Purpose: Validate a next hop in the group. + * + * Description: Sync the validated next hop group member. + * + * Params: IN nh_key - The next hop to validate. + * + * Returns: true, if the operation was successful; + * false, otherwise. + */ +bool NextHopGroup::validateNextHop(const NextHopKey& nh_key) +{ + SWSS_LOG_ENTER(); + SWSS_LOG_INFO("Validate NH %s in group %s", + nh_key.to_string().c_str(), + to_string().c_str()); + + /* + * If the group has only one member, there is nothing to be done. The + * member is only a reference to the next hop owned by NeighOrch, so it is + * not for us to take any decisions regarding those. + */ + if (m_members.size() == 1) + { + return true; + } + + return syncMembers({nh_key}); +} + +/* + * Purpose: Invalidate a next hop in the group. + * + * Description: Sync the invalidated next hop group member. + * + * Params: IN nh_key - The next hop to invalidate. + * + * Returns: true, if the operation was successful; + * false, otherwise. + */ +bool NextHopGroup::invalidateNextHop(const NextHopKey& nh_key) +{ + SWSS_LOG_ENTER(); + SWSS_LOG_INFO("Invalidate NH %s in group %s", + nh_key.to_string().c_str(), + to_string().c_str()); + + /* + * If the group has only one member, there is nothing to be done. The + * member is only a reference to the next hop owned by NeighOrch, so it is + * not for us to take any decisions regarding those. + */ + if (m_members.size() == 1) + { + return true; + } + + return desyncMembers({nh_key}); +} diff --git a/orchagent/nhgorch.h b/orchagent/nhgorch.h new file mode 100644 index 00000000000..d7bb588b068 --- /dev/null +++ b/orchagent/nhgorch.h @@ -0,0 +1,242 @@ +#pragma once + +#include "orch.h" +#include "nexthopgroupkey.h" + +class NextHopGroupMember +{ +public: + NextHopGroupMember(const std::pair& nhgm) : + m_nh_key(nhgm.first), + m_gm_id(SAI_NULL_OBJECT_ID) {} + + /* Constructors / Assignment operators. */ + NextHopGroupMember(const NextHopKey& nh_key) : + m_nh_key(nh_key), + m_gm_id(SAI_NULL_OBJECT_ID) {} + + NextHopGroupMember(NextHopGroupMember&& nhgm) : + m_nh_key(std::move(nhgm.m_nh_key)), + m_gm_id(nhgm.m_gm_id) + { nhgm.m_gm_id = SAI_NULL_OBJECT_ID; } + + NextHopGroupMember& operator=(NextHopGroupMember&& nhgm); + + /* + * Prevent object copying so we don't end up having multiple objects + * referencing the same SAI objects. + */ + NextHopGroupMember(const NextHopGroupMember&) = delete; + void operator=(const NextHopGroupMember&) = delete; + + /* Destructor. */ + virtual ~NextHopGroupMember(); + + /* Sync / Desync. */ + void sync(sai_object_id_t gm_id); + void desync(); + + /* Getters / Setters. */ + inline const NextHopKey& getNhKey() const { return m_nh_key; } + sai_object_id_t getNhId() const; + inline sai_object_id_t getGmId() const { return m_gm_id; } + inline bool isSynced() const { return m_gm_id != SAI_NULL_OBJECT_ID; } + + /* Check if the next hop is labeled. */ + inline bool isLabeled() const { return !m_nh_key.label_stack.empty(); } + + /* Convert member's details to string. */ + std::string to_string() const + { + return m_nh_key.to_string() + + ", SAI ID: " + std::to_string(m_gm_id); + } + +private: + /* The key of the next hop of this member. */ + NextHopKey m_nh_key; + + /* The group member SAI ID for this member. */ + sai_object_id_t m_gm_id; +}; + +/* Map indexed by NextHopKey, containing the SAI ID of the group member. */ +typedef std::map NhgMembers; + +/* + * NextHopGroup class representing a next hop group object. + */ +class NextHopGroup +{ +public: + /* Constructors. */ + explicit NextHopGroup(const NextHopGroupKey& key); + NextHopGroup(NextHopGroup&& nhg); + NextHopGroup& operator=(NextHopGroup&& nhg); + + /* Destructor. */ + virtual ~NextHopGroup() { desync(); } + + /* Sync the group, creating the group's and members SAI IDs. */ + bool sync(); + + /* Desync the group, reseting the group's and members SAI IDs. */ + bool desync(); + + /* + * Update the group based on a new next hop group key. This will also + * perform any sync / desync necessary. + */ + bool update(const NextHopGroupKey& nhg_key); + + /* Check if the group contains the given next hop. */ + inline bool hasNextHop(const NextHopKey& nh_key) const { + return m_members.find(nh_key) != m_members.end(); } + + /* Validate a next hop in the group, syncing it. */ + bool validateNextHop(const NextHopKey& nh_key); + + /* Invalidate a next hop in the group, desyncing it. */ + bool invalidateNextHop(const NextHopKey& nh_key); + + /* Increment the number of existing groups. */ + static inline void incCount() { ++m_count; } + + /* Decrement the number of existing groups. */ + static inline void decCount() { assert(m_count > 0); --m_count; } + + /* Getters / Setters. */ + inline const NextHopGroupKey& getKey() const { return m_key; } + inline sai_object_id_t getId() const { return m_id; } + static inline unsigned int getCount() { return m_count; } + inline bool isTemp() const { return m_is_temp; } + inline void setTemp(bool is_temp) { m_is_temp = is_temp; } + inline bool isSynced() const { return m_id != SAI_NULL_OBJECT_ID; } + inline size_t getSize() const { return m_members.size(); } + + /* Convert NHG's details to a string. */ + std::string to_string() const + { + return m_key.to_string() + ", SAI ID: " + std::to_string(m_id); + } + +private: + + /* The next hop group key of this group. */ + NextHopGroupKey m_key; + + /* The SAI ID of the group. */ + sai_object_id_t m_id; + + /* Members of this next hop group. */ + NhgMembers m_members; + + /* Whether the group is temporary or not. */ + bool m_is_temp; + + /* + * Number of existing groups. Incremented when an object is created and + * decremented when an object is destroyed. This will also account for the + * groups created by RouteOrch. + */ + static unsigned int m_count; + + /* Add group's members over the SAI API for the given keys. */ + bool syncMembers(const std::set& nh_keys); + + /* Remove group's members the SAI API from the given keys. */ + bool desyncMembers(const std::set& nh_keys); + + /* Create the attributes vector for a next hop group member. */ + vector createNhgmAttrs( + const NextHopGroupMember& nhgm) const; +}; + +/* + * Structure describing a next hop group which NhgOrch owns. Beside having a + * unique pointer to that next hop group, we also want to keep a ref count so + * NhgOrch knows how many other objects reference the next hop group in order + * not to delete them while still being referenced. + */ +struct NhgEntry +{ + /* Pointer to the next hop group. NhgOrch is the sole owner of it. */ + std::unique_ptr nhg; + + /* Number of external objects referencing this next hop group. */ + unsigned int ref_count; + + NhgEntry() = default; + explicit NhgEntry(std::unique_ptr&& _nhg, + unsigned int _ref_count = 0) : + nhg(std::move(_nhg)), ref_count(_ref_count) {} +}; + +/* + * Map indexed by next hop group's CP ID, containing the next hop group for + * that ID and the number of objects referencing it. + */ +typedef std::unordered_map NhgTable; + +/* + * Next Hop Group Orchestrator class that handles NEXT_HOP_GROUP_TABLE + * updates. + */ +class NhgOrch : public Orch +{ +public: + /* + * Constructor. + */ + NhgOrch(DBConnector *db, string tableName); + + /* Check if the next hop group given by it's index exists. */ + inline bool hasNhg(const std::string& index) const + { return m_syncdNextHopGroups.find(index) != + m_syncdNextHopGroups.end(); } + + /* + * Get the next hop group given by it's index. If the index does not exist + * in map, a std::out_of_range exception will be thrown. + */ + inline const NextHopGroup& getNhg(const std::string& index) const + { return *m_syncdNextHopGroups.at(index).nhg; } + + /* Add a temporary next hop group when resources are exhausted. */ + NextHopGroup createTempNhg(const NextHopGroupKey& nhg_key); + + /* Getters / Setters. */ + inline unsigned int getMaxNhgCount() const { return m_maxNhgCount; } + static inline unsigned int getNhgCount() + { return NextHopGroup::getCount(); } + + /* Validate / Invalidate a next hop. */ + bool validateNextHop(const NextHopKey& nh_key); + bool invalidateNextHop(const NextHopKey& nh_key); + + /* Increase / Decrease the number of next hop groups. */ + inline void incNhgCount() + { + assert(NextHopGroup::getCount() < m_maxNhgCount); + NextHopGroup::incCount(); + } + inline void decNhgCount() { NextHopGroup::decCount(); } + + /* Increase / Decrease ref count for a NHG given by it's index. */ + void incNhgRefCount(const std::string& index); + void decNhgRefCount(const std::string& index); + +private: + + /* + * Switch's maximum number of next hop groups capacity. + */ + unsigned int m_maxNhgCount; + + /* + * The next hop group table. + */ + NhgTable m_syncdNextHopGroups; + + void doTask(Consumer& consumer); +}; diff --git a/orchagent/orchdaemon.cpp b/orchagent/orchdaemon.cpp index 592d5141f4d..b4f7a4ece28 100644 --- a/orchagent/orchdaemon.cpp +++ b/orchagent/orchdaemon.cpp @@ -30,6 +30,7 @@ FdbOrch *gFdbOrch; IntfsOrch *gIntfsOrch; NeighOrch *gNeighOrch; RouteOrch *gRouteOrch; +NhgOrch *gNhgOrch; FgNhgOrch *gFgNhgOrch; AclOrch *gAclOrch; MirrorOrch *gMirrorOrch; @@ -152,7 +153,8 @@ bool OrchDaemon::init() { APP_ROUTE_TABLE_NAME, routeorch_pri }, { APP_LABEL_ROUTE_TABLE_NAME, routeorch_pri } }; - gRouteOrch = new RouteOrch(m_applDb, route_tables, gSwitchOrch, gNeighOrch, gIntfsOrch, vrf_orch, gFgNhgOrch); + gRouteOrch = new RouteOrch(m_applDb, route_tables, gNeighOrch, gIntfsOrch, vrf_orch, gFgNhgOrch); + gNhgOrch = new NhgOrch(m_applDb, APP_NEXT_HOP_GROUP_TABLE_NAME); CoppOrch *copp_orch = new CoppOrch(m_applDb, APP_COPP_TABLE_NAME); TunnelDecapOrch *tunnel_decap_orch = new TunnelDecapOrch(m_applDb, APP_TUNNEL_DECAP_TABLE_NAME); @@ -277,7 +279,7 @@ bool OrchDaemon::init() }; gMacsecOrch = new MACsecOrch(m_applDb, m_stateDb, macsec_app_tables, gPortsOrch); - + /* * The order of the orch list is important for state restore of warm start and * the queued processing in m_toSync map after gPortsOrch->allPortsReady() is set. @@ -286,7 +288,7 @@ bool OrchDaemon::init() * when iterating ConsumerMap. This is ensured implicitly by the order of keys in ordered map. * For cases when Orch has to process tables in specific order, like PortsOrch during warm start, it has to override Orch::doTask() */ - m_orchList = { gSwitchOrch, gCrmOrch, gPortsOrch, gBufferOrch, gIntfsOrch, gNeighOrch, gRouteOrch, copp_orch, tunnel_decap_orch, qos_orch, wm_orch, policer_orch, sflow_orch, debug_counter_orch, gMacsecOrch}; + m_orchList = { gSwitchOrch, gCrmOrch, gPortsOrch, gBufferOrch, gIntfsOrch, gNeighOrch, gRouteOrch, gNhgOrch, copp_orch, tunnel_decap_orch, qos_orch, wm_orch, policer_orch, sflow_orch, debug_counter_orch, gMacsecOrch}; bool initialize_dtel = false; if (platform == BFN_PLATFORM_SUBSTRING || platform == VS_PLATFORM_SUBSTRING) diff --git a/orchagent/orchdaemon.h b/orchagent/orchdaemon.h index 1d34574284f..ab38a2c9f71 100644 --- a/orchagent/orchdaemon.h +++ b/orchagent/orchdaemon.h @@ -10,6 +10,7 @@ #include "intfsorch.h" #include "neighorch.h" #include "routeorch.h" +#include "nhgorch.h" #include "copporch.h" #include "tunneldecaporch.h" #include "qosorch.h" diff --git a/orchagent/routeorch.cpp b/orchagent/routeorch.cpp index aea81ba13ef..2b01f9ea75a 100644 --- a/orchagent/routeorch.cpp +++ b/orchagent/routeorch.cpp @@ -2,6 +2,7 @@ #include #include #include "routeorch.h" +#include "nhgorch.h" #include "logger.h" #include "swssnet.h" #include "crmorch.h" @@ -18,61 +19,25 @@ extern sai_switch_api_t* sai_switch_api; extern PortsOrch *gPortsOrch; extern CrmOrch *gCrmOrch; extern Directory gDirectory; +extern NhgOrch *gNhgOrch; /* Default maximum number of next hop groups */ #define DEFAULT_NUMBER_OF_ECMP_GROUPS 128 #define DEFAULT_MAX_ECMP_GROUP_SIZE 32 -RouteOrch::RouteOrch(DBConnector *db, vector &tableNames, SwitchOrch *switchOrch, NeighOrch *neighOrch, IntfsOrch *intfsOrch, VRFOrch *vrfOrch, FgNhgOrch *fgNhgOrch) : +RouteOrch::RouteOrch(DBConnector *db, vector &tableNames, NeighOrch *neighOrch, IntfsOrch *intfsOrch, VRFOrch *vrfOrch, FgNhgOrch *fgNhgOrch) : gRouteBulker(sai_route_api), gLabelRouteBulker(sai_mpls_api), gNextHopGroupMemberBulker(sai_next_hop_group_api, gSwitchId), Orch(db, tableNames), - m_switchOrch(switchOrch), m_neighOrch(neighOrch), m_intfsOrch(intfsOrch), m_vrfOrch(vrfOrch), m_fgNhgOrch(fgNhgOrch), - m_nextHopGroupCount(0), m_resync(false) { SWSS_LOG_ENTER(); - sai_attribute_t attr; - attr.id = SAI_SWITCH_ATTR_NUMBER_OF_ECMP_GROUPS; - - sai_status_t status = sai_switch_api->get_switch_attribute(gSwitchId, 1, &attr); - if (status != SAI_STATUS_SUCCESS) - { - SWSS_LOG_WARN("Failed to get switch attribute number of ECMP groups. \ - Use default value. rv:%d", status); - m_maxNextHopGroupCount = DEFAULT_NUMBER_OF_ECMP_GROUPS; - } - else - { - m_maxNextHopGroupCount = attr.value.s32; - - /* - * ASIC specific workaround to re-calculate maximum ECMP groups - * according to different ECMP mode used. - * - * On Mellanox platform, the maximum ECMP groups returned is the value - * under the condition that the ECMP group size is 1. Dividing this - * number by DEFAULT_MAX_ECMP_GROUP_SIZE gets the maximum number of - * ECMP groups when the maximum ECMP group size is 32. - */ - char *platform = getenv("platform"); - if (platform && strstr(platform, MLNX_PLATFORM_SUBSTRING)) - { - m_maxNextHopGroupCount /= DEFAULT_MAX_ECMP_GROUP_SIZE; - } - } - vector fvTuple; - fvTuple.emplace_back("MAX_NEXTHOP_GROUP_COUNT", to_string(m_maxNextHopGroupCount)); - m_switchOrch->set_switch_capability(fvTuple); - - SWSS_LOG_NOTICE("Maximum number of ECMP groups supported is %d", m_maxNextHopGroupCount); - IpPrefix default_ip_prefix("0.0.0.0/0"); sai_route_entry_t unicast_route_entry; @@ -81,10 +46,12 @@ RouteOrch::RouteOrch(DBConnector *db, vector &tableNames, copy(unicast_route_entry.destination, default_ip_prefix); subnet(unicast_route_entry.destination, unicast_route_entry.destination); + sai_attribute_t attr; attr.id = SAI_ROUTE_ENTRY_ATTR_PACKET_ACTION; attr.value.s32 = SAI_PACKET_ACTION_DROP; - status = sai_route_api->create_route_entry(&unicast_route_entry, 1, &attr); + sai_status_t status = sai_route_api->create_route_entry( + &unicast_route_entry, 1, &attr); if (status != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Failed to create IPv4 default route with packet action drop"); @@ -94,7 +61,7 @@ RouteOrch::RouteOrch(DBConnector *db, vector &tableNames, gCrmOrch->incCrmResUsedCounter(CrmResourceType::CRM_IPV4_ROUTE); /* Add default IPv4 route into the m_syncdRoutes */ - m_syncdRoutes[gVirtualRouterId][default_ip_prefix] = NextHopGroupKey(); + m_syncdRoutes[gVirtualRouterId][default_ip_prefix] = RouteNhg(); SWSS_LOG_NOTICE("Create IPv4 default route with packet action drop"); @@ -113,7 +80,7 @@ RouteOrch::RouteOrch(DBConnector *db, vector &tableNames, gCrmOrch->incCrmResUsedCounter(CrmResourceType::CRM_IPV6_ROUTE); /* Add default IPv6 route into the m_syncdRoutes */ - m_syncdRoutes[gVirtualRouterId][v6_default_ip_prefix] = NextHopGroupKey(); + m_syncdRoutes[gVirtualRouterId][v6_default_ip_prefix] = RouteNhg(); SWSS_LOG_NOTICE("Create IPv6 default route with packet action drop"); @@ -254,7 +221,7 @@ void RouteOrch::attach(Observer *observer, const IpAddress& dstAddr, sai_object_ SWSS_LOG_NOTICE("Attached next hop observer of route %s for destination IP %s", observerEntry->second.routeTable.rbegin()->first.to_string().c_str(), dstAddr.to_string().c_str()); - NextHopUpdate update = { vrf_id, dstAddr, route->first, route->second }; + NextHopUpdate update = { vrf_id, dstAddr, route->first, route->second.nhg_key }; observer->update(SUBJECT_TYPE_NEXTHOP_CHANGE, static_cast(&update)); } } @@ -515,10 +482,13 @@ void RouteOrch::doPrefixTask(Consumer& consumer) if (op == SET_COMMAND) { + SWSS_LOG_INFO("Set operation"); + string ips; string aliases; string vni_labels; string remote_macs; + string nhg_index; bool& excp_intfs_flag = ctx.excp_intfs_flag; bool overlay_nh = false; @@ -537,97 +507,152 @@ void RouteOrch::doPrefixTask(Consumer& consumer) if (fvField(i) == "router_mac") remote_macs = fvValue(i); + + if (fvField(i) == "nexthop_group") + nhg_index = fvValue(i); } - vector& ipv = ctx.ipv; - ipv = tokenize(ips, ','); - vector alsv = tokenize(aliases, ','); - vector vni_labelv = tokenize(vni_labels, ','); - vector rmacv = tokenize(remote_macs, ','); + SWSS_LOG_INFO("Route %s has nexthop_group: %s, ips: %s, " + "aliases: %s", + ip_prefix.to_string().c_str(), + nhg_index.c_str(), + ips.c_str(), + aliases.c_str()); /* - * For backward compatibility, adjust ip string from old format to - * new format. Meanwhile it can deal with some abnormal cases. + * A route should not fill both nexthop_group and ips / + * aliases. */ - - /* Resize the ip vector to match ifname vector - * as tokenize(",", ',') will miss the last empty segment. */ - if (alsv.size() == 0) + if (!nhg_index.empty() && (!ips.empty() || !aliases.empty())) { - SWSS_LOG_WARN("Skip the route %s, for it has an empty ifname field.", key.c_str()); + SWSS_LOG_ERROR("Route %s has both nexthop_group and ips/aliases", + key.c_str()); it = consumer.m_toSync.erase(it); continue; } - else if (alsv.size() != ipv.size()) - { - SWSS_LOG_NOTICE("Route %s: resize ipv to match alsv, %zd -> %zd.", key.c_str(), ipv.size(), alsv.size()); - ipv.resize(alsv.size()); - } - /* Set the empty ip(s) to zero - * as IpAddress("") will construct a incorrect ip. */ - for (auto &ip : ipv) + ctx.nhg_index = nhg_index; + + /* + * If the nexthop_group is empty, create the next hop group key + * based on the IPs and aliases. Otherwise, get the key from + * the NhgOrch. + */ + vector& ipv = ctx.ipv; + vector alsv; + vector vni_labelv; + vector rmacv; + + /* Check if the next hop group is owned by the NhgOrch. */ + if (nhg_index.empty()) { - if (ip.empty()) + vector& ipv = ctx.ipv; + ipv = tokenize(ips, ','); + alsv = tokenize(aliases, ','); + vni_labelv = tokenize(vni_labels, ','); + rmacv = tokenize(remote_macs, ','); + + /* + * For backward compatibility, adjust ip string from old format to + * new format. Meanwhile it can deal with some abnormal cases. + */ + + /* Resize the ip vector to match ifname vector + * as tokenize(",", ',') will miss the last empty segment. */ + if (alsv.size() == 0) { - SWSS_LOG_NOTICE("Route %s: set the empty nexthop ip to zero.", key.c_str()); - ip = ip_prefix.isV4() ? "0.0.0.0" : "::"; + SWSS_LOG_WARN("Skip the route %s, for it has an empty ifname field.", key.c_str()); + it = consumer.m_toSync.erase(it); + continue; + } + else if (alsv.size() != ipv.size()) + { + SWSS_LOG_NOTICE("Route %s: resize ipv to match alsv, %zd -> %zd.", key.c_str(), ipv.size(), alsv.size()); + ipv.resize(alsv.size()); } - } - for (auto alias : alsv) - { - /* skip route to management, docker, loopback - * TODO: for route to loopback interface, the proper - * way is to create loopback interface and then create - * route pointing to it, so that we can traps packets to - * CPU */ - if (alias == "eth0" || alias == "docker0" || alias == "tun0" || - alias == "lo" || !alias.compare(0, strlen(LOOPBACK_PREFIX), LOOPBACK_PREFIX)) + /* Set the empty ip(s) to zero + * as IpAddress("") will construst a incorrect ip. */ + for (auto &ip : ipv) { - excp_intfs_flag = true; - break; + if (ip.empty()) + { + SWSS_LOG_NOTICE("Route %s: set the empty nexthop ip to zero.", key.c_str()); + ip = ip_prefix.isV4() ? "0.0.0.0" : "::"; + } } - } - // TODO: cannot trust m_portsOrch->getPortIdByAlias because sometimes alias is empty - if (excp_intfs_flag) - { - /* If any existing routes are updated to point to the - * above interfaces, remove them from the ASIC. */ - if (removeRoute(ctx)) - it = consumer.m_toSync.erase(it); - else - it++; - continue; - } + for (auto alias : alsv) + { + /* skip route to management, docker, loopback + * TODO: for route to loopback interface, the proper + * way is to create loopback interface and then create + * route pointing to it, so that we can traps packets to + * CPU */ + if (alias == "eth0" || alias == "docker0" || + alias == "lo" || !alias.compare(0, strlen(LOOPBACK_PREFIX), LOOPBACK_PREFIX)) + { + excp_intfs_flag = true; + break; + } + } - string nhg_str = ""; - NextHopGroupKey& nhg = ctx.nhg; + // TODO: cannot trust m_portsOrch->getPortIdByAlias because sometimes alias is empty + if (excp_intfs_flag) + { + /* If any existing routes are updated to point to the + * above interfaces, remove them from the ASIC. */ + if (removeRoute(ctx)) + it = consumer.m_toSync.erase(it); + else + it++; + continue; + } - if (overlay_nh == false) - { - nhg_str = ipv[0] + NH_DELIMITER + alsv[0]; + string nhg_str = ""; + NextHopGroupKey& nhg = ctx.nhg; - for (uint32_t i = 1; i < ipv.size(); i++) + if (overlay_nh == false) { - nhg_str += NHG_DELIMITER + ipv[i] + NH_DELIMITER + alsv[i]; - } + nhg_str = ipv[0] + NH_DELIMITER + alsv[0]; - nhg = NextHopGroupKey(nhg_str); + for (uint32_t i = 1; i < ipv.size(); i++) + { + nhg_str += NHG_DELIMITER + ipv[i] + NH_DELIMITER + alsv[i]; + } + nhg = NextHopGroupKey(nhg_str); + } + else + { + nhg_str = ipv[0] + NH_DELIMITER + "vni" + alsv[0] + NH_DELIMITER + vni_labelv[0] + NH_DELIMITER + rmacv[0]; + for (uint32_t i = 1; i < ipv.size(); i++) + { + nhg_str += NHG_DELIMITER + ipv[i] + NH_DELIMITER + "vni" + alsv[i] + NH_DELIMITER + vni_labelv[i] + NH_DELIMITER + rmacv[i]; + } + + nhg = NextHopGroupKey(nhg_str, overlay_nh); + } } else { - nhg_str = ipv[0] + NH_DELIMITER + "vni" + alsv[0] + NH_DELIMITER + vni_labelv[0] + NH_DELIMITER + rmacv[0]; - for (uint32_t i = 1; i < ipv.size(); i++) + try { - nhg_str += NHG_DELIMITER + ipv[i] + NH_DELIMITER + "vni" + alsv[i] + NH_DELIMITER + vni_labelv[i] + NH_DELIMITER + rmacv[i]; + const NextHopGroup& nh_group = gNhgOrch->getNhg(nhg_index); + ctx.nhg = nh_group.getKey(); + ctx.is_temp = nh_group.isTemp(); + } + catch (const std::out_of_range& e) + { + SWSS_LOG_ERROR("Next hop group %s does not exist", + nhg_index.c_str()); + ++it; + continue; } - - nhg = NextHopGroupKey(nhg_str, overlay_nh); } + NextHopGroupKey& nhg = ctx.nhg; + if (ipv.size() == 1 && IpAddress(ipv[0]).isZero()) { /* blackhole to be done */ @@ -660,9 +685,15 @@ void RouteOrch::doPrefixTask(Consumer& consumer) it++; } } + /* + * Check if the route does not exist or needs to be updated or + * if the route is using a temporary next hop group owned by + * NhgOrch. + */ else if (m_syncdRoutes.find(vrf_id) == m_syncdRoutes.end() || m_syncdRoutes.at(vrf_id).find(ip_prefix) == m_syncdRoutes.at(vrf_id).end() || - m_syncdRoutes.at(vrf_id).at(ip_prefix) != nhg) + m_syncdRoutes.at(vrf_id).at(ip_prefix) != RouteNhg(nhg, ctx.nhg_index) || + ctx.is_temp) { if (addRoute(ctx, nhg)) it = consumer.m_toSync.erase(it); @@ -670,18 +701,24 @@ void RouteOrch::doPrefixTask(Consumer& consumer) it++; } else + { + SWSS_LOG_INFO("Route %s is duplicate entry", key.c_str()); /* Duplicate entry */ it = consumer.m_toSync.erase(it); + } // If already exhaust the nexthop groups, and there are pending removing routes in bulker, // flush the bulker and possibly collect some released nexthop groups - if (m_nextHopGroupCount >= m_maxNextHopGroupCount && gRouteBulker.removing_entries_count() > 0) + if (gNhgOrch->getNhgCount() >= gNhgOrch->getMaxNhgCount() && + gRouteBulker.removing_entries_count() > 0) { break; } } else if (op == DEL_COMMAND) { + SWSS_LOG_INFO("Delete operation"); + if (removeRoute(ctx)) it = consumer.m_toSync.erase(it); else @@ -750,8 +787,9 @@ void RouteOrch::doPrefixTask(Consumer& consumer) it_prev++; } else if (m_syncdRoutes.find(vrf_id) == m_syncdRoutes.end() || - m_syncdRoutes.at(vrf_id).find(ip_prefix) == m_syncdRoutes.at(vrf_id).end() || - m_syncdRoutes.at(vrf_id).at(ip_prefix) != nhg) + m_syncdRoutes.at(vrf_id).find(ip_prefix) == m_syncdRoutes.at(vrf_id).end() || + m_syncdRoutes.at(vrf_id).at(ip_prefix) != RouteNhg(nhg, ctx.nhg_index) || + ctx.is_temp) { if (addRoutePost(ctx, nhg)) it_prev = consumer.m_toSync.erase(it_prev); @@ -808,13 +846,13 @@ void RouteOrch::notifyNextHopChangeObservers(sai_object_id_t vrf_id, const IpPre update_required = true; } - entry.second.routeTable.emplace(prefix, nexthops); + entry.second.routeTable.emplace(prefix, RouteNhg(nexthops, "")); } else { - if (route->second != nexthops) + if (route->second.nhg_key != nexthops) { - route->second = nexthops; + route->second.nhg_key = nexthops; /* If changed route is best match update observers */ if (entry.second.routeTable.rbegin()->first == route->first) { @@ -845,7 +883,7 @@ void RouteOrch::notifyNextHopChangeObservers(sai_object_id_t vrf_id, const IpPre assert(!entry.second.routeTable.empty()); auto route = entry.second.routeTable.rbegin(); - NextHopUpdate update = { vrf_id, entry.first.second, route->first, route->second }; + NextHopUpdate update = { vrf_id, entry.first.second, route->first, route->second.nhg_key }; for (auto observer : entry.second.observers) { @@ -942,7 +980,7 @@ const NextHopGroupKey RouteOrch::getSyncdRouteNhgKey(sai_object_id_t vrf_id, con auto route_entry = route_table->second.find(ipPrefix); if (route_entry != route_table->second.end()) { - nhg = route_entry->second; + nhg = route_entry->second.nhg_key; } } return nhg; @@ -952,7 +990,7 @@ bool RouteOrch::createFineGrainedNextHopGroup(sai_object_id_t &next_hop_group_id { SWSS_LOG_ENTER(); - if (m_nextHopGroupCount >= m_maxNextHopGroupCount) + if (gNhgOrch->getNhgCount() >= gNhgOrch->getMaxNhgCount()) { SWSS_LOG_DEBUG("Failed to create new next hop group. \ Reaching maximum number of next hop groups."); @@ -970,7 +1008,7 @@ bool RouteOrch::createFineGrainedNextHopGroup(sai_object_id_t &next_hop_group_id } gCrmOrch->incCrmResUsedCounter(CrmResourceType::CRM_NEXTHOP_GROUP); - m_nextHopGroupCount++; + gNhgOrch->incNhgCount(); return true; } @@ -988,7 +1026,7 @@ bool RouteOrch::removeFineGrainedNextHopGroup(sai_object_id_t &next_hop_group_id } gCrmOrch->decCrmResUsedCounter(CrmResourceType::CRM_NEXTHOP_GROUP); - m_nextHopGroupCount--; + gNhgOrch->decNhgCount(); return true; } @@ -999,10 +1037,10 @@ bool RouteOrch::addNextHopGroup(const NextHopGroupKey &nexthops) assert(!hasNextHopGroup(nexthops)); - if (m_nextHopGroupCount >= m_maxNextHopGroupCount) + if (gNhgOrch->getNhgCount() >= gNhgOrch->getMaxNhgCount()) { - SWSS_LOG_DEBUG("Failed to create new next hop group. \ - Reaching maximum number of next hop groups."); + SWSS_LOG_WARN("Reached maximum next hop groups of %u", + gNhgOrch->getMaxNhgCount()); return false; } @@ -1028,11 +1066,10 @@ bool RouteOrch::addNextHopGroup(const NextHopGroupKey &nexthops) } else { - SWSS_LOG_INFO("Failed to get next hop %s in %s", + SWSS_LOG_WARN("Failed to get next hop %s in %s", it.to_string().c_str(), nexthops.to_string().c_str()); return false; } - // skip next hop group member create for neighbor from down port if (m_neighOrch->isNextHopFlagSet(it, NHFLAGS_IFDOWN)) { @@ -1064,7 +1101,7 @@ bool RouteOrch::addNextHopGroup(const NextHopGroupKey &nexthops) return handleSaiCreateStatus(SAI_API_NEXT_HOP_GROUP, status); } - m_nextHopGroupCount ++; + gNhgOrch->incNhgCount(); SWSS_LOG_NOTICE("Create next hop group %s", nexthops.to_string().c_str()); gCrmOrch->incCrmResUsedCounter(CrmResourceType::CRM_NEXTHOP_GROUP); @@ -1191,13 +1228,14 @@ bool RouteOrch::removeNextHopGroup(const NextHopGroupKey &nexthops) return handleSaiRemoveStatus(SAI_API_NEXT_HOP_GROUP, status); } - m_nextHopGroupCount --; + gNhgOrch->decNhgCount(); gCrmOrch->decCrmResUsedCounter(CrmResourceType::CRM_NEXTHOP_GROUP); set next_hop_set = nexthops.getNextHops(); for (auto it : next_hop_set) { m_neighOrch->decreaseNextHopRefCount(it); + if (overlay_nh && !m_neighOrch->getNextHopRefCount(it)) { if(!m_neighOrch->removeTunnelNextHop(it)) @@ -1239,12 +1277,12 @@ bool RouteOrch::updateNextHopRoutes(const NextHopKey& nextHop, uint32_t& numRout for (auto rt_entry : rt_table.second) { // Skip routes with ecmp nexthops - if (rt_entry.second.getSize() > 1) + if (rt_entry.second.nhg_key.getSize() > 1) { continue; } - if (rt_entry.second.contains(nextHop)) + if (rt_entry.second.nhg_key.contains(nextHop)) { SWSS_LOG_INFO("Updating route %s during nexthop status change", rt_entry.first.to_string().c_str()); @@ -1284,7 +1322,12 @@ void RouteOrch::addTempRoute(RouteBulkContext& ctx, const NextHopGroupKey &nextH /* Remove next hops that are not in m_syncdNextHops */ for (auto it = next_hop_set.begin(); it != next_hop_set.end();) { - if (!m_neighOrch->hasNextHop(*it)) + /* + * Check if the IP next hop exists in NeighOrch. The next hop may be + * a labeled one, which are created by RouteOrch or NhgOrch if the IP + * next hop exists. + */ + if (!m_neighOrch->hasNextHop(it->ipKey())) { SWSS_LOG_INFO("Failed to get next hop %s for %s", (*it).to_string().c_str(), ipPrefix.to_string().c_str()); @@ -1350,145 +1393,177 @@ bool RouteOrch::addRoute(RouteBulkContext& ctx, const NextHopGroupKey &nextHops) return false; } } - else if (nextHops.getSize() == 1) + else if (ctx.nhg_index.empty()) { - /* The route is pointing to a next hop */ - NextHopKey nexthop; - if (overlay_nh) - { - nexthop = NextHopKey(nextHops.to_string(), overlay_nh); - } - else - { - nexthop = NextHopKey(nextHops.to_string()); - } + SWSS_LOG_INFO("Next hop group is not owned by NhgOrch"); - if (nexthop.ip_address.isZero()) + if (nextHops.getSize() == 1) { - next_hop_id = m_intfsOrch->getRouterIntfsId(nexthop.alias); - /* rif is not created yet */ - if (next_hop_id == SAI_NULL_OBJECT_ID) + /* The route is pointing to a next hop */ + NextHopKey nexthop; + if (overlay_nh) { - SWSS_LOG_INFO("Failed to get next hop %s for %s", - nextHops.to_string().c_str(), ipPrefix.to_string().c_str()); - return false; + nexthop = NextHopKey(nextHops.to_string(), overlay_nh); } - } - else - { - if (m_neighOrch->hasNextHop(nexthop)) + else { - next_hop_id = m_neighOrch->getNextHopId(nexthop); + nexthop = NextHopKey(nextHops.to_string()); } - /* See if there is an IP neighbor nexthop */ - else if (nexthop.label_stack.getSize() && - m_neighOrch->hasNextHop(NextHopKey(nexthop.ip_address, nexthop.alias))) + + if (nexthop.ip_address.isZero()) { - m_neighOrch->addNextHop(nexthop); - next_hop_id = m_neighOrch->getNextHopId(nexthop); + next_hop_id = m_intfsOrch->getRouterIntfsId(nexthop.alias); + /* rif is not created yet */ + if (next_hop_id == SAI_NULL_OBJECT_ID) + { + SWSS_LOG_INFO("Failed to get next hop %s for %s", + nextHops.to_string().c_str(), ipPrefix.to_string().c_str()); + return false; + } } else { - if(overlay_nh) + if (m_neighOrch->hasNextHop(nexthop)) { - SWSS_LOG_INFO("create remote vtep %s", nexthop.to_string(overlay_nh).c_str()); - status = createRemoteVtep(vrf_id, nexthop); - if (status == false) + next_hop_id = m_neighOrch->getNextHopId(nexthop); + } + /* See if there is an IP neighbor nexthop */ + else if (nexthop.label_stack.getSize() && + m_neighOrch->hasNextHop(NextHopKey(nexthop.ip_address, nexthop.alias))) + { + m_neighOrch->addNextHop(nexthop); + next_hop_id = m_neighOrch->getNextHopId(nexthop); + } + else + { + if(overlay_nh) { - SWSS_LOG_ERROR("Failed to create remote vtep %s", nexthop.to_string(overlay_nh).c_str()); - return false; + SWSS_LOG_INFO("create remote vtep %s", nexthop.to_string(overlay_nh).c_str()); + status = createRemoteVtep(vrf_id, nexthop); + if (status == false) + { + SWSS_LOG_ERROR("Failed to create remote vtep %s", nexthop.to_string(overlay_nh).c_str()); + return false; + } + next_hop_id = m_neighOrch->addTunnelNextHop(nexthop); + if (next_hop_id == SAI_NULL_OBJECT_ID) + { + SWSS_LOG_ERROR("Failed to create Tunnel Nexthop %s", nexthop.to_string(overlay_nh).c_str()); + return false; + } } - next_hop_id = m_neighOrch->addTunnelNextHop(nexthop); - if (next_hop_id == SAI_NULL_OBJECT_ID) + else { - SWSS_LOG_ERROR("Failed to create Tunnel Nexthop %s", nexthop.to_string(overlay_nh).c_str()); + SWSS_LOG_INFO("Failed to get next hop %s for %s", + nextHops.to_string().c_str(), ipPrefix.to_string().c_str()); return false; } } - else - { - SWSS_LOG_INFO("Failed to get next hop %s for %s", - nextHops.to_string().c_str(), ipPrefix.to_string().c_str()); - return false; - } } } - } - /* The route is pointing to a next hop group */ - else - { - /* Check if there is already an existing next hop group */ - if (!hasNextHopGroup(nextHops)) + /* The route is pointing to a next hop group */ + else { - /* Try to create a new next hop group */ - if (!addNextHopGroup(nextHops)) + /* Check if there is already an existing next hop group */ + if (!hasNextHopGroup(nextHops)) { - /* NextHopGroup is in "Ip1|alias1,Ip2|alias2,..." format*/ - std::vector nhops = tokenize(nextHops.to_string(), ','); - for(auto it = nhops.begin(); it != nhops.end(); ++it) + SWSS_LOG_INFO("Next hop group %s does not exist", + nextHops.to_string().c_str()); + /* Try to create a new next hop group */ + if (!addNextHopGroup(nextHops)) { - NextHopKey nextHop; - if (overlay_nh) - { - nextHop = NextHopKey(*it, overlay_nh); - } - else - { - nextHop = NextHopKey(*it); - } + /* Failed to create the next hop group and check if a temporary route is needed */ + SWSS_LOG_WARN("Failed to create next hop group %s", + nextHops.to_string().c_str()); - if(!m_neighOrch->hasNextHop(nextHop)) + /* NextHopGroup is in "Ip1|alias1,Ip2|alias2,..." format*/ + std::vector nhops = tokenize(nextHops.to_string(), ','); + for(auto it = nhops.begin(); it != nhops.end(); ++it) { - if(overlay_nh) + NextHopKey nextHop; + if (overlay_nh) { - SWSS_LOG_INFO("create remote vtep %s ecmp", nextHop.to_string(overlay_nh).c_str()); - status = createRemoteVtep(vrf_id, nextHop); - if (status == false) - { - SWSS_LOG_ERROR("Failed to create remote vtep %s ecmp", nextHop.to_string(overlay_nh).c_str()); - return false; - } - next_hop_id = m_neighOrch->addTunnelNextHop(nextHop); - if (next_hop_id == SAI_NULL_OBJECT_ID) + nextHop = NextHopKey(*it, overlay_nh); + } + else + { + nextHop = NextHopKey(*it); + } + + if(!m_neighOrch->hasNextHop(nextHop)) + { + if(overlay_nh) { - SWSS_LOG_ERROR("Failed to create Tunnel Nexthop %s", nextHop.to_string(overlay_nh).c_str()); - return false; + SWSS_LOG_INFO("create remote vtep %s ecmp", nextHop.to_string(overlay_nh).c_str()); + status = createRemoteVtep(vrf_id, nextHop); + if (status == false) + { + SWSS_LOG_ERROR("Failed to create remote vtep %s ecmp", nextHop.to_string(overlay_nh).c_str()); + return false; + } + next_hop_id = m_neighOrch->addTunnelNextHop(nextHop); + if (next_hop_id == SAI_NULL_OBJECT_ID) + { + SWSS_LOG_ERROR("Failed to create Tunnel Nexthop %s", nextHop.to_string(overlay_nh).c_str()); + return false; + } } } } - } - /* Failed to create the next hop group and check if a temporary route is needed */ + /* Failed to create the next hop group and check if a temporary route is needed */ - /* If the current next hop is part of the next hop group to sync, - * then return false and no need to add another temporary route. */ - if (it_route != m_syncdRoutes.at(vrf_id).end() && it_route->second.getSize() == 1) - { - NextHopKey nexthop; - auto old_nextHops = it_route->second; + /* If the current next hop is part of the next hop group to sync, + * then return false and no need to add another temporary route. */ + if (it_route != m_syncdRoutes.at(vrf_id).end() && it_route->second.nhg_key.getSize() == 1) + { + NextHopKey nexthop; + auto old_nextHops = it_route->second.nhg_key; - if (old_nextHops.is_overlay_nexthop()) { - nexthop = NextHopKey(it_route->second.to_string(), true); - } else { - nexthop = NextHopKey(it_route->second.to_string()); - } + if (old_nextHops.is_overlay_nexthop()) { + nexthop = NextHopKey(it_route->second.nhg_key.to_string(), true); + } else { + nexthop = NextHopKey(it_route->second.nhg_key.to_string()); + } - if (nextHops.contains(nexthop)) - { - return false; + if (nextHops.contains(nexthop)) + { + SWSS_LOG_NOTICE("Temporary route already added via %s", + nexthop.to_string().c_str()); + return false; + } } - } - /* Add a temporary route when a next hop group cannot be added, - * and there is no temporary route right now or the current temporary - * route is not pointing to a member of the next hop group to sync. */ - addTempRoute(ctx, nextHops); - /* Return false since the original route is not successfully added */ - return false; + /* Add a temporary route when a next hop group cannot be added, + * and there is no temporary route right now or the current temporary + * route is not pointing to a member of the next hop group to sync. */ + SWSS_LOG_NOTICE("Adding temporary route"); + addTempRoute(ctx, nextHops); + /* Return false since the original route is not successfully added */ + return false; + } } - } - next_hop_id = m_syncdNextHopGroups[nextHops].next_hop_group_id; + next_hop_id = m_syncdNextHopGroups[nextHops].next_hop_group_id; + } } + else + { + SWSS_LOG_INFO("Next hop group is owned by NhgOrch with index %s", + ctx.nhg_index.c_str()); + try + { + const NextHopGroup& nhg = gNhgOrch->getNhg(ctx.nhg_index); + next_hop_id = nhg.getId(); + } + catch(const std::out_of_range& e) + { + SWSS_LOG_WARN("Next hop group key %s does not exist", + ctx.nhg_index.c_str()); + return false; + } + } + + SWSS_LOG_INFO("Next hop ID: %lu", next_hop_id); /* Sync the route entry */ sai_route_entry_t route_entry; @@ -1523,7 +1598,7 @@ bool RouteOrch::addRoute(RouteBulkContext& ctx, const NextHopGroupKey &nextHops) else { /* Set the packet action to forward when there was no next hop (dropped) */ - if (it_route->second.getSize() == 0) + if (it_route->second.nhg_key.getSize() == 0) { route_attr.id = SAI_ROUTE_ENTRY_ATTR_PACKET_ACTION; route_attr.value.s32 = SAI_PACKET_ACTION_FORWARD; @@ -1548,6 +1623,11 @@ bool RouteOrch::addRoute(RouteBulkContext& ctx, const NextHopGroupKey &nextHops) gRouteBulker.set_entry_attribute(&object_statuses.back(), &route_entry, &route_attr); } } + + SWSS_LOG_NOTICE("Added route %s with next hop(s) %s", + ipPrefix.to_string().c_str(), + nextHops.to_string().c_str()); + return false; } @@ -1567,53 +1647,71 @@ bool RouteOrch::addRoutePost(const RouteBulkContext& ctx, const NextHopGroupKey return false; } - /* next_hop_id indicates the next hop id or next hop group id of this route */ - sai_object_id_t next_hop_id; + SWSS_LOG_INFO("Checking next hop group %s", nextHops.to_string().c_str()); if (m_fgNhgOrch->isRouteFineGrained(vrf_id, ipPrefix, nextHops)) { /* Route is pointing to Fine Grained ECMP nexthop group */ isFineGrained = true; } - else if (nextHops.getSize() == 1) + /* Check that the next hop group is not owned by NhgOrch. */ + else if (ctx.nhg_index.empty()) { - /* The route is pointing to a next hop */ - NextHopKey nexthop; - if(nextHops.is_overlay_nexthop()) { - nexthop = NextHopKey(nextHops.to_string(), true); - } else { - nexthop = NextHopKey(nextHops.to_string()); - } + SWSS_LOG_INFO("Next hop group is not owned by NhgOrch"); - if (nexthop.ip_address.isZero()) + /* The route is pointing to a next hop */ + if (nextHops.getSize() == 1) { - next_hop_id = m_intfsOrch->getRouterIntfsId(nexthop.alias); - /* rif is not created yet */ - if (next_hop_id == SAI_NULL_OBJECT_ID) + NextHopKey nexthop; + if(nextHops.is_overlay_nexthop()) { + nexthop = NextHopKey(nextHops.to_string(), true); + } else { + nexthop = NextHopKey(nextHops.to_string()); + } + + if (nexthop.ip_address.isZero()) { - SWSS_LOG_INFO("Failed to get next hop %s for %s", - nextHops.to_string().c_str(), ipPrefix.to_string().c_str()); - return false; + sai_object_id_t next_hop_id = m_intfsOrch->getRouterIntfsId(nexthop.alias); + /* rif is not created yet */ + if (next_hop_id == SAI_NULL_OBJECT_ID) + { + SWSS_LOG_WARN("Failed to get next hop %s for %s", + nextHops.to_string().c_str(), ipPrefix.to_string().c_str()); + return false; + } + } + else + { + if (!m_neighOrch->hasNextHop(nexthop)) + { + SWSS_LOG_WARN("Failed to get next hop %s for %s", + nextHops.to_string().c_str(), ipPrefix.to_string().c_str()); + return false; + } } } + /* The route is pointing to a next hop group */ else { - if (!m_neighOrch->hasNextHop(nexthop)) + if (!hasNextHopGroup(nextHops)) { - SWSS_LOG_INFO("Failed to get next hop %s for %s", - nextHops.to_string().c_str(), ipPrefix.to_string().c_str()); + SWSS_LOG_WARN("Next hop group is temporary, represented by %s", + ctx.tmp_next_hop.to_string().c_str()); + // Previous added an temporary route + auto& tmp_next_hop = ctx.tmp_next_hop; + addRoutePost(ctx, tmp_next_hop); return false; } } } - /* The route is pointing to a next hop group */ else { - if (!hasNextHopGroup(nextHops)) + SWSS_LOG_INFO("NhgOrch owns the next hop group with index %s", + ctx.nhg_index.c_str()); + if (!gNhgOrch->hasNhg(ctx.nhg_index)) { - // Previous added an temporary route - auto& tmp_next_hop = ctx.tmp_next_hop; - addRoutePost(ctx, tmp_next_hop); + SWSS_LOG_WARN("Failed to get next hop group with index %s", + ctx.nhg_index.c_str()); return false; } } @@ -1648,16 +1746,16 @@ bool RouteOrch::addRoutePost(const RouteBulkContext& ctx, const NextHopGroupKey else { /* Route already exists */ - auto nh_entry = m_syncdNextHopGroups.find(it_route->second); + auto nh_entry = m_syncdNextHopGroups.find(it_route->second.nhg_key); if (nh_entry != m_syncdNextHopGroups.end()) { /* Case where route was pointing to non-fine grained nhs in the past, * and transitioned to Fine Grained ECMP */ - decreaseNextHopRefCount(it_route->second); - if (it_route->second.getSize() > 1 - && m_syncdNextHopGroups[it_route->second].ref_count == 0) + decreaseNextHopRefCount(it_route->second.nhg_key); + if (it_route->second.nhg_key.getSize() > 1 + && m_syncdNextHopGroups[it_route->second.nhg_key].ref_count == 0) { - m_bulkNhgReducedRefCnt.emplace(it_route->second); + m_bulkNhgReducedRefCnt.emplace(it_route->second.nhg_key); } } SWSS_LOG_INFO("FG Post set route %s with next hop(s) %s", @@ -1671,9 +1769,11 @@ bool RouteOrch::addRoutePost(const RouteBulkContext& ctx, const NextHopGroupKey { SWSS_LOG_ERROR("Failed to create route %s with next hop(s) %s", ipPrefix.to_string().c_str(), nextHops.to_string().c_str()); - /* Clean up the newly created next hop group entry */ - if (nextHops.getSize() > 1) + + /* Check that the next hop group is not owned by NhgOrch. */ + if (ctx.nhg_index.empty() && nextHops.getSize() > 1) { + /* Clean up the newly created next hop group entry */ removeNextHopGroup(nextHops); } return handleSaiCreateStatus(SAI_API_ROUTE, status); @@ -1688,10 +1788,19 @@ bool RouteOrch::addRoutePost(const RouteBulkContext& ctx, const NextHopGroupKey gCrmOrch->incCrmResUsedCounter(CrmResourceType::CRM_IPV6_ROUTE); } - /* Increase the ref_count for the next hop (group) entry */ - increaseNextHopRefCount(nextHops); + /* Increase the ref_count for the next hop group. */ + if (ctx.nhg_index.empty()) + { + increaseNextHopRefCount(nextHops); + } + else + { + SWSS_LOG_INFO("Increment NhgOrch's NHG %s ref count", + ctx.nhg_index.c_str()); + gNhgOrch->incNhgRefCount(ctx.nhg_index); + } - SWSS_LOG_INFO("Post create route %s with next hop(s) %s", + SWSS_LOG_NOTICE("Post create route %s with next hop(s) %s", ipPrefix.to_string().c_str(), nextHops.to_string().c_str()); } else @@ -1699,7 +1808,7 @@ bool RouteOrch::addRoutePost(const RouteBulkContext& ctx, const NextHopGroupKey sai_status_t status; /* Set the packet action to forward when there was no next hop (dropped) */ - if (it_route->second.getSize() == 0) + if (it_route->second.nhg_key.getSize() == 0) { status = *it_status++; if (status != SAI_STATUS_SUCCESS) @@ -1718,9 +1827,6 @@ bool RouteOrch::addRoutePost(const RouteBulkContext& ctx, const NextHopGroupKey return handleSaiSetStatus(SAI_API_ROUTE, status); } - /* Increase the ref_count for the next hop (group) entry */ - increaseNextHopRefCount(nextHops); - if (m_fgNhgOrch->syncdContainsFgNhg(vrf_id, ipPrefix)) { /* Remove FG nhg since prefix now points to standard nhg/nhs */ @@ -1728,27 +1834,57 @@ bool RouteOrch::addRoutePost(const RouteBulkContext& ctx, const NextHopGroupKey } else { - decreaseNextHopRefCount(it_route->second); - auto ol_nextHops = it_route->second; - if (it_route->second.getSize() > 1 - && m_syncdNextHopGroups[it_route->second].ref_count == 0) + /* Decrease the ref count for the previous next hop group. */ + if (it_route->second.nhg_index.empty()) { - m_bulkNhgReducedRefCnt.emplace(it_route->second); - } else if (ol_nextHops.is_overlay_nexthop()){ - - SWSS_LOG_NOTICE("Update overlay Nexthop %s", ol_nextHops.to_string().c_str()); - removeOverlayNextHops(vrf_id, ol_nextHops); + /* The next hop group is owned by RouteOrch. */ + decreaseNextHopRefCount(it_route->second.nhg_key); + if (it_route->second.nhg_key.getSize() > 1 + && m_syncdNextHopGroups[it_route->second.nhg_key].ref_count == 0) + { + m_bulkNhgReducedRefCnt.emplace(it_route->second.nhg_key); + } + else if (it_route->second.nhg_key.is_overlay_nexthop()) + { + SWSS_LOG_NOTICE("Update overlay Nexthop %s", it_route->second.nhg_key.to_string().c_str()); + removeOverlayNextHops(vrf_id, it_route->second.nhg_key); + } + } + else + { + /* The next hop group is owned by NeighOrch. */ + SWSS_LOG_INFO("Decrement NhgOrch's NHG %s ref count", + it_route->second.nhg_index.c_str()); + gNhgOrch->decNhgRefCount(it_route->second.nhg_index); } } - SWSS_LOG_INFO("Post set route %s with next hop(s) %s", + if (ctx.nhg_index.empty()) + { + /* Increase the ref_count for the next hop (group) entry */ + increaseNextHopRefCount(nextHops); + } + else + { + SWSS_LOG_INFO("Increment NhgOrch's NHG %s ref count", + ctx.nhg_index.c_str()); + gNhgOrch->incNhgRefCount(ctx.nhg_index); + } + + SWSS_LOG_NOTICE("Post set route %s with next hop(s) %s", ipPrefix.to_string().c_str(), nextHops.to_string().c_str()); } - m_syncdRoutes[vrf_id][ipPrefix] = nextHops; + m_syncdRoutes[vrf_id][ipPrefix] = RouteNhg(nextHops, ctx.nhg_index); notifyNextHopChangeObservers(vrf_id, ipPrefix, nextHops, true); - return true; + + /* + * If the route uses a temporary synced NHG owned by NhgOrch, return false + * in order to keep trying to update the route in case the NHG is updated, + * which will update the SAI ID of the group as well. + */ + return !ctx.is_temp; } bool RouteOrch::removeRoute(RouteBulkContext& ctx) @@ -1854,7 +1990,7 @@ bool RouteOrch::removeRoutePost(const RouteBulkContext& ctx) if (status != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Failed to remove route prefix:%s\n", ipPrefix.to_string().c_str()); - return handleSaiRemoveStatus(SAI_API_ROUTE, status); + return handleSaiSetStatus(SAI_API_ROUTE, status); } if (ipPrefix.isV4()) @@ -1874,47 +2010,67 @@ bool RouteOrch::removeRoutePost(const RouteBulkContext& ctx) } else { - /* - * Decrease the reference count only when the route is pointing to a next hop. - */ - decreaseNextHopRefCount(it_route->second); + /* Check that the next hop group is not owned by NhgOrch. */ + if (it_route->second.nhg_index.empty()) + { + auto ol_nextHops = it_route->second.nhg_key; - auto ol_nextHops = it_route->second; + /* + * Decrease the reference count only when the route is pointing to a next hop. + */ + decreaseNextHopRefCount(ol_nextHops); - if (it_route->second.getSize() > 1 - && m_syncdNextHopGroups[it_route->second].ref_count == 0) - { - m_bulkNhgReducedRefCnt.emplace(it_route->second); - } - /* - * Additionally check if the NH has label and its ref count == 0, then - * remove the label next hop. - */ - else if (it_route->second.getSize() == 1) - { - NextHopKey nexthop(it_route->second.to_string()); - if (nexthop.label_stack.getSize() && - (m_neighOrch->getNextHopRefCount(nexthop) == 0)) + if (ol_nextHops.getSize() > 1 + && m_syncdNextHopGroups[ol_nextHops].ref_count == 0) { - m_neighOrch->removeNextHop(nexthop); + m_bulkNhgReducedRefCnt.emplace(ol_nextHops); + } + /* + * Additionally check if the NH has label and its ref count == 0, then + * remove the label next hop. + */ + else if (ol_nextHops.getSize() == 1) + { + NextHopKey nexthop; + bool overlay_nh = ol_nextHops.is_overlay_nexthop(); + if (overlay_nh) + { + nexthop = NextHopKey(ol_nextHops.to_string(), overlay_nh); + } + else + { + nexthop = NextHopKey(ol_nextHops.to_string()); + } + + if (nexthop.label_stack.getSize() && + (m_neighOrch->getNextHopRefCount(nexthop) == 0)) + { + m_neighOrch->removeNextHop(nexthop); + } + else if (ol_nextHops.is_overlay_nexthop()) + { + SWSS_LOG_NOTICE("Remove overlay Nexthop %s", ol_nextHops.to_string().c_str()); + removeOverlayNextHops(vrf_id, ol_nextHops); + } } } - else if (ol_nextHops.is_overlay_nexthop()) + else { - SWSS_LOG_NOTICE("Remove overlay Nexthop %s", ol_nextHops.to_string().c_str()); - removeOverlayNextHops(vrf_id, ol_nextHops); + SWSS_LOG_INFO("Decrement NhgOrch's NHG %s ref count", + it_route->second.nhg_index.c_str()); + gNhgOrch->decNhgRefCount(it_route->second.nhg_index); } } SWSS_LOG_INFO("Remove route %s with next hop(s) %s", - ipPrefix.to_string().c_str(), it_route->second.to_string().c_str()); + ipPrefix.to_string().c_str(), it_route->second.nhg_key.to_string().c_str()); if (ipPrefix.isDefaultRoute()) { - it_route_table->second[ipPrefix] = NextHopGroupKey(); + it_route_table->second[ipPrefix] = RouteNhg(); /* Notify about default route next hop change */ - notifyNextHopChangeObservers(vrf_id, ipPrefix, it_route_table->second[ipPrefix], true); + notifyNextHopChangeObservers(vrf_id, ipPrefix, it_route_table->second[ipPrefix].nhg_key, true); } else { @@ -2114,12 +2270,15 @@ void RouteOrch::doLabelTask(Consumer& consumer) if (op == SET_COMMAND) { + SWSS_LOG_INFO("Set operation"); + string ips; string aliases; string vni_labels; string remote_macs; bool& excp_intfs_flag = ctx.excp_intfs_flag; bool overlay_nh = false; + string nhg_index; for (auto i : kfvFieldsValues(t)) { @@ -2136,16 +2295,45 @@ void RouteOrch::doLabelTask(Consumer& consumer) if (fvField(i) == "router_mac") remote_macs = fvValue(i); + + if (fvField(i) == "nexthop_group") + nhg_index = fvValue(i); + } + + SWSS_LOG_INFO("Label route %u has nexthop_group: %s, ips: %s, aliases: %s", + label, + nhg_index.c_str(), + ips.c_str(), + aliases.c_str()); + + /* + * A route should not fill both nexthop_group and ips / + * aliases. + */ + if (!nhg_index.empty() && (!ips.empty() || !aliases.empty())) + { + SWSS_LOG_ERROR("Route %s has both nexthop_group and ips/aliases", + key.c_str()); + it = consumer.m_toSync.erase(it); + continue; } + + ctx.nhg_index = nhg_index; + vector& ipv = ctx.ipv; vector alsv; - if (aliases.length() == 0) + /* + * If the nexthop_group is empty, create the next hop group key + * based on the IPs and aliases. Otherwise, get the key from + * the NhgOrch. + */ + if (nhg_index.empty() && aliases.length() == 0) { // No next hop specified, so just pop the label. ctx.nhg = NextHopGroupKey(); } - else + else if (nhg_index.empty()) { ipv = tokenize(ips, ','); alsv = tokenize(aliases, ','); @@ -2187,12 +2375,11 @@ void RouteOrch::doLabelTask(Consumer& consumer) continue; } - string nhg_str = ""; + string nhg_str; if (overlay_nh == false) { nhg_str = ipv[0] + NH_DELIMITER + alsv[0]; - for (uint32_t i = 1; i < ipv.size(); i++) { nhg_str += NHG_DELIMITER + ipv[i] + NH_DELIMITER + alsv[i]; @@ -2211,6 +2398,22 @@ void RouteOrch::doLabelTask(Consumer& consumer) ctx.nhg = NextHopGroupKey(nhg_str, overlay_nh); } } + else + { + try + { + const NextHopGroup& nh_group = gNhgOrch->getNhg(nhg_index); + ctx.nhg = nh_group.getKey(); + ctx.is_temp = nh_group.isTemp(); + } + catch (const std::out_of_range& e) + { + SWSS_LOG_ERROR("Next hop group %s does not exist", + nhg_index.c_str()); + ++it; + continue; + } + } NextHopGroupKey& nhg = ctx.nhg; @@ -2238,7 +2441,8 @@ void RouteOrch::doLabelTask(Consumer& consumer) } else if (m_syncdLabelRoutes.find(vrf_id) == m_syncdLabelRoutes.end() || m_syncdLabelRoutes.at(vrf_id).find(label) == m_syncdLabelRoutes.at(vrf_id).end() || - m_syncdLabelRoutes.at(vrf_id).at(label) != nhg) + m_syncdLabelRoutes.at(vrf_id).at(label) != RouteNhg(nhg, nhg_index) || + ctx.is_temp) { if (addLabelRoute(ctx, nhg)) it = consumer.m_toSync.erase(it); @@ -2246,13 +2450,16 @@ void RouteOrch::doLabelTask(Consumer& consumer) it++; } else + { /* Duplicate entry */ + SWSS_LOG_INFO("Route %s is duplicate entry", key.c_str()); it = consumer.m_toSync.erase(it); + } // If already exhaust the nexthop groups, and there are pending removing routes in bulker, // flush the bulker and possibly collect some released nexthop groups - if (m_nextHopGroupCount >= m_maxNextHopGroupCount && - gLabelRouteBulker.removing_entries_count() > 0) + if (gNhgOrch->getNhgCount() >= gNhgOrch->getMaxNhgCount() && + gRouteBulker.removing_entries_count() > 0) { break; } @@ -2260,6 +2467,7 @@ void RouteOrch::doLabelTask(Consumer& consumer) else if (op == DEL_COMMAND) { /* Cannot locate the route or remove succeed */ + SWSS_LOG_INFO("Delete operation"); if (removeLabelRoute(ctx)) it = consumer.m_toSync.erase(it); else @@ -2329,7 +2537,8 @@ void RouteOrch::doLabelTask(Consumer& consumer) } else if (m_syncdLabelRoutes.find(vrf_id) == m_syncdLabelRoutes.end() || m_syncdLabelRoutes.at(vrf_id).find(label) == m_syncdLabelRoutes.at(vrf_id).end() || - m_syncdLabelRoutes.at(vrf_id).at(label) != nhg) + m_syncdLabelRoutes.at(vrf_id).at(label) != RouteNhg(nhg, ctx.nhg_index) || + ctx.is_temp) { if (addLabelRoutePost(ctx, nhg)) it_prev = consumer.m_toSync.erase(it_prev); @@ -2369,7 +2578,12 @@ void RouteOrch::addTempLabelRoute(LabelRouteBulkContext& ctx, const NextHopGroup /* Remove next hops that are not in m_syncdNextHops */ for (auto it = next_hop_set.begin(); it != next_hop_set.end();) { - if (!m_neighOrch->hasNextHop(*it)) + /* + * Check if the IP next hop exists in NeighOrch. The next hop may be + * a labeled one, which are created by RouteOrch or NhgOrch if the IP + * next hop exists. + */ + if (!m_neighOrch->hasNextHop(it->ipKey())) { SWSS_LOG_INFO("Failed to get next hop %s for %u", (*it).to_string().c_str(), label); @@ -2401,6 +2615,10 @@ bool RouteOrch::addLabelRoute(LabelRouteBulkContext& ctx, const NextHopGroupKey sai_object_id_t& vrf_id = ctx.vrf_id; Label& label = ctx.label; + SWSS_LOG_NOTICE("Adding route for label %u with next hop(s) %s", + label, + nextHops.to_string().c_str()); + /* next_hop_id indicates the next hop id or next hop group id of this route */ sai_object_id_t next_hop_id; @@ -2413,83 +2631,108 @@ bool RouteOrch::addLabelRoute(LabelRouteBulkContext& ctx, const NextHopGroupKey auto it_route = m_syncdLabelRoutes.at(vrf_id).find(label); /* The route has no next hop specified so just pop the label */ - if (nextHops.getSize() == 0) + if ((nextHops.getSize() == 0) && ctx.nhg_index.empty()) { next_hop_id = 0; } - /* The route is pointing to a next hop */ - else if (nextHops.getSize() == 1) + else if (ctx.nhg_index.empty()) { - NextHopKey nexthop(nextHops.to_string()); - if (nexthop.ip_address.isZero()) - { - next_hop_id = m_intfsOrch->getRouterIntfsId(nexthop.alias); - /* rif is not created yet */ - if (next_hop_id == SAI_NULL_OBJECT_ID) - { - SWSS_LOG_INFO("Failed to get next hop %s for %u", - nextHops.to_string().c_str(), label); - return false; - } - } - else + SWSS_LOG_INFO("Next hop group is not owned by NhgOrch"); + /* The route is pointing to a next hop */ + if (nextHops.getSize() == 1) { - if (m_neighOrch->hasNextHop(nexthop)) + NextHopKey nexthop(nextHops.to_string()); + if (nexthop.ip_address.isZero()) { - next_hop_id = m_neighOrch->getNextHopId(nexthop); - } - /* See if there is an IP neighbor nexthop */ - else if (nexthop.label_stack.getSize() && - m_neighOrch->hasNextHop(NextHopKey(nexthop.ip_address, nexthop.alias))) - { - m_neighOrch->addNextHop(nexthop); - next_hop_id = m_neighOrch->getNextHopId(nexthop); + next_hop_id = m_intfsOrch->getRouterIntfsId(nexthop.alias); + /* rif is not created yet */ + if (next_hop_id == SAI_NULL_OBJECT_ID) + { + SWSS_LOG_WARN("Failed to get next hop %s for %u", + nextHops.to_string().c_str(), label); + return false; + } } else { - SWSS_LOG_INFO("Failed to get next hop %s for %u", - nextHops.to_string().c_str(), label); - return false; + if (m_neighOrch->hasNextHop(nexthop)) + { + next_hop_id = m_neighOrch->getNextHopId(nexthop); + } + /* See if there is an IP neighbor nexthop */ + else if (nexthop.label_stack.getSize() && + m_neighOrch->hasNextHop(NextHopKey(nexthop.ip_address, nexthop.alias))) + { + m_neighOrch->addNextHop(nexthop); + next_hop_id = m_neighOrch->getNextHopId(nexthop); + } + else + { + SWSS_LOG_WARN("Failed to get next hop %s for %u", + nextHops.to_string().c_str(), label); + return false; + } } } - } - /* The route is pointing to a next hop group */ - else - { - /* Check if there is already an existing next hop group */ - if (!hasNextHopGroup(nextHops)) + /* The route is pointing to a next hop group */ + else { - /* Try to create a new next hop group */ - if (!addNextHopGroup(nextHops)) + /* Check if there is already an existing next hop group */ + if (!hasNextHopGroup(nextHops)) { - /* Failed to create the next hop group and check if a temporary route is needed */ - - /* If the current next hop is part of the next hop group to sync, - * then return false and no need to add another temporary route. */ - if (it_route != m_syncdLabelRoutes.at(vrf_id).end() && it_route->second.getSize() == 1) + SWSS_LOG_INFO("Next hop group %s does not exist", + nextHops.to_string().c_str()); + /* Try to create a new next hop group */ + if (!addNextHopGroup(nextHops)) { - NextHopKey nexthop(it_route->second.to_string()); - if (nextHops.contains(nexthop)) + /* Failed to create the next hop group and check if a temporary route is needed */ + SWSS_LOG_WARN("Failed to create next hop group %s", + nextHops.to_string().c_str()); + + /* If the current next hop is part of the next hop group to sync, + * then return false and no need to add another temporary route. */ + if (it_route != m_syncdLabelRoutes.at(vrf_id).end() && it_route->second.nhg_key.getSize() == 1) { - return false; + NextHopKey nexthop(it_route->second.nhg_key.to_string()); + if (nextHops.contains(nexthop)) + { + return false; + } } - } - /* Add a temporary route when a next hop group cannot be added, - * and there is no temporary route right now or the current temporary - * route is not pointing to a member of the next hop group to sync. */ - addTempLabelRoute(ctx, nextHops); - /* Return false since the original route is not successfully added */ - return false; + /* Add a temporary route when a next hop group cannot be added, + * and there is no temporary route right now or the current temporary + * route is not pointing to a member of the next hop group to sync. */ + addTempLabelRoute(ctx, nextHops); + /* Return false since the original route is not successfully added */ + return false; + } } - } - next_hop_id = m_syncdNextHopGroups[nextHops].next_hop_group_id; + next_hop_id = m_syncdNextHopGroups[nextHops].next_hop_group_id; + } } + else + { + SWSS_LOG_INFO("Next hop group is owned by NhgOrch with index %s", + ctx.nhg_index.c_str()); + try + { + const NextHopGroup& nhg = gNhgOrch->getNhg(ctx.nhg_index); + next_hop_id = nhg.getId(); + } + catch(const std::out_of_range& e) + { + SWSS_LOG_WARN("Next hop group key %s does not exist", + ctx.nhg_index.c_str()); + return false; + } + } + + SWSS_LOG_INFO("Next hop ID: %lu", next_hop_id); /* Sync the inseg entry */ sai_inseg_entry_t inseg_entry; - // route_entry.vr_id = vrf_id; No VRF support for MPLS? inseg_entry.switch_id = gSwitchId; inseg_entry.label = label; @@ -2552,39 +2795,55 @@ bool RouteOrch::addLabelRoutePost(const LabelRouteBulkContext& ctx, const NextHo /* next_hop_id indicates the next hop id or next hop group id of this route */ sai_object_id_t next_hop_id; - /* The route is pointing to a next hop */ - if (nextHops.getSize() == 1) + /* Check that the next hop group is not owned by NhgOrch. */ + if (ctx.nhg_index.empty()) { - NextHopKey nexthop(nextHops.to_string()); - if (nexthop.ip_address.isZero()) + SWSS_LOG_INFO("Next hop group is not owned by NhgOrch"); + /* The route is pointing to a next hop */ + if (nextHops.getSize() == 1) { - next_hop_id = m_intfsOrch->getRouterIntfsId(nexthop.alias); - /* rif is not created yet */ - if (next_hop_id == SAI_NULL_OBJECT_ID) + NextHopKey nexthop(nextHops.to_string()); + if (nexthop.ip_address.isZero()) { - SWSS_LOG_INFO("Failed to get next hop %s for label %u", - nextHops.to_string().c_str(), label); - return false; + next_hop_id = m_intfsOrch->getRouterIntfsId(nexthop.alias); + /* rif is not created yet */ + if (next_hop_id == SAI_NULL_OBJECT_ID) + { + SWSS_LOG_INFO("Failed to get next hop %s for label %u", + nextHops.to_string().c_str(), label); + return false; + } + } + else + { + if (!m_neighOrch->hasNextHop(nexthop)) + { + SWSS_LOG_INFO("Failed to get next hop %s for label %u", + nextHops.to_string().c_str(), label); + return false; + } } } - else + /* The route is pointing to a next hop group */ + else if (nextHops.getSize() > 1) { - if (!m_neighOrch->hasNextHop(nexthop)) + if (!hasNextHopGroup(nextHops)) { - SWSS_LOG_INFO("Failed to get next hop %s for label %u", - nextHops.to_string().c_str(), label); + // Previous added an temporary route + auto& tmp_next_hop = ctx.tmp_next_hop; + addLabelRoutePost(ctx, tmp_next_hop); return false; } } } - /* The route is pointing to a next hop group */ else { - if (!hasNextHopGroup(nextHops)) + SWSS_LOG_INFO("NhgOrch owns the next hop group with index %s", + ctx.nhg_index.c_str()); + if (!gNhgOrch->hasNhg(ctx.nhg_index)) { - // Previous added an temporary route - auto& tmp_next_hop = ctx.tmp_next_hop; - addLabelRoutePost(ctx, tmp_next_hop); + SWSS_LOG_WARN("Failed to get next hop group with index %s", + ctx.nhg_index.c_str()); return false; } } @@ -2608,16 +2867,26 @@ bool RouteOrch::addLabelRoutePost(const LabelRouteBulkContext& ctx, const NextHo gCrmOrch->incCrmResUsedCounter(CrmResourceType::CRM_MPLS_INSEG); /* Increase the ref_count for the next hop (group) entry */ - increaseNextHopRefCount(nextHops); - SWSS_LOG_INFO("Post create label %u with next hop(s) %s", - label, nextHops.to_string().c_str()); + if (ctx.nhg_index.empty()) + { + increaseNextHopRefCount(nextHops); + } + else + { + SWSS_LOG_INFO("Increment NhgOrch's NHG %s ref count", + ctx.nhg_index.c_str()); + gNhgOrch->incNhgRefCount(ctx.nhg_index); + } + + SWSS_LOG_INFO("Create label route %u with next hop(s) %s", + label, nextHops.to_string().c_str()); } else { sai_status_t status; /* Set the packet action to forward when there was no next hop (dropped) */ - if (it_route->second.getSize() == 0) + if (it_route->second.nhg_key.getSize() == 0) { status = *it_status++; if (status != SAI_STATUS_SUCCESS) @@ -2636,24 +2905,42 @@ bool RouteOrch::addLabelRoutePost(const LabelRouteBulkContext& ctx, const NextHo return false; } - /* Increase the ref_count for the next hop (group) entry */ - increaseNextHopRefCount(nextHops); + /* Decrease the ref count for the previous next hop group. */ + if (it_route->second.nhg_index.empty()) + { + decreaseNextHopRefCount(it_route->second.nhg_key); + if (it_route->second.nhg_key.getSize() > 1 + && m_syncdNextHopGroups[it_route->second.nhg_key].ref_count == 0) + { + m_bulkNhgReducedRefCnt.emplace(it_route->second.nhg_key); + } + } + else + { + /* The next hop group is owned by NeighOrch. */ + SWSS_LOG_INFO("Decrement NhgOrch's NHG %s ref count", + it_route->second.nhg_index.c_str()); + gNhgOrch->decNhgRefCount(it_route->second.nhg_index); + } - decreaseNextHopRefCount(it_route->second); - if (it_route->second.getSize() > 1 - && m_syncdNextHopGroups[it_route->second].ref_count == 0) + /* Increase the ref_count for the next hop (group) entry */ + if (ctx.nhg_index.empty()) + { + increaseNextHopRefCount(nextHops); + } + else { - m_bulkNhgReducedRefCnt.emplace(it_route->second); + SWSS_LOG_INFO("Increment NhgOrch's NHG %s ref count", + ctx.nhg_index.c_str()); + gNhgOrch->incNhgRefCount(ctx.nhg_index); } - SWSS_LOG_INFO("Post set label %u with next hop(s) %s", - label, nextHops.to_string().c_str()); + + SWSS_LOG_INFO("Set label route %u with next hop(s) %s", + label, nextHops.to_string().c_str()); } - m_syncdLabelRoutes[vrf_id][label] = nextHops; + m_syncdLabelRoutes[vrf_id][label] = RouteNhg(nextHops, ctx.nhg_index); -#if 0 // TODO: MPLS observers? - notifyNextHopChangeObservers(vrf_id, label, nextHops, true); -#endif // 0 TODO return true; } @@ -2672,7 +2959,6 @@ bool RouteOrch::removeLabelRoute(LabelRouteBulkContext& ctx) } sai_inseg_entry_t inseg_entry; - //inseg_entry.vr_id = vrf_id; No VRF support for MPLS inseg_entry.switch_id = gSwitchId; inseg_entry.label = label; @@ -2721,31 +3007,43 @@ bool RouteOrch::removeLabelRoutePost(const LabelRouteBulkContext& ctx) gCrmOrch->decCrmResUsedCounter(CrmResourceType::CRM_MPLS_INSEG); - /* - * Decrease the reference count only when the route is pointing to a next hop. - */ - decreaseNextHopRefCount(it_route->second); - if (it_route->second.getSize() > 1 - && m_syncdNextHopGroups[it_route->second].ref_count == 0) + if (it_route->second.nhg_index.empty()) { - m_bulkNhgReducedRefCnt.emplace(it_route->second); - } - /* - * Additionally check if the NH has label and its ref count == 0, then - * remove the label next hop. - */ - else if (it_route->second.getSize() == 1) - { - NextHopKey nexthop(it_route->second.to_string()); - if (nexthop.label_stack.getSize() && - (m_neighOrch->getNextHopRefCount(nexthop) == 0)) + /* + * Decrease the reference count only when the route is pointing to a next hop. + * Decrease the reference count when the route is pointing to a next hop group, + * and check whether the reference count decreases to zero. If yes, then we need + * to remove the next hop group. + */ + decreaseNextHopRefCount(it_route->second.nhg_key); + if (it_route->second.nhg_key.getSize() > 1 + && m_syncdNextHopGroups[it_route->second.nhg_key].ref_count == 0) { - m_neighOrch->removeNextHop(nexthop); + m_bulkNhgReducedRefCnt.emplace(it_route->second.nhg_key); } + /* + * Additionally check if the NH has label and its ref count == 0, then + * remove the label next hop. + */ + else if (it_route->second.nhg_key.getSize() == 1) + { + NextHopKey nexthop(it_route->second.nhg_key.to_string()); + if (nexthop.label_stack.getSize() && + (m_neighOrch->getNextHopRefCount(nexthop) == 0)) + { + m_neighOrch->removeNextHop(nexthop); + } + } + } + else + { + SWSS_LOG_INFO("Decrement NhgOrch's NHG %s ref count", + it_route->second.nhg_index.c_str()); + gNhgOrch->decNhgRefCount(it_route->second.nhg_index); } - SWSS_LOG_INFO("Remove label %u with next hop(s) %s", - label, it_route->second.to_string().c_str()); + SWSS_LOG_INFO("Remove label route %u with next hop(s) %s", + label, it_route->second.nhg_key.to_string().c_str()); it_route_table->second.erase(label); diff --git a/orchagent/routeorch.h b/orchagent/routeorch.h index 1eecde5be36..1984ca9b93b 100644 --- a/orchagent/routeorch.h +++ b/orchagent/routeorch.h @@ -40,16 +40,40 @@ struct NextHopUpdate NextHopGroupKey nexthopGroup; }; +/* + * Structure describing the next hop group used by a route. As the next hop + * groups can either be owned by RouteOrch or by NhgOrch, we have to keep track + * of the next hop group index, as it is the one telling us which one owns it. + */ +struct RouteNhg +{ + NextHopGroupKey nhg_key; + + /* + * Index of the next hop group used. Filled only if referencing a + * NhgOrch's owned next hop group. + */ + std::string nhg_index; + + RouteNhg() = default; + RouteNhg(const NextHopGroupKey& key, const std::string& index) : + nhg_key(key), nhg_index(index) {} + + bool operator==(const RouteNhg& rnhg) + { return ((nhg_key == rnhg.nhg_key) && (nhg_index == rnhg.nhg_index)); } + bool operator!=(const RouteNhg& rnhg) { return !(*this == rnhg); } +}; + struct NextHopObserverEntry; /* NextHopGroupTable: NextHopGroupKey, NextHopGroupEntry */ typedef std::map NextHopGroupTable; /* RouteTable: destination network, NextHopGroupKey */ -typedef std::map RouteTable; +typedef std::map RouteTable; /* RouteTables: vrf_id, RouteTable */ typedef std::map RouteTables; /* LabelRouteTable: destination label, next hop address(es) */ -typedef std::map LabelRouteTable; +typedef std::map LabelRouteTable; /* LabelRouteTables: vrf_id, LabelRouteTable */ typedef std::map LabelRouteTables; /* Host: vrf_id, IpAddress */ @@ -68,13 +92,16 @@ struct RouteBulkContext std::deque object_statuses; // Bulk statuses NextHopGroupKey tmp_next_hop; // Temporary next hop NextHopGroupKey nhg; + std::string nhg_index; sai_object_id_t vrf_id; IpPrefix ip_prefix; bool excp_intfs_flag; std::vector ipv; + // is_temp will track if the NhgOrch's owned NHG is temporary or not + bool is_temp; RouteBulkContext() - : excp_intfs_flag(false) + : excp_intfs_flag(false), is_temp(false) { } @@ -90,6 +117,7 @@ struct RouteBulkContext ipv.clear(); excp_intfs_flag = false; vrf_id = SAI_NULL_OBJECT_ID; + is_temp = false; } }; @@ -98,13 +126,16 @@ struct LabelRouteBulkContext std::deque object_statuses; // Bulk statuses NextHopGroupKey tmp_next_hop; // Temporary next hop NextHopGroupKey nhg; + std::string nhg_index; sai_object_id_t vrf_id; Label label; bool excp_intfs_flag; std::vector ipv; + // is_temp will track if the NhgOrch's owned NHG is temporary or not + bool is_temp; LabelRouteBulkContext() - : excp_intfs_flag(false) + : excp_intfs_flag(false), is_temp(false) { } @@ -126,7 +157,7 @@ struct LabelRouteBulkContext class RouteOrch : public Orch, public Subject { public: - RouteOrch(DBConnector *db, vector &tableNames, SwitchOrch *switchOrch, NeighOrch *neighOrch, IntfsOrch *intfsOrch, VRFOrch *vrfOrch, FgNhgOrch *fgNhgOrch); + RouteOrch(DBConnector *db, vector &tableNames, NeighOrch *neighOrch, IntfsOrch *intfsOrch, VRFOrch *vrfOrch, FgNhgOrch *fgNhgOrch); bool hasNextHopGroup(const NextHopGroupKey&) const; sai_object_id_t getNextHopGroupId(const NextHopGroupKey&); @@ -156,14 +187,11 @@ class RouteOrch : public Orch, public Subject bool removeFineGrainedNextHopGroup(sai_object_id_t &next_hop_group_id); private: - SwitchOrch *m_switchOrch; NeighOrch *m_neighOrch; IntfsOrch *m_intfsOrch; VRFOrch *m_vrfOrch; FgNhgOrch *m_fgNhgOrch; - int m_nextHopGroupCount; - int m_maxNextHopGroupCount; bool m_resync; RouteTables m_syncdRoutes; diff --git a/orchagent/saihelper.cpp b/orchagent/saihelper.cpp index dac7b1e92f9..409dd67533f 100644 --- a/orchagent/saihelper.cpp +++ b/orchagent/saihelper.cpp @@ -80,15 +80,15 @@ static map hardware_access_map = map gProfileMap; -sai_status_t mdio_read(uint64_t platform_context, - uint32_t mdio_addr, uint32_t reg_addr, +sai_status_t mdio_read(uint64_t platform_context, + uint32_t mdio_addr, uint32_t reg_addr, uint32_t number_of_registers, uint32_t *data) { return SAI_STATUS_NOT_IMPLEMENTED; } -sai_status_t mdio_write(uint64_t platform_context, - uint32_t mdio_addr, uint32_t reg_addr, +sai_status_t mdio_write(uint64_t platform_context, + uint32_t mdio_addr, uint32_t reg_addr, uint32_t number_of_registers, uint32_t *data) { return SAI_STATUS_NOT_IMPLEMENTED; @@ -198,6 +198,7 @@ void initSaiApi() sai_log_set(SAI_API_NEIGHBOR, SAI_LOG_LEVEL_NOTICE); sai_log_set(SAI_API_NEXT_HOP, SAI_LOG_LEVEL_NOTICE); sai_log_set(SAI_API_NEXT_HOP_GROUP, SAI_LOG_LEVEL_NOTICE); + sai_log_set(SAI_API_MPLS, SAI_LOG_LEVEL_NOTICE); sai_log_set(SAI_API_ROUTE, SAI_LOG_LEVEL_NOTICE); sai_log_set(SAI_API_MPLS, SAI_LOG_LEVEL_NOTICE); sai_log_set(SAI_API_LAG, SAI_LOG_LEVEL_NOTICE); @@ -438,8 +439,8 @@ sai_status_t initSaiPhyApi(swss::gearbox_phy_t *phy) { SWSS_LOG_ERROR("BOX: Failed to get firmware major version:%d rtn:%d", phy->phy_id, status); return status; - } - else + } + else { phy->firmware_major_version = string(attr.value.chardata); } diff --git a/tests/mock_tests/Makefile.am b/tests/mock_tests/Makefile.am index 80f9e7173e4..d02826349b1 100644 --- a/tests/mock_tests/Makefile.am +++ b/tests/mock_tests/Makefile.am @@ -38,6 +38,7 @@ tests_SOURCES = aclorch_ut.cpp \ $(top_srcdir)/orchagent/notifications.cpp \ $(top_srcdir)/orchagent/routeorch.cpp \ $(top_srcdir)/orchagent/fgnhgorch.cpp \ + $(top_srcdir)/orchagent/nhgorch.cpp \ $(top_srcdir)/orchagent/neighorch.cpp \ $(top_srcdir)/orchagent/intfsorch.cpp \ $(top_srcdir)/orchagent/portsorch.cpp \ diff --git a/tests/mock_tests/aclorch_ut.cpp b/tests/mock_tests/aclorch_ut.cpp index 507754bb78c..bd90d27d749 100644 --- a/tests/mock_tests/aclorch_ut.cpp +++ b/tests/mock_tests/aclorch_ut.cpp @@ -360,7 +360,7 @@ namespace aclorch_test { APP_ROUTE_TABLE_NAME, routeorch_pri }, { APP_LABEL_ROUTE_TABLE_NAME, routeorch_pri } }; - gRouteOrch = new RouteOrch(m_app_db.get(), route_tables, gSwitchOrch, gNeighOrch, gIntfsOrch, gVrfOrch, gFgNhgOrch); + gRouteOrch = new RouteOrch(m_app_db.get(), route_tables, gNeighOrch, gIntfsOrch, gVrfOrch, gFgNhgOrch); PolicerOrch *policer_orch = new PolicerOrch(m_config_db.get(), "POLICER"); diff --git a/tests/test_nhg.py b/tests/test_nhg.py index 485fc7661c1..e62043c5c4c 100644 --- a/tests/test_nhg.py +++ b/tests/test_nhg.py @@ -10,30 +10,115 @@ class TestNextHopGroup(object): - def test_route_nhg(self, dvs, dvs_route, testlog): + ASIC_NHS_STR = "ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP" + ASIC_NHG_STR = "ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP_GROUP" + ASIC_NHGM_STR = ASIC_NHG_STR + "_MEMBER" + ASIC_RT_STR = "ASIC_STATE:SAI_OBJECT_TYPE_ROUTE_ENTRY" + ASIC_INSEG_STR = "ASIC_STATE:SAI_OBJECT_TYPE_INSEG_ENTRY" + + def get_route_id(self, prefix, dvs): + for k in dvs.get_asic_db().get_keys(self.ASIC_RT_STR): + if json.loads(k)['dest'] == prefix: + return k + + return None + + def get_inseg_id(self, label, dvs): + for k in dvs.get_asic_db().get_keys(self.ASIC_INSEG_STR): + print(json.loads(k)) + if json.loads(k)['label'] == label: + return k + + return None + + def get_nhg_id(self, nhg_index, dvs): + # Add a route with the given index, then retrieve the next hop group ID + # from that route + asic_db = dvs.get_asic_db() + asic_rts_count = len(asic_db.get_keys(self.ASIC_RT_STR)) + + fvs = swsscommon.FieldValuePairs([('nexthop_group', nhg_index)]) + prefix = '255.255.255.255/24' + ps = swsscommon.ProducerStateTable(dvs.get_app_db().db_connection, + "ROUTE_TABLE") + ps.set(prefix, fvs) + + # Assert the route is created + try: + asic_db.wait_for_n_keys(self.ASIC_RT_STR, asic_rts_count + 1) + except: + return None + + # Get the route ID for the created route + rt_id = self.get_route_id(prefix, dvs) + assert rt_id != None + + # Get the NHGID + nhgid = asic_db.get_entry(self.ASIC_RT_STR, rt_id)["SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID"] + + # Remove the added route + ps._del(prefix) + asic_db.wait_for_deleted_entry(self.ASIC_RT_STR, rt_id) + + # Return the NHGID + return nhgid + + def get_nhgm_ids(self, nhg_index, dvs): + nhgid = self.get_nhg_id(nhg_index, dvs) + nhgms = [] + asic_db = dvs.get_asic_db() + + for k in asic_db.get_keys(self.ASIC_NHGM_STR): + fvs = asic_db.get_entry(self.ASIC_NHGM_STR, k) + + # Sometimes some of the NHGMs have no fvs for some reason, so + # we skip those + try: + if fvs['SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_GROUP_ID'] == nhgid: + nhgms.append(k) + except KeyError as e: + pass + + return nhgms + + def port_name(self, i): + return "Ethernet" + str(i * 4) + + def port_ip(self, i): + return "10.0.0." + str(i * 2) + + def port_ipprefix(self, i): + return self.port_ip(i) + "/31" + + def peer_ip(self, i): + return "10.0.0." + str(i * 2 + 1) + + def port_mac(self, i): + return "00:00:00:00:00:0" + str(i) + + def config_intf(self, i, dvs): config_db = dvs.get_config_db() - fvs = {"NULL": "NULL"} - config_db.create_entry("INTERFACE", "Ethernet0", fvs) - config_db.create_entry("INTERFACE", "Ethernet4", fvs) - config_db.create_entry("INTERFACE", "Ethernet8", fvs) - config_db.create_entry("INTERFACE", "Ethernet0|10.0.0.0/31", fvs) - config_db.create_entry("INTERFACE", "Ethernet4|10.0.0.2/31", fvs) - config_db.create_entry("INTERFACE", "Ethernet8|10.0.0.4/31", fvs) - dvs.runcmd("config interface startup Ethernet0") - dvs.runcmd("config interface startup Ethernet4") - dvs.runcmd("config interface startup Ethernet8") - - dvs.runcmd("arp -s 10.0.0.1 00:00:00:00:00:01") - dvs.runcmd("arp -s 10.0.0.3 00:00:00:00:00:02") - dvs.runcmd("arp -s 10.0.0.5 00:00:00:00:00:03") - - assert dvs.servers[0].runcmd("ip link set down dev eth0") == 0 - assert dvs.servers[1].runcmd("ip link set down dev eth0") == 0 - assert dvs.servers[2].runcmd("ip link set down dev eth0") == 0 - - assert dvs.servers[0].runcmd("ip link set up dev eth0") == 0 - assert dvs.servers[1].runcmd("ip link set up dev eth0") == 0 - assert dvs.servers[2].runcmd("ip link set up dev eth0") == 0 + fvs = {'NULL': 'NULL'} + + config_db.create_entry("INTERFACE", self.port_name(i), fvs) + config_db.create_entry("INTERFACE", "{}|{}".format(self.port_name(i), self.port_ipprefix(i)), fvs) + dvs.runcmd("config interface startup " + self.port_name(i)) + dvs.runcmd("arp -s {} {}".format(self.peer_ip(i), self.port_mac(i))) + assert dvs.servers[i].runcmd("ip link set down dev eth0") == 0 + assert dvs.servers[i].runcmd("ip link set up dev eth0") == 0 + + def flap_intf(self, i, status, dvs): + assert status in ['up', 'down'] + + dvs.servers[i].runcmd("ip link set {} dev eth0".format(status)) == 0 + time.sleep(1) + fvs = dvs.get_app_db().get_entry("PORT_TABLE", "Ethernet%d" % (i * 4)) + assert bool(fvs) + assert fvs["oper_status"] == status + + def test_route_nhg(self, dvs, dvs_route, testlog): + for i in range(3): + self.config_intf(i, dvs) rtprefix = "2.2.2.0/24" @@ -51,55 +136,41 @@ def test_route_nhg(self, dvs, dvs_route, testlog): rtkeys = dvs_route.check_asicdb_route_entries([rtprefix]) # assert the route points to next hop group - fvs = asic_db.get_entry("ASIC_STATE:SAI_OBJECT_TYPE_ROUTE_ENTRY", rtkeys[0]) + fvs = asic_db.get_entry(self.ASIC_RT_STR, rtkeys[0]) nhgid = fvs["SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID"] - fvs = asic_db.get_entry("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP_GROUP", nhgid) + fvs = asic_db.get_entry(self.ASIC_NHG_STR, nhgid) assert bool(fvs) - keys = asic_db.get_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP_GROUP_MEMBER") + keys = asic_db.get_keys(self.ASIC_NHGM_STR) assert len(keys) == 3 for k in keys: - fvs = asic_db.get_entry("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP_GROUP_MEMBER", k) + fvs = asic_db.get_entry(self.ASIC_NHGM_STR, k) assert fvs["SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_GROUP_ID"] == nhgid # bring links down one-by-one for i in [0, 1, 2]: - dvs.servers[i].runcmd("ip link set down dev eth0") == 0 - - time.sleep(1) + self.flap_intf(i, 'down', dvs) - fvs = app_db.get_entry("PORT_TABLE", "Ethernet%d" % (i * 4)) - - assert bool(fvs) - assert fvs["oper_status"] == "down" - - keys = asic_db.get_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP_GROUP_MEMBER") + keys = asic_db.get_keys(self.ASIC_NHGM_STR) assert len(keys) == 2 - i # bring links up one-by-one for i in [0, 1, 2]: - dvs.servers[i].runcmd("ip link set up dev eth0") == 0 - - time.sleep(1) + self.flap_intf(i, 'up', dvs) - fvs = app_db.get_entry("PORT_TABLE", "Ethernet%d" % (i * 4)) - - assert bool(fvs) - assert fvs["oper_status"] == "up" - - keys = asic_db.get_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP_GROUP_MEMBER") + keys = asic_db.get_keys(self.ASIC_NHGM_STR) assert len(keys) == i + 1 for k in keys: - fvs = asic_db.get_entry("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP_GROUP_MEMBER", k) + fvs = asic_db.get_entry(self.ASIC_NHGM_STR, k) assert fvs["SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_GROUP_ID"] == nhgid # Remove route 2.2.2.0/24 @@ -108,6 +179,370 @@ def test_route_nhg(self, dvs, dvs_route, testlog): # Wait for route 2.2.2.0/24 to be removed dvs_route.check_asicdb_deleted_route_entries([rtprefix]) + def test_label_route_nhg(self, dvs, testlog): + for i in range(3): + self.config_intf(i, dvs) + + app_db = dvs.get_app_db() + lr_ps = swsscommon.ProducerStateTable(app_db.db_connection, "LABEL_ROUTE_TABLE") + + asic_db = dvs.get_asic_db() + asic_insegs_count = len(asic_db.get_keys(self.ASIC_INSEG_STR)) + + # add label route + fvs = swsscommon.FieldValuePairs([("nexthop","10.0.0.1,10.0.0.3,10.0.0.5"), + ("ifname", "Ethernet0,Ethernet4,Ethernet8")]) + lr_ps.set("10", fvs) + + # check if route was propagated to ASIC DB + + asic_db.wait_for_n_keys(self.ASIC_INSEG_STR, asic_insegs_count + 1) + keys = asic_db.get_keys(self.ASIC_INSEG_STR) + + k = self.get_inseg_id('10', dvs) + assert k is not None + + # assert the route points to next hop group + fvs = asic_db.get_entry(self.ASIC_INSEG_STR, k) + + nhgid = fvs["SAI_INSEG_ENTRY_ATTR_NEXT_HOP_ID"] + + fvs = asic_db.get_entry(self.ASIC_NHG_STR, nhgid) + + assert bool(fvs) + + keys = asic_db.get_keys(self.ASIC_NHGM_STR) + + assert len(keys) == 3 + + for k in keys: + fvs = asic_db.get_entry(self.ASIC_NHGM_STR, k) + + assert fvs["SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_GROUP_ID"] == nhgid + + # bring links down one-by-one + for i in [0, 1, 2]: + self.flap_intf(i, 'down', dvs) + + keys = asic_db.get_keys(self.ASIC_NHGM_STR) + + assert len(keys) == 2 - i + + # bring links up one-by-one + for i in [0, 1, 2]: + self.flap_intf(i, 'up', dvs) + + keys = asic_db.get_keys(self.ASIC_NHGM_STR) + + assert len(keys) == i + 1 + + for k in keys: + fvs = asic_db.get_entry(self.ASIC_NHGM_STR, k) + assert fvs["SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_GROUP_ID"] == nhgid + + # Remove label route 10 + lr_ps._del("10") + + # Wait for label route 10 to be removed + asic_db.wait_for_n_keys(self.ASIC_INSEG_STR, asic_insegs_count) + + def test_nhgorch_labeled_nhs(self, dvs, testlog): + for i in range(2): + self.config_intf(i, dvs) + + app_db = dvs.get_app_db() + asic_db = dvs.get_asic_db() + nhg_ps = swsscommon.ProducerStateTable(app_db.db_connection, "NEXT_HOP_GROUP_TABLE") + asic_nhgs_count = len(asic_db.get_keys(self.ASIC_NHG_STR)) + asic_nhgms_count = len(asic_db.get_keys(self.ASIC_NHGM_STR)) + asic_nhs_count = len(asic_db.get_keys(self.ASIC_NHS_STR)) + + # Add a group containing labeled weighted NHs + fvs = swsscommon.FieldValuePairs([('nexthop', '1+10.0.0.1,3+10.0.0.3'), + ('ifname', 'Ethernet0,Ethernet4'), + ('weight', '2,4')]) + nhg_ps.set('group1', fvs) + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, asic_nhgs_count + 1) + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count + 2) + + # NhgOrch should create two next hops for the labeled ones + asic_db.wait_for_n_keys(self.ASIC_NHS_STR, asic_nhs_count + 2) + + # Assert the weights are properly set + nhgm_ids = self.get_nhgm_ids('group1', dvs) + weights = [] + for k in nhgm_ids: + fvs = asic_db.get_entry(self.ASIC_NHGM_STR, k) + weights.append(fvs['SAI_NEXT_HOP_GROUP_MEMBER_ATTR_WEIGHT']) + assert set(weights) == set(['2', '4']) + + # Create a new single next hop with the same label + fvs = swsscommon.FieldValuePairs([('nexthop', '1+10.0.0.1'), + ('ifname', 'Ethernet0')]) + nhg_ps.set('group2', fvs) + + # No new next hop should be added + time.sleep(1) + assert len(asic_db.get_keys(self.ASIC_NHS_STR)) == asic_nhs_count + 2 + + # Create a new single next hop with a different label + fvs = swsscommon.FieldValuePairs([('nexthop', '2+10.0.0.1'), + ('ifname', 'Ethernet0')]) + nhg_ps.set('group3', fvs) + + # A new next hop should be added + asic_db.wait_for_n_keys(self.ASIC_NHS_STR, asic_nhs_count + 3) + + # Delete group3 + nhg_ps._del('group3') + + # Group3's NH should be deleted + asic_db.wait_for_n_keys(self.ASIC_NHS_STR, asic_nhs_count + 2) + + # Delete group2 + nhg_ps._del('group2') + + # The number of NHs should be the same as they are still referenced by + # group1 + time.sleep(1) + asic_db.wait_for_n_keys(self.ASIC_NHS_STR, asic_nhs_count + 2) + + # Update group1 with one weight only + fvs = swsscommon.FieldValuePairs([('nexthop', '2+10.0.0.1,4+10.0.0.3'), + ('ifname', 'Ethernet0,Ethernet4'), + ('weight', '2')]) + nhg_ps.set('group1', fvs) + + # The next hop group members and next hops should be replaced + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count + 2) + asic_db.wait_for_n_keys(self.ASIC_NHS_STR, asic_nhs_count + 2) + + # Assert the weights of the NHGMs are the expected ones + nhgm_ids = self.get_nhgm_ids('group1', dvs) + weights = set() + for nhgm_id in nhgm_ids: + fvs = asic_db.get_entry(self.ASIC_NHGM_STR, nhgm_id) + weights.add(fvs['SAI_NEXT_HOP_GROUP_MEMBER_ATTR_WEIGHT']) + assert weights == set(['2', '1']) + + # Update group1 with no weights and both labeled and unlabeled NHs + fvs = swsscommon.FieldValuePairs([('nexthop', '2+10.0.0.1,10.0.0.3'), + ('ifname', 'Ethernet0,Ethernet4')]) + nhg_ps.set('group1', fvs) + + # Group members should be replaced and one NH should get deleted + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count + 2) + asic_db.wait_for_n_keys(self.ASIC_NHS_STR, asic_nhs_count + 1) + + # Assert the weights of the NHGMs are the expected ones + nhgm_ids = self.get_nhgm_ids('group1', dvs) + weights = [] + for nhgm_id in nhgm_ids: + fvs = asic_db.get_entry(self.ASIC_NHGM_STR, nhgm_id) + weights.append(fvs['SAI_NEXT_HOP_GROUP_MEMBER_ATTR_WEIGHT']) + assert weights == ['1', '1'] + + # Delete group1 + nhg_ps._del('group1') + + # Wait for the group and it's members to be deleted + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, asic_nhgs_count) + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count) + + # The two next hops should also get deleted + asic_db.wait_for_n_keys(self.ASIC_NHS_STR, asic_nhs_count) + + def test_nhgorch_excp_group_cases(self, dvs, testlog): + for i in range(3): + self.config_intf(i, dvs) + + app_db = dvs.get_app_db() + asic_db = dvs.get_asic_db() + nhg_ps = swsscommon.ProducerStateTable(app_db.db_connection, "NEXT_HOP_GROUP_TABLE") + rt_ps = swsscommon.ProducerStateTable(app_db.db_connection, "ROUTE_TABLE") + + def get_nhg_keys(): + return asic_db.get_keys(self.ASIC_NHG_STR) + + def get_nhgm_keys(): + return asic_db.get_keys(self.ASIC_NHGM_STR) + + def get_rt_keys(): + return asic_db.get_keys(self.ASIC_RT_STR) + + # Count existing objects + prev_nhg_keys = get_nhg_keys() + asic_nhgs_count = len(get_nhg_keys()) + asic_nhgms_count = len(get_nhgm_keys()) + asic_nhs_count = len(asic_db.get_keys(self.ASIC_NHS_STR)) + + # Remove a group that does not exist + nhg_ps._del("group1") + time.sleep(1) + assert len(get_nhg_keys()) == asic_nhgs_count + + # Create a next hop group with a member that does not exist - should fail + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.1,10.0.0.3,10.0.0.63'), + ("ifname", "Ethernet0,Ethernet4,Ethernet124")]) + nhg_ps.set("group1", fvs) + time.sleep(1) + assert len(asic_db.get_keys(self.ASIC_NHG_STR)) == asic_nhgs_count + + # Issue an update for this next hop group that doesn't yet exist, + # which contains only valid NHs. This will overwrite the previous + # operation and create the group. + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.1,10.0.0.5'), + ("ifname", "Ethernet0,Ethernet8")]) + nhg_ps.set("group1", fvs) + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, asic_nhgs_count + 1) + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count + 2) + + # Check the group has it's two members + for nhgid in asic_db.get_keys(self.ASIC_NHG_STR): + if nhgid not in prev_nhg_keys: + break + + count = 0 + for k in asic_db.get_keys(self.ASIC_NHGM_STR): + fvs = asic_db.get_entry(self.ASIC_NHGM_STR, k) + if fvs['SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_GROUP_ID'] == nhgid: + count += 1 + assert count == 2 + + # Add a route referencing the new group + asic_rts_count = len(get_rt_keys()) + fvs = swsscommon.FieldValuePairs([('nexthop_group', 'group1')]) + rt_ps.set('2.2.2.0/24', fvs) + asic_db.wait_for_n_keys(self.ASIC_RT_STR, asic_rts_count + 1) + + # Get the route key + for rt_key in get_rt_keys(): + k = json.loads(rt_key) + + if k['dest'] == "2.2.2.0/24": + break + + # Try removing the group while it still has references - should fail + nhg_ps._del('group1') + time.sleep(1) + assert len(get_nhg_keys()) == asic_nhgs_count + 1 + + # Create a new group + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.1,10.0.0.3'), + ('ifname', 'Ethernet0,Ethernet4')]) + nhg_ps.set("group2", fvs) + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, asic_nhgs_count + 2) + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count + 4) + + # Update the route to point to the new group + fvs = swsscommon.FieldValuePairs([('nexthop_group', 'group2')]) + rt_ps.set('2.2.2.0/24', fvs) + + # The first group should have got deleted + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, asic_nhgs_count + 1) + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count + 2) + + assert asic_db.get_entry(self.ASIC_RT_STR, rt_key)['SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID'] != nhgid + + # Update the route with routeOrch's owned next hop group + nhgid = asic_db.get_entry(self.ASIC_RT_STR, rt_key)['SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID'] + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.1,10.0.0.3'), + ('ifname', 'Ethernet0,Ethernet4')]) + rt_ps.set('2.2.2.0/24', fvs) + + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, asic_nhgs_count + 2) + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count + 4) + + # Assert the next hop group ID changed + assert asic_db.get_entry(self.ASIC_RT_STR, rt_key)['SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID'] != nhgid + nhgid = asic_db.get_entry(self.ASIC_RT_STR, rt_key)['SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID'] + + # Update the route to point back to group2 + fvs = swsscommon.FieldValuePairs([('nexthop_group', 'group2')]) + rt_ps.set('2.2.2.0/24', fvs) + + # The routeOrch's owned next hop group should get deleted + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, asic_nhgs_count + 1) + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count + 2) + + # Assert the route points back to group2 + assert asic_db.get_entry(self.ASIC_RT_STR, rt_key)['SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID'] != nhgid + + # Create a new group with the same members as group2 + nhgid = asic_db.get_entry(self.ASIC_RT_STR, rt_key)['SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID'] + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.1,10.0.0.3'), + ('ifname', 'Ethernet0,Ethernet4')]) + nhg_ps.set("group1", fvs) + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, asic_nhgs_count + 2) + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count + 4) + + # Update the route to point to the new group + fvs = swsscommon.FieldValuePairs([('nexthop_group', 'group1')]) + rt_ps.set('2.2.2.0/24', fvs) + time.sleep(1) + + # Assert the next hop group ID changed + assert asic_db.get_entry(self.ASIC_RT_STR, rt_key)['SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID'] != nhgid + + # Remove the route + rt_ps._del('2.2.2.0/24') + asic_db.wait_for_n_keys(self.ASIC_RT_STR, asic_rts_count) + + # Remove the groups + nhg_ps._del('group1') + nhg_ps._del('group2') + + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, asic_nhgs_count) + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count) + + # Create a route with labeled NHs + fvs = swsscommon.FieldValuePairs([('nexthop', '1+10.0.0.1,3+10.0.0.3'), + ('ifname', 'Ethernet0,Ethernet4'), + ('weight', '2,4')]) + rt_ps.set('2.2.2.0/24', fvs) + asic_db.wait_for_n_keys(self.ASIC_RT_STR, asic_rts_count + 1) + + # Two new next hops should be created + asic_db.wait_for_n_keys(self.ASIC_NHS_STR, asic_nhs_count + 2) + + # Create a NHG with the same details + nhg_ps.set('group1', fvs) + + # No new next hops should be created + time.sleep(1) + assert len(asic_db.get_keys(self.ASIC_NHS_STR)) == asic_nhs_count + 2 + + # Update the group with a different NH + fvs = swsscommon.FieldValuePairs([('nexthop', '2+10.0.0.1,3+10.0.0.3'), + ('ifname', 'Ethernet0,Ethernet4'), + ('weight', '2,4')]) + nhg_ps.set('group1', fvs) + + # A new next hop should be created + asic_db.wait_for_n_keys(self.ASIC_NHS_STR, asic_nhs_count + 3) + + # Remove the route + rt_ps._del('2.2.2.0/24') + + # One NH should become unreferenced and should be deleted. The other + # one is still referenced by NhgOrch's owned NHG. + asic_db.wait_for_n_keys(self.ASIC_NHS_STR, asic_nhs_count + 2) + + # Remove the group + nhg_ps._del('group1') + + # Both new next hops should be deleted + asic_db.wait_for_n_keys(self.ASIC_NHS_STR, asic_nhs_count) + + # Add a route with a NHG that does not exist + fvs = swsscommon.FieldValuePairs([('nexthop_group', 'group1')]) + rt_ps.set('2.2.2.0/24', fvs) + time.sleep(1) + assert asic_rts_count == len(asic_db.get_keys(self.ASIC_RT_STR)) + + # Remove the pending route + rt_ps._del('2.2.2.0/24') + def test_route_nhg_exhaust(self, dvs, testlog): """ Test the situation of exhausting ECMP group, assume SAI_SWITCH_ATTR_NUMBER_OF_ECMP_GROUPS is 512 @@ -125,21 +560,6 @@ def test_route_nhg_exhaust(self, dvs, testlog): # TODO: check ECMP 512 - def port_name(i): - return "Ethernet" + str(i * 4) - - def port_ip(i): - return "10.0.0." + str(i * 2) - - def peer_ip(i): - return "10.0.0." + str(i * 2 + 1) - - def port_ipprefix(i): - return port_ip(i) + "/31" - - def port_mac(i): - return "00:00:00:00:00:0" + str(i) - def gen_ipprefix(r): """ Construct route like 2.X.X.0/24 """ ip = ipaddress.IPv4Address(IP_INTEGER_BASE + r * 256) @@ -152,25 +572,16 @@ def gen_nhg_fvs(binary): ifname = [] for i in range(MAX_PORT_COUNT): if binary[i] == '1': - nexthop.append(peer_ip(i)) - ifname.append(port_name(i)) + nexthop.append(self.peer_ip(i)) + ifname.append(self.port_name(i)) nexthop = ','.join(nexthop) ifname = ','.join(ifname) fvs = swsscommon.FieldValuePairs([("nexthop", nexthop), ("ifname", ifname)]) return fvs - def asic_route_exists(keys, ipprefix): - for k in keys: - rt_key = json.loads(k) - - if rt_key['dest'] == ipprefix: - return k - else: - return None - def asic_route_nhg_fvs(k): - fvs = asic_db.get_entry("ASIC_STATE:SAI_OBJECT_TYPE_ROUTE_ENTRY", k) + fvs = asic_db.get_entry(self.ASIC_RT_STR, k) if not fvs: return None @@ -179,7 +590,7 @@ def asic_route_nhg_fvs(k): if nhgid is None: return None - fvs = asic_db.get_entry("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP_GROUP", nhgid) + fvs = asic_db.get_entry(self.ASIC_NHG_STR, nhgid) return fvs MAX_ECMP_COUNT = 512 @@ -189,16 +600,8 @@ def asic_route_nhg_fvs(k): else: IP_INTEGER_BASE = int(ipaddress.IPv4Address(str("2.2.2.0"))) - config_db = dvs.get_config_db() - fvs = {"NULL": "NULL"} - for i in range(MAX_PORT_COUNT): - config_db.create_entry("INTERFACE", port_name(i), fvs) - config_db.create_entry("INTERFACE", "{}|{}".format(port_name(i), port_ipprefix(i)), fvs) - dvs.runcmd("config interface startup " + port_name(i)) - dvs.runcmd("arp -s {} {}".format(peer_ip(i), port_mac(i))) - assert dvs.servers[i].runcmd("ip link set down dev eth0") == 0 - assert dvs.servers[i].runcmd("ip link set up dev eth0") == 0 + self.config_intf(i, dvs) app_db = dvs.get_app_db() asic_db = dvs.get_asic_db() @@ -207,7 +610,7 @@ def asic_route_nhg_fvs(k): # Add first batch of routes with unique nexthop groups in AppDB route_count = 0 r = 0 - asic_routes_count = len(asic_db.get_keys("ASIC_STATE:SAI_OBJECT_TYPE_ROUTE_ENTRY")) + asic_routes_count = len(asic_db.get_keys(self.ASIC_RT_STR)) while route_count < MAX_ECMP_COUNT: r += 1 fmt = '{{0:0{}b}}'.format(MAX_PORT_COUNT) @@ -221,12 +624,34 @@ def asic_route_nhg_fvs(k): route_count += 1 # Wait and check ASIC DB the count of nexthop groups used - asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP_GROUP", MAX_ECMP_COUNT) + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, MAX_ECMP_COUNT) # Wait and check ASIC DB the count of routes - asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_ROUTE_ENTRY", asic_routes_count + MAX_ECMP_COUNT) + asic_db.wait_for_n_keys(self.ASIC_RT_STR, asic_routes_count + MAX_ECMP_COUNT) asic_routes_count += MAX_ECMP_COUNT + # Add a route with labeled NHs + asic_nhs_count = len(asic_db.get_keys(self.ASIC_NHS_STR)) + route_ipprefix = gen_ipprefix(route_count) + fvs = swsscommon.FieldValuePairs([('nexthop', '1+10.0.0.1,3+10.0.0.3'), + ('ifname', 'Ethernet0,Ethernet4')]) + ps.set(route_ipprefix, fvs) + route_count += 1 + + # A temporary route should be created + asic_db.wait_for_n_keys(self.ASIC_RT_STR, asic_routes_count + 1) + + # A NH should be elected as the temporary NHG and it should be created + # as it doesn't exist. + asic_db.wait_for_n_keys(self.ASIC_NHS_STR, asic_nhs_count + 1) + + # Delete the route. The route and the added labeled NH should be + # removed. + ps._del(route_ipprefix) + route_count -= 1 + asic_db.wait_for_n_keys(self.ASIC_RT_STR, asic_routes_count) + asic_db.wait_for_n_keys(self.ASIC_NHS_STR, asic_nhs_count) + # Add second batch of routes with unique nexthop groups in AppDB # Add more routes with new nexthop group in AppDBdd route_ipprefix = gen_ipprefix(route_count) @@ -247,18 +672,18 @@ def asic_route_nhg_fvs(k): last_ipprefix = route_ipprefix # Wait until we get expected routes and check ASIC DB on the count of nexthop groups used, and it should not increase - keys = asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_ROUTE_ENTRY", asic_routes_count + 10) - asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP_GROUP", MAX_ECMP_COUNT) + keys = asic_db.wait_for_n_keys(self.ASIC_RT_STR, asic_routes_count + 10) + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, MAX_ECMP_COUNT) # Check the route points to next hop group # Note: no need to wait here - k = asic_route_exists(keys, "2.2.2.0/24") + k = self.get_route_id("2.2.2.0/24", dvs) assert k is not None fvs = asic_route_nhg_fvs(k) assert fvs is not None # Check the second batch does not point to next hop group - k = asic_route_exists(keys, base_ipprefix) + k = self.get_route_id(base_ipprefix, dvs) assert k is not None fvs = asic_route_nhg_fvs(k) assert not(fvs) @@ -276,20 +701,938 @@ def asic_route_nhg_fvs(k): route_ipprefix = gen_ipprefix(route_count) ps._del(route_ipprefix) route_count += 1 + asic_routes_count -= MAX_ECMP_COUNT # Wait and check the second batch points to next hop group # Check ASIC DB on the count of nexthop groups used, and it should not increase or decrease - asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP_GROUP", 10) - keys = asic_db.get_keys("ASIC_STATE:SAI_OBJECT_TYPE_ROUTE_ENTRY") - k = asic_route_exists(keys, base_ipprefix) + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, 10) + keys = asic_db.get_keys(self.ASIC_RT_STR) + k = self.get_route_id(base_ipprefix, dvs) assert k is not None fvs = asic_route_nhg_fvs(k) assert fvs is not None - k = asic_route_exists(keys, last_ipprefix) + k = self.get_route_id(last_ipprefix, dvs) assert k is not None fvs = asic_route_nhg_fvs(k) assert fvs is not None + # Cleanup + + # Remove second batch of routes + for i in range(10): + route_ipprefix = gen_ipprefix(MAX_ECMP_COUNT + i) + ps._del(route_ipprefix) + + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, 0) + asic_db.wait_for_n_keys(self.ASIC_RT_STR, asic_routes_count) + + def test_nhgorch_nhg_exhaust(self, dvs, testlog): + app_db = dvs.get_app_db() + asic_db = dvs.get_asic_db() + nhg_ps = swsscommon.ProducerStateTable(app_db.db_connection, "NEXT_HOP_GROUP_TABLE") + rt_ps = swsscommon.ProducerStateTable(app_db.db_connection, "ROUTE_TABLE") + + MAX_ECMP_COUNT = 512 + MAX_PORT_COUNT = 10 + + r = 0 + fmt = '{{0:0{}b}}'.format(MAX_PORT_COUNT) + asic_nhgs_count = len(asic_db.get_keys(self.ASIC_NHG_STR)) + nhg_count = asic_nhgs_count + first_valid_nhg = nhg_count + + def gen_nhg_fvs(binary): + nexthop = [] + ifname = [] + + for i in range(MAX_PORT_COUNT): + if binary[i] == '1': + nexthop.append(self.peer_ip(i)) + ifname.append(self.port_name(i)) + + nexthop = ','.join(nexthop) + ifname = ','.join(ifname) + fvs = swsscommon.FieldValuePairs([("nexthop", nexthop), ("ifname", ifname)]) + + return fvs + + def gen_nhg_index(nhg_number): + return "group{}".format(nhg_number) + + def gen_valid_binary(): + nonlocal r + + while True: + r += 1 + binary = fmt.format(r) + # We need at least 2 ports for a nexthop group + if binary.count('1') <= 1: + continue + return binary + + def create_temp_nhg(): + nonlocal nhg_count + + binary = gen_valid_binary() + nhg_fvs = gen_nhg_fvs(binary) + nhg_index = gen_nhg_index(nhg_count) + nhg_ps.set(nhg_index, nhg_fvs) + nhg_count += 1 + + return nhg_index, binary + + def delete_nhg(): + nonlocal first_valid_nhg + + del_nhg_index = gen_nhg_index(first_valid_nhg) + del_nhg_id = asic_nhgs[del_nhg_index] + + nhg_ps._del(del_nhg_index) + asic_nhgs.pop(del_nhg_index) + first_valid_nhg += 1 + + return del_nhg_id + + def nhg_exists(nhg_index): + return self.get_nhg_id(nhg_index, dvs) is not None + + config_db = dvs.get_config_db() + fvs = {"NULL": "NULL"} + + # Create interfaces + for i in range(MAX_PORT_COUNT): + self.config_intf(i, dvs) + + asic_nhgs = {} + + # Add first batch of next hop groups to reach the NHG limit + while nhg_count < MAX_ECMP_COUNT: + r += 1 + binary = fmt.format(r) + # We need at least 2 ports for a nexthop group + if binary.count('1') <= 1: + continue + nhg_fvs = gen_nhg_fvs(binary) + nhg_index = gen_nhg_index(nhg_count) + nhg_ps.set(nhg_index, nhg_fvs) + + # Wait for the group to be added + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, asic_nhgs_count + nhg_count + 1) + + # Save the NHG index/ID pair + asic_nhgs[nhg_index] = self.get_nhg_id(nhg_index, dvs) + + nhg_count += 1 + + # Add a new next hop group - it should create a temporary one instead + prev_nhgs = asic_db.get_keys(self.ASIC_NHG_STR) + nhg_index, _ = create_temp_nhg() + + # Assert no new group has been added + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, MAX_ECMP_COUNT) + + # Assert the same NHGs are in ASIC DB + assert prev_nhgs == asic_db.get_keys(self.ASIC_NHG_STR) + + # Delete an existing next hop group + del_nhg_id = delete_nhg() + + # Wait for the key to be deleted + asic_db.wait_for_deleted_entry(self.ASIC_NHG_STR, del_nhg_id) + + # Wait for the temporary group to be promoted and replace the deleted + # NHG + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, MAX_ECMP_COUNT) + + # Save the promoted NHG index/ID + asic_nhgs[nhg_index] = self.get_nhg_id(nhg_index, dvs) + + # Update a group + binary = gen_valid_binary() + nhg_fvs = gen_nhg_fvs(binary) + nhg_index = gen_nhg_index(first_valid_nhg) + + # Save the previous members + prev_nhg_members = self.get_nhgm_ids(nhg_index, dvs) + nhg_ps.set(nhg_index, nhg_fvs) + + # Wait a second so the NHG members get updated + time.sleep(1) + + # Assert the group was updated by checking it's members + assert self.get_nhgm_ids(nhg_index, dvs) != prev_nhg_members + + # Create a new temporary group + nhg_index, _ = create_temp_nhg() + time.sleep(1) + + # Delete the temporary group + nhg_ps._del(nhg_index) + + # Assert the NHG does not exist anymore + assert not nhg_exists(nhg_index) + + # Assert the number of groups is the same + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, MAX_ECMP_COUNT) + + # Create a new temporary group + nhg_index, binary = create_temp_nhg() + + # Save the number of group members + binary_count = binary.count('1') + + # Update the temporary group with a different number of members + while True: + r += 1 + binary = fmt.format(r) + # We need at least 2 ports for a nexthop group + if binary.count('1') <= 1 or binary.count('1') == binary_count: + continue + binary_count = binary.count('1') + break + nhg_fvs = gen_nhg_fvs(binary) + nhg_ps.set(nhg_index, nhg_fvs) + + # Delete a group + del_nhg_id = delete_nhg() + + # Wait for the group to be deleted + asic_db.wait_for_deleted_entry(self.ASIC_NHG_STR, del_nhg_id) + + # The temporary group should be promoted + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, MAX_ECMP_COUNT) + + # Save the promoted NHG index/ID + asic_nhgs[nhg_index] = self.get_nhg_id(nhg_index, dvs) + + # Assert it has the updated details by checking the number of members + assert len(self.get_nhgm_ids(nhg_index, dvs)) == binary_count + + # Add a route + asic_rts_count = len(asic_db.get_keys(self.ASIC_RT_STR)) + + rt_fvs = swsscommon.FieldValuePairs([('nexthop_group', nhg_index)]) + rt_ps.set('2.2.2.0/24', rt_fvs) + + # Assert the route is created + asic_db.wait_for_n_keys(self.ASIC_RT_STR, asic_rts_count + 1) + + # Save the previous NHG ID + prev_nhg_id = asic_nhgs[nhg_index] + + # Create a new temporary group + nhg_index, binary = create_temp_nhg() + + # Get the route ID + rt_id = self.get_route_id('2.2.2.0/24', dvs) + assert rt_id != None + + # Update the route to point to the temporary NHG + rt_fvs = swsscommon.FieldValuePairs([('nexthop_group', nhg_index)]) + rt_ps.set('2.2.2.0/24', rt_fvs) + + # Wait for the route to change it's NHG ID + asic_db.wait_for_field_negative_match(self.ASIC_RT_STR, + rt_id, + {'SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID': prev_nhg_id}) + + # Save the new route NHG ID + prev_nhg_id = asic_db.get_entry(self.ASIC_RT_STR, rt_id)['SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID'] + + # Update the temporary NHG with one that has different NHs + + # Create a new binary that uses the other interfaces than the previous + # binary was using + new_binary = [] + + for i in range(len(binary)): + if binary[i] == '1': + new_binary.append('0') + else: + new_binary.append('1') + + binary = ''.join(new_binary) + assert binary.count('1') > 1 + + nhg_fvs = gen_nhg_fvs(binary) + nhg_ps.set(nhg_index, nhg_fvs) + + # The NHG ID of the route should change + asic_db.wait_for_field_negative_match(self.ASIC_RT_STR, + rt_id, + {'SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID': prev_nhg_id}) + + # Save the new route NHG ID + prev_nhg_id = asic_db.get_entry(self.ASIC_RT_STR, rt_id)['SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID'] + + # Delete a NHG. + del_nhg_id = delete_nhg() + + # Wait for the NHG to be deleted + asic_db.wait_for_deleted_entry(self.ASIC_NHG_STR, del_nhg_id) + + # The temporary group should get promoted. + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, MAX_ECMP_COUNT) + + # Save the promoted NHG index/ID + asic_nhgs[nhg_index] = self.get_nhg_id(nhg_index, dvs) + + # Assert the NHGID of the route changed due to temporary group being + # promoted. + asic_db.wait_for_field_match(self.ASIC_RT_STR, + rt_id, + {'SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID': asic_nhgs[nhg_index]}) + + # Try updating the promoted NHG to a single NHG. Should fail as it no + # longer is a temporary NHG + nhg_id = self.get_nhg_id(nhg_index, dvs) + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.1'), + ('ifname', 'Ethernet0')]) + nhg_ps.set(nhg_index, fvs) + time.sleep(1) + assert bool(asic_db.get_entry(self.ASIC_NHG_STR, nhg_id)) == True + + # Save the NHG index + prev_nhg_index = nhg_index + prev_nhg_id = nhg_id + + # Create a temporary next hop groups + nhg_index, binary = create_temp_nhg() + nhg_id = self.get_nhg_id(nhg_index, dvs) + + # Update the route to point to the temporary group + fvs = swsscommon.FieldValuePairs([('nexthop_group', nhg_index)]) + rt_ps.set('2.2.2.0/24', fvs) + asic_db.wait_for_field_match(self.ASIC_RT_STR, + rt_id, + {'SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID': nhg_id}) + + # The previous group should be updated to a single NHG now, freeing a + # NHG slot in ASIC and promoting the temporary one + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, MAX_ECMP_COUNT) + asic_nhgs[nhg_index] = self.get_nhg_id(nhg_index, dvs) + assert self.get_nhg_id(nhg_index, dvs) != prev_nhg_id + + # Create a temporary next hop groups + nhg_index, binary = create_temp_nhg() + nhg_id = self.get_nhg_id(nhg_index, dvs) + + # Update the route to point to the temporary group + fvs = swsscommon.FieldValuePairs([('nexthop_group', nhg_index)]) + rt_ps.set('2.2.2.0/24', fvs) + asic_db.wait_for_field_match(self.ASIC_RT_STR, + rt_id, + {'SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID': nhg_id}) + + # Update the group to a single NHG + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.1'), + ('ifname', 'Ethernet0')]) + nhg_ps.set(nhg_index, fvs) + nhg_id = self.get_nhg_id(nhg_index, dvs) + + # Wait for the route to update it's NHG ID + asic_db.wait_for_field_match(self.ASIC_RT_STR, + rt_id, + {'SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID': nhg_id}) + + # Try to update the nexthop group to another one + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.3'), + ('ifname', 'Ethernet4')]) + nhg_ps.set(nhg_index, fvs) + + # Wait a second so the database operations finish + time.sleep(1) + + # The update should fail as the group is not temporary anymore and it + # should keep it's previous NHG ID + asic_db.wait_for_field_match(self.ASIC_RT_STR, + rt_id, + {'SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID': nhg_id}) + + # Create a next hop group that contains labeled NHs that do not exist + # in NeighOrch + asic_nhs_count = len(asic_db.get_keys(self.ASIC_NHS_STR)) + fvs = swsscommon.FieldValuePairs([('nexthop', '1+10.0.0.1,3+10.0.0.3'), + ('ifname', 'Ethernet0,Ethernet4')]) + nhg_index = gen_nhg_index(nhg_count) + nhg_ps.set(nhg_index, fvs) + nhg_count += 1 + + # A temporary next hop should be elected to represent the group and + # thus a new labeled next hop should be created + asic_db.wait_for_n_keys(self.ASIC_NHS_STR, asic_nhs_count + 1) + + # Delete a next hop group + delete_nhg() + + # The group should be promoted and the other labeled NH should also get + # created + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, MAX_ECMP_COUNT) + asic_db.wait_for_n_keys(self.ASIC_NHS_STR, asic_nhs_count + 2) + + # Save the promoted NHG index/ID + asic_nhgs[nhg_index] = self.get_nhg_id(nhg_index, dvs) + + # Update the route with a RouteOrch's owned NHG + binary = gen_valid_binary() + nhg_fvs = gen_nhg_fvs(binary) + rt_ps.set('2.2.2.0/24', nhg_fvs) + + # Assert no new group has been added + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, MAX_ECMP_COUNT) + + # Delete a next hop group + delete_nhg() + + # The temporary group should be promoted + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, MAX_ECMP_COUNT) + + # Create a temporary NHG owned by NhgOrch + nhg_index, _ = create_temp_nhg() + + # Assert no new group has been added + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, MAX_ECMP_COUNT) + + # Delete a next hop group + delete_nhg() + + # Check that the temporary group is promoted + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, MAX_ECMP_COUNT) + + # Save the promoted NHG index/ID + asic_nhgs[nhg_index] = self.get_nhg_id(nhg_index, dvs) + + # Create a temporary NHG that contains only NHs that do not exist + nhg_fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.21,10.0.0.23'), + ('ifname', 'Ethernet40,Ethernet44')]) + nhg_index = gen_nhg_index(nhg_count) + nhg_count += 1 + nhg_ps.set(nhg_index, nhg_fvs) + + # Assert the group is not created + assert not nhg_exists(nhg_index) + + # Update the temporary NHG to a valid one + binary = gen_valid_binary() + nhg_fvs = gen_nhg_fvs(binary) + nhg_ps.set(nhg_index, nhg_fvs) + + # Assert the temporary group was updated and the group got created + nhg_id = self.get_nhg_id(nhg_index, dvs) + assert nhg_id is not None + + # Update the temporary NHG to an invalid one again + nhg_fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.21,10.0.0.23'), + ('ifname', 'Ethernet40,Ethernet44')]) + nhg_ps.set(nhg_index, nhg_fvs) + + # The update should fail and the temporary NHG should still be pointing + # to the old valid NH + assert self.get_nhg_id(nhg_index, dvs) == nhg_id + + # Delete the temporary group + nhg_ps._del(nhg_index) + + # Cleanup + + # Delete the route + rt_ps._del('2.2.2.0/24') + asic_db.wait_for_deleted_entry(self.ASIC_RT_STR, rt_id) + + # Delete the next hop groups + for k in asic_nhgs: + nhg_ps._del(k) + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, asic_nhgs_count) + + def test_nhgorch_multi_nh_group(self, dvs, testlog): + for i in range(4): + self.config_intf(i, dvs) + + app_db = dvs.get_app_db() + asic_db = dvs.get_asic_db() + + # create next hop group in APPL DB + + nhg_ps = swsscommon.ProducerStateTable(app_db.db_connection, "NEXT_HOP_GROUP_TABLE") + + prev_nhg_keys = asic_db.get_keys(self.ASIC_NHG_STR) + asic_nhgs_count = len(prev_nhg_keys) + asic_nhgms_count = len(asic_db.get_keys(self.ASIC_NHGM_STR)) + + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.1,10.0.0.3,10.0.0.5'), + ("ifname", "Ethernet0,Ethernet4,Ethernet8")]) + nhg_ps.set("group1", fvs) + + # check if group was propagated to ASIC DB + + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, asic_nhgs_count + 1) + keys = asic_db.get_keys(self.ASIC_NHG_STR) + + found_nhg = False + + for nhgid in keys: + if nhgid not in prev_nhg_keys: + found_nhg = True + break + + assert found_nhg == True + + # check if members were propagated to ASIC DB + + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count + 3) + keys = asic_db.get_keys(self.ASIC_NHGM_STR) + count = 0 + + for k in keys: + fvs = asic_db.get_entry(self.ASIC_NHGM_STR, k) + + if fvs["SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_GROUP_ID"] == nhgid: + count += 1 + + assert count == 3 + + # create route in APPL DB + + rt_ps = swsscommon.ProducerStateTable(app_db.db_connection, "ROUTE_TABLE") + + asic_routes_count = len(asic_db.get_keys(self.ASIC_RT_STR)) + + fvs = swsscommon.FieldValuePairs([("nexthop_group", "group1")]) + rt_ps.set("2.2.2.0/24", fvs) + + # check if route was propagated to ASIC DB + + asic_db.wait_for_n_keys(self.ASIC_RT_STR, asic_routes_count + 1) + keys = asic_db.get_keys(self.ASIC_RT_STR) + + k = self.get_route_id('2.2.2.0/24', dvs) + assert k is not None + + # assert the route points to next hop group + fvs = asic_db.get_entry(self.ASIC_RT_STR, k) + assert fvs["SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID"] == nhgid + + # bring links down one-by-one + for i in [0, 1, 2]: + self.flap_intf(i, 'down', dvs) + + keys = asic_db.get_keys(self.ASIC_NHGM_STR) + + assert len(keys) == (asic_nhgms_count + 2 - i) + assert bool(asic_db.get_entry(self.ASIC_NHG_STR, nhgid)) + + # bring links up one-by-one + for i in [0, 1, 2]: + self.flap_intf(i, 'up', dvs) + + keys = asic_db.get_keys(self.ASIC_NHGM_STR) + + assert len(keys) == (asic_nhgms_count + i + 1) + + count = 0 + for k in keys: + fvs = asic_db.get_entry(self.ASIC_NHGM_STR, k) + if fvs["SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_GROUP_ID"] == nhgid: + count += 1 + assert count == i + 1 + + # Bring an interface down + self.flap_intf(1, 'down', dvs) + + # One group member will get deleted + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, 2) + + # Create a group that contains a NH that uses the down link + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.1,10.0.0.3'), + ("ifname", "Ethernet0,Ethernet4")]) + nhg_ps.set('group2', fvs) + + # The group should get created, but it will not contained the NH that + # has the link down + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, asic_nhgs_count + 2) + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count + 3) + + # Update the NHG with one interface down + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.3,10.0.0.1'), + ("ifname", "Ethernet4,Ethernet0")]) + nhg_ps.set("group1", fvs) + + # Wait for group members to update - the group will contain only the + # members that have their links up + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count + 2) + + # Bring the interface up + self.flap_intf(1, 'up', dvs) + + # Check that the missing member of group1 and group2 is being added + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count + 4) + + # Remove group2 + nhg_ps._del('group2') + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, asic_nhgs_count + 1) + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count + 2) + + # Create group2 with a NH that does not exist + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.3,10.0.0.63'), + ("ifname", "Ethernet4,Ethernet124")]) + nhg_ps.set("group2", fvs) + + # The groups should not be created + time.sleep(1) + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, asic_nhgs_count + 1) + + # Update group1 with a NH that does not exist + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.3,10.0.0.63'), + ("ifname", "Ethernet4,Ethernet124")]) + nhg_ps.set("group1", fvs) + + # The update should fail, leaving group1 with only the unremoved + # members + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count + 1) + + # Configure the missing NH's interface + self.config_intf(31, dvs) + + # A couple more routes will be added to ASIC_DB + asic_routes_count += 2 + + # Group2 should get created and group1 should be updated + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, asic_nhgs_count + 2) + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count + 4) + + # Delete group2 + nhg_ps._del('group2') + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, asic_nhgs_count + 1) + + # Update the NHG, adding two new members + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.1,10.0.0.3,10.0.0.5,10.0.0.7'), + ("ifname", "Ethernet0,Ethernet4,Ethernet8,Ethernet12")]) + nhg_ps.set("group1", fvs) + + # Wait for members to be added + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count + 4) + + # Update the group to one NH only - orchagent should fail as it is + # referenced + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.1'), ("ifname", "Ethernet0")]) + nhg_ps.set("group1", fvs) + time.sleep(1) + + assert bool(asic_db.get_entry(self.ASIC_NHG_STR, nhgid)) + assert len(asic_db.get_keys(self.ASIC_NHGM_STR)) == asic_nhgms_count + 4 + + # Remove route 2.2.2.0/24 + rt_ps._del("2.2.2.0/24") + + # Wait for route 2.2.2.0/24 to be removed + asic_db.wait_for_n_keys(self.ASIC_RT_STR, asic_routes_count) + + # The group is not referenced anymore, so it should get updated to a + # single next hop group, removing the ASIC group + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, asic_nhgs_count) + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count) + + # Remove group1 + nhg_ps._del("group1") + + def test_nhgorch_single_nh_group(self, dvs, testlog): + for i in range(2): + self.config_intf(i, dvs) + + app_db = dvs.get_app_db() + asic_db = dvs.get_asic_db() + + # Count existing objects + asic_nhgs_count = len(asic_db.get_keys(self.ASIC_NHG_STR)) + asic_nhgms_count = len(asic_db.get_keys(self.ASIC_NHGM_STR)) + + # Create single next hop group in APPL DB + nhg_ps = swsscommon.ProducerStateTable(app_db.db_connection, "NEXT_HOP_GROUP_TABLE") + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.1'), ("ifname", "Ethernet0")]) + nhg_ps.set("group1", fvs) + time.sleep(1) + + # Check that the group was not created in ASIC DB + assert len(asic_db.get_keys(self.ASIC_NHG_STR)) == asic_nhgs_count + + # Get the NHG ID. The UT assumes there is only one next hop with IP 10.0.0.1 + count = 0 + nhgid = 0 + for k in asic_db.get_keys(self.ASIC_NHS_STR): + fvs = asic_db.get_entry(self.ASIC_NHS_STR, k) + if fvs['SAI_NEXT_HOP_ATTR_IP'] == '10.0.0.1': + nhgid = k + count += 1 + assert count == 1 + + # create route in APPL DB + + rt_ps = swsscommon.ProducerStateTable(app_db.db_connection, "ROUTE_TABLE") + + asic_routes_count = len(asic_db.get_keys(self.ASIC_RT_STR)) + + fvs = swsscommon.FieldValuePairs([("nexthop_group", "group1")]) + rt_ps.set("2.2.2.0/24", fvs) + + # check if route was propagated to ASIC DB + + asic_db.wait_for_n_keys(self.ASIC_RT_STR, asic_routes_count + 1) + keys = asic_db.get_keys(self.ASIC_RT_STR) + + k = self.get_route_id('2.2.2.0/24', dvs) + + # assert the route points to next hop group + fvs = asic_db.get_entry(self.ASIC_RT_STR, k) + assert fvs["SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID"] == nhgid + + # Bring group's link down + self.flap_intf(0, 'down', dvs) + + # Check that the group still has the same ID and everything works + assert nhgid == self.get_nhg_id('group1', dvs) + + # Bring group's link back up + self.flap_intf(0, 'up', dvs) + + # Check that the group still has the same ID and everything works + assert nhgid == self.get_nhg_id('group1', dvs) + + # Bring an interface down + self.flap_intf(1, 'down', dvs) + + # Create group2 pointing to the NH which's link is down + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.3'), ("ifname", "Ethernet4")]) + nhg_ps.set("group2", fvs) + + # The group should be created. To test this, add a route pointing to + # it. If the group exists, the route will be created as well. + fvs = swsscommon.FieldValuePairs([("nexthop_group", "group2")]) + rt_ps.set('2.2.4.0/24', fvs) + asic_db.wait_for_n_keys(self.ASIC_RT_STR, asic_routes_count + 2) + + # Delete the route and the group + rt_ps._del('2.2.4.0/24') + nhg_ps._del('group2') + + # Bring the interface back up + self.flap_intf(1, 'up', dvs) + + # Create group2 pointing to a NH that does not exist + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.61'), ("ifname", "Ethernet120")]) + nhg_ps.set("group2", fvs) + + # The group should fail to be created. To test this, we add a route + # pointing to it. The route should not be created. + fvs = swsscommon.FieldValuePairs([("nexthop_group", "group2")]) + rt_ps.set('2.2.4.0/24', fvs) + asic_db.wait_for_n_keys(self.ASIC_RT_STR, asic_routes_count + 1) + + # Configure the NH's interface + self.config_intf(30, dvs) + + # A couple of more routes will be added to ASIC_DB + asic_routes_count += 2 + + # The group should be created, so the route should be added + asic_db.wait_for_n_keys(self.ASIC_RT_STR, asic_routes_count + 2) + + # Delete the route and the group + rt_ps._del('2.2.4.0/24') + nhg_ps._del('group2') + + # Update the group to a multiple NH group - should fail as the group + # is referenced + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.1,10.0.0.3'), + ("ifname", "Ethernet0,Ethernet4")]) + nhg_ps.set("group1", fvs) + time.sleep(1) + assert len(asic_db.get_keys(self.ASIC_NHG_STR)) == asic_nhgs_count + + # Update group1 to point to another NH - should fail as the group is + # referenced + nhgid = self.get_nhg_id('group1', dvs) + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.3'), ("ifname", "Ethernet4")]) + nhg_ps.set("group1", fvs) + assert nhgid == self.get_nhg_id('group1', dvs) + + # Remove route 2.2.2.0/24 + rt_ps._del("2.2.2.0/24") + + # Wait for route 2.2.2.0/24 to be removed + asic_db.wait_for_n_keys(self.ASIC_RT_STR, asic_routes_count) + + # The group is not referenced anymore, so it should be updated + assert nhgid != self.get_nhg_id('group1', dvs) + + # Update the group to a multiple NH group - should work as the group is + # not referenced + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.1,10.0.0.3'), + ("ifname", "Ethernet0,Ethernet4")]) + nhg_ps.set("group1", fvs) + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, asic_nhgs_count + 1) + + # Remove group1 + nhg_ps._del("group1") + + def test_nhgorch_label_route(self, dvs, testlog): + for i in range(4): + self.config_intf(i, dvs) + + app_db = dvs.get_app_db() + asic_db = dvs.get_asic_db() + + # create next hop group in APPL DB + + nhg_ps = swsscommon.ProducerStateTable(app_db.db_connection, "NEXT_HOP_GROUP_TABLE") + + prev_nhg_keys = asic_db.get_keys(self.ASIC_NHG_STR) + asic_nhgs_count = len(prev_nhg_keys) + asic_nhgms_count = len(asic_db.get_keys(self.ASIC_NHGM_STR)) + + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.1,10.0.0.3,10.0.0.5'), + ("ifname", "Ethernet0,Ethernet4,Ethernet8")]) + nhg_ps.set("group1", fvs) + + # check if group was propagated to ASIC DB + + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, asic_nhgs_count + 1) + keys = asic_db.get_keys(self.ASIC_NHG_STR) + + found_nhg = False + + for nhgid in keys: + if nhgid not in prev_nhg_keys: + found_nhg = True + break + + assert found_nhg == True + + # check if members were propagated to ASIC DB + + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count + 3) + keys = asic_db.get_keys(self.ASIC_NHGM_STR) + count = 0 + + for k in keys: + fvs = asic_db.get_entry(self.ASIC_NHGM_STR, k) + + if fvs["SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_GROUP_ID"] == nhgid: + count += 1 + + assert count == 3 + + # create prefix route in APPL DB + + rt_ps = swsscommon.ProducerStateTable(app_db.db_connection, "ROUTE_TABLE") + + asic_routes_count = len(asic_db.get_keys(self.ASIC_RT_STR)) + + fvs = swsscommon.FieldValuePairs([("nexthop_group", "group1")]) + rt_ps.set("2.2.2.0/24", fvs) + + # check if route was propagated to ASIC DB + + asic_db.wait_for_n_keys(self.ASIC_RT_STR, asic_routes_count + 1) + keys = asic_db.get_keys(self.ASIC_RT_STR) + + k = self.get_route_id('2.2.2.0/24', dvs) + + # assert the route points to next hop group + fvs = asic_db.get_entry(self.ASIC_RT_STR, k) + assert fvs["SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID"] == nhgid + + # create label route in APPL DB pointing to the same next hop group + + lrt_ps = swsscommon.ProducerStateTable(app_db.db_connection, "LABEL_ROUTE_TABLE") + + asic_insegs_count = len(asic_db.get_keys(self.ASIC_INSEG_STR)) + + fvs = swsscommon.FieldValuePairs([("nexthop_group", "group1")]) + lrt_ps.set("20", fvs) + + # check if label route was propagated to ASIC DB + + asic_db.wait_for_n_keys(self.ASIC_INSEG_STR, asic_insegs_count + 1) + keys = asic_db.get_keys(self.ASIC_INSEG_STR) + + k = self.get_inseg_id('20', dvs) + assert k is not None + + # assert the route points to next hop group + fvs = asic_db.get_entry(self.ASIC_INSEG_STR, k) + assert fvs["SAI_INSEG_ENTRY_ATTR_NEXT_HOP_ID"] == nhgid + + # bring links down one-by-one + for i in [0, 1, 2]: + self.flap_intf(i, 'down', dvs) + + keys = asic_db.get_keys(self.ASIC_NHGM_STR) + + assert len(keys) == (asic_nhgms_count + 2 - i) + assert bool(asic_db.get_entry(self.ASIC_NHG_STR, nhgid)) + + # bring links up one-by-one + for i in [0, 1, 2]: + self.flap_intf(i, 'up', dvs) + + keys = asic_db.get_keys(self.ASIC_NHGM_STR) + + assert len(keys) == (asic_nhgms_count + i + 1) + + count = 0 + for k in keys: + fvs = asic_db.get_entry(self.ASIC_NHGM_STR, k) + if fvs["SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_GROUP_ID"] == nhgid: + count += 1 + assert count == i + 1 + + # Bring an interface down + self.flap_intf(1, 'down', dvs) + + # One group member will get deleted + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, 2) + + # Remove route 2.2.2.0/24 + rt_ps._del("2.2.2.0/24") + + # Wait for route 2.2.2.0/24 to be removed + asic_db.wait_for_n_keys(self.ASIC_RT_STR, asic_routes_count) + + # Remove label route 20 + lrt_ps._del("20") + + # Wait for route 20 to be removed + asic_db.wait_for_n_keys(self.ASIC_INSEG_STR, asic_insegs_count) + + # Remove group1 + nhg_ps._del("group1") + + # create label route in APPL DBwith no properties (so just popping the label) + + lrt_ps = swsscommon.ProducerStateTable(app_db.db_connection, "LABEL_ROUTE_TABLE") + + asic_insegs_count = len(asic_db.get_keys(self.ASIC_INSEG_STR)) + + fvs = swsscommon.FieldValuePairs([("ifname","")]) + lrt_ps.set("30", fvs) + + # check if label route was propagated to ASIC DB + + asic_db.wait_for_n_keys(self.ASIC_INSEG_STR, asic_insegs_count + 1) + keys = asic_db.get_keys(self.ASIC_INSEG_STR) + + k = self.get_inseg_id('30', dvs) + assert k is not None + + # assert the route points to no next hop group + fvs = asic_db.get_entry(self.ASIC_INSEG_STR, k) + assert fvs["SAI_INSEG_ENTRY_ATTR_NEXT_HOP_ID"] == 'oid:0x0' + + # Remove label route 30 + lrt_ps._del("30") + + # Wait for route 20 to be removed + asic_db.wait_for_n_keys(self.ASIC_INSEG_STR, asic_insegs_count) # Add Dummy always-pass test at end as workaroud # for issue when Flaky fail on final test it invokes module tear-down before retrying