From 2da3ab3a1e54530213006b9c8a36ce84f5c5372b Mon Sep 17 00:00:00 2001 From: Tom cappleman Date: Fri, 6 Aug 2021 14:03:04 +0100 Subject: [PATCH] [orchagent] Add separate next hop table and orch **What I did** Added a new next hop group table to APP_DB, and orchagent support to use this as an alternative to including next hop information in the route table **Why I did it** Improves performance and occupancy usage when using the new table **How I verified it** Extended SWSS unit tests to cover new code, and ran existing tests Signed-off-by: Thomas Cappleman --- doc/swss-schema.md | 24 +- orchagent/Makefile.am | 1 + orchagent/mplsrouteorch.cpp | 552 ++++++----- orchagent/neighorch.cpp | 4 + orchagent/nexthopkey.h | 9 +- orchagent/nhgorch.cpp | 1388 ++++++++++++++++++++++++++++ orchagent/nhgorch.h | 242 +++++ orchagent/orchdaemon.cpp | 8 +- orchagent/orchdaemon.h | 1 + orchagent/routeorch.cpp | 762 +++++++++------- orchagent/routeorch.h | 44 +- tests/mock_tests/Makefile.am | 1 + tests/mock_tests/aclorch_ut.cpp | 2 +- tests/test_nhg.py | 1524 +++++++++++++++++++++++++++++-- 14 files changed, 3901 insertions(+), 661 deletions(-) create mode 100644 orchagent/nhgorch.cpp create mode 100644 orchagent/nhgorch.h diff --git a/doc/swss-schema.md b/doc/swss-schema.md index 7f25803a287..cd1ddd194d4 100644 --- a/doc/swss-schema.md +++ b/doc/swss-schema.md @@ -159,8 +159,30 @@ and reflects the LAG ports into the redis under: `LAG_TABLE::port` ;Status: Mandatory key = ROUTE_TABLE:prefix nexthop = *prefix, ;IP addresses separated “,” (empty indicates no gateway) - intf = ifindex? PORT_TABLE.key ; zero or more separated by “,” (zero indicates no interface) + ifname = ifindex? PORT_TABLE.key ; zero or more separated by “,” (zero indicates no interface) blackhole = BIT ; Set to 1 if this route is a blackhole (or null0) + weight = weight_list ; List of weights. + nexthop_group = string ; index within the NEXT_HOP_GROUP_TABLE, used instead of nexthop and intf fields + +--------------------------------------------- + +###### LABEL_ROUTE_TABLE +; Defines schema for MPLS label route table attributes +key = LABEL_ROUTE_TABLE:mpls_label ; MPLS label +; field = value +nexthop = STRING ; Comma-separated list of nexthops. +ifname = STRING ; Comma-separated list of interfaces. +weight = STRING ; Comma-separated list of weights. +nexthop_group = string ; index within the NEXT_HOP_GROUP_TABLE, used instead of nexthop and intf fields + +--------------------------------------------- +### NEXT_HOP_GROUP_TABLE + ;Stores a list of groups of one or more next hops + ;Status: Mandatory + key = NEXT_HOP_GROUP_TABLE:string ; arbitrary index for the next hop group + nexthop = *prefix, ;IP addresses separated “,” (empty indicates no gateway) + ifname = ifindex? PORT_TABLE.key ; zero or more separated by “,” (zero indicates no interface) + weight = weight_list ; List of weights. --------------------------------------------- ### NEIGH_TABLE diff --git a/orchagent/Makefile.am b/orchagent/Makefile.am index f4317134f63..4c6583a217b 100644 --- a/orchagent/Makefile.am +++ b/orchagent/Makefile.am @@ -32,6 +32,7 @@ orchagent_SOURCES = \ orchdaemon.cpp \ orch.cpp \ notifications.cpp \ + nhgorch.cpp \ routeorch.cpp \ mplsrouteorch.cpp \ neighorch.cpp \ diff --git a/orchagent/mplsrouteorch.cpp b/orchagent/mplsrouteorch.cpp index 321f259179a..0e57e197118 100644 --- a/orchagent/mplsrouteorch.cpp +++ b/orchagent/mplsrouteorch.cpp @@ -6,12 +6,14 @@ #include "swssnet.h" #include "converter.h" #include "crmorch.h" +#include "nhgorch.h" #include "directory.h" extern sai_object_id_t gVirtualRouterId; extern sai_object_id_t gSwitchId; extern CrmOrch *gCrmOrch; +extern NhgOrch *gNhgOrch; void RouteOrch::doLabelTask(Consumer& consumer) { @@ -121,12 +123,14 @@ void RouteOrch::doLabelTask(Consumer& consumer) if (op == SET_COMMAND) { + SWSS_LOG_INFO("Set operation"); + string ips; string aliases; string mpls_nhs; uint8_t& pop_count = ctx.pop_count; bool& excp_intfs_flag = ctx.excp_intfs_flag; - bool blackhole = false; + string nhg_index; for (auto i : kfvFieldsValues(t)) { @@ -142,74 +146,97 @@ void RouteOrch::doLabelTask(Consumer& consumer) if (fvField(i) == "mpls_pop") pop_count = to_uint(fvValue(i)); - if (fvField(i) == "blackhole") - blackhole = fvValue(i) == "true"; + if (fvField(i) == "nexthop_group") + nhg_index = fvValue(i); } - vector ipv = tokenize(ips, ','); - vector alsv = tokenize(aliases, ','); - vector mpls_nhv = tokenize(mpls_nhs, ','); - /* Resize the ip vector to match ifname vector - * as tokenize(",", ',') will miss the last empty segment. */ - if (alsv.size() == 0 && !blackhole) + SWSS_LOG_INFO("Label route %u has nexthop_group: %s, ips: %s, " + "MPLS NHs: %s, aliases: %s", + label, + nhg_index.c_str(), + ips.c_str(), + mpls_nhs.c_str(), + aliases.c_str()); + + /* + * A route should not fill both nexthop_group and ips / + * aliases. + */ + if (!nhg_index.empty() && (!ips.empty() || !aliases.empty())) { - SWSS_LOG_WARN("Skip the route %s, for it has an empty ifname field.", key.c_str()); + SWSS_LOG_ERROR("Route %s has both nexthop_group and ips/aliases", + key.c_str()); it = consumer.m_toSync.erase(it); continue; } - else if (alsv.size() != ipv.size()) + + ctx.nhg_index = nhg_index; + + vector ipv; + vector alsv; + vector mpls_nhv; + + /* + * If the nexthop_group is empty, create the next hop group key + * based on the IPs and aliases. Otherwise, get the key from + * the NhgOrch. + */ + if (nhg_index.empty() && aliases.length() == 0) { - SWSS_LOG_NOTICE("Route %s: resize ipv to match alsv, %zd -> %zd.", key.c_str(), ipv.size(), alsv.size()); - ipv.resize(alsv.size()); + // No next hop specified, so just pop the label. + ctx.nhg = NextHopGroupKey(); } - - /* Set the empty ip(s) to zero - * as IpAddress("") will construct a incorrect ip. */ - for (auto &ip : ipv) + else if (nhg_index.empty()) { - if (ip.empty()) + + ipv = tokenize(ips, ','); + alsv = tokenize(aliases, ','); + mpls_nhv = tokenize(mpls_nhs, ','); + + /* Resize the ip vector to match ifname vector + * as tokenize(",", ',') will miss the last empty segment. */ + if (alsv.size() == 0) { - SWSS_LOG_NOTICE("Route %s: set the empty nexthop ip to zero.", key.c_str()); - ip = "0.0.0.0"; + SWSS_LOG_WARN("Skip the route %s, for it has an empty ifname field.", key.c_str()); + it = consumer.m_toSync.erase(it); + continue; + } + else if (alsv.size() != ipv.size()) + { + SWSS_LOG_NOTICE("Route %s: resize ipv to match alsv, %zd -> %zd.", key.c_str(), ipv.size(), alsv.size()); + ipv.resize(alsv.size()); } - } - for (auto alias : alsv) - { - /* skip route to management, docker, loopback - * TODO: for route to loopback interface, the proper - * way is to create loopback interface and then create - * route pointing to it, so that we can traps packets to - * CPU */ - if (alias == "eth0" || alias == "docker0" || - alias == "lo" || !alias.compare(0, strlen(LOOPBACK_PREFIX), LOOPBACK_PREFIX)) + for (auto alias : alsv) { - excp_intfs_flag = true; - break; + /* skip route to management, docker, loopback + * TODO: for route to loopback interface, the proper + * way is to create loopback interface and then create + * route pointing to it, so that we can traps packets to + * CPU */ + if (alias == "eth0" || alias == "docker0" || + alias == "lo" || !alias.compare(0, strlen(LOOPBACK_PREFIX), LOOPBACK_PREFIX)) + { + excp_intfs_flag = true; + break; + } } - } - // TODO: cannot trust m_portsOrch->getPortIdByAlias because sometimes alias is empty - if (excp_intfs_flag) - { - /* If any existing routes are updated to point to the - * above interfaces, remove them from the ASIC. */ - if (removeLabelRoute(ctx)) - it = consumer.m_toSync.erase(it); - else - it++; - continue; - } + // TODO: cannot trust m_portsOrch->getPortIdByAlias because sometimes alias is empty + if (excp_intfs_flag) + { + /* If any existing routes are updated to point to the + * above interfaces, remove them from the ASIC. */ + if (removeLabelRoute(ctx)) + it = consumer.m_toSync.erase(it); + else + it++; + continue; + } - string nhg_str = ""; - NextHopGroupKey& nhg = ctx.nhg; + string nhg_str = ""; + NextHopGroupKey& nhg = ctx.nhg; - if (blackhole) - { - nhg = NextHopGroupKey(); - } - else - { for (uint32_t i = 0; i < ipv.size(); i++) { if (i) nhg_str += NHG_DELIMITER; @@ -222,6 +249,24 @@ void RouteOrch::doLabelTask(Consumer& consumer) nhg = NextHopGroupKey(nhg_str); } + else + { + try + { + const NextHopGroup& nh_group = gNhgOrch->getNhg(nhg_index); + ctx.nhg = nh_group.getKey(); + ctx.is_temp = nh_group.isTemp(); + } + catch (const std::out_of_range& e) + { + SWSS_LOG_ERROR("Next hop group %s does not exist", + nhg_index.c_str()); + ++it; + continue; + } + } + + NextHopGroupKey& nhg = ctx.nhg; if (nhg.getSize() == 1 && nhg.hasIntfNextHop()) { @@ -247,7 +292,8 @@ void RouteOrch::doLabelTask(Consumer& consumer) } else if (m_syncdLabelRoutes.find(vrf_id) == m_syncdLabelRoutes.end() || m_syncdLabelRoutes.at(vrf_id).find(label) == m_syncdLabelRoutes.at(vrf_id).end() || - m_syncdLabelRoutes.at(vrf_id).at(label) != nhg) + m_syncdLabelRoutes.at(vrf_id).at(label) != RouteNhg(nhg, nhg_index) || + ctx.is_temp) { if (addLabelRoute(ctx, nhg)) it = consumer.m_toSync.erase(it); @@ -255,13 +301,16 @@ void RouteOrch::doLabelTask(Consumer& consumer) it++; } else + { /* Duplicate entry */ + SWSS_LOG_INFO("Route %s is duplicate entry", key.c_str()); it = consumer.m_toSync.erase(it); + } // If already exhaust the nexthop groups, and there are pending removing routes in bulker, // flush the bulker and possibly collect some released nexthop groups - if (m_nextHopGroupCount >= m_maxNextHopGroupCount && - gLabelRouteBulker.removing_entries_count() > 0) + if (gNhgOrch->getNhgCount() >= gNhgOrch->getMaxNhgCount() && + gRouteBulker.removing_entries_count() > 0) { break; } @@ -269,6 +318,7 @@ void RouteOrch::doLabelTask(Consumer& consumer) else if (op == DEL_COMMAND) { /* Cannot locate the route or remove succeed */ + SWSS_LOG_INFO("Delete operation"); if (removeLabelRoute(ctx)) it = consumer.m_toSync.erase(it); else @@ -337,7 +387,8 @@ void RouteOrch::doLabelTask(Consumer& consumer) } else if (m_syncdLabelRoutes.find(vrf_id) == m_syncdLabelRoutes.end() || m_syncdLabelRoutes.at(vrf_id).find(label) == m_syncdLabelRoutes.at(vrf_id).end() || - m_syncdLabelRoutes.at(vrf_id).at(label) != nhg) + m_syncdLabelRoutes.at(vrf_id).at(label) != RouteNhg(nhg, ctx.nhg_index) || + ctx.is_temp) { if (addLabelRoutePost(ctx, nhg)) it_prev = consumer.m_toSync.erase(it_prev); @@ -377,7 +428,12 @@ void RouteOrch::addTempLabelRoute(LabelRouteBulkContext& ctx, const NextHopGroup /* Remove next hops that are not in m_syncdNextHops */ for (auto it = next_hop_set.begin(); it != next_hop_set.end();) { - if (!m_neighOrch->hasNextHop(*it)) + /* + * Check if the IP next hop exists in NeighOrch. The next hop may be + * a labeled one, which are created by RouteOrch or NhgOrch if the IP + * next hop exists. + */ + if (!m_neighOrch->hasNextHop(it->ipKey())) { SWSS_LOG_INFO("Failed to get next hop %s for %u", (*it).to_string().c_str(), label); @@ -409,9 +465,12 @@ bool RouteOrch::addLabelRoute(LabelRouteBulkContext& ctx, const NextHopGroupKey sai_object_id_t& vrf_id = ctx.vrf_id; Label& label = ctx.label; + SWSS_LOG_NOTICE("Adding route for label %u with next hop(s) %s", + label, + nextHops.to_string().c_str()); + /* next_hop_id indicates the next hop id or next hop group id of this route */ sai_object_id_t next_hop_id; - bool blackhole = false; if (m_syncdLabelRoutes.find(vrf_id) == m_syncdLabelRoutes.end()) { @@ -421,97 +480,104 @@ bool RouteOrch::addLabelRoute(LabelRouteBulkContext& ctx, const NextHopGroupKey auto it_route = m_syncdLabelRoutes.at(vrf_id).find(label); - if (nextHops.getSize() == 0) + /* The route has no next hop specified so just pop the label */ + if ((nextHops.getSize() == 0) && ctx.nhg_index.empty()) { - /* The route is pointing to a blackhole */ - blackhole = true; + next_hop_id = 0; } - /* The route is pointing to a next hop */ - else if (nextHops.getSize() == 1) + else if (ctx.nhg_index.empty()) { - const NextHopKey& nexthop = *nextHops.getNextHops().begin(); - if (nexthop.isIntfNextHop()) - { - next_hop_id = m_intfsOrch->getRouterIntfsId(nexthop.alias); - /* rif is not created yet */ - if (next_hop_id == SAI_NULL_OBJECT_ID) - { - SWSS_LOG_INFO("Failed to get next hop %s for %u", - nextHops.to_string().c_str(), label); - return false; - } - } - else + /* The route is pointing to a next hop */ + if (nextHops.getSize() == 1) { - if (m_neighOrch->hasNextHop(nexthop)) + const NextHopKey& nexthop = *nextHops.getNextHops().begin(); + if (nexthop.isIntfNextHop()) { - next_hop_id = m_neighOrch->getNextHopId(nexthop); - } - /* For non-existent MPLS NH, check if IP neighbor NH exists */ - else if (nexthop.isMplsNextHop() && - m_neighOrch->hasNextHop(NextHopKey(nexthop.ip_address, nexthop.alias))) - { - /* since IP neighbor NH exists, neighbor is resolved, add MPLS NH */ - m_neighOrch->addNextHop(nexthop); - next_hop_id = m_neighOrch->getNextHopId(nexthop); + next_hop_id = m_intfsOrch->getRouterIntfsId(nexthop.alias); + /* rif is not created yet */ + if (next_hop_id == SAI_NULL_OBJECT_ID) + { + SWSS_LOG_INFO("Failed to get next hop %s for %u", + nextHops.to_string().c_str(), label); + return false; + } } - /* IP neighbor is not yet resolved */ else { - SWSS_LOG_INFO("Failed to get next hop %s for %u", - nextHops.to_string().c_str(), label); - return false; + if (m_neighOrch->hasNextHop(nexthop)) + { + next_hop_id = m_neighOrch->getNextHopId(nexthop); + } + /* See if there is an IP neighbor nexthop */ + else if (nexthop.label_stack.getSize() && + m_neighOrch->hasNextHop(NextHopKey(nexthop.ip_address, nexthop.alias))) + { + m_neighOrch->addNextHop(nexthop); + next_hop_id = m_neighOrch->getNextHopId(nexthop); + } + else + { + SWSS_LOG_INFO("Failed to get next hop %s for %u", + nextHops.to_string().c_str(), label); + return false; + } } } - } - /* The route is pointing to a next hop group */ - else - { - /* Check if there is already an existing next hop group */ - if (!hasNextHopGroup(nextHops)) + /* The route is pointing to a next hop group */ + else { - /* Try to create a new next hop group */ - if (!addNextHopGroup(nextHops)) + /* Check if there is already an existing next hop group */ + if (!hasNextHopGroup(nextHops)) { - for (auto it = nextHops.getNextHops().begin(); it != nextHops.getNextHops().end(); ++it) + /* Try to create a new next hop group */ + if (!addNextHopGroup(nextHops)) { - const NextHopKey& nextHop = *it; - if (!m_neighOrch->hasNextHop(nextHop)) - { - SWSS_LOG_INFO("Failed to get next hop %s in %s, resolving neighbor", - nextHop.to_string().c_str(), nextHops.to_string().c_str()); - m_neighOrch->resolveNeighbor(nextHop); - } - } - - /* Failed to create the next hop group and check if a temporary route is needed */ + /* Failed to create the next hop group and check if a temporary route is needed */ - /* If the current next hop is part of the next hop group to sync, - * then return false and no need to add another temporary route. */ - if (it_route != m_syncdLabelRoutes.at(vrf_id).end() && it_route->second.getSize() == 1) - { - const NextHopKey& nexthop = *it_route->second.getNextHops().begin(); - if (nextHops.contains(nexthop)) + /* If the current next hop is part of the next hop group to sync, + * then return false and no need to add another temporary route. */ + if (it_route != m_syncdLabelRoutes.at(vrf_id).end() && it_route->second.nhg_key.getSize() == 1) { - return false; + NextHopKey nexthop(it_route->second.nhg_key.to_string()); + if (nextHops.contains(nexthop)) + { + return false; + } } - } - /* Add a temporary route when a next hop group cannot be added, - * and there is no temporary route right now or the current temporary - * route is not pointing to a member of the next hop group to sync. */ - addTempLabelRoute(ctx, nextHops); - /* Return false since the original route is not successfully added */ - return false; + /* Add a temporary route when a next hop group cannot be added, + * and there is no temporary route right now or the current temporary + * route is not pointing to a member of the next hop group to sync. */ + addTempLabelRoute(ctx, nextHops); + /* Return false since the original route is not successfully added */ + return false; + } } - } - next_hop_id = m_syncdNextHopGroups[nextHops].next_hop_group_id; + next_hop_id = m_syncdNextHopGroups[nextHops].next_hop_group_id; + } + } + else + { + SWSS_LOG_INFO("Next hop group is owned by NhgOrch with index %s", + ctx.nhg_index.c_str()); + try + { + const NextHopGroup& nhg = gNhgOrch->getNhg(ctx.nhg_index); + next_hop_id = nhg.getId(); + } + catch(const std::out_of_range& e) + { + SWSS_LOG_WARN("Next hop group key %s does not exist", + ctx.nhg_index.c_str()); + return false; + } } + SWSS_LOG_INFO("Next hop ID: %lu", next_hop_id); + /* Sync the inseg entry */ sai_inseg_entry_t inseg_entry; - // route_entry.vr_id = vrf_id; No VRF support for MPLS? inseg_entry.switch_id = gSwitchId; inseg_entry.label = label; @@ -527,16 +593,8 @@ bool RouteOrch::addLabelRoute(LabelRouteBulkContext& ctx, const NextHopGroupKey if (it_route == m_syncdLabelRoutes.at(vrf_id).end()) { vector inseg_attrs; - if (blackhole) - { - inseg_attr.id = SAI_INSEG_ENTRY_ATTR_PACKET_ACTION; - inseg_attr.value.s32 = SAI_PACKET_ACTION_DROP; - } - else - { - inseg_attr.id = SAI_INSEG_ENTRY_ATTR_NEXT_HOP_ID; - inseg_attr.value.oid = next_hop_id; - } + inseg_attr.id = SAI_INSEG_ENTRY_ATTR_NEXT_HOP_ID; + inseg_attr.value.oid = next_hop_id; inseg_attrs.push_back(inseg_attr); inseg_attr.id = SAI_INSEG_ENTRY_ATTR_NUM_OF_POP; inseg_attr.value.u32 = ctx.pop_count; @@ -555,31 +613,29 @@ bool RouteOrch::addLabelRoute(LabelRouteBulkContext& ctx, const NextHopGroupKey else { /* Set the packet action to forward when there was no next hop (dropped) */ - if (it_route->second.getSize() == 0 && !blackhole) + if (it_route->second.nhg_key.getSize() == 0) { + vector inseg_attrs; + inseg_attr.id = SAI_INSEG_ENTRY_ATTR_NEXT_HOP_ID; + inseg_attr.value.oid = next_hop_id; + inseg_attrs.push_back(inseg_attr); + inseg_attr.id = SAI_INSEG_ENTRY_ATTR_NUM_OF_POP; + inseg_attr.value.u32 = ctx.pop_count; + inseg_attrs.push_back(inseg_attr); + inseg_attr.id = SAI_INSEG_ENTRY_ATTR_PACKET_ACTION; inseg_attr.value.s32 = SAI_PACKET_ACTION_FORWARD; object_statuses.emplace_back(); gLabelRouteBulker.set_entry_attribute(&object_statuses.back(), &inseg_entry, &inseg_attr); } - else if (blackhole) - { - inseg_attr.id = SAI_ROUTE_ENTRY_ATTR_PACKET_ACTION; - inseg_attr.value.s32 = SAI_PACKET_ACTION_DROP; - object_statuses.emplace_back(); - gLabelRouteBulker.set_entry_attribute(&object_statuses.back(), &inseg_entry, &inseg_attr); - } - else - { - inseg_attr.id = SAI_INSEG_ENTRY_ATTR_NEXT_HOP_ID; - inseg_attr.value.oid = next_hop_id; + inseg_attr.id = SAI_INSEG_ENTRY_ATTR_NEXT_HOP_ID; + inseg_attr.value.oid = next_hop_id; - /* Set the next hop ID to a new value */ - object_statuses.emplace_back(); - gLabelRouteBulker.set_entry_attribute(&object_statuses.back(), &inseg_entry, &inseg_attr); - } + /* Set the next hop ID to a new value */ + object_statuses.emplace_back(); + gLabelRouteBulker.set_entry_attribute(&object_statuses.back(), &inseg_entry, &inseg_attr); } return false; } @@ -590,7 +646,6 @@ bool RouteOrch::addLabelRoutePost(const LabelRouteBulkContext& ctx, const NextHo const sai_object_id_t& vrf_id = ctx.vrf_id; const Label& label = ctx.label; - bool blackhole = false; const auto& object_statuses = ctx.object_statuses; @@ -603,44 +658,55 @@ bool RouteOrch::addLabelRoutePost(const LabelRouteBulkContext& ctx, const NextHo /* next_hop_id indicates the next hop id or next hop group id of this route */ sai_object_id_t next_hop_id; - if (nextHops.getSize() == 0) - { - /* The route is pointing to a blackhole */ - blackhole = true; - } - if (nextHops.getSize() == 1) + /* Check that the next hop group is not owned by NhgOrch. */ + if (ctx.nhg_index.empty()) { /* The route is pointing to a next hop */ - const NextHopKey& nexthop = *nextHops.getNextHops().begin(); - if (nexthop.isIntfNextHop()) + if (nextHops.getSize() == 1) { - next_hop_id = m_intfsOrch->getRouterIntfsId(nexthop.alias); - /* rif is not created yet */ - if (next_hop_id == SAI_NULL_OBJECT_ID) + NextHopKey nexthop(nextHops.to_string()); + if (nexthop.isIntfNextHop()) { - SWSS_LOG_INFO("Failed to get next hop %s for label %u", - nextHops.to_string().c_str(), label); - return false; + + next_hop_id = m_intfsOrch->getRouterIntfsId(nexthop.alias); + /* rif is not created yet */ + if (next_hop_id == SAI_NULL_OBJECT_ID) + { + SWSS_LOG_INFO("Failed to get next hop %s for label %u", + nextHops.to_string().c_str(), label); + return false; + } + } + else + { + if (!m_neighOrch->hasNextHop(nexthop)) + { + SWSS_LOG_INFO("Failed to get next hop %s for label %u", + nextHops.to_string().c_str(), label); + return false; + } } } - else + /* The route is pointing to a next hop group */ + else if (nextHops.getSize() > 1) { - if (!m_neighOrch->hasNextHop(nexthop)) + if (!hasNextHopGroup(nextHops)) { - SWSS_LOG_INFO("Failed to get next hop %s for label %u", - nextHops.to_string().c_str(), label); + // Previous added an temporary route + auto& tmp_next_hop = ctx.tmp_next_hop; + addLabelRoutePost(ctx, tmp_next_hop); return false; } } } - /* The route is pointing to a next hop group */ else { - if (!hasNextHopGroup(nextHops)) + SWSS_LOG_INFO("NhgOrch owns the next hop group with index %s", + ctx.nhg_index.c_str()); + if (!gNhgOrch->hasNhg(ctx.nhg_index)) { - // Previous added an temporary route - auto& tmp_next_hop = ctx.tmp_next_hop; - addLabelRoutePost(ctx, tmp_next_hop); + SWSS_LOG_WARN("Failed to get next hop group with index %s", + ctx.nhg_index.c_str()); return false; } } @@ -664,27 +730,33 @@ bool RouteOrch::addLabelRoutePost(const LabelRouteBulkContext& ctx, const NextHo gCrmOrch->incCrmResUsedCounter(CrmResourceType::CRM_MPLS_INSEG); /* Increase the ref_count for the next hop (group) entry */ - increaseNextHopRefCount(nextHops); - SWSS_LOG_INFO("Post create label %u with next hop(s) %s", - label, nextHops.to_string().c_str()); + if (ctx.nhg_index.empty()) + { + increaseNextHopRefCount(nextHops); + } + else + { + SWSS_LOG_INFO("Increment NhgOrch's NHG %s ref count", + ctx.nhg_index.c_str()); + gNhgOrch->incNhgRefCount(ctx.nhg_index); + } + + SWSS_LOG_INFO("Create label route %u with next hop(s) %s", + label, nextHops.to_string().c_str()); } else { sai_status_t status; - /* Set the packet action to forward when there was no next hop (dropped) and not pointing to blackhole */ - if (it_route->second.getSize() == 0 && !blackhole) + /* Set the packet action to forward when there was no next hop (dropped) */ + if (it_route->second.nhg_key.getSize() == 0) { status = *it_status++; if (status != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Failed to set label %u with packet action forward, %d", label, status); - task_process_status handle_status = handleSaiSetStatus(SAI_API_MPLS, status); - if (handle_status != task_success) - { - return parseHandleSaiStatusFailure(handle_status); - } + return false; } } @@ -693,44 +765,42 @@ bool RouteOrch::addLabelRoutePost(const LabelRouteBulkContext& ctx, const NextHo { SWSS_LOG_ERROR("Failed to set label %u with next hop(s) %s", label, nextHops.to_string().c_str()); - task_process_status handle_status = handleSaiSetStatus(SAI_API_MPLS, status); - if (handle_status != task_success) + return false; + } + + /* Decrease the ref count for the previous next hop group. */ + if (it_route->second.nhg_index.empty()) + { + decreaseNextHopRefCount(it_route->second.nhg_key); + if (it_route->second.nhg_key.getSize() > 1 + && m_syncdNextHopGroups[it_route->second.nhg_key].ref_count == 0) { - return parseHandleSaiStatusFailure(handle_status); + m_bulkNhgReducedRefCnt.emplace(it_route->second.nhg_key, 0); } } + else + { + /* The next hop group is owned by NeighOrch. */ + SWSS_LOG_INFO("Decrement NhgOrch's NHG %s ref count", + it_route->second.nhg_index.c_str()); + gNhgOrch->decNhgRefCount(it_route->second.nhg_index); + } /* Increase the ref_count for the next hop (group) entry */ - increaseNextHopRefCount(nextHops); - - decreaseNextHopRefCount(it_route->second); - if (it_route->second.getSize() > 1 - && m_syncdNextHopGroups[it_route->second].ref_count == 0) + if (ctx.nhg_index.empty()) { - m_bulkNhgReducedRefCnt.emplace(it_route->second, 0); + increaseNextHopRefCount(nextHops); } - - if (blackhole) + else { - /* Set the packet action to drop for blackhole routes */ - status = *it_status++; - if (status != SAI_STATUS_SUCCESS) - { - SWSS_LOG_ERROR("Failed to set blackhole label %u with packet action drop, %d", - label, status); - task_process_status handle_status = handleSaiSetStatus(SAI_API_MPLS, status); - if (handle_status != task_success) - { - return parseHandleSaiStatusFailure(handle_status); - } - } + gNhgOrch->incNhgRefCount(ctx.nhg_index); } - SWSS_LOG_INFO("Post set label %u with next hop(s) %s", - label, nextHops.to_string().c_str()); + SWSS_LOG_INFO("Set label route %u with next hop(s) %s", + label, nextHops.to_string().c_str()); } - m_syncdLabelRoutes[vrf_id][label] = nextHops; + m_syncdLabelRoutes[vrf_id][label] = RouteNhg(nextHops, ctx.nhg_index); return true; } @@ -750,7 +820,6 @@ bool RouteOrch::removeLabelRoute(LabelRouteBulkContext& ctx) } sai_inseg_entry_t inseg_entry; - //inseg_entry.vr_id = vrf_id; No VRF support for MPLS inseg_entry.switch_id = gSwitchId; inseg_entry.label = label; @@ -794,40 +863,45 @@ bool RouteOrch::removeLabelRoutePost(const LabelRouteBulkContext& ctx) if (status != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Failed to remove label:%u\n", label); - task_process_status handle_status = handleSaiRemoveStatus(SAI_API_MPLS, status); - if (handle_status != task_success) - { - return parseHandleSaiStatusFailure(handle_status); - } + return false; } gCrmOrch->decCrmResUsedCounter(CrmResourceType::CRM_MPLS_INSEG); - /* - * Decrease the reference count only when the route is pointing to a next hop. - */ - decreaseNextHopRefCount(it_route->second); - if (it_route->second.getSize() > 1 - && m_syncdNextHopGroups[it_route->second].ref_count == 0) - { - m_bulkNhgReducedRefCnt.emplace(it_route->second, 0); - } - /* - * Additionally check if the NH has label and its ref count == 0, then - * remove the label next hop. - */ - else if (it_route->second.getSize() == 1) + if (it_route->second.nhg_index.empty()) { - const NextHopKey& nexthop = *it_route->second.getNextHops().begin(); - if (nexthop.isMplsNextHop() && - (m_neighOrch->getNextHopRefCount(nexthop) == 0)) + /* + * Decrease the reference count only when the route is pointing to a next hop. + */ + decreaseNextHopRefCount(it_route->second.nhg_key); + if (it_route->second.nhg_key.getSize() > 1 + && m_syncdNextHopGroups[it_route->second.nhg_key].ref_count == 0) { - m_neighOrch->removeMplsNextHop(nexthop); + m_bulkNhgReducedRefCnt.emplace(it_route->second.nhg_key, 0); + } + /* + * Additionally check if the NH has label and its ref count == 0, then + * remove the label next hop. + */ + else if (it_route->second.nhg_key.getSize() == 1) + { + NextHopKey nexthop(it_route->second.nhg_key.to_string()); + if (nexthop.isMplsNextHop() && + (m_neighOrch->getNextHopRefCount(nexthop) == 0)) + { + m_neighOrch->removeMplsNextHop(nexthop); + } } } + else + { + SWSS_LOG_INFO("Decrement NhgOrch's NHG %s ref count", + it_route->second.nhg_index.c_str()); + gNhgOrch->decNhgRefCount(it_route->second.nhg_index); + } - SWSS_LOG_INFO("Remove label %u with next hop(s) %s", - label, it_route->second.to_string().c_str()); + SWSS_LOG_INFO("Remove label route %u with next hop(s) %s", + label, it_route->second.nhg_key.to_string().c_str()); it_route_table->second.erase(label); @@ -838,4 +912,4 @@ bool RouteOrch::removeLabelRoutePost(const LabelRouteBulkContext& ctx) } return true; -} +} \ No newline at end of file diff --git a/orchagent/neighorch.cpp b/orchagent/neighorch.cpp index e3f9a4175e9..10eff1721bc 100644 --- a/orchagent/neighorch.cpp +++ b/orchagent/neighorch.cpp @@ -7,6 +7,7 @@ #include "directory.h" #include "muxorch.h" #include "subscriberstatetable.h" +#include "nhgorch.h" extern sai_neighbor_api_t* sai_neighbor_api; extern sai_next_hop_api_t* sai_next_hop_api; @@ -15,6 +16,7 @@ extern PortsOrch *gPortsOrch; extern sai_object_id_t gSwitchId; extern CrmOrch *gCrmOrch; extern RouteOrch *gRouteOrch; +extern NhgOrch *gNhgOrch; extern FgNhgOrch *gFgNhgOrch; extern Directory gDirectory; extern string gMySwitchType; @@ -322,6 +324,7 @@ bool NeighOrch::setNextHopFlag(const NextHopKey &nexthop, const uint32_t nh_flag { case NHFLAGS_IFDOWN: rc = gRouteOrch->invalidnexthopinNextHopGroup(nexthop, count); + rc &= gNhgOrch->invalidateNextHop(nexthop); break; default: assert(0); @@ -351,6 +354,7 @@ bool NeighOrch::clearNextHopFlag(const NextHopKey &nexthop, const uint32_t nh_fl { case NHFLAGS_IFDOWN: rc = gRouteOrch->validnexthopinNextHopGroup(nexthop, count); + rc &= gNhgOrch->validateNextHop(nexthop); break; default: assert(0); diff --git a/orchagent/nexthopkey.h b/orchagent/nexthopkey.h index 87c294415a2..f4d2a8fac3d 100644 --- a/orchagent/nexthopkey.h +++ b/orchagent/nexthopkey.h @@ -4,6 +4,7 @@ #include "ipaddress.h" #include "tokenize.h" #include "label.h" +#include "intfsorch.h" #define LABELSTACK_DELIMITER '+' #define NH_DELIMITER '@' @@ -160,10 +161,16 @@ struct NextHopKey std::string str; if (isMplsNextHop()) { - label_stack.to_string() + LABELSTACK_DELIMITER; + str = label_stack.to_string() + LABELSTACK_DELIMITER; } return str; } + + // Method to get the underlying IP/interface pair for the next hop. + NextHopKey ipKey() const + { + return NextHopKey(ip_address, alias); + } }; #endif /* SWSS_NEXTHOPKEY_H */ diff --git a/orchagent/nhgorch.cpp b/orchagent/nhgorch.cpp new file mode 100644 index 00000000000..694c479c1d6 --- /dev/null +++ b/orchagent/nhgorch.cpp @@ -0,0 +1,1388 @@ +#include "nhgorch.h" +#include "switchorch.h" +#include "neighorch.h" +#include "crmorch.h" +#include "routeorch.h" +#include "bulker.h" +#include "logger.h" +#include "swssnet.h" + +extern sai_object_id_t gSwitchId; + +extern PortsOrch *gPortsOrch; +extern CrmOrch *gCrmOrch; +extern NeighOrch *gNeighOrch; +extern SwitchOrch *gSwitchOrch; +extern RouteOrch *gRouteOrch; + +extern size_t gMaxBulkSize; + +extern sai_next_hop_group_api_t* sai_next_hop_group_api; +extern sai_next_hop_api_t* sai_next_hop_api; +extern sai_switch_api_t* sai_switch_api; + +unsigned int NextHopGroup::m_count = 0; + +/* Default maximum number of next hop groups */ +#define DEFAULT_NUMBER_OF_ECMP_GROUPS 128 +#define DEFAULT_MAX_ECMP_GROUP_SIZE 32 + +NhgOrch::NhgOrch(DBConnector *db, string tableName) : + Orch(db, tableName) +{ + SWSS_LOG_ENTER(); + + /* Get the switch's maximum next hop group capacity. */ + sai_attribute_t attr; + attr.id = SAI_SWITCH_ATTR_NUMBER_OF_ECMP_GROUPS; + + sai_status_t status = sai_switch_api->get_switch_attribute(gSwitchId, + 1, + &attr); + + if (status != SAI_STATUS_SUCCESS) + { + SWSS_LOG_WARN("Failed to get switch attribute number of ECMP groups. \ + Use default value. rv:%d", status); + m_maxNhgCount = DEFAULT_NUMBER_OF_ECMP_GROUPS; + } + else + { + m_maxNhgCount = attr.value.s32; + + /* + * ASIC specific workaround to re-calculate maximum ECMP groups + * according to diferent ECMP mode used. + * + * On Mellanox platform, the maximum ECMP groups returned is the value + * under the condition that the ECMP group size is 1. Deviding this + * number by DEFAULT_MAX_ECMP_GROUP_SIZE gets the maximum number of + * ECMP groups when the maximum ECMP group size is 32. + */ + char *platform = getenv("platform"); + if (platform && strstr(platform, MLNX_PLATFORM_SUBSTRING)) + { + SWSS_LOG_NOTICE("Mellanox platform - divide capacity by %d", + DEFAULT_MAX_ECMP_GROUP_SIZE); + m_maxNhgCount /= DEFAULT_MAX_ECMP_GROUP_SIZE; + } + } + + /* Set switch's next hop group capacity. */ + vector fvTuple; + fvTuple.emplace_back("MAX_NEXTHOP_GROUP_COUNT", + to_string(m_maxNhgCount)); + gSwitchOrch->set_switch_capability(fvTuple); + + SWSS_LOG_NOTICE("Maximum number of ECMP groups supported is %d", + m_maxNhgCount); +} + +/* + * Purpose: Perform the operations requested by APPL_DB users. + * + * Description: Iterate over the untreated operations list and resolve them. + * The operations supported are SET and DEL. If an operation + * could not be resolved, it will either remain in the list, or be + * removed, depending on the case. + * + * Params: IN consumer - The cosumer object. + * + * Returns: Nothing. + */ +void NhgOrch::doTask(Consumer& consumer) +{ + SWSS_LOG_ENTER(); + + if (!gPortsOrch->allPortsReady()) + { + return; + } + + auto it = consumer.m_toSync.begin(); + + while (it != consumer.m_toSync.end()) + { + KeyOpFieldsValuesTuple t = it->second; + + string index = kfvKey(t); + string op = kfvOp(t); + + SWSS_LOG_INFO("Next hop group table key %s, op %s", + index.c_str(), op.c_str()); + + bool success; + const auto& nhg_it = m_syncdNextHopGroups.find(index); + + if (op == SET_COMMAND) + { + string ips; + string aliases; + string weights; + string mpls_nhs; + + /* Get group's next hop IPs and aliases */ + for (auto i : kfvFieldsValues(t)) + { + if (fvField(i) == "nexthop") + ips = fvValue(i); + + if (fvField(i) == "ifname") + aliases = fvValue(i); + + if (fvField(i) == "weight") + weights = fvValue(i); + + if (fvField(i) == "mpls_nh") + mpls_nhs = fvValue(i); + } + + /* Split ips and alaises strings into vectors of tokens. */ + vector ipv = tokenize(ips, ','); + vector alsv = tokenize(aliases, ','); + vector mpls_nhv = tokenize(mpls_nhs, ','); + + /* Create the next hop group key. */ + string nhg_str; + + for (uint32_t i = 0; i < ipv.size(); i++) + { + if (i) nhg_str += NHG_DELIMITER; + if (!mpls_nhv.empty() && mpls_nhv[i] != "na") + { + nhg_str += mpls_nhv[i] + LABELSTACK_DELIMITER; + } + nhg_str += ipv[i] + NH_DELIMITER + alsv[i]; + } + + NextHopGroupKey nhg_key = NextHopGroupKey(nhg_str, weights); + + /* If the group does not exist, create one. */ + if (nhg_it == m_syncdNextHopGroups.end()) + { + SWSS_LOG_NOTICE("Adding next hop group %s with %s", + index.c_str(), + nhg_str.c_str()); + /* + * If we'd have to create a SAI object for the group, and we + * already reached the limit, we're going to create a temporary + * group, represented by one of it's NH only until we have + * enough resources to sync the whole group. The item is going + * to be kept in the sync list so we keep trying to create the + * actual group when there are enough resources. + */ + if ((nhg_key.getSize() > 1) && + (NextHopGroup::getCount() >= m_maxNhgCount)) + { + SWSS_LOG_WARN("Next hop group count reached its limit."); + + try + { + auto nhg = std::make_unique( + createTempNhg(nhg_key)); + SWSS_LOG_NOTICE("Adding temp next hop group with %s", + nhg->to_string().c_str()); + if (nhg->sync()) + { + SWSS_LOG_INFO("Temporary NHG successfully synced"); + m_syncdNextHopGroups.emplace(index, + NhgEntry(std::move(nhg))); + } + else + { + SWSS_LOG_WARN( + "Failed to sync temporary NHG %s with %s", + index.c_str(), + nhg_key.to_string().c_str()); + } + } + catch (const std::exception& e) + { + SWSS_LOG_WARN( + "Got exception: %s while adding temp group %s", + e.what(), + nhg_key.to_string().c_str()); + } + + /* + * We set the success to false so we keep trying to update + * this group in order to sync the whole group. + */ + success = false; + } + else + { + auto nhg = std::make_unique(nhg_key); + success = nhg->sync(); + + if (success) + { + SWSS_LOG_INFO("NHG successfully synced"); + m_syncdNextHopGroups.emplace(index, + NhgEntry(std::move(nhg))); + } + } + } + /* If the group exists, update it. */ + else + { + SWSS_LOG_NOTICE("Update next hop group %s with %s", + index.c_str(), + nhg_str.c_str()); + + const auto& nhg_ptr = nhg_it->second.nhg; + + + /* + * A NHG update should never change the SAI ID of the NHG if it + * is still referenced by some other objects, as they would not + * be notified about this change. The only exception to this + * rule is for the temporary NHGs, as the referencing objects + * will keep querying the NhgOrch for any SAI ID updates. + */ + if (!nhg_ptr->isTemp() && + ((nhg_key.getSize() == 1) || (nhg_ptr->getSize() == 1)) && + (nhg_it->second.ref_count > 0)) + { + SWSS_LOG_WARN("Next hop group %s update would change SAI " + "ID while referenced, so not performed", + index.c_str()); + success = false; + } + /* + * If the update would mandate promoting a temporary next hop + * group to a multiple next hops group and we do not have the + * resources yet, we have to skip it until we have enough + * resources. + */ + else if (nhg_ptr->isTemp() && + (nhg_key.getSize() > 1) && + (NextHopGroup::getCount() >= m_maxNhgCount)) + { + /* + * If the group was updated in such way that the previously + * chosen next hop does not represent the new group key, + * update the temporary group to choose a new next hop from + * the new key. + */ + if (!nhg_key.contains(nhg_ptr->getKey())) + { + SWSS_LOG_NOTICE("Updating temporary group %s to %s", + index.c_str(), + nhg_key.to_string().c_str()); + + try + { + /* Create the new temporary next hop group. */ + auto new_nhg = std::make_unique( + createTempNhg(nhg_key)); + + /* + * If we successfully sync the new group, update + * only the next hop group entry's pointer so we + * don't mess up the reference counter, as other + * objects may already reference it. + */ + if (new_nhg->sync()) + { + SWSS_LOG_INFO( + "Temporary NHG successfully synced"); + nhg_it->second.nhg = std::move(new_nhg); + } + else + { + SWSS_LOG_WARN( + "Failed to sync updated temp NHG %s with %s", + index.c_str(), + nhg_key.to_string().c_str()); + } + } + catch (const std::exception& e) + { + SWSS_LOG_WARN( + "Got exception: %s while adding temp group %s", + e.what(), + nhg_key.to_string().c_str()); + } + } + + /* + * Because the resources are exhausted, we have to keep + * trying to update the temporary group until we can + * promote it to a fully functional group. + */ + success = false; + } + /* Common update, when all the requirements are met. */ + else + { + success = nhg_ptr->update(nhg_key); + } + } + } + else if (op == DEL_COMMAND) + { + SWSS_LOG_NOTICE("Deleting next hop group %s", index.c_str()); + + /* If the group does not exist, do nothing. */ + if (nhg_it == m_syncdNextHopGroups.end()) + { + SWSS_LOG_WARN("Unable to find group with key %s to remove", + index.c_str()); + /* Mark the operation as successful to consume it. */ + success = true; + } + /* If the group does exist, but it's still referenced, skip. */ + else if (nhg_it->second.ref_count > 0) + { + SWSS_LOG_WARN( + "Unable to remove group %s which is referenced", + index.c_str()); + success = false; + } + /* Else, if the group is no more referenced, delete it. */ + else + { + const auto& nhg = nhg_it->second.nhg; + + success = nhg->desync(); + + if (success) + { + m_syncdNextHopGroups.erase(nhg_it); + } + } + } + else + { + SWSS_LOG_ERROR("Unknown operation type %s\n", op.c_str()); + /* Mark the operation as successful to consume it. */ + success = true; + } + + /* Depending on the operation success, consume it or skip it. */ + if (success) + { + it = consumer.m_toSync.erase(it); + } + else + { + ++it; + } + } +} + +/* + * Purpose: Validate a next hop for any groups that contains it. + * + * Description: Iterate over all next hop groups and validate the next hop in + * those who contain it. + * + * Params: IN nh_key - The next hop to validate. + * + * Returns: true, if the next hop was successfully validated in all + * containing groups; + * false, otherwise. + */ +bool NhgOrch::validateNextHop(const NextHopKey& nh_key) +{ + SWSS_LOG_ENTER(); + SWSS_LOG_INFO("Validating next hop %s", nh_key.to_string().c_str()); + + /* + * Iterate through all groups and validate the next hop in those who + * contain it. + */ + for (auto& it : m_syncdNextHopGroups) + { + auto& nhg = it.second.nhg; + + SWSS_LOG_INFO("Check if next hop in group %s", + it.first.c_str()); + + if (nhg->hasNextHop(nh_key)) + { + SWSS_LOG_INFO("Group has next hop"); + + /* + * If sync fails, exit right away, as we expect it to be due to a + * raeson for which any other future validations will fail too. + */ + if (!nhg->validateNextHop(nh_key)) + { + SWSS_LOG_ERROR("Failed to validate next hop %s in group %s", + nh_key.to_string().c_str(), + it.first.c_str()); + return false; + } + } + } + + return true; +} + +/* + * Purpose: Invalidate a next hop for any groups containing it. + * + * Description: Iterate through the next hop groups and desync the next hop + * from those that contain it. + * + * Params: IN nh_key - The next hop to invalidate. + * + * Returns: true, if the next hop was successfully invalidatedd from all + * containing groups; + * false, otherwise. + */ +bool NhgOrch::invalidateNextHop(const NextHopKey& nh_key) +{ + SWSS_LOG_ENTER(); + SWSS_LOG_INFO("Invalidating next hop %s", nh_key.to_string().c_str()); + + /* + * Iterate through all groups and invalidate the next hop from those who + * contain it. + */ + for (auto& it : m_syncdNextHopGroups) + { + auto& nhg = it.second.nhg; + + SWSS_LOG_INFO("Check if next hop is in group %s", + it.first.c_str()); + + if (nhg->hasNextHop(nh_key)) + { + SWSS_LOG_INFO("Group has next hop"); + + /* If the desync fails, exit right away. */ + if (!nhg->invalidateNextHop(nh_key)) + { + SWSS_LOG_WARN("Failed to invalidate next hop %s from group %s", + nh_key.to_string().c_str(), + it.first.c_str()); + return false; + } + } + } + + return true; +} + +/* + * Purpose: Increase the ref count for a next hop group. + * + * Description: Increment the ref count for a next hop group by 1. + * + * Params: IN index - The index of the next hop group. + * + * Returns: Nothing. + */ +void NhgOrch::incNhgRefCount(const std::string& index) +{ + SWSS_LOG_ENTER(); + + NhgEntry& nhg_entry = m_syncdNextHopGroups.at(index); + + SWSS_LOG_INFO("Increment group %s ref count from %u to %u", + index.c_str(), + nhg_entry.ref_count, + nhg_entry.ref_count + 1); + + ++nhg_entry.ref_count; +} + +/* + * Purpose: Decrease the ref count for a next hop group. + * + * Description: Decrement the ref count for a next hop group by 1. + * + * Params: IN index - The index of the next hop group. + * + * Returns: Nothing. + */ +void NhgOrch::decNhgRefCount(const std::string& index) +{ + SWSS_LOG_ENTER(); + + NhgEntry& nhg_entry = m_syncdNextHopGroups.at(index); + + /* Sanity check so we don't overflow. */ + assert(nhg_entry.ref_count > 0); + + SWSS_LOG_INFO("Decrement group %s ref count from %u to %u", + index.c_str(), + nhg_entry.ref_count, + nhg_entry.ref_count - 1); + + --nhg_entry.ref_count; +} + +/* + * Purpose: Get the next hop ID of the member. + * + * Description: Get the SAI ID of the next hop from NeighOrch. + * + * Params: None. + * + * Returns: The SAI ID of the next hop, or SAI_NULL_OBJECT_ID if the next + * hop is not valid. + */ +sai_object_id_t NextHopGroupMember::getNhId() const +{ + SWSS_LOG_ENTER(); + + sai_object_id_t nh_id = SAI_NULL_OBJECT_ID; + + if (gNeighOrch->hasNextHop(m_nh_key)) + { + SWSS_LOG_INFO("NeighOrch has next hop %s", + m_nh_key.to_string().c_str()); + nh_id = gNeighOrch->getNextHopId(m_nh_key); + } + /* + * If the next hop is labeled and the IP next hop exists, create the + * labeled one over NeighOrch as it doesn't know about these next hops. + * We don't do this in the constructor as the IP next hop may be added + * after the object is created and would never create the labeled next hop + * afterwards. + */ + else if (isLabeled() && gNeighOrch->hasNextHop(m_nh_key.ipKey())) + { + SWSS_LOG_INFO("Create labeled next hop %s", + m_nh_key.to_string().c_str()); + gNeighOrch->addNextHop(m_nh_key); + nh_id = gNeighOrch->getNextHopId(m_nh_key); + } + + return nh_id; +} + +/* + * Purpose: Move assignment operator. + * + * Description: Perform member-wise swap. + * + * Params: IN nhgm - The next hop group member to swap. + * + * Returns: Reference to this object. + */ +NextHopGroupMember& NextHopGroupMember::operator=(NextHopGroupMember&& nhgm) +{ + SWSS_LOG_ENTER(); + + std::swap(m_nh_key, nhgm.m_nh_key); + std::swap(m_gm_id, nhgm.m_gm_id); + + return *this; +} + +/* + * Purpose: Update the weight of a member. + * + * Description: Set the new member's weight and if the member is synced, update + * the SAI attribute as well. + * + * Params: IN weight - The weight of the next hop group member. + * + * Returns: true, if the operation was successful; + * false, otherwise. + */ +bool NextHopGroupMember::updateWeight(uint32_t weight) +{ + SWSS_LOG_ENTER(); + SWSS_LOG_INFO("Update group member %s weight from %u to %u", + m_nh_key.to_string().c_str(), + m_nh_key.weight, + weight); + + bool success = true; + + m_nh_key.weight = weight; + + if (isSynced()) + { + SWSS_LOG_INFO("Updating SAI weight attribute"); + + sai_attribute_t nhgm_attr; + nhgm_attr.id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_WEIGHT; + nhgm_attr.value.s32 = m_nh_key.weight; + + sai_status_t status = sai_next_hop_group_api-> + set_next_hop_group_member_attribute(m_gm_id, &nhgm_attr); + success = status == SAI_STATUS_SUCCESS; + } + + SWSS_LOG_INFO("Returning %d", success); + return success; +} + +/* + * Purpose: Sync the group member with the given group member ID. + * + * Description: Set the group member's SAI ID to the the one given and + * increment the appropriate ref counters. + * + * Params: IN gm_id - The group member SAI ID to set. + * + * Returns: Nothing. + */ +void NextHopGroupMember::sync(sai_object_id_t gm_id) +{ + SWSS_LOG_ENTER(); + + /* The SAI ID should be updated from invalid to something valid. */ + assert((m_gm_id == SAI_NULL_OBJECT_ID) && (gm_id != SAI_NULL_OBJECT_ID)); + + m_gm_id = gm_id; + gCrmOrch->incCrmResUsedCounter(CrmResourceType::CRM_NEXTHOP_GROUP_MEMBER); + gNeighOrch->increaseNextHopRefCount(m_nh_key); +} + +/* + * Purpose: Desync the group member, resetting it's SAI ID. + * + * Description: Reset the group member's SAI ID and decrement the appropriate + * ref counters. + * + * Params: None. + * + * Returns: Nothing. + */ +void NextHopGroupMember::desync() +{ + SWSS_LOG_ENTER(); + + /* + * If the member is already desynced, exit so we don't decrement the ref + * counters more than once. + */ + if (!isSynced()) + { + SWSS_LOG_INFO("Next hop group member %s is already desynced", + m_nh_key.to_string().c_str()); + return; + } + + m_gm_id = SAI_NULL_OBJECT_ID; + gCrmOrch->decCrmResUsedCounter(CrmResourceType::CRM_NEXTHOP_GROUP_MEMBER); + gNeighOrch->decreaseNextHopRefCount(m_nh_key); +} + +/* + * Purpose: Destructor. + * + * Description: Assert the group member is desynced and remove the labeled + * next hop from NeighOrch if it is unreferenced. + * + * Params: None. + * + * Returns: Nothing. + */ +NextHopGroupMember::~NextHopGroupMember() +{ + SWSS_LOG_ENTER(); + + /* + * The group member should be desynced from it's group before destroying + * it. + */ + assert(!isSynced()); + + /* + * If the labeled next hop is unreferenced, delete it from NeighOrch as + * NhgOrch and RouteOrch are the ones controlling it's lifetime. They both + * watch over these labeled next hops, so it doesn't matter who created + * them as they're both doing the same checks before deleting a labeled + * next hop. + */ + if (isLabeled() && (gNeighOrch->getNextHopRefCount(m_nh_key) == 0)) + { + SWSS_LOG_INFO("Delete labeled next hop %s", + m_nh_key.to_string().c_str()); + gNeighOrch->removeMplsNextHop(m_nh_key); + } +} + +/* + * Purpose: Constructor. + * + * Description: Initialize the group's members based on the next hop group key. + * + * Params: IN key - The next hop group's key. + * + * Returns: Nothing. + */ +NextHopGroup::NextHopGroup(const NextHopGroupKey& key) : + m_key(key), + m_id(SAI_NULL_OBJECT_ID), + m_is_temp(false) +{ + SWSS_LOG_ENTER(); + SWSS_LOG_INFO("Creating next hop group %s", m_key.to_string().c_str()); + + /* Parse the key and create the members. */ + for (const auto& it : m_key.getNextHops()) + { + SWSS_LOG_INFO("Adding next hop %s to the group", + it.to_string().c_str()); + m_members.emplace(it, NextHopGroupMember(it)); + } +} + +/* + * Purpose: Move constructor. + * + * Description: Initialize the members by doing member-wise move construct. + * + * Params: IN nhg - The rvalue object to initialize from. + * + * Returns: Nothing. + */ +NextHopGroup::NextHopGroup(NextHopGroup&& nhg) : + m_key(std::move(nhg.m_key)), + m_id(std::move(nhg.m_id)), + m_members(std::move(nhg.m_members)), + m_is_temp(nhg.m_is_temp) +{ + SWSS_LOG_ENTER(); + + /* Invalidate the rvalue SAI ID. */ + nhg.m_id = SAI_NULL_OBJECT_ID; +} + +/* + * Purpose: Move assignment operator. + * + * Description: Perform member-wise swap. + * + * Params: IN nhg - The rvalue object to swap with. + * + * Returns: Referene to this object. + */ +NextHopGroup& NextHopGroup::operator=(NextHopGroup&& nhg) +{ + SWSS_LOG_ENTER(); + + std::swap(m_key, nhg.m_key); + std::swap(m_id, nhg.m_id); + std::swap(m_members, nhg.m_members); + m_is_temp = nhg.m_is_temp; + + return *this; +} + +/* + * Purpose: Sync a next hop group. + * + * Description: Fill in the NHG ID. If the group contains only one NH, this ID + * will be the SAI ID of the next hop that NeighOrch owns. If it + * has more than one NH, create a group over the SAI API and then + * add it's members. + * + * Params: None. + * + * Returns: true, if the operation was successful; + * false, otherwise. + */ +bool NextHopGroup::sync() +{ + SWSS_LOG_ENTER(); + + /* If the group is already synced, exit. */ + if (isSynced()) + { + SWSS_LOG_INFO("Group %s is already synced", to_string().c_str()); + return true; + } + + /* + * If the group has only one member, the group ID will be the member's NH + * ID. + */ + if (m_members.size() == 1) + { + const NextHopGroupMember& nhgm = m_members.begin()->second; + + if (nhgm.getNhId() == SAI_NULL_OBJECT_ID) + { + SWSS_LOG_WARN("Next hop %s is not synced", + nhgm.getNhKey().to_string().c_str()); + return false; + } + else + { + m_id = nhgm.getNhId(); + } + } + /* If the key contains more than one NH, create a group. */ + else + { + /* Assert the group is not empty. */ + assert(!m_members.empty()); + + /* Create the group over SAI. */ + sai_attribute_t nhg_attr; + vector nhg_attrs; + + nhg_attr.id = SAI_NEXT_HOP_GROUP_ATTR_TYPE; + nhg_attr.value.s32 = SAI_NEXT_HOP_GROUP_TYPE_ECMP; + nhg_attrs.push_back(nhg_attr); + + sai_status_t status = sai_next_hop_group_api->create_next_hop_group( + &m_id, + gSwitchId, + (uint32_t)nhg_attrs.size(), + nhg_attrs.data()); + + /* If the operation fails, exit. */ + if (status != SAI_STATUS_SUCCESS) + { + SWSS_LOG_ERROR("Failed to create next hop group %s, rv:%d", + m_key.to_string().c_str(), status); + return false; + } + + SWSS_LOG_INFO("Next hop group %s has SAI ID %lu", + m_key.to_string().c_str(), + m_id); + + /* Increment the amount of programmed next hop groups. */ + gCrmOrch->incCrmResUsedCounter(CrmResourceType::CRM_NEXTHOP_GROUP); + + /* Increment the number of synced NHGs. */ + ++m_count; + + /* + * Try creating the next hop group's members over SAI. + */ + if (!syncMembers(m_key.getNextHops())) + { + SWSS_LOG_WARN("Failed to create next hop members of group %s", + to_string().c_str()); + return false; + } + } + + return true; +} + +/* + * Purpose: Create a temporary next hop group when resources are exhausted. + * + * Description: Choose one member to represent the group and create a group + * with only that next hop as a member. Any object referencing + * the SAI ID of a temporary group should keep querying NhgOrch in + * case the group is updated, as it's SAI ID will change at that + * point. + * + * Params: IN index - The CP index of the next hop group. + * + * Returns: The created temporary next hop group. + */ +NextHopGroup NhgOrch::createTempNhg(const NextHopGroupKey& nhg_key) +{ + SWSS_LOG_ENTER(); + SWSS_LOG_INFO("Syncing temporary group %s", nhg_key.to_string().c_str()); + + /* Get a list of all valid next hops in the group. */ + std::list valid_nhs; + + for (const auto& nh_key : nhg_key.getNextHops()) + { + /* + * Check if the IP next hop exists. We check for the IP next hop as + * the group might contain labeled NHs which we should create if their + * IP next hop does exist. + */ + if (gNeighOrch->hasNextHop(nh_key.ipKey())) + { + SWSS_LOG_INFO("Next hop %s is a candidate for temporary group %s", + nh_key.to_string().c_str(), + nhg_key.to_string().c_str()); + valid_nhs.push_back(nh_key); + } + } + + /* If there is no valid member, exit. */ + if (valid_nhs.empty()) + { + SWSS_LOG_INFO("There is no valid NH to sync temporary group %s", + nhg_key.to_string().c_str()); + throw std::logic_error("No valid NH in the key"); + } + + /* Randomly select the valid NH to represent the group. */ + auto it = valid_nhs.begin(); + advance(it, rand() % valid_nhs.size()); + + /* Create the temporary group. */ + NextHopGroup nhg(NextHopGroupKey(it->to_string())); + nhg.setTemp(true); + + return nhg; +} + +/* + * Purpose: Desync the next hop group. + * + * Description: Reset the group's SAI ID. If the group has more than one + * members, desync the members and the group. + * + * Params: None. + * + * Returns: true, if the operation was successful; + * false, otherwise + */ +bool NextHopGroup::desync() +{ + SWSS_LOG_ENTER(); + SWSS_LOG_INFO("Desyncing group %s", to_string().c_str()); + + /* If the group is already desynced, there is nothing to be done. */ + if (!isSynced()) + { + SWSS_LOG_INFO("Group %s is already desynced", + m_key.to_string().c_str()); + return true; + } + + /* + * If the group has more than one members, desync it's members, then the + * group. + */ + if (m_members.size() > 1) + { + /* Desync group's members. If we failed to desync the members, exit. */ + if (!desyncMembers(m_key.getNextHops())) + { + SWSS_LOG_ERROR("Failed to desync group %s members", + to_string().c_str()); + return false; + } + + /* Desync the group. */ + sai_status_t status = sai_next_hop_group_api-> + remove_next_hop_group(m_id); + + if (status != SAI_STATUS_SUCCESS) + { + SWSS_LOG_ERROR("Failed to remove next hop group %lu, rv: %d", + m_id, status); + return false; + } + + /* If the operation is successful, release the resources. */ + gCrmOrch->decCrmResUsedCounter( + CrmResourceType::CRM_NEXTHOP_GROUP); + --m_count; + } + + /* Reset the group ID. */ + m_id = SAI_NULL_OBJECT_ID; + + return true; +} + +/* + * Purpose: Sync the given next hop group's members over the SAI API. + * + * Description: Iterate over the given members and sync them. If the member + * is already synced, we skip it. If any of the next hops isn't + * already synced by the neighOrch, this will fail. Any next hop + * which has the neighbor interface down will be skipped. + * + * Params: IN nh_keys - The next hop keys of the members to sync. + * + * Returns: true, if the members were added succesfully; + * false, otherwise. + */ +bool NextHopGroup::syncMembers(const std::set& nh_keys) +{ + SWSS_LOG_ENTER(); + SWSS_LOG_INFO("Adding next hop group %s members", + to_string().c_str()); + + /* This method should not be called for groups with only one NH. */ + assert(m_members.size() > 1); + + ObjectBulker nextHopGroupMemberBulker( + sai_next_hop_group_api, gSwitchId, gMaxBulkSize); + + /* + * Iterate over the given next hops. + * If the group member is already synced, skip it. + * If any next hop is not synced, thus neighOrch doesn't have it, stop + * immediately. + * If a next hop's interface is down, skip it from being synced. + */ + std::map syncingMembers; + + for (const auto& nh_key : nh_keys) + { + SWSS_LOG_INFO("Checking if next hop %s is valid", + nh_key.to_string().c_str()); + + NextHopGroupMember& nhgm = m_members.at(nh_key); + + /* If the member is already synced, continue. */ + if (nhgm.getGmId() != SAI_NULL_OBJECT_ID) + { + SWSS_LOG_INFO("Update member"); + continue; + } + + /* + * If the next hop doesn't exist, stop from syncing the members. + */ + if (nhgm.getNhId() == SAI_NULL_OBJECT_ID) + { + SWSS_LOG_WARN("Failed to get next hop %s in group %s", + nhgm.to_string().c_str(), to_string().c_str()); + return false; + } + + /* If the neighbor's interface is down, skip from being syncd. */ + if (gNeighOrch->isNextHopFlagSet(nh_key, NHFLAGS_IFDOWN)) + { + SWSS_LOG_WARN("Skip next hop %s in group %s, interface is down", + nh_key.to_string().c_str(), to_string().c_str()); + continue; + } + + /* Create the next hop group member's attributes and fill them. */ + vector nhgm_attrs = createNhgmAttrs(nhgm); + + /* Add a bulker entry for this member. */ + nextHopGroupMemberBulker.create_entry(&syncingMembers[nh_key], + (uint32_t)nhgm_attrs.size(), + nhgm_attrs.data()); + } + + /* Flush the bulker to perform the sync. */ + nextHopGroupMemberBulker.flush(); + + /* + * Go through the synced members and increment the Crm ref count for the + * successful ones. + */ + bool success = true; + for (const auto& mbr : syncingMembers) + { + SWSS_LOG_INFO("Checking next hop member %s has a valid SAI ID", + mbr.first.to_string().c_str()); + + /* Check that the returned member ID is valid. */ + if (mbr.second == SAI_NULL_OBJECT_ID) + { + SWSS_LOG_ERROR("Failed to create next hop group %lu's member %s", + m_id, mbr.first.to_string().c_str()); + success = false; + } + else + { + m_members.at(mbr.first).sync(mbr.second); + } + } + + SWSS_LOG_INFO("Returning %d", success); + + return success; +} +/* + * Purpose: Desync the given group's members over the SAI API. + * + * Description: Go through the given members and desync them. + * + * Params: IN nh_keys - The next hop keys of the members to desync. + * + * Returns: true, if the operation was successful; + * false, otherwise + */ +bool NextHopGroup::desyncMembers(const std::set& nh_keys) +{ + SWSS_LOG_ENTER(); + SWSS_LOG_INFO("Removing members of group %s", + to_string().c_str()); + + /* This method should not be called for groups with only one NH. */ + assert(m_members.size() > 1); + + ObjectBulker nextHopGroupMemberBulker( + sai_next_hop_group_api, gSwitchId, gMaxBulkSize); + + /* + * Iterate through the given group members add them to be desynced. + * + * Keep track of the SAI delete statuses in case one of them returns an + * error. We assume that deletion should always succeed. If for some + * reason it doesn't, there's nothing we can do, but we'll log an error + * later. + */ + std::map statuses; + + for (const auto& nh_key : nh_keys) + { + SWSS_LOG_INFO("Desyncing member %s", nh_key.to_string().c_str()); + + const NextHopGroupMember& nhgm = m_members.at(nh_key); + + if (nhgm.isSynced()) + { + SWSS_LOG_INFO("Removing next hop group member %s", + nh_key.to_string().c_str()); + nextHopGroupMemberBulker.remove_entry(&statuses[nh_key], + nhgm.getGmId()); + } + } + + /* Flush the bulker to apply the changes. */ + nextHopGroupMemberBulker.flush(); + + /* + * Iterate over the statuses map and check if the removal was successful. + * If it was, decrement the Crm counter and reset the member's ID. If it + * wasn't, log an error message. + */ + bool success = true; + + for (const auto& status : statuses) + { + SWSS_LOG_INFO("Check member's %s status", + status.first.to_string().c_str()); + + if (status.second == SAI_STATUS_SUCCESS) + { + SWSS_LOG_INFO("Member was successfully desynced"); + m_members.at(status.first).desync(); + } + else + { + SWSS_LOG_ERROR("Could not remove next hop group member %s, rv: %d", + status.first.to_string().c_str(), status.second); + success = false; + } + } + + SWSS_LOG_INFO("Returning %d", success); + + return success; +} + +/* + * Purpose: Update the next hop group based on a new next hop group key. + * + * Description: Update the group's members by removing the members that aren't + * in the new next hop group and adding the new members. We first + * remove the missing members to avoid cases where we reached the + * ASIC group members limit. This will not update the group's SAI + * ID in any way, unless we are promoting a temporary group. + * + * Params: IN nhg_key - The new next hop group key to update to. + * + * Returns: true, if the operation was successful; + * false, otherwise. + */ +bool NextHopGroup::update(const NextHopGroupKey& nhg_key) +{ + SWSS_LOG_ENTER(); + SWSS_LOG_INFO("Update group %s with %s", + to_string().c_str(), nhg_key.to_string().c_str()); + + /* + * There are three cases where the SAI ID of the NHG will change: + * - changing a single next hop group to another single next hop group + * - changing a single next hop group to a multiple next hop group + * - changing a multiple next hop group to a single next hop group + * + * For these kind of updates, we can simply swap the existing group with + * the updated group, as we have no way of preserving the existing SAI ID. + * + * Also, we can perform the same operation if the group is not synced at + * all. + */ + if ((nhg_key.getSize() == 1) || (m_members.size() == 1) || !isSynced()) + { + SWSS_LOG_INFO("Updating group without preserving it's SAI ID"); + + bool was_synced = isSynced(); + *this = NextHopGroup(nhg_key); + + /* Sync the group only if it was synced before. */ + return (was_synced ? sync() : true); + } + /* + * If we are updating a multiple next hop group to another multiple next + * hop group, we can preserve it's SAI ID by only updating it's members. + * This way, any objects referencing the SAI ID of this object will + * continue to work. + */ + else + { + /* Update the key. */ + m_key = nhg_key; + + std::set new_nh_keys = nhg_key.getNextHops(); + std::set removed_nh_keys; + + /* Mark the members that need to be removed. */ + for (auto& mbr_it : m_members) + { + const NextHopKey& nh_key = mbr_it.first; + + /* Look for the existing member inside the new ones. */ + const auto& new_nh_key_it = new_nh_keys.find(nh_key); + + /* If the member is not found, then it needs to be removed. */ + if (new_nh_key_it == new_nh_keys.end()) + { + SWSS_LOG_INFO("Add member %s to be desynced", + nh_key.to_string().c_str()); + removed_nh_keys.insert(nh_key); + } + /* If the member is updated, update it's weight. */ + else + { + if (!mbr_it.second.updateWeight(new_nh_key_it->weight)) + { + SWSS_LOG_WARN("Failed to update member %s weight", + nh_key.to_string().c_str()); + return false; + } + + /* + * Erase the member from the new members list as it already + * exists. + */ + new_nh_keys.erase(new_nh_key_it); + } + } + + /* Desync the removed members. */ + if (!desyncMembers(removed_nh_keys)) + { + SWSS_LOG_WARN("Failed to desync members from group %s", + to_string().c_str()); + return false; + } + + /* Remove the desynced members. */ + for (const auto& nh_key : removed_nh_keys) + { + m_members.erase(nh_key); + } + + /* Add any new members to the group. */ + for (const auto& it : new_nh_keys) + { + m_members.emplace(it, NextHopGroupMember(it)); + } + + /* + * Sync all the members of the group. We sync all of them because + * there may be previous members that were not successfully synced + * before the update, so we must make sure we sync those as well. + */ + if (!syncMembers(m_key.getNextHops())) + { + SWSS_LOG_WARN("Failed to sync new members for group %s", + to_string().c_str()); + return false; + } + + return true; + } +} + +/* + * Purpose: Create the attributes vector for a next hop group member. + * + * Description: Create the group ID and next hop ID attributes. + * + * Params: IN nhgm - The next hop group member. + * + * Returns: The attributes vector for the given next hop. + */ +vector NextHopGroup::createNhgmAttrs( + const NextHopGroupMember& nhgm) const +{ + SWSS_LOG_ENTER(); + + vector nhgm_attrs; + sai_attribute_t nhgm_attr; + + /* Fill in the group ID. */ + nhgm_attr.id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_GROUP_ID; + nhgm_attr.value.oid = m_id; + nhgm_attrs.push_back(nhgm_attr); + + /* Fill in the next hop ID. */ + nhgm_attr.id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_ID; + nhgm_attr.value.oid = nhgm.getNhId(); + nhgm_attrs.push_back(nhgm_attr); + + /* Fill in the wright. */ + nhgm_attr.id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_WEIGHT; + nhgm_attr.value.s32 = nhgm.getWeight(); + nhgm_attrs.push_back(nhgm_attr); + + return nhgm_attrs; +} + +/* + * Purpose: Validate a next hop in the group. + * + * Description: Sync the validated next hop group member. + * + * Params: IN nh_key - The next hop to validate. + * + * Returns: true, if the operation was successful; + * false, otherwise. + */ +bool NextHopGroup::validateNextHop(const NextHopKey& nh_key) +{ + SWSS_LOG_ENTER(); + SWSS_LOG_INFO("Validate NH %s in group %s", + nh_key.to_string().c_str(), + to_string().c_str()); + + /* + * If the group has only one member, there is nothing to be done. The + * member is only a reference to the next hop owned by NeighOrch, so it is + * not for us to take any decisions regarding those. + */ + if (m_members.size() == 1) + { + return true; + } + + return syncMembers({nh_key}); +} + +/* + * Purpose: Invalidate a next hop in the group. + * + * Description: Sync the invalidated next hop group member. + * + * Params: IN nh_key - The next hop to invalidate. + * + * Returns: true, if the operation was successful; + * false, otherwise. + */ +bool NextHopGroup::invalidateNextHop(const NextHopKey& nh_key) +{ + SWSS_LOG_ENTER(); + SWSS_LOG_INFO("Invalidate NH %s in group %s", + nh_key.to_string().c_str(), + to_string().c_str()); + + /* + * If the group has only one member, there is nothing to be done. The + * member is only a reference to the next hop owned by NeighOrch, so it is + * not for us to take any decisions regarding those. + */ + if (m_members.size() == 1) + { + return true; + } + + return desyncMembers({nh_key}); +} diff --git a/orchagent/nhgorch.h b/orchagent/nhgorch.h new file mode 100644 index 00000000000..e81f83993bf --- /dev/null +++ b/orchagent/nhgorch.h @@ -0,0 +1,242 @@ +#pragma once + +#include "orch.h" +#include "nexthopgroupkey.h" + +class NextHopGroupMember +{ +public: + /* Constructors / Assignment operators. */ + NextHopGroupMember(const NextHopKey& nh_key) : + m_nh_key(nh_key), + m_gm_id(SAI_NULL_OBJECT_ID) {} + + NextHopGroupMember(NextHopGroupMember&& nhgm) : + m_nh_key(std::move(nhgm.m_nh_key)), + m_gm_id(nhgm.m_gm_id) + { nhgm.m_gm_id = SAI_NULL_OBJECT_ID; } + + NextHopGroupMember& operator=(NextHopGroupMember&& nhgm); + + /* + * Prevent object copying so we don't end up having multiple objects + * referencing the same SAI objects. + */ + NextHopGroupMember(const NextHopGroupMember&) = delete; + void operator=(const NextHopGroupMember&) = delete; + + /* Destructor. */ + virtual ~NextHopGroupMember(); + + /* Update member's weight and update the SAI attribute as well. */ + bool updateWeight(uint32_t weight); + + /* Sync / Desync. */ + void sync(sai_object_id_t gm_id); + void desync(); + + /* Getters / Setters. */ + inline const NextHopKey& getNhKey() const { return m_nh_key; } + inline uint32_t getWeight() const { return m_nh_key.weight; } + sai_object_id_t getNhId() const; + inline sai_object_id_t getGmId() const { return m_gm_id; } + inline bool isSynced() const { return m_gm_id != SAI_NULL_OBJECT_ID; } + + /* Check if the next hop is labeled. */ + inline bool isLabeled() const { return !m_nh_key.label_stack.empty(); } + + /* Convert member's details to string. */ + std::string to_string() const + { + return m_nh_key.to_string() + + ", SAI ID: " + std::to_string(m_gm_id); + } + +private: + /* The key of the next hop of this member. */ + NextHopKey m_nh_key; + + /* The group member SAI ID for this member. */ + sai_object_id_t m_gm_id; +}; + +/* Map indexed by NextHopKey, containing the SAI ID of the group member. */ +typedef std::map NhgMembers; + +/* + * NextHopGroup class representing a next hop group object. + */ +class NextHopGroup +{ +public: + /* Constructors. */ + explicit NextHopGroup(const NextHopGroupKey& key); + NextHopGroup(NextHopGroup&& nhg); + NextHopGroup& operator=(NextHopGroup&& nhg); + + /* Destructor. */ + virtual ~NextHopGroup() { desync(); } + + /* Sync the group, creating the group's and members SAI IDs. */ + bool sync(); + + /* Desync the group, reseting the group's and members SAI IDs. */ + bool desync(); + + /* + * Update the group based on a new next hop group key. This will also + * perform any sync / desync necessary. + */ + bool update(const NextHopGroupKey& nhg_key); + + /* Check if the group contains the given next hop. */ + inline bool hasNextHop(const NextHopKey& nh_key) const { + return m_members.find(nh_key) != m_members.end(); } + + /* Validate a next hop in the group, syncing it. */ + bool validateNextHop(const NextHopKey& nh_key); + + /* Invalidate a next hop in the group, desyncing it. */ + bool invalidateNextHop(const NextHopKey& nh_key); + + /* Increment the number of existing groups. */ + static inline void incCount() { ++m_count; } + + /* Decrement the number of existing groups. */ + static inline void decCount() { assert(m_count > 0); --m_count; } + + /* Getters / Setters. */ + inline const NextHopGroupKey& getKey() const { return m_key; } + inline sai_object_id_t getId() const { return m_id; } + static inline unsigned int getCount() { return m_count; } + inline bool isTemp() const { return m_is_temp; } + inline void setTemp(bool is_temp) { m_is_temp = is_temp; } + inline bool isSynced() const { return m_id != SAI_NULL_OBJECT_ID; } + inline size_t getSize() const { return m_members.size(); } + + /* Convert NHG's details to a string. */ + std::string to_string() const + { + return m_key.to_string() + ", SAI ID: " + std::to_string(m_id); + } + +private: + + /* The next hop group key of this group. */ + NextHopGroupKey m_key; + + /* The SAI ID of the group. */ + sai_object_id_t m_id; + + /* Members of this next hop group. */ + NhgMembers m_members; + + /* Whether the group is temporary or not. */ + bool m_is_temp; + + /* + * Number of existing groups. Incremented when an object is created and + * decremented when an object is destroyed. This will also account for the + * groups created by RouteOrch. + */ + static unsigned int m_count; + + /* Add group's members over the SAI API for the given keys. */ + bool syncMembers(const std::set& nh_keys); + + /* Remove group's members the SAI API from the given keys. */ + bool desyncMembers(const std::set& nh_keys); + + /* Create the attributes vector for a next hop group member. */ + vector createNhgmAttrs( + const NextHopGroupMember& nhgm) const; +}; + +/* + * Structure describing a next hop group which NhgOrch owns. Beside having a + * unique pointer to that next hop group, we also want to keep a ref count so + * NhgOrch knows how many other objects reference the next hop group in order + * not to delete them while still being referenced. + */ +struct NhgEntry +{ + /* Pointer to the next hop group. NhgOrch is the sole owner of it. */ + std::unique_ptr nhg; + + /* Number of external objects referencing this next hop group. */ + unsigned int ref_count; + + NhgEntry() = default; + explicit NhgEntry(std::unique_ptr&& _nhg, + unsigned int _ref_count = 0) : + nhg(std::move(_nhg)), ref_count(_ref_count) {} +}; + +/* + * Map indexed by next hop group's CP ID, containing the next hop group for + * that ID and the number of objects referencing it. + */ +typedef std::unordered_map NhgTable; + +/* + * Next Hop Group Orchestrator class that handles NEXT_HOP_GROUP_TABLE + * updates. + */ +class NhgOrch : public Orch +{ +public: + /* + * Constructor. + */ + NhgOrch(DBConnector *db, string tableName); + + /* Check if the next hop group given by it's index exists. */ + inline bool hasNhg(const std::string& index) const + { return m_syncdNextHopGroups.find(index) != + m_syncdNextHopGroups.end(); } + + /* + * Get the next hop group given by it's index. If the index does not exist + * in map, a std::out_of_range exception will be thrown. + */ + inline const NextHopGroup& getNhg(const std::string& index) const + { return *m_syncdNextHopGroups.at(index).nhg; } + + /* Add a temporary next hop group when resources are exhausted. */ + NextHopGroup createTempNhg(const NextHopGroupKey& nhg_key); + + /* Getters / Setters. */ + inline unsigned int getMaxNhgCount() const { return m_maxNhgCount; } + static inline unsigned int getNhgCount() + { return NextHopGroup::getCount(); } + + /* Validate / Invalidate a next hop. */ + bool validateNextHop(const NextHopKey& nh_key); + bool invalidateNextHop(const NextHopKey& nh_key); + + /* Increase / Decrease the number of next hop groups. */ + inline void incNhgCount() + { + assert(NextHopGroup::getCount() < m_maxNhgCount); + NextHopGroup::incCount(); + } + inline void decNhgCount() { NextHopGroup::decCount(); } + + /* Increase / Decrease ref count for a NHG given by it's index. */ + void incNhgRefCount(const std::string& index); + void decNhgRefCount(const std::string& index); + +private: + + /* + * Switch's maximum number of next hop groups capacity. + */ + unsigned int m_maxNhgCount; + + /* + * The next hop group table. + */ + NhgTable m_syncdNextHopGroups; + + void doTask(Consumer& consumer); +}; diff --git a/orchagent/orchdaemon.cpp b/orchagent/orchdaemon.cpp index a546ffdac14..2b454fc9753 100644 --- a/orchagent/orchdaemon.cpp +++ b/orchagent/orchdaemon.cpp @@ -31,6 +31,7 @@ FdbOrch *gFdbOrch; IntfsOrch *gIntfsOrch; NeighOrch *gNeighOrch; RouteOrch *gRouteOrch; +NhgOrch *gNhgOrch; FgNhgOrch *gFgNhgOrch; AclOrch *gAclOrch; MirrorOrch *gMirrorOrch; @@ -162,7 +163,8 @@ bool OrchDaemon::init() { APP_ROUTE_TABLE_NAME, routeorch_pri }, { APP_LABEL_ROUTE_TABLE_NAME, routeorch_pri } }; - gRouteOrch = new RouteOrch(m_applDb, route_tables, gSwitchOrch, gNeighOrch, gIntfsOrch, vrf_orch, gFgNhgOrch); + gRouteOrch = new RouteOrch(m_applDb, route_tables, gNeighOrch, gIntfsOrch, vrf_orch, gFgNhgOrch); + gNhgOrch = new NhgOrch(m_applDb, APP_NEXT_HOP_GROUP_TABLE_NAME); CoppOrch *copp_orch = new CoppOrch(m_applDb, APP_COPP_TABLE_NAME); TunnelDecapOrch *tunnel_decap_orch = new TunnelDecapOrch(m_applDb, APP_TUNNEL_DECAP_TABLE_NAME); @@ -287,7 +289,7 @@ bool OrchDaemon::init() }; gMacsecOrch = new MACsecOrch(m_applDb, m_stateDb, macsec_app_tables, gPortsOrch); - + /* * The order of the orch list is important for state restore of warm start and * the queued processing in m_toSync map after gPortsOrch->allPortsReady() is set. @@ -296,7 +298,7 @@ bool OrchDaemon::init() * when iterating ConsumerMap. This is ensured implicitly by the order of keys in ordered map. * For cases when Orch has to process tables in specific order, like PortsOrch during warm start, it has to override Orch::doTask() */ - m_orchList = { gSwitchOrch, gCrmOrch, gPortsOrch, gBufferOrch, gIntfsOrch, gNeighOrch, gRouteOrch, copp_orch, tunnel_decap_orch, qos_orch, wm_orch, policer_orch, sflow_orch, debug_counter_orch, gMacsecOrch}; + m_orchList = { gSwitchOrch, gCrmOrch, gPortsOrch, gBufferOrch, gIntfsOrch, gNeighOrch, gNhgOrch, gRouteOrch, copp_orch, tunnel_decap_orch, qos_orch, wm_orch, policer_orch, sflow_orch, debug_counter_orch, gMacsecOrch}; bool initialize_dtel = false; if (platform == BFN_PLATFORM_SUBSTRING || platform == VS_PLATFORM_SUBSTRING) diff --git a/orchagent/orchdaemon.h b/orchagent/orchdaemon.h index bdcc053ed3b..fbe14626644 100644 --- a/orchagent/orchdaemon.h +++ b/orchagent/orchdaemon.h @@ -11,6 +11,7 @@ #include "intfsorch.h" #include "neighorch.h" #include "routeorch.h" +#include "nhgorch.h" #include "copporch.h" #include "tunneldecaporch.h" #include "qosorch.h" diff --git a/orchagent/routeorch.cpp b/orchagent/routeorch.cpp index 04907b3de91..cac58c65092 100644 --- a/orchagent/routeorch.cpp +++ b/orchagent/routeorch.cpp @@ -2,6 +2,7 @@ #include #include #include "routeorch.h" +#include "nhgorch.h" #include "logger.h" #include "swssnet.h" #include "crmorch.h" @@ -18,6 +19,7 @@ extern sai_switch_api_t* sai_switch_api; extern PortsOrch *gPortsOrch; extern CrmOrch *gCrmOrch; extern Directory gDirectory; +extern NhgOrch *gNhgOrch; extern size_t gMaxBulkSize; @@ -25,56 +27,19 @@ extern size_t gMaxBulkSize; #define DEFAULT_NUMBER_OF_ECMP_GROUPS 128 #define DEFAULT_MAX_ECMP_GROUP_SIZE 32 -RouteOrch::RouteOrch(DBConnector *db, vector &tableNames, SwitchOrch *switchOrch, NeighOrch *neighOrch, IntfsOrch *intfsOrch, VRFOrch *vrfOrch, FgNhgOrch *fgNhgOrch) : +RouteOrch::RouteOrch(DBConnector *db, vector &tableNames, NeighOrch *neighOrch, IntfsOrch *intfsOrch, VRFOrch *vrfOrch, FgNhgOrch *fgNhgOrch) : gRouteBulker(sai_route_api, gMaxBulkSize), gLabelRouteBulker(sai_mpls_api, gMaxBulkSize), gNextHopGroupMemberBulker(sai_next_hop_group_api, gSwitchId, gMaxBulkSize), Orch(db, tableNames), - m_switchOrch(switchOrch), m_neighOrch(neighOrch), m_intfsOrch(intfsOrch), m_vrfOrch(vrfOrch), m_fgNhgOrch(fgNhgOrch), - m_nextHopGroupCount(0), m_resync(false) { SWSS_LOG_ENTER(); - sai_attribute_t attr; - attr.id = SAI_SWITCH_ATTR_NUMBER_OF_ECMP_GROUPS; - - sai_status_t status = sai_switch_api->get_switch_attribute(gSwitchId, 1, &attr); - if (status != SAI_STATUS_SUCCESS) - { - SWSS_LOG_WARN("Failed to get switch attribute number of ECMP groups. \ - Use default value. rv:%d", status); - m_maxNextHopGroupCount = DEFAULT_NUMBER_OF_ECMP_GROUPS; - } - else - { - m_maxNextHopGroupCount = attr.value.s32; - - /* - * ASIC specific workaround to re-calculate maximum ECMP groups - * according to different ECMP mode used. - * - * On Mellanox platform, the maximum ECMP groups returned is the value - * under the condition that the ECMP group size is 1. Dividing this - * number by DEFAULT_MAX_ECMP_GROUP_SIZE gets the maximum number of - * ECMP groups when the maximum ECMP group size is 32. - */ - char *platform = getenv("platform"); - if (platform && strstr(platform, MLNX_PLATFORM_SUBSTRING)) - { - m_maxNextHopGroupCount /= DEFAULT_MAX_ECMP_GROUP_SIZE; - } - } - vector fvTuple; - fvTuple.emplace_back("MAX_NEXTHOP_GROUP_COUNT", to_string(m_maxNextHopGroupCount)); - m_switchOrch->set_switch_capability(fvTuple); - - SWSS_LOG_NOTICE("Maximum number of ECMP groups supported is %d", m_maxNextHopGroupCount); - IpPrefix default_ip_prefix("0.0.0.0/0"); sai_route_entry_t unicast_route_entry; @@ -83,10 +48,12 @@ RouteOrch::RouteOrch(DBConnector *db, vector &tableNames, copy(unicast_route_entry.destination, default_ip_prefix); subnet(unicast_route_entry.destination, unicast_route_entry.destination); + sai_attribute_t attr; attr.id = SAI_ROUTE_ENTRY_ATTR_PACKET_ACTION; attr.value.s32 = SAI_PACKET_ACTION_DROP; - status = sai_route_api->create_route_entry(&unicast_route_entry, 1, &attr); + sai_status_t status = sai_route_api->create_route_entry( + &unicast_route_entry, 1, &attr); if (status != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Failed to create IPv4 default route with packet action drop"); @@ -96,7 +63,7 @@ RouteOrch::RouteOrch(DBConnector *db, vector &tableNames, gCrmOrch->incCrmResUsedCounter(CrmResourceType::CRM_IPV4_ROUTE); /* Add default IPv4 route into the m_syncdRoutes */ - m_syncdRoutes[gVirtualRouterId][default_ip_prefix] = NextHopGroupKey(); + m_syncdRoutes[gVirtualRouterId][default_ip_prefix] = RouteNhg(); SWSS_LOG_NOTICE("Create IPv4 default route with packet action drop"); @@ -115,7 +82,7 @@ RouteOrch::RouteOrch(DBConnector *db, vector &tableNames, gCrmOrch->incCrmResUsedCounter(CrmResourceType::CRM_IPV6_ROUTE); /* Add default IPv6 route into the m_syncdRoutes */ - m_syncdRoutes[gVirtualRouterId][v6_default_ip_prefix] = NextHopGroupKey(); + m_syncdRoutes[gVirtualRouterId][v6_default_ip_prefix] = RouteNhg(); SWSS_LOG_NOTICE("Create IPv6 default route with packet action drop"); @@ -277,7 +244,7 @@ void RouteOrch::attach(Observer *observer, const IpAddress& dstAddr, sai_object_ SWSS_LOG_NOTICE("Attached next hop observer of route %s for destination IP %s", observerEntry->second.routeTable.rbegin()->first.to_string().c_str(), dstAddr.to_string().c_str()); - NextHopUpdate update = { vrf_id, dstAddr, route->first, route->second }; + NextHopUpdate update = { vrf_id, dstAddr, route->first, route->second.nhg_key }; observer->update(SUBJECT_TYPE_NEXTHOP_CHANGE, static_cast(&update)); } } @@ -549,12 +516,15 @@ void RouteOrch::doTask(Consumer& consumer) if (op == SET_COMMAND) { + SWSS_LOG_INFO("Set operation"); + string ips; string aliases; string mpls_nhs; string vni_labels; string remote_macs; string weights; + string nhg_index; bool& excp_intfs_flag = ctx.excp_intfs_flag; bool overlay_nh = false; bool blackhole = false; @@ -583,108 +553,165 @@ void RouteOrch::doTask(Consumer& consumer) if (fvField(i) == "weight") weights = fvValue(i); + + if (fvField(i) == "nexthop_group") + nhg_index = fvValue(i); } - vector ipv = tokenize(ips, ','); - vector alsv = tokenize(aliases, ','); - vector mpls_nhv = tokenize(mpls_nhs, ','); - vector vni_labelv = tokenize(vni_labels, ','); - vector rmacv = tokenize(remote_macs, ','); + SWSS_LOG_INFO("Route %s has nexthop_group: %s, ips: %s, " + "MPLS nhs: %s, aliases: %s", + ip_prefix.to_string().c_str(), + nhg_index.c_str(), + ips.c_str(), + mpls_nhs.c_str(), + aliases.c_str()); /* - * For backward compatibility, adjust ip string from old format to - * new format. Meanwhile it can deal with some abnormal cases. + * A route should not fill both nexthop_group and ips / + * aliases. */ - - /* Resize the ip vector to match ifname vector - * as tokenize(",", ',') will miss the last empty segment. */ - if (alsv.size() == 0 && !blackhole) + if (!nhg_index.empty() && (!ips.empty() || !aliases.empty())) { - SWSS_LOG_WARN("Skip the route %s, for it has an empty ifname field.", key.c_str()); + SWSS_LOG_ERROR("Route %s has both nexthop_group and ips/aliases", + key.c_str()); it = consumer.m_toSync.erase(it); continue; } - else if (alsv.size() != ipv.size()) - { - SWSS_LOG_NOTICE("Route %s: resize ipv to match alsv, %zd -> %zd.", key.c_str(), ipv.size(), alsv.size()); - ipv.resize(alsv.size()); - } - /* Set the empty ip(s) to zero - * as IpAddress("") will construct a incorrect ip. */ - for (auto &ip : ipv) + ctx.nhg_index = nhg_index; + + /* + * If the nexthop_group is empty, create the next hop group key + * based on the IPs and aliases. Otherwise, get the key from + * the NhgOrch. + */ + vector ipv; + vector alsv; + vector mpls_nhv; + vector vni_labelv; + vector rmacv; + + /* Check if the next hop group is owned by the NhgOrch. */ + if (nhg_index.empty()) { - if (ip.empty()) + ipv = tokenize(ips, ','); + alsv = tokenize(aliases, ','); + mpls_nhv = tokenize(mpls_nhs, ','); + vni_labelv = tokenize(vni_labels, ','); + rmacv = tokenize(remote_macs, ','); + + /* + * For backward compatibility, adjust ip string from old format to + * new format. Meanwhile it can deal with some abnormal cases. + */ + + /* Resize the ip vector to match ifname vector + * as tokenize(",", ',') will miss the last empty segment. */ + if (alsv.size() == 0 && !blackhole) { - SWSS_LOG_NOTICE("Route %s: set the empty nexthop ip to zero.", key.c_str()); - ip = ip_prefix.isV4() ? "0.0.0.0" : "::"; + SWSS_LOG_WARN("Skip the route %s, for it has an empty ifname field.", key.c_str()); + it = consumer.m_toSync.erase(it); + continue; + } + else if (alsv.size() != ipv.size()) + { + SWSS_LOG_NOTICE("Route %s: resize ipv to match alsv, %zd -> %zd.", key.c_str(), ipv.size(), alsv.size()); + ipv.resize(alsv.size()); } - } - for (auto alias : alsv) - { - /* skip route to management, docker, loopback - * TODO: for route to loopback interface, the proper - * way is to create loopback interface and then create - * route pointing to it, so that we can traps packets to - * CPU */ - if (alias == "eth0" || alias == "docker0" || - alias == "lo" || !alias.compare(0, strlen(LOOPBACK_PREFIX), LOOPBACK_PREFIX)) + /* Set the empty ip(s) to zero + * as IpAddress("") will construct a incorrect ip. */ + for (auto &ip : ipv) { - excp_intfs_flag = true; - break; + if (ip.empty()) + { + SWSS_LOG_NOTICE("Route %s: set the empty nexthop ip to zero.", key.c_str()); + ip = ip_prefix.isV4() ? "0.0.0.0" : "::"; + } } - } - // TODO: cannot trust m_portsOrch->getPortIdByAlias because sometimes alias is empty - if (excp_intfs_flag) - { - /* If any existing routes are updated to point to the - * above interfaces, remove them from the ASIC. */ - if (removeRoute(ctx)) - it = consumer.m_toSync.erase(it); - else - it++; - continue; - } + for (auto alias : alsv) + { + /* skip route to management, docker, loopback + * TODO: for route to loopback interface, the proper + * way is to create loopback interface and then create + * route pointing to it, so that we can traps packets to + * CPU */ + if (alias == "eth0" || alias == "docker0" || + alias == "lo" || !alias.compare(0, strlen(LOOPBACK_PREFIX), LOOPBACK_PREFIX)) + { + excp_intfs_flag = true; + break; + } + } - string nhg_str = ""; - NextHopGroupKey& nhg = ctx.nhg; + // TODO: cannot trust m_portsOrch->getPortIdByAlias because sometimes alias is empty + if (excp_intfs_flag) + { + /* If any existing routes are updated to point to the + * above interfaces, remove them from the ASIC. */ + if (removeRoute(ctx)) + it = consumer.m_toSync.erase(it); + else + it++; + continue; + } - if (blackhole) - { - nhg = NextHopGroupKey(); - } - else if (overlay_nh == false) - { - for (uint32_t i = 0; i < ipv.size(); i++) + string nhg_str = ""; + NextHopGroupKey& nhg = ctx.nhg; + + if (blackhole) + { + nhg = NextHopGroupKey(); + } + else if (overlay_nh == false) { - if (i) nhg_str += NHG_DELIMITER; - if (alsv[i] == "tun0" && !(IpAddress(ipv[i]).isZero())) + for (uint32_t i = 0; i < ipv.size(); i++) { - alsv[i] = gIntfsOrch->getRouterIntfsAlias(ipv[i]); + if (i) nhg_str += NHG_DELIMITER; + if (alsv[i] == "tun0" && !(IpAddress(ipv[i]).isZero())) + { + alsv[i] = gIntfsOrch->getRouterIntfsAlias(ipv[i]); + } + if (!mpls_nhv.empty() && mpls_nhv[i] != "na") + { + nhg_str += mpls_nhv[i] + LABELSTACK_DELIMITER; + } + nhg_str += ipv[i] + NH_DELIMITER + alsv[i]; } - if (!mpls_nhv.empty() && mpls_nhv[i] != "na") + + nhg = NextHopGroupKey(nhg_str, weights); + } + else + { + for (uint32_t i = 0; i < ipv.size(); i++) { - nhg_str += mpls_nhv[i] + LABELSTACK_DELIMITER; + if (i) nhg_str += NHG_DELIMITER; + nhg_str += ipv[i] + NH_DELIMITER + "vni" + alsv[i] + NH_DELIMITER + vni_labelv[i] + NH_DELIMITER + rmacv[i]; } - nhg_str += ipv[i] + NH_DELIMITER + alsv[i]; - } - - nhg = NextHopGroupKey(nhg_str, weights); + nhg = NextHopGroupKey(nhg_str, overlay_nh); + } } else { - for (uint32_t i = 0; i < ipv.size(); i++) + try { - if (i) nhg_str += NHG_DELIMITER; - nhg_str += ipv[i] + NH_DELIMITER + "vni" + alsv[i] + NH_DELIMITER + vni_labelv[i] + NH_DELIMITER + rmacv[i]; + const NextHopGroup& nh_group = gNhgOrch->getNhg(nhg_index); + ctx.nhg = nh_group.getKey(); + ctx.is_temp = nh_group.isTemp(); + } + catch (const std::out_of_range& e) + { + SWSS_LOG_ERROR("Next hop group %s does not exist", + nhg_index.c_str()); + ++it; + continue; } - - nhg = NextHopGroupKey(nhg_str, overlay_nh); } + NextHopGroupKey& nhg = ctx.nhg; + if (nhg.getSize() == 1 && nhg.hasIntfNextHop()) { if (alsv[0] == "unknown") @@ -720,9 +747,15 @@ void RouteOrch::doTask(Consumer& consumer) it++; } } + /* + * Check if the route does not exist or needs to be updated or + * if the route is using a temporary next hop group owned by + * NhgOrch. + */ else if (m_syncdRoutes.find(vrf_id) == m_syncdRoutes.end() || m_syncdRoutes.at(vrf_id).find(ip_prefix) == m_syncdRoutes.at(vrf_id).end() || - m_syncdRoutes.at(vrf_id).at(ip_prefix) != nhg) + m_syncdRoutes.at(vrf_id).at(ip_prefix) != RouteNhg(nhg, ctx.nhg_index) || + ctx.is_temp) { if (addRoute(ctx, nhg)) it = consumer.m_toSync.erase(it); @@ -730,18 +763,24 @@ void RouteOrch::doTask(Consumer& consumer) it++; } else + { + SWSS_LOG_INFO("Route %s is duplicate entry", key.c_str()); /* Duplicate entry */ it = consumer.m_toSync.erase(it); + } // If already exhaust the nexthop groups, and there are pending removing routes in bulker, // flush the bulker and possibly collect some released nexthop groups - if (m_nextHopGroupCount >= m_maxNextHopGroupCount && gRouteBulker.removing_entries_count() > 0) + if (gNhgOrch->getNhgCount() >= gNhgOrch->getMaxNhgCount() && + gRouteBulker.removing_entries_count() > 0) { break; } } else if (op == DEL_COMMAND) { + SWSS_LOG_INFO("Delete operation"); + if (removeRoute(ctx)) it = consumer.m_toSync.erase(it); else @@ -809,8 +848,9 @@ void RouteOrch::doTask(Consumer& consumer) it_prev++; } else if (m_syncdRoutes.find(vrf_id) == m_syncdRoutes.end() || - m_syncdRoutes.at(vrf_id).find(ip_prefix) == m_syncdRoutes.at(vrf_id).end() || - m_syncdRoutes.at(vrf_id).at(ip_prefix) != nhg) + m_syncdRoutes.at(vrf_id).find(ip_prefix) == m_syncdRoutes.at(vrf_id).end() || + m_syncdRoutes.at(vrf_id).at(ip_prefix) != RouteNhg(nhg, ctx.nhg_index) || + ctx.is_temp) { if (addRoutePost(ctx, nhg)) it_prev = consumer.m_toSync.erase(it_prev); @@ -871,13 +911,13 @@ void RouteOrch::notifyNextHopChangeObservers(sai_object_id_t vrf_id, const IpPre update_required = true; } - entry.second.routeTable.emplace(prefix, nexthops); + entry.second.routeTable.emplace(prefix, RouteNhg(nexthops, "")); } else { - if (route->second != nexthops) + if (route->second.nhg_key != nexthops) { - route->second = nexthops; + route->second.nhg_key = nexthops; /* If changed route is best match update observers */ if (entry.second.routeTable.rbegin()->first == route->first) { @@ -908,7 +948,7 @@ void RouteOrch::notifyNextHopChangeObservers(sai_object_id_t vrf_id, const IpPre assert(!entry.second.routeTable.empty()); auto route = entry.second.routeTable.rbegin(); - NextHopUpdate update = { vrf_id, entry.first.second, route->first, route->second }; + NextHopUpdate update = { vrf_id, entry.first.second, route->first, route->second.nhg_key }; for (auto observer : entry.second.observers) { @@ -985,7 +1025,7 @@ const NextHopGroupKey RouteOrch::getSyncdRouteNhgKey(sai_object_id_t vrf_id, con auto route_entry = route_table->second.find(ipPrefix); if (route_entry != route_table->second.end()) { - nhg = route_entry->second; + nhg = route_entry->second.nhg_key; } } return nhg; @@ -995,7 +1035,7 @@ bool RouteOrch::createFineGrainedNextHopGroup(sai_object_id_t &next_hop_group_id { SWSS_LOG_ENTER(); - if (m_nextHopGroupCount >= m_maxNextHopGroupCount) + if (gNhgOrch->getNhgCount() >= gNhgOrch->getMaxNhgCount()) { SWSS_LOG_DEBUG("Failed to create new next hop group. \ Reaching maximum number of next hop groups."); @@ -1017,7 +1057,7 @@ bool RouteOrch::createFineGrainedNextHopGroup(sai_object_id_t &next_hop_group_id } gCrmOrch->incCrmResUsedCounter(CrmResourceType::CRM_NEXTHOP_GROUP); - m_nextHopGroupCount++; + gNhgOrch->incNhgCount(); return true; } @@ -1039,7 +1079,7 @@ bool RouteOrch::removeFineGrainedNextHopGroup(sai_object_id_t &next_hop_group_id } gCrmOrch->decCrmResUsedCounter(CrmResourceType::CRM_NEXTHOP_GROUP); - m_nextHopGroupCount--; + gNhgOrch->decNhgCount(); return true; } @@ -1050,10 +1090,10 @@ bool RouteOrch::addNextHopGroup(const NextHopGroupKey &nexthops) assert(!hasNextHopGroup(nexthops)); - if (m_nextHopGroupCount >= m_maxNextHopGroupCount) + if (gNhgOrch->getNhgCount() >= gNhgOrch->getMaxNhgCount()) { - SWSS_LOG_DEBUG("Failed to create new next hop group. \ - Reaching maximum number of next hop groups."); + SWSS_LOG_WARN("Reached maximum next hop groups of %u", + gNhgOrch->getMaxNhgCount()); return false; } @@ -1080,11 +1120,10 @@ bool RouteOrch::addNextHopGroup(const NextHopGroupKey &nexthops) } else { - SWSS_LOG_INFO("Failed to get next hop %s in %s", + SWSS_LOG_WARN("Failed to get next hop %s in %s", it.to_string().c_str(), nexthops.to_string().c_str()); return false; } - // skip next hop group member create for neighbor from down port if (m_neighOrch->isNextHopFlagSet(it, NHFLAGS_IFDOWN)) { @@ -1126,7 +1165,7 @@ bool RouteOrch::addNextHopGroup(const NextHopGroupKey &nexthops) } } - m_nextHopGroupCount ++; + gNhgOrch->incNhgCount(); SWSS_LOG_NOTICE("Create next hop group %s", nexthops.to_string().c_str()); gCrmOrch->incCrmResUsedCounter(CrmResourceType::CRM_NEXTHOP_GROUP); @@ -1280,13 +1319,14 @@ bool RouteOrch::removeNextHopGroup(const NextHopGroupKey &nexthops) } } - m_nextHopGroupCount --; + gNhgOrch->decNhgCount(); gCrmOrch->decCrmResUsedCounter(CrmResourceType::CRM_NEXTHOP_GROUP); set next_hop_set = nexthops.getNextHops(); for (auto it : next_hop_set) { m_neighOrch->decreaseNextHopRefCount(it); + if (overlay_nh && !m_neighOrch->getNextHopRefCount(it)) { if(!m_neighOrch->removeTunnelNextHop(it)) @@ -1329,12 +1369,12 @@ bool RouteOrch::updateNextHopRoutes(const NextHopKey& nextHop, uint32_t& numRout for (auto rt_entry : rt_table.second) { // Skip routes with ecmp nexthops - if (rt_entry.second.getSize() > 1) + if (rt_entry.second.nhg_key.getSize() > 1) { continue; } - if (rt_entry.second.contains(nextHop)) + if (rt_entry.second.nhg_key.contains(nextHop)) { SWSS_LOG_INFO("Updating route %s during nexthop status change", rt_entry.first.to_string().c_str()); @@ -1374,7 +1414,12 @@ void RouteOrch::addTempRoute(RouteBulkContext& ctx, const NextHopGroupKey &nextH /* Remove next hops that are not in m_syncdNextHops */ for (auto it = next_hop_set.begin(); it != next_hop_set.end();) { - if (!m_neighOrch->hasNextHop(*it)) + /* + * Check if the IP next hop exists in NeighOrch. The next hop may be + * a labeled one, which are created by RouteOrch or NhgOrch if the IP + * next hop exists. + */ + if (!m_neighOrch->hasNextHop(it->ipKey())) { SWSS_LOG_INFO("Failed to get next hop %s for %s", (*it).to_string().c_str(), ipPrefix.to_string().c_str()); @@ -1434,149 +1479,169 @@ bool RouteOrch::addRoute(RouteBulkContext& ctx, const NextHopGroupKey &nextHops) /* We get 3 return values from setFgNhg: * 1. success/failure: on addition/modification of nexthop group/members * 2. next_hop_id: passed as a param to fn, used for sai route creation - * 3. prevNhgWasFineGrained: passed as a param to fn, used to determine transitions + * 3. prevNhgWasFineGrained: passed as a param to fn, used to determine transitions * between regular and FG ECMP, this is an optimization to prevent multiple lookups */ if (!m_fgNhgOrch->setFgNhg(vrf_id, ipPrefix, nextHops, next_hop_id, prevNhgWasFineGrained)) { return false; } } - else if (nextHops.getSize() == 0) + else if (ctx.nhg_index.empty()) { - /* The route is pointing to a blackhole */ - blackhole = true; - } - else if (nextHops.getSize() == 1) - { - /* The route is pointing to a next hop */ - const NextHopKey& nexthop = *nextHops.getNextHops().begin(); - if (nexthop.isIntfNextHop()) + if (nextHops.getSize() == 0) { - if(gPortsOrch->isInbandPort(nexthop.alias)) - { - //This routes is the static route added for the remote system neighbors - //We do not need this route in the ASIC since the static neighbor creation - //in ASIC adds the same full mask route (host route) in ASIC automatically - //So skip. - return true; - } - - next_hop_id = m_intfsOrch->getRouterIntfsId(nexthop.alias); - /* rif is not created yet */ - if (next_hop_id == SAI_NULL_OBJECT_ID) - { - SWSS_LOG_INFO("Failed to get next hop %s for %s", - nextHops.to_string().c_str(), ipPrefix.to_string().c_str()); - return false; - } + /* The route is pointing to a blackhole */ + blackhole = true; } - else + else if (nextHops.getSize() == 1) { - if (m_neighOrch->hasNextHop(nexthop)) + /* The route is pointing to a next hop */ + const NextHopKey& nexthop = *nextHops.getNextHops().begin(); + if (nexthop.isIntfNextHop()) { - next_hop_id = m_neighOrch->getNextHopId(nexthop); - } - /* For non-existent MPLS NH, check if IP neighbor NH exists */ - else if (nexthop.isMplsNextHop() && - m_neighOrch->hasNextHop(NextHopKey(nexthop.ip_address, nexthop.alias))) - { - /* since IP neighbor NH exists, neighbor is resolved, add MPLS NH */ - m_neighOrch->addNextHop(nexthop); - next_hop_id = m_neighOrch->getNextHopId(nexthop); + if(gPortsOrch->isInbandPort(nexthop.alias)) + { + //This routes is the static route added for the remote system neighbors + //We do not need this route in the ASIC since the static neighbor creation + //in ASIC adds the same full mask route (host route) in ASIC automatically + //So skip. + return true; + } + + next_hop_id = m_intfsOrch->getRouterIntfsId(nexthop.alias); + /* rif is not created yet */ + if (next_hop_id == SAI_NULL_OBJECT_ID) + { + SWSS_LOG_INFO("Failed to get next hop %s for %s", + nextHops.to_string().c_str(), ipPrefix.to_string().c_str()); + return false; + } } /* IP neighbor is not yet resolved */ else { - if(overlay_nh) + if (m_neighOrch->hasNextHop(nexthop)) + { + next_hop_id = m_neighOrch->getNextHopId(nexthop); + } + /* See if there is an IP neighbor nexthop */ + else if (nexthop.isMplsNextHop() && + m_neighOrch->hasNextHop(NextHopKey(nexthop.ip_address, nexthop.alias))) + { + m_neighOrch->addNextHop(nexthop); + next_hop_id = m_neighOrch->getNextHopId(nexthop); + } + else { - SWSS_LOG_INFO("create remote vtep %s", nexthop.to_string(overlay_nh).c_str()); - status = createRemoteVtep(vrf_id, nexthop); - if (status == false) + if(overlay_nh) { - SWSS_LOG_ERROR("Failed to create remote vtep %s", nexthop.to_string(overlay_nh).c_str()); - return false; + SWSS_LOG_INFO("create remote vtep %s", nexthop.to_string(overlay_nh).c_str()); + status = createRemoteVtep(vrf_id, nexthop); + if (status == false) + { + SWSS_LOG_ERROR("Failed to create remote vtep %s", nexthop.to_string(overlay_nh).c_str()); + return false; + } + next_hop_id = m_neighOrch->addTunnelNextHop(nexthop); + if (next_hop_id == SAI_NULL_OBJECT_ID) + { + SWSS_LOG_ERROR("Failed to create Tunnel Nexthop %s", nexthop.to_string(overlay_nh).c_str()); + return false; + } } - next_hop_id = m_neighOrch->addTunnelNextHop(nexthop); - if (next_hop_id == SAI_NULL_OBJECT_ID) + else { - SWSS_LOG_ERROR("Failed to create Tunnel Nexthop %s", nexthop.to_string(overlay_nh).c_str()); + SWSS_LOG_INFO("Failed to get next hop %s for %s, resolving neighbor", + nextHops.to_string().c_str(), ipPrefix.to_string().c_str()); + m_neighOrch->resolveNeighbor(nexthop); return false; } } - else - { - SWSS_LOG_INFO("Failed to get next hop %s for %s, resolving neighbor", - nextHops.to_string().c_str(), ipPrefix.to_string().c_str()); - m_neighOrch->resolveNeighbor(nexthop); - return false; - } } } - } - /* The route is pointing to a next hop group */ - else - { - /* Check if there is already an existing next hop group */ - if (!hasNextHopGroup(nextHops)) + /* The route is pointing to a next hop group */ + else { - /* Try to create a new next hop group */ - if (!addNextHopGroup(nextHops)) + /* Check if there is already an existing next hop group */ + if (!hasNextHopGroup(nextHops)) { - for(auto it = nextHops.getNextHops().begin(); it != nextHops.getNextHops().end(); ++it) + /* Try to create a new next hop group */ + if (!addNextHopGroup(nextHops)) { - const NextHopKey& nextHop = *it; - if(!m_neighOrch->hasNextHop(nextHop)) + for(auto it = nextHops.getNextHops().begin(); it != nextHops.getNextHops().end(); ++it) { - if(overlay_nh) + const NextHopKey& nextHop = *it; + if(!m_neighOrch->hasNextHop(nextHop)) { - SWSS_LOG_INFO("create remote vtep %s ecmp", nextHop.to_string(overlay_nh).c_str()); - status = createRemoteVtep(vrf_id, nextHop); - if (status == false) + if(overlay_nh) { - SWSS_LOG_ERROR("Failed to create remote vtep %s ecmp", nextHop.to_string(overlay_nh).c_str()); - return false; + SWSS_LOG_INFO("create remote vtep %s ecmp", nextHop.to_string(overlay_nh).c_str()); + status = createRemoteVtep(vrf_id, nextHop); + if (status == false) + { + SWSS_LOG_ERROR("Failed to create remote vtep %s ecmp", nextHop.to_string(overlay_nh).c_str()); + return false; + } + next_hop_id = m_neighOrch->addTunnelNextHop(nextHop); + if (next_hop_id == SAI_NULL_OBJECT_ID) + { + SWSS_LOG_ERROR("Failed to create Tunnel Nexthop %s", nextHop.to_string(overlay_nh).c_str()); + return false; + } } - next_hop_id = m_neighOrch->addTunnelNextHop(nextHop); - if (next_hop_id == SAI_NULL_OBJECT_ID) + else { - SWSS_LOG_ERROR("Failed to create Tunnel Nexthop %s", nextHop.to_string(overlay_nh).c_str()); - return false; + SWSS_LOG_INFO("Failed to get next hop %s in %s, resolving neighbor", + nextHop.to_string().c_str(), nextHops.to_string().c_str()); + m_neighOrch->resolveNeighbor(nextHop); } } - else - { - SWSS_LOG_INFO("Failed to get next hop %s in %s, resolving neighbor", - nextHop.to_string().c_str(), nextHops.to_string().c_str()); - m_neighOrch->resolveNeighbor(nextHop); - } } - } - /* Failed to create the next hop group and check if a temporary route is needed */ + /* Failed to create the next hop group and check if a temporary route is needed */ - /* If the current next hop is part of the next hop group to sync, - * then return false and no need to add another temporary route. */ - if (it_route != m_syncdRoutes.at(vrf_id).end() && it_route->second.getSize() == 1) - { - const NextHopKey& nexthop = *it_route->second.getNextHops().begin(); - if (nextHops.contains(nexthop)) + /* If the current next hop is part of the next hop group to sync, + * then return false and no need to add another temporary route. */ + if (it_route != m_syncdRoutes.at(vrf_id).end() && it_route->second.nhg_key.getSize() == 1) { - return false; + const NextHopKey& nexthop = *it_route->second.nhg_key.getNextHops().begin(); + if (nextHops.contains(nexthop)) + { + return false; + } } - } - /* Add a temporary route when a next hop group cannot be added, - * and there is no temporary route right now or the current temporary - * route is not pointing to a member of the next hop group to sync. */ - addTempRoute(ctx, nextHops); - /* Return false since the original route is not successfully added */ - return false; + /* Add a temporary route when a next hop group cannot be added, + * and there is no temporary route right now or the current temporary + * route is not pointing to a member of the next hop group to sync. */ + addTempRoute(ctx, nextHops); + /* Return false since the original route is not successfully added */ + return false; + } } - } - next_hop_id = m_syncdNextHopGroups[nextHops].next_hop_group_id; + next_hop_id = m_syncdNextHopGroups[nextHops].next_hop_group_id; + } + } + else + { + SWSS_LOG_INFO("Next hop group is owned by NhgOrch with index %s", + ctx.nhg_index.c_str()); + try + { + const NextHopGroup& nhg = gNhgOrch->getNhg(ctx.nhg_index); + next_hop_id = nhg.getId(); + } + catch(const std::out_of_range& e) + { + SWSS_LOG_WARN("Next hop group key %s does not exist", + ctx.nhg_index.c_str()); + return false; + } } + SWSS_LOG_INFO("Next hop ID: %lu", next_hop_id); + /* Sync the route entry */ sai_route_entry_t route_entry; route_entry.vr_id = vrf_id; @@ -1618,7 +1683,7 @@ bool RouteOrch::addRoute(RouteBulkContext& ctx, const NextHopGroupKey &nextHops) else { /* Set the packet action to forward when there was no next hop (dropped) and not pointing to blackhole*/ - if (it_route->second.getSize() == 0 && !blackhole) + if (it_route->second.nhg_key.getSize() == 0 && !blackhole) { route_attr.id = SAI_ROUTE_ENTRY_ATTR_PACKET_ACTION; route_attr.value.s32 = SAI_PACKET_ACTION_FORWARD; @@ -1629,11 +1694,11 @@ bool RouteOrch::addRoute(RouteBulkContext& ctx, const NextHopGroupKey &nextHops) if (curNhgIsFineGrained && prevNhgWasFineGrained) { - /* Don't change route entry if the route is previously fine grained and new nhg is also fine grained. + /* Don't change route entry if the route is previously fine grained and new nhg is also fine grained. * We already modifed sai nhg objs as part of setFgNhg to account for nhg change. */ object_statuses.emplace_back(SAI_STATUS_SUCCESS); } - else + else { route_attr.id = SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID; route_attr.value.oid = next_hop_id; @@ -1652,6 +1717,11 @@ bool RouteOrch::addRoute(RouteBulkContext& ctx, const NextHopGroupKey &nextHops) gRouteBulker.set_entry_attribute(&object_statuses.back(), &route_entry, &route_attr); } } + + SWSS_LOG_NOTICE("Added route %s with next hop(s) %s", + ipPrefix.to_string().c_str(), + nextHops.to_string().c_str()); + return false; } @@ -1672,52 +1742,69 @@ bool RouteOrch::addRoutePost(const RouteBulkContext& ctx, const NextHopGroupKey return false; } - /* next_hop_id indicates the next hop id or next hop group id of this route */ - sai_object_id_t next_hop_id; + SWSS_LOG_INFO("Checking next hop group %s", nextHops.to_string().c_str()); if (m_fgNhgOrch->isRouteFineGrained(vrf_id, ipPrefix, nextHops)) { /* Route is pointing to Fine Grained ECMP nexthop group */ isFineGrained = true; } - else if (nextHops.getSize() == 0) + /* Check that the next hop group is not owned by NhgOrch. */ + else if (ctx.nhg_index.empty()) { - /* The route is pointing to a blackhole */ - blackhole = true; - } - else if (nextHops.getSize() == 1) - { - /* The route is pointing to a next hop */ - const NextHopKey& nexthop = *nextHops.getNextHops().begin(); - if (nexthop.isIntfNextHop()) + SWSS_LOG_INFO("Next hop group is not owned by NhgOrch"); + if (nextHops.getSize() == 0) { - next_hop_id = m_intfsOrch->getRouterIntfsId(nexthop.alias); - /* rif is not created yet */ - if (next_hop_id == SAI_NULL_OBJECT_ID) + /* The route is pointing to a blackhole */ + blackhole = true; + } + else if (nextHops.getSize() == 1) + { + /* The route is pointing to a next hop */ + const NextHopKey& nexthop = *nextHops.getNextHops().begin(); + if (nexthop.isIntfNextHop()) { - SWSS_LOG_INFO("Failed to get next hop %s for %s", - nextHops.to_string().c_str(), ipPrefix.to_string().c_str()); - return false; + auto next_hop_id = m_intfsOrch->getRouterIntfsId(nexthop.alias); + /* rif is not created yet */ + if (next_hop_id == SAI_NULL_OBJECT_ID) + { + SWSS_LOG_WARN("Failed to get next hop %s for %s", + nextHops.to_string().c_str(), ipPrefix.to_string().c_str()); + return false; + } + } + else + { + if (!m_neighOrch->hasNextHop(nexthop)) + { + SWSS_LOG_WARN("Failed to get next hop %s for %s", + nextHops.to_string().c_str(), ipPrefix.to_string().c_str()); + return false; + } } } + /* The route is pointing to a next hop group */ else { - if (!m_neighOrch->hasNextHop(nexthop)) + if (!hasNextHopGroup(nextHops)) { - SWSS_LOG_INFO("Failed to get next hop %s for %s", - nextHops.to_string().c_str(), ipPrefix.to_string().c_str()); + SWSS_LOG_WARN("Next hop group is temporary, represented by %s", + ctx.tmp_next_hop.to_string().c_str()); + // Previous added an temporary route + auto& tmp_next_hop = ctx.tmp_next_hop; + addRoutePost(ctx, tmp_next_hop); return false; } } } - /* The route is pointing to a next hop group */ else { - if (!hasNextHopGroup(nextHops)) + SWSS_LOG_INFO("NhgOrch owns the next hop group with index %s", + ctx.nhg_index.c_str()); + if (!gNhgOrch->hasNhg(ctx.nhg_index)) { - // Previous added an temporary route - auto& tmp_next_hop = ctx.tmp_next_hop; - addRoutePost(ctx, tmp_next_hop); + SWSS_LOG_WARN("Failed to get next hop group with index %s", + ctx.nhg_index.c_str()); return false; } } @@ -1752,16 +1839,16 @@ bool RouteOrch::addRoutePost(const RouteBulkContext& ctx, const NextHopGroupKey else { /* Route already exists */ - auto nh_entry = m_syncdNextHopGroups.find(it_route->second); + auto nh_entry = m_syncdNextHopGroups.find(it_route->second.nhg_key); if (nh_entry != m_syncdNextHopGroups.end()) { /* Case where route was pointing to non-fine grained nhs in the past, * and transitioned to Fine Grained ECMP */ - decreaseNextHopRefCount(it_route->second); - if (it_route->second.getSize() > 1 - && m_syncdNextHopGroups[it_route->second].ref_count == 0) + decreaseNextHopRefCount(it_route->second.nhg_key); + if (it_route->second.nhg_key.getSize() > 1 + && m_syncdNextHopGroups[it_route->second.nhg_key].ref_count == 0) { - m_bulkNhgReducedRefCnt.emplace(it_route->second, 0); + m_bulkNhgReducedRefCnt.emplace(it_route->second.nhg_key, 0); } } SWSS_LOG_INFO("FG Post set route %s with next hop(s) %s", @@ -1775,9 +1862,11 @@ bool RouteOrch::addRoutePost(const RouteBulkContext& ctx, const NextHopGroupKey { SWSS_LOG_ERROR("Failed to create route %s with next hop(s) %s", ipPrefix.to_string().c_str(), nextHops.to_string().c_str()); - /* Clean up the newly created next hop group entry */ - if (nextHops.getSize() > 1) + + /* Check that the next hop group is not owned by NhgOrch. */ + if (ctx.nhg_index.empty() && nextHops.getSize() > 1) { + /* Clean up the newly created next hop group entry */ removeNextHopGroup(nextHops); } task_process_status handle_status = handleSaiCreateStatus(SAI_API_ROUTE, status); @@ -1796,10 +1885,19 @@ bool RouteOrch::addRoutePost(const RouteBulkContext& ctx, const NextHopGroupKey gCrmOrch->incCrmResUsedCounter(CrmResourceType::CRM_IPV6_ROUTE); } - /* Increase the ref_count for the next hop (group) entry */ - increaseNextHopRefCount(nextHops); + /* Increase the ref_count for the next hop group. */ + if (ctx.nhg_index.empty()) + { + increaseNextHopRefCount(nextHops); + } + else + { + SWSS_LOG_INFO("Increment NhgOrch's NHG %s ref count", + ctx.nhg_index.c_str()); + gNhgOrch->incNhgRefCount(ctx.nhg_index); + } - SWSS_LOG_INFO("Post create route %s with next hop(s) %s", + SWSS_LOG_NOTICE("Post create route %s with next hop(s) %s", ipPrefix.to_string().c_str(), nextHops.to_string().c_str()); } else @@ -1807,7 +1905,7 @@ bool RouteOrch::addRoutePost(const RouteBulkContext& ctx, const NextHopGroupKey sai_status_t status; /* Set the packet action to forward when there was no next hop (dropped) and not pointing to blackhole */ - if (it_route->second.getSize() == 0 && !blackhole) + if (it_route->second.nhg_key.getSize() == 0 && !blackhole) { status = *it_status++; if (status != SAI_STATUS_SUCCESS) @@ -1834,9 +1932,6 @@ bool RouteOrch::addRoutePost(const RouteBulkContext& ctx, const NextHopGroupKey } } - /* Increase the ref_count for the next hop (group) entry */ - increaseNextHopRefCount(nextHops); - if (m_fgNhgOrch->syncdContainsFgNhg(vrf_id, ipPrefix)) { /* Remove FG nhg since prefix now points to standard nhg/nhs */ @@ -1844,17 +1939,28 @@ bool RouteOrch::addRoutePost(const RouteBulkContext& ctx, const NextHopGroupKey } else { - decreaseNextHopRefCount(it_route->second); - auto ol_nextHops = it_route->second; - if (it_route->second.getSize() > 1 - && m_syncdNextHopGroups[it_route->second].ref_count == 0) + /* Decrease the ref count for the previous next hop group. */ + if (it_route->second.nhg_index.empty()) { - m_bulkNhgReducedRefCnt.emplace(it_route->second, 0); + decreaseNextHopRefCount(it_route->second.nhg_key); + auto ol_nextHops = it_route->second.nhg_key; + if (ol_nextHops.getSize() > 1 + && m_syncdNextHopGroups[ol_nextHops].ref_count == 0) + { + m_bulkNhgReducedRefCnt.emplace(ol_nextHops, 0); + } + else if (ol_nextHops.is_overlay_nexthop()) + { + SWSS_LOG_NOTICE("Update overlay Nexthop %s", ol_nextHops.to_string().c_str()); + m_bulkNhgReducedRefCnt.emplace(ol_nextHops, vrf_id); + } } - else if (ol_nextHops.is_overlay_nexthop()) + else { - SWSS_LOG_NOTICE("Update overlay Nexthop %s", ol_nextHops.to_string().c_str()); - m_bulkNhgReducedRefCnt.emplace(ol_nextHops, vrf_id); + /* The next hop group is owned by NeighOrch. */ + SWSS_LOG_INFO("Decrement NhgOrch's NHG %s ref count", + it_route->second.nhg_index.c_str()); + gNhgOrch->decNhgRefCount(it_route->second.nhg_index); } } @@ -1874,14 +1980,32 @@ bool RouteOrch::addRoutePost(const RouteBulkContext& ctx, const NextHopGroupKey } } - SWSS_LOG_INFO("Post set route %s with next hop(s) %s", + if (ctx.nhg_index.empty()) + { + /* Increase the ref_count for the next hop (group) entry */ + increaseNextHopRefCount(nextHops); + } + else + { + SWSS_LOG_INFO("Increment NhgOrch's NHG %s ref count", + ctx.nhg_index.c_str()); + gNhgOrch->incNhgRefCount(ctx.nhg_index); + } + + SWSS_LOG_NOTICE("Post set route %s with next hop(s) %s", ipPrefix.to_string().c_str(), nextHops.to_string().c_str()); } - m_syncdRoutes[vrf_id][ipPrefix] = nextHops; + m_syncdRoutes[vrf_id][ipPrefix] = RouteNhg(nextHops, ctx.nhg_index); notifyNextHopChangeObservers(vrf_id, ipPrefix, nextHops, true); - return true; + + /* + * If the route uses a temporary synced NHG owned by NhgOrch, return false + * in order to keep trying to update the route in case the NHG is updated, + * which will update the SAI ID of the group as well. + */ + return !ctx.is_temp; } bool RouteOrch::removeRoute(RouteBulkContext& ctx) @@ -2019,47 +2143,57 @@ bool RouteOrch::removeRoutePost(const RouteBulkContext& ctx) } else { - /* - * Decrease the reference count only when the route is pointing to a next hop. - */ - decreaseNextHopRefCount(it_route->second); + /* Check that the next hop group is not owned by NhgOrch. */ + if (it_route->second.nhg_index.empty()) + { + auto ol_nextHops = it_route->second.nhg_key; - auto ol_nextHops = it_route->second; + /* + * Decrease the reference count only when the route is pointing to a next hop. + */ + decreaseNextHopRefCount(ol_nextHops); - if (it_route->second.getSize() > 1 - && m_syncdNextHopGroups[it_route->second].ref_count == 0) - { - m_bulkNhgReducedRefCnt.emplace(it_route->second, 0); - } - else if (ol_nextHops.is_overlay_nexthop()) - { - SWSS_LOG_NOTICE("Remove overlay Nexthop %s", ol_nextHops.to_string().c_str()); - m_bulkNhgReducedRefCnt.emplace(ol_nextHops, vrf_id); - } - /* - * Additionally check if the NH has label and its ref count == 0, then - * remove the label next hop. - */ - else if (it_route->second.getSize() == 1) - { - const NextHopKey& nexthop = *it_route->second.getNextHops().begin(); - if (nexthop.isMplsNextHop() && - (m_neighOrch->getNextHopRefCount(nexthop) == 0)) + if (ol_nextHops.getSize() > 1 + && m_syncdNextHopGroups[ol_nextHops].ref_count == 0) { - m_neighOrch->removeMplsNextHop(nexthop); + m_bulkNhgReducedRefCnt.emplace(ol_nextHops, 0); } + /* + * Additionally check if the NH has label and its ref count == 0, then + * remove the label next hop. + */ + else if (ol_nextHops.getSize() == 1) + { + NextHopKey nexthop(ol_nextHops.to_string()); + if (nexthop.isMplsNextHop() && + (m_neighOrch->getNextHopRefCount(nexthop) == 0)) + { + m_neighOrch->removeMplsNextHop(nexthop); + } + } + else if (ol_nextHops.is_overlay_nexthop()) + { + SWSS_LOG_NOTICE("Remove overlay Nexthop %s", ol_nextHops.to_string().c_str()); + m_bulkNhgReducedRefCnt.emplace(ol_nextHops, vrf_id); + } + } + else + { + SWSS_LOG_INFO("Decrement NhgOrch's NHG %s ref count", + it_route->second.nhg_index.c_str()); + gNhgOrch->decNhgRefCount(it_route->second.nhg_index); } } SWSS_LOG_INFO("Remove route %s with next hop(s) %s", - ipPrefix.to_string().c_str(), it_route->second.to_string().c_str()); + ipPrefix.to_string().c_str(), it_route->second.nhg_key.to_string().c_str()); if (ipPrefix.isDefaultRoute() && vrf_id == gVirtualRouterId) { - it_route_table->second[ipPrefix] = NextHopGroupKey(); + it_route_table->second[ipPrefix] = RouteNhg(); /* Notify about default route next hop change */ - notifyNextHopChangeObservers(vrf_id, ipPrefix, it_route_table->second[ipPrefix], true); + notifyNextHopChangeObservers(vrf_id, ipPrefix, it_route_table->second[ipPrefix].nhg_key, true); } else { diff --git a/orchagent/routeorch.h b/orchagent/routeorch.h index 20e79699d58..90c1a43b753 100644 --- a/orchagent/routeorch.h +++ b/orchagent/routeorch.h @@ -40,16 +40,40 @@ struct NextHopUpdate NextHopGroupKey nexthopGroup; }; +/* + * Structure describing the next hop group used by a route. As the next hop + * groups can either be owned by RouteOrch or by NhgOrch, we have to keep track + * of the next hop group index, as it is the one telling us which one owns it. + */ +struct RouteNhg +{ + NextHopGroupKey nhg_key; + + /* + * Index of the next hop group used. Filled only if referencing a + * NhgOrch's owned next hop group. + */ + std::string nhg_index; + + RouteNhg() = default; + RouteNhg(const NextHopGroupKey& key, const std::string& index) : + nhg_key(key), nhg_index(index) {} + + bool operator==(const RouteNhg& rnhg) + { return ((nhg_key == rnhg.nhg_key) && (nhg_index == rnhg.nhg_index)); } + bool operator!=(const RouteNhg& rnhg) { return !(*this == rnhg); } +}; + struct NextHopObserverEntry; /* NextHopGroupTable: NextHopGroupKey, NextHopGroupEntry */ typedef std::map NextHopGroupTable; /* RouteTable: destination network, NextHopGroupKey */ -typedef std::map RouteTable; +typedef std::map RouteTable; /* RouteTables: vrf_id, RouteTable */ typedef std::map RouteTables; /* LabelRouteTable: destination label, next hop address(es) */ -typedef std::map LabelRouteTable; +typedef std::map LabelRouteTable; /* LabelRouteTables: vrf_id, LabelRouteTable */ typedef std::map LabelRouteTables; /* Host: vrf_id, IpAddress */ @@ -68,12 +92,15 @@ struct RouteBulkContext std::deque object_statuses; // Bulk statuses NextHopGroupKey tmp_next_hop; // Temporary next hop NextHopGroupKey nhg; + std::string nhg_index; sai_object_id_t vrf_id; IpPrefix ip_prefix; bool excp_intfs_flag; + // is_temp will track if the NhgOrch's owned NHG is temporary or not + bool is_temp; RouteBulkContext() - : excp_intfs_flag(false) + : excp_intfs_flag(false), is_temp(false) { } @@ -88,6 +115,7 @@ struct RouteBulkContext nhg.clear(); excp_intfs_flag = false; vrf_id = SAI_NULL_OBJECT_ID; + is_temp = false; } }; @@ -96,13 +124,16 @@ struct LabelRouteBulkContext std::deque object_statuses; // Bulk statuses NextHopGroupKey tmp_next_hop; // Temporary next hop NextHopGroupKey nhg; + std::string nhg_index; sai_object_id_t vrf_id; Label label; bool excp_intfs_flag; uint8_t pop_count; + // is_temp will track if the NhgOrch's owned NHG is temporary or not + bool is_temp; LabelRouteBulkContext() - : excp_intfs_flag(false) + : excp_intfs_flag(false), is_temp(false) { } @@ -123,7 +154,7 @@ struct LabelRouteBulkContext class RouteOrch : public Orch, public Subject { public: - RouteOrch(DBConnector *db, vector &tableNames, SwitchOrch *switchOrch, NeighOrch *neighOrch, IntfsOrch *intfsOrch, VRFOrch *vrfOrch, FgNhgOrch *fgNhgOrch); + RouteOrch(DBConnector *db, vector &tableNames, NeighOrch *neighOrch, IntfsOrch *intfsOrch, VRFOrch *vrfOrch, FgNhgOrch *fgNhgOrch); bool hasNextHopGroup(const NextHopGroupKey&) const; sai_object_id_t getNextHopGroupId(const NextHopGroupKey&); @@ -157,14 +188,11 @@ class RouteOrch : public Orch, public Subject std::string getLinkLocalEui64Addr(void); private: - SwitchOrch *m_switchOrch; NeighOrch *m_neighOrch; IntfsOrch *m_intfsOrch; VRFOrch *m_vrfOrch; FgNhgOrch *m_fgNhgOrch; - int m_nextHopGroupCount; - int m_maxNextHopGroupCount; bool m_resync; RouteTables m_syncdRoutes; diff --git a/tests/mock_tests/Makefile.am b/tests/mock_tests/Makefile.am index 69f25a6b0e1..bbe2d46e6f7 100644 --- a/tests/mock_tests/Makefile.am +++ b/tests/mock_tests/Makefile.am @@ -39,6 +39,7 @@ tests_SOURCES = aclorch_ut.cpp \ $(top_srcdir)/orchagent/routeorch.cpp \ $(top_srcdir)/orchagent/mplsrouteorch.cpp \ $(top_srcdir)/orchagent/fgnhgorch.cpp \ + $(top_srcdir)/orchagent/nhgorch.cpp \ $(top_srcdir)/orchagent/neighorch.cpp \ $(top_srcdir)/orchagent/intfsorch.cpp \ $(top_srcdir)/orchagent/portsorch.cpp \ diff --git a/tests/mock_tests/aclorch_ut.cpp b/tests/mock_tests/aclorch_ut.cpp index 4b1ce101ae5..83123848fd9 100644 --- a/tests/mock_tests/aclorch_ut.cpp +++ b/tests/mock_tests/aclorch_ut.cpp @@ -362,7 +362,7 @@ namespace aclorch_test { APP_ROUTE_TABLE_NAME, routeorch_pri }, { APP_LABEL_ROUTE_TABLE_NAME, routeorch_pri } }; - gRouteOrch = new RouteOrch(m_app_db.get(), route_tables, gSwitchOrch, gNeighOrch, gIntfsOrch, gVrfOrch, gFgNhgOrch); + gRouteOrch = new RouteOrch(m_app_db.get(), route_tables, gNeighOrch, gIntfsOrch, gVrfOrch, gFgNhgOrch); PolicerOrch *policer_orch = new PolicerOrch(m_config_db.get(), "POLICER"); diff --git a/tests/test_nhg.py b/tests/test_nhg.py index 7a40b2a9421..da252641eb3 100644 --- a/tests/test_nhg.py +++ b/tests/test_nhg.py @@ -10,30 +10,115 @@ class TestNextHopGroup(object): - def test_route_nhg(self, dvs, dvs_route, testlog): + ASIC_NHS_STR = "ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP" + ASIC_NHG_STR = "ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP_GROUP" + ASIC_NHGM_STR = ASIC_NHG_STR + "_MEMBER" + ASIC_RT_STR = "ASIC_STATE:SAI_OBJECT_TYPE_ROUTE_ENTRY" + ASIC_INSEG_STR = "ASIC_STATE:SAI_OBJECT_TYPE_INSEG_ENTRY" + + def get_route_id(self, prefix, dvs): + for k in dvs.get_asic_db().get_keys(self.ASIC_RT_STR): + if json.loads(k)['dest'] == prefix: + return k + + return None + + def get_inseg_id(self, label, dvs): + for k in dvs.get_asic_db().get_keys(self.ASIC_INSEG_STR): + print(json.loads(k)) + if json.loads(k)['label'] == label: + return k + + return None + + def get_nhg_id(self, nhg_index, dvs): + # Add a route with the given index, then retrieve the next hop group ID + # from that route + asic_db = dvs.get_asic_db() + asic_rts_count = len(asic_db.get_keys(self.ASIC_RT_STR)) + + fvs = swsscommon.FieldValuePairs([('nexthop_group', nhg_index)]) + prefix = '255.255.255.255/24' + ps = swsscommon.ProducerStateTable(dvs.get_app_db().db_connection, + "ROUTE_TABLE") + ps.set(prefix, fvs) + + # Assert the route is created + try: + asic_db.wait_for_n_keys(self.ASIC_RT_STR, asic_rts_count + 1) + except: + return None + + # Get the route ID for the created route + rt_id = self.get_route_id(prefix, dvs) + assert rt_id != None + + # Get the NHGID + nhgid = asic_db.get_entry(self.ASIC_RT_STR, rt_id)["SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID"] + + # Remove the added route + ps._del(prefix) + asic_db.wait_for_deleted_entry(self.ASIC_RT_STR, rt_id) + + # Return the NHGID + return nhgid + + def get_nhgm_ids(self, nhg_index, dvs): + nhgid = self.get_nhg_id(nhg_index, dvs) + nhgms = [] + asic_db = dvs.get_asic_db() + + for k in asic_db.get_keys(self.ASIC_NHGM_STR): + fvs = asic_db.get_entry(self.ASIC_NHGM_STR, k) + + # Sometimes some of the NHGMs have no fvs for some reason, so + # we skip those + try: + if fvs['SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_GROUP_ID'] == nhgid: + nhgms.append(k) + except KeyError as e: + pass + + return nhgms + + def port_name(self, i): + return "Ethernet" + str(i * 4) + + def port_ip(self, i): + return "10.0.0." + str(i * 2) + + def port_ipprefix(self, i): + return self.port_ip(i) + "/31" + + def peer_ip(self, i): + return "10.0.0." + str(i * 2 + 1) + + def port_mac(self, i): + return "00:00:00:00:00:0" + str(i) + + def config_intf(self, i, dvs): config_db = dvs.get_config_db() - fvs = {"NULL": "NULL"} - config_db.create_entry("INTERFACE", "Ethernet0", fvs) - config_db.create_entry("INTERFACE", "Ethernet4", fvs) - config_db.create_entry("INTERFACE", "Ethernet8", fvs) - config_db.create_entry("INTERFACE", "Ethernet0|10.0.0.0/31", fvs) - config_db.create_entry("INTERFACE", "Ethernet4|10.0.0.2/31", fvs) - config_db.create_entry("INTERFACE", "Ethernet8|10.0.0.4/31", fvs) - dvs.runcmd("config interface startup Ethernet0") - dvs.runcmd("config interface startup Ethernet4") - dvs.runcmd("config interface startup Ethernet8") - - dvs.runcmd("arp -s 10.0.0.1 00:00:00:00:00:01") - dvs.runcmd("arp -s 10.0.0.3 00:00:00:00:00:02") - dvs.runcmd("arp -s 10.0.0.5 00:00:00:00:00:03") - - assert dvs.servers[0].runcmd("ip link set down dev eth0") == 0 - assert dvs.servers[1].runcmd("ip link set down dev eth0") == 0 - assert dvs.servers[2].runcmd("ip link set down dev eth0") == 0 - - assert dvs.servers[0].runcmd("ip link set up dev eth0") == 0 - assert dvs.servers[1].runcmd("ip link set up dev eth0") == 0 - assert dvs.servers[2].runcmd("ip link set up dev eth0") == 0 + fvs = {'NULL': 'NULL'} + + config_db.create_entry("INTERFACE", self.port_name(i), fvs) + config_db.create_entry("INTERFACE", "{}|{}".format(self.port_name(i), self.port_ipprefix(i)), fvs) + dvs.runcmd("config interface startup " + self.port_name(i)) + dvs.runcmd("arp -s {} {}".format(self.peer_ip(i), self.port_mac(i))) + assert dvs.servers[i].runcmd("ip link set down dev eth0") == 0 + assert dvs.servers[i].runcmd("ip link set up dev eth0") == 0 + + def flap_intf(self, i, status, dvs): + assert status in ['up', 'down'] + + dvs.servers[i].runcmd("ip link set {} dev eth0".format(status)) == 0 + time.sleep(1) + fvs = dvs.get_app_db().get_entry("PORT_TABLE", "Ethernet%d" % (i * 4)) + assert bool(fvs) + assert fvs["oper_status"] == status + + def test_route_nhg(self, dvs, dvs_route, testlog): + for i in range(3): + self.config_intf(i, dvs) rtprefix = "2.2.2.0/24" @@ -53,20 +138,20 @@ def test_route_nhg(self, dvs, dvs_route, testlog): rtkeys = dvs_route.check_asicdb_route_entries([rtprefix]) # assert the route points to next hop group - fvs = asic_db.get_entry("ASIC_STATE:SAI_OBJECT_TYPE_ROUTE_ENTRY", rtkeys[0]) + fvs = asic_db.get_entry(self.ASIC_RT_STR, rtkeys[0]) nhgid = fvs["SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID"] - fvs = asic_db.get_entry("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP_GROUP", nhgid) + fvs = asic_db.get_entry(self.ASIC_NHG_STR, nhgid) assert bool(fvs) - keys = asic_db.get_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP_GROUP_MEMBER") + keys = asic_db.get_keys(self.ASIC_NHGM_STR) assert len(keys) == 3 for k in keys: - fvs = asic_db.get_entry("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP_GROUP_MEMBER", k) + fvs = asic_db.get_entry(self.ASIC_NHGM_STR, k) assert fvs["SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_GROUP_ID"] == nhgid @@ -178,36 +263,22 @@ def test_route_nhg(self, dvs, dvs_route, testlog): # bring links down one-by-one for i in [0, 1, 2]: - dvs.servers[i].runcmd("ip link set down dev eth0") == 0 - - time.sleep(1) - - fvs = app_db.get_entry("PORT_TABLE", "Ethernet%d" % (i * 4)) - - assert bool(fvs) - assert fvs["oper_status"] == "down" + self.flap_intf(i, 'down', dvs) - keys = asic_db.get_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP_GROUP_MEMBER") + keys = asic_db.get_keys(self.ASIC_NHGM_STR) assert len(keys) == 2 - i # bring links up one-by-one for i in [0, 1, 2]: - dvs.servers[i].runcmd("ip link set up dev eth0") == 0 + self.flap_intf(i, 'up', dvs) - time.sleep(1) - - fvs = app_db.get_entry("PORT_TABLE", "Ethernet%d" % (i * 4)) - - assert bool(fvs) - assert fvs["oper_status"] == "up" - - keys = asic_db.get_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP_GROUP_MEMBER") + keys = asic_db.get_keys(self.ASIC_NHGM_STR) assert len(keys) == i + 1 for k in keys: - fvs = asic_db.get_entry("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP_GROUP_MEMBER", k) + fvs = asic_db.get_entry(self.ASIC_NHGM_STR, k) assert fvs["SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_GROUP_ID"] == nhgid # Remove route 2.2.2.0/24 @@ -216,6 +287,359 @@ def test_route_nhg(self, dvs, dvs_route, testlog): # Wait for route 2.2.2.0/24 to be removed dvs_route.check_asicdb_deleted_route_entries([rtprefix]) + def test_label_route_nhg(self, dvs, testlog): + for i in range(3): + self.config_intf(i, dvs) + + app_db = dvs.get_app_db() + lr_ps = swsscommon.ProducerStateTable(app_db.db_connection, "LABEL_ROUTE_TABLE") + + asic_db = dvs.get_asic_db() + asic_insegs_count = len(asic_db.get_keys(self.ASIC_INSEG_STR)) + + # add label route + fvs = swsscommon.FieldValuePairs([("nexthop","10.0.0.1,10.0.0.3,10.0.0.5"), + ("ifname", "Ethernet0,Ethernet4,Ethernet8")]) + lr_ps.set("10", fvs) + + # check if route was propagated to ASIC DB + + asic_db.wait_for_n_keys(self.ASIC_INSEG_STR, asic_insegs_count + 1) + keys = asic_db.get_keys(self.ASIC_INSEG_STR) + + k = self.get_inseg_id('10', dvs) + assert k is not None + + # assert the route points to next hop group + fvs = asic_db.get_entry(self.ASIC_INSEG_STR, k) + + nhgid = fvs["SAI_INSEG_ENTRY_ATTR_NEXT_HOP_ID"] + + fvs = asic_db.get_entry(self.ASIC_NHG_STR, nhgid) + + assert bool(fvs) + + keys = asic_db.get_keys(self.ASIC_NHGM_STR) + + assert len(keys) == 3 + + for k in keys: + fvs = asic_db.get_entry(self.ASIC_NHGM_STR, k) + + assert fvs["SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_GROUP_ID"] == nhgid + + # bring links down one-by-one + for i in [0, 1, 2]: + self.flap_intf(i, 'down', dvs) + + keys = asic_db.get_keys(self.ASIC_NHGM_STR) + + assert len(keys) == 2 - i + + # bring links up one-by-one + for i in [0, 1, 2]: + self.flap_intf(i, 'up', dvs) + + keys = asic_db.get_keys(self.ASIC_NHGM_STR) + + assert len(keys) == i + 1 + + for k in keys: + fvs = asic_db.get_entry(self.ASIC_NHGM_STR, k) + assert fvs["SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_GROUP_ID"] == nhgid + + # Remove label route 10 + lr_ps._del("10") + + # Wait for label route 10 to be removed + asic_db.wait_for_n_keys(self.ASIC_INSEG_STR, asic_insegs_count) + + def test_nhgorch_labeled_nhs(self, dvs, testlog): + for i in range(2): + self.config_intf(i, dvs) + + app_db = dvs.get_app_db() + asic_db = dvs.get_asic_db() + nhg_ps = swsscommon.ProducerStateTable(app_db.db_connection, "NEXT_HOP_GROUP_TABLE") + asic_nhgs_count = len(asic_db.get_keys(self.ASIC_NHG_STR)) + asic_nhgms_count = len(asic_db.get_keys(self.ASIC_NHGM_STR)) + asic_nhs_count = len(asic_db.get_keys(self.ASIC_NHS_STR)) + + # Add a group containing labeled weighted NHs + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.1,10.0.0.3'), + ('mpls_nh', 'push1,push3'), + ('ifname', 'Ethernet0,Ethernet4'), + ('weight', '2,4')]) + nhg_ps.set('group1', fvs) + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, asic_nhgs_count + 1) + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count + 2) + + # NhgOrch should create two next hops for the labeled ones + asic_db.wait_for_n_keys(self.ASIC_NHS_STR, asic_nhs_count + 2) + + # Assert the weights are properly set + nhgm_ids = self.get_nhgm_ids('group1', dvs) + weights = [] + for k in nhgm_ids: + fvs = asic_db.get_entry(self.ASIC_NHGM_STR, k) + weights.append(fvs['SAI_NEXT_HOP_GROUP_MEMBER_ATTR_WEIGHT']) + assert set(weights) == set(['2', '4']) + + # Create a new single next hop with the same label + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.1'), + ('mpls_nh', 'push1'), + ('ifname', 'Ethernet0')]) + nhg_ps.set('group2', fvs) + + # No new next hop should be added + time.sleep(1) + assert len(asic_db.get_keys(self.ASIC_NHS_STR)) == asic_nhs_count + 2 + + # Create a new single next hop with a different label + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.1'), + ('mpls_nh', 'push2'), + ('ifname', 'Ethernet0')]) + nhg_ps.set('group3', fvs) + + # A new next hop should be added + asic_db.wait_for_n_keys(self.ASIC_NHS_STR, asic_nhs_count + 3) + + # Delete group3 + nhg_ps._del('group3') + + # Group3's NH should be deleted + asic_db.wait_for_n_keys(self.ASIC_NHS_STR, asic_nhs_count + 2) + + # Delete group2 + nhg_ps._del('group2') + + # The number of NHs should be the same as they are still referenced by + # group1 + time.sleep(1) + asic_db.wait_for_n_keys(self.ASIC_NHS_STR, asic_nhs_count + 2) + + # Update group1 with no weights and both labeled and unlabeled NHs + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.1,10.0.0.3'), + ('mpls_nh', 'push2,na'), + ('ifname', 'Ethernet0,Ethernet4')]) + nhg_ps.set('group1', fvs) + + # Group members should be replaced and one NH should get deleted + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count + 2) + asic_db.wait_for_n_keys(self.ASIC_NHS_STR, asic_nhs_count + 1) + + # Assert the weights of the NHGMs are the expected ones + nhgm_ids = self.get_nhgm_ids('group1', dvs) + weights = [] + for nhgm_id in nhgm_ids: + fvs = asic_db.get_entry(self.ASIC_NHGM_STR, nhgm_id) + weights.append(fvs['SAI_NEXT_HOP_GROUP_MEMBER_ATTR_WEIGHT']) + assert weights == ['0', '0'] + + # Delete group1 + nhg_ps._del('group1') + + # Wait for the group and it's members to be deleted + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, asic_nhgs_count) + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count) + + # The two next hops should also get deleted + asic_db.wait_for_n_keys(self.ASIC_NHS_STR, asic_nhs_count) + + def test_nhgorch_excp_group_cases(self, dvs, testlog): + for i in range(3): + self.config_intf(i, dvs) + + app_db = dvs.get_app_db() + asic_db = dvs.get_asic_db() + nhg_ps = swsscommon.ProducerStateTable(app_db.db_connection, "NEXT_HOP_GROUP_TABLE") + rt_ps = swsscommon.ProducerStateTable(app_db.db_connection, "ROUTE_TABLE") + + def get_nhg_keys(): + return asic_db.get_keys(self.ASIC_NHG_STR) + + def get_nhgm_keys(): + return asic_db.get_keys(self.ASIC_NHGM_STR) + + def get_rt_keys(): + return asic_db.get_keys(self.ASIC_RT_STR) + + # Count existing objects + prev_nhg_keys = get_nhg_keys() + asic_nhgs_count = len(get_nhg_keys()) + asic_nhgms_count = len(get_nhgm_keys()) + asic_nhs_count = len(asic_db.get_keys(self.ASIC_NHS_STR)) + + # Remove a group that does not exist + nhg_ps._del("group1") + time.sleep(1) + assert len(get_nhg_keys()) == asic_nhgs_count + + # Create a next hop group with a member that does not exist - should fail + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.1,10.0.0.3,10.0.0.63'), + ("ifname", "Ethernet0,Ethernet4,Ethernet124")]) + nhg_ps.set("group1", fvs) + time.sleep(1) + assert len(asic_db.get_keys(self.ASIC_NHG_STR)) == asic_nhgs_count + + # Issue an update for this next hop group that doesn't yet exist, + # which contains only valid NHs. This will overwrite the previous + # operation and create the group. + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.1,10.0.0.5'), + ("ifname", "Ethernet0,Ethernet8")]) + nhg_ps.set("group1", fvs) + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, asic_nhgs_count + 1) + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count + 2) + + # Check the group has it's two members + for nhgid in asic_db.get_keys(self.ASIC_NHG_STR): + if nhgid not in prev_nhg_keys: + break + + count = 0 + for k in asic_db.get_keys(self.ASIC_NHGM_STR): + fvs = asic_db.get_entry(self.ASIC_NHGM_STR, k) + if fvs['SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_GROUP_ID'] == nhgid: + count += 1 + assert count == 2 + + # Add a route referencing the new group + asic_rts_count = len(get_rt_keys()) + fvs = swsscommon.FieldValuePairs([('nexthop_group', 'group1')]) + rt_ps.set('2.2.2.0/24', fvs) + asic_db.wait_for_n_keys(self.ASIC_RT_STR, asic_rts_count + 1) + + # Get the route key + for rt_key in get_rt_keys(): + k = json.loads(rt_key) + + if k['dest'] == "2.2.2.0/24": + break + + # Try removing the group while it still has references - should fail + nhg_ps._del('group1') + time.sleep(1) + assert len(get_nhg_keys()) == asic_nhgs_count + 1 + + # Create a new group + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.1,10.0.0.3'), + ('ifname', 'Ethernet0,Ethernet4')]) + nhg_ps.set("group2", fvs) + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, asic_nhgs_count + 2) + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count + 4) + + # Update the route to point to the new group + fvs = swsscommon.FieldValuePairs([('nexthop_group', 'group2')]) + rt_ps.set('2.2.2.0/24', fvs) + + # The first group should have got deleted + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, asic_nhgs_count + 1) + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count + 2) + + assert asic_db.get_entry(self.ASIC_RT_STR, rt_key)['SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID'] != nhgid + + # Update the route with routeOrch's owned next hop group + nhgid = asic_db.get_entry(self.ASIC_RT_STR, rt_key)['SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID'] + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.1,10.0.0.3'), + ('ifname', 'Ethernet0,Ethernet4')]) + rt_ps.set('2.2.2.0/24', fvs) + + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, asic_nhgs_count + 2) + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count + 4) + + # Assert the next hop group ID changed + time.sleep(1) + assert asic_db.get_entry(self.ASIC_RT_STR, rt_key)['SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID'] != nhgid + nhgid = asic_db.get_entry(self.ASIC_RT_STR, rt_key)['SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID'] + + # Update the route to point back to group2 + fvs = swsscommon.FieldValuePairs([('nexthop_group', 'group2')]) + rt_ps.set('2.2.2.0/24', fvs) + + # The routeOrch's owned next hop group should get deleted + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, asic_nhgs_count + 1) + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count + 2) + + # Assert the route points back to group2 + assert asic_db.get_entry(self.ASIC_RT_STR, rt_key)['SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID'] != nhgid + + # Create a new group with the same members as group2 + nhgid = asic_db.get_entry(self.ASIC_RT_STR, rt_key)['SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID'] + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.1,10.0.0.3'), + ('ifname', 'Ethernet0,Ethernet4')]) + nhg_ps.set("group1", fvs) + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, asic_nhgs_count + 2) + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count + 4) + + # Update the route to point to the new group + fvs = swsscommon.FieldValuePairs([('nexthop_group', 'group1')]) + rt_ps.set('2.2.2.0/24', fvs) + time.sleep(1) + + # Assert the next hop group ID changed + assert asic_db.get_entry(self.ASIC_RT_STR, rt_key)['SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID'] != nhgid + + # Remove the route + rt_ps._del('2.2.2.0/24') + asic_db.wait_for_n_keys(self.ASIC_RT_STR, asic_rts_count) + + # Remove the groups + nhg_ps._del('group1') + nhg_ps._del('group2') + + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, asic_nhgs_count) + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count) + + # Create a route with labeled NHs + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.1,10.0.0.3'), + ('mpls_nh', 'push1,push3'), + ('ifname', 'Ethernet0,Ethernet4'), + ('weight', '2,4')]) + rt_ps.set('2.2.2.0/24', fvs) + asic_db.wait_for_n_keys(self.ASIC_RT_STR, asic_rts_count + 1) + + # Two new next hops should be created + asic_db.wait_for_n_keys(self.ASIC_NHS_STR, asic_nhs_count + 2) + + # Create a NHG with the same details + nhg_ps.set('group1', fvs) + + # No new next hops should be created + time.sleep(1) + assert len(asic_db.get_keys(self.ASIC_NHS_STR)) == asic_nhs_count + 2 + + # Update the group with a different NH + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.1,10.0.0.3'), + ('mpls_nh', 'push2,push3'), + ('ifname', 'Ethernet0,Ethernet4'), + ('weight', '2,4')]) + nhg_ps.set('group1', fvs) + + # A new next hop should be created + asic_db.wait_for_n_keys(self.ASIC_NHS_STR, asic_nhs_count + 3) + + # Remove the route + rt_ps._del('2.2.2.0/24') + + # One NH should become unreferenced and should be deleted. The other + # one is still referenced by NhgOrch's owned NHG. + asic_db.wait_for_n_keys(self.ASIC_NHS_STR, asic_nhs_count + 2) + + # Remove the group + nhg_ps._del('group1') + + # Both new next hops should be deleted + asic_db.wait_for_n_keys(self.ASIC_NHS_STR, asic_nhs_count) + + # Add a route with a NHG that does not exist + fvs = swsscommon.FieldValuePairs([('nexthop_group', 'group1')]) + rt_ps.set('2.2.2.0/24', fvs) + time.sleep(1) + assert asic_rts_count == len(asic_db.get_keys(self.ASIC_RT_STR)) + + # Remove the pending route + rt_ps._del('2.2.2.0/24') + def test_route_nhg_exhaust(self, dvs, testlog): """ Test the situation of exhausting ECMP group, assume SAI_SWITCH_ATTR_NUMBER_OF_ECMP_GROUPS is 512 @@ -233,21 +657,6 @@ def test_route_nhg_exhaust(self, dvs, testlog): # TODO: check ECMP 512 - def port_name(i): - return "Ethernet" + str(i * 4) - - def port_ip(i): - return "10.0.0." + str(i * 2) - - def peer_ip(i): - return "10.0.0." + str(i * 2 + 1) - - def port_ipprefix(i): - return port_ip(i) + "/31" - - def port_mac(i): - return "00:00:00:00:00:0" + str(i) - def gen_ipprefix(r): """ Construct route like 2.X.X.0/24 """ ip = ipaddress.IPv4Address(IP_INTEGER_BASE + r * 256) @@ -260,25 +669,16 @@ def gen_nhg_fvs(binary): ifname = [] for i in range(MAX_PORT_COUNT): if binary[i] == '1': - nexthop.append(peer_ip(i)) - ifname.append(port_name(i)) + nexthop.append(self.peer_ip(i)) + ifname.append(self.port_name(i)) nexthop = ','.join(nexthop) ifname = ','.join(ifname) fvs = swsscommon.FieldValuePairs([("nexthop", nexthop), ("ifname", ifname)]) return fvs - def asic_route_exists(keys, ipprefix): - for k in keys: - rt_key = json.loads(k) - - if rt_key['dest'] == ipprefix: - return k - else: - return None - def asic_route_nhg_fvs(k): - fvs = asic_db.get_entry("ASIC_STATE:SAI_OBJECT_TYPE_ROUTE_ENTRY", k) + fvs = asic_db.get_entry(self.ASIC_RT_STR, k) if not fvs: return None @@ -287,7 +687,7 @@ def asic_route_nhg_fvs(k): if nhgid is None: return None - fvs = asic_db.get_entry("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP_GROUP", nhgid) + fvs = asic_db.get_entry(self.ASIC_NHG_STR, nhgid) return fvs MAX_ECMP_COUNT = 512 @@ -297,25 +697,19 @@ def asic_route_nhg_fvs(k): else: IP_INTEGER_BASE = int(ipaddress.IPv4Address(str("2.2.2.0"))) - config_db = dvs.get_config_db() - fvs = {"NULL": "NULL"} - for i in range(MAX_PORT_COUNT): - config_db.create_entry("INTERFACE", port_name(i), fvs) - config_db.create_entry("INTERFACE", "{}|{}".format(port_name(i), port_ipprefix(i)), fvs) - dvs.runcmd("config interface startup " + port_name(i)) - dvs.runcmd("arp -s {} {}".format(peer_ip(i), port_mac(i))) - assert dvs.servers[i].runcmd("ip link set down dev eth0") == 0 - assert dvs.servers[i].runcmd("ip link set up dev eth0") == 0 + self.config_intf(i, dvs) app_db = dvs.get_app_db() asic_db = dvs.get_asic_db() ps = swsscommon.ProducerStateTable(app_db.db_connection, "ROUTE_TABLE") + dvs.runcmd('swssloglevel -c orchagent -l INFO') + # Add first batch of routes with unique nexthop groups in AppDB route_count = 0 r = 0 - asic_routes_count = len(asic_db.get_keys("ASIC_STATE:SAI_OBJECT_TYPE_ROUTE_ENTRY")) + asic_routes_count = len(asic_db.get_keys(self.ASIC_RT_STR)) while route_count < MAX_ECMP_COUNT: r += 1 fmt = '{{0:0{}b}}'.format(MAX_PORT_COUNT) @@ -329,12 +723,35 @@ def asic_route_nhg_fvs(k): route_count += 1 # Wait and check ASIC DB the count of nexthop groups used - asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP_GROUP", MAX_ECMP_COUNT) + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, MAX_ECMP_COUNT) # Wait and check ASIC DB the count of routes - asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_ROUTE_ENTRY", asic_routes_count + MAX_ECMP_COUNT) + asic_db.wait_for_n_keys(self.ASIC_RT_STR, asic_routes_count + MAX_ECMP_COUNT) asic_routes_count += MAX_ECMP_COUNT + # Add a route with labeled NHs + asic_nhs_count = len(asic_db.get_keys(self.ASIC_NHS_STR)) + route_ipprefix = gen_ipprefix(route_count) + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.1,10.0.0.3'), + ('mpls_nh', 'push1,push3'), + ('ifname', 'Ethernet0,Ethernet4')]) + ps.set(route_ipprefix, fvs) + route_count += 1 + + # A temporary route should be created + asic_db.wait_for_n_keys(self.ASIC_RT_STR, asic_routes_count + 1) + + # A NH should be elected as the temporary NHG and it should be created + # as it doesn't exist. + asic_db.wait_for_n_keys(self.ASIC_NHS_STR, asic_nhs_count + 1) + + # Delete the route. The route and the added labeled NH should be + # removed. + ps._del(route_ipprefix) + route_count -= 1 + asic_db.wait_for_n_keys(self.ASIC_RT_STR, asic_routes_count) + asic_db.wait_for_n_keys(self.ASIC_NHS_STR, asic_nhs_count) + # Add second batch of routes with unique nexthop groups in AppDB # Add more routes with new nexthop group in AppDBdd route_ipprefix = gen_ipprefix(route_count) @@ -355,18 +772,18 @@ def asic_route_nhg_fvs(k): last_ipprefix = route_ipprefix # Wait until we get expected routes and check ASIC DB on the count of nexthop groups used, and it should not increase - keys = asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_ROUTE_ENTRY", asic_routes_count + 10) - asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP_GROUP", MAX_ECMP_COUNT) + keys = asic_db.wait_for_n_keys(self.ASIC_RT_STR, asic_routes_count + 10) + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, MAX_ECMP_COUNT) # Check the route points to next hop group # Note: no need to wait here - k = asic_route_exists(keys, "2.2.2.0/24") + k = self.get_route_id("2.2.2.0/24", dvs) assert k is not None fvs = asic_route_nhg_fvs(k) assert fvs is not None # Check the second batch does not point to next hop group - k = asic_route_exists(keys, base_ipprefix) + k = self.get_route_id(base_ipprefix, dvs) assert k is not None fvs = asic_route_nhg_fvs(k) assert not(fvs) @@ -384,20 +801,939 @@ def asic_route_nhg_fvs(k): route_ipprefix = gen_ipprefix(route_count) ps._del(route_ipprefix) route_count += 1 + asic_routes_count -= MAX_ECMP_COUNT # Wait and check the second batch points to next hop group # Check ASIC DB on the count of nexthop groups used, and it should not increase or decrease - asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP_GROUP", 10) - keys = asic_db.get_keys("ASIC_STATE:SAI_OBJECT_TYPE_ROUTE_ENTRY") - k = asic_route_exists(keys, base_ipprefix) + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, 10) + keys = asic_db.get_keys(self.ASIC_RT_STR) + k = self.get_route_id(base_ipprefix, dvs) assert k is not None fvs = asic_route_nhg_fvs(k) assert fvs is not None - k = asic_route_exists(keys, last_ipprefix) + k = self.get_route_id(last_ipprefix, dvs) assert k is not None fvs = asic_route_nhg_fvs(k) assert fvs is not None + # Cleanup + + # Remove second batch of routes + for i in range(10): + route_ipprefix = gen_ipprefix(MAX_ECMP_COUNT + i) + ps._del(route_ipprefix) + + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, 0) + asic_db.wait_for_n_keys(self.ASIC_RT_STR, asic_routes_count) + + def test_nhgorch_nhg_exhaust(self, dvs, testlog): + app_db = dvs.get_app_db() + asic_db = dvs.get_asic_db() + nhg_ps = swsscommon.ProducerStateTable(app_db.db_connection, "NEXT_HOP_GROUP_TABLE") + rt_ps = swsscommon.ProducerStateTable(app_db.db_connection, "ROUTE_TABLE") + + MAX_ECMP_COUNT = 512 + MAX_PORT_COUNT = 10 + + r = 0 + fmt = '{{0:0{}b}}'.format(MAX_PORT_COUNT) + asic_nhgs_count = len(asic_db.get_keys(self.ASIC_NHG_STR)) + nhg_count = asic_nhgs_count + first_valid_nhg = nhg_count + + def gen_nhg_fvs(binary): + nexthop = [] + ifname = [] + + for i in range(MAX_PORT_COUNT): + if binary[i] == '1': + nexthop.append(self.peer_ip(i)) + ifname.append(self.port_name(i)) + + nexthop = ','.join(nexthop) + ifname = ','.join(ifname) + fvs = swsscommon.FieldValuePairs([("nexthop", nexthop), ("ifname", ifname)]) + + return fvs + + def gen_nhg_index(nhg_number): + return "group{}".format(nhg_number) + + def gen_valid_binary(): + nonlocal r + + while True: + r += 1 + binary = fmt.format(r) + # We need at least 2 ports for a nexthop group + if binary.count('1') <= 1: + continue + return binary + + def create_temp_nhg(): + nonlocal nhg_count + + binary = gen_valid_binary() + nhg_fvs = gen_nhg_fvs(binary) + nhg_index = gen_nhg_index(nhg_count) + nhg_ps.set(nhg_index, nhg_fvs) + nhg_count += 1 + + return nhg_index, binary + + def delete_nhg(): + nonlocal first_valid_nhg + + del_nhg_index = gen_nhg_index(first_valid_nhg) + del_nhg_id = asic_nhgs[del_nhg_index] + + nhg_ps._del(del_nhg_index) + asic_nhgs.pop(del_nhg_index) + first_valid_nhg += 1 + + return del_nhg_id + + def nhg_exists(nhg_index): + return self.get_nhg_id(nhg_index, dvs) is not None + + config_db = dvs.get_config_db() + fvs = {"NULL": "NULL"} + + # Create interfaces + for i in range(MAX_PORT_COUNT): + self.config_intf(i, dvs) + + asic_nhgs = {} + + # Add first batch of next hop groups to reach the NHG limit + while nhg_count < MAX_ECMP_COUNT: + r += 1 + binary = fmt.format(r) + # We need at least 2 ports for a nexthop group + if binary.count('1') <= 1: + continue + nhg_fvs = gen_nhg_fvs(binary) + nhg_index = gen_nhg_index(nhg_count) + nhg_ps.set(nhg_index, nhg_fvs) + + # Wait for the group to be added + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, asic_nhgs_count + nhg_count + 1) + + # Save the NHG index/ID pair + asic_nhgs[nhg_index] = self.get_nhg_id(nhg_index, dvs) + + nhg_count += 1 + + # Add a new next hop group - it should create a temporary one instead + prev_nhgs = asic_db.get_keys(self.ASIC_NHG_STR) + nhg_index, _ = create_temp_nhg() + + # Assert no new group has been added + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, MAX_ECMP_COUNT) + + # Assert the same NHGs are in ASIC DB + assert prev_nhgs == asic_db.get_keys(self.ASIC_NHG_STR) + + # Delete an existing next hop group + del_nhg_id = delete_nhg() + + # Wait for the key to be deleted + asic_db.wait_for_deleted_entry(self.ASIC_NHG_STR, del_nhg_id) + + # Wait for the temporary group to be promoted and replace the deleted + # NHG + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, MAX_ECMP_COUNT) + + # Save the promoted NHG index/ID + asic_nhgs[nhg_index] = self.get_nhg_id(nhg_index, dvs) + + # Update a group + binary = gen_valid_binary() + nhg_fvs = gen_nhg_fvs(binary) + nhg_index = gen_nhg_index(first_valid_nhg) + + # Save the previous members + prev_nhg_members = self.get_nhgm_ids(nhg_index, dvs) + nhg_ps.set(nhg_index, nhg_fvs) + + # Wait a second so the NHG members get updated + time.sleep(1) + + # Assert the group was updated by checking it's members + assert self.get_nhgm_ids(nhg_index, dvs) != prev_nhg_members + + # Create a new temporary group + nhg_index, _ = create_temp_nhg() + time.sleep(1) + + # Delete the temporary group + nhg_ps._del(nhg_index) + + # Assert the NHG does not exist anymore + assert not nhg_exists(nhg_index) + + # Assert the number of groups is the same + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, MAX_ECMP_COUNT) + + # Create a new temporary group + nhg_index, binary = create_temp_nhg() + + # Save the number of group members + binary_count = binary.count('1') + + # Update the temporary group with a different number of members + while True: + r += 1 + binary = fmt.format(r) + # We need at least 2 ports for a nexthop group + if binary.count('1') <= 1 or binary.count('1') == binary_count: + continue + binary_count = binary.count('1') + break + nhg_fvs = gen_nhg_fvs(binary) + nhg_ps.set(nhg_index, nhg_fvs) + + # Delete a group + del_nhg_id = delete_nhg() + + # Wait for the group to be deleted + asic_db.wait_for_deleted_entry(self.ASIC_NHG_STR, del_nhg_id) + + # The temporary group should be promoted + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, MAX_ECMP_COUNT) + + # Save the promoted NHG index/ID + asic_nhgs[nhg_index] = self.get_nhg_id(nhg_index, dvs) + + # Assert it has the updated details by checking the number of members + assert len(self.get_nhgm_ids(nhg_index, dvs)) == binary_count + + # Add a route + asic_rts_count = len(asic_db.get_keys(self.ASIC_RT_STR)) + + rt_fvs = swsscommon.FieldValuePairs([('nexthop_group', nhg_index)]) + rt_ps.set('2.2.2.0/24', rt_fvs) + + # Assert the route is created + asic_db.wait_for_n_keys(self.ASIC_RT_STR, asic_rts_count + 1) + + # Save the previous NHG ID + prev_nhg_id = asic_nhgs[nhg_index] + + # Create a new temporary group + nhg_index, binary = create_temp_nhg() + + # Get the route ID + rt_id = self.get_route_id('2.2.2.0/24', dvs) + assert rt_id != None + + # Update the route to point to the temporary NHG + rt_fvs = swsscommon.FieldValuePairs([('nexthop_group', nhg_index)]) + rt_ps.set('2.2.2.0/24', rt_fvs) + + # Wait for the route to change it's NHG ID + asic_db.wait_for_field_negative_match(self.ASIC_RT_STR, + rt_id, + {'SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID': prev_nhg_id}) + + # Save the new route NHG ID + prev_nhg_id = asic_db.get_entry(self.ASIC_RT_STR, rt_id)['SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID'] + + # Update the temporary NHG with one that has different NHs + + # Create a new binary that uses the other interfaces than the previous + # binary was using + new_binary = [] + + for i in range(len(binary)): + if binary[i] == '1': + new_binary.append('0') + else: + new_binary.append('1') + + binary = ''.join(new_binary) + assert binary.count('1') > 1 + + nhg_fvs = gen_nhg_fvs(binary) + nhg_ps.set(nhg_index, nhg_fvs) + + # The NHG ID of the route should change + asic_db.wait_for_field_negative_match(self.ASIC_RT_STR, + rt_id, + {'SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID': prev_nhg_id}) + + # Save the new route NHG ID + prev_nhg_id = asic_db.get_entry(self.ASIC_RT_STR, rt_id)['SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID'] + + # Delete a NHG. + del_nhg_id = delete_nhg() + + # Wait for the NHG to be deleted + asic_db.wait_for_deleted_entry(self.ASIC_NHG_STR, del_nhg_id) + + # The temporary group should get promoted. + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, MAX_ECMP_COUNT) + + # Save the promoted NHG index/ID + asic_nhgs[nhg_index] = self.get_nhg_id(nhg_index, dvs) + + # Assert the NHGID of the route changed due to temporary group being + # promoted. + asic_db.wait_for_field_match(self.ASIC_RT_STR, + rt_id, + {'SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID': asic_nhgs[nhg_index]}) + + # Try updating the promoted NHG to a single NHG. Should fail as it no + # longer is a temporary NHG + nhg_id = self.get_nhg_id(nhg_index, dvs) + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.1'), + ('ifname', 'Ethernet0')]) + nhg_ps.set(nhg_index, fvs) + time.sleep(1) + assert bool(asic_db.get_entry(self.ASIC_NHG_STR, nhg_id)) == True + + # Save the NHG index + prev_nhg_index = nhg_index + prev_nhg_id = nhg_id + + # Create a temporary next hop groups + nhg_index, binary = create_temp_nhg() + nhg_id = self.get_nhg_id(nhg_index, dvs) + + # Update the route to point to the temporary group + fvs = swsscommon.FieldValuePairs([('nexthop_group', nhg_index)]) + rt_ps.set('2.2.2.0/24', fvs) + asic_db.wait_for_field_match(self.ASIC_RT_STR, + rt_id, + {'SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID': nhg_id}) + + # The previous group should be updated to a single NHG now, freeing a + # NHG slot in ASIC and promoting the temporary one + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, MAX_ECMP_COUNT) + asic_nhgs[nhg_index] = self.get_nhg_id(nhg_index, dvs) + assert self.get_nhg_id(nhg_index, dvs) != prev_nhg_id + + # Create a temporary next hop groups + nhg_index, binary = create_temp_nhg() + nhg_id = self.get_nhg_id(nhg_index, dvs) + + # Update the route to point to the temporary group + fvs = swsscommon.FieldValuePairs([('nexthop_group', nhg_index)]) + rt_ps.set('2.2.2.0/24', fvs) + asic_db.wait_for_field_match(self.ASIC_RT_STR, + rt_id, + {'SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID': nhg_id}) + + # Update the group to a single NHG + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.1'), + ('ifname', 'Ethernet0')]) + nhg_ps.set(nhg_index, fvs) + nhg_id = self.get_nhg_id(nhg_index, dvs) + + # Wait for the route to update it's NHG ID + asic_db.wait_for_field_match(self.ASIC_RT_STR, + rt_id, + {'SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID': nhg_id}) + + # Try to update the nexthop group to another one + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.3'), + ('ifname', 'Ethernet4')]) + nhg_ps.set(nhg_index, fvs) + + # Wait a second so the database operations finish + time.sleep(1) + + # The update should fail as the group is not temporary anymore and it + # should keep it's previous NHG ID + asic_db.wait_for_field_match(self.ASIC_RT_STR, + rt_id, + {'SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID': nhg_id}) + + # Create a next hop group that contains labeled NHs that do not exist + # in NeighOrch + asic_nhs_count = len(asic_db.get_keys(self.ASIC_NHS_STR)) + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.1,10.0.0.3'), + ('mpls_nh', 'push1,push3'), + ('ifname', 'Ethernet0,Ethernet4')]) + nhg_index = gen_nhg_index(nhg_count) + nhg_ps.set(nhg_index, fvs) + nhg_count += 1 + + # A temporary next hop should be elected to represent the group and + # thus a new labeled next hop should be created + asic_db.wait_for_n_keys(self.ASIC_NHS_STR, asic_nhs_count + 1) + + # Delete a next hop group + delete_nhg() + + # The group should be promoted and the other labeled NH should also get + # created + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, MAX_ECMP_COUNT) + asic_db.wait_for_n_keys(self.ASIC_NHS_STR, asic_nhs_count + 2) + + # Save the promoted NHG index/ID + asic_nhgs[nhg_index] = self.get_nhg_id(nhg_index, dvs) + + # Update the route with a RouteOrch's owned NHG + binary = gen_valid_binary() + nhg_fvs = gen_nhg_fvs(binary) + rt_ps.set('2.2.2.0/24', nhg_fvs) + + # Assert no new group has been added + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, MAX_ECMP_COUNT) + + # Delete a next hop group + delete_nhg() + + # The temporary group should be promoted + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, MAX_ECMP_COUNT) + + # Create a temporary NHG owned by NhgOrch + nhg_index, _ = create_temp_nhg() + + # Assert no new group has been added + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, MAX_ECMP_COUNT) + + # Delete a next hop group + delete_nhg() + + # Check that the temporary group is promoted + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, MAX_ECMP_COUNT) + + # Save the promoted NHG index/ID + asic_nhgs[nhg_index] = self.get_nhg_id(nhg_index, dvs) + + # Create a temporary NHG that contains only NHs that do not exist + nhg_fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.21,10.0.0.23'), + ('ifname', 'Ethernet40,Ethernet44')]) + nhg_index = gen_nhg_index(nhg_count) + nhg_count += 1 + nhg_ps.set(nhg_index, nhg_fvs) + + # Assert the group is not created + assert not nhg_exists(nhg_index) + + # Update the temporary NHG to a valid one + binary = gen_valid_binary() + nhg_fvs = gen_nhg_fvs(binary) + nhg_ps.set(nhg_index, nhg_fvs) + + # Assert the temporary group was updated and the group got created + nhg_id = self.get_nhg_id(nhg_index, dvs) + assert nhg_id is not None + + # Update the temporary NHG to an invalid one again + nhg_fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.21,10.0.0.23'), + ('ifname', 'Ethernet40,Ethernet44')]) + nhg_ps.set(nhg_index, nhg_fvs) + + # The update should fail and the temporary NHG should still be pointing + # to the old valid NH + assert self.get_nhg_id(nhg_index, dvs) == nhg_id + + # Delete the temporary group + nhg_ps._del(nhg_index) + + # Cleanup + + # Delete the route + rt_ps._del('2.2.2.0/24') + asic_db.wait_for_deleted_entry(self.ASIC_RT_STR, rt_id) + + # Delete the next hop groups + for k in asic_nhgs: + nhg_ps._del(k) + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, asic_nhgs_count) + + def test_nhgorch_multi_nh_group(self, dvs, testlog): + for i in range(4): + self.config_intf(i, dvs) + + app_db = dvs.get_app_db() + asic_db = dvs.get_asic_db() + + # create next hop group in APPL DB + + nhg_ps = swsscommon.ProducerStateTable(app_db.db_connection, "NEXT_HOP_GROUP_TABLE") + + prev_nhg_keys = asic_db.get_keys(self.ASIC_NHG_STR) + asic_nhgs_count = len(prev_nhg_keys) + asic_nhgms_count = len(asic_db.get_keys(self.ASIC_NHGM_STR)) + + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.1,10.0.0.3,10.0.0.5'), + ("ifname", "Ethernet0,Ethernet4,Ethernet8")]) + nhg_ps.set("group1", fvs) + + # check if group was propagated to ASIC DB + + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, asic_nhgs_count + 1) + keys = asic_db.get_keys(self.ASIC_NHG_STR) + + found_nhg = False + + for nhgid in keys: + if nhgid not in prev_nhg_keys: + found_nhg = True + break + + assert found_nhg == True + + # check if members were propagated to ASIC DB + + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count + 3) + keys = asic_db.get_keys(self.ASIC_NHGM_STR) + count = 0 + + for k in keys: + fvs = asic_db.get_entry(self.ASIC_NHGM_STR, k) + + if fvs["SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_GROUP_ID"] == nhgid: + count += 1 + + assert count == 3 + + # create route in APPL DB + + rt_ps = swsscommon.ProducerStateTable(app_db.db_connection, "ROUTE_TABLE") + + asic_routes_count = len(asic_db.get_keys(self.ASIC_RT_STR)) + + fvs = swsscommon.FieldValuePairs([("nexthop_group", "group1")]) + rt_ps.set("2.2.2.0/24", fvs) + + # check if route was propagated to ASIC DB + + asic_db.wait_for_n_keys(self.ASIC_RT_STR, asic_routes_count + 1) + keys = asic_db.get_keys(self.ASIC_RT_STR) + + k = self.get_route_id('2.2.2.0/24', dvs) + assert k is not None + + # assert the route points to next hop group + fvs = asic_db.get_entry(self.ASIC_RT_STR, k) + assert fvs["SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID"] == nhgid + + # bring links down one-by-one + for i in [0, 1, 2]: + self.flap_intf(i, 'down', dvs) + + keys = asic_db.get_keys(self.ASIC_NHGM_STR) + + assert len(keys) == (asic_nhgms_count + 2 - i) + assert bool(asic_db.get_entry(self.ASIC_NHG_STR, nhgid)) + + # bring links up one-by-one + for i in [0, 1, 2]: + self.flap_intf(i, 'up', dvs) + + keys = asic_db.get_keys(self.ASIC_NHGM_STR) + + assert len(keys) == (asic_nhgms_count + i + 1) + + count = 0 + for k in keys: + fvs = asic_db.get_entry(self.ASIC_NHGM_STR, k) + if fvs["SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_GROUP_ID"] == nhgid: + count += 1 + assert count == i + 1 + + # Bring an interface down + self.flap_intf(1, 'down', dvs) + + # One group member will get deleted + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, 2) + + # Create a group that contains a NH that uses the down link + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.1,10.0.0.3'), + ("ifname", "Ethernet0,Ethernet4")]) + nhg_ps.set('group2', fvs) + + # The group should get created, but it will not contained the NH that + # has the link down + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, asic_nhgs_count + 2) + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count + 3) + + # Update the NHG with one interface down + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.3,10.0.0.1'), + ("ifname", "Ethernet4,Ethernet0")]) + nhg_ps.set("group1", fvs) + + # Wait for group members to update - the group will contain only the + # members that have their links up + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count + 2) + + # Bring the interface up + self.flap_intf(1, 'up', dvs) + + # Check that the missing member of group1 and group2 is being added + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count + 4) + + # Remove group2 + nhg_ps._del('group2') + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, asic_nhgs_count + 1) + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count + 2) + + # Create group2 with a NH that does not exist + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.3,10.0.0.63'), + ("ifname", "Ethernet4,Ethernet124")]) + nhg_ps.set("group2", fvs) + + # The groups should not be created + time.sleep(1) + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, asic_nhgs_count + 1) + + # Update group1 with a NH that does not exist + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.3,10.0.0.63'), + ("ifname", "Ethernet4,Ethernet124")]) + nhg_ps.set("group1", fvs) + + # The update should fail, leaving group1 with only the unremoved + # members + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count + 1) + + # Configure the missing NH's interface + self.config_intf(31, dvs) + + # A couple more routes will be added to ASIC_DB + asic_routes_count += 2 + + # Group2 should get created and group1 should be updated + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, asic_nhgs_count + 2) + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count + 4) + + # Delete group2 + nhg_ps._del('group2') + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, asic_nhgs_count + 1) + + # Update the NHG, adding two new members + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.1,10.0.0.3,10.0.0.5,10.0.0.7'), + ("ifname", "Ethernet0,Ethernet4,Ethernet8,Ethernet12")]) + nhg_ps.set("group1", fvs) + + # Wait for members to be added + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count + 4) + + # Update the group to one NH only - orchagent should fail as it is + # referenced + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.1'), ("ifname", "Ethernet0")]) + nhg_ps.set("group1", fvs) + time.sleep(1) + + assert bool(asic_db.get_entry(self.ASIC_NHG_STR, nhgid)) + assert len(asic_db.get_keys(self.ASIC_NHGM_STR)) == asic_nhgms_count + 4 + + # Remove route 2.2.2.0/24 + rt_ps._del("2.2.2.0/24") + + # Wait for route 2.2.2.0/24 to be removed + asic_db.wait_for_n_keys(self.ASIC_RT_STR, asic_routes_count) + + # The group is not referenced anymore, so it should get updated to a + # single next hop group, removing the ASIC group + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, asic_nhgs_count) + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count) + + # Remove group1 + nhg_ps._del("group1") + + def test_nhgorch_single_nh_group(self, dvs, testlog): + for i in range(2): + self.config_intf(i, dvs) + + app_db = dvs.get_app_db() + asic_db = dvs.get_asic_db() + + # Count existing objects + asic_nhgs_count = len(asic_db.get_keys(self.ASIC_NHG_STR)) + asic_nhgms_count = len(asic_db.get_keys(self.ASIC_NHGM_STR)) + + # Create single next hop group in APPL DB + nhg_ps = swsscommon.ProducerStateTable(app_db.db_connection, "NEXT_HOP_GROUP_TABLE") + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.1'), ("ifname", "Ethernet0")]) + nhg_ps.set("group1", fvs) + time.sleep(1) + + # Check that the group was not created in ASIC DB + assert len(asic_db.get_keys(self.ASIC_NHG_STR)) == asic_nhgs_count + + # Get the NHG ID. The UT assumes there is only one next hop with IP 10.0.0.1 + count = 0 + nhgid = 0 + for k in asic_db.get_keys(self.ASIC_NHS_STR): + fvs = asic_db.get_entry(self.ASIC_NHS_STR, k) + if fvs['SAI_NEXT_HOP_ATTR_IP'] == '10.0.0.1': + nhgid = k + count += 1 + assert count == 1 + + # create route in APPL DB + + rt_ps = swsscommon.ProducerStateTable(app_db.db_connection, "ROUTE_TABLE") + + asic_routes_count = len(asic_db.get_keys(self.ASIC_RT_STR)) + + fvs = swsscommon.FieldValuePairs([("nexthop_group", "group1")]) + rt_ps.set("2.2.2.0/24", fvs) + + # check if route was propagated to ASIC DB + + asic_db.wait_for_n_keys(self.ASIC_RT_STR, asic_routes_count + 1) + keys = asic_db.get_keys(self.ASIC_RT_STR) + + k = self.get_route_id('2.2.2.0/24', dvs) + + # assert the route points to next hop group + fvs = asic_db.get_entry(self.ASIC_RT_STR, k) + assert fvs["SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID"] == nhgid + + # Bring group's link down + self.flap_intf(0, 'down', dvs) + + # Check that the group still has the same ID and everything works + assert nhgid == self.get_nhg_id('group1', dvs) + + # Bring group's link back up + self.flap_intf(0, 'up', dvs) + + # Check that the group still has the same ID and everything works + assert nhgid == self.get_nhg_id('group1', dvs) + + # Bring an interface down + self.flap_intf(1, 'down', dvs) + + # Create group2 pointing to the NH which's link is down + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.3'), ("ifname", "Ethernet4")]) + nhg_ps.set("group2", fvs) + + # The group should be created. To test this, add a route pointing to + # it. If the group exists, the route will be created as well. + fvs = swsscommon.FieldValuePairs([("nexthop_group", "group2")]) + rt_ps.set('2.2.4.0/24', fvs) + asic_db.wait_for_n_keys(self.ASIC_RT_STR, asic_routes_count + 2) + + # Delete the route and the group + rt_ps._del('2.2.4.0/24') + nhg_ps._del('group2') + + # Bring the interface back up + self.flap_intf(1, 'up', dvs) + + # Create group2 pointing to a NH that does not exist + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.61'), ("ifname", "Ethernet120")]) + nhg_ps.set("group2", fvs) + + # The group should fail to be created. To test this, we add a route + # pointing to it. The route should not be created. + fvs = swsscommon.FieldValuePairs([("nexthop_group", "group2")]) + rt_ps.set('2.2.4.0/24', fvs) + asic_db.wait_for_n_keys(self.ASIC_RT_STR, asic_routes_count + 1) + + # Configure the NH's interface + self.config_intf(30, dvs) + + # A couple of more routes will be added to ASIC_DB + asic_routes_count += 2 + + # The group should be created, so the route should be added + asic_db.wait_for_n_keys(self.ASIC_RT_STR, asic_routes_count + 2) + + # Delete the route and the group + rt_ps._del('2.2.4.0/24') + nhg_ps._del('group2') + + # Update the group to a multiple NH group - should fail as the group + # is referenced + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.1,10.0.0.3'), + ("ifname", "Ethernet0,Ethernet4")]) + nhg_ps.set("group1", fvs) + time.sleep(1) + assert len(asic_db.get_keys(self.ASIC_NHG_STR)) == asic_nhgs_count + + # Update group1 to point to another NH - should fail as the group is + # referenced + nhgid = self.get_nhg_id('group1', dvs) + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.3'), ("ifname", "Ethernet4")]) + nhg_ps.set("group1", fvs) + assert nhgid == self.get_nhg_id('group1', dvs) + + # Remove route 2.2.2.0/24 + rt_ps._del("2.2.2.0/24") + + # Wait for route 2.2.2.0/24 to be removed + asic_db.wait_for_n_keys(self.ASIC_RT_STR, asic_routes_count) + + # The group is not referenced anymore, so it should be updated + assert nhgid != self.get_nhg_id('group1', dvs) + + # Update the group to a multiple NH group - should work as the group is + # not referenced + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.1,10.0.0.3'), + ("ifname", "Ethernet0,Ethernet4")]) + nhg_ps.set("group1", fvs) + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, asic_nhgs_count + 1) + + # Remove group1 + nhg_ps._del("group1") + + def test_nhgorch_label_route(self, dvs, testlog): + for i in range(4): + self.config_intf(i, dvs) + + app_db = dvs.get_app_db() + asic_db = dvs.get_asic_db() + + # create next hop group in APPL DB + + nhg_ps = swsscommon.ProducerStateTable(app_db.db_connection, "NEXT_HOP_GROUP_TABLE") + + prev_nhg_keys = asic_db.get_keys(self.ASIC_NHG_STR) + asic_nhgs_count = len(prev_nhg_keys) + asic_nhgms_count = len(asic_db.get_keys(self.ASIC_NHGM_STR)) + + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.1,10.0.0.3,10.0.0.5'), + ("ifname", "Ethernet0,Ethernet4,Ethernet8")]) + nhg_ps.set("group1", fvs) + + # check if group was propagated to ASIC DB + + asic_db.wait_for_n_keys(self.ASIC_NHG_STR, asic_nhgs_count + 1) + keys = asic_db.get_keys(self.ASIC_NHG_STR) + + found_nhg = False + + for nhgid in keys: + if nhgid not in prev_nhg_keys: + found_nhg = True + break + + assert found_nhg == True + + # check if members were propagated to ASIC DB + + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, asic_nhgms_count + 3) + keys = asic_db.get_keys(self.ASIC_NHGM_STR) + count = 0 + + for k in keys: + fvs = asic_db.get_entry(self.ASIC_NHGM_STR, k) + + if fvs["SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_GROUP_ID"] == nhgid: + count += 1 + + assert count == 3 + + # create prefix route in APPL DB + + rt_ps = swsscommon.ProducerStateTable(app_db.db_connection, "ROUTE_TABLE") + + asic_routes_count = len(asic_db.get_keys(self.ASIC_RT_STR)) + + fvs = swsscommon.FieldValuePairs([("nexthop_group", "group1")]) + rt_ps.set("2.2.2.0/24", fvs) + + # check if route was propagated to ASIC DB + + asic_db.wait_for_n_keys(self.ASIC_RT_STR, asic_routes_count + 1) + keys = asic_db.get_keys(self.ASIC_RT_STR) + + k = self.get_route_id('2.2.2.0/24', dvs) + + # assert the route points to next hop group + fvs = asic_db.get_entry(self.ASIC_RT_STR, k) + assert fvs["SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID"] == nhgid + + # create label route in APPL DB pointing to the same next hop group + + lrt_ps = swsscommon.ProducerStateTable(app_db.db_connection, "LABEL_ROUTE_TABLE") + + asic_insegs_count = len(asic_db.get_keys(self.ASIC_INSEG_STR)) + + fvs = swsscommon.FieldValuePairs([("nexthop_group", "group1")]) + lrt_ps.set("20", fvs) + + # check if label route was propagated to ASIC DB + + asic_db.wait_for_n_keys(self.ASIC_INSEG_STR, asic_insegs_count + 1) + keys = asic_db.get_keys(self.ASIC_INSEG_STR) + + k = self.get_inseg_id('20', dvs) + assert k is not None + + # assert the route points to next hop group + fvs = asic_db.get_entry(self.ASIC_INSEG_STR, k) + assert fvs["SAI_INSEG_ENTRY_ATTR_NEXT_HOP_ID"] == nhgid + + # bring links down one-by-one + for i in [0, 1, 2]: + self.flap_intf(i, 'down', dvs) + + keys = asic_db.get_keys(self.ASIC_NHGM_STR) + + assert len(keys) == (asic_nhgms_count + 2 - i) + assert bool(asic_db.get_entry(self.ASIC_NHG_STR, nhgid)) + + # bring links up one-by-one + for i in [0, 1, 2]: + self.flap_intf(i, 'up', dvs) + + keys = asic_db.get_keys(self.ASIC_NHGM_STR) + + assert len(keys) == (asic_nhgms_count + i + 1) + + count = 0 + for k in keys: + fvs = asic_db.get_entry(self.ASIC_NHGM_STR, k) + if fvs["SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_GROUP_ID"] == nhgid: + count += 1 + assert count == i + 1 + + # Bring an interface down + self.flap_intf(1, 'down', dvs) + + # One group member will get deleted + asic_db.wait_for_n_keys(self.ASIC_NHGM_STR, 2) + + # Remove route 2.2.2.0/24 + rt_ps._del("2.2.2.0/24") + + # Wait for route 2.2.2.0/24 to be removed + asic_db.wait_for_n_keys(self.ASIC_RT_STR, asic_routes_count) + + # Remove label route 20 + lrt_ps._del("20") + + # Wait for route 20 to be removed + asic_db.wait_for_n_keys(self.ASIC_INSEG_STR, asic_insegs_count) + + # Remove group1 + nhg_ps._del("group1") + + # create label route in APPL DBwith no properties (so just popping the label) + + lrt_ps = swsscommon.ProducerStateTable(app_db.db_connection, "LABEL_ROUTE_TABLE") + + asic_insegs_count = len(asic_db.get_keys(self.ASIC_INSEG_STR)) + + fvs = swsscommon.FieldValuePairs([("ifname","")]) + lrt_ps.set("30", fvs) + + # check if label route was propagated to ASIC DB + + asic_db.wait_for_n_keys(self.ASIC_INSEG_STR, asic_insegs_count + 1) + keys = asic_db.get_keys(self.ASIC_INSEG_STR) + + k = self.get_inseg_id('30', dvs) + assert k is not None + + # assert the route points to no next hop group + fvs = asic_db.get_entry(self.ASIC_INSEG_STR, k) + assert fvs["SAI_INSEG_ENTRY_ATTR_NEXT_HOP_ID"] == 'oid:0x0' + + # Remove label route 30 + lrt_ps._del("30") + + # Wait for route 20 to be removed + asic_db.wait_for_n_keys(self.ASIC_INSEG_STR, asic_insegs_count) # Add Dummy always-pass test at end as workaroud # for issue when Flaky fail on final test it invokes module tear-down before retrying