Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[muxorch] Using bulker to program routes/neighbors during switchover #3148

Merged
merged 17 commits into from
Jun 4, 2024
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
206 changes: 187 additions & 19 deletions orchagent/muxorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -744,20 +744,29 @@ void MuxNbrHandler::update(NextHopKey nh, sai_object_id_t tunnelId, bool add, Mu
bool MuxNbrHandler::enable(bool update_rt)
{
NeighborEntry neigh;
std::list<NeighborBulkContext> bulk_neigh_ctx_list;
std::list<MuxRouteBulkContext> bulk_route_ctx_list;

auto it = neighbors_.begin();
while (it != neighbors_.end())
{
SWSS_LOG_INFO("Enabling neigh %s on %s", it->first.to_string().c_str(), alias_.c_str());

neigh = NeighborEntry(it->first, alias_);
if (!gNeighOrch->enableNeighbor(neigh))
{
SWSS_LOG_INFO("Enabling neigh failed for %s", neigh.ip_address.to_string().c_str());
return false;
}
bulk_neigh_ctx_list.push_back(NeighborBulkContext(neigh, true));
it++;
}

if (!gNeighOrch->createBulkNeighborEntries(bulk_neigh_ctx_list) || !gNeighOrch->flushBulkNeighborEntries(bulk_neigh_ctx_list))
{
return false;
}

it = neighbors_.begin();
while (it != neighbors_.end())
{
/* Update NH to point to learned neighbor */
neigh = NeighborEntry(it->first, alias_);
it->second = gNeighOrch->getLocalNextHopId(neigh);

/* Reprogram route */
Expand Down Expand Up @@ -795,22 +804,48 @@ bool MuxNbrHandler::enable(bool update_rt)
IpPrefix pfx = it->first.to_string();
if (update_rt)
{
if (remove_route(pfx) != SAI_STATUS_SUCCESS)
{
return false;
}
updateTunnelRoute(nh_key, false);
bulk_route_ctx_list.push_back(MuxRouteBulkContext(pfx, false));
}

it++;
}

if (update_rt)
{
if (!createBulkRouteEntries(bulk_route_ctx_list))
{
gRouteBulker.clear();
return false;
}
gRouteBulker.flush();
if (!processBulkRouteEntries(bulk_route_ctx_list))
{
gRouteBulker.clear();
return false;
}

it = neighbors_.begin();
while (it != neighbors_.end())
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We've introduced three loops now in the new code. Can we optimize it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I took the loop out from the comment below, I believe the two left over are necessary, we need the neighbors enabled before we update routes to point to their next hops

{
NextHopKey nh_key = NextHopKey(it->first, alias_);
if (update_rt)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is update_rt checked here? Its already done above.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good catch, will fix that

{
updateTunnelRoute(nh_key, false);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why don't we move this to original section and do this loop only if Bulk api fails. So we don't have to do it everytime

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see what you're saying, I think I just wanted to make sure the order was maintained. I'll fix that

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's safe to take out this loop entirely then, if bulk api fails we will fall back, which will remove the tunnel routes anyways

}

it++;
}
}

gRouteBulker.clear();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we clear this even if its returned false above

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Took a look at this, it's not needed since we call clear from removeRoutes

return true;
}

bool MuxNbrHandler::disable(sai_object_id_t tnh)
{
NeighborEntry neigh;
std::list<NeighborBulkContext> bulk_neigh_ctx_list;
std::list<MuxRouteBulkContext> bulk_route_ctx_list;

auto it = neighbors_.begin();
while (it != neighbors_.end())
Expand Down Expand Up @@ -852,21 +887,32 @@ bool MuxNbrHandler::disable(sai_object_id_t tnh)
updateTunnelRoute(nh_key, true);

IpPrefix pfx = it->first.to_string();
if (create_route(pfx, it->second) != SAI_STATUS_SUCCESS)
{
return false;
}
bulk_route_ctx_list.push_back(MuxRouteBulkContext(pfx, it->second, true));

neigh = NeighborEntry(it->first, alias_);
if (!gNeighOrch->disableNeighbor(neigh))
{
SWSS_LOG_INFO("Disabling neigh failed for %s", neigh.ip_address.to_string().c_str());
return false;
}
bulk_neigh_ctx_list.push_back(NeighborBulkContext(neigh, false));

it++;
}

if (!createBulkRouteEntries(bulk_route_ctx_list))
{
gRouteBulker.clear();
return false;
}
gRouteBulker.flush();
if (!processBulkRouteEntries(bulk_route_ctx_list))
{
gRouteBulker.clear();
return false;
}

if (!gNeighOrch->createBulkNeighborEntries(bulk_neigh_ctx_list) || !gNeighOrch->flushBulkNeighborEntries(bulk_neigh_ctx_list))
{
return false;
}

gRouteBulker.clear();
return true;
}

Expand All @@ -881,6 +927,128 @@ sai_object_id_t MuxNbrHandler::getNextHopId(const NextHopKey nhKey)
return SAI_NULL_OBJECT_ID;
}

bool MuxNbrHandler::createBulkRouteEntries(std::list<MuxRouteBulkContext>& bulk_ctx_list)
{
int count = 0;

SWSS_LOG_INFO("Creating %d bulk route entries", (int)bulk_ctx_list.size());

for (auto ctx = bulk_ctx_list.begin(); ctx != bulk_ctx_list.end(); ctx++)
{
auto& object_statuses = ctx->object_statuses;
sai_route_entry_t route_entry;
route_entry.switch_id = gSwitchId;
route_entry.vr_id = gVirtualRouterId;
copy(route_entry.destination, ctx->pfx);
subnet(route_entry.destination, route_entry.destination);

SWSS_LOG_INFO("Creating route entry %s, nh %" PRIx64 ", add:%d", ctx->pfx.getIp().to_string().c_str(), ctx->nh, ctx->add);

object_statuses.emplace_back();
if (ctx->add)
{
sai_attribute_t attr;
vector<sai_attribute_t> attrs;

attr.id = SAI_ROUTE_ENTRY_ATTR_PACKET_ACTION;
attr.value.s32 = SAI_PACKET_ACTION_FORWARD;
attrs.push_back(attr);

attr.id = SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID;
attr.value.oid = ctx->nh;
attrs.push_back(attr);

sai_status_t status = gRouteBulker.create_entry(&object_statuses.back(), &route_entry, (uint32_t)attrs.size(), attrs.data());
if (status == SAI_STATUS_ITEM_ALREADY_EXISTS)
{
SWSS_LOG_ERROR("Failed to create add entry for tunnel route in bulker object, entry already exists %s,nh %" PRIx64,
ctx->pfx.getIp().to_string().c_str(), ctx->nh);
continue;
}
}
else
{
sai_status_t status = gRouteBulker.remove_entry(&object_statuses.back(), &route_entry);
if (status == SAI_STATUS_ITEM_ALREADY_EXISTS)
{
SWSS_LOG_ERROR("Failed to create remove entry for tunnel route in bulker object, entry already exists %s,nh %" PRIx64,
ctx->pfx.getIp().to_string().c_str(), ctx->nh);
continue;
}
}
count++;
}

SWSS_LOG_INFO("Successfully created %d bulk route entries", count);
return true;
}

bool MuxNbrHandler::processBulkRouteEntries(std::list<MuxRouteBulkContext>& bulk_ctx_list)
{
for (auto ctx = bulk_ctx_list.begin(); ctx != bulk_ctx_list.end(); ctx++)
{
auto& object_statuses = ctx->object_statuses;
auto it_status = object_statuses.begin();
sai_status_t status = *it_status++;

sai_route_entry_t route_entry;
route_entry.switch_id = gSwitchId;
route_entry.vr_id = gVirtualRouterId;
copy(route_entry.destination, ctx->pfx);
subnet(route_entry.destination, route_entry.destination);

if (ctx->add)
{
if (status != SAI_STATUS_SUCCESS)
{
if (status == SAI_STATUS_ITEM_ALREADY_EXISTS) {
SWSS_LOG_NOTICE("Tunnel route to %s already exists", ctx->pfx.to_string().c_str());
continue;
}
SWSS_LOG_ERROR("Failed to create tunnel route %s,nh %" PRIx64 " rv:%d",
ctx->pfx.getIp().to_string().c_str(), ctx->nh, status);
return false;
}

if (route_entry.destination.addr_family == SAI_IP_ADDR_FAMILY_IPV4)
{
gCrmOrch->incCrmResUsedCounter(CrmResourceType::CRM_IPV4_ROUTE);
}
else
{
gCrmOrch->incCrmResUsedCounter(CrmResourceType::CRM_IPV6_ROUTE);
}

SWSS_LOG_NOTICE("Created tunnel route to %s ", ctx->pfx.to_string().c_str());
}
else
{
if (status != SAI_STATUS_SUCCESS)
{
if (status == SAI_STATUS_ITEM_NOT_FOUND) {
SWSS_LOG_NOTICE("Tunnel route to %s already removed", ctx->pfx.to_string().c_str());
continue;
}
SWSS_LOG_ERROR("Failed to remove tunnel route %s, rv:%d",
ctx->pfx.getIp().to_string().c_str(), status);
return false;
}

if (route_entry.destination.addr_family == SAI_IP_ADDR_FAMILY_IPV4)
{
gCrmOrch->decCrmResUsedCounter(CrmResourceType::CRM_IPV4_ROUTE);
}
else
{
gCrmOrch->decCrmResUsedCounter(CrmResourceType::CRM_IPV6_ROUTE);
}

SWSS_LOG_NOTICE("Removed tunnel route to %s ", ctx->pfx.to_string().c_str());
}
}
return true;
}

void MuxNbrHandler::updateTunnelRoute(NextHopKey nh, bool add)
{
MuxOrch* mux_orch = gDirectory.get<MuxOrch*>();
Expand Down
28 changes: 27 additions & 1 deletion orchagent/muxorch.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "tunneldecaporch.h"
#include "aclorch.h"
#include "neighorch.h"
#include "bulker.h"

enum MuxState
{
Expand All @@ -35,6 +36,27 @@ enum MuxCableType
ACTIVE_ACTIVE
};

struct MuxRouteBulkContext
{
std::deque<sai_status_t> object_statuses; // Bulk statuses
IpPrefix pfx; // Route prefix
sai_object_id_t nh; // nexthop id
bool add; // add route bool

MuxRouteBulkContext(IpPrefix pfx, bool add)
: pfx(pfx), add(add)
{
}

MuxRouteBulkContext(IpPrefix pfx, sai_object_id_t nh, bool add)
: pfx(pfx), nh(nh), add(add)
{
}
};

extern size_t gMaxBulkSize;
extern sai_route_api_t* sai_route_api;

// Forward Declarations
class MuxOrch;
class MuxCableOrch;
Expand Down Expand Up @@ -64,7 +86,7 @@ typedef std::map<IpAddress, sai_object_id_t> MuxNeighbor;
class MuxNbrHandler
{
public:
MuxNbrHandler() = default;
MuxNbrHandler() : gRouteBulker(sai_route_api, gMaxBulkSize) {};

bool enable(bool update_rt);
bool disable(sai_object_id_t);
Expand All @@ -75,11 +97,15 @@ class MuxNbrHandler
string getAlias() const { return alias_; };

private:
bool createBulkRouteEntries(std::list<MuxRouteBulkContext>& bulk_ctx_list);
bool processBulkRouteEntries(std::list<MuxRouteBulkContext>& bulk_ctx_list);

inline void updateTunnelRoute(NextHopKey, bool = true);

private:
MuxNeighbor neighbors_;
string alias_;
EntityBulker<sai_route_api_t> gRouteBulker;
};

// Mux Cable object
Expand Down
Loading
Loading