Skip to content

Commit

Permalink
coordinator: Fix error to add route when the nic is down
Browse files Browse the repository at this point in the history
  • Loading branch information
cyclinder committed Aug 12, 2024
1 parent fc04adf commit 1de14e1
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 19 deletions.
14 changes: 10 additions & 4 deletions cmd/coordinator/cmd/command_add.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,12 @@ func CmdAdd(args *skel.CmdArgs) (err error) {
}
logger.Sugar().Debugf("Get current host netns: %v", c.hostNs.Path())

// checking if the nic is in up state
if err = c.checkNICState(args.IfName); err != nil {
logger.Error("error to check pod's nic state", zap.Error(err))
return fmt.Errorf("error to check pod's nic %s state: %v", args.Args, err)
}

// check if it's first time invoke
err = c.coordinatorModeAndFirstInvoke(logger, conf.PodDefaultCniNic)
if err != nil {
Expand Down Expand Up @@ -299,10 +305,10 @@ func CmdAdd(args *skel.CmdArgs) (err error) {
return err
}

c.currentRuleTable = c.mustGetRuleNumber(c.podNics)
if c.currentRuleTable < 0 {
logger.Error("coordinator must be working with spiderpool: no spiderendpoint records found", zap.Strings("spiderNics", c.podNics))
return fmt.Errorf("coordinator must be working with spiderpool: no spiderendpoint records found")
c.currentRuleTable, err = c.mustGetRuleNumber()
if err != nil {
logger.Error("error to get rule number for the nic", zap.Error(err))
return fmt.Errorf("error to get rule number for the nic %s: %w", args.IfName, err)
}
logger.Debug("Get currentRuleTable", zap.Int("ruleTable", c.currentRuleTable))

Expand Down
44 changes: 32 additions & 12 deletions cmd/coordinator/cmd/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,21 +116,41 @@ func (c *coordinator) coordinatorModeAndFirstInvoke(logger *zap.Logger, podFirst
return fmt.Errorf("unknown tuneMode: %s", c.tuneMode)
}

// checkNICState makes sure the interface named iface is up, operating inside
// the network namespace held in c.netns.
//
// The link is looked up by name; if its up flag is not set, it is brought up
// with netlink.LinkSetUp. An error is returned when the link cannot be found
// or cannot be set up.
func (c *coordinator) checkNICState(iface string) error {
	return c.netns.Do(func(_ ns.NetNS) error {
		link, err := netlink.LinkByName(iface)
		if err != nil {
			return fmt.Errorf("failed to get link %s: %w", iface, err)
		}

		// Flags is a bitmask: an interface that is up usually carries other
		// bits as well (broadcast, multicast, ...), so the up bit has to be
		// tested with a mask rather than compared for equality.
		if link.Attrs().Flags&net.FlagUp != 0 {
			return nil
		}

		if err = netlink.LinkSetUp(link); err != nil {
			return fmt.Errorf("failed to set link %s up: %w", iface, err)
		}
		return nil
	})
}

// mustGetRuleNumber returns the rule table number corresponding to the interface that precedes the given interface.
// for example:
// input: net1, output: 100(eth0)
// input: net2, output: 101(net1)
func (c *coordinator) mustGetRuleNumber(spiderNics []string) int {
if len(spiderNics) == 0 {
return -1
}
func (c *coordinator) mustGetRuleNumber() (int, error) {
ruleTable := -1
err := c.netns.Do(func(netNS ns.NetNS) error {
links, err := netlink.LinkList()
if err != nil {
return fmt.Errorf("error to get link list: %w", err)
}

if c.currentInterface == defaultOverlayVethName {
return unix.RT_TABLE_MAIN
} else if spiderNics[0] == c.currentInterface {
return defaultPodRuleTable
// start from 100, and exclude loopback and eth0
// example:
// lo, eth0, net1 -> 100
// lo, eth0, net1, net2 -> 101
ruleTable = 99 + len(links) - 2
return nil
})

if err != nil {
return -1, err
}
return defaultPodRuleTable + len(spiderNics) - 1
return ruleTable, nil
}

// setupVeth sets up a pair of virtual ethernet devices. move one to the host and other
Expand Down
8 changes: 5 additions & 3 deletions pkg/networking/networking/route.go
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ func moveRouteTable(linkIndex, srcRuleTable, dstRuleTable int, onlyCopyOverlayDe
return fmt.Errorf("failed to RouteDel %s in main table: %+v", route.String(), err)
}
logger.Debug("Del the default route from main successfully", zap.String("Route", route.String()))
return nil
}

if onlyCopyOverlayDefaultRoute {
Expand All @@ -257,10 +258,11 @@ func moveRouteTable(linkIndex, srcRuleTable, dstRuleTable int, onlyCopyOverlayDe
}

// we need copy the all routes in main table of the podDefaultRouteNic to dstRuleTable.
// Otherwise, the reply packet don't know
// Otherwise, we don't know how to forward the packets sent from the nic
route.Table = dstRuleTable
if err = netlink.RouteAdd(&route); err != nil && !os.IsExist(err) {
logger.Error("failed to RouteAdd in new table ", zap.String("route", route.String()), zap.Error(err))
return fmt.Errorf("failed to RouteAdd (%+v) to new table: %+v", route, err)
logger.Error("failed to add the route table", zap.String("route", route.String()), zap.Error(err))
// return fmt.Errorf("failed to add the route table (%+v): %+v", route, err)
}
logger.Debug("MoveRoute to new table successfully", zap.String("Route", route.String()))
return nil
Expand Down

0 comments on commit 1de14e1

Please sign in to comment.