Skip to content
This repository has been archived by the owner on Dec 20, 2023. It is now read-only.

Commit

Permalink
fib_select_multipath() to choose nexthop via naive but lockless round…
Browse files Browse the repository at this point in the history
… robin
  • Loading branch information
Nathan Taylor committed Dec 19, 2014
1 parent 0f02bdb commit 9a76db0
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 61 deletions.
6 changes: 1 addition & 5 deletions include/net/ip_fib.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,11 +70,10 @@ struct fib_nh {
struct net_device *nh_dev;
struct hlist_node nh_hash;
struct fib_info *nh_parent;
unsigned int nh_flags;
unsigned long nh_flags;
unsigned char nh_scope;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
int nh_weight;
int nh_power;
#endif
#ifdef CONFIG_IP_ROUTE_CLASSID
__u32 nh_tclassid;
Expand Down Expand Up @@ -111,9 +110,6 @@ struct fib_info {
#define fib_rtt fib_metrics[RTAX_RTT-1]
#define fib_advmss fib_metrics[RTAX_ADVMSS-1]
int fib_nhs;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
int fib_power;
#endif
struct rcu_head rcu;
struct fib_nh fib_nh[0];
#define fib_dev fib_nh[0].nh_dev
Expand Down
11 changes: 8 additions & 3 deletions include/uapi/linux/rtnetlink.h
Original file line number Diff line number Diff line change
Expand Up @@ -323,9 +323,14 @@ struct rtnexthop {

/* rtnh_flags */

#define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */
#define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */
#define RTNH_F_ONLINK 4 /* Gateway is forced on link */
#define RTNH_F_DEAD_OFFSET 0
#define RTNH_F_DEAD (1 << RTNH_F_DEAD_OFFSET) /* Nexthop is dead (used by multipath) */

#define RTNH_F_PERVASIVE_OFFSET 1
#define RTNH_F_PERVASIVE (1 << RTNH_F_PERVASIVE_OFFSET) /* Do recursive gateway lookup */

#define RTNH_F_ONLINK_OFFSET 2
#define RTNH_F_ONLINK (1 << RTNH_F_ONLINK_OFFSET) /* Gateway is forced on link */

/* Macros to handle hexthops */

Expand Down
76 changes: 23 additions & 53 deletions net/ipv4/fib_semantics.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/percpu.h>

#include <net/arp.h>
#include <net/ip.h>
Expand All @@ -57,7 +58,7 @@ static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];

#ifdef CONFIG_IP_ROUTE_MULTIPATH

static DEFINE_SPINLOCK(fib_multipath_lock);
DEFINE_PER_CPU(int, fib_multipath_counter) = -1;

#define for_nexthops(fi) { \
int nhsel; const struct fib_nh *nh; \
Expand Down Expand Up @@ -258,9 +259,7 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
if (nh->nh_oif != onh->nh_oif ||
nh->nh_gw != onh->nh_gw ||
nh->nh_scope != onh->nh_scope ||
#ifdef CONFIG_IP_ROUTE_MULTIPATH
nh->nh_weight != onh->nh_weight ||
#endif
#ifdef CONFIG_IP_ROUTE_CLASSID
nh->nh_tclassid != onh->nh_tclassid ||
#endif
Expand Down Expand Up @@ -1137,12 +1136,6 @@ int fib_sync_down_dev(struct net_device *dev, int force)
else if (nexthop_nh->nh_dev == dev &&
nexthop_nh->nh_scope != scope) {
nexthop_nh->nh_flags |= RTNH_F_DEAD;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
spin_lock_bh(&fib_multipath_lock);
fi->fib_power -= nexthop_nh->nh_power;
nexthop_nh->nh_power = 0;
spin_unlock_bh(&fib_multipath_lock);
#endif
dead++;
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
Expand Down Expand Up @@ -1261,10 +1254,7 @@ int fib_sync_up(struct net_device *dev)
!__in_dev_get_rtnl(dev))
continue;
alive++;
spin_lock_bh(&fib_multipath_lock);
nexthop_nh->nh_power = 0;
nexthop_nh->nh_flags &= ~RTNH_F_DEAD;
spin_unlock_bh(&fib_multipath_lock);
clear_bit(RTNH_F_DEAD_OFFSET, &nexthop_nh->nh_flags);

This comment has been minimized.

Copy link
@tauger

tauger Jan 7, 2016

Need memory barrier around clear_bit() per the code comment on clear_bit():

 * clear_bit() is atomic and may not be reordered.  However, it does
 * not contain a memory barrier, so if it is used for locking purposes,
 * you should call smp_mb__before_atomic() and/or smp_mb__after_atomic()
 * in order to ensure changes are visible on other processors.
} endfor_nexthops(fi)

if (alive > 0) {
Expand All @@ -1277,55 +1267,35 @@ int fib_sync_up(struct net_device *dev)
}

/*
* The algorithm is suboptimal, but it provides really
* fair weighted route distribution.
* The algorithm is suboptimal, and it doesn't provide really
* fair weighted route distribution, but it's lock-free!
*/
void fib_select_multipath(struct fib_result *res)
{
struct fib_info *fi = res->fi;
int w;
struct fib_nh *nexthop_nh;
int nhsel, attempts;

spin_lock_bh(&fib_multipath_lock);
if (fi->fib_power <= 0) {
int power = 0;
change_nexthops(fi) {
if (!(nexthop_nh->nh_flags & RTNH_F_DEAD)) {
power += nexthop_nh->nh_weight;
nexthop_nh->nh_power = nexthop_nh->nh_weight;
}
} endfor_nexthops(fi);
fi->fib_power = power;
if (power <= 0) {
spin_unlock_bh(&fib_multipath_lock);
/* Race condition: route has just become dead. */
res->nh_sel = 0;
return;
}
/* Initialise the counter with the CPU ID if not already set. */
if (this_cpu_read(fib_multipath_counter) == -1) {
this_cpu_write(fib_multipath_counter, get_cpu());

This comment has been minimized.

Copy link
@tauger

tauger Jan 7, 2016

get_cpu() would disable preemption and require a paring put_cpu() to (re)enable preemption. In this case a single smp_processor_id() should suffice.

Also, it would better to move the initialisation of per-cpu counters to a _init() function(e.g ip_fib_init()), to avoid paying initialisation check on a per-flow basis.

}

/* Round-robin multipaths, choosing the next live one or
* an arbitrary dead one if none are live. */
attempts = fi->fib_nhs;
nhsel = 0;
while (--attempts >= 0) {
this_cpu_inc(fib_multipath_counter);
nhsel = this_cpu_read(fib_multipath_counter) % fi->fib_nhs;

/* w should be random number [0..fi->fib_power-1],
* it is pretty bad approximation.
*/

w = jiffies % fi->fib_power;

change_nexthops(fi) {
if (!(nexthop_nh->nh_flags & RTNH_F_DEAD) &&
nexthop_nh->nh_power) {
w -= nexthop_nh->nh_power;
if (w <= 0) {
nexthop_nh->nh_power--;
fi->fib_power--;
res->nh_sel = nhsel;
spin_unlock_bh(&fib_multipath_lock);
return;
}
nexthop_nh = &fi->fib_nh[nhsel];
if (!nexthop_nh->nh_flags & RTNH_F_DEAD) {
break;
}
} endfor_nexthops(fi);
}

/* Race condition: route has just become dead. */
res->nh_sel = 0;
spin_unlock_bh(&fib_multipath_lock);
/* Race condition: route may have just become dead. */
res->nh_sel = nhsel;
}
#endif

0 comments on commit 9a76db0

Please sign in to comment.