Skip to content
This repository has been archived by the owner on Dec 20, 2023. It is now read-only.

Commit

Permalink
net: sched: make bstats per cpu and estimator RCU safe
Browse files Browse the repository at this point in the history
In order to run qdiscs without locking, statistics and estimators
need to be handled correctly.

To resolve bstats, make the statistics per cpu. Because this is
only needed for qdiscs that run without locks, which will not be
the case for most qdiscs in the near future, only create percpu
stats when a qdisc sets the TCQ_F_CPUSTATS flag.

Next, because estimators use the bstats to calculate packets per
second and bytes per second, the estimator code paths are updated
to use the per cpu statistics.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
jrfastab authored and davem330 committed Sep 30, 2014
1 parent 79cf79a commit 22e0f8b
Show file tree
Hide file tree
Showing 19 changed files with 164 additions and 51 deletions.
11 changes: 11 additions & 0 deletions include/net/gen_stats.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@
#include <linux/rtnetlink.h>
#include <linux/pkt_sched.h>

/*
 * One CPU's slot of the basic (byte/packet) statistics.
 *
 * Instances are reached through __percpu pointers; writers bracket their
 * update of @bstats with u64_stats_update_begin()/u64_stats_update_end()
 * on @syncp, and readers use the matching fetch_begin/fetch_retry pair.
 */
struct gnet_stats_basic_cpu {
/* byte and packet counters accumulated on this CPU */
struct gnet_stats_basic_packed bstats;
/* u64_stats sync object guarding reads/updates of bstats */
struct u64_stats_sync syncp;
};

struct gnet_dump {
spinlock_t * lock;
struct sk_buff * skb;
Expand All @@ -27,7 +32,11 @@ int gnet_stats_start_copy_compat(struct sk_buff *skb, int type,
spinlock_t *lock, struct gnet_dump *d);

int gnet_stats_copy_basic(struct gnet_dump *d,
struct gnet_stats_basic_cpu __percpu *cpu,
struct gnet_stats_basic_packed *b);
void __gnet_stats_copy_basic(struct gnet_stats_basic_packed *bstats,
struct gnet_stats_basic_cpu __percpu *cpu,
struct gnet_stats_basic_packed *b);
int gnet_stats_copy_rate_est(struct gnet_dump *d,
const struct gnet_stats_basic_packed *b,
struct gnet_stats_rate_est64 *r);
Expand All @@ -37,11 +46,13 @@ int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len);
int gnet_stats_finish_copy(struct gnet_dump *d);

int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
struct gnet_stats_basic_cpu __percpu *cpu_bstats,
struct gnet_stats_rate_est64 *rate_est,
spinlock_t *stats_lock, struct nlattr *opt);
void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
struct gnet_stats_rate_est64 *rate_est);
int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
struct gnet_stats_basic_cpu __percpu *cpu_bstats,
struct gnet_stats_rate_est64 *rate_est,
spinlock_t *stats_lock, struct nlattr *opt);
bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats,
Expand Down
22 changes: 21 additions & 1 deletion include/net/sch_generic.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <linux/rcupdate.h>
#include <linux/pkt_sched.h>
#include <linux/pkt_cls.h>
#include <linux/percpu.h>
#include <net/gen_stats.h>
#include <net/rtnetlink.h>

Expand Down Expand Up @@ -58,6 +59,7 @@ struct Qdisc {
* multiqueue device.
*/
#define TCQ_F_WARN_NONWC (1 << 16)
#define TCQ_F_CPUSTATS 0x20 /* run using percpu statistics */
u32 limit;
const struct Qdisc_ops *ops;
struct qdisc_size_table __rcu *stab;
Expand All @@ -83,7 +85,10 @@ struct Qdisc {
*/
unsigned long state;
struct sk_buff_head q;
struct gnet_stats_basic_packed bstats;
union {
struct gnet_stats_basic_packed bstats;
struct gnet_stats_basic_cpu __percpu *cpu_bstats;
} __packed;
unsigned int __state;
struct gnet_stats_queue qstats;
struct rcu_head rcu_head;
Expand Down Expand Up @@ -487,6 +492,10 @@ static inline int qdisc_enqueue_root(struct sk_buff *skb, struct Qdisc *sch)
return qdisc_enqueue(skb, sch) & NET_XMIT_MASK;
}

/*
 * qdisc_is_percpu_stats - test whether a qdisc has TCQ_F_CPUSTATS set
 * @q: qdisc to inspect
 *
 * Returns true when the TCQ_F_CPUSTATS bit is present in @q->flags,
 * false otherwise.
 */
static inline bool qdisc_is_percpu_stats(const struct Qdisc *q)
{
	return (q->flags & TCQ_F_CPUSTATS) != 0;
}

static inline void bstats_update(struct gnet_stats_basic_packed *bstats,
const struct sk_buff *skb)
Expand All @@ -495,6 +504,17 @@ static inline void bstats_update(struct gnet_stats_basic_packed *bstats,
bstats->packets += skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
}

/*
 * qdisc_bstats_update_cpu - account @skb in the current CPU's counters
 * @sch: qdisc whose per-cpu statistics (sch->cpu_bstats) are updated
 * @skb: packet being accounted
 *
 * Looks up this CPU's slot of @sch->cpu_bstats and applies bstats_update()
 * to it, bracketed by u64_stats_update_begin()/u64_stats_update_end() on
 * the slot's syncp.
 */
static inline void qdisc_bstats_update_cpu(struct Qdisc *sch,
					   const struct sk_buff *skb)
{
	struct gnet_stats_basic_cpu *cpu_stats;

	cpu_stats = this_cpu_ptr(sch->cpu_bstats);

	u64_stats_update_begin(&cpu_stats->syncp);
	bstats_update(&cpu_stats->bstats, skb);
	u64_stats_update_end(&cpu_stats->syncp);
}

static inline void qdisc_bstats_update(struct Qdisc *sch,
const struct sk_buff *skb)
{
Expand Down
29 changes: 18 additions & 11 deletions net/core/gen_estimator.c
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ struct gen_estimator
u32 avpps;
struct rcu_head e_rcu;
struct rb_node node;
struct gnet_stats_basic_cpu __percpu *cpu_bstats;
struct rcu_head head;
};

struct gen_estimator_head
Expand All @@ -115,25 +117,24 @@ static void est_timer(unsigned long arg)

rcu_read_lock();
list_for_each_entry_rcu(e, &elist[idx].list, list) {
u64 nbytes;
struct gnet_stats_basic_packed b = {0};
u64 brate;
u32 npackets;
u32 rate;

spin_lock(e->stats_lock);
read_lock(&est_lock);
if (e->bstats == NULL)
goto skip;

nbytes = e->bstats->bytes;
npackets = e->bstats->packets;
brate = (nbytes - e->last_bytes)<<(7 - idx);
e->last_bytes = nbytes;
__gnet_stats_copy_basic(&b, e->cpu_bstats, e->bstats);

brate = (b.bytes - e->last_bytes)<<(7 - idx);
e->last_bytes = b.bytes;
e->avbps += (brate >> e->ewma_log) - (e->avbps >> e->ewma_log);
e->rate_est->bps = (e->avbps+0xF)>>5;

rate = (npackets - e->last_packets)<<(12 - idx);
e->last_packets = npackets;
rate = (b.packets - e->last_packets)<<(12 - idx);
e->last_packets = b.packets;
e->avpps += (rate >> e->ewma_log) - (e->avpps >> e->ewma_log);
e->rate_est->pps = (e->avpps+0x1FF)>>10;
skip:
Expand Down Expand Up @@ -203,12 +204,14 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats
*
*/
int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
struct gnet_stats_basic_cpu __percpu *cpu_bstats,
struct gnet_stats_rate_est64 *rate_est,
spinlock_t *stats_lock,
struct nlattr *opt)
{
struct gen_estimator *est;
struct gnet_estimator *parm = nla_data(opt);
struct gnet_stats_basic_packed b = {0};
int idx;

if (nla_len(opt) < sizeof(*parm))
Expand All @@ -221,15 +224,18 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
if (est == NULL)
return -ENOBUFS;

__gnet_stats_copy_basic(&b, cpu_bstats, bstats);

idx = parm->interval + 2;
est->bstats = bstats;
est->rate_est = rate_est;
est->stats_lock = stats_lock;
est->ewma_log = parm->ewma_log;
est->last_bytes = bstats->bytes;
est->last_bytes = b.bytes;
est->avbps = rate_est->bps<<5;
est->last_packets = bstats->packets;
est->last_packets = b.packets;
est->avpps = rate_est->pps<<10;
est->cpu_bstats = cpu_bstats;

spin_lock_bh(&est_tree_lock);
if (!elist[idx].timer.function) {
Expand Down Expand Up @@ -290,11 +296,12 @@ EXPORT_SYMBOL(gen_kill_estimator);
* Returns 0 on success or a negative error code.
*/
int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
struct gnet_stats_basic_cpu __percpu *cpu_bstats,
struct gnet_stats_rate_est64 *rate_est,
spinlock_t *stats_lock, struct nlattr *opt)
{
gen_kill_estimator(bstats, rate_est);
return gen_new_estimator(bstats, rate_est, stats_lock, opt);
return gen_new_estimator(bstats, cpu_bstats, rate_est, stats_lock, opt);
}
EXPORT_SYMBOL(gen_replace_estimator);

Expand Down
53 changes: 48 additions & 5 deletions net/core/gen_stats.c
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,43 @@ gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock,
}
EXPORT_SYMBOL(gnet_stats_start_copy);

/*
 * __gnet_stats_copy_basic_cpu - sum per-cpu basic counters into @bstats
 * @bstats: destination; each CPU's bytes/packets are ADDED to it, so the
 *          caller is expected to pass it in zeroed (or pre-seeded)
 * @cpu: per-cpu statistics to read
 *
 * For every possible CPU, take a snapshot of that CPU's byte and packet
 * counters inside a u64_stats fetch_begin/fetch_retry loop, then add the
 * snapshot to @bstats.
 */
static void
__gnet_stats_copy_basic_cpu(struct gnet_stats_basic_packed *bstats,
			    struct gnet_stats_basic_cpu __percpu *cpu)
{
	int i;

	for_each_possible_cpu(i) {
		struct gnet_stats_basic_cpu *bcpu = per_cpu_ptr(cpu, i);
		unsigned int start;
		__u64 bytes;
		__u32 packets;

		/* Retry until both counters were read within one sequence. */
		do {
			start = u64_stats_fetch_begin_irq(&bcpu->syncp);
			bytes = bcpu->bstats.bytes;
			packets = bcpu->bstats.packets;
		} while (u64_stats_fetch_retry_irq(&bcpu->syncp, start));

		/*
		 * Accumulate the snapshot taken above. Re-reading
		 * bcpu->bstats here would bypass the retry loop and could
		 * pick up values from a concurrent, half-finished update.
		 */
		bstats->bytes += bytes;
		bstats->packets += packets;
	}
}

/**
 * __gnet_stats_copy_basic - gather basic statistics into @bstats
 * @bstats: destination counters
 * @cpu: per-cpu statistics, or NULL when the counters live in @b
 * @b: plain (non per-cpu) statistics, read only when @cpu is NULL
 *
 * With a non-NULL @cpu the per-cpu counters are summed into @bstats by
 * __gnet_stats_copy_basic_cpu(), which adds to whatever @bstats already
 * holds. With a NULL @cpu the bytes and packets of @b are copied over
 * @bstats.
 */
void
__gnet_stats_copy_basic(struct gnet_stats_basic_packed *bstats,
			struct gnet_stats_basic_cpu __percpu *cpu,
			struct gnet_stats_basic_packed *b)
{
	if (!cpu) {
		bstats->bytes = b->bytes;
		bstats->packets = b->packets;
		return;
	}

	__gnet_stats_copy_basic_cpu(bstats, cpu);
}
EXPORT_SYMBOL(__gnet_stats_copy_basic);

/**
* gnet_stats_copy_basic - copy basic statistics into statistic TLV
* @d: dumping handle
Expand All @@ -109,19 +146,25 @@ EXPORT_SYMBOL(gnet_stats_start_copy);
* if the room in the socket buffer was not sufficient.
*/
int
gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_packed *b)
gnet_stats_copy_basic(struct gnet_dump *d,
struct gnet_stats_basic_cpu __percpu *cpu,
struct gnet_stats_basic_packed *b)
{
struct gnet_stats_basic_packed bstats = {0};

__gnet_stats_copy_basic(&bstats, cpu, b);

if (d->compat_tc_stats) {
d->tc_stats.bytes = b->bytes;
d->tc_stats.packets = b->packets;
d->tc_stats.bytes = bstats.bytes;
d->tc_stats.packets = bstats.packets;
}

if (d->tail) {
struct gnet_stats_basic sb;

memset(&sb, 0, sizeof(sb));
sb.bytes = b->bytes;
sb.packets = b->packets;
sb.bytes = bstats.bytes;
sb.packets = bstats.packets;
return gnet_stats_copy(d, TCA_STATS_BASIC, &sb, sizeof(sb));
}
return 0;
Expand Down
2 changes: 1 addition & 1 deletion net/netfilter/xt_RATEEST.c
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
cfg.est.interval = info->interval;
cfg.est.ewma_log = info->ewma_log;

ret = gen_new_estimator(&est->bstats, &est->rstats,
ret = gen_new_estimator(&est->bstats, NULL, &est->rstats,
&est->lock, &cfg.opt);
if (ret < 0)
goto err2;
Expand Down
5 changes: 3 additions & 2 deletions net/sched/act_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,8 @@ int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a,
p->tcfc_tm.install = jiffies;
p->tcfc_tm.lastuse = jiffies;
if (est) {
int err = gen_new_estimator(&p->tcfc_bstats, &p->tcfc_rate_est,
int err = gen_new_estimator(&p->tcfc_bstats, NULL,
&p->tcfc_rate_est,
&p->tcfc_lock, est);
if (err) {
kfree(p);
Expand Down Expand Up @@ -619,7 +620,7 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a,
if (err < 0)
goto errout;

if (gnet_stats_copy_basic(&d, &p->tcfc_bstats) < 0 ||
if (gnet_stats_copy_basic(&d, NULL, &p->tcfc_bstats) < 0 ||
gnet_stats_copy_rate_est(&d, &p->tcfc_bstats,
&p->tcfc_rate_est) < 0 ||
gnet_stats_copy_queue(&d, &p->tcfc_qstats) < 0)
Expand Down
2 changes: 1 addition & 1 deletion net/sched/act_police.c
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ static int tcf_act_police_locate(struct net *net, struct nlattr *nla,

spin_lock_bh(&police->tcf_lock);
if (est) {
err = gen_replace_estimator(&police->tcf_bstats,
err = gen_replace_estimator(&police->tcf_bstats, NULL,
&police->tcf_rate_est,
&police->tcf_lock, est);
if (err)
Expand Down
29 changes: 23 additions & 6 deletions net/sched/sch_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -942,6 +942,13 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
sch->handle = handle;

if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
if (qdisc_is_percpu_stats(sch)) {
sch->cpu_bstats =
alloc_percpu(struct gnet_stats_basic_cpu);
if (!sch->cpu_bstats)
goto err_out4;
}

if (tca[TCA_STAB]) {
stab = qdisc_get_stab(tca[TCA_STAB]);
if (IS_ERR(stab)) {
Expand All @@ -964,8 +971,11 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
else
root_lock = qdisc_lock(sch);

err = gen_new_estimator(&sch->bstats, &sch->rate_est,
root_lock, tca[TCA_RATE]);
err = gen_new_estimator(&sch->bstats,
sch->cpu_bstats,
&sch->rate_est,
root_lock,
tca[TCA_RATE]);
if (err)
goto err_out4;
}
Expand All @@ -984,6 +994,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
return NULL;

err_out4:
free_percpu(sch->cpu_bstats);
/*
* Any broken qdiscs that would require a ops->reset() here?
* The qdisc was never in action so it shouldn't be necessary.
Expand Down Expand Up @@ -1022,9 +1033,11 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
because change can't be undone. */
if (sch->flags & TCQ_F_MQROOT)
goto out;
gen_replace_estimator(&sch->bstats, &sch->rate_est,
qdisc_root_sleeping_lock(sch),
tca[TCA_RATE]);
gen_replace_estimator(&sch->bstats,
sch->cpu_bstats,
&sch->rate_est,
qdisc_root_sleeping_lock(sch),
tca[TCA_RATE]);
}
out:
return 0;
Expand Down Expand Up @@ -1299,6 +1312,7 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
u32 portid, u32 seq, u16 flags, int event)
{
struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
struct tcmsg *tcm;
struct nlmsghdr *nlh;
unsigned char *b = skb_tail_pointer(skb);
Expand Down Expand Up @@ -1334,7 +1348,10 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
goto nla_put_failure;

if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
if (qdisc_is_percpu_stats(q))
cpu_bstats = q->cpu_bstats;

if (gnet_stats_copy_basic(&d, cpu_bstats, &q->bstats) < 0 ||
gnet_stats_copy_rate_est(&d, &q->bstats, &q->rate_est) < 0 ||
gnet_stats_copy_queue(&d, &q->qstats) < 0)
goto nla_put_failure;
Expand Down
2 changes: 1 addition & 1 deletion net/sched/sch_atm.c
Original file line number Diff line number Diff line change
Expand Up @@ -639,7 +639,7 @@ atm_tc_dump_class_stats(struct Qdisc *sch, unsigned long arg,

flow->qstats.qlen = flow->q->q.qlen;

if (gnet_stats_copy_basic(d, &flow->bstats) < 0 ||
if (gnet_stats_copy_basic(d, NULL, &flow->bstats) < 0 ||
gnet_stats_copy_queue(d, &flow->qstats) < 0)
return -1;

Expand Down
Loading

0 comments on commit 22e0f8b

Please sign in to comment.