From 5577d2a2feef63163d43e53c922da595a0d1fef2 Mon Sep 17 00:00:00 2001
From: Alexander Ostapenko
Date: Thu, 27 Sep 2018 13:18:59 +0700
Subject: [PATCH 1/5] Fix #806: Reduce the number of IPIs raised during
 work_queue processing.

---
 tempesta_fw/cache.c      | 17 ++++++++++++++---
 tempesta_fw/sock.c       | 35 ++++++++++++++++++++++++++---------
 tempesta_fw/work_queue.c |  1 +
 tempesta_fw/work_queue.h | 27 +++++++++++++++++----------
 4 files changed, 58 insertions(+), 22 deletions(-)

diff --git a/tempesta_fw/cache.c b/tempesta_fw/cache.c
index 5efb103ce7..307dc51acf 100644
--- a/tempesta_fw/cache.c
+++ b/tempesta_fw/cache.c
@@ -1673,7 +1673,7 @@ static void
 tfw_cache_ipi(struct irq_work *work)
 {
 	TfwWorkTasklet *ct = container_of(work, TfwWorkTasklet, ipi_work);
-
+	clear_bit(TFW_QUEUE_B_IPI, &ct->wq.flags);
 	tasklet_schedule(&ct->tasklet);
 }
 
@@ -1726,7 +1726,6 @@ tfw_cache_process(TfwHttpMsg *msg, tfw_http_cache_cb_t action)
 
 	TFW_DBG2("Cache: schedule tasklet w/ work: to_cpu=%d from_cpu=%d"
 		 " msg=%p key=%lx\n", cpu, smp_processor_id(), cw.msg, key);
-
 	if (tfw_wq_push(&ct->wq, &cw, cpu, &ct->ipi_work, tfw_cache_ipi)) {
 		TFW_WARN("Cache work queue overrun: [%s]\n",
 			 resp ? "response" : "request");
@@ -1743,10 +1742,22 @@ static void
 tfw_wq_tasklet(unsigned long data)
 {
 	TfwWorkTasklet *ct = (TfwWorkTasklet *)data;
+	TfwRBQueue *wq = &ct->wq;
 	TfwCWork cw;
 
-	while (!tfw_wq_pop(&ct->wq, &cw))
+	while (!tfw_wq_pop(wq, &cw))
 		tfw_cache_do_action(cw.msg, cw.action);
+
+	set_bit(TFW_QUEUE_B_IPI, &wq->flags);
+
+	smp_mb__after_atomic();
+
+	if (!tfw_wq_size(wq))
+		return;
+
+	clear_bit(TFW_QUEUE_B_IPI, &wq->flags);
+
+	tasklet_schedule(&ct->tasklet);
 }
 
 /**
diff --git a/tempesta_fw/sock.c b/tempesta_fw/sock.c
index 8e89c84ee7..284814cae0 100644
--- a/tempesta_fw/sock.c
+++ b/tempesta_fw/sock.c
@@ -187,6 +187,8 @@ do {						\
 static void
 ss_ipi(struct irq_work *work)
 {
+	TfwRBQueue *wq = &per_cpu(si_wq, smp_processor_id());
+	clear_bit(TFW_QUEUE_B_IPI, &wq->flags);
 	raise_softirq(NET_TX_SOFTIRQ);
 }
 
@@ -200,6 +202,7 @@ ss_turnstile_push(long ticket, SsWork *sw, int cpu)
 {
 	struct irq_work *iw = &per_cpu(ipi_work, cpu);
 	SsCloseBacklog *cb = &per_cpu(close_backlog, cpu);
+	TfwRBQueue *wq = &per_cpu(si_wq, cpu);
 	SsCblNode *cn;
 
 	cn = kmem_cache_alloc(ss_cbacklog_cache, GFP_ATOMIC);
@@ -214,7 +217,8 @@ ss_turnstile_push(long ticket, SsWork *sw, int cpu)
 		cb->turn = ticket;
 	spin_unlock_bh(&cb->lock);
 
-	tfw_raise_softirq(cpu, iw, ss_ipi);
+	if (test_bit(TFW_QUEUE_B_IPI, &wq->flags))
+		tfw_raise_softirq(cpu, iw, ss_ipi);
 
 	return 0;
 }
@@ -254,9 +258,8 @@ ss_wq_push(SsWork *sw, int cpu)
 }
 
 static int
-ss_wq_pop(SsWork *sw, long *ticket)
+ss_wq_pop(TfwRBQueue *wq, SsWork *sw, long *ticket)
 {
-	TfwRBQueue *wq = this_cpu_ptr(&si_wq);
 	SsCloseBacklog *cb = this_cpu_ptr(&close_backlog);
 
 	/*
@@ -1308,9 +1311,10 @@ static void
 ss_tx_action(void)
 {
 	SsWork sw;
+	int budget;
 	struct sk_buff *skb;
+	TfwRBQueue *wq = this_cpu_ptr(&si_wq);
 	long ticket = 0;
-	int budget;
 
 	/*
	 * @budget limits the loop to prevent live lock on constantly arriving
 	 * new items. We use some small integer as a lower bound to catch just
 	 * arriving items.
 	 */
 	budget = max(10UL, ss_close_q_sz());
-	while ((!ss_active() || budget--) && !ss_wq_pop(&sw, &ticket)) {
+	while ((!ss_active() || budget--) && !ss_wq_pop(wq, &sw, &ticket)) {
 		struct sock *sk = sw.sk;
 
 		bh_lock_sock(sk);
@@ -1358,11 +1362,24 @@ ss_tx_action(void)
 
 	/*
 	 * Rearm softirq for local CPU if there are more jobs to do.
-	 * ss_synchronize() is responsible for raising the softirq if there are
-	 * more jobs in the work queue or the backlog.
+	 * If all jobs are finished and the work queue and backlog
+	 * are empty, then re-enable IPI generation by producers
+	 * (it is disabled in the 'ss_ipi()' handler).
+	 * ss_synchronize() is responsible for raising the softirq
+	 * if there are more jobs in the work queue or the backlog.
 	 */
-	if (!budget)
-		raise_softirq(NET_TX_SOFTIRQ);
+	if (budget) {
+		set_bit(TFW_QUEUE_B_IPI, &wq->flags);
+
+		smp_mb__after_atomic();
+
+		if (!ss_close_q_sz())
+			return;
+
+		clear_bit(TFW_QUEUE_B_IPI, &wq->flags);
+	}
+
+	raise_softirq(NET_TX_SOFTIRQ);
 }
 
 /*
diff --git a/tempesta_fw/work_queue.c b/tempesta_fw/work_queue.c
index ba40344d18..f9e857d412 100644
--- a/tempesta_fw/work_queue.c
+++ b/tempesta_fw/work_queue.c
@@ -49,6 +49,7 @@ tfw_wq_init(TfwRBQueue *q, int node)
 	q->last_head = 0;
 	atomic64_set(&q->head, 0);
 	atomic64_set(&q->tail, 0);
+	set_bit(TFW_QUEUE_B_IPI, &q->flags);
 
 	q->array = kmalloc_node(QSZ * WQ_ITEM_SZ, GFP_KERNEL, node);
 	if (!q->array) {
diff --git a/tempesta_fw/work_queue.h b/tempesta_fw/work_queue.h
index f5a073af8f..4f07ad8377 100644
--- a/tempesta_fw/work_queue.h
+++ b/tempesta_fw/work_queue.h
@@ -38,13 +38,28 @@ typedef struct {
 	long		last_head;
 	atomic64_t	head ____cacheline_aligned;
 	atomic64_t	tail ____cacheline_aligned;
+	unsigned long	flags;
 } TfwRBQueue;
 
+enum {
+	/* Enable IPI generation. */
+	TFW_QUEUE_B_IPI = 0
+};
+
 int tfw_wq_init(TfwRBQueue *wq, int node);
 void tfw_wq_destroy(TfwRBQueue *wq);
 long __tfw_wq_push(TfwRBQueue *wq, void *ptr);
 int tfw_wq_pop_ticket(TfwRBQueue *wq, void *buf, long *ticket);
 
+static inline int
+tfw_wq_size(TfwRBQueue *q)
+{
+	long t = atomic64_read(&q->tail);
+	long h = atomic64_read(&q->head);
+
+	return t > h ? 0 : h - t;
+}
+
 static inline void
 tfw_raise_softirq(int cpu, struct irq_work *work,
 		  void (*local_cpu_cb)(struct irq_work *))
@@ -63,7 +78,8 @@ tfw_wq_push(TfwRBQueue *q, void *ptr, int cpu, struct irq_work *work,
 	if (unlikely(ticket))
 		return ticket;
 
-	tfw_raise_softirq(cpu, work, local_cpu_cb);
+	if (test_bit(TFW_QUEUE_B_IPI, &q->flags))
+		tfw_raise_softirq(cpu, work, local_cpu_cb);
 
 	return 0;
 }
@@ -74,13 +90,4 @@ tfw_wq_pop(TfwRBQueue *wq, void *buf)
 	return tfw_wq_pop_ticket(wq, buf, NULL);
 }
 
-static inline int
-tfw_wq_size(TfwRBQueue *q)
-{
-	long t = atomic64_read(&q->tail);
-	long h = atomic64_read(&q->head);
-
-	return t > h ? 0 : h - t;
-}
-
 #endif /* __TFW_WORK_QUEUE_H__ */

From c39e8d7762a3840c886e398ff4837cc8457fc1a4 Mon Sep 17 00:00:00 2001
From: Alexander Ostapenko
Date: Fri, 28 Sep 2018 20:37:02 +0700
Subject: [PATCH 2/5] Correction for work_queue backlog (#1066).

---
 tempesta_fw/sock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tempesta_fw/sock.c b/tempesta_fw/sock.c
index 284814cae0..fea0fdcd0c 100644
--- a/tempesta_fw/sock.c
+++ b/tempesta_fw/sock.c
@@ -211,7 +211,7 @@ ss_turnstile_push(long ticket, SsWork *sw, int cpu)
 	cn->ticket = ticket;
 	memcpy(&cn->sw, sw, sizeof(*sw));
 	spin_lock_bh(&cb->lock);
-	list_add(&cn->list, &cb->head);
+	list_add_tail(&cn->list, &cb->head);
 	cb->size++;
 	if (cb->turn > ticket)
 		cb->turn = ticket;

From a82e10e919460fbd8c30ac689af151796166707d Mon Sep 17 00:00:00 2001
From: Alexander Ostapenko
Date: Wed, 10 Oct 2018 20:32:05 +0700
Subject: [PATCH 3/5] Additional changes according to review comments (#806).
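
The handshake is subtle, so here is a sketch of the interleaving that
the size re-check in the new TFW_WQ_IPI_SYNC() macro closes (CPU0 runs
a producer in tfw_wq_push(), CPU1 runs the consumer tasklet; an
illustrative trace only, not an exhaustive proof):

	CPU0 (producer)			CPU1 (consumer)
	__tfw_wq_push(q, ptr)		tfw_wq_pop() -> queue empty
	test_bit() -> IPI disabled,
	so no softirq is raised
					set_bit(TFW_QUEUE_B_IPI)
					smp_mb__after_atomic()
					size_cb(wq) -> non-zero:
					clear the bit and reschedule

Without the re-check, the consumer could go to sleep with one item
stranded in the queue and no IPI ever coming for it; with the paired
barriers, either the producer sees the bit set and raises the softirq,
or the consumer sees the non-empty queue and reschedules itself.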

---
 tempesta_fw/cache.c      |  9 +--------
 tempesta_fw/sock.c       | 28 +++++++++++++---------------
 tempesta_fw/work_queue.h | 14 ++++++++++++++
 3 files changed, 28 insertions(+), 23 deletions(-)

diff --git a/tempesta_fw/cache.c b/tempesta_fw/cache.c
index 307dc51acf..8c1cd410fb 100644
--- a/tempesta_fw/cache.c
+++ b/tempesta_fw/cache.c
@@ -1748,14 +1748,7 @@ tfw_wq_tasklet(unsigned long data)
 	while (!tfw_wq_pop(wq, &cw))
 		tfw_cache_do_action(cw.msg, cw.action);
 
-	set_bit(TFW_QUEUE_B_IPI, &wq->flags);
-
-	smp_mb__after_atomic();
-
-	if (!tfw_wq_size(wq))
-		return;
-
-	clear_bit(TFW_QUEUE_B_IPI, &wq->flags);
+	TFW_WQ_IPI_SYNC(tfw_wq_size, wq);
 
 	tasklet_schedule(&ct->tasklet);
 }
diff --git a/tempesta_fw/sock.c b/tempesta_fw/sock.c
index fea0fdcd0c..3f10f7e401 100644
--- a/tempesta_fw/sock.c
+++ b/tempesta_fw/sock.c
@@ -217,6 +217,10 @@ ss_turnstile_push(long ticket, SsWork *sw, int cpu)
 		cb->turn = ticket;
 	spin_unlock_bh(&cb->lock);
 
+	/*
+	 * We do not need explicit memory barriers after
+	 * the spinlock operations.
+	 */
 	if (test_bit(TFW_QUEUE_B_IPI, &wq->flags))
 		tfw_raise_softirq(cpu, iw, ss_ipi);
 
@@ -299,7 +303,7 @@ ss_wq_pop(TfwRBQueue *wq, SsWork *sw, long *ticket)
 }
 
 static size_t
-__ss_close_q_sz(int cpu)
+ss_wq_size(int cpu)
 {
 	TfwRBQueue *wq = &per_cpu(si_wq, cpu);
 	SsCloseBacklog *cb = &per_cpu(close_backlog, cpu);
@@ -308,9 +312,11 @@ static size_t
-ss_close_q_sz(void)
+ss_wq_local_size(TfwRBQueue *wq)
 {
-	return __ss_close_q_sz(smp_processor_id());
+	SsCloseBacklog *cb = this_cpu_ptr(&close_backlog);
+
+	return tfw_wq_size(wq) + cb->size;
 }
 
 /*
@@ -1321,7 +1327,7 @@ ss_tx_action(void)
 	 * new items. We use some small integer as a lower bound to catch just
 	 * arriving items.
 	 */
-	budget = max(10UL, ss_close_q_sz());
+	budget = max(10UL, ss_wq_local_size(wq));
 	while ((!ss_active() || budget--) && !ss_wq_pop(wq, &sw, &ticket)) {
 		struct sock *sk = sw.sk;
 
@@ -1368,16 +1374,8 @@ ss_tx_action(void)
 	 * ss_synchronize() is responsible for raising the softirq
 	 * if there are more jobs in the work queue or the backlog.
 	 */
-	if (budget) {
-		set_bit(TFW_QUEUE_B_IPI, &wq->flags);
-
-		smp_mb__after_atomic();
-
-		if (!ss_close_q_sz())
-			return;
-
-		clear_bit(TFW_QUEUE_B_IPI, &wq->flags);
-	}
+	if (budget)
+		TFW_WQ_IPI_SYNC(ss_wq_local_size, wq);
 
 	raise_softirq(NET_TX_SOFTIRQ);
 }
@@ -1456,7 +1454,7 @@ ss_synchronize(void)
 	while (1) {
 		for_each_online_cpu(cpu) {
 			int n_conn = atomic64_read(&per_cpu(__ss_act_cnt, cpu));
-			int n_q = __ss_close_q_sz(cpu);
+			int n_q = ss_wq_size(cpu);
 			if (n_conn + n_q) {
 				irq_work_sync(&per_cpu(ipi_work, cpu));
 				schedule(); /* let softirq finish works */
diff --git a/tempesta_fw/work_queue.h b/tempesta_fw/work_queue.h
index 4f07ad8377..e4ce9d2970 100644
--- a/tempesta_fw/work_queue.h
+++ b/tempesta_fw/work_queue.h
@@ -46,6 +46,15 @@ enum {
 	TFW_QUEUE_B_IPI = 0
 };
 
+#define TFW_WQ_IPI_SYNC(size_cb, wq)				\
+do {								\
+	set_bit(TFW_QUEUE_B_IPI, &(wq)->flags);			\
+	smp_mb__after_atomic();					\
+	if (!size_cb(wq))					\
+		return;						\
+	clear_bit(TFW_QUEUE_B_IPI, &(wq)->flags);		\
+} while (0)
+
 int tfw_wq_init(TfwRBQueue *wq, int node);
 void tfw_wq_destroy(TfwRBQueue *wq);
 long __tfw_wq_push(TfwRBQueue *wq, void *ptr);
@@ -77,6 +86,11 @@ tfw_wq_push(TfwRBQueue *q, void *ptr, int cpu, struct irq_work *work,
 	long ticket = __tfw_wq_push(q, ptr);
 	if (unlikely(ticket))
 		return ticket;
+	/*
+	 * The atomic operation is 'atomic64_cmpxchg()' in
+	 * '__tfw_wq_push()' above.
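+	 * It also pairs with TFW_WQ_IPI_SYNC() on the consumer side:
+	 * either this producer observes TFW_QUEUE_B_IPI set and
+	 * raises the softirq, or the consumer observes the non-empty
+	 * queue and reschedules itself, so the wakeup cannot be lost.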
+	 */
+	smp_mb__after_atomic();
 
 	if (test_bit(TFW_QUEUE_B_IPI, &q->flags))
 		tfw_raise_softirq(cpu, work, local_cpu_cb);
 
 	return 0;

From aac45fc234d47b7534cd0214b56b8b49d88dabe0 Mon Sep 17 00:00:00 2001
From: Alexander Ostapenko
Date: Thu, 11 Oct 2018 20:50:48 +0700
Subject: [PATCH 4/5] Add TODO regarding timer callback deactivation and
 memory barriers.

---
 tempesta_fw/http_sched_ratio.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/tempesta_fw/http_sched_ratio.c b/tempesta_fw/http_sched_ratio.c
index b5228407e1..8991d36efe 100644
--- a/tempesta_fw/http_sched_ratio.c
+++ b/tempesta_fw/http_sched_ratio.c
@@ -1011,8 +1011,14 @@ tfw_sched_ratio_del_grp(TfwSrvGroup *sg)
 	if (!ratio)
 		return;
 	/*
-	 * Make sure the timer doesn't re-arms itself. This
-	 * also ensures that no more RCU callbacks are created.
+	 * Make sure the timer doesn't re-arm itself. This also ensures
+	 * that no more RCU callbacks are created.
+	 *
+	 * TODO: check whether the memory barriers are redundant here
+	 * (and in several similar places, as well as in the
+	 * corresponding timer callbacks); also, 'del_timer_sync()'
+	 * seems to handle re-arming of the timer from its callback
+	 * correctly, so perhaps we don't need the 'rearm' flag at all.
 	 */
 	if (sg->flags & (TFW_SG_F_SCHED_RATIO_DYNAMIC
 			 | TFW_SG_F_SCHED_RATIO_PREDICT))

From 914bcca13f4b77c391d2f0fbcc31182cb32a0322 Mon Sep 17 00:00:00 2001
From: Alexander Ostapenko
Date: Mon, 15 Oct 2018 17:56:57 +0700
Subject: [PATCH 5/5] Changes according to comments (#806).

---
 tempesta_fw/cache.c      | 2 +-
 tempesta_fw/sock.c       | 4 ++--
 tempesta_fw/work_queue.c | 2 +-
 tempesta_fw/work_queue.h | 8 ++++----
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/tempesta_fw/cache.c b/tempesta_fw/cache.c
index 8c1cd410fb..9fb9cfdfcb 100644
--- a/tempesta_fw/cache.c
+++ b/tempesta_fw/cache.c
@@ -1673,7 +1673,7 @@ static void
 tfw_cache_ipi(struct irq_work *work)
 {
 	TfwWorkTasklet *ct = container_of(work, TfwWorkTasklet, ipi_work);
-	clear_bit(TFW_QUEUE_B_IPI, &ct->wq.flags);
+	clear_bit(TFW_QUEUE_IPI, &ct->wq.flags);
 	tasklet_schedule(&ct->tasklet);
 }
 
diff --git a/tempesta_fw/sock.c b/tempesta_fw/sock.c
index 3f10f7e401..41b6b50b9f 100644
--- a/tempesta_fw/sock.c
+++ b/tempesta_fw/sock.c
@@ -188,7 +188,7 @@ static void
 ss_ipi(struct irq_work *work)
 {
 	TfwRBQueue *wq = &per_cpu(si_wq, smp_processor_id());
-	clear_bit(TFW_QUEUE_B_IPI, &wq->flags);
+	clear_bit(TFW_QUEUE_IPI, &wq->flags);
 	raise_softirq(NET_TX_SOFTIRQ);
 }
 
@@ -221,7 +221,7 @@ ss_turnstile_push(long ticket, SsWork *sw, int cpu)
 	 * We do not need explicit memory barriers after
 	 * the spinlock operations.
 	 */
-	if (test_bit(TFW_QUEUE_B_IPI, &wq->flags))
+	if (test_bit(TFW_QUEUE_IPI, &wq->flags))
 		tfw_raise_softirq(cpu, iw, ss_ipi);
 
 	return 0;
diff --git a/tempesta_fw/work_queue.c b/tempesta_fw/work_queue.c
index f9e857d412..67b5752f7b 100644
--- a/tempesta_fw/work_queue.c
+++ b/tempesta_fw/work_queue.c
@@ -49,7 +49,7 @@ tfw_wq_init(TfwRBQueue *q, int node)
 	q->last_head = 0;
 	atomic64_set(&q->head, 0);
 	atomic64_set(&q->tail, 0);
-	set_bit(TFW_QUEUE_B_IPI, &q->flags);
+	set_bit(TFW_QUEUE_IPI, &q->flags);
 
 	q->array = kmalloc_node(QSZ * WQ_ITEM_SZ, GFP_KERNEL, node);
 	if (!q->array) {
diff --git a/tempesta_fw/work_queue.h b/tempesta_fw/work_queue.h
index e4ce9d2970..df072c9a71 100644
--- a/tempesta_fw/work_queue.h
+++ b/tempesta_fw/work_queue.h
@@ -43,16 +43,16 @@ typedef struct {
 
 enum {
 	/* Enable IPI generation. */
-	TFW_QUEUE_B_IPI = 0
+	TFW_QUEUE_IPI = 0
 };
 
 #define TFW_WQ_IPI_SYNC(size_cb, wq)				\
 do {								\
-	set_bit(TFW_QUEUE_B_IPI, &(wq)->flags);			\
+	set_bit(TFW_QUEUE_IPI, &(wq)->flags);			\
 	smp_mb__after_atomic();					\
 	if (!size_cb(wq))					\
 		return;						\
-	clear_bit(TFW_QUEUE_B_IPI, &(wq)->flags);		\
+	clear_bit(TFW_QUEUE_IPI, &(wq)->flags);			\
 } while (0)
 
 int tfw_wq_init(TfwRBQueue *wq, int node);
 void tfw_wq_destroy(TfwRBQueue *wq);
 long __tfw_wq_push(TfwRBQueue *wq, void *ptr);
@@ -92,7 +92,7 @@ tfw_wq_push(TfwRBQueue *q, void *ptr, int cpu, struct irq_work *work,
 	 */
 	smp_mb__after_atomic();
 
-	if (test_bit(TFW_QUEUE_B_IPI, &q->flags))
+	if (test_bit(TFW_QUEUE_IPI, &q->flags))
 		tfw_raise_softirq(cpu, work, local_cpu_cb);
 
 	return 0;
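
For review purposes, the producer/consumer handshake used throughout
this series can be modelled in userspace. Below is a minimal compilable
sketch (an illustration, not Tempesta code: C11 seq_cst atomics stand
in for the kernel's set_bit()/test_bit() and smp_mb__after_atomic(), a
plain counter stands in for the ring buffer, and wake_consumer() stands
in for tfw_raise_softirq()):

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	static _Atomic long pending;		/* work not yet processed */
	static atomic_bool ipi_enabled = true;	/* TFW_QUEUE_IPI analogue */

	static void
	wake_consumer(void)
	{
		printf("IPI raised\n");	/* tfw_raise_softirq() analogue */
	}

	/* Producer side, cf. tfw_wq_push(). */
	static void
	produce(void)
	{
		/* seq_cst: the push is ordered before the flag load below */
		atomic_fetch_add(&pending, 1);
		if (atomic_load(&ipi_enabled))
			wake_consumer();
	}

	/* Consumer side, cf. tfw_wq_tasklet() and ss_tx_action(). */
	static void
	consume(void)
	{
		while (atomic_load(&pending))
			atomic_fetch_sub(&pending, 1);	/* "do" one item */

		/* TFW_WQ_IPI_SYNC(): set the flag, then re-check size. */
		atomic_store(&ipi_enabled, true);
		if (atomic_load(&pending)) {
			/*
			 * An item slipped in after the drain; its producer
			 * may have seen ipi_enabled == false and skipped
			 * the wakeup, so keep polling instead of sleeping.
			 */
			atomic_store(&ipi_enabled, false);
			consume();
		}
	}

	int
	main(void)
	{
		produce();
		produce();
		consume();
		return 0;
	}

With sequentially consistent operations on both sides it is impossible
for the producer to miss the flag and the consumer to miss the item at
the same time, which mirrors the guarantee the kernel code gets from
its atomic bitops combined with smp_mb__after_atomic().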