mptcp: stop relying on tcp_tx_skb_cache
We want to revert the skb TX cache, but MPTCP is currently
using it unconditionally.

Rework the MPTCP tx code so that tcp_tx_skb_cache is no longer
needed: do the whole coalescing check, skb allocation, and skb
initialization/update inside mptcp_sendmsg_frag(), much like the
current TCP code.

Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Authored by Paolo Abeni, committed by davem330 on Sep 23, 2021
1 parent 04d8825, commit f70cad1
net/mptcp/protocol.c: 137 changes (77 additions, 60 deletions)
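In outline, the reworked mptcp_sendmsg_frag() first checks whether the chunk at hand can be coalesced into the tail skb of the subflow write queue, and only falls back to allocating (and entailing) a fresh skb when it cannot. Below is a small standalone userspace sketch of that coalesce-or-append decision, compilable on its own; struct frag, struct pkt, MAX_FRAGS and the helpers are illustrative stand-ins for skb page fragments, sysctl_max_skb_frags and skb_can_coalesce(), not kernel API. The full diff follows.

/* Userspace analogue of the coalescing decision the reworked
 * mptcp_sendmsg_frag() now makes inline: if the new chunk is
 * byte-contiguous with the tail fragment of the pending buffer, grow
 * that fragment; otherwise append a new one, and report failure once
 * the fragment slots run out (the analogue of the goto alloc_skb path).
 */
#include <stdbool.h>
#include <stdio.h>

#define MAX_FRAGS 4	/* stand-in for sysctl_max_skb_frags */

struct frag { const char *page; size_t off; size_t len; };
struct pkt  { struct frag frags[MAX_FRAGS]; int nr_frags; size_t len; };

/* mirrors skb_can_coalesce(): same page and byte-contiguous offset */
static bool can_coalesce(const struct pkt *p, const char *page, size_t off)
{
	if (!p->nr_frags)
		return false;
	const struct frag *f = &p->frags[p->nr_frags - 1];
	return f->page == page && f->off + f->len == off;
}

static bool add_chunk(struct pkt *p, const char *page, size_t off, size_t len)
{
	if (can_coalesce(p, page, off)) {
		p->frags[p->nr_frags - 1].len += len;	/* grow tail frag */
	} else {
		if (p->nr_frags >= MAX_FRAGS)
			return false;	/* caller must start a new "skb" */
		p->frags[p->nr_frags++] = (struct frag){ page, off, len };
	}
	p->len += len;
	return true;
}

int main(void)
{
	static const char page[4096];
	struct pkt p = { 0 };

	add_chunk(&p, page, 0, 100);	/* new fragment */
	add_chunk(&p, page, 100, 50);	/* contiguous: coalesced */
	add_chunk(&p, page, 500, 10);	/* gap: new fragment */
	printf("nr_frags=%d len=%zu\n", p.nr_frags, p.len);	/* nr_frags=2 len=160 */
	return 0;
}

Coalescing contiguous chunks into the tail fragment is what lets the sender fill up to info->size_goal without growing nr_frags on every write.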
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1224,6 +1224,7 @@ static struct sk_buff *__mptcp_do_alloc_tx_skb(struct sock *sk, gfp_t gfp)
 		if (likely(__mptcp_add_ext(skb, gfp))) {
 			skb_reserve(skb, MAX_TCP_HEADER);
 			skb->reserved_tailroom = skb->end - skb->tail;
+			INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
 			return skb;
 		}
 		__kfree_skb(skb);
@@ -1233,31 +1234,23 @@ static struct sk_buff *__mptcp_do_alloc_tx_skb(struct sock *sk, gfp_t gfp)
 	return NULL;
 }
 
-static bool __mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp)
+static struct sk_buff *__mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp)
 {
 	struct sk_buff *skb;
 
-	if (ssk->sk_tx_skb_cache) {
-		skb = ssk->sk_tx_skb_cache;
-		if (unlikely(!skb_ext_find(skb, SKB_EXT_MPTCP) &&
-			     !__mptcp_add_ext(skb, gfp)))
-			return false;
-		return true;
-	}
-
 	skb = __mptcp_do_alloc_tx_skb(sk, gfp);
 	if (!skb)
-		return false;
+		return NULL;
 
 	if (likely(sk_wmem_schedule(ssk, skb->truesize))) {
-		ssk->sk_tx_skb_cache = skb;
-		return true;
+		tcp_skb_entail(ssk, skb);
+		return skb;
 	}
 	kfree_skb(skb);
-	return false;
+	return NULL;
 }
 
-static bool mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, bool data_lock_held)
+static struct sk_buff *mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, bool data_lock_held)
 {
 	gfp_t gfp = data_lock_held ? GFP_ATOMIC : sk->sk_allocation;
 
@@ -1287,23 +1280,29 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
 			      struct mptcp_sendmsg_info *info)
 {
 	u64 data_seq = dfrag->data_seq + info->sent;
+	int offset = dfrag->offset + info->sent;
 	struct mptcp_sock *msk = mptcp_sk(sk);
 	bool zero_window_probe = false;
 	struct mptcp_ext *mpext = NULL;
-	struct sk_buff *skb, *tail;
-	bool must_collapse = false;
-	int size_bias = 0;
-	int avail_size;
-	size_t ret = 0;
+	bool can_coalesce = false;
+	bool reuse_skb = true;
+	struct sk_buff *skb;
+	size_t copy;
+	int i;
 
 	pr_debug("msk=%p ssk=%p sending dfrag at seq=%llu len=%u already sent=%u",
 		 msk, ssk, dfrag->data_seq, dfrag->data_len, info->sent);
 
+	if (WARN_ON_ONCE(info->sent > info->limit ||
+			 info->limit > dfrag->data_len))
+		return 0;
+
 	/* compute send limit */
 	info->mss_now = tcp_send_mss(ssk, &info->size_goal, info->flags);
-	avail_size = info->size_goal;
+	copy = info->size_goal;
+
 	skb = tcp_write_queue_tail(ssk);
-	if (skb) {
+	if (skb && copy > skb->len) {
 		/* Limit the write to the size available in the
 		 * current skb, if any, so that we create at most a new skb.
 		 * Explicitly tells TCP internals to avoid collapsing on later
@@ -1316,62 +1315,80 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
 			goto alloc_skb;
 		}
 
-		must_collapse = (info->size_goal > skb->len) &&
-				(skb_shinfo(skb)->nr_frags < sysctl_max_skb_frags);
-		if (must_collapse) {
-			size_bias = skb->len;
-			avail_size = info->size_goal - skb->len;
+		i = skb_shinfo(skb)->nr_frags;
+		can_coalesce = skb_can_coalesce(skb, i, dfrag->page, offset);
+		if (!can_coalesce && i >= sysctl_max_skb_frags) {
+			tcp_mark_push(tcp_sk(ssk), skb);
+			goto alloc_skb;
 		}
-	}
 
+		copy -= skb->len;
+	} else {
 alloc_skb:
-	if (!must_collapse &&
-	    !mptcp_alloc_tx_skb(sk, ssk, info->data_lock_held))
-		return 0;
+		skb = mptcp_alloc_tx_skb(sk, ssk, info->data_lock_held);
+		if (!skb)
+			return -ENOMEM;
+
+		i = skb_shinfo(skb)->nr_frags;
+		reuse_skb = false;
+		mpext = skb_ext_find(skb, SKB_EXT_MPTCP);
+	}
 
 	/* Zero window and all data acked? Probe. */
-	avail_size = mptcp_check_allowed_size(msk, data_seq, avail_size);
-	if (avail_size == 0) {
+	copy = mptcp_check_allowed_size(msk, data_seq, copy);
+	if (copy == 0) {
 		u64 snd_una = READ_ONCE(msk->snd_una);
 
-		if (skb || snd_una != msk->snd_nxt)
+		if (snd_una != msk->snd_nxt) {
+			tcp_remove_empty_skb(ssk, tcp_write_queue_tail(ssk));
 			return 0;
+		}
 
 		zero_window_probe = true;
 		data_seq = snd_una - 1;
-		avail_size = 1;
-	}
+		copy = 1;
 
-	if (WARN_ON_ONCE(info->sent > info->limit ||
-			 info->limit > dfrag->data_len))
-		return 0;
+		/* all mptcp-level data is acked, no skbs should be present into the
+		 * ssk write queue
+		 */
+		WARN_ON_ONCE(reuse_skb);
+	}
 
-	ret = info->limit - info->sent;
-	tail = tcp_build_frag(ssk, avail_size + size_bias, info->flags,
-			      dfrag->page, dfrag->offset + info->sent, &ret);
-	if (!tail) {
-		tcp_remove_empty_skb(sk, tcp_write_queue_tail(ssk));
+	copy = min_t(size_t, copy, info->limit - info->sent);
+	if (!sk_wmem_schedule(ssk, copy)) {
+		tcp_remove_empty_skb(ssk, tcp_write_queue_tail(ssk));
 		return -ENOMEM;
 	}
 
-	/* if the tail skb is still the cached one, collapsing really happened.
-	 */
-	if (skb == tail) {
-		TCP_SKB_CB(tail)->tcp_flags &= ~TCPHDR_PSH;
-		mpext->data_len += ret;
+	if (can_coalesce) {
+		skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
+	} else {
+		get_page(dfrag->page);
+		skb_fill_page_desc(skb, i, dfrag->page, offset, copy);
+	}
+
+	skb->len += copy;
+	skb->data_len += copy;
+	skb->truesize += copy;
+	sk_wmem_queued_add(ssk, copy);
+	sk_mem_charge(ssk, copy);
+	skb->ip_summed = CHECKSUM_PARTIAL;
+	WRITE_ONCE(tcp_sk(ssk)->write_seq, tcp_sk(ssk)->write_seq + copy);
+	TCP_SKB_CB(skb)->end_seq += copy;
+	tcp_skb_pcount_set(skb, 0);
+
+	/* on skb reuse we just need to update the DSS len */
+	if (reuse_skb) {
+		TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
+		mpext->data_len += copy;
+		WARN_ON_ONCE(zero_window_probe);
 		goto out;
 	}
 
-	mpext = skb_ext_find(tail, SKB_EXT_MPTCP);
-	if (WARN_ON_ONCE(!mpext)) {
-		/* should never reach here, stream corrupted */
-		return -EINVAL;
-	}
-
 	memset(mpext, 0, sizeof(*mpext));
 	mpext->data_seq = data_seq;
 	mpext->subflow_seq = mptcp_subflow_ctx(ssk)->rel_write_seq;
-	mpext->data_len = ret;
+	mpext->data_len = copy;
 	mpext->use_map = 1;
 	mpext->dsn64 = 1;
 
@@ -1380,18 +1397,18 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
 		 mpext->dsn64);
 
 	if (zero_window_probe) {
-		mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
+		mptcp_subflow_ctx(ssk)->rel_write_seq += copy;
 		mpext->frozen = 1;
 		if (READ_ONCE(msk->csum_enabled))
-			mptcp_update_data_checksum(tail, ret);
+			mptcp_update_data_checksum(skb, copy);
 		tcp_push_pending_frames(ssk);
 		return 0;
 	}
 out:
 	if (READ_ONCE(msk->csum_enabled))
-		mptcp_update_data_checksum(tail, ret);
-	mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
-	return ret;
+		mptcp_update_data_checksum(skb, copy);
+	mptcp_subflow_ctx(ssk)->rel_write_seq += copy;
+	return copy;
 }
 
 #define MPTCP_SEND_BURST_SIZE	((1 << 16) - \

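One detail worth flagging in the hunks above: when the write lands in an already-queued skb (reuse_skb), only the tail mapping's data_len is extended and TCPHDR_PSH is cleared, while a freshly allocated skb gets a zeroed, fully initialised DSS mapping. Below is a minimal userspace model of that bookkeeping, again with illustrative names; struct dss_map is a stand-in for the kernel's struct mptcp_ext, not its actual layout.

/* Userspace model of the DSS bookkeeping in the reworked sender:
 * reusing the tail skb only extends the existing mapping's length,
 * while a fresh skb gets a fully initialised mapping.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct dss_map {
	uint64_t data_seq;	/* MPTCP-level sequence covered by this map */
	uint32_t subflow_seq;	/* subflow-relative start */
	uint32_t data_len;	/* bytes covered */
};

/* reuse_skb path: the mapping already exists, only its length grows */
static void map_extend(struct dss_map *m, uint32_t copy)
{
	m->data_len += copy;
}

/* fresh-skb path: mirrors memset(mpext, 0, ...) plus field init */
static void map_init(struct dss_map *m, uint64_t data_seq,
		     uint32_t subflow_seq, uint32_t copy)
{
	memset(m, 0, sizeof(*m));
	m->data_seq = data_seq;
	m->subflow_seq = subflow_seq;
	m->data_len = copy;
}

int main(void)
{
	struct dss_map m;

	map_init(&m, 1000, 1, 500);	/* new skb: full mapping */
	map_extend(&m, 300);		/* coalesced write: length only */
	printf("data_seq=%llu subflow_seq=%u data_len=%u\n",
	       (unsigned long long)m.data_seq, m.subflow_seq, m.data_len);
	return 0;
}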