Skip to content

Commit

Permalink
Rework ss_do_close function.
Browse files Browse the repository at this point in the history
Previously we sent a FIN to the remote peer while closing
the socket, but didn't wait for the ACK of this FIN. Moreover,
if we could not transmit pending skbs, we closed the socket
and lost these skbs. Now we start closing the socket and
wait until the remote peer sends an ACK for our FIN.
  • Loading branch information
EvgeniiMekhanik committed May 3, 2023
1 parent d494f25 commit 0b640b6
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 51 deletions.
77 changes: 33 additions & 44 deletions fw/sock.c
Original file line number Diff line number Diff line change
Expand Up @@ -545,11 +545,12 @@ EXPORT_SYMBOL(ss_send);
*
* Called with locked socket.
*/
static void
ss_do_close(struct sock *sk, int flags)
static int
ss_do_close(struct sock *sk, int flags, int wait_for_fin)
{
struct sk_buff *skb;
int data_was_unread = 0;
u8 sk_shutdown = SHUTDOWN_MASK;

T_DBG2("[%d]: Close socket %p (%s): account=%d refcnt=%u\n",
smp_processor_id(), sk, ss_statename[sk->sk_state],
Expand All @@ -572,9 +573,9 @@ ss_do_close(struct sock *sk, int flags)
*/
sk->sk_lock.owned = 1;

/* The below is mostly copy-paste from tcp_close(), 5.10.35. */
sk->sk_shutdown = SHUTDOWN_MASK;
swap(sk->sk_shutdown, sk_shutdown);

/* The below is mostly copy-paste from tcp_close(), 5.10.35. */
while ((skb = __skb_dequeue(&sk->sk_receive_queue))) {
u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq;

Expand All @@ -598,41 +599,15 @@ ss_do_close(struct sock *sk, int flags)
}
tcp_set_state(sk, TCP_CLOSE);
tcp_send_active_reset(sk, sk->sk_allocation);
goto adjudge_to_death;
} else if (tcp_close_state(sk)) {
tcp_send_fin(sk);
}
else if (tcp_close_state(sk)) {
/* The code below is taken from tcp_send_fin(), 5.10.35. */
struct sk_buff *skb, *tskb, *tail;
struct tcp_sock *tp = tcp_sk(sk);

tskb = tail = tcp_write_queue_tail(sk);
if (!tskb && tcp_under_memory_pressure(sk))
tskb = skb_rb_last(&sk->tcp_rtx_queue);

if (tskb) {
/* Send FIN with data if we have any. */
TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN;
TCP_SKB_CB(tskb)->end_seq++;
tp->write_seq++;
if (!tail) {
WRITE_ONCE(tp->snd_nxt, tp->snd_nxt + 1);
goto adjudge_to_death;
}
}
else {
/* No data to send in the socket, allocate new skb. */
skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation);
if (unlikely(!skb)) {
T_WARN("can't send FIN due to bad alloc");
goto adjudge_to_death;
}
INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
skb_reserve(skb, MAX_TCP_HEADER);
ss_forced_mem_schedule(sk, skb->truesize);
tcp_init_nondata_skb(skb, tp->write_seq,
TCPHDR_ACK | TCPHDR_FIN);
tcp_queue_skb(sk, skb);
}
__tcp_push_pending_frames(sk, tcp_current_mss(sk), TCP_NAGLE_OFF);
if (wait_for_fin && sk_stream_closing(sk)) {
sk->sk_lock.owned = 0;
swap(sk->sk_shutdown, sk_shutdown);
return -EAGAIN;
}

adjudge_to_death:
Expand All @@ -659,7 +634,7 @@ ss_do_close(struct sock *sk, int flags)
inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
} else {
tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
return;
return 0;
}
}
if (sk->sk_state != TCP_CLOSE) {
Expand Down Expand Up @@ -687,6 +662,7 @@ ss_do_close(struct sock *sk, int flags)
tcp_write_queue_purge(sk);
inet_csk_destroy_sock(sk);
}
return 0;
}

/**
Expand All @@ -701,7 +677,8 @@ ss_do_close(struct sock *sk, int flags)
static void
ss_linkerror(struct sock *sk)
{
ss_do_close(sk, 0);
int r = ss_do_close(sk, 0, false);
BUG_ON(r);
ss_conn_drop_guard_exit(sk);
sock_put(sk); /* paired with ss_do_close() */
}
Expand Down Expand Up @@ -1010,7 +987,8 @@ ss_tcp_state_change(struct sock *sk)
* it on our own without calling upper layer hooks.
*/
if (ss_active_guard_enter(SS_V_ACT_NEWCONN)) {
ss_do_close(sk, 0);
r = ss_do_close(sk, 0, false);
BUG_ON(r);
sock_put(sk);
/*
* The case of a connect to an upstream server that
Expand All @@ -1023,7 +1001,8 @@ ss_tcp_state_change(struct sock *sk)
}

if (!is_srv_sock && ss_active_guard_enter(SS_V_ACT_LIVECONN)) {
ss_do_close(sk, 0);
r = ss_do_close(sk, 0, false);
BUG_ON(r);
sock_put(sk);
ss_active_guard_exit(SS_V_ACT_NEWCONN);
return;
Expand Down Expand Up @@ -1074,6 +1053,14 @@ ss_tcp_state_change(struct sock *sk)
*/
ss_close(sk, SS_F_SYNC);
}
else if (sk->sk_state == TCP_FIN_WAIT2) {
/*
* Received the ACK for our FIN; continue closing the connection.
*/
if (!skb_queue_empty(&sk->sk_receive_queue))
ss_tcp_process_data(sk);
ss_linkerror(sk);
}
else if (sk->sk_state == TCP_CLOSE) {
/*
* We reach the state on regular tcp_close() (including the
Expand Down Expand Up @@ -1389,10 +1376,12 @@ EXPORT_SYMBOL(ss_getpeername);
static void
__sk_close_locked(struct sock *sk, int flags)
{
ss_do_close(sk, flags);
int r = ss_do_close(sk, flags, true);
bh_unlock_sock(sk);
ss_conn_drop_guard_exit(sk);
sock_put(sk); /* paired with ss_do_close() */
if (!r) {
ss_conn_drop_guard_exit(sk);
sock_put(sk); /* paired with ss_do_close() */
}
}

static void
Expand Down
60 changes: 53 additions & 7 deletions linux-5.10.35.patch
Original file line number Diff line number Diff line change
Expand Up @@ -794,7 +794,7 @@ index 000000000..8e9b6af75
+#endif /* __TEMPESTA_H__ */
+
diff --git a/include/net/sock.h b/include/net/sock.h
index 261195598..afbffef91 100644
index 261195598..0bb658b94 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -506,6 +506,18 @@ struct sock {
Expand Down Expand Up @@ -826,7 +826,24 @@ index 261195598..afbffef91 100644
};

#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
@@ -1915,8 +1930,7 @@ static inline bool sk_rethink_txhash(struct sock *sk)
@@ -1081,6 +1096,16 @@ static inline void sock_rps_reset_rxhash(struct sock *sk)
__rc; \
})

+/**
+ * sk_stream_closing - Return 1 if we still have things to send in our buffers.
+ * @sk: socket to verify
+ */
+static inline int sk_stream_closing(struct sock *sk)
+{
+ return (1 << sk->sk_state) &
+ (TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK);
+}
+
int sk_stream_wait_connect(struct sock *sk, long *timeo_p);
int sk_stream_wait_memory(struct sock *sk, long *timeo_p);
void sk_stream_wait_close(struct sock *sk, long timeo_p);
@@ -1915,8 +1940,7 @@ static inline bool sk_rethink_txhash(struct sock *sk)
static inline struct dst_entry *
__sk_dst_get(struct sock *sk)
{
Expand Down Expand Up @@ -2046,6 +2063,27 @@ index 1301ea694..b36b9bde9 100644
#ifdef NET_SKBUFF_DATA_USES_OFFSET
skb->end = size;
#else
diff --git a/net/core/stream.c b/net/core/stream.c
index 4f1d4aa5f..47ce31da5 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -83,16 +83,6 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
}
EXPORT_SYMBOL(sk_stream_wait_connect);

-/**
- * sk_stream_closing - Return 1 if we still have things to send in our buffers.
- * @sk: socket to verify
- */
-static inline int sk_stream_closing(struct sock *sk)
-{
- return (1 << sk->sk_state) &
- (TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK);
-}
-
void sk_stream_wait_close(struct sock *sk, long timeout)
{
if (timeout) {
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 1dfa561e8..2ba1ce470 100644
--- a/net/ipv4/inet_connection_sock.c
Expand Down Expand Up @@ -2321,7 +2359,7 @@ index ab8ed0fc4..e260a0af6 100644
goto put_and_exit;
*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index f99494637..96baffec4 100644
index f99494637..1c8fbe94a 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -39,6 +39,9 @@
Expand Down Expand Up @@ -2620,15 +2658,23 @@ index f99494637..96baffec4 100644
return -ENOMEM;

diff = tcp_skb_pcount(skb);
@@ -3454,6 +3571,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
@@ -3421,6 +3538,7 @@ void tcp_send_fin(struct sock *sk)
}
__tcp_push_pending_frames(sk, tcp_current_mss(sk), TCP_NAGLE_OFF);
}
+EXPORT_SYMBOL(tcp_send_fin);

/* We get here when a process closes a file descriptor (either due to
* an explicit close() or as a byproduct of exit()'ing) and there
@@ -3454,6 +3572,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
*/
trace_tcp_send_reset(sk, NULL);
}
+EXPORT_SYMBOL(tcp_send_active_reset);

/* Send a crossed SYN-ACK during socket establishment.
* WARNING: This routine must only be called when we have already sent
@@ -4030,6 +4148,9 @@ int tcp_write_wakeup(struct sock *sk, int mib)
@@ -4030,6 +4149,9 @@ int tcp_write_wakeup(struct sock *sk, int mib)

skb = tcp_send_head(sk);
if (skb && before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
Expand All @@ -2638,7 +2684,7 @@ index f99494637..96baffec4 100644
int err;
unsigned int mss = tcp_current_mss(sk);
unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
@@ -4037,6 +4158,12 @@ int tcp_write_wakeup(struct sock *sk, int mib)
@@ -4037,6 +4159,12 @@ int tcp_write_wakeup(struct sock *sk, int mib)
if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;

Expand All @@ -2651,7 +2697,7 @@ index f99494637..96baffec4 100644
/* We are probing the opening of a window
* but the window size is != 0
* must have been a result SWS avoidance ( sender )
@@ -4052,6 +4179,13 @@ int tcp_write_wakeup(struct sock *sk, int mib)
@@ -4052,6 +4180,13 @@ int tcp_write_wakeup(struct sock *sk, int mib)
tcp_set_skb_tso_segs(skb, mss);

TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
Expand Down

0 comments on commit 0b640b6

Please sign in to comment.