forked from multipath-tcp/mptcp_net-next
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Kuniyuki Iwashima says: ==================== Improve bind(addr, 0) behaviour. Currently we fail to bind sockets to ephemeral ports when all of the ports are exhausted even if all sockets have SO_REUSEADDR enabled. In this case, we still have a chance to connect to the different remote hosts. These patches add net.ipv4.ip_autobind_reuse option and fix the behaviour to fully utilize all space of the local (addr, port) tuples. Changes in v5: - Add more description to documents. - Fix sysctl option to use proc_dointvec_minmax. - Remove the Fixes: tag and squash two commits. Changes in v4: - Add net.ipv4.ip_autobind_reuse option to not change the current behaviour. - Modify .gitignore for test. https://lore.kernel.org/netdev/20200308181615.90135-1-kuniyu@amazon.co.jp/ Changes in v3: - Change the title and write more specific description of the 3rd patch. - Add a test in tools/testing/selftests/net/ as the 4th patch. https://lore.kernel.org/netdev/20200229113554.78338-1-kuniyu@amazon.co.jp/ Changes in v2: - Change the description of the 2nd patch ('localhost' -> 'address'). - Correct the description and the if statement of the 3rd patch. https://lore.kernel.org/netdev/20200226074631.67688-1-kuniyu@amazon.co.jp/ v1 with tests: https://lore.kernel.org/netdev/20200220152020.13056-1-kuniyu@amazon.co.jp/ ==================== Reviewed-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
- Loading branch information
Showing
8 changed files
with
243 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,3 +23,4 @@ so_txtime | |
tcp_fastopen_backup_key | ||
nettest | ||
fin_ack_lat | ||
reuseaddr_ports_exhausted |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
162 changes: 162 additions & 0 deletions
162
tools/testing/selftests/net/reuseaddr_ports_exhausted.c
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,162 @@ | ||
// SPDX-License-Identifier: GPL-2.0-only | ||
/* | ||
* Check if we can fully utilize 4-tuples for connect(). | ||
* | ||
* Rules to bind sockets to the same port when all ephemeral ports are | ||
* exhausted. | ||
* | ||
* 1. if there are TCP_LISTEN sockets on the port, fail to bind. | ||
* 2. if there are sockets without SO_REUSEADDR, fail to bind. | ||
* 3. if SO_REUSEADDR is disabled, fail to bind. | ||
* 4. if SO_REUSEADDR is enabled and SO_REUSEPORT is disabled, | ||
* succeed to bind. | ||
* 5. if SO_REUSEADDR and SO_REUSEPORT are enabled and | ||
* there is no socket having both options and the same EUID, | ||
* succeed to bind. | ||
* 6. fail to bind. | ||
* | ||
* Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp> | ||
*/ | ||
#include <arpa/inet.h> | ||
#include <netinet/in.h> | ||
#include <sys/socket.h> | ||
#include <sys/types.h> | ||
#include <unistd.h> | ||
#include "../kselftest_harness.h" | ||
|
||
/*
 * Socket options for one test case involving two sockets.
 * Index 0 describes the socket that is bound first, index 1 the socket
 * that is bound second.
 */
struct reuse_opts {
	int reuseaddr[2];	/* SO_REUSEADDR value for each socket */
	int reuseport[2];	/* SO_REUSEPORT value for each socket */
};
|
||
/*
 * Every option combination in which at least one of the two sockets has
 * SO_REUSEADDR disabled.  The second bind is expected to fail for all of
 * these rows.
 */
struct reuse_opts unreusable_opts[12] = {
	{ .reuseaddr = {0, 0}, .reuseport = {0, 0} },
	{ .reuseaddr = {0, 0}, .reuseport = {0, 1} },
	{ .reuseaddr = {0, 0}, .reuseport = {1, 0} },
	{ .reuseaddr = {0, 0}, .reuseport = {1, 1} },
	{ .reuseaddr = {0, 1}, .reuseport = {0, 0} },
	{ .reuseaddr = {0, 1}, .reuseport = {0, 1} },
	{ .reuseaddr = {0, 1}, .reuseport = {1, 0} },
	{ .reuseaddr = {0, 1}, .reuseport = {1, 1} },
	{ .reuseaddr = {1, 0}, .reuseport = {0, 0} },
	{ .reuseaddr = {1, 0}, .reuseport = {0, 1} },
	{ .reuseaddr = {1, 0}, .reuseport = {1, 0} },
	{ .reuseaddr = {1, 0}, .reuseport = {1, 1} },
};
|
||
/*
 * Every option combination in which both sockets have SO_REUSEADDR
 * enabled; only the SO_REUSEPORT values vary between rows.
 */
struct reuse_opts reusable_opts[4] = {
	{ .reuseaddr = {1, 1}, .reuseport = {0, 0} },
	{ .reuseaddr = {1, 1}, .reuseport = {0, 1} },
	{ .reuseaddr = {1, 1}, .reuseport = {1, 0} },
	{ .reuseaddr = {1, 1}, .reuseport = {1, 1} },
};
|
||
int bind_port(struct __test_metadata *_metadata, int reuseaddr, int reuseport) | ||
{ | ||
struct sockaddr_in local_addr; | ||
int len = sizeof(local_addr); | ||
int fd, ret; | ||
|
||
fd = socket(AF_INET, SOCK_STREAM, 0); | ||
ASSERT_NE(-1, fd) TH_LOG("failed to open socket."); | ||
|
||
ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &reuseaddr, sizeof(int)); | ||
ASSERT_EQ(0, ret) TH_LOG("failed to setsockopt: SO_REUSEADDR."); | ||
|
||
ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &reuseport, sizeof(int)); | ||
ASSERT_EQ(0, ret) TH_LOG("failed to setsockopt: SO_REUSEPORT."); | ||
|
||
local_addr.sin_family = AF_INET; | ||
local_addr.sin_addr.s_addr = inet_addr("127.0.0.1"); | ||
local_addr.sin_port = 0; | ||
|
||
if (bind(fd, (struct sockaddr *)&local_addr, len) == -1) { | ||
close(fd); | ||
return -1; | ||
} | ||
|
||
return fd; | ||
} | ||
|
||
TEST(reuseaddr_ports_exhausted_unreusable) | ||
{ | ||
struct reuse_opts *opts; | ||
int i, j, fd[2]; | ||
|
||
for (i = 0; i < 12; i++) { | ||
opts = &unreusable_opts[i]; | ||
|
||
for (j = 0; j < 2; j++) | ||
fd[j] = bind_port(_metadata, opts->reuseaddr[j], opts->reuseport[j]); | ||
|
||
ASSERT_NE(-1, fd[0]) TH_LOG("failed to bind."); | ||
EXPECT_EQ(-1, fd[1]) TH_LOG("should fail to bind."); | ||
|
||
for (j = 0; j < 2; j++) | ||
if (fd[j] != -1) | ||
close(fd[j]); | ||
} | ||
} | ||
|
||
TEST(reuseaddr_ports_exhausted_reusable_same_euid) | ||
{ | ||
struct reuse_opts *opts; | ||
int i, j, fd[2]; | ||
|
||
for (i = 0; i < 4; i++) { | ||
opts = &reusable_opts[i]; | ||
|
||
for (j = 0; j < 2; j++) | ||
fd[j] = bind_port(_metadata, opts->reuseaddr[j], opts->reuseport[j]); | ||
|
||
ASSERT_NE(-1, fd[0]) TH_LOG("failed to bind."); | ||
|
||
if (opts->reuseport[0] && opts->reuseport[1]) { | ||
EXPECT_EQ(-1, fd[1]) TH_LOG("should fail to bind because both sockets succeed to be listened."); | ||
} else { | ||
EXPECT_NE(-1, fd[1]) TH_LOG("should succeed to bind to connect to different destinations."); | ||
} | ||
|
||
for (j = 0; j < 2; j++) | ||
if (fd[j] != -1) | ||
close(fd[j]); | ||
} | ||
} | ||
|
||
TEST(reuseaddr_ports_exhausted_reusable_different_euid) | ||
{ | ||
struct reuse_opts *opts; | ||
int i, j, ret, fd[2]; | ||
uid_t euid[2] = {10, 20}; | ||
|
||
for (i = 0; i < 4; i++) { | ||
opts = &reusable_opts[i]; | ||
|
||
for (j = 0; j < 2; j++) { | ||
ret = seteuid(euid[j]); | ||
ASSERT_EQ(0, ret) TH_LOG("failed to seteuid: %d.", euid[j]); | ||
|
||
fd[j] = bind_port(_metadata, opts->reuseaddr[j], opts->reuseport[j]); | ||
|
||
ret = seteuid(0); | ||
ASSERT_EQ(0, ret) TH_LOG("failed to seteuid: 0."); | ||
} | ||
|
||
ASSERT_NE(-1, fd[0]) TH_LOG("failed to bind."); | ||
EXPECT_NE(-1, fd[1]) TH_LOG("should succeed to bind because one socket can be bound in each euid."); | ||
|
||
if (fd[1] != -1) { | ||
ret = listen(fd[0], 5); | ||
ASSERT_EQ(0, ret) TH_LOG("failed to listen."); | ||
|
||
ret = listen(fd[1], 5); | ||
EXPECT_EQ(-1, ret) TH_LOG("should fail to listen because only one uid reserves the port in TCP_LISTEN."); | ||
} | ||
|
||
for (j = 0; j < 2; j++) | ||
if (fd[j] != -1) | ||
close(fd[j]); | ||
} | ||
} | ||
|
||
/*
 * Macro supplied by ../kselftest_harness.h; presumably expands to the
 * program's main() that runs the TEST() cases defined above - confirm
 * against the harness header.
 */
TEST_HARNESS_MAIN |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
#!/bin/bash | ||
# SPDX-License-Identifier: GPL-2.0 | ||
# | ||
# Run tests when all ephemeral ports are exhausted. | ||
# | ||
# Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp> | ||
|
||
# Keep command tracing off and abort the script on the first failing command.
set +x | ||
set -e | ||
|
||
# Network namespace name with a random suffix; mktemp -u only generates the
# name, it does not create a file.
readonly NETNS="ns-$(mktemp -u XXXXXX)" | ||
|
||
# Create the test network namespace, bring its loopback device up, shrink
# the ephemeral port range to the single port 32768 and enable
# net.ipv4.ip_autobind_reuse inside the namespace.
setup() {
	ip netns add "$NETNS"
	ip -netns "$NETNS" link set lo up

	# sysctl output (stdout and stderr) is discarded; with "set -e" a
	# failing sysctl still aborts the script.
	ip netns exec "$NETNS" sysctl -w net.ipv4.ip_local_port_range="32768 32768" > /dev/null 2>&1
	ip netns exec "$NETNS" sysctl -w net.ipv4.ip_autobind_reuse=1 > /dev/null 2>&1
}
|
||
# Delete the network namespace used by the test.
cleanup()
{
	ip netns del "$NETNS"
}
|
||
# Register cleanup before running setup so the namespace is removed on every
# exit path, including an abort part-way through setup caused by "set -e".
trap cleanup EXIT | ||
setup | ||
|
||
# Run the selftest binary from the current directory inside the test
# network namespace.
do_test()
{
	ip netns exec "$NETNS" ./reuseaddr_ports_exhausted
}
|
||
# Run the tests; because of "set -e" the message below is printed only when
# the test binary exits with status 0.
do_test | ||
echo "tests done" |