Skip to content

Commit

Permalink
Merge branch 'net-fib_rules-add-dscp-selector-support'
Browse files Browse the repository at this point in the history
Ido Schimmel says:

====================
net: fib_rules: Add DSCP selector support

Currently, the kernel rejects IPv4 FIB rules that try to match on the
upper three DSCP bits:

 # ip -4 rule add tos 0x1c table 100
 # ip -4 rule add tos 0x3c table 100
 Error: Invalid tos.

The reason for that is that historically users of the FIB lookup API
only populated the lower three DSCP bits in the TOS field of the IPv4
flow key ('flowi4_tos'), which fits the TOS definition from the initial
IPv4 specification (RFC 791).

This is not very useful nowadays; instead, some users want to be able
to match on the six-bit DSCP field, which replaced the TOS and IP
precedence fields over 25 years ago (RFC 2474). In addition, the current
behavior differs between IPv4 and IPv6: the latter does allow users to
match on the entire DSCP field using the TOS selector.

Recent patchsets made sure that callers of the FIB lookup API now
populate the entire DSCP field in the IPv4 flow key. Therefore, it is
now possible to extend FIB rules to match on DSCP.

This is done by adding a new DSCP attribute which is implemented for
both IPv4 and IPv6 to provide user space programs a consistent behavior
between both address families.

The behavior of the old TOS selector is unchanged and IPv4 FIB rules
using it will only match on the lower three DSCP bits. The kernel will
reject rules that try to use both selectors.

Patch #1 adds the new DSCP attribute but rejects its usage.

Patches #2-#3 implement IPv4 and IPv6 support.

Patch #4 allows user space to use the new attribute.

Patches #5-#6 add selftests.
====================

Link: https://patch.msgid.link/20240911093748.3662015-1-idosch@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
  • Loading branch information
kuba-moo committed Sep 14, 2024
2 parents 716425d + 2bf1259 commit 7bb50f3
Show file tree
Hide file tree
Showing 5 changed files with 184 additions and 8 deletions.
1 change: 1 addition & 0 deletions include/uapi/linux/fib_rules.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ enum {
FRA_IP_PROTO, /* ip proto */
FRA_SPORT_RANGE, /* sport */
FRA_DPORT_RANGE, /* dport */
FRA_DSCP, /* dscp */
__FRA_MAX
};

Expand Down
4 changes: 3 additions & 1 deletion net/core/fib_rules.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <linux/list.h>
#include <linux/module.h>
#include <net/net_namespace.h>
#include <net/inet_dscp.h>
#include <net/sock.h>
#include <net/fib_rules.h>
#include <net/ip_tunnels.h>
Expand Down Expand Up @@ -766,7 +767,8 @@ static const struct nla_policy fib_rule_policy[FRA_MAX + 1] = {
[FRA_PROTOCOL] = { .type = NLA_U8 },
[FRA_IP_PROTO] = { .type = NLA_U8 },
[FRA_SPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) },
[FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }
[FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) },
[FRA_DSCP] = NLA_POLICY_MAX(NLA_U8, INET_DSCP_MASK >> 2),
};

int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
Expand Down
54 changes: 50 additions & 4 deletions net/ipv4/fib_rules.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ struct fib4_rule {
u8 dst_len;
u8 src_len;
dscp_t dscp;
u8 dscp_full:1; /* DSCP or TOS selector */
__be32 src;
__be32 srcmask;
__be32 dst;
Expand Down Expand Up @@ -186,7 +187,15 @@ INDIRECT_CALLABLE_SCOPE int fib4_rule_match(struct fib_rule *rule,
((daddr ^ r->dst) & r->dstmask))
return 0;

if (r->dscp && !fib_dscp_masked_match(r->dscp, fl4))
/* When DSCP selector is used we need to match on the entire DSCP field
* in the flow information structure. When TOS selector is used we need
* to mask the upper three DSCP bits prior to matching to maintain
* legacy behavior.
*/
if (r->dscp_full && r->dscp != inet_dsfield_to_dscp(fl4->flowi4_tos))
return 0;
else if (!r->dscp_full && r->dscp &&
!fib_dscp_masked_match(r->dscp, fl4))
return 0;

if (rule->ip_proto && (rule->ip_proto != fl4->flowi4_proto))
Expand Down Expand Up @@ -217,6 +226,20 @@ static struct fib_table *fib_empty_table(struct net *net)
return NULL;
}

static int fib4_nl2rule_dscp(const struct nlattr *nla, struct fib4_rule *rule4,
struct netlink_ext_ack *extack)
{
if (rule4->dscp) {
NL_SET_ERR_MSG(extack, "Cannot specify both TOS and DSCP");
return -EINVAL;
}

rule4->dscp = inet_dsfield_to_dscp(nla_get_u8(nla) << 2);
rule4->dscp_full = true;

return 0;
}

static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
struct fib_rule_hdr *frh,
struct nlattr **tb,
Expand All @@ -238,6 +261,10 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
}
rule4->dscp = inet_dsfield_to_dscp(frh->tos);

if (tb[FRA_DSCP] &&
fib4_nl2rule_dscp(tb[FRA_DSCP], rule4, extack) < 0)
goto errout;

/* split local/main if they are not already split */
err = fib_unmerge(net);
if (err)
Expand Down Expand Up @@ -320,9 +347,19 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
if (frh->dst_len && (rule4->dst_len != frh->dst_len))
return 0;

if (frh->tos && inet_dscp_to_dsfield(rule4->dscp) != frh->tos)
if (frh->tos &&
(rule4->dscp_full ||
inet_dscp_to_dsfield(rule4->dscp) != frh->tos))
return 0;

if (tb[FRA_DSCP]) {
dscp_t dscp;

dscp = inet_dsfield_to_dscp(nla_get_u8(tb[FRA_DSCP]) << 2);
if (!rule4->dscp_full || rule4->dscp != dscp)
return 0;
}

#ifdef CONFIG_IP_ROUTE_CLASSID
if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW])))
return 0;
Expand All @@ -344,7 +381,15 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,

frh->dst_len = rule4->dst_len;
frh->src_len = rule4->src_len;
frh->tos = inet_dscp_to_dsfield(rule4->dscp);

if (rule4->dscp_full) {
frh->tos = 0;
if (nla_put_u8(skb, FRA_DSCP,
inet_dscp_to_dsfield(rule4->dscp) >> 2))
goto nla_put_failure;
} else {
frh->tos = inet_dscp_to_dsfield(rule4->dscp);
}

if ((rule4->dst_len &&
nla_put_in_addr(skb, FRA_DST, rule4->dst)) ||
Expand All @@ -366,7 +411,8 @@ static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule)
{
return nla_total_size(4) /* dst */
+ nla_total_size(4) /* src */
+ nla_total_size(4); /* flow */
+ nla_total_size(4) /* flow */
+ nla_total_size(1); /* dscp */
}

static void fib4_rule_flush_cache(struct fib_rules_ops *ops)
Expand Down
43 changes: 40 additions & 3 deletions net/ipv6/fib6_rules.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ struct fib6_rule {
struct rt6key src;
struct rt6key dst;
dscp_t dscp;
u8 dscp_full:1; /* DSCP or TOS selector */
};

static bool fib6_rule_matchall(const struct fib_rule *rule)
Expand Down Expand Up @@ -345,6 +346,20 @@ INDIRECT_CALLABLE_SCOPE int fib6_rule_match(struct fib_rule *rule,
return 1;
}

static int fib6_nl2rule_dscp(const struct nlattr *nla, struct fib6_rule *rule6,
struct netlink_ext_ack *extack)
{
if (rule6->dscp) {
NL_SET_ERR_MSG(extack, "Cannot specify both TOS and DSCP");
return -EINVAL;
}

rule6->dscp = inet_dsfield_to_dscp(nla_get_u8(nla) << 2);
rule6->dscp_full = true;

return 0;
}

static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
struct fib_rule_hdr *frh,
struct nlattr **tb,
Expand All @@ -361,6 +376,9 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
}
rule6->dscp = inet_dsfield_to_dscp(frh->tos);

if (tb[FRA_DSCP] && fib6_nl2rule_dscp(tb[FRA_DSCP], rule6, extack) < 0)
goto errout;

if (rule->action == FR_ACT_TO_TBL && !rule->l3mdev) {
if (rule->table == RT6_TABLE_UNSPEC) {
NL_SET_ERR_MSG(extack, "Invalid table");
Expand Down Expand Up @@ -413,9 +431,19 @@ static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
if (frh->dst_len && (rule6->dst.plen != frh->dst_len))
return 0;

if (frh->tos && inet_dscp_to_dsfield(rule6->dscp) != frh->tos)
if (frh->tos &&
(rule6->dscp_full ||
inet_dscp_to_dsfield(rule6->dscp) != frh->tos))
return 0;

if (tb[FRA_DSCP]) {
dscp_t dscp;

dscp = inet_dsfield_to_dscp(nla_get_u8(tb[FRA_DSCP]) << 2);
if (!rule6->dscp_full || rule6->dscp != dscp)
return 0;
}

if (frh->src_len &&
nla_memcmp(tb[FRA_SRC], &rule6->src.addr, sizeof(struct in6_addr)))
return 0;
Expand All @@ -434,7 +462,15 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,

frh->dst_len = rule6->dst.plen;
frh->src_len = rule6->src.plen;
frh->tos = inet_dscp_to_dsfield(rule6->dscp);

if (rule6->dscp_full) {
frh->tos = 0;
if (nla_put_u8(skb, FRA_DSCP,
inet_dscp_to_dsfield(rule6->dscp) >> 2))
goto nla_put_failure;
} else {
frh->tos = inet_dscp_to_dsfield(rule6->dscp);
}

if ((rule6->dst.plen &&
nla_put_in6_addr(skb, FRA_DST, &rule6->dst.addr)) ||
Expand All @@ -450,7 +486,8 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule)
{
return nla_total_size(16) /* dst */
+ nla_total_size(16); /* src */
+ nla_total_size(16) /* src */
+ nla_total_size(1); /* dscp */
}

static void fib6_rule_flush_cache(struct fib_rules_ops *ops)
Expand Down
90 changes: 90 additions & 0 deletions tools/testing/selftests/net/fib_rule_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,23 @@ fib_rule6_test()
"$getnomatch" "ipproto ipv6-icmp match" \
"ipproto ipv6-tcp no match"
fi

fib_check_iproute_support "dscp" "tos"
if [ $? -eq 0 ]; then
match="dscp 0x3f"
getmatch="tos 0xfc"
getnomatch="tos 0xf4"
fib_rule6_test_match_n_redirect "$match" "$getmatch" \
"$getnomatch" "dscp redirect to table" \
"dscp no redirect to table"

match="dscp 0x3f"
getmatch="from $SRC_IP6 iif $DEV tos 0xfc"
getnomatch="from $SRC_IP6 iif $DEV tos 0xf4"
fib_rule6_test_match_n_redirect "$match" "$getmatch" \
"$getnomatch" "iif dscp redirect to table" \
"iif dscp no redirect to table"
fi
}

fib_rule6_vrf_test()
Expand Down Expand Up @@ -319,6 +336,34 @@ fib_rule6_connect_test()
log_test $? 1 "rule6 dsfield tcp no connect (dsfield 0x20)"

$IP -6 rule del dsfield 0x04 table $RTABLE_PEER

ip rule help 2>&1 | grep -q dscp
if [ $? -ne 0 ]; then
echo "SKIP: iproute2 iprule too old, missing dscp match"
cleanup_peer
return
fi

$IP -6 rule add dscp 0x3f table $RTABLE_PEER

nettest -q -6 -B -t 5 -N $testns -O $peerns -U -D -Q 0xfc \
-l 2001:db8::1:11 -r 2001:db8::1:11
log_test $? 0 "rule6 dscp udp connect"

nettest -q -6 -B -t 5 -N $testns -O $peerns -Q 0xfc \
-l 2001:db8::1:11 -r 2001:db8::1:11
log_test $? 0 "rule6 dscp tcp connect"

nettest -q -6 -B -t 5 -N $testns -O $peerns -U -D -Q 0xf4 \
-l 2001:db8::1:11 -r 2001:db8::1:11
log_test $? 1 "rule6 dscp udp no connect"

nettest -q -6 -B -t 5 -N $testns -O $peerns -Q 0xf4 \
-l 2001:db8::1:11 -r 2001:db8::1:11
log_test $? 1 "rule6 dscp tcp no connect"

$IP -6 rule del dscp 0x3f table $RTABLE_PEER

cleanup_peer
}

Expand Down Expand Up @@ -468,6 +513,23 @@ fib_rule4_test()
"$getnomatch" "ipproto icmp match" \
"ipproto tcp no match"
fi

fib_check_iproute_support "dscp" "tos"
if [ $? -eq 0 ]; then
match="dscp 0x3f"
getmatch="tos 0xfc"
getnomatch="tos 0xf4"
fib_rule4_test_match_n_redirect "$match" "$getmatch" \
"$getnomatch" "dscp redirect to table" \
"dscp no redirect to table"

match="dscp 0x3f"
getmatch="from $SRC_IP iif $DEV tos 0xfc"
getnomatch="from $SRC_IP iif $DEV tos 0xf4"
fib_rule4_test_match_n_redirect "$match" "$getmatch" \
"$getnomatch" "iif dscp redirect to table" \
"iif dscp no redirect to table"
fi
}

fib_rule4_vrf_test()
Expand Down Expand Up @@ -513,6 +575,34 @@ fib_rule4_connect_test()
log_test $? 1 "rule4 dsfield tcp no connect (dsfield 0x20)"

$IP -4 rule del dsfield 0x04 table $RTABLE_PEER

ip rule help 2>&1 | grep -q dscp
if [ $? -ne 0 ]; then
echo "SKIP: iproute2 iprule too old, missing dscp match"
cleanup_peer
return
fi

$IP -4 rule add dscp 0x3f table $RTABLE_PEER

nettest -q -B -t 5 -N $testns -O $peerns -D -U -Q 0xfc \
-l 198.51.100.11 -r 198.51.100.11
log_test $? 0 "rule4 dscp udp connect"

nettest -q -B -t 5 -N $testns -O $peerns -Q 0xfc \
-l 198.51.100.11 -r 198.51.100.11
log_test $? 0 "rule4 dscp tcp connect"

nettest -q -B -t 5 -N $testns -O $peerns -D -U -Q 0xf4 \
-l 198.51.100.11 -r 198.51.100.11
log_test $? 1 "rule4 dscp udp no connect"

nettest -q -B -t 5 -N $testns -O $peerns -Q 0xf4 \
-l 198.51.100.11 -r 198.51.100.11
log_test $? 1 "rule4 dscp tcp no connect"

$IP -4 rule del dscp 0x3f table $RTABLE_PEER

cleanup_peer
}
################################################################################
Expand Down

0 comments on commit 7bb50f3

Please sign in to comment.