From 26a77d02891ab62172085a4f94af9b3c90aed387 Mon Sep 17 00:00:00 2001 From: Antonio Ojea Date: Thu, 27 Jun 2024 13:27:10 +0200 Subject: [PATCH 1/9] netfilter: nfnetlink_queue: unbreak SCTP traffic when packet is enqueued with nfqueue and GSO is enabled, checksum calculation has to take into account the protocol, as SCTP uses a 32 bits CRC checksum. Enter skb_gso_segment() path in case of SCTP GSO packets because skb_zerocopy() does not support for GSO_BY_FRAGS. Joint work with Pablo. Signed-off-by: Antonio Ojea Signed-off-by: Pablo Neira Ayuso --- net/core/dev.c | 1 + net/netfilter/nfnetlink_queue.c | 12 ++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index e7260889d4cb..8384282acadf 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3386,6 +3386,7 @@ int skb_crc32c_csum_help(struct sk_buff *skb) out: return ret; } +EXPORT_SYMBOL(skb_crc32c_csum_help); __be16 skb_network_protocol(struct sk_buff *skb, int *depth) { diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index e0716da256bf..d2773ce9b585 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -540,6 +540,14 @@ static int nfqnl_put_bridge(struct nf_queue_entry *entry, struct sk_buff *skb) return -1; } +static int nf_queue_checksum_help(struct sk_buff *entskb) +{ + if (skb_csum_is_sctp(entskb)) + return skb_crc32c_csum_help(entskb); + + return skb_checksum_help(entskb); +} + static struct sk_buff * nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, struct nf_queue_entry *entry, @@ -602,7 +610,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, case NFQNL_COPY_PACKET: if (!(queue->flags & NFQA_CFG_F_GSO) && entskb->ip_summed == CHECKSUM_PARTIAL && - skb_checksum_help(entskb)) + nf_queue_checksum_help(entskb)) return NULL; data_len = READ_ONCE(queue->copy_range); @@ -1014,7 +1022,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) break; } - if ((queue->flags & NFQA_CFG_F_GSO) || !skb_is_gso(skb)) + if (!skb_is_gso(skb) || ((queue->flags & NFQA_CFG_F_GSO) && !skb_is_gso_sctp(skb))) return __nfqnl_enqueue_packet(net, queue, entry); nf_bridge_adjust_skb_data(skb); From 4e97d521c2be094718c4c5c7c4f785e8972b4af0 Mon Sep 17 00:00:00 2001 From: Antonio Ojea Date: Tue, 2 Jul 2024 13:15:36 +0200 Subject: [PATCH 2/9] selftests: netfilter: nft_queue.sh: sctp coverage Test that nfqueue with and without GSO process SCTP packets correctly. Joint work with Florian and Pablo. Signed-off-by: Antonio Ojea Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/net/netfilter/config | 2 + .../selftests/net/netfilter/nft_queue.sh | 85 ++++++++++++++++++- 2 files changed, 86 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config index 63ef80ef47a4..b2dd4db45215 100644 --- a/tools/testing/selftests/net/netfilter/config +++ b/tools/testing/selftests/net/netfilter/config @@ -87,3 +87,5 @@ CONFIG_XFRM_USER=m CONFIG_XFRM_STATISTICS=y CONFIG_NET_PKTGEN=m CONFIG_TUN=m +CONFIG_INET_DIAG=m +CONFIG_SCTP_DIAG=m diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh index c61d23a8c88d..f3bdeb1271eb 100755 --- a/tools/testing/selftests/net/netfilter/nft_queue.sh +++ b/tools/testing/selftests/net/netfilter/nft_queue.sh @@ -25,6 +25,9 @@ cleanup() } checktool "nft --version" "test without nft tool" +checktool "socat -h" "run test without socat" + +modprobe -q sctp trap cleanup EXIT @@ -265,7 +268,6 @@ test_tcp_forward() test_tcp_localhost() { - dd conv=sparse status=none if=/dev/zero bs=1M count=200 of="$TMPINPUT" timeout 5 ip netns exec "$nsrouter" socat -u TCP-LISTEN:12345 STDOUT >/dev/null & local rpid=$! @@ -375,6 +377,82 @@ EOF wait 2>/dev/null } +sctp_listener_ready() +{ + ss -S -N "$1" -ln -o "sport = :12345" | grep -q 12345 +} + +test_sctp_forward() +{ + ip netns exec "$nsrouter" nft -f /dev/stdin < "$TMPFILE1" & + local rpid=$! + + busywait "$BUSYWAIT_TIMEOUT" sctp_listener_ready "$ns2" + + ip netns exec "$nsrouter" ./nf_queue -q 10 -G -t "$timeout" & + local nfqpid=$! + + ip netns exec "$ns1" socat -u STDIN SCTP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null + + if ! ip netns exec "$nsrouter" nft delete table inet sctpq; then + echo "FAIL: Could not delete sctpq table" + exit 1 + fi + + wait "$rpid" && echo "PASS: sctp and nfqueue in forward chain" + + if ! diff -u "$TMPINPUT" "$TMPFILE1" ; then + echo "FAIL: lost packets?!" 1>&2 + exit 1 + fi +} + +test_sctp_output() +{ + ip netns exec "$ns1" nft -f /dev/stdin < "$TMPFILE1" & + local rpid=$! + + busywait "$BUSYWAIT_TIMEOUT" sctp_listener_ready "$ns2" + + ip netns exec "$ns1" ./nf_queue -q 11 -t "$timeout" & + local nfqpid=$! + + ip netns exec "$ns1" socat -u STDIN SCTP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null + + if ! ip netns exec "$ns1" nft delete table inet sctpq; then + echo "FAIL: Could not delete sctpq table" + exit 1 + fi + + # must wait before checking completeness of output file. + wait "$rpid" && echo "PASS: sctp and nfqueue in output chain with GSO" + + if ! diff -u "$TMPINPUT" "$TMPFILE1" ; then + echo "FAIL: lost packets?!" 1>&2 + exit 1 + fi +} + test_queue_removal() { read tainted_then < /proc/sys/kernel/tainted @@ -443,11 +521,16 @@ test_queue 10 # same. We queue to a second program as well. load_ruleset "filter2" 20 test_queue 20 +ip netns exec "$ns1" nft flush ruleset test_tcp_forward test_tcp_localhost test_tcp_localhost_connectclose test_tcp_localhost_requeue +test_sctp_forward +test_sctp_output + +# should be last, adds vrf device in ns1 and changes routes test_icmp_vrf test_queue_removal From e2444c1d463995477fb447be9d0c54150a5c393b Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Tue, 28 May 2024 11:37:54 +0100 Subject: [PATCH 3/9] netfilter: nfnetlink: convert kfree_skb to consume_skb Use consume_skb in the batch code path to avoid generating spurious NOT_SPECIFIED skb drop reasons. Signed-off-by: Donald Hunter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 932b3ddb34f1..7784ec094097 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -402,27 +402,27 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, { nfnl_unlock(subsys_id); netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL); - return kfree_skb(skb); + return consume_skb(skb); } } if (!ss->valid_genid || !ss->commit || !ss->abort) { nfnl_unlock(subsys_id); netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL); - return kfree_skb(skb); + return consume_skb(skb); } if (!try_module_get(ss->owner)) { nfnl_unlock(subsys_id); netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL); - return kfree_skb(skb); + return consume_skb(skb); } if (!ss->valid_genid(net, genid)) { module_put(ss->owner); nfnl_unlock(subsys_id); netlink_ack(oskb, nlh, -ERESTART, NULL); - return kfree_skb(skb); + return consume_skb(skb); } nfnl_unlock(subsys_id); @@ -567,7 +567,7 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, if (status & NFNL_BATCH_REPLAY) { ss->abort(net, oskb, NFNL_ABORT_AUTOLOAD); nfnl_err_reset(&err_list); - kfree_skb(skb); + consume_skb(skb); module_put(ss->owner); goto replay; } else if (status == NFNL_BATCH_DONE) { @@ -593,7 +593,7 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, err = ss->abort(net, oskb, abort_action); if (err == -EAGAIN) { nfnl_err_reset(&err_list); - kfree_skb(skb); + consume_skb(skb); module_put(ss->owner); status |= NFNL_BATCH_FAILURE; goto replay_abort; @@ -601,7 +601,7 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, } nfnl_err_deliver(&err_list, oskb); - kfree_skb(skb); + consume_skb(skb); module_put(ss->owner); } From c1aa38866b9c58dc6cf7a5fc6a3e1ca75565169e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 10 Jul 2024 10:58:29 +0200 Subject: [PATCH 4/9] netfilter: nf_tables: store new sets in dedicated list nft_set_lookup_byid() is very slow when transaction becomes large, due to walk of the transaction list. Add a dedicated list that contains only the new sets. Before: nft -f ruleset 0.07s user 0.00s system 0% cpu 1:04.84 total After: nft -f ruleset 0.07s user 0.00s system 0% cpu 30.115 total .. where ruleset contains ~10 sets with ~100k elements. The above number is for a combined flush+reload of the ruleset. With previous flush, even the first NEWELEM has to walk through a few hundred thousands of DELSET(ELEM) transactions before the first NEWSET object. To cope with random-order-newset-newsetelem we'd need to replace commit_set_list with a hashtable. Expectation is that a NEWELEM operation refers to the most recently added set, so last entry of the dedicated list should be the set we want. NB: This is not a bug fix per se (functionality is fine), but with larger transaction batches list search takes forever, so it would be nice to speed this up for -stable too, hence adding a "fixes" tag. Fixes: 958bee14d071 ("netfilter: nf_tables: use new transaction infrastructure to handle sets") Reported-by: Nadia Pinaeva Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 ++ net/netfilter/nf_tables_api.c | 29 ++++++++++++++++++++--------- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 1bfdd16890fa..2be4738eae1c 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1674,6 +1674,7 @@ struct nft_trans_rule { struct nft_trans_set { struct nft_trans_binding nft_trans_binding; + struct list_head list_trans_newset; struct nft_set *set; u32 set_id; u32 gc_int; @@ -1875,6 +1876,7 @@ static inline int nft_request_module(struct net *net, const char *fmt, ...) { re struct nftables_pernet { struct list_head tables; struct list_head commit_list; + struct list_head commit_set_list; struct list_head binding_list; struct list_head module_list; struct list_head notify_list; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 0a2f79346958..3ea5d0163510 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -393,6 +393,7 @@ static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *tr { struct nftables_pernet *nft_net = nft_pernet(net); struct nft_trans_binding *binding; + struct nft_trans_set *trans_set; list_add_tail(&trans->list, &nft_net->commit_list); @@ -402,9 +403,13 @@ static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *tr switch (trans->msg_type) { case NFT_MSG_NEWSET: + trans_set = nft_trans_container_set(trans); + if (!nft_trans_set_update(trans) && nft_set_is_anonymous(nft_trans_set(trans))) list_add_tail(&binding->binding_list, &nft_net->binding_list); + + list_add_tail(&trans_set->list_trans_newset, &nft_net->commit_set_list); break; case NFT_MSG_NEWCHAIN: if (!nft_trans_chain_update(trans) && @@ -611,6 +616,7 @@ static int __nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, trans_set = nft_trans_container_set(trans); INIT_LIST_HEAD(&trans_set->nft_trans_binding.binding_list); + INIT_LIST_HEAD(&trans_set->list_trans_newset); if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] && !desc) { nft_trans_set_id(trans) = @@ -4485,17 +4491,16 @@ static struct nft_set *nft_set_lookup_byid(const struct net *net, { struct nftables_pernet *nft_net = nft_pernet(net); u32 id = ntohl(nla_get_be32(nla)); - struct nft_trans *trans; + struct nft_trans_set *trans; - list_for_each_entry(trans, &nft_net->commit_list, list) { - if (trans->msg_type == NFT_MSG_NEWSET) { - struct nft_set *set = nft_trans_set(trans); + /* its likely the id we need is at the tail, not at start */ + list_for_each_entry_reverse(trans, &nft_net->commit_set_list, list_trans_newset) { + struct nft_set *set = trans->set; - if (id == nft_trans_set_id(trans) && - set->table == table && - nft_active_genmask(set, genmask)) - return set; - } + if (id == trans->set_id && + set->table == table && + nft_active_genmask(set, genmask)) + return set; } return ERR_PTR(-ENOENT); } @@ -10447,6 +10452,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nft_flow_rule_destroy(nft_trans_flow_rule(trans)); break; case NFT_MSG_NEWSET: + list_del(&nft_trans_container_set(trans)->list_trans_newset); if (nft_trans_set_update(trans)) { struct nft_set *set = nft_trans_set(trans); @@ -10755,6 +10761,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_trans_destroy(trans); break; case NFT_MSG_NEWSET: + list_del(&nft_trans_container_set(trans)->list_trans_newset); if (nft_trans_set_update(trans)) { nft_trans_destroy(trans); break; @@ -10850,6 +10857,8 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) } } + WARN_ON_ONCE(!list_empty(&nft_net->commit_set_list)); + nft_set_abort_update(&set_update_list); synchronize_rcu(); @@ -11519,6 +11528,7 @@ static int __net_init nf_tables_init_net(struct net *net) INIT_LIST_HEAD(&nft_net->tables); INIT_LIST_HEAD(&nft_net->commit_list); + INIT_LIST_HEAD(&nft_net->commit_set_list); INIT_LIST_HEAD(&nft_net->binding_list); INIT_LIST_HEAD(&nft_net->module_list); INIT_LIST_HEAD(&nft_net->notify_list); @@ -11549,6 +11559,7 @@ static void __net_exit nf_tables_exit_net(struct net *net) gc_seq = nft_gc_seq_begin(nft_net); WARN_ON_ONCE(!list_empty(&nft_net->commit_list)); + WARN_ON_ONCE(!list_empty(&nft_net->commit_set_list)); if (!list_empty(&nft_net->module_list)) nf_tables_module_autoload_cleanup(net); From c9526aeb4998393171d85225ff540e28c7d4ab86 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 15 Jul 2024 13:32:31 +0200 Subject: [PATCH 5/9] netfilter: nf_tables: do not remove elements if set backend implements .abort pipapo set backend maintains two copies of the datastructure, removing the elements from the copy that is going to be discarded slows down the abort path significantly, from several minutes to few seconds after this patch. This patch was previously reverted by f86fb94011ae ("netfilter: nf_tables: revert do not remove elements if set backend implements .abort") but it is now possible since recent work by Florian Westphal to perform on-demand clone from insert/remove path: 532aec7e878b ("netfilter: nft_set_pipapo: remove dirty flag") 3f1d886cc7c3 ("netfilter: nft_set_pipapo: move cloning of match info to insert/removal path") a238106703ab ("netfilter: nft_set_pipapo: prepare pipapo_get helper for on-demand clone") c5444786d0ea ("netfilter: nft_set_pipapo: merge deactivate helper into caller") 6c108d9bee44 ("netfilter: nft_set_pipapo: prepare walk function for on-demand clone") 8b8a2417558c ("netfilter: nft_set_pipapo: prepare destroy function for on-demand clone") 80efd2997fb9 ("netfilter: nft_set_pipapo: make pipapo_clone helper return NULL") a590f4760922 ("netfilter: nft_set_pipapo: move prove_locking helper around") after this series, the clone is fully released once aborted, no need to take it back to previous state. Thus, no stale reference to elements can occur. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3ea5d0163510..c85d037a363b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -10789,7 +10789,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; } te = nft_trans_container_elem(trans); - nft_setelem_remove(net, te->set, te->elem_priv); + if (!te->set->ops->abort || + nft_setelem_is_catchall(te->set, te->elem_priv)) + nft_setelem_remove(net, te->set, te->elem_priv); + if (!nft_setelem_is_catchall(te->set, te->elem_priv)) atomic_dec(&te->set->nelems); From d5283b47e225e1473e1a07085b9c4e6bfd08ba51 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 17 Jul 2024 22:09:44 -0400 Subject: [PATCH 6/9] netfilter: move nf_ct_netns_get out of nf_conncount_init This patch is to move nf_ct_netns_get() out of nf_conncount_init() and let the consumers of nf_conncount decide if they want to turn on netfilter conntrack. It makes nf_conncount more flexible to be used in other places and avoids netfilter conntrack turned on when using it in openvswitch conntrack. Signed-off-by: Xin Long Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_count.h | 6 ++---- net/netfilter/nf_conncount.c | 15 +++------------ net/netfilter/xt_connlimit.c | 15 +++++++++++++-- net/openvswitch/conntrack.c | 5 ++--- 4 files changed, 20 insertions(+), 21 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h index e227d997fc71..1b58b5b91ff6 100644 --- a/include/net/netfilter/nf_conntrack_count.h +++ b/include/net/netfilter/nf_conntrack_count.h @@ -15,10 +15,8 @@ struct nf_conncount_list { unsigned int count; /* length of list */ }; -struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family, - unsigned int keylen); -void nf_conncount_destroy(struct net *net, unsigned int family, - struct nf_conncount_data *data); +struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int keylen); +void nf_conncount_destroy(struct net *net, struct nf_conncount_data *data); unsigned int nf_conncount_count(struct net *net, struct nf_conncount_data *data, diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index 34ba14e59e95..4890af4dc263 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -522,11 +522,10 @@ unsigned int nf_conncount_count(struct net *net, } EXPORT_SYMBOL_GPL(nf_conncount_count); -struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family, - unsigned int keylen) +struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int keylen) { struct nf_conncount_data *data; - int ret, i; + int i; if (keylen % sizeof(u32) || keylen / sizeof(u32) > MAX_KEYLEN || @@ -539,12 +538,6 @@ struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family if (!data) return ERR_PTR(-ENOMEM); - ret = nf_ct_netns_get(net, family); - if (ret < 0) { - kfree(data); - return ERR_PTR(ret); - } - for (i = 0; i < ARRAY_SIZE(data->root); ++i) data->root[i] = RB_ROOT; @@ -581,13 +574,11 @@ static void destroy_tree(struct rb_root *r) } } -void nf_conncount_destroy(struct net *net, unsigned int family, - struct nf_conncount_data *data) +void nf_conncount_destroy(struct net *net, struct nf_conncount_data *data) { unsigned int i; cancel_work_sync(&data->gc_work); - nf_ct_netns_put(net, family); for (i = 0; i < ARRAY_SIZE(data->root); ++i) destroy_tree(&data->root[i]); diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 5d04ef80a61d..0e762277bcf8 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -86,6 +86,7 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par) { struct xt_connlimit_info *info = par->matchinfo; unsigned int keylen; + int ret; keylen = sizeof(u32); if (par->family == NFPROTO_IPV6) @@ -93,8 +94,17 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par) else keylen += sizeof(struct in_addr); + ret = nf_ct_netns_get(par->net, par->family); + if (ret < 0) { + pr_info_ratelimited("cannot load conntrack support for proto=%u\n", + par->family); + return ret; + } + /* init private data */ - info->data = nf_conncount_init(par->net, par->family, keylen); + info->data = nf_conncount_init(par->net, keylen); + if (IS_ERR(info->data)) + nf_ct_netns_put(par->net, par->family); return PTR_ERR_OR_ZERO(info->data); } @@ -103,7 +113,8 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par) { const struct xt_connlimit_info *info = par->matchinfo; - nf_conncount_destroy(par->net, par->family, info->data); + nf_conncount_destroy(par->net, info->data); + nf_ct_netns_put(par->net, par->family); } static struct xt_match connlimit_mt_reg __read_mostly = { diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index a3da5ee34f92..3bb4810234aa 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1608,8 +1608,7 @@ static int ovs_ct_limit_init(struct net *net, struct ovs_net *ovs_net) for (i = 0; i < CT_LIMIT_HASH_BUCKETS; i++) INIT_HLIST_HEAD(&ovs_net->ct_limit_info->limits[i]); - ovs_net->ct_limit_info->data = - nf_conncount_init(net, NFPROTO_INET, sizeof(u32)); + ovs_net->ct_limit_info->data = nf_conncount_init(net, sizeof(u32)); if (IS_ERR(ovs_net->ct_limit_info->data)) { err = PTR_ERR(ovs_net->ct_limit_info->data); @@ -1626,7 +1625,7 @@ static void ovs_ct_limit_exit(struct net *net, struct ovs_net *ovs_net) const struct ovs_ct_limit_info *info = ovs_net->ct_limit_info; int i; - nf_conncount_destroy(net, NFPROTO_INET, info->data); + nf_conncount_destroy(net, info->data); for (i = 0; i < CT_LIMIT_HASH_BUCKETS; ++i) { struct hlist_head *head = &info->limits[i]; struct ovs_ct_limit *ct_limit; From 7ea0522ef81a335c2d3a0ab1c8a4fab9a23c4a03 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 20 Aug 2024 11:56:12 +0200 Subject: [PATCH 7/9] netfilter: nf_tables: pass context structure to nft_parse_register_load Mechanical transformation, no logical changes intended. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 3 ++- net/bridge/netfilter/nft_meta_bridge.c | 2 +- net/ipv4/netfilter/nft_dup_ipv4.c | 4 ++-- net/ipv6/netfilter/nft_dup_ipv6.c | 4 ++-- net/netfilter/nf_tables_api.c | 3 ++- net/netfilter/nft_bitwise.c | 4 ++-- net/netfilter/nft_byteorder.c | 2 +- net/netfilter/nft_cmp.c | 6 +++--- net/netfilter/nft_ct.c | 2 +- net/netfilter/nft_dup_netdev.c | 2 +- net/netfilter/nft_dynset.c | 4 ++-- net/netfilter/nft_exthdr.c | 2 +- net/netfilter/nft_fwd_netdev.c | 6 +++--- net/netfilter/nft_hash.c | 2 +- net/netfilter/nft_lookup.c | 2 +- net/netfilter/nft_masq.c | 4 ++-- net/netfilter/nft_meta.c | 2 +- net/netfilter/nft_nat.c | 8 ++++---- net/netfilter/nft_objref.c | 2 +- net/netfilter/nft_payload.c | 2 +- net/netfilter/nft_queue.c | 2 +- net/netfilter/nft_range.c | 2 +- net/netfilter/nft_redir.c | 4 ++-- net/netfilter/nft_tproxy.c | 4 ++-- 24 files changed, 40 insertions(+), 38 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 2be4738eae1c..573995e7a27e 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -254,7 +254,8 @@ static inline enum nft_registers nft_type_to_reg(enum nft_data_types type) int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest); int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg); -int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len); +int nft_parse_register_load(const struct nft_ctx *ctx, + const struct nlattr *attr, u8 *sreg, u32 len); int nft_parse_register_store(const struct nft_ctx *ctx, const struct nlattr *attr, u8 *dreg, const struct nft_data *data, diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c index bd4d1b4d745f..4d8e15927217 100644 --- a/net/bridge/netfilter/nft_meta_bridge.c +++ b/net/bridge/netfilter/nft_meta_bridge.c @@ -142,7 +142,7 @@ static int nft_meta_bridge_set_init(const struct nft_ctx *ctx, } priv->len = len; - err = nft_parse_register_load(tb[NFTA_META_SREG], &priv->sreg, len); + err = nft_parse_register_load(ctx, tb[NFTA_META_SREG], &priv->sreg, len); if (err < 0) return err; diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c index a522c3a3be52..ef5dd88107dd 100644 --- a/net/ipv4/netfilter/nft_dup_ipv4.c +++ b/net/ipv4/netfilter/nft_dup_ipv4.c @@ -40,13 +40,13 @@ static int nft_dup_ipv4_init(const struct nft_ctx *ctx, if (tb[NFTA_DUP_SREG_ADDR] == NULL) return -EINVAL; - err = nft_parse_register_load(tb[NFTA_DUP_SREG_ADDR], &priv->sreg_addr, + err = nft_parse_register_load(ctx, tb[NFTA_DUP_SREG_ADDR], &priv->sreg_addr, sizeof(struct in_addr)); if (err < 0) return err; if (tb[NFTA_DUP_SREG_DEV]) - err = nft_parse_register_load(tb[NFTA_DUP_SREG_DEV], + err = nft_parse_register_load(ctx, tb[NFTA_DUP_SREG_DEV], &priv->sreg_dev, sizeof(int)); return err; diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c index c82f3fdd4a65..492a811828a7 100644 --- a/net/ipv6/netfilter/nft_dup_ipv6.c +++ b/net/ipv6/netfilter/nft_dup_ipv6.c @@ -38,13 +38,13 @@ static int nft_dup_ipv6_init(const struct nft_ctx *ctx, if (tb[NFTA_DUP_SREG_ADDR] == NULL) return -EINVAL; - err = nft_parse_register_load(tb[NFTA_DUP_SREG_ADDR], &priv->sreg_addr, + err = nft_parse_register_load(ctx, tb[NFTA_DUP_SREG_ADDR], &priv->sreg_addr, sizeof(struct in6_addr)); if (err < 0) return err; if (tb[NFTA_DUP_SREG_DEV]) - err = nft_parse_register_load(tb[NFTA_DUP_SREG_DEV], + err = nft_parse_register_load(ctx, tb[NFTA_DUP_SREG_DEV], &priv->sreg_dev, sizeof(int)); return err; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c85d037a363b..d05fc17bb9b7 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -11038,7 +11038,8 @@ static int nft_validate_register_load(enum nft_registers reg, unsigned int len) return 0; } -int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len) +int nft_parse_register_load(const struct nft_ctx *ctx, + const struct nlattr *attr, u8 *sreg, u32 len) { u32 reg; int err; diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index ca857afbf061..7de95674fd8c 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -171,7 +171,7 @@ static int nft_bitwise_init(const struct nft_ctx *ctx, priv->len = len; - err = nft_parse_register_load(tb[NFTA_BITWISE_SREG], &priv->sreg, + err = nft_parse_register_load(ctx, tb[NFTA_BITWISE_SREG], &priv->sreg, priv->len); if (err < 0) return err; @@ -365,7 +365,7 @@ static int nft_bitwise_fast_init(const struct nft_ctx *ctx, struct nft_bitwise_fast_expr *priv = nft_expr_priv(expr); int err; - err = nft_parse_register_load(tb[NFTA_BITWISE_SREG], &priv->sreg, + err = nft_parse_register_load(ctx, tb[NFTA_BITWISE_SREG], &priv->sreg, sizeof(u32)); if (err < 0) return err; diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index f6e791a68101..2f82a444d21b 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -139,7 +139,7 @@ static int nft_byteorder_init(const struct nft_ctx *ctx, priv->len = len; - err = nft_parse_register_load(tb[NFTA_BYTEORDER_SREG], &priv->sreg, + err = nft_parse_register_load(ctx, tb[NFTA_BYTEORDER_SREG], &priv->sreg, priv->len); if (err < 0) return err; diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index cd4652259095..2605f43737bc 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -83,7 +83,7 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr, if (err < 0) return err; - err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len); + err = nft_parse_register_load(ctx, tb[NFTA_CMP_SREG], &priv->sreg, desc.len); if (err < 0) return err; @@ -222,7 +222,7 @@ static int nft_cmp_fast_init(const struct nft_ctx *ctx, if (err < 0) return err; - err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len); + err = nft_parse_register_load(ctx, tb[NFTA_CMP_SREG], &priv->sreg, desc.len); if (err < 0) return err; @@ -323,7 +323,7 @@ static int nft_cmp16_fast_init(const struct nft_ctx *ctx, if (err < 0) return err; - err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len); + err = nft_parse_register_load(ctx, tb[NFTA_CMP_SREG], &priv->sreg, desc.len); if (err < 0) return err; diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 452ed94c3a4d..67a41cd2baaf 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -606,7 +606,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, } priv->len = len; - err = nft_parse_register_load(tb[NFTA_CT_SREG], &priv->sreg, len); + err = nft_parse_register_load(ctx, tb[NFTA_CT_SREG], &priv->sreg, len); if (err < 0) goto err1; diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c index e5739a59ebf1..0573f96ce079 100644 --- a/net/netfilter/nft_dup_netdev.c +++ b/net/netfilter/nft_dup_netdev.c @@ -40,7 +40,7 @@ static int nft_dup_netdev_init(const struct nft_ctx *ctx, if (tb[NFTA_DUP_SREG_DEV] == NULL) return -EINVAL; - return nft_parse_register_load(tb[NFTA_DUP_SREG_DEV], &priv->sreg_dev, + return nft_parse_register_load(ctx, tb[NFTA_DUP_SREG_DEV], &priv->sreg_dev, sizeof(int)); } diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index b4ada3ab2167..6920df754265 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -215,7 +215,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx, return err; } - err = nft_parse_register_load(tb[NFTA_DYNSET_SREG_KEY], &priv->sreg_key, + err = nft_parse_register_load(ctx, tb[NFTA_DYNSET_SREG_KEY], &priv->sreg_key, set->klen); if (err < 0) return err; @@ -226,7 +226,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx, if (set->dtype == NFT_DATA_VERDICT) return -EOPNOTSUPP; - err = nft_parse_register_load(tb[NFTA_DYNSET_SREG_DATA], + err = nft_parse_register_load(ctx, tb[NFTA_DYNSET_SREG_DATA], &priv->sreg_data, set->dlen); if (err < 0) return err; diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 6eb571d0c3fd..6bfd33516241 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -588,7 +588,7 @@ static int nft_exthdr_tcp_set_init(const struct nft_ctx *ctx, priv->flags = flags; priv->op = op; - return nft_parse_register_load(tb[NFTA_EXTHDR_SREG], &priv->sreg, + return nft_parse_register_load(ctx, tb[NFTA_EXTHDR_SREG], &priv->sreg, priv->len); } diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index 358e742afad7..c83a794025f9 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -52,7 +52,7 @@ static int nft_fwd_netdev_init(const struct nft_ctx *ctx, if (tb[NFTA_FWD_SREG_DEV] == NULL) return -EINVAL; - return nft_parse_register_load(tb[NFTA_FWD_SREG_DEV], &priv->sreg_dev, + return nft_parse_register_load(ctx, tb[NFTA_FWD_SREG_DEV], &priv->sreg_dev, sizeof(int)); } @@ -178,12 +178,12 @@ static int nft_fwd_neigh_init(const struct nft_ctx *ctx, return -EOPNOTSUPP; } - err = nft_parse_register_load(tb[NFTA_FWD_SREG_DEV], &priv->sreg_dev, + err = nft_parse_register_load(ctx, tb[NFTA_FWD_SREG_DEV], &priv->sreg_dev, sizeof(int)); if (err < 0) return err; - return nft_parse_register_load(tb[NFTA_FWD_SREG_ADDR], &priv->sreg_addr, + return nft_parse_register_load(ctx, tb[NFTA_FWD_SREG_ADDR], &priv->sreg_addr, addr_len); } diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 868d68302d22..5d034bbb6913 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -92,7 +92,7 @@ static int nft_jhash_init(const struct nft_ctx *ctx, priv->len = len; - err = nft_parse_register_load(tb[NFTA_HASH_SREG], &priv->sreg, len); + err = nft_parse_register_load(ctx, tb[NFTA_HASH_SREG], &priv->sreg, len); if (err < 0) return err; diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index f3080fa1b226..580e4b1deb9b 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -113,7 +113,7 @@ static int nft_lookup_init(const struct nft_ctx *ctx, if (IS_ERR(set)) return PTR_ERR(set); - err = nft_parse_register_load(tb[NFTA_LOOKUP_SREG], &priv->sreg, + err = nft_parse_register_load(ctx, tb[NFTA_LOOKUP_SREG], &priv->sreg, set->klen); if (err < 0) return err; diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c index 8a14aaca93bb..cb43c72a8c2a 100644 --- a/net/netfilter/nft_masq.c +++ b/net/netfilter/nft_masq.c @@ -52,13 +52,13 @@ static int nft_masq_init(const struct nft_ctx *ctx, priv->flags = ntohl(nla_get_be32(tb[NFTA_MASQ_FLAGS])); if (tb[NFTA_MASQ_REG_PROTO_MIN]) { - err = nft_parse_register_load(tb[NFTA_MASQ_REG_PROTO_MIN], + err = nft_parse_register_load(ctx, tb[NFTA_MASQ_REG_PROTO_MIN], &priv->sreg_proto_min, plen); if (err < 0) return err; if (tb[NFTA_MASQ_REG_PROTO_MAX]) { - err = nft_parse_register_load(tb[NFTA_MASQ_REG_PROTO_MAX], + err = nft_parse_register_load(ctx, tb[NFTA_MASQ_REG_PROTO_MAX], &priv->sreg_proto_max, plen); if (err < 0) diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 9139ce38ea7b..0214ad1ced2f 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -657,7 +657,7 @@ int nft_meta_set_init(const struct nft_ctx *ctx, } priv->len = len; - err = nft_parse_register_load(tb[NFTA_META_SREG], &priv->sreg, len); + err = nft_parse_register_load(ctx, tb[NFTA_META_SREG], &priv->sreg, len); if (err < 0) return err; diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index 808f5802c270..983dd937fe02 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -214,13 +214,13 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, priv->family = family; if (tb[NFTA_NAT_REG_ADDR_MIN]) { - err = nft_parse_register_load(tb[NFTA_NAT_REG_ADDR_MIN], + err = nft_parse_register_load(ctx, tb[NFTA_NAT_REG_ADDR_MIN], &priv->sreg_addr_min, alen); if (err < 0) return err; if (tb[NFTA_NAT_REG_ADDR_MAX]) { - err = nft_parse_register_load(tb[NFTA_NAT_REG_ADDR_MAX], + err = nft_parse_register_load(ctx, tb[NFTA_NAT_REG_ADDR_MAX], &priv->sreg_addr_max, alen); if (err < 0) @@ -234,13 +234,13 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, plen = sizeof_field(struct nf_nat_range, min_proto.all); if (tb[NFTA_NAT_REG_PROTO_MIN]) { - err = nft_parse_register_load(tb[NFTA_NAT_REG_PROTO_MIN], + err = nft_parse_register_load(ctx, tb[NFTA_NAT_REG_PROTO_MIN], &priv->sreg_proto_min, plen); if (err < 0) return err; if (tb[NFTA_NAT_REG_PROTO_MAX]) { - err = nft_parse_register_load(tb[NFTA_NAT_REG_PROTO_MAX], + err = nft_parse_register_load(ctx, tb[NFTA_NAT_REG_PROTO_MAX], &priv->sreg_proto_max, plen); if (err < 0) diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index 509011b1ef59..09da7a3f9f96 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -143,7 +143,7 @@ static int nft_objref_map_init(const struct nft_ctx *ctx, if (!(set->flags & NFT_SET_OBJECT)) return -EINVAL; - err = nft_parse_register_load(tb[NFTA_OBJREF_SET_SREG], &priv->sreg, + err = nft_parse_register_load(ctx, tb[NFTA_OBJREF_SET_SREG], &priv->sreg, set->klen); if (err < 0) return err; diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 50429cbd42da..330609a76fb2 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -981,7 +981,7 @@ static int nft_payload_set_init(const struct nft_ctx *ctx, } priv->csum_type = csum_type; - return nft_parse_register_load(tb[NFTA_PAYLOAD_SREG], &priv->sreg, + return nft_parse_register_load(ctx, tb[NFTA_PAYLOAD_SREG], &priv->sreg, priv->len); } diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c index b2b8127c8d43..44e6817e6e29 100644 --- a/net/netfilter/nft_queue.c +++ b/net/netfilter/nft_queue.c @@ -136,7 +136,7 @@ static int nft_queue_sreg_init(const struct nft_ctx *ctx, struct nft_queue *priv = nft_expr_priv(expr); int err; - err = nft_parse_register_load(tb[NFTA_QUEUE_SREG_QNUM], + err = nft_parse_register_load(ctx, tb[NFTA_QUEUE_SREG_QNUM], &priv->sreg_qnum, sizeof(u32)); if (err < 0) return err; diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c index 51ae64cd268f..ea382f7bbd78 100644 --- a/net/netfilter/nft_range.c +++ b/net/netfilter/nft_range.c @@ -83,7 +83,7 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr goto err2; } - err = nft_parse_register_load(tb[NFTA_RANGE_SREG], &priv->sreg, + err = nft_parse_register_load(ctx, tb[NFTA_RANGE_SREG], &priv->sreg, desc_from.len); if (err < 0) goto err2; diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c index a58bd8d291ff..6568cc264078 100644 --- a/net/netfilter/nft_redir.c +++ b/net/netfilter/nft_redir.c @@ -51,13 +51,13 @@ static int nft_redir_init(const struct nft_ctx *ctx, plen = sizeof_field(struct nf_nat_range, min_proto.all); if (tb[NFTA_REDIR_REG_PROTO_MIN]) { - err = nft_parse_register_load(tb[NFTA_REDIR_REG_PROTO_MIN], + err = nft_parse_register_load(ctx, tb[NFTA_REDIR_REG_PROTO_MIN], &priv->sreg_proto_min, plen); if (err < 0) return err; if (tb[NFTA_REDIR_REG_PROTO_MAX]) { - err = nft_parse_register_load(tb[NFTA_REDIR_REG_PROTO_MAX], + err = nft_parse_register_load(ctx, tb[NFTA_REDIR_REG_PROTO_MAX], &priv->sreg_proto_max, plen); if (err < 0) diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c index 71412adb73d4..1b691393d8b1 100644 --- a/net/netfilter/nft_tproxy.c +++ b/net/netfilter/nft_tproxy.c @@ -254,14 +254,14 @@ static int nft_tproxy_init(const struct nft_ctx *ctx, } if (tb[NFTA_TPROXY_REG_ADDR]) { - err = nft_parse_register_load(tb[NFTA_TPROXY_REG_ADDR], + err = nft_parse_register_load(ctx, tb[NFTA_TPROXY_REG_ADDR], &priv->sreg_addr, alen); if (err < 0) return err; } if (tb[NFTA_TPROXY_REG_PORT]) { - err = nft_parse_register_load(tb[NFTA_TPROXY_REG_PORT], + err = nft_parse_register_load(ctx, tb[NFTA_TPROXY_REG_PORT], &priv->sreg_port, sizeof(u16)); if (err < 0) return err; From 14fb07130c7ddd257e30079b87499b3f89097b09 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 20 Aug 2024 11:56:13 +0200 Subject: [PATCH 8/9] netfilter: nf_tables: allow loads only when register is initialized Reject rules where a load occurs from a register that has not seen a store early in the same rule. commit 4c905f6740a3 ("netfilter: nf_tables: initialize registers in nft_do_chain()") had to add a unconditional memset to the nftables register space to avoid leaking stack information to userspace. This memset shows up in benchmarks. After this change, this commit can be reverted again. Note that this breaks userspace compatibility, because theoretically you can do rule 1: reg2 := meta load iif, reg2 == 1 jump ... rule 2: reg2 == 2 jump ... // read access with no store in this rule ... after this change this is rejected. Neither nftables nor iptables-nft generate such rules, each rule is always standalone. This resuts in a small increase of nft_ctx structure by sizeof(long). To cope with hypothetical rulesets like the example above one could emit on-demand "reg[x] = 0" store when generating the datapath blob in nf_tables_commit_chain_prepare(). A patch that does this is linked to below. For now, lets disable this. In nf_tables, a rule is the smallest unit that can be replaced from userspace, i.e. a hypothetical ruleset that relies on earlier initialisations of registers can't be changed at will as register usage would need to be coordinated. Link: https://lore.kernel.org/netfilter-devel/20240627135330.17039-4-fw@strlen.de/ Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 1 + net/netfilter/nf_tables_api.c | 38 +++++++++++++++++++++++++++---- 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 573995e7a27e..1cc33d946d41 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -221,6 +221,7 @@ struct nft_ctx { u8 family; u8 level; bool report; + DECLARE_BITMAP(reg_inited, NFT_REG32_NUM); }; enum nft_data_desc_flags { diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d05fc17bb9b7..904f2e25b4a4 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -146,6 +146,8 @@ static void nft_ctx_init(struct nft_ctx *ctx, ctx->report = nlmsg_report(nlh); ctx->flags = nlh->nlmsg_flags; ctx->seq = nlh->nlmsg_seq; + + bitmap_zero(ctx->reg_inited, NFT_REG32_NUM); } static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx, @@ -11041,8 +11043,8 @@ static int nft_validate_register_load(enum nft_registers reg, unsigned int len) int nft_parse_register_load(const struct nft_ctx *ctx, const struct nlattr *attr, u8 *sreg, u32 len) { - u32 reg; - int err; + int err, invalid_reg; + u32 reg, next_register; err = nft_parse_register(attr, ®); if (err < 0) @@ -11052,11 +11054,36 @@ int nft_parse_register_load(const struct nft_ctx *ctx, if (err < 0) return err; + next_register = DIV_ROUND_UP(len, NFT_REG32_SIZE) + reg; + + /* Can't happen: nft_validate_register_load() should have failed */ + if (WARN_ON_ONCE(next_register > NFT_REG32_NUM)) + return -EINVAL; + + /* find first register that did not see an earlier store. */ + invalid_reg = find_next_zero_bit(ctx->reg_inited, NFT_REG32_NUM, reg); + + /* invalid register within the range that we're loading from? */ + if (invalid_reg < next_register) + return -ENODATA; + *sreg = reg; return 0; } EXPORT_SYMBOL_GPL(nft_parse_register_load); +static void nft_saw_register_store(const struct nft_ctx *__ctx, + int reg, unsigned int len) +{ + unsigned int registers = DIV_ROUND_UP(len, NFT_REG32_SIZE); + struct nft_ctx *ctx = (struct nft_ctx *)__ctx; + + if (WARN_ON_ONCE(len == 0 || reg < 0)) + return; + + bitmap_set(ctx->reg_inited, reg, registers); +} + static int nft_validate_register_store(const struct nft_ctx *ctx, enum nft_registers reg, const struct nft_data *data, @@ -11078,7 +11105,7 @@ static int nft_validate_register_store(const struct nft_ctx *ctx, return err; } - return 0; + break; default: if (type != NFT_DATA_VALUE) return -EINVAL; @@ -11091,8 +11118,11 @@ static int nft_validate_register_store(const struct nft_ctx *ctx, sizeof_field(struct nft_regs, data)) return -ERANGE; - return 0; + break; } + + nft_saw_register_store(ctx, reg, len); + return 0; } int nft_parse_register_store(const struct nft_ctx *ctx, From c88baabf16d1ef74ab8832de9761226406af5507 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 20 Aug 2024 11:56:14 +0200 Subject: [PATCH 9/9] netfilter: nf_tables: don't initialize registers in nft_do_chain() revert commit 4c905f6740a3 ("netfilter: nf_tables: initialize registers in nft_do_chain()"). Previous patch makes sure that loads from uninitialized registers are detected from the control plane. in this case rule blob auto-zeroes registers. Thus the explicit zeroing is not needed anymore. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index a48d5f0e2f3e..75598520b0fa 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -256,7 +256,7 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv) const struct net *net = nft_net(pkt); const struct nft_expr *expr, *last; const struct nft_rule_dp *rule; - struct nft_regs regs = {}; + struct nft_regs regs; unsigned int stackptr = 0; struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; bool genbit = READ_ONCE(net->nft.gencursor);