Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

issue: 3382805 Fixing incorrect handling of src field of IPv4 routing #126

Merged
merged 5 commits into from
Aug 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions src/core/event/netlink_event.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,13 +78,12 @@ const std::string route_nl_event::to_str() const

sprintf(outstr,
"%s. ROUTE: TABLE=%u SCOPE=%u FAMILY=%u PROTOCOL=%u DST_ADDR=%s DST_PREFIX=%u "
"TYPE=%u PREF_SRC=%s CFG_SRC=%s IFF_NAME=%s",
"TYPE=%u PREF_SRC=%s IFF_NAME=%s",
netlink_event::to_str().c_str(), p_route_val.get_table_id(), p_route_val.get_scope(),
p_route_val.get_family(), p_route_val.get_protocol(),
p_route_val.get_dst_addr().to_str(p_route_val.get_family()).c_str(),
p_route_val.get_dst_pref_len(), p_route_val.get_type(),
p_route_val.get_src_addr().to_str(p_route_val.get_family()).c_str(),
p_route_val.get_cfg_src_addr().to_str(p_route_val.get_family()).c_str(),
p_route_val.get_if_name());

return std::string(outstr);
Expand Down
1 change: 0 additions & 1 deletion src/core/netlink/route_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,6 @@ void netlink_route_info::fill(struct rtnl_route *nl_route_obj)
// TODO: improve error handling
assert(family == nl_addr_get_family(addr));
m_route_val.set_src_addr(ip_address((void *)nl_addr_get_binary_addr(addr), family));
m_route_val.set_cfg_src_addr(ip_address((void *)nl_addr_get_binary_addr(addr), family));
}

rtnl_nexthop *nh = rtnl_route_nexthop_n(nl_route_obj, 0);
Expand Down
47 changes: 9 additions & 38 deletions src/core/proto/dst_entry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ dst_entry::dst_entry(const sock_addr &dst, uint16_t src_port, socket_data &sock_
: (header *)(new header_ipv4()))
, m_bound_ip(in6addr_any)
, m_so_bindtodevice_ip(in6addr_any)
, m_route_src_ip(in6addr_any)
, m_pkt_src_ip(in6addr_any)
, m_ring_alloc_logic_tx(sock_data.fd, ring_alloc_logic)
, m_p_tx_mem_buf_desc_list(nullptr)
Expand Down Expand Up @@ -91,7 +90,7 @@ dst_entry::~dst_entry()

if (m_p_rt_entry) {
g_p_route_table_mgr->unregister_observer(
route_rule_table_key(m_dst_ip, m_route_src_ip, m_family, m_tos), this);
route_rule_table_key(m_dst_ip, m_bound_ip, m_family, m_tos), this);
m_p_rt_entry = nullptr;
}

Expand Down Expand Up @@ -163,8 +162,8 @@ void dst_entry::init_members()

void dst_entry::set_src_addr()
{
if (!m_route_src_ip.is_anyaddr()) {
m_pkt_src_ip = m_route_src_ip;
if (!m_bound_ip.is_anyaddr()) {
m_pkt_src_ip = m_bound_ip;
dst_logfunc("Selected source address (bind): %s",
m_pkt_src_ip.to_str(get_sa_family()).c_str());
} else if (get_routing_addr_sel_src(m_pkt_src_ip)) {
Expand All @@ -191,9 +190,7 @@ void dst_entry::set_src_addr()
bool dst_entry::get_routing_addr_sel_src(ip_address &out_ip) const
{
if (m_p_rt_val) {
// For AF_INET we keep the legacy source selection logic which uses modified routing src
out_ip =
(m_family == AF_INET ? m_p_rt_val->get_src_addr() : m_p_rt_val->get_cfg_src_addr());
out_ip = m_p_rt_val->get_src_addr();
return !out_ip.is_anyaddr();
}

Expand Down Expand Up @@ -275,7 +272,7 @@ bool dst_entry::update_rt_val()
return ret_val;
}

bool dst_entry::resolve_net_dev(bool is_connect)
bool dst_entry::resolve_net_dev()
{
bool ret_val = false;

Expand All @@ -294,37 +291,12 @@ bool dst_entry::resolve_net_dev(bool is_connect)
// When XLIO will support routing with OIF, we need to check changing in outgoing interface
// Source address changes is not checked since multiple bind is not allowed on the same socket
if (!m_p_rt_entry) {
m_route_src_ip = m_bound_ip;
route_rule_table_key rtk(m_dst_ip, m_route_src_ip, m_family, m_tos);
dst_logfunc("Fetching rt_entry %s", m_route_src_ip.to_str(m_family).c_str());
route_rule_table_key rtk(m_dst_ip, m_bound_ip, m_family, m_tos);
dst_logfunc("Fetching rt_entry %s", m_bound_ip.to_str(m_family).c_str());
if (g_p_route_table_mgr->register_observer(rtk, this, &p_ces)) {
// In case this is the first time we trying to resolve route entry,
// means that register_observer was run
m_p_rt_entry = dynamic_cast<route_entry *>(p_ces);

// Routing entry src-addr is used for src-addr selection algorithm and not
// as a key for matching routing rules. Moreover, this can be a forcefully
// set src addr by XLIO. We keep this logic for IPv4 only for backward compliancy.
if (m_family == AF_INET && is_connect && m_route_src_ip.is_anyaddr()) {
dst_logfunc("Checking rt_entry src addr");
route_val *p_rt_val = nullptr;
if (m_p_rt_entry && m_p_rt_entry->get_val(p_rt_val) &&
!p_rt_val->get_src_addr().is_anyaddr()) {
g_p_route_table_mgr->unregister_observer(rtk, this);
m_route_src_ip = p_rt_val->get_src_addr();

dst_logfunc("Chaning m_route_src_ip to %s",
m_route_src_ip.to_str(m_family).c_str());

route_rule_table_key new_rtk(m_dst_ip, m_route_src_ip, m_family, m_tos);
if (g_p_route_table_mgr->register_observer(new_rtk, this, &p_ces)) {
m_p_rt_entry = dynamic_cast<route_entry *>(p_ces);
} else {
dst_logdbg("Error in route resolving logic");
return ret_val;
}
}
}
} else {
dst_logdbg("Error in registering route entry");
return ret_val;
Expand Down Expand Up @@ -539,8 +511,7 @@ bool dst_entry::offloaded_according_to_rules()
return ret_val;
}

bool dst_entry::prepare_to_send(struct xlio_rate_limit_t &rate_limit, bool skip_rules,
bool is_connect)
bool dst_entry::prepare_to_send(struct xlio_rate_limit_t &rate_limit, bool skip_rules)
{
bool resolved = false;
m_slow_path_lock.lock();
Expand All @@ -556,7 +527,7 @@ bool dst_entry::prepare_to_send(struct xlio_rate_limit_t &rate_limit, bool skip_
if (!m_b_force_os && !is_valid()) {
bool b_is_offloaded = false;
set_state(true);
if (resolve_net_dev(is_connect)) {
if (resolve_net_dev()) {
set_src_addr();
// overwrite mtu from route if exists
m_max_udp_payload_size =
Expand Down
6 changes: 2 additions & 4 deletions src/core/proto/dst_entry.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,7 @@ class dst_entry : public cache_observer, public tostr {
virtual void notify_cb();
virtual void notify_cb(event *ev);

virtual bool prepare_to_send(struct xlio_rate_limit_t &rate_limit, bool skip_rules = false,
bool is_connect = false);
virtual bool prepare_to_send(struct xlio_rate_limit_t &rate_limit, bool skip_rules = false);
virtual ssize_t fast_send(const iovec *p_iov, const ssize_t sz_iov, xlio_send_attr attr) = 0;
virtual ssize_t slow_send(const iovec *p_iov, const ssize_t sz_iov, xlio_send_attr attr,
struct xlio_rate_limit_t &rate_limit, int flags = 0,
Expand Down Expand Up @@ -150,7 +149,6 @@ class dst_entry : public cache_observer, public tostr {
header *m_header_neigh;
ip_address m_bound_ip;
ip_address m_so_bindtodevice_ip;
ip_address m_route_src_ip; // source IP used to register in route manager
ip_address m_pkt_src_ip; // source IP address copied into IP header
lock_mutex_recursive m_slow_path_lock;
lock_mutex m_tx_migration_lock;
Expand Down Expand Up @@ -195,7 +193,7 @@ class dst_entry : public cache_observer, public tostr {

virtual bool offloaded_according_to_rules();
virtual void init_members();
virtual bool resolve_net_dev(bool is_connect = false);
virtual bool resolve_net_dev();
virtual void set_src_addr();
bool update_net_dev_val();
bool update_rt_val();
Expand Down
3 changes: 1 addition & 2 deletions src/core/proto/dst_entry_udp_mc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,8 @@ void dst_entry_udp_mc::set_src_addr()
}

// The following function supposed to be called under m_lock
bool dst_entry_udp_mc::resolve_net_dev(bool is_connect)
bool dst_entry_udp_mc::resolve_net_dev()
{
NOT_IN_USE(is_connect);
bool ret_val = false;
cache_entry_subject<int, net_device_val *> *net_dev_entry = nullptr;

Expand Down
2 changes: 1 addition & 1 deletion src/core/proto/dst_entry_udp_mc.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class dst_entry_udp_mc : public dst_entry_udp {
bool m_b_mc_loopback_enabled;

virtual void set_src_addr();
virtual bool resolve_net_dev(bool is_connect = false);
virtual bool resolve_net_dev();
};

#endif /* DST_ENTRY_UDP_MC_H */
147 changes: 5 additions & 142 deletions src/core/proto/route_table_mgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,6 @@ route_table_mgr::route_table_mgr()
// Read Route table from kernel and save it in local variable.
update_tbl(ROUTE_DATA_TYPE);

update_rte_netdev(m_table_in4);
update_rte_netdev(m_table_in6);

// Print table
print_tbl();

Expand All @@ -104,14 +101,6 @@ route_table_mgr::~route_table_mgr()
{
rt_mgr_logdbg("");

// clear all route_entrys created in the constructor
in_addr_route_entry_map_t::iterator iter;

while ((iter = m_rte_list_for_each_net_dev.begin()) != m_rte_list_for_each_net_dev.end()) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

probably can remove "in_addr_route_entry_map_t::iterator iter;" as well

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

delete (iter->second);
m_rte_list_for_each_net_dev.erase(iter);
}

auto cache_itr = m_cache_tbl.begin();
for (; cache_itr != m_cache_tbl.end(); cache_itr = m_cache_tbl.begin()) {
delete (cache_itr->second);
Expand Down Expand Up @@ -159,115 +148,6 @@ void route_table_mgr::update_tbl(nl_data_t data_type)
std::lock_guard<decltype(m_lock)> lock(m_lock);

netlink_socket_mgr::update_tbl(data_type);

rt_mgr_update_source_ip(m_table_in4);

return;
}

void route_table_mgr::rt_mgr_update_source_ip(route_table_t &table)
{
// for route entries which still have no src ip and no gw
for (route_val &val : table) {
if (!val.get_src_addr().is_anyaddr() || !val.get_gw_addr().is_anyaddr()) {
continue;
}

// try to get src ip from net_dev list of the interface
int longest_prefix = -1;
ip_address correct_src;
local_ip_list_t lip_list;
g_p_net_device_table_mgr->get_ip_list(lip_list, val.get_family(), val.get_if_index());
if (!lip_list.empty()) {
for (auto lip_iter = lip_list.begin(); lip_list.end() != lip_iter; ++lip_iter) {
const ip_data &ip = *lip_iter;
if (val.get_dst_addr().is_equal_with_prefix(ip.local_addr, ip.prefixlen,
val.get_family())) {
// found a match in routing table
if (ip.prefixlen > longest_prefix) {
longest_prefix = ip.prefixlen; // this is the longest prefix match
correct_src = ip.local_addr;
}
}
}
if (longest_prefix > -1) {
val.set_src_addr(correct_src);
continue;
}
}

// if still no src ip, get it from ioctl
ip_addr src_addr {0};
const char *if_name = val.get_if_name();
if (!get_ip_addr_from_ifname(if_name, src_addr, val.get_family())) {
assert(src_addr.get_family() == val.get_family());
val.set_src_addr(src_addr);
} else {
// Failed mapping if_name to IP address
rt_mgr_logwarn("could not figure out source ip for rtv = %s", val.to_str().c_str());
}
}

// for route entries with gateway, do recursive search for src ip
int num_unresolved_src = table.size();
int prev_num_unresolved_src = 0;
do {
prev_num_unresolved_src = num_unresolved_src;
num_unresolved_src = 0;
for (route_val &val : table) {
if (!val.get_gw_addr().is_anyaddr() && val.get_src_addr().is_anyaddr()) {
route_val *p_val_dst;
uint32_t table_id = val.get_table_id();
if ((p_val_dst = ::find_route_val(table, val.get_gw_addr(), table_id))) {
if (!p_val_dst->get_src_addr().is_anyaddr()) {
val.set_src_addr(p_val_dst->get_src_addr());
} else if (&val == p_val_dst) { // gateway of the entry lead to same entry
local_ip_list_t lip_offloaded_list;
g_p_net_device_table_mgr->get_ip_list(lip_offloaded_list, val.get_family(),
val.get_if_index());
for (auto lip_iter = lip_offloaded_list.begin();
lip_offloaded_list.end() != lip_iter; ++lip_iter) {
const ip_data &ip = *lip_iter;
if (val.get_gw_addr() == ip.local_addr) {
val.set_gw(ip_address::any_addr());
val.set_src_addr(ip.local_addr);
break;
}
}
}
// gateway and source are equal, no need of gw.
if (val.get_src_addr() == val.get_gw_addr()) {
val.set_gw(ip_address::any_addr());
}
}
if (val.get_src_addr().is_anyaddr()) {
num_unresolved_src++;
}
}
}
} while (num_unresolved_src && prev_num_unresolved_src > num_unresolved_src);

// for route entries which still have no src ip
for (auto iter = table.begin(); iter != table.end(); ++iter) {
route_val &val = *iter;
if (!val.get_src_addr().is_anyaddr()) {
continue;
}
if (!val.get_gw_addr().is_anyaddr()) {
rt_mgr_logdbg("could not figure out source ip for gw address. rtv = %s",
val.to_str().c_str());
}
// if still no src ip, get it from ioctl
ip_addr src_addr {0};
const char *if_name = val.get_if_name();
if (!get_ip_addr_from_ifname(if_name, src_addr, val.get_family())) {
assert(src_addr.get_family() == val.get_family());
val.set_src_addr(src_addr);
} else {
// Failed mapping if_name to IP address
rt_mgr_logdbg("could not figure out source ip for rtv = %s", val.to_str().c_str());
}
}
}

void route_table_mgr::parse_entry(struct nlmsghdr *nl_header)
Expand Down Expand Up @@ -324,7 +204,6 @@ void route_table_mgr::parse_attr(struct rtattr *rt_attribute, route_val &val)
case RTA_SRC:
case RTA_PREFSRC:
val.set_src_addr(ip_address((void *)RTA_DATA(rt_attribute), val.get_family()));
val.set_cfg_src_addr(ip_address((void *)RTA_DATA(rt_attribute), val.get_family()));
break;
case RTA_TABLE:
val.set_table_id(*(uint32_t *)RTA_DATA(rt_attribute));
Expand Down Expand Up @@ -432,12 +311,14 @@ bool route_table_mgr::route_resolve(IN route_rule_table_key key, OUT route_resul
for (const auto &table_id : table_id_list) {
p_val = ::find_route_val(rt, dst_addr, table_id);
if (p_val) {
res = *p_val;
res.mtu = p_val->get_mtu();
res.if_index = p_val->get_if_index();

rt_mgr_logdbg("dst ip '%s' resolved to if_index: %d, src-addr: %s, gw-addr: %s, "
"route-mtu: %" PRIu32,
dst_addr.to_str(family).c_str(), res.if_index,
res.src.to_str(family).c_str(), res.gw.to_str(family).c_str(), res.mtu);
dst_addr.to_str(family).c_str(), p_val->get_if_index(),
p_val->get_src_addr().to_str(family).c_str(),
p_val->get_gw_addr().to_str(family).c_str(), p_val->get_mtu());
++m_stats.n_lookup_hit;
return true;
}
Expand All @@ -448,23 +329,6 @@ bool route_table_mgr::route_resolve(IN route_rule_table_key key, OUT route_resul
return false;
}

void route_table_mgr::update_rte_netdev(route_table_t &table)
{
// Create route_entry for each netdev to receive port up/down events for net_dev_entry
for (const auto &val : table) {
const ip_address &src_addr = val.get_src_addr();
auto iter_rte = m_rte_list_for_each_net_dev.find(src_addr);
// If src_addr of interface exists in the map, no need to create another route_entry
if (iter_rte == m_rte_list_for_each_net_dev.end()) {
const ip_address &dst_ip = src_addr;
const ip_address &src_ip = ip_address::any_addr();
uint8_t tos = 0;
m_rte_list_for_each_net_dev[src_addr] =
create_new_entry(route_rule_table_key(dst_ip, src_ip, val.get_family(), tos), NULL);
}
}
}

void route_table_mgr::update_entry(INOUT route_entry *p_ent, bool b_register_to_net_dev /*= false*/)
{
rt_mgr_logdbg("entry [%p]", p_ent);
Expand Down Expand Up @@ -530,7 +394,6 @@ void route_table_mgr::new_route_event(const route_val &netlink_route_val)
val.set_dst_addr(netlink_route_val.get_dst_addr());
val.set_dst_pref_len(netlink_route_val.get_dst_pref_len());
val.set_src_addr(netlink_route_val.get_src_addr());
val.set_cfg_src_addr(netlink_route_val.get_cfg_src_addr());
val.set_gw(netlink_route_val.get_gw_addr());
val.set_family(netlink_route_val.get_family());
val.set_protocol(netlink_route_val.get_protocol());
Expand Down
Loading