Skip to content

Commit

Permalink
issue: 3613619 Avoid posting RX WQEs for Neigh ring
Browse files Browse the repository at this point in the history
The Neigh ring is used for TX only, to send ARP/ICMPv6 packets. However, each XLIO ring creates both an SQ and an RQ.
By avoiding posting RX WQEs to the Neigh ring's RQ, a considerable amount of memory is saved.

Signed-off-by: Alexander Grissik <agrissik@nvidia.com>
  • Loading branch information
AlexanderGrissik committed Dec 28, 2023
1 parent a27ee15 commit 1a1ec20
Show file tree
Hide file tree
Showing 9 changed files with 34 additions and 29 deletions.
19 changes: 8 additions & 11 deletions src/core/dev/net_device_val.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,16 +74,18 @@ ring_alloc_logic_attr::ring_alloc_logic_attr()
: m_ring_alloc_logic(RING_LOGIC_PER_INTERFACE)
, m_user_id_key(0)
, m_use_locks(true)
, m_tx_only(false)
{
m_mem_desc.iov_base = NULL;
m_mem_desc.iov_len = 0;
init();
}

ring_alloc_logic_attr::ring_alloc_logic_attr(ring_logic_t ring_logic, bool use_locks)
ring_alloc_logic_attr::ring_alloc_logic_attr(ring_logic_t ring_logic, bool use_locks, bool tx_only)
: m_ring_alloc_logic(ring_logic)
, m_user_id_key(0)
, m_use_locks(use_locks)
, m_tx_only(tx_only)
{
m_mem_desc.iov_base = NULL;
m_mem_desc.iov_len = 0;
Expand All @@ -96,6 +98,7 @@ ring_alloc_logic_attr::ring_alloc_logic_attr(const ring_alloc_logic_attr &other)
, m_user_id_key(other.m_user_id_key)
, m_mem_desc(other.m_mem_desc)
, m_use_locks(other.m_use_locks)
, m_tx_only(other.m_tx_only)
{
}

Expand All @@ -121,6 +124,7 @@ void ring_alloc_logic_attr::init()
HASH_ITER(m_mem_desc.iov_base, uintptr_t);
HASH_ITER(m_mem_desc.iov_len, size_t);
HASH_ITER(m_use_locks, bool);
HASH_ITER(m_tx_only, bool);

m_hash = h;
#undef HASH_ITER
Expand Down Expand Up @@ -150,21 +154,13 @@ void ring_alloc_logic_attr::set_user_id_key(uint64_t user_id_key)
}
}

// Updates the use-locks flag of this attribute set.
// When the value actually changes, init() is invoked so the cached hash
// (which folds in m_use_locks) is recomputed; otherwise nothing is touched.
void ring_alloc_logic_attr::set_use_locks(bool use_locks)
{
// Same value as before: keep the existing hash.
if (m_use_locks == use_locks) {
return;
}

m_use_locks = use_locks;
init();
}

const std::string ring_alloc_logic_attr::to_str() const
{
std::stringstream ss;

ss << "allocation logic " << m_ring_alloc_logic << " key " << m_user_id_key << " user address "
<< m_mem_desc.iov_base << " user length " << m_mem_desc.iov_len << " use locks "
<< !!m_use_locks;
<< !!m_use_locks << " tx-only " << !!m_tx_only;

return ss.str();
}
Expand Down Expand Up @@ -1429,7 +1425,8 @@ ring *net_device_val_eth::create_ring(resource_allocation_key *key)
try {
switch (m_bond) {
case NO_BOND:
ring = new ring_eth(get_if_idx(), nullptr, RING_ETH, true,
ring = new ring_eth(get_if_idx(), nullptr,
(key && key->is_tx_only() ? RING_ETH_TX : RING_ETH), true,
(key ? key->get_use_locks() : true));
break;
case ACTIVE_BACKUP:
Expand Down
14 changes: 9 additions & 5 deletions src/core/dev/net_device_val.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,24 +57,26 @@ class ib_ctx_handler;
class ring_alloc_logic_attr {
public:
ring_alloc_logic_attr();
ring_alloc_logic_attr(ring_logic_t ring_logic, bool use_locks);
ring_alloc_logic_attr(ring_logic_t ring_logic, bool use_locks, bool tx_only);
ring_alloc_logic_attr(const ring_alloc_logic_attr &other);
void set_ring_alloc_logic(ring_logic_t logic);
void set_memory_descriptor(iovec &mem_desc);
void set_user_id_key(uint64_t user_id_key);
void set_use_locks(bool use_locks);
const std::string to_str() const;
inline ring_logic_t get_ring_alloc_logic() { return m_ring_alloc_logic; }
inline ring_logic_t get_ring_alloc_logic() const { return m_ring_alloc_logic; }
inline iovec *get_memory_descriptor() { return &m_mem_desc; }
inline uint64_t get_user_id_key() { return m_user_id_key; }
inline bool get_use_locks() { return m_use_locks; }
inline uint64_t get_user_id_key() const { return m_user_id_key; }
inline bool get_use_locks() const { return m_use_locks; }
inline bool is_tx_only() const { return m_tx_only; }

bool operator==(const ring_alloc_logic_attr &other) const
{
return (m_ring_alloc_logic == other.m_ring_alloc_logic &&
m_user_id_key == other.m_user_id_key &&
m_mem_desc.iov_base == other.m_mem_desc.iov_base &&
m_mem_desc.iov_len == other.m_mem_desc.iov_len && m_use_locks == other.m_use_locks);
m_mem_desc.iov_len == other.m_mem_desc.iov_len &&
m_use_locks == other.m_use_locks && m_tx_only == other.m_tx_only);
}

bool operator!=(const ring_alloc_logic_attr &other) const { return !(*this == other); }
Expand All @@ -88,6 +90,7 @@ class ring_alloc_logic_attr {
m_mem_desc.iov_base = other.m_mem_desc.iov_base;
m_mem_desc.iov_len = other.m_mem_desc.iov_len;
m_use_locks = other.m_use_locks;
m_tx_only = other.m_tx_only;
}
return *this;
}
Expand All @@ -107,6 +110,7 @@ class ring_alloc_logic_attr {
uint64_t m_user_id_key;
iovec m_mem_desc;
bool m_use_locks;
bool m_tx_only;
void init();
};

Expand Down
8 changes: 3 additions & 5 deletions src/core/dev/qp_mgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,9 @@ int qp_mgr::configure(struct qp_mgr_desc *desc)
m_rx_num_wr = m_max_qp_wr;
}

// When ::up() is called later, it will skip posting RX WQEs when (m_max_qp_wr == 0).
m_max_qp_wr = (desc->tx_only ? 0 : m_max_qp_wr);

qp_logdbg("HW Dummy send support for QP = %d", m_hw_dummy_send_support);

// Create associated Tx & Rx cq_mgrs
Expand Down Expand Up @@ -478,11 +481,6 @@ void qp_mgr::trigger_completion_for_all_sent_packets()
}
}

// Returns the RX work-request count stored in m_rx_num_wr.
uint32_t qp_mgr::get_rx_max_wr_num()
{
const uint32_t rx_wr_count = m_rx_num_wr;
return rx_wr_count;
}

void qp_mgr::post_recv_buffer(mem_buf_desc_t *p_mem_buf_desc)
{
if (m_n_sysvar_rx_prefetch_bytes_before_poll) {
Expand Down
7 changes: 6 additions & 1 deletion src/core/dev/qp_mgr.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ struct qp_mgr_desc {
ring_simple *ring;
const struct slave_data *slave;
struct ibv_comp_channel *rx_comp_event_channel;
bool tx_only;
};

/* Work request completion callback */
Expand Down Expand Up @@ -178,7 +179,11 @@ class qp_mgr {
struct ibv_qp *get_ibv_qp() const { return m_qp; };
class cq_mgr *get_tx_cq_mgr() const { return m_p_cq_mgr_tx; }
class cq_mgr *get_rx_cq_mgr() const { return m_p_cq_mgr_rx; }
virtual uint32_t get_rx_max_wr_num();

// This method is used for the initial posting of RQ WQEs.
// It returns 0 for a TX-only ring (m_max_qp_wr == 0), so no WQEs are posted.
uint32_t get_rx_max_wr_num() { return (m_max_qp_wr > 0 ? m_rx_num_wr : 0); }

// This function can be replaced with a parameter during ring creation.
// chain of calls may serve as cache warm for dummy send feature.
inline bool get_hw_dummy_send_support() { return m_hw_dummy_send_support; }
Expand Down
1 change: 1 addition & 0 deletions src/core/dev/ring_simple.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,7 @@ void ring_simple::create_resources()
desc.ring = this;
desc.slave = p_slave;
desc.rx_comp_event_channel = m_p_rx_comp_event_channel;
desc.tx_only = (get_type() == RING_ETH_TX);
m_p_qp_mgr = create_qp_mgr(&desc);
BULLSEYE_EXCLUDE_BLOCK_START
if (m_p_qp_mgr == NULL) {
Expand Down
2 changes: 1 addition & 1 deletion src/core/proto/neighbour.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ neigh_entry::neigh_entry(neigh_key key, transport_type_t _type, bool is_init_res
}

// Allocate one ring for g_p_neigh_table_mgr. All neigh_entry objects will share the same ring.
ring_alloc_logic_attr ring_attr(RING_LOGIC_PER_OBJECT, true);
ring_alloc_logic_attr ring_attr(RING_LOGIC_PER_OBJECT, true, true);
m_ring_allocation_logic = ring_allocation_logic_tx(g_p_neigh_table_mgr, ring_attr, this);

if (is_init_resources) {
Expand Down
4 changes: 2 additions & 2 deletions src/core/sock/sockinfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,8 @@ sockinfo::sockinfo(int fd, int domain, bool use_ring_locks)
, m_rx_ready_byte_count(0)
, m_n_sysvar_rx_num_buffs_reuse(safe_mce_sys().rx_bufs_batch)
, m_n_sysvar_rx_poll_num(safe_mce_sys().rx_poll_num)
, m_ring_alloc_log_rx(safe_mce_sys().ring_allocation_logic_rx, use_ring_locks)
, m_ring_alloc_log_tx(safe_mce_sys().ring_allocation_logic_tx, use_ring_locks)
, m_ring_alloc_log_rx(safe_mce_sys().ring_allocation_logic_rx, use_ring_locks, false)
, m_ring_alloc_log_tx(safe_mce_sys().ring_allocation_logic_tx, use_ring_locks, false)
, m_pcp(0)
, m_rx_callback(NULL)
, m_rx_callback_context(NULL)
Expand Down
4 changes: 2 additions & 2 deletions src/core/sock/sockinfo_tcp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4595,8 +4595,8 @@ int sockinfo_tcp::tcp_setsockopt(int __level, int __optname, __const void *__opt
!ring_isolated) {
m_tcp_con_lock = multilock::create_new_lock(MULTILOCK_RECURSIVE, "tcp_con");
}
set_ring_logic_rx(ring_alloc_logic_attr(RING_LOGIC_ISOLATE, true));
set_ring_logic_tx(ring_alloc_logic_attr(RING_LOGIC_ISOLATE, true));
set_ring_logic_rx(ring_alloc_logic_attr(RING_LOGIC_ISOLATE, true, false));
set_ring_logic_tx(ring_alloc_logic_attr(RING_LOGIC_ISOLATE, true, false));
break;
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/core/util/xlio_stats.h
Original file line number Diff line number Diff line change
Expand Up @@ -351,9 +351,9 @@ typedef struct {
cq_stats_t cq_stats;
} cq_instance_block_t;

typedef enum { RING_ETH = 0, RING_TAP } ring_type_t;
typedef enum { RING_ETH = 0, RING_TAP, RING_ETH_TX } ring_type_t;

static const char *const ring_type_str[] = {"RING_ETH", "RING_TAP"};
static const char *const ring_type_str[] = {"RING_ETH", "RING_TAP", "RING_ETH_TX"};

// Ring stat info
typedef struct {
Expand Down

0 comments on commit 1a1ec20

Please sign in to comment.