Skip to content

Commit

Permalink
issue: 3613619 Avoid posting RX WQEs for Neigh ring
Browse files Browse the repository at this point in the history
The Neigh ring is used for TX only, to send ARP/ICMPv6 packets. However, each XLIO ring creates both an SQ and an RQ.
By not posting RX WQEs to the Neigh ring's RQ, a considerable amount of memory is saved.

Signed-off-by: Alexander Grissik <agrissik@nvidia.com>
  • Loading branch information
AlexanderGrissik committed Jan 1, 2024
1 parent 6dd595d commit 5a67a4d
Show file tree
Hide file tree
Showing 11 changed files with 28 additions and 52 deletions.
17 changes: 5 additions & 12 deletions src/core/dev/net_device_val.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,14 +150,6 @@ void ring_alloc_logic_attr::set_user_id_key(uint64_t user_id_key)
}
}

// Updates the m_use_locks setting of this ring allocation attribute.
// init() is re-run only when the stored value actually changes; a call that
// passes the value already held is a no-op.
void ring_alloc_logic_attr::set_use_locks(bool use_locks)
{
    if (m_use_locks == use_locks) {
        return; // Nothing changed, so init() is not invoked.
    }

    m_use_locks = use_locks;
    init();
}

const std::string ring_alloc_logic_attr::to_str() const
{
std::stringstream ss;
Expand Down Expand Up @@ -1428,10 +1420,11 @@ ring *net_device_val_eth::create_ring(resource_allocation_key *key)

try {
switch (m_bond) {
case NO_BOND:
ring = new ring_eth(get_if_idx(), nullptr, RING_ETH, true,
(key ? key->get_use_locks() : true));
break;
case NO_BOND: {
bool tx_only = (key && key->get_ring_alloc_logic() == RING_LOGIC_NEIGH);
bool use_locks = (!key || key->get_use_locks());
ring = new ring_eth(get_if_idx(), nullptr, tx_only, use_locks);
} break;
case ACTIVE_BACKUP:
case LAG_8023ad:
ring = new ring_bond_eth(get_if_idx());
Expand Down
7 changes: 3 additions & 4 deletions src/core/dev/net_device_val.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,11 @@ class ring_alloc_logic_attr {
void set_ring_alloc_logic(ring_logic_t logic);
void set_memory_descriptor(iovec &mem_desc);
void set_user_id_key(uint64_t user_id_key);
void set_use_locks(bool use_locks);
const std::string to_str() const;
inline ring_logic_t get_ring_alloc_logic() { return m_ring_alloc_logic; }
inline ring_logic_t get_ring_alloc_logic() const { return m_ring_alloc_logic; }
inline iovec *get_memory_descriptor() { return &m_mem_desc; }
inline uint64_t get_user_id_key() { return m_user_id_key; }
inline bool get_use_locks() { return m_use_locks; }
inline uint64_t get_user_id_key() const { return m_user_id_key; }
inline bool get_use_locks() const { return m_use_locks; }

bool operator==(const ring_alloc_logic_attr &other) const
{
Expand Down
8 changes: 3 additions & 5 deletions src/core/dev/qp_mgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,9 @@ int qp_mgr::configure(struct qp_mgr_desc *desc)
m_rx_num_wr = m_max_qp_wr;
}

// When ::up() is called later, it will skip posting RX WQEs when (m_max_qp_wr == 0).
m_max_qp_wr = (desc->tx_only ? 0 : m_max_qp_wr);

qp_logdbg("HW Dummy send support for QP = %d", m_hw_dummy_send_support);

// Create associated Tx & Rx cq_mgrs
Expand Down Expand Up @@ -478,11 +481,6 @@ void qp_mgr::trigger_completion_for_all_sent_packets()
}
}

// Returns the current value of m_rx_num_wr.
// NOTE(review): presumably the number of RX work requests sized for this QP's
// receive queue -- confirm against qp_mgr::configure().
uint32_t qp_mgr::get_rx_max_wr_num()
{
    const uint32_t rx_wr_count = m_rx_num_wr;
    return rx_wr_count;
}

void qp_mgr::post_recv_buffer(mem_buf_desc_t *p_mem_buf_desc)
{
if (m_n_sysvar_rx_prefetch_bytes_before_poll) {
Expand Down
7 changes: 6 additions & 1 deletion src/core/dev/qp_mgr.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ struct qp_mgr_desc {
ring_simple *ring;
const struct slave_data *slave;
struct ibv_comp_channel *rx_comp_event_channel;
bool tx_only;
};

/* Work request completion callback */
Expand Down Expand Up @@ -178,7 +179,11 @@ class qp_mgr {
struct ibv_qp *get_ibv_qp() const { return m_qp; };
class cq_mgr *get_tx_cq_mgr() const { return m_p_cq_mgr_tx; }
class cq_mgr *get_rx_cq_mgr() const { return m_p_cq_mgr_rx; }
virtual uint32_t get_rx_max_wr_num();

// This method is used by RQ WQEs initial post.
// Skip posting WQEs for TX only ring (m_max_qp_wr == 0).
uint32_t get_rx_max_wr_num() { return (m_max_qp_wr > 0 ? m_rx_num_wr : 0); }

// This function can be replaced with a parameter during ring creation.
// chain of calls may serve as cache warm for dummy send feature.
inline bool get_hw_dummy_send_support() { return m_hw_dummy_send_support; }
Expand Down
4 changes: 1 addition & 3 deletions src/core/dev/ring_allocation_logic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,7 @@ uint64_t ring_allocation_logic::calc_res_key_by_logic()
res_key = sched_getcpu();
break;
BULLSEYE_EXCLUDE_BLOCK_START
case RING_LOGIC_PER_OBJECT:
res_key = reinterpret_cast<uint64_t>(m_source.m_object);
break;
case RING_LOGIC_NEIGH:
case RING_LOGIC_ISOLATE:
res_key = 0;
break;
Expand Down
11 changes: 1 addition & 10 deletions src/core/dev/ring_allocation_logic.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,24 +49,15 @@ class source_t {
public:
int m_fd;
ip_address m_ip;
const void *m_object;

source_t(int fd)
: m_fd(fd)
, m_ip(ip_address::any_addr())
, m_object(nullptr)
{
}
source_t(const ip_address &ip)
: m_fd(-1)
, m_ip(ip)
, m_object(nullptr)
{
}
source_t(const void *object)
: m_fd(-1)
, m_ip(ip_address::any_addr())
, m_object(object)
{
}
};
Expand Down Expand Up @@ -94,7 +85,7 @@ class ring_allocation_logic {
bool is_logic_support_migration()
{
return m_res_key.get_ring_alloc_logic() >= RING_LOGIC_PER_THREAD &&
m_res_key.get_ring_alloc_logic() < RING_LOGIC_PER_OBJECT && m_ring_migration_ratio > 0;
m_res_key.get_ring_alloc_logic() < RING_LOGIC_NEIGH && m_ring_migration_ratio > 0;
}
uint64_t calc_res_key_by_logic();
inline ring_logic_t get_alloc_logic_type() { return m_res_key.get_ring_alloc_logic(); }
Expand Down
1 change: 1 addition & 0 deletions src/core/dev/ring_simple.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,7 @@ void ring_simple::create_resources()
desc.ring = this;
desc.slave = p_slave;
desc.rx_comp_event_channel = m_p_rx_comp_event_channel;
desc.tx_only = (get_type() == RING_ETH_TX);
m_p_qp_mgr = create_qp_mgr(&desc);
BULLSEYE_EXCLUDE_BLOCK_START
if (m_p_qp_mgr == NULL) {
Expand Down
15 changes: 3 additions & 12 deletions src/core/dev/ring_simple.h
Original file line number Diff line number Diff line change
Expand Up @@ -440,23 +440,14 @@ class ring_simple : public ring_slave {

class ring_eth : public ring_simple {
public:
ring_eth(int if_index, ring *parent = NULL, ring_type_t type = RING_ETH,
bool call_create_res = true, bool use_locks = true)
: ring_simple(if_index, parent, type, use_locks)
ring_eth(int if_index, ring *parent = nullptr, bool tx_only = false, bool use_locks = true)
: ring_simple(if_index, parent, (tx_only ? RING_ETH_TX : RING_ETH), use_locks)
{
net_device_val_eth *p_ndev = dynamic_cast<net_device_val_eth *>(
g_p_net_device_table_mgr->get_net_device_val(m_parent->get_if_index()));
if (p_ndev) {
m_partition = p_ndev->get_vlan();

/* Do resource initialization for
* ring_eth_direct, ring_eth_cb inside related
* constructors because
* they use own create_qp_mgr() methods
*/
if (call_create_res) {
create_resources();
}
create_resources();
}
}

Expand Down
4 changes: 2 additions & 2 deletions src/core/proto/neighbour.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -220,8 +220,8 @@ neigh_entry::neigh_entry(neigh_key key, transport_type_t _type, bool is_init_res
}

// Allocate one ring for g_p_neigh_table_mgr. All neigh_entry objects will share the same ring.
ring_alloc_logic_attr ring_attr(RING_LOGIC_PER_OBJECT, true);
m_ring_allocation_logic = ring_allocation_logic_tx(g_p_neigh_table_mgr, ring_attr, this);
ring_alloc_logic_attr ring_attr(RING_LOGIC_NEIGH, true);
m_ring_allocation_logic = ring_allocation_logic_tx(-1, ring_attr, this);

if (is_init_resources) {
m_p_ring = m_p_dev->reserve_ring(m_ring_allocation_logic.get_key());
Expand Down
4 changes: 2 additions & 2 deletions src/core/util/xlio_stats.h
Original file line number Diff line number Diff line change
Expand Up @@ -351,9 +351,9 @@ typedef struct {
cq_stats_t cq_stats;
} cq_instance_block_t;

typedef enum { RING_ETH = 0, RING_TAP } ring_type_t;
typedef enum { RING_ETH = 0, RING_TAP, RING_ETH_TX } ring_type_t;

static const char *const ring_type_str[] = {"RING_ETH", "RING_TAP"};
static const char *const ring_type_str[] = {"RING_ETH", "RING_TAP", "RING_ETH_TX"};

// Ring stat info
typedef struct {
Expand Down
2 changes: 1 addition & 1 deletion src/core/xlio_extra.h
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ typedef enum {
RING_LOGIC_PER_THREAD = 20, //!< RING_LOGIC_PER_THREAD
RING_LOGIC_PER_CORE = 30, //!< RING_LOGIC_PER_CORE
RING_LOGIC_PER_CORE_ATTACH_THREADS = 31, //!< RING_LOGIC_PER_CORE_ATTACH_THREADS
RING_LOGIC_PER_OBJECT = 32, //!< RING_LOGIC_PER_OBJECT
RING_LOGIC_NEIGH = 32, //!< RING_LOGIC_NEIGH
RING_LOGIC_ISOLATE = 33, //!< RING_LOGIC_ISOLATE
RING_LOGIC_LAST //!< RING_LOGIC_LAST
} ring_logic_t;
Expand Down

0 comments on commit 5a67a4d

Please sign in to comment.