Skip to content

Commit

Permalink
issue: 2336523 Return RX pbufs to cq_mgr instead of global pool
Browse files Browse the repository at this point in the history
Global buffer_pool lock leads to a lock contention in multi-threaded
applications. We can mitigate it by using ring per core and returning
pbufs to respective cq_mgr cache.

Signed-off-by: Dmytro Podgornyi <[email protected]>
  • Loading branch information
pasis committed Feb 24, 2021
1 parent 2879148 commit bf78b20
Show file tree
Hide file tree
Showing 9 changed files with 39 additions and 22 deletions.
12 changes: 1 addition & 11 deletions src/vma/dev/cq_mgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -580,17 +580,7 @@ void cq_mgr::reclaim_recv_buffer_helper(mem_buf_desc_t* buff)
temp->p_next_desc = NULL;
temp->p_prev_desc = NULL;
temp->reset_ref_count();
temp->rx.tcp.gro = 0;
temp->rx.is_vma_thr = false;
temp->rx.socketxtreme_polled = false;
temp->rx.flow_tag_id = 0;
temp->rx.tcp.p_ip_h = NULL;
temp->rx.tcp.p_tcp_h = NULL;
temp->rx.timestamps.sw.tv_nsec = 0;
temp->rx.timestamps.sw.tv_sec = 0;
temp->rx.timestamps.hw.tv_nsec = 0;
temp->rx.timestamps.hw.tv_sec = 0;
temp->rx.hw_raw_timestamp = 0;
memset(&temp->rx, 0, sizeof(temp->rx));
free_lwip_pbuf(&temp->lwip_pbuf);
m_rx_pool.push_back(temp);
}
Expand Down
1 change: 1 addition & 0 deletions src/vma/dev/ring.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ class ring
// Get/Release memory buffer descriptor with a linked data memory buffer
virtual mem_buf_desc_t* mem_buf_tx_get(ring_user_id_t id, bool b_block, int n_num_mem_bufs = 1) = 0;
virtual int mem_buf_tx_release(mem_buf_desc_t* p_mem_buf_desc_list, bool b_accounting, bool trylock = false) = 0;
virtual void mem_buf_rx_release(mem_buf_desc_t* p_mem_buf_desc) { buffer_pool::free_rx_lwip_pbuf_custom(&p_mem_buf_desc->lwip_pbuf.pbuf); };
virtual void send_ring_buffer(ring_user_id_t id, vma_ibv_send_wr* p_send_wqe, vma_wr_tx_packet_attr attr) = 0;
virtual void send_lwip_buffer(ring_user_id_t id, vma_ibv_send_wr* p_send_wqe, vma_wr_tx_packet_attr attr) = 0;

Expand Down
15 changes: 15 additions & 0 deletions src/vma/dev/ring_bond.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,21 @@ int ring_bond::mem_buf_tx_release(mem_buf_desc_t* p_mem_buf_desc_list, bool b_ac
return ret;
}

void ring_bond::mem_buf_rx_release(mem_buf_desc_t* p_mem_buf_desc)
{
uint32_t i;

for (i = 0; i < m_bond_rings.size(); i++) {
if (m_bond_rings[i] == p_mem_buf_desc->p_desc_owner) {
m_bond_rings[i]->mem_buf_rx_release(p_mem_buf_desc);
break;
}
}
if (i == m_bond_rings.size()) {
buffer_pool::free_rx_lwip_pbuf_custom(&p_mem_buf_desc->lwip_pbuf.pbuf);
}
}

void ring_bond::mem_buf_desc_return_single_to_owner_tx(mem_buf_desc_t* p_mem_buf_desc)
{
p_mem_buf_desc->p_desc_owner->mem_buf_desc_return_single_to_owner_tx(p_mem_buf_desc);
Expand Down
1 change: 1 addition & 0 deletions src/vma/dev/ring_bond.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ class ring_bond : public ring {
virtual void adapt_cq_moderation();
virtual bool reclaim_recv_buffers(descq_t *rx_reuse);
virtual bool reclaim_recv_buffers(mem_buf_desc_t* rx_reuse_lst);
virtual void mem_buf_rx_release(mem_buf_desc_t* p_mem_buf_desc);
virtual int drain_and_proccess();
virtual int wait_for_notification_and_process_element(int cq_channel_fd, uint64_t* p_cq_poll_sn, void* pv_fd_ready_array = NULL);
virtual int get_num_resources() const { return m_bond_rings.size(); };
Expand Down
6 changes: 6 additions & 0 deletions src/vma/dev/ring_simple.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -629,6 +629,12 @@ int ring_simple::mem_buf_tx_release(mem_buf_desc_t* p_mem_buf_desc_list, bool b_
return accounting;
}

void ring_simple::mem_buf_rx_release(mem_buf_desc_t* p_mem_buf_desc)
{
p_mem_buf_desc->p_next_desc = NULL;
reclaim_recv_buffers(p_mem_buf_desc);
}

/* note that this function is inline, so keep it above the functions using it */
inline int ring_simple::send_buffer(vma_ibv_send_wr* p_send_wqe, vma_wr_tx_packet_attr attr)
{
Expand Down
1 change: 1 addition & 0 deletions src/vma/dev/ring_simple.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ class ring_simple : public ring_slave
virtual bool reclaim_recv_buffers(mem_buf_desc_t* rx_reuse_lst);
bool reclaim_recv_buffers_no_lock(mem_buf_desc_t* rx_reuse_lst); // No locks
virtual int reclaim_recv_single_buffer(mem_buf_desc_t* rx_reuse); // No locks
virtual void mem_buf_rx_release(mem_buf_desc_t* p_mem_buf_desc);
virtual int socketxtreme_poll(struct vma_completion_t *vma_completions, unsigned int ncompletions, int flags);
virtual int drain_and_proccess();
virtual int wait_for_notification_and_process_element(int cq_channel_fd, uint64_t* p_cq_poll_sn, void* pv_fd_ready_array = NULL);
Expand Down
12 changes: 1 addition & 11 deletions src/vma/dev/ring_tap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -319,17 +319,7 @@ bool ring_tap::reclaim_recv_buffers(mem_buf_desc_t *buff)
temp->p_next_desc = NULL;
temp->p_prev_desc = NULL;
temp->reset_ref_count();
temp->rx.tcp.gro = 0;
temp->rx.is_vma_thr = false;
temp->rx.socketxtreme_polled = false;
temp->rx.flow_tag_id = 0;
temp->rx.tcp.p_ip_h = NULL;
temp->rx.tcp.p_tcp_h = NULL;
temp->rx.timestamps.sw.tv_nsec = 0;
temp->rx.timestamps.sw.tv_sec = 0;
temp->rx.timestamps.hw.tv_nsec = 0;
temp->rx.timestamps.hw.tv_sec = 0;
temp->rx.hw_raw_timestamp = 0;
memset(&temp->rx, 0, sizeof(temp->rx));
free_lwip_pbuf(&temp->lwip_pbuf);
m_rx_pool.push_back(temp);
}
Expand Down
12 changes: 12 additions & 0 deletions src/vma/sock/sockinfo_tcp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,8 @@ inline void sockinfo_tcp::init_pbuf_custom(mem_buf_desc_t *p_desc)
p_desc->lwip_pbuf.pbuf.type = PBUF_REF;
p_desc->lwip_pbuf.pbuf.next = NULL;
p_desc->lwip_pbuf.pbuf.payload = (u8_t *)p_desc->p_buffer + p_desc->rx.tcp.n_transport_header_len;
/* Override default free function to return rx pbuf to the CQ cache */
p_desc->lwip_pbuf.custom_free_function = sockinfo_tcp::tcp_rx_pbuf_free;
}

/* change default rx_wait impl to flow based one */
Expand Down Expand Up @@ -4527,6 +4529,16 @@ struct pbuf * sockinfo_tcp::tcp_tx_pbuf_alloc(void* p_conn)
return (struct pbuf *)p_desc;
}

void sockinfo_tcp::tcp_rx_pbuf_free(struct pbuf *p_buff)
{
mem_buf_desc_t *desc = (mem_buf_desc_t *)p_buff;

if (desc->p_desc_owner != NULL)
desc->p_desc_owner->mem_buf_rx_release(desc);
else
buffer_pool::free_rx_lwip_pbuf_custom(p_buff);
}

//single buffer only
void sockinfo_tcp::tcp_tx_pbuf_free(void* p_conn, struct pbuf *p_buff)
{
Expand Down
1 change: 1 addition & 0 deletions src/vma/sock/sockinfo_tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ class sockinfo_tcp : public sockinfo, public timer_handler

static struct pbuf * tcp_tx_pbuf_alloc(void* p_conn);
static void tcp_tx_pbuf_free(void* p_conn, struct pbuf *p_buff);
static void tcp_rx_pbuf_free(struct pbuf *p_buff);
static struct tcp_seg * tcp_seg_alloc(void* p_conn);
static void tcp_seg_free(void* p_conn, struct tcp_seg * seg);

Expand Down

0 comments on commit bf78b20

Please sign in to comment.