From d0d27257da86e3255957ea0879cd41f90d093cc3 Mon Sep 17 00:00:00 2001 From: Jessie Yang Date: Mon, 6 Jan 2025 13:10:53 -0800 Subject: [PATCH] prov/efa: Remove inline write logic for rma inject We previously set the inject size to 0 to prevent using inline write, but fabtests can use inject rma for 0-size messages. Make rma inject temporarily return FI_ENOSYS until firmware supports inline write. Signed-off-by: Jessie Yang --- prov/efa/src/efa_rma.c | 80 ++++++------------------------- prov/efa/test/efa_unit_test_rma.c | 39 +++++++++++---- prov/efa/test/efa_unit_tests.c | 1 + prov/efa/test/efa_unit_tests.h | 1 + 4 files changed, 47 insertions(+), 74 deletions(-) diff --git a/prov/efa/src/efa_rma.c b/prov/efa/src/efa_rma.c index cf4987c34eb..8fee3a2021b 100644 --- a/prov/efa/src/efa_rma.c +++ b/prov/efa/src/efa_rma.c @@ -200,7 +200,6 @@ static inline ssize_t efa_rma_post_write(struct efa_base_ep *base_ep, struct efa_conn *conn; #ifndef _WIN32 struct ibv_sge sge_list[msg->iov_count]; - struct ibv_data_buf inline_data_list[msg->iov_count]; #else /* MSVC compiler does not support array declarations with runtime size, so hardcode * the expected iov_limit/max_sq_sge from the lower-level efa provider. 
@@ -208,9 +207,14 @@ static inline ssize_t efa_rma_post_write(struct efa_base_ep *base_ep, struct ibv_sge sge_list[EFA_DEV_ATTR_MAX_WR_SGE]; struct ibv_data_buf inline_data_list[EFA_DEV_ATTR_MAX_WR_SGE]; #endif - size_t len; int i, err = 0; + if (flags & FI_INJECT) { + EFA_WARN(FI_LOG_EP_DATA, + "FI_INJECT is not supported by efa rma yet.\n"); + return -FI_ENOSYS; + } + efa_tracepoint(write_begin_msg_context, (size_t) msg->context, (size_t) msg->addr); qp = base_ep->qp; @@ -230,24 +234,13 @@ static inline ssize_t efa_rma_post_write(struct efa_base_ep *base_ep, ibv_wr_rdma_write(qp->ibv_qp_ex, msg->rma_iov[0].key, msg->rma_iov[0].addr); } - len = ofi_total_iov_len(msg->msg_iov, msg->iov_count); - if (len <= base_ep->domain->device->efa_attr.inline_buf_size && - len <= base_ep->inject_rma_size && - (!msg->desc || !efa_mr_is_hmem(msg->desc[0]))) { - for (i = 0; i < msg->iov_count; i++) { - inline_data_list[i].addr = msg->msg_iov[i].iov_base; - inline_data_list[i].length = msg->msg_iov[i].iov_len; - } - ibv_wr_set_inline_data_list(qp->ibv_qp_ex, msg->iov_count, inline_data_list); - } else { - for (i = 0; i < msg->iov_count; ++i) { - sge_list[i].addr = (uint64_t)msg->msg_iov[i].iov_base; - sge_list[i].length = msg->msg_iov[i].iov_len; - assert(msg->desc && msg->desc[i]); - sge_list[i].lkey = ((struct efa_mr *)msg->desc[i])->ibv_mr->lkey; - } - ibv_wr_set_sge_list(qp->ibv_qp_ex, msg->iov_count, sge_list); + for (i = 0; i < msg->iov_count; ++i) { + sge_list[i].addr = (uint64_t)msg->msg_iov[i].iov_base; + sge_list[i].length = msg->msg_iov[i].iov_len; + assert(msg->desc && msg->desc[i]); + sge_list[i].lkey = ((struct efa_mr *)msg->desc[i])->ibv_mr->lkey; } + ibv_wr_set_sge_list(qp->ibv_qp_ex, msg->iov_count, sge_list); conn = efa_av_addr_to_conn(base_ep->av, msg->addr); assert(conn && conn->ep_addr); @@ -348,51 +341,6 @@ ssize_t efa_rma_writedata(struct fid_ep *ep_fid, const void *buf, size_t len, return efa_rma_post_write(base_ep, &msg, FI_REMOTE_CQ_DATA | 
efa_tx_flags(base_ep)); } -ssize_t efa_rma_inject_write(struct fid_ep *ep_fid, const void *buf, size_t len, - fi_addr_t dest_addr, uint64_t addr, uint64_t key) -{ - struct fi_msg_rma msg; - struct iovec iov; - struct fi_rma_iov rma_iov; - struct efa_base_ep *base_ep; - int err; - - base_ep = container_of(ep_fid, struct efa_base_ep, util_ep.ep_fid); - assert(len <= base_ep->inject_rma_size); - err = efa_rma_check_cap(base_ep); - if (err) - return err; - - EFA_SETUP_IOV(iov, buf, len); - EFA_SETUP_RMA_IOV(rma_iov, addr, len, key); - EFA_SETUP_MSG_RMA(msg, &iov, NULL, 1, dest_addr, &rma_iov, 1, NULL, 0); - - return efa_rma_post_write(base_ep, &msg, FI_INJECT); -} - -ssize_t efa_rma_inject_writedata(struct fid_ep *ep_fid, const void *buf, - size_t len, uint64_t data, fi_addr_t dest_addr, - uint64_t addr, uint64_t key) -{ - struct fi_msg_rma msg; - struct iovec iov; - struct fi_rma_iov rma_iov; - struct efa_base_ep *base_ep; - int err; - - base_ep = container_of(ep_fid, struct efa_base_ep, util_ep.ep_fid); - assert(len <= base_ep->inject_rma_size); - err = efa_rma_check_cap(base_ep); - if (err) - return err; - - EFA_SETUP_IOV(iov, buf, len); - EFA_SETUP_RMA_IOV(rma_iov, addr, len, key); - EFA_SETUP_MSG_RMA(msg, &iov, NULL, 1, dest_addr, &rma_iov, 1, NULL, data); - - return efa_rma_post_write(base_ep, &msg, FI_INJECT | FI_REMOTE_CQ_DATA); -} - struct fi_ops_rma efa_dgram_ep_rma_ops = { .size = sizeof(struct fi_ops_rma), .read = fi_no_rma_read, @@ -414,7 +362,7 @@ struct fi_ops_rma efa_rma_ops = { .write = efa_rma_write, .writev = efa_rma_writev, .writemsg = efa_rma_writemsg, - .inject = efa_rma_inject_write, + .inject = fi_no_rma_inject, .writedata = efa_rma_writedata, - .injectdata = efa_rma_inject_writedata, + .injectdata = fi_no_rma_injectdata, }; diff --git a/prov/efa/test/efa_unit_test_rma.c b/prov/efa/test/efa_unit_test_rma.c index 40be70ec219..cb42a8528fd 100644 --- a/prov/efa/test/efa_unit_test_rma.c +++ b/prov/efa/test/efa_unit_test_rma.c @@ -25,8 +25,6 @@ 
static void test_efa_rma_prep(struct efa_resource *resource, fi_addr_t *addr) ibv_qpx->wr_rdma_read = &efa_mock_ibv_wr_rdma_read_save_wr; ibv_qpx->wr_rdma_write = &efa_mock_ibv_wr_rdma_write_save_wr; ibv_qpx->wr_rdma_write_imm = &efa_mock_ibv_wr_rdma_write_imm_save_wr; - ibv_qpx->wr_set_inline_data_list = - &efa_mock_ibv_wr_set_inline_data_list_no_op; ibv_qpx->wr_set_sge_list = &efa_mock_ibv_wr_set_sge_list_no_op; ibv_qpx->wr_set_ud_addr = &efa_mock_ibv_wr_set_ud_addr_no_op; ibv_qpx->wr_complete = &efa_mock_ibv_wr_complete_no_op; @@ -241,11 +239,9 @@ void test_efa_rma_inject_write(struct efa_resource **state) test_efa_rma_prep(resource, &dest_addr); efa_unit_test_buff_construct(&local_buff, resource, 32 /* buff_size */); - assert_int_equal(g_ibv_submitted_wr_id_cnt, 0); ret = fi_inject_write(resource->ep, local_buff.buff, local_buff.size, dest_addr, remote_addr, remote_key); - assert_int_equal(ret, 0); - assert_int_equal(g_ibv_submitted_wr_id_cnt, 1); + assert_int_equal(ret, -FI_ENOSYS); efa_unit_test_buff_destruct(&local_buff); } @@ -262,12 +258,39 @@ void test_efa_rma_inject_writedata(struct efa_resource **state) test_efa_rma_prep(resource, &dest_addr); efa_unit_test_buff_construct(&local_buff, resource, 32 /* buff_size */); - assert_int_equal(g_ibv_submitted_wr_id_cnt, 0); ret = fi_inject_writedata(resource->ep, local_buff.buff, local_buff.size, 0, dest_addr, remote_addr, remote_key); - assert_int_equal(ret, 0); - assert_int_equal(g_ibv_submitted_wr_id_cnt, 1); + assert_int_equal(ret, -FI_ENOSYS); + + efa_unit_test_buff_destruct(&local_buff); +} + +void test_efa_rma_writemsg_with_inject(struct efa_resource **state) +{ + struct efa_resource *resource = *state; + struct efa_unit_test_buff local_buff; + struct iovec iov; + struct fi_msg_rma msg = {0}; + struct fi_rma_iov rma_iov; + fi_addr_t dest_addr; + void *desc; + int ret; + + test_efa_rma_prep(resource, &dest_addr); + efa_unit_test_buff_construct(&local_buff, resource, 4096 /* buff_size */); + + iov.iov_base = 
local_buff.buff; + iov.iov_len = local_buff.size; + desc = fi_mr_desc(local_buff.mr); + rma_iov.len = local_buff.size; + rma_iov.addr = 0x87654321; + rma_iov.key = 123456; + efa_unit_test_construct_msg_rma(&msg, &iov, &desc, 1, dest_addr, &rma_iov, + 1, NULL, 0); + + ret = fi_writemsg(resource->ep, &msg, FI_INJECT); + assert_int_equal(ret, -FI_ENOSYS); efa_unit_test_buff_destruct(&local_buff); } diff --git a/prov/efa/test/efa_unit_tests.c b/prov/efa/test/efa_unit_tests.c index 3e3ba43ef04..293e080c0dd 100644 --- a/prov/efa/test/efa_unit_tests.c +++ b/prov/efa/test/efa_unit_tests.c @@ -229,6 +229,7 @@ int main(void) cmocka_unit_test_setup_teardown(test_efa_rma_writedata, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_rma_inject_write, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_rma_inject_writedata, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_efa_rma_writemsg_with_inject, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_cq_read_send_success, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_cq_read_recv_success, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_cq_read_send_failure, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), diff --git a/prov/efa/test/efa_unit_tests.h b/prov/efa/test/efa_unit_tests.h index 86bef64edab..689fd4fa3a8 100644 --- a/prov/efa/test/efa_unit_tests.h +++ b/prov/efa/test/efa_unit_tests.h @@ -251,6 +251,7 @@ void test_efa_rma_writemsg(); void test_efa_rma_writedata(); void test_efa_rma_inject_write(); void test_efa_rma_inject_writedata(); +void test_efa_rma_writemsg_with_inject(); void test_efa_cq_read_send_success(); void test_efa_cq_read_recv_success(); void test_efa_cq_read_send_failure();