From 14e6789731f41e00b6159dfb5495680ef375ca03 Mon Sep 17 00:00:00 2001 From: Sai Sunku Date: Wed, 29 Jan 2025 18:08:01 +0000 Subject: [PATCH 1/3] prov/efa: Add new efa-direct fi_info objects efa-direct is a new path that provides direct access to the EFA device with minimal overhead and features. It does not support SAS ordering, tagged messaging, or atomics. It does not have copy-based protocols, and it supports neither large message sizes nor HMEM devices that lack p2p support. The functionality provided by the efa-direct path is a subset of the efa rdm path. When applications do not require capabilities only supported by the efa rdm path, it is more performant for them to use the efa-direct path. Accordingly, the efa-direct fi_info objects are returned before the efa rdm fi_info objects in the fi_getinfo call. Signed-off-by: Sai Sunku --- prov/efa/src/efa.h | 12 ++ prov/efa/src/efa_domain.c | 16 +-- prov/efa/src/efa_prov.c | 46 ++++++- prov/efa/src/efa_prov.h | 10 ++ prov/efa/src/efa_prov_info.c | 184 +++++++++++++++---------- prov/efa/src/efa_prov_info.h | 2 + prov/efa/src/efa_user_info.c | 78 ++++++++++- prov/efa/src/efa_user_info.h | 3 - prov/efa/test/efa_unit_test_av.c | 4 +- prov/efa/test/efa_unit_test_cntr.c | 10 +- prov/efa/test/efa_unit_test_common.c | 2 +- prov/efa/test/efa_unit_test_cq.c | 20 +-- prov/efa/test/efa_unit_test_domain.c | 6 +- prov/efa/test/efa_unit_test_ep.c | 62 ++++----- prov/efa/test/efa_unit_test_hmem.c | 6 +- prov/efa/test/efa_unit_test_info.c | 34 ++--- prov/efa/test/efa_unit_test_mr.c | 2 +- prov/efa/test/efa_unit_test_msg.c | 4 +- prov/efa/test/efa_unit_test_ope.c | 14 +- prov/efa/test/efa_unit_test_pke.c | 2 +- prov/efa/test/efa_unit_test_rdm_peer.c | 14 +- prov/efa/test/efa_unit_test_rma.c | 2 +- prov/efa/test/efa_unit_test_runt.c | 26 ++-- prov/efa/test/efa_unit_test_send.c | 2 +- prov/efa/test/efa_unit_test_srx.c | 6 +- prov/efa/test/efa_unit_tests.h | 3 - 26 files changed, 365 insertions(+), 205 deletions(-) 
diff --git a/prov/efa/src/efa.h b/prov/efa/src/efa.h index 5f1cf162c2b..e371104981f 100644 --- a/prov/efa/src/efa.h +++ b/prov/efa/src/efa.h @@ -58,12 +58,24 @@ #define SHM_MAX_INJECT_SIZE 4096 +#define EFA_FABRIC_NAME "efa" +#define EFA_DIRECT_FABRIC_NAME "efa-direct" + #define EFA_EP_TYPE_IS_RDM(_info) \ (_info && _info->ep_attr && (_info->ep_attr->type == FI_EP_RDM)) #define EFA_EP_TYPE_IS_DGRAM(_info) \ (_info && _info->ep_attr && (_info->ep_attr->type == FI_EP_DGRAM)) +#define EFA_INFO_TYPE_IS_RDM(_info) \ + (_info && _info->ep_attr && (_info->ep_attr->type == FI_EP_RDM) && !strcasecmp(_info->fabric_attr->name, EFA_FABRIC_NAME)) + +#define EFA_INFO_TYPE_IS_DIRECT(_info) \ + (_info && _info->ep_attr && (_info->ep_attr->type == FI_EP_RDM) && !strcasecmp(_info->fabric_attr->name, EFA_DIRECT_FABRIC_NAME)) + +#define EFA_INFO_TYPE_IS_DGRAM(_info) \ + (_info && _info->ep_attr && (_info->ep_attr->type == FI_EP_DGRAM)) + #define EFA_DGRAM_CONNID (0x0) #define EFA_DEF_POOL_ALIGNMENT (8) diff --git a/prov/efa/src/efa_domain.c b/prov/efa/src/efa_domain.c index 34de62cebac..49b42dfcd56 100644 --- a/prov/efa/src/efa_domain.c +++ b/prov/efa/src/efa_domain.c @@ -28,7 +28,7 @@ static struct fi_ops efa_ops_domain_fid = { .ops_open = efa_domain_ops_open, }; -static struct fi_ops_domain efa_ops_domain_dgram = { +static struct fi_ops_domain efa_domain_ops = { .size = sizeof(struct fi_ops_domain), .av_open = efa_av_open, .cq_open = efa_cq_open, @@ -42,7 +42,7 @@ static struct fi_ops_domain efa_ops_domain_dgram = { .query_collective = fi_no_query_collective, }; -static struct fi_ops_domain efa_ops_domain_rdm = { +static struct fi_ops_domain efa_domain_ops_rdm = { .size = sizeof(struct fi_ops_domain), .av_open = efa_av_open, .cq_open = efa_rdm_cq_open, @@ -230,8 +230,8 @@ int efa_domain_open(struct fid_fabric *fabric_fid, struct fi_info *info, } efa_domain->mr_local = ofi_mr_local(info); - if (EFA_EP_TYPE_IS_DGRAM(info) && !efa_domain->mr_local) { - EFA_WARN(FI_LOG_EP_DATA, "dgram 
require FI_MR_LOCAL, but application does not support it\n"); + if ((EFA_INFO_TYPE_IS_DGRAM(info) || EFA_INFO_TYPE_IS_DIRECT(info)) && !efa_domain->mr_local) { + EFA_WARN(FI_LOG_EP_DATA, "EFA direct and dgram require FI_MR_LOCAL, but application does not support it\n"); ret = -FI_ENODATA; goto err_free; } @@ -274,7 +274,7 @@ int efa_domain_open(struct fid_fabric *fabric_fid, struct fi_info *info, } efa_domain->util_domain.domain_fid.fid.ops = &efa_ops_domain_fid; - if (EFA_EP_TYPE_IS_RDM(info)) { + if (EFA_INFO_TYPE_IS_RDM(info)) { err = efa_domain_init_rdm(efa_domain, info); if (err) { EFA_WARN(FI_LOG_DOMAIN, @@ -282,10 +282,10 @@ int efa_domain_open(struct fid_fabric *fabric_fid, struct fi_info *info, -err); goto err_free; } - efa_domain->util_domain.domain_fid.ops = &efa_ops_domain_rdm; + efa_domain->util_domain.domain_fid.ops = &efa_domain_ops_rdm; } else { - assert(EFA_EP_TYPE_IS_DGRAM(info)); - efa_domain->util_domain.domain_fid.ops = &efa_ops_domain_dgram; + assert(EFA_INFO_TYPE_IS_DGRAM(info) || EFA_INFO_TYPE_IS_DIRECT(info)); + efa_domain->util_domain.domain_fid.ops = &efa_domain_ops; } #ifndef _WIN32 diff --git a/prov/efa/src/efa_prov.c b/prov/efa/src/efa_prov.c index 1f805c6742b..e6e055c9395 100644 --- a/prov/efa/src/efa_prov.c +++ b/prov/efa/src/efa_prov.c @@ -3,6 +3,7 @@ #include #include "efa.h" +#include "efa_prov.h" #include "efa_prov_info.h" #include "efa_env.h" @@ -67,7 +68,6 @@ struct fi_provider efa_prov = { struct util_prov efa_util_prov = { .prov = &efa_prov, - .flags = 0, }; /** @@ -79,10 +79,35 @@ struct util_prov efa_util_prov = { static int efa_util_prov_initialize() { int i, err; - struct fi_info *head, *tail, *prov_info_rdm, *prov_info_dgram; + struct fi_info *head, *tail, *prov_info_rdm, *prov_info_dgram, *prov_info_direct; head = NULL; tail = NULL; + + /* + * EFA direct provider is more performant if the application can use it + * Therefore, the efa-direct info objects should be returned _before_ efa rdm or dgram + * So we populate the 
efa-direct info objects first + */ + for (i = 0; i < g_device_cnt; ++i) { + prov_info_direct = fi_dupinfo(g_device_list[i].rdm_info); + if (!prov_info_direct) { + EFA_WARN(FI_LOG_DOMAIN, "Failed to allocate prov_info for EFA direct\n"); + continue; + } + + efa_prov_info_set_fabric_name(prov_info_direct, EFA_DIRECT_FABRIC_NAME); + + if (!head) { + head = prov_info_direct; + } else { + assert(tail); + tail->next = prov_info_direct; + } + + tail = prov_info_direct; + } + for (i = 0; i < g_device_cnt; ++i) { err = efa_prov_info_alloc_for_rdm(&prov_info_rdm, &g_device_list[i]); if (err) { @@ -91,6 +116,8 @@ static int efa_util_prov_initialize() continue; } + efa_prov_info_set_fabric_name(prov_info_rdm, EFA_FABRIC_NAME); + if (!head) { head = prov_info_rdm; } else { @@ -108,6 +135,8 @@ static int efa_util_prov_initialize() continue; } + efa_prov_info_set_fabric_name(prov_info_dgram, EFA_FABRIC_NAME); + if (!head) { head = prov_info_dgram; } else { @@ -160,6 +189,14 @@ EFA_INI */ efa_env_initialize(); + err = efa_hmem_info_initialize(); + if (err) + goto err_free; + + /* + * efa_util_prov_initialize uses g_efa_hmem_info, so it + * must be called after efa_hmem_info_initialize + */ err = efa_util_prov_initialize(); if (err) goto err_free; @@ -169,10 +206,6 @@ EFA_INI goto err_free; } - err = efa_hmem_info_initialize(); - if (err) - goto err_free; - dlist_init(&g_efa_domain_list); return &efa_prov; @@ -202,4 +235,3 @@ static void efa_prov_finalize(void) ofi_mem_fini(); #endif } - diff --git a/prov/efa/src/efa_prov.h b/prov/efa/src/efa_prov.h index c807bd0de6c..a23369265bb 100644 --- a/prov/efa/src/efa_prov.h +++ b/prov/efa/src/efa_prov.h @@ -17,4 +17,14 @@ extern struct util_prov efa_util_prov; EFA_WARN(subsys, fn ": %s(%d)\n", strerror(errno), errno) #define EFA_DBG(subsys, ...) 
FI_DBG(&efa_prov, subsys, __VA_ARGS__) +static inline +int efa_prov_info_set_fabric_name(struct fi_info *prov_info, char *fabric_name) +{ + prov_info->fabric_attr->name = calloc(1, strlen(fabric_name) + 1); + if (!prov_info->fabric_attr->name) + return -FI_ENOMEM; + strcpy(prov_info->fabric_attr->name, fabric_name); + return FI_SUCCESS; +} + #endif \ No newline at end of file diff --git a/prov/efa/src/efa_prov_info.c b/prov/efa/src/efa_prov_info.c index 2f16f23816f..4bfd1ebc6cf 100644 --- a/prov/efa/src/efa_prov_info.c +++ b/prov/efa/src/efa_prov_info.c @@ -9,22 +9,19 @@ #include "rdm/efa_rdm_pkt_type.h" #define EFA_FABRIC_PREFIX "EFA-" -#define EFA_FABRIC_NAME "efa" #define EFA_DOMAIN_CAPS (FI_LOCAL_COMM | FI_REMOTE_COMM) #define EFA_RDM_TX_CAPS (OFI_TX_MSG_CAPS) #define EFA_RDM_RX_CAPS (OFI_RX_MSG_CAPS | FI_SOURCE) +#define EFA_RDM_CAPS (EFA_RDM_TX_CAPS | EFA_RDM_RX_CAPS | EFA_DOMAIN_CAPS) + #define EFA_DGRM_TX_CAPS (OFI_TX_MSG_CAPS) #define EFA_DGRM_RX_CAPS (OFI_RX_MSG_CAPS | FI_SOURCE) -#define EFA_RDM_CAPS (EFA_RDM_TX_CAPS | EFA_RDM_RX_CAPS | EFA_DOMAIN_CAPS) #define EFA_DGRM_CAPS (EFA_DGRM_TX_CAPS | EFA_DGRM_RX_CAPS | EFA_DOMAIN_CAPS) #define EFA_TX_OP_FLAGS (FI_TRANSMIT_COMPLETE) - -#define EFA_RX_MODE (0) - -#define EFA_RX_RDM_OP_FLAGS (0) -#define EFA_RX_DGRM_OP_FLAGS (0) +#define EFA_RDM_TX_OP_FLAGS (FI_INJECT | FI_COMPLETION | FI_TRANSMIT_COMPLETE | \ + FI_DELIVERY_COMPLETE) #define EFA_MSG_ORDER (0) @@ -40,30 +37,6 @@ const struct fi_fabric_attr efa_fabric_attr = { .prov_version = OFI_VERSION_DEF_PROV, }; -/** - * @brief set the fabric_attr field of a prov_info - * - * @param prov_info[out] prov_info object - * @param device[in] pointer to an efa_device struct, which contains device attributes - * @return 0 on sucessess - * -FI_ENOMEM if memory allocation failed - */ -static -int efa_prov_info_set_fabric_attr(struct fi_info *prov_info, struct efa_device *device) -{ - size_t name_len = strlen(EFA_FABRIC_NAME); - - *prov_info->fabric_attr = 
efa_fabric_attr; - - name_len = strlen(EFA_FABRIC_NAME); - prov_info->fabric_attr->name = calloc(1, name_len + 1); - if (!prov_info->fabric_attr->name) - return -FI_ENOMEM; - - strcpy(prov_info->fabric_attr->name, EFA_FABRIC_NAME); - return 0; -} - /** * @brief default domain_attr for prov_info */ @@ -122,6 +95,15 @@ int efa_prov_info_set_domain_attr(struct fi_info *prov_info, prov_info->domain_attr->max_ep_rx_ctx = 1; prov_info->domain_attr->resource_mgmt = FI_RM_DISABLED; prov_info->domain_attr->mr_cnt = device->ibv_attr.max_mr; + + if (ep_type == FI_EP_RDM) { + /* EFA direct path is thread safe */ + prov_info->domain_attr->threading = FI_THREAD_SAFE; + + /* EFA direct path retries indefinitely when Receiver Not Ready (RNR) */ + prov_info->domain_attr->resource_mgmt = FI_RM_ENABLED; + } + EFA_DBG(FI_LOG_DOMAIN, "Domain attribute :\n" "\t prov_info->domain_attr->cq_cnt = %zu\n" "\t prov_info->domain_attr->ep_cnt = %zu\n" @@ -167,31 +149,18 @@ void efa_prov_info_set_ep_attr(struct fi_info *prov_info, { *prov_info->ep_attr = efa_ep_attr; - if (ep_type == FI_EP_DGRAM) { - prov_info->mode |= FI_MSG_PREFIX; - prov_info->ep_attr->msg_prefix_size = 40; - } - prov_info->ep_attr->protocol = FI_PROTO_EFA; - prov_info->ep_attr->type = ep_type; + prov_info->ep_attr->max_msg_size = device->ibv_port_attr.max_msg_sz; + prov_info->ep_attr->type = ep_type; - if (prov_info->ep_attr->type == FI_EP_RDM) { - prov_info->tx_attr->inject_size = device->efa_attr.inline_buf_size; + if (ep_type == FI_EP_RDM) { + /* ep_attr->max_msg_size is the maximum of both MSG and RMA operations */ + if (prov_info->caps & FI_RMA) + prov_info->ep_attr->max_msg_size = MAX(device->ibv_port_attr.max_msg_sz, device->max_rdma_size); } else { - assert(prov_info->ep_attr->type == FI_EP_DGRAM); - /* - * Currently, there is no mechanism for device to discard - * a completion, therefore there is no way for dgram endpoint - * to implement FI_INJECT. 
Because FI_INJECT is not an optional - * feature, we had to set inject_size to 0. - * - * TODO: - * Remove this after implementing cq read for efa-raw - */ - prov_info->tx_attr->inject_size = 0; + assert(ep_type == FI_EP_DGRAM); + prov_info->ep_attr->msg_prefix_size = 40; } - - prov_info->ep_attr->max_msg_size = device->ibv_port_attr.max_msg_sz; } /** @@ -211,8 +180,7 @@ const struct fi_tx_attr efa_dgrm_tx_attr = { */ const struct fi_rx_attr efa_dgrm_rx_attr = { .caps = EFA_DGRM_RX_CAPS, - .mode = FI_MSG_PREFIX | EFA_RX_MODE, - .op_flags = EFA_RX_DGRM_OP_FLAGS, + .mode = FI_MSG_PREFIX, .msg_order = EFA_MSG_ORDER, .iov_limit = 1 }; @@ -234,8 +202,6 @@ const struct fi_tx_attr efa_rdm_tx_attr = { */ const struct fi_rx_attr efa_rdm_rx_attr = { .caps = EFA_RDM_RX_CAPS, - .mode = EFA_RX_MODE, - .op_flags = EFA_RX_RDM_OP_FLAGS, .msg_order = EFA_MSG_ORDER, .iov_limit = 1 }; @@ -254,12 +220,26 @@ void efa_prov_info_set_tx_rx_attr(struct fi_info *prov_info, if (ep_type == FI_EP_RDM) { *prov_info->tx_attr = efa_rdm_tx_attr; *prov_info->rx_attr = efa_rdm_rx_attr; + + prov_info->tx_attr->op_flags |= EFA_RDM_TX_OP_FLAGS; + prov_info->rx_attr->op_flags |= FI_COMPLETION; + + /* If the device supports FI_RMA, also set tx_attr and rx_attr */ + if (prov_info->caps & FI_RMA) { + prov_info->tx_attr->caps |= OFI_TX_RMA_CAPS; + prov_info->rx_attr->caps |= OFI_RX_RMA_CAPS; + } } else { assert(ep_type == FI_EP_DGRAM); *prov_info->tx_attr = efa_dgrm_tx_attr; *prov_info->rx_attr = efa_dgrm_rx_attr; } + /* efa-direct and DGRAM paths require FI_CONTEXT2 */ + prov_info->tx_attr->mode |= FI_CONTEXT2; + prov_info->rx_attr->mode |= FI_CONTEXT2; + + prov_info->tx_attr->inject_size = device->efa_attr.inline_buf_size; prov_info->tx_attr->iov_limit = device->efa_attr.max_sq_sge; prov_info->tx_attr->size = rounddown_power_of_two(device->efa_attr.max_sq_wr); prov_info->rx_attr->iov_limit = device->efa_attr.max_rq_sge; @@ -375,7 +355,7 @@ static int efa_prov_info_set_nic_attr(struct fi_info *prov_info, 
struct efa_devi goto err_free; } - link_attr->mtu = device->ibv_port_attr.max_msg_sz - efa_rdm_pkt_type_get_max_hdr_size(); + link_attr->mtu = device->ibv_port_attr.max_msg_sz; link_attr->speed = ofi_vrb_speed(device->ibv_port_attr.active_speed, device->ibv_port_attr.active_width); @@ -409,12 +389,46 @@ static int efa_prov_info_set_nic_attr(struct fi_info *prov_info, struct efa_devi } #if HAVE_CUDA || HAVE_NEURON || HAVE_SYNAPSEAI -void efa_prov_info_set_hmem_flags(struct fi_info *prov_info) +void efa_prov_info_set_hmem_flags(struct fi_info *prov_info, enum fi_ep_type ep_type) { - if (prov_info->ep_attr->type == FI_EP_RDM && - (ofi_hmem_is_initialized(FI_HMEM_CUDA) || + int i; + enum fi_hmem_iface iface; + struct efa_hmem_info *hmem_info; + bool enable_hmem = false; + + if (ep_type != FI_EP_RDM) + return; + + /* EFA direct only supports HMEM when p2p support is available */ + if ((ofi_hmem_is_initialized(FI_HMEM_CUDA) || ofi_hmem_is_initialized(FI_HMEM_NEURON) || ofi_hmem_is_initialized(FI_HMEM_SYNAPSEAI))) { + + /* Check if FI_HMEM_P2P_DISABLED is set */ + if (ofi_hmem_p2p_disabled()) { + EFA_WARN(FI_LOG_CORE, + "FI_HMEM capability requires peer to peer " + "support, which is disabled because " + "FI_HMEM_P2P_DISABLED was set to 1/on/true.\n"); + return; + } + + EFA_HMEM_IFACE_FOREACH(i) { + iface = efa_hmem_ifaces[i]; + hmem_info = &g_efa_hmem_info[iface]; + if (hmem_info->initialized && !hmem_info->p2p_supported_by_device) { + EFA_INFO(FI_LOG_CORE, + "EFA direct provider was compiled with support for %s HMEM interface " + "but the interface does not support p2p transfers. " + "EFA direct provider does not support HMEM transfers without p2p support. 
" + "HMEM support will be disabled.\n", fi_tostr(&iface, FI_TYPE_HMEM_IFACE)); + return; + } + } + enable_hmem = true; + } + + if (enable_hmem) { prov_info->caps |= FI_HMEM; prov_info->tx_attr->caps |= FI_HMEM; prov_info->rx_attr->caps |= FI_HMEM; @@ -422,7 +436,7 @@ void efa_prov_info_set_hmem_flags(struct fi_info *prov_info) } } #else -void efa_prov_info_set_hmem_flags(struct fi_info *prov_info) +void efa_prov_info_set_hmem_flags(struct fi_info *prov_info, enum fi_ep_type ep_type) { } #endif @@ -455,13 +469,25 @@ int efa_prov_info_alloc(struct fi_info **prov_info_ptr, if (!prov_info) return -FI_ENOMEM; - if (ep_type != FI_EP_RDM && ep_type != FI_EP_DGRAM) { - EFA_WARN(FI_LOG_DOMAIN, "Unsupported endpoint type: %d\n", - ep_type); - return -FI_ENODATA; + prov_info->mode |= FI_CONTEXT2; /* EFA direct path requires FI_CONTEXT2 mode */ + + if (ep_type == FI_EP_RDM) { + prov_info->caps = EFA_RDM_CAPS; + /* Claim RMA support in the efa-direct path only if read, write + * and unsolicited write are all available */ + if (efa_device_support_rdma_read() && + efa_device_support_rdma_write() && + efa_device_support_unsolicited_write_recv()) + prov_info->caps |= (OFI_TX_RMA_CAPS | OFI_RX_RMA_CAPS); + } else { + if (ep_type != FI_EP_DGRAM) { + EFA_WARN(FI_LOG_DOMAIN, "Unsupported EFA info type: %d\n", ep_type); + return -FI_ENODATA; + } + prov_info->caps = EFA_DGRM_CAPS; + prov_info->mode |= FI_MSG_PREFIX; } - prov_info->caps = (ep_type == FI_EP_RDM) ? 
EFA_RDM_CAPS : EFA_DGRM_CAPS; prov_info->handle = NULL; prov_info->addr_format = FI_ADDR_EFA; prov_info->src_addr = calloc(1, EFA_EP_ADDR_LEN); @@ -472,10 +498,8 @@ int efa_prov_info_alloc(struct fi_info **prov_info_ptr, prov_info->src_addrlen = EFA_EP_ADDR_LEN; memcpy(prov_info->src_addr, &device->ibv_gid, sizeof(device->ibv_gid)); - err = efa_prov_info_set_fabric_attr(prov_info, device); - if (err) { - goto err_free; - } + /* fabric_attr->name is set in efa_util_prov_initialize */ + *prov_info->fabric_attr = efa_fabric_attr; err = efa_prov_info_set_domain_attr(prov_info, device, ep_type); if (err) { @@ -491,7 +515,7 @@ int efa_prov_info_alloc(struct fi_info **prov_info_ptr, goto err_free; } - efa_prov_info_set_hmem_flags(prov_info); + efa_prov_info_set_hmem_flags(prov_info, ep_type); *prov_info_ptr = prov_info; return 0; @@ -542,6 +566,11 @@ int efa_prov_info_alloc_for_rdm(struct fi_info **prov_info_rdm_ptr, prov_info_rdm->caps |= efa_rdm_added_tx_caps | efa_rdm_added_rx_caps | efa_domain_caps; + /* efa-direct requires FI_CONTEXT2 but RDM doesn't. So unset FI_CONTEXT2 */ + prov_info_rdm->mode &= ~FI_CONTEXT2; + prov_info_rdm->tx_attr->mode &= ~FI_CONTEXT2; + prov_info_rdm->rx_attr->mode &= ~FI_CONTEXT2; + /* update domain_attr */ { /* EFA RDM endpoint ensure thread safety by pthread lock */ @@ -598,8 +627,7 @@ int efa_prov_info_alloc_for_rdm(struct fi_info **prov_info_rdm_ptr, * EFA RDM endpoint supports delivery complete by using DC capable protocols. * Therefore changing the default op_flags */ - prov_info_rdm->tx_attr->op_flags = FI_INJECT | FI_COMPLETION | FI_TRANSMIT_COMPLETE | - FI_DELIVERY_COMPLETE; + prov_info_rdm->tx_attr->op_flags |= EFA_RDM_TX_OP_FLAGS; /* Here we calculate the max msg size for emulated injection of EFA RDM endpoint. * The requirement for inject is: upon return, the user buffer can be reused immediately. 
@@ -641,6 +669,16 @@ int efa_prov_info_alloc_for_rdm(struct fi_info **prov_info_rdm_ptr, prov_info_rdm->rx_attr->size = efa_env.rx_size; } + /* EFA RDM can support HMEM even if p2p support is not available */ + if ((ofi_hmem_is_initialized(FI_HMEM_CUDA) || + ofi_hmem_is_initialized(FI_HMEM_NEURON) || + ofi_hmem_is_initialized(FI_HMEM_SYNAPSEAI))) { + prov_info_rdm->caps |= FI_HMEM; + prov_info_rdm->tx_attr->caps |= FI_HMEM; + prov_info_rdm->rx_attr->caps |= FI_HMEM; + prov_info_rdm->domain_attr->mr_mode |= FI_MR_HMEM; + } + *prov_info_rdm_ptr = prov_info_rdm; return 0; } diff --git a/prov/efa/src/efa_prov_info.h b/prov/efa/src/efa_prov_info.h index c5b3ff93c4a..ad34e8fe03f 100644 --- a/prov/efa/src/efa_prov_info.h +++ b/prov/efa/src/efa_prov_info.h @@ -22,4 +22,6 @@ int efa_prov_info_compare_domain_name(const struct fi_info *hints, int efa_prov_info_compare_pci_bus_id(const struct fi_info *hints, const struct fi_info *info); +void efa_prov_info_set_hmem_flags(struct fi_info *prov_info, enum fi_ep_type ep_type); + #endif diff --git a/prov/efa/src/efa_user_info.c b/prov/efa/src/efa_user_info.c index 99b37c88fe0..64838c9c628 100644 --- a/prov/efa/src/efa_user_info.c +++ b/prov/efa/src/efa_user_info.c @@ -186,7 +186,7 @@ bool efa_user_info_should_support_hmem(int version) #endif /** - * @brief update an info to match user hints + * @brief update RDM info to match user hints * * the input info is a duplicate of prov info, which matches * the capability of the EFA device. This function tailor it @@ -332,7 +332,72 @@ int efa_user_info_alter_rdm(int version, struct fi_info *info, const struct fi_i } /** - * @brief get a list of rdm info the fit user's requirements + * @brief update EFA direct info to match user hints + * + * the input info is a duplicate of prov info, which matches + * the capability of the EFA device. 
This function tailor it + * so it matches user provided hints + * + * @param version[in] libfabric API version + * @param info[in,out] info to be updated + * @param hints[in] user provided hints + * @return 0 on success + * negative libfabric error code on failure + */ +static +int efa_user_info_alter_direct(int version, struct fi_info *info, const struct fi_info *hints) +{ + /* + * FI_HMEM is a primary capability, therefore only check + * and claim support when explicitly requested + */ + if (hints && (hints->caps & FI_HMEM)) + info->caps |= FI_HMEM; + else + info->caps &= ~FI_HMEM; + + if (info->caps & FI_HMEM) { + /* Add FI_MR_HMEM to mr_mode when claiming support of FI_HMEM + * because EFA provider's HMEM support rely on + * application to provide descriptor for device buffer. + */ + if (hints->domain_attr && + !(hints->domain_attr->mr_mode & FI_MR_HMEM)) { + EFA_WARN(FI_LOG_CORE, + "FI_HMEM capability requires device registrations (FI_MR_HMEM)\n"); + return -FI_ENODATA; + } + + info->domain_attr->mr_mode |= FI_MR_HMEM; + } + + /* + * Handle user-provided hints and adapt the info object passed back up + * based on EFA-specific constraints. + */ + if (hints) { + /* EFA direct cannot make use of message prefix */ + if (hints->mode & FI_MSG_PREFIX) { + EFA_INFO(FI_LOG_CORE, + "FI_MSG_PREFIX supported by application but EFA direct cannot " + "use prefix. Setting prefix size to 0.\n"); + info->ep_attr->msg_prefix_size = 0; + EFA_INFO(FI_LOG_CORE, + "FI_MSG_PREFIX size = %ld\n", info->ep_attr->msg_prefix_size); + } + } + + /* Print a warning and use FI_AV_TABLE if the app requests FI_AV_MAP */ + if (hints && hints->domain_attr && hints->domain_attr->av_type == FI_AV_MAP) + EFA_WARN(FI_LOG_CORE, "FI_AV_MAP is deprecated in Libfabric 2.x. Please use FI_AV_TABLE. 
" + "EFA direct provider will now switch to using FI_AV_TABLE.\n"); + info->domain_attr->av_type = FI_AV_TABLE; + + return 0; +} + +/** + * @brief get a list of fi_info objects the fit user's requirements * * @param node[in] node from user's call to fi_getinfo() * @param service[in] service from user's call to fi_getinfo() @@ -393,10 +458,11 @@ int efa_get_user_info(uint32_t version, const char *node, dupinfo->fabric_attr->api_version = version; - if (prov_info->ep_attr->type == FI_EP_RDM) { + if (EFA_INFO_TYPE_IS_RDM(prov_info)) { ret = efa_user_info_alter_rdm(version, dupinfo, hints); if (ret) goto free_info; + /* If application asked for FI_REMOTE_COMM but not FI_LOCAL_COMM, it * does not want to use shm. In this case, we honor the request by * unsetting the FI_LOCAL_COMM flag in info. This way efa_rdm_ep_open() @@ -406,6 +472,12 @@ int efa_get_user_info(uint32_t version, const char *node, dupinfo->caps &= ~FI_LOCAL_COMM; } + if (EFA_INFO_TYPE_IS_DIRECT(prov_info)) { + ret = efa_user_info_alter_direct(version, dupinfo, hints); + if (ret) + goto free_info; + } + ofi_alter_info(dupinfo, hints, version); if (!*info) diff --git a/prov/efa/src/efa_user_info.h b/prov/efa/src/efa_user_info.h index 2b52ce2b58b..35f8f39e84f 100644 --- a/prov/efa/src/efa_user_info.h +++ b/prov/efa/src/efa_user_info.h @@ -10,9 +10,6 @@ int efa_user_info_set_dest_addr(const char *node, const char *service, uint64_t int efa_user_info_check_hints_addr(const char *node, const char *service, uint64_t flags, const struct fi_info *hints); -int efa_user_info_get_dgram(uint32_t version, const char *node, const char *service, - uint64_t flags, const struct fi_info *hints, struct fi_info **info); - int efa_getinfo(uint32_t version, const char *node, const char *service, uint64_t flags, const struct fi_info *hints, struct fi_info **info); diff --git a/prov/efa/test/efa_unit_test_av.c b/prov/efa/test/efa_unit_test_av.c index dd6f813a059..5acc36ee205 100644 --- a/prov/efa/test/efa_unit_test_av.c +++ 
b/prov/efa/test/efa_unit_test_av.c @@ -19,7 +19,7 @@ void test_av_insert_duplicate_raw_addr(struct efa_resource **state) fi_addr_t addr1, addr2; int err, num_addr; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); g_efa_unit_test_mocks.ibv_create_ah = &efa_mock_ibv_create_ah_check_mock; err = fi_getname(&resource->ep->fid, &raw_addr, &raw_addr_len); @@ -54,7 +54,7 @@ void test_av_insert_duplicate_gid(struct efa_resource **state) fi_addr_t addr1, addr2; int err, num_addr; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); g_efa_unit_test_mocks.ibv_create_ah = &efa_mock_ibv_create_ah_check_mock; err = fi_getname(&resource->ep->fid, &raw_addr, &raw_addr_len); diff --git a/prov/efa/test/efa_unit_test_cntr.c b/prov/efa/test/efa_unit_test_cntr.c index d9d4852d2f2..1f37e8e386d 100644 --- a/prov/efa/test/efa_unit_test_cntr.c +++ b/prov/efa/test/efa_unit_test_cntr.c @@ -57,7 +57,7 @@ void test_efa_cntr_ibv_cq_poll_list_same_tx_rx_cq_single_ep(struct efa_resource { struct efa_resource *resource = *state; - efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM, EFA_DIRECT_PROV_NAME); + efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM, EFA_DIRECT_FABRIC_NAME); test_efa_cntr_ibv_cq_poll_list_same_tx_rx_cq_single_ep_impl(resource); } @@ -65,7 +65,7 @@ void test_efa_rdm_cntr_ibv_cq_poll_list_same_tx_rx_cq_single_ep(struct efa_resou { struct efa_resource *resource = *state; - efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM, EFA_FABRIC_NAME); test_efa_cntr_ibv_cq_poll_list_same_tx_rx_cq_single_ep_impl(resource); } @@ -111,7 +111,7 @@ void test_efa_cntr_ibv_cq_poll_list_separate_tx_rx_cq_single_ep(struct efa_resou { struct efa_resource *resource = *state; - 
efa_unit_test_resource_construct_no_cq_and_ep_not_enabled(resource, FI_EP_RDM, EFA_DIRECT_PROV_NAME); + efa_unit_test_resource_construct_no_cq_and_ep_not_enabled(resource, FI_EP_RDM, EFA_DIRECT_FABRIC_NAME); test_efa_cntr_ibv_cq_poll_list_separate_tx_rx_cq_single_ep_impl(resource); } @@ -119,7 +119,7 @@ void test_efa_rdm_cntr_ibv_cq_poll_list_separate_tx_rx_cq_single_ep(struct efa_r { struct efa_resource *resource = *state; - efa_unit_test_resource_construct_no_cq_and_ep_not_enabled(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct_no_cq_and_ep_not_enabled(resource, FI_EP_RDM, EFA_FABRIC_NAME); test_efa_cntr_ibv_cq_poll_list_separate_tx_rx_cq_single_ep_impl(resource); } @@ -132,7 +132,7 @@ void test_efa_rdm_cntr_post_initial_rx_pkts(struct efa_resource **state) struct efa_cntr *efa_cntr; uint64_t cnt; - efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM, EFA_FABRIC_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); /* At this time, rx pkts are not growed and posted */ diff --git a/prov/efa/test/efa_unit_test_common.c b/prov/efa/test/efa_unit_test_common.c index 13bb1882465..9580d16bf77 100644 --- a/prov/efa/test/efa_unit_test_common.c +++ b/prov/efa/test/efa_unit_test_common.c @@ -222,7 +222,7 @@ void efa_unit_test_resource_construct_rdm_shm_disabled(struct efa_resource *reso int ret; bool shm_permitted = false; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_FABRIC_NAME); if (!resource->hints) goto err; diff --git a/prov/efa/test/efa_unit_test_cq.c b/prov/efa/test/efa_unit_test_cq.c index 82dcd38952f..51a7b56ead2 100644 --- a/prov/efa/test/efa_unit_test_cq.c +++ b/prov/efa/test/efa_unit_test_cq.c @@ -21,7 +21,7 @@ void test_impl_cq_read_empty_cq(struct efa_resource *resource, enum fi_ep_type e int ret; struct 
efa_base_ep *efa_base_ep; - efa_unit_test_resource_construct(resource, ep_type, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, ep_type, EFA_FABRIC_NAME); efa_base_ep = container_of(resource->ep, struct efa_base_ep, util_ep.ep_fid); ibv_cqx = container_of(efa_base_ep->util_ep.rx_cq, struct efa_cq, util_cq)->ibv_cq.ibv_cq_ex; @@ -288,7 +288,7 @@ void test_ibv_cq_ex_read_bad_recv_status(struct efa_resource **state) struct ibv_cq_ex *ibv_cqx; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); /* @@ -366,7 +366,7 @@ void test_ibv_cq_ex_read_bad_recv_rdma_with_imm_status_impl(struct efa_resource struct ibv_cq_ex *ibv_cqx; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); efa_rdm_cq = container_of(resource->cq, struct efa_rdm_cq, efa_cq.util_cq.cq_fid.fid); @@ -455,7 +455,7 @@ void test_ibv_cq_ex_read_failed_poll(struct efa_resource **state) struct efa_rdm_cq *efa_rdm_cq; struct ibv_cq_ex *ibv_cqx; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); efa_rdm_cq = container_of(resource->cq, struct efa_rdm_cq, efa_cq.util_cq.cq_fid.fid); ibv_cqx = efa_rdm_cq->efa_cq.ibv_cq.ibv_cq_ex; @@ -498,7 +498,7 @@ void test_rdm_cq_create_error_handling(struct efa_resource **state) } efa_device_construct(&efa_device, 0, ibv_device_list[0]); - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_FABRIC_NAME); assert_non_null(resource->hints); assert_int_equal(fi_getinfo(FI_VERSION(1, 14), NULL, NULL, 0ULL, resource->hints, &resource->info), 0); 
assert_int_equal(fi_fabric(resource->info->fabric_attr, &resource->fabric, NULL), 0); @@ -546,7 +546,7 @@ void test_efa_rdm_cq_ibv_cq_poll_list_same_tx_rx_cq_single_ep(struct efa_resourc { struct efa_resource *resource = *state; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); /* efa_unit_test_resource_construct binds single OFI CQ as both tx/rx cq of ep */ assert_int_equal(test_efa_rdm_cq_get_ibv_cq_poll_list_length(resource->cq), 1); @@ -563,7 +563,7 @@ void test_efa_rdm_cq_ibv_cq_poll_list_separate_tx_rx_cq_single_ep(struct efa_res struct fid_cq *txcq, *rxcq; struct fi_cq_attr cq_attr = {0}; - efa_unit_test_resource_construct_no_cq_and_ep_not_enabled(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct_no_cq_and_ep_not_enabled(resource, FI_EP_RDM, EFA_FABRIC_NAME); assert_int_equal(fi_cq_open(resource->domain, &cq_attr, &txcq, NULL), 0); @@ -592,7 +592,7 @@ void test_efa_rdm_cq_post_initial_rx_pkts(struct efa_resource **state) struct efa_rdm_ep *efa_rdm_ep; struct efa_rdm_cq *efa_rdm_cq; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); efa_rdm_cq = container_of(resource->cq, struct efa_rdm_cq, efa_cq.util_cq.cq_fid.fid); @@ -653,7 +653,7 @@ static void test_impl_ibv_cq_ex_read_unknow_peer_ah(struct efa_resource *resourc expect_function_call(efa_mock_efadv_create_cq_set_eopnotsupp_and_return_null); } - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); efa_rdm_cq = container_of(resource->cq, struct efa_rdm_cq, efa_cq.util_cq.cq_fid.fid); @@ -821,7 +821,7 @@ static void test_efa_cq_read(struct 
efa_resource *resource, fi_addr_t *addr, struct ibv_qp_ex *ibv_qpx; struct efa_base_ep *base_ep; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_DIRECT_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_DIRECT_FABRIC_NAME); base_ep = container_of(resource->ep, struct efa_base_ep, util_ep.ep_fid); ibv_qpx = base_ep->qp->ibv_qp_ex; diff --git a/prov/efa/test/efa_unit_test_domain.c b/prov/efa/test/efa_unit_test_domain.c index 29a21d29fb9..5152c63ca27 100644 --- a/prov/efa/test/efa_unit_test_domain.c +++ b/prov/efa/test/efa_unit_test_domain.c @@ -10,7 +10,7 @@ void test_efa_domain_open_ops_wrong_name(struct efa_resource **state) int ret; struct fi_efa_ops_domain *efa_domain_ops; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); ret = fi_open_ops(&resource->domain->fid, "arbitrary name", 0, (void **)&efa_domain_ops, NULL); assert_int_equal(ret, -FI_EINVAL); @@ -61,7 +61,7 @@ void test_efa_domain_open_ops_mr_query(struct efa_resource **state) { struct efa_resource *resource = *state; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); /* set recv_ic_id as 0 */ g_efa_unit_test_mocks.efadv_query_mr = &efa_mock_efadv_query_mr_recv_ic_id_0; @@ -114,7 +114,7 @@ void test_efa_domain_open_ops_mr_query(struct efa_resource **state) { struct efa_resource *resource = *state; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); test_efa_domain_open_ops_mr_query_common( resource, diff --git a/prov/efa/test/efa_unit_test_ep.c b/prov/efa/test/efa_unit_test_ep.c index 1c12b5913dd..cab4697d03b 100644 --- a/prov/efa/test/efa_unit_test_ep.c +++ b/prov/efa/test/efa_unit_test_ep.c @@ -37,7 +37,7 @@ void test_efa_rdm_ep_host_id(struct efa_resource **state, bool file_exists, char 
efa_env.host_id_file = host_id_file; } - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); @@ -272,7 +272,7 @@ void test_efa_rdm_ep_pkt_pool_flags(struct efa_resource **state) { struct efa_resource *resource = *state; efa_env.huge_page_setting = EFA_ENV_HUGE_PAGE_DISABLED; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); check_ep_pkt_pool_flags(resource->ep, OFI_BUFPOOL_NONSHARED); } @@ -290,7 +290,7 @@ void test_efa_rdm_ep_pkt_pool_page_alignment(struct efa_resource **state) struct efa_rdm_ep *efa_rdm_ep; struct efa_resource *resource = *state; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); efa_env.huge_page_setting = EFA_ENV_HUGE_PAGE_DISABLED; ret = fi_endpoint(resource->domain, resource->info, &ep, NULL); @@ -321,7 +321,7 @@ void test_efa_rdm_read_copy_pkt_pool_128_alignment(struct efa_resource **state) struct efa_resource *resource = *state; struct efa_domain *efa_domain = NULL; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); /* rx_readcopy_pkt_pool is only created when application requested FI_HMEM */ efa_domain = container_of(resource->domain, struct efa_domain, @@ -358,7 +358,7 @@ void test_efa_rdm_pke_get_available_copy_methods_align128(struct efa_resource ** struct efa_resource *resource = *state; bool local_read_available, gdrcopy_available, cuda_memcpy_available; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); efa_mr.peer.iface = FI_HMEM_CUDA; efa_rdm_ep = container_of(resource->ep, struct 
efa_rdm_ep, base_ep.util_ep.ep_fid); @@ -593,7 +593,7 @@ void test_efa_rdm_ep_rma_queue_before_handshake(struct efa_resource **state, int struct efa_rdm_ope *txe; struct efa_rdm_peer *peer; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_FABRIC_NAME); resource->hints->caps |= FI_MSG | FI_TAGGED | FI_RMA; resource->hints->domain_attr->mr_mode |= MR_MODE_BITS; efa_unit_test_resource_construct_with_hints(resource, FI_EP_RDM, FI_VERSION(1, 14), @@ -681,7 +681,7 @@ void test_efa_rdm_ep_rma_inconsistent_unsolicited_write_recv(struct efa_resource uint64_t rma_addr, rma_key; struct efa_rdm_peer *peer; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_FABRIC_NAME); resource->hints->caps |= FI_MSG | FI_TAGGED | FI_RMA; resource->hints->domain_attr->mr_mode |= MR_MODE_BITS; efa_unit_test_resource_construct_with_hints(resource, FI_EP_RDM, FI_VERSION(1, 22), @@ -750,7 +750,7 @@ void test_efa_rdm_ep_send_with_shm_no_copy(struct efa_resource **state) char buff[8] = {0}; int err; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); /* create a fake peer */ err = fi_getname(&resource->ep->fid, &raw_addr, &raw_addr_len); @@ -789,7 +789,7 @@ void test_efa_rdm_ep_rma_without_caps(struct efa_resource **state) int err; uint64_t rma_addr, rma_key; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_FABRIC_NAME); resource->hints->caps |= FI_MSG | FI_TAGGED; resource->hints->caps &= ~FI_RMA; resource->hints->domain_attr->mr_mode |= MR_MODE_BITS; @@ -840,7 +840,7 @@ void test_efa_rdm_ep_atomic_without_caps(struct efa_resource **state) int err; uint64_t rma_addr, rma_key; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); + 
resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_FABRIC_NAME); resource->hints->caps |= FI_MSG | FI_TAGGED; resource->hints->caps &= ~FI_ATOMIC; resource->hints->domain_attr->mr_mode |= MR_MODE_BITS; @@ -895,7 +895,7 @@ void test_efa_rdm_ep_getopt(struct efa_resource **state, size_t opt_len, int exp }; size_t num_opt_names = sizeof(opt_names) / sizeof(int); - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); for (i = 0; i < num_opt_names; i++) { opt_len_temp = opt_len; @@ -941,7 +941,7 @@ void test_efa_rdm_ep_enable_qp_in_order_aligned_128_bytes_common(struct efa_reso { struct efa_resource *resource = *state; - efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM, EFA_FABRIC_NAME); /* fi_setopt should always succeed */ assert_int_equal(fi_setopt(&resource->ep->fid, FI_OPT_ENDPOINT, @@ -1068,7 +1068,7 @@ void test_efa_rdm_ep_user_zcpy_rx_disabled(struct efa_resource **state) { struct efa_resource *resource = *state; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_FABRIC_NAME); assert_non_null(resource->hints); resource->hints->mode = FI_MSG_PREFIX; @@ -1084,7 +1084,7 @@ void test_efa_rdm_ep_user_disable_p2p_zcpy_rx_disabled(struct efa_resource **sta { struct efa_resource *resource = *state; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_FABRIC_NAME); assert_non_null(resource->hints); resource->hints->mode = FI_MSG_PREFIX; @@ -1100,7 +1100,7 @@ void test_efa_rdm_ep_user_zcpy_rx_unhappy_due_to_sas(struct efa_resource **state { struct efa_resource *resource = *state; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, 
EFA_FABRIC_NAME); assert_non_null(resource->hints); resource->hints->tx_attr->msg_order = FI_ORDER_SAS; @@ -1118,7 +1118,7 @@ void test_efa_rdm_ep_user_p2p_not_supported_zcpy_rx_happy(struct efa_resource ** { struct efa_resource *resource = *state; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_FABRIC_NAME); assert_non_null(resource->hints); resource->hints->mode = FI_MSG_PREFIX; @@ -1134,7 +1134,7 @@ void test_efa_rdm_ep_user_zcpy_rx_unhappy_due_to_no_mr_local(struct efa_resource { struct efa_resource *resource = *state; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_FABRIC_NAME); assert_non_null(resource->hints); resource->hints->caps = FI_MSG; @@ -1148,7 +1148,7 @@ void test_efa_rdm_ep_close_discard_posted_recv(struct efa_resource **state) struct efa_resource *resource = *state; char buf[16]; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); /* Post recv and then close ep */ assert_int_equal(fi_recv(resource->ep, (void *) buf, 16, NULL, FI_ADDR_UNSPEC, NULL), 0); @@ -1168,7 +1168,7 @@ void test_efa_rdm_ep_zcpy_recv_cancel(struct efa_resource **state) struct fi_context cancel_context = {0}; struct efa_unit_test_buff recv_buff; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_FABRIC_NAME); assert_non_null(resource->hints); resource->hints->caps = FI_MSG; @@ -1202,7 +1202,7 @@ void test_efa_rdm_ep_zcpy_recv_eagain(struct efa_resource **state) int i; struct efa_rdm_ep *efa_rdm_ep; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_FABRIC_NAME); assert_non_null(resource->hints); resource->hints->caps = FI_MSG; @@ -1310,7 +1310,7 @@ void 
test_efa_rdm_ep_rx_refill_impl(struct efa_resource **state, int threshold, efa_env.internal_rx_refill_threshold = threshold; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_FABRIC_NAME); assert_non_null(resource->hints); resource->hints->rx_attr->size = rx_size; efa_unit_test_resource_construct_with_hints(resource, FI_EP_RDM, FI_VERSION(1, 14), @@ -1385,7 +1385,7 @@ void test_efa_rdm_ep_support_unsolicited_write_recv(struct efa_resource **state) struct efa_rdm_ep *efa_rdm_ep; struct efa_resource *resource = *state; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); @@ -1403,7 +1403,7 @@ void test_efa_rdm_ep_default_sizes(struct efa_resource **state) struct efa_rdm_ep *efa_rdm_ep; struct efa_resource *resource = *state; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); @@ -1433,7 +1433,7 @@ void test_efa_ep_open(struct efa_resource **state) struct efa_base_ep *efa_ep; struct efa_domain *efa_domain; - efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM, EFA_DIRECT_PROV_NAME); + efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM, EFA_DIRECT_FABRIC_NAME); efa_ep = container_of(resource->ep, struct efa_base_ep, util_ep.ep_fid); efa_domain = container_of(resource->domain, struct efa_domain, @@ -1460,7 +1460,7 @@ void test_efa_ep_cancel(struct efa_resource **state) struct efa_resource *resource = *state; int ret; - efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM, EFA_DIRECT_PROV_NAME); + efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM, EFA_DIRECT_FABRIC_NAME); ret 
= fi_cancel((struct fid *)resource->ep, NULL); assert_int_equal(ret, -FI_ENOSYS); @@ -1480,7 +1480,7 @@ void test_efa_ep_getopt(struct efa_resource **state) size_t optlen; struct efa_base_ep *efa_ep; - efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM, EFA_DIRECT_PROV_NAME); + efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM, EFA_DIRECT_FABRIC_NAME); efa_ep = container_of(resource->ep, struct efa_base_ep, util_ep.ep_fid); @@ -1525,7 +1525,7 @@ void test_efa_ep_setopt_use_device_rdma(struct efa_resource **state) bool optval; struct efa_base_ep *efa_ep; - efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM, EFA_DIRECT_PROV_NAME); + efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM, EFA_DIRECT_FABRIC_NAME); efa_ep = container_of(resource->ep, struct efa_base_ep, util_ep.ep_fid); @@ -1555,7 +1555,7 @@ void test_efa_ep_setopt_hmem_p2p(struct efa_resource **state) size_t num_optvals = sizeof(optvals) / sizeof(int); int i, expected_return; - efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM, EFA_DIRECT_PROV_NAME); + efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM, EFA_DIRECT_FABRIC_NAME); /* FI_HMEM_P2P_DISABLED is not allowed */ for (i = 0; i < num_optvals; i++) { @@ -1575,7 +1575,7 @@ void test_efa_ep_setopt_rnr_retry(struct efa_resource **state) size_t optval; struct efa_base_ep *efa_ep; - efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM, EFA_DIRECT_PROV_NAME); + efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM, EFA_DIRECT_FABRIC_NAME); efa_ep = container_of(resource->ep, struct efa_base_ep, util_ep.ep_fid); assert_false(efa_ep->efa_qp_enabled); @@ -1602,7 +1602,7 @@ void test_efa_ep_setopt_sizes(struct efa_resource **state) size_t optval; struct efa_base_ep *efa_ep; - efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM, EFA_DIRECT_PROV_NAME); + efa_unit_test_resource_construct_ep_not_enabled(resource, 
FI_EP_RDM, EFA_DIRECT_FABRIC_NAME); efa_ep = container_of(resource->ep, struct efa_base_ep, util_ep.ep_fid); @@ -1644,7 +1644,7 @@ void test_efa_ep_bind_and_enable(struct efa_resource **state) struct efa_resource *resource = *state; struct efa_base_ep *efa_ep; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_DIRECT_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_DIRECT_FABRIC_NAME); efa_ep = container_of(resource->ep, struct efa_base_ep, util_ep.ep_fid); diff --git a/prov/efa/test/efa_unit_test_hmem.c b/prov/efa/test/efa_unit_test_hmem.c index 2b278bddfba..66a6cf81292 100644 --- a/prov/efa/test/efa_unit_test_hmem.c +++ b/prov/efa/test/efa_unit_test_hmem.c @@ -20,7 +20,7 @@ void test_efa_hmem_info_update_neuron(struct efa_resource **state) uint32_t efa_device_caps_orig; bool neuron_initialized_orig; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_FABRIC_NAME); assert_non_null(resource->hints); ret = fi_getinfo(FI_VERSION(1, 14), NULL, NULL, 0ULL, resource->hints, &resource->info); @@ -58,7 +58,7 @@ void test_efa_hmem_info_disable_p2p_neuron(struct efa_resource **state) ofi_hmem_disable_p2p = 1; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_FABRIC_NAME); assert_non_null(resource->hints); ret = fi_getinfo(FI_VERSION(1, 14), NULL, NULL, 0ULL, resource->hints, &resource->info); @@ -109,7 +109,7 @@ void test_efa_hmem_info_disable_p2p_cuda(struct efa_resource **state) ofi_hmem_disable_p2p = 1; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_FABRIC_NAME); assert_non_null(resource->hints); ret = fi_getinfo(FI_VERSION(1, 14), NULL, NULL, 0ULL, resource->hints, &resource->info); diff --git a/prov/efa/test/efa_unit_test_info.c b/prov/efa/test/efa_unit_test_info.c index 
febb386f4f3..b3ea13e0614 100644 --- a/prov/efa/test/efa_unit_test_info.c +++ b/prov/efa/test/efa_unit_test_info.c @@ -15,7 +15,7 @@ void test_info_open_ep_with_wrong_info() struct fid_ep *ep = NULL; int err; - hints = efa_unit_test_alloc_hints(FI_EP_DGRAM, EFA_PROV_NAME); + hints = efa_unit_test_alloc_hints(FI_EP_DGRAM, EFA_FABRIC_NAME); err = fi_getinfo(FI_VERSION(1, 14), NULL, NULL, 0ULL, hints, &info); assert_int_equal(err, 0); @@ -113,7 +113,7 @@ void test_info_tx_rx_msg_order_rdm_order_none(struct efa_resource **state) { struct efa_resource *resource = *state; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_FABRIC_NAME); assert_non_null(resource->hints); test_info_tx_rx_msg_order_from_hints(resource->hints, 0); @@ -123,7 +123,7 @@ void test_info_tx_rx_msg_order_rdm_order_sas(struct efa_resource **state) { struct efa_resource *resource = *state; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_FABRIC_NAME); assert_non_null(resource->hints); resource->hints->tx_attr->msg_order = FI_ORDER_SAS; @@ -135,7 +135,7 @@ void test_info_tx_rx_msg_order_dgram_order_none(struct efa_resource **state) { struct efa_resource *resource = *state; - resource->hints = efa_unit_test_alloc_hints(FI_EP_DGRAM, EFA_PROV_NAME); + resource->hints = efa_unit_test_alloc_hints(FI_EP_DGRAM, EFA_FABRIC_NAME); assert_non_null(resource->hints); test_info_tx_rx_msg_order_from_hints(resource->hints, 0); @@ -149,7 +149,7 @@ void test_info_tx_rx_msg_order_dgram_order_sas(struct efa_resource **state) { struct efa_resource *resource = *state; - resource->hints = efa_unit_test_alloc_hints(FI_EP_DGRAM, EFA_PROV_NAME); + resource->hints = efa_unit_test_alloc_hints(FI_EP_DGRAM, EFA_FABRIC_NAME); assert_non_null(resource->hints); resource->hints->tx_attr->msg_order = FI_ORDER_SAS; @@ -191,7 +191,7 @@ void 
test_info_max_order_size_dgram_with_atomic(struct efa_resource **state) { struct efa_resource *resource = *state; - resource->hints = efa_unit_test_alloc_hints(FI_EP_DGRAM, EFA_PROV_NAME); + resource->hints = efa_unit_test_alloc_hints(FI_EP_DGRAM, EFA_FABRIC_NAME); assert_non_null(resource->hints); resource->hints->caps = FI_ATOMIC; @@ -207,7 +207,7 @@ void test_info_max_order_size_rdm_with_atomic_no_order(struct efa_resource **sta { struct efa_resource *resource = *state; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_FABRIC_NAME); assert_non_null(resource->hints); @@ -229,7 +229,7 @@ void test_info_max_order_size_rdm_with_atomic_order(struct efa_resource **state) - g_device_list[0].rdm_info->src_addrlen - EFA_RDM_IOV_LIMIT * sizeof(struct fi_rma_iov); - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_FABRIC_NAME); assert_non_null(resource->hints); resource->hints->caps = FI_ATOMIC; @@ -244,7 +244,7 @@ void test_info_tx_rx_op_flags_rdm(struct efa_resource **state) { struct efa_resource *resource = *state; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_FABRIC_NAME); assert_non_null(resource->hints); resource->hints->tx_attr->op_flags = FI_DELIVERY_COMPLETE; @@ -256,7 +256,7 @@ void test_info_tx_rx_size_rdm(struct efa_resource **state) { struct efa_resource *resource = *state; - resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_FABRIC_NAME); assert_non_null(resource->hints); resource->hints->tx_attr->size = 16; @@ -317,7 +317,7 @@ void test_info_check_shm_info_hmem() { struct fi_info *hints; - hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); + hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_FABRIC_NAME); hints->caps |= 
FI_HMEM; test_info_check_shm_info_from_hints(hints); @@ -330,7 +330,7 @@ void test_info_check_shm_info_op_flags() { struct fi_info *hints; - hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); + hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_FABRIC_NAME); hints->tx_attr->op_flags |= FI_COMPLETION; hints->rx_attr->op_flags |= FI_COMPLETION; @@ -345,7 +345,7 @@ void test_info_check_shm_info_threading() { struct fi_info *hints; - hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); + hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_FABRIC_NAME); hints->domain_attr->threading = FI_THREAD_DOMAIN; test_info_check_shm_info_from_hints(hints); @@ -363,7 +363,7 @@ void test_info_check_hmem_cuda_support_on_api_lt_1_18() if (!hmem_ops[FI_HMEM_CUDA].initialized) skip(); - hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); + hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_FABRIC_NAME); hints->caps |= FI_HMEM; hints->domain_attr->mr_mode |= FI_MR_HMEM; @@ -402,7 +402,7 @@ void test_info_check_hmem_cuda_support_on_api_ge_1_18() if (!hmem_ops[FI_HMEM_CUDA].initialized) skip(); - hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); + hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_FABRIC_NAME); hints->caps |= FI_HMEM; hints->domain_attr->mr_mode |= FI_MR_HMEM; @@ -467,7 +467,7 @@ void test_use_device_rdma( const int env_val, unsetenv("FI_EFA_USE_DEVICE_RDMA"); } - hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); + hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_FABRIC_NAME); ret = fi_getinfo(api_version, NULL, NULL, 0ULL, hints, &info); assert_int_equal(ret, 0); @@ -531,7 +531,7 @@ static int get_first_nic_name(char **name) { char *nic_name = NULL; struct fi_info *hints, *info; - hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); + hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_FABRIC_NAME); ret = fi_getinfo(FI_VERSION(1, 14), NULL, NULL, 0ULL, hints, &info); fi_freeinfo(hints); if (ret) diff --git 
a/prov/efa/test/efa_unit_test_mr.c b/prov/efa/test/efa_unit_test_mr.c index 5516d4f325e..65f6ff39b87 100644 --- a/prov/efa/test/efa_unit_test_mr.c +++ b/prov/efa/test/efa_unit_test_mr.c @@ -11,7 +11,7 @@ void test_efa_mr_reg_counters(struct efa_resource **state) char *buf; struct fid_mr *mr; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); efa_domain = container_of(resource->domain, struct efa_domain, util_domain.domain_fid); assert_true(efa_domain->ibv_mr_reg_ct == 0); diff --git a/prov/efa/test/efa_unit_test_msg.c b/prov/efa/test/efa_unit_test_msg.c index b0df253fbeb..cc4a1cbfd45 100644 --- a/prov/efa/test/efa_unit_test_msg.c +++ b/prov/efa/test/efa_unit_test_msg.c @@ -15,7 +15,7 @@ static void test_efa_msg_recv_prep(struct efa_resource *resource, size_t raw_addr_len = sizeof(raw_addr); int ret; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_DIRECT_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_DIRECT_FABRIC_NAME); base_ep = container_of(resource->ep, struct efa_base_ep, util_ep.ep_fid); ibv_qp = base_ep->qp->ibv_qp; @@ -106,7 +106,7 @@ static void test_efa_msg_send_prep(struct efa_resource *resource, size_t raw_addr_len = sizeof(raw_addr); int ret; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_DIRECT_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_DIRECT_FABRIC_NAME); base_ep = container_of(resource->ep, struct efa_base_ep, util_ep.ep_fid); ibv_qpx = base_ep->qp->ibv_qp_ex; diff --git a/prov/efa/test/efa_unit_test_ope.c b/prov/efa/test/efa_unit_test_ope.c index 701e2bb8c68..cdfb465a188 100644 --- a/prov/efa/test/efa_unit_test_ope.c +++ b/prov/efa/test/efa_unit_test_ope.c @@ -65,7 +65,7 @@ void test_efa_rdm_ope_prepare_to_post_send_with_no_enough_tx_pkts(struct efa_res struct efa_resource *resource = *state; struct efa_rdm_ep *efa_rdm_ep; - efa_unit_test_resource_construct(resource, FI_EP_RDM, 
EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); efa_rdm_ep->efa_outstanding_tx_ops = efa_rdm_ep->efa_max_outstanding_tx_ops - 1; @@ -88,7 +88,7 @@ void test_efa_rdm_ope_prepare_to_post_send_host_memory(struct efa_resource **sta int expected_pkt_entry_cnt; int expected_pkt_entry_data_size_vec[1024]; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); /* data size should be aligned and evenly distributed. * alignment for host memory is 8 byte by default. @@ -137,7 +137,7 @@ void test_efa_rdm_ope_prepare_to_post_send_host_memory_align128(struct efa_resou int expected_pkt_entry_cnt; int expected_pkt_entry_data_size_vec[1024]; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); efa_rdm_ep->sendrecv_in_order_aligned_128_bytes = true; @@ -186,7 +186,7 @@ void test_efa_rdm_ope_prepare_to_post_send_cuda_memory(struct efa_resource **sta int expected_pkt_entry_cnt; int expected_pkt_entry_data_size_vec[1024]; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); /* default alignment of cuda memory is 64 bytes */ msg_length = 12000; @@ -211,7 +211,7 @@ void test_efa_rdm_ope_prepare_to_post_send_cuda_memory_align128(struct efa_resou int expected_pkt_entry_cnt; int expected_pkt_entry_data_size_vec[1024]; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); efa_rdm_ep->sendrecv_in_order_aligned_128_bytes = true; @@ -243,7 
+243,7 @@ void test_efa_rdm_ope_post_write_0_byte(struct efa_resource **state) fi_addr_t addr; int ret, err; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); ret = fi_getname(&resource->ep->fid, &raw_addr, &raw_addr_len); assert_int_equal(ret, 0); @@ -314,7 +314,7 @@ void test_efa_rdm_rxe_post_local_read_or_queue_cleanup_txe(struct efa_resource * */ g_efa_unit_test_mocks.efa_rdm_pke_read = &efa_mock_efa_rdm_pke_read_return_mock; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); diff --git a/prov/efa/test/efa_unit_test_pke.c b/prov/efa/test/efa_unit_test_pke.c index e7fda0365a1..ec3bf09dac7 100644 --- a/prov/efa/test/efa_unit_test_pke.c +++ b/prov/efa/test/efa_unit_test_pke.c @@ -24,7 +24,7 @@ void test_efa_rdm_pke_handle_longcts_rtm_send_completion(struct efa_resource **s int err, numaddr; struct efa_rdm_ope *txe; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); diff --git a/prov/efa/test/efa_unit_test_rdm_peer.c b/prov/efa/test/efa_unit_test_rdm_peer.c index da909ed4905..461bae8f620 100644 --- a/prov/efa/test/efa_unit_test_rdm_peer.c +++ b/prov/efa/test/efa_unit_test_rdm_peer.c @@ -81,7 +81,7 @@ void test_efa_rdm_peer_reorder_expected_msg_id(struct efa_resource **state) { uint32_t msg_id, exp_msg_id; int expected_ret; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); msg_id = 0; exp_msg_id = 0; @@ -96,7 +96,7 @@ void test_efa_rdm_peer_reorder_smaller_msg_id(struct efa_resource **state) { uint32_t msg_id, exp_msg_id; int 
expected_ret; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); msg_id = 1; exp_msg_id = 10; @@ -110,7 +110,7 @@ void test_efa_rdm_peer_reorder_larger_msg_id(struct efa_resource **state) { uint32_t msg_id, exp_msg_id; int expected_ret; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); msg_id = 10; exp_msg_id = 0; @@ -125,7 +125,7 @@ void test_efa_rdm_peer_reorder_overflow_msg_id(struct efa_resource **state) { uint32_t msg_id, exp_msg_id; int expected_ret; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); msg_id = 16384; exp_msg_id = 0; @@ -192,7 +192,7 @@ void test_efa_rdm_peer_move_overflow_pke_to_recvwin(struct efa_resource **state) struct efa_rdm_peer *peer; struct efa_rdm_pke *pkt_entry; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); /* overflow_pke_list has a pkt entry with msg_id 18000. * After calling efa_rdm_peer_move_overflow_pke_to_recvwin when exp_msg_id = 16384, @@ -213,7 +213,7 @@ void test_efa_rdm_peer_keep_pke_in_overflow_list(struct efa_resource **state) { struct efa_rdm_peer_overflow_pke_list_entry *overflow_pke_list_entry; struct dlist_entry *tmp; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); /* overflow_pke_list has a pkt entry with msg_id 33000. 
* After calling efa_rdm_peer_move_overflow_pke_to_recvwin when exp_msg_id = 16384, @@ -269,7 +269,7 @@ void test_efa_rdm_peer_append_overflow_pke_to_recvwin(struct efa_resource **stat struct efa_rdm_ep *efa_rdm_ep; int ret; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); diff --git a/prov/efa/test/efa_unit_test_rma.c b/prov/efa/test/efa_unit_test_rma.c index fd5818657ba..ddf3a5ef8bc 100644 --- a/prov/efa/test/efa_unit_test_rma.c +++ b/prov/efa/test/efa_unit_test_rma.c @@ -15,7 +15,7 @@ static void test_efa_rma_prep(struct efa_resource *resource, fi_addr_t *addr) size_t raw_addr_len = sizeof(raw_addr); int ret; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_DIRECT_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_DIRECT_FABRIC_NAME); base_ep = container_of(resource->ep, struct efa_base_ep, util_ep.ep_fid); /* Add rma caps explicitly to ep->info to allow local test */ diff --git a/prov/efa/test/efa_unit_test_runt.c b/prov/efa/test/efa_unit_test_runt.c index 5a49d0775ac..d9feb8f4809 100644 --- a/prov/efa/test/efa_unit_test_runt.c +++ b/prov/efa/test/efa_unit_test_runt.c @@ -61,7 +61,7 @@ void test_efa_rdm_peer_get_runt_size_no_enough_runt(struct efa_resource **state) size_t peer_num_runt_bytes_in_flight; size_t total_runt_size; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); msg_length = 12000; peer_num_runt_bytes_in_flight = 1001; @@ -79,7 +79,7 @@ void test_efa_rdm_peer_get_runt_size_cuda_memory_smaller_than_alignment(struct e size_t peer_num_runt_bytes_in_flight; size_t total_runt_size; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); msg_length = 12000; 
peer_num_runt_bytes_in_flight = 1000; @@ -97,7 +97,7 @@ void test_efa_rdm_peer_get_runt_size_cuda_memory_exceeding_total_len(struct efa_ size_t peer_num_runt_bytes_in_flight; size_t total_runt_size; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); msg_length = 12000; peer_num_runt_bytes_in_flight = 0; @@ -115,7 +115,7 @@ void test_efa_rdm_peer_get_runt_size_cuda_memory_normal(struct efa_resource **st size_t peer_num_runt_bytes_in_flight; size_t total_runt_size; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); msg_length = 12000; peer_num_runt_bytes_in_flight = 10000; @@ -135,7 +135,7 @@ void test_efa_rdm_peer_get_runt_size_cuda_memory_128_multiple_alignment(struct e size_t peer_num_runt_bytes_in_flight; size_t total_runt_size; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); efa_rdm_ep->sendrecv_in_order_aligned_128_bytes = 1; @@ -158,7 +158,7 @@ void test_efa_rdm_peer_get_runt_size_cuda_memory_non_128_multiple_alignment(stru size_t peer_num_runt_bytes_in_flight; size_t total_runt_size; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); efa_rdm_ep->sendrecv_in_order_aligned_128_bytes = 1; @@ -181,7 +181,7 @@ void test_efa_rdm_peer_get_runt_size_cuda_memory_smaller_than_128_alignment(stru size_t peer_num_runt_bytes_in_flight; size_t total_runt_size; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); efa_rdm_ep = 
container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); efa_rdm_ep->sendrecv_in_order_aligned_128_bytes = 1; @@ -202,7 +202,7 @@ void test_efa_rdm_peer_get_runt_size_cuda_memory_exceeding_total_len_128_alignme size_t peer_num_runt_bytes_in_flight; size_t total_runt_size; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); efa_rdm_ep->sendrecv_in_order_aligned_128_bytes = 1; @@ -222,7 +222,7 @@ void test_efa_rdm_peer_get_runt_size_host_memory_smaller_than_alignment(struct e size_t peer_num_runt_bytes_in_flight; size_t total_runt_size; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); msg_length = 12000; peer_num_runt_bytes_in_flight = 1000; @@ -240,7 +240,7 @@ void test_efa_rdm_peer_get_runt_size_host_memory_exceeding_total_len(struct efa_ size_t peer_num_runt_bytes_in_flight; size_t total_runt_size; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); msg_length = 1111; peer_num_runt_bytes_in_flight = 0; @@ -258,7 +258,7 @@ void test_efa_rdm_peer_get_runt_size_host_memory_normal(struct efa_resource **st size_t peer_num_runt_bytes_in_flight; size_t total_runt_size; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); msg_length = 12000; peer_num_runt_bytes_in_flight = 10000; @@ -330,7 +330,7 @@ void test_efa_rdm_peer_select_readbase_rtm_no_runt(struct efa_resource **state) size_t peer_num_runt_bytes_in_flight; size_t total_runt_size; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); msg_length = 12000; 
peer_num_runt_bytes_in_flight = 1000; @@ -347,7 +347,7 @@ void test_efa_rdm_peer_select_readbase_rtm_do_runt(struct efa_resource **state) size_t peer_num_runt_bytes_in_flight; size_t total_runt_size; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); msg_length = 12000; peer_num_runt_bytes_in_flight = 1000; diff --git a/prov/efa/test/efa_unit_test_send.c b/prov/efa/test/efa_unit_test_send.c index 3b811e12222..4c0752b7be5 100644 --- a/prov/efa/test/efa_unit_test_send.c +++ b/prov/efa/test/efa_unit_test_send.c @@ -20,7 +20,7 @@ void test_efa_rdm_msg_send_to_local_peer_with_null_desc(struct efa_resource **st struct fi_msg msg = {0}; struct fi_msg_tagged tmsg = {0}; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); ret = fi_getname(&resource->ep->fid, &raw_addr, &raw_addr_len); assert_int_equal(ret, 0); diff --git a/prov/efa/test/efa_unit_test_srx.c b/prov/efa/test/efa_unit_test_srx.c index 57ce6402b70..2a86944aa30 100644 --- a/prov/efa/test/efa_unit_test_srx.c +++ b/prov/efa/test/efa_unit_test_srx.c @@ -18,7 +18,7 @@ void test_efa_srx_min_multi_recv_size(struct efa_resource **state) struct util_srx_ctx *srx_ctx; size_t min_multi_recv_size_new; - efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct_ep_not_enabled(resource, FI_EP_RDM, EFA_FABRIC_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); /* Set a new min_multi_recv_size via setopt*/ @@ -42,7 +42,7 @@ void test_efa_srx_cq(struct efa_resource **state) struct efa_rdm_ep *efa_rdm_ep; struct util_srx_ctx *srx_ctx; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, 
base_ep.util_ep.ep_fid); srx_ctx = efa_rdm_ep_get_peer_srx_ctx(efa_rdm_ep); @@ -57,7 +57,7 @@ void test_efa_srx_lock(struct efa_resource **state) struct util_srx_ctx *srx_ctx; struct efa_domain *efa_domain; - efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_PROV_NAME); + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); srx_ctx = efa_rdm_ep_get_peer_srx_ctx(efa_rdm_ep); diff --git a/prov/efa/test/efa_unit_tests.h b/prov/efa/test/efa_unit_tests.h index bfe0b4c0aee..586ac8f5e6f 100644 --- a/prov/efa/test/efa_unit_tests.h +++ b/prov/efa/test/efa_unit_tests.h @@ -22,9 +22,6 @@ extern struct efa_mock_ibv_send_wr_list g_ibv_send_wr_list; extern struct efa_unit_test_mocks g_efa_unit_test_mocks; extern struct efa_env efa_env; -#define EFA_DIRECT_PROV_NAME "efa-direct" -#define EFA_PROV_NAME "efa" - struct efa_resource { struct fi_info *hints; struct fi_info *info; From 04b0f788145a9fd62147b0184132f7ed300bc2c8 Mon Sep 17 00:00:00 2001 From: Sai Sunku Date: Wed, 5 Feb 2025 06:07:17 +0000 Subject: [PATCH 2/3] prov/efa: Remove incorrect usage of rdm_info->ep_attr->max_msg_size Previously, device->rdm_info->ep_attr->max_msg_size would always be the max message size for FI_MSG operations (corresponding to the device MTU size). With efa direct and related changes, max_msg_size can be the maximum RMA size supported by the device which is 1GB. So use device->ibv_port_attr.max_msg_sz instead when MTU size is required. 
Signed-off-by: Sai Sunku --- prov/efa/src/efa_hmem.c | 2 +- prov/efa/src/efa_prov_info.c | 6 +++--- prov/efa/src/rdm/efa_rdm_ep_fiops.c | 2 +- prov/efa/test/efa_unit_test_info.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/prov/efa/src/efa_hmem.c b/prov/efa/src/efa_hmem.c index 18dba70ca2c..30a32697f4e 100644 --- a/prov/efa/src/efa_hmem.c +++ b/prov/efa/src/efa_hmem.c @@ -11,7 +11,7 @@ struct efa_hmem_info g_efa_hmem_info[OFI_HMEM_MAX]; static size_t efa_max_eager_msg_size_with_largest_header() { int mtu_size; - mtu_size = g_device_list[0].rdm_info->ep_attr->max_msg_size; + mtu_size = g_device_list[0].ibv_port_attr.max_msg_sz; return mtu_size - efa_rdm_pkt_type_get_max_hdr_size(); } diff --git a/prov/efa/src/efa_prov_info.c b/prov/efa/src/efa_prov_info.c index 4bfd1ebc6cf..423365d2317 100644 --- a/prov/efa/src/efa_prov_info.c +++ b/prov/efa/src/efa_prov_info.c @@ -602,7 +602,7 @@ int efa_prov_info_alloc_for_rdm(struct fi_info **prov_info_rdm_ptr, /* * EFA RDM endpoint implemented emulated atomic, hence set atomic size */ - max_atomic_size = device->rdm_info->ep_attr->max_msg_size + max_atomic_size = device->ibv_port_attr.max_msg_sz - sizeof(struct efa_rdm_rta_hdr) - device->rdm_info->src_addrlen - EFA_RDM_IOV_LIMIT * sizeof(struct fi_rma_iov); @@ -637,10 +637,10 @@ int efa_prov_info_alloc_for_rdm(struct fi_info **prov_info_rdm_ptr, * pkt_entry_size - maximum_header_size. 
*/ if (efa_env.enable_shm_transfer) - min_pkt_size = MIN(device->rdm_info->ep_attr->max_msg_size - efa_rdm_pkt_type_get_max_hdr_size(), + min_pkt_size = MIN(device->ibv_port_attr.max_msg_sz - efa_rdm_pkt_type_get_max_hdr_size(), SHM_MAX_INJECT_SIZE); else - min_pkt_size = device->rdm_info->ep_attr->max_msg_size - efa_rdm_pkt_type_get_max_hdr_size(); + min_pkt_size = device->ibv_port_attr.max_msg_sz - efa_rdm_pkt_type_get_max_hdr_size(); if (min_pkt_size < efa_rdm_pkt_type_get_max_hdr_size()) { prov_info_rdm->tx_attr->inject_size = 0; diff --git a/prov/efa/src/rdm/efa_rdm_ep_fiops.c b/prov/efa/src/rdm/efa_rdm_ep_fiops.c index ba0c6940f3d..253e43aac74 100644 --- a/prov/efa/src/rdm/efa_rdm_ep_fiops.c +++ b/prov/efa/src/rdm/efa_rdm_ep_fiops.c @@ -503,7 +503,7 @@ int efa_rdm_ep_open(struct fid_domain *domain, struct fi_info *info, efa_rdm_ep->use_device_rdma = efa_rdm_get_use_device_rdma(info->fabric_attr->api_version); efa_rdm_ep->shm_permitted = true; efa_rdm_ep->msg_prefix_size = info->ep_attr->msg_prefix_size; - efa_rdm_ep->mtu_size = efa_domain->device->rdm_info->ep_attr->max_msg_size; + efa_rdm_ep->mtu_size = efa_domain->device->ibv_port_attr.max_msg_sz; efa_rdm_ep->max_data_payload_size = efa_rdm_ep->mtu_size - sizeof(struct efa_rdm_ctsdata_hdr) - sizeof(struct efa_rdm_ctsdata_opt_connid_hdr); efa_rdm_ep->min_multi_recv_size = efa_rdm_ep->mtu_size - efa_rdm_pkt_type_get_max_hdr_size(); diff --git a/prov/efa/test/efa_unit_test_info.c b/prov/efa/test/efa_unit_test_info.c index b3ea13e0614..fa34fdfa384 100644 --- a/prov/efa/test/efa_unit_test_info.c +++ b/prov/efa/test/efa_unit_test_info.c @@ -224,7 +224,7 @@ void test_info_max_order_size_rdm_with_atomic_no_order(struct efa_resource **sta void test_info_max_order_size_rdm_with_atomic_order(struct efa_resource **state) { struct efa_resource *resource = *state; - size_t max_atomic_size = g_device_list[0].rdm_info->ep_attr->max_msg_size + size_t max_atomic_size = g_device_list[0].ibv_port_attr.max_msg_sz - 
sizeof(struct efa_rdm_rta_hdr) - g_device_list[0].rdm_info->src_addrlen - EFA_RDM_IOV_LIMIT * sizeof(struct fi_rma_iov); From ea02120c2b1d0cb646cc4547411797b7546cef09 Mon Sep 17 00:00:00 2001 From: Sai Sunku Date: Thu, 30 Jan 2025 01:33:55 +0000 Subject: [PATCH 3/3] prov/efa: Unit test additions and fixes for efa-direct Remove temporary hacks added previously to test the efa-direct code path and add new tests Signed-off-by: Sai Sunku --- prov/efa/test/efa_unit_test_av.c | 33 ++++ prov/efa/test/efa_unit_test_common.c | 62 ++++---- prov/efa/test/efa_unit_test_ep.c | 12 +- prov/efa/test/efa_unit_test_info.c | 223 +++++++++++++++++++++++++-- prov/efa/test/efa_unit_tests.c | 15 ++ prov/efa/test/efa_unit_tests.h | 26 +++- 6 files changed, 312 insertions(+), 59 deletions(-) diff --git a/prov/efa/test/efa_unit_test_av.c b/prov/efa/test/efa_unit_test_av.c index 5acc36ee205..362b83c8207 100644 --- a/prov/efa/test/efa_unit_test_av.c +++ b/prov/efa/test/efa_unit_test_av.c @@ -2,6 +2,39 @@ /* SPDX-FileCopyrightText: Copyright Amazon.com, Inc. or its affiliates. All rights reserved. 
*/ #include "efa_unit_tests.h" +#include "efa_av.h" + +/** + * @brief Verify the ep type in struct efa_av for efa RDM path + * + * @param[in] state struct efa_resource that is managed by the framework + */ +void test_av_ep_type_efa_rdm(struct efa_resource **state) +{ + struct efa_resource *resource = *state; + struct efa_av *efa_av; + + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_FABRIC_NAME); + g_efa_unit_test_mocks.ibv_create_ah = &efa_mock_ibv_create_ah_check_mock; + efa_av = container_of(resource->av, struct efa_av, util_av.av_fid); + assert_int_equal(efa_av->ep_type, FI_EP_RDM); +} + +/** + * @brief Verify the ep type in struct efa_av for efa direct path + * + * @param[in] state struct efa_resource that is managed by the framework + */ +void test_av_ep_type_efa_direct(struct efa_resource **state) +{ + struct efa_resource *resource = *state; + struct efa_av *efa_av; + + efa_unit_test_resource_construct(resource, FI_EP_RDM, EFA_DIRECT_FABRIC_NAME); + g_efa_unit_test_mocks.ibv_create_ah = &efa_mock_ibv_create_ah_check_mock; + efa_av = container_of(resource->av, struct efa_av, util_av.av_fid); + assert_int_equal(efa_av->ep_type, FI_EP_RDM); +} /** * @brief Only works on nodes with EFA devices diff --git a/prov/efa/test/efa_unit_test_common.c b/prov/efa/test/efa_unit_test_common.c index 9580d16bf77..6064332b5cf 100644 --- a/prov/efa/test/efa_unit_test_common.c +++ b/prov/efa/test/efa_unit_test_common.c @@ -52,7 +52,7 @@ void efa_unit_test_construct_msg_rma(struct fi_msg_rma *msg, struct iovec *iov, msg->data = data; } -struct fi_info *efa_unit_test_alloc_hints(enum fi_ep_type ep_type, char *prov_name) +struct fi_info *efa_unit_test_alloc_hints(enum fi_ep_type ep_type, char *fabric_name) { struct fi_info *hints; @@ -60,29 +60,33 @@ struct fi_info *efa_unit_test_alloc_hints(enum fi_ep_type ep_type, char *prov_na if (!hints) return NULL; - hints->fabric_attr->prov_name = strdup(prov_name); + if (fabric_name) + hints->fabric_attr->name = strdup(fabric_name);
hints->ep_attr->type = ep_type; /* Use a minimal caps that efa / efa-direct should always support */ hints->domain_attr->mr_mode = MR_MODE_BITS; + + /* EFA direct and dgram paths require FI_CONTEXT2 */ + if (!fabric_name || !strcasecmp(fabric_name, EFA_DIRECT_FABRIC_NAME)) + hints->mode |= FI_CONTEXT2; + if (ep_type == FI_EP_DGRAM) { - hints->mode |= FI_MSG_PREFIX; + hints->mode |= FI_MSG_PREFIX | FI_CONTEXT2; } return hints; } -/* TODO: remove use_efa_direct after we have efa_direct implemented in fi_info */ void efa_unit_test_resource_construct_with_hints(struct efa_resource *resource, enum fi_ep_type ep_type, uint32_t fi_version, struct fi_info *hints, - bool enable_ep, bool open_cq, char* prov_name) + bool enable_ep, bool open_cq) { int ret = 0; struct fi_av_attr av_attr = {0}; struct fi_cq_attr cq_attr = {0}; struct fi_eq_attr eq_attr = {0}; - struct efa_domain *efa_domain; ret = fi_getinfo(fi_version, NULL, NULL, 0ULL, hints, &resource->info); if (ret) @@ -96,17 +100,6 @@ void efa_unit_test_resource_construct_with_hints(struct efa_resource *resource, if (ret) goto err; - /* - * TODO: Remove this function pointer override when we have it assigned - * for efa-direct correctly. 
- */ - if (!strcmp(EFA_DIRECT_PROV_NAME, prov_name)) { - efa_domain = container_of(resource->domain, struct efa_domain, util_domain.domain_fid); - - efa_domain->util_domain.domain_fid.ops->endpoint = efa_ep_open; - efa_domain->util_domain.domain_fid.ops->cq_open = efa_cq_open; - } - ret = fi_endpoint(resource->domain, resource->info, &resource->ep, NULL); if (ret) goto err; @@ -146,19 +139,18 @@ void efa_unit_test_resource_construct_with_hints(struct efa_resource *resource, assert_int_equal(ret, 0); } -void efa_unit_test_resource_construct(struct efa_resource *resource, enum fi_ep_type ep_type, char *prov_name) +void efa_unit_test_resource_construct(struct efa_resource *resource, enum fi_ep_type ep_type, char *fabric_name) { - /* TODO use prov_name here when efa-direct fi_info is implemented */ - resource->hints = efa_unit_test_alloc_hints(ep_type, EFA_PROV_NAME); + resource->hints = efa_unit_test_alloc_hints(ep_type, fabric_name); if (!resource->hints) goto err; - if (!strcmp(EFA_DIRECT_PROV_NAME, prov_name)) + if (!strcmp(EFA_DIRECT_FABRIC_NAME, fabric_name)) efa_unit_test_resource_construct_with_hints(resource, ep_type, FI_VERSION(2, 0), - resource->hints, true, true, prov_name); + resource->hints, true, true); else efa_unit_test_resource_construct_with_hints(resource, ep_type, FI_VERSION(1, 14), - resource->hints, true, true, prov_name); + resource->hints, true, true); return; err: @@ -169,19 +161,18 @@ void efa_unit_test_resource_construct(struct efa_resource *resource, enum fi_ep_ } void efa_unit_test_resource_construct_ep_not_enabled(struct efa_resource *resource, - enum fi_ep_type ep_type, char *prov_name) + enum fi_ep_type ep_type, char *fabric_name) { - /* TODO use prov_name here when efa-direct fi_info is implemented */ - resource->hints = efa_unit_test_alloc_hints(ep_type, EFA_PROV_NAME); + resource->hints = efa_unit_test_alloc_hints(ep_type, fabric_name); if (!resource->hints) goto err; - if (!strcmp(EFA_DIRECT_PROV_NAME, prov_name)) + if 
(!strcmp(EFA_DIRECT_FABRIC_NAME, fabric_name)) efa_unit_test_resource_construct_with_hints(resource, ep_type, FI_VERSION(2, 0), - resource->hints, false, true, prov_name); + resource->hints, false, true); else efa_unit_test_resource_construct_with_hints(resource, ep_type, FI_VERSION(1, 14), - resource->hints, false, true, prov_name); + resource->hints, false, true); return; err: @@ -192,19 +183,18 @@ void efa_unit_test_resource_construct_ep_not_enabled(struct efa_resource *resour } void efa_unit_test_resource_construct_no_cq_and_ep_not_enabled(struct efa_resource *resource, - enum fi_ep_type ep_type, char *prov_name) + enum fi_ep_type ep_type, char *fabric_name) { - /* TODO use prov_name here when efa-direct fi_info is implemented */ - resource->hints = efa_unit_test_alloc_hints(ep_type, EFA_PROV_NAME); + resource->hints = efa_unit_test_alloc_hints(ep_type, fabric_name); if (!resource->hints) goto err; - if (!strcmp(EFA_DIRECT_PROV_NAME, prov_name)) + if (!strcmp(EFA_DIRECT_FABRIC_NAME, fabric_name)) efa_unit_test_resource_construct_with_hints(resource, ep_type, FI_VERSION(2, 0), - resource->hints, false, false, prov_name); + resource->hints, false, false); else efa_unit_test_resource_construct_with_hints(resource, ep_type, FI_VERSION(1, 14), - resource->hints, false, false, prov_name); + resource->hints, false, false); return; err: @@ -227,7 +217,7 @@ void efa_unit_test_resource_construct_rdm_shm_disabled(struct efa_resource *reso goto err; efa_unit_test_resource_construct_with_hints(resource, FI_EP_RDM, FI_VERSION(1, 14), - resource->hints, false, true, EFA_PROV_NAME); + resource->hints, false, true); ret = fi_setopt(&resource->ep->fid, FI_OPT_ENDPOINT, FI_OPT_SHARED_MEMORY_PERMITTED, &shm_permitted, diff --git a/prov/efa/test/efa_unit_test_ep.c b/prov/efa/test/efa_unit_test_ep.c index cab4697d03b..9f68753c5bf 100644 --- a/prov/efa/test/efa_unit_test_ep.c +++ b/prov/efa/test/efa_unit_test_ep.c @@ -597,7 +597,7 @@ void 
test_efa_rdm_ep_rma_queue_before_handshake(struct efa_resource **state, int resource->hints->caps |= FI_MSG | FI_TAGGED | FI_RMA; resource->hints->domain_attr->mr_mode |= MR_MODE_BITS; efa_unit_test_resource_construct_with_hints(resource, FI_EP_RDM, FI_VERSION(1, 14), - resource->hints, true, true, EFA_PROV_NAME); + resource->hints, true, true); /* ensure we don't have RMA capability. */ efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); @@ -685,7 +685,7 @@ void test_efa_rdm_ep_rma_inconsistent_unsolicited_write_recv(struct efa_resource resource->hints->caps |= FI_MSG | FI_TAGGED | FI_RMA; resource->hints->domain_attr->mr_mode |= MR_MODE_BITS; efa_unit_test_resource_construct_with_hints(resource, FI_EP_RDM, FI_VERSION(1, 22), - resource->hints, true, true, EFA_PROV_NAME); + resource->hints, true, true); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); @@ -794,7 +794,7 @@ void test_efa_rdm_ep_rma_without_caps(struct efa_resource **state) resource->hints->caps &= ~FI_RMA; resource->hints->domain_attr->mr_mode |= MR_MODE_BITS; efa_unit_test_resource_construct_with_hints(resource, FI_EP_RDM, FI_VERSION(1, 14), - resource->hints, true, true, EFA_PROV_NAME); + resource->hints, true, true); /* ensure we don't have RMA capability. */ efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); @@ -845,7 +845,7 @@ void test_efa_rdm_ep_atomic_without_caps(struct efa_resource **state) resource->hints->caps &= ~FI_ATOMIC; resource->hints->domain_attr->mr_mode |= MR_MODE_BITS; efa_unit_test_resource_construct_with_hints(resource, FI_EP_RDM, FI_VERSION(1, 14), - resource->hints, true, true, EFA_PROV_NAME); + resource->hints, true, true); /* ensure we don't have ATOMIC capability. 
*/ efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); @@ -1001,7 +1001,7 @@ static void test_efa_rdm_ep_use_zcpy_rx_impl(struct efa_resource *resource, ofi_hmem_disable_p2p = cuda_p2p_disabled; efa_unit_test_resource_construct_with_hints(resource, FI_EP_RDM, FI_VERSION(1, 14), - resource->hints, false, true, EFA_PROV_NAME); + resource->hints, false, true); /* System memory P2P should always be enabled */ assert_true(g_efa_hmem_info[FI_HMEM_SYSTEM].initialized); @@ -1314,7 +1314,7 @@ void test_efa_rdm_ep_rx_refill_impl(struct efa_resource **state, int threshold, assert_non_null(resource->hints); resource->hints->rx_attr->size = rx_size; efa_unit_test_resource_construct_with_hints(resource, FI_EP_RDM, FI_VERSION(1, 14), - resource->hints, true, true, EFA_PROV_NAME); + resource->hints, true, true); efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); assert_int_equal(efa_rdm_ep_get_rx_pool_size(efa_rdm_ep), rx_size); diff --git a/prov/efa/test/efa_unit_test_info.c b/prov/efa/test/efa_unit_test_info.c index fa34fdfa384..33ddae84088 100644 --- a/prov/efa/test/efa_unit_test_info.c +++ b/prov/efa/test/efa_unit_test_info.c @@ -2,6 +2,7 @@ /* SPDX-FileCopyrightText: Copyright Amazon.com, Inc. or its affiliates. All rights reserved. 
*/ #include "efa_unit_tests.h" +#include "efa_prov_info.h" /** * @brief test that when a wrong fi_info was used to open resource, the error is handled @@ -21,7 +22,7 @@ void test_info_open_ep_with_wrong_info() assert_int_equal(err, 0); /* dgram endpoint require FI_MSG_PREFIX */ - assert_int_equal(info->mode, FI_MSG_PREFIX); + assert_int_equal(info->mode, FI_MSG_PREFIX | FI_CONTEXT2); /* make the info wrong by setting the mode to 0 */ info->mode = 0; @@ -44,6 +45,129 @@ void test_info_open_ep_with_wrong_info() assert_int_equal(err, 0); } +/** + * @brief Verify that efa rdm path fi_info objects have some expected values + */ +void test_info_rdm_attributes() +{ + struct fi_info *hints, *info = NULL, *info_head = NULL; + int err; + + hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_FABRIC_NAME); + assert_non_null(hints); + + err = fi_getinfo(FI_VERSION(1,6), NULL, NULL, 0, hints, &info_head); + assert_int_equal(err, 0); + assert_non_null(info_head); + + for (info = info_head; info; info = info->next) { + assert_true(!strcmp(info->fabric_attr->name, EFA_FABRIC_NAME)); + assert_true(strstr(info->domain_attr->name, "rdm")); + assert_int_equal(info->ep_attr->max_msg_size, UINT64_MAX); +#if HAVE_CUDA || HAVE_NEURON || HAVE_SYNAPSEAI + assert_true(info->caps | FI_HMEM); +#endif + } +} + +/** + * @brief Verify that efa dgram path fi_info objects have some expected values + */ +void test_info_dgram_attributes() +{ + struct fi_info *hints, *info = NULL, *info_head = NULL; + int err; + + hints = efa_unit_test_alloc_hints(FI_EP_DGRAM, EFA_FABRIC_NAME); + assert_non_null(hints); + + err = fi_getinfo(FI_VERSION(1,6), NULL, NULL, 0, hints, &info_head); + assert_int_equal(err, 0); + assert_non_null(info_head); + + for (info = info_head; info; info = info->next) { + assert_true(!strcmp(info->fabric_attr->name, EFA_FABRIC_NAME)); + assert_true(strstr(info->domain_attr->name, "dgrm")); + } +} + +/** + * @brief Verify that efa direct path fi_info objects have some expected values + */ 
+void test_info_direct_attributes() +{ + struct fi_info *hints, *info = NULL, *info_head = NULL; + int err; + + hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_DIRECT_FABRIC_NAME); + assert_non_null(hints); + + err = fi_getinfo(FI_VERSION(1,6), NULL, NULL, 0, hints, &info_head); + assert_int_equal(err, 0); + assert_non_null(info_head); + for (info = info_head; info; info = info->next) { + assert_true(!strcmp(info->fabric_attr->name, EFA_DIRECT_FABRIC_NAME)); + assert_true(strstr(info->domain_attr->name, "rdm")); + assert_false(info->caps & (FI_ATOMIC | FI_TAGGED)); + assert_false(info->tx_attr->msg_order & FI_ORDER_SAS); + assert_int_equal(info->ep_attr->max_msg_size, g_device_list[0].max_rdma_size); + } +} + +/** + * @brief Verify that efa direct only supports HMEM with p2p + */ +#if HAVE_CUDA || HAVE_NEURON || HAVE_SYNAPSEAI +void test_info_direct_hmem_support_p2p() +{ + struct fi_info *info; + bool hmem_ops_cuda_init; + + info = fi_allocinfo(); + + memset(g_efa_hmem_info, 0, OFI_HMEM_MAX * sizeof(struct efa_hmem_info)); + + /* Save current value of hmem_ops[FI_HMEM_CUDA].initialized to reset later + * hmem_ops is populated in ofi_hmem_init and only runs once + * + * CUDA iface will be initialized on Nvidia GPU platforms but not on others + * Force setting hmem_ops[FI_HMEM_CUDA].initialized allows this test to + * run on all instance types + */ + hmem_ops_cuda_init = hmem_ops[FI_HMEM_CUDA].initialized; + hmem_ops[FI_HMEM_CUDA].initialized = true; + + /* g_efa_hmem_info is populated in efa_hmem_info_initialize which runs on + * every fi_getinfo call.
So no need to save and reset these fields + */ + g_efa_hmem_info[FI_HMEM_CUDA].initialized = true; + g_efa_hmem_info[FI_HMEM_CUDA].p2p_supported_by_device = true; + + efa_prov_info_set_hmem_flags(info, FI_EP_RDM); + assert_true(info->caps & FI_HMEM); + assert_true(info->tx_attr->caps & FI_HMEM); + assert_true(info->rx_attr->caps & FI_HMEM); + fi_freeinfo(info); + + info = fi_allocinfo(); + g_efa_hmem_info[FI_HMEM_CUDA].initialized = true; + g_efa_hmem_info[FI_HMEM_CUDA].p2p_supported_by_device = false; + + efa_prov_info_set_hmem_flags(info, FI_EP_RDM); + assert_false(info->caps & FI_HMEM); + assert_false(info->tx_attr->caps & FI_HMEM); + assert_false(info->rx_attr->caps & FI_HMEM); + fi_freeinfo(info); + + /* Reset hmem_ops[FI_HMEM_CUDA].initialized */ + hmem_ops[FI_HMEM_CUDA].initialized = hmem_ops_cuda_init; +} +#else +void test_info_direct_hmem_support_p2p() +{ +} +#endif + /** * @brief Verify info->tx/rx_attr->msg_order is set according to hints. * @@ -420,22 +544,92 @@ void test_info_check_hmem_cuda_support_on_api_ge_1_18() fi_freeinfo(info); } +void check_no_hmem_support_when_not_requested(char *fabric_name) +{ + struct fi_info *hints, *info = NULL; + int err; + + hints = efa_unit_test_alloc_hints(FI_EP_RDM, fabric_name); + + err = fi_getinfo(FI_VERSION(1,6), NULL, NULL, 0, hints, &info); + assert_int_equal(err, 0); + assert_non_null(info); + assert_false(info->caps & FI_HMEM); + fi_freeinfo(info); +} + /** * @brief Check that EFA does not claim support of FI_HMEM when * it is not requested */ -void test_info_check_no_hmem_support_when_not_requested() +void test_info_check_no_hmem_support_when_not_requested() { + check_no_hmem_support_when_not_requested(EFA_FABRIC_NAME); + check_no_hmem_support_when_not_requested(EFA_DIRECT_FABRIC_NAME); +} + +/** + * @brief Check that EFA direct info object is not returned when atomic + * or ordering capabilities are requested + */ +void test_info_direct_unsupported() { struct fi_info *hints, *info = NULL; int err; - hints = 
efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); + hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_DIRECT_FABRIC_NAME); + assert_non_null(hints); err = fi_getinfo(FI_VERSION(1,6), NULL, NULL, 0, hints, &info); assert_int_equal(err, 0); assert_non_null(info); - assert_false(info->caps & FI_HMEM); - fi_freeinfo(info); + + hints->caps |= FI_ATOMIC; + err = fi_getinfo(FI_VERSION(1,6), NULL, NULL, 0, hints, &info); + assert_int_equal(err, -FI_ENODATA); + assert_null(info); + + hints->caps &= ~FI_ATOMIC; + hints->tx_attr->msg_order = FI_ORDER_SAS; + hints->rx_attr->msg_order = FI_ORDER_SAS; + err = fi_getinfo(FI_VERSION(1,6), NULL, NULL, 0, hints, &info); + assert_int_equal(err, -FI_ENODATA); + assert_null(info); +} + +/** + * @brief Verify that efa-direct fi_info objects are returned before efa info objects + */ +void test_info_direct_ordering() +{ + struct fi_info *hints, *info = NULL, *info_head = NULL; + bool efa_direct_returned = false, efa_returned = false; + bool efa_direct_returned_after_efa = false, efa_returned_after_efa_direct = false; + int err; + + hints = efa_unit_test_alloc_hints(FI_EP_RDM, NULL); + assert_non_null(hints); + + err = fi_getinfo(FI_VERSION(1,6), NULL, NULL, 0, hints, &info_head); + assert_int_equal(err, 0); + assert_non_null(info_head); + + for (info = info_head; info; info = info->next) { + if (!strcmp(info->fabric_attr->name, EFA_DIRECT_FABRIC_NAME)) { + efa_direct_returned = true; + if (efa_returned) + efa_direct_returned_after_efa = true; + } + if (!strcmp(info->fabric_attr->name, EFA_FABRIC_NAME)) { + efa_returned = true; + if (efa_direct_returned) + efa_returned_after_efa_direct = true; + } + } + + assert_true(efa_direct_returned); + assert_true(efa_returned); + assert_true(efa_returned_after_efa_direct); + assert_false(efa_direct_returned_after_efa); } /** @@ -561,12 +755,12 @@ static int get_first_nic_name(char **name) { * @param[in] filter The value that would be set for FI_EFA_IFACE * @param[in] expect_first_name The expected 
name of the "first" NIC */ -static void test_efa_nic_selection(const char *filter, const char *expect_first_name) { +static void test_efa_nic_selection(const char *filter, const char *expect_first_name, char *fabric_name) { int ret; struct fi_info *hints, *info; efa_env.iface = (char *) filter; - hints = efa_unit_test_alloc_hints(FI_EP_RDM, EFA_PROV_NAME); + hints = efa_unit_test_alloc_hints(FI_EP_RDM, fabric_name); ret = fi_getinfo(FI_VERSION(1, 14), NULL, NULL, 0ULL, hints, &info); fi_freeinfo(hints); if (expect_first_name) { @@ -588,7 +782,8 @@ void test_efa_nic_select_all_devices_matches() { ret = get_first_nic_name(&nic_name); assert_int_equal(ret, FI_SUCCESS); - test_efa_nic_selection("all", nic_name); + test_efa_nic_selection("all", nic_name, EFA_FABRIC_NAME); + test_efa_nic_selection("all", nic_name, EFA_DIRECT_FABRIC_NAME); free(nic_name); } @@ -603,7 +798,8 @@ void test_efa_nic_select_first_device_matches() { ret = get_first_nic_name(&nic_name); assert_int_equal(ret, FI_SUCCESS); - test_efa_nic_selection(nic_name, nic_name); + test_efa_nic_selection(nic_name, nic_name, EFA_FABRIC_NAME); + test_efa_nic_selection(nic_name, nic_name, EFA_DIRECT_FABRIC_NAME); free(nic_name); } @@ -626,7 +822,8 @@ void test_efa_nic_select_first_device_with_surrounding_comma_matches() { strcat(filter, nic_name); strcat(filter, ","); - test_efa_nic_selection(filter, nic_name); + test_efa_nic_selection(filter, nic_name, EFA_FABRIC_NAME); + test_efa_nic_selection(filter, nic_name, EFA_DIRECT_FABRIC_NAME); free(filter); free(nic_name); @@ -646,7 +843,8 @@ void test_efa_nic_select_first_device_first_letter_no_match() { filter[0] = nic_name[0]; filter[1] = '\0'; - test_efa_nic_selection(filter, NULL); + test_efa_nic_selection(filter, NULL, EFA_FABRIC_NAME); + test_efa_nic_selection(filter, NULL, EFA_DIRECT_FABRIC_NAME); free(nic_name); } @@ -655,7 +853,8 @@ void test_efa_nic_select_first_device_first_letter_no_match() { * Verify that empty NIC names will not select any NIC */ void 
test_efa_nic_select_empty_device_no_match() { - test_efa_nic_selection(",", NULL); + test_efa_nic_selection(",", NULL, EFA_FABRIC_NAME); + test_efa_nic_selection(",", NULL, EFA_DIRECT_FABRIC_NAME); } /* indicates the test shouldn't set the setopt or environment diff --git a/prov/efa/test/efa_unit_tests.c b/prov/efa/test/efa_unit_tests.c index 93991120fd4..74b6c54a777 100644 --- a/prov/efa/test/efa_unit_tests.c +++ b/prov/efa/test/efa_unit_tests.c @@ -78,8 +78,13 @@ int main(void) int ret; /* Requires an EFA device to work */ const struct CMUnitTest efa_unit_tests[] = { + /* begin efa_unit_test_av.c */ + cmocka_unit_test_setup_teardown(test_av_ep_type_efa_rdm, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_av_ep_type_efa_direct, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_av_insert_duplicate_raw_addr, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_av_insert_duplicate_gid, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + /* end efa_unit_test_av.c */ + cmocka_unit_test_setup_teardown(test_efa_device_construct_error_handling, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_rdm_ep_ignore_missing_host_id_file, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_rdm_ep_has_valid_host_id, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), @@ -134,7 +139,13 @@ int main(void) cmocka_unit_test_setup_teardown(test_ibv_cq_ex_read_recover_forgotten_peer_ah, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_ibv_cq_ex_read_ignore_removed_peer, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_rdm_fallback_to_ibv_create_cq_ex_cq_read_ignore_forgotton_peer, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + + /* begin 
efa_unit_test_info.c */ cmocka_unit_test_setup_teardown(test_info_open_ep_with_wrong_info, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_info_rdm_attributes, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_info_dgram_attributes, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_info_direct_attributes, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + cmocka_unit_test_setup_teardown(test_info_direct_hmem_support_p2p, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_info_tx_rx_msg_order_rdm_order_none, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_info_tx_rx_msg_order_rdm_order_sas, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_info_tx_rx_msg_order_dgram_order_none, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), @@ -150,6 +161,8 @@ int main(void) cmocka_unit_test_setup_teardown(test_info_check_hmem_cuda_support_on_api_lt_1_18, NULL, NULL), cmocka_unit_test_setup_teardown(test_info_check_hmem_cuda_support_on_api_ge_1_18, NULL, NULL), cmocka_unit_test_setup_teardown(test_info_check_no_hmem_support_when_not_requested, NULL, NULL), + cmocka_unit_test_setup_teardown(test_info_direct_unsupported, NULL, NULL), + cmocka_unit_test_setup_teardown(test_info_direct_ordering, NULL, NULL), cmocka_unit_test_setup_teardown(test_efa_nic_select_all_devices_matches, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_nic_select_first_device_matches, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_nic_select_first_device_with_surrounding_comma_matches, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), @@ -164,6 +177,8 @@ int main(void) 
cmocka_unit_test_setup_teardown(test_efa_use_device_rdma_opt1, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_use_device_rdma_opt0, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_use_device_rdma_opt_old, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), + /* end efa_unit_test_info.c */ + cmocka_unit_test_setup_teardown(test_efa_hmem_info_update_neuron, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_hmem_info_disable_p2p_neuron, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_efa_hmem_info_disable_p2p_cuda, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), diff --git a/prov/efa/test/efa_unit_tests.h b/prov/efa/test/efa_unit_tests.h index 586ac8f5e6f..187aea2c4e5 100644 --- a/prov/efa/test/efa_unit_tests.h +++ b/prov/efa/test/efa_unit_tests.h @@ -33,17 +33,17 @@ struct efa_resource { struct fid_cq *cq; }; -struct fi_info *efa_unit_test_alloc_hints(enum fi_ep_type ep_type, char *prov_name); +struct fi_info *efa_unit_test_alloc_hints(enum fi_ep_type ep_type, char *fabric_name); -void efa_unit_test_resource_construct(struct efa_resource *resource, enum fi_ep_type ep_type, char *prov_name); +void efa_unit_test_resource_construct(struct efa_resource *resource, enum fi_ep_type ep_type, char *fabric_name); void efa_unit_test_resource_construct_ep_not_enabled( - struct efa_resource *resource, enum fi_ep_type ep_type, char *prov_name); + struct efa_resource *resource, enum fi_ep_type ep_type, char *fabric_name); void efa_unit_test_resource_construct_no_cq_and_ep_not_enabled( - struct efa_resource *resource, enum fi_ep_type ep_type, char *prov_name); + struct efa_resource *resource, enum fi_ep_type ep_type, char *fabric_name); void efa_unit_test_resource_construct_with_hints(struct efa_resource *resource, enum fi_ep_type ep_type, uint32_t fi_version, struct 
fi_info *hints, - bool enable_ep, bool open_cq, char *prov_name); + bool enable_ep, bool open_cq); void efa_unit_test_resource_construct_rdm_shm_disabled(struct efa_resource *resource); @@ -98,8 +98,14 @@ void efa_unit_test_eager_msgrtm_pkt_construct(struct efa_rdm_pke *pkt_entry, str void efa_unit_test_handshake_pkt_construct(struct efa_rdm_pke *pkt_entry, struct efa_unit_test_handshake_pkt_attr *attr); /* test cases */ + +/* begin efa_unit_test_av.c */ +void test_av_ep_type_efa_rdm(); +void test_av_ep_type_efa_direct(); void test_av_insert_duplicate_raw_addr(); void test_av_insert_duplicate_gid(); +/* end efa_unit_test_av.c */ + void test_efa_device_construct_error_handling(); void test_efa_rdm_ep_ignore_missing_host_id_file(); void test_efa_rdm_ep_has_valid_host_id(); @@ -154,7 +160,13 @@ void test_ibv_cq_ex_read_bad_recv_rdma_with_imm_status_use_solicited_recv(); void test_ibv_cq_ex_read_recover_forgotten_peer_ah(); void test_rdm_fallback_to_ibv_create_cq_ex_cq_read_ignore_forgotton_peer(); void test_ibv_cq_ex_read_ignore_removed_peer(); + +/* begin efa_unit_test_info.c */ void test_info_open_ep_with_wrong_info(); +void test_info_rdm_attributes(); +void test_info_dgram_attributes(); +void test_info_direct_attributes(); +void test_info_direct_hmem_support_p2p(); void test_info_tx_rx_msg_order_rdm_order_none(); void test_info_tx_rx_msg_order_rdm_order_sas(); void test_info_tx_rx_msg_order_dgram_order_none(); @@ -170,6 +182,8 @@ void test_info_check_shm_info_threading(); void test_info_check_hmem_cuda_support_on_api_lt_1_18(); void test_info_check_hmem_cuda_support_on_api_ge_1_18(); void test_info_check_no_hmem_support_when_not_requested(); +void test_info_direct_unsupported(); +void test_info_direct_ordering(); void test_efa_hmem_info_update_neuron(); void test_efa_hmem_info_disable_p2p_neuron(); void test_efa_hmem_info_disable_p2p_cuda(); @@ -187,6 +201,8 @@ void test_efa_use_device_rdma_opt0(); void test_efa_use_device_rdma_env1(); void 
test_efa_use_device_rdma_env0(); void test_efa_use_device_rdma_opt_old(); +/* end efa_unit_test_info.c */ + void test_efa_srx_min_multi_recv_size(); void test_efa_srx_cq(); void test_efa_srx_lock();