diff --git a/prov/efa/src/efa_domain.c b/prov/efa/src/efa_domain.c index 130cfc052a9..2e81aafa666 100644 --- a/prov/efa/src/efa_domain.c +++ b/prov/efa/src/efa_domain.c @@ -297,13 +297,6 @@ int efa_domain_open(struct fid_fabric *fabric_fid, struct fi_info *info, goto err_free; } - err = efa_domain_hmem_info_init_all(efa_domain); - if (err) { - ret = err; - EFA_WARN(FI_LOG_DOMAIN, "Failed to check hmem support status. err: %d\n", ret); - goto err_free; - } - dlist_insert_tail(&efa_domain->list_entry, &g_efa_domain_list); return 0; diff --git a/prov/efa/src/efa_domain.h b/prov/efa/src/efa_domain.h index 2eaf7fc06ed..6fa13e0bd8d 100644 --- a/prov/efa/src/efa_domain.h +++ b/prov/efa/src/efa_domain.h @@ -22,7 +22,6 @@ struct efa_domain { struct ofi_mr_cache *cache; struct efa_qp **qp_table; size_t qp_table_sz_m1; - struct efa_hmem_info hmem_info[OFI_HMEM_MAX]; size_t mtu_size; size_t addrlen; bool mr_local; diff --git a/prov/efa/src/efa_hmem.c b/prov/efa/src/efa_hmem.c index 15f2513bf79..3c713221711 100644 --- a/prov/efa/src/efa_hmem.c +++ b/prov/efa/src/efa_hmem.c @@ -5,16 +5,18 @@ #include "efa_hmem.h" #include "rdm/efa_rdm_pkt_type.h" +struct efa_hmem_info g_efa_hmem_info[OFI_HMEM_MAX]; + #if HAVE_CUDA || HAVE_NEURON -static size_t efa_max_eager_msg_size_with_largest_header(struct efa_domain *efa_domain) { +static size_t efa_max_eager_msg_size_with_largest_header() { int mtu_size; - mtu_size = efa_domain->device->rdm_info->ep_attr->max_msg_size; + mtu_size = g_device_list[0].rdm_info->ep_attr->max_msg_size; return mtu_size - efa_rdm_pkt_type_get_max_hdr_size(); } #else -static size_t efa_max_eager_msg_size_with_largest_header(struct efa_domain *efa_domain) { +static size_t efa_max_eager_msg_size_with_largest_header() { return 0; } #endif @@ -23,14 +25,13 @@ static size_t efa_max_eager_msg_size_with_largest_header(struct efa_domain *efa_ * @brief Initialize the various protocol thresholds tracked in efa_hmem_info * according to the given FI_HMEM interface. * - * @param[in,out] efa_domain Pointer to struct efa_domain * @param[in] iface The FI_HMEM interface to initialize * * @return 0 */ -static int efa_domain_hmem_info_init_protocol_thresholds(struct efa_domain *efa_domain, enum fi_hmem_iface iface) +static int efa_domain_hmem_info_init_protocol_thresholds(enum fi_hmem_iface iface) { - struct efa_hmem_info *info = &efa_domain->hmem_info[iface]; + struct efa_hmem_info *info = &g_efa_hmem_info[iface]; size_t tmp_value; /* Fall back to FI_HMEM_SYSTEM initialization logic when p2p is @@ -53,8 +54,8 @@ static int efa_domain_hmem_info_init_protocol_thresholds(struct efa_domain *efa_ case FI_HMEM_CUDA: info->runt_size = EFA_DEFAULT_RUNT_SIZE; info->max_medium_msg_size = 0; - info->min_read_msg_size = efa_max_eager_msg_size_with_largest_header(efa_domain) + 1; - info->min_read_write_size = efa_max_eager_msg_size_with_largest_header(efa_domain) + 1; + info->min_read_msg_size = efa_max_eager_msg_size_with_largest_header() + 1; + info->min_read_write_size = efa_max_eager_msg_size_with_largest_header() + 1; fi_param_get_size_t(&efa_prov, "runt_size", &info->runt_size); fi_param_get_size_t(&efa_prov, "inter_min_read_message_size", &info->min_read_msg_size); fi_param_get_size_t(&efa_prov, "inter_min_read_write_size", &info->min_read_write_size); @@ -68,8 +69,8 @@ static int efa_domain_hmem_info_init_protocol_thresholds(struct efa_domain *efa_ case FI_HMEM_NEURON: info->runt_size = EFA_NEURON_RUNT_SIZE; info->max_medium_msg_size = 0; - info->min_read_msg_size = efa_max_eager_msg_size_with_largest_header(efa_domain) + 1; - info->min_read_write_size = efa_max_eager_msg_size_with_largest_header(efa_domain) + 1; + info->min_read_msg_size = efa_max_eager_msg_size_with_largest_header() + 1; + info->min_read_write_size = efa_max_eager_msg_size_with_largest_header() + 1; fi_param_get_size_t(&efa_prov, "runt_size", &info->runt_size); fi_param_get_size_t(&efa_prov, "inter_min_read_message_size", &info->min_read_msg_size); fi_param_get_size_t(&efa_prov, "inter_min_read_write_size", &info->min_read_write_size); @@ -105,7 +106,7 @@ static int efa_domain_hmem_info_init_protocol_thresholds(struct efa_domain *efa_ return 0; } -static inline void efa_domain_hmem_info_check_p2p_support_cuda(struct efa_hmem_info *info) { +static inline void efa_hmem_info_check_p2p_support_cuda(struct efa_hmem_info *info) { #if HAVE_CUDA cudaError_t cuda_ret; void *ptr = NULL; @@ -168,7 +169,7 @@ static inline void efa_domain_hmem_info_check_p2p_support_cuda(struct efa_hmem_i return; } -static inline void efa_domain_hmem_info_check_p2p_support_neuron(struct efa_hmem_info *info) { +static inline void efa_hmem_info_check_p2p_support_neuron(struct efa_hmem_info *info) { #if HAVE_NEURON struct ibv_mr *ibv_mr = NULL; int ibv_access = IBV_ACCESS_LOCAL_WRITE; @@ -239,13 +240,12 @@ static inline void efa_domain_hmem_info_check_p2p_support_neuron(struct efa_hmem /** * @brief Initialize the efa_hmem_info state for iface * - * @param[in,out] efa_domain Pointer to struct efa_domain * @param[in] iface HMEM interface */ static void -efa_domain_hmem_info_init_iface(struct efa_domain *efa_domain, enum fi_hmem_iface iface) +efa_hmem_info_init_iface(enum fi_hmem_iface iface) { - struct efa_hmem_info *info = &efa_domain->hmem_info[iface]; + struct efa_hmem_info *info = &g_efa_hmem_info[iface]; if (!ofi_hmem_is_initialized(iface)) { EFA_INFO(FI_LOG_DOMAIN, "%s is not initialized\n", @@ -262,41 +262,27 @@ efa_domain_hmem_info_init_iface(struct efa_domain *efa_domain, enum fi_hmem_ifac } info->initialized = true; - info->p2p_disabled_by_user = (iface == FI_HMEM_SYSTEM) ? false : ofi_hmem_p2p_disabled(); if (iface == FI_HMEM_SYNAPSEAI || iface == FI_HMEM_SYSTEM) { info->p2p_supported_by_device = true; - } else if (info->p2p_disabled_by_user) { + } else if (ofi_hmem_p2p_disabled()) { info->p2p_supported_by_device = false; } else { if (iface == FI_HMEM_CUDA) - efa_domain_hmem_info_check_p2p_support_cuda(info); + efa_hmem_info_check_p2p_support_cuda(info); if (iface == FI_HMEM_NEURON) - efa_domain_hmem_info_check_p2p_support_neuron(info); + efa_hmem_info_check_p2p_support_neuron(info); if (!info->p2p_supported_by_device) EFA_INFO(FI_LOG_DOMAIN, "%s P2P support is not available.\n", fi_tostr(&iface, FI_TYPE_HMEM_IFACE)); } - info->p2p_required_by_impl = true; - /* If user is using libfabric API 1.18 or later, by default EFA - * provider is permitted to use CUDA library to support CUDA - * memory, therefore p2p is not required. - */ - if (iface == FI_HMEM_CUDA && - FI_VERSION_GE(efa_domain->util_domain.fabric->fabric_fid.api_version, FI_VERSION(1, 18))) - info->p2p_required_by_impl = !hmem_ops[iface].initialized; - if (iface == FI_HMEM_SYSTEM) - info->p2p_required_by_impl = false; - - efa_domain_hmem_info_init_protocol_thresholds(efa_domain, iface); + efa_domain_hmem_info_init_protocol_thresholds(iface); } /** * @brief Validate an FI_OPT_FI_HMEM_P2P (FI_OPT_ENDPOINT) option for a * specified HMEM interface. - * Also update hmem_info[iface]->p2p_disabled_by_user accordingly. * - * @param[in,out] domain The efa_domain struct which contains an efa_hmem_info array * @param[in] iface The fi_hmem_iface enum of the FI_HMEM interface to validate * @param[in] p2p_opt The P2P option to validate * @@ -305,9 +291,9 @@ efa_domain_hmem_info_init_iface(struct efa_domain *efa_domain, enum fi_hmem_ifac * -FI_ENODATA if the given HMEM interface was not initialized * -FI_EINVAL if p2p_opt is not a valid FI_OPT_FI_HMEM_P2P option */ -int efa_domain_hmem_validate_p2p_opt(struct efa_domain *efa_domain, enum fi_hmem_iface iface, int p2p_opt) +int efa_hmem_validate_p2p_opt(enum fi_hmem_iface iface, int p2p_opt, uint32_t api_version) { - struct efa_hmem_info *info = &efa_domain->hmem_info[iface]; + struct efa_hmem_info *info = &g_efa_hmem_info[iface]; if (OFI_UNLIKELY(!info->initialized)) return -FI_ENODATA; @@ -317,7 +303,6 @@ int efa_domain_hmem_validate_p2p_opt(struct efa_domain *efa_domain, enum fi_hmem if (OFI_UNLIKELY(ofi_hmem_p2p_disabled()) || !info->p2p_supported_by_device) return -FI_EOPNOTSUPP; - info->p2p_disabled_by_user = false; return 0; /* * According to fi_setopt() document: @@ -334,14 +319,13 @@ int efa_domain_hmem_validate_p2p_opt(struct efa_domain *efa_domain, enum fi_hmem if (OFI_UNLIKELY(ofi_hmem_p2p_disabled())) return -FI_EOPNOTSUPP; - info->p2p_disabled_by_user = false; return 0; case FI_HMEM_P2P_DISABLED: - if (info->p2p_required_by_impl) + /* return -FI_EOPNOTSUPP if p2p is required by implementation */ + if (iface != FI_HMEM_CUDA || FI_VERSION_LT(api_version, FI_VERSION(1, 18))) return -FI_EOPNOTSUPP; - info->p2p_disabled_by_user = true; return 0; } @@ -354,12 +338,10 @@ int efa_domain_hmem_validate_p2p_opt(struct efa_domain *efa_domain, enum fi_hmem * struct will be used to determine which efa transfer * protocol should be selected. * - * @param[in,out] efa_domain Pointer to struct efa_domain to be initialized - * * @return 0 on success * negative libfabric error code on an unexpected error */ -int efa_domain_hmem_info_init_all(struct efa_domain *efa_domain) +int efa_hmem_info_initialize() { int ret = 0, i = 0; @@ -367,10 +349,10 @@ int efa_domain_hmem_info_init_all(struct efa_domain *efa_domain) return -FI_ENODEV; } - memset(efa_domain->hmem_info, 0, OFI_HMEM_MAX * sizeof(struct efa_hmem_info)); + memset(g_efa_hmem_info, 0, OFI_HMEM_MAX * sizeof(struct efa_hmem_info)); EFA_HMEM_IFACE_FOREACH(i) { - efa_domain_hmem_info_init_iface(efa_domain, efa_hmem_ifaces[i]); + efa_hmem_info_init_iface(efa_hmem_ifaces[i]); } return ret; diff --git a/prov/efa/src/efa_hmem.h b/prov/efa/src/efa_hmem.h index e18c0e4c534..858b7035883 100644 --- a/prov/efa/src/efa_hmem.h +++ b/prov/efa/src/efa_hmem.h @@ -23,8 +23,6 @@ static const enum fi_hmem_iface efa_hmem_ifaces[] = { struct efa_hmem_info { bool initialized; /* do we support it at all */ - bool p2p_disabled_by_user; /* Did the user disable p2p via FI_OPT_FI_HMEM_P2P? */ - bool p2p_required_by_impl; /* Is p2p required for this interface? */ bool p2p_supported_by_device; /* do we support p2p with this device */ size_t max_medium_msg_size; @@ -33,10 +31,12 @@ struct efa_hmem_info { size_t min_read_write_size; }; +extern struct efa_hmem_info g_efa_hmem_info[OFI_HMEM_MAX]; + struct efa_domain; -int efa_domain_hmem_validate_p2p_opt(struct efa_domain *efa_domain, enum fi_hmem_iface iface, int p2p_opt); -int efa_domain_hmem_info_init_all(struct efa_domain *efa_domain); +int efa_hmem_validate_p2p_opt(enum fi_hmem_iface iface, int p2p_opt, uint32_t api_version); +int efa_hmem_info_initialize(); /** * @brief Copy data from a hmem device to a system buffer diff --git a/prov/efa/src/efa_mr.c b/prov/efa/src/efa_mr.c index 0307914aff2..1e1f803b777 100644 --- a/prov/efa/src/efa_mr.c +++ b/prov/efa/src/efa_mr.c @@ -192,7 +192,7 @@ static int efa_mr_hmem_setup(struct efa_mr *efa_mr, } if (efa_mr->domain->util_domain.info_domain_caps & FI_HMEM) { - if (efa_mr->domain->hmem_info[attr->iface].initialized) { + if (g_efa_hmem_info[attr->iface].initialized) { efa_mr->peer.iface = attr->iface; } else { EFA_WARN(FI_LOG_MR, @@ -813,7 +813,7 @@ static int efa_mr_reg_impl(struct efa_mr *efa_mr, uint64_t flags, const void *at * For FI_HMEM_CUDA iface when p2p is unavailable, skip ibv_reg_mr() and * generate proprietary mr_fid key. */ - if (mr_attr.iface == FI_HMEM_CUDA && !efa_mr->domain->hmem_info[FI_HMEM_CUDA].p2p_supported_by_device) { + if (mr_attr.iface == FI_HMEM_CUDA && !g_efa_hmem_info[FI_HMEM_CUDA].p2p_supported_by_device) { efa_mr->mr_fid.key = efa_mr_cuda_non_p2p_keygen(); } else { efa_mr->ibv_mr = efa_mr_reg_ibv_mr(efa_mr, &mr_attr, fi_ibv_access, flags); diff --git a/prov/efa/src/efa_prov.c b/prov/efa/src/efa_prov.c index 85a71aa2c41..2dd5b42fecb 100644 --- a/prov/efa/src/efa_prov.c +++ b/prov/efa/src/efa_prov.c @@ -164,6 +164,10 @@ EFA_INI if (err) goto err_free; + err = efa_hmem_info_initialize(); + if (err) + goto err_free; + dlist_init(&g_efa_domain_list); return &efa_prov; diff --git a/prov/efa/src/rdm/efa_rdm_ep.h b/prov/efa/src/rdm/efa_rdm_ep.h index 316bab93d98..cebf968439c 100644 --- a/prov/efa/src/rdm/efa_rdm_ep.h +++ b/prov/efa/src/rdm/efa_rdm_ep.h @@ -307,7 +307,7 @@ int efa_rdm_ep_use_p2p(struct efa_rdm_ep *efa_rdm_ep, struct efa_mr *efa_mr) if (!efa_mr || efa_mr->peer.iface == FI_HMEM_SYSTEM) return 1; - if (efa_rdm_ep_domain(efa_rdm_ep)->hmem_info[efa_mr->peer.iface].p2p_supported_by_device) + if (g_efa_hmem_info[efa_mr->peer.iface].p2p_supported_by_device) return (efa_rdm_ep->hmem_p2p_opt != FI_HMEM_P2P_DISABLED); if (efa_rdm_ep->hmem_p2p_opt == FI_HMEM_P2P_REQUIRED) { diff --git a/prov/efa/src/rdm/efa_rdm_ep_fiops.c b/prov/efa/src/rdm/efa_rdm_ep_fiops.c index 01a4b3fd909..014ade78b46 100644 --- a/prov/efa/src/rdm/efa_rdm_ep_fiops.c +++ b/prov/efa/src/rdm/efa_rdm_ep_fiops.c @@ -434,7 +434,6 @@ static inline void efa_rdm_ep_set_use_zcpy_rx(struct efa_rdm_ep *ep) { enum fi_hmem_iface iface; - struct efa_hmem_info *hmem_info; uint64_t unsupported_caps = FI_DIRECTED_RECV | FI_TAGGED | FI_ATOMIC; ep->use_zcpy_rx = true; @@ -482,11 +481,11 @@ void efa_rdm_ep_set_use_zcpy_rx(struct efa_rdm_ep *ep) } /* Zero-copy receive requires P2P support. Disable it if any initialized HMEM iface does not support P2P. */ - for (iface = FI_HMEM_SYSTEM; iface < OFI_HMEM_MAX; ++iface) { - hmem_info = &ep->base_ep.domain->hmem_info[iface]; - if (hmem_info->initialized && - !hmem_info->p2p_disabled_by_user && - !hmem_info->p2p_supported_by_device) { + EFA_HMEM_IFACE_FOREACH(iface) { + if (g_efa_hmem_info[iface].initialized && + !ofi_hmem_p2p_disabled() && + ep->hmem_p2p_opt != FI_HMEM_P2P_DISABLED && + !g_efa_hmem_info[iface].p2p_supported_by_device) { EFA_INFO(FI_LOG_EP_CTRL, "%s does not support P2P, zero-copy receive " "protocol will be disabled\n", @@ -530,6 +529,7 @@ int efa_rdm_ep_open(struct fid_domain *domain, struct fi_info *info, struct efa_domain *efa_domain = NULL; struct efa_rdm_ep *efa_rdm_ep = NULL; int ret, retv, i; + enum fi_hmem_iface iface; efa_rdm_ep = calloc(1, sizeof(*efa_rdm_ep)); if (!efa_rdm_ep) @@ -606,6 +606,7 @@ int efa_rdm_ep_open(struct fid_domain *domain, struct fi_info *info, efa_rdm_ep_init_linked_lists(efa_rdm_ep); + efa_rdm_ep->cuda_api_permitted = (FI_VERSION_GE(info->fabric_attr->api_version, FI_VERSION(1, 18))); /* Set hmem_p2p_opt */ efa_rdm_ep->hmem_p2p_opt = FI_HMEM_P2P_DISABLED; @@ -615,16 +616,21 @@ int efa_rdm_ep_open(struct fid_domain *domain, struct fi_info *info, * tighter requirements for the default p2p opt */ EFA_HMEM_IFACE_FOREACH_NON_SYSTEM(i) { - if (efa_rdm_ep->base_ep.domain->hmem_info[efa_hmem_ifaces[i]].initialized && - efa_rdm_ep->base_ep.domain->hmem_info[efa_hmem_ifaces[i]].p2p_supported_by_device) { - efa_rdm_ep->hmem_p2p_opt = efa_rdm_ep->base_ep.domain->hmem_info[efa_hmem_ifaces[i]].p2p_required_by_impl - ? FI_HMEM_P2P_REQUIRED - : FI_HMEM_P2P_PREFERRED; + iface = efa_hmem_ifaces[i]; + if (g_efa_hmem_info[iface].initialized && + g_efa_hmem_info[iface].p2p_supported_by_device) { + /* If user is using libfabric API 1.18 or later, by default EFA + * provider is permitted to use CUDA library to support CUDA + * memory, therefore p2p is not required. + */ + efa_rdm_ep->hmem_p2p_opt = + (iface == FI_HMEM_CUDA && efa_rdm_ep->cuda_api_permitted) ? + FI_HMEM_P2P_PREFERRED : + FI_HMEM_P2P_REQUIRED; break; } } - efa_rdm_ep->cuda_api_permitted = (FI_VERSION_GE(info->fabric_attr->api_version, FI_VERSION(1, 18))); efa_rdm_ep->sendrecv_in_order_aligned_128_bytes = false; efa_rdm_ep->write_in_order_aligned_128_bytes = false; @@ -1413,7 +1419,9 @@ static int efa_rdm_ep_set_fi_hmem_p2p_opt(struct efa_rdm_ep *efa_rdm_ep, int opt * tighter restrictions on valid p2p options. */ EFA_HMEM_IFACE_FOREACH_NON_SYSTEM(i) { - err = efa_domain_hmem_validate_p2p_opt(efa_rdm_ep_domain(efa_rdm_ep), efa_hmem_ifaces[i], opt); + err = efa_hmem_validate_p2p_opt( + efa_hmem_ifaces[i], opt, + efa_rdm_ep->base_ep.info->fabric_attr->api_version); if (err == -FI_ENODATA) continue; @@ -1449,7 +1457,7 @@ static int efa_rdm_ep_set_cuda_api_permitted(struct efa_rdm_ep *ep, bool cuda_ap /* CUDA memory can be supported by using either peer to peer or CUDA API. If neither is * available, we cannot support CUDA memory */ - if (!efa_rdm_ep_domain(ep)->hmem_info[FI_HMEM_CUDA].p2p_supported_by_device) + if (!g_efa_hmem_info[FI_HMEM_CUDA].p2p_supported_by_device) return -FI_EOPNOTSUPP; ep->cuda_api_permitted = false; diff --git a/prov/efa/src/rdm/efa_rdm_msg.c b/prov/efa/src/rdm/efa_rdm_msg.c index 839cde917f0..cdbabe128c1 100644 --- a/prov/efa/src/rdm/efa_rdm_msg.c +++ b/prov/efa/src/rdm/efa_rdm_msg.c @@ -60,7 +60,6 @@ int efa_rdm_msg_select_rtm(struct efa_rdm_ep *efa_rdm_ep, struct efa_rdm_ope *tx int tagged; int eager_rtm, medium_rtm, longcts_rtm, readbase_rtm, iface; size_t eager_rtm_max_data_size; - struct efa_hmem_info *hmem_info; bool delivery_complete_requested; assert(txe->op == ofi_op_msg || txe->op == ofi_op_tagged); @@ -68,7 +67,6 @@ int efa_rdm_msg_select_rtm(struct efa_rdm_ep *efa_rdm_ep, struct efa_rdm_ope *tx assert(tagged == 0 || tagged == 1); iface = txe->desc[0] ? ((struct efa_mr*) txe->desc[0])->peer.iface : FI_HMEM_SYSTEM; - hmem_info = efa_rdm_ep_domain(efa_rdm_ep)->hmem_info; if (txe->fi_flags & FI_INJECT || efa_both_support_zero_hdr_data_transfer(efa_rdm_ep, txe->peer)) delivery_complete_requested = false; @@ -88,16 +86,16 @@ int efa_rdm_msg_select_rtm(struct efa_rdm_ep *efa_rdm_ep, struct efa_rdm_ope *tx readbase_rtm = efa_rdm_peer_select_readbase_rtm(txe->peer, efa_rdm_ep, txe); - if (use_p2p && - txe->total_len >= hmem_info[iface].min_read_msg_size && - efa_rdm_interop_rdma_read(efa_rdm_ep, txe->peer) && - (txe->desc[0] || efa_is_cache_available(efa_rdm_ep_domain(efa_rdm_ep)))) + if (use_p2p && + txe->total_len >= g_efa_hmem_info[iface].min_read_msg_size && + efa_rdm_interop_rdma_read(efa_rdm_ep, txe->peer) && + (txe->desc[0] || efa_is_cache_available(efa_rdm_ep_domain(efa_rdm_ep)))) return readbase_rtm; if (txe->total_len <= eager_rtm_max_data_size) return eager_rtm; - if (txe->total_len <= hmem_info[iface].max_medium_msg_size) + if (txe->total_len <= g_efa_hmem_info[iface].max_medium_msg_size) return medium_rtm; return longcts_rtm; diff --git a/prov/efa/src/rdm/efa_rdm_peer.c b/prov/efa/src/rdm/efa_rdm_peer.c index 9674a642be6..3e8e3dff774 100644 --- a/prov/efa/src/rdm/efa_rdm_peer.c +++ b/prov/efa/src/rdm/efa_rdm_peer.c @@ -330,18 +330,16 @@ void efa_rdm_peer_proc_pending_items_in_robuf(struct efa_rdm_peer *peer, struct size_t efa_rdm_peer_get_runt_size(struct efa_rdm_peer *peer, struct efa_rdm_ep *ep, struct efa_rdm_ope *ope) { - struct efa_hmem_info *hmem_info; size_t runt_size; size_t memory_alignment; int iface; - hmem_info = efa_rdm_ep_domain(ep)->hmem_info; iface = ope->desc[0] ? ((struct efa_mr*) ope->desc[0])->peer.iface : FI_HMEM_SYSTEM; - if (hmem_info[iface].runt_size < peer->num_runt_bytes_in_flight) + if (g_efa_hmem_info[iface].runt_size < peer->num_runt_bytes_in_flight) return 0; - runt_size = MIN(hmem_info[iface].runt_size - peer->num_runt_bytes_in_flight, ope->total_len); + runt_size = MIN(g_efa_hmem_info[iface].runt_size - peer->num_runt_bytes_in_flight, ope->total_len); memory_alignment = efa_rdm_ep_get_memory_alignment(ep, iface); /* * runt size must be aligned because: diff --git a/prov/efa/src/rdm/efa_rdm_rma.c b/prov/efa/src/rdm/efa_rdm_rma.c index ae04af66e1e..720788c8757 100644 --- a/prov/efa/src/rdm/efa_rdm_rma.c +++ b/prov/efa/src/rdm/efa_rdm_rma.c @@ -399,10 +399,10 @@ ssize_t efa_rdm_rma_post_write(struct efa_rdm_ep *ep, struct efa_rdm_ope *txe) iface = txe->desc[0] ? ((struct efa_mr*) txe->desc[0])->peer.iface : FI_HMEM_SYSTEM; - if (use_p2p && - txe->total_len >= efa_rdm_ep_domain(ep)->hmem_info[iface].min_read_write_size && - efa_rdm_interop_rdma_read(ep, txe->peer) && - (txe->desc[0] || efa_is_cache_available(efa_rdm_ep_domain(ep)))) { + if (use_p2p && + txe->total_len >= g_efa_hmem_info[iface].min_read_write_size && + efa_rdm_interop_rdma_read(ep, txe->peer) && + (txe->desc[0] || efa_is_cache_available(efa_rdm_ep_domain(ep)))) { err = efa_rdm_ope_post_send(txe, EFA_RDM_LONGREAD_RTW_PKT); if (err != -FI_ENOMEM) return err; diff --git a/prov/efa/test/efa_unit_test_ep.c b/prov/efa/test/efa_unit_test_ep.c index f01efc72560..adc1ba64255 100644 --- a/prov/efa/test/efa_unit_test_ep.c +++ b/prov/efa/test/efa_unit_test_ep.c @@ -363,7 +363,7 @@ void test_efa_rdm_pke_get_available_copy_methods_align128(struct efa_resource ** efa_rdm_ep->sendrecv_in_order_aligned_128_bytes = 1; /* p2p is available */ - efa_rdm_ep_domain(efa_rdm_ep)->hmem_info[FI_HMEM_CUDA].p2p_supported_by_device = true; + g_efa_hmem_info[FI_HMEM_CUDA].p2p_supported_by_device = true; efa_rdm_ep->hmem_p2p_opt = FI_HMEM_P2P_ENABLED; /* RDMA read is supported */ @@ -921,35 +921,30 @@ static void test_efa_rdm_ep_use_zcpy_rx_impl(struct efa_resource *resource, bool cuda_p2p_supported, bool expected_use_zcpy_rx) { - struct efa_domain *efa_domain; struct efa_rdm_ep *ep; size_t max_msg_size = 1000; size_t inject_msg_size = 0; size_t inject_rma_size = 0; bool shm_permitted = false; + ofi_hmem_disable_p2p = cuda_p2p_disabled; efa_unit_test_resource_construct_with_hints(resource, FI_EP_RDM, FI_VERSION(1, 14), resource->hints, false, true); - efa_domain = container_of(resource->domain, struct efa_domain, - util_domain.domain_fid.fid); - /* System memory P2P should always be enabled */ - assert_true(efa_domain->hmem_info[FI_HMEM_SYSTEM].initialized); - assert_false(efa_domain->hmem_info[FI_HMEM_SYSTEM].p2p_disabled_by_user); - assert_true(efa_domain->hmem_info[FI_HMEM_SYSTEM].p2p_supported_by_device); + assert_true(g_efa_hmem_info[FI_HMEM_SYSTEM].initialized); + assert_true(g_efa_hmem_info[FI_HMEM_SYSTEM].p2p_supported_by_device); /** * We want to be able to run this test on any platform: * 1. Fake CUDA support. * 2. Disable all other hmem ifaces. */ - efa_domain->hmem_info[FI_HMEM_CUDA].initialized = true; - efa_domain->hmem_info[FI_HMEM_CUDA].p2p_disabled_by_user = cuda_p2p_disabled; - efa_domain->hmem_info[FI_HMEM_CUDA].p2p_supported_by_device = cuda_p2p_supported; + g_efa_hmem_info[FI_HMEM_CUDA].initialized = true; + g_efa_hmem_info[FI_HMEM_CUDA].p2p_supported_by_device = cuda_p2p_supported; - efa_domain->hmem_info[FI_HMEM_NEURON].initialized = false; - efa_domain->hmem_info[FI_HMEM_SYNAPSEAI].initialized = false; + g_efa_hmem_info[FI_HMEM_NEURON].initialized = false; + g_efa_hmem_info[FI_HMEM_SYNAPSEAI].initialized = false; ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); @@ -983,6 +978,8 @@ static void test_efa_rdm_ep_use_zcpy_rx_impl(struct efa_resource *resource, assert_int_equal(inject_msg_size, resource->info->tx_attr->inject_size); assert_int_equal(inject_rma_size, resource->info->tx_attr->inject_size); } + /* restore global variable */ + ofi_hmem_disable_p2p = 0; } /** diff --git a/prov/efa/test/efa_unit_test_hmem.c b/prov/efa/test/efa_unit_test_hmem.c index 55734af286a..90a366f7064 100644 --- a/prov/efa/test/efa_unit_test_hmem.c +++ b/prov/efa/test/efa_unit_test_hmem.c @@ -7,8 +7,7 @@ #if HAVE_NEURON /** * @brief Verify when neuron_alloc failed (return null), - * efa_domain_open, which call efa_hmem_info_update_neuron - * when HAVE_NEURON=1, will still return 0 but leave + * efa_hmem_info_initialize will still return 0 but leave * efa_hmem_info[FI_HMEM_NEURON].initialized and * efa_hmem_info[FI_HMEM_NEURON].p2p_supported_by_device as false. * @@ -18,7 +17,6 @@ void test_efa_hmem_info_update_neuron(struct efa_resource **state) { int ret; struct efa_resource *resource = *state; - struct efa_domain *efa_domain; uint32_t efa_device_caps_orig; bool neuron_initialized_orig; @@ -28,26 +26,21 @@ void test_efa_hmem_info_update_neuron(struct efa_resource **state) ret = fi_getinfo(FI_VERSION(1, 14), NULL, NULL, 0ULL, resource->hints, &resource->info); assert_int_equal(ret, 0); - ret = fi_fabric(resource->info->fabric_attr, &resource->fabric, NULL); - assert_int_equal(ret, 0); - neuron_initialized_orig = hmem_ops[FI_HMEM_NEURON].initialized; hmem_ops[FI_HMEM_NEURON].initialized = true; efa_device_caps_orig = g_device_list[0].device_caps; g_device_list[0].device_caps |= EFADV_DEVICE_ATTR_CAPS_RDMA_READ; g_efa_unit_test_mocks.neuron_alloc = &efa_mock_neuron_alloc_return_null; - ret = fi_domain(resource->fabric, resource->info, &resource->domain, NULL); + ret = efa_hmem_info_initialize(); /* recover the modified global variables before doing check */ hmem_ops[FI_HMEM_NEURON].initialized = neuron_initialized_orig; g_device_list[0].device_caps = efa_device_caps_orig; assert_int_equal(ret, 0); - efa_domain = container_of(resource->domain, struct efa_domain, - util_domain.domain_fid.fid); - assert_false(efa_domain->hmem_info[FI_HMEM_NEURON].initialized); - assert_false(efa_domain->hmem_info[FI_HMEM_NEURON].p2p_supported_by_device); + assert_false(g_efa_hmem_info[FI_HMEM_NEURON].initialized); + assert_false(g_efa_hmem_info[FI_HMEM_NEURON].p2p_supported_by_device); } /** @@ -60,19 +53,17 @@ void test_efa_hmem_info_disable_p2p_neuron(struct efa_resource **state) { int ret; struct efa_resource *resource = *state; - struct efa_domain *efa_domain; uint32_t efa_device_caps_orig; bool neuron_initialized_orig; + ofi_hmem_disable_p2p = 1; + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM); assert_non_null(resource->hints); ret = fi_getinfo(FI_VERSION(1, 14), NULL, NULL, 0ULL, resource->hints, &resource->info); assert_int_equal(ret, 0); - ret = fi_fabric(resource->info->fabric_attr, &resource->fabric, NULL); - assert_int_equal(ret, 0); - neuron_initialized_orig = hmem_ops[FI_HMEM_NEURON].initialized; hmem_ops[FI_HMEM_NEURON].initialized = true; efa_device_caps_orig = g_device_list[0].device_caps; @@ -80,8 +71,7 @@ void test_efa_hmem_info_disable_p2p_neuron(struct efa_resource **state) /* neuron_alloc should not be called when p2p is disabled. efa_mock_neuron_alloc_return_mock will fail the test when it is called. */ g_efa_unit_test_mocks.neuron_alloc = efa_mock_neuron_alloc_return_mock; - ofi_hmem_disable_p2p = 1; - ret = fi_domain(resource->fabric, resource->info, &resource->domain, NULL); + ret = efa_hmem_info_initialize(); /* recover the modified global variables before doing check */ ofi_hmem_disable_p2p = 0; @@ -89,11 +79,8 @@ void test_efa_hmem_info_disable_p2p_neuron(struct efa_resource **state) hmem_ops[FI_HMEM_NEURON].initialized = neuron_initialized_orig; assert_int_equal(ret, 0); - efa_domain = container_of(resource->domain, struct efa_domain, - util_domain.domain_fid.fid); - assert_true(efa_domain->hmem_info[FI_HMEM_NEURON].p2p_disabled_by_user); - assert_true(efa_domain->hmem_info[FI_HMEM_NEURON].initialized); - assert_false(efa_domain->hmem_info[FI_HMEM_NEURON].p2p_supported_by_device); + assert_true(g_efa_hmem_info[FI_HMEM_NEURON].initialized); + assert_false(g_efa_hmem_info[FI_HMEM_NEURON].p2p_supported_by_device); } #else void test_efa_hmem_info_update_neuron() @@ -118,36 +105,30 @@ void test_efa_hmem_info_disable_p2p_cuda(struct efa_resource **state) { int ret; struct efa_resource *resource = *state; - struct efa_domain *efa_domain; bool cuda_initialized_orig; + ofi_hmem_disable_p2p = 1; + resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM); assert_non_null(resource->hints); ret = fi_getinfo(FI_VERSION(1, 14), NULL, NULL, 0ULL, resource->hints, &resource->info); assert_int_equal(ret, 0); - ret = fi_fabric(resource->info->fabric_attr, &resource->fabric, NULL); - assert_int_equal(ret, 0); - cuda_initialized_orig = hmem_ops[FI_HMEM_CUDA].initialized; hmem_ops[FI_HMEM_CUDA].initialized = true; /* ofi_cudaMalloc should not be called when p2p is disabled. efa_mock_ofi_cudaMalloc_return_mock will fail the test when it is called. */ g_efa_unit_test_mocks.ofi_cudaMalloc = efa_mock_ofi_cudaMalloc_return_mock; - ofi_hmem_disable_p2p = 1; - ret = fi_domain(resource->fabric, resource->info, &resource->domain, NULL); + ret = efa_hmem_info_initialize(); /* recover the modified global variables before doing check */ ofi_hmem_disable_p2p = 0; hmem_ops[FI_HMEM_CUDA].initialized = cuda_initialized_orig; assert_int_equal(ret, 0); - efa_domain = container_of(resource->domain, struct efa_domain, - util_domain.domain_fid.fid); - assert_true(efa_domain->hmem_info[FI_HMEM_CUDA].p2p_disabled_by_user); - assert_true(efa_domain->hmem_info[FI_HMEM_CUDA].initialized); - assert_false(efa_domain->hmem_info[FI_HMEM_CUDA].p2p_supported_by_device); + assert_true(g_efa_hmem_info[FI_HMEM_CUDA].initialized); + assert_false(g_efa_hmem_info[FI_HMEM_CUDA].p2p_supported_by_device); } #else void test_efa_hmem_info_disable_p2p_cuda() diff --git a/prov/efa/test/efa_unit_test_runt.c b/prov/efa/test/efa_unit_test_runt.c index ab7537061c0..ae09f0a1c0e 100644 --- a/prov/efa/test/efa_unit_test_runt.c +++ b/prov/efa/test/efa_unit_test_runt.c @@ -27,12 +27,10 @@ void test_efa_rdm_peer_get_runt_size_impl( struct efa_mr mock_mr; struct efa_rdm_ope mock_txe; size_t runt_size; - struct efa_domain *efa_domain; int ret; efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); - efa_domain = efa_rdm_ep_domain(efa_rdm_ep); - efa_domain->hmem_info[iface].runt_size = total_runt_size; + g_efa_hmem_info[iface].runt_size = total_runt_size; /* insert a fake peer */ ret = fi_getname(&resource->ep->fid, &raw_addr, &raw_addr_len); @@ -296,13 +294,11 @@ void test_efa_rdm_peer_select_readbase_rtm_impl( fi_addr_t addr; struct efa_mr mock_mr; struct efa_rdm_ope mock_txe; - struct efa_domain *efa_domain; int readbase_rtm; int ret; efa_rdm_ep = container_of(resource->ep, struct efa_rdm_ep, base_ep.util_ep.ep_fid); - efa_domain = efa_rdm_ep_domain(efa_rdm_ep); - efa_domain->hmem_info[iface].runt_size = total_runt_size; + g_efa_hmem_info[iface].runt_size = total_runt_size; /* insert a fake peer */ ret = fi_getname(&resource->ep->fid, &raw_addr, &raw_addr_len);