Skip to content

Commit

Permalink
prov/verbs: Add support for IBV_ACCESS_RELAXED_ORDERING
Browse files Browse the repository at this point in the history
IBV_ACCESS_RELAXED_ORDERING allows the system to reorder
Send/Write/Atomic operations to improve performance.

The patch enables IBV_ACCESS_RELAXED_ORDERING if the application
has requested no ordering in TX/RX attributes.

Signed-off-by: Sylvain Didelot <[email protected]>
  • Loading branch information
sydidelot committed Oct 10, 2023
1 parent 6b62f34 commit 0609cd6
Show file tree
Hide file tree
Showing 6 changed files with 92 additions and 8 deletions.
10 changes: 10 additions & 0 deletions prov/verbs/configure.m4
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,16 @@ AC_DEFUN([FI_VERBS_CONFIGURE],[
AC_DEFINE_UNQUOTED([VERBS_HAVE_DMABUF_MR],[$VERBS_HAVE_DMABUF_MR],
[Whether infiniband/verbs.h has ibv_reg_dmabuf_mr() support or not])
# Check whether rdma-core provides IBV_ACCESS_RELAXED_ORDERING for memory registration
VERBS_HAVE_RELAXED_ORDERING_MR=0
AS_IF([test $verbs_ibverbs_happy -eq 1],[
AC_CHECK_DECL([IBV_ACCESS_RELAXED_ORDERING],
[VERBS_HAVE_RELAXED_ORDERING_MR=1],[],
[#include <infiniband/verbs.h>])
])
AC_DEFINE_UNQUOTED([VERBS_HAVE_RELAXED_ORDERING_MR],[$VERBS_HAVE_RELAXED_ORDERING_MR],
[Whether infiniband/verbs.h has IBV_ACCESS_RELAXED_ORDERING support or not])
CPPFLAGS=$fi_verbs_configure_save_CPPFLAGS
# Technically, verbs_ibverbs_CPPFLAGS and
Expand Down
12 changes: 9 additions & 3 deletions prov/verbs/src/verbs_domain.c
Original file line number Diff line number Diff line change
Expand Up @@ -246,9 +246,12 @@ static int vrb_open_device_by_name(struct vrb_domain *domain, const char *name)
const char *rdma_name = ibv_get_device_name(dev_list[i]->device);
switch (domain->ep_type) {
case FI_EP_MSG:
ret = domain->ext_flags & VRB_USE_XRC ?
vrb_cmp_xrc_domain_name(name, rdma_name) :
strcmp(name, rdma_name);
if (domain->ext_flags & VRB_USE_XRC)
ret = vrb_cmp_xrc_domain_name(name, rdma_name);
else if (domain->ext_flags & VRB_USE_RO)
ret = vrb_cmp_ro_domain_name(name, rdma_name);
else
ret = strcmp(name, rdma_name);
break;
case FI_EP_DGRAM:
ret = strncmp(name, rdma_name,
Expand Down Expand Up @@ -345,6 +348,9 @@ vrb_domain(struct fid_fabric *fabric, struct fi_info *info,
if (!_domain->info)
goto err2;

if (VRB_RO_ENABLED(info))
_domain->ext_flags |= VRB_USE_RO;

_domain->ep_type = VRB_EP_TYPE(info);
_domain->ext_flags |= vrb_is_xrc_info(info) ? VRB_USE_XRC : 0;

Expand Down
3 changes: 3 additions & 0 deletions prov/verbs/src/verbs_eq.c
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,9 @@ vrb_pep_dev_domain_match(struct fi_info *hints, const char *devname)
if ((VRB_EP_PROTO(hints)) == FI_PROTO_RDMA_CM_IB_XRC)
ret = vrb_cmp_xrc_domain_name(hints->domain_attr->name,
devname);
else if (VRB_RO_ENABLED(hints))
ret = vrb_cmp_ro_domain_name(hints->domain_attr->name,
devname);
else
ret = strcmp(hints->domain_attr->name, devname);

Expand Down
50 changes: 45 additions & 5 deletions prov/verbs/src/verbs_info.c
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,15 @@ const struct fi_rx_attr verbs_rx_attr = {
.total_buffered_recv = 0,
};

/*
 * RX attributes used for the relaxed-ordering FI_EP_MSG domain.
 * vrb_alloc_info() copies this table into fi->rx_attr when the endpoint
 * domain descriptor has relaxed_ordering set.
 */
const struct fi_rx_attr verbs_ro_rx_attr = {
.caps = VERBS_MSG_RX_CAPS,
.mode = VERBS_RX_MODE,
.op_flags = FI_COMPLETION,
/* No message ordering is advertised; VRB_RO_ENABLED() tests for exactly this */
.msg_order = 0,
.comp_order = FI_ORDER_STRICT,
.total_buffered_recv = 0,
};

const struct fi_rx_attr verbs_dgram_rx_attr = {
.caps = VERBS_DGRAM_RX_CAPS,
.mode = VERBS_DGRAM_RX_MODE | VERBS_RX_MODE,
Expand All @@ -132,6 +141,16 @@ const struct fi_tx_attr verbs_tx_attr = {
.rma_iov_limit = 1,
};

/*
 * TX attributes used for the relaxed-ordering FI_EP_MSG domain.
 * vrb_alloc_info() copies this table into fi->tx_attr when the endpoint
 * domain descriptor has relaxed_ordering set.
 */
const struct fi_tx_attr verbs_ro_tx_attr = {
.caps = VERBS_MSG_TX_CAPS,
.mode = 0,
.op_flags = VERBS_TX_OP_FLAGS,
/* No message ordering is advertised; VRB_RO_ENABLED() tests for exactly this */
.msg_order = 0,
.comp_order = FI_ORDER_STRICT,
.inject_size = 0,
.rma_iov_limit = 1,
};

const struct fi_tx_attr verbs_dgram_tx_attr = {
.caps = VERBS_DGRAM_TX_CAPS,
.mode = 0,
Expand All @@ -146,18 +165,28 @@ const struct verbs_ep_domain verbs_msg_domain = {
.suffix = "",
.type = FI_EP_MSG,
.protocol = FI_PROTO_UNSPEC,
.relaxed_ordering = false,
};

/*
 * Descriptor for the relaxed-ordering variant of the FI_EP_MSG domain.
 * relaxed_ordering = true makes vrb_alloc_info() pick verbs_ro_tx_attr and
 * verbs_ro_rx_attr for this domain.
 */
const struct verbs_ep_domain verbs_msg_ro_domain = {
/* vrb_cmp_ro_domain_name() strips this suffix length before comparing
 * against the RDMA device name -- presumably the domain name is
 * "<device>-ro"; confirm against the code that builds domain names. */
.suffix = "-ro",
.type = FI_EP_MSG,
.protocol = FI_PROTO_UNSPEC,
.relaxed_ordering = true,
};

/*
 * Descriptor for the XRC variant of the FI_EP_MSG domain
 * (protocol FI_PROTO_RDMA_CM_IB_XRC).  relaxed_ordering is false, so
 * vrb_alloc_info() does not select the relaxed-ordering TX/RX attributes.
 */
const struct verbs_ep_domain verbs_msg_xrc_domain = {
.suffix = "-xrc",
.type = FI_EP_MSG,
.protocol = FI_PROTO_RDMA_CM_IB_XRC,
.relaxed_ordering = false,
};

/*
 * Descriptor for the FI_EP_DGRAM domain.  relaxed_ordering is false; the
 * flag is only consulted for FI_EP_MSG in vrb_alloc_info().
 */
const struct verbs_ep_domain verbs_dgram_domain = {
.suffix = "-dgram",
.type = FI_EP_DGRAM,
.protocol = FI_PROTO_UNSPEC,
.relaxed_ordering = false,
};

/* The list (not thread safe) is populated once when the provider is initialized */
Expand Down Expand Up @@ -770,8 +799,13 @@ static int vrb_alloc_info(struct ibv_context *ctx, struct fi_info **info,
switch (ep_dom->type) {
case FI_EP_MSG:
fi->caps = VERBS_MSG_CAPS;
*(fi->tx_attr) = verbs_tx_attr;
*(fi->rx_attr) = verbs_rx_attr;
if (ep_dom->relaxed_ordering) {
*(fi->tx_attr) = verbs_ro_tx_attr;
*(fi->rx_attr) = verbs_ro_rx_attr;
} else {
*(fi->tx_attr) = verbs_tx_attr;
*(fi->rx_attr) = verbs_rx_attr;
}
fi->addr_format = FI_SOCKADDR_IB;
break;
case FI_EP_DGRAM:
Expand Down Expand Up @@ -1332,7 +1366,7 @@ static int vrb_device_has_ipoib_addr(const char *dev_name)
return 0;
}

#define VERBS_NUM_DOMAIN_TYPES 3
#define VERBS_NUM_DOMAIN_TYPES 4

static int vrb_init_info(const struct fi_info **all_infos)
{
Expand Down Expand Up @@ -1379,12 +1413,14 @@ static int vrb_init_info(const struct fi_info **all_infos)
if (!vrb_gl_data.iface)
vrb_get_sib(&verbs_devs);

if (dlist_empty(&verbs_devs))
if (dlist_empty(&verbs_devs)) {
FI_WARN(&vrb_prov, FI_LOG_FABRIC,
"no valid IPoIB interfaces found, FI_EP_MSG endpoint "
"type would not be available\n");
else
} else {
ep_type[dom_count++] = &verbs_msg_domain;
ep_type[dom_count++] = &verbs_msg_ro_domain;
}

if (!vrb_gl_data.msg.prefer_xrc && VERBS_HAVE_XRC)
ep_type[dom_count++] = &verbs_msg_xrc_domain;
Expand Down Expand Up @@ -1562,6 +1598,10 @@ int vrb_get_matching_info(uint32_t version, const struct fi_info *hints,
"XRC FI_EP_MSG endpoints\n");
continue;
}

if (VRB_RO_ENABLED(hints) && (check_info->tx_attr->msg_order ||
check_info->rx_attr->msg_order))
continue;
}

if ((check_info->ep_attr->type == FI_EP_MSG) && passive) {
Expand Down
3 changes: 3 additions & 0 deletions prov/verbs/src/verbs_mr.c
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,9 @@ vrb_mr_ofi2ibv_access(uint64_t ofi_access, struct vrb_domain *domain)
IBV_ACCESS_REMOTE_WRITE |
IBV_ACCESS_REMOTE_ATOMIC;

if (domain->ext_flags & VRB_USE_RO)
ibv_access |= VRB_ACCESS_RELAXED_ORDERING;

return ibv_access;
}

Expand Down
22 changes: 22 additions & 0 deletions prov/verbs/src/verbs_ofi.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,9 @@
#define VRB_EP_PROTO(info) \
(((info) && (info)->ep_attr) ? (info)->ep_attr->protocol : \
FI_PROTO_UNSPEC)
/*
 * Evaluates to non-zero when relaxed ordering applies to 'info': both the TX
 * and RX attributes are present and neither requests any message ordering
 * (msg_order == 0).
 *
 * A NULL 'info' evaluates to 0 instead of being dereferenced, matching the
 * NULL handling of VRB_EP_PROTO().  'info' is evaluated more than once, so do
 * not pass an expression with side effects.
 */
#define VRB_RO_ENABLED(info)						\
	((info) && (info)->tx_attr && !(info)->tx_attr->msg_order &&	\
	 (info)->rx_attr && !(info)->rx_attr->msg_order)

#define VRB_MEM_ALIGNMENT (64)
#define VRB_BUF_ALIGNMENT (4096) /* TODO: Page or MTU size */
Expand Down Expand Up @@ -366,6 +369,7 @@ struct fi_ops_cm *vrb_pep_ops_cm(struct vrb_pep *pep);
/* Bit flags OR-ed into the ext_flags member of struct vrb_domain. */
enum {
/* Set by vrb_domain() when vrb_is_xrc_info(info) is true */
VRB_USE_XRC = BIT(0),
/* NOTE(review): presumably on-demand paging; not set anywhere in the code visible here -- confirm */
VRB_USE_ODP = BIT(1),
/* Set by vrb_domain() when VRB_RO_ENABLED(info); makes vrb_mr_ofi2ibv_access() add VRB_ACCESS_RELAXED_ORDERING */
VRB_USE_RO = BIT(2),
};

struct vrb_domain {
Expand Down Expand Up @@ -437,6 +441,12 @@ int vrb_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr,
struct fid_cq **cq, void *context);
int vrb_cq_trywait(struct vrb_cq *cq);

/*
 * Memory-registration access bit for relaxed ordering.  When configure did not
 * detect IBV_ACCESS_RELAXED_ORDERING (VERBS_HAVE_RELAXED_ORDERING_MR is 0),
 * the macro expands to 0 so OR-ing it into the access flags has no effect.
 */
#if !VERBS_HAVE_RELAXED_ORDERING_MR
#define VRB_ACCESS_RELAXED_ORDERING 0
#else
#define VRB_ACCESS_RELAXED_ORDERING IBV_ACCESS_RELAXED_ORDERING
#endif

struct vrb_mem_desc {
struct fid_mr mr_fid;
struct ibv_mr *mr;
Expand Down Expand Up @@ -839,10 +849,12 @@ struct verbs_ep_domain {
char *suffix;
enum fi_ep_type type;
uint32_t protocol;
bool relaxed_ordering;
};

extern const struct verbs_ep_domain verbs_dgram_domain;
extern const struct verbs_ep_domain verbs_msg_xrc_domain;
extern const struct verbs_ep_domain verbs_msg_ro_domain;

int vrb_check_ep_attr(const struct fi_info *hints,
const struct fi_info *info);
Expand All @@ -860,6 +872,16 @@ static inline int vrb_cmp_xrc_domain_name(const char *domain_name,
domain_len - suffix_len) : -1;
}

/*
 * Compare a domain name, possibly carrying the relaxed-ordering suffix
 * (verbs_msg_ro_domain.suffix), against an RDMA device name.
 *
 * If domain_name ends with the suffix, the suffix is stripped and the
 * remainder must equal rdma_name exactly.  The previous prefix-only
 * comparison let "mlx5_1-ro" match device "mlx5_10", and blindly chopped
 * suffix-length bytes off names that did not carry the suffix at all, so
 * "mlx5_0" matched any device starting with "mlx".  A name without the
 * suffix is now compared verbatim.
 *
 * @param domain_name  NUL-terminated domain name
 * @param rdma_name    NUL-terminated RDMA device name
 * @return 0 when the names refer to the same device, non-zero otherwise
 */
static inline int vrb_cmp_ro_domain_name(const char *domain_name,
					 const char *rdma_name)
{
	const char *suffix = verbs_msg_ro_domain.suffix;
	size_t domain_len = strlen(domain_name);
	size_t suffix_len = strlen(suffix);
	size_t prefix_len;

	/* No "-ro" suffix present: only an exact device name can match */
	if (domain_len <= suffix_len ||
	    strcmp(domain_name + (domain_len - suffix_len), suffix) != 0)
		return strcmp(domain_name, rdma_name);

	prefix_len = domain_len - suffix_len;

	/* Reject devices whose name merely starts with the stripped prefix */
	if (strlen(rdma_name) != prefix_len)
		return -1;

	return strncmp(domain_name, rdma_name, prefix_len);
}

int vrb_cq_signal(struct fid_cq *cq);

struct vrb_eq_entry *vrb_eq_alloc_entry(uint32_t event,
Expand Down

0 comments on commit 0609cd6

Please sign in to comment.