Skip to content

Commit

Permalink
UCP/UCT/TEST: Remove dummy remote key from cuda, tcp, cma
Browse files Browse the repository at this point in the history
- Update lane selection to not require REG flag when memh/rkey are not
  needed
- Remove memory registration support from cma, tcp
- Update mock test - TCP rndv is now selected earlier
  • Loading branch information
yosefe committed Feb 3, 2025
1 parent dda920b commit 9338d5f
Show file tree
Hide file tree
Showing 17 changed files with 218 additions and 209 deletions.
25 changes: 25 additions & 0 deletions src/ucp/core/ucp_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -2563,6 +2563,31 @@ void ucp_memory_detect_slowpath(ucp_context_h context, const void *address,
ucs_memory_info_set_host(mem_info);
}

/*
 * Build the bitmap of transport resources whose memory domain supports the
 * given memory type.
 *
 * The output bitmap is cleared first. Then, for every resource set in
 * context->tl_bitmap, the attributes of its memory domain are examined:
 *  - if the memory domain has any of the flags in md_reg_flags set, the
 *    memory type is looked up in the domain's reg_mem_types;
 *  - otherwise, it is looked up in the domain's access_mem_types.
 * A resource whose selected mask contains mem_type is set in tl_bitmap.
 *
 * Passing md_reg_flags == 0 therefore always tests access_mem_types.
 */
void ucp_context_memaccess_tl_bitmap(ucp_context_t *context,
                                     ucs_memory_type_t mem_type,
                                     uint64_t md_reg_flags,
                                     ucp_tl_bitmap_t *tl_bitmap)
{
    const uct_md_attr_v2_t *attr;
    ucp_rsc_index_t tl_id;
    uint64_t supported_mem_types;

    UCS_STATIC_BITMAP_RESET_ALL(tl_bitmap);
    UCS_STATIC_BITMAP_FOR_EACH_BIT(tl_id, &context->tl_bitmap) {
        /* Attributes of the memory domain that backs this resource */
        attr = &context->tl_mds[context->tl_rscs[tl_id].md_index].attr;

        /* Pick the capability mask according to the domain's flags */
        supported_mem_types = (attr->flags & md_reg_flags) ?
                                      attr->reg_mem_types :
                                      attr->access_mem_types;

        if (supported_mem_types & UCS_BIT(mem_type)) {
            UCS_STATIC_BITMAP_SET(tl_bitmap, tl_id);
        }
    }
}

void
ucp_context_dev_tl_bitmap(ucp_context_h context, const char *dev_name,
ucp_tl_bitmap_t *tl_bitmap)
Expand Down
28 changes: 8 additions & 20 deletions src/ucp/core/ucp_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -563,23 +563,6 @@ typedef struct ucp_tl_iface_atomic_flags {
ucs_assert(ucp_memory_type_detect(_context, _buffer, _length) == (_mem_type))


/*
 * Fill _tl_bitmap with the transport resources of _context whose memory
 * domain advertises _mem_type in the capability mask named by _cap_field.
 *
 * _context   - context whose tl_bitmap, tl_rscs and tl_mds are read.
 * _mem_type  - memory type; tested as UCS_BIT(_mem_type).
 * _cap_field - name of a memory-type mask member of uct_md_attr_v2_t, pasted
 *              verbatim after "md_attr->".
 * _tl_bitmap - bitmap lvalue (its address is taken); it is reset first and
 *              then one bit is set per matching resource.
 *
 * NOTE: expands to a bare brace block that declares the locals md_attr,
 * md_index and tl_id, so argument expressions must not refer to variables
 * with those names.
 */
#define UCP_CONTEXT_MEM_CAP_TLS(_context, _mem_type, _cap_field, _tl_bitmap) \
{ \
const uct_md_attr_v2_t *md_attr; \
ucp_md_index_t md_index; \
ucp_rsc_index_t tl_id; \
\
UCS_STATIC_BITMAP_RESET_ALL(&(_tl_bitmap)); \
UCS_STATIC_BITMAP_FOR_EACH_BIT(tl_id, &(_context)->tl_bitmap) { \
md_index = (_context)->tl_rscs[tl_id].md_index; \
md_attr = &(_context)->tl_mds[md_index].attr; \
if (md_attr->_cap_field & UCS_BIT(_mem_type)) { \
UCS_STATIC_BITMAP_SET(&(_tl_bitmap), tl_id); \
} \
} \
}


extern ucp_am_handler_t *ucp_am_handlers[];
extern const char *ucp_feature_str[];

Expand Down Expand Up @@ -731,9 +714,14 @@ ucp_context_usage_tracker_enabled(ucp_context_h context)
return context->config.ext.dynamic_tl_switch_interval != UCS_TIME_INFINITY;
}

void
ucp_context_dev_tl_bitmap(ucp_context_h context, const char *dev_name,
ucp_tl_bitmap_t *tl_bitmap);
/**
 * Build a bitmap of the context's transport resources whose memory domain
 * supports the given memory type.
 *
 * For each resource, if its memory domain has any of @a md_reg_flags set, the
 * memory type is checked against the domain's reg_mem_types; otherwise it is
 * checked against the domain's access_mem_types.
 *
 * @param [in]  context       Context whose transport resources are scanned.
 * @param [in]  mem_type      Memory type to look for.
 * @param [in]  md_reg_flags  Memory domain flags that select the
 *                            reg_mem_types check; 0 always selects
 *                            access_mem_types.
 * @param [out] tl_bitmap     Reset, then filled with the matching resources.
 */
void ucp_context_memaccess_tl_bitmap(ucp_context_t *context,
ucs_memory_type_t mem_type,
uint64_t md_reg_flags,
ucp_tl_bitmap_t *tl_bitmap);


/*
 * NOTE(review): the definition is not visible here. Judging by the signature,
 * this presumably fills tl_bitmap with the transport resources of the context
 * that belong to the device named dev_name - confirm against the
 * implementation in ucp_context.c.
 */
void ucp_context_dev_tl_bitmap(ucp_context_h context, const char *dev_name,
ucp_tl_bitmap_t *tl_bitmap);


void
Expand Down
7 changes: 3 additions & 4 deletions src/ucp/core/ucp_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -685,8 +685,7 @@ ucs_status_t ucp_worker_mem_type_eps_create(ucp_worker_h worker)
unsigned addr_indices[UCP_MAX_LANES];

ucs_memory_type_for_each(mem_type) {
UCP_CONTEXT_MEM_CAP_TLS(context, mem_type, access_mem_types,
mem_access_tls);
ucp_context_memaccess_tl_bitmap(context, mem_type, 0, &mem_access_tls);
if (UCP_MEM_IS_HOST(mem_type) ||
UCS_STATIC_BITMAP_IS_ZERO(mem_access_tls)) {
continue;
Expand Down Expand Up @@ -3713,8 +3712,8 @@ static ucs_status_t ucp_ep_query_transport(ucp_ep_h ep, ucp_ep_attr_t *attr)
lane_index * attr->transports.entry_size);

/* Each field updated in the following block must have its ending offset
* compared to attr->transports.entry_size before the field is
* updated. If the field's ending offset is greater than the
* compared to attr->transports.entry_size before the field is
* updated. If the field's ending offset is greater than the
* attr->transports.entry_size value, the field cannot be updated because
* that will cause a storage overlay.
*/
Expand Down
2 changes: 1 addition & 1 deletion src/ucp/proto/proto.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@


/* Maximal length of protocol description string */
#define UCP_PROTO_DESC_STR_MAX 64
#define UCP_PROTO_DESC_STR_MAX 128


/* Maximal length of protocol configuration string */
Expand Down
5 changes: 4 additions & 1 deletion src/ucp/proto/proto_debug.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@
#include <ctype.h>


/* Maximal length of perf node name */
#define UCP_PROTO_PERF_NAME_MAX 32

/* Performance node data entry */
typedef struct {
const char *name;
Expand All @@ -33,7 +36,7 @@ struct ucp_proto_perf_node {
ucp_proto_perf_node_type_t type;

/* Name of the range */
char name[UCP_PROTO_DESC_STR_MAX];
char name[UCP_PROTO_PERF_NAME_MAX];

/* Description of the range */
char desc[UCP_PROTO_DESC_STR_MAX];
Expand Down
78 changes: 34 additions & 44 deletions src/ucp/wireup/select.c
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ typedef struct {
ucp_wireup_criteria_t criteria;
uint64_t local_dev_bitmap;
uint64_t remote_dev_bitmap;
ucp_md_map_t md_map;
unsigned max_lanes;
} ucp_wireup_select_bw_info_t;

Expand Down Expand Up @@ -521,9 +520,6 @@ static UCS_F_NOINLINE ucs_status_t ucp_wireup_select_transport(
!ucp_wireup_check_flags(resource, md_attr->alloc_mem_types,
criteria->alloc_mem_types, criteria->title,
ucs_memory_type_names, p, endp - p) ||
!ucp_wireup_check_flags(resource, md_attr->reg_mem_types,
criteria->reg_mem_types, criteria->title,
ucs_memory_type_names, p, endp - p) ||
!ucp_wireup_check_select_flags(resource, iface_attr->cap.flags,
&local_iface_flags, criteria->title,
ucp_wireup_iface_flags, p,
Expand Down Expand Up @@ -877,6 +873,17 @@ static void ucp_wireup_unset_tl_by_md(const ucp_wireup_select_params_t *sparams,
}
}

/*
 * Wireup flavor of ucp_context_memaccess_tl_bitmap(): collect into tl_bitmap
 * the transport resources usable for memory of type mem_type.
 *
 * A memory domain that needs a local memory handle or a remote key must be
 * able to register the memory type; a domain that needs neither only has to
 * be able to access it.
 */
static void ucp_wireup_memaccess_bitmap(ucp_context_t *context,
                                        ucs_memory_type_t mem_type,
                                        ucp_tl_bitmap_t *tl_bitmap)
{
    ucp_context_memaccess_tl_bitmap(context, mem_type,
                                    UCT_MD_FLAG_NEED_MEMH |
                                    UCT_MD_FLAG_NEED_RKEY,
                                    tl_bitmap);
}

static UCS_F_NOINLINE ucs_status_t ucp_wireup_add_memaccess_lanes(
const ucp_wireup_select_params_t *select_params, unsigned ep_init_flags,
const ucp_wireup_criteria_t *criteria, ucs_memory_type_t mem_type,
Expand All @@ -887,6 +894,7 @@ static UCS_F_NOINLINE ucs_status_t ucp_wireup_add_memaccess_lanes(
ucp_wireup_criteria_t mem_criteria = *criteria;
ucp_wireup_select_info_t select_info = {0};
double reg_score = 0;
ucp_tl_bitmap_t mem_type_tl_bitmap;
int allow_am;
uint64_t remote_md_map;
ucs_status_t status;
Expand All @@ -900,13 +908,15 @@ static UCS_F_NOINLINE ucs_status_t ucp_wireup_add_memaccess_lanes(
/* Select best transport which can reach registered memory */
snprintf(title, sizeof(title), criteria->title, "registered");
mem_criteria.title = title;
mem_criteria.local_md_flags = UCT_MD_FLAG_REG | criteria->local_md_flags;
mem_criteria.local_md_flags = criteria->local_md_flags;
mem_criteria.alloc_mem_types = 0;
mem_criteria.reg_mem_types = UCS_BIT(mem_type);
mem_criteria.lane_type = lane_type;

ucp_wireup_memaccess_bitmap(context, mem_type, &mem_type_tl_bitmap);
UCS_STATIC_BITMAP_AND_INPLACE(&mem_type_tl_bitmap, tl_bitmap);

status = ucp_wireup_select_transport(select_ctx, select_params,
&mem_criteria, tl_bitmap,
&mem_criteria, mem_type_tl_bitmap,
remote_md_map, UINT64_MAX, UINT64_MAX,
!allow_am, &select_info, NULL, 0);
if (status == UCS_OK) {
Expand Down Expand Up @@ -945,9 +955,9 @@ static UCS_F_NOINLINE ucs_status_t ucp_wireup_add_memaccess_lanes(
* remote memory. */
snprintf(title, sizeof(title), criteria->title, "allocated");
mem_criteria.title = title;
mem_criteria.local_md_flags = UCT_MD_FLAG_ALLOC | criteria->local_md_flags;
mem_criteria.local_md_flags = UCT_MD_FLAG_ALLOC | UCT_MD_FLAG_NEED_RKEY |
criteria->local_md_flags;
mem_criteria.alloc_mem_types = UCS_BIT(mem_type);
mem_criteria.reg_mem_types = 0;
mem_criteria.lane_type = lane_type;

for (;;) {
Expand Down Expand Up @@ -1068,7 +1078,6 @@ static void ucp_wireup_criteria_init(ucp_wireup_criteria_t *criteria)
criteria->local_event_flags = 0;
criteria->remote_event_flags = 0;
criteria->alloc_mem_types = 0;
criteria->reg_mem_types = 0;
criteria->is_keepalive = 0;
criteria->calc_score = NULL;
criteria->tl_rsc_flags = 0;
Expand Down Expand Up @@ -1701,7 +1710,6 @@ ucp_wireup_add_am_bw_lanes(const ucp_wireup_select_params_t *select_params,

bw_info.local_dev_bitmap = UINT64_MAX;
bw_info.remote_dev_bitmap = UINT64_MAX;
bw_info.md_map = 0;
bw_info.max_lanes = context->config.ext.max_eager_lanes - 1;
/* rndv/am/zcopy proto should take max_rndv_lanes value into account */
if (context->config.ext.proto_enable) {
Expand Down Expand Up @@ -1806,7 +1814,6 @@ ucp_wireup_add_rma_bw_lanes(const ucp_wireup_select_params_t *select_params,
ucp_wireup_select_bw_info_t bw_info;
ucs_memory_type_t mem_type;
size_t added_lanes;
uint64_t md_reg_flag;
ucp_tl_bitmap_t tl_bitmap, mem_type_tl_bitmap;
uint8_t i;
ucp_wireup_select_flags_t iface_rma_flags, peer_rma_flags;
Expand All @@ -1822,19 +1829,14 @@ ucp_wireup_add_rma_bw_lanes(const ucp_wireup_select_params_t *select_params,
return UCS_OK;
}

if (ep_init_flags & UCP_EP_INIT_FLAG_MEM_TYPE) {
md_reg_flag = 0;
} else if (ucp_ep_get_context_features(ep) &
(UCP_FEATURE_TAG | UCP_FEATURE_AM | UCP_FEATURE_RMA)) {
/* if needed for RNDV, need only access for remote registered memory */
md_reg_flag = UCT_MD_FLAG_REG;
} else {
if (!(ep_init_flags & UCP_EP_INIT_FLAG_MEM_TYPE) &&
!(ucp_ep_get_context_features(ep) &
(UCP_FEATURE_TAG | UCP_FEATURE_AM | UCP_FEATURE_RMA))) {
return UCS_OK;
}

ucp_wireup_criteria_init(&bw_info.criteria);
bw_info.criteria.calc_score = ucp_wireup_rma_bw_score_func;
bw_info.criteria.local_md_flags = md_reg_flag;
bw_info.criteria.calc_score = ucp_wireup_rma_bw_score_func;
ucp_wireup_init_select_flags(&bw_info.criteria.local_iface_flags,
UCT_IFACE_FLAG_PENDING, 0);
ucp_wireup_fill_peer_err_criteria(&bw_info.criteria, ep_init_flags);
Expand All @@ -1846,7 +1848,6 @@ ucp_wireup_add_rma_bw_lanes(const ucp_wireup_select_params_t *select_params,

bw_info.local_dev_bitmap = UINT64_MAX;
bw_info.remote_dev_bitmap = UINT64_MAX;
bw_info.md_map = 0;

/* check rkey_ptr */
if (!(ep_init_flags & UCP_EP_INIT_FLAG_MEM_TYPE) &&
Expand All @@ -1863,8 +1864,8 @@ ucp_wireup_add_rma_bw_lanes(const ucp_wireup_select_params_t *select_params,
bw_info.criteria.lane_type = UCP_LANE_TYPE_RKEY_PTR;
bw_info.max_lanes = 1;

UCP_CONTEXT_MEM_CAP_TLS(context, UCS_MEMORY_TYPE_HOST, access_mem_types,
tl_bitmap);
ucp_context_memaccess_tl_bitmap(context, UCS_MEMORY_TYPE_HOST, 0,
&tl_bitmap);
ucp_wireup_add_bw_lanes(select_params, &bw_info, tl_bitmap,
UCP_NULL_LANE, select_ctx, 0);
}
Expand Down Expand Up @@ -1946,16 +1947,12 @@ ucp_wireup_add_rma_bw_lanes(const ucp_wireup_select_params_t *select_params,
UCS_STATIC_BITMAP_RESET_ALL(&tl_bitmap);

ucs_memory_type_for_each(mem_type) {
UCP_CONTEXT_MEM_CAP_TLS(context, mem_type, reg_mem_types,
mem_type_tl_bitmap);
ucp_wireup_memaccess_bitmap(context, mem_type, &mem_type_tl_bitmap);

bw_info.criteria.reg_mem_types = UCS_BIT(mem_type);
added_lanes += ucp_wireup_add_bw_lanes(
select_params, &bw_info,
UCP_TL_BITMAP_AND_NOT(
mem_type_tl_bitmap, tl_bitmap),
am_lane, select_ctx,
allow_extra_path);
added_lanes += ucp_wireup_add_bw_lanes(
select_params, &bw_info,
UCP_TL_BITMAP_AND_NOT(mem_type_tl_bitmap, tl_bitmap),
am_lane, select_ctx, allow_extra_path);

UCS_STATIC_BITMAP_OR_INPLACE(&tl_bitmap, mem_type_tl_bitmap);
}
Expand Down Expand Up @@ -1997,7 +1994,6 @@ ucp_wireup_add_tag_lane(const ucp_wireup_select_params_t *select_params,

ucp_wireup_criteria_init(&criteria);
criteria.title = "tag_offload";
criteria.local_md_flags = UCT_MD_FLAG_REG; /* needed for posting tags to HW */
criteria.calc_score = ucp_wireup_am_score_func;
criteria.lane_type = UCP_LANE_TYPE_TAG;
ucp_wireup_init_select_flags(&criteria.remote_iface_flags,
Expand Down Expand Up @@ -2436,16 +2432,10 @@ ucp_wireup_construct_lanes(const ucp_wireup_select_params_t *select_params,
}

for (i = 0; key->rma_bw_lanes[i] != UCP_NULL_LANE; i++) {
lane = key->rma_bw_lanes[i];
rsc_index = select_ctx->lane_descs[lane].rsc_index;
md_index = context->tl_rscs[rsc_index].md_index;

/* Pack remote key only if needed for RMA.
* FIXME a temporary workaround to prevent the ugni uct from using rndv. */
if ((context->tl_mds[md_index].attr.flags & UCT_MD_FLAG_NEED_RKEY) &&
!(strstr(context->tl_rscs[rsc_index].tl_rsc.tl_name, "ugni"))) {
key->rma_bw_md_map |= UCS_BIT(md_index);
}
lane = key->rma_bw_lanes[i];
rsc_index = select_ctx->lane_descs[lane].rsc_index;
md_index = context->tl_rscs[rsc_index].md_index;
key->rma_bw_md_map |= UCS_BIT(md_index);
}

if (key->rkey_ptr_lane != UCP_NULL_LANE) {
Expand Down
3 changes: 0 additions & 3 deletions src/ucp/wireup/wireup.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,6 @@ typedef struct {
/* Mandatory memory types for allocation */
uint64_t alloc_mem_types;

/* Mandatory memory types for registration */
uint64_t reg_mem_types;

/* Required support of keepalive mechanism */
int is_keepalive;

Expand Down
1 change: 1 addition & 0 deletions src/uct/base/uct_md.c
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,7 @@ ucs_status_t uct_md_query_v2(uct_md_h md, uct_md_attr_v2_t *md_attr)

void uct_md_base_md_query(uct_md_attr_v2_t *md_attr)
{
md_attr->flags = 0;
md_attr->reg_mem_types = 0;
md_attr->reg_nonblock_mem_types = 0;
md_attr->cache_mem_types = 0;
Expand Down
Loading

0 comments on commit 9338d5f

Please sign in to comment.