Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

UCP/RNDV: Fixed assertion when scaled end offset > max_frag #10554

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/ucp/rndv/proto_rndv.inl
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ ucp_proto_rndv_bulk_max_payload(ucp_request_t *req,
* max_payload is later capped by remaining request length when
* advancing datatype iterator
*/
max_payload = end_offset - total_offset;
max_payload = ucs_min(lpriv->max_frag, end_offset - total_offset);
} else {
/* Send in round-robin fashion, each lanes sends its maximal size */
lane_offset = total_offset % max_frag_sum;
Expand Down
69 changes: 61 additions & 8 deletions test/gtest/ucp/test_ucp_proto_mock.cc
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,15 @@ class test_ucp_proto_mock : public ucp_test, public mock_iface {
EXPECT_TRUE(ctx.received);
}

/**
 * Run send_recv_am() once for every message size in an inclusive range.
 *
 * @param msg_start First message size to send.
 * @param msg_end   Largest message size that may be sent (inclusive bound).
 * @param msg_step  Increment applied to the size after each iteration.
 * @param mem_type  Memory type forwarded unchanged to send_recv_am();
 *                  defaults to UCS_MEMORY_TYPE_HOST.
 */
void send_recv_am_range(size_t msg_start, size_t msg_end, size_t msg_step,
                        ucs_memory_type_t mem_type = UCS_MEMORY_TYPE_HOST)
{
    size_t length = msg_start;

    /* Sizes are visited in ascending order: start, start + step, ... */
    while (length <= msg_end) {
        send_recv_am(length, mem_type);
        length += msg_step;
    }
}

static ucp_worker_cfg_index_t ep_config_index(const entity &e)
{
return e.ep()->cfg_index;
Expand Down Expand Up @@ -555,13 +564,58 @@ UCS_TEST_P(test_ucp_proto_mock_rcx, rndv_2_lanes, "IB_NUM_PATHS?=2",
/*
 * Sweeps send_recv_am() over a range of message sizes on the
 * test_ucp_proto_mock_rcx fixture. The three string arguments are handed to
 * UCS_TEST_P together with the test name (presumably configuration overrides
 * applied for this test only - confirm against the UCS_TEST_P definition).
 */
UCS_TEST_P(test_ucp_proto_mock_rcx, rndv_send_recv_small_frag,
"IB_NUM_PATHS?=2", "MAX_RNDV_LANES=2", "RNDV_THRESH=0")
{
/* NOTE(review): as captured here the body performs the size sweep twice -
 * first with the explicit loop (1024 up to 65536 in steps of 1024) and then
 * through send_recv_am_range(). This text comes from a rendered diff without
 * +/- markers, so the loop is most likely the removed side and the helper
 * call the added side - confirm against the real file. */
for (size_t i = 1024; i <= 65536; i += 1024) {
send_recv_am(i);
}
send_recv_am_range(UCS_KBYTE, 64 * UCS_KBYTE, UCS_KBYTE);
}

/* Instantiates the test_ucp_proto_mock_rcx test cases under the name "rcx"
 * with the "rc_x" argument (presumably the transport list - TODO confirm) */
UCP_INSTANTIATE_TEST_CASE_TLS(test_ucp_proto_mock_rcx, rcx, "rc_x")

/**
 * Mock-transport fixture that mocks "rc_mlx5" and registers two mock
 * interfaces with different bandwidth/latency attributes before running the
 * base-class initialization.
 */
class test_ucp_proto_mock_rcx2 : public test_ucp_proto_mock {
public:
    test_ucp_proto_mock_rcx2()
    {
        mock_transport("rc_mlx5");
    }

    virtual void init() override
    {
        /* Attributes of the faster device: more bandwidth, less constant
         * latency, and an explicit limit on get zero-copy size */
        const auto fast_dev_attr = [](uct_iface_attr_t &iface_attr) {
            iface_attr.latency.c         = 500e-9;
            iface_attr.latency.m         = 1e-9;
            iface_attr.bandwidth.shared  = 28e9;
            iface_attr.cap.am.max_short  = 208;
            iface_attr.cap.get.max_zcopy = 16384;
        };

        /* Attributes of the slower device: less bandwidth, more constant
         * latency; get zero-copy limit is left untouched */
        const auto slow_dev_attr = [](uct_iface_attr_t &iface_attr) {
            iface_attr.latency.c        = 600e-9;
            iface_attr.latency.m        = 1e-9;
            iface_attr.bandwidth.shared = 24e9;
            iface_attr.cap.am.max_short = 2000;
        };

        /* Registration order is kept: mock_0:1 first, then mock_1:1 */
        add_mock_iface("mock_0:1", fast_dev_attr);
        add_mock_iface("mock_1:1", slow_dev_attr);

        test_ucp_proto_mock::init();
    }
};

/*
 * Checks the endpoint protocol configuration reported for the sender on the
 * test_ucp_proto_mock_rcx2 fixture, then runs send_recv_am_range() from
 * UCS_KBYTE to 64 * UCS_KBYTE in UCS_KBYTE steps. The three string arguments
 * are handed to UCS_TEST_P together with the test name (presumably
 * configuration overrides for this test - confirm against UCS_TEST_P).
 */
UCS_TEST_P(test_ucp_proto_mock_rcx2, rndv_send_recv_small_frag,
"IB_NUM_PATHS?=2", "MAX_RNDV_LANES=2", "RNDV_THRESH=0")
{
/* Start from any_key() and select the active-message send operation with
 * op_attr cleared */
ucp_proto_select_key_t key = any_key();
key.param.op_id_flags = UCP_OP_ID_AM_SEND;
key.param.op_attr = 0;

/* Expected entries for the sender. Each row looks like
 * {range start, range end, protocol description, lane configuration}
 * - TODO confirm the field meaning against check_ep_config(). */
check_ep_config(sender(), {
{1, 3724, "rendezvous fragmented copy-in copy-out",
"rc_mlx5/mock_0:1/path0"},
{3725, INF, "rendezvous zero-copy read from remote",
"54% on rc_mlx5/mock_0:1/path0 and 46% on rc_mlx5/mock_1:1/path0"},
}, key);

/* Exchange messages across the whole size range after the config check */
send_recv_am_range(UCS_KBYTE, 64 * UCS_KBYTE, UCS_KBYTE);
}

/* Instantiates the test_ucp_proto_mock_rcx2 test cases under the name "rcx"
 * with the "rc_x" argument (presumably the transport list - TODO confirm) */
UCP_INSTANTIATE_TEST_CASE_TLS(test_ucp_proto_mock_rcx2, rcx, "rc_x")

class test_ucp_proto_mock_cma : public test_ucp_proto_mock {
public:
test_ucp_proto_mock_cma()
Expand Down Expand Up @@ -664,11 +718,10 @@ class test_ucp_proto_mock_gpu : public test_ucp_proto_mock {
/* Registers a single mock interface named "mock" with customized interface
 * attributes, then delegates to test_ucp_proto_mock::init(). */
virtual void init() override
{
add_mock_iface("mock", [](uct_iface_attr_t &iface_attr) {
/* NOTE(review): max_short, bandwidth.shared, latency.c and latency.m are
 * each assigned twice below with identical values, and cap.get.max_zcopy is
 * assigned once in between. This text comes from a rendered diff without
 * +/- markers, so the first five assignments are most likely the removed
 * side and the last four the added side (i.e. the max_zcopy override was
 * dropped) - confirm against the real file. */
iface_attr.cap.am.max_short = 2000;
iface_attr.bandwidth.shared = 28e9;
iface_attr.latency.c = 600e-9;
iface_attr.latency.m = 1e-9;
iface_attr.cap.get.max_zcopy = 16384;
iface_attr.cap.am.max_short = 2000;
iface_attr.bandwidth.shared = 28e9;
iface_attr.latency.c = 600e-9;
iface_attr.latency.m = 1e-9;
});
/* Base-class initialization runs after the mock interface is registered */
test_ucp_proto_mock::init();
}
Expand Down
Loading