From 582d7f2a258f222e0af224d84510e925303d60ef Mon Sep 17 00:00:00 2001 From: Sai Sunku Date: Thu, 30 Jan 2025 00:55:49 +0000 Subject: [PATCH] fabtests: Run efa tests with efa fabric name The current fabtests are designed to test the EFA RDM path, which will continue to use the fabric name efa. The efa-direct path will use the fabric name efa-direct and will not be selected by the current tests. Signed-off-by: Sai Sunku --- fabtests/pytest/efa/conftest.py | 5 ++ fabtests/pytest/efa/efa_common.py | 4 +- fabtests/pytest/efa/test_av.py | 4 +- fabtests/pytest/efa/test_cq.py | 8 +- .../pytest/efa/test_efa_device_selection.py | 4 +- fabtests/pytest/efa/test_efa_info.py | 5 +- .../pytest/efa/test_efa_protocol_selection.py | 5 +- fabtests/pytest/efa/test_efa_shm_addr.py | 4 +- fabtests/pytest/efa/test_flood_peer.py | 6 +- fabtests/pytest/efa/test_fork_support.py | 4 +- fabtests/pytest/efa/test_mr.py | 4 +- fabtests/pytest/efa/test_multi_ep.py | 4 +- fabtests/pytest/efa/test_multi_recv.py | 5 +- fabtests/pytest/efa/test_rdm.py | 83 +++++++++++-------- fabtests/pytest/efa/test_rma_bw.py | 28 ++++--- fabtests/pytest/efa/test_rma_pingpong.py | 15 ++-- fabtests/pytest/efa/test_rnr.py | 8 +- fabtests/pytest/efa/test_runt.py | 5 +- fabtests/pytest/efa/test_setopt.py | 4 +- fabtests/pytest/efa/test_unexpected_msg.py | 4 +- 20 files changed, 121 insertions(+), 88 deletions(-) diff --git a/fabtests/pytest/efa/conftest.py b/fabtests/pytest/efa/conftest.py index 2871a9b8ca9..f541d091192 100644 --- a/fabtests/pytest/efa/conftest.py +++ b/fabtests/pytest/efa/conftest.py @@ -74,6 +74,11 @@ def zcpy_recv_message_size(request): def zcpy_recv_max_msg_size(request): return 8192 +# TODO - add efa-direct tests +@pytest.fixture(scope="module", params=["efa"]) +def fabric(request): + return request.param + @pytest.hookimpl(hookwrapper=True) def pytest_collection_modifyitems(session, config, items): # Called after collection has been performed, may filter or re-order the items in-place diff --git 
a/fabtests/pytest/efa/efa_common.py b/fabtests/pytest/efa/efa_common.py index 6f5e311a97f..07822eb9ca5 100644 --- a/fabtests/pytest/efa/efa_common.py +++ b/fabtests/pytest/efa/efa_common.py @@ -7,7 +7,7 @@ def efa_run_client_server_test(cmdline_args, executable, iteration_type, completion_semantic, memory_type, message_size, warmup_iteration_type=None, timeout=None, - completion_type="queue"): + completion_type="queue", fabric=None): if timeout is None: timeout = cmdline_args.timeout @@ -23,7 +23,7 @@ def efa_run_client_server_test(cmdline_args, executable, iteration_type, memory_type=memory_type, timeout=timeout, warmup_iteration_type=warmup_iteration_type, - completion_type=completion_type) + completion_type=completion_type, fabric=fabric) test.run() @retry(retry_on_exception=is_ssh_connection_error, stop_max_attempt_number=3, wait_fixed=5000) diff --git a/fabtests/pytest/efa/test_av.py b/fabtests/pytest/efa/test_av.py index 943fb525d44..09091bb569b 100644 --- a/fabtests/pytest/efa/test_av.py +++ b/fabtests/pytest/efa/test_av.py @@ -1,7 +1,7 @@ import pytest @pytest.mark.functional -def test_av_xfer(cmdline_args): +def test_av_xfer(cmdline_args, fabric): from common import ClientServerTest - test = ClientServerTest(cmdline_args, "fi_av_xfer -e rdm") + test = ClientServerTest(cmdline_args, "fi_av_xfer -e rdm", fabric=fabric) test.run() diff --git a/fabtests/pytest/efa/test_cq.py b/fabtests/pytest/efa/test_cq.py index e45a9d99237..2e68f2268f1 100644 --- a/fabtests/pytest/efa/test_cq.py +++ b/fabtests/pytest/efa/test_cq.py @@ -4,14 +4,14 @@ # of cq that efa device can support @pytest.mark.serial @pytest.mark.unit -def test_cq(cmdline_args): +def test_cq(cmdline_args, fabric): from common import UnitTest - test = UnitTest(cmdline_args, "fi_cq_test") + test = UnitTest(cmdline_args, f"fi_cq_test -f {fabric}") test.run() @pytest.mark.functional @pytest.mark.parametrize("operation_type", ["senddata", "writedata"]) -def test_cq_data(cmdline_args, operation_type): +def 
test_cq_data(cmdline_args, operation_type, fabric): from common import ClientServerTest - test = ClientServerTest(cmdline_args, "fi_cq_data -e rdm -o " + operation_type) + test = ClientServerTest(cmdline_args, f"fi_cq_data -e rdm -o" + operation_type, fabric=fabric) test.run() diff --git a/fabtests/pytest/efa/test_efa_device_selection.py b/fabtests/pytest/efa/test_efa_device_selection.py index c2bfe1d1339..09c3a5c3db7 100644 --- a/fabtests/pytest/efa/test_efa_device_selection.py +++ b/fabtests/pytest/efa/test_efa_device_selection.py @@ -6,7 +6,7 @@ # This test must be run in serial mode because it checks the hw counter @pytest.mark.serial @pytest.mark.functional -def test_efa_device_selection(cmdline_args): +def test_efa_device_selection(cmdline_args, fabric): if cmdline_args.server_id == cmdline_args.client_id: pytest.skip("EFA device selection test requires 2 nodes") @@ -46,7 +46,7 @@ def test_efa_device_selection(cmdline_args): cmdline_args_copy.additional_client_arguments = "-d " + client_domain_name cmdline_args_copy.strict_fabtests_mode = strict_fabtests_mode - test = ClientServerTest(cmdline_args_copy, command, message_size="1000", prefix_type=prefix_type, timeout=300) + test = ClientServerTest(cmdline_args_copy, command, message_size="1000", prefix_type=prefix_type, timeout=300, fabric=fabric) test.run() server_tx_bytes_after_test = efa_retrieve_hw_counter_value(cmdline_args.server_id, "tx_bytes", server_device_name) diff --git a/fabtests/pytest/efa/test_efa_info.py b/fabtests/pytest/efa/test_efa_info.py index 5e90cb79254..e939f543791 100644 --- a/fabtests/pytest/efa/test_efa_info.py +++ b/fabtests/pytest/efa/test_efa_info.py @@ -2,15 +2,16 @@ from common import UnitTest from efa_common import efa_retrieve_gid +# TODO - extend and generalize to efa-direct @pytest.mark.unit def test_efa_info(cmdline_args): test = UnitTest(cmdline_args, "fi_efa_info_test") test.run() @pytest.mark.unit -def test_comm_getinfo(cmdline_args): +def test_comm_getinfo(cmdline_args, 
fabric): gid = efa_retrieve_gid(cmdline_args.server_id) # use GID as source address and dest address - test = UnitTest(cmdline_args, f"fi_getinfo_test -s {gid} {gid}") + test = UnitTest(cmdline_args, f"fi_getinfo_test -s {gid} {gid} -f {fabric} ") test.run() diff --git a/fabtests/pytest/efa/test_efa_protocol_selection.py b/fabtests/pytest/efa/test_efa_protocol_selection.py index 949f2982304..bf390d6c3d7 100644 --- a/fabtests/pytest/efa/test_efa_protocol_selection.py +++ b/fabtests/pytest/efa/test_efa_protocol_selection.py @@ -8,7 +8,7 @@ @pytest.mark.functional @pytest.mark.cuda_memory @pytest.mark.parametrize("fabtest_name,cntrl_env_var", [("fi_rdm_tagged_bw", "FI_EFA_INTER_MIN_READ_MESSAGE_SIZE"), ("fi_rma_bw", "FI_EFA_INTER_MIN_READ_WRITE_SIZE")]) -def test_transfer_with_read_protocol_cuda(cmdline_args, fabtest_name, cntrl_env_var): +def test_transfer_with_read_protocol_cuda(cmdline_args, fabtest_name, cntrl_env_var, fabric): """ Verify that the read protocol is used for a 1024 byte message when the env variable switches are set to force the read protocol at 1000 bytes. 
@@ -50,7 +50,8 @@ def test_transfer_with_read_protocol_cuda(cmdline_args, fabtest_name, cntrl_env_ completion_semantic="transmit_complete", memory_type="cuda_to_cuda", message_size=message_size, - warmup_iteration_type="0") + warmup_iteration_type="0", + fabric=fabric) server_read_wrs_after_test = efa_retrieve_hw_counter_value(cmdline_args.server_id, "rdma_read_wrs") server_read_bytes_after_test = efa_retrieve_hw_counter_value(cmdline_args.server_id, "rdma_read_bytes") diff --git a/fabtests/pytest/efa/test_efa_shm_addr.py b/fabtests/pytest/efa/test_efa_shm_addr.py index 4f4ad487f26..0ab5ec72227 100644 --- a/fabtests/pytest/efa/test_efa_shm_addr.py +++ b/fabtests/pytest/efa/test_efa_shm_addr.py @@ -3,7 +3,7 @@ @pytest.mark.multinode -def test_efa_shm_addr(cmdline_args): +def test_efa_shm_addr(cmdline_args, fabric): server_id = cmdline_args.server_id client_id = cmdline_args.client_id if client_id == server_id: @@ -13,7 +13,7 @@ def test_efa_shm_addr(cmdline_args): # inserted for the 2nd client could be different # from its efa fi_addr. 
client_hostname_list = [client_id, server_id] - client_base_command = "fi_rdm" + client_base_command = f"fi_rdm -f {fabric}" server_base_command = client_base_command + " -C {}".format(len(client_hostname_list)) test = MultinodeTest(cmdline_args, server_base_command, client_base_command, client_hostname_list, run_client_asynchronously=False) diff --git a/fabtests/pytest/efa/test_flood_peer.py b/fabtests/pytest/efa/test_flood_peer.py index ee321e007f2..660cbb916d5 100644 --- a/fabtests/pytest/efa/test_flood_peer.py +++ b/fabtests/pytest/efa/test_flood_peer.py @@ -1,8 +1,8 @@ import pytest @pytest.mark.functional -def test_flood_peer(cmdline_args): +def test_flood_peer(cmdline_args, fabric): from common import ClientServerTest - test = ClientServerTest(cmdline_args, "fi_flood -e rdm -W 6400 -S 512 -T 5", - timeout=300) + test = ClientServerTest(cmdline_args, f"fi_flood -e rdm -W 6400 -S 512 -T 5", + timeout=300, fabric=fabric) test.run() diff --git a/fabtests/pytest/efa/test_fork_support.py b/fabtests/pytest/efa/test_fork_support.py index 9c6d5005bd2..ab4c87531fe 100644 --- a/fabtests/pytest/efa/test_fork_support.py +++ b/fabtests/pytest/efa/test_fork_support.py @@ -3,7 +3,7 @@ @pytest.mark.functional @pytest.mark.parametrize("environment_variable", ["FI_EFA_FORK_SAFE", "RDMAV_FORK_SAFE"]) -def test_fork_support(cmdline_args, completion_semantic, environment_variable): +def test_fork_support(cmdline_args, completion_semantic, environment_variable, fabric): from common import ClientServerTest import copy cmdline_args_copy = copy.copy(cmdline_args) @@ -11,6 +11,6 @@ def test_fork_support(cmdline_args, completion_semantic, environment_variable): cmdline_args_copy.append_environ("{}=1".format(environment_variable)) test = ClientServerTest(cmdline_args_copy, "fi_rdm_tagged_bw -K", completion_semantic=completion_semantic, - datacheck_type="with_datacheck") + datacheck_type="with_datacheck", fabric=fabric) test.run() diff --git a/fabtests/pytest/efa/test_mr.py 
b/fabtests/pytest/efa/test_mr.py index f47c2d3fafe..0a470930046 100644 --- a/fabtests/pytest/efa/test_mr.py +++ b/fabtests/pytest/efa/test_mr.py @@ -18,7 +18,7 @@ def test_mr_host(cmdline_args): pytest.param("neuron", marks=pytest.mark.neuron_memory), ], ) -def test_mr_hmem(cmdline_args, hmem_type): +def test_mr_hmem(cmdline_args, hmem_type, fabric): if hmem_type == "cuda" and not has_cuda(cmdline_args.server_id): pytest.skip("no cuda device") if hmem_type == "neuron" and not has_neuron(cmdline_args.server_id): @@ -26,7 +26,7 @@ def test_mr_hmem(cmdline_args, hmem_type): cmdline_args_copy = copy.copy(cmdline_args) - test_command = f"fi_mr_test -D {hmem_type}" + test_command = f"fi_mr_test -D {hmem_type} -f {fabric}" if cmdline_args.do_dmabuf_reg_for_hmem: test_command += " -R" diff --git a/fabtests/pytest/efa/test_multi_ep.py b/fabtests/pytest/efa/test_multi_ep.py index 634529f0067..7f0130d9b8b 100644 --- a/fabtests/pytest/efa/test_multi_ep.py +++ b/fabtests/pytest/efa/test_multi_ep.py @@ -2,10 +2,10 @@ @pytest.mark.functional @pytest.mark.parametrize("shared_cq", [True, False]) -def test_multi_ep(cmdline_args, shared_cq): +def test_multi_ep(cmdline_args, shared_cq, fabric): from common import ClientServerTest cmd = "fi_multi_ep -e rdm" if shared_cq: cmd += " -Q" - test = ClientServerTest(cmdline_args, cmd) + test = ClientServerTest(cmdline_args, cmd, fabric=fabric) test.run() diff --git a/fabtests/pytest/efa/test_multi_recv.py b/fabtests/pytest/efa/test_multi_recv.py index d68efac4b9f..38acb3ce008 100644 --- a/fabtests/pytest/efa/test_multi_recv.py +++ b/fabtests/pytest/efa/test_multi_recv.py @@ -4,10 +4,11 @@ [pytest.param("short", marks=pytest.mark.short), pytest.param("standard", marks=pytest.mark.standard)]) @pytest.mark.parametrize("message_size", ["1024", "8192"]) -def test_multi_recv(cmdline_args, iteration_type, message_size): +def test_multi_recv(cmdline_args, iteration_type, message_size, fabric): from common import ClientServerTest test = 
ClientServerTest(cmdline_args, "fi_multi_recv -e rdm", iteration_type, - message_size=message_size) + message_size=message_size, + fabric=fabric) test.run() diff --git a/fabtests/pytest/efa/test_rdm.py b/fabtests/pytest/efa/test_rdm.py index d42dc6dea56..f7a3942019d 100644 --- a/fabtests/pytest/efa/test_rdm.py +++ b/fabtests/pytest/efa/test_rdm.py @@ -1,4 +1,3 @@ -from default.test_rdm import test_rdm, test_rdm_bw_functional from efa.efa_common import efa_run_client_server_test from common import perf_progress_model_cli @@ -6,80 +5,98 @@ import copy +@pytest.mark.functional +def test_rdm_efa(cmdline_args, completion_semantic, fabric): + from common import ClientServerTest + test = ClientServerTest(cmdline_args, "fi_rdm", completion_semantic=completion_semantic, fabric=fabric) + test.run() + +@pytest.mark.functional +def test_rdm_bw_functional_efa(cmdline_args, completion_semantic, fabric): + from common import ClientServerTest + test = ClientServerTest(cmdline_args, "fi_flood -e rdm -v -T 1", completion_semantic=completion_semantic, fabric=fabric) + test.run() + @pytest.mark.parametrize("iteration_type", [pytest.param("short", marks=pytest.mark.short), pytest.param("standard", marks=pytest.mark.standard)]) -def test_rdm_pingpong(cmdline_args, iteration_type, completion_semantic, memory_type_bi_dir, completion_type): +def test_rdm_pingpong(cmdline_args, iteration_type, completion_semantic, memory_type_bi_dir, completion_type, fabric): command = "fi_rdm_pingpong" + " " + perf_progress_model_cli efa_run_client_server_test(cmdline_args, command, iteration_type, - completion_semantic, memory_type_bi_dir, "all", completion_type=completion_type) + completion_semantic, memory_type_bi_dir, "all", + completion_type=completion_type, fabric=fabric) @pytest.mark.functional @pytest.mark.serial -def test_mr_exhaustion_rdm_pingpong(cmdline_args, completion_semantic): +def test_mr_exhaustion_rdm_pingpong(cmdline_args, completion_semantic, fabric): 
efa_run_client_server_test(cmdline_args, "fi_efa_exhaust_mr_reg_rdm_pingpong", "short", - completion_semantic, "host_to_host", "all", timeout=1000) + completion_semantic, "host_to_host", "all", timeout=1000, + fabric=fabric) @pytest.mark.functional -def test_rdm_pingpong_range(cmdline_args, completion_semantic, memory_type_bi_dir, message_size): +def test_rdm_pingpong_range(cmdline_args, completion_semantic, memory_type_bi_dir, message_size, fabric): efa_run_client_server_test(cmdline_args, "fi_rdm_pingpong", "short", - completion_semantic, memory_type_bi_dir, message_size) + completion_semantic, memory_type_bi_dir, message_size, fabric=fabric) @pytest.mark.functional -def test_rdm_pingpong_no_inject_range(cmdline_args, completion_semantic, inject_message_size): +def test_rdm_pingpong_no_inject_range(cmdline_args, completion_semantic, inject_message_size, fabric): efa_run_client_server_test(cmdline_args, "fi_rdm_pingpong -j 0", "short", - completion_semantic, "host_to_host", inject_message_size) + completion_semantic, "host_to_host", inject_message_size, fabric=fabric) @pytest.mark.parametrize("iteration_type", [pytest.param("short", marks=pytest.mark.short), pytest.param("standard", marks=pytest.mark.standard)]) -def test_rdm_tagged_pingpong(cmdline_args, iteration_type, completion_semantic, memory_type_bi_dir, completion_type): +def test_rdm_tagged_pingpong(cmdline_args, iteration_type, completion_semantic, memory_type_bi_dir, completion_type, fabric): command = "fi_rdm_tagged_pingpong" + " " + perf_progress_model_cli efa_run_client_server_test(cmdline_args, command, iteration_type, - completion_semantic, memory_type_bi_dir, "all", completion_type=completion_type) + completion_semantic, memory_type_bi_dir, "all", completion_type=completion_type, + fabric=fabric) @pytest.mark.functional -def test_rdm_tagged_pingpong_range(cmdline_args, completion_semantic, memory_type_bi_dir, message_size): +def test_rdm_tagged_pingpong_range(cmdline_args, completion_semantic, 
memory_type_bi_dir, message_size, fabric): efa_run_client_server_test(cmdline_args, "fi_rdm_tagged_pingpong", "short", - completion_semantic, memory_type_bi_dir, message_size) + completion_semantic, memory_type_bi_dir, message_size, + fabric=fabric) @pytest.mark.parametrize("iteration_type", [pytest.param("short", marks=pytest.mark.short), pytest.param("standard", marks=pytest.mark.standard)]) -def test_rdm_tagged_bw(cmdline_args, iteration_type, completion_semantic, memory_type, completion_type): +def test_rdm_tagged_bw(cmdline_args, iteration_type, completion_semantic, memory_type, completion_type, fabric): command = "fi_rdm_tagged_bw" + " " + perf_progress_model_cli efa_run_client_server_test(cmdline_args, command, iteration_type, - completion_semantic, memory_type, "all", completion_type=completion_type) + completion_semantic, memory_type, "all", completion_type=completion_type, + fabric=fabric) @pytest.mark.functional -def test_rdm_tagged_bw_range(cmdline_args, completion_semantic, memory_type, message_size): +def test_rdm_tagged_bw_range(cmdline_args, completion_semantic, memory_type, message_size, fabric): efa_run_client_server_test(cmdline_args, "fi_rdm_tagged_bw", "short", - completion_semantic, memory_type, message_size) + completion_semantic, memory_type, message_size, fabric=fabric) @pytest.mark.functional -def test_rdm_tagged_bw_no_inject_range(cmdline_args, completion_semantic, inject_message_size): +def test_rdm_tagged_bw_no_inject_range(cmdline_args, completion_semantic, inject_message_size, fabric): efa_run_client_server_test(cmdline_args, "fi_rdm_tagged_bw -j 0", "short", - completion_semantic, "host_to_host", inject_message_size) + completion_semantic, "host_to_host", inject_message_size, fabric=fabric) @pytest.mark.functional @pytest.mark.parametrize("env_vars", [["FI_EFA_TX_SIZE=64"], ["FI_EFA_RX_SIZE=64"], ["FI_EFA_TX_SIZE=64", "FI_EFA_RX_SIZE=64"]]) -def test_rdm_tagged_bw_small_tx_rx(cmdline_args, completion_semantic, memory_type, 
completion_type, env_vars): +def test_rdm_tagged_bw_small_tx_rx(cmdline_args, completion_semantic, memory_type, completion_type, env_vars, fabric): cmdline_args_copy = copy.copy(cmdline_args) for env_var in env_vars: cmdline_args_copy.append_environ(env_var) # Use a window size larger than tx/rx size efa_run_client_server_test(cmdline_args_copy, "fi_rdm_tagged_bw -W 128", "short", - completion_semantic, memory_type, "all", completion_type=completion_type) + completion_semantic, memory_type, "all", completion_type=completion_type, + fabric=fabric) @pytest.mark.functional -def test_rdm_tagged_bw_use_fi_more(cmdline_args, completion_semantic, memory_type, message_size): +def test_rdm_tagged_bw_use_fi_more(cmdline_args, completion_semantic, memory_type, message_size, fabric): efa_run_client_server_test(cmdline_args, "fi_rdm_tagged_bw --use-fi-more", - "short", completion_semantic, memory_type, message_size) + "short", completion_semantic, memory_type, message_size, fabric=fabric) @pytest.mark.parametrize("iteration_type", [pytest.param("short", marks=pytest.mark.short), pytest.param("standard", marks=pytest.mark.standard)]) -def test_rdm_atomic(cmdline_args, iteration_type, completion_semantic, memory_type): +def test_rdm_atomic(cmdline_args, iteration_type, completion_semantic, memory_type, fabric): from copy import copy from common import ClientServerTest @@ -93,7 +110,7 @@ def test_rdm_atomic(cmdline_args, iteration_type, completion_semantic, memory_ty cmdline_args_copy = copy(cmdline_args) command = "fi_rdm_atomic" + " " + perf_progress_model_cli test = ClientServerTest(cmdline_args_copy, "fi_rdm_atomic", iteration_type, completion_semantic, - memory_type=memory_type, timeout=1800) + memory_type=memory_type, timeout=1800, fabric=fabric) test.run() @pytest.mark.functional @@ -108,36 +125,36 @@ def test_rdm_tagged_peek(cmdline_args): # This test is run in serial mode because it takes a lot of memory @pytest.mark.serial @pytest.mark.functional -def 
test_rdm_pingpong_1G(cmdline_args, completion_semantic): +def test_rdm_pingpong_1G(cmdline_args, completion_semantic, fabric): # Default window size is 64 resulting in 128GB being registered, which # exceeds max number of registered host pages efa_run_client_server_test(cmdline_args, "fi_rdm_pingpong -W 1", 2, completion_semantic=completion_semantic, message_size=1073741824, - memory_type="host_to_host", warmup_iteration_type=0) + memory_type="host_to_host", warmup_iteration_type=0, fabric=fabric) @pytest.mark.functional -def test_rdm_pingpong_zcpy_recv(cmdline_args, memory_type_bi_dir, zcpy_recv_max_msg_size, zcpy_recv_message_size): +def test_rdm_pingpong_zcpy_recv(cmdline_args, memory_type_bi_dir, zcpy_recv_max_msg_size, zcpy_recv_message_size, fabric): if cmdline_args.server_id == cmdline_args.client_id: pytest.skip("no zero copy recv for intra-node communication") cmdline_args_copy = copy.copy(cmdline_args) cmdline_args_copy.append_environ("FI_EFA_ENABLE_SHM_TRANSFER=0") efa_run_client_server_test(cmdline_args_copy, f"fi_rdm_pingpong --max-msg-size {zcpy_recv_max_msg_size}", - "short", "transmit_complete", memory_type_bi_dir, zcpy_recv_message_size) + "short", "transmit_complete", memory_type_bi_dir, zcpy_recv_message_size, fabric=fabric) @pytest.mark.functional -def test_rdm_bw_zcpy_recv(cmdline_args, memory_type, zcpy_recv_max_msg_size, zcpy_recv_message_size): +def test_rdm_bw_zcpy_recv(cmdline_args, memory_type, zcpy_recv_max_msg_size, zcpy_recv_message_size, fabric): if cmdline_args.server_id == cmdline_args.client_id: pytest.skip("no zero copy recv for intra-node communication") cmdline_args_copy = copy.copy(cmdline_args) cmdline_args_copy.append_environ("FI_EFA_ENABLE_SHM_TRANSFER=0") efa_run_client_server_test(cmdline_args_copy, f"fi_rdm_bw --max-msg-size {zcpy_recv_max_msg_size}", - "short", "transmit_complete", memory_type, zcpy_recv_message_size) + "short", "transmit_complete", memory_type, zcpy_recv_message_size, fabric=fabric) 
@pytest.mark.functional -def test_rdm_bw_zcpy_recv_use_fi_more(cmdline_args, memory_type, zcpy_recv_max_msg_size, zcpy_recv_message_size): +def test_rdm_bw_zcpy_recv_use_fi_more(cmdline_args, memory_type, zcpy_recv_max_msg_size, zcpy_recv_message_size, fabric): if cmdline_args.server_id == cmdline_args.client_id: pytest.skip("no zero copy recv for intra-node communication") cmdline_args_copy = copy.copy(cmdline_args) cmdline_args_copy.append_environ("FI_EFA_ENABLE_SHM_TRANSFER=0") efa_run_client_server_test(cmdline_args_copy, f"fi_rdm_bw --use-fi-more --max-msg-size {zcpy_recv_max_msg_size}", - "short", "transmit_complete", memory_type, zcpy_recv_message_size) + "short", "transmit_complete", memory_type, zcpy_recv_message_size, fabric=fabric) diff --git a/fabtests/pytest/efa/test_rma_bw.py b/fabtests/pytest/efa/test_rma_bw.py index 98ff0a3b0d2..49b30058fbd 100644 --- a/fabtests/pytest/efa/test_rma_bw.py +++ b/fabtests/pytest/efa/test_rma_bw.py @@ -7,15 +7,16 @@ @pytest.mark.parametrize("iteration_type", [pytest.param("short", marks=pytest.mark.short), pytest.param("standard", marks=pytest.mark.standard)]) -def test_rma_bw(cmdline_args, iteration_type, rma_operation_type, rma_bw_completion_semantic, rma_bw_memory_type): +def test_rma_bw(cmdline_args, iteration_type, rma_operation_type, rma_bw_completion_semantic, rma_bw_memory_type, fabric): command = "fi_rma_bw -e rdm" command = command + " -o " + rma_operation_type + " " + perf_progress_model_cli # rma_bw test with data verification takes longer to finish timeout = max(540, cmdline_args.timeout) - efa_run_client_server_test(cmdline_args, command, iteration_type, rma_bw_completion_semantic, rma_bw_memory_type, "all", timeout=timeout) + efa_run_client_server_test(cmdline_args, command, iteration_type, rma_bw_completion_semantic, + rma_bw_memory_type, "all", timeout=timeout, fabric=fabric) @pytest.mark.parametrize("env_vars", [["FI_EFA_TX_SIZE=64"], ["FI_EFA_RX_SIZE=64"], ["FI_EFA_TX_SIZE=64", "FI_EFA_RX_SIZE=64"]]) 
-def test_rma_bw_small_tx_rx(cmdline_args, rma_operation_type, rma_bw_completion_semantic, rma_bw_memory_type, env_vars): +def test_rma_bw_small_tx_rx(cmdline_args, rma_operation_type, rma_bw_completion_semantic, rma_bw_memory_type, env_vars, fabric): cmdline_args_copy = copy.copy(cmdline_args) for env_var in env_vars: cmdline_args_copy.append_environ(env_var) @@ -24,24 +25,27 @@ def test_rma_bw_small_tx_rx(cmdline_args, rma_operation_type, rma_bw_completion_ command = command + " -o " + rma_operation_type + " " + perf_progress_model_cli # rma_bw test with data verification takes longer to finish timeout = max(540, cmdline_args_copy.timeout) - efa_run_client_server_test(cmdline_args_copy, command, "short", rma_bw_completion_semantic, rma_bw_memory_type, "all", timeout=timeout) + efa_run_client_server_test(cmdline_args_copy, command, "short", rma_bw_completion_semantic, + rma_bw_memory_type, "all", timeout=timeout, fabric=fabric) @pytest.mark.functional -def test_rma_bw_range(cmdline_args, rma_operation_type, rma_bw_completion_semantic, message_size, rma_bw_memory_type): +def test_rma_bw_range(cmdline_args, rma_operation_type, rma_bw_completion_semantic, message_size, rma_bw_memory_type, fabric): command = "fi_rma_bw -e rdm" command = command + " -o " + rma_operation_type # rma_bw test with data verification takes longer to finish timeout = max(540, cmdline_args.timeout) - efa_run_client_server_test(cmdline_args, command, "short", rma_bw_completion_semantic, rma_bw_memory_type, message_size, timeout=timeout) + efa_run_client_server_test(cmdline_args, command, "short", rma_bw_completion_semantic, + rma_bw_memory_type, message_size, timeout=timeout, fabric=fabric) @pytest.mark.functional -def test_rma_bw_range_no_inject(cmdline_args, rma_operation_type, rma_bw_completion_semantic, inject_message_size): +def test_rma_bw_range_no_inject(cmdline_args, rma_operation_type, rma_bw_completion_semantic, inject_message_size, fabric): command = "fi_rma_bw -e rdm -j 0" command = 
command + " -o " + rma_operation_type # rma_bw test with data verification takes longer to finish timeout = max(540, cmdline_args.timeout) - efa_run_client_server_test(cmdline_args, command, "short", rma_bw_completion_semantic, "host_to_host", inject_message_size, timeout=timeout) + efa_run_client_server_test(cmdline_args, command, "short", rma_bw_completion_semantic, + "host_to_host", inject_message_size, timeout=timeout, fabric=fabric) # This test is run in serial mode because it takes a lot of memory @@ -49,7 +53,7 @@ def test_rma_bw_range_no_inject(cmdline_args, rma_operation_type, rma_bw_complet @pytest.mark.functional # TODO Add "writedata", "write" back in when EFA firmware bug is fixed @pytest.mark.parametrize("operation_type", ["read"]) -def test_rma_bw_1G(cmdline_args, operation_type, rma_bw_completion_semantic): +def test_rma_bw_1G(cmdline_args, operation_type, rma_bw_completion_semantic, fabric): # Default window size is 64 resulting in 128GB being registered, which # exceeds max number of registered host pages timeout = max(540, cmdline_args.timeout) @@ -57,14 +61,14 @@ def test_rma_bw_1G(cmdline_args, operation_type, rma_bw_completion_semantic): command = command + " -o " + operation_type efa_run_client_server_test(cmdline_args, command, 2, completion_semantic=rma_bw_completion_semantic, message_size=1073741824, - memory_type="host_to_host", warmup_iteration_type=0, timeout=timeout) + memory_type="host_to_host", warmup_iteration_type=0, timeout=timeout, fabric=fabric) @pytest.mark.functional @pytest.mark.parametrize("operation_type", ["writedata", "write"]) -def test_rma_bw_use_fi_more(cmdline_args, operation_type, rma_bw_completion_semantic, inject_message_size): +def test_rma_bw_use_fi_more(cmdline_args, operation_type, rma_bw_completion_semantic, inject_message_size, fabric): command = "fi_rma_bw -e rdm -j 0 --use-fi-more" command = command + " -o " + operation_type # rma_bw test with data verification takes longer to finish timeout = max(540, 
cmdline_args.timeout) efa_run_client_server_test(cmdline_args, command, "short", rma_bw_completion_semantic, - "host_to_host", inject_message_size, timeout=timeout) + "host_to_host", inject_message_size, timeout=timeout, fabric=fabric) diff --git a/fabtests/pytest/efa/test_rma_pingpong.py b/fabtests/pytest/efa/test_rma_pingpong.py index 7d028f9a09a..0c1869614e7 100644 --- a/fabtests/pytest/efa/test_rma_pingpong.py +++ b/fabtests/pytest/efa/test_rma_pingpong.py @@ -14,23 +14,26 @@ def rma_pingpong_message_size(request): @pytest.mark.parametrize("iteration_type", [pytest.param("short", marks=pytest.mark.short), pytest.param("standard", marks=pytest.mark.standard)]) -def test_rma_pingpong(cmdline_args, iteration_type, operation_type, rma_bw_completion_semantic, memory_type_bi_dir): +def test_rma_pingpong(cmdline_args, iteration_type, operation_type, rma_bw_completion_semantic, memory_type_bi_dir, fabric): command = "fi_rma_pingpong -e rdm" command = command + " -o " + operation_type + " " + perf_progress_model_cli - efa_run_client_server_test(cmdline_args, command, iteration_type, rma_bw_completion_semantic, memory_type_bi_dir, "all") + efa_run_client_server_test(cmdline_args, command, iteration_type, rma_bw_completion_semantic, + memory_type_bi_dir, "all", fabric=fabric) @pytest.mark.functional @pytest.mark.parametrize("operation_type", ["writedata"]) -def test_rma_pingpong_range(cmdline_args, operation_type, rma_bw_completion_semantic, rma_pingpong_message_size, memory_type_bi_dir): +def test_rma_pingpong_range(cmdline_args, operation_type, rma_bw_completion_semantic, rma_pingpong_message_size, memory_type_bi_dir, fabric): command = "fi_rma_pingpong -e rdm" command = command + " -o " + operation_type - efa_run_client_server_test(cmdline_args, command, "short", rma_bw_completion_semantic, memory_type_bi_dir, rma_pingpong_message_size) + efa_run_client_server_test(cmdline_args, command, "short", rma_bw_completion_semantic, + memory_type_bi_dir, 
rma_pingpong_message_size, fabric=fabric) @pytest.mark.functional @pytest.mark.parametrize("operation_type", ["writedata"]) -def test_rma_pingpong_range_no_inject(cmdline_args, operation_type, rma_bw_completion_semantic, rma_pingpong_message_size, memory_type_bi_dir): +def test_rma_pingpong_range_no_inject(cmdline_args, operation_type, rma_bw_completion_semantic, rma_pingpong_message_size, memory_type_bi_dir, fabric): command = "fi_rma_pingpong -e rdm -j 0" command = command + " -o " + operation_type - efa_run_client_server_test(cmdline_args, command, "short", rma_bw_completion_semantic, memory_type_bi_dir, rma_pingpong_message_size) + efa_run_client_server_test(cmdline_args, command, "short", rma_bw_completion_semantic, + memory_type_bi_dir, rma_pingpong_message_size, fabric=fabric) diff --git a/fabtests/pytest/efa/test_rnr.py b/fabtests/pytest/efa/test_rnr.py index a08f78c97a9..2d5e2f67b49 100644 --- a/fabtests/pytest/efa/test_rnr.py +++ b/fabtests/pytest/efa/test_rnr.py @@ -2,7 +2,7 @@ import copy @pytest.mark.functional -def test_rnr_read_cq_error(cmdline_args): +def test_rnr_read_cq_error(cmdline_args, fabric): from common import ClientServerTest if cmdline_args.server_id == cmdline_args.client_id: @@ -15,7 +15,7 @@ def test_rnr_read_cq_error(cmdline_args): # in this case. 
cmdline_args_copy = copy.copy(cmdline_args) cmdline_args_copy.strict_fabtests_mode = False - test = ClientServerTest(cmdline_args_copy, "fi_efa_rnr_read_cq_error") + test = ClientServerTest(cmdline_args_copy, "fi_efa_rnr_read_cq_error", fabric=fabric) test.run() packet_type_option_map = { @@ -50,7 +50,7 @@ def test_rnr_read_cq_error(cmdline_args): @pytest.mark.functional @pytest.mark.parametrize("packet_type", packet_type_option_map.keys()) -def test_rnr_queue_resend(cmdline_args, packet_type): +def test_rnr_queue_resend(cmdline_args, packet_type, fabric): from common import ClientServerTest if cmdline_args.server_id == cmdline_args.client_id: @@ -64,5 +64,5 @@ def test_rnr_queue_resend(cmdline_args, packet_type): cmdline_args_copy = copy.copy(cmdline_args) cmdline_args_copy.strict_fabtests_mode = False test = ClientServerTest(cmdline_args_copy, - "fi_efa_rnr_queue_resend " + packet_type_option_map[packet_type]) + "fi_efa_rnr_queue_resend " + packet_type_option_map[packet_type], fabric=fabric) test.run() diff --git a/fabtests/pytest/efa/test_runt.py b/fabtests/pytest/efa/test_runt.py index 701406be26d..e451809e032 100644 --- a/fabtests/pytest/efa/test_runt.py +++ b/fabtests/pytest/efa/test_runt.py @@ -11,7 +11,7 @@ pytest.param("cuda_to_cuda", "gdrcopy", marks=pytest.mark.cuda_memory), pytest.param("cuda_to_cuda", "localread", marks=pytest.mark.cuda_memory), pytest.param("neuron_to_neuron", None, marks=pytest.mark.neuron_memory)]) -def test_runt_read_functional(cmdline_args, memory_type, copy_method): +def test_runt_read_functional(cmdline_args, memory_type, copy_method, fabric): """ Verify runt reading protocol is working as expected by sending 1 message of 256 KB. 
64 KB of the message will be transfered using EFA device's send capability @@ -45,7 +45,8 @@ def test_runt_read_functional(cmdline_args, memory_type, copy_method): completion_semantic="transmit_complete", memory_type=memory_type, message_size="262144", - warmup_iteration_type="0") + warmup_iteration_type="0", + fabric=fabric) server_read_wrs_after_test = efa_retrieve_hw_counter_value(cmdline_args.server_id, "rdma_read_wrs") server_read_bytes_after_test =efa_retrieve_hw_counter_value(cmdline_args.server_id, "rdma_read_bytes") diff --git a/fabtests/pytest/efa/test_setopt.py b/fabtests/pytest/efa/test_setopt.py index 6d5a2877541..b1388b99176 100644 --- a/fabtests/pytest/efa/test_setopt.py +++ b/fabtests/pytest/efa/test_setopt.py @@ -1,8 +1,8 @@ import pytest @pytest.mark.unit -def test_setopt(cmdline_args): +def test_setopt(cmdline_args, fabric): from common import UnitTest - test = UnitTest(cmdline_args, "fi_setopt_test") + test = UnitTest(cmdline_args, f"fi_setopt_test -f {fabric}") test.run() diff --git a/fabtests/pytest/efa/test_unexpected_msg.py b/fabtests/pytest/efa/test_unexpected_msg.py index dc1f93e3c3c..2e307c0be6c 100644 --- a/fabtests/pytest/efa/test_unexpected_msg.py +++ b/fabtests/pytest/efa/test_unexpected_msg.py @@ -9,7 +9,7 @@ @pytest.mark.functional @pytest.mark.parametrize("msg_size", [1, 512, 9000, 1048576]) # cover various switch points of shm/efa protocols @pytest.mark.parametrize("msg_count", [1, 1024, 2048]) # below and above shm's default rx size -def test_unexpected_msg(cmdline_args, msg_size, msg_count, memory_type, completion_semantic): +def test_unexpected_msg(cmdline_args, msg_size, msg_count, memory_type, completion_semantic, fabric): from common import ClientServerTest if cmdline_args.server_id == cmdline_args.client_id: if (msg_size > SHM_DEFAULT_MAX_INJECT_SIZE or memory_type != "host_to_host" or completion_semantic == "delivery_complete") and msg_count > SHM_DEFAULT_RX_SIZE: @@ -28,4 +28,4 @@ def test_unexpected_msg(cmdline_args, 
msg_size, msg_count, memory_type, completi efa_run_client_server_test(cmdline_args, f"fi_unexpected_msg -e rdm -M {msg_count}", iteration_type="short", completion_semantic=completion_semantic, memory_type=memory_type, - message_size=msg_size, completion_type="queue", timeout=1800) + message_size=msg_size, completion_type="queue", timeout=1800, fabric=fabric)