fabtests: Run efa tests with efa fabric name

The current fabtests are designed to test the EFA RDM path, which will continue to use the fabric name efa. The efa-direct path will use the fabric name efa-direct and will not be selected by the current tests.

Signed-off-by: Sai Sunku <[email protected]>
HewlettPackard · Feb 4, 2025 · 582d7f2 · 582d7f2
1 parent 1865e07
commit 582d7f2
Show file tree

Hide file tree

Showing 20 changed files with 121 additions and 88 deletions.
diff --git a/fabtests/pytest/efa/conftest.py b/fabtests/pytest/efa/conftest.py
@@ -74,6 +74,11 @@ def zcpy_recv_message_size(request):
 def zcpy_recv_max_msg_size(request):
     return 8192
 
+# TODO - add efa-direct tests
+@pytest.fixture(scope="module", params=["efa"])
+def fabric(request):
+    return request.param
+
 @pytest.hookimpl(hookwrapper=True)
 def pytest_collection_modifyitems(session, config, items):
     # Called after collection has been performed, may filter or re-order the items in-place

diff --git a/fabtests/pytest/efa/efa_common.py b/fabtests/pytest/efa/efa_common.py
@@ -7,7 +7,7 @@
 def efa_run_client_server_test(cmdline_args, executable, iteration_type,
                                completion_semantic, memory_type, message_size,
                                warmup_iteration_type=None, timeout=None,
-                               completion_type="queue"):
+                               completion_type="queue", fabric=None):
     if timeout is None:
         timeout = cmdline_args.timeout
 
@@ -23,7 +23,7 @@ def efa_run_client_server_test(cmdline_args, executable, iteration_type,
                             memory_type=memory_type,
                             timeout=timeout,
                             warmup_iteration_type=warmup_iteration_type,
-                            completion_type=completion_type)
+                            completion_type=completion_type, fabric=fabric)
     test.run()
 
 @retry(retry_on_exception=is_ssh_connection_error, stop_max_attempt_number=3, wait_fixed=5000)

diff --git a/fabtests/pytest/efa/test_av.py b/fabtests/pytest/efa/test_av.py
@@ -1,7 +1,7 @@
 import pytest
 
 @pytest.mark.functional
-def test_av_xfer(cmdline_args):
+def test_av_xfer(cmdline_args, fabric):
     from common import ClientServerTest
-    test = ClientServerTest(cmdline_args, "fi_av_xfer -e rdm")
+    test = ClientServerTest(cmdline_args, "fi_av_xfer -e rdm", fabric=fabric)
     test.run()
diff --git a/fabtests/pytest/efa/test_cq.py b/fabtests/pytest/efa/test_cq.py
@@ -4,14 +4,14 @@
 # of cq that efa device can support
 @pytest.mark.serial
 @pytest.mark.unit
-def test_cq(cmdline_args):
+def test_cq(cmdline_args, fabric):
     from common import UnitTest
-    test = UnitTest(cmdline_args, "fi_cq_test")
+    test = UnitTest(cmdline_args, f"fi_cq_test -f {fabric}")
     test.run()
 
 @pytest.mark.functional
 @pytest.mark.parametrize("operation_type", ["senddata", "writedata"])
-def test_cq_data(cmdline_args, operation_type):
+def test_cq_data(cmdline_args, operation_type, fabric):
     from common import ClientServerTest
-    test = ClientServerTest(cmdline_args, "fi_cq_data -e rdm -o " + operation_type)
+    test = ClientServerTest(cmdline_args, "fi_cq_data -e rdm -o " + operation_type, fabric=fabric)
     test.run()
diff --git a/fabtests/pytest/efa/test_efa_device_selection.py b/fabtests/pytest/efa/test_efa_device_selection.py
@@ -6,7 +6,7 @@
 # This test must be run in serial mode because it checks the hw counter
 @pytest.mark.serial
 @pytest.mark.functional
-def test_efa_device_selection(cmdline_args):
+def test_efa_device_selection(cmdline_args, fabric):
 
     if cmdline_args.server_id == cmdline_args.client_id:
         pytest.skip("EFA device selection test requires 2 nodes")
@@ -46,7 +46,7 @@ def test_efa_device_selection(cmdline_args):
             cmdline_args_copy.additional_client_arguments = "-d " + client_domain_name
             cmdline_args_copy.strict_fabtests_mode = strict_fabtests_mode
 
-            test = ClientServerTest(cmdline_args_copy, command, message_size="1000", prefix_type=prefix_type, timeout=300)
+            test = ClientServerTest(cmdline_args_copy, command, message_size="1000", prefix_type=prefix_type, timeout=300, fabric=fabric)
             test.run()
 
             server_tx_bytes_after_test = efa_retrieve_hw_counter_value(cmdline_args.server_id, "tx_bytes", server_device_name)

diff --git a/fabtests/pytest/efa/test_efa_info.py b/fabtests/pytest/efa/test_efa_info.py
@@ -2,15 +2,16 @@
 from common import UnitTest
 from efa_common import efa_retrieve_gid
 
+# TODO - extend and generalize to efa-direct
 @pytest.mark.unit
 def test_efa_info(cmdline_args):
     test = UnitTest(cmdline_args, "fi_efa_info_test")
     test.run()
 
 @pytest.mark.unit
-def test_comm_getinfo(cmdline_args):
+def test_comm_getinfo(cmdline_args, fabric):
     gid = efa_retrieve_gid(cmdline_args.server_id)
 
     # use GID as source address and dest address
-    test = UnitTest(cmdline_args, f"fi_getinfo_test -s {gid} {gid}")
+    test = UnitTest(cmdline_args, f"fi_getinfo_test -s {gid} {gid} -f {fabric} ")
     test.run()
diff --git a/fabtests/pytest/efa/test_efa_protocol_selection.py b/fabtests/pytest/efa/test_efa_protocol_selection.py
@@ -8,7 +8,7 @@
 @pytest.mark.functional
 @pytest.mark.cuda_memory
 @pytest.mark.parametrize("fabtest_name,cntrl_env_var", [("fi_rdm_tagged_bw", "FI_EFA_INTER_MIN_READ_MESSAGE_SIZE"), ("fi_rma_bw", "FI_EFA_INTER_MIN_READ_WRITE_SIZE")])
-def test_transfer_with_read_protocol_cuda(cmdline_args, fabtest_name, cntrl_env_var):
+def test_transfer_with_read_protocol_cuda(cmdline_args, fabtest_name, cntrl_env_var, fabric):
     """
     Verify that the read protocol is used for a 1024 byte message when the env variable
     switches are set to force the read protocol at 1000 bytes.
@@ -50,7 +50,8 @@ def test_transfer_with_read_protocol_cuda(cmdline_args, fabtest_name, cntrl_env_
                                completion_semantic="transmit_complete",
                                memory_type="cuda_to_cuda",
                                message_size=message_size,
-                               warmup_iteration_type="0")
+                               warmup_iteration_type="0",
+                               fabric=fabric)
 
     server_read_wrs_after_test = efa_retrieve_hw_counter_value(cmdline_args.server_id, "rdma_read_wrs")
     server_read_bytes_after_test = efa_retrieve_hw_counter_value(cmdline_args.server_id, "rdma_read_bytes")

diff --git a/fabtests/pytest/efa/test_efa_shm_addr.py b/fabtests/pytest/efa/test_efa_shm_addr.py
@@ -3,7 +3,7 @@
 
 
 @pytest.mark.multinode
-def test_efa_shm_addr(cmdline_args):
+def test_efa_shm_addr(cmdline_args, fabric):
     server_id = cmdline_args.server_id
     client_id = cmdline_args.client_id
     if client_id == server_id:
@@ -13,7 +13,7 @@ def test_efa_shm_addr(cmdline_args):
     # inserted for the 2nd client could be different
     # from its efa fi_addr.
     client_hostname_list = [client_id, server_id]
-    client_base_command = "fi_rdm"
+    client_base_command = f"fi_rdm -f {fabric}"
     server_base_command = client_base_command + " -C {}".format(len(client_hostname_list))
     test = MultinodeTest(cmdline_args, server_base_command, client_base_command,
                          client_hostname_list, run_client_asynchronously=False)

diff --git a/fabtests/pytest/efa/test_flood_peer.py b/fabtests/pytest/efa/test_flood_peer.py
@@ -1,8 +1,8 @@
 import pytest
 
 @pytest.mark.functional
-def test_flood_peer(cmdline_args):
+def test_flood_peer(cmdline_args, fabric):
     from common import ClientServerTest
-    test = ClientServerTest(cmdline_args, "fi_flood -e rdm -W 6400 -S 512 -T 5",
-                            timeout=300)
+    test = ClientServerTest(cmdline_args, "fi_flood -e rdm -W 6400 -S 512 -T 5",
+                            timeout=300, fabric=fabric)
     test.run()
diff --git a/fabtests/pytest/efa/test_fork_support.py b/fabtests/pytest/efa/test_fork_support.py
@@ -3,14 +3,14 @@
 
 @pytest.mark.functional
 @pytest.mark.parametrize("environment_variable", ["FI_EFA_FORK_SAFE", "RDMAV_FORK_SAFE"])
-def test_fork_support(cmdline_args, completion_semantic, environment_variable):
+def test_fork_support(cmdline_args, completion_semantic, environment_variable, fabric):
     from common import ClientServerTest
     import copy
     cmdline_args_copy = copy.copy(cmdline_args)
 
     cmdline_args_copy.append_environ("{}=1".format(environment_variable))
     test = ClientServerTest(cmdline_args_copy, "fi_rdm_tagged_bw -K",
                             completion_semantic=completion_semantic,
-                            datacheck_type="with_datacheck")
+                            datacheck_type="with_datacheck", fabric=fabric)
     test.run()
 
diff --git a/fabtests/pytest/efa/test_mr.py b/fabtests/pytest/efa/test_mr.py
@@ -18,15 +18,15 @@ def test_mr_host(cmdline_args):
         pytest.param("neuron", marks=pytest.mark.neuron_memory),
     ],
 )
-def test_mr_hmem(cmdline_args, hmem_type):
+def test_mr_hmem(cmdline_args, hmem_type, fabric):
     if hmem_type == "cuda" and not has_cuda(cmdline_args.server_id):
         pytest.skip("no cuda device")
     if hmem_type == "neuron" and not has_neuron(cmdline_args.server_id):
         pytest.skip("no neuron device")
 
     cmdline_args_copy = copy.copy(cmdline_args)
 
-    test_command = f"fi_mr_test -D {hmem_type}"
+    test_command = f"fi_mr_test -D {hmem_type} -f {fabric}"
 
     if cmdline_args.do_dmabuf_reg_for_hmem:
         test_command += " -R"

diff --git a/fabtests/pytest/efa/test_multi_ep.py b/fabtests/pytest/efa/test_multi_ep.py
@@ -2,10 +2,10 @@
 
 @pytest.mark.functional
 @pytest.mark.parametrize("shared_cq", [True, False])
-def test_multi_ep(cmdline_args, shared_cq):
+def test_multi_ep(cmdline_args, shared_cq, fabric):
     from common import ClientServerTest
     cmd = "fi_multi_ep -e rdm"
     if shared_cq:
         cmd += "  -Q"
-    test = ClientServerTest(cmdline_args, cmd)
+    test = ClientServerTest(cmdline_args, cmd, fabric=fabric)
     test.run()
diff --git a/fabtests/pytest/efa/test_multi_recv.py b/fabtests/pytest/efa/test_multi_recv.py
@@ -4,10 +4,11 @@
                          [pytest.param("short", marks=pytest.mark.short),
                           pytest.param("standard", marks=pytest.mark.standard)])
 @pytest.mark.parametrize("message_size", ["1024", "8192"])
-def test_multi_recv(cmdline_args, iteration_type, message_size):
+def test_multi_recv(cmdline_args, iteration_type, message_size, fabric):
     from common import ClientServerTest
     test = ClientServerTest(cmdline_args,
             "fi_multi_recv -e rdm",
             iteration_type,
-            message_size=message_size)
+            message_size=message_size,
+            fabric=fabric)
     test.run()