Skip to content

Commit

Permalink
fabtests: Run efa tests with efa fabric name
Browse files (browse the repository at this point in the history)
Current fabtests are designed to test the EFA RDM path, which will
continue to have the fabric name efa. The efa-direct path will have the
fabric name efa-direct and will not be selected for the current tests.

Signed-off-by: Sai Sunku <[email protected]>
  • Loading branch information
sunkuamzn authored and shijin-aws committed Feb 4, 2025
1 parent 1865e07 commit 582d7f2
Show file tree
Hide file tree
Showing 20 changed files with 121 additions and 88 deletions.
5 changes: 5 additions & 0 deletions fabtests/pytest/efa/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,11 @@ def zcpy_recv_message_size(request):
def zcpy_recv_max_msg_size(request):
return 8192

# TODO - add efa-direct tests
@pytest.fixture(scope="module", params=["efa"])
def fabric(request):
return request.param

@pytest.hookimpl(hookwrapper=True)
def pytest_collection_modifyitems(session, config, items):
# Called after collection has been performed, may filter or re-order the items in-place
Expand Down
4 changes: 2 additions & 2 deletions fabtests/pytest/efa/efa_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
def efa_run_client_server_test(cmdline_args, executable, iteration_type,
completion_semantic, memory_type, message_size,
warmup_iteration_type=None, timeout=None,
completion_type="queue"):
completion_type="queue", fabric=None):
if timeout is None:
timeout = cmdline_args.timeout

Expand All @@ -23,7 +23,7 @@ def efa_run_client_server_test(cmdline_args, executable, iteration_type,
memory_type=memory_type,
timeout=timeout,
warmup_iteration_type=warmup_iteration_type,
completion_type=completion_type)
completion_type=completion_type, fabric=fabric)
test.run()

@retry(retry_on_exception=is_ssh_connection_error, stop_max_attempt_number=3, wait_fixed=5000)
Expand Down
4 changes: 2 additions & 2 deletions fabtests/pytest/efa/test_av.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pytest

@pytest.mark.functional
def test_av_xfer(cmdline_args):
def test_av_xfer(cmdline_args, fabric):
from common import ClientServerTest
test = ClientServerTest(cmdline_args, "fi_av_xfer -e rdm")
test = ClientServerTest(cmdline_args, "fi_av_xfer -e rdm", fabric=fabric)
test.run()
8 changes: 4 additions & 4 deletions fabtests/pytest/efa/test_cq.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@
# of cq that efa device can support
@pytest.mark.serial
@pytest.mark.unit
def test_cq(cmdline_args):
def test_cq(cmdline_args, fabric):
from common import UnitTest
test = UnitTest(cmdline_args, "fi_cq_test")
test = UnitTest(cmdline_args, f"fi_cq_test -f {fabric}")
test.run()

@pytest.mark.functional
@pytest.mark.parametrize("operation_type", ["senddata", "writedata"])
def test_cq_data(cmdline_args, operation_type):
def test_cq_data(cmdline_args, operation_type, fabric):
from common import ClientServerTest
test = ClientServerTest(cmdline_args, "fi_cq_data -e rdm -o " + operation_type)
test = ClientServerTest(cmdline_args, f"fi_cq_data -e rdm -o" + operation_type, fabric=fabric)
test.run()
4 changes: 2 additions & 2 deletions fabtests/pytest/efa/test_efa_device_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# This test must be run in serial mode because it checks the hw counter
@pytest.mark.serial
@pytest.mark.functional
def test_efa_device_selection(cmdline_args):
def test_efa_device_selection(cmdline_args, fabric):

if cmdline_args.server_id == cmdline_args.client_id:
pytest.skip("EFA device selection test requires 2 nodes")
Expand Down Expand Up @@ -46,7 +46,7 @@ def test_efa_device_selection(cmdline_args):
cmdline_args_copy.additional_client_arguments = "-d " + client_domain_name
cmdline_args_copy.strict_fabtests_mode = strict_fabtests_mode

test = ClientServerTest(cmdline_args_copy, command, message_size="1000", prefix_type=prefix_type, timeout=300)
test = ClientServerTest(cmdline_args_copy, command, message_size="1000", prefix_type=prefix_type, timeout=300, fabric=fabric)
test.run()

server_tx_bytes_after_test = efa_retrieve_hw_counter_value(cmdline_args.server_id, "tx_bytes", server_device_name)
Expand Down
5 changes: 3 additions & 2 deletions fabtests/pytest/efa/test_efa_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,16 @@
from common import UnitTest
from efa_common import efa_retrieve_gid

# TODO - extend and generalize to efa-direct
@pytest.mark.unit
def test_efa_info(cmdline_args):
test = UnitTest(cmdline_args, "fi_efa_info_test")
test.run()

@pytest.mark.unit
def test_comm_getinfo(cmdline_args):
def test_comm_getinfo(cmdline_args, fabric):
gid = efa_retrieve_gid(cmdline_args.server_id)

# use GID as source address and dest address
test = UnitTest(cmdline_args, f"fi_getinfo_test -s {gid} {gid}")
test = UnitTest(cmdline_args, f"fi_getinfo_test -s {gid} {gid} -f {fabric} ")
test.run()
5 changes: 3 additions & 2 deletions fabtests/pytest/efa/test_efa_protocol_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
@pytest.mark.functional
@pytest.mark.cuda_memory
@pytest.mark.parametrize("fabtest_name,cntrl_env_var", [("fi_rdm_tagged_bw", "FI_EFA_INTER_MIN_READ_MESSAGE_SIZE"), ("fi_rma_bw", "FI_EFA_INTER_MIN_READ_WRITE_SIZE")])
def test_transfer_with_read_protocol_cuda(cmdline_args, fabtest_name, cntrl_env_var):
def test_transfer_with_read_protocol_cuda(cmdline_args, fabtest_name, cntrl_env_var, fabric):
"""
Verify that the read protocol is used for a 1024 byte message when the env variable
switches are set to force the read protocol at 1000 bytes.
Expand Down Expand Up @@ -50,7 +50,8 @@ def test_transfer_with_read_protocol_cuda(cmdline_args, fabtest_name, cntrl_env_
completion_semantic="transmit_complete",
memory_type="cuda_to_cuda",
message_size=message_size,
warmup_iteration_type="0")
warmup_iteration_type="0",
fabric=fabric)

server_read_wrs_after_test = efa_retrieve_hw_counter_value(cmdline_args.server_id, "rdma_read_wrs")
server_read_bytes_after_test = efa_retrieve_hw_counter_value(cmdline_args.server_id, "rdma_read_bytes")
Expand Down
4 changes: 2 additions & 2 deletions fabtests/pytest/efa/test_efa_shm_addr.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@


@pytest.mark.multinode
def test_efa_shm_addr(cmdline_args):
def test_efa_shm_addr(cmdline_args, fabric):
server_id = cmdline_args.server_id
client_id = cmdline_args.client_id
if client_id == server_id:
Expand All @@ -13,7 +13,7 @@ def test_efa_shm_addr(cmdline_args):
# inserted for the 2nd client could be different
# from its efa fi_addr.
client_hostname_list = [client_id, server_id]
client_base_command = "fi_rdm"
client_base_command = f"fi_rdm -f {fabric}"
server_base_command = client_base_command + " -C {}".format(len(client_hostname_list))
test = MultinodeTest(cmdline_args, server_base_command, client_base_command,
client_hostname_list, run_client_asynchronously=False)
Expand Down
6 changes: 3 additions & 3 deletions fabtests/pytest/efa/test_flood_peer.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import pytest

@pytest.mark.functional
def test_flood_peer(cmdline_args):
def test_flood_peer(cmdline_args, fabric):
from common import ClientServerTest
test = ClientServerTest(cmdline_args, "fi_flood -e rdm -W 6400 -S 512 -T 5",
timeout=300)
test = ClientServerTest(cmdline_args, f"fi_flood -e rdm -W 6400 -S 512 -T 5",
timeout=300, fabric=fabric)
test.run()
4 changes: 2 additions & 2 deletions fabtests/pytest/efa/test_fork_support.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@

@pytest.mark.functional
@pytest.mark.parametrize("environment_variable", ["FI_EFA_FORK_SAFE", "RDMAV_FORK_SAFE"])
def test_fork_support(cmdline_args, completion_semantic, environment_variable):
def test_fork_support(cmdline_args, completion_semantic, environment_variable, fabric):
from common import ClientServerTest
import copy
cmdline_args_copy = copy.copy(cmdline_args)

cmdline_args_copy.append_environ("{}=1".format(environment_variable))
test = ClientServerTest(cmdline_args_copy, "fi_rdm_tagged_bw -K",
completion_semantic=completion_semantic,
datacheck_type="with_datacheck")
datacheck_type="with_datacheck", fabric=fabric)
test.run()

4 changes: 2 additions & 2 deletions fabtests/pytest/efa/test_mr.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@ def test_mr_host(cmdline_args):
pytest.param("neuron", marks=pytest.mark.neuron_memory),
],
)
def test_mr_hmem(cmdline_args, hmem_type):
def test_mr_hmem(cmdline_args, hmem_type, fabric):
if hmem_type == "cuda" and not has_cuda(cmdline_args.server_id):
pytest.skip("no cuda device")
if hmem_type == "neuron" and not has_neuron(cmdline_args.server_id):
pytest.skip("no neuron device")

cmdline_args_copy = copy.copy(cmdline_args)

test_command = f"fi_mr_test -D {hmem_type}"
test_command = f"fi_mr_test -D {hmem_type} -f {fabric}"

if cmdline_args.do_dmabuf_reg_for_hmem:
test_command += " -R"
Expand Down
4 changes: 2 additions & 2 deletions fabtests/pytest/efa/test_multi_ep.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

@pytest.mark.functional
@pytest.mark.parametrize("shared_cq", [True, False])
def test_multi_ep(cmdline_args, shared_cq):
def test_multi_ep(cmdline_args, shared_cq, fabric):
from common import ClientServerTest
cmd = "fi_multi_ep -e rdm"
if shared_cq:
cmd += " -Q"
test = ClientServerTest(cmdline_args, cmd)
test = ClientServerTest(cmdline_args, cmd, fabric=fabric)
test.run()
5 changes: 3 additions & 2 deletions fabtests/pytest/efa/test_multi_recv.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@
[pytest.param("short", marks=pytest.mark.short),
pytest.param("standard", marks=pytest.mark.standard)])
@pytest.mark.parametrize("message_size", ["1024", "8192"])
def test_multi_recv(cmdline_args, iteration_type, message_size):
def test_multi_recv(cmdline_args, iteration_type, message_size, fabric):
from common import ClientServerTest
test = ClientServerTest(cmdline_args,
"fi_multi_recv -e rdm",
iteration_type,
message_size=message_size)
message_size=message_size,
fabric=fabric)
test.run()
Loading

0 comments on commit 582d7f2

Please sign in to comment.