Skip to content

Commit

Permalink
tests: update storage node failover test
Browse files (browse the repository at this point in the history)
Signed-off-by: Evgeniy Zayats <[email protected]>
  • Loading branch information
Evgeniy Zayats committed Jun 15, 2024
1 parent b581cef commit ad9b733
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 107 deletions.
1 change: 1 addition & 0 deletions neofs-testlib/neofs_testlib/env/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -617,6 +617,7 @@ def start(self, fresh=True):

@allure.step("Stop storage node")
def stop(self):
logger.info(f"Stopping Storage Node:{self}")
self.process.terminate()

@allure.step("Delete storage node data")
Expand Down
6 changes: 2 additions & 4 deletions pytest_tests/lib/helpers/complex_object_actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,17 +275,15 @@ def get_nodes_with_object(

nodes_list = []
for node in nodes:
wallet = node.wallet.path
wallet_config = neofs_env.generate_cli_config(node.wallet)
try:
res = head_object(
wallet,
node.wallet.path,
cid,
oid,
shell=shell,
endpoint=node.endpoint,
is_direct=True,
wallet_config=wallet_config,
wallet_config=node.cli_config,
)
if res is not None:
logger.info(f"Found object {oid} on node {node}")
Expand Down
139 changes: 36 additions & 103 deletions pytest_tests/tests/failovers/test_failover_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,43 +10,33 @@
from helpers.wellknown_acl import PUBLIC_ACL
from neofs_env.neofs_env_test_base import NeofsEnvTestBase
from neofs_testlib.env.env import NeoFSEnv, StorageNode
from neofs_testlib.shell import CommandOptions

logger = logging.getLogger("NeoLogger")
stopped_nodes: list[StorageNode] = []


@pytest.fixture(scope="function", autouse=True)
@pytest.fixture
@allure.step("Return all stopped hosts")
def after_run_return_all_stopped_hosts(neofs_env: NeoFSEnv):
def after_run_return_all_stopped_storage_nodes(neofs_env: NeoFSEnv):
yield
return_stopped_hosts(neofs_env)
return_stopped_storage_nodes(neofs_env)


def panic_reboot_host(neofs_env: NeoFSEnv) -> None:
shell = neofs_env.shell
shell.exec('sudo sh -c "echo 1 > /proc/sys/kernel/sysrq"')

options = CommandOptions(close_stdin=True, timeout=1, check=False)
shell.exec('sudo sh -c "echo b > /proc/sysrq-trigger"', options)


def return_stopped_hosts(neofs_env: NeoFSEnv) -> None:
def return_stopped_storage_nodes(neofs_env: NeoFSEnv) -> None:
for node in list(stopped_nodes):
with allure.step(f"Start host {node}"):
node.host.start_host()
with allure.step(f"Start {node}"):
node.start(fresh=False)
stopped_nodes.remove(node)

wait_all_storage_nodes_returned(neofs_env)


@pytest.mark.failover
@pytest.mark.skip(reason="These tests require multiple hosts to run")
class TestFailoverStorage(NeofsEnvTestBase):
@allure.title("Lose and return storage node's host")
@pytest.mark.parametrize("hard_reboot", [True, False])
@pytest.mark.failover_reboot
def test_lose_storage_node_host(self, default_wallet, hard_reboot: bool, simple_object_size):
@allure.title("Lose and return storage node's process")
def test_storage_node_failover(
self, default_wallet, simple_object_size, after_run_return_all_stopped_storage_nodes
):
wallet = default_wallet
placement_rule = "REP 2 IN X CBF 2 SELECT 2 FROM * AS X"
source_file_path = generate_file(simple_object_size)
Expand All @@ -58,93 +48,36 @@ def test_lose_storage_node_host(self, default_wallet, hard_reboot: bool, simple_
basic_acl=PUBLIC_ACL,
)
oid = put_object_to_random_node(wallet.path, source_file_path, cid, shell=self.shell, neofs_env=self.neofs_env)
nodes = wait_object_replication(cid, oid, 2, shell=self.shell, nodes=self.neofs_env.storage_nodes)

for node in nodes:
stopped_nodes.append(node)
nodes_with_object = wait_object_replication(
cid, oid, 2, shell=self.shell, nodes=self.neofs_env.storage_nodes, neofs_env=self.neofs_env
)

with allure.step(f"Stop host {node}"):
node.host.stop_host("hard" if hard_reboot else "soft")
for node_to_stop in nodes_with_object:
node_to_stop.stop()
stopped_nodes.append(node_to_stop)

new_nodes = wait_object_replication(
object_nodes_after_stop = wait_object_replication(
cid,
oid,
2,
shell=self.shell,
nodes=list(set(self.neofs_env.storage_nodes) - {node}),
nodes=[sn for sn in self.neofs_env.storage_nodes if sn != node_to_stop],
neofs_env=self.neofs_env,
)
assert all(old_node not in new_nodes for old_node in nodes)

with allure.step("Check object data is not corrupted"):
got_file_path = get_object(wallet, cid, oid, endpoint=new_nodes[0].get_rpc_endpoint(), shell=self.shell)
assert get_file_hash(source_file_path) == get_file_hash(got_file_path)

with allure.step("Return all hosts"):
return_stopped_hosts(self.neofs_env)

with allure.step("Check object data is not corrupted"):
new_nodes = wait_object_replication(cid, oid, 2, shell=self.shell, nodes=self.neofs_env.storage_nodes)
got_file_path = get_object(wallet.path, cid, oid, shell=self.shell, endpoint=new_nodes[0].endpoint)
assert get_file_hash(source_file_path) == get_file_hash(got_file_path)

@allure.title("Panic storage node's host")
@pytest.mark.parametrize("sequence", [True, False])
@pytest.mark.failover_panic
def test_panic_storage_node_host(self, default_wallet, sequence: bool, simple_object_size):
wallet = default_wallet
placement_rule = "REP 2 IN X CBF 2 SELECT 2 FROM * AS X"
source_file_path = generate_file(simple_object_size)
cid = create_container(
wallet.path,
shell=self.shell,
endpoint=self.neofs_env.sn_rpc,
rule=placement_rule,
basic_acl=PUBLIC_ACL,
)
oid = put_object_to_random_node(wallet.path, source_file_path, cid, shell=self.shell, neofs_env=self.neofs_env)

nodes = wait_object_replication(cid, oid, 2, shell=self.shell, nodes=self.neofs_env.storage_nodes)
allure.attach(
"\n".join([str(node) for node in nodes]),
"Current nodes with object",
allure.attachment_type.TEXT,
)

new_nodes: list[StorageNode] = []
for node in nodes:
with allure.step(f"Hard reboot host {node} via magic SysRq option"):
panic_reboot_host(node.host)
if sequence:
try:
new_nodes = wait_object_replication(
cid,
oid,
2,
shell=self.shell,
nodes=list(set(self.neofs_env.storage_nodes) - {node}),
)
except AssertionError:
new_nodes = wait_object_replication(
cid,
oid,
2,
shell=self.shell,
nodes=self.neofs_env.storage_nodes,
)

allure.attach(
"\n".join([str(new_node) for new_node in new_nodes]),
f"Nodes with object after {node} fail",
allure.attachment_type.TEXT,
)

if not sequence:
new_nodes = wait_object_replication(cid, oid, 2, shell=self.shell, nodes=self.neofs_env.storage_nodes)
allure.attach(
"\n".join([str(new_node) for new_node in new_nodes]),
"Nodes with object after nodes fail",
allure.attachment_type.TEXT,
)

got_file_path = get_object(wallet, cid, oid, shell=self.shell, endpoint=new_nodes[0].endpoint)
assert get_file_hash(source_file_path) == get_file_hash(got_file_path)
assert node_to_stop not in object_nodes_after_stop

with allure.step("Check object data is not corrupted"):
got_file_path = get_object(
wallet.path, cid, oid, shell=self.shell, endpoint=object_nodes_after_stop[0].endpoint
)
assert get_file_hash(source_file_path) == get_file_hash(got_file_path)

with allure.step("Return stopped storage nodes"):
return_stopped_storage_nodes(self.neofs_env)

with allure.step("Check object data is not corrupted"):
new_nodes = wait_object_replication(
cid, oid, 2, shell=self.shell, nodes=self.neofs_env.storage_nodes, neofs_env=self.neofs_env
)
got_file_path = get_object(wallet.path, cid, oid, shell=self.shell, endpoint=new_nodes[0].endpoint)
assert get_file_hash(source_file_path) == get_file_hash(got_file_path)

0 comments on commit ad9b733

Please sign in to comment.