diff --git a/pytest_tests/helpers/cluster.py b/pytest_tests/helpers/cluster.py
index e972c1177..322f7f439 100644
--- a/pytest_tests/helpers/cluster.py
+++ b/pytest_tests/helpers/cluster.py
@@ -52,6 +52,9 @@ def start_service(self):
     def stop_service(self):
         self.host.stop_service(self.name)
 
+    def restart_service(self):
+        self.host.restart_service(self.name)
+
     def get_wallet_password(self) -> str:
         return self._get_attribute(_ConfigAttributes.WALLET_PASSWORD)
 
diff --git a/pytest_tests/testsuites/failovers/test_failover_part.py b/pytest_tests/testsuites/failovers/test_failover_part.py
new file mode 100644
index 000000000..18b26bef6
--- /dev/null
+++ b/pytest_tests/testsuites/failovers/test_failover_part.py
@@ -0,0 +1,92 @@
+import logging
+
+import allure
+import pytest
+from neofs_testlib.shell import Shell
+
+from cluster import Cluster
+from cluster_test_base import ClusterTestBase
+from container import create_container
+from failover_utils import wait_all_storage_nodes_returned, enable_metabase_resync_on_start
+from grpc_responses import OBJECT_NOT_FOUND
+from helpers.container import StorageContainer, StorageContainerInfo
+from neofs_verbs import get_object
+from node_management import delete_node_metadata, start_storage_nodes, stop_storage_nodes
+from test_control import expect_not_raises
+from wallet import WalletFile, WalletFactory
+
+logger = logging.getLogger("NeoLogger")
+
+
+@pytest.fixture(
+    scope="module",
+)
+def user_wallet(wallet_factory: WalletFactory):
+    with allure.step("Create user wallet with container"):
+        wallet_file = wallet_factory.create_wallet()
+        return wallet_file
+
+
+@pytest.fixture(
+    scope="function",
+)
+def user_container(user_wallet: WalletFile, client_shell: Shell, cluster: Cluster):
+    container_id = create_container(
+        user_wallet.path, shell=client_shell, endpoint=cluster.default_rpc_endpoint
+    )
+    return StorageContainer(StorageContainerInfo(container_id, user_wallet), client_shell, cluster)
+
+
+@pytest.mark.failover_part
+class TestFailoverNodePart(ClusterTestBase):
+    @allure.title("Enable resync metabase, delete metadata and get object")
+    @pytest.mark.delete_metadata
+    def test_enable_resync_metabase_delete_metadata(self, enable_metabase_resync_on_start,
+                                                    user_container: StorageContainer,
+                                                    simple_object_size: int):
+        storage_object = user_container.generate_object(simple_object_size)
+
+        with allure.step("Delete metabase files from storage nodes"):
+            for node in self.cluster.storage_nodes:
+                delete_node_metadata(node)
+
+        with allure.step("Start nodes after metabase deletion"):
+            start_storage_nodes(self.cluster.storage_nodes)
+            wait_all_storage_nodes_returned(self.cluster)
+
+        with allure.step("Try to fetch object from each storage node"):
+            for node in self.cluster.storage_nodes:
+                with expect_not_raises():
+                    get_object(
+                        storage_object.wallet_file_path,
+                        storage_object.cid,
+                        storage_object.oid,
+                        self.shell,
+                        endpoint=node.get_rpc_endpoint(),
+                        wallet_config=user_container.get_wallet_config_path(),
+                    )
+
+    @allure.title("Delete metadata without resync metabase enabled and try to get object")
+    @pytest.mark.delete_metadata
+    def test_delete_metadata(self, user_container: StorageContainer, simple_object_size: int):
+        storage_object = user_container.generate_object(simple_object_size)
+
+        with allure.step("Delete metabase files from storage nodes"):
+            for node in self.cluster.storage_nodes:
+                delete_node_metadata(node)
+
+        with allure.step("Start nodes after metabase deletion"):
+            start_storage_nodes(self.cluster.storage_nodes)
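+            # resync_metabase is not enabled in this test, so the deleted metadata is not expected to be rebuilt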
+            wait_all_storage_nodes_returned(self.cluster)
+
+        with allure.step("Try to fetch object from each storage node"):
+            for node in self.cluster.storage_nodes:
+                with pytest.raises(Exception, match=OBJECT_NOT_FOUND):
+                    get_object(
+                        storage_object.wallet_file_path,
+                        storage_object.cid,
+                        storage_object.oid,
+                        self.shell,
+                        endpoint=node.get_rpc_endpoint(),
+                        wallet_config=user_container.get_wallet_config_path(),
+                    )
diff --git a/pytest_tests/testsuites/object/test_object_lock.py b/pytest_tests/testsuites/object/test_object_lock.py
index 2f442b87b..e38b0f288 100755
--- a/pytest_tests/testsuites/object/test_object_lock.py
+++ b/pytest_tests/testsuites/object/test_object_lock.py
@@ -8,6 +8,7 @@
 from common import STORAGE_GC_TIME
 from complex_object_actions import get_link_object, get_storage_object_chunks
 from epoch import ensure_fresh_epoch, get_epoch, tick_epoch, tick_epoch_and_wait
+from failover_utils import wait_all_storage_nodes_returned, enable_metabase_resync_on_start, docker_compose_restart_storage_nodes
 from grpc_responses import (
     LIFETIME_REQUIRED,
     LOCK_NON_REGULAR_OBJECT,
@@ -18,7 +19,7 @@
     OBJECT_NOT_FOUND,
 )
 from neofs_testlib.shell import Shell
-from node_management import drop_object
+from node_management import drop_object, delete_node_metadata, stop_storage_nodes, start_storage_nodes
 from pytest import FixtureRequest
 from python_keywords.container import create_container
 from python_keywords.neofs_verbs import delete_object, head_object, lock_object
@@ -684,3 +685,76 @@ def test_link_object_of_complex_object_should_also_be_protected_from_deletion(
             self.shell,
             self.cluster.default_rpc_endpoint,
         )
+
+    @pytest.mark.delete_metadata
+    @allure.title("The locked object must remain protected from deletion after metabase deletion "
+                  "and storage node restart (metabase resynchronization must be enabled)")
+    @pytest.mark.parametrize(
+        "new_locked_storage_object",
+        [pytest.lazy_fixture("simple_object_size"), pytest.lazy_fixture("complex_object_size")],
+        ids=["simple object", "complex object"],
+        indirect=True,
+    )
+    def test_the_object_lock_should_be_kept_after_metabase_deletion(
+        self,
+        new_locked_storage_object: StorageObjectInfo,
+        enable_metabase_resync_on_start,
+    ):
+        """
+        Lock objects should repopulate the metabase on resync_metabase
+        """
+        with allure.step("Log nodes with object"):
+            nodes_with_object_before_first_try = get_nodes_with_object(
+                new_locked_storage_object.cid,
+                new_locked_storage_object.oid,
+                shell=self.shell,
+                nodes=self.cluster.storage_nodes,
+            )
+
+        with allure.step(f"Try to delete object {new_locked_storage_object.oid} before metabase deletion"):
+            with pytest.raises(Exception, match=OBJECT_IS_LOCKED):
+                delete_object(
+                    new_locked_storage_object.wallet_file_path,
+                    new_locked_storage_object.cid,
+                    new_locked_storage_object.oid,
+                    self.shell,
+                    self.cluster.default_rpc_endpoint,
+                )
+
+        with allure.step("Log nodes with object"):
+            nodes_with_object_after_first_try = get_nodes_with_object(
+                new_locked_storage_object.cid,
+                new_locked_storage_object.oid,
+                shell=self.shell,
+                nodes=self.cluster.storage_nodes,
+            )
+
+        assert nodes_with_object_before_first_try == nodes_with_object_after_first_try
+
+        with allure.step("Delete metabase files from storage nodes"):
+            for node in self.cluster.storage_nodes:
+                delete_node_metadata(node)
+
+        with allure.step("Start nodes after metabase deletion"):
+            start_storage_nodes(self.cluster.storage_nodes)
+            wait_all_storage_nodes_returned(self.cluster)
+
+        with allure.step("Log nodes with object"):
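+            # with resync_metabase enabled, the restarted nodes should still report the locked object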
+            nodes_with_object_after_metabase_deletion = get_nodes_with_object(
+                new_locked_storage_object.cid,
+                new_locked_storage_object.oid,
+                shell=self.shell,
+                nodes=self.cluster.storage_nodes,
+            )
+
+        assert nodes_with_object_before_first_try == nodes_with_object_after_metabase_deletion
+
+        with allure.step(f"Try to delete object {new_locked_storage_object.oid} after metabase deletion"):
+            with pytest.raises(Exception, match=OBJECT_IS_LOCKED):
+                delete_object(
+                    new_locked_storage_object.wallet_file_path,
+                    new_locked_storage_object.cid,
+                    new_locked_storage_object.oid,
+                    self.shell,
+                    self.cluster.default_rpc_endpoint,
+                )
diff --git a/robot/resources/lib/python_keywords/failover_utils.py b/robot/resources/lib/python_keywords/failover_utils.py
index a0ace5838..0dcf3493d 100644
--- a/robot/resources/lib/python_keywords/failover_utils.py
+++ b/robot/resources/lib/python_keywords/failover_utils.py
@@ -1,15 +1,22 @@
 import logging
+import os
+import subprocess
 from time import sleep
 
 import allure
 from typing import List, Tuple, Optional
 from urllib.parse import urlparse
+
+import pytest
+
+from utility import parse_time
 from cluster import Cluster, StorageNode
 from neofs_testlib.shell import Shell
 from neofs_testlib.hosting import Hosting
-from python_keywords.node_management import storage_node_healthcheck
+from python_keywords.node_management import storage_node_healthcheck, stop_storage_nodes
 from storage_policy import get_nodes_with_object
-from common import MORPH_CHAIN_SERVICE_NAME_REGEX, ENDPOINT_INTERNAL0
+from common import MORPH_CHAIN_SERVICE_NAME_REGEX, ENDPOINT_INTERNAL0, DOCKER_COMPOSE_ENV_FILE, \
+    DOCKER_COMPOSE_STORAGE_CONFIG_FILE, METABASE_RESYNC_TIMEOUT
 
 logger = logging.getLogger("NeoLogger")
 
@@ -71,3 +78,84 @@ def get_morph_chain_endpoints(hosting: Hosting) -> List[Tuple[str, str]]:
         port = str(parsed_url.port)
         endpoints.append((addr, port))
     return endpoints
+
+
+@allure.step("Docker compose restart storage node containers with new env file")
+def docker_compose_restart_storage_nodes(cluster: Cluster):
+    stop_storage_nodes(cluster.storage_nodes)
+    # Not using docker-compose restart because the containers need to be started with new environment variables.
+    with allure.step("Docker-compose down"):
+        subprocess.run(["docker-compose", "-f", DOCKER_COMPOSE_STORAGE_CONFIG_FILE, "down"])
+    with allure.step("Docker-compose up"):
+        subprocess.run(["docker-compose", "-f", DOCKER_COMPOSE_STORAGE_CONFIG_FILE, "up", "-d"])
+    wait_all_storage_nodes_returned(cluster)
+    with allure.step("Log resync status"):
+        for node in cluster.storage_nodes:
+            envs = subprocess.run(["docker", "inspect", "-f", "'{{range $index, $value := .Config.Env}}{{$value}} "
+                                                              "{{end}}'", node.name], capture_output=True)
+            env_stdout = envs.stdout.decode("utf-8")
+            logger.debug(f"ENV from {node.name}: {env_stdout}")
+
+
+@pytest.fixture(scope="function")
+@allure.title("Enable metabase resync on start")
+def enable_metabase_resync_on_start(cluster: Cluster):
+    """
+    If there are already any environment variables in the DOCKER_COMPOSE_ENV_FILE, they are retained, and
+    NEOFS_STORAGE_SHARD_0_RESYNC_METABASE and NEOFS_STORAGE_SHARD_1_RESYNC_METABASE are added to the file.
+
+    If NEOFS_STORAGE_SHARD_0_RESYNC_METABASE and NEOFS_STORAGE_SHARD_1_RESYNC_METABASE are explicitly specified
+    as false, they are changed to true.
+
+    If DOCKER_COMPOSE_ENV_FILE is empty, NEOFS_STORAGE_SHARD_0_RESYNC_METABASE and
+    NEOFS_STORAGE_SHARD_1_RESYNC_METABASE are added to DOCKER_COMPOSE_ENV_FILE.
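+
+    For example, a line
+        NEOFS_STORAGE_SHARD_0_RESYNC_METABASE=false
+    is rewritten in place as
+        NEOFS_STORAGE_SHARD_0_RESYNC_METABASE=true
+    while a variable that is absent from the file is appended to its end.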
+
+    After the test, the DOCKER_COMPOSE_ENV_FILE is restored to its initial state.
+    """
+    file_path = DOCKER_COMPOSE_ENV_FILE
+    if not os.path.exists(file_path):
+        pytest.fail(f'File {file_path} does not exist!')
+
+    with open(file_path, 'r') as file:
+        lines = file.readlines()
+        logger.debug(f"Initial file content:\n{''.join(lines)}")
+
+    replacements = {
+        'NEOFS_STORAGE_SHARD_0_RESYNC_METABASE=false': 'NEOFS_STORAGE_SHARD_0_RESYNC_METABASE=true\n',
+        'NEOFS_STORAGE_SHARD_1_RESYNC_METABASE=false': 'NEOFS_STORAGE_SHARD_1_RESYNC_METABASE=true\n'
+    }
+
+    unprocessed_lines = set(replacements.values())
+
+    modified_lines = []
+
+    for line in lines:
+        for original, new in replacements.items():
+            if original in line:
+                line = line.replace(original, new)
+                unprocessed_lines.discard(new)
+        modified_lines.append(line)
+
+    modified_lines.extend(unprocessed_lines)
+
+    modified_content = ''.join(modified_lines)
+
+    with open(file_path, 'w') as file:
+        file.write(modified_content)
+        logger.debug(f"Modified file content:\n{modified_content}")
+
+    with allure.step("Restart docker compose to apply the changes"):
+        docker_compose_restart_storage_nodes(cluster)
+
+    yield
+
+    with open(file_path, 'w') as file:
+        file.writelines(lines)
+        logger.debug(f"Restored file content:\n{''.join(lines)}")
+
+    with allure.step("Restart docker compose to revert the changes"):
+        docker_compose_restart_storage_nodes(cluster)
+
+    with allure.step(f"Waiting {METABASE_RESYNC_TIMEOUT} for the metabase to synchronize"):
+        sleep(parse_time(METABASE_RESYNC_TIMEOUT))
+
diff --git a/robot/resources/lib/python_keywords/node_management.py b/robot/resources/lib/python_keywords/node_management.py
index 6b005b60a..49961af75 100644
--- a/robot/resources/lib/python_keywords/node_management.py
+++ b/robot/resources/lib/python_keywords/node_management.py
@@ -59,6 +59,28 @@ def start_storage_nodes(nodes: list[StorageNode]) -> None:
         node.start_service()
 
 
+@allure.step("Stop storage nodes")
+def stop_storage_nodes(nodes: list[StorageNode]) -> None:
+    """
+    The function stops the specified storage nodes.
+    Args:
+        nodes: the list of nodes to stop
+    """
+    for node in nodes:
+        node.stop_service()
+
+
+@allure.step("Restart storage nodes")
+def restart_storage_nodes(nodes: list[StorageNode]) -> None:
+    """
+    The function restarts the specified storage nodes.
+    Args:
+        nodes: the list of nodes to restart
+    """
+    for node in nodes:
+        node.restart_service()
+
+
 @allure.step("Get Locode from random storage node")
 def get_locode_from_random_node(cluster: Cluster) -> str:
     node = random.choice(cluster.storage_nodes)
@@ -157,6 +179,17 @@ def delete_node_data(node: StorageNode) -> None:
     time.sleep(parse_time(MORPH_BLOCK_TIME))
 
 
+@allure.step("Delete metadata from host for node {node}")
+def delete_node_metadata(node: StorageNode) -> None:
+    """
+    The function deletes metadata from the host for the specified node.
+    Args:
+        node: node for which metadata should be deleted.
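+
+    The node service is stopped first; delete_storage_node_data() is then called with
+    cache_only=True, so only the cached data (metabase) is removed.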
+ """ + node.stop_service() + node.host.delete_storage_node_data(node.name, cache_only=True) + + @allure.step("Exclude node {node_to_exclude} from network map") def exclude_node_from_network_map( node_to_exclude: StorageNode, diff --git a/robot/variables/common.py b/robot/variables/common.py index 8a69a2275..ac3166b6d 100644 --- a/robot/variables/common.py +++ b/robot/variables/common.py @@ -12,6 +12,7 @@ MAINNET_TIMEOUT = os.getenv("MAINNET_TIMEOUT", "1min") MORPH_BLOCK_TIME = os.getenv("MORPH_BLOCK_TIME", "1s") NEOFS_CONTRACT_CACHE_TIMEOUT = os.getenv("NEOFS_CONTRACT_CACHE_TIMEOUT", "30s") +METABASE_RESYNC_TIMEOUT = os.getenv("METABASE_RESYNC_TIMEOUT", "10s") # Time interval that allows a GC pass on storage node (this includes GC sleep interval # of 1min plus 15 seconds for GC pass itself) @@ -25,6 +26,10 @@ TEST_FILES_DIR = os.getenv("TEST_FILES_DIR", "TestFilesDir") TEST_OBJECTS_DIR = os.getenv("TEST_OBJECTS_DIR", "TestObjectsDir") DEVENV_PATH = os.getenv("DEVENV_PATH", os.path.join("..", "neofs-dev-env")) +DOCKER_COMPOSE_STORAGE_CONFIG_FILE = os.getenv("DOCKER_COMPOSE_STORAGE_CONFIG_FILE", os.path.join(DEVENV_PATH, "services", "storage", + "docker-compose.yml")) +DOCKER_COMPOSE_ENV_FILE = os.getenv("DOCKER_COMPOSE_ENV_FILE", os.path.join(DEVENV_PATH, "services", "storage", + ".int_test.env")) # Password of wallet owned by user on behalf of whom we are running tests WALLET_PASS = os.getenv("WALLET_PASS", "")