Skip to content

Commit

Permalink
Lock resync (#608)
Browse files Browse the repository at this point in the history
  • Loading branch information
roman-khimov authored Oct 2, 2023
2 parents 6817afc + d1a7097 commit e71fc06
Show file tree
Hide file tree
Showing 6 changed files with 298 additions and 3 deletions.
3 changes: 3 additions & 0 deletions pytest_tests/helpers/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ def start_service(self):
    def stop_service(self):
        """Stop this node's service on its host (delegates to the Host backend)."""
        self.host.stop_service(self.name)

    def restart_service(self):
        """Restart this node's service on its host in one operation.

        Unlike a stop/start pair, the restart semantics (ordering, waits) are
        whatever the Host implementation provides.
        """
        self.host.restart_service(self.name)

    def get_wallet_password(self) -> str:
        """Return the wallet password read from this node's config attributes."""
        return self._get_attribute(_ConfigAttributes.WALLET_PASSWORD)

Expand Down
92 changes: 92 additions & 0 deletions pytest_tests/testsuites/failovers/test_failover_part.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
import logging

import allure
import pytest
from neofs_testlib.shell import Shell

from cluster import Cluster
from cluster_test_base import ClusterTestBase
from container import create_container
from failover_utils import wait_all_storage_nodes_returned, enable_metabase_resync_on_start
from grpc_responses import OBJECT_NOT_FOUND
from helpers.container import StorageContainer, StorageContainerInfo
from neofs_verbs import get_object
from node_management import delete_node_metadata, start_storage_nodes, stop_storage_nodes
from test_control import expect_not_raises
from wallet import WalletFile, WalletFactory

logger = logging.getLogger("NeoLogger")


@pytest.fixture(scope="module")
def user_wallet(wallet_factory: WalletFactory):
    """Module-scoped wallet acting as the object owner for this suite."""
    with allure.step("Create user wallet with container"):
        return wallet_factory.create_wallet()


@pytest.fixture(scope="function")
def user_container(user_wallet: WalletFile, client_shell: Shell, cluster: Cluster):
    """Create a fresh container owned by ``user_wallet`` for each test."""
    cid = create_container(
        user_wallet.path, shell=client_shell, endpoint=cluster.default_rpc_endpoint
    )
    return StorageContainer(StorageContainerInfo(cid, user_wallet), client_shell, cluster)


@pytest.mark.failover_part
class TestFailoverNodePart(ClusterTestBase):
    """Failover tests around storage-node metabase deletion and resync."""

    def _wipe_metabase_and_restart_nodes(self):
        """Delete the metabase on every storage node, then bring all nodes back.

        ``delete_node_metadata`` stops each node before removing its metadata,
        so the nodes have to be started again afterwards.
        """
        with allure.step("Delete metabase files from storage nodes"):
            for node in self.cluster.storage_nodes:
                delete_node_metadata(node)

        with allure.step("Start nodes after metabase deletion"):
            start_storage_nodes(self.cluster.storage_nodes)
            wait_all_storage_nodes_returned(self.cluster)

    @allure.title("Enable resync metabase, delete metadata and get object")
    @pytest.mark.delete_metadata
    def test_enable_resync_metabase_delete_metadata(
        self,
        enable_metabase_resync_on_start,
        user_container: StorageContainer,
        simple_object_size: int,
    ):
        """With metabase resync enabled, an object stays retrievable from every
        node even after all metabases have been wiped."""
        storage_object = user_container.generate_object(simple_object_size)

        self._wipe_metabase_and_restart_nodes()

        with allure.step("Try to fetch object from each storage node"):
            for node in self.cluster.storage_nodes:
                with expect_not_raises():
                    get_object(
                        storage_object.wallet_file_path,
                        storage_object.cid,
                        storage_object.oid,
                        self.shell,
                        endpoint=node.get_rpc_endpoint(),
                        wallet_config=user_container.get_wallet_config_path(),
                    )

    @allure.title("Delete metadata without resync metabase enabling, delete metadata try to get object")
    @pytest.mark.delete_metadata
    def test_delete_metadata(self, user_container: StorageContainer, simple_object_size: int):
        """Without metabase resync, the object must be reported as not found
        on every node after the metabases have been wiped."""
        storage_object = user_container.generate_object(simple_object_size)

        self._wipe_metabase_and_restart_nodes()

        with allure.step("Try to fetch object from each storage node"):
            for node in self.cluster.storage_nodes:
                with pytest.raises(Exception, match=OBJECT_NOT_FOUND):
                    get_object(
                        storage_object.wallet_file_path,
                        storage_object.cid,
                        storage_object.oid,
                        self.shell,
                        endpoint=node.get_rpc_endpoint(),
                        wallet_config=user_container.get_wallet_config_path(),
                    )
76 changes: 75 additions & 1 deletion pytest_tests/testsuites/object/test_object_lock.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from common import STORAGE_GC_TIME
from complex_object_actions import get_link_object, get_storage_object_chunks
from epoch import ensure_fresh_epoch, get_epoch, tick_epoch, tick_epoch_and_wait
from failover_utils import wait_all_storage_nodes_returned, enable_metabase_resync_on_start, docker_compose_restart_storage_nodes
from grpc_responses import (
LIFETIME_REQUIRED,
LOCK_NON_REGULAR_OBJECT,
Expand All @@ -18,7 +19,7 @@
OBJECT_NOT_FOUND,
)
from neofs_testlib.shell import Shell
from node_management import drop_object
from node_management import drop_object, delete_node_metadata, stop_storage_nodes, start_storage_nodes
from pytest import FixtureRequest
from python_keywords.container import create_container
from python_keywords.neofs_verbs import delete_object, head_object, lock_object
Expand Down Expand Up @@ -684,3 +685,76 @@ def test_link_object_of_complex_object_should_also_be_protected_from_deletion(
self.shell,
self.cluster.default_rpc_endpoint,
)

@pytest.mark.delete_metadata
@allure.title("The locked object must be protected from deletion after metabase deletion "
"(metabase resynchronization must be enabled), and after restarting storage nodes")
@pytest.mark.parametrize(
"new_locked_storage_object",
[pytest.lazy_fixture("simple_object_size"), pytest.lazy_fixture("complex_object_size")],
ids=["simple object", "complex object"],
indirect=True,
)
def test_the_object_lock_should_be_kept_after_metabase_deletion(
self,
new_locked_storage_object: StorageObjectInfo,
enable_metabase_resync_on_start,
):
"""
Lock objects should fill metabase on resync_metabase
"""
with allure.step("Log nodes with object"):
nodes_with_object_before_first_try = get_nodes_with_object(
new_locked_storage_object.cid,
new_locked_storage_object.oid,
shell=self.shell,
nodes=self.cluster.storage_nodes,
)

with allure.step(f"Try to delete object {new_locked_storage_object.oid} before metabase deletion"):
with pytest.raises(Exception, match=OBJECT_IS_LOCKED):
delete_object(
new_locked_storage_object.wallet_file_path,
new_locked_storage_object.cid,
new_locked_storage_object.oid,
self.shell,
self.cluster.default_rpc_endpoint,
)

with allure.step("Log nodes with object"):
nodes_with_object_after_first_try = get_nodes_with_object(
new_locked_storage_object.cid,
new_locked_storage_object.oid,
shell=self.shell,
nodes=self.cluster.storage_nodes,
)

assert nodes_with_object_before_first_try == nodes_with_object_after_first_try

with allure.step("Delete metabase files from storage nodes"):
for node in self.cluster.storage_nodes:
delete_node_metadata(node)

with allure.step("Start nodes after metabase deletion"):
start_storage_nodes(self.cluster.storage_nodes)
wait_all_storage_nodes_returned(self.cluster)

with allure.step("Log nodes with object"):
nodes_with_object_after_metabase_deletion = get_nodes_with_object(
new_locked_storage_object.cid,
new_locked_storage_object.oid,
shell=self.shell,
nodes=self.cluster.storage_nodes,
)

assert nodes_with_object_before_first_try == nodes_with_object_after_metabase_deletion

with allure.step(f"Try to delete object {new_locked_storage_object.oid} after metabase deletion"):
with pytest.raises(Exception, match=OBJECT_IS_LOCKED):
delete_object(
new_locked_storage_object.wallet_file_path,
new_locked_storage_object.cid,
new_locked_storage_object.oid,
self.shell,
self.cluster.default_rpc_endpoint,
)
92 changes: 90 additions & 2 deletions robot/resources/lib/python_keywords/failover_utils.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,22 @@
import logging
import os
import subprocess
from time import sleep

import allure
from typing import List, Tuple, Optional
from urllib.parse import urlparse

import pytest

from utility import parse_time
from cluster import Cluster, StorageNode
from neofs_testlib.shell import Shell
from neofs_testlib.hosting import Hosting
from python_keywords.node_management import storage_node_healthcheck
from python_keywords.node_management import storage_node_healthcheck, stop_storage_nodes
from storage_policy import get_nodes_with_object
from common import MORPH_CHAIN_SERVICE_NAME_REGEX, ENDPOINT_INTERNAL0
from common import MORPH_CHAIN_SERVICE_NAME_REGEX, ENDPOINT_INTERNAL0, DOCKER_COMPOSE_ENV_FILE, \
DOCKER_COMPOSE_STORAGE_CONFIG_FILE, METABASE_RESYNC_TIMEOUT

logger = logging.getLogger("NeoLogger")

Expand Down Expand Up @@ -71,3 +78,84 @@ def get_morph_chain_endpoints(hosting: Hosting) -> List[Tuple[str, str]]:
port = str(parsed_url.port)
endpoints.append((addr, port))
return endpoints


@allure.step("Docker compose restart storage nodes containers with new env file")
def docker_compose_restart_storage_nodes(cluster: Cluster):
    """Recreate the storage-node containers so they pick up a changed env file.

    Args:
        cluster: cluster whose storage nodes are taken down and recreated.
    """
    stop_storage_nodes(cluster.storage_nodes)
    # Not using docker-compose restart because the container needs to be started with new environment variables.
    with allure.step("Docker-compose down"):
        # check=True: a failed teardown was previously ignored silently, leaving
        # the cluster in an undefined state for the rest of the test.
        subprocess.run(["docker-compose", "-f", DOCKER_COMPOSE_STORAGE_CONFIG_FILE, "down"], check=True)
    with allure.step("Docker-compose up"):
        subprocess.run(["docker-compose", "-f", DOCKER_COMPOSE_STORAGE_CONFIG_FILE, "up", "-d"], check=True)
    wait_all_storage_nodes_returned(cluster)
    with allure.step("Log resync status"):
        # Best-effort diagnostics only, so no check=True here.
        for node in cluster.storage_nodes:
            envs = subprocess.run(
                ["docker", "inspect", "-f",
                 "'{{range $index, $value := .Config.Env}}{{$value}} {{end}}'",
                 node.name],
                capture_output=True,
            )
            env_stdout = envs.stdout.decode("utf-8")
            logger.debug(f"ENV from {node.name}: {env_stdout}")


def _force_resync_flags(lines: list) -> str:
    """Return env-file content with both shard resync flags forced to ``true``.

    Any existing assignment to a resync flag (whatever its value) is rewritten
    to ``true``; flags not present at all are appended.  All other lines pass
    through untouched.  Fixes of the previous implementation: no doubled
    newlines when flipping ``...=false\\n`` lines, no duplicate line appended
    when a flag is already ``true``, appended flags cannot fuse onto a final
    line that lacks a trailing newline, and the appended order is
    deterministic (shard 0 before shard 1).
    """
    flags = (
        "NEOFS_STORAGE_SHARD_0_RESYNC_METABASE",
        "NEOFS_STORAGE_SHARD_1_RESYNC_METABASE",
    )
    missing = list(flags)
    modified_lines = []
    for line in lines:
        for flag in flags:
            if line.strip().startswith(flag + "="):
                line = flag + "=true\n"
                if flag in missing:
                    missing.remove(flag)
        modified_lines.append(line)
    if modified_lines and not modified_lines[-1].endswith("\n"):
        # Keep an appended flag from being glued onto the last existing line.
        modified_lines[-1] += "\n"
    modified_lines.extend(flag + "=true\n" for flag in missing)
    return "".join(modified_lines)


@pytest.fixture(scope="function")
@allure.title("Enable metabase resync on start")
def enable_metabase_resync_on_start(cluster: Cluster):
    """
    If there were already any environment variables in the DOCKER_COMPOSE_ENV_FILE, they should be retained and
    NEOFS_STORAGE_SHARD_0_RESYNC_METABASE and NEOFS_STORAGE_SHARD_1_RESYNC_METABASE should be added to the file.
    If NEOFS_STORAGE_SHARD_0_RESYNC_METABASE and NEOFS_STORAGE_SHARD_1_RESYNC_METABASE are explicitly specified
    as false, they must be changed to true.
    If DOCKER_COMPOSE_ENV_FILE is empty, NEOFS_STORAGE_SHARD_0_RESYNC_METABASE and
    NEOFS_STORAGE_SHARD_1_RESYNC_METABASE must be added to DOCKER_COMPOSE_ENV_FILE.
    Of course, after the test, the DOCKER_COMPOSE_ENV_FILE must return to its initial state.
    """
    file_path = DOCKER_COMPOSE_ENV_FILE
    if not os.path.exists(file_path):
        pytest.fail(f'File {file_path} does not exist!')

    with open(file_path, 'r') as file:
        lines = file.readlines()
    logger.debug(f"Initial file content:\n{''.join(lines)}")

    modified_content = _force_resync_flags(lines)

    with open(file_path, 'w') as file:
        file.write(modified_content)
    logger.debug(f"Modified file content:\n{modified_content}")

    try:
        with allure.step("Restart docker compose to apply the changes"):
            docker_compose_restart_storage_nodes(cluster)

        yield
    finally:
        # Restore the original file even if the restart above (or the test)
        # fails; previously a setup failure left the modified env file behind.
        with open(file_path, 'w') as file:
            file.writelines(lines)
        logger.debug(f"Restored file content:\n{''.join(lines)}")

        with allure.step("Restart docker compose to revert the changes"):
            docker_compose_restart_storage_nodes(cluster)

        with allure.step(f"Waiting {METABASE_RESYNC_TIMEOUT} seconds for the metabase to synchronize"):
            sleep(parse_time(METABASE_RESYNC_TIMEOUT))

33 changes: 33 additions & 0 deletions robot/resources/lib/python_keywords/node_management.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,28 @@ def start_storage_nodes(nodes: list[StorageNode]) -> None:
node.start_service()


@allure.step("Stop storage nodes")
def stop_storage_nodes(nodes: list[StorageNode]) -> None:
    """Stop every storage node in the given list.

    Args:
        nodes: the list of nodes to stop
    """
    for storage_node in nodes:
        storage_node.stop_service()


@allure.step("Restart storage nodes")
def restart_storage_nodes(nodes: list[StorageNode]) -> None:
    """Restart every storage node in the given list.

    Args:
        nodes: the list of nodes to restart
    """
    for storage_node in nodes:
        storage_node.restart_service()


@allure.step("Get Locode from random storage node")
def get_locode_from_random_node(cluster: Cluster) -> str:
node = random.choice(cluster.storage_nodes)
Expand Down Expand Up @@ -157,6 +179,17 @@ def delete_node_data(node: StorageNode) -> None:
time.sleep(parse_time(MORPH_BLOCK_TIME))


@allure.step("Delete metadata from host for node {node}")
def delete_node_metadata(node: StorageNode) -> None:
    """
    The function deletes metadata from host for specified node.

    NOTE: the node's service is stopped first and is NOT started again here;
    callers must start the node themselves afterwards.

    Args:
        node: node for which metadata should be deleted.
    """
    node.stop_service()
    # cache_only=True limits deletion to the node's cached metadata (the
    # metabase); object payloads are presumably retained — confirm against
    # the Host.delete_storage_node_data implementation.
    node.host.delete_storage_node_data(node.name, cache_only=True)


@allure.step("Exclude node {node_to_exclude} from network map")
def exclude_node_from_network_map(
node_to_exclude: StorageNode,
Expand Down
5 changes: 5 additions & 0 deletions robot/variables/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
MAINNET_TIMEOUT = os.getenv("MAINNET_TIMEOUT", "1min")
MORPH_BLOCK_TIME = os.getenv("MORPH_BLOCK_TIME", "1s")
NEOFS_CONTRACT_CACHE_TIMEOUT = os.getenv("NEOFS_CONTRACT_CACHE_TIMEOUT", "30s")
# How long to wait for a storage node's metabase resync to complete after a restart.
METABASE_RESYNC_TIMEOUT = os.getenv("METABASE_RESYNC_TIMEOUT", "10s")

# Time interval that allows a GC pass on storage node (this includes GC sleep interval
# of 1min plus 15 seconds for GC pass itself)
Expand All @@ -25,6 +26,10 @@
TEST_FILES_DIR = os.getenv("TEST_FILES_DIR", "TestFilesDir")
TEST_OBJECTS_DIR = os.getenv("TEST_OBJECTS_DIR", "TestObjectsDir")
DEVENV_PATH = os.getenv("DEVENV_PATH", os.path.join("..", "neofs-dev-env"))
# docker-compose file defining the storage-node containers in neofs-dev-env.
DOCKER_COMPOSE_STORAGE_CONFIG_FILE = os.getenv("DOCKER_COMPOSE_STORAGE_CONFIG_FILE", os.path.join(DEVENV_PATH, "services", "storage",
                                               "docker-compose.yml"))
# Env file consumed by that compose file; tests rewrite it to toggle metabase resync.
DOCKER_COMPOSE_ENV_FILE = os.getenv("DOCKER_COMPOSE_ENV_FILE", os.path.join(DEVENV_PATH, "services", "storage",
                                    ".int_test.env"))

# Password of wallet owned by user on behalf of whom we are running tests
# (empty string by default; override via the WALLET_PASS environment variable).
WALLET_PASS = os.getenv("WALLET_PASS", "")
Expand Down

0 comments on commit e71fc06

Please sign in to comment.