Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lock resync #608

Merged
merged 6 commits into from
Oct 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pytest_tests/helpers/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ def start_service(self):
def stop_service(self):
    """Stop this node's service on its host (service name taken from config)."""
    self.host.stop_service(self.name)

def restart_service(self):
    """Restart this node's service on its host (service name taken from config)."""
    self.host.restart_service(self.name)

def get_wallet_password(self) -> str:
    """Return the wallet password read from this service's config attributes."""
    return self._get_attribute(_ConfigAttributes.WALLET_PASSWORD)

Expand Down
92 changes: 92 additions & 0 deletions pytest_tests/testsuites/failovers/test_failover_part.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
import logging

import allure
import pytest
from neofs_testlib.shell import Shell

from cluster import Cluster
from cluster_test_base import ClusterTestBase
from container import create_container
from failover_utils import wait_all_storage_nodes_returned, enable_metabase_resync_on_start
from grpc_responses import OBJECT_NOT_FOUND
from helpers.container import StorageContainer, StorageContainerInfo
from neofs_verbs import get_object
from node_management import delete_node_metadata, start_storage_nodes, stop_storage_nodes
from test_control import expect_not_raises
from wallet import WalletFile, WalletFactory

logger = logging.getLogger("NeoLogger")


@pytest.fixture(scope="module")
def user_wallet(wallet_factory: WalletFactory):
    """Module-scoped user wallet shared by all tests in this suite."""
    with allure.step("Create user wallet with container"):
        return wallet_factory.create_wallet()


@pytest.fixture(scope="function")
def user_container(user_wallet: WalletFile, client_shell: Shell, cluster: Cluster):
    """Create a fresh container owned by the user wallet for every test."""
    container_id = create_container(
        user_wallet.path,
        shell=client_shell,
        endpoint=cluster.default_rpc_endpoint,
    )
    return StorageContainer(
        StorageContainerInfo(container_id, user_wallet), client_shell, cluster
    )


@pytest.mark.failover_part
class TestFailoverNodePart(ClusterTestBase):
    """Failover tests around deletion of storage node metabase files."""

    def _wipe_metabase_and_restart_nodes(self):
        # Shared scenario step: drop metabase files on every node, then bring
        # the whole storage cluster back online.
        with allure.step("Delete metabase files from storage nodes"):
            for storage_node in self.cluster.storage_nodes:
                delete_node_metadata(storage_node)

        with allure.step("Start nodes after metabase deletion"):
            start_storage_nodes(self.cluster.storage_nodes)
            wait_all_storage_nodes_returned(self.cluster)

    def _get_object_from_node(self, storage_object, user_container: StorageContainer, storage_node):
        # Fetch the object directly from one node's RPC endpoint.
        return get_object(
            storage_object.wallet_file_path,
            storage_object.cid,
            storage_object.oid,
            self.shell,
            endpoint=storage_node.get_rpc_endpoint(),
            wallet_config=user_container.get_wallet_config_path(),
        )

    @allure.title("Enable resync metabase, delete metadata and get object")
    @pytest.mark.delete_metadata
    def test_enable_resync_metabase_delete_metadata(
        self,
        enable_metabase_resync_on_start,
        user_container: StorageContainer,
        simple_object_size: int,
    ):
        """With metabase resync enabled, the object must survive metabase deletion."""
        storage_object = user_container.generate_object(simple_object_size)

        self._wipe_metabase_and_restart_nodes()

        with allure.step("Try to fetch object from each storage node"):
            for storage_node in self.cluster.storage_nodes:
                with expect_not_raises():
                    self._get_object_from_node(storage_object, user_container, storage_node)

    @allure.title("Delete metadata without resync metabase enabling, delete metadata try to get object")
    @pytest.mark.delete_metadata
    def test_delete_metadata(self, user_container: StorageContainer, simple_object_size: int):
        """Without metabase resync, the object must be reported as not found."""
        storage_object = user_container.generate_object(simple_object_size)

        self._wipe_metabase_and_restart_nodes()

        with allure.step("Try to fetch object from each storage node"):
            for storage_node in self.cluster.storage_nodes:
                with pytest.raises(Exception, match=OBJECT_NOT_FOUND):
                    self._get_object_from_node(storage_object, user_container, storage_node)
76 changes: 75 additions & 1 deletion pytest_tests/testsuites/object/test_object_lock.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from common import STORAGE_GC_TIME
from complex_object_actions import get_link_object, get_storage_object_chunks
from epoch import ensure_fresh_epoch, get_epoch, tick_epoch, tick_epoch_and_wait
from failover_utils import wait_all_storage_nodes_returned, enable_metabase_resync_on_start, docker_compose_restart_storage_nodes
from grpc_responses import (
LIFETIME_REQUIRED,
LOCK_NON_REGULAR_OBJECT,
Expand All @@ -18,7 +19,7 @@
OBJECT_NOT_FOUND,
)
from neofs_testlib.shell import Shell
from node_management import drop_object
from node_management import drop_object, delete_node_metadata, stop_storage_nodes, start_storage_nodes
from pytest import FixtureRequest
from python_keywords.container import create_container
from python_keywords.neofs_verbs import delete_object, head_object, lock_object
Expand Down Expand Up @@ -684,3 +685,76 @@ def test_link_object_of_complex_object_should_also_be_protected_from_deletion(
self.shell,
self.cluster.default_rpc_endpoint,
)

@pytest.mark.delete_metadata
@allure.title("The locked object must be protected from deletion after metabase deletion "
              "(metabase resynchronization must be enabled), and after restarting storage nodes")
@pytest.mark.parametrize(
    "new_locked_storage_object",
    [pytest.lazy_fixture("simple_object_size"), pytest.lazy_fixture("complex_object_size")],
    ids=["simple object", "complex object"],
    indirect=True,
)
def test_the_object_lock_should_be_kept_after_metabase_deletion(
    self,
    new_locked_storage_object: StorageObjectInfo,
    enable_metabase_resync_on_start,
):
    """
    Lock objects should fill metabase on resync_metabase
    """

    def _nodes_holding_object():
        # Snapshot (and log) the set of nodes that currently report the object.
        with allure.step("Log nodes with object"):
            return get_nodes_with_object(
                new_locked_storage_object.cid,
                new_locked_storage_object.oid,
                shell=self.shell,
                nodes=self.cluster.storage_nodes,
            )

    def _expect_delete_rejected(step_title):
        # Deleting a locked object must always fail with OBJECT_IS_LOCKED.
        with allure.step(step_title):
            with pytest.raises(Exception, match=OBJECT_IS_LOCKED):
                delete_object(
                    new_locked_storage_object.wallet_file_path,
                    new_locked_storage_object.cid,
                    new_locked_storage_object.oid,
                    self.shell,
                    self.cluster.default_rpc_endpoint,
                )

    nodes_with_object_before_first_try = _nodes_holding_object()

    _expect_delete_rejected(
        f"Try to delete object {new_locked_storage_object.oid} before metabase deletion"
    )

    nodes_with_object_after_first_try = _nodes_holding_object()
    assert nodes_with_object_before_first_try == nodes_with_object_after_first_try

    with allure.step("Delete metabase files from storage nodes"):
        for node in self.cluster.storage_nodes:
            delete_node_metadata(node)

    with allure.step("Start nodes after metabase deletion"):
        start_storage_nodes(self.cluster.storage_nodes)
        wait_all_storage_nodes_returned(self.cluster)

    nodes_with_object_after_metabase_deletion = _nodes_holding_object()
    assert nodes_with_object_before_first_try == nodes_with_object_after_metabase_deletion

    _expect_delete_rejected(
        f"Try to delete object {new_locked_storage_object.oid} after metabase deletion"
    )
92 changes: 90 additions & 2 deletions robot/resources/lib/python_keywords/failover_utils.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,22 @@
import logging
import os
import subprocess
from time import sleep

import allure
from typing import List, Tuple, Optional
from urllib.parse import urlparse

import pytest

from utility import parse_time
from cluster import Cluster, StorageNode
from neofs_testlib.shell import Shell
from neofs_testlib.hosting import Hosting
from python_keywords.node_management import storage_node_healthcheck
from python_keywords.node_management import storage_node_healthcheck, stop_storage_nodes
from storage_policy import get_nodes_with_object
from common import MORPH_CHAIN_SERVICE_NAME_REGEX, ENDPOINT_INTERNAL0
from common import MORPH_CHAIN_SERVICE_NAME_REGEX, ENDPOINT_INTERNAL0, DOCKER_COMPOSE_ENV_FILE, \
DOCKER_COMPOSE_STORAGE_CONFIG_FILE, METABASE_RESYNC_TIMEOUT

logger = logging.getLogger("NeoLogger")

Expand Down Expand Up @@ -71,3 +78,84 @@ def get_morph_chain_endpoints(hosting: Hosting) -> List[Tuple[str, str]]:
port = str(parsed_url.port)
endpoints.append((addr, port))
return endpoints


roman-khimov marked this conversation as resolved.
Show resolved Hide resolved
@allure.step("Docker compose restart storage nodes containers with new env file")
def docker_compose_restart_storage_nodes(cluster: Cluster):
    """
    Recreate the storage node containers so they pick up a new env file.

    Not using docker-compose restart because the container needs to be started
    with new environment variables; a plain restart keeps the old environment,
    so the containers are torn down and brought up again instead.

    Args:
        cluster: cluster whose storage nodes should be recreated.

    Raises:
        subprocess.CalledProcessError: if docker-compose down/up fails.
    """
    stop_storage_nodes(cluster.storage_nodes)
    with allure.step("Docker-compose down"):
        # check=True: fail fast instead of silently continuing with stale containers.
        subprocess.run(
            ["docker-compose", "-f", DOCKER_COMPOSE_STORAGE_CONFIG_FILE, "down"],
            check=True,
        )
    with allure.step("Docker-compose up"):
        subprocess.run(
            ["docker-compose", "-f", DOCKER_COMPOSE_STORAGE_CONFIG_FILE, "up", "-d"],
            check=True,
        )
    wait_all_storage_nodes_returned(cluster)
    with allure.step("Log resync status"):
        for node in cluster.storage_nodes:
            # Best-effort diagnostics only — no check=True here, a failed
            # inspect must not fail the whole fixture.
            envs = subprocess.run(
                ["docker", "inspect", "-f",
                 "'{{range $index, $value := .Config.Env}}{{$value}} {{end}}'",
                 node.name],
                capture_output=True,
            )
            env_stdout = envs.stdout.decode("utf-8")
            logger.debug(f"ENV from {node.name}: {env_stdout}")


@pytest.fixture(scope="function")
@allure.title("Enable metabase resync on start")
def enable_metabase_resync_on_start(cluster: Cluster):
    """
    Force NEOFS_STORAGE_SHARD_0_RESYNC_METABASE and
    NEOFS_STORAGE_SHARD_1_RESYNC_METABASE to true in DOCKER_COMPOSE_ENV_FILE
    for the duration of the test, restarting the storage containers so the new
    environment takes effect; restore the file (and restart again) on teardown.

    Env-file handling:
      * any pre-existing unrelated variables are kept as-is;
      * RESYNC_METABASE variables present with any value (e.g. false) are
        switched to true in place, without duplicating the line;
      * RESYNC_METABASE variables missing from the file are appended;
      * after the test the file is restored to its initial content.
    """
    file_path = DOCKER_COMPOSE_ENV_FILE
    if not os.path.exists(file_path):
        pytest.fail(f'File {file_path} does not exist!')

    with open(file_path, 'r') as file:
        lines = file.readlines()
    logger.debug(f"Initial file content:\n{''.join(lines)}")

    target_vars = (
        'NEOFS_STORAGE_SHARD_0_RESYNC_METABASE',
        'NEOFS_STORAGE_SHARD_1_RESYNC_METABASE',
    )
    # Variables that still need to be appended because they were not found.
    missing_vars = set(target_vars)

    modified_lines = []
    for line in lines:
        key = line.split('=', 1)[0].strip()
        if key in missing_vars:
            # Rewrite the whole line instead of a substring replace: this
            # avoids doubled newlines (the old "...=true\n" replacement on a
            # line that already ended with "\n") and avoids appending a
            # duplicate when the variable is already set to true.
            line = f'{key}=true\n'
            missing_vars.discard(key)
        modified_lines.append(line)

    # Append the variables that were not present at all, in a stable order.
    modified_lines.extend(f'{var}=true\n' for var in target_vars if var in missing_vars)

    modified_content = ''.join(modified_lines)

    with open(file_path, 'w') as file:
        file.write(modified_content)
    logger.debug(f"Modified file content:\n{modified_content}")

    with allure.step("Restart docker compose to apply the changes"):
        docker_compose_restart_storage_nodes(cluster)

    yield

    # Teardown: restore the original env file byte-for-byte and revert.
    with open(file_path, 'w') as file:
        file.writelines(lines)
    logger.debug(f"Restored file content:\n{''.join(lines)}")

    with allure.step("Restart docker compose to revert the changes"):
        docker_compose_restart_storage_nodes(cluster)

    with allure.step(f"Waiting {METABASE_RESYNC_TIMEOUT} seconds for the metabase to synchronize"):
        sleep(parse_time(METABASE_RESYNC_TIMEOUT))

33 changes: 33 additions & 0 deletions robot/resources/lib/python_keywords/node_management.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,28 @@ def start_storage_nodes(nodes: list[StorageNode]) -> None:
node.start_service()


@allure.step("Stop storage nodes")
def stop_storage_nodes(nodes: list[StorageNode]) -> None:
    """
    Stop every storage node in the given list by shutting down its service.

    Args:
        nodes: the list of nodes to stop
    """
    for storage_node in nodes:
        storage_node.stop_service()


@allure.step("Restart storage nodes")
def restart_storage_nodes(nodes: list[StorageNode]) -> None:
    """
    Restart every storage node in the given list via its host service.

    Args:
        nodes: the list of nodes to restart
    """
    for storage_node in nodes:
        storage_node.restart_service()


@allure.step("Get Locode from random storage node")
def get_locode_from_random_node(cluster: Cluster) -> str:
node = random.choice(cluster.storage_nodes)
Expand Down Expand Up @@ -157,6 +179,17 @@ def delete_node_data(node: StorageNode) -> None:
time.sleep(parse_time(MORPH_BLOCK_TIME))


@allure.step("Delete metadata from host for node {node}")
def delete_node_metadata(node: StorageNode) -> None:
    """
    The function deletes metadata from host for specified node.

    The service is stopped first so its data files are not in use while
    they are being removed; the caller is responsible for starting the
    node again afterwards.

    Args:
        node: node for which metadata should be deleted.
    """
    node.stop_service()
    # cache_only=True — presumably restricts deletion to the metadata/cache
    # (metabase) files rather than the full node storage; confirm against the
    # Host.delete_storage_node_data API.
    node.host.delete_storage_node_data(node.name, cache_only=True)


@allure.step("Exclude node {node_to_exclude} from network map")
def exclude_node_from_network_map(
node_to_exclude: StorageNode,
Expand Down
5 changes: 5 additions & 0 deletions robot/variables/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
MAINNET_TIMEOUT = os.getenv("MAINNET_TIMEOUT", "1min")
MORPH_BLOCK_TIME = os.getenv("MORPH_BLOCK_TIME", "1s")
NEOFS_CONTRACT_CACHE_TIMEOUT = os.getenv("NEOFS_CONTRACT_CACHE_TIMEOUT", "30s")
METABASE_RESYNC_TIMEOUT = os.getenv("METABASE_RESYNC_TIMEOUT", "10s")

# Time interval that allows a GC pass on storage node (this includes GC sleep interval
# of 1min plus 15 seconds for GC pass itself)
Expand All @@ -25,6 +26,10 @@
TEST_FILES_DIR = os.getenv("TEST_FILES_DIR", "TestFilesDir")
TEST_OBJECTS_DIR = os.getenv("TEST_OBJECTS_DIR", "TestObjectsDir")
DEVENV_PATH = os.getenv("DEVENV_PATH", os.path.join("..", "neofs-dev-env"))

# Paths to the dev-env storage service compose file and its env file; both
# can be overridden via the environment.
_STORAGE_SERVICE_DIR = os.path.join(DEVENV_PATH, "services", "storage")
DOCKER_COMPOSE_STORAGE_CONFIG_FILE = os.getenv(
    "DOCKER_COMPOSE_STORAGE_CONFIG_FILE",
    os.path.join(_STORAGE_SERVICE_DIR, "docker-compose.yml"),
)
DOCKER_COMPOSE_ENV_FILE = os.getenv(
    "DOCKER_COMPOSE_ENV_FILE",
    os.path.join(_STORAGE_SERVICE_DIR, ".int_test.env"),
)

# Password of wallet owned by user on behalf of whom we are running tests
WALLET_PASS = os.getenv("WALLET_PASS", "")
Expand Down
Loading