diff --git a/deker/ABC/base_array.py b/deker/ABC/base_array.py index cfb65df..a435d59 100644 --- a/deker/ABC/base_array.py +++ b/deker/ABC/base_array.py @@ -287,7 +287,7 @@ def __init__( self.__is_deleted = False self._validate(id_, primary_attributes, custom_attributes) self.__collection: "Collection" = collection - self.__id: str = id_ if id_ else get_id(self) + self.__id: str = id_ if id_ else get_id() self.__adapter = adapter self.__array_adapter = array_adapter diff --git a/deker/integrity.py b/deker/integrity.py index d199ecc..17d70fa 100644 --- a/deker/integrity.py +++ b/deker/integrity.py @@ -27,11 +27,12 @@ DekerBaseApplicationError, DekerCollectionNotExistsError, DekerIntegrityError, + DekerMetaDataError, ) +from deker.managers import ArrayManager, VArrayManager from deker.tools import get_main_path, get_symlink_path from deker.types.private.enums import LocksExtensions - if TYPE_CHECKING: from deker.client import Client @@ -170,8 +171,34 @@ def check_arrays_locks(self, collection: Collection) -> None: if self.stop_on_error and self.errors: raise DekerIntegrityError(self._parse_errors()) + def _check_varrays_or_arrays( + self, collection: Collection, data_manager: Union[ArrayManager, Optional[VArrayManager]] + ) -> None: + """Check if Arrays or VArrays in Collection are initializing. + + :param collection: Collection to be checked + :param data_manager: DataManager to get arrays or varrays from collection + """ + try: + for array in data_manager: + try: + if self.next_checker: + self.next_checker.check(array, collection) + except DekerBaseApplicationError as e: + if self.stop_on_error: + raise DekerIntegrityError(str(e)) + self.errors[ + f"Collection {collection.name} arrays integrity errors:" + ].append(str(e)) + except DekerMetaDataError as e: + if self.stop_on_error: + raise e + self.errors[ + f"Collection {collection.name} (V)Arrays initialization errors:" + ].append(str(e)) + def check(self, collection: Collection) -> None: - """Check if Arrays or VArray in Collection are valid. + """Check if Arrays or VArrays and their locks in Collection are valid. :param collection: Collection to be checked """ @@ -179,29 +206,10 @@ def check(self, collection: Collection) -> None: return self.check_arrays_locks(collection) - for array in collection.arrays: - try: - if self.next_checker: - self.next_checker.check(array, collection) - except DekerBaseApplicationError as e: - if not self.stop_on_error: - self.errors[f"Collection {collection.name} arrays integrity errors:"].append( - str(e) - ) - else: - raise DekerIntegrityError(str(e)) + self._check_varrays_or_arrays(collection, collection.arrays) if collection.varray_schema: - for varray in collection.varrays: - try: - if self.next_checker: - self.next_checker.check(varray, collection) - except DekerBaseApplicationError as e: - if not self.stop_on_error: - self.errors[ - f"Collection {collection.name} varrays integrity errors:" - ].append(str(e)) - else: - raise DekerIntegrityError(str(e)) + self._check_varrays_or_arrays(collection, collection.varrays) + return class CollectionsChecker(BaseChecker): @@ -247,23 +255,14 @@ def check(self, collection_name: Optional[str] = None) -> None: :param collection_name: optional collection to be checked """ if collection_name: - # skipping collections checker - self.next_checker: Optional[BaseChecker] = ( - self.next_checker.next_checker if self.next_checker else None - ) - try: - collection: Collection = self.client.get_collection(collection_name) - if not collection: - raise DekerCollectionNotExistsError( - f"Collection {collection_name} does not exist at this storage" - ) + collection: Collection = self.client.get_collection(collection_name) + if not collection: + raise DekerCollectionNotExistsError( + f"Collection {collection_name} does not exist at this storage" + ) + if self.level > self.CHECKER_LEVEL: if self.next_checker: self.next_checker.check(collection) - except DekerCollectionNotExistsError: - raise - except Exception as e: - self.errors["Collections initialization errors:"].append(str(e)) - return collections = self.check_collections() if self.level > self.CHECKER_LEVEL: collections_pbar = tqdm.tqdm(collections) diff --git a/deker/tools/array.py b/deker/tools/array.py index 59f33c4..5613ae0 100644 --- a/deker/tools/array.py +++ b/deker/tools/array.py @@ -16,17 +16,14 @@ import uuid -from functools import singledispatch -from typing import Any, Dict, List, Tuple, Union +from typing import Dict, List, Tuple, Union import numpy as np from deker_tools.data import convert_size_to_human -from deker_tools.time import get_utc from psutil import swap_memory, virtual_memory from deker.errors import DekerMemoryError, DekerValidationError -from deker.types.private.enums import ArrayType def calculate_total_cells_in_array(seq: Union[Tuple[int, ...], List[int]]) -> int: @@ -110,47 +107,6 @@ def check_memory(shape: tuple, dtype: type, mem_limit_from_settings: int) -> Non ) -def generate_uid(array_type: ArrayType) -> str: - """Generate uuid5 for given array_type. - - :param array_type: Either array or varray - """ - if not isinstance(array_type, ArrayType): - raise TypeError("Invalid argument type. Array type is required") - - namespace = uuid.NAMESPACE_X500 if array_type == ArrayType.array else uuid.NAMESPACE_OID - return str(uuid.uuid5(namespace, array_type.value + get_utc().isoformat())) - - -def get_id(array: Any) -> str: - """Generate unique id by object type and datetime. - - :param array: any object - """ - from deker.arrays import Array, VArray - - @singledispatch - def generate_id(arr: Any) -> str: - """Generate unique id by object type and datetime. - - :param arr: any object - """ - raise TypeError(f"Invalid object type: {type(arr)}") - - @generate_id.register(Array) - def array_id(arr: Array) -> str: # noqa[ARG001] - """Generate id for Array. - - :param arr: Array type - """ - return generate_uid(ArrayType.array) - - @generate_id.register(VArray) - def varray_id(arr: VArray) -> str: # noqa[ARG001] - """Generate id for VArray. - - :param arr: VArray type - """ - return generate_uid(ArrayType.varray) - - return generate_id(array) +def get_id() -> str: + """Generate unique id with uuid4.""" + return str(uuid.uuid4()) diff --git a/tests/test_cases/test_client/test_client_methods.py b/tests/test_cases/test_client/test_client_methods.py index 2939a69..2eb2921 100644 --- a/tests/test_cases/test_client/test_client_methods.py +++ b/tests/test_cases/test_client/test_client_methods.py @@ -379,16 +379,11 @@ def test_client_check_integrity_collection( f.seek(0) json.dump(data, f, indent=4) f.truncate() - - client.check_integrity(2, stop_on_error=False, collection=collection_1.name) + try: + client.check_integrity(2, stop_on_error=False, collection=collection_1.name) + except Exception as e: + assert str(e) == f"Collection \"{collection_1.name}\" metadata is invalid/corrupted: 'test'" errors = capsys.readouterr().out - assert all( - s in errors - for s in ( - "Integrity check is running...\n", - f"Collection \"{collection_1.name}\" metadata is invalid/corrupted: 'test'\n\n", - ) - ) collection_1.delete() collection_2.delete() for root, _, files in os.walk(os.path.curdir): diff --git a/tests/test_cases/test_integrity/test_integrity_checker.py b/tests/test_cases/test_integrity/test_integrity_checker.py index 3cef4a1..59c3fbc 100644 --- a/tests/test_cases/test_integrity/test_integrity_checker.py +++ b/tests/test_cases/test_integrity/test_integrity_checker.py @@ -75,8 +75,14 @@ def test_check_collection_does_not_exist( with pytest.raises(DekerCollectionNotExistsError): integrity_checker.check("collection_does_not_exist") + @pytest.mark.parametrize("check_params", [None, "test_integrity_locks"]) def test_check_locks( - self, client: Client, root_path: Path, array_schema: ArraySchema, ctx: CTX + self, + client: Client, + root_path: Path, + array_schema: ArraySchema, + ctx: CTX, + check_params: str, ): """Tests if function returns error if lock is not found.""" @@ -89,7 +95,7 @@ def test_check_locks( try: filename = collection.path.parent / (collection.name + ".lock") os.remove(filename) - errors = integrity_checker.check() + errors = integrity_checker.check(check_params) assert ( errors == f"Collections locks errors:\n\t- BaseLock for {collection.name} not found\n" @@ -127,6 +133,7 @@ def test_check_extra_locks( os.remove(filename) collection.delete() + @pytest.mark.parametrize("check_params", [None, "test_return"]) def test_check_return( self, array_schema_with_attributes: ArraySchema, @@ -135,6 +142,7 @@ def test_check_return( ctx: CTX, uri: Uri, storage_adapter: Type[BaseStorageAdapter], + check_params: str, ): """Tests if function returns errors.""" integrity_checker = IntegrityChecker( @@ -169,7 +177,7 @@ def test_check_return( Path.unlink(symlink_path / files[0]) try: - errors = integrity_checker.check() + errors = integrity_checker.check(check_params) error_1 = f"Symlink {symlink_path} not found\n" error_2 = f"Array {array_1.id} data is corrupted: Index (9) out of range for (0-1)\n" @@ -177,6 +185,7 @@ def test_check_return( finally: collection.delete() + @pytest.mark.parametrize("check_params", [None, "test_check_array_raises_on_init"]) def test_check_array_raises_on_init( self, array_schema_with_attributes: ArraySchema, @@ -185,6 +194,7 @@ def test_check_array_raises_on_init( ctx: CTX, uri: Uri, storage_adapter: Type[BaseStorageAdapter], + check_params: str, ): """Tests if function raises exception if array file is incorrect.""" collection = client.create_collection( @@ -220,8 +230,9 @@ def test_check_array_raises_on_init( f.flush() try: with pytest.raises(DekerMetaDataError): - assert integrity_checker.check() + integrity_checker.check(check_params) finally: + collection.delete() array.delete() diff --git a/tests/test_cases/test_tools/test_tools.py b/tests/test_cases/test_tools/test_tools.py index 33a3105..7e0eb2b 100644 --- a/tests/test_cases/test_tools/test_tools.py +++ b/tests/test_cases/test_tools/test_tools.py @@ -11,7 +11,6 @@ from deker.collection import Collection from deker.errors import DekerInstanceNotExistsError, DekerMemoryError, DekerValidationError from deker.tools import check_memory, convert_human_memory_to_bytes -from deker.tools.array import generate_uid from deker.tools.time import convert_datetime_attrs_to_iso, convert_iso_attrs_to_datetime @@ -221,12 +220,6 @@ def test_convert_isoformat_attrs_raises(attrs): assert convert_iso_attrs_to_datetime(attrs) -@pytest.mark.parametrize("array_type_arg", (list(), set(), tuple(), dict(), 1, "2", 3.4)) -def test_generate_id_raises(array_type_arg): - with pytest.raises(TypeError): - generate_uid(array_type_arg) - - @pytest.mark.parametrize( "params,result,error", (