From d08edbc1c98931c1f3284465a56dad643cd3ea96 Mon Sep 17 00:00:00 2001 From: Jsalz2000 Date: Tue, 27 Aug 2024 15:04:16 -0500 Subject: [PATCH 01/10] Adds existing code for azure blob destination --- .gitignore | 4 + Makefile | 3 + README.rst | 3 + requirements.in | 2 +- requirements.txt | 2 +- requirements_dev.in | 1 + requirements_dev.txt | 2 + twindb_backup/configuration/__init__.py | 19 +- .../configuration/destinations/az.py | 31 + twindb_backup/destination/az.py | 281 +++++ twindb_backup/destination/azblob.py | 1097 ----------------- 11 files changed, 345 insertions(+), 1100 deletions(-) create mode 100644 twindb_backup/configuration/destinations/az.py create mode 100644 twindb_backup/destination/az.py delete mode 100644 twindb_backup/destination/azblob.py diff --git a/.gitignore b/.gitignore index bdaf4a66..a5711735 100644 --- a/.gitignore +++ b/.gitignore @@ -64,3 +64,7 @@ target/ /vagrant/README.html /.pytest_cache/ /.venv/ + +# asdf +.envrc +.tool-versions diff --git a/Makefile b/Makefile index 6f4068f0..982a72ca 100644 --- a/Makefile +++ b/Makefile @@ -174,6 +174,9 @@ endif ifeq ($(OS_VERSION),7) PLATFORM = centos endif +ifeq ($(OS_VERSION),8) + PLATFORM = centos +endif package: ## Build package - OS_VERSION must be one of: jammy, focal. @docker run \ diff --git a/README.rst b/README.rst index 748131f2..a5d5009d 100644 --- a/README.rst +++ b/README.rst @@ -54,6 +54,7 @@ backup destination. It can be: - Amazon S3 bucket - Google Cloud Storage bucket +- Azure Blob Storage account - Any server with SSH demon .. figure:: https://user-images.githubusercontent.com/1763754/56677794-20901b80-6676-11e9-8f71-8de0b0b6f066.png @@ -92,6 +93,7 @@ Features **TwinDB Backup** storage options: - Amazon S3 +- Azure Blob Storage - Google Cloud Storage - Remote SSH server - Optional local copy @@ -147,6 +149,7 @@ Possible ``OS_VERSION`` values: * jammy * focal * 7 (for CentOS 7) + * 8 (for CentOS 8) .. 
code-block:: console diff --git a/requirements.in b/requirements.in index eac6f4bb..adc1eabb 100644 --- a/requirements.in +++ b/requirements.in @@ -1,6 +1,6 @@ #@IgnoreInspection BashAddShebang azure-core ~= 1.24 -azure-storage-blob ~= 12.12 +azure-storage-blob ~= 12.19 Click ~= 8.1 PyMySQL ~= 1.0 boto3 ~= 1.7 diff --git a/requirements.txt b/requirements.txt index 3282cfee..daaa43d9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,7 +8,7 @@ azure-core==1.29.4 # via # -r requirements.in # azure-storage-blob -azure-storage-blob==12.18.3 +azure-storage-blob==12.22.0 # via -r requirements.in bcrypt==4.0.1 # via paramiko diff --git a/requirements_dev.in b/requirements_dev.in index a12322d0..570e1a0c 100644 --- a/requirements_dev.in +++ b/requirements_dev.in @@ -1,4 +1,5 @@ #@IgnoreInspection BashAddShebang +azure-storage-blob ~= 12.19 black ~= 22.3 Sphinx ~= 4.5 bumpversion ~= 0.6 diff --git a/requirements_dev.txt b/requirements_dev.txt index d624b925..3a530fcb 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -8,6 +8,8 @@ alabaster==0.7.13 # via sphinx astroid==2.15.8 # via pylint +azure-storage-blob==12.22.0 + # via -r requirements.in babel==2.13.0 # via sphinx black==22.12.0 diff --git a/twindb_backup/configuration/__init__.py b/twindb_backup/configuration/__init__.py index c309ee26..bd9ed2d1 100644 --- a/twindb_backup/configuration/__init__.py +++ b/twindb_backup/configuration/__init__.py @@ -8,6 +8,7 @@ from twindb_backup import INTERVALS, LOG from twindb_backup.configuration.compression import CompressionConfig +from twindb_backup.configuration.destinations.az import AZConfig from twindb_backup.configuration.destinations.gcs import GCSConfig from twindb_backup.configuration.destinations.s3 import S3Config from twindb_backup.configuration.destinations.ssh import SSHConfig @@ -16,6 +17,7 @@ from twindb_backup.configuration.mysql import MySQLConfig from twindb_backup.configuration.retention import RetentionPolicy from twindb_backup.configuration.run_intervals import RunIntervals +from twindb_backup.destination.az import AZ from twindb_backup.destination.gcs import GCS from twindb_backup.destination.s3 import S3 from twindb_backup.destination.ssh import Ssh @@ -97,6 +99,15 @@ def ssh(self): except NoSectionError: return None + @property + def az(self): # pylint: disable=invalid-name + """Azure Blob configuration""" + try: + return AZConfig(**self.__read_options_from_section("az")) + + except NoSectionError: + return None + @property def s3(self): # pylint: disable=invalid-name """Amazon S3 configuration""" @@ -241,7 +252,13 @@ def destination(self, backup_source=socket.gethostname()): gc_encryption_key=self.gcs.gc_encryption_key, hostname=backup_source, ) - + elif backup_destination == "az": + return AZ( + connection_string=self.az.connection_string, + container_name=self.az.container_name, + chunk_size=self.az.chunk_size, + hostname=backup_source, + ) else: raise ConfigurationError(f"Unsupported destination '{backup_destination}'") except NoSectionError as err: diff --git a/twindb_backup/configuration/destinations/az.py b/twindb_backup/configuration/destinations/az.py new file mode 100644 index 00000000..f0470835 --- /dev/null +++ b/twindb_backup/configuration/destinations/az.py @@ -0,0 +1,31 @@ +"""Azure Blob Storage destination configuration""" + + +class AZConfig: + """Azure Blob Storage Configuration.""" + + def __init__( + self, + connection_string, + container_name, + chunk_size=1024*1024*4 # 4MiB + ): + + self._connection_string = connection_string + 
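# The connection string is handed unchanged to BlobServiceClient.from_connection_string()
+        # by the AZ destination; a typical value (hypothetical placeholder shown) looks like
+        # "DefaultEndpointsProtocol=https;AccountName=<account>;AccountKey=<key>;EndpointSuffix=core.windows.net".
+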
self._container_name = container_name + self._chunk_size = chunk_size + + @property + def connection_string(self): + """CONNECTION_STRING""" + return self._connection_string + + @property + def container_name(self): + """CONTAINER_NAME""" + return self._container_name + + @property + def chunk_size(self): + """CHUNK_SIZE""" + return self._chunk_size diff --git a/twindb_backup/destination/az.py b/twindb_backup/destination/az.py new file mode 100644 index 00000000..c14133b3 --- /dev/null +++ b/twindb_backup/destination/az.py @@ -0,0 +1,281 @@ +# -*- coding: utf-8 -*- +""" +Module for Azure destination. +""" +import os +import re +import socket +import time + +from contextlib import contextmanager +from multiprocessing import Process +from urllib.parse import urlparse + +from azure.storage.blob import BlobServiceClient + +from twindb_backup import LOG +from twindb_backup.destination.base_destination import BaseDestination +from twindb_backup.destination.exceptions import ( + FileNotFound, + S3DestinationError, +) +from twindb_backup.exceptions import OperationError + +### DEFAULT VALUES SECTION +class AZFileAccess(object): # pylint: disable=too-few-public-methods + """Access modes for AZ files""" + + public_read = "public-read" + private = "private" + + +class AZ(BaseDestination): + """ + AZ destination class. + + :param kwargs: Keyword arguments. + + * **container_name** - Azure container name + * **connection_string** - Azure connection string for the storage account + * **hostname** - Hostname of a host where a backup is taken from. + * **chunk_size** - The size in byptes for read/write streams, default 4MB + """ + + def __init__(self, **kwargs): + + self._container_name = kwargs.get("container_name") + self._connection_string = kwargs.get("connection_string") + self._hostname = kwargs.get("hostname", socket.gethostname()) + self._chunk_size = kwargs.get("chunk_size", 4*1024*1024) + + self.remote_path = "/" + super(AZ, self).__init__(self.remote_path) + + try: + LOG.debug("Initilizing Azure connection to the storage account using connection string (length=" + str(len(self._connection_string)) + ")") + self.service_client = BlobServiceClient.from_connection_string(self._connection_string) + except: + LOG.error("Failed to connect to Azure storage account using the connection string") + exit(1) + + # Check to see if the container exists, otherwise create the container + try: + LOG.debug("Setting up the container(" + self._container_name + ") connection") + self.client = self.service_client.get_container_client(self._container_name) + except: + LOG.debug("The container(" + self._container_name + ") does not exist... creating it") + self.create_container() + + def bucket(self): + """S3 bucket name.... compatibility???""" + return self._container_name + + def create_bucket(self): + """Compatibility.""" + return create_container(self) + + def create_container(self): + """Creates the container in the Azure storage account that will store the backups. + + """ + container_exists = True + + try: + self.client = self.service_client.create_container(self._container_name) + except ClientError as err: + # We come here meaning we did not find the container + raise + + LOG.info("Azure container creation was successful %s", self._container_name) + return True + + def list_files( + self, prefix=None, recursive=False, pattern=None, files_only=False + ): + """ + List files in the destination that have common prefix. 
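+
+        Note: ``prefix``, ``recursive`` and ``files_only`` are accepted for interface
+        compatibility but are currently ignored; every blob in the container is listed
+        and, when ``pattern`` is given, the names are filtered with ``re.search``.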
+ """ + + files = [] + LOG.debug("AZ Listing files") + for blob in self.client.list_blobs(): + if pattern: + if re.search(pattern, blob.name): + files.append(blob.name) + else: + files.append(blob.name) + + return sorted(files) + + def _list_files(self, prefix=None, recursive=False, files_only=False): + raise NotImplementedError + + def _uplaod_blob_options(self, **kwargs): + return True + + def delete(self, path): + """ + Deletes a Azure blob in the container + """ + + try: + LOG.info("Deleting blob: " + path) + return self.client.delete_blob(path) + except: + LOG.error("FAILED to delete blob: " + path) + raise + + def delete_all_objects(self): + """ + Delete all blobs in the container + """ + LOG.info("Deleting ALL blobs in container: " + self._container_name) + for blob in slef.ls(): + self.delete(blob) + + return True + + def delete_bucket(self, force=False): + """ + Delete the container and contents, this is a recursive delete (including all blobs in the container) + """ + try: + LOG.info("Performing recusrsive delete of container and all blobs in container: " + self._container_name) + self.client.delete_container() + except: + raise + + return True + + def read(self, filepath): + """ + Read the status blob (filepath) and return contents to the caller + """ + try: + LOG.debug("Attempting to read blob: " + filepath) + blob_client = self.client.get_blob_client(filepath) + return blob_client.download_blob().readall().decode("utf-8") + + except Exception as err: + LOG.info("The blob " + filepath + " does not exist or there was an issue reading it") + + except: + raise + + def save(self, handler, filepath): + """ + Read from handler and save it to Azure blob + + :param filepath: save backup copy in a file with this name + :param handler: stdout handler from backup source + """ + + LOG.debug("Attempting to write blob: " + filepath) + blob_client = self.client.get_blob_client(filepath) + + with handler as file_obj: + try: + blob_client.upload_blob(file_obj) + + except Exception as err: + LOG.info("The blob " + filepath + " already exists, no need to upload (ignoring)") + + except: + raise + + + @contextmanager + def get_stream(self, copy): + """ + Get a PIPE handler with content of the backup copy streamed from + the destination. + :param copy: Backup copy + :type copy: BaseCopy + :return: Stream with backup copy + :rtype: generator + :raise : if failed to stream a backup copy. + """ + + path = "%s/%s" % (self.remote_path, copy.key) + object_key = urlparse(path).path.lstrip("/") + + def _download_object(self, path, read_fd, write_fd): + # The read end of the pipe must be closed in the child process + # before we start writing to it. + os.close(read_fd) + + # twindb appears to be munging the actual path of the objects as opposed to + # using the list of the valid object paths ... wtf? + # anyway... 
let's decompile it, grab the host and the actual file name + # then do some matching based on what really exists :P + LOG.debug("Transforming requested restore path: " + path) + exploded_path = path.split('/') + host = exploded_path[1] # first element, the call path begins with / + file = exploded_path[len(exploded_path)-1] # last element + path = '' + for blob in self.list_files(pattern=file): + if re.search(host, blob): + path = blob + + LOG.debug("Tranformed path to match existing blob: " + path) + + + blob_client = self.client.get_blob_client(path) + with os.fdopen(write_fd, "wb") as w_pipe: + try: + retry_interval = 2 + for _ in range(10): + try: + w_pipe.write(blob_client.download_blob().readall()) + except: + raise + + except IOError as err: + LOG.error(err) + exit(1) + + except: + raise + + download_proc = None + try: + blob_client = self.client.get_blob_client(path) + LOG.debug("Fetching blob %s from container %s", path, self._container_name) + + read_pipe, write_pipe = os.pipe() + + download_proc = Process( + target=_download_object, + args=( + self, + path, + read_pipe, + write_pipe, + ), + name="_download_object", + ) + download_proc.start() + + # The write end of the pipe must be closed in this process before + # we start reading from it. + os.close(write_pipe) + LOG.debug("read_pipe type: %s", type(read_pipe)) + yield read_pipe + + os.close(read_pipe) + download_proc.join() + + if download_proc.exitcode: + LOG.error("Failed to download %s", path) + # exit(1) + + LOG.debug("Successfully streamed %s", path) + + finally: + if download_proc: + download_proc.join() + + def write(self, content, filepath): + LOG.debug("Overwriting status file: " + filepath) + blob_client = self.client.get_blob_client(filepath) + blob_client.upload_blob(content, overwrite=True) diff --git a/twindb_backup/destination/azblob.py b/twindb_backup/destination/azblob.py deleted file mode 100644 index 40818764..00000000 --- a/twindb_backup/destination/azblob.py +++ /dev/null @@ -1,1097 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Module for Azure-blob destination. -""" -# builtin module imports -import gc -import io -import multiprocessing as mp -import os -import sys -import time -import traceback -from contextlib import contextmanager -from functools import wraps -from multiprocessing.connection import Connection as mpConn -from pathlib import Path -from textwrap import indent -from typing import AnyStr, Callable, Dict, Generator, Iterable, List, Optional, Tuple, Union - -# Third party module imports -from azure.core.exceptions import ResourceExistsError, ResourceNotFoundError -from azure.storage.blob import ( - BlobClient, - BlobProperties, - BlobServiceClient, - ContainerClient, - ContainerProperties, - StorageStreamDownloader, -) - -# project sub-module imports -from twindb_backup import LOG -from twindb_backup.copy.mysql_copy import MySQLCopy -from twindb_backup.destination.base_destination import BaseDestination -from twindb_backup.destination.exceptions import AzureBlobDestinationError - -IterableClientType = Iterable[Union[BlobServiceClient, ContainerClient, BlobClient]] -DEFAULT_AVAILABLE_CPU = os.cpu_count() -GC_TOGGLE_DEPTH = 0 -"""GC_TOGGLE_DEPTH is used as a reference counter for managing when the _gc_toggle function should call gc.enable().""" -ONE_MiB = 2**20 -MAX_PIPE_CHUNK_BYTES = 8 * ONE_MiB -MAX_SYS_MEM_USE = 512 * ONE_MiB -"""MAX_PIPE_CHUNK_BYTES is a conservatively safe upper bound on the number of bytes we send through -`multiprocessing.connections.Connection` objects. 
- -This boundary will be derived for the current machine's OS at runtime. - -Per the official Python 3.9.6 documentation: -:: - - send(obj) - Send an object to the other end of the connection which should be read using recv(). - - The object must be picklable. Very large pickles (approximately 32 MiB+, though it depends on the OS) - may raise a ValueError exception. - -For source documentation on send(obj) see: - - https://docs.python.org/3/library/multiprocessing.html#multiprocessing.connection.Connection.send -""" -NONE_LABEL = "None" -BSC_LABEL = "BlobServiceClient" -CC_LABEL = "ContainerClient" -BC_LABEL = "BlobClient" - - -class ClientWrapper: - """The ContainerWrapper class exists to simplify the process of ensuring that a container's name - is accessible from mixed types of inputs. - - """ - - def __init__(self, name: str = None, props: Optional[ContainerProperties] = None) -> None: - self._name = name or None - if not self._name and props is not None: - self._name = props.name - - @property - def name(self) -> str: - return self._name - - -HasNameAttr = Union[ClientWrapper, ContainerProperties] -IterableHasName = Iterable[HasNameAttr] -StrOrHasName = Union[str, HasNameAttr] -IterableStrOrHasName = Iterable[StrOrHasName] - - -def _assemble_fname(path_dict: dict) -> str: - interval = path_dict.get("interval", None) - media = path_dict.get("media_type", None) - prefix = path_dict.get("fname_prefix", None) - fname = path_dict.get("fname", None) - return "/".join((part for part in (interval, media, prefix, fname) if part)) - - -@contextmanager -def _gc_toggle(): - """A context manager that toggles garbage collection off-at-entry and back-on-at-exit. - :return: A bool value indicating if gc was enabled when this context was entered - """ - global GC_TOGGLE_DEPTH - try: - gc.disable() - GC_TOGGLE_DEPTH += 1 - yield GC_TOGGLE_DEPTH - GC_TOGGLE_DEPTH -= 1 - finally: - if GC_TOGGLE_DEPTH == 0: - gc.enable() - - -def _client_name_gen(obj: Union[StrOrHasName, IterableStrOrHasName]) -> str: - if obj: - if isinstance(obj, (str, ClientWrapper, BlobProperties, ContainerProperties)): - obj = (obj,) - for elem in obj: - if isinstance(elem, str): - yield elem - elif isinstance(elem, (ClientWrapper, BlobProperties, ContainerProperties)): - yield elem.name - else: - yield from _client_name_gen(elem) - - -def _ensure_containers_exist(conn_str: str, container: Union[StrOrHasName, IterableStrOrHasName]): - """ - If we have been given a container name (or an iterable of container names) we should ensure they - exist and are ready to be acted upon before returning them to the caller. - Otherwise they will encounter the potentially troublesome `ResourceNotFoundError` - Example of how it becomes troublesome: - If a caller deletes a container just before calling this function, - there will be an some indeterminate amount of time while that delete operation is being - performed that any subsequent operations attempting to create the container will - raise `ResourceExistsError` and operations that would - interact with an existing resource will raise `ResourceNotFoundError`. 
- """ - gen = _client_name_gen(container) - delay_max = 10 - delay = 0.1 - while True: - unfinished = [] - for cont in gen: - _client: ContainerClient = ContainerClient.from_connection_string(conn_str, cont) - try: - cprop: ContainerProperties = _client.get_container_properties(timeout=2) - # getting etag confirms container is fully created - etag = getattr(cprop, "etag", cprop["etag"]) - except ResourceNotFoundError: - try: - cprop: ContainerProperties = _client.create_container(timeout=2) - # getting etag confirms container is fully created - etag = getattr(cprop, "etag", cprop["etag"]) - except ResourceExistsError: - # We are getting both resource existance errors, meaning the container - # is likely being deleted and we can't recreate it till that operation - # has finished. So, add the container back to our queue and we'll try - # again later. - unfinished.append(cont) - finally: - _client.close() - if not unfinished: - break - gen = _client_name_gen(unfinished) - # added delay to ensure we don't jackhammer requests to remote service. - time.sleep(delay) - delay = min(delay_max, delay + delay) - - -def flatten_client_iters(clients: List[Union[ContainerClient, List[BlobClient]]]): - errs: Dict[str, List[Dict[str, str]]] = {} - for cclient in clients: - if isinstance(cclient, list): - for bclient in cclient: - try: - yield bclient - except BaseException as be: - exc_type, exc_value, exc_traceback = sys.exc_info() - be.with_traceback(exc_traceback) - errs.setdefault(exc_type, []).append( - { - "original": be, - "exc_type": exc_type, - "exc_value": exc_value, - } - ) - else: - try: - yield cclient - except BaseException as be: - exc_type, exc_value, exc_traceback = sys.exc_info() - be.with_traceback(exc_traceback) - errs.setdefault(exc_type, []).append( - { - "original": be, - "exc_type": exc_type, - "exc_value": exc_value, - } - ) - if errs: - err = AzureClientManagerError(f"There were {len(errs)} errors while accessing the flattened clients iterable.") - err.aggregated_traceback = [] - for e, lst in errs.items(): - agg_tb = [] - for args in lst: - args: dict - oe: BaseException = args["original"] - tb = "".join(traceback.format_exception(args["exc_type"], args["exc_value"], oe.__traceback__)) - agg_tb.append(indent(tb, "\t")) - agg_tb = "\n\n".join(agg_tb) - agg_tb = f"\n{'=' * 120}\n{agg_tb}{'-' * 120}" - err.aggregated_traceback.append(agg_tb) - LOG.exception("\n".join(err.aggregated_traceback), exc_info=err) - # raise err - err.err_map = errs - err.args += (errs,) - raise err - - -def client_generator( - conn_str, - container: Optional[Union[StrOrHasName, IterableStrOrHasName]] = None, - prefix: Optional[str] = None, - blob: Optional[Union[StrOrHasName, IterableStrOrHasName]] = None, - recurse: bool = False, -) -> Generator[Union[str, BlobServiceClient, ContainerClient, BlobClient], None, None]: - # forward declared type hints - bprop: BlobProperties - cprop: ContainerProperties - # scope shared state flags - blobs_yielded = False - containers_yielded = False - service_clients_yielded = False - - # a couple of inner functions for handling different client iteration strategies - def client_iter(container_iterable): - nonlocal blobs_yielded, containers_yielded - for c in container_iterable: - with ContainerClient.from_connection_string(conn_str, c) as container_client: - container_client: ContainerClient - if prefix is not None or blob is not None: - for bprop in container_client.list_blobs(prefix): - bname: str = bprop.name - _name = bname.rpartition("/")[2] - if check_blob(_name): - 
with container_client.get_blob_client(bprop.name) as blob_client: - if not blobs_yielded: - yield BC_LABEL - blobs_yielded = True - yield blob_client - elif recurse: - for bprop in container_client.list_blobs(): - with container_client.get_blob_client(bprop.name) as blob_client: - if not blobs_yielded: - yield BC_LABEL - blobs_yielded = True - yield blob_client - else: - if not containers_yielded: - yield CC_LABEL - containers_yielded = True - yield container_client - if not (blobs_yielded or containers_yielded): - for c in _client_name_gen(container): - with ContainerClient.from_connection_string(conn_str, c) as container_client: - container_client: ContainerClient - if recurse: - for bprop in container_client.list_blobs(): - with BlobClient.from_connection_string( - conn_str, bprop.container, bprop.name - ) as blob_client: - if not blobs_yielded: - yield BC_LABEL - blobs_yielded = True - yield blob_client - else: - if not containers_yielded: - yield CC_LABEL - containers_yielded = True - yield container_client - - # second of the inner functions for client iteration strategies - def bsc_iter(): - nonlocal service_clients_yielded, containers_yielded, blobs_yielded - with BlobServiceClient.from_connection_string(conn_str) as service_client: - service_client: BlobServiceClient - if (prefix or blob) and not (blobs_yielded or containers_yielded): - yield from client_iter(service_client.list_containers()) - elif recurse: - for c in service_client.list_containers(): - with service_client.get_container_client(c) as container_client: - for b in container_client.list_blobs(): - with container_client.get_blob_client(b) as blob_client: - if not blobs_yielded: - yield BC_LABEL - blobs_yielded = True - yield blob_client - if not (blobs_yielded or containers_yielded): - yield BSC_LABEL - service_clients_yielded = True - yield service_client - - # begin context_manager function's logic - if not prefix: - if blob: - prefs = set() - _blob = [] - for b in _client_name_gen(blob): - pref, _, bname = b.rpartition("/") - _blob.append(bname) - if pref: - prefs.add(pref) - # ToDo: work in logic for handling if there are more than 1 kind of prefix found - blob = _blob - try: - _pref = prefs.pop() - except KeyError: - _pref = None # to ensure it's not an empty string - prefix = _pref - - def _check_name(name): - return name in blob_set - - def _always_true(*args): - return True - - if blob: - blob_set = set(_client_name_gen(blob)) - check_blob = _check_name - else: - blob = None - check_blob = _always_true - if container: - _ensure_containers_exist(conn_str, container) - yield from client_iter(_client_name_gen(container)) - else: - yield from bsc_iter() - - if not (blobs_yielded or containers_yielded or service_clients_yielded): - yield from (NONE_LABEL,) - - -def _client_ctx_mgr_wrapper(conn_str: str, gen_func: Callable = client_generator) -> contextmanager: - @contextmanager - @wraps(gen_func) - def context_manager(*args, **kwargs): - nonlocal conn_str, gen_func - try: - ret = gen_func(conn_str, *args, **kwargs) - yield ret - finally: - del ret - - return context_manager - - -def _ensure_str(obj: Union[AnyStr, Union[List[AnyStr], Tuple[AnyStr]]]): - if obj is None: - return "" - if isinstance(obj, (list, tuple)): - if obj: - obj = obj[0] - else: - return "" - if isinstance(obj, bytes): - obj = obj.decode("utf-8") - return str(obj) - - -def _ensure_list_of_str(obj: Union[List[AnyStr], AnyStr]) -> List[Union[str, List[str]]]: - """ - A helper function that allows us to ensure that a given argument parameter is a 
list of strings. - - This function assumes the given object is one of: - * list - * str - * bytes - :param obj: A string, bytes object, or a list (or nested list) of string/bytes objects. - :return: A list (or nested list) of string objects. - - :raises AzurBlobInitError: If the given object is not a str or bytes object, or if it's a list/tuple of - non-(str/bytes) objects then a logic error has likely occured somewhere and we should - fail execution here. - """ - if obj is None: - return [] - if isinstance(obj, (list, tuple)): - if isinstance(obj, tuple): - obj = list(obj) - elif isinstance(obj, (str, bytes)): - if isinstance(obj, bytes): - obj = obj.decode("utf-8") - obj = [obj] - else: - raise AzureBlobInitError(f"Our attempted to ensure obj is a list of strings failed,\n\tgiven {obj=}") - for i, elem in enumerate(obj): - if isinstance(elem, str): - continue - elif isinstance(elem, bytes): - obj[i] = elem.decode("utf-8") - elif isinstance(obj, (list, tuple)): - if isinstance(obj, tuple): - obj = list(obj) - for j, elem2 in obj: - obj[j] = _ensure_list_of_str(elem2) - else: - err_msg = ( - "Our attempt to ensure obj is a list of strings failed," - f"\n\tgiven: {obj=}" - f"\n\tfailure occured while ensuring each element of given iterable was a string, " - f"at element: obj[{i}]={elem}" - ) - raise AzureBlobInitError(err_msg) - return obj - - -class AzureBlobInitError(AzureBlobDestinationError): - pass - - -class AzureBlobPathParseError(AzureBlobDestinationError): - pass - - -class AzureBlobReadError(AzureBlobDestinationError): - blob_path: str = "" - """The path string which lead to this exception""" - chunk_byte_range: Tuple[int, int] = -1, -1 - """The [start,end) bytes defining the chunk where this exception occurs (if chunking used) else set to (-1,-1)""" - container_name: str = "" - blob_name: str = "" - blob_properties: BlobProperties = None - - -class AzureBlobWriteError(AzureBlobDestinationError): - blob_path: str = "" - """The path string which lead to this exception""" - container_name: str = "" - blob_name: str = "" - blob_properties: BlobProperties = None - content_type = None - - -class AzureBlobClientError(AzureBlobDestinationError): - container_name: str = "" - blob_name: str = "" - - -class AzureClientManagerError(AzureBlobDestinationError): - err_map: Dict[str, List[Dict[str, str]]] - aggregated_traceback: List[str] - - -class AzureClientIterationError(AzureBlobDestinationError): - pass - - -class AzureBlob(BaseDestination): - def __getnewargs__(self): - """utility function that allows an instance of this class to be pickled""" - return ( - self.remote_path, - self.connection_string, - self.can_overwrite, - self._cpu_cap, - self._max_mem_bytes, - self.default_protocol, - self.default_host_name, - self.default_container_name, - self.default_interval, - self.default_media_type, - self.default_fname_prefix, - ) - - def __getstate__(self): - """utility function that allows an instance of this class to be pickled""" - return {k: v if k != "_connection_manager" else None for k, v in self.__dict__.items()} - - def __init__( - self, - remote_path: AnyStr, - connection_string: AnyStr, - can_do_overwrites: bool = False, - cpu_cap: int = DEFAULT_AVAILABLE_CPU, - max_mem_bytes: int = MAX_SYS_MEM_USE, - default_protocol: Optional[AnyStr] = None, - default_host_name: Optional[AnyStr] = None, - default_container_name: Optional[AnyStr] = None, - default_interval: Optional[AnyStr] = None, - default_media_type: Optional[AnyStr] = None, - default_fname_prefix: Optional[AnyStr] = None, - ): - 
""" - A subclass of BAseDestination; Allows for streaming a backup stream to an Azure-blob destination. - - Here's the expected general form for the remote path: - [protocol]://[host_name]/[container_name]/[interval]/[media_type]/[default_prefix]/[optional_fname] - - NOTE: - Components inside square brackets, E.G.: `[some component]`; are optional as long as they are instead - defined by their corresponding initializer argument. - - :param remote_path: - REQUIRED; A string or bytes object; - Defines the URI (or URL) for where to connect to the backup object. - - :param connection_string: - REQUIRED; A string or bytes object; - When the application makes a request to Azure Storage, it must be authorized. - To authorize a request, add your storage account credentials to the application as a - connection string. - See: - https://docs.microsoft.com/en-us/azure/storage/blobs/storage-quickstart-blobs-python#copy-your-credentials-from-the-azure-portal - - :param can_do_overwrites: - REQUIRED; a boolean value; - Flags if we should overwrite existing data when given a destination that - already exists, or if we should fail and raise a `ResourceExistsError`. - - :param default_protocol: - OPTIONAL; DEFAULT is set from container component of remote_path argument - A string or bytes object; - The name of the container in the destination blob storage we should use. - If undefined, then we assume it is on the given remote_path argument. - - :param default_container_name: - OPTIONAL; DEFAULT is set from container component of remote_path argument - A string or bytes object; - The name of the container in the destination blob storage we should use. - If undefined, then we assume it is on the given remote_path argument. - - :param default_host_name: - OPTIONAL; DEFAULT is set from host component of remote_path argument. - A string or bytes object; - The name of the host server. - If undefined, then we assume it is on the given remote_path argument. - - :param default_interval: - OPTIONAL; DEFAULT to "yearly" - A string or bytes object; - If undefined, then we assume it is on the given remote_path argument. - - :param default_media_type: - OPTIONAL; DEFAULT to "mysql" - A string or bytes object; - if undefined, thenw e assume it is on the given remote_path argument. 
- - - """ - path = _ensure_str(remote_path) - path = path.strip(" /:").rstrip(".") - parts = self._path2parts(path) - if not path: - protocol = default_protocol or "" - if not protocol.endswith("://"): - protocol += "://" - host = default_host_name or "" - if not host.endswith("/"): - host += "/" - container = default_container_name or "" - if container and not container.endswith("/"): - container += "/" - interval = default_interval or "" - if interval and not interval.endswith("/"): - interval += "/" - media_type = default_media_type or "" - if media_type and not media_type.endswith("/"): - media_type += "/" - fname_prefix = default_fname_prefix or "" - if fname_prefix and not fname_prefix.endswith("/"): - fname_prefix += "/" - path = protocol + host + container + interval + media_type + fname_prefix - super(AzureBlob, self).__init__(path) - connection_string = _ensure_str(connection_string) - self._connection_string = connection_string - self._flag_overwite_on_write = can_do_overwrites - self._cpu_cap = cpu_cap - self._max_mem_bytes = max_mem_bytes - self._max_mem_pipe = min(MAX_PIPE_CHUNK_BYTES, max_mem_bytes) - default_protocol = _ensure_str(default_protocol or parts[0]).strip(":/") - default_host_name = _ensure_str(default_host_name or parts[1]).strip(":/") - default_container_name = _ensure_str(default_container_name or parts[2]).strip(":/") - default_interval = _ensure_str(default_interval or parts[3]).strip(":/") - default_media_type = _ensure_str(default_media_type or parts[4]).strip(":/") - default_fname_prefix = _ensure_str(default_fname_prefix or parts[5]).strip(":/") - self._protocol = default_protocol - self._host_name = default_host_name - self._container_name = default_container_name - self._interval = default_interval - self._media_type = default_media_type - self._fname_prefix = default_fname_prefix - self._part_names = "protocol,host,container,interval,media_type,fname_prefix,fname".split(",") - self._parts_list = [ - (name, parts[i] if i < len(parts) and parts[i] else "") for i, name in enumerate(self._part_names) - ] - self._default_parts: Dict[str, str] = {k: v if v != "" else None for k, v in self._parts_list} - self._default_parts["interval"] = self._default_parts["interval"] or "yearly" - self._default_parts["media_type"] = self._default_parts["media_type"] or "mysql" - self._part_names = self._part_names[::-1] - self._connection_manager: Optional[contextmanager] = None - - @property - def connection_string(self): - """An Azure specific authentication string - for accessing the target backup destination host""" - return self._connection_string - - @property - def default_protocol(self): - return self._protocol - - @property - def default_host_name(self): - """The default host server name directory that - we default to if a relative path string omits the reference""" - return self._host_name - - @property - def default_container_name(self): - """The default container (aka bucket) name that - we default to if a relative path string omits the reference""" - return self._container_name - - @property - def default_interval(self): - """The default backup interval directory that - we default to if a relative path string omits the reference""" - return self._interval - - @property - def default_media_type(self): - return self._media_type - - @property - def default_fname_prefix(self): - return self._fname_prefix - - @property - def can_overwrite(self): - return self._flag_overwite_on_write - - @property - def max_bytes_per_pipe_message(self): - return self._max_mem_pipe - 
- @property - def max_system_memory_usage(self): - return self._max_mem_bytes - - @property - def connection_manager(self): - if self._connection_manager is None: - self._connection_manager = _client_ctx_mgr_wrapper(self._connection_string, client_generator) - return self._connection_manager - - @staticmethod - def _path2parts(path: str, split_fname: bool = False): - """Breaks a path string into its sub-parts, and produces a tuple of those parts - that is at least 6 elements long. We will insert None where a part is determined to be missing in order to - ensure the minimum length of 6 elements.""" - - def extract_protocol(_path: str): - protocol, _, _path = _path.partition("://") - if not _path: - if protocol.startswith(".../"): - _path = protocol[4:] - protocol = "..." - else: - _path = protocol - protocol = None - else: - protocol = protocol.strip(":/") - return protocol, *partition_path(_path, 1) - - def partition_path(_path: str, depth: int): - if not _path: - if depth < 6: - return None, *partition_path(_path, depth + 1) - elif depth < 5: - part, _, _path = _path.partition("/") - return part.strip(":/"), *partition_path(_path, depth + 1) - elif split_fname: - prefix, _, fname = _path.rpartition("/") - return prefix, fname - return _path.strip(":/"), None - - return extract_protocol(path) - - def _path_parse(self, path: str, split_fname: bool = False): - """ - Called in multiple places where we need to decompose a path string - in order to access specific parts by name. - """ - if not path: - return self.remote_path, {k: v for k, v in self._default_parts.items()} - # noinspection PyTupleAssignmentBalance - ( - protocol, - host, - container, - interval, - media, - prefix, - *fname, - ) = self._path2parts(path, split_fname) - fname: list - protocol = protocol if protocol and protocol != "..." else self.default_protocol - host = host if host and host != "..." else self.default_host_name - container = container if container and container != "..." else self.default_container_name - if container != self.default_container_name: - interval = self.default_interval if interval and interval == "..." else interval if interval else "" - media = self.default_media_type if media and media == "..." else media if media else "" - prefix = self.default_fname_prefix if prefix and prefix == "..." else prefix if prefix else "" - else: - interval = interval if interval and interval != "..." else self.default_interval - media = media if media and media != "..." else self.default_media_type - prefix = prefix if prefix and prefix != "..." 
else self.default_fname_prefix - if fname: - _fname = list(fname) - while _fname: - fname = _fname.pop() - if fname: - _fname = "/".join(_fname) - break - else: - # noinspection PyTypeChecker - fname = None - parts: str = "/".join((s for s in (host, container, interval, media, prefix, fname) if s)) - relative_depth = 0 - while parts and parts.startswith("../"): - relative_depth += 1 - _, _, parts = parts.partition("/") - base_parts = "/".join(tpl[1] for tpl in self._parts_list[1:-relative_depth]) - base_parts += "/" if base_parts else "" - path = base_parts + parts.lstrip("/") - _parts = path.split("/", 4)[::-1] - shorten = len(self._part_names) - 1 - len(_parts) - _parts2 = [None] * shorten - _parts2 += _parts - # noinspection PyTypeChecker - ret = {k: v for k, v in zip(self._part_names[:-1], _parts2)} - ret["protocol"] = protocol - return path, ret - - def delete(self, path: AnyStr): - """ - Delete object from the destination - - the general form for the path object should conform to the following: - [azure:/]/[bucket or container name]/[server name]/[update interval]/[query language]/ - - NOTE: The protocol type (the left-most component of the example above) is technically optional, - as it should always be an azure storage type; but if passed we will check to confirm that it is - indeed for azure-blob storage, so including it ensures proper sanity checking. - - -- If path defines a new absolute path string then it must contain all parts defined above, - with the option to omit those components wrapped in square brackets, E.G.: [some component] - - where: - [components inside square brackets] => optional - => required - - such that: - optional components that are not provided should be substituted with an ellipsis - (the triple period => ...) - - E.G.: - ...://foo/.../hourly/mysql/bar-that.foos.gz - - Note: - Where optional path components are omitted, we assume that the context of the called AzureBlob instance - should be used to fill in the gaps. - - -- If path is given as a relative path string then you may also use the ellipsis as defined for absolute paths, - with the added option to use `..` for relative directory hierarchy referencing. The one caveat is that - - E.G.: - ../../daily/mysql/relative-foo.bar.gz - or - ../../../some_different_host/.../mysql - where: - The `...` component signals that we wish to use the given default interval this object was - initialized with. - - :param path: A string or bytes object; - The path to the file (blob) to delete. Can be relative or absolute. 
- """ - abs_path, path_dict = self._path_parse(path) - container = path_dict["container"] - fname = _assemble_fname(path_dict) - if fname: - label = BC_LABEL - client_type = "blob" - args = container, fname - else: - label = CC_LABEL - client_type = "container" - args = (container,) - with self.connection_manager(*args) as client_iter: - iter_type = next(client_iter) - if iter_type != label: - raise AzureClientIterationError( - f"Failed to properly identify deletion target given {path=}" - f"\n\texpected client type of {label} but got {iter_type}" - ) - to_check = [] - del_call = "delete_" + client_type - for client in client_iter: - client: Union[BlobClient, ContainerClient] - to_check.append(client) - getattr(client, del_call)() - for c in to_check: - delay = 0.01 - max_delay = 2 - t0 = time.perf_counter() - while (time.perf_counter() - t0) < 5: - try: - if client_type == "blob": - c: BlobClient - try: - bprop: BlobProperties = c.get_blob_properties() - if bprop.deleted: - break - except AttributeError: - # when calls to get_blob_properties raises AttributeError, - # then the blob is no longer available and the deletion was successful - break - else: - c: ContainerClient - cprop: ContainerProperties = c.get_container_properties() - if cprop.deleted: - break - time.sleep(delay) - delay = min(max_delay, delay + delay) - except ResourceNotFoundError: - break - - def _blob_ospiper( - self, - path_parts_dict: Dict[str, str], - pout: mpConn, - chunk_size: int = None, - ) -> None: - def err_assembly(): - bad_path = "{protocol}://{parts}".format( - protocol=self._part_names[0], - parts="/".join((f"{{{s}}}" for s in self._part_names[1:] if path_parts_dict.get(s, None))), - ).format(**path_parts_dict) - return AzureClientIterationError(f"Unable to find downloadable content files on path : {bad_path}") - - # noinspection PyShadowingNames - def configure_chunking(bsize: int, pipe_chunk_size: int): - """ - - :param bsize: total number of bytes to be downloaded for current blob - :type bsize: int - :param pipe_chunk_size: The maximum buffer size of our transfer pipe - :type pipe_chunk_size: int - :return: 4-tuple of ints indicating: - * the the number of memory chunks - * the size of those mem chunks - * if the pipe buffer is smaller than max allowed mem usage, then - this is the number of pipe chunks needed to fully transfer one - of the memory chunks. 
- * the size of the transfer chunks - :rtype: tuple[int,int,int,int] - """ - nonlocal self - if bsize < self.max_system_memory_usage: - mem_chunk_size = size - num_mem_chunks = 1 - else: - mem_chunk_size = self.max_system_memory_usage - num_mem_chunks = (size + mem_chunk_size - 1) // mem_chunk_size - if pipe_chunk_size < mem_chunk_size: - _chunk_size = pipe_chunk_size - num_chunks = (mem_chunk_size + _chunk_size - 1) // _chunk_size - else: - _chunk_size = mem_chunk_size - num_chunks = 1 - return num_mem_chunks, mem_chunk_size, num_chunks, _chunk_size - - chunk_size = self.max_bytes_per_pipe_message if chunk_size is None else chunk_size - max_threads = min(32, self._max_mem_bytes) - with pout: - with os.fdopen(pout.fileno(), "wb", buffering=chunk_size, closefd=False) as pipe_out: - container = path_parts_dict.get("container", None) - fname = path_parts_dict.pop("fname", None) - prefix = _assemble_fname(path_parts_dict) or None - with self.connection_manager(container, prefix, fname, recurse=True) as client_iter: - iter_type = next(client_iter) - if iter_type != BC_LABEL: - raise err_assembly() - for client in client_iter: - client: BlobClient - size = client.get_blob_properties().size - ( - num_mem_chunks, - mem_chunk_size, - num_chunks, - _chunk_size, - ) = configure_chunking(size, chunk_size) - with io.BytesIO(b"\x00" * mem_chunk_size) as bio: - for i in range(num_mem_chunks): - ipos = i * mem_chunk_size - dl: StorageStreamDownloader = client.download_blob( - ipos, - mem_chunk_size, - max_concurrency=max_threads, - ) - bio.seek(0) - bytes_read = dl.readinto(bio) - bio.seek(0) - - for pos in range(0, bytes_read, _chunk_size): - pipe_out.write(bio.read(_chunk_size)) - rem = bytes_read % _chunk_size - if rem: - pipe_out.write(bio.read(rem)) - - @contextmanager - def get_stream(self, copy: Union[str, MySQLCopy]): - if copy is None: - copy = self.remote_path - path = copy.key if isinstance(copy, MySQLCopy) else copy - _path = Path(path) - has_fname = "." in _path.name and _path.name != "..." - path, path_parts_dict = self._path_parse(path, has_fname) - pipe_in, pipe_out = mp.Pipe(False) - proc = mp.Process(target=self._blob_ospiper, args=(path_parts_dict, pipe_out)) - try: - with pipe_in: - proc.start() - pipe_out.close() - with os.fdopen(pipe_in.fileno(), "rb", closefd=False) as file_pipe_in: - yield file_pipe_in - finally: - # pipe_out.close() - proc.join() - proc.close() - - def read(self, filepath: str, bytes_per_chunk: Optional[int] = None) -> bytes: - """ - Read content from destination at the end of given filepath. - - :param filepath: - REQUIRED; a str object; - Relative path to destination file that we will read from. - :type filepath: str - - :param bytes_per_chunk: - OPTIONAL; DEFAULT = self.max_bytes_per_pipe_message; an int value; - This parameter dictates the max chunk size (in bytes) that should - be passed into the pipe for any single chunk. - :type bytes_per_chunk: int - - :return: Content of the file. - :rtype: bytes - """ - with self.get_stream(filepath) as conn: - conn: io.FileIO - strt = time.perf_counter() - datum = [] - while time.perf_counter() - strt < 2: - try: - data = conn.read() - if data: - datum.append(data) - strt = time.perf_counter() - except EOFError: - break - return b"".join(datum) - - def save(self, handler, filepath): - """ - Save a stream given as handler to filepath. - - :param handler: Incoming stream. - :type handler: file - :param filepath: Save stream as this name. 
- :type filepath: str - """ - with handler as f_src: - self.write(f_src, filepath) - - def write(self, content: Union[AnyStr, io.BufferedIOBase], filepath: AnyStr): - """ - Write ``content`` to a file. - - :param content: Content to write to the file. - :type content: str, bytes, or subclass of BufferedIOBase object - :param filepath: Relative path to file. - :type filepath: str or bytes object - """ - if isinstance(filepath, bytes): - filepath = filepath.decode("utf-8") - filepath, _, fname = filepath.rpartition("/") - path, path_dict = self._path_parse(filepath) - container = path_dict["container"] or self.default_container_name - blob_name = _assemble_fname(path_dict) - with self.connection_manager(container, prefix=blob_name, blob=fname) as client_iter: - iter_type = next(client_iter) - if iter_type == CC_LABEL: - blob_name += "/" + fname - client: ContainerClient = next(client_iter) - if isinstance(content, io.BufferedReader): - with content: - client.upload_blob( - blob_name, - content.read(), - overwrite=self.can_overwrite, - ) - else: - client.upload_blob(blob_name, content, overwrite=self.can_overwrite) - elif iter_type != BC_LABEL: - raise AzureClientIterationError(f"Failed to identify path to blob files given: {filepath}") - else: - # Unless filepath used wildcards, client_iter is only going to produce - # a single client instance to upload to. - bclient: BlobClient = next(client_iter) - if isinstance(content, io.BufferedReader): - with content: - bclient.upload_blob(content.read(), overwrite=self.can_overwrite) - else: - bclient.upload_blob(content, overwrite=self.can_overwrite) - - def _list_files(self, prefix: str = None, **kwargs): # , recursive=False, files_only=False): - """ - A descendant class must implement this method. - It should return a list of files already filtered out by prefix. - Some storage engines (e.g. Google Cloud Storage) allow that - at the API level. The method should use storage level filtering - to save on network transfers. - - if prefix is given it is assumed to supersede the default container/interval/media_type/custom-prefix/ parts of - the path. To only replace select parts of that path segment, use the ... (ellipsis) to indicate which portions - you wish to have remain default. - """ - results = set() - if prefix: - if prefix == "..." or prefix.startswith(".../"): - prefix = prefix.strip("/") - path_template = f"{self._protocol}://{self.default_host_name}/{prefix}" - _, path_dict = self._path_parse(path_template, True) - else: - container, _, prefix = prefix.partition("/") - path_dict = {"container": container, "fname_prefix": prefix} - else: - prefix = None # ensure we don't pass along an empty string - path_dict = {"container": None} - fname = path_dict.pop("fname", None) or None - prefix = _assemble_fname(path_dict) or prefix or None - cont_starts, _, _ = (path_dict.get("container", "") or "").partition("*") - with BlobServiceClient.from_connection_string(self.connection_string) as service_client: - service_client: BlobServiceClient - # service_client. 
- for container in service_client.list_containers(cont_starts or None): - with service_client.get_container_client(container) as cclient: - cclient: ContainerClient - if fname: - for bprop in cclient.list_blobs(prefix): - bprop: BlobProperties - if fname in bprop.name: - with cclient.get_blob_client(bprop) as bclient: - results.add(bclient.url) - else: - for bprop in cclient.list_blobs(prefix): - bprop: BlobProperties - with cclient.get_blob_client(bprop) as bclient: - results.add(bclient.url) - # if files_only: - # if recursive: - # for bprop in cclient.list_blobs(prefix): - # bprop: BlobProperties - # bname: str = bprop.name - # if not fname or fname in bname.rpartition("/")[2]: - # with cclient.get_blob_client(bprop) as bclient: - # results.add(bclient.url) - # else: - # for bprop in cclient.walk_blobs(prefix): - # bprop: BlobProperties - # bname = bprop.name - # dbg_break = 0 - # elif recursive: - # if not fname: - # for bprop in cclient.list_blobs(prefix): - # bprop: BlobProperties - # with cclient.get_blob_client(bprop) as bclient: - # results.add(bclient.url) - # - # else: - # for bprop in cclient.walk_blobs(prefix): - # if fname in bname.rpartition("/")[2]: - # with cclient.get_blob_client(bprop) as bclient: - # results.add(bclient.url) - return results From 1b81fb1f6951e85dafec86ff6716dcef8e58d38a Mon Sep 17 00:00:00 2001 From: Jsalz2000 Date: Tue, 27 Aug 2024 15:29:55 -0500 Subject: [PATCH 02/10] Formats code style to pass make lint --- docs/conf.py | 55 +++--- setup.py | 3 +- support/make_release.py | 32 ++-- .../configuration/destinations/az.py | 7 +- twindb_backup/destination/az.py | 159 +++++++++--------- 5 files changed, 120 insertions(+), 136 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index e8972b6a..dfc38dd6 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -13,8 +13,8 @@ # All configuration values have a default; values that are commented out # serve to show the default. -import sys import os +import sys # If extensions (or modules to document with autodoc) are in another # directory, add these directories to sys.path here. If the directory is @@ -40,23 +40,23 @@ # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode'] +extensions = ["sphinx.ext.autodoc", "sphinx.ext.viewcode"] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix of source filenames. -source_suffix = '.rst' +source_suffix = ".rst" # The encoding of source files. # source_encoding = 'utf-8-sig' # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. -project = u'TwinDB Backup' -copyright = u"2016-2019, TwinDB Development Team" +project = "TwinDB Backup" +copyright = "2016-2019, TwinDB Development Team" # The version info for the project you're documenting, acts as replacement # for |version| and |release|, also used in various other places throughout @@ -79,7 +79,7 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ['_build'] +exclude_patterns = ["_build"] # The reST default role (used for this markup: `text`) to use for all # documents. @@ -97,7 +97,7 @@ # show_authors = False # The name of the Pygments (syntax highlighting) style to use. 
-pygments_style = 'sphinx' +pygments_style = "sphinx" # A list of ignored prefixes for module index sorting. # modindex_common_prefix = [] @@ -111,7 +111,7 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = 'default' +html_theme = "default" # Theme options are theme-specific and customize the look and feel of a # theme further. For a list of options available for each theme, see the @@ -131,18 +131,18 @@ # The name of an image file (relative to this directory) to place at the # top of the sidebar. -html_logo = '_static/logo.png' +html_logo = "_static/logo.png" # The name of an image file (within the static path) to use as favicon # of the docs. This file should be a Windows icon file (.ico) being # 16x16 or 32x32 pixels large. -html_favicon = '_static/favicon.png' +html_favicon = "_static/favicon.png" # Add any paths that contain custom static files (such as style sheets) # here, relative to this directory. They are copied after the builtin # static files, so a file named "default.css" will overwrite the builtin # "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] # If not '', a 'Last updated on:' timestamp is inserted at every page # bottom, using the given strftime format. @@ -188,7 +188,7 @@ # html_file_suffix = None # Output file base name for HTML help builder. -htmlhelp_basename = 'twindb_backupdoc' +htmlhelp_basename = "twindb_backupdoc" # -- Options for LaTeX output ------------------------------------------ @@ -196,10 +196,8 @@ latex_elements = { # The paper size ('letterpaper' or 'a4paper'). # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. # 'preamble': '', } @@ -208,9 +206,7 @@ # (source start file, target name, title, author, documentclass # [howto/manual]). latex_documents = [ - ('index', 'twindb_backup.tex', - u'TwinDB Backup Documentation', - u'TwinDB Development Team', 'manual'), + ("index", "twindb_backup.tex", "TwinDB Backup Documentation", "TwinDB Development Team", "manual"), ] # The name of an image file (relative to this directory) to place at @@ -238,11 +234,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - ('index', 'twindb_backup', - u'TwinDB Backup Documentation', - [u'TwinDB Development Team'], 1) -] +man_pages = [("index", "twindb_backup", "TwinDB Backup Documentation", ["TwinDB Development Team"], 1)] # If true, show URL addresses after external links. # man_show_urls = False @@ -254,12 +246,15 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - ('index', 'twindb_backup', - u'TwinDB Backup Documentation', - u'TwinDB Development Team', - 'twindb_backup', - 'One line description of project.', - 'Miscellaneous'), + ( + "index", + "twindb_backup", + "TwinDB Backup Documentation", + "TwinDB Development Team", + "twindb_backup", + "One line description of project.", + "Miscellaneous", + ), ] # Documents to append as an appendix to all manuals. 
diff --git a/setup.py b/setup.py index 9833ed11..df20a2a2 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,8 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- import os -from setuptools import setup, find_packages + +from setuptools import find_packages, setup del os.link diff --git a/support/make_release.py b/support/make_release.py index a7eba808..098d5ae1 100644 --- a/support/make_release.py +++ b/support/make_release.py @@ -1,4 +1,4 @@ -from os import listdir, environ +from os import environ, listdir from os import path as osp from subprocess import run @@ -11,23 +11,14 @@ "jammy", "focal", # CentOS - "7" + "7", ] PKG_DIR = "omnibus/pkg" OS_DETAILS = { - "jammy": { - "flavor": "Ubuntu", - "name": "Ubuntu jammy" - }, - "focal": { - "flavor": "Ubuntu", - "name": "Ubuntu focal" - }, - "7": { - "flavor": "CentOS", - "name": "CentOS 7" - }, + "jammy": {"flavor": "Ubuntu", "name": "Ubuntu jammy"}, + "focal": {"flavor": "Ubuntu", "name": "Ubuntu focal"}, + "7": {"flavor": "CentOS", "name": "CentOS 7"}, } @@ -41,15 +32,14 @@ def main(): my_env["OS_VERSION"] = os run(["make", "package"], env=my_env, check=True) for fi_name in listdir(PKG_DIR): - if ( - fi_name.endswith(".rpm") - or fi_name.endswith(".deb") - or fi_name.endswith(".json") - ): + if fi_name.endswith(".rpm") or fi_name.endswith(".deb") or fi_name.endswith(".json"): key = f"twindb-backup/{__version__}/{os}/{fi_name}" with open(osp.join(PKG_DIR, fi_name), "rb") as fp: client.put_object( - ACL="public-read", Body=fp, Bucket="twindb-release", Key=key, + ACL="public-read", + Body=fp, + Bucket="twindb-release", + Key=key, ) print(f"https://twindb-release.s3.amazonaws.com/{key}") @@ -60,7 +50,7 @@ def main(): print(f" * {details['name']}") key = f"twindb-backup/{__version__}/{os}/" response = client.list_objects( - Bucket='twindb-release', + Bucket="twindb-release", Prefix=key, ) for fil in response["Contents"]: diff --git a/twindb_backup/configuration/destinations/az.py b/twindb_backup/configuration/destinations/az.py index f0470835..583c4da5 100644 --- a/twindb_backup/configuration/destinations/az.py +++ b/twindb_backup/configuration/destinations/az.py @@ -4,12 +4,7 @@ class AZConfig: """Azure Blob Storage Configuration.""" - def __init__( - self, - connection_string, - container_name, - chunk_size=1024*1024*4 # 4MiB - ): + def __init__(self, connection_string, container_name, chunk_size=1024 * 1024 * 4): # 4MiB self._connection_string = connection_string self._container_name = container_name diff --git a/twindb_backup/destination/az.py b/twindb_backup/destination/az.py index c14133b3..5a6cb944 100644 --- a/twindb_backup/destination/az.py +++ b/twindb_backup/destination/az.py @@ -2,11 +2,11 @@ """ Module for Azure destination. 
""" +import builtins import os import re import socket import time - from contextlib import contextmanager from multiprocessing import Process from urllib.parse import urlparse @@ -15,13 +15,14 @@ from twindb_backup import LOG from twindb_backup.destination.base_destination import BaseDestination -from twindb_backup.destination.exceptions import ( - FileNotFound, - S3DestinationError, -) +from twindb_backup.destination.exceptions import FileNotFound, S3DestinationError from twindb_backup.exceptions import OperationError -### DEFAULT VALUES SECTION +""" +DEFAULT VALUES SECTION +""" + + class AZFileAccess(object): # pylint: disable=too-few-public-methods """Access modes for AZ files""" @@ -46,25 +47,30 @@ def __init__(self, **kwargs): self._container_name = kwargs.get("container_name") self._connection_string = kwargs.get("connection_string") self._hostname = kwargs.get("hostname", socket.gethostname()) - self._chunk_size = kwargs.get("chunk_size", 4*1024*1024) + self._chunk_size = kwargs.get("chunk_size", 4 * 1024 * 1024) self.remote_path = "/" super(AZ, self).__init__(self.remote_path) try: - LOG.debug("Initilizing Azure connection to the storage account using connection string (length=" + str(len(self._connection_string)) + ")") - self.service_client = BlobServiceClient.from_connection_string(self._connection_string) - except: - LOG.error("Failed to connect to Azure storage account using the connection string") - exit(1) + LOG.debug( + "Initilizing Azure connection to the storage account using connection string (length=" + + str(len(self._connection_string)) + + ")" + ) + self.service_client = BlobServiceClient.from_connection_string(self._connection_string) + except builtins.Exception as err: + # TODO: add more specific exception handling + LOG.error("Failed to connect to Azure storage account using the connection string") + raise err # Check to see if the container exists, otherwise create the container try: - LOG.debug("Setting up the container(" + self._container_name + ") connection") - self.client = self.service_client.get_container_client(self._container_name) - except: - LOG.debug("The container(" + self._container_name + ") does not exist... creating it") - self.create_container() + LOG.debug("Setting up the container(" + self._container_name + ") connection") + self.client = self.service_client.get_container_client(self._container_name) + except builtins.Exception: + LOG.debug("The container(" + self._container_name + ") does not exist... creating it") + self.create_container() def bucket(self): """S3 bucket name.... compatibility???""" @@ -75,9 +81,7 @@ def create_bucket(self): return create_container(self) def create_container(self): - """Creates the container in the Azure storage account that will store the backups. - - """ + """Creates the container in the Azure storage account that will store the backups.""" container_exists = True try: @@ -89,9 +93,7 @@ def create_container(self): LOG.info("Azure container creation was successful %s", self._container_name) return True - def list_files( - self, prefix=None, recursive=False, pattern=None, files_only=False - ): + def list_files(self, prefix=None, recursive=False, pattern=None, files_only=False): """ List files in the destination that have common prefix. 
""" @@ -99,11 +101,11 @@ def list_files( files = [] LOG.debug("AZ Listing files") for blob in self.client.list_blobs(): - if pattern: - if re.search(pattern, blob.name): - files.append(blob.name) - else: - files.append(blob.name) + if pattern: + if re.search(pattern, blob.name): + files.append(blob.name) + else: + files.append(blob.name) return sorted(files) @@ -111,7 +113,7 @@ def _list_files(self, prefix=None, recursive=False, files_only=False): raise NotImplementedError def _uplaod_blob_options(self, **kwargs): - return True + return True def delete(self, path): """ @@ -119,48 +121,49 @@ def delete(self, path): """ try: - LOG.info("Deleting blob: " + path) - return self.client.delete_blob(path) - except: - LOG.error("FAILED to delete blob: " + path) - raise + LOG.info("Deleting blob: " + path) + return self.client.delete_blob(path) + except builtins.Exception as err: + # TODO: add more specific exception handling + LOG.error("FAILED to delete blob: " + path) + raise err def delete_all_objects(self): - """ - Delete all blobs in the container - """ - LOG.info("Deleting ALL blobs in container: " + self._container_name) - for blob in slef.ls(): - self.delete(blob) + """ + Delete all blobs in the container + """ + LOG.info("Deleting ALL blobs in container: " + self._container_name) + for blob in self.ls(): + self.delete(blob) - return True + return True def delete_bucket(self, force=False): - """ - Delete the container and contents, this is a recursive delete (including all blobs in the container) - """ - try: - LOG.info("Performing recusrsive delete of container and all blobs in container: " + self._container_name) - self.client.delete_container() - except: - raise + """ + Delete the container and contents, this is a recursive delete (including all blobs in the container) + """ + try: + LOG.info("Performing recusrsive delete of container and all blobs in container: " + self._container_name) + self.client.delete_container() + except builtins.Exception as err: + # TODO: add more specific exception handling + raise err - return True + return True def read(self, filepath): """ Read the status blob (filepath) and return contents to the caller """ try: - LOG.debug("Attempting to read blob: " + filepath) - blob_client = self.client.get_blob_client(filepath) - return blob_client.download_blob().readall().decode("utf-8") + LOG.debug("Attempting to read blob: " + filepath) + blob_client = self.client.get_blob_client(filepath) + return blob_client.download_blob().readall().decode("utf-8") - except Exception as err: - LOG.info("The blob " + filepath + " does not exist or there was an issue reading it") - - except: - raise + except builtins.Exception as err: + # TODO: add more specific exception handling + LOG.info("The blob " + filepath + " does not exist or there was an issue reading it") + raise err def save(self, handler, filepath): """ @@ -174,15 +177,13 @@ def save(self, handler, filepath): blob_client = self.client.get_blob_client(filepath) with handler as file_obj: - try: - blob_client.upload_blob(file_obj) - - except Exception as err: - LOG.info("The blob " + filepath + " already exists, no need to upload (ignoring)") - - except: - raise + try: + blob_client.upload_blob(file_obj) + except builtins.Exception as err: + # TODO: add more specific exception handling + LOG.info("The blob " + filepath + " already exists, no need to upload (ignoring)") + raise err @contextmanager def get_stream(self, copy): @@ -209,17 +210,16 @@ def _download_object(self, path, read_fd, write_fd): # anyway... 
let's decompile it, grab the host and the actual file name # then do some matching based on what really exists :P LOG.debug("Transforming requested restore path: " + path) - exploded_path = path.split('/') - host = exploded_path[1] # first element, the call path begins with / - file = exploded_path[len(exploded_path)-1] # last element - path = '' + exploded_path = path.split("/") + host = exploded_path[1] # first element, the call path begins with / + file = exploded_path[len(exploded_path) - 1] # last element + path = "" for blob in self.list_files(pattern=file): - if re.search(host, blob): - path = blob + if re.search(host, blob): + path = blob LOG.debug("Tranformed path to match existing blob: " + path) - blob_client = self.client.get_blob_client(path) with os.fdopen(write_fd, "wb") as w_pipe: try: @@ -227,15 +227,18 @@ def _download_object(self, path, read_fd, write_fd): for _ in range(10): try: w_pipe.write(blob_client.download_blob().readall()) - except: - raise + except builtins.Exception as err: + # TODO: add more specific exception handling + LOG.error(f"Failed to download and write blob {path} encountered error: {err}") + raise err except IOError as err: LOG.error(err) - exit(1) + raise err - except: - raise + except builtins.Exception as err: + # TODO: add more specific exception handling + raise err download_proc = None try: From ded4f86101a8041c585b665a8b55ae97fc43c569 Mon Sep 17 00:00:00 2001 From: Jsalz2000 Date: Thu, 29 Aug 2024 16:01:13 -0500 Subject: [PATCH 03/10] Adds working refactored azure storage destination --- README.rst | 7 +- ...indb_backup.configuration.destinations.rst | 8 + docs/twindb_backup.destination.rst | 16 +- docs/usage.rst | 15 +- support/twindb-backup.cfg | 7 + twindb_backup/destination/az.py | 374 +++++++----------- .../modules/profile/files/twindb-backup.cfg | 5 + 7 files changed, 198 insertions(+), 234 deletions(-) diff --git a/README.rst b/README.rst index a5d5009d..52b6005a 100644 --- a/README.rst +++ b/README.rst @@ -25,8 +25,8 @@ TwinDB Backup :alt: Updates TwinDB Backup is a multipurpose tool for backing up MySQL database and regular files/directories on the file system. -It can store backup copies on a remote SSH server, Amazon S3 or -Google Cloud Storage. +It can store backup copies on a remote SSH server, Amazon S3, Azure Blob Storage, +or Google Cloud Storage. TwinDB Backup accepts a backup copy stream from any of supported sources (MySQL Server, Percona Server, Percona XtraDB Cluster, or file system) @@ -53,8 +53,8 @@ After the stream passed all modifiers it is sent to one of the configured backup destination. It can be: - Amazon S3 bucket -- Google Cloud Storage bucket - Azure Blob Storage account +- Google Cloud Storage bucket - Any server with SSH demon .. figure:: https://user-images.githubusercontent.com/1763754/56677794-20901b80-6676-11e9-8f71-8de0b0b6f066.png @@ -185,6 +185,7 @@ Credits * `Arda Beyazoğlu `_ * `Egor Lyutov `_ * `fonthead `_ + * `James Salzman `_ * `Maksym Kryva `_ * `Manjot Singh `_ * `Michael Rikmas `_ diff --git a/docs/twindb_backup.configuration.destinations.rst b/docs/twindb_backup.configuration.destinations.rst index c447eef8..0614c9cf 100644 --- a/docs/twindb_backup.configuration.destinations.rst +++ b/docs/twindb_backup.configuration.destinations.rst @@ -4,6 +4,14 @@ twindb\_backup.configuration.destinations package Submodules ---------- +twindb\_backup.configuration.destinations.az module +---------------------------------------------------- + +.. 
automodule:: twindb_backup.configuration.destinations.az + :members: + :undoc-members: + :show-inheritance: + twindb\_backup.configuration.destinations.gcs module ---------------------------------------------------- diff --git a/docs/twindb_backup.destination.rst b/docs/twindb_backup.destination.rst index db387da4..569ebdd9 100644 --- a/docs/twindb_backup.destination.rst +++ b/docs/twindb_backup.destination.rst @@ -4,14 +4,6 @@ twindb\_backup.destination package Submodules ---------- -twindb\_backup.destination.azblob module ----------------------------------------- - -.. automodule:: twindb_backup.destination.azblob - :members: - :undoc-members: - :show-inheritance: - twindb\_backup.destination.base\_destination module --------------------------------------------------- @@ -28,6 +20,14 @@ twindb\_backup.destination.exceptions module :undoc-members: :show-inheritance: +twindb\_backup.destination.az module +------------------------------------- + +.. automodule:: twindb_backup.destination.az + :members: + :undoc-members: + :show-inheritance: + twindb\_backup.destination.gcs module ------------------------------------- diff --git a/docs/usage.rst b/docs/usage.rst index 13d4d3e0..c823eb08 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -47,7 +47,7 @@ Backup Destination The ``[destination]`` section specifies where to store backup copies. ``backup_destination`` can be either ``ssh`` (if you want to store backups on a remote SSH server), -or ``s3`` (if you want to store backups in Amazon S3), or ``gsc`` (if the backup should be stored in Google Cloud). +``s3`` (if you want to store backups in Amazon S3), ``az`` (if the backup should be stored in Azure Blob Storage), or ``gsc`` (if the backup should be stored in Google Cloud). In the optional ``keep_local_path`` you can specify a local path where the tool will store a local copy of the backup. It's useful if you want to stream a MySQL backup to S3 and would like to keep a local copy as well. @@ -89,6 +89,19 @@ In the ``[s3]`` section you specify Amazon credentials as well as an S3 bucket w AWS_DEFAULT_REGION = us-east-1 BUCKET = twindb-backups +Azure Blob Storage +~~~~~~~~~~~~~~~~~~~~ + +In the ``[az]`` section you specify Azure credentials as well as Azure Blob Storage container where to store backups. + +.. 
code-block:: ini + + [az] + + connection_string = "DefaultEndpointsProtocol=https;AccountName=ACCOUNT_NAME;AccountKey=ACCOUNT_KEY;EndpointSuffix=core.windows.net" + container_name = twindb-backups + + Google Cloud Storage ~~~~~~~~~~~~~~~~~~~~ diff --git a/support/twindb-backup.cfg b/support/twindb-backup.cfg index 42359f75..16ab0caf 100644 --- a/support/twindb-backup.cfg +++ b/support/twindb-backup.cfg @@ -31,6 +31,13 @@ AWS_SECRET_ACCESS_KEY=YYYYY AWS_DEFAULT_REGION=us-east-1 BUCKET=twindb-backups +[az] + +# Azure destination settings + +connection_string="DefaultEndpointsProtocol=https;AccountName=ACCOUNT_NAME;AccountKey=ACCOUNT_KEY;EndpointSuffix=core.windows.net" +container_name=twindb-backups + [gcs] # GCS destination settings diff --git a/twindb_backup/destination/az.py b/twindb_backup/destination/az.py index 5a6cb944..c7363baa 100644 --- a/twindb_backup/destination/az.py +++ b/twindb_backup/destination/az.py @@ -4,281 +4,211 @@ """ import builtins import os -import re import socket -import time +import typing as T from contextlib import contextmanager from multiprocessing import Process -from urllib.parse import urlparse -from azure.storage.blob import BlobServiceClient +import azure.core.exceptions as ae +from azure.storage.blob import ContainerClient from twindb_backup import LOG from twindb_backup.destination.base_destination import BaseDestination -from twindb_backup.destination.exceptions import FileNotFound, S3DestinationError -from twindb_backup.exceptions import OperationError - -""" -DEFAULT VALUES SECTION -""" - - -class AZFileAccess(object): # pylint: disable=too-few-public-methods - """Access modes for AZ files""" - - public_read = "public-read" - private = "private" +from twindb_backup.destination.exceptions import FileNotFound class AZ(BaseDestination): - """ - AZ destination class. + """Azure Blob Storage Destination class""" + + def __init__( + self, + container_name: str, + connection_string: str, + hostname: str = socket.gethostname(), + chunk_size: int = 4 * 1024 * 1024, # TODO: Add support for chunk size + remote_path: str = "/", # TODO: Add support for remote path input + ) -> None: + """Creates an instance of the Azure Blob Storage Destination class, + initializes the ContainerClient and validates the connection settings + + Args: + container_name (str): Name of the container in the Azure storage account + connection_string (str): Connection string for the Azure storage account + hostname (str, optional): Hostname of the host performing the backup. Defaults to socket.gethostname(). + chunk_size (int, optional): Size in bytes for read/write streams. Defaults to 4*1024*1024. + + Raises: + err: Raises an error if the client cannot be initialized + """ - :param kwargs: Keyword arguments. + self._container_name = container_name + self._connection_string = connection_string + self._hostname = hostname + self._chunk_size = chunk_size + self._remote_path = remote_path + super(AZ, self).__init__(self._remote_path) - * **container_name** - Azure container name - * **connection_string** - Azure connection string for the storage account - * **hostname** - Hostname of a host where a backup is taken from. 
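To connect the ``[az]`` options documented above with the refactored destination class below, the two required settings map directly onto the constructor arguments. The following sketch is only illustrative (it is not how twindb-backup itself loads the file, which goes through ``AZConfig``), and the config path is assumed:

from configparser import ConfigParser

from twindb_backup.destination.az import AZ

parser = ConfigParser()
parser.read("/etc/twindb/twindb-backup.cfg")  # assumed location of the config file

dst = AZ(
    container_name=parser.get("az", "container_name"),
    connection_string=parser.get("az", "connection_string").strip('"'),  # sample configs quote this value
)
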
- * **chunk_size** - The size in byptes for read/write streams, default 4MB - """ + self._container_client = self._connect() - def __init__(self, **kwargs): + def _connect(self) -> ContainerClient: + """Connects to an Azure Storage Account and initializes a ContainerClient, + ensures the container exists, creating one if not found - self._container_name = kwargs.get("container_name") - self._connection_string = kwargs.get("connection_string") - self._hostname = kwargs.get("hostname", socket.gethostname()) - self._chunk_size = kwargs.get("chunk_size", 4 * 1024 * 1024) + Raises: + err: Returns an error if the connection string is invalid or we failed to validate the container - self.remote_path = "/" - super(AZ, self).__init__(self.remote_path) + Returns: + ContainerClient: An initialized ContainerClient + """ + client: ContainerClient = None + + # Create the container client - validates connection string format try: - LOG.debug( - "Initilizing Azure connection to the storage account using connection string (length=" - + str(len(self._connection_string)) - + ")" - ) - self.service_client = BlobServiceClient.from_connection_string(self._connection_string) - except builtins.Exception as err: - # TODO: add more specific exception handling - LOG.error("Failed to connect to Azure storage account using the connection string") + client = ContainerClient.from_connection_string(self._connection_string, self._container_name) + except builtins.ValueError as err: + LOG.error(f"Failed to create Azure Client. Error: {type(err).__name__}, Reason: {err}") raise err - # Check to see if the container exists, otherwise create the container + # Check if the container exists, if not, create it try: - LOG.debug("Setting up the container(" + self._container_name + ") connection") - self.client = self.service_client.get_container_client(self._container_name) - except builtins.Exception: - LOG.debug("The container(" + self._container_name + ") does not exist... creating it") - self.create_container() - - def bucket(self): - """S3 bucket name.... compatibility???""" - return self._container_name - - def create_bucket(self): - """Compatibility.""" - return create_container(self) + if not client.exists(): + client.create_container() + except builtins.Exception as err: + LOG.error(f"Failed to validate or create container. Error: {type(err).__name__}, Reason: {err}") + raise err - def create_container(self): - """Creates the container in the Azure storage account that will store the backups.""" - container_exists = True + return client - try: - self.client = self.service_client.create_container(self._container_name) - except ClientError as err: - # We come here meaning we did not find the container - raise + def delete(self, path: str) -> None: + """Deletes a blob from the Azure storage account's container - LOG.info("Azure container creation was successful %s", self._container_name) - return True + Args: + path (str): Relative path to the blob in the container to delete - def list_files(self, prefix=None, recursive=False, pattern=None, files_only=False): - """ - List files in the destination that have common prefix. + Raises: + err: Raises an error if the blob failed to be deleted """ + LOG.debug("Attempting to delete blob: " + path) + try: + self._container_client.delete_blob(path) + except builtins.Exception as err: + LOG.error(f"Failed to delete blob {path}. 
Error: {type(err).__name__}, Reason: {err}") + raise err - files = [] - LOG.debug("AZ Listing files") - for blob in self.client.list_blobs(): - if pattern: - if re.search(pattern, blob.name): - files.append(blob.name) - else: - files.append(blob.name) + @contextmanager + def get_stream(self, copy): + """Streams a blob from Azure Blob Storage into a pipe - return sorted(files) + Args: + copy (BaseCopy): A copy object to stream from Azure - def _list_files(self, prefix=None, recursive=False, files_only=False): - raise NotImplementedError + Yields: + T.Generator(T.BinaryIO): A generator that yields a stream of the blob's content + """ - def _uplaod_blob_options(self, **kwargs): - return True + LOG.debug("Attempting to stream blob: " + copy.key) + pipe_in, pipe_out = os.pipe() + path = f"{self._remote_path}/{copy.key}" - def delete(self, path): - """ - Deletes a Azure blob in the container - """ + def _download_to_pipe(blob_key: str, pipe_in: int, pipe_out: int) -> None: + os.close(pipe_in) + with os.fdopen(pipe_out, "wb") as pipe_out_file: + self._container_client.download_blob(blob_key).readinto(pipe_out_file) + proc = Process(target=_download_to_pipe, args=(path, pipe_in, pipe_out)) + proc.start() + os.close(pipe_out) try: - LOG.info("Deleting blob: " + path) - return self.client.delete_blob(path) - except builtins.Exception as err: - # TODO: add more specific exception handling - LOG.error("FAILED to delete blob: " + path) - raise err - - def delete_all_objects(self): - """ - Delete all blobs in the container - """ - LOG.info("Deleting ALL blobs in container: " + self._container_name) - for blob in self.ls(): - self.delete(blob) + with os.fdopen(pipe_in, "rb") as pipe_in_file: + yield pipe_in_file + finally: + proc.join() - return True + def read(self, filepath: str) -> bytes: + """Read content of a file path from Azure Blob Storage - def delete_bucket(self, force=False): - """ - Delete the container and contents, this is a recursive delete (including all blobs in the container) - """ - try: - LOG.info("Performing recusrsive delete of container and all blobs in container: " + self._container_name) - self.client.delete_container() - except builtins.Exception as err: - # TODO: add more specific exception handling - raise err + Args: + filepath (str): Relative path to a blob in the container - return True + Raises: + err: Raises an error if the blob failed to be read or it does not exist - def read(self, filepath): - """ - Read the status blob (filepath) and return contents to the caller + Returns: + bytes: Content of the blob """ + LOG.debug("Attempting to read blob: " + filepath) try: - LOG.debug("Attempting to read blob: " + filepath) - blob_client = self.client.get_blob_client(filepath) - return blob_client.download_blob().readall().decode("utf-8") - + return self._container_client.download_blob(filepath, encoding="utf-8").read() + except ae.ResourceNotFoundError as err: + LOG.debug(f"File {filepath} does not exist in container {self._container_name}") + raise FileNotFound(f"File {filepath} does not exist in container {self._container_name}") except builtins.Exception as err: - # TODO: add more specific exception handling - LOG.info("The blob " + filepath + " does not exist or there was an issue reading it") + LOG.error(f"Failed to read blob {filepath}. 
Error: {type(err).__name__}, Reason: {err}") raise err - def save(self, handler, filepath): - """ - Read from handler and save it to Azure blob + def save(self, handler: T.BinaryIO, filepath: str) -> None: + """Save a stream given as handler to filepath in Azure Blob Storage - :param filepath: save backup copy in a file with this name - :param handler: stdout handler from backup source - """ + Args: + handler (T.BinaryIO): Incoming stream + filepath (str): Relative path to a blob in the container - LOG.debug("Attempting to write blob: " + filepath) - blob_client = self.client.get_blob_client(filepath) + Raises: + err: Raises an error if the blob failed to be written + """ + LOG.debug("Attempting to save blob: " + filepath) with handler as file_obj: try: - blob_client.upload_blob(file_obj) - + self._container_client.upload_blob(filepath, file_obj) except builtins.Exception as err: - # TODO: add more specific exception handling - LOG.info("The blob " + filepath + " already exists, no need to upload (ignoring)") + LOG.error(f"Failed to upload blob or it already exists. Error {type(err).__name__}, Reason: {err}") raise err - @contextmanager - def get_stream(self, copy): - """ - Get a PIPE handler with content of the backup copy streamed from - the destination. - :param copy: Backup copy - :type copy: BaseCopy - :return: Stream with backup copy - :rtype: generator - :raise : if failed to stream a backup copy. - """ + def write(self, content: str, filepath: str) -> None: + """Write content to filepath in Azure Blob Storage - path = "%s/%s" % (self.remote_path, copy.key) - object_key = urlparse(path).path.lstrip("/") - - def _download_object(self, path, read_fd, write_fd): - # The read end of the pipe must be closed in the child process - # before we start writing to it. - os.close(read_fd) - - # twindb appears to be munging the actual path of the objects as opposed to - # using the list of the valid object paths ... wtf? - # anyway... 
let's decompile it, grab the host and the actual file name - # then do some matching based on what really exists :P - LOG.debug("Transforming requested restore path: " + path) - exploded_path = path.split("/") - host = exploded_path[1] # first element, the call path begins with / - file = exploded_path[len(exploded_path) - 1] # last element - path = "" - for blob in self.list_files(pattern=file): - if re.search(host, blob): - path = blob - - LOG.debug("Tranformed path to match existing blob: " + path) - - blob_client = self.client.get_blob_client(path) - with os.fdopen(write_fd, "wb") as w_pipe: - try: - retry_interval = 2 - for _ in range(10): - try: - w_pipe.write(blob_client.download_blob().readall()) - except builtins.Exception as err: - # TODO: add more specific exception handling - LOG.error(f"Failed to download and write blob {path} encountered error: {err}") - raise err - - except IOError as err: - LOG.error(err) - raise err - - except builtins.Exception as err: - # TODO: add more specific exception handling - raise err - - download_proc = None - try: - blob_client = self.client.get_blob_client(path) - LOG.debug("Fetching blob %s from container %s", path, self._container_name) - - read_pipe, write_pipe = os.pipe() - - download_proc = Process( - target=_download_object, - args=( - self, - path, - read_pipe, - write_pipe, - ), - name="_download_object", - ) - download_proc.start() + Args: + content (str): Content to write to blob + filepath (str): Relative path to a blob in the container - # The write end of the pipe must be closed in this process before - # we start reading from it. - os.close(write_pipe) - LOG.debug("read_pipe type: %s", type(read_pipe)) - yield read_pipe + Raises: + err: Raises an error if the blob failed to be written + """ - os.close(read_pipe) - download_proc.join() + LOG.debug("Attempting to write blob: " + filepath) + try: + self._container_client.upload_blob(filepath, content, overwrite=True) + except builtins.Exception as err: + LOG.error(f"Failed to upload or overwrite blob. Error {type(err).__name__}, Reason: {err}") + raise err - if download_proc.exitcode: - LOG.error("Failed to download %s", path) - # exit(1) + def _list_files(self, prefix: str = "", recursive: bool = False, files_only: bool = False) -> T.List[str]: + """List files in the Azure Blob Storage container - LOG.debug("Successfully streamed %s", path) + Args: + prefix (str, optional): Filters blobs by a given prefix. Defaults to "". + recursive (bool, optional): Not supported. Defaults to False. + files_only (bool, optional): Excludes directories when true, + otherwise includes files and directories. Defaults to False. + """ + LOG.debug( + f"""Listing files in container {self._container_name} with prefix={prefix}, + recursive={recursive}, files_only={files_only}""" + ) - finally: - if download_proc: - download_proc.join() + try: + blobs = self._container_client.list_blobs(name_starts_with=prefix, include=["metadata"]) + except builtins.Exception as err: + LOG.error( + f"Failed to list files in container {self._container_name}. 
Error: {type(err).__name__}, Reason: {err}" + ) + raise err - def write(self, content, filepath): - LOG.debug("Overwriting status file: " + filepath) - blob_client = self.client.get_blob_client(filepath) - blob_client.upload_blob(content, overwrite=True) + return [ + blob.name + for blob in blobs + if not files_only + or not (bool(blob.get("metadata")) and blob.get("metadata", {}).get("hdi_isfolder") == "true") + ] diff --git a/vagrant/environment/puppet/modules/profile/files/twindb-backup.cfg b/vagrant/environment/puppet/modules/profile/files/twindb-backup.cfg index 10a75511..dba2e40b 100644 --- a/vagrant/environment/puppet/modules/profile/files/twindb-backup.cfg +++ b/vagrant/environment/puppet/modules/profile/files/twindb-backup.cfg @@ -22,6 +22,11 @@ AWS_SECRET_ACCESS_KEY="YYYYY" AWS_DEFAULT_REGION="us-east-1" BUCKET="twindb-backups" +# Azure destination settings +[az] +connection_string="DefaultEndpointsProtocol=https;AccountName=ACCOUNT_NAME;AccountKey=ACCOUNT_KEY;EndpointSuffix=core.windows.net" +container_name="twindb-backups" + # GCS destination settings [gcs] GC_CREDENTIALS_FILE=/twindb_backup/env/My Project 17339-bbbc43d1bee3.json From 99fdf40ba3ca1f93e59b318b6ae20882244886ce Mon Sep 17 00:00:00 2001 From: Jsalz2000 Date: Thu, 29 Aug 2024 22:31:23 -0500 Subject: [PATCH 04/10] Adds support for remote_path config option --- docs/usage.rst | 1 + support/twindb-backup.cfg | 1 + twindb_backup/configuration/__init__.py | 1 + .../configuration/destinations/az.py | 16 +++++-- twindb_backup/destination/az.py | 45 ++++++++++++------- .../modules/profile/files/twindb-backup.cfg | 1 + 6 files changed, 45 insertions(+), 20 deletions(-) diff --git a/docs/usage.rst b/docs/usage.rst index c823eb08..4fdd6fa9 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -100,6 +100,7 @@ In the ``[az]`` section you specify Azure credentials as well as Azure Blob Stor connection_string = "DefaultEndpointsProtocol=https;AccountName=ACCOUNT_NAME;AccountKey=ACCOUNT_KEY;EndpointSuffix=core.windows.net" container_name = twindb-backups + remote_path = /backups/mysql # optional Google Cloud Storage diff --git a/support/twindb-backup.cfg b/support/twindb-backup.cfg index 16ab0caf..e2f9b0e5 100644 --- a/support/twindb-backup.cfg +++ b/support/twindb-backup.cfg @@ -37,6 +37,7 @@ BUCKET=twindb-backups connection_string="DefaultEndpointsProtocol=https;AccountName=ACCOUNT_NAME;AccountKey=ACCOUNT_KEY;EndpointSuffix=core.windows.net" container_name=twindb-backups +#remote_path = /backups/mysql # optional [gcs] diff --git a/twindb_backup/configuration/__init__.py b/twindb_backup/configuration/__init__.py index bd9ed2d1..7cd760d6 100644 --- a/twindb_backup/configuration/__init__.py +++ b/twindb_backup/configuration/__init__.py @@ -257,6 +257,7 @@ def destination(self, backup_source=socket.gethostname()): connection_string=self.az.connection_string, container_name=self.az.container_name, chunk_size=self.az.chunk_size, + remote_path=self.az.remote_path, hostname=backup_source, ) else: diff --git a/twindb_backup/configuration/destinations/az.py b/twindb_backup/configuration/destinations/az.py index 583c4da5..00cafcca 100644 --- a/twindb_backup/configuration/destinations/az.py +++ b/twindb_backup/configuration/destinations/az.py @@ -4,23 +4,31 @@ class AZConfig: """Azure Blob Storage Configuration.""" - def __init__(self, connection_string, container_name, chunk_size=1024 * 1024 * 4): # 4MiB + def __init__( + self, connection_string: str, container_name: str, chunk_size: int = 1024 * 1024 * 4, remote_path: str = "/" + ): 
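The pipe-based ``get_stream`` in the destination diff above is the most delicate piece of the refactor: a child process downloads the blob into the write end of an ``os.pipe`` while the parent yields the read end, so a backup copy can be consumed as a stream without buffering the whole object in memory. A stripped-down sketch of the same pattern, not part of this patch, with an arbitrary ``download`` callable standing in for the SDK call:

import os
from contextlib import contextmanager
from multiprocessing import Process


@contextmanager
def stream_from(download):
    """Yield a readable file object fed by download(sink) in a child process."""
    read_fd, write_fd = os.pipe()

    def _writer():
        os.close(read_fd)  # the child only writes
        with os.fdopen(write_fd, "wb") as sink:
            download(sink)

    proc = Process(target=_writer)
    proc.start()
    os.close(write_fd)  # the parent only reads; EOF arrives when the child exits
    try:
        with os.fdopen(read_fd, "rb") as source:
            yield source
    finally:
        proc.join()

Used against a ``ContainerClient``, the callable would look like ``lambda sink: container_client.download_blob(key).readinto(sink)``. Like the original, this relies on the default ``fork`` start method on Linux, since the child's target is a nested function.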
self._connection_string = connection_string self._container_name = container_name self._chunk_size = chunk_size + self._remote_path = remote_path @property - def connection_string(self): + def connection_string(self) -> str: """CONNECTION_STRING""" return self._connection_string @property - def container_name(self): + def container_name(self) -> str: """CONTAINER_NAME""" return self._container_name @property - def chunk_size(self): + def chunk_size(self) -> int: """CHUNK_SIZE""" return self._chunk_size + + @property + def remote_path(self) -> str: + """REMOTE_PATH""" + return self._remote_path diff --git a/twindb_backup/destination/az.py b/twindb_backup/destination/az.py index c7363baa..afb857cb 100644 --- a/twindb_backup/destination/az.py +++ b/twindb_backup/destination/az.py @@ -26,7 +26,7 @@ def __init__( connection_string: str, hostname: str = socket.gethostname(), chunk_size: int = 4 * 1024 * 1024, # TODO: Add support for chunk size - remote_path: str = "/", # TODO: Add support for remote path input + remote_path: str = "/", ) -> None: """Creates an instance of the Azure Blob Storage Destination class, initializes the ContainerClient and validates the connection settings @@ -50,6 +50,9 @@ def __init__( self._container_client = self._connect() + """HELPER FUNCTIONS + """ + def _connect(self) -> ContainerClient: """Connects to an Azure Storage Account and initializes a ContainerClient, ensures the container exists, creating one if not found @@ -80,6 +83,17 @@ def _connect(self) -> ContainerClient: return client + def render_path(self, path: str) -> str: + """Renders the absolute path for the Azure Blob Storage Destination + + Returns: + str: Absolute path to the blob in the container + """ + return f"{self._remote_path}/{path}" + + """BaseDestination ABSTRACT METHODS IMPLEMENTATION + """ + def delete(self, path: str) -> None: """Deletes a blob from the Azure storage account's container @@ -89,11 +103,11 @@ def delete(self, path: str) -> None: Raises: err: Raises an error if the blob failed to be deleted """ - LOG.debug("Attempting to delete blob: " + path) + LOG.debug(f"Attempting to delete blob: {self.render_path(path)}") try: - self._container_client.delete_blob(path) + self._container_client.delete_blob(self.render_path(path)) except builtins.Exception as err: - LOG.error(f"Failed to delete blob {path}. Error: {type(err).__name__}, Reason: {err}") + LOG.error(f"Failed to delete blob {self.render_path(path)}. 
Error: {type(err).__name__}, Reason: {err}") raise err @contextmanager @@ -107,16 +121,15 @@ def get_stream(self, copy): T.Generator(T.BinaryIO): A generator that yields a stream of the blob's content """ - LOG.debug("Attempting to stream blob: " + copy.key) + LOG.debug(f"Attempting to stream blob: {self.render_path(copy.key)}") pipe_in, pipe_out = os.pipe() - path = f"{self._remote_path}/{copy.key}" def _download_to_pipe(blob_key: str, pipe_in: int, pipe_out: int) -> None: os.close(pipe_in) with os.fdopen(pipe_out, "wb") as pipe_out_file: self._container_client.download_blob(blob_key).readinto(pipe_out_file) - proc = Process(target=_download_to_pipe, args=(path, pipe_in, pipe_out)) + proc = Process(target=_download_to_pipe, args=(self.render_path(copy.key), pipe_in, pipe_out)) proc.start() os.close(pipe_out) try: @@ -137,14 +150,14 @@ def read(self, filepath: str) -> bytes: Returns: bytes: Content of the blob """ - LOG.debug("Attempting to read blob: " + filepath) + LOG.debug(f"Attempting to read blob: {self.render_path(filepath)}") try: - return self._container_client.download_blob(filepath, encoding="utf-8").read() + return self._container_client.download_blob(self.render_path(filepath), encoding="utf-8").read() except ae.ResourceNotFoundError as err: - LOG.debug(f"File {filepath} does not exist in container {self._container_name}") - raise FileNotFound(f"File {filepath} does not exist in container {self._container_name}") + LOG.debug(f"File {self.render_path(filepath)} does not exist in container {self._container_name}") + raise FileNotFound(f"File {self.render_path(filepath)} does not exist in container {self._container_name}") except builtins.Exception as err: - LOG.error(f"Failed to read blob {filepath}. Error: {type(err).__name__}, Reason: {err}") + LOG.error(f"Failed to read blob {self.render_path(filepath)}. Error: {type(err).__name__}, Reason: {err}") raise err def save(self, handler: T.BinaryIO, filepath: str) -> None: @@ -158,10 +171,10 @@ def save(self, handler: T.BinaryIO, filepath: str) -> None: err: Raises an error if the blob failed to be written """ - LOG.debug("Attempting to save blob: " + filepath) + LOG.debug(f"Attempting to save blob: {self.render_path(filepath)}") with handler as file_obj: try: - self._container_client.upload_blob(filepath, file_obj) + self._container_client.upload_blob(self.render_path(filepath), file_obj) except builtins.Exception as err: LOG.error(f"Failed to upload blob or it already exists. Error {type(err).__name__}, Reason: {err}") raise err @@ -177,9 +190,9 @@ def write(self, content: str, filepath: str) -> None: err: Raises an error if the blob failed to be written """ - LOG.debug("Attempting to write blob: " + filepath) + LOG.debug(f"Attempting to write blob: {self.render_path(filepath)}") try: - self._container_client.upload_blob(filepath, content, overwrite=True) + self._container_client.upload_blob(self.render_path(filepath), content, overwrite=True) except builtins.Exception as err: LOG.error(f"Failed to upload or overwrite blob. 
Error {type(err).__name__}, Reason: {err}") raise err diff --git a/vagrant/environment/puppet/modules/profile/files/twindb-backup.cfg b/vagrant/environment/puppet/modules/profile/files/twindb-backup.cfg index dba2e40b..e7f5c697 100644 --- a/vagrant/environment/puppet/modules/profile/files/twindb-backup.cfg +++ b/vagrant/environment/puppet/modules/profile/files/twindb-backup.cfg @@ -26,6 +26,7 @@ BUCKET="twindb-backups" [az] connection_string="DefaultEndpointsProtocol=https;AccountName=ACCOUNT_NAME;AccountKey=ACCOUNT_KEY;EndpointSuffix=core.windows.net" container_name="twindb-backups" +#remote_path = /backups/mysql # optional # GCS destination settings [gcs] From b3aaafe8bcbef0bd2c5461f51512d8e7548a5089 Mon Sep 17 00:00:00 2001 From: Jsalz2000 Date: Thu, 29 Aug 2024 22:59:40 -0500 Subject: [PATCH 05/10] Removes old azure unit tests --- Makefile | 6 - tests/unit/conftest.py | 5 + tests/unittests/azblob_testing/__init__.py | 47 - .../test_azblob_functionality.py | 1189 ----------------- .../test_remote_data_generation.py | 88 -- tests/unittests/excluded_env_config/README.md | 120 -- .../excluded_env_config/_dummy_content.json | 5 - .../build_out_dummy_env.py | 418 ------ .../dummy_content_generation/__init__.py | 40 - .../cave_generation.py | 50 - .../cave_painting_desc.py | 29 - .../dummy_content_generation/dino_namegen.py | 389 ------ .../dummy_content_generation/no_dupes.py | 14 - .../dummy_env_vars.json.template | 40 - tox.ini | 7 +- twindb_backup/__init__.py | 4 +- 16 files changed, 8 insertions(+), 2443 deletions(-) delete mode 100644 tests/unittests/azblob_testing/__init__.py delete mode 100644 tests/unittests/azblob_testing/destination_tests/test_azblob_functionality.py delete mode 100644 tests/unittests/azblob_testing/environment_access_tests/test_remote_data_generation.py delete mode 100644 tests/unittests/excluded_env_config/README.md delete mode 100644 tests/unittests/excluded_env_config/_dummy_content.json delete mode 100644 tests/unittests/excluded_env_config/build_out_dummy_env.py delete mode 100644 tests/unittests/excluded_env_config/dummy_content_generation/__init__.py delete mode 100644 tests/unittests/excluded_env_config/dummy_content_generation/cave_generation.py delete mode 100644 tests/unittests/excluded_env_config/dummy_content_generation/cave_painting_desc.py delete mode 100644 tests/unittests/excluded_env_config/dummy_content_generation/dino_namegen.py delete mode 100644 tests/unittests/excluded_env_config/dummy_content_generation/no_dupes.py delete mode 100644 tests/unittests/excluded_env_config/dummy_env_vars.json.template diff --git a/Makefile b/Makefile index 982a72ca..b4b4ab83 100644 --- a/Makefile +++ b/Makefile @@ -117,12 +117,6 @@ lint: ## check style with pylint test: ## Run tests quickly with the default Python and generate code coverage report pytest -xv --cov-report term-missing --cov-report xml --cov=./twindb_backup tests/unit -test-including-azure-blob: ## Like 'make test' but includes tests for azure blob destination - coverage run --source=twindb_backup -m pytest -xv tests/unit - coverage run -a --source=twindb_backup -m unittest -cvf --locals tests/unittests/azblob_testing/environment_access_tests/test_remote_data_generation.py - coverage run -a --source=twindb_backup -m unittest -cvf --locals tests/unittests/azblob_testing/destination_tests/test_AzureBlob_functions.py - coverage report - test-integration: ## Run integration tests. 
Must be run in vagrant py.test -xsv tests/integration/ diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 300f368f..9c2aaea9 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -28,6 +28,11 @@ def config_content(): AWS_DEFAULT_REGION="us-east-1" BUCKET="twindb-backups" +[az] +connection_string="DefaultEndpointsProtocol=https;AccountName=ACCOUNT_NAME;AccountKey=ACCOUNT_KEY;EndpointSuffix=core.windows.net" +container_name="twindb-backups" +remote_path="/backups/mysql" + [gcs] GC_CREDENTIALS_FILE="XXXXX" GC_ENCRYPTION_KEY= diff --git a/tests/unittests/azblob_testing/__init__.py b/tests/unittests/azblob_testing/__init__.py deleted file mode 100644 index e448ab72..00000000 --- a/tests/unittests/azblob_testing/__init__.py +++ /dev/null @@ -1,47 +0,0 @@ -from pathlib import Path - -HERE = Path(__file__).parent - - -def do_set_osenvs(setter_func): - here = Path(HERE) - target_dummies = None - while target_dummies is None and here.name: - if "dummy_env_vars.json" not in here.iterdir(): - here = here.parent - else: - target_dummies = str(here.joinpath("dummy_env_vars.json")) - setter_func(target_dummies) - - -PART_NAMES = "store,host,container,interval,media_type,fname".split(",") -SAMPLE_TARGETS = [ - "../../../.../.../mysql-2020-07-29_01_00_03.xbstream.gz", - "../../mysql/some/extra/dirs/mysql-2020-07-29_01_00_03.xbstream.gz", - "../../../../../mysql-fullbackup-qa1-rms", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_03_00_03.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_04_00_03.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_05_00_03.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_06_00_04.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_07_00_03.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_08_00_03.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_09_00_03.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_10_00_03.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_11_00_03.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_12_00_03.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_13_00_03.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_14_00_03.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_15_00_03.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_16_00_04.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_17_00_03.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_18_00_03.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_19_00_03.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_20_00_03.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_21_00_03.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_22_00_03.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-28_23_00_03.xbstream.gz", - "s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-29_00_05_13.xbstream.gz", - 
"s3://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-29_01_00_03.xbstream.gz", - "azure://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-29_01_00_03.xbstream.gz", - "azure://{host}/mysql-fullbackup-qa1-rms/hourly/mysql/mysql-2020-07-29_01_00_03.xbstream.gz", -] diff --git a/tests/unittests/azblob_testing/destination_tests/test_azblob_functionality.py b/tests/unittests/azblob_testing/destination_tests/test_azblob_functionality.py deleted file mode 100644 index 0f8d8096..00000000 --- a/tests/unittests/azblob_testing/destination_tests/test_azblob_functionality.py +++ /dev/null @@ -1,1189 +0,0 @@ -import io -import logging -import os -import sys -import time -import types -import unittest -from contextlib import contextmanager -from pathlib import Path -from typing import Dict, List, Optional, Tuple - -# third-party imports -import coverage -from azure.core.exceptions import ResourceExistsError, ResourceNotFoundError - -# azure imports (also a third-party import) ;) -from azure.storage.blob import BlobClient, BlobProperties, ContainerClient -from azure.storage.blob._shared.response_handlers import PartialBatchErrorException - -from tests.unittests.azblob_testing import PART_NAMES, SAMPLE_TARGETS, do_set_osenvs - -# local project imports -from twindb_backup import LOG - -DO_TEST_SKIPPING = False - - -def get_root(path: Path, dir_name: str): - if path.name and path.name == dir_name: - return path - return get_root(path.parent, dir_name) - - -def handle_coverage(): - root = get_root(Path(__file__).parent, "backup") - - @contextmanager - def cover_ctx(): - cov = coverage.Coverage(data_file=str(root.joinpath("cov/.coverage"))) - cov.start() - try: - yield - finally: - cov.stop() - cov.save() - cov.html_report() - - return cover_ctx - - -test_function_logger = LOG -test_function_logger.setLevel(0) - - -class AzureBlobBaseCase(unittest.TestCase): - """No direct tests provided here. This class defines basic setup of testing resources which subclasses will need.""" - - @staticmethod - def _reproduce_potential_import_error(err: ImportError, msg): - def repeatable_raiser(*args, **kwargs): - nonlocal err - try: - raise ImportError(msg) from err - except ImportError as ie: - # creation of trimmed traceback inspired by the stack-overflow answer found here: - # https://stackoverflow.com/a/58821552/7412747 - tb = sys.exc_info()[2] - back_frame = tb.tb_frame.f_back - back_tb = types.TracebackType( - tb_next=None, - tb_frame=back_frame, - tb_lasti=back_frame.f_lasti, - tb_lineno=back_frame.f_lineno, - ) - immediate_err = ie.with_traceback(back_tb) - raise immediate_err - - return repeatable_raiser - - # noinspection PyUnresolvedReferences - @classmethod - def setUpClass(cls) -> None: - """Provides a class level function that will only be run one time - when this TestCase instance is first initialized.""" - try: - from twindb_backup.destination.azblob import AzureBlob - except ImportError as ie: - msg = f"Attempted use of object twindb_backup.destination.azblob.AzureBlob failed due to import error" - AzureBlob = cls._reproduce_potential_import_error(ie, msg) - try: - from twindb_backup.destination.azblob import logger - - # during testing it would be nice to see all console log output (if any). 
- logger.setLevel(0) - except ImportError as ie: - pass - if "PRIMARY_TEST_CONN_STR" not in os.environ: - from tests.unittests.excluded_env_config.build_out_dummy_env import set_osenvs - - logging.getLogger("azure.core.pipeline.policies.http_logging_policy").setLevel(logging.WARNING) - do_set_osenvs(set_osenvs) - conn_str = os.environ["PRIMARY_TEST_CONN_STR"] - conn_str_split = conn_str.split(";") - conn_str_parts = {} - for part in conn_str_split: - try: - k, v = [s for s in part.split("=", 1)] - conn_str_parts[k] = v - except ValueError as ve: - obj = [v for v in part.split("=", 1)] - k, v = obj - ve.args += part, obj, len(obj), k, v - raise ve - # conn_str_parts = {k:v for part in conn_str.split(";") for k,v in part.split("=",1)} - cls._connection_string = conn_str - cls._remote_protocol = conn_str_parts["DefaultEndpointsProtocol"] - remote_host = cls._remote_host = f'{conn_str_parts["AccountName"]}.blob.{conn_str_parts["EndpointSuffix"]}' - cls._remote_path_template = os.environ["TEST_COMPLETE_REMOTE_PATH_TEMPLATE"] - cls._AzureBlob = AzureBlob - sample_resources = Path(__file__).resolve().parent - while not any("sample_resources" in str(p) for p in sample_resources.iterdir()): - sample_resources = sample_resources.parent - cls._sample_resource_folder = sample_resources.joinpath("sample_resources") - sample_targets = cls._sample_targets = SAMPLE_TARGETS - for i in range(len(sample_targets)): - sample_targets[i] = sample_targets[i].format(host=remote_host) - cls._part_names = PART_NAMES - cls._arg_names = [ - "default_protocol", - "default_host_name", - "default_container_name", - "default_interval", - "default_media_type", - "default_fname_prefix", - ] - cls._blank_parts = { - "default_protocol": "", - "default_host_name": "", - "default_container_name": "", - "default_interval": "", - "default_media_type": "", - "default_fname_prefix": "", - } - cls._none_parts = { - "default_protocol": None, - "default_host_name": None, - "default_container_name": None, - "default_interval": None, - "default_media_type": None, - "default_fname_prefix": None, - } - cls._basic_src_dst_kwargs = { - "protocol": "https", - "host_name": f"{remote_host}", - "container_name": "{default_container_name}", - "interval": "{default_interval}", - "media_type": "mysql", - "fname_prefix": "{default_fname_prefix}", - } - cls._container_names: Optional[str] = None - # For clarification on the following class members and their structure, - # see their associated properties defined below. 
- cls._basic_remote_dest_path: Optional[str] = None - cls._basic_remote_source_path: Optional[str] = None - cls._complex_invalid_remote_paths: Optional[List[Tuple[str, Dict[str, str], Dict[str, str]]]] = None - cls._complex_valid_remote_paths: Optional[Dict[str, List[Tuple[str, Dict[str, str], Dict[str, str]]]]] = None - cls._easy_container_names_map: Optional[Dict[str, str]] = None - cls._simple_valid_remote_paths: Optional[List[Tuple[str, Dict[str, str], Dict[str, str]]]] = None - cls._simple_valid_save_source_paths: Optional[List[str]] = None - cls._structured_parts: Optional[Dict[str, Dict[str, str]]] = None - cls._unique_backup_locations: Optional[Tuple[str]] = None - - @property - def AzureBlob(self): - return self._AzureBlob - - @property - def basic_remote_source_path(self): - if not self._basic_remote_source_path: - self._basic_remote_source_path = self._remote_path_template[: -len("{fname}")].format( - **self._basic_src_dst_kwargs - ) - return self._basic_remote_source_path - - @property - def basic_remote_dest_path(self): - if not self._basic_remote_dest_path: - self._basic_remote_dest_path = self._remote_path_template[: -len("{fname}")].format( - **self._basic_src_dst_kwargs - ) - return self._basic_remote_dest_path - - @property - def complex_valid_remote_paths(self): - if not self._complex_invalid_remote_paths: - # create mutable_parts as a deep copy of structured_parts - mutable_parts = {k: {kk: vv for kk, vv in v.items()} for k, v in self.structured_parts.items()} - path_template = "{default_protocol}://{default_host_name}/{default_container_name}/{default_interval}/{default_media_type}/{default_fname_prefix}" - self._complex_valid_remote_paths = { - "sub_all": [ - ( - "", - {k: v for k, v in mutable_parts[name].items()}, - self.structured_parts[name], - ) - for name in mutable_parts - ] - } - split_point = len("default_") - # build out a suite of test inputs that have part-wise substitution changes marked - for part in self._arg_names: - # marks the part to flag for substitution - [mutable_parts[name].update({part: "..."}) for name in mutable_parts] - _part = part[split_point:] - sub_part = f"sub_{_part}" - self._complex_valid_remote_paths[sub_part] = [ - ( - path_template.format(**args_d), - {part: self.structured_parts[name][part]}, - self.structured_parts[name], - ) - for name, args_d in mutable_parts.items() - ] - # reset the flagged part with its original value in preparation for next loop. 
- [mutable_parts[name].update({part: self.structured_parts[name][part]}) for name in mutable_parts] - return self._complex_valid_remote_paths - - @property - def complex_invalid_remote_paths(self): - if not self._complex_invalid_remote_paths: - blank_parts = self._blank_parts - none_parts = self._none_parts - self._complex_invalid_remote_paths = [ - # (f"azure://{cls._remote_host}/barney-of-buffalo-lodge/hourly/mysql/",{}), - ("../../../hourly/mysql/", {}), - ("../../../hourly/mysql/", blank_parts), - ("../../../hourly/mysql/", none_parts), - ( - f"../../../https://{self._remote_host}/{self._structured_parts['wilma']['default_container_name']}/hourly/mysql/", - {}, - ), - ( - f"../../../https://{self._remote_host}/{self._structured_parts['wilma']['default_container_name']}/hourly/mysql/", - blank_parts, - ), - ( - f"../../../https://{self._remote_host}/{self._structured_parts['wilma']['default_container_name']}/hourly/mysql/", - none_parts, - ), - # (f"https://{cls._remote_host}/wilma-of-impossibly-good-figure/daily/mysql/",{}), - (f"https://{self._remote_host}/.../daily/mysql/", {}), - (f"https://{self._remote_host}/.../daily/mysql/", blank_parts), - (f"https://{self._remote_host}/.../daily/mysql/", none_parts), - # (f"azure://{cls._remote_host}/betty-of-impossibly-good-figure/weekly/mysql/",{}), - (f"azure://{self._remote_host}/.../", {}), - (f"azure://{self._remote_host}/.../", blank_parts), - (f"azure://{self._remote_host}/.../", none_parts), - # (f"https://{cls._remote_host}/fred-of-buffalo-lodge/monthly/mysql/",{}), - (f"https://{self._remote_host}/", {}), - (f"https://{self._remote_host}/", blank_parts), - (f"https://{self._remote_host}/", none_parts), - ] - return self._complex_invalid_remote_paths - - @property - def connection_string(self): - return self._connection_string - - @property - def container_names(self): - if not self._container_names: - self._container_names = os.environ["TEST_CONTAINER_NAMES"].split(";") - self._container_names.extend( - "save-function-test,write-function-test,delete-function-test,combo-all-flintstones".split(",") - ) - return self._container_names - - @property - def easy_container_names(self): - if not self._easy_container_names_map: - self._easy_container_names_map = {v.split("-")[0]: v for v in self.container_names} - return self._easy_container_names_map - - @property - def part_names(self): - return self._part_names - - @property - def remote_path_template(self): - return self._remote_path_template - - @property - def sample_targets(self): - return self._sample_targets - - @property - def simple_valid_remote_paths(self): - if not self._simple_valid_remote_paths: - none_parts = self._none_parts - blank_parts = self._blank_parts - remote_host = self._remote_host - self._simple_valid_remote_paths = [ - ( - f"https://{remote_host}/barney-of-buffalo-lodge/hourly/mysql/backup/", - {}, - {}, - ), - ( - f"https://{remote_host}/barney-of-buffalo-lodge/hourly/mysql/backup/", - blank_parts, - {}, - ), - ( - f"https://{remote_host}/barney-of-buffalo-lodge/hourly/mysql/backup/", - none_parts, - {}, - ), - ( - f"https://{remote_host}/barney-of-buffalo-lodge/hourly/mysql/backup/", - self.structured_parts["barney"], - {}, - ), - ( - f"https://{remote_host}/wilma-of-impossibly-good-figure/daily/mysql/backup/", - {}, - {}, - ), - ( - f"https://{remote_host}/wilma-of-impossibly-good-figure/daily/mysql/backup/", - blank_parts, - {}, - ), - ( - f"https://{remote_host}/wilma-of-impossibly-good-figure/daily/mysql/backup/", - none_parts, - {}, - ), - ( - 
f"https://{remote_host}/wilma-of-impossibly-good-figure/daily/mysql/backup/", - self.structured_parts["wilma"], - {}, - ), - ( - f"https://{remote_host}/betty-of-impossibly-good-figure/weekly/mysql/backup/", - {}, - {}, - ), - ( - f"https://{remote_host}/betty-of-impossibly-good-figure/weekly/mysql/backup/", - blank_parts, - {}, - ), - ( - f"https://{remote_host}/betty-of-impossibly-good-figure/weekly/mysql/backup/", - none_parts, - {}, - ), - ( - f"https://{remote_host}/betty-of-impossibly-good-figure/weekly/mysql/backup/", - self.structured_parts["betty"], - {}, - ), - ( - f"https://{remote_host}/fred-of-buffalo-lodge/monthly/mysql/backup/", - {}, - {}, - ), - ( - f"https://{remote_host}/fred-of-buffalo-lodge/monthly/mysql/backup/", - blank_parts, - {}, - ), - ( - f"https://{remote_host}/fred-of-buffalo-lodge/monthly/mysql/backup/", - none_parts, - {}, - ), - ( - f"https://{remote_host}/fred-of-buffalo-lodge/monthly/mysql/backup/", - self.structured_parts["fred"], - {}, - ), - ] - for path, kwargs, out in self._simple_valid_remote_paths: - self._get_remote_parts(path, kwargs, out) - return self._simple_valid_remote_paths - - @property - def simple_valid_save_source_paths(self): - if not self._simple_valid_save_source_paths: - save_trunkate_len = len("backup/") - self._simple_valid_save_source_paths = [p[:-save_trunkate_len] for p in self.unique_backup_locations] - return self._simple_valid_save_source_paths - - @property - def structured_parts(self): - if not self._structured_parts: - remote_host = self._remote_host - self._structured_parts = { - "barney": { - "default_protocol": "https", - "default_host_name": f"{remote_host}", - "default_container_name": "barney-of-buffalo-lodge", - "default_interval": "hourly", - "default_media_type": "mysql", - "default_fname_prefix": "", - }, - "betty": { - "default_protocol": "https", - "default_host_name": f"{remote_host}", - "default_container_name": "betty-of-impossibly-good-figure", - "default_interval": "weekly", - "default_media_type": "mysql", - "default_fname_prefix": "", - }, - "wilma": { - "default_protocol": "https", - "default_host_name": f"{remote_host}", - "default_container_name": "wilma-of-impossibly-good-figure", - "default_interval": "daily", - "default_media_type": "mysql", - "default_fname_prefix": "", - }, - "fred": { - "default_protocol": "https", - "default_host_name": f"{remote_host}", - "default_container_name": "fred-of-buffalo-lodge", - "default_interval": "monthly", - "default_media_type": "mysql", - "default_fname_prefix": "", - }, - } - return self._structured_parts - - @property - def unique_backup_locations(self): - if not self._unique_backup_locations: - self._unique_backup_locations = tuple(set(p for p, _, _ in self._simple_valid_remote_paths)) - return self._unique_backup_locations - - @staticmethod - def _get_remote_parts(path: str, kwargs: dict, out: dict): - """ - "default_protocol" - "default_host_name" - "default_container_name" - "default_interval" - "default_media_type" - "default_fname_prefix" - - :param path: - :type path: - :param kwargs: - :type kwargs: - :param out: - :type out: - :return: - :rtype: - """ - path = path.rstrip("/") - _path = path - part_names = [ - "default_host_name", - "default_container_name", - "default_interval", - "default_media_type", - "default_fname_prefix", - ] - if path: - protocol, sep, path = path.partition("://") - if not path: - path = protocol - protocol = "" - out["default_protocol"] = protocol or kwargs.get("default_protocol", "") - for name in part_names[:-1]: - if 
not path: - break - part, _, path = path.partition("/") - if not path: - path = part - part = "" - kpart = kwargs.get(name, "") - out[name] = part or kpart - else: - name = part_names[-1] - part, _, path = path.partition("/") - kpart = kwargs.get(name, "") - out[name] = part or kpart - else: - out.update(kwargs) - - def _cleanup_remote(self): - delete_count = 0 - for kwargs in self.structured_parts.values(): - remote = self.AzureBlob( - self.basic_remote_source_path.format(**{k: v.strip(":/") for k, v in kwargs.items()}), - self.connection_string, - ) - delete_targets = [f for f in remote.list_files() if any(s in f for s in ("backup", "delete"))] - if not delete_targets: - continue - parts = [f.partition("://")[2].split("/")[1:] for f in delete_targets] - containers = [fparts[0] for fparts in parts] - full_fnames = ["/".join(fparts[1:]) for fparts in parts] - container_map = {} - for cont, fname in zip(containers, full_fnames): - container_map.setdefault(cont, []).append(fname) - containers = tuple(container_map.keys()) - with remote.connection_manager(containers) as cmanager: - cclients: list[ContainerClient] = cmanager.client - for cclient in cclients: - targets = container_map[cclient.container_name] - cclient.delete_blobs(*targets) - delete_count += len(targets) - return delete_count - - -class TC_000_ImportsTestCase(unittest.TestCase): - def test_00_successful_imports(self): - from twindb_backup.destination.azblob import AzureBlob - - def test_01_correct_os_environs(self): - from tests.unittests.excluded_env_config.build_out_dummy_env import set_osenvs - - do_set_osenvs(set_osenvs) - - -class TC_001_AzureBlobInstantiationTestCase(AzureBlobBaseCase): - def test_01_complex_valid_remote_paths(self) -> None: - expected: dict - for sub_type, sub_args in self.complex_valid_remote_paths.items(): - for remote_path, kwargs, expected in sub_args: - dest = self.AzureBlob(remote_path, self.connection_string, **kwargs) - attr: str - expected_val: str - for attr, expected_val in expected.items(): - produced_val = getattr(dest, attr) - expected_val = expected_val.strip(":/") - with self.subTest( - objective="checks if dest's computed properties match expectations, where dest is an instance of the twindb_backup.destinations.azblob.AzureBlob class", - sub_type=sub_type, - remote_path=remote_path, - kwargs=kwargs, - expected=expected, - attr=attr, - produced_val=produced_val, - expected_val=expected_val, - ): - self.assertEqual( - produced_val, - expected_val, - msg=( - f"\n\t{sub_type=}" - f"\n\t{remote_path=}" - f"\n\t{kwargs=}" - f"\n\t{expected=}" - f"\n\t{attr=}" - f"\n\t{produced_val=}" - f"\n\t{expected_val=}" - ), - ) - - def test_00_simple_valid_remote_paths(self) -> None: - expected: dict - for remote_path, kwargs, expected in self.simple_valid_remote_paths: - dest = self.AzureBlob(remote_path, self.connection_string, **kwargs) - attr: str - val: str - for attr, val in expected.items(): - with self.subTest( - objective="checks if dest's computed properties match expectations, where dest is an instance of the twindb_backup.destinations.azblob.AzureBlob class", - remote_path=remote_path, - kwargs=kwargs, - expected=expected, - attr=attr, - expected_val=val, - ): - self.assertEqual(getattr(dest, attr), val) - - -class TC_002_ListFilesTestCase(AzureBlobBaseCase): - """Tests an AzureBlob class instance's ability to produce a valid list of files when - given a relative path to some file or directory root in the same storage account as - its connection-string is associated with. 
- - When given an invalid path, that is incorrectly configured or asking for a file name - that doesn't exist, the correct behavior should be to return an empty list, and not - raise any errors. - - """ - - def setUp(self) -> None: - kwargs = self.structured_parts["fred"] - remote_path = self.basic_remote_source_path.format(**{k: v.strip(":/") for k, v in kwargs.items()}) - self.remote_source = self.AzureBlob(remote_path, self.connection_string) - self.expected = {} - for parent, parts_dict in self.structured_parts.items(): - self.expected[parent] = [] - container = parts_dict["default_container_name"] - fnames = os.environ[container.strip("/").replace("-", "_").upper()].split(";") - path = "{default_protocol}://{default_host_name}/{default_container_name}/{{fname}}".format(**parts_dict) - for fname in fnames: - self.expected[parent].append(path.format(fname=fname)) - non_suspend_brk = 0 - - def test_00_list_files_recursive_no_args(self): - retrieved = [ - f - for f in self.remote_source.list_files( - prefix=self.remote_source.default_container_name, - recursive=True, - files_only=True, - ) - if not f.endswith("sticker.png") - ] - expected = [name for cname, names in self.expected.items() for name in names] - for retrieved_f in retrieved: - path_f = Path(retrieved_f) - with self.subTest( - objective="confirm that retrieved_f is among our expected files list.", - retrieved_f=retrieved_f, - expected=expected, - ): - self.assertIn(retrieved_f, expected, f"\n\t{retrieved_f=}\n\t{expected=}") - - @unittest.skipUnless(not DO_TEST_SKIPPING, "slow test case, skipping for now") - def test_01_list_files_prefixed(self): - dest = self.remote_source - # prefix:str=None, recursive=False, files_only=False - pref_expected = [ - (".../", 6), - ("...", 6), - (".../hourly", 0), - (".../monthly", 6), - (".../monthly/mysql", 6), - (".../monthly/mysql/does_not_exist", 0), - (".../hourly/mysql", 0), - (".../.../does_not_exist", 0), - (".../hourly/does_not_exist", 0), - (".../monthly/does_not_exist", 0), - (".../.../mysql", 6), - ("barney-of-buffalo-lodge", 6), - ("barney*/", 6), - ("barney-of-buffalo-lodge/hourly/mysql", 6), - ] - tf_patterns = [ - (False, False), - (True, False), - (True, True), - (False, True), - ] - testable_prefixes = [ - ( - dict(prefix=prefix, recursive=recursive, files_only=files_only), - expected_res_len, - ) - for prefix, expected_res_len in pref_expected - for recursive, files_only in tf_patterns - ] - for _kwargs, expected_ret_len in testable_prefixes: - retrieved = dest.list_files(**_kwargs) - ret_str = "\n\t\t".join(retrieved) - ret_len = len(retrieved) - kwarg_str = "\n\t\t".join(f"{k}: {v}" for k, v in _kwargs.items()) - failure_msg = ( - f"A prefix of {_kwargs} should result in {expected_ret_len}, actual retrieval got {ret_len}, files found." 
- f"\n\t{dest.default_protocol=}" - f"\n\t{dest.default_host_name=}" - f"\n\t{dest.default_container_name=}" - f"\n\t{dest.default_interval=}" - f"\n\t{dest.default_media_type=}" - f"\n\t{dest.default_fname_prefix=}" - f"\n\tkwargs=\n\t\t{kwarg_str}" - f"\n\tretrieved=\n\t\t{ret_str}" - ) - with self.subTest( - objective="ensure that the number of returned files for given prefixes matches expectations", - ret_len=ret_len, - expected_ret_len=expected_ret_len, - _kwargs=_kwargs, - ): - self.assertEqual(ret_len, expected_ret_len, failure_msg) - - -class TC_003_ReadTestCase(AzureBlobBaseCase): - def setUp(self) -> None: - kwargs = self.structured_parts["fred"] - src_path = self.basic_remote_source_path.format(**kwargs) - self.remote_source = self.AzureBlob(src_path, self.connection_string) - self.local_copy_location = self._sample_resource_folder.joinpath("remote_example") - container_paths = tuple(self.local_copy_location.iterdir()) - flist = [] - expected_data = {} - for cpath in container_paths: - ref = expected_data.setdefault(cpath.name, {}) - for bpath in cpath.rglob("**/*.txt"): - ref[bpath.name] = bpath.read_text() - flist.append(bpath) - smallest_file = min( - filter( - lambda p: self.remote_source.default_container_name in p.parts, - flist, - ), - key=lambda p: p.stat().st_size, - ) - self.smallest_file = str(smallest_file).split(self.remote_source.default_container_name)[1].lstrip("/") - self.expected_data = expected_data - - def test_read(self): - targets = tuple( - filter( - lambda s: self.smallest_file in s, - self.remote_source.list_files(), - ) - ) - containers = tuple(self.expected_data.keys()) - for f in targets: - _, _, path = f.partition("://") - parts = path.split("/") - container = parts[1] - if container in containers and "likes.dinosaurs.txt" in f: - test_function_logger.debug( - f"Running test on:" - f"\n\ttarget={f}\n\tas_bytes={False}\n\tcontainer={container}\n\tfname={parts[-1]}" - ) - with self.subTest( - objective="evaluate if data read from remote blob correctly matches the seed data stored locally.", - container=container, - fname=parts[-1], - target_file=f, - ): - data = self.remote_source.read(f) - expected = self.expected_data[container][parts[-1]] - data = data.decode("utf-8") - self.assertEqual(len(data), len(expected)) - self.assertMultiLineEqual(data, expected) - - -class TC_004_DeleteTestCase(AzureBlobBaseCase): - def setUp(self) -> None: - """ - Creates a temporary container (named delete-function-test) in the configured Azure blob storage endpoint, - and populates it with files copied from the "wilma-of-impossible-figure" sample container. - This container and its contents will be cleaned up at the end of each test function in this test-case. 
- """ - dst_container = self.test_container = self.easy_container_names["delete"] - src_container = self.easy_container_names["wilma"] - kwargs = {k: (v if "prefix" not in k else "").strip(":/") for k, v in self.structured_parts["wilma"].items()} - src_path = self.basic_remote_source_path.format(**kwargs) - kwargs["default_container_name"] = dst_container - dst_path = self.basic_remote_dest_path.format(**kwargs) - src = self.azure_source = self.AzureBlob(src_path, self.connection_string, False) - dest = self.azure_del_target = self.AzureBlob(dst_path, self.connection_string, True) - blob_names = [p.split(src_container)[1][1:] for p in src.list_files(src_container)] - self.participating_files = [] - with dest.connection_manager(dest.default_container_name) as cont_iter: - iter_type = next(cont_iter) - if iter_type != "ContainerClient": - from twindb_backup.destination.azblob import AzureClientManagerError - - raise AzureClientManagerError("Failed to get the right type of blob iterator") - dst_client: ContainerClient = next(cont_iter) - with src.connection_manager(src.default_container_name, blob=blob_names) as client_iterator: - iter_type = next(client_iterator) - if iter_type != "BlobClient": - from twindb_backup.destination.azblob import AzureClientManagerError - - raise AzureClientManagerError("Failed to get the right type of blob iterator") - copy_polls = [] - for src_bclient in client_iterator: - src_bclient: BlobClient - bname = src_bclient.blob_name - src_url = src_bclient.url - dst_bclient: BlobClient = dst_client.get_blob_client(bname) - self.participating_files.append((bname, src_url, dst_bclient.url)) - copy_polls.append(dst_bclient.start_copy_from_url(src_url)) - tries = 0 - while copy_polls and tries < 100: - for i in range(len(copy_polls) - 1, -1, -1): - if copy_polls[i]["copy_status"] == "success": - copy_polls.pop(i) - tries += 1 - - def tearDown(self) -> None: - with self.azure_source.connection_manager(self.test_container) as cont_iter: - iter_type = next(cont_iter) - if iter_type != "ContainerClient": - from twindb_backup.destination.azblob import AzureClientManagerError - - raise AzureClientManagerError("Failed to get the right type of blob iterator") - for client in cont_iter: - client: ContainerClient - try: - client.delete_blobs(*(tpl[2] for tpl in self.participating_files)) - except PartialBatchErrorException: - pass - - def test_00_delete_one_file(self): - del_target = self.participating_files[0][2] - self.azure_del_target.delete(del_target) - remaining_files = self.azure_del_target.list_files(".../.../.../") - readable_remaining = [f.split(self.test_container)[1] for f in remaining_files] - with self.subTest( - objective="ensure that once a file is deleted, it does not a member of the updated list of remaining_files", - del_target=del_target, - remaining_files=readable_remaining, - ): - self.assertNotIn(del_target, remaining_files) - for _, _, should_remain in self.participating_files[1:]: - with self.subTest( - objective="ensure that files not specified for deletion still remain", - should_remain=should_remain, - del_target=del_target, - remaining_files=readable_remaining, - ): - self.assertIn(should_remain, remaining_files) - - @unittest.skipUnless(not DO_TEST_SKIPPING, "slow test case, skipping for now") - def test_01_delete_multiple_files(self): - del_targets = self.participating_files[1::2] - remaining_participants = self.participating_files[::2] - for target in del_targets: - self.azure_del_target.delete(target[2]) - remaining_files = 
self.azure_del_target.list_files(".../.../.../") - readable_remaining = [f.split(self.test_container)[1] for f in remaining_files] - for target in del_targets: - del_target = target[2] - with self.subTest(del_target=del_target, remaining_files=readable_remaining): - self.assertNotIn(del_target, remaining_files) - for _, _, should_remain in remaining_participants: - with self.subTest(should_remain=should_remain, remaining_files=readable_remaining): - self.assertIn(should_remain, remaining_files) - - @unittest.skipUnless(not DO_TEST_SKIPPING, "slow test case, skipping for now") - def test_02_delete_all(self): - for bname, src_url, dst_url in self.participating_files: - self.azure_del_target.delete(dst_url) - remaining_files = [ - f - for f in self.azure_del_target.list_files(".../.../.../") - for p, _, fname in [f.rpartition("/")] - if fname and fname != "delete-function-test" - ] - if remaining_files: - self.fail(f"Failed to delete all files in target container: {remaining_files}") - - -class TC_005_WriteTestCase(AzureBlobBaseCase): - """Tests the different ways the `AzureBlob.write(...)` function can be called. - - We are drawing the source data from a single Azure storage subscription and writing the data back to the same - subscription in a different location. So, before running this set of tests, run the tests in the ReadTestCase class - to ensure proper source data is being provided to the writer. - """ - - def setUp(self) -> None: - self.test_container = self.easy_container_names["write"] - self.src_kwargs = self.structured_parts["barney"] - self.dst_kwargs = {k: v for k, v in self.src_kwargs.items()} - self.dst_kwargs["default_container_name"] = self.test_container - - self.local_copy_location = self._sample_resource_folder.joinpath("remote_example") - container_paths = tuple( - p for p in self.local_copy_location.iterdir() if p.name == self.src_kwargs["default_container_name"] - ) - smallest_file = min( - (p for c in container_paths for p in c.rglob("**/*.txt")), - key=lambda p: p.stat().st_size, - ) - self.smallest_file = str(smallest_file).split(self.src_kwargs["default_container_name"], 1)[1].lstrip("/") - - def test_00_write_generated_data_overwrite_fail(self): - test_str_content = "This is a simple and small bit of text to write to the destination_tests endpoint" - dest = self.AzureBlob( - self.basic_remote_dest_path.format(**self.dst_kwargs), - self.connection_string, - can_do_overwrites=False, - ) - pstr = dest.remote_path + "/overwrite.target.txt" - with self.subTest(content=test_str_content, path=pstr): - err = None - try: - dest.write(test_str_content, pstr) - except BaseException as be: - err = be - self.assertIsInstance(err, ResourceExistsError) - - def smallest_file_filter(self, file_url: str): - return self.smallest_file in file_url - - @unittest.skipUnless(not DO_TEST_SKIPPING, "slow test case, skipping for now") - def test_01_write_generated_data_overwrite_ok(self): - test_str_content = "This is a simple and small bit of text to write to the destination_tests endpoint" - dest = self.AzureBlob( - self.basic_remote_dest_path.format(**self.dst_kwargs), - self.connection_string, - can_do_overwrites=True, - ) - pstr = dest.remote_path + "/overwrite.target.txt" - with self.subTest(content=test_str_content, path=pstr): - try: - dest.write(test_str_content, pstr) - except BaseException as be: - self.fail(f"Failed to write to target file with exception details:\n\t{type(be)}: {be.args}") - - @unittest.skipUnless(not DO_TEST_SKIPPING, "slow test case, skipping for now") - def 
test_02_write_from_remote_overwrite_ok(self): - - source = self.AzureBlob( - self.basic_remote_source_path.format(**self.src_kwargs), - self.connection_string, - ) - dest = self.AzureBlob( - self.basic_remote_dest_path.format(**self.dst_kwargs), - self.connection_string, - can_do_overwrites=True, - ) - src_flist = tuple(filter(self.smallest_file_filter, source.list_files(".../"))) - for spath in src_flist: - parts = spath.partition("://")[2].split("/") - container = parts[1] - bname = "/".join(parts[2:]) - with BlobClient.from_connection_string(self.connection_string, container, bname) as bclient: - bclient: BlobClient - bprops: BlobProperties = bclient.get_blob_properties() - size = bprops.size - with source.get_stream(spath) as content: - parts[1] = dest.default_container_name - dpath = "/".join(parts) - with self.subTest(content_len=size, spath=spath, dpath=dpath): - try: - dest.write(content, dpath) - except BaseException as be: - self.fail(f"Failed to write to target file with exception details:\n\t{type(be)}: {be.args}") - - @unittest.skipUnless(not DO_TEST_SKIPPING, "slow test case, skipping for now") - def test_03_write_from_remote_overwrite_fail(self): - source = self.AzureBlob( - self.basic_remote_source_path.format(**self.src_kwargs), - self.connection_string, - ) - dest = self.AzureBlob( - self.basic_remote_dest_path.format(**self.dst_kwargs), - self.connection_string, - can_do_overwrites=False, - ) - src_flist = tuple(filter(self.smallest_file_filter, source.list_files(".../"))) - for spath in src_flist: - parts = spath.partition("://")[2].split("/") - container = parts[1] - bname = "/".join(parts[2:]) - with BlobClient.from_connection_string(self.connection_string, container, bname) as bclient: - bclient: BlobClient - bprops: BlobProperties = bclient.get_blob_properties() - size = bprops.size - with source.get_stream(spath) as content: - parts[1] = dest.default_container_name - dpath = "/".join(parts) - with self.subTest(content_len=size, spath=spath, dpath=dpath): - self.assertRaises(ResourceExistsError, dest.write, content, dpath) - - -class TC_006_SaveTestCase(AzureBlobBaseCase): - def setUp(self) -> None: - remote_dest_target = Path(self.basic_remote_dest_path.partition("://")[2]) - dparts = remote_dest_target.parts - container_names = self.container_names - container: str - container, *_ = tuple(cont for cont in container_names if "betty" in cont) - self.source_container = container - self.dest_container = self.easy_container_names["save"] - fnames = os.environ[container.upper().replace("-", "_")].split(";") - dparts = dparts[0], self.dest_container, fnames[0].rpartition("/")[0] - sparts = dparts[0], self.source_container, fnames[0].rpartition("/")[0] - remote_dest_target = "https://" + "/".join(dparts) - remote_src_target = "https://" + "/".join(sparts) - self.remote_dest_target = remote_dest_target - self.remote_src_target = remote_src_target - self.dest = self.AzureBlob(remote_dest_target, self.connection_string) - self.source = self.AzureBlob(remote_src_target, self.connection_string) - local_copy = self._sample_resource_folder.joinpath("remote_example") - local_copy = tuple(p for p in local_copy.iterdir() if "betty-of" in str(p))[0] - local_copy = list(local_copy.iterdir()) - while not all(p.suffix and p.suffix == ".txt" for p in local_copy): - extension = [] - for p in local_copy: - extension.extend(p.iterdir()) - local_copy = extension - local_copy = [min(local_copy, key=lambda s: Path(s).stat().st_size)] - self.local_target_files = local_copy - # ".../.../.../" 
tells our destination instance to use its default names for [protocol, host, container] - remote_blob_names = [] - for p in local_copy: - rel = ".../.../.../" + str(p).split(self.source_container)[1].lstrip("/") - remote_blob_names.append(rel) - self.remote_blob_names = remote_blob_names - - # because we are testing our destination with the overwrite parameter set to false, we need to make - # sure our destination does not already exist. - with ContainerClient.from_connection_string(self.connection_string, self.dest_container) as cclient: - cclient: ContainerClient - try: - cclient.delete_blobs(*cclient.list_blobs()) - except ResourceNotFoundError: - pass - self.local_target_files = sorted(self.local_target_files, key=lambda p: p.name) - self.remote_blob_names = sorted(self.remote_blob_names, key=lambda s: s.rpartition("/")[2]) - self.smallest_file = ( - str(min(self.local_target_files, key=lambda p: p.stat().st_size)) - .split(self.source.default_container_name)[1] - .strip("/") - ) - - def tearDown(self) -> None: - cclient: ContainerClient = ContainerClient.from_connection_string(self.connection_string, self.dest_container) - try: - cclient.delete_blobs(*cclient.list_blobs()) - finally: - cclient.close() - - @unittest.skipUnless(not DO_TEST_SKIPPING, "slow test case, skipping for now") - def test_00_save_from_local_fd(self): - for local_p, remote_p in zip(self.local_target_files, self.remote_blob_names): - if self.smallest_file not in str(local_p): - continue - with open(local_p, "rb") as f: - expected = f.read() - f.seek(0) - try: - self.dest.save(f, remote_p) - except ResourceExistsError: - self.fail("attempting to save to destination that already exists is a known failure condition.") - results = self.dest.read(remote_p) - with self.subTest( - objective="ensure that round-trip data transfer, starting in a local file, does not change or lose the data", - local_path=local_p, - remote_path=remote_p, - ): - self.assertEqual( - results, - expected, - "We've written from byte file to remote, " - "then read the stored contents back into a new bytes object for comparison.", - ) - - @unittest.skipUnless(not DO_TEST_SKIPPING, "slow test case, skipping for now") - def test_01_save_from_remote_stream(self): - source_file_urls = self.source.list_files( - ".../.../.../", - True, - str(self.local_target_files[0]).split(self.source.default_container_name)[1].lstrip("/"), - True, - ) - for p in source_file_urls: - if self.smallest_file not in p: - continue - dpath = ".../.../.../" + p.split(self.source.default_container_name)[1].lstrip("/") - with self.subTest( - objective="ensure that round-trip data transfer, starting in a remote blob, does not change or lose the data", - src_path=p, - dst_path=dpath, - ): - with self.source.get_stream(p) as stream_in: - with self.subTest(stream_in=stream_in.fileno()): - try: - self.dest.save(stream_in, dpath) - except BaseException as be: - self.fail(f"Failed to save content to destination:\n\t{type(be)}: {be.args}") - - -class TC_007_StreamTestCase(AzureBlobBaseCase): - @unittest.skipUnless(not DO_TEST_SKIPPING, "slow test case, skipping for now") - def test_00_acquire_pipe_per_file(self): - src_kwargs = self.structured_parts["fred"] - source = self.AzureBlob( - self.basic_remote_source_path.format(**src_kwargs), - self.connection_string, - ) - sample_content_relative_path = ( - "backup/sample_resources/remote_example/fred-of-buffalo-lodge/monthly/mysql".split("/") - ) - here = Path(__file__).parent.resolve() - while here.name and here.name != 
sample_content_relative_path[0]: - here = here.parent - sample_path = here.joinpath("/".join(sample_content_relative_path[1:])) - expected_total_bytes = 0 - paths = [] - for p in sample_path.iterdir(): - paths.append("/".join(p.parts[-3:])) - with open(p, "rb") as f: - f.seek(0, 2) - expected_total_bytes += f.tell() - test_function_logger.debug(f"{expected_total_bytes=}") - expected_type = type(b"blah").__name__ - bytes_recieved = 0 - for p in paths: - dtypes = set() - with source.get_stream(f".../.../{source.default_container_name}/{p}") as stream_pipe: - stream_pipe: io.FileIO - try: - strt = time.perf_counter() - while time.perf_counter() - strt < 4: - data = stream_pipe.read() - data_type = type(data).__name__ - dtypes.add(data_type) - if data: - strt = time.perf_counter() - bytes_recieved += len(data) - test_function_logger.debug(f"{bytes_recieved=}") - except EOFError: - pass - for dtype in dtypes: - with self.subTest( - objective="Ensure that the data type (bytes/str/int) sent over pipe connection match expectations", - expected_output_type=expected_type, - actual_output_type=dtype, - path=p, - ): - self.assertEqual(dtype, expected_type) - with self.subTest( - objective="Ensure that all of the data sent into the pipe is was collected on the other side.", - expected_total_bytes=expected_total_bytes, - bytes_recieved=bytes_recieved, - ): - self.assertEqual(expected_total_bytes, bytes_recieved) - - @unittest.skipUnless(not DO_TEST_SKIPPING, "slow test case, skipping for now") - def test_01_acquire_pipe_per_container(self): - src_kwargs = self.structured_parts["fred"] - source = self.AzureBlob( - self.basic_remote_source_path.format(**src_kwargs), - self.connection_string, - ) - sample_content_relative_path = "backup/sample_resources/remote_example".split("/") - here = Path(__file__).parent.resolve() - while here.name and here.name != sample_content_relative_path[0]: - here = here.parent - sample_path = here.joinpath("/".join(sample_content_relative_path[1:])) - expected_total_bytes = 0 - paths = [] - for p in sample_path.rglob(f"**/{source.default_container_name}/**/*.txt"): - paths.append("/".join(p.parts[-3:])) - with open(p, "rb") as f: - f.seek(0, 2) - expected_total_bytes += f.tell() - test_function_logger.debug(f"{expected_total_bytes=}") - expected_type = type(b"blah").__name__ - bytes_recieved = 0 - dtypes = set() - with source.get_stream(f".../.../.../") as stream_pipe: - stream_pipe: io.FileIO - try: - strt = time.perf_counter() - while time.perf_counter() - strt < 4: - data = stream_pipe.read() - data_type = type(data).__name__ - dtypes.add(data_type) - if data: - strt = time.perf_counter() - bytes_recieved += len(data) - test_function_logger.debug(f"{bytes_recieved=}") - except EOFError: - pass - for dtype in dtypes: - with self.subTest( - objective="Ensure that the data type (bytes/str/int) sent over pipe connection match expectations", - expected_output_type=expected_type, - actual_output_type=dtype, - ): - self.assertEqual(dtype, expected_type) - with self.subTest( - objective="Ensure that no data was mishandled or lost when passed through the pipe.", - expected_total_bytes=expected_total_bytes, - bytes_recieved=bytes_recieved, - ): - self.assertEqual(expected_total_bytes, bytes_recieved) - - -def main(): - cover_ctx_manager = handle_coverage() - with cover_ctx_manager(): - unittest.TextTestRunner().run(unittest.TestLoader().loadTestsFromTestCase(TC_000_ImportsTestCase)) - print("done") - dbg_break = 0 - - -if __name__ == "__main__": - # main() - 
unittest.main(verbosity=2) diff --git a/tests/unittests/azblob_testing/environment_access_tests/test_remote_data_generation.py b/tests/unittests/azblob_testing/environment_access_tests/test_remote_data_generation.py deleted file mode 100644 index a3cc847d..00000000 --- a/tests/unittests/azblob_testing/environment_access_tests/test_remote_data_generation.py +++ /dev/null @@ -1,88 +0,0 @@ -import os -import unittest - - -class CustomLocalEnvTestCase(unittest.TestCase): - def test_set_osenvs(self): - from tests.unittests.excluded_env_config.build_out_dummy_env import set_osenvs - - def single_equality(a, b): - return a == b - - def sequence_equality(a, b): - if len(a) != len(b): - return False - for i, (_a, _b) in enumerate(zip(a, b)): - if _a != _b: - return False - return True - - # set_osenvs(be_silent=False, use_multi_proc=False) - set_osenvs() - expected_test_interval = ( - "hourly", - "daily", - "weekly", - "monthly", - "yearly", - ) - expected_test_path_parts = ( - "protocol", - "host", - "container", - "interval", - "media_type", - "fname_prefix", - "fname", - ) - expected_test_complete_remote_path_template = ( - "{protocol}://{host_name}/{container_name}/{interval}/{media_type}/{fname_prefix}{fname}" - ) - env_vars = [ - ("test_intervals".upper(), ";", sequence_equality), - ("test_path_parts".upper(), ";", sequence_equality), - ( - "test_complete_remote_path_template".upper(), - None, - single_equality, - ), - ] - expected_vals = [ - expected_test_interval, - expected_test_path_parts, - expected_test_complete_remote_path_template, - ] - dead_tests = [] - for i, (name, *_) in enumerate(env_vars): - with self.subTest( - objective="check if '{}' variable is in os.environ".format(name), - environment_var=name, - ): - try: - check = os.environ[name] - except BaseException as be: - dead_tests.append(i) - for i in dead_tests[::-1]: - env_vars.pop(i) - expected_vals.pop(i) - - for (name, sep, comp), expected in zip(env_vars, expected_vals): - val = os.environ[name] - if sep: - val = val.split(sep) - with self.subTest( - objective="confirm that the configured values match expectations", - environment_var=name, - environment_val=val, - expected=expected, - ): - self.assertTrue( - comp(val, expected), - "{name} did not produce expected value:\n\tgot: {val}\n\texpected: {expected}".format( - name=name, val=val, expected=expected - ), - ) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/unittests/excluded_env_config/README.md b/tests/unittests/excluded_env_config/README.md deleted file mode 100644 index edfb5917..00000000 --- a/tests/unittests/excluded_env_config/README.md +++ /dev/null @@ -1,120 +0,0 @@ -### Regarding `dummy_env_vars.json.template` - -## General purpose: - -`dummy_env_vars.json.template` is a template that you can copy past into a `dummy_env_vars.json` file with your own -values defined. - -It is meant to provide a pythonic mechanism to quickly, and easily, set up the necessary components for testing the -Azure blob storage extension to the twindb/backup project. - -## Quick overview of the file's structure - -#### Minimal key/value pairs for file to serve its purpose - -For the example values, we use a [Flintstones](https://en.wikipedia.org/wiki/The_Flintstones) naming theme to aid -in making it clear where you should supplement your own values. 
- -```json -{ - "os.environ": { - "comments": [ - "The `os.environ` key is a dict of environment variables that should be created prior to testing", - "the general structure of this dict should look something like this: env_vars['os.environ']['destination_container']" - ], - "test_destination": { - "comments": [ - "The value associated with 'PRIMARY_TEST_CONN_STR' is just a placeholder but it also serves to show", - "the expected structure of the connection string" - ], - "PRIMARY_TEST_CONN_STR": "DefaultEndpointsProtocol=https;AccountName=from_the_town_of_bedrock;AccountKey=hAVE+4+Ya8Ado/time+a+DAb4do/TIME+a+/Y4b4/d484/d0+tIMe==;EndpointSuffix=flintstones.meet.the.flintstones.net", - "INTERVALS": ["hourly","daily","weekly","monthly","yearly"], - "PATH_PARTS":["protocol","host","container","interval","media_type","fname_prefix","fname"], - "COMPLETE_REMOTE_PATH_TEMPLATE": "{protocol}://{host_name}/{container_name}/{interval}/{media_type}/{fname_prefix}{fname}" - } - }, - "dummy_vals": { - "comments": [ - "This is where we define container names and the blob paths under those containers for use in testing." - ], - "container_names": [ - "fred-of-buffalo-lodge", - "barney-of-buffalo-lodge", - "wilma-of-impossibly-good-figure", - "betty-of-impossibly-good-figure" - ], - "fname_template": { - "comments": [ - "this dict is used by tests/unittests/excluded_env_config/build_out_dummy_env.py", - "to build a mock environment for testing." - ], - "optional_directory_prefix": "{interval}/mysql", - "format_string": ["{child}{sep}{disposition}{sep}{item_type}.{extension}"], - "template_parts": { - "sep": ".", - "child": "pebbles|bambam", - "disposition": "likes|hates", - "item_type": "dinosaurs|caves|cave_paintings", - "extension": "txt" - } - } - } -} - - -``` - -## Quick explanation of the component key/value pairs - -```json -{ - "os.environ": { - "comments": [ - "The `os.environ` key is a dict of environment variables that should be created prior to testing", - "the general structure of this dict should look something like this: env_vars['os.environ']['destination_container']" - ], - "test_destination": { - "PRIMARY_TEST_CONN_STR": "This should be the connection string for your target Azure subscription as defined here:\n https://docs.microsoft.com/en-us/azure/storage/blobs/storage-quickstart-blobs-python#copy-your-credentials-from-the-azure-portal" - } - }, - "dummy_vals": { - "comments": [ - "This is where we define container names and the blob paths under those containers for use in testing." - ], - "container_names": [ - "best", - "not", - "change", - "unless", - "you", - "also", - "change", - "test", - "scripts", - "to", - "match" - ], - "fname_template": { - "comments": [ - "this dict is used by tests/unittests/excluded_env_config/build_out_dummy_env.py", - "to build a mock environment for testing." - ], - "optional_directory_prefix": "{interval}/mysql", - "format_string": [ - "{child}{sep}{disposition}{sep}{item_type}.{extension}" - ], - "template_parts": { - "sep": ".", - "child": "pebbles|bambam", - "disposition": "likes|hates", - "item_type": "dinosaurs|caves|cave_paintings", - "extension": "txt" - } - } - } -} -``` - -"the dictionaries that follow are examples of expected data structures": "key names inside chevrons, E.G. 
<>, are -optional and can be named however you like, all other's are minimum requirements\n\tall values are dummy examples and -should be replaced according to your own account details.", diff --git a/tests/unittests/excluded_env_config/_dummy_content.json b/tests/unittests/excluded_env_config/_dummy_content.json deleted file mode 100644 index 0ec3e714..00000000 --- a/tests/unittests/excluded_env_config/_dummy_content.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "brands": ["Fruity Cards", "Launchpad Fruity", "Fruity Excellent", "Endurance Fruity", "SolidRock Fruity", "Totem Fruity", "Fruity Scouts", "QuantumLeap Fruity", "Fruity Design", "Fruity Fun", "Clearance Fruity", "Micro Fruity", "Starter Fruity", "Fruity Clip", "Fruity Study", "Tunnel Fruity", "Depot Fruity", "Vigor Fruity", "Fruity Reader", "Venue Fruity", "Shop Fruity", "Fruity Speak", "Fruity Chef", "Hobby Fruity", "Monkey Fruity", "Scion Fruity", "Magical Fruity", "Destiny Fruity", "Intrepid Fruity", "Acclaim Fruity", "Fruity Patch", "Axion Fruity", "TopChoice Fruity", "Savage Fruity", "Mustang Fruity", "Fruity Glory", "Big Fruity", "Fruity Watcher", "Fruity Future", "Fruity Secure", "Fruity Army", "Watch Fruity", "AlphaOmega Fruity", "Fruity Eye", "Fruity Concept", "Fruity Dash", "Season Fruity", "OpenMind Fruity", "Fruity Field", "Kronos Fruity", "Fruity Vid", "Skylark Fruity", "Fruity Ist", "Leader Fruity", "Magma Fruity", "Slick Fruity", "Quantum Fruity", "Fruity Dome", "Continuum Fruity"], - "names": ["Nigel Bradley","Saffa Avalos","Filip Hughes","Shanay Oakley","Nataniel Camacho","Samad Hensley","Mallory Pritchard","Abigale Farrow","Ophelia English","Adelina Sutherland","Francesco Cain","Camden Newman","Lamar Ellis","Cherish Osborne","Cari Sharples","Mya Schmitt","Eren Hills","Juanita Moreno","Emer Swan","Jimi Marsh","Tayyib Williams","Reya Wu","Janine Holmes","Calista York","Khalid Michael","Adil Conley","Winifred Mendoza","Eoghan Coffey","Liam Farley","Pia Hastings","Nola Bernal","Wesley Meyer","Elis Thatcher","Ottilie Haley","Leigh Landry","Boris Humphries","Wyatt Partridge","Mitchell Beltran","Codey Carr","Alys Sutton","Wren Li","Campbell Lloyd","Emmie Chase","Shahzaib Kim","Nada Webster","Said Patel","Cheyenne Odom","Emrys Witt","Lucia Francis","Zack Gough","Ailsa Flores","Leandro Bowman","Ayda Bloom","Rojin Adkins","Nathaniel Gentry","Archibald Mercado","Kevin Pennington","Summer Correa","Annabell Jensen","Tyreece Rubio","Collette Weeks","Menna Ponce","Anastazja Ritter","Iman Rennie","Valerie Roy","Nicole Pacheco","Kimberly Dunkley","Anderson Perez","Aleyna Wilder","Buddy Evans","Quinn Kendall","Stuart Mooney","Elmer Wise","Eryk Key","Blade Oliver","Maariyah Decker","Alice Legge","Kenny Noble","Camilla Shah","Imogen Acevedo","Renesmee Simons","Skye Gilbert","August Cruz","Chad Baxter","Tiana Donaldson","Shania Sharpe","Salma Guy","Ibrahim Morales","Iram Sears","Oliwia Olson","Michele Fisher","Connar Durham","Om Wang","Rhiann Thorne","Eduard Hassan","Elizabeth Pate","Rex Downes","Ava-Mae Maddox","Kaya Gray","Helen Hendricks"], - "affixes": 
["a-","an-","-acanth","acantho-","-cantho","amphi-","-anthus","antho-","arch-","archi-","archo-","-archus","archaeo-","-arctos","arcto-","arthro-","aspido-","-aspis","-avis","-bates","brachi-","brachy-","bronto-","-canth","cantho-","-acanth","acantho-","carcharo-","-cephalus","cephalo-","-cephale","-cephalian","-ceras","cerat-","-ceratus","cetio-","-cetus","-cheirus","chloro-","coel-","cyan-","cyano-","cyclo-","cyn-","-cyon","-dactyl","-dactylus","-deres","-derm","deino-","dino-","deino-","-delphys","-delphis","delpho-","dendro-","-dendron","-dendrum","di-","dino-","deino-","diplo-","-don","-dont","-donto-","-odon","-odont","-odonto-","dromaeo-","dromeo-","-dromeus","eo-","-erpeton","eu-","-felis","-form","-formes","giga-","giganto-","-gnath-","gnatho-","-gnathus","hemi-","hespero-","hippus","hippo-","hyl-","hylo-","-ia","ichthyo-","-ichthys","-lania","-lepis","lepido-","-lestes","long","-lopho-","-lophus","macro-","-maia","maia-","mega-","megalo-","micro-","mimo-","-mimus","-monas","-monad","-morph","-nax","-anax-","-noto-","-nych","nycho-","-nyx","-onych","onycho-","-onyx","-odon","-odont","-odonto-","-oides","-odes","onycho-","-onychus","-onyx","-ops","-ornis","ornith-","ornitho-","pachy-","para-","-pelta","-philus","-phila","philo-","-phyton","-phyta","phyto-","-phyte","-pithecus","pitheco-","platy-","plesio-","plesi-","-pod","podo-","-pus","pro-","protero-","proto-","psittaco-","-psitta","pter-","ptero-","-pterus","pteryg-","-ptera","-pteryx","-pus","-pod","-podo-","-pus","-raptor","raptor-","-rex","-rhina","rhino-","-rhinus","rhodo-","rhynco-","-rhynchus","sarco-","saur","sauro-","-saurus","smilo-","-smilus","-spondylus","squali-","squalo-","stego-","-stega","strepto-","-stoma","-stome","-stomus","sucho-","-suchus","-teuthis","thero-","-therium","thylac-","tri-","titano-","-titan","tyranno-","-tyrannus","-urus","-uro-","veloci-","-venator","xeno-","-zoon","-zoa"] -} diff --git a/tests/unittests/excluded_env_config/build_out_dummy_env.py b/tests/unittests/excluded_env_config/build_out_dummy_env.py deleted file mode 100644 index bc067622..00000000 --- a/tests/unittests/excluded_env_config/build_out_dummy_env.py +++ /dev/null @@ -1,418 +0,0 @@ -import concurrent.futures as cf -import gc -import json -import logging -import os -from contextlib import contextmanager -from pathlib import Path -from typing import Any, List, Union - -from azure.storage.blob import BlobClient, BlobProperties, BlobServiceClient, ContainerClient, StorageStreamDownloader - -logging.getLogger("azure.core.pipeline.policies.http_logging_policy").setLevel(logging.WARNING) -logging.getLogger("urllib3.connectionpool").setLevel(logging.WARNING) - -from tests.unittests.excluded_env_config.dummy_content_generation import cave_gen, dino_gen, painting_gen -from twindb_backup import LOG - -PATH_HERE = Path(__file__).parent - -MIN_SAMPLE_SIZE = 2**31 -BLOCK_SIZE = 2**24 # 2**24 == 16 MiB -INTERVAL_MAPPING = { - "pebbles": { - "wilma": "daily", - "fred": "monthly", - }, - "bambam": { - "barney": "hourly", - "betty": "weekly", - }, -} -TARGET_FILE_HISTORY = set() -DUMMY_OBJ = object() - - -def get_local_cache_location(): - here = Path(PATH_HERE) - relative_dirs = "backup/sample_resources/remote_example".split("/") - while here.name and relative_dirs[0] != here.name: - here = here.parent - return here.parent.joinpath("/".join(relative_dirs)) - - -@contextmanager -def _gc_toggle(depth=0): - gc.disable() - try: - yield depth + 1 - finally: - if depth == 0: - gc.enable() - - -def get_key_manager(keychain: list): - @contextmanager 
- def key_master(val): - keychain.append(val) - try: - yield - finally: - keychain.pop() - - return key_master - - -def content_gen_wrapper(blob_names: dict, for_parent: str, sep): - """This function depends upon the structure of the input argument blob_names which should be derived from the - dummy_vals portion of the dummy_env_vas.json file.""" - - def part_gen(child, child_blobs): - def inner(parent, blob_name, size): - nonlocal content, correct_parents - try: - if parent in correct_parents: - yield True - gen = content[blob_name] - yield from gen(size) - else: - yield False - except BaseException as be: - be.args += ( - { - "parent": parent, - "child": child, - "blob_name": blob_name, - "size": size, - }, - ) - raise be - - correct_parents = parent_map[child] - gen_map = {"dinosaurs": dino_gen, "caves": cave_gen} - # content is a precomputed mapping of data generators used by our inner function to simulate human-readable data - content = { - blob: gen_map.get(blob.rsplit(child + sep, 1)[1].split(sep)[1], painting_gen) for blob in child_blobs - } - return inner - - parent_map = {childkey: {*INTERVAL_MAPPING[childkey]} for childkey in INTERVAL_MAPPING} - ret = { - parent: part_gen(kid, blob_names[kid]) - for kid, parents in parent_map.items() - for parent in parents - if parent == for_parent - } - return ret - - -def make_blobs(container_name, fname_template_dict): - def populate_remote_blob(): - content_map = content_gen_wrapper(children_dict, parent, sep) - with service_client.get_container_client(container_name) as client: - client: ContainerClient - if not client.exists(): - client: ContainerClient = service_client.create_container(container_name) - for child, blobs in children_dict.items(): - # sizes = 2**30,*(block_size for _ in range(len(blobs)-1)) - sizes = 2**27, *(BLOCK_SIZE for _ in range(len(blobs) - 1)) - blob: str - for blob, size in zip(blobs, sizes): - blob = blob.strip() - while blob.startswith("/"): - blob = blob[1:] - person, attitude, item_type, *_ = blob.split(".") - with client.get_blob_client(blob) as bclient: - bclient: BlobClient - cache_file_path = cache_location.joinpath(container_name).joinpath(blob) - cache_file_path.parent.mkdir(parents=True, exist_ok=True) - data_gen = content_map[parent](parent, blob, size) - if not next(data_gen): - continue - if not cache_file_path.exists(): - if bclient.exists(): - bprop: BlobProperties = bclient.get_blob_properties() - _size = bprop.size - LOG.debug( - f"staging {_size} byte content by downloading from {bclient.primary_endpoint}" - ) - with open(cache_file_path, "wb") as f: - dl: StorageStreamDownloader = bclient.download_blob() - dl.readinto(f) - else: - LOG.debug(f"staging {size} byte content before uploading to {bclient.primary_endpoint}") - with open(cache_file_path, "wb") as fd: - fd.writelines(data_gen) - else: - if not bclient.exists(): - with open(cache_file_path, "rb") as fd: - fd.seek(0, 2) # seeks to the end of the file - size = ( - fd.tell() - ) # gets the fd's position which should be the end length of the file - fd.seek(0, 0) # seek back to teh start of the file before we start trying to read - LOG.debug(f"uploading {size} byte content to {bclient.primary_endpoint}") - bclient.upload_blob(data=fd, length=size) - LOG.debug(f"{fd.tell()} byte content uploaded to {bclient.primary_endpoint}") - - blob, person = "", "" - try: - cache_location = get_local_cache_location() - with BlobServiceClient.from_connection_string(os.environ["PRIMARY_TEST_CONN_STR"]) as service_client: - service_client: BlobServiceClient - 
children_dict, sep = assemble_blob_names(fname_template_dict) - parent = container_name.split("-")[0] - kid_keys = list(children_dict.keys()) - nested_blob_paths = [] - for child in kid_keys: - child_intervals = INTERVAL_MAPPING[child] - if parent in child_intervals: - prefix = fname_template_dict["optional_directory_prefix"].format( - interval=INTERVAL_MAPPING[child][parent] - ) - for i, blob in enumerate(children_dict[child]): - blob = "/".join((prefix, blob)) - nested_blob_paths.append(blob) - children_dict[child][i] = blob - else: - children_dict.pop(child) - populate_remote_blob() - return container_name, nested_blob_paths - except BaseException as be: - be.args += container_name, blob, person - raise be - - -def assemble_blob_names(fname_template_dict): - template_parts = fname_template_dict["template_parts"] - fname_templates = fname_template_dict["format_string"] - sep = template_parts["sep"] - children = template_parts["child"].split("|") - dispositions = template_parts["disposition"].split("|") - items_types = template_parts["item_type"].split("|") - extension = template_parts["extension"] - fmt_kwargs = dict(sep=sep, extension=extension) - blob_names = {} - for child in children: - ref = blob_names.setdefault(child, []) - fmt_kwargs["child"] = child - for disposition in dispositions: - fmt_kwargs["disposition"] = disposition - for itype in items_types: - fmt_kwargs["item_type"] = itype - for template in fname_templates: - ref.append(template.format(**fmt_kwargs)) - return blob_names, sep - - -def crawler(data: dict, target_key: Any = DUMMY_OBJ, target_val: Any = DUMMY_OBJ): - """A support function to craw nested container objects searching for the given targets""" - - def do_dict(d: dict): - nonlocal keys_ctx - for k, v in d.items(): - with keys_ctx(k): - if k == target_key: - yield tuple(keychain), v - yield from enter(v) - - def do_sequence(d: Union[list, tuple]): - nonlocal keys_ctx - for k, v in enumerate(d): - with keys_ctx(k): - if k == target_key: - yield tuple(keychain), v - yield from enter(v) - - def do_value(d): - nonlocal keys_ctx - if d == target_val: - yield tuple(keychain), d - - def enter(d): - if isinstance(d, dict): - yield from do_dict(d) - elif isinstance(d, (list, tuple)): - yield from do_sequence(d) - else: - yield from do_value(d) - - keychain = [] - keys_ctx = get_key_manager(keychain) - yield from enter(data) - - -def set_osenvs(target_file: str = None, be_silent: bool = True, use_multi_proc: bool = True): - def validate_conn_str(connStr): - try: - with BlobServiceClient.from_connection_string(connStr) as client: - client: BlobServiceClient - container_list = tuple(client.list_containers()) - if not all( - any(s == c.name for c in container_list) for s in vars_dict["dummy_vals"]["container_names"] - ): - vars_dict["dummy_vals"]["container_names"] = container_list - vars_dict["os.environ"]["test_destination"]["PRIMARY_TEST_CONN_STR"] = connStr - return True - except BaseException as be: - return False - - if target_file is None: - target_file = str(PATH_HERE.joinpath("dummy_env_vars.json")) - if target_file in TARGET_FILE_HISTORY: - return - TARGET_FILE_HISTORY.add(target_file) - filePath = Path(target_file) - if filePath.exists(): - with open(filePath, "r", encoding="UTF-8") as f: - vars_dict = json.load(f) - else: - with open(filePath.with_suffix(".json.template"), "r") as f: - vars_dict = json.load(f) - LOG.info( - "\nWARNING:\n\tNo connection stored on local machine\n\tfor a guide on how to get your connection string 
see:\n\t\thttps://docs.microsoft.com/en-us/azure/storage/blobs/storage-quickstart-blobs-python?tabs=environment-variable-windows#copy-your-credentials-from-the-azure-portal" - ) - conn_str = input("Please enter a valid connection string for the target account\n::") - - while not validate_conn_str(conn_str): - conn_str = input( - f"{conn_str} is not a valid connection string" - f"\n\tPlease enter a valid connection string for the target account\n" - ) - print("\nconnection string valid") - with open(filePath, "w") as f: - json.dump(vars_dict, f, indent=4) - for chain, value in tuple(crawler(vars_dict, target_key="comments")): - ref = vars_dict - for k in chain[:-1]: - ref = ref[k] - ref.pop(chain[-1]) - test_dest_vars: dict = vars_dict["os.environ"]["test_destination"] - os.environ["PRIMARY_TEST_CONN_STR"]: str = test_dest_vars["PRIMARY_TEST_CONN_STR"] - os.environ["TEST_INTERVALS"]: str = ";".join(test_dest_vars["INTERVALS"]) - os.environ["TEST_PATH_PARTS"]: str = ";".join(test_dest_vars["PATH_PARTS"]) - os.environ["TEST_COMPLETE_REMOTE_PATH_TEMPLATE"]: str = test_dest_vars["COMPLETE_REMOTE_PATH_TEMPLATE"] - os.environ["TEST_CONTAINER_NAMES"] = ";".join(vars_dict["dummy_vals"]["container_names"]) - populate_remote_containers(vars_dict, be_silent, use_multi_proc) - - -def populate_remote_containers(vars_dict, be_silent: bool, use_multi_proc: bool): - dummy_targets = vars_dict["dummy_vals"] - containers: List[str] = dummy_targets["container_names"] - fname_template_dict: dict = dummy_targets["fname_template"] - container: str - if use_multi_proc: - with cf.ProcessPoolExecutor(os.cpu_count()) as ppe: - ftrs = [] - for loop_container in containers: - # make_blobs(loop_container,fname_template_dict) - ftrs.append(ppe.submit(make_blobs, loop_container, fname_template_dict)) - for ftr in cf.as_completed(ftrs): - if ftr.exception(): - raise ftr.exception() - else: - container, blobs = ftr.result() - LOG.debug(f"{container} completed") - os.environ[container.replace("-", "_").upper()] = ";".join(blobs) - generate_cli_config(container, blobs) - else: - for loop_container in containers: - try: - container, blobs = make_blobs(loop_container, fname_template_dict) - LOG.debug(f"{container} completed") - os.environ[container.replace("-", "_").upper()] = ";".join(blobs) - generate_cli_config(container, blobs) - except BaseException as be: - LOG.error("{}: {}".format(type(be).__name__, repr(be.args))) - if not be_silent: - strings = [] - longest = max(len(k) for k in os.environ) - for k in os.environ: - strings.append(f"{k.strip():<{longest}} : {os.environ[k]}") - LOG.info("\n" + "\n".join(strings)) - - -def generate_cli_config(container: str, blobs: List[str]): - from configparser import ConfigParser - - from twindb_backup import INTERVALS - from twindb_backup import SUPPORTED_DESTINATION_TYPES as SDT - from twindb_backup import SUPPORTED_QUERY_LANGUAGES as SQ - from twindb_backup import XBSTREAM_BINARY, XTRABACKUP_BINARY - from twindb_backup.configuration import DEFAULT_CONFIG_FILE_PATH, RetentionPolicy - - cache_location = get_local_cache_location() - config_root = cache_location.parent.joinpath("configs").resolve() - os.environ["TEST_CONFIGS_ROOT"] = str(config_root) - config_file_path = config_root.joinpath(container).joinpath(DEFAULT_CONFIG_FILE_PATH.split("/")[-1]) - config_file_path.parent.mkdir(parents=True, exist_ok=True) - true_interval, media_type, *fname = blobs[0].split("/") - prefix: str = "/".join(fname[:-1]) - cache_endpoint = cache_location.joinpath("local_store").joinpath(prefix) - if 
prefix and not prefix.endswith("/"): - prefix += "/" - cache_endpoint.mkdir(parents=True, exist_ok=True) - # fname:str = fname[-1] - conn_str = os.environ["PRIMARY_TEST_CONN_STR"] - conn_parts = {k: v for part in conn_str.split(";") for k, v in (part.split("=", 1),)} - protocol = conn_parts["DefaultEndpointsProtocol"] - host_name = f'{conn_parts["AccountName"]}.{conn_parts["EndpointSuffix"]}' - path_parts = { - "protocol": protocol.strip(":/"), - "host_name": host_name.strip("/"), - "container_name": container.strip("/"), - "interval": true_interval.strip("/"), - "media_type": media_type.strip("/"), - "fname_prefix": prefix, - "fname": "", - } - sql_config = { - "mysql_defaults_file": "/root/.my.cnf", - "full_backup": INTERVALS[1], - "expire_log_days": 7, - "xtrabackup_binary": XTRABACKUP_BINARY, - "xbstream_binary": XBSTREAM_BINARY, - } - mock_config = { - "compression": { - "program": "pigz", - "threads": max(1, os.cpu_count() // 2), - "level": 9, - }, - "gpg": {"recipient": "", "keyring": "", "secret_keyring": ""}, - "intervals": {f"run_{interval}": interval == true_interval for interval in INTERVALS}, - "destination": { - "keep_local_path": True, - "backup_destination": SDT.azure, - }, - "export": { - "transport": "datadog", - "app_key": "some_app_key", - "api_key": "some_api_key", - }, - "source": {"backup_dirs": [str(cache_endpoint)], "backup_mysql": True}, - "retention": {f"{interval}_copies": count for interval, count in RetentionPolicy._field_defaults.items()}, - "retention_local": {f"{interval}_copies": count for interval, count in RetentionPolicy._field_defaults.items()}, - SQ.mysql: sql_config, - SDT.azure: { - "remote_path": os.environ["TEST_COMPLETE_REMOTE_PATH_TEMPLATE"].format(**path_parts), # remote_path - "connection_string": f"'{conn_str}'", # connection_string - "can_do_overwrites": False, # can_do_overwrites - "cpu_cap": os.cpu_count(), # cpu_cap - "max_mem_bytes": 2**24, # max_mem_bytes - "default_protocol": path_parts["protocol"], # default_protocol - "default_host_name": path_parts["host_name"], # default_host_name - "default_container_name": path_parts["container_name"], # default_container_name - "default_interval": path_parts["interval"], # default_interval - "default_media_type": path_parts["media_type"], # default_media_type - "default_fname_prefix": path_parts["fname_prefix"], # default_fname_prefix - }, - } - writer = ConfigParser() - writer.read_dict(mock_config) - with open(config_file_path, "w") as fd: - writer.write(fd) - - -if __name__ == "__main__": - set_osenvs("dummy_env_vars.json") diff --git a/tests/unittests/excluded_env_config/dummy_content_generation/__init__.py b/tests/unittests/excluded_env_config/dummy_content_generation/__init__.py deleted file mode 100644 index 600cd4ad..00000000 --- a/tests/unittests/excluded_env_config/dummy_content_generation/__init__.py +++ /dev/null @@ -1,40 +0,0 @@ -from tests.unittests.excluded_env_config.dummy_content_generation.cave_generation import cave_gen -from tests.unittests.excluded_env_config.dummy_content_generation.cave_painting_desc import painting_gen -from tests.unittests.excluded_env_config.dummy_content_generation.dino_namegen import dino_gen - - -def manual_dummy_file_creation(out_target: str, fsize: int): - def rando_gen(total_len: int) -> bytes: - parts = total_len // 3 - for grp in zip(dino_gen(parts), cave_gen(parts), painting_gen(parts)): - yield from grp - - from pathlib import Path - - out_target = Path(out_target).resolve() - out_target.parent.mkdir(parents=True, exist_ok=True) - with 
open(out_target, "wb") as f: - f.writelines(rando_gen(fsize)) - - -if __name__ == "__main__": - import concurrent.futures as cf - import os - from random import random - - oneg = 2**30 - mean = 2**33 - half_span = 2**32 - - with cf.ProcessPoolExecutor(os.cpu_count()) as ppe: - args = [] - for i, mult in enumerate([4] * 5 + [9] * 5): - size = int(oneg * mult + (oneg * random()) * round(random() * 2.0 - 1.0)) - name = f"./big_dummies/{size//oneg}_{i}.txt" - args.append((name, size)) - args.sort(key=lambda s: s[1]) - ftrs = [] - for name, size in args: - print(name, size) - ftrs.append(ppe.submit(manual_dummy_file_creation, name, size)) - cf.wait(ftrs) diff --git a/tests/unittests/excluded_env_config/dummy_content_generation/cave_generation.py b/tests/unittests/excluded_env_config/dummy_content_generation/cave_generation.py deleted file mode 100644 index 8d899c8f..00000000 --- a/tests/unittests/excluded_env_config/dummy_content_generation/cave_generation.py +++ /dev/null @@ -1,50 +0,0 @@ -from random import randint - -biome = ( - ("swamp", "-", "y ", "ed "), - ("desert", "-", "ed "), - ( - "savanna", - "-", - ), - ("mountain", "-", "ous ", "y-"), - ("hill", "top ", "y "), - ("valley", "-", "_floor "), -) -biome = tuple(v for tpl in zip(biome, (("", "") for _ in range(len(biome)))) for v in tpl) -feel = ( - "cozy ", - "damp ", - "dank ", - "spacious ", - "stinky ", - "pleasant ", - "small ", - "large ", - "big ", - "dirty ", - "clean ", -) -look = "open ,hidden ,exposed ,recessed ,majestic ,underwhelming ,high ,low ,deep ,shallow ".split(",") - - -def cave_gen(result_len: int): - alen = len(biome) - 1 - blen = len(feel) - 1 - clen = len(look) - 1 - byte_count = 0 - while byte_count < result_len: - a = biome[randint(0, alen)] - a = a[0] + a[randint(1, len(a) - 1)] - b = feel[randint(0, blen)] - c = look[randint(0, clen)] - abc = a + b + c if a.endswith("y ") or a.endswith("ed ") else b + c + a - abc = abc.replace("-", " ").replace("_", "-") - val = f"A {abc}cave\n".capitalize().encode("utf-8") - yield val - byte_count += len(val) - - -if __name__ == "__main__": - for cave in cave_gen(100): - print(cave) diff --git a/tests/unittests/excluded_env_config/dummy_content_generation/cave_painting_desc.py b/tests/unittests/excluded_env_config/dummy_content_generation/cave_painting_desc.py deleted file mode 100644 index d05e6f0c..00000000 --- a/tests/unittests/excluded_env_config/dummy_content_generation/cave_painting_desc.py +++ /dev/null @@ -1,29 +0,0 @@ -from itertools import combinations -from random import randint - -descriptors = "simple,busy,abstract,obvious,pretty,scary,large,small,relatable,detailed,complex".split(",") -combos = tuple(tuple(combinations(descriptors, i)) for i in range(1, 5)) -combo_len = len(combos) - 1 -lens = tuple(len(c) - 1 for c in combos) - - -def painting_gen(result_len: int): - byte_count = 0 - while byte_count < result_len: - combo_idx = randint(0, combo_len) - clen = lens[combo_idx] - combo = combos[combo_idx][randint(0, clen)] - if len(combo) > 2: - combo = ", ".join(combo[:-1]) + f", and {combo[-1]}" - elif len(combo) == 2: - combo = ", ".join(combo[:-1]) + f" and {combo[-1]}" - else: - combo = combo[0] - val = f"{combo} types of cave-paintings\n".capitalize().encode("utf-8") - yield val - byte_count += len(val) - - -if __name__ == "__main__": - for s in painting_gen(10): - print(s) diff --git a/tests/unittests/excluded_env_config/dummy_content_generation/dino_namegen.py b/tests/unittests/excluded_env_config/dummy_content_generation/dino_namegen.py deleted file mode 
100644 index c653d54a..00000000 --- a/tests/unittests/excluded_env_config/dummy_content_generation/dino_namegen.py +++ /dev/null @@ -1,389 +0,0 @@ -"""Filler content generator. -code inspired by that used to create the site: https://www.fantasynamegenerators.com/dinosaur-names.php""" -import multiprocessing as mp -from itertools import combinations -from multiprocessing import Queue -from queue import Empty -from random import sample -from time import perf_counter - -colors = ( - ("fuscus", "dark"), - ("Nigri", "Black"), - ("aterum", "dark-black"), - ("lividus", "blue-black"), - ("Cyano", "Blue"), - ("Leuco", "White"), - ("Chloro", "Green"), - ("prasino", "green"), - ("purpureus", "purple"), - ("caeruleus", "cerulean"), - ("ravus", "gray"), - ("canus", "light-gray"), - ("albus", "white"), - ("Xantho", "Yellow"), - ("flavus", "yellow"), - ("fulvus", "golden"), - ("aurantium", "orange"), - ("croceus", "saffron"), - ("ruber", "red"), - ("roseus", "rose-red"), -) -colors = tuple(pair for tpl in zip(colors, (("", "") for _ in range(len(colors)))) for pair in tpl) -physical_descriptors1 = ( - ("rhyncho", "Beak"), - ("chelo", "Claw"), - ("podo", "Foot"), - ("cerco", "Tail"), - ("canto", "Spined"), - ("cephalo", "Headed"), - ("donto", "Teeth"), - ("don", "Tooth"), - ("lopho", "Crested"), - ("ploce", "Armored"), - ("plo", "Armored"), - ("rhino", "Nosed"), - ("trachelo", "Necked"), - ("minisculum", "extremely-small"), - ("exigum", "very-small"), - ("minimum", "tiny"), - ("parvum", "small"), - ("vegrande", "not-very-big"), - ("praegrande", "very-big"), - ("magnum", "great"), - ("enorme", "enormous"), - ("immane", "huge"), - ("immensum", "immense"), - ("vastum", "vast"), - ("", ""), -) -physical_descriptors2 = ( - ("Acantho", "Spiny"), - ("Acro", "High"), - ("Aegypto", "Egyptian"), - ("Aepy", "Tall"), - ("Afro", "African"), - ("Agili", "Agile"), - ("Alectro", "Eagle"), - ("Ammo", "Sand"), - ("Anchi", "Near"), - ("Ankylo", "Stiff"), - ("Antarcto", "Antarctic"), - ("Apato", "Deceptive"), - ("Archaeo", "Ancient"), - ("Arrhino", "Without Nose"), - ("Austro", "South"), - ("Avi", "Bird"), - ("Baga", "Small"), - ("Baro", "Heavy"), - ("Bellu", "Fine"), - ("Brachio", "Arm"), - ("Brachy", "Short"), - ("Callio", "Beautiful"), - ("Campto", "Bent"), - ("Carno", "Carnivorous"), - ("Cerato", "Horned"), - ("Chloro", "Green"), - ("Coelo", "Hollow"), - ("Colosso", "Giant"), - ("Cyano", "Blue"), - ("Cyclo", "Round"), - ("Cyrto", "Curved"), - ("Daspleto", "Frightful"), - ("Deino", "Terrible"), - ("Di", "Two"), - ("Dicraeo", "Forked"), - ("Dilipho", "Two Ridged"), - ("Draco", "Dragon"), - ("Dromaeo", "Running"), - ("Drypto", "Tearing"), - ("Echino", "Spiny"), - ("Elaphro", "Fleet"), - ("Eo", "Dawn"), - ("Eu", "Well"), - ("Gampso", "Curved"), - ("Gorgo", "Fierce"), - ("Gymno", "Bare"), - ("Gyro", "Round"), - ("Hadro", "Big"), - ("Haplo", "Simple"), - ("Hespero", "Western"), - ("Hetero", "Different"), - ("Hylaeo", "Woodland"), - ("Kentro", "Spiky"), - ("Krito", "Noble"), - ("Lasio", "Hairy"), - ("Lepto", "Slim"), - ("Leuco", "White"), - ("Lopho", "Crested"), - ("Lurdu", "Heavy"), - ("Macro", "Large"), - ("Masso", "Massive"), - ("Mega", "Large"), - ("Megalo", "Big"), - ("Metria", "Moderately"), - ("Micro", "Tiny"), - ("Mono", "Single"), - ("Nano", "Dwarf"), - ("Nano", "Tiny"), - ("Neo", "New"), - ("Nigri", "Black"), - ("Oro", "Mountain"), - ("Orycto", "Digging"), - ("Ovi", "Egg"), - ("Pachy", "Thick"), - ("Parali", "Tidal"), - ("Peloro", "Monstrous"), - ("Plateo", "Flat"), - ("Platy", "Flat"), - ("Pogono", "Bearded"), - 
("Preno", "Sloping"), - ("Prenoce", "Sloping"), - ("Pro", "Before"), - ("Proto", "Before"), - ("Rhab", "Rod"), - ("Rugos", "Wrinkled"), - ("Salto", "Hopping"), - ("Sarco", "Flesh"), - ("Segno", "Slow"), - ("Silvi", "Forest"), - ("Sino", "Chinese"), - ("Spino", "Thorn"), - ("Stego", "Roof"), - ("Steno", "Narrow"), - ("Styraco", "Spiked"), - ("Super", "Super"), - ("Theco", "Socket"), - ("Therizino", "Scythe"), - ("Thescelo", "Wonderful"), - ("Toro", "Bull"), - ("Torvo", "Savage"), - ("Trachy", "Rough"), - ("Trichodo", "Hairy"), - ("Troo", "Wounding"), - ("Tyloce", "Swelling"), - ("Tyranno", "Tyrant"), - ("Veloci", "Quick"), - ("Xantho", "Yellow"), - ("", ""), -) -abstract_descriptors1 = ( - ("bator", "Hero"), - ("ceratops", "Horned Face"), - ("draco", "Dragon"), - ("dromeus", "Runner"), - ("gryphus", "Griffin"), - ("lestes", "Stealer"), - ("mimus", "Mimic"), - ("moloch", "Demon"), - ("raptor", "Plunderer"), - ("rex", "King"), - ("sauropteryx", "Winged Lizard"), - ("saurus", "Lizard"), - ("saura", "Lizard"), - ("sornis", "Bird"), - ("titan", "Giant"), - ("tyrannus", "Tyrant"), - ("venator", "Hunter"), - ("amorabundum", "loving"), - ("excitum", "excited"), - ("confūsum", "confused"), - ("detestabile", "hateful"), - ("felix", "happy"), - ("invidum", "envious"), - ("iratum", "irate"), - ("laetum", "joyful"), - ("miserum", "miserable"), - ("solum", "lonely"), - ("somnolentum", "sleepy"), - ("territum", "terrified"), - ("triste", "sad"), - ("bella", "beautiful"), - ("breve", "short"), - ("cānum", "gray-haired"), - ("casuale", "casual"), - ("decens", "proper"), - ("decorum", "well-mannered"), - ("deforme", "ugly"), - ("elegans", "elegant"), - ("flāvum", "blonde"), - ("formale", "formal"), - ("iuvene", "young"), - ("longe", "tall"), - ("rūfum", "red-haired"), - ("venustum", "lovely"), - ("venustum", "charming"), - ("vetere", "old"), - ("", ""), -) -abstract_descriptors2 = ( - ("don", "Tooth"), - ("bator", "Hero"), - ("canthus", "Spine"), - ("ceras", "Roof"), - ("ceratops", "Horned Face"), - ("docus", "Neck"), - ("draco", "Dragon"), - ("dromeus", "Runner"), - ("gryphus", "Griffin"), - ("lestes", "Stealer"), - ("lodon", "Tooth"), - ("mimus", "Mimic"), - ("moloch", "Demon"), - ("nychus", "Claw"), - ("pelix", "Pelvis"), - ("pelta", "Shield"), - ("cephalus", "Head"), - ("pteryx", "Wing"), - ("pus", "Foot"), - ("raptor", "Plunderer"), - ("rex", "King"), - ("rhinus", "Snout"), - ("rhothon", "Nose"), - ("sauropteryx", "Winged Lizard"), - ("saurus", "Lizard"), - ("saura", "Lizard"), - ("sornis", "Bird"), - ("spondylus", "Vertebrae"), - ("suchus", "Crocodile"), - ("tholus", "Dome"), - ("titan", "Giant"), - ("tyrannus", "Tyrant"), - ("venator", "Hunter"), - ("", ""), -) - -colors = [(v1.strip(), v2.strip()) for v1, v2 in colors] -physical_descriptors1 = [(v1.strip(), v2.strip()) for v1, v2 in physical_descriptors1] -physical_descriptors2 = [(v1.strip(), v2.strip()) for v1, v2 in physical_descriptors2] -abstract_descriptors1 = [(v1.strip(), v2.strip()) for v1, v2 in abstract_descriptors1] -abstract_descriptors2 = [(v1.strip(), v2.strip()) for v1, v2 in abstract_descriptors2] - - -def combination_gen(_colors): - def inner(): - phys = physical_descriptors1 + physical_descriptors2 - abst = abstract_descriptors1 + abstract_descriptors2 - combos = combinations( - ( - _colors, - physical_descriptors1, - physical_descriptors2, - abstract_descriptors1, - abstract_descriptors2, - ), - 5, - ) - for la, a in _colors: - for lp1, p1 in phys: - for lp2, p2 in abst: - if a + p1 + p2: - yield "".join(v for v in (la, lp1, lp2) 
if v) + " " + " ".join(v for v in (a, p1, p2) if v) - for l1, l2, l3, l4, l5 in combos: - for lp1, p1 in l1: - for lp2, p2 in l2: - for lp3, p3 in l3: - for lp4, p4 in l4: - for lp5, p5 in l5: - if p1 + p2 + p3 + p4 + p5: - yield "".join(v for v in (lp1, lp2, lp3, lp4, lp5) if v) + " " + " ".join( - v for v in (p1, p2, p3, p4, p5) if v - ) - - for name in inner(): - if len(name.split()) > 1: - yield name.capitalize().encode("utf-8") - - -def dino_gen(result_len: int, clrs=None): - data = [] - byte_count = 0 - if clrs is None: - clrs = colors - for d in combination_gen(clrs): - d += b"\n" - yield d - data.append(d) - byte_count += len(d) - if byte_count >= result_len: - break - while byte_count < result_len: - for i in sample(range(len(data)), len(data)): - d = data[i] - yield d - data.append(d) - byte_count += len(d) - if byte_count >= result_len: - break - - -def _gen_wrapper(result_len: int, q: Queue, clrs: tuple = None): - if not clrs: - clrs = colors - batch = [] - for n in dino_gen(result_len, clrs): - batch.append(n) - if len(batch) > 1000: - q.put(b"".join(batch)) - batch = [] - if batch: - q.put(b"".join(batch)) - - -def all_gen(chunk_bytes: int, num_chunks: int): - clr_span = (len(colors) + num_chunks - 1) // num_chunks - q = Queue() - procs = [] - for i in range(0, len(colors), clr_span): - procs.append( - mp.Process( - target=_gen_wrapper, - args=(chunk_bytes, q, colors[i : i + clr_span]), - ) - ) - try: - for proc in procs: - proc.start() - with open("big_file.txt", "wb") as f: - strt = perf_counter() - batch = [] - elapsed = perf_counter() - strt - while elapsed < 5: - print(f"\r[{'|'*(round(100*elapsed/5)):<100}]", end="") - try: - v = q.get(True, 0.5) - if v: - batch.append(v) - strt = perf_counter() - except Empty: - pass - if len(batch) > 10: - f.write(b"".join(batch)) - if f.tell() >= (10 * 2**29): - break - batch = [] - elapsed = perf_counter() - strt - if batch: - f.writelines(batch) - finally: - for proc in procs: - try: - proc.join(2) - if proc.exitcode is None: - try: - proc.terminate() - except: - pass - except: - try: - proc.terminate() - except: - pass - try: - proc.close() - except: - pass - - -if __name__ == "__main__": - all_gen(2**29, 10) diff --git a/tests/unittests/excluded_env_config/dummy_content_generation/no_dupes.py b/tests/unittests/excluded_env_config/dummy_content_generation/no_dupes.py deleted file mode 100644 index ed3d88b3..00000000 --- a/tests/unittests/excluded_env_config/dummy_content_generation/no_dupes.py +++ /dev/null @@ -1,14 +0,0 @@ -def kill_dupes(fp: str): - s = set() - with open(fp, "rb") as fin: - fin.seek(0) - with open("no_dupes.txt", "wb") as fout: - for line in fin.readlines(): - line = line.strip() - if line not in s: - s.add(line) - fout.write(line + b"\n") - - -if __name__ == "__main__": - kill_dupes("big_file.txt") diff --git a/tests/unittests/excluded_env_config/dummy_env_vars.json.template b/tests/unittests/excluded_env_config/dummy_env_vars.json.template deleted file mode 100644 index 8da67891..00000000 --- a/tests/unittests/excluded_env_config/dummy_env_vars.json.template +++ /dev/null @@ -1,40 +0,0 @@ -{ - "os.environ": { - "comments": [ - "The `os.environ` key is a dict of environment variables that should be created prior to testing", - "the general structure of this dict should look something like this: env_vars['os.environ']['destination_container']" - ], - "test_destination": { - "PRIMARY_TEST_CONN_STR": 
"DefaultEndpointsProtocol=https;AccountName=from_the_town_of_bedrock;AccountKey=hAVE+4+Ya8Ado/time+a+DAb4do/TIME+a+/Y4b4/d484/d0+tIMe==;EndpointSuffix=flintstones.meet.the.flintstones.net", - "INTERVALS": ["hourly","daily","weekly","monthly","yearly"], - "PATH_PARTS":["protocol","host","container","interval","media_type","fname_prefix","fname"], - "COMPLETE_REMOTE_PATH_TEMPLATE": "{protocol}://{host_name}/{container_name}/{interval}/{media_type}/{fname_prefix}{fname}" - } - }, - "dummy_vals": { - "comments": [ - "This is where we define container names and the blob paths under those containers for use in testing." - ], - "container_names": [ - "fred-of-buffalo-lodge", - "barney-of-buffalo-lodge", - "wilma-of-impossibly-good-figure", - "betty-of-impossibly-good-figure" - ], - "fname_template": { - "comments": [ - "this dict is used by tests/unittests/excluded_env_config/build_out_dummy_env.py", - "to build a mock environment for testing." - ], - "optional_directory_prefix": "{interval}/mysql", - "format_string": ["{child}{sep}{disposition}{sep}{item_type}.{extension}"], - "template_parts": { - "sep": ".", - "child": "pebbles|bambam", - "disposition": "likes|hates", - "item_type": "dinosaurs|caves|cave_paintings", - "extension": "txt" - } - } - } -} diff --git a/tox.ini b/tox.ini index dc7ba558..c2236722 100644 --- a/tox.ini +++ b/tox.ini @@ -18,9 +18,4 @@ commands = deps=-rrequirements_dev.txt commands= coverage run -m py.test tests/unit - coverage report -; The following lines are candidate's to replace those in commands= (above) so that we can get a cumulative coverage -; report for the existing pytest scripts, as well as the AzureBlob specific unittest scripts. -; coverage run --source=twindb_backup -m py.test tests/unit -; coverage run -a --source=twindb_backup -m unittest discover -cvf --locals --start-directory tests/unittests/destination_tests -; coverage report + coverage report \ No newline at end of file diff --git a/twindb_backup/__init__.py b/twindb_backup/__init__.py index b6efdc0d..ab7bfe4b 100644 --- a/twindb_backup/__init__.py +++ b/twindb_backup/__init__.py @@ -57,9 +57,9 @@ class and saves the backup copy in something defined in a destination class. 
LOG = logging.getLogger(__name__) LOG.setLevel(GLOBAL_INIT_LOG_LEVEL) -DestTypes = namedtuple("DestinationTypes", "ssh,local,s3,gcs,azure") +DestTypes = namedtuple("DestinationTypes", "ssh,local,s3,gcs,az") QueryTypes = namedtuple("QueryTypes", ["mysql"]) -SUPPORTED_DESTINATION_TYPES = DestTypes("ssh", "local", "s3", "gcs", "azure") +SUPPORTED_DESTINATION_TYPES = DestTypes("ssh", "local", "s3", "gcs", "az") SUPPORTED_QUERY_LANGUAGES = QueryTypes("mysql") From 71840c8a6107911d5e16b9186cf9412789884419 Mon Sep 17 00:00:00 2001 From: Jsalz2000 Date: Fri, 30 Aug 2024 10:58:35 -0500 Subject: [PATCH 06/10] Adds unit tests for Azure blob AZConfig and AZ.__init__ --- .gitignore | 6 ++ tests/unit/destination/az/__init__.py | 0 tests/unit/destination/az/test_config.py | 37 ++++++++ tests/unit/destination/az/test_init.py | 89 +++++++++++++++++++ tests/unit/destination/az/util.py | 40 +++++++++ .../configuration/destinations/az.py | 13 ++- twindb_backup/destination/az.py | 1 + 7 files changed, 185 insertions(+), 1 deletion(-) create mode 100644 tests/unit/destination/az/__init__.py create mode 100644 tests/unit/destination/az/test_config.py create mode 100644 tests/unit/destination/az/test_init.py create mode 100644 tests/unit/destination/az/util.py diff --git a/.gitignore b/.gitignore index a5711735..72ed6e54 100644 --- a/.gitignore +++ b/.gitignore @@ -68,3 +68,9 @@ target/ # asdf .envrc .tool-versions + +# vscode +.vscode/ + +# environment +.env \ No newline at end of file diff --git a/tests/unit/destination/az/__init__.py b/tests/unit/destination/az/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/unit/destination/az/test_config.py b/tests/unit/destination/az/test_config.py new file mode 100644 index 00000000..8fa56c8f --- /dev/null +++ b/tests/unit/destination/az/test_config.py @@ -0,0 +1,37 @@ +import pytest + +from twindb_backup.configuration.destinations.az import AZConfig + +from .util import AZConfigParams + + +def test_initialization_success(): + """Test initialization of AZConfig with all parameters set.""" + p = AZConfigParams() + c = AZConfig(**dict(p)) + assert c.connection_string == p.connection_string + assert c.container_name == p.container_name + assert c.chunk_size == p.chunk_size + assert c.remote_path == p.remote_path + + +def test_initialization_success_defaults(): + """Test initialization of AZConfig with only required parameters set and ensure default values.""" + p = AZConfigParams(only_required=True) + c = AZConfig(**dict(p)) + assert c.connection_string == p.connection_string + assert c.container_name == p.container_name + assert c.chunk_size == 4 * 1024 * 1024 + assert c.remote_path == "/" + + +def test_invalid_params(): + """Test initialization of AZConfig with invalid parameters.""" + with pytest.raises(ValueError): + AZConfig( + connection_string="test_connection_string", container_name="test_container", chunk_size="invalid_chunk_size" + ) + with pytest.raises(ValueError): + AZConfig(connection_string="test_connection_string", container_name="test_container", remote_path=1) + with pytest.raises(TypeError): + AZConfig(connection_string="test_connection_string") diff --git a/tests/unit/destination/az/test_init.py b/tests/unit/destination/az/test_init.py new file mode 100644 index 00000000..fdd2b92e --- /dev/null +++ b/tests/unit/destination/az/test_init.py @@ -0,0 +1,89 @@ +import socket +from unittest.mock import MagicMock, patch + +import pytest +from azure.storage.blob import ContainerClient + +import twindb_backup.destination.az as az + 
+from .util import AZParams + + +def test_init_param(): + """Test initialization of AZ with all parameters set, mocking the _connect method.""" + with patch("twindb_backup.destination.az.AZ._connect") as mc: + mc.return_value = MagicMock(spec=ContainerClient) + p = AZParams() + c = az.AZ(**dict(p)) + + assert c._container_name == p.container_name + assert c._connection_string == p.connection_string + assert c._hostname == p.hostname + assert c._chunk_size == p.chunk_size + assert c._remote_path == p.remote_path + assert isinstance(c._container_client, ContainerClient) + az.AZ._connect.assert_called_once() + + +def test_init_param_defaults(): + """Test initialization of AZ with only required parameters set, ensuring default values, mocking the _connect method.""" + with patch("twindb_backup.destination.az.AZ._connect") as mc: + mc.return_value = MagicMock(spec=ContainerClient) + p = AZParams(only_required=True) + c = az.AZ(**dict(p)) + + assert c._container_name == p.container_name + assert c._connection_string == p.connection_string + assert c._hostname == socket.gethostname() + assert c._chunk_size == 4 * 1024 * 1024 + assert c._remote_path == "/" + assert isinstance(c._container_client, ContainerClient) + az.AZ._connect.assert_called_once() + + +def test_init_conn_string_valid(): + """Test initialization of AZ with valid connection string.""" + with patch("twindb_backup.destination.az.ContainerClient.exists") as mc: + mc.return_value = True + p = AZParams() + c = az.AZ(**dict(p)) + + az.ContainerClient.exists.assert_called_once() + assert isinstance(c._container_client, ContainerClient) + + +def test_init_conn_string_invalid(): + """Test initialization of AZ with invalid connection string, expecting ValueError.""" + with patch("twindb_backup.destination.az.ContainerClient.exists") as mc: + mc.return_value = True + p = AZParams() + p.connection_string = "invalid_connection_string" + with pytest.raises(ValueError, match="Connection string is either blank or malformed."): + _ = az.AZ(**dict(p)) + + +def test_init_container_not_exists(): + """Test initialization of AZ with container not existing, mocking the create_container method.""" + with patch("twindb_backup.destination.az.ContainerClient.exists") as mc: + mc.return_value = False + with patch("twindb_backup.destination.az.ContainerClient.create_container") as mc_create_container: + mc_create_container.return_value = MagicMock(spec=ContainerClient) + p = AZParams() + c = az.AZ(**dict(p)) + + az.ContainerClient.exists.assert_called_once() + az.ContainerClient.create_container.assert_called_once() + assert isinstance(c._container_client, ContainerClient) + + +def test_init_success(): + """Test initialization of AZ with existing container, mocking the from_connection_string method.""" + with patch("twindb_backup.destination.az.ContainerClient.from_connection_string") as mc: + mc.return_value = MagicMock(spec=ContainerClient) + p = AZParams() + c = az.AZ(**dict(p)) + + az.ContainerClient.from_connection_string.assert_called_once_with(p.connection_string, p.container_name) + mc.return_value.exists.assert_called_once() + mc.return_value.create_container.assert_not_called() + assert isinstance(c._container_client, ContainerClient) diff --git a/tests/unit/destination/az/util.py b/tests/unit/destination/az/util.py new file mode 100644 index 00000000..6db5ef27 --- /dev/null +++ b/tests/unit/destination/az/util.py @@ -0,0 +1,40 @@ +import collections + + +class AZParams(collections.Mapping): + def __init__(self, only_required=False) -> None: + 
self.container_name = "test_container" + self.connection_string = "DefaultEndpointsProtocol=https;AccountName=ACCOUNT_NAME;AccountKey=ACCOUNT_KEY;EndpointSuffix=core.windows.net" + + if not only_required: + self.hostname = "test_host" + self.chunk_size = 123 + self.remote_path = "/himom" + + def __iter__(self): + return iter(self.__dict__) + + def __len__(self): + return len(self.__dict__) + + def __getitem__(self, key): + return self.__dict__[key] + + +class AZConfigParams(collections.Mapping): + def __init__(self, only_required=False) -> None: + self.connection_string = "test_connection_string" + self.container_name = "test_container" + + if not only_required: + self.chunk_size = 123 + self.remote_path = "/himom" + + def __iter__(self): + return iter(self.__dict__) + + def __len__(self): + return len(self.__dict__) + + def __getitem__(self, key): + return self.__dict__[key] diff --git a/twindb_backup/configuration/destinations/az.py b/twindb_backup/configuration/destinations/az.py index 00cafcca..6d3c03ab 100644 --- a/twindb_backup/configuration/destinations/az.py +++ b/twindb_backup/configuration/destinations/az.py @@ -7,11 +7,22 @@ class AZConfig: def __init__( self, connection_string: str, container_name: str, chunk_size: int = 1024 * 1024 * 4, remote_path: str = "/" ): - self._connection_string = connection_string self._container_name = container_name self._chunk_size = chunk_size self._remote_path = remote_path + self.validate_config() + + def validate_config(self): + """Validate configuration.""" + if not isinstance(self._connection_string, str): + raise ValueError("CONNECTION_STRING must be a string") + if not isinstance(self._container_name, str): + raise ValueError("CONTAINER_NAME must be a string") + if not isinstance(self._chunk_size, int): + raise ValueError("CHUNK_SIZE must be an integer") + if not isinstance(self._remote_path, str): + raise ValueError("REMOTE_PATH must be a string") @property def connection_string(self) -> str: diff --git a/twindb_backup/destination/az.py b/twindb_backup/destination/az.py index afb857cb..105c0cea 100644 --- a/twindb_backup/destination/az.py +++ b/twindb_backup/destination/az.py @@ -36,6 +36,7 @@ def __init__( connection_string (str): Connection string for the Azure storage account hostname (str, optional): Hostname of the host performing the backup. Defaults to socket.gethostname(). chunk_size (int, optional): Size in bytes for read/write streams. Defaults to 4*1024*1024. + remote_path (str, optional): Remote base path in the container to store backups. Defaults to "/". 
Raises: err: Raises an error if the client cannot be initialized From 1a7370e93d2dee43fb6a3ea14d0a71a925f94275 Mon Sep 17 00:00:00 2001 From: Jsalz2000 Date: Fri, 30 Aug 2024 14:43:55 -0500 Subject: [PATCH 07/10] Adds unit tests for Azure delete, list_files, read, save, write, and render_path --- tests/unit/destination/az/test_delete.py | 23 +++++ tests/unit/destination/az/test_list_files.py | 86 +++++++++++++++++++ tests/unit/destination/az/test_read.py | 45 ++++++++++ tests/unit/destination/az/test_render_path.py | 8 ++ tests/unit/destination/az/test_save.py | 37 ++++++++ tests/unit/destination/az/test_write.py | 27 ++++++ tests/unit/destination/az/util.py | 14 +++ 7 files changed, 240 insertions(+) create mode 100644 tests/unit/destination/az/test_delete.py create mode 100644 tests/unit/destination/az/test_list_files.py create mode 100644 tests/unit/destination/az/test_read.py create mode 100644 tests/unit/destination/az/test_render_path.py create mode 100644 tests/unit/destination/az/test_save.py create mode 100644 tests/unit/destination/az/test_write.py diff --git a/tests/unit/destination/az/test_delete.py b/tests/unit/destination/az/test_delete.py new file mode 100644 index 00000000..357ac84f --- /dev/null +++ b/tests/unit/destination/az/test_delete.py @@ -0,0 +1,23 @@ +import pytest + +import twindb_backup.destination.az as az + +from .util import mocked_az + + +def test_delete_success(): + """Tests AZ.delete method, ensuring the blob is deleted.""" + c = mocked_az() + + c.delete("test") + c._container_client.delete_blob.assert_called_once_with(c.render_path("test")) + + +def test_delete_fail(): + """Tests AZ.delete method, re-raising an exception on failure""" + c = mocked_az() + c._container_client.delete_blob.side_effect = Exception() + + with pytest.raises(Exception): + c.delete("test") + c._container_client.delete_blob.assert_called_once_with(c.render_path("test")) diff --git a/tests/unit/destination/az/test_list_files.py b/tests/unit/destination/az/test_list_files.py new file mode 100644 index 00000000..c89debbf --- /dev/null +++ b/tests/unit/destination/az/test_list_files.py @@ -0,0 +1,86 @@ +import random +import string + +import azure.core.exceptions as ae +import pytest +from azure.storage.blob import BlobProperties + +from .util import mocked_az + +PREFIX = "/backups/mysql" + +BLOBS = [ + BlobProperties(name="blob1", metadata={"hdi_isfolder": "true"}), + BlobProperties(name="blob2", metadata={"hdi_isfolder": "false"}), + BlobProperties(name="blob3"), +] + + +def test_list_files_success(): + """Tests AZ.list_files method, reading a list of blob names from azure.""" + c = mocked_az() + c._container_client.list_blobs.return_value = BLOBS + + blobs = c._list_files() + assert blobs == [b.name for b in BLOBS] + + c._container_client.list_blobs.assert_called_once() + + +def test_list_files_fail(): + """Tests AZ.list_files method, re-raises an exception on failure""" + c = mocked_az() + c._container_client.list_blobs.side_effect = ae.HttpResponseError() + + with pytest.raises(Exception): + c._list_files(PREFIX, False, False) + + c._container_client.list_blobs.assert_called_once_with(name_starts_with=PREFIX, include=["metadata"]) + + +def test_list_files_files_only(): + """Tests AZ.list_files method, listing only file blobs""" + c = mocked_az() + c._container_client.list_blobs.return_value = BLOBS + + blob_names = c._list_files(PREFIX, False, True) + + assert blob_names == ["blob2", "blob3"] + + c._container_client.list_blobs.assert_called_once_with(name_starts_with=PREFIX, 
include=["metadata"]) + + +def test_list_files_all_files(): + """Tests AZ.list_files method, listing all blobs, including directories""" + c = mocked_az() + c._container_client.list_blobs.return_value = BLOBS + + blob_names = c._list_files(PREFIX, False, False) + + assert blob_names == [b.name for b in BLOBS] + + c._container_client.list_blobs.assert_called_once_with(name_starts_with=PREFIX, include=["metadata"]) + + +def test_list_files_recursive(): + """Tests AZ.list_files method, recursive option is ignored""" + c = mocked_az() + c._container_client.list_blobs.return_value = BLOBS + + blob_names = c._list_files(PREFIX, False, False) + blob_names_recursive = c._list_files(PREFIX, True, False) + + assert blob_names == blob_names_recursive + c._container_client.list_blobs.assert_called_with(name_starts_with=PREFIX, include=["metadata"]) + + +def test_list_files_prefix(): + """Tests AZ.list_files method, prefix is used as a filter in list_blobs only""" + c = mocked_az() + c._container_client.list_blobs.return_value = BLOBS + + # Prefix is used as a filter in list_blobs, and because its mocked - it makes no difference. + blob_names = c._list_files(random.choices(string.ascii_lowercase), False, False) + blob_names_recursive = c._list_files(PREFIX, False, False) + + assert blob_names == blob_names_recursive diff --git a/tests/unit/destination/az/test_read.py b/tests/unit/destination/az/test_read.py new file mode 100644 index 00000000..052cafca --- /dev/null +++ b/tests/unit/destination/az/test_read.py @@ -0,0 +1,45 @@ +from unittest.mock import MagicMock + +import azure.core.exceptions as ae +import pytest +from azure.storage.blob import StorageStreamDownloader + +from twindb_backup.destination.exceptions import FileNotFound + +from .util import mocked_az + +EXAMPLE_FILE = "test/backup.tar.gz" + + +def test_read_success(): + """Tests AZ.read method, ensuring the blob is read from azure.""" + c = mocked_az() + mock = MagicMock(StorageStreamDownloader) + c._container_client.download_blob.return_value = mock + + c.read(EXAMPLE_FILE) + + c._container_client.download_blob.assert_called_once_with(c.render_path(EXAMPLE_FILE), encoding="utf-8") + mock.read.assert_called_once() + + +def test_read_fail(): + """Tests AZ.read method, re-raises an exception on failure""" + c = mocked_az() + c._container_client.download_blob.side_effect = ae.HttpResponseError() + + with pytest.raises(Exception): + c.read(EXAMPLE_FILE) + c._container_client.download_blob.assert_called_once_with(c.render_path(EXAMPLE_FILE), encoding="utf-8") + + +def test_read_fail_not_found(): + """Tests AZ.read method, raising a twindb_backup.destination.exceptions.FileNotFound exception on ResourceNotFoundError""" + c = mocked_az() + c._container_client.download_blob.side_effect = ae.ResourceNotFoundError() + + with pytest.raises( + FileNotFound, match=f"File {c.render_path(EXAMPLE_FILE)} does not exist in container {c._container_name}" + ): + c.read(EXAMPLE_FILE) + c._container_client.download_blob.assert_called_once_with(c.render_path(EXAMPLE_FILE), encoding="utf-8") diff --git a/tests/unit/destination/az/test_render_path.py b/tests/unit/destination/az/test_render_path.py new file mode 100644 index 00000000..6873057f --- /dev/null +++ b/tests/unit/destination/az/test_render_path.py @@ -0,0 +1,8 @@ +from .util import mocked_az + + +def test_render_path(): + """Test render_path method, ensuring the remote path is prepended to the path.""" + c = mocked_az() + + assert c.render_path("test") == f"{c.remote_path}/test" diff --git 
a/tests/unit/destination/az/test_save.py b/tests/unit/destination/az/test_save.py new file mode 100644 index 00000000..0cafd271 --- /dev/null +++ b/tests/unit/destination/az/test_save.py @@ -0,0 +1,37 @@ +from typing import BinaryIO +from unittest.mock import MagicMock + +import azure.core.exceptions as ae +import pytest + +from .util import mocked_az + +EXAMPLE_FILE = "test/backup.tar.gz" + + +def test_save_success(): + """Tests AZ.save method, ensuring the blob is saved to azure.""" + c = mocked_az() + handler = MagicMock(BinaryIO) + file_obj = MagicMock() + handler.__enter__.return_value = file_obj + handler.__exit__.return_value = None + + c.save(handler, EXAMPLE_FILE) + + c._container_client.upload_blob.assert_called_once_with(c.render_path(EXAMPLE_FILE), file_obj) + + +def test_save_fail(): + """Tests AZ.save method, re-raises an exception on failure""" + c = mocked_az() + handler = MagicMock(BinaryIO) + file_obj = MagicMock() + handler.__enter__.return_value = file_obj + handler.__exit__.return_value = None + c._container_client.upload_blob.side_effect = ae.HttpResponseError() + + with pytest.raises(Exception): + c.save(handler, EXAMPLE_FILE) + + c._container_client.upload_blob.assert_called_once_with(c.render_path(EXAMPLE_FILE), file_obj) diff --git a/tests/unit/destination/az/test_write.py b/tests/unit/destination/az/test_write.py new file mode 100644 index 00000000..99303939 --- /dev/null +++ b/tests/unit/destination/az/test_write.py @@ -0,0 +1,27 @@ +import azure.core.exceptions as ae +import pytest + +from .util import mocked_az + +EXAMPLE_FILE = "test/backup.tar.gz" +CONTENT = b"test content" + + +def test_write_success(): + """Tests AZ.write method, ensuring the blob is written to azure.""" + c = mocked_az() + + c.write(CONTENT, EXAMPLE_FILE) + + c._container_client.upload_blob.assert_called_once_with(c.render_path(EXAMPLE_FILE), CONTENT, overwrite=True) + + +def test_write_fail(): + """Tests AZ.write method, re-raises an exception on failure""" + c = mocked_az() + c._container_client.upload_blob.side_effect = ae.HttpResponseError() + + with pytest.raises(Exception): + c.write(CONTENT, EXAMPLE_FILE) + + c._container_client.upload_blob.assert_called_once_with(c.render_path(EXAMPLE_FILE), CONTENT, overwrite=True) diff --git a/tests/unit/destination/az/util.py b/tests/unit/destination/az/util.py index 6db5ef27..8b221f9f 100644 --- a/tests/unit/destination/az/util.py +++ b/tests/unit/destination/az/util.py @@ -1,4 +1,9 @@ import collections +from unittest.mock import MagicMock, patch + +from azure.storage.blob import ContainerClient + +import twindb_backup.destination.az as az class AZParams(collections.Mapping): @@ -38,3 +43,12 @@ def __len__(self): def __getitem__(self, key): return self.__dict__[key] + + +def mocked_az(): + with patch("twindb_backup.destination.az.AZ._connect") as mc: + mc.return_value = MagicMock(spec=ContainerClient) + p = AZParams() + c = az.AZ(**dict(p)) + + return c From 91c600241df0c8bd84dc6a3827d7e4a2c587860d Mon Sep 17 00:00:00 2001 From: Jsalz2000 Date: Fri, 30 Aug 2024 14:55:48 -0500 Subject: [PATCH 08/10] Adds missing test case in Azure test_init, when init fails to create container --- tests/unit/destination/az/test_init.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/unit/destination/az/test_init.py b/tests/unit/destination/az/test_init.py index fdd2b92e..9361533c 100644 --- a/tests/unit/destination/az/test_init.py +++ b/tests/unit/destination/az/test_init.py @@ -1,6 +1,7 @@ import socket from unittest.mock import 
MagicMock, patch +import azure.core.exceptions as ae import pytest from azure.storage.blob import ContainerClient @@ -76,6 +77,22 @@ def test_init_container_not_exists(): assert isinstance(c._container_client, ContainerClient) +def test_init_container_create_fails(): + """Test initialization of AZ with container not existing, fails to create container, re-raising error.""" + with patch("twindb_backup.destination.az.ContainerClient.exists") as mc: + mc.return_value = False + with patch("twindb_backup.destination.az.ContainerClient.create_container") as mc_create_container: + mc_create_container.side_effect = ae.HttpResponseError() + + p = AZParams() + with pytest.raises(Exception): + c = az.AZ(**dict(p)) + + az.ContainerClient.exists.assert_called_once() + az.ContainerClient.create_container.assert_called_once() + assert isinstance(c._container_client, ContainerClient) + + def test_init_success(): """Test initialization of AZ with existing container, mocking the from_connection_string method.""" with patch("twindb_backup.destination.az.ContainerClient.from_connection_string") as mc: From 07e6bcc4583bb9c9ffed4c57f9b2e5e7053bc31d Mon Sep 17 00:00:00 2001 From: Jsalz2000 Date: Sat, 31 Aug 2024 13:59:57 -0500 Subject: [PATCH 09/10] Adds tests for Azure get_stream, separates download to pipe into its own function --- .../destination/az/test_download_to_pipe.py | 40 +++++++++++++ tests/unit/destination/az/test_get_stream.py | 58 +++++++++++++++++++ twindb_backup/destination/az.py | 26 +++++++-- 3 files changed, 118 insertions(+), 6 deletions(-) create mode 100644 tests/unit/destination/az/test_download_to_pipe.py create mode 100644 tests/unit/destination/az/test_get_stream.py diff --git a/tests/unit/destination/az/test_download_to_pipe.py b/tests/unit/destination/az/test_download_to_pipe.py new file mode 100644 index 00000000..837da0c4 --- /dev/null +++ b/tests/unit/destination/az/test_download_to_pipe.py @@ -0,0 +1,40 @@ +from unittest.mock import MagicMock, patch + +import azure.core.exceptions as ae +import pytest + +from .util import mocked_az + + +def test_download_to_pipe_success(): + """Tests AZ.download_to_pipe method, mocks calls for os and ContainerClient""" + with patch("twindb_backup.destination.az.os") as mc_os: + mc_fdopen = MagicMock() + mc_os.fdopen.return_value = mc_fdopen + + c = mocked_az() + + mc_dbr = MagicMock() + c._container_client.download_blob.return_value = mc_dbr + + c._download_to_pipe(c.render_path("foo-key"), 100, 200) + + mc_os.close.assert_called_once_with(100) + mc_os.fdopen.assert_called_once_with(200, "wb") + c._container_client.download_blob.assert_called_once_with(c.render_path("foo-key")) + mc_dbr.readinto.assert_called_once_with(mc_fdopen.__enter__()) + + +def test_download_to_pipe_fail(): + """Tests AZ.download_to_pipe method, re-raises exception when download fails in child process""" + with patch("twindb_backup.destination.az.os") as mc_os: + c = mocked_az() + + c._container_client.download_blob.side_effect = ae.HttpResponseError() + + with pytest.raises(Exception): + c._download_to_pipe(c.render_path("foo-key"), 100, 200) + + mc_os.close.assert_called_once_with(100) + mc_os.fdopen.assert_called_once_with(200, "wb") + c._container_client.download_blob.assert_called_once_with(c.render_path("foo-key")) diff --git a/tests/unit/destination/az/test_get_stream.py b/tests/unit/destination/az/test_get_stream.py new file mode 100644 index 00000000..68c444dc --- /dev/null +++ b/tests/unit/destination/az/test_get_stream.py @@ -0,0 +1,58 @@ +from multiprocessing
import Process +from unittest.mock import MagicMock, patch + +import mock +import pytest + +import twindb_backup.destination.az as az + +from .util import mocked_az + + +def test_get_stream_success(): + """Tests AZ.get_stream method, mocks calls for process and os""" + with patch("twindb_backup.destination.az.os") as mc_os: + with patch("twindb_backup.destination.az.Process") as mc_process: + mc = MagicMock(spec=Process) + mc_process.return_value = mc + mc.exitcode = 0 + + mc_os.pipe.return_value = (100, 200) + c = mocked_az() + + mock_copy = mock.Mock() + mock_copy.key = "foo-key" + + with c.get_stream(mock_copy): + pass + + az.Process.assert_called_once_with(target=c._download_to_pipe, args=(c.render_path("foo-key"), 100, 200)) + mc_os.close.assert_called_once_with(200) + mc_os.fdopen.assert_called_once_with(100, "rb") + mc.start.assert_called_once() + mc.join.assert_called_once() + + +def test_get_stream_failure(): + """Tests AZ.get_stream method, raises an exception when child process fails""" + with patch("twindb_backup.destination.az.os") as mc_os: + with patch("twindb_backup.destination.az.Process") as mc_process: + mc = MagicMock(spec=Process) + mc_process.return_value = mc + mc.exitcode = 1 + + mc_os.pipe.return_value = (100, 200) + c = mocked_az() + + mock_copy = mock.Mock() + mock_copy.key = "foo-key" + + with pytest.raises(Exception): + with c.get_stream(mock_copy): + pass + + az.Process.assert_called_once_with(target=c._download_to_pipe, args=(c.render_path("foo-key"), 100, 200)) + mc_os.close.assert_called_once_with(200) + mc_os.fdopen.assert_called_once_with(100, "rb") + mc.start.assert_called_once() + mc.join.assert_called_once() diff --git a/twindb_backup/destination/az.py b/twindb_backup/destination/az.py index 105c0cea..52f08a27 100644 --- a/twindb_backup/destination/az.py +++ b/twindb_backup/destination/az.py @@ -92,6 +92,22 @@ def render_path(self, path: str) -> str: """ return f"{self._remote_path}/{path}" + def _download_to_pipe(self, blob_key: str, pipe_in: int, pipe_out: int) -> None: + """Downloads a blob from Azure Blob Storage and writes it to a pipe + + Args: + blob_key (str): The path to the blob in the container + pipe_in (int): The pipe to read the blob content from, closed in child process. + pipe_out (int): The pipe to write the blob content to, closed in parent process. + """ + os.close(pipe_in) + with os.fdopen(pipe_out, "wb") as pipe_out_file: + try: + self._container_client.download_blob(blob_key).readinto(pipe_out_file) + except builtins.Exception as err: + LOG.error(f"Failed to download blob {blob_key}. 
Error: {type(err).__name__}, Reason: {err}") + raise err + """BaseDestination ABSTRACT METHODS IMPLEMENTATION """ @@ -125,12 +141,7 @@ def get_stream(self, copy): LOG.debug(f"Attempting to stream blob: {self.render_path(copy.key)}") pipe_in, pipe_out = os.pipe() - def _download_to_pipe(blob_key: str, pipe_in: int, pipe_out: int) -> None: - os.close(pipe_in) - with os.fdopen(pipe_out, "wb") as pipe_out_file: - self._container_client.download_blob(blob_key).readinto(pipe_out_file) - - proc = Process(target=_download_to_pipe, args=(self.render_path(copy.key), pipe_in, pipe_out)) + proc = Process(target=self._download_to_pipe, args=(self.render_path(copy.key), pipe_in, pipe_out)) proc.start() os.close(pipe_out) try: @@ -138,6 +149,9 @@ def _download_to_pipe(blob_key: str, pipe_in: int, pipe_out: int) -> None: yield pipe_in_file finally: proc.join() + if proc.exitcode != 0: + LOG.error(f"Failed to stream blob {self.render_path(copy.key)}") + raise builtins.Exception(f"Failed to stream blob {self.render_path(copy.key)}") def read(self, filepath: str) -> bytes: """Read content of a file path from Azure Blob Storage From 9ac17c8f79caecb15f56350cc8e1f79dff179777 Mon Sep 17 00:00:00 2001 From: Jsalz2000 Date: Tue, 3 Sep 2024 08:34:22 -0500 Subject: [PATCH 10/10] Corrects code style issues, docstrings, type hints --- docs/usage.rst | 2 +- tests/unit/destination/az/test_list_files.py | 2 +- twindb_backup/destination/az.py | 18 +++++++++++------- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/docs/usage.rst b/docs/usage.rst index 4fdd6fa9..f10b0aa0 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -47,7 +47,7 @@ Backup Destination The ``[destination]`` section specifies where to store backup copies. ``backup_destination`` can be either ``ssh`` (if you want to store backups on a remote SSH server), -``s3`` (if you want to store backups in Amazon S3), ``az`` (if the backup should be stored in Azure Blob Storage), or ``gsc`` (if the backup should be stored in Google Cloud). +``s3`` (if you want to store backups in Amazon S3), ``az`` (if the backup should be stored in Azure Blob Storage), or ``gcs`` (if the backup should be stored in Google Cloud). In the optional ``keep_local_path`` you can specify a local path where the tool will store a local copy of the backup. It's useful if you want to stream a MySQL backup to S3 and would like to keep a local copy as well. diff --git a/tests/unit/destination/az/test_list_files.py b/tests/unit/destination/az/test_list_files.py index c89debbf..1e69322a 100644 --- a/tests/unit/destination/az/test_list_files.py +++ b/tests/unit/destination/az/test_list_files.py @@ -80,7 +80,7 @@ def test_list_files_prefix(): c._container_client.list_blobs.return_value = BLOBS # Prefix is used as a filter in list_blobs, and because its mocked - it makes no difference. 
- blob_names = c._list_files(random.choices(string.ascii_lowercase), False, False) + blob_names = c._list_files("".join(random.SystemRandom().choices(string.ascii_lowercase, k=10)), False, False) blob_names_recursive = c._list_files(PREFIX, False, False) assert blob_names == blob_names_recursive diff --git a/twindb_backup/destination/az.py b/twindb_backup/destination/az.py index 52f08a27..e6ba0a59 100644 --- a/twindb_backup/destination/az.py +++ b/twindb_backup/destination/az.py @@ -5,7 +5,7 @@ import builtins import os import socket -import typing as T +import typing as t from contextlib import contextmanager from multiprocessing import Process @@ -13,6 +13,7 @@ from azure.storage.blob import ContainerClient from twindb_backup import LOG +from twindb_backup.copy.base_copy import BaseCopy from twindb_backup.destination.base_destination import BaseDestination from twindb_backup.destination.exceptions import FileNotFound @@ -87,6 +88,9 @@ def _connect(self) -> ContainerClient: def render_path(self, path: str) -> str: """Renders the absolute path for the Azure Blob Storage Destination + Args: + path (str): Relative path to the blob in the container + Returns: str: Absolute path to the blob in the container """ @@ -128,14 +132,14 @@ def delete(self, path: str) -> None: raise err @contextmanager - def get_stream(self, copy): + def get_stream(self, copy: BaseCopy) -> t.Generator[t.BinaryIO, None, None]: """Streams a blob from Azure Blob Storage into a pipe Args: copy (BaseCopy): A copy object to stream from Azure Yields: - T.Generator(T.BinaryIO): A generator that yields a stream of the blob's content + t.Generator(t.BinaryIO): A generator that yields a stream of the blob's content """ LOG.debug(f"Attempting to stream blob: {self.render_path(copy.key)}") @@ -168,18 +172,18 @@ def read(self, filepath: str) -> bytes: LOG.debug(f"Attempting to read blob: {self.render_path(filepath)}") try: return self._container_client.download_blob(self.render_path(filepath), encoding="utf-8").read() - except ae.ResourceNotFoundError as err: + except ae.ResourceNotFoundError: LOG.debug(f"File {self.render_path(filepath)} does not exist in container {self._container_name}") raise FileNotFound(f"File {self.render_path(filepath)} does not exist in container {self._container_name}") except builtins.Exception as err: LOG.error(f"Failed to read blob {self.render_path(filepath)}. Error: {type(err).__name__}, Reason: {err}") raise err - def save(self, handler: T.BinaryIO, filepath: str) -> None: + def save(self, handler: t.BinaryIO, filepath: str) -> None: """Save a stream given as handler to filepath in Azure Blob Storage Args: - handler (T.BinaryIO): Incoming stream + handler (t.BinaryIO): Incoming stream filepath (str): Relative path to a blob in the container Raises: @@ -212,7 +216,7 @@ def write(self, content: str, filepath: str) -> None: LOG.error(f"Failed to upload or overwrite blob. Error {type(err).__name__}, Reason: {err}") raise err - def _list_files(self, prefix: str = "", recursive: bool = False, files_only: bool = False) -> T.List[str]: + def _list_files(self, prefix: str = "", recursive: bool = False, files_only: bool = False) -> t.List[str]: """List files in the Azure Blob Storage container Args: