import contextlib
import logging
import re
from datetime import timedelta
from typing import Any, Dict, List
from unittest.mock import Mock, call, patch

import exasol.bucketfs as bfs  # type: ignore
import pytest
from pyexasol import ExaConnection
from tenacity import RetryError

from exasol.python_extension_common.deployment.extract_validator import (
    ExtractValidator,
    ExtractException,
    _udf_name,
)

LOG = logging.getLogger(__name__)


def bucket_path(path: str):
    bucket_api = bfs.MountedBucket("svc", "bkt")
    return bfs.path.BucketPath(path, bucket_api=bucket_api)


@pytest.fixture
def archive_bucket_path():
    return bucket_path("/folder/a.tgz")


class ConnectionMock:
    """
    Simulates a pyexasol connection. Each key of ``spec`` is a regular
    expression; ``execute()`` matches the first line of the SQL statement
    against these expressions (ignoring case) and uses the associated value
    as canned result rows. A value may also be a callable returning the
    rows, e.g. a ``Mock`` raising an exception.
    """

    def __init__(self, spec: Dict[str, Any]):
        self.spec = spec
        self.values = iter(())

    def _get_values(self, first_line: str):
        for regex, values in self.spec.items():
            if re.match(regex, first_line, re.IGNORECASE):
                return values() if callable(values) else values
        LOG.warning(f"ConnectionMock.execute() called with '{first_line[:40]}...'")
        return ()

    def execute(self, *args, **kwargs):
        statement = args[0] if len(args) else kwargs["query"]
        first_line = statement.strip().splitlines()[0]
        self.values = iter(self._get_values(first_line))
        return self

    def fetchone(self):
        return next(self.values)

    def fetchall(self):
        return list(self.values)


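# Illustration only (not part of the original suite): a minimal sketch of the
# ConnectionMock contract described above. The test name and the statements
# below are made up for this demo.
def test_connection_mock_dispatch_demo():
    mock = ConnectionMock({r"SELECT nproc\(\)": [4]})
    # The first line of the statement selects the canned rows ...
    assert mock.execute("SELECT nproc()").fetchone() == 4
    # ... while unmatched statements yield no rows at all.
    assert mock.execute("SELECT something_else").fetchall() == []

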
class Simulator:
    """
    Bundles a ConnectionMock with canned UDF results and exposes the
    ExtractValidator under test via property ``testee``.
    """

    def __init__(self, nodes: int, udf_results: List[List[Any]],
                 create_script=()):
        self.create_script = create_script
        self.nodes = nodes
        self.udf = Mock(side_effect=udf_results)
        self.callback = Mock(side_effect=self._callback)

    def _callback(self, n, pending):
        LOG.debug(f"{len(pending)} of {n} nodes pending: {pending}")

    @property
    def testee(self):
        connection = ConnectionMock({
            r"CREATE .* SCRIPT": self.create_script,
            r"(CREATE|DROP) ": (),
            r"SELECT nproc\(\)": [self.nodes],
            r"SELECT .*_manifest_": self.udf,
        })
        return ExtractValidator(
            pyexasol_connection=Mock(execute=connection.execute),
            timeout=timedelta(seconds=10),
            interval=timedelta(seconds=1),
            callback=self.callback,
        )


@contextlib.contextmanager
def mock_tenacity_wait(*wait_lists: List[int | float], max: int = 1000):
    """
    This context manager mocks internals of the ``tenacity`` library in
    order to simulate waiting for timeouts in ``tenacity.Retrying()``. All
    specified durations are interpreted as numbers of seconds and can be
    floats.

    A test case may provide multiple lists of waiting periods to cover
    multiple consecutive retry phases in the class under test, see
    ``ExtractValidator`` for an example:

        mock_tenacity_wait([1, 2], [3, 4], max=100)

    After all wait lists are exhausted, i.e. the mock has simulated waiting
    for all the specified periods, the mock makes ``time.monotonic()``
    constantly return the specified max time, typically making tenacity
    detect a timeout.

    Internally the mock needs to prefix each list of waiting periods with
    two additional entries ``[0, 0]``, which ``tenacity.Retrying()`` uses to
    initialize its start times in ``BaseRetrying.begin()`` and
    ``RetryCallState.__init__()``, see
    https://github.com/jd/tenacity/blob/main/tenacity/__init__.py.
    """
    def expand(wait_lists):
        for waits in wait_lists:
            yield from [0, 0] + waits

    durations = expand(wait_lists)

    def mock():
        try:
            return next(durations)
        except StopIteration:
            return max

    with patch("tenacity.time.sleep"):
        with patch("tenacity.time.monotonic", side_effect=mock):
            yield


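# Illustration only, not part of the original suite: a minimal sketch showing
# how mock_tenacity_wait() drives a plain tenacity retry loop. The test name,
# the 10-second budget, and the fixed 1-second wait are assumptions made up
# for this demo.
import tenacity  # imported here to keep the illustration self-contained


def test_mock_tenacity_wait_demo():
    attempts = []
    with mock_tenacity_wait([1, 2]):
        with pytest.raises(RetryError):
            for attempt in tenacity.Retrying(
                    stop=tenacity.stop_after_delay(10),
                    wait=tenacity.wait_fixed(1),
            ):
                with attempt:
                    # Attempts 1 and 2 see the simulated timestamps 1 and 2,
                    # both within the 10 s budget. Attempt 3 sees the default
                    # max=1000, so tenacity gives up with a RetryError.
                    attempts.append(attempt.retry_state.attempt_number)
                    raise Exception("never succeeding")
    assert attempts == [1, 2, 3]

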
@pytest.mark.parametrize(
    "schema, expected",
    [
        (None, r'"alias_manifest_[0-9]+"'),
        ("schema", r'"schema"\."alias_manifest_[0-9]+"'),
    ])
def test_udf_name(schema, expected):
    assert re.match(expected, _udf_name(schema, "alias"))


def test_create_script_failure(archive_bucket_path):
    create_script = Mock(side_effect=Exception("failed to create UDF script"))
    sim = Simulator(nodes=4, udf_results=[], create_script=create_script)
    with pytest.raises(Exception, match="failed to create UDF script"):
        with mock_tenacity_wait([1]):
            sim.testee.verify_all_nodes("alias", "schema", archive_bucket_path)


def test_failure(archive_bucket_path):
    sim = Simulator(
        nodes=4,
        udf_results=[
            [[1, False]],
            [[1, False]],
            [[1, False]],
        ])
    with pytest.raises(ExtractException) as ex:
        with mock_tenacity_wait([1], [2, 4]):
            sim.testee.verify_all_nodes("alias", "schema", archive_bucket_path)
    assert "1 of 4 nodes are still pending. IDs: [1]" == str(ex.value)


def test_success(archive_bucket_path):
    sim = Simulator(
        nodes=4,
        udf_results=[
            [[1, False], [2, False]],
            [[1, True ], [2, False]],
            [[1, True ], [2, True ]],
        ])
    with mock_tenacity_wait([1], [2, 4]):
        sim.testee.verify_all_nodes("alias", "schema", archive_bucket_path)
    assert sim.callback.call_args_list == [
        call(4, [1, 2]),
        call(4, [2]),
        call(4, []),
    ]


def test_reduced_timeout(archive_bucket_path):
    """
    This test simulates a retry being required for creating the UDF
    script, which already eats up part of the total timeout.

    The test then verifies that the remaining part of the total timeout is
    too short for the actual calls to the UDF to successfully detect the
    manifest on all nodes.
    """
    create_script = Mock(side_effect=[Exception("failure"), ()])
    udf_results = [
        [[1, False], [2, False]],
        [[1, True ], [2, False]],
        [[1, True ], [2, True ]],
    ]
    sim = Simulator(
        nodes=4,
        udf_results=udf_results,
        create_script=create_script,
    )
    with pytest.raises(ExtractException) as ex:
        with mock_tenacity_wait([1], [2, 4]):
            sim.testee.verify_all_nodes("alias", "schema", archive_bucket_path)
    assert "1 of 4 nodes are still pending. IDs: [2]" == str(ex.value)