From d4d9a6715cc571387474e96667538b56952177ac Mon Sep 17 00:00:00 2001 From: Stefaan Lippens Date: Mon, 25 Sep 2023 13:48:01 +0200 Subject: [PATCH] Issue #74 prime_caches: log errors and try to keep running --- .../background/prime_caches.py | 34 +++++++++++-------- tests/background/test_prime_caches.py | 23 ++++++++++++- 2 files changed, 42 insertions(+), 15 deletions(-) diff --git a/src/openeo_aggregator/background/prime_caches.py b/src/openeo_aggregator/background/prime_caches.py index 9b70b056..d2a269c4 100644 --- a/src/openeo_aggregator/background/prime_caches.py +++ b/src/openeo_aggregator/background/prime_caches.py @@ -4,7 +4,7 @@ from kazoo.client import KazooClient from openeo.util import TimingLogger -from openeo_driver.util.logging import setup_logging +from openeo_driver.util.logging import just_log_exceptions, setup_logging from openeo_aggregator.app import get_aggregator_logging_config from openeo_aggregator.backend import AggregatorBackendImplementation @@ -54,21 +54,27 @@ def prime_caches(config: Union[str, Path, AggregatorConfig, None] = None): backend_implementation = AggregatorBackendImplementation(backends=backends, config=config) with TimingLogger(title="General capabilities", logger=_log): - backends.get_api_versions() - backend_implementation.file_formats() - backend_implementation.secondary_services.service_types() - - with TimingLogger(title="Get full collection listing", logger=_log): - collections_metadata = backend_implementation.catalog.get_all_metadata() - - with TimingLogger(title="Get per collection metadata", logger=_log): - collection_ids = [m["id"] for m in collections_metadata] - for c, collection_id in enumerate(collection_ids): - _log.info(f"get collection {c+1}/{len(collection_ids)} {collection_id}") - backend_implementation.catalog.get_collection_metadata(collection_id=collection_id) + with just_log_exceptions(log=_log): + backends.get_api_versions() + with just_log_exceptions(log=_log): + backend_implementation.file_formats() + with just_log_exceptions(log=_log): + backend_implementation.secondary_services.service_types() + + with just_log_exceptions(log=_log): + with TimingLogger(title="Get full collection listing", logger=_log): + collections_metadata = backend_implementation.catalog.get_all_metadata() + + with TimingLogger(title="Get per collection metadata", logger=_log): + collection_ids = [m["id"] for m in collections_metadata] + for c, collection_id in enumerate(collection_ids): + _log.info(f"get collection {c+1}/{len(collection_ids)} {collection_id}") + with just_log_exceptions(log=_log): + backend_implementation.catalog.get_collection_metadata(collection_id=collection_id) with TimingLogger(title="Get merged processes", logger=_log): - backend_implementation.processing.get_merged_process_metadata() + with just_log_exceptions(log=_log): + backend_implementation.processing.get_merged_process_metadata() _log.info(f"Zookeeper stats: {kazoo_stats}") diff --git a/tests/background/test_prime_caches.py b/tests/background/test_prime_caches.py index 2ac679d5..dc945d8a 100644 --- a/tests/background/test_prime_caches.py +++ b/tests/background/test_prime_caches.py @@ -1,4 +1,4 @@ -from openeo_aggregator.background.prime_caches import AttrStatsProxy +from openeo_aggregator.background.prime_caches import AttrStatsProxy, prime_caches class TestAttrStatsProxy: @@ -16,3 +16,24 @@ def meh(self, x): assert foo.meh(6) == 12 assert foo.stats == {"bar": 1} + + +def test_prime_caches_basic(config, backend1, backend2, requests_mock, mbldr, caplog): + """Just check that bare basics of `prime_caches` work.""" + # TODO: check that (zookeeper) caches are actually updated/written. + just_geotiff = { + "input": {"GTiff": {"gis_data_types": ["raster"], "parameters": {}, "title": "GeoTiff"}}, + "output": {"GTiff": {"gis_data_types": ["raster"], "parameters": {}, "title": "GeoTiff"}}, + } + mocks = [ + requests_mock.get(backend1 + "/file_formats", json=just_geotiff), + requests_mock.get(backend2 + "/file_formats", json=just_geotiff), + requests_mock.get(backend1 + "/collections", json=mbldr.collections("S2")), + requests_mock.get(backend1 + "/collections/S2", json=mbldr.collection("S2")), + requests_mock.get(backend2 + "/collections", json=mbldr.collections("S2")), + requests_mock.get(backend2 + "/collections/S2", json=mbldr.collection("S2")), + ] + + prime_caches(config=config) + + assert all([m.call_count == 1 for m in mocks])