From 9fe5b8e66b5f5cd5b6762a5ada0b16b25c3e617c Mon Sep 17 00:00:00 2001 From: Adrien Perrin Date: Mon, 21 Oct 2024 09:31:33 +0000 Subject: [PATCH 1/2] auto-discover provider classes in core code and plugins --- geospaas_harvesting/__init__.py | 11 +++ geospaas_harvesting/config.py | 76 ++++++++++--------- geospaas_harvesting/providers/aviso.py | 3 + geospaas_harvesting/providers/ceda.py | 2 + geospaas_harvesting/providers/cmems.py | 2 + .../providers/copernicus_scihub.py | 3 + .../providers/earthdata_cmr.py | 3 + geospaas_harvesting/providers/erddap.py | 3 + geospaas_harvesting/providers/ftp.py | 2 + geospaas_harvesting/providers/http.py | 2 + geospaas_harvesting/providers/jaxa.py | 2 + geospaas_harvesting/providers/local.py | 4 + geospaas_harvesting/providers/metno.py | 3 + geospaas_harvesting/providers/noaa.py | 2 + geospaas_harvesting/providers/podaac.py | 3 + geospaas_harvesting/providers/resto.py | 2 + 16 files changed, 88 insertions(+), 35 deletions(-) diff --git a/geospaas_harvesting/__init__.py b/geospaas_harvesting/__init__.py index 1a0d64c7..8edd5944 100644 --- a/geospaas_harvesting/__init__.py +++ b/geospaas_harvesting/__init__.py @@ -1,9 +1,12 @@ """This module provides means to gather metadata about various datasets into the GeoSPaaS catalog """ + +import importlib import logging.config import os import os.path +import pkgutil import sys import yaml @@ -20,3 +23,11 @@ if logging_configuration: logging.config.dictConfig(logging_configuration) logging.captureWarnings(True) + +# import plugins +discovered_plugins = { + name: importlib.import_module(name) + for finder, name, ispkg + in pkgutil.iter_modules() + if name.startswith('geospaas_harvesting_') +} diff --git a/geospaas_harvesting/config.py b/geospaas_harvesting/config.py index e3d45aad..f7743d72 100644 --- a/geospaas_harvesting/config.py +++ b/geospaas_harvesting/config.py @@ -1,28 +1,37 @@ """Configuration management""" +import importlib import logging +import pkgutil -import geospaas_harvesting.providers.aviso as providers_aviso -import geospaas_harvesting.providers.base as providers_base -import geospaas_harvesting.providers.ceda as providers_ceda -import geospaas_harvesting.providers.cmems as providers_cmems -import geospaas_harvesting.providers.copernicus_scihub as providers_copernicus_scihub -import geospaas_harvesting.providers.earthdata_cmr as providers_earthdata_cmr -import geospaas_harvesting.providers.erddap as providers_erddap -import geospaas_harvesting.providers.ftp as providers_ftp -import geospaas_harvesting.providers.http as providers_http -import geospaas_harvesting.providers.jaxa as providers_jaxa -import geospaas_harvesting.providers.local as providers_local -import geospaas_harvesting.providers.metno as providers_metno -import geospaas_harvesting.providers.noaa as providers_noaa -import geospaas_harvesting.providers.podaac as providers_podaac -import geospaas_harvesting.providers.resto as providers_resto +import geospaas_harvesting from .arguments import ArgumentParser, BooleanArgument, DictArgument, ListArgument +from .providers.base import Provider from .utils import read_yaml_file +def import_provider_modules(): + """Import provider classes from core modules and plugins""" + imported = [] + for base_module in [geospaas_harvesting, *geospaas_harvesting.discovered_plugins.values()]: + for _, name, ispkg in pkgutil.iter_modules(base_module.__path__): + if name == 'providers': + providers = importlib.import_module(f"{base_module.__name__}.{name}") + imported.append(providers) + if ispkg: + for _, provider_name, _ in pkgutil.iter_modules(providers.__path__): + imported.append( + importlib.import_module(f"{providers.__name__}.{provider_name}")) + return imported + + +import_provider_modules() logger = logging.getLogger(__name__) +class NoProviderFoundError(Exception): + """No provider class was found""" + + class Configuration(): """Base class for configuration objects""" @@ -57,23 +66,19 @@ class ProvidersArgument(DictArgument): 'password': 'pass123' } """ - provider_types = { - 'aviso': providers_aviso.AVISOProvider, - 'ceda': providers_ceda.CEDAProvider, - 'cmems': providers_cmems.CMEMSProvider, - 'copernicus_scihub': providers_copernicus_scihub.CopernicusScihubProvider, - 'earthdata_cmr': providers_earthdata_cmr.EarthDataCMRProvider, - 'ftp': providers_ftp.FTPProvider, - 'gportal_ftp': providers_jaxa.GPortalProvider, - 'http': providers_http.HTTPProvider, - 'metno': providers_metno.METNOProvider, - 'nansat': providers_local.NansatProvider, - 'netcdf': providers_local.NetCDFProvider, - 'noaa': providers_noaa.NOAAProvider, - 'podaac': providers_podaac.PODAACProvider, - 'resto': providers_resto.RestoProvider, - 'tabledap': providers_erddap.ERDDAPTableProvider, - } + provider_classes = Provider.__subclasses__() + + def __init__(self, name, **kwargs): + super().__init__(name, **kwargs) + + def _find_provider(self, provider_type): + """Try to find a provider matching the `provider_type` in the + Provider subclasses + """ + for provider_class in self.provider_classes: + if provider_class.type == provider_type: + return provider_class + raise NoProviderFoundError(f"No provider found of type {provider_type}") def parse(self, value): """Go through the list of provider settings and create the @@ -84,15 +89,16 @@ def parse(self, value): for provider_name, provider_settings in providers_dict.items(): try: _providers[provider_name] = ( - self.provider_types[provider_settings['type']]( + self._find_provider(provider_settings['type'])( name=provider_name, **provider_settings, )) except KeyError as error: logger.error('Missing setting for provider: %s', error.args[0]) + except NoProviderFoundError as error: + logger.error(error.args[0]) return _providers - class ProvidersConfiguration(Configuration): """Configuration manager for providers""" @@ -110,7 +116,7 @@ class SearchConfiguration(Configuration): def __init__(self): self.providers = None - common_argument_parser = providers_base.Provider().search_parameters_parser + common_argument_parser = Provider().search_parameters_parser self.config_arguments_parser = ArgumentParser([ DictArgument( 'common', argument_parser=common_argument_parser), diff --git a/geospaas_harvesting/providers/aviso.py b/geospaas_harvesting/providers/aviso.py index c2a6f3f5..89af4614 100644 --- a/geospaas_harvesting/providers/aviso.py +++ b/geospaas_harvesting/providers/aviso.py @@ -6,6 +6,9 @@ class AVISOProvider(TimeFilterMixin, Provider): """Provider for AVISO's Thredds""" + + type = 'aviso' + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.url = 'https://tds.aviso.altimetry.fr/thredds' diff --git a/geospaas_harvesting/providers/ceda.py b/geospaas_harvesting/providers/ceda.py index 86d50304..65dc6917 100644 --- a/geospaas_harvesting/providers/ceda.py +++ b/geospaas_harvesting/providers/ceda.py @@ -9,6 +9,8 @@ class CEDAProvider(TimeFilterMixin, Provider): """Provider for CEDA FTP server""" + type = 'ceda' + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.url = "ftp://anon-ftp.ceda.ac.uk" diff --git a/geospaas_harvesting/providers/cmems.py b/geospaas_harvesting/providers/cmems.py index ce03113d..3047ab9d 100644 --- a/geospaas_harvesting/providers/cmems.py +++ b/geospaas_harvesting/providers/cmems.py @@ -21,6 +21,8 @@ class CMEMSProvider(Provider): """Provider for CMEMS using the copernicusmarine package""" + type = 'cmems' + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.search_parameters_parser.add_arguments([ diff --git a/geospaas_harvesting/providers/copernicus_scihub.py b/geospaas_harvesting/providers/copernicus_scihub.py index 569c2b11..af5b509f 100644 --- a/geospaas_harvesting/providers/copernicus_scihub.py +++ b/geospaas_harvesting/providers/copernicus_scihub.py @@ -17,6 +17,9 @@ class CopernicusScihubProvider(Provider): """Provider for the Copernicus Scihub APIs""" + + type = 'copernicus_scihub' + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.search_url = 'https://apihub.copernicus.eu/apihub/search' diff --git a/geospaas_harvesting/providers/earthdata_cmr.py b/geospaas_harvesting/providers/earthdata_cmr.py index 6e3402da..1ab57b6b 100644 --- a/geospaas_harvesting/providers/earthdata_cmr.py +++ b/geospaas_harvesting/providers/earthdata_cmr.py @@ -16,6 +16,9 @@ class EarthDataCMRProvider(Provider): properly validated because of the massive amount of collections available through this API. This needs to be refined. """ + + type = 'earthdata_cmr' + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.search_url = 'https://cmr.earthdata.nasa.gov/search/granules.umm_json' diff --git a/geospaas_harvesting/providers/erddap.py b/geospaas_harvesting/providers/erddap.py index ab21b371..935258e4 100644 --- a/geospaas_harvesting/providers/erddap.py +++ b/geospaas_harvesting/providers/erddap.py @@ -6,6 +6,9 @@ class ERDDAPTableProvider(Provider): """Provider for tabledap APIs""" + + type = 'tabledap' + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.url = kwargs['url'].rstrip('/') diff --git a/geospaas_harvesting/providers/ftp.py b/geospaas_harvesting/providers/ftp.py index f64dcbb3..f874b620 100644 --- a/geospaas_harvesting/providers/ftp.py +++ b/geospaas_harvesting/providers/ftp.py @@ -9,6 +9,8 @@ class FTPProvider(TimeFilterMixin, Provider): """Generic FTP provider""" + type = 'ftp' + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.search_parameters_parser.add_arguments([ diff --git a/geospaas_harvesting/providers/http.py b/geospaas_harvesting/providers/http.py index e246eec8..31268e15 100644 --- a/geospaas_harvesting/providers/http.py +++ b/geospaas_harvesting/providers/http.py @@ -9,6 +9,8 @@ class HTTPProvider(TimeFilterMixin, Provider): """Generic HTTP directory provider""" + type = 'http' + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.search_parameters_parser.add_arguments([ diff --git a/geospaas_harvesting/providers/jaxa.py b/geospaas_harvesting/providers/jaxa.py index 41cea4bd..b7eaa1fe 100644 --- a/geospaas_harvesting/providers/jaxa.py +++ b/geospaas_harvesting/providers/jaxa.py @@ -9,6 +9,8 @@ class GPortalProvider(TimeFilterMixin, Provider): """Provider for JAXA GPortal FTP server""" + type = 'gportal_ftp' + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.url = "ftp://ftp.gportal.jaxa.jp" diff --git a/geospaas_harvesting/providers/local.py b/geospaas_harvesting/providers/local.py index b19d04c4..7d9d253b 100644 --- a/geospaas_harvesting/providers/local.py +++ b/geospaas_harvesting/providers/local.py @@ -26,6 +26,8 @@ class NansatProvider(TimeFilterMixin, Provider): """Provider for local files with metadata provided by Nansat """ + type = 'nansat' + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.search_parameters_parser.add_arguments([ @@ -45,6 +47,8 @@ class NetCDFProvider(TimeFilterMixin, Provider): """Provider for local files with metadata extracted directly using """ + type = 'netcdf' + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.longitude_attribute = kwargs.get('longitude_attribute', 'LONGITUDE') diff --git a/geospaas_harvesting/providers/metno.py b/geospaas_harvesting/providers/metno.py index f91d3f75..2b0879ae 100644 --- a/geospaas_harvesting/providers/metno.py +++ b/geospaas_harvesting/providers/metno.py @@ -6,6 +6,9 @@ class METNOProvider(TimeFilterMixin, Provider): """Provider for MET NO's Thredds""" + + type = 'metno' + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.url = 'https://thredds.met.no/thredds' diff --git a/geospaas_harvesting/providers/noaa.py b/geospaas_harvesting/providers/noaa.py index c6484b9d..41d3111e 100644 --- a/geospaas_harvesting/providers/noaa.py +++ b/geospaas_harvesting/providers/noaa.py @@ -9,6 +9,8 @@ class NOAAProvider(TimeFilterMixin, Provider): """Provider for NOAA FTP servers""" + type = 'noaa' + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.url = "ftp://{server}.ncep.noaa.gov" diff --git a/geospaas_harvesting/providers/podaac.py b/geospaas_harvesting/providers/podaac.py index 53b97c61..d305d0fd 100644 --- a/geospaas_harvesting/providers/podaac.py +++ b/geospaas_harvesting/providers/podaac.py @@ -6,6 +6,9 @@ class PODAACProvider(TimeFilterMixin, Provider): """Provider for PODAAC's OpenDAP""" + + type = 'podaac' + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.url = 'https://opendap.jpl.nasa.gov/opendap' diff --git a/geospaas_harvesting/providers/resto.py b/geospaas_harvesting/providers/resto.py index 7a841646..d164e5cc 100644 --- a/geospaas_harvesting/providers/resto.py +++ b/geospaas_harvesting/providers/resto.py @@ -23,6 +23,8 @@ class RestoProvider(Provider): parameters are fetched from the API. """ + type = 'resto' + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.url = kwargs['url'].rstrip('/') From b2236cad2a300f0c3954219156f3d6e7d0e20dfc Mon Sep 17 00:00:00 2001 From: Adrien Perrin Date: Mon, 21 Oct 2024 10:13:55 +0000 Subject: [PATCH 2/2] update config tests --- tests/test_config.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tests/test_config.py b/tests/test_config.py index 6e9e4419..c800c89e 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -3,11 +3,8 @@ import logging import unittest import unittest.mock as mock -from datetime import datetime, timezone as tz -from pathlib import Path import geospaas_harvesting.config as config -import geospaas_harvesting.providers.base as providers_base import geospaas_harvesting.providers.podaac as providers_podaac import geospaas_harvesting.providers.cmems as providers_cmems import geospaas_harvesting.providers.resto as providers_resto @@ -58,10 +55,17 @@ def test_parse(self): name='cmems', username='user', password='pass'), }) - def test_parse_error(self): + def test_parse_config_error(self): """Test error handling when parsing wrong configuration""" with self.assertLogs(config.logger, level=logging.ERROR): - _ = config.ProvidersArgument('providers').parse({'foo': {}}) + config.ProvidersArgument('providers').parse({'foo': {}}) + + def test_parse_no_provider_found(self): + """Test error handling when no provider matches the requested + type + """ + with self.assertLogs(config.logger, level=logging.ERROR): + config.ProvidersArgument('providers').parse({'foo': {'type': 'foo'}}) class ProvidersConfigurationTestCase(unittest.TestCase):