From 8ffa2a61a873e89df268286fd156288cf11de47f Mon Sep 17 00:00:00 2001 From: Stefaan Lippens Date: Wed, 16 Oct 2024 10:27:13 +0200 Subject: [PATCH] Issue #604/#644 Rename UDPJobFactory to ProcessBasedJobCreator --- CHANGELOG.md | 2 +- docs/cookbook/job_manager.rst | 2 +- openeo/extra/job_management.py | 10 +++------- tests/extra/test_job_management.py | 26 +++++++++++++------------- 4 files changed, 18 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6021bf399..873bd90fb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 Also added `create_job_db()` factory to easily create a job database from a given dataframe and its type guessed from filename extension. ([#635](https://github.com/Open-EO/openeo-python-client/issues/635)) - `MultiBackendJobManager.run_jobs()` now returns a dictionary with counters/stats about various events during the job run ([#645](https://github.com/Open-EO/openeo-python-client/issues/645)) -- Added `UDPJobFactory` to be used as `start_job` callable with `MultiBackendJobManager` to create multiple jobs from a single parameterized process (e.g. a UDP or remote process definition) ([#604](https://github.com/Open-EO/openeo-python-client/issues/604)) +- Added `ProcessBasedJobCreator` to be used as `start_job` callable with `MultiBackendJobManager` to create multiple jobs from a single parameterized process (e.g. a UDP or remote process definition) ([#604](https://github.com/Open-EO/openeo-python-client/issues/604)) ### Changed diff --git a/docs/cookbook/job_manager.rst b/docs/cookbook/job_manager.rst index 58dd0f892..c505e44e5 100644 --- a/docs/cookbook/job_manager.rst +++ b/docs/cookbook/job_manager.rst @@ -15,6 +15,6 @@ Multi Backend Job Manager .. autoclass:: openeo.extra.job_management.ParquetJobDatabase -.. autoclass:: openeo.extra.job_management.UDPJobFactory +.. autoclass:: openeo.extra.job_management.ProcessBasedJobCreator :members: :special-members: __call__ diff --git a/openeo/extra/job_management.py b/openeo/extra/job_management.py index 84563f47e..09dc5bdef 100644 --- a/openeo/extra/job_management.py +++ b/openeo/extra/job_management.py @@ -938,7 +938,7 @@ def create_job_db(path: Union[str, Path], df: pd.DataFrame, *, on_exists: str = return job_db -class UDPJobFactory: +class ProcessBasedJobCreator: """ Batch job creator (to be used together with :py:class:`MultiBackendJobManager`) @@ -955,13 +955,13 @@ class UDPJobFactory: from openeo.extra.job_management import ( MultiBackendJobManager, create_job_db, - UDPJobFactory, + ProcessBasedJobCreator, ) # Job creator, based on a parameterized openEO process # (specified by the remote process definition at given URL) # which has, say, parameters "start_date" and "bands" for example. - job_starter = UDPJobFactory( + job_starter = ProcessBasedJobCreator( namespace="https://example.com/my_process.json", parameter_defaults={ # Default value for the "bands" parameter @@ -1025,10 +1025,6 @@ class UDPJobFactory: .. versionadded:: 0.33.0 """ - - # TODO: find a better class name (e.g. eliminate over-specificity of "UDP", - # or avoid "factory" as technical mumbo-jumbo)? - def __init__( self, *, diff --git a/tests/extra/test_job_management.py b/tests/extra/test_job_management.py index 9ea9067ac..c9a612af9 100644 --- a/tests/extra/test_job_management.py +++ b/tests/extra/test_job_management.py @@ -30,7 +30,7 @@ CsvJobDatabase, MultiBackendJobManager, ParquetJobDatabase, - UDPJobFactory, + ProcessBasedJobCreator, create_job_db, get_job_db, ) @@ -1009,7 +1009,7 @@ def test_create_job_db(tmp_path, filename, expected): assert path.exists() -class TestUDPJobFactory: +class TestProcessBasedJobCreator: @pytest.fixture def dummy_backend(self, requests_mock, con) -> DummyBackend: dummy = DummyBackend(requests_mock=requests_mock, connection=con) @@ -1073,7 +1073,7 @@ def remote_process_definitions(self, requests_mock) -> dict: def test_minimal(self, con, dummy_backend, remote_process_definitions): """Bare minimum: just start a job, no parameters/arguments""" - job_factory = UDPJobFactory(process_id="3plus5", namespace="https://remote.test/3plus5.json") + job_factory = ProcessBasedJobCreator(process_id="3plus5", namespace="https://remote.test/3plus5.json") job = job_factory.start_job(row=pd.Series({"foo": 123}), connection=con) assert isinstance(job, BatchJob) @@ -1097,7 +1097,7 @@ def test_minimal(self, con, dummy_backend, remote_process_definitions): def test_basic(self, con, dummy_backend, remote_process_definitions): """Basic parameterized UDP job generation""" dummy_backend.extra_job_metadata_fields = ["title", "description"] - job_factory = UDPJobFactory(process_id="increment", namespace="https://remote.test/increment.json") + job_factory = ProcessBasedJobCreator(process_id="increment", namespace="https://remote.test/increment.json") job = job_factory.start_job(row=pd.Series({"data": 123}), connection=con) assert isinstance(job, BatchJob) @@ -1130,7 +1130,7 @@ def test_basic(self, con, dummy_backend, remote_process_definitions): ) def test_basic_parameterization(self, con, dummy_backend, parameter_defaults, row, expected_arguments): """Basic parameterized UDP job generation""" - job_factory = UDPJobFactory( + job_factory = ProcessBasedJobCreator( process_id="increment", namespace="https://remote.test/increment.json", parameter_defaults=parameter_defaults, @@ -1190,7 +1190,7 @@ def test_process_references_in_constructor( # Register personal UDP requests_mock.get(con.build_url("/process_graphs/3plus5"), json=self.PG_3PLUS5) - job_factory = UDPJobFactory(process_id=process_id, namespace=namespace) + job_factory = ProcessBasedJobCreator(process_id=process_id, namespace=namespace) job = job_factory.start_job(row=pd.Series({"foo": 123}), connection=con) assert isinstance(job, BatchJob) @@ -1204,7 +1204,7 @@ def test_process_references_in_constructor( def test_no_process_id_nor_namespace(self): with pytest.raises(ValueError, match="At least one of `process_id` and `namespace` should be provided"): - _ = UDPJobFactory() + _ = ProcessBasedJobCreator() @pytest.fixture def job_manager(self, tmp_path, dummy_backend) -> MultiBackendJobManager: @@ -1215,7 +1215,7 @@ def job_manager(self, tmp_path, dummy_backend) -> MultiBackendJobManager: def test_with_job_manager_remote_basic( self, tmp_path, requests_mock, dummy_backend, job_manager, sleep_mock, remote_process_definitions ): - job_starter = UDPJobFactory( + job_starter = ProcessBasedJobCreator( process_id="increment", namespace="https://remote.test/increment.json", parameter_defaults={"increment": 5}, @@ -1321,7 +1321,7 @@ def test_with_job_manager_remote_parameter_handling( df_data, expected_arguments, ): - job_starter = UDPJobFactory( + job_starter = ProcessBasedJobCreator( process_id="increment", namespace="https://remote.test/increment.json", parameter_defaults=parameter_defaults, @@ -1381,7 +1381,7 @@ def test_with_job_manager_remote_parameter_handling( } def test_with_job_manager_remote_geometry(self, tmp_path, requests_mock, dummy_backend, job_manager, sleep_mock): - job_starter = UDPJobFactory( + job_starter = ProcessBasedJobCreator( process_id="offset_polygon", namespace="https://remote.test/offset_polygon.json", parameter_defaults={"data": 123}, @@ -1466,7 +1466,7 @@ def test_with_job_manager_remote_geometry_after_resume( self, tmp_path, requests_mock, dummy_backend, job_manager, sleep_mock, db_class ): """Test if geometry handling works properly after resuming from CSV serialized job db.""" - job_starter = UDPJobFactory( + job_starter = ProcessBasedJobCreator( process_id="offset_polygon", namespace="https://remote.test/offset_polygon.json", parameter_defaults={"data": 123}, @@ -1554,7 +1554,7 @@ def test_with_job_manager_udp_basic( # Register personal UDP increment_udp_mock = requests_mock.get(con.build_url("/process_graphs/increment"), json=udp) - job_starter = UDPJobFactory( + job_starter = ProcessBasedJobCreator( process_id="increment", # No namespace to trigger personal UDP mode namespace=None, @@ -1603,7 +1603,7 @@ def test_with_job_manager_udp_basic( def test_with_job_manager_parameter_column_map( self, tmp_path, requests_mock, dummy_backend, job_manager, sleep_mock, remote_process_definitions ): - job_starter = UDPJobFactory( + job_starter = ProcessBasedJobCreator( process_id="increment", namespace="https://remote.test/increment.json", parameter_column_map={"data": "numberzzz", "increment": "add_thiz"},