diff --git a/pangeo_forge_runner/bakery/base.py b/pangeo_forge_runner/bakery/base.py index 10942532..27eca32a 100644 --- a/pangeo_forge_runner/bakery/base.py +++ b/pangeo_forge_runner/bakery/base.py @@ -1,10 +1,10 @@ from typing import List from apache_beam.pipeline import Pipeline, PipelineOptions -from traitlets import TraitError +from traitlets import HasTraits, TraitError from traitlets.config import LoggingConfigurable -from ..commands.bake import Bake, ExecutionMetadata +from .execution_metadata import ExecutionMetadata class Bakery(LoggingConfigurable): @@ -40,12 +40,12 @@ def bake(self, pipeline: Pipeline, meta: ExecutionMetadata) -> None: result = pipeline.run() job_id = result.job_id() self.log.info( - f"Submitted job {meta.job_id} for recipe {meta.name}", + f"Submitted job {meta.job_id} for recipe {meta.recipe_name}", extra=meta.to_dict() | {"job_id": job_id, "status": "submitted"}, ) @classmethod - def validate_bake_command(cls, bake_command: Bake) -> List[TraitError]: + def validate_bake_command(cls, bake_command: HasTraits) -> List[TraitError]: """ Validates the given bake_command and collects any validation errors. diff --git a/pangeo_forge_runner/bakery/execution_metadata.py b/pangeo_forge_runner/bakery/execution_metadata.py new file mode 100644 index 00000000..0a59d36f --- /dev/null +++ b/pangeo_forge_runner/bakery/execution_metadata.py @@ -0,0 +1,18 @@ +from dataclasses import asdict, dataclass + + +@dataclass +class ExecutionMetadata: + """ + Holds metadata for an execution instance, including recipe and job names. + + Attributes: + recipe_name (str): Name of the recipe being executed. + job_name (str): Unique name for the job execution. + """ + + recipe_name: str + job_name: str + + def to_dict(self) -> dict: + return asdict(self) diff --git a/pangeo_forge_runner/bakery/flink.py b/pangeo_forge_runner/bakery/flink.py index d5b4906a..d695f3ae 100644 --- a/pangeo_forge_runner/bakery/flink.py +++ b/pangeo_forge_runner/bakery/flink.py @@ -13,10 +13,10 @@ import escapism from apache_beam.pipeline import Pipeline, PipelineOptions -from traitlets import Bool, Dict, Integer, TraitError, Unicode +from traitlets import Bool, Dict, HasTraits, Integer, TraitError, Unicode -from ..commands.bake import Bake, ExecutionMetadata from .base import Bakery +from .execution_metadata import ExecutionMetadata # Copied from https://github.com/jupyterhub/kubespawner/blob/7d6d82c2be469dd76f770d6f6ed0d1dade6b24a7/kubespawner/utils.py#L8 @@ -380,15 +380,15 @@ def bake(self, pipeline: Pipeline, meta: ExecutionMetadata) -> None: meta (ExecutionMetadata): An instance of BakeMetadata containing metadata about the bake process. """ self.log.info( - f"Running flink job for recipe {meta.name}\n", + f"Running flink job for recipe {meta.recipe_name}\n", extra=meta.to_dict() | {"status": "running"}, ) pipeline.run() @classmethod - def validate_bake_command(cls, bake_command: Bake) -> List[TraitError]: + def validate_bake_command(cls, bake_command: HasTraits) -> List[TraitError]: errors = [] - if not bake_command.container_image: + if not bake_command._trait_values["container_image"]: errors.append( TraitError( "'container_image' is required when using the 'FlinkOperatorBakery' " diff --git a/pangeo_forge_runner/bakery/local.py b/pangeo_forge_runner/bakery/local.py index 922d6d31..762e6d74 100644 --- a/pangeo_forge_runner/bakery/local.py +++ b/pangeo_forge_runner/bakery/local.py @@ -5,8 +5,8 @@ from apache_beam.pipeline import Pipeline, PipelineOptions from traitlets import Integer -from ..commands.bake import ExecutionMetadata from .base import Bakery +from .execution_metadata import ExecutionMetadata class LocalDirectBakery(Bakery): @@ -54,7 +54,7 @@ def bake(self, pipeline: Pipeline, meta: ExecutionMetadata) -> None: meta (ExecutionMetadata): An instance of BakeMetadata containing metadata about the bake process. """ self.log.info( - f"Running local job for recipe {meta.name}\n", + f"Running local job for recipe {meta.recipe_name}\n", extra=meta.to_dict() | {"status": "running"}, ) pipeline.run() diff --git a/pangeo_forge_runner/commands/bake.py b/pangeo_forge_runner/commands/bake.py index 9c2a9146..8ed8900e 100644 --- a/pangeo_forge_runner/commands/bake.py +++ b/pangeo_forge_runner/commands/bake.py @@ -7,7 +7,6 @@ import re import string import time -from dataclasses import asdict, dataclass from importlib.metadata import distributions from pathlib import Path @@ -17,6 +16,7 @@ from .. import Feedstock from ..bakery.base import Bakery +from ..bakery.execution_metadata import ExecutionMetadata from ..bakery.local import LocalDirectBakery from ..plugin import get_injections, get_injectionspecs_from_entrypoints from ..storage import InputCacheStorage, TargetStorage @@ -24,23 +24,6 @@ from .base import BaseCommand, common_aliases, common_flags -@dataclass -class ExecutionMetadata: - """ - Holds metadata for an execution instance, including recipe and job names. - - Attributes: - recipe_name (str): Name of the recipe being executed. - job_name (str): Unique name for the job execution. - """ - - recipe_name: str - job_name: str - - def to_dict(self) -> dict: - return asdict(self) - - class Bake(BaseCommand): """ Command to bake a pangeo forge recipe in a given bakery @@ -138,8 +121,7 @@ def _validate_job_name(self, proposal): ) return proposal.value - @validate("bakery_class") - def _bakery_impl_validation(self, proposal): + def bakery_impl_validation(self): """ Validates the 'bakery_class' trait using class-specific validation logic. @@ -150,8 +132,11 @@ def _bakery_impl_validation(self, proposal): Raises: - TraitError: If multiple validation errors are encountered, a single TraitError is raised containing a summary of all error messages. + + Note: Traitlets has no convenient way to set initialization hooks (???) so we have + to run this manually. """ - klass = proposal["value"] + klass = self.bakery_class if errors := klass.validate_bake_command(self): if len(errors) == 1: raise errors[0] @@ -202,6 +187,9 @@ def start(self): """ Start the baking process """ + # Validate bakery-specific requirements of this class. Yes, we have to do it here. + self.bakery_impl_validation() + if not "pangeo-forge-recipes" in [d.metadata["Name"] for d in distributions()]: raise ValueError( "To use the `bake` command, `pangeo-forge-recipes` must be installed." diff --git a/tests/unit/test_bake.py b/tests/unit/test_bake.py index 429d2974..21a8dbbf 100644 --- a/tests/unit/test_bake.py +++ b/tests/unit/test_bake.py @@ -10,6 +10,7 @@ import pytest import xarray as xr from packaging.version import parse as parse_version +from traitlets import TraitError from pangeo_forge_runner.commands.bake import Bake @@ -72,12 +73,13 @@ def test_job_name_validation(job_name, raises): bake = Bake() if raises: with pytest.raises( - ValueError, + TraitError, match=re.escape( f"job_name must match the regex ^[a-z][-_0-9a-z]{{0,62}}$, instead found {job_name}" ), ): bake.job_name = job_name + bake.bakery_impl_validation() else: bake.job_name = job_name assert bake.job_name == job_name @@ -90,15 +92,16 @@ def test_job_name_validation(job_name, raises): ["apache/beam_python3.10_sdk:2.51.0", False], ), ) -def test_container_name_validation(container_image, raises): +def test_container_image_validation(container_image, raises): bake = Bake() if raises: with pytest.raises( - ValueError, - match=r"^'container_name' is required.*", + TraitError, + match=r"^'container_image' is required.*", ): bake.bakery_class = "pangeo_forge_runner.bakery.flink.FlinkOperatorBakery" bake.container_image = container_image + bake.bakery_impl_validation() else: bake.bakery_class = "pangeo_forge_runner.bakery.flink.FlinkOperatorBakery" bake.container_image = container_image