From 23c5fe584e4e0054c1488c49746ab354094b005d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gabriel=20Mart=C3=ADn=20Bl=C3=A1zquez?=
Date: Tue, 16 Apr 2024 17:58:18 +0200
Subject: [PATCH] Check that `Step.name` doesn't contain dots or spaces (#545)

---
 src/distilabel/steps/base.py  |  2 +-
 tests/unit/steps/test_base.py | 12 ++++++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/distilabel/steps/base.py b/src/distilabel/steps/base.py
index d5d43bca01..77c459affe 100644
--- a/src/distilabel/steps/base.py
+++ b/src/distilabel/steps/base.py
@@ -96,7 +96,7 @@ def process(self, inputs: *StepInput) -> StepOutput:
         arbitrary_types_allowed=True, validate_default=True, validate_assignment=True
     )
 
-    name: str
+    name: str = Field(pattern=r"^[a-zA-Z0-9_-]+$")
     pipeline: Annotated[Any, Field(exclude=True, repr=False)] = None
     input_mappings: Dict[str, str] = {}
     output_mappings: Dict[str, str] = {}
diff --git a/tests/unit/steps/test_base.py b/tests/unit/steps/test_base.py
index 416b02b23c..54e7fe0e3f 100644
--- a/tests/unit/steps/test_base.py
+++ b/tests/unit/steps/test_base.py
@@ -25,6 +25,7 @@
 )
 from distilabel.steps.typing import GeneratorStepOutput, StepOutput
 from distilabel.utils.serialization import TYPE_INFO_KEY
+from pydantic import ValidationError
 
 
 class DummyStep(Step):
@@ -65,6 +66,17 @@ def process(self, inputs: StepInput) -> StepOutput:
 
 
 class TestStep:
+    def test_create_step_with_invalid_name(self) -> None:
+        pipeline = Pipeline(name="unit-test-pipeline")
+
+        with pytest.raises(ValidationError):
+            DummyStep(
+                name="this-is-not-va.li.d-because-it-contains-dots", pipeline=pipeline
+            )
+
+        with pytest.raises(ValidationError):
+            DummyStep(name="this is not valid because spaces", pipeline=pipeline)
+
     def test_create_step_passing_pipeline(self) -> None:
         pipeline = Pipeline(name="unit-test-pipeline")
         step = DummyStep(name="dummy", pipeline=pipeline)