From 32b87be55f7b7e55179d0c288c97e5e33d139689 Mon Sep 17 00:00:00 2001 From: ElenaKhaustova <157851531+ElenaKhaustova@users.noreply.github.com> Date: Mon, 18 Mar 2024 16:01:36 +0000 Subject: [PATCH] Validate node values (#3715) * Added extra inputs/outputs variables validation at the Node level Signed-off-by: Elena Khaustova * Fixing potential typo Signed-off-by: Elena Khaustova * Added release note Signed-off-by: Elena Khaustova * Retrigger the CI Signed-off-by: Elena Khaustova --------- Signed-off-by: Elena Khaustova --- RELEASE.md | 3 ++- kedro/pipeline/node.py | 23 +++++++++++++++++++++-- tests/ipython/conftest.py | 2 +- tests/pipeline/test_node.py | 13 +++++++++++++ 4 files changed, 37 insertions(+), 4 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index b13003c78c..d8d40e7c19 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,9 +1,10 @@ # Upcoming Release 0.19.4 ## Major features and improvements +* Improved error message when passing wrong value to node. * Cookiecutter errors are shown in short format without the `--verbose` flag. * Kedro commands now work from any subdirectory within a Kedro project. -* Kedro CLI now provides a better error message when project commands are run outside of a project i.e. `kedro run` +* Kedro CLI now provides a better error message when project commands are run outside of a project i.e. `kedro run`. ## Bug fixes and other changes * Updated `kedro pipeline create` and `kedro pipeline delete` to read the base environment from the project settings. diff --git a/kedro/pipeline/node.py b/kedro/pipeline/node.py index 32baaf8e35..ba0d43cddf 100644 --- a/kedro/pipeline/node.py +++ b/kedro/pipeline/node.py @@ -42,7 +42,7 @@ def __init__( # noqa: PLR0913 function. When dict[str, str] is provided, variable names will be mapped to function argument names. outputs: The name or the list of the names of variables used - as outputs to the function. The number of names should match + as outputs of the function. The number of names should match the number of outputs returned by the provided function. When dict[str, str] is provided, variable names will be mapped to the named outputs the function returns. @@ -67,7 +67,6 @@ def __init__( # noqa: PLR0913 and/or fullstops. """ - if not callable(func): raise ValueError( _node_error_message( @@ -83,6 +82,16 @@ def __init__( # noqa: PLR0913 ) ) + for _input in _to_list(inputs): + if not isinstance(_input, str): + raise ValueError( + _node_error_message( + f"names of variables used as inputs to the function " + f"must be of 'String' type, but {_input} from {inputs} " + f"is '{type(_input)}'." + ) + ) + if outputs and not isinstance(outputs, (list, dict, str)): raise ValueError( _node_error_message( @@ -91,6 +100,16 @@ def __init__( # noqa: PLR0913 ) ) + for _output in _to_list(outputs): + if not isinstance(_output, str): + raise ValueError( + _node_error_message( + f"names of variables used as outputs of the function " + f"must be of 'String' type, but {_output} from {outputs} " + f"is '{type(_output)}'." + ) + ) + if not inputs and not outputs: raise ValueError( _node_error_message("it must have some 'inputs' or 'outputs'.") diff --git a/tests/ipython/conftest.py b/tests/ipython/conftest.py index edc7dbc9e8..f263602741 100644 --- a/tests/ipython/conftest.py +++ b/tests/ipython/conftest.py @@ -111,7 +111,7 @@ def dummy_node_empty_input(): return node( func=dummy_function, inputs=["", ""], - outputs=[None], + outputs=None, name="dummy_node_empty_input", ) diff --git a/tests/pipeline/test_node.py b/tests/pipeline/test_node.py index ae68dff87e..3dcf3db790 100644 --- a/tests/pipeline/test_node.py +++ b/tests/pipeline/test_node.py @@ -252,6 +252,14 @@ def duplicate_output_list_node(): return identity, "A", ["A", "A"] +def bad_input_variable_name(): + return lambda x: None, {"a": 1, "b": "B"}, {"a": "A", "b": "B"} + + +def bad_output_variable_name(): + return lambda x: None, {"a": "A", "b": "B"}, {"a": "A", "b": 2} + + @pytest.mark.parametrize( "func, expected", [ @@ -275,6 +283,11 @@ def duplicate_output_list_node(): r"\(\[A\]\) -> \[A;A\] due to " r"duplicate output\(s\) {\'A\'}.", ), + (bad_input_variable_name, "names of variables used as inputs to the function "), + ( + bad_output_variable_name, + "names of variables used as outputs of the function ", + ), ], ) def test_bad_node(func, expected):