From bda1e6e8a50978ac704540225a210abd04dc0eaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yolan=20Honor=C3=A9-Roug=C3=A9?= Date: Fri, 22 Nov 2024 22:01:08 +0100 Subject: [PATCH 01/11] :sparkles: Enable to override params at predict time in KedroPipelineModel --- kedro_mlflow/mlflow/kedro_pipeline_model.py | 37 +++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/kedro_mlflow/mlflow/kedro_pipeline_model.py b/kedro_mlflow/mlflow/kedro_pipeline_model.py index 5c8d2ce7..f7e2c760 100644 --- a/kedro_mlflow/mlflow/kedro_pipeline_model.py +++ b/kedro_mlflow/mlflow/kedro_pipeline_model.py @@ -196,17 +196,43 @@ def load_context(self, context): updated_catalog._datasets[name]._filepath = Path(uri) self.loaded_catalog.save(name=name, data=updated_catalog.load(name)) - def predict(self, context, model_input): + def predict(self, context, model_input, params=None): # we create an empty hook manager but do NOT register hooks # because we want this model be executable outside of a kedro project + + # params can pass + # TODO globals + # TODO runtime + # TODO parameters -> I'd prefer not have them, but it would require catalog to be able to not be fully resolved if we want to pass runtime and globals + # TODO hooks + # TODO runner + hook_manager = _create_hook_manager() + # _register_hooks(hook_manager, params.hooks) + + runner = self.runner # params.runner or self.runner + + for name, value in params.parameters.items(): + param = f"params:{name}" + if param in self.loaded_catalog._datasets: + self._logger.info(f"Use {param}={value}") + self.loaded_catalog.save(name=param, data=value, replace=True) + else: + params_set = { + ds[7:] + for ds in self.loaded_catalog._datasets + if ds.startswith("params:") + } + self._logger.info( + f"{name} is not a valid parameter. Use one of '{','.join(params_set)}'. 
" + ) self.loaded_catalog.save( name=self.input_name, data=model_input, ) - run_output = self.runner.run( + run_output = runner.run( pipeline=self.pipeline, catalog=self.loaded_catalog, hook_manager=hook_manager, @@ -221,3 +247,10 @@ def predict(self, context, model_input): class KedroPipelineModelError(Exception): """Error raised when the KedroPipelineModel construction fails""" + + +# from pydantic import BaseModel +# class PredictParamsSchema(BaseModel): +# parameters: dict[str, Any] +# runner: AbstractRunner +# hooks: Iterable[Any] # cf. _register_hooks From 6fc30d4e31830fd407057c0a3ca00e3366b76fb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yolan=20Honor=C3=A9-Roug=C3=A9?= Date: Fri, 22 Nov 2024 23:07:48 +0100 Subject: [PATCH 02/11] specify signature at runtime with parameters --- kedro_mlflow/framework/hooks/mlflow_hook.py | 13 ++++++- kedro_mlflow/mlflow/kedro_pipeline_model.py | 43 +++++++++------------ 2 files changed, 31 insertions(+), 25 deletions(-) diff --git a/kedro_mlflow/framework/hooks/mlflow_hook.py b/kedro_mlflow/framework/hooks/mlflow_hook.py index 7aed8ccb..a6d66639 100644 --- a/kedro_mlflow/framework/hooks/mlflow_hook.py +++ b/kedro_mlflow/framework/hooks/mlflow_hook.py @@ -389,7 +389,18 @@ def after_pipeline_run( if isinstance(model_signature, str): if model_signature == "auto": input_data = catalog.load(pipeline.input_name) - model_signature = infer_signature(model_input=input_data) + + # all pipeline params will be overridable at predict time: https://mlflow.org/docs/latest/model/signatures.html#model-signatures-with-inference-params + # I add the special "runner" parameter to be able to choose it at runtime + pipeline_params = { + ds_name[7:]: catalog.load(ds_name) + for ds_name in pipeline.inputs() + if ds_name.startswith("params:") + } | {"runner": "SequentialRunner"} + model_signature = infer_signature( + model_input=input_data, + params=pipeline_params, + ) mlflow.pyfunc.log_model( python_model=kedro_pipeline_model, diff --git 
a/kedro_mlflow/mlflow/kedro_pipeline_model.py b/kedro_mlflow/mlflow/kedro_pipeline_model.py index f7e2c760..0358bfc6 100644 --- a/kedro_mlflow/mlflow/kedro_pipeline_model.py +++ b/kedro_mlflow/mlflow/kedro_pipeline_model.py @@ -1,6 +1,6 @@ import logging from pathlib import Path -from typing import Dict, Optional, Union +from typing import Any, Dict, Optional, Union from kedro.framework.hooks import _create_hook_manager from kedro.io import DataCatalog, MemoryDataset @@ -8,10 +8,17 @@ from kedro.runner import AbstractRunner, SequentialRunner from kedro_datasets.pickle import PickleDataset from mlflow.pyfunc import PythonModel +from pydantic import BaseModel from kedro_mlflow.pipeline.pipeline_ml import PipelineML +class PredictParamsSchema(BaseModel): + parameters: Optional[dict[str, Any]] = {} + # runner: AbstractRunner + # hooks: Iterable[Any] # cf. _register_hooks + + class KedroPipelineModel(PythonModel): def __init__( self, @@ -207,25 +214,20 @@ def predict(self, context, model_input, params=None): # TODO hooks # TODO runner - hook_manager = _create_hook_manager() - # _register_hooks(hook_manager, params.hooks) + params = params or {} + runner_class = params.pop("runner", "SequentialRunner") + runner = ( + self.runner + ) # runner="build it dynamically from runner class" or self.runner - runner = self.runner # params.runner or self.runner + hook_manager = _create_hook_manager() + # _register_hooks(hook_manager, predict_params.hooks) - for name, value in params.parameters.items(): + for name, value in params.items(): + # no need to check if params are ni the catalog, because mlflow already checks that the params mathc the signature param = f"params:{name}" - if param in self.loaded_catalog._datasets: - self._logger.info(f"Use {param}={value}") - self.loaded_catalog.save(name=param, data=value, replace=True) - else: - params_set = { - ds[7:] - for ds in self.loaded_catalog._datasets - if ds.startswith("params:") - } - self._logger.info( - f"{name} is not a valid 
parameter. Use one of '{','.join(params_set)}'. " - ) + self._logger.info(f"Using {param}={value} for the prediction") + self.loaded_catalog.save(name=param, data=value) self.loaded_catalog.save( name=self.input_name, @@ -247,10 +249,3 @@ def predict(self, context, model_input, params=None): class KedroPipelineModelError(Exception): """Error raised when the KedroPipelineModel construction fails""" - - -# from pydantic import BaseModel -# class PredictParamsSchema(BaseModel): -# parameters: dict[str, Any] -# runner: AbstractRunner -# hooks: Iterable[Any] # cf. _register_hooks From cb7e9f5ddbc9c3917a7d850bb080880d4fb31300 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yolan=20Honor=C3=A9-Roug=C3=A9?= Date: Fri, 22 Nov 2024 23:11:19 +0100 Subject: [PATCH 03/11] remove unused pydantic validation class --- kedro_mlflow/mlflow/kedro_pipeline_model.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/kedro_mlflow/mlflow/kedro_pipeline_model.py b/kedro_mlflow/mlflow/kedro_pipeline_model.py index 0358bfc6..5227a2f4 100644 --- a/kedro_mlflow/mlflow/kedro_pipeline_model.py +++ b/kedro_mlflow/mlflow/kedro_pipeline_model.py @@ -1,6 +1,6 @@ import logging from pathlib import Path -from typing import Any, Dict, Optional, Union +from typing import Dict, Optional, Union from kedro.framework.hooks import _create_hook_manager from kedro.io import DataCatalog, MemoryDataset @@ -8,17 +8,10 @@ from kedro.runner import AbstractRunner, SequentialRunner from kedro_datasets.pickle import PickleDataset from mlflow.pyfunc import PythonModel -from pydantic import BaseModel from kedro_mlflow.pipeline.pipeline_ml import PipelineML -class PredictParamsSchema(BaseModel): - parameters: Optional[dict[str, Any]] = {} - # runner: AbstractRunner - # hooks: Iterable[Any] # cf. 
_register_hooks - - class KedroPipelineModel(PythonModel): def __init__( self, From 25010aad8c7ff5537e045546cd33799f5db35ed5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yolan=20Honor=C3=A9-Roug=C3=A9?= Date: Sat, 23 Nov 2024 16:47:01 +0100 Subject: [PATCH 04/11] make sure that only inference params are allowed, not train ones --- kedro_mlflow/framework/hooks/mlflow_hook.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kedro_mlflow/framework/hooks/mlflow_hook.py b/kedro_mlflow/framework/hooks/mlflow_hook.py index a6d66639..53bfc629 100644 --- a/kedro_mlflow/framework/hooks/mlflow_hook.py +++ b/kedro_mlflow/framework/hooks/mlflow_hook.py @@ -394,7 +394,7 @@ def after_pipeline_run( # I add the special "runner" parameter to be able to choose it at runtime pipeline_params = { ds_name[7:]: catalog.load(ds_name) - for ds_name in pipeline.inputs() + for ds_name in pipeline.inference.inputs() if ds_name.startswith("params:") } | {"runner": "SequentialRunner"} model_signature = infer_signature( From 47a0cebbb5cc1bcd3048fe3fa994e142d8011b57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yolan=20Honor=C3=A9-Roug=C3=A9?= Date: Sat, 30 Nov 2024 22:19:49 +0100 Subject: [PATCH 05/11] pass runner through params --- kedro_mlflow/mlflow/kedro_pipeline_model.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/kedro_mlflow/mlflow/kedro_pipeline_model.py b/kedro_mlflow/mlflow/kedro_pipeline_model.py index 5227a2f4..fab3e47e 100644 --- a/kedro_mlflow/mlflow/kedro_pipeline_model.py +++ b/kedro_mlflow/mlflow/kedro_pipeline_model.py @@ -6,6 +6,7 @@ from kedro.io import DataCatalog, MemoryDataset from kedro.pipeline import Pipeline from kedro.runner import AbstractRunner, SequentialRunner +from kedro.utils import load_obj from kedro_datasets.pickle import PickleDataset from mlflow.pyfunc import PythonModel @@ -208,16 +209,23 @@ def predict(self, context, model_input, params=None): # TODO runner params = params or {} + runner_class = 
params.pop("runner", "SequentialRunner") + + # we don't want to recreate the runner object on each predict + # because reimporting comes with a performance penalty in a serving setup + # so if it is the default we just use the existing runner runner = ( self.runner - ) # runner="build it dynamically from runner class" or self.runner + if runner_class == self.runner.__name__ + else load_obj(runner_class, "kedro.runner") + ) hook_manager = _create_hook_manager() # _register_hooks(hook_manager, predict_params.hooks) for name, value in params.items(): - # no need to check if params are ni the catalog, because mlflow already checks that the params mathc the signature + # no need to check if params are in the catalog, because mlflow already checks that the params matching the signature param = f"params:{name}" self._logger.info(f"Using {param}={value} for the prediction") self.loaded_catalog.save(name=param, data=value) From 678ad9485c4b20ad8dfd1d136d88fbf2f1954b03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yolan=20Honor=C3=A9-Roug=C3=A9?= Date: Sat, 30 Nov 2024 22:55:46 +0100 Subject: [PATCH 06/11] fix change of default signature in tests --- kedro_mlflow/mlflow/kedro_pipeline_model.py | 2 +- tests/framework/hooks/test_hook_pipeline_ml.py | 11 +++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/kedro_mlflow/mlflow/kedro_pipeline_model.py b/kedro_mlflow/mlflow/kedro_pipeline_model.py index fab3e47e..23e78bff 100644 --- a/kedro_mlflow/mlflow/kedro_pipeline_model.py +++ b/kedro_mlflow/mlflow/kedro_pipeline_model.py @@ -217,7 +217,7 @@ def predict(self, context, model_input, params=None): # so if it is the default we just use the existing runner runner = ( self.runner - if runner_class == self.runner.__name__ + if runner_class == type(self.runner).__name__ else load_obj(runner_class, "kedro.runner") ) diff --git a/tests/framework/hooks/test_hook_pipeline_ml.py b/tests/framework/hooks/test_hook_pipeline_ml.py index 82c8d0b4..7731a20a 100644 --- 
a/tests/framework/hooks/test_hook_pipeline_ml.py +++ b/tests/framework/hooks/test_hook_pipeline_ml.py @@ -159,7 +159,14 @@ def convert_probs_to_pred(data, threshold): @pytest.fixture def dummy_signature(dummy_catalog, dummy_pipeline_ml): input_data = dummy_catalog.load(dummy_pipeline_ml.input_name) - dummy_signature = infer_signature(input_data) + params_dict = { + key: dummy_catalog.load(key) + for key in dummy_pipeline_ml.inference.inputs() + if key.startswith("params:") + } + dummy_signature = infer_signature( + model_input=input_data, params={**params_dict, "runner": "SequentialRunner"} + ) return dummy_signature @@ -303,7 +310,7 @@ def test_mlflow_hook_save_pipeline_ml( assert trained_model.metadata.signature.to_dict() == { "inputs": '[{"type": "long", "name": "a", "required": true}]', "outputs": None, - "params": None, + "params": '[{"name": "runner", "type": "string", "default": "SequentialRunner", "shape": null}]', } From a8694daf75c4a1c17ecf138f42d55addff6a756f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yolan=20Honor=C3=A9-Roug=C3=A9?= Date: Sun, 1 Dec 2024 15:29:10 +0100 Subject: [PATCH 07/11] add tests --- .../framework/hooks/test_hook_pipeline_ml.py | 120 +++++++++++++++--- 1 file changed, 103 insertions(+), 17 deletions(-) diff --git a/tests/framework/hooks/test_hook_pipeline_ml.py b/tests/framework/hooks/test_hook_pipeline_ml.py index 7731a20a..3197a51f 100644 --- a/tests/framework/hooks/test_hook_pipeline_ml.py +++ b/tests/framework/hooks/test_hook_pipeline_ml.py @@ -45,7 +45,7 @@ def preprocess_fun(data): return data def train_fun(data, param): - return 2 + return 1 def predict_fun(model, data): return data * model @@ -105,7 +105,7 @@ def remove_stopwords(data, stopwords): return data def train_fun_hyperparam(data, hyperparam): - return 2 + return 1 def predict_fun(model, data): return data * model @@ -156,6 +156,25 @@ def convert_probs_to_pred(data, threshold): return pipeline_ml_with_parameters +@pytest.fixture +def 
catalog_with_parameters(kedro_project_with_mlflow_conf): + catalog_with_parameters = DataCatalog( + { + "data": MemoryDataset(pd.DataFrame(data=[0.5], columns=["a"])), + "cleaned_data": MemoryDataset(), + "params:stopwords": MemoryDataset(["Hello", "Hi"]), + "params:penalty": MemoryDataset(0.1), + "model": PickleDataset( + filepath=( + kedro_project_with_mlflow_conf / "data" / "model.csv" + ).as_posix() + ), + "params:threshold": MemoryDataset(0.5), + } + ) + return catalog_with_parameters + + @pytest.fixture def dummy_signature(dummy_catalog, dummy_pipeline_ml): input_data = dummy_catalog.load(dummy_pipeline_ml.input_name) @@ -441,6 +460,7 @@ def test_mlflow_hook_save_pipeline_ml_with_default_copy_mode_assign( def test_mlflow_hook_save_pipeline_ml_with_parameters( kedro_project_with_mlflow_conf, # a fixture to be in a kedro project pipeline_ml_with_parameters, + catalog_with_parameters, dummy_run_params, ): # config_with_base_mlflow_conf is a conftest fixture @@ -448,21 +468,6 @@ def test_mlflow_hook_save_pipeline_ml_with_parameters( with KedroSession.create(project_path=kedro_project_with_mlflow_conf) as session: context = session.load_context() - catalog_with_parameters = DataCatalog( - { - "data": MemoryDataset(pd.DataFrame(data=[1], columns=["a"])), - "cleaned_data": MemoryDataset(), - "params:stopwords": MemoryDataset(["Hello", "Hi"]), - "params:penalty": MemoryDataset(0.1), - "model": PickleDataset( - filepath=( - kedro_project_with_mlflow_conf / "data" / "model.csv" - ).as_posix() - ), - "params:threshold": MemoryDataset(0.5), - } - ) - mlflow_hook = MlflowHook() mlflow_hook.after_context_created(context) @@ -694,3 +699,84 @@ def test_mlflow_hook_save_pipeline_ml_with_dataset_factory( trained_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/artifacts") # the real test is that the model is loaded without error assert trained_model is not None + + +def test_mlflow_hook_save_and_load_pipeline_ml_with_inference_parameters( + kedro_project_with_mlflow_conf, # a 
fixture to be in a kedro project + pipeline_ml_with_parameters, + catalog_with_parameters, + dummy_run_params, +): + bootstrap_project(kedro_project_with_mlflow_conf) + with KedroSession.create(project_path=kedro_project_with_mlflow_conf) as session: + context = session.load_context() + + mlflow_hook = MlflowHook() + mlflow_hook.after_context_created(context) + + runner = SequentialRunner() + mlflow_hook.after_catalog_created( + catalog=catalog_with_parameters, + # `after_catalog_created` is not using any of arguments bellow, + # so we are setting them to empty values. + conf_catalog={}, + conf_creds={}, + feed_dict={}, + save_version="", + load_versions="", + ) + mlflow_hook.before_pipeline_run( + run_params=dummy_run_params, + pipeline=pipeline_ml_with_parameters, + catalog=catalog_with_parameters, + ) + runner.run( + pipeline_ml_with_parameters, catalog_with_parameters, session._hook_manager + ) + + current_run_id = mlflow.active_run().info.run_id + + # This is what we want to test: parameters should be passed by defautl to the signature + mlflow_hook.after_pipeline_run( + run_params=dummy_run_params, + pipeline=pipeline_ml_with_parameters, + catalog=catalog_with_parameters, + ) + + # test : parameters should have been logged + trained_model = mlflow.pyfunc.load_model(f"runs:/{current_run_id}/model") + + # test 1 : the parameters in the signature should have the runner with a default "SequentialRunner" + assert ( + '{"name": "runner", "type": "string", "default": "SequentialRunner", "shape": null}' + in trained_model.metadata.signature.to_dict()["params"] + ) + + # test 2 : the "threshold" parameter of the inference pipeline should be in the signature + # { + # key: dummy_catalog.load(key) + # for key in dummy_pipeline_ml.inference.inputs() + # if key.startswith("params:") + # } + + assert ( + '{"name": "threshold", "type": "double", "default": 0.5, "shape": null}' + in trained_model.metadata.signature.to_dict()["params"] + ) + + # test 3 : we get different 
results when passing parameters + + inference_data = pd.DataFrame(data=[0.2, 0.6, 0.9], columns=["a"]) + + assert all( + trained_model.predict(inference_data) + == pd.DataFrame([0, 1, 1]).values # no param = 0.5, the default + ) + + assert all( + trained_model.predict( + inference_data, + params={"threshold": 0.8}, + ) + == pd.DataFrame([0, 0, 1]).values # 0.6 is now below threshold + ) From 7bf261281cd634ea9458d0b703d32699ce1306be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yolan=20Honor=C3=A9-Roug=C3=A9?= Date: Sun, 1 Dec 2024 22:33:49 +0100 Subject: [PATCH 08/11] add test for runner --- kedro_mlflow/mlflow/kedro_pipeline_model.py | 4 +- .../framework/hooks/test_hook_pipeline_ml.py | 82 +++++++++++++++++-- 2 files changed, 76 insertions(+), 10 deletions(-) diff --git a/kedro_mlflow/mlflow/kedro_pipeline_model.py b/kedro_mlflow/mlflow/kedro_pipeline_model.py index 23e78bff..c630b7b1 100644 --- a/kedro_mlflow/mlflow/kedro_pipeline_model.py +++ b/kedro_mlflow/mlflow/kedro_pipeline_model.py @@ -218,7 +218,9 @@ def predict(self, context, model_input, params=None): runner = ( self.runner if runner_class == type(self.runner).__name__ - else load_obj(runner_class, "kedro.runner") + else load_obj( + runner_class, "kedro.runner" + )() # do not forget to instantiate the class with ending () ) hook_manager = _create_hook_manager() diff --git a/tests/framework/hooks/test_hook_pipeline_ml.py b/tests/framework/hooks/test_hook_pipeline_ml.py index 3197a51f..394921d4 100644 --- a/tests/framework/hooks/test_hook_pipeline_ml.py +++ b/tests/framework/hooks/test_hook_pipeline_ml.py @@ -743,16 +743,10 @@ def test_mlflow_hook_save_and_load_pipeline_ml_with_inference_parameters( catalog=catalog_with_parameters, ) - # test : parameters should have been logged + # test 1 : parameters should have been logged trained_model = mlflow.pyfunc.load_model(f"runs:/{current_run_id}/model") - # test 1 : the parameters in the signature should have the runner with a default "SequentialRunner" - assert ( 
- '{"name": "runner", "type": "string", "default": "SequentialRunner", "shape": null}' - in trained_model.metadata.signature.to_dict()["params"] - ) - - # test 2 : the "threshold" parameter of the inference pipeline should be in the signature + # The "threshold" parameter of the inference pipeline should be in the signature # { # key: dummy_catalog.load(key) # for key in dummy_pipeline_ml.inference.inputs() @@ -764,7 +758,7 @@ def test_mlflow_hook_save_and_load_pipeline_ml_with_inference_parameters( in trained_model.metadata.signature.to_dict()["params"] ) - # test 3 : we get different results when passing parameters + # test 2 : we get different results when passing parameters inference_data = pd.DataFrame(data=[0.2, 0.6, 0.9], columns=["a"]) @@ -780,3 +774,73 @@ def test_mlflow_hook_save_and_load_pipeline_ml_with_inference_parameters( ) == pd.DataFrame([0, 0, 1]).values # 0.6 is now below threshold ) + + +def test_mlflow_hook_save_and_load_pipeline_ml_specify_runner( + kedro_project_with_mlflow_conf, # a fixture to be in a kedro project + pipeline_ml_with_parameters, + catalog_with_parameters, + dummy_run_params, +): + bootstrap_project(kedro_project_with_mlflow_conf) + with KedroSession.create(project_path=kedro_project_with_mlflow_conf) as session: + context = session.load_context() + + mlflow_hook = MlflowHook() + mlflow_hook.after_context_created(context) + + runner = SequentialRunner() + mlflow_hook.after_catalog_created( + catalog=catalog_with_parameters, + # `after_catalog_created` is not using any of arguments bellow, + # so we are setting them to empty values. 
+ conf_catalog={}, + conf_creds={}, + feed_dict={}, + save_version="", + load_versions="", + ) + mlflow_hook.before_pipeline_run( + run_params=dummy_run_params, + pipeline=pipeline_ml_with_parameters, + catalog=catalog_with_parameters, + ) + runner.run( + pipeline_ml_with_parameters, catalog_with_parameters, session._hook_manager + ) + + current_run_id = mlflow.active_run().info.run_id + + # This is what we want to test: parameters should be passed by defautl to the signature + mlflow_hook.after_pipeline_run( + run_params=dummy_run_params, + pipeline=pipeline_ml_with_parameters, + catalog=catalog_with_parameters, + ) + + # test : parameters should have been logged + trained_model = mlflow.pyfunc.load_model(f"runs:/{current_run_id}/model") + + # test 1 : the parameters in the signature should have the runner with a default "SequentialRunner" + assert ( + '{"name": "runner", "type": "string", "default": "SequentialRunner", "shape": null}' + in trained_model.metadata.signature.to_dict()["params"] + ) + + inference_data = pd.DataFrame(data=[0.2, 0.6, 0.9], columns=["a"]) + + # raise an error with a non existing runner + with pytest.raises( + AttributeError, + match="module 'kedro.runner' has no attribute 'non_existing_runner'", + ): + trained_model.predict( + inference_data, params={"runner": "non_existing_runner"} + ) + + # second test : run with another runner (i should test that it is indeed the other one which is picked) + # the log clearly shows it + assert all( + trained_model.predict(inference_data, params={"runner": "ThreadRunner"}) + == pd.DataFrame([0, 1, 1]).values # no param = 0.5, the default + ) From b3c8f074556b9a5242e94c77f002538819b8ddcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yolan=20Honor=C3=A9-Roug=C3=A9?= Date: Sun, 1 Dec 2024 22:34:24 +0100 Subject: [PATCH 09/11] pin mlflow>=2.7 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 732f9f72..dbaffa5e 100644 --- 
a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ kedro>=0.19.0, <0.20.0 kedro_datasets -mlflow>=1.29.0, <3.0.0 +mlflow>=2.7.0, <3.0.0 pydantic>=1.0.0, <3.0.0 From b052dff9ec8a4e3e9c2b3ce6a3d77d9f5aa892d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yolan=20Honor=C3=A9-Roug=C3=A9?= Date: Sun, 1 Dec 2024 22:34:42 +0100 Subject: [PATCH 10/11] up changelog --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c8144230..2e308da5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,12 @@ ## [Unreleased] +- :sparkles: Enable to override parameters and the runner at predict time in ``KedroPipelineModel`` ([#445](https://github.com/Galileo-Galilei/kedro-mlflow/issues/445), [#612](https://github.com/Galileo-Galilei/kedro-mlflow/pull/612)) + +### Changed + +- :boom: :pushpin: Pin ``mlflow>=2.7.0`` to support predict parameters for custom models (see above feature) + ## [0.13.3] - 2024-10-29 ### Added From b81a0826630d084b6f792934531ceb42f19ca423 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yolan=20Honor=C3=A9-Roug=C3=A9?= Date: Sun, 1 Dec 2024 22:35:13 +0100 Subject: [PATCH 11/11] typo --- tests/framework/hooks/test_hook_pipeline_ml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/framework/hooks/test_hook_pipeline_ml.py b/tests/framework/hooks/test_hook_pipeline_ml.py index 394921d4..2173d8d1 100644 --- a/tests/framework/hooks/test_hook_pipeline_ml.py +++ b/tests/framework/hooks/test_hook_pipeline_ml.py @@ -838,7 +838,7 @@ def test_mlflow_hook_save_and_load_pipeline_ml_specify_runner( inference_data, params={"runner": "non_existing_runner"} ) - # second test : run with another runner (i should test that it is indeed the other one which is picked) + # second test : run with another runner (I should test that it is indeed the other one which is picked) # the log clearly shows it assert all( trained_model.predict(inference_data, params={"runner": "ThreadRunner"})