diff --git a/runtimes/huggingface/mlserver_huggingface/common.py b/runtimes/huggingface/mlserver_huggingface/common.py index 7cb8f76fb..71fb420ef 100644 --- a/runtimes/huggingface/mlserver_huggingface/common.py +++ b/runtimes/huggingface/mlserver_huggingface/common.py @@ -63,7 +63,7 @@ def load_pipeline_from_settings( # If max_batch_size > 0 we need to ensure tokens are padded if settings.max_batch_size: model = hf_pipeline.model - eos_token_id = model.config.eos_token_id + eos_token_id = model.config.eos_token_id # type: ignore hf_pipeline.tokenizer.pad_token_id = [str(eos_token_id)] # type: ignore return hf_pipeline