From 4ed3149806bc84f31354872ebca60a8a0c2652d0 Mon Sep 17 00:00:00 2001
From: Xin Yang <105740670+xyang16@users.noreply.github.com>
Date: Sat, 16 Nov 2024 08:38:17 -0800
Subject: [PATCH] [converter] Trim jit output token_str (#3527)

---
 .../src/main/python/djl_converter/fill_mask_converter.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/extensions/tokenizers/src/main/python/djl_converter/fill_mask_converter.py b/extensions/tokenizers/src/main/python/djl_converter/fill_mask_converter.py
index 5c225293f41..a36be6fd07d 100644
--- a/extensions/tokenizers/src/main/python/djl_converter/fill_mask_converter.py
+++ b/extensions/tokenizers/src/main/python/djl_converter/fill_mask_converter.py
@@ -46,7 +46,9 @@ def verify_jit_output(self, hf_pipeline, encoding, out):
         text = text.replace("[MASK]", tokenizer.mask_token)

         pipeline_output = hf_pipeline(text)
-        if prediction not in [o["token_str"] for o in pipeline_output]:
+        if prediction not in [
+                o["token_str"].strip() for o in pipeline_output
+        ]:
             logging.error(f"Unexpected inference result: {prediction}")
             return False, "Unexpected inference result"
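
Note (not part of the patch): the reason for the .strip() is that Hugging Face fill-mask pipelines built on BPE-style tokenizers (RoBERTa-like models, for example) commonly return "token_str" values with a leading space (" Paris" rather than "Paris"), so a raw membership check against the traced model's decoded prediction can fail even when the prediction is correct. The following is a minimal, illustrative sketch of that comparison; the model name and the "prediction" value are assumptions for demonstration, not values taken from the converter itself.

    # Minimal sketch: comparing a decoded prediction against HF fill-mask output.
    # Assumes the "transformers" package is installed; "roberta-base" and the
    # hypothetical prediction string are illustrative assumptions only.
    from transformers import pipeline

    hf_pipeline = pipeline("fill-mask", model="roberta-base")
    pipeline_output = hf_pipeline("The capital of France is <mask>.")

    # BPE tokenizers often prefix token_str with a space, e.g. " Paris",
    # while the traced (JIT) model's decoded prediction may be "Paris".
    prediction = "Paris"  # hypothetical output from the traced model

    without_strip = prediction in [o["token_str"] for o in pipeline_output]
    with_strip = prediction in [o["token_str"].strip() for o in pipeline_output]
    print(without_strip, with_strip)  # typically False, True for BPE tokenizers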