diff --git a/extensions/tokenizers/src/main/python/djl_converter/huggingface_converter.py b/extensions/tokenizers/src/main/python/djl_converter/huggingface_converter.py
index e181040c125..cde63d373ce 100644
--- a/extensions/tokenizers/src/main/python/djl_converter/huggingface_converter.py
+++ b/extensions/tokenizers/src/main/python/djl_converter/huggingface_converter.py
@@ -161,7 +161,8 @@ def save_rust_model(self, model_info, args: Namespace, temp_dir: str,
         if not os.path.exists(temp_dir):
             os.makedirs(temp_dir)
 
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_id, trust_remote_code=args.trust_remote_code)
         include_types = config.model_type not in [
             "distilbert", "mistral", "qwen2", "gemma2"
         ]
@@ -233,7 +234,7 @@ def save_pytorch_model(self, model_info, args: Namespace, temp_dir: str,
             os.makedirs(temp_dir)
 
         try:
-            hf_pipeline = self.load_model(model_id)
+            hf_pipeline = self.load_model(model_id, args.trust_remote_code)
         except Exception as e:
             logging.warning(f"Failed to load model: {model_id}.")
             logging.warning(e, exc_info=True)
@@ -436,11 +437,12 @@ def verify_jit_output(self, hf_pipeline, encoding, out):
 
         return True, None
 
-    def load_model(self, model_id: str):
+    def load_model(self, model_id: str, trust_remote_code: bool):
         logging.info(f"Loading model: {model_id} ...")
         kwargs = {
             "tokenizer": model_id,
-            "device": -1  # always use CPU to trace the model
+            "device": -1,  # always use CPU to trace the model
+            "trust_remote_code": trust_remote_code
         }
         return pipeline(task=self.task,
                         model=model_id,
diff --git a/extensions/tokenizers/src/main/python/djl_converter/sentence_similarity_converter.py b/extensions/tokenizers/src/main/python/djl_converter/sentence_similarity_converter.py
index b0ac589d67c..f46925aeaea 100644
--- a/extensions/tokenizers/src/main/python/djl_converter/sentence_similarity_converter.py
+++ b/extensions/tokenizers/src/main/python/djl_converter/sentence_similarity_converter.py
@@ -32,10 +32,13 @@ def __init__(self):
         self.inputs = "This is an example sentence"
         self.outputs = 0
 
-    def load_model(self, model_id: str):
+    def load_model(self, model_id: str, trust_remote_code: bool):
         logging.info(f"Loading model: {model_id} ...")
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
-        model = AutoModel.from_pretrained(model_id)
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_id, trust_remote_code=trust_remote_code)
+
+        model = AutoModel.from_pretrained(model_id,
+                                          trust_remote_code=trust_remote_code)
 
         return PipelineHolder(tokenizer, model)
 
@@ -78,7 +81,8 @@ def get_extra_arguments(self, hf_pipeline, model_id: str,
         if hasattr(hf_pipeline.model, "config"):
             config = hf_pipeline.model.config
         else:
-            config = AutoConfig.from_pretrained(model_id)
+            config = AutoConfig.from_pretrained(
+                model_id, trust_remote_code=trust_remote_code)
         tokenizer = hf_pipeline.tokenizer
         if hasattr(config, "max_position_embeddings") and hasattr(
                 tokenizer, "model_max_length"):