Skip to content

Commit

Permalink
Enable arg trust_remote_code to use custom tokenizers (#16)
Browse files Browse the repository at this point in the history
* Enable arg trust_remote_code to use custom tokenizers

Allow for custom models defined on the Hub in their own modeling files.

Signed-off-by: vbaddi <[email protected]>

* fix the tokenizer in export_hf_to_cloud_ai_100

Signed-off-by: vbaddi <[email protected]>

---------

Signed-off-by: vbaddi <[email protected]>
  • Loading branch information
vbaddi authored May 21, 2024
1 parent 60b880b commit 9358f35
Show file tree
Hide file tree
Showing 5 changed files with 17 additions and 9 deletions.
4 changes: 3 additions & 1 deletion QEfficient/cloud/execute.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ def main(
login(hf_token)
# Download tokenizer along with model if it doesn't exist
model_hf_path = hf_download(repo_id=model_name, cache_dir=cache_dir, allow_patterns=["*.json"])
tokenizer = AutoTokenizer.from_pretrained(model_hf_path, use_cache=True, padding_side="left")
tokenizer = AutoTokenizer.from_pretrained(
model_hf_path, use_cache=True, padding_side="left", trust_remote_code=True
)

cloud_ai_100_exec_kv(tokenizer=tokenizer, qpc=qpc_path, device_id=devices, prompt=prompt)

Expand Down
10 changes: 6 additions & 4 deletions QEfficient/cloud/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,13 @@

import QEfficient
from QEfficient.exporter.export_hf_to_cloud_ai_100 import qualcomm_efficient_converter
from QEfficient.utils.constants import Constants, QEFF_MODELS_DIR
from QEfficient.utils import hf_download
from QEfficient.utils.constants import Constants

# Specifically for Docker images.
ROOT_DIR = os.path.dirname(os.path.abspath(""))



def main(model_name: str, cache_dir: str) -> None:
"""
Api() for exporting to Onnx Model.
Expand All @@ -28,7 +27,9 @@ def main(model_name: str, cache_dir: str) -> None:
:cache_dir: str. Cache dir to store the downloaded huggingface files.
"""
model_hf_path = hf_download(repo_id=model_name, hf_token=None, cache_dir=cache_dir)
tokenizer = AutoTokenizer.from_pretrained(model_hf_path, use_cache=True, padding_side="left")
tokenizer = AutoTokenizer.from_pretrained(
model_hf_path, use_cache=True, padding_side="left", trust_remote_code=True
)
model = AutoModelForCausalLM.from_pretrained(model_hf_path, use_cache=True)

# Easy and minimal api to update the model to QEff.
Expand All @@ -52,7 +53,8 @@ def main(model_name: str, cache_dir: str) -> None:
parser = argparse.ArgumentParser(description="Export script.")
parser.add_argument("--model_name", "--model-name", required=True, help="HF Model card name/id")
parser.add_argument(
"--cache_dir", "--cache-dir",
"--cache_dir",
"--cache-dir",
required=False,
default=Constants.CACHE_DIR,
help="Cache_dir to store the HF files",
Expand Down
4 changes: 3 additions & 1 deletion QEfficient/cloud/infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,9 @@ def main(
cache_dir=cache_dir,
ignore_patterns=["*.txt", "*.onnx", "*.ot", "*.md", "*.tflite", "*.pdf"],
)
tokenizer = AutoTokenizer.from_pretrained(model_hf_path, use_cache=True, padding_side="left")
tokenizer = AutoTokenizer.from_pretrained(
model_hf_path, use_cache=True, padding_side="left", trust_remote_code=True
)

if qpc_exists(qpc_dir_path):
# execute
Expand Down
6 changes: 3 additions & 3 deletions QEfficient/exporter/export_hf_to_cloud_ai_100.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,10 @@ def convert_to_cloud_bertstyle(

# Load tokenizer
if tokenizer is None:
tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")
tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left", trust_remote_code=True)
else:
if tokenizer.padding_side != "left":
logger.warning(f"Please use padding_side='left' while initializing the tokenizer")
logger.warning("Please use padding_side='left' while initializing the tokenizer")
tokenizer.padding_side = "left"

if tokenizer.pad_token_id is None:
Expand Down Expand Up @@ -263,7 +263,7 @@ def convert_to_cloud_kvstyle(
tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")
else:
if tokenizer.padding_side != "left":
logger.warning(f"Please use padding_side='left' while initializing the tokenizer")
logger.warning("Please use padding_side='left' while initializing the tokenizer")
tokenizer.padding_side = "left"

if tokenizer.pad_token_id is None:
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,8 @@ from QEfficient.exporter.export_hf_to_cloud_ai_100 import qualcomm_efficient_con
# Bertstyle models do not have any optimization w.r.t KV cache changes and are unoptimized version.
# It is recommended to use kv=True for better performance.

# For custom models defined on the Hub in their own modeling files, the `trust_remote_code` option
# should be set to `True` in `AutoTokenizer.from_pretrained` (only for repositories you trust).
tokenizer = AutoTokenizer.from_pretrained(model_hf_path, use_cache=True, padding_side="left")
base_path, onnx_path = qualcomm_efficient_converter(
model_kv=model_transformed,
Expand Down

0 comments on commit 9358f35

Please sign in to comment.