Fix-3
Signed-off-by: amitraj <[email protected]>
quic-amitraj committed Dec 9, 2024
1 parent e2b5306 commit a2c79e3
Showing 3 changed files with 38 additions and 21 deletions.
34 changes: 19 additions & 15 deletions QEfficient/transformers/models/modeling_auto.py
@@ -66,21 +66,6 @@ def model_name(self) -> str:
        mname = mname[4:]
        return mname

-    @property
-    def model_hash(self) -> str:
-        # NOTE: model_config.to_diff_dict() has "_name_or_path" attribute which is the model card name or path.
-        # Using same card name will result in same hash. But, using a relative path for one run and
-        # absolute path for another run will result in different hash.
-        # The added complexity to resolve different paths to same location is not worth pursuing.
-        # Instead, advise the user to always provide same relative paths or absolute paths for local models.
-
-        # Compute the hash with: model_config, transforms
-        mhash = hashlib.sha256()
-        mhash.update(to_hashable(self.model.config.to_diff_dict()))
-        mhash.update(to_hashable(self._transform_names()))
-        mhash = mhash.hexdigest()[:16]
-        return mhash


class QEFFAutoModelForCausalLM(QEFFTransformersBase):
    """
@@ -107,6 +92,10 @@ class QEFFAutoModelForCausalLM(QEFFTransformersBase):
    _onnx_transforms = [FP16ClipTransform, SplitTensorsTransform]

    def __init__(self, model: nn.Module, continuous_batching: bool = False, **kwargs):
+        model_class_name = model.__class__.__name__
+        if not (model_class_name.endswith("ForCausalLM") or model_class_name.endswith("LMHeadModel")):
+            raise TypeError(f"Required pytorch module for CausalLM or LMHeadModel, got {model_class_name}")
+
        if kwargs.pop("full_batch_size", None):
            continuous_batching = True
            warnings.warn(
@@ -382,6 +371,21 @@ def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs):

        return self

+    @property
+    def model_hash(self) -> str:
+        # NOTE: model_config.to_diff_dict() has "_name_or_path" attribute which is the model card name or path.
+        # Using same card name will result in same hash. But, using a relative path for one run and
+        # absolute path for another run will result in different hash.
+        # The added complexity to resolve different paths to same location is not worth pursuing.
+        # Instead, advise the user to always provide same relative paths or absolute paths for local models.
+
+        # Compute the hash with: model_config, transforms
+        mhash = hashlib.sha256()
+        mhash.update(to_hashable(self.model.config.to_diff_dict()))
+        mhash.update(to_hashable(self._transform_names()))
+        mhash = mhash.hexdigest()[:16]
+        return mhash
+
    def export(self, export_dir: Optional[str] = None) -> str:
        """
        Exports the model to ``ONNX`` format using ``torch.onnx.export``.
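The model_hash property itself is unchanged; the commit only moves it from the shared base class onto QEFFAutoModelForCausalLM. For readers unfamiliar with the caching scheme, here is a minimal standalone sketch of the idea, assuming to_hashable serializes its argument to canonical bytes (a hypothetical stand-in for QEfficient's real helper):

import hashlib
import json

def to_hashable(obj) -> bytes:
    # Hypothetical stand-in: canonical JSON so that logically equal
    # inputs always map to identical bytes.
    return json.dumps(obj, sort_keys=True).encode("utf-8")

def model_hash(config_dict: dict, transform_names: list) -> str:
    # Hash the model config together with the applied transform names;
    # the first 16 hex characters serve as a compact cache key.
    mhash = hashlib.sha256()
    mhash.update(to_hashable(config_dict))
    mhash.update(to_hashable(transform_names))
    return mhash.hexdigest()[:16]

# Identical config + transforms -> identical hash. Because the config
# carries "_name_or_path", a relative vs. absolute local path changes
# the hash, exactly as the NOTE in the property warns.
print(model_hash({"_name_or_path": "gpt2", "n_layer": 12}, ["CustomOpsTransform"]))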
2 changes: 1 addition & 1 deletion QEfficient/utils/constants.py
@@ -46,7 +46,7 @@ def get_models_dir():
ONNX_EXPORT_EXAMPLE_BATCH_SIZE = 1
ONNX_EXPORT_EXAMPLE_SEQ_LEN = 32
ONNX_EXPORT_EXAMPLE_FBS = 4
-ONNX_EXPORT_OPSET = 13
+ONNX_EXPORT_OPSET = 14

COMPILER = ["/opt/qti-aic/exec/qaic-exec", "-aic-hw", "-aic-hw-version=2.0"]

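One plausible motivation for bumping ONNX_EXPORT_OPSET from 13 to 14 (an assumption, not stated in the commit) is that some operators transformer models lower to, such as Trilu — the op behind torch.tril/torch.triu causal masks — only exist from opset 14 onwards. A minimal sketch that fails to export at opset 13 but succeeds at 14:

import torch

class CausalMask(torch.nn.Module):
    # torch.tril lowers to the ONNX Trilu operator, which was only
    # introduced in opset 14; exporting this module at opset 13 fails.
    def forward(self, scores: torch.Tensor) -> torch.Tensor:
        mask = torch.tril(torch.ones_like(scores, dtype=torch.bool))
        return scores.masked_fill(~mask, float("-inf"))

model = CausalMask().eval()
scores = torch.randn(1, 8, 8)
torch.onnx.export(model, (scores,), "causal_mask.onnx", opset_version=14)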
23 changes: 18 additions & 5 deletions tests/transformers/models/test_causal_lm_models.py
@@ -183,11 +183,21 @@ def check_embed_pytorch_vs_ort_vs_ai100(
    # Try to initialize with add_pooling_layer parameter
    try:
        qeff_model = QEffAutoModel.from_pretrained(
-            pretrained_model_name_or_path=model_path, add_pooling_layer=False, num_hidden_layers=n_layer
+            pretrained_model_name_or_path=model_path,
+            add_pooling_layer=False,
+            num_hidden_layers=n_layer,
+            attn_implementation="eager",
+            trust_remote_code=True,
        )
    except TypeError:
        # If it fails, initialize without the parameter
-        qeff_model = QEffAutoModel.from_pretrained(pretrained_model_name_or_path=model_path, num_hidden_layers=n_layer)
+        qeff_model = QEffAutoModel.from_pretrained(
+            pretrained_model_name_or_path=model_path,
+            num_hidden_layers=n_layer,
+            attn_implementation="eager",
+            trust_remote_code=True,
+        )
+
    text = "My name is"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    inputs = tokenizer(text, return_tensors="pt", padding="max_length", max_length=seq_len)
@@ -206,7 +216,7 @@ def check_embed_pytorch_vs_ort_vs_ai100(
    onnx_embeddings = onnx_outputs[0]
    mad = np.mean(np.abs(pt_embeddings - onnx_embeddings))
    print("Mad for onnx and pytorch is ", mad)
-    assert mad <= 10**-6, f"MAD is too high for onnx and Pytorch: {mad}"
+    assert mad <= 10**-3, f"MAD is too high for onnx and Pytorch: {mad}"

    qeff_model.compile(
        num_cores=14,
@@ -277,17 +287,20 @@ def test_causal_lm_pytorch_vs_kv_vs_ort_vs_ai100_pl1():


embed_test_models = [
-    "intfloat/e5-mistral-7b-instruct", # MistralModel
+    # model_name, architecture
    "nomic-ai/nomic-embed-text-v1.5", # NomicBertModel
    "sentence-transformers/multi-qa-mpnet-base-cos-v1", # MPNetForMaskedLM
    "BAAI/bge-reranker-v2-m3", # XLMRobertaForSequenceClassification
    "BAAI/bge-small-en-v1.5", # BertModel
+    # "intfloat/e5-mistral-7b-instruct", # MistralModel
+    # "dunzhang/stella_en_1.5B_v5", # Qwen2ForCausalLM
]


@pytest.mark.on_qaic
@pytest.mark.parametrize("model_name", embed_test_models)
def test_embed_model_pytorch_vs_onnx_vs_ai100(model_name):
    """
-    Test function to validate the Pytorch model, ONNX model and
+    Test function to validate output of the Pytorch, ONNX and AI 100 runtime model output.
    """
    check_embed_pytorch_vs_ort_vs_ai100(model_name=model_name, seq_len=32, n_layer=1)
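The loosened assertion above compares PyTorch and ONNX Runtime embeddings by mean absolute difference (MAD). A self-contained illustration of the check, with synthetic arrays standing in for real model outputs; the 1e-3 tolerance matches the value this commit settles on:

import numpy as np

def mean_abs_diff(a: np.ndarray, b: np.ndarray) -> float:
    # Mean absolute difference between two embedding tensors.
    return float(np.mean(np.abs(a - b)))

rng = np.random.default_rng(0)
pt_embeddings = rng.random((1, 32, 768), dtype=np.float32)
# Simulate small numeric drift of the kind FP16 clipping introduces.
onnx_embeddings = pt_embeddings + rng.uniform(-5e-4, 5e-4, pt_embeddings.shape).astype(np.float32)

mad = mean_abs_diff(pt_embeddings, onnx_embeddings)
assert mad <= 1e-3, f"MAD is too high: {mad}"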
