From 577980187e4009090dc0627ca4f6f11d6a294db7 Mon Sep 17 00:00:00 2001 From: Pedro Gengo Date: Mon, 27 Nov 2023 10:06:41 -0300 Subject: [PATCH 1/5] Defining argument names to avoid issues with positional args --- src/setfit/exporters/onnx.py | 2 +- tests/exporters/test_onnx.py | 76 +++++++++++++++++++++++++++++++++++- 2 files changed, 75 insertions(+), 3 deletions(-) diff --git a/src/setfit/exporters/onnx.py b/src/setfit/exporters/onnx.py index cd05c464..a6438134 100644 --- a/src/setfit/exporters/onnx.py +++ b/src/setfit/exporters/onnx.py @@ -47,7 +47,7 @@ def __init__( self.model_head = model_head def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, token_type_ids: torch.Tensor): - hidden_states = self.model_body(input_ids, attention_mask, token_type_ids) + hidden_states = self.model_body(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids) hidden_states = {"token_embeddings": hidden_states[0], "attention_mask": attention_mask} embeddings = self.pooler(hidden_states) diff --git a/tests/exporters/test_onnx.py b/tests/exporters/test_onnx.py index 6c132d43..366efad0 100644 --- a/tests/exporters/test_onnx.py +++ b/tests/exporters/test_onnx.py @@ -63,7 +63,7 @@ def test_export_onnx_sklearn_head(model_path, input_text): @pytest.mark.skip("ONNX exporting of SetFit model with Torch head not yet supported.") @pytest.mark.parametrize("out_features", [1, 2, 3]) -def test_export_onnx_torch_head(out_features): +def test_export_onnx_torch_head_model_accepts_token_type_ids(out_features): """Test that the exported `ONNX` model returns the same predictions as the original model.""" dataset = get_templated_dataset(reference_dataset="SetFit/SentEval-CR") model_path = "sentence-transformers/paraphrase-albert-small-v2" @@ -119,10 +119,82 @@ def test_export_onnx_torch_head(out_features): session = onnxruntime.InferenceSession(output_path) onnx_preds = session.run(None, dict(inputs))[0] + onnx_preds = onnx_preds / (1 + 1e-5) + 
onnx_preds_soft = np.exp(onnx_preds)/sum(np.exp(onnx_preds)) + onnx_preds_argmax = np.argmax(onnx_preds_soft, axis=0) # Compare the results and ensure that we get the same predictions. - assert np.array_equal(onnx_preds, pytorch_preds) + assert np.array_equal(onnx_preds_argmax, pytorch_preds) finally: # Cleanup the model. os.remove(output_path) + + +@pytest.mark.parametrize("out_features", [1, 2, 3]) +def test_export_onnx_torch_head_model_not_accepts_token_type_ids(out_features): + """Test that the exported `ONNX` model returns the same predictions as the original model.""" + dataset = get_templated_dataset(reference_dataset="SetFit/SentEval-CR") + model_path = "sentence-transformers/paraphrase-mpnet-base-v2" + model = SetFitModel.from_pretrained( + model_path, use_differentiable_head=True, head_params={"out_features": out_features} + ) + + trainer = SetFitTrainer( + model=model, + train_dataset=dataset, + eval_dataset=dataset, + num_iterations=15, + column_mapping={"text": "text", "label": "label"}, + ) + # Train and evaluate + trainer.freeze() # Freeze the head + trainer.train() # Train only the body + # Unfreeze the head and unfreeze the body -> end-to-end training + trainer.unfreeze(keep_body_frozen=False) + trainer.train( + num_epochs=15, + batch_size=16, + body_learning_rate=1e-5, + learning_rate=1e-2, + l2_weight=0.0, + ) + + # Export the torch head based model + output_path = "model.onnx" + try: + export_onnx(model.model_body, model.model_head, opset=12, output_path=output_path) + + # Check that the model was saved. + assert output_path in os.listdir(), "Model not saved to output_path" + + # Run inference using the original model. + input_text = ["i loved the spiderman movie!", "pineapple on pizza is the worst 🤮"] + pytorch_preds = model(input_text) + + # Run inference using the exported onnx model. 
+ tokenizer = AutoTokenizer.from_pretrained(model_path) + inputs = tokenizer( + input_text, + padding=True, + truncation=True, + return_attention_mask=True, + return_token_type_ids=True, + return_tensors="np", + ) + # Map inputs to int64 from int32 + inputs = {key: value.astype("int64") for key, value in inputs.items()} + + session = onnxruntime.InferenceSession(output_path) + + onnx_preds = session.run(None, dict(inputs))[0] + onnx_preds = onnx_preds / (1 + 1e-5) + onnx_preds_soft = np.exp(onnx_preds)/sum(np.exp(onnx_preds)) + onnx_preds_argmax = np.argmax(onnx_preds_soft, axis=0) + + # Compare the results and ensure that we get the same predictions. + assert np.array_equal(onnx_preds_argmax, pytorch_preds) + + finally: + # Cleanup the model. + os.remove(output_path) \ No newline at end of file From e9ebce5a3382688ba2447644b23ea5762fe3e3ce Mon Sep 17 00:00:00 2001 From: Pedro Gengo Date: Mon, 27 Nov 2023 13:34:03 -0300 Subject: [PATCH 2/5] Fix tests for models that don't use token_type_ids --- src/setfit/exporters/onnx.py | 10 +++++++++- tests/exporters/test_onnx.py | 22 +++++++++++++--------- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/src/setfit/exporters/onnx.py b/src/setfit/exporters/onnx.py index a6438134..dd458809 100644 --- a/src/setfit/exporters/onnx.py +++ b/src/setfit/exporters/onnx.py @@ -47,7 +47,15 @@ def __init__( self.model_head = model_head def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, token_type_ids: torch.Tensor): - hidden_states = self.model_body(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids) + # inputs = { + # "input_ids": input_ids, + # "attention_mask": attention_mask, + # "token_type_ids": token_type_ids + # } + + hidden_states = self.model_body( + input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids + ) hidden_states = {"token_embeddings": hidden_states[0], "attention_mask": attention_mask} embeddings = self.pooler(hidden_states) 
diff --git a/tests/exporters/test_onnx.py b/tests/exporters/test_onnx.py index 366efad0..f5636bc9 100644 --- a/tests/exporters/test_onnx.py +++ b/tests/exporters/test_onnx.py @@ -43,7 +43,7 @@ def test_export_onnx_sklearn_head(model_path, input_text): padding=True, truncation=True, return_attention_mask=True, - return_token_type_ids=True, + return_token_type_ids=False, return_tensors="np", ) # Map inputs to int64 from int32 @@ -61,7 +61,6 @@ def test_export_onnx_sklearn_head(model_path, input_text): os.remove(output_path) -@pytest.mark.skip("ONNX exporting of SetFit model with Torch head not yet supported.") @pytest.mark.parametrize("out_features", [1, 2, 3]) def test_export_onnx_torch_head_model_accepts_token_type_ids(out_features): """Test that the exported `ONNX` model returns the same predictions as the original model.""" @@ -120,8 +119,8 @@ def test_export_onnx_torch_head_model_accepts_token_type_ids(out_features): onnx_preds = session.run(None, dict(inputs))[0] onnx_preds = onnx_preds / (1 + 1e-5) - onnx_preds_soft = np.exp(onnx_preds)/sum(np.exp(onnx_preds)) - onnx_preds_argmax = np.argmax(onnx_preds_soft, axis=0) + onnx_preds_soft = np.exp(onnx_preds) / sum(np.exp(onnx_preds)) + onnx_preds_argmax = np.argmax(onnx_preds_soft, axis=1) # Compare the results and ensure that we get the same predictions. 
assert np.array_equal(onnx_preds_argmax, pytorch_preds) @@ -153,7 +152,7 @@ def test_export_onnx_torch_head_model_not_accepts_token_type_ids(out_features): # Unfreeze the head and unfreeze the body -> end-to-end training trainer.unfreeze(keep_body_frozen=False) trainer.train( - num_epochs=15, + num_epochs=2, batch_size=16, body_learning_rate=1e-5, learning_rate=1e-2, @@ -179,22 +178,27 @@ def test_export_onnx_torch_head_model_not_accepts_token_type_ids(out_features): padding=True, truncation=True, return_attention_mask=True, - return_token_type_ids=True, + return_token_type_ids=False, return_tensors="np", ) # Map inputs to int64 from int32 inputs = {key: value.astype("int64") for key, value in inputs.items()} + import onnx + + model = onnx.load(output_path) + print([input.name for input in model.graph.input]) + session = onnxruntime.InferenceSession(output_path) onnx_preds = session.run(None, dict(inputs))[0] onnx_preds = onnx_preds / (1 + 1e-5) - onnx_preds_soft = np.exp(onnx_preds)/sum(np.exp(onnx_preds)) - onnx_preds_argmax = np.argmax(onnx_preds_soft, axis=0) + onnx_preds_soft = np.exp(onnx_preds) / sum(np.exp(onnx_preds)) + onnx_preds_argmax = np.argmax(onnx_preds_soft, axis=1) # Compare the results and ensure that we get the same predictions. assert np.array_equal(onnx_preds_argmax, pytorch_preds) finally: # Cleanup the model. 
- os.remove(output_path) \ No newline at end of file + os.remove(output_path) From fed239d78c666c921b7d3193a2e99f869f1a49e6 Mon Sep 17 00:00:00 2001 From: Pedro Gengo Date: Mon, 27 Nov 2023 16:26:15 -0300 Subject: [PATCH 3/5] Added message if the model does not use token_type_ids --- src/setfit/exporters/onnx.py | 16 +++++++--------- tests/exporters/test_onnx.py | 12 +++--------- 2 files changed, 10 insertions(+), 18 deletions(-) diff --git a/src/setfit/exporters/onnx.py b/src/setfit/exporters/onnx.py index dd458809..ae8e56c6 100644 --- a/src/setfit/exporters/onnx.py +++ b/src/setfit/exporters/onnx.py @@ -47,15 +47,10 @@ def __init__( self.model_head = model_head def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, token_type_ids: torch.Tensor): - # inputs = { - # "input_ids": input_ids, - # "attention_mask": attention_mask, - # "token_type_ids": token_type_ids - # } - - hidden_states = self.model_body( - input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids - ) + inputs = {"input_ids": input_ids, "attention_mask": attention_mask, "token_type_ids": token_type_ids} + + hidden_states = self.model_body(**inputs) + hidden_states = {"token_embeddings": hidden_states[0], "attention_mask": attention_mask} embeddings = self.pooler(hidden_states) @@ -68,6 +63,7 @@ def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, token_t # If head is set then we have a fully torch based model and make the final predictions # with the head. out = self.model_head(embeddings) + return out @@ -216,6 +212,8 @@ def export_onnx( # Load the model and get all of the parts. model_body_module = model_body._modules["0"] + if "token_type_embeddings" not in model_body._modules["0"].auto_model._modules["embeddings"]._modules: + print("No token_type_embeddings found in model. 
The input to the model will not have token_type_ids.") model_pooler = model_body._modules["1"] tokenizer = model_body_module.tokenizer max_length = model_body_module.max_seq_length diff --git a/tests/exporters/test_onnx.py b/tests/exporters/test_onnx.py index f5636bc9..98634361 100644 --- a/tests/exporters/test_onnx.py +++ b/tests/exporters/test_onnx.py @@ -83,7 +83,7 @@ def test_export_onnx_torch_head_model_accepts_token_type_ids(out_features): # Unfreeze the head and unfreeze the body -> end-to-end training trainer.unfreeze(keep_body_frozen=False) trainer.train( - num_epochs=15, + num_epochs=20, batch_size=16, body_learning_rate=1e-5, learning_rate=1e-2, @@ -121,7 +121,6 @@ def test_export_onnx_torch_head_model_accepts_token_type_ids(out_features): onnx_preds = onnx_preds / (1 + 1e-5) onnx_preds_soft = np.exp(onnx_preds) / sum(np.exp(onnx_preds)) onnx_preds_argmax = np.argmax(onnx_preds_soft, axis=1) - # Compare the results and ensure that we get the same predictions. assert np.array_equal(onnx_preds_argmax, pytorch_preds) @@ -130,7 +129,7 @@ def test_export_onnx_torch_head_model_accepts_token_type_ids(out_features): os.remove(output_path) -@pytest.mark.parametrize("out_features", [1, 2, 3]) +@pytest.mark.parametrize("out_features", [3]) def test_export_onnx_torch_head_model_not_accepts_token_type_ids(out_features): """Test that the exported `ONNX` model returns the same predictions as the original model.""" dataset = get_templated_dataset(reference_dataset="SetFit/SentEval-CR") @@ -152,7 +151,7 @@ def test_export_onnx_torch_head_model_not_accepts_token_type_ids(out_features): # Unfreeze the head and unfreeze the body -> end-to-end training trainer.unfreeze(keep_body_frozen=False) trainer.train( - num_epochs=2, + num_epochs=15, batch_size=16, body_learning_rate=1e-5, learning_rate=1e-2, @@ -184,11 +183,6 @@ def test_export_onnx_torch_head_model_not_accepts_token_type_ids(out_features): # Map inputs to int64 from int32 inputs = {key: value.astype("int64") for key, 
value in inputs.items()} - import onnx - - model = onnx.load(output_path) - print([input.name for input in model.graph.input]) - session = onnxruntime.InferenceSession(output_path) onnx_preds = session.run(None, dict(inputs))[0] From 1d59130c72d9860760cc77281a8eb4c0d6f6f2cd Mon Sep 17 00:00:00 2001 From: Pedro Gengo Date: Tue, 28 Nov 2023 09:38:25 -0300 Subject: [PATCH 4/5] Forced token_type_ids to appear on onnx graph --- src/setfit/exporters/onnx.py | 11 ++++++++--- tests/exporters/test_onnx.py | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/setfit/exporters/onnx.py b/src/setfit/exporters/onnx.py index ae8e56c6..1156cb10 100644 --- a/src/setfit/exporters/onnx.py +++ b/src/setfit/exporters/onnx.py @@ -47,7 +47,11 @@ def __init__( self.model_head = model_head def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, token_type_ids: torch.Tensor): - inputs = {"input_ids": input_ids, "attention_mask": attention_mask, "token_type_ids": token_type_ids} + inputs = { + "input_ids": input_ids, + "attention_mask": attention_mask, + "token_type_ids": token_type_ids, + } hidden_states = self.model_body(**inputs) @@ -55,6 +59,9 @@ def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, token_t embeddings = self.pooler(hidden_states) + # Just to enforce that the token_type_ids will be included in the ONNX graph. + embeddings = embeddings + 0 * token_type_ids.sum() + # If the model_head is none we are using a sklearn head and only output # the embeddings from the setfit model if self.model_head is None: @@ -212,8 +219,6 @@ def export_onnx( # Load the model and get all of the parts. model_body_module = model_body._modules["0"] - if "token_type_embeddings" not in model_body._modules["0"].auto_model._modules["embeddings"]._modules: - print("No token_type_embeddings found in model. 
The input to the model will not have token_type_ids.") model_pooler = model_body._modules["1"] tokenizer = model_body_module.tokenizer max_length = model_body_module.max_seq_length diff --git a/tests/exporters/test_onnx.py b/tests/exporters/test_onnx.py index 98634361..dc96315f 100644 --- a/tests/exporters/test_onnx.py +++ b/tests/exporters/test_onnx.py @@ -177,7 +177,7 @@ def test_export_onnx_torch_head_model_not_accepts_token_type_ids(out_features): padding=True, truncation=True, return_attention_mask=True, - return_token_type_ids=False, + return_token_type_ids=True, return_tensors="np", ) # Map inputs to int64 from int32 From c82a4a97c19aa512f88fb17ba0da423b1096ec3d Mon Sep 17 00:00:00 2001 From: Pedro Gengo Date: Tue, 28 Nov 2023 09:44:31 -0300 Subject: [PATCH 5/5] Revert change on test onnx --- tests/exporters/test_onnx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/exporters/test_onnx.py b/tests/exporters/test_onnx.py index dc96315f..27f843ab 100644 --- a/tests/exporters/test_onnx.py +++ b/tests/exporters/test_onnx.py @@ -43,7 +43,7 @@ def test_export_onnx_sklearn_head(model_path, input_text): padding=True, truncation=True, return_attention_mask=True, - return_token_type_ids=False, + return_token_type_ids=True, return_tensors="np", ) # Map inputs to int64 from int32