From 577980187e4009090dc0627ca4f6f11d6a294db7 Mon Sep 17 00:00:00 2001
From: Pedro Gengo <pedro.gabriel.lourenco@hotmail.com>
Date: Mon, 27 Nov 2023 10:06:41 -0300
Subject: [PATCH 1/5] Defining argument names to avoid issues with positional
 args

---
 src/setfit/exporters/onnx.py |  2 +-
 tests/exporters/test_onnx.py | 76 +++++++++++++++++++++++++++++++++++-
 2 files changed, 75 insertions(+), 3 deletions(-)

diff --git a/src/setfit/exporters/onnx.py b/src/setfit/exporters/onnx.py
index cd05c464..a6438134 100644
--- a/src/setfit/exporters/onnx.py
+++ b/src/setfit/exporters/onnx.py
@@ -47,7 +47,7 @@ def __init__(
         self.model_head = model_head
 
     def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, token_type_ids: torch.Tensor):
-        hidden_states = self.model_body(input_ids, attention_mask, token_type_ids)
+        hidden_states = self.model_body(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
         hidden_states = {"token_embeddings": hidden_states[0], "attention_mask": attention_mask}
 
         embeddings = self.pooler(hidden_states)
diff --git a/tests/exporters/test_onnx.py b/tests/exporters/test_onnx.py
index 6c132d43..366efad0 100644
--- a/tests/exporters/test_onnx.py
+++ b/tests/exporters/test_onnx.py
@@ -63,7 +63,7 @@ def test_export_onnx_sklearn_head(model_path, input_text):
 
 @pytest.mark.skip("ONNX exporting of SetFit model with Torch head not yet supported.")
 @pytest.mark.parametrize("out_features", [1, 2, 3])
-def test_export_onnx_torch_head(out_features):
+def test_export_onnx_torch_head_model_accepts_token_type_ids(out_features):
     """Test that the exported `ONNX` model returns the same predictions as the original model."""
     dataset = get_templated_dataset(reference_dataset="SetFit/SentEval-CR")
     model_path = "sentence-transformers/paraphrase-albert-small-v2"
@@ -119,10 +119,82 @@ def test_export_onnx_torch_head(out_features):
         session = onnxruntime.InferenceSession(output_path)
 
         onnx_preds = session.run(None, dict(inputs))[0]
+        onnx_preds = onnx_preds / (1 + 1e-5)
+        onnx_preds_soft = np.exp(onnx_preds)/sum(np.exp(onnx_preds))
+        onnx_preds_argmax = np.argmax(onnx_preds_soft, axis=0)
 
         # Compare the results and ensure that we get the same predictions.
-        assert np.array_equal(onnx_preds, pytorch_preds)
+        assert np.array_equal(onnx_preds_argmax, pytorch_preds)
 
     finally:
         # Cleanup the model.
         os.remove(output_path)
+
+
+@pytest.mark.parametrize("out_features", [1, 2, 3])
+def test_export_onnx_torch_head_model_not_accepts_token_type_ids(out_features):
+    """Test that the exported `ONNX` model returns the same predictions as the original model."""
+    dataset = get_templated_dataset(reference_dataset="SetFit/SentEval-CR")
+    model_path = "sentence-transformers/paraphrase-mpnet-base-v2"
+    model = SetFitModel.from_pretrained(
+        model_path, use_differentiable_head=True, head_params={"out_features": out_features}
+    )
+
+    trainer = SetFitTrainer(
+        model=model,
+        train_dataset=dataset,
+        eval_dataset=dataset,
+        num_iterations=15,
+        column_mapping={"text": "text", "label": "label"},
+    )
+    # Train and evaluate
+    trainer.freeze()  # Freeze the head
+    trainer.train()  # Train only the body
+    # Unfreeze the head and unfreeze the body -> end-to-end training
+    trainer.unfreeze(keep_body_frozen=False)
+    trainer.train(
+        num_epochs=15,
+        batch_size=16,
+        body_learning_rate=1e-5,
+        learning_rate=1e-2,
+        l2_weight=0.0,
+    )
+
+    # Export the sklearn based model
+    output_path = "model.onnx"
+    try:
+        export_onnx(model.model_body, model.model_head, opset=12, output_path=output_path)
+
+        # Check that the model was saved.
+        assert output_path in os.listdir(), "Model not saved to output_path"
+
+        # Run inference using the original model.
+        input_text = ["i loved the spiderman movie!", "pineapple on pizza is the worst 🤮"]
+        pytorch_preds = model(input_text)
+
+        # Run inference using the exported onnx model.
+        tokenizer = AutoTokenizer.from_pretrained(model_path)
+        inputs = tokenizer(
+            input_text,
+            padding=True,
+            truncation=True,
+            return_attention_mask=True,
+            return_token_type_ids=True,
+            return_tensors="np",
+        )
+        # Map inputs to int64 from int32
+        inputs = {key: value.astype("int64") for key, value in inputs.items()}
+
+        session = onnxruntime.InferenceSession(output_path)
+
+        onnx_preds = session.run(None, dict(inputs))[0]
+        onnx_preds = onnx_preds / (1 + 1e-5)
+        onnx_preds_soft = np.exp(onnx_preds)/sum(np.exp(onnx_preds))
+        onnx_preds_argmax = np.argmax(onnx_preds_soft, axis=0)
+
+        # Compare the results and ensure that we get the same predictions.
+        assert np.array_equal(onnx_preds_argmax, pytorch_preds)
+
+    finally:
+        # Cleanup the model.
+        os.remove(output_path)
\ No newline at end of file

From e9ebce5a3382688ba2447644b23ea5762fe3e3ce Mon Sep 17 00:00:00 2001
From: Pedro Gengo <pedro.gabriel.lourenco@hotmail.com>
Date: Mon, 27 Nov 2023 13:34:03 -0300
Subject: [PATCH 2/5] Fix tests for models that don't use token_type_ids

---
 src/setfit/exporters/onnx.py | 10 +++++++++-
 tests/exporters/test_onnx.py | 22 +++++++++++++---------
 2 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/src/setfit/exporters/onnx.py b/src/setfit/exporters/onnx.py
index a6438134..dd458809 100644
--- a/src/setfit/exporters/onnx.py
+++ b/src/setfit/exporters/onnx.py
@@ -47,7 +47,15 @@ def __init__(
         self.model_head = model_head
 
     def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, token_type_ids: torch.Tensor):
-        hidden_states = self.model_body(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
+        # inputs = {
+        #     "input_ids": input_ids,
+        #     "attention_mask": attention_mask,
+        #     "token_type_ids": token_type_ids
+        # }
+
+        hidden_states = self.model_body(
+            input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids
+        )
         hidden_states = {"token_embeddings": hidden_states[0], "attention_mask": attention_mask}
 
         embeddings = self.pooler(hidden_states)
diff --git a/tests/exporters/test_onnx.py b/tests/exporters/test_onnx.py
index 366efad0..f5636bc9 100644
--- a/tests/exporters/test_onnx.py
+++ b/tests/exporters/test_onnx.py
@@ -43,7 +43,7 @@ def test_export_onnx_sklearn_head(model_path, input_text):
             padding=True,
             truncation=True,
             return_attention_mask=True,
-            return_token_type_ids=True,
+            return_token_type_ids=False,
             return_tensors="np",
         )
         # Map inputs to int64 from int32
@@ -61,7 +61,6 @@ def test_export_onnx_sklearn_head(model_path, input_text):
         os.remove(output_path)
 
 
-@pytest.mark.skip("ONNX exporting of SetFit model with Torch head not yet supported.")
 @pytest.mark.parametrize("out_features", [1, 2, 3])
 def test_export_onnx_torch_head_model_accepts_token_type_ids(out_features):
     """Test that the exported `ONNX` model returns the same predictions as the original model."""
@@ -120,8 +119,8 @@ def test_export_onnx_torch_head_model_accepts_token_type_ids(out_features):
 
         onnx_preds = session.run(None, dict(inputs))[0]
         onnx_preds = onnx_preds / (1 + 1e-5)
-        onnx_preds_soft = np.exp(onnx_preds)/sum(np.exp(onnx_preds))
-        onnx_preds_argmax = np.argmax(onnx_preds_soft, axis=0)
+        onnx_preds_soft = np.exp(onnx_preds) / sum(np.exp(onnx_preds))
+        onnx_preds_argmax = np.argmax(onnx_preds_soft, axis=1)
 
         # Compare the results and ensure that we get the same predictions.
         assert np.array_equal(onnx_preds_argmax, pytorch_preds)
@@ -153,7 +152,7 @@ def test_export_onnx_torch_head_model_not_accepts_token_type_ids(out_features):
     # Unfreeze the head and unfreeze the body -> end-to-end training
     trainer.unfreeze(keep_body_frozen=False)
     trainer.train(
-        num_epochs=15,
+        num_epochs=2,
         batch_size=16,
         body_learning_rate=1e-5,
         learning_rate=1e-2,
@@ -179,22 +178,27 @@ def test_export_onnx_torch_head_model_not_accepts_token_type_ids(out_features):
             padding=True,
             truncation=True,
             return_attention_mask=True,
-            return_token_type_ids=True,
+            return_token_type_ids=False,
             return_tensors="np",
         )
         # Map inputs to int64 from int32
         inputs = {key: value.astype("int64") for key, value in inputs.items()}
 
+        import onnx
+
+        model = onnx.load(output_path)
+        print([input.name for input in model.graph.input])
+
         session = onnxruntime.InferenceSession(output_path)
 
         onnx_preds = session.run(None, dict(inputs))[0]
         onnx_preds = onnx_preds / (1 + 1e-5)
-        onnx_preds_soft = np.exp(onnx_preds)/sum(np.exp(onnx_preds))
-        onnx_preds_argmax = np.argmax(onnx_preds_soft, axis=0)
+        onnx_preds_soft = np.exp(onnx_preds) / sum(np.exp(onnx_preds))
+        onnx_preds_argmax = np.argmax(onnx_preds_soft, axis=1)
 
         # Compare the results and ensure that we get the same predictions.
         assert np.array_equal(onnx_preds_argmax, pytorch_preds)
 
     finally:
         # Cleanup the model.
-        os.remove(output_path)
\ No newline at end of file
+        os.remove(output_path)

From fed239d78c666c921b7d3193a2e99f869f1a49e6 Mon Sep 17 00:00:00 2001
From: Pedro Gengo <pedro.gabriel.lourenco@hotmail.com>
Date: Mon, 27 Nov 2023 16:26:15 -0300
Subject: [PATCH 3/5] Added message if the model does not use token_type_ids

---
 src/setfit/exporters/onnx.py | 16 +++++++---------
 tests/exporters/test_onnx.py | 12 +++---------
 2 files changed, 10 insertions(+), 18 deletions(-)

diff --git a/src/setfit/exporters/onnx.py b/src/setfit/exporters/onnx.py
index dd458809..ae8e56c6 100644
--- a/src/setfit/exporters/onnx.py
+++ b/src/setfit/exporters/onnx.py
@@ -47,15 +47,10 @@ def __init__(
         self.model_head = model_head
 
     def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, token_type_ids: torch.Tensor):
-        # inputs = {
-        #     "input_ids": input_ids,
-        #     "attention_mask": attention_mask,
-        #     "token_type_ids": token_type_ids
-        # }
-
-        hidden_states = self.model_body(
-            input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids
-        )
+        inputs = {"input_ids": input_ids, "attention_mask": attention_mask, "token_type_ids": token_type_ids}
+
+        hidden_states = self.model_body(**inputs)
+
         hidden_states = {"token_embeddings": hidden_states[0], "attention_mask": attention_mask}
 
         embeddings = self.pooler(hidden_states)
@@ -68,6 +63,7 @@ def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, token_t
         # If head is set then we have a fully torch based model and make the final predictions
         # with the head.
         out = self.model_head(embeddings)
+
         return out
 
 
@@ -216,6 +212,8 @@ def export_onnx(
 
     # Load the model and get all of the parts.
     model_body_module = model_body._modules["0"]
+    if "token_type_embeddings" not in model_body._modules["0"].auto_model._modules["embeddings"]._modules:
+        print("No token_type_embeddings found in model. The input to the model will not have token_type_ids.")
     model_pooler = model_body._modules["1"]
     tokenizer = model_body_module.tokenizer
     max_length = model_body_module.max_seq_length
diff --git a/tests/exporters/test_onnx.py b/tests/exporters/test_onnx.py
index f5636bc9..98634361 100644
--- a/tests/exporters/test_onnx.py
+++ b/tests/exporters/test_onnx.py
@@ -83,7 +83,7 @@ def test_export_onnx_torch_head_model_accepts_token_type_ids(out_features):
     # Unfreeze the head and unfreeze the body -> end-to-end training
     trainer.unfreeze(keep_body_frozen=False)
     trainer.train(
-        num_epochs=15,
+        num_epochs=20,
         batch_size=16,
         body_learning_rate=1e-5,
         learning_rate=1e-2,
@@ -121,7 +121,6 @@ def test_export_onnx_torch_head_model_accepts_token_type_ids(out_features):
         onnx_preds = onnx_preds / (1 + 1e-5)
         onnx_preds_soft = np.exp(onnx_preds) / sum(np.exp(onnx_preds))
         onnx_preds_argmax = np.argmax(onnx_preds_soft, axis=1)
-
         # Compare the results and ensure that we get the same predictions.
         assert np.array_equal(onnx_preds_argmax, pytorch_preds)
 
@@ -130,7 +129,7 @@ def test_export_onnx_torch_head_model_accepts_token_type_ids(out_features):
         os.remove(output_path)
 
 
-@pytest.mark.parametrize("out_features", [1, 2, 3])
+@pytest.mark.parametrize("out_features", [3])
 def test_export_onnx_torch_head_model_not_accepts_token_type_ids(out_features):
     """Test that the exported `ONNX` model returns the same predictions as the original model."""
     dataset = get_templated_dataset(reference_dataset="SetFit/SentEval-CR")
@@ -152,7 +151,7 @@ def test_export_onnx_torch_head_model_not_accepts_token_type_ids(out_features):
     # Unfreeze the head and unfreeze the body -> end-to-end training
     trainer.unfreeze(keep_body_frozen=False)
     trainer.train(
-        num_epochs=2,
+        num_epochs=15,
         batch_size=16,
         body_learning_rate=1e-5,
         learning_rate=1e-2,
@@ -184,11 +183,6 @@ def test_export_onnx_torch_head_model_not_accepts_token_type_ids(out_features):
         # Map inputs to int64 from int32
         inputs = {key: value.astype("int64") for key, value in inputs.items()}
 
-        import onnx
-
-        model = onnx.load(output_path)
-        print([input.name for input in model.graph.input])
-
         session = onnxruntime.InferenceSession(output_path)
 
         onnx_preds = session.run(None, dict(inputs))[0]

From 1d59130c72d9860760cc77281a8eb4c0d6f6f2cd Mon Sep 17 00:00:00 2001
From: Pedro Gengo <pedro.gabriel.lourenco@hotmail.com>
Date: Tue, 28 Nov 2023 09:38:25 -0300
Subject: [PATCH 4/5] Forced token_type_ids to appear on onnx graph

---
 src/setfit/exporters/onnx.py | 11 ++++++++---
 tests/exporters/test_onnx.py |  2 +-
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/setfit/exporters/onnx.py b/src/setfit/exporters/onnx.py
index ae8e56c6..1156cb10 100644
--- a/src/setfit/exporters/onnx.py
+++ b/src/setfit/exporters/onnx.py
@@ -47,7 +47,11 @@ def __init__(
         self.model_head = model_head
 
     def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, token_type_ids: torch.Tensor):
-        inputs = {"input_ids": input_ids, "attention_mask": attention_mask, "token_type_ids": token_type_ids}
+        inputs = {
+            "input_ids": input_ids,
+            "attention_mask": attention_mask,
+            "token_type_ids": token_type_ids,
+        }
 
         hidden_states = self.model_body(**inputs)
 
@@ -55,6 +59,9 @@ def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, token_t
 
         embeddings = self.pooler(hidden_states)
 
+        # Just to enforce that the token_type_ids will be included in the ONNX graph.
+        embeddings = embeddings + 0 * token_type_ids.sum()
+
         # If the model_head is none we are using a sklearn head and only output
         # the embeddings from the setfit model
         if self.model_head is None:
@@ -212,8 +219,6 @@ def export_onnx(
 
     # Load the model and get all of the parts.
     model_body_module = model_body._modules["0"]
-    if "token_type_embeddings" not in model_body._modules["0"].auto_model._modules["embeddings"]._modules:
-        print("No token_type_embeddings found in model. The input to the model will not have token_type_ids.")
     model_pooler = model_body._modules["1"]
     tokenizer = model_body_module.tokenizer
     max_length = model_body_module.max_seq_length
diff --git a/tests/exporters/test_onnx.py b/tests/exporters/test_onnx.py
index 98634361..dc96315f 100644
--- a/tests/exporters/test_onnx.py
+++ b/tests/exporters/test_onnx.py
@@ -177,7 +177,7 @@ def test_export_onnx_torch_head_model_not_accepts_token_type_ids(out_features):
             padding=True,
             truncation=True,
             return_attention_mask=True,
-            return_token_type_ids=False,
+            return_token_type_ids=True,
             return_tensors="np",
         )
         # Map inputs to int64 from int32

From c82a4a97c19aa512f88fb17ba0da423b1096ec3d Mon Sep 17 00:00:00 2001
From: Pedro Gengo <pedro.gabriel.lourenco@hotmail.com>
Date: Tue, 28 Nov 2023 09:44:31 -0300
Subject: [PATCH 5/5] Revert change on test onnx

---
 tests/exporters/test_onnx.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/exporters/test_onnx.py b/tests/exporters/test_onnx.py
index dc96315f..27f843ab 100644
--- a/tests/exporters/test_onnx.py
+++ b/tests/exporters/test_onnx.py
@@ -43,7 +43,7 @@ def test_export_onnx_sklearn_head(model_path, input_text):
             padding=True,
             truncation=True,
             return_attention_mask=True,
-            return_token_type_ids=False,
+            return_token_type_ids=True,
             return_tensors="np",
         )
         # Map inputs to int64 from int32