From e72b6058445dcaf94e9befd2b3ef6c5169d949df Mon Sep 17 00:00:00 2001
From: Droid <droid@factory.ai>
Date: Thu, 20 Jun 2024 16:51:57 +0000
Subject: [PATCH 1/5] Add Florence-2-Large model integration

- Created config.yaml for Florence-2-Large with specified dependencies, computational resources, and secrets management.
- Implemented model.py to load, preprocess, predict, and postprocess using Florence-2-Large.
- Added unit tests in test_model.py to verify model loading, inference, and output handling.
---
 florence-2-large/config.yaml         | 23 +++++++++++
 florence-2-large/model/model.py      | 60 ++++++++++++++++++++++++++++
 florence-2-large/tests/test_model.py | 53 ++++++++++++++++++++++++
 3 files changed, 136 insertions(+)
 create mode 100644 florence-2-large/config.yaml
 create mode 100644 florence-2-large/model/model.py
 create mode 100644 florence-2-large/tests/test_model.py
diff --git a/florence-2-large/config.yaml b/florence-2-large/config.yaml
new file mode 100644
index 00000000..75e9c9d6
--- /dev/null
+++ b/florence-2-large/config.yaml
@@ -0,0 +1,23 @@
+description: Deploy Florence-2 Large model for multi-task computer vision
+model_metadata:
+  model_name: Florence-2-Large
+  avatar_url: https://huggingface.co/microsoft/Florence-2-large/resolve/main/florence2_avatar.png
+  cover_image_url: https://huggingface.co/microsoft/Florence-2-large/resolve/main/florence2_cover_image.png
+  tags:
+    - vision
+    - multi-task
+python_version: py39
+requirements:
+  - transformers==4.30.0
+  - torch==2.0.1
+  - torchvision==0.15.2
+  - GitPython==3.1.31
+resources:
+  accelerator: A100
+  cpu: 4
+  memory: 16G
+  use_gpu: true
+secrets:
+  hf_access_token:
+    description: Access token for Hugging Face
+spec_version: 1
diff --git a/florence-2-large/model/model.py b/florence-2-large/model/model.py
new file mode 100644
index 00000000..46fe9542
--- /dev/null
+++ b/florence-2-large/model/model.py
@@ -0,0 +1,111 @@
+import io
+from typing import Dict, List, Optional
+
+import requests
+from PIL import Image
+from transformers import AutoModelForCausalLM, AutoProcessor
+
+
+class Model:
+    """Serve microsoft/Florence-2-large: fetch an image, generate, parse."""
+
+    def __init__(self, **kwargs):
+        self.model = None
+        self.processor = None
+        self.model_name = "microsoft/Florence-2-large"
+        # Task token used when neither the request nor the caller names one.
+        self.task = "<OD>"
+
+    def load(self):
+        """Load the model and processor, skipping whichever is already set."""
+        if self.model is None:
+            self.model = AutoModelForCausalLM.from_pretrained(
+                self.model_name, trust_remote_code=True
+            )
+        # Checked on its own so a failed processor load is retried next call.
+        if self.processor is None:
+            self.processor = AutoProcessor.from_pretrained(
+                self.model_name, trust_remote_code=True
+            )
+
+    def preprocess(self, prompt: str, image_url: str) -> Dict:
+        """Download the image and build the inputs for generation.
+
+        Args:
+            prompt: Text handed to the processor alongside the image.
+            image_url: URL the image is fetched from with an HTTP GET.
+
+        Returns:
+            The processor output as a plain dict, with the decoded PIL image
+            added under "images" so the caller can read its size later.
+
+        Raises:
+            requests.HTTPError: If the download returns an error status.
+        """
+        response = requests.get(image_url, timeout=30)
+        response.raise_for_status()
+        # Decode from the downloaded body (not the raw socket stream) and
+        # convert to RGB so grayscale/RGBA files become 3-channel images.
+        image = Image.open(io.BytesIO(response.content)).convert("RGB")
+        inputs = dict(
+            self.processor(text=prompt, images=image, return_tensors="pt")
+        )
+        # predict() pops this entry; the processor does not provide it.
+        inputs["images"] = image
+        return inputs
+
+    def postprocess(
+        self,
+        generated_ids: List[int],
+        original_image: Image.Image,
+        task: Optional[str] = None,
+    ) -> Dict:
+        """Decode the generated ids and parse the text for ``task``.
+
+        Args:
+            generated_ids: Value returned by ``self.model.generate``.
+            original_image: Image whose (width, height) is passed on as
+                ``image_size``.
+            task: Task token to parse for; ``None`` means ``self.task``.
+
+        Returns:
+            Whatever ``processor.post_process_generation`` returns.
+        """
+        if task is None:
+            task = self.task
+        generated_text = self.processor.batch_decode(
+            generated_ids, skip_special_tokens=False
+        )[0]
+        return self.processor.post_process_generation(
+            generated_text,
+            task=task,
+            image_size=(original_image.width, original_image.height),
+        )
+
+    def predict(self, model_input: Dict) -> Dict[str, List]:
+        """Run one request and return ``{"result": parsed_answer}``.
+
+        ``model_input`` must hold "prompt" and "image_url"; an optional
+        "task" picks the parser and falls back to ``self.task``.
+        """
+        self.load()
+        prompt = model_input["prompt"]
+        image_url = model_input["image_url"]
+        # Kept local rather than stored on self, so one request cannot
+        # change how another request's output is parsed.
+        task = model_input.get("task", self.task)
+
+        inputs = self.preprocess(prompt, image_url)
+        original_image = inputs.pop("images")
+
+        generated_ids = self.model.generate(
+            input_ids=inputs["input_ids"],
+            pixel_values=inputs["pixel_values"],
+            max_new_tokens=1024,
+            num_beams=3,
+            do_sample=False,
+        )
+
+        parsed_answer = self.postprocess(generated_ids, original_image, task)
+
+        return {"result": parsed_answer}
diff --git a/florence-2-large/tests/test_model.py b/florence-2-large/tests/test_model.py
new file mode 100644
index 00000000..4c931a77
--- /dev/null
+++ b/florence-2-large/tests/test_model.py
@@ -0,0 +1,62 @@
+import unittest
+
+from model.model import Model
+
+# Publicly hosted sample photo; the tests need a URL that returns image bytes.
+TEST_IMAGE_URL = (
+    "https://huggingface.co/datasets/huggingface/documentation-images/"
+    "resolve/main/transformers/tasks/car.jpg"
+)
+
+
+class TestFlorence2Large(unittest.TestCase):
+    """Integration tests: they load the real model and fetch a real image."""
+
+    @classmethod
+    def setUpClass(cls):
+        # Load once per class; setUp would reload the model for every test.
+        cls.model = Model()
+        cls.model.load()
+
+    def test_model_loading(self):
+        """Test if the model and its components are loaded correctly."""
+        self.assertIsNotNone(self.model.model, "Model not loaded")
+        self.assertIsNotNone(self.model.processor, "Processor not loaded")
+
+    def test_inference(self):
+        """Test that predict() returns a dict with exactly a "result" key."""
+        test_input = {"prompt": "<OD>", "image_url": TEST_IMAGE_URL}
+        expected_output_keys = ["result"]
+
+        output = self.model.predict(test_input)
+
+        self.assertIsInstance(output, dict, "Output is not a dictionary")
+        self.assertCountEqual(
+            output.keys(),
+            expected_output_keys,
+            "Output keys do not match expected keys",
+        )
+
+    def test_output_handling(self):
+        """Test that an <OD> result carries matching boxes and labels."""
+        test_input = {"prompt": "<OD>", "image_url": TEST_IMAGE_URL}
+
+        output = self.model.predict(test_input)
+        result = output["result"]
+
+        self.assertIsInstance(result, dict, "Result is not a dictionary")
+        # Object detection output is keyed by the task token and holds
+        # parallel "bboxes" and "labels" lists.
+        self.assertIn("<OD>", result, "Task key not found in result")
+        detections = result["<OD>"]
+        self.assertIn("bboxes", detections, "Bboxes key not found in result")
+        self.assertIn("labels", detections, "Labels key not found in result")
+        self.assertEqual(
+            len(detections["bboxes"]),
+            len(detections["labels"]),
+            "Each bounding box needs exactly one label",
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()

From c8717eb8a3c225035c23649ac2f5573c0d958f66 Mon Sep 17 00:00:00 2001
From: Droid <droid@factory.ai>
Date: Thu, 20 Jun 2024 16:57:25 +0000
Subject: [PATCH 2/5] Address review by @factory-droid[bot] on pull request
 #309

---
 florence-2-large/config.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/florence-2-large/config.yaml b/florence-2-large/config.yaml
index 75e9c9d6..d3eee7e7 100644
--- a/florence-2-large/config.yaml
+++ b/florence-2-large/config.yaml
@@ -6,6 +6,9 @@ model_metadata:
   tags:
     - vision
     - multi-task
+  example_model_input:
+    prompt: "<OD>"
+    image_url: "https://example.com/test_image.jpg"
 python_version: py39
 requirements:
   - transformers==4.30.0

From 01cde3d0423e92444ad16b2e9410e3c46cff2ed6 Mon Sep 17 00:00:00 2001
From: Droid <droid@factory.ai>
Date: Thu, 20 Jun 2024 17:04:21 +0000
Subject: [PATCH 3/5] Address review by @factory-droid[bot] on pull request
 #309

---
 bin/test_truss_deploy.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bin/test_truss_deploy.py b/bin/test_truss_deploy.py
index 33edb293..9c7d1d60 100644
--- a/bin/test_truss_deploy.py
+++ b/bin/test_truss_deploy.py
@@ -71,8 +71,8 @@ def truss_push():
         raise Exception(
             f"Failed to push model:\n\nSTDOUT: {result.stdout.decode()}\nSTDERR: {result.stderr.decode()}"
         )
-    model_id = match.group(1)
-    deployment_id = match.group(2)
+    model_id = str(match.group(1))  # Ensure model_id is a string
+    deployment_id = str(match.group(2))  # Ensure deployment_id is a string
     print(
         f"Model pushed successfully. model-id: {model_id}. deployment-id: {deployment_id}"
     )

From cc717c457738ee89134b2f6eb4292b72204d3b3a Mon Sep 17 00:00:00 2001
From: Droid <droid@factory.ai>
Date: Thu, 20 Jun 2024 17:10:46 +0000
Subject: [PATCH 4/5] Address review by @factory-droid[bot] on pull request
 #309

---
 bin/test_truss_deploy.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/bin/test_truss_deploy.py b/bin/test_truss_deploy.py
index 9c7d1d60..9f3cd381 100644
--- a/bin/test_truss_deploy.py
+++ b/bin/test_truss_deploy.py
@@ -73,6 +73,8 @@ def truss_push():
         )
     model_id = str(match.group(1))  # Ensure model_id is a string
     deployment_id = str(match.group(2))  # Ensure deployment_id is a string
+    model_id = str(match.group(1))  # Ensure model_id is a string
+    deployment_id = str(match.group(2))  # Ensure deployment_id is a string
     print(
         f"Model pushed successfully. model-id: {model_id}. deployment-id: {deployment_id}"
     )

From 7a5efba644817226d9c753bad54120a61be0cb6f Mon Sep 17 00:00:00 2001
From: Droid <droid@factory.ai>
Date: Thu, 20 Jun 2024 17:15:55 +0000
Subject: [PATCH 5/5] Address review by @factory-droid[bot] on pull request
 #309

---
 bin/test_truss_deploy.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/bin/test_truss_deploy.py b/bin/test_truss_deploy.py
index 9f3cd381..9c7d1d60 100644
--- a/bin/test_truss_deploy.py
+++ b/bin/test_truss_deploy.py
@@ -73,8 +73,6 @@ def truss_push():
         )
     model_id = str(match.group(1))  # Ensure model_id is a string
     deployment_id = str(match.group(2))  # Ensure deployment_id is a string
-    model_id = str(match.group(1))  # Ensure model_id is a string
-    deployment_id = str(match.group(2))  # Ensure deployment_id is a string
     print(
         f"Model pushed successfully. model-id: {model_id}. deployment-id: {deployment_id}"
     )