Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Droid] Integrate Florence-2-Large Model #309

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions bin/test_truss_deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@ def truss_push():
raise Exception(
f"Failed to push model:\n\nSTDOUT: {result.stdout.decode()}\nSTDERR: {result.stderr.decode()}"
)
model_id = match.group(1)
deployment_id = match.group(2)
model_id = str(match.group(1)) # Ensure model_id is a string
deployment_id = str(match.group(2)) # Ensure deployment_id is a string
print(
f"Model pushed successfully. model-id: {model_id}. deployment-id: {deployment_id}"
)
Expand Down
26 changes: 26 additions & 0 deletions florence-2-large/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Truss configuration for deploying microsoft/Florence-2-large.
# (Nesting restored — the pasted diff had all YAML indentation stripped,
# which makes the file invalid; values themselves are unchanged.)
description: Deploy Florence-2 Large model for multi-task computer vision
model_metadata:
  model_name: Florence-2-Large
  avatar_url: https://huggingface.co/microsoft/Florence-2-large/resolve/main/florence2_avatar.png
  cover_image_url: https://huggingface.co/microsoft/Florence-2-large/resolve/main/florence2_cover_image.png
  tags:
    - vision
    - multi-task
  example_model_input:
    prompt: "<OD>"
    # TODO(review): placeholder domain — replace with a real, reachable
    # example image before publishing; this URL will 404 at request time.
    image_url: "https://example.com/test_image.jpg"
python_version: py39
requirements:
  # NOTE(review): Florence-2's remote code generally needs a newer
  # transformers release than 4.30.0 (trust_remote_code model added later) —
  # verify the pin against the Hugging Face model card before deploy.
  - transformers==4.30.0
  - torch==2.0.1
  - torchvision==0.15.2
  - GitPython==3.1.31
resources:
  accelerator: A100
  cpu: 4
  memory: 16G
  use_gpu: true
secrets:
  hf_access_token:
    description: Access token for Hugging Face
spec_version: 1
60 changes: 60 additions & 0 deletions florence-2-large/model/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from typing import Dict, List

import requests
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor


class Model:
    """Truss model wrapper around microsoft/Florence-2-large.

    Exposes the standard Truss entry points: ``load()`` fetches the model and
    processor once; ``predict()`` takes ``{"prompt", "image_url", "task"?}``
    and returns ``{"result": <parsed answer>}``.
    """

    def __init__(self, **kwargs):
        # Heavy objects are loaded lazily in load() so construction stays cheap.
        self.model = None
        self.processor = None
        self.model_name = "microsoft/Florence-2-large"
        # Task token used by postprocess(); overwritten per request in predict().
        self.task = "<OD>"
        # The PIL image from the most recent preprocess() call. The processor's
        # output (a BatchFeature) contains only input_ids/pixel_values — NOT the
        # original image — so we must keep it ourselves for postprocess() sizing.
        self._original_image = None

    def load(self):
        """Load the model and processor once; subsequent calls are no-ops."""
        if self.model is None:
            self.model = AutoModelForCausalLM.from_pretrained(
                self.model_name, trust_remote_code=True
            )
            self.processor = AutoProcessor.from_pretrained(
                self.model_name, trust_remote_code=True
            )

    def preprocess(self, prompt: str, image_url: str) -> Dict:
        """Download the image and encode (prompt, image) into model inputs.

        Also stashes the downloaded PIL image on the instance so predict()
        can hand its true dimensions to postprocess().
        """
        # NOTE(review): no timeout or raise_for_status() on the fetch — a bad
        # URL surfaces as a PIL decode error; consider hardening this.
        image = Image.open(requests.get(image_url, stream=True).raw)
        self._original_image = image
        inputs = self.processor(text=prompt, images=image, return_tensors="pt")
        return inputs

    def postprocess(
        self, generated_ids: List[int], original_image: Image.Image
    ) -> Dict:
        """Decode generated token ids and parse them for the current task.

        Reads ``self.task`` (set by predict()) to select the parsing mode.
        """
        generated_text = self.processor.batch_decode(
            generated_ids, skip_special_tokens=False
        )[0]
        parsed_answer = self.processor.post_process_generation(
            generated_text,
            task=self.task,
            image_size=(original_image.width, original_image.height),
        )
        return parsed_answer

    def predict(self, model_input: Dict) -> Dict[str, List]:
        """Run one inference request end to end.

        Expects ``model_input`` with "prompt", "image_url", and optional
        "task" (defaults to object detection, "<OD>").
        """
        self.load()
        prompt = model_input["prompt"]
        image_url = model_input["image_url"]
        self.task = model_input.get("task", "<OD>")

        inputs = self.preprocess(prompt, image_url)
        # BUG FIX: the original code did `inputs.pop("images")`, but the
        # processor's BatchFeature has no "images" key (only input_ids and
        # pixel_values), so every request raised KeyError. preprocess() now
        # retains the downloaded image for us instead.
        original_image = self._original_image

        generated_ids = self.model.generate(
            input_ids=inputs["input_ids"],
            pixel_values=inputs["pixel_values"],
            max_new_tokens=1024,
            num_beams=3,
            do_sample=False,
        )

        parsed_answer = self.postprocess(generated_ids, original_image)

        return {"result": parsed_answer}
53 changes: 53 additions & 0 deletions florence-2-large/tests/test_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import unittest
from typing import Dict

from model.model import Model


class TestFlorence2Large(unittest.TestCase):
    """Smoke tests for the Florence-2-Large Truss wrapper.

    NOTE(review): these tests download the real checkpoint and fetch the
    image over the network — they are integration tests, not unit tests,
    and the example.com image URL is a placeholder that will not resolve.
    """

    def setUp(self):
        # Fresh wrapper per test; load() is guarded, so repeated loading
        # only pays the download cost once per process.
        self.model = Model()
        self.model.load()

    def test_model_loading(self):
        """Test if the model and its components are loaded correctly."""
        self.assertIsNotNone(self.model.model, "Model not loaded")
        self.assertIsNotNone(self.model.processor, "Processor not loaded")

    def test_inference(self):
        """Test model inference with predefined inputs and expected outputs."""
        test_input = {
            "prompt": "<OD>",
            # TODO(review): replace with a real, reachable test image.
            "image_url": "https://example.com/test_image.jpg",
        }
        expected_output_keys = ["result"]

        output = self.model.predict(test_input)

        self.assertIsInstance(output, Dict, "Output is not a dictionary")
        self.assertCountEqual(
            output.keys(),
            expected_output_keys,
            "Output keys do not match expected keys",
        )

    def test_output_handling(self):
        """Test if the model's output is correctly formatted."""
        test_input = {
            "prompt": "<OD>",
            # TODO(review): replace with a real, reachable test image.
            "image_url": "https://example.com/test_image.jpg",
        }

        output = self.model.predict(test_input)
        result = output["result"]

        self.assertIsInstance(result, Dict, "Result is not a dictionary")
        # BUG FIX: Florence-2's post_process_generation returns a dict keyed
        # by the task token, e.g. {"<OD>": {"bboxes": [...], "labels": [...]}}.
        # The original assertions on "objects"/"panoptic_segmentation" keys
        # could never pass for the default "<OD>" task.
        self.assertIn("<OD>", result, "Task key '<OD>' not found in result")
        detection = result["<OD>"]
        self.assertIn("bboxes", detection, "'bboxes' key not found in detection")
        self.assertIn("labels", detection, "'labels' key not found in detection")


if __name__ == "__main__":
unittest.main()
Loading