
0.28.1
- Update moondream2 support to 2024-07-23
- Pin openbmb/MiniCPM-Llama3-V-2_5 revision
matatonic committed Jul 24, 2024
1 parent d06f0e2 commit 2682aec
Showing 5 changed files with 144 additions and 133 deletions.
5 changes: 5 additions & 0 deletions README.md
@@ -119,6 +119,11 @@ See: [OpenVLM Leaderboard](https://huggingface.co/spaces/opencompass/open_vlm_le
 
 ## Recent updates
 
+Version 0.28.1
+
+- Update moondream2 support to 2024-07-23
+- Pin openbmb/MiniCPM-Llama3-V-2_5 revision
+
 Version 0.28.0
 
 - new model support: internlm-xcomposer2d5-7b
10 changes: 7 additions & 3 deletions backend/minicpm.py
@@ -2,8 +2,8 @@
 
 from vision_qna import *
 
-# openbmb/MiniCPM-Llama3-V-2_5
-# openbmb/MiniCPM-V-2 - maybe broken after revision: str = "187851962daa9b63072d40ec802f597b71bff532"
+# openbmb/MiniCPM-Llama3-V-2_5 # broken after 45387f99a455e11801b78a0b24811856688e0c8b
+# openbmb/MiniCPM-V-2 - 4bit broken
 # openbmb/MiniCPM-V aka OmniLMM-3B
 
 class VisionQnA(VisionQnABase):
@@ -14,6 +14,10 @@ class VisionQnA(VisionQnABase):
     def __init__(self, model_id: str, device: str, device_map: str = 'auto', extra_params = {}, format = None):
         super().__init__(model_id, device, device_map, extra_params, format)
 
+        # I wish there was a better way to do this...
+        if model_id == 'openbmb/MiniCPM-Llama3-V-2_5':
+            self.revision = '45387f99a455e11801b78a0b24811856688e0c8b'
+
         self.tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=self.params.get('trust_remote_code', False))
         self.model = AutoModel.from_pretrained(**self.params).eval()
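The pin relies on Hugging Face's `revision` argument: `from_pretrained` resolves the model to that exact Hub commit, so upstream changes to the remote code can no longer break this backend. A minimal sketch of the same idea outside this loader (illustrative only; the backend itself routes its options through `**self.params` rather than passing `revision=` directly):

```python
# Minimal illustration of pinning a Hub revision with transformers; not this repo's
# loader, which builds its kwargs in self.params instead.
from transformers import AutoModel, AutoTokenizer

model_id = 'openbmb/MiniCPM-Llama3-V-2_5'
revision = '45387f99a455e11801b78a0b24811856688e0c8b'  # last known-good commit per this change

tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision, trust_remote_code=True)
model = AutoModel.from_pretrained(model_id, revision=revision, trust_remote_code=True).eval()
```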

@@ -41,7 +45,7 @@ async def stream_chat_with_images(self, request: ImageChatRequest) -> AsyncGener
             msgs.extend([{ 'role': m.role, 'content': c.text }])
 
         if image is None:
-            image = await url_to_image(transparent_pixel_url)
+            image = await url_to_image(black_pixel_url)
 
         # default uses num_beams: 3, but if streaming/sampling is requested, switch the defaults.
         default_sampling_params = {
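`black_pixel_url` and `url_to_image` come from `vision_qna`, which is not part of this diff; the swap presumably avoids handing the model a transparent (alpha-channel) placeholder when no image is supplied. A rough sketch of what such helpers could look like, purely as an assumption about their shape:

```python
# Hypothetical stand-ins for the vision_qna helpers used above (not shown in this commit).
import base64, io
import aiohttp
from PIL import Image

def _pixel_data_url(color=(0, 0, 0)) -> str:
    # Encode a 1x1 pixel of the given color as a data: URL.
    buf = io.BytesIO()
    Image.new("RGB", (1, 1), color).save(buf, format="PNG")
    return "data:image/png;base64," + base64.b64encode(buf.getvalue()).decode()

black_pixel_url = _pixel_data_url()  # opaque black, unlike the old transparent pixel

async def url_to_image(url: str) -> Image.Image:
    # Accept both data: URLs and http(s) URLs, returning an RGB PIL image.
    if url.startswith("data:"):
        data = base64.b64decode(url.split(",", 1)[1])
        return Image.open(io.BytesIO(data)).convert("RGB")
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            return Image.open(io.BytesIO(await resp.read())).convert("RGB")
```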
2 changes: 1 addition & 1 deletion backend/moondream2.py
@@ -7,7 +7,7 @@
 
 class VisionQnA(VisionQnABase):
     model_name: str = "moondream2"
-    revision: str = '2024-05-20' # 'main'
+    revision: str = '2024-07-23' # 'main'
     format: str = 'phi15'
     vision_layers: List[str] = ["vision_encoder"]
 
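For reference, a hedged example of loading moondream2 at the newly pinned revision outside this backend; the `vikhyatk/moondream2` hub id and the `encode_image`/`answer_question` calls follow the model card for these dated revisions and are assumptions, since this commit only changes the revision string:

```python
# Assumed standalone usage of moondream2 at the pinned 2024-07-23 revision (per its
# model card); the hub id below is not stated in this diff.
from transformers import AutoModelForCausalLM, AutoTokenizer
from PIL import Image

model_id = "vikhyatk/moondream2"
revision = "2024-07-23"

model = AutoModelForCausalLM.from_pretrained(model_id, revision=revision, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)

image = Image.open("example.jpg")
enc = model.encode_image(image)
print(model.answer_question(enc, "Describe this image.", tokenizer))
```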
8 changes: 4 additions & 4 deletions requirements.txt
@@ -4,10 +4,10 @@ bitsandbytes
 fastapi
 # See: https://github.com/bdashore3/flash-attention/releases for other windows flash_attn releases
 # And: https://github.com/Dao-AILab/flash-attention/releases for linux.
-https://github.com/Dao-AILab/flash-attention/releases/download/v2.5.9.post1/flash_attn-2.5.9.post1+cu122torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/Dao-AILab/flash-attention/releases/download/v2.5.9.post1/flash_attn-2.5.9.post1+cu122torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/bdashore3/flash-attention/releases/download/v2.5.9.post1/flash_attn-2.5.9.post1+cu122torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/bdashore3/flash-attention/releases/download/v2.5.9.post1/flash_attn-2.5.9.post1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.2/flash_attn-2.6.2+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.2/flash_attn-2.6.2+cu123torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/bdashore3/flash-attention/releases/download/v2.6.2/flash_attn-2.6.1+cu123torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/bdashore3/flash-attention/releases/download/v2.6.2/flash_attn-2.6.1+cu123torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
 flash_attn; python_version != "3.10" and python_version != "3.11"
 hf_transfer
 loguru
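Each pinned wheel URL carries a PEP 508 environment marker after the semicolon, so pip installs only the wheel that matches the running interpreter and platform, and falls back to the plain `flash_attn` requirement on other Python versions. A quick way to see which marker applies, using `packaging` (which pip itself depends on); the snippet is a sketch, not part of the repo:

```python
# Evaluate the same PEP 508 markers requirements.txt uses to pick a flash-attn wheel.
from packaging.markers import Marker

markers = {
    "linux-cp311": 'platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"',
    "linux-cp310": 'platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"',
    "windows-cp311": 'platform_system == "Windows" and python_version == "3.11"',
}

for name, expr in markers.items():
    print(name, Marker(expr).evaluate())  # True only in a matching environment
```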