Qwen2-VL-72B-Instruct-AWQ test results.

matatonic · Sep 19, 2024 · 82de3a9
1 parent 8cceae6
commit 82de3a9
Show file tree

Hide file tree

Showing 4 changed files with 7 additions and 5 deletions.
diff --git a/README.md b/README.md
@@ -118,11 +118,12 @@ Can't decide which to use? See the [OpenVLM Leaderboard](https://huggingface.co/
 - [X] [qresearch](https://huggingface.co/qresearch/)
 - - [X] [llama-3-vision-alpha-hf](https://huggingface.co/qresearch/llama-3-vision-alpha-hf) (wont gpu split)
 - [X] [Qwen](https://huggingface.co/Qwen/)
-- - [X] [Qwen-VL-Chat](https://huggingface.co/Qwen/Qwen-VL-Chat)
-- - [X] [wen2-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct)
+- - [X] [Qwen2-VL-72B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct-AWQ)
+- - [X] [Qwen2-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct)
 - - [X] [Qwen2-VL-7B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct-AWQ)
 - - [X] [Qwen2-VL-2B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct)
 - - [X] [Qwen2-VL-2B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct-AWQ)
+- - [X] [Qwen-VL-Chat](https://huggingface.co/Qwen/Qwen-VL-Chat)
 - [X] [vikhyatk](https://huggingface.co/vikhyatk)
 - - [X] [moondream2](https://huggingface.co/vikhyatk/moondream2)
 - - [X] [moondream1](https://huggingface.co/vikhyatk/moondream1) (0.28.1-alt only)
@@ -143,7 +144,6 @@ If you can't find your favorite model, you can [open a new issue](https://github
 
 ## Recent updates
 
-
 Version 0.32.0
 
 - new model support: From AIDC-AI, Ovis1.5-Gemma2-9B and Ovis1.5-Llama3-8B
@@ -492,4 +492,5 @@ In /etc/docker/daemon.json:
     "default-runtime": "nvidia"
 }
 ```
-- In Windows, be sure you have WSL2 installed and docker is configured to use it. Also make sure your nvidia drivers are up to date.
+- In Windows, be sure you have WSL2 installed and docker is configured to use it. Also make sure your nvidia drivers are up to date.
+
diff --git a/backend/omchat.py b/backend/omchat.py
@@ -36,7 +36,6 @@ async def stream_chat_with_images(self, request: ImageChatRequest) -> AsyncGener
         inputs = self.processor(prompt, images=images, return_tensors="pt").to(self.model.device)
 
         default_params = dict(
-            max_new_tokens=256,
             do_sample=False,
             eos_token_id=self.eos_token_id,
             pad_token_id=self.processor.tokenizer.pad_token_id,

diff --git a/model_conf_tests.json b/model_conf_tests.json
@@ -47,6 +47,7 @@
   ["Qwen/Qwen2-VL-2B-Instruct", "-A", "flash_attention_2"],
   ["Qwen/Qwen2-VL-7B-Instruct-AWQ", "-A", "flash_attention_2"],
   ["Qwen/Qwen2-VL-7B-Instruct", "-A", "flash_attention_2"],
+  ["Qwen/Qwen2-VL-72B-Instruct-AWQ", "-A", "flash_attention_2"],
   ["Salesforce/xgen-mm-phi3-mini-instruct-dpo-r-v1.5"],
   ["Salesforce/xgen-mm-phi3-mini-instruct-interleave-r-v1.5"],
   ["Salesforce/xgen-mm-phi3-mini-instruct-singleimg-r-v1.5"],

diff --git a/vision.sample.env b/vision.sample.env
@@ -52,6 +52,7 @@ HF_HUB_ENABLE_HF_TRANSFER=1
 #CLI_COMMAND="python vision.py -m Qwen/Qwen2-VL-2B-Instruct -A flash_attention_2"  # test pass✅, time: 17.3s, mem: 16.6GB, 13/13 tests passed.
 #CLI_COMMAND="python vision.py -m Qwen/Qwen2-VL-7B-Instruct-AWQ -A flash_attention_2"  # test pass✅, time: 21.2s, mem: 18.8GB, 13/13 tests passed.
 #CLI_COMMAND="python vision.py -m Qwen/Qwen2-VL-7B-Instruct -A flash_attention_2"  # test pass✅, time: 20.6s, mem: 27.8GB, 13/13 tests passed.
+#CLI_COMMAND="python vision.py -m Qwen/Qwen2-VL-72B-Instruct-AWQ -A flash_attention_2"  # test pass✅, time: 35.2s, mem: 44.5GB, 13/13 tests passed.
 #CLI_COMMAND="python vision.py -m Salesforce/xgen-mm-phi3-mini-instruct-dpo-r-v1.5"  # test pass✅, time: 8.9s, mem: 9.7GB, 13/13 tests passed.
 #CLI_COMMAND="python vision.py -m Salesforce/xgen-mm-phi3-mini-instruct-interleave-r-v1.5"  # test pass✅, time: 3.8s, mem: 9.8GB, 13/13 tests passed.
 #CLI_COMMAND="python vision.py -m Salesforce/xgen-mm-phi3-mini-instruct-singleimg-r-v1.5"  # test pass✅, time: 8.7s, mem: 9.4GB, 13/13 tests passed.