From 82de3a905b35d5410b730d230618539e621c7c05 Mon Sep 17 00:00:00 2001 From: matatonic Date: Thu, 19 Sep 2024 15:29:48 -0400 Subject: [PATCH] Qwen2-VL-72B-Instruct-AWQ test results. --- README.md | 9 +++++---- backend/omchat.py | 1 - model_conf_tests.json | 1 + vision.sample.env | 1 + 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 84991cb..5d34ce2 100644 --- a/README.md +++ b/README.md @@ -118,11 +118,12 @@ Can't decide which to use? See the [OpenVLM Leaderboard](https://huggingface.co/ - [X] [qresearch](https://huggingface.co/qresearch/) - - [X] [llama-3-vision-alpha-hf](https://huggingface.co/qresearch/llama-3-vision-alpha-hf) (wont gpu split) - [X] [Qwen](https://huggingface.co/Qwen/) -- - [X] [Qwen-VL-Chat](https://huggingface.co/Qwen/Qwen-VL-Chat) -- - [X] [wen2-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct) +- - [X] [Qwen2-VL-72B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct-AWQ) +- - [X] [Qwen2-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct) - - [X] [Qwen2-VL-7B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct-AWQ) - - [X] [Qwen2-VL-2B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct) - - [X] [Qwen2-VL-2B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct-AWQ) +- - [X] [Qwen-VL-Chat](https://huggingface.co/Qwen/Qwen-VL-Chat) - [X] [vikhyatk](https://huggingface.co/vikhyatk) - - [X] [moondream2](https://huggingface.co/vikhyatk/moondream2) - - [X] [moondream1](https://huggingface.co/vikhyatk/moondream1) (0.28.1-alt only) @@ -143,7 +144,6 @@ If you can't find your favorite model, you can [open a new issue](https://github ## Recent updates - Version 0.32.0 - new model support: From AIDC-AI, Ovis1.5-Gemma2-9B and Ovis1.5-Llama3-8B @@ -492,4 +492,5 @@ In /etc/docker/daemon.json: "default-runtime": "nvidia" } ``` -- In Windows, be sure you have WSL2 installed and docker is configured to use it. Also make sure your nvidia drivers are up to date. \ No newline at end of file +- In Windows, be sure you have WSL2 installed and docker is configured to use it. Also make sure your nvidia drivers are up to date. + diff --git a/backend/omchat.py b/backend/omchat.py index 1075bf6..cd20c5f 100644 --- a/backend/omchat.py +++ b/backend/omchat.py @@ -36,7 +36,6 @@ async def stream_chat_with_images(self, request: ImageChatRequest) -> AsyncGener inputs = self.processor(prompt, images=images, return_tensors="pt").to(self.model.device) default_params = dict( - max_new_tokens=256, do_sample=False, eos_token_id=self.eos_token_id, pad_token_id=self.processor.tokenizer.pad_token_id, diff --git a/model_conf_tests.json b/model_conf_tests.json index 4728860..e27939b 100644 --- a/model_conf_tests.json +++ b/model_conf_tests.json @@ -47,6 +47,7 @@ ["Qwen/Qwen2-VL-2B-Instruct", "-A", "flash_attention_2"], ["Qwen/Qwen2-VL-7B-Instruct-AWQ", "-A", "flash_attention_2"], ["Qwen/Qwen2-VL-7B-Instruct", "-A", "flash_attention_2"], + ["Qwen/Qwen2-VL-72B-Instruct-AWQ", "-A", "flash_attention_2"], ["Salesforce/xgen-mm-phi3-mini-instruct-dpo-r-v1.5"], ["Salesforce/xgen-mm-phi3-mini-instruct-interleave-r-v1.5"], ["Salesforce/xgen-mm-phi3-mini-instruct-singleimg-r-v1.5"], diff --git a/vision.sample.env b/vision.sample.env index 03249c7..5e5a638 100644 --- a/vision.sample.env +++ b/vision.sample.env @@ -52,6 +52,7 @@ HF_HUB_ENABLE_HF_TRANSFER=1 #CLI_COMMAND="python vision.py -m Qwen/Qwen2-VL-2B-Instruct -A flash_attention_2" # test pass✅, time: 17.3s, mem: 16.6GB, 13/13 tests passed. #CLI_COMMAND="python vision.py -m Qwen/Qwen2-VL-7B-Instruct-AWQ -A flash_attention_2" # test pass✅, time: 21.2s, mem: 18.8GB, 13/13 tests passed. #CLI_COMMAND="python vision.py -m Qwen/Qwen2-VL-7B-Instruct -A flash_attention_2" # test pass✅, time: 20.6s, mem: 27.8GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m Qwen/Qwen2-VL-72B-Instruct-AWQ -A flash_attention_2" # test pass✅, time: 35.2s, mem: 44.5GB, 13/13 tests passed. #CLI_COMMAND="python vision.py -m Salesforce/xgen-mm-phi3-mini-instruct-dpo-r-v1.5" # test pass✅, time: 8.9s, mem: 9.7GB, 13/13 tests passed. #CLI_COMMAND="python vision.py -m Salesforce/xgen-mm-phi3-mini-instruct-interleave-r-v1.5" # test pass✅, time: 3.8s, mem: 9.8GB, 13/13 tests passed. #CLI_COMMAND="python vision.py -m Salesforce/xgen-mm-phi3-mini-instruct-singleimg-r-v1.5" # test pass✅, time: 8.7s, mem: 9.4GB, 13/13 tests passed.