From 82de3a905b35d5410b730d230618539e621c7c05 Mon Sep 17 00:00:00 2001
From: matatonic <matatonic-git@zhero.org>
Date: Thu, 19 Sep 2024 15:29:48 -0400
Subject: [PATCH] Qwen2-VL-72B-Instruct-AWQ test results.

---
 README.md             | 9 +++++----
 backend/omchat.py     | 1 -
 model_conf_tests.json | 1 +
 vision.sample.env     | 1 +
 4 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 84991cb..5d34ce2 100644
--- a/README.md
+++ b/README.md
@@ -118,11 +118,12 @@ Can't decide which to use? See the [OpenVLM Leaderboard](https://huggingface.co/
 - [X] [qresearch](https://huggingface.co/qresearch/)
 - - [X] [llama-3-vision-alpha-hf](https://huggingface.co/qresearch/llama-3-vision-alpha-hf) (wont gpu split)
 - [X] [Qwen](https://huggingface.co/Qwen/)
-- - [X] [Qwen-VL-Chat](https://huggingface.co/Qwen/Qwen-VL-Chat)
-- - [X] [wen2-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct)
+- - [X] [Qwen2-VL-72B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct-AWQ)
+- - [X] [Qwen2-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct)
 - - [X] [Qwen2-VL-7B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct-AWQ)
 - - [X] [Qwen2-VL-2B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct)
 - - [X] [Qwen2-VL-2B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct-AWQ)
+- - [X] [Qwen-VL-Chat](https://huggingface.co/Qwen/Qwen-VL-Chat)
 - [X] [vikhyatk](https://huggingface.co/vikhyatk)
 - - [X] [moondream2](https://huggingface.co/vikhyatk/moondream2)
 - - [X] [moondream1](https://huggingface.co/vikhyatk/moondream1) (0.28.1-alt only)
@@ -143,7 +144,6 @@ If you can't find your favorite model, you can [open a new issue](https://github
 
 ## Recent updates
 
-
 Version 0.32.0
 
 - new model support: From AIDC-AI, Ovis1.5-Gemma2-9B and Ovis1.5-Llama3-8B
@@ -492,4 +492,5 @@ In /etc/docker/daemon.json:
     "default-runtime": "nvidia"
 }
 ```
-- In Windows, be sure you have WSL2 installed and docker is configured to use it. Also make sure your nvidia drivers are up to date.
\ No newline at end of file
+- On Windows, be sure you have WSL2 installed and Docker is configured to use it. Also make sure your NVIDIA drivers are up to date.
+
diff --git a/backend/omchat.py b/backend/omchat.py
index 1075bf6..cd20c5f 100644
--- a/backend/omchat.py
+++ b/backend/omchat.py
@@ -36,7 +36,6 @@ async def stream_chat_with_images(self, request: ImageChatRequest) -> AsyncGener
         inputs = self.processor(prompt, images=images, return_tensors="pt").to(self.model.device)
 
         default_params = dict(
-            max_new_tokens=256,
             do_sample=False,
             eos_token_id=self.eos_token_id,
             pad_token_id=self.processor.tokenizer.pad_token_id,
diff --git a/model_conf_tests.json b/model_conf_tests.json
index 4728860..e27939b 100644
--- a/model_conf_tests.json
+++ b/model_conf_tests.json
@@ -47,6 +47,7 @@
   ["Qwen/Qwen2-VL-2B-Instruct", "-A", "flash_attention_2"],
   ["Qwen/Qwen2-VL-7B-Instruct-AWQ", "-A", "flash_attention_2"],
   ["Qwen/Qwen2-VL-7B-Instruct", "-A", "flash_attention_2"],
+  ["Qwen/Qwen2-VL-72B-Instruct-AWQ", "-A", "flash_attention_2"],
   ["Salesforce/xgen-mm-phi3-mini-instruct-dpo-r-v1.5"],
   ["Salesforce/xgen-mm-phi3-mini-instruct-interleave-r-v1.5"],
   ["Salesforce/xgen-mm-phi3-mini-instruct-singleimg-r-v1.5"],
diff --git a/vision.sample.env b/vision.sample.env
index 03249c7..5e5a638 100644
--- a/vision.sample.env
+++ b/vision.sample.env
@@ -52,6 +52,7 @@ HF_HUB_ENABLE_HF_TRANSFER=1
 #CLI_COMMAND="python vision.py -m Qwen/Qwen2-VL-2B-Instruct -A flash_attention_2"  # test pass✅, time: 17.3s, mem: 16.6GB, 13/13 tests passed.
 #CLI_COMMAND="python vision.py -m Qwen/Qwen2-VL-7B-Instruct-AWQ -A flash_attention_2"  # test pass✅, time: 21.2s, mem: 18.8GB, 13/13 tests passed.
 #CLI_COMMAND="python vision.py -m Qwen/Qwen2-VL-7B-Instruct -A flash_attention_2"  # test pass✅, time: 20.6s, mem: 27.8GB, 13/13 tests passed.
+#CLI_COMMAND="python vision.py -m Qwen/Qwen2-VL-72B-Instruct-AWQ -A flash_attention_2"  # test pass✅, time: 35.2s, mem: 44.5GB, 13/13 tests passed.
 #CLI_COMMAND="python vision.py -m Salesforce/xgen-mm-phi3-mini-instruct-dpo-r-v1.5"  # test pass✅, time: 8.9s, mem: 9.7GB, 13/13 tests passed.
 #CLI_COMMAND="python vision.py -m Salesforce/xgen-mm-phi3-mini-instruct-interleave-r-v1.5"  # test pass✅, time: 3.8s, mem: 9.8GB, 13/13 tests passed.
 #CLI_COMMAND="python vision.py -m Salesforce/xgen-mm-phi3-mini-instruct-singleimg-r-v1.5"  # test pass✅, time: 8.7s, mem: 9.4GB, 13/13 tests passed.