diff --git a/README.md b/README.md
index 6d28f58..7a657f6 100644
--- a/README.md
+++ b/README.md
@@ -119,6 +119,11 @@ See: [OpenVLM Leaderboard](https://huggingface.co/spaces/opencompass/open_vlm_le
 
 ## Recent updates
 
+Version 0.28.1
+
+- Update moondream2 support to 2024-07-23
+- Pin openbmb/MiniCPM-Llama3-V-2_5 revision
+
 Version 0.28.0
 
 - new model support: internlm-xcomposer2d5-7b
diff --git a/backend/minicpm.py b/backend/minicpm.py
index 5eaee04..557e8f5 100644
--- a/backend/minicpm.py
+++ b/backend/minicpm.py
@@ -2,8 +2,8 @@
 
 from vision_qna import *
 
-# openbmb/MiniCPM-Llama3-V-2_5
-# openbmb/MiniCPM-V-2 - maybe broken after revision: str = "187851962daa9b63072d40ec802f597b71bff532"
+# openbmb/MiniCPM-Llama3-V-2_5 # broken after 45387f99a455e11801b78a0b24811856688e0c8b
+# openbmb/MiniCPM-V-2 - 4bit broken
 # openbmb/MiniCPM-V aka OmniLMM-3B
 
 class VisionQnA(VisionQnABase):
@@ -14,6 +14,10 @@ class VisionQnA(VisionQnABase):
     def __init__(self, model_id: str, device: str, device_map: str = 'auto', extra_params = {}, format = None):
         super().__init__(model_id, device, device_map, extra_params, format)
 
+        # I wish there was a better way to do this...
+        if model_id == 'openbmb/MiniCPM-Llama3-V-2_5':
+            self.revision = '45387f99a455e11801b78a0b24811856688e0c8b'
+
         self.tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=self.params.get('trust_remote_code', False))
         self.model = AutoModel.from_pretrained(**self.params).eval()
 
@@ -41,7 +45,7 @@ async def stream_chat_with_images(self, request: ImageChatRequest) -> AsyncGener
                 msgs.extend([{ 'role': m.role, 'content': c.text }])
 
         if image is None:
-            image = await url_to_image(transparent_pixel_url)
+            image = await url_to_image(black_pixel_url)
 
         # default uses num_beams: 3, but if streaming/sampling is requested, switch the defaults.
         default_sampling_params = {
diff --git a/backend/moondream2.py b/backend/moondream2.py
index 5959179..db5072e 100644
--- a/backend/moondream2.py
+++ b/backend/moondream2.py
@@ -7,7 +7,7 @@
 
 class VisionQnA(VisionQnABase):
     model_name: str = "moondream2"
-    revision: str = '2024-05-20' # 'main'
+    revision: str = '2024-07-23' # 'main'
     format: str = 'phi15'
     vision_layers: List[str] = ["vision_encoder"]
 
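Both pins above rely on the `revision` argument that `from_pretrained` accepts; the plumbing from `self.revision` into the loader's parameters is outside this diff. A minimal sketch of the same pinning done directly against the Hub (illustrative, not the backend's exact call):

```python
# Sketch: pin Hugging Face Hub revisions the way these hunks do indirectly.
from transformers import AutoModel, AutoTokenizer

# MiniCPM-Llama3-V-2_5, pinned to the last known-good commit hash:
model = AutoModel.from_pretrained(
    'openbmb/MiniCPM-Llama3-V-2_5',
    revision='45387f99a455e11801b78a0b24811856688e0c8b',
    trust_remote_code=True,  # the repo ships custom modeling code
)

# moondream2, pinned to a dated release tag instead of 'main':
tokenizer = AutoTokenizer.from_pretrained('vikhyatk/moondream2', revision='2024-07-23')
```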
diff --git a/requirements.txt b/requirements.txt
index 271c4e2..e4b12f7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,10 +4,10 @@ bitsandbytes
 fastapi
 # See: https://github.com/bdashore3/flash-attention/releases for other windows flash_attn releases
 # And: https://github.com/Dao-AILab/flash-attention/releases for linux.
-https://github.com/Dao-AILab/flash-attention/releases/download/v2.5.9.post1/flash_attn-2.5.9.post1+cu122torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/Dao-AILab/flash-attention/releases/download/v2.5.9.post1/flash_attn-2.5.9.post1+cu122torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/bdashore3/flash-attention/releases/download/v2.5.9.post1/flash_attn-2.5.9.post1+cu122torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/bdashore3/flash-attention/releases/download/v2.5.9.post1/flash_attn-2.5.9.post1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.2/flash_attn-2.6.2+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.2/flash_attn-2.6.2+cu123torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/bdashore3/flash-attention/releases/download/v2.6.2/flash_attn-2.6.1+cu123torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/bdashore3/flash-attention/releases/download/v2.6.2/flash_attn-2.6.1+cu123torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
 flash_attn; python_version != "3.10" and python_version != "3.11"
 hf_transfer
 loguru
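The `; platform_system == ...` suffixes above are PEP 508 environment markers: pip evaluates each marker against the installing interpreter and only installs the wheel whose marker is true (or the plain `flash_attn` fallback on other Python versions). A quick way to check which line a given machine would select, sketched with the `packaging` library:

```python
# Sketch: evaluate PEP 508 environment markers the way pip does.
from packaging.markers import Marker

linux_py311 = Marker(
    'platform_system == "Linux" and platform_machine == "x86_64" '
    'and python_version == "3.11"'
)
fallback = Marker('python_version != "3.10" and python_version != "3.11"')

print(linux_py311.evaluate())  # True only on x86_64 Linux with Python 3.11
print(fallback.evaluate())     # True when neither pinned wheel applies
```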
-#CLI_COMMAND="python vision.py -m BAAI/Bunny-v1_1-Llama-3-8B-V --load-in-4bit" # test pass✅, time: 10.1s, mem: 9.3GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m BAAI/Bunny-v1_1-Llama-3-8B-V" # test pass✅, time: 9.9s, mem: 19.6GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m BAAI/Emu2-Chat --load-in-4bit" # test pass✅, time: 27.0s, mem: 29.3GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m BAAI/Emu2-Chat --max-memory=0:78GiB,1:20GiB" # test pass✅, time: 21.7s, mem: 71.8GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m HuggingFaceM4/idefics2-8b --use-flash-attn --device-map cuda:0 --load-in-4bit" # test pass✅, time: 11.5s, mem: 12.9GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m HuggingFaceM4/idefics2-8b --use-flash-attn --device-map cuda:0" # test pass✅, time: 11.4s, mem: 22.5GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m HuggingFaceM4/idefics2-8b-AWQ --use-flash-attn --device-map cuda:0" # test pass✅, time: 13.0s, mem: 12.8GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m HuggingFaceM4/idefics2-8b-chatty --use-flash-attn --device-map cuda:0 --load-in-4bit" # test pass✅, time: 16.0s, mem: 12.9GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m HuggingFaceM4/idefics2-8b-chatty --use-flash-attn --device-map cuda:0" # test pass✅, time: 14.1s, mem: 22.5GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m HuggingFaceM4/idefics2-8b-chatty-AWQ --use-flash-attn --device-map cuda:0" # test pass✅, time: 14.7s, mem: 12.8GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --device-map cuda:0 --load-in-4bit" # test pass✅, time: 20.9s, mem: 26.9GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --device-map cuda:0 --max-tiles 40 --load-in-4bit" # test pass✅, time: 27.1s, mem: 30.0GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --device-map cuda:0 --max-tiles 40" # test pass✅, time: 25.3s, mem: 54.6GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m BAAI/Bunny-Llama-3-8B-V --load-in-4bit" # test pass✅, time: 8.0s, mem: 8.7GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m BAAI/Bunny-Llama-3-8B-V" # test pass✅, time: 7.4s, mem: 19.6GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m BAAI/Bunny-v1_0-2B-zh --load-in-4bit" # test pass✅, time: 7.8s, mem: 9.2GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m BAAI/Bunny-v1_0-2B-zh" # test pass✅, time: 5.4s, mem: 10.8GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m BAAI/Bunny-v1_0-3B --load-in-4bit" # test pass✅, time: 11.5s, mem: 8.4GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m BAAI/Bunny-v1_0-3B" # test pass✅, time: 8.4s, mem: 11.9GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m BAAI/Bunny-v1_0-3B-zh" # test pass✅, time: 7.4s, mem: 12.4GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m BAAI/Bunny-v1_0-4B --load-in-4bit" # test pass✅, time: 8.8s, mem: 5.1GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m BAAI/Bunny-v1_0-4B" # test pass✅, time: 6.3s, mem: 12.2GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m BAAI/Bunny-v1_1-4B --load-in-4bit" # test pass✅, time: 8.9s, mem: 5.8GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m BAAI/Bunny-v1_1-4B" # test pass✅, time: 8.1s, mem: 13.1GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m BAAI/Bunny-v1_1-Llama-3-8B-V --load-in-4bit" # test pass✅, time: 10.0s, mem: 9.3GB, 13/13 tests passed. 
+#CLI_COMMAND="python vision.py -m BAAI/Bunny-v1_1-Llama-3-8B-V" # test pass✅, time: 9.5s, mem: 19.6GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m BAAI/Emu2-Chat --load-in-4bit" # test pass✅, time: 23.7s, mem: 29.3GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m BAAI/Emu2-Chat --max-memory=0:78GiB,1:20GiB" # test pass✅, time: 21.4s, mem: 71.8GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m HuggingFaceM4/idefics2-8b --use-flash-attn --device-map cuda:0 --load-in-4bit" # test pass✅, time: 11.6s, mem: 12.9GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m HuggingFaceM4/idefics2-8b --use-flash-attn --device-map cuda:0" # test pass✅, time: 10.9s, mem: 22.5GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m HuggingFaceM4/idefics2-8b-AWQ --use-flash-attn --device-map cuda:0" # test pass✅, time: 12.4s, mem: 12.8GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m HuggingFaceM4/idefics2-8b-chatty --use-flash-attn --device-map cuda:0 --load-in-4bit" # test pass✅, time: 15.6s, mem: 12.9GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m HuggingFaceM4/idefics2-8b-chatty --use-flash-attn --device-map cuda:0" # test pass✅, time: 13.3s, mem: 22.5GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m HuggingFaceM4/idefics2-8b-chatty-AWQ --use-flash-attn --device-map cuda:0" # test pass✅, time: 14.5s, mem: 12.8GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --device-map cuda:0 --load-in-4bit" # test pass✅, time: 20.3s, mem: 26.9GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --device-map cuda:0 --max-tiles 40 --load-in-4bit" # test pass✅, time: 26.4s, mem: 30.0GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --device-map cuda:0 --max-tiles 40" # test pass✅, time: 25.4s, mem: 54.6GB, 13/13 tests passed. #CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --device-map cuda:0" # test pass✅, time: 18.4s, mem: 52.0GB, 13/13 tests passed. #CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5-Int8 --device-map cuda:0" # test pass✅, time: 35.7s, mem: 31.1GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL2-1B --device-map cuda:0 --load-in-4bit" # test pass✅, time: 8.4s, mem: 3.9GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL2-1B --device-map cuda:0" # test pass✅, time: 7.1s, mem: 4.3GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL2-2B --device-map cuda:0 --load-in-4bit" # test pass✅, time: 14.8s, mem: 4.7GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL2-2B --device-map cuda:0" # test pass✅, time: 8.1s, mem: 6.6GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL2-2B-AWQ --device-map cuda:0" # test fail❌, time: 121.4s, mem: 7.2GB, 1/13 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL2-4B --device-map cuda:0 --load-in-4bit" # test pass✅, time: 8.9s, mem: 5.8GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL2-4B --device-map cuda:0" # test pass✅, time: 7.8s, mem: 10.9GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL2-8B --device-map cuda:0 --load-in-4bit" # test pass✅, time: 8.4s, mem: 8.5GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL2-8B --device-map cuda:0" # test pass✅, time: 7.9s, mem: 18.2GB, 13/13 tests passed. 
-#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL2-26B --device-map cuda:0 --load-in-4bit" # test pass✅, time: 22.3s, mem: 26.9GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL2-26B --device-map cuda:0" # test pass✅, time: 20.4s, mem: 52.0GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL2-40B --device-map cuda:0 --load-in-4bit" # test pass✅, time: 56.6s, mem: 32.3GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL2-40B --device-map cuda:0" # test pass✅, time: 69.0s, mem: 77.1GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL2-Llama3-76B --device-map cuda:0 --load-in-4bit" # test pass✅, time: 39.4s, mem: 53.3GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/Mini-InternVL-Chat-2B-V1-5 --load-in-4bit" # test pass✅, time: 6.0s, mem: 5.1GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/Mini-InternVL-Chat-2B-V1-5 --max-tiles 40 --load-in-4bit" # test pass✅, time: 7.0s, mem: 6.8GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/Mini-InternVL-Chat-2B-V1-5 --max-tiles 40" # test pass✅, time: 6.8s, mem: 8.8GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/Mini-InternVL-Chat-2B-V1-5" # test pass✅, time: 6.1s, mem: 7.0GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/Mini-InternVL-Chat-4B-V1-5 --load-in-4bit" # test pass✅, time: 9.2s, mem: 6.2GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/Mini-InternVL-Chat-4B-V1-5 --max-tiles 40 --load-in-4bit" # test pass✅, time: 9.7s, mem: 9.7GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/Mini-InternVL-Chat-4B-V1-5 --max-tiles 40" # test pass✅, time: 8.7s, mem: 14.8GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/Mini-InternVL-Chat-4B-V1-5" # test pass✅, time: 7.6s, mem: 11.2GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m Qwen/Qwen-VL-Chat --load-in-4bit" # test pass✅, time: 9.5s, mem: 11.2GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m Qwen/Qwen-VL-Chat" # test pass✅, time: 6.7s, mem: 19.5GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m Salesforce/xgen-mm-phi3-mini-instruct-r-v1" # test pass✅, time: 10.1s, mem: 10.1GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf --load-in-4bit" # test pass✅, time: 25.5s, mem: 13.3GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf" # test pass✅, time: 21.4s, mem: 37.6GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf --load-in-4bit" # test pass✅, time: 26.5s, mem: 12.8GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf" # test pass✅, time: 20.2s, mem: 36.4GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogvlm2-llama3-chat-19B --load-in-4bit" # test pass✅, time: 32.6s, mem: 22.3GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogvlm2-llama3-chat-19B" # test pass✅, time: 32.3s, mem: 40.6GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogvlm2-llama3-chinese-chat-19B --load-in-4bit" # test pass✅, time: 136.8s, mem: 22.3GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogvlm2-llama3-chinese-chat-19B" # test pass✅, time: 100.9s, mem: 40.6GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m THUDM/glm-4v-9b --device-map cuda:0 --load-in-4bit" # test pass✅, time: 70.4s, mem: 16.4GB, 13/13 tests passed. 
-#CLI_COMMAND="python vision.py -m THUDM/glm-4v-9b --device-map cuda:0" # test pass✅, time: 52.7s, mem: 27.9GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m TIGER-Lab/Mantis-8B-Fuyu --device-map cuda:0 --load-in-4bit" # test pass✅, time: 10.1s, mem: 11.5GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m TIGER-Lab/Mantis-8B-Fuyu --device-map cuda:0" # test pass✅, time: 9.7s, mem: 20.3GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m TIGER-Lab/Mantis-8B-clip-llama3 --use-flash-attn --device-map cuda:0 --load-in-4bit" # test pass✅, time: 9.6s, mem: 7.6GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m TIGER-Lab/Mantis-8B-clip-llama3 --use-flash-attn --device-map cuda:0" # test pass✅, time: 9.2s, mem: 17.4GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m TIGER-Lab/Mantis-8B-siglip-llama3 --use-flash-attn --device-map cuda:0 --load-in-4bit" # test pass✅, time: 8.8s, mem: 8.3GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m TIGER-Lab/Mantis-8B-siglip-llama3 --use-flash-attn --device-map cuda:0" # test pass✅, time: 7.0s, mem: 18.0GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-2B --use-flash-attn" # test pass✅, time: 6.0s, mem: 8.9GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m adept/fuyu-8b --device-map cuda:0 --load-in-4bit" # test pass✅, time: 14.8s, mem: 15.9GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m adept/fuyu-8b --device-map cuda:0" # test pass✅, time: 18.9s, mem: 25.0GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m cognitivecomputations/dolphin-vision-72b --use-flash-attn --load-in-4bit" # test pass✅, time: 49.3s, mem: 49.3GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m cognitivecomputations/dolphin-vision-7b --use-flash-attn --load-in-4bit" # test fail❌, time: -1.0s, mem: -1.0GB, Error: Server failed to start (exit). -#CLI_COMMAND="python vision.py -m cognitivecomputations/dolphin-vision-7b --use-flash-attn" # test fail❌, time: -1.0s, mem: -1.0GB, Error: Server failed to start (exit). -#CLI_COMMAND="python vision.py -m echo840/Monkey --load-in-4bit" # test pass✅, time: 9.4s, mem: 15.8GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m echo840/Monkey" # test pass✅, time: 9.0s, mem: 21.8GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m echo840/Monkey-Chat --load-in-4bit" # test pass✅, time: 12.7s, mem: 15.8GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m echo840/Monkey-Chat" # test pass✅, time: 11.7s, mem: 21.8GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m failspy/Phi-3-vision-128k-instruct-abliterated-alpha --use-flash-attn --load-in-4bit" # test pass✅, time: 11.3s, mem: 7.1GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m failspy/Phi-3-vision-128k-instruct-abliterated-alpha --use-flash-attn" # test pass✅, time: 9.3s, mem: 12.3GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2d5-7b --use-flash-attn --device-map cuda:0 --load-in-4bit" # test fail❌, time: 5.6s, mem: 10.7GB, 1/13 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2d5-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 31.9s, mem: 27.4GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-4khd-7b --use-flash-attn --device-map cuda:0 --load-in-4bit" # test fail❌, time: 2.5s, mem: 7.2GB, 1/13 tests passed. 
-#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-4khd-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 19.8s, mem: 20.6GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b --use-flash-attn --device-map cuda:0 --load-in-4bit" # test fail❌, time: 2.1s, mem: 6.4GB, 1/13 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 26.6s, mem: 19.0GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b-4bit --use-flash-attn" # test pass✅, time: 14.5s, mem: 9.4GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-1_8b --use-flash-attn --device-map cuda:0 --load-in-4bit" # test fail❌, time: 2.1s, mem: 3.0GB, 1/13 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-1_8b --use-flash-attn --device-map cuda:0" # test pass✅, time: 9.6s, mem: 7.2GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b --use-flash-attn --device-map cuda:0 --load-in-4bit" # test fail❌, time: 2.2s, mem: 6.5GB, 1/13 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 26.9s, mem: 20.3GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b-4bit --use-flash-attn" # test pass✅, time: 14.7s, mem: 10.9GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --use-flash-attn --device-map cuda:0 --load-in-4bit" # test pass✅, time: 13.0s, mem: 9.6GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL2-1B --device-map cuda:0 --load-in-4bit" # test pass✅, time: 7.7s, mem: 3.9GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL2-1B --device-map cuda:0" # test pass✅, time: 6.6s, mem: 4.3GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL2-2B --device-map cuda:0 --load-in-4bit" # test pass✅, time: 14.4s, mem: 4.7GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL2-2B --device-map cuda:0" # test pass✅, time: 7.9s, mem: 6.6GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL2-2B-AWQ --device-map cuda:0" # test fail❌, time: 123.0s, mem: 7.2GB, 1/13 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL2-4B --device-map cuda:0 --load-in-4bit" # test pass✅, time: 8.3s, mem: 5.8GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL2-4B --device-map cuda:0" # test pass✅, time: 7.4s, mem: 10.9GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL2-8B --device-map cuda:0 --load-in-4bit" # test pass✅, time: 8.1s, mem: 8.5GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL2-8B --device-map cuda:0" # test pass✅, time: 7.5s, mem: 18.2GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL2-26B --device-map cuda:0 --load-in-4bit" # test pass✅, time: 21.9s, mem: 26.9GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL2-26B --device-map cuda:0" # test pass✅, time: 20.3s, mem: 52.0GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL2-40B --device-map cuda:0 --load-in-4bit" # test pass✅, time: 56.2s, mem: 32.3GB, 13/13 tests passed. 
+#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL2-40B --device-map cuda:0" # test pass✅, time: 68.2s, mem: 77.1GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL2-Llama3-76B --device-map cuda:0 --load-in-4bit" # test pass✅, time: 42.0s, mem: 53.3GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/Mini-InternVL-Chat-2B-V1-5 --load-in-4bit" # test pass✅, time: 5.6s, mem: 5.1GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/Mini-InternVL-Chat-2B-V1-5 --max-tiles 40 --load-in-4bit" # test pass✅, time: 6.4s, mem: 6.8GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/Mini-InternVL-Chat-2B-V1-5 --max-tiles 40" # test pass✅, time: 6.3s, mem: 8.8GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/Mini-InternVL-Chat-2B-V1-5" # test pass✅, time: 5.4s, mem: 7.0GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/Mini-InternVL-Chat-4B-V1-5 --load-in-4bit" # test pass✅, time: 9.1s, mem: 6.2GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/Mini-InternVL-Chat-4B-V1-5 --max-tiles 40 --load-in-4bit" # test pass✅, time: 9.3s, mem: 9.7GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/Mini-InternVL-Chat-4B-V1-5 --max-tiles 40" # test pass✅, time: 8.5s, mem: 14.8GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/Mini-InternVL-Chat-4B-V1-5" # test pass✅, time: 7.4s, mem: 11.2GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m Qwen/Qwen-VL-Chat --load-in-4bit" # test pass✅, time: 9.0s, mem: 11.2GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m Qwen/Qwen-VL-Chat" # test pass✅, time: 6.0s, mem: 19.5GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m Salesforce/xgen-mm-phi3-mini-instruct-r-v1" # test pass✅, time: 9.7s, mem: 10.1GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf --load-in-4bit" # test pass✅, time: 25.0s, mem: 13.3GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf" # test pass✅, time: 21.1s, mem: 37.6GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf --load-in-4bit" # test pass✅, time: 26.4s, mem: 12.8GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf" # test pass✅, time: 20.3s, mem: 36.4GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogvlm2-llama3-chat-19B --load-in-4bit" # test pass✅, time: 32.2s, mem: 22.3GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogvlm2-llama3-chat-19B" # test pass✅, time: 32.2s, mem: 40.6GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogvlm2-llama3-chinese-chat-19B --load-in-4bit" # test pass✅, time: 134.6s, mem: 22.3GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogvlm2-llama3-chinese-chat-19B" # test pass✅, time: 99.9s, mem: 40.6GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/glm-4v-9b --device-map cuda:0 --load-in-4bit" # test pass✅, time: 70.2s, mem: 16.4GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/glm-4v-9b --device-map cuda:0" # test pass✅, time: 50.8s, mem: 27.9GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m TIGER-Lab/Mantis-8B-Fuyu --device-map cuda:0 --load-in-4bit" # test pass✅, time: 9.5s, mem: 11.5GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m TIGER-Lab/Mantis-8B-Fuyu --device-map cuda:0" # test pass✅, time: 8.9s, mem: 20.3GB, 13/13 tests passed. 
+#CLI_COMMAND="python vision.py -m TIGER-Lab/Mantis-8B-clip-llama3 --use-flash-attn --device-map cuda:0 --load-in-4bit" # test pass✅, time: 9.1s, mem: 7.6GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m TIGER-Lab/Mantis-8B-clip-llama3 --use-flash-attn --device-map cuda:0" # test pass✅, time: 8.8s, mem: 17.4GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m TIGER-Lab/Mantis-8B-siglip-llama3 --use-flash-attn --device-map cuda:0 --load-in-4bit" # test pass✅, time: 8.1s, mem: 8.3GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m TIGER-Lab/Mantis-8B-siglip-llama3 --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.6s, mem: 18.0GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-2B --use-flash-attn" # test pass✅, time: 5.7s, mem: 8.9GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m adept/fuyu-8b --device-map cuda:0 --load-in-4bit" # test pass✅, time: 14.4s, mem: 15.9GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m adept/fuyu-8b --device-map cuda:0" # test pass✅, time: 18.6s, mem: 25.0GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m cognitivecomputations/dolphin-vision-72b --use-flash-attn --load-in-4bit" # test pass✅, time: 48.2s, mem: 49.3GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m cognitivecomputations/dolphin-vision-7b --use-flash-attn --load-in-4bit" # test pass✅, time: 24.3s, mem: 9.3GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m cognitivecomputations/dolphin-vision-7b --use-flash-attn" # test fail❌, time: 8.0s, mem: 17.0GB, Test failed with Exception: Error code: 500 - {'message': 'InternalServerError', 'code': 500} +#CLI_COMMAND="python vision.py -m echo840/Monkey --load-in-4bit" # test pass✅, time: 9.0s, mem: 15.8GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m echo840/Monkey" # test pass✅, time: 8.5s, mem: 21.8GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m echo840/Monkey-Chat --load-in-4bit" # test pass✅, time: 12.2s, mem: 15.8GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m echo840/Monkey-Chat" # test pass✅, time: 10.9s, mem: 21.8GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m failspy/Phi-3-vision-128k-instruct-abliterated-alpha --use-flash-attn --load-in-4bit" # test pass✅, time: 10.5s, mem: 7.1GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m failspy/Phi-3-vision-128k-instruct-abliterated-alpha --use-flash-attn" # test pass✅, time: 8.7s, mem: 12.3GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2d5-7b --use-flash-attn --device-map cuda:0 --load-in-4bit" # test fail❌, time: 5.0s, mem: 10.7GB, 1/13 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2d5-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 31.8s, mem: 27.4GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-4khd-7b --use-flash-attn --device-map cuda:0 --load-in-4bit" # test fail❌, time: 2.4s, mem: 7.2GB, 1/13 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-4khd-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 20.2s, mem: 20.6GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b --use-flash-attn --device-map cuda:0 --load-in-4bit" # test fail❌, time: 1.4s, mem: 6.4GB, 1/13 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 25.6s, mem: 19.0GB, 13/13 tests passed. 
+#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b-4bit --use-flash-attn" # test pass✅, time: 13.8s, mem: 9.4GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-1_8b --use-flash-attn --device-map cuda:0 --load-in-4bit" # test fail❌, time: 1.6s, mem: 3.0GB, 1/13 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-1_8b --use-flash-attn --device-map cuda:0" # test pass✅, time: 7.5s, mem: 7.2GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b --use-flash-attn --device-map cuda:0 --load-in-4bit" # test fail❌, time: 1.6s, mem: 6.5GB, 1/13 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 25.4s, mem: 20.3GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b-4bit --use-flash-attn" # test pass✅, time: 13.2s, mem: 10.9GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --use-flash-attn --device-map cuda:0 --load-in-4bit" # test pass✅, time: 12.9s, mem: 9.6GB, 13/13 tests passed. #CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --use-flash-attn --device-map cuda:0" # test pass✅, time: 9.8s, mem: 26.8GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --use-flash-attn --device-map cuda:0 --load-in-4bit" # test pass✅, time: 9.4s, mem: 5.7GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --use-flash-attn --device-map cuda:0" # test pass✅, time: 8.7s, mem: 14.5GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-34b-hf --use-flash-attn --load-in-4bit" # test pass✅, time: 61.6s, mem: 23.4GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-34b-hf --use-flash-attn" # test pass✅, time: 69.9s, mem: 68.7GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-mistral-7b-hf --use-flash-attn --load-in-4bit" # test pass✅, time: 21.4s, mem: 8.2GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-mistral-7b-hf --use-flash-attn" # test pass✅, time: 19.2s, mem: 17.4GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-vicuna-13b-hf --use-flash-attn --load-in-4bit" # test pass✅, time: 15.2s, mem: 17.2GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --use-flash-attn --device-map cuda:0 --load-in-4bit" # test pass✅, time: 9.3s, mem: 5.7GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --use-flash-attn --device-map cuda:0" # test pass✅, time: 8.2s, mem: 14.5GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-34b-hf --use-flash-attn --load-in-4bit" # test pass✅, time: 60.5s, mem: 23.4GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-34b-hf --use-flash-attn" # test pass✅, time: 69.7s, mem: 68.7GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-mistral-7b-hf --use-flash-attn --load-in-4bit" # test pass✅, time: 21.3s, mem: 8.2GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-mistral-7b-hf --use-flash-attn" # test pass✅, time: 19.1s, mem: 17.4GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-vicuna-13b-hf --use-flash-attn --load-in-4bit" # test pass✅, time: 15.4s, mem: 17.2GB, 13/13 tests passed. 
#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-vicuna-13b-hf --use-flash-attn" # test pass✅, time: 14.6s, mem: 33.8GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-vicuna-7b-hf --use-flash-attn --load-in-4bit" # test pass✅, time: 13.4s, mem: 9.6GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-vicuna-7b-hf --use-flash-attn" # test pass✅, time: 12.8s, mem: 19.0GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m microsoft/Florence-2-base-ft --use-flash-attn --device-map cuda:0 --load-in-4bit" # test pass✅, time: 3.1s, mem: 1.0GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m microsoft/Florence-2-base-ft --use-flash-attn --device-map cuda:0" # test pass✅, time: 2.9s, mem: 1.3GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m microsoft/Florence-2-large-ft --use-flash-attn --device-map cuda:0 --load-in-4bit" # test pass✅, time: 3.5s, mem: 1.5GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m microsoft/Florence-2-large-ft --use-flash-attn --device-map cuda:0" # test pass✅, time: 3.0s, mem: 2.4GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m microsoft/Phi-3-vision-128k-instruct --use-flash-attn --load-in-4bit" # test pass✅, time: 11.1s, mem: 7.1GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m microsoft/Phi-3-vision-128k-instruct --use-flash-attn" # test pass✅, time: 9.2s, mem: 12.3GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-Llama3-V-2_5 --use-flash-attn --device-map cuda:0 --load-in-4bit" # test pass✅, time: 11.8s, mem: 9.6GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-Llama3-V-2_5 --use-flash-attn --device-map cuda:0" # test pass✅, time: 10.4s, mem: 19.2GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V --use-flash-attn --device-map cuda:0 --load-in-4bit" # test pass✅, time: 7.1s, mem: 4.8GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-vicuna-7b-hf --use-flash-attn --load-in-4bit" # test pass✅, time: 13.0s, mem: 9.6GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-vicuna-7b-hf --use-flash-attn" # test pass✅, time: 12.3s, mem: 19.0GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m microsoft/Florence-2-base-ft --use-flash-attn --device-map cuda:0 --load-in-4bit" # test pass✅, time: 2.8s, mem: 1.0GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m microsoft/Florence-2-base-ft --use-flash-attn --device-map cuda:0" # test pass✅, time: 2.4s, mem: 1.3GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m microsoft/Florence-2-large-ft --use-flash-attn --device-map cuda:0 --load-in-4bit" # test pass✅, time: 3.1s, mem: 1.5GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m microsoft/Florence-2-large-ft --use-flash-attn --device-map cuda:0" # test pass✅, time: 2.5s, mem: 2.4GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m microsoft/Phi-3-vision-128k-instruct --use-flash-attn --load-in-4bit" # test pass✅, time: 10.3s, mem: 7.3GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m microsoft/Phi-3-vision-128k-instruct --use-flash-attn" # test pass✅, time: 8.7s, mem: 12.3GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-Llama3-V-2_5 --use-flash-attn --device-map cuda:0 --load-in-4bit" # test pass✅, time: 23.5s, mem: 9.6GB, 13/13 tests passed. 
+#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-Llama3-V-2_5 --use-flash-attn --device-map cuda:0" # test pass✅, time: 19.5s, mem: 19.2GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V --use-flash-attn --device-map cuda:0 --load-in-4bit" # test pass✅, time: 8.2s, mem: 4.8GB, 13/13 tests passed. #CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.2s, mem: 8.6GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V-2 --use-flash-attn --device-map cuda:0 --load-in-4bit" # test fail❌, time: 3.2s, mem: 3.5GB, Test failed with Exception: Error code: 500 - {'message': 'InternalServerError', 'code': 500} -#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V-2 --use-flash-attn --device-map cuda:0" # test pass✅, time: 10.5s, mem: 9.0GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m qihoo360/360VL-8B --use-flash-attn --load-in-4bit" # test pass✅, time: 10.2s, mem: 8.3GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m qihoo360/360VL-8B --use-flash-attn" # test pass✅, time: 8.5s, mem: 17.3GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m qnguyen3/nanoLLaVA --use-flash-attn --device-map cuda:0 --load-in-4bit" # test pass✅, time: 10.4s, mem: 7.7GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m qnguyen3/nanoLLaVA --use-flash-attn --device-map cuda:0" # test pass✅, time: 7.5s, mem: 8.1GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m qnguyen3/nanoLLaVA-1.5 --use-flash-attn --device-map cuda:0 --load-in-4bit" # test pass✅, time: 8.4s, mem: 7.7GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m qnguyen3/nanoLLaVA-1.5 --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.9s, mem: 8.1GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V-2 --use-flash-attn --device-map cuda:0 --load-in-4bit" # test fail❌, time: 3.1s, mem: 3.5GB, Test failed with Exception: Error code: 500 - {'message': 'InternalServerError', 'code': 500} +#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V-2 --use-flash-attn --device-map cuda:0" # test pass✅, time: 10.1s, mem: 9.0GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m qihoo360/360VL-8B --use-flash-attn --load-in-4bit" # test pass✅, time: 9.8s, mem: 8.3GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m qihoo360/360VL-8B --use-flash-attn" # test pass✅, time: 7.8s, mem: 17.3GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m qnguyen3/nanoLLaVA --use-flash-attn --device-map cuda:0 --load-in-4bit" # test pass✅, time: 10.2s, mem: 7.7GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m qnguyen3/nanoLLaVA --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.9s, mem: 8.1GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m qnguyen3/nanoLLaVA-1.5 --use-flash-attn --device-map cuda:0 --load-in-4bit" # test pass✅, time: 7.9s, mem: 7.7GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m qnguyen3/nanoLLaVA-1.5 --use-flash-attn --device-map cuda:0" # test pass✅, time: 7.1s, mem: 8.1GB, 13/13 tests passed. #CLI_COMMAND="python vision.py -m qresearch/llama-3-vision-alpha-hf --device cuda:0 --load-in-4bit" # test pass✅, time: 9.1s, mem: 8.1GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m qresearch/llama-3-vision-alpha-hf --device cuda:0" # test pass✅, time: 9.7s, mem: 17.6GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m tiiuae/falcon-11B-vlm --use-flash-attn --load-in-4bit" # test pass✅, time: 15.5s, mem: 17.0GB, 13/13 tests passed. 
-#CLI_COMMAND="python vision.py -m tiiuae/falcon-11B-vlm --use-flash-attn" # test pass✅, time: 17.1s, mem: 32.3GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m togethercomputer/Llama-3-8B-Dragonfly-Med-v1 --load-in-4bit" # test pass✅, time: 11.4s, mem: 8.1GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m togethercomputer/Llama-3-8B-Dragonfly-Med-v1" # test pass✅, time: 10.5s, mem: 17.7GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m togethercomputer/Llama-3-8B-Dragonfly-v1 --load-in-4bit" # test pass✅, time: 10.5s, mem: 8.1GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m togethercomputer/Llama-3-8B-Dragonfly-v1" # test pass✅, time: 13.7s, mem: 17.7GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m vikhyatk/moondream2 --use-flash-attn --load-in-4bit" # test pass✅, time: 7.0s, mem: 2.9GB, 13/13 tests passed. -#CLI_COMMAND="python vision.py -m vikhyatk/moondream2 --use-flash-attn" # test pass✅, time: 5.7s, mem: 4.6GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m qresearch/llama-3-vision-alpha-hf --device cuda:0" # test pass✅, time: 9.0s, mem: 17.6GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m tiiuae/falcon-11B-vlm --use-flash-attn --load-in-4bit" # test pass✅, time: 15.2s, mem: 17.0GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m tiiuae/falcon-11B-vlm --use-flash-attn" # test pass✅, time: 16.7s, mem: 32.3GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m togethercomputer/Llama-3-8B-Dragonfly-Med-v1 --load-in-4bit" # test pass✅, time: 10.7s, mem: 8.1GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m togethercomputer/Llama-3-8B-Dragonfly-Med-v1" # test pass✅, time: 9.9s, mem: 17.7GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m togethercomputer/Llama-3-8B-Dragonfly-v1 --load-in-4bit" # test pass✅, time: 10.1s, mem: 8.1GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m togethercomputer/Llama-3-8B-Dragonfly-v1" # test pass✅, time: 13.1s, mem: 17.7GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m vikhyatk/moondream2 --use-flash-attn --load-in-4bit" # test pass✅, time: 5.4s, mem: 2.9GB, 13/13 tests passed. +#CLI_COMMAND="python vision.py -m vikhyatk/moondream2 --use-flash-attn" # test pass✅, time: 4.2s, mem: 4.6GB, 13/13 tests passed. + +