diff --git a/README.md b/README.md index 1839608..99eb8c8 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,8 @@ An OpenAI API compatible vision server, it functions like `gpt-4-vision-preview` - - [X] [idefics2-8b-AWQ](https://huggingface.co/HuggingFaceM4/idefics2-8b-AWQ) (main docker only, wont gpu split) - - [X] [idefics2-8b-chatty](https://huggingface.co/HuggingFaceM4/idefics2-8b-chatty) (main docker only, wont gpu split) - - [X] [idefics2-8b-chatty-AWQ](https://huggingface.co/HuggingFaceM4/idefics2-8b-chatty-AWQ) (main docker only, wont gpu split) +- [X] [Microsoft](https://huggingface.co/microsoft/) +- - [X] [Phi-3-vision-128k-instruct](https://huggingface.co/microsoft/Phi-3-vision-128k-instruct) (main docker only) - [X] [qihoo360](https://huggingface.co/qihoo360) - - [X] [360VL-8B](https://huggingface.co/qihoo360/360VL-8B) - - [X] [360VL-70B](https://huggingface.co/qihoo360/360VL-70B) (untested) @@ -84,6 +86,10 @@ See: [OpenVLM Leaderboard](https://huggingface.co/spaces/opencompass/open_vlm_le ## Recent updates +Version 0.16.0 + +- new model support: microsoft/Phi-3-vision-128k-instruct + Version 0.15.1 - new model support: OpenGVLab/Mini-InternVL-Chat-2B-V1-5 diff --git a/backend/phi3.py b/backend/phi3.py new file mode 100644 index 0000000..bca6717 --- /dev/null +++ b/backend/phi3.py @@ -0,0 +1,34 @@ +from transformers import AutoProcessor, AutoModelForCausalLM + +from vision_qna import * + +# microsoft/Phi-3-vision-128k-instruct + +class VisionQnA(VisionQnABase): + model_name: str = "phi3" + + def __init__(self, model_id: str, device: str, device_map: str = 'auto', extra_params = {}, format = None): + super().__init__(model_id, device, device_map, extra_params, format) + + self.processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=self.params.get('trust_remote_code', False)) + self.model = AutoModelForCausalLM.from_pretrained(**self.params).eval() + + print(f"Loaded on device: {self.model.device} with dtype: {self.model.dtype}") + + async def chat_with_images(self, request: ImageChatRequest) -> str: + images, prompt = await phi3_prompt_from_messages(request.messages) + + inputs = self.processor(prompt, images=images, return_tensors="pt").to(self.model.device) + + default_params = { + "temperature": 0.0, + "do_sample": False, + "eos_token_id": self.processor.tokenizer.eos_token_id, + } + + params = self.get_generation_params(request, default_params) + + output = self.model.generate(**inputs, **params) + response = self.processor.decode(output[0][inputs.input_ids.shape[1]:], skip_special_tokens=True, clean_up_tokenization_spaces=False) + + return response diff --git a/model_conf_tests.alt.json b/model_conf_tests.alt.json index 112cd26..322ab01 100644 --- a/model_conf_tests.alt.json +++ b/model_conf_tests.alt.json @@ -1,53 +1,51 @@ [ - ["vikhyatk/moondream2", "--use-flash-attn"], - ["vikhyatk/moondream1"], - ["echo840/Monkey"], - ["echo840/Monkey-Chat"], ["OpenGVLab/InternVL-Chat-V1-5", "--device-map", "cuda:0"], + ["OpenGVLab/InternVL-Chat-V1-5", "--load-in-4bit", "--device-map", "cuda:0"], + ["OpenGVLab/InternVL-Chat-V1-5-Int8", "--device-map", "cuda:0"], ["OpenGVLab/Mini-InternVL-Chat-2B-V1-5"], + ["Qwen/Qwen-VL-Chat"], + ["THUDM/cogagent-chat-hf", "--load-in-4bit"], + ["THUDM/cogagent-chat-hf"], + ["THUDM/cogvlm-chat-hf", "--load-in-4bit"], + ["THUDM/cogvlm-chat-hf"], ["THUDM/cogvlm2-llama3-chat-19B"], ["THUDM/cogvlm2-llama3-chinese-chat-19B"], - ["THUDM/cogvlm-chat-hf"], - ["THUDM/cogagent-chat-hf"], - ["Qwen/Qwen-VL-Chat"], - ["YanweiLi/MGM-2B", 
"--use-flash-attn"], - ["YanweiLi/MGM-7B", "--use-flash-attn"], - ["YanweiLi/MGM-7B-HD", "--use-flash-attn"], + ["YanweiLi/MGM-13B", "--load-in-4bit", "--use-flash-attn"], ["YanweiLi/MGM-13B", "--use-flash-attn"], + ["YanweiLi/MGM-13B-HD", "--load-in-4bit", "--use-flash-attn"], ["YanweiLi/MGM-13B-HD", "--use-flash-attn"], + ["YanweiLi/MGM-2B", "--use-flash-attn"], + ["YanweiLi/MGM-34B", "--load-in-4bit", "--use-flash-attn"], ["YanweiLi/MGM-34B", "--use-flash-attn"], + ["YanweiLi/MGM-34B-HD", "--load-in-4bit", "--use-flash-attn"], ["YanweiLi/MGM-34B-HD", "--use-flash-attn"], + ["YanweiLi/MGM-7B", "--load-in-4bit", "--use-flash-attn"], + ["YanweiLi/MGM-7B", "--use-flash-attn"], + ["YanweiLi/MGM-7B-HD", "--load-in-4bit", "--use-flash-attn"], + ["YanweiLi/MGM-7B-HD", "--use-flash-attn"], + ["YanweiLi/MGM-8x7B", "--load-in-4bit", "--use-flash-attn"], ["YanweiLi/MGM-8x7B", "--use-flash-attn"], + ["YanweiLi/MGM-8x7B-HD", "--load-in-4bit", "--use-flash-attn"], ["YanweiLi/MGM-8x7B-HD", "--use-flash-attn"], - ["qihoo360/360VL-8B", "--use-flash-attn"], ["adept/fuyu-8b", "--device-map", "cuda:0"], + ["echo840/Monkey"], + ["echo840/Monkey-Chat"], ["internlm/internlm-xcomposer2-4khd-7b", "--use-flash-attn", "--device-map", "cuda:0"], ["internlm/internlm-xcomposer2-7b", "--use-flash-attn", "--device-map", "cuda:0"], - ["internlm/internlm-xcomposer2-vl-7b", "--use-flash-attn", "--device-map", "cuda:0"], - ["internlm/internlm-xcomposer2-vl-1_8b", "--use-flash-attn", "--device-map", "cuda:0"], - ["openbmb/MiniCPM-V-2", "--use-flash-attn", "--device-map", "cuda:0"], - ["openbmb/MiniCPM-V", "--use-flash-attn", "--device-map", "cuda:0"], - ["llava-hf/bakLlava-v1-hf", "--use-flash-attn", "--device-map", "cuda:0"], - ["llava-hf/llava-1.5-7b-hf", "--use-flash-attn", "--device-map", "cuda:0"], - ["llava-hf/llava-1.5-13b-hf", "--use-flash-attn", "--device-map", "cuda:0"], - - ["OpenGVLab/InternVL-Chat-V1-5-Int8", "--device-map", "cuda:0"], - ["OpenGVLab/InternVL-Chat-V1-5", "--load-in-4bit", "--device-map", "cuda:0"], - ["THUDM/cogvlm-chat-hf", "--load-in-4bit"], - ["THUDM/cogagent-chat-hf", "--load-in-4bit"], - ["qihoo360/360VL-8B", "--use-flash-attn", "--load-in-4bit"], ["internlm/internlm-xcomposer2-7b-4bit", "--use-flash-attn", "--device", "cuda:0"], + ["internlm/internlm-xcomposer2-vl-1_8b", "--use-flash-attn", "--device-map", "cuda:0"], + ["internlm/internlm-xcomposer2-vl-7b", "--use-flash-attn", "--device-map", "cuda:0"], ["internlm/internlm-xcomposer2-vl-7b-4bit", "--use-flash-attn", "--device", "cuda:0"], ["llava-hf/bakLlava-v1-hf", "--load-in-4bit", "--use-flash-attn"], - ["llava-hf/llava-1.5-7b-hf", "--load-in-4bit", "--use-flash-attn"], + ["llava-hf/bakLlava-v1-hf", "--use-flash-attn", "--device-map", "cuda:0"], ["llava-hf/llava-1.5-13b-hf", "--load-in-4bit", "--use-flash-attn"], - - ["YanweiLi/MGM-7B", "--load-in-4bit", "--use-flash-attn"], - ["YanweiLi/MGM-7B-HD", "--load-in-4bit", "--use-flash-attn"], - ["YanweiLi/MGM-13B", "--load-in-4bit", "--use-flash-attn"], - ["YanweiLi/MGM-13B-HD", "--load-in-4bit", "--use-flash-attn"], - ["YanweiLi/MGM-34B", "--load-in-4bit", "--use-flash-attn"], - ["YanweiLi/MGM-34B-HD", "--load-in-4bit", "--use-flash-attn"], - ["YanweiLi/MGM-8x7B", "--load-in-4bit", "--use-flash-attn"], - ["YanweiLi/MGM-8x7B-HD", "--load-in-4bit", "--use-flash-attn"] + ["llava-hf/llava-1.5-13b-hf", "--use-flash-attn", "--device-map", "cuda:0"], + ["llava-hf/llava-1.5-7b-hf", "--load-in-4bit", "--use-flash-attn"], + ["llava-hf/llava-1.5-7b-hf", "--use-flash-attn", "--device-map", "cuda:0"], + 
["openbmb/MiniCPM-V", "--use-flash-attn", "--device-map", "cuda:0"], + ["openbmb/MiniCPM-V-2", "--use-flash-attn", "--device-map", "cuda:0"], + ["qihoo360/360VL-8B", "--use-flash-attn", "--load-in-4bit"], + ["qihoo360/360VL-8B", "--use-flash-attn"], + ["vikhyatk/moondream1"], + ["vikhyatk/moondream2", "--use-flash-attn"] ] diff --git a/model_conf_tests.json b/model_conf_tests.json index b7e9076..557cd09 100644 --- a/model_conf_tests.json +++ b/model_conf_tests.json @@ -1,57 +1,58 @@ [ - ["vikhyatk/moondream2", "--use-flash-attn"], + ["BAAI/Bunny-Llama-3-8B-V"], + ["BAAI/Emu2-Chat", "--max-memory=0:78GiB,1:20GiB"], + ["HuggingFaceM4/idefics2-8b", "--use-flash-attn", "--device-map", "cuda:0"], + ["HuggingFaceM4/idefics2-8b-AWQ", "--use-flash-attn", "--device-map", "cuda:0"], + ["HuggingFaceM4/idefics2-8b-chatty", "--use-flash-attn", "--device-map", "cuda:0"], + ["HuggingFaceM4/idefics2-8b-chatty-AWQ", "--use-flash-attn", "--device-map", "cuda:0"], ["OpenGVLab/InternVL-Chat-V1-5", "--device-map", "cuda:0"], + ["OpenGVLab/InternVL-Chat-V1-5", "--load-in-4bit", "--device-map", "cuda:0"], + ["OpenGVLab/InternVL-Chat-V1-5-Int8", "--device-map", "cuda:0"], ["OpenGVLab/Mini-InternVL-Chat-2B-V1-5"], - ["HuggingFaceM4/idefics2-8b-chatty", "--use-flash-attn", "--device-map", "cuda:0"], - ["HuggingFaceM4/idefics2-8b", "--use-flash-attn", "--device-map", "cuda:0"], - ["qihoo360/360VL-8B", "--use-flash-attn"], - ["qnguyen3/nanoLLaVA", "--use-flash-attn", "--device-map", "cuda:0"], - ["echo840/Monkey"], - ["echo840/Monkey-Chat"], + ["Qwen/Qwen-VL-Chat"], + ["THUDM/cogagent-chat-hf", "--load-in-4bit"], + ["THUDM/cogagent-chat-hf"], + ["THUDM/cogvlm-chat-hf", "--load-in-4bit"], + ["THUDM/cogvlm-chat-hf"], ["THUDM/cogvlm2-llama3-chat-19B"], ["THUDM/cogvlm2-llama3-chinese-chat-19B"], - ["THUDM/cogvlm-chat-hf"], - ["THUDM/cogagent-chat-hf"], - ["Qwen/Qwen-VL-Chat"], - ["BAAI/Emu2-Chat", "--max-memory=0:78GiB,1:20GiB"], - ["BAAI/Bunny-Llama-3-8B-V"], - ["qresearch/llama-3-vision-alpha-hf", "--device", "cuda:0"], - ["TIGER-Lab/Mantis-8B-siglip-llama3", "--use-flash-attn", "--device-map", "cuda:0"], - ["TIGER-Lab/Mantis-8B-clip-llama3", "--use-flash-attn", "--device-map", "cuda:0"], ["TIGER-Lab/Mantis-8B-Fuyu", "--device-map", "cuda:0"], + ["TIGER-Lab/Mantis-8B-clip-llama3", "--use-flash-attn", "--device-map", "cuda:0"], + ["TIGER-Lab/Mantis-8B-siglip-llama3", "--use-flash-attn", "--device-map", "cuda:0"], + ["YanweiLi/MGM-2B", "--use-flash-attn", "--load-in-4bit"], + ["YanweiLi/MGM-2B", "--use-flash-attn"], ["adept/fuyu-8b", "--device-map", "cuda:0"], + ["echo840/Monkey"], + ["echo840/Monkey-Chat"], ["internlm/internlm-xcomposer2-4khd-7b", "--use-flash-attn", "--device-map", "cuda:0"], ["internlm/internlm-xcomposer2-7b", "--use-flash-attn", "--device-map", "cuda:0"], - ["internlm/internlm-xcomposer2-vl-7b", "--use-flash-attn", "--device-map", "cuda:0"], + ["internlm/internlm-xcomposer2-7b-4bit", "--use-flash-attn"], ["internlm/internlm-xcomposer2-vl-1_8b", "--use-flash-attn", "--device-map", "cuda:0"], - ["openbmb/MiniCPM-V-2", "--use-flash-attn", "--device-map", "cuda:0"], - ["openbmb/MiniCPM-V", "--use-flash-attn", "--device-map", "cuda:0"], + ["internlm/internlm-xcomposer2-vl-7b", "--use-flash-attn", "--device-map", "cuda:0"], + ["internlm/internlm-xcomposer2-vl-7b-4bit", "--use-flash-attn"], + ["llava-hf/bakLlava-v1-hf", "--load-in-4bit", "--use-flash-attn"], ["llava-hf/bakLlava-v1-hf", "--use-flash-attn", "--device-map", "cuda:0"], - ["llava-hf/llava-1.5-7b-hf", "--use-flash-attn", "--device-map", 
"cuda:0"], + ["llava-hf/llava-1.5-13b-hf", "--load-in-4bit", "--use-flash-attn"], ["llava-hf/llava-1.5-13b-hf", "--use-flash-attn", "--device-map", "cuda:0"], + ["llava-hf/llava-1.5-7b-hf", "--load-in-4bit", "--use-flash-attn"], + ["llava-hf/llava-1.5-7b-hf", "--use-flash-attn", "--device-map", "cuda:0"], + ["llava-hf/llava-v1.6-34b-hf", "--load-in-4bit", "--use-flash-attn"], + ["llava-hf/llava-v1.6-34b-hf", "--use-flash-attn"], + ["llava-hf/llava-v1.6-mistral-7b-hf", "--load-in-4bit", "--use-flash-attn"], ["llava-hf/llava-v1.6-mistral-7b-hf", "--use-flash-attn"], - ["llava-hf/llava-v1.6-vicuna-7b-hf", "--use-flash-attn"], + ["llava-hf/llava-v1.6-vicuna-13b-hf", "--load-in-4bit", "--use-flash-attn"], ["llava-hf/llava-v1.6-vicuna-13b-hf", "--use-flash-attn"], - ["llava-hf/llava-v1.6-34b-hf", "--use-flash-attn"], - ["YanweiLi/MGM-2B", "--use-flash-attn"], - - ["OpenGVLab/InternVL-Chat-V1-5-Int8", "--device-map", "cuda:0"], - ["OpenGVLab/InternVL-Chat-V1-5", "--load-in-4bit", "--device-map", "cuda:0"], - ["HuggingFaceM4/idefics2-8b-chatty-AWQ", "--use-flash-attn", "--device-map", "cuda:0"], - ["HuggingFaceM4/idefics2-8b-AWQ", "--use-flash-attn", "--device-map", "cuda:0"], - ["qihoo360/360VL-8B", "--use-flash-attn", "--load-in-4bit"], + ["llava-hf/llava-v1.6-vicuna-7b-hf", "--load-in-4bit", "--use-flash-attn"], + ["llava-hf/llava-v1.6-vicuna-7b-hf", "--use-flash-attn"], + ["microsoft/Phi-3-vision-128k-instruct", "--use-flash-attn", "--load-in-4bit"], + ["microsoft/Phi-3-vision-128k-instruct", "--use-flash-attn"], + ["openbmb/MiniCPM-V", "--use-flash-attn", "--device-map", "cuda:0"], + ["openbmb/MiniCPM-V-2", "--use-flash-attn", "--device-map", "cuda:0"], ["qihoo360/360VL-70B", "--use-flash-attn", "--load-in-4bit"], + ["qihoo360/360VL-8B", "--use-flash-attn", "--load-in-4bit"], + ["qihoo360/360VL-8B", "--use-flash-attn"], + ["qnguyen3/nanoLLaVA", "--use-flash-attn", "--device-map", "cuda:0"], ["qnguyen3/nanoLLaVA", "--use-flash-attn", "--load-in-4bit", "--device-map", "cuda:0"], - ["THUDM/cogvlm-chat-hf", "--load-in-4bit"], - ["THUDM/cogagent-chat-hf", "--load-in-4bit"], - ["internlm/internlm-xcomposer2-7b-4bit", "--use-flash-attn"], - ["internlm/internlm-xcomposer2-vl-7b-4bit", "--use-flash-attn"], - ["llava-hf/bakLlava-v1-hf", "--load-in-4bit", "--use-flash-attn"], - ["llava-hf/llava-1.5-7b-hf", "--load-in-4bit", "--use-flash-attn"], - ["llava-hf/llava-1.5-13b-hf", "--load-in-4bit", "--use-flash-attn"], - ["llava-hf/llava-v1.6-mistral-7b-hf", "--load-in-4bit", "--use-flash-attn"], - ["llava-hf/llava-v1.6-vicuna-7b-hf", "--load-in-4bit", "--use-flash-attn"], - ["llava-hf/llava-v1.6-vicuna-13b-hf", "--load-in-4bit", "--use-flash-attn"], - ["llava-hf/llava-v1.6-34b-hf", "--load-in-4bit", "--use-flash-attn"], - ["YanweiLi/MGM-2B", "--use-flash-attn", "--load-in-4bit"] + ["qresearch/llama-3-vision-alpha-hf", "--device", "cuda:0"], + ["vikhyatk/moondream2", "--use-flash-attn"] ] diff --git a/vision-alt.sample.env b/vision-alt.sample.env index 7a36c0f..9c47cc5 100644 --- a/vision-alt.sample.env +++ b/vision-alt.sample.env @@ -2,52 +2,52 @@ # Copy this file to vision.env and uncomment the model of your choice. HF_HOME=hf_home #CUDA_VISIBLE_DEVICES=1,0 -#CLI_COMMAND="python vision.py -m vikhyatk/moondream2 --use-flash-attn" # test pass✅, time: 4.4s, mem: 4.6GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m vikhyatk/moondream1" # test pass✅, time: 4.1s, mem: 4.9GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m echo840/Monkey" # test pass✅, time: 6.1s, mem: 21.8GB, 8/8 tests passed. 
-#CLI_COMMAND="python vision.py -m echo840/Monkey-Chat" # test pass✅, time: 7.8s, mem: 21.8GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --device-map cuda:0" # test pass✅, time: 13.3s, mem: 52.0GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/Mini-InternVL-Chat-2B-V1-5" # test pass✅, time: 4.0s, mem: 7.0GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogvlm2-llama3-chat-19B" # test pass✅, time: 22.6s, mem: 40.7GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogvlm2-llama3-chinese-chat-19B" # test pass✅, time: 68.2s, mem: 40.7GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf" # test pass✅, time: 14.5s, mem: 36.3GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf" # test pass✅, time: 15.4s, mem: 37.2GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m Qwen/Qwen-VL-Chat" # test pass✅, time: 4.6s, mem: 19.5GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --device-map cuda:0" # test pass✅, time: 13.9s, mem: 52.0GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --load-in-4bit --device-map cuda:0" # test fail❌, time: 16.6s, mem: 18.2GB, 2/8 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5-Int8 --device-map cuda:0" # test pass✅, time: 26.5s, mem: 31.3GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/Mini-InternVL-Chat-2B-V1-5" # test pass✅, time: 3.8s, mem: 7.0GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m Qwen/Qwen-VL-Chat" # test pass✅, time: 4.8s, mem: 19.5GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf --load-in-4bit" # test pass✅, time: 20.5s, mem: 12.1GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf" # test pass✅, time: 14.9s, mem: 37.2GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf --load-in-4bit" # test pass✅, time: 19.9s, mem: 12.1GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf" # test pass✅, time: 13.6s, mem: 36.3GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogvlm2-llama3-chat-19B" # test pass✅, time: 22.3s, mem: 40.6GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogvlm2-llama3-chinese-chat-19B" # test pass✅, time: 64.6s, mem: 40.7GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B --load-in-4bit --use-flash-attn" # test pass✅, time: 34.8s, mem: 10.0GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B --use-flash-attn" # test pass✅, time: 20.8s, mem: 27.6GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B-HD --load-in-4bit --use-flash-attn" # test pass✅, time: 17.6s, mem: 14.0GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B-HD --use-flash-attn" # test pass✅, time: 16.0s, mem: 31.8GB, 8/8 tests passed. #CLI_COMMAND="python vision.py -m YanweiLi/MGM-2B --use-flash-attn" # test fail❌, time: -1.0s, mem: -1.0GB, Error: Server failed to start (exit). -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B --use-flash-attn" # test pass✅, time: 5.7s, mem: 15.6GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B-HD --use-flash-attn" # test pass✅, time: 15.8s, mem: 18.8GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B --use-flash-attn" # test pass✅, time: 19.0s, mem: 27.6GB, 8/8 tests passed. 
-#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B-HD --use-flash-attn" # test pass✅, time: 18.1s, mem: 31.8GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B --use-flash-attn" # test pass✅, time: 10.9s, mem: 67.2GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B-HD --use-flash-attn" # test pass✅, time: 120.1s, mem: 70.4GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B --use-flash-attn" # test pass✅, time: 14.7s, mem: 91.4GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B-HD --use-flash-attn" # test pass✅, time: 18.3s, mem: 96.1GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m qihoo360/360VL-8B --use-flash-attn" # test pass✅, time: 5.4s, mem: 17.4GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m adept/fuyu-8b --device-map cuda:0" # test pass✅, time: 14.2s, mem: 25.0GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-4khd-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 18.0s, mem: 25.8GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 15.2s, mem: 19.2GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 16.4s, mem: 20.3GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-1_8b --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.4s, mem: 7.3GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V-2 --use-flash-attn --device-map cuda:0" # test pass✅, time: 7.1s, mem: 11.6GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.4s, mem: 7.7GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --use-flash-attn --device-map cuda:0" # test fail❌, time: 1.8s, mem: 15.8GB, 0/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --use-flash-attn --device-map cuda:0" # test pass✅, time: 5.7s, mem: 14.7GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.8s, mem: 27.2GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5-Int8 --device-map cuda:0" # test pass✅, time: 26.5s, mem: 31.4GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --load-in-4bit --device-map cuda:0" # test fail❌, time: 17.1s, mem: 18.3GB, 2/8 tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf --load-in-4bit" # test pass✅, time: 19.6s, mem: 12.2GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf --load-in-4bit" # test pass✅, time: 20.1s, mem: 12.2GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m qihoo360/360VL-8B --use-flash-attn --load-in-4bit" # test pass✅, time: 8.5s, mem: 7.9GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b-4bit --use-flash-attn --device cuda:0" # test pass✅, time: 8.9s, mem: 9.4GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b-4bit --use-flash-attn --device cuda:0" # test pass✅, time: 14.0s, mem: 10.8GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --load-in-4bit --use-flash-attn" # test fail❌, time: 2.2s, mem: 5.9GB, 0/8 tests passed. 
-#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 8.8s, mem: 5.5GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 10.7s, mem: 9.0GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B --load-in-4bit --use-flash-attn" # test pass✅, time: 16.7s, mem: 21.5GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B --use-flash-attn" # test pass✅, time: 10.6s, mem: 67.2GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B-HD --load-in-4bit --use-flash-attn" # test pass✅, time: 217.0s, mem: 24.1GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B-HD --use-flash-attn" # test pass✅, time: 121.4s, mem: 70.4GB, 8/8 tests passed. #CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B --load-in-4bit --use-flash-attn" # test pass✅, time: 10.5s, mem: 6.6GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B-HD --load-in-4bit --use-flash-attn" # test pass✅, time: 33.8s, mem: 9.6GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B --load-in-4bit --use-flash-attn" # test pass✅, time: 33.1s, mem: 9.8GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B-HD --load-in-4bit --use-flash-attn" # test pass✅, time: 22.7s, mem: 13.8GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B --load-in-4bit --use-flash-attn" # test pass✅, time: 17.0s, mem: 21.4GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B-HD --load-in-4bit --use-flash-attn" # test pass✅, time: 218.1s, mem: 24.1GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B --load-in-4bit --use-flash-attn" # test pass✅, time: 21.7s, mem: 26.2GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B-HD --load-in-4bit --use-flash-attn" # test pass✅, time: 25.3s, mem: 29.4GB, 8/8 tests passed. \ No newline at end of file +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B --use-flash-attn" # test pass✅, time: 5.2s, mem: 15.5GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B-HD --load-in-4bit --use-flash-attn" # test pass✅, time: 34.3s, mem: 9.9GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B-HD --use-flash-attn" # test pass✅, time: 15.9s, mem: 18.8GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B --load-in-4bit --use-flash-attn" # test pass✅, time: 22.9s, mem: 26.3GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B --use-flash-attn" # test pass✅, time: 16.0s, mem: 91.4GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B-HD --load-in-4bit --use-flash-attn" # test pass✅, time: 26.4s, mem: 29.6GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B-HD --use-flash-attn" # test pass✅, time: 18.6s, mem: 96.2GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m adept/fuyu-8b --device-map cuda:0" # test pass✅, time: 13.9s, mem: 25.1GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m echo840/Monkey" # test pass✅, time: 6.1s, mem: 21.9GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m echo840/Monkey-Chat" # test pass✅, time: 7.7s, mem: 21.9GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-4khd-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 18.3s, mem: 25.9GB, 8/8 tests passed. 
+#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 16.9s, mem: 19.2GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b-4bit --use-flash-attn --device cuda:0" # test pass✅, time: 9.3s, mem: 9.5GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-1_8b --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.5s, mem: 7.3GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 16.5s, mem: 20.2GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b-4bit --use-flash-attn --device cuda:0" # test pass✅, time: 10.8s, mem: 10.9GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --load-in-4bit --use-flash-attn" # test fail❌, time: 2.3s, mem: 5.9GB, 0/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --use-flash-attn --device-map cuda:0" # test fail❌, time: 1.8s, mem: 15.7GB, 0/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 10.1s, mem: 9.0GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.5s, mem: 26.8GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 9.1s, mem: 5.5GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --use-flash-attn --device-map cuda:0" # test pass✅, time: 7.9s, mem: 14.5GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.0s, mem: 7.6GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V-2 --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.6s, mem: 11.5GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m qihoo360/360VL-8B --use-flash-attn --load-in-4bit" # test pass✅, time: 7.8s, mem: 7.8GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m qihoo360/360VL-8B --use-flash-attn" # test pass✅, time: 5.2s, mem: 17.4GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m vikhyatk/moondream1" # test pass✅, time: 3.9s, mem: 4.9GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m vikhyatk/moondream2 --use-flash-attn" # test pass✅, time: 3.8s, mem: 4.6GB, 8/8 tests passed. \ No newline at end of file diff --git a/vision.sample.env b/vision.sample.env index a8ee19e..909c6c6 100644 --- a/vision.sample.env +++ b/vision.sample.env @@ -2,57 +2,59 @@ # Copy this file to vision.env and uncomment the model of your choice. HF_HOME=hf_home #CUDA_VISIBLE_DEVICES=1,0 -#CLI_COMMAND="python vision.py -m vikhyatk/moondream2 --use-flash-attn" # test pass✅, time: 4.1s, mem: 4.6GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --device-map cuda:0" # test pass✅, time: 13.5s, mem: 52.1GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/Mini-InternVL-Chat-2B-V1-5" # test pass✅, time: 3.5s, mem: 7.0GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m HuggingFaceM4/idefics2-8b-chatty --use-flash-attn --device-map cuda:0" # test pass✅, time: 8.1s, mem: 22.5GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m HuggingFaceM4/idefics2-8b --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.7s, mem: 22.5GB, 8/8 tests passed. 
-#CLI_COMMAND="python vision.py -m qihoo360/360VL-8B --use-flash-attn" # test pass✅, time: 5.7s, mem: 17.5GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m qnguyen3/nanoLLaVA --use-flash-attn --device-map cuda:0" # test pass✅, time: 7.8s, mem: 8.2GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m echo840/Monkey" # test pass✅, time: 6.1s, mem: 21.9GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m echo840/Monkey-Chat" # test pass✅, time: 7.8s, mem: 21.9GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogvlm2-llama3-chat-19B" # test pass✅, time: 22.2s, mem: 40.7GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogvlm2-llama3-chinese-chat-19B" # test pass✅, time: 65.4s, mem: 40.7GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf" # test pass✅, time: 14.1s, mem: 36.3GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf" # test pass✅, time: 14.8s, mem: 37.2GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m Qwen/Qwen-VL-Chat" # test pass✅, time: 4.7s, mem: 19.6GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m BAAI/Emu2-Chat --max-memory=0:78GiB,1:20GiB" # test pass✅, time: 22.0s, mem: 78.5GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m BAAI/Bunny-Llama-3-8B-V" # test pass✅, time: 6.6s, mem: 19.7GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m qresearch/llama-3-vision-alpha-hf --device cuda:0" # test pass✅, time: 6.8s, mem: 19.3GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m TIGER-Lab/Mantis-8B-siglip-llama3 --use-flash-attn --device-map cuda:0" # test pass✅, time: 5.0s, mem: 18.0GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m TIGER-Lab/Mantis-8B-clip-llama3 --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.6s, mem: 17.4GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m TIGER-Lab/Mantis-8B-Fuyu --device-map cuda:0" # test pass✅, time: 6.3s, mem: 20.2GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m adept/fuyu-8b --device-map cuda:0" # test pass✅, time: 13.0s, mem: 25.1GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-4khd-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 18.4s, mem: 25.9GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 16.2s, mem: 19.2GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 17.2s, mem: 20.3GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-1_8b --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.6s, mem: 7.3GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V-2 --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.7s, mem: 11.6GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V --use-flash-attn --device-map cuda:0" # test pass✅, time: 5.8s, mem: 7.8GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m BAAI/Bunny-Llama-3-8B-V" # test pass✅, time: 6.8s, mem: 19.6GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m BAAI/Emu2-Chat --max-memory=0:78GiB,1:20GiB" # test pass✅, time: 21.4s, mem: 78.4GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m HuggingFaceM4/idefics2-8b --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.6s, mem: 22.4GB, 8/8 tests passed. 
+#CLI_COMMAND="python vision.py -m HuggingFaceM4/idefics2-8b-AWQ --use-flash-attn --device-map cuda:0" # test pass✅, time: 7.9s, mem: 12.7GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m HuggingFaceM4/idefics2-8b-chatty --use-flash-attn --device-map cuda:0" # test pass✅, time: 8.0s, mem: 22.4GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m HuggingFaceM4/idefics2-8b-chatty-AWQ --use-flash-attn --device-map cuda:0" # test pass✅, time: 9.5s, mem: 12.6GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --device-map cuda:0" # test pass✅, time: 13.3s, mem: 52.0GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --load-in-4bit --device-map cuda:0" # test fail❌, time: 17.2s, mem: 18.1GB, 2/8 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5-Int8 --device-map cuda:0" # test pass✅, time: 26.1s, mem: 31.9GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/Mini-InternVL-Chat-2B-V1-5" # test pass✅, time: 3.6s, mem: 6.9GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m Qwen/Qwen-VL-Chat" # test pass✅, time: 4.5s, mem: 19.4GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf --load-in-4bit" # test pass✅, time: 19.9s, mem: 12.1GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf" # test pass✅, time: 14.4s, mem: 37.1GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf --load-in-4bit" # test pass✅, time: 19.5s, mem: 12.0GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf" # test pass✅, time: 13.8s, mem: 36.2GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogvlm2-llama3-chat-19B" # test pass✅, time: 22.1s, mem: 40.6GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogvlm2-llama3-chinese-chat-19B" # test pass✅, time: 64.8s, mem: 40.5GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m TIGER-Lab/Mantis-8B-Fuyu --device-map cuda:0" # test pass✅, time: 6.2s, mem: 20.2GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m TIGER-Lab/Mantis-8B-clip-llama3 --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.2s, mem: 17.4GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m TIGER-Lab/Mantis-8B-siglip-llama3 --use-flash-attn --device-map cuda:0" # test pass✅, time: 4.6s, mem: 18.0GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-2B --use-flash-attn --load-in-4bit" # test pass✅, time: 6.7s, mem: 5.7GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-2B --use-flash-attn" # test pass✅, time: 4.1s, mem: 8.2GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m adept/fuyu-8b --device-map cuda:0" # test pass✅, time: 13.9s, mem: 24.9GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m echo840/Monkey" # test pass✅, time: 6.1s, mem: 21.7GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m echo840/Monkey-Chat" # test pass✅, time: 7.7s, mem: 21.7GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-4khd-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 17.9s, mem: 25.6GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 16.6s, mem: 19.1GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b-4bit --use-flash-attn" # test pass✅, time: 9.2s, mem: 9.5GB, 8/8 tests passed. 
+#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-1_8b --use-flash-attn --device-map cuda:0" # test pass✅, time: 4.7s, mem: 7.3GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 16.2s, mem: 20.3GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b-4bit --use-flash-attn" # test pass✅, time: 12.4s, mem: 11.0GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --load-in-4bit --use-flash-attn" # test fail❌, time: 2.2s, mem: 6.0GB, 0/8 tests passed. #CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --use-flash-attn --device-map cuda:0" # test fail❌, time: 1.8s, mem: 15.8GB, 0/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --use-flash-attn --device-map cuda:0" # test pass✅, time: 5.8s, mem: 14.6GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 10.1s, mem: 9.2GB, 8/8 tests passed. #CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.4s, mem: 26.9GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-mistral-7b-hf --use-flash-attn" # test pass✅, time: 12.8s, mem: 19.3GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-vicuna-7b-hf --use-flash-attn" # test pass✅, time: 8.8s, mem: 19.0GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-vicuna-13b-hf --use-flash-attn" # test pass✅, time: 9.5s, mem: 32.7GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-34b-hf --use-flash-attn" # test pass✅, time: 46.2s, mem: 72.7GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-2B --use-flash-attn" # test pass✅, time: 4.3s, mem: 8.6GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5-Int8 --device-map cuda:0" # test pass✅, time: 27.5s, mem: 32.3GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --load-in-4bit --device-map cuda:0" # test fail❌, time: 17.3s, mem: 18.5GB, 2/8 tests passed. -#CLI_COMMAND="python vision.py -m HuggingFaceM4/idefics2-8b-chatty-AWQ --use-flash-attn --device-map cuda:0" # test pass✅, time: 9.6s, mem: 13.0GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m HuggingFaceM4/idefics2-8b-AWQ --use-flash-attn --device-map cuda:0" # test pass✅, time: 8.1s, mem: 13.0GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m qihoo360/360VL-8B --use-flash-attn --load-in-4bit" # test pass✅, time: 8.7s, mem: 8.1GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m qihoo360/360VL-70B --use-flash-attn --load-in-4bit" # test fail❌, time: -1.0s, mem: -1.0GB, Error: Server failed to start (exit). -#CLI_COMMAND="python vision.py -m qnguyen3/nanoLLaVA --use-flash-attn --load-in-4bit --device-map cuda:0" # test pass✅, time: 10.7s, mem: 7.9GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf --load-in-4bit" # test pass✅, time: 20.1s, mem: 12.4GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf --load-in-4bit" # test pass✅, time: 21.1s, mem: 12.5GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b-4bit --use-flash-attn" # test pass✅, time: 10.9s, mem: 9.7GB, 8/8 tests passed. 
-#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b-4bit --use-flash-attn" # test pass✅, time: 10.2s, mem: 11.2GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --load-in-4bit --use-flash-attn" # test fail❌, time: 2.2s, mem: 6.2GB, 0/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 9.3s, mem: 6.0GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 10.8s, mem: 9.4GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-mistral-7b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 20.0s, mem: 9.4GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-vicuna-7b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 13.7s, mem: 10.0GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-vicuna-13b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 12.2s, mem: 15.1GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-34b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 53.3s, mem: 26.4GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-2B --use-flash-attn --load-in-4bit" # test pass✅, time: 7.0s, mem: 6.0GB, 8/8 tests passed. \ No newline at end of file +#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 9.1s, mem: 5.8GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --use-flash-attn --device-map cuda:0" # test pass✅, time: 5.9s, mem: 14.6GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-34b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 51.8s, mem: 26.3GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-34b-hf --use-flash-attn" # test pass✅, time: 46.3s, mem: 72.5GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-mistral-7b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 19.3s, mem: 9.3GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-mistral-7b-hf --use-flash-attn" # test pass✅, time: 12.7s, mem: 19.3GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-vicuna-13b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 12.3s, mem: 14.9GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-vicuna-13b-hf --use-flash-attn" # test pass✅, time: 9.0s, mem: 32.7GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-vicuna-7b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 13.4s, mem: 9.9GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-vicuna-7b-hf --use-flash-attn" # test pass✅, time: 8.4s, mem: 19.0GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m microsoft/Phi-3-vision-128k-instruct --use-flash-attn --load-in-4bit" # test pass✅, time: 7.7s, mem: 7.2GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m microsoft/Phi-3-vision-128k-instruct --use-flash-attn" # test pass✅, time: 6.4s, mem: 12.4GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V --use-flash-attn --device-map cuda:0" # test pass✅, time: 5.8s, mem: 7.7GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V-2 --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.6s, mem: 11.6GB, 8/8 tests passed. 
+#CLI_COMMAND="python vision.py -m qihoo360/360VL-70B --use-flash-attn --load-in-4bit" # test fail❌, time: 4.1s, mem: 39.3GB, Test failed with Exception: Internal Server Error +#CLI_COMMAND="python vision.py -m qihoo360/360VL-8B --use-flash-attn --load-in-4bit" # test pass✅, time: 8.3s, mem: 7.9GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m qihoo360/360VL-8B --use-flash-attn" # test pass✅, time: 5.2s, mem: 17.5GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m qnguyen3/nanoLLaVA --use-flash-attn --device-map cuda:0" # test pass✅, time: 8.1s, mem: 8.2GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m qnguyen3/nanoLLaVA --use-flash-attn --load-in-4bit --device-map cuda:0" # test pass✅, time: 9.3s, mem: 7.7GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m qresearch/llama-3-vision-alpha-hf --device cuda:0" # test pass✅, time: 5.7s, mem: 19.3GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m vikhyatk/moondream2 --use-flash-attn" # test pass✅, time: 3.9s, mem: 4.7GB, 8/8 tests passed. \ No newline at end of file diff --git a/vision_qna.py b/vision_qna.py index 50e4cb7..9a9c528 100644 --- a/vision_qna.py +++ b/vision_qna.py @@ -420,6 +420,30 @@ async def emu_images_prompt_system_from_messages(messages: list[Message], img_to return images, prompt, system_message +async def phi3_prompt_from_messages(messages: list[Message]): + n = 1 + img_tok = "<|image_{}|>\n" + prompt = '' + images = [] + + for m in messages: + img_tag = '' + + for c in m.content: + if c.type == 'image_url': + images.extend([ await url_to_image(c.image_url.url) ]) + img_tag += img_tok.format(n) + n += 1 + + for c in m.content: + if c.type == 'text': + prompt += f"<|{m.role}|>\n{img_tag}{c.text}<|end|>\n" + + prompt += '<|assistant|>\n' + + return images, prompt + + async def prompt_history_images_system_from_messages(messages: list[Message], img_tok = "\n", url_handler = url_to_image): history = [] images = [] @@ -565,4 +589,7 @@ def guess_backend(model_name: str) -> str: return 'emu' if '360vl' in model_id: - return '360vl' \ No newline at end of file + return '360vl' + + if "phi-3-vision" in model_id: + return 'phi3' \ No newline at end of file