diff --git a/Dockerfile b/Dockerfile
index cfcdb19..6da57fc 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,6 +1,6 @@
 FROM python:3.11-slim
 
-RUN apt-get update && apt-get install -y git
+RUN apt-get update && apt-get install -y git gcc
 RUN pip install --no-cache-dir --upgrade pip
 
 RUN mkdir -p /app
diff --git a/README.md b/README.md
index 526c267..bf2ee17 100644
--- a/README.md
+++ b/README.md
@@ -13,6 +13,8 @@ An OpenAI API compatible vision server, it functions like `gpt-4-vision-preview`
 - - [X] [InternVL-Chat-V1-5](https://huggingface.co/OpenGVLab/InternVL-Chat-V1-5) (wont gpu split yet, 4bit not recommended)
 - - [X] [InternVL-Chat-V1-5-Int8](https://huggingface.co/OpenGVLab/InternVL-Chat-V1-5-Int8) (wont gpu split yet)
 - [X] [THUDM/CogVLM](https://github.com/THUDM/CogVLM)
+- - [X] [cogvlm2-llama3-chat-19B](https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B)
+- - [X] [cogvlm2-llama3-chinese-chat-19B](https://huggingface.co/THUDM/cogvlm2-llama3-chinese-chat-19B)
 - - [X] [cogvlm-chat-hf](https://huggingface.co/THUDM/cogvlm-chat-hf)
 - - [X] [cogagent-chat-hf](https://huggingface.co/THUDM/cogagent-chat-hf)
 - [X] [InternLM](https://huggingface.co/internlm/)
@@ -29,7 +31,7 @@ An OpenAI API compatible vision server, it functions like `gpt-4-vision-preview`
 - - [X] [idefics2-8b-chatty-AWQ](https://huggingface.co/HuggingFaceM4/idefics2-8b-chatty-AWQ) (main docker only, wont gpu split)
 - [X] [qihoo360](https://huggingface.co/qihoo360)
 - - [X] [360VL-8B](https://huggingface.co/qihoo360/360VL-8B)
-- - [X] [360VL-70B](https://huggingface.co/qihoo360/360VL-70B) (loading error, [see note](https://huggingface.co/qihoo360/360VL-70B/discussions/1), also too large for me to test)
+- - [X] [360VL-70B](https://huggingface.co/qihoo360/360VL-70B) (untested)
 - [X] [LlavaNext](https://huggingface.co/llava-hf) (main docker only)
 - - [X] [llava-v1.6-34b-hf](https://huggingface.co/llava-hf/llava-v1.6-34b-hf) (main docker only)
 - - [X] [llava-v1.6-vicuna-13b-hf](https://huggingface.co/llava-hf/llava-v1.6-vicuna-13b-hf) (main docker only)
@@ -39,9 +41,6 @@ An OpenAI API compatible vision server, it functions like `gpt-4-vision-preview`
 - - [X] [llava-v1.5-vicuna-7b-hf](https://huggingface.co/llava-hf/llava-v1.5-vicuna-7b-hf)
 - - [X] [llava-v1.5-vicuna-13b-hf](https://huggingface.co/llava-hf/llava-v1.5-vicuna-13b-hf)
 - - [ ] [llava-v1.5-bakLlava-7b-hf](https://huggingface.co/llava-hf/llava-v1.5-bakLlava-7b-hf) (currently errors)
-- [X] [01-ai/Yi-VL](https://huggingface.co/01-ai)
-- - [ ] [Yi-VL-6B](https://huggingface.co/01-ai/Yi-VL-6B) (currently errors)
-- - [ ] [Yi-VL-34B](https://huggingface.co/01-ai/Yi-VL-34B) (currently errors)
 - [X] [qresearch](https://huggingface.co/qresearch/)
 - - [X] [llama-3-vision-alpha-hf](https://huggingface.co/qresearch/llama-3-vision-alpha-hf) (main docker only, wont gpu split)
 - [X] [BAAI](https://huggingface.co/BAAI/)
@@ -72,6 +71,9 @@ An OpenAI API compatible vision server, it functions like `gpt-4-vision-preview`
 - - [X] [MGM-34B-HD](https://huggingface.co/YanweiLi/MGM-34B-HD) (alternate docker only)
 - - [X] [MGM-8x7B-HD](https://huggingface.co/YanweiLi/MGM-8x7B-HD) (alternate docker only)
 - [X] [qnguyen3/nanoLLaVA](https://huggingface.co/qnguyen3/nanoLLaVA) (main docker only, wont gpu split)
+- [ ] [01-ai/Yi-VL](https://huggingface.co/01-ai)
+- - [ ] [Yi-VL-6B](https://huggingface.co/01-ai/Yi-VL-6B) (currently errors)
+- - [ ] [Yi-VL-34B](https://huggingface.co/01-ai/Yi-VL-34B) (currently errors)
 - [ ] [Deepseek-VL-7b-chat](https://huggingface.co/deepseek-ai/deepseek-vl-7b-chat)
 - [ ] [Deepseek-VL-1.3b-chat](https://huggingface.co/deepseek-ai/deepseek-vl-1.3b-chat)
 - [ ] [NousResearch/Obsidian-3B-V0.5](https://huggingface.co/NousResearch/Obsidian-3B-V0.5)
@@ -81,6 +83,10 @@ See: [OpenVLM Leaderboard](https://huggingface.co/spaces/opencompass/open_vlm_le
 
 ## Recent updates
 
+Version 0.15.0
+
+- new model support: cogvlm2-llama3-chinese-chat-19B, cogvlm2-llama3-chat-19B
+
 Version 0.14.1
 
 - new model support: idefics2-8b-chatty, idefics2-8b-chatty-AWQ (it worked already, no code change)
@@ -89,7 +95,7 @@ Version: 0.14.0
 
 - docker-compose.yml: Assume the runtime supports the device (ie. nvidia)
-- new model support: qihoo360/360VL-8B, qihoo360/360VL-70B (70B loading error, [see note](https://huggingface.co/qihoo360/360VL-70B/discussions/1), also too large for me to test)
+- new model support: qihoo360/360VL-8B, qihoo360/360VL-70B (70B is untested, too large for me)
 - new model support: BAAI/Emu2-Chat, Can be slow to load, may need --max-memory option control the loading on multiple gpus
 - new model support: TIGER-Labs/Mantis: Mantis-8B-siglip-llama3, Mantis-8B-clip-llama3, Mantis-8B-Fuyu
diff --git a/backend/cogvlm2.py b/backend/cogvlm2.py
new file mode 100644
index 0000000..c67065b
--- /dev/null
+++ b/backend/cogvlm2.py
@@ -0,0 +1,48 @@
+from transformers import AutoTokenizer, AutoModelForCausalLM
+
+from vision_qna import *
+
+# THUDM/cogvlm2-llama3-chat-19B
+# THUDM/cogvlm2-llama3-chinese-chat-19B
+import transformers
+transformers.logging.set_verbosity_error()
+
+class VisionQnA(VisionQnABase):
+    model_name: str = "cogvlm2"
+    format: str = 'llama3'
+
+    def __init__(self, model_id: str, device: str, device_map: str = 'auto', extra_params = {}, format = None):
+        super().__init__(model_id, device, device_map, extra_params, format)
+
+        self.tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=self.params.get('trust_remote_code', False))
+        self.model = AutoModelForCausalLM.from_pretrained(**self.params).eval()
+
+        print(f"Loaded on device: {self.model.device} with dtype: {self.model.dtype}")
+
+    async def chat_with_images(self, request: ImageChatRequest) -> str:
+
+        query, history, images, system_message = await prompt_history_images_system_from_messages(
+            request.messages, img_tok='', url_handler=url_to_image)
+
+        input_by_model = self.model.build_conversation_input_ids(self.tokenizer, query=query, history=history, images=images, template_version='chat')
+
+        inputs = {
+            'input_ids': input_by_model['input_ids'].unsqueeze(0).to(self.model.device),
+            'token_type_ids': input_by_model['token_type_ids'].unsqueeze(0).to(self.model.device),
+            'attention_mask': input_by_model['attention_mask'].unsqueeze(0).to(self.model.device),
+            'images': [[input_by_model['images'][0].to(self.model.device).to(self.model.dtype)]] if images else None,
+        }
+
+        default_params = {
+            'max_new_tokens': 2048,
+            'pad_token_id': 128002,
+            'top_p': None, # 0.9
+            'temperature': None, # 0.6
+        }
+
+        params = self.get_generation_params(request, default_params)
+
+        response = self.model.generate(**inputs, **params)
+        answer = self.tokenizer.decode(response[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True).strip()
+
+        return answer
diff --git a/model_conf_tests.alt.json b/model_conf_tests.alt.json
index 1c798f7..ae86bf8 100644
--- a/model_conf_tests.alt.json
+++ b/model_conf_tests.alt.json
@@ -4,6 +4,8 @@
     ["echo840/Monkey"],
     ["echo840/Monkey-Chat"],
     ["OpenGVLab/InternVL-Chat-V1-5", "--device-map", "cuda:0"],
+    ["THUDM/cogvlm2-llama3-chat-19B"],
+    ["THUDM/cogvlm2-llama3-chinese-chat-19B"],
     ["THUDM/cogvlm-chat-hf"],
     ["THUDM/cogagent-chat-hf"],
     ["Qwen/Qwen-VL-Chat"],
diff --git a/model_conf_tests.json b/model_conf_tests.json
index d76322b..fa88297 100644
--- a/model_conf_tests.json
+++ b/model_conf_tests.json
@@ -7,6 +7,8 @@
     ["qnguyen3/nanoLLaVA", "--use-flash-attn", "--device-map", "cuda:0"],
     ["echo840/Monkey"],
     ["echo840/Monkey-Chat"],
+    ["THUDM/cogvlm2-llama3-chat-19B"],
+    ["THUDM/cogvlm2-llama3-chinese-chat-19B"],
     ["THUDM/cogvlm-chat-hf"],
     ["THUDM/cogagent-chat-hf"],
     ["Qwen/Qwen-VL-Chat"],
diff --git a/vision-alt.sample.env b/vision-alt.sample.env
index f2f16c3..10e8735 100644
--- a/vision-alt.sample.env
+++ b/vision-alt.sample.env
@@ -2,49 +2,51 @@
 # Copy this file to vision.env and uncomment the model of your choice.
 HF_HOME=hf_home
 #CUDA_VISIBLE_DEVICES=1,0
-#CLI_COMMAND="python vision.py -m vikhyatk/moondream2 --use-flash-attn" # test pass✅, time: 4.8s, mem: 4.6GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m vikhyatk/moondream1" # test pass✅, time: 4.6s, mem: 4.9GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m echo840/Monkey" # test pass✅, time: 6.3s, mem: 21.8GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m echo840/Monkey-Chat" # test pass✅, time: 8.0s, mem: 21.8GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --device-map cuda:0" # test pass✅, time: 13.4s, mem: 52.0GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf" # test pass✅, time: 14.2s, mem: 36.3GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf" # test pass✅, time: 15.2s, mem: 37.2GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m Qwen/Qwen-VL-Chat" # test pass✅, time: 4.8s, mem: 19.5GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m vikhyatk/moondream2 --use-flash-attn" # test pass✅, time: 4.4s, mem: 4.6GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m vikhyatk/moondream1" # test pass✅, time: 4.1s, mem: 4.9GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m echo840/Monkey" # test pass✅, time: 6.1s, mem: 21.8GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m echo840/Monkey-Chat" # test pass✅, time: 7.8s, mem: 21.8GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --device-map cuda:0" # test pass✅, time: 13.3s, mem: 52.0GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m THUDM/cogvlm2-llama3-chat-19B" # test pass✅, time: 22.6s, mem: 40.7GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m THUDM/cogvlm2-llama3-chinese-chat-19B" # test pass✅, time: 68.2s, mem: 40.7GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf" # test pass✅, time: 14.5s, mem: 36.3GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf" # test pass✅, time: 15.4s, mem: 37.2GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m Qwen/Qwen-VL-Chat" # test pass✅, time: 4.6s, mem: 19.5GB, 8/8 tests passed.
 #CLI_COMMAND="python vision.py -m YanweiLi/MGM-2B --use-flash-attn" # test fail❌, time: -1.0s, mem: -1.0GB, Error: Server failed to start (exit).
-#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B --use-flash-attn" # test pass✅, time: 6.1s, mem: 15.6GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B-HD --use-flash-attn" # test pass✅, time: 14.9s, mem: 18.8GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B --use-flash-attn" # test pass✅, time: 18.8s, mem: 27.6GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B-HD --use-flash-attn" # test pass✅, time: 18.2s, mem: 31.7GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B --use-flash-attn" # test pass✅, time: 11.3s, mem: 67.1GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B-HD --use-flash-attn" # test pass✅, time: 196.6s, mem: 70.4GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B --use-flash-attn" # test pass✅, time: 16.0s, mem: 91.4GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B-HD --use-flash-attn" # test pass✅, time: 19.7s, mem: 96.1GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m qihoo360/360VL-8B --use-flash-attn" # test pass✅, time: 5.6s, mem: 17.4GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m adept/fuyu-8b --device-map cuda:0" # test pass✅, time: 14.1s, mem: 25.0GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-4khd-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 18.8s, mem: 25.7GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 16.0s, mem: 19.1GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 17.3s, mem: 20.3GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-1_8b --use-flash-attn --device-map cuda:0" # test pass✅, time: 5.4s, mem: 7.2GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V-2 --use-flash-attn --device-map cuda:0" # test pass✅, time: 7.0s, mem: 11.6GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.5s, mem: 7.8GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --use-flash-attn --device-map cuda:0" # test fail❌, time: 2.0s, mem: 15.9GB, 0/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.0s, mem: 14.7GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.8s, mem: 27.0GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5-Int8 --device-map cuda:0" # test pass✅, time: 27.5s, mem: 31.4GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --load-in-4bit --device-map cuda:0" # test fail❌, time: 17.5s, mem: 18.3GB, 2/8 tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf --load-in-4bit" # test pass✅, time: 20.2s, mem: 12.3GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf --load-in-4bit" # test pass✅, time: 20.9s, mem: 12.1GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m qihoo360/360VL-8B --use-flash-attn --load-in-4bit" # test pass✅, time: 8.6s, mem: 7.8GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b-4bit --use-flash-attn --device cuda:0" # test pass✅, time: 11.2s, mem: 9.4GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b-4bit --use-flash-attn --device cuda:0" # test pass✅, time: 13.2s, mem: 10.9GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --load-in-4bit --use-flash-attn" # test fail❌, time: 2.4s, mem: 5.9GB, 0/8 tests passed. 
-#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 9.9s, mem: 5.5GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 11.0s, mem: 9.0GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B --load-in-4bit --use-flash-attn" # test pass✅, time: 10.4s, mem: 6.6GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B-HD --load-in-4bit --use-flash-attn" # test pass✅, time: 35.8s, mem: 9.8GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B --load-in-4bit --use-flash-attn" # test pass✅, time: 37.2s, mem: 10.1GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B-HD --load-in-4bit --use-flash-attn" # test pass✅, time: 31.3s, mem: 14.1GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B --load-in-4bit --use-flash-attn" # test pass✅, time: 17.3s, mem: 21.5GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B-HD --load-in-4bit --use-flash-attn" # test pass✅, time: 213.9s, mem: 24.2GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B --load-in-4bit --use-flash-attn" # test pass✅, time: 23.8s, mem: 26.2GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B-HD --load-in-4bit --use-flash-attn" # test pass✅, time: 25.6s, mem: 29.6GB, 8/8 tests passed. \ No newline at end of file +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B --use-flash-attn" # test pass✅, time: 5.7s, mem: 15.6GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B-HD --use-flash-attn" # test pass✅, time: 15.8s, mem: 18.8GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B --use-flash-attn" # test pass✅, time: 19.0s, mem: 27.6GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B-HD --use-flash-attn" # test pass✅, time: 18.1s, mem: 31.8GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B --use-flash-attn" # test pass✅, time: 10.9s, mem: 67.2GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B-HD --use-flash-attn" # test pass✅, time: 120.1s, mem: 70.4GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B --use-flash-attn" # test pass✅, time: 14.7s, mem: 91.4GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B-HD --use-flash-attn" # test pass✅, time: 18.3s, mem: 96.1GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m qihoo360/360VL-8B --use-flash-attn" # test pass✅, time: 5.4s, mem: 17.4GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m adept/fuyu-8b --device-map cuda:0" # test pass✅, time: 14.2s, mem: 25.0GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-4khd-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 18.0s, mem: 25.8GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 15.2s, mem: 19.2GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 16.4s, mem: 20.3GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-1_8b --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.4s, mem: 7.3GB, 8/8 tests passed. 
+#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V-2 --use-flash-attn --device-map cuda:0" # test pass✅, time: 7.1s, mem: 11.6GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.4s, mem: 7.7GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --use-flash-attn --device-map cuda:0" # test fail❌, time: 1.8s, mem: 15.8GB, 0/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --use-flash-attn --device-map cuda:0" # test pass✅, time: 5.7s, mem: 14.7GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.8s, mem: 27.2GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5-Int8 --device-map cuda:0" # test pass✅, time: 26.5s, mem: 31.4GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --load-in-4bit --device-map cuda:0" # test fail❌, time: 17.1s, mem: 18.3GB, 2/8 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf --load-in-4bit" # test pass✅, time: 19.6s, mem: 12.2GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf --load-in-4bit" # test pass✅, time: 20.1s, mem: 12.2GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m qihoo360/360VL-8B --use-flash-attn --load-in-4bit" # test pass✅, time: 8.5s, mem: 7.9GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b-4bit --use-flash-attn --device cuda:0" # test pass✅, time: 8.9s, mem: 9.4GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b-4bit --use-flash-attn --device cuda:0" # test pass✅, time: 14.0s, mem: 10.8GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --load-in-4bit --use-flash-attn" # test fail❌, time: 2.2s, mem: 5.9GB, 0/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 8.8s, mem: 5.5GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 10.7s, mem: 9.0GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B --load-in-4bit --use-flash-attn" # test pass✅, time: 10.5s, mem: 6.6GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B-HD --load-in-4bit --use-flash-attn" # test pass✅, time: 33.8s, mem: 9.6GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B --load-in-4bit --use-flash-attn" # test pass✅, time: 33.1s, mem: 9.8GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B-HD --load-in-4bit --use-flash-attn" # test pass✅, time: 22.7s, mem: 13.8GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B --load-in-4bit --use-flash-attn" # test pass✅, time: 17.0s, mem: 21.4GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B-HD --load-in-4bit --use-flash-attn" # test pass✅, time: 218.1s, mem: 24.1GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B --load-in-4bit --use-flash-attn" # test pass✅, time: 21.7s, mem: 26.2GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B-HD --load-in-4bit --use-flash-attn" # test pass✅, time: 25.3s, mem: 29.4GB, 8/8 tests passed. 
\ No newline at end of file diff --git a/vision.sample.env b/vision.sample.env index e9962a7..0590c15 100644 --- a/vision.sample.env +++ b/vision.sample.env @@ -3,51 +3,55 @@ HF_HOME=hf_home #CUDA_VISIBLE_DEVICES=1,0 #CLI_COMMAND="python vision.py -m vikhyatk/moondream2 --use-flash-attn" # test pass✅, time: 4.1s, mem: 4.6GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --device-map cuda:0" # test pass✅, time: 13.4s, mem: 52.0GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m HuggingFaceM4/idefics2-8b-chatty --use-flash-attn --device-map cuda:0" # test pass✅, time: 8.6s, mem: 22.5GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m HuggingFaceM4/idefics2-8b --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.9s, mem: 22.4GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m qihoo360/360VL-8B --use-flash-attn" # test pass✅, time: 5.7s, mem: 17.4GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m qnguyen3/nanoLLaVA --use-flash-attn --device-map cuda:0" # test pass✅, time: 7.6s, mem: 8.0GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m echo840/Monkey" # test pass✅, time: 6.2s, mem: 21.7GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m echo840/Monkey-Chat" # test pass✅, time: 8.1s, mem: 21.7GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf" # test pass✅, time: 14.4s, mem: 36.6GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf" # test pass✅, time: 15.3s, mem: 37.4GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m Qwen/Qwen-VL-Chat" # test pass✅, time: 4.9s, mem: 19.8GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m BAAI/Emu2-Chat --max-memory=0:78GiB,1:20GiB" # test pass✅, time: 20.5s, mem: 78.7GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m BAAI/Bunny-Llama-3-8B-V" # test pass✅, time: 6.8s, mem: 19.9GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m qresearch/llama-3-vision-alpha-hf --device cuda:0" # test pass✅, time: 6.8s, mem: 19.5GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m TIGER-Lab/Mantis-8B-siglip-llama3 --use-flash-attn --device-map cuda:0" # test pass✅, time: 4.9s, mem: 18.4GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m TIGER-Lab/Mantis-8B-clip-llama3 --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.4s, mem: 17.8GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m TIGER-Lab/Mantis-8B-Fuyu --device-map cuda:0" # test pass✅, time: 6.5s, mem: 20.6GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m adept/fuyu-8b --device-map cuda:0" # test pass✅, time: 13.2s, mem: 25.2GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-4khd-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 18.4s, mem: 26.0GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 17.3s, mem: 19.3GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 15.5s, mem: 20.4GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V-2 --use-flash-attn --device-map cuda:0" # test pass✅, time: 7.1s, mem: 11.7GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.4s, mem: 7.8GB, 8/8 tests passed. 
-#CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --use-flash-attn --device-map cuda:0" # test fail❌, time: 2.0s, mem: 15.9GB, 0/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.1s, mem: 14.8GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.6s, mem: 27.1GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-mistral-7b-hf --use-flash-attn" # test pass✅, time: 12.9s, mem: 19.5GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-vicuna-7b-hf --use-flash-attn" # test pass✅, time: 8.9s, mem: 19.1GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-vicuna-13b-hf --use-flash-attn" # test pass✅, time: 9.5s, mem: 32.6GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-34b-hf --use-flash-attn" # test pass✅, time: 46.2s, mem: 72.4GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-2B --use-flash-attn" # test pass✅, time: 4.4s, mem: 8.3GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5-Int8 --device-map cuda:0" # test pass✅, time: 26.9s, mem: 32.0GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --load-in-4bit --device-map cuda:0" # test fail❌, time: 17.2s, mem: 18.2GB, 2/8 tests passed. -#CLI_COMMAND="python vision.py -m HuggingFaceM4/idefics2-8b-chatty-AWQ --use-flash-attn --device-map cuda:0" # test pass✅, time: 9.9s, mem: 12.7GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m HuggingFaceM4/idefics2-8b-AWQ --use-flash-attn --device-map cuda:0" # test pass✅, time: 8.0s, mem: 12.6GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m qihoo360/360VL-8B --use-flash-attn --load-in-4bit" # test pass✅, time: 8.5s, mem: 7.8GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m qnguyen3/nanoLLaVA --use-flash-attn --load-in-4bit --device-map cuda:0" # test pass✅, time: 10.5s, mem: 7.6GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf --load-in-4bit" # test pass✅, time: 19.5s, mem: 12.1GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf --load-in-4bit" # test pass✅, time: 20.9s, mem: 12.1GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b-4bit --use-flash-attn" # test pass✅, time: 11.4s, mem: 9.4GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b-4bit --use-flash-attn" # test pass✅, time: 12.3s, mem: 10.9GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --load-in-4bit --use-flash-attn" # test fail❌, time: 2.4s, mem: 5.9GB, 0/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 9.7s, mem: 5.6GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 10.1s, mem: 9.0GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-mistral-7b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 19.0s, mem: 9.1GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-vicuna-7b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 13.3s, mem: 9.7GB, 8/8 tests passed. 
-#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-vicuna-13b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 12.5s, mem: 14.7GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-34b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 52.8s, mem: 26.1GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-2B --use-flash-attn --load-in-4bit" # test pass✅, time: 7.6s, mem: 5.7GB, 8/8 tests passed. \ No newline at end of file +#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --device-map cuda:0" # test pass✅, time: 13.5s, mem: 52.1GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m HuggingFaceM4/idefics2-8b-chatty --use-flash-attn --device-map cuda:0" # test pass✅, time: 8.1s, mem: 22.5GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m HuggingFaceM4/idefics2-8b --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.7s, mem: 22.5GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m qihoo360/360VL-8B --use-flash-attn" # test pass✅, time: 5.7s, mem: 17.5GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m qnguyen3/nanoLLaVA --use-flash-attn --device-map cuda:0" # test pass✅, time: 7.8s, mem: 8.2GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m echo840/Monkey" # test pass✅, time: 6.1s, mem: 21.9GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m echo840/Monkey-Chat" # test pass✅, time: 7.8s, mem: 21.9GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogvlm2-llama3-chat-19B" # test pass✅, time: 22.2s, mem: 40.7GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogvlm2-llama3-chinese-chat-19B" # test pass✅, time: 65.4s, mem: 40.7GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf" # test pass✅, time: 14.1s, mem: 36.3GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf" # test pass✅, time: 14.8s, mem: 37.2GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m Qwen/Qwen-VL-Chat" # test pass✅, time: 4.7s, mem: 19.6GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m BAAI/Emu2-Chat --max-memory=0:78GiB,1:20GiB" # test pass✅, time: 22.0s, mem: 78.5GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m BAAI/Bunny-Llama-3-8B-V" # test pass✅, time: 6.6s, mem: 19.7GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m qresearch/llama-3-vision-alpha-hf --device cuda:0" # test pass✅, time: 6.8s, mem: 19.3GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m TIGER-Lab/Mantis-8B-siglip-llama3 --use-flash-attn --device-map cuda:0" # test pass✅, time: 5.0s, mem: 18.0GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m TIGER-Lab/Mantis-8B-clip-llama3 --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.6s, mem: 17.4GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m TIGER-Lab/Mantis-8B-Fuyu --device-map cuda:0" # test pass✅, time: 6.3s, mem: 20.2GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m adept/fuyu-8b --device-map cuda:0" # test pass✅, time: 13.0s, mem: 25.1GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-4khd-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 18.4s, mem: 25.9GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 16.2s, mem: 19.2GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 17.2s, mem: 20.3GB, 8/8 tests passed. 
+#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-1_8b --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.6s, mem: 7.3GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V-2 --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.7s, mem: 11.6GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V --use-flash-attn --device-map cuda:0" # test pass✅, time: 5.8s, mem: 7.8GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --use-flash-attn --device-map cuda:0" # test fail❌, time: 1.8s, mem: 15.8GB, 0/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --use-flash-attn --device-map cuda:0" # test pass✅, time: 5.8s, mem: 14.6GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.4s, mem: 26.9GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-mistral-7b-hf --use-flash-attn" # test pass✅, time: 12.8s, mem: 19.3GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-vicuna-7b-hf --use-flash-attn" # test pass✅, time: 8.8s, mem: 19.0GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-vicuna-13b-hf --use-flash-attn" # test pass✅, time: 9.5s, mem: 32.7GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-34b-hf --use-flash-attn" # test pass✅, time: 46.2s, mem: 72.7GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-2B --use-flash-attn" # test pass✅, time: 4.3s, mem: 8.6GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5-Int8 --device-map cuda:0" # test pass✅, time: 27.5s, mem: 32.3GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --load-in-4bit --device-map cuda:0" # test fail❌, time: 17.3s, mem: 18.5GB, 2/8 tests passed. +#CLI_COMMAND="python vision.py -m HuggingFaceM4/idefics2-8b-chatty-AWQ --use-flash-attn --device-map cuda:0" # test pass✅, time: 9.6s, mem: 13.0GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m HuggingFaceM4/idefics2-8b-AWQ --use-flash-attn --device-map cuda:0" # test pass✅, time: 8.1s, mem: 13.0GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m qihoo360/360VL-8B --use-flash-attn --load-in-4bit" # test pass✅, time: 8.7s, mem: 8.1GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m qihoo360/360VL-70B --use-flash-attn --load-in-4bit" # test fail❌, time: -1.0s, mem: -1.0GB, Error: Server failed to start (exit). +#CLI_COMMAND="python vision.py -m qnguyen3/nanoLLaVA --use-flash-attn --load-in-4bit --device-map cuda:0" # test pass✅, time: 10.7s, mem: 7.9GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf --load-in-4bit" # test pass✅, time: 20.1s, mem: 12.4GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf --load-in-4bit" # test pass✅, time: 21.1s, mem: 12.5GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b-4bit --use-flash-attn" # test pass✅, time: 10.9s, mem: 9.7GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b-4bit --use-flash-attn" # test pass✅, time: 10.2s, mem: 11.2GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --load-in-4bit --use-flash-attn" # test fail❌, time: 2.2s, mem: 6.2GB, 0/8 tests passed. 
+#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 9.3s, mem: 6.0GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 10.8s, mem: 9.4GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-mistral-7b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 20.0s, mem: 9.4GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-vicuna-7b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 13.7s, mem: 10.0GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-vicuna-13b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 12.2s, mem: 15.1GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-34b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 53.3s, mem: 26.4GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m YanweiLi/MGM-2B --use-flash-attn --load-in-4bit" # test pass✅, time: 7.0s, mem: 6.0GB, 8/8 tests passed.
\ No newline at end of file
diff --git a/vision_qna.py b/vision_qna.py
index 3cc801e..267bdd9 100644
--- a/vision_qna.py
+++ b/vision_qna.py
@@ -536,8 +536,11 @@ def guess_backend(model_name: str) -> str:
 
     if 'yi-vl' in model_id:
         return 'yi-vl'
-    
-    if 'thudm/cog' in model_id:
+
+    if 'cogvlm2' in model_id:
+        return 'cogvlm2'
+
+    if 'cogagent-' in model_id or 'cogvlm-' in model_id:
         return 'cogvlm'
 
     if 'fuyu' in model_id:
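
For reference, a minimal client-side sketch of exercising the new cogvlm2 backend through the server's OpenAI-compatible chat completions endpoint, once the server has been started (e.g. `python vision.py -m THUDM/cogvlm2-llama3-chat-19B`). The base URL/port, API key, and image URL below are assumptions; adjust them to your local setup.

# sketch: query the cogvlm2 backend via the OpenAI-compatible API
# assumptions: server reachable at localhost:5006, API key unused, image URL is a placeholder
from openai import OpenAI

client = OpenAI(base_url="http://localhost:5006/v1", api_key="sk-none")

response = client.chat.completions.create(
    model="THUDM/cogvlm2-llama3-chat-19B",
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe this image."},
            {"type": "image_url", "image_url": {"url": "https://example.com/test.jpg"}},
        ],
    }],
    max_tokens=256,
)

print(response.choices[0].message.content)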