diff --git a/Dockerfile b/Dockerfile
index 02c7d9a..d70d47f 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -5,18 +5,18 @@ RUN pip install --no-cache-dir --upgrade pip
 
 RUN mkdir -p /app
 RUN git clone https://github.com/01-ai/Yi --single-branch /app/Yi
-RUN git clone https://github.com/dvlab-research/MiniGemini.git --single-branch /app/MiniGemini
+RUN git clone https://github.com/dvlab-research/MGM.git --single-branch /app/MGM
 
 WORKDIR /app
 COPY requirements.txt .
 ARG VERSION=latest
-# transformers==4.36.2 supports most models except Mini-Gemini-2B, llava-1.6, nanollava
+# transformers==4.36.2 supports most models except MGM-2B, llava-1.6, nanollava
 RUN if [ "$VERSION" = "alt" ]; then echo "transformers==4.36.2" >> requirements.txt; else echo "transformers>=4.39.0" >> requirements.txt ; fi
 # TODO: nvidia apex wheel
 RUN pip install --no-cache-dir -U -r requirements.txt \
     https://github.com/Dao-AILab/flash-attention/releases/download/v2.5.7/flash_attn-2.5.7+cu122torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl
 
-WORKDIR /app/MiniGemini
+WORKDIR /app/MGM
 RUN pip install --no-cache-dir --no-deps -e .
 
 WORKDIR /app
diff --git a/README.md b/README.md
index 12448d7..ce20008 100644
--- a/README.md
+++ b/README.md
@@ -41,16 +41,16 @@ An OpenAI API compatible vision server, it functions like `gpt-4-vision-preview`
 - - [X] [MiniCPM-V-2](https://huggingface.co/openbmb/MiniCPM-V-2)
 - - [X] [MiniCPM-V aka. OmniLMM-3B](https://huggingface.co/openbmb/MiniCPM-V)
 - - [ ] [OmniLMM-12B](https://huggingface.co/openbmb/OmniLMM-12B)
-- [X] [MiniGemini](https://huggingface.co/collections/YanweiLi/) (more complex setup, see: `prepare_minigemini.sh`)
-- - [X] [MiniGemini-2B](https://huggingface.co/YanweiLi/Mini-Gemini-2B) (main docker only)
-- - [X] [MiniGemini-7B](https://huggingface.co/YanweiLi/Mini-Gemini-7B) (alternate docker only)
-- - [X] [MiniGemini-13B](https://huggingface.co/YanweiLi/Mini-Gemini-13B) (alternate docker only)
-- - [X] [MiniGemini-34B](https://huggingface.co/YanweiLi/Mini-Gemini-34B) (alternate docker only)
-- - [X] [MiniGemini-8x7B](https://huggingface.co/YanweiLi/Mini-Gemini-8x7B) (alternate docker only)
-- - [X] [MiniGemini-7B-HD](https://huggingface.co/YanweiLi/Mini-Gemini-7B-HD) (alternate docker only)
-- - [X] [MiniGemini-13B-HD](https://huggingface.co/YanweiLi/Mini-Gemini-13B-HD) (alternate docker only)
-- - [X] [MiniGemini-34B-HD](https://huggingface.co/YanweiLi/Mini-Gemini-34B-HD) (alternate docker only)
-- - [X] [MiniGemini-8x7B-HD](https://huggingface.co/YanweiLi/Mini-Gemini-8x7B-HD) (alternate docker only)
+- [X] [YanweiLi/MGM](https://huggingface.co/collections/YanweiLi/) (aka Mini-Gemini, more complex setup, see: `prepare_minigemini.sh`)
+- - [X] [MGM-2B](https://huggingface.co/YanweiLi/MGM-2B) (main docker only)
+- - [X] [MGM-7B](https://huggingface.co/YanweiLi/MGM-7B) (alternate docker only)
+- - [X] [MGM-13B](https://huggingface.co/YanweiLi/MGM-13B) (alternate docker only)
+- - [X] [MGM-34B](https://huggingface.co/YanweiLi/MGM-34B) (alternate docker only)
+- - [X] [MGM-8x7B](https://huggingface.co/YanweiLi/MGM-8x7B) (alternate docker only)
+- - [X] [MGM-7B-HD](https://huggingface.co/YanweiLi/MGM-7B-HD) (alternate docker only)
+- - [X] [MGM-13B-HD](https://huggingface.co/YanweiLi/MGM-13B-HD) (alternate docker only)
+- - [X] [MGM-34B-HD](https://huggingface.co/YanweiLi/MGM-34B-HD) (alternate docker only)
+- - [X] [MGM-8x7B-HD](https://huggingface.co/YanweiLi/MGM-8x7B-HD) (alternate docker only)
 - [X] [qnguyen3/nanoLLaVA](https://huggingface.co/qnguyen3/nanoLLaVA) (main docker only, wont gpu split)
 - [ ] [Deepseek-VL-7b-chat](https://huggingface.co/deepseek-ai/deepseek-vl-7b-chat)
 - [ ] [Deepseek-VL-1.3b-chat](https://huggingface.co/deepseek-ai/deepseek-vl-1.3b-chat)
@@ -64,6 +64,7 @@ See: [OpenVLM Leaderboard](https://huggingface.co/spaces/opencompass/open_vlm_le
 
 Version: 0.11.0
 
 - new model support: OpenGVLab/InternVL-Chat-V1-5, up to 4k resolution, top opensource model
+- MiniGemini renamed to MGM upstream
 
 Version: 0.10.0
diff --git a/backend/minigemini.py b/backend/minigemini.py
index f151280..955d203 100644
--- a/backend/minigemini.py
+++ b/backend/minigemini.py
@@ -1,29 +1,27 @@
 import re
 from accelerate import infer_auto_device_map
-
-
 import transformers
 import warnings
 
 # disable some warnings
 transformers.logging.set_verbosity_error()
 warnings.filterwarnings('ignore')
 
-from minigemini.constants import IMAGE_TOKEN_INDEX
-from minigemini.model.builder import load_pretrained_model
-from minigemini.mm_utils import process_images, tokenizer_image_token
+from mgm.constants import IMAGE_TOKEN_INDEX
+from mgm.model.builder import load_pretrained_model
+from mgm.mm_utils import process_images, tokenizer_image_token
 
 from vision_qna import *
 
-# YanweiLi/Mini-Gemini-2B
-# YanweiLi/Mini-Gemini-7B
-# YanweiLi/Mini-Gemini-7B-HD
-# YanweiLi/Mini-Gemini-13B
-# YanweiLi/Mini-Gemini-34B
-# YanweiLi/Mini-Gemini-34B-HD
-# YanweiLi/Mini-Gemini-13B-HDs
-# YanweiLi/Mini-Gemini-8x7B-HD
-# YanweiLi/Mini-Gemini-8x7B
+# YanweiLi/MGM-2B
+# YanweiLi/MGM-7B
+# YanweiLi/MGM-7B-HD
+# YanweiLi/MGM-13B
+# YanweiLi/MGM-34B
+# YanweiLi/MGM-34B-HD
+# YanweiLi/MGM-13B-HD
+# YanweiLi/MGM-8x7B-HD
+# YanweiLi/MGM-8x7B
 
 class VisionQnA(VisionQnABase):
     model_name: str = "minigemini"
diff --git a/model_conf_tests.alt.json b/model_conf_tests.alt.json
index e596af6..cc07bd3 100644
--- a/model_conf_tests.alt.json
+++ b/model_conf_tests.alt.json
@@ -7,15 +7,15 @@
     ["THUDM/cogvlm-chat-hf"],
     ["THUDM/cogagent-chat-hf"],
     ["Qwen/Qwen-VL-Chat"],
-    ["YanweiLi/Mini-Gemini-2B", "--use-flash-attn"],
-    ["YanweiLi/Mini-Gemini-7B", "--use-flash-attn"],
-    ["YanweiLi/Mini-Gemini-7B-HD", "--use-flash-attn"],
-    ["YanweiLi/Mini-Gemini-13B", "--use-flash-attn"],
-    ["YanweiLi/Mini-Gemini-13B-HD", "--use-flash-attn"],
-    ["YanweiLi/Mini-Gemini-34B", "--use-flash-attn"],
-    ["YanweiLi/Mini-Gemini-34B-HD", "--use-flash-attn"],
-    ["YanweiLi/Mini-Gemini-8x7B", "--use-flash-attn"],
-    ["YanweiLi/Mini-Gemini-8x7B-HD", "--use-flash-attn"],
+    ["YanweiLi/MGM-2B", "--use-flash-attn"],
+    ["YanweiLi/MGM-7B", "--use-flash-attn"],
+    ["YanweiLi/MGM-7B-HD", "--use-flash-attn"],
+    ["YanweiLi/MGM-13B", "--use-flash-attn"],
+    ["YanweiLi/MGM-13B-HD", "--use-flash-attn"],
+    ["YanweiLi/MGM-34B", "--use-flash-attn"],
+    ["YanweiLi/MGM-34B-HD", "--use-flash-attn"],
+    ["YanweiLi/MGM-8x7B", "--use-flash-attn"],
+    ["YanweiLi/MGM-8x7B-HD", "--use-flash-attn"],
     ["adept/fuyu-8b", "--device-map", "cuda:0"],
     ["internlm/internlm-xcomposer2-7b", "--use-flash-attn", "--device-map", "cuda:0"],
     ["internlm/internlm-xcomposer2-vl-7b", "--use-flash-attn", "--device-map", "cuda:0"],
@@ -25,7 +25,7 @@
     ["llava-hf/llava-1.5-7b-hf", "--use-flash-attn", "--device-map", "cuda:0"],
     ["llava-hf/llava-1.5-13b-hf", "--use-flash-attn", "--device-map", "cuda:0"],
 
-    ["OpenGVLab/InternVL-Chat-V1-5", , "--load-in-4bit", "--device-map", "cuda:0"],
+    ["OpenGVLab/InternVL-Chat-V1-5", "--load-in-4bit", "--device-map", "cuda:0"],
     ["THUDM/cogvlm-chat-hf", "--load-in-4bit"],
     ["THUDM/cogagent-chat-hf", "--load-in-4bit"],
     ["internlm/internlm-xcomposer2-7b-4bit", "--use-flash-attn", "--device", "cuda:0"],
@@ -34,12 +34,12 @@
     ["llava-hf/llava-1.5-7b-hf", "--load-in-4bit", "--use-flash-attn"],
     ["llava-hf/llava-1.5-13b-hf", "--load-in-4bit", "--use-flash-attn"],
-    ["YanweiLi/Mini-Gemini-7B", "--load-in-4bit", "--use-flash-attn"],
-    ["YanweiLi/Mini-Gemini-7B-HD", "--load-in-4bit", "--use-flash-attn"],
-    ["YanweiLi/Mini-Gemini-13B", "--load-in-4bit", "--use-flash-attn"],
-    ["YanweiLi/Mini-Gemini-13B-HD", "--load-in-4bit", "--use-flash-attn"],
-    ["YanweiLi/Mini-Gemini-34B", "--load-in-4bit", "--use-flash-attn"],
-    ["YanweiLi/Mini-Gemini-34B-HD", "--load-in-4bit", "--use-flash-attn"],
-    ["YanweiLi/Mini-Gemini-8x7B", "--load-in-4bit", "--use-flash-attn"],
-    ["YanweiLi/Mini-Gemini-8x7B-HD", "--load-in-4bit", "--use-flash-attn"]
+    ["YanweiLi/MGM-7B", "--load-in-4bit", "--use-flash-attn"],
+    ["YanweiLi/MGM-7B-HD", "--load-in-4bit", "--use-flash-attn"],
+    ["YanweiLi/MGM-13B", "--load-in-4bit", "--use-flash-attn"],
+    ["YanweiLi/MGM-13B-HD", "--load-in-4bit", "--use-flash-attn"],
+    ["YanweiLi/MGM-34B", "--load-in-4bit", "--use-flash-attn"],
+    ["YanweiLi/MGM-34B-HD", "--load-in-4bit", "--use-flash-attn"],
+    ["YanweiLi/MGM-8x7B", "--load-in-4bit", "--use-flash-attn"],
+    ["YanweiLi/MGM-8x7B-HD", "--load-in-4bit", "--use-flash-attn"]
 ]
diff --git a/model_conf_tests.json b/model_conf_tests.json
index 5e40500..1636f36 100644
--- a/model_conf_tests.json
+++ b/model_conf_tests.json
@@ -22,7 +22,7 @@
     ["llava-hf/llava-v1.6-34b-hf", "--use-flash-attn"],
     ["01-ai/Yi-VL-6B", "--use-flash-attn"],
     ["01-ai/Yi-VL-34B", "--use-flash-attn"],
-    ["YanweiLi/Mini-Gemini-2B", "--use-flash-attn"],
+    ["YanweiLi/MGM-2B", "--use-flash-attn"],
 
     ["OpenGVLab/InternVL-Chat-V1-5", "--load-in-4bit", "--device-map", "cuda:0"],
     ["qnguyen3/nanoLLaVA", "--use-flash-attn", "--load-in-4bit"],
@@ -39,5 +39,5 @@
     ["llava-hf/llava-v1.6-34b-hf", "--load-in-4bit", "--use-flash-attn"],
     ["01-ai/Yi-VL-6B", "--load-in-4bit", "--use-flash-attn"],
     ["01-ai/Yi-VL-34B", "--load-in-4bit", "--use-flash-attn"],
-    ["YanweiLi/Mini-Gemini-2B", "--use-flash-attn", "--load-in-4bit"]
+    ["YanweiLi/MGM-2B", "--use-flash-attn", "--load-in-4bit"]
 ]
diff --git a/prepare_minigemini.sh b/prepare_minigemini.sh
index 0a8fde5..6ddd596 100755
--- a/prepare_minigemini.sh
+++ b/prepare_minigemini.sh
@@ -12,15 +12,15 @@ huggingface-cli download OpenAI/clip-vit-large-patch14-336 --local-dir model_zoo
 huggingface-cli download laion/CLIP-convnext_large_d_320.laion2B-s29B-b131K-ft-soup --local-dir model_zoo/OpenAI/openclip-convnext-large-d-320-laion2B-s29B-b131K-ft-soup
 
 # Select the model(s) of your choice and download them before starting the server
-#huggingface-cli download YanweiLi/Mini-Gemini-2B --local-dir YanweiLi/Mini-Gemini-2B # main image
-#huggingface-cli download YanweiLi/Mini-Gemini-7B --local-dir YanweiLi/Mini-Gemini-7B # alt image
-#huggingface-cli download YanweiLi/Mini-Gemini-7B-HD --local-dir YanweiLi/Mini-Gemini-7B-HD # alt image
-#huggingface-cli download YanweiLi/Mini-Gemini-13B --local-dir YanweiLi/Mini-Gemini-13B # alt image
-#huggingface-cli download YanweiLi/Mini-Gemini-13B-HD --local-dir YanweiLi/Mini-Gemini-13B-HD # alt image
-#huggingface-cli download YanweiLi/Mini-Gemini-34B --local-dir YanweiLi/Mini-Gemini-34B # alt image
-#huggingface-cli download YanweiLi/Mini-Gemini-34B-HD --local-dir YanweiLi/Mini-Gemini-34B-HD # alt image
-#huggingface-cli download YanweiLi/Mini-Gemini-8x7B --local-dir YanweiLi/Mini-Gemini-8x7B # alt image
-#huggingface-cli download YanweiLi/Mini-Gemini-8x7B-HD --local-dir YanweiLi/Mini-Gemini-8x7B-HD # alt image
+#huggingface-cli download YanweiLi/MGM-2B --local-dir YanweiLi/MGM-2B # main image
+#huggingface-cli download YanweiLi/MGM-7B --local-dir YanweiLi/MGM-7B # alt image
+#huggingface-cli download YanweiLi/MGM-7B-HD --local-dir YanweiLi/MGM-7B-HD # alt image
+#huggingface-cli download YanweiLi/MGM-13B --local-dir YanweiLi/MGM-13B # alt image
+#huggingface-cli download YanweiLi/MGM-13B-HD --local-dir YanweiLi/MGM-13B-HD # alt image
+#huggingface-cli download YanweiLi/MGM-34B --local-dir YanweiLi/MGM-34B # alt image
+#huggingface-cli download YanweiLi/MGM-34B-HD --local-dir YanweiLi/MGM-34B-HD # alt image
+#huggingface-cli download YanweiLi/MGM-8x7B --local-dir YanweiLi/MGM-8x7B # alt image
+#huggingface-cli download YanweiLi/MGM-8x7B-HD --local-dir YanweiLi/MGM-8x7B-HD # alt image
 #huggingface-cli download 01-ai/Yi-VL-6B --local-dir Yi/VL/01-ai/Yi-VL-6B
 #huggingface-cli download 01-ai/Yi-VL-34B --local-dir Yi/VL/01-ai/Yi-VL-34B
diff --git a/requirements.txt b/requirements.txt
index 4ca69b1..a0d8feb 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -15,7 +15,7 @@
 uvicorn
 xformers
 # moondream
-deepspeed
+deepspeed==0.11.1
 einops
 einops-exts
 httpx
diff --git a/vision-alt.sample.env b/vision-alt.sample.env
index 065da26..08dc6a5 100644
--- a/vision-alt.sample.env
+++ b/vision-alt.sample.env
@@ -2,42 +2,44 @@
 # Copy this file to vision.env and uncomment the model of your choice.
 HF_HOME=hf_home
 #CUDA_VISIBLE_DEVICES=1,0
-#CLI_COMMAND="python vision.py -m vikhyatk/moondream2 --use-flash-attn" # test pass, time: 4.6s, mem: 4.6GB, All tests passed.
-#CLI_COMMAND="python vision.py -m vikhyatk/moondream1" # test pass, time: 4.0s, mem: 4.9GB, All tests passed.
-#CLI_COMMAND="python vision.py -m echo840/Monkey" # test pass, time: 6.1s, mem: 21.8GB, All tests passed.
-#CLI_COMMAND="python vision.py -m echo840/Monkey-Chat" # test pass, time: 7.6s, mem: 21.8GB, All tests passed.
-#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf" # test pass, time: 13.4s, mem: 36.3GB, All tests passed.
-#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf" # test pass, time: 14.7s, mem: 37.2GB, All tests passed.
-#CLI_COMMAND="python vision.py -m Qwen/Qwen-VL-Chat" # test pass, time: 4.9s, mem: 19.5GB, All tests passed.
-#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-2B --use-flash-attn" # test fail, time: -1.0s, mem: -1.0GB, Error: Server failed to start (exit).
-#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-7B --use-flash-attn" # test pass, time: 5.4s, mem: 15.6GB, All tests passed.
-#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-7B-HD --use-flash-attn" # test pass, time: 15.8s, mem: 18.8GB, All tests passed.
-#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-13B --use-flash-attn" # test pass, time: 21.3s, mem: 27.6GB, All tests passed.
-#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-13B-HD --use-flash-attn" # test pass, time: 15.9s, mem: 31.7GB, All tests passed.
-#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-34B --use-flash-attn" # test pass, time: 11.1s, mem: 67.2GB, All tests passed.
-#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-34B-HD --use-flash-attn" # test pass, time: 145.1s, mem: 70.3GB, All tests passed.
-#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-8x7B --use-flash-attn" # test pass, time: 14.3s, mem: 91.3GB, All tests passed.
-#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-8x7B-HD --use-flash-attn" # test pass, time: 18.5s, mem: 96.1GB, All tests passed.
-#CLI_COMMAND="python vision.py -m adept/fuyu-8b --device-map cuda:0" # test pass, time: 13.4s, mem: 25.0GB, All tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b --use-flash-attn --device-map cuda:0" # test pass, time: 18.2s, mem: 19.0GB, All tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b --use-flash-attn --device-map cuda:0" # test pass, time: 16.7s, mem: 20.2GB, All tests passed. -#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V-2 --use-flash-attn --device-map cuda:0" # test pass, time: 6.6s, mem: 11.4GB, All tests passed. -#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V --use-flash-attn --device-map cuda:0" # test pass, time: 5.7s, mem: 7.6GB, All tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --use-flash-attn --device-map cuda:0" # test fail, time: 2.0s, mem: 15.6GB, -#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --use-flash-attn --device-map cuda:0" # test pass, time: 5.4s, mem: 14.5GB, All tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --use-flash-attn --device-map cuda:0" # test pass, time: 6.6s, mem: 26.9GB, All tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b-4bit --use-flash-attn --device cuda:0" # test pass, time: 10.5s, mem: 9.5GB, All tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b-4bit --use-flash-attn --device cuda:0" # test pass, time: 11.6s, mem: 10.9GB, All tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf --load-in-4bit" # test pass, time: 19.5s, mem: 12.2GB, All tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf --load-in-4bit" # test pass, time: 20.4s, mem: 12.2GB, All tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --load-in-4bit --use-flash-attn" # test fail, time: 2.5s, mem: 6.0GB, -#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --load-in-4bit --use-flash-attn" # test pass, time: 9.2s, mem: 5.6GB, All tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --load-in-4bit --use-flash-attn" # test pass, time: 10.0s, mem: 9.0GB, All tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-7B --load-in-4bit --use-flash-attn" # test pass, time: 9.4s, mem: 6.7GB, All tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-7B-HD --load-in-4bit --use-flash-attn" # test pass, time: 33.9s, mem: 9.9GB, All tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-13B --load-in-4bit --use-flash-attn" # test pass, time: 34.3s, mem: 10.0GB, All tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-13B-HD --load-in-4bit --use-flash-attn" # test pass, time: 20.6s, mem: 14.0GB, All tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-34B --load-in-4bit --use-flash-attn" # test pass, time: 16.8s, mem: 21.5GB, All tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-34B-HD --load-in-4bit --use-flash-attn" # test pass, time: 215.3s, mem: 24.2GB, All tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-8x7B --load-in-4bit --use-flash-attn" # test pass, time: 22.2s, mem: 26.3GB, All tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-8x7B-HD --load-in-4bit --use-flash-attn" # test pass, time: 24.7s, mem: 29.5GB, All tests passed. 
\ No newline at end of file
+#CLI_COMMAND="python vision.py -m vikhyatk/moondream2 --use-flash-attn" # test pass, time: 4.8s, mem: 4.6GB, All tests passed.
+#CLI_COMMAND="python vision.py -m vikhyatk/moondream1" # test pass, time: 4.1s, mem: 4.9GB, All tests passed.
+#CLI_COMMAND="python vision.py -m echo840/Monkey" # test pass, time: 6.1s, mem: 21.9GB, All tests passed.
+#CLI_COMMAND="python vision.py -m echo840/Monkey-Chat" # test pass, time: 7.7s, mem: 21.8GB, All tests passed.
+#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --device-map cuda:0" # test pass, time: 13.5s, mem: 52.2GB, All tests passed.
+#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf" # test pass, time: 13.8s, mem: 36.5GB, All tests passed.
+#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf" # test pass, time: 15.4s, mem: 37.4GB, All tests passed.
+#CLI_COMMAND="python vision.py -m Qwen/Qwen-VL-Chat" # test pass, time: 4.7s, mem: 19.7GB, All tests passed.
+#CLI_COMMAND="python vision.py -m YanweiLi/MGM-2B --use-flash-attn" # test fail, time: -1.0s, mem: -1.0GB, Error: Server failed to start (exit).
+#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B --use-flash-attn" # test pass, time: 5.6s, mem: 15.8GB, All tests passed.
+#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B-HD --use-flash-attn" # test pass, time: 14.8s, mem: 19.1GB, All tests passed.
+#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B --use-flash-attn" # test pass, time: 22.1s, mem: 27.9GB, All tests passed.
+#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B-HD --use-flash-attn" # test pass, time: 17.1s, mem: 31.9GB, All tests passed.
+#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B --use-flash-attn" # test pass, time: 11.2s, mem: 67.5GB, All tests passed.
+#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B-HD --use-flash-attn" # test pass, time: 77.9s, mem: 70.6GB, All tests passed.
+#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B --use-flash-attn" # test pass, time: 14.9s, mem: 91.7GB, All tests passed.
+#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B-HD --use-flash-attn" # test pass, time: 20.0s, mem: 96.4GB, All tests passed.
+#CLI_COMMAND="python vision.py -m adept/fuyu-8b --device-map cuda:0" # test pass, time: 15.9s, mem: 25.1GB, All tests passed.
+#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b --use-flash-attn --device-map cuda:0" # test pass, time: 15.9s, mem: 19.2GB, All tests passed.
+#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b --use-flash-attn --device-map cuda:0" # test pass, time: 17.6s, mem: 20.3GB, All tests passed.
+#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V-2 --use-flash-attn --device-map cuda:0" # test pass, time: 7.1s, mem: 11.6GB, All tests passed.
+#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V --use-flash-attn --device-map cuda:0" # test pass, time: 6.6s, mem: 7.7GB, All tests passed.
+#CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --use-flash-attn --device-map cuda:0" # test fail, time: 2.1s, mem: 15.9GB,
+#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --use-flash-attn --device-map cuda:0" # test pass, time: 6.2s, mem: 14.6GB, All tests passed.
+#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --use-flash-attn --device-map cuda:0" # test pass, time: 7.1s, mem: 27.0GB, All tests passed.
+#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --load-in-4bit --device-map cuda:0" # test fail, time: 18.3s, mem: 18.3GB, +#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf --load-in-4bit" # test pass, time: 21.5s, mem: 12.2GB, All tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf --load-in-4bit" # test pass, time: 20.8s, mem: 12.2GB, All tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b-4bit --use-flash-attn --device cuda:0" # test pass, time: 10.4s, mem: 9.7GB, All tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b-4bit --use-flash-attn --device cuda:0" # test pass, time: 12.9s, mem: 11.1GB, All tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --load-in-4bit --use-flash-attn" # test fail, time: 2.6s, mem: 6.1GB, +#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --load-in-4bit --use-flash-attn" # test pass, time: 10.0s, mem: 5.5GB, All tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --load-in-4bit --use-flash-attn" # test pass, time: 11.3s, mem: 8.9GB, All tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B --load-in-4bit --use-flash-attn" # test pass, time: 12.5s, mem: 6.6GB, All tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B-HD --load-in-4bit --use-flash-attn" # test pass, time: 39.7s, mem: 9.8GB, All tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B --load-in-4bit --use-flash-attn" # test pass, time: 41.2s, mem: 9.9GB, All tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B-HD --load-in-4bit --use-flash-attn" # test pass, time: 17.6s, mem: 13.9GB, All tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B --load-in-4bit --use-flash-attn" # test pass, time: 18.2s, mem: 21.4GB, All tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B-HD --load-in-4bit --use-flash-attn" # test pass, time: 222.7s, mem: 24.4GB, All tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B --load-in-4bit --use-flash-attn" # test pass, time: 22.6s, mem: 26.5GB, All tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B-HD --load-in-4bit --use-flash-attn" # test pass, time: 26.1s, mem: 29.7GB, All tests passed. \ No newline at end of file diff --git a/vision.sample.env b/vision.sample.env index c60024a..9971557 100644 --- a/vision.sample.env +++ b/vision.sample.env @@ -2,43 +2,43 @@ # Copy this file to vision.env and uncomment the model of your choice. HF_HOME=hf_home #CUDA_VISIBLE_DEVICES=1,0 -#CLI_COMMAND="python vision.py -m vikhyatk/moondream2 --use-flash-attn" # test pass, time: 4.4s, mem: 4.6GB, All tests passed. -#CLI_COMMAND="python vision.py -m vikhyatk/moondream1" # test fail, time: 3.6s, mem: 4.9GB, Test failed with Exception: Internal Server Error -#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --device-map cuda:0" # test pass, time: 13.4s, mem: 52.0GB, All tests passed. -#CLI_COMMAND="python vision.py -m qnguyen3/nanoLLaVA --use-flash-attn" # test pass, time: 7.4s, mem: 8.5GB, All tests passed. -#CLI_COMMAND="python vision.py -m echo840/Monkey" # test pass, time: 6.2s, mem: 21.8GB, All tests passed. -#CLI_COMMAND="python vision.py -m echo840/Monkey-Chat" # test pass, time: 7.8s, mem: 21.7GB, All tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf" # test pass, time: 14.1s, mem: 36.2GB, All tests passed. 
-#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf" # test pass, time: 14.7s, mem: 37.2GB, All tests passed. -#CLI_COMMAND="python vision.py -m Qwen/Qwen-VL-Chat" # test pass, time: 4.8s, mem: 19.5GB, All tests passed. -#CLI_COMMAND="python vision.py -m adept/fuyu-8b --device-map cuda:0" # test pass, time: 13.4s, mem: 25.0GB, All tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b --use-flash-attn --device-map cuda:0" # test pass, time: 18.3s, mem: 19.0GB, All tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b --use-flash-attn --device-map cuda:0" # test pass, time: 14.9s, mem: 20.2GB, All tests passed. -#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V-2 --use-flash-attn --device-map cuda:0" # test pass, time: 6.7s, mem: 11.5GB, All tests passed. -#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V --use-flash-attn --device-map cuda:0" # test pass, time: 5.8s, mem: 7.6GB, All tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --use-flash-attn --device-map cuda:0" # test fail, time: 1.9s, mem: 15.7GB, -#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --use-flash-attn --device-map cuda:0" # test pass, time: 6.0s, mem: 14.5GB, All tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --use-flash-attn --device-map cuda:0" # test pass, time: 6.9s, mem: 26.9GB, All tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-mistral-7b-hf --use-flash-attn" # test pass, time: 12.6s, mem: 19.2GB, All tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-vicuna-7b-hf --use-flash-attn" # test pass, time: 8.5s, mem: 18.9GB, All tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-vicuna-13b-hf --use-flash-attn" # test pass, time: 9.5s, mem: 32.6GB, All tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-34b-hf --use-flash-attn" # test pass, time: 46.0s, mem: 72.4GB, All tests passed. +#CLI_COMMAND="python vision.py -m vikhyatk/moondream2 --use-flash-attn" # test pass, time: 7.3s, mem: 4.6GB, All tests passed. +#CLI_COMMAND="python vision.py -m vikhyatk/moondream1" # test fail, time: 4.2s, mem: 4.9GB, Test failed with Exception: Internal Server Error +#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --device-map cuda:0" # test pass, time: 18.7s, mem: 52.0GB, All tests passed. +#CLI_COMMAND="python vision.py -m qnguyen3/nanoLLaVA --use-flash-attn" # test pass, time: 11.4s, mem: 8.4GB, All tests passed. +#CLI_COMMAND="python vision.py -m echo840/Monkey" # test pass, time: 10.0s, mem: 21.8GB, All tests passed. +#CLI_COMMAND="python vision.py -m echo840/Monkey-Chat" # test pass, time: 13.2s, mem: 21.7GB, All tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf" # test pass, time: 17.0s, mem: 36.1GB, All tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf" # test pass, time: 19.9s, mem: 36.9GB, All tests passed. +#CLI_COMMAND="python vision.py -m Qwen/Qwen-VL-Chat" # test pass, time: 8.2s, mem: 19.3GB, All tests passed. +#CLI_COMMAND="python vision.py -m adept/fuyu-8b --device-map cuda:0" # test pass, time: 18.9s, mem: 24.8GB, All tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b --use-flash-attn --device-map cuda:0" # test pass, time: 23.7s, mem: 18.8GB, All tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b --use-flash-attn --device-map cuda:0" # test pass, time: 20.7s, mem: 20.0GB, All tests passed. 
+#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V-2 --use-flash-attn --device-map cuda:0" # test pass, time: 9.7s, mem: 11.3GB, All tests passed. +#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V --use-flash-attn --device-map cuda:0" # test pass, time: 12.7s, mem: 7.4GB, All tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --use-flash-attn --device-map cuda:0" # test fail, time: 6.8s, mem: 15.5GB, +#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --use-flash-attn --device-map cuda:0" # test pass, time: 8.5s, mem: 14.3GB, All tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --use-flash-attn --device-map cuda:0" # test pass, time: 12.6s, mem: 26.6GB, All tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-mistral-7b-hf --use-flash-attn" # test pass, time: 16.1s, mem: 19.0GB, All tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-vicuna-7b-hf --use-flash-attn" # test pass, time: 13.3s, mem: 18.7GB, All tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-vicuna-13b-hf --use-flash-attn" # test pass, time: 12.4s, mem: 32.3GB, All tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-34b-hf --use-flash-attn" # test pass, time: 52.0s, mem: 72.3GB, All tests passed. #CLI_COMMAND="python vision.py -m 01-ai/Yi-VL-6B --use-flash-attn" # test fail, time: -1.0s, mem: -1.0GB, Error: Server failed to start (exit). #CLI_COMMAND="python vision.py -m 01-ai/Yi-VL-34B --use-flash-attn" # test fail, time: -1.0s, mem: -1.0GB, Error: Server failed to start (exit). -#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-2B --use-flash-attn" # test pass, time: 4.2s, mem: 8.3GB, All tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --load-in-4bit --device-map cuda:0" # test fail, time: 17.2s, mem: 18.2GB, -#CLI_COMMAND="python vision.py -m qnguyen3/nanoLLaVA --use-flash-attn --load-in-4bit" # test pass, time: 11.3s, mem: 8.0GB, All tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf --load-in-4bit" # test pass, time: 19.5s, mem: 12.1GB, All tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf --load-in-4bit" # test pass, time: 19.8s, mem: 12.2GB, All tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b-4bit --use-flash-attn" # test pass, time: 10.1s, mem: 9.4GB, All tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b-4bit --use-flash-attn" # test pass, time: 12.9s, mem: 10.9GB, All tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --load-in-4bit --use-flash-attn" # test fail, time: 2.4s, mem: 5.9GB, -#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --load-in-4bit --use-flash-attn" # test pass, time: 8.8s, mem: 5.7GB, All tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --load-in-4bit --use-flash-attn" # test pass, time: 10.2s, mem: 9.1GB, All tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-mistral-7b-hf --load-in-4bit --use-flash-attn" # test pass, time: 18.3s, mem: 9.3GB, All tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-vicuna-7b-hf --load-in-4bit --use-flash-attn" # test pass, time: 12.7s, mem: 9.9GB, All tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-vicuna-13b-hf --load-in-4bit --use-flash-attn" # test pass, time: 12.0s, mem: 14.8GB, All tests passed. 
-#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-34b-hf --load-in-4bit --use-flash-attn" # test pass, time: 51.5s, mem: 26.2GB, All tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-2B --use-flash-attn" # test pass, time: 7.9s, mem: 8.2GB, All tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --load-in-4bit --device-map cuda:0" # test fail, time: 20.1s, mem: 18.0GB, +#CLI_COMMAND="python vision.py -m qnguyen3/nanoLLaVA --use-flash-attn --load-in-4bit" # test pass, time: 14.5s, mem: 7.8GB, All tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf --load-in-4bit" # test pass, time: 21.3s, mem: 12.0GB, All tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf --load-in-4bit" # test pass, time: 24.7s, mem: 12.0GB, All tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b-4bit --use-flash-attn" # test pass, time: 13.2s, mem: 9.2GB, All tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b-4bit --use-flash-attn" # test pass, time: 19.2s, mem: 10.7GB, All tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --load-in-4bit --use-flash-attn" # test fail, time: 5.0s, mem: 5.7GB, +#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --load-in-4bit --use-flash-attn" # test pass, time: 11.0s, mem: 5.5GB, All tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --load-in-4bit --use-flash-attn" # test pass, time: 15.8s, mem: 8.9GB, All tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-mistral-7b-hf --load-in-4bit --use-flash-attn" # test pass, time: 26.6s, mem: 8.9GB, All tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-vicuna-7b-hf --load-in-4bit --use-flash-attn" # test pass, time: 15.5s, mem: 9.6GB, All tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-vicuna-13b-hf --load-in-4bit --use-flash-attn" # test pass, time: 13.3s, mem: 14.5GB, All tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-34b-hf --load-in-4bit --use-flash-attn" # test pass, time: 54.8s, mem: 26.0GB, All tests passed. #CLI_COMMAND="python vision.py -m 01-ai/Yi-VL-6B --load-in-4bit --use-flash-attn" # test fail, time: -1.0s, mem: -1.0GB, Error: Server failed to start (exit). #CLI_COMMAND="python vision.py -m 01-ai/Yi-VL-34B --load-in-4bit --use-flash-attn" # test fail, time: -1.0s, mem: -1.0GB, Error: Server failed to start (exit). -#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-2B --use-flash-attn --load-in-4bit" # test pass, time: 6.9s, mem: 5.7GB, All tests passed. \ No newline at end of file +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-2B --use-flash-attn --load-in-4bit" # test pass, time: 7.4s, mem: 5.6GB, All tests passed. \ No newline at end of file diff --git a/vision_qna.py b/vision_qna.py index fcb77d6..c74e534 100644 --- a/vision_qna.py +++ b/vision_qna.py @@ -436,7 +436,7 @@ def guess_backend(model_name: str) -> str: if 'monkey' in model_id: return 'monkey' - if 'mini-gemini' in model_id: + if 'mgm-' in model_id or 'minigemini' in model_id or 'mini-gemini' in model_id: return 'minigemini' if 'deepseek' in model_id: @@ -453,9 +453,6 @@ def guess_backend(model_name: str) -> str: if 'xcomposer2' in model_id: return 'xcomposer2' - - if 'mini-gemini' in model_id: - return 'minigemini' if 'yi-vl' in model_id: return 'yi-vl'