
0.13.0 updated tests
matatonic committed May 2, 2024
1 parent 24b672b commit 1338426
Showing 3 changed files with 59 additions and 55 deletions.
README.md: 2 changes (1 addition, 1 deletion)
@@ -39,7 +39,7 @@ An OpenAI API compatible vision server, it functions like `gpt-4-vision-preview`
 - [X] [qresearch](https://huggingface.co/qresearch/)
 - - [X] [llama-3-vision-alpha-hf](https://huggingface.co/qresearch/llama-3-vision-alpha-hf) (main docker only, won't gpu split)
 - [X] [BAAI](https://huggingface.co/BAAI/)
-- - [X] [Bunny-Llama-3-8B-V](https://huggingface.co/BAAI/Bunny-Llama-3-8B-V)
+- - [X] [Bunny-Llama-3-8B-V](https://huggingface.co/BAAI/Bunny-Llama-3-8B-V) (main docker only)
 - [X] [fuyu-8b](https://huggingface.co/adept/fuyu-8b) [pretrain]
 - [X] [Monkey-Chat](https://huggingface.co/echo840/Monkey-Chat)
 - [X] [Monkey](https://huggingface.co/echo840/Monkey)
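
As the hunk header notes, this is an OpenAI API compatible vision server, so it is queried like a `gpt-4-vision-preview` endpoint. A minimal sketch of such a request; the port 5006, the model name, and the image URL are illustrative assumptions following the OpenAI chat-completions convention, not details from this commit:

    curl http://localhost:5006/v1/chat/completions \
      -H "Content-Type: application/json" \
      -d '{
        "model": "gpt-4-vision-preview",
        "messages": [{
          "role": "user",
          "content": [
            {"type": "text", "text": "Describe this image."},
            {"type": "image_url", "image_url": {"url": "https://example.com/cat.jpg"}}
          ]
        }],
        "max_tokens": 128
      }'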
prepare_minigemini.sh: 33 changes (18 additions, 15 deletions)
@@ -6,21 +6,24 @@ if [ -z "$(which huggingface-cli)" ]; then
     exit 1
 fi
 
-echo "Edit this script and uncomment which models to download"
+ALL_MODELS="2B 7B 7B-HD 13B 13B-HD 34B 34B-HD 8x7B 8x7B-HD"
+MODELS=${*:-}
 
-huggingface-cli download OpenAI/clip-vit-large-patch14-336 --local-dir model_zoo/OpenAI/clip-vit-large-patch14-336
-huggingface-cli download laion/CLIP-convnext_large_d_320.laion2B-s29B-b131K-ft-soup --local-dir model_zoo/OpenAI/openclip-convnext-large-d-320-laion2B-s29B-b131K-ft-soup
+if [ "$MODELS" = "all" ]; then
+    MODELS=$ALL_MODELS
+elif [ -z "$MODELS" ]; then
+    echo "Specify which sizes of models to download for Mini-Gemini (a.k.a. MGM), or 'all' for all."
+    echo "Choose from: $ALL_MODELS"
+    echo "Example: $0 2B 8x7B-HD"
+    echo "Example: $0 all"
+    exit 1
+fi
 
-# Select the model(s) of your choice and download them before starting the server
-huggingface-cli download YanweiLi/MGM-2B --local-dir YanweiLi/MGM-2B # main image
-huggingface-cli download YanweiLi/MGM-7B --local-dir YanweiLi/MGM-7B # alt image
-huggingface-cli download YanweiLi/MGM-7B-HD --local-dir YanweiLi/MGM-7B-HD # alt image
-huggingface-cli download YanweiLi/MGM-13B --local-dir YanweiLi/MGM-13B # alt image
-huggingface-cli download YanweiLi/MGM-13B-HD --local-dir YanweiLi/MGM-13B-HD # alt image
-huggingface-cli download YanweiLi/MGM-34B --local-dir YanweiLi/MGM-34B # alt image
-huggingface-cli download YanweiLi/MGM-34B-HD --local-dir YanweiLi/MGM-34B-HD # alt image
-huggingface-cli download YanweiLi/MGM-8x7B --local-dir YanweiLi/MGM-8x7B # alt image
-huggingface-cli download YanweiLi/MGM-8x7B-HD --local-dir YanweiLi/MGM-8x7B-HD # alt image
+# Required
+echo "Downloading required vit/clip models..."
+huggingface-cli download OpenAI/clip-vit-large-patch14-336 --local-dir model_zoo/OpenAI/clip-vit-large-patch14-336 || exit
+huggingface-cli download laion/CLIP-convnext_large_d_320.laion2B-s29B-b131K-ft-soup --local-dir model_zoo/OpenAI/openclip-convnext-large-d-320-laion2B-s29B-b131K-ft-soup || exit
 
-#huggingface-cli download 01-ai/Yi-VL-6B --local-dir Yi/VL/01-ai/Yi-VL-6B
-#huggingface-cli download 01-ai/Yi-VL-34B --local-dir Yi/VL/01-ai/Yi-VL-34B
+for M in $MODELS; do
+    huggingface-cli download YanweiLi/MGM-$M --local-dir YanweiLi/MGM-$M || exit
+done
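
With this change the required vit/clip weights always download, and only the Mini-Gemini sizes named on the command line are fetched instead of a hand-edited list. A usage sketch, following the script's own examples:

    ./prepare_minigemini.sh 2B 8x7B-HD   # download just these two sizes
    ./prepare_minigemini.sh all          # download everything in ALL_MODELS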
vision-alt.sample.env: 79 changes (40 additions, 39 deletions)
@@ -2,45 +2,46 @@
 # Copy this file to vision.env and uncomment the model of your choice.
 HF_HOME=hf_home
 #CUDA_VISIBLE_DEVICES=1,0
-#CLI_COMMAND="python vision.py -m vikhyatk/moondream2 --use-flash-attn" # test pass✅, time: 4.6s, mem: 4.6GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m vikhyatk/moondream1" # test pass✅, time: 4.3s, mem: 4.9GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m vikhyatk/moondream2 --use-flash-attn" # test pass✅, time: 4.3s, mem: 4.6GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m vikhyatk/moondream1" # test pass✅, time: 4.2s, mem: 4.9GB, 8/8 tests passed.
 #CLI_COMMAND="python vision.py -m echo840/Monkey" # test pass✅, time: 6.2s, mem: 21.8GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m echo840/Monkey-Chat" # test pass✅, time: 7.8s, mem: 21.8GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m echo840/Monkey-Chat" # test pass✅, time: 8.0s, mem: 21.8GB, 8/8 tests passed.
 #CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --device-map cuda:0" # test pass✅, time: 13.5s, mem: 52.0GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf" # test pass✅, time: 14.4s, mem: 36.2GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf" # test pass✅, time: 15.1s, mem: 37.0GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m Qwen/Qwen-VL-Chat" # test pass✅, time: 5.0s, mem: 19.4GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf" # test pass✅, time: 14.0s, mem: 36.2GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf" # test pass✅, time: 15.0s, mem: 37.1GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m Qwen/Qwen-VL-Chat" # test pass✅, time: 5.0s, mem: 19.5GB, 8/8 tests passed.
 #CLI_COMMAND="python vision.py -m YanweiLi/MGM-2B --use-flash-attn" # test fail❌, time: -1.0s, mem: -1.0GB, Error: Server failed to start (exit).
-#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B --use-flash-attn" # test pass✅, time: 5.8s, mem: 15.5GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B-HD --use-flash-attn" # test pass✅, time: 16.0s, mem: 18.7GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B --use-flash-attn" # test pass✅, time: 20.0s, mem: 27.5GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B-HD --use-flash-attn" # test pass✅, time: 18.4s, mem: 31.6GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B --use-flash-attn" # test pass✅, time: 11.3s, mem: 67.1GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B-HD --use-flash-attn" # test pass✅, time: 96.4s, mem: 70.5GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B --use-flash-attn" # test pass✅, time: 16.0s, mem: 91.4GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B-HD --use-flash-attn" # test pass✅, time: 18.6s, mem: 96.1GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m adept/fuyu-8b --device-map cuda:0" # test pass✅, time: 14.9s, mem: 25.0GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 16.8s, mem: 19.1GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 15.8s, mem: 20.2GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V-2 --use-flash-attn --device-map cuda:0" # test pass✅, time: 7.0s, mem: 11.5GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.2s, mem: 7.7GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --use-flash-attn --device-map cuda:0" # test fail❌, time: 2.1s, mem: 15.7GB, 0/8 tests passed.
-#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.0s, mem: 14.5GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --use-flash-attn --device-map cuda:0" # test pass✅, time: 7.1s, mem: 26.8GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5-Int8 --device-map cuda:0" # test pass✅, time: 27.9s, mem: 31.1GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --load-in-4bit --device-map cuda:0" # test fail❌, time: 17.6s, mem: 18.0GB, 2/8 tests passed.
-#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf --load-in-4bit" # test pass✅, time: 20.5s, mem: 12.0GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf --load-in-4bit" # test pass✅, time: 21.2s, mem: 11.9GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b-4bit --use-flash-attn --device cuda:0" # test pass✅, time: 9.6s, mem: 9.2GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b-4bit --use-flash-attn --device cuda:0" # test pass✅, time: 12.4s, mem: 10.7GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --load-in-4bit --use-flash-attn" # test fail❌, time: 2.5s, mem: 5.8GB, 0/8 tests passed.
-#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 10.0s, mem: 5.4GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 10.9s, mem: 8.9GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B --load-in-4bit --use-flash-attn" # test pass✅, time: 9.3s, mem: 6.5GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B-HD --load-in-4bit --use-flash-attn" # test pass✅, time: 37.7s, mem: 9.7GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B --load-in-4bit --use-flash-attn" # test pass✅, time: 37.2s, mem: 9.9GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B-HD --load-in-4bit --use-flash-attn" # test pass✅, time: 22.0s, mem: 13.9GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B --load-in-4bit --use-flash-attn" # test pass✅, time: 18.2s, mem: 21.4GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B-HD --load-in-4bit --use-flash-attn" # test pass✅, time: 224.7s, mem: 24.2GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B --load-in-4bit --use-flash-attn" # test pass✅, time: 23.8s, mem: 26.3GB, 8/8 tests passed.
-#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B-HD --load-in-4bit --use-flash-attn" # test pass✅, time: 25.1s, mem: 29.5GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B --use-flash-attn" # test pass✅, time: 5.7s, mem: 15.6GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B-HD --use-flash-attn" # test pass✅, time: 15.1s, mem: 18.8GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B --use-flash-attn" # test pass✅, time: 21.7s, mem: 27.6GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B-HD --use-flash-attn" # test pass✅, time: 16.4s, mem: 31.8GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B --use-flash-attn" # test pass✅, time: 11.2s, mem: 67.2GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B-HD --use-flash-attn" # test pass✅, time: 100.7s, mem: 70.4GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B --use-flash-attn" # test pass✅, time: 15.1s, mem: 91.4GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B-HD --use-flash-attn" # test pass✅, time: 18.8s, mem: 96.1GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m adept/fuyu-8b --device-map cuda:0" # test pass✅, time: 14.6s, mem: 25.0GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-4khd-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 18.4s, mem: 25.7GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 16.8s, mem: 19.0GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 14.4s, mem: 20.2GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V-2 --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.9s, mem: 11.5GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.1s, mem: 7.6GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --use-flash-attn --device-map cuda:0" # test fail❌, time: 2.0s, mem: 15.7GB, 0/8 tests passed.
+#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --use-flash-attn --device-map cuda:0" # test pass✅, time: 5.7s, mem: 14.5GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.8s, mem: 26.9GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5-Int8 --device-map cuda:0" # test pass✅, time: 27.1s, mem: 31.3GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --load-in-4bit --device-map cuda:0" # test fail❌, time: 17.2s, mem: 18.2GB, 2/8 tests passed.
+#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf --load-in-4bit" # test pass✅, time: 20.1s, mem: 12.1GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf --load-in-4bit" # test pass✅, time: 21.4s, mem: 12.1GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b-4bit --use-flash-attn --device cuda:0" # test pass✅, time: 10.9s, mem: 9.4GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b-4bit --use-flash-attn --device cuda:0" # test pass✅, time: 11.6s, mem: 10.9GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --load-in-4bit --use-flash-attn" # test fail❌, time: 2.5s, mem: 6.0GB, 0/8 tests passed.
+#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 9.4s, mem: 5.6GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 10.7s, mem: 9.0GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B --load-in-4bit --use-flash-attn" # test pass✅, time: 10.6s, mem: 6.6GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B-HD --load-in-4bit --use-flash-attn" # test pass✅, time: 35.9s, mem: 9.8GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B --load-in-4bit --use-flash-attn" # test pass✅, time: 37.9s, mem: 10.0GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B-HD --load-in-4bit --use-flash-attn" # test pass✅, time: 23.5s, mem: 14.0GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B --load-in-4bit --use-flash-attn" # test pass✅, time: 17.2s, mem: 21.5GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B-HD --load-in-4bit --use-flash-attn" # test pass✅, time: 161.9s, mem: 24.2GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B --load-in-4bit --use-flash-attn" # test pass✅, time: 21.6s, mem: 26.3GB, 8/8 tests passed.
+#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B-HD --load-in-4bit --use-flash-attn" # test pass✅, time: 25.5s, mem: 29.5GB, 8/8 tests passed.
