From 3964a6d95de6c030ebc29fe8f36a62c397fc1dc0 Mon Sep 17 00:00:00 2001
From: matatonic
Date: Wed, 22 May 2024 13:12:32 -0400
Subject: [PATCH] 0.16.1 +Sure,

---
 README.md                |  14 +++--
 backend/idefics2.py      |   2 +
 backend/xcomposer2-vl.py |   1 +
 chat_with_image.py       |   3 ++
 vision-alt.sample.env    |  96 ++++++++++++++++-----------------
 vision.sample.env        | 112 +++++++++++++++++++--------------------
 vision_qna.py            |  58 +++++++++++++++++---
 7 files changed, 170 insertions(+), 116 deletions(-)

diff --git a/README.md b/README.md
index 99eb8c8..57c0d57 100644
--- a/README.md
+++ b/README.md
@@ -86,6 +86,10 @@ See: [OpenVLM Leaderboard](https://huggingface.co/spaces/opencompass/open_vlm_le
 
 ## Recent updates
 
+Version 0.16.1
+
+- Add "start with" parameter to pre-fill assistant response & backend support (doesn't work with all models) - aka 'Sure,' support.
+
 Version 0.16.0
 
 - new model support: microsoft/Phi-3-vision-128k-instruct
@@ -213,23 +217,25 @@ options:
 
 Usage
 ```
-usage: chat_with_image.py [-h] [-s SYSTEM_PROMPT] [-m MAX_TOKENS] [-t TEMPERATURE] [-p TOP_P] [-u] [-1] image_url [questions ...]
+usage: chat_with_image.py [-h] [-s SYSTEM_PROMPT] [-S START_WITH] [-m MAX_TOKENS] [-t TEMPERATURE] [-p TOP_P] [-u] [-1] image_url [questions ...]
 
 Test vision using OpenAI
 
 positional arguments:
   image_url             URL or image file to be tested
-  questions             The question to ask the image
+  questions             The question to ask the image (default: None)
 
 options:
   -h, --help            show this help message and exit
   -s SYSTEM_PROMPT, --system-prompt SYSTEM_PROMPT
+  -S START_WITH, --start-with START_WITH
+                        Start reply with, ex. 'Sure, ' (doesn't work with all models) (default: None)
   -m MAX_TOKENS, --max-tokens MAX_TOKENS
   -t TEMPERATURE, --temperature TEMPERATURE
   -p TOP_P, --top_p TOP_P
   -u, --keep-remote-urls
-                        Normally, http urls are converted to data: urls for better latency.
-  -1, --single          Single turn Q&A, output is only the model response.
+                        Normally, http urls are converted to data: urls for better latency. (default: False)
+  -1, --single          Single turn Q&A, output is only the model response. (default: False)
 ```
 
 Example:
diff --git a/backend/idefics2.py b/backend/idefics2.py
index c547750..57f2f83 100644
--- a/backend/idefics2.py
+++ b/backend/idefics2.py
@@ -5,6 +5,8 @@
 
 # "HuggingFaceM4/idefics2-8b"
 # "HuggingFaceM4/idefics2-8b-AWQ"
+# "HuggingFaceM4/idefics2-8b-chatty
+# "HuggingFaceM4/idefics2-8b-chatty-AWQ
 
 class VisionQnA(VisionQnABase):
     model_name: str = "idefics2"
diff --git a/backend/xcomposer2-vl.py b/backend/xcomposer2-vl.py
index ed113b2..d26d653 100644
--- a/backend/xcomposer2-vl.py
+++ b/backend/xcomposer2-vl.py
@@ -12,6 +12,7 @@
 
 # internlm/internlm-xcomposer2-vl-7b # ~21GB
 # internlm/internlm-xcomposer2-vl-7b-4bit # ~12GB
+# internlm/internlm-xcomposer2-vl-1_8b # ~8GB
 
 class InternLMXComposer2QForCausalLM(auto_gptq.modeling.BaseGPTQForCausalLM):
     layers_block_name = "model.layers"
diff --git a/chat_with_image.py b/chat_with_image.py
index 9bc613a..9210212 100755
--- a/chat_with_image.py
+++ b/chat_with_image.py
@@ -29,6 +29,7 @@ def url_for_api(img_url: str = None, filename: str = None, always_data=False) ->
 
 parser = argparse.ArgumentParser(description='Test vision using OpenAI', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
 parser.add_argument('-s', '--system-prompt', type=str, default=None)
+parser.add_argument('-S', '--start-with', type=str, default=None, help="Start reply with, ex. 
'Sure, ' (doesn't work with all models)") parser.add_argument('-m', '--max-tokens', type=int, default=None) parser.add_argument('-t', '--temperature', type=float, default=None) parser.add_argument('-p', '--top_p', type=float, default=None) @@ -61,6 +62,8 @@ def url_for_api(img_url: str = None, filename: str = None, always_data=False) -> messages.extend([{ "role": "user", "content": content }]) while True: + if args.start_with: + messages.extend([{ "role": "assistant", "content": [{ "type": "text", "text": args.start_with }] }]) response = client.chat.completions.create(model="gpt-4-vision-preview", messages=messages, **params) if args.single: diff --git a/vision-alt.sample.env b/vision-alt.sample.env index 9c47cc5..ac75d83 100644 --- a/vision-alt.sample.env +++ b/vision-alt.sample.env @@ -2,52 +2,52 @@ # Copy this file to vision.env and uncomment the model of your choice. HF_HOME=hf_home #CUDA_VISIBLE_DEVICES=1,0 -#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --device-map cuda:0" # test pass✅, time: 13.9s, mem: 52.0GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --load-in-4bit --device-map cuda:0" # test fail❌, time: 16.6s, mem: 18.2GB, 2/8 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5-Int8 --device-map cuda:0" # test pass✅, time: 26.5s, mem: 31.3GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/Mini-InternVL-Chat-2B-V1-5" # test pass✅, time: 3.8s, mem: 7.0GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m Qwen/Qwen-VL-Chat" # test pass✅, time: 4.8s, mem: 19.5GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf --load-in-4bit" # test pass✅, time: 20.5s, mem: 12.1GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf" # test pass✅, time: 14.9s, mem: 37.2GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf --load-in-4bit" # test pass✅, time: 19.9s, mem: 12.1GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf" # test pass✅, time: 13.6s, mem: 36.3GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogvlm2-llama3-chat-19B" # test pass✅, time: 22.3s, mem: 40.6GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogvlm2-llama3-chinese-chat-19B" # test pass✅, time: 64.6s, mem: 40.7GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B --load-in-4bit --use-flash-attn" # test pass✅, time: 34.8s, mem: 10.0GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B --use-flash-attn" # test pass✅, time: 20.8s, mem: 27.6GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B-HD --load-in-4bit --use-flash-attn" # test pass✅, time: 17.6s, mem: 14.0GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B-HD --use-flash-attn" # test pass✅, time: 16.0s, mem: 31.8GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --device-map cuda:0" # test pass✅, time: 13.8s, mem: 52.0GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --load-in-4bit --device-map cuda:0" # test fail❌, time: 16.4s, mem: 18.2GB, 2/8 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5-Int8 --device-map cuda:0" # test pass✅, time: 26.0s, mem: 31.3GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/Mini-InternVL-Chat-2B-V1-5" # test pass✅, time: 3.5s, mem: 7.0GB, 8/8 tests passed. 
+#CLI_COMMAND="python vision.py -m Qwen/Qwen-VL-Chat" # test pass✅, time: 4.2s, mem: 19.5GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf --load-in-4bit" # test pass✅, time: 19.6s, mem: 12.1GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf" # test pass✅, time: 15.0s, mem: 37.2GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf --load-in-4bit" # test pass✅, time: 19.4s, mem: 12.3GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf" # test pass✅, time: 13.8s, mem: 36.4GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogvlm2-llama3-chat-19B" # test pass✅, time: 21.9s, mem: 40.8GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogvlm2-llama3-chinese-chat-19B" # test pass✅, time: 64.1s, mem: 40.8GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B --load-in-4bit --use-flash-attn" # test pass✅, time: 37.7s, mem: 10.1GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B --use-flash-attn" # test pass✅, time: 20.6s, mem: 27.8GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B-HD --load-in-4bit --use-flash-attn" # test pass✅, time: 24.1s, mem: 14.1GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B-HD --use-flash-attn" # test pass✅, time: 19.0s, mem: 31.9GB, 8/8 tests passed. #CLI_COMMAND="python vision.py -m YanweiLi/MGM-2B --use-flash-attn" # test fail❌, time: -1.0s, mem: -1.0GB, Error: Server failed to start (exit). -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B --load-in-4bit --use-flash-attn" # test pass✅, time: 16.7s, mem: 21.5GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B --use-flash-attn" # test pass✅, time: 10.6s, mem: 67.2GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B-HD --load-in-4bit --use-flash-attn" # test pass✅, time: 217.0s, mem: 24.1GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B-HD --use-flash-attn" # test pass✅, time: 121.4s, mem: 70.4GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B --load-in-4bit --use-flash-attn" # test pass✅, time: 10.5s, mem: 6.6GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B --use-flash-attn" # test pass✅, time: 5.2s, mem: 15.5GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B-HD --load-in-4bit --use-flash-attn" # test pass✅, time: 34.3s, mem: 9.9GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B-HD --use-flash-attn" # test pass✅, time: 15.9s, mem: 18.8GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B --load-in-4bit --use-flash-attn" # test pass✅, time: 22.9s, mem: 26.3GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B --use-flash-attn" # test pass✅, time: 16.0s, mem: 91.4GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B-HD --load-in-4bit --use-flash-attn" # test pass✅, time: 26.4s, mem: 29.6GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B-HD --use-flash-attn" # test pass✅, time: 18.6s, mem: 96.2GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m adept/fuyu-8b --device-map cuda:0" # test pass✅, time: 13.9s, mem: 25.1GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m echo840/Monkey" # test pass✅, time: 6.1s, mem: 21.9GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m echo840/Monkey-Chat" # test pass✅, time: 7.7s, mem: 21.9GB, 8/8 tests passed. 
-#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-4khd-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 18.3s, mem: 25.9GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 16.9s, mem: 19.2GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b-4bit --use-flash-attn --device cuda:0" # test pass✅, time: 9.3s, mem: 9.5GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-1_8b --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.5s, mem: 7.3GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 16.5s, mem: 20.2GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b-4bit --use-flash-attn --device cuda:0" # test pass✅, time: 10.8s, mem: 10.9GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --load-in-4bit --use-flash-attn" # test fail❌, time: 2.3s, mem: 5.9GB, 0/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --use-flash-attn --device-map cuda:0" # test fail❌, time: 1.8s, mem: 15.7GB, 0/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 10.1s, mem: 9.0GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.5s, mem: 26.8GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 9.1s, mem: 5.5GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --use-flash-attn --device-map cuda:0" # test pass✅, time: 7.9s, mem: 14.5GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.0s, mem: 7.6GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V-2 --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.6s, mem: 11.5GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m qihoo360/360VL-8B --use-flash-attn --load-in-4bit" # test pass✅, time: 7.8s, mem: 7.8GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m qihoo360/360VL-8B --use-flash-attn" # test pass✅, time: 5.2s, mem: 17.4GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m vikhyatk/moondream1" # test pass✅, time: 3.9s, mem: 4.9GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m vikhyatk/moondream2 --use-flash-attn" # test pass✅, time: 3.8s, mem: 4.6GB, 8/8 tests passed. \ No newline at end of file +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B --load-in-4bit --use-flash-attn" # test pass✅, time: 16.7s, mem: 21.7GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B --use-flash-attn" # test pass✅, time: 10.6s, mem: 67.4GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B-HD --load-in-4bit --use-flash-attn" # test pass✅, time: 202.8s, mem: 24.4GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B-HD --use-flash-attn" # test pass✅, time: 174.7s, mem: 70.6GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B --load-in-4bit --use-flash-attn" # test pass✅, time: 11.1s, mem: 6.9GB, 8/8 tests passed. 
+#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B --use-flash-attn" # test pass✅, time: 5.0s, mem: 15.8GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B-HD --load-in-4bit --use-flash-attn" # test pass✅, time: 34.8s, mem: 10.1GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B-HD --use-flash-attn" # test pass✅, time: 14.7s, mem: 19.0GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B --load-in-4bit --use-flash-attn" # test pass✅, time: 22.7s, mem: 26.5GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B --use-flash-attn" # test pass✅, time: 15.2s, mem: 91.6GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B-HD --load-in-4bit --use-flash-attn" # test pass✅, time: 25.0s, mem: 29.7GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B-HD --use-flash-attn" # test pass✅, time: 18.6s, mem: 96.3GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m adept/fuyu-8b --device-map cuda:0" # test pass✅, time: 14.8s, mem: 25.2GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m echo840/Monkey" # test pass✅, time: 6.0s, mem: 22.0GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m echo840/Monkey-Chat" # test pass✅, time: 7.5s, mem: 22.0GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-4khd-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 18.0s, mem: 25.9GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 18.3s, mem: 19.4GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b-4bit --use-flash-attn --device cuda:0" # test pass✅, time: 10.7s, mem: 9.8GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-1_8b --use-flash-attn --device-map cuda:0" # test pass✅, time: 7.0s, mem: 7.5GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 14.9s, mem: 20.6GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b-4bit --use-flash-attn --device cuda:0" # test pass✅, time: 10.1s, mem: 11.3GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --load-in-4bit --use-flash-attn" # test fail❌, time: 2.3s, mem: 6.3GB, 0/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --use-flash-attn --device-map cuda:0" # test fail❌, time: 1.8s, mem: 16.0GB, 0/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 10.4s, mem: 9.4GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.4s, mem: 27.2GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 9.1s, mem: 5.9GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.0s, mem: 14.9GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.2s, mem: 8.0GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V-2 --use-flash-attn --device-map cuda:0" # test pass✅, time: 7.0s, mem: 11.8GB, 8/8 tests passed. 
+#CLI_COMMAND="python vision.py -m qihoo360/360VL-8B --use-flash-attn --load-in-4bit" # test pass✅, time: 7.7s, mem: 8.2GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m qihoo360/360VL-8B --use-flash-attn" # test pass✅, time: 5.1s, mem: 17.8GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m vikhyatk/moondream1" # test pass✅, time: 4.0s, mem: 5.2GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m vikhyatk/moondream2 --use-flash-attn" # test pass✅, time: 3.8s, mem: 4.9GB, 8/8 tests passed. \ No newline at end of file diff --git a/vision.sample.env b/vision.sample.env index 909c6c6..642e63e 100644 --- a/vision.sample.env +++ b/vision.sample.env @@ -2,59 +2,59 @@ # Copy this file to vision.env and uncomment the model of your choice. HF_HOME=hf_home #CUDA_VISIBLE_DEVICES=1,0 -#CLI_COMMAND="python vision.py -m BAAI/Bunny-Llama-3-8B-V" # test pass✅, time: 6.8s, mem: 19.6GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m BAAI/Emu2-Chat --max-memory=0:78GiB,1:20GiB" # test pass✅, time: 21.4s, mem: 78.4GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m HuggingFaceM4/idefics2-8b --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.6s, mem: 22.4GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m HuggingFaceM4/idefics2-8b-AWQ --use-flash-attn --device-map cuda:0" # test pass✅, time: 7.9s, mem: 12.7GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m HuggingFaceM4/idefics2-8b-chatty --use-flash-attn --device-map cuda:0" # test pass✅, time: 8.0s, mem: 22.4GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m HuggingFaceM4/idefics2-8b-chatty-AWQ --use-flash-attn --device-map cuda:0" # test pass✅, time: 9.5s, mem: 12.6GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --device-map cuda:0" # test pass✅, time: 13.3s, mem: 52.0GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --load-in-4bit --device-map cuda:0" # test fail❌, time: 17.2s, mem: 18.1GB, 2/8 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5-Int8 --device-map cuda:0" # test pass✅, time: 26.1s, mem: 31.9GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m OpenGVLab/Mini-InternVL-Chat-2B-V1-5" # test pass✅, time: 3.6s, mem: 6.9GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m Qwen/Qwen-VL-Chat" # test pass✅, time: 4.5s, mem: 19.4GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf --load-in-4bit" # test pass✅, time: 19.9s, mem: 12.1GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf" # test pass✅, time: 14.4s, mem: 37.1GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf --load-in-4bit" # test pass✅, time: 19.5s, mem: 12.0GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf" # test pass✅, time: 13.8s, mem: 36.2GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogvlm2-llama3-chat-19B" # test pass✅, time: 22.1s, mem: 40.6GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m THUDM/cogvlm2-llama3-chinese-chat-19B" # test pass✅, time: 64.8s, mem: 40.5GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m TIGER-Lab/Mantis-8B-Fuyu --device-map cuda:0" # test pass✅, time: 6.2s, mem: 20.2GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m TIGER-Lab/Mantis-8B-clip-llama3 --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.2s, mem: 17.4GB, 8/8 tests passed. 
-#CLI_COMMAND="python vision.py -m TIGER-Lab/Mantis-8B-siglip-llama3 --use-flash-attn --device-map cuda:0" # test pass✅, time: 4.6s, mem: 18.0GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-2B --use-flash-attn --load-in-4bit" # test pass✅, time: 6.7s, mem: 5.7GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m YanweiLi/MGM-2B --use-flash-attn" # test pass✅, time: 4.1s, mem: 8.2GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m adept/fuyu-8b --device-map cuda:0" # test pass✅, time: 13.9s, mem: 24.9GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m echo840/Monkey" # test pass✅, time: 6.1s, mem: 21.7GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m echo840/Monkey-Chat" # test pass✅, time: 7.7s, mem: 21.7GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-4khd-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 17.9s, mem: 25.6GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 16.6s, mem: 19.1GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b-4bit --use-flash-attn" # test pass✅, time: 9.2s, mem: 9.5GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-1_8b --use-flash-attn --device-map cuda:0" # test pass✅, time: 4.7s, mem: 7.3GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 16.2s, mem: 20.3GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b-4bit --use-flash-attn" # test pass✅, time: 12.4s, mem: 11.0GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --load-in-4bit --use-flash-attn" # test fail❌, time: 2.2s, mem: 6.0GB, 0/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --use-flash-attn --device-map cuda:0" # test fail❌, time: 1.8s, mem: 15.8GB, 0/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 10.1s, mem: 9.2GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.4s, mem: 26.9GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 9.1s, mem: 5.8GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --use-flash-attn --device-map cuda:0" # test pass✅, time: 5.9s, mem: 14.6GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-34b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 51.8s, mem: 26.3GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-34b-hf --use-flash-attn" # test pass✅, time: 46.3s, mem: 72.5GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-mistral-7b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 19.3s, mem: 9.3GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-mistral-7b-hf --use-flash-attn" # test pass✅, time: 12.7s, mem: 19.3GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-vicuna-13b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 12.3s, mem: 14.9GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-vicuna-13b-hf --use-flash-attn" # test pass✅, time: 9.0s, mem: 32.7GB, 8/8 tests passed. 
-#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-vicuna-7b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 13.4s, mem: 9.9GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-vicuna-7b-hf --use-flash-attn" # test pass✅, time: 8.4s, mem: 19.0GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m microsoft/Phi-3-vision-128k-instruct --use-flash-attn --load-in-4bit" # test pass✅, time: 7.7s, mem: 7.2GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m microsoft/Phi-3-vision-128k-instruct --use-flash-attn" # test pass✅, time: 6.4s, mem: 12.4GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V --use-flash-attn --device-map cuda:0" # test pass✅, time: 5.8s, mem: 7.7GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V-2 --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.6s, mem: 11.6GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m qihoo360/360VL-70B --use-flash-attn --load-in-4bit" # test fail❌, time: 4.1s, mem: 39.3GB, Test failed with Exception: Internal Server Error -#CLI_COMMAND="python vision.py -m qihoo360/360VL-8B --use-flash-attn --load-in-4bit" # test pass✅, time: 8.3s, mem: 7.9GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m qihoo360/360VL-8B --use-flash-attn" # test pass✅, time: 5.2s, mem: 17.5GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m qnguyen3/nanoLLaVA --use-flash-attn --device-map cuda:0" # test pass✅, time: 8.1s, mem: 8.2GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m qnguyen3/nanoLLaVA --use-flash-attn --load-in-4bit --device-map cuda:0" # test pass✅, time: 9.3s, mem: 7.7GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m qresearch/llama-3-vision-alpha-hf --device cuda:0" # test pass✅, time: 5.7s, mem: 19.3GB, 8/8 tests passed. -#CLI_COMMAND="python vision.py -m vikhyatk/moondream2 --use-flash-attn" # test pass✅, time: 3.9s, mem: 4.7GB, 8/8 tests passed. \ No newline at end of file +#CLI_COMMAND="python vision.py -m BAAI/Bunny-Llama-3-8B-V" # test pass✅, time: 6.7s, mem: 19.6GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m BAAI/Emu2-Chat --max-memory=0:78GiB,1:20GiB" # test pass✅, time: 21.9s, mem: 78.5GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m HuggingFaceM4/idefics2-8b --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.9s, mem: 22.6GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m HuggingFaceM4/idefics2-8b-AWQ --use-flash-attn --device-map cuda:0" # test pass✅, time: 8.2s, mem: 12.8GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m HuggingFaceM4/idefics2-8b-chatty --use-flash-attn --device-map cuda:0" # test pass✅, time: 8.5s, mem: 22.5GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m HuggingFaceM4/idefics2-8b-chatty-AWQ --use-flash-attn --device-map cuda:0" # test pass✅, time: 9.7s, mem: 12.8GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --device-map cuda:0" # test pass✅, time: 13.3s, mem: 52.1GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --load-in-4bit --device-map cuda:0" # test fail❌, time: 17.1s, mem: 18.3GB, 2/8 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5-Int8 --device-map cuda:0" # test pass✅, time: 26.6s, mem: 32.2GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m OpenGVLab/Mini-InternVL-Chat-2B-V1-5" # test pass✅, time: 3.5s, mem: 7.1GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m Qwen/Qwen-VL-Chat" # test pass✅, time: 4.7s, mem: 19.6GB, 8/8 tests passed. 
+#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf --load-in-4bit" # test pass✅, time: 20.1s, mem: 12.2GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf" # test pass✅, time: 14.7s, mem: 37.1GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf --load-in-4bit" # test pass✅, time: 19.5s, mem: 12.2GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf" # test pass✅, time: 13.9s, mem: 36.6GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogvlm2-llama3-chat-19B" # test pass✅, time: 22.3s, mem: 40.9GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m THUDM/cogvlm2-llama3-chinese-chat-19B" # test pass✅, time: 64.4s, mem: 40.9GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m TIGER-Lab/Mantis-8B-Fuyu --device-map cuda:0" # test pass✅, time: 6.9s, mem: 20.6GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m TIGER-Lab/Mantis-8B-clip-llama3 --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.3s, mem: 17.8GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m TIGER-Lab/Mantis-8B-siglip-llama3 --use-flash-attn --device-map cuda:0" # test pass✅, time: 5.0s, mem: 18.4GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-2B --use-flash-attn --load-in-4bit" # test pass✅, time: 7.2s, mem: 6.1GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m YanweiLi/MGM-2B --use-flash-attn" # test pass✅, time: 4.2s, mem: 8.6GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m adept/fuyu-8b --device-map cuda:0" # test pass✅, time: 13.9s, mem: 25.3GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m echo840/Monkey" # test pass✅, time: 6.1s, mem: 22.1GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m echo840/Monkey-Chat" # test pass✅, time: 7.7s, mem: 22.1GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-4khd-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 18.1s, mem: 26.0GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 16.9s, mem: 19.4GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b-4bit --use-flash-attn" # test pass✅, time: 9.2s, mem: 9.7GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-1_8b --use-flash-attn --device-map cuda:0" # test pass✅, time: 5.3s, mem: 7.5GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 19.4s, mem: 20.4GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b-4bit --use-flash-attn" # test pass✅, time: 12.0s, mem: 11.1GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --load-in-4bit --use-flash-attn" # test fail❌, time: 2.3s, mem: 6.1GB, 0/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --use-flash-attn --device-map cuda:0" # test fail❌, time: 1.8s, mem: 15.9GB, 0/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 10.4s, mem: 9.3GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.4s, mem: 27.0GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 9.0s, mem: 5.8GB, 8/8 tests passed. 
+#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --use-flash-attn --device-map cuda:0" # test pass✅, time: 5.8s, mem: 14.7GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-34b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 53.5s, mem: 26.3GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-34b-hf --use-flash-attn" # test pass✅, time: 46.2s, mem: 72.6GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-mistral-7b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 18.8s, mem: 9.3GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-mistral-7b-hf --use-flash-attn" # test pass✅, time: 12.7s, mem: 19.4GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-vicuna-13b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 12.0s, mem: 15.0GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-vicuna-13b-hf --use-flash-attn" # test pass✅, time: 9.5s, mem: 32.7GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-vicuna-7b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 13.0s, mem: 9.9GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m llava-hf/llava-v1.6-vicuna-7b-hf --use-flash-attn" # test pass✅, time: 8.1s, mem: 19.1GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m microsoft/Phi-3-vision-128k-instruct --use-flash-attn --load-in-4bit" # test pass✅, time: 7.9s, mem: 7.3GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m microsoft/Phi-3-vision-128k-instruct --use-flash-attn" # test pass✅, time: 6.3s, mem: 12.5GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.2s, mem: 7.8GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V-2 --use-flash-attn --device-map cuda:0" # test pass✅, time: 7.1s, mem: 11.7GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m qihoo360/360VL-70B --use-flash-attn --load-in-4bit" # test fail❌, time: 4.1s, mem: 39.4GB, Test failed with Exception: Internal Server Error +#CLI_COMMAND="python vision.py -m qihoo360/360VL-8B --use-flash-attn --load-in-4bit" # test pass✅, time: 8.4s, mem: 8.0GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m qihoo360/360VL-8B --use-flash-attn" # test pass✅, time: 5.5s, mem: 17.6GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m qnguyen3/nanoLLaVA --use-flash-attn --device-map cuda:0" # test pass✅, time: 7.4s, mem: 8.2GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m qnguyen3/nanoLLaVA --use-flash-attn --load-in-4bit --device-map cuda:0" # test pass✅, time: 9.7s, mem: 7.8GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m qresearch/llama-3-vision-alpha-hf --device cuda:0" # test pass✅, time: 6.1s, mem: 19.4GB, 8/8 tests passed. +#CLI_COMMAND="python vision.py -m vikhyatk/moondream2 --use-flash-attn" # test pass✅, time: 3.9s, mem: 4.8GB, 8/8 tests passed. 
\ No newline at end of file diff --git a/vision_qna.py b/vision_qna.py index 9a9c528..1313bb7 100644 --- a/vision_qna.py +++ b/vision_qna.py @@ -163,6 +163,11 @@ async def images_hfmessages_from_messages(messages: list[Message], url_handler = async def phi15_prompt_from_messages(messages: list[Message], img_tok = "", img_end = ''): # prompt = '' images = [] + generation_msg = "Answer:" + + if messages and messages[-1].role == 'assistant': + generation_msg += messages[-1].content[0].text + messages.pop(-1) for m in messages: if m.role == 'user': @@ -183,13 +188,18 @@ async def phi15_prompt_from_messages(messages: list[Message], img_tok = " if c.type == 'text': prompt += f"{c.text}\n\n" # fake system prompt - prompt += "Answer:" + prompt += generation_msg return images, prompt async def vicuna0_prompt_from_messages(messages: list[Message], img_tok = "\n"): prompt = '' images = [] + generation_msg = "### Assistant:" + + if messages and messages[-1].role == 'assistant': + generation_msg += messages[-1].content[0].text + messages.pop(-1) for m in messages: if m.role == 'user': @@ -214,13 +224,19 @@ async def vicuna0_prompt_from_messages(messages: list[Message], img_tok = ""): prompt = '' images = [] + generation_msg = '<|start_header_id|>assistant<|end_header_id|>\n\n' + + if messages and messages[-1].role == 'assistant': + generation_msg += messages[-1].content[0].text + messages.pop(-1) for m in messages: has_image = False @@ -296,13 +317,18 @@ async def llama3_prompt_from_messages(messages: list[Message], img_tok = "{m.role}<|end_header_id|>\n\n{img_tag}{c.text.strip()}<|eot_id|>" - prompt += '<|start_header_id|>assistant<|end_header_id|>\n\n' + prompt += generation_msg return images, prompt async def chatml_prompt_from_messages(messages: list[Message], img_tok = "\n"): prompt = '' images = [] + generation_msg = "<|im_start|>assistant\n" + + if messages and messages[-1].role == 'assistant': + generation_msg += messages[-1].content[0].text + messages.pop(-1) for m in messages: if m.role == 'user': @@ -327,13 +353,18 @@ async def chatml_prompt_from_messages(messages: list[Message], img_tok = "system\n{c.text}<|im_end|>" - prompt += f"<|im_start|>assistant\n" + prompt += generation_msg return images, prompt async def gemma_prompt_from_messages(messages: list[Message], img_tok = "\n"): prompt = '' images = [] + generation_msg = "model\n" + + if messages and messages[-1].role == 'assistant': + generation_msg += messages[-1].content[0].text + messages.pop(-1) for m in messages: if m.role == 'user': @@ -359,7 +390,7 @@ async def gemma_prompt_from_messages(messages: list[Message], img_tok = " prompt += f"system\n{c.text}" # fake it - prompt += f"model\n" + prompt += generation_msg return images, prompt @@ -393,6 +424,12 @@ async def emu_images_prompt_system_from_messages(messages: list[Message], img_to images = [] system_message = None + generation_msg = ' [ASSISTANT]:' + + if messages and messages[-1].role == 'assistant': + generation_msg += messages[-1].content[0].text + messages.pop(-1) + for m in messages: if m.role == 'user': text = '' @@ -416,7 +453,7 @@ async def emu_images_prompt_system_from_messages(messages: list[Message], img_to if c.type == 'text': system_message = c.text - prompt += " [ASSISTANT]:" + prompt += generation_msg return images, prompt, system_message @@ -425,6 +462,11 @@ async def phi3_prompt_from_messages(messages: list[Message]): img_tok = "<|image_{}|>\n" prompt = '' images = [] + generation_msg = '<|assistant|>\n' + + if messages and messages[-1].role == 'assistant': + 
+        generation_msg += messages[-1].content[0].text
+        messages.pop(-1)
 
     for m in messages:
         img_tag = ''
@@ -439,7 +481,7 @@ async def phi3_prompt_from_messages(messages: list[Message]):
             if c.type == 'text':
                 prompt += f"<|{m.role}|>\n{img_tag}{c.text}<|end|>\n"
 
-    prompt += '<|assistant|>\n'
+    prompt += generation_msg
 
     return images, prompt
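
The prompt builders changed above all follow the same pattern: seed `generation_msg` with the template's assistant header, and if the caller pre-filled an assistant turn, append that turn's text to the header and drop the turn from the history, so generation continues from the pre-filled text. Below is a minimal, standalone sketch of that pattern in the chatml style, simplified to text-only content and using stand-in `Text`/`Message` types rather than the project's actual classes.

```python
from dataclasses import dataclass

@dataclass
class Text:
    type: str
    text: str

@dataclass
class Message:
    role: str
    content: list[Text]

def chatml_prompt(messages: list[Message]) -> str:
    # Default generation header; if the last message is a pre-filled assistant
    # turn, fold its text into the header and remove it from the history.
    generation_msg = "<|im_start|>assistant\n"
    if messages and messages[-1].role == 'assistant':
        generation_msg += messages[-1].content[0].text
        messages.pop(-1)

    prompt = ''
    for m in messages:
        for c in m.content:
            if c.type == 'text':
                prompt += f"<|im_start|>{m.role}\n{c.text}<|im_end|>\n"

    # The model continues generating right after the pre-filled text.
    return prompt + generation_msg

# Example: the generated reply will start from "Sure, ".
messages = [
    Message('user', [Text('text', 'Describe the image.')]),
    Message('assistant', [Text('text', 'Sure, ')]),
]
print(chatml_prompt(messages))
```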
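On the client side, the pre-fill is expressed as a trailing assistant message, which is what chat_with_image.py now sends when `-S`/`--start-with` is given. A hedged example of calling the server directly in the same way follows; the `base_url`, `api_key`, port, and image URL are placeholders for illustration, not values defined by this patch.

```python
from openai import OpenAI

# Placeholder connection details - point these at wherever the server runs.
client = OpenAI(base_url="http://localhost:5006/v1", api_key="skip")

messages = [
    {"role": "user", "content": [
        {"type": "image_url", "image_url": {"url": "https://example.com/cat.jpg"}},  # placeholder image
        {"type": "text", "text": "What is in this image?"},
    ]},
    # Trailing assistant message = the "start with" pre-fill (the 'Sure,' trick).
    {"role": "assistant", "content": [{"type": "text", "text": "Sure, "}]},
]

response = client.chat.completions.create(model="gpt-4-vision-preview", messages=messages, max_tokens=300)
print(response.choices[0].message.content)
```

As the changelog entry notes, not every backend honors the pre-fill; models without support simply ignore the trailing assistant turn.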