0.16.1 +Sure,
matatonic committed May 22, 2024
1 parent 35a861a commit 3964a6d
Showing 7 changed files with 170 additions and 116 deletions.
14 changes: 10 additions & 4 deletions README.md
@@ -86,6 +86,10 @@ See: [OpenVLM Leaderboard](https://huggingface.co/spaces/opencompass/open_vlm_le

## Recent updates

Version 0.16.1

- Add "start with" parameter to pre-fill assistant response & backend support (doesn't work with all models) - aka 'Sure,' support.

Version 0.16.0

- new model support: microsoft/Phi-3-vision-128k-instruct
@@ -213,23 +217,25 @@ options:

Usage
```
usage: chat_with_image.py [-h] [-s SYSTEM_PROMPT] [-m MAX_TOKENS] [-t TEMPERATURE] [-p TOP_P] [-u] [-1] image_url [questions ...]
usage: chat_with_image.py [-h] [-s SYSTEM_PROMPT] [-S START_WITH] [-m MAX_TOKENS] [-t TEMPERATURE] [-p TOP_P] [-u] [-1] image_url [questions ...]
Test vision using OpenAI
positional arguments:
image_url URL or image file to be tested
questions The question to ask the image
questions The question to ask the image (default: None)
options:
-h, --help show this help message and exit
-s SYSTEM_PROMPT, --system-prompt SYSTEM_PROMPT
-S START_WITH, --start-with START_WITH
Start reply with, ex. 'Sure, ' (doesn't work with all models) (default: None)
-m MAX_TOKENS, --max-tokens MAX_TOKENS
-t TEMPERATURE, --temperature TEMPERATURE
-p TOP_P, --top_p TOP_P
-u, --keep-remote-urls
Normally, http urls are converted to data: urls for better latency.
-1, --single Single turn Q&A, output is only the model response.
Normally, http urls are converted to data: urls for better latency. (default: False)
-1, --single Single turn Q&A, output is only the model response. (default: False)
```

Example:
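The original example is collapsed in this view. As an illustration of the new flag (the image file and question are invented for the example):
```
python chat_with_image.py -S 'Sure, ' test.jpg "Describe the image."
```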
2 changes: 2 additions & 0 deletions backend/idefics2.py
@@ -5,6 +5,8 @@

# "HuggingFaceM4/idefics2-8b"
# "HuggingFaceM4/idefics2-8b-AWQ"
# "HuggingFaceM4/idefics2-8b-chatty
# "HuggingFaceM4/idefics2-8b-chatty-AWQ

class VisionQnA(VisionQnABase):
model_name: str = "idefics2"
1 change: 1 addition & 0 deletions backend/xcomposer2-vl.py
@@ -12,6 +12,7 @@

# internlm/internlm-xcomposer2-vl-7b # ~21GB
# internlm/internlm-xcomposer2-vl-7b-4bit # ~12GB
# internlm/internlm-xcomposer2-vl-1_8b # ~8GB

class InternLMXComposer2QForCausalLM(auto_gptq.modeling.BaseGPTQForCausalLM):
layers_block_name = "model.layers"
3 changes: 3 additions & 0 deletions chat_with_image.py
@@ -29,6 +29,7 @@ def url_for_api(img_url: str = None, filename: str = None, always_data=False) ->
parser = argparse.ArgumentParser(description='Test vision using OpenAI',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('-s', '--system-prompt', type=str, default=None)
parser.add_argument('-S', '--start-with', type=str, default=None, help="Start reply with, ex. 'Sure, ' (doesn't work with all models)")
parser.add_argument('-m', '--max-tokens', type=int, default=None)
parser.add_argument('-t', '--temperature', type=float, default=None)
parser.add_argument('-p', '--top_p', type=float, default=None)
@@ -61,6 +62,8 @@ def url_for_api(img_url: str = None, filename: str = None, always_data=False) ->
messages.extend([{ "role": "user", "content": content }])

while True:
if args.start_with:
messages.extend([{ "role": "assistant", "content": [{ "type": "text", "text": args.start_with }] }])
response = client.chat.completions.create(model="gpt-4-vision-preview", messages=messages, **params)

if args.single:
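The new lines above implement the prefill by appending a trailing assistant message before each request. A minimal standalone sketch of the same technique against the server's OpenAI-compatible API (the base URL, API key, image URL, and question are illustrative assumptions; the model name mirrors the script above):

```
# Sketch only: assumes the server listens on localhost:5006 and accepts any API key.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:5006/v1", api_key="skip")

start_with = "Sure, "  # the '-S' / '--start-with' value
messages = [
    {"role": "user", "content": [
        {"type": "image_url", "image_url": {"url": "https://example.com/cat.jpg"}},
        {"type": "text", "text": "What is in this image?"},
    ]},
    # The prefill: a trailing assistant message for the model to continue from
    # (uses the backend support added in this release; not all models honor it).
    {"role": "assistant", "content": [{"type": "text", "text": start_with}]},
]

response = client.chat.completions.create(model="gpt-4-vision-preview", messages=messages)
print(response.choices[0].message.content)
```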
96 changes: 48 additions & 48 deletions vision-alt.sample.env
@@ -2,52 +2,52 @@
# Copy this file to vision.env and uncomment the model of your choice.
HF_HOME=hf_home
#CUDA_VISIBLE_DEVICES=1,0
#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --device-map cuda:0" # test pass✅, time: 13.9s, mem: 52.0GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --load-in-4bit --device-map cuda:0" # test fail❌, time: 16.6s, mem: 18.2GB, 2/8 tests passed.
#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5-Int8 --device-map cuda:0" # test pass✅, time: 26.5s, mem: 31.3GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m OpenGVLab/Mini-InternVL-Chat-2B-V1-5" # test pass✅, time: 3.8s, mem: 7.0GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m Qwen/Qwen-VL-Chat" # test pass✅, time: 4.8s, mem: 19.5GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf --load-in-4bit" # test pass✅, time: 20.5s, mem: 12.1GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf" # test pass✅, time: 14.9s, mem: 37.2GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf --load-in-4bit" # test pass✅, time: 19.9s, mem: 12.1GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf" # test pass✅, time: 13.6s, mem: 36.3GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m THUDM/cogvlm2-llama3-chat-19B" # test pass✅, time: 22.3s, mem: 40.6GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m THUDM/cogvlm2-llama3-chinese-chat-19B" # test pass✅, time: 64.6s, mem: 40.7GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B --load-in-4bit --use-flash-attn" # test pass✅, time: 34.8s, mem: 10.0GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B --use-flash-attn" # test pass✅, time: 20.8s, mem: 27.6GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B-HD --load-in-4bit --use-flash-attn" # test pass✅, time: 17.6s, mem: 14.0GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B-HD --use-flash-attn" # test pass✅, time: 16.0s, mem: 31.8GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --device-map cuda:0" # test pass✅, time: 13.8s, mem: 52.0GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5 --load-in-4bit --device-map cuda:0" # test fail❌, time: 16.4s, mem: 18.2GB, 2/8 tests passed.
#CLI_COMMAND="python vision.py -m OpenGVLab/InternVL-Chat-V1-5-Int8 --device-map cuda:0" # test pass✅, time: 26.0s, mem: 31.3GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m OpenGVLab/Mini-InternVL-Chat-2B-V1-5" # test pass✅, time: 3.5s, mem: 7.0GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m Qwen/Qwen-VL-Chat" # test pass✅, time: 4.2s, mem: 19.5GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf --load-in-4bit" # test pass✅, time: 19.6s, mem: 12.1GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf" # test pass✅, time: 15.0s, mem: 37.2GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf --load-in-4bit" # test pass✅, time: 19.4s, mem: 12.3GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf" # test pass✅, time: 13.8s, mem: 36.4GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m THUDM/cogvlm2-llama3-chat-19B" # test pass✅, time: 21.9s, mem: 40.8GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m THUDM/cogvlm2-llama3-chinese-chat-19B" # test pass✅, time: 64.1s, mem: 40.8GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B --load-in-4bit --use-flash-attn" # test pass✅, time: 37.7s, mem: 10.1GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B --use-flash-attn" # test pass✅, time: 20.6s, mem: 27.8GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B-HD --load-in-4bit --use-flash-attn" # test pass✅, time: 24.1s, mem: 14.1GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/MGM-13B-HD --use-flash-attn" # test pass✅, time: 19.0s, mem: 31.9GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/MGM-2B --use-flash-attn" # test fail❌, time: -1.0s, mem: -1.0GB, Error: Server failed to start (exit).
#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B --load-in-4bit --use-flash-attn" # test pass✅, time: 16.7s, mem: 21.5GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B --use-flash-attn" # test pass✅, time: 10.6s, mem: 67.2GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B-HD --load-in-4bit --use-flash-attn" # test pass✅, time: 217.0s, mem: 24.1GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B-HD --use-flash-attn" # test pass✅, time: 121.4s, mem: 70.4GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B --load-in-4bit --use-flash-attn" # test pass✅, time: 10.5s, mem: 6.6GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B --use-flash-attn" # test pass✅, time: 5.2s, mem: 15.5GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B-HD --load-in-4bit --use-flash-attn" # test pass✅, time: 34.3s, mem: 9.9GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B-HD --use-flash-attn" # test pass✅, time: 15.9s, mem: 18.8GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B --load-in-4bit --use-flash-attn" # test pass✅, time: 22.9s, mem: 26.3GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B --use-flash-attn" # test pass✅, time: 16.0s, mem: 91.4GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B-HD --load-in-4bit --use-flash-attn" # test pass✅, time: 26.4s, mem: 29.6GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B-HD --use-flash-attn" # test pass✅, time: 18.6s, mem: 96.2GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m adept/fuyu-8b --device-map cuda:0" # test pass✅, time: 13.9s, mem: 25.1GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m echo840/Monkey" # test pass✅, time: 6.1s, mem: 21.9GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m echo840/Monkey-Chat" # test pass✅, time: 7.7s, mem: 21.9GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-4khd-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 18.3s, mem: 25.9GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 16.9s, mem: 19.2GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b-4bit --use-flash-attn --device cuda:0" # test pass✅, time: 9.3s, mem: 9.5GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-1_8b --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.5s, mem: 7.3GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 16.5s, mem: 20.2GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b-4bit --use-flash-attn --device cuda:0" # test pass✅, time: 10.8s, mem: 10.9GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --load-in-4bit --use-flash-attn" # test fail❌, time: 2.3s, mem: 5.9GB, 0/8 tests passed.
#CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --use-flash-attn --device-map cuda:0" # test fail❌, time: 1.8s, mem: 15.7GB, 0/8 tests passed.
#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 10.1s, mem: 9.0GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.5s, mem: 26.8GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 9.1s, mem: 5.5GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --use-flash-attn --device-map cuda:0" # test pass✅, time: 7.9s, mem: 14.5GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.0s, mem: 7.6GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V-2 --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.6s, mem: 11.5GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m qihoo360/360VL-8B --use-flash-attn --load-in-4bit" # test pass✅, time: 7.8s, mem: 7.8GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m qihoo360/360VL-8B --use-flash-attn" # test pass✅, time: 5.2s, mem: 17.4GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m vikhyatk/moondream1" # test pass✅, time: 3.9s, mem: 4.9GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m vikhyatk/moondream2 --use-flash-attn" # test pass✅, time: 3.8s, mem: 4.6GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B --load-in-4bit --use-flash-attn" # test pass✅, time: 16.7s, mem: 21.7GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B --use-flash-attn" # test pass✅, time: 10.6s, mem: 67.4GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B-HD --load-in-4bit --use-flash-attn" # test pass✅, time: 202.8s, mem: 24.4GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/MGM-34B-HD --use-flash-attn" # test pass✅, time: 174.7s, mem: 70.6GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B --load-in-4bit --use-flash-attn" # test pass✅, time: 11.1s, mem: 6.9GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B --use-flash-attn" # test pass✅, time: 5.0s, mem: 15.8GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B-HD --load-in-4bit --use-flash-attn" # test pass✅, time: 34.8s, mem: 10.1GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/MGM-7B-HD --use-flash-attn" # test pass✅, time: 14.7s, mem: 19.0GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B --load-in-4bit --use-flash-attn" # test pass✅, time: 22.7s, mem: 26.5GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B --use-flash-attn" # test pass✅, time: 15.2s, mem: 91.6GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B-HD --load-in-4bit --use-flash-attn" # test pass✅, time: 25.0s, mem: 29.7GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/MGM-8x7B-HD --use-flash-attn" # test pass✅, time: 18.6s, mem: 96.3GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m adept/fuyu-8b --device-map cuda:0" # test pass✅, time: 14.8s, mem: 25.2GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m echo840/Monkey" # test pass✅, time: 6.0s, mem: 22.0GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m echo840/Monkey-Chat" # test pass✅, time: 7.5s, mem: 22.0GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-4khd-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 18.0s, mem: 25.9GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 18.3s, mem: 19.4GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b-4bit --use-flash-attn --device cuda:0" # test pass✅, time: 10.7s, mem: 9.8GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-1_8b --use-flash-attn --device-map cuda:0" # test pass✅, time: 7.0s, mem: 7.5GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b --use-flash-attn --device-map cuda:0" # test pass✅, time: 14.9s, mem: 20.6GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b-4bit --use-flash-attn --device cuda:0" # test pass✅, time: 10.1s, mem: 11.3GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --load-in-4bit --use-flash-attn" # test fail❌, time: 2.3s, mem: 6.3GB, 0/8 tests passed.
#CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --use-flash-attn --device-map cuda:0" # test fail❌, time: 1.8s, mem: 16.0GB, 0/8 tests passed.
#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 10.4s, mem: 9.4GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.4s, mem: 27.2GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --load-in-4bit --use-flash-attn" # test pass✅, time: 9.1s, mem: 5.9GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.0s, mem: 14.9GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V --use-flash-attn --device-map cuda:0" # test pass✅, time: 6.2s, mem: 8.0GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V-2 --use-flash-attn --device-map cuda:0" # test pass✅, time: 7.0s, mem: 11.8GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m qihoo360/360VL-8B --use-flash-attn --load-in-4bit" # test pass✅, time: 7.7s, mem: 8.2GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m qihoo360/360VL-8B --use-flash-attn" # test pass✅, time: 5.1s, mem: 17.8GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m vikhyatk/moondream1" # test pass✅, time: 4.0s, mem: 5.2GB, 8/8 tests passed.
#CLI_COMMAND="python vision.py -m vikhyatk/moondream2 --use-flash-attn" # test pass✅, time: 3.8s, mem: 4.9GB, 8/8 tests passed.