0.10.0 + adept/fuyu-8b support
matatonic committed Apr 14, 2024
1 parent 952f570 commit c8d7fc7
Showing 10 changed files with 109 additions and 40 deletions.
README.md: 14 changes (3 additions & 11 deletions)
@@ -29,6 +29,7 @@ An OpenAI API compatible vision server, it functions like `gpt-4-vision-preview`
- [X] [01-ai/Yi-VL](https://huggingface.co/01-ai)
- - [ ] [Yi-VL-6B](https://huggingface.co/01-ai/Yi-VL-6B) (currently errors)
- - [ ] [Yi-VL-34B](https://huggingface.co/01-ai/Yi-VL-34B) (currently errors)
- [X] [fuyu-8b](https://huggingface.co/adept/fuyu-8b) [pretrain]
- [X] [Monkey-Chat](https://huggingface.co/echo840/Monkey-Chat)
- [X] [Monkey](https://huggingface.co/echo840/Monkey)
- [X] [Qwen-VL-Chat](https://huggingface.co/Qwen/Qwen-VL-Chat)
@@ -56,10 +57,11 @@ An OpenAI API compatible vision server, it functions like `gpt-4-vision-preview`

See: [OpenVLM Leaderboard](https://huggingface.co/spaces/opencompass/open_vlm_leaderboard)

Version: 0.9.1
Version: 0.10.0

## Recent updates

- new model support: adept/fuyu-8b
- new model support: MiniCPM-V-2
- new model support: MiniGemini-7B -> MiniGemini-8x7B-HD, alternate docker.
- new openai_example.sh shell script for simple command line generation.
@@ -68,16 +70,6 @@ Version: 0.9.1
- Fix: moondream1 (use alt container)
- Split images into main (transformers>=4.39.0) and alt (transformers==4.36.2)
- Big performance gains (10x) for some models, especially llava-v1.6-34B (`use_cache` missing from many models, all llava* models, more.)
- new model support: qnguyen3/nanoLLaVA (sub 1B model)
- Updated chat_with_image.py to include --single (-1) answer mode
- More testing
- `sample.env` contains VRAM usage and some notes about model configurations.
- new model support: MiniGemini-2B (it's still a bit complex to use, see `prepare_minigemini.sh`)
- new model support: echo840/Monkey-Chat, echo840/Monkey
- AutoGPTQ support for internlm/internlm-xcomposer2-7b-4bit, internlm/internlm-xcomposer2-vl-7b-4bit
- Automatic selection of backend, based on the model name




## API Documentation
backend/fuyu.py: 34 changes (34 additions & 0 deletions)
@@ -0,0 +1,34 @@
from transformers import FuyuProcessor, FuyuForCausalLM

from vision_qna import *

# "adept/fuyu-8b"

class VisionQnA(VisionQnABase):
    model_name: str = "fuyu"
    format: str = "fuyu"

    def __init__(self, model_id: str, device: str, device_map: str = 'auto', extra_params = {}, format = None):
        super().__init__(model_id, device, device_map, extra_params, format)

        if not format:
            self.format = guess_model_format(model_id)

        del self.params['trust_remote_code'] # not needed.

        self.processor = FuyuProcessor.from_pretrained(model_id)
        self.model = FuyuForCausalLM.from_pretrained(**self.params)

        print(f"Loaded on device: {self.model.device} with dtype: {self.model.dtype}")

    async def chat_with_images(self, request: ImageChatRequest) -> str:
        images, prompt = await prompt_from_messages(request.messages, self.format)

        inputs = self.processor(text=prompt, images=images[0], return_tensors="pt").to(self.model.device)

        params = self.get_generation_params(request)

        output = self.model.generate(**inputs, **params)
        response = self.processor.decode(output[0][inputs.input_ids.size(1):].cpu(), skip_special_tokens=True)

        return response.strip()
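
A quick usage sketch: once vision.py is serving this backend, any OpenAI client can exercise chat_with_images through the chat completions endpoint. The base URL and dummy key below match the defaults in chat_with_image.py; the model name and image URL are illustrative, since the server answers with whichever model it was launched with.

from openai import OpenAI

client = OpenAI(base_url='http://localhost:5006/v1', api_key='skip')

response = client.chat.completions.create(
    model='gpt-4-vision-preview',  # illustrative; not used to route between models
    max_tokens=128,
    messages=[{
        'role': 'user',
        'content': [
            {'type': 'image_url', 'image_url': {'url': 'https://example.com/bus.jpg'}},
            {'type': 'text', 'text': 'What is in this image?'},
        ],
    }],
)
print(response.choices[0].message.content)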
chat_with_image.py: 8 changes (7 additions & 1 deletion)
@@ -1,9 +1,15 @@
#!/usr/bin/env python
import os
import requests
import argparse
from datauri import DataURI
from openai import OpenAI

try:
    import dotenv
    dotenv.load_dotenv(override=True)
except:
    pass

def url_for_api(img_url: str = None, filename: str = None, always_data=False) -> str:
    if img_url.startswith('http'):
@@ -31,7 +37,7 @@ def url_for_api(img_url: str = None, filename: str = None, always_data=False) -> str:
parser.add_argument('questions', type=str, nargs='*', help='The question to ask the image')
args = parser.parse_args()

client = OpenAI(base_url='http://localhost:5006/v1', api_key='skip')
client = OpenAI(base_url=os.environ.get('OPENAI_BASE_URL', 'http://localhost:5006/v1'), api_key='skip')

params = {}
if args.max_tokens is not None:
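Note: chat_with_image.py now honors OPENAI_BASE_URL from the environment (or from a .env file when python-dotenv is installed) instead of hard-coding localhost. A sketch of a .env entry, value illustrative:

OPENAI_BASE_URL=http://localhost:5006/v1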
model_conf_tests.alt.json: 23 changes (11 additions & 12 deletions)
@@ -1,12 +1,21 @@
[
["vikhyatk/moondream2", "--use-flash-attn"],
["vikhyatk/moondream1"],

["echo840/Monkey"],
["echo840/Monkey-Chat"],
["THUDM/cogvlm-chat-hf"],
["THUDM/cogagent-chat-hf"],
["Qwen/Qwen-VL-Chat"],
["YanweiLi/Mini-Gemini-2B", "--use-flash-attn"],
["YanweiLi/Mini-Gemini-7B", "--use-flash-attn"],
["YanweiLi/Mini-Gemini-7B-HD", "--use-flash-attn"],
["YanweiLi/Mini-Gemini-13B", "--use-flash-attn"],
["YanweiLi/Mini-Gemini-13B-HD", "--use-flash-attn"],
["YanweiLi/Mini-Gemini-34B", "--use-flash-attn"],
["YanweiLi/Mini-Gemini-34B-HD", "--use-flash-attn"],
["YanweiLi/Mini-Gemini-8x7B", "--use-flash-attn"],
["YanweiLi/Mini-Gemini-8x7B-HD", "--use-flash-attn"],
["adept/fuyu-8b", "--device-map", "cuda:0"],
["internlm/internlm-xcomposer2-7b", "--use-flash-attn", "--device-map", "cuda:0"],
["internlm/internlm-xcomposer2-vl-7b", "--use-flash-attn", "--device-map", "cuda:0"],
["openbmb/MiniCPM-V-2", "--use-flash-attn", "--device-map", "cuda:0"],
@@ -30,15 +39,5 @@
["YanweiLi/Mini-Gemini-34B", "--load-in-4bit", "--use-flash-attn"],
["YanweiLi/Mini-Gemini-34B-HD", "--load-in-4bit", "--use-flash-attn"],
["YanweiLi/Mini-Gemini-8x7B", "--load-in-4bit", "--use-flash-attn"],
["YanweiLi/Mini-Gemini-8x7B-HD", "--load-in-4bit", "--use-flash-attn"],

["YanweiLi/Mini-Gemini-2B", "--use-flash-attn"],
["YanweiLi/Mini-Gemini-7B", "--use-flash-attn"],
["YanweiLi/Mini-Gemini-7B-HD", "--use-flash-attn"],
["YanweiLi/Mini-Gemini-13B", "--use-flash-attn"],
["YanweiLi/Mini-Gemini-13B-HD", "--use-flash-attn"],
["YanweiLi/Mini-Gemini-34B", "--use-flash-attn"],
["YanweiLi/Mini-Gemini-34B-HD", "--use-flash-attn"],
["YanweiLi/Mini-Gemini-8x7B", "--use-flash-attn"],
["YanweiLi/Mini-Gemini-8x7B-HD", "--use-flash-attn"]
["YanweiLi/Mini-Gemini-8x7B-HD", "--load-in-4bit", "--use-flash-attn"]
]
model_conf_tests.json: 2 changes (1 addition & 1 deletion)
@@ -1,13 +1,13 @@
[
["vikhyatk/moondream2", "--use-flash-attn"],
["vikhyatk/moondream1"],

["qnguyen3/nanoLLaVA", "--use-flash-attn"],
["echo840/Monkey"],
["echo840/Monkey-Chat"],
["THUDM/cogvlm-chat-hf"],
["THUDM/cogagent-chat-hf"],
["Qwen/Qwen-VL-Chat"],
["adept/fuyu-8b", "--device-map", "cuda:0"],
["internlm/internlm-xcomposer2-7b", "--use-flash-attn", "--device-map", "cuda:0"],
["internlm/internlm-xcomposer2-vl-7b", "--use-flash-attn", "--device-map", "cuda:0"],
["openbmb/MiniCPM-V-2", "--use-flash-attn", "--device-map", "cuda:0"],
prepare_minigemini.sh: 5 changes (5 additions & 0 deletions)
@@ -1,6 +1,11 @@
#!/bin/bash
export HF_HOME=hf_home

if [ -z "$(which huggingface-cli)" ]; then
    echo "First install huggingface-hub: pip install huggingface-hub"
    exit 1
fi

echo "Edit this script and uncomment which models to download"

huggingface-cli download OpenAI/clip-vit-large-patch14-336 --local-dir model_zoo/OpenAI/clip-vit-large-patch14-336
test_models.py: 2 changes (1 addition & 1 deletion)
@@ -48,7 +48,7 @@ def record_result(cmd_args, results, t, mem, note):
        'note': note
    }])
    result = all(results)
    print(f"\n#CLI_COMMAND={cmd_args} # test {'pass' if result else 'fail'}, time: {t:.1f}s, mem: {mem:.1f}GB, {note}")
    print(f"#CLI_COMMAND=\"python vision.py -m {' '.join(cmd_args)}\" # test {'pass' if result else 'fail'}, time: {t:.1f}s, mem: {mem:.1f}GB, {note}")

torch_memory_baseline = 0

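With the quoted format, each result line can be pasted directly into the sample env files; for example, a passing run prints exactly this line, which also appears in vision-alt.sample.env below:

#CLI_COMMAND="python vision.py -m adept/fuyu-8b --device-map cuda:0" # test pass, time: 13.4s, mem: 25.0GB, All tests passed.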
vision-alt.sample.env: 25 changes (13 additions & 12 deletions)
@@ -9,17 +9,27 @@ HF_HOME=hf_home
#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf" # test pass, time: 13.4s, mem: 36.3GB, All tests passed.
#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf" # test pass, time: 14.7s, mem: 37.2GB, All tests passed.
#CLI_COMMAND="python vision.py -m Qwen/Qwen-VL-Chat" # test pass, time: 4.9s, mem: 19.5GB, All tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-2B --use-flash-attn" # test fail, time: -1.0s, mem: -1.0GB, Error: Server failed to start (exit).
#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-7B --use-flash-attn" # test pass, time: 5.4s, mem: 15.6GB, All tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-7B-HD --use-flash-attn" # test pass, time: 15.8s, mem: 18.8GB, All tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-13B --use-flash-attn" # test pass, time: 21.3s, mem: 27.6GB, All tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-13B-HD --use-flash-attn" # test pass, time: 15.9s, mem: 31.7GB, All tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-34B --use-flash-attn" # test pass, time: 11.1s, mem: 67.2GB, All tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-34B-HD --use-flash-attn" # test pass, time: 145.1s, mem: 70.3GB, All tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-8x7B --use-flash-attn" # test pass, time: 14.3s, mem: 91.3GB, All tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-8x7B-HD --use-flash-attn" # test pass, time: 18.5s, mem: 96.1GB, All tests passed.
#CLI_COMMAND="python vision.py -m adept/fuyu-8b --device-map cuda:0" # test pass, time: 13.4s, mem: 25.0GB, All tests passed.
#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b --use-flash-attn --device-map cuda:0" # test pass, time: 18.2s, mem: 19.0GB, All tests passed.
#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b --use-flash-attn --device-map cuda:0" # test pass, time: 16.7s, mem: 20.2GB, All tests passed.
#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V-2 --use-flash-attn --device-map cuda:0" # test pass, time: 6.6s, mem: 11.4GB, All tests passed.
#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V --use-flash-attn --device-map cuda:0" # test pass, time: 5.7s, mem: 7.6GB, All tests passed.
#CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --use-flash-attn --device-map cuda:0" # test fail, time: 2.0s, mem: 15.6GB,
#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --use-flash-attn --device-map cuda:0" # test pass, time: 5.4s, mem: 14.5GB, All tests passed.
#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --use-flash-attn --device-map cuda:0" # test pass, time: 6.6s, mem: 26.9GB, All tests passed.
#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf --load-in-4bit" # test pass, time: 19.5s, mem: 12.2GB, All tests passed.
#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf --load-in-4bit" # test pass, time: 20.4s, mem: 12.2GB, All tests passed.
#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b-4bit --use-flash-attn --device cuda:0" # test pass, time: 10.5s, mem: 9.5GB, All tests passed.
#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b-4bit --use-flash-attn --device cuda:0" # test pass, time: 11.6s, mem: 10.9GB, All tests passed.
#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf --load-in-4bit" # test pass, time: 19.5s, mem: 12.2GB, All tests passed.
#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf --load-in-4bit" # test pass, time: 20.4s, mem: 12.2GB, All tests passed.
#CLI_COMMAND="python vision.py -m llava-hf/bakLlava-v1-hf --load-in-4bit --use-flash-attn" # test fail, time: 2.5s, mem: 6.0GB,
#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-7b-hf --load-in-4bit --use-flash-attn" # test pass, time: 9.2s, mem: 5.6GB, All tests passed.
#CLI_COMMAND="python vision.py -m llava-hf/llava-1.5-13b-hf --load-in-4bit --use-flash-attn" # test pass, time: 10.0s, mem: 9.0GB, All tests passed.
@@ -30,13 +40,4 @@ HF_HOME=hf_home
#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-34B --load-in-4bit --use-flash-attn" # test pass, time: 16.8s, mem: 21.5GB, All tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-34B-HD --load-in-4bit --use-flash-attn" # test pass, time: 215.3s, mem: 24.2GB, All tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-8x7B --load-in-4bit --use-flash-attn" # test pass, time: 22.2s, mem: 26.3GB, All tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-8x7B-HD --load-in-4bit --use-flash-attn" # test pass, time: 24.7s, mem: 29.5GB, All tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-2B --use-flash-attn" # test fail, time: -1.0s, mem: -1.0GB, Error: Server failed to start (exit).
#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-7B --use-flash-attn" # test pass, time: 5.4s, mem: 15.6GB, All tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-7B-HD --use-flash-attn" # test pass, time: 15.8s, mem: 18.8GB, All tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-13B --use-flash-attn" # test pass, time: 21.3s, mem: 27.6GB, All tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-13B-HD --use-flash-attn" # test pass, time: 15.9s, mem: 31.7GB, All tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-34B --use-flash-attn" # test pass, time: 11.1s, mem: 67.2GB, All tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-34B-HD --use-flash-attn" # test pass, time: 145.1s, mem: 70.3GB, All tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-8x7B --use-flash-attn" # test pass, time: 14.3s, mem: 91.3GB, All tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-8x7B-HD --use-flash-attn" # test pass, time: 18.5s, mem: 96.1GB, All tests passed.
#CLI_COMMAND="python vision.py -m YanweiLi/Mini-Gemini-8x7B-HD --load-in-4bit --use-flash-attn" # test pass, time: 24.7s, mem: 29.5GB, All tests passed.
vision.sample.env: 1 change (1 addition & 0 deletions)
@@ -10,6 +10,7 @@ HF_HOME=hf_home
#CLI_COMMAND="python vision.py -m THUDM/cogvlm-chat-hf" # test pass, time: 14.1s, mem: 36.2GB, All tests passed.
#CLI_COMMAND="python vision.py -m THUDM/cogagent-chat-hf" # test pass, time: 14.7s, mem: 37.2GB, All tests passed.
#CLI_COMMAND="python vision.py -m Qwen/Qwen-VL-Chat" # test pass, time: 4.8s, mem: 19.5GB, All tests passed.
#CLI_COMMAND="python vision.py -m adept/fuyu-8b --device-map cuda:0" # test pass, time: 13.4s, mem: 25.0GB, All tests passed.
#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-7b --use-flash-attn --device-map cuda:0" # test pass, time: 18.3s, mem: 19.0GB, All tests passed.
#CLI_COMMAND="python vision.py -m internlm/internlm-xcomposer2-vl-7b --use-flash-attn --device-map cuda:0" # test pass, time: 14.9s, mem: 20.2GB, All tests passed.
#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V-2 --use-flash-attn --device-map cuda:0" # test pass, time: 6.7s, mem: 11.5GB, All tests passed.
vision_qna.py: 35 changes (33 additions & 2 deletions)
@@ -320,6 +320,31 @@ async def gemma_prompt_from_messages(messages: list[Message], img_tok = "<image>

    return images, prompt

async def fuyu_prompt_from_messages(messages: list[Message], img_tok = "", img_end = ''):
    prompt = ''
    images = []

    for m in messages:
        if m.role == 'user':
            p = ''
            for c in m.content:
                if c.type == 'image_url':
                    images.extend([ await url_to_image(c.image_url.url) ])
                    p = img_tok + p + img_end
                if c.type == 'text':
                    p += f"{c.text}\n\n" # Question:
            prompt += p
        elif m.role == 'assistant':
            for c in m.content:
                if c.type == 'text':
                    prompt += f"\x04{c.text}\n"
        elif m.role == 'system':
            for c in m.content:
                if c.type == 'text':
                    prompt += f"{c.text}\n\n" # fake system prompt doesn't work.

    return images, prompt
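
# A sketch of the prompt this builds for a one-image, one-turn exchange
# (strings illustrative; the image is passed to the processor separately,
# and '\x04' is fuyu's beginning-of-answer marker):
#
#   user text  "What is in this image?"  ->  "What is in this image?\n\n"
#   assistant  "A bus."                  ->  "\x04A bus.\n"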

async def prompt_history_images_system_from_messages(messages: list[Message], img_tok = "<image>\n", url_handler = url_to_image):
    history = []
    images = []
@@ -361,7 +386,8 @@ async def prompt_from_messages(messages: list[Message], format: str) -> str:
        'llama2': llama2_prompt_from_messages,
        'mistral': llama2_prompt_from_messages, # simplicity
        'chatml': chatml_prompt_from_messages,
        'gemma': gemma_prompt_from_messages
        'gemma': gemma_prompt_from_messages,
        'fuyu': fuyu_prompt_from_messages,
    }

    if format not in known_formats:
@@ -379,6 +405,7 @@ def guess_model_format(model_name: str) -> str:
        'vicuna0': ['yi-vl'],
        'phi15': ['moondream1', 'moondream2', 'monkey'],
        'chatml': ['34b', 'yi-6b', 'nanollava'],
        'fuyu': ['fuyu'],
    }
    for format, options in model_format_match_map.items():
        if any(x in model_id for x in options):
@@ -434,4 +461,8 @@ def guess_backend(model_name: str) -> str:
        return 'yi-vl'

    if 'thudm/cog' in model_id:
        return 'cogvlm'
        return 'cogvlm'

    if 'fuyu' in model_id:
        return 'fuyu'
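
# With both of the additions above, a fuyu model id resolves end to end, e.g.:
#   guess_model_format('adept/fuyu-8b')  ->  'fuyu'  (prompt format)
#   guess_backend('adept/fuyu-8b')       ->  'fuyu'  (backend module)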
