0.16.0 +phi-3-vision
matatonic committed May 21, 2024
1 parent 96f7dc4 commit 35a861a
Showing 7 changed files with 242 additions and 174 deletions.
README.md: 6 additions, 0 deletions
@@ -30,6 +30,8 @@ An OpenAI API compatible vision server, it functions like `gpt-4-vision-preview`
- - [X] [idefics2-8b-AWQ](https://huggingface.co/HuggingFaceM4/idefics2-8b-AWQ) (main docker only, won't gpu split)
- - [X] [idefics2-8b-chatty](https://huggingface.co/HuggingFaceM4/idefics2-8b-chatty) (main docker only, won't gpu split)
- - [X] [idefics2-8b-chatty-AWQ](https://huggingface.co/HuggingFaceM4/idefics2-8b-chatty-AWQ) (main docker only, won't gpu split)
- [X] [Microsoft](https://huggingface.co/microsoft/)
- - [X] [Phi-3-vision-128k-instruct](https://huggingface.co/microsoft/Phi-3-vision-128k-instruct) (main docker only)
- [X] [qihoo360](https://huggingface.co/qihoo360)
- - [X] [360VL-8B](https://huggingface.co/qihoo360/360VL-8B)
- - [X] [360VL-70B](https://huggingface.co/qihoo360/360VL-70B) (untested)
@@ -84,6 +86,10 @@ See: [OpenVLM Leaderboard](https://huggingface.co/spaces/opencompass/open_vlm_le

## Recent updates

Version 0.16.0

- new model support: microsoft/Phi-3-vision-128k-instruct

Version 0.15.1

- new model support: OpenGVLab/Mini-InternVL-Chat-2B-V1-5
backend/phi3.py: 34 additions, 0 deletions
@@ -0,0 +1,34 @@
from transformers import AutoProcessor, AutoModelForCausalLM

from vision_qna import *

# microsoft/Phi-3-vision-128k-instruct

class VisionQnA(VisionQnABase):
    model_name: str = "phi3"

    def __init__(self, model_id: str, device: str, device_map: str = 'auto', extra_params = {}, format = None):
        super().__init__(model_id, device, device_map, extra_params, format)

        # Phi-3-vision ships custom modeling code, so trust_remote_code is passed through from params.
        self.processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=self.params.get('trust_remote_code', False))
        self.model = AutoModelForCausalLM.from_pretrained(**self.params).eval()

        print(f"Loaded on device: {self.model.device} with dtype: {self.model.dtype}")

    async def chat_with_images(self, request: ImageChatRequest) -> str:
        # Convert the OpenAI-style message list into a Phi-3 prompt string plus image list.
        images, prompt = await phi3_prompt_from_messages(request.messages)

        inputs = self.processor(prompt, images=images, return_tensors="pt").to(self.model.device)

        # Deterministic decoding by default; per-request parameters can override these.
        default_params = {
            "temperature": 0.0,
            "do_sample": False,
            "eos_token_id": self.processor.tokenizer.eos_token_id,
        }

        params = self.get_generation_params(request, default_params)

        output = self.model.generate(**inputs, **params)
        # Decode only the newly generated tokens, skipping the prompt portion of the output.
        response = self.processor.decode(output[0][inputs.input_ids.shape[1]:], skip_special_tokens=True, clean_up_tokenization_spaces=False)

        return response
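Since the server exposes an OpenAI-compatible API, the new backend can be exercised end to end with a standard client. A minimal sketch follows; the port, API key placeholder, served model name, and image filename are illustrative assumptions, not part of this commit:

```python
# Hypothetical client sketch -- assumes the server is listening on
# localhost:5006 and that the OpenAI python client (openai>=1.0) is installed.
import base64
from openai import OpenAI

client = OpenAI(base_url="http://localhost:5006/v1", api_key="skip")

# Encode a local image as a base64 data URL, as expected by the vision API.
with open("example.jpg", "rb") as f:
    image_b64 = base64.b64encode(f.read()).decode("utf-8")

response = client.chat.completions.create(
    model="gpt-4-vision-preview",  # requests are routed to whichever backend is loaded
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe this image."},
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"}},
        ],
    }],
    max_tokens=300,
)
print(response.choices[0].message.content)
```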
model_conf_tests.alt.json: 32 additions, 34 deletions
@@ -1,53 +1,51 @@
[
["vikhyatk/moondream2", "--use-flash-attn"],
["vikhyatk/moondream1"],
["echo840/Monkey"],
["echo840/Monkey-Chat"],
["OpenGVLab/InternVL-Chat-V1-5", "--device-map", "cuda:0"],
["OpenGVLab/InternVL-Chat-V1-5", "--load-in-4bit", "--device-map", "cuda:0"],
["OpenGVLab/InternVL-Chat-V1-5-Int8", "--device-map", "cuda:0"],
["OpenGVLab/Mini-InternVL-Chat-2B-V1-5"],
["Qwen/Qwen-VL-Chat"],
["THUDM/cogagent-chat-hf", "--load-in-4bit"],
["THUDM/cogagent-chat-hf"],
["THUDM/cogvlm-chat-hf", "--load-in-4bit"],
["THUDM/cogvlm-chat-hf"],
["THUDM/cogvlm2-llama3-chat-19B"],
["THUDM/cogvlm2-llama3-chinese-chat-19B"],
["THUDM/cogvlm-chat-hf"],
["THUDM/cogagent-chat-hf"],
["Qwen/Qwen-VL-Chat"],
["YanweiLi/MGM-2B", "--use-flash-attn"],
["YanweiLi/MGM-7B", "--use-flash-attn"],
["YanweiLi/MGM-7B-HD", "--use-flash-attn"],
["YanweiLi/MGM-13B", "--load-in-4bit", "--use-flash-attn"],
["YanweiLi/MGM-13B", "--use-flash-attn"],
["YanweiLi/MGM-13B-HD", "--load-in-4bit", "--use-flash-attn"],
["YanweiLi/MGM-13B-HD", "--use-flash-attn"],
["YanweiLi/MGM-2B", "--use-flash-attn"],
["YanweiLi/MGM-34B", "--load-in-4bit", "--use-flash-attn"],
["YanweiLi/MGM-34B", "--use-flash-attn"],
["YanweiLi/MGM-34B-HD", "--load-in-4bit", "--use-flash-attn"],
["YanweiLi/MGM-34B-HD", "--use-flash-attn"],
["YanweiLi/MGM-7B", "--load-in-4bit", "--use-flash-attn"],
["YanweiLi/MGM-7B", "--use-flash-attn"],
["YanweiLi/MGM-7B-HD", "--load-in-4bit", "--use-flash-attn"],
["YanweiLi/MGM-7B-HD", "--use-flash-attn"],
["YanweiLi/MGM-8x7B", "--load-in-4bit", "--use-flash-attn"],
["YanweiLi/MGM-8x7B", "--use-flash-attn"],
["YanweiLi/MGM-8x7B-HD", "--load-in-4bit", "--use-flash-attn"],
["YanweiLi/MGM-8x7B-HD", "--use-flash-attn"],
["qihoo360/360VL-8B", "--use-flash-attn"],
["adept/fuyu-8b", "--device-map", "cuda:0"],
["echo840/Monkey"],
["echo840/Monkey-Chat"],
["internlm/internlm-xcomposer2-4khd-7b", "--use-flash-attn", "--device-map", "cuda:0"],
["internlm/internlm-xcomposer2-7b", "--use-flash-attn", "--device-map", "cuda:0"],
["internlm/internlm-xcomposer2-vl-7b", "--use-flash-attn", "--device-map", "cuda:0"],
["internlm/internlm-xcomposer2-vl-1_8b", "--use-flash-attn", "--device-map", "cuda:0"],
["openbmb/MiniCPM-V-2", "--use-flash-attn", "--device-map", "cuda:0"],
["openbmb/MiniCPM-V", "--use-flash-attn", "--device-map", "cuda:0"],
["llava-hf/bakLlava-v1-hf", "--use-flash-attn", "--device-map", "cuda:0"],
["llava-hf/llava-1.5-7b-hf", "--use-flash-attn", "--device-map", "cuda:0"],
["llava-hf/llava-1.5-13b-hf", "--use-flash-attn", "--device-map", "cuda:0"],

["OpenGVLab/InternVL-Chat-V1-5-Int8", "--device-map", "cuda:0"],
["OpenGVLab/InternVL-Chat-V1-5", "--load-in-4bit", "--device-map", "cuda:0"],
["THUDM/cogvlm-chat-hf", "--load-in-4bit"],
["THUDM/cogagent-chat-hf", "--load-in-4bit"],
["qihoo360/360VL-8B", "--use-flash-attn", "--load-in-4bit"],
["internlm/internlm-xcomposer2-7b-4bit", "--use-flash-attn", "--device", "cuda:0"],
["internlm/internlm-xcomposer2-vl-1_8b", "--use-flash-attn", "--device-map", "cuda:0"],
["internlm/internlm-xcomposer2-vl-7b", "--use-flash-attn", "--device-map", "cuda:0"],
["internlm/internlm-xcomposer2-vl-7b-4bit", "--use-flash-attn", "--device", "cuda:0"],
["llava-hf/bakLlava-v1-hf", "--load-in-4bit", "--use-flash-attn"],
["llava-hf/llava-1.5-7b-hf", "--load-in-4bit", "--use-flash-attn"],
["llava-hf/bakLlava-v1-hf", "--use-flash-attn", "--device-map", "cuda:0"],
["llava-hf/llava-1.5-13b-hf", "--load-in-4bit", "--use-flash-attn"],

["YanweiLi/MGM-7B", "--load-in-4bit", "--use-flash-attn"],
["YanweiLi/MGM-7B-HD", "--load-in-4bit", "--use-flash-attn"],
["YanweiLi/MGM-13B", "--load-in-4bit", "--use-flash-attn"],
["YanweiLi/MGM-13B-HD", "--load-in-4bit", "--use-flash-attn"],
["YanweiLi/MGM-34B", "--load-in-4bit", "--use-flash-attn"],
["YanweiLi/MGM-34B-HD", "--load-in-4bit", "--use-flash-attn"],
["YanweiLi/MGM-8x7B", "--load-in-4bit", "--use-flash-attn"],
["YanweiLi/MGM-8x7B-HD", "--load-in-4bit", "--use-flash-attn"]
["llava-hf/llava-1.5-13b-hf", "--use-flash-attn", "--device-map", "cuda:0"],
["llava-hf/llava-1.5-7b-hf", "--load-in-4bit", "--use-flash-attn"],
["llava-hf/llava-1.5-7b-hf", "--use-flash-attn", "--device-map", "cuda:0"],
["openbmb/MiniCPM-V", "--use-flash-attn", "--device-map", "cuda:0"],
["openbmb/MiniCPM-V-2", "--use-flash-attn", "--device-map", "cuda:0"],
["qihoo360/360VL-8B", "--use-flash-attn", "--load-in-4bit"],
["qihoo360/360VL-8B", "--use-flash-attn"],
["vikhyatk/moondream1"],
["vikhyatk/moondream2", "--use-flash-attn"]
]
model_conf_tests.json: 41 additions, 40 deletions
@@ -1,57 +1,58 @@
[
["vikhyatk/moondream2", "--use-flash-attn"],
["BAAI/Bunny-Llama-3-8B-V"],
["BAAI/Emu2-Chat", "--max-memory=0:78GiB,1:20GiB"],
["HuggingFaceM4/idefics2-8b", "--use-flash-attn", "--device-map", "cuda:0"],
["HuggingFaceM4/idefics2-8b-AWQ", "--use-flash-attn", "--device-map", "cuda:0"],
["HuggingFaceM4/idefics2-8b-chatty", "--use-flash-attn", "--device-map", "cuda:0"],
["HuggingFaceM4/idefics2-8b-chatty-AWQ", "--use-flash-attn", "--device-map", "cuda:0"],
["OpenGVLab/InternVL-Chat-V1-5", "--device-map", "cuda:0"],
["OpenGVLab/InternVL-Chat-V1-5", "--load-in-4bit", "--device-map", "cuda:0"],
["OpenGVLab/InternVL-Chat-V1-5-Int8", "--device-map", "cuda:0"],
["OpenGVLab/Mini-InternVL-Chat-2B-V1-5"],
["HuggingFaceM4/idefics2-8b-chatty", "--use-flash-attn", "--device-map", "cuda:0"],
["HuggingFaceM4/idefics2-8b", "--use-flash-attn", "--device-map", "cuda:0"],
["qihoo360/360VL-8B", "--use-flash-attn"],
["qnguyen3/nanoLLaVA", "--use-flash-attn", "--device-map", "cuda:0"],
["echo840/Monkey"],
["echo840/Monkey-Chat"],
["Qwen/Qwen-VL-Chat"],
["THUDM/cogagent-chat-hf", "--load-in-4bit"],
["THUDM/cogagent-chat-hf"],
["THUDM/cogvlm-chat-hf", "--load-in-4bit"],
["THUDM/cogvlm-chat-hf"],
["THUDM/cogvlm2-llama3-chat-19B"],
["THUDM/cogvlm2-llama3-chinese-chat-19B"],
["THUDM/cogvlm-chat-hf"],
["THUDM/cogagent-chat-hf"],
["Qwen/Qwen-VL-Chat"],
["BAAI/Emu2-Chat", "--max-memory=0:78GiB,1:20GiB"],
["BAAI/Bunny-Llama-3-8B-V"],
["qresearch/llama-3-vision-alpha-hf", "--device", "cuda:0"],
["TIGER-Lab/Mantis-8B-siglip-llama3", "--use-flash-attn", "--device-map", "cuda:0"],
["TIGER-Lab/Mantis-8B-clip-llama3", "--use-flash-attn", "--device-map", "cuda:0"],
["TIGER-Lab/Mantis-8B-Fuyu", "--device-map", "cuda:0"],
["TIGER-Lab/Mantis-8B-clip-llama3", "--use-flash-attn", "--device-map", "cuda:0"],
["TIGER-Lab/Mantis-8B-siglip-llama3", "--use-flash-attn", "--device-map", "cuda:0"],
["YanweiLi/MGM-2B", "--use-flash-attn", "--load-in-4bit"],
["YanweiLi/MGM-2B", "--use-flash-attn"],
["adept/fuyu-8b", "--device-map", "cuda:0"],
["echo840/Monkey"],
["echo840/Monkey-Chat"],
["internlm/internlm-xcomposer2-4khd-7b", "--use-flash-attn", "--device-map", "cuda:0"],
["internlm/internlm-xcomposer2-7b", "--use-flash-attn", "--device-map", "cuda:0"],
["internlm/internlm-xcomposer2-vl-7b", "--use-flash-attn", "--device-map", "cuda:0"],
["internlm/internlm-xcomposer2-7b-4bit", "--use-flash-attn"],
["internlm/internlm-xcomposer2-vl-1_8b", "--use-flash-attn", "--device-map", "cuda:0"],
["openbmb/MiniCPM-V-2", "--use-flash-attn", "--device-map", "cuda:0"],
["openbmb/MiniCPM-V", "--use-flash-attn", "--device-map", "cuda:0"],
["internlm/internlm-xcomposer2-vl-7b", "--use-flash-attn", "--device-map", "cuda:0"],
["internlm/internlm-xcomposer2-vl-7b-4bit", "--use-flash-attn"],
["llava-hf/bakLlava-v1-hf", "--load-in-4bit", "--use-flash-attn"],
["llava-hf/bakLlava-v1-hf", "--use-flash-attn", "--device-map", "cuda:0"],
["llava-hf/llava-1.5-7b-hf", "--use-flash-attn", "--device-map", "cuda:0"],
["llava-hf/llava-1.5-13b-hf", "--load-in-4bit", "--use-flash-attn"],
["llava-hf/llava-1.5-13b-hf", "--use-flash-attn", "--device-map", "cuda:0"],
["llava-hf/llava-1.5-7b-hf", "--load-in-4bit", "--use-flash-attn"],
["llava-hf/llava-1.5-7b-hf", "--use-flash-attn", "--device-map", "cuda:0"],
["llava-hf/llava-v1.6-34b-hf", "--load-in-4bit", "--use-flash-attn"],
["llava-hf/llava-v1.6-34b-hf", "--use-flash-attn"],
["llava-hf/llava-v1.6-mistral-7b-hf", "--load-in-4bit", "--use-flash-attn"],
["llava-hf/llava-v1.6-mistral-7b-hf", "--use-flash-attn"],
["llava-hf/llava-v1.6-vicuna-7b-hf", "--use-flash-attn"],
["llava-hf/llava-v1.6-vicuna-13b-hf", "--load-in-4bit", "--use-flash-attn"],
["llava-hf/llava-v1.6-vicuna-13b-hf", "--use-flash-attn"],
["llava-hf/llava-v1.6-34b-hf", "--use-flash-attn"],
["YanweiLi/MGM-2B", "--use-flash-attn"],

["OpenGVLab/InternVL-Chat-V1-5-Int8", "--device-map", "cuda:0"],
["OpenGVLab/InternVL-Chat-V1-5", "--load-in-4bit", "--device-map", "cuda:0"],
["HuggingFaceM4/idefics2-8b-chatty-AWQ", "--use-flash-attn", "--device-map", "cuda:0"],
["HuggingFaceM4/idefics2-8b-AWQ", "--use-flash-attn", "--device-map", "cuda:0"],
["qihoo360/360VL-8B", "--use-flash-attn", "--load-in-4bit"],
["llava-hf/llava-v1.6-vicuna-7b-hf", "--load-in-4bit", "--use-flash-attn"],
["llava-hf/llava-v1.6-vicuna-7b-hf", "--use-flash-attn"],
["microsoft/Phi-3-vision-128k-instruct", "--use-flash-attn", "--load-in-4bit"],
["microsoft/Phi-3-vision-128k-instruct", "--use-flash-attn"],
["openbmb/MiniCPM-V", "--use-flash-attn", "--device-map", "cuda:0"],
["openbmb/MiniCPM-V-2", "--use-flash-attn", "--device-map", "cuda:0"],
["qihoo360/360VL-70B", "--use-flash-attn", "--load-in-4bit"],
["qihoo360/360VL-8B", "--use-flash-attn", "--load-in-4bit"],
["qihoo360/360VL-8B", "--use-flash-attn"],
["qnguyen3/nanoLLaVA", "--use-flash-attn", "--device-map", "cuda:0"],
["qnguyen3/nanoLLaVA", "--use-flash-attn", "--load-in-4bit", "--device-map", "cuda:0"],
["THUDM/cogvlm-chat-hf", "--load-in-4bit"],
["THUDM/cogagent-chat-hf", "--load-in-4bit"],
["internlm/internlm-xcomposer2-7b-4bit", "--use-flash-attn"],
["internlm/internlm-xcomposer2-vl-7b-4bit", "--use-flash-attn"],
["llava-hf/bakLlava-v1-hf", "--load-in-4bit", "--use-flash-attn"],
["llava-hf/llava-1.5-7b-hf", "--load-in-4bit", "--use-flash-attn"],
["llava-hf/llava-1.5-13b-hf", "--load-in-4bit", "--use-flash-attn"],
["llava-hf/llava-v1.6-mistral-7b-hf", "--load-in-4bit", "--use-flash-attn"],
["llava-hf/llava-v1.6-vicuna-7b-hf", "--load-in-4bit", "--use-flash-attn"],
["llava-hf/llava-v1.6-vicuna-13b-hf", "--load-in-4bit", "--use-flash-attn"],
["llava-hf/llava-v1.6-34b-hf", "--load-in-4bit", "--use-flash-attn"],
["YanweiLi/MGM-2B", "--use-flash-attn", "--load-in-4bit"]
["qresearch/llama-3-vision-alpha-hf", "--device", "cuda:0"],
["vikhyatk/moondream2", "--use-flash-attn"]
]
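Both manifests follow the same shape: each entry is a model id followed by the CLI flags used when testing it. A minimal sketch of replaying a manifest is below; the `vision.py` entrypoint and `-m` flag are assumptions for illustration, since the real test harness is not part of this diff:

```python
# Hypothetical sketch of replaying the test manifest; entrypoint and flag
# names are assumed, not taken from this commit.
import json
import subprocess

with open("model_conf_tests.json") as f:
    tests = json.load(f)

for model_id, *flags in tests:
    # e.g. ["microsoft/Phi-3-vision-128k-instruct", "--use-flash-attn"]
    cmd = ["python", "vision.py", "-m", model_id, *flags]
    print("would run:", " ".join(cmd))
    # subprocess.run(cmd, check=True)  # uncomment to actually launch each server
```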