0.31.0 +qwen2-vl
matatonic committed Sep 13, 2024
1 parent c615dcc commit e438d38
Showing 8 changed files with 211 additions and 126 deletions.
1 change: 1 addition & 0 deletions .github/workflows/build-docker.yml
@@ -5,6 +5,7 @@ on:
push:
branches:
- 'main'
- 'dev'
release:
types: [published]

2 changes: 1 addition & 1 deletion Dockerfile
@@ -10,7 +10,7 @@ RUN git clone https://github.com/togethercomputer/Dragonfly --single-branch /app

COPY requirements.txt .
ARG VERSION=latest
RUN if [ "$VERSION" = "alt" ]; then echo "transformers==4.41.2" >> requirements.txt; else echo "transformers>=4.44.2\nautoawq>=0.2.5" >> requirements.txt ; fi
RUN if [ "$VERSION" = "alt" ]; then echo "transformers==4.41.2" >> requirements.txt; else echo "git+https://github.com/huggingface/transformers\nautoawq>=0.2.5" >> requirements.txt ; fi
# TODO: nvidia apex wheel
RUN --mount=type=cache,target=/root/.cache/pip pip install -U -r requirements.txt

7 changes: 6 additions & 1 deletion README.md
@@ -83,7 +83,7 @@ Can't decide which to use? See the [OpenVLM Leaderboard](https://huggingface.co/
- - [ ] [InternVL2-2B-AWQ](https://huggingface.co/OpenGVLab/InternVL2-2B-AWQ) (currently errors)
- - [X] [InternVL2-1B](https://huggingface.co/OpenGVLab/InternVL2-1B)
- - [X] [InternVL-Chat-V1-5](https://huggingface.co/OpenGVLab/InternVL-Chat-V1-5) (won't gpu split yet)
- - [X] [InternVL-Chat-V1-5-Int8](https://huggingface.co/OpenGVLab/InternVL-Chat-V1-5-Int8) (won't gpu split yet)
- - [ ] [InternVL-Chat-V1-5-AWQ](https://huggingface.co/OpenGVLab/InternVL-Chat-V1-5-AWQ) (won't gpu split yet)
- - [X] [Mini-InternVL-Chat-4B-V1-5](https://huggingface.co/OpenGVLab/Mini-InternVL-Chat-4B-V1-5) (alternate docker only)
- - [X] [Mini-InternVL-Chat-2B-V1-5](https://huggingface.co/OpenGVLab/Mini-InternVL-Chat-2B-V1-5)
- [X] [Salesforce](https://huggingface.co/Salesforce)
@@ -133,6 +133,11 @@ If you can't find your favorite model, you can [open a new issue](https://github.

## Recent updates

Version 0.31.0

- new model support: Qwen/Qwen2-VL family of models (video untested; GPTQ not working yet, but AWQ and BF16 work fine) — see the usage sketch below
- transformers is now installed from git (Qwen2-VL support is not yet in a released version)
- Regression: THUDM/glm-4v-9b is broken in this release

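To try the new Qwen2-VL support, point any OpenAI-compatible client at the server. A minimal sketch, assuming the server is running on its default port 5006 with one of the Qwen2-VL configs; the prompt and image URL are placeholders:

```python
from openai import OpenAI

# Assumes a running server, started with something like:
#   python vision.py --model Qwen/Qwen2-VL-7B-Instruct-AWQ -A flash_attention_2
# The api_key is not checked by the server; any string works.
client = OpenAI(base_url="http://localhost:5006/v1", api_key="skip")

response = client.chat.completions.create(
    model="Qwen/Qwen2-VL-7B-Instruct-AWQ",
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe this image."},
            {"type": "image_url", "image_url": {"url": "https://example.com/cat.jpg"}},
        ],
    }],
    max_tokens=256,
)
print(response.choices[0].message.content)
```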
Version 0.30.0

83 changes: 83 additions & 0 deletions backend/qwen2-vl.py
@@ -0,0 +1,83 @@
from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor
from qwen_vl_utils import process_vision_info

import os
from vision_qna import *

# Qwen/Qwen2-VL-2B-Instruct-AWQ
# Qwen/Qwen2-VL-2B-Instruct
# Qwen/Qwen2-VL-7B-Instruct-AWQ
# Qwen/Qwen2-VL-7B-Instruct
# X Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4
# X Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8
# X Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4
# X Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8

class VisionQnA(VisionQnABase):
    model_name: str = "qwen2-vl"
    format: str = 'chatml'
    vision_layers: List[str] = ['visual']

    def __init__(self, model_id: str, device: str, device_map: str = 'auto', extra_params = {}, format = None):
        super().__init__(model_id, device, device_map, extra_params, format)

        self.processor = AutoProcessor.from_pretrained(model_id)

        # the model class is loaded directly, so trust_remote_code is not needed
        del self.params['trust_remote_code']
        self.model = Qwen2VLForConditionalGeneration.from_pretrained(**self.params).eval()

        self.loaded_banner()

    async def stream_chat_with_images(self, request: ImageChatRequest) -> AsyncGenerator[str, None]:
        # image_tag = '<|vision_start|><|image_pad|><|vision_end|>'

        messages = []

        for m in request.messages:
            if m.role == 'user':
                msg = { 'role': m.role, 'content': [] }
                for c in m.content:
                    if c.type == 'image_url':
                        # hack around https://github.com/QwenLM/Qwen2-VL/issues/202
                        # strip a stray ';charset=...' segment from data: URLs
                        if c.image_url.url.startswith('data:image'):
                            parts = c.image_url.url.split(';')
                            if parts[1].startswith('charset='):
                                c.image_url.url = parts[0] + ';' + parts[2]

                        msg['content'].append({'type': c.type, 'image': c.image_url.url})
                    elif c.type == 'text':
                        msg['content'].append({'type': c.type, 'text': c.text})
                    elif c.type == 'video': # not likely to work.
                        msg['content'].append({'type': c.type, 'video': c.image_url.url})
            else:
                # assumes non-user (system/assistant) messages are text-only; pass them as plain text
                msg = { 'role': m.role, 'content': "\n".join(c.text for c in m.content if c.type == 'text') }

            messages.append(msg)

        text = self.processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

        image_inputs, video_inputs = process_vision_info(messages)
        inputs = self.processor(
            text=[text],
            images=image_inputs,
            videos=video_inputs,
            padding=True,
            return_tensors="pt",
        )
        inputs = inputs.to(self.device)

        params = self.get_generation_params(request, default_params={})

        generation_kwargs = dict(
            **inputs,
            **params,
        )

        for new_text in threaded_streaming_generator(generate=self.model.generate, tokenizer=self.processor.tokenizer, generation_kwargs=generation_kwargs):
            end = new_text.find(self.processor.tokenizer.eos_token)
            if end == -1:
                yield new_text
            else:
                yield new_text[:end]
                break
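The charset workaround in `stream_chat_with_images` normalizes data URLs like `data:image/png;charset=utf-8;base64,...`, which the Qwen2-VL image loader rejects (see the linked issue). A standalone sketch of the same normalization, with made-up payloads:

```python
def strip_charset(url: str) -> str:
    """Drop a stray ';charset=...' segment from a data: image URL."""
    if url.startswith('data:image'):
        parts = url.split(';')
        if len(parts) > 2 and parts[1].startswith('charset='):
            return parts[0] + ';' + parts[2]
    return url

# 'data:image/png;charset=utf-8;base64,AAAA' -> 'data:image/png;base64,AAAA'
assert strip_charset('data:image/png;charset=utf-8;base64,AAAA') == 'data:image/png;base64,AAAA'
# URLs without a charset segment pass through unchanged.
assert strip_charset('data:image/png;base64,AAAA') == 'data:image/png;base64,AAAA'
```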
14 changes: 4 additions & 10 deletions model_conf_tests.json
@@ -24,13 +24,10 @@
["OpenGVLab/InternVL-Chat-V1-5", "--device-map", "cuda:0", "--max-tiles", "40", "--load-in-4bit"],
["OpenGVLab/InternVL-Chat-V1-5", "--device-map", "cuda:0", "--max-tiles", "40"],
["OpenGVLab/InternVL-Chat-V1-5", "--device-map", "cuda:0"],
["OpenGVLab/InternVL-Chat-V1-5-Int8", "--device-map", "cuda:0"],
["OpenGVLab/InternVL2-1B", "--device-map", "cuda:0", "--load-in-4bit"],
["OpenGVLab/InternVL2-1B", "--device-map", "cuda:0"],
["OpenGVLab/InternVL2-2B", "--device-map", "cuda:0", "--load-in-4bit"],
["OpenGVLab/InternVL2-2B", "--device-map", "cuda:0"],
["OpenGVLab/InternVL2-4B", "--device-map", "cuda:0", "--load-in-4bit"],
["OpenGVLab/InternVL2-4B", "--device-map", "cuda:0"],
["OpenGVLab/InternVL2-8B", "--device-map", "cuda:0", "--load-in-4bit"],
["OpenGVLab/InternVL2-8B", "--device-map", "cuda:0"],
["OpenGVLab/InternVL2-26B", "--device-map", "cuda:0", "--load-in-4bit"],
@@ -42,11 +39,12 @@
["OpenGVLab/Mini-InternVL-Chat-2B-V1-5", "--max-tiles", "40", "--load-in-4bit"],
["OpenGVLab/Mini-InternVL-Chat-2B-V1-5", "--max-tiles", "40"],
["OpenGVLab/Mini-InternVL-Chat-2B-V1-5"],
["OpenGVLab/Mini-InternVL-Chat-4B-V1-5", "--max-tiles", "40", "--load-in-4bit"],
["OpenGVLab/Mini-InternVL-Chat-4B-V1-5", "--load-in-4bit"],
["OpenGVLab/Mini-InternVL-Chat-4B-V1-5"],
["Qwen/Qwen-VL-Chat", "--load-in-4bit"],
["Qwen/Qwen-VL-Chat"],
["Qwen/Qwen2-VL-2B-Instruct-AWQ", "-A", "flash_attention_2"],
["Qwen/Qwen2-VL-2B-Instruct", "-A", "flash_attention_2"],
["Qwen/Qwen2-VL-7B-Instruct-AWQ", "-A", "flash_attention_2"],
["Qwen/Qwen2-VL-7B-Instruct", "-A", "flash_attention_2"],
["Salesforce/xgen-mm-phi3-mini-instruct-dpo-r-v1.5"],
["Salesforce/xgen-mm-phi3-mini-instruct-interleave-r-v1.5"],
["Salesforce/xgen-mm-phi3-mini-instruct-singleimg-r-v1.5"],
@@ -69,14 +67,10 @@
["fancyfeast/joy-caption-pre-alpha", "-A", "flash_attention_2"],
["internlm/internlm-xcomposer2d5-7b", "-A", "flash_attention_2", "--device-map", "cuda:0", "--load-in-4bit"],
["internlm/internlm-xcomposer2d5-7b", "-A", "flash_attention_2", "--device-map", "cuda:0"],
["internlm/internlm-xcomposer2-4khd-7b", "-A", "flash_attention_2", "--device-map", "cuda:0", "--load-in-4bit"],
["internlm/internlm-xcomposer2-4khd-7b", "-A", "flash_attention_2", "--device-map", "cuda:0"],
["internlm/internlm-xcomposer2-7b", "-A", "flash_attention_2", "--device-map", "cuda:0", "--load-in-4bit"],
["internlm/internlm-xcomposer2-7b", "-A", "flash_attention_2", "--device-map", "cuda:0"],
["internlm/internlm-xcomposer2-7b-4bit", "-A", "flash_attention_2"],
["internlm/internlm-xcomposer2-vl-1_8b", "-A", "flash_attention_2", "--device-map", "cuda:0", "--load-in-4bit"],
["internlm/internlm-xcomposer2-vl-1_8b", "-A", "flash_attention_2", "--device-map", "cuda:0"],
["internlm/internlm-xcomposer2-vl-7b", "-A", "flash_attention_2", "--device-map", "cuda:0", "--load-in-4bit"],
["internlm/internlm-xcomposer2-vl-7b", "-A", "flash_attention_2", "--device-map", "cuda:0"],
["internlm/internlm-xcomposer2-vl-7b-4bit", "-A", "flash_attention_2"],
["llava-hf/llava-1.5-13b-hf", "-A", "flash_attention_2", "--device-map", "cuda:0", "--load-in-4bit"],
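Each entry in model_conf_tests.json is an argument list: a model id followed by the extra flags it needs (attention implementation, device map, quantization). A hedged sketch of how the new Qwen2-VL entries could be turned into launch commands, assuming the repo's `vision.py` entry point and `--model` flag:

```python
import json

with open('model_conf_tests.json') as f:
    configs = json.load(f)

for model_id, *flags in configs:
    if model_id.startswith('Qwen/Qwen2-VL'):
        # e.g. python vision.py --model Qwen/Qwen2-VL-2B-Instruct-AWQ -A flash_attention_2
        print('python vision.py --model', model_id, *flags)
```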
1 change: 1 addition & 0 deletions requirements.txt
@@ -41,6 +41,7 @@ matplotlib
optimum
tiktoken
transformers_stream_generator
qwen-vl-utils

# video
decord
(Diffs for the remaining two changed files are not shown.)
