Commit
Merge pull request #9 from matatonic/dev
0.28.0
matatonic authored Jul 17, 2024
2 parents b424f49 + 0961219 commit d06f0e2
Showing 9 changed files with 265 additions and 136 deletions.
11 changes: 10 additions & 1 deletion README.md
@@ -29,6 +29,7 @@ An OpenAI API compatible vision server, it functions like `gpt-4-vision-preview`
- - [X] [cogagent-chat-hf](https://huggingface.co/THUDM/cogagent-chat-hf)
- - [X] [glm-4v-9b](https://huggingface.co/THUDM/glm-4v-9b) (wont gpu split)
- [X] [InternLM](https://huggingface.co/internlm/)
- - [X] [XComposer2-2d5-7b](https://huggingface.co/internlm/internlm-xcomposer2d5-7b) (wont gpu split)
- - [X] [XComposer2-4KHD-7b](https://huggingface.co/internlm/internlm-xcomposer2-4khd-7b) (wont gpu split)
- - [X] [XComposer2-7b](https://huggingface.co/internlm/internlm-xcomposer2-7b) [finetune] (wont gpu split)
- - [X] [XComposer2-7b-4bit](https://huggingface.co/internlm/internlm-xcomposer2-7b-4bit) (not recommended)
@@ -100,7 +101,9 @@ An OpenAI API compatible vision server, it functions like `gpt-4-vision-preview`
- - [X] [MGM-13B-HD](https://huggingface.co/YanweiLi/MGM-13B-HD) (alternate docker only)
- - [X] [MGM-34B-HD](https://huggingface.co/YanweiLi/MGM-34B-HD) (alternate docker only)
- - [X] [MGM-8x7B-HD](https://huggingface.co/YanweiLi/MGM-8x7B-HD) (alternate docker only)
- [X] [cognitivecomputations/dolphin-vision-72b](https://huggingface.co/cognitivecomputations/dolphin-vision-72b)
- [X] [cognitivecomputations]
- - [X] [dolphin-vision-72b](https://huggingface.co/cognitivecomputations/dolphin-vision-72b)
- - [X] [dolphin-vision-7b](https://huggingface.co/cognitivecomputations/dolphin-vision-7b)
- [X] [qnguyen3]
- - [X] [nanoLLaVA](https://huggingface.co/qnguyen3/nanoLLaVA) (wont gpu split)
- - [X] [nanoLLaVA-1.5](https://huggingface.co/qnguyen3/nanoLLaVA-1.5) (wont gpu split)
@@ -116,6 +119,12 @@ See: [OpenVLM Leaderboard](https://huggingface.co/spaces/opencompass/open_vlm_le

## Recent updates

Version 0.28.0

- new model support: internlm-xcomposer2d5-7b
- new model support: dolphin-vision-7b (currently KeyError: 'bunny-qwen')
- Pin glm-4v-9b revision until we support transformers 4.42

Version 0.27.1

- new model support: qnguyen3/nanoLLaVA-1.5
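The README describes an OpenAI API compatible vision server, so the models added in 0.28.0 can be exercised with the standard OpenAI client. Below is a minimal sketch; the base URL, API key, and served model name are assumptions for illustration, not values taken from this commit.

```python
# Minimal sketch: querying an OpenAI-compatible vision server with an image.
# The base_url, api_key, and model name are illustrative assumptions.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:5006/v1", api_key="skip")  # assumed local endpoint

response = client.chat.completions.create(
    model="internlm-xcomposer2d5-7b",  # assumed served model name
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe this image."},
            {"type": "image_url", "image_url": {"url": "https://example.com/cat.jpg"}},
        ],
    }],
    max_tokens=256,
)
print(response.choices[0].message.content)
```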
1 change: 1 addition & 0 deletions backend/dv-qwen.py
@@ -9,6 +9,7 @@
warnings.filterwarnings('ignore')

# cognitivecomputations/dolphin-vision-72b
# cognitivecomputations/dolphin-vision-7b

class VisionQnA(VisionQnABase):
    model_name: str = "dolphin-vision"
1 change: 1 addition & 0 deletions backend/glm-4v.py
@@ -6,6 +6,7 @@
# THUDM/glm-4v-9b

class VisionQnA(VisionQnABase):
    revision: str = "ade85af5ed77b437edf3cf4d941116026159a618" # until transformers 4.42 support
    model_name: str = "glm-4v"
    format: str = 'glm-4v'
    vision_layers: List[str] = ['vision']
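The `revision` attribute added above pins glm-4v to a specific Hugging Face commit until the server supports transformers 4.42. How `VisionQnABase` forwards that value is not shown in this diff; the following is only a sketch, assuming it ends up as the standard `revision=` argument that `from_pretrained` accepts.

```python
# Sketch only: pinning a Hugging Face model to a fixed commit so later upstream
# changes to the remote modeling code don't break loading. The actual plumbing
# inside VisionQnABase is not part of this commit.
from transformers import AutoModel, AutoTokenizer

revision = "ade85af5ed77b437edf3cf4d941116026159a618"  # pinned commit from the diff

tokenizer = AutoTokenizer.from_pretrained("THUDM/glm-4v-9b", revision=revision, trust_remote_code=True)
model = AutoModel.from_pretrained("THUDM/glm-4v-9b", revision=revision, trust_remote_code=True)
```

Pinning keeps the remote code stable; the pin can be dropped once the server moves to transformers 4.42.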
2 changes: 1 addition & 1 deletion backend/minicpm.py
@@ -3,7 +3,7 @@
from vision_qna import *

# openbmb/MiniCPM-Llama3-V-2_5
# openbmb/MiniCPM-V-2
# openbmb/MiniCPM-V-2 - maybe broken after revision: str = "187851962daa9b63072d40ec802f597b71bff532"
# openbmb/MiniCPM-V aka OmniLMM-3B

class VisionQnA(VisionQnABase):
88 changes: 88 additions & 0 deletions backend/xcomposer2d5.py
@@ -0,0 +1,88 @@
import os
import warnings
import torch
from transformers import AutoTokenizer, AutoModel, logging

from vision_qna import *

logging.set_verbosity_error()
warnings.filterwarnings('ignore')

# internlm/internlm-xcomposer2d5
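# MAX_TILES is the upper bound on HD image tiles passed to interleav_wrap_chat() as hd_num;
# it can be overridden per-instance via extra_params['max_tiles'].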
MAX_TILES = 24

class VisionQnA(VisionQnABase):
    model_name: str = "internlm-xcomposer2d5"
    format: str = "internal"
    vision_layers: List[str] = ['vit', 'vision_proj']

    def __init__(self, model_id: str, device: str, device_map: str = 'auto', extra_params = {}, format = None):
        super().__init__(model_id, device, device_map, extra_params, format)

        torch.set_grad_enabled(False)

        self.max_tiles = extra_params.get('max_tiles', MAX_TILES)

        self.tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=self.params.get('trust_remote_code', False))
        self.model = AutoModel.from_pretrained(**self.params).eval()
        self.model.tokenizer = self.tokenizer

        # bitsandbytes already moves the model to the device, so we don't need to do it again.
        if not (extra_params.get('load_in_4bit', False) or extra_params.get('load_in_8bit', False)):
            self.model = self.model.to(self.device)

        self.eos_token = '[UNUSED_TOKEN_145]'
        self.eos_token_id = self.tokenizer.convert_tokens_to_ids([self.eos_token])[0]

        self.loaded_banner()


    async def stream_chat_with_images(self, request: ImageChatRequest) -> AsyncGenerator[str, None]:
        prompt, history, files, meta_instruction = await prompt_history_images_system_from_messages(request.messages, img_tok='<ImageHere>', url_handler=url_to_file)

        with torch.autocast(device_type='cuda', dtype=torch.float16):
            inputs, im_mask, _ = self.model.interleav_wrap_chat(prompt, files, history=history, meta_instruction=meta_instruction, hd_num=self.max_tiles)

        inputs = {
            k: v.to(self.device)
            for k, v in inputs.items() if torch.is_tensor(v)
        }
        inputs['im_mask'] = im_mask

        default_params = {
            #'num_beams': 3,
            #'do_sample': False,
            "temperature": 1.0,
            "top_p": 0.8,
            'do_sample': True,
            'repetition_penalty': 1.005,
            'eos_token_id': [ self.tokenizer.eos_token_id, self.eos_token_id ], # also add end-of-assistant token in eos token id to avoid unnecessary generation
        }
        params = self.get_generation_params(request, default_params)

        generation_kwargs = dict(
            **inputs,
            **params,
        )

        try:
            def wrapper(**kwargs):
                with torch.autocast(device_type='cuda', dtype=torch.float16):
                    _ = self.model.generate(**kwargs)

            for new_text in threaded_streaming_generator(generate=wrapper, tokenizer=self.tokenizer, generation_kwargs=generation_kwargs):
                end = new_text.find(self.eos_token)
                if end == -1:
                    yield new_text
                else:
                    yield new_text[:end]
                    break

        except Exception as e:
            logger.error(e)
            # raise

        finally:
            for f in files:
                os.remove(f)

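The streaming loop above relies on `threaded_streaming_generator` from `vision_qna`, which is not part of this diff. As a rough sketch of how such a helper is typically built on top of transformers' `TextIteratorStreamer` (an assumption, not the repository's actual implementation):

```python
# Hypothetical sketch of a threaded streaming helper; the real vision_qna
# implementation is not shown in this commit and may differ.
from threading import Thread
from transformers import TextIteratorStreamer

def threaded_streaming_generator(generate, tokenizer, generation_kwargs):
    # Run generate() in a background thread and yield decoded text chunks
    # as the streamer produces them.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)
    kwargs = dict(generation_kwargs, streamer=streamer)
    thread = Thread(target=generate, kwargs=kwargs)
    thread.start()
    try:
        for new_text in streamer:
            yield new_text
    finally:
        thread.join()
```

The backend then scans each decoded chunk for the `[UNUSED_TOKEN_145]` end-of-assistant marker and stops streaming once it appears, as in the loop above.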
2 changes: 2 additions & 0 deletions model_conf_tests.json
@@ -70,6 +70,8 @@
["adept/fuyu-8b", "--device-map", "cuda:0", "--load-in-4bit"],
["adept/fuyu-8b", "--device-map", "cuda:0"],
["cognitivecomputations/dolphin-vision-72b", "--use-flash-attn", "--load-in-4bit"],
["cognitivecomputations/dolphin-vision-7b", "--use-flash-attn", "--load-in-4bit"],
["cognitivecomputations/dolphin-vision-7b", "--use-flash-attn"],
["echo840/Monkey", "--load-in-4bit"],
["echo840/Monkey"],
["echo840/Monkey-Chat", "--load-in-4bit"],
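Each entry above pairs a model id with the launch flags it is tested under. As a hedged illustration of how such an entry could be expanded into a command line (the test harness is not in this diff, and the `vision.py` entry point and `--model` flag are assumptions):

```python
# Illustrative only: expand model_conf_tests.json entries into launch commands.
# The entry point name ("vision.py") and "--model" flag are assumed for this sketch.
import json

with open("model_conf_tests.json") as f:
    configs = json.load(f)

for model_id, *flags in configs:
    print("python vision.py --model", model_id, *flags)
```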
3 changes: 3 additions & 0 deletions requirements.txt
@@ -42,6 +42,9 @@ optimum
tiktoken
transformers_stream_generator

# video
decord

# 360vl
logger

