Commit 0.7.0

matatonic committed Apr 8, 2024
1 parent 6a24f4f commit c203ab3
Showing 18 changed files with 322 additions and 117 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -1,5 +1,7 @@
#
hf_home/
model_zoo/
YanweiLi/

# Byte-compiled / optimized / DLL files
__pycache__/
19 changes: 14 additions & 5 deletions Dockerfile
@@ -1,15 +1,24 @@
FROM python:3.11-slim

RUN apt-get update && apt-get install -y git
RUN pip install --no-cache-dir --upgrade pip

RUN mkdir -p /app
WORKDIR /app

RUN pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.5.6/flash_attn-2.5.6+cu122torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl
RUN pip install --no-cache-dir -U https://github.com/Dao-AILab/flash-attention/releases/download/v2.5.6/flash_attn-2.5.6+cu122torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
RUN pip install --no-cache-dir -U -r requirements.txt

RUN git clone https://github.com/dvlab-research/MiniGemini.git --single-branch /app/MiniGemini

WORKDIR /app/MiniGemini
RUN pip install --no-cache-dir --no-deps -e .

WORKDIR /app
COPY requirements.*.txt .
RUN for r in requirements.*.txt ; do pip install --no-cache-dir -r $r; done
RUN for r in requirements.*.txt ; do pip install -U --no-cache-dir -r $r; done

COPY *.py .
COPY backend /app/backend
CMD python vision.py

CMD python vision.py
25 changes: 25 additions & 0 deletions Dockerfile.minigemini
@@ -0,0 +1,25 @@
FROM python:3.11-slim

RUN mkdir -p /app

RUN apt-get update && apt-get install -y git

RUN git clone https://github.com/dvlab-research/MiniGemini.git --single-branch /app/MiniGemini
WORKDIR /app/MiniGemini
RUN pip install --no-cache-dir --upgrade pip
RUN pip install --no-cache-dir --no-deps -e .

WORKDIR /app

COPY requirements.minigemini.txt .
RUN pip install --no-cache-dir -r requirements.minigemini.txt

COPY *.py .
COPY backend /app/backend

RUN pip show torch
RUN pip show torchvision
RUN pip show transformers
RUN pip show accelerate
CMD python vision.py

89 changes: 57 additions & 32 deletions README.md
@@ -8,29 +8,53 @@ An OpenAI API compatible vision server, it functions like `gpt-4-vision-preview`
- Not affiliated with OpenAI in any way

Model support:
- [X] [InternLM-XComposer2](https://huggingface.co/internlm/internlm-xcomposer2-7b) [finetune] (multi-image chat model, lots of warnings on startup, but works fine)
- [X] [InternLM-XComposer2-VL](https://huggingface.co/internlm/internlm-xcomposer2-vl-7b) [pretrain] *(only supports a single image, also lots of warnings)
- [X] [LlavaNext](https://huggingface.co/llava-hf) - (llava-v1.6-mistral-7b-hf, llava-v1.6-34b-hf - llava-v1.6-34b-hf is not working well yet) *(only supports a single image)
- [X] [Llava](https://huggingface.co/llava-hf) - (llava-v1.5-vicuna-7b-hf, llava-v1.5-vicuna-13b-hf, llava-v1.5-bakLlava-7b-hf) *(only supports a single image)
- [X] [InternLM-XComposer2](https://huggingface.co/internlm/internlm-xcomposer2-7b) [finetune] (multi-image chat model, lots of warnings on startup, won't split across GPUs)
- [X] [InternLM-XComposer2-VL](https://huggingface.co/internlm/internlm-xcomposer2-vl-7b) [pretrain] *(only supports a single image, also lots of warnings, won't split across GPUs)
- [X] [LlavaNext](https://huggingface.co/llava-hf) *(only supports a single image)
- - [X] [llava-v1.6-34b-hf](https://huggingface.co/llava-hf/llava-v1.6-34b-hf)
- - [X] [llava-v1.6-vicuna-13b-hf](https://huggingface.co/llava-hf/llava-v1.6-vicuna-13b-hf)
- - [X] [llava-v1.6-vicuna-7b-hf](https://huggingface.co/llava-hf/llava-v1.6-vicuna-7b-hf)
- - [X] [llava-v1.6-mistral-7b-hf](https://huggingface.co/llava-hf/llava-v1.6-mistral-7b-hf)
- [X] [Llava](https://huggingface.co/llava-hf) *(only supports a single image)
- - [X] [llava-v1.5-vicuna-7b-hf](https://huggingface.co/llava-hf/llava-v1.5-vicuna-7b-hf)
- - [X] [llava-v1.5-vicuna-13b-hf](https://huggingface.co/llava-hf/llava-v1.5-vicuna-13b-hf)
- - [ ] [llava-v1.5-bakLlava-7b-hf](https://huggingface.co/llava-hf/llava-v1.5-bakLlava-7b-hf) (currently errors)
- [X] [Monkey-Chat](https://huggingface.co/echo840/Monkey-Chat)
- [X] [Monkey](https://huggingface.co/echo840/Monkey)
- [X] [Qwen-VL-Chat](https://huggingface.co/Qwen/Qwen-VL-Chat)
- [X] Moondream2 - [vikhyatk/moondream2](https://huggingface.co/vikhyatk/moondream2) *(only supports a single image)
- [ ] Moondream1 - [vikhyatk/moondream1](https://huggingface.co/vikhyatk/moondream1)
- [ ] Deepseek-VL - [deepseek-ai/deepseek-vl-7b-chat](https://huggingface.co/deepseek-ai/deepseek-vl-7b-chat)
- [X] [openbmb/MiniCPM-V](https://huggingface.co/openbmb/MiniCPM-V) (aka. OmniLMM-3B) *(only supports a single image)
- [ ] [openbmb/OmniLMM-12B](https://huggingface.co/openbmb/OmniLMM-12B)
- [ ] [echo840/Monkey](https://huggingface.co/echo840/Monkey)
- [ ] [YanweiLi/MiniGemini](https://huggingface.co/collections/YanweiLi/)
- [X] [Moondream2](https://huggingface.co/vikhyatk/moondream2) *(only supports a single image)
- [X] [MiniCPM-V](https://huggingface.co/openbmb/MiniCPM-V) (aka. OmniLMM-3B) *(only supports a single image)
- [X] [MiniGemini](https://huggingface.co/collections/YanweiLi/) (more complex setup, see: `prepare_minigemini.sh`)
- - [X] [MiniGemini-2B](https://huggingface.co/YanweiLi/Mini-Gemini-2B)
- - [ ] [MiniGemini-7B](https://huggingface.co/YanweiLi/Mini-Gemini-7B) (currently errors)
- - [ ] [MiniGemini-13B](https://huggingface.co/YanweiLi/Mini-Gemini-13B) (currently errors)
- - [ ] [MiniGemini-34B](https://huggingface.co/YanweiLi/Mini-Gemini-34B) (currently errors)
- - [ ] [MiniGemini-8x7B](https://huggingface.co/YanweiLi/Mini-Gemini-8x7B) (currently errors)
- - [ ] [MiniGemini-7B-HD](https://huggingface.co/YanweiLi/Mini-Gemini-7B-HD) (currently errors)
- - [ ] [MiniGemini-13B-HD](https://huggingface.co/YanweiLi/Mini-Gemini-13B-HD) (currently errors)
- - [ ] [MiniGemini-34B-HD](https://huggingface.co/YanweiLi/Mini-Gemini-34B-HD) (currently errors)
- - [ ] [MiniGemini-8x7B-HD](https://huggingface.co/YanweiLi/Mini-Gemini-8x7B-HD) (currently errors)
- [ ] [OmniLMM-12B](https://huggingface.co/openbmb/OmniLMM-12B)
- [ ] [Moondream1](https://huggingface.co/vikhyatk/moondream1)
- [ ] [Deepseek-VL-7b-chat](https://huggingface.co/deepseek-ai/deepseek-vl-7b-chat)
- [ ] [Deepseek-VL-1.3b-chat](https://huggingface.co/deepseek-ai/deepseek-vl-1.3b-chat)
- [ ] [NousResearch/Obsidian-3B-V0.5](https://huggingface.co/NousResearch/Obsidian-3B-V0.5)
- [ ] ...


Some vision systems include their own OpenAI compatible API server. Also included are some pre-built images and docker-compose for them:
- [X] [THUDM/CogVLM](https://github.com/THUDM/CogVLM) ([cogvlm-chat-hf](https://huggingface.co/THUDM/cogvlm-chat-hf), [cogagent-chat-hf](https://huggingface.co/THUDM/cogagent-chat-hf)), `docker-compose.cogvlm.yml` **Recommended for 16GB-40GB GPU**s
- [X] [01-ai](https://huggingface.co/01-ai)/Yi-VL ([Yi-VL-6B](https://huggingface.co/01-ai/Yi-VL-6B), [Yi-VL-34B](https://huggingface.co/01-ai/Yi-VL-34B)), `docker-compose.yi-vl.yml`
Some vision systems include their own OpenAI compatible API server. Included are some pre-built images and docker-compose files for them (they must be run separately):
- [X] [THUDM/CogVLM](https://github.com/THUDM/CogVLM) `docker-compose.cogvlm.yml`
- - [X] [cogvlm-chat-hf](https://huggingface.co/THUDM/cogvlm-chat-hf)
- - [X] [cogagent-chat-hf](https://huggingface.co/THUDM/cogagent-chat-hf) **Recommended for 16GB-40GB GPU**
- [X] [01-ai](https://huggingface.co/01-ai)/Yi-VL `docker-compose.yi-vl.yml`
- - [X] [Yi-VL-6B](https://huggingface.co/01-ai/Yi-VL-6B)
- - [X] [Yi-VL-34B](https://huggingface.co/01-ai/Yi-VL-34B)

Version: 0.6.1
Version: 0.7.0

Recent updates:
- new model support: MiniGemini-2B (it's still a bit complex to use, see `prepare_minigemini.sh`)
- new model support: echo840/Monkey-Chat, echo840/Monkey
- AutoGPTQ support for internlm/internlm-xcomposer2-7b-4bit, internlm/internlm-xcomposer2-vl-7b-4bit
- Automatic selection of backend based on the model name
- Enable trust_remote_code by default
@@ -48,10 +72,23 @@ API Documentation

* [OpenAI Vision guide](https://platform.openai.com/docs/guides/vision)

Installation instructions
-------------------------

(**Docker Recommended**)
Docker support
--------------

1) Edit the docker-compose file to suit your needs.

2) You can run the server via docker like so:
```shell
docker compose up
# for CogVLM
docker compose -f docker-compose.cogvlm.yml up
# for Yi-VL
docker compose -f docker-compose.yi-vl.yml up
```

Manual Installation instructions
--------------------------------

```shell
# install the python dependencies
@@ -64,6 +101,8 @@ pip install .
python vision.py --model vikhyatk/moondream2
```

For MiniGemini support, the docker image is recommended. See `Dockerfile` and `requirements.minigemini.txt` for manual installation instructions.

Usage
-----

@@ -92,20 +131,6 @@ options:
--preload Preload model and exit. (default: False)
```

Docker support
--------------

1) Edit the docker-compose file to suit your needs.

2) You can run the server via docker like so:
```shell
docker compose up
# for CogVLM
docker compose -f docker-compose.cogvlm.yml up
# for Yi-VL
docker compose -f docker-compose.yi-vl.yml up
```

Sample API Usage
----------------

6 changes: 4 additions & 2 deletions backend/llava.py
@@ -16,6 +16,8 @@ def __init__(self, model_id: str, device: str, extra_params = {}, format = None)
if not format:
self.format = guess_model_format(model_id)

del self.params['trust_remote_code']

self.processor = LlavaProcessor.from_pretrained(model_id)
self.model = LlavaForConditionalGeneration.from_pretrained(**self.params).eval()

@@ -29,6 +31,6 @@ async def chat_with_images(self, request: ImageChatRequest) -> str:
params = self.get_generation_params(request)

output = self.model.generate(**inputs, **params)
response = self.processor.decode(output[0], skip_special_tokens=True)
response = self.processor.decode(output[0][inputs['input_ids'].size(1):].cpu(), skip_special_tokens=True)

return answer_from_response(response, self.format)
return response
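
The decode change above keeps `generate()` from echoing the prompt back in the reply: the returned sequence is the prompt tokens followed by the newly generated tokens, so slicing from the prompt length leaves only the completion (the same slicing is applied in `backend/llavanext.py` below). A standalone sketch of the pattern; the helper name is hypothetical:

```python
import torch

def decode_completion(output_ids: torch.Tensor, input_ids: torch.Tensor, processor) -> str:
    """Decode only the newly generated tokens.

    generate() returns [prompt tokens | new tokens]; dropping the first
    input_ids.size(1) tokens removes the echoed prompt from the reply.
    """
    prompt_len = input_ids.size(1)                  # number of prompt tokens
    new_tokens = output_ids[0][prompt_len:].cpu()   # completion tokens only
    return processor.decode(new_tokens, skip_special_tokens=True)
```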
11 changes: 7 additions & 4 deletions backend/llavanext.py
@@ -1,10 +1,10 @@
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
from vision_qna import *

# model_id = "llava-hf/llava-v1.6-mistral-7b-hf" # llama2
# model_id = "llava-hf/llava-v1.6-34b-hf" # chatml
# model_id = "llava-hf/llava-v1.6-vicuna-13b-hf" # vicuna
# model_id = "llava-hf/llava-v1.6-vicuna-7b-hf" # vicuna
# model_id = "llava-hf/llava-v1.6-mistral-7b-hf" # llama2

class VisionQnA(VisionQnABase):
model_name: str = "llavanext"
@@ -16,7 +16,10 @@ def __init__(self, model_id: str, device: str, extra_params = {}, format = None)
if not format:
self.format = guess_model_format(model_id)

self.processor = LlavaNextProcessor.from_pretrained(model_id)
del self.params['trust_remote_code']

use_fast = 'mistral' in model_id
self.processor = LlavaNextProcessor.from_pretrained(model_id, use_fast=use_fast)
self.model = LlavaNextForConditionalGeneration.from_pretrained(**self.params).eval()

print(f"Loaded on device: {self.model.device} with dtype: {self.model.dtype}")
@@ -29,6 +32,6 @@ async def chat_with_images(self, request: ImageChatRequest) -> str:
params = self.get_generation_params(request)

output = self.model.generate(**inputs, **params)
response = self.processor.decode(output[0], skip_special_tokens=True)
response = self.processor.decode(output[0][inputs['input_ids'].size(1):].cpu(), skip_special_tokens=True)

return answer_from_response(response, self.format)
return response
88 changes: 75 additions & 13 deletions backend/minigemini.py
@@ -1,11 +1,24 @@
import re
from transformers import AutoTokenizer, AutoModelForCausalLM

from minigemini.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN
from minigemini.conversation import conv_templates, SeparatorStyle
from minigemini.model.builder import load_pretrained_model
from minigemini.mm_utils import process_images
from minigemini.utils import disable_torch_init
from minigemini.mm_utils import process_images, tokenizer_image_token, get_model_name_from_path, KeywordsStoppingCriteria

from vision_qna import *

# YanweiLi/Mini-Gemini-2B
# YanweiLi/Mini-Gemini-7B
# YanweiLi/Mini-Gemini-7B-HD
# YanweiLi/Mini-Gemini-13B
# YanweiLi/Mini-Gemini-34B
# YanweiLi/Mini-Gemini-34B-HD
# YanweiLi/Mini-Gemini-13B-HD
# YanweiLi/Mini-Gemini-8x7B-HD
# YanweiLi/Mini-Gemini-8x7B

class VisionQnA(VisionQnABase):
model_name: str = "minigemini"
format: str = "llama2"
@@ -16,19 +29,72 @@ def __init__(self, model_id: str, device: str, extra_params = {}, format = None)
if not format:
self.format = guess_model_format(model_id)

model_base, model_name = model_id.split('/', 1)
del self.params['low_cpu_mem_usage']
del self.params['pretrained_model_name_or_path']
del self.params['trust_remote_code']

self.tokenizer, self.model, self.image_processor, self.context_len = load_pretrained_model(
args.model_path, args.model_base, model_name, args.load_8bit, args.load_4bit, device=args.device)
model_id, None, model_name, **self.params)


print(f"Loaded on device: {self.model.device} with dtype: {self.model.dtype}")

async def chat_with_images(self, request: ImageChatRequest) -> str:
images, prompt = await prompt_from_messages(request.messages, self.format)
image_convert, prompt = await prompt_from_messages(request.messages, self.format)

if hasattr(self.model.config, 'image_size_aux'):
if not hasattr(self.image_processor, 'image_size_raw'):
self.image_processor.image_size_raw = self.image_processor.crop_size.copy()
self.image_processor.crop_size['height'] = self.model.config.image_size_aux
self.image_processor.crop_size['width'] = self.model.config.image_size_aux
self.image_processor.size['shortest_edge'] = self.model.config.image_size_aux

image_tensor = process_images(image_convert, self.image_processor, self.model.config)

image_grid = getattr(self.model.config, 'image_grid', 1)
if hasattr(self.model.config, 'image_size_aux'):
raw_shape = [self.image_processor.image_size_raw['height'] * image_grid,
self.image_processor.image_size_raw['width'] * image_grid]
image_tensor_aux = image_tensor
image_tensor = torch.nn.functional.interpolate(image_tensor,
size=raw_shape,
mode='bilinear',
align_corners=False)
else:
image_tensor_aux = []

if image_grid >= 2:
raw_image = image_tensor.reshape(3,
image_grid,
self.image_processor.image_size_raw['height'],
image_grid,
self.image_processor.image_size_raw['width'])
raw_image = raw_image.permute(1, 3, 0, 2, 4)
raw_image = raw_image.reshape(-1, 3,
self.image_processor.image_size_raw['height'],
self.image_processor.image_size_raw['width'])

if getattr(self.model.config, 'image_global', False):
global_image = image_tensor
if len(global_image.shape) == 3:
global_image = global_image[None]
global_image = torch.nn.functional.interpolate(global_image,
size=[self.image_processor.image_size_raw['height'],
self.image_processor.image_size_raw['width']],
mode='bilinear',
align_corners=False)
# [image_crops, image_global]
raw_image = torch.cat([raw_image, global_image], dim=0)
image_tensor = raw_image.contiguous()
image_tensor = image_tensor.unsqueeze(0)

if type(image_tensor) is list:
image_tensor = [image.to(self.model.device, dtype=torch.float16) for image in image_tensor]
image_tensor_aux = [image.to(self.model.device, dtype=torch.float16) for image in image_tensor_aux]
else:
image_tensor = image_tensor.to(self.model.device, dtype=torch.float16)
image_tensor_aux = image_tensor_aux.to(self.model.device, dtype=torch.float16)

#encoded_images = self.model.encode_image(images).to(self.device)
# square?
image_tensor = process_images(image_convert, image_processor, model.config)
image_processor(images, return_tensors='pt')['pixel_values']

input_ids = tokenizer_image_token(prompt, self.tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt').unsqueeze(0).to(self.model.device)
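
`tokenizer_image_token` is the LLaVA-style helper bundled with MiniGemini; roughly, it tokenizes the text around each `<image>` placeholder and splices the special `IMAGE_TOKEN_INDEX` id in between, so the model can substitute image features at those positions. A simplified sketch, written as an assumption rather than the library's exact implementation:

```python
# Simplified sketch (assumption) of the LLaVA-style tokenizer_image_token helper.
def tokenizer_image_token_sketch(prompt: str, tokenizer, image_token_index: int = -200):
    # Tokenize the text on either side of each "<image>" placeholder.
    chunks = [tokenizer(chunk).input_ids for chunk in prompt.split("<image>")]
    input_ids = list(chunks[0])
    for chunk in chunks[1:]:
        # Later chunks typically repeat the BOS token; drop it before splicing.
        input_ids += [image_token_index] + list(chunk[1:])
    return input_ids
```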

@@ -38,18 +104,14 @@ async def chat_with_images(self, request: ImageChatRequest) -> str:
output_ids = self.model.generate(
input_ids,
images=image_tensor,
images_aux=None,
images_aux=image_tensor_aux if len(image_tensor_aux)>0 else None,
bos_token_id=self.tokenizer.bos_token_id, # Begin of sequence token
eos_token_id=self.tokenizer.eos_token_id, # End of sequence token
pad_token_id=self.tokenizer.pad_token_id, # Pad token
use_cache=True,
**params,
)

answer = self.tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0].strip()

self.

return answer
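
The grid-cropping block added above splits the interpolated image into `image_grid × image_grid` tiles (optionally appending a resized global view) before handing them to the model. A standalone sketch of that reshape/permute with a dummy tensor; the grid size and 336-pixel crop size are assumed, illustrative values:

```python
import torch

# Assumed values for illustration: a 2x2 grid of 336x336 raw crops.
image_grid, raw = 2, 336

# One image after interpolation to (raw * image_grid) pixels per side: (C, H, W).
image_tensor = torch.randn(3, raw * image_grid, raw * image_grid)

# Split H and W into (grid, raw) blocks, move the two grid axes to the front,
# then flatten them into a batch of crops: (image_grid**2, C, raw, raw).
crops = image_tensor.reshape(3, image_grid, raw, image_grid, raw)
crops = crops.permute(1, 3, 0, 2, 4)
crops = crops.reshape(-1, 3, raw, raw)

print(crops.shape)  # torch.Size([4, 3, 336, 336])
```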

