
0.28.1
- Update moondream2 support to 2024-07-23
- Pin openbmb/MiniCPM-Llama3-V-2_5 revision
matatonic committed Jul 24, 2024
1 parent d06f0e2 commit 2682aec
Showing 5 changed files with 144 additions and 133 deletions.
5 changes: 5 additions & 0 deletions README.md
@@ -119,6 +119,11 @@ See: [OpenVLM Leaderboard](https://huggingface.co/spaces/opencompass/open_vlm_le
 
 ## Recent updates
 
+Version 0.28.1
+
+- Update moondream2 support to 2024-07-23
+- Pin openbmb/MiniCPM-Llama3-V-2_5 revision
+
 Version 0.28.0
 
 - new model support: internlm-xcomposer2d5-7b
10 changes: 7 additions & 3 deletions backend/minicpm.py
@@ -2,8 +2,8 @@
 
 from vision_qna import *
 
-# openbmb/MiniCPM-Llama3-V-2_5
-# openbmb/MiniCPM-V-2 - maybe broken after revision: str = "187851962daa9b63072d40ec802f597b71bff532"
+# openbmb/MiniCPM-Llama3-V-2_5 # broken after 45387f99a455e11801b78a0b24811856688e0c8b
+# openbmb/MiniCPM-V-2 - 4bit broken
 # openbmb/MiniCPM-V aka OmniLMM-3B
 
 class VisionQnA(VisionQnABase):
@@ -14,6 +14,10 @@ class VisionQnA(VisionQnABase):
     def __init__(self, model_id: str, device: str, device_map: str = 'auto', extra_params = {}, format = None):
         super().__init__(model_id, device, device_map, extra_params, format)
 
+        # I wish there was a better way to do this...
+        if model_id == 'openbmb/MiniCPM-Llama3-V-2_5':
+            self.revision = '45387f99a455e11801b78a0b24811856688e0c8b'
+
         self.tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=self.params.get('trust_remote_code', False))
         self.model = AutoModel.from_pretrained(**self.params).eval()
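The pin relies on Hugging Face's `revision` argument: `from_pretrained` resolves the model to that exact Hub commit, so upstream changes to the remote code can no longer break this backend. A minimal sketch of the same idea outside this loader (illustrative only; the backend itself routes its options through `**self.params` rather than passing `revision=` directly):

```python
# Minimal illustration of pinning a Hub revision with transformers; not this repo's
# loader, which builds its kwargs in self.params instead.
from transformers import AutoModel, AutoTokenizer

model_id = 'openbmb/MiniCPM-Llama3-V-2_5'
revision = '45387f99a455e11801b78a0b24811856688e0c8b'  # last known-good commit per this change

tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision, trust_remote_code=True)
model = AutoModel.from_pretrained(model_id, revision=revision, trust_remote_code=True).eval()
```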

@@ -41,7 +45,7 @@ async def stream_chat_with_images(self, request: ImageChatRequest) -> AsyncGener
             msgs.extend([{ 'role': m.role, 'content': c.text }])
 
         if image is None:
-            image = await url_to_image(transparent_pixel_url)
+            image = await url_to_image(black_pixel_url)
 
         # default uses num_beams: 3, but if streaming/sampling is requested, switch the defaults.
         default_sampling_params = {
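`black_pixel_url` and `url_to_image` come from `vision_qna`, which is not part of this diff; the swap presumably avoids handing the model a transparent (alpha-channel) placeholder when no image is supplied. A rough sketch of what such helpers could look like, purely as an assumption about their shape:

```python
# Hypothetical stand-ins for the vision_qna helpers used above (not shown in this commit).
import base64, io
import aiohttp
from PIL import Image

def _pixel_data_url(color=(0, 0, 0)) -> str:
    # Encode a 1x1 pixel of the given color as a data: URL.
    buf = io.BytesIO()
    Image.new("RGB", (1, 1), color).save(buf, format="PNG")
    return "data:image/png;base64," + base64.b64encode(buf.getvalue()).decode()

black_pixel_url = _pixel_data_url()  # opaque black, unlike the old transparent pixel

async def url_to_image(url: str) -> Image.Image:
    # Accept both data: URLs and http(s) URLs, returning an RGB PIL image.
    if url.startswith("data:"):
        data = base64.b64decode(url.split(",", 1)[1])
        return Image.open(io.BytesIO(data)).convert("RGB")
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            return Image.open(io.BytesIO(await resp.read())).convert("RGB")
```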
2 changes: 1 addition & 1 deletion backend/moondream2.py
@@ -7,7 +7,7 @@
 
 class VisionQnA(VisionQnABase):
     model_name: str = "moondream2"
-    revision: str = '2024-05-20' # 'main'
+    revision: str = '2024-07-23' # 'main'
     format: str = 'phi15'
     vision_layers: List[str] = ["vision_encoder"]
 
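For reference, a hedged example of loading moondream2 at the newly pinned revision outside this backend; the `vikhyatk/moondream2` hub id and the `encode_image`/`answer_question` calls follow the model card for these dated revisions and are assumptions, since this commit only changes the revision string:

```python
# Assumed standalone usage of moondream2 at the pinned 2024-07-23 revision (per its
# model card); the hub id below is not stated in this diff.
from transformers import AutoModelForCausalLM, AutoTokenizer
from PIL import Image

model_id = "vikhyatk/moondream2"
revision = "2024-07-23"

model = AutoModelForCausalLM.from_pretrained(model_id, revision=revision, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)

image = Image.open("example.jpg")
enc = model.encode_image(image)
print(model.answer_question(enc, "Describe this image.", tokenizer))
```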
8 changes: 4 additions & 4 deletions requirements.txt
@@ -4,10 +4,10 @@ bitsandbytes
 fastapi
 # See: https://github.com/bdashore3/flash-attention/releases for other windows flash_attn releases
 # And: https://github.com/Dao-AILab/flash-attention/releases for linux.
-https://github.com/Dao-AILab/flash-attention/releases/download/v2.5.9.post1/flash_attn-2.5.9.post1+cu122torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/Dao-AILab/flash-attention/releases/download/v2.5.9.post1/flash_attn-2.5.9.post1+cu122torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/bdashore3/flash-attention/releases/download/v2.5.9.post1/flash_attn-2.5.9.post1+cu122torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/bdashore3/flash-attention/releases/download/v2.5.9.post1/flash_attn-2.5.9.post1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.2/flash_attn-2.6.2+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.2/flash_attn-2.6.2+cu123torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/bdashore3/flash-attention/releases/download/v2.6.2/flash_attn-2.6.1+cu123torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/bdashore3/flash-attention/releases/download/v2.6.2/flash_attn-2.6.1+cu123torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
 flash_attn; python_version != "3.10" and python_version != "3.11"
 hf_transfer
 loguru
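Each pinned wheel URL carries a PEP 508 environment marker after the semicolon, so pip installs only the wheel that matches the running interpreter and platform, and falls back to the plain `flash_attn` requirement on other Python versions. A quick way to see which marker applies, using `packaging` (which pip itself depends on); the snippet is a sketch, not part of the repo:

```python
# Evaluate the same PEP 508 markers requirements.txt uses to pick a flash-attn wheel.
from packaging.markers import Marker

markers = {
    "linux-cp311": 'platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"',
    "linux-cp310": 'platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"',
    "windows-cp311": 'platform_system == "Windows" and python_version == "3.11"',
}

for name, expr in markers.items():
    print(name, Marker(expr).evaluate())  # True only in a matching environment
```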