From e1576c648504dd601b25abf7d108898825bd1e07 Mon Sep 17 00:00:00 2001
From: matatonic
Date: Fri, 13 Sep 2024 20:02:03 -0400
Subject: [PATCH] 0.31.1 minicpm v2.6-int4 fix, container rename

---
 README.md                | 5 +++++
 backend/minicpm-v-2_6.py | 6 ++++--
 docker-compose.alt.yml   | 7 ++-----
 docker-compose.yml       | 7 ++-----
 model_conf_tests.json    | 1 +
 vision.sample.env        | 1 +
 6 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index 7314437..2955b32 100644
--- a/README.md
+++ b/README.md
@@ -69,6 +69,7 @@ Can't decide which to use? See the [OpenVLM Leaderboard](https://huggingface.co/
 - - [X] [Pixtral-12B](https://huggingface.co/mistralai/Pixtral-12B-2409)
 - [X] [openbmb](https://huggingface.co/openbmb)
 - - [X] [MiniCPM-V-2_6](https://huggingface.co/openbmb/MiniCPM-V-2_6) (video not supported yet)
+- - [X] [MiniCPM-V-2_6-int4](https://huggingface.co/openbmb/MiniCPM-V-2_6-int4)
 - - [X] [MiniCPM-Llama3-V-2_5](https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5)
 - - [X] [MiniCPM-V-2](https://huggingface.co/openbmb/MiniCPM-V-2) (alternate docker only)
 - - [X] [MiniCPM-V aka. OmniLMM-3B](https://huggingface.co/openbmb/MiniCPM-V) (alternate docker only)
@@ -138,6 +139,10 @@ If you can't find your favorite model, you can [open a new issue](https://github.
 
 ## Recent updates
 
+Version 0.31.1
+
+- Fix support for openbmb/MiniCPM-V-2_6-int4
+
 Version 0.31.0
 
 - new model support: Qwen/Qwen2-VL family of models (video untested, GPTQ not working yet, but AWQ and BF16 are fine)
diff --git a/backend/minicpm-v-2_6.py b/backend/minicpm-v-2_6.py
index e2e63c5..a1a7972 100644
--- a/backend/minicpm-v-2_6.py
+++ b/backend/minicpm-v-2_6.py
@@ -5,6 +5,7 @@ from decord import VideoReader, cpu
 
 
 # openbmb/MiniCPM-V-2_6
+# openbmb/MiniCPM-V-2_6-int4
 
 MAX_NUM_FRAMES=64 # if cuda OOM set a smaller number
 
@@ -36,8 +37,9 @@ def __init__(self, model_id: str, device: str, device_map: str = 'auto', extra_p
         self.model = AutoModel.from_pretrained(**self.params).eval()
 
         # bitsandbytes already moves the model to the device, so we don't need to do it again.
-        if not (extra_params.get('load_in_4bit', False) or extra_params.get('load_in_8bit', False)):
-            self.model = self.model.to(dtype=self.params['torch_dtype'], device=self.device)
+        if '-int4' not in model_id:
+            if not (extra_params.get('load_in_4bit', False) or extra_params.get('load_in_8bit', False)):
+                self.model = self.model.to(dtype=self.params['torch_dtype'], device=self.device)
 
         self.loaded_banner()
 
diff --git a/docker-compose.alt.yml b/docker-compose.alt.yml
index 0b337e5..689cfd5 100644
--- a/docker-compose.alt.yml
+++ b/docker-compose.alt.yml
@@ -1,17 +1,14 @@
 services:
-  server:
+  openedai-vision-alt:
     build:
       args:
         - VERSION=alt
       dockerfile: Dockerfile
-    tty: true
+    container_name: openedai-vision-alt
     image: ghcr.io/matatonic/openedai-vision-alt
     env_file: vision-alt.env # your settings go here
     volumes:
       - ./hf_home:/app/hf_home # for Hugginface model cache
-      # be sure to review and run prepare_minigemini.sh before starting a mini-gemini model
-      - ./model_zoo:/app/model_zoo # for MiniGemini
-      - ./YanweiLi:/app/YanweiLi # for MiniGemini
       - ./model_conf_tests.alt.json:/app/model_conf_tests.json
     ports:
       - 5006:5006
diff --git a/docker-compose.yml b/docker-compose.yml
index 073f62d..1e47538 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,17 +1,14 @@
 services:
-  server:
+  openedai-vision:
     build:
       args:
         - VERSION=latest
       dockerfile: Dockerfile
-    tty: true
+    container_name: openedai-vision
     image: ghcr.io/matatonic/openedai-vision
     env_file: vision.env # your settings go here
     volumes:
       - ./hf_home:/app/hf_home # for Hugginface model cache
-      # be sure to review and run prepare_minigemini.sh before starting a mini-gemini model
-      - ./model_zoo:/app/model_zoo # for MiniGemini
-      - ./YanweiLi:/app/YanweiLi # for MiniGemini
       - ./model_conf_tests.json:/app/model_conf_tests.json
     ports:
       - 5006:5006
diff --git a/model_conf_tests.json b/model_conf_tests.json
index c6c1e41..ee5e563 100644
--- a/model_conf_tests.json
+++ b/model_conf_tests.json
@@ -94,6 +94,7 @@
     ["microsoft/Phi-3.5-vision-instruct", "-A", "flash_attention_2", "--load-in-4bit"],
     ["microsoft/Phi-3.5-vision-instruct", "-A", "flash_attention_2"],
     ["mistralai/Pixtral-12B-2409"],
+    ["openbmb/MiniCPM-V-2_6-int4", "-A", "flash_attention_2", "--device-map", "cuda:0"],
     ["openbmb/MiniCPM-V-2_6", "-A", "flash_attention_2", "--device-map", "cuda:0", "--load-in-4bit"],
     ["openbmb/MiniCPM-V-2_6", "-A", "flash_attention_2", "--device-map", "cuda:0"],
     ["openbmb/MiniCPM-Llama3-V-2_5", "-A", "flash_attention_2", "--device-map", "cuda:0", "--load-in-4bit"],
diff --git a/vision.sample.env b/vision.sample.env
index 89ab412..c145e57 100644
--- a/vision.sample.env
+++ b/vision.sample.env
@@ -98,6 +98,7 @@ HF_HUB_ENABLE_HF_TRANSFER=1
 #CLI_COMMAND="python vision.py -m microsoft/Phi-3.5-vision-instruct -A flash_attention_2 --load-in-4bit" # test pass✅, time: 8.5s, mem: 4.4GB, 13/13 tests passed.
 #CLI_COMMAND="python vision.py -m microsoft/Phi-3.5-vision-instruct -A flash_attention_2" # test pass✅, time: 7.5s, mem: 9.3GB, 13/13 tests passed.
 #CLI_COMMAND="python vision.py -m mistralai/Pixtral-12B-2409" # test pass✅, time: 16.3s, mem: 35.6GB, 13/13 tests passed.
+#CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V-2_6-int4 -A flash_attention_2 --device-map cuda:0" # test pass✅, time: 15.5s, mem: 9.5GB, 13/13 tests passed.
 #CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V-2_6 -A flash_attention_2 --device-map cuda:0 --load-in-4bit" # test pass✅, time: 13.6s, mem: 9.4GB, 13/13 tests passed.
 #CLI_COMMAND="python vision.py -m openbmb/MiniCPM-V-2_6 -A flash_attention_2 --device-map cuda:0" # test pass✅, time: 12.1s, mem: 18.8GB, 13/13 tests passed.
 #CLI_COMMAND="python vision.py -m openbmb/MiniCPM-Llama3-V-2_5 -A flash_attention_2 --device-map cuda:0 --load-in-4bit" # test pass✅, time: 22.8s, mem: 9.0GB, 13/13 tests passed.
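
The backend change above amounts to one extra guard: the openbmb/MiniCPM-V-2_6-int4 checkpoint ships already quantized with bitsandbytes, so calling `.to()` on it after `from_pretrained()` is unnecessary, just as it is for `--load-in-4bit`/`--load-in-8bit` loads. A minimal sketch of that placement logic, outside the backend's wrapper class (the `load_minicpm` helper, its kwargs, and the default device are illustrative only, not part of the patch):

```python
import torch
from transformers import AutoModel

def load_minicpm(model_id: str, device: str = 'cuda:0', extra_params: dict | None = None):
    # Sketch only: the real backend assembles its load parameters in a base class;
    # here a minimal, plausible set of kwargs is passed directly.
    extra_params = extra_params or {}
    params = dict(trust_remote_code=True, torch_dtype=torch.bfloat16, **extra_params)
    model = AutoModel.from_pretrained(model_id, **params).eval()

    # The pre-quantized '-int4' checkpoint and any load_in_4bit/load_in_8bit load
    # are already placed on the device by bitsandbytes, so only plain checkpoints
    # get the explicit dtype/device move.
    if '-int4' not in model_id:
        if not (extra_params.get('load_in_4bit', False) or extra_params.get('load_in_8bit', False)):
            model = model.to(dtype=params['torch_dtype'], device=device)

    return model

# e.g. model = load_minicpm('openbmb/MiniCPM-V-2_6-int4')
```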