Commit

glm-4v add input process

sixsixcoder committed Oct 9, 2024
1 parent bcbf4f1 commit 5054c7f
Showing 3 changed files with 268 additions and 31 deletions.
21 changes: 21 additions & 0 deletions examples/offline_inference_vision_language.py
@@ -11,6 +11,9 @@
from vllm.assets.image import ImageAsset
from vllm.assets.video import VideoAsset
from vllm.utils import FlexibleArgumentParser
import os

# Pin the example to a single local GPU for development; adjust or remove as needed.
os.environ['CUDA_VISIBLE_DEVICES'] = '3'


# LLaVA-1.5
@@ -196,6 +199,23 @@ def run_qwen2_vl(question):
    return llm, prompt, stop_token_ids


def run_glm4v(question):
    model_name = "THUDM/glm-4v-9b"

    llm = LLM(
        model=model_name,
        tensor_parallel_size=1,
        max_model_len=8192,
        trust_remote_code=True,
        enforce_eager=True)
    # Full GLM-4V chat template, for reference:
    # f"[gMASK]<sop><|user|>{question}<|begin_of_image|><|endoftext|><|end_of_image|><|assistant|>"
    prompt = question
    # GLM-4 special tokens: <|endoftext|>, <|user|>, <|observation|>
    stop_token_ids = [151329, 151336, 151338]
    return llm, prompt, stop_token_ids


model_example_map = {
    "llava": run_llava,
    "llava-next": run_llava_next,
@@ -209,6 +229,7 @@ def run_qwen2_vl(question):
    "internvl_chat": run_internvl,
    "qwen_vl": run_qwen_vl,
    "qwen2_vl": run_qwen2_vl,
    "glm4v": run_glm4v,
}


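For reference, the driver in examples/offline_inference_vision_language.py consumes each run_* helper's return values roughly as in the minimal sketch below. This is an illustration only: the question text, image asset, and sampling settings are placeholders, and the real script parses them from command-line arguments.

from vllm import SamplingParams
from vllm.assets.image import ImageAsset

# Build the engine, prompt, and stop tokens via the new helper.
llm, prompt, stop_token_ids = run_glm4v("What is shown in this image?")

# The stop token IDs end generation at GLM-4's special tokens.
sampling_params = SamplingParams(temperature=0.2,
                                 max_tokens=64,
                                 stop_token_ids=stop_token_ids)

# The image is passed alongside the text prompt via multi_modal_data.
image = ImageAsset("cherry_blossom").pil_image
outputs = llm.generate(
    {
        "prompt": prompt,
        "multi_modal_data": {"image": image},
    },
    sampling_params=sampling_params)

for o in outputs:
    print(o.outputs[0].text)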
121 changes: 121 additions & 0 deletions tests/models/decoder_only/vision_language/test_glm4.py
@@ -0,0 +1,121 @@
# tests/models/decoder_only/vision_language/test_glm4.py
import os
from typing import List, Optional, Tuple, Type

import pytest

from vllm.multimodal.utils import rescale_image_size

from ....conftest import (IMAGE_ASSETS, HfRunner, PromptImageInput,
                          VllmRunner)
from ...utils import check_logprobs_close

HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
    "stop_sign":
    "What's the content of the image?",
    "cherry_blossom":
    "What is the season?",
})

# Pin the tests to a single local GPU for development; adjust or remove as needed.
os.environ['CUDA_VISIBLE_DEVICES'] = '3'

models = ["THUDM/glm-4v-9b"]

# bfloat16 is used on both the GPU and CPU backends.
target_dtype = "bfloat16"


def run_test(
    hf_runner: Type[HfRunner],
    vllm_runner: Type[VllmRunner],
    inputs: List[Tuple[List[str], PromptImageInput]],
    model: str,
    *,
    dtype: str,
    max_tokens: int,
    num_logprobs: int,
    mm_limit: int,
    tensor_parallel_size: int,
    distributed_executor_backend: Optional[str] = None,
):
    # max_model_len should be greater than image_feature_size
    with vllm_runner(model,
                     max_model_len=4096,
                     max_num_seqs=1,
                     dtype=dtype,
                     limit_mm_per_prompt={"image": mm_limit},
                     tensor_parallel_size=tensor_parallel_size,
                     distributed_executor_backend=distributed_executor_backend,
                     enforce_eager=True) as vllm_model:
        stop_token_ids = [151329, 151336, 151338]
        vllm_outputs_per_image = [
            vllm_model.generate_greedy_logprobs(prompts,
                                                max_tokens,
                                                num_logprobs=num_logprobs,
                                                images=images,
                                                stop_token_ids=stop_token_ids)
            for prompts, images in inputs
        ]

    with hf_runner(model, dtype=dtype) as hf_model:
        eos_token_id = hf_model.tokenizer.eos_token_id
        hf_outputs_per_image = [
            hf_model.generate_greedy_logprobs_limit(prompts,
                                                    max_tokens,
                                                    num_logprobs=num_logprobs,
                                                    images=images,
                                                    eos_token_id=eos_token_id)
            for prompts, images in inputs
        ]

    for hf_outputs, vllm_outputs in zip(hf_outputs_per_image,
                                        vllm_outputs_per_image):
        check_logprobs_close(
            outputs_0_lst=hf_outputs,
            outputs_1_lst=vllm_outputs,
            name_0="hf",
            name_1="vllm",
        )


@pytest.mark.parametrize("model", models)
@pytest.mark.parametrize(
    "size_factors",
    [
        # No image
        [],
        # Single-scale
        [1.0],
        # Single-scale, batched
        [1.0, 1.0, 1.0],
        # Multi-scale
        [0.25, 0.5, 1.0],
    ],
)
@pytest.mark.parametrize("dtype", [target_dtype])
@pytest.mark.parametrize("max_tokens", [128])
@pytest.mark.parametrize("num_logprobs", [5])
def test_models(hf_runner, vllm_runner, image_assets, model, size_factors,
                dtype: str, max_tokens: int, num_logprobs: int) -> None:
    images = [asset.pil_image for asset in image_assets]

    inputs_per_image = [(
        [prompt for _ in size_factors],
        [rescale_image_size(image, factor) for factor in size_factors],
    ) for image, prompt in zip(images, HF_IMAGE_PROMPTS)]

    run_test(
        hf_runner,
        vllm_runner,
        inputs_per_image,
        model,
        dtype=dtype,
        max_tokens=max_tokens,
        num_logprobs=num_logprobs,
        mm_limit=1,
        tensor_parallel_size=1,
    )
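To run the new test file locally, a pytest invocation along these lines should work; the path comes from this commit, while the flags are only an example.

import sys

import pytest

# Run only the GLM-4V vision-language test added in this commit.
sys.exit(pytest.main([
    "-v", "-s",
    "tests/models/decoder_only/vision_language/test_glm4.py",
]))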
