Skip to content

Commit

Permalink
[Misc] Add example for LLaVA-NeXT (#5879)
Browse files Browse the repository at this point in the history
  • Loading branch information
ywang96 authored Jun 27, 2024
1 parent 294104c commit b9e8425
Showing 1 changed file with 38 additions and 0 deletions.
38 changes: 38 additions & 0 deletions examples/llava_next_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from io import BytesIO

import requests
from PIL import Image

from vllm import LLM, SamplingParams
from vllm.multimodal.image import ImagePixelData

# Dynamic image input is currently not supported and therefore
# a fixed image input shape and its corresponding feature size is required.
# See https://github.com/vllm-project/vllm/pull/4199 for the complete
# configuration matrix.

llm = LLM(
model="llava-hf/llava-v1.6-mistral-7b-hf",
image_input_type="pixel_values",
image_token_id=32000,
image_input_shape="1,3,336,336",
image_feature_size=1176,
)

prompt = "[INST] " + "<image>" * 1176 + "\nWhat is shown in this image? [/INST]"
url = "https://h2o-release.s3.amazonaws.com/h2ogpt/bigben.jpg"
image = Image.open(BytesIO(requests.get(url).content))
sampling_params = SamplingParams(temperature=0.8, top_p=0.95, max_tokens=100)

outputs = llm.generate(
{
"prompt": prompt,
"multi_modal_data": ImagePixelData(image),
},
sampling_params=sampling_params)

generated_text = ""
for o in outputs:
generated_text += o.outputs[0].text

print(f"LLM output:{generated_text}")

0 comments on commit b9e8425

Please sign in to comment.