Skip to content

Commit

Permalink
Update
Browse files Browse the repository at this point in the history
Signed-off-by: DarkLight1337 <[email protected]>
  • Loading branch information
DarkLight1337 committed Dec 19, 2024
1 parent c3f1bde commit 32e5197
Showing 1 changed file with 17 additions and 11 deletions.
28 changes: 17 additions & 11 deletions vllm/multimodal/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -599,8 +599,8 @@ def __init__(self, capacity: int) -> None:
# DEBUG: Set to None to disable
self.debug_cache_hit_ratio_steps: Optional[int] = None

self._text_cache = LRUCache[str, BatchFeature](capacity)
self._mm_cache = LRUCache[str, BatchFeature](capacity)
self._fine_text_cache = LRUCache[str, BatchFeature](capacity)
self._fine_mm_cache = LRUCache[str, BatchFeature](capacity)
self._coarse_cache = LRUCache[str, BatchFeature](capacity)

def maybe_log_cache_stats(self, cache: LRUCache, name: str) -> None:
Expand Down Expand Up @@ -674,9 +674,9 @@ def _cached_call_fine(
for idx in range(num_items):
mm_item = {k: [v[idx]] for k, v in mm_data.items()}

self.maybe_log_cache_stats(self._mm_cache, "mm_cache")
self.maybe_log_cache_stats(self._fine_mm_cache, "fine_mm_cache")

processed_mm_item = self._mm_cache.get_or_put(
processed_mm_item = self._fine_mm_cache.get_or_put(
self._hash_kwargs(**mm_item, **mm_kwargs),
default_factory=partial(
ctx.call_hf_processor,
Expand All @@ -687,15 +687,16 @@ def _cached_call_fine(
)

for k, v in processed_mm_item.items():
# Remove the extra batch dimension
processed_mm_items[k].append(v[0])

# NOTE: Some processors do not accept mm-only input, in which case
# we have to fallback to processing `prompt` and `mm_data` together
# Therefore, we place the text processing last to avoid redundant
# computation
self.maybe_log_cache_stats(self._text_cache, "text_cache")
# NOTE: Some processors (e.g. llava) do not accept mm-only input,
# in which case we have to fallback to processing `prompt` and `mm_data`
# together. Therefore, we place the text processing last to avoid
# redundant computation
self.maybe_log_cache_stats(self._fine_text_cache, "fine_text_cache")

processed_text = self._text_cache.get_or_put(
processed_text = self._fine_text_cache.get_or_put(
prompt,
default_factory=partial(
ctx.call_hf_processor,
Expand Down Expand Up @@ -751,7 +752,12 @@ def call_hf_processor(
mm_kwargs,
)
except Exception:
pass # See NOTE in `_cached_call_fine`
# Failures are expected; see NOTE in `_cached_call_fine`
logger.debug(
"Failed to apply processor on each item separately",
stack_info=True,
)
pass

return self._cached_call_coarse(
ctx,
Expand Down

0 comments on commit 32e5197

Please sign in to comment.