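Make the multimodal processing cache optional

Processors now accept `cache=None` and, when no cache is given, call the
HF processor directly through the input processing context. The registry
passes `None` when `model_config.disable_mm_preprocessor_cache` is set.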

Signed-off-by: DarkLight1337 <[email protected]>
vllm-project · Dec 20, 2024 · 02ea829 · 02ea829
1 parent 7264d4e
commit 02ea829
Show file tree

Hide file tree

Showing 2 changed files with 13 additions and 3 deletions.
diff --git a/vllm/multimodal/processing.py b/vllm/multimodal/processing.py
@@ -777,7 +777,7 @@ def __init__(
         self,
         ctx: InputProcessingContext,
         *,
-        cache: ProcessingCache,
+        cache: Optional[ProcessingCache] = None,
     ) -> None:
         super().__init__()
 
@@ -869,6 +869,13 @@ def _call_hf_processor(
         mm_data: Mapping[str, object],
         mm_kwargs: Mapping[str, object],
     ) -> BatchFeature:
+        if self.cache is None:
+            return self.ctx.call_hf_processor(
+                self._get_hf_processor(**mm_kwargs),
+                dict(text=prompt, **mm_data),
+                mm_kwargs,
+            )
+
         return self.cache.call_hf_processor(
             self.ctx,
             self._get_hf_processor(**mm_kwargs),

diff --git a/vllm/multimodal/registry.py b/vllm/multimodal/registry.py
@@ -32,7 +32,7 @@ def __call__(
         self,
         ctx: InputProcessingContext,
         *,
-        cache: ProcessingCache,
+        cache: Optional[ProcessingCache] = None,
     ) -> BaseMultiModalProcessor:
         ...
 
@@ -359,4 +359,7 @@ def create_processor(
         processor_factory = self._processor_factories[model_cls]
 
         ctx = InputProcessingContext(model_config, tokenizer)
-        return processor_factory(ctx, cache=self._processing_cache)
+        cache = (None if model_config.disable_mm_preprocessor_cache else
+                 self._processing_cache)
+
+        return processor_factory(ctx, cache=cache)