From bc1a2bdf98535a4197e865b8182fb10612091302 Mon Sep 17 00:00:00 2001 From: 50h100a Date: Mon, 16 Dec 2024 19:47:42 +0000 Subject: [PATCH] do not use cached chunks for prompt_logprobs --- aphrodite/processing/scheduler.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/aphrodite/processing/scheduler.py b/aphrodite/processing/scheduler.py index 9fecc6ebc..1452cdc75 100644 --- a/aphrodite/processing/scheduler.py +++ b/aphrodite/processing/scheduler.py @@ -1046,9 +1046,6 @@ def schedule(self) -> Tuple[List[SequenceGroupMetadata], SchedulerOutputs]: scheduler_outputs = self._schedule() now = time.time() - if not self.cache_config.enable_prefix_caching: - common_computed_block_nums = [] - # Create input data structures. seq_group_metadata_list: List[SequenceGroupMetadata] = [] for i, scheduled_seq_group in enumerate( @@ -1079,10 +1076,14 @@ def schedule(self) -> Tuple[List[SequenceGroupMetadata], SchedulerOutputs]: block_tables[seq_id] = self.block_manager.get_block_table(seq) self.block_manager.access_all_blocks_in_seq(seq, now) - if self.cache_config.enable_prefix_caching: + + if (self.cache_config.enable_prefix_caching and + not seq_group.sampling_params.prompt_logprobs): common_computed_block_nums = ( self.block_manager.get_common_computed_block_ids( seq_group.get_seqs(status=SequenceStatus.RUNNING))) + else: + common_computed_block_nums = [] do_sample = True is_prompt = seq_group.is_prefill()