diff --git a/tests/v1/sample/test_sampler.py b/tests/v1/sample/test_sampler.py index 82180090a0728..d7c9178b7dca4 100644 --- a/tests/v1/sample/test_sampler.py +++ b/tests/v1/sample/test_sampler.py @@ -211,8 +211,6 @@ def test_sampler_presence_penalty(device: str, batch_size: int, # Since all tokens initially have the same logprobs, the non-penalized # tokens will appear at the beginning, while the penalized tokens # will appear at the end of the list. - print(' sampler_output.logprob_token_ids ' + - str(sampler_output.logprob_token_ids)) penalized_token_id = sampler_output.logprob_token_ids[batch_idx][ VOCAB_SIZE - 1] penalized_log_prod = sampler_output.logprobs[batch_idx][VOCAB_SIZE - 1] diff --git a/vllm/v1/sample/sampler.py b/vllm/v1/sample/sampler.py index 55465331df0c5..eabef8ff56a7b 100644 --- a/vllm/v1/sample/sampler.py +++ b/vllm/v1/sample/sampler.py @@ -29,7 +29,6 @@ def forward( sampling_metadata.frequency_penalties, sampling_metadata.repetition_penalties, sampling_metadata.output_token_ids) - print('logits123 ' + str(logits.sort())) logits = self.apply_temperature(logits, sampling_metadata.temperature) logits = self.apply_top_k_top_p(logits, sampling_metadata) probs = self.get_probs(logits) diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index 8cab133d21cb5..89b32df611556 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -902,7 +902,6 @@ def make_sampling_metadata( skip_copy: bool = False, ) -> SamplingMetadata: if not skip_copy: - print('Hello in copy!!!') self.temperature[:self.num_reqs].copy_( self.temperature_cpu_tensor[:self.num_reqs], non_blocking=True) self.top_p[:self.num_reqs].copy_(