
Commit f9812e2: fix comments
Signed-off-by: jiang1.li <[email protected]>
bigPYJ1151 committed Nov 20, 2024
1 parent 883c6f5 commit f9812e2
Showing 3 changed files with 4 additions and 10 deletions.
6 changes: 0 additions & 6 deletions tests/basic_correctness/test_chunked_prefill.py
@@ -270,9 +270,6 @@ def test_with_prefix_caching(
 @pytest.mark.parametrize("max_tokens", [32])
 @pytest.mark.parametrize("chunked_prefill_token_size", [1, 4, 16])
 @pytest.mark.parametrize("enforce_eager", [False])
-# NOTE: Increasing this in this suite will fail CI because we currently cannot
-# reset distributed env properly. Use a value > 1 just when you test.
-@pytest.mark.parametrize("tensor_parallel_size", [1])
 @pytest.mark.parametrize("attention_backend", ["TORCH_SDPA"])
 @pytest.mark.cpu_model
 @pytest.mark.skipif(not current_platform.is_cpu(), reason="CPU only")
@@ -307,9 +304,6 @@ def test_models_cpu(
 @pytest.mark.parametrize("max_tokens", [16])
 @pytest.mark.parametrize("enforce_eager", [False])
 @pytest.mark.parametrize("chunk_size", [30, 32])
-# NOTE: Increasing this in this suite will fail CI because we currently cannot
-# reset distributed env properly. Use a value > 1 just when you test.
-@pytest.mark.parametrize("tensor_parallel_size", [1])
 @pytest.mark.parametrize("dtype", ["bfloat16"])
 @pytest.mark.cpu_model
 @pytest.mark.skipif(not current_platform.is_cpu(), reason="CPU only")
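A note on the deleted parametrize axis: stacked @pytest.mark.parametrize decorators generate the Cartesian product of their value lists, so removing a one-element tensor_parallel_size axis leaves the generated test matrix unchanged. A minimal standalone sketch of that multiplication (the test name and parameters here are illustrative, not from this repo):

import pytest

# Stacked parametrize decorators multiply: 1 max_tokens value x 3 chunk
# sizes = 3 generated test cases; a one-element axis multiplies by 1.
@pytest.mark.parametrize("max_tokens", [32])
@pytest.mark.parametrize("chunked_prefill_token_size", [1, 4, 16])
def test_matrix_size(max_tokens, chunked_prefill_token_size):
    assert max_tokens == 32
    assert chunked_prefill_token_size in (1, 4, 16)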
6 changes: 3 additions & 3 deletions vllm/attention/backends/torch_sdpa.py
@@ -294,7 +294,7 @@ def build(self, seq_lens: List[int], query_lens: List[int],
             prefill_block_tables = make_tensor_with_pad(
                 self.input_data.prefill_block_tables,
                 pad=0,
-                dtype=torch.int,
+                dtype=torch.int32,
                 device="cpu",
             )
             query_lens_tensor = torch.tensor(prefill_query_lens,
@@ -330,13 +330,13 @@ def build(self, seq_lens: List[int], query_lens: List[int],
         if input_data.num_decode_tokens != 0:
             seq_lens_tensor = torch.tensor(
                 input_data.seq_lens[input_data.num_prefills:],
-                dtype=torch.int,
+                dtype=torch.int32,
                 device="cpu",
             )
             block_tables = make_tensor_with_pad(
                 self.input_data.decode_block_tables,
                 pad=0,
-                dtype=torch.int,
+                dtype=torch.int32,
                 device="cpu",
             )
         else:
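For context on the dtype edits above: torch.int is a documented PyTorch alias for torch.int32, so switching to torch.int32 does not change behavior; it only makes the integer width of the block-table and sequence-length tensors explicit. make_tensor_with_pad, as called above, pads the ragged per-sequence block tables with pad=0 into one rectangular CPU tensor. A quick check of the alias:

import torch

# torch.int and torch.int32 compare equal: the diff above is a
# readability change, not a behavioral one.
assert torch.int == torch.int32
t = torch.zeros(2, 3, dtype=torch.int)
print(t.dtype)  # prints: torch.int32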
2 changes: 1 addition & 1 deletion vllm/worker/cpu_model_runner.py
@@ -295,7 +295,7 @@ def _compute_prompt_input_tokens(self, data: ModelInputData,
                 slot_mapping[i] = slot
             data.slot_mapping.extend(slot_mapping)

-            # The MRPOE positions are prepared in _compute_multi_modal_input
+            # The MROPE positions are prepared in _compute_multi_modal_input
             if data.input_positions is not None:
                 data.input_positions.extend(token_positions)

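For readers unfamiliar with the acronym being corrected: MROPE refers to M-RoPE, the multimodal rotary position embedding used by models such as Qwen2-VL, where each token carries one position index per section (for example temporal, height, and width) rather than a single scalar index. A hedged illustration of that position layout, not vLLM's implementation:

import torch

# Illustration only: M-RoPE keeps one row of positions per section.
# For a text-only prompt the sections typically share the same indices.
num_tokens = 4
mrope_positions = torch.arange(num_tokens).unsqueeze(0).expand(3, -1)
print(mrope_positions)
# tensor([[0, 1, 2, 3],
#         [0, 1, 2, 3],
#         [0, 1, 2, 3]])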
