diff --git a/tests/basic_correctness/test_chunked_prefill.py b/tests/basic_correctness/test_chunked_prefill.py
index ffe8de0ba0849..383a10e5b96b4 100644
--- a/tests/basic_correctness/test_chunked_prefill.py
+++ b/tests/basic_correctness/test_chunked_prefill.py
@@ -270,9 +270,6 @@ def test_with_prefix_caching(
 @pytest.mark.parametrize("max_tokens", [32])
 @pytest.mark.parametrize("chunked_prefill_token_size", [1, 4, 16])
 @pytest.mark.parametrize("enforce_eager", [False])
-# NOTE: Increasing this in this suite will fail CI because we currently cannot
-# reset distributed env properly. Use a value > 1 just when you test.
-@pytest.mark.parametrize("tensor_parallel_size", [1])
 @pytest.mark.parametrize("attention_backend", ["TORCH_SDPA"])
 @pytest.mark.cpu_model
 @pytest.mark.skipif(not current_platform.is_cpu(), reason="CPU only")
@@ -307,9 +304,6 @@ def test_models_cpu(
 @pytest.mark.parametrize("max_tokens", [16])
 @pytest.mark.parametrize("enforce_eager", [False])
 @pytest.mark.parametrize("chunk_size", [30, 32])
-# NOTE: Increasing this in this suite will fail CI because we currently cannot
-# reset distributed env properly. Use a value > 1 just when you test.
-@pytest.mark.parametrize("tensor_parallel_size", [1])
 @pytest.mark.parametrize("dtype", ["bfloat16"])
 @pytest.mark.cpu_model
 @pytest.mark.skipif(not current_platform.is_cpu(), reason="CPU only")
diff --git a/vllm/attention/backends/torch_sdpa.py b/vllm/attention/backends/torch_sdpa.py
index 00b72f893e78d..3d025df26a7a1 100644
--- a/vllm/attention/backends/torch_sdpa.py
+++ b/vllm/attention/backends/torch_sdpa.py
@@ -294,7 +294,7 @@ def build(self, seq_lens: List[int], query_lens: List[int],
             prefill_block_tables = make_tensor_with_pad(
                 self.input_data.prefill_block_tables,
                 pad=0,
-                dtype=torch.int,
+                dtype=torch.int32,
                 device="cpu",
             )
             query_lens_tensor = torch.tensor(prefill_query_lens,
@@ -330,13 +330,13 @@ def build(self, seq_lens: List[int], query_lens: List[int],
         if input_data.num_decode_tokens != 0:
             seq_lens_tensor = torch.tensor(
                 input_data.seq_lens[input_data.num_prefills:],
-                dtype=torch.int,
+                dtype=torch.int32,
                 device="cpu",
             )
             block_tables = make_tensor_with_pad(
                 self.input_data.decode_block_tables,
                 pad=0,
-                dtype=torch.int,
+                dtype=torch.int32,
                 device="cpu",
             )
         else:
diff --git a/vllm/worker/cpu_model_runner.py b/vllm/worker/cpu_model_runner.py
index 7d566c45ac2a9..66bd844c94901 100644
--- a/vllm/worker/cpu_model_runner.py
+++ b/vllm/worker/cpu_model_runner.py
@@ -295,7 +295,7 @@ def _compute_prompt_input_tokens(self, data: ModelInputData,
                 slot_mapping[i] = slot
 
         data.slot_mapping.extend(slot_mapping)
 
-        # The MRPOE positions are prepared in _compute_multi_modal_input
+        # The MROPE positions are prepared in _compute_multi_modal_input
         if data.input_positions is not None:
            data.input_positions.extend(token_positions)
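
For context on the `dtype=torch.int` → `dtype=torch.int32` change in `torch_sdpa.py`: in PyTorch, `torch.int` is an alias of `torch.int32`, so the change makes the dtype explicit rather than altering behavior. A minimal standalone sketch (not part of the diff) illustrating the equivalence:

```python
import torch

# torch.int is an alias of torch.int32, so both spellings yield the same dtype.
assert torch.int == torch.int32

a = torch.tensor([1, 2, 3], dtype=torch.int)
b = torch.tensor([1, 2, 3], dtype=torch.int32)
assert a.dtype == b.dtype == torch.int32
```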