@@ -84,12 +84,12 @@ def test_env(
8484 m.setenv("VLLM_MLA_DISABLE", "1" if use_mla else "0")
8585
8686 if device == "cpu":
87- with patch("vllm.attention.selector.current_platform", CpuPlatform()):
87+ with patch("vllm.platforms.current_platform", CpuPlatform()):
8888 backend = get_attn_backend(16, torch.float16, None, block_size)
8989 assert backend.get_name() == "TORCH_SDPA"
9090
9191 elif device == "hip":
92- with patch("vllm.attention.selector.current_platform", RocmPlatform()):
92+ with patch("vllm.platforms.current_platform", RocmPlatform()):
9393 if use_mla:
9494 # ROCm MLA backend logic:
9595 # - TRITON_MLA: supported when block_size != 1
@@ -126,7 +126,7 @@ def test_env(
126126 assert backend.get_name() == expected
127127
128128 elif device == "cuda":
129- with patch("vllm.attention.selector.current_platform", CudaPlatform()):
129+ with patch("vllm.platforms.current_platform", CudaPlatform()):
130130 if use_mla:
131131 # CUDA MLA backend logic:
132132 # - CUTLASS_MLA: only supported with block_size == 128
@@ -214,12 +214,12 @@ def test_env(
214214def test_fp32_fallback(device: str):
215215 """Test attention backend selection with fp32."""
216216 if device == "cpu":
217- with patch("vllm.attention.selector.current_platform", CpuPlatform()):
217+ with patch("vllm.platforms.current_platform", CpuPlatform()):
218218 backend = get_attn_backend(16, torch.float32, None, 16)
219219 assert backend.get_name() == "TORCH_SDPA"
220220
221221 elif device == "cuda":
222- with patch("vllm.attention.selector.current_platform", CudaPlatform()):
222+ with patch("vllm.platforms.current_platform", CudaPlatform()):
223223 backend = get_attn_backend(16, torch.float32, None, 16)
224224 assert backend.get_name() == "FLEX_ATTENTION"
225225
@@ -277,7 +277,7 @@ def test_invalid_env(monkeypatch: pytest.MonkeyPatch):
277277 """Test that invalid attention backend names raise ValueError."""
278278 with (
279279 monkeypatch.context() as m,
280- patch("vllm.attention.selector.current_platform", CudaPlatform()),
280+ patch("vllm.platforms.current_platform", CudaPlatform()),
281281 ):
282282 m.setenv(STR_BACKEND_ENV_VAR, STR_INVALID_VAL)
283283
0 commit comments