[Hardware][XPU] using current_platform.is_xpu (vllm-project#9605)
Signed-off-by: Erkin Sagiroglu <[email protected]>
MengqingCao authored and Erkin Sagiroglu committed Oct 26, 2024
1 parent d1a76b2 commit e618fe8
Showing 6 changed files with 16 additions and 38 deletions.
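In short, this commit drops the standalone is_xpu() helper from vllm.utils and switches every caller to the platform abstraction's current_platform.is_xpu(). As a rough before/after sketch of the call-site pattern (an illustration, not a verbatim excerpt from the tree):

# Before: ad-hoc helper imported from vllm.utils
from vllm.utils import is_xpu

if is_xpu():
    ...  # XPU-specific code path

# After: the shared platform object from vllm.platforms
from vllm.platforms import current_platform

if current_platform.is_xpu():
    ...  # same XPU-specific code path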
6 changes: 3 additions & 3 deletions vllm/attention/selector.py
@@ -10,7 +10,7 @@
from vllm.attention.backends.abstract import AttentionBackend
from vllm.logger import init_logger
from vllm.platforms import current_platform
-from vllm.utils import STR_BACKEND_ENV_VAR, is_hip, is_openvino, is_xpu
+from vllm.utils import STR_BACKEND_ENV_VAR, is_hip, is_openvino

logger = init_logger(__name__)

@@ -136,7 +136,7 @@ def get_attn_backend(
from vllm.attention.backends.openvino import OpenVINOAttentionBackend
return OpenVINOAttentionBackend
elif backend == _Backend.IPEX:
-assert is_xpu(), RuntimeError(
+assert current_platform.is_xpu(), RuntimeError(
"IPEX attention backend is only used for the XPU device.")
logger.info("Using IPEX attention backend.")
from vllm.attention.backends.ipex_attn import IpexAttnBackend
@@ -198,7 +198,7 @@ def which_attn_to_use(
logger.info("Cannot use %s backend on OpenVINO.", selected_backend)
return _Backend.OPENVINO

-if is_xpu():
+if current_platform.is_xpu():
if selected_backend != _Backend.IPEX:
logger.info("Cannot use %s backend on XPU.", selected_backend)
return _Backend.IPEX
4 changes: 2 additions & 2 deletions vllm/config.py
@@ -17,7 +17,7 @@
get_hf_image_processor_config,
get_hf_text_config)
from vllm.utils import (GiB_bytes, cuda_device_count_stateless, get_cpu_memory,
-is_hip, is_openvino, is_xpu, print_warning_once)
+is_hip, is_openvino, print_warning_once)

if TYPE_CHECKING:
from ray.util.placement_group import PlacementGroup
@@ -1121,7 +1121,7 @@ def __init__(self, device: str = "auto") -> None:
self.device_type = "tpu"
elif current_platform.is_cpu():
self.device_type = "cpu"
-elif is_xpu():
+elif current_platform.is_xpu():
self.device_type = "xpu"
else:
raise RuntimeError("Failed to infer device type")
4 changes: 2 additions & 2 deletions vllm/executor/ray_utils.py
@@ -10,7 +10,7 @@
from vllm.logger import init_logger
from vllm.platforms import current_platform
from vllm.sequence import ExecuteModelRequest, IntermediateTensors
-from vllm.utils import get_ip, is_hip, is_xpu
+from vllm.utils import get_ip, is_hip
from vllm.worker.worker_base import WorkerWrapperBase

logger = init_logger(__name__)
@@ -231,7 +231,7 @@ def initialize_ray_cluster(
assert_ray_available()

# Connect to a ray cluster.
-if is_hip() or is_xpu():
+if is_hip() or current_platform.is_xpu():
ray.init(address=ray_address,
ignore_reinit_error=True,
num_gpus=parallel_config.world_size)
4 changes: 2 additions & 2 deletions vllm/model_executor/custom_op.py
@@ -7,7 +7,7 @@
from vllm.compilation.levels import CompilationLevel
from vllm.logger import init_logger
from vllm.platforms import current_platform
-from vllm.utils import is_hip, is_xpu, print_warning_once
+from vllm.utils import is_hip, print_warning_once

logger = init_logger(__name__)

@@ -78,7 +78,7 @@ def dispatch_forward(self):
return self.forward_cpu
elif current_platform.is_tpu():
return self.forward_tpu
-elif is_xpu():
+elif current_platform.is_xpu():
return self.forward_xpu
else:
return self.forward_cuda
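The dispatch_forward hunk above shows the per-platform dispatch pattern used by vLLM's custom ops: query the platform once and hand back the matching bound method, defaulting to the CUDA path. Below is a minimal self-contained sketch of the same idea; _FakePlatform and MyCustomOp are hypothetical names used only for illustration, not vLLM classes.

class _FakePlatform:
    # Stand-in for vllm.platforms.current_platform, only for this sketch.
    def __init__(self, name: str) -> None:
        self._name = name

    def is_cpu(self) -> bool:
        return self._name == "cpu"

    def is_tpu(self) -> bool:
        return self._name == "tpu"

    def is_xpu(self) -> bool:
        return self._name == "xpu"


current_platform = _FakePlatform("xpu")  # assume an XPU host for the demo


class MyCustomOp:
    # Hypothetical op that chooses a forward implementation per platform.
    def forward_cpu(self, x):
        return x

    def forward_tpu(self, x):
        return x

    def forward_xpu(self, x):
        return x

    def forward_cuda(self, x):
        return x

    def dispatch_forward(self):
        # Mirror the structure in the diff: check the platform and return
        # the matching bound method, falling back to the CUDA path.
        if current_platform.is_cpu():
            return self.forward_cpu
        elif current_platform.is_tpu():
            return self.forward_tpu
        elif current_platform.is_xpu():
            return self.forward_xpu
        return self.forward_cuda


op = MyCustomOp()
print(op.dispatch_forward().__name__)  # prints "forward_xpu" with the fake platform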
29 changes: 3 additions & 26 deletions vllm/utils.py
@@ -327,29 +327,6 @@ def is_openvino() -> bool:
return False


-@lru_cache(maxsize=None)
-def is_xpu() -> bool:
-    from importlib.metadata import PackageNotFoundError, version
-    try:
-        is_xpu_flag = "xpu" in version("vllm")
-    except PackageNotFoundError:
-        return False
-    # vllm is not build with xpu
-    if not is_xpu_flag:
-        return False
-    try:
-        import intel_extension_for_pytorch as ipex # noqa: F401
-        _import_ipex = True
-    except ImportError as e:
-        logger.warning("Import Error for IPEX: %s", e.msg)
-        _import_ipex = False
-    # ipex dependency is not ready
-    if not _import_ipex:
-        logger.warning("not found ipex lib")
-        return False
-    return hasattr(torch, "xpu") and torch.xpu.is_available()
-
-
@lru_cache(maxsize=None)
def get_max_shared_memory_bytes(gpu: int = 0) -> int:
"""Returns the maximum shared memory per thread block in bytes."""
@@ -379,7 +356,7 @@ def seed_everything(seed: int) -> None:
if current_platform.is_cuda_alike():
torch.cuda.manual_seed_all(seed)

-if is_xpu():
+if current_platform.is_xpu():
torch.xpu.manual_seed_all(seed)


@@ -774,7 +751,7 @@ def is_pin_memory_available() -> bool:
print_warning_once("Using 'pin_memory=False' as WSL is detected. "
"This may slow down the performance.")
return False
-elif is_xpu():
+elif current_platform.is_xpu():
print_warning_once("Pin memory is not supported on XPU.")
return False
elif current_platform.is_neuron():
@@ -795,7 +772,7 @@ def current_memory_usage(self) -> float:
if current_platform.is_cuda_alike():
torch.cuda.reset_peak_memory_stats(self.device)
mem = torch.cuda.max_memory_allocated(self.device)
-elif is_xpu():
+elif current_platform.is_xpu():
torch.xpu.reset_peak_memory_stats(self.device) # type: ignore
mem = torch.xpu.max_memory_allocated(self.device) # type: ignore
return mem
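For context, the is_xpu() helper deleted above detected XPU support from three signals: an "xpu"-tagged vllm package version, an importable intel_extension_for_pytorch, and an available torch.xpu device. Below is an illustrative stand-alone version of that runtime check, kept only as a sketch of the logic the platform layer now presumably encapsulates (the function name is made up, not a vLLM API):

import torch


def xpu_is_available() -> bool:
    # IPEX supplies the XPU backend for PyTorch; without it there is no XPU.
    try:
        import intel_extension_for_pytorch  # noqa: F401
    except ImportError:
        return False
    # torch.xpu exists only on XPU-enabled PyTorch builds.
    return hasattr(torch, "xpu") and torch.xpu.is_available()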
7 changes: 4 additions & 3 deletions vllm/worker/xpu_worker.py
@@ -17,7 +17,7 @@
from vllm.distributed.parallel_state import get_pp_group
from vllm.logger import init_logger
from vllm.model_executor import set_random_seed
-from vllm.utils import is_xpu
+from vllm.platforms import current_platform
from vllm.worker.cache_engine import CacheEngine
from vllm.worker.worker import Worker
from vllm.worker.worker_base import LoraNotSupportedWorkerBase
@@ -53,7 +53,7 @@ def __init__(
observability_config: Optional[ObservabilityConfig] = None,
) -> None:
assert device_config.device_type == "xpu"
-assert is_xpu()
+assert current_platform.is_xpu()

self.model_config = model_config
self.parallel_config = parallel_config
@@ -91,7 +91,8 @@ def __init__(
self.gpu_cache: Optional[List[List[torch.Tensor]]]

def init_device(self) -> None:
-if self.device_config.device.type == "xpu" and is_xpu():
+if self.device_config.device.type == "xpu" and current_platform.is_xpu(
+):
self.device = torch.device(f"xpu:{self.local_rank}")
torch.xpu.set_device(self.device)
torch.xpu.empty_cache()
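The init_device hunk above only binds an XPU device when both the configured device type and the detected platform agree. A compact sketch of that guard follows (the helper name is invented for illustration, vLLM is assumed to be installed, and the XPU branch only executes on an XPU-enabled PyTorch build):

import torch
from vllm.platforms import current_platform  # as imported in the diff above


def bind_device(configured_type: str, local_rank: int) -> torch.device:
    # Only touch torch.xpu when the platform actually reports XPU support;
    # otherwise fall back to CPU so the sketch degrades gracefully.
    if configured_type == "xpu" and current_platform.is_xpu():
        device = torch.device(f"xpu:{local_rank}")
        torch.xpu.set_device(device)
        torch.xpu.empty_cache()
        return device
    return torch.device("cpu")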
