Commit bbdeca2

rebase
bigPYJ1151 committed Nov 18, 2024
1 parent 94c3fa4 commit bbdeca2
Showing 2 changed files with 3 additions and 27 deletions.
26 changes: 0 additions & 26 deletions vllm/executor/cpu_executor.py
@@ -2,14 +2,6 @@
 from functools import partial
 from typing import Any, Awaitable, List, Optional, Set, Tuple, Union
 
-<<<<<<< HEAD
-=======
-import torch
-
-import vllm.envs as envs
-from vllm.config import (CacheConfig, ModelConfig, ParallelConfig,
-                         SchedulerConfig)
->>>>>>> 5980981e (fix test)
 from vllm.executor.executor_base import ExecutorAsyncBase, ExecutorBase
 from vllm.executor.multiproc_worker_utils import (ProcessWorkerWrapper,
                                                   ResultHandler, WorkerMonitor)
@@ -62,24 +54,6 @@ def _init_executor(self) -> None:
         os.environ["LOCAL_WORLD_SIZE"] = str(
             self.parallel_config.tensor_parallel_size)
 
-<<<<<<< HEAD
-=======
-        self.model_config = _verify_and_get_model_config(self.model_config)
-        self.cache_config = _verify_and_get_cache_config(self.cache_config)
-        self.scheduler_config = _verify_and_get_scheduler_config(
-            self.scheduler_config)
-        self.parallel_config = _verify_and_get_parallel_config(
-            self.parallel_config)
-
-        if ((self.scheduler_config.chunked_prefill_enabled
-             or self.cache_config.enable_prefix_caching)
-                and self.model_config.dtype == torch.half):
-            logger.warning("chunked-prefill and prefix-cache on the CPU "
-                           "backend does not support fp16 for now,"
-                           " cast to bf16.")
-            self.model_config.dtype = torch.bfloat16
-
->>>>>>> 5980981e (fix test)
         # Multiprocessing-based executor does not support multi-node setting.
         # Since it only works for single node, we can use the loopback address
         # 127.0.0.1 for communication.
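
The two blocks deleted above are stray merge-conflict sections left behind by the rebase: Git's <<<<<<< / ======= / >>>>>>> markers were committed into the file, which is a syntax error Python reports on import. The config-normalization logic inside them now lives in vllm/platforms/cpu.py (second file below). In a Git checkout, git diff --check flags such leftover markers; a small scanner along the following lines does the same in CI. This helper is a hypothetical sketch, not part of vLLM:

import re
import sys

# Matches the three Git conflict markers; "=======" must be the whole
# line, while "<<<<<<<"/">>>>>>>" may carry a label after a space.
CONFLICT_RE = re.compile(r"^(<{7}( .*)?|={7}|>{7}( .*)?)$")

def find_conflict_markers(path: str) -> list[int]:
    """Return 1-based line numbers that still contain conflict markers."""
    with open(path, encoding="utf-8") as f:
        return [i for i, line in enumerate(f, start=1)
                if CONFLICT_RE.match(line.rstrip("\n"))]

if __name__ == "__main__":
    # Usage: python find_conflicts.py file1.py file2.py ...
    for path in sys.argv[1:]:
        for lineno in find_conflict_markers(path):
            print(f"{path}:{lineno}: leftover conflict marker")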
4 changes: 3 additions & 1 deletion vllm/platforms/cpu.py
@@ -61,7 +61,9 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
                 f" {kv_cache_space}, expect a positive integer value.")
 
         scheduler_config = vllm_config.scheduler_config
-        if (scheduler_config.chunked_prefill_enabled and model_config.dtype == torch.half):
+        if ((scheduler_config.chunked_prefill_enabled
+             or cache_config.enable_prefix_caching)
+                and model_config.dtype == torch.half):
             logger.warning("Chunked-prefill on the CPU backend only does not"
                            " support fp16 for now, cast to bf16.")
             model_config.dtype = torch.bfloat16
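
This hunk is the functional half of the commit: the platform-level check that replaces the executor's deleted dtype logic now casts fp16 to bf16 when either chunked prefill or prefix caching is enabled, rather than for chunked prefill alone. Below is a minimal standalone sketch of the behavior the new condition produces; the function name is illustrative, and the config flags are passed as plain arguments instead of being read from VllmConfig as in the real code:

import torch

def cpu_dtype_fallback(chunked_prefill_enabled: bool,
                       enable_prefix_caching: bool,
                       dtype: torch.dtype) -> torch.dtype:
    # Mirror of the updated condition: either feature forces fp16 -> bf16,
    # since the CPU backend supports neither of them in fp16 yet.
    if ((chunked_prefill_enabled or enable_prefix_caching)
            and dtype == torch.half):
        return torch.bfloat16
    return dtype

# Prefix caching now triggers the cast too, not just chunked prefill.
assert cpu_dtype_fallback(False, True, torch.half) is torch.bfloat16
assert cpu_dtype_fallback(True, False, torch.half) is torch.bfloat16
assert cpu_dtype_fallback(False, False, torch.half) is torch.half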
