
Commit

fix formatter
xingyaoww committed Jan 27, 2025
1 parent 6400075 commit e265deb
Showing 1 changed file with 14 additions and 13 deletions.
27 changes: 14 additions & 13 deletions verl/workers/rollout/vllm_rollout/vllm_rollout.py
@@ -89,19 +89,20 @@ def __init__(self, actor_module: nn.Module, config: DictConfig, tokenizer, model

         assert model_hf_config.max_position_embeddings >= config.prompt_length + config.response_length, \
             "model context length should be greater than total sequence length"
-        self.inference_engine = LLM(actor_module,
-                                    tokenizer=tokenizer,
-                                    model_hf_config=model_hf_config,
-                                    tensor_parallel_size=tensor_parallel_size,
-                                    dtype=config.dtype,
-                                    enforce_eager=config.enforce_eager,
-                                    gpu_memory_utilization=config.gpu_memory_utilization,
-                                    skip_tokenizer_init=False,
-                                    max_model_len=config.prompt_length + config.response_length,
-                                    load_format=config.load_format,
-                                    disable_log_stats=False,
-                                    max_num_batched_tokens=max_num_batched_tokens,
-                                    )
+        self.inference_engine = LLM(
+            actor_module,
+            tokenizer=tokenizer,
+            model_hf_config=model_hf_config,
+            tensor_parallel_size=tensor_parallel_size,
+            dtype=config.dtype,
+            enforce_eager=config.enforce_eager,
+            gpu_memory_utilization=config.gpu_memory_utilization,
+            skip_tokenizer_init=False,
+            max_model_len=config.prompt_length + config.response_length,
+            load_format=config.load_format,
+            disable_log_stats=False,
+            max_num_batched_tokens=max_num_batched_tokens,
+        )

         # Offload vllm model to reduce peak memory usage
         self.inference_engine.offload_model_weights()
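
Note: the constructor being reformatted is verl's patched vllm LLM class, which accepts an in-memory actor_module and a model_hf_config rather than a model name, and which exposes offload_model_weights(). As a point of reference only, a minimal sketch of a comparable call against upstream vLLM, assuming a hypothetical Hugging Face model id and illustrative values in place of the config fields, might look like:

    # Sketch against upstream vLLM; model id and numeric values are illustrative,
    # not taken from this commit. verl's patched LLM takes actor_module /
    # model_hf_config instead of a model name.
    from vllm import LLM, SamplingParams

    llm = LLM(
        model="Qwen/Qwen2-7B-Instruct",      # hypothetical; verl passes actor_module here
        tensor_parallel_size=1,
        dtype="bfloat16",
        enforce_eager=True,
        gpu_memory_utilization=0.5,
        skip_tokenizer_init=False,
        max_model_len=1024 + 512,            # prompt_length + response_length
        load_format="auto",
        disable_log_stats=False,
        max_num_batched_tokens=8192,
    )
    outputs = llm.generate(["Hello"], SamplingParams(max_tokens=16))

The only behavioral constraint in the surrounding code is the assert above: max_model_len is set to config.prompt_length + config.response_length, which must fit within the model's max_position_embeddings.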
