diff --git a/.github/workflows/scripts/build.sh b/.github/workflows/scripts/build.sh
index 01131807d2021..88a735de40056 100644
--- a/.github/workflows/scripts/build.sh
+++ b/.github/workflows/scripts/build.sh
@@ -12,7 +12,7 @@ export MAX_JOBS=1
 # Make sure release wheels are built for the following architectures
 export PYTORCH_ROCM_ARCH="gfx90a;gfx942"
 
-rm -f $(which sccache)
+rm -f "$(which sccache)"
 
 export MAX_JOBS=32
 
diff --git a/benchmarks/kernels/benchmark_mixtral_moe_rocm.py b/benchmarks/kernels/benchmark_mixtral_moe_rocm.py
index 63080eaf2f11c..8fab21c5fd8f5 100755
--- a/benchmarks/kernels/benchmark_mixtral_moe_rocm.py
+++ b/benchmarks/kernels/benchmark_mixtral_moe_rocm.py
@@ -266,7 +266,7 @@ def run_grid(bs, model, TP):
     print(f"writing config to file {filename}")
     existing_content = {}
     if os.path.exists(filename):
-        with open(filename, "r") as f:
+        with open(filename) as f:
             existing_content = json.load(f)
     existing_content[str(bs)] = best_config
     with open(filename, "w") as f:
diff --git a/vllm/_custom_ops.py b/vllm/_custom_ops.py
index 212177e53e85b..b2320d58f92d2 100644
--- a/vllm/_custom_ops.py
+++ b/vllm/_custom_ops.py
@@ -1,7 +1,7 @@
 import contextlib
 import functools
 import importlib
-from typing import TYPE_CHECKING, List, Optional, Tuple, Type, Union
+from typing import TYPE_CHECKING, List, Optional, Tuple, Union
 
 import torch
 import torch.library
@@ -242,8 +242,8 @@ def scaled_rms_norm(out: torch.Tensor, input: torch.Tensor,
 def scaled_fused_add_rms_norm(out: torch.Tensor, input: torch.Tensor,
                               residual: torch.Tensor, weight: torch.Tensor,
                               scale: torch.Tensor, epsilon: float) -> None:
-    torch.ops._C.fused_add_rms_norm_static_fp8_quant(out, input, residual, weight, scale,
-                                                     epsilon)
+    torch.ops._C.fused_add_rms_norm_static_fp8_quant(out, input, residual,
+                                                     weight, scale, epsilon)
 
 
 def advance_step_flashattn(num_seqs: int, num_queries: int, block_size: int,
diff --git a/vllm/attention/backends/hpu_attn.py b/vllm/attention/backends/hpu_attn.py
index a8f4b09b67274..7d7967a1c0329 100644
--- a/vllm/attention/backends/hpu_attn.py
+++ b/vllm/attention/backends/hpu_attn.py
@@ -141,6 +141,7 @@ def forward(
         k_scale: float = 1.0,
         v_scale: float = 1.0,
         attn_type: AttentionType = AttentionType.DECODER,
+        fp8_out_scale: Optional[torch.Tensor] = None,
     ) -> torch.Tensor:
         """Forward pass with xFormers and PagedAttention.
 
diff --git a/vllm/model_executor/models/grok1.py b/vllm/model_executor/models/grok1.py
index 33173072a5df4..99940c547f3d5 100644
--- a/vllm/model_executor/models/grok1.py
+++ b/vllm/model_executor/models/grok1.py
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Adapted from
 # https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/llama/modeling_llama.py
 # Copyright 2023 The vLLM team.
diff --git a/vllm/utils.py b/vllm/utils.py
index 0a51b6a2bd78e..211d3e86c8b05 100644
--- a/vllm/utils.py
+++ b/vllm/utils.py
@@ -158,7 +158,7 @@ class _Sentinel:
 
 ALL_PINNED_SENTINEL = _Sentinel()
 
-class rpd_trace():
+class rpd_trace:
 
     def __init__(self,
                  filename=None,
@@ -244,7 +244,7 @@ def is_hipScopedMarker_available():
     return hipScopedMarker is not None
 
 
-class rpd_mark():
+class rpd_mark:
 
     def __init__(self, name=None):
         self.name = name