diff --git a/.github/workflows/scripts/build.sh b/.github/workflows/scripts/build.sh
index 01131807d2021..88a735de40056 100644
--- a/.github/workflows/scripts/build.sh
+++ b/.github/workflows/scripts/build.sh
@@ -12,7 +12,7 @@ export MAX_JOBS=1
 # Make sure release wheels are built for the following architectures
 export PYTORCH_ROCM_ARCH="gfx90a;gfx942"
 
-rm -f $(which sccache)
+rm -f "$(which sccache)"
 
 export MAX_JOBS=32
 
diff --git a/benchmarks/kernels/benchmark_mixtral_moe_rocm.py b/benchmarks/kernels/benchmark_mixtral_moe_rocm.py
index 63080eaf2f11c..8fab21c5fd8f5 100755
--- a/benchmarks/kernels/benchmark_mixtral_moe_rocm.py
+++ b/benchmarks/kernels/benchmark_mixtral_moe_rocm.py
@@ -266,7 +266,7 @@ def run_grid(bs, model, TP):
     print(f"writing config to file {filename}")
     existing_content = {}
     if os.path.exists(filename):
-        with open(filename, "r") as f:
+        with open(filename) as f:
             existing_content = json.load(f)
     existing_content[str(bs)] = best_config
     with open(filename, "w") as f:
diff --git a/vllm/_custom_ops.py b/vllm/_custom_ops.py
index 212177e53e85b..b2320d58f92d2 100644
--- a/vllm/_custom_ops.py
+++ b/vllm/_custom_ops.py
@@ -1,7 +1,7 @@
 import contextlib
 import functools
 import importlib
-from typing import TYPE_CHECKING, List, Optional, Tuple, Type, Union
+from typing import TYPE_CHECKING, List, Optional, Tuple, Union
 
 import torch
 import torch.library
@@ -242,8 +242,8 @@ def scaled_rms_norm(out: torch.Tensor, input: torch.Tensor,
 def scaled_fused_add_rms_norm(out: torch.Tensor, input: torch.Tensor,
                               residual: torch.Tensor, weight: torch.Tensor,
                               scale: torch.Tensor, epsilon: float) -> None:
-    torch.ops._C.fused_add_rms_norm_static_fp8_quant(out, input, residual, weight, scale,
-                                                     epsilon)
+    torch.ops._C.fused_add_rms_norm_static_fp8_quant(out, input, residual,
+                                                     weight, scale, epsilon)
 
 
 def advance_step_flashattn(num_seqs: int, num_queries: int, block_size: int,
diff --git a/vllm/attention/backends/hpu_attn.py b/vllm/attention/backends/hpu_attn.py
index a8f4b09b67274..7d7967a1c0329 100644
--- a/vllm/attention/backends/hpu_attn.py
+++ b/vllm/attention/backends/hpu_attn.py
@@ -141,6 +141,7 @@ def forward(
         k_scale: float = 1.0,
         v_scale: float = 1.0,
         attn_type: AttentionType = AttentionType.DECODER,
+        fp8_out_scale: Optional[torch.Tensor] = None,
     ) -> torch.Tensor:
         """Forward pass with xFormers and PagedAttention.
 
diff --git a/vllm/model_executor/models/grok1.py b/vllm/model_executor/models/grok1.py
index 33173072a5df4..99940c547f3d5 100644
--- a/vllm/model_executor/models/grok1.py
+++ b/vllm/model_executor/models/grok1.py
@@ -1,4 +1,3 @@
-# coding=utf-8
 # Adapted from
 # https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/llama/modeling_llama.py
 # Copyright 2023 The vLLM team.
diff --git a/vllm/utils.py b/vllm/utils.py
index 0a51b6a2bd78e..211d3e86c8b05 100644
--- a/vllm/utils.py
+++ b/vllm/utils.py
@@ -158,7 +158,7 @@ class _Sentinel:
 
 ALL_PINNED_SENTINEL = _Sentinel()
 
-class rpd_trace():
+class rpd_trace:
 
     def __init__(self,
                  filename=None,
@@ -244,7 +244,7 @@ def is_hipScopedMarker_available():
     return hipScopedMarker is not None
 
 
-class rpd_mark():
+class rpd_mark:
 
     def __init__(self, name=None):
         self.name = name