diff --git a/.buildkite/test-template.j2 b/.buildkite/test-template.j2
index e7b24268ba398..ce448836a8278 100644
--- a/.buildkite/test-template.j2
+++ b/.buildkite/test-template.j2
@@ -19,7 +19,7 @@ steps:
         - exit_status: -10  # Agent was lost
           limit: 5
     agents:
-      queue: amd
+      queue: amd-cpu
 
 {% for step in steps %}
 {% if step.mirror_hardwares and "amd" in step.mirror_hardwares %}
@@ -27,7 +27,7 @@ steps:
     depends_on: 
       - "amd-build"
     agents:
-      queue: amd
+      queue: amd_gpu
     commands: 
       - bash .buildkite/run-amd-test.sh "cd {{ (step.working_dir or default_working_dir) | safe  }} ; {{ step.command  or (step.commands | join(" && ")) | safe }}"
     env:
diff --git a/vllm/attention/ops/triton_flash_attention.py b/vllm/attention/ops/triton_flash_attention.py
index a49df831b46ea..3671c2f91e3b7 100644
--- a/vllm/attention/ops/triton_flash_attention.py
+++ b/vllm/attention/ops/triton_flash_attention.py
@@ -912,9 +912,8 @@ def check_and_convert(t, scale):
         p_descale = 1.0 / p_scale
         o_descale = 1.0 / o_scale
 
-        if is_navi():
-            max_seqlens_q = 0
-            max_seqlens_k = 0
+        arg_max_seqlens_q = 0 if is_navi() else max_seqlens_q
+        arg_max_seqlens_k = 0 if is_navi() else max_seqlens_k
 
         attn_fwd[grid](
             q,
@@ -944,8 +943,8 @@ def check_and_convert(t, scale):
             HQ=nheads_q,
             HK=nheads_k,
             ACTUAL_BLOCK_DMODEL=head_size,
-            MAX_SEQLENS_Q=max_seqlens_q,
-            MAX_SEQLENS_K=max_seqlens_k,
+            MAX_SEQLENS_Q=arg_max_seqlens_q,
+            MAX_SEQLENS_K=arg_max_seqlens_k,
             IS_CAUSAL=causal,
             VARLEN=True,
             BLOCK_DMODEL=padded_d_model,
diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py
index 00efa056f7ef0..d2f7cd40e25b2 100644
--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -150,6 +150,8 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
             elif vllm_config.speculative_config:
                 parallel_config.worker_cls = \
                     "vllm.spec_decode.spec_decode_worker.create_spec_worker"
+                parallel_config.sd_worker_cls = \
+                    "vllm.worker.worker.Worker"
             else:
                 parallel_config.worker_cls = "vllm.worker.worker.Worker"