From da7510c1778fe8188ea2504185c8d1dfddf6ce95 Mon Sep 17 00:00:00 2001
From: arakowsk-amd <182798202+arakowsk-amd@users.noreply.github.com>
Date: Mon, 16 Dec 2024 13:41:17 -0800
Subject: [PATCH 1/3] adding `--enable-chunked-prefill false`

---
 docs/dev-docker/README.md | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/docs/dev-docker/README.md b/docs/dev-docker/README.md
index 11c0ef04fd8f7..8f39102ad280d 100644
--- a/docs/dev-docker/README.md
+++ b/docs/dev-docker/README.md
@@ -261,7 +261,8 @@ Benchmark Meta-Llama-3.1-405B FP8 with input 128 tokens, output 128 tokens and t
     --num-scheduler-steps 10 \
     --tensor-parallel-size 8 \
     --input-len 128 \
-    --output-len 128
+    --output-len 128 \
+    --enable-chunked-prefill false
 
 If you want to run Meta-Llama-3.1-405B FP16, please run
 
@@ -278,9 +279,7 @@ If you want to run Meta-Llama-3.1-405B FP16, please run
     --swap-space 16 \
     --max-model-len 8192 \
     --max-num-batched-tokens 65536 \
-    --swap-space
-    --max-model-len
-    --gpu-memory-utilization 0.99
+    --enable-chunked-prefill false
 
 For fp8 quantized Llama3.18B/70B models:
 

From 08818103523a106b2766d8194fbe86829cc35e5a Mon Sep 17 00:00:00 2001
From: arakowsk-amd <182798202+arakowsk-amd@users.noreply.github.com>
Date: Mon, 16 Dec 2024 13:48:49 -0800
Subject: [PATCH 2/3] Update README.md

Co-authored-by: Reed Slobodin
---
 docs/dev-docker/README.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/docs/dev-docker/README.md b/docs/dev-docker/README.md
index 8f39102ad280d..b3a295e914f3a 100644
--- a/docs/dev-docker/README.md
+++ b/docs/dev-docker/README.md
@@ -24,12 +24,12 @@ The performance data below was measured on a server with MI300X accelerators wit
 |---|---|
 | BKC | 24.13 |
 | ROCm | version ROCm 6.3 |
-| amdgpu | build 2009461 |
+| amdgpu | build 2041575 |
 | OS | Ubuntu 22.04 |
-| Linux Kernel | 5.15.0-117-generic |
-| BMCVersion | C2789.BC.0809.00 |
-| BiosVersion | C2789.5.BS.1C11.AG.1 |
-| CpldVersion | 02.02.00 |
+| Linux Kernel | 5.15.0-70-generic |
+| BMCVersion | C2789.BC.0811.00 |
+| BiosVersion | C2789.5.BS.1C17.GN.1 |
+| CpldVersion | 02.03.00 |
 | DCSCMCpldVersion | 02.02.00 |
 | CX7 | FW 28.40.1000 |
 | RAM | 1 TB |

From c75a4c446c33f16244cb6346d3001c6c107042c1 Mon Sep 17 00:00:00 2001
From: arakowsk-amd <182798202+arakowsk-amd@users.noreply.github.com>
Date: Tue, 17 Dec 2024 13:47:31 -0800
Subject: [PATCH 3/3] removing --enable-chunked-prefill

---
 docs/dev-docker/README.md | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/docs/dev-docker/README.md b/docs/dev-docker/README.md
index b3a295e914f3a..d8e1c9f68a07b 100644
--- a/docs/dev-docker/README.md
+++ b/docs/dev-docker/README.md
@@ -261,8 +261,7 @@ Benchmark Meta-Llama-3.1-405B FP8 with input 128 tokens, output 128 tokens and t
     --num-scheduler-steps 10 \
     --tensor-parallel-size 8 \
     --input-len 128 \
-    --output-len 128 \
-    --enable-chunked-prefill false
+    --output-len 128
 
 If you want to run Meta-Llama-3.1-405B FP16, please run
 
@@ -278,8 +277,7 @@ If you want to run Meta-Llama-3.1-405B FP16, please run
     --output-len 128 \
     --swap-space 16 \
     --max-model-len 8192 \
-    --max-num-batched-tokens 65536 \
-    --enable-chunked-prefill false
+    --max-num-batched-tokens 65536
 
 For fp8 quantized Llama3.18B/70B models:
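
Net effect of the series: PATCH 3/3 reverts the `--enable-chunked-prefill false` additions from PATCH 1/3, so the benchmark commands end up unchanged apart from PATCH 1/3 also dropping the stray duplicate `--swap-space` / `--max-model-len` / `--gpu-memory-utilization 0.99` lines; the lasting documentation change is the server configuration table update in PATCH 2/3. For reference, the FP8 latency benchmark invocation after the full series should read roughly as below. This is a sketch only: the script path and model argument are assumptions based on the surrounding README context and are not visible in these hunks.

    # Sketch of the post-series command; only the last four flags appear in
    # the hunks above. The script path and model id are placeholders —
    # substitute the actual values from the README.
    python /app/vllm/benchmarks/benchmark_latency.py \
        --model <Llama-3.1-405B-FP8-checkpoint> \
        --num-scheduler-steps 10 \
        --tensor-parallel-size 8 \
        --input-len 128 \
        --output-len 128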