diff --git a/src/turbomind/models/llama/LlamaBatch.cc b/src/turbomind/models/llama/LlamaBatch.cc
index f46d7ebe35..1afa591828 100644
--- a/src/turbomind/models/llama/LlamaBatch.cc
+++ b/src/turbomind/models/llama/LlamaBatch.cc
@@ -428,6 +428,10 @@ bool LlamaBatch<T>::Initialize()
         static_assert(sizeof(uintptr_t) == sizeof(void*));
     }
 
+    // clear incoming buffer
+    std::fill(incoming_->requests.begin(), incoming_->requests.end(), nullptr);
+    std::fill(incoming_->sequences.begin(), incoming_->sequences.end(), nullptr);
+
     // in case of swap-in/swap-out or there are holes in active buffer, layout of the buffers is changed
     // generation & sampling need to be re-initialized for correctness
     return exchange || active_holes;