From 8c8d8bfc45aac49f44bc9dbddaaff4cf6adae737 Mon Sep 17 00:00:00 2001
From: Li Zhang
Date: Mon, 6 Nov 2023 14:29:08 +0000
Subject: [PATCH] clear incoming buffer

---
 src/turbomind/models/llama/LlamaBatch.cc | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/turbomind/models/llama/LlamaBatch.cc b/src/turbomind/models/llama/LlamaBatch.cc
index f46d7ebe35..1afa591828 100644
--- a/src/turbomind/models/llama/LlamaBatch.cc
+++ b/src/turbomind/models/llama/LlamaBatch.cc
@@ -428,6 +428,10 @@ bool LlamaBatch::Initialize()
         static_assert(sizeof(uintptr_t) == sizeof(void*));
     }
 
+    // clear incoming buffer
+    std::fill(incoming_->requests.begin(), incoming_->requests.end(), nullptr);
+    std::fill(incoming_->sequences.begin(), incoming_->sequences.end(), nullptr);
+
     // in case of swap-in/swap-out or there are holes in active buffer, layout of the buffers is changed
     // generation & sampling need to be re-initialized for correctness
     return exchange || active_holes;