Skip to content

Commit

Permalink
clear finished requests
Browse files Browse the repository at this point in the history
  • Loading branch information
lzhangzz committed Nov 7, 2023
1 parent 8c8d8bf commit d3a1356
Showing 1 changed file with 6 additions and 3 deletions.
9 changes: 6 additions & 3 deletions src/turbomind/models/llama/LlamaBatch.cc
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ auto LlamaBatch<T>::ProcessStopRequests(const Requests& requests) -> std::vector
if (state_->requests[i] && state_->requests[i]->id == r->id) {
ec = 0;
CompleteRequest(i, true, r->end_flag);
+ state_->requests[i].reset();
break;
}
}
Expand Down Expand Up @@ -174,7 +175,8 @@ void LlamaBatch<T>::ProcessInferRequests(const Requests& requests)
{
auto& state = *incoming_;

- state.size = state.active_size = 0;
+ FT_CHECK(state.size == 0);
+ FT_CHECK(state.active_size == 0);

int i = 0;
for (const auto& r : requests) {
Expand Down Expand Up @@ -429,8 +431,9 @@ bool LlamaBatch<T>::Initialize()
}

// clear incoming buffer
- std::fill(incoming_->requests.begin(), incoming_->requests.end(), nullptr);
- std::fill(incoming_->sequences.begin(), incoming_->sequences.end(), nullptr);
+ std::fill_n(incoming_->requests.begin(), incoming_->size, nullptr);
+ std::fill_n(incoming_->sequences.begin(), incoming_->size, nullptr);
+ incoming_->size = 0;

// in case of swap-in/swap-out or there are holes in active buffer, layout of the buffers is changed
// generation & sampling need to be re-initialized for correctness
Expand Down

0 comments on commit d3a1356

Please sign in to comment.