Skip to content

Commit

Permalink
fix finished_count
Browse files Browse the repository at this point in the history
  • Loading branch information
lzhangzz committed Nov 14, 2023
1 parent ffe7f5b commit d32bdaf
Showing 1 changed file with 7 additions and 1 deletion.
8 changes: 7 additions & 1 deletion src/turbomind/models/llama/LlamaBatch.cc
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,9 @@ bool LlamaBatch<T>::Initialize()
});

// not all blocks are enough to hold a single sequence
// FT_CHECK_WITH_INFO(active_end != idxs.begin(), "No enough blocks.");
if (!sequences.empty()) {
FT_CHECK_WITH_INFO(active_end != idxs.begin(), "No enough blocks.");
}

// move swap-ins to the back
auto swapin_beg = std::stable_partition(idxs.begin(), active_end, [&](int idx) {
Expand Down Expand Up @@ -398,6 +400,8 @@ bool LlamaBatch<T>::Initialize()
ClearState(*incoming_);
}

FT_CHECK(state_->size <= max_batch_size_);

/// Update block ptrs when there were
// 1. swap-in or swap-out
// 2. holes in the active buffer
Expand Down Expand Up @@ -1399,6 +1403,8 @@ void LlamaBatch<T>::InternalThreadEntry(int device_id)
shared_state->barrier->wait();

auto modified = Initialize();
// finished sequences are handled by `Initialize()`
finished_count = 0;

ContextDecode();

Expand Down

0 comments on commit d32bdaf

Please sign in to comment.