Skip to content

Commit

Permalink
clean up
Browse files Browse the repository at this point in the history
  • Loading branch information
lzhangzz committed Nov 17, 2023
1 parent 9a972b6 commit df40522
Show file tree
Hide file tree
Showing 3 changed files with 2 additions and 6 deletions.
2 changes: 1 addition & 1 deletion src/turbomind/models/llama/LlamaBatch.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1480,7 +1480,7 @@ void LlamaBatch<T>::SendSignals(std::vector<Signal> signals)
template<typename T>
void LlamaBatch<T>::Start()
{
-    TM_LOG_ERROR("LlamaBatch<T>::Start()");
+    TM_LOG_INFO("LlamaBatch<T>::Start()");
int device_id = -1;
check_cuda_error(cudaGetDevice(&device_id));
internal_thread_ = std::thread(&LlamaBatch::InternalThreadEntry, this, device_id);
Expand Down
2 changes: 1 addition & 1 deletion src/turbomind/models/llama/LlamaBatch.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ class LlamaBatch {

~LlamaBatch()
{
-        TM_LOG_ERROR("~LlamaBatch()");
+        TM_LOG_INFO("~LlamaBatch()");
model_->shared_state_->request_queue.close();

internal_thread_.join();
Expand Down
4 changes: 0 additions & 4 deletions src/turbomind/models/llama/LlamaV2.cc
Original file line number Diff line number Diff line change
Expand Up @@ -102,10 +102,6 @@ LlamaV2<T>::LlamaV2(size_t head_num,
size_t elem_bits = 0;
if (quant_policy & QuantPolicy::kCacheKVInt8) {
elem_bits = sizeof(int8_t) * 8;
-        if (use_context_fmha) {
-            TM_LOG_ERROR("use_context_fmha not support int8");
-            assert(0);
-        }
}
else {
elem_bits = sizeof(T) * 8;
Expand Down

0 comments on commit df40522

Please sign in to comment.