Skip to content

Commit

Permalink
sync before using async data
Browse files Browse the repository at this point in the history
  • Loading branch information
lzhangzz committed Oct 26, 2023
1 parent a940032 commit 7a40a84
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 0 deletions.
2 changes: 2 additions & 0 deletions src/turbomind/layers/sampling_layers/TopKSamplingLayer.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "src/turbomind/layers/sampling_layers/TopKSamplingLayer.h"
#include "src/turbomind/macro.h"
#include "src/turbomind/models/llama/llama_utils.h"
#include "src/turbomind/utils/cuda_utils.h"
#include "src/turbomind/utils/logger.h"
#include "src/turbomind/utils/memory_utils.h"

Expand Down Expand Up @@ -202,6 +203,7 @@ void TopKSamplingLayer<T>::setup(const size_t batch_size, const size_t beam_widt
cudaAutoCpy(skip_decode_, skip_decode_buf_, batch_size, stream_);
uint* runtime_top_ks = new uint[batch_size];
cudaAutoCpy(runtime_top_ks, runtime_top_k_buf_, batch_size, stream_);
check_cuda_error(cudaStreamSynchronize(stream_));
runtime_max_top_k_ = static_cast<int>(*std::max_element(runtime_top_ks, runtime_top_ks + batch_size));
delete[] runtime_top_ks;
}
Expand Down
1 change: 1 addition & 0 deletions src/turbomind/layers/sampling_layers/TopPSamplingLayer.cu
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ void TopPSamplingLayer<T>::setup(const size_t batch_size, const size_t beam_widt
cudaAutoCpy(skip_decode_, skip_decode_buf_, batch_size, stream_);
float* runtime_top_ps = new float[batch_size];
cudaAutoCpy(runtime_top_ps, runtime_top_p_buf_, batch_size, stream_);
check_cuda_error(cudaStreamSynchronize(stream_));
runtime_max_top_p_ = *std::max_element(runtime_top_ps, runtime_top_ps + batch_size);
delete[] runtime_top_ps;
}
Expand Down

0 comments on commit 7a40a84

Please sign in to comment.