Skip to content

Commit

Permalink
out-of-place allgather
Browse files: browse the repository at this point in the history
  • Loading branch information
chhwang committed Feb 22, 2024
1 parent 141e7da commit c50f7b5
Showing 1 changed file with 2 additions and 0 deletions.
2 changes: 2 additions & 0 deletions apps/nccl/src/nccl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,8 @@ __global__ void __launch_bounds__(1024, 1)
/// Host-side launcher for the allgather collective.
///
/// Stages this rank's own contribution into its slot of the result buffer and
/// then enqueues the `allgather5` kernel on `stream`. Both operations are
/// asynchronous with respect to the host; this function does not synchronize.
///
/// @param buff          Device pointer to this rank's `nelems` input elements.
/// @param scratch       Not referenced by this function.
/// @param resultBuff    Device pointer to the output buffer; this rank's slot
///                      begins at element offset `nelems * rank`.
/// @param rank          Index of the calling rank; forwarded to the kernel.
/// @param nRanksPerNode Forwarded to the kernel unchanged.
/// @param worldSize     Forwarded to the kernel unchanged.
/// @param nelems        Number of elements of type T contributed by this rank.
/// @param stream        Stream on which the copy and the kernel are enqueued.
/// @return The error of the staging copy if it fails; otherwise the result of
///         cudaGetLastError() taken right after the kernel launch (this
///         reports launch errors only, not faults raised during execution).
template <typename T>
cudaError_t allgather(T* buff, T* scratch, T* resultBuff, int rank, int nRanksPerNode, int worldSize, size_t nelems,
                      cudaStream_t stream) {
  T* ownSlot = resultBuff + nelems * rank;

  // In-place call: the input already sits in this rank's slot of the result
  // buffer, so a copy would have identical source and destination. Skip it
  // rather than enqueue a redundant self-copy on the stream.
  if (buff != ownSlot) {
    cudaError_t err = cudaMemcpyAsync(ownSlot, buff, nelems * sizeof(T), cudaMemcpyDeviceToDevice, stream);
    if (err != cudaSuccess) return err;
  }

  // No buffer pointers are passed to the kernel here.
  // NOTE(review): presumably allgather5 reaches the buffers through state set
  // up elsewhere in this file -- confirm against its definition.
  // NOTE(review): the fixed 24 x 1024 launch shape is assumed to match the
  // kernel's __launch_bounds__ -- confirm.
  allgather5<<<24, 1024, 0, stream>>>(rank, worldSize, nRanksPerNode, nelems);
  return cudaGetLastError();
}
Expand Down

0 comments on commit c50f7b5

Please sign in to comment.