Skip to content

Commit

Permalink
Use nonblocking sync from CUDA.jl.
Browse files Browse the repository at this point in the history
  • Loading branch information
maleadt committed Sep 20, 2023
1 parent 70db3b6 commit 47ee618
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 5 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ LLVMLoopInfo = "8b046642-f1f6-4319-8d3c-209ddc03c586"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"

[compat]
CUDA = "3.5, 4, 5"
CUDA = "5"
ForwardDiff = "0.10"
LLVM = "3, 4, 5, 6"
LLVMLoopInfo = "1"
Expand Down
6 changes: 2 additions & 4 deletions benchmarks/blas.jl
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,11 @@ function blas_benchmark(group, a_type, b_type, cd_type, N, M=N, K=N; alpha=true,

# NOTE: we use a blocking `CUDA.@sync` (via `@async_benchmarkable`) instead of a plain
# `synchronize` to avoid influence from the Julia scheduler
group[name] = @benchmarkable(
group[name] = @async_benchmarkable(
begin
GemmKernels.matmatmul!(c, $a_layout, $b_layout, a, b, $alpha, $beta; $(kwargs)...)
CUDA.cuStreamSynchronize(stream())
end,
setup=(a=CuArray($a_h); b=CuArray($b_h); c=CuArray($c_h);
CUDA.cuStreamSynchronize(stream())),
setup=(a=CuArray($a_h); b=CuArray($b_h); c=CuArray($c_h); synchronize()),
teardown=(CUDA.unsafe_free!(a); CUDA.unsafe_free!(b); CUDA.unsafe_free!(c))
)
end
Expand Down
6 changes: 6 additions & 0 deletions benchmarks/runbenchmarks.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@ using JSON

using StableRNGs

# Convenience macro to create a benchmark that requires synchronizing the GPU.
#
# The first argument (after an optional leading `; kwargs...` parameters block) is the
# expression to benchmark; only that expression is wrapped in `CUDA.@sync blocking=true`.
# All remaining arguments (e.g. `setup=...`, `teardown=...`) are forwarded to
# `@benchmarkable` unchanged.
#
# The macro call is built explicitly instead of writing
# `@benchmarkable CUDA.@sync blocking=true $(ex...)`: a paren-less nested macro call
# absorbs every trailing argument, so the splat would hand `setup`/`teardown` to
# `CUDA.@sync` rather than to `@benchmarkable`.
#
# The result is escaped as a whole so that `$`-interpolated values and the names
# `@benchmarkable` / `CUDA` are resolved in the caller's scope; this lets the macro be
# used inside functions that interpolate local variables.
macro async_benchmarkable(ex...)
    # locate the benchmarked expression, skipping a leading parameters block
    idx = findfirst(arg -> !Meta.isexpr(arg, :parameters), ex)
    idx === nothing &&
        throw(ArgumentError("@async_benchmarkable requires an expression to benchmark"))

    args = Any[ex...]
    args[idx] = :(CUDA.@sync blocking=true $(args[idx]))

    # `__source__` keeps line information pointing at the call site
    return esc(Expr(:macrocall, Symbol("@benchmarkable"), __source__, args...))
end

# we use setup/teardown phases to allocate/free GPU memory,
# so make sure to run a couple of evaluations to amortize
Expand Down

0 comments on commit 47ee618

Please sign in to comment.