Skip to content

Commit

Permalink
Make nonblocking synchronize optional
Browse files Browse the repository at this point in the history
This addresses JuliaGPU#1910 by adding
the boolean environment variable `JULIA_CUDA_NONBLOCKING_SYNCHRONIZE`
to control whether nonblocking synchronization is used. It remains enabled by default.
  • Loading branch information
lcw committed Jul 31, 2023
1 parent d79adbf commit 9223144
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 3 deletions.
4 changes: 3 additions & 1 deletion lib/cudadrv/context.jl
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,9 @@ device_synchronize() = nonblocking_synchronize()
@inline function nonblocking_synchronize()
# perform as much of the sync as possible without blocking in CUDA.
# XXX: remove this using a yield callback, or by synchronizing on a dedicated thread?
nonblocking_synchronize(legacy_stream())
if CUDA._use_nonblocking_synchronize[]
nonblocking_synchronize(legacy_stream())
end

# even though the GPU should be idle now, CUDA hooks work to the actual API call.
# see NVIDIA bug #3383169 for more details.
Expand Down
4 changes: 3 additions & 1 deletion lib/cudadrv/events.jl
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,9 @@ Waits for an event to complete.
function synchronize(e::CuEvent)
# perform as much of the sync as possible without blocking in CUDA.
# XXX: remove this using a yield callback, or by synchronizing on a dedicated thread?
nonblocking_synchronize(e)
if CUDA._use_nonblocking_synchronize[]
nonblocking_synchronize(e)
end

# even though the GPU should be idle now, CUDA hooks work to the actual API call.
# see NVIDIA bug #3383169 for more details.
Expand Down
4 changes: 3 additions & 1 deletion lib/cudadrv/stream.jl
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,9 @@ function synchronize(stream::CuStream=stream(); blocking=nothing)

# perform as much of the sync as possible without blocking in CUDA.
# XXX: remove this using a yield callback, or by synchronizing on a dedicated stream?
nonblocking_synchronize(stream)
if CUDA._use_nonblocking_synchronize[]
nonblocking_synchronize(stream)
end

# even though the GPU should be idle now, CUDA hooks work to the actual API call.
# see NVIDIA bug #3383169 for more details.
Expand Down
6 changes: 6 additions & 0 deletions src/initialization.jl
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ function functional(show_reason::Bool=false)
return false
end

# Global switch read as `CUDA._use_nonblocking_synchronize[]` by the synchronize
# paths to decide whether to call `nonblocking_synchronize` first. Defaults to `true`;
# `__init__` overwrites it with `parse(Bool, ...)` of the environment variable
# `JULIA_CUDA_NONBLOCKING_SYNCHRONIZE` when that variable is set.
const _use_nonblocking_synchronize = Ref{Bool}(true)

function __init__()
precompiling = ccall(:jl_generating_output, Cint, ()) != 0

Expand Down Expand Up @@ -183,6 +185,10 @@ function __init__()
end
end

if haskey(ENV, "JULIA_CUDA_NONBLOCKING_SYNCHRONIZE")
_use_nonblocking_synchronize[] = parse(Bool, ENV["JULIA_CUDA_NONBLOCKING_SYNCHRONIZE"])
end

_initialized[] = true
end

Expand Down

0 comments on commit 9223144

Please sign in to comment.