Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/TrixiCUDA.jl
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ export semidiscretizeGPU
# Get called every time the package is loaded
function __init__()

    # Initialize the device: query GPU properties and create CUDA streams
    # (falls back to safe defaults inside `init_device` if no GPU is usable)
    init_device()
end

Expand Down
57 changes: 44 additions & 13 deletions src/auxiliary/auxiliary.jl
Original file line number Diff line number Diff line change
@@ -1,29 +1,60 @@
include("configurators.jl")
include("stable.jl")

# Initialize the device
#
# Detects the current CUDA device and populates the package-level globals
# `MULTIPROCESSOR_COUNT`, `MAX_THREADS_PER_BLOCK`, `MAX_SHARED_MEMORY_PER_BLOCK`,
# `STREAM1`, and `STREAM2`. On partial failure the properties fall back to zero
# and the streams fall back to the default CUDA stream; if no device can be
# detected at all, every global is set to `nothing`.
function init_device()
    try
        # Consider single GPU for now
        # TODO: Consider multiple GPUs later
        device = CUDA.device()

        try
            # Get properties used for kernel launch configuration
            global MULTIPROCESSOR_COUNT = CUDA.attribute(device,
                CUDA.CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT)
            global MAX_THREADS_PER_BLOCK = CUDA.attribute(device,
                CUDA.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK)
            global MAX_SHARED_MEMORY_PER_BLOCK = CUDA.attribute(device,
                CUDA.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK)
        catch e
            # Handle CUDA errors; only `CuError` carries a `.msg` field, so
            # guard the access to avoid a secondary error for other exceptions
            println("Error getting device properties: ",
                    e isa CUDA.CuError ? e.msg : e)
            println("Ensure that your GPU device properties can be retrieved successfully.")

            # Fall back to set zeros
            global MULTIPROCESSOR_COUNT = 0
            global MAX_THREADS_PER_BLOCK = 0
            global MAX_SHARED_MEMORY_PER_BLOCK = 0
            println("Device properties have been set to zero. CUDA operations " *
                    "will fail because no valid properties are configured.")
        end

        try
            # Create CUDA streams (two streams allow concurrent execution)
            global STREAM1 = CUDA.CuStream()
            global STREAM2 = CUDA.CuStream()
        catch e
            # Handle CUDA errors; same `.msg` guard as above
            println("Error initializing CUDA streams: ",
                    e isa CUDA.CuError ? e.msg : e)
            println("Ensure there are enough GPU resources available for stream creation.")

            # Fall back to set default streams
            global STREAM1 = CUDA.stream()
            global STREAM2 = CUDA.stream()
            println("Streams have been set to the default CUDA stream. " *
                    "This will impact performance due to no concurrency.")
        end

    catch e
        # Device detection failed entirely (e.g. no CUDA-compatible GPU)
        if e isa CUDA.CuError
            println("Error detecting device: ", e.msg)
            println("Ensure a CUDA compatible GPU is available and properly configured.")
        else
            println("An unexpected error occurred: ", e)
        end

        # Fall back to set nothing so downstream code can detect the
        # uninitialized state explicitly (via `isnothing`)
        global MULTIPROCESSOR_COUNT = nothing
        global MAX_THREADS_PER_BLOCK = nothing
        global MAX_SHARED_MEMORY_PER_BLOCK = nothing
        global STREAM1 = nothing
        global STREAM2 = nothing
    end
end