Skip to content

Add Buffer Device Address Backend #311

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 26 commits into from
Jun 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions .github/workflows/Test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ concurrency:

jobs:
test:
name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - PoCL ${{ matrix.pocl }}
name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ matrix.memory_backend }} - PoCL ${{ matrix.pocl }}
runs-on: ${{ matrix.os }}
timeout-minutes: 180
permissions: # needed to allow julia-actions/cache to proactively delete old caches that it has created
Expand All @@ -26,7 +26,9 @@ jobs:
os: [ubuntu-24.04, ubuntu-24.04-arm, macOS-13, macOS-15, windows-2025]
arch: [x64, arm64]
pocl: [jll, local]
memory_backend: [usm, svm, buffer]
exclude:
# unsupported combinations
- os: ubuntu-24.04
arch: arm64
- os: windows-2025
Expand Down Expand Up @@ -125,11 +127,13 @@ jobs:
run(```$(cmake()) --build $builddir --parallel $(Sys.CPU_THREADS) --target install```)
end'

echo '[pocl_jll]' > test/LocalPreferences.toml
echo '[pocl_jll]' >> test/LocalPreferences.toml
echo 'libpocl_path="${{ github.workspace }}/target/lib/libpocl.so"' >> test/LocalPreferences.toml

- name: Setup OpenCL.jl
run: |
echo '[OpenCL]' >> test/LocalPreferences.toml
echo 'default_memory_backend="${{ matrix.memory_backend }}"' >> test/LocalPreferences.toml
julia --project -e '
using Pkg
Pkg.develop(path="lib/intrinsics")'
Expand Down
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
Manifest.toml
LocalPreferences.toml
7 changes: 7 additions & 0 deletions LocalPreferences.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[OpenCL]
# Which memory back-end to use for unspecified CLArray allocations. This can be:
# - "buffer": plain buffers (using pointers if `cl_ext_buffer_device_address` is available)
# - "usm": Unified Shared Memory (requiring `cl_intel_unified_shared_memory`)
# - "svm": Shared Virtual Memory (requiring coarse-grained SVM support)
# If unspecified, a default back-end is chosen based on the platform and device capabilities.
#default_memory_backend="..."
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
LLVM = "929cbde3-209d-540e-8aea-75f648917ca0"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
OpenCL_jll = "6cb37087-e8b6-5417-8430-1f242f1e46e4"
Preferences = "21216c6a-2e73-6563-6e65-726566657250"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
Expand All @@ -26,6 +27,7 @@ KernelAbstractions = "0.9.2"
LLVM = "9.1"
LinearAlgebra = "1"
OpenCL_jll = "=2024.10.24"
Preferences = "1"
Printf = "1"
Random = "1"
Reexport = "1"
Expand Down
5 changes: 3 additions & 2 deletions lib/cl/CL.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
module cl

import ..OpenCL
using Printf
using Preferences

include("pointer.jl")
include("api.jl")
Expand All @@ -18,8 +20,7 @@ include("device.jl")
include("context.jl")
include("cmdqueue.jl")
include("event.jl")
include("memory/memory.jl")
include("buffer.jl")
include("memory.jl")
include("program.jl")
include("kernel.jl")

Expand Down
7 changes: 3 additions & 4 deletions lib/cl/device.jl
Original file line number Diff line number Diff line change
Expand Up @@ -190,10 +190,7 @@ function exec_capabilities(d::Device)
)
end

# Report whether device `d` lists the `cl_intel_unified_shared_memory`
# extension in `d.extensions`.
function usm_supported(d::Device)
    if "cl_intel_unified_shared_memory" in d.extensions
        return true
    else
        return false
    end
end
# Report whether device `d` lists the `cl_intel_unified_shared_memory`
# extension in `d.extensions`.
function usm_supported(d::Device)
    return "cl_intel_unified_shared_memory" in d.extensions
end

function usm_capabilities(d::Device)
usm_supported(d) || throw(ArgumentError("Unified Shared Memory not supported on this device"))
Expand Down Expand Up @@ -256,6 +253,8 @@ function svm_capabilities(d::Device)
)
end

# Report whether device `d` lists the `cl_ext_buffer_device_address`
# extension in `d.extensions`.
function bda_supported(d::Device)
    return "cl_ext_buffer_device_address" in d.extensions
end

function cl_device_type(dtype::Symbol)
if dtype == :all
cl_dtype = CL_DEVICE_TYPE_ALL
Expand Down
10 changes: 9 additions & 1 deletion lib/cl/kernel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ function set_arg!(k::Kernel, idx::Integer, arg::CLPtr{T}) where {T}
end

# raw memory
function set_arg!(k::Kernel, idx::Integer, arg::AbstractMemory)
function set_arg!(k::Kernel, idx::Integer, arg::AbstractPointerMemory)
# XXX: this assumes that the receiving argument is pointer-typed, which is not the case
# with Julia's `Ptr` ABI. Instead, one should reinterpret the pointer as a
# `Core.LLVMPtr`, which _is_ pointer-valued. We retain this handling for `Ptr` for
Expand All @@ -79,6 +79,8 @@ function set_arg!(k::Kernel, idx::Integer, arg::AbstractMemory)
clSetKernelArgSVMPointer(k, idx - 1, pointer(arg))
elseif arg isa UnifiedMemory
clSetKernelArgMemPointerINTEL(k, idx - 1, pointer(arg))
elseif arg isa Buffer
clSetKernelArgDevicePointerEXT(k, idx - 1, pointer(arg))
else
error("Unknown memory type")
end
Expand Down Expand Up @@ -191,6 +193,7 @@ function call(
if !isempty(indirect_memory)
svm_pointers = CLPtr{Cvoid}[]
usm_pointers = CLPtr{Cvoid}[]
bda_pointers = CLPtr{Cvoid}[]
device_access = host_access = shared_access = false
for memory in indirect_memory
ptr = pointer(memory)
Expand All @@ -200,6 +203,8 @@ function call(

if memory isa SharedVirtualMemory
push!(svm_pointers, ptr)
elseif memory isa Buffer
push!(bda_pointers, ptr)
elseif memory isa UnifiedDeviceMemory
device_access = true
push!(usm_pointers, ptr)
Expand Down Expand Up @@ -229,6 +234,9 @@ function call(
if !isempty(svm_pointers)
clSetKernelExecInfo(k, CL_KERNEL_EXEC_INFO_SVM_PTRS, sizeof(svm_pointers), svm_pointers)
end
if !isempty(bda_pointers)
clSetKernelExecInfo(k, CL_KERNEL_EXEC_INFO_DEVICE_PTRS_EXT, sizeof(bda_pointers), bda_pointers)
end
if !isempty(usm_pointers)
clSetKernelExecInfo(k, CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL, sizeof(usm_pointers), usm_pointers)
end
Expand Down
Loading