Skip to content

Commit 51a39ef

Browse files
VarLad and maleadt authored
Add Buffer Device Address support (#311)
Co-authored-by: Tim Besard <[email protected]>
1 parent 5e8c149 commit 51a39ef

File tree

20 files changed

+501
-799
lines changed

20 files changed

+501
-799
lines changed

.github/workflows/Test.yml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ concurrency:
1313

1414
jobs:
1515
test:
16-
name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - PoCL ${{ matrix.pocl }}
16+
name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ matrix.memory_backend }} - PoCL ${{ matrix.pocl }}
1717
runs-on: ${{ matrix.os }}
1818
timeout-minutes: 180
1919
permissions: # needed to allow julia-actions/cache to proactively delete old caches that it has created
@@ -26,7 +26,9 @@ jobs:
2626
os: [ubuntu-24.04, ubuntu-24.04-arm, macOS-13, macOS-15, windows-2025]
2727
arch: [x64, arm64]
2828
pocl: [jll, local]
29+
memory_backend: [usm, svm, buffer]
2930
exclude:
31+
# unsupported combinations
3032
- os: ubuntu-24.04
3133
arch: arm64
3234
- os: windows-2025
@@ -125,11 +127,13 @@ jobs:
125127
run(```$(cmake()) --build $builddir --parallel $(Sys.CPU_THREADS) --target install```)
126128
end'
127129
128-
echo '[pocl_jll]' > test/LocalPreferences.toml
130+
echo '[pocl_jll]' >> test/LocalPreferences.toml
129131
echo 'libpocl_path="${{ github.workspace }}/target/lib/libpocl.so"' >> test/LocalPreferences.toml
130132
131133
- name: Setup OpenCL.jl
132134
run: |
135+
echo '[OpenCL]' >> test/LocalPreferences.toml
136+
echo 'default_memory_backend="${{ matrix.memory_backend }}"' >> test/LocalPreferences.toml
133137
julia --project -e '
134138
using Pkg
135139
Pkg.develop(path="lib/intrinsics")'

.gitignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1 @@
11
Manifest.toml
2-
LocalPreferences.toml

LocalPreferences.toml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
[OpenCL]
2+
# Which memory back-end to use for unspecified CLArray allocations. This can be:
3+
# - "buffer": plain buffers (using pointers if `cl_ext_buffer_device_address` is available)
4+
# - "usm": Unified Shared Memory (requiring `cl_intel_unified_shared_memory`)
5+
# - "svm": Shared Virtual Memory (requiring coarse-grained SVM support)
6+
# If unspecified, a default back-end is selected based on the platform and device capabilities.
7+
#default_memory_backend="..."

Project.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
1010
LLVM = "929cbde3-209d-540e-8aea-75f648917ca0"
1111
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
1212
OpenCL_jll = "6cb37087-e8b6-5417-8430-1f242f1e46e4"
13+
Preferences = "21216c6a-2e73-6563-6e65-726566657250"
1314
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
1415
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
1516
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
@@ -26,6 +27,7 @@ KernelAbstractions = "0.9.2"
2627
LLVM = "9.1"
2728
LinearAlgebra = "1"
2829
OpenCL_jll = "=2024.10.24"
30+
Preferences = "1"
2931
Printf = "1"
3032
Random = "1"
3133
Reexport = "1"

lib/cl/CL.jl

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
module cl
22

3+
import ..OpenCL
34
using Printf
5+
using Preferences
46

57
include("pointer.jl")
68
include("api.jl")
@@ -18,8 +20,7 @@ include("device.jl")
1820
include("context.jl")
1921
include("cmdqueue.jl")
2022
include("event.jl")
21-
include("memory/memory.jl")
22-
include("buffer.jl")
23+
include("memory.jl")
2324
include("program.jl")
2425
include("kernel.jl")
2526

lib/cl/device.jl

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -190,10 +190,7 @@ function exec_capabilities(d::Device)
190190
)
191191
end
192192

193-
function usm_supported(d::Device)
194-
"cl_intel_unified_shared_memory" in d.extensions || return false
195-
return true
196-
end
193+
usm_supported(d::Device) = "cl_intel_unified_shared_memory" in d.extensions
197194

198195
function usm_capabilities(d::Device)
199196
usm_supported(d) || throw(ArgumentError("Unified Shared Memory not supported on this device"))
@@ -256,6 +253,8 @@ function svm_capabilities(d::Device)
256253
)
257254
end
258255

256+
bda_supported(d::Device) = "cl_ext_buffer_device_address" in d.extensions
257+
259258
function cl_device_type(dtype::Symbol)
260259
if dtype == :all
261260
cl_dtype = CL_DEVICE_TYPE_ALL

lib/cl/kernel.jl

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ function set_arg!(k::Kernel, idx::Integer, arg::CLPtr{T}) where {T}
6969
end
7070

7171
# raw memory
72-
function set_arg!(k::Kernel, idx::Integer, arg::AbstractMemory)
72+
function set_arg!(k::Kernel, idx::Integer, arg::AbstractPointerMemory)
7373
# XXX: this assumes that the receiving argument is pointer-typed, which is not the case
7474
# with Julia's `Ptr` ABI. Instead, one should reinterpret the pointer as a
7575
# `Core.LLVMPtr`, which _is_ pointer-valued. We retain this handling for `Ptr` for
@@ -79,6 +79,8 @@ function set_arg!(k::Kernel, idx::Integer, arg::AbstractMemory)
7979
clSetKernelArgSVMPointer(k, idx - 1, pointer(arg))
8080
elseif arg isa UnifiedMemory
8181
clSetKernelArgMemPointerINTEL(k, idx - 1, pointer(arg))
82+
elseif arg isa Buffer
83+
clSetKernelArgDevicePointerEXT(k, idx - 1, pointer(arg))
8284
else
8385
error("Unknown memory type")
8486
end
@@ -191,6 +193,7 @@ function call(
191193
if !isempty(indirect_memory)
192194
svm_pointers = CLPtr{Cvoid}[]
193195
usm_pointers = CLPtr{Cvoid}[]
196+
bda_pointers = CLPtr{Cvoid}[]
194197
device_access = host_access = shared_access = false
195198
for memory in indirect_memory
196199
ptr = pointer(memory)
@@ -200,6 +203,8 @@ function call(
200203

201204
if memory isa SharedVirtualMemory
202205
push!(svm_pointers, ptr)
206+
elseif memory isa Buffer
207+
push!(bda_pointers, ptr)
203208
elseif memory isa UnifiedDeviceMemory
204209
device_access = true
205210
push!(usm_pointers, ptr)
@@ -229,6 +234,9 @@ function call(
229234
if !isempty(svm_pointers)
230235
clSetKernelExecInfo(k, CL_KERNEL_EXEC_INFO_SVM_PTRS, sizeof(svm_pointers), svm_pointers)
231236
end
237+
if !isempty(bda_pointers)
238+
clSetKernelExecInfo(k, CL_KERNEL_EXEC_INFO_DEVICE_PTRS_EXT, sizeof(bda_pointers), bda_pointers)
239+
end
232240
if !isempty(usm_pointers)
233241
clSetKernelExecInfo(k, CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL, sizeof(usm_pointers), usm_pointers)
234242
end

0 commit comments

Comments
 (0)